diff --git a/app/boards/intel_adsp_ace15_mtpm.conf b/app/boards/intel_adsp_ace15_mtpm.conf
index 5923272fa986..bc7931633fc4 100644
--- a/app/boards/intel_adsp_ace15_mtpm.conf
+++ b/app/boards/intel_adsp_ace15_mtpm.conf
@@ -79,7 +79,7 @@ CONFIG_INTEL_ADSP_IPC=y
 CONFIG_WATCHDOG=y
 CONFIG_LL_WATCHDOG=y
 
-CONFIG_MEMORY_WIN_2_SIZE=12288
+CONFIG_MEMORY_WIN_2_SIZE=16384
 
 # Temporary disabled options
 CONFIG_TRACE=n
diff --git a/app/boards/intel_adsp_ace20_lnl.conf b/app/boards/intel_adsp_ace20_lnl.conf
index 88a8e6522cfe..dff8260f1f33 100644
--- a/app/boards/intel_adsp_ace20_lnl.conf
+++ b/app/boards/intel_adsp_ace20_lnl.conf
@@ -74,7 +74,7 @@ CONFIG_INTEL_ADSP_IPC=y
 CONFIG_PROBE=y
 CONFIG_PROBE_DMA_MAX=2
 
-CONFIG_MEMORY_WIN_2_SIZE=12288
+CONFIG_MEMORY_WIN_2_SIZE=16384
 
 
 # Temporary disabled options
diff --git a/app/prj.conf b/app/prj.conf
index 5a681ac5b74b..88e4f33e66b4 100644
--- a/app/prj.conf
+++ b/app/prj.conf
@@ -57,3 +57,10 @@ CONFIG_HEAP_MEM_POOL_SIZE=2048
 CONFIG_LLEXT=y
 CONFIG_LLEXT_STORAGE_WRITABLE=y
 CONFIG_MODULES=y
+
+#Thread info
+
+CONFIG_SOF_TELEMETRY2_SLOT=y
+CONFIG_SOF_TELEMETRY2_THREAD_INFO=y
+CONFIG_THREAD_NAME=y
+CONFIG_MEMORY_WIN_2_SIZE=16384
diff --git a/src/debug/telemetry/CMakeLists.txt b/src/debug/telemetry/CMakeLists.txt
index f2249490db60..318d349778a6 100644
--- a/src/debug/telemetry/CMakeLists.txt
+++ b/src/debug/telemetry/CMakeLists.txt
@@ -1,3 +1,7 @@
 # SPDX-License-Identifier: BSD-3-Clause
 
 add_local_sources_ifdef(CONFIG_SOF_TELEMETRY sof telemetry.c)
+
+add_local_sources_ifdef(CONFIG_SOF_TELEMETRY2_SLOT sof telemetry2_slot.c)
+
+add_local_sources_ifdef(CONFIG_SOF_TELEMETRY2_THREAD_INFO sof telemetry2_thread_info.c)
diff --git a/src/debug/telemetry/Kconfig b/src/debug/telemetry/Kconfig
index 54f4b66f590d..653f5cab689c 100644
--- a/src/debug/telemetry/Kconfig
+++ b/src/debug/telemetry/Kconfig
@@ -9,3 +9,46 @@ config SOF_TELEMETRY
 	  systick_info measurement which measures scheduler task performance and may
 	  slightly affect overall performance.
 
+config SOF_TELEMETRY2_SLOT
+	bool "Enable SOF telemetry2 slot in debug window"
+	help
+	  TELEMETRY2 is a generic memory structure for passing real-time debug
+	  information from the SOF system. It only provides the framework for
+	  passing pieces of abstract data from DSP side to host side debugging
+	  tools. The option enables TELEMETRY2 debug window slot for transferring
+	  telemetry2 data.
+
+if SOF_TELEMETRY2_SLOT
+
+config SOF_TELEMETRY2_SLOT_NUMBER
+	int "Debug window slot where to put telemetry2 slot"
+	default 3
+	range 0 14
+	help
+	  Which debug slot to reserve for telemetry2. Remember to map
+	  the slot with MEMORY_WIN_2_SIZE in soc/xtensa/intel_adsp.
+
+config SOF_TELEMETRY2_THREAD_INFO
+	bool "Enable thread info chunk in telemetry2 slot"
+	select INIT_STACKS
+	select THREAD_MONITOR
+	select THREAD_STACK_INFO
+	select THREAD_RUNTIME_STATS
+	help
+	  This activates Zephyr thread info in telemetry2 slot. For
+	  the thread names to be any more than hex numbers its suggested
+	  to select THREAD_NAME=y too.
+
+if SOF_TELEMETRY2_THREAD_INFO
+
+config SOF_TELEMETRY2_THREAD_INFO_INTERVAL
+	int "Thread information collection interval in seconds"
+	default 2
+	range 1 10
+	help
+	  Decides how often thread info runs and checks execution cycle
+	  statistics and stack usage.
+
+endif
+
+endif
diff --git a/src/debug/telemetry/telemetry2_slot.c b/src/debug/telemetry/telemetry2_slot.c
new file mode 100644
index 000000000000..1462e870dd26
--- /dev/null
+++ b/src/debug/telemetry/telemetry2_slot.c
@@ -0,0 +1,112 @@
+// SPDX-License-Identifier: BSD-3-Clause
+//
+// Copyright(c) 2024 Intel Corporation.
+
+#include <zephyr/logging/log.h>
+#include <zephyr/spinlock.h>
+#include <adsp_debug_window.h>
+#include <adsp_memory.h>
+#include <sof/common.h>
+#include "telemetry2_slot.h"
+
+LOG_MODULE_REGISTER(telemetry2_slot);
+
+static struct k_spinlock lock;
+
+static
+struct telemetry2_chunk_hdr *telemetry2_chunk_search(struct telemetry2_chunk_hdr *chunk,
+						     int id)
+{
+	uint8_t *p;
+
+	/* Loop through the chunks and look for 'id'. If found leave a
+	 * pointer to it in 'chunk', if not leave 'chunk' pointing at
+	 * the beginning of the free space.
+	 */
+	while (chunk->id != id && chunk->size != 0) {
+		p = (((uint8_t *) chunk) + chunk->size);
+
+		chunk = (struct telemetry2_chunk_hdr *) p;
+	}
+	return chunk;
+}
+
+/*
+ * Search, or if not found reserve, a chunk of specified size and id from
+ * telemetry2 slot. Each chuck must be aligned with a 64-byte cache line.
+ * A pointer to the chunk is returned.
+ */
+void *telemetry2_chunk_get(int id, size_t required_size)
+{
+	const int slot = CONFIG_SOF_TELEMETRY2_SLOT_NUMBER;
+	struct telemetry2_payload_hdr *payload =
+		(struct telemetry2_payload_hdr *)(ADSP_DW->slots[slot]);
+	size_t hdr_size = ALIGN_UP(sizeof(*payload), CONFIG_DCACHE_LINE_SIZE);
+	struct telemetry2_chunk_hdr *chunk = (struct telemetry2_chunk_hdr *)
+		(((uint8_t *) payload) + hdr_size);
+	size_t size = ALIGN_UP(required_size, CONFIG_DCACHE_LINE_SIZE);
+	k_spinlock_key_t key;
+
+	if (ADSP_DW->descs[slot].type != ADSP_DW_SLOT_TELEMETRY2) {
+		if (ADSP_DW->descs[slot].type != 0)
+			LOG_WRN("Slot %d was not free: %u", slot, ADSP_DW->descs[slot].type);
+		LOG_INF("Initializing telemetry2 slot 0x%08x", ADSP_DW->descs[slot].type);
+		ADSP_DW->descs[slot].type = ADSP_DW_SLOT_TELEMETRY2;
+		payload->hdr_size = hdr_size;
+		payload->magic = TELEMETRY2_PAYLOAD_MAGIC;
+		payload->abi = TELEMETRY2_PAYLOAD_V0_0;
+	}
+
+	LOG_INF("Add id %u size %u (after alignment %u)", id, required_size, size);
+	chunk = telemetry2_chunk_search(chunk, id);
+	if (id == chunk->id)
+		goto match_found;
+
+	/* End of chunk list reached, but specified chunk not found. We
+	 * need to reseeve one, so take the spin lock and check if more
+	 * chunks were added since the previus search.
+	 */
+	key = k_spin_lock(&lock);
+	chunk = telemetry2_chunk_search(chunk, id);
+	/* Check if some other thread beat us to add the chunk we want */
+	if (id == chunk->id) {
+		k_spin_unlock(&lock, key);
+		goto match_found;
+	}
+
+	/* If the chunk was not found, reserve space for it after
+	 * the last reserved chunk.
+	 */
+	if (((uint8_t *) chunk) + size >= ADSP_DW->slots[slot + 1]) {
+		LOG_ERR("No space for chunk %u of size %u in slot %u, offset %u",
+			id, size, slot, ((uint8_t *) chunk) - ADSP_DW->slots[slot]);
+		k_spin_unlock(&lock, key);
+		return NULL;
+	}
+
+	if (chunk->id != TELEMETRY2_CHUNK_ID_EMPTY)
+		LOG_WRN("Chunk of size %u has type %u, assuming empty",
+			chunk->size, chunk->id);
+
+	LOG_INF("Chunk %u reserved", id);
+	chunk->size = size;
+	chunk->id = id;
+	payload->total_size = size + ((uint8_t *) chunk) - ((uint8_t *) payload);
+
+	k_spin_unlock(&lock, key);
+
+	return chunk;
+
+match_found:
+	/* If a matching chunk was found check that the size is enough
+	 * for what was requested. If yes return the value, if not
+	 * return NULL.
+	 */
+	if (required_size > chunk->size) {
+		LOG_ERR("Chunk %d size too small %u > %u",
+			id, required_size, chunk->size);
+		return NULL;
+	}
+	LOG_INF("Chunk %u found", id);
+	return chunk;
+}
diff --git a/src/debug/telemetry/telemetry2_slot.h b/src/debug/telemetry/telemetry2_slot.h
new file mode 100644
index 000000000000..49483c938b45
--- /dev/null
+++ b/src/debug/telemetry/telemetry2_slot.h
@@ -0,0 +1,89 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ *
+ * Copyright(c) 2024 Intel Corporation.
+ */
+
+#ifndef __SOC_TELEMETRY2_SLOT_H__
+#define __SOC_TELEMETRY2_SLOT_H__
+
+/*
+ * TELEMETRY2 is a generic memory structure for passing real-time debug
+ * information from the SOF system. It only provides the framework for
+ * passing pieces of abstract data from DSP side to host side debugging
+ * tools.
+ *
+ * This example describes how TELEMETRY2 data is transferred from DSP
+ * to host side using debug memory window.
+ *
+ * TELEMETRY2 slot is taken from SRAM window divided into several
+ * chunks with simple header declared below.
+ *
+ * "type" describes the contents of the chunk
+ * "size" describes the size of the chunk in bytes
+ *
+ * After each chunk another chunk is expected to follow, until an
+ * empty chunk header is found. If chunk type is 0 and size 0 it
+ * indicates an empty header, and that the rest of the slot is unused.
+ * A new chunk, with the header, can be reserved stating at the end of
+ * previous chunk (which has been aligned to the end of cache line).
+ *
+ * Each chunk is aligned to start at 64-byte cache line boundary.
+ *
+ * For example:
+ *      --------------------------------------------------  ---
+ *      | magic = TELEMETRY2_PAYLOAD_MAGIC               |   |
+ *      | hdr_size = 64                                  |   |
+ *      | total_size = 320                               |   |
+ *      | abi = TELEMETRY2_PAYLOAD_V0_0                  |  64 bytes
+ *      | tstamp = <aligned with host epoch>             |   |
+ *      | <padding>                                      |   |
+ *	--------------------------------------------------  ---
+ *	| type = ADSP_DW_TELEMETRY2_ID_THREAD_INFO       |   |
+ *      | size = 256                                     |  256 bytes
+ *	|    chunk data                                  |   |
+ *	|    ...                                         |   |
+ *	--------------------------------------------------  ---
+ *	| type = ADSP_DW_TELEMETRY2_TYPE_EMPTY           |
+ *      | size = 0                                       |
+ *	--------------------------------------------------
+ *
+ * The above depicts a telemetry2 slot with thread analyzer data
+ * of 256 bytes (including the header) in the first chunk and
+ * the rest of the slot being free.
+ */
+
+#include <stdint.h>
+
+// HACK: This should be in zephyr/soc/intel/intel_adsp/common/include/adsp_debug_window.h
+#define ADSP_DW_SLOT_TELEMETRY2 0x4c455500
+
+#define TELEMETRY2_PAYLOAD_MAGIC 0x1ED15EED
+
+#define TELEMETRY2_PAYLOAD_V0_0 0
+
+/* Telemetry2 payload header
+ *
+ * The payload header should be written in the beginning of the
+ * telemetry2 payload, before the chunks.
+ */
+struct telemetry2_payload_hdr {
+	uint32_t magic;		// used to identify valid data
+	uint32_t hdr_size;	// size of this header only in bytes
+	uint32_t total_size;	// total size of the whole payload in bytes
+	uint32_t abi;		// ABI version, can be used by tool to warn users if too old.
+	uint64_t tstamp;	// aligned with host epoch.
+} __packed __aligned(CONFIG_DCACHE_LINE_SIZE);
+
+/* Defined telemetry2 chunk types */
+#define TELEMETRY2_CHUNK_ID_EMPTY		0
+#define TELEMETRY2_ID_THREAD_INFO		1
+
+/* Telemetry2 chunk header */
+struct telemetry2_chunk_hdr {
+	uint32_t id;		// Chunk ID
+	uint32_t size;		// Size of telemetry chunk
+} __packed;
+
+void *telemetry2_chunk_get(int id, size_t size);
+
+#endif /* __SOC_TELEMETRY2_SLOT_H__ */
diff --git a/src/debug/telemetry/telemetry2_thread_info.c b/src/debug/telemetry/telemetry2_thread_info.c
new file mode 100644
index 000000000000..723bd5096a17
--- /dev/null
+++ b/src/debug/telemetry/telemetry2_thread_info.c
@@ -0,0 +1,365 @@
+// SPDX-License-Identifier: BSD-3-Clause
+//
+// Copyright(c) 2024 Intel Corporation.
+
+#include <zephyr/logging/log.h>
+#include <zephyr/kernel.h>
+#include <stdio.h>
+#include <soc.h>
+#include <adsp_debug_window.h>
+
+#include "telemetry2_slot.h"
+
+LOG_MODULE_REGISTER(thread_info);
+
+#define THREAD_INFO_MAX_THREADS 16
+
+struct thread_info {
+	char name[14];
+	uint8_t stack_usage;	/* Relative stack usage U(0,8) fixed point value */
+	uint8_t cpu_usage;	/* Relative cpu usage U(0,8) fixed point value */
+} __packed;
+
+#define THREAD_INFO_VERSION_0_0		0
+
+#define THREAD_INFO_STATE_UNINITIALIZED	0
+#define THREAD_INFO_STATE_BEING_UPDATED	1
+#define THREAD_INFO_STATE_UPTODATE	2
+
+/* Core specific data, updated each round, including the thread table. */
+struct thread_info_core {
+	uint8_t state;		/* indicates if core data is in consistent state */
+	uint8_t counter;	/* incremented every round */
+	uint8_t load;		/* Core's load U(0,8) fixed point value */
+	uint8_t thread_count;
+	struct thread_info thread[THREAD_INFO_MAX_THREADS];
+} __packed __aligned(CONFIG_DCACHE_LINE_SIZE);
+
+/* Thread info telemetry2 chunk header. Only initialized at first
+ * thread info thread start. Should be used to find the core specific
+ * sections where the thread info is when decoding.
+ */
+struct thread_info_chunk {
+	struct telemetry2_chunk_hdr hdr;
+	uint16_t core_count;
+	uint16_t core_offset[CONFIG_MP_MAX_NUM_CPUS];
+	struct thread_info_core __aligned(CONFIG_DCACHE_LINE_SIZE) core[CONFIG_MP_MAX_NUM_CPUS];
+} __packed;
+
+#ifdef CONFIG_THREAD_RUNTIME_STATS
+/* Data structure to store the cycle counter values from the previous
+ * round. The numbers are used to calculate what the load was on this
+ * round.
+ */
+static struct previous_counters { // Cached data from previous round
+	uint64_t active;	  // All execution cycles
+	uint64_t all;		  // All cycles including idle
+	struct thread_counters {
+		void *tid;	 // thread ID (the thread struct ptr)
+		uint64_t cycles; // cycle counter value
+	} threads[THREAD_INFO_MAX_THREADS]; // The max amount of threads we follow
+} previous[CONFIG_MP_MAX_NUM_CPUS];
+#endif
+
+/* Data structure to be passed down to thread_info_cb() by
+ * k_thread_foreach_current_cpu().
+ */
+struct user_data {
+	int core;
+	struct thread_info_core *core_data;
+	int thread_count;
+#ifdef CONFIG_THREAD_RUNTIME_STATS
+	bool stats_valid;
+	uint32_t all_cycles;
+	void *active_threads[THREAD_INFO_MAX_THREADS];
+	struct previous_counters *previous;
+#endif
+};
+
+#ifdef CONFIG_THREAD_RUNTIME_STATS
+
+/* Finds and/or updates cached cycle counter value for 'tid'-thread
+ * and calculates used execution cycles since previous round and
+ * returns it.
+ */
+
+static uint32_t get_cycles(void *tid, k_thread_runtime_stats_t *thread_stats,
+			   struct user_data *ud, const char *name)
+
+{
+	int i;
+
+	/* Mark the thread as active */
+	ud->active_threads[ud->thread_count] = tid;
+	/* look for cached value from previous round for 'tid'-thread */
+	for (i = 0; i < ARRAY_SIZE(ud->previous->threads); i++) {
+		if (ud->previous->threads[i].tid == tid) {
+			/* Calculate number cycles since previous round */
+			uint32_t cycles = (uint32_t) (thread_stats->execution_cycles -
+						      ud->previous->threads[i].cycles);
+
+			LOG_DBG("%p found at %d (%s %llu)", tid, i,
+				name, thread_stats->execution_cycles);
+			/* update cached value */
+			ud->previous->threads[i].cycles = thread_stats->execution_cycles;
+			return cycles;
+		}
+	}
+
+	/* If no cached value was found, look for an empty slot to store the recent value */
+	for (i = 0; i < ARRAY_SIZE(ud->previous->threads); i++) {
+		if (ud->previous->threads[i].tid == NULL) {
+			ud->previous->threads[i].tid = tid;
+			ud->previous->threads[i].cycles = thread_stats->execution_cycles;
+			LOG_DBG("%p placed at %d (%s %llu)", tid, i,
+				name, ud->previous->threads[i].cycles);
+			break;
+		}
+	}
+
+	/* If there is more than THREAD_INFO_MAX_THREADS threads in the system */
+	if (i == ARRAY_SIZE(ud->previous->threads))
+		LOG_INF("No place found for %s %p", name, tid);
+
+	/* If there was no previous counter value to compare, return 0 cycles. */
+	return 0;
+}
+
+static uint8_t thread_info_cpu_utilization(struct k_thread *thread,
+					   struct user_data *ud, const char *name)
+{
+	k_thread_runtime_stats_t thread_stats;
+	uint32_t cycles;
+	int ret;
+
+	if (!ud->stats_valid)
+		return 0;
+
+	if (k_thread_runtime_stats_get(thread, &thread_stats) != 0)
+		return 0;
+
+	cycles = get_cycles(thread, &thread_stats, ud, name);
+
+	LOG_DBG("thread %s %u / %u", name, cycles, ud->all_cycles);
+
+	return (uint8_t) ((255llu * cycles) / ud->all_cycles);
+}
+#else
+static uint8_t thread_info_cpu_utilization(struct k_thread *thread,
+					   struct user_data *ud, const char *name)
+{
+	return 0;
+}
+#endif
+
+#ifdef CONFIG_THREAD_STACK_INFO
+static uint8_t thread_info_stack_level(struct k_thread *thread, const char *name)
+{
+	size_t stack_size, stack_unused;
+	int ret;
+
+	stack_size = thread->stack_info.size;
+	ret = k_thread_stack_space_get(thread, &stack_unused);
+	if (ret) {
+		LOG_ERR(" %-20s: unable to get stack space (%d)",
+			name, ret);
+		stack_unused = 0;
+	}
+	return (UINT8_MAX * (stack_size - stack_unused)) / stack_size;
+}
+#else
+static uint8_t thread_info_stack_level(struct k_thread *thread, const char *name)
+{
+	return 0;
+}
+#endif
+
+static void thread_info_cb(const struct k_thread *cthread, void *user_data)
+{
+	struct k_thread *thread = (struct k_thread *)cthread;
+	struct user_data *ud = user_data;
+	struct thread_info *thread_info;
+	const char *name;
+
+	if (ud->thread_count > ARRAY_SIZE(ud->core_data->thread)) {
+		LOG_ERR("Thread count %u exceeds the memory window size\n",
+			ud->thread_count++);
+		return;
+	}
+
+	thread_info = &ud->core_data->thread[ud->thread_count];
+
+	name = k_thread_name_get((k_tid_t)thread);
+	if (!name || name[0] == '\0') {
+		snprintk(thread_info->name, sizeof(thread_info->name), "%p",
+			 (void *)thread);
+	} else {
+		strncpy(thread_info->name, name, sizeof(thread_info->name));
+	}
+
+	thread_info->stack_usage = thread_info_stack_level(thread, name);
+	thread_info->cpu_usage = thread_info_cpu_utilization(thread, ud, name);
+
+	LOG_DBG("core %u %s stack %u%% cpu %u%%", ud->core,
+		thread_info->name, thread_info->stack_usage * 100U / 255,
+		thread_info->cpu_usage * 100U / 255);
+
+	ud->thread_count++;
+}
+
+#ifdef CONFIG_THREAD_RUNTIME_STATS
+
+/* Marks cached thread cycle counter entries of removed threads
+ * free. This also happens to threads pinned to other cpu than the
+ * primary. In the beginning they are listed on primary cpu, until the
+ * pinned cpu is started up and the thread is executed for the fist
+ * time and it moves to the cpu its pinned on.
+ */
+static void cleanup_old_thread_cycles(struct user_data *ud)
+{
+	int i, j;
+
+	for (i = 0; i < ARRAY_SIZE(ud->previous->threads); i++) {
+		bool found = false;
+
+		/* This entry is already free, continue */
+		if (ud->previous->threads[i].tid == NULL)
+			continue;
+
+		/* Check if the thread was seen on previous round */
+		for (j = 0; j < ud->thread_count; j++) {
+			if (ud->active_threads[j] == ud->previous->threads[i].tid) {
+				found = true;
+				break;
+			}
+		}
+		/* If the thead is not any more active, mark the entry free */
+		if (!found) {
+			ud->previous->threads[i].tid = NULL;
+			ud->previous->threads[i].cycles = 0;
+		}
+	}
+}
+#else
+static void cleanup_old_thread_cycles(struct user_data *ud) { }
+#endif
+
+static void thread_info_get(int core, struct thread_info_core *core_data)
+{
+	k_thread_runtime_stats_t core_stats;
+	struct user_data ud = {
+		.core = core,
+		.core_data = core_data,
+		.thread_count = 0,
+#ifdef CONFIG_THREAD_RUNTIME_STATS
+		.previous = &previous[core],
+		.active_threads = { NULL },
+#endif
+	};
+	uint8_t load = 0;
+#ifdef CONFIG_THREAD_RUNTIME_STATS
+	int ret = k_thread_runtime_stats_cpu_get(core, &core_stats);
+
+	if (ret == 0) {
+		uint32_t active_cycles = (uint32_t) (core_stats.total_cycles -
+						     ud.previous->active);
+		uint32_t all_cycles = (uint32_t) (core_stats.execution_cycles -
+						  ud.previous->all);
+
+		ud.stats_valid = true;
+		load = (uint8_t) ((255LLU * active_cycles) / all_cycles);
+		LOG_DBG("Core %u load %u / %u total %llu / %llu", core,
+			active_cycles, all_cycles,
+			core_stats.total_cycles, core_stats.execution_cycles);
+		ud.previous->active = core_stats.total_cycles;
+		ud.previous->all = core_stats.execution_cycles;
+		ud.all_cycles = all_cycles;
+	}
+#endif
+	core_data->state = THREAD_INFO_STATE_BEING_UPDATED;
+
+	core_data->load = load;
+	/* This is best effort debug tool. Unlocked version should be fine. */
+	k_thread_foreach_unlocked_filter_by_cpu(core, thread_info_cb, &ud);
+
+	cleanup_old_thread_cycles(&ud);
+
+	core_data->counter++;
+	core_data->thread_count = ud.thread_count;
+	core_data->state = THREAD_INFO_STATE_UPTODATE;
+}
+
+static void thread_info_run(void *data, void *cnum, void *a)
+{
+	struct thread_info_chunk *chunk = data;
+	int core = (int) cnum;
+	struct thread_info_core *core_data = &chunk->core[core];
+
+	for (;;) {
+		thread_info_get(core, core_data);
+		k_sleep(K_SECONDS(CONFIG_SOF_TELEMETRY2_THREAD_INFO_INTERVAL));
+	}
+}
+
+static struct thread_info_chunk *thread_info_init(void)
+{
+	struct thread_info_chunk *chunk;
+	int i;
+
+	chunk = telemetry2_chunk_get(TELEMETRY2_ID_THREAD_INFO,
+				     sizeof(*chunk));
+	if (!chunk)
+		return NULL;
+
+	chunk->core_count = CONFIG_MP_MAX_NUM_CPUS;
+	for (i = 0; i < CONFIG_MP_MAX_NUM_CPUS; i++)
+		chunk->core_offset[i] = offsetof(struct thread_info_chunk, core[i]);
+
+	return chunk;
+}
+
+static K_THREAD_STACK_ARRAY_DEFINE(info_thread_stacks, CONFIG_MP_MAX_NUM_CPUS,
+				   1024);
+static struct k_thread info_thread[CONFIG_MP_MAX_NUM_CPUS];
+
+static int thread_info_start(void)
+{
+	struct thread_info_chunk *chunk = thread_info_init();
+	uint32_t i;
+
+	if (!chunk)
+		return 0;
+
+	for (i = 0; i < ARRAY_SIZE(info_thread); i++) {
+		char name[24];
+		k_tid_t tid = NULL;
+		int ret;
+
+		tid = k_thread_create(&info_thread[i], info_thread_stacks[i],
+				      1024,
+				      thread_info_run,
+				      chunk, (void *) i, NULL,
+				      K_LOWEST_APPLICATION_THREAD_PRIO, 0, K_FOREVER);
+		if (!tid) {
+			LOG_ERR("k_thread_create() failed for core %u", i);
+			continue;
+		}
+		ret = k_thread_cpu_pin(tid, i);
+		if (ret < 0) {
+			LOG_ERR("Pinning thread to code core %u", i);
+			k_thread_abort(tid);
+			continue;
+		}
+		snprintf(name, sizeof(name), "%u thread info", i);
+		ret = k_thread_name_set(tid, name);
+		if (ret < 0)
+			LOG_INF("k_thread_name_set failed: %d for %u", ret, i);
+
+		k_thread_start(tid);
+		LOG_DBG("Thread %p for core %u started", tid, i);
+	}
+
+	return 0;
+}
+
+SYS_INIT(thread_info_start, APPLICATION, CONFIG_KERNEL_INIT_PRIORITY_DEFAULT);
diff --git a/tools/telemetry2/thread-info.py b/tools/telemetry2/thread-info.py
new file mode 100644
index 000000000000..c5c5e5783361
--- /dev/null
+++ b/tools/telemetry2/thread-info.py
@@ -0,0 +1,217 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: BSD-3-Clause
+#
+# Copyright (c) 2024, Intel Corporation.
+
+import argparse
+import mmap
+import ctypes
+import time
+import sys
+
+TELEMETRY2_PAYLOAD_MAGIC = 0x1ED15EED
+
+TELEMETRY2_CHUNK_ID_EMPTY = 0
+TELEMETRY2_ID_THREAD_INFO = 1
+
+TELEMETRY2_PAYLOAD_V0_0 = 0
+
+#telemetry2 payload
+class Telemetry2PayloadHdr(ctypes.Structure):
+	_fields_ = [
+		("magic", ctypes.c_uint),
+		("hdr_size", ctypes.c_uint),
+		("total_size", ctypes.c_uint),
+		("abi", ctypes.c_uint),
+		("tstamp", ctypes.c_ulonglong)
+	]
+
+# telemetry2 header struct
+class Telemetry2ChunkHdr(ctypes.Structure):
+	_fields_ = [("id", ctypes.c_uint), ("size", ctypes.c_uint)]
+
+def get_telemetry2_payload_ok(slot):
+	payload_hdr = ctypes.cast(slot,
+				  ctypes.POINTER(Telemetry2PayloadHdr))
+	if payload_hdr.contents.magic != TELEMETRY2_PAYLOAD_MAGIC:
+		print("Telemetry2 payload header bad magic 0x%x\n" %
+		      payload_hdr.contents.magic)
+		return False
+	if payload_hdr.contents.abi != TELEMETRY2_PAYLOAD_V0_0:
+		print("Unknown Telemetry2 abi version %u\n" %
+		      payload_hdr.contents.abi)
+	return True
+
+def get_telemetry2_chunk_offset(id, slot):
+	"""
+	Generic function to get telmetry2 chunk offset by type
+	"""
+	offset = 0
+	while True:
+		struct_ptr = ctypes.cast(slot[offset:],
+					 ctypes.POINTER(Telemetry2ChunkHdr))
+		#print("Checking %u id 0x%x size %u\n" %
+		#      (offset, struct_ptr.contents.id, struct_ptr.contents.size))
+		if struct_ptr.contents.id == id:
+			return offset
+		if struct_ptr.contents.size == 0:
+			return -1
+		offset = offset + struct_ptr.contents.size
+
+class ThreadInfo(ctypes.Structure):
+	_pack_ = 1
+	_fields_ = [
+		("name", ctypes.c_char * 14),
+		("stack_usage", ctypes.c_ubyte),
+		("cpu_usage", ctypes.c_ubyte),
+	]
+
+class CPUInfo(ctypes.Structure):
+	_pack_ = 1
+	_fields_ = [
+		("state", ctypes.c_ubyte),
+		("counter", ctypes.c_ubyte),
+		("load", ctypes.c_ubyte),
+		("thread_count", ctypes.c_ubyte),
+		("thread", ThreadInfo * 16),
+	]
+
+class ThreadInfoChunk(ctypes.Structure):
+	_pack_ = 1
+	_fields_ = [
+		("hdr", Telemetry2ChunkHdr ),
+		("core_count", ctypes.c_ushort),
+		("core_offsets", ctypes.c_ushort * 16),
+	]
+
+class CoreData:
+	""" Class for tracking thread analyzer info for one core """
+	counter = 0
+	offset = 0
+	core = 0
+	STATE_UNINITIALIZED = 0
+	STATE_BEING_UPDATED = 1
+	STATE_UPTODATE = 2
+
+	def __init__(self, offset, core):
+		self.counter = 0
+		self.offset = offset
+		self.core = core
+
+	def print(self, slot):
+		cpu_info = ctypes.cast(slot[self.offset:],
+				       ctypes.POINTER(CPUInfo))
+		if self.counter == cpu_info.contents.counter:
+			return
+		if cpu_info.contents.state != self.STATE_UPTODATE:
+			return
+		self.counter = cpu_info.contents.counter
+		print("Core %d load %02.1f%%" %
+		      (self.core, cpu_info.contents.load / 2.55))
+		for i in range(cpu_info.contents.thread_count):
+			thread = cpu_info.contents.thread[i]
+			print("\t%-20s cpu %02.1f%% stack %02.1f%%" %
+			       (thread.name.decode('utf-8'), thread.cpu_usage / 2.55,
+				thread.stack_usage / 2.55));
+
+class Telemetry2ThreadAnalyzerDecoder:
+	"""
+	Class for finding thread analyzer chuck and initializing CoreData objects.
+	"""
+	file_size = 4096 # ADSP debug slot size
+	f = None
+	chunk_offset = -1
+	chunk = None
+	core_data = []
+
+	def set_file(self, f):
+		self.f = f
+
+	def decode_chunk(self):
+		if self.chunk != None:
+			return
+		self.f.seek(0)
+		slot = self.f.read(self.file_size)
+		if not get_telemetry2_payload_ok(slot):
+			return
+		self.chunk_offset = get_telemetry2_chunk_offset(TELEMETRY2_ID_THREAD_INFO, slot)
+		if self.chunk_offset < 0:
+			print("Thread info chunk not found")
+			self.chunk = None
+			return
+		self.chunk = ctypes.cast(slot[self.chunk_offset:],
+					 ctypes.POINTER(ThreadInfoChunk))
+		hdr = self.chunk.contents.hdr
+		#print(f"Chunk at {self.chunk_offset} id: {hdr.id} size: {hdr.size}")
+		print(f"core_count: {self.chunk.contents.core_count}")
+
+	def init_core_data(self):
+		if self.chunk == None:
+			return
+		if len(self.core_data) == self.chunk.contents.core_count:
+			return
+		self.core_data = [None] * self.chunk.contents.core_count
+		for i in range(self.chunk.contents.core_count):
+			offset = self.chunk_offset + self.chunk.contents.core_offsets[i]
+			self.core_data[i] = CoreData(offset, i)
+			#print(f"core_offsets {i}: {self.chunk.contents.core_offsets[i]}")
+
+	def loop_cores(self):
+		if len(self.core_data) == 0:
+			return
+		self.f.seek(0)
+		slot = self.f.read(self.file_size)
+		for i in range(len(self.core_data)):
+			self.core_data[i].print(slot)
+
+	def reset(self):
+		self.f = None
+		self.chunk = None
+
+	def print_status(self, tag):
+		print(f"{tag} f {self.f} offset {self.chunk_offset} chunk {self.chunk} cores {len(self.core_data)}")
+
+def main_f(args):
+	"""
+	Map telemetry2 slot into memorey and find the chunk we are interested in
+	"""
+	decoder = Telemetry2ThreadAnalyzerDecoder()
+	prev_error = None
+	while True:
+		try:
+			with open(args.telemetry2_file, "rb") as f:
+				decoder.set_file(f)
+				decoder.decode_chunk()
+				decoder.init_core_data()
+				while True:
+					decoder.loop_cores()
+					time.sleep(args.update_interval)
+		except FileNotFoundError:
+			print(f"File {args.telemetry2_file} not found!")
+			break
+		except OSError as e:
+			if str(e) != prev_error:
+				print(f"Open {args.telemetry2_file} failed '{e}'")
+				prev_error = str(e)
+			decoder.reset()
+		time.sleep(args.update_interval)
+
+def parse_params():
+	""" Parses parameters
+	"""
+	parser = argparse.ArgumentParser(description=
+					 "SOF Telemetry2 thread info client. "+
+					 "Opens telemetry2 slot debugfs file, looks"+
+					 " for thread info chunk, and decodes its "+
+					 "contentes into stdout periodically.")
+	parser.add_argument('-t', '--update-interval', type=float,
+			    help='Telemetry2 window polling interval in seconds, default 1',
+                            default=1)
+	parser.add_argument('-f', '--telemetry2-file',
+			    help='File to read the telemetry2 data from, default /sys/kernel/debug/sof/telemetry2',
+                            default="/sys/kernel/debug/sof/telemetry2")
+	parsed_args = parser.parse_args()
+	return parsed_args
+
+args = parse_params()
+main_f(args)