diff --git a/app/boards/intel_adsp_ace15_mtpm.conf b/app/boards/intel_adsp_ace15_mtpm.conf index 5923272fa986..bc7931633fc4 100644 --- a/app/boards/intel_adsp_ace15_mtpm.conf +++ b/app/boards/intel_adsp_ace15_mtpm.conf @@ -79,7 +79,7 @@ CONFIG_INTEL_ADSP_IPC=y CONFIG_WATCHDOG=y CONFIG_LL_WATCHDOG=y -CONFIG_MEMORY_WIN_2_SIZE=12288 +CONFIG_MEMORY_WIN_2_SIZE=16384 # Temporary disabled options CONFIG_TRACE=n diff --git a/app/boards/intel_adsp_ace20_lnl.conf b/app/boards/intel_adsp_ace20_lnl.conf index 88a8e6522cfe..dff8260f1f33 100644 --- a/app/boards/intel_adsp_ace20_lnl.conf +++ b/app/boards/intel_adsp_ace20_lnl.conf @@ -74,7 +74,7 @@ CONFIG_INTEL_ADSP_IPC=y CONFIG_PROBE=y CONFIG_PROBE_DMA_MAX=2 -CONFIG_MEMORY_WIN_2_SIZE=12288 +CONFIG_MEMORY_WIN_2_SIZE=16384 # Temporary disabled options diff --git a/app/prj.conf b/app/prj.conf index 5a681ac5b74b..88e4f33e66b4 100644 --- a/app/prj.conf +++ b/app/prj.conf @@ -57,3 +57,10 @@ CONFIG_HEAP_MEM_POOL_SIZE=2048 CONFIG_LLEXT=y CONFIG_LLEXT_STORAGE_WRITABLE=y CONFIG_MODULES=y + +#Thread info + +CONFIG_SOF_TELEMETRY2_SLOT=y +CONFIG_SOF_TELEMETRY2_THREAD_INFO=y +CONFIG_THREAD_NAME=y +CONFIG_MEMORY_WIN_2_SIZE=16384 diff --git a/src/debug/telemetry/CMakeLists.txt b/src/debug/telemetry/CMakeLists.txt index f2249490db60..318d349778a6 100644 --- a/src/debug/telemetry/CMakeLists.txt +++ b/src/debug/telemetry/CMakeLists.txt @@ -1,3 +1,7 @@ # SPDX-License-Identifier: BSD-3-Clause add_local_sources_ifdef(CONFIG_SOF_TELEMETRY sof telemetry.c) + +add_local_sources_ifdef(CONFIG_SOF_TELEMETRY2_SLOT sof telemetry2_slot.c) + +add_local_sources_ifdef(CONFIG_SOF_TELEMETRY2_THREAD_INFO sof telemetry2_thread_info.c) diff --git a/src/debug/telemetry/Kconfig b/src/debug/telemetry/Kconfig index 54f4b66f590d..653f5cab689c 100644 --- a/src/debug/telemetry/Kconfig +++ b/src/debug/telemetry/Kconfig @@ -9,3 +9,46 @@ config SOF_TELEMETRY systick_info measurement which measures scheduler task performance and may slightly affect overall performance. +config SOF_TELEMETRY2_SLOT + bool "Enable SOF telemetry2 slot in debug window" + help + TELEMETRY2 is a generic memory structure for passing real-time debug + information from the SOF system. It only provides the framework for + passing pieces of abstract data from DSP side to host side debugging + tools. The option enables TELEMETRY2 debug window slot for transferring + telemetry2 data. + +if SOF_TELEMETRY2_SLOT + +config SOF_TELEMETRY2_SLOT_NUMBER + int "Debug window slot where to put telemetry2 slot" + default 3 + range 0 14 + help + Which debug slot to reserve for telemetry2. Remember to map + the slot with MEMORY_WIN_2_SIZE in soc/xtensa/intel_adsp. + +config SOF_TELEMETRY2_THREAD_INFO + bool "Enable thread info chunk in telemetry2 slot" + select INIT_STACKS + select THREAD_MONITOR + select THREAD_STACK_INFO + select THREAD_RUNTIME_STATS + help + This activates Zephyr thread info in telemetry2 slot. For + the thread names to be any more than hex numbers its suggested + to select THREAD_NAME=y too. + +if SOF_TELEMETRY2_THREAD_INFO + +config SOF_TELEMETRY2_THREAD_INFO_INTERVAL + int "Thread information collection interval in seconds" + default 2 + range 1 10 + help + Decides how often thread info runs and checks execution cycle + statistics and stack usage. + +endif + +endif diff --git a/src/debug/telemetry/telemetry2_slot.c b/src/debug/telemetry/telemetry2_slot.c new file mode 100644 index 000000000000..1462e870dd26 --- /dev/null +++ b/src/debug/telemetry/telemetry2_slot.c @@ -0,0 +1,112 @@ +// SPDX-License-Identifier: BSD-3-Clause +// +// Copyright(c) 2024 Intel Corporation. + +#include +#include +#include +#include +#include +#include "telemetry2_slot.h" + +LOG_MODULE_REGISTER(telemetry2_slot); + +static struct k_spinlock lock; + +static +struct telemetry2_chunk_hdr *telemetry2_chunk_search(struct telemetry2_chunk_hdr *chunk, + int id) +{ + uint8_t *p; + + /* Loop through the chunks and look for 'id'. If found leave a + * pointer to it in 'chunk', if not leave 'chunk' pointing at + * the beginning of the free space. + */ + while (chunk->id != id && chunk->size != 0) { + p = (((uint8_t *) chunk) + chunk->size); + + chunk = (struct telemetry2_chunk_hdr *) p; + } + return chunk; +} + +/* + * Search, or if not found reserve, a chunk of specified size and id from + * telemetry2 slot. Each chuck must be aligned with a 64-byte cache line. + * A pointer to the chunk is returned. + */ +void *telemetry2_chunk_get(int id, size_t required_size) +{ + const int slot = CONFIG_SOF_TELEMETRY2_SLOT_NUMBER; + struct telemetry2_payload_hdr *payload = + (struct telemetry2_payload_hdr *)(ADSP_DW->slots[slot]); + size_t hdr_size = ALIGN_UP(sizeof(*payload), CONFIG_DCACHE_LINE_SIZE); + struct telemetry2_chunk_hdr *chunk = (struct telemetry2_chunk_hdr *) + (((uint8_t *) payload) + hdr_size); + size_t size = ALIGN_UP(required_size, CONFIG_DCACHE_LINE_SIZE); + k_spinlock_key_t key; + + if (ADSP_DW->descs[slot].type != ADSP_DW_SLOT_TELEMETRY2) { + if (ADSP_DW->descs[slot].type != 0) + LOG_WRN("Slot %d was not free: %u", slot, ADSP_DW->descs[slot].type); + LOG_INF("Initializing telemetry2 slot 0x%08x", ADSP_DW->descs[slot].type); + ADSP_DW->descs[slot].type = ADSP_DW_SLOT_TELEMETRY2; + payload->hdr_size = hdr_size; + payload->magic = TELEMETRY2_PAYLOAD_MAGIC; + payload->abi = TELEMETRY2_PAYLOAD_V0_0; + } + + LOG_INF("Add id %u size %u (after alignment %u)", id, required_size, size); + chunk = telemetry2_chunk_search(chunk, id); + if (id == chunk->id) + goto match_found; + + /* End of chunk list reached, but specified chunk not found. We + * need to reseeve one, so take the spin lock and check if more + * chunks were added since the previus search. + */ + key = k_spin_lock(&lock); + chunk = telemetry2_chunk_search(chunk, id); + /* Check if some other thread beat us to add the chunk we want */ + if (id == chunk->id) { + k_spin_unlock(&lock, key); + goto match_found; + } + + /* If the chunk was not found, reserve space for it after + * the last reserved chunk. + */ + if (((uint8_t *) chunk) + size >= ADSP_DW->slots[slot + 1]) { + LOG_ERR("No space for chunk %u of size %u in slot %u, offset %u", + id, size, slot, ((uint8_t *) chunk) - ADSP_DW->slots[slot]); + k_spin_unlock(&lock, key); + return NULL; + } + + if (chunk->id != TELEMETRY2_CHUNK_ID_EMPTY) + LOG_WRN("Chunk of size %u has type %u, assuming empty", + chunk->size, chunk->id); + + LOG_INF("Chunk %u reserved", id); + chunk->size = size; + chunk->id = id; + payload->total_size = size + ((uint8_t *) chunk) - ((uint8_t *) payload); + + k_spin_unlock(&lock, key); + + return chunk; + +match_found: + /* If a matching chunk was found check that the size is enough + * for what was requested. If yes return the value, if not + * return NULL. + */ + if (required_size > chunk->size) { + LOG_ERR("Chunk %d size too small %u > %u", + id, required_size, chunk->size); + return NULL; + } + LOG_INF("Chunk %u found", id); + return chunk; +} diff --git a/src/debug/telemetry/telemetry2_slot.h b/src/debug/telemetry/telemetry2_slot.h new file mode 100644 index 000000000000..49483c938b45 --- /dev/null +++ b/src/debug/telemetry/telemetry2_slot.h @@ -0,0 +1,89 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * + * Copyright(c) 2024 Intel Corporation. + */ + +#ifndef __SOC_TELEMETRY2_SLOT_H__ +#define __SOC_TELEMETRY2_SLOT_H__ + +/* + * TELEMETRY2 is a generic memory structure for passing real-time debug + * information from the SOF system. It only provides the framework for + * passing pieces of abstract data from DSP side to host side debugging + * tools. + * + * This example describes how TELEMETRY2 data is transferred from DSP + * to host side using debug memory window. + * + * TELEMETRY2 slot is taken from SRAM window divided into several + * chunks with simple header declared below. + * + * "type" describes the contents of the chunk + * "size" describes the size of the chunk in bytes + * + * After each chunk another chunk is expected to follow, until an + * empty chunk header is found. If chunk type is 0 and size 0 it + * indicates an empty header, and that the rest of the slot is unused. + * A new chunk, with the header, can be reserved stating at the end of + * previous chunk (which has been aligned to the end of cache line). + * + * Each chunk is aligned to start at 64-byte cache line boundary. + * + * For example: + * -------------------------------------------------- --- + * | magic = TELEMETRY2_PAYLOAD_MAGIC | | + * | hdr_size = 64 | | + * | total_size = 320 | | + * | abi = TELEMETRY2_PAYLOAD_V0_0 | 64 bytes + * | tstamp = | | + * | | | + * -------------------------------------------------- --- + * | type = ADSP_DW_TELEMETRY2_ID_THREAD_INFO | | + * | size = 256 | 256 bytes + * | chunk data | | + * | ... | | + * -------------------------------------------------- --- + * | type = ADSP_DW_TELEMETRY2_TYPE_EMPTY | + * | size = 0 | + * -------------------------------------------------- + * + * The above depicts a telemetry2 slot with thread analyzer data + * of 256 bytes (including the header) in the first chunk and + * the rest of the slot being free. + */ + +#include + +// HACK: This should be in zephyr/soc/intel/intel_adsp/common/include/adsp_debug_window.h +#define ADSP_DW_SLOT_TELEMETRY2 0x4c455500 + +#define TELEMETRY2_PAYLOAD_MAGIC 0x1ED15EED + +#define TELEMETRY2_PAYLOAD_V0_0 0 + +/* Telemetry2 payload header + * + * The payload header should be written in the beginning of the + * telemetry2 payload, before the chunks. + */ +struct telemetry2_payload_hdr { + uint32_t magic; // used to identify valid data + uint32_t hdr_size; // size of this header only in bytes + uint32_t total_size; // total size of the whole payload in bytes + uint32_t abi; // ABI version, can be used by tool to warn users if too old. + uint64_t tstamp; // aligned with host epoch. +} __packed __aligned(CONFIG_DCACHE_LINE_SIZE); + +/* Defined telemetry2 chunk types */ +#define TELEMETRY2_CHUNK_ID_EMPTY 0 +#define TELEMETRY2_ID_THREAD_INFO 1 + +/* Telemetry2 chunk header */ +struct telemetry2_chunk_hdr { + uint32_t id; // Chunk ID + uint32_t size; // Size of telemetry chunk +} __packed; + +void *telemetry2_chunk_get(int id, size_t size); + +#endif /* __SOC_TELEMETRY2_SLOT_H__ */ diff --git a/src/debug/telemetry/telemetry2_thread_info.c b/src/debug/telemetry/telemetry2_thread_info.c new file mode 100644 index 000000000000..723bd5096a17 --- /dev/null +++ b/src/debug/telemetry/telemetry2_thread_info.c @@ -0,0 +1,365 @@ +// SPDX-License-Identifier: BSD-3-Clause +// +// Copyright(c) 2024 Intel Corporation. + +#include +#include +#include +#include +#include + +#include "telemetry2_slot.h" + +LOG_MODULE_REGISTER(thread_info); + +#define THREAD_INFO_MAX_THREADS 16 + +struct thread_info { + char name[14]; + uint8_t stack_usage; /* Relative stack usage U(0,8) fixed point value */ + uint8_t cpu_usage; /* Relative cpu usage U(0,8) fixed point value */ +} __packed; + +#define THREAD_INFO_VERSION_0_0 0 + +#define THREAD_INFO_STATE_UNINITIALIZED 0 +#define THREAD_INFO_STATE_BEING_UPDATED 1 +#define THREAD_INFO_STATE_UPTODATE 2 + +/* Core specific data, updated each round, including the thread table. */ +struct thread_info_core { + uint8_t state; /* indicates if core data is in consistent state */ + uint8_t counter; /* incremented every round */ + uint8_t load; /* Core's load U(0,8) fixed point value */ + uint8_t thread_count; + struct thread_info thread[THREAD_INFO_MAX_THREADS]; +} __packed __aligned(CONFIG_DCACHE_LINE_SIZE); + +/* Thread info telemetry2 chunk header. Only initialized at first + * thread info thread start. Should be used to find the core specific + * sections where the thread info is when decoding. + */ +struct thread_info_chunk { + struct telemetry2_chunk_hdr hdr; + uint16_t core_count; + uint16_t core_offset[CONFIG_MP_MAX_NUM_CPUS]; + struct thread_info_core __aligned(CONFIG_DCACHE_LINE_SIZE) core[CONFIG_MP_MAX_NUM_CPUS]; +} __packed; + +#ifdef CONFIG_THREAD_RUNTIME_STATS +/* Data structure to store the cycle counter values from the previous + * round. The numbers are used to calculate what the load was on this + * round. + */ +static struct previous_counters { // Cached data from previous round + uint64_t active; // All execution cycles + uint64_t all; // All cycles including idle + struct thread_counters { + void *tid; // thread ID (the thread struct ptr) + uint64_t cycles; // cycle counter value + } threads[THREAD_INFO_MAX_THREADS]; // The max amount of threads we follow +} previous[CONFIG_MP_MAX_NUM_CPUS]; +#endif + +/* Data structure to be passed down to thread_info_cb() by + * k_thread_foreach_current_cpu(). + */ +struct user_data { + int core; + struct thread_info_core *core_data; + int thread_count; +#ifdef CONFIG_THREAD_RUNTIME_STATS + bool stats_valid; + uint32_t all_cycles; + void *active_threads[THREAD_INFO_MAX_THREADS]; + struct previous_counters *previous; +#endif +}; + +#ifdef CONFIG_THREAD_RUNTIME_STATS + +/* Finds and/or updates cached cycle counter value for 'tid'-thread + * and calculates used execution cycles since previous round and + * returns it. + */ + +static uint32_t get_cycles(void *tid, k_thread_runtime_stats_t *thread_stats, + struct user_data *ud, const char *name) + +{ + int i; + + /* Mark the thread as active */ + ud->active_threads[ud->thread_count] = tid; + /* look for cached value from previous round for 'tid'-thread */ + for (i = 0; i < ARRAY_SIZE(ud->previous->threads); i++) { + if (ud->previous->threads[i].tid == tid) { + /* Calculate number cycles since previous round */ + uint32_t cycles = (uint32_t) (thread_stats->execution_cycles - + ud->previous->threads[i].cycles); + + LOG_DBG("%p found at %d (%s %llu)", tid, i, + name, thread_stats->execution_cycles); + /* update cached value */ + ud->previous->threads[i].cycles = thread_stats->execution_cycles; + return cycles; + } + } + + /* If no cached value was found, look for an empty slot to store the recent value */ + for (i = 0; i < ARRAY_SIZE(ud->previous->threads); i++) { + if (ud->previous->threads[i].tid == NULL) { + ud->previous->threads[i].tid = tid; + ud->previous->threads[i].cycles = thread_stats->execution_cycles; + LOG_DBG("%p placed at %d (%s %llu)", tid, i, + name, ud->previous->threads[i].cycles); + break; + } + } + + /* If there is more than THREAD_INFO_MAX_THREADS threads in the system */ + if (i == ARRAY_SIZE(ud->previous->threads)) + LOG_INF("No place found for %s %p", name, tid); + + /* If there was no previous counter value to compare, return 0 cycles. */ + return 0; +} + +static uint8_t thread_info_cpu_utilization(struct k_thread *thread, + struct user_data *ud, const char *name) +{ + k_thread_runtime_stats_t thread_stats; + uint32_t cycles; + int ret; + + if (!ud->stats_valid) + return 0; + + if (k_thread_runtime_stats_get(thread, &thread_stats) != 0) + return 0; + + cycles = get_cycles(thread, &thread_stats, ud, name); + + LOG_DBG("thread %s %u / %u", name, cycles, ud->all_cycles); + + return (uint8_t) ((255llu * cycles) / ud->all_cycles); +} +#else +static uint8_t thread_info_cpu_utilization(struct k_thread *thread, + struct user_data *ud, const char *name) +{ + return 0; +} +#endif + +#ifdef CONFIG_THREAD_STACK_INFO +static uint8_t thread_info_stack_level(struct k_thread *thread, const char *name) +{ + size_t stack_size, stack_unused; + int ret; + + stack_size = thread->stack_info.size; + ret = k_thread_stack_space_get(thread, &stack_unused); + if (ret) { + LOG_ERR(" %-20s: unable to get stack space (%d)", + name, ret); + stack_unused = 0; + } + return (UINT8_MAX * (stack_size - stack_unused)) / stack_size; +} +#else +static uint8_t thread_info_stack_level(struct k_thread *thread, const char *name) +{ + return 0; +} +#endif + +static void thread_info_cb(const struct k_thread *cthread, void *user_data) +{ + struct k_thread *thread = (struct k_thread *)cthread; + struct user_data *ud = user_data; + struct thread_info *thread_info; + const char *name; + + if (ud->thread_count > ARRAY_SIZE(ud->core_data->thread)) { + LOG_ERR("Thread count %u exceeds the memory window size\n", + ud->thread_count++); + return; + } + + thread_info = &ud->core_data->thread[ud->thread_count]; + + name = k_thread_name_get((k_tid_t)thread); + if (!name || name[0] == '\0') { + snprintk(thread_info->name, sizeof(thread_info->name), "%p", + (void *)thread); + } else { + strncpy(thread_info->name, name, sizeof(thread_info->name)); + } + + thread_info->stack_usage = thread_info_stack_level(thread, name); + thread_info->cpu_usage = thread_info_cpu_utilization(thread, ud, name); + + LOG_DBG("core %u %s stack %u%% cpu %u%%", ud->core, + thread_info->name, thread_info->stack_usage * 100U / 255, + thread_info->cpu_usage * 100U / 255); + + ud->thread_count++; +} + +#ifdef CONFIG_THREAD_RUNTIME_STATS + +/* Marks cached thread cycle counter entries of removed threads + * free. This also happens to threads pinned to other cpu than the + * primary. In the beginning they are listed on primary cpu, until the + * pinned cpu is started up and the thread is executed for the fist + * time and it moves to the cpu its pinned on. + */ +static void cleanup_old_thread_cycles(struct user_data *ud) +{ + int i, j; + + for (i = 0; i < ARRAY_SIZE(ud->previous->threads); i++) { + bool found = false; + + /* This entry is already free, continue */ + if (ud->previous->threads[i].tid == NULL) + continue; + + /* Check if the thread was seen on previous round */ + for (j = 0; j < ud->thread_count; j++) { + if (ud->active_threads[j] == ud->previous->threads[i].tid) { + found = true; + break; + } + } + /* If the thead is not any more active, mark the entry free */ + if (!found) { + ud->previous->threads[i].tid = NULL; + ud->previous->threads[i].cycles = 0; + } + } +} +#else +static void cleanup_old_thread_cycles(struct user_data *ud) { } +#endif + +static void thread_info_get(int core, struct thread_info_core *core_data) +{ + k_thread_runtime_stats_t core_stats; + struct user_data ud = { + .core = core, + .core_data = core_data, + .thread_count = 0, +#ifdef CONFIG_THREAD_RUNTIME_STATS + .previous = &previous[core], + .active_threads = { NULL }, +#endif + }; + uint8_t load = 0; +#ifdef CONFIG_THREAD_RUNTIME_STATS + int ret = k_thread_runtime_stats_cpu_get(core, &core_stats); + + if (ret == 0) { + uint32_t active_cycles = (uint32_t) (core_stats.total_cycles - + ud.previous->active); + uint32_t all_cycles = (uint32_t) (core_stats.execution_cycles - + ud.previous->all); + + ud.stats_valid = true; + load = (uint8_t) ((255LLU * active_cycles) / all_cycles); + LOG_DBG("Core %u load %u / %u total %llu / %llu", core, + active_cycles, all_cycles, + core_stats.total_cycles, core_stats.execution_cycles); + ud.previous->active = core_stats.total_cycles; + ud.previous->all = core_stats.execution_cycles; + ud.all_cycles = all_cycles; + } +#endif + core_data->state = THREAD_INFO_STATE_BEING_UPDATED; + + core_data->load = load; + /* This is best effort debug tool. Unlocked version should be fine. */ + k_thread_foreach_unlocked_filter_by_cpu(core, thread_info_cb, &ud); + + cleanup_old_thread_cycles(&ud); + + core_data->counter++; + core_data->thread_count = ud.thread_count; + core_data->state = THREAD_INFO_STATE_UPTODATE; +} + +static void thread_info_run(void *data, void *cnum, void *a) +{ + struct thread_info_chunk *chunk = data; + int core = (int) cnum; + struct thread_info_core *core_data = &chunk->core[core]; + + for (;;) { + thread_info_get(core, core_data); + k_sleep(K_SECONDS(CONFIG_SOF_TELEMETRY2_THREAD_INFO_INTERVAL)); + } +} + +static struct thread_info_chunk *thread_info_init(void) +{ + struct thread_info_chunk *chunk; + int i; + + chunk = telemetry2_chunk_get(TELEMETRY2_ID_THREAD_INFO, + sizeof(*chunk)); + if (!chunk) + return NULL; + + chunk->core_count = CONFIG_MP_MAX_NUM_CPUS; + for (i = 0; i < CONFIG_MP_MAX_NUM_CPUS; i++) + chunk->core_offset[i] = offsetof(struct thread_info_chunk, core[i]); + + return chunk; +} + +static K_THREAD_STACK_ARRAY_DEFINE(info_thread_stacks, CONFIG_MP_MAX_NUM_CPUS, + 1024); +static struct k_thread info_thread[CONFIG_MP_MAX_NUM_CPUS]; + +static int thread_info_start(void) +{ + struct thread_info_chunk *chunk = thread_info_init(); + uint32_t i; + + if (!chunk) + return 0; + + for (i = 0; i < ARRAY_SIZE(info_thread); i++) { + char name[24]; + k_tid_t tid = NULL; + int ret; + + tid = k_thread_create(&info_thread[i], info_thread_stacks[i], + 1024, + thread_info_run, + chunk, (void *) i, NULL, + K_LOWEST_APPLICATION_THREAD_PRIO, 0, K_FOREVER); + if (!tid) { + LOG_ERR("k_thread_create() failed for core %u", i); + continue; + } + ret = k_thread_cpu_pin(tid, i); + if (ret < 0) { + LOG_ERR("Pinning thread to code core %u", i); + k_thread_abort(tid); + continue; + } + snprintf(name, sizeof(name), "%u thread info", i); + ret = k_thread_name_set(tid, name); + if (ret < 0) + LOG_INF("k_thread_name_set failed: %d for %u", ret, i); + + k_thread_start(tid); + LOG_DBG("Thread %p for core %u started", tid, i); + } + + return 0; +} + +SYS_INIT(thread_info_start, APPLICATION, CONFIG_KERNEL_INIT_PRIORITY_DEFAULT); diff --git a/tools/telemetry2/thread-info.py b/tools/telemetry2/thread-info.py new file mode 100644 index 000000000000..c5c5e5783361 --- /dev/null +++ b/tools/telemetry2/thread-info.py @@ -0,0 +1,217 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: BSD-3-Clause +# +# Copyright (c) 2024, Intel Corporation. + +import argparse +import mmap +import ctypes +import time +import sys + +TELEMETRY2_PAYLOAD_MAGIC = 0x1ED15EED + +TELEMETRY2_CHUNK_ID_EMPTY = 0 +TELEMETRY2_ID_THREAD_INFO = 1 + +TELEMETRY2_PAYLOAD_V0_0 = 0 + +#telemetry2 payload +class Telemetry2PayloadHdr(ctypes.Structure): + _fields_ = [ + ("magic", ctypes.c_uint), + ("hdr_size", ctypes.c_uint), + ("total_size", ctypes.c_uint), + ("abi", ctypes.c_uint), + ("tstamp", ctypes.c_ulonglong) + ] + +# telemetry2 header struct +class Telemetry2ChunkHdr(ctypes.Structure): + _fields_ = [("id", ctypes.c_uint), ("size", ctypes.c_uint)] + +def get_telemetry2_payload_ok(slot): + payload_hdr = ctypes.cast(slot, + ctypes.POINTER(Telemetry2PayloadHdr)) + if payload_hdr.contents.magic != TELEMETRY2_PAYLOAD_MAGIC: + print("Telemetry2 payload header bad magic 0x%x\n" % + payload_hdr.contents.magic) + return False + if payload_hdr.contents.abi != TELEMETRY2_PAYLOAD_V0_0: + print("Unknown Telemetry2 abi version %u\n" % + payload_hdr.contents.abi) + return True + +def get_telemetry2_chunk_offset(id, slot): + """ + Generic function to get telmetry2 chunk offset by type + """ + offset = 0 + while True: + struct_ptr = ctypes.cast(slot[offset:], + ctypes.POINTER(Telemetry2ChunkHdr)) + #print("Checking %u id 0x%x size %u\n" % + # (offset, struct_ptr.contents.id, struct_ptr.contents.size)) + if struct_ptr.contents.id == id: + return offset + if struct_ptr.contents.size == 0: + return -1 + offset = offset + struct_ptr.contents.size + +class ThreadInfo(ctypes.Structure): + _pack_ = 1 + _fields_ = [ + ("name", ctypes.c_char * 14), + ("stack_usage", ctypes.c_ubyte), + ("cpu_usage", ctypes.c_ubyte), + ] + +class CPUInfo(ctypes.Structure): + _pack_ = 1 + _fields_ = [ + ("state", ctypes.c_ubyte), + ("counter", ctypes.c_ubyte), + ("load", ctypes.c_ubyte), + ("thread_count", ctypes.c_ubyte), + ("thread", ThreadInfo * 16), + ] + +class ThreadInfoChunk(ctypes.Structure): + _pack_ = 1 + _fields_ = [ + ("hdr", Telemetry2ChunkHdr ), + ("core_count", ctypes.c_ushort), + ("core_offsets", ctypes.c_ushort * 16), + ] + +class CoreData: + """ Class for tracking thread analyzer info for one core """ + counter = 0 + offset = 0 + core = 0 + STATE_UNINITIALIZED = 0 + STATE_BEING_UPDATED = 1 + STATE_UPTODATE = 2 + + def __init__(self, offset, core): + self.counter = 0 + self.offset = offset + self.core = core + + def print(self, slot): + cpu_info = ctypes.cast(slot[self.offset:], + ctypes.POINTER(CPUInfo)) + if self.counter == cpu_info.contents.counter: + return + if cpu_info.contents.state != self.STATE_UPTODATE: + return + self.counter = cpu_info.contents.counter + print("Core %d load %02.1f%%" % + (self.core, cpu_info.contents.load / 2.55)) + for i in range(cpu_info.contents.thread_count): + thread = cpu_info.contents.thread[i] + print("\t%-20s cpu %02.1f%% stack %02.1f%%" % + (thread.name.decode('utf-8'), thread.cpu_usage / 2.55, + thread.stack_usage / 2.55)); + +class Telemetry2ThreadAnalyzerDecoder: + """ + Class for finding thread analyzer chuck and initializing CoreData objects. + """ + file_size = 4096 # ADSP debug slot size + f = None + chunk_offset = -1 + chunk = None + core_data = [] + + def set_file(self, f): + self.f = f + + def decode_chunk(self): + if self.chunk != None: + return + self.f.seek(0) + slot = self.f.read(self.file_size) + if not get_telemetry2_payload_ok(slot): + return + self.chunk_offset = get_telemetry2_chunk_offset(TELEMETRY2_ID_THREAD_INFO, slot) + if self.chunk_offset < 0: + print("Thread info chunk not found") + self.chunk = None + return + self.chunk = ctypes.cast(slot[self.chunk_offset:], + ctypes.POINTER(ThreadInfoChunk)) + hdr = self.chunk.contents.hdr + #print(f"Chunk at {self.chunk_offset} id: {hdr.id} size: {hdr.size}") + print(f"core_count: {self.chunk.contents.core_count}") + + def init_core_data(self): + if self.chunk == None: + return + if len(self.core_data) == self.chunk.contents.core_count: + return + self.core_data = [None] * self.chunk.contents.core_count + for i in range(self.chunk.contents.core_count): + offset = self.chunk_offset + self.chunk.contents.core_offsets[i] + self.core_data[i] = CoreData(offset, i) + #print(f"core_offsets {i}: {self.chunk.contents.core_offsets[i]}") + + def loop_cores(self): + if len(self.core_data) == 0: + return + self.f.seek(0) + slot = self.f.read(self.file_size) + for i in range(len(self.core_data)): + self.core_data[i].print(slot) + + def reset(self): + self.f = None + self.chunk = None + + def print_status(self, tag): + print(f"{tag} f {self.f} offset {self.chunk_offset} chunk {self.chunk} cores {len(self.core_data)}") + +def main_f(args): + """ + Map telemetry2 slot into memorey and find the chunk we are interested in + """ + decoder = Telemetry2ThreadAnalyzerDecoder() + prev_error = None + while True: + try: + with open(args.telemetry2_file, "rb") as f: + decoder.set_file(f) + decoder.decode_chunk() + decoder.init_core_data() + while True: + decoder.loop_cores() + time.sleep(args.update_interval) + except FileNotFoundError: + print(f"File {args.telemetry2_file} not found!") + break + except OSError as e: + if str(e) != prev_error: + print(f"Open {args.telemetry2_file} failed '{e}'") + prev_error = str(e) + decoder.reset() + time.sleep(args.update_interval) + +def parse_params(): + """ Parses parameters + """ + parser = argparse.ArgumentParser(description= + "SOF Telemetry2 thread info client. "+ + "Opens telemetry2 slot debugfs file, looks"+ + " for thread info chunk, and decodes its "+ + "contentes into stdout periodically.") + parser.add_argument('-t', '--update-interval', type=float, + help='Telemetry2 window polling interval in seconds, default 1', + default=1) + parser.add_argument('-f', '--telemetry2-file', + help='File to read the telemetry2 data from, default /sys/kernel/debug/sof/telemetry2', + default="/sys/kernel/debug/sof/telemetry2") + parsed_args = parser.parse_args() + return parsed_args + +args = parse_params() +main_f(args)