From 226cf9ddca024900f7997223f5d1f5fc2d67bc21 Mon Sep 17 00:00:00 2001 From: "Dementiev, Roman" Date: Mon, 6 Nov 2023 12:11:52 +0100 Subject: [PATCH 1/9] use right PCM_MSR_DRV_NAME to access MSR driver Change-Id: Id636df520e7373679b4302ee7d9f54e755ad43fd --- src/utils.h | 5 ++++- src/windows/windriver.h | 4 +--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/utils.h b/src/utils.h index fcbc67a7..f80478df 100644 --- a/src/utils.h +++ b/src/utils.h @@ -539,9 +539,12 @@ inline uint64 extract_bits(uint64 myin, uint32 beg, uint32 end) } #ifdef _MSC_VER + +#define PCM_MSR_DRV_NAME TEXT("\\\\.\\RDMSR") + inline HANDLE openMSRDriver() { - return CreateFile(TEXT("\\\\.\\RDMSR"), GENERIC_READ | GENERIC_WRITE, 0, NULL, OPEN_EXISTING, 0, NULL); + return CreateFile(PCM_MSR_DRV_NAME, GENERIC_READ | GENERIC_WRITE, 0, NULL, OPEN_EXISTING, 0, NULL); } #endif diff --git a/src/windows/windriver.h b/src/windows/windriver.h index add9b9b4..c7b10140 100644 --- a/src/windows/windriver.h +++ b/src/windows/windriver.h @@ -100,9 +100,7 @@ class Driver { if (0 != StartService(hService, 0, NULL)) { - tstring convDriverName(&driverName_[0]); - tstring driverPath = TEXT("\\\\.\\") + convDriverName; - restrictDriverAccess(driverPath.c_str()); + restrictDriverAccess(PCM_MSR_DRV_NAME); return true; } DWORD err = GetLastError(); From 73b2daed63fe96d37bcee58461c565f253560680 Mon Sep 17 00:00:00 2001 From: Pavithran Pandiyan Date: Tue, 31 Oct 2023 11:12:21 +0100 Subject: [PATCH 2/9] pcm accel grafana support address comments Update pcm-accel-common.h Update pcm-accel-common.h Update pcm-sensor-server.cpp fix clangscan issues fix actions & comments Support for Accelerators in pcm-grafana dashboard --- src/CMakeLists.txt | 4 +- src/cpucounters.cpp | 22 ++ src/cpucounters.h | 30 ++- src/dashboard.cpp | 36 ++- src/pcm-accel-common.cpp | 456 +++++++++++++++++++++++++++++++++++ src/pcm-accel-common.h | 158 ++++++++++++ src/pcm-accel.cpp | 489 +++----------------------------------- 
src/pcm-sensor-server.cpp | 130 +++++++++- src/topology.cpp | 3 +- tests/CMakeLists.txt | 2 +- 10 files changed, 863 insertions(+), 467 deletions(-) create mode 100644 src/pcm-accel-common.cpp create mode 100644 src/pcm-accel-common.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index c2c1d304..107136e6 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -5,7 +5,7 @@ # All pcm-* executables set(PROJECT_NAMES pcm pcm-numa pcm-latency pcm-power pcm-msr pcm-memory pcm-tsx pcm-pcie pcm-core pcm-iio pcm-lspci pcm-pcicfg pcm-mmio pcm-tpmi pcm-raw pcm-accel) -file(GLOB COMMON_SOURCES msr.cpp cpucounters.cpp pci.cpp mmio.cpp bw.cpp utils.cpp topology.cpp debug.cpp threadpool.cpp uncore_pmu_discovery.cpp) +file(GLOB COMMON_SOURCES pcm-accel-common.cpp msr.cpp cpucounters.cpp pci.cpp mmio.cpp bw.cpp utils.cpp topology.cpp debug.cpp threadpool.cpp uncore_pmu_discovery.cpp) if (APPLE) file(GLOB UNUX_SOURCES dashboard.cpp) @@ -143,6 +143,8 @@ foreach(PROJECT_NAME ${PROJECT_NAMES}) file(READ pcm-sensor-server.service.in SENSOR_SERVICE_IN) string(REPLACE "@@CMAKE_INSTALL_SBINDIR@@" "${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_SBINDIR}" SENSOR_SERVICE "${SENSOR_SERVICE_IN}") file(WRITE "${CMAKE_BINARY_DIR}/pcm-sensor-server.service" "${SENSOR_SERVICE}") + file(GLOB PROJECT_FILE ${PROJECT_NAME}.cpp pcm-accel-common.h pcm-accel-common.cpp) + target_include_directories(pcm-sensor-server PUBLIC ${CMAKE_SOURCE_DIR}) if(LINUX_SYSTEMD) install(FILES "${CMAKE_BINARY_DIR}/pcm-sensor-server.service" DESTINATION "${LINUX_SYSTEMD_UNITDIR}") endif(LINUX_SYSTEMD) diff --git a/src/cpucounters.cpp b/src/cpucounters.cpp index 2908f235..0f669469 100644 --- a/src/cpucounters.cpp +++ b/src/cpucounters.cpp @@ -2737,6 +2737,8 @@ PCM::PCM() : num_phys_cores_per_socket(0), num_online_cores(0), num_online_sockets(0), + accel(0), + accel_counters_num_max(0), core_gen_counter_num_max(0), core_gen_counter_num_used(0), // 0 means no core gen counters used core_gen_counter_width(0), @@ -6480,6 
+6482,26 @@ uint32 PCM::getNumSockets() const return (uint32)num_sockets; } +uint32 PCM::getAccel() const +{ + return accel; +} + +void PCM::setAccel(uint32 input) +{ + accel = input; +} + +uint32 PCM::getNumberofAccelCounters() const +{ + return accel_counters_num_max; +} + +void PCM::setNumberofAccelCounters(uint32 input) +{ + accel_counters_num_max = input; +} + uint32 PCM::getNumOnlineSockets() const { return (uint32)num_online_sockets; diff --git a/src/cpucounters.h b/src/cpucounters.h index 3c3bf346..eba20082 100644 --- a/src/cpucounters.h +++ b/src/cpucounters.h @@ -605,6 +605,8 @@ class PCM_API PCM int32 num_phys_cores_per_socket; int32 num_online_cores; int32 num_online_sockets; + uint32 accel; + uint32 accel_counters_num_max; uint32 core_gen_counter_num_max; uint32 core_gen_counter_num_used; uint32 core_gen_counter_width; @@ -1506,9 +1508,29 @@ class PCM_API PCM \return Number of sockets in the system */ uint32 getNumSockets() const; + + /*! \brief Reads the accel type in the system + \return acceltype + */ + uint32 getAccel() const; + + /*! \brief Sets the accel type in the system + \return acceltype + */ + void setAccel(uint32 input); + + /*! \brief Reads the Number of AccelCounters in the system + \return None + */ + uint32 getNumberofAccelCounters() const; + + /*! \brief Sets the Number of AccelCounters in the system + \return number of counters + */ + void setNumberofAccelCounters(uint32 input); /*! 
\brief Reads number of online sockets (CPUs) in the system - \return Number of online sockets in the system + \return Number of online sockets in the system */ uint32 getNumOnlineSockets() const; @@ -3374,6 +3396,11 @@ class SystemCounterState : public SocketCounterState } public: + typedef uint32_t h_id; + typedef uint32_t v_id; + typedef std::map,uint64_t> ctr_data; + typedef std::vector dev_content; + std::vector accel_counters; std::vector CXLWriteMem,CXLWriteCache; friend uint64 getIncomingQPILinkBytes(uint32 socketNr, uint32 linkNr, const SystemCounterState & before, const SystemCounterState & after); friend uint64 getIncomingQPILinkBytes(uint32 socketNr, uint32 linkNr, const SystemCounterState & now); @@ -3385,6 +3412,7 @@ class SystemCounterState : public SocketCounterState uncoreTSC(0) { PCM * m = PCM::getInstance(); + accel_counters.resize(m->getNumberofAccelCounters()); CXLWriteMem.resize(m->getNumSockets(),0); CXLWriteCache.resize(m->getNumSockets(),0); incomingQPIPackets.resize(m->getNumSockets(), diff --git a/src/dashboard.cpp b/src/dashboard.cpp index 4ed3e756..55f17567 100644 --- a/src/dashboard.cpp +++ b/src/dashboard.cpp @@ -4,7 +4,8 @@ #include #include #include -#include "cpucounters.h" + +#include "pcm-accel-common.h" #include "dashboard.h" namespace pcm { @@ -515,6 +516,12 @@ std::string influxDBCore_Aggregate_Core_Counters(const std::string& S, const std return influxDB_Counters(S, m, "Core Aggregate_Core Counters"); } +std::string influxDBAccel_Counters(const std::string& S, const std::string& m) +{ + AcceleratorCounterState * accs = AcceleratorCounterState::getInstance(); + return std::string("mean(\\\"Sockets_") + S + "_Accelerators_" +accs->getAccelCounterName()+" Counters Device_" + m + "\\\")"; +} + std::string influxDBCore_Aggregate_Core_Counters(const std::string& m) { return influxDB_Counters(m, "Core Aggregate_Core Counters"); @@ -542,6 +549,7 @@ std::mutex dashboardGenMutex; std::string getPCMDashboardJSON(const PCMDashboardType 
type, int ns, int nu, int nc) { auto pcm = PCM::getInstance(); + auto accs = AcceleratorCounterState::getInstance(); std::lock_guard dashboardGenGuard(dashboardGenMutex); const size_t NumSockets = (ns < 0) ? pcm->getNumSockets() : ns; const size_t NumUPILinksPerSocket = (nu < 0) ? pcm->getQPILinksPerSocket() : nu; @@ -783,6 +791,32 @@ std::string getPCMDashboardJSON(const PCMDashboardType type, int ns, int nu, int dashboard.push(panel); dashboard.push(panel1); } + if (pcm->getAccel() != ACCEL_NOCONFIG){ + auto accelCounters = [&](const std::string & m) + { + auto panel = std::make_shared(0, y, width, height, accs->getAccelCounterName() + " " + m,"Byte/sec", false); + std::shared_ptr panel1; + panel1 = std::make_shared(width, y, max_width - width, height, std::string("Current ") +accs->getAccelCounterName() + " (Byte/sec)"); + y += height; + for (size_t s = 0; s < accs->getNumOfAccelDevs(); ++s) + { + const auto S = std::to_string(s); + const auto suffix = "/1"; + auto t = createTarget("Device "+S, + "mean(\\\"Accelerators_"+accs->getAccelCounterName()+" Counters Device " + S + "_" + m + "\\\")" + suffix, + "rate(" + prometheusMetric(accs->remove_string_inside_use(m)) + "{instance=\\\"$node\\\", aggregate=\\\"system\\\", source=\\\"accel\\\" ,"+accs->getAccelCounterName()+"device=\\\"" + S + "\\\"}" + interval + ")" + suffix); + panel->push(t); + panel1->push(t); + + } + dashboard.push(panel); + dashboard.push(panel1); + }; + for (int j =0;jgetNumberOfCounters();j++) + { + accelCounters(accs->getAccelIndexCounterName(j)); + } + } for (size_t s = 0; s < NumSockets; ++s) { const auto S = std::to_string(s); diff --git a/src/pcm-accel-common.cpp b/src/pcm-accel-common.cpp new file mode 100644 index 00000000..9aa7eb7e --- /dev/null +++ b/src/pcm-accel-common.cpp @@ -0,0 +1,456 @@ +// SPDX-License-Identifier: BSD-3-Clause +// Copyright (c) 2022-2023, Intel Corporation +// written by White.Hu, Pavithran P + +#include "pcm-accel-common.h" +#include "cpucounters.h" +#include 
+ +idx_ccr* idx_get_ccr(uint64_t& ccr) +{ + return new spr_idx_ccr(ccr); +} + +uint32_t AcceleratorCounterState::getNumOfAccelDevs() +{ + uint32_t dev_count = 0; + + if (evt_ctx.accel >= ACCEL_MAX || evt_ctx.m == NULL) + return 0; + + switch (evt_ctx.accel) + { + case ACCEL_IAA: + dev_count = evt_ctx.m->getNumOfIDXAccelDevs(PCM::IDX_IAA); + break; + case ACCEL_DSA: + dev_count = evt_ctx.m->getNumOfIDXAccelDevs(PCM::IDX_DSA); + break; + case ACCEL_QAT: + dev_count = evt_ctx.m->getNumOfIDXAccelDevs(PCM::IDX_QAT); + break; + default: + dev_count = 0; + break; + } + + return dev_count; +} + +uint32_t AcceleratorCounterState::getMaxNumOfAccelCtrs() +{ + uint32_t ctr_count = 0; + + if (evt_ctx.accel >= ACCEL_MAX || evt_ctx.m == NULL) + return 0; + + switch (evt_ctx.accel) + { + case ACCEL_IAA: + case ACCEL_DSA: + case ACCEL_QAT: + ctr_count = evt_ctx.m->getMaxNumOfIDXAccelCtrs(evt_ctx.accel); + break; + default: + ctr_count = 0; + break; + } + + return ctr_count; +} + +int32_t AcceleratorCounterState::programAccelCounters() +{ + std::vector rawEvents; + std::vector filters_wq, filters_tc, filters_pgsz, filters_xfersz, filters_eng; + + if (evt_ctx.m == NULL || evt_ctx.accel >= ACCEL_MAX || evt_ctx.ctrs.size() == 0 || evt_ctx.ctrs.size() > getMaxNumOfAccelCtrs()) + return -1; + + switch (evt_ctx.accel) + { + case ACCEL_IAA: + case ACCEL_DSA: + case ACCEL_QAT: + for (auto pctr = evt_ctx.ctrs.begin(); pctr != evt_ctx.ctrs.end(); ++pctr) + { + rawEvents.push_back(pctr->ccr); + filters_wq.push_back(pctr->cfr_wq); + filters_tc.push_back(pctr->cfr_tc); + filters_pgsz.push_back(pctr->cfr_pgsz); + filters_xfersz.push_back(pctr->cfr_xfersz); + filters_eng.push_back(pctr->cfr_eng); + //std::cout<<"ctr idx=0x" << std::hex << pctr->idx << " hid=0x" << std::hex << pctr->h_id << " vid=0x" << std::hex << pctr->v_id <<" ccr=0x" << std::hex << pctr->ccr << "\n"; + //std::cout<<"mul=0x" << std::hex << pctr->multiplier << " div=0x" << std::hex << pctr->divider << "\n" << std::dec; + } + 
evt_ctx.m->programIDXAccelCounters(idx_accel_mapping[evt_ctx.accel], rawEvents, filters_wq, filters_eng, filters_tc, filters_pgsz, filters_xfersz); + break; + default: + break; + } + + return 0; +} + +SimpleCounterState AcceleratorCounterState::getAccelCounterState(uint32 dev, uint32 ctr_index) +{ + SimpleCounterState result; + + if (evt_ctx.m == NULL || evt_ctx.accel >= ACCEL_MAX || dev >= getNumOfAccelDevs() || ctr_index >= getMaxNumOfAccelCtrs()) + return result; + + switch (evt_ctx.accel) + { + case ACCEL_IAA: + case ACCEL_DSA: + case ACCEL_QAT: + result = evt_ctx.m->getIDXAccelCounterState(evt_ctx.accel, dev, ctr_index); + break; + case ACCEL_MAX: + case ACCEL_NOCONFIG: + break; + } + + return result; +} + +bool AcceleratorCounterState::isAccelCounterAvailable() +{ + bool ret = true; + + if (evt_ctx.m == NULL || evt_ctx.accel >= ACCEL_MAX) + ret =false; + + if (getNumOfAccelDevs() == 0) + ret = false; + + return ret; +} + +std::string AcceleratorCounterState::getAccelCounterName() +{ + std::string ret; + + switch (evt_ctx.accel) + { + case ACCEL_IAA: + ret = "iaa"; + break; + case ACCEL_DSA: + ret = "dsa"; + break; + case ACCEL_QAT: + ret = "qat"; + break; + default: + ret = "id=" + std::to_string(evt_ctx.accel) + "(unknown)"; + } + + return ret; +} + +bool AcceleratorCounterState::getAccelDevLocation( uint32_t dev, const ACCEL_DEV_LOC_MAPPING loc_map, uint32_t &location) +{ + bool ret = true; + + switch (loc_map) + { + case SOCKET_MAP: + location = evt_ctx.m->getCPUSocketIdOfIDXAccelDev(evt_ctx.accel, dev); + break; + case NUMA_MAP: + location = evt_ctx.m->getNumaNodeOfIDXAccelDev(evt_ctx.accel, dev); + break; + default: + ret = false; + } + + return ret; +} + +/*! 
\brief Computes number of accelerator counters present in system + + \return Number of accel counters in system +*/ +int AcceleratorCounterState::getNumberOfCounters(){ + + return getCounters().size(); +} + +std::string AcceleratorCounterState::getAccelIndexCounterName(int ctr_index) +{ + accel_counter pctr = getCounters().at(ctr_index); + return pctr.v_event_name; +} + +uint64 AcceleratorCounterState::getAccelIndexCounter(uint32 dev, const SystemCounterState & before,const SystemCounterState & after,int ctr_index) +{ + const uint32_t counter_nb = getCounters().size(); + accel_counter pctr = getCounters().at(ctr_index); + uint64_t raw_result = getNumberOfEvents(before.accel_counters[dev*counter_nb + ctr_index], after.accel_counters[dev*counter_nb + ctr_index]); + uint64_t trans_result = uint64_t (raw_result * pctr.multiplier / (double) pctr.divider ); + return trans_result; +} + +int idx_evt_parse_handler(evt_cb_type cb_type, void *cb_ctx, counter &base_ctr, std::map &ofm, std::string key, uint64 numValue) +{ + accel_evt_parse_context *context = (accel_evt_parse_context *)cb_ctx; + // PCM *m = context->m; + AcceleratorCounterState *accs_; + accs_ = AcceleratorCounterState::getInstance(); + + if (cb_type == EVT_LINE_START) //this event will be called per line(start) + { + context->ctr.cfr_wq = 0xFFFF; + context->ctr.cfr_eng = 0xFFFF; + context->ctr.cfr_tc = 0xFFFF; + context->ctr.cfr_pgsz = 0xFFFF; + context->ctr.cfr_xfersz = 0xFFFF; + context->ctr.ccr = 0; + } + else if (cb_type == EVT_LINE_FIELD) //this event will be called per field of line + { + std::unique_ptr pccr(idx_get_ccr(context->ctr.ccr)); + + //std::cout << "Key:" << key << " Value:" << value << " opcodeFieldMap[key]:" << ofm[key] << "\n"; + switch (ofm[key]) + { + case PCM::EVENT_SELECT: + pccr->set_event_select(numValue); + //std::cout << "pccr value:" << std::hex << pccr->get_ccr_value() <<"\n" << std::dec; + break; + case PCM::ENABLE: + pccr->set_enable(numValue); + //std::cout << "pccr value:" << 
std::hex << pccr->get_ccr_value() <<"\n" << std::dec; + break; + case EVENT_CATEGORY: + pccr->set_event_category(numValue); + //std::cout << "pccr value:" << std::hex << pccr->get_ccr_value() <<"\n" << std::dec; + break; + case FILTER_WQ: + context->ctr.cfr_wq = (uint32_t)numValue; + break; + case FILTER_ENG: + context->ctr.cfr_eng = (uint32_t)numValue; + break; + case FILTER_TC: + context->ctr.cfr_tc = (uint32_t)numValue; + break; + case FILTER_PGSZ: + context->ctr.cfr_pgsz = (uint32_t)numValue; + break; + case FILTER_XFERSZ: + context->ctr.cfr_xfersz = (uint32_t)numValue; + break; + case PCM::INVALID: + default: + std::cerr << "Field in -o file not recognized. The key is: " << key << "\n"; + return -1; + } + } + else if(cb_type == EVT_LINE_COMPLETE) //this event will be called every line(end) + { + if (context->accel == ACCEL_IAA && base_ctr.h_event_name != "IAA") + { + return 0; //skip non-IAA cfg line + } + else if(context->accel == ACCEL_DSA && base_ctr.h_event_name != "DSA") + { + return 0; //skip non-DSA cfg line + } + else if(context->accel == ACCEL_QAT && base_ctr.h_event_name != "QAT") + { + return 0; //skip non-QAT cfg line + } + + //Validate the total number of counter exceed the maximum or not. + if ((uint32)base_ctr.idx >= accs_->getMaxNumOfAccelCtrs()) + { + std::cerr << "line parse KO due to invalid value!" 
<< std::dec << "\n"; + return 0; //skip the invalid cfg line + } + + context->ctr.h_event_name = base_ctr.h_event_name; + context->ctr.v_event_name = base_ctr.v_event_name; + context->ctr.idx = base_ctr.idx; + context->ctr.multiplier = base_ctr.multiplier; + context->ctr.divider = base_ctr.divider; + context->ctr.h_id = base_ctr.h_id; + context->ctr.v_id = base_ctr.v_id; + //std::cout << "line parse OK, ctrcfg=0x" << std::hex << context->ctr.ccr << ", h_event_name=" << base_ctr.h_event_name << ", v_event_name=" << base_ctr.v_event_name; + //std::cout << ", h_id=0x" << std::hex << base_ctr.h_id << ", v_id=0x" << std::hex << base_ctr.v_id; + //std::cout << ", idx=0x"<< std::hex << base_ctr.idx << ", multiplier=0x" << std::hex << base_ctr.multiplier << ", divider=0x" << std::hex << base_ctr.divider << std::dec << "\n"; + context->ctrs.push_back(context->ctr); + } + + return 0; +} + +std::vector& AcceleratorCounterState::getCounters(){ + return evt_ctx.ctrs; +} + +uint32_t AcceleratorCounterState::getAccel() +{ + return evt_ctx.accel; +} + +void readAccelCounters(SystemCounterState& sycs_) +{ + AcceleratorCounterState *accs_ = AcceleratorCounterState::getInstance(); + PCM *pcm = PCM::getInstance(); + // const uint32_t delay_ms = uint32_t(delay * 1000); + const uint32_t dev_count = accs_->getNumOfAccelDevs(); + const uint32_t counter_nb = accs_->getCounters().size(); + pcm->setNumberofAccelCounters(dev_count*counter_nb); + uint32_t ctr_index = 0; + // accel_content accel_results(ACCEL_MAX, dev_content(ACCEL_IP_DEV_COUNT_MAX, ctr_data())); + sycs_.accel_counters.resize(dev_count*counter_nb); + SimpleCounterState *currState = new SimpleCounterState[dev_count*counter_nb]; + // programAccelCounters(m, accel, ctrs); + + switch (accs_->getAccel()) + { + case ACCEL_IAA: + case ACCEL_DSA: + for (uint32_t dev = 0; dev != dev_count; ++dev) + { + ctr_index = 0; + for (auto pctr = accs_->getCounters().begin(); pctr != accs_->getCounters().end(); ++pctr) + { + 
sycs_.accel_counters[dev*counter_nb + ctr_index] = accs_->getAccelCounterState( dev, ctr_index); + ctr_index++; + } + } + break; + + case ACCEL_QAT: + // MySleepMs(delay_ms); + + for (uint32_t dev = 0; dev != dev_count; ++dev) + { + pcm->controlQATTelemetry(dev, PCM::QAT_TLM_REFRESH); + ctr_index = 0; + for (auto pctr = accs_->getCounters().begin();pctr != accs_->getCounters().end(); ++pctr) + { + sycs_.accel_counters[dev*counter_nb + ctr_index] = accs_->getAccelCounterState(dev, ctr_index); + + // raw_result = currState[dev*counter_nb + ctr_index].getRawData(); + // trans_result = uint64_t (raw_result * pctr->multiplier / (double) pctr->divider ); + + //accel_result[evt_ctx.accel][dev][std::pair(pctr->h_id,pctr->v_id)] = trans_result; + //std::cout << "collect_data: accel=" << accel << " dev=" << dev << " h_id=" << pctr->h_id << " v_id=" << pctr->v_id << " data=" << std::hex << trans_result << "\n" << std::dec; + ctr_index++; + } + } + break; + } + + delete[] currState; + +} + +AcceleratorCounterState* AcceleratorCounterState::instance = NULL; +AcceleratorCounterState * AcceleratorCounterState::getInstance() + { + // lock-free read + // cppcheck-suppress identicalConditionAfterEarlyExit + if (instance) return instance; + + std::unique_lock instanceCreationMutex; + // cppcheck-suppress identicalConditionAfterEarlyExit + if (instance) return instance; + + return instance = new AcceleratorCounterState(); + } + +std::string AcceleratorCounterState::remove_string_inside_use(std::string text) { + std::string result = ""; + int open_use_count = 0; + for (char c : text) { + if (c == '(') { + open_use_count += 1; + } else if (c == ')' ) { + open_use_count -= 1; + } else if (open_use_count == 0) { + result += c; + } + } + return result; +} + +void AcceleratorCounterState::setEvents(PCM *m,ACCEL_IP accel, std::string specify_evtfile,bool evtfile) +{ + evt_ctx.m = m; + evt_ctx.accel = accel; + if (isAccelCounterAvailable() == true) + { + if (evtfile==false) //All platform use 
the spr config file by default. + { + ev_file_name = "opCode-143-accel.txt"; + } + else + { + ev_file_name = specify_evtfile; + } + //std::cout << "load event config file from:" << ev_file_name << "\n"; + } + else + { + std::cerr << "Error: " << getAccelCounterName() << " device is NOT available/ready with this platform! Program aborted\n"; + exit(EXIT_FAILURE); + } + + switch (accel) + { + case ACCEL_IAA: + case ACCEL_DSA: + case ACCEL_QAT: + opcodeFieldMap["hname"] = PCM::H_EVENT_NAME; + opcodeFieldMap["vname"] = PCM::V_EVENT_NAME; + opcodeFieldMap["multiplier"] = PCM::MULTIPLIER; + opcodeFieldMap["divider"] = PCM::DIVIDER; + opcodeFieldMap["ctr"] = PCM::COUNTER_INDEX; + opcodeFieldMap["en"] = PCM::ENABLE; + opcodeFieldMap["ev_sel"] = PCM::EVENT_SELECT; + opcodeFieldMap["ev_cat"] = EVENT_CATEGORY; + opcodeFieldMap["filter_wq"] = FILTER_WQ; + opcodeFieldMap["filter_eng"] = FILTER_ENG; + opcodeFieldMap["filter_tc"] = FILTER_TC; + opcodeFieldMap["filter_pgsz"] = FILTER_PGSZ; + opcodeFieldMap["filter_xfersz"] = FILTER_XFERSZ; + + p_evt_handler = idx_evt_parse_handler; + evt_ctx.ctrs.clear();//fill the ctrs by evt_handler callback func. + break; + default: + std::cerr << "Error: Accel type=0x" << std::hex << accel << " is not supported! Program aborted\n" << std::dec; + exit(EXIT_FAILURE); + } + + try + { + load_events(ev_file_name, opcodeFieldMap, p_evt_handler, (void *)&evt_ctx); + } + catch (std::exception & e) + { + std::cerr << "Error: " << e.what() << "\n"; + std::cerr << "Error: event cfg file have the problem, please double check it! Program aborted\n"; + exit(EXIT_FAILURE); + } + if (evt_ctx.ctrs.size() ==0 || evt_ctx.ctrs.size() > getMaxNumOfAccelCtrs()) + { + std::cout<< evt_ctx.ctrs.size()<< " " << getMaxNumOfAccelCtrs(); + std::cerr << "Error: event counter size is 0 or exceed maximum, please check the event cfg file! 
Program aborted\n"; + exit(EXIT_FAILURE); + } + + if (accel == ACCEL_QAT) + { + const uint32_t dev_count = getNumOfAccelDevs(); + for (uint32_t dev = 0; dev != dev_count; ++dev) + { + m->controlQATTelemetry(dev, PCM::QAT_TLM_START); //start the QAT telemetry service + } + } +} \ No newline at end of file diff --git a/src/pcm-accel-common.h b/src/pcm-accel-common.h new file mode 100644 index 00000000..387b25ac --- /dev/null +++ b/src/pcm-accel-common.h @@ -0,0 +1,158 @@ +// SPDX-License-Identifier: BSD-3-Clause +// Copyright (c) 2022-2023, Intel Corporation +// written by White.Hu, Pavithran P + +#pragma once +#include "cpucounters.h" +#ifdef __linux__ +#include +#endif +using namespace pcm; + +#define PCM_DELAY_DEFAULT 3.0 // in seconds + +class idx_ccr { + public: + virtual uint64_t get_event_select() const = 0; + virtual void set_event_select(uint64_t value) = 0; + virtual uint64_t get_event_category() const = 0; + virtual void set_event_category(uint64_t value) = 0; + virtual uint64_t get_enable() const = 0; + virtual void set_enable(uint64_t value) = 0; + virtual uint64_t get_ccr_value() const = 0; + virtual void set_ccr_value(uint64_t value) = 0; + virtual ~idx_ccr() {}; +}; + +class spr_idx_ccr: public idx_ccr { + public: + spr_idx_ccr(uint64_t &v){ + ccr_value = &v; + } + virtual uint64_t get_event_select() const { //EVENT bit, bit 32 + return ((*ccr_value >> 32) & 0xFFFFFFF); + } + virtual void set_event_select(uint64_t value) { + *ccr_value |= (value << 32); + } + virtual uint64_t get_event_category() const { //EVENT Categorg, bit 8 + return ((*ccr_value >> 8) & 0xF); + } + virtual void set_event_category(uint64_t value) { + *ccr_value |= (value << 8); + } + virtual uint64_t get_enable() const { //Enable counter, bit 0 + return ((*ccr_value >> 0 ) & 0x01); + } + virtual void set_enable(uint64_t value) { + *ccr_value |= (value << 0); + } + virtual uint64_t get_ccr_value() const { + return *ccr_value; + } + virtual void set_ccr_value(uint64_t value) { + 
*ccr_value = value; + } + + private: + uint64_t* ccr_value = NULL; +}; + +idx_ccr* idx_get_ccr(uint64_t& ccr); + +typedef enum +{ + ACCEL_IAA, + ACCEL_DSA, + ACCEL_QAT, + ACCEL_MAX, + ACCEL_NOCONFIG, +} ACCEL_IP; + +enum IDXPerfmonField +{ + DPF_BASE = 0x100, //start from 0x100 to different with PerfmonField in cpucounter.h + EVENT_CATEGORY, + FILTER_WQ, + FILTER_ENG, + FILTER_TC, + FILTER_PGSZ, + FILTER_XFERSZ +}; + +typedef enum +{ + SOCKET_MAP, + NUMA_MAP, +} ACCEL_DEV_LOC_MAPPING; + +const std::vector idx_accel_mapping = +{ + PCM::IDX_IAA, + PCM::IDX_DSA, + PCM::IDX_QAT +}; + +#define ACCEL_IP_DEV_COUNT_MAX (16) + +typedef uint32_t h_id; +typedef uint32_t v_id; +typedef std::map,uint64_t> ctr_data; +typedef std::vector dev_content; +typedef std::vector accel_content; + +struct accel_counter : public counter { + //filter config for IDX Accelerator. + uint32_t cfr_wq = 0; + uint32_t cfr_eng = 0; + uint32_t cfr_tc = 0; + uint32_t cfr_pgsz = 0; + uint32_t cfr_xfersz = 0; +}; + +typedef struct +{ + PCM *m; + ACCEL_IP accel; + accel_counter ctr; + std::vector ctrs; +} accel_evt_parse_context; + +typedef int (*pfn_evt_handler)(evt_cb_type, void *, counter &, std::map &, std::string, uint64); + +int idx_evt_parse_handler(evt_cb_type cb_type, void *cb_ctx, counter &base_ctr, std::map &ofm, std::string key, uint64 numValue); +void readAccelCounters(SystemCounterState &sycs_); + +class AcceleratorCounterState { + + private: + AcceleratorCounterState(){}; // forbidden to call directly because it is a singleton + AcceleratorCounterState & operator = (const AcceleratorCounterState &) = delete; + static AcceleratorCounterState * instance; + accel_evt_parse_context evt_ctx = { {}, {}, {}, {} }; + public: + AcceleratorCounterState(const AcceleratorCounterState& obj) = delete; + // std::mutex instanceCreationMutex; + static AcceleratorCounterState * getInstance(); + std::map opcodeFieldMap; + std::string ev_file_name; + pfn_evt_handler p_evt_handler = NULL; + + void 
setEvents(PCM * m,ACCEL_IP accel,std::string specify_evtfile,bool evtfile); + uint32_t getNumOfAccelDevs(); + uint32_t getAccel(); + uint32_t getMaxNumOfAccelCtrs(); + std::vector& getCounters(); + int32_t programAccelCounters(); + SimpleCounterState getAccelCounterState(uint32 dev, uint32 ctr_index); + bool isAccelCounterAvailable(); + std::string getAccelCounterName(); + void setDSA(); + bool getAccelDevLocation( uint32_t dev, const ACCEL_DEV_LOC_MAPPING loc_map, uint32_t &location); + // void readAccelCounters(SystemCounterState sycs_); + int getNumberOfCounters(); + std::string getAccelIndexCounterName(int ctr_index); + std::string remove_string_inside_use(std::string text); + uint64 getAccelIndexCounter(uint32 dev, const SystemCounterState & before,const SystemCounterState & after,int ctr_index); + +}; \ No newline at end of file diff --git a/src/pcm-accel.cpp b/src/pcm-accel.cpp index 021ea43d..8988589d 100644 --- a/src/pcm-accel.cpp +++ b/src/pcm-accel.cpp @@ -2,7 +2,7 @@ // Copyright (c) 2022, Intel Corporation // written by White.Hu -#include "cpucounters.h" +#include "pcm-accel-common.h" #ifdef _MSC_VER #pragma warning(disable : 4996) // for sprintf #include @@ -20,285 +20,10 @@ #ifdef _MSC_VER #include "freegetopt/getopt.h" #endif -#ifdef __linux__ -#include -#endif #include "lspci.h" #include "utils.h" -using namespace std; using namespace pcm; - -#define PCM_DELAY_DEFAULT 3.0 // in seconds - -class idx_ccr { - public: - virtual uint64_t get_event_select() const = 0; - virtual void set_event_select(uint64_t value) = 0; - virtual uint64_t get_event_category() const = 0; - virtual void set_event_category(uint64_t value) = 0; - virtual uint64_t get_enable() const = 0; - virtual void set_enable(uint64_t value) = 0; - virtual uint64_t get_ccr_value() const = 0; - virtual void set_ccr_value(uint64_t value) = 0; - virtual ~idx_ccr() {}; -}; - -class spr_idx_ccr: public idx_ccr { - public: - spr_idx_ccr(uint64_t &v){ - ccr_value = &v; - } - virtual uint64_t 
get_event_select() const { //EVENT bit, bit 32 - return ((*ccr_value >> 32) & 0xFFFFFFF); - } - virtual void set_event_select(uint64_t value) { - *ccr_value |= (value << 32); - } - virtual uint64_t get_event_category() const { //EVENT Categorg, bit 8 - return ((*ccr_value >> 8) & 0xF); - } - virtual void set_event_category(uint64_t value) { - *ccr_value |= (value << 8); - } - virtual uint64_t get_enable() const { //Enable counter, bit 0 - return ((*ccr_value >> 0 ) & 0x01); - } - virtual void set_enable(uint64_t value) { - *ccr_value |= (value << 0); - } - virtual uint64_t get_ccr_value() const { - return *ccr_value; - } - virtual void set_ccr_value(uint64_t value) { - *ccr_value = value; - } - - private: - uint64_t* ccr_value = NULL; -}; - -idx_ccr* idx_get_ccr(uint64_t& ccr) -{ - return new spr_idx_ccr(ccr); -} - -typedef enum -{ - ACCEL_IAA, - ACCEL_DSA, - ACCEL_QAT, - ACCEL_MAX, -} ACCEL_IP; - -enum IDXPerfmonField -{ - DPF_BASE = 0x100, //start from 0x100 to different with PerfmonField in cpucounter.h - EVENT_CATEGORY, - FILTER_WQ, - FILTER_ENG, - FILTER_TC, - FILTER_PGSZ, - FILTER_XFERSZ -}; - -typedef enum -{ - SOCKET_MAP, - NUMA_MAP, -} ACCEL_DEV_LOC_MAPPING; - -const std::vector idx_accel_mapping = -{ - PCM::IDX_IAA, - PCM::IDX_DSA, - PCM::IDX_QAT -}; - -#define ACCEL_IP_DEV_COUNT_MAX (16) - -typedef uint32_t h_id; -typedef uint32_t v_id; -typedef std::map,uint64_t> ctr_data; -typedef std::vector dev_content; -typedef std::vector accel_content; - accel_content accel_results(ACCEL_MAX, dev_content(ACCEL_IP_DEV_COUNT_MAX, ctr_data())); - -struct accel_counter : public counter { - //filter config for IDX Accelerator. 
- uint32_t cfr_wq = 0; - uint32_t cfr_eng = 0; - uint32_t cfr_tc = 0; - uint32_t cfr_pgsz = 0; - uint32_t cfr_xfersz = 0; -}; - -typedef struct -{ - PCM *m; - ACCEL_IP accel; - accel_counter ctr; - vector ctrs; -} accel_evt_parse_context; - -uint32_t getNumOfAccelDevs(PCM *m, ACCEL_IP accel) -{ - uint32_t dev_count = 0; - - if (accel >= ACCEL_MAX || m == NULL) - return 0; - - switch (accel) - { - case ACCEL_IAA: - dev_count = m->getNumOfIDXAccelDevs(PCM::IDX_IAA); - break; - case ACCEL_DSA: - dev_count = m->getNumOfIDXAccelDevs(PCM::IDX_DSA); - break; - case ACCEL_QAT: - dev_count = m->getNumOfIDXAccelDevs(PCM::IDX_QAT); - break; - default: - dev_count = 0; - break; - } - - return dev_count; -} - -uint32_t getMaxNumOfAccelCtrs(PCM *m, ACCEL_IP accel) -{ - uint32_t ctr_count = 0; - - if (accel >= ACCEL_MAX || m == NULL) - return 0; - - switch (accel) - { - case ACCEL_IAA: - case ACCEL_DSA: - case ACCEL_QAT: - ctr_count = m->getMaxNumOfIDXAccelCtrs(accel); - break; - default: - ctr_count = 0; - break; - } - - return ctr_count; -} - -int32_t programAccelCounters(PCM *m, ACCEL_IP accel, std::vector& ctrs) -{ - vector rawEvents; - vector filters_wq, filters_tc, filters_pgsz, filters_xfersz, filters_eng; - - if (m == NULL || accel >= ACCEL_MAX || ctrs.size() == 0 || ctrs.size() > getMaxNumOfAccelCtrs(m, accel)) - return -1; - - switch (accel) - { - case ACCEL_IAA: - case ACCEL_DSA: - case ACCEL_QAT: - for (auto pctr = ctrs.begin(); pctr != ctrs.end(); ++pctr) - { - rawEvents.push_back(pctr->ccr); - filters_wq.push_back(pctr->cfr_wq); - filters_tc.push_back(pctr->cfr_tc); - filters_pgsz.push_back(pctr->cfr_pgsz); - filters_xfersz.push_back(pctr->cfr_xfersz); - filters_eng.push_back(pctr->cfr_eng); - //std::cout<<"ctr idx=0x" << std::hex << pctr->idx << " hid=0x" << std::hex << pctr->h_id << " vid=0x" << std::hex << pctr->v_id <<" ccr=0x" << std::hex << pctr->ccr << "\n"; - //std::cout<<"mul=0x" << std::hex << pctr->multiplier << " div=0x" << std::hex << pctr->divider << 
"\n" << std::dec; - } - m->programIDXAccelCounters(idx_accel_mapping[accel], rawEvents, filters_wq, filters_eng, filters_tc, filters_pgsz, filters_xfersz); - break; - default: - break; - } - - return 0; -} - -SimpleCounterState getAccelCounterState(PCM *m, ACCEL_IP accel, uint32 dev, uint32 ctr_index) -{ - SimpleCounterState result; - - if (m == NULL || accel >= ACCEL_MAX || dev >= getNumOfAccelDevs(m, accel) || ctr_index >= getMaxNumOfAccelCtrs(m, accel)) - return result; - - switch (accel) - { - case ACCEL_IAA: - case ACCEL_DSA: - case ACCEL_QAT: - result = m->getIDXAccelCounterState(accel, dev, ctr_index); - break; - default: - break; - } - - return result; -} - -bool isAccelCounterAvailable(PCM *m, ACCEL_IP accel) -{ - bool ret = true; - - if (m == NULL || accel >= ACCEL_MAX) - ret =false; - - if (getNumOfAccelDevs(m, accel) == 0) - ret = false; - - return ret; -} - -std::string getAccelCounterName(ACCEL_IP accel) -{ - std::string ret; - - switch (accel) - { - case ACCEL_IAA: - ret = "iaa"; - break; - case ACCEL_DSA: - ret = "dsa"; - break; - case ACCEL_QAT: - ret = "qat"; - break; - default: - ret = "id=" + std::to_string(accel) + "(unknown)"; - break; - } - - return ret; -} - -bool getAccelDevLocation(PCM *m, const ACCEL_IP accel, uint32_t dev, const ACCEL_DEV_LOC_MAPPING loc_map, uint32_t &location) -{ - bool ret = true; - - switch (loc_map) - { - case SOCKET_MAP: - location = m->getCPUSocketIdOfIDXAccelDev(accel, dev); - break; - case NUMA_MAP: - location = m->getNumaNodeOfIDXAccelDev(accel, dev); - break; - default: - ret = false; - break; - } - - return ret; -} - std::vector build_counter_names(std::string dev_name, std::vector& ctrs, const ACCEL_DEV_LOC_MAPPING loc_map) { std::vector v; @@ -367,15 +92,17 @@ void print_usage(const std::string& progname) std::cout << "\n"; } -std::vector build_csv(PCM *m, const ACCEL_IP accel, std::vector& ctrs, +std::vector build_csv(const ACCEL_IP accel, std::vector& ctrs, const bool human_readable, const std::string& 
csv_delimiter, accel_content& sample_data, const ACCEL_DEV_LOC_MAPPING loc_map) { + AcceleratorCounterState *accs_; + accs_ = AcceleratorCounterState::getInstance(); std::vector result; std::vector current_row; auto header = build_counter_names("Accelerator", ctrs, loc_map); result.push_back(build_csv_row(header, csv_delimiter)); std::map> v_sort; - uint32_t dev_count = getNumOfAccelDevs(m, accel); + uint32_t dev_count = accs_->getNumOfAccelDevs(); for (uint32_t dev = 0; dev != dev_count; ++dev) { @@ -401,7 +128,7 @@ std::vector build_csv(PCM *m, const ACCEL_IP accel, std::vectorgetAccelDevLocation( dev, loc_map, location) == true) { current_row.push_back(std::to_string(location)); //location info } @@ -422,13 +149,15 @@ std::vector build_csv(PCM *m, const ACCEL_IP accel, std::vector build_display(PCM *m, const ACCEL_IP accel, std::vector& ctrs, accel_content& sample_data, const ACCEL_DEV_LOC_MAPPING loc_map) +std::vector build_display(const ACCEL_IP accel, std::vector& ctrs, accel_content& sample_data, const ACCEL_DEV_LOC_MAPPING loc_map) { std::vector buffer; std::vector headers; std::vector data; std::string row; - uint32_t dev_count = getNumOfAccelDevs(m, accel); + AcceleratorCounterState *accs_; + accs_ = AcceleratorCounterState::getInstance(); + uint32_t dev_count = accs_->getNumOfAccelDevs(); headers = build_counter_names("Accelerator", ctrs, loc_map); //Print first row @@ -461,7 +190,7 @@ std::vector build_display(PCM *m, const ACCEL_IP accel, std::vector std::string h_name = v_array[0]->h_event_name; uint32 location = 0xff; - if (getAccelDevLocation(m, accel, dev, loc_map, location) == true) + if (accs_->getAccelDevLocation(dev, loc_map, location) == true) { v_data.push_back(location); //location info } @@ -500,15 +229,15 @@ void collect_data(PCM *m, const double delay, const ACCEL_IP accel, std::vector< { const uint32_t delay_ms = uint32_t(delay * 1000); SimpleCounterState *before, *after; - const uint32_t dev_count = getNumOfAccelDevs(m, accel); + 
AcceleratorCounterState *accs_; + accs_ = AcceleratorCounterState::getInstance(); + const uint32_t dev_count = accs_->getNumOfAccelDevs(); const uint32_t counter_nb = ctrs.size(); uint32_t ctr_index = 0; before = new SimpleCounterState[dev_count*counter_nb]; after = new SimpleCounterState[dev_count*counter_nb]; - programAccelCounters(m, accel, ctrs); - switch (accel) { case ACCEL_IAA: @@ -518,7 +247,7 @@ void collect_data(PCM *m, const double delay, const ACCEL_IP accel, std::vector< ctr_index = 0; for (auto pctr = ctrs.begin(); pctr != ctrs.end(); ++pctr) { - before[dev*counter_nb + ctr_index] = getAccelCounterState(m, accel, dev, ctr_index); + before[dev*counter_nb + ctr_index] = accs_->getAccelCounterState(dev, ctr_index); ctr_index++; } } @@ -528,7 +257,7 @@ void collect_data(PCM *m, const double delay, const ACCEL_IP accel, std::vector< ctr_index = 0; for (auto pctr = ctrs.begin();pctr != ctrs.end(); ++pctr) { - after[dev*counter_nb + ctr_index] = getAccelCounterState(m, accel, dev, ctr_index); + after[dev*counter_nb + ctr_index] = accs_->getAccelCounterState(dev, ctr_index); uint64_t raw_result = getNumberOfEvents(before[dev*counter_nb + ctr_index], after[dev*counter_nb + ctr_index]); uint64_t trans_result = uint64_t (raw_result * pctr->multiplier / (double) pctr->divider * (1000 / (double) delay_ms)); accel_results[accel][dev][std::pair(pctr->h_id,pctr->v_id)] = trans_result; @@ -547,7 +276,7 @@ void collect_data(PCM *m, const double delay, const ACCEL_IP accel, std::vector< ctr_index = 0; for (auto pctr = ctrs.begin();pctr != ctrs.end(); ++pctr) { - after[dev*counter_nb + ctr_index] = getAccelCounterState(m, accel, dev, ctr_index); + after[dev*counter_nb + ctr_index] = accs_->getAccelCounterState(dev, ctr_index); uint64_t raw_result = after[dev*counter_nb + ctr_index].getRawData(); uint64_t trans_result = uint64_t (raw_result * pctr->multiplier / (double) pctr->divider ); @@ -567,99 +296,9 @@ void collect_data(PCM *m, const double delay, const ACCEL_IP 
accel, std::vector< delete[] after; } -int idx_evt_parse_handler(evt_cb_type cb_type, void *cb_ctx, counter &base_ctr, std::map &ofm, std::string key, uint64 numValue) -{ - accel_evt_parse_context *context = (accel_evt_parse_context *)cb_ctx; - PCM *m = context->m; - if (cb_type == EVT_LINE_START) //this event will be called per line(start) - { - context->ctr.cfr_wq = 0xFFFF; - context->ctr.cfr_eng = 0xFFFF; - context->ctr.cfr_tc = 0xFFFF; - context->ctr.cfr_pgsz = 0xFFFF; - context->ctr.cfr_xfersz = 0xFFFF; - context->ctr.ccr = 0; - } - else if (cb_type == EVT_LINE_FIELD) //this event will be called per field of line - { - std::unique_ptr pccr(idx_get_ccr(context->ctr.ccr)); - - //std::cout << "Key:" << key << " Value:" << value << " opcodeFieldMap[key]:" << ofm[key] << "\n"; - switch (ofm[key]) - { - case PCM::EVENT_SELECT: - pccr->set_event_select(numValue); - //std::cout << "pccr value:" << std::hex << pccr->get_ccr_value() <<"\n" << std::dec; - break; - case PCM::ENABLE: - pccr->set_enable(numValue); - //std::cout << "pccr value:" << std::hex << pccr->get_ccr_value() <<"\n" << std::dec; - break; - case EVENT_CATEGORY: - pccr->set_event_category(numValue); - //std::cout << "pccr value:" << std::hex << pccr->get_ccr_value() <<"\n" << std::dec; - break; - case FILTER_WQ: - context->ctr.cfr_wq = (uint32_t)numValue; - break; - case FILTER_ENG: - context->ctr.cfr_eng = (uint32_t)numValue; - break; - case FILTER_TC: - context->ctr.cfr_tc = (uint32_t)numValue; - break; - case FILTER_PGSZ: - context->ctr.cfr_pgsz = (uint32_t)numValue; - break; - case FILTER_XFERSZ: - context->ctr.cfr_xfersz = (uint32_t)numValue; - break; - case PCM::INVALID: - default: - std::cerr << "Field in -o file not recognized. 
The key is: " << key << "\n"; - return -1; - } - } - else if(cb_type == EVT_LINE_COMPLETE) //this event will be called every line(end) - { - if (context->accel == ACCEL_IAA && base_ctr.h_event_name != "IAA") - { - return 0; //skip non-IAA cfg line - } - else if(context->accel == ACCEL_DSA && base_ctr.h_event_name != "DSA") - { - return 0; //skip non-DSA cfg line - } - else if(context->accel == ACCEL_QAT && base_ctr.h_event_name != "QAT") - { - return 0; //skip non-QAT cfg line - } - - //Validate the total number of counter exceed the maximum or not. - if ((uint32)base_ctr.idx >= getMaxNumOfAccelCtrs(m, context->accel)) - { - std::cerr << "line parse KO due to invalid value!" << std::dec << "\n"; - return 0; //skip the invalid cfg line - } - context->ctr.h_event_name = base_ctr.h_event_name; - context->ctr.v_event_name = base_ctr.v_event_name; - context->ctr.idx = base_ctr.idx; - context->ctr.multiplier = base_ctr.multiplier; - context->ctr.divider = base_ctr.divider; - context->ctr.h_id = base_ctr.h_id; - context->ctr.v_id = base_ctr.v_id; - //std::cout << "line parse OK, ctrcfg=0x" << std::hex << context->ctr.ccr << ", h_event_name=" << base_ctr.h_event_name << ", v_event_name=" << base_ctr.v_event_name; - //std::cout << ", h_id=0x" << std::hex << base_ctr.h_id << ", v_id=0x" << std::hex << base_ctr.v_id; - //std::cout << ", idx=0x"<< std::hex << base_ctr.idx << ", multiplier=0x" << std::hex << base_ctr.multiplier << ", divider=0x" << std::hex << base_ctr.divider << std::dec << "\n"; - context->ctrs.push_back(context->ctr); - } - - return 0; -} -typedef int (*pfn_evt_handler)(evt_cb_type, void *, counter &, std::map &, std::string, uint64); PCM_MAIN_NOTHROW; @@ -672,7 +311,7 @@ int mainThrows(int argc, char * argv[]) std::cout << "\n Intel(r) Performance Counter Monitor " << PCM_VERSION ; std::cout << "\n This utility measures Sapphire Rapids-SP accelerators information.\n"; - std::string program = string(argv[0]); + std::string program = std::string(argv[0]); 
bool csv = false; bool human_readable = false; std::string csv_delimiter = ","; @@ -684,10 +323,10 @@ int mainThrows(int argc, char * argv[]) ACCEL_DEV_LOC_MAPPING loc_map = SOCKET_MAP; //default is socket mapping MainLoop mainLoop; PCM * m; - accel_evt_parse_context evt_ctx; - std::map opcodeFieldMap; + AcceleratorCounterState *accs_; + accs_ = AcceleratorCounterState::getInstance(); + std::string ev_file_name; - pfn_evt_handler p_evt_handler; while (argc > 1) { @@ -803,70 +442,7 @@ int mainThrows(int argc, char * argv[]) exit(EXIT_FAILURE); } - if (isAccelCounterAvailable(m, accel) == true) - { - if (evtfile == false) //All platform use the spr config file by default. - { - ev_file_name = "opCode-143-accel.txt"; - } - else - { - ev_file_name = specify_evtfile; - } - //std::cout << "load event config file from:" << ev_file_name << "\n"; - } - else - { - std::cerr << "Error: " << getAccelCounterName(accel) << " device is NOT available/ready with this platform! Program aborted\n"; - exit(EXIT_FAILURE); - } - - switch (accel) - { - case ACCEL_IAA: - case ACCEL_DSA: - case ACCEL_QAT: - opcodeFieldMap["hname"] = PCM::H_EVENT_NAME; - opcodeFieldMap["vname"] = PCM::V_EVENT_NAME; - opcodeFieldMap["multiplier"] = PCM::MULTIPLIER; - opcodeFieldMap["divider"] = PCM::DIVIDER; - opcodeFieldMap["ctr"] = PCM::COUNTER_INDEX; - opcodeFieldMap["en"] = PCM::ENABLE; - opcodeFieldMap["ev_sel"] = PCM::EVENT_SELECT; - opcodeFieldMap["ev_cat"] = EVENT_CATEGORY; - opcodeFieldMap["filter_wq"] = FILTER_WQ; - opcodeFieldMap["filter_eng"] = FILTER_ENG; - opcodeFieldMap["filter_tc"] = FILTER_TC; - opcodeFieldMap["filter_pgsz"] = FILTER_PGSZ; - opcodeFieldMap["filter_xfersz"] = FILTER_XFERSZ; - - p_evt_handler = idx_evt_parse_handler; - evt_ctx.m = m; - evt_ctx.accel = accel; - evt_ctx.ctrs.clear();//fill the ctrs by evt_handler callback func. - break; - default: - std::cerr << "Error: Accel type=0x" << std::hex << accel << " is not supported! 
Program aborted\n" << std::dec; - exit(EXIT_FAILURE); - break; - } - - try - { - load_events(ev_file_name, opcodeFieldMap, p_evt_handler, (void *)&evt_ctx); - } - catch (std::exception & e) - { - std::cerr << "Error: " << e.what() << "\n"; - std::cerr << "Error: event cfg file have the problem, please double check it! Program aborted\n"; - exit(EXIT_FAILURE); - } - - if (evt_ctx.ctrs.size() ==0 || evt_ctx.ctrs.size() > getMaxNumOfAccelCtrs(m, evt_ctx.accel)) - { - std::cerr << "Error: event counter size is 0 or exceed maximum, please check the event cfg file! Program aborted\n"; - exit(EXIT_FAILURE); - } + accs_->setEvents(m,accel,specify_evtfile,evtfile); std::ostream* output = &std::cout; std::fstream file_stream; @@ -874,23 +450,16 @@ int mainThrows(int argc, char * argv[]) { file_stream.open(output_file.c_str(), std::ios_base::out); output = &file_stream; - } - - if (accel == ACCEL_QAT) - { - const uint32_t dev_count = getNumOfAccelDevs(m, accel); - for (uint32_t dev = 0; dev != dev_count; ++dev) - { - m->controlQATTelemetry(dev, PCM::QAT_TLM_START); //start the QAT telemetry service - } - } - + } + accs_->programAccelCounters(); + std::vector CTRS= accs_->getCounters(); mainLoop([&]() { - collect_data(m, delay, accel, evt_ctx.ctrs); + + collect_data(m, delay, accel, CTRS); std::vector display_buffer = csv ? 
- build_csv(m, accel, evt_ctx.ctrs, human_readable, csv_delimiter, accel_results, loc_map) : - build_display(m, accel, evt_ctx.ctrs, accel_results, loc_map); + build_csv( accel, CTRS, human_readable, csv_delimiter, accel_results, loc_map) : + build_display( accel, CTRS, accel_results, loc_map); display(display_buffer, *output); return true; }); diff --git a/src/pcm-sensor-server.cpp b/src/pcm-sensor-server.cpp index ced46bbd..6e7aa692 100644 --- a/src/pcm-sensor-server.cpp +++ b/src/pcm-sensor-server.cpp @@ -5,11 +5,14 @@ // https://github.com/prometheus/prometheus/wiki/Default-port-allocations constexpr unsigned int DEFAULT_HTTP_PORT = 9738; constexpr unsigned int DEFAULT_HTTPS_PORT = DEFAULT_HTTP_PORT; +#include "pcm-accel-common.h" #include #include #include #include +#include + #include #include #include @@ -338,6 +341,12 @@ class JSONPrinter : Visitor endObject( JSONPrinter::LineEndAction::DelimiterAndNewLine, END_LIST ); SystemCounterState before = getSystemCounter( aggPair_.first ); SystemCounterState after = getSystemCounter( aggPair_.second ); + PCM * pcm = PCM::getInstance(); + if (pcm->getAccel()!=ACCEL_NOCONFIG){ + startObject ("Accelerators",BEGIN_OBJECT); + printAccelCounterState(before,after); + endObject( JSONPrinter::LineEndAction::DelimiterAndNewLine, END_OBJECT ); + } startObject( "QPI/UPI Links", BEGIN_OBJECT ); printSystemCounterState( before, after ); endObject( JSONPrinter::LineEndAction::DelimiterAndNewLine, END_OBJECT ); @@ -347,6 +356,7 @@ class JSONPrinter : Visitor startObject( "Uncore Aggregate", BEGIN_OBJECT ); printUncoreCounterState( before, after ); endObject( JSONPrinter::LineEndAction::NewLineOnly, END_OBJECT ); + endObject( JSONPrinter::LineEndAction::NewLineOnly, END_OBJECT ); } @@ -433,6 +443,23 @@ class JSONPrinter : Visitor endObject( JSONPrinter::NewLineOnly, END_OBJECT ); } + void printAccelCounterState( SystemCounterState const& before, SystemCounterState const& after ) { + AcceleratorCounterState* accs_ = 
AcceleratorCounterState::getInstance(); + uint32 devs = accs_->getNumOfAccelDevs(); + for ( uint32 i=0; i < devs; ++i ) { + startObject( std::string( accs_->getAccelCounterName() + " Counters Device " ) + std::to_string( i ), BEGIN_OBJECT ); + for(int j=0;jgetNumberOfCounters();j++){ + printCounter( accs_->getAccelIndexCounterName(j), accs_->getAccelIndexCounter(i, before, after,j) ); + } + // debug prints + //for(uint32 j=0;jgetNumberOfCounters();j++){ + // std::cout<getAccelIndexCounterName(j) << " "<getAccelIndexCounter(i, before, after,j)<getAccelIndexCounterName()<< accs_->getAccelInboundBW (i, before, after ) << " "<< accs_->getAccelOutboundBW (i, before, after ) << " "<getAccelShareWQ_ReqNb (i, before, after ) << " "<getAccelDedicateWQ_ReqNb (i, before, after ) << std::endl; + endObject( JSONPrinter::DelimiterAndNewLine, END_OBJECT ); + } + } + void printSystemCounterState( SystemCounterState const& before, SystemCounterState const& after ) { PCM* pcm = PCM::getInstance(); uint32 sockets = pcm->getNumSockets(); @@ -596,6 +623,10 @@ class PrometheusPrinter : Visitor SystemCounterState after = getSystemCounter( aggPair_.second ); addToHierarchy( "aggregate=\"system\"" ); PCM* pcm = PCM::getInstance(); + if (pcm->getAccel()!=ACCEL_NOCONFIG){ + printComment( "Accelerator Counters" ); + printAccelCounterState(before,after); + } if ( pcm->isServerCPU() && pcm->getNumSockets() >= 2 ) { printComment( "UPI/QPI Counters" ); printSystemCounterState( before, after ); @@ -686,6 +717,23 @@ class PrometheusPrinter : Visitor removeFromHierarchy(); } + void printAccelCounterState( SystemCounterState const& before, SystemCounterState const& after ) + { + addToHierarchy( "source=\"accel\"" ); + AcceleratorCounterState* accs_ = AcceleratorCounterState::getInstance(); + uint32 devs = accs_->getNumOfAccelDevs(); + + for ( uint32 i=0; i < devs; ++i ) + { + addToHierarchy( std::string( accs_->getAccelCounterName() + "device=\"" ) + std::to_string( i ) + "\"" ); + for(int 
j=0;jgetNumberOfCounters();j++) + { + printCounter( accs_->remove_string_inside_use(accs_->getAccelIndexCounterName(j)), accs_->getAccelIndexCounter(i, before, after,j) ); + } + removeFromHierarchy(); + } + removeFromHierarchy(); + } void printSystemCounterState( SystemCounterState const& before, SystemCounterState const& after ) { addToHierarchy( "source=\"uncore\"" ); PCM* pcm = PCM::getInstance(); @@ -3167,9 +3215,16 @@ int mainThrows(int argc, char * argv[]) { unsigned short debug_level = 0; std::string certificateFile; std::string privateKeyFile; - + AcceleratorCounterState *accs_; + accs_ = AcceleratorCounterState::getInstance(); null_stream nullStream; check_and_set_silent(argc, argv, nullStream); + ACCEL_IP accel=ACCEL_NOCONFIG; //default is IAA + bool evtfile = false; + std::string specify_evtfile; + // ACCEL_DEV_LOC_MAPPING loc_map = SOCKET_MAP; //default is socket mapping + MainLoop mainLoop; + std::string ev_file_name; if ( argc > 1 ) { std::string arg_value; @@ -3228,11 +3283,69 @@ int mainThrows(int argc, char * argv[]) { { forceRTMAbortMode = true; } + else if (check_argument_equals(argv[i], {"-iaa", "/iaa"})) + { + accel = ACCEL_IAA; + } + else if (check_argument_equals(argv[i], {"-dsa", "/dsa"})) + { + accel = ACCEL_DSA; + std::cout << "Aggregator firstest : " << accs_->getAccelCounterName() << accel; + } +#ifdef __linux__ + else if (check_argument_equals(argv[i], {"-qat", "/qat"})) + { + accel = ACCEL_QAT; + } + // else if (check_argument_equals(argv[i], {"-numa", "/numa"})) + // { + // loc_map = NUMA_MAP; + // } +#endif + else if (extract_argument_value(argv[i], {"-evt", "/evt"}, arg_value)) + { + evtfile = true; + specify_evtfile = std::move(arg_value); + } else if ( check_argument_equals( argv[i], {"-silent", "/silent"} ) ) { // handled in check_and_set_silent continue; } + +#ifdef __linux__ + // check kernel version for driver dependency. 
+ if (accel != ACCEL_NOCONFIG) + { + std::cout << "Info: IDX - Please ensure the required driver(e.g idxd driver for iaa/dsa, qat driver and etc) correct enabled with this system, else the tool may fail to run.\n"; + struct utsname sys_info; + if (!uname(&sys_info)) + { + std::string krel_str; + uint32 krel_major_ver=0, krel_minor_ver=0; + krel_str = sys_info.release; + std::vector krel_info = split(krel_str, '.'); + std::istringstream iss_krel_major(krel_info[0]); + std::istringstream iss_krel_minor(krel_info[1]); + iss_krel_major >> std::setbase(0) >> krel_major_ver; + iss_krel_minor >> std::setbase(0) >> krel_minor_ver; + + switch (accel) + { + case ACCEL_IAA: + case ACCEL_DSA: + if ((krel_major_ver < 5) || (krel_major_ver == 5 && krel_minor_ver < 11)) + { + std::cout<< "Warning: IDX - current linux kernel version(" << krel_str << ") is too old, please upgrade it to the latest due to required idxd driver integrated to kernel since 5.11.\n"; + } + break; + default: + std::cout<< "Info: Chosen "<< accel<<" IDX - current linux kernel version(" << krel_str << ")"; + + } + } + } +#endif #if defined (USE_SSL) else if ( check_argument_equals( argv[i], {"-C", "--certificateFile"} ) ) { @@ -3315,6 +3428,7 @@ int mainThrows(int argc, char * argv[]) { // A HTTP interface to change the programming is planned PCM::ErrorCode status; PCM * pcmInstance = PCM::getInstance(); + pcmInstance->setAccel(accel); assert(pcmInstance); if (forceRTMAbortMode) { @@ -3326,7 +3440,8 @@ int mainThrows(int argc, char * argv[]) { switch ( status ) { case PCM::PMUBusy: { - if ( forcedProgramming == false ) { + if ( forcedProgramming == false ) + { std::cout << "Warning: PMU appears to be busy, do you want to reset it? 
(y/n)\n"; char answer; std::cin >> answer; @@ -3356,7 +3471,18 @@ int mainThrows(int argc, char * argv[]) { //TODO: check return value when its implemented pcmInstance->programCXLCM(); + if (pcmInstance->getAccel()!=ACCEL_NOCONFIG) + { + if (pcmInstance->supportIDXAccelDev() == false) + { + std::cerr << "Error: IDX accelerator is NOT supported with this platform! Program aborted\n"; + exit(EXIT_FAILURE); + } + accs_->setEvents(pcmInstance,accel,specify_evtfile,evtfile); + + accs_->programAccelCounters(); + } #if defined (USE_SSL) if ( useSSL ) { if ( port == 0 ) diff --git a/src/topology.cpp b/src/topology.cpp index a8cb6c85..6771a646 100644 --- a/src/topology.cpp +++ b/src/topology.cpp @@ -1,8 +1,8 @@ // SPDX-License-Identifier: BSD-3-Clause // Copyright (c) 2016-2022, Intel Corporation -#include "cpucounters.h" #include "topology.h" +#include "pcm-accel-common.h" namespace pcm { @@ -87,6 +87,7 @@ void Aggregator::dispatch( SystemRoot const& syp ) { PCM* pcm = PCM::getInstance(); pcm->readQPICounters( sycs_ ); pcm->readAndAggregateCXLCMCounters( sycs_ ); + readAccelCounters(sycs_); } Aggregator::Aggregator() diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 61b16b19..1d10dc8d 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -6,7 +6,7 @@ set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin/tests) if(UNIX) # daemon_alignment_test on Linux and Unix - file(GLOB TEST_FILE daemon_alignment_test.cpp) + file(GLOB TEST_FILE daemon_alignment_test.cpp pcm-accel-common.cpp) add_executable(daemon_alignment_test ${TEST_FILE}) target_link_libraries(daemon_alignment_test) From 285d50275c527c693b4386e49839d2cfa847a9d4 Mon Sep 17 00:00:00 2001 From: "Dementiev, Roman" Date: Mon, 6 Nov 2023 10:41:36 +0100 Subject: [PATCH 3/9] try cross-platform-actions for FreeBSD Change-Id: If5e063b9fcd264280b4bd1e8a1f00c34bfc8db60 --- .github/workflows/freebsd_build.yml | 12 ++++++------ .github/workflows/freebsd_scan_build.yml | 11 ++++++----- 2 files changed, 12 
insertions(+), 11 deletions(-) diff --git a/.github/workflows/freebsd_build.yml b/.github/workflows/freebsd_build.yml index 2eba0ad8..10e9d2fb 100644 --- a/.github/workflows/freebsd_build.yml +++ b/.github/workflows/freebsd_build.yml @@ -17,14 +17,14 @@ jobs: submodules: recursive - name: build in FreeBSD VM id: build - uses: vmactions/freebsd-vm@v0 + uses: cross-platform-actions/action@v0.21.0 with: - usesh: true - sync: sshfs - prepare: | - pkg install -y curl gmake cmake - + memory: 2048 + shell: sh + operating_system: freebsd + version: '13.2' run: | + sudo pkg install -y curl gmake cmake pwd ls -lah whoami diff --git a/.github/workflows/freebsd_scan_build.yml b/.github/workflows/freebsd_scan_build.yml index 45cb0614..adcd731b 100644 --- a/.github/workflows/freebsd_scan_build.yml +++ b/.github/workflows/freebsd_scan_build.yml @@ -17,13 +17,14 @@ jobs: submodules: recursive - name: clang scan build in FreeBSD VM id: clang-scan-build - uses: vmactions/freebsd-vm@v0 + uses: cross-platform-actions/action@v0.21.0 with: - usesh: true - prepare: | - pkg install -y curl gmake cmake devel/llvm llvm - + memory: 2048 + shell: sh + operating_system: freebsd + version: '13.2' run: | + sudo pkg install -y curl gmake cmake devel/llvm llvm pwd ls -lah whoami From a47b34b8f2d09484e76d058c1a732a6a1f0619a0 Mon Sep 17 00:00:00 2001 From: "Dementiev, Roman" Date: Tue, 7 Nov 2023 21:20:37 +0100 Subject: [PATCH 4/9] fix race condition in isHWTMAL1Supported Change-Id: Iea0760d1edc95d5ccd66ee69ab83314229e6f458 --- src/cpucounters.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/cpucounters.cpp b/src/cpucounters.cpp index 0f669469..e0c526f6 100644 --- a/src/cpucounters.cpp +++ b/src/cpucounters.cpp @@ -3504,6 +3504,7 @@ PCM::ErrorCode PCM::program(const PCM::ProgramMode mode_, const void * parameter lastProgrammedCustomCounters.clear(); lastProgrammedCustomCounters.resize(num_cores); core_global_ctrl_value = 0ULL; + isHWTMAL1Supported(); // ínit value to prevent MT races 
std::vector > asyncCoreResults; std::vector programmingStatuses(num_cores, PCM::Success); From 83606004531a5fa73b049534c469d7efee1090e5 Mon Sep 17 00:00:00 2001 From: "Dementiev, Roman" Date: Wed, 8 Nov 2023 09:18:45 +0100 Subject: [PATCH 5/9] fix deadlock in processDVSEC addresses one of the issue in https://github.com/intel/pcm/discussions/590 Change-Id: I71b6fee77fb5eae2694f75ee28bfb88b37142b3e --- src/pci.h | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/src/pci.h b/src/pci.h index da2f15e1..ec44e99f 100644 --- a/src/pci.h +++ b/src/pci.h @@ -178,6 +178,7 @@ inline void forAllIntelDevices(F f, int requestedDevice = -1, int requestedFunct auto probe = [&f](const uint32 group, const uint32 bus, const uint32 device, const uint32 function) { + // std::cerr << "Probing " << std::hex << group << ":" << bus << ":" << device << ":" << function << " " << std::dec << "\n"; uint32 value = 0; try { @@ -191,6 +192,7 @@ inline void forAllIntelDevices(F f, int requestedDevice = -1, int requestedFunct } const uint32 vendor_id = value & 0xffff; const uint32 device_id = (value >> 16) & 0xffff; + // std::cerr << "Found dev " << std::hex << vendor_id << ":" << device_id << std::dec << "\n"; if (vendor_id != PCM_INTEL_PCI_VENDOR_ID) { return; @@ -256,12 +258,13 @@ void processDVSEC(MatchFunc matchFunc, ProcessFunc processFunc) { forAllIntelDevices([&](const uint32 group, const uint32 bus, const uint32 device, const uint32 function, const uint32 /* device_id */) { + // std::cerr << "Intel device scan. found " << std::hex << group << ":" << bus << ":" << device << ":" << function << " " << device_id << std::dec; uint32 status{0}; PciHandleType h(group, bus, device, function); h.read32(6, &status); // read status if (status & 0x10) // has capability list { - // std::cout << "Intel device scan. 
found "<< std::hex << group << ":" << bus << ":" << device << ":" << function << " " << device_id << " with capability list\n" << std::dec; + // std::cerr << "Intel device scan. found "<< std::hex << group << ":" << bus << ":" << device << ":" << function << " " << device_id << " with capability list\n" << std::dec; VSEC header; uint64 offset = 0x100; do @@ -274,11 +277,11 @@ void processDVSEC(MatchFunc matchFunc, ProcessFunc processFunc) { return; } - // std::cout << "offset 0x" << std::hex << offset << " header.fields.cap_id: 0x" << header.fields.cap_id << std::dec << "\n"; - // std::cout << ".. found entryID: 0x" << std::hex << header.fields.entryID << std::dec << "\n"; - if (matchFunc(header)) // UNCORE_DISCOVERY_DVSEC_ID_PMON + // std::cerr << "offset 0x" << std::hex << offset << " header.fields.cap_id: 0x" << header.fields.cap_id << std::dec << "\n"; + // std::cerr << ".. found entryID: 0x" << std::hex << header.fields.entryID << std::dec << "\n"; + if (matchFunc(header)) { - // std::cout << ".... found UNCORE_DISCOVERY_DVSEC_ID_PMON\n"; + // std::cerr << ".... 
found match\n"; auto barOffset = 0x10 + header.fields.tBIR * 4; uint32 bar = 0; if (h.read32(barOffset, &bar) == sizeof(uint32) && bar != 0) // read bar @@ -291,7 +294,12 @@ void processDVSEC(MatchFunc matchFunc, ProcessFunc processFunc) std::cerr << "Error: can't read bar from offset " << barOffset << " \n"; } } + const uint64 lastOffset = offset; offset = header.fields.cap_next & ~3; + if (lastOffset == offset) // the offset did not change + { + return; // deadlock protection + } } while (1); } }); From 5c48cc951a4944c69adb0b83f65c7980c90bbac5 Mon Sep 17 00:00:00 2001 From: "Dementiev, Roman" Date: Wed, 15 Nov 2023 15:29:07 +0100 Subject: [PATCH 6/9] add support of PP0 and PP1 energy metrics Change-Id: Ia7aaa42351da88f1e420b68ddab804fe48539db3 --- src/cpucounters.cpp | 16 ++++++++++++++++ src/cpucounters.h | 37 +++++++++++++++++++++++++++++++++++++ src/pcm.cpp | 43 +++++++++++++++++++++++++++++++++++++++++++ src/types.h | 2 ++ 4 files changed, 98 insertions(+) diff --git a/src/cpucounters.cpp b/src/cpucounters.cpp index e0c526f6..bbe9ee7c 100644 --- a/src/cpucounters.cpp +++ b/src/cpucounters.cpp @@ -1736,6 +1736,14 @@ void PCM::initEnergyMonitoring() std::make_shared( new CounterWidthExtender::MsrHandleCounter(MSR[socketRefCore[i]], MSR_DRAM_ENERGY_STATUS), 32, 10000)); } + + if (ppEnergyMetricsAvailable() && MSR.size() && num_sockets == 1 && pp_energy_status.empty()) + { + pp_energy_status.push_back(std::make_shared( + new CounterWidthExtender::MsrHandleCounter(MSR[socketRefCore[0]], MSR_PP0_ENERGY_STATUS), 32, 10000)); + pp_energy_status.push_back(std::make_shared( + new CounterWidthExtender::MsrHandleCounter(MSR[socketRefCore[0]], MSR_PP1_ENERGY_STATUS), 32, 10000)); + } } static const uint32 UBOX0_DEV_IDS[] = { @@ -6071,6 +6079,14 @@ void PCM::readAndAggregateEnergyCounters(const uint32 socket, CounterStateType & if (socket < (uint32)dram_energy_status.size()) result.DRAMEnergyStatus += dram_energy_status[socket]->read(); + + if (socket == 0) + { + for 
(size_t pp = 0; pp < pp_energy_status.size(); ++pp) + { + result.PPEnergyStatus[pp] += pp_energy_status[pp]->read(); + } + } } template diff --git a/src/cpucounters.h b/src/cpucounters.h index eba20082..50588f98 100644 --- a/src/cpucounters.h +++ b/src/cpucounters.h @@ -644,6 +644,7 @@ class PCM_API PCM double joulesPerEnergyUnit; std::vector > energy_status; std::vector > dram_energy_status; + std::vector > pp_energy_status; std::vector > cboPMUs; std::vector > mdfPMUs; std::vector>> cxlPMUs; // socket X CXL ports X UNIT {0,1} @@ -691,6 +692,7 @@ class PCM_API PCM bool linux_arch_perfmon = false; public: + enum { MAX_PP = 1 }; // max power plane number on Intel architecture (client) enum { MAX_C_STATE = 10 }; // max C-state on Intel architecture //! \brief Returns true if the specified core C-state residency metric is supported @@ -2426,6 +2428,11 @@ class PCM_API PCM ; } + bool ppEnergyMetricsAvailable() const + { + return packageEnergyMetricsAvailable() && hasClientMCCounters() && num_sockets == 1; + } + static double getBytesPerFlit(int32 cpu_model_) { if (hasUPI(cpu_model_)) @@ -2966,6 +2973,18 @@ uint64 getConsumedEnergy(const CounterStateType & before, const CounterStateType return after.PackageEnergyStatus - before.PackageEnergyStatus; } +/*! \brief Returns energy consumed by processor, excluding DRAM (measured in internal units) + \param powerPlane power plane ID + \param before CPU counter state before the experiment + \param after CPU counter state after the experiment +*/ +template +uint64 getConsumedEnergy(const int powerPlane, const CounterStateType& before, const CounterStateType& after) +{ + assert(powerPlane <= PCM::MAX_PP); + return after.PPEnergyStatus[powerPlane] - before.PPEnergyStatus[powerPlane]; +} + /*! 
\brief Returns energy consumed by DRAM (measured in internal units) \param before CPU counter state before the experiment \param after CPU counter state after the experiment @@ -3019,6 +3038,20 @@ double getConsumedJoules(const CounterStateType & before, const CounterStateType return double(getConsumedEnergy(before, after)) * m->getJoulesPerEnergyUnit(); } +/*! \brief Returns Joules consumed by processor (excluding DRAM) + \param powePlane power plane + \param before CPU counter state before the experiment + \param after CPU counter state after the experiment +*/ +template +double getConsumedJoules(const int powerPlane, const CounterStateType& before, const CounterStateType& after) +{ + PCM* m = PCM::getInstance(); + if (!m) return -1.; + + return double(getConsumedEnergy(powerPlane, before, after)) * m->getJoulesPerEnergyUnit(); +} + /*! \brief Returns Joules consumed by DRAM \param before CPU counter state before the experiment \param after CPU counter state after the experiment @@ -3081,6 +3114,8 @@ class UncoreCounterState template friend uint64 getConsumedEnergy(const CounterStateType & before, const CounterStateType & after); template + friend uint64 getConsumedEnergy(const int pp, const CounterStateType& before, const CounterStateType& after); + template friend uint64 getDRAMConsumedEnergy(const CounterStateType & before, const CounterStateType & after); template friend uint64 getUncoreClocks(const CounterStateType& before, const CounterStateType& after); @@ -3110,6 +3145,7 @@ class UncoreCounterState uint64 UncMCIARequests; uint64 UncMCIORequests; uint64 PackageEnergyStatus; + uint64 PPEnergyStatus[PCM::MAX_PP + 1]; uint64 DRAMEnergyStatus; uint64 TOROccupancyIAMiss; uint64 TORInsertsIAMiss; @@ -3137,6 +3173,7 @@ class UncoreCounterState UncClocks(0) { std::fill(CStateResidency, CStateResidency + PCM::MAX_C_STATE + 1, 0); + std::fill(PPEnergyStatus, PPEnergyStatus + PCM::MAX_PP + 1, 0); } virtual ~UncoreCounterState() { } diff --git a/src/pcm.cpp 
b/src/pcm.cpp index 80ad0248..b8880e8f 100644 --- a/src/pcm.cpp +++ b/src/pcm.cpp @@ -438,6 +438,11 @@ void print_output(PCM * m, cout << " GT |"; if (m->packageEnergyMetricsAvailable()) cout << " CPU energy |"; + if (m->ppEnergyMetricsAvailable()) + { + cout << " PP0 energy |"; + cout << " PP1 energy |"; + } if (m->dramEnergyMetricsAvailable()) cout << " DIMM energy |"; if (m->LLCReadMissLatencyMetricsAvailable()) @@ -469,6 +474,12 @@ void print_output(PCM * m, cout << " "; cout << setw(6) << getConsumedJoules(sktstate1[i], sktstate2[i]); } + if (m->ppEnergyMetricsAvailable()) { + cout << " "; + cout << setw(6) << getConsumedJoules(0, sktstate1[i], sktstate2[i]); + cout << " "; + cout << setw(6) << getConsumedJoules(1, sktstate1[i], sktstate2[i]); + } if(m->dramEnergyMetricsAvailable()) { cout << " "; cout << setw(6) << getDRAMConsumedJoules(sktstate1[i], sktstate2[i]); @@ -500,6 +511,12 @@ void print_output(PCM * m, cout << " "; cout << setw(6) << getConsumedJoules(sstate1, sstate2); } + if (m->ppEnergyMetricsAvailable()) { + cout << " "; + cout << setw(6) << getConsumedJoules(0, sstate1, sstate2); + cout << " "; + cout << setw(6) << getConsumedJoules(1, sstate1, sstate2); + } if (m->dramEnergyMetricsAvailable()) { cout << " "; cout << setw(6) << getDRAMConsumedJoules(sstate1, sstate2); @@ -612,6 +629,8 @@ void print_csv_header(PCM * m, print_csv_header_helper("System Pack C-States"); if (m->packageEnergyMetricsAvailable()) print_csv_header_helper(header); + if (m->ppEnergyMetricsAvailable()) + print_csv_header_helper(header, 2); if (m->dramEnergyMetricsAvailable()) print_csv_header_helper(header); if (m->LLCReadMissLatencyMetricsAvailable()) @@ -692,6 +711,13 @@ void print_csv_header(PCM * m, header = "Proc Energy (Joules)"; print_csv_header_helper(header,m->getNumSockets()); } + if (m->ppEnergyMetricsAvailable()) + { + header = "Power Plane 0 Energy (Joules)"; + print_csv_header_helper(header, m->getNumSockets()); + header = "Power Plane 1 Energy (Joules)"; + 
print_csv_header_helper(header, m->getNumSockets()); + } if (m->dramEnergyMetricsAvailable()) { header = "DRAM Energy (Joules)"; @@ -772,6 +798,11 @@ void print_csv_header(PCM * m, if (m->packageEnergyMetricsAvailable()) cout << "Proc Energy (Joules),"; + if (m->ppEnergyMetricsAvailable()) + { + cout << "Power Plane 0 Energy (Joules),"; + cout << "Power Plane 1 Energy (Joules),"; + } if (m->dramEnergyMetricsAvailable()) cout << "DRAM Energy (Joules),"; if (m->LLCReadMissLatencyMetricsAvailable()) @@ -848,6 +879,11 @@ void print_csv_header(PCM * m, for (uint32 i = 0; i < m->getNumSockets(); ++i) cout << "SKT" << i << ","; } + if (m->ppEnergyMetricsAvailable()) + { + for (uint32 i = 0; i < m->getNumSockets(); ++i) + cout << "SKT" << i << "," << "SKT" << i << ","; + } if (m->dramEnergyMetricsAvailable()) { for (uint32 i = 0; i < m->getNumSockets(); ++i) @@ -998,6 +1034,8 @@ void print_csv(PCM * m, if (m->packageEnergyMetricsAvailable()) cout << getConsumedJoules(sstate1, sstate2) << ","; + if (m->ppEnergyMetricsAvailable()) + cout << getConsumedJoules(0, sstate1, sstate2) << "," << getConsumedJoules(1, sstate1, sstate2) << ","; if (m->dramEnergyMetricsAvailable()) cout << getDRAMConsumedJoules(sstate1, sstate2) << ","; if (m->LLCReadMissLatencyMetricsAvailable()) @@ -1085,6 +1123,11 @@ void print_csv(PCM * m, for (uint32 i = 0; i < m->getNumSockets(); ++i) cout << getConsumedJoules(sktstate1[i], sktstate2[i]) << ","; } + if (m->ppEnergyMetricsAvailable()) + { + for (uint32 i = 0; i < m->getNumSockets(); ++i) + cout << getConsumedJoules(0, sktstate1[i], sktstate2[i]) << "," << getConsumedJoules(1, sktstate1[i], sktstate2[i]) << ","; + } if (m->dramEnergyMetricsAvailable()) { for (uint32 i = 0; i < m->getNumSockets(); ++i) diff --git a/src/types.h b/src/types.h index c19921c9..ba70c223 100644 --- a/src/types.h +++ b/src/types.h @@ -1353,6 +1353,8 @@ struct ICX_IIOPMUCNTCTLRegister #define MSR_IA32_BIOS_SIGN_ID (0x8B) #define MSR_DRAM_ENERGY_STATUS (0x0619) +constexpr 
auto MSR_PP0_ENERGY_STATUS = 0x639; +constexpr auto MSR_PP1_ENERGY_STATUS = 0x641; #define MSR_PKG_C2_RESIDENCY (0x60D) #define MSR_PKG_C3_RESIDENCY (0x3F8) From 1f2ae400e0359c0707d65a3c811442c785c4a9d5 Mon Sep 17 00:00:00 2001 From: Roman Dementiev Date: Thu, 16 Nov 2023 11:28:17 +0100 Subject: [PATCH 7/9] initial code for 1f leaf topology detection --- src/cpucounters.cpp | 35 +++++++++++++++++++++++++++++++++++ src/topologyentry.h | 17 +++++++++++++++-- 2 files changed, 50 insertions(+), 2 deletions(-) diff --git a/src/cpucounters.cpp b/src/cpucounters.cpp index bbe9ee7c..ce3aa211 100644 --- a/src/cpucounters.cpp +++ b/src/cpucounters.cpp @@ -1158,6 +1158,41 @@ bool PCM::discoverSystemTopology() } subleaf++; } while (1); + + struct domain + { + unsigned type, levelShift, nextLevelShift, width; + }; + std::vector<domain> topologyDomains; + if (max_cpuid >= 0x1F) + { + subleaf = 0; + do + { + pcm_cpuid(0x1F, subleaf, cpuid_args); + domain d; + d.type = extract_bits_ui(cpuid_args.reg.ecx, 8, 15); + if (d.type == TopologyEntry::DomainTypeID::InvalidDomainTypeID) + { + break; + } + d.nextLevelShift = extract_bits_ui(cpuid_args.reg.eax, 0, 4); + d.levelShift = topologyDomains.empty() ?
0 : topologyDomains.back().nextLevelShift; + d.width = d.nextLevelShift - d.levelShift; + topologyDomains.push_back(d); + ++subleaf; + } while (true); +#if 0 + for (size_t l = 0; l < topologyDomains.size(); ++l) + { + std::cerr << "Topology level " << l << + " type " << topologyDomains[l].type << + " width " << topologyDomains[l].width << + " levelShift " << topologyDomains[l].levelShift << + " nextLevelShift " << topologyDomains[l].nextLevelShift << "\n"; + } +#endif + } } if (wasThreadReported && wasCoreReported) diff --git a/src/topologyentry.h b/src/topologyentry.h index 1961070b..4b53045c 100644 --- a/src/topologyentry.h +++ b/src/topologyentry.h @@ -13,9 +13,22 @@ struct PCM_API TopologyEntry // describes a core int32 os_id; int32 thread_id; int32 core_id; - int32 tile_id; // tile is a constalation of 1 or more cores sharing salem L2 cache. Unique for entire system + int32 module_id; + int32 tile_id; // tile is a constalation of 1 or more cores sharing same L2 cache. Unique for entire system + int32 die_id; + int32 die_grp_id; int32 socket; int32 native_cpu_model = -1; + enum DomainTypeID + { + InvalidDomainTypeID = 0, + LogicalProcessorDomain = 1, + CoreDomain = 2, + ModuleDomain = 3, + TileDomain = 4, + DieDomain = 5, + DieGrpDomain = 6 + }; enum CoreType { Atom = 0x20, @@ -24,7 +37,7 @@ struct PCM_API TopologyEntry // describes a core }; CoreType core_type = Invalid; - TopologyEntry() : os_id(-1), thread_id (-1), core_id(-1), tile_id(-1), socket(-1) { } + TopologyEntry() : os_id(-1), thread_id (-1), core_id(-1), module_id(-1), tile_id(-1), die_id(-1), die_grp_id(-1), socket(-1) { } const char* getCoreTypeStr() { switch (core_type) From 495e63444bd54da5686632501c34a35cd0904250 Mon Sep 17 00:00:00 2001 From: "Dementiev, Roman" Date: Thu, 16 Nov 2023 16:06:06 +0100 Subject: [PATCH 8/9] catch exceptions in msr.sys address https://github.com/intel/pcm/issues/566 Change-Id: I1659178575c492f9a3b8802c5649b2b29632636f --- src/WinMSRDriver/msrmain.c | 51 
+++++++++++++++++++++++++++++++++----- 1 file changed, 45 insertions(+), 6 deletions(-) diff --git a/src/WinMSRDriver/msrmain.c b/src/WinMSRDriver/msrmain.c index 23919b02..183d789b 100644 --- a/src/WinMSRDriver/msrmain.c +++ b/src/WinMSRDriver/msrmain.c @@ -182,7 +182,16 @@ NTSTATUS deviceControl(PDEVICE_OBJECT DeviceObject, PIRP Irp) new_affinity.Group = ProcNumber.Group; new_affinity.Mask = 1ULL << (ProcNumber.Number); KeSetSystemGroupAffinityThread(&new_affinity, &old_affinity); - __writemsr(input_msr_req->msr_address, input_msr_req->write_value); + __try + { + __writemsr(input_msr_req->msr_address, input_msr_req->write_value); + } + __except (EXCEPTION_EXECUTE_HANDLER) + { + status = GetExceptionCode(); + DbgPrint("Error: exception with code 0x%X in IO_CTL_MSR_WRITE core 0x%X msr 0x%llX value 0x%llX\n", + status, input_msr_req->core_id, input_msr_req->msr_address, input_msr_req->write_value); + } KeRevertToUserGroupAffinityThread(&old_affinity); Irp->IoStatus.Information = 0; // result size break; @@ -198,7 +207,16 @@ NTSTATUS deviceControl(PDEVICE_OBJECT DeviceObject, PIRP Irp) new_affinity.Group = ProcNumber.Group; new_affinity.Mask = 1ULL << (ProcNumber.Number); KeSetSystemGroupAffinityThread(&new_affinity, &old_affinity); - *output = __readmsr(input_msr_req->msr_address); + __try + { + *output = __readmsr(input_msr_req->msr_address); + } + __except (EXCEPTION_EXECUTE_HANDLER) + { + status = GetExceptionCode(); + DbgPrint("Error: exception with code 0x%X in IO_CTL_MSR_READ core 0x%X msr 0x%llX\n", + status, input_msr_req->core_id, input_msr_req->msr_address); + } KeRevertToUserGroupAffinityThread(&old_affinity); Irp->IoStatus.Information = sizeof(ULONG64); // result size break; @@ -258,8 +276,19 @@ NTSTATUS deviceControl(PDEVICE_OBJECT DeviceObject, PIRP Irp) slot.u.bits.FunctionNumber = input_pcicfg_req->func; #pragma warning(push) #pragma warning(disable: 4996) - size = HalSetBusDataByOffset(PCIConfiguration, input_pcicfg_req->bus, slot.u.AsULONG, - 
&(input_pcicfg_req->write_value), input_pcicfg_req->reg, input_pcicfg_req->bytes); + __try + { + size = HalSetBusDataByOffset(PCIConfiguration, input_pcicfg_req->bus, slot.u.AsULONG, + &(input_pcicfg_req->write_value), input_pcicfg_req->reg, input_pcicfg_req->bytes); + } + __except (EXCEPTION_EXECUTE_HANDLER) + { + status = GetExceptionCode(); + size = 0; + DbgPrint("Error: exception with code 0x%X in IO_CTL_PCICFG_WRITE b 0x%X d 0x%X f 0x%X reg 0x%X bytes 0x%X value 0x%llX\n", + status, input_pcicfg_req->bus, input_pcicfg_req->dev, input_pcicfg_req->func, input_pcicfg_req->reg, input_pcicfg_req->bytes, + input_pcicfg_req->write_value); + } #pragma warning(pop) if (size != input_pcicfg_req->bytes) { @@ -279,8 +308,18 @@ NTSTATUS deviceControl(PDEVICE_OBJECT DeviceObject, PIRP Irp) slot.u.bits.FunctionNumber = input_pcicfg_req->func; #pragma warning(push) #pragma warning(disable: 4996) - size = HalGetBusDataByOffset(PCIConfiguration, input_pcicfg_req->bus, slot.u.AsULONG, - output, input_pcicfg_req->reg, input_pcicfg_req->bytes); + __try + { + size = HalGetBusDataByOffset(PCIConfiguration, input_pcicfg_req->bus, slot.u.AsULONG, + output, input_pcicfg_req->reg, input_pcicfg_req->bytes); + } + __except (EXCEPTION_EXECUTE_HANDLER) + { + status = GetExceptionCode(); + size = 0; + DbgPrint("Error: exception with code 0x%X in IO_CTL_PCICFG_READ b 0x%X d 0x%X f 0x%X reg 0x%X bytes 0x%X\n", + status, input_pcicfg_req->bus, input_pcicfg_req->dev, input_pcicfg_req->func, input_pcicfg_req->reg, input_pcicfg_req->bytes); + } #pragma warning(pop) if (size != input_pcicfg_req->bytes) { From c6b5403ba2c6c42a00ec64942b283c42e3c7c339 Mon Sep 17 00:00:00 2001 From: "Dementiev, Roman" Date: Fri, 17 Nov 2023 12:14:53 +0100 Subject: [PATCH 9/9] address clang scan warning Change-Id: Ie810caa4c6bb6580096629b92ac8287326a46fbe --- src/cpucounters.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/cpucounters.cpp b/src/cpucounters.cpp index ce3aa211..861f1f4f 100644 --- 
a/src/cpucounters.cpp +++ b/src/cpucounters.cpp @@ -1466,6 +1466,7 @@ bool PCM::discoverSystemTopology() MSR.push_back(std::make_shared(i)); } + assert(num_cores > 0); TopologyEntry entries[num_cores]; if (MSR[0]->buildTopology(num_cores, entries) != 0) { std::cerr << "Unable to build CPU topology" << std::endl;