diff --git a/src/WinMSRDriver/msrmain.c b/src/WinMSRDriver/msrmain.c index 23919b02..183d789b 100644 --- a/src/WinMSRDriver/msrmain.c +++ b/src/WinMSRDriver/msrmain.c @@ -182,7 +182,16 @@ NTSTATUS deviceControl(PDEVICE_OBJECT DeviceObject, PIRP Irp) new_affinity.Group = ProcNumber.Group; new_affinity.Mask = 1ULL << (ProcNumber.Number); KeSetSystemGroupAffinityThread(&new_affinity, &old_affinity); - __writemsr(input_msr_req->msr_address, input_msr_req->write_value); + __try + { + __writemsr(input_msr_req->msr_address, input_msr_req->write_value); + } + __except (EXCEPTION_EXECUTE_HANDLER) + { + status = GetExceptionCode(); + DbgPrint("Error: exception with code 0x%X in IO_CTL_MSR_WRITE core 0x%X msr 0x%llX value 0x%llX\n", + status, input_msr_req->core_id, input_msr_req->msr_address, input_msr_req->write_value); + } KeRevertToUserGroupAffinityThread(&old_affinity); Irp->IoStatus.Information = 0; // result size break; @@ -198,7 +207,16 @@ NTSTATUS deviceControl(PDEVICE_OBJECT DeviceObject, PIRP Irp) new_affinity.Group = ProcNumber.Group; new_affinity.Mask = 1ULL << (ProcNumber.Number); KeSetSystemGroupAffinityThread(&new_affinity, &old_affinity); - *output = __readmsr(input_msr_req->msr_address); + __try + { + *output = __readmsr(input_msr_req->msr_address); + } + __except (EXCEPTION_EXECUTE_HANDLER) + { + status = GetExceptionCode(); + DbgPrint("Error: exception with code 0x%X in IO_CTL_MSR_READ core 0x%X msr 0x%llX\n", + status, input_msr_req->core_id, input_msr_req->msr_address); + } KeRevertToUserGroupAffinityThread(&old_affinity); Irp->IoStatus.Information = sizeof(ULONG64); // result size break; @@ -258,8 +276,19 @@ NTSTATUS deviceControl(PDEVICE_OBJECT DeviceObject, PIRP Irp) slot.u.bits.FunctionNumber = input_pcicfg_req->func; #pragma warning(push) #pragma warning(disable: 4996) - size = HalSetBusDataByOffset(PCIConfiguration, input_pcicfg_req->bus, slot.u.AsULONG, - &(input_pcicfg_req->write_value), input_pcicfg_req->reg, input_pcicfg_req->bytes); + __try + 
{ + size = HalSetBusDataByOffset(PCIConfiguration, input_pcicfg_req->bus, slot.u.AsULONG, + &(input_pcicfg_req->write_value), input_pcicfg_req->reg, input_pcicfg_req->bytes); + } + __except (EXCEPTION_EXECUTE_HANDLER) + { + status = GetExceptionCode(); + size = 0; + DbgPrint("Error: exception with code 0x%X in IO_CTL_PCICFG_WRITE b 0x%X d 0x%X f 0x%X reg 0x%X bytes 0x%X value 0x%llX\n", + status, input_pcicfg_req->bus, input_pcicfg_req->dev, input_pcicfg_req->func, input_pcicfg_req->reg, input_pcicfg_req->bytes, + input_pcicfg_req->write_value); + } #pragma warning(pop) if (size != input_pcicfg_req->bytes) { @@ -279,8 +308,18 @@ NTSTATUS deviceControl(PDEVICE_OBJECT DeviceObject, PIRP Irp) slot.u.bits.FunctionNumber = input_pcicfg_req->func; #pragma warning(push) #pragma warning(disable: 4996) - size = HalGetBusDataByOffset(PCIConfiguration, input_pcicfg_req->bus, slot.u.AsULONG, - output, input_pcicfg_req->reg, input_pcicfg_req->bytes); + __try + { + size = HalGetBusDataByOffset(PCIConfiguration, input_pcicfg_req->bus, slot.u.AsULONG, + output, input_pcicfg_req->reg, input_pcicfg_req->bytes); + } + __except (EXCEPTION_EXECUTE_HANDLER) + { + status = GetExceptionCode(); + size = 0; + DbgPrint("Error: exception with code 0x%X in IO_CTL_PCICFG_READ b 0x%X d 0x%X f 0x%X reg 0x%X bytes 0x%X\n", + status, input_pcicfg_req->bus, input_pcicfg_req->dev, input_pcicfg_req->func, input_pcicfg_req->reg, input_pcicfg_req->bytes); + } #pragma warning(pop) if (size != input_pcicfg_req->bytes) { diff --git a/src/cpucounters.cpp b/src/cpucounters.cpp index e0c526f6..861f1f4f 100644 --- a/src/cpucounters.cpp +++ b/src/cpucounters.cpp @@ -1158,6 +1158,41 @@ bool PCM::discoverSystemTopology() } subleaf++; } while (1); + + struct domain + { + unsigned type, levelShift, nextLevelShift, width; + }; + std::vector topologyDomains; + if (max_cpuid >= 0x1F) + { + subleaf = 0; + do + { + pcm_cpuid(0x1F, subleaf, cpuid_args); + domain d; + d.type = extract_bits_ui(cpuid_args.reg.ecx, 8, 15); + 
if (d.type == TopologyEntry::DomainTypeID::InvalidDomainTypeID) + { + break; + } + d.nextLevelShift = extract_bits_ui(cpuid_args.reg.eax, 0, 4); + d.levelShift = topologyDomains.empty() ? 0 : topologyDomains.back().nextLevelShift; + d.width = d.nextLevelShift - d.levelShift; + topologyDomains.push_back(d); + ++subleaf; + } while (true); +#if 0 + for (size_t l = 0; l < topologyDomains.size(); ++l) + { + std::cerr << "Topology level " << l << + " type " << topologyDomains[l].type << + " width " << topologyDomains[l].width << + " levelShift " << topologyDomains[l].levelShift << + " nextLevelShift " << topologyDomains[l].nextLevelShift << "\n"; + } +#endif + } } if (wasThreadReported && wasCoreReported) @@ -1431,6 +1466,7 @@ bool PCM::discoverSystemTopology() MSR.push_back(std::make_shared(i)); } + assert(num_cores > 0); TopologyEntry entries[num_cores]; if (MSR[0]->buildTopology(num_cores, entries) != 0) { std::cerr << "Unable to build CPU topology" << std::endl; @@ -1736,6 +1772,14 @@ void PCM::initEnergyMonitoring() std::make_shared( new CounterWidthExtender::MsrHandleCounter(MSR[socketRefCore[i]], MSR_DRAM_ENERGY_STATUS), 32, 10000)); } + + if (ppEnergyMetricsAvailable() && MSR.size() && num_sockets == 1 && pp_energy_status.empty()) + { + pp_energy_status.push_back(std::make_shared( + new CounterWidthExtender::MsrHandleCounter(MSR[socketRefCore[0]], MSR_PP0_ENERGY_STATUS), 32, 10000)); + pp_energy_status.push_back(std::make_shared( + new CounterWidthExtender::MsrHandleCounter(MSR[socketRefCore[0]], MSR_PP1_ENERGY_STATUS), 32, 10000)); + } } static const uint32 UBOX0_DEV_IDS[] = { @@ -6071,6 +6115,14 @@ void PCM::readAndAggregateEnergyCounters(const uint32 socket, CounterStateType & if (socket < (uint32)dram_energy_status.size()) result.DRAMEnergyStatus += dram_energy_status[socket]->read(); + + if (socket == 0) + { + for (size_t pp = 0; pp < pp_energy_status.size(); ++pp) + { + result.PPEnergyStatus[pp] += pp_energy_status[pp]->read(); + } + } } template diff --git 
a/src/cpucounters.h b/src/cpucounters.h index eba20082..50588f98 100644 --- a/src/cpucounters.h +++ b/src/cpucounters.h @@ -644,6 +644,7 @@ class PCM_API PCM double joulesPerEnergyUnit; std::vector > energy_status; std::vector > dram_energy_status; + std::vector > pp_energy_status; std::vector > cboPMUs; std::vector > mdfPMUs; std::vector>> cxlPMUs; // socket X CXL ports X UNIT {0,1} @@ -691,6 +692,7 @@ class PCM_API PCM bool linux_arch_perfmon = false; public: + enum { MAX_PP = 1 }; // max power plane number on Intel architecture (client) enum { MAX_C_STATE = 10 }; // max C-state on Intel architecture //! \brief Returns true if the specified core C-state residency metric is supported @@ -2426,6 +2428,11 @@ class PCM_API PCM ; } + bool ppEnergyMetricsAvailable() const + { + return packageEnergyMetricsAvailable() && hasClientMCCounters() && num_sockets == 1; + } + static double getBytesPerFlit(int32 cpu_model_) { if (hasUPI(cpu_model_)) @@ -2966,6 +2973,18 @@ uint64 getConsumedEnergy(const CounterStateType & before, const CounterStateType return after.PackageEnergyStatus - before.PackageEnergyStatus; } +/*! \brief Returns energy consumed by the given power plane (measured in internal units) + \param powerPlane power plane ID, valid range is 0..PCM::MAX_PP + \param before CPU counter state before the experiment + \param after CPU counter state after the experiment +*/ +template <class CounterStateType> +uint64 getConsumedEnergy(const int powerPlane, const CounterStateType& before, const CounterStateType& after) +{ + assert(powerPlane >= 0 && powerPlane <= PCM::MAX_PP); + return after.PPEnergyStatus[powerPlane] - before.PPEnergyStatus[powerPlane]; +} + /*! \brief Returns energy consumed by DRAM (measured in internal units) \param before CPU counter state before the experiment \param after CPU counter state after the experiment @@ -3019,6 +3038,20 @@ double getConsumedJoules(const CounterStateType & before, const CounterStateType return double(getConsumedEnergy(before, after)) * m->getJoulesPerEnergyUnit(); } +/*! 
\brief Returns Joules consumed by the given power plane (-1 if no PCM instance is available) + \param powerPlane power plane ID, valid range is 0..PCM::MAX_PP + \param before CPU counter state before the experiment + \param after CPU counter state after the experiment +*/ +template <class CounterStateType> +double getConsumedJoules(const int powerPlane, const CounterStateType& before, const CounterStateType& after) +{ + PCM* m = PCM::getInstance(); + if (!m) return -1.; + + return double(getConsumedEnergy(powerPlane, before, after)) * m->getJoulesPerEnergyUnit(); +} + /*! \brief Returns Joules consumed by DRAM \param before CPU counter state before the experiment \param after CPU counter state after the experiment @@ -3081,6 +3114,8 @@ class UncoreCounterState template friend uint64 getConsumedEnergy(const CounterStateType & before, const CounterStateType & after); template + friend uint64 getConsumedEnergy(const int pp, const CounterStateType& before, const CounterStateType& after); + template friend uint64 getDRAMConsumedEnergy(const CounterStateType & before, const CounterStateType & after); template friend uint64 getUncoreClocks(const CounterStateType& before, const CounterStateType& after); @@ -3110,6 +3145,7 @@ class UncoreCounterState uint64 UncMCIARequests; uint64 UncMCIORequests; uint64 PackageEnergyStatus; + uint64 PPEnergyStatus[PCM::MAX_PP + 1]; uint64 DRAMEnergyStatus; uint64 TOROccupancyIAMiss; uint64 TORInsertsIAMiss; @@ -3137,6 +3173,7 @@ class UncoreCounterState UncClocks(0) { std::fill(CStateResidency, CStateResidency + PCM::MAX_C_STATE + 1, 0); + std::fill(PPEnergyStatus, PPEnergyStatus + PCM::MAX_PP + 1, 0); } virtual ~UncoreCounterState() { } diff --git a/src/pcm.cpp b/src/pcm.cpp index 80ad0248..b8880e8f 100644 --- a/src/pcm.cpp +++ b/src/pcm.cpp @@ -438,6 +438,11 @@ void print_output(PCM * m, cout << " GT |"; if (m->packageEnergyMetricsAvailable()) cout << " CPU energy |"; + if (m->ppEnergyMetricsAvailable()) + { + cout << " PP0 energy |"; + cout << " PP1 energy |"; + } if (m->dramEnergyMetricsAvailable()) cout << " DIMM energy |"; 
if (m->LLCReadMissLatencyMetricsAvailable()) @@ -469,6 +474,12 @@ void print_output(PCM * m, cout << " "; cout << setw(6) << getConsumedJoules(sktstate1[i], sktstate2[i]); } + if (m->ppEnergyMetricsAvailable()) { + cout << " "; + cout << setw(6) << getConsumedJoules(0, sktstate1[i], sktstate2[i]); + cout << " "; + cout << setw(6) << getConsumedJoules(1, sktstate1[i], sktstate2[i]); + } if(m->dramEnergyMetricsAvailable()) { cout << " "; cout << setw(6) << getDRAMConsumedJoules(sktstate1[i], sktstate2[i]); @@ -500,6 +511,12 @@ void print_output(PCM * m, cout << " "; cout << setw(6) << getConsumedJoules(sstate1, sstate2); } + if (m->ppEnergyMetricsAvailable()) { + cout << " "; + cout << setw(6) << getConsumedJoules(0, sstate1, sstate2); + cout << " "; + cout << setw(6) << getConsumedJoules(1, sstate1, sstate2); + } if (m->dramEnergyMetricsAvailable()) { cout << " "; cout << setw(6) << getDRAMConsumedJoules(sstate1, sstate2); @@ -612,6 +629,8 @@ void print_csv_header(PCM * m, print_csv_header_helper("System Pack C-States"); if (m->packageEnergyMetricsAvailable()) print_csv_header_helper(header); + if (m->ppEnergyMetricsAvailable()) + print_csv_header_helper(header, 2); if (m->dramEnergyMetricsAvailable()) print_csv_header_helper(header); if (m->LLCReadMissLatencyMetricsAvailable()) @@ -692,6 +711,13 @@ void print_csv_header(PCM * m, header = "Proc Energy (Joules)"; print_csv_header_helper(header,m->getNumSockets()); } + if (m->ppEnergyMetricsAvailable()) + { + header = "Power Plane 0 Energy (Joules)"; + print_csv_header_helper(header, m->getNumSockets()); + header = "Power Plane 1 Energy (Joules)"; + print_csv_header_helper(header, m->getNumSockets()); + } if (m->dramEnergyMetricsAvailable()) { header = "DRAM Energy (Joules)"; @@ -772,6 +798,11 @@ void print_csv_header(PCM * m, if (m->packageEnergyMetricsAvailable()) cout << "Proc Energy (Joules),"; + if (m->ppEnergyMetricsAvailable()) + { + cout << "Power Plane 0 Energy (Joules),"; + cout << "Power Plane 1 Energy 
(Joules),"; + } if (m->dramEnergyMetricsAvailable()) cout << "DRAM Energy (Joules),"; if (m->LLCReadMissLatencyMetricsAvailable()) @@ -848,6 +879,11 @@ void print_csv_header(PCM * m, for (uint32 i = 0; i < m->getNumSockets(); ++i) cout << "SKT" << i << ","; } + if (m->ppEnergyMetricsAvailable()) + { + for (uint32 i = 0; i < m->getNumSockets(); ++i) + cout << "SKT" << i << "," << "SKT" << i << ","; + } if (m->dramEnergyMetricsAvailable()) { for (uint32 i = 0; i < m->getNumSockets(); ++i) @@ -998,6 +1034,8 @@ void print_csv(PCM * m, if (m->packageEnergyMetricsAvailable()) cout << getConsumedJoules(sstate1, sstate2) << ","; + if (m->ppEnergyMetricsAvailable()) + cout << getConsumedJoules(0, sstate1, sstate2) << "," << getConsumedJoules(1, sstate1, sstate2) << ","; if (m->dramEnergyMetricsAvailable()) cout << getDRAMConsumedJoules(sstate1, sstate2) << ","; if (m->LLCReadMissLatencyMetricsAvailable()) @@ -1085,6 +1123,11 @@ void print_csv(PCM * m, for (uint32 i = 0; i < m->getNumSockets(); ++i) cout << getConsumedJoules(sktstate1[i], sktstate2[i]) << ","; } + if (m->ppEnergyMetricsAvailable()) + { + for (uint32 i = 0; i < m->getNumSockets(); ++i) + cout << getConsumedJoules(0, sktstate1[i], sktstate2[i]) << "," << getConsumedJoules(1, sktstate1[i], sktstate2[i]) << ","; + } if (m->dramEnergyMetricsAvailable()) { for (uint32 i = 0; i < m->getNumSockets(); ++i) diff --git a/src/topologyentry.h b/src/topologyentry.h index 1961070b..4b53045c 100644 --- a/src/topologyentry.h +++ b/src/topologyentry.h @@ -13,9 +13,22 @@ struct PCM_API TopologyEntry // describes a core int32 os_id; int32 thread_id; int32 core_id; - int32 tile_id; // tile is a constalation of 1 or more cores sharing salem L2 cache. Unique for entire system + int32 module_id; + int32 tile_id; // tile is a constellation of 1 or more cores sharing same L2 cache. 
Unique for entire system + int32 die_id; + int32 die_grp_id; int32 socket; int32 native_cpu_model = -1; + enum DomainTypeID + { + InvalidDomainTypeID = 0, + LogicalProcessorDomain = 1, + CoreDomain = 2, + ModuleDomain = 3, + TileDomain = 4, + DieDomain = 5, + DieGrpDomain = 6 + }; enum CoreType { Atom = 0x20, @@ -24,7 +37,7 @@ struct PCM_API TopologyEntry // describes a core }; CoreType core_type = Invalid; - TopologyEntry() : os_id(-1), thread_id (-1), core_id(-1), tile_id(-1), socket(-1) { } + TopologyEntry() : os_id(-1), thread_id (-1), core_id(-1), module_id(-1), tile_id(-1), die_id(-1), die_grp_id(-1), socket(-1) { } const char* getCoreTypeStr() { switch (core_type) diff --git a/src/types.h b/src/types.h index c19921c9..ba70c223 100644 --- a/src/types.h +++ b/src/types.h @@ -1353,6 +1353,8 @@ struct ICX_IIOPMUCNTCTLRegister #define MSR_IA32_BIOS_SIGN_ID (0x8B) #define MSR_DRAM_ENERGY_STATUS (0x0619) +constexpr auto MSR_PP0_ENERGY_STATUS = 0x639; +constexpr auto MSR_PP1_ENERGY_STATUS = 0x641; #define MSR_PKG_C2_RESIDENCY (0x60D) #define MSR_PKG_C3_RESIDENCY (0x3F8)