Skip to content

Commit

Permalink
fix cha offset, remove read/write difference
Browse files Browse the repository at this point in the history
  • Loading branch information
victoryang00 committed Aug 24, 2023
1 parent e15511b commit 4f81d2b
Show file tree
Hide file tree
Showing 8 changed files with 46 additions and 63 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ $ sudo apt install llvm-dev clang libbpf-dev libclang-dev libcxxopts-dev libfmt-
```
## User input
```bash
LOGV=1 ./CXL-MEM-Simulator -t ./microbench/many_calloc -i 5 -c 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14
LOGV=1 ./CXL-MEM-Simulator -t ./microbench/many_calloc -i 5 -c 0,1,2,3,4,5,6,7
```
1. -t Target: The path to the executable
2. -i Interval: The epoch length of the simulator; the parameter is in milliseconds
Expand Down
18 changes: 3 additions & 15 deletions include/helper.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,17 +60,7 @@ struct CPUElem {
uint64_t cpu_l2stall_t;
uint64_t cpu_llcl_hits;
uint64_t cpu_llcl_miss;
uint64_t cpu_bandwidth_read;
uint64_t cpu_bandwidth_write;
};

struct CPUTargetElem {
uint64_t all_dram_rds;
uint64_t cpu_l2stall_t;
uint64_t cpu_llcl_hits;
uint64_t cpu_llcl_miss;
uint64_t cpu_cxl_traffic_read;
uint64_t cpu_cxl_traffic_write;
uint64_t cpu_bandwidth;
};

struct PEBSElem {
Expand Down Expand Up @@ -113,10 +103,8 @@ struct PerfConfig {
uint64_t cpu_l2stall_config;
uint64_t cpu_llcl_hits_config;
uint64_t cpu_llcl_miss_config;
uint64_t cpu_bandwidth_read_config;
uint64_t cpu_bandwidth_write_config;
std::optional<uint64_t> cpu_cxl_traffic_read_config; // use this to initialize the cxl traffic
std::optional<uint64_t> cpu_cxl_traffic_write_config; // use this to initialize the cxl traffic
uint64_t cpu_bandwidth_config;
std::optional<uint64_t> cpu_cxl_traffic_config; // use this to initialize the cxl traffic
};

struct ModelContext {
Expand Down
3 changes: 2 additions & 1 deletion include/incore.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
#include "helper.h"
#include "perf.h"
#include <sys/types.h>

class CXLController;
union CPUID_INFO {
int array[4];
Expand All @@ -16,7 +17,7 @@ union CPUID_INFO {
/** These are per-CHA metrics. */
class Incore {
public:
PerfInfo *perf[5];
PerfInfo *perf[4]; // should only be 4 counters
struct PerfConfig *perf_config;
Incore(pid_t pid, int cpu, struct PerfConfig *perf_config);
~Incore() = default;
Expand Down
2 changes: 1 addition & 1 deletion include/uncore.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ class Uncore {
public:
uint32_t unc_idx;
PerfInfo *perf;
Uncore(const uint32_t unc_idx, PerfConfig *perf_config);
Uncore(uint32_t unc_idx, PerfConfig *perf_config);
~Uncore() = default;

int read_cha_elems(struct CHAElem *elem);
Expand Down
6 changes: 3 additions & 3 deletions src/helper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ const struct ModelContext model_ctx[] = {{CPU_MDL_BDX,
{"/sys/bus/event_source/devices/uncore_cha_%u/type",
/*
* cha_config:
* UNC_C_LLC_VICTIMS
* UNC_CHA_LLC_VICTIMS
* umask=0x21,event=37
*/
0x2137,
Expand Down Expand Up @@ -96,7 +96,7 @@ const struct ModelContext model_ctx[] = {{CPU_MDL_BDX,
{"/sys/bus/event_source/devices/uncore_cha_%u/type",
/*
* cha_config:
* UNC_C_LLC_VICTIMS => OFFCORE_REQUESTS.L3_MISS_DEMAND_DATA_RD
* UNC_CHA_LLC_VICTIMS
* umask=0x10,event=b0
*/
0x10b0,
Expand Down Expand Up @@ -143,7 +143,7 @@ const struct ModelContext model_ctx[] = {{CPU_MDL_BDX,
* UNC_C_LLC_VICTIMS => OFFCORE_REQUESTS.L3_MISS_DEMAND_DATA_RD
* umask=0x21,event=10
*/
0x2110,
0x2110, // no use
/*
* all_dram_rds_config:
* OCR.ALL_READS.L3_MISS.SNOOP_NONE => OCR.DEMAND_DATA_RD.L3_MISS
Expand Down
70 changes: 31 additions & 39 deletions src/incore.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,65 +35,57 @@ int Incore::stop() {
}
return r;
}
void Incore::init_cpu_mem_read(const pid_t pid, const int cpu) {
this->perf[0] = init_incore_perf(pid, cpu, perf_config->cpu_bandwidth_read_config, 0);
}
void Incore::init_cpu_l2stall(const pid_t pid, const int cpu) {
this->perf[1] = init_incore_perf(pid, cpu, perf_config->cpu_l2stall_config, 0);
this->perf[0] = init_incore_perf(pid, cpu, perf_config->cpu_l2stall_config, 0);
}
void Incore::init_cpu_llcl_hits(const pid_t pid, const int cpu) {
this->perf[2] = init_incore_perf(pid, cpu, perf_config->cpu_llcl_hits_config, 0);
this->perf[1] = init_incore_perf(pid, cpu, perf_config->cpu_llcl_hits_config, 0);
}
void Incore::init_cpu_llcl_miss(const pid_t pid, const int cpu) {
this->perf[3] = init_incore_perf(pid, cpu, perf_config->cpu_llcl_miss_config, 0);
this->perf[2] = init_incore_perf(pid, cpu, perf_config->cpu_llcl_miss_config, 0);
}
void Incore::init_cpu_mem_read(const pid_t pid, const int cpu) {
this->perf[3] = init_incore_perf(pid, cpu, perf_config->cpu_bandwidth_read_config, 0);
}
void Incore::init_cpu_mem_write(const pid_t pid, const int cpu) {
this->perf[4] = init_incore_perf(pid, cpu, perf_config->cpu_bandwidth_write_config, 0);
}
int Incore::read_cpu_elems(struct CPUElem *elem) {
ssize_t r;

r = this->perf[0]->read_pmu(&elem->cpu_bandwidth_read);
if (r < 0) {
LOG(ERROR) << fmt::format("read cpu_bandwidth_read failed.\n");
return r;
}
LOG(DEBUG) << fmt::format("read cpu_bandwidth_read:{}\n", elem->cpu_bandwidth_read);

r = this->perf[1]->read_pmu(&elem->cpu_l2stall_t);
if (r < 0) {
LOG(ERROR) << fmt::format("read cpu_l2stall_t failled.\n");
return r;
}
LOG(DEBUG) << fmt::format("read cpu_l2stall_t:{}\n", elem->cpu_l2stall_t);

r = this->perf[2]->read_pmu(&elem->cpu_llcl_hits);
if (r < 0) {
LOG(ERROR) << fmt::format("read cpu_llcl_hits failed.\n");
return r;
}
LOG(DEBUG) << fmt::format("read cpu_llcl_hits:{}\n", elem->cpu_llcl_hits);
r = this->perf[0]->read_pmu(&elem->cpu_l2stall_t);
if (r < 0) {
LOG(ERROR) << fmt::format("read cpu_l2stall_t failled.\n");
return r;
}
LOG(DEBUG) << fmt::format("read cpu_l2stall_t:{}\n", elem->cpu_l2stall_t);

r = this->perf[3]->read_pmu(&elem->cpu_llcl_miss);
if (r < 0) {
LOG(ERROR) << fmt::format("read cpu_llcl_miss failed.\n");
return r;
}
LOG(DEBUG) << fmt::format("read cpu_llcl_miss:{}\n", elem->cpu_llcl_miss);
r = this->perf[1]->read_pmu(&elem->cpu_llcl_hits);
if (r < 0) {
LOG(ERROR) << fmt::format("read cpu_llcl_hits failed.\n");
return r;
}
LOG(DEBUG) << fmt::format("read cpu_llcl_hits:{}\n", elem->cpu_llcl_hits);

r = this->perf[4]->read_pmu(&elem->cpu_bandwidth_write);
if (r < 0) {
LOG(ERROR) << fmt::format("read cpu_bandwidth_write failed.\n");
return r;
}
LOG(DEBUG) << fmt::format("read cpu_bandwidth_write:{}\n", elem->cpu_bandwidth_write);
r = this->perf[2]->read_pmu(&elem->cpu_llcl_miss);
if (r < 0) {
LOG(ERROR) << fmt::format("read cpu_llcl_miss failed.\n");
return r;
}
LOG(DEBUG) << fmt::format("read cpu_llcl_miss:{}\n", elem->cpu_llcl_miss);
r = this->perf[3]->read_pmu(&elem->cpu_bandwidth);
if (r < 0) {
LOG(ERROR) << fmt::format("read cpu_bandwidth failed.\n");
return r;
}
LOG(DEBUG) << fmt::format("read cpu_bandwidth:{}\n", elem->cpu_bandwidth);
}
Incore::Incore(const pid_t pid, const int cpu, struct PerfConfig *perf_config) : perf_config(perf_config) {
/* reset all pmc values */
this->init_cpu_mem_read(pid, cpu);
this->init_cpu_l2stall(pid, cpu);
this->init_cpu_llcl_hits(pid, cpu);
this->init_cpu_llcl_miss(pid, cpu);
this->init_cpu_mem_read(pid, cpu);
this->init_cpu_mem_write(pid, cpu);
}
bool get_cpu_info(struct CPUInfo *cpu_info) {
Expand Down
6 changes: 4 additions & 2 deletions src/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,14 +36,15 @@ int main(int argc, char *argv[]) {
cxxopts::value<std::string>()->default_value("./microbench/many_calloc"))(
"h,help", "The value for epoch value", cxxopts::value<bool>()->default_value("false"))(
"i,interval", "The value for epoch value", cxxopts::value<int>()->default_value("5"))(
"s,source", "Collection Phase or Validation Phase", cxxopts::value<bool>()->default_value("false"))(
"c,cpuset", "The CPUSET for CPU to set affinity on and only run the target process on those CPUs",
cxxopts::value<std::vector<int>>()->default_value("0,1,2,3,4,5,6,7"))(
"d,dramlatency", "The current platform's dram latency", cxxopts::value<double>()->default_value("110"))(
"p,pebsperiod", "The pebs sample period", cxxopts::value<int>()->default_value("1"))(
"m,mode", "Page mode or cacheline mode", cxxopts::value<std::string>()->default_value("p"))(
"o,topology", "The newick tree input for the CXL memory expander topology",
cxxopts::value<std::string>()->default_value("(1,(2,3))"))(
"s,capacity", "The capacity vector of the CXL memory expander with the firsgt local",
"e,capacity", "The capacity vector of the CXL memory expander with the firsgt local",
cxxopts::value<std::vector<int>>()->default_value("0,20,20,20"))(
"f,frequency", "The frequency for the running thread", cxxopts::value<double>()->default_value("4000"))(
"l,latency", "The simulated latency by epoch based calculation for injected latency",
Expand All @@ -70,6 +71,7 @@ int main(int argc, char *argv[]) {
auto capacity = result["capacity"].as<std::vector<int>>();
auto dramlatency = result["dramlatency"].as<double>();
auto mode = result["mode"].as<std::string>() == "p";
auto source = result["source"].as<bool>();
Helper helper{};
InterleavePolicy *policy = new InterleavePolicy();
CXLController *controller;
Expand Down Expand Up @@ -273,7 +275,7 @@ int main(int argc, char *argv[]) {
uint64_t target_l2stall = 0, target_llcmiss = 0, target_llchits = 0;
for (int j = 0; j < ncpu; ++j) {
pmu.cpus[j].read_cpu_elems(&mon.after->cpus[j]);
read_config += mon.after->cpus[j].cpu_bandwidth_read - mon.before->cpus[j].cpu_bandwidth_read;
read_config += mon.after->cpus[j].cpu_bandwidth - mon.before->cpus[j].cpu_bandwidth;
}
/* read PEBS sample */
if (mon.pebs_ctx->read(controller, &mon.after->pebs) < 0) {
Expand Down
2 changes: 1 addition & 1 deletion src/uncore.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ Uncore::Uncore(const uint32_t unc_idx, PerfConfig *perf_config) {
auto attr = perf_event_attr{
.type = (uint32_t)value,
.size = sizeof(struct perf_event_attr),
.config = perf_config->cha_config,
.config = PERF_COUNT_HW_CPU_CYCLES,
.disabled = 1,
.inherit = 1,
.enable_on_exec = 1,
Expand Down

0 comments on commit 4f81d2b

Please sign in to comment.