Skip to content

Commit

Permalink
Merge branch 'master' into internal_update
Browse files Browse the repository at this point in the history
  • Loading branch information
hyesoon authored Aug 14, 2023
2 parents 6960742 + 96a7da4 commit 120bcc5
Show file tree
Hide file tree
Showing 41 changed files with 328 additions and 246 deletions.
30 changes: 0 additions & 30 deletions .travis.yml

This file was deleted.

4 changes: 2 additions & 2 deletions INSTALL
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@

=== Requirement ===

SCons
SCons and dependencies

* How to get SCons
Ubuntu --> apt-get install scons
Ubuntu --> apt-get install scons python-metaconfig


=== Build steps ===
Expand Down
4 changes: 1 addition & 3 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
[![Build Status](https://travis-ci.org/gthparch/macsim.svg?branch=master)](https://travis-ci.org/gthparch/macsim)

# Macsim
## Introduction

Expand All @@ -16,7 +14,7 @@
cores) and SMT or MT architectures as well.
* Currently interconnection network model (based on IRIS) and power model (based
on McPAT) are connected.
* MacSim is also one of the components of SST, so muiltiple MacSim simulatore
* MacSim is also one of the components of SST, so multiple MacSim simulators
can run concurrently.
* The project has been supported by Intel, NSF, Sandia National Lab.

Expand Down
4 changes: 2 additions & 2 deletions SConscript
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#!/usr/bin/python
#!/usr/bin/env python3

#########################################################################################
# Author : Jaekyu Lee (jq.lee17@gmail.com)
Expand Down Expand Up @@ -38,7 +38,7 @@ warn_flags = ' '.join(warn_flags)
env = Environment()
custom_vars = set(['AS', 'AR', 'CC', 'CXX', 'HOME', 'LD_LIBRARY_PATH', 'PATH', 'RANLIB'])

for key,val in os.environ.iteritems():
for key,val in os.environ.items():
if key in custom_vars:
env[key] = val

Expand Down
26 changes: 13 additions & 13 deletions SConstruct
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#!/usr/bin/python
#!/usr/bin/env python3

#########################################################################################
# Author : Jaekyu Lee (jq.lee17@gmail.com)
Expand All @@ -8,7 +8,7 @@

import os
import sys
import ConfigParser
import configparser


## Check c++14 support
Expand Down Expand Up @@ -48,7 +48,7 @@ def pre_compile_check():
env = Environment()
custom_vars = set(['AS', 'AR', 'CC', 'CXX', 'HOME', 'LD_LIBRARY_PATH', 'PATH', 'RANLIB'])

for key,val in os.environ.iteritems():
for key,val in os.environ.items():
if key in custom_vars:
env[key] = val

Expand All @@ -68,17 +68,17 @@ flags = {}


## Configuration from file
Config = ConfigParser.ConfigParser()
Config = configparser.ConfigParser()
Config.read('macsim.config')
flags['dram'] = Config.get('Library', 'dram', '0')
flags['power'] = Config.get('Library', 'power', '0')
flags['iris'] = Config.get('Library', 'iris', '0')
flags['qsim'] = Config.get('Library', 'qsim', '0')
flags['debug'] = Config.get('Build', 'debug', '0')
flags['gprof'] = Config.get('Build', 'gprof', '0')
flags['pin_3_13_trace'] = Config.get('Build', 'pin_3_13_trace', '0')
flags['val'] = Config.get('Build_Extra', 'val', '0')
flags['ramulator'] = Config.get('Library', 'ramulator', '0')
flags['dram'] = Config.get('Library', 'dram', fallback='0')
flags['power'] = Config.get('Library', 'power', fallback='0')
flags['iris'] = Config.get('Library', 'iris', fallback='0')
flags['qsim'] = Config.get('Library', 'qsim', fallback='0')
flags['debug'] = Config.get('Build', 'debug', fallback='0')
flags['gprof'] = Config.get('Build', 'gprof', fallback='0')
flags['pin_3_13_trace'] = Config.get('Build', 'pin_3_13_trace', fallback='0')
flags['val'] = Config.get('Build_Extra', 'val', fallback='0')
flags['ramulator'] = Config.get('Library', 'ramulator', fallback='0')

## Configuration from commandline
flags['debug'] = ARGUMENTS.get('debug', flags['debug'])
Expand Down
2 changes: 1 addition & 1 deletion build.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#!/usr/bin/python
#!/usr/bin/env python3

#########################################################################################
# Author : Jaekyu Lee (jq.lee17@gmail.com)
Expand Down
2 changes: 1 addition & 1 deletion def/general.stat.def
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ DEF_STAT(EXE_TIME, COUNT, NO_RATIO)
DEF_STAT(NUM_REPEAT, COUNT, NO_RATIO)

DEF_STAT(CYC_COUNT_X86, COUNT, NO_RATIO)
DEF_STAT(CYC_COUNT_PTX, COUNT, NO_RATIO)
DEF_STAT(CYC_COUNT_ACC, COUNT, NO_RATIO)

DEF_STAT(AVG_BLOCK_EXE_CYCLE, COUNT, NO_RATIO)
DEF_STAT(AVG_BLOCK_EXE_CYCLE_BASE, COUNT, NO_RATIO)
Expand Down
2 changes: 1 addition & 1 deletion doc/latex/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,4 @@ all:
cp macsim.pdf ../;

clean:
$(RM) *.log *.aux *.blg *.bbl *.dvi *.brf macsim.ps macsim.pdf
$(RM) *.out *.log *.aux *.blg *.bbl *.toc *.styles *.dvi *.brf macsim.ps macsim.pdf
93 changes: 66 additions & 27 deletions doc/latex/trace.tex
Original file line number Diff line number Diff line change
Expand Up @@ -381,42 +381,81 @@ \subsection{trace\_xx.raw}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

The trace\_xx.raw file is generated for each thread/warp and contains the
dynamic instruction trace for the thread/warp in binary
format. The structure/format for encoding instructions is the same in
both x86 and PTX traces and looks like as follows (in order):
dynamic instruction trace for the thread/warp in binary format. The
structure/format for encoding instructions is different for x86 and for PTX. In the
source, this is defined in \Verb+src/trace_read.h+.

For x86, the trace entry format is (\Verb+struct trace_info_cpu_s+):

%trace format for an instruction in trace_xx.raw
\vspace{0.2in}
\begin{footnotesize}
\begin{tabular}{l c c l l}
Type & Size (Bytes) & Offset(Bytes) & Field & Description \\ \hline \hline
\Verb+uint8_t+ & 1 & 0 & \Verb+m_num_read_regs+ & number of source registers \\
\Verb+uint8_t+ & 1 & 1 & \Verb+m_num_dest_regs+ & number of destination registers \\
\Verb+uint8_t+ & 9 & 2 & \Verb+m_src[MAX_SRC_NUM]+ & source register IDs \\
\Verb+uint8_t+ & 6 & 11 & \Verb+m_dst[MAX_DST_NUM]+ & destination register IDs \\
\Verb+uint8_t+ & 1 & 17 & \Verb+m_cf_type+ & branch type \\
\Verb+bool+ & 1 & 18 & \Verb+m_has_immediate+ & indicates whether this instruction has immediate field \\
\Verb+uint8_t+ & 1 & 19 & \Verb+m_opcode+ & opcode \\
\Verb+bool+ & 1 & 20 & \Verb+m_has_st+ & indicates whether this instruction has store operation \\
\Verb+bool+ & 1 & 21 & \Verb+m_is_fp+ & indicates whether this instruction is a FP operation \\
\Verb+bool+ & 1 & 22 & \Verb+m_write_flg+ & write flag \\
\Verb+uint8_t+ & 1 & 23 & \Verb+m_num_ld+ & number of load operations \\
\Verb+uint8_t+ & 1 & 24 & \Verb+m_size+ & instruction size \\
\Verb+uint32_t+ & 4 & 28 & \Verb+m_ld_vaddr1+ & load address 1 \\
\Verb+uint32_t+ & 4 & 32 & \Verb+m_ld_vaddr2+ & load address 2 \\
\Verb+uint32_t+ & 4 & 36 & \Verb+m_st_vaddr+ & store address \\
\Verb+uint32_t+ & 4 & 40 & \Verb+m_instruction_addr+ & PC address \\
\Verb+uint32_t+ & 4 & 44 & \Verb+m_branch_target+ & branch target address \\
\Verb+uint8_t+ & 1 & 48 & \Verb+m_mem_read_size+ & memory read size \\
\Verb+uint8_t+ & 1 & 49 & \Verb+m_mem_write_size+ & memory write size \\
\Verb+bool+ & 1 & 50 & \Verb+m_rep_dir+ & repetition direction \\
\Verb+bool+ & 1 & 51 & \Verb+m_actually_taken+ & indicates whether branch is actually taken \\
\end{tabular}
\end{footnotesize}
\vspace{0.2in}

For PTX, the trace entry format is (\Verb+struct trace_info_gpu_small_s+):

\vspace{0.2in}
\begin{footnotesize}
\begin{tabular}{l c l l}
Type & Size (Bytes) & Field & Description \\ \hline \hline
\Verb+uint8_t+ & 1 & \Verb+m_num_read_regs+ & number of source registers \\
\Verb+uint8_t+ & 1 & \Verb+m_num_dest_regs+ & number of destination registers \\
\Verb+uint8_t+ & 9 & \Verb+m_src[MAX_SRC_NUM]+ & source register IDs \\
\Verb+uint8_t+ & 6 & \Verb+m_dst[MAX_DST_NUM]+ & destination register IDs \\
\Verb+uint8_t+ & 1 & \Verb+m_cf_type+ & branch type \\
\Verb+bool+ & 1 & \Verb+m_has_immediate+ & indicates whether this instruction has immediate field \\
\Verb+uint8_t+ & 1 & \Verb+m_opcode+ & opcode \\
\Verb+bool+ & 1 & \Verb+m_has_st+ & indicates whether this instruction has store operation \\
\Verb+bool+ & 1 & \Verb+m_is_fp+ & indicates whether this instruction is a FP operation \\
\Verb+bool+ & 1 & \Verb+m_write_flg+ & write flag \\
\Verb+uint8_t+ & 1 & \Verb+m_num_ld+ & number of load operations \\
\Verb+uint8_t+ & 1 & \Verb+m_size+ & instruction size \\
\Verb+uint32_t+ & 4 & \Verb+m_ld_vaddr1+ & load address 1 \\
\Verb+uint32_t+ & 4 & \Verb+m_ld_vaddr2+ & load address 2 \\
\Verb+uint32_t+ & 4 & \Verb+m_st_vaddr+ & store address \\
\Verb+uint32_t+ & 4 & \Verb+m_instruction_addr+ & PC address \\
\Verb+uint32_t+ & 4 & \Verb+m_branch_target+ & branch target address \\
\Verb+uint8_t+ & 1 & \Verb+m_mem_read_size+ & memory read size \\
\Verb+uint8_t+ & 1 & \Verb+m_mem_write_size+ & memory write size \\
\Verb+bool+ & 1 & \Verb+m_rep_dir+ & repetition direction \\
\Verb+bool+ & 1 & \Verb+m_actually_taken+ & indicates whether branch is actually taken \\
\begin{tabular}{l c c l l}
Type & Size (Bytes) & Offset(Bytes) & Field & Description \\ \hline \hline
\Verb+uint8_t+ & 1 & 0 & \Verb+m_opcode+ & from \Verb+GPU_OPCODE_ENUM+ \\
\Verb+bool+ & 1 & 1 & \Verb+m_is_fp+ & whether this instruction deals with \Verb+float+s \\
\Verb+bool+ & 1 & 2 & \Verb+m_is_load+ & whether this instruction loads from memory \\
\Verb+uint8_t+ & 1 & 3 & \Verb+m_cf_type+ & branch type \\
\Verb+uint8_t+ & 1 & 4 & \Verb+m_num_read_regs+ & number of source registers \\
\Verb+uint8_t+ & 1 & 5 & \Verb+m_num_dest_regs+ & number of destination registers \\
\Verb+uint16_t+ & 10 & 6 & \Verb+m_src[MAX_GPU_SRC_NUM]+ & source register IDs \\
\Verb+uint16_t+ & 8 & 16 & \Verb+m_dst[MAX_GPU_DST_NUM]+ & destination register IDs \\
\Verb+uint8_t+ & 1 & 24 & \Verb+m_size+ & instruction size \\
\Verb+uint32_t+ & 4 & 28 & \Verb+m_active_mask+ & warp's non-blocked threads \\
\Verb+uint32_t+ & 4 & 32 & \Verb+m_br_taken_mask+ & warp's threads that took branch \\
\Verb+uint64_t+ & 8 & 40 & \Verb+m_inst_addr+ & address of current instruction \\
\Verb+uint64_t+ & 8 & 48 & \Verb+m_br_target_addr+ & address to branch to \\
\Verb+uint64_t+ & 8 & 56 & \Verb+m_reconv_inst_addr+ & address of branch reconvergence \\
\Verb+uint64_t+ & 8 & 56 & \Verb+m_mem_addr+ & memory address to load/store to \\
\Verb+uint8_t+ & 1 & 64 & \Verb+m_barrier_id+ & \\
\Verb+uint8_t+ & 1 & 64 & \Verb+m_mem_access_size+ & memory access granularity in bytes \\
\Verb+uint16_t+ & 2 & 66 & \Verb+m_num_barrier_threads+ & \\
\Verb+uint8_t+ & 1 & 68 & \Verb+m_addr_space+ & memory region to work on \\
\Verb+uint8_t+ & 1 & 68 & \Verb+m_level+ & \\
\Verb+uint8_t+ & 1 & 69 & \Verb+m_cache_level+ & \\
\Verb+uint8_t+ & 1 & 69 & \Verb+m_cache_operator+ & \\
\end{tabular}
\end{footnotesize}
\vspace{0.2in}

Additionally, if a PTX trace entry is a load or a store, it will be followed by
32 8-byte (aligned) addresses, each corresponding to the address used by one of
the warps. Notably, the code for handling this was built on top of the existing
simulator. As a result, the output generated by \Verb+debug_trace_read+ or
\Verb+debug_print_trace+ will contain one extra instruction after the memory
operation, erroneously decoded from the memory address data. Nonetheless, the
simulation results are correct --- only the debug output is affected.


Note that the raw trace is compressed with zlib to reduce the sizes of
the generated trace files, and the size of each field is the size
Expand Down
Binary file modified doc/macsim.pdf
Binary file not shown.
2 changes: 1 addition & 1 deletion internal
19 changes: 14 additions & 5 deletions macsimComponent.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,16 @@ macsimComponent::macsimComponent(ComponentId_t id, Params& params)
m_clock_freq,
new Clock::Handler<macsimComponent>(this, &macsimComponent::ticReceived));

m_ptx_core = params.find<bool>("ptx_core", 0);
if (params.find<bool>("ptx_core", 0)) {
m_acc_type = PTX_CORE;
m_acc_core = 1;
} else if (params.find<bool>("igpu_core", 0)) {
m_acc_type = IGPU_CORE;
m_acc_core = 1;
} else {
m_acc_core = 0;
m_acc_type = NO_ACC;
}
m_num_link = params.find<uint32_t>("num_link", 1);
configureLinks(params, tc);

Expand Down Expand Up @@ -150,7 +159,7 @@ void macsimComponent::configureLinks(SST::Params& params, TimeConverter* tc) {
m_data_cache_requests.push_back(std::map<uint64_t, uint64_t>());
m_data_cache_responses.push_back(std::set<uint64_t>());

if (m_ptx_core) {
if (m_acc_core) {
auto ccache_link = loadUserSubComponent<Interfaces::SimpleMem>(
"core" + std::to_string(l) + "-ccache", ComponentInfo::SHARE_NONE, tc,
new Interfaces::SimpleMem::Handler<macsimComponent>(
Expand Down Expand Up @@ -194,7 +203,7 @@ void macsimComponent::configureLinks(SST::Params& params, TimeConverter* tc) {
m_data_cache_request_counters = std::vector<uint64_t>(m_num_link, 0);
m_data_cache_response_counters = std::vector<uint64_t>(m_num_link, 0);

if (m_ptx_core) {
if (m_acc_core) {
m_const_cache_request_counters = std::vector<uint64_t>(m_num_link, 0);
m_const_cache_response_counters = std::vector<uint64_t>(m_num_link, 0);
m_texture_cache_request_counters = std::vector<uint64_t>(m_num_link, 0);
Expand Down Expand Up @@ -275,7 +284,7 @@ void macsimComponent::setup() {
new Callback<macsimComponent, bool, int, uint64_t>(
this, &macsimComponent::strobeDataCacheRespQ);

if (m_ptx_core) {
if (m_acc_core) {
CallbackSendConstCacheRequest* scr =
new Callback<macsimComponent, void, int, uint64_t, uint64_t, int>(
this, &macsimComponent::sendConstCacheRequest);
Expand Down Expand Up @@ -347,7 +356,7 @@ bool macsimComponent::ticReceived(Cycle_t) {
// Debugging
if (m_cycle % 100000 == 0) {
for (unsigned int l = 0; l < m_num_link; ++l) {
if (m_ptx_core) {
if (m_acc_core) {
MSC_DEBUG(
"Core[%2d] I$: (%lu, %lu), D$: (%lu, %lu) C$: (%lu, %lu), T$: (%lu, "
"%lu)\n",
Expand Down
3 changes: 2 additions & 1 deletion macsimComponent.h
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,8 @@ class macsimComponent : public SST::Component

macsim_c *m_macsim;
bool m_sim_running;
bool m_ptx_core;
bool m_acc_core;
ACC_Type m_acc_type;
bool m_cube_connected;
bool m_debug_all;
int64_t m_debug_addr;
Expand Down
Loading

0 comments on commit 120bcc5

Please sign in to comment.