From da50a5101192cb21529fee055f5e369eb336b2dc Mon Sep 17 00:00:00 2001 From: Josh Slycord Date: Thu, 1 Jul 2021 14:53:15 -0400 Subject: [PATCH] New branch to finish organizing and testing the new power model implementation. The power model will now use a SimObject interface named hw_interface as the central interface for all power/area/performance/statistics that are generated by gem5-SALAM. This commit has some structures in place but likely will undergo major changes for the next few revisions as all the current versions of the hardware model are combined and merged here. Signed-off-by: Josh Slycord --- src/hwacc/ComputeUnit.py | 6 +- src/hwacc/CycleCounts.py | 19 +- src/hwacc/HWInterface.py | 13 ++ src/hwacc/HWModel/SimObjects/CycleCounts.py | 72 ++++++ .../HWModel/SimObjects/FunctionalUnits.py | 25 ++ .../HWModel/SimObjects/LLVMInstruction.py | 70 ++++++ src/hwacc/HWModel/SimObjects/RuntimeParams.py | 7 + .../HWModel/SimObjects/SALAMStatistics.py | 6 + src/hwacc/HWModel/src/cacti_wrapper.cc | 123 ++++++++++ src/hwacc/HWModel/src/cacti_wrapper.hh | 1 + .../src/cycle_counts.cc} | 3 +- .../src/cycle_counts.hh} | 7 +- src/hwacc/HWModel/src/functional_unit.cc | 180 ++++++++++++++ src/hwacc/HWModel/src/functional_unit.hh | 59 +++++ src/hwacc/HWModel/src/hw_model.cc | 1 + src/hwacc/HWModel/src/hw_model.hh | 41 ++++ src/hwacc/HWModel/src/macros.hh | 1 + src/hwacc/LLVMInterface.py | 8 + src/hwacc/LLVMRead/src/Makefile | 17 -- src/hwacc/LLVMRead/src/cycle_counts.cc | 72 ++++++ src/hwacc/LLVMRead/src/cycle_counts.hh | 75 ++++++ src/hwacc/LLVMRead/src/debug_flags.hh | 38 ++- src/hwacc/LLVMRead/src/instruction.hh | 2 +- src/hwacc/LLVMRead/src/macros.hh | 220 ++---------------- src/hwacc/SConscript | 2 +- src/hwacc/comm_interface.cc | 1 - src/hwacc/compute_unit.cc | 2 +- src/hwacc/compute_unit.hh | 4 +- src/hwacc/hw_interface.cc | 0 src/hwacc/hw_interface.hh | 0 src/hwacc/llvm_interface.cc | 85 +++---- src/hwacc/llvm_interface.hh | 2 +- 32 files changed, 881 insertions(+), 
281 deletions(-) create mode 100644 src/hwacc/HWInterface.py create mode 100644 src/hwacc/HWModel/SimObjects/CycleCounts.py create mode 100644 src/hwacc/HWModel/SimObjects/FunctionalUnits.py create mode 100644 src/hwacc/HWModel/SimObjects/LLVMInstruction.py create mode 100644 src/hwacc/HWModel/SimObjects/RuntimeParams.py create mode 100644 src/hwacc/HWModel/SimObjects/SALAMStatistics.py create mode 100644 src/hwacc/HWModel/src/cacti_wrapper.cc create mode 100644 src/hwacc/HWModel/src/cacti_wrapper.hh rename src/hwacc/{LLVMRead/src/cycle_count.cc => HWModel/src/cycle_counts.cc} (97%) rename src/hwacc/{LLVMRead/src/cycle_count.hh => HWModel/src/cycle_counts.hh} (94%) create mode 100644 src/hwacc/HWModel/src/functional_unit.cc create mode 100644 src/hwacc/HWModel/src/functional_unit.hh create mode 100644 src/hwacc/HWModel/src/hw_model.cc create mode 100644 src/hwacc/HWModel/src/hw_model.hh create mode 100644 src/hwacc/HWModel/src/macros.hh delete mode 100644 src/hwacc/LLVMRead/src/Makefile create mode 100644 src/hwacc/LLVMRead/src/cycle_counts.cc create mode 100644 src/hwacc/LLVMRead/src/cycle_counts.hh create mode 100644 src/hwacc/hw_interface.cc create mode 100644 src/hwacc/hw_interface.hh diff --git a/src/hwacc/ComputeUnit.py b/src/hwacc/ComputeUnit.py index 3c32d3c45..b4d263243 100644 --- a/src/hwacc/ComputeUnit.py +++ b/src/hwacc/ComputeUnit.py @@ -3,10 +3,14 @@ from m5.SimObject import SimObject from CommInterface import CommInterface from CycleCounts import CycleCounts +#from HWInterface import HWInterface class ComputeUnit(SimObject): type = 'ComputeUnit' cxx_header = "hwacc/compute_unit.hh" comm_int = Param.CommInterface(Parent.any, "Communication interface to connect to") - cycles = Param.CycleCounts(Parent.any, "Load cycle count config") + cycles = Param.CycleCounts(Parent.any, "Load instruction runtime cycles") + + #comm_int = Param.CommInterface(Parent.any, "Communication interface to connect to") + #hw_int = Param.HWInterface(Parent.any, "Hardware 
interface to connect to") \ No newline at end of file diff --git a/src/hwacc/CycleCounts.py b/src/hwacc/CycleCounts.py index 474230f24..da1796ca7 100644 --- a/src/hwacc/CycleCounts.py +++ b/src/hwacc/CycleCounts.py @@ -2,11 +2,15 @@ from m5.proxy import * from m5.SimObject import SimObject -#Cycle Counts +#Instruction runtime cycles class CycleCounts(SimObject): + # SimObject type type = "CycleCounts" - cxx_header = "hwacc/LLVMRead/src/cycle_count.hh" + + # gem5-SALAM attached header + cxx_header = "hwacc/LLVMRead/src/cycle_counts.hh" + # Instruction cycle count defaults counter = Param.UInt32(1, "Counter intructions runtime cycles.") gep = Param.UInt32(1, "GetElementPtr intructions runtime cycles.") phi = Param.UInt32(1, "Phi intructions runtime cycles.") @@ -28,6 +32,7 @@ class CycleCounts(SimObject): fptoui = Param.UInt32(1, "Floating point to unsigned integer intructions runtime cycles.") fptosi = Param.UInt32(1, "Floating point to signed integer intructions runtime cycles.") uitofp = Param.UInt32(1, "Unsigned integer to floating point intructions runtime cycles.") + sitofp = Param.UInt32(1, "Signed integer to floating point intructions runtime cycles.") ptrtoint = Param.UInt32(1, "Pointer to integer intructions runtime cycles.") inttoptr = Param.UInt32(1, "Integer to pointer intructions runtime cycles.") bitcast = Param.UInt32(1, "Bitcast intructions runtime cycles.") @@ -37,8 +42,8 @@ class CycleCounts(SimObject): landingpad = Param.UInt32(1, "Landing pad intructions runtime cycles.") catchpad = Param.UInt32(1, "Catch pad intructions runtime cycles.") alloca = Param.UInt32(1, "Allocate intructions runtime cycles.") - load = Param.UInt32(0, "Must be 0, handled by memory controller"); - store = Param.UInt32(0, "Must be 0, handled by memory controller"); + load = Param.UInt32(0, "Must be 0, handled by memory controller") + store = Param.UInt32(0, "Must be 0, handled by memory controller") fence = Param.UInt32(1, "Fence intructions runtime cycles.") cmpxchg = 
Param.UInt32(1, "Compare and exchange intructions runtime cycles.") atomicrmw = Param.UInt32(1, "Atomic remove intructions runtime cycles.") @@ -64,8 +69,4 @@ class CycleCounts(SimObject): fsub = Param.UInt32(5, "Floating point subtraction intructions runtime cycles.") fmul = Param.UInt32(4, "Floating point multiplication intructions runtime cycles.") fdiv = Param.UInt32(16, "Floating point division intructions runtime cycles.") - frem = Param.UInt32(5, "Floating point remainder intructions runtime cycles.") - - - - + frem = Param.UInt32(5, "Floating point remainder intructions runtime cycles.") \ No newline at end of file diff --git a/src/hwacc/HWInterface.py b/src/hwacc/HWInterface.py new file mode 100644 index 000000000..054c1d2c5 --- /dev/null +++ b/src/hwacc/HWInterface.py @@ -0,0 +1,13 @@ +from m5.params import * +from m5.proxy import * +from m5.SimObject import SimObject +from CycleCounts import CycleCounts +from FunctionalUnits import FunctionalUnits +from SALAMStatistics import SALAMStatistics +from RuntimeParams import RuntimeParams + +class HardwareInterface(SimObject): + type = 'HardwareInterface' + cxx_header = "hwacc/hw_interface.hh" + + cycle_counts = Param.CycleCounts(Parent.any, "Load instruction runtime cycle counts") \ No newline at end of file diff --git a/src/hwacc/HWModel/SimObjects/CycleCounts.py b/src/hwacc/HWModel/SimObjects/CycleCounts.py new file mode 100644 index 000000000..da1796ca7 --- /dev/null +++ b/src/hwacc/HWModel/SimObjects/CycleCounts.py @@ -0,0 +1,72 @@ +from m5.params import * +from m5.proxy import * +from m5.SimObject import SimObject + +#Instruction runtime cycles +class CycleCounts(SimObject): + # SimObject type + type = "CycleCounts" + + # gem5-SALAM attached header + cxx_header = "hwacc/LLVMRead/src/cycle_counts.hh" + + # Instruction cycle count defaults + counter = Param.UInt32(1, "Counter intructions runtime cycles.") + gep = Param.UInt32(1, "GetElementPtr intructions runtime cycles.") + phi = Param.UInt32(1, "Phi 
intructions runtime cycles.") + select = Param.UInt32(1, "Select intructions runtime cycles.") + ret = Param.UInt32(1, "Return intructions runtime cycles.") + br = Param.UInt32(1, "Branch intructions runtime cycles.") + switch_inst = Param.UInt32(1, "Switch intructions runtime cycles.") + indirectbr = Param.UInt32(1, "Indirect Branch intructions runtime cycles.") + invoke = Param.UInt32(1, "Invoke intructions runtime cycles.") + resume = Param.UInt32(1, "Resume intructions runtime cycles.") + unreachable = Param.UInt32(1, "Unreachable intructions runtime cycles.") + icmp = Param.UInt32(1, "Integer compare intructions runtime cycles.") + fcmp = Param.UInt32(1, "Floating point compare intructions runtime cycles.") + trunc = Param.UInt32(1, "Truncate intructions runtime cycles.") + zext = Param.UInt32(1, "Zero extend intructions runtime cycles.") + sext = Param.UInt32(1, "Sign extend intructions runtime cycles.") + fptrunc = Param.UInt32(1, "Floating point truncate intructions runtime cycles.") + fpext = Param.UInt32(1, "Floating point extend intructions runtime cycles.") + fptoui = Param.UInt32(1, "Floating point to unsigned integer intructions runtime cycles.") + fptosi = Param.UInt32(1, "Floating point to signed integer intructions runtime cycles.") + uitofp = Param.UInt32(1, "Unsigned integer to floating point intructions runtime cycles.") + sitofp = Param.UInt32(1, "Signed integer to floating point intructions runtime cycles.") + ptrtoint = Param.UInt32(1, "Pointer to integer intructions runtime cycles.") + inttoptr = Param.UInt32(1, "Integer to pointer intructions runtime cycles.") + bitcast = Param.UInt32(1, "Bitcast intructions runtime cycles.") + addrspacecast = Param.UInt32(1, "Address space cast intructions runtime cycles.") + call = Param.UInt32(1, "Call intructions runtime cycles.") + vaarg = Param.UInt32(1, "Vaarg intructions runtime cycles.") + landingpad = Param.UInt32(1, "Landing pad intructions runtime cycles.") + catchpad = Param.UInt32(1, "Catch 
pad intructions runtime cycles.") + alloca = Param.UInt32(1, "Allocate intructions runtime cycles.") + load = Param.UInt32(0, "Must be 0, handled by memory controller") + store = Param.UInt32(0, "Must be 0, handled by memory controller") + fence = Param.UInt32(1, "Fence intructions runtime cycles.") + cmpxchg = Param.UInt32(1, "Compare and exchange intructions runtime cycles.") + atomicrmw = Param.UInt32(1, "Atomic remove intructions runtime cycles.") + extractvalue = Param.UInt32(1, "Extract value intructions runtime cycles.") + insertvalue = Param.UInt32(1, "Insert value intructions runtime cycles.") + extractelement = Param.UInt32(1, "Extract element intructions runtime cycles.") + insertelement = Param.UInt32(1, "Insert element intructions runtime cycles.") + shufflevector = Param.UInt32(1, "Shuffle vector intructions runtime cycles.") + shl = Param.UInt32(1, "Shift left intructions runtime cycles.") + lshr = Param.UInt32(1, "Logical shift right intructions runtime cycles.") + ashr = Param.UInt32(1, "Arithmetic shift right intructions runtime cycles.") + and_inst = Param.UInt32(1, "And intructions runtime cycles.") + or_inst = Param.UInt32(1, "Or intructions runtime cycles.") + xor_inst = Param.UInt32(1, "Xor intructions runtime cycles.") + add = Param.UInt32(1, "Integer add intructions runtime cycles.") + sub = Param.UInt32(1, "Integer subtract intructions runtime cycles.") + mul = Param.UInt32(1, "Integer multiply intructions runtime cycles.") + udiv = Param.UInt32(1, "Unsigned integer division intructions runtime cycles.") + sdiv = Param.UInt32(1, "Signed integer division intructions runtime cycles.") + urem = Param.UInt32(1, "Unsigned remainder intructions runtime cycles.") + srem = Param.UInt32(1, "Signed remainder intructions runtime cycles.") + fadd = Param.UInt32(5, "Floating point addition intructions runtime cycles.") + fsub = Param.UInt32(5, "Floating point subtraction intructions runtime cycles.") + fmul = Param.UInt32(4, "Floating point 
multiplication intructions runtime cycles.") + fdiv = Param.UInt32(16, "Floating point division intructions runtime cycles.") + frem = Param.UInt32(5, "Floating point remainder intructions runtime cycles.") \ No newline at end of file diff --git a/src/hwacc/HWModel/SimObjects/FunctionalUnits.py b/src/hwacc/HWModel/SimObjects/FunctionalUnits.py new file mode 100644 index 000000000..a2d865233 --- /dev/null +++ b/src/hwacc/HWModel/SimObjects/FunctionalUnits.py @@ -0,0 +1,25 @@ +from m5.params import * +from m5.proxy import * +from m5.SimObject import SimObject +from LLVMInstruction import LLVMInstruction + +class FunctionalUnits(SimObject): + type = 'FunctionalUnits' + cxx_header = "hwacc/HWModel/src/functional_unit.hh" + + FU_counter = Param.Int32(-1, "Available counter functional units. -1 indicates unlimited resources") + FU_int_adder = Param.Int32(-1, "Available integer addition/subtraction functional units. -1 indicates unlimited resources") + FU_int_multiplier = Param.Int32(-1, "Available integer multiply/divide functional units. -1 indicates unlimited resources") + FU_int_shifter = Param.Int32(-1, "Available integer shifter functional units. -1 indicates unlimited resources") + FU_int_bit = Param.Int32(-1, "Available integer bitwise functional units. -1 indicates unlimited resources") + FU_fp_sp_adder = Param.Int32(-1, "Available floating point single precision addition/subtraction functional units. -1 indicates unlimited resources") + FU_fp_dp_adder = Param.Int32(-1, "Available floating point double precision addition/subtraction functional units. -1 indicates unlimited resources") + FU_fp_sp_multiplier = Param.Int32(-1, "Available floating point single precision multiply functional units. -1 indicates unlimited resources") + FU_fp_sp_divider = Param.Int32(-1, "Available floating point single precision divide functional units. 
-1 indicates unlimited resources") + FU_fp_dp_multiplier = Param.Int32(-1, "Available floating point double precision multiply functional units. -1 indicates unlimited resources") + FU_fp_dp_divider = Param.Int32(-1, "Available floating point single precision divide functional units. -1 indicates unlimited resources") + FU_compare = Param.Int32(-1, "Available comparison functional units. -1 indicates unlimited resources") + FU_GEP = Param.Int32(-1, "Available equivalent getelementptr functional units. -1 indicates unlimited resources") + FU_conversion = Param.Int32(-1, "Available type conversion functional units. -1 indicates unlimited resources") + FU_pipelined = Param.Int32(1, "Sets functional units to operate as pipelined (1) or not pipelined (0)") + FU_clock_period = Param.Int32(10, "Sets the transitor type used for power calculations") \ No newline at end of file diff --git a/src/hwacc/HWModel/SimObjects/LLVMInstruction.py b/src/hwacc/HWModel/SimObjects/LLVMInstruction.py new file mode 100644 index 000000000..e9ca13efb --- /dev/null +++ b/src/hwacc/HWModel/SimObjects/LLVMInstruction.py @@ -0,0 +1,70 @@ +from m5.params import * +from m5.proxy import * +from m5.SimObject import SimObject + +class LLVMInstruction(SimObject): + type = 'LLVMInstruction' + cxx_header = "hwacc/HWModel/src/functional_unit.hh" + + # Custom Instructions + counter = Param.UInt32(0, "Counter operation.") + + # LLVM Instructions + # Bool Param available? Or UInt1 or UInt8? 
+ gep = Param.UInt32(0, "GetElementPtr operation.") + phi = Param.UInt32(0, "Phi operation.") + select = Param.UInt32(0, "Select operation.") + ret = Param.UInt32(0, "Return operation.") + br = Param.UInt32(0, "Branch operation.") + switch_inst = Param.UInt32(0, "Switch operation.") + indirectbr = Param.UInt32(0, "Indirect Branch operation.") + invoke = Param.UInt32(0, "Invoke operation.") + resume = Param.UInt32(0, "Resume operation.") + unreachable = Param.UInt32(0, "Unreachable operation.") + icmp = Param.UInt32(0, "Integer compare operation.") + fcmp = Param.UInt32(0, "Floating point compare operation.") + trunc = Param.UInt32(0, "Truncate operation.") + zext = Param.UInt32(0, "Zero extend operation.") + sext = Param.UInt32(0, "Sign extend operation.") + fptrunc = Param.UInt32(0, "Floating point truncate operation.") + fpext = Param.UInt32(0, "Floating point extend operation.") + fptoui = Param.UInt32(0, "Floating point to unsigned integer operation.") + fptosi = Param.UInt32(0, "Floating point to signed integer operation.") + uitofp = Param.UInt32(0, "Unsigned integer to floating point operation.") + ptrtoint = Param.UInt32(0, "Pointer to integer operation.") + inttoptr = Param.UInt32(0, "Integer to pointer operation.") + bitcast = Param.UInt32(0, "Bitcast operation.") + addrspacecast = Param.UInt32(0, "Address space cast operation.") + call = Param.UInt32(0, "Call operation.") + vaarg = Param.UInt32(0, "Vaarg operation.") + landingpad = Param.UInt32(0, "Landing pad operation.") + catchpad = Param.UInt32(0, "Catch pad operation.") + alloca = Param.UInt32(0, "Allocate operation.") + load = Param.UInt32(0, "Must be 0, handled by memory controller") + store = Param.UInt32(0, "Must be 0, handled by memory controller") + fence = Param.UInt32(0, "Fence operation.") + cmpxchg = Param.UInt32(0, "Compare and exchange operation.") + atomicrmw = Param.UInt32(0, "Atomic remove operation.") + extractvalue = Param.UInt32(0, "Extract value operation.") + insertvalue = 
Param.UInt32(0, "Insert value operation.") + extractelement = Param.UInt32(0, "Extract element operation.") + insertelement = Param.UInt32(0, "Insert element operation.") + shufflevector = Param.UInt32(0, "Shuffle vector operation.") + shl = Param.UInt32(0, "Shift left operation.") + lshr = Param.UInt32(0, "Logical shift right operation.") + ashr = Param.UInt32(0, "Arithmetic shift right operation.") + and_inst = Param.UInt32(0, "And operation.") + or_inst = Param.UInt32(0, "Or operation.") + xor_inst = Param.UInt32(0, "Xor operation.") + add = Param.UInt32(0, "Integer add operation.") + sub = Param.UInt32(0, "Integer subtract operation.") + mul = Param.UInt32(0, "Integer multiply operation.") + udiv = Param.UInt32(0, "Unsigned integer division operation.") + sdiv = Param.UInt32(0, "Signed integer division operation.") + urem = Param.UInt32(0, "Unsigned remainder operation.") + srem = Param.UInt32(0, "Signed remainder operation.") + fadd = Param.UInt32(0, "Floating point addition operation.") + fsub = Param.UInt32(0, "Floating point subtraction operation.") + fmul = Param.UInt32(0, "Floating point multiplication operation.") + fdiv = Param.UInt32(0, "Floating point division operation.") + frem = Param.UInt32(0, "Floating point remainder operation.") \ No newline at end of file diff --git a/src/hwacc/HWModel/SimObjects/RuntimeParams.py b/src/hwacc/HWModel/SimObjects/RuntimeParams.py new file mode 100644 index 000000000..03d1ecd99 --- /dev/null +++ b/src/hwacc/HWModel/SimObjects/RuntimeParams.py @@ -0,0 +1,7 @@ + + + + + +class RuntimeParams(): + # Runtime params \ No newline at end of file diff --git a/src/hwacc/HWModel/SimObjects/SALAMStatistics.py b/src/hwacc/HWModel/SimObjects/SALAMStatistics.py new file mode 100644 index 000000000..f3c1e4c49 --- /dev/null +++ b/src/hwacc/HWModel/SimObjects/SALAMStatistics.py @@ -0,0 +1,6 @@ + + + + +class SALAMStatistics(): + # Define what statistics to do \ No newline at end of file diff --git 
a/src/hwacc/HWModel/src/cacti_wrapper.cc b/src/hwacc/HWModel/src/cacti_wrapper.cc new file mode 100644 index 000000000..b95a61cca --- /dev/null +++ b/src/hwacc/HWModel/src/cacti_wrapper.cc @@ -0,0 +1,123 @@ + +uca_org_t cactiWrapper(unsigned num_of_bytes, unsigned wordsize, unsigned num_ports, int cache_type) { + int cache_size = num_of_bytes; + int line_size = wordsize; // in bytes + if (wordsize < 4) // minimum line size in cacti is 32-bit/4-byte + line_size = 4; + if (cache_size / line_size < 64) + cache_size = line_size * 64; // minimum scratchpad size: 64 words + int associativity = 1; + int rw_ports = num_ports; + if (rw_ports == 0) + rw_ports = 1; + int excl_read_ports = 0; + int excl_write_ports = 0; + int single_ended_read_ports = 0; + int search_ports = 0; + int banks = 1; + double tech_node = 40; // in nm + //# following three parameters are meaningful only for main memories + int page_sz = 0; + int burst_length = 8; + int pre_width = 8; + int output_width = wordsize * 8; + //# to model special structure like branch target buffers, directory, etc. + //# change the tag size parameter + //# if you want cacti to calculate the tagbits, set the tag size to "default" + int specific_tag = false; + int tag_width = 0; + int access_mode = 2; // 0 normal, 1 seq, 2 fast + int cache = cache_type; // scratch ram 0 or cache 1 + int main_mem = 0; + // assign weights for CACTI optimizations + int obj_func_delay = 0; + int obj_func_dynamic_power = 0; + int obj_func_leakage_power = 100; + int obj_func_area = 0; + int obj_func_cycle_time = 0; + // from CACTI example config... 
+ int dev_func_delay = 20; + int dev_func_dynamic_power = 100000; + int dev_func_leakage_power = 100000; + int dev_func_area = 1000000; + int dev_func_cycle_time = 1000000; + + int ed_ed2_none = 2; // 0 - ED, 1 - ED^2, 2 - use weight and deviate + int temp = 300; + int wt = 0; // 0 - default(search across everything), 1 - global, 2 - 5% + // delay penalty, 3 - 10%, 4 - 20 %, 5 - 30%, 6 - low-swing + int data_arr_ram_cell_tech_flavor_in = + 0; // 0(itrs-hp) 1-itrs-lstp(low standby power) + int data_arr_peri_global_tech_flavor_in = 0; // 0(itrs-hp) + int tag_arr_ram_cell_tech_flavor_in = 0; // itrs-hp + int tag_arr_peri_global_tech_flavor_in = 0; // itrs-hp + int interconnect_projection_type_in = 1; // 0 - aggressive, 1 - normal + int wire_inside_mat_type_in = 1; // 2 - global, 0 - local, 1 - semi-global + int wire_outside_mat_type_in = 1; // 2 - global + int REPEATERS_IN_HTREE_SEGMENTS_in = + 1; // TODO for now only wires with repeaters are supported + int VERTICAL_HTREE_WIRES_OVER_THE_ARRAY_in = 0; + int BROADCAST_ADDR_DATAIN_OVER_VERTICAL_HTREES_in = 0; + int force_wiretype = 1; + int wiretype = 30; + int force_config = 0; + int ndwl = 1; + int ndbl = 1; + int nspd = 0; + int ndcm = 1; + int ndsam1 = 0; + int ndsam2 = 0; + int ecc = 0; + return cacti_interface(cache_size, + line_size, + associativity, + rw_ports, + excl_read_ports, + excl_write_ports, + single_ended_read_ports, + search_ports, + banks, + tech_node, // in nm + output_width, + specific_tag, + tag_width, + access_mode, // 0 normal, 1 seq, 2 fast + cache, // scratch ram or cache + main_mem, + obj_func_delay, + obj_func_dynamic_power, + obj_func_leakage_power, + obj_func_cycle_time, + obj_func_area, + dev_func_delay, + dev_func_dynamic_power, + dev_func_leakage_power, + dev_func_area, + dev_func_cycle_time, + ed_ed2_none, + temp, + wt, + data_arr_ram_cell_tech_flavor_in, + data_arr_peri_global_tech_flavor_in, + tag_arr_ram_cell_tech_flavor_in, + tag_arr_peri_global_tech_flavor_in, + 
interconnect_projection_type_in, + wire_inside_mat_type_in, + wire_outside_mat_type_in, + REPEATERS_IN_HTREE_SEGMENTS_in, + VERTICAL_HTREE_WIRES_OVER_THE_ARRAY_in, + BROADCAST_ADDR_DATAIN_OVER_VERTICAL_HTREES_in, + page_sz, + burst_length, + pre_width, + force_wiretype, + wiretype, + force_config, + ndwl, + ndbl, + nspd, + ndcm, + ndsam1, + ndsam2, + ecc); +} \ No newline at end of file diff --git a/src/hwacc/HWModel/src/cacti_wrapper.hh b/src/hwacc/HWModel/src/cacti_wrapper.hh new file mode 100644 index 000000000..9feb18beb --- /dev/null +++ b/src/hwacc/HWModel/src/cacti_wrapper.hh @@ -0,0 +1 @@ +uca_org_t cactiWrapper(unsigned num_of_bytes, unsigned wordsize, unsigned num_ports, int cache_type); \ No newline at end of file diff --git a/src/hwacc/LLVMRead/src/cycle_count.cc b/src/hwacc/HWModel/src/cycle_counts.cc similarity index 97% rename from src/hwacc/LLVMRead/src/cycle_count.cc rename to src/hwacc/HWModel/src/cycle_counts.cc index e92789e17..48780d45e 100644 --- a/src/hwacc/LLVMRead/src/cycle_count.cc +++ b/src/hwacc/HWModel/src/cycle_counts.cc @@ -1,5 +1,5 @@ //------------------------------------------// -#include "cycle_count.hh" +#include "cycle_counts.hh" //------------------------------------------// CycleCounts::CycleCounts(CycleCountsParams *p) : @@ -25,6 +25,7 @@ CycleCounts::CycleCounts(CycleCountsParams *p) : fptoui_inst(p->fptoui), fptosi_inst(p->fptosi), uitofp_inst(p->uitofp), + sitofp_inst(p->sitofp), ptrtoint_inst(p->ptrtoint), inttoptr_inst(p->inttoptr), bitcast_inst(p->bitcast), diff --git a/src/hwacc/LLVMRead/src/cycle_count.hh b/src/hwacc/HWModel/src/cycle_counts.hh similarity index 94% rename from src/hwacc/LLVMRead/src/cycle_count.hh rename to src/hwacc/HWModel/src/cycle_counts.hh index 99c7ffd1c..f72baf7f1 100644 --- a/src/hwacc/LLVMRead/src/cycle_count.hh +++ b/src/hwacc/HWModel/src/cycle_counts.hh @@ -1,5 +1,5 @@ -#ifndef CYCLE_COUNT_HH -#define CYCLE_COUNT_HH +#ifndef CYCLE_COUNTS_HH +#define CYCLE_COUNTS_HH 
//------------------------------------------// #include "debug_flags.hh" #include "params/CycleCounts.hh" @@ -31,6 +31,7 @@ class CycleCounts : public SimObject { uint32_t fptoui_inst; uint32_t fptosi_inst; uint32_t uitofp_inst; + uint32_t sitofp_inst; uint32_t ptrtoint_inst; uint32_t inttoptr_inst; uint32_t bitcast_inst; @@ -71,4 +72,4 @@ class CycleCounts : public SimObject { CycleCounts(CycleCountsParams *p); }; -#endif //__CYCLE_COUNT_HH__ +#endif //__CYCLE_COUNTS_HH__ diff --git a/src/hwacc/HWModel/src/functional_unit.cc b/src/hwacc/HWModel/src/functional_unit.cc new file mode 100644 index 000000000..8d219a620 --- /dev/null +++ b/src/hwacc/HWModel/src/functional_unit.cc @@ -0,0 +1,180 @@ +//------------------------------------------// +#include "functional_units.hh" +//------------------------------------------// + + +FunctionalUnit::FunctionalUnit(int Latency, uint8_t HardwareUnit) { + hardwareUnit = HardwareUnit; + powerAreaProfile.latency = Latency; + cycle_count = 0; + parse_count = 0; + dynamic_count = 0; + dynamic_max = 0; + static_limit = 0; + multistage = false; + switch(HardwareUnit) { + case COUNTER: { + getCounterPowerArea( Latency, + &powerAreaProfile.internal_power, + &powerAreaProfile.switch_power, + &powerAreaProfile.dynamic_power, + &powerAreaProfile.dynamic_energy, + &powerAreaProfile.leakage_power, + &powerAreaProfile.area); + break; + } + case INTADDER: { + getAdderPowerArea( Latency, + &powerAreaProfile.internal_power, + &powerAreaProfile.switch_power, + &powerAreaProfile.dynamic_power, + &powerAreaProfile.dynamic_energy, + &powerAreaProfile.leakage_power, + &powerAreaProfile.area); + break; + } + case INTMULTI: { + getMultiplierPowerArea( Latency, + &powerAreaProfile.internal_power, + &powerAreaProfile.switch_power, + &powerAreaProfile.dynamic_power, + &powerAreaProfile.dynamic_energy, + &powerAreaProfile.leakage_power, + &powerAreaProfile.area); + break; + } + case INTSHIFTER: { + getShifterPowerArea( Latency, + 
&powerAreaProfile.internal_power, + &powerAreaProfile.switch_power, + &powerAreaProfile.dynamic_power, + &powerAreaProfile.dynamic_energy, + &powerAreaProfile.leakage_power, + &powerAreaProfile.area); + break; + } + case INTBITWISE: { + getBitPowerArea( Latency, + &powerAreaProfile.internal_power, + &powerAreaProfile.switch_power, + &powerAreaProfile.dynamic_power, + &powerAreaProfile.dynamic_energy, + &powerAreaProfile.leakage_power, + &powerAreaProfile.area); + break; + } + case FPSPADDER: { + getSinglePrecisionFloatingPointAdderPowerArea( Latency, + &powerAreaProfile.internal_power, + &powerAreaProfile.switch_power, + &powerAreaProfile.dynamic_power, + &powerAreaProfile.dynamic_energy, + &powerAreaProfile.leakage_power, + &powerAreaProfile.area); + break; + } + case FPDPADDER: { + getDoublePrecisionFloatingPointAdderPowerArea( Latency, + &powerAreaProfile.internal_power, + &powerAreaProfile.switch_power, + &powerAreaProfile.dynamic_power, + &powerAreaProfile.dynamic_energy, + &powerAreaProfile.leakage_power, + &powerAreaProfile.area); + break; + } + case FPSPMULTI: { + getSinglePrecisionFloatingPointMultiplierPowerArea( Latency, + &powerAreaProfile.internal_power, + &powerAreaProfile.switch_power, + &powerAreaProfile.dynamic_power, + &powerAreaProfile.dynamic_energy, + &powerAreaProfile.leakage_power, + &powerAreaProfile.area); + break; + } + case FPDPMULTI: { + getDoublePrecisionFloatingPointMultiplierPowerArea( Latency, + &powerAreaProfile.internal_power, + &powerAreaProfile.switch_power, + &powerAreaProfile.dynamic_power, + &powerAreaProfile.dynamic_energy, + &powerAreaProfile.leakage_power, + &powerAreaProfile.area); + break; + } + case FPSPDIVID: { + getSinglePrecisionFloatingPointMultiplierPowerArea( Latency, + &powerAreaProfile.internal_power, + &powerAreaProfile.switch_power, + &powerAreaProfile.dynamic_power, + &powerAreaProfile.dynamic_energy, + &powerAreaProfile.leakage_power, + &powerAreaProfile.area); + break; + } + case FPDPDIVID: { + 
getDoublePrecisionFloatingPointMultiplierPowerArea( Latency, + &powerAreaProfile.internal_power, + &powerAreaProfile.switch_power, + &powerAreaProfile.dynamic_power, + &powerAreaProfile.dynamic_energy, + &powerAreaProfile.leakage_power, + &powerAreaProfile.area); + break; + } + case COMPARE: { + getBitPowerArea( Latency, + &powerAreaProfile.internal_power, + &powerAreaProfile.switch_power, + &powerAreaProfile.dynamic_power, + &powerAreaProfile.dynamic_energy, + &powerAreaProfile.leakage_power, + &powerAreaProfile.area); + break; + } + case GETELEMENTPTR: { + + break; + } + case CONVERSION: { + getShifterPowerArea( Latency, + &powerAreaProfile.internal_power, + &powerAreaProfile.switch_power, + &powerAreaProfile.dynamic_power, + &powerAreaProfile.dynamic_energy, + &powerAreaProfile.leakage_power, + &powerAreaProfile.area); + break; + } + case REGISTER: { + getRegisterPowerArea( Latency, + &powerAreaProfile.internal_power, + &powerAreaProfile.switch_power, + &powerAreaProfile.dynamic_power, + &powerAreaProfile.dynamic_energy, + &powerAreaProfile.leakage_power, + &powerAreaProfile.area); + break; + } + default: { + // Other + break; + } + } +} + +bool +FunctionalUnit::available() { + if ((dynamic_count < static_limit) || (static_limit < 0)) { + updateDynamic(); + return true; + } + return false; +} + +void +FunctionalUnit::setStatic(int Static) { + if (Static == 0) static_limit = parse_count; + else static_limit = Static; +} \ No newline at end of file diff --git a/src/hwacc/HWModel/src/functional_unit.hh b/src/hwacc/HWModel/src/functional_unit.hh new file mode 100644 index 000000000..15f99ea34 --- /dev/null +++ b/src/hwacc/HWModel/src/functional_unit.hh @@ -0,0 +1,59 @@ +#ifndef FUNCTIONAL_UNITS_HH +#define FUNCTIONAL_UNITS_HH +//------------------------------------------// +#include "debug_flags.hh" +#include "power_model.hh" +//------------------------------------------// +#include +#include +#include +//------------------------------------------// + +struct 
PowerAreaProfile { + float latency = 0; + float internal_power = 0; + float switch_power = 0; + float dynamic_power = 0; + float dynamic_energy = 0; + float leakage_power = 0; + float area = 0; + int cycleCount = 0; + PowerAreaProfile(): + latency(0.0), + internal_power(0.0), + switch_power(0.0), + dynamic_power(0.0), + dynamic_energy(0.0), + leakage_power(0.0), + area(0.0), + cycleCount(0) {} +}; + +class FunctionalUnit { + private: + PowerAreaProfile powerAreaProfile; + uint8_t hardwareUnit; + int cycle_count; + int parse_count; + int dynamic_count; + int dynamic_max; + int static_limit; + bool multistage; + public: + FunctionalUnit(int Latency, uint8_t HardwareUnit); + int getDynamicMax() { return dynamic_max; } + int getStaticLimit() { return static_limit; } + int getParsedLimit() { return parse_count; } + void updateDynamic() { dynamic_count++; } + void maxDynamic() { if(dynamic_max < dynamic_count) dynamic_max = dynamic_count; } + void setStatic(int Static); + void setRuntime(int Runtime) { dynamic_max = Runtime; } + void updateParse() { parse_count++; } + void setCycleCount(int Count) { cycle_count = Count; } + void reset() { dynamic_count = 0; } + void multistaged() { multistage = true; } + bool available(); + void occupancy(); +}; + +#endif \ No newline at end of file diff --git a/src/hwacc/HWModel/src/hw_model.cc b/src/hwacc/HWModel/src/hw_model.cc new file mode 100644 index 000000000..fbc2b31a9 --- /dev/null +++ b/src/hwacc/HWModel/src/hw_model.cc @@ -0,0 +1 @@ +#include "hw_model.hh" \ No newline at end of file diff --git a/src/hwacc/HWModel/src/hw_model.hh b/src/hwacc/HWModel/src/hw_model.hh new file mode 100644 index 000000000..c27f06298 --- /dev/null +++ b/src/hwacc/HWModel/src/hw_model.hh @@ -0,0 +1,41 @@ +#ifndef __HWMODEL_HW_MODEL_HH__ +#define __HWMODEL_HW_MODEL_HH__ + + + +class HWInterface { + + private: + + + protected: + + public: + +}; + +/* +// data collection + registers + read/write usage + size + + functional units + type (Adder, 
Multiplier, etc..) + + + + +// data display + // write to file + // flags/defines for what outputs you want + + + + +*/ + + + + +#endif //__HWMODEL_HW_MODEL_HH__ diff --git a/src/hwacc/HWModel/src/macros.hh b/src/hwacc/HWModel/src/macros.hh new file mode 100644 index 000000000..8b4e6df1e --- /dev/null +++ b/src/hwacc/HWModel/src/macros.hh @@ -0,0 +1 @@ +// #define functional units here \ No newline at end of file diff --git a/src/hwacc/LLVMInterface.py b/src/hwacc/LLVMInterface.py index 6d0bf5a09..60a9baf8d 100644 --- a/src/hwacc/LLVMInterface.py +++ b/src/hwacc/LLVMInterface.py @@ -29,3 +29,11 @@ class LLVMInterface(ComputeUnit): FU_clock_period = Param.Int32(10, "Sets the transitor type used for power calculations") clock_period = Param.Int32(10, "System clock speed") top_name = Param.String("top", "Name of the top-level function for the accelerator") + + + + + #in_file = Param.String("LLVM Trace File") + #lockstep_mode = Param.Bool(True, "TRUE: Stall datapath if any operation stalls. FALSE: Only stall datapath regions with stalls") + #sched_threshold = Param.UInt32(10000, "Scheduling window threshold.
Prevents scheduling windows size from exploding during regions of high loop parallelism") + #clock_period = Param.Int32(10, "System clock speed") \ No newline at end of file diff --git a/src/hwacc/LLVMRead/src/Makefile b/src/hwacc/LLVMRead/src/Makefile deleted file mode 100644 index bf11e3b50..000000000 --- a/src/hwacc/LLVMRead/src/Makefile +++ /dev/null @@ -1,17 +0,0 @@ -CPPFLAGS=-Wall -std=c++11 -c - -all: clean basic_block llvm_types instructions registers power_func - -llvm_types: llvm_types.cc llvm_types.hh - g++ ${CPPFLAGS} llvm_types.cc -basic_block: basic_block.cc basic_block.hh - g++ ${CPPFLAGS} basic_block.cc -registers: registers.cc registers.hh - g++ ${CPPFLAGS} registers.cc -instructions: instructions.cc instructions.hh - g++ ${CPPFLAGS} instructions.cc -instructions: power_func.cc power_func.hh - g++ ${CPPFLAGS} power_func.cc - -clean: - rm -f *.o diff --git a/src/hwacc/LLVMRead/src/cycle_counts.cc b/src/hwacc/LLVMRead/src/cycle_counts.cc new file mode 100644 index 000000000..48780d45e --- /dev/null +++ b/src/hwacc/LLVMRead/src/cycle_counts.cc @@ -0,0 +1,72 @@ +//------------------------------------------// +#include "cycle_counts.hh" +//------------------------------------------// + +CycleCounts::CycleCounts(CycleCountsParams *p) : + SimObject(p), + counter_inst(p->counter), + gep_inst(p->gep), + phi_inst(p->phi), + select_inst(p->select), + ret_inst(p->ret), + br_inst(p->br), + switch_inst(p->switch_inst), + indirectbr_inst(p->indirectbr), + invoke_inst(p->invoke), + resume_inst(p->resume), + unreachable_inst(p->unreachable), + icmp_inst(p->icmp), + fcmp_inst(p->fcmp), + trunc_inst(p->trunc), + zext_inst(p->zext), + sext_inst(p->sext), + fptrunc_inst(p->fptrunc), + fpext_inst(p->fpext), + fptoui_inst(p->fptoui), + fptosi_inst(p->fptosi), + uitofp_inst(p->uitofp), + sitofp_inst(p->sitofp), + ptrtoint_inst(p->ptrtoint), + inttoptr_inst(p->inttoptr), + bitcast_inst(p->bitcast), + addrspacecast_inst(p->addrspacecast), + call_inst(p->call), + 
vaarg_inst(p->vaarg), + landingpad_inst(p->landingpad), + catchpad_inst(p->catchpad), + alloca_inst(p->alloca), + load_inst(p->load), + store_inst(p->store), + fence_inst(p->fence), + cmpxchg_inst(p->cmpxchg), + atomicrmw_inst(p->atomicrmw), + extractvalue_inst(p->extractvalue), + insertvalue_inst(p->insertvalue), + extractelement_inst(p->extractelement), + insertelement_inst(p->insertelement), + shufflevector_inst(p->shufflevector), + shl_inst(p->shl), + lshr_inst(p->lshr), + ashr_inst(p->ashr), + and_inst(p->and_inst), + or_inst(p->or_inst), + xor_inst(p->xor_inst), + add_inst(p->add), + sub_inst(p->sub), + mul_inst(p->mul), + udiv_inst(p->udiv), + sdiv_inst(p->sdiv), + urem_inst(p->urem), + srem_inst(p->srem), + fadd_inst(p->fadd), + fsub_inst(p->fsub), + fmul_inst(p->fmul), + fdiv_inst(p->fdiv), + frem_inst(p->frem) { + } + + +CycleCounts* +CycleCountsParams::create() { + return new CycleCounts(this); +} diff --git a/src/hwacc/LLVMRead/src/cycle_counts.hh b/src/hwacc/LLVMRead/src/cycle_counts.hh new file mode 100644 index 000000000..f72baf7f1 --- /dev/null +++ b/src/hwacc/LLVMRead/src/cycle_counts.hh @@ -0,0 +1,75 @@ +#ifndef CYCLE_COUNTS_HH +#define CYCLE_COUNTS_HH +//------------------------------------------// +#include "debug_flags.hh" +#include "params/CycleCounts.hh" +#include "sim/sim_object.hh" +//------------------------------------------// +#include +//------------------------------------------// + +class CycleCounts : public SimObject { + public: + uint32_t counter_inst; + uint32_t gep_inst; + uint32_t phi_inst; + uint32_t select_inst; + uint32_t ret_inst; + uint32_t br_inst; + uint32_t switch_inst; + uint32_t indirectbr_inst; + uint32_t invoke_inst; + uint32_t resume_inst; + uint32_t unreachable_inst; + uint32_t icmp_inst; + uint32_t fcmp_inst; + uint32_t trunc_inst; + uint32_t zext_inst; + uint32_t sext_inst; + uint32_t fptrunc_inst; + uint32_t fpext_inst; + uint32_t fptoui_inst; + uint32_t fptosi_inst; + uint32_t uitofp_inst; + uint32_t 
sitofp_inst; + uint32_t ptrtoint_inst; + uint32_t inttoptr_inst; + uint32_t bitcast_inst; + uint32_t addrspacecast_inst; + uint32_t call_inst; + uint32_t vaarg_inst; + uint32_t landingpad_inst; + uint32_t catchpad_inst; + uint32_t alloca_inst; + uint32_t load_inst; + uint32_t store_inst; + uint32_t fence_inst; + uint32_t cmpxchg_inst; + uint32_t atomicrmw_inst; + uint32_t extractvalue_inst; + uint32_t insertvalue_inst; + uint32_t extractelement_inst; + uint32_t insertelement_inst; + uint32_t shufflevector_inst; + uint32_t shl_inst; + uint32_t lshr_inst; + uint32_t ashr_inst; + uint32_t and_inst; + uint32_t or_inst; + uint32_t xor_inst; + uint32_t add_inst; + uint32_t sub_inst; + uint32_t mul_inst; + uint32_t udiv_inst; + uint32_t sdiv_inst; + uint32_t urem_inst; + uint32_t srem_inst; + uint32_t fadd_inst; + uint32_t fsub_inst; + uint32_t fmul_inst; + uint32_t fdiv_inst; + uint32_t frem_inst; + CycleCounts(CycleCountsParams *p); +}; + +#endif //__CYCLE_COUNTS_HH__ diff --git a/src/hwacc/LLVMRead/src/debug_flags.hh b/src/hwacc/LLVMRead/src/debug_flags.hh index c83ed0342..3175a5a98 100644 --- a/src/hwacc/LLVMRead/src/debug_flags.hh +++ b/src/hwacc/LLVMRead/src/debug_flags.hh @@ -1,8 +1,29 @@ #ifndef LLVMREAD_DEBUG_HH #define LLVMREAD_DEBUG_HH -//------------------------------------------// -#include "macros.hh" // Included here so all files have access -//------------------------------------------// +// License + +//____________________________________________________________________________ +// Doxygen setup +/** + * @defgroup flags Flags group + */ + +//____________________________________________________________________________ +// File description +/** + * @file + * Contains include information for all M5 debug flags and base + * implementation of the Debugger class used within gem5-SALAM. 
+ */ + +//____________________________________________________________________________ +// Global includes +// - debug_flags.hh is a common header for all files, so all files included +// - here will be available throughout the entire application +#include "macros.hh" + +//____________________________________________________________________________ +// Debug includes #include "debug/AddrRanges.hh" #include "debug/CommInterface.hh" #include "debug/CommInterfaceQueues.hh" @@ -17,11 +38,21 @@ #include "debug/StreamDma.hh" #include "debug/Trace.hh" #include "debug/Step.hh" + +//____________________________________________________________________________ +// M5 includes #include "base/trace.hh" +/** + * @namespace SALAM + */ namespace SALAM { +/** + * @class Debugger + * Base implementation of the debugger used in gem5-SALAM + */ class Debugger { private: @@ -35,5 +66,6 @@ class Debugger }; } + //------------------------------------------// #endif //__LLVMREAD_DEBUG_HH__ diff --git a/src/hwacc/LLVMRead/src/instruction.hh b/src/hwacc/LLVMRead/src/instruction.hh index aa31361fa..9ec163ddc 100644 --- a/src/hwacc/LLVMRead/src/instruction.hh +++ b/src/hwacc/LLVMRead/src/instruction.hh @@ -8,7 +8,7 @@ #include #include "basic_block.hh" #include "operand.hh" -#include "cycle_count.hh" +#include "cycle_counts.hh" #include "debug_flags.hh" #include "value.hh" #include "mem_request.hh" diff --git a/src/hwacc/LLVMRead/src/macros.hh b/src/hwacc/LLVMRead/src/macros.hh index 9e4f0ffdf..497fac4b9 100644 --- a/src/hwacc/LLVMRead/src/macros.hh +++ b/src/hwacc/LLVMRead/src/macros.hh @@ -1,211 +1,21 @@ #ifndef MACROS_HH #define MACROS_HH -//------------------------------------------// -#include -#include +//____________________________________________________________________________ -// Function Macros -#define MIN(a,b) (((a)<(b))?(a):(b)) - -// Data type sizing based off LLVM references -#define SYSTEMSIZE 64 // Bit size of system -#define BYTE 8 // Size of a byte in bits -#define VOIDSIZE
0 // Void data type size -#define BYTESIZE(X) (((X-1)/8)+1) // Convert bits to bytes -#define DEFAULTSIZE (SYSTEMSIZE/BYTE) // Default register size -#define POINTERSIZE (SYSTEMSIZE/BYTE) // Pointer data type size -#define LABELSIZE (SYSTEMSIZE/BYTE) // Label data type size -#define DOUBLESIZE (SYSTEMSIZE/BYTE) // Double data type size -#define FLOATSIZE (SYSTEMSIZE/(2*BYTE)) // Float data type size - -// Indexing -#define SKIPFIRST 1 // Start string on second character -#define COMPAREFOUND 0 // Index 0 returned if entire string is found - -// Hardware Units -#define COUNTER 0 -#define INTADDER 1 -#define INTMULTI 2 -#define INTSHIFTER 3 -#define INTBITWISE 4 -#define FPSPADDER 5 -#define FPDPADDER 6 -#define FPSPMULTI 7 -#define FPSPDIVID 8 -#define FPDPMULTI 9 -#define FPDPDIVID 10 -#define COMPARE 11 -#define GETELEMENTPTR 12 -#define CONVERSION 13 -#define OTHER 14 -#define REGISTER 15 -// Hardware Unit Staging -#define COUNTER_STAGES 1 -#define INTADDER_STAGES 1 -#define INTMULTI_STAGES 1 -#define INTSHIFTER_STAGES 1 -#define INTBITWISE_STAGES 1 -#define FPSPADDER_STAGES 3 -#define FPDPADDER_STAGES 3 -#define FPSPMULTI_STAGES 3 -#define FPSPDIVID_STAGES 3 -#define FPDPMULTI_STAGES 3 -#define FPDPDIVID_STAGES 3 -#define COMPARE_STAGES 1 -#define GETELEMENTPTR_STAGES 1 -#define CONVERSION_STAGES 1 -#define REGISTER_STAGES 1 -#define OTHER_STAGES 1 -//Flags -#define ZEROEXT 0x00000000001 -#define SIGNEXT 0x00000000002 -#define INREG 0x00000000004 -#define BYVAL 0x00000000008 -#define SRET 0x00000000010 -#define NOALIAS 0x00000000020 -#define NOCAPTURE 0x00000000040 -#define NEST 0x00000000080 -#define RETURNED 0x00000000100 -#define ALIGNSTACK 0x00000000200 -#define ALWAYSINLINE 0x00000000400 -#define BUILTIN 0x00000000800 -#define COLD 0x00000001000 -#define INLINEINT 0x00000002000 -#define MINSIZE 0x00000004000 -#define NAKED 0x00000008000 -#define NOBUILTIN 0x00000010000 -#define NODUPLICATE 0x00000020000 -#define NOIMPLICITFLOAT 0x00000040000 -#define 
NOINLINE 0x00000080000 -#define NONLAZYBIND 0x00000100000 -#define NOREDZONE 0x00000200000 -#define NORETURN 0x00000400000 -#define NOUNWIND 0x00000800000 -#define OPTNONE 0x00001000000 -#define OPTSIZE 0x00002000000 -#define READNONE 0x00004000000 -#define READONLY 0x00008000000 -#define RETURNS_TWICE 0x00010000000 -#define SANITIZE_MEMORY 0x00020000000 -#define SANITIZE_ADDRESS 0x00040000000 -#define SANITIZE_THREAD 0x00080000000 -#define SSP 0x00100000000 -#define SSPREQ 0x00200000000 -#define SSPSTRONG 0x00400000000 -#define UWTABLE 0x00800000000 -#define CCC 0x01000000000 -#define FASTCC 0x02000000000 -#define COLDCC 0x04000000000 -#define CC10 0x08000000000 -#define CC11 0x10000000000 -#define VOLATILE 0x20000000000 -#define INBOUNDS 0x40000000000 - -#define NNAN 0x00000000001 -#define NINF 0x00000000002 -#define NSZ 0x00000000004 -#define ARCP 0x00000000008 -#define CONTRACT 0x00000000010 -#define AFN 0x00000000020 -#define REASSOC 0x00000000040 -#define FAST 0x00000000080 -#define NSW 0x00000000100 -#define NUW 0x00000000200 -#define EXACT 0x00000000400 -#define EQ 0x00000000800 -#define NE 0x00000001000 -#define UGT 0x00000002000 -#define UGE 0x00000004000 -#define ULT 0x00000008000 -#define ULE 0x00000010000 -#define SGT 0x00000020000 -#define SGE 0x00000040000 -#define SLT 0x00000080000 -#define SLE 0x00000100000 -#define CONDFALSE 0x00000200000 -#define CONDTRUE 0x00000400000 -#define OEQ 0x00000800000 -#define OGT 0x00001000000 -#define OGE 0x00002000000 -#define OLT 0x00004000000 -#define OLE 0x00008000000 -#define ONE 0x00010000000 -#define ORD 0x00020000000 -#define UEQ 0x00040000000 -#define UNE 0x00080000000 -#define UNO 0x00100000000 +/** + * Finds the minimal value of arguments + */ +#define MIN(a,b) (((a)<(b))?(a):(b)) -// LLVM Instructions Definitions (Re-Define From llvm/IR/Instructions.def) -#define LLVM_IR_Move 0 -#define LLVM_IR_Ret 1 -#define LLVM_IR_Br 2 -#define LLVM_IR_Switch 3 -#define LLVM_IR_IndirectBr 4 -#define LLVM_IR_Invoke 5 
-#define LLVM_IR_Resume 6 -#define LLVM_IR_Unreachable 7 -#define LLVM_IR_Add 13 -#define LLVM_IR_FAdd 14 -#define LLVM_IR_Sub 15 -#define LLVM_IR_FSub 16 -#define LLVM_IR_Mul 17 -#define LLVM_IR_FMul 18 -#define LLVM_IR_UDiv 19 -#define LLVM_IR_SDiv 20 -#define LLVM_IR_FDiv 21 -#define LLVM_IR_URem 22 -#define LLVM_IR_SRem 23 -#define LLVM_IR_FRem 24 -#define LLVM_IR_Shl 25 -#define LLVM_IR_LShr 26 -#define LLVM_IR_AShr 27 -#define LLVM_IR_And 28 -#define LLVM_IR_Or 29 -#define LLVM_IR_Xor 30 -#define LLVM_IR_Alloca 31 -#define LLVM_IR_Load 32 -#define LLVM_IR_Store 33 -#define LLVM_IR_GetElementPtr 34 -#define LLVM_IR_Fence 35 -#define LLVM_IR_AtomicCmpXchg 36 -#define LLVM_IR_AtomicRMW 37 -#define LLVM_IR_Trunc 38 -#define LLVM_IR_ZExt 39 -#define LLVM_IR_SExt 40 -#define LLVM_IR_FPToUI 41 -#define LLVM_IR_FPToSI 42 -#define LLVM_IR_UIToFP 43 -#define LLVM_IR_SIToFP 44 -#define LLVM_IR_FPTrunc 45 -#define LLVM_IR_FPExt 46 -#define LLVM_IR_PtrToInt 47 -#define LLVM_IR_IntToPtr 48 -#define LLVM_IR_BitCast 49 -#define LLVM_IR_AddrSpaceCast 50 -#define LLVM_IR_ICmp 53 -#define LLVM_IR_FCmp 54 -#define LLVM_IR_PHI 55 -#define LLVM_IR_Call 56 -#define LLVM_IR_Select 57 -#define LLVM_IR_VAArg 60 -#define LLVM_IR_ExtractElement 61 -#define LLVM_IR_InsertElement 62 -#define LLVM_IR_ShuffleVector 63 -#define LLVM_IR_ExtractValue 64 -#define LLVM_IR_InsertValue 65 -#define LLVM_IR_LandingPad 66 -#define LLVM_IR_DMAFence 97 -#define LLVM_IR_DMAStore 98 -#define LLVM_IR_DMALoad 99 -#define LLVM_IR_IndexAdd 100 -#define LLVM_IR_SilentStore 101 -#define LLVM_IR_Sine 102 -#define LLVM_IR_Cosine 103 +/** + * @namespace SALAM + */ namespace SALAM { + // LLVM comparison codes enum Predicate : unsigned { FCMP_FALSE = 0, FCMP_OEQ = 1, FCMP_OGT = 2, FCMP_OGE = 3, FCMP_OLT = 4, FCMP_OLE = 5, FCMP_ONE = 6, FCMP_ORD = 7, @@ -219,4 +29,16 @@ namespace SALAM { } +/* +Useful snippets + +// Line break +//____________________________________________________________________________ + + + + + 
+*/ + #endif //__MACROS_HH__ diff --git a/src/hwacc/SConscript b/src/hwacc/SConscript index b2687a883..14e2b31ff 100644 --- a/src/hwacc/SConscript +++ b/src/hwacc/SConscript @@ -31,7 +31,7 @@ if env['TARGET_ISA'] == 'arm': #Source('LLVMRead/src/instructions.cc') Source('LLVMRead/src/registers.cc') #Source('LLVMRead/src/base_instruction.cc') - Source('LLVMRead/src/cycle_count.cc') + Source('LLVMRead/src/cycle_counts.cc') Source('LLVMRead/src/operand.cc') Source('acc_cluster.cc') Source('stream_buffer.cc') diff --git a/src/hwacc/comm_interface.cc b/src/hwacc/comm_interface.cc index d59e219de..3d2658cce 100644 --- a/src/hwacc/comm_interface.cc +++ b/src/hwacc/comm_interface.cc @@ -30,7 +30,6 @@ CommInterface::CommInterface(Params *p) : cacheLineSize(p->cache_line_size), clock_period(p->clock_period), endian(p->system->getGuestByteOrder()), - // debugEnabled(true), // TODO: Revert debugEnabled(p->enable_debug_msgs), reset_spm(p->reset_spm) { processDelay = 1000 * clock_period; diff --git a/src/hwacc/compute_unit.cc b/src/hwacc/compute_unit.cc index 28c0f3736..620209862 100644 --- a/src/hwacc/compute_unit.cc +++ b/src/hwacc/compute_unit.cc @@ -5,7 +5,7 @@ ComputeUnit::ComputeUnit(ComputeUnitParams *p) : SimObject(p), comm(p->comm_int), - cycles(p->cycles), + cycle_counts(p->cycles), tickEvent(this) {} ComputeUnit* diff --git a/src/hwacc/compute_unit.hh b/src/hwacc/compute_unit.hh index cd1166793..d55d19a4e 100644 --- a/src/hwacc/compute_unit.hh +++ b/src/hwacc/compute_unit.hh @@ -6,7 +6,7 @@ #include "hwacc/comm_interface.hh" #include "hwacc/LLVMRead/src/mem_request.hh" #include "hwacc/LLVMRead/src/debug_flags.hh" -#include "hwacc/LLVMRead/src/cycle_count.hh" +#include "hwacc/LLVMRead/src/cycle_counts.hh" //------------------------------------------// class ComputeUnit : public SimObject { @@ -14,7 +14,7 @@ class ComputeUnit : public SimObject { protected: CommInterface *comm; - CycleCounts *cycles; + CycleCounts *cycle_counts; class TickEvent : public Event { diff 
--git a/src/hwacc/hw_interface.cc b/src/hwacc/hw_interface.cc new file mode 100644 index 000000000..e69de29bb diff --git a/src/hwacc/hw_interface.hh b/src/hwacc/hw_interface.hh new file mode 100644 index 000000000..e69de29bb diff --git a/src/hwacc/llvm_interface.cc b/src/hwacc/llvm_interface.cc index 6a3baa978..de54d829c 100644 --- a/src/hwacc/llvm_interface.cc +++ b/src/hwacc/llvm_interface.cc @@ -882,47 +882,50 @@ LLVMInterface::createInstruction(llvm::Instruction * inst, uint64_t id) { uint64_t OpCode = inst->Instruction::getOpcode(); if (DTRACE(Trace)) DPRINTF(LLVMInterface, "Switch OpCode [%d]\n", OpCode); switch(OpCode) { - case llvm::Instruction::Ret : return SALAM::createRetInst(id, OpCode, cycles->ret_inst); break; - case llvm::Instruction::Br: return SALAM::createBrInst(id, OpCode, cycles->br_inst); break; - case llvm::Instruction::Switch: return SALAM::createSwitchInst(id, OpCode, cycles->switch_inst); break; - case llvm::Instruction::Add: return SALAM::createAddInst(id, OpCode, cycles->add_inst); break; - case llvm::Instruction::FAdd: return SALAM::createFAddInst(id, OpCode, cycles->fadd_inst); break; - case llvm::Instruction::Sub: return SALAM::createSubInst(id, OpCode, cycles->sub_inst); break; - case llvm::Instruction::FSub: return SALAM::createFSubInst(id, OpCode, cycles->fsub_inst); break; - case llvm::Instruction::Mul: return SALAM::createMulInst(id, OpCode, cycles->mul_inst); break; - case llvm::Instruction::FMul: return SALAM::createFMulInst(id, OpCode, cycles->fmul_inst); break; - case llvm::Instruction::UDiv: return SALAM::createUDivInst(id, OpCode, cycles->udiv_inst); break; - case llvm::Instruction::SDiv: return SALAM::createSDivInst(id, OpCode, cycles->sdiv_inst); break; - case llvm::Instruction::FDiv: return SALAM::createFDivInst(id, OpCode, cycles->fdiv_inst); break; - case llvm::Instruction::URem: return SALAM::createURemInst(id, OpCode, cycles->urem_inst); break; - case llvm::Instruction::SRem: return SALAM::createSRemInst(id, OpCode, 
cycles->srem_inst); break; - case llvm::Instruction::FRem: return SALAM::createFRemInst(id, OpCode, cycles->frem_inst); break; - case llvm::Instruction::Shl: return SALAM::createShlInst(id, OpCode, cycles->shl_inst); break; - case llvm::Instruction::LShr: return SALAM::createLShrInst(id, OpCode, cycles->lshr_inst); break; - case llvm::Instruction::AShr: return SALAM::createAShrInst(id, OpCode, cycles->ashr_inst); break; - case llvm::Instruction::And: return SALAM::createAndInst(id, OpCode, cycles->and_inst); break; - case llvm::Instruction::Or: return SALAM::createOrInst(id, OpCode, cycles->or_inst); break; - case llvm::Instruction::Xor: return SALAM::createXorInst(id, OpCode, cycles->xor_inst); break; - case llvm::Instruction::Load: return SALAM::createLoadInst(id, OpCode, cycles->load_inst); break; - case llvm::Instruction::Store: return SALAM::createStoreInst(id, OpCode, cycles->store_inst); break; - case llvm::Instruction::GetElementPtr : return SALAM::createGetElementPtrInst(id, OpCode, cycles->gep_inst); break; - case llvm::Instruction::Trunc: return SALAM::createTruncInst(id, OpCode, cycles->trunc_inst); break; - case llvm::Instruction::ZExt: return SALAM::createZExtInst(id, OpCode, cycles->zext_inst); break; - case llvm::Instruction::SExt: return SALAM::createSExtInst(id, OpCode, cycles->sext_inst); break; - case llvm::Instruction::FPToUI: return SALAM::createFPToUIInst(id, OpCode, cycles->fptoui_inst); break; - case llvm::Instruction::FPToSI: return SALAM::createFPToSIInst(id, OpCode, cycles->fptosi_inst); break; - case llvm::Instruction::UIToFP: return SALAM::createUIToFPInst(id, OpCode, cycles->uitofp_inst); break; - case llvm::Instruction::SIToFP: return SALAM::createSIToFPInst(id, OpCode, cycles->uitofp_inst); break; // FIX - case llvm::Instruction::FPTrunc: return SALAM::createFPTruncInst(id, OpCode, cycles->fptrunc_inst); break; - case llvm::Instruction::FPExt: return SALAM::createFPExtInst(id, OpCode, cycles->fpext_inst); break; - case 
llvm::Instruction::PtrToInt: return SALAM::createPtrToIntInst(id, OpCode, cycles->ptrtoint_inst); break; - case llvm::Instruction::IntToPtr: return SALAM::createIntToPtrInst(id, OpCode, cycles->inttoptr_inst); break; - case llvm::Instruction::ICmp: return SALAM::createICmpInst(id, OpCode, cycles->icmp_inst); break; - case llvm::Instruction::FCmp: return SALAM::createFCmpInst(id, OpCode, cycles->fcmp_inst); break; - case llvm::Instruction::PHI: return SALAM::createPHIInst(id, OpCode, cycles->phi_inst); break; - case llvm::Instruction::Call: return SALAM::createCallInst(id, OpCode, cycles->call_inst); break; - case llvm::Instruction::Select: return SALAM::createSelectInst(id, OpCode, cycles->select_inst); break; - default: // return SALAM::createBadInst(id); break; + case llvm::Instruction::Ret : return SALAM::createRetInst(id, OpCode, cycle_counts->ret_inst); break; + case llvm::Instruction::Br: return SALAM::createBrInst(id, OpCode, cycle_counts->br_inst); break; + case llvm::Instruction::Switch: return SALAM::createSwitchInst(id, OpCode, cycle_counts->switch_inst); break; + case llvm::Instruction::Add: return SALAM::createAddInst(id, OpCode, cycle_counts->add_inst); break; + case llvm::Instruction::FAdd: return SALAM::createFAddInst(id, OpCode, cycle_counts->fadd_inst); break; + case llvm::Instruction::Sub: return SALAM::createSubInst(id, OpCode, cycle_counts->sub_inst); break; + case llvm::Instruction::FSub: return SALAM::createFSubInst(id, OpCode, cycle_counts->fsub_inst); break; + case llvm::Instruction::Mul: return SALAM::createMulInst(id, OpCode, cycle_counts->mul_inst); break; + case llvm::Instruction::FMul: return SALAM::createFMulInst(id, OpCode, cycle_counts->fmul_inst); break; + case llvm::Instruction::UDiv: return SALAM::createUDivInst(id, OpCode, cycle_counts->udiv_inst); break; + case llvm::Instruction::SDiv: return SALAM::createSDivInst(id, OpCode, cycle_counts->sdiv_inst); break; + case llvm::Instruction::FDiv: return SALAM::createFDivInst(id, 
OpCode, cycle_counts->fdiv_inst); break; + case llvm::Instruction::URem: return SALAM::createURemInst(id, OpCode, cycle_counts->urem_inst); break; + case llvm::Instruction::SRem: return SALAM::createSRemInst(id, OpCode, cycle_counts->srem_inst); break; + case llvm::Instruction::FRem: return SALAM::createFRemInst(id, OpCode, cycle_counts->frem_inst); break; + case llvm::Instruction::Shl: return SALAM::createShlInst(id, OpCode, cycle_counts->shl_inst); break; + case llvm::Instruction::LShr: return SALAM::createLShrInst(id, OpCode, cycle_counts->lshr_inst); break; + case llvm::Instruction::AShr: return SALAM::createAShrInst(id, OpCode, cycle_counts->ashr_inst); break; + case llvm::Instruction::And: return SALAM::createAndInst(id, OpCode, cycle_counts->and_inst); break; + case llvm::Instruction::Or: return SALAM::createOrInst(id, OpCode, cycle_counts->or_inst); break; + case llvm::Instruction::Xor: return SALAM::createXorInst(id, OpCode, cycle_counts->xor_inst); break; + case llvm::Instruction::Load: return SALAM::createLoadInst(id, OpCode, cycle_counts->load_inst); break; + case llvm::Instruction::Store: return SALAM::createStoreInst(id, OpCode, cycle_counts->store_inst); break; + case llvm::Instruction::GetElementPtr : return SALAM::createGetElementPtrInst(id, OpCode, cycle_counts->gep_inst); break; + case llvm::Instruction::Trunc: return SALAM::createTruncInst(id, OpCode, cycle_counts->trunc_inst); break; + case llvm::Instruction::ZExt: return SALAM::createZExtInst(id, OpCode, cycle_counts->zext_inst); break; + case llvm::Instruction::SExt: return SALAM::createSExtInst(id, OpCode, cycle_counts->sext_inst); break; + case llvm::Instruction::FPToUI: return SALAM::createFPToUIInst(id, OpCode, cycle_counts->fptoui_inst); break; + case llvm::Instruction::FPToSI: return SALAM::createFPToSIInst(id, OpCode, cycle_counts->fptosi_inst); break; + case llvm::Instruction::UIToFP: return SALAM::createUIToFPInst(id, OpCode, cycle_counts->uitofp_inst); break; + case 
llvm::Instruction::SIToFP: return SALAM::createSIToFPInst(id, OpCode, cycle_counts->sitofp_inst); break; + case llvm::Instruction::FPTrunc: return SALAM::createFPTruncInst(id, OpCode, cycle_counts->fptrunc_inst); break; + case llvm::Instruction::FPExt: return SALAM::createFPExtInst(id, OpCode, cycle_counts->fpext_inst); break; + case llvm::Instruction::PtrToInt: return SALAM::createPtrToIntInst(id, OpCode, cycle_counts->ptrtoint_inst); break; + case llvm::Instruction::IntToPtr: return SALAM::createIntToPtrInst(id, OpCode, cycle_counts->inttoptr_inst); break; + case llvm::Instruction::ICmp: return SALAM::createICmpInst(id, OpCode, cycle_counts->icmp_inst); break; + case llvm::Instruction::FCmp: return SALAM::createFCmpInst(id, OpCode, cycle_counts->fcmp_inst); break; + case llvm::Instruction::PHI: return SALAM::createPHIInst(id, OpCode, cycle_counts->phi_inst); break; + case llvm::Instruction::Call: return SALAM::createCallInst(id, OpCode, cycle_counts->call_inst); break; + case llvm::Instruction::Select: return SALAM::createSelectInst(id, OpCode, cycle_counts->select_inst); break; + default: { + warn("Tried to create instance of undefined instruction type!"); return std::make_shared(id); + break; + } } } \ No newline at end of file diff --git a/src/hwacc/llvm_interface.hh b/src/hwacc/llvm_interface.hh index 7c6e6d37c..0fb11979c 100644 --- a/src/hwacc/llvm_interface.hh +++ b/src/hwacc/llvm_interface.hh @@ -8,7 +8,7 @@ #include "hwacc/LLVMRead/src/function.hh" #include "hwacc/LLVMRead/src/llvm_types.hh" #include "hwacc/LLVMRead/src/debug_flags.hh" -#include "hwacc/LLVMRead/src/cycle_count.hh" +#include "hwacc/LLVMRead/src/cycle_counts.hh" //------------------------------------------// #include #include