Skip to content

Commit

Permalink
occamy: Extend wide interconnect with multicast
Browse files: browse the repository at this point in the history
  • Loading branch information
colluca committed Oct 29, 2023
1 parent 7a81ac9 commit 3d964a7
Show file tree
Hide file tree
Showing 3 changed files with 62 additions and 111 deletions.
82 changes: 4 additions & 78 deletions docs/rm/2_addrmap.md
Original file line number Diff line number Diff line change
Expand Up @@ -41,15 +41,10 @@ This is the current address map of occamy. Note that the Quadrants address map h
| HBM\_CFG\_CTRL | 64.0 KB | used | 0x0a80\_0000 | 0x0a80\_ffff |
| - | 7.9 MB | free | 0x0a81\_0000 | 0x0aff\_ffff |
| QUAD\_0\_CFG | 64.0 KB | used | 0x0b00\_0000 | 0x0b00\_ffff |
| QUAD\_1\_CFG | 64.0 KB | used | 0x0b01\_0000 | 0x0b01\_ffff |
| QUAD\_2\_CFG | 64.0 KB | used | 0x0b02\_0000 | 0x0b02\_ffff |
| QUAD\_3\_CFG | 64.0 KB | used | 0x0b03\_0000 | 0x0b03\_ffff |
| QUAD\_4\_CFG | 64.0 KB | used | 0x0b04\_0000 | 0x0b04\_ffff |
| QUAD\_5\_CFG | 64.0 KB | used | 0x0b05\_0000 | 0x0b05\_ffff |
| - | 15.6 MB | free | 0x0b06\_0000 | 0x0bff\_ffff |
| - | 15.9 MB | free | 0x0b01\_0000 | 0x0bff\_ffff |
| PLIC | 64.0 MB | used | 0x0c00\_0000 | 0x0fff\_ffff |
| QUADRANTS | 6.0 MB | used | 0x1000\_0000 | 0x105f\_ffff |
| - | 10.0 MB | free | 0x1060\_0000 | 0x10ff\_ffff |
| QUADRANTS | 256.0 KB | used | 0x1000\_0000 | 0x1003\_ffff |
| - | 15.7 MB | free | 0x1004\_0000 | 0x10ff\_ffff |
| SYS\_IDMA\_CFG | 64.0 KB | used | 0x1100\_0000 | 0x1100\_ffff |
| - | 239.9 MB | free | 0x1101\_0000 | 0x1fff\_ffff |
| PCIE | 640.0 MB | used | 0x2000\_0000 | 0x47ff\_ffff |
Expand Down Expand Up @@ -81,74 +76,5 @@ This is the current address map of occamy. Note that the Quadrants address map h
| 0 | 0 | CLUSTER\_TCDM | 128.0 KB | 0x1000\_0000 | 0x1001\_ffff |
| 0 | 0 | CLUSTER\_PERIPHERAL | 64.0 KB | 0x1002\_0000 | 0x1002\_ffff |
| 0 | 0 | CLUSTER\_ZERO\_MEM | 64.0 KB | 0x1003\_0000 | 0x1003\_ffff |
| 0 | 1 | CLUSTER\_TCDM | 128.0 KB | 0x1004\_0000 | 0x1005\_ffff |
| 0 | 1 | CLUSTER\_PERIPHERAL | 64.0 KB | 0x1006\_0000 | 0x1006\_ffff |
| 0 | 1 | CLUSTER\_ZERO\_MEM | 64.0 KB | 0x1007\_0000 | 0x1007\_ffff |
| 0 | 2 | CLUSTER\_TCDM | 128.0 KB | 0x1008\_0000 | 0x1009\_ffff |
| 0 | 2 | CLUSTER\_PERIPHERAL | 64.0 KB | 0x100a\_0000 | 0x100a\_ffff |
| 0 | 2 | CLUSTER\_ZERO\_MEM | 64.0 KB | 0x100b\_0000 | 0x100b\_ffff |
| 0 | 3 | CLUSTER\_TCDM | 128.0 KB | 0x100c\_0000 | 0x100d\_ffff |
| 0 | 3 | CLUSTER\_PERIPHERAL | 64.0 KB | 0x100e\_0000 | 0x100e\_ffff |
| 0 | 3 | CLUSTER\_ZERO\_MEM | 64.0 KB | 0x100f\_0000 | 0x100f\_ffff |
| 1 | 0 | CLUSTER\_TCDM | 128.0 KB | 0x1010\_0000 | 0x1011\_ffff |
| 1 | 0 | CLUSTER\_PERIPHERAL | 64.0 KB | 0x1012\_0000 | 0x1012\_ffff |
| 1 | 0 | CLUSTER\_ZERO\_MEM | 64.0 KB | 0x1013\_0000 | 0x1013\_ffff |
| 1 | 1 | CLUSTER\_TCDM | 128.0 KB | 0x1014\_0000 | 0x1015\_ffff |
| 1 | 1 | CLUSTER\_PERIPHERAL | 64.0 KB | 0x1016\_0000 | 0x1016\_ffff |
| 1 | 1 | CLUSTER\_ZERO\_MEM | 64.0 KB | 0x1017\_0000 | 0x1017\_ffff |
| 1 | 2 | CLUSTER\_TCDM | 128.0 KB | 0x1018\_0000 | 0x1019\_ffff |
| 1 | 2 | CLUSTER\_PERIPHERAL | 64.0 KB | 0x101a\_0000 | 0x101a\_ffff |
| 1 | 2 | CLUSTER\_ZERO\_MEM | 64.0 KB | 0x101b\_0000 | 0x101b\_ffff |
| 1 | 3 | CLUSTER\_TCDM | 128.0 KB | 0x101c\_0000 | 0x101d\_ffff |
| 1 | 3 | CLUSTER\_PERIPHERAL | 64.0 KB | 0x101e\_0000 | 0x101e\_ffff |
| 1 | 3 | CLUSTER\_ZERO\_MEM | 64.0 KB | 0x101f\_0000 | 0x101f\_ffff |
| 2 | 0 | CLUSTER\_TCDM | 128.0 KB | 0x1020\_0000 | 0x1021\_ffff |
| 2 | 0 | CLUSTER\_PERIPHERAL | 64.0 KB | 0x1022\_0000 | 0x1022\_ffff |
| 2 | 0 | CLUSTER\_ZERO\_MEM | 64.0 KB | 0x1023\_0000 | 0x1023\_ffff |
| 2 | 1 | CLUSTER\_TCDM | 128.0 KB | 0x1024\_0000 | 0x1025\_ffff |
| 2 | 1 | CLUSTER\_PERIPHERAL | 64.0 KB | 0x1026\_0000 | 0x1026\_ffff |
| 2 | 1 | CLUSTER\_ZERO\_MEM | 64.0 KB | 0x1027\_0000 | 0x1027\_ffff |
| 2 | 2 | CLUSTER\_TCDM | 128.0 KB | 0x1028\_0000 | 0x1029\_ffff |
| 2 | 2 | CLUSTER\_PERIPHERAL | 64.0 KB | 0x102a\_0000 | 0x102a\_ffff |
| 2 | 2 | CLUSTER\_ZERO\_MEM | 64.0 KB | 0x102b\_0000 | 0x102b\_ffff |
| 2 | 3 | CLUSTER\_TCDM | 128.0 KB | 0x102c\_0000 | 0x102d\_ffff |
| 2 | 3 | CLUSTER\_PERIPHERAL | 64.0 KB | 0x102e\_0000 | 0x102e\_ffff |
| 2 | 3 | CLUSTER\_ZERO\_MEM | 64.0 KB | 0x102f\_0000 | 0x102f\_ffff |
| 3 | 0 | CLUSTER\_TCDM | 128.0 KB | 0x1030\_0000 | 0x1031\_ffff |
| 3 | 0 | CLUSTER\_PERIPHERAL | 64.0 KB | 0x1032\_0000 | 0x1032\_ffff |
| 3 | 0 | CLUSTER\_ZERO\_MEM | 64.0 KB | 0x1033\_0000 | 0x1033\_ffff |
| 3 | 1 | CLUSTER\_TCDM | 128.0 KB | 0x1034\_0000 | 0x1035\_ffff |
| 3 | 1 | CLUSTER\_PERIPHERAL | 64.0 KB | 0x1036\_0000 | 0x1036\_ffff |
| 3 | 1 | CLUSTER\_ZERO\_MEM | 64.0 KB | 0x1037\_0000 | 0x1037\_ffff |
| 3 | 2 | CLUSTER\_TCDM | 128.0 KB | 0x1038\_0000 | 0x1039\_ffff |
| 3 | 2 | CLUSTER\_PERIPHERAL | 64.0 KB | 0x103a\_0000 | 0x103a\_ffff |
| 3 | 2 | CLUSTER\_ZERO\_MEM | 64.0 KB | 0x103b\_0000 | 0x103b\_ffff |
| 3 | 3 | CLUSTER\_TCDM | 128.0 KB | 0x103c\_0000 | 0x103d\_ffff |
| 3 | 3 | CLUSTER\_PERIPHERAL | 64.0 KB | 0x103e\_0000 | 0x103e\_ffff |
| 3 | 3 | CLUSTER\_ZERO\_MEM | 64.0 KB | 0x103f\_0000 | 0x103f\_ffff |
| 4 | 0 | CLUSTER\_TCDM | 128.0 KB | 0x1040\_0000 | 0x1041\_ffff |
| 4 | 0 | CLUSTER\_PERIPHERAL | 64.0 KB | 0x1042\_0000 | 0x1042\_ffff |
| 4 | 0 | CLUSTER\_ZERO\_MEM | 64.0 KB | 0x1043\_0000 | 0x1043\_ffff |
| 4 | 1 | CLUSTER\_TCDM | 128.0 KB | 0x1044\_0000 | 0x1045\_ffff |
| 4 | 1 | CLUSTER\_PERIPHERAL | 64.0 KB | 0x1046\_0000 | 0x1046\_ffff |
| 4 | 1 | CLUSTER\_ZERO\_MEM | 64.0 KB | 0x1047\_0000 | 0x1047\_ffff |
| 4 | 2 | CLUSTER\_TCDM | 128.0 KB | 0x1048\_0000 | 0x1049\_ffff |
| 4 | 2 | CLUSTER\_PERIPHERAL | 64.0 KB | 0x104a\_0000 | 0x104a\_ffff |
| 4 | 2 | CLUSTER\_ZERO\_MEM | 64.0 KB | 0x104b\_0000 | 0x104b\_ffff |
| 4 | 3 | CLUSTER\_TCDM | 128.0 KB | 0x104c\_0000 | 0x104d\_ffff |
| 4 | 3 | CLUSTER\_PERIPHERAL | 64.0 KB | 0x104e\_0000 | 0x104e\_ffff |
| 4 | 3 | CLUSTER\_ZERO\_MEM | 64.0 KB | 0x104f\_0000 | 0x104f\_ffff |
| 5 | 0 | CLUSTER\_TCDM | 128.0 KB | 0x1050\_0000 | 0x1051\_ffff |
| 5 | 0 | CLUSTER\_PERIPHERAL | 64.0 KB | 0x1052\_0000 | 0x1052\_ffff |
| 5 | 0 | CLUSTER\_ZERO\_MEM | 64.0 KB | 0x1053\_0000 | 0x1053\_ffff |
| 5 | 1 | CLUSTER\_TCDM | 128.0 KB | 0x1054\_0000 | 0x1055\_ffff |
| 5 | 1 | CLUSTER\_PERIPHERAL | 64.0 KB | 0x1056\_0000 | 0x1056\_ffff |
| 5 | 1 | CLUSTER\_ZERO\_MEM | 64.0 KB | 0x1057\_0000 | 0x1057\_ffff |
| 5 | 2 | CLUSTER\_TCDM | 128.0 KB | 0x1058\_0000 | 0x1059\_ffff |
| 5 | 2 | CLUSTER\_PERIPHERAL | 64.0 KB | 0x105a\_0000 | 0x105a\_ffff |
| 5 | 2 | CLUSTER\_ZERO\_MEM | 64.0 KB | 0x105b\_0000 | 0x105b\_ffff |
| 5 | 3 | CLUSTER\_TCDM | 128.0 KB | 0x105c\_0000 | 0x105d\_ffff |
| 5 | 3 | CLUSTER\_PERIPHERAL | 64.0 KB | 0x105e\_0000 | 0x105e\_ffff |
| 5 | 3 | CLUSTER\_ZERO\_MEM | 64.0 KB | 0x105f\_0000 | 0x105f\_ffff |
| - | - | EMPTY | 10.0 MB | 0x1060\_0000 | 0x10ff\_ffff |
| - | - | EMPTY | 15.7 MB | 0x1004\_0000 | 0x10ff\_ffff |

82 changes: 52 additions & 30 deletions util/occamygen/occamygen.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,8 @@ def main():
nr_s1_quadrants = occamy.cfg["nr_s1_quadrant"]
nr_s1_clusters = occamy.cfg["s1_quadrant"]["nr_clusters"]
is_remote_quadrant = occamy.cfg["is_remote_quadrant"]
enable_multicast = occamy.cfg["enable_multicast"]
enable_narrow_multicast = occamy.cfg["enable_multicast"]
enable_wide_multicast = occamy.cfg["enable_multicast"]
# Iterate over Hives to get the number of cores.
nr_cluster_cores = len([
core for hive in occamy.cfg["cluster"]["hives"]
Expand Down Expand Up @@ -583,6 +584,15 @@ def main():

# Quadrant inter xbar
# Connects all quadrant pre xbars to all quadrants, with additional wide xbar M/S pair

# Default port: soc wide xbar (last port)
num_slave_ports = nr_s1_quadrants + len(occamy.cfg["remote_quadrants"]) + \
is_remote_quadrant + 1
default_mst_port_idx_bits = clog2(num_slave_ports)
default_mst_port_idx = "{0:b}".format(num_slave_ports - 1)
default_mst_port_idx = "{}'b{}".format(default_mst_port_idx_bits*num_slave_ports,
default_mst_port_idx*num_slave_ports)

quadrant_inter_xbar = solder.AxiXbar(
48,
512,
Expand All @@ -596,16 +606,17 @@ def main():
no_loopback=True,
atop_support=False,
context="soc",
node=am_quadrant_inter_xbar)
node=am_quadrant_inter_xbar,
enable_multicast=enable_wide_multicast,
default_mst_port_idx=default_mst_port_idx)

# Default port: soc wide xbar
quadrant_inter_xbar.add_output_entry("wide_xbar", am_soc_wide_xbar)
quadrant_inter_xbar.add_input("wide_xbar")
for i in range(nr_s1_quadrants):
# Default route passes HBI through quadrant 0
# --> mask this route, forcing it through default wide xbar
quadrant_inter_xbar.add_output_entry("quadrant_{}".format(i),
am_wide_xbar_quadrant_s1[i])
am_wide_xbar_quadrant_s1[i],
is_mcast_target=enable_wide_multicast,
forward_mcast=enable_wide_multicast)
quadrant_inter_xbar.add_input("quadrant_{}".format(i))
for i, rq in enumerate(occamy.cfg["remote_quadrants"]):
quadrant_inter_xbar.add_input("rmq_{}".format(i))
Expand All @@ -615,6 +626,9 @@ def main():
quadrant_inter_xbar.add_output("remote", [])
quadrant_inter_xbar.add_input("remote")

quadrant_inter_xbar.add_output_entry("wide_xbar", am_soc_wide_xbar)
quadrant_inter_xbar.add_input("wide_xbar", is_mcast_master=enable_wide_multicast)

hbm_xbar = solder.AxiXbar(
48,
512,
Expand Down Expand Up @@ -652,18 +666,21 @@ def main():
no_loopback=True,
atop_support=False,
context="soc",
node=am_soc_wide_xbar)
node=am_soc_wide_xbar,
enable_multicast=False,
forward_mcast=enable_wide_multicast)

# Default port: HBI (always escalate "upwards" in hierarchy -> off-chip)
if not is_remote_quadrant:
soc_wide_xbar.add_output_entry("hbi", am_hbi)
soc_wide_xbar.add_output_entry("hbm_xbar", am_hbm_xbar)
soc_wide_xbar.add_output_entry("quadrant_inter_xbar", am_quadrant_inter_xbar)
soc_wide_xbar.add_output_entry("quadrant_inter_xbar", am_quadrant_inter_xbar,
is_mcast_target=False, forward_mcast=enable_wide_multicast)
soc_wide_xbar.add_output_entry("soc_narrow", am_soc_narrow_xbar)
soc_wide_xbar.add_input("hbi")
soc_wide_xbar.add_input("quadrant_inter_xbar")
soc_wide_xbar.add_input("soc_narrow")
soc_wide_xbar.add_input("sys_idma_mst")
soc_wide_xbar.add_input("sys_idma_mst", is_mcast_master=enable_wide_multicast)
soc_wide_xbar.add_output_entry("spm_wide", am_spm_wide)
soc_wide_xbar.add_output_entry("wide_zero_mem", am_wide_zero_mem)

Expand All @@ -684,19 +701,19 @@ def main():
no_loopback=True,
context="soc",
node=am_soc_narrow_xbar,
enable_multicast=enable_multicast)
enable_multicast=enable_narrow_multicast)

for i in range(nr_s1_quadrants):
soc_narrow_xbar.add_output_symbolic_multi("s1_quadrant_{}".format(i),
[(f"s1_quadrant_base_addr[{i}]",
"S1QuadrantAddressSpace"),
(f"s1_quadrant_cfg_base_addr[{i}]",
"S1QuadrantCfgAddressSpace")],
is_mcast_target=enable_multicast,
forward_mcast=enable_multicast)
is_mcast_target=enable_narrow_multicast,
forward_mcast=enable_narrow_multicast)
soc_narrow_xbar.add_input("s1_quadrant_{}".format(i))

soc_narrow_xbar.add_input("cva6", is_mcast_master=enable_multicast)
soc_narrow_xbar.add_input("cva6", is_mcast_master=enable_narrow_multicast)
soc_narrow_xbar.add_input("soc_wide")
soc_narrow_xbar.add_input("periph")
soc_narrow_xbar.add_input("pcie")
Expand Down Expand Up @@ -728,7 +745,8 @@ def main():
# We need 3 "crossbars", which are really simple muxes and demuxes
quadrant_s1_ctrl_xbars = dict()
for name, (iw, lm, forward_mcast) in {
'soc_to_quad': (soc_narrow_xbar.iw_out(), "axi_pkg::CUT_SLV_PORTS", enable_multicast),
'soc_to_quad': (soc_narrow_xbar.iw_out(), "axi_pkg::CUT_SLV_PORTS",
enable_narrow_multicast),
'quad_to_soc': (soc_narrow_xbar.iw, "axi_pkg::CUT_MST_PORTS", False),
}.items():
# Reuse (preserve) narrow Xbar IDs and max transactions
Expand Down Expand Up @@ -773,6 +791,13 @@ def main():
# S1 Quadrants #
################
# Dummy entries to generate associated types.

num_slave_ports = nr_s1_clusters + 1
default_mst_port_idx_bits = clog2(num_slave_ports)
default_mst_port_idx = "{0:b}".format(nr_s1_clusters)
default_mst_port_idx = "{}'b{}".format(default_mst_port_idx_bits*num_slave_ports,
default_mst_port_idx*num_slave_ports)

wide_xbar_quadrant_s1 = solder.AxiXbar(
48,
512,
Expand All @@ -786,13 +811,9 @@ def main():
no_loopback=True,
atop_support=False,
context="quadrant_s1",
node=am_wide_xbar_quadrant_s1[0])

num_slave_ports = nr_s1_clusters + 1
default_mst_port_idx_bits = clog2(nr_s1_clusters + 1)
default_mst_port_idx = "{0:b}".format(nr_s1_clusters)
default_mst_port_idx = "{}'b{}".format(default_mst_port_idx_bits*num_slave_ports,
default_mst_port_idx*num_slave_ports)
node=am_wide_xbar_quadrant_s1[0],
enable_multicast=enable_wide_multicast,
default_mst_port_idx=default_mst_port_idx)

narrow_xbar_quadrant_s1 = solder.AxiXbar(
48,
Expand All @@ -809,27 +830,28 @@ def main():
fall_through=occamy.cfg["s1_quadrant"]["narrow_xbar"]["fall_through"],
no_loopback=True,
context="quadrant_s1",
enable_multicast=enable_multicast,
enable_multicast=enable_narrow_multicast,
default_mst_port_idx=default_mst_port_idx)

wide_xbar_quadrant_s1.add_output("top", [])
wide_xbar_quadrant_s1.add_input("top")

narrow_xbar_quadrant_s1.add_input("top", is_mcast_master=enable_multicast)

for i in range(nr_s1_clusters):
wide_xbar_quadrant_s1.add_output_symbolic("cluster_{}".format(i),
f"cluster_base_addr[{i}]",
"ClusterAddressSpace")

"ClusterAddressSpace",
is_mcast_target=enable_wide_multicast,
forward_mcast=False)
wide_xbar_quadrant_s1.add_input("cluster_{}".format(i))

narrow_xbar_quadrant_s1.add_output_symbolic("cluster_{}".format(i),
f"cluster_base_addr[{i}]",
"ClusterAddressSpace",
is_mcast_target=enable_multicast,
is_mcast_target=enable_narrow_multicast,
forward_mcast=False)
narrow_xbar_quadrant_s1.add_input("cluster_{}".format(i))

wide_xbar_quadrant_s1.add_output("top", [])
wide_xbar_quadrant_s1.add_input("top", is_mcast_master=enable_wide_multicast)

narrow_xbar_quadrant_s1.add_input("top", is_mcast_master=enable_narrow_multicast)
narrow_xbar_quadrant_s1.add_output("top", [])

# remote downstream mux
Expand Down
9 changes: 6 additions & 3 deletions util/solder/solder.py
Original file line number Diff line number Diff line change
Expand Up @@ -342,8 +342,10 @@ def emit(aw, dw, iw, uw, enable_multicast=False):
code += f"logic [{(dw + 7) // 8 - 1}:0], "
if enable_multicast:
user_t = "struct packed {"
user_t += f"logic [{max(0, aw - 1)}:0] mcast; "
user_t += f"logic [{max(0, uw - aw - 1)}:0] atomics_id;}}"
user_t += f"logic [{max(0, aw - 1)}:0] mcast;"
if uw > aw:
user_t += f" logic [{max(0, uw - aw - 1)}:0] atomics_id;"
user_t += "}"
else:
user_t = f"logic [{max(0, uw - 1)}:0]"
code += f"{user_t})\n"
Expand Down Expand Up @@ -1588,7 +1590,8 @@ def emit(self):
if not self.outputs[i]['is_mcast_target']:
if self.outputs[i+1]['is_mcast_target']:
violations.append(True)
assert (not violations), 'Multicast-targetable slaves must be at lower indices'
assert (not violations), \
f'{self.name}: multicast-targetable slaves must be at lower indices'
# Sort address map rules by `is_multicast_rule` to ensure that
# multicast rules are at lower indices
self.addrmap.sort(key=operator.itemgetter('is_mcast_rule'))
Expand Down

0 comments on commit 3d964a7

Please sign in to comment.