diff --git a/README.md b/README.md index ca08c8945..a3d30874a 100644 --- a/README.md +++ b/README.md @@ -187,11 +187,386 @@ To get a visualization of the traces, check out the `scripts/tracevis.py` script We also provide Synopsys Spyglass linting scripts in the `hardware/spyglass`. Run `make lint` in the `hardware` folder, with a specific MemPool configuration, to run the tests associated with the `lint_rtl` target. +## Publications +If you use MemPool in your work or research, you can cite us: + +**MemPool: A Scalable Manycore Architecture with a Low-Latency Shared L1 Memory** + +``` +@article{Riedel2023MemPool, + title = {{MemPool}: A Scalable Manycore Architecture with a Low-Latency Shared {L1} Memory}, + author = {Riedel, Samuel and Cavalcante, Matheus and Andri, Renzo and Benini, Luca}, + journal = {IEEE Transactions on Computers}, + year = {2023}, + volume = {72}, + number = {12}, + pages = {3561--3575}, + publisher = {IEEE Computer Society}, + doi = {10.1109/TC.2023.3307796} +} +``` +This paper was published on [IEEE Xplore](https://ieeexplore.ieee.org/document/10227739) and is also available on [arXiv:2303.17742 [cs.AR]](https://arxiv.org/abs/2303.17742) and the [ETH Research Collection](https://doi.org/10.3929/ethz-b-000643341). + + +The following publications give more details about MemPool, its extensions, and use cases: + +### 2021 + +
+MemPool: A Shared-L1 Memory Many-Core Cluster with a Low-Latency Interconnect +

+ +``` +@inproceedings{Cavalcante2021MemPool, + title = {{MemPool}: A Shared-{L1} Memory Many-Core Cluster with a Low-Latency Interconnect}, + author = {Cavalcante, Matheus and Riedel, Samuel and Pullini, Antonio and Benini, Luca}, + booktitle = {2021 Design, Automation, and Test in Europe Conference and Exhibition}, + address = {Grenoble, France}, + year = {2021}, + month = mar, + pages = {701--706}, + publisher = {IEEE}, + doi = {10.23919/DATE51398.2021.9474087} +} +``` +This paper was published on [IEEE Xplore](https://ieeexplore.ieee.org/document/9474087) and is also available on [arXiv:2012.02973 [cs.AR]](https://arxiv.org/abs/2012.02973). + +

+
+ + +
+3D SoC integration, beyond 2.5D chiplets +

+ +``` +@inproceedings{Beyne2021, + title = {{3D} {SoC} integration, beyond {2.5D} chiplets}, + author = {Beyne, Eric and Milojevic, Dragomir and {Van Der Plas}, Geert and Beyer, Gerald}, + booktitle = {Technical Digest - International Electron Devices Meeting, IEDM}, + year = {2021}, + pages = {79--82}, + publisher = {IEEE}, + doi = {10.1109/IEDM19574.2021.9720614} +} +``` +This paper was published on [IEEE Xplore](https://ieeexplore.ieee.org/document/9720614). + +

+
+ + +### 2022 + +
+MemPool-3D: Boosting Performance and Efficiency of Shared-L1 Memory Many-Core Clusters with 3D Integration +

+ +``` +@inproceedings{Cavalcante2022MemPool3D, + title = {{MemPool-3D}: Boosting Performance and Efficiency of Shared-{L1} Memory Many-Core Clusters with {3D} Integration}, + author = {Cavalcante, Matheus and Agnesina, Anthony and Riedel, Samuel and Brunion, Moritz and Garcia-Ortiz, Alberto and Milojevic, Dragomir and Catthoor, Francky and Lim, Sung Kyu and Benini, Luca}, + booktitle = {2022 Design, Automation, and Test in Europe Conference and Exhibition}, + address = {Online}, + year = {2022}, + month = mar, + pages = {394--399}, + publisher = {IEEE}, + doi = {10.23919/DATE54114.2022.9774726} +} +``` +This paper was published on [IEEE Xplore](https://ieeexplore.ieee.org/document/9774726) and is also available on [arXiv:2112.01168 [cs.AR]](https://arxiv.org/abs/2112.01168). + +

+
+ + +
+Hier-3D: A Hierarchical Physical Design Methodology for Face-to-Face-Bonded 3D ICs +

+ +``` +@inproceedings{Agnesina2022, + title = {{Hier-3D}: A Hierarchical Physical Design Methodology for Face-to-Face-Bonded {3D} {ICs}}, + author = {Agnesina, Anthony and Brunion, Moritz and Garcia-Ortiz, Alberto and Catthoor, Francky and Milojevic, Dragomir and Komalan, Manu and Cavalcante, Matheus and Riedel, Samuel and Benini, Luca and Lim, Sung Kyu}, + booktitle = {Proceedings of the ACM/IEEE International Symposium on Low Power Electronics and Design}, + address = {New York, NY, USA}, + year = {2022}, + month = aug, + publisher = {Association for Computing Machinery}, + doi = {10.1145/3531437.3539702} +} +``` +This paper was published on [ACM DL](https://dl.acm.org/doi/10.1145/3531437.3539702). + + +

+
+ + +
+Spatz: A Compact Vector Processing Unit for High-Performance and Energy-Efficient Shared-L1 Clusters +

+ +``` +@inproceedings{Cavalcante2022Spatz, + title = {Spatz: A Compact Vector Processing Unit for High-Performance and Energy-Efficient Shared-{L1} Clusters}, + author = {Cavalcante, Matheus and W{\"{u}}thrich, Domenic and Perotti, Matteo and Riedel, Samuel and Benini, Luca}, + booktitle = {2022 IEEE/ACM International Conference On Computer Aided Design (ICCAD)}, + address = {San Diego, California, USA}, + year = {2022}, + month = oct, + pages = {159--167}, + publisher = {Association for Computing Machinery}, + doi = {10.1145/3508352.3549367} +} +``` +This paper was published on [IEEE Xplore](https://ieeexplore.ieee.org/document/10069431) and is also available on [arXiv:2207.07970 [cs.AR]](https://arxiv.org/abs/2207.07970). + +

+
+ + +
+Thermal Performance Analysis of Mempool RISC-V Multicore SoC +

+ +``` +@article{Venkateswarlu2022, + title = {Thermal Performance Analysis of {Mempool} {RISC-V} Multicore {SoC}}, + author = {Venkateswarlu, Sankatali and Mishra, Subrat and Oprins, Herman and Vermeersch, Bjorn and Brunion, Moritz and Han, Jun Han and Stan, Mircea R. and Weckx, Pieter and Catthoor, Francky}, + journal = {IEEE Transactions on Very Large Scale Integration (VLSI) Systems}, + year = {2022}, + volume = {30}, + number = {11}, + pages = {1668--1676}, + publisher = {IEEE}, + doi = {10.1109/TVLSI.2022.3207553} +} +``` +This paper was published on [IEEE Xplore](https://ieeexplore.ieee.org/document/9905665). + +

+
+ + +### 2023 + +
+Towards Chip-Package-System Co-optimization of Thermally-limited System-On-Chips (SOCs) +

+ +``` +@inproceedings{Mishra2023, + title = {Towards Chip-Package-System Co-optimization of Thermally-limited System-On-Chips ({SOCs})}, + author = {Mishra, S. and Sankatali, V. and Vermeersch, B. and Brunion, M. and Lofrano, M. and Abdi, D. and Oprins, H. and Biswas, D. and Zografos, O. and Hiblot, G. and {Van Der Plas}, G. and Weckx, P. and Hellings, G. and Myers, J. and Catthoor, F. and Ryckaert, J.}, + booktitle = {IEEE International Reliability Physics Symposium Proceedings}, + address = {Monterey, CA, USA}, + year = {2023}, + month = mar, + publisher = {IEEE}, + doi = {10.1109/IRPS48203.2023.10117979} +} +``` +This paper was published on [IEEE Xplore](https://ieeexplore.ieee.org/document/10117979). + +

+
+ + +
+Efficient Parallelization of 5G-PUSCH on a Scalable RISC-V Many-Core Processor +

+ +``` +@inproceedings{Bertuletti2023PUSCH, + title = {Efficient Parallelization of {5G-PUSCH} on a Scalable {RISC-V} Many-Core Processor}, + author = {Bertuletti, Marco and Zhang, Yichao and Vanelli-Coralli, Alessandro and Benini, Luca}, + booktitle = {2023 Design, Automation, and Test in Europe Conference and Exhibition}, + address = {Antwerp, Belgium}, + year = {2023}, + month = apr, + pages = {396--401}, + publisher = {IEEE}, + doi = {10.23919/DATE56975.2023.10137247} +} +``` +This paper was published on [IEEE Xplore](https://ieeexplore.ieee.org/document/10137247) and is also available on [arXiv:2210.09196 [cs.DC]](https://arxiv.org/abs/2210.09196). + +

+
+ + +
+MemPool Meets Systolic: Flexible Systolic Computation in a Large Shared-Memory Processor Cluster +

+ +``` +@inproceedings{Riedel2023MmS, + title = {{MemPool} Meets Systolic: Flexible Systolic Computation in a Large Shared-Memory Processor Cluster}, + author = {Riedel, Samuel and Khov, Gua Hao and Mazzola, Sergio and Cavalcante, Matheus and Andri, Renzo and Benini, Luca}, + booktitle = {2023 Design, Automation, and Test in Europe Conference and Exhibition}, + address = {Antwerp, Belgium}, + year = {2023}, + month = apr, + pages = {503--504}, + publisher = {IEEE}, + doi = {10.23919/DATE56975.2023.10136909} +} +``` +This paper was published on [IEEE Xplore](https://ieeexplore.ieee.org/document/10136909). + +

+
+ + +
+Fast Shared-Memory Barrier Synchronization for a 1024-Cores RISC-V Many-Core Cluster +

+ +``` +@inproceedings{Bertuletti2023Barrier, + title = {Fast Shared-Memory Barrier Synchronization for a 1024-Cores {RISC-V} Many-Core Cluster}, + author = {Bertuletti, Marco and Riedel, Samuel and Zhang, Yichao and Vanelli-Coralli, Alessandro and Benini, Luca}, + booktitle = {Embedded Computer Systems: Architectures, Modeling, and Simulation}, + editor = {Silvano, Cristina and Pilato, Christian and Reichenbach, Marc}, + address = {Samos}, + year = {2023}, + month = jul, + pages = {241--254}, + publisher = {Springer Nature Switzerland}, + doi = {10.1007/978-3-031-46077-7_16} +} +``` +This paper was published on [Springer Link](https://link.springer.com/chapter/10.1007/978-3-031-46077-7_16) and is also available on [arXiv:2307.10248 [cs.DC]](https://arxiv.org/abs/2307.10248) and the [ETH Research Collection](https://doi.org/10.3929/ethz-b-000648454). + +

+
+ + +
+MemPool: A Scalable Manycore Architecture with a Low-Latency Shared L1 Memory +

+ +``` +@article{Riedel2023MemPool, + title = {{MemPool}: A Scalable Manycore Architecture with a Low-Latency Shared {L1} Memory}, + author = {Riedel, Samuel and Cavalcante, Matheus and Andri, Renzo and Benini, Luca}, + journal = {IEEE Transactions on Computers}, + year = {2023}, + volume = {72}, + number = {12}, + pages = {3561--3575}, + publisher = {IEEE Computer Society}, + doi = {10.1109/TC.2023.3307796} +} +``` +This paper was published on [IEEE Xplore](https://ieeexplore.ieee.org/document/10227739) and is also available on [arXiv:2303.17742 [cs.AR]](https://arxiv.org/abs/2303.17742) and the [ETH Research Collection](https://doi.org/10.3929/ethz-b-000643341). + +

+
+ + +
+Impact of 3-D Integration on Thermal Performance of RISC-V MemPool Multicore SOC +

+ +``` +@article{Venkateswarlu2023, + title = {Impact of {3-D} Integration on Thermal Performance of {RISC-V} {MemPool} Multicore {SOC}}, + author = {Venkateswarlu, Sankatali and Mishra, Subrat and Oprins, Herman and Vermeersch, Bjorn and Brunion, Moritz and Han, Jun Han and Stan, Mircea R. and Biswas, Dwaipayan and Weckx, Pieter and Catthoor, Francky}, + journal = {IEEE Transactions on Very Large Scale Integration (VLSI) Systems}, + year = {2023}, + volume = {31}, + number = {12}, + pages = {1896--1904}, + publisher = {IEEE}, + doi = {10.1109/TVLSI.2023.3314135} +} +``` +This paper was published on [IEEE Xplore](https://ieeexplore.ieee.org/document/10261872). + +

+
+ + +
+MinPool: A 16-core NUMA-L1 Memory RISC-V Processor Cluster for Always-on Image Processing in 65nm CMOS +

+ +``` +@inproceedings{Riedel2023MinPool, + author={Riedel, Samuel and Cavalcante, Matheus and Frouzakis, Manos and W{\"{u}}thrich, Domenic and Mustafa, Enis and Billa, Arlind and Benini, Luca}, + title={{MinPool}: A 16-core {NUMA-L1} Memory {RISC-V} Processor Cluster for Always-on Image Processing in 65nm {CMOS}}, + booktitle={2023 30th IEEE International Conference on Electronics, Circuits and Systems (ICECS)}, + address = {Istanbul, Turkiye}, + year={2023}, + month=dec, + pages={1--4}, + publisher={IEEE}, + doi={10.1109/ICECS58634.2023.10382925} +} +``` +This paper was published on [IEEE Xplore](https://ieeexplore.ieee.org/document/10382925) and is also available on the [ETH Research Collection](https://doi.org/10.3929/ethz-b-000653598). + +

+
+ +### 2024 + +
+LRSCwait: Enabling Scalable and Efficient Synchronization in Manycore Systems through Polling-Free and Retry-Free Operation +

+ +``` +@article{Riedel2024LRSCwait, + title={{LRSCwait}: Enabling Scalable and Efficient Synchronization in Manycore Systems through Polling-Free and Retry-Free Operation}, + author={Riedel, Samuel and Gantenbein, Marc and Ottaviano, Alessandro and Hoefler, Torsten and Benini, Luca}, + journal={arXiv:2401.09359 [cs.AR]}, + year={2024}, + month=jan +} +``` +This paper is available on [arXiv:2401.09359 [cs.AR]](https://arxiv.org/abs/2401.09359). + +

+
+ + +
+Enabling Efficient Hybrid Systolic Computation in Shared L1-Memory Manycore Clusters +

+ +``` +@article{Mazzola2024Systolic, + title={Enabling Efficient Hybrid Systolic Computation in Shared {L1}-Memory Manycore Clusters}, + author={Mazzola, Sergio and Riedel, Samuel and Benini, Luca}, + journal={arXiv:2402.12986 [cs.AR]}, + year={2024}, + month=feb +} +``` +This paper is available on [arXiv:2402.12986 [cs.AR]](https://arxiv.org/abs/2402.12986). + +

+
+ +## Chips + +The MemPool architecture has been taped out in the following chips: + +- 2021 [**MinPool**](http://asic.ethz.ch/2021/Minpool.html): A 16-core prototype of MemPool. +- 2024 [**Heartstream**](http://asic.ethz.ch/2024/Heartstream.html): A 64-core version of MemPool with systolic and FPU support. + ## License MemPool is released under permissive open source licenses. Most of MemPool's source code is released under the Apache License 2.0 (`Apache-2.0`) see [`LICENSE`](LICENSE). The code in `hardware` is released under Solderpad v0.51 (`SHL-0.51`) see [`hardware/LICENSE`](hardware/LICENSE). Note, MemPool includes several third-party packages with their own licenses: +
+Note, MemPool includes several third-party packages with their own licenses: +

+ ### Software - `software/runtime/printf.{c,h}` is licensed under the MIT license. @@ -227,21 +602,5 @@ The open-source simulator [Verilator](https://www.veripool.org/verilator) can be - `toolchain/verilator` is licensed under GPL. See [Verilator's license](https://github.com/verilator/verilator/blob/master/LICENSE) for more details. -## Publication - -If you want to use MemPool, you can cite us: - -``` -@InProceedings{MemPool2021, - author = {Matheus Cavalcante and Samuel Riedel and Antonio Pullini and Luca Benini}, - title = {{MemPool}: A Shared-{L1} Memory Many-Core Cluster with a Low-Latency Interconnect}, - booktitle = {2021 Design, Automation, and Test in Europe Conference and Exhibition (DATE)}, - year = 2021, - month = mar, - address = {Grenoble, FR}, - pages = {701-706}, - doi = {10.23919/DATE51398.2021.9474087} -} -``` - -This paper is also available at arXiv, at the following link: [arXiv:2012.02973 [cs.AR]](https://arxiv.org/abs/2012.02973). +

+