diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml new file mode 100644 index 0000000..05df2ca --- /dev/null +++ b/.github/workflows/build.yml @@ -0,0 +1,25 @@ +name: Build and test cpu +on: + push: + paths-ignore: + - '**.md' + - 'LICENSE' + pull_request: + paths: + - '**.md' + - 'LICENSE' + +jobs: + build: + name: Build + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + with: + submodules: recursive + + - name: Build + run: make + + - name: Test cpu + run: make test-cpp diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..98e980a --- /dev/null +++ b/.gitignore @@ -0,0 +1,46 @@ +# Prerequisites +*.d + +# Compiled Object files +*.slo +*.lo +*.o +*.obj + +# Precompiled Headers +*.gch +*.pch + +# Compiled Dynamic libraries +*.so +*.dylib +*.dll + +# Fortran module files +*.mod +*.smod + +# Compiled Static libraries +*.lai +*.la +*.a +*.lib + +# Executables +*.exe +*.out +*.app + +build/ +build_debug/ + +.vscode/ + +# python +*.egg-info +*.pyc + +# onnx model +*.onnx +*.pb +*.npy diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..e856b94 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,6 @@ +[submodule "3rd-party/nlohmann_json_cmake_fetchcontent"] + path = 3rd-party/nlohmann_json_cmake_fetchcontent + url = git@github.com:ArthurSonzogni/nlohmann_json_cmake_fetchcontent.git +[submodule "3rd-party/googletest"] + path = 3rd-party/googletest + url = git@github.com:google/googletest.git diff --git a/3rd-party/googletest b/3rd-party/googletest new file mode 160000 index 0000000..3e3b44c --- /dev/null +++ b/3rd-party/googletest @@ -0,0 +1 @@ +Subproject commit 3e3b44c300b21eb996a2957782421bc0f157af18 diff --git a/3rd-party/nlohmann_json_cmake_fetchcontent b/3rd-party/nlohmann_json_cmake_fetchcontent new file mode 160000 index 0000000..326308d --- /dev/null +++ b/3rd-party/nlohmann_json_cmake_fetchcontent @@ -0,0 +1 @@ +Subproject commit 326308d7512d2168ae00199aec4dd0f714526e89 diff --git a/CMakeLists.txt 
b/CMakeLists.txt new file mode 100644 index 0000000..836a7e0 --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,96 @@ +# Do not change these options in this file. Use cmake.config, cmake -DOPTION=VALUE, or ccmake to specify them. +option(BUILD_TEST "Build tests" OFF) + +cmake_minimum_required(VERSION 3.17) + +include(CMakeDependentOption) +project(InfiniTensor C CXX) + +cmake_dependent_option(BUILD_TEST_CORE "Build tests for core components" ON BUILD_TEST OFF) + +set(DEFAULT_BUILD_TYPE "RelWithDebInfo") +# Build Type +if(CMAKE_BUILD_TYPE STREQUAL "Debug") + message("Configuring for Debug build.") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g -O0") + add_compile_definitions(DEBUG_MODE) +elseif(CMAKE_BUILD_TYPE STREQUAL "Release") + message("Configuring for Release build.") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O2") + add_compile_definitions(NDEBUG) +elseif(CMAKE_BUILD_TYPE STREQUAL "RelWithDebInfo") + message("Configuring for RelWithDebInfo build.") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g -O2") +else() + message("Build type not specified. 
Configuring for RelWithDebInfo build.") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g -O2") +endif() + + +set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_EXTENSIONS OFF) # -std=gnu++11 when on, -std=c++11 when off +add_compile_options(-Wno-error=unused-variable) + +find_package( + Python + COMPONENTS Interpreter Development + REQUIRED) + +# OpenMP +find_package(OpenMP) +if(OpenMP_C_FOUND) + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}") +endif() +if(OpenMP_CXX_FOUND) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") +endif() + +include_directories(include) + +if(BUILD_TEST) + set(BUILD_GMOCK + OFF + CACHE BOOL "Do not build gmock" FORCE) + set(INSTALL_GTEST + OFF + CACHE BOOL "Do not install gtest" FORCE) + add_subdirectory(3rd-party/googletest) + include_directories(3rd-party/googletest/googletest/include) +endif() + +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g -Wall -Werror -Wno-error=deprecated-declarations -Wno-error=pointer-arith") +set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -UNDEBUG") # Enable assertion +set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -UNDEBUG") # Enable assertion + + +# Source files +file(GLOB_RECURSE SRC src/core/*.cc src/kernels/cpu/*.cc src/operators/*.cc src/utils/*.cc) + +if(USE_INTELCPU) + file(GLOB_RECURSE SRC_INTELCPU src/intelcpu/*.cc src/kernels/intelcpu/*.cc ) + list (APPEND SRC ${SRC_INTELCPU}) +endif() + +# Libraries +add_library(InfiniTensor SHARED ${SRC}) + +function(build_test files) + # Non-recursive glob for skip failed tests + file(GLOB TEST_SOURCES ${files}) + foreach(testsourcefile ${TEST_SOURCES}) + get_filename_component(testname ${testsourcefile} NAME_WE) + add_executable(${testname} ${testsourcefile}) + target_link_libraries(${testname} InfiniTensor GTest::gtest_main) + add_test(NAME ${testname} COMMAND ${testname}) + endforeach(testsourcefile ${TEST_SOURCES}) +endfunction() + +if(BUILD_TEST) + add_compile_definitions(BUILD_TEST=1) + enable_testing() + 
if(BUILD_TEST_CORE) + build_test(test/core/*.cc) + build_test(test/operators/*.cc) + build_test(test/kernels/nativecpu/*.cc) + endif() +endif() diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..261eeb9 --- /dev/null +++ b/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). 
+ + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. 
Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative 
Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..35ef7ef --- /dev/null +++ b/Makefile @@ -0,0 +1,18 @@ +.PHONY : build clean format install-python test-cpp test-onnx + +TYPE ?= Release +TEST ?= ON + +CMAKE_OPT = -DCMAKE_BUILD_TYPE=$(TYPE) +CMAKE_OPT += -DBUILD_TEST=$(TEST) + +build: + mkdir -p build/$(TYPE) + cd build/$(TYPE) && cmake $(CMAKE_OPT) ../.. 
&& make -j8 + +clean: + rm -rf build + +test-cpp: + @echo + cd build/$(TYPE) && make test diff --git a/README.md b/README.md new file mode 100644 index 0000000..7bbac1c --- /dev/null +++ b/README.md @@ -0,0 +1,7 @@ +# TinyInfiniTensor + +一个简化版的 ai compiler,用于初学者快速上手学习,保留了计算图和 kernel 层的概念,能够基于 c++ 搭建计算图进行推理计算,目前只支持 cpu 平台。 + +[环境部署文档](docs/项目部署.md) + +[训练营作业介绍文档](docs/训练营作业介绍.md) \ No newline at end of file diff --git "a/docs/\350\256\255\347\273\203\350\220\245\344\275\234\344\270\232\344\273\213\347\273\215.md" "b/docs/\350\256\255\347\273\203\350\220\245\344\275\234\344\270\232\344\273\213\347\273\215.md" new file mode 100644 index 0000000..f3a1f7b --- /dev/null +++ "b/docs/\350\256\255\347\273\203\350\220\245\344\275\234\344\270\232\344\273\213\347\273\215.md" @@ -0,0 +1,247 @@ +# 本地自测 + +运行 ``make test-cpp``,通过所有测例即为完成作业,每个测例 10 分。 + +1. test_allocator:依赖作业一 +2. test_cast:依赖作业四 +3. test_clip:依赖作业三 +4. test_concat:依赖作业五 +5. test_element_wise:依赖作业六 +6. test_transpose:依赖作业二 +7. test_nativecpu_concat:依赖作业一、作业五 +8. test_nativecpu_elementwise:依赖作业一、作业六 +9. test_nativecpu_transpose:依赖作业一、作业二 +10. test_matmul:依赖作业六、作业七 +11. 
test_graph:依赖作业八 + +# 作业题目 + +## 作业一:内存分配器 + +难度:⭐⭐⭐⭐ + +对应测例:``test_allocator``,``test_nativecpu_concat``,``test_nativecpu_elementwise``,``test_nativecpu_transpose`` + +### step1 完善 allocator 声明 + +需要实现的代码块位置:`include/core/allocator.h` + +````c++ +// =================================== 作业 =================================== +// TODO:可能需要设计一个数据结构来存储free block,以便于管理和合并 +// HINT: 可以使用一个 map 来存储 free block,key 为 block 的起始/结尾地址,value 为 block 的大小 +// =================================== 作业 =================================== +```` + +### step2 完善 allocator 定义 + +需要实现的代码块位置:`src/core/allocator.cc` + +完善分配函数: + +````c++ + size_t Allocator::alloc(size_t size) + { + IT_ASSERT(this->ptr == nullptr); + // pad the size to the multiple of alignment + size = this->getAlignedSize(size); + + // =================================== 作业 =================================== + // TODO: 设计一个算法来分配内存,返回起始地址偏移量 + // =================================== 作业 =================================== + + return 0; + } +```` + +完善释放函数: + +````c++ + void Allocator::free(size_t addr, size_t size) + { + IT_ASSERT(this->ptr == nullptr); + size = getAlignedSize(size); + + // =================================== 作业 =================================== + // TODO: 设计一个算法来回收内存 + // =================================== 作业 =================================== + } +```` + +### step3 计算图的内存分配部分 + +需要实现的代码块位置:`src/core/graph.cc` + +完善计算图的内存分配部分: + +````c++ + void GraphObj::dataMalloc() + { + // topological sorting first + IT_ASSERT(topo_sort() == true); + + // =================================== 作业 =================================== + // TODO:利用 allocator 给计算图分配内存 + // HINT: 获取分配好的内存指针后,可以调用 tensor 的 setDataBlob 函数给 tensor 绑定内存 + // =================================== 作业 =================================== + + allocator.info(); + } +```` + +## 作业二:transpose 算子形状推导 + +难度:⭐ + +对应测例:``test_transpose``,``test_nativecpu_transpose`` + +需要实现的代码块位置:`src/operators/transpose.cc` + +````c++ + optional> 
TransposeObj::inferShape(const TensorVec &inputs) + { + const auto A = inputs[0]; + auto input_dim = A->getDims(); + auto output_dim = input_dim; + int rank = A->getRank(); + + // =================================== 作业 =================================== + // TODO:修改 output_dim,返回正确的 transpose 后的 shape + // REF: https://onnx.ai/onnx/operators/onnx__Transpose.html#transpose-21 + // =================================== 作业 =================================== + + return {{}}; + } + +```` + +## 作业三:clip 算子形状推导 + +难度:⭐ + +对应测例:``test_clip`` + +需要实现的代码块位置:`src/operators/unary.cc` + +````c++ + optional> ClipObj::inferShape(const TensorVec &inputs) + { + // =================================== 作业 =================================== + // TODO:返回经过 clip 操作后的 shape + // REF: https://onnx.ai/onnx/operators/onnx__Clip.html#clip-13 + // =================================== 作业 =================================== + return {{}}; + } +```` + +## 作业四:cast 算子形状推导 & 数据类型推导 + +难度:⭐⭐ + +对应测例:``test_cast`` + +需要实现的代码块位置:`src/operators/unary.cc` + +````c++ + vector CastObj::inferDataType(const TensorVec &inputs) const + { + // =================================== 作业 =================================== + // TODO:返回经过 cast 操作后, 输出 tensor 的数目和数据类型 + // REF_FILE: src/core/operator.cc + // REF: https://onnx.ai/onnx/operators/onnx__Cast.html#cast-21 + // =================================== 作业 =================================== + return {}; + } + + optional> CastObj::inferShape(const TensorVec &inputs) + { + // =================================== 作业 =================================== + // TODO:返回经过 cast 操作后的 shape + // REF: https://onnx.ai/onnx/operators/onnx__Cast.html#cast-21 + // =================================== 作业 =================================== + return {{}}; + } +```` + + + +## 作业五:concat 算子形状推导 + +难度:⭐⭐ + +对应测例:``test_concat``,``test_nativecpu_concat`` + +需要实现的代码块位置:`src/operators/concat.cc` + +````c++ +optional> ConcatObj::inferShape(const TensorVec &inputs) { + Shape dims = 
inputs[0]->getDims(); + auto rank = inputs[0]->getRank(); + + // =================================== 作业 =================================== + // TODO:修改 dims,返回正确的 concat 后的 shape + // REF: https://onnx.ai/onnx/operators/onnx__Concat.html#concat-13 + // =================================== 作业 =================================== + + return {{dims}}; +} +```` + +## 作业六:双向广播 + +难度:⭐⭐⭐ + +对应测例:``test_element_wise``,``test_nativecpu_elementwise``,``test_matmul`` + +需要实现的代码块位置:`src/utils/operator_utils.cc` + +````c++ +Shape infer_broadcast(const Shape &A, const Shape &B) { + + // =================================== 作业 =================================== + // TODO:对 A 和 B 进行双向广播,返回广播后的形状。 + // REF: https://github.com/onnx/onnx/blob/main/docs/Broadcasting.md + // =================================== 作业 =================================== + + return {}; +} +```` + +## 作业七:矩阵乘形状推导 + +难度:⭐⭐⭐ + +对应测例:``test_matmul`` + +需要实现的代码块位置:`src/operators/matmul.cc` + +````c++ +optional> MatmulObj::inferShape(const TensorVec &inputs) + { + // =================================== 作业 =================================== + // TODO:返回经过 matmul 操作后的 shape + // REF: https://github.com/onnx/onnx/blob/main/docs/Operators.md#gemm + // =================================== 作业 =================================== + return {{}}; + } +```` + +## 作业八:简单图优化规则实现 + +难度:⭐⭐⭐⭐ + +对应测例:``test_graph`` + +需要实现的代码块位置:`src/core/graph.cc` + +````c++ +void GraphObj::optimize() +{ + // =================================== 作业 =================================== + // TODO: 设计一个算法来实现指定的图优化规则 + // 图优化规则如下: + // 1. 去除冗余的算子(例如,两个相邻的算子都是 transpose 算子,且做的是相反的操作,可以将其全部删除) + // 2. 
合并算子(例如,矩阵乘算子中含有属性transA、transB,如果其输入存在transpose,且对最后两个维度做交换,就可以将transpose融入到矩阵乘算子的属性中去) + // =================================== 作业 =================================== +} +```` \ No newline at end of file diff --git "a/docs/\351\241\271\347\233\256\351\203\250\347\275\262.md" "b/docs/\351\241\271\347\233\256\351\203\250\347\275\262.md" new file mode 100644 index 0000000..5690349 --- /dev/null +++ "b/docs/\351\241\271\347\233\256\351\203\250\347\275\262.md" @@ -0,0 +1,35 @@ +### 环境准备 +建议使用Linux系统或Mac系统,windows下使用WSL,配置方法和Linux一致。 + +1. 安装gcc、g++,请确认版本为 11.3 及以上的稳定版本 +``` bash +# linux 使用apt安装 +sudo apt install gcc g++ + +# mac 使用Homebrew安装 +brew install gcc +``` + +2. 安装CMake,请确认版本为 3.17 及以上的稳定版本 +``` bash +# linux 使用apt安装 +sudo apt install cmake + +# mac 使用Homebrew安装 +brew install cmake +``` + +2. 安装make +``` bash +# linux 使用apt安装 +sudo apt install make + +# mac 使用Homebrew安装 +brew install make +``` + +### 构建命令 +配置好上述环境后,进入项目目录后可以通过以下命令进行构建。 +- `make`/`make build`: 构建整个项目; +- `make test-cpp`: 构建项目后执行测例; +- `make clean`:清理生成文件 \ No newline at end of file diff --git a/include/core/allocator.h b/include/core/allocator.h new file mode 100644 index 0000000..002601d --- /dev/null +++ b/include/core/allocator.h @@ -0,0 +1,59 @@ +#pragma once +#include "core/runtime.h" +#include "core/tensor.h" +#ifdef BUILD_TEST +#include "gtest/gtest.h" +#endif +#include +#include +#include + +namespace infini { + class Allocator + { + private: + Runtime runtime; + + size_t used; + + size_t peak; + + size_t alignment; + + // pointer to the memory actually allocated + void *ptr; + + // =================================== 作业 =================================== + // TODO:可能需要设计一个数据结构来存储free block,以便于管理和合并 + // HINT: 可以使用一个 map 来存储 free block,key 为 block 的起始/结尾地址,value 为 block 的大小 + // =================================== 作业 =================================== + + public: + Allocator(Runtime runtime); + + virtual ~Allocator(); + + // function: simulate memory allocation + // arguments: + // 
size: size of memory block to be allocated + // return: head address offset of the allocated memory block + size_t alloc(size_t size); + + // function: simulate memory free + // arguments: + // addr: head address offset of memory block to be free + // size: size of memory block to be freed + void free(size_t addr, size_t size); + + // function: perform actual memory allocation + // return: pointer to the head address of the allocated memory + void *getPtr(); + + void info(); + + private: + // function: memory alignment, rouned up + // return: size of the aligned memory block + size_t getAlignedSize(size_t size); + }; +} diff --git a/include/core/blob.h b/include/core/blob.h new file mode 100644 index 0000000..01684f6 --- /dev/null +++ b/include/core/blob.h @@ -0,0 +1,25 @@ +#pragma once +#include "core/common.h" +#include "core/ref.h" + +namespace infini { + +class RuntimeObj; +using Runtime = Ref; + +class BlobObj +{ + Runtime runtime; + void *ptr; + +public: + BlobObj(Runtime runtime, void *ptr) : runtime(runtime), ptr(ptr) {} + BlobObj(BlobObj &other) = delete; + BlobObj &operator=(BlobObj const &) = delete; + ~BlobObj() {}; + + template + T getPtr() const { return reinterpret_cast(ptr); } +}; + +} // namespace infini diff --git a/include/core/common.h b/include/core/common.h new file mode 100644 index 0000000..e4fd65b --- /dev/null +++ b/include/core/common.h @@ -0,0 +1,85 @@ +#pragma once +#include "utils/exception.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace infini { +using std::list; +using std::map; +using std::optional; +using std::pair; +using std::set; +using std::string; +using std::tie; +using std::to_string; +using std::tuple; +using std::unordered_map; +using std::vector; + +// Metaprogramming utilities +#define _CAT(A, B) A##B +#define _SELECT(NAME, NUM) _CAT(NAME##_, NUM) +#define _GET_COUNT(_1, _2, _3, _4, _5, _6, _7, _8, _9, _10, COUNT, 
...) COUNT +#define _VA_SIZE(...) _GET_COUNT(__VA_ARGS__, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1) +#define _VA_SELECT(NAME, ...) _SELECT(NAME, _VA_SIZE(__VA_ARGS__))(__VA_ARGS__) + +// Assert: conditions should have no side effect +#define _IT_ASSERT_2(condition, info) \ + static_cast(condition) \ + ? void(0) \ + : throw ::infini::Exception( \ + std::string("[") + __FILE__ + ":" + std::to_string(__LINE__) + \ + "] Assertion failed (" + #condition + "): " + info) +#define _IT_ASSERT_1(condition) _IT_ASSERT_2(condition, "") +#define IT_ASSERT(...) _VA_SELECT(_IT_ASSERT, __VA_ARGS__) + +#define IT_TODO_HALT() _IT_ASSERT_2(false, "Unimplemented") +#define IT_TODO_HALT_MSG(msg) _IT_ASSERT_2(false, msg) +#define IT_ASSERT_TODO(condition) _IT_ASSERT_2(condition, "Unimplemented") +#define IT_TODO_SKIP() puts("Unimplemented " __FILE__ ":" __LINE__) + +// std::to_underlying is avaiable since C++23 +template auto enum_to_underlying(T e) { + return static_cast>(e); +} + +template std::string vecToString(const std::vector &vec) { + std::stringstream ss; + ss << "["; + for (size_t i = 0; i < vec.size(); ++i) { + ss << vec.at(i); + if (i < vec.size() - 1) { + ss << ","; + } + } + ss << "]"; + return ss.str(); +} + +template std::string vecToString(const T *st, size_t length) { + std::stringstream ss; + ss << "["; + size_t i = 0; + for (i = 0; i < length; i++) { + ss << *(st + i); + if (i < length - 1) { + ss << ","; + } + } + ss << "]"; + return ss.str(); +} + +} // namespace infini diff --git a/include/core/data_type.h b/include/core/data_type.h new file mode 100644 index 0000000..0b7c1fa --- /dev/null +++ b/include/core/data_type.h @@ -0,0 +1,103 @@ +#pragma once +#include "core/common.h" + +namespace infini { + +class DataType { + public: + // + static const DataType Undefine; + static const DataType Float32; + static const DataType UInt8; + static const DataType Int8; + static const DataType UInt16; + static const DataType Int16; + static const DataType Int32; + static const DataType 
Int64; + static const DataType String; + static const DataType Bool; + static const DataType Float16; + static const DataType Double; + static const DataType UInt32; + static const DataType UInt64; + static const DataType BFloat16; + // "sizePerElement" show the DType to cpu_type + // DataType::Bool -> int8_t DataType::Float16 -> uint16_t + static constexpr size_t sizePerElement[]{0, + sizeof(float), + sizeof(uint8_t), + sizeof(int8_t), + sizeof(uint16_t), + sizeof(int16_t), + sizeof(int32_t), + sizeof(int64_t), + sizeof(std::string), + sizeof(int8_t), + sizeof(uint16_t), + sizeof(double), + sizeof(uint32_t), + sizeof(uint64_t), + 0, + 0, + sizeof(uint16_t)}; + + static constexpr std::string_view names[]{ + "Undefine", "Float32", "UInt8", "Int8", "UInt16", + "Int16", "Int32", "Int64", "String", "Bool", + "Float16", "Double", "UInt32", "UInt64", "PlaceHolder", + "PlaceHolder", "BFloat16"}; + + static constexpr int cpuType[]{-1, 0, 2, 3, 4, 5, 6, 7, -1, + 3, 4, 9, 1, 8, -1, -1, 4}; + + private: + int index; + + public: + // FIXME: default ctor should be deleted but json requires it. 
Solution: + // https://github.com/nlohmann/json#how-can-i-use-get-for-non-default-constructiblenon-copyable-types + DataType() = default; + constexpr DataType(int index) : index(index) {} + bool operator==(const DataType &rhs) const { return index == rhs.index; } + bool operator<(const DataType &rhs) const { return index < rhs.index; } + + template static int get() { + IT_TODO_HALT_MSG("Unsupported data type"); + } + size_t getSize() const { return sizePerElement[index]; } + string toString() const { return string(names[index]); } + int cpuTypeInt() const { return cpuType[index]; } + int getIndex() const { return index; } +}; + +// Method definitions are out of the declaration due to GCC bug: +// https://stackoverflow.com/questions/49707184/explicit-specialization-in-non-namespace-scope-does-not-compile-in-gcc +template <> inline int DataType::get() { return 0; } +template <> inline int DataType::get() { return 1; } +template <> inline int DataType::get() { return 2; } +template <> inline int DataType::get() { return 3; } +template <> inline int DataType::get() { return 4; } +template <> inline int DataType::get() { return 5; } +template <> inline int DataType::get() { return 6; } +template <> inline int DataType::get() { return 7; } +template <> inline int DataType::get() { return 8; } +template <> inline int DataType::get() { return 9; } + +template struct DT {}; +template <> struct DT<0> { using t = bool; }; +template <> struct DT<1> { using t = float; }; +template <> struct DT<2> { using t = uint8_t; }; +template <> struct DT<3> { using t = int8_t; }; +template <> struct DT<4> { using t = uint16_t; }; +template <> struct DT<5> { using t = int16_t; }; +template <> struct DT<6> { using t = int32_t; }; +template <> struct DT<7> { using t = int64_t; }; +template <> struct DT<8> { using t = char; }; +template <> struct DT<9> { using t = int8_t; }; +template <> struct DT<10> { using t = uint16_t; }; +template <> struct DT<11> { using t = double; }; +template <> 
struct DT<12> { using t = uint32_t; }; +template <> struct DT<13> { using t = uint64_t; }; +template <> struct DT<16> { using t = uint16_t; }; + +} // namespace infini diff --git a/include/core/graph.h b/include/core/graph.h new file mode 100644 index 0000000..9326eaf --- /dev/null +++ b/include/core/graph.h @@ -0,0 +1,119 @@ +#pragma once +#include "core/allocator.h" +#include "core/operator.h" +#include "core/tensor.h" + +namespace infini +{ + + class GraphObj : public Object + { + protected: + Runtime runtime; + TensorVec tensors; + OpVec ops; + Allocator allocator; + + public: + explicit GraphObj(Runtime runtime) + : runtime(runtime), allocator(runtime), sorted(false){}; + string toString() const override; + Runtime getRuntime() const { return runtime; } + + Tensor addTensor(Shape dim, DataType dtype = DataType::Float32); + Tensor addTensor(const Tensor &tensor); + TensorVec addTensor(const TensorVec &tensors); + void removeOperator(Operator op) + { + auto it = std::find(ops.begin(), ops.end(), op); + if (it != ops.end()) + ops.erase(it); + } + + void removeTensor(Tensor tensor) + { + auto it = std::find(tensors.begin(), tensors.end(), tensor); + if (it != tensors.end()) + tensors.erase(it); + } + + const TensorVec &getTensors() const { return tensors; } + const OpVec &getOperators() const { return ops; } + Tensor getTensor(int) const; + + /** + * @brief Sort the nodes in topological order. + * It returns true if the sorting is successful. + * Otherwise false is returned, means that there are rings in the graph, + * so the topological sorting fails. + */ + bool topo_sort(); + + void optimize(); + + void shape_infer(); + + void dataMalloc(); + + /** + * @brief Add an operator and create its outputs. Output tensor arguments + * should be empty Refs (e.g., nullptr). 
+ */ + template + Ref addOp(Args &&...args) + { + Ref op = infini::make_ref(this, std::forward(args)...); + addOperatorAndConnect(op); + return op; + } + + /** + * @brief Add an operator with its outputs specified. + */ + template + Ref addOpWithOutputs(Args &&...args) + { + Ref op = infini::make_ref(nullptr, std::forward(args)...); + addOperatorAndConnect(op); + return op; + } + + /** + * @brief Gets input tensors of this graph. + */ + inline TensorVec getInputs() const + { + TensorVec ret; + for (const auto &t : tensors) + if (!t->getSource()) + ret.emplace_back(t); + return ret; + } + + /** + * @brief Gets output tensors of this graph. + */ + inline TensorVec getOutputs() const + { + TensorVec ret; + for (const auto &t : tensors) + if (t->getTargets().empty()) + ret.emplace_back(t); + return ret; + } + + bool checkValid() const; + + private: + /** + * @brief Add reverse connections and Op relationship in ctor. + */ + void addOperatorAndConnect(const Operator &op); + + /** + * @brief If the nodes is sorted in topological order. + */ + bool sorted; + }; + +} // namespace infini \ No newline at end of file diff --git a/include/core/kernel.h b/include/core/kernel.h new file mode 100644 index 0000000..a762424 --- /dev/null +++ b/include/core/kernel.h @@ -0,0 +1,87 @@ +#pragma once +#include "core/common.h" +#include "core/operator.h" +#include "core/tensor.h" +#include "utils/operator_utils.h" +#include + +namespace infini +{ + + class RuntimeObj; + + class Kernel + { + public: + Kernel() {} + virtual ~Kernel() {} + + /** + * @brief Executes an op with a default parameter. 
+ */ + virtual void compute(const Operator &op, + const RuntimeObj *context) const = 0; + }; + + class KernelRegistry + { + public: + using KernelRecord = + tuple; // Kernel, name, ID + + private: + std::map kernels; + int nKernels = 0; + + public: + ~KernelRegistry() + { + for (auto &[k, v] : kernels) + delete std::get<0>(v); + } + static KernelRegistry &getInstance() + { + static KernelRegistry instance; + return instance; + } + bool registerKernel(const KernelAttrs &key, Kernel *kernel, string name) + { + IT_ASSERT(kernels.find(key) == kernels.end(), + "Kernel already registered"); + kernels.emplace(key, KernelRecord{kernel, name, ++nKernels}); + return true; + } + Kernel *getKernel(const KernelAttrs &kernelAttrs) const + { + auto it = kernels.find(kernelAttrs); + IT_ASSERT(it != kernels.end(), "Kernel not found for key {" + + get_kernel_attrs_str(kernelAttrs) + + "}"); + return std::get<0>(it->second); + } + const KernelRecord &getKernelItem(const KernelAttrs &kernelAttrs) const + { + return kernels.at(kernelAttrs); + } + }; + + class CpuKernelWithoutConfig : public Kernel + { + public: + virtual void compute(const Operator &op, + const RuntimeObj *context) const = 0; + }; + +} // namespace infini + +#define _REGISTER_KERNEL_1(device, opType, kernel, name, cnt) \ + namespace infini \ + { \ + static const bool _CAT(_register_kernel_, cnt) = \ + KernelRegistry::getInstance().registerKernel(KernelAttrs{device, \ + opType}, \ + new kernel(), name); \ + } + +#define REGISTER_KERNEL(device, opType, kernel, name) \ + _REGISTER_KERNEL_1(device, opType, kernel, name, __COUNTER__) diff --git a/include/core/object.h b/include/core/object.h new file mode 100644 index 0000000..2db50ad --- /dev/null +++ b/include/core/object.h @@ -0,0 +1,71 @@ +#pragma once +#include "core/common.h" +#include "ref.h" + +namespace infini { + +using UidBaseType = int; + +class Uid { + private: + UidBaseType uid; + + public: + Uid(UidBaseType uid) : uid(uid) {} + Uid &operator=(const Uid &rhs) 
= delete; + + operator UidBaseType() const { return uid; } +}; + +class Guid : public Uid { + private: + UidBaseType generateGuid() { + static UidBaseType guidCnt = 0; + return ++guidCnt; + } + + public: + Guid() : Uid(generateGuid()) {} + Guid(const Guid &rhs) : Uid(generateGuid()) {} +}; + +/** + * @brief Family unique ID. Cloned tensors shared the same FUID. + */ +class Fuid : public Uid { + private: + UidBaseType generateFuid() { + static UidBaseType fuidCnt = 0; + return ++fuidCnt; + } + + public: + Fuid() : Uid(generateFuid()) {} + Fuid(const Fuid &fuid) : Uid(fuid) {} +}; + +class Object { + protected: + Guid guid; + + public: + virtual ~Object(){}; + virtual string toString() const = 0; + void print() { std::cout << toString() << std::endl; } + UidBaseType getGuid() const { return guid; } +}; + +inline std::ostream &operator<<(std::ostream &os, const Object &obj) { + os << obj.toString(); + return os; +} + +// Overload for Ref-wrapped Object +template > * = nullptr> +inline std::ostream &operator<<(std::ostream &os, const Ref &obj) { + os << obj->toString(); + return os; +} + +} // namespace infini diff --git a/include/core/op_type.h b/include/core/op_type.h new file mode 100644 index 0000000..db67f33 --- /dev/null +++ b/include/core/op_type.h @@ -0,0 +1,42 @@ +#pragma once +#ifndef OP_TYPE_H +#define OP_TYPE_H + +#include +#include + +namespace infini +{ + struct OpType + { + using underlying_t = uint16_t; + enum : underlying_t + { + Unknown, + Add, + Cast, + Clip, + Concat, + Div, + Mul, + MatMul, + Relu, + Sub, + Transpose, + + } type; + + constexpr OpType(decltype(type) t) : type(t) {} + constexpr explicit OpType(underlying_t val) : type((decltype(type))val) {} + constexpr underlying_t underlying() const { return type; } + + bool operator==(OpType others) const { return type == others.type; } + bool operator!=(OpType others) const { return type != others.type; } + bool operator<(OpType others) const { return type < others.type; } + + const char 
*toString() const; + }; + +} // namespace infini + +#endif // OP_TYPE_H diff --git a/include/core/operator.h b/include/core/operator.h new file mode 100644 index 0000000..0641007 --- /dev/null +++ b/include/core/operator.h @@ -0,0 +1,93 @@ +#pragma once + +#include "core/op_type.h" +#include "core/tensor.h" + +namespace infini +{ + using KernelAttrs = std::tuple; + + class GraphObj; + class OperatorObj : public Object + { + friend class GraphObj; + + protected: + OpType type; + TensorVec inputs; + TensorVec outputs; + vector> predecessors; + vector> successors; + + public: + OperatorObj(OpType opType, TensorVec inputs, TensorVec outputs); + virtual optional> inferShape(const TensorVec &inputs) = 0; + virtual vector inferDataType(const TensorVec &inputs) const; + /** + * @brief Constructs outputs (if required) and checks whether the operator is + * valid. + * + * @param graph If graph is not nullptr, outputs should be created in this + * function. + */ + bool checkValid(GraphObj *graph); + + public: // getter and setter + const TensorVec &getInputs() const { return inputs; } + const TensorVec &getOutputs() const { return outputs; } + Tensor getInputs(size_t i) const { return inputs.at(i); } + Tensor getOutput() const + { + IT_ASSERT(outputs.size() == 1, "Unimplemented"); + return outputs[0]; + } + Tensor getOutput(size_t i) const + { + IT_ASSERT(i < outputs.size(), "Index exceeded"); + return outputs.at(i); + } + OpVec getPredecessors() const { return wrefs_to_refs(predecessors); } + OpVec getSuccessors() const { return wrefs_to_refs(successors); } + OpType getOpType() const { return type; } + // HACK: set correct data type + DataType getDType() const { return getInputs(0)->getDType(); } + DataType getOutDType() const { return getOutput()->getDType(); } + virtual int numInputs() const = 0; + virtual int numOutputs() const = 0; + + /** + * @brief Clone this operator and replace its inputs and outputs.
+ * + * @param newInputs + * @param newOutputs + * @return Operator + */ + virtual Operator clone(const TensorVec &newInputs, + const TensorVec &newOutputs) const = 0; + + protected: + optional> inferShape(); + vector inferDataType() const; + + private: + void addPredecessors(const Operator &op) { predecessors.emplace_back(op); } + void addSuccessors(const Operator &op) { successors.emplace_back(op); } + void removePredecessors(const Operator &op); + void removeSuccessors(const Operator &op); + void replaceInput(Tensor t1, Tensor t2); + }; + +#define OP_CLONE(OpObj) \ + virtual Operator clone(const TensorVec &newInputs, \ + const TensorVec &newOutputs) const override \ + { \ + auto op = infini::make_ref(*this); \ + op->inputs = newInputs; \ + op->outputs = newOutputs; \ + op->predecessors.clear(); \ + op->successors.clear(); \ + IT_ASSERT(op->checkValid(nullptr)); \ + return op; \ + } + +} // namespace infini diff --git a/include/core/ref.h b/include/core/ref.h new file mode 100644 index 0000000..3393f6e --- /dev/null +++ b/include/core/ref.h @@ -0,0 +1,43 @@ +#pragma once +#include "core/common.h" +#include +#include +#include + +namespace infini { + +template using Ref = std::shared_ptr; +template using WRef = std::weak_ptr; + +template struct is_ref : std::false_type {}; +template struct is_ref> : std::true_type {}; +template struct is_ref> : std::true_type {}; + +template Ref make_ref(Params &&...params) { + static_assert(is_ref::value == false, "Ref should not be nested"); + return std::make_shared(std::forward(params)...); +} + +template > * = nullptr> +Ref as(const Ref &ref) { + return std::dynamic_pointer_cast(ref); +} + +template +std::vector> refs_to_wrefs(const std::vector> &refs) { + std::vector> wrefs; + for (const auto &ref : refs) + wrefs.emplace_back(ref); + return wrefs; +} + +template +std::vector> wrefs_to_refs(const std::vector> &wrefs) { + std::vector> refs; + for (const auto &wref : wrefs) + refs.emplace_back(wref); + return refs; +} + +} // 
namespace infini diff --git a/include/core/runtime.h b/include/core/runtime.h new file mode 100644 index 0000000..1b64cd9 --- /dev/null +++ b/include/core/runtime.h @@ -0,0 +1,69 @@ +#pragma once +#include "core/common.h" +#include "core/op_type.h" +#include "core/ref.h" + +namespace infini +{ + class TensorObj; + class OperatorObj; + class GraphObj; + class RuntimeObj; + class BlobObj; + + using Tensor = Ref; + using Operator = Ref; + using Graph = Ref; + using Runtime = Ref; + using Blob = Ref; + + using TensorVec = vector; + using OpVec = vector; + + enum class Device + { + CPU = 1 + }; + + class RuntimeObj : public std::enable_shared_from_this + { + protected: + Device device; + + public: + explicit RuntimeObj(Device device) + : device(device) {} + RuntimeObj(RuntimeObj &other) = delete; + RuntimeObj &operator=(RuntimeObj const &) = delete; + virtual ~RuntimeObj() {} + + virtual void run(const Graph &graph) const = 0; + virtual void *alloc(size_t size) = 0; + virtual void dealloc(void *ptr) = 0; + + bool isCpu() const + { + return true; + } + + virtual string toString() const = 0; + }; + + class NativeCpuRuntimeObj : public RuntimeObj + { + public: + NativeCpuRuntimeObj() : RuntimeObj(Device::CPU) {} + + static Ref &getInstance() + { + static Ref instance = + make_ref(); + return instance; + } + void dealloc(void *ptr) override; + void run(const Graph &graph) const override; + void *alloc(size_t size) override; + string toString() const override; + }; + +} // namespace infini diff --git a/include/core/tensor.h b/include/core/tensor.h new file mode 100644 index 0000000..93eec14 --- /dev/null +++ b/include/core/tensor.h @@ -0,0 +1,164 @@ +#pragma once +#include "core/blob.h" +#include "core/data_type.h" +#include "core/object.h" +#include "core/runtime.h" +#include +#include +#include + +namespace infini +{ + class GraphObj; + using ShapeElem = int; + using Shape = vector; + class TensorObj : public Object + { + friend class GraphObj; + + protected: + int dim; + 
+ DataType dtype; + vector> targets; + WRef source; + Blob data; + Runtime runtime; + + private: + Shape shape; + size_t _size; // Cache of Π(shape). + Fuid fuid; // Cloned tensors share the same id. Tensors constructed from + // scratch have a new id. + + public: + TensorObj(Shape shape, DataType dtype, Runtime runtime); + virtual ~TensorObj() {} + string toString() const override; + + size_t size() const { return _size; } + size_t getBytes() const { return _size * dtype.getSize(); } + + Shape getDims() const { return shape; } + void setShape(Shape shape_); + size_t getRank() const { return shape.size(); } + UidBaseType getFuid() const { return fuid; } + + void setData( + std::function const &generator) const; + + void setDataBlob(const Blob &blob); + + void printData() const; + bool equalData(const Tensor &rhs, double relativeError = 1e-6) const; + + template + bool equalData(const vector &dataVector) + { + IT_ASSERT(size() == dataVector.size()); + IT_ASSERT(DataType::get() == dtype.cpuTypeInt()); + return equalDataImpl(getRawDataPtr(), dataVector.data(), size()); + } + + template + T getRawDataPtr() const + { + static_assert(std::is_pointer_v, + "Raw data pointer has a type of pointer"); + IT_ASSERT(data != nullptr); + return data->getPtr(); + } + + DataType getDType() const { return dtype; } + Runtime getRuntime() const { return runtime; } + + OpVec getTargets() const { return wrefs_to_refs(targets); } + Operator getSource() const { return source.lock(); } + + private: + template + string dataToString() const + { + std::stringstream builder; + builder << "Tensor: " << guid << std::endl; + + auto numDims = shape.size(); + auto dimSzVec = vector(numDims, 1); + auto ptr = data->getPtr(); + dimSzVec[numDims - 1] = shape[numDims - 1]; + + for (int i = numDims - 1; i != 0; --i) + dimSzVec[i - 1] = dimSzVec[i] * shape[i - 1]; + + for (size_t i = 0, iEnd = size(); i < iEnd; ++i) + { + for (size_t j = 0; j < numDims; ++j) + if (i % dimSzVec[j] == 0) + builder << "["; + 
+ builder << ptr[i]; + for (size_t j = 0; j < numDims; ++j) + if ((int)i % dimSzVec[j] == dimSzVec[j] - 1) + builder << "]"; + + if (i != size() - 1) + builder << ", "; + + auto column = (size_t)dimSzVec[numDims - 1]; + if (i % column == column - 1) + builder << std::endl; + } + return builder.str(); + } + + template + bool equalDataImpl(const T *a, const T *b, size_t size, + double relativeError = 1e-6) const + { + for (size_t i = 0; i < size; ++i) + { + if constexpr (std::is_integral_v) + { + if (a[i] != b[i]) + return false; + } + else if constexpr (std::is_floating_point_v) + { + if (std::min(fabs(a[i]), fabs(b[i])) == 0. && + fabs(a[i] - b[i]) > relativeError) + { + printf("Error on %lu: %f %f\n", i, a[i], b[i]); + return false; + } + else if (std::min(fabs(a[i]), fabs(b[i])) != 0. && + fabs(a[i] - b[i]) / + std::max(fabs(a[i]), fabs(b[i])) > + relativeError) + { + printf("Error on %lu: %f %f\n", i, a[i], b[i]); + return false; + } + } + else + { + static_assert(!sizeof(T), "Unsupported data type"); + } + } + return true; + } + + void addTarget(const Operator &op) { targets.emplace_back(op); } + void setSource(const Operator &op) { source = op; } + void removeTarget(const Operator &op) + { + for (auto itr = targets.begin(); itr != targets.end();) + { + if (itr->lock() == op) + itr = targets.erase(itr); + else + ++itr; + } + } + }; + +} // namespace infini diff --git a/include/operators/concat.h b/include/operators/concat.h new file mode 100644 index 0000000..86287fd --- /dev/null +++ b/include/operators/concat.h @@ -0,0 +1,32 @@ +#pragma once +#include "core/operator.h" + +namespace infini { +/** + * @brief Concatenate several tensors into one. All the input tensors should + * have the same shape except for the concatenated dimension. + * + */ +class ConcatObj : public OperatorObj { + int dim; + + public: + /** + * @brief Construct a new Concat object. + * + * @param graph The computation graph that this operator belongs to. 
+ * @param inputs The input tensors to be concatenated. + * @param output Concatenated tensor. + * @param dim The dimension to concatenate on. + */ + ConcatObj(GraphObj *graph, TensorVec inputs, Tensor output, int dim); + OP_CLONE(ConcatObj); + + optional> inferShape(const TensorVec &inputs) override; + + std::string toString() const override; + int numInputs() const override { return inputs.size(); } + int numOutputs() const override { return 1; } + int getDim() const { return dim; } +}; +} // namespace infini diff --git a/include/operators/element_wise.h b/include/operators/element_wise.h new file mode 100644 index 0000000..4260b2d --- /dev/null +++ b/include/operators/element_wise.h @@ -0,0 +1,47 @@ +#pragma once +#include "core/operator.h" + +namespace infini +{ + /** + * @brief Base class of **binary** element-wise operators. + * Unary operators like activations are not the derived classes of + * ElementWiseObj. + * + */ + class ElementWiseObj : public OperatorObj + { + public: + /** + * @brief Construct a new ElementWise object + * + * @param type Operator type. + * @param graph The computation graph that this operator belongs to. + * @param input0 The first input tensor. + * @param input1 The second input tensor. + * @param output The output tensor. 
+ */ + ElementWiseObj(OpType type, GraphObj *graph, Tensor input0, Tensor input1, + Tensor output); + optional> inferShape(const TensorVec &inputs) override; + + std::string toString() const override; + int numInputs() const override { return 2; } + int numOutputs() const override { return 1; } + }; + +#define DEFINE_ELEMENT_WISE_OBJ(prefix, type) \ + class prefix##Obj : public ElementWiseObj \ + { \ + public: \ + prefix##Obj(GraphObj *graph, Tensor input0, Tensor input1, \ + Tensor output) \ + : ElementWiseObj(type, graph, input0, input1, output) {} \ + OP_CLONE(prefix##Obj); \ + }; + + DEFINE_ELEMENT_WISE_OBJ(Add, OpType::Add) + DEFINE_ELEMENT_WISE_OBJ(Sub, OpType::Sub) + DEFINE_ELEMENT_WISE_OBJ(Mul, OpType::Mul) + DEFINE_ELEMENT_WISE_OBJ(Div, OpType::Div) +}; // namespace infini diff --git a/include/operators/matmul.h b/include/operators/matmul.h new file mode 100644 index 0000000..4925895 --- /dev/null +++ b/include/operators/matmul.h @@ -0,0 +1,60 @@ +#pragma once +#include "core/operator.h" + +namespace infini +{ + /** + * @brief Matrix multiplication. + * + */ + class MatmulObj : public OperatorObj + { + private: + // InfiniTensor assumes a row-major tensor layout. `transA`=false means + // default dims, true means A should be transposed before matmul. This is the + // opposite of the column-major BLAS. + bool transA, transB; + + // Auxiliary attributes which are not a part of operator attributes. + int m, n, k; + + public: + /** + * @brief Matmul operator with batch broadcast and tensor transpose + * supports. Only one tensor with single batch can be broadcasted due to the + * BLAS interface restriction. Transpose indicates whether the last two + * dimensions should be transposed before Matmul and does not affect other + * leading dimensions. + * + * Matmul shows how operators are defined in InfiniTensor. The constructor of + * an operator can create output tensors for the operator or not, which + * depends on `graph`.
+ * + * @param graph The computation graph that this operator belongs to. + * @param A The input tensor. + * @param B The input tensor. + * @param C C is the output of Matmul. If outputs are going to be created in + * the constructor, C should be an empty Ref. + * @param transA If matrix A should be transposed when computing. + * @param transB If matrix B should be transposed when computing. + */ + MatmulObj(GraphObj *graph, Tensor A, Tensor B, Tensor C, + bool transA = false, bool transB = false); + OP_CLONE(MatmulObj); + + std::string toString() const override; + optional> inferShape(const TensorVec &inputs) override; + + int numInputs() const override { return inputs.size(); } + int numOutputs() const override { return 1; } + + bool getTransA() const { return transA; } + bool getTransB() const { return transB; } + void setTransA(bool transA) { this->transA = transA; } + void setTransB(bool transB) { this->transB = transB; } + int getM() const { return m; } + int getN() const { return n; } + int getK() const { return k; } + }; + +} // namespace infini \ No newline at end of file diff --git a/include/operators/transpose.h b/include/operators/transpose.h new file mode 100644 index 0000000..c32bbe5 --- /dev/null +++ b/include/operators/transpose.h @@ -0,0 +1,34 @@ +#pragma once +#include "core/operator.h" + +namespace infini +{ + /** + * @brief Transpose the input tensor similar to numpy.transpose. + * + */ + class TransposeObj : public OperatorObj + { + public: + /** + * @brief Construct a new TransposeObj object. + * + * @param graph The graph to which this operator belongs. + * @param input The input tensor. + * @param output The output tensor. + * @param permute The permutation of the dimensions. 
+ */ + TransposeObj(GraphObj *graph, Tensor input, Tensor output, + vector permute); + OP_CLONE(TransposeObj); + optional> inferShape(const TensorVec &inputs) override; + + std::string toString() const override; + int numInputs() const override { return 1; } + int numOutputs() const override { return 1; } + std::vector getPermute() const { return transposePermute; } + + private: + vector transposePermute; + }; +} // namespace infini diff --git a/include/operators/unary.h b/include/operators/unary.h new file mode 100644 index 0000000..83f3dd3 --- /dev/null +++ b/include/operators/unary.h @@ -0,0 +1,104 @@ +#pragma once +#include "core/operator.h" + +namespace infini +{ + /** + * @brief The base class for unary operators. + * + */ + class UnaryObj : public OperatorObj + { + public: + /** + * @brief Construct a new Unary object. + * + * @param type Operator type. + * @param graph The computation graph that this operator belongs to. + * @param input The input tensor. + * @param output The output tensor. 
+ */ + UnaryObj(OpType type, GraphObj *graph, Tensor input, Tensor output); + optional> inferShape(const TensorVec &inputs) override; + + std::string toString() const override; + int numInputs() const override { return 1; } + int numOutputs() const override { return 1; } + }; + + class ClipObj : public OperatorObj + { + public: + ClipObj(GraphObj *graph, Tensor input, Tensor output, + std::optional min, std::optional max); + OP_CLONE(ClipObj); + optional> inferShape(const TensorVec &inputs) override; + + std::string toString() const override; + std::optional getMin() const { return minValue; }; + std::optional getMax() const { return maxValue; }; + int numInputs() const override { return 1; } + int numOutputs() const override { return 1; } + + private: + std::optional minValue, maxValue; + }; + + enum class CastType + { + Float2Float16 = 0, + Float2Int64, + Float2Int32, + Float2Int16, + Float2Int8, + Float2BFloat16, + Int322Float, + Int322Int8, + Int322Int16, + Int322Int64, + Int162Float, + Int162Int32, + Int82Float, + Int82Int16, + Int82Int32, + Uint82Float, + Uint82Int32, + Uint82Int64, + Int642Int32, + Int642Uint32, + Int642Float, + Uint322Int64, + Float162Float, + BFloat162Float, + Float2Float, + }; + + class CastObj : public OperatorObj + { + public: + CastObj(GraphObj *graph, Tensor input, Tensor output, CastType type); + OP_CLONE(CastObj); + optional> inferShape(const TensorVec &inputs) override; + vector inferDataType(const TensorVec &inputs) const override; + + std::string toString() const override; + CastType getType() const { return castType; } + DataType getOutputDataType() const; + int numInputs() const override { return 1; } + int numOutputs() const override { return 1; } + + private: + CastType castType; + }; + +#define DEFINE_UNARY_OBJ(prefix, type) \ + class prefix##Obj : public UnaryObj \ + { \ + public: \ + prefix##Obj(GraphObj *graph, Tensor input, Tensor output) \ + : UnaryObj(type, graph, input, output) {} \ + OP_CLONE(prefix##Obj); \ + }; + + 
DEFINE_UNARY_OBJ(Relu, OpType::Relu) +}; // namespace infini diff --git a/include/test.h b/include/test.h new file mode 100644 index 0000000..cef5a3e --- /dev/null +++ b/include/test.h @@ -0,0 +1,4 @@ +#pragma once +#include "core/common.h" +#include "utils/data_generator.h" +#include "gtest/gtest.h" diff --git a/include/utils/data_generator.h b/include/utils/data_generator.h new file mode 100644 index 0000000..1b7d91a --- /dev/null +++ b/include/utils/data_generator.h @@ -0,0 +1,59 @@ +#pragma once +#include "core/common.h" +#include + +namespace infini { + +class DataGenerator { + private: + virtual void fill(uint32_t *data, size_t size) { IT_TODO_HALT(); } + virtual void fill(float *data, size_t size) { IT_TODO_HALT(); } + +public: + virtual ~DataGenerator() {} + void operator()(void *data, size_t size, DataType dataType) { + if (dataType == DataType::UInt32) + fill(reinterpret_cast(data), size); + else if (dataType == DataType::Float32) + fill(reinterpret_cast(data), size); + else + IT_TODO_HALT(); + } +}; + +class IncrementalGenerator : public DataGenerator { + public: + virtual ~IncrementalGenerator() {} + + private: + template void fill(T *data, size_t size) { + for (size_t i = 0; i < size; i++) { + data[i] = i; + } + } + + void fill(uint32_t *data, size_t size) override { + fill(data, size); + } + void fill(float *data, size_t size) override { fill(data, size); } +}; + +template class ValGenerator : public DataGenerator { + public: + virtual ~ValGenerator() {} + + private: + template void fill(T *data, size_t size) { + for (size_t i = 0; i < size; i++) { + data[i] = val; + } + } + + void fill(uint32_t *data, size_t size) override { + fill(data, size); + } + void fill(float *data, size_t size) override { fill(data, size); } +}; +typedef ValGenerator<1> OneGenerator; +typedef ValGenerator<0> ZeroGenerator; +} // namespace infini diff --git a/include/utils/exception.h b/include/utils/exception.h new file mode 100644 index 0000000..d7bb433 --- /dev/null +++ 
b/include/utils/exception.h @@ -0,0 +1,22 @@ +#pragma once +#include +#include + +namespace infini { + +class Exception : public std::runtime_error { + protected: + std::string info; + + public: + Exception(const std::string &msg); + + Exception &operator<<(const std::string &str) { + info += str; + return *this; + } + + const char *what() const noexcept override { return info.c_str(); } +}; + +} // namespace infini diff --git a/include/utils/operator_utils.h b/include/utils/operator_utils.h new file mode 100644 index 0000000..e3a2373 --- /dev/null +++ b/include/utils/operator_utils.h @@ -0,0 +1,26 @@ +#pragma once +#ifndef OPERATOR_UTIL_H +#define OPERATOR_UTIL_H + +#include "core/operator.h" +#include "core/tensor.h" + +#include + +namespace infini { + +// Launch a broadcast shape based on the shape of input A and B +Shape infer_broadcast(const Shape &A, const Shape &B); +// Launch the real axis based on rank and current axis +int get_real_axis(const int &axis, const int &rank); +// Locate the index with size from Shape +Shape locate_index(size_t inputN, const Shape &shape); +// Delocate the ShapeIndex from Shape with broadcast +size_t delocate_index(const Shape &shapeIndex, const Shape &shape, + const Shape &stride); +// Convert KernelAttrs to a string representation +std::string get_kernel_attrs_str(const KernelAttrs &kernelAttrs); + +} // namespace infini + +#endif diff --git a/src/core/allocator.cc b/src/core/allocator.cc new file mode 100644 index 0000000..ff593ae --- /dev/null +++ b/src/core/allocator.cc @@ -0,0 +1,69 @@ +#include "core/allocator.h" +#include + +namespace infini +{ + Allocator::Allocator(Runtime runtime) : runtime(runtime) + { + used = 0; + peak = 0; + ptr = nullptr; + + // 'alignment' defaults to sizeof(uint64_t), because it is the length of + // the longest data type currently supported by the DataType field of + // the tensor + alignment = sizeof(uint64_t); + } + + Allocator::~Allocator() + { + if (this->ptr != nullptr) + { + 
runtime->dealloc(this->ptr); + } + } + + size_t Allocator::alloc(size_t size) + { + IT_ASSERT(this->ptr == nullptr); + // pad the size to the multiple of alignment + size = this->getAlignedSize(size); + + // =================================== 作业 =================================== + // TODO: 设计一个算法来分配内存,返回起始地址偏移量 + // =================================== 作业 =================================== + + return 0; + } + + void Allocator::free(size_t addr, size_t size) + { + IT_ASSERT(this->ptr == nullptr); + size = getAlignedSize(size); + + // =================================== 作业 =================================== + // TODO: 设计一个算法来回收内存 + // =================================== 作业 =================================== + } + + void *Allocator::getPtr() + { + if (this->ptr == nullptr) + { + this->ptr = runtime->alloc(this->peak); + printf("Allocator really alloc: %p %lu bytes\n", this->ptr, peak); + } + return this->ptr; + } + + size_t Allocator::getAlignedSize(size_t size) + { + return ((size - 1) / this->alignment + 1) * this->alignment; + } + + void Allocator::info() + { + std::cout << "Used memory: " << this->used + << ", peak memory: " << this->peak << std::endl; + } +} diff --git a/src/core/data_type.cc b/src/core/data_type.cc new file mode 100644 index 0000000..3825c9c --- /dev/null +++ b/src/core/data_type.cc @@ -0,0 +1,22 @@ +#include "core/data_type.h" + +namespace infini { +// Move implementation here to avoid compile time error on some platform +// to be consistent with onnx +// https://github.com/onnx/onnx/blob/aeb21329122b96df1d3ef33b500a35ca140b1431/onnx/onnx.proto#L484 +const DataType DataType::Undefine(0); +const DataType DataType::Float32(1); +const DataType DataType::UInt8(2); +const DataType DataType::Int8(3); +const DataType DataType::UInt16(4); +const DataType DataType::Int16(5); +const DataType DataType::Int32(6); +const DataType DataType::Int64(7); +const DataType DataType::String(8); +const DataType DataType::Bool(9); +const DataType 
DataType::Float16(10); +const DataType DataType::Double(11); +const DataType DataType::UInt32(12); +const DataType DataType::UInt64(13); +const DataType DataType::BFloat16(16); +} // namespace infini diff --git a/src/core/graph.cc b/src/core/graph.cc new file mode 100644 index 0000000..3a90637 --- /dev/null +++ b/src/core/graph.cc @@ -0,0 +1,230 @@ +#include "core/graph.h" +#include +#include +#include + +namespace infini +{ + + void GraphObj::addOperatorAndConnect(const Operator &op) + { + sorted = false; + ops.push_back(op); + for (auto &input : op->getInputs()) + { + if (input) + { + input->addTarget(op); + if (auto pred = input->getSource()) + { + pred->addSuccessors(op); + op->addPredecessors(pred); + } + } + } + for (auto &output : op->getOutputs()) + { + if (output) + { + output->setSource(op); + for (auto &succ : output->getTargets()) + { + succ->addPredecessors(op); + op->addSuccessors(succ); + } + } + } + } + + string GraphObj::toString() const + { + std::ostringstream oss; + oss << "Graph Tensors:\n"; + for (const auto &tensor : tensors) + oss << tensor << "\n"; + + oss << "Graph operators:\n"; + for (const auto &op : ops) + { + vector preds, succs; + for (auto &o : op->getPredecessors()) + preds.emplace_back(o->getGuid()); + for (auto &o : op->getSuccessors()) + succs.emplace_back(o->getGuid()); + oss << "OP " << op->getGuid(); + oss << ", pred " << vecToString(preds); + oss << ", succ " << vecToString(succs); + oss << ", " << op << "\n"; + } + return oss.str(); + } + + bool GraphObj::topo_sort() + { + if (this->sorted) + { + return true; + } + std::vector sorted; + std::unordered_set flags; + sorted.reserve(ops.size()); + flags.reserve(ops.size()); + while (sorted.size() < ops.size()) + { + // At least one node is moved to `sorted` in each pass; otherwise there is a cycle.
+ auto modified = false; + for (auto const &op : ops) + { + if (auto const &inputs = op->getInputs(); + flags.find(op.get()) == flags.end() && + std::all_of(inputs.begin(), inputs.end(), + [&flags](auto const &input) + { + auto ptr = input->getSource().get(); + return !ptr || flags.find(ptr) != flags.end(); + })) + { + modified = true; + sorted.emplace_back(op); + flags.insert(op.get()); + } + } + if (!modified) + { + return false; + } + } + this->ops = std::move(sorted); + return this->sorted = true; + } + + void GraphObj::optimize() + { + // =================================== 作业 =================================== + // TODO: 设计一个算法来实现指定的图优化规则 + // 图优化规则如下: + // 1. 去除冗余的算子(例如,两个相邻的算子都是 transpose 算子,且做的是相反的操作,可以将其全部删除) + // 2. 合并算子(例如,矩阵乘算子中含有属性transA、transB,如果其输入存在transpose,且对最后两个维度做交换,就可以将transpose融入到矩阵乘算子的属性中去) + // =================================== 作业 =================================== + } + + Tensor GraphObj::getTensor(int fuid) const + { + for (auto tensor : tensors) + { + if (tensor->getFuid() == fuid) + { + return tensor; + } + } + return nullptr; + } + + void GraphObj::shape_infer() + { + for (auto &op : ops) + { + auto ans = op->inferShape(); + IT_ASSERT(ans.has_value()); + auto oldOutputs = op->getOutputs(); + IT_ASSERT(ans.value().size() == oldOutputs.size()); + // replace the old outputshape and size with new one + for (int i = 0; i < (int)ans.value().size(); ++i) + { + auto newShape = ans.value()[i]; + auto oldShape = oldOutputs[i]->getDims(); + auto fuid = oldOutputs[i]->getFuid(); + if (newShape != oldShape) + { + auto tensor = this->getTensor(fuid); + tensor->setShape(newShape); + } + } + } + } + + void GraphObj::dataMalloc() + { + // topological sorting first + IT_ASSERT(topo_sort() == true); + + // =================================== 作业 =================================== + // TODO:利用 allocator 给计算图分配内存 + // HINT: 获取分配好的内存指针后,可以调用 tensor 的 setDataBlob 函数给 tensor 绑定内存 + // =================================== 作业 
=================================== + + allocator.info(); + } + + Tensor GraphObj::addTensor(Shape dim, DataType dtype) + { + return tensors.emplace_back(make_ref(dim, dtype, runtime)); + } + + Tensor GraphObj::addTensor(const Tensor &tensor) + { + IT_ASSERT(tensor->getRuntime() == runtime, + std::string("Tensor runtime mismatch: cannot add a tenosr in ") + + tensor->getRuntime()->toString() + " to " + + runtime->toString()); + tensors.emplace_back(tensor); + return tensor; + } + + TensorVec GraphObj::addTensor(const TensorVec &tensors) + { + for (auto &t : tensors) + addTensor(t); + return tensors; + } + + // tensor's "source" and "target" must be in "ops". + // tensor has no "source" and no "target" must not exist. + // "inputs" or "outputs" of operators must be in "tensors" + // "predecessors" and "successors" of an operator of "ops" must be in "ops". + bool GraphObj::checkValid() const + { + for (auto tensor : tensors) + { + IT_ASSERT(!(tensor->getTargets().size() == 0 && + nullptr == tensor->getSource())); + for (auto op : tensor->getTargets()) + { + IT_ASSERT(std::find(ops.begin(), ops.end(), op) != ops.end()); + } + auto op = tensor->getSource(); + IT_ASSERT(!(op && std::find(ops.begin(), ops.end(), op) == ops.end())); + } + for (auto op : ops) + { + for (auto tensor : op->getInputs()) + { + IT_ASSERT(std::find(tensors.begin(), tensors.end(), tensor) != + tensors.end()); + } + for (auto tensor : op->getOutputs()) + { + IT_ASSERT(std::find(tensors.begin(), tensors.end(), tensor) != + tensors.end()); + } + for (auto pre : op->getPredecessors()) + { + IT_ASSERT(std::find(ops.begin(), ops.end(), pre) != ops.end()); + } + for (auto suc : op->getSuccessors()) + { + IT_ASSERT(std::find(ops.begin(), ops.end(), suc) != ops.end()); + } + } + std::set s; + // check whether two tensors with the same FUID exist + for (auto tensor : tensors) + { + int cnt = s.count(tensor->getFuid()); + IT_ASSERT(cnt == 0, std::to_string(tensor->getFuid())); + s.insert(tensor->getFuid()); 
+ } + return true; + } + +} // namespace infini \ No newline at end of file diff --git a/src/core/op_type.cc b/src/core/op_type.cc new file mode 100644 index 0000000..b2a721a --- /dev/null +++ b/src/core/op_type.cc @@ -0,0 +1,32 @@ +#include "core/op_type.h" + +namespace infini +{ + const char *OpType::toString() const + { +#define CASE(NAME) \ + case OpType::NAME: \ + return #NAME + + switch (type) + { + CASE(Unknown); + CASE(Add); + CASE(Sub); + CASE(Mul); + CASE(Div); + CASE(Cast); + CASE(Clip); + CASE(Relu); + CASE(Transpose); + CASE(Concat); + CASE(MatMul); + + default: + return "Unknown"; + } + +#undef CASE + } + +} // namespace infini diff --git a/src/core/operator.cc b/src/core/operator.cc new file mode 100644 index 0000000..a70ca48 --- /dev/null +++ b/src/core/operator.cc @@ -0,0 +1,85 @@ +#include "core/operator.h" +#include "core/graph.h" + +namespace infini +{ + + OperatorObj::OperatorObj(OpType opType, TensorVec inputs, TensorVec outputs) + : type(opType), inputs(inputs), outputs(outputs) {} + + void OperatorObj::removePredecessors(const Operator &op) + { + for (auto it = predecessors.begin(); it != predecessors.end();) + { + if (it->lock() == op) + it = predecessors.erase(it); + else + ++it; + } + } + + void OperatorObj::removeSuccessors(const Operator &op) + { + for (auto it = successors.begin(); it != successors.end();) + { + if (it->lock() == op) + it = successors.erase(it); + else + ++it; + } + } + + void OperatorObj::replaceInput(Tensor t1, Tensor t2) + { + for (auto itr = inputs.begin(); itr != inputs.end(); ++itr) + { + if (*itr == t1) + { + *itr = t2; + } + } + } + + bool OperatorObj::checkValid(GraphObj *graph) + { + auto optShapes = inferShape(); + if (!optShapes) // shape inference failed + return false; + + const vector &shapes = *optShapes; + if (shapes.size() != outputs.size()) + return false; + if (graph) + { // if graph != nullptr, outputs should be created + auto dataTypes = inferDataType(); + for (size_t i = 0; i < outputs.size(); 
i++) + { + IT_ASSERT(!outputs[i], "Find empty output while operator creation"); + outputs[i] = graph->addTensor(shapes[i], dataTypes[i]); + } + } + else + { // if outputs have been created, check their shapes + for (size_t i = 0; i < shapes.size(); ++i) + { + if (shapes[i] != outputs[i]->getDims()) + return false; + } + } + return true; + } + + optional> OperatorObj::inferShape() { return inferShape(inputs); } + + vector OperatorObj::inferDataType(const TensorVec &inputs) const + { + auto dataType = inputs[0]->getDType(); + return vector(numOutputs(), dataType); + } + + vector OperatorObj::inferDataType() const + { + return inferDataType(inputs); + } + +} // namespace infini diff --git a/src/core/runtime.cc b/src/core/runtime.cc new file mode 100644 index 0000000..bd88d90 --- /dev/null +++ b/src/core/runtime.cc @@ -0,0 +1,36 @@ +#include "core/runtime.h" +#include "core/blob.h" +#include "core/kernel.h" +#include "core/graph.h" +#include "core/kernel.h" +#include +#include +#include +namespace infini +{ + void NativeCpuRuntimeObj::run(const Graph &graph) const + { + const auto &kernelRegistry = KernelRegistry::getInstance(); + + for (auto &op : graph->getOperators()) + { + auto kernelAttrs = KernelAttrs{device, op->getOpType().underlying()}; + Kernel *kernel = kernelRegistry.getKernel(kernelAttrs); + kernel->compute(op, this); + } + } + + string NativeCpuRuntimeObj::toString() const { return "CPU Runtime"; } + + void NativeCpuRuntimeObj::dealloc(void *ptr) + { + return free(ptr); + } + + void *NativeCpuRuntimeObj::alloc(size_t size) + { + return calloc((size + sizeof(uint64_t) - 1) / sizeof(uint64_t), + sizeof(uint64_t)); + } + +} // namespace infini diff --git a/src/core/tensor.cc b/src/core/tensor.cc new file mode 100644 index 0000000..db54a2d --- /dev/null +++ b/src/core/tensor.cc @@ -0,0 +1,116 @@ +#include "core/tensor.h" +#include "core/blob.h" +#include "core/operator.h" +#include "core/runtime.h" +#include +#include + +namespace infini { + + 
TensorObj::TensorObj(Shape shape_, DataType dtype, Runtime runtime) + : dim(shape_.size()), dtype(dtype), runtime(runtime), shape(std::move(shape_)), + _size(std::accumulate(shape.begin(), shape.end(), 1, std::multiplies{})) {} + + string TensorObj::toString() const + { + // Convert data pointer to string + std::stringstream ss; + if (data != nullptr) + ss << data->getPtr(); + else + ss << "nullptr data"; + string ret = "Tensor " + std::to_string(guid) + ", Fuid " + + std::to_string(fuid) + ", shape " + vecToString(shape) + + ", dtype " + dtype.toString() + ", " + runtime->toString() + + ", " + ss.str() + "\n"; + vector targetGuids; + for (const auto &op : targets) + targetGuids.emplace_back(op.lock()->getGuid()); + if (auto o = source.lock()) + ret += ", source " + std::to_string(o->getGuid()); + else + ret += ", source None"; + ret += ", targets " + vecToString(targetGuids); + return ret; + } + +void TensorObj::setShape(Shape shape_) { + shape = shape_; + size_t size = std::accumulate(shape.begin(), shape.end(), 1, + [](auto acc, auto x) { return acc * x; }); + _size = size; +} + +void TensorObj::printData() const { + IT_ASSERT(data != nullptr); + if (!runtime->isCpu()) + IT_TODO_HALT(); + +#define TRY_PRINT(N) \ + if (dtype == DataType(N)) \ + std::cout << dataToString::t>() << std::endl; + + TRY_PRINT(0) // fmt: new line + else TRY_PRINT(1) // + else TRY_PRINT(2) // + else TRY_PRINT(3) // + else TRY_PRINT(4) // + else TRY_PRINT(5) // + else TRY_PRINT(6) // + else TRY_PRINT(7) // + else TRY_PRINT(8) // + else TRY_PRINT(9) // + else TRY_PRINT(10) // + else TRY_PRINT(11) // + else TRY_PRINT(12) // + else TRY_PRINT(13) // + else TRY_PRINT(16) // + else IT_TODO_HALT(); + +#undef TRY_PRINT +} + +bool TensorObj::equalData(const Tensor &rhs, double relativeError) const { + IT_ASSERT(data != nullptr); + IT_ASSERT(rhs->data != nullptr); + IT_ASSERT(getDType() == rhs->getDType()); + IT_ASSERT(runtime->isCpu()); + IT_ASSERT(rhs->getRuntime()->isCpu()); + if (size() != 
rhs->size()) + return false; + +#define TEST_EQUAL(N) \ + if (dtype == DataType(N)) \ + return equalDataImpl(getRawDataPtr::t *>(), \ + rhs->getRawDataPtr::t *>(), size(), \ + relativeError); + + TEST_EQUAL(0) // fmt: new line + else TEST_EQUAL(1) // + else TEST_EQUAL(2) // + else TEST_EQUAL(3) // + else TEST_EQUAL(4) // + else TEST_EQUAL(5) // + else TEST_EQUAL(6) // + else TEST_EQUAL(7) // + else TEST_EQUAL(8) // + else TEST_EQUAL(9) // + else TEST_EQUAL(10) // + else TEST_EQUAL(11) // + else TEST_EQUAL(12) // + else TEST_EQUAL(13) // + else TEST_EQUAL(16) // + else IT_TODO_HALT(); + +#undef TEST_EQUAL +} + +void TensorObj::setData( + const std::function &generator) const { + IT_ASSERT(data != nullptr); + generator(getRawDataPtr(), size(), dtype); +} + +void TensorObj::setDataBlob(const Blob &blob) { this->data = blob; } + +}; // namespace infini diff --git a/src/kernels/cpu/concat.cc b/src/kernels/cpu/concat.cc new file mode 100644 index 0000000..6e061c7 --- /dev/null +++ b/src/kernels/cpu/concat.cc @@ -0,0 +1,64 @@ +#include "operators/concat.h" +#include "core/kernel.h" + +namespace infini { + +class NaiveConcat : public CpuKernelWithoutConfig { + template + void doCompute(const Operator &_op, const RuntimeObj *context) const { + auto op = as(_op); + auto inputs = op->getInputs(), outputs = op->getOutputs(); + auto dim = op->getDim(); + auto output = outputs[0]; + std::vector iDims; + for (auto input : inputs) + iDims.emplace_back(input->getDims()); + const auto &outDim = output->getDims(); + size_t blockOffsetInner = 1; + for (size_t i = outDim.size() - 1; i > (size_t)dim; --i) + blockOffsetInner *= outDim[i]; + size_t blockOffset = outDim[dim] * blockOffsetInner; + for (size_t i = 0; i < inputs.size(); ++i) { + auto input = inputs[i]; + auto dimOffset = 0; + auto iDim = iDims[i]; + for (size_t j = 0; j < i; ++j) + dimOffset += iDims[j][dim]; + size_t localBlockOffset = 1; + for (size_t i = iDim.size() - 1; + i >= (size_t)dim && i != (size_t)-1; --i) + 
localBlockOffset *= iDim[i]; + auto innerOffset = blockOffsetInner * dimOffset; + auto inSize = input->size(); + auto inPtr = input->getRawDataPtr(), + outPtr = output->getRawDataPtr(); +#pragma omp parallel for + for (size_t iOffset = 0; iOffset < inSize; ++iOffset) { + auto oOffset = iOffset % localBlockOffset + innerOffset + + iOffset / localBlockOffset * blockOffset; + outPtr[oOffset] = inPtr[iOffset]; + } + } + } + + void compute(const Operator &_op, + const RuntimeObj *context) const override { +#define CASE(N) \ + case N: \ + doCompute::t>(_op, context) + + int dataTypeIdx = _op->getDType().getIndex(); + switch (dataTypeIdx) { + CASE(1); // DataType::Float32 + break; + CASE(12); // DataType::UInt32 + break; + default: + IT_TODO_HALT(); + } + } +}; + +REGISTER_KERNEL(Device::CPU, OpType::Concat, NaiveConcat, "ConcatNaive_CPU"); + +} // namespace infini diff --git a/src/kernels/cpu/element_wise.cc b/src/kernels/cpu/element_wise.cc new file mode 100644 index 0000000..af03c7a --- /dev/null +++ b/src/kernels/cpu/element_wise.cc @@ -0,0 +1,119 @@ +#include "operators/element_wise.h" +#include "core/kernel.h" +#include "utils/operator_utils.h" + +namespace infini +{ + class NativeElementWise : public CpuKernelWithoutConfig + { + template + static T addCompute(T val0, T val1) + { + return val0 + val1; + } + + template + static T subCompute(T val0, T val1) + { + return val0 - val1; + } + + template + static T mulCompute(T val0, T val1) + { + return val0 * val1; + } + + template + static T divCompute(T val0, T val1) + { + return (T)(val0 / val1); + } + + template + void doCompute(const Operator &_op, const RuntimeObj *context) const + { + auto op = as(_op); + T *inptr0 = op->getInputs(0)->getRawDataPtr(); + T *inptr1 = op->getInputs(1)->getRawDataPtr(); + T *outptr = op->getOutput()->getRawDataPtr(); + + auto shapeA = op->getInputs(0)->getDims(); + auto shapeB = op->getInputs(1)->getDims(); + auto shapeC = op->getOutput()->getDims(); + auto rank = 
op->getOutput()->getRank(); + Shape a(rank, 1); + Shape b(rank, 1); + std::copy(shapeA.begin(), shapeA.end(), + a.begin() + (rank - shapeA.size())); + std::copy(shapeB.begin(), shapeB.end(), + b.begin() + (rank - shapeB.size())); + auto getStride = [&](const Shape &shape) + { + int p = 1; + Shape stride(rank); + for (auto i = rank; i > 0; --i) + { + stride[i - 1] = p; + p = p * shape[i - 1]; + } + return stride; + }; + Shape strideA = getStride(a); + Shape strideB = getStride(b); + + auto n = op->getOutput()->size(); + T (*_doCompute) + (T val0, T val1); + switch (op->getOpType().underlying()) + { + case OpType::Add: + _doCompute = addCompute; + break; + case OpType::Sub: + _doCompute = subCompute; + break; + case OpType::Mul: + _doCompute = mulCompute; + break; + case OpType::Div: + _doCompute = divCompute; + break; + default: + IT_TODO_HALT(); + } + + for (size_t i = 0; i < n; ++i) + { + auto shapeIndexC = locate_index(i, shapeC); + auto indexA = delocate_index(shapeIndexC, a, strideA); + auto indexB = delocate_index(shapeIndexC, b, strideB); + outptr[i] = _doCompute(inptr0[indexA], inptr1[indexB]); + } + } + + void compute(const Operator &_op, + const RuntimeObj *context) const override + { +#define CASE(N) \ + case N: \ + doCompute::t>(_op, context) + + int dataTypeIdx = _op->getDType().getIndex(); + switch (dataTypeIdx) + { + CASE(1); // DataType::Float32 + break; + CASE(12); // DataType::UInt32 + break; + default: + IT_TODO_HALT(); + } + } + }; + + REGISTER_KERNEL(Device::CPU, OpType::Add, NativeElementWise, "addNaive_CPU"); + REGISTER_KERNEL(Device::CPU, OpType::Sub, NativeElementWise, "subNaive_CPU"); + REGISTER_KERNEL(Device::CPU, OpType::Mul, NativeElementWise, "mulNaive_CPU"); + REGISTER_KERNEL(Device::CPU, OpType::Div, NativeElementWise, "divNaive_CPU"); +}; // namespace infini diff --git a/src/kernels/cpu/transpose.cc b/src/kernels/cpu/transpose.cc new file mode 100644 index 0000000..46292d4 --- /dev/null +++ b/src/kernels/cpu/transpose.cc @@ -0,0 
+1,60 @@ +#include "operators/transpose.h" +#include "core/kernel.h" + +namespace infini { + +inline Shape idx2Pos(const Shape &shape, size_t idx) { + Shape pos = Shape(shape.size(), 0); + auto rest = idx, curDimId = shape.size() - 1; + while (rest > 0) { + pos[curDimId] = rest % shape[curDimId]; + rest /= shape[curDimId]; + curDimId--; + } + return pos; +} + +class NaiveTranspose : public CpuKernelWithoutConfig { + template + void doCompute(const Operator &_op, const RuntimeObj *context) const { + auto op = as(_op); + auto inputs = op->getInputs(), outputs = op->getOutputs(); + const auto &inDim = inputs[0]->getDims(); + const auto &perm = op->getPermute(); + + size_t inSize = inputs[0]->size(); + auto inPtr = inputs[0]->getRawDataPtr(), + outPtr = outputs[0]->getRawDataPtr(); + // #pragma omp parallel for + for (size_t inIdx = 0; inIdx < inSize; ++inIdx) { + auto posInput = idx2Pos(inDim, inIdx); + int outIdx = 0; + for (size_t j = 0, jEnd = perm.size(); j < jEnd; ++j) { + outIdx = outIdx * inDim[perm[j]] + posInput[perm[j]]; + } + outPtr[outIdx] = inPtr[inIdx]; + } + } + + void compute(const Operator &_op, + const RuntimeObj *context) const override { +#define CASE(N) \ + case N: \ + doCompute::t>(_op, context) + + int dataTypeIdx = _op->getDType().getIndex(); + switch (dataTypeIdx) { + CASE(1); // DataType::Float32 + break; + CASE(12); // DataType::UInt32 + break; + default: + IT_TODO_HALT(); + } + } +}; + +REGISTER_KERNEL(Device::CPU, OpType::Transpose, NaiveTranspose, + "TransposeNaive_CPU"); + +} // namespace infini diff --git a/src/kernels/cpu/unary.cc b/src/kernels/cpu/unary.cc new file mode 100644 index 0000000..7e60a4d --- /dev/null +++ b/src/kernels/cpu/unary.cc @@ -0,0 +1,106 @@ +#include "operators/unary.h" +#include "core/constants.h" +#include "core/kernel.h" + +namespace infini +{ + class NativeUnary : public CpuKernelWithoutConfig + { + template + static T reluCompute(T val) + { + return std::max(T(0), val); + } + + template + void doCompute(const 
Operator &_op, const RuntimeObj *context) const + { + auto op = as(_op); + T *inptr = op->getInputs(0)->getRawDataPtr(); + T *outptr = op->getOutput()->getRawDataPtr(); + + auto outDim = op->getOutput()->getDims(); + auto n = op->getOutput()->size(); + + T (*_doCompute) + (T val); + switch (op->getOpType().underlying()) + { + case OpType::Relu: + _doCompute = reluCompute; + break; + default: + IT_TODO_HALT(); + } + + for (size_t offset = 0; offset < n; offset++) + { + outptr[offset] = _doCompute(inptr[offset]); + } + } + + void compute(const Operator &_op, + const RuntimeObj *context) const override + { +#define CASE(N) \ + case N: \ + doCompute::t>(_op, context) + + int dataTypeIdx = _op->getDType().getIndex(); + switch (dataTypeIdx) + { + CASE(1); // DataType::Float32 + break; + CASE(12); // DataType::UInt32 + break; + default: + IT_TODO_HALT(); + } + } + }; + + class Clip : public CpuKernelWithoutConfig + { + template + void doCompute(const Operator &_op, const RuntimeObj *context) const + { + auto op = as(_op); + T *inptr = op->getInputs(0)->getRawDataPtr(); + T *outptr = op->getOutput()->getRawDataPtr(); + auto minValue = op->getMin(); + auto maxValue = op->getMax(); + + auto n = op->getOutput()->size(); + for (size_t offset = 0; offset < n; offset++) + { + auto val = *inptr++; + *outptr++ = (minValue && val < *minValue) ? *minValue + : (maxValue && val > *maxValue) ? 
*maxValue + : val; + } + } + + void compute(const Operator &_op, + const RuntimeObj *context) const override + { +#define CASE(N) \ + case N: \ + doCompute::t>(_op, context) + + int dataTypeIdx = _op->getDType().getIndex(); + switch (dataTypeIdx) + { + CASE(1); // DataType::Float32 + break; + CASE(12); // DataType::UInt32 + break; + default: + IT_TODO_HALT(); + } + } + }; + + REGISTER_KERNEL(Device::CPU, OpType::Relu, NativeUnary, "reluNaive_CPU"); + REGISTER_KERNEL(Device::CPU, OpType::Clip, Clip, "Clip_CPU"); + +}; // namespace infini diff --git a/src/operators/concat.cc b/src/operators/concat.cc new file mode 100644 index 0000000..d196330 --- /dev/null +++ b/src/operators/concat.cc @@ -0,0 +1,38 @@ +#include "operators/concat.h" +#include "utils/operator_utils.h" + +namespace infini { +ConcatObj::ConcatObj(GraphObj *graph, TensorVec inputs, Tensor output, int _dim) + : OperatorObj(OpType::Concat, inputs, {output}) { + int rank = inputs[0]->getRank(); + dim = get_real_axis(_dim, rank); + IT_ASSERT(checkValid(graph)); +} + +optional> ConcatObj::inferShape(const TensorVec &inputs) { + Shape dims = inputs[0]->getDims(); + auto rank = inputs[0]->getRank(); + + // =================================== 作业 =================================== + // TODO:修改 dims,返回正确的 concat 后的 shape + // REF: https://onnx.ai/onnx/operators/onnx__Concat.html#concat-13 + // =================================== 作业 =================================== + + return {{dims}}; +} + +std::string ConcatObj::toString() const { + std::ostringstream os; + os << "Concat[" << getGuid() << "]"; + os << "("; + for (auto input : inputs) + os << vecToString(input->getDims()) << ","; + os << "dim=" << dim << ","; + os << "input="; + for (auto input : inputs) + os << input->getGuid() << ","; + os << "output=" << outputs[0]->getGuid() << ")"; + return os.str(); +} + +} // namespace infini diff --git a/src/operators/element_wise.cc b/src/operators/element_wise.cc new file mode 100644 index 0000000..c1b4ef1 --- 
/dev/null +++ b/src/operators/element_wise.cc @@ -0,0 +1,33 @@ +#include "operators/element_wise.h" +#include "utils/operator_utils.h" + +namespace infini +{ + ElementWiseObj::ElementWiseObj(OpType type, GraphObj *graph, Tensor input0, + Tensor input1, Tensor output) + : OperatorObj(type, {input0, input1}, {output}) + { + IT_ASSERT(checkValid(graph)); + } + + optional> ElementWiseObj::inferShape(const TensorVec &inputs) + { + const auto A = inputs[0], B = inputs[1]; + auto res = infer_broadcast(A->getDims(), B->getDims()); + return {{res}}; + } + + std::string ElementWiseObj::toString() const + { + std::ostringstream os; + os << type.toString() << "[" << getGuid() << "]"; + os << "("; + os << vecToString(inputs[0]->getDims()) << ","; + os << vecToString(inputs[1]->getDims()) << ","; + os << "input0=" << inputs[0]->getGuid() << ","; + os << "input1=" << inputs[1]->getGuid() << ","; + os << "output=" << outputs[0]->getGuid() << ")"; + return os.str(); + } + +}; // namespace infini diff --git a/src/operators/matmul.cc b/src/operators/matmul.cc new file mode 100644 index 0000000..0ce94a1 --- /dev/null +++ b/src/operators/matmul.cc @@ -0,0 +1,33 @@ +#include "operators/matmul.h" + +namespace infini +{ + + MatmulObj::MatmulObj(GraphObj *graph, Tensor A, Tensor B, Tensor C, bool transA, + bool transB) + : OperatorObj(OpType::MatMul, TensorVec{A, B}, {C}), + transA(transA), transB(transB) + { + IT_ASSERT(checkValid(graph)); + } + + string MatmulObj::toString() const + { + std::ostringstream os; + os << "Matmul([" << (transA ? "A^T" : "A") << "," << (transB ? 
"B^T" : "B]") + << ",A=" << inputs[0]->getGuid() + << ",B=" << inputs[1]->getGuid() << ",C=" << outputs[0]->getGuid() + << ",mnk=[" << m << "," << n << "," << k << "])"; + return os.str(); + } + + optional> MatmulObj::inferShape(const TensorVec &inputs) + { + // =================================== 作业 =================================== + // TODO:返回经过 matmul 操作后的 shape + // REF: https://github.com/onnx/onnx/blob/main/docs/Operators.md#gemm + // =================================== 作业 =================================== + return {{}}; + } + +} // namespace infini \ No newline at end of file diff --git a/src/operators/transpose.cc b/src/operators/transpose.cc new file mode 100644 index 0000000..f5e9659 --- /dev/null +++ b/src/operators/transpose.cc @@ -0,0 +1,50 @@ +#include "operators/transpose.h" + +namespace infini +{ + TransposeObj::TransposeObj(GraphObj *graph, Tensor input, Tensor output, + vector permute) + : OperatorObj(OpType::Transpose, {input}, {output}) + { + auto rank = input->getRank(); + if (permute.empty()) + { + for (size_t i = 0; i < rank; ++i) + { + transposePermute[i] = i; + } + } + else + { + IT_ASSERT(rank == permute.size()); + transposePermute = std::move(permute); + } + IT_ASSERT(checkValid(graph)); + } + + optional> TransposeObj::inferShape(const TensorVec &inputs) + { + const auto A = inputs[0]; + auto input_dim = A->getDims(); + auto output_dim = input_dim; + int rank = A->getRank(); + + // =================================== 作业 =================================== + // TODO:修改 output_dim,返回正确的 transpose 后的 shape + // REF: https://onnx.ai/onnx/operators/onnx__Transpose.html#transpose-21 + // =================================== 作业 =================================== + + return {{}}; + } + + std::string TransposeObj::toString() const + { + std::ostringstream os; + os << type.toString() << "[" << getGuid() << "]"; + os << "("; + os << vecToString(inputs[0]->getDims()) << ","; + os << "input=" << inputs[0]->getGuid() << ","; + os << "output=" << 
outputs[0]->getGuid() << ")"; + return os.str(); + } +}; // namespace infini diff --git a/src/operators/unary.cc b/src/operators/unary.cc new file mode 100644 index 0000000..e773506 --- /dev/null +++ b/src/operators/unary.cc @@ -0,0 +1,148 @@ +#include "operators/unary.h" + +namespace infini +{ + UnaryObj::UnaryObj(OpType type, GraphObj *graph, Tensor input, Tensor output) + : OperatorObj(type, {input}, {output}) + { + IT_ASSERT(checkValid(graph)); + } + + optional> UnaryObj::inferShape(const TensorVec &inputs) + { + const auto A = inputs[0]; + return {{A->getDims()}}; + } + + std::string UnaryObj::toString() const + { + std::ostringstream os; + os << type.toString() << "[" << getGuid() << "]"; + os << "("; + os << vecToString(inputs[0]->getDims()) << ","; + os << "input=" << inputs[0]->getGuid() << ","; + os << "output=" << outputs[0]->getGuid() << ")"; + return os.str(); + } + + ClipObj::ClipObj(GraphObj *graph, Tensor input, Tensor output, + std::optional min, std::optional max) + : OperatorObj(OpType::Clip, {input}, {output}), minValue(min), + maxValue(max) + { + IT_ASSERT(checkValid(graph)); + } + + optional> ClipObj::inferShape(const TensorVec &inputs) + { + // =================================== 作业 =================================== + // TODO:返回经过 clip 操作后的 shape + // REF: https://onnx.ai/onnx/operators/onnx__Clip.html#clip-13 + // =================================== 作业 =================================== + return {{}}; + } + + std::string ClipObj::toString() const + { + std::ostringstream os; + os << type.toString() << "[" << getGuid() << "]"; + os << "("; + os << vecToString(inputs[0]->getDims()) << ","; + os << "input=" << inputs[0]->getGuid() << ","; + os << "output=" << outputs[0]->getGuid() << ")"; + return os.str(); + } + + CastObj::CastObj(GraphObj *graph, Tensor input, Tensor output, CastType type) + : OperatorObj(OpType::Cast, {input}, {output}), castType(type) + { + IT_ASSERT(checkValid(graph)); + } + + vector CastObj::inferDataType(const 
TensorVec &inputs) const + { + // =================================== 作业 =================================== + // TODO:返回经过 cast 操作后, 输出 tensor 的数目和数据类型 + // REF_FILE: src/core/operator.cc + // REF: https://onnx.ai/onnx/operators/onnx__Cast.html#cast-21 + // =================================== 作业 =================================== + return {}; + } + + optional> CastObj::inferShape(const TensorVec &inputs) + { + // =================================== 作业 =================================== + // TODO:返回经过 cast 操作后的 shape + // REF: https://onnx.ai/onnx/operators/onnx__Cast.html#cast-21 + // =================================== 作业 =================================== + return {{}}; + } + + std::string CastObj::toString() const + { + std::ostringstream os; + os << type.toString() << "[" << getGuid() << "]"; + os << "("; + os << "output=" << outputs[0]->getGuid() << ")"; + return os.str(); + } + + DataType CastObj::getOutputDataType() const + { + switch (castType) + { + case CastType::Float2Float16: + return DataType::Float16; + case CastType::Float2Int64: + return DataType::Int64; + case CastType::Float2Int32: + return DataType::Int32; + case CastType::Float2Int16: + return DataType::Int16; + case CastType::Float2Int8: + return DataType::Int8; + case CastType::Int322Float: + return DataType::Float32; + case CastType::Int322Int8: + return DataType::Int8; + case CastType::Int322Int16: + return DataType::Int16; + case CastType::Int162Float: + return DataType::Float32; + case CastType::Int162Int32: + return DataType::Int32; + case CastType::Int82Float: + return DataType::Float32; + case CastType::Int82Int16: + return DataType::Int16; + case CastType::Int82Int32: + return DataType::Int32; + case CastType::Uint82Float: + return DataType::Float32; + case CastType::Uint82Int32: + return DataType::Int32; + case CastType::Uint82Int64: + return DataType::Int64; + case CastType::Int322Int64: + return DataType::Int64; + case CastType::Int642Int32: + return DataType::Int32; + case 
CastType::Int642Uint32: + return DataType::UInt32; + case CastType::Int642Float: + return DataType::Float32; + case CastType::Uint322Int64: + return DataType::Int64; + case CastType::Float162Float: + return DataType::Float32; + case CastType::BFloat162Float: + return DataType::Float32; + case CastType::Float2BFloat16: + return DataType::BFloat16; + case CastType::Float2Float: + return DataType::Float32; + default: + IT_TODO_HALT(); + } + } +}; // namespace infini diff --git a/src/utils/exception.cc b/src/utils/exception.cc new file mode 100644 index 0000000..228a39a --- /dev/null +++ b/src/utils/exception.cc @@ -0,0 +1,5 @@ +#include "utils/exception.h" + +namespace infini { +Exception::Exception(const std::string &msg) : std::runtime_error(msg) {} +} // namespace infini diff --git a/src/utils/operator_utils.cc b/src/utils/operator_utils.cc new file mode 100644 index 0000000..edbd2c8 --- /dev/null +++ b/src/utils/operator_utils.cc @@ -0,0 +1,69 @@ +#include "utils/operator_utils.h" +#include "core/runtime.h" + +namespace infini { + +Shape infer_broadcast(const Shape &A, const Shape &B) { + + // =================================== 作业 =================================== + // TODO:对 A 和 B 进行双向广播,返回广播后的形状。 + // REF: https://github.com/onnx/onnx/blob/main/docs/Broadcasting.md + // =================================== 作业 =================================== + + return {}; +} + +int get_real_axis(const int &axis, const int &rank) { + IT_ASSERT(rank >= 1); + IT_ASSERT(axis >= -rank && axis <= (rank - 1)); + int newAxis; + if (axis < 0) { + newAxis = rank + axis; + } else { + newAxis = axis; + } + return newAxis; +} + +Shape locate_index(size_t inputN, const Shape &shape) { + Shape ans(shape.size()); + auto i = ans.rbegin(); + auto j = shape.rbegin(), ej = shape.rend(); + while (j != ej) { + auto div = std::div(inputN, *j++); + *i++ = div.rem; + inputN = div.quot; + } + return ans; +} + +size_t delocate_index(const Shape &shapeIndex, const Shape &shape, + const Shape &stride) 
{ + size_t ans = 0; + Shape index(shapeIndex.size()); + IT_ASSERT(shapeIndex.size() == shape.size()); + IT_ASSERT(shape.size() == stride.size()); + for (size_t i = 0; i < shape.size(); ++i) { + index[i] = shapeIndex[i] % shape[i]; + ans += index[i] * stride[i]; + } + return ans; +} + +std::string device_to_str(Device device) { + std::string deviceStr; + switch (device) { + case Device::CPU: + return "CPU"; + default: + IT_TODO_HALT(); + } +} + +std::string get_kernel_attrs_str(const KernelAttrs &kernelAttrs) { + std::string deviceStr = device_to_str(std::get<0>(kernelAttrs)); + std::string opStr = OpType(std::get<1>(kernelAttrs)).toString(); + return deviceStr + ", " + opStr; +} + +} // namespace infini diff --git a/test/core/test_allocator.cc b/test/core/test_allocator.cc new file mode 100644 index 0000000..0515edc --- /dev/null +++ b/test/core/test_allocator.cc @@ -0,0 +1,74 @@ +#include "core/graph.h" +#include "core/kernel.h" +#include "core/runtime.h" +#include "operators/unary.h" + +#include "test.h" + +namespace infini +{ + TEST(Allocator, testAlloc) + { + Shape shape = Shape{1, 2, 2, 3}; + Runtime runtime = NativeCpuRuntimeObj::getInstance(); + Tensor a = make_ref(shape, DataType::Float32, runtime); + Tensor b = make_ref(shape, DataType::Float32, runtime); + Tensor c = make_ref(shape, DataType::Float32, runtime); + Tensor d = make_ref(shape, DataType::Float32, runtime); + Allocator allocator = Allocator(runtime); + // allocate a->b->c + size_t offsetA = allocator.alloc(a->getBytes()); + size_t offsetB = allocator.alloc(b->getBytes()); + size_t offsetC = allocator.alloc(c->getBytes()); + // free b, then allocate d + allocator.free(offsetB, b->getBytes()); + size_t offsetD = allocator.alloc(d->getBytes()); + // expected to be a->d->c + EXPECT_EQ(offsetB, offsetD); + ASSERT_FALSE(offsetA == 0 && offsetB == 0 && offsetC == 0 && offsetD == 0); + } + + TEST(Allocator, testAllocWithEndFreeBlock) + { + Shape shape = Shape{1, 2, 2, 3}; + Runtime runtime = 
NativeCpuRuntimeObj::getInstance(); + Tensor a = make_ref(shape, DataType::Float32, runtime); + Tensor b = make_ref(shape, DataType::Float32, runtime); + Tensor c = make_ref(shape, DataType::Float32, runtime); + Tensor d = + make_ref(Shape{2, 2, 2, 3}, DataType::Float32, runtime); + Allocator allocator = Allocator(runtime); + // allocate a->b->c + allocator.alloc(a->getBytes()); + allocator.alloc(b->getBytes()); + size_t offsetC = allocator.alloc(c->getBytes()); + allocator.info(); + // free c, then allocate d + allocator.free(offsetC, c->getBytes()); + size_t offsetD = allocator.alloc(d->getBytes()); + allocator.info(); + // expected to be a->b->d, with no free block between b and c + EXPECT_EQ(offsetC, offsetD); + } + + TEST(Allocator, testGetPtr) + { + Shape shape = Shape{1, 2, 2, 3}; + Runtime runtime = NativeCpuRuntimeObj::getInstance(); + Tensor a = make_ref(shape, DataType::Float32, runtime); + Tensor b = make_ref(shape, DataType::Float32, runtime); + Tensor c = make_ref(shape, DataType::Float32, runtime); + Tensor d = make_ref(shape, DataType::Float32, runtime); + Allocator allocator = Allocator(runtime); + // allocate a->b->c->d + allocator.alloc(a->getBytes()); + allocator.alloc(b->getBytes()); + allocator.alloc(c->getBytes()); + allocator.alloc(d->getBytes()); + // multiple calls to the getPtr() function should return the same pointer + void *ptr1 = allocator.getPtr(); + void *ptr2 = allocator.getPtr(); + EXPECT_EQ(ptr1, ptr2); + } + +} // namespace infini diff --git a/test/core/test_graph.cc b/test/core/test_graph.cc new file mode 100644 index 0000000..89c22dd --- /dev/null +++ b/test/core/test_graph.cc @@ -0,0 +1,40 @@ +#include "core/graph.h" +#include "core/kernel.h" +#include "core/runtime.h" +#include "operators/matmul.h" +#include "operators/transpose.h" + +#include "test.h" + +namespace infini +{ + TEST(Graph, Optimize) + { + Runtime runtime = NativeCpuRuntimeObj::getInstance(); + Graph g = make_ref(runtime); + Tensor i1 = g->addTensor({2, 3, 4, 
5}, DataType::UInt32); + Tensor i2 = g->addTensor({2, 3, 4, 5}, DataType::UInt32); + Tensor t1 = g->addTensor({2, 3, 5, 4}, DataType::UInt32); + Tensor t2 = g->addTensor({2, 3, 4, 5}, DataType::UInt32); + Tensor t3 = g->addTensor({2, 3, 5, 4}, DataType::UInt32); + Tensor o = g->addTensor({2, 3, 4, 4}, DataType::UInt32); + g->addOpWithOutputs(i1, t1, Shape{0, 1, 3, 2}); + g->addOpWithOutputs(t1, t2, Shape{0, 1, 3, 2}); + g->addOpWithOutputs(i2, t3, Shape{0, 1, 3, 2}); + g->addOpWithOutputs(t2, t3, o); + // 优化前 + g->print(); + g->optimize(); + // 优化后 + g->print(); + EXPECT_EQ(g->getOperators().size(), 1); + EXPECT_EQ(g->getTensors().size(), 3); + EXPECT_EQ(g->getOperators()[0]->getOpType().underlying(), 8); + auto op = as(g->getOperators()[0]); + EXPECT_EQ(op->getInputs(0)->getGuid(), 2); + EXPECT_EQ(op->getInputs(1)->getGuid(), 3); + EXPECT_EQ(op->getOutputs()[0], o); + EXPECT_EQ(op->getTransA(), false); + EXPECT_EQ(op->getTransB(), true); + } +} \ No newline at end of file diff --git a/test/kernels/nativecpu/test_nativecpu_concat.cc b/test/kernels/nativecpu/test_nativecpu_concat.cc new file mode 100644 index 0000000..fc87fb1 --- /dev/null +++ b/test/kernels/nativecpu/test_nativecpu_concat.cc @@ -0,0 +1,28 @@ +#include "core/graph.h" +#include "core/runtime.h" +#include "operators/concat.h" + +#include "test.h" + +namespace infini { + +TEST(Concat, NativeCpu) { + Runtime runtime = NativeCpuRuntimeObj::getInstance(); + Graph g = make_ref(runtime); + + auto t1 = g->addTensor({2, 2, 3, 1}, DataType::Float32); + auto t2 = g->addTensor({2, 2, 1, 1}, DataType::Float32); + auto t3 = g->addTensor({2, 2, 2, 1}, DataType::Float32); + auto op = g->addOp(TensorVec{t1, t2, t3}, nullptr, 2); + g->dataMalloc(); + t1->setData(IncrementalGenerator()); + t2->setData(OneGenerator()); + t3->setData(OneGenerator()); + + runtime->run(g); + EXPECT_TRUE(op->getOutput()->equalData( + vector{0, 1, 2, 1, 1, 1, 3, 4, 5, 1, 1, 1, + 6, 7, 8, 1, 1, 1, 9, 10, 11, 1, 1, 1})); +} + +} // namespace 
infini diff --git a/test/kernels/nativecpu/test_nativecpu_elementwise.cc b/test/kernels/nativecpu/test_nativecpu_elementwise.cc new file mode 100644 index 0000000..c6ef191 --- /dev/null +++ b/test/kernels/nativecpu/test_nativecpu_elementwise.cc @@ -0,0 +1,44 @@ +#include "core/graph.h" +#include "core/runtime.h" +#include "operators/element_wise.h" + +#include "test.h" + +namespace infini { + +using ExpectOutput = vector; +template +void testElementWiseNativeCpu( + const std::function &generator1, + const std::function &generator2, + const Shape &shape1, const Shape &shape2, const ExpectOutput &ansVec) { + Runtime runtime = NativeCpuRuntimeObj::getInstance(); + Graph g = make_ref(runtime); + auto t1 = g->addTensor(shape1, DataType::Float32); + auto t2 = g->addTensor(shape2, DataType::Float32); + + auto op = g->addOp(t1, t2, nullptr); + g->dataMalloc(); + t1->setData(generator1); + t2->setData(generator2); + + runtime->run(g); + EXPECT_TRUE(op->getOutput()->equalData(ansVec)); +} + +TEST(ElementWise, NativeCpu) { + testElementWiseNativeCpu( + IncrementalGenerator(), IncrementalGenerator(), Shape{1, 2, 2, 3, 1}, + Shape{2, 1, 1}, ExpectOutput{0, 1, 2, 4, 5, 6, 6, 7, 8, 10, 11, 12}); + testElementWiseNativeCpu( + IncrementalGenerator(), IncrementalGenerator(), Shape{1, 2, 2, 3, 1}, + Shape{2, 1, 1}, ExpectOutput{0, 0, 0, 3, 4, 5, 0, 0, 0, 9, 10, 11}); + testElementWiseNativeCpu( + IncrementalGenerator(), IncrementalGenerator(), Shape{1, 2, 2, 3, 1}, + Shape{2, 1, 1}, ExpectOutput{0, 1, 2, 2, 3, 4, 6, 7, 8, 8, 9, 10}); + testElementWiseNativeCpu( + IncrementalGenerator(), OneGenerator(), Shape{1, 2, 2, 3, 1}, + Shape{2, 1, 1}, ExpectOutput{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}); +} + +} // namespace infini diff --git a/test/kernels/nativecpu/test_nativecpu_transpose.cc b/test/kernels/nativecpu/test_nativecpu_transpose.cc new file mode 100644 index 0000000..501d402 --- /dev/null +++ b/test/kernels/nativecpu/test_nativecpu_transpose.cc @@ -0,0 +1,27 @@ +#include 
"core/graph.h" +#include "core/kernel.h" +#include "core/runtime.h" +#include "operators/transpose.h" + +#include "test.h" + +namespace infini { + +TEST(Transpose, NativeCpu) { + Runtime runtime = NativeCpuRuntimeObj::getInstance(); + Graph g = make_ref(runtime); + + Shape permute = {0, 2, 1, 3}; + auto input = g->addTensor({1, 2, 3, 4}, DataType::Float32); + auto op = g->addOp(input, nullptr, permute); + g->dataMalloc(); + input->setData(IncrementalGenerator()); + + runtime->run(g); + + EXPECT_TRUE(op->getOutput(0)->equalData(vector{0, 1, 2, 3, 12, 13, 14, 15, + 4, 5, 6, 7, 16, 17, 18, 19, + 8, 9, 10, 11, 20, 21, 22, 23})); +} + +} // namespace infini diff --git a/test/operators/test_cast.cc b/test/operators/test_cast.cc new file mode 100644 index 0000000..3177751 --- /dev/null +++ b/test/operators/test_cast.cc @@ -0,0 +1,23 @@ +#include "core/graph.h" +#include "core/kernel.h" +#include "core/runtime.h" +#include "operators/unary.h" + +#include "test.h" + +namespace infini +{ + + TEST(Cast, ShapeInference) + { + Runtime runtime = NativeCpuRuntimeObj::getInstance(); + { + Graph g = make_ref(runtime); + Tensor i0 = g->addTensor({2}, DataType::Float32); + auto op = g->addOp(i0, nullptr, CastType::Float2Float16); + EXPECT_EQ(op->getOutput()->getDims(), (Shape{2})); + EXPECT_EQ(op->getOutDType(), (DataType::Float16)); + } + } + +} // namespace infini diff --git a/test/operators/test_clip.cc b/test/operators/test_clip.cc new file mode 100644 index 0000000..bd4e07f --- /dev/null +++ b/test/operators/test_clip.cc @@ -0,0 +1,23 @@ +#include "core/graph.h" +#include "core/kernel.h" +#include "core/runtime.h" +#include "operators/unary.h" + +#include "test.h" + +namespace infini { + + TEST(Clip, ShapeInference) + { + // Runtime + Runtime runtime = NativeCpuRuntimeObj::getInstance(); + Graph g = make_ref(runtime); + Tensor i0 = g->addTensor({1, 2, 2, 3}, DataType::Float32); + float min = 1.0; + float max = 4.0; + auto op = g->addOp(i0, nullptr, min, max); + 
EXPECT_EQ(op->getOutput()->getDims(), (Shape{1, 2, 2, 3})); + EXPECT_EQ(op->getOutDType(), (DataType::Float32)); + } + +} // namespace infini diff --git a/test/operators/test_concat.cc b/test/operators/test_concat.cc new file mode 100644 index 0000000..8984b9f --- /dev/null +++ b/test/operators/test_concat.cc @@ -0,0 +1,16 @@ +#include "core/graph.h" +#include "core/runtime.h" +#include "operators/concat.h" +#include "test.h" + +namespace infini { +TEST(Concat, ShapeInfer) { + Runtime runtime = NativeCpuRuntimeObj::getInstance(); + Graph g = make_ref(runtime); + auto t1 = g->addTensor({1, 3, 2, 4}, DataType::Float32); + auto t2 = g->addTensor({1, 3, 2, 5}, DataType::Float32); + + auto op = g->addOp(TensorVec{t1, t2}, nullptr, 3); + EXPECT_EQ(op->getOutput()->getDims(), (Shape{1, 3, 2, 9})); +} +} // namespace infini diff --git a/test/operators/test_element_wise.cc b/test/operators/test_element_wise.cc new file mode 100644 index 0000000..f4fdd66 --- /dev/null +++ b/test/operators/test_element_wise.cc @@ -0,0 +1,66 @@ +#include "core/graph.h" +#include "core/kernel.h" +#include "core/runtime.h" +#include "operators/element_wise.h" + +#include "test.h" + +namespace infini { + + TEST(ElementWise, ShapeInference) + { + Runtime runtime = NativeCpuRuntimeObj::getInstance(); + { + Graph g = make_ref(runtime); + Tensor i0 = g->addTensor({2, 3, 3, 4}, DataType::UInt32); + Tensor i1 = g->addTensor({2, 3, 3, 4}, DataType::UInt32); + auto op = g->addOp(i0, i1, nullptr); + EXPECT_EQ(op->getOutput()->getDims(), (Shape{2, 3, 3, 4})); + } + } + + TEST(ElementWise, Broadcasting) + { + Runtime runtime = NativeCpuRuntimeObj::getInstance(); + { + Graph g = make_ref(runtime); + Tensor i0 = g->addTensor({2, 3, 4, 5}, DataType::UInt32); + Tensor i1 = g->addTensor({}, DataType::UInt32); + auto op = g->addOp(i0, i1, nullptr); + EXPECT_EQ(op->getOutput()->getDims(), (Shape{2, 3, 4, 5})); + } + + { + Graph g = make_ref(runtime); + Tensor i0 = g->addTensor({2, 3, 4, 5}, DataType::UInt32); + 
Tensor i1 = g->addTensor({5}, DataType::UInt32); + auto op = g->addOp(i0, i1, nullptr); + EXPECT_EQ(op->getOutput()->getDims(), (Shape{2, 3, 4, 5})); + } + + { + Graph g = make_ref(runtime); + Tensor i0 = g->addTensor({4, 5}, DataType::UInt32); + Tensor i1 = g->addTensor({2, 3, 4, 5}, DataType::UInt32); + auto op = g->addOp(i0, i1, nullptr); + EXPECT_EQ(op->getOutput()->getDims(), (Shape{2, 3, 4, 5})); + } + + { + Graph g = make_ref(runtime); + Tensor i0 = g->addTensor({1, 4, 5}, DataType::UInt32); + Tensor i1 = g->addTensor({2, 3, 1, 1}, DataType::UInt32); + auto op = g->addOp(i0, i1, nullptr); + EXPECT_EQ(op->getOutput()->getDims(), (Shape{2, 3, 4, 5})); + } + + { + Graph g = make_ref(runtime); + Tensor i0 = g->addTensor({3, 4, 5}, DataType::UInt32); + Tensor i1 = g->addTensor({2, 1, 1, 1}, DataType::UInt32); + auto op = g->addOp(i0, i1, nullptr); + EXPECT_EQ(op->getOutput()->getDims(), (Shape{2, 3, 4, 5})); + } + } + +} // namespace infini diff --git a/test/operators/test_matmul.cc b/test/operators/test_matmul.cc new file mode 100644 index 0000000..32fbc36 --- /dev/null +++ b/test/operators/test_matmul.cc @@ -0,0 +1,57 @@ +#include "core/graph.h" +#include "core/kernel.h" +#include "core/runtime.h" +#include "operators/matmul.h" + +#include "test.h" + +namespace infini +{ + using ExpectOutput = vector; + + TEST(Matmul, ShapeInference) + { + auto runtime = NativeCpuRuntimeObj::getInstance(); + { + Graph g = make_ref(runtime); + auto A = g->addTensor(Shape{1, 3, 5}); + auto B = g->addTensor(Shape{1, 5, 2}); + auto matmul = g->addOp(A, B, nullptr); + auto C = matmul->getOutputs()[0]; + EXPECT_EQ(C->getDims(), (Shape{1, 3, 2})); + } + { + Graph g = make_ref(runtime); + auto A = g->addTensor(Shape{3, 5, 4}); + auto B = g->addTensor(Shape{3, 5, 2}); + auto matmul = g->addOp(A, B, nullptr, true, false); + auto C = matmul->getOutputs()[0]; + EXPECT_EQ(C->getDims(), (Shape{3, 4, 2})); + } + { + Graph g = make_ref(runtime); + auto A = g->addTensor(Shape{1, 2, 3, 5}); + 
auto B = g->addTensor(Shape{1, 1, 5, 2}); + auto matmul = g->addOp(A, B, nullptr); + auto C = matmul->getOutputs()[0]; + EXPECT_EQ(C->getDims(), (Shape{1, 2, 3, 2})); + } + { + Graph g = make_ref(runtime); + auto A = g->addTensor(Shape{2, 3, 5, 4}); + auto B = g->addTensor(Shape{1, 3, 5, 2}); + auto matmul = g->addOp(A, B, nullptr, true, false); + auto C = matmul->getOutputs()[0]; + EXPECT_EQ(C->getDims(), (Shape{2, 3, 4, 2})); + } + { + Graph g = make_ref(runtime); + auto A = g->addTensor(Shape{2, 3, 5, 4}); + auto B = g->addTensor(Shape{1, 3, 2, 5}); + auto matmul = g->addOp(A, B, nullptr, true, true); + auto C = matmul->getOutputs()[0]; + EXPECT_EQ(C->getDims(), (Shape{2, 3, 4, 2})); + } + } + +}; // namespace infini \ No newline at end of file diff --git a/test/operators/test_transpose.cc b/test/operators/test_transpose.cc new file mode 100644 index 0000000..1c12b79 --- /dev/null +++ b/test/operators/test_transpose.cc @@ -0,0 +1,32 @@ +#include "core/graph.h" +#include "core/kernel.h" +#include "core/runtime.h" +#include "operators/transpose.h" + +#include "test.h" + +namespace infini { + +TEST(Transpose, ShapeInference) { + Runtime runtime = NativeCpuRuntimeObj::getInstance(); + { + Graph g = make_ref(runtime); + Tensor i = g->addTensor({1, 2, 3, 4}, DataType::Float32); + auto op = g->addOp(i, nullptr, Shape{0, 1, 2, 3}); + EXPECT_EQ(op->getOutput()->getDims(), (Shape{1, 2, 3, 4})); + } + { + Graph g = make_ref(runtime); + Tensor i = g->addTensor({1, 2, 3, 4}, DataType::Float32); + auto op = g->addOp(i, nullptr, Shape{0, 2, 1, 3}); + EXPECT_EQ(op->getOutput()->getDims(), (Shape{1, 3, 2, 4})); + } + { + Graph g = make_ref(runtime); + Tensor i = g->addTensor({2, 3, 4}, DataType::Float32); + auto op = g->addOp(i, nullptr, Shape{0, 2, 1}); + EXPECT_EQ(op->getOutput()->getDims(), (Shape{2, 4, 3})); + } +} + +} // namespace infini