From 5feda886661f247d30007323b9deae2fa9ce4478 Mon Sep 17 00:00:00 2001 From: chengyujuan Date: Fri, 29 Jun 2018 14:53:19 +0800 Subject: [PATCH 1/5] update saber_scale when the input num of scale is two --- saber/funcs/impl/cuda/base/cuda_c/saber_scale.cu | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/saber/funcs/impl/cuda/base/cuda_c/saber_scale.cu b/saber/funcs/impl/cuda/base/cuda_c/saber_scale.cu index ab9a0c9d1..c78a332d5 100644 --- a/saber/funcs/impl/cuda/base/cuda_c/saber_scale.cu +++ b/saber/funcs/impl/cuda/base/cuda_c/saber_scale.cu @@ -47,6 +47,10 @@ SaberStatus SaberScaledata(); auto out_data = outputs[0]->mutable_data(); const int count = inputs[0]->valid_size(); + if (inputs.size() > 1) { + _scale_dim = inputs[1]->valid_size(); + _inner_dim = count / _scale_dim; + } if (_scale_dim > 1 || inputs.size() > 1) { auto scale_data = inputs.size() > 1 ? inputs[1]->data() : _weight.data(); auto bias_data = param.bias_term ? _bias.data() : NULL; From 8ac822d865b90c6853a594d257e48c0323944526 Mon Sep 17 00:00:00 2001 From: chengyujuan Date: Fri, 29 Jun 2018 18:34:39 +0800 Subject: [PATCH 2/5] move privacy infomation --- .../graph/graph_parser_from_model_test.cpp | 3 +-- .../net/net_exec_multi_thread_test.cpp | 2 +- test/framework/net/net_exec_test.cpp | 26 +------------------ .../net/net_exec_test_sequence_labeling.cpp | 2 +- 4 files changed, 4 insertions(+), 29 deletions(-) diff --git a/test/framework/graph/graph_parser_from_model_test.cpp b/test/framework/graph/graph_parser_from_model_test.cpp index 883a12858..a38bf3dd4 100644 --- a/test/framework/graph/graph_parser_from_model_test.cpp +++ b/test/framework/graph/graph_parser_from_model_test.cpp @@ -7,8 +7,7 @@ using namespace anakin; using namespace anakin::graph; -//std::string model_path = "/home/chaowen/anakin_v2/model_v2/google_net/googlenet.anakin.bin"; -std::string model_path = "/home/chaowen/anakin_v2/model_v2/yolo/yolo.anakin.bin"; +std::string model_path = "/path/to/name.anakin.bin"; 
TEST(GraphTest, graph_load_model) { diff --git a/test/framework/net/net_exec_multi_thread_test.cpp b/test/framework/net/net_exec_multi_thread_test.cpp index 21d4b20f6..1d912f45c 100644 --- a/test/framework/net/net_exec_multi_thread_test.cpp +++ b/test/framework/net/net_exec_multi_thread_test.cpp @@ -12,7 +12,7 @@ using Target_H = X86; using Target = ARM; using Target_H = ARM; #endif -std::string model_path = "/home/chaowen/anakin_v2/model_v2/anakin-models/adu/anakin_models/diepsie_light_head/yolo_lane_v2.anakin.bin"; +std::string model_path = "../benchmark/CNN/models/vgg16.anakin.bin"; #ifdef USE_CUDA #if 1 diff --git a/test/framework/net/net_exec_test.cpp b/test/framework/net/net_exec_test.cpp index 620a57d8d..cb475a861 100644 --- a/test/framework/net/net_exec_test.cpp +++ b/test/framework/net/net_exec_test.cpp @@ -16,32 +16,8 @@ using Target_H = ARM; //#define USE_DIEPSE -//std::string model_path = "/home/chaowen/anakin_v2/model_v2/anakin-models/adu/anakin_models/diepsie_light_head/diepsie_light_head.anakin.bin"; - -//std::string model_path = "/home/chaowen/anakin_v2/model_v2/anakin-models/adu/anakin_models/diepsie_light_head/diepsie_light_head_base.anakin.bin"; - - -//std::string model_path = "/home/chaowen/anakin_v2/model_v2/anakin-models/adu/anakin_models/diepsie_light_head/densebox.anakin.bin"; - -//std::string model_path = "/home/chaowen/anakin_v2/model_v2/anakin-models/adu/anakin_models/diepsie_light_head/cnn_seg.anakin.bin"; - -//std::string model_path = "/home/chaowen/anakin_v2/model_v2/anakin-models/adu/anakin_models/diepsie_light_head/yolo_camera_detector.anakin.bin"; - -//std::string model_path = "/home/chaowen/anakin_v2/model_v2/anakin-models/adu/anakin_models/diepsie_light_head/yolo_lane_v2.anakin.bin"; - -// alignment of face -//std::string model_path = "/home/chaowen/anakin_v2/model_v2/anakin-models/adu/anakin_models/diepsie_light_head/net_deploy_stageI.anakin.bin"; - -//std::string model_path = 
"/home/chaowen/anakin_v2/model_v2/anakin-models/adu/anakin_models/diepsie_light_head/net_deploy_stageII.anakin.bin"; - -// residual 7 patch of face -//std::string model_path = "/home/chaowen/anakin_v2/model_v2/anakin-models/adu/anakin_models/diepsie_light_head/residual_net_7patch_3hc.anakin.bin"; - -// resnet 50 -//std::string model_path = "/home/cuichaowen/anakin2/anakin2/benchmark/CNN/mobilenet_v2.anakin.bin"; - // vgg16 -std::string model_path = "/home/cuichaowen/anakin2/anakin2/benchmark/CNN/models/vgg16.anakin.bin"; +std::string model_path = "../benchmark/CNN/models/vgg16.anakin.bin"; #ifdef USE_CUDA #if 1 diff --git a/test/framework/net/net_exec_test_sequence_labeling.cpp b/test/framework/net/net_exec_test_sequence_labeling.cpp index cfe2e18fe..3cdc4cb89 100644 --- a/test/framework/net/net_exec_test_sequence_labeling.cpp +++ b/test/framework/net/net_exec_test_sequence_labeling.cpp @@ -419,7 +419,7 @@ TEST(NetTest, net_execute_base_test) { Net net_executer(*graph, true); // std::vector input_data; -// std::string img_path = "/home/public/anakin2_ocr/inputs/48_194.txt"; +// std::string img_path = "/path/to/0.txt"; // int res = read_file(input_data, img_path.c_str()); From df9d2fb36ad26e625129ac56d772e671dab89685 Mon Sep 17 00:00:00 2001 From: liujunjie Date: Tue, 3 Jul 2018 11:22:13 +0800 Subject: [PATCH 3/5] update cnn benchmark from QA report --- benchmark/README_GPU.md | 110 ++++++++++++++++++++-------------------- 1 file changed, 55 insertions(+), 55 deletions(-) diff --git a/benchmark/README_GPU.md b/benchmark/README_GPU.md index a1f67c143..04326535a 100644 --- a/benchmark/README_GPU.md +++ b/benchmark/README_GPU.md @@ -35,21 +35,21 @@ We tested them on single-GPU with single-thread. 
BatchSize | TensorRT | Anakin :---: | :---: | :---: | - 1 | 8.8690 | 8.2815 - 2 | 15.5344 | 13.9116 - 4 | 26.6000 | 21.8747 - 8 | 49.8279 | 40.4076 - 32 | 188.6270 | 163.7660 + 1 | 8.85176 | 8.15362 + 2 | 15.6517 | 13.8716 + 4 | 26.5303 | 21.8478 + 8 | 48.2286 | 40.496 + 32 | 183.994 | 163.035 - GPU Memory Used (`MB`) BatchSize | TensorRT | Anakin :---: | :---: | :---: | - 1 | 963 | 997 - 2 | 965 | 1039 - 4 | 991 | 1115 - 8 | 1067 | 1269 - 32 | 1715 | 2193 + 1 | 887 | 648 + 2 | 965 | 733 + 4 | 991 | 810 + 8 | 1067 | 911 + 32 | 1715 | 1325 ### Yolo @@ -58,22 +58,22 @@ We tested them on single-GPU with single-thread. BatchSize | TensorRT | Anakin :---: | :---: | :---: | - 1 | 16.4596| 15.2124 - 2 | 26.6347| 25.0442 - 4 | 43.3695| 43.5017 - 8 | 80.9139 | 80.9880 - 32 | 293.8080| 310.8810 + 1 | 16.4623| 15.3214 + 2 | 26.7082| 25.0305 + 4 | 43.2129| 43.4758 + 8 | 80.0053 | 80.7645 + 32 | 283.352| 311.152 - GPU Memory Used (`MB`) BatchSize | TensorRT | Anakin :---: | :---: | :---: | - 1 | 1569 | 1775 - 2 | 1649 | 1815 - 4 | 1709 | 1887 - 8 | 1731 | 2031 - 32 | 2253 | 2907 + 1 | 1226 | 1192 + 2 | 1326 | 1269 + 4 | 1435 | 1356 + 8 | 1563 | 1434 + 32 | 2150 | 1633 ### Resnet50 @@ -81,21 +81,21 @@ We tested them on single-GPU with single-thread. BatchSize | TensorRT | Anakin :---: | :---: | :---: | - 1 | 4.2459 | 4.1061 - 2 | 6.2627 | 6.5159 - 4 | 10.1277 | 11.3327 - 8 | 17.8209 | 20.6680 - 32 | 65.8582 | 77.8858 + 1 | 4.26834 | 3.25853 + 2 | 6.2811 | 6.12156 + 4 | 10.1183 | 10.9219 + 8 | 18.1395 | 20.323 + 32 | 66.4728 | 83.9934 - GPU Memory Used (`MB`) BatchSize | TensorRT | Anakin :---: | :---: | :---: | - 1 | 531 | 503 - 2 | 543 | 517 - 4 | 583 | 541 - 8 | 611 | 589 - 32 | 809 | 879 + 1 | 932 | 272 + 2 | 936 | 318 + 4 | 720 | 376 + 8 | 697 | 480 + 32 | 842 | 835 ### Resnet101 @@ -103,21 +103,21 @@ We tested them on single-GPU with single-thread. 
BatchSize | TensorRT | Anakin :---: | :---: | :---: | - 1 | 7.5562 | 7.0837 - 2 | 11.6023 | 11.4079 - 4 | 18.3650 | 20.0493 - 8 | 32.7632 | 36.0648 - 32 | 123.2550 | 135.4880 + 1 | 7.58234 | 5.66457 + 2 | 11.6014 | 10.9213 + 4 | 18.3298 | 19.3987 + 8 | 32.6523 | 37.5575 + 32 | 123.114 | 149.089 - GPU Memory Used (`MB)` BatchSize | TensorRT | Anakin :---: | :---: | :---: | - 1 | 701 | 683 - 2 | 713 | 697 - 4 | 793 | 721 - 8 | 819 | 769 - 32 | 1043 | 1059 + 1 | 1020 | 420 + 2 | 961 | 467 + 4 | 943 | 503 + 8 | 885 | 606 + 32 | 1048 | 1077 ### MobileNet V1 @@ -125,21 +125,21 @@ We tested them on single-GPU with single-thread. BatchSize | TensorRT | Anakin :---: | :---: | :---: | - 1 | 45.5156 | 1.3947 - 2 | 46.5585 | 2.5483 - 4 | 48.4242 | 4.3404 - 8 | 52.7957 | 8.1513 - 32 | 83.2519 | 31.3178 + 1 | 45.2189 | 1.39566 + 2 | 46.4538 | 2.50698 + 4 | 47.8918 | 4.38727 + 8 | 52.3636 | 8.21416 + 32 | 83.0503 | 31.33 - GPU Memory Used (`MB`) BatchSize | TensorRT | Anakin :---: | :---: | :---: | - 1 | 329 | 283 - 2 | 345 | 289 - 4 | 371 | 299 - 8 | 393 | 319 - 32 | 531 | 433 + 1 | 516 | 176 + 2 | 524 | 166 + 4 | 497 | 165 + 8 | 508 | 239 + 32 | 628 | 388 ### MobileNet V2 @@ -147,11 +147,11 @@ We tested them on single-GPU with single-thread. 
BatchSize | TensorRT | Anakin :---: | :---: | :---: | - 1 | 65.6861 | 2.9842 - 2 | 66.6814 | 4.7472 - 4 | 69.7114 | 7.4163 - 8 | 76.1092 | 12.8779 - 32 | 124.9810 | 47.2142 + 1 | 65.4277 | 1.80542 + 2 | 66.2048 | 3.85568 + 4 | 68.8045 | 6.80921 + 8 | 75.64 | 12.6038 + 32 | 124.09 | 47.6079 - GPU Memory Used (`MB`) From d1bb23802724fbfc14fe435bf9487391c199129e Mon Sep 17 00:00:00 2001 From: liujunjie Date: Tue, 3 Jul 2018 14:46:25 +0800 Subject: [PATCH 4/5] update CPU benchmark from QA report --- benchmark/README_CPU.md | 73 +++++++++++++++++++++++++++-------------- 1 file changed, 49 insertions(+), 24 deletions(-) diff --git a/benchmark/README_CPU.md b/benchmark/README_CPU.md index f607abbba..6113e2e2c 100644 --- a/benchmark/README_CPU.md +++ b/benchmark/README_CPU.md @@ -4,6 +4,9 @@ This time, we only provide benchmark on CPU. In the near future, we will add benchmark on ARM and GPU. +> System: `CentOS 7 in Docker`, for benchmark between Anakin and Tensorflow +> System: `CentOS 6.3`, for benchmark between Anakin and Paddle + ## Counterpart of anakin : The counterpart of **`Anakin`** is `Tensorflow 1.8.0`, which installed by Anaconda 4.5.4, run by Python 3.6 @@ -202,29 +205,29 @@ We tested them on single-CPU with different thread numbers. 4 | 18074 | 118696 6 | 26607 | 102044 -2. **`Anakin`** VS **`PaddlePaddle/Fluid\`** - +2. **`Anakin`** VS **`PaddlePaddle/Fluid`** +We use a private dataset and a different QPS metric in this benchmark. 
### language model in E5-2650 v4 - Latency (`ms`) of one batch ThreadNum | Fluid | Anakin :---: | :---: | :---: | - 1 | 42.09 | 1.90 - 2 | 42.14 | 2.16 - 6 | 42.15 | 4.21 - 10 | 42.14 | 9.26 - 12 | 42.34 | 11.17 + 1 | 42.7418 | 1.93589 + 2 | 42.7418 | 2.49537 + 6 | 42.7734 | 3.14332 + 10 | 43.0721 | 4.55329 + 12 | 42.8501 | 5.09893 - Throughput (`sentence/s`) ThreadNum | Fluid | Anakin :---: | :---: | :---: | - 1 | 23 | 524 - 2 | 47 | 916 - 6 | 141 | 1402 - 10 | 236 | 1063 - 12 | 282 | 1044 + 1 | 23 | 504 + 2 | 46 | 762 + 6 | 134 | 1393 + 10 | 218 | 1556 + 12 | 260 | 1541 ### Chinese_ner model in E5-2650 v4 @@ -232,25 +235,47 @@ We tested them on single-CPU with different thread numbers. ThreadNum | Fluid | Anakin :---: | :---: | :---: | - 1 | 0.47 | 0.17 - 4 | 0.26 | 0.17 - 6 | 0.36 | 0.17 - 10 | 0.59 | 0.17 - 12 | 0.72 | 0.17 + 1 | 0.380475 | 0.17034 + 4 | 0.380475 | 0.171143 + 6 | 0.380475 | 0.172688 + 10 | 0.380475 | 0.173269 + 12 | 0.380475 | 0.17668 + +- Throughput (`sentence/s`) + + ThreadNum | Fluid | Anakin + :---: | :---: | :---: | + 1 | 7844 | 5822 + 4 | 7844 | 11377 + 6 | 7844 | 29725 + 10 | 7844 | 41238 + 12 | 7844 | 42790 + +### text_classification model in E5-2650 v4 + +- Latency (`ms`) of one batch + + ThreadNum | Fluid | Anakin + :---: | :---: | :---: | + 1 | 1.48578 | 1.10088 + 4 | 1.54025 | 1.11258 + 6 | 1.68529 | 1.1257 + 10 | 1.9817 | 1.13267 + 12 | 2.21864 | 1.1429 - Throughput (`sentence/s`) ThreadNum | Fluid | Anakin :---: | :---: | :---: | - 1 | 2129 | 5819 - 4 | 3866 | 11182 - 6 | 8095 | 30948 - 10 | 8250 | 44093 - 12 | 8112 | 47185 + 1 | 673 | 901 + 4 | 1289 | 1665 + 6 | 3458 | 4449 + 10 | 4875 | 6183 + 12 | 5265 | 6188 ## How to run those Benchmark models? -> 1. You can just run `sh benchmark_tensorflow.sh` and `sh benchmark_anakin.sh` -> 2. Get the model of caffe or fluid, convert model to anakin model, use net_test_*** to test your model. +> 1. You can just run `sh benchmark_tensorflow.sh` and `sh benchmark_anakin.sh` +> 2. 
Get the model of caffe or fluid, convert model to anakin model, use net_test_*** to test your model. From cb86d02e41a72c67c6b7051cfcda373cc3b951b6 Mon Sep 17 00:00:00 2001 From: shixiaowei02 Date: Tue, 3 Jul 2018 15:49:53 +0800 Subject: [PATCH 5/5] add authors --- AUTHORS.md | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) create mode 100644 AUTHORS.md diff --git a/AUTHORS.md b/AUTHORS.md new file mode 100644 index 000000000..bcc5f3ead --- /dev/null +++ b/AUTHORS.md @@ -0,0 +1,27 @@ +| Github account | name | +|---|---| +| chenjiaoAngel | Jiao Chen | +| cyj1986 | Yujuan Cheng | +| feifei14119 | Fei Wang | +| jackyh | Chengjie He | +| Jayoprell | Xiaocheng Luo | +| jjsbear | Jingsong Ji | +| LittleMaer | Yi Zhuang | +| mengkai94 | Kai Meng | +| micytw | Michael Wu | +| pangge | Chaowen Cui | +| perchbird | Xiaokun Yu | +| PeterJkPeng | Junyi Peng | +| qq332982511 | Junjie Liu | +| Shixiaowei02 | Xiaowei Shi | +| sogalin | Soga Lin | +| throneclay | Shuai Zhang | +| vin-huang | Vin Huang | +| wgy0804 | Guoya Wang | +| xklnono | Kailu Xu | +| xyoungli | Xiaoyang Li | +| yanan1112 | Yanan Liu | +| yao-matrix | Weifeng Yao | +| zdcocnftcp10 | Dachuan Zhao | +| zhouhuan2009 | Huan Zhou | +| zoooooooyuan | Yuan Zu | \ No newline at end of file