diff --git a/README.md b/README.md index fb5dc438..41be612f 100644 --- a/README.md +++ b/README.md @@ -1,9 +1 @@ # Models implemented with OneFlow framework. - -- cv -- nlp - - [text_classfication](./nlp/text_classfication/) - - [odd_numbers](./nlp/odd_numbers/) -- science -- audio -- vidio diff --git a/cv/classification/README.md b/cv/classification/README.md index 55c3fd10..41020387 100644 --- a/cv/classification/README.md +++ b/cv/classification/README.md @@ -90,12 +90,20 @@ For CIFAR100, you only need to specify the dataset downloaded path in [config.py ### Training -- ddp training with simple bash file + +You can use bash script `train.sh` to train this model. + +```bash +sh train.sh +``` + +### inference + +Bash script `infer.sh` is used to infer the trained model. + ```bash -cd vision/projects/classification/ -bash ddp_training.sh +sh infer.sh ``` -## Reference -- [Swin-Transformer](https://github.com/microsoft/Swin-Transformer) + diff --git a/cv/classification/cait_M36_384/README.md b/cv/classification/cait_M36_384/README.md new file mode 100644 index 00000000..f90f3608 --- /dev/null +++ b/cv/classification/cait_M36_384/README.md @@ -0,0 +1,66 @@ +## cait_M36_384 + +### Installation +- Install the latest version of OneFlow +```bash +python3 -m pip install oneflow -f https://staging.oneflow.info/branch/master/[PLATFORM] +``` +Find more information on [install oneflow](https://github.com/Oneflow-Inc/oneflow#install-oneflow) + +- Install flowvision + +Then install the latest stable release of flowvision + +```bash +pip install flowvision==0.2.1 +``` + +- Install other requirements +```bash +python3 -m pip install -r requirements.txt +``` + +### Dataset +#### ImageNet +For ImageNet dataset, you can download it from http://image-net.org/. We provide the following two ways to load data: + +- For standard folder dataset, move validation images to labeled sub-folders. 
The file structure should look like: + ```bash + $ tree data + imagenet + ├── train + │ ├── class1 + │ │ ├── img1.jpeg + │ │ ├── img2.jpeg + │ │ └── ... + │ ├── class2 + │ │ ├── img3.jpeg + │ │ └── ... + │ └── ... + └── val + ├── class1 + │ ├── img4.jpeg + │ ├── img5.jpeg + │ └── ... + ├── class2 + │ ├── img6.jpeg + │ └── ... + └── ... + + +### Training + +You can use bash script `train.sh` to train this model. + +```bash +sh train.sh +``` + +### inference + +Bash script `infer.sh` is used to infer the trained model. + +```bash +sh infer.sh +``` + diff --git a/cv/classification/vit/config.py b/cv/classification/cait_M36_384/config.py similarity index 100% rename from cv/classification/vit/config.py rename to cv/classification/cait_M36_384/config.py diff --git a/cv/classification/vit/configs b/cv/classification/cait_M36_384/configs similarity index 100% rename from cv/classification/vit/configs rename to cv/classification/cait_M36_384/configs diff --git a/cv/classification/vit/data b/cv/classification/cait_M36_384/data similarity index 100% rename from cv/classification/vit/data rename to cv/classification/cait_M36_384/data diff --git a/cv/classification/cait_M36_384/infer.sh b/cv/classification/cait_M36_384/infer.sh new file mode 100755 index 00000000..8b5965be --- /dev/null +++ b/cv/classification/cait_M36_384/infer.sh @@ -0,0 +1,18 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=1 +PORT=12346 +MODEL_ARCH="cait_M36_384" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/default_settings.yaml \ + --model_arch $MODEL_ARCH \ + --lr 1e-5 \ + --image-size 384 \ + --throughput + diff --git a/cv/classification/vit/logger.py b/cv/classification/cait_M36_384/logger.py similarity index 100% rename from cv/classification/vit/logger.py rename to cv/classification/cait_M36_384/logger.py diff --git a/cv/classification/vit/lr_scheduler.py 
b/cv/classification/cait_M36_384/lr_scheduler.py similarity index 100% rename from cv/classification/vit/lr_scheduler.py rename to cv/classification/cait_M36_384/lr_scheduler.py diff --git a/cv/classification/vit/main.py b/cv/classification/cait_M36_384/main.py similarity index 100% rename from cv/classification/vit/main.py rename to cv/classification/cait_M36_384/main.py diff --git a/cv/classification/vit/optimizer.py b/cv/classification/cait_M36_384/optimizer.py similarity index 100% rename from cv/classification/vit/optimizer.py rename to cv/classification/cait_M36_384/optimizer.py diff --git a/cv/classification/vit/requirements.txt b/cv/classification/cait_M36_384/requirements.txt similarity index 100% rename from cv/classification/vit/requirements.txt rename to cv/classification/cait_M36_384/requirements.txt diff --git a/cv/classification/cait_M36_384/train.sh b/cv/classification/cait_M36_384/train.sh new file mode 100755 index 00000000..8bd36738 --- /dev/null +++ b/cv/classification/cait_M36_384/train.sh @@ -0,0 +1,18 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=8 +PORT=12346 +MODEL_ARCH="cait_M36_384" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/default_settings.yaml \ + --model_arch $MODEL_ARCH \ + --lr 1e-5 \ + --batch-size 8 \ + --image-size 384 + diff --git a/cv/classification/vit/utils.py b/cv/classification/cait_M36_384/utils.py similarity index 100% rename from cv/classification/vit/utils.py rename to cv/classification/cait_M36_384/utils.py diff --git a/cv/classification/cait_M48_448/README.md b/cv/classification/cait_M48_448/README.md new file mode 100644 index 00000000..222ce691 --- /dev/null +++ b/cv/classification/cait_M48_448/README.md @@ -0,0 +1,66 @@ +## cait_M48_448 + +### Installation +- Install the latest version of OneFlow +```bash +python3 -m pip install oneflow -f 
https://staging.oneflow.info/branch/master/[PLATFORM] +``` +Find more information on [install oneflow](https://github.com/Oneflow-Inc/oneflow#install-oneflow) + +- Install flowvision + +Then install the latest stable release of flowvision + +```bash +pip install flowvision==0.2.1 +``` + +- Install other requirements +```bash +python3 -m pip install -r requirements.txt +``` + +### Dataset +#### ImageNet +For ImageNet dataset, you can download it from http://image-net.org/. We provide the following two ways to load data: + +- For standard folder dataset, move validation images to labeled sub-folders. The file structure should look like: + ```bash + $ tree data + imagenet + ├── train + │ ├── class1 + │ │ ├── img1.jpeg + │ │ ├── img2.jpeg + │ │ └── ... + │ ├── class2 + │ │ ├── img3.jpeg + │ │ └── ... + │ └── ... + └── val + ├── class1 + │ ├── img4.jpeg + │ ├── img5.jpeg + │ └── ... + ├── class2 + │ ├── img6.jpeg + │ └── ... + └── ... + + +### Training + +You can use bash script `train.sh` to train this model. + +```bash +sh train.sh +``` + +### inference + +Bash script `infer.sh` is used to infer the trained model. 
+ +```bash +sh infer.sh +``` + diff --git a/cv/classification/cait_M48_448/config.py b/cv/classification/cait_M48_448/config.py new file mode 120000 index 00000000..3721332c --- /dev/null +++ b/cv/classification/cait_M48_448/config.py @@ -0,0 +1 @@ +../config.py \ No newline at end of file diff --git a/cv/classification/cait_M48_448/configs b/cv/classification/cait_M48_448/configs new file mode 120000 index 00000000..271eaab3 --- /dev/null +++ b/cv/classification/cait_M48_448/configs @@ -0,0 +1 @@ +../configs \ No newline at end of file diff --git a/cv/classification/cait_M48_448/data b/cv/classification/cait_M48_448/data new file mode 120000 index 00000000..4909e06e --- /dev/null +++ b/cv/classification/cait_M48_448/data @@ -0,0 +1 @@ +../data \ No newline at end of file diff --git a/cv/classification/cait_M48_448/infer.sh b/cv/classification/cait_M48_448/infer.sh new file mode 100755 index 00000000..cf91e281 --- /dev/null +++ b/cv/classification/cait_M48_448/infer.sh @@ -0,0 +1,18 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=1 +PORT=12346 +MODEL_ARCH="cait_M48_448" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/default_settings.yaml \ + --model_arch $MODEL_ARCH \ + --lr 1e-5 \ + --image-size 448 \ + --throughput + diff --git a/cv/classification/cait_M48_448/logger.py b/cv/classification/cait_M48_448/logger.py new file mode 120000 index 00000000..e1dc8ceb --- /dev/null +++ b/cv/classification/cait_M48_448/logger.py @@ -0,0 +1 @@ +../logger.py \ No newline at end of file diff --git a/cv/classification/cait_M48_448/lr_scheduler.py b/cv/classification/cait_M48_448/lr_scheduler.py new file mode 120000 index 00000000..268fb140 --- /dev/null +++ b/cv/classification/cait_M48_448/lr_scheduler.py @@ -0,0 +1 @@ +../lr_scheduler.py \ No newline at end of file diff --git a/cv/classification/cait_M48_448/main.py b/cv/classification/cait_M48_448/main.py new 
file mode 120000 index 00000000..f05f7527 --- /dev/null +++ b/cv/classification/cait_M48_448/main.py @@ -0,0 +1 @@ +../main.py \ No newline at end of file diff --git a/cv/classification/cait_M48_448/optimizer.py b/cv/classification/cait_M48_448/optimizer.py new file mode 120000 index 00000000..e5dec61d --- /dev/null +++ b/cv/classification/cait_M48_448/optimizer.py @@ -0,0 +1 @@ +../optimizer.py \ No newline at end of file diff --git a/cv/classification/cait_M48_448/requirements.txt b/cv/classification/cait_M48_448/requirements.txt new file mode 120000 index 00000000..dc833dd4 --- /dev/null +++ b/cv/classification/cait_M48_448/requirements.txt @@ -0,0 +1 @@ +../requirements.txt \ No newline at end of file diff --git a/cv/classification/cait_M48_448/train.sh b/cv/classification/cait_M48_448/train.sh new file mode 100755 index 00000000..a5c8d621 --- /dev/null +++ b/cv/classification/cait_M48_448/train.sh @@ -0,0 +1,18 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=8 +PORT=12346 +MODEL_ARCH="cait_M48_448" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/default_settings.yaml \ + --model_arch $MODEL_ARCH \ + --lr 1e-5 \ + --image-size 448 \ + --batch-size 1 + diff --git a/cv/classification/cait_M48_448/utils.py b/cv/classification/cait_M48_448/utils.py new file mode 120000 index 00000000..50fbc6d8 --- /dev/null +++ b/cv/classification/cait_M48_448/utils.py @@ -0,0 +1 @@ +../utils.py \ No newline at end of file diff --git a/cv/classification/cait_S24_384/README.md b/cv/classification/cait_S24_384/README.md new file mode 100644 index 00000000..45b8ee1e --- /dev/null +++ b/cv/classification/cait_S24_384/README.md @@ -0,0 +1,66 @@ +## cait_S24_384 + +### Installation +- Install the latest version of OneFlow +```bash +python3 -m pip install oneflow -f https://staging.oneflow.info/branch/master/[PLATFORM] +``` +Find more information on [install 
oneflow](https://github.com/Oneflow-Inc/oneflow#install-oneflow) + +- Install flowvision + +Then install the latest stable release of flowvision + +```bash +pip install flowvision==0.2.1 +``` + +- Install other requirements +```bash +python3 -m pip install -r requirements.txt +``` + +### Dataset +#### ImageNet +For ImageNet dataset, you can download it from http://image-net.org/. We provide the following two ways to load data: + +- For standard folder dataset, move validation images to labeled sub-folders. The file structure should look like: + ```bash + $ tree data + imagenet + ├── train + │ ├── class1 + │ │ ├── img1.jpeg + │ │ ├── img2.jpeg + │ │ └── ... + │ ├── class2 + │ │ ├── img3.jpeg + │ │ └── ... + │ └── ... + └── val + ├── class1 + │ ├── img4.jpeg + │ ├── img5.jpeg + │ └── ... + ├── class2 + │ ├── img6.jpeg + │ └── ... + └── ... + + +### Training + +You can use bash script `train.sh` to train this model. + +```bash +sh train.sh +``` + +### inference + +Bash script `infer.sh` is used to infer the trained model. 
+ +```bash +sh infer.sh +``` + diff --git a/cv/classification/cait_S24_384/config.py b/cv/classification/cait_S24_384/config.py new file mode 120000 index 00000000..3721332c --- /dev/null +++ b/cv/classification/cait_S24_384/config.py @@ -0,0 +1 @@ +../config.py \ No newline at end of file diff --git a/cv/classification/cait_S24_384/configs b/cv/classification/cait_S24_384/configs new file mode 120000 index 00000000..271eaab3 --- /dev/null +++ b/cv/classification/cait_S24_384/configs @@ -0,0 +1 @@ +../configs \ No newline at end of file diff --git a/cv/classification/cait_S24_384/data b/cv/classification/cait_S24_384/data new file mode 120000 index 00000000..4909e06e --- /dev/null +++ b/cv/classification/cait_S24_384/data @@ -0,0 +1 @@ +../data \ No newline at end of file diff --git a/cv/classification/cait_S24_384/infer.sh b/cv/classification/cait_S24_384/infer.sh new file mode 100755 index 00000000..f2e55340 --- /dev/null +++ b/cv/classification/cait_S24_384/infer.sh @@ -0,0 +1,18 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=1 +PORT=12346 +MODEL_ARCH="cait_S24_384" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/default_settings.yaml \ + --model_arch $MODEL_ARCH \ + --lr 1e-5 \ + --image-size 384 \ + --throughput + diff --git a/cv/classification/cait_S24_384/logger.py b/cv/classification/cait_S24_384/logger.py new file mode 120000 index 00000000..e1dc8ceb --- /dev/null +++ b/cv/classification/cait_S24_384/logger.py @@ -0,0 +1 @@ +../logger.py \ No newline at end of file diff --git a/cv/classification/cait_S24_384/lr_scheduler.py b/cv/classification/cait_S24_384/lr_scheduler.py new file mode 120000 index 00000000..268fb140 --- /dev/null +++ b/cv/classification/cait_S24_384/lr_scheduler.py @@ -0,0 +1 @@ +../lr_scheduler.py \ No newline at end of file diff --git a/cv/classification/cait_S24_384/main.py b/cv/classification/cait_S24_384/main.py new 
file mode 120000 index 00000000..f05f7527 --- /dev/null +++ b/cv/classification/cait_S24_384/main.py @@ -0,0 +1 @@ +../main.py \ No newline at end of file diff --git a/cv/classification/cait_S24_384/optimizer.py b/cv/classification/cait_S24_384/optimizer.py new file mode 120000 index 00000000..e5dec61d --- /dev/null +++ b/cv/classification/cait_S24_384/optimizer.py @@ -0,0 +1 @@ +../optimizer.py \ No newline at end of file diff --git a/cv/classification/cait_S24_384/requirements.txt b/cv/classification/cait_S24_384/requirements.txt new file mode 120000 index 00000000..dc833dd4 --- /dev/null +++ b/cv/classification/cait_S24_384/requirements.txt @@ -0,0 +1 @@ +../requirements.txt \ No newline at end of file diff --git a/cv/classification/cait_S24_384/train.sh b/cv/classification/cait_S24_384/train.sh new file mode 100755 index 00000000..a53adb1e --- /dev/null +++ b/cv/classification/cait_S24_384/train.sh @@ -0,0 +1,17 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=8 +PORT=12346 +MODEL_ARCH="cait_S24_384" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/default_settings.yaml \ + --model_arch $MODEL_ARCH \ + --lr 1e-5 \ + --image-size 384 + diff --git a/cv/classification/cait_S24_384/utils.py b/cv/classification/cait_S24_384/utils.py new file mode 120000 index 00000000..50fbc6d8 --- /dev/null +++ b/cv/classification/cait_S24_384/utils.py @@ -0,0 +1 @@ +../utils.py \ No newline at end of file diff --git a/cv/classification/cait_S36_384/README.md b/cv/classification/cait_S36_384/README.md new file mode 100644 index 00000000..8fcfb13c --- /dev/null +++ b/cv/classification/cait_S36_384/README.md @@ -0,0 +1,66 @@ +## cait_S36_384 + +### Installation +- Install the latest version of OneFlow +```bash +python3 -m pip install oneflow -f https://staging.oneflow.info/branch/master/[PLATFORM] +``` +Find more information on [install 
oneflow](https://github.com/Oneflow-Inc/oneflow#install-oneflow) + +- Install flowvision + +Then install the latest stable release of flowvision + +```bash +pip install flowvision==0.2.1 +``` + +- Install other requirements +```bash +python3 -m pip install -r requirements.txt +``` + +### Dataset +#### ImageNet +For ImageNet dataset, you can download it from http://image-net.org/. We provide the following two ways to load data: + +- For standard folder dataset, move validation images to labeled sub-folders. The file structure should look like: + ```bash + $ tree data + imagenet + ├── train + │ ├── class1 + │ │ ├── img1.jpeg + │ │ ├── img2.jpeg + │ │ └── ... + │ ├── class2 + │ │ ├── img3.jpeg + │ │ └── ... + │ └── ... + └── val + ├── class1 + │ ├── img4.jpeg + │ ├── img5.jpeg + │ └── ... + ├── class2 + │ ├── img6.jpeg + │ └── ... + └── ... + + +### Training + +You can use bash script `train.sh` to train this model. + +```bash +sh train.sh +``` + +### inference + +Bash script `infer.sh` is used to infer the trained model. 
+ +```bash +sh infer.sh +``` + diff --git a/cv/classification/cait_S36_384/config.py b/cv/classification/cait_S36_384/config.py new file mode 120000 index 00000000..3721332c --- /dev/null +++ b/cv/classification/cait_S36_384/config.py @@ -0,0 +1 @@ +../config.py \ No newline at end of file diff --git a/cv/classification/cait_S36_384/configs b/cv/classification/cait_S36_384/configs new file mode 120000 index 00000000..271eaab3 --- /dev/null +++ b/cv/classification/cait_S36_384/configs @@ -0,0 +1 @@ +../configs \ No newline at end of file diff --git a/cv/classification/cait_S36_384/data b/cv/classification/cait_S36_384/data new file mode 120000 index 00000000..4909e06e --- /dev/null +++ b/cv/classification/cait_S36_384/data @@ -0,0 +1 @@ +../data \ No newline at end of file diff --git a/cv/classification/cait_S36_384/infer.sh b/cv/classification/cait_S36_384/infer.sh new file mode 100755 index 00000000..92301816 --- /dev/null +++ b/cv/classification/cait_S36_384/infer.sh @@ -0,0 +1,18 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=1 +PORT=12346 +MODEL_ARCH="cait_S36_384" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/default_settings.yaml \ + --model_arch $MODEL_ARCH \ + --lr 1e-5 \ + --image-size 384 \ + --throughput + diff --git a/cv/classification/cait_S36_384/logger.py b/cv/classification/cait_S36_384/logger.py new file mode 120000 index 00000000..e1dc8ceb --- /dev/null +++ b/cv/classification/cait_S36_384/logger.py @@ -0,0 +1 @@ +../logger.py \ No newline at end of file diff --git a/cv/classification/cait_S36_384/lr_scheduler.py b/cv/classification/cait_S36_384/lr_scheduler.py new file mode 120000 index 00000000..268fb140 --- /dev/null +++ b/cv/classification/cait_S36_384/lr_scheduler.py @@ -0,0 +1 @@ +../lr_scheduler.py \ No newline at end of file diff --git a/cv/classification/cait_S36_384/main.py b/cv/classification/cait_S36_384/main.py new 
file mode 120000 index 00000000..f05f7527 --- /dev/null +++ b/cv/classification/cait_S36_384/main.py @@ -0,0 +1 @@ +../main.py \ No newline at end of file diff --git a/cv/classification/cait_S36_384/optimizer.py b/cv/classification/cait_S36_384/optimizer.py new file mode 120000 index 00000000..e5dec61d --- /dev/null +++ b/cv/classification/cait_S36_384/optimizer.py @@ -0,0 +1 @@ +../optimizer.py \ No newline at end of file diff --git a/cv/classification/cait_S36_384/requirements.txt b/cv/classification/cait_S36_384/requirements.txt new file mode 120000 index 00000000..dc833dd4 --- /dev/null +++ b/cv/classification/cait_S36_384/requirements.txt @@ -0,0 +1 @@ +../requirements.txt \ No newline at end of file diff --git a/cv/classification/cait_S36_384/train.sh b/cv/classification/cait_S36_384/train.sh new file mode 100755 index 00000000..492cf1a1 --- /dev/null +++ b/cv/classification/cait_S36_384/train.sh @@ -0,0 +1,18 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=8 +PORT=12346 +MODEL_ARCH="cait_S36_384" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/default_settings.yaml \ + --model_arch $MODEL_ARCH \ + --lr 1e-5 \ + --batch-size 16 \ + --image-size 384 + diff --git a/cv/classification/cait_S36_384/utils.py b/cv/classification/cait_S36_384/utils.py new file mode 120000 index 00000000..50fbc6d8 --- /dev/null +++ b/cv/classification/cait_S36_384/utils.py @@ -0,0 +1 @@ +../utils.py \ No newline at end of file diff --git a/cv/classification/cait_XS24_384/README.md b/cv/classification/cait_XS24_384/README.md new file mode 100644 index 00000000..be15580c --- /dev/null +++ b/cv/classification/cait_XS24_384/README.md @@ -0,0 +1,66 @@ +## cait_XS24_384 + +### Installation +- Install the latest version of OneFlow +```bash +python3 -m pip install oneflow -f https://staging.oneflow.info/branch/master/[PLATFORM] +``` +Find more information on [install 
oneflow](https://github.com/Oneflow-Inc/oneflow#install-oneflow) + +- Install flowvision + +Then install the latest stable release of flowvision + +```bash +pip install flowvision==0.2.1 +``` + +- Install other requirements +```bash +python3 -m pip install -r requirements.txt +``` + +### Dataset +#### ImageNet +For ImageNet dataset, you can download it from http://image-net.org/. We provide the following two ways to load data: + +- For standard folder dataset, move validation images to labeled sub-folders. The file structure should look like: + ```bash + $ tree data + imagenet + ├── train + │ ├── class1 + │ │ ├── img1.jpeg + │ │ ├── img2.jpeg + │ │ └── ... + │ ├── class2 + │ │ ├── img3.jpeg + │ │ └── ... + │ └── ... + └── val + ├── class1 + │ ├── img4.jpeg + │ ├── img5.jpeg + │ └── ... + ├── class2 + │ ├── img6.jpeg + │ └── ... + └── ... + + +### Training + +You can use bash script `train.sh` to train this model. + +```bash +sh train.sh +``` + +### inference + +Bash script `infer.sh` is used to infer the trained model. 
+ +```bash +sh infer.sh +``` + diff --git a/cv/classification/cait_XS24_384/config.py b/cv/classification/cait_XS24_384/config.py new file mode 120000 index 00000000..3721332c --- /dev/null +++ b/cv/classification/cait_XS24_384/config.py @@ -0,0 +1 @@ +../config.py \ No newline at end of file diff --git a/cv/classification/cait_XS24_384/configs b/cv/classification/cait_XS24_384/configs new file mode 120000 index 00000000..271eaab3 --- /dev/null +++ b/cv/classification/cait_XS24_384/configs @@ -0,0 +1 @@ +../configs \ No newline at end of file diff --git a/cv/classification/cait_XS24_384/data b/cv/classification/cait_XS24_384/data new file mode 120000 index 00000000..4909e06e --- /dev/null +++ b/cv/classification/cait_XS24_384/data @@ -0,0 +1 @@ +../data \ No newline at end of file diff --git a/cv/classification/cait_XS24_384/infer.sh b/cv/classification/cait_XS24_384/infer.sh new file mode 100755 index 00000000..ff602741 --- /dev/null +++ b/cv/classification/cait_XS24_384/infer.sh @@ -0,0 +1,18 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=1 +PORT=12346 +MODEL_ARCH="cait_XS24_384" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/default_settings.yaml \ + --model_arch $MODEL_ARCH \ + --lr 1e-5 \ + --image-size 384 \ + --throughput + diff --git a/cv/classification/cait_XS24_384/logger.py b/cv/classification/cait_XS24_384/logger.py new file mode 120000 index 00000000..e1dc8ceb --- /dev/null +++ b/cv/classification/cait_XS24_384/logger.py @@ -0,0 +1 @@ +../logger.py \ No newline at end of file diff --git a/cv/classification/cait_XS24_384/lr_scheduler.py b/cv/classification/cait_XS24_384/lr_scheduler.py new file mode 120000 index 00000000..268fb140 --- /dev/null +++ b/cv/classification/cait_XS24_384/lr_scheduler.py @@ -0,0 +1 @@ +../lr_scheduler.py \ No newline at end of file diff --git a/cv/classification/cait_XS24_384/main.py 
b/cv/classification/cait_XS24_384/main.py new file mode 120000 index 00000000..f05f7527 --- /dev/null +++ b/cv/classification/cait_XS24_384/main.py @@ -0,0 +1 @@ +../main.py \ No newline at end of file diff --git a/cv/classification/cait_XS24_384/optimizer.py b/cv/classification/cait_XS24_384/optimizer.py new file mode 120000 index 00000000..e5dec61d --- /dev/null +++ b/cv/classification/cait_XS24_384/optimizer.py @@ -0,0 +1 @@ +../optimizer.py \ No newline at end of file diff --git a/cv/classification/cait_XS24_384/requirements.txt b/cv/classification/cait_XS24_384/requirements.txt new file mode 120000 index 00000000..dc833dd4 --- /dev/null +++ b/cv/classification/cait_XS24_384/requirements.txt @@ -0,0 +1 @@ +../requirements.txt \ No newline at end of file diff --git a/cv/classification/cait_XS24_384/train.sh b/cv/classification/cait_XS24_384/train.sh new file mode 100755 index 00000000..47b5ae2f --- /dev/null +++ b/cv/classification/cait_XS24_384/train.sh @@ -0,0 +1,17 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=8 +PORT=12346 +MODEL_ARCH="cait_XS24_384" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/default_settings.yaml \ + --model_arch $MODEL_ARCH \ + --image-size 384 \ + --lr 1e-5 + diff --git a/cv/classification/cait_XS24_384/utils.py b/cv/classification/cait_XS24_384/utils.py new file mode 120000 index 00000000..50fbc6d8 --- /dev/null +++ b/cv/classification/cait_XS24_384/utils.py @@ -0,0 +1 @@ +../utils.py \ No newline at end of file diff --git a/cv/classification/configs/deit_settings.yaml b/cv/classification/configs/deit_settings.yaml new file mode 100755 index 00000000..436bdcb3 --- /dev/null +++ b/cv/classification/configs/deit_settings.yaml @@ -0,0 +1,52 @@ +DATA: + BATCH_SIZE: 32 + DATASET: imagenet + DATA_PATH: /data/dataset/ImageNet/extract + IMG_SIZE: 384 + INTERPOLATION: bicubic + ZIP_MODE: False + CACHE_MODE: "part" + 
PIN_MEMORY: True + NUM_WORKERS: 8 + +MODEL: + PRETRAINED: True + RESUME: "" + LABEL_SMOOTHING: 0.1 + +TRAIN: + START_EPOCH: 0 + EPOCHS: 90 + WARMUP_EPOCHS: 0 + WEIGHT_DECAY: 1e-4 + BASE_LR: 0.1 + WARMUP_LR: 5e-7 + CLIP_GRAD: 5.0 + AUTO_RESUME: False + ACCUMULATION_STEPS: 0 + + LR_SCHEDULER: + NAME: step + + OPTIMIZER: + NAME: sgd + MOMENTUM: 0.9 + +AUG: + COLOR_JITTER: 0.4 + AUTO_AUGMENT: rand-m9-mstd0.5-inc1 + REPROB: 0.25 + MIXUP: 0.0 + CUTMIX: 0.0 + CUTMIX_MINMAX: None + +TEST: + CROP: True + SEQUENTIAL: False + +TAG: default +SAVE_FREQ: 1 +PRINT_FREQ: 50 +SEED: 42 +EVAL_MODE: True +THROUGHPUT_MODE: False \ No newline at end of file diff --git a/cv/classification/configs/regionvit_settings.yaml b/cv/classification/configs/regionvit_settings.yaml new file mode 100755 index 00000000..52a3a27b --- /dev/null +++ b/cv/classification/configs/regionvit_settings.yaml @@ -0,0 +1,52 @@ +DATA: + BATCH_SIZE: 32 + DATASET: imagenet + DATA_PATH: /data/dataset/ImageNet/extract + IMG_SIZE: 224 + INTERPOLATION: bicubic + ZIP_MODE: False + CACHE_MODE: "part" + PIN_MEMORY: True + NUM_WORKERS: 8 + +MODEL: + PRETRAINED: True + RESUME: "" + LABEL_SMOOTHING: 0.1 + +TRAIN: + START_EPOCH: 0 + EPOCHS: 90 + WARMUP_EPOCHS: 0 + WEIGHT_DECAY: 1e-8 + BASE_LR: 2e-4 + WARMUP_LR: 1e-6 + CLIP_GRAD: None + AUTO_RESUME: False + ACCUMULATION_STEPS: 0 + + LR_SCHEDULER: + NAME: cosine + + OPTIMIZER: + NAME: adamw + MOMENTUM: 0.9 + +AUG: + COLOR_JITTER: 0.3 + AUTO_AUGMENT: rand-m9-mstd0.5-inc1 + REPROB: 0.25 + MIXUP: 0.8 + CUTMIX: 1.0 + CUTMIX_MINMAX: None + +TEST: + CROP: True + SEQUENTIAL: False + +TAG: default +SAVE_FREQ: 1 +PRINT_FREQ: 50 +SEED: 42 +EVAL_MODE: True +THROUGHPUT_MODE: False \ No newline at end of file diff --git a/cv/classification/configs/regnet_settings.yaml b/cv/classification/configs/regnet_settings.yaml index 09c43920..0560f859 100644 --- a/cv/classification/configs/regnet_settings.yaml +++ b/cv/classification/configs/regnet_settings.yaml @@ -1,5 +1,5 @@ DATA: - BATCH_SIZE: 128 
+ BATCH_SIZE: 32 DATASET: imagenet DATA_PATH: /data/dataset/ImageNet/extract IMG_SIZE: 224 diff --git a/cv/classification/configs/van_settings.yaml b/cv/classification/configs/van_settings.yaml new file mode 100755 index 00000000..aaaf19be --- /dev/null +++ b/cv/classification/configs/van_settings.yaml @@ -0,0 +1,53 @@ +DATA: + BATCH_SIZE: 32 + DATASET: imagenet + DATA_PATH: /data/dataset/ImageNet/extract + IMG_SIZE: 224 + INTERPOLATION: bicubic + ZIP_MODE: False + CACHE_MODE: "part" + PIN_MEMORY: False + NUM_WORKERS: 8 + +MODEL: + PRETRAINED: True + RESUME: "" + LABEL_SMOOTHING: 0.1 + +TRAIN: + START_EPOCH: 0 + EPOCHS: 90 + WARMUP_EPOCHS: 0 + WEIGHT_DECAY: 0.05 + BASE_LR: 1e-3 + WARMUP_LR: 1e-6 + CLIP_GRAD: 5.0 + AUTO_RESUME: False + ACCUMULATION_STEPS: 0 + + LR_SCHEDULER: + NAME: cosine + + OPTIMIZER: + NAME: adamw + MOMENTUM: 0.9 + +AUG: + COLOR_JITTER: 0.4 + AUTO_AUGMENT: rand-m9-mstd0.5-inc1 + REPROB: 0.25 + REMODE: pixel + MIXUP: 0.8 + CUTMIX: 1.0 + CUTMIX_MINMAX: None + +TEST: + CROP: False + SEQUENTIAL: False + +TAG: default +SAVE_FREQ: 1 +PRINT_FREQ: 50 +SEED: 42 +EVAL_MODE: True +THROUGHPUT_MODE: False \ No newline at end of file diff --git a/cv/classification/convmixer_1536_20/README.md b/cv/classification/convmixer_1536_20/README.md new file mode 100644 index 00000000..6eed78da --- /dev/null +++ b/cv/classification/convmixer_1536_20/README.md @@ -0,0 +1,66 @@ +## convmixer_1536_20 + +### Installation +- Install the latest version of OneFlow +```bash +python3 -m pip install oneflow -f https://staging.oneflow.info/branch/master/[PLATFORM] +``` +Find more information on [install oneflow](https://github.com/Oneflow-Inc/oneflow#install-oneflow) + +- Install flowvision + +Then install the latest stable release of flowvision + +```bash +pip install flowvision==0.2.1 +``` + +- Install other requirements +```bash +python3 -m pip install -r requirements.txt +``` + +### Dataset +#### ImageNet +For ImageNet dataset, you can download it from http://image-net.org/. 
We provide the following two ways to load data: + +- For standard folder dataset, move validation images to labeled sub-folders. The file structure should look like: + ```bash + $ tree data + imagenet + ├── train + │ ├── class1 + │ │ ├── img1.jpeg + │ │ ├── img2.jpeg + │ │ └── ... + │ ├── class2 + │ │ ├── img3.jpeg + │ │ └── ... + │ └── ... + └── val + ├── class1 + │ ├── img4.jpeg + │ ├── img5.jpeg + │ └── ... + ├── class2 + │ ├── img6.jpeg + │ └── ... + └── ... + + +### Training + +You can use bash script `train.sh` to train this model. + +```bash +sh train.sh +``` + +### inference + +Bash script `infer.sh` is used to infer the trained model. + +```bash +sh infer.sh +``` + diff --git a/cv/classification/convmixer_1536_20/config.py b/cv/classification/convmixer_1536_20/config.py new file mode 120000 index 00000000..3721332c --- /dev/null +++ b/cv/classification/convmixer_1536_20/config.py @@ -0,0 +1 @@ +../config.py \ No newline at end of file diff --git a/cv/classification/convmixer_1536_20/configs b/cv/classification/convmixer_1536_20/configs new file mode 120000 index 00000000..271eaab3 --- /dev/null +++ b/cv/classification/convmixer_1536_20/configs @@ -0,0 +1 @@ +../configs \ No newline at end of file diff --git a/cv/classification/convmixer_1536_20/data b/cv/classification/convmixer_1536_20/data new file mode 120000 index 00000000..4909e06e --- /dev/null +++ b/cv/classification/convmixer_1536_20/data @@ -0,0 +1 @@ +../data \ No newline at end of file diff --git a/cv/classification/convmixer_1536_20/infer.sh b/cv/classification/convmixer_1536_20/infer.sh new file mode 100755 index 00000000..4684739b --- /dev/null +++ b/cv/classification/convmixer_1536_20/infer.sh @@ -0,0 +1,16 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=1 +PORT=12346 +MODEL_ARCH="convmixer_1536_20" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/convmixer_settings.yaml \ + 
--model_arch $MODEL_ARCH \ + --throughput + diff --git a/cv/classification/convmixer_1536_20/logger.py b/cv/classification/convmixer_1536_20/logger.py new file mode 120000 index 00000000..e1dc8ceb --- /dev/null +++ b/cv/classification/convmixer_1536_20/logger.py @@ -0,0 +1 @@ +../logger.py \ No newline at end of file diff --git a/cv/classification/convmixer_1536_20/lr_scheduler.py b/cv/classification/convmixer_1536_20/lr_scheduler.py new file mode 120000 index 00000000..268fb140 --- /dev/null +++ b/cv/classification/convmixer_1536_20/lr_scheduler.py @@ -0,0 +1 @@ +../lr_scheduler.py \ No newline at end of file diff --git a/cv/classification/convmixer_1536_20/main.py b/cv/classification/convmixer_1536_20/main.py new file mode 120000 index 00000000..f05f7527 --- /dev/null +++ b/cv/classification/convmixer_1536_20/main.py @@ -0,0 +1 @@ +../main.py \ No newline at end of file diff --git a/cv/classification/convmixer_1536_20/optimizer.py b/cv/classification/convmixer_1536_20/optimizer.py new file mode 120000 index 00000000..e5dec61d --- /dev/null +++ b/cv/classification/convmixer_1536_20/optimizer.py @@ -0,0 +1 @@ +../optimizer.py \ No newline at end of file diff --git a/cv/classification/convmixer_1536_20/requirements.txt b/cv/classification/convmixer_1536_20/requirements.txt new file mode 120000 index 00000000..dc833dd4 --- /dev/null +++ b/cv/classification/convmixer_1536_20/requirements.txt @@ -0,0 +1 @@ +../requirements.txt \ No newline at end of file diff --git a/cv/classification/convmixer_1536_20/train.sh b/cv/classification/convmixer_1536_20/train.sh new file mode 100755 index 00000000..5e7101c9 --- /dev/null +++ b/cv/classification/convmixer_1536_20/train.sh @@ -0,0 +1,15 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=8 +PORT=12346 +MODEL_ARCH="convmixer_1536_20" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/convmixer_settings.yaml \ + 
--model_arch $MODEL_ARCH + diff --git a/cv/classification/convmixer_1536_20/utils.py b/cv/classification/convmixer_1536_20/utils.py new file mode 120000 index 00000000..50fbc6d8 --- /dev/null +++ b/cv/classification/convmixer_1536_20/utils.py @@ -0,0 +1 @@ +../utils.py \ No newline at end of file diff --git a/cv/classification/convmixer_768_32_relu/README.md b/cv/classification/convmixer_768_32_relu/README.md new file mode 100644 index 00000000..da9c3f50 --- /dev/null +++ b/cv/classification/convmixer_768_32_relu/README.md @@ -0,0 +1,66 @@ +## convmixer_768_32_relu + +### Installation +- Install the latest version of OneFlow +```bash +python3 -m pip install oneflow -f https://staging.oneflow.info/branch/master/[PLATFORM] +``` +Find more information on [install oneflow](https://github.com/Oneflow-Inc/oneflow#install-oneflow) + +- Install flowvision + +Then install the latest stable release of flowvision + +```bash +pip install flowvision==0.2.1 +``` + +- Install other requirements +```bash +python3 -m pip install -r requirements.txt +``` + +### Dataset +#### ImageNet +For ImageNet dataset, you can download it from http://image-net.org/. We provide the following two ways to load data: + +- For standard folder dataset, move validation images to labeled sub-folders. The file structure should look like: + ```bash + $ tree data + imagenet + ├── train + │ ├── class1 + │ │ ├── img1.jpeg + │ │ ├── img2.jpeg + │ │ └── ... + │ ├── class2 + │ │ ├── img3.jpeg + │ │ └── ... + │ └── ... + └── val + ├── class1 + │ ├── img4.jpeg + │ ├── img5.jpeg + │ └── ... + ├── class2 + │ ├── img6.jpeg + │ └── ... + └── ... + + +### Training + +You can use bash script `train.sh` to train this model. + +```bash +sh train.sh +``` + +### inference + +Bash script `infer.sh` is used to infer the trained model. 
+ +```bash +sh infer.sh +``` + diff --git a/cv/classification/convmixer_768_32_relu/config.py b/cv/classification/convmixer_768_32_relu/config.py new file mode 120000 index 00000000..3721332c --- /dev/null +++ b/cv/classification/convmixer_768_32_relu/config.py @@ -0,0 +1 @@ +../config.py \ No newline at end of file diff --git a/cv/classification/convmixer_768_32_relu/configs b/cv/classification/convmixer_768_32_relu/configs new file mode 120000 index 00000000..271eaab3 --- /dev/null +++ b/cv/classification/convmixer_768_32_relu/configs @@ -0,0 +1 @@ +../configs \ No newline at end of file diff --git a/cv/classification/convmixer_768_32_relu/data b/cv/classification/convmixer_768_32_relu/data new file mode 120000 index 00000000..4909e06e --- /dev/null +++ b/cv/classification/convmixer_768_32_relu/data @@ -0,0 +1 @@ +../data \ No newline at end of file diff --git a/cv/classification/convmixer_768_32_relu/infer.sh b/cv/classification/convmixer_768_32_relu/infer.sh new file mode 100755 index 00000000..f3f67d16 --- /dev/null +++ b/cv/classification/convmixer_768_32_relu/infer.sh @@ -0,0 +1,16 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=1 +PORT=12346 +MODEL_ARCH="convmixer_768_32_relu" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/convmixer_settings.yaml \ + --model_arch $MODEL_ARCH \ + --throughput + diff --git a/cv/classification/convmixer_768_32_relu/logger.py b/cv/classification/convmixer_768_32_relu/logger.py new file mode 120000 index 00000000..e1dc8ceb --- /dev/null +++ b/cv/classification/convmixer_768_32_relu/logger.py @@ -0,0 +1 @@ +../logger.py \ No newline at end of file diff --git a/cv/classification/convmixer_768_32_relu/lr_scheduler.py b/cv/classification/convmixer_768_32_relu/lr_scheduler.py new file mode 120000 index 00000000..268fb140 --- /dev/null +++ b/cv/classification/convmixer_768_32_relu/lr_scheduler.py @@ -0,0 +1 @@ 
+../lr_scheduler.py \ No newline at end of file diff --git a/cv/classification/convmixer_768_32_relu/main.py b/cv/classification/convmixer_768_32_relu/main.py new file mode 120000 index 00000000..f05f7527 --- /dev/null +++ b/cv/classification/convmixer_768_32_relu/main.py @@ -0,0 +1 @@ +../main.py \ No newline at end of file diff --git a/cv/classification/convmixer_768_32_relu/optimizer.py b/cv/classification/convmixer_768_32_relu/optimizer.py new file mode 120000 index 00000000..e5dec61d --- /dev/null +++ b/cv/classification/convmixer_768_32_relu/optimizer.py @@ -0,0 +1 @@ +../optimizer.py \ No newline at end of file diff --git a/cv/classification/convmixer_768_32_relu/requirements.txt b/cv/classification/convmixer_768_32_relu/requirements.txt new file mode 120000 index 00000000..dc833dd4 --- /dev/null +++ b/cv/classification/convmixer_768_32_relu/requirements.txt @@ -0,0 +1 @@ +../requirements.txt \ No newline at end of file diff --git a/cv/classification/convmixer_768_32_relu/train.sh b/cv/classification/convmixer_768_32_relu/train.sh new file mode 100755 index 00000000..6c37c914 --- /dev/null +++ b/cv/classification/convmixer_768_32_relu/train.sh @@ -0,0 +1,16 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=8 +PORT=12346 +MODEL_ARCH="convmixer_768_32_relu" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/convmixer_settings.yaml \ + --model_arch $MODEL_ARCH \ + --lr 0.001 + diff --git a/cv/classification/convmixer_768_32_relu/utils.py b/cv/classification/convmixer_768_32_relu/utils.py new file mode 120000 index 00000000..50fbc6d8 --- /dev/null +++ b/cv/classification/convmixer_768_32_relu/utils.py @@ -0,0 +1 @@ +../utils.py \ No newline at end of file diff --git a/cv/classification/convnext_tiny_224/README.md b/cv/classification/convnext_tiny_224/README.md new file mode 100644 index 00000000..3890378e --- /dev/null +++ 
b/cv/classification/convnext_tiny_224/README.md @@ -0,0 +1,65 @@ +## ConvNext + +### Installation +- Install the latest version of OneFlow +```bash +python3 -m pip install oneflow -f https://staging.oneflow.info/branch/master/[PLATFORM] +``` +Find more information on [install oneflow](https://github.com/Oneflow-Inc/oneflow#install-oneflow) + +- Install flowvision + +Then install the latest stable release of flowvision + +```bash +pip install flowvision==0.2.1 +``` + +- Install other requirements +```bash +python3 -m pip install -r requirements.txt +``` + + +### Dataset +#### ImageNet +For ImageNet dataset, you can download it from http://image-net.org/. + +### Code Structure + + + ``` + . + ├── configs -> ../configs + │ ├── ... + │ └── default_settings.yaml + ├── data -> ../data + │ ├── __init__.py + │ ├── build.py + │ ├── cached_image_folder.py + │ ├── samplers.py + │ └── zipreader.py + ├── utils.py -> ../utils.py + ├── config.py -> ../config.py + ├── logger.py -> ../logger.py + ├── lr_scheduler.py -> ../lr_scheduler.py + ├── optimizer.py -> ../optimizer.py + ├── main.py + ├── train.sh + └── infer.sh + ``` + + + +### Training +You can use bash script `train.sh` to train this model. +``````` +sh train.sh +``````` + +### Inference + +Bash script `infer.sh` is used to infer the trained model. 
+``````` +sh infer.sh +``````` diff --git a/cv/classification/convnext_tiny_224/config.py b/cv/classification/convnext_tiny_224/config.py new file mode 120000 index 00000000..3721332c --- /dev/null +++ b/cv/classification/convnext_tiny_224/config.py @@ -0,0 +1 @@ +../config.py \ No newline at end of file diff --git a/cv/classification/convnext_tiny_224/configs b/cv/classification/convnext_tiny_224/configs new file mode 120000 index 00000000..bd0ab477 --- /dev/null +++ b/cv/classification/convnext_tiny_224/configs @@ -0,0 +1 @@ +../configs/ \ No newline at end of file diff --git a/cv/classification/convnext_tiny_224/data b/cv/classification/convnext_tiny_224/data new file mode 120000 index 00000000..eed2d0bc --- /dev/null +++ b/cv/classification/convnext_tiny_224/data @@ -0,0 +1 @@ +../data/ \ No newline at end of file diff --git a/cv/classification/convnext_tiny_224/infer.sh b/cv/classification/convnext_tiny_224/infer.sh new file mode 100755 index 00000000..4ef32c6f --- /dev/null +++ b/cv/classification/convnext_tiny_224/infer.sh @@ -0,0 +1,15 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=1 +PORT=12346 +MODEL_ARCH="convnext_tiny_224" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/default_settings.yaml \ + --model_arch $MODEL_ARCH \ + --throughput diff --git a/cv/classification/convnext_tiny_224/logger.py b/cv/classification/convnext_tiny_224/logger.py new file mode 120000 index 00000000..e1dc8ceb --- /dev/null +++ b/cv/classification/convnext_tiny_224/logger.py @@ -0,0 +1 @@ +../logger.py \ No newline at end of file diff --git a/cv/classification/convnext_tiny_224/lr_scheduler.py b/cv/classification/convnext_tiny_224/lr_scheduler.py new file mode 120000 index 00000000..268fb140 --- /dev/null +++ b/cv/classification/convnext_tiny_224/lr_scheduler.py @@ -0,0 +1 @@ +../lr_scheduler.py \ No newline at end of file diff --git 
a/cv/classification/convnext_tiny_224/main.py b/cv/classification/convnext_tiny_224/main.py new file mode 120000 index 00000000..f05f7527 --- /dev/null +++ b/cv/classification/convnext_tiny_224/main.py @@ -0,0 +1 @@ +../main.py \ No newline at end of file diff --git a/cv/classification/convnext_tiny_224/optimizer.py b/cv/classification/convnext_tiny_224/optimizer.py new file mode 120000 index 00000000..e5dec61d --- /dev/null +++ b/cv/classification/convnext_tiny_224/optimizer.py @@ -0,0 +1 @@ +../optimizer.py \ No newline at end of file diff --git a/cv/classification/convnext_tiny_224/requirements.txt b/cv/classification/convnext_tiny_224/requirements.txt new file mode 120000 index 00000000..dc833dd4 --- /dev/null +++ b/cv/classification/convnext_tiny_224/requirements.txt @@ -0,0 +1 @@ +../requirements.txt \ No newline at end of file diff --git a/cv/classification/convnext_tiny_224/train.sh b/cv/classification/convnext_tiny_224/train.sh new file mode 100644 index 00000000..26476478 --- /dev/null +++ b/cv/classification/convnext_tiny_224/train.sh @@ -0,0 +1,15 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=8 +PORT=12346 +MODEL_ARCH="convnext_tiny_224" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/default_settings.yaml \ + --model_arch $MODEL_ARCH + diff --git a/cv/classification/convnext_tiny_224/utils.py b/cv/classification/convnext_tiny_224/utils.py new file mode 120000 index 00000000..50fbc6d8 --- /dev/null +++ b/cv/classification/convnext_tiny_224/utils.py @@ -0,0 +1 @@ +../utils.py \ No newline at end of file diff --git a/cv/classification/deit_3_base_384_1k/README.md b/cv/classification/deit_3_base_384_1k/README.md new file mode 100644 index 00000000..a4f96ecf --- /dev/null +++ b/cv/classification/deit_3_base_384_1k/README.md @@ -0,0 +1,66 @@ +## deit_3_base_384_1k + +### Installation +- Install the latest version of OneFlow +```bash 
+python3 -m pip install oneflow -f https://staging.oneflow.info/branch/master/[PLATFORM] +``` +Find more information on [install oneflow](https://github.com/Oneflow-Inc/oneflow#install-oneflow) + +- Install flowvision + +Then install the latest stable release of flowvision + +```bash +pip install flowvision==0.2.1 +``` + +- Install other requirements +```bash +python3 -m pip install -r requirements.txt +``` + +### Dataset +#### ImageNet +For ImageNet dataset, you can download it from http://image-net.org/. We provide the following two ways to load data: + +- For standard folder dataset, move validation images to labeled sub-folders. The file structure should look like: + ```bash + $ tree data + imagenet + ├── train + │ ├── class1 + │ │ ├── img1.jpeg + │ │ ├── img2.jpeg + │ │ └── ... + │ ├── class2 + │ │ ├── img3.jpeg + │ │ └── ... + │ └── ... + └── val + ├── class1 + │ ├── img4.jpeg + │ ├── img5.jpeg + │ └── ... + ├── class2 + │ ├── img6.jpeg + │ └── ... + └── ... + + +### Training + +You can use bash script `train.sh` to train this model. + +```bash +sh train.sh +``` + +### inference + +Bash script `infer.sh` is used to infer the trained model. 
+ +```bash +sh infer.sh +``` + diff --git a/cv/classification/deit_3_base_384_1k/config.py b/cv/classification/deit_3_base_384_1k/config.py new file mode 120000 index 00000000..3721332c --- /dev/null +++ b/cv/classification/deit_3_base_384_1k/config.py @@ -0,0 +1 @@ +../config.py \ No newline at end of file diff --git a/cv/classification/deit_3_base_384_1k/configs b/cv/classification/deit_3_base_384_1k/configs new file mode 120000 index 00000000..271eaab3 --- /dev/null +++ b/cv/classification/deit_3_base_384_1k/configs @@ -0,0 +1 @@ +../configs \ No newline at end of file diff --git a/cv/classification/deit_3_base_384_1k/data b/cv/classification/deit_3_base_384_1k/data new file mode 120000 index 00000000..4909e06e --- /dev/null +++ b/cv/classification/deit_3_base_384_1k/data @@ -0,0 +1 @@ +../data \ No newline at end of file diff --git a/cv/classification/deit_3_base_384_1k/infer.sh b/cv/classification/deit_3_base_384_1k/infer.sh new file mode 100755 index 00000000..f4f2ad90 --- /dev/null +++ b/cv/classification/deit_3_base_384_1k/infer.sh @@ -0,0 +1,16 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=1 +PORT=12346 +MODEL_ARCH="deit_base_patch16_LS_384" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/deit_settings.yaml\ + --model_arch $MODEL_ARCH \ + --throughput + diff --git a/cv/classification/deit_3_base_384_1k/logger.py b/cv/classification/deit_3_base_384_1k/logger.py new file mode 120000 index 00000000..e1dc8ceb --- /dev/null +++ b/cv/classification/deit_3_base_384_1k/logger.py @@ -0,0 +1 @@ +../logger.py \ No newline at end of file diff --git a/cv/classification/deit_3_base_384_1k/lr_scheduler.py b/cv/classification/deit_3_base_384_1k/lr_scheduler.py new file mode 120000 index 00000000..268fb140 --- /dev/null +++ b/cv/classification/deit_3_base_384_1k/lr_scheduler.py @@ -0,0 +1 @@ +../lr_scheduler.py \ No newline at end of file diff --git 
a/cv/classification/deit_3_base_384_1k/main.py b/cv/classification/deit_3_base_384_1k/main.py new file mode 100644 index 00000000..cb47530d --- /dev/null +++ b/cv/classification/deit_3_base_384_1k/main.py @@ -0,0 +1,460 @@ +""" +Modified from https://github.com/microsoft/Swin-Transformer/blob/main/main.py +""" + +import os +import time +import argparse +import datetime +import numpy as np +import oneflow as flow +import oneflow.backends.cudnn as cudnn + +from flowvision.loss.cross_entropy import ( + LabelSmoothingCrossEntropy, + SoftTargetCrossEntropy, +) +from flowvision.utils.metrics import accuracy +from flowvision.models import ModelCreator + +from config import get_config +from data import build_loader +from lr_scheduler import build_scheduler +from optimizer import build_optimizer +from logger import create_logger +from utils import ( + load_checkpoint, + save_checkpoint, + get_grad_norm, + auto_resume_helper, + reduce_tensor, + AverageMeter, + TimeMeter +) + +def build_model(config): + model_arch = config.MODEL.ARCH + model = ModelCreator.create_model(model_arch, pretrained=config.MODEL.PRETRAINED) + return model + + +def parse_option(): + parser = argparse.ArgumentParser( + "Flowvision image classification training and evaluation script", add_help=False + ) + parser.add_argument( + "--model_arch", + type=str, + required=True, + default="swin_tiny_patch4_window7_224", + help="model for training", + ) + parser.add_argument( + "--cfg", type=str, required=True, metavar="FILE", help="path to config file", + ) + parser.add_argument( + "--opts", + help="Modify config options by adding 'KEY VALUE' pairs. 
", + default=None, + nargs="+", + ) + + # easy config modification + parser.add_argument( + "--synthetic-data", + action="store_true", + dest="synthetic_data", + help="Use synthetic data", + ) + parser.add_argument( + "--epochs", type=int, help="batch size for single GPU" + ) + parser.add_argument("--lr", type=float, help="initial learning rate") + parser.add_argument( + "--image-size", type=int, help="initial image size" + ) + parser.add_argument( + "--batch-size", type=int, help="batch size for single GPU" + ) + parser.add_argument("--data-path", type=str, help="path to dataset") + parser.add_argument( + "--zip", + action="store_true", + help="use zipped dataset instead of folder dataset", + ) + parser.add_argument( + "--cache-mode", + type=str, + default="part", + choices=["no", "full", "part"], + help="no: no cache, " + "full: cache all data, " + "part: sharding the dataset into nonoverlapping pieces and only cache one piece", + ) + parser.add_argument("--resume", help="resume from checkpoint") + parser.add_argument( + "--accumulation-steps", type=int, help="gradient accumulation steps" + ) + parser.add_argument( + "--use-checkpoint", + action="store_true", + help="whether to use gradient checkpointing to save memory", + ) + parser.add_argument( + "--output", + default="output", + type=str, + metavar="PATH", + help="root of output folder, the full path is // (default: output)", + ) + parser.add_argument("--tag", help="tag of experiment") + parser.add_argument("--eval", action="store_true", help="Perform evaluation only") + parser.add_argument( + "--throughput", action="store_true", help="Test throughput only" + ) + + # distributed training + parser.add_argument( + "--local_rank", + type=int, + default=0, + required=False, + help="local rank for DistributedDataParallel", + ) + + args, unparsed = parser.parse_known_args() + + config = get_config(args) + + return args, config + + +def main(config): + ( + dataset_train, + dataset_val, + data_loader_train, + 
data_loader_val, + mixup_fn, + ) = build_loader(config) + + logger.info(f"Creating model:{config.MODEL.ARCH}") + model = build_model(config) + model.cuda() + + optimizer = build_optimizer(config, model) + model = flow.nn.parallel.DistributedDataParallel(model, broadcast_buffers=False) + # FIXME: model with DDP wrapper doesn't have model.module + model_without_ddp = model + + n_parameters = sum(p.numel() for p in model.parameters() if p.requires_grad) + logger.info(f"number of params: {n_parameters}") + if hasattr(model_without_ddp, "flops"): + flops = model_without_ddp.flops() + logger.info(f"number of GFLOPs: {flops / 1e9}") + + lr_scheduler = build_scheduler(config, optimizer, len(data_loader_train)) + + if config.AUG.MIXUP > 0.0: + # smoothing is handled with mixup label transform + criterion = SoftTargetCrossEntropy() + elif config.MODEL.LABEL_SMOOTHING > 0.0: + criterion = LabelSmoothingCrossEntropy(smoothing=config.MODEL.LABEL_SMOOTHING) + else: + criterion = flow.nn.CrossEntropyLoss() + + max_accuracy = 0.0 + + if config.TRAIN.AUTO_RESUME: + resume_file = auto_resume_helper(config.OUTPUT) + if resume_file: + if config.MODEL.RESUME: + logger.warning( + f"auto-resume changing resume file from {config.MODEL.RESUME} to {resume_file}" + ) + config.defrost() + config.MODEL.RESUME = resume_file + config.freeze() + logger.info(f"auto resuming from {resume_file}") + else: + logger.info(f"no checkpoint found in {config.OUTPUT}, ignoring auto resume") + + if config.MODEL.RESUME: + print("resume called") + max_accuracy = load_checkpoint( + config, model_without_ddp, optimizer, lr_scheduler, logger + ) + acc1, acc5, loss = validate(config, data_loader_val, model) + logger.info( + f"Accuracy of the network on the {len(data_loader_val)} test images: {acc1:.1f}%" + ) + if config.EVAL_MODE: + return + + if config.THROUGHPUT_MODE: + acc1, acc5, loss = validate(config, data_loader_val, model) + logger.info( + f"Accuracy of the network on the {len(data_loader_val)} test images: 
{acc1:.1f}%" + ) + throughput(data_loader_val, model, logger) + return + + logger.info("Start training") + start_time = time.time() + for epoch in range(config.TRAIN.START_EPOCH, config.TRAIN.EPOCHS): + if not config.DATA.SYNTHETIC_DATA: + data_loader_train.sampler.set_epoch(epoch) + + train_one_epoch( + config, + model, + criterion, + data_loader_train, + optimizer, + epoch, + mixup_fn, + lr_scheduler + ) + if flow.env.get_rank() == 0 and ( + epoch % config.SAVE_FREQ == 0 or epoch == (config.TRAIN.EPOCHS - 1) + ): + save_checkpoint( + config, + epoch, + model_without_ddp, + max_accuracy, + optimizer, + lr_scheduler, + logger, + ) + + # no validate + acc1, acc5, loss = validate(config, data_loader_val, model) + logger.info( + f"Accuracy of the network on the {len(data_loader_val)} test images: {acc1:.1f}%" + ) + max_accuracy = max(max_accuracy, acc1) + logger.info(f"Max accuracy: {max_accuracy:.2f}%") + + total_time = time.time() - start_time + total_time_str = str(datetime.timedelta(seconds=int(total_time))) + logger.info("Training time {}".format(total_time_str)) + + +def train_one_epoch( + config, model, criterion, data_loader, optimizer, epoch, mixup_fn, lr_scheduler +): + model.train() + optimizer.zero_grad() + + num_steps = len(data_loader) + one_sample_time = TimeMeter() + loss_meter = AverageMeter() + + start = time.time() + end = time.time() + for idx, (samples, targets) in enumerate(data_loader): + samples = samples.cuda() + targets = targets.cuda() + + if mixup_fn is not None: + samples, targets = mixup_fn(samples, targets) + + outputs = model(samples) + if type(outputs) == tuple: + outputs = outputs[0] + + + if config.TRAIN.ACCUMULATION_STEPS > 1: + loss = criterion(outputs, targets) + loss = loss / config.TRAIN.ACCUMULATION_STEPS + loss.backward() + if config.TRAIN.CLIP_GRAD: + flow.nn.utils.clip_grad_norm_( + model.parameters(), config.TRAIN.CLIP_GRAD + ) + if (idx + 1) % config.TRAIN.ACCUMULATION_STEPS == 0: + optimizer.step() + optimizer.zero_grad() 
+ lr_scheduler.step() + else: + loss = criterion(outputs, targets) + optimizer.zero_grad() + loss.backward() + if config.TRAIN.CLIP_GRAD: + flow.nn.utils.clip_grad_norm_( + model.parameters(), config.TRAIN.CLIP_GRAD + ) + optimizer.step() + lr_scheduler.step() + + one_sample_time.record(samples.size(0) * flow.env.get_world_size()) + loss_meter.record(loss.cpu().detach(), targets.size(0)) + + end = time.time() + + if idx % config.PRINT_FREQ == 0: + lr = optimizer.param_groups[0]["lr"] + loss, loss_avg = loss_meter.get() + throughput, throughput_avg = one_sample_time.get() + etas = (num_steps - idx) * samples.size(0) * flow.env.get_world_size() / throughput_avg + one_sample_time.reset() + + logger.info( + f"Train: [{epoch}/{config.TRAIN.EPOCHS}][{idx}/{num_steps}]\t" + f"eta {datetime.timedelta(seconds=int(etas))}\tlr {lr:.6f}\t" + f"time {samples.size(0) * flow.env.get_world_size() / throughput:.4f}s ({samples.size(0) * flow.env.get_world_size() / throughput_avg:.4f}s)\t" + f"rate {throughput:.4f}/s ({throughput_avg:.4f}/s)\t" + f"loss {loss:.4f} ({loss_avg:.4f})\t" + ) + + epoch_time = time.time() - start + logger.info( + f"EPOCH {epoch} training takes {datetime.timedelta(seconds=int(epoch_time))}" + ) + + +@flow.no_grad() +def validate(config, data_loader, model): + criterion = flow.nn.CrossEntropyLoss() + model.eval() + + batch_time = TimeMeter() + loss_meter = AverageMeter() + acc1_meter = AverageMeter() + acc5_meter = AverageMeter() + + end = time.time() + for idx, (images, target) in enumerate(data_loader): + images = images.cuda() + target = target.cuda() + + # compute output + output = model.forward(images) + if type(output) == tuple: + output = output[0] + + + # measure accuracy and record loss + loss = criterion(output, target) + acc1, acc5 = accuracy(output, target, topk=(1, 5)) + + acc1 = reduce_tensor(acc1) + acc5 = reduce_tensor(acc5) + loss = reduce_tensor(loss) + + batch_time.record(target.size(0) * flow.env.get_world_size()) + 
loss_meter.record(loss, target.size(0)) + acc1_meter.record(acc1, target.size(0)) + acc5_meter.record(acc5, target.size(0)) + + # measure elapsed time + end = time.time() + + if idx % config.PRINT_FREQ == 0: + acc1, acc1_avg = acc1_meter.get() + acc5, acc5_avg = acc5_meter.get() + loss, loss_avg = loss_meter.get() + throughput, throughput_avg = batch_time.get() + batch_time.reset() + + logger.info( + f"Test: [{idx}/{len(data_loader)}]\t" + f"Throughput {throughput:.3f} ({throughput_avg:.3f})\t" + f"Loss {loss:.4f} ({loss_avg:.4f})\t" + f"Acc@1 {acc1:.3f} ({acc1_avg:.3f})\t" + f"Acc@5 {acc5:.3f} ({acc5_avg:.3f})\t" + ) + + logger.info(f" * Acc@1 {acc1_avg:.3f} Acc@5 {acc5_avg:.3f}") + return acc1_avg, acc5_avg, loss_meter + + +@flow.no_grad() +def throughput(data_loader, model, logger): + model.eval() + + for idx, (images, _) in enumerate(data_loader): + images = images.cuda() + batch_size = images.shape[0] + for i in range(50): + model.forward(images) + flow.cuda.synchronize() + # TODO: add flow.cuda.synchronize() + logger.info(f"throughput averaged with 30 times") + tic1 = time.time() + for i in range(30): + model.forward(images) + + flow.cuda.synchronize() + tic2 = time.time() + logger.info( + f"batch_size {batch_size} throughput {30 * batch_size / (tic2 - tic1)}" + ) + return + + +if __name__ == "__main__": + _, config = parse_option() + if "RANK" in os.environ and "WORLD_SIZE" in os.environ: + rank = flow.env.get_rank() + world_size = flow.env.get_world_size() + print(f"RANK and WORLD_SIZE in environ: {rank}/{world_size}") + else: + rank = -1 + world_size = -1 + + seed = config.SEED + flow.env.get_rank() + flow.manual_seed(seed) + np.random.seed(seed) + cudnn.benchmark = True + + # linear_scaled_lr = ( + # config.TRAIN.BASE_LR + # * config.DATA.BATCH_SIZE + # * flow.env.get_world_size() + # / 512.0 + # ) + # linear_scaled_warmup_lr = ( + # config.TRAIN.WARMUP_LR + # * config.DATA.BATCH_SIZE + # * flow.env.get_world_size() + # / 512.0 + # ) + linear_scaled_lr = 
config.TRAIN.BASE_LR + linear_scaled_warmup_lr = config.TRAIN.WARMUP_LR + + linear_scaled_min_lr = ( + config.TRAIN.MIN_LR * config.DATA.BATCH_SIZE * flow.env.get_world_size() / 512.0 + ) + + # gradient accumulation also need to scale the learning rate + if config.TRAIN.ACCUMULATION_STEPS > 1: + linear_scaled_lr = linear_scaled_lr * config.TRAIN.ACCUMULATION_STEPS + linear_scaled_warmup_lr = ( + linear_scaled_warmup_lr * config.TRAIN.ACCUMULATION_STEPS + ) + linear_scaled_min_lr = linear_scaled_min_lr * config.TRAIN.ACCUMULATION_STEPS + config.defrost() + config.TRAIN.BASE_LR = linear_scaled_lr + config.TRAIN.WARMUP_LR = linear_scaled_warmup_lr + config.TRAIN.MIN_LR = linear_scaled_min_lr + config.freeze() + + os.makedirs(config.OUTPUT, exist_ok=True) + logger = create_logger( + output_dir=config.OUTPUT, + dist_rank=flow.env.get_rank(), + name=f"{config.MODEL.ARCH}", + ) + + if flow.env.get_rank() == 0: + path = os.path.join(config.OUTPUT, "config.json") + with open(path, "w") as f: + f.write(config.dump()) + logger.info(f"Full config saved to {path}") + + # print config + logger.info(config.dump()) + + main(config) diff --git a/cv/classification/deit_3_base_384_1k/optimizer.py b/cv/classification/deit_3_base_384_1k/optimizer.py new file mode 120000 index 00000000..e5dec61d --- /dev/null +++ b/cv/classification/deit_3_base_384_1k/optimizer.py @@ -0,0 +1 @@ +../optimizer.py \ No newline at end of file diff --git a/cv/classification/deit_3_base_384_1k/requirements.txt b/cv/classification/deit_3_base_384_1k/requirements.txt new file mode 120000 index 00000000..dc833dd4 --- /dev/null +++ b/cv/classification/deit_3_base_384_1k/requirements.txt @@ -0,0 +1 @@ +../requirements.txt \ No newline at end of file diff --git a/cv/classification/deit_3_base_384_1k/train.sh b/cv/classification/deit_3_base_384_1k/train.sh new file mode 100755 index 00000000..be85798e --- /dev/null +++ b/cv/classification/deit_3_base_384_1k/train.sh @@ -0,0 +1,19 @@ +export 
PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=4 +PORT=12346 +MODEL_ARCH="deit_base_patch16_LS_384" +BATCH_SIZE=32 +LEARNING_RATE=5e-5 + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/deit_settings.yaml\ + --model_arch $MODEL_ARCH \ + --batch-size $BATCH_SIZE \ + --lr $LEARNING_RATE + diff --git a/cv/classification/deit_3_base_384_1k/utils.py b/cv/classification/deit_3_base_384_1k/utils.py new file mode 120000 index 00000000..50fbc6d8 --- /dev/null +++ b/cv/classification/deit_3_base_384_1k/utils.py @@ -0,0 +1 @@ +../utils.py \ No newline at end of file diff --git a/cv/classification/deit_base_distilled_patch16_224/README.md b/cv/classification/deit_base_distilled_patch16_224/README.md new file mode 100644 index 00000000..eec5807b --- /dev/null +++ b/cv/classification/deit_base_distilled_patch16_224/README.md @@ -0,0 +1,66 @@ +## deit_base_distilled_patch16_224 + +### Installation +- Install the latest version of OneFlow +```bash +python3 -m pip install oneflow -f https://staging.oneflow.info/branch/master/[PLATFORM] +``` +Find more information on [install oneflow](https://github.com/Oneflow-Inc/oneflow#install-oneflow) + +- Install flowvision + +Then install the latest stable release of flowvision + +```bash +pip install flowvision==0.2.1 +``` + +- Install other requirements +```bash +python3 -m pip install -r requirements.txt +``` + +### Dataset +#### ImageNet +For ImageNet dataset, you can download it from http://image-net.org/. We provide the following two ways to load data: + +- For standard folder dataset, move validation images to labeled sub-folders. The file structure should look like: + ```bash + $ tree data + imagenet + ├── train + │ ├── class1 + │ │ ├── img1.jpeg + │ │ ├── img2.jpeg + │ │ └── ... + │ ├── class2 + │ │ ├── img3.jpeg + │ │ └── ... + │ └── ... + └── val + ├── class1 + │ ├── img4.jpeg + │ ├── img5.jpeg + │ └── ... 
+ ├── class2 + │ ├── img6.jpeg + │ └── ... + └── ... + + +### Training + +You can use bash script `train.sh` to train this model. + +```bash +sh train.sh +``` + +### inference + +Bash script `infer.sh` is used to infer the trained model. + +```bash +sh infer.sh +``` + diff --git a/cv/classification/deit_base_distilled_patch16_224/config.py b/cv/classification/deit_base_distilled_patch16_224/config.py new file mode 120000 index 00000000..3721332c --- /dev/null +++ b/cv/classification/deit_base_distilled_patch16_224/config.py @@ -0,0 +1 @@ +../config.py \ No newline at end of file diff --git a/cv/classification/deit_base_distilled_patch16_224/configs b/cv/classification/deit_base_distilled_patch16_224/configs new file mode 120000 index 00000000..271eaab3 --- /dev/null +++ b/cv/classification/deit_base_distilled_patch16_224/configs @@ -0,0 +1 @@ +../configs \ No newline at end of file diff --git a/cv/classification/deit_base_distilled_patch16_224/data b/cv/classification/deit_base_distilled_patch16_224/data new file mode 120000 index 00000000..4909e06e --- /dev/null +++ b/cv/classification/deit_base_distilled_patch16_224/data @@ -0,0 +1 @@ +../data \ No newline at end of file diff --git a/cv/classification/deit_base_distilled_patch16_224/infer.sh b/cv/classification/deit_base_distilled_patch16_224/infer.sh new file mode 100755 index 00000000..11fd5917 --- /dev/null +++ b/cv/classification/deit_base_distilled_patch16_224/infer.sh @@ -0,0 +1,16 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=1 +PORT=12346 +MODEL_ARCH="deit_base_distilled_patch16_224" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/default_settings.yaml \ + --model_arch $MODEL_ARCH \ + --throughput + diff --git a/cv/classification/deit_base_distilled_patch16_224/logger.py b/cv/classification/deit_base_distilled_patch16_224/logger.py new file mode 120000 index 00000000..e1dc8ceb --- 
/dev/null +++ b/cv/classification/deit_base_distilled_patch16_224/logger.py @@ -0,0 +1 @@ +../logger.py \ No newline at end of file diff --git a/cv/classification/deit_base_distilled_patch16_224/lr_scheduler.py b/cv/classification/deit_base_distilled_patch16_224/lr_scheduler.py new file mode 120000 index 00000000..268fb140 --- /dev/null +++ b/cv/classification/deit_base_distilled_patch16_224/lr_scheduler.py @@ -0,0 +1 @@ +../lr_scheduler.py \ No newline at end of file diff --git a/cv/classification/deit_base_distilled_patch16_224/main.py b/cv/classification/deit_base_distilled_patch16_224/main.py new file mode 100644 index 00000000..cb47530d --- /dev/null +++ b/cv/classification/deit_base_distilled_patch16_224/main.py @@ -0,0 +1,460 @@ +""" +Modified from https://github.com/microsoft/Swin-Transformer/blob/main/main.py +""" + +import os +import time +import argparse +import datetime +import numpy as np +import oneflow as flow +import oneflow.backends.cudnn as cudnn + +from flowvision.loss.cross_entropy import ( + LabelSmoothingCrossEntropy, + SoftTargetCrossEntropy, +) +from flowvision.utils.metrics import accuracy +from flowvision.models import ModelCreator + +from config import get_config +from data import build_loader +from lr_scheduler import build_scheduler +from optimizer import build_optimizer +from logger import create_logger +from utils import ( + load_checkpoint, + save_checkpoint, + get_grad_norm, + auto_resume_helper, + reduce_tensor, + AverageMeter, + TimeMeter +) + +def build_model(config): + model_arch = config.MODEL.ARCH + model = ModelCreator.create_model(model_arch, pretrained=config.MODEL.PRETRAINED) + return model + + +def parse_option(): + parser = argparse.ArgumentParser( + "Flowvision image classification training and evaluation script", add_help=False + ) + parser.add_argument( + "--model_arch", + type=str, + required=True, + default="swin_tiny_patch4_window7_224", + help="model for training", + ) + parser.add_argument( + "--cfg", type=str, 
required=True, metavar="FILE", help="path to config file", + ) + parser.add_argument( + "--opts", + help="Modify config options by adding 'KEY VALUE' pairs. ", + default=None, + nargs="+", + ) + + # easy config modification + parser.add_argument( + "--synthetic-data", + action="store_true", + dest="synthetic_data", + help="Use synthetic data", + ) + parser.add_argument( + "--epochs", type=int, help="batch size for single GPU" + ) + parser.add_argument("--lr", type=float, help="initial learning rate") + parser.add_argument( + "--image-size", type=int, help="initial image size" + ) + parser.add_argument( + "--batch-size", type=int, help="batch size for single GPU" + ) + parser.add_argument("--data-path", type=str, help="path to dataset") + parser.add_argument( + "--zip", + action="store_true", + help="use zipped dataset instead of folder dataset", + ) + parser.add_argument( + "--cache-mode", + type=str, + default="part", + choices=["no", "full", "part"], + help="no: no cache, " + "full: cache all data, " + "part: sharding the dataset into nonoverlapping pieces and only cache one piece", + ) + parser.add_argument("--resume", help="resume from checkpoint") + parser.add_argument( + "--accumulation-steps", type=int, help="gradient accumulation steps" + ) + parser.add_argument( + "--use-checkpoint", + action="store_true", + help="whether to use gradient checkpointing to save memory", + ) + parser.add_argument( + "--output", + default="output", + type=str, + metavar="PATH", + help="root of output folder, the full path is // (default: output)", + ) + parser.add_argument("--tag", help="tag of experiment") + parser.add_argument("--eval", action="store_true", help="Perform evaluation only") + parser.add_argument( + "--throughput", action="store_true", help="Test throughput only" + ) + + # distributed training + parser.add_argument( + "--local_rank", + type=int, + default=0, + required=False, + help="local rank for DistributedDataParallel", + ) + + args, unparsed = 
parser.parse_known_args() + + config = get_config(args) + + return args, config + + +def main(config): + ( + dataset_train, + dataset_val, + data_loader_train, + data_loader_val, + mixup_fn, + ) = build_loader(config) + + logger.info(f"Creating model:{config.MODEL.ARCH}") + model = build_model(config) + model.cuda() + + optimizer = build_optimizer(config, model) + model = flow.nn.parallel.DistributedDataParallel(model, broadcast_buffers=False) + # FIXME: model with DDP wrapper doesn't have model.module + model_without_ddp = model + + n_parameters = sum(p.numel() for p in model.parameters() if p.requires_grad) + logger.info(f"number of params: {n_parameters}") + if hasattr(model_without_ddp, "flops"): + flops = model_without_ddp.flops() + logger.info(f"number of GFLOPs: {flops / 1e9}") + + lr_scheduler = build_scheduler(config, optimizer, len(data_loader_train)) + + if config.AUG.MIXUP > 0.0: + # smoothing is handled with mixup label transform + criterion = SoftTargetCrossEntropy() + elif config.MODEL.LABEL_SMOOTHING > 0.0: + criterion = LabelSmoothingCrossEntropy(smoothing=config.MODEL.LABEL_SMOOTHING) + else: + criterion = flow.nn.CrossEntropyLoss() + + max_accuracy = 0.0 + + if config.TRAIN.AUTO_RESUME: + resume_file = auto_resume_helper(config.OUTPUT) + if resume_file: + if config.MODEL.RESUME: + logger.warning( + f"auto-resume changing resume file from {config.MODEL.RESUME} to {resume_file}" + ) + config.defrost() + config.MODEL.RESUME = resume_file + config.freeze() + logger.info(f"auto resuming from {resume_file}") + else: + logger.info(f"no checkpoint found in {config.OUTPUT}, ignoring auto resume") + + if config.MODEL.RESUME: + print("resume called") + max_accuracy = load_checkpoint( + config, model_without_ddp, optimizer, lr_scheduler, logger + ) + acc1, acc5, loss = validate(config, data_loader_val, model) + logger.info( + f"Accuracy of the network on the {len(data_loader_val)} test images: {acc1:.1f}%" + ) + if config.EVAL_MODE: + return + + if 
config.THROUGHPUT_MODE: + acc1, acc5, loss = validate(config, data_loader_val, model) + logger.info( + f"Accuracy of the network on the {len(data_loader_val)} test images: {acc1:.1f}%" + ) + throughput(data_loader_val, model, logger) + return + + logger.info("Start training") + start_time = time.time() + for epoch in range(config.TRAIN.START_EPOCH, config.TRAIN.EPOCHS): + if not config.DATA.SYNTHETIC_DATA: + data_loader_train.sampler.set_epoch(epoch) + + train_one_epoch( + config, + model, + criterion, + data_loader_train, + optimizer, + epoch, + mixup_fn, + lr_scheduler + ) + if flow.env.get_rank() == 0 and ( + epoch % config.SAVE_FREQ == 0 or epoch == (config.TRAIN.EPOCHS - 1) + ): + save_checkpoint( + config, + epoch, + model_without_ddp, + max_accuracy, + optimizer, + lr_scheduler, + logger, + ) + + # no validate + acc1, acc5, loss = validate(config, data_loader_val, model) + logger.info( + f"Accuracy of the network on the {len(data_loader_val)} test images: {acc1:.1f}%" + ) + max_accuracy = max(max_accuracy, acc1) + logger.info(f"Max accuracy: {max_accuracy:.2f}%") + + total_time = time.time() - start_time + total_time_str = str(datetime.timedelta(seconds=int(total_time))) + logger.info("Training time {}".format(total_time_str)) + + +def train_one_epoch( + config, model, criterion, data_loader, optimizer, epoch, mixup_fn, lr_scheduler +): + model.train() + optimizer.zero_grad() + + num_steps = len(data_loader) + one_sample_time = TimeMeter() + loss_meter = AverageMeter() + + start = time.time() + end = time.time() + for idx, (samples, targets) in enumerate(data_loader): + samples = samples.cuda() + targets = targets.cuda() + + if mixup_fn is not None: + samples, targets = mixup_fn(samples, targets) + + outputs = model(samples) + if type(outputs) == tuple: + outputs = outputs[0] + + + if config.TRAIN.ACCUMULATION_STEPS > 1: + loss = criterion(outputs, targets) + loss = loss / config.TRAIN.ACCUMULATION_STEPS + loss.backward() + if config.TRAIN.CLIP_GRAD: + 
flow.nn.utils.clip_grad_norm_( + model.parameters(), config.TRAIN.CLIP_GRAD + ) + if (idx + 1) % config.TRAIN.ACCUMULATION_STEPS == 0: + optimizer.step() + optimizer.zero_grad() + lr_scheduler.step() + else: + loss = criterion(outputs, targets) + optimizer.zero_grad() + loss.backward() + if config.TRAIN.CLIP_GRAD: + flow.nn.utils.clip_grad_norm_( + model.parameters(), config.TRAIN.CLIP_GRAD + ) + optimizer.step() + lr_scheduler.step() + + one_sample_time.record(samples.size(0) * flow.env.get_world_size()) + loss_meter.record(loss.cpu().detach(), targets.size(0)) + + end = time.time() + + if idx % config.PRINT_FREQ == 0: + lr = optimizer.param_groups[0]["lr"] + loss, loss_avg = loss_meter.get() + throughput, throughput_avg = one_sample_time.get() + etas = (num_steps - idx) * samples.size(0) * flow.env.get_world_size() / throughput_avg + one_sample_time.reset() + + logger.info( + f"Train: [{epoch}/{config.TRAIN.EPOCHS}][{idx}/{num_steps}]\t" + f"eta {datetime.timedelta(seconds=int(etas))}\tlr {lr:.6f}\t" + f"time {samples.size(0) * flow.env.get_world_size() / throughput:.4f}s ({samples.size(0) * flow.env.get_world_size() / throughput_avg:.4f}s)\t" + f"rate {throughput:.4f}/s ({throughput_avg:.4f}/s)\t" + f"loss {loss:.4f} ({loss_avg:.4f})\t" + ) + + epoch_time = time.time() - start + logger.info( + f"EPOCH {epoch} training takes {datetime.timedelta(seconds=int(epoch_time))}" + ) + + +@flow.no_grad() +def validate(config, data_loader, model): + criterion = flow.nn.CrossEntropyLoss() + model.eval() + + batch_time = TimeMeter() + loss_meter = AverageMeter() + acc1_meter = AverageMeter() + acc5_meter = AverageMeter() + + end = time.time() + for idx, (images, target) in enumerate(data_loader): + images = images.cuda() + target = target.cuda() + + # compute output + output = model.forward(images) + if type(output) == tuple: + output = output[0] + + + # measure accuracy and record loss + loss = criterion(output, target) + acc1, acc5 = accuracy(output, target, topk=(1, 5)) + 
+ acc1 = reduce_tensor(acc1) + acc5 = reduce_tensor(acc5) + loss = reduce_tensor(loss) + + batch_time.record(target.size(0) * flow.env.get_world_size()) + loss_meter.record(loss, target.size(0)) + acc1_meter.record(acc1, target.size(0)) + acc5_meter.record(acc5, target.size(0)) + + # measure elapsed time + end = time.time() + + if idx % config.PRINT_FREQ == 0: + acc1, acc1_avg = acc1_meter.get() + acc5, acc5_avg = acc5_meter.get() + loss, loss_avg = loss_meter.get() + throughput, throughput_avg = batch_time.get() + batch_time.reset() + + logger.info( + f"Test: [{idx}/{len(data_loader)}]\t" + f"Throughput {throughput:.3f} ({throughput_avg:.3f})\t" + f"Loss {loss:.4f} ({loss_avg:.4f})\t" + f"Acc@1 {acc1:.3f} ({acc1_avg:.3f})\t" + f"Acc@5 {acc5:.3f} ({acc5_avg:.3f})\t" + ) + + logger.info(f" * Acc@1 {acc1_avg:.3f} Acc@5 {acc5_avg:.3f}") + return acc1_avg, acc5_avg, loss_meter + + +@flow.no_grad() +def throughput(data_loader, model, logger): + model.eval() + + for idx, (images, _) in enumerate(data_loader): + images = images.cuda() + batch_size = images.shape[0] + for i in range(50): + model.forward(images) + flow.cuda.synchronize() + # TODO: add flow.cuda.synchronize() + logger.info(f"throughput averaged with 30 times") + tic1 = time.time() + for i in range(30): + model.forward(images) + + flow.cuda.synchronize() + tic2 = time.time() + logger.info( + f"batch_size {batch_size} throughput {30 * batch_size / (tic2 - tic1)}" + ) + return + + +if __name__ == "__main__": + _, config = parse_option() + if "RANK" in os.environ and "WORLD_SIZE" in os.environ: + rank = flow.env.get_rank() + world_size = flow.env.get_world_size() + print(f"RANK and WORLD_SIZE in environ: {rank}/{world_size}") + else: + rank = -1 + world_size = -1 + + seed = config.SEED + flow.env.get_rank() + flow.manual_seed(seed) + np.random.seed(seed) + cudnn.benchmark = True + + # linear_scaled_lr = ( + # config.TRAIN.BASE_LR + # * config.DATA.BATCH_SIZE + # * flow.env.get_world_size() + # / 512.0 + # ) + # 
linear_scaled_warmup_lr = ( + # config.TRAIN.WARMUP_LR + # * config.DATA.BATCH_SIZE + # * flow.env.get_world_size() + # / 512.0 + # ) + linear_scaled_lr = config.TRAIN.BASE_LR + linear_scaled_warmup_lr = config.TRAIN.WARMUP_LR + + linear_scaled_min_lr = ( + config.TRAIN.MIN_LR * config.DATA.BATCH_SIZE * flow.env.get_world_size() / 512.0 + ) + + # gradient accumulation also need to scale the learning rate + if config.TRAIN.ACCUMULATION_STEPS > 1: + linear_scaled_lr = linear_scaled_lr * config.TRAIN.ACCUMULATION_STEPS + linear_scaled_warmup_lr = ( + linear_scaled_warmup_lr * config.TRAIN.ACCUMULATION_STEPS + ) + linear_scaled_min_lr = linear_scaled_min_lr * config.TRAIN.ACCUMULATION_STEPS + config.defrost() + config.TRAIN.BASE_LR = linear_scaled_lr + config.TRAIN.WARMUP_LR = linear_scaled_warmup_lr + config.TRAIN.MIN_LR = linear_scaled_min_lr + config.freeze() + + os.makedirs(config.OUTPUT, exist_ok=True) + logger = create_logger( + output_dir=config.OUTPUT, + dist_rank=flow.env.get_rank(), + name=f"{config.MODEL.ARCH}", + ) + + if flow.env.get_rank() == 0: + path = os.path.join(config.OUTPUT, "config.json") + with open(path, "w") as f: + f.write(config.dump()) + logger.info(f"Full config saved to {path}") + + # print config + logger.info(config.dump()) + + main(config) diff --git a/cv/classification/deit_base_distilled_patch16_224/optimizer.py b/cv/classification/deit_base_distilled_patch16_224/optimizer.py new file mode 120000 index 00000000..e5dec61d --- /dev/null +++ b/cv/classification/deit_base_distilled_patch16_224/optimizer.py @@ -0,0 +1 @@ +../optimizer.py \ No newline at end of file diff --git a/cv/classification/deit_base_distilled_patch16_224/requirements.txt b/cv/classification/deit_base_distilled_patch16_224/requirements.txt new file mode 120000 index 00000000..dc833dd4 --- /dev/null +++ b/cv/classification/deit_base_distilled_patch16_224/requirements.txt @@ -0,0 +1 @@ +../requirements.txt \ No newline at end of file diff --git 
a/cv/classification/deit_base_distilled_patch16_224/train.sh b/cv/classification/deit_base_distilled_patch16_224/train.sh new file mode 100755 index 00000000..1db18125 --- /dev/null +++ b/cv/classification/deit_base_distilled_patch16_224/train.sh @@ -0,0 +1,18 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=4 +PORT=12346 +MODEL_ARCH="deit_base_distilled_patch16_224" +BATCH_SIZE=32 +LEARNING_RATE=5e-5 + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/default_settings.yaml \ + --model_arch $MODEL_ARCH \ + --batch-size $BATCH_SIZE \ + --lr $LEARNING_RATE \ No newline at end of file diff --git a/cv/classification/deit_base_distilled_patch16_224/utils.py b/cv/classification/deit_base_distilled_patch16_224/utils.py new file mode 120000 index 00000000..50fbc6d8 --- /dev/null +++ b/cv/classification/deit_base_distilled_patch16_224/utils.py @@ -0,0 +1 @@ +../utils.py \ No newline at end of file diff --git a/cv/classification/deit_base_distilled_patch16_384/README.md b/cv/classification/deit_base_distilled_patch16_384/README.md new file mode 100644 index 00000000..046725fe --- /dev/null +++ b/cv/classification/deit_base_distilled_patch16_384/README.md @@ -0,0 +1,66 @@ +## deit_base_distilled_patch16_384 + +### Installation +- Install the latest version of OneFlow +```bash +python3 -m pip install oneflow -f https://staging.oneflow.info/branch/master/[PLATFORM] +``` +Find more information on [install oneflow](https://github.com/Oneflow-Inc/oneflow#install-oneflow) + +- Install flowvision + +Then install the latest stable release of flowvision + +```bash +pip install flowvision==0.2.1 +``` + +- Install other requirements +```bash +python3 -m pip install -r requirements.txt +``` + +### Dataset +#### ImageNet +For ImageNet dataset, you can download it from http://image-net.org/. 
We provide the following two ways to load data: + +- For standard folder dataset, move validation images to labeled sub-folders. The file structure should look like: + ```bash + $ tree data + imagenet + ├── train + │ ├── class1 + │ │ ├── img1.jpeg + │ │ ├── img2.jpeg + │ │ └── ... + │ ├── class2 + │ │ ├── img3.jpeg + │ │ └── ... + │ └── ... + └── val + ├── class1 + │ ├── img4.jpeg + │ ├── img5.jpeg + │ └── ... + ├── class2 + │ ├── img6.jpeg + │ └── ... + └── ... + + +### Training + +You can use bash script `train.sh` to train this model. + +```bash +sh train.sh +``` + +### inference + +Bash script `infer.sh` is used to infer the trained model. + +```bash +sh infer.sh +``` + diff --git a/cv/classification/deit_base_distilled_patch16_384/config.py b/cv/classification/deit_base_distilled_patch16_384/config.py new file mode 120000 index 00000000..3721332c --- /dev/null +++ b/cv/classification/deit_base_distilled_patch16_384/config.py @@ -0,0 +1 @@ +../config.py \ No newline at end of file diff --git a/cv/classification/deit_base_distilled_patch16_384/configs b/cv/classification/deit_base_distilled_patch16_384/configs new file mode 120000 index 00000000..271eaab3 --- /dev/null +++ b/cv/classification/deit_base_distilled_patch16_384/configs @@ -0,0 +1 @@ +../configs \ No newline at end of file diff --git a/cv/classification/deit_base_distilled_patch16_384/data b/cv/classification/deit_base_distilled_patch16_384/data new file mode 120000 index 00000000..4909e06e --- /dev/null +++ b/cv/classification/deit_base_distilled_patch16_384/data @@ -0,0 +1 @@ +../data \ No newline at end of file diff --git a/cv/classification/deit_base_distilled_patch16_384/infer.sh b/cv/classification/deit_base_distilled_patch16_384/infer.sh new file mode 100755 index 00000000..d0c8ae59 --- /dev/null +++ b/cv/classification/deit_base_distilled_patch16_384/infer.sh @@ -0,0 +1,18 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=1 +PORT=12346 +MODEL_ARCH="deit_base_distilled_patch16_384" 
+IMAGE_SIZE=384 + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/default_settings.yaml \ + --model_arch $MODEL_ARCH \ + --image-size $IMAGE_SIZE \ + --throughput + diff --git a/cv/classification/deit_base_distilled_patch16_384/logger.py b/cv/classification/deit_base_distilled_patch16_384/logger.py new file mode 120000 index 00000000..e1dc8ceb --- /dev/null +++ b/cv/classification/deit_base_distilled_patch16_384/logger.py @@ -0,0 +1 @@ +../logger.py \ No newline at end of file diff --git a/cv/classification/deit_base_distilled_patch16_384/lr_scheduler.py b/cv/classification/deit_base_distilled_patch16_384/lr_scheduler.py new file mode 120000 index 00000000..268fb140 --- /dev/null +++ b/cv/classification/deit_base_distilled_patch16_384/lr_scheduler.py @@ -0,0 +1 @@ +../lr_scheduler.py \ No newline at end of file diff --git a/cv/classification/deit_base_distilled_patch16_384/main.py b/cv/classification/deit_base_distilled_patch16_384/main.py new file mode 100644 index 00000000..cb47530d --- /dev/null +++ b/cv/classification/deit_base_distilled_patch16_384/main.py @@ -0,0 +1,460 @@ +""" +Modified from https://github.com/microsoft/Swin-Transformer/blob/main/main.py +""" + +import os +import time +import argparse +import datetime +import numpy as np +import oneflow as flow +import oneflow.backends.cudnn as cudnn + +from flowvision.loss.cross_entropy import ( + LabelSmoothingCrossEntropy, + SoftTargetCrossEntropy, +) +from flowvision.utils.metrics import accuracy +from flowvision.models import ModelCreator + +from config import get_config +from data import build_loader +from lr_scheduler import build_scheduler +from optimizer import build_optimizer +from logger import create_logger +from utils import ( + load_checkpoint, + save_checkpoint, + get_grad_norm, + auto_resume_helper, + reduce_tensor, + AverageMeter, + TimeMeter +) + +def build_model(config): + model_arch = 
config.MODEL.ARCH + model = ModelCreator.create_model(model_arch, pretrained=config.MODEL.PRETRAINED) + return model + + +def parse_option(): + parser = argparse.ArgumentParser( + "Flowvision image classification training and evaluation script", add_help=False + ) + parser.add_argument( + "--model_arch", + type=str, + required=True, + default="swin_tiny_patch4_window7_224", + help="model for training", + ) + parser.add_argument( + "--cfg", type=str, required=True, metavar="FILE", help="path to config file", + ) + parser.add_argument( + "--opts", + help="Modify config options by adding 'KEY VALUE' pairs. ", + default=None, + nargs="+", + ) + + # easy config modification + parser.add_argument( + "--synthetic-data", + action="store_true", + dest="synthetic_data", + help="Use synthetic data", + ) + parser.add_argument( + "--epochs", type=int, help="batch size for single GPU" + ) + parser.add_argument("--lr", type=float, help="initial learning rate") + parser.add_argument( + "--image-size", type=int, help="initial image size" + ) + parser.add_argument( + "--batch-size", type=int, help="batch size for single GPU" + ) + parser.add_argument("--data-path", type=str, help="path to dataset") + parser.add_argument( + "--zip", + action="store_true", + help="use zipped dataset instead of folder dataset", + ) + parser.add_argument( + "--cache-mode", + type=str, + default="part", + choices=["no", "full", "part"], + help="no: no cache, " + "full: cache all data, " + "part: sharding the dataset into nonoverlapping pieces and only cache one piece", + ) + parser.add_argument("--resume", help="resume from checkpoint") + parser.add_argument( + "--accumulation-steps", type=int, help="gradient accumulation steps" + ) + parser.add_argument( + "--use-checkpoint", + action="store_true", + help="whether to use gradient checkpointing to save memory", + ) + parser.add_argument( + "--output", + default="output", + type=str, + metavar="PATH", + help="root of output folder, the full path is // 
(default: output)", + ) + parser.add_argument("--tag", help="tag of experiment") + parser.add_argument("--eval", action="store_true", help="Perform evaluation only") + parser.add_argument( + "--throughput", action="store_true", help="Test throughput only" + ) + + # distributed training + parser.add_argument( + "--local_rank", + type=int, + default=0, + required=False, + help="local rank for DistributedDataParallel", + ) + + args, unparsed = parser.parse_known_args() + + config = get_config(args) + + return args, config + + +def main(config): + ( + dataset_train, + dataset_val, + data_loader_train, + data_loader_val, + mixup_fn, + ) = build_loader(config) + + logger.info(f"Creating model:{config.MODEL.ARCH}") + model = build_model(config) + model.cuda() + + optimizer = build_optimizer(config, model) + model = flow.nn.parallel.DistributedDataParallel(model, broadcast_buffers=False) + # FIXME: model with DDP wrapper doesn't have model.module + model_without_ddp = model + + n_parameters = sum(p.numel() for p in model.parameters() if p.requires_grad) + logger.info(f"number of params: {n_parameters}") + if hasattr(model_without_ddp, "flops"): + flops = model_without_ddp.flops() + logger.info(f"number of GFLOPs: {flops / 1e9}") + + lr_scheduler = build_scheduler(config, optimizer, len(data_loader_train)) + + if config.AUG.MIXUP > 0.0: + # smoothing is handled with mixup label transform + criterion = SoftTargetCrossEntropy() + elif config.MODEL.LABEL_SMOOTHING > 0.0: + criterion = LabelSmoothingCrossEntropy(smoothing=config.MODEL.LABEL_SMOOTHING) + else: + criterion = flow.nn.CrossEntropyLoss() + + max_accuracy = 0.0 + + if config.TRAIN.AUTO_RESUME: + resume_file = auto_resume_helper(config.OUTPUT) + if resume_file: + if config.MODEL.RESUME: + logger.warning( + f"auto-resume changing resume file from {config.MODEL.RESUME} to {resume_file}" + ) + config.defrost() + config.MODEL.RESUME = resume_file + config.freeze() + logger.info(f"auto resuming from {resume_file}") + else: 
+ logger.info(f"no checkpoint found in {config.OUTPUT}, ignoring auto resume") + + if config.MODEL.RESUME: + print("resume called") + max_accuracy = load_checkpoint( + config, model_without_ddp, optimizer, lr_scheduler, logger + ) + acc1, acc5, loss = validate(config, data_loader_val, model) + logger.info( + f"Accuracy of the network on the {len(data_loader_val)} test images: {acc1:.1f}%" + ) + if config.EVAL_MODE: + return + + if config.THROUGHPUT_MODE: + acc1, acc5, loss = validate(config, data_loader_val, model) + logger.info( + f"Accuracy of the network on the {len(data_loader_val)} test images: {acc1:.1f}%" + ) + throughput(data_loader_val, model, logger) + return + + logger.info("Start training") + start_time = time.time() + for epoch in range(config.TRAIN.START_EPOCH, config.TRAIN.EPOCHS): + if not config.DATA.SYNTHETIC_DATA: + data_loader_train.sampler.set_epoch(epoch) + + train_one_epoch( + config, + model, + criterion, + data_loader_train, + optimizer, + epoch, + mixup_fn, + lr_scheduler + ) + if flow.env.get_rank() == 0 and ( + epoch % config.SAVE_FREQ == 0 or epoch == (config.TRAIN.EPOCHS - 1) + ): + save_checkpoint( + config, + epoch, + model_without_ddp, + max_accuracy, + optimizer, + lr_scheduler, + logger, + ) + + # no validate + acc1, acc5, loss = validate(config, data_loader_val, model) + logger.info( + f"Accuracy of the network on the {len(data_loader_val)} test images: {acc1:.1f}%" + ) + max_accuracy = max(max_accuracy, acc1) + logger.info(f"Max accuracy: {max_accuracy:.2f}%") + + total_time = time.time() - start_time + total_time_str = str(datetime.timedelta(seconds=int(total_time))) + logger.info("Training time {}".format(total_time_str)) + + +def train_one_epoch( + config, model, criterion, data_loader, optimizer, epoch, mixup_fn, lr_scheduler +): + model.train() + optimizer.zero_grad() + + num_steps = len(data_loader) + one_sample_time = TimeMeter() + loss_meter = AverageMeter() + + start = time.time() + end = time.time() + for idx, 
(samples, targets) in enumerate(data_loader): + samples = samples.cuda() + targets = targets.cuda() + + if mixup_fn is not None: + samples, targets = mixup_fn(samples, targets) + + outputs = model(samples) + if type(outputs) == tuple: + outputs = outputs[0] + + + if config.TRAIN.ACCUMULATION_STEPS > 1: + loss = criterion(outputs, targets) + loss = loss / config.TRAIN.ACCUMULATION_STEPS + loss.backward() + if config.TRAIN.CLIP_GRAD: + flow.nn.utils.clip_grad_norm_( + model.parameters(), config.TRAIN.CLIP_GRAD + ) + if (idx + 1) % config.TRAIN.ACCUMULATION_STEPS == 0: + optimizer.step() + optimizer.zero_grad() + lr_scheduler.step() + else: + loss = criterion(outputs, targets) + optimizer.zero_grad() + loss.backward() + if config.TRAIN.CLIP_GRAD: + flow.nn.utils.clip_grad_norm_( + model.parameters(), config.TRAIN.CLIP_GRAD + ) + optimizer.step() + lr_scheduler.step() + + one_sample_time.record(samples.size(0) * flow.env.get_world_size()) + loss_meter.record(loss.cpu().detach(), targets.size(0)) + + end = time.time() + + if idx % config.PRINT_FREQ == 0: + lr = optimizer.param_groups[0]["lr"] + loss, loss_avg = loss_meter.get() + throughput, throughput_avg = one_sample_time.get() + etas = (num_steps - idx) * samples.size(0) * flow.env.get_world_size() / throughput_avg + one_sample_time.reset() + + logger.info( + f"Train: [{epoch}/{config.TRAIN.EPOCHS}][{idx}/{num_steps}]\t" + f"eta {datetime.timedelta(seconds=int(etas))}\tlr {lr:.6f}\t" + f"time {samples.size(0) * flow.env.get_world_size() / throughput:.4f}s ({samples.size(0) * flow.env.get_world_size() / throughput_avg:.4f}s)\t" + f"rate {throughput:.4f}/s ({throughput_avg:.4f}/s)\t" + f"loss {loss:.4f} ({loss_avg:.4f})\t" + ) + + epoch_time = time.time() - start + logger.info( + f"EPOCH {epoch} training takes {datetime.timedelta(seconds=int(epoch_time))}" + ) + + +@flow.no_grad() +def validate(config, data_loader, model): + criterion = flow.nn.CrossEntropyLoss() + model.eval() + + batch_time = TimeMeter() + loss_meter 
= AverageMeter() + acc1_meter = AverageMeter() + acc5_meter = AverageMeter() + + end = time.time() + for idx, (images, target) in enumerate(data_loader): + images = images.cuda() + target = target.cuda() + + # compute output + output = model.forward(images) + if type(output) == tuple: + output = output[0] + + + # measure accuracy and record loss + loss = criterion(output, target) + acc1, acc5 = accuracy(output, target, topk=(1, 5)) + + acc1 = reduce_tensor(acc1) + acc5 = reduce_tensor(acc5) + loss = reduce_tensor(loss) + + batch_time.record(target.size(0) * flow.env.get_world_size()) + loss_meter.record(loss, target.size(0)) + acc1_meter.record(acc1, target.size(0)) + acc5_meter.record(acc5, target.size(0)) + + # measure elapsed time + end = time.time() + + if idx % config.PRINT_FREQ == 0: + acc1, acc1_avg = acc1_meter.get() + acc5, acc5_avg = acc5_meter.get() + loss, loss_avg = loss_meter.get() + throughput, throughput_avg = batch_time.get() + batch_time.reset() + + logger.info( + f"Test: [{idx}/{len(data_loader)}]\t" + f"Throughput {throughput:.3f} ({throughput_avg:.3f})\t" + f"Loss {loss:.4f} ({loss_avg:.4f})\t" + f"Acc@1 {acc1:.3f} ({acc1_avg:.3f})\t" + f"Acc@5 {acc5:.3f} ({acc5_avg:.3f})\t" + ) + + logger.info(f" * Acc@1 {acc1_avg:.3f} Acc@5 {acc5_avg:.3f}") + return acc1_avg, acc5_avg, loss_meter + + +@flow.no_grad() +def throughput(data_loader, model, logger): + model.eval() + + for idx, (images, _) in enumerate(data_loader): + images = images.cuda() + batch_size = images.shape[0] + for i in range(50): + model.forward(images) + flow.cuda.synchronize() + # TODO: add flow.cuda.synchronize() + logger.info(f"throughput averaged with 30 times") + tic1 = time.time() + for i in range(30): + model.forward(images) + + flow.cuda.synchronize() + tic2 = time.time() + logger.info( + f"batch_size {batch_size} throughput {30 * batch_size / (tic2 - tic1)}" + ) + return + + +if __name__ == "__main__": + _, config = parse_option() + if "RANK" in os.environ and "WORLD_SIZE" in 
os.environ: + rank = flow.env.get_rank() + world_size = flow.env.get_world_size() + print(f"RANK and WORLD_SIZE in environ: {rank}/{world_size}") + else: + rank = -1 + world_size = -1 + + seed = config.SEED + flow.env.get_rank() + flow.manual_seed(seed) + np.random.seed(seed) + cudnn.benchmark = True + + # linear_scaled_lr = ( + # config.TRAIN.BASE_LR + # * config.DATA.BATCH_SIZE + # * flow.env.get_world_size() + # / 512.0 + # ) + # linear_scaled_warmup_lr = ( + # config.TRAIN.WARMUP_LR + # * config.DATA.BATCH_SIZE + # * flow.env.get_world_size() + # / 512.0 + # ) + linear_scaled_lr = config.TRAIN.BASE_LR + linear_scaled_warmup_lr = config.TRAIN.WARMUP_LR + + linear_scaled_min_lr = ( + config.TRAIN.MIN_LR * config.DATA.BATCH_SIZE * flow.env.get_world_size() / 512.0 + ) + + # gradient accumulation also need to scale the learning rate + if config.TRAIN.ACCUMULATION_STEPS > 1: + linear_scaled_lr = linear_scaled_lr * config.TRAIN.ACCUMULATION_STEPS + linear_scaled_warmup_lr = ( + linear_scaled_warmup_lr * config.TRAIN.ACCUMULATION_STEPS + ) + linear_scaled_min_lr = linear_scaled_min_lr * config.TRAIN.ACCUMULATION_STEPS + config.defrost() + config.TRAIN.BASE_LR = linear_scaled_lr + config.TRAIN.WARMUP_LR = linear_scaled_warmup_lr + config.TRAIN.MIN_LR = linear_scaled_min_lr + config.freeze() + + os.makedirs(config.OUTPUT, exist_ok=True) + logger = create_logger( + output_dir=config.OUTPUT, + dist_rank=flow.env.get_rank(), + name=f"{config.MODEL.ARCH}", + ) + + if flow.env.get_rank() == 0: + path = os.path.join(config.OUTPUT, "config.json") + with open(path, "w") as f: + f.write(config.dump()) + logger.info(f"Full config saved to {path}") + + # print config + logger.info(config.dump()) + + main(config) diff --git a/cv/classification/deit_base_distilled_patch16_384/optimizer.py b/cv/classification/deit_base_distilled_patch16_384/optimizer.py new file mode 120000 index 00000000..e5dec61d --- /dev/null +++ b/cv/classification/deit_base_distilled_patch16_384/optimizer.py @@ 
-0,0 +1 @@ +../optimizer.py \ No newline at end of file diff --git a/cv/classification/deit_base_distilled_patch16_384/requirements.txt b/cv/classification/deit_base_distilled_patch16_384/requirements.txt new file mode 120000 index 00000000..dc833dd4 --- /dev/null +++ b/cv/classification/deit_base_distilled_patch16_384/requirements.txt @@ -0,0 +1 @@ +../requirements.txt \ No newline at end of file diff --git a/cv/classification/deit_base_distilled_patch16_384/train.sh b/cv/classification/deit_base_distilled_patch16_384/train.sh new file mode 100755 index 00000000..885fb605 --- /dev/null +++ b/cv/classification/deit_base_distilled_patch16_384/train.sh @@ -0,0 +1,21 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=8 +PORT=12346 +MODEL_ARCH="deit_base_distilled_patch16_384" +IMAGE_SIZE=384 +BATCH_SIZE=16 +LEARNING_RATE=1e-4 + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/default_settings.yaml \ + --model_arch $MODEL_ARCH \ + --batch-size $BATCH_SIZE \ + --image-size $IMAGE_SIZE \ + --lr $LEARNING_RATE + diff --git a/cv/classification/deit_base_distilled_patch16_384/utils.py b/cv/classification/deit_base_distilled_patch16_384/utils.py new file mode 120000 index 00000000..50fbc6d8 --- /dev/null +++ b/cv/classification/deit_base_distilled_patch16_384/utils.py @@ -0,0 +1 @@ +../utils.py \ No newline at end of file diff --git a/cv/classification/deit_base_patch16_224/README.md b/cv/classification/deit_base_patch16_224/README.md new file mode 100644 index 00000000..a4a3f9fd --- /dev/null +++ b/cv/classification/deit_base_patch16_224/README.md @@ -0,0 +1,66 @@ +## deit_base_patch16_224 + +### Installation +- Install the latest version of OneFlow +```bash +python3 -m pip install oneflow -f https://staging.oneflow.info/branch/master/[PLATFORM] +``` +Find more information on [install oneflow](https://github.com/Oneflow-Inc/oneflow#install-oneflow) + +- Install 
flowvision + +Then install the latest stable release of flowvision + +```bash +pip install flowvision==0.2.1 +``` + +- Install other requirements +```bash +python3 -m pip install -r requirements.txt +``` + +### Dataset +#### ImageNet +For ImageNet dataset, you can download it from http://image-net.org/. We provide the following two ways to load data: + +- For standard folder dataset, move validation images to labeled sub-folders. The file structure should look like: + ```bash + $ tree data + imagenet + ├── train + │ ├── class1 + │ │ ├── img1.jpeg + │ │ ├── img2.jpeg + │ │ └── ... + │ ├── class2 + │ │ ├── img3.jpeg + │ │ └── ... + │ └── ... + └── val + ├── class1 + │ ├── img4.jpeg + │ ├── img5.jpeg + │ └── ... + ├── class2 + │ ├── img6.jpeg + │ └── ... + └── ... + + +### Training + +You can use bash script `train.sh` to train this model. + +```bash +sh train.sh +``` + +### inference + +Bash script `infer.sh` is used to infer the trained model. + +```bash +sh infer.sh +``` + diff --git a/cv/classification/deit_base_patch16_224/config.py b/cv/classification/deit_base_patch16_224/config.py new file mode 120000 index 00000000..3721332c --- /dev/null +++ b/cv/classification/deit_base_patch16_224/config.py @@ -0,0 +1 @@ +../config.py \ No newline at end of file diff --git a/cv/classification/deit_base_patch16_224/configs b/cv/classification/deit_base_patch16_224/configs new file mode 120000 index 00000000..271eaab3 --- /dev/null +++ b/cv/classification/deit_base_patch16_224/configs @@ -0,0 +1 @@ +../configs \ No newline at end of file diff --git a/cv/classification/deit_base_patch16_224/data b/cv/classification/deit_base_patch16_224/data new file mode 120000 index 00000000..4909e06e --- /dev/null +++ b/cv/classification/deit_base_patch16_224/data @@ -0,0 +1 @@ +../data \ No newline at end of file diff --git a/cv/classification/deit_base_patch16_224/infer.sh b/cv/classification/deit_base_patch16_224/infer.sh new file mode 100755 index 00000000..2b58e143 --- /dev/null +++ 
b/cv/classification/deit_base_patch16_224/infer.sh @@ -0,0 +1,16 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=1 +PORT=12346 +MODEL_ARCH="deit_base_patch16_224" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/default_settings.yaml \ + --model_arch $MODEL_ARCH \ + --throughput + diff --git a/cv/classification/deit_base_patch16_224/logger.py b/cv/classification/deit_base_patch16_224/logger.py new file mode 120000 index 00000000..e1dc8ceb --- /dev/null +++ b/cv/classification/deit_base_patch16_224/logger.py @@ -0,0 +1 @@ +../logger.py \ No newline at end of file diff --git a/cv/classification/deit_base_patch16_224/lr_scheduler.py b/cv/classification/deit_base_patch16_224/lr_scheduler.py new file mode 120000 index 00000000..268fb140 --- /dev/null +++ b/cv/classification/deit_base_patch16_224/lr_scheduler.py @@ -0,0 +1 @@ +../lr_scheduler.py \ No newline at end of file diff --git a/cv/classification/deit_base_patch16_224/main.py b/cv/classification/deit_base_patch16_224/main.py new file mode 120000 index 00000000..f05f7527 --- /dev/null +++ b/cv/classification/deit_base_patch16_224/main.py @@ -0,0 +1 @@ +../main.py \ No newline at end of file diff --git a/cv/classification/deit_base_patch16_224/optimizer.py b/cv/classification/deit_base_patch16_224/optimizer.py new file mode 120000 index 00000000..e5dec61d --- /dev/null +++ b/cv/classification/deit_base_patch16_224/optimizer.py @@ -0,0 +1 @@ +../optimizer.py \ No newline at end of file diff --git a/cv/classification/deit_base_patch16_224/requirements.txt b/cv/classification/deit_base_patch16_224/requirements.txt new file mode 120000 index 00000000..dc833dd4 --- /dev/null +++ b/cv/classification/deit_base_patch16_224/requirements.txt @@ -0,0 +1 @@ +../requirements.txt \ No newline at end of file diff --git a/cv/classification/deit_base_patch16_224/train.sh 
b/cv/classification/deit_base_patch16_224/train.sh new file mode 100755 index 00000000..eed43582 --- /dev/null +++ b/cv/classification/deit_base_patch16_224/train.sh @@ -0,0 +1,18 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=4 +PORT=12346 +MODEL_ARCH="deit_base_patch16_224" +BATCH_SIZE=32 +LEARNING_RATE=5e-5 + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/default_settings.yaml \ + --model_arch $MODEL_ARCH \ + --batch-size $BATCH_SIZE \ + --lr $LEARNING_RATE \ No newline at end of file diff --git a/cv/classification/deit_base_patch16_224/utils.py b/cv/classification/deit_base_patch16_224/utils.py new file mode 120000 index 00000000..50fbc6d8 --- /dev/null +++ b/cv/classification/deit_base_patch16_224/utils.py @@ -0,0 +1 @@ +../utils.py \ No newline at end of file diff --git a/cv/classification/deit_base_patch16_384/README.md b/cv/classification/deit_base_patch16_384/README.md new file mode 100644 index 00000000..66da693c --- /dev/null +++ b/cv/classification/deit_base_patch16_384/README.md @@ -0,0 +1,66 @@ +## deit_base_patch16_384 + +### Installation +- Install the latest version of OneFlow +```bash +python3 -m pip install oneflow -f https://staging.oneflow.info/branch/master/[PLATFORM] +``` +Find more information on [install oneflow](https://github.com/Oneflow-Inc/oneflow#install-oneflow) + +- Install flowvision + +Then install the latest stable release of flowvision + +```bash +pip install flowvision==0.2.1 +``` + +- Install other requirements +```bash +python3 -m pip install -r requirements.txt +``` + +### Dataset +#### ImageNet +For ImageNet dataset, you can download it from http://image-net.org/. We provide the following two ways to load data: + +- For standard folder dataset, move validation images to labeled sub-folders. 
The file structure should look like: + ```bash + $ tree data + imagenet + ├── train + │ ├── class1 + │ │ ├── img1.jpeg + │ │ ├── img2.jpeg + │ │ └── ... + │ ├── class2 + │ │ ├── img3.jpeg + │ │ └── ... + │ └── ... + └── val + ├── class1 + │ ├── img4.jpeg + │ ├── img5.jpeg + │ └── ... + ├── class2 + │ ├── img6.jpeg + │ └── ... + └── ... + + +### Training + +You can use bash script `train.sh` to train this model. + +```bash +sh train.sh +``` + +### inference + +Bash script `infer.sh` is used to infer the trained model. + +```bash +sh infer.sh +``` + diff --git a/cv/classification/deit_base_patch16_384/config.py b/cv/classification/deit_base_patch16_384/config.py new file mode 120000 index 00000000..3721332c --- /dev/null +++ b/cv/classification/deit_base_patch16_384/config.py @@ -0,0 +1 @@ +../config.py \ No newline at end of file diff --git a/cv/classification/deit_base_patch16_384/configs b/cv/classification/deit_base_patch16_384/configs new file mode 120000 index 00000000..271eaab3 --- /dev/null +++ b/cv/classification/deit_base_patch16_384/configs @@ -0,0 +1 @@ +../configs \ No newline at end of file diff --git a/cv/classification/deit_base_patch16_384/data b/cv/classification/deit_base_patch16_384/data new file mode 120000 index 00000000..4909e06e --- /dev/null +++ b/cv/classification/deit_base_patch16_384/data @@ -0,0 +1 @@ +../data \ No newline at end of file diff --git a/cv/classification/deit_base_patch16_384/infer.sh b/cv/classification/deit_base_patch16_384/infer.sh new file mode 100755 index 00000000..cab280f0 --- /dev/null +++ b/cv/classification/deit_base_patch16_384/infer.sh @@ -0,0 +1,18 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=1 +PORT=12346 +MODEL_ARCH="deit_base_patch16_384" +IMAGE_SIZE=384 + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/default_settings.yaml \ + --model_arch $MODEL_ARCH \ + --image-size $IMAGE_SIZE \ + 
--throughput + diff --git a/cv/classification/deit_base_patch16_384/logger.py b/cv/classification/deit_base_patch16_384/logger.py new file mode 120000 index 00000000..e1dc8ceb --- /dev/null +++ b/cv/classification/deit_base_patch16_384/logger.py @@ -0,0 +1 @@ +../logger.py \ No newline at end of file diff --git a/cv/classification/deit_base_patch16_384/lr_scheduler.py b/cv/classification/deit_base_patch16_384/lr_scheduler.py new file mode 120000 index 00000000..268fb140 --- /dev/null +++ b/cv/classification/deit_base_patch16_384/lr_scheduler.py @@ -0,0 +1 @@ +../lr_scheduler.py \ No newline at end of file diff --git a/cv/classification/deit_base_patch16_384/main.py b/cv/classification/deit_base_patch16_384/main.py new file mode 120000 index 00000000..f05f7527 --- /dev/null +++ b/cv/classification/deit_base_patch16_384/main.py @@ -0,0 +1 @@ +../main.py \ No newline at end of file diff --git a/cv/classification/deit_base_patch16_384/optimizer.py b/cv/classification/deit_base_patch16_384/optimizer.py new file mode 120000 index 00000000..e5dec61d --- /dev/null +++ b/cv/classification/deit_base_patch16_384/optimizer.py @@ -0,0 +1 @@ +../optimizer.py \ No newline at end of file diff --git a/cv/classification/deit_base_patch16_384/requirements.txt b/cv/classification/deit_base_patch16_384/requirements.txt new file mode 120000 index 00000000..dc833dd4 --- /dev/null +++ b/cv/classification/deit_base_patch16_384/requirements.txt @@ -0,0 +1 @@ +../requirements.txt \ No newline at end of file diff --git a/cv/classification/deit_base_patch16_384/train.sh b/cv/classification/deit_base_patch16_384/train.sh new file mode 100755 index 00000000..534ffe48 --- /dev/null +++ b/cv/classification/deit_base_patch16_384/train.sh @@ -0,0 +1,21 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=4 +PORT=12346 +MODEL_ARCH="deit_base_patch16_384" +IMAGE_SIZE=384 +BATCH_SIZE=16 +LEARNING_RATE=1e-4 + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 
127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/default_settings.yaml \ + --model_arch $MODEL_ARCH \ + --batch-size $BATCH_SIZE \ + --image-size $IMAGE_SIZE \ + --lr $LEARNING_RATE + diff --git a/cv/classification/deit_base_patch16_384/utils.py b/cv/classification/deit_base_patch16_384/utils.py new file mode 120000 index 00000000..50fbc6d8 --- /dev/null +++ b/cv/classification/deit_base_patch16_384/utils.py @@ -0,0 +1 @@ +../utils.py \ No newline at end of file diff --git a/cv/classification/deit_base_patch16_LS_224/README.md b/cv/classification/deit_base_patch16_LS_224/README.md new file mode 100644 index 00000000..53b2ce17 --- /dev/null +++ b/cv/classification/deit_base_patch16_LS_224/README.md @@ -0,0 +1,66 @@ +## deit_base_patch16_LS_224 + +### Installation +- Install the latest version of OneFlow +```bash +python3 -m pip install oneflow -f https://staging.oneflow.info/branch/master/[PLATFORM] +``` +Find more information on [install oneflow](https://github.com/Oneflow-Inc/oneflow#install-oneflow) + +- Install flowvision + +Then install the latest stable release of flowvision + +```bash +pip install flowvision==0.2.1 +``` + +- Install other requirements +```bash +python3 -m pip install -r requirements.txt +``` + +### Dataset +#### ImageNet +For ImageNet dataset, you can download it from http://image-net.org/. We provide the following two ways to load data: + +- For standard folder dataset, move validation images to labeled sub-folders. The file structure should look like: + ```bash + $ tree data + imagenet + ├── train + │ ├── class1 + │ │ ├── img1.jpeg + │ │ ├── img2.jpeg + │ │ └── ... + │ ├── class2 + │ │ ├── img3.jpeg + │ │ └── ... + │ └── ... + └── val + ├── class1 + │ ├── img4.jpeg + │ ├── img5.jpeg + │ └── ... + ├── class2 + │ ├── img6.jpeg + │ └── ... + └── ... + + +### Training + +You can use bash script `train.sh` to train this model. 
+ +```bash +sh train.sh +``` + +### inference + +Bash script `infer.sh` is used to infer the trained model. + +```bash +sh infer.sh +``` + diff --git a/cv/classification/deit_base_patch16_LS_224/config.py b/cv/classification/deit_base_patch16_LS_224/config.py new file mode 120000 index 00000000..3721332c --- /dev/null +++ b/cv/classification/deit_base_patch16_LS_224/config.py @@ -0,0 +1 @@ +../config.py \ No newline at end of file diff --git a/cv/classification/deit_base_patch16_LS_224/configs b/cv/classification/deit_base_patch16_LS_224/configs new file mode 120000 index 00000000..271eaab3 --- /dev/null +++ b/cv/classification/deit_base_patch16_LS_224/configs @@ -0,0 +1 @@ +../configs \ No newline at end of file diff --git a/cv/classification/deit_base_patch16_LS_224/data b/cv/classification/deit_base_patch16_LS_224/data new file mode 120000 index 00000000..4909e06e --- /dev/null +++ b/cv/classification/deit_base_patch16_LS_224/data @@ -0,0 +1 @@ +../data \ No newline at end of file diff --git a/cv/classification/deit_base_patch16_LS_224/infer.sh b/cv/classification/deit_base_patch16_LS_224/infer.sh new file mode 100755 index 00000000..3c6963b2 --- /dev/null +++ b/cv/classification/deit_base_patch16_LS_224/infer.sh @@ -0,0 +1,16 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=1 +PORT=12346 +MODEL_ARCH="deit_base_patch16_LS_224" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/default_settings.yaml \ + --model_arch $MODEL_ARCH \ + --throughput + diff --git a/cv/classification/deit_base_patch16_LS_224/logger.py b/cv/classification/deit_base_patch16_LS_224/logger.py new file mode 120000 index 00000000..e1dc8ceb --- /dev/null +++ b/cv/classification/deit_base_patch16_LS_224/logger.py @@ -0,0 +1 @@ +../logger.py \ No newline at end of file diff --git a/cv/classification/deit_base_patch16_LS_224/lr_scheduler.py 
b/cv/classification/deit_base_patch16_LS_224/lr_scheduler.py new file mode 120000 index 00000000..268fb140 --- /dev/null +++ b/cv/classification/deit_base_patch16_LS_224/lr_scheduler.py @@ -0,0 +1 @@ +../lr_scheduler.py \ No newline at end of file diff --git a/cv/classification/deit_base_patch16_LS_224/main.py b/cv/classification/deit_base_patch16_LS_224/main.py new file mode 120000 index 00000000..f05f7527 --- /dev/null +++ b/cv/classification/deit_base_patch16_LS_224/main.py @@ -0,0 +1 @@ +../main.py \ No newline at end of file diff --git a/cv/classification/deit_base_patch16_LS_224/optimizer.py b/cv/classification/deit_base_patch16_LS_224/optimizer.py new file mode 120000 index 00000000..e5dec61d --- /dev/null +++ b/cv/classification/deit_base_patch16_LS_224/optimizer.py @@ -0,0 +1 @@ +../optimizer.py \ No newline at end of file diff --git a/cv/classification/deit_base_patch16_LS_224/requirements.txt b/cv/classification/deit_base_patch16_LS_224/requirements.txt new file mode 120000 index 00000000..dc833dd4 --- /dev/null +++ b/cv/classification/deit_base_patch16_LS_224/requirements.txt @@ -0,0 +1 @@ +../requirements.txt \ No newline at end of file diff --git a/cv/classification/deit_base_patch16_LS_224/train.sh b/cv/classification/deit_base_patch16_LS_224/train.sh new file mode 100755 index 00000000..afb6fd6b --- /dev/null +++ b/cv/classification/deit_base_patch16_LS_224/train.sh @@ -0,0 +1,18 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=4 +PORT=12346 +MODEL_ARCH="deit_base_patch16_LS_224" +BATCH_SIZE=128 +LEARNING_RATE=1e-3 + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/default_settings.yaml \ + --model_arch $MODEL_ARCH \ + --batch-size $BATCH_SIZE \ + --lr $LEARNING_RATE diff --git a/cv/classification/deit_base_patch16_LS_224/utils.py b/cv/classification/deit_base_patch16_LS_224/utils.py new file mode 120000 index 00000000..50fbc6d8 --- 
/dev/null +++ b/cv/classification/deit_base_patch16_LS_224/utils.py @@ -0,0 +1 @@ +../utils.py \ No newline at end of file diff --git a/cv/classification/deit_base_patch16_LS_224_in21k/README.md b/cv/classification/deit_base_patch16_LS_224_in21k/README.md new file mode 100644 index 00000000..399169cc --- /dev/null +++ b/cv/classification/deit_base_patch16_LS_224_in21k/README.md @@ -0,0 +1,66 @@ +## deit_base_patch16_LS_224_in21k + +### Installation +- Install the latest version of OneFlow +```bash +python3 -m pip install oneflow -f https://staging.oneflow.info/branch/master/[PLATFORM] +``` +Find more information on [install oneflow](https://github.com/Oneflow-Inc/oneflow#install-oneflow) + +- Install flowvision + +Then install the latest stable release of flowvision + +```bash +pip install flowvision==0.2.1 +``` + +- Install other requirements +```bash +python3 -m pip install -r requirements.txt +``` + +### Dataset +#### ImageNet +For ImageNet dataset, you can download it from http://image-net.org/. We provide the following two ways to load data: + +- For standard folder dataset, move validation images to labeled sub-folders. The file structure should look like: + ```bash + $ tree data + imagenet + ├── train + │ ├── class1 + │ │ ├── img1.jpeg + │ │ ├── img2.jpeg + │ │ └── ... + │ ├── class2 + │ │ ├── img3.jpeg + │ │ └── ... + │ └── ... + └── val + ├── class1 + │ ├── img4.jpeg + │ ├── img5.jpeg + │ └── ... + ├── class2 + │ ├── img6.jpeg + │ └── ... + └── ... + + +### Training + +You can use bash script `train.sh` to train this model. + +```bash +sh train.sh +``` + +### inference + +Bash script `infer.sh` is used to infer the trained model. 
+ +```bash +sh infer.sh +``` + diff --git a/cv/classification/deit_base_patch16_LS_224_in21k/config.py b/cv/classification/deit_base_patch16_LS_224_in21k/config.py new file mode 120000 index 00000000..3721332c --- /dev/null +++ b/cv/classification/deit_base_patch16_LS_224_in21k/config.py @@ -0,0 +1 @@ +../config.py \ No newline at end of file diff --git a/cv/classification/deit_base_patch16_LS_224_in21k/configs b/cv/classification/deit_base_patch16_LS_224_in21k/configs new file mode 120000 index 00000000..271eaab3 --- /dev/null +++ b/cv/classification/deit_base_patch16_LS_224_in21k/configs @@ -0,0 +1 @@ +../configs \ No newline at end of file diff --git a/cv/classification/deit_base_patch16_LS_224_in21k/data b/cv/classification/deit_base_patch16_LS_224_in21k/data new file mode 120000 index 00000000..4909e06e --- /dev/null +++ b/cv/classification/deit_base_patch16_LS_224_in21k/data @@ -0,0 +1 @@ +../data \ No newline at end of file diff --git a/cv/classification/deit_base_patch16_LS_224_in21k/infer.sh b/cv/classification/deit_base_patch16_LS_224_in21k/infer.sh new file mode 100755 index 00000000..fe5b33b6 --- /dev/null +++ b/cv/classification/deit_base_patch16_LS_224_in21k/infer.sh @@ -0,0 +1,16 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=1 +PORT=12346 +MODEL_ARCH="deit_base_patch16_LS_224_in21k" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/default_settings.yaml \ + --model_arch $MODEL_ARCH \ + --throughput + diff --git a/cv/classification/deit_base_patch16_LS_224_in21k/logger.py b/cv/classification/deit_base_patch16_LS_224_in21k/logger.py new file mode 120000 index 00000000..e1dc8ceb --- /dev/null +++ b/cv/classification/deit_base_patch16_LS_224_in21k/logger.py @@ -0,0 +1 @@ +../logger.py \ No newline at end of file diff --git a/cv/classification/deit_base_patch16_LS_224_in21k/lr_scheduler.py 
b/cv/classification/deit_base_patch16_LS_224_in21k/lr_scheduler.py new file mode 120000 index 00000000..268fb140 --- /dev/null +++ b/cv/classification/deit_base_patch16_LS_224_in21k/lr_scheduler.py @@ -0,0 +1 @@ +../lr_scheduler.py \ No newline at end of file diff --git a/cv/classification/deit_base_patch16_LS_224_in21k/main.py b/cv/classification/deit_base_patch16_LS_224_in21k/main.py new file mode 120000 index 00000000..f05f7527 --- /dev/null +++ b/cv/classification/deit_base_patch16_LS_224_in21k/main.py @@ -0,0 +1 @@ +../main.py \ No newline at end of file diff --git a/cv/classification/deit_base_patch16_LS_224_in21k/optimizer.py b/cv/classification/deit_base_patch16_LS_224_in21k/optimizer.py new file mode 120000 index 00000000..e5dec61d --- /dev/null +++ b/cv/classification/deit_base_patch16_LS_224_in21k/optimizer.py @@ -0,0 +1 @@ +../optimizer.py \ No newline at end of file diff --git a/cv/classification/deit_base_patch16_LS_224_in21k/requirements.txt b/cv/classification/deit_base_patch16_LS_224_in21k/requirements.txt new file mode 120000 index 00000000..dc833dd4 --- /dev/null +++ b/cv/classification/deit_base_patch16_LS_224_in21k/requirements.txt @@ -0,0 +1 @@ +../requirements.txt \ No newline at end of file diff --git a/cv/classification/deit_base_patch16_LS_224_in21k/train.sh b/cv/classification/deit_base_patch16_LS_224_in21k/train.sh new file mode 100755 index 00000000..0e3dfd89 --- /dev/null +++ b/cv/classification/deit_base_patch16_LS_224_in21k/train.sh @@ -0,0 +1,18 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=4 +PORT=12346 +MODEL_ARCH="deit_base_patch16_LS_224_in21k" +BATCH_SIZE=32 +LEARNING_RATE=5e-5 + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/default_settings.yaml \ + --model_arch $MODEL_ARCH \ + --batch-size $BATCH_SIZE \ + --lr $LEARNING_RATE \ No newline at end of file diff --git 
a/cv/classification/deit_base_patch16_LS_224_in21k/utils.py b/cv/classification/deit_base_patch16_LS_224_in21k/utils.py new file mode 120000 index 00000000..50fbc6d8 --- /dev/null +++ b/cv/classification/deit_base_patch16_LS_224_in21k/utils.py @@ -0,0 +1 @@ +../utils.py \ No newline at end of file diff --git a/cv/classification/deit_base_patch16_LS_384_in21k/README.md b/cv/classification/deit_base_patch16_LS_384_in21k/README.md new file mode 100644 index 00000000..13182841 --- /dev/null +++ b/cv/classification/deit_base_patch16_LS_384_in21k/README.md @@ -0,0 +1,66 @@ +## deit_base_patch16_LS_384_in21k + +### Installation +- Install the latest version of OneFlow +```bash +python3 -m pip install oneflow -f https://staging.oneflow.info/branch/master/[PLATFORM] +``` +Find more information on [install oneflow](https://github.com/Oneflow-Inc/oneflow#install-oneflow) + +- Install flowvision + +Then install the latest stable release of flowvision + +```bash +pip install flowvision==0.2.1 +``` + +- Install other requirements +```bash +python3 -m pip install -r requirements.txt +``` + +### Dataset +#### ImageNet +For ImageNet dataset, you can download it from http://image-net.org/. We provide the following two ways to load data: + +- For standard folder dataset, move validation images to labeled sub-folders. The file structure should look like: + ```bash + $ tree data + imagenet + ├── train + │ ├── class1 + │ │ ├── img1.jpeg + │ │ ├── img2.jpeg + │ │ └── ... + │ ├── class2 + │ │ ├── img3.jpeg + │ │ └── ... + │ └── ... + └── val + ├── class1 + │ ├── img4.jpeg + │ ├── img5.jpeg + │ └── ... + ├── class2 + │ ├── img6.jpeg + │ └── ... + └── ... + + +### Training + +You can use bash script `train.sh` to train this model. + +```bash +sh train.sh +``` + +### inference + +Bash script `infer.sh` is used to infer the trained model. 
+ +```bash +sh infer.sh +``` + diff --git a/cv/classification/deit_base_patch16_LS_384_in21k/config.py b/cv/classification/deit_base_patch16_LS_384_in21k/config.py new file mode 120000 index 00000000..3721332c --- /dev/null +++ b/cv/classification/deit_base_patch16_LS_384_in21k/config.py @@ -0,0 +1 @@ +../config.py \ No newline at end of file diff --git a/cv/classification/deit_base_patch16_LS_384_in21k/configs b/cv/classification/deit_base_patch16_LS_384_in21k/configs new file mode 120000 index 00000000..271eaab3 --- /dev/null +++ b/cv/classification/deit_base_patch16_LS_384_in21k/configs @@ -0,0 +1 @@ +../configs \ No newline at end of file diff --git a/cv/classification/deit_base_patch16_LS_384_in21k/data b/cv/classification/deit_base_patch16_LS_384_in21k/data new file mode 120000 index 00000000..4909e06e --- /dev/null +++ b/cv/classification/deit_base_patch16_LS_384_in21k/data @@ -0,0 +1 @@ +../data \ No newline at end of file diff --git a/cv/classification/deit_base_patch16_LS_384_in21k/infer.sh b/cv/classification/deit_base_patch16_LS_384_in21k/infer.sh new file mode 100755 index 00000000..313a223b --- /dev/null +++ b/cv/classification/deit_base_patch16_LS_384_in21k/infer.sh @@ -0,0 +1,18 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=1 +PORT=12346 +MODEL_ARCH="deit_base_patch16_LS_384_in21k" +IMAGE_SIZE=384 + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/default_settings.yaml \ + --model_arch $MODEL_ARCH \ + --image-size $IMAGE_SIZE \ + --throughput + diff --git a/cv/classification/deit_base_patch16_LS_384_in21k/logger.py b/cv/classification/deit_base_patch16_LS_384_in21k/logger.py new file mode 120000 index 00000000..e1dc8ceb --- /dev/null +++ b/cv/classification/deit_base_patch16_LS_384_in21k/logger.py @@ -0,0 +1 @@ +../logger.py \ No newline at end of file diff --git a/cv/classification/deit_base_patch16_LS_384_in21k/lr_scheduler.py 
b/cv/classification/deit_base_patch16_LS_384_in21k/lr_scheduler.py new file mode 120000 index 00000000..268fb140 --- /dev/null +++ b/cv/classification/deit_base_patch16_LS_384_in21k/lr_scheduler.py @@ -0,0 +1 @@ +../lr_scheduler.py \ No newline at end of file diff --git a/cv/classification/deit_base_patch16_LS_384_in21k/main.py b/cv/classification/deit_base_patch16_LS_384_in21k/main.py new file mode 120000 index 00000000..f05f7527 --- /dev/null +++ b/cv/classification/deit_base_patch16_LS_384_in21k/main.py @@ -0,0 +1 @@ +../main.py \ No newline at end of file diff --git a/cv/classification/deit_base_patch16_LS_384_in21k/optimizer.py b/cv/classification/deit_base_patch16_LS_384_in21k/optimizer.py new file mode 120000 index 00000000..e5dec61d --- /dev/null +++ b/cv/classification/deit_base_patch16_LS_384_in21k/optimizer.py @@ -0,0 +1 @@ +../optimizer.py \ No newline at end of file diff --git a/cv/classification/deit_base_patch16_LS_384_in21k/requirements.txt b/cv/classification/deit_base_patch16_LS_384_in21k/requirements.txt new file mode 120000 index 00000000..dc833dd4 --- /dev/null +++ b/cv/classification/deit_base_patch16_LS_384_in21k/requirements.txt @@ -0,0 +1 @@ +../requirements.txt \ No newline at end of file diff --git a/cv/classification/deit_base_patch16_LS_384_in21k/train.sh b/cv/classification/deit_base_patch16_LS_384_in21k/train.sh new file mode 100755 index 00000000..cbb87a04 --- /dev/null +++ b/cv/classification/deit_base_patch16_LS_384_in21k/train.sh @@ -0,0 +1,19 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=4 +PORT=12346 +MODEL_ARCH="deit_base_patch16_LS_384_in21k" +BATCH_SIZE=32 +LEARNING_RATE=5e-5 + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/deit_settings.yaml \ + --model_arch $MODEL_ARCH \ + --batch-size $BATCH_SIZE \ + --lr $LEARNING_RATE + diff --git a/cv/classification/deit_base_patch16_LS_384_in21k/utils.py 
b/cv/classification/deit_base_patch16_LS_384_in21k/utils.py new file mode 120000 index 00000000..50fbc6d8 --- /dev/null +++ b/cv/classification/deit_base_patch16_LS_384_in21k/utils.py @@ -0,0 +1 @@ +../utils.py \ No newline at end of file diff --git a/cv/classification/deit_huge_patch14_LS_224/README.md b/cv/classification/deit_huge_patch14_LS_224/README.md new file mode 100644 index 00000000..65b8ed7c --- /dev/null +++ b/cv/classification/deit_huge_patch14_LS_224/README.md @@ -0,0 +1,66 @@ +## deit_huge_patch14_LS_224 + +### Installation +- Install the latest version of OneFlow +```bash +python3 -m pip install oneflow -f https://staging.oneflow.info/branch/master/[PLATFORM] +``` +Find more information on [install oneflow](https://github.com/Oneflow-Inc/oneflow#install-oneflow) + +- Install flowvision + +Then install the latest stable release of flowvision + +```bash +pip install flowvision==0.2.1 +``` + +- Install other requirements +```bash +python3 -m pip install -r requirements.txt +``` + +### Dataset +#### ImageNet +For ImageNet dataset, you can download it from http://image-net.org/. We provide the following two ways to load data: + +- For standard folder dataset, move validation images to labeled sub-folders. The file structure should look like: + ```bash + $ tree data + imagenet + ├── train + │ ├── class1 + │ │ ├── img1.jpeg + │ │ ├── img2.jpeg + │ │ └── ... + │ ├── class2 + │ │ ├── img3.jpeg + │ │ └── ... + │ └── ... + └── val + ├── class1 + │ ├── img4.jpeg + │ ├── img5.jpeg + │ └── ... + ├── class2 + │ ├── img6.jpeg + │ └── ... + └── ... + + +### Training + +You can use bash script `train.sh` to train this model. + +```bash +sh train.sh +``` + +### inference + +Bash script `infer.sh` is used to infer the trained model. 
+ +```bash +sh infer.sh +``` + diff --git a/cv/classification/deit_huge_patch14_LS_224/config.py b/cv/classification/deit_huge_patch14_LS_224/config.py new file mode 120000 index 00000000..3721332c --- /dev/null +++ b/cv/classification/deit_huge_patch14_LS_224/config.py @@ -0,0 +1 @@ +../config.py \ No newline at end of file diff --git a/cv/classification/deit_huge_patch14_LS_224/configs b/cv/classification/deit_huge_patch14_LS_224/configs new file mode 120000 index 00000000..271eaab3 --- /dev/null +++ b/cv/classification/deit_huge_patch14_LS_224/configs @@ -0,0 +1 @@ +../configs \ No newline at end of file diff --git a/cv/classification/deit_huge_patch14_LS_224/data b/cv/classification/deit_huge_patch14_LS_224/data new file mode 120000 index 00000000..4909e06e --- /dev/null +++ b/cv/classification/deit_huge_patch14_LS_224/data @@ -0,0 +1 @@ +../data \ No newline at end of file diff --git a/cv/classification/deit_huge_patch14_LS_224/infer.sh b/cv/classification/deit_huge_patch14_LS_224/infer.sh new file mode 100755 index 00000000..1851f503 --- /dev/null +++ b/cv/classification/deit_huge_patch14_LS_224/infer.sh @@ -0,0 +1,16 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=1 +PORT=12346 +MODEL_ARCH="deit_huge_patch14_LS_224" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/default_settings.yaml \ + --model_arch $MODEL_ARCH \ + --throughput + diff --git a/cv/classification/deit_huge_patch14_LS_224/logger.py b/cv/classification/deit_huge_patch14_LS_224/logger.py new file mode 120000 index 00000000..e1dc8ceb --- /dev/null +++ b/cv/classification/deit_huge_patch14_LS_224/logger.py @@ -0,0 +1 @@ +../logger.py \ No newline at end of file diff --git a/cv/classification/deit_huge_patch14_LS_224/lr_scheduler.py b/cv/classification/deit_huge_patch14_LS_224/lr_scheduler.py new file mode 120000 index 00000000..268fb140 --- /dev/null +++ 
b/cv/classification/deit_huge_patch14_LS_224/lr_scheduler.py @@ -0,0 +1 @@ +../lr_scheduler.py \ No newline at end of file diff --git a/cv/classification/deit_huge_patch14_LS_224/main.py b/cv/classification/deit_huge_patch14_LS_224/main.py new file mode 120000 index 00000000..f05f7527 --- /dev/null +++ b/cv/classification/deit_huge_patch14_LS_224/main.py @@ -0,0 +1 @@ +../main.py \ No newline at end of file diff --git a/cv/classification/deit_huge_patch14_LS_224/optimizer.py b/cv/classification/deit_huge_patch14_LS_224/optimizer.py new file mode 120000 index 00000000..e5dec61d --- /dev/null +++ b/cv/classification/deit_huge_patch14_LS_224/optimizer.py @@ -0,0 +1 @@ +../optimizer.py \ No newline at end of file diff --git a/cv/classification/deit_huge_patch14_LS_224/requirements.txt b/cv/classification/deit_huge_patch14_LS_224/requirements.txt new file mode 120000 index 00000000..dc833dd4 --- /dev/null +++ b/cv/classification/deit_huge_patch14_LS_224/requirements.txt @@ -0,0 +1 @@ +../requirements.txt \ No newline at end of file diff --git a/cv/classification/deit_huge_patch14_LS_224/train.sh b/cv/classification/deit_huge_patch14_LS_224/train.sh new file mode 100755 index 00000000..c272d5cc --- /dev/null +++ b/cv/classification/deit_huge_patch14_LS_224/train.sh @@ -0,0 +1,14 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=8 +PORT=12346 +MODEL_ARCH="deit_huge_patch14_LS_224" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/default_settings.yaml \ + --model_arch $MODEL_ARCH diff --git a/cv/classification/deit_huge_patch14_LS_224/utils.py b/cv/classification/deit_huge_patch14_LS_224/utils.py new file mode 120000 index 00000000..50fbc6d8 --- /dev/null +++ b/cv/classification/deit_huge_patch14_LS_224/utils.py @@ -0,0 +1 @@ +../utils.py \ No newline at end of file diff --git a/cv/classification/deit_huge_patch14_LS_224_in21k/README.md 
b/cv/classification/deit_huge_patch14_LS_224_in21k/README.md new file mode 100644 index 00000000..399169cc --- /dev/null +++ b/cv/classification/deit_huge_patch14_LS_224_in21k/README.md @@ -0,0 +1,66 @@ +## deit_huge_patch14_LS_224_in21k + +### Installation +- Install the latest version of OneFlow +```bash +python3 -m pip install oneflow -f https://staging.oneflow.info/branch/master/[PLATFORM] +``` +Find more information on [install oneflow](https://github.com/Oneflow-Inc/oneflow#install-oneflow) + +- Install flowvision + +Then install the latest stable release of flowvision + +```bash +pip install flowvision==0.2.1 +``` + +- Install other requirements +```bash +python3 -m pip install -r requirements.txt +``` + +### Dataset +#### ImageNet +For ImageNet dataset, you can download it from http://image-net.org/. We provide the following two ways to load data: + +- For standard folder dataset, move validation images to labeled sub-folders. The file structure should look like: + ```bash + $ tree data + imagenet + ├── train + │ ├── class1 + │ │ ├── img1.jpeg + │ │ ├── img2.jpeg + │ │ └── ... + │ ├── class2 + │ │ ├── img3.jpeg + │ │ └── ... + │ └── ... + └── val + ├── class1 + │ ├── img4.jpeg + │ ├── img5.jpeg + │ └── ... + ├── class2 + │ ├── img6.jpeg + │ └── ... + └── ... + + +### Training + +You can use bash script `train.sh` to train this model. + +```bash +sh train.sh +``` + +### inference + +Bash script `infer.sh` is used to infer the trained model. 
+ +```bash +sh infer.sh +``` + diff --git a/cv/classification/deit_huge_patch14_LS_224_in21k/config.py b/cv/classification/deit_huge_patch14_LS_224_in21k/config.py new file mode 120000 index 00000000..3721332c --- /dev/null +++ b/cv/classification/deit_huge_patch14_LS_224_in21k/config.py @@ -0,0 +1 @@ +../config.py \ No newline at end of file diff --git a/cv/classification/deit_huge_patch14_LS_224_in21k/configs b/cv/classification/deit_huge_patch14_LS_224_in21k/configs new file mode 120000 index 00000000..271eaab3 --- /dev/null +++ b/cv/classification/deit_huge_patch14_LS_224_in21k/configs @@ -0,0 +1 @@ +../configs \ No newline at end of file diff --git a/cv/classification/deit_huge_patch14_LS_224_in21k/data b/cv/classification/deit_huge_patch14_LS_224_in21k/data new file mode 120000 index 00000000..4909e06e --- /dev/null +++ b/cv/classification/deit_huge_patch14_LS_224_in21k/data @@ -0,0 +1 @@ +../data \ No newline at end of file diff --git a/cv/classification/deit_huge_patch14_LS_224_in21k/infer.sh b/cv/classification/deit_huge_patch14_LS_224_in21k/infer.sh new file mode 100755 index 00000000..0fc82e00 --- /dev/null +++ b/cv/classification/deit_huge_patch14_LS_224_in21k/infer.sh @@ -0,0 +1,16 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=1 +PORT=12346 +MODEL_ARCH="deit_huge_patch14_LS_224_in21k" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/default_settings.yaml \ + --model_arch $MODEL_ARCH \ + --throughput + diff --git a/cv/classification/deit_huge_patch14_LS_224_in21k/logger.py b/cv/classification/deit_huge_patch14_LS_224_in21k/logger.py new file mode 120000 index 00000000..e1dc8ceb --- /dev/null +++ b/cv/classification/deit_huge_patch14_LS_224_in21k/logger.py @@ -0,0 +1 @@ +../logger.py \ No newline at end of file diff --git a/cv/classification/deit_huge_patch14_LS_224_in21k/lr_scheduler.py 
b/cv/classification/deit_huge_patch14_LS_224_in21k/lr_scheduler.py new file mode 120000 index 00000000..268fb140 --- /dev/null +++ b/cv/classification/deit_huge_patch14_LS_224_in21k/lr_scheduler.py @@ -0,0 +1 @@ +../lr_scheduler.py \ No newline at end of file diff --git a/cv/classification/deit_huge_patch14_LS_224_in21k/main.py b/cv/classification/deit_huge_patch14_LS_224_in21k/main.py new file mode 120000 index 00000000..f05f7527 --- /dev/null +++ b/cv/classification/deit_huge_patch14_LS_224_in21k/main.py @@ -0,0 +1 @@ +../main.py \ No newline at end of file diff --git a/cv/classification/deit_huge_patch14_LS_224_in21k/optimizer.py b/cv/classification/deit_huge_patch14_LS_224_in21k/optimizer.py new file mode 120000 index 00000000..e5dec61d --- /dev/null +++ b/cv/classification/deit_huge_patch14_LS_224_in21k/optimizer.py @@ -0,0 +1 @@ +../optimizer.py \ No newline at end of file diff --git a/cv/classification/deit_huge_patch14_LS_224_in21k/requirements.txt b/cv/classification/deit_huge_patch14_LS_224_in21k/requirements.txt new file mode 120000 index 00000000..dc833dd4 --- /dev/null +++ b/cv/classification/deit_huge_patch14_LS_224_in21k/requirements.txt @@ -0,0 +1 @@ +../requirements.txt \ No newline at end of file diff --git a/cv/classification/deit_huge_patch14_LS_224_in21k/train.sh b/cv/classification/deit_huge_patch14_LS_224_in21k/train.sh new file mode 100755 index 00000000..d6a6eb07 --- /dev/null +++ b/cv/classification/deit_huge_patch14_LS_224_in21k/train.sh @@ -0,0 +1,18 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=4 +PORT=12346 +MODEL_ARCH="deit_huge_patch14_LS_224_in21k" +BATCH_SIZE=32 +LEARNING_RATE=5e-5 + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/default_settings.yaml \ + --model_arch $MODEL_ARCH \ + --batch-size $BATCH_SIZE \ + --lr $LEARNING_RATE \ No newline at end of file diff --git 
a/cv/classification/deit_huge_patch14_LS_224_in21k/utils.py b/cv/classification/deit_huge_patch14_LS_224_in21k/utils.py new file mode 120000 index 00000000..50fbc6d8 --- /dev/null +++ b/cv/classification/deit_huge_patch14_LS_224_in21k/utils.py @@ -0,0 +1 @@ +../utils.py \ No newline at end of file diff --git a/cv/classification/deit_large_patch16_LS_224/README.md b/cv/classification/deit_large_patch16_LS_224/README.md new file mode 100644 index 00000000..034314b7 --- /dev/null +++ b/cv/classification/deit_large_patch16_LS_224/README.md @@ -0,0 +1,66 @@ +## deit_large_patch16_LS_224 + +### Installation +- Install the latest version of OneFlow +```bash +python3 -m pip install oneflow -f https://staging.oneflow.info/branch/master/[PLATFORM] +``` +Find more information on [install oneflow](https://github.com/Oneflow-Inc/oneflow#install-oneflow) + +- Install flowvision + +Then install the latest stable release of flowvision + +```bash +pip install flowvision==0.2.1 +``` + +- Install other requirements +```bash +python3 -m pip install -r requirements.txt +``` + +### Dataset +#### ImageNet +For ImageNet dataset, you can download it from http://image-net.org/. We provide the following two ways to load data: + +- For standard folder dataset, move validation images to labeled sub-folders. The file structure should look like: + ```bash + $ tree data + imagenet + ├── train + │ ├── class1 + │ │ ├── img1.jpeg + │ │ ├── img2.jpeg + │ │ └── ... + │ ├── class2 + │ │ ├── img3.jpeg + │ │ └── ... + │ └── ... + └── val + ├── class1 + │ ├── img4.jpeg + │ ├── img5.jpeg + │ └── ... + ├── class2 + │ ├── img6.jpeg + │ └── ... + └── ... + + +### Training + +You can use bash script `train.sh` to train this model. + +```bash +sh train.sh +``` + +### inference + +Bash script `infer.sh` is used to infer the trained model. 
+ +```bash +sh infer.sh +``` + diff --git a/cv/classification/deit_large_patch16_LS_224/config.py b/cv/classification/deit_large_patch16_LS_224/config.py new file mode 120000 index 00000000..3721332c --- /dev/null +++ b/cv/classification/deit_large_patch16_LS_224/config.py @@ -0,0 +1 @@ +../config.py \ No newline at end of file diff --git a/cv/classification/deit_large_patch16_LS_224/configs b/cv/classification/deit_large_patch16_LS_224/configs new file mode 120000 index 00000000..271eaab3 --- /dev/null +++ b/cv/classification/deit_large_patch16_LS_224/configs @@ -0,0 +1 @@ +../configs \ No newline at end of file diff --git a/cv/classification/deit_large_patch16_LS_224/data b/cv/classification/deit_large_patch16_LS_224/data new file mode 120000 index 00000000..4909e06e --- /dev/null +++ b/cv/classification/deit_large_patch16_LS_224/data @@ -0,0 +1 @@ +../data \ No newline at end of file diff --git a/cv/classification/deit_large_patch16_LS_224/infer.sh b/cv/classification/deit_large_patch16_LS_224/infer.sh new file mode 100755 index 00000000..1b3b6873 --- /dev/null +++ b/cv/classification/deit_large_patch16_LS_224/infer.sh @@ -0,0 +1,16 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=1 +PORT=12346 +MODEL_ARCH="deit_large_patch16_LS_224" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/default_settings.yaml \ + --model_arch $MODEL_ARCH \ + --throughput + diff --git a/cv/classification/deit_large_patch16_LS_224/logger.py b/cv/classification/deit_large_patch16_LS_224/logger.py new file mode 120000 index 00000000..e1dc8ceb --- /dev/null +++ b/cv/classification/deit_large_patch16_LS_224/logger.py @@ -0,0 +1 @@ +../logger.py \ No newline at end of file diff --git a/cv/classification/deit_large_patch16_LS_224/lr_scheduler.py b/cv/classification/deit_large_patch16_LS_224/lr_scheduler.py new file mode 120000 index 00000000..268fb140 --- /dev/null +++ 
b/cv/classification/deit_large_patch16_LS_224/lr_scheduler.py @@ -0,0 +1 @@ +../lr_scheduler.py \ No newline at end of file diff --git a/cv/classification/deit_large_patch16_LS_224/main.py b/cv/classification/deit_large_patch16_LS_224/main.py new file mode 100644 index 00000000..cb47530d --- /dev/null +++ b/cv/classification/deit_large_patch16_LS_224/main.py @@ -0,0 +1,460 @@ +""" +Modified from https://github.com/microsoft/Swin-Transformer/blob/main/main.py +""" + +import os +import time +import argparse +import datetime +import numpy as np +import oneflow as flow +import oneflow.backends.cudnn as cudnn + +from flowvision.loss.cross_entropy import ( + LabelSmoothingCrossEntropy, + SoftTargetCrossEntropy, +) +from flowvision.utils.metrics import accuracy +from flowvision.models import ModelCreator + +from config import get_config +from data import build_loader +from lr_scheduler import build_scheduler +from optimizer import build_optimizer +from logger import create_logger +from utils import ( + load_checkpoint, + save_checkpoint, + get_grad_norm, + auto_resume_helper, + reduce_tensor, + AverageMeter, + TimeMeter +) + +def build_model(config): + model_arch = config.MODEL.ARCH + model = ModelCreator.create_model(model_arch, pretrained=config.MODEL.PRETRAINED) + return model + + +def parse_option(): + parser = argparse.ArgumentParser( + "Flowvision image classification training and evaluation script", add_help=False + ) + parser.add_argument( + "--model_arch", + type=str, + required=True, + default="swin_tiny_patch4_window7_224", + help="model for training", + ) + parser.add_argument( + "--cfg", type=str, required=True, metavar="FILE", help="path to config file", + ) + parser.add_argument( + "--opts", + help="Modify config options by adding 'KEY VALUE' pairs. 
", + default=None, + nargs="+", + ) + + # easy config modification + parser.add_argument( + "--synthetic-data", + action="store_true", + dest="synthetic_data", + help="Use synthetic data", + ) + parser.add_argument( + "--epochs", type=int, help="batch size for single GPU" + ) + parser.add_argument("--lr", type=float, help="initial learning rate") + parser.add_argument( + "--image-size", type=int, help="initial image size" + ) + parser.add_argument( + "--batch-size", type=int, help="batch size for single GPU" + ) + parser.add_argument("--data-path", type=str, help="path to dataset") + parser.add_argument( + "--zip", + action="store_true", + help="use zipped dataset instead of folder dataset", + ) + parser.add_argument( + "--cache-mode", + type=str, + default="part", + choices=["no", "full", "part"], + help="no: no cache, " + "full: cache all data, " + "part: sharding the dataset into nonoverlapping pieces and only cache one piece", + ) + parser.add_argument("--resume", help="resume from checkpoint") + parser.add_argument( + "--accumulation-steps", type=int, help="gradient accumulation steps" + ) + parser.add_argument( + "--use-checkpoint", + action="store_true", + help="whether to use gradient checkpointing to save memory", + ) + parser.add_argument( + "--output", + default="output", + type=str, + metavar="PATH", + help="root of output folder, the full path is // (default: output)", + ) + parser.add_argument("--tag", help="tag of experiment") + parser.add_argument("--eval", action="store_true", help="Perform evaluation only") + parser.add_argument( + "--throughput", action="store_true", help="Test throughput only" + ) + + # distributed training + parser.add_argument( + "--local_rank", + type=int, + default=0, + required=False, + help="local rank for DistributedDataParallel", + ) + + args, unparsed = parser.parse_known_args() + + config = get_config(args) + + return args, config + + +def main(config): + ( + dataset_train, + dataset_val, + data_loader_train, + 
data_loader_val, + mixup_fn, + ) = build_loader(config) + + logger.info(f"Creating model:{config.MODEL.ARCH}") + model = build_model(config) + model.cuda() + + optimizer = build_optimizer(config, model) + model = flow.nn.parallel.DistributedDataParallel(model, broadcast_buffers=False) + # FIXME: model with DDP wrapper doesn't have model.module + model_without_ddp = model + + n_parameters = sum(p.numel() for p in model.parameters() if p.requires_grad) + logger.info(f"number of params: {n_parameters}") + if hasattr(model_without_ddp, "flops"): + flops = model_without_ddp.flops() + logger.info(f"number of GFLOPs: {flops / 1e9}") + + lr_scheduler = build_scheduler(config, optimizer, len(data_loader_train)) + + if config.AUG.MIXUP > 0.0: + # smoothing is handled with mixup label transform + criterion = SoftTargetCrossEntropy() + elif config.MODEL.LABEL_SMOOTHING > 0.0: + criterion = LabelSmoothingCrossEntropy(smoothing=config.MODEL.LABEL_SMOOTHING) + else: + criterion = flow.nn.CrossEntropyLoss() + + max_accuracy = 0.0 + + if config.TRAIN.AUTO_RESUME: + resume_file = auto_resume_helper(config.OUTPUT) + if resume_file: + if config.MODEL.RESUME: + logger.warning( + f"auto-resume changing resume file from {config.MODEL.RESUME} to {resume_file}" + ) + config.defrost() + config.MODEL.RESUME = resume_file + config.freeze() + logger.info(f"auto resuming from {resume_file}") + else: + logger.info(f"no checkpoint found in {config.OUTPUT}, ignoring auto resume") + + if config.MODEL.RESUME: + print("resume called") + max_accuracy = load_checkpoint( + config, model_without_ddp, optimizer, lr_scheduler, logger + ) + acc1, acc5, loss = validate(config, data_loader_val, model) + logger.info( + f"Accuracy of the network on the {len(data_loader_val)} test images: {acc1:.1f}%" + ) + if config.EVAL_MODE: + return + + if config.THROUGHPUT_MODE: + acc1, acc5, loss = validate(config, data_loader_val, model) + logger.info( + f"Accuracy of the network on the {len(data_loader_val)} test images: 
{acc1:.1f}%" + ) + throughput(data_loader_val, model, logger) + return + + logger.info("Start training") + start_time = time.time() + for epoch in range(config.TRAIN.START_EPOCH, config.TRAIN.EPOCHS): + if not config.DATA.SYNTHETIC_DATA: + data_loader_train.sampler.set_epoch(epoch) + + train_one_epoch( + config, + model, + criterion, + data_loader_train, + optimizer, + epoch, + mixup_fn, + lr_scheduler + ) + if flow.env.get_rank() == 0 and ( + epoch % config.SAVE_FREQ == 0 or epoch == (config.TRAIN.EPOCHS - 1) + ): + save_checkpoint( + config, + epoch, + model_without_ddp, + max_accuracy, + optimizer, + lr_scheduler, + logger, + ) + + # no validate + acc1, acc5, loss = validate(config, data_loader_val, model) + logger.info( + f"Accuracy of the network on the {len(data_loader_val)} test images: {acc1:.1f}%" + ) + max_accuracy = max(max_accuracy, acc1) + logger.info(f"Max accuracy: {max_accuracy:.2f}%") + + total_time = time.time() - start_time + total_time_str = str(datetime.timedelta(seconds=int(total_time))) + logger.info("Training time {}".format(total_time_str)) + + +def train_one_epoch( + config, model, criterion, data_loader, optimizer, epoch, mixup_fn, lr_scheduler +): + model.train() + optimizer.zero_grad() + + num_steps = len(data_loader) + one_sample_time = TimeMeter() + loss_meter = AverageMeter() + + start = time.time() + end = time.time() + for idx, (samples, targets) in enumerate(data_loader): + samples = samples.cuda() + targets = targets.cuda() + + if mixup_fn is not None: + samples, targets = mixup_fn(samples, targets) + + outputs = model(samples) + if type(outputs) == tuple: + outputs = outputs[0] + + + if config.TRAIN.ACCUMULATION_STEPS > 1: + loss = criterion(outputs, targets) + loss = loss / config.TRAIN.ACCUMULATION_STEPS + loss.backward() + if config.TRAIN.CLIP_GRAD: + flow.nn.utils.clip_grad_norm_( + model.parameters(), config.TRAIN.CLIP_GRAD + ) + if (idx + 1) % config.TRAIN.ACCUMULATION_STEPS == 0: + optimizer.step() + optimizer.zero_grad() 
+ lr_scheduler.step() + else: + loss = criterion(outputs, targets) + optimizer.zero_grad() + loss.backward() + if config.TRAIN.CLIP_GRAD: + flow.nn.utils.clip_grad_norm_( + model.parameters(), config.TRAIN.CLIP_GRAD + ) + optimizer.step() + lr_scheduler.step() + + one_sample_time.record(samples.size(0) * flow.env.get_world_size()) + loss_meter.record(loss.cpu().detach(), targets.size(0)) + + end = time.time() + + if idx % config.PRINT_FREQ == 0: + lr = optimizer.param_groups[0]["lr"] + loss, loss_avg = loss_meter.get() + throughput, throughput_avg = one_sample_time.get() + etas = (num_steps - idx) * samples.size(0) * flow.env.get_world_size() / throughput_avg + one_sample_time.reset() + + logger.info( + f"Train: [{epoch}/{config.TRAIN.EPOCHS}][{idx}/{num_steps}]\t" + f"eta {datetime.timedelta(seconds=int(etas))}\tlr {lr:.6f}\t" + f"time {samples.size(0) * flow.env.get_world_size() / throughput:.4f}s ({samples.size(0) * flow.env.get_world_size() / throughput_avg:.4f}s)\t" + f"rate {throughput:.4f}/s ({throughput_avg:.4f}/s)\t" + f"loss {loss:.4f} ({loss_avg:.4f})\t" + ) + + epoch_time = time.time() - start + logger.info( + f"EPOCH {epoch} training takes {datetime.timedelta(seconds=int(epoch_time))}" + ) + + +@flow.no_grad() +def validate(config, data_loader, model): + criterion = flow.nn.CrossEntropyLoss() + model.eval() + + batch_time = TimeMeter() + loss_meter = AverageMeter() + acc1_meter = AverageMeter() + acc5_meter = AverageMeter() + + end = time.time() + for idx, (images, target) in enumerate(data_loader): + images = images.cuda() + target = target.cuda() + + # compute output + output = model.forward(images) + if type(output) == tuple: + output = output[0] + + + # measure accuracy and record loss + loss = criterion(output, target) + acc1, acc5 = accuracy(output, target, topk=(1, 5)) + + acc1 = reduce_tensor(acc1) + acc5 = reduce_tensor(acc5) + loss = reduce_tensor(loss) + + batch_time.record(target.size(0) * flow.env.get_world_size()) + 
loss_meter.record(loss, target.size(0)) + acc1_meter.record(acc1, target.size(0)) + acc5_meter.record(acc5, target.size(0)) + + # measure elapsed time + end = time.time() + + if idx % config.PRINT_FREQ == 0: + acc1, acc1_avg = acc1_meter.get() + acc5, acc5_avg = acc5_meter.get() + loss, loss_avg = loss_meter.get() + throughput, throughput_avg = batch_time.get() + batch_time.reset() + + logger.info( + f"Test: [{idx}/{len(data_loader)}]\t" + f"Throughput {throughput:.3f} ({throughput_avg:.3f})\t" + f"Loss {loss:.4f} ({loss_avg:.4f})\t" + f"Acc@1 {acc1:.3f} ({acc1_avg:.3f})\t" + f"Acc@5 {acc5:.3f} ({acc5_avg:.3f})\t" + ) + + logger.info(f" * Acc@1 {acc1_avg:.3f} Acc@5 {acc5_avg:.3f}") + return acc1_avg, acc5_avg, loss_meter + + +@flow.no_grad() +def throughput(data_loader, model, logger): + model.eval() + + for idx, (images, _) in enumerate(data_loader): + images = images.cuda() + batch_size = images.shape[0] + for i in range(50): + model.forward(images) + flow.cuda.synchronize() + # TODO: add flow.cuda.synchronize() + logger.info(f"throughput averaged with 30 times") + tic1 = time.time() + for i in range(30): + model.forward(images) + + flow.cuda.synchronize() + tic2 = time.time() + logger.info( + f"batch_size {batch_size} throughput {30 * batch_size / (tic2 - tic1)}" + ) + return + + +if __name__ == "__main__": + _, config = parse_option() + if "RANK" in os.environ and "WORLD_SIZE" in os.environ: + rank = flow.env.get_rank() + world_size = flow.env.get_world_size() + print(f"RANK and WORLD_SIZE in environ: {rank}/{world_size}") + else: + rank = -1 + world_size = -1 + + seed = config.SEED + flow.env.get_rank() + flow.manual_seed(seed) + np.random.seed(seed) + cudnn.benchmark = True + + # linear_scaled_lr = ( + # config.TRAIN.BASE_LR + # * config.DATA.BATCH_SIZE + # * flow.env.get_world_size() + # / 512.0 + # ) + # linear_scaled_warmup_lr = ( + # config.TRAIN.WARMUP_LR + # * config.DATA.BATCH_SIZE + # * flow.env.get_world_size() + # / 512.0 + # ) + linear_scaled_lr = 
config.TRAIN.BASE_LR + linear_scaled_warmup_lr = config.TRAIN.WARMUP_LR + + linear_scaled_min_lr = ( + config.TRAIN.MIN_LR * config.DATA.BATCH_SIZE * flow.env.get_world_size() / 512.0 + ) + + # gradient accumulation also need to scale the learning rate + if config.TRAIN.ACCUMULATION_STEPS > 1: + linear_scaled_lr = linear_scaled_lr * config.TRAIN.ACCUMULATION_STEPS + linear_scaled_warmup_lr = ( + linear_scaled_warmup_lr * config.TRAIN.ACCUMULATION_STEPS + ) + linear_scaled_min_lr = linear_scaled_min_lr * config.TRAIN.ACCUMULATION_STEPS + config.defrost() + config.TRAIN.BASE_LR = linear_scaled_lr + config.TRAIN.WARMUP_LR = linear_scaled_warmup_lr + config.TRAIN.MIN_LR = linear_scaled_min_lr + config.freeze() + + os.makedirs(config.OUTPUT, exist_ok=True) + logger = create_logger( + output_dir=config.OUTPUT, + dist_rank=flow.env.get_rank(), + name=f"{config.MODEL.ARCH}", + ) + + if flow.env.get_rank() == 0: + path = os.path.join(config.OUTPUT, "config.json") + with open(path, "w") as f: + f.write(config.dump()) + logger.info(f"Full config saved to {path}") + + # print config + logger.info(config.dump()) + + main(config) diff --git a/cv/classification/deit_large_patch16_LS_224/optimizer.py b/cv/classification/deit_large_patch16_LS_224/optimizer.py new file mode 120000 index 00000000..e5dec61d --- /dev/null +++ b/cv/classification/deit_large_patch16_LS_224/optimizer.py @@ -0,0 +1 @@ +../optimizer.py \ No newline at end of file diff --git a/cv/classification/deit_large_patch16_LS_224/requirements.txt b/cv/classification/deit_large_patch16_LS_224/requirements.txt new file mode 120000 index 00000000..dc833dd4 --- /dev/null +++ b/cv/classification/deit_large_patch16_LS_224/requirements.txt @@ -0,0 +1 @@ +../requirements.txt \ No newline at end of file diff --git a/cv/classification/deit_large_patch16_LS_224/train.sh b/cv/classification/deit_large_patch16_LS_224/train.sh new file mode 100755 index 00000000..2d4c17f3 --- /dev/null +++ 
b/cv/classification/deit_large_patch16_LS_224/train.sh @@ -0,0 +1,18 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=4 +PORT=12346 +MODEL_ARCH="deit_large_patch16_LS_224" +BATCH_SIZE=32 +LEARNING_RATE=5e-5 + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/default_settings.yaml \ + --model_arch $MODEL_ARCH \ + --batch-size $BATCH_SIZE \ + --lr $LEARNING_RATE diff --git a/cv/classification/deit_large_patch16_LS_224/utils.py b/cv/classification/deit_large_patch16_LS_224/utils.py new file mode 120000 index 00000000..50fbc6d8 --- /dev/null +++ b/cv/classification/deit_large_patch16_LS_224/utils.py @@ -0,0 +1 @@ +../utils.py \ No newline at end of file diff --git a/cv/classification/deit_large_patch16_LS_384/README.md b/cv/classification/deit_large_patch16_LS_384/README.md new file mode 100644 index 00000000..1bb0b3d5 --- /dev/null +++ b/cv/classification/deit_large_patch16_LS_384/README.md @@ -0,0 +1,66 @@ +## deit_large_patch16_LS_384 + +### Installation +- Install the latest version of OneFlow +```bash +python3 -m pip install oneflow -f https://staging.oneflow.info/branch/master/[PLATFORM] +``` +Find more information on [install oneflow](https://github.com/Oneflow-Inc/oneflow#install-oneflow) + +- Install flowvision + +Then install the latest stable release of flowvision + +```bash +pip install flowvision==0.2.1 +``` + +- Install other requirements +```bash +python3 -m pip install -r requirements.txt +``` + +### Dataset +#### ImageNet +For ImageNet dataset, you can download it from http://image-net.org/. We provide the following two ways to load data: + +- For standard folder dataset, move validation images to labeled sub-folders. The file structure should look like: + ```bash + $ tree data + imagenet + ├── train + │ ├── class1 + │ │ ├── img1.jpeg + │ │ ├── img2.jpeg + │ │ └── ... + │ ├── class2 + │ │ ├── img3.jpeg + │ │ └── ... + │ └── ... 
+ └── val + ├── class1 + │ ├── img4.jpeg + │ ├── img5.jpeg + │ └── ... + ├── class2 + │ ├── img6.jpeg + │ └── ... + └── ... + + +### Training + +You can use bash script `train.sh` to train this model. + +```bash +sh train.sh +``` + +### inference + +Bash script `infer.sh` is used to infer the trained model. + +```bash +sh infer.sh +``` + diff --git a/cv/classification/deit_large_patch16_LS_384/config.py b/cv/classification/deit_large_patch16_LS_384/config.py new file mode 120000 index 00000000..3721332c --- /dev/null +++ b/cv/classification/deit_large_patch16_LS_384/config.py @@ -0,0 +1 @@ +../config.py \ No newline at end of file diff --git a/cv/classification/deit_large_patch16_LS_384/configs b/cv/classification/deit_large_patch16_LS_384/configs new file mode 120000 index 00000000..271eaab3 --- /dev/null +++ b/cv/classification/deit_large_patch16_LS_384/configs @@ -0,0 +1 @@ +../configs \ No newline at end of file diff --git a/cv/classification/deit_large_patch16_LS_384/data b/cv/classification/deit_large_patch16_LS_384/data new file mode 120000 index 00000000..4909e06e --- /dev/null +++ b/cv/classification/deit_large_patch16_LS_384/data @@ -0,0 +1 @@ +../data \ No newline at end of file diff --git a/cv/classification/deit_large_patch16_LS_384/infer.sh b/cv/classification/deit_large_patch16_LS_384/infer.sh new file mode 100755 index 00000000..d0217bc0 --- /dev/null +++ b/cv/classification/deit_large_patch16_LS_384/infer.sh @@ -0,0 +1,18 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=1 +PORT=12346 +MODEL_ARCH="deit_large_patch16_LS_384" +IMAGE_SIZE=384 + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/default_settings.yaml \ + --model_arch $MODEL_ARCH \ + --image-size $IMAGE_SIZE \ + --throughput + diff --git a/cv/classification/deit_large_patch16_LS_384/logger.py b/cv/classification/deit_large_patch16_LS_384/logger.py new file mode 120000 index 
00000000..e1dc8ceb --- /dev/null +++ b/cv/classification/deit_large_patch16_LS_384/logger.py @@ -0,0 +1 @@ +../logger.py \ No newline at end of file diff --git a/cv/classification/deit_large_patch16_LS_384/lr_scheduler.py b/cv/classification/deit_large_patch16_LS_384/lr_scheduler.py new file mode 120000 index 00000000..268fb140 --- /dev/null +++ b/cv/classification/deit_large_patch16_LS_384/lr_scheduler.py @@ -0,0 +1 @@ +../lr_scheduler.py \ No newline at end of file diff --git a/cv/classification/deit_large_patch16_LS_384/main.py b/cv/classification/deit_large_patch16_LS_384/main.py new file mode 100644 index 00000000..cb47530d --- /dev/null +++ b/cv/classification/deit_large_patch16_LS_384/main.py @@ -0,0 +1,460 @@ +""" +Modified from https://github.com/microsoft/Swin-Transformer/blob/main/main.py +""" + +import os +import time +import argparse +import datetime +import numpy as np +import oneflow as flow +import oneflow.backends.cudnn as cudnn + +from flowvision.loss.cross_entropy import ( + LabelSmoothingCrossEntropy, + SoftTargetCrossEntropy, +) +from flowvision.utils.metrics import accuracy +from flowvision.models import ModelCreator + +from config import get_config +from data import build_loader +from lr_scheduler import build_scheduler +from optimizer import build_optimizer +from logger import create_logger +from utils import ( + load_checkpoint, + save_checkpoint, + get_grad_norm, + auto_resume_helper, + reduce_tensor, + AverageMeter, + TimeMeter +) + +def build_model(config): + model_arch = config.MODEL.ARCH + model = ModelCreator.create_model(model_arch, pretrained=config.MODEL.PRETRAINED) + return model + + +def parse_option(): + parser = argparse.ArgumentParser( + "Flowvision image classification training and evaluation script", add_help=False + ) + parser.add_argument( + "--model_arch", + type=str, + required=True, + default="swin_tiny_patch4_window7_224", + help="model for training", + ) + parser.add_argument( + "--cfg", type=str, required=True, 
metavar="FILE", help="path to config file", + ) + parser.add_argument( + "--opts", + help="Modify config options by adding 'KEY VALUE' pairs. ", + default=None, + nargs="+", + ) + + # easy config modification + parser.add_argument( + "--synthetic-data", + action="store_true", + dest="synthetic_data", + help="Use synthetic data", + ) + parser.add_argument( + "--epochs", type=int, help="batch size for single GPU" + ) + parser.add_argument("--lr", type=float, help="initial learning rate") + parser.add_argument( + "--image-size", type=int, help="initial image size" + ) + parser.add_argument( + "--batch-size", type=int, help="batch size for single GPU" + ) + parser.add_argument("--data-path", type=str, help="path to dataset") + parser.add_argument( + "--zip", + action="store_true", + help="use zipped dataset instead of folder dataset", + ) + parser.add_argument( + "--cache-mode", + type=str, + default="part", + choices=["no", "full", "part"], + help="no: no cache, " + "full: cache all data, " + "part: sharding the dataset into nonoverlapping pieces and only cache one piece", + ) + parser.add_argument("--resume", help="resume from checkpoint") + parser.add_argument( + "--accumulation-steps", type=int, help="gradient accumulation steps" + ) + parser.add_argument( + "--use-checkpoint", + action="store_true", + help="whether to use gradient checkpointing to save memory", + ) + parser.add_argument( + "--output", + default="output", + type=str, + metavar="PATH", + help="root of output folder, the full path is // (default: output)", + ) + parser.add_argument("--tag", help="tag of experiment") + parser.add_argument("--eval", action="store_true", help="Perform evaluation only") + parser.add_argument( + "--throughput", action="store_true", help="Test throughput only" + ) + + # distributed training + parser.add_argument( + "--local_rank", + type=int, + default=0, + required=False, + help="local rank for DistributedDataParallel", + ) + + args, unparsed = parser.parse_known_args() + 
+ config = get_config(args) + + return args, config + + +def main(config): + ( + dataset_train, + dataset_val, + data_loader_train, + data_loader_val, + mixup_fn, + ) = build_loader(config) + + logger.info(f"Creating model:{config.MODEL.ARCH}") + model = build_model(config) + model.cuda() + + optimizer = build_optimizer(config, model) + model = flow.nn.parallel.DistributedDataParallel(model, broadcast_buffers=False) + # FIXME: model with DDP wrapper doesn't have model.module + model_without_ddp = model + + n_parameters = sum(p.numel() for p in model.parameters() if p.requires_grad) + logger.info(f"number of params: {n_parameters}") + if hasattr(model_without_ddp, "flops"): + flops = model_without_ddp.flops() + logger.info(f"number of GFLOPs: {flops / 1e9}") + + lr_scheduler = build_scheduler(config, optimizer, len(data_loader_train)) + + if config.AUG.MIXUP > 0.0: + # smoothing is handled with mixup label transform + criterion = SoftTargetCrossEntropy() + elif config.MODEL.LABEL_SMOOTHING > 0.0: + criterion = LabelSmoothingCrossEntropy(smoothing=config.MODEL.LABEL_SMOOTHING) + else: + criterion = flow.nn.CrossEntropyLoss() + + max_accuracy = 0.0 + + if config.TRAIN.AUTO_RESUME: + resume_file = auto_resume_helper(config.OUTPUT) + if resume_file: + if config.MODEL.RESUME: + logger.warning( + f"auto-resume changing resume file from {config.MODEL.RESUME} to {resume_file}" + ) + config.defrost() + config.MODEL.RESUME = resume_file + config.freeze() + logger.info(f"auto resuming from {resume_file}") + else: + logger.info(f"no checkpoint found in {config.OUTPUT}, ignoring auto resume") + + if config.MODEL.RESUME: + print("resume called") + max_accuracy = load_checkpoint( + config, model_without_ddp, optimizer, lr_scheduler, logger + ) + acc1, acc5, loss = validate(config, data_loader_val, model) + logger.info( + f"Accuracy of the network on the {len(data_loader_val)} test images: {acc1:.1f}%" + ) + if config.EVAL_MODE: + return + + if config.THROUGHPUT_MODE: + acc1, acc5, 
loss = validate(config, data_loader_val, model) + logger.info( + f"Accuracy of the network on the {len(data_loader_val)} test images: {acc1:.1f}%" + ) + throughput(data_loader_val, model, logger) + return + + logger.info("Start training") + start_time = time.time() + for epoch in range(config.TRAIN.START_EPOCH, config.TRAIN.EPOCHS): + if not config.DATA.SYNTHETIC_DATA: + data_loader_train.sampler.set_epoch(epoch) + + train_one_epoch( + config, + model, + criterion, + data_loader_train, + optimizer, + epoch, + mixup_fn, + lr_scheduler + ) + if flow.env.get_rank() == 0 and ( + epoch % config.SAVE_FREQ == 0 or epoch == (config.TRAIN.EPOCHS - 1) + ): + save_checkpoint( + config, + epoch, + model_without_ddp, + max_accuracy, + optimizer, + lr_scheduler, + logger, + ) + + # no validate + acc1, acc5, loss = validate(config, data_loader_val, model) + logger.info( + f"Accuracy of the network on the {len(data_loader_val)} test images: {acc1:.1f}%" + ) + max_accuracy = max(max_accuracy, acc1) + logger.info(f"Max accuracy: {max_accuracy:.2f}%") + + total_time = time.time() - start_time + total_time_str = str(datetime.timedelta(seconds=int(total_time))) + logger.info("Training time {}".format(total_time_str)) + + +def train_one_epoch( + config, model, criterion, data_loader, optimizer, epoch, mixup_fn, lr_scheduler +): + model.train() + optimizer.zero_grad() + + num_steps = len(data_loader) + one_sample_time = TimeMeter() + loss_meter = AverageMeter() + + start = time.time() + end = time.time() + for idx, (samples, targets) in enumerate(data_loader): + samples = samples.cuda() + targets = targets.cuda() + + if mixup_fn is not None: + samples, targets = mixup_fn(samples, targets) + + outputs = model(samples) + if type(outputs) == tuple: + outputs = outputs[0] + + + if config.TRAIN.ACCUMULATION_STEPS > 1: + loss = criterion(outputs, targets) + loss = loss / config.TRAIN.ACCUMULATION_STEPS + loss.backward() + if config.TRAIN.CLIP_GRAD: + flow.nn.utils.clip_grad_norm_( + 
model.parameters(), config.TRAIN.CLIP_GRAD + ) + if (idx + 1) % config.TRAIN.ACCUMULATION_STEPS == 0: + optimizer.step() + optimizer.zero_grad() + lr_scheduler.step() + else: + loss = criterion(outputs, targets) + optimizer.zero_grad() + loss.backward() + if config.TRAIN.CLIP_GRAD: + flow.nn.utils.clip_grad_norm_( + model.parameters(), config.TRAIN.CLIP_GRAD + ) + optimizer.step() + lr_scheduler.step() + + one_sample_time.record(samples.size(0) * flow.env.get_world_size()) + loss_meter.record(loss.cpu().detach(), targets.size(0)) + + end = time.time() + + if idx % config.PRINT_FREQ == 0: + lr = optimizer.param_groups[0]["lr"] + loss, loss_avg = loss_meter.get() + throughput, throughput_avg = one_sample_time.get() + etas = (num_steps - idx) * samples.size(0) * flow.env.get_world_size() / throughput_avg + one_sample_time.reset() + + logger.info( + f"Train: [{epoch}/{config.TRAIN.EPOCHS}][{idx}/{num_steps}]\t" + f"eta {datetime.timedelta(seconds=int(etas))}\tlr {lr:.6f}\t" + f"time {samples.size(0) * flow.env.get_world_size() / throughput:.4f}s ({samples.size(0) * flow.env.get_world_size() / throughput_avg:.4f}s)\t" + f"rate {throughput:.4f}/s ({throughput_avg:.4f}/s)\t" + f"loss {loss:.4f} ({loss_avg:.4f})\t" + ) + + epoch_time = time.time() - start + logger.info( + f"EPOCH {epoch} training takes {datetime.timedelta(seconds=int(epoch_time))}" + ) + + +@flow.no_grad() +def validate(config, data_loader, model): + criterion = flow.nn.CrossEntropyLoss() + model.eval() + + batch_time = TimeMeter() + loss_meter = AverageMeter() + acc1_meter = AverageMeter() + acc5_meter = AverageMeter() + + end = time.time() + for idx, (images, target) in enumerate(data_loader): + images = images.cuda() + target = target.cuda() + + # compute output + output = model.forward(images) + if type(output) == tuple: + output = output[0] + + + # measure accuracy and record loss + loss = criterion(output, target) + acc1, acc5 = accuracy(output, target, topk=(1, 5)) + + acc1 = reduce_tensor(acc1) + 
acc5 = reduce_tensor(acc5) + loss = reduce_tensor(loss) + + batch_time.record(target.size(0) * flow.env.get_world_size()) + loss_meter.record(loss, target.size(0)) + acc1_meter.record(acc1, target.size(0)) + acc5_meter.record(acc5, target.size(0)) + + # measure elapsed time + end = time.time() + + if idx % config.PRINT_FREQ == 0: + acc1, acc1_avg = acc1_meter.get() + acc5, acc5_avg = acc5_meter.get() + loss, loss_avg = loss_meter.get() + throughput, throughput_avg = batch_time.get() + batch_time.reset() + + logger.info( + f"Test: [{idx}/{len(data_loader)}]\t" + f"Throughput {throughput:.3f} ({throughput_avg:.3f})\t" + f"Loss {loss:.4f} ({loss_avg:.4f})\t" + f"Acc@1 {acc1:.3f} ({acc1_avg:.3f})\t" + f"Acc@5 {acc5:.3f} ({acc5_avg:.3f})\t" + ) + + logger.info(f" * Acc@1 {acc1_avg:.3f} Acc@5 {acc5_avg:.3f}") + return acc1_avg, acc5_avg, loss_meter + + +@flow.no_grad() +def throughput(data_loader, model, logger): + model.eval() + + for idx, (images, _) in enumerate(data_loader): + images = images.cuda() + batch_size = images.shape[0] + for i in range(50): + model.forward(images) + flow.cuda.synchronize() + # TODO: add flow.cuda.synchronize() + logger.info(f"throughput averaged with 30 times") + tic1 = time.time() + for i in range(30): + model.forward(images) + + flow.cuda.synchronize() + tic2 = time.time() + logger.info( + f"batch_size {batch_size} throughput {30 * batch_size / (tic2 - tic1)}" + ) + return + + +if __name__ == "__main__": + _, config = parse_option() + if "RANK" in os.environ and "WORLD_SIZE" in os.environ: + rank = flow.env.get_rank() + world_size = flow.env.get_world_size() + print(f"RANK and WORLD_SIZE in environ: {rank}/{world_size}") + else: + rank = -1 + world_size = -1 + + seed = config.SEED + flow.env.get_rank() + flow.manual_seed(seed) + np.random.seed(seed) + cudnn.benchmark = True + + # linear_scaled_lr = ( + # config.TRAIN.BASE_LR + # * config.DATA.BATCH_SIZE + # * flow.env.get_world_size() + # / 512.0 + # ) + # linear_scaled_warmup_lr = ( + # 
config.TRAIN.WARMUP_LR + # * config.DATA.BATCH_SIZE + # * flow.env.get_world_size() + # / 512.0 + # ) + linear_scaled_lr = config.TRAIN.BASE_LR + linear_scaled_warmup_lr = config.TRAIN.WARMUP_LR + + linear_scaled_min_lr = ( + config.TRAIN.MIN_LR * config.DATA.BATCH_SIZE * flow.env.get_world_size() / 512.0 + ) + + # gradient accumulation also need to scale the learning rate + if config.TRAIN.ACCUMULATION_STEPS > 1: + linear_scaled_lr = linear_scaled_lr * config.TRAIN.ACCUMULATION_STEPS + linear_scaled_warmup_lr = ( + linear_scaled_warmup_lr * config.TRAIN.ACCUMULATION_STEPS + ) + linear_scaled_min_lr = linear_scaled_min_lr * config.TRAIN.ACCUMULATION_STEPS + config.defrost() + config.TRAIN.BASE_LR = linear_scaled_lr + config.TRAIN.WARMUP_LR = linear_scaled_warmup_lr + config.TRAIN.MIN_LR = linear_scaled_min_lr + config.freeze() + + os.makedirs(config.OUTPUT, exist_ok=True) + logger = create_logger( + output_dir=config.OUTPUT, + dist_rank=flow.env.get_rank(), + name=f"{config.MODEL.ARCH}", + ) + + if flow.env.get_rank() == 0: + path = os.path.join(config.OUTPUT, "config.json") + with open(path, "w") as f: + f.write(config.dump()) + logger.info(f"Full config saved to {path}") + + # print config + logger.info(config.dump()) + + main(config) diff --git a/cv/classification/deit_large_patch16_LS_384/optimizer.py b/cv/classification/deit_large_patch16_LS_384/optimizer.py new file mode 120000 index 00000000..e5dec61d --- /dev/null +++ b/cv/classification/deit_large_patch16_LS_384/optimizer.py @@ -0,0 +1 @@ +../optimizer.py \ No newline at end of file diff --git a/cv/classification/deit_large_patch16_LS_384/requirements.txt b/cv/classification/deit_large_patch16_LS_384/requirements.txt new file mode 120000 index 00000000..dc833dd4 --- /dev/null +++ b/cv/classification/deit_large_patch16_LS_384/requirements.txt @@ -0,0 +1 @@ +../requirements.txt \ No newline at end of file diff --git a/cv/classification/deit_large_patch16_LS_384/train.sh 
b/cv/classification/deit_large_patch16_LS_384/train.sh new file mode 100755 index 00000000..beaa4d94 --- /dev/null +++ b/cv/classification/deit_large_patch16_LS_384/train.sh @@ -0,0 +1,21 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=4 +PORT=12346 +MODEL_ARCH="deit_large_patch16_LS_384" +IMAGE_SIZE=384 +BATCH_SIZE=8 +LEARNING_RATE=1e-4 + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/default_settings.yaml \ + --model_arch $MODEL_ARCH \ + --batch-size $BATCH_SIZE \ + --image-size $IMAGE_SIZE \ + --lr $LEARNING_RATE + diff --git a/cv/classification/deit_large_patch16_LS_384/utils.py b/cv/classification/deit_large_patch16_LS_384/utils.py new file mode 120000 index 00000000..50fbc6d8 --- /dev/null +++ b/cv/classification/deit_large_patch16_LS_384/utils.py @@ -0,0 +1 @@ +../utils.py \ No newline at end of file diff --git a/cv/classification/deit_large_patch16_LS_384_in21k/README.md b/cv/classification/deit_large_patch16_LS_384_in21k/README.md new file mode 100644 index 00000000..0beb2269 --- /dev/null +++ b/cv/classification/deit_large_patch16_LS_384_in21k/README.md @@ -0,0 +1,66 @@ +## deit_large_patch16_LS_384_in21k + +### Installation +- Install the latest version of OneFlow +```bash +python3 -m pip install oneflow -f https://staging.oneflow.info/branch/master/[PLATFORM] +``` +Find more information on [install oneflow](https://github.com/Oneflow-Inc/oneflow#install-oneflow) + +- Install flowvision + +Then install the latest stable release of flowvision + +```bash +pip install flowvision==0.2.1 +``` + +- Install other requirements +```bash +python3 -m pip install -r requirements.txt +``` + +### Dataset +#### ImageNet +For ImageNet dataset, you can download it from http://image-net.org/. We provide the following two ways to load data: + +- For standard folder dataset, move validation images to labeled sub-folders. 
The file structure should look like: + ```bash + $ tree data + imagenet + ├── train + │ ├── class1 + │ │ ├── img1.jpeg + │ │ ├── img2.jpeg + │ │ └── ... + │ ├── class2 + │ │ ├── img3.jpeg + │ │ └── ... + │ └── ... + └── val + ├── class1 + │ ├── img4.jpeg + │ ├── img5.jpeg + │ └── ... + ├── class2 + │ ├── img6.jpeg + │ └── ... + └── ... + + +### Training + +You can use bash script `train.sh` to train this model. + +```bash +sh train.sh +``` + +### inference + +Bash script `infer.sh` is used to infer the trained model. + +```bash +sh infer.sh +``` + diff --git a/cv/classification/deit_large_patch16_LS_384_in21k/config.py b/cv/classification/deit_large_patch16_LS_384_in21k/config.py new file mode 120000 index 00000000..3721332c --- /dev/null +++ b/cv/classification/deit_large_patch16_LS_384_in21k/config.py @@ -0,0 +1 @@ +../config.py \ No newline at end of file diff --git a/cv/classification/deit_large_patch16_LS_384_in21k/configs b/cv/classification/deit_large_patch16_LS_384_in21k/configs new file mode 120000 index 00000000..271eaab3 --- /dev/null +++ b/cv/classification/deit_large_patch16_LS_384_in21k/configs @@ -0,0 +1 @@ +../configs \ No newline at end of file diff --git a/cv/classification/deit_large_patch16_LS_384_in21k/data b/cv/classification/deit_large_patch16_LS_384_in21k/data new file mode 120000 index 00000000..4909e06e --- /dev/null +++ b/cv/classification/deit_large_patch16_LS_384_in21k/data @@ -0,0 +1 @@ +../data \ No newline at end of file diff --git a/cv/classification/deit_large_patch16_LS_384_in21k/infer.sh b/cv/classification/deit_large_patch16_LS_384_in21k/infer.sh new file mode 100755 index 00000000..3181fb7b --- /dev/null +++ b/cv/classification/deit_large_patch16_LS_384_in21k/infer.sh @@ -0,0 +1,18 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=1 +PORT=12346 +MODEL_ARCH="deit_large_patch16_LS_384_in21k" +IMAGE_SIZE=384 + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + 
--master_port $PORT \ + main.py \ + --cfg configs/default_settings.yaml \ + --model_arch $MODEL_ARCH \ + --image-size $IMAGE_SIZE \ + --throughput + diff --git a/cv/classification/deit_large_patch16_LS_384_in21k/logger.py b/cv/classification/deit_large_patch16_LS_384_in21k/logger.py new file mode 120000 index 00000000..e1dc8ceb --- /dev/null +++ b/cv/classification/deit_large_patch16_LS_384_in21k/logger.py @@ -0,0 +1 @@ +../logger.py \ No newline at end of file diff --git a/cv/classification/deit_large_patch16_LS_384_in21k/lr_scheduler.py b/cv/classification/deit_large_patch16_LS_384_in21k/lr_scheduler.py new file mode 120000 index 00000000..268fb140 --- /dev/null +++ b/cv/classification/deit_large_patch16_LS_384_in21k/lr_scheduler.py @@ -0,0 +1 @@ +../lr_scheduler.py \ No newline at end of file diff --git a/cv/classification/deit_large_patch16_LS_384_in21k/main.py b/cv/classification/deit_large_patch16_LS_384_in21k/main.py new file mode 120000 index 00000000..f05f7527 --- /dev/null +++ b/cv/classification/deit_large_patch16_LS_384_in21k/main.py @@ -0,0 +1 @@ +../main.py \ No newline at end of file diff --git a/cv/classification/deit_large_patch16_LS_384_in21k/optimizer.py b/cv/classification/deit_large_patch16_LS_384_in21k/optimizer.py new file mode 120000 index 00000000..e5dec61d --- /dev/null +++ b/cv/classification/deit_large_patch16_LS_384_in21k/optimizer.py @@ -0,0 +1 @@ +../optimizer.py \ No newline at end of file diff --git a/cv/classification/deit_large_patch16_LS_384_in21k/requirements.txt b/cv/classification/deit_large_patch16_LS_384_in21k/requirements.txt new file mode 120000 index 00000000..dc833dd4 --- /dev/null +++ b/cv/classification/deit_large_patch16_LS_384_in21k/requirements.txt @@ -0,0 +1 @@ +../requirements.txt \ No newline at end of file diff --git a/cv/classification/deit_large_patch16_LS_384_in21k/train.sh b/cv/classification/deit_large_patch16_LS_384_in21k/train.sh new file mode 100755 index 00000000..65cf7a0c --- /dev/null +++ 
b/cv/classification/deit_large_patch16_LS_384_in21k/train.sh @@ -0,0 +1,21 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=4 +PORT=12346 +MODEL_ARCH="deit_large_patch16_LS_384_in21k" +IMAGE_SIZE=384 +BATCH_SIZE=8 +LEARNING_RATE=1e-4 + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/default_settings.yaml \ + --model_arch $MODEL_ARCH \ + --batch-size $BATCH_SIZE \ + --image-size $IMAGE_SIZE \ + --lr $LEARNING_RATE + diff --git a/cv/classification/deit_large_patch16_LS_384_in21k/utils.py b/cv/classification/deit_large_patch16_LS_384_in21k/utils.py new file mode 120000 index 00000000..50fbc6d8 --- /dev/null +++ b/cv/classification/deit_large_patch16_LS_384_in21k/utils.py @@ -0,0 +1 @@ +../utils.py \ No newline at end of file diff --git a/cv/classification/deit_small_distilled_patch16_224/README.md b/cv/classification/deit_small_distilled_patch16_224/README.md new file mode 100644 index 00000000..cdb82440 --- /dev/null +++ b/cv/classification/deit_small_distilled_patch16_224/README.md @@ -0,0 +1,66 @@ +## deit_small_distilled_patch16_224 + +### Installation +- Install the latest version of OneFlow +```bash +python3 -m pip install oneflow -f https://staging.oneflow.info/branch/master/[PLATFORM] +``` +Find more information on [install oneflow](https://github.com/Oneflow-Inc/oneflow#install-oneflow) + +- Install flowvision + +Then install the latest stable release of flowvision + +```bash +pip install flowvision==0.2.1 +``` + +- Install other requirements +```bash +python3 -m pip install -r requirements.txt +``` + +### Dataset +#### ImageNet +For ImageNet dataset, you can download it from http://image-net.org/. We provide the following two ways to load data: + +- For standard folder dataset, move validation images to labeled sub-folders. 
The file structure should look like: + ```bash + $ tree data + imagenet + ├── train + │ ├── class1 + │ │ ├── img1.jpeg + │ │ ├── img2.jpeg + │ │ └── ... + │ ├── class2 + │ │ ├── img3.jpeg + │ │ └── ... + │ └── ... + └── val + ├── class1 + │ ├── img4.jpeg + │ ├── img5.jpeg + │ └── ... + ├── class2 + │ ├── img6.jpeg + │ └── ... + └── ... + + +### Training + +You can use bash script `train.sh` to train this model. + +```bash +sh train.sh +``` + +### inference + +Bash script `infer.sh` is used to infer the trained model. + +```bash +sh infer.sh +``` + diff --git a/cv/classification/deit_small_distilled_patch16_224/config.py b/cv/classification/deit_small_distilled_patch16_224/config.py new file mode 120000 index 00000000..3721332c --- /dev/null +++ b/cv/classification/deit_small_distilled_patch16_224/config.py @@ -0,0 +1 @@ +../config.py \ No newline at end of file diff --git a/cv/classification/deit_small_distilled_patch16_224/configs b/cv/classification/deit_small_distilled_patch16_224/configs new file mode 120000 index 00000000..271eaab3 --- /dev/null +++ b/cv/classification/deit_small_distilled_patch16_224/configs @@ -0,0 +1 @@ +../configs \ No newline at end of file diff --git a/cv/classification/deit_small_distilled_patch16_224/data b/cv/classification/deit_small_distilled_patch16_224/data new file mode 120000 index 00000000..4909e06e --- /dev/null +++ b/cv/classification/deit_small_distilled_patch16_224/data @@ -0,0 +1 @@ +../data \ No newline at end of file diff --git a/cv/classification/deit_small_distilled_patch16_224/infer.sh b/cv/classification/deit_small_distilled_patch16_224/infer.sh new file mode 100755 index 00000000..b84490be --- /dev/null +++ b/cv/classification/deit_small_distilled_patch16_224/infer.sh @@ -0,0 +1,16 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=1 +PORT=12346 +MODEL_ARCH="deit_small_distilled_patch16_224" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + 
--master_port $PORT \ + main.py \ + --cfg configs/default_settings.yaml \ + --model_arch $MODEL_ARCH \ + --throughput + diff --git a/cv/classification/deit_small_distilled_patch16_224/logger.py b/cv/classification/deit_small_distilled_patch16_224/logger.py new file mode 120000 index 00000000..e1dc8ceb --- /dev/null +++ b/cv/classification/deit_small_distilled_patch16_224/logger.py @@ -0,0 +1 @@ +../logger.py \ No newline at end of file diff --git a/cv/classification/deit_small_distilled_patch16_224/lr_scheduler.py b/cv/classification/deit_small_distilled_patch16_224/lr_scheduler.py new file mode 120000 index 00000000..268fb140 --- /dev/null +++ b/cv/classification/deit_small_distilled_patch16_224/lr_scheduler.py @@ -0,0 +1 @@ +../lr_scheduler.py \ No newline at end of file diff --git a/cv/classification/deit_small_distilled_patch16_224/main.py b/cv/classification/deit_small_distilled_patch16_224/main.py new file mode 100644 index 00000000..cb47530d --- /dev/null +++ b/cv/classification/deit_small_distilled_patch16_224/main.py @@ -0,0 +1,460 @@ +""" +Modified from https://github.com/microsoft/Swin-Transformer/blob/main/main.py +""" + +import os +import time +import argparse +import datetime +import numpy as np +import oneflow as flow +import oneflow.backends.cudnn as cudnn + +from flowvision.loss.cross_entropy import ( + LabelSmoothingCrossEntropy, + SoftTargetCrossEntropy, +) +from flowvision.utils.metrics import accuracy +from flowvision.models import ModelCreator + +from config import get_config +from data import build_loader +from lr_scheduler import build_scheduler +from optimizer import build_optimizer +from logger import create_logger +from utils import ( + load_checkpoint, + save_checkpoint, + get_grad_norm, + auto_resume_helper, + reduce_tensor, + AverageMeter, + TimeMeter +) + +def build_model(config): + model_arch = config.MODEL.ARCH + model = ModelCreator.create_model(model_arch, pretrained=config.MODEL.PRETRAINED) + return model + + +def parse_option(): 
+ parser = argparse.ArgumentParser( + "Flowvision image classification training and evaluation script", add_help=False + ) + parser.add_argument( + "--model_arch", + type=str, + required=True, + default="swin_tiny_patch4_window7_224", + help="model for training", + ) + parser.add_argument( + "--cfg", type=str, required=True, metavar="FILE", help="path to config file", + ) + parser.add_argument( + "--opts", + help="Modify config options by adding 'KEY VALUE' pairs. ", + default=None, + nargs="+", + ) + + # easy config modification + parser.add_argument( + "--synthetic-data", + action="store_true", + dest="synthetic_data", + help="Use synthetic data", + ) + parser.add_argument( + "--epochs", type=int, help="batch size for single GPU" + ) + parser.add_argument("--lr", type=float, help="initial learning rate") + parser.add_argument( + "--image-size", type=int, help="initial image size" + ) + parser.add_argument( + "--batch-size", type=int, help="batch size for single GPU" + ) + parser.add_argument("--data-path", type=str, help="path to dataset") + parser.add_argument( + "--zip", + action="store_true", + help="use zipped dataset instead of folder dataset", + ) + parser.add_argument( + "--cache-mode", + type=str, + default="part", + choices=["no", "full", "part"], + help="no: no cache, " + "full: cache all data, " + "part: sharding the dataset into nonoverlapping pieces and only cache one piece", + ) + parser.add_argument("--resume", help="resume from checkpoint") + parser.add_argument( + "--accumulation-steps", type=int, help="gradient accumulation steps" + ) + parser.add_argument( + "--use-checkpoint", + action="store_true", + help="whether to use gradient checkpointing to save memory", + ) + parser.add_argument( + "--output", + default="output", + type=str, + metavar="PATH", + help="root of output folder, the full path is // (default: output)", + ) + parser.add_argument("--tag", help="tag of experiment") + parser.add_argument("--eval", action="store_true", 
help="Perform evaluation only") + parser.add_argument( + "--throughput", action="store_true", help="Test throughput only" + ) + + # distributed training + parser.add_argument( + "--local_rank", + type=int, + default=0, + required=False, + help="local rank for DistributedDataParallel", + ) + + args, unparsed = parser.parse_known_args() + + config = get_config(args) + + return args, config + + +def main(config): + ( + dataset_train, + dataset_val, + data_loader_train, + data_loader_val, + mixup_fn, + ) = build_loader(config) + + logger.info(f"Creating model:{config.MODEL.ARCH}") + model = build_model(config) + model.cuda() + + optimizer = build_optimizer(config, model) + model = flow.nn.parallel.DistributedDataParallel(model, broadcast_buffers=False) + # FIXME: model with DDP wrapper doesn't have model.module + model_without_ddp = model + + n_parameters = sum(p.numel() for p in model.parameters() if p.requires_grad) + logger.info(f"number of params: {n_parameters}") + if hasattr(model_without_ddp, "flops"): + flops = model_without_ddp.flops() + logger.info(f"number of GFLOPs: {flops / 1e9}") + + lr_scheduler = build_scheduler(config, optimizer, len(data_loader_train)) + + if config.AUG.MIXUP > 0.0: + # smoothing is handled with mixup label transform + criterion = SoftTargetCrossEntropy() + elif config.MODEL.LABEL_SMOOTHING > 0.0: + criterion = LabelSmoothingCrossEntropy(smoothing=config.MODEL.LABEL_SMOOTHING) + else: + criterion = flow.nn.CrossEntropyLoss() + + max_accuracy = 0.0 + + if config.TRAIN.AUTO_RESUME: + resume_file = auto_resume_helper(config.OUTPUT) + if resume_file: + if config.MODEL.RESUME: + logger.warning( + f"auto-resume changing resume file from {config.MODEL.RESUME} to {resume_file}" + ) + config.defrost() + config.MODEL.RESUME = resume_file + config.freeze() + logger.info(f"auto resuming from {resume_file}") + else: + logger.info(f"no checkpoint found in {config.OUTPUT}, ignoring auto resume") + + if config.MODEL.RESUME: + print("resume called") + 
max_accuracy = load_checkpoint( + config, model_without_ddp, optimizer, lr_scheduler, logger + ) + acc1, acc5, loss = validate(config, data_loader_val, model) + logger.info( + f"Accuracy of the network on the {len(data_loader_val)} test images: {acc1:.1f}%" + ) + if config.EVAL_MODE: + return + + if config.THROUGHPUT_MODE: + acc1, acc5, loss = validate(config, data_loader_val, model) + logger.info( + f"Accuracy of the network on the {len(data_loader_val)} test images: {acc1:.1f}%" + ) + throughput(data_loader_val, model, logger) + return + + logger.info("Start training") + start_time = time.time() + for epoch in range(config.TRAIN.START_EPOCH, config.TRAIN.EPOCHS): + if not config.DATA.SYNTHETIC_DATA: + data_loader_train.sampler.set_epoch(epoch) + + train_one_epoch( + config, + model, + criterion, + data_loader_train, + optimizer, + epoch, + mixup_fn, + lr_scheduler + ) + if flow.env.get_rank() == 0 and ( + epoch % config.SAVE_FREQ == 0 or epoch == (config.TRAIN.EPOCHS - 1) + ): + save_checkpoint( + config, + epoch, + model_without_ddp, + max_accuracy, + optimizer, + lr_scheduler, + logger, + ) + + # no validate + acc1, acc5, loss = validate(config, data_loader_val, model) + logger.info( + f"Accuracy of the network on the {len(data_loader_val)} test images: {acc1:.1f}%" + ) + max_accuracy = max(max_accuracy, acc1) + logger.info(f"Max accuracy: {max_accuracy:.2f}%") + + total_time = time.time() - start_time + total_time_str = str(datetime.timedelta(seconds=int(total_time))) + logger.info("Training time {}".format(total_time_str)) + + +def train_one_epoch( + config, model, criterion, data_loader, optimizer, epoch, mixup_fn, lr_scheduler +): + model.train() + optimizer.zero_grad() + + num_steps = len(data_loader) + one_sample_time = TimeMeter() + loss_meter = AverageMeter() + + start = time.time() + end = time.time() + for idx, (samples, targets) in enumerate(data_loader): + samples = samples.cuda() + targets = targets.cuda() + + if mixup_fn is not None: + samples, 
targets = mixup_fn(samples, targets) + + outputs = model(samples) + if type(outputs) == tuple: + outputs = outputs[0] + + + if config.TRAIN.ACCUMULATION_STEPS > 1: + loss = criterion(outputs, targets) + loss = loss / config.TRAIN.ACCUMULATION_STEPS + loss.backward() + if config.TRAIN.CLIP_GRAD: + flow.nn.utils.clip_grad_norm_( + model.parameters(), config.TRAIN.CLIP_GRAD + ) + if (idx + 1) % config.TRAIN.ACCUMULATION_STEPS == 0: + optimizer.step() + optimizer.zero_grad() + lr_scheduler.step() + else: + loss = criterion(outputs, targets) + optimizer.zero_grad() + loss.backward() + if config.TRAIN.CLIP_GRAD: + flow.nn.utils.clip_grad_norm_( + model.parameters(), config.TRAIN.CLIP_GRAD + ) + optimizer.step() + lr_scheduler.step() + + one_sample_time.record(samples.size(0) * flow.env.get_world_size()) + loss_meter.record(loss.cpu().detach(), targets.size(0)) + + end = time.time() + + if idx % config.PRINT_FREQ == 0: + lr = optimizer.param_groups[0]["lr"] + loss, loss_avg = loss_meter.get() + throughput, throughput_avg = one_sample_time.get() + etas = (num_steps - idx) * samples.size(0) * flow.env.get_world_size() / throughput_avg + one_sample_time.reset() + + logger.info( + f"Train: [{epoch}/{config.TRAIN.EPOCHS}][{idx}/{num_steps}]\t" + f"eta {datetime.timedelta(seconds=int(etas))}\tlr {lr:.6f}\t" + f"time {samples.size(0) * flow.env.get_world_size() / throughput:.4f}s ({samples.size(0) * flow.env.get_world_size() / throughput_avg:.4f}s)\t" + f"rate {throughput:.4f}/s ({throughput_avg:.4f}/s)\t" + f"loss {loss:.4f} ({loss_avg:.4f})\t" + ) + + epoch_time = time.time() - start + logger.info( + f"EPOCH {epoch} training takes {datetime.timedelta(seconds=int(epoch_time))}" + ) + + +@flow.no_grad() +def validate(config, data_loader, model): + criterion = flow.nn.CrossEntropyLoss() + model.eval() + + batch_time = TimeMeter() + loss_meter = AverageMeter() + acc1_meter = AverageMeter() + acc5_meter = AverageMeter() + + end = time.time() + for idx, (images, target) in 
enumerate(data_loader): + images = images.cuda() + target = target.cuda() + + # compute output + output = model.forward(images) + if type(output) == tuple: + output = output[0] + + + # measure accuracy and record loss + loss = criterion(output, target) + acc1, acc5 = accuracy(output, target, topk=(1, 5)) + + acc1 = reduce_tensor(acc1) + acc5 = reduce_tensor(acc5) + loss = reduce_tensor(loss) + + batch_time.record(target.size(0) * flow.env.get_world_size()) + loss_meter.record(loss, target.size(0)) + acc1_meter.record(acc1, target.size(0)) + acc5_meter.record(acc5, target.size(0)) + + # measure elapsed time + end = time.time() + + if idx % config.PRINT_FREQ == 0: + acc1, acc1_avg = acc1_meter.get() + acc5, acc5_avg = acc5_meter.get() + loss, loss_avg = loss_meter.get() + throughput, throughput_avg = batch_time.get() + batch_time.reset() + + logger.info( + f"Test: [{idx}/{len(data_loader)}]\t" + f"Throughput {throughput:.3f} ({throughput_avg:.3f})\t" + f"Loss {loss:.4f} ({loss_avg:.4f})\t" + f"Acc@1 {acc1:.3f} ({acc1_avg:.3f})\t" + f"Acc@5 {acc5:.3f} ({acc5_avg:.3f})\t" + ) + + logger.info(f" * Acc@1 {acc1_avg:.3f} Acc@5 {acc5_avg:.3f}") + return acc1_avg, acc5_avg, loss_meter + + +@flow.no_grad() +def throughput(data_loader, model, logger): + model.eval() + + for idx, (images, _) in enumerate(data_loader): + images = images.cuda() + batch_size = images.shape[0] + for i in range(50): + model.forward(images) + flow.cuda.synchronize() + # TODO: add flow.cuda.synchronize() + logger.info(f"throughput averaged with 30 times") + tic1 = time.time() + for i in range(30): + model.forward(images) + + flow.cuda.synchronize() + tic2 = time.time() + logger.info( + f"batch_size {batch_size} throughput {30 * batch_size / (tic2 - tic1)}" + ) + return + + +if __name__ == "__main__": + _, config = parse_option() + if "RANK" in os.environ and "WORLD_SIZE" in os.environ: + rank = flow.env.get_rank() + world_size = flow.env.get_world_size() + print(f"RANK and WORLD_SIZE in environ: 
{rank}/{world_size}") + else: + rank = -1 + world_size = -1 + + seed = config.SEED + flow.env.get_rank() + flow.manual_seed(seed) + np.random.seed(seed) + cudnn.benchmark = True + + # linear_scaled_lr = ( + # config.TRAIN.BASE_LR + # * config.DATA.BATCH_SIZE + # * flow.env.get_world_size() + # / 512.0 + # ) + # linear_scaled_warmup_lr = ( + # config.TRAIN.WARMUP_LR + # * config.DATA.BATCH_SIZE + # * flow.env.get_world_size() + # / 512.0 + # ) + linear_scaled_lr = config.TRAIN.BASE_LR + linear_scaled_warmup_lr = config.TRAIN.WARMUP_LR + + linear_scaled_min_lr = ( + config.TRAIN.MIN_LR * config.DATA.BATCH_SIZE * flow.env.get_world_size() / 512.0 + ) + + # gradient accumulation also need to scale the learning rate + if config.TRAIN.ACCUMULATION_STEPS > 1: + linear_scaled_lr = linear_scaled_lr * config.TRAIN.ACCUMULATION_STEPS + linear_scaled_warmup_lr = ( + linear_scaled_warmup_lr * config.TRAIN.ACCUMULATION_STEPS + ) + linear_scaled_min_lr = linear_scaled_min_lr * config.TRAIN.ACCUMULATION_STEPS + config.defrost() + config.TRAIN.BASE_LR = linear_scaled_lr + config.TRAIN.WARMUP_LR = linear_scaled_warmup_lr + config.TRAIN.MIN_LR = linear_scaled_min_lr + config.freeze() + + os.makedirs(config.OUTPUT, exist_ok=True) + logger = create_logger( + output_dir=config.OUTPUT, + dist_rank=flow.env.get_rank(), + name=f"{config.MODEL.ARCH}", + ) + + if flow.env.get_rank() == 0: + path = os.path.join(config.OUTPUT, "config.json") + with open(path, "w") as f: + f.write(config.dump()) + logger.info(f"Full config saved to {path}") + + # print config + logger.info(config.dump()) + + main(config) diff --git a/cv/classification/deit_small_distilled_patch16_224/optimizer.py b/cv/classification/deit_small_distilled_patch16_224/optimizer.py new file mode 120000 index 00000000..e5dec61d --- /dev/null +++ b/cv/classification/deit_small_distilled_patch16_224/optimizer.py @@ -0,0 +1 @@ +../optimizer.py \ No newline at end of file diff --git 
a/cv/classification/deit_small_distilled_patch16_224/requirements.txt b/cv/classification/deit_small_distilled_patch16_224/requirements.txt new file mode 120000 index 00000000..dc833dd4 --- /dev/null +++ b/cv/classification/deit_small_distilled_patch16_224/requirements.txt @@ -0,0 +1 @@ +../requirements.txt \ No newline at end of file diff --git a/cv/classification/deit_small_distilled_patch16_224/train.sh b/cv/classification/deit_small_distilled_patch16_224/train.sh new file mode 100755 index 00000000..1e03fd2a --- /dev/null +++ b/cv/classification/deit_small_distilled_patch16_224/train.sh @@ -0,0 +1,19 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=4 +PORT=12347 +MODEL_ARCH="deit_small_distilled_patch16_224" +BATCH_SIZE=64 +LEARNING_RATE=5e-5 + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/default_settings.yaml \ + --model_arch $MODEL_ARCH \ + --batch-size $BATCH_SIZE \ + --lr $LEARNING_RATE + diff --git a/cv/classification/deit_small_distilled_patch16_224/utils.py b/cv/classification/deit_small_distilled_patch16_224/utils.py new file mode 120000 index 00000000..50fbc6d8 --- /dev/null +++ b/cv/classification/deit_small_distilled_patch16_224/utils.py @@ -0,0 +1 @@ +../utils.py \ No newline at end of file diff --git a/cv/classification/deit_small_patch16_LS_224/README.md b/cv/classification/deit_small_patch16_LS_224/README.md new file mode 100644 index 00000000..c31e0f0f --- /dev/null +++ b/cv/classification/deit_small_patch16_LS_224/README.md @@ -0,0 +1,66 @@ +## deit_small_patch16_LS_224 + +### Installation +- Install the latest version of OneFlow +```bash +python3 -m pip install oneflow -f https://staging.oneflow.info/branch/master/[PLATFORM] +``` +Find more information on [install oneflow](https://github.com/Oneflow-Inc/oneflow#install-oneflow) + +- Install flowvision + +Then install the latest stable release of flowvision + +```bash +pip 
install flowvision==0.2.1 +``` + +- Install other requirements +```bash +python3 -m pip install -r requirements.txt +``` + +### Dataset +#### ImageNet +For ImageNet dataset, you can download it from http://image-net.org/. We provide the following two ways to load data: + +- For standard folder dataset, move validation images to labeled sub-folders. The file structure should look like: + ```bash + $ tree data + imagenet + ├── train + │ ├── class1 + │ │ ├── img1.jpeg + │ │ ├── img2.jpeg + │ │ └── ... + │ ├── class2 + │ │ ├── img3.jpeg + │ │ └── ... + │ └── ... + └── val + ├── class1 + │ ├── img4.jpeg + │ ├── img5.jpeg + │ └── ... + ├── class2 + │ ├── img6.jpeg + │ └── ... + └── ... + + +### Training + +You can use bash script `train.sh` to train this model. + +```bash +sh train.sh +``` + +### inference + +Bash script `infer.sh` is used to infer the trained model. + +```bash +sh infer.sh +``` + diff --git a/cv/classification/deit_small_patch16_LS_224/config.py b/cv/classification/deit_small_patch16_LS_224/config.py new file mode 120000 index 00000000..3721332c --- /dev/null +++ b/cv/classification/deit_small_patch16_LS_224/config.py @@ -0,0 +1 @@ +../config.py \ No newline at end of file diff --git a/cv/classification/deit_small_patch16_LS_224/configs b/cv/classification/deit_small_patch16_LS_224/configs new file mode 120000 index 00000000..271eaab3 --- /dev/null +++ b/cv/classification/deit_small_patch16_LS_224/configs @@ -0,0 +1 @@ +../configs \ No newline at end of file diff --git a/cv/classification/deit_small_patch16_LS_224/data b/cv/classification/deit_small_patch16_LS_224/data new file mode 120000 index 00000000..4909e06e --- /dev/null +++ b/cv/classification/deit_small_patch16_LS_224/data @@ -0,0 +1 @@ +../data \ No newline at end of file diff --git a/cv/classification/deit_small_patch16_LS_224/infer.sh b/cv/classification/deit_small_patch16_LS_224/infer.sh new file mode 100755 index 00000000..3c9225b7 --- /dev/null +++ 
b/cv/classification/deit_small_patch16_LS_224/infer.sh @@ -0,0 +1,16 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=1 +PORT=12346 +MODEL_ARCH="deit_small_patch16_LS_224" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/default_settings.yaml \ + --model_arch $MODEL_ARCH \ + --throughput + diff --git a/cv/classification/deit_small_patch16_LS_224/logger.py b/cv/classification/deit_small_patch16_LS_224/logger.py new file mode 120000 index 00000000..e1dc8ceb --- /dev/null +++ b/cv/classification/deit_small_patch16_LS_224/logger.py @@ -0,0 +1 @@ +../logger.py \ No newline at end of file diff --git a/cv/classification/deit_small_patch16_LS_224/lr_scheduler.py b/cv/classification/deit_small_patch16_LS_224/lr_scheduler.py new file mode 120000 index 00000000..268fb140 --- /dev/null +++ b/cv/classification/deit_small_patch16_LS_224/lr_scheduler.py @@ -0,0 +1 @@ +../lr_scheduler.py \ No newline at end of file diff --git a/cv/classification/deit_small_patch16_LS_224/main.py b/cv/classification/deit_small_patch16_LS_224/main.py new file mode 100644 index 00000000..cb47530d --- /dev/null +++ b/cv/classification/deit_small_patch16_LS_224/main.py @@ -0,0 +1,460 @@ +""" +Modified from https://github.com/microsoft/Swin-Transformer/blob/main/main.py +""" + +import os +import time +import argparse +import datetime +import numpy as np +import oneflow as flow +import oneflow.backends.cudnn as cudnn + +from flowvision.loss.cross_entropy import ( + LabelSmoothingCrossEntropy, + SoftTargetCrossEntropy, +) +from flowvision.utils.metrics import accuracy +from flowvision.models import ModelCreator + +from config import get_config +from data import build_loader +from lr_scheduler import build_scheduler +from optimizer import build_optimizer +from logger import create_logger +from utils import ( + load_checkpoint, + save_checkpoint, + get_grad_norm, + auto_resume_helper, + 
reduce_tensor, + AverageMeter, + TimeMeter +) + +def build_model(config): + model_arch = config.MODEL.ARCH + model = ModelCreator.create_model(model_arch, pretrained=config.MODEL.PRETRAINED) + return model + + +def parse_option(): + parser = argparse.ArgumentParser( + "Flowvision image classification training and evaluation script", add_help=False + ) + parser.add_argument( + "--model_arch", + type=str, + required=True, + default="swin_tiny_patch4_window7_224", + help="model for training", + ) + parser.add_argument( + "--cfg", type=str, required=True, metavar="FILE", help="path to config file", + ) + parser.add_argument( + "--opts", + help="Modify config options by adding 'KEY VALUE' pairs. ", + default=None, + nargs="+", + ) + + # easy config modification + parser.add_argument( + "--synthetic-data", + action="store_true", + dest="synthetic_data", + help="Use synthetic data", + ) + parser.add_argument( + "--epochs", type=int, help="batch size for single GPU" + ) + parser.add_argument("--lr", type=float, help="initial learning rate") + parser.add_argument( + "--image-size", type=int, help="initial image size" + ) + parser.add_argument( + "--batch-size", type=int, help="batch size for single GPU" + ) + parser.add_argument("--data-path", type=str, help="path to dataset") + parser.add_argument( + "--zip", + action="store_true", + help="use zipped dataset instead of folder dataset", + ) + parser.add_argument( + "--cache-mode", + type=str, + default="part", + choices=["no", "full", "part"], + help="no: no cache, " + "full: cache all data, " + "part: sharding the dataset into nonoverlapping pieces and only cache one piece", + ) + parser.add_argument("--resume", help="resume from checkpoint") + parser.add_argument( + "--accumulation-steps", type=int, help="gradient accumulation steps" + ) + parser.add_argument( + "--use-checkpoint", + action="store_true", + help="whether to use gradient checkpointing to save memory", + ) + parser.add_argument( + "--output", + 
default="output", + type=str, + metavar="PATH", + help="root of output folder, the full path is // (default: output)", + ) + parser.add_argument("--tag", help="tag of experiment") + parser.add_argument("--eval", action="store_true", help="Perform evaluation only") + parser.add_argument( + "--throughput", action="store_true", help="Test throughput only" + ) + + # distributed training + parser.add_argument( + "--local_rank", + type=int, + default=0, + required=False, + help="local rank for DistributedDataParallel", + ) + + args, unparsed = parser.parse_known_args() + + config = get_config(args) + + return args, config + + +def main(config): + ( + dataset_train, + dataset_val, + data_loader_train, + data_loader_val, + mixup_fn, + ) = build_loader(config) + + logger.info(f"Creating model:{config.MODEL.ARCH}") + model = build_model(config) + model.cuda() + + optimizer = build_optimizer(config, model) + model = flow.nn.parallel.DistributedDataParallel(model, broadcast_buffers=False) + # FIXME: model with DDP wrapper doesn't have model.module + model_without_ddp = model + + n_parameters = sum(p.numel() for p in model.parameters() if p.requires_grad) + logger.info(f"number of params: {n_parameters}") + if hasattr(model_without_ddp, "flops"): + flops = model_without_ddp.flops() + logger.info(f"number of GFLOPs: {flops / 1e9}") + + lr_scheduler = build_scheduler(config, optimizer, len(data_loader_train)) + + if config.AUG.MIXUP > 0.0: + # smoothing is handled with mixup label transform + criterion = SoftTargetCrossEntropy() + elif config.MODEL.LABEL_SMOOTHING > 0.0: + criterion = LabelSmoothingCrossEntropy(smoothing=config.MODEL.LABEL_SMOOTHING) + else: + criterion = flow.nn.CrossEntropyLoss() + + max_accuracy = 0.0 + + if config.TRAIN.AUTO_RESUME: + resume_file = auto_resume_helper(config.OUTPUT) + if resume_file: + if config.MODEL.RESUME: + logger.warning( + f"auto-resume changing resume file from {config.MODEL.RESUME} to {resume_file}" + ) + config.defrost() + 
config.MODEL.RESUME = resume_file + config.freeze() + logger.info(f"auto resuming from {resume_file}") + else: + logger.info(f"no checkpoint found in {config.OUTPUT}, ignoring auto resume") + + if config.MODEL.RESUME: + print("resume called") + max_accuracy = load_checkpoint( + config, model_without_ddp, optimizer, lr_scheduler, logger + ) + acc1, acc5, loss = validate(config, data_loader_val, model) + logger.info( + f"Accuracy of the network on the {len(data_loader_val)} test images: {acc1:.1f}%" + ) + if config.EVAL_MODE: + return + + if config.THROUGHPUT_MODE: + acc1, acc5, loss = validate(config, data_loader_val, model) + logger.info( + f"Accuracy of the network on the {len(data_loader_val)} test images: {acc1:.1f}%" + ) + throughput(data_loader_val, model, logger) + return + + logger.info("Start training") + start_time = time.time() + for epoch in range(config.TRAIN.START_EPOCH, config.TRAIN.EPOCHS): + if not config.DATA.SYNTHETIC_DATA: + data_loader_train.sampler.set_epoch(epoch) + + train_one_epoch( + config, + model, + criterion, + data_loader_train, + optimizer, + epoch, + mixup_fn, + lr_scheduler + ) + if flow.env.get_rank() == 0 and ( + epoch % config.SAVE_FREQ == 0 or epoch == (config.TRAIN.EPOCHS - 1) + ): + save_checkpoint( + config, + epoch, + model_without_ddp, + max_accuracy, + optimizer, + lr_scheduler, + logger, + ) + + # no validate + acc1, acc5, loss = validate(config, data_loader_val, model) + logger.info( + f"Accuracy of the network on the {len(data_loader_val)} test images: {acc1:.1f}%" + ) + max_accuracy = max(max_accuracy, acc1) + logger.info(f"Max accuracy: {max_accuracy:.2f}%") + + total_time = time.time() - start_time + total_time_str = str(datetime.timedelta(seconds=int(total_time))) + logger.info("Training time {}".format(total_time_str)) + + +def train_one_epoch( + config, model, criterion, data_loader, optimizer, epoch, mixup_fn, lr_scheduler +): + model.train() + optimizer.zero_grad() + + num_steps = len(data_loader) + 
one_sample_time = TimeMeter() + loss_meter = AverageMeter() + + start = time.time() + end = time.time() + for idx, (samples, targets) in enumerate(data_loader): + samples = samples.cuda() + targets = targets.cuda() + + if mixup_fn is not None: + samples, targets = mixup_fn(samples, targets) + + outputs = model(samples) + if type(outputs) == tuple: + outputs = outputs[0] + + + if config.TRAIN.ACCUMULATION_STEPS > 1: + loss = criterion(outputs, targets) + loss = loss / config.TRAIN.ACCUMULATION_STEPS + loss.backward() + if config.TRAIN.CLIP_GRAD: + flow.nn.utils.clip_grad_norm_( + model.parameters(), config.TRAIN.CLIP_GRAD + ) + if (idx + 1) % config.TRAIN.ACCUMULATION_STEPS == 0: + optimizer.step() + optimizer.zero_grad() + lr_scheduler.step() + else: + loss = criterion(outputs, targets) + optimizer.zero_grad() + loss.backward() + if config.TRAIN.CLIP_GRAD: + flow.nn.utils.clip_grad_norm_( + model.parameters(), config.TRAIN.CLIP_GRAD + ) + optimizer.step() + lr_scheduler.step() + + one_sample_time.record(samples.size(0) * flow.env.get_world_size()) + loss_meter.record(loss.cpu().detach(), targets.size(0)) + + end = time.time() + + if idx % config.PRINT_FREQ == 0: + lr = optimizer.param_groups[0]["lr"] + loss, loss_avg = loss_meter.get() + throughput, throughput_avg = one_sample_time.get() + etas = (num_steps - idx) * samples.size(0) * flow.env.get_world_size() / throughput_avg + one_sample_time.reset() + + logger.info( + f"Train: [{epoch}/{config.TRAIN.EPOCHS}][{idx}/{num_steps}]\t" + f"eta {datetime.timedelta(seconds=int(etas))}\tlr {lr:.6f}\t" + f"time {samples.size(0) * flow.env.get_world_size() / throughput:.4f}s ({samples.size(0) * flow.env.get_world_size() / throughput_avg:.4f}s)\t" + f"rate {throughput:.4f}/s ({throughput_avg:.4f}/s)\t" + f"loss {loss:.4f} ({loss_avg:.4f})\t" + ) + + epoch_time = time.time() - start + logger.info( + f"EPOCH {epoch} training takes {datetime.timedelta(seconds=int(epoch_time))}" + ) + + +@flow.no_grad() +def validate(config, 
data_loader, model): + criterion = flow.nn.CrossEntropyLoss() + model.eval() + + batch_time = TimeMeter() + loss_meter = AverageMeter() + acc1_meter = AverageMeter() + acc5_meter = AverageMeter() + + end = time.time() + for idx, (images, target) in enumerate(data_loader): + images = images.cuda() + target = target.cuda() + + # compute output + output = model.forward(images) + if type(output) == tuple: + output = output[0] + + + # measure accuracy and record loss + loss = criterion(output, target) + acc1, acc5 = accuracy(output, target, topk=(1, 5)) + + acc1 = reduce_tensor(acc1) + acc5 = reduce_tensor(acc5) + loss = reduce_tensor(loss) + + batch_time.record(target.size(0) * flow.env.get_world_size()) + loss_meter.record(loss, target.size(0)) + acc1_meter.record(acc1, target.size(0)) + acc5_meter.record(acc5, target.size(0)) + + # measure elapsed time + end = time.time() + + if idx % config.PRINT_FREQ == 0: + acc1, acc1_avg = acc1_meter.get() + acc5, acc5_avg = acc5_meter.get() + loss, loss_avg = loss_meter.get() + throughput, throughput_avg = batch_time.get() + batch_time.reset() + + logger.info( + f"Test: [{idx}/{len(data_loader)}]\t" + f"Throughput {throughput:.3f} ({throughput_avg:.3f})\t" + f"Loss {loss:.4f} ({loss_avg:.4f})\t" + f"Acc@1 {acc1:.3f} ({acc1_avg:.3f})\t" + f"Acc@5 {acc5:.3f} ({acc5_avg:.3f})\t" + ) + + logger.info(f" * Acc@1 {acc1_avg:.3f} Acc@5 {acc5_avg:.3f}") + return acc1_avg, acc5_avg, loss_meter + + +@flow.no_grad() +def throughput(data_loader, model, logger): + model.eval() + + for idx, (images, _) in enumerate(data_loader): + images = images.cuda() + batch_size = images.shape[0] + for i in range(50): + model.forward(images) + flow.cuda.synchronize() + # TODO: add flow.cuda.synchronize() + logger.info(f"throughput averaged with 30 times") + tic1 = time.time() + for i in range(30): + model.forward(images) + + flow.cuda.synchronize() + tic2 = time.time() + logger.info( + f"batch_size {batch_size} throughput {30 * batch_size / (tic2 - tic1)}" 
+ ) + return + + +if __name__ == "__main__": + _, config = parse_option() + if "RANK" in os.environ and "WORLD_SIZE" in os.environ: + rank = flow.env.get_rank() + world_size = flow.env.get_world_size() + print(f"RANK and WORLD_SIZE in environ: {rank}/{world_size}") + else: + rank = -1 + world_size = -1 + + seed = config.SEED + flow.env.get_rank() + flow.manual_seed(seed) + np.random.seed(seed) + cudnn.benchmark = True + + # linear_scaled_lr = ( + # config.TRAIN.BASE_LR + # * config.DATA.BATCH_SIZE + # * flow.env.get_world_size() + # / 512.0 + # ) + # linear_scaled_warmup_lr = ( + # config.TRAIN.WARMUP_LR + # * config.DATA.BATCH_SIZE + # * flow.env.get_world_size() + # / 512.0 + # ) + linear_scaled_lr = config.TRAIN.BASE_LR + linear_scaled_warmup_lr = config.TRAIN.WARMUP_LR + + linear_scaled_min_lr = ( + config.TRAIN.MIN_LR * config.DATA.BATCH_SIZE * flow.env.get_world_size() / 512.0 + ) + + # gradient accumulation also need to scale the learning rate + if config.TRAIN.ACCUMULATION_STEPS > 1: + linear_scaled_lr = linear_scaled_lr * config.TRAIN.ACCUMULATION_STEPS + linear_scaled_warmup_lr = ( + linear_scaled_warmup_lr * config.TRAIN.ACCUMULATION_STEPS + ) + linear_scaled_min_lr = linear_scaled_min_lr * config.TRAIN.ACCUMULATION_STEPS + config.defrost() + config.TRAIN.BASE_LR = linear_scaled_lr + config.TRAIN.WARMUP_LR = linear_scaled_warmup_lr + config.TRAIN.MIN_LR = linear_scaled_min_lr + config.freeze() + + os.makedirs(config.OUTPUT, exist_ok=True) + logger = create_logger( + output_dir=config.OUTPUT, + dist_rank=flow.env.get_rank(), + name=f"{config.MODEL.ARCH}", + ) + + if flow.env.get_rank() == 0: + path = os.path.join(config.OUTPUT, "config.json") + with open(path, "w") as f: + f.write(config.dump()) + logger.info(f"Full config saved to {path}") + + # print config + logger.info(config.dump()) + + main(config) diff --git a/cv/classification/deit_small_patch16_LS_224/optimizer.py b/cv/classification/deit_small_patch16_LS_224/optimizer.py new file mode 120000 
index 00000000..e5dec61d --- /dev/null +++ b/cv/classification/deit_small_patch16_LS_224/optimizer.py @@ -0,0 +1 @@ +../optimizer.py \ No newline at end of file diff --git a/cv/classification/deit_small_patch16_LS_224/requirements.txt b/cv/classification/deit_small_patch16_LS_224/requirements.txt new file mode 120000 index 00000000..dc833dd4 --- /dev/null +++ b/cv/classification/deit_small_patch16_LS_224/requirements.txt @@ -0,0 +1 @@ +../requirements.txt \ No newline at end of file diff --git a/cv/classification/deit_small_patch16_LS_224/train.sh b/cv/classification/deit_small_patch16_LS_224/train.sh new file mode 100755 index 00000000..3b9c939f --- /dev/null +++ b/cv/classification/deit_small_patch16_LS_224/train.sh @@ -0,0 +1,16 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=4 +PORT=12346 +MODEL_ARCH="deit_small_patch16_LS_224" +export CUDA_VISIBLE_DEVICES=4,5,6,7 +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/default_settings.yaml \ + --model_arch $MODEL_ARCH \ + --lr 0.001 + diff --git a/cv/classification/deit_small_patch16_LS_224/utils.py b/cv/classification/deit_small_patch16_LS_224/utils.py new file mode 120000 index 00000000..50fbc6d8 --- /dev/null +++ b/cv/classification/deit_small_patch16_LS_224/utils.py @@ -0,0 +1 @@ +../utils.py \ No newline at end of file diff --git a/cv/classification/deit_small_patch16_LS_224_in21k/README.md b/cv/classification/deit_small_patch16_LS_224_in21k/README.md new file mode 100644 index 00000000..e32161c7 --- /dev/null +++ b/cv/classification/deit_small_patch16_LS_224_in21k/README.md @@ -0,0 +1,66 @@ +## deit_small_patch16_LS_224_in21k + +### Installation +- Install the latest version of OneFlow +```bash +python3 -m pip install oneflow -f https://staging.oneflow.info/branch/master/[PLATFORM] +``` +Find more information on [install oneflow](https://github.com/Oneflow-Inc/oneflow#install-oneflow) + +- Install 
flowvision + +Then install the latest stable release of flowvision + +```bash +pip install flowvision==0.2.1 +``` + +- Install other requirements +```bash +python3 -m pip install -r requirements.txt +``` + +### Dataset +#### ImageNet +For ImageNet dataset, you can download it from http://image-net.org/. We provide the following two ways to load data: + +- For standard folder dataset, move validation images to labeled sub-folders. The file structure should look like: + ```bash + $ tree data + imagenet + ├── train + │ ├── class1 + │ │ ├── img1.jpeg + │ │ ├── img2.jpeg + │ │ └── ... + │ ├── class2 + │ │ ├── img3.jpeg + │ │ └── ... + │ └── ... + └── val + ├── class1 + │ ├── img4.jpeg + │ ├── img5.jpeg + │ └── ... + ├── class2 + │ ├── img6.jpeg + │ └── ... + └── ... + + +### Training + +You can use bash script `train.sh` to train this model. + +```bash +sh train.sh +``` + +### inference + +Bash script `infer.sh` is used to infer the trained model. + +```bash +sh infer.sh +``` + diff --git a/cv/classification/deit_small_patch16_LS_224_in21k/config.py b/cv/classification/deit_small_patch16_LS_224_in21k/config.py new file mode 120000 index 00000000..3721332c --- /dev/null +++ b/cv/classification/deit_small_patch16_LS_224_in21k/config.py @@ -0,0 +1 @@ +../config.py \ No newline at end of file diff --git a/cv/classification/deit_small_patch16_LS_224_in21k/configs b/cv/classification/deit_small_patch16_LS_224_in21k/configs new file mode 120000 index 00000000..271eaab3 --- /dev/null +++ b/cv/classification/deit_small_patch16_LS_224_in21k/configs @@ -0,0 +1 @@ +../configs \ No newline at end of file diff --git a/cv/classification/deit_small_patch16_LS_224_in21k/data b/cv/classification/deit_small_patch16_LS_224_in21k/data new file mode 120000 index 00000000..4909e06e --- /dev/null +++ b/cv/classification/deit_small_patch16_LS_224_in21k/data @@ -0,0 +1 @@ +../data \ No newline at end of file diff --git a/cv/classification/deit_small_patch16_LS_224_in21k/infer.sh 
b/cv/classification/deit_small_patch16_LS_224_in21k/infer.sh new file mode 100755 index 00000000..ce34f643 --- /dev/null +++ b/cv/classification/deit_small_patch16_LS_224_in21k/infer.sh @@ -0,0 +1,16 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=1 +PORT=12346 +MODEL_ARCH="deit_small_patch16_LS_224_in21k" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/default_settings.yaml \ + --model_arch $MODEL_ARCH \ + --throughput + diff --git a/cv/classification/deit_small_patch16_LS_224_in21k/logger.py b/cv/classification/deit_small_patch16_LS_224_in21k/logger.py new file mode 120000 index 00000000..e1dc8ceb --- /dev/null +++ b/cv/classification/deit_small_patch16_LS_224_in21k/logger.py @@ -0,0 +1 @@ +../logger.py \ No newline at end of file diff --git a/cv/classification/deit_small_patch16_LS_224_in21k/lr_scheduler.py b/cv/classification/deit_small_patch16_LS_224_in21k/lr_scheduler.py new file mode 120000 index 00000000..268fb140 --- /dev/null +++ b/cv/classification/deit_small_patch16_LS_224_in21k/lr_scheduler.py @@ -0,0 +1 @@ +../lr_scheduler.py \ No newline at end of file diff --git a/cv/classification/deit_small_patch16_LS_224_in21k/main.py b/cv/classification/deit_small_patch16_LS_224_in21k/main.py new file mode 120000 index 00000000..f05f7527 --- /dev/null +++ b/cv/classification/deit_small_patch16_LS_224_in21k/main.py @@ -0,0 +1 @@ +../main.py \ No newline at end of file diff --git a/cv/classification/deit_small_patch16_LS_224_in21k/optimizer.py b/cv/classification/deit_small_patch16_LS_224_in21k/optimizer.py new file mode 120000 index 00000000..e5dec61d --- /dev/null +++ b/cv/classification/deit_small_patch16_LS_224_in21k/optimizer.py @@ -0,0 +1 @@ +../optimizer.py \ No newline at end of file diff --git a/cv/classification/deit_small_patch16_LS_224_in21k/requirements.txt b/cv/classification/deit_small_patch16_LS_224_in21k/requirements.txt new file 
mode 120000 index 00000000..dc833dd4 --- /dev/null +++ b/cv/classification/deit_small_patch16_LS_224_in21k/requirements.txt @@ -0,0 +1 @@ +../requirements.txt \ No newline at end of file diff --git a/cv/classification/deit_small_patch16_LS_224_in21k/train.sh b/cv/classification/deit_small_patch16_LS_224_in21k/train.sh new file mode 100755 index 00000000..96b3277a --- /dev/null +++ b/cv/classification/deit_small_patch16_LS_224_in21k/train.sh @@ -0,0 +1,18 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=4 +PORT=12346 +MODEL_ARCH="deit_small_patch16_LS_224_in21k" +BATCH_SIZE=32 +LEARNING_RATE=5e-5 + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/default_settings.yaml \ + --model_arch $MODEL_ARCH \ + --batch-size $BATCH_SIZE \ + --lr $LEARNING_RATE \ No newline at end of file diff --git a/cv/classification/deit_small_patch16_LS_224_in21k/utils.py b/cv/classification/deit_small_patch16_LS_224_in21k/utils.py new file mode 120000 index 00000000..50fbc6d8 --- /dev/null +++ b/cv/classification/deit_small_patch16_LS_224_in21k/utils.py @@ -0,0 +1 @@ +../utils.py \ No newline at end of file diff --git a/cv/classification/deit_small_patch16_LS_384/README.md b/cv/classification/deit_small_patch16_LS_384/README.md new file mode 100644 index 00000000..544cbb4f --- /dev/null +++ b/cv/classification/deit_small_patch16_LS_384/README.md @@ -0,0 +1,66 @@ +## deit_small_patch16_LS_384 + +### Installation +- Install the latest version of OneFlow +```bash +python3 -m pip install oneflow -f https://staging.oneflow.info/branch/master/[PLATFORM] +``` +Find more information on [install oneflow](https://github.com/Oneflow-Inc/oneflow#install-oneflow) + +- Install flowvision + +Then install the latest stable release of flowvision + +```bash +pip install flowvision==0.2.1 +``` + +- Install other requirements +```bash +python3 -m pip install -r requirements.txt +``` + +### Dataset 
+#### ImageNet +For ImageNet dataset, you can download it from http://image-net.org/. We provide the following two ways to load data: + +- For standard folder dataset, move validation images to labeled sub-folders. The file structure should look like: + ```bash + $ tree data + imagenet + ├── train + │ ├── class1 + │ │ ├── img1.jpeg + │ │ ├── img2.jpeg + │ │ └── ... + │ ├── class2 + │ │ ├── img3.jpeg + │ │ └── ... + │ └── ... + └── val + ├── class1 + │ ├── img4.jpeg + │ ├── img5.jpeg + │ └── ... + ├── class2 + │ ├── img6.jpeg + │ └── ... + └── ... + + +### Training + +You can use bash script `train.sh` to train this model. + +```bash +sh train.sh +``` + +### inference + +Bash script `infer.sh` is used to infer the trained model. + +```bash +sh infer.sh +``` + diff --git a/cv/classification/deit_small_patch16_LS_384/config.py b/cv/classification/deit_small_patch16_LS_384/config.py new file mode 120000 index 00000000..3721332c --- /dev/null +++ b/cv/classification/deit_small_patch16_LS_384/config.py @@ -0,0 +1 @@ +../config.py \ No newline at end of file diff --git a/cv/classification/deit_small_patch16_LS_384/configs b/cv/classification/deit_small_patch16_LS_384/configs new file mode 120000 index 00000000..271eaab3 --- /dev/null +++ b/cv/classification/deit_small_patch16_LS_384/configs @@ -0,0 +1 @@ +../configs \ No newline at end of file diff --git a/cv/classification/deit_small_patch16_LS_384/data b/cv/classification/deit_small_patch16_LS_384/data new file mode 120000 index 00000000..4909e06e --- /dev/null +++ b/cv/classification/deit_small_patch16_LS_384/data @@ -0,0 +1 @@ +../data \ No newline at end of file diff --git a/cv/classification/deit_small_patch16_LS_384/infer.sh b/cv/classification/deit_small_patch16_LS_384/infer.sh new file mode 100755 index 00000000..e8a84578 --- /dev/null +++ b/cv/classification/deit_small_patch16_LS_384/infer.sh @@ -0,0 +1,16 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=1 +PORT=12346 
+MODEL_ARCH="deit_small_patch16_LS_384" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/deit_settings.yaml\ + --model_arch $MODEL_ARCH \ + --throughput + diff --git a/cv/classification/deit_small_patch16_LS_384/logger.py b/cv/classification/deit_small_patch16_LS_384/logger.py new file mode 120000 index 00000000..e1dc8ceb --- /dev/null +++ b/cv/classification/deit_small_patch16_LS_384/logger.py @@ -0,0 +1 @@ +../logger.py \ No newline at end of file diff --git a/cv/classification/deit_small_patch16_LS_384/lr_scheduler.py b/cv/classification/deit_small_patch16_LS_384/lr_scheduler.py new file mode 120000 index 00000000..268fb140 --- /dev/null +++ b/cv/classification/deit_small_patch16_LS_384/lr_scheduler.py @@ -0,0 +1 @@ +../lr_scheduler.py \ No newline at end of file diff --git a/cv/classification/deit_small_patch16_LS_384/main.py b/cv/classification/deit_small_patch16_LS_384/main.py new file mode 100644 index 00000000..cb47530d --- /dev/null +++ b/cv/classification/deit_small_patch16_LS_384/main.py @@ -0,0 +1,460 @@ +""" +Modified from https://github.com/microsoft/Swin-Transformer/blob/main/main.py +""" + +import os +import time +import argparse +import datetime +import numpy as np +import oneflow as flow +import oneflow.backends.cudnn as cudnn + +from flowvision.loss.cross_entropy import ( + LabelSmoothingCrossEntropy, + SoftTargetCrossEntropy, +) +from flowvision.utils.metrics import accuracy +from flowvision.models import ModelCreator + +from config import get_config +from data import build_loader +from lr_scheduler import build_scheduler +from optimizer import build_optimizer +from logger import create_logger +from utils import ( + load_checkpoint, + save_checkpoint, + get_grad_norm, + auto_resume_helper, + reduce_tensor, + AverageMeter, + TimeMeter +) + +def build_model(config): + model_arch = config.MODEL.ARCH + model = 
ModelCreator.create_model(model_arch, pretrained=config.MODEL.PRETRAINED) + return model + + +def parse_option(): + parser = argparse.ArgumentParser( + "Flowvision image classification training and evaluation script", add_help=False + ) + parser.add_argument( + "--model_arch", + type=str, + required=True, + default="swin_tiny_patch4_window7_224", + help="model for training", + ) + parser.add_argument( + "--cfg", type=str, required=True, metavar="FILE", help="path to config file", + ) + parser.add_argument( + "--opts", + help="Modify config options by adding 'KEY VALUE' pairs. ", + default=None, + nargs="+", + ) + + # easy config modification + parser.add_argument( + "--synthetic-data", + action="store_true", + dest="synthetic_data", + help="Use synthetic data", + ) + parser.add_argument( + "--epochs", type=int, help="batch size for single GPU" + ) + parser.add_argument("--lr", type=float, help="initial learning rate") + parser.add_argument( + "--image-size", type=int, help="initial image size" + ) + parser.add_argument( + "--batch-size", type=int, help="batch size for single GPU" + ) + parser.add_argument("--data-path", type=str, help="path to dataset") + parser.add_argument( + "--zip", + action="store_true", + help="use zipped dataset instead of folder dataset", + ) + parser.add_argument( + "--cache-mode", + type=str, + default="part", + choices=["no", "full", "part"], + help="no: no cache, " + "full: cache all data, " + "part: sharding the dataset into nonoverlapping pieces and only cache one piece", + ) + parser.add_argument("--resume", help="resume from checkpoint") + parser.add_argument( + "--accumulation-steps", type=int, help="gradient accumulation steps" + ) + parser.add_argument( + "--use-checkpoint", + action="store_true", + help="whether to use gradient checkpointing to save memory", + ) + parser.add_argument( + "--output", + default="output", + type=str, + metavar="PATH", + help="root of output folder, the full path is // (default: output)", + ) + 
parser.add_argument("--tag", help="tag of experiment") + parser.add_argument("--eval", action="store_true", help="Perform evaluation only") + parser.add_argument( + "--throughput", action="store_true", help="Test throughput only" + ) + + # distributed training + parser.add_argument( + "--local_rank", + type=int, + default=0, + required=False, + help="local rank for DistributedDataParallel", + ) + + args, unparsed = parser.parse_known_args() + + config = get_config(args) + + return args, config + + +def main(config): + ( + dataset_train, + dataset_val, + data_loader_train, + data_loader_val, + mixup_fn, + ) = build_loader(config) + + logger.info(f"Creating model:{config.MODEL.ARCH}") + model = build_model(config) + model.cuda() + + optimizer = build_optimizer(config, model) + model = flow.nn.parallel.DistributedDataParallel(model, broadcast_buffers=False) + # FIXME: model with DDP wrapper doesn't have model.module + model_without_ddp = model + + n_parameters = sum(p.numel() for p in model.parameters() if p.requires_grad) + logger.info(f"number of params: {n_parameters}") + if hasattr(model_without_ddp, "flops"): + flops = model_without_ddp.flops() + logger.info(f"number of GFLOPs: {flops / 1e9}") + + lr_scheduler = build_scheduler(config, optimizer, len(data_loader_train)) + + if config.AUG.MIXUP > 0.0: + # smoothing is handled with mixup label transform + criterion = SoftTargetCrossEntropy() + elif config.MODEL.LABEL_SMOOTHING > 0.0: + criterion = LabelSmoothingCrossEntropy(smoothing=config.MODEL.LABEL_SMOOTHING) + else: + criterion = flow.nn.CrossEntropyLoss() + + max_accuracy = 0.0 + + if config.TRAIN.AUTO_RESUME: + resume_file = auto_resume_helper(config.OUTPUT) + if resume_file: + if config.MODEL.RESUME: + logger.warning( + f"auto-resume changing resume file from {config.MODEL.RESUME} to {resume_file}" + ) + config.defrost() + config.MODEL.RESUME = resume_file + config.freeze() + logger.info(f"auto resuming from {resume_file}") + else: + logger.info(f"no 
checkpoint found in {config.OUTPUT}, ignoring auto resume") + + if config.MODEL.RESUME: + print("resume called") + max_accuracy = load_checkpoint( + config, model_without_ddp, optimizer, lr_scheduler, logger + ) + acc1, acc5, loss = validate(config, data_loader_val, model) + logger.info( + f"Accuracy of the network on the {len(data_loader_val)} test images: {acc1:.1f}%" + ) + if config.EVAL_MODE: + return + + if config.THROUGHPUT_MODE: + acc1, acc5, loss = validate(config, data_loader_val, model) + logger.info( + f"Accuracy of the network on the {len(data_loader_val)} test images: {acc1:.1f}%" + ) + throughput(data_loader_val, model, logger) + return + + logger.info("Start training") + start_time = time.time() + for epoch in range(config.TRAIN.START_EPOCH, config.TRAIN.EPOCHS): + if not config.DATA.SYNTHETIC_DATA: + data_loader_train.sampler.set_epoch(epoch) + + train_one_epoch( + config, + model, + criterion, + data_loader_train, + optimizer, + epoch, + mixup_fn, + lr_scheduler + ) + if flow.env.get_rank() == 0 and ( + epoch % config.SAVE_FREQ == 0 or epoch == (config.TRAIN.EPOCHS - 1) + ): + save_checkpoint( + config, + epoch, + model_without_ddp, + max_accuracy, + optimizer, + lr_scheduler, + logger, + ) + + # no validate + acc1, acc5, loss = validate(config, data_loader_val, model) + logger.info( + f"Accuracy of the network on the {len(data_loader_val)} test images: {acc1:.1f}%" + ) + max_accuracy = max(max_accuracy, acc1) + logger.info(f"Max accuracy: {max_accuracy:.2f}%") + + total_time = time.time() - start_time + total_time_str = str(datetime.timedelta(seconds=int(total_time))) + logger.info("Training time {}".format(total_time_str)) + + +def train_one_epoch( + config, model, criterion, data_loader, optimizer, epoch, mixup_fn, lr_scheduler +): + model.train() + optimizer.zero_grad() + + num_steps = len(data_loader) + one_sample_time = TimeMeter() + loss_meter = AverageMeter() + + start = time.time() + end = time.time() + for idx, (samples, targets) in 
enumerate(data_loader): + samples = samples.cuda() + targets = targets.cuda() + + if mixup_fn is not None: + samples, targets = mixup_fn(samples, targets) + + outputs = model(samples) + if type(outputs) == tuple: + outputs = outputs[0] + + + if config.TRAIN.ACCUMULATION_STEPS > 1: + loss = criterion(outputs, targets) + loss = loss / config.TRAIN.ACCUMULATION_STEPS + loss.backward() + if config.TRAIN.CLIP_GRAD: + flow.nn.utils.clip_grad_norm_( + model.parameters(), config.TRAIN.CLIP_GRAD + ) + if (idx + 1) % config.TRAIN.ACCUMULATION_STEPS == 0: + optimizer.step() + optimizer.zero_grad() + lr_scheduler.step() + else: + loss = criterion(outputs, targets) + optimizer.zero_grad() + loss.backward() + if config.TRAIN.CLIP_GRAD: + flow.nn.utils.clip_grad_norm_( + model.parameters(), config.TRAIN.CLIP_GRAD + ) + optimizer.step() + lr_scheduler.step() + + one_sample_time.record(samples.size(0) * flow.env.get_world_size()) + loss_meter.record(loss.cpu().detach(), targets.size(0)) + + end = time.time() + + if idx % config.PRINT_FREQ == 0: + lr = optimizer.param_groups[0]["lr"] + loss, loss_avg = loss_meter.get() + throughput, throughput_avg = one_sample_time.get() + etas = (num_steps - idx) * samples.size(0) * flow.env.get_world_size() / throughput_avg + one_sample_time.reset() + + logger.info( + f"Train: [{epoch}/{config.TRAIN.EPOCHS}][{idx}/{num_steps}]\t" + f"eta {datetime.timedelta(seconds=int(etas))}\tlr {lr:.6f}\t" + f"time {samples.size(0) * flow.env.get_world_size() / throughput:.4f}s ({samples.size(0) * flow.env.get_world_size() / throughput_avg:.4f}s)\t" + f"rate {throughput:.4f}/s ({throughput_avg:.4f}/s)\t" + f"loss {loss:.4f} ({loss_avg:.4f})\t" + ) + + epoch_time = time.time() - start + logger.info( + f"EPOCH {epoch} training takes {datetime.timedelta(seconds=int(epoch_time))}" + ) + + +@flow.no_grad() +def validate(config, data_loader, model): + criterion = flow.nn.CrossEntropyLoss() + model.eval() + + batch_time = TimeMeter() + loss_meter = AverageMeter() + 
acc1_meter = AverageMeter() + acc5_meter = AverageMeter() + + end = time.time() + for idx, (images, target) in enumerate(data_loader): + images = images.cuda() + target = target.cuda() + + # compute output + output = model.forward(images) + if type(output) == tuple: + output = output[0] + + + # measure accuracy and record loss + loss = criterion(output, target) + acc1, acc5 = accuracy(output, target, topk=(1, 5)) + + acc1 = reduce_tensor(acc1) + acc5 = reduce_tensor(acc5) + loss = reduce_tensor(loss) + + batch_time.record(target.size(0) * flow.env.get_world_size()) + loss_meter.record(loss, target.size(0)) + acc1_meter.record(acc1, target.size(0)) + acc5_meter.record(acc5, target.size(0)) + + # measure elapsed time + end = time.time() + + if idx % config.PRINT_FREQ == 0: + acc1, acc1_avg = acc1_meter.get() + acc5, acc5_avg = acc5_meter.get() + loss, loss_avg = loss_meter.get() + throughput, throughput_avg = batch_time.get() + batch_time.reset() + + logger.info( + f"Test: [{idx}/{len(data_loader)}]\t" + f"Throughput {throughput:.3f} ({throughput_avg:.3f})\t" + f"Loss {loss:.4f} ({loss_avg:.4f})\t" + f"Acc@1 {acc1:.3f} ({acc1_avg:.3f})\t" + f"Acc@5 {acc5:.3f} ({acc5_avg:.3f})\t" + ) + + logger.info(f" * Acc@1 {acc1_avg:.3f} Acc@5 {acc5_avg:.3f}") + return acc1_avg, acc5_avg, loss_meter + + +@flow.no_grad() +def throughput(data_loader, model, logger): + model.eval() + + for idx, (images, _) in enumerate(data_loader): + images = images.cuda() + batch_size = images.shape[0] + for i in range(50): + model.forward(images) + flow.cuda.synchronize() + # TODO: add flow.cuda.synchronize() + logger.info(f"throughput averaged with 30 times") + tic1 = time.time() + for i in range(30): + model.forward(images) + + flow.cuda.synchronize() + tic2 = time.time() + logger.info( + f"batch_size {batch_size} throughput {30 * batch_size / (tic2 - tic1)}" + ) + return + + +if __name__ == "__main__": + _, config = parse_option() + if "RANK" in os.environ and "WORLD_SIZE" in os.environ: + rank 
= flow.env.get_rank() + world_size = flow.env.get_world_size() + print(f"RANK and WORLD_SIZE in environ: {rank}/{world_size}") + else: + rank = -1 + world_size = -1 + + seed = config.SEED + flow.env.get_rank() + flow.manual_seed(seed) + np.random.seed(seed) + cudnn.benchmark = True + + # linear_scaled_lr = ( + # config.TRAIN.BASE_LR + # * config.DATA.BATCH_SIZE + # * flow.env.get_world_size() + # / 512.0 + # ) + # linear_scaled_warmup_lr = ( + # config.TRAIN.WARMUP_LR + # * config.DATA.BATCH_SIZE + # * flow.env.get_world_size() + # / 512.0 + # ) + linear_scaled_lr = config.TRAIN.BASE_LR + linear_scaled_warmup_lr = config.TRAIN.WARMUP_LR + + linear_scaled_min_lr = ( + config.TRAIN.MIN_LR * config.DATA.BATCH_SIZE * flow.env.get_world_size() / 512.0 + ) + + # gradient accumulation also need to scale the learning rate + if config.TRAIN.ACCUMULATION_STEPS > 1: + linear_scaled_lr = linear_scaled_lr * config.TRAIN.ACCUMULATION_STEPS + linear_scaled_warmup_lr = ( + linear_scaled_warmup_lr * config.TRAIN.ACCUMULATION_STEPS + ) + linear_scaled_min_lr = linear_scaled_min_lr * config.TRAIN.ACCUMULATION_STEPS + config.defrost() + config.TRAIN.BASE_LR = linear_scaled_lr + config.TRAIN.WARMUP_LR = linear_scaled_warmup_lr + config.TRAIN.MIN_LR = linear_scaled_min_lr + config.freeze() + + os.makedirs(config.OUTPUT, exist_ok=True) + logger = create_logger( + output_dir=config.OUTPUT, + dist_rank=flow.env.get_rank(), + name=f"{config.MODEL.ARCH}", + ) + + if flow.env.get_rank() == 0: + path = os.path.join(config.OUTPUT, "config.json") + with open(path, "w") as f: + f.write(config.dump()) + logger.info(f"Full config saved to {path}") + + # print config + logger.info(config.dump()) + + main(config) diff --git a/cv/classification/deit_small_patch16_LS_384/optimizer.py b/cv/classification/deit_small_patch16_LS_384/optimizer.py new file mode 120000 index 00000000..e5dec61d --- /dev/null +++ b/cv/classification/deit_small_patch16_LS_384/optimizer.py @@ -0,0 +1 @@ +../optimizer.py \ No 
newline at end of file diff --git a/cv/classification/deit_small_patch16_LS_384/requirements.txt b/cv/classification/deit_small_patch16_LS_384/requirements.txt new file mode 120000 index 00000000..dc833dd4 --- /dev/null +++ b/cv/classification/deit_small_patch16_LS_384/requirements.txt @@ -0,0 +1 @@ +../requirements.txt \ No newline at end of file diff --git a/cv/classification/deit_small_patch16_LS_384/train.sh b/cv/classification/deit_small_patch16_LS_384/train.sh new file mode 100755 index 00000000..6be5979f --- /dev/null +++ b/cv/classification/deit_small_patch16_LS_384/train.sh @@ -0,0 +1,19 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=4 +PORT=12346 +MODEL_ARCH="deit_small_patch16_LS_384" +BATCH_SIZE=32 +LEARNING_RATE=5e-5 + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/deit_settings.yaml\ + --model_arch $MODEL_ARCH \ + --batch-size $BATCH_SIZE \ + --lr $LEARNING_RATE + diff --git a/cv/classification/deit_small_patch16_LS_384/utils.py b/cv/classification/deit_small_patch16_LS_384/utils.py new file mode 120000 index 00000000..50fbc6d8 --- /dev/null +++ b/cv/classification/deit_small_patch16_LS_384/utils.py @@ -0,0 +1 @@ +../utils.py \ No newline at end of file diff --git a/cv/classification/deit_small_patch16_LS_384_in21k/README.md b/cv/classification/deit_small_patch16_LS_384_in21k/README.md new file mode 100644 index 00000000..a52bfe3a --- /dev/null +++ b/cv/classification/deit_small_patch16_LS_384_in21k/README.md @@ -0,0 +1,66 @@ +## deit_small_patch16_LS_384_in21k + +### Installation +- Install the latest version of OneFlow +```bash +python3 -m pip install oneflow -f https://staging.oneflow.info/branch/master/[PLATFORM] +``` +Find more information on [install oneflow](https://github.com/Oneflow-Inc/oneflow#install-oneflow) + +- Install flowvision + +Then install the latest stable release of flowvision + +```bash +pip install 
flowvision==0.2.1 +``` + +- Install other requirements +```bash +python3 -m pip install -r requirements.txt +``` + +### Dataset +#### ImageNet +For ImageNet dataset, you can download it from http://image-net.org/. We provide the following two ways to load data: + +- For standard folder dataset, move validation images to labeled sub-folders. The file structure should look like: + ```bash + $ tree data + imagenet + ├── train + │ ├── class1 + │ │ ├── img1.jpeg + │ │ ├── img2.jpeg + │ │ └── ... + │ ├── class2 + │ │ ├── img3.jpeg + │ │ └── ... + │ └── ... + └── val + ├── class1 + │ ├── img4.jpeg + │ ├── img5.jpeg + │ └── ... + ├── class2 + │ ├── img6.jpeg + │ └── ... + └── ... + + +### Training + +You can use bash script `train.sh` to train this model. + +```bash +sh train.sh +``` + +### inference + +Bash script `infer.sh` is used to infer the trained model. + +```bash +sh infer.sh +``` + diff --git a/cv/classification/deit_small_patch16_LS_384_in21k/config.py b/cv/classification/deit_small_patch16_LS_384_in21k/config.py new file mode 120000 index 00000000..3721332c --- /dev/null +++ b/cv/classification/deit_small_patch16_LS_384_in21k/config.py @@ -0,0 +1 @@ +../config.py \ No newline at end of file diff --git a/cv/classification/deit_small_patch16_LS_384_in21k/configs b/cv/classification/deit_small_patch16_LS_384_in21k/configs new file mode 120000 index 00000000..271eaab3 --- /dev/null +++ b/cv/classification/deit_small_patch16_LS_384_in21k/configs @@ -0,0 +1 @@ +../configs \ No newline at end of file diff --git a/cv/classification/deit_small_patch16_LS_384_in21k/data b/cv/classification/deit_small_patch16_LS_384_in21k/data new file mode 120000 index 00000000..4909e06e --- /dev/null +++ b/cv/classification/deit_small_patch16_LS_384_in21k/data @@ -0,0 +1 @@ +../data \ No newline at end of file diff --git a/cv/classification/deit_small_patch16_LS_384_in21k/infer.sh b/cv/classification/deit_small_patch16_LS_384_in21k/infer.sh new file mode 100755 index 00000000..17c0faf7 
--- /dev/null +++ b/cv/classification/deit_small_patch16_LS_384_in21k/infer.sh @@ -0,0 +1,16 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=1 +PORT=12346 +MODEL_ARCH="deit_small_patch16_LS_384_in21k" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/deit_settings.yaml\ + --model_arch $MODEL_ARCH \ + --throughput + diff --git a/cv/classification/deit_small_patch16_LS_384_in21k/logger.py b/cv/classification/deit_small_patch16_LS_384_in21k/logger.py new file mode 120000 index 00000000..e1dc8ceb --- /dev/null +++ b/cv/classification/deit_small_patch16_LS_384_in21k/logger.py @@ -0,0 +1 @@ +../logger.py \ No newline at end of file diff --git a/cv/classification/deit_small_patch16_LS_384_in21k/lr_scheduler.py b/cv/classification/deit_small_patch16_LS_384_in21k/lr_scheduler.py new file mode 120000 index 00000000..268fb140 --- /dev/null +++ b/cv/classification/deit_small_patch16_LS_384_in21k/lr_scheduler.py @@ -0,0 +1 @@ +../lr_scheduler.py \ No newline at end of file diff --git a/cv/classification/deit_small_patch16_LS_384_in21k/main.py b/cv/classification/deit_small_patch16_LS_384_in21k/main.py new file mode 120000 index 00000000..f05f7527 --- /dev/null +++ b/cv/classification/deit_small_patch16_LS_384_in21k/main.py @@ -0,0 +1 @@ +../main.py \ No newline at end of file diff --git a/cv/classification/deit_small_patch16_LS_384_in21k/optimizer.py b/cv/classification/deit_small_patch16_LS_384_in21k/optimizer.py new file mode 120000 index 00000000..e5dec61d --- /dev/null +++ b/cv/classification/deit_small_patch16_LS_384_in21k/optimizer.py @@ -0,0 +1 @@ +../optimizer.py \ No newline at end of file diff --git a/cv/classification/deit_small_patch16_LS_384_in21k/requirements.txt b/cv/classification/deit_small_patch16_LS_384_in21k/requirements.txt new file mode 120000 index 00000000..dc833dd4 --- /dev/null +++ 
b/cv/classification/deit_small_patch16_LS_384_in21k/requirements.txt @@ -0,0 +1 @@ +../requirements.txt \ No newline at end of file diff --git a/cv/classification/deit_small_patch16_LS_384_in21k/train.sh b/cv/classification/deit_small_patch16_LS_384_in21k/train.sh new file mode 100755 index 00000000..b2934e05 --- /dev/null +++ b/cv/classification/deit_small_patch16_LS_384_in21k/train.sh @@ -0,0 +1,19 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=4 +PORT=12346 +MODEL_ARCH="deit_small_patch16_LS_384_in21k" +BATCH_SIZE=32 +LEARNING_RATE=5e-5 + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/deit_settings.yaml\ + --model_arch $MODEL_ARCH \ + --batch-size $BATCH_SIZE \ + --lr $LEARNING_RATE + diff --git a/cv/classification/deit_small_patch16_LS_384_in21k/utils.py b/cv/classification/deit_small_patch16_LS_384_in21k/utils.py new file mode 120000 index 00000000..50fbc6d8 --- /dev/null +++ b/cv/classification/deit_small_patch16_LS_384_in21k/utils.py @@ -0,0 +1 @@ +../utils.py \ No newline at end of file diff --git a/cv/classification/deit_tiny_distilled_patch16_224/README.md b/cv/classification/deit_tiny_distilled_patch16_224/README.md new file mode 100644 index 00000000..cc3ca948 --- /dev/null +++ b/cv/classification/deit_tiny_distilled_patch16_224/README.md @@ -0,0 +1,66 @@ +## deit_tiny_distilled_patch16_224 + +### Installation +- Install the latest version of OneFlow +```bash +python3 -m pip install oneflow -f https://staging.oneflow.info/branch/master/[PLATFORM] +``` +Find more information on [install oneflow](https://github.com/Oneflow-Inc/oneflow#install-oneflow) + +- Install flowvision + +Then install the latest stable release of flowvision + +```bash +pip install flowvision==0.2.1 +``` + +- Install other requirements +```bash +python3 -m pip install -r requirements.txt +``` + +### Dataset +#### ImageNet +For ImageNet dataset, you can download it 
from http://image-net.org/. We provide the following two ways to load data: + +- For standard folder dataset, move validation images to labeled sub-folders. The file structure should look like: + ```bash + $ tree data + imagenet + ├── train + │ ├── class1 + │ │ ├── img1.jpeg + │ │ ├── img2.jpeg + │ │ └── ... + │ ├── class2 + │ │ ├── img3.jpeg + │ │ └── ... + │ └── ... + └── val + ├── class1 + │ ├── img4.jpeg + │ ├── img5.jpeg + │ └── ... + ├── class2 + │ ├── img6.jpeg + │ └── ... + └── ... + + +### Training + +You can use bash script `train.sh` to train this model. + +```bash +sh train.sh +``` + +### inference + +Bash script `infer.sh` is used to infer the trained model. + +```bash +sh infer.sh +``` + diff --git a/cv/classification/deit_tiny_distilled_patch16_224/config.py b/cv/classification/deit_tiny_distilled_patch16_224/config.py new file mode 120000 index 00000000..3721332c --- /dev/null +++ b/cv/classification/deit_tiny_distilled_patch16_224/config.py @@ -0,0 +1 @@ +../config.py \ No newline at end of file diff --git a/cv/classification/deit_tiny_distilled_patch16_224/configs b/cv/classification/deit_tiny_distilled_patch16_224/configs new file mode 120000 index 00000000..271eaab3 --- /dev/null +++ b/cv/classification/deit_tiny_distilled_patch16_224/configs @@ -0,0 +1 @@ +../configs \ No newline at end of file diff --git a/cv/classification/deit_tiny_distilled_patch16_224/data b/cv/classification/deit_tiny_distilled_patch16_224/data new file mode 120000 index 00000000..4909e06e --- /dev/null +++ b/cv/classification/deit_tiny_distilled_patch16_224/data @@ -0,0 +1 @@ +../data \ No newline at end of file diff --git a/cv/classification/deit_tiny_distilled_patch16_224/infer.sh b/cv/classification/deit_tiny_distilled_patch16_224/infer.sh new file mode 100755 index 00000000..2ecbedae --- /dev/null +++ b/cv/classification/deit_tiny_distilled_patch16_224/infer.sh @@ -0,0 +1,16 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=1 +PORT=12346 
+MODEL_ARCH="deit_tiny_distilled_patch16_224" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/default_settings.yaml \ + --model_arch $MODEL_ARCH \ + --throughput + diff --git a/cv/classification/deit_tiny_distilled_patch16_224/logger.py b/cv/classification/deit_tiny_distilled_patch16_224/logger.py new file mode 120000 index 00000000..e1dc8ceb --- /dev/null +++ b/cv/classification/deit_tiny_distilled_patch16_224/logger.py @@ -0,0 +1 @@ +../logger.py \ No newline at end of file diff --git a/cv/classification/deit_tiny_distilled_patch16_224/lr_scheduler.py b/cv/classification/deit_tiny_distilled_patch16_224/lr_scheduler.py new file mode 120000 index 00000000..268fb140 --- /dev/null +++ b/cv/classification/deit_tiny_distilled_patch16_224/lr_scheduler.py @@ -0,0 +1 @@ +../lr_scheduler.py \ No newline at end of file diff --git a/cv/classification/deit_tiny_distilled_patch16_224/main.py b/cv/classification/deit_tiny_distilled_patch16_224/main.py new file mode 100644 index 00000000..cb47530d --- /dev/null +++ b/cv/classification/deit_tiny_distilled_patch16_224/main.py @@ -0,0 +1,460 @@ +""" +Modified from https://github.com/microsoft/Swin-Transformer/blob/main/main.py +""" + +import os +import time +import argparse +import datetime +import numpy as np +import oneflow as flow +import oneflow.backends.cudnn as cudnn + +from flowvision.loss.cross_entropy import ( + LabelSmoothingCrossEntropy, + SoftTargetCrossEntropy, +) +from flowvision.utils.metrics import accuracy +from flowvision.models import ModelCreator + +from config import get_config +from data import build_loader +from lr_scheduler import build_scheduler +from optimizer import build_optimizer +from logger import create_logger +from utils import ( + load_checkpoint, + save_checkpoint, + get_grad_norm, + auto_resume_helper, + reduce_tensor, + AverageMeter, + TimeMeter +) + +def build_model(config): + model_arch = 
config.MODEL.ARCH + model = ModelCreator.create_model(model_arch, pretrained=config.MODEL.PRETRAINED) + return model + + +def parse_option(): + parser = argparse.ArgumentParser( + "Flowvision image classification training and evaluation script", add_help=False + ) + parser.add_argument( + "--model_arch", + type=str, + required=True, + default="swin_tiny_patch4_window7_224", + help="model for training", + ) + parser.add_argument( + "--cfg", type=str, required=True, metavar="FILE", help="path to config file", + ) + parser.add_argument( + "--opts", + help="Modify config options by adding 'KEY VALUE' pairs. ", + default=None, + nargs="+", + ) + + # easy config modification + parser.add_argument( + "--synthetic-data", + action="store_true", + dest="synthetic_data", + help="Use synthetic data", + ) + parser.add_argument( + "--epochs", type=int, help="batch size for single GPU" + ) + parser.add_argument("--lr", type=float, help="initial learning rate") + parser.add_argument( + "--image-size", type=int, help="initial image size" + ) + parser.add_argument( + "--batch-size", type=int, help="batch size for single GPU" + ) + parser.add_argument("--data-path", type=str, help="path to dataset") + parser.add_argument( + "--zip", + action="store_true", + help="use zipped dataset instead of folder dataset", + ) + parser.add_argument( + "--cache-mode", + type=str, + default="part", + choices=["no", "full", "part"], + help="no: no cache, " + "full: cache all data, " + "part: sharding the dataset into nonoverlapping pieces and only cache one piece", + ) + parser.add_argument("--resume", help="resume from checkpoint") + parser.add_argument( + "--accumulation-steps", type=int, help="gradient accumulation steps" + ) + parser.add_argument( + "--use-checkpoint", + action="store_true", + help="whether to use gradient checkpointing to save memory", + ) + parser.add_argument( + "--output", + default="output", + type=str, + metavar="PATH", + help="root of output folder, the full path is // 
(default: output)", + ) + parser.add_argument("--tag", help="tag of experiment") + parser.add_argument("--eval", action="store_true", help="Perform evaluation only") + parser.add_argument( + "--throughput", action="store_true", help="Test throughput only" + ) + + # distributed training + parser.add_argument( + "--local_rank", + type=int, + default=0, + required=False, + help="local rank for DistributedDataParallel", + ) + + args, unparsed = parser.parse_known_args() + + config = get_config(args) + + return args, config + + +def main(config): + ( + dataset_train, + dataset_val, + data_loader_train, + data_loader_val, + mixup_fn, + ) = build_loader(config) + + logger.info(f"Creating model:{config.MODEL.ARCH}") + model = build_model(config) + model.cuda() + + optimizer = build_optimizer(config, model) + model = flow.nn.parallel.DistributedDataParallel(model, broadcast_buffers=False) + # FIXME: model with DDP wrapper doesn't have model.module + model_without_ddp = model + + n_parameters = sum(p.numel() for p in model.parameters() if p.requires_grad) + logger.info(f"number of params: {n_parameters}") + if hasattr(model_without_ddp, "flops"): + flops = model_without_ddp.flops() + logger.info(f"number of GFLOPs: {flops / 1e9}") + + lr_scheduler = build_scheduler(config, optimizer, len(data_loader_train)) + + if config.AUG.MIXUP > 0.0: + # smoothing is handled with mixup label transform + criterion = SoftTargetCrossEntropy() + elif config.MODEL.LABEL_SMOOTHING > 0.0: + criterion = LabelSmoothingCrossEntropy(smoothing=config.MODEL.LABEL_SMOOTHING) + else: + criterion = flow.nn.CrossEntropyLoss() + + max_accuracy = 0.0 + + if config.TRAIN.AUTO_RESUME: + resume_file = auto_resume_helper(config.OUTPUT) + if resume_file: + if config.MODEL.RESUME: + logger.warning( + f"auto-resume changing resume file from {config.MODEL.RESUME} to {resume_file}" + ) + config.defrost() + config.MODEL.RESUME = resume_file + config.freeze() + logger.info(f"auto resuming from {resume_file}") + else: 
+ logger.info(f"no checkpoint found in {config.OUTPUT}, ignoring auto resume") + + if config.MODEL.RESUME: + print("resume called") + max_accuracy = load_checkpoint( + config, model_without_ddp, optimizer, lr_scheduler, logger + ) + acc1, acc5, loss = validate(config, data_loader_val, model) + logger.info( + f"Accuracy of the network on the {len(data_loader_val)} test images: {acc1:.1f}%" + ) + if config.EVAL_MODE: + return + + if config.THROUGHPUT_MODE: + acc1, acc5, loss = validate(config, data_loader_val, model) + logger.info( + f"Accuracy of the network on the {len(data_loader_val)} test images: {acc1:.1f}%" + ) + throughput(data_loader_val, model, logger) + return + + logger.info("Start training") + start_time = time.time() + for epoch in range(config.TRAIN.START_EPOCH, config.TRAIN.EPOCHS): + if not config.DATA.SYNTHETIC_DATA: + data_loader_train.sampler.set_epoch(epoch) + + train_one_epoch( + config, + model, + criterion, + data_loader_train, + optimizer, + epoch, + mixup_fn, + lr_scheduler + ) + if flow.env.get_rank() == 0 and ( + epoch % config.SAVE_FREQ == 0 or epoch == (config.TRAIN.EPOCHS - 1) + ): + save_checkpoint( + config, + epoch, + model_without_ddp, + max_accuracy, + optimizer, + lr_scheduler, + logger, + ) + + # no validate + acc1, acc5, loss = validate(config, data_loader_val, model) + logger.info( + f"Accuracy of the network on the {len(data_loader_val)} test images: {acc1:.1f}%" + ) + max_accuracy = max(max_accuracy, acc1) + logger.info(f"Max accuracy: {max_accuracy:.2f}%") + + total_time = time.time() - start_time + total_time_str = str(datetime.timedelta(seconds=int(total_time))) + logger.info("Training time {}".format(total_time_str)) + + +def train_one_epoch( + config, model, criterion, data_loader, optimizer, epoch, mixup_fn, lr_scheduler +): + model.train() + optimizer.zero_grad() + + num_steps = len(data_loader) + one_sample_time = TimeMeter() + loss_meter = AverageMeter() + + start = time.time() + end = time.time() + for idx, 
(samples, targets) in enumerate(data_loader): + samples = samples.cuda() + targets = targets.cuda() + + if mixup_fn is not None: + samples, targets = mixup_fn(samples, targets) + + outputs = model(samples) + if type(outputs) == tuple: + outputs = outputs[0] + + + if config.TRAIN.ACCUMULATION_STEPS > 1: + loss = criterion(outputs, targets) + loss = loss / config.TRAIN.ACCUMULATION_STEPS + loss.backward() + if config.TRAIN.CLIP_GRAD: + flow.nn.utils.clip_grad_norm_( + model.parameters(), config.TRAIN.CLIP_GRAD + ) + if (idx + 1) % config.TRAIN.ACCUMULATION_STEPS == 0: + optimizer.step() + optimizer.zero_grad() + lr_scheduler.step() + else: + loss = criterion(outputs, targets) + optimizer.zero_grad() + loss.backward() + if config.TRAIN.CLIP_GRAD: + flow.nn.utils.clip_grad_norm_( + model.parameters(), config.TRAIN.CLIP_GRAD + ) + optimizer.step() + lr_scheduler.step() + + one_sample_time.record(samples.size(0) * flow.env.get_world_size()) + loss_meter.record(loss.cpu().detach(), targets.size(0)) + + end = time.time() + + if idx % config.PRINT_FREQ == 0: + lr = optimizer.param_groups[0]["lr"] + loss, loss_avg = loss_meter.get() + throughput, throughput_avg = one_sample_time.get() + etas = (num_steps - idx) * samples.size(0) * flow.env.get_world_size() / throughput_avg + one_sample_time.reset() + + logger.info( + f"Train: [{epoch}/{config.TRAIN.EPOCHS}][{idx}/{num_steps}]\t" + f"eta {datetime.timedelta(seconds=int(etas))}\tlr {lr:.6f}\t" + f"time {samples.size(0) * flow.env.get_world_size() / throughput:.4f}s ({samples.size(0) * flow.env.get_world_size() / throughput_avg:.4f}s)\t" + f"rate {throughput:.4f}/s ({throughput_avg:.4f}/s)\t" + f"loss {loss:.4f} ({loss_avg:.4f})\t" + ) + + epoch_time = time.time() - start + logger.info( + f"EPOCH {epoch} training takes {datetime.timedelta(seconds=int(epoch_time))}" + ) + + +@flow.no_grad() +def validate(config, data_loader, model): + criterion = flow.nn.CrossEntropyLoss() + model.eval() + + batch_time = TimeMeter() + loss_meter 
= AverageMeter() + acc1_meter = AverageMeter() + acc5_meter = AverageMeter() + + end = time.time() + for idx, (images, target) in enumerate(data_loader): + images = images.cuda() + target = target.cuda() + + # compute output + output = model.forward(images) + if type(output) == tuple: + output = output[0] + + + # measure accuracy and record loss + loss = criterion(output, target) + acc1, acc5 = accuracy(output, target, topk=(1, 5)) + + acc1 = reduce_tensor(acc1) + acc5 = reduce_tensor(acc5) + loss = reduce_tensor(loss) + + batch_time.record(target.size(0) * flow.env.get_world_size()) + loss_meter.record(loss, target.size(0)) + acc1_meter.record(acc1, target.size(0)) + acc5_meter.record(acc5, target.size(0)) + + # measure elapsed time + end = time.time() + + if idx % config.PRINT_FREQ == 0: + acc1, acc1_avg = acc1_meter.get() + acc5, acc5_avg = acc5_meter.get() + loss, loss_avg = loss_meter.get() + throughput, throughput_avg = batch_time.get() + batch_time.reset() + + logger.info( + f"Test: [{idx}/{len(data_loader)}]\t" + f"Throughput {throughput:.3f} ({throughput_avg:.3f})\t" + f"Loss {loss:.4f} ({loss_avg:.4f})\t" + f"Acc@1 {acc1:.3f} ({acc1_avg:.3f})\t" + f"Acc@5 {acc5:.3f} ({acc5_avg:.3f})\t" + ) + + logger.info(f" * Acc@1 {acc1_avg:.3f} Acc@5 {acc5_avg:.3f}") + return acc1_avg, acc5_avg, loss_meter + + +@flow.no_grad() +def throughput(data_loader, model, logger): + model.eval() + + for idx, (images, _) in enumerate(data_loader): + images = images.cuda() + batch_size = images.shape[0] + for i in range(50): + model.forward(images) + flow.cuda.synchronize() + # TODO: add flow.cuda.synchronize() + logger.info(f"throughput averaged with 30 times") + tic1 = time.time() + for i in range(30): + model.forward(images) + + flow.cuda.synchronize() + tic2 = time.time() + logger.info( + f"batch_size {batch_size} throughput {30 * batch_size / (tic2 - tic1)}" + ) + return + + +if __name__ == "__main__": + _, config = parse_option() + if "RANK" in os.environ and "WORLD_SIZE" in 
os.environ: + rank = flow.env.get_rank() + world_size = flow.env.get_world_size() + print(f"RANK and WORLD_SIZE in environ: {rank}/{world_size}") + else: + rank = -1 + world_size = -1 + + seed = config.SEED + flow.env.get_rank() + flow.manual_seed(seed) + np.random.seed(seed) + cudnn.benchmark = True + + # linear_scaled_lr = ( + # config.TRAIN.BASE_LR + # * config.DATA.BATCH_SIZE + # * flow.env.get_world_size() + # / 512.0 + # ) + # linear_scaled_warmup_lr = ( + # config.TRAIN.WARMUP_LR + # * config.DATA.BATCH_SIZE + # * flow.env.get_world_size() + # / 512.0 + # ) + linear_scaled_lr = config.TRAIN.BASE_LR + linear_scaled_warmup_lr = config.TRAIN.WARMUP_LR + + linear_scaled_min_lr = ( + config.TRAIN.MIN_LR * config.DATA.BATCH_SIZE * flow.env.get_world_size() / 512.0 + ) + + # gradient accumulation also need to scale the learning rate + if config.TRAIN.ACCUMULATION_STEPS > 1: + linear_scaled_lr = linear_scaled_lr * config.TRAIN.ACCUMULATION_STEPS + linear_scaled_warmup_lr = ( + linear_scaled_warmup_lr * config.TRAIN.ACCUMULATION_STEPS + ) + linear_scaled_min_lr = linear_scaled_min_lr * config.TRAIN.ACCUMULATION_STEPS + config.defrost() + config.TRAIN.BASE_LR = linear_scaled_lr + config.TRAIN.WARMUP_LR = linear_scaled_warmup_lr + config.TRAIN.MIN_LR = linear_scaled_min_lr + config.freeze() + + os.makedirs(config.OUTPUT, exist_ok=True) + logger = create_logger( + output_dir=config.OUTPUT, + dist_rank=flow.env.get_rank(), + name=f"{config.MODEL.ARCH}", + ) + + if flow.env.get_rank() == 0: + path = os.path.join(config.OUTPUT, "config.json") + with open(path, "w") as f: + f.write(config.dump()) + logger.info(f"Full config saved to {path}") + + # print config + logger.info(config.dump()) + + main(config) diff --git a/cv/classification/deit_tiny_distilled_patch16_224/optimizer.py b/cv/classification/deit_tiny_distilled_patch16_224/optimizer.py new file mode 120000 index 00000000..e5dec61d --- /dev/null +++ b/cv/classification/deit_tiny_distilled_patch16_224/optimizer.py @@ 
-0,0 +1 @@ +../optimizer.py \ No newline at end of file diff --git a/cv/classification/deit_tiny_distilled_patch16_224/requirements.txt b/cv/classification/deit_tiny_distilled_patch16_224/requirements.txt new file mode 120000 index 00000000..dc833dd4 --- /dev/null +++ b/cv/classification/deit_tiny_distilled_patch16_224/requirements.txt @@ -0,0 +1 @@ +../requirements.txt \ No newline at end of file diff --git a/cv/classification/deit_tiny_distilled_patch16_224/train.sh b/cv/classification/deit_tiny_distilled_patch16_224/train.sh new file mode 100755 index 00000000..6e4140bb --- /dev/null +++ b/cv/classification/deit_tiny_distilled_patch16_224/train.sh @@ -0,0 +1,18 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=4 +PORT=12377 +MODEL_ARCH="deit_tiny_distilled_patch16_224" +BATCH_SIZE=64 +LEARNING_RATE=5e-5 + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/default_settings.yaml \ + --model_arch $MODEL_ARCH \ + --batch-size $BATCH_SIZE \ + --lr $LEARNING_RATE \ No newline at end of file diff --git a/cv/classification/deit_tiny_distilled_patch16_224/utils.py b/cv/classification/deit_tiny_distilled_patch16_224/utils.py new file mode 120000 index 00000000..50fbc6d8 --- /dev/null +++ b/cv/classification/deit_tiny_distilled_patch16_224/utils.py @@ -0,0 +1 @@ +../utils.py \ No newline at end of file diff --git a/cv/classification/deit_tiny_patch16_224/README.md b/cv/classification/deit_tiny_patch16_224/README.md new file mode 100644 index 00000000..082817a7 --- /dev/null +++ b/cv/classification/deit_tiny_patch16_224/README.md @@ -0,0 +1,66 @@ +## deit_tiny_patch16_224 + +### Installation +- Install the latest version of OneFlow +```bash +python3 -m pip install oneflow -f https://staging.oneflow.info/branch/master/[PLATFORM] +``` +Find more information on [install oneflow](https://github.com/Oneflow-Inc/oneflow#install-oneflow) + +- Install flowvision + +Then 
install the latest stable release of flowvision + +```bash +pip install flowvision==0.2.1 +``` + +- Install other requirements +```bash +python3 -m pip install -r requirements.txt +``` + +### Dataset +#### ImageNet +For ImageNet dataset, you can download it from http://image-net.org/. We provide the following two ways to load data: + +- For standard folder dataset, move validation images to labeled sub-folders. The file structure should look like: + ```bash + $ tree data + imagenet + ├── train + │ ├── class1 + │ │ ├── img1.jpeg + │ │ ├── img2.jpeg + │ │ └── ... + │ ├── class2 + │ │ ├── img3.jpeg + │ │ └── ... + │ └── ... + └── val + ├── class1 + │ ├── img4.jpeg + │ ├── img5.jpeg + │ └── ... + ├── class2 + │ ├── img6.jpeg + │ └── ... + └── ... + + +### Training + +You can use bash script `train.sh` to train this model. + +```bash +sh train.sh +``` + +### inference + +Bash script `infer.sh` is used to infer the trained model. + +```bash +sh infer.sh +``` + diff --git a/cv/classification/deit_tiny_patch16_224/config.py b/cv/classification/deit_tiny_patch16_224/config.py new file mode 120000 index 00000000..3721332c --- /dev/null +++ b/cv/classification/deit_tiny_patch16_224/config.py @@ -0,0 +1 @@ +../config.py \ No newline at end of file diff --git a/cv/classification/deit_tiny_patch16_224/configs b/cv/classification/deit_tiny_patch16_224/configs new file mode 120000 index 00000000..271eaab3 --- /dev/null +++ b/cv/classification/deit_tiny_patch16_224/configs @@ -0,0 +1 @@ +../configs \ No newline at end of file diff --git a/cv/classification/deit_tiny_patch16_224/data b/cv/classification/deit_tiny_patch16_224/data new file mode 120000 index 00000000..4909e06e --- /dev/null +++ b/cv/classification/deit_tiny_patch16_224/data @@ -0,0 +1 @@ +../data \ No newline at end of file diff --git a/cv/classification/deit_tiny_patch16_224/infer.sh b/cv/classification/deit_tiny_patch16_224/infer.sh new file mode 100755 index 00000000..62194910 --- /dev/null +++ 
b/cv/classification/deit_tiny_patch16_224/infer.sh @@ -0,0 +1,16 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=1 +PORT=12346 +MODEL_ARCH="deit_tiny_patch16_224" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/default_settings.yaml \ + --model_arch $MODEL_ARCH \ + --throughput + diff --git a/cv/classification/deit_tiny_patch16_224/logger.py b/cv/classification/deit_tiny_patch16_224/logger.py new file mode 120000 index 00000000..e1dc8ceb --- /dev/null +++ b/cv/classification/deit_tiny_patch16_224/logger.py @@ -0,0 +1 @@ +../logger.py \ No newline at end of file diff --git a/cv/classification/deit_tiny_patch16_224/lr_scheduler.py b/cv/classification/deit_tiny_patch16_224/lr_scheduler.py new file mode 120000 index 00000000..268fb140 --- /dev/null +++ b/cv/classification/deit_tiny_patch16_224/lr_scheduler.py @@ -0,0 +1 @@ +../lr_scheduler.py \ No newline at end of file diff --git a/cv/classification/deit_tiny_patch16_224/main.py b/cv/classification/deit_tiny_patch16_224/main.py new file mode 120000 index 00000000..f05f7527 --- /dev/null +++ b/cv/classification/deit_tiny_patch16_224/main.py @@ -0,0 +1 @@ +../main.py \ No newline at end of file diff --git a/cv/classification/deit_tiny_patch16_224/optimizer.py b/cv/classification/deit_tiny_patch16_224/optimizer.py new file mode 120000 index 00000000..e5dec61d --- /dev/null +++ b/cv/classification/deit_tiny_patch16_224/optimizer.py @@ -0,0 +1 @@ +../optimizer.py \ No newline at end of file diff --git a/cv/classification/deit_tiny_patch16_224/requirements.txt b/cv/classification/deit_tiny_patch16_224/requirements.txt new file mode 120000 index 00000000..dc833dd4 --- /dev/null +++ b/cv/classification/deit_tiny_patch16_224/requirements.txt @@ -0,0 +1 @@ +../requirements.txt \ No newline at end of file diff --git a/cv/classification/deit_tiny_patch16_224/train.sh 
b/cv/classification/deit_tiny_patch16_224/train.sh new file mode 100755 index 00000000..ef82ec38 --- /dev/null +++ b/cv/classification/deit_tiny_patch16_224/train.sh @@ -0,0 +1,18 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=4 +PORT=12346 +MODEL_ARCH="deit_tiny_patch16_224" +BATCH_SIZE=256 +LEARNING_RATE=5e-5 + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/default_settings.yaml \ + --model_arch $MODEL_ARCH \ + --batch-size $BATCH_SIZE \ + --lr $LEARNING_RATE \ No newline at end of file diff --git a/cv/classification/deit_tiny_patch16_224/utils.py b/cv/classification/deit_tiny_patch16_224/utils.py new file mode 120000 index 00000000..50fbc6d8 --- /dev/null +++ b/cv/classification/deit_tiny_patch16_224/utils.py @@ -0,0 +1 @@ +../utils.py \ No newline at end of file diff --git a/cv/classification/densenet121/readme.md b/cv/classification/densenet121/readme.md index e3b73287..70abe9f9 100644 --- a/cv/classification/densenet121/readme.md +++ b/cv/classification/densenet121/readme.md @@ -91,12 +91,19 @@ For ImageNet dataset, you can download it from http://image-net.org/. We provide ### Training -- ddp training with simple bash file + +You can use bash script `train.sh` to train this model. + ```bash +sh train.sh +``` -bash train.sh +### Inference + +Bash script `infer.sh` is used to infer the trained model. + +```bash +sh infer.sh ``` -## Reference -- [Swin-Transformer](https://github.com/microsoft/Swin-Transformer) diff --git a/cv/classification/densenet169/readme.md b/cv/classification/densenet169/readme.md index 20c7a06f..28f9e04a 100644 --- a/cv/classification/densenet169/readme.md +++ b/cv/classification/densenet169/readme.md @@ -91,12 +91,17 @@ For ImageNet dataset, you can download it from http://image-net.org/. 
We provide ### Training -- ddp training with simple bash file -```bash -bash train.sh +You can use bash script `train.sh` to train this model. + +```bash +sh train.sh ``` +### inference -## Reference -- [Swin-Transformer](https://github.com/microsoft/Swin-Transformer) \ No newline at end of file +Bash script `infer.sh` is used to infer the trained model. + +```bash +sh infer.sh +``` diff --git a/cv/classification/densenet201/README.md b/cv/classification/densenet201/README.md index 20c7a06f..4f3cbe02 100644 --- a/cv/classification/densenet201/README.md +++ b/cv/classification/densenet201/README.md @@ -89,14 +89,21 @@ For ImageNet dataset, you can download it from http://image-net.org/. We provide ``` - ### Training -- ddp training with simple bash file + +You can use bash script `train.sh` to train this model. + ```bash +sh train.sh +``` -bash train.sh +### inference + +Bash script `infer.sh` is used to infer the trained model. + +```bash +sh infer.sh ``` -## Reference -- [Swin-Transformer](https://github.com/microsoft/Swin-Transformer) \ No newline at end of file + diff --git a/cv/classification/dla102/README.md b/cv/classification/dla102/README.md new file mode 100644 index 00000000..0e413694 --- /dev/null +++ b/cv/classification/dla102/README.md @@ -0,0 +1,66 @@ +## dla102 + +### Installation +- Install the latest version of OneFlow +```bash +python3 -m pip install oneflow -f https://staging.oneflow.info/branch/master/[PLATFORM] +``` +Find more information on [install oneflow](https://github.com/Oneflow-Inc/oneflow#install-oneflow) + +- Install flowvision + +Then install the latest stable release of flowvision + +```bash +pip install flowvision==0.2.1 +``` + +- Install other requirements +```bash +python3 -m pip install -r requirements.txt +``` + +### Dataset +#### ImageNet +For ImageNet dataset, you can download it from http://image-net.org/. 
We provide the following two ways to load data: + +- For standard folder dataset, move validation images to labeled sub-folders. The file structure should look like: + ```bash + $ tree data + imagenet + ├── train + │ ├── class1 + │ │ ├── img1.jpeg + │ │ ├── img2.jpeg + │ │ └── ... + │ ├── class2 + │ │ ├── img3.jpeg + │ │ └── ... + │ └── ... + └── val + ├── class1 + │ ├── img4.jpeg + │ ├── img5.jpeg + │ └── ... + ├── class2 + │ ├── img6.jpeg + │ └── ... + └── ... + + +### Training + +You can use bash script `train.sh` to train this model. + +```bash +sh train.sh +``` + +### inference + +Bash script `infer.sh` is used to infer the trained model. + +```bash +sh infer.sh +``` + diff --git a/cv/classification/dla102/config.py b/cv/classification/dla102/config.py new file mode 120000 index 00000000..3721332c --- /dev/null +++ b/cv/classification/dla102/config.py @@ -0,0 +1 @@ +../config.py \ No newline at end of file diff --git a/cv/classification/dla102/configs b/cv/classification/dla102/configs new file mode 120000 index 00000000..271eaab3 --- /dev/null +++ b/cv/classification/dla102/configs @@ -0,0 +1 @@ +../configs \ No newline at end of file diff --git a/cv/classification/dla102/data b/cv/classification/dla102/data new file mode 120000 index 00000000..4909e06e --- /dev/null +++ b/cv/classification/dla102/data @@ -0,0 +1 @@ +../data \ No newline at end of file diff --git a/cv/classification/dla102/infer.sh b/cv/classification/dla102/infer.sh new file mode 100755 index 00000000..c757d4c6 --- /dev/null +++ b/cv/classification/dla102/infer.sh @@ -0,0 +1,17 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=1 +PORT=12346 +MODEL_ARCH="dla102" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/default_settings.yaml \ + --model_arch $MODEL_ARCH \ + --lr 1e-3 \ + --throughput + diff --git a/cv/classification/dla102/logger.py b/cv/classification/dla102/logger.py 
new file mode 120000 index 00000000..e1dc8ceb --- /dev/null +++ b/cv/classification/dla102/logger.py @@ -0,0 +1 @@ +../logger.py \ No newline at end of file diff --git a/cv/classification/dla102/lr_scheduler.py b/cv/classification/dla102/lr_scheduler.py new file mode 120000 index 00000000..268fb140 --- /dev/null +++ b/cv/classification/dla102/lr_scheduler.py @@ -0,0 +1 @@ +../lr_scheduler.py \ No newline at end of file diff --git a/cv/classification/dla102/main.py b/cv/classification/dla102/main.py new file mode 100644 index 00000000..c2d27d1b --- /dev/null +++ b/cv/classification/dla102/main.py @@ -0,0 +1,455 @@ +""" +Modified from https://github.com/microsoft/Swin-Transformer/blob/main/main.py +""" + +import os +import time +import argparse +import datetime +import numpy as np +import oneflow as flow +import oneflow.backends.cudnn as cudnn + +from flowvision.loss.cross_entropy import ( + LabelSmoothingCrossEntropy, + SoftTargetCrossEntropy, +) +from flowvision.utils.metrics import accuracy +from flowvision.models import ModelCreator + +from config import get_config +from data import build_loader +from lr_scheduler import build_scheduler +from optimizer import build_optimizer +from logger import create_logger +from utils import ( + load_checkpoint, + save_checkpoint, + get_grad_norm, + auto_resume_helper, + reduce_tensor, + AverageMeter, + TimeMeter +) + +def build_model(config): + model_arch = config.MODEL.ARCH + model = ModelCreator.create_model(model_arch, pretrained=config.MODEL.PRETRAINED) + return model + + +def parse_option(): + parser = argparse.ArgumentParser( + "Flowvision image classification training and evaluation script", add_help=False + ) + parser.add_argument( + "--model_arch", + type=str, + required=True, + default="swin_tiny_patch4_window7_224", + help="model for training", + ) + parser.add_argument( + "--cfg", type=str, required=True, metavar="FILE", help="path to config file", + ) + parser.add_argument( + "--opts", + help="Modify config 
options by adding 'KEY VALUE' pairs. ", + default=None, + nargs="+", + ) + + # easy config modification + parser.add_argument( + "--synthetic-data", + action="store_true", + dest="synthetic_data", + help="Use synthetic data", + ) + parser.add_argument( + "--epochs", type=int, help="batch size for single GPU" + ) + parser.add_argument("--lr", type=float, help="initial learning rate") + parser.add_argument( + "--image-size", type=int, help="initial image size" + ) + parser.add_argument( + "--batch-size", type=int, help="batch size for single GPU" + ) + parser.add_argument("--data-path", type=str, help="path to dataset") + parser.add_argument( + "--zip", + action="store_true", + help="use zipped dataset instead of folder dataset", + ) + parser.add_argument( + "--cache-mode", + type=str, + default="part", + choices=["no", "full", "part"], + help="no: no cache, " + "full: cache all data, " + "part: sharding the dataset into nonoverlapping pieces and only cache one piece", + ) + parser.add_argument("--resume", help="resume from checkpoint") + parser.add_argument( + "--accumulation-steps", type=int, help="gradient accumulation steps" + ) + parser.add_argument( + "--use-checkpoint", + action="store_true", + help="whether to use gradient checkpointing to save memory", + ) + parser.add_argument( + "--output", + default="output", + type=str, + metavar="PATH", + help="root of output folder, the full path is // (default: output)", + ) + parser.add_argument("--tag", help="tag of experiment") + parser.add_argument("--eval", action="store_true", help="Perform evaluation only") + parser.add_argument( + "--throughput", action="store_true", help="Test throughput only" + ) + + # distributed training + parser.add_argument( + "--local_rank", + type=int, + default=0, + required=False, + help="local rank for DistributedDataParallel", + ) + + args, unparsed = parser.parse_known_args() + + config = get_config(args) + + return args, config + + +def main(config): + ( + dataset_train, + 
dataset_val, + data_loader_train, + data_loader_val, + mixup_fn, + ) = build_loader(config) + + logger.info(f"Creating model:{config.MODEL.ARCH}") + model = build_model(config) + model.cuda() + + optimizer = build_optimizer(config, model) + model = flow.nn.parallel.DistributedDataParallel(model, broadcast_buffers=False) + # FIXME: model with DDP wrapper doesn't have model.module + model_without_ddp = model + + n_parameters = sum(p.numel() for p in model.parameters() if p.requires_grad) + logger.info(f"number of params: {n_parameters}") + if hasattr(model_without_ddp, "flops"): + flops = model_without_ddp.flops() + logger.info(f"number of GFLOPs: {flops / 1e9}") + + lr_scheduler = build_scheduler(config, optimizer, len(data_loader_train)) + + if config.AUG.MIXUP > 0.0: + # smoothing is handled with mixup label transform + criterion = SoftTargetCrossEntropy() + elif config.MODEL.LABEL_SMOOTHING > 0.0: + criterion = LabelSmoothingCrossEntropy(smoothing=config.MODEL.LABEL_SMOOTHING) + else: + criterion = flow.nn.CrossEntropyLoss() + + max_accuracy = 0.0 + + if config.TRAIN.AUTO_RESUME: + resume_file = auto_resume_helper(config.OUTPUT) + if resume_file: + if config.MODEL.RESUME: + logger.warning( + f"auto-resume changing resume file from {config.MODEL.RESUME} to {resume_file}" + ) + config.defrost() + config.MODEL.RESUME = resume_file + config.freeze() + logger.info(f"auto resuming from {resume_file}") + else: + logger.info(f"no checkpoint found in {config.OUTPUT}, ignoring auto resume") + + if config.MODEL.RESUME: + print("resume called") + max_accuracy = load_checkpoint( + config, model_without_ddp, optimizer, lr_scheduler, logger + ) + acc1, acc5, loss = validate(config, data_loader_val, model) + logger.info( + f"Accuracy of the network on the {len(data_loader_val)} test images: {acc1:.1f}%" + ) + if config.EVAL_MODE: + return + + if config.THROUGHPUT_MODE: + acc1, acc5, loss = validate(config, data_loader_val, model) + logger.info( + f"Accuracy of the network on the 
{len(data_loader_val)} test images: {acc1:.1f}%" + ) + throughput(data_loader_val, model, logger) + return + + logger.info("Start training") + start_time = time.time() + for epoch in range(config.TRAIN.START_EPOCH, config.TRAIN.EPOCHS): + if not config.DATA.SYNTHETIC_DATA: + data_loader_train.sampler.set_epoch(epoch) + + train_one_epoch( + config, + model, + criterion, + data_loader_train, + optimizer, + epoch, + mixup_fn, + lr_scheduler, + ) + if flow.env.get_rank() == 0 and ( + epoch % config.SAVE_FREQ == 0 or epoch == (config.TRAIN.EPOCHS - 1) + ): + save_checkpoint( + config, + epoch, + model_without_ddp, + max_accuracy, + optimizer, + lr_scheduler, + logger, + ) + + # no validate + acc1, acc5, loss = validate(config, data_loader_val, model) + logger.info( + f"Accuracy of the network on the {len(data_loader_val)} test images: {acc1:.1f}%" + ) + max_accuracy = max(max_accuracy, acc1) + logger.info(f"Max accuracy: {max_accuracy:.2f}%") + + total_time = time.time() - start_time + total_time_str = str(datetime.timedelta(seconds=int(total_time))) + logger.info("Training time {}".format(total_time_str)) + + +def train_one_epoch( + config, model, criterion, data_loader, optimizer, epoch, mixup_fn, lr_scheduler +): + model.train() + optimizer.zero_grad() + + num_steps = len(data_loader) + one_sample_time = TimeMeter() + loss_meter = AverageMeter() + + start = time.time() + end = time.time() + for idx, (samples, targets) in enumerate(data_loader): + samples = samples.cuda() + targets = targets.cuda() + + if mixup_fn is not None: + samples, targets = mixup_fn(samples, targets) + + outputs = model(samples) + + if config.TRAIN.ACCUMULATION_STEPS > 1: + loss = criterion(outputs, targets) + loss = loss / config.TRAIN.ACCUMULATION_STEPS + loss.backward() + if config.TRAIN.CLIP_GRAD: + flow.nn.utils.clip_grad_norm_( + model.parameters(), config.TRAIN.CLIP_GRAD + ) + if (idx + 1) % config.TRAIN.ACCUMULATION_STEPS == 0: + optimizer.step() + optimizer.zero_grad() + 
lr_scheduler.step() + else: + loss = criterion(outputs, targets) + optimizer.zero_grad() + loss.backward() + if config.TRAIN.CLIP_GRAD: + flow.nn.utils.clip_grad_norm_( + model.parameters(), config.TRAIN.CLIP_GRAD + ) + optimizer.step() + lr_scheduler.step() + + one_sample_time.record(samples.size(0) * flow.env.get_world_size()) + loss_meter.record(loss.cpu().detach(), targets.size(0)) + + end = time.time() + + if idx % config.PRINT_FREQ == 0: + lr = optimizer.param_groups[0]["lr"] + loss, loss_avg = loss_meter.get() + throughput, throughput_avg = one_sample_time.get() + etas = (num_steps - idx) * samples.size(0) * flow.env.get_world_size() / throughput_avg + one_sample_time.reset() + + logger.info( + f"Train: [{epoch}/{config.TRAIN.EPOCHS}][{idx}/{num_steps}]\t" + f"eta {datetime.timedelta(seconds=int(etas))}\tlr {lr:.6f}\t" + f"time {samples.size(0) * flow.env.get_world_size() / throughput:.4f}s ({samples.size(0) * flow.env.get_world_size() / throughput_avg:.4f}s)\t" + f"rate {throughput:.4f}/s ({throughput_avg:.4f}/s)\t" + f"loss {loss:.4f} ({loss_avg:.4f})\t" + ) + + epoch_time = time.time() - start + logger.info( + f"EPOCH {epoch} training takes {datetime.timedelta(seconds=int(epoch_time))}" + ) + + +@flow.no_grad() +def validate(config, data_loader, model): + criterion = flow.nn.CrossEntropyLoss() + model.eval() + + batch_time = TimeMeter() + loss_meter = AverageMeter() + acc1_meter = AverageMeter() + acc5_meter = AverageMeter() + + end = time.time() + for idx, (images, target) in enumerate(data_loader): + images = images.cuda() + target = target.cuda() + + # compute output + output = model(images) + + # measure accuracy and record loss + loss = criterion(output, target) + acc1, acc5 = accuracy(output, target, topk=(1, 5)) + + acc1 = reduce_tensor(acc1) + acc5 = reduce_tensor(acc5) + loss = reduce_tensor(loss) + + batch_time.record(target.size(0) * flow.env.get_world_size()) + loss_meter.record(loss, target.size(0)) + acc1_meter.record(acc1, target.size(0)) + 
acc5_meter.record(acc5, target.size(0)) + + # measure elapsed time + end = time.time() + + if idx % config.PRINT_FREQ == 0: + acc1, acc1_avg = acc1_meter.get() + acc5, acc5_avg = acc5_meter.get() + loss, loss_avg = loss_meter.get() + throughput, throughput_avg = batch_time.get() + batch_time.reset() + + logger.info( + f"Test: [{idx}/{len(data_loader)}]\t" + f"Throughput {throughput:.3f} ({throughput_avg:.3f})\t" + f"Loss {loss:.4f} ({loss_avg:.4f})\t" + f"Acc@1 {acc1:.3f} ({acc1_avg:.3f})\t" + f"Acc@5 {acc5:.3f} ({acc5_avg:.3f})\t" + ) + + logger.info(f" * Acc@1 {acc1_avg:.3f} Acc@5 {acc5_avg:.3f}") + return acc1_avg, acc5_avg, loss_meter + + +@flow.no_grad() +def throughput(data_loader, model, logger): + model.eval() + + for idx, (images, _) in enumerate(data_loader): + images = images.cuda() + batch_size = images.shape[0] + for i in range(50): + model(images) + flow.cuda.synchronize() + # TODO: add flow.cuda.synchronize() + logger.info(f"throughput averaged with 30 times") + tic1 = time.time() + for i in range(30): + model(images) + + flow.cuda.synchronize() + tic2 = time.time() + logger.info( + f"batch_size {batch_size} throughput {30 * batch_size / (tic2 - tic1)}" + ) + return + + +if __name__ == "__main__": + _, config = parse_option() + + if "RANK" in os.environ and "WORLD_SIZE" in os.environ: + rank = flow.env.get_rank() + world_size = flow.env.get_world_size() + print(f"RANK and WORLD_SIZE in environ: {rank}/{world_size}") + else: + rank = -1 + world_size = -1 + + seed = config.SEED + flow.env.get_rank() + flow.manual_seed(seed) + np.random.seed(seed) + cudnn.benchmark = True + + # linear_scaled_lr = ( + # config.TRAIN.BASE_LR + # * config.DATA.BATCH_SIZE + # * flow.env.get_world_size() + # / 512.0 + # ) + # linear_scaled_warmup_lr = ( + # config.TRAIN.WARMUP_LR + # * config.DATA.BATCH_SIZE + # * flow.env.get_world_size() + # / 512.0 + # ) + linear_scaled_lr = config.TRAIN.BASE_LR + linear_scaled_warmup_lr = config.TRAIN.WARMUP_LR + + linear_scaled_min_lr = 
( + config.TRAIN.MIN_LR * config.DATA.BATCH_SIZE * flow.env.get_world_size() / 512.0 + ) + + # gradient accumulation also need to scale the learning rate + if config.TRAIN.ACCUMULATION_STEPS > 1: + linear_scaled_lr = linear_scaled_lr * config.TRAIN.ACCUMULATION_STEPS + linear_scaled_warmup_lr = ( + linear_scaled_warmup_lr * config.TRAIN.ACCUMULATION_STEPS + ) + linear_scaled_min_lr = linear_scaled_min_lr * config.TRAIN.ACCUMULATION_STEPS + config.defrost() + config.TRAIN.BASE_LR = linear_scaled_lr + config.TRAIN.WARMUP_LR = linear_scaled_warmup_lr + config.TRAIN.MIN_LR = linear_scaled_min_lr + config.freeze() + + os.makedirs(config.OUTPUT, exist_ok=True) + logger = create_logger( + output_dir=config.OUTPUT, + dist_rank=flow.env.get_rank(), + name=f"{config.MODEL.ARCH}", + ) + + if flow.env.get_rank() == 0: + path = os.path.join(config.OUTPUT, "config.json") + with open(path, "w") as f: + f.write(config.dump()) + logger.info(f"Full config saved to {path}") + + # print config + logger.info(config.dump()) + + main(config) \ No newline at end of file diff --git a/cv/classification/dla102/optimizer.py b/cv/classification/dla102/optimizer.py new file mode 120000 index 00000000..e5dec61d --- /dev/null +++ b/cv/classification/dla102/optimizer.py @@ -0,0 +1 @@ +../optimizer.py \ No newline at end of file diff --git a/cv/classification/dla102/requirements.txt b/cv/classification/dla102/requirements.txt new file mode 120000 index 00000000..dc833dd4 --- /dev/null +++ b/cv/classification/dla102/requirements.txt @@ -0,0 +1 @@ +../requirements.txt \ No newline at end of file diff --git a/cv/classification/dla102/train.sh b/cv/classification/dla102/train.sh new file mode 100755 index 00000000..c6c58f75 --- /dev/null +++ b/cv/classification/dla102/train.sh @@ -0,0 +1,16 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=8 +PORT=12346 +MODEL_ARCH="dla102" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port 
$PORT \ + main.py \ + --cfg configs/default_settings.yaml \ + --model_arch $MODEL_ARCH \ + --lr 1e-3 + diff --git a/cv/classification/dla102/utils.py b/cv/classification/dla102/utils.py new file mode 120000 index 00000000..50fbc6d8 --- /dev/null +++ b/cv/classification/dla102/utils.py @@ -0,0 +1 @@ +../utils.py \ No newline at end of file diff --git a/cv/classification/dla102x/README.md b/cv/classification/dla102x/README.md new file mode 100644 index 00000000..97947f05 --- /dev/null +++ b/cv/classification/dla102x/README.md @@ -0,0 +1,66 @@ +## dla102x + +### Installation +- Install the latest version of OneFlow +```bash +python3 -m pip install oneflow -f https://staging.oneflow.info/branch/master/[PLATFORM] +``` +Find more information on [install oneflow](https://github.com/Oneflow-Inc/oneflow#install-oneflow) + +- Install flowvision + +Then install the latest stable release of flowvision + +```bash +pip install flowvision==0.2.1 +``` + +- Install other requirements +```bash +python3 -m pip install -r requirements.txt +``` + +### Dataset +#### ImageNet +For ImageNet dataset, you can download it from http://image-net.org/. We provide the following two ways to load data: + +- For standard folder dataset, move validation images to labeled sub-folders. The file structure should look like: + ```bash + $ tree data + imagenet + ├── train + │ ├── class1 + │ │ ├── img1.jpeg + │ │ ├── img2.jpeg + │ │ └── ... + │ ├── class2 + │ │ ├── img3.jpeg + │ │ └── ... + │ └── ... + └── val + ├── class1 + │ ├── img4.jpeg + │ ├── img5.jpeg + │ └── ... + ├── class2 + │ ├── img6.jpeg + │ └── ... + └── ... + + +### Training + +You can use bash script `train.sh` to train this model. + +```bash +sh train.sh +``` + +### inference + +Bash script `infer.sh` is used to infer the trained model. 
+ +```bash +sh infer.sh +``` + diff --git a/cv/classification/dla102x/config.py b/cv/classification/dla102x/config.py new file mode 120000 index 00000000..3721332c --- /dev/null +++ b/cv/classification/dla102x/config.py @@ -0,0 +1 @@ +../config.py \ No newline at end of file diff --git a/cv/classification/dla102x/configs b/cv/classification/dla102x/configs new file mode 120000 index 00000000..271eaab3 --- /dev/null +++ b/cv/classification/dla102x/configs @@ -0,0 +1 @@ +../configs \ No newline at end of file diff --git a/cv/classification/dla102x/data b/cv/classification/dla102x/data new file mode 120000 index 00000000..4909e06e --- /dev/null +++ b/cv/classification/dla102x/data @@ -0,0 +1 @@ +../data \ No newline at end of file diff --git a/cv/classification/dla102x/infer.sh b/cv/classification/dla102x/infer.sh new file mode 100755 index 00000000..24406e56 --- /dev/null +++ b/cv/classification/dla102x/infer.sh @@ -0,0 +1,17 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=1 +PORT=12346 +MODEL_ARCH="dla102x" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/default_settings.yaml \ + --model_arch $MODEL_ARCH \ + --lr 1e-3 \ + --throughput + diff --git a/cv/classification/dla102x/logger.py b/cv/classification/dla102x/logger.py new file mode 120000 index 00000000..e1dc8ceb --- /dev/null +++ b/cv/classification/dla102x/logger.py @@ -0,0 +1 @@ +../logger.py \ No newline at end of file diff --git a/cv/classification/dla102x/lr_scheduler.py b/cv/classification/dla102x/lr_scheduler.py new file mode 120000 index 00000000..268fb140 --- /dev/null +++ b/cv/classification/dla102x/lr_scheduler.py @@ -0,0 +1 @@ +../lr_scheduler.py \ No newline at end of file diff --git a/cv/classification/dla102x/main.py b/cv/classification/dla102x/main.py new file mode 100644 index 00000000..c2d27d1b --- /dev/null +++ b/cv/classification/dla102x/main.py @@ -0,0 +1,455 @@ +""" 
+Modified from https://github.com/microsoft/Swin-Transformer/blob/main/main.py +""" + +import os +import time +import argparse +import datetime +import numpy as np +import oneflow as flow +import oneflow.backends.cudnn as cudnn + +from flowvision.loss.cross_entropy import ( + LabelSmoothingCrossEntropy, + SoftTargetCrossEntropy, +) +from flowvision.utils.metrics import accuracy +from flowvision.models import ModelCreator + +from config import get_config +from data import build_loader +from lr_scheduler import build_scheduler +from optimizer import build_optimizer +from logger import create_logger +from utils import ( + load_checkpoint, + save_checkpoint, + get_grad_norm, + auto_resume_helper, + reduce_tensor, + AverageMeter, + TimeMeter +) + +def build_model(config): + model_arch = config.MODEL.ARCH + model = ModelCreator.create_model(model_arch, pretrained=config.MODEL.PRETRAINED) + return model + + +def parse_option(): + parser = argparse.ArgumentParser( + "Flowvision image classification training and evaluation script", add_help=False + ) + parser.add_argument( + "--model_arch", + type=str, + required=True, + default="swin_tiny_patch4_window7_224", + help="model for training", + ) + parser.add_argument( + "--cfg", type=str, required=True, metavar="FILE", help="path to config file", + ) + parser.add_argument( + "--opts", + help="Modify config options by adding 'KEY VALUE' pairs. 
", + default=None, + nargs="+", + ) + + # easy config modification + parser.add_argument( + "--synthetic-data", + action="store_true", + dest="synthetic_data", + help="Use synthetic data", + ) + parser.add_argument( + "--epochs", type=int, help="batch size for single GPU" + ) + parser.add_argument("--lr", type=float, help="initial learning rate") + parser.add_argument( + "--image-size", type=int, help="initial image size" + ) + parser.add_argument( + "--batch-size", type=int, help="batch size for single GPU" + ) + parser.add_argument("--data-path", type=str, help="path to dataset") + parser.add_argument( + "--zip", + action="store_true", + help="use zipped dataset instead of folder dataset", + ) + parser.add_argument( + "--cache-mode", + type=str, + default="part", + choices=["no", "full", "part"], + help="no: no cache, " + "full: cache all data, " + "part: sharding the dataset into nonoverlapping pieces and only cache one piece", + ) + parser.add_argument("--resume", help="resume from checkpoint") + parser.add_argument( + "--accumulation-steps", type=int, help="gradient accumulation steps" + ) + parser.add_argument( + "--use-checkpoint", + action="store_true", + help="whether to use gradient checkpointing to save memory", + ) + parser.add_argument( + "--output", + default="output", + type=str, + metavar="PATH", + help="root of output folder, the full path is // (default: output)", + ) + parser.add_argument("--tag", help="tag of experiment") + parser.add_argument("--eval", action="store_true", help="Perform evaluation only") + parser.add_argument( + "--throughput", action="store_true", help="Test throughput only" + ) + + # distributed training + parser.add_argument( + "--local_rank", + type=int, + default=0, + required=False, + help="local rank for DistributedDataParallel", + ) + + args, unparsed = parser.parse_known_args() + + config = get_config(args) + + return args, config + + +def main(config): + ( + dataset_train, + dataset_val, + data_loader_train, + 
data_loader_val, + mixup_fn, + ) = build_loader(config) + + logger.info(f"Creating model:{config.MODEL.ARCH}") + model = build_model(config) + model.cuda() + + optimizer = build_optimizer(config, model) + model = flow.nn.parallel.DistributedDataParallel(model, broadcast_buffers=False) + # FIXME: model with DDP wrapper doesn't have model.module + model_without_ddp = model + + n_parameters = sum(p.numel() for p in model.parameters() if p.requires_grad) + logger.info(f"number of params: {n_parameters}") + if hasattr(model_without_ddp, "flops"): + flops = model_without_ddp.flops() + logger.info(f"number of GFLOPs: {flops / 1e9}") + + lr_scheduler = build_scheduler(config, optimizer, len(data_loader_train)) + + if config.AUG.MIXUP > 0.0: + # smoothing is handled with mixup label transform + criterion = SoftTargetCrossEntropy() + elif config.MODEL.LABEL_SMOOTHING > 0.0: + criterion = LabelSmoothingCrossEntropy(smoothing=config.MODEL.LABEL_SMOOTHING) + else: + criterion = flow.nn.CrossEntropyLoss() + + max_accuracy = 0.0 + + if config.TRAIN.AUTO_RESUME: + resume_file = auto_resume_helper(config.OUTPUT) + if resume_file: + if config.MODEL.RESUME: + logger.warning( + f"auto-resume changing resume file from {config.MODEL.RESUME} to {resume_file}" + ) + config.defrost() + config.MODEL.RESUME = resume_file + config.freeze() + logger.info(f"auto resuming from {resume_file}") + else: + logger.info(f"no checkpoint found in {config.OUTPUT}, ignoring auto resume") + + if config.MODEL.RESUME: + print("resume called") + max_accuracy = load_checkpoint( + config, model_without_ddp, optimizer, lr_scheduler, logger + ) + acc1, acc5, loss = validate(config, data_loader_val, model) + logger.info( + f"Accuracy of the network on the {len(data_loader_val)} test images: {acc1:.1f}%" + ) + if config.EVAL_MODE: + return + + if config.THROUGHPUT_MODE: + acc1, acc5, loss = validate(config, data_loader_val, model) + logger.info( + f"Accuracy of the network on the {len(data_loader_val)} test images: 
{acc1:.1f}%" + ) + throughput(data_loader_val, model, logger) + return + + logger.info("Start training") + start_time = time.time() + for epoch in range(config.TRAIN.START_EPOCH, config.TRAIN.EPOCHS): + if not config.DATA.SYNTHETIC_DATA: + data_loader_train.sampler.set_epoch(epoch) + + train_one_epoch( + config, + model, + criterion, + data_loader_train, + optimizer, + epoch, + mixup_fn, + lr_scheduler, + ) + if flow.env.get_rank() == 0 and ( + epoch % config.SAVE_FREQ == 0 or epoch == (config.TRAIN.EPOCHS - 1) + ): + save_checkpoint( + config, + epoch, + model_without_ddp, + max_accuracy, + optimizer, + lr_scheduler, + logger, + ) + + # no validate + acc1, acc5, loss = validate(config, data_loader_val, model) + logger.info( + f"Accuracy of the network on the {len(data_loader_val)} test images: {acc1:.1f}%" + ) + max_accuracy = max(max_accuracy, acc1) + logger.info(f"Max accuracy: {max_accuracy:.2f}%") + + total_time = time.time() - start_time + total_time_str = str(datetime.timedelta(seconds=int(total_time))) + logger.info("Training time {}".format(total_time_str)) + + +def train_one_epoch( + config, model, criterion, data_loader, optimizer, epoch, mixup_fn, lr_scheduler +): + model.train() + optimizer.zero_grad() + + num_steps = len(data_loader) + one_sample_time = TimeMeter() + loss_meter = AverageMeter() + + start = time.time() + end = time.time() + for idx, (samples, targets) in enumerate(data_loader): + samples = samples.cuda() + targets = targets.cuda() + + if mixup_fn is not None: + samples, targets = mixup_fn(samples, targets) + + outputs = model(samples) + + if config.TRAIN.ACCUMULATION_STEPS > 1: + loss = criterion(outputs, targets) + loss = loss / config.TRAIN.ACCUMULATION_STEPS + loss.backward() + if config.TRAIN.CLIP_GRAD: + flow.nn.utils.clip_grad_norm_( + model.parameters(), config.TRAIN.CLIP_GRAD + ) + if (idx + 1) % config.TRAIN.ACCUMULATION_STEPS == 0: + optimizer.step() + optimizer.zero_grad() + lr_scheduler.step() + else: + loss = 
criterion(outputs, targets) + optimizer.zero_grad() + loss.backward() + if config.TRAIN.CLIP_GRAD: + flow.nn.utils.clip_grad_norm_( + model.parameters(), config.TRAIN.CLIP_GRAD + ) + optimizer.step() + lr_scheduler.step() + + one_sample_time.record(samples.size(0) * flow.env.get_world_size()) + loss_meter.record(loss.cpu().detach(), targets.size(0)) + + end = time.time() + + if idx % config.PRINT_FREQ == 0: + lr = optimizer.param_groups[0]["lr"] + loss, loss_avg = loss_meter.get() + throughput, throughput_avg = one_sample_time.get() + etas = (num_steps - idx) * samples.size(0) * flow.env.get_world_size() / throughput_avg + one_sample_time.reset() + + logger.info( + f"Train: [{epoch}/{config.TRAIN.EPOCHS}][{idx}/{num_steps}]\t" + f"eta {datetime.timedelta(seconds=int(etas))}\tlr {lr:.6f}\t" + f"time {samples.size(0) * flow.env.get_world_size() / throughput:.4f}s ({samples.size(0) * flow.env.get_world_size() / throughput_avg:.4f}s)\t" + f"rate {throughput:.4f}/s ({throughput_avg:.4f}/s)\t" + f"loss {loss:.4f} ({loss_avg:.4f})\t" + ) + + epoch_time = time.time() - start + logger.info( + f"EPOCH {epoch} training takes {datetime.timedelta(seconds=int(epoch_time))}" + ) + + +@flow.no_grad() +def validate(config, data_loader, model): + criterion = flow.nn.CrossEntropyLoss() + model.eval() + + batch_time = TimeMeter() + loss_meter = AverageMeter() + acc1_meter = AverageMeter() + acc5_meter = AverageMeter() + + end = time.time() + for idx, (images, target) in enumerate(data_loader): + images = images.cuda() + target = target.cuda() + + # compute output + output = model(images) + + # measure accuracy and record loss + loss = criterion(output, target) + acc1, acc5 = accuracy(output, target, topk=(1, 5)) + + acc1 = reduce_tensor(acc1) + acc5 = reduce_tensor(acc5) + loss = reduce_tensor(loss) + + batch_time.record(target.size(0) * flow.env.get_world_size()) + loss_meter.record(loss, target.size(0)) + acc1_meter.record(acc1, target.size(0)) + acc5_meter.record(acc5, 
target.size(0)) + + # measure elapsed time + end = time.time() + + if idx % config.PRINT_FREQ == 0: + acc1, acc1_avg = acc1_meter.get() + acc5, acc5_avg = acc5_meter.get() + loss, loss_avg = loss_meter.get() + throughput, throughput_avg = batch_time.get() + batch_time.reset() + + logger.info( + f"Test: [{idx}/{len(data_loader)}]\t" + f"Throughput {throughput:.3f} ({throughput_avg:.3f})\t" + f"Loss {loss:.4f} ({loss_avg:.4f})\t" + f"Acc@1 {acc1:.3f} ({acc1_avg:.3f})\t" + f"Acc@5 {acc5:.3f} ({acc5_avg:.3f})\t" + ) + + logger.info(f" * Acc@1 {acc1_avg:.3f} Acc@5 {acc5_avg:.3f}") + return acc1_avg, acc5_avg, loss_meter + + +@flow.no_grad() +def throughput(data_loader, model, logger): + model.eval() + + for idx, (images, _) in enumerate(data_loader): + images = images.cuda() + batch_size = images.shape[0] + for i in range(50): + model(images) + flow.cuda.synchronize() + # TODO: add flow.cuda.synchronize() + logger.info(f"throughput averaged with 30 times") + tic1 = time.time() + for i in range(30): + model(images) + + flow.cuda.synchronize() + tic2 = time.time() + logger.info( + f"batch_size {batch_size} throughput {30 * batch_size / (tic2 - tic1)}" + ) + return + + +if __name__ == "__main__": + _, config = parse_option() + + if "RANK" in os.environ and "WORLD_SIZE" in os.environ: + rank = flow.env.get_rank() + world_size = flow.env.get_world_size() + print(f"RANK and WORLD_SIZE in environ: {rank}/{world_size}") + else: + rank = -1 + world_size = -1 + + seed = config.SEED + flow.env.get_rank() + flow.manual_seed(seed) + np.random.seed(seed) + cudnn.benchmark = True + + # linear_scaled_lr = ( + # config.TRAIN.BASE_LR + # * config.DATA.BATCH_SIZE + # * flow.env.get_world_size() + # / 512.0 + # ) + # linear_scaled_warmup_lr = ( + # config.TRAIN.WARMUP_LR + # * config.DATA.BATCH_SIZE + # * flow.env.get_world_size() + # / 512.0 + # ) + linear_scaled_lr = config.TRAIN.BASE_LR + linear_scaled_warmup_lr = config.TRAIN.WARMUP_LR + + linear_scaled_min_lr = ( + config.TRAIN.MIN_LR 
* config.DATA.BATCH_SIZE * flow.env.get_world_size() / 512.0 + ) + + # gradient accumulation also need to scale the learning rate + if config.TRAIN.ACCUMULATION_STEPS > 1: + linear_scaled_lr = linear_scaled_lr * config.TRAIN.ACCUMULATION_STEPS + linear_scaled_warmup_lr = ( + linear_scaled_warmup_lr * config.TRAIN.ACCUMULATION_STEPS + ) + linear_scaled_min_lr = linear_scaled_min_lr * config.TRAIN.ACCUMULATION_STEPS + config.defrost() + config.TRAIN.BASE_LR = linear_scaled_lr + config.TRAIN.WARMUP_LR = linear_scaled_warmup_lr + config.TRAIN.MIN_LR = linear_scaled_min_lr + config.freeze() + + os.makedirs(config.OUTPUT, exist_ok=True) + logger = create_logger( + output_dir=config.OUTPUT, + dist_rank=flow.env.get_rank(), + name=f"{config.MODEL.ARCH}", + ) + + if flow.env.get_rank() == 0: + path = os.path.join(config.OUTPUT, "config.json") + with open(path, "w") as f: + f.write(config.dump()) + logger.info(f"Full config saved to {path}") + + # print config + logger.info(config.dump()) + + main(config) \ No newline at end of file diff --git a/cv/classification/dla102x/optimizer.py b/cv/classification/dla102x/optimizer.py new file mode 120000 index 00000000..e5dec61d --- /dev/null +++ b/cv/classification/dla102x/optimizer.py @@ -0,0 +1 @@ +../optimizer.py \ No newline at end of file diff --git a/cv/classification/dla102x/requirements.txt b/cv/classification/dla102x/requirements.txt new file mode 120000 index 00000000..dc833dd4 --- /dev/null +++ b/cv/classification/dla102x/requirements.txt @@ -0,0 +1 @@ +../requirements.txt \ No newline at end of file diff --git a/cv/classification/dla102x/train.sh b/cv/classification/dla102x/train.sh new file mode 100755 index 00000000..8c01592f --- /dev/null +++ b/cv/classification/dla102x/train.sh @@ -0,0 +1,16 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=8 +PORT=12346 +MODEL_ARCH="dla102x" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + 
main.py \ + --cfg configs/default_settings.yaml \ + --model_arch $MODEL_ARCH \ + --lr 1e-3 + diff --git a/cv/classification/dla102x/utils.py b/cv/classification/dla102x/utils.py new file mode 120000 index 00000000..50fbc6d8 --- /dev/null +++ b/cv/classification/dla102x/utils.py @@ -0,0 +1 @@ +../utils.py \ No newline at end of file diff --git a/cv/classification/dla169/README.md b/cv/classification/dla169/README.md new file mode 100644 index 00000000..a658aae6 --- /dev/null +++ b/cv/classification/dla169/README.md @@ -0,0 +1,66 @@ +## dla169 + +### Installation +- Install the latest version of OneFlow +```bash +python3 -m pip install oneflow -f https://staging.oneflow.info/branch/master/[PLATFORM] +``` +Find more information on [install oneflow](https://github.com/Oneflow-Inc/oneflow#install-oneflow) + +- Install flowvision + +Then install the latest stable release of flowvision + +```bash +pip install flowvision==0.2.1 +``` + +- Install other requirements +```bash +python3 -m pip install -r requirements.txt +``` + +### Dataset +#### ImageNet +For ImageNet dataset, you can download it from http://image-net.org/. We provide the following two ways to load data: + +- For standard folder dataset, move validation images to labeled sub-folders. The file structure should look like: + ```bash + $ tree data + imagenet + ├── train + │ ├── class1 + │ │ ├── img1.jpeg + │ │ ├── img2.jpeg + │ │ └── ... + │ ├── class2 + │ │ ├── img3.jpeg + │ │ └── ... + │ └── ... + └── val + ├── class1 + │ ├── img4.jpeg + │ ├── img5.jpeg + │ └── ... + ├── class2 + │ ├── img6.jpeg + │ └── ... + └── ... + + +### Training + +You can use bash script `train.sh` to train this model. + +```bash +sh train.sh +``` + +### inference + +Bash script `infer.sh` is used to infer the trained model. 
+ +```bash +sh infer.sh +``` + diff --git a/cv/classification/dla169/config.py b/cv/classification/dla169/config.py new file mode 120000 index 00000000..3721332c --- /dev/null +++ b/cv/classification/dla169/config.py @@ -0,0 +1 @@ +../config.py \ No newline at end of file diff --git a/cv/classification/dla169/configs b/cv/classification/dla169/configs new file mode 120000 index 00000000..271eaab3 --- /dev/null +++ b/cv/classification/dla169/configs @@ -0,0 +1 @@ +../configs \ No newline at end of file diff --git a/cv/classification/dla169/data b/cv/classification/dla169/data new file mode 120000 index 00000000..4909e06e --- /dev/null +++ b/cv/classification/dla169/data @@ -0,0 +1 @@ +../data \ No newline at end of file diff --git a/cv/classification/dla169/infer.sh b/cv/classification/dla169/infer.sh new file mode 100755 index 00000000..473187f0 --- /dev/null +++ b/cv/classification/dla169/infer.sh @@ -0,0 +1,17 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=1 +PORT=12346 +MODEL_ARCH="dla169" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/default_settings.yaml \ + --model_arch $MODEL_ARCH \ + --lr 1e-3 \ + --throughput + diff --git a/cv/classification/dla169/logger.py b/cv/classification/dla169/logger.py new file mode 120000 index 00000000..e1dc8ceb --- /dev/null +++ b/cv/classification/dla169/logger.py @@ -0,0 +1 @@ +../logger.py \ No newline at end of file diff --git a/cv/classification/dla169/lr_scheduler.py b/cv/classification/dla169/lr_scheduler.py new file mode 120000 index 00000000..268fb140 --- /dev/null +++ b/cv/classification/dla169/lr_scheduler.py @@ -0,0 +1 @@ +../lr_scheduler.py \ No newline at end of file diff --git a/cv/classification/dla169/main.py b/cv/classification/dla169/main.py new file mode 100644 index 00000000..c2d27d1b --- /dev/null +++ b/cv/classification/dla169/main.py @@ -0,0 +1,455 @@ +""" +Modified from 
https://github.com/microsoft/Swin-Transformer/blob/main/main.py +""" + +import os +import time +import argparse +import datetime +import numpy as np +import oneflow as flow +import oneflow.backends.cudnn as cudnn + +from flowvision.loss.cross_entropy import ( + LabelSmoothingCrossEntropy, + SoftTargetCrossEntropy, +) +from flowvision.utils.metrics import accuracy +from flowvision.models import ModelCreator + +from config import get_config +from data import build_loader +from lr_scheduler import build_scheduler +from optimizer import build_optimizer +from logger import create_logger +from utils import ( + load_checkpoint, + save_checkpoint, + get_grad_norm, + auto_resume_helper, + reduce_tensor, + AverageMeter, + TimeMeter +) + +def build_model(config): + model_arch = config.MODEL.ARCH + model = ModelCreator.create_model(model_arch, pretrained=config.MODEL.PRETRAINED) + return model + + +def parse_option(): + parser = argparse.ArgumentParser( + "Flowvision image classification training and evaluation script", add_help=False + ) + parser.add_argument( + "--model_arch", + type=str, + required=True, + default="swin_tiny_patch4_window7_224", + help="model for training", + ) + parser.add_argument( + "--cfg", type=str, required=True, metavar="FILE", help="path to config file", + ) + parser.add_argument( + "--opts", + help="Modify config options by adding 'KEY VALUE' pairs. 
", + default=None, + nargs="+", + ) + + # easy config modification + parser.add_argument( + "--synthetic-data", + action="store_true", + dest="synthetic_data", + help="Use synthetic data", + ) + parser.add_argument( + "--epochs", type=int, help="batch size for single GPU" + ) + parser.add_argument("--lr", type=float, help="initial learning rate") + parser.add_argument( + "--image-size", type=int, help="initial image size" + ) + parser.add_argument( + "--batch-size", type=int, help="batch size for single GPU" + ) + parser.add_argument("--data-path", type=str, help="path to dataset") + parser.add_argument( + "--zip", + action="store_true", + help="use zipped dataset instead of folder dataset", + ) + parser.add_argument( + "--cache-mode", + type=str, + default="part", + choices=["no", "full", "part"], + help="no: no cache, " + "full: cache all data, " + "part: sharding the dataset into nonoverlapping pieces and only cache one piece", + ) + parser.add_argument("--resume", help="resume from checkpoint") + parser.add_argument( + "--accumulation-steps", type=int, help="gradient accumulation steps" + ) + parser.add_argument( + "--use-checkpoint", + action="store_true", + help="whether to use gradient checkpointing to save memory", + ) + parser.add_argument( + "--output", + default="output", + type=str, + metavar="PATH", + help="root of output folder, the full path is // (default: output)", + ) + parser.add_argument("--tag", help="tag of experiment") + parser.add_argument("--eval", action="store_true", help="Perform evaluation only") + parser.add_argument( + "--throughput", action="store_true", help="Test throughput only" + ) + + # distributed training + parser.add_argument( + "--local_rank", + type=int, + default=0, + required=False, + help="local rank for DistributedDataParallel", + ) + + args, unparsed = parser.parse_known_args() + + config = get_config(args) + + return args, config + + +def main(config): + ( + dataset_train, + dataset_val, + data_loader_train, + 
data_loader_val, + mixup_fn, + ) = build_loader(config) + + logger.info(f"Creating model:{config.MODEL.ARCH}") + model = build_model(config) + model.cuda() + + optimizer = build_optimizer(config, model) + model = flow.nn.parallel.DistributedDataParallel(model, broadcast_buffers=False) + # FIXME: model with DDP wrapper doesn't have model.module + model_without_ddp = model + + n_parameters = sum(p.numel() for p in model.parameters() if p.requires_grad) + logger.info(f"number of params: {n_parameters}") + if hasattr(model_without_ddp, "flops"): + flops = model_without_ddp.flops() + logger.info(f"number of GFLOPs: {flops / 1e9}") + + lr_scheduler = build_scheduler(config, optimizer, len(data_loader_train)) + + if config.AUG.MIXUP > 0.0: + # smoothing is handled with mixup label transform + criterion = SoftTargetCrossEntropy() + elif config.MODEL.LABEL_SMOOTHING > 0.0: + criterion = LabelSmoothingCrossEntropy(smoothing=config.MODEL.LABEL_SMOOTHING) + else: + criterion = flow.nn.CrossEntropyLoss() + + max_accuracy = 0.0 + + if config.TRAIN.AUTO_RESUME: + resume_file = auto_resume_helper(config.OUTPUT) + if resume_file: + if config.MODEL.RESUME: + logger.warning( + f"auto-resume changing resume file from {config.MODEL.RESUME} to {resume_file}" + ) + config.defrost() + config.MODEL.RESUME = resume_file + config.freeze() + logger.info(f"auto resuming from {resume_file}") + else: + logger.info(f"no checkpoint found in {config.OUTPUT}, ignoring auto resume") + + if config.MODEL.RESUME: + print("resume called") + max_accuracy = load_checkpoint( + config, model_without_ddp, optimizer, lr_scheduler, logger + ) + acc1, acc5, loss = validate(config, data_loader_val, model) + logger.info( + f"Accuracy of the network on the {len(data_loader_val)} test images: {acc1:.1f}%" + ) + if config.EVAL_MODE: + return + + if config.THROUGHPUT_MODE: + acc1, acc5, loss = validate(config, data_loader_val, model) + logger.info( + f"Accuracy of the network on the {len(data_loader_val)} test images: 
{acc1:.1f}%" + ) + throughput(data_loader_val, model, logger) + return + + logger.info("Start training") + start_time = time.time() + for epoch in range(config.TRAIN.START_EPOCH, config.TRAIN.EPOCHS): + if not config.DATA.SYNTHETIC_DATA: + data_loader_train.sampler.set_epoch(epoch) + + train_one_epoch( + config, + model, + criterion, + data_loader_train, + optimizer, + epoch, + mixup_fn, + lr_scheduler, + ) + if flow.env.get_rank() == 0 and ( + epoch % config.SAVE_FREQ == 0 or epoch == (config.TRAIN.EPOCHS - 1) + ): + save_checkpoint( + config, + epoch, + model_without_ddp, + max_accuracy, + optimizer, + lr_scheduler, + logger, + ) + + # no validate + acc1, acc5, loss = validate(config, data_loader_val, model) + logger.info( + f"Accuracy of the network on the {len(data_loader_val)} test images: {acc1:.1f}%" + ) + max_accuracy = max(max_accuracy, acc1) + logger.info(f"Max accuracy: {max_accuracy:.2f}%") + + total_time = time.time() - start_time + total_time_str = str(datetime.timedelta(seconds=int(total_time))) + logger.info("Training time {}".format(total_time_str)) + + +def train_one_epoch( + config, model, criterion, data_loader, optimizer, epoch, mixup_fn, lr_scheduler +): + model.train() + optimizer.zero_grad() + + num_steps = len(data_loader) + one_sample_time = TimeMeter() + loss_meter = AverageMeter() + + start = time.time() + end = time.time() + for idx, (samples, targets) in enumerate(data_loader): + samples = samples.cuda() + targets = targets.cuda() + + if mixup_fn is not None: + samples, targets = mixup_fn(samples, targets) + + outputs = model(samples) + + if config.TRAIN.ACCUMULATION_STEPS > 1: + loss = criterion(outputs, targets) + loss = loss / config.TRAIN.ACCUMULATION_STEPS + loss.backward() + if config.TRAIN.CLIP_GRAD: + flow.nn.utils.clip_grad_norm_( + model.parameters(), config.TRAIN.CLIP_GRAD + ) + if (idx + 1) % config.TRAIN.ACCUMULATION_STEPS == 0: + optimizer.step() + optimizer.zero_grad() + lr_scheduler.step() + else: + loss = 
criterion(outputs, targets) + optimizer.zero_grad() + loss.backward() + if config.TRAIN.CLIP_GRAD: + flow.nn.utils.clip_grad_norm_( + model.parameters(), config.TRAIN.CLIP_GRAD + ) + optimizer.step() + lr_scheduler.step() + + one_sample_time.record(samples.size(0) * flow.env.get_world_size()) + loss_meter.record(loss.cpu().detach(), targets.size(0)) + + end = time.time() + + if idx % config.PRINT_FREQ == 0: + lr = optimizer.param_groups[0]["lr"] + loss, loss_avg = loss_meter.get() + throughput, throughput_avg = one_sample_time.get() + etas = (num_steps - idx) * samples.size(0) * flow.env.get_world_size() / throughput_avg + one_sample_time.reset() + + logger.info( + f"Train: [{epoch}/{config.TRAIN.EPOCHS}][{idx}/{num_steps}]\t" + f"eta {datetime.timedelta(seconds=int(etas))}\tlr {lr:.6f}\t" + f"time {samples.size(0) * flow.env.get_world_size() / throughput:.4f}s ({samples.size(0) * flow.env.get_world_size() / throughput_avg:.4f}s)\t" + f"rate {throughput:.4f}/s ({throughput_avg:.4f}/s)\t" + f"loss {loss:.4f} ({loss_avg:.4f})\t" + ) + + epoch_time = time.time() - start + logger.info( + f"EPOCH {epoch} training takes {datetime.timedelta(seconds=int(epoch_time))}" + ) + + +@flow.no_grad() +def validate(config, data_loader, model): + criterion = flow.nn.CrossEntropyLoss() + model.eval() + + batch_time = TimeMeter() + loss_meter = AverageMeter() + acc1_meter = AverageMeter() + acc5_meter = AverageMeter() + + end = time.time() + for idx, (images, target) in enumerate(data_loader): + images = images.cuda() + target = target.cuda() + + # compute output + output = model(images) + + # measure accuracy and record loss + loss = criterion(output, target) + acc1, acc5 = accuracy(output, target, topk=(1, 5)) + + acc1 = reduce_tensor(acc1) + acc5 = reduce_tensor(acc5) + loss = reduce_tensor(loss) + + batch_time.record(target.size(0) * flow.env.get_world_size()) + loss_meter.record(loss, target.size(0)) + acc1_meter.record(acc1, target.size(0)) + acc5_meter.record(acc5, 
target.size(0)) + + # measure elapsed time + end = time.time() + + if idx % config.PRINT_FREQ == 0: + acc1, acc1_avg = acc1_meter.get() + acc5, acc5_avg = acc5_meter.get() + loss, loss_avg = loss_meter.get() + throughput, throughput_avg = batch_time.get() + batch_time.reset() + + logger.info( + f"Test: [{idx}/{len(data_loader)}]\t" + f"Throughput {throughput:.3f} ({throughput_avg:.3f})\t" + f"Loss {loss:.4f} ({loss_avg:.4f})\t" + f"Acc@1 {acc1:.3f} ({acc1_avg:.3f})\t" + f"Acc@5 {acc5:.3f} ({acc5_avg:.3f})\t" + ) + + logger.info(f" * Acc@1 {acc1_avg:.3f} Acc@5 {acc5_avg:.3f}") + return acc1_avg, acc5_avg, loss_meter + + +@flow.no_grad() +def throughput(data_loader, model, logger): + model.eval() + + for idx, (images, _) in enumerate(data_loader): + images = images.cuda() + batch_size = images.shape[0] + for i in range(50): + model(images) + flow.cuda.synchronize() + # TODO: add flow.cuda.synchronize() + logger.info(f"throughput averaged with 30 times") + tic1 = time.time() + for i in range(30): + model(images) + + flow.cuda.synchronize() + tic2 = time.time() + logger.info( + f"batch_size {batch_size} throughput {30 * batch_size / (tic2 - tic1)}" + ) + return + + +if __name__ == "__main__": + _, config = parse_option() + + if "RANK" in os.environ and "WORLD_SIZE" in os.environ: + rank = flow.env.get_rank() + world_size = flow.env.get_world_size() + print(f"RANK and WORLD_SIZE in environ: {rank}/{world_size}") + else: + rank = -1 + world_size = -1 + + seed = config.SEED + flow.env.get_rank() + flow.manual_seed(seed) + np.random.seed(seed) + cudnn.benchmark = True + + # linear_scaled_lr = ( + # config.TRAIN.BASE_LR + # * config.DATA.BATCH_SIZE + # * flow.env.get_world_size() + # / 512.0 + # ) + # linear_scaled_warmup_lr = ( + # config.TRAIN.WARMUP_LR + # * config.DATA.BATCH_SIZE + # * flow.env.get_world_size() + # / 512.0 + # ) + linear_scaled_lr = config.TRAIN.BASE_LR + linear_scaled_warmup_lr = config.TRAIN.WARMUP_LR + + linear_scaled_min_lr = ( + config.TRAIN.MIN_LR 
* config.DATA.BATCH_SIZE * flow.env.get_world_size() / 512.0 + ) + + # gradient accumulation also need to scale the learning rate + if config.TRAIN.ACCUMULATION_STEPS > 1: + linear_scaled_lr = linear_scaled_lr * config.TRAIN.ACCUMULATION_STEPS + linear_scaled_warmup_lr = ( + linear_scaled_warmup_lr * config.TRAIN.ACCUMULATION_STEPS + ) + linear_scaled_min_lr = linear_scaled_min_lr * config.TRAIN.ACCUMULATION_STEPS + config.defrost() + config.TRAIN.BASE_LR = linear_scaled_lr + config.TRAIN.WARMUP_LR = linear_scaled_warmup_lr + config.TRAIN.MIN_LR = linear_scaled_min_lr + config.freeze() + + os.makedirs(config.OUTPUT, exist_ok=True) + logger = create_logger( + output_dir=config.OUTPUT, + dist_rank=flow.env.get_rank(), + name=f"{config.MODEL.ARCH}", + ) + + if flow.env.get_rank() == 0: + path = os.path.join(config.OUTPUT, "config.json") + with open(path, "w") as f: + f.write(config.dump()) + logger.info(f"Full config saved to {path}") + + # print config + logger.info(config.dump()) + + main(config) \ No newline at end of file diff --git a/cv/classification/dla169/optimizer.py b/cv/classification/dla169/optimizer.py new file mode 120000 index 00000000..e5dec61d --- /dev/null +++ b/cv/classification/dla169/optimizer.py @@ -0,0 +1 @@ +../optimizer.py \ No newline at end of file diff --git a/cv/classification/dla169/requirements.txt b/cv/classification/dla169/requirements.txt new file mode 120000 index 00000000..dc833dd4 --- /dev/null +++ b/cv/classification/dla169/requirements.txt @@ -0,0 +1 @@ +../requirements.txt \ No newline at end of file diff --git a/cv/classification/dla169/train.sh b/cv/classification/dla169/train.sh new file mode 100755 index 00000000..8a3b93f1 --- /dev/null +++ b/cv/classification/dla169/train.sh @@ -0,0 +1,16 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=8 +PORT=12346 +MODEL_ARCH="dla169" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + 
--cfg configs/default_settings.yaml \ + --model_arch $MODEL_ARCH \ + --lr 1e-3 + diff --git a/cv/classification/dla169/utils.py b/cv/classification/dla169/utils.py new file mode 120000 index 00000000..50fbc6d8 --- /dev/null +++ b/cv/classification/dla169/utils.py @@ -0,0 +1 @@ +../utils.py \ No newline at end of file diff --git a/cv/classification/dla34/README.md b/cv/classification/dla34/README.md new file mode 100644 index 00000000..d6747e82 --- /dev/null +++ b/cv/classification/dla34/README.md @@ -0,0 +1,66 @@ +## dla34 + +### Installation +- Install the latest version of OneFlow +```bash +python3 -m pip install oneflow -f https://staging.oneflow.info/branch/master/[PLATFORM] +``` +Find more information on [install oneflow](https://github.com/Oneflow-Inc/oneflow#install-oneflow) + +- Install flowvision + +Then install the latest stable release of flowvision + +```bash +pip install flowvision==0.2.1 +``` + +- Install other requirements +```bash +python3 -m pip install -r requirements.txt +``` + +### Dataset +#### ImageNet +For ImageNet dataset, you can download it from http://image-net.org/. We provide the following two ways to load data: + +- For standard folder dataset, move validation images to labeled sub-folders. The file structure should look like: + ```bash + $ tree data + imagenet + ├── train + │ ├── class1 + │ │ ├── img1.jpeg + │ │ ├── img2.jpeg + │ │ └── ... + │ ├── class2 + │ │ ├── img3.jpeg + │ │ └── ... + │ └── ... + └── val + ├── class1 + │ ├── img4.jpeg + │ ├── img5.jpeg + │ └── ... + ├── class2 + │ ├── img6.jpeg + │ └── ... + └── ... + + +### Training + +You can use bash script `train.sh` to train this model. + +```bash +sh train.sh +``` + +### inference + +Bash script `infer.sh` is used to infer the trained model. 
+ +```bash +sh infer.sh +``` + diff --git a/cv/classification/dla34/config.py b/cv/classification/dla34/config.py new file mode 120000 index 00000000..3721332c --- /dev/null +++ b/cv/classification/dla34/config.py @@ -0,0 +1 @@ +../config.py \ No newline at end of file diff --git a/cv/classification/dla34/configs b/cv/classification/dla34/configs new file mode 120000 index 00000000..271eaab3 --- /dev/null +++ b/cv/classification/dla34/configs @@ -0,0 +1 @@ +../configs \ No newline at end of file diff --git a/cv/classification/dla34/data b/cv/classification/dla34/data new file mode 120000 index 00000000..4909e06e --- /dev/null +++ b/cv/classification/dla34/data @@ -0,0 +1 @@ +../data \ No newline at end of file diff --git a/cv/classification/dla34/infer.sh b/cv/classification/dla34/infer.sh new file mode 100755 index 00000000..d0046ca8 --- /dev/null +++ b/cv/classification/dla34/infer.sh @@ -0,0 +1,17 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=1 +PORT=12346 +MODEL_ARCH="dla34" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/default_settings.yaml \ + --model_arch $MODEL_ARCH \ + --lr 1e-3 \ + --throughput + diff --git a/cv/classification/dla34/logger.py b/cv/classification/dla34/logger.py new file mode 120000 index 00000000..e1dc8ceb --- /dev/null +++ b/cv/classification/dla34/logger.py @@ -0,0 +1 @@ +../logger.py \ No newline at end of file diff --git a/cv/classification/dla34/lr_scheduler.py b/cv/classification/dla34/lr_scheduler.py new file mode 120000 index 00000000..268fb140 --- /dev/null +++ b/cv/classification/dla34/lr_scheduler.py @@ -0,0 +1 @@ +../lr_scheduler.py \ No newline at end of file diff --git a/cv/classification/dla34/main.py b/cv/classification/dla34/main.py new file mode 100644 index 00000000..c2d27d1b --- /dev/null +++ b/cv/classification/dla34/main.py @@ -0,0 +1,455 @@ +""" +Modified from 
https://github.com/microsoft/Swin-Transformer/blob/main/main.py +""" + +import os +import time +import argparse +import datetime +import numpy as np +import oneflow as flow +import oneflow.backends.cudnn as cudnn + +from flowvision.loss.cross_entropy import ( + LabelSmoothingCrossEntropy, + SoftTargetCrossEntropy, +) +from flowvision.utils.metrics import accuracy +from flowvision.models import ModelCreator + +from config import get_config +from data import build_loader +from lr_scheduler import build_scheduler +from optimizer import build_optimizer +from logger import create_logger +from utils import ( + load_checkpoint, + save_checkpoint, + get_grad_norm, + auto_resume_helper, + reduce_tensor, + AverageMeter, + TimeMeter +) + +def build_model(config): + model_arch = config.MODEL.ARCH + model = ModelCreator.create_model(model_arch, pretrained=config.MODEL.PRETRAINED) + return model + + +def parse_option(): + parser = argparse.ArgumentParser( + "Flowvision image classification training and evaluation script", add_help=False + ) + parser.add_argument( + "--model_arch", + type=str, + required=True, + default="swin_tiny_patch4_window7_224", + help="model for training", + ) + parser.add_argument( + "--cfg", type=str, required=True, metavar="FILE", help="path to config file", + ) + parser.add_argument( + "--opts", + help="Modify config options by adding 'KEY VALUE' pairs. 
", + default=None, + nargs="+", + ) + + # easy config modification + parser.add_argument( + "--synthetic-data", + action="store_true", + dest="synthetic_data", + help="Use synthetic data", + ) + parser.add_argument( + "--epochs", type=int, help="batch size for single GPU" + ) + parser.add_argument("--lr", type=float, help="initial learning rate") + parser.add_argument( + "--image-size", type=int, help="initial image size" + ) + parser.add_argument( + "--batch-size", type=int, help="batch size for single GPU" + ) + parser.add_argument("--data-path", type=str, help="path to dataset") + parser.add_argument( + "--zip", + action="store_true", + help="use zipped dataset instead of folder dataset", + ) + parser.add_argument( + "--cache-mode", + type=str, + default="part", + choices=["no", "full", "part"], + help="no: no cache, " + "full: cache all data, " + "part: sharding the dataset into nonoverlapping pieces and only cache one piece", + ) + parser.add_argument("--resume", help="resume from checkpoint") + parser.add_argument( + "--accumulation-steps", type=int, help="gradient accumulation steps" + ) + parser.add_argument( + "--use-checkpoint", + action="store_true", + help="whether to use gradient checkpointing to save memory", + ) + parser.add_argument( + "--output", + default="output", + type=str, + metavar="PATH", + help="root of output folder, the full path is // (default: output)", + ) + parser.add_argument("--tag", help="tag of experiment") + parser.add_argument("--eval", action="store_true", help="Perform evaluation only") + parser.add_argument( + "--throughput", action="store_true", help="Test throughput only" + ) + + # distributed training + parser.add_argument( + "--local_rank", + type=int, + default=0, + required=False, + help="local rank for DistributedDataParallel", + ) + + args, unparsed = parser.parse_known_args() + + config = get_config(args) + + return args, config + + +def main(config): + ( + dataset_train, + dataset_val, + data_loader_train, + 
data_loader_val, + mixup_fn, + ) = build_loader(config) + + logger.info(f"Creating model:{config.MODEL.ARCH}") + model = build_model(config) + model.cuda() + + optimizer = build_optimizer(config, model) + model = flow.nn.parallel.DistributedDataParallel(model, broadcast_buffers=False) + # FIXME: model with DDP wrapper doesn't have model.module + model_without_ddp = model + + n_parameters = sum(p.numel() for p in model.parameters() if p.requires_grad) + logger.info(f"number of params: {n_parameters}") + if hasattr(model_without_ddp, "flops"): + flops = model_without_ddp.flops() + logger.info(f"number of GFLOPs: {flops / 1e9}") + + lr_scheduler = build_scheduler(config, optimizer, len(data_loader_train)) + + if config.AUG.MIXUP > 0.0: + # smoothing is handled with mixup label transform + criterion = SoftTargetCrossEntropy() + elif config.MODEL.LABEL_SMOOTHING > 0.0: + criterion = LabelSmoothingCrossEntropy(smoothing=config.MODEL.LABEL_SMOOTHING) + else: + criterion = flow.nn.CrossEntropyLoss() + + max_accuracy = 0.0 + + if config.TRAIN.AUTO_RESUME: + resume_file = auto_resume_helper(config.OUTPUT) + if resume_file: + if config.MODEL.RESUME: + logger.warning( + f"auto-resume changing resume file from {config.MODEL.RESUME} to {resume_file}" + ) + config.defrost() + config.MODEL.RESUME = resume_file + config.freeze() + logger.info(f"auto resuming from {resume_file}") + else: + logger.info(f"no checkpoint found in {config.OUTPUT}, ignoring auto resume") + + if config.MODEL.RESUME: + print("resume called") + max_accuracy = load_checkpoint( + config, model_without_ddp, optimizer, lr_scheduler, logger + ) + acc1, acc5, loss = validate(config, data_loader_val, model) + logger.info( + f"Accuracy of the network on the {len(data_loader_val)} test images: {acc1:.1f}%" + ) + if config.EVAL_MODE: + return + + if config.THROUGHPUT_MODE: + acc1, acc5, loss = validate(config, data_loader_val, model) + logger.info( + f"Accuracy of the network on the {len(data_loader_val)} test images: 
{acc1:.1f}%" + ) + throughput(data_loader_val, model, logger) + return + + logger.info("Start training") + start_time = time.time() + for epoch in range(config.TRAIN.START_EPOCH, config.TRAIN.EPOCHS): + if not config.DATA.SYNTHETIC_DATA: + data_loader_train.sampler.set_epoch(epoch) + + train_one_epoch( + config, + model, + criterion, + data_loader_train, + optimizer, + epoch, + mixup_fn, + lr_scheduler, + ) + if flow.env.get_rank() == 0 and ( + epoch % config.SAVE_FREQ == 0 or epoch == (config.TRAIN.EPOCHS - 1) + ): + save_checkpoint( + config, + epoch, + model_without_ddp, + max_accuracy, + optimizer, + lr_scheduler, + logger, + ) + + # no validate + acc1, acc5, loss = validate(config, data_loader_val, model) + logger.info( + f"Accuracy of the network on the {len(data_loader_val)} test images: {acc1:.1f}%" + ) + max_accuracy = max(max_accuracy, acc1) + logger.info(f"Max accuracy: {max_accuracy:.2f}%") + + total_time = time.time() - start_time + total_time_str = str(datetime.timedelta(seconds=int(total_time))) + logger.info("Training time {}".format(total_time_str)) + + +def train_one_epoch( + config, model, criterion, data_loader, optimizer, epoch, mixup_fn, lr_scheduler +): + model.train() + optimizer.zero_grad() + + num_steps = len(data_loader) + one_sample_time = TimeMeter() + loss_meter = AverageMeter() + + start = time.time() + end = time.time() + for idx, (samples, targets) in enumerate(data_loader): + samples = samples.cuda() + targets = targets.cuda() + + if mixup_fn is not None: + samples, targets = mixup_fn(samples, targets) + + outputs = model(samples) + + if config.TRAIN.ACCUMULATION_STEPS > 1: + loss = criterion(outputs, targets) + loss = loss / config.TRAIN.ACCUMULATION_STEPS + loss.backward() + if config.TRAIN.CLIP_GRAD: + flow.nn.utils.clip_grad_norm_( + model.parameters(), config.TRAIN.CLIP_GRAD + ) + if (idx + 1) % config.TRAIN.ACCUMULATION_STEPS == 0: + optimizer.step() + optimizer.zero_grad() + lr_scheduler.step() + else: + loss = 
criterion(outputs, targets) + optimizer.zero_grad() + loss.backward() + if config.TRAIN.CLIP_GRAD: + flow.nn.utils.clip_grad_norm_( + model.parameters(), config.TRAIN.CLIP_GRAD + ) + optimizer.step() + lr_scheduler.step() + + one_sample_time.record(samples.size(0) * flow.env.get_world_size()) + loss_meter.record(loss.cpu().detach(), targets.size(0)) + + end = time.time() + + if idx % config.PRINT_FREQ == 0: + lr = optimizer.param_groups[0]["lr"] + loss, loss_avg = loss_meter.get() + throughput, throughput_avg = one_sample_time.get() + etas = (num_steps - idx) * samples.size(0) * flow.env.get_world_size() / throughput_avg + one_sample_time.reset() + + logger.info( + f"Train: [{epoch}/{config.TRAIN.EPOCHS}][{idx}/{num_steps}]\t" + f"eta {datetime.timedelta(seconds=int(etas))}\tlr {lr:.6f}\t" + f"time {samples.size(0) * flow.env.get_world_size() / throughput:.4f}s ({samples.size(0) * flow.env.get_world_size() / throughput_avg:.4f}s)\t" + f"rate {throughput:.4f}/s ({throughput_avg:.4f}/s)\t" + f"loss {loss:.4f} ({loss_avg:.4f})\t" + ) + + epoch_time = time.time() - start + logger.info( + f"EPOCH {epoch} training takes {datetime.timedelta(seconds=int(epoch_time))}" + ) + + +@flow.no_grad() +def validate(config, data_loader, model): + criterion = flow.nn.CrossEntropyLoss() + model.eval() + + batch_time = TimeMeter() + loss_meter = AverageMeter() + acc1_meter = AverageMeter() + acc5_meter = AverageMeter() + + end = time.time() + for idx, (images, target) in enumerate(data_loader): + images = images.cuda() + target = target.cuda() + + # compute output + output = model(images) + + # measure accuracy and record loss + loss = criterion(output, target) + acc1, acc5 = accuracy(output, target, topk=(1, 5)) + + acc1 = reduce_tensor(acc1) + acc5 = reduce_tensor(acc5) + loss = reduce_tensor(loss) + + batch_time.record(target.size(0) * flow.env.get_world_size()) + loss_meter.record(loss, target.size(0)) + acc1_meter.record(acc1, target.size(0)) + acc5_meter.record(acc5, 
target.size(0)) + + # measure elapsed time + end = time.time() + + if idx % config.PRINT_FREQ == 0: + acc1, acc1_avg = acc1_meter.get() + acc5, acc5_avg = acc5_meter.get() + loss, loss_avg = loss_meter.get() + throughput, throughput_avg = batch_time.get() + batch_time.reset() + + logger.info( + f"Test: [{idx}/{len(data_loader)}]\t" + f"Throughput {throughput:.3f} ({throughput_avg:.3f})\t" + f"Loss {loss:.4f} ({loss_avg:.4f})\t" + f"Acc@1 {acc1:.3f} ({acc1_avg:.3f})\t" + f"Acc@5 {acc5:.3f} ({acc5_avg:.3f})\t" + ) + + logger.info(f" * Acc@1 {acc1_avg:.3f} Acc@5 {acc5_avg:.3f}") + return acc1_avg, acc5_avg, loss_meter + + +@flow.no_grad() +def throughput(data_loader, model, logger): + model.eval() + + for idx, (images, _) in enumerate(data_loader): + images = images.cuda() + batch_size = images.shape[0] + for i in range(50): + model(images) + flow.cuda.synchronize() + # TODO: add flow.cuda.synchronize() + logger.info(f"throughput averaged with 30 times") + tic1 = time.time() + for i in range(30): + model(images) + + flow.cuda.synchronize() + tic2 = time.time() + logger.info( + f"batch_size {batch_size} throughput {30 * batch_size / (tic2 - tic1)}" + ) + return + + +if __name__ == "__main__": + _, config = parse_option() + + if "RANK" in os.environ and "WORLD_SIZE" in os.environ: + rank = flow.env.get_rank() + world_size = flow.env.get_world_size() + print(f"RANK and WORLD_SIZE in environ: {rank}/{world_size}") + else: + rank = -1 + world_size = -1 + + seed = config.SEED + flow.env.get_rank() + flow.manual_seed(seed) + np.random.seed(seed) + cudnn.benchmark = True + + # linear_scaled_lr = ( + # config.TRAIN.BASE_LR + # * config.DATA.BATCH_SIZE + # * flow.env.get_world_size() + # / 512.0 + # ) + # linear_scaled_warmup_lr = ( + # config.TRAIN.WARMUP_LR + # * config.DATA.BATCH_SIZE + # * flow.env.get_world_size() + # / 512.0 + # ) + linear_scaled_lr = config.TRAIN.BASE_LR + linear_scaled_warmup_lr = config.TRAIN.WARMUP_LR + + linear_scaled_min_lr = ( + config.TRAIN.MIN_LR 
* config.DATA.BATCH_SIZE * flow.env.get_world_size() / 512.0 + ) + + # gradient accumulation also need to scale the learning rate + if config.TRAIN.ACCUMULATION_STEPS > 1: + linear_scaled_lr = linear_scaled_lr * config.TRAIN.ACCUMULATION_STEPS + linear_scaled_warmup_lr = ( + linear_scaled_warmup_lr * config.TRAIN.ACCUMULATION_STEPS + ) + linear_scaled_min_lr = linear_scaled_min_lr * config.TRAIN.ACCUMULATION_STEPS + config.defrost() + config.TRAIN.BASE_LR = linear_scaled_lr + config.TRAIN.WARMUP_LR = linear_scaled_warmup_lr + config.TRAIN.MIN_LR = linear_scaled_min_lr + config.freeze() + + os.makedirs(config.OUTPUT, exist_ok=True) + logger = create_logger( + output_dir=config.OUTPUT, + dist_rank=flow.env.get_rank(), + name=f"{config.MODEL.ARCH}", + ) + + if flow.env.get_rank() == 0: + path = os.path.join(config.OUTPUT, "config.json") + with open(path, "w") as f: + f.write(config.dump()) + logger.info(f"Full config saved to {path}") + + # print config + logger.info(config.dump()) + + main(config) \ No newline at end of file diff --git a/cv/classification/dla34/optimizer.py b/cv/classification/dla34/optimizer.py new file mode 120000 index 00000000..e5dec61d --- /dev/null +++ b/cv/classification/dla34/optimizer.py @@ -0,0 +1 @@ +../optimizer.py \ No newline at end of file diff --git a/cv/classification/dla34/requirements.txt b/cv/classification/dla34/requirements.txt new file mode 120000 index 00000000..dc833dd4 --- /dev/null +++ b/cv/classification/dla34/requirements.txt @@ -0,0 +1 @@ +../requirements.txt \ No newline at end of file diff --git a/cv/classification/dla34/train.sh b/cv/classification/dla34/train.sh new file mode 100755 index 00000000..d08a46cb --- /dev/null +++ b/cv/classification/dla34/train.sh @@ -0,0 +1,16 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=8 +PORT=12346 +MODEL_ARCH="dla34" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg 
configs/default_settings.yaml \ + --model_arch $MODEL_ARCH \ + --lr 1e-3 + diff --git a/cv/classification/dla34/utils.py b/cv/classification/dla34/utils.py new file mode 120000 index 00000000..50fbc6d8 --- /dev/null +++ b/cv/classification/dla34/utils.py @@ -0,0 +1 @@ +../utils.py \ No newline at end of file diff --git a/cv/classification/dla46_c/README.md b/cv/classification/dla46_c/README.md new file mode 100644 index 00000000..c7037cd6 --- /dev/null +++ b/cv/classification/dla46_c/README.md @@ -0,0 +1,66 @@ +## dla46_c + +### Installation +- Install the latest version of OneFlow +```bash +python3 -m pip install oneflow -f https://staging.oneflow.info/branch/master/[PLATFORM] +``` +Find more information on [install oneflow](https://github.com/Oneflow-Inc/oneflow#install-oneflow) + +- Install flowvision + +Then install the latest stable release of flowvision + +```bash +pip install flowvision==0.2.1 +``` + +- Install other requirements +```bash +python3 -m pip install -r requirements.txt +``` + +### Dataset +#### ImageNet +For ImageNet dataset, you can download it from http://image-net.org/. We provide the following two ways to load data: + +- For standard folder dataset, move validation images to labeled sub-folders. The file structure should look like: + ```bash + $ tree data + imagenet + ├── train + │ ├── class1 + │ │ ├── img1.jpeg + │ │ ├── img2.jpeg + │ │ └── ... + │ ├── class2 + │ │ ├── img3.jpeg + │ │ └── ... + │ └── ... + └── val + ├── class1 + │ ├── img4.jpeg + │ ├── img5.jpeg + │ └── ... + ├── class2 + │ ├── img6.jpeg + │ └── ... + └── ... + + +### Training + +You can use bash script `train.sh` to train this model. + +```bash +sh train.sh +``` + +### inference + +Bash script `infer.sh` is used to infer the trained model. 
+ +```bash +sh infer.sh +``` + diff --git a/cv/classification/dla46_c/config.py b/cv/classification/dla46_c/config.py new file mode 120000 index 00000000..3721332c --- /dev/null +++ b/cv/classification/dla46_c/config.py @@ -0,0 +1 @@ +../config.py \ No newline at end of file diff --git a/cv/classification/dla46_c/configs b/cv/classification/dla46_c/configs new file mode 120000 index 00000000..271eaab3 --- /dev/null +++ b/cv/classification/dla46_c/configs @@ -0,0 +1 @@ +../configs \ No newline at end of file diff --git a/cv/classification/dla46_c/data b/cv/classification/dla46_c/data new file mode 120000 index 00000000..4909e06e --- /dev/null +++ b/cv/classification/dla46_c/data @@ -0,0 +1 @@ +../data \ No newline at end of file diff --git a/cv/classification/dla46_c/infer.sh b/cv/classification/dla46_c/infer.sh new file mode 100755 index 00000000..e0645e28 --- /dev/null +++ b/cv/classification/dla46_c/infer.sh @@ -0,0 +1,17 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=1 +PORT=12346 +MODEL_ARCH="dla46_c" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/default_settings.yaml \ + --model_arch $MODEL_ARCH \ + --lr 1e-3 \ + --throughput + diff --git a/cv/classification/dla46_c/logger.py b/cv/classification/dla46_c/logger.py new file mode 120000 index 00000000..e1dc8ceb --- /dev/null +++ b/cv/classification/dla46_c/logger.py @@ -0,0 +1 @@ +../logger.py \ No newline at end of file diff --git a/cv/classification/dla46_c/lr_scheduler.py b/cv/classification/dla46_c/lr_scheduler.py new file mode 120000 index 00000000..268fb140 --- /dev/null +++ b/cv/classification/dla46_c/lr_scheduler.py @@ -0,0 +1 @@ +../lr_scheduler.py \ No newline at end of file diff --git a/cv/classification/dla46_c/main.py b/cv/classification/dla46_c/main.py new file mode 100644 index 00000000..c2d27d1b --- /dev/null +++ b/cv/classification/dla46_c/main.py @@ -0,0 +1,455 @@ +""" 
+Modified from https://github.com/microsoft/Swin-Transformer/blob/main/main.py +""" + +import os +import time +import argparse +import datetime +import numpy as np +import oneflow as flow +import oneflow.backends.cudnn as cudnn + +from flowvision.loss.cross_entropy import ( + LabelSmoothingCrossEntropy, + SoftTargetCrossEntropy, +) +from flowvision.utils.metrics import accuracy +from flowvision.models import ModelCreator + +from config import get_config +from data import build_loader +from lr_scheduler import build_scheduler +from optimizer import build_optimizer +from logger import create_logger +from utils import ( + load_checkpoint, + save_checkpoint, + get_grad_norm, + auto_resume_helper, + reduce_tensor, + AverageMeter, + TimeMeter +) + +def build_model(config): + model_arch = config.MODEL.ARCH + model = ModelCreator.create_model(model_arch, pretrained=config.MODEL.PRETRAINED) + return model + + +def parse_option(): + parser = argparse.ArgumentParser( + "Flowvision image classification training and evaluation script", add_help=False + ) + parser.add_argument( + "--model_arch", + type=str, + required=True, + default="swin_tiny_patch4_window7_224", + help="model for training", + ) + parser.add_argument( + "--cfg", type=str, required=True, metavar="FILE", help="path to config file", + ) + parser.add_argument( + "--opts", + help="Modify config options by adding 'KEY VALUE' pairs. 
", + default=None, + nargs="+", + ) + + # easy config modification + parser.add_argument( + "--synthetic-data", + action="store_true", + dest="synthetic_data", + help="Use synthetic data", + ) + parser.add_argument( + "--epochs", type=int, help="batch size for single GPU" + ) + parser.add_argument("--lr", type=float, help="initial learning rate") + parser.add_argument( + "--image-size", type=int, help="initial image size" + ) + parser.add_argument( + "--batch-size", type=int, help="batch size for single GPU" + ) + parser.add_argument("--data-path", type=str, help="path to dataset") + parser.add_argument( + "--zip", + action="store_true", + help="use zipped dataset instead of folder dataset", + ) + parser.add_argument( + "--cache-mode", + type=str, + default="part", + choices=["no", "full", "part"], + help="no: no cache, " + "full: cache all data, " + "part: sharding the dataset into nonoverlapping pieces and only cache one piece", + ) + parser.add_argument("--resume", help="resume from checkpoint") + parser.add_argument( + "--accumulation-steps", type=int, help="gradient accumulation steps" + ) + parser.add_argument( + "--use-checkpoint", + action="store_true", + help="whether to use gradient checkpointing to save memory", + ) + parser.add_argument( + "--output", + default="output", + type=str, + metavar="PATH", + help="root of output folder, the full path is // (default: output)", + ) + parser.add_argument("--tag", help="tag of experiment") + parser.add_argument("--eval", action="store_true", help="Perform evaluation only") + parser.add_argument( + "--throughput", action="store_true", help="Test throughput only" + ) + + # distributed training + parser.add_argument( + "--local_rank", + type=int, + default=0, + required=False, + help="local rank for DistributedDataParallel", + ) + + args, unparsed = parser.parse_known_args() + + config = get_config(args) + + return args, config + + +def main(config): + ( + dataset_train, + dataset_val, + data_loader_train, + 
data_loader_val, + mixup_fn, + ) = build_loader(config) + + logger.info(f"Creating model:{config.MODEL.ARCH}") + model = build_model(config) + model.cuda() + + optimizer = build_optimizer(config, model) + model = flow.nn.parallel.DistributedDataParallel(model, broadcast_buffers=False) + # FIXME: model with DDP wrapper doesn't have model.module + model_without_ddp = model + + n_parameters = sum(p.numel() for p in model.parameters() if p.requires_grad) + logger.info(f"number of params: {n_parameters}") + if hasattr(model_without_ddp, "flops"): + flops = model_without_ddp.flops() + logger.info(f"number of GFLOPs: {flops / 1e9}") + + lr_scheduler = build_scheduler(config, optimizer, len(data_loader_train)) + + if config.AUG.MIXUP > 0.0: + # smoothing is handled with mixup label transform + criterion = SoftTargetCrossEntropy() + elif config.MODEL.LABEL_SMOOTHING > 0.0: + criterion = LabelSmoothingCrossEntropy(smoothing=config.MODEL.LABEL_SMOOTHING) + else: + criterion = flow.nn.CrossEntropyLoss() + + max_accuracy = 0.0 + + if config.TRAIN.AUTO_RESUME: + resume_file = auto_resume_helper(config.OUTPUT) + if resume_file: + if config.MODEL.RESUME: + logger.warning( + f"auto-resume changing resume file from {config.MODEL.RESUME} to {resume_file}" + ) + config.defrost() + config.MODEL.RESUME = resume_file + config.freeze() + logger.info(f"auto resuming from {resume_file}") + else: + logger.info(f"no checkpoint found in {config.OUTPUT}, ignoring auto resume") + + if config.MODEL.RESUME: + print("resume called") + max_accuracy = load_checkpoint( + config, model_without_ddp, optimizer, lr_scheduler, logger + ) + acc1, acc5, loss = validate(config, data_loader_val, model) + logger.info( + f"Accuracy of the network on the {len(data_loader_val)} test images: {acc1:.1f}%" + ) + if config.EVAL_MODE: + return + + if config.THROUGHPUT_MODE: + acc1, acc5, loss = validate(config, data_loader_val, model) + logger.info( + f"Accuracy of the network on the {len(data_loader_val)} test images: 
{acc1:.1f}%" + ) + throughput(data_loader_val, model, logger) + return + + logger.info("Start training") + start_time = time.time() + for epoch in range(config.TRAIN.START_EPOCH, config.TRAIN.EPOCHS): + if not config.DATA.SYNTHETIC_DATA: + data_loader_train.sampler.set_epoch(epoch) + + train_one_epoch( + config, + model, + criterion, + data_loader_train, + optimizer, + epoch, + mixup_fn, + lr_scheduler, + ) + if flow.env.get_rank() == 0 and ( + epoch % config.SAVE_FREQ == 0 or epoch == (config.TRAIN.EPOCHS - 1) + ): + save_checkpoint( + config, + epoch, + model_without_ddp, + max_accuracy, + optimizer, + lr_scheduler, + logger, + ) + + # no validate + acc1, acc5, loss = validate(config, data_loader_val, model) + logger.info( + f"Accuracy of the network on the {len(data_loader_val)} test images: {acc1:.1f}%" + ) + max_accuracy = max(max_accuracy, acc1) + logger.info(f"Max accuracy: {max_accuracy:.2f}%") + + total_time = time.time() - start_time + total_time_str = str(datetime.timedelta(seconds=int(total_time))) + logger.info("Training time {}".format(total_time_str)) + + +def train_one_epoch( + config, model, criterion, data_loader, optimizer, epoch, mixup_fn, lr_scheduler +): + model.train() + optimizer.zero_grad() + + num_steps = len(data_loader) + one_sample_time = TimeMeter() + loss_meter = AverageMeter() + + start = time.time() + end = time.time() + for idx, (samples, targets) in enumerate(data_loader): + samples = samples.cuda() + targets = targets.cuda() + + if mixup_fn is not None: + samples, targets = mixup_fn(samples, targets) + + outputs = model(samples) + + if config.TRAIN.ACCUMULATION_STEPS > 1: + loss = criterion(outputs, targets) + loss = loss / config.TRAIN.ACCUMULATION_STEPS + loss.backward() + if config.TRAIN.CLIP_GRAD: + flow.nn.utils.clip_grad_norm_( + model.parameters(), config.TRAIN.CLIP_GRAD + ) + if (idx + 1) % config.TRAIN.ACCUMULATION_STEPS == 0: + optimizer.step() + optimizer.zero_grad() + lr_scheduler.step() + else: + loss = 
criterion(outputs, targets) + optimizer.zero_grad() + loss.backward() + if config.TRAIN.CLIP_GRAD: + flow.nn.utils.clip_grad_norm_( + model.parameters(), config.TRAIN.CLIP_GRAD + ) + optimizer.step() + lr_scheduler.step() + + one_sample_time.record(samples.size(0) * flow.env.get_world_size()) + loss_meter.record(loss.cpu().detach(), targets.size(0)) + + end = time.time() + + if idx % config.PRINT_FREQ == 0: + lr = optimizer.param_groups[0]["lr"] + loss, loss_avg = loss_meter.get() + throughput, throughput_avg = one_sample_time.get() + etas = (num_steps - idx) * samples.size(0) * flow.env.get_world_size() / throughput_avg + one_sample_time.reset() + + logger.info( + f"Train: [{epoch}/{config.TRAIN.EPOCHS}][{idx}/{num_steps}]\t" + f"eta {datetime.timedelta(seconds=int(etas))}\tlr {lr:.6f}\t" + f"time {samples.size(0) * flow.env.get_world_size() / throughput:.4f}s ({samples.size(0) * flow.env.get_world_size() / throughput_avg:.4f}s)\t" + f"rate {throughput:.4f}/s ({throughput_avg:.4f}/s)\t" + f"loss {loss:.4f} ({loss_avg:.4f})\t" + ) + + epoch_time = time.time() - start + logger.info( + f"EPOCH {epoch} training takes {datetime.timedelta(seconds=int(epoch_time))}" + ) + + +@flow.no_grad() +def validate(config, data_loader, model): + criterion = flow.nn.CrossEntropyLoss() + model.eval() + + batch_time = TimeMeter() + loss_meter = AverageMeter() + acc1_meter = AverageMeter() + acc5_meter = AverageMeter() + + end = time.time() + for idx, (images, target) in enumerate(data_loader): + images = images.cuda() + target = target.cuda() + + # compute output + output = model(images) + + # measure accuracy and record loss + loss = criterion(output, target) + acc1, acc5 = accuracy(output, target, topk=(1, 5)) + + acc1 = reduce_tensor(acc1) + acc5 = reduce_tensor(acc5) + loss = reduce_tensor(loss) + + batch_time.record(target.size(0) * flow.env.get_world_size()) + loss_meter.record(loss, target.size(0)) + acc1_meter.record(acc1, target.size(0)) + acc5_meter.record(acc5, 
target.size(0)) + + # measure elapsed time + end = time.time() + + if idx % config.PRINT_FREQ == 0: + acc1, acc1_avg = acc1_meter.get() + acc5, acc5_avg = acc5_meter.get() + loss, loss_avg = loss_meter.get() + throughput, throughput_avg = batch_time.get() + batch_time.reset() + + logger.info( + f"Test: [{idx}/{len(data_loader)}]\t" + f"Throughput {throughput:.3f} ({throughput_avg:.3f})\t" + f"Loss {loss:.4f} ({loss_avg:.4f})\t" + f"Acc@1 {acc1:.3f} ({acc1_avg:.3f})\t" + f"Acc@5 {acc5:.3f} ({acc5_avg:.3f})\t" + ) + + logger.info(f" * Acc@1 {acc1_avg:.3f} Acc@5 {acc5_avg:.3f}") + return acc1_avg, acc5_avg, loss_meter + + +@flow.no_grad() +def throughput(data_loader, model, logger): + model.eval() + + for idx, (images, _) in enumerate(data_loader): + images = images.cuda() + batch_size = images.shape[0] + for i in range(50): + model(images) + flow.cuda.synchronize() + # TODO: add flow.cuda.synchronize() + logger.info(f"throughput averaged with 30 times") + tic1 = time.time() + for i in range(30): + model(images) + + flow.cuda.synchronize() + tic2 = time.time() + logger.info( + f"batch_size {batch_size} throughput {30 * batch_size / (tic2 - tic1)}" + ) + return + + +if __name__ == "__main__": + _, config = parse_option() + + if "RANK" in os.environ and "WORLD_SIZE" in os.environ: + rank = flow.env.get_rank() + world_size = flow.env.get_world_size() + print(f"RANK and WORLD_SIZE in environ: {rank}/{world_size}") + else: + rank = -1 + world_size = -1 + + seed = config.SEED + flow.env.get_rank() + flow.manual_seed(seed) + np.random.seed(seed) + cudnn.benchmark = True + + # linear_scaled_lr = ( + # config.TRAIN.BASE_LR + # * config.DATA.BATCH_SIZE + # * flow.env.get_world_size() + # / 512.0 + # ) + # linear_scaled_warmup_lr = ( + # config.TRAIN.WARMUP_LR + # * config.DATA.BATCH_SIZE + # * flow.env.get_world_size() + # / 512.0 + # ) + linear_scaled_lr = config.TRAIN.BASE_LR + linear_scaled_warmup_lr = config.TRAIN.WARMUP_LR + + linear_scaled_min_lr = ( + config.TRAIN.MIN_LR 
* config.DATA.BATCH_SIZE * flow.env.get_world_size() / 512.0 + ) + + # gradient accumulation also need to scale the learning rate + if config.TRAIN.ACCUMULATION_STEPS > 1: + linear_scaled_lr = linear_scaled_lr * config.TRAIN.ACCUMULATION_STEPS + linear_scaled_warmup_lr = ( + linear_scaled_warmup_lr * config.TRAIN.ACCUMULATION_STEPS + ) + linear_scaled_min_lr = linear_scaled_min_lr * config.TRAIN.ACCUMULATION_STEPS + config.defrost() + config.TRAIN.BASE_LR = linear_scaled_lr + config.TRAIN.WARMUP_LR = linear_scaled_warmup_lr + config.TRAIN.MIN_LR = linear_scaled_min_lr + config.freeze() + + os.makedirs(config.OUTPUT, exist_ok=True) + logger = create_logger( + output_dir=config.OUTPUT, + dist_rank=flow.env.get_rank(), + name=f"{config.MODEL.ARCH}", + ) + + if flow.env.get_rank() == 0: + path = os.path.join(config.OUTPUT, "config.json") + with open(path, "w") as f: + f.write(config.dump()) + logger.info(f"Full config saved to {path}") + + # print config + logger.info(config.dump()) + + main(config) \ No newline at end of file diff --git a/cv/classification/dla46_c/optimizer.py b/cv/classification/dla46_c/optimizer.py new file mode 120000 index 00000000..e5dec61d --- /dev/null +++ b/cv/classification/dla46_c/optimizer.py @@ -0,0 +1 @@ +../optimizer.py \ No newline at end of file diff --git a/cv/classification/dla46_c/requirements.txt b/cv/classification/dla46_c/requirements.txt new file mode 120000 index 00000000..dc833dd4 --- /dev/null +++ b/cv/classification/dla46_c/requirements.txt @@ -0,0 +1 @@ +../requirements.txt \ No newline at end of file diff --git a/cv/classification/dla46_c/train.sh b/cv/classification/dla46_c/train.sh new file mode 100755 index 00000000..01c46029 --- /dev/null +++ b/cv/classification/dla46_c/train.sh @@ -0,0 +1,16 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=8 +PORT=12346 +MODEL_ARCH="dla46_c" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + 
main.py \ + --cfg configs/default_settings.yaml \ + --model_arch $MODEL_ARCH \ + --lr 1e-3 + diff --git a/cv/classification/dla46_c/utils.py b/cv/classification/dla46_c/utils.py new file mode 120000 index 00000000..50fbc6d8 --- /dev/null +++ b/cv/classification/dla46_c/utils.py @@ -0,0 +1 @@ +../utils.py \ No newline at end of file diff --git a/cv/classification/dla46x_c/README.md b/cv/classification/dla46x_c/README.md new file mode 100644 index 00000000..a57bdc8b --- /dev/null +++ b/cv/classification/dla46x_c/README.md @@ -0,0 +1,66 @@ +## dla46x_c + +### Installation +- Install the latest version of OneFlow +```bash +python3 -m pip install oneflow -f https://staging.oneflow.info/branch/master/[PLATFORM] +``` +Find more information on [install oneflow](https://github.com/Oneflow-Inc/oneflow#install-oneflow) + +- Install flowvision + +Then install the latest stable release of flowvision + +```bash +pip install flowvision==0.2.1 +``` + +- Install other requirements +```bash +python3 -m pip install -r requirements.txt +``` + +### Dataset +#### ImageNet +For ImageNet dataset, you can download it from http://image-net.org/. We provide the following two ways to load data: + +- For standard folder dataset, move validation images to labeled sub-folders. The file structure should look like: + ```bash + $ tree data + imagenet + ├── train + │ ├── class1 + │ │ ├── img1.jpeg + │ │ ├── img2.jpeg + │ │ └── ... + │ ├── class2 + │ │ ├── img3.jpeg + │ │ └── ... + │ └── ... + └── val + ├── class1 + │ ├── img4.jpeg + │ ├── img5.jpeg + │ └── ... + ├── class2 + │ ├── img6.jpeg + │ └── ... + └── ... + + +### Training + +You can use bash script `train.sh` to train this model. + +```bash +sh train.sh +``` + +### inference + +Bash script `infer.sh` is used to infer the trained model. 
+ +```bash +sh infer.sh +``` + diff --git a/cv/classification/dla46x_c/config.py b/cv/classification/dla46x_c/config.py new file mode 120000 index 00000000..3721332c --- /dev/null +++ b/cv/classification/dla46x_c/config.py @@ -0,0 +1 @@ +../config.py \ No newline at end of file diff --git a/cv/classification/dla46x_c/configs b/cv/classification/dla46x_c/configs new file mode 120000 index 00000000..271eaab3 --- /dev/null +++ b/cv/classification/dla46x_c/configs @@ -0,0 +1 @@ +../configs \ No newline at end of file diff --git a/cv/classification/dla46x_c/data b/cv/classification/dla46x_c/data new file mode 120000 index 00000000..4909e06e --- /dev/null +++ b/cv/classification/dla46x_c/data @@ -0,0 +1 @@ +../data \ No newline at end of file diff --git a/cv/classification/dla46x_c/infer.sh b/cv/classification/dla46x_c/infer.sh new file mode 100755 index 00000000..0ec005dd --- /dev/null +++ b/cv/classification/dla46x_c/infer.sh @@ -0,0 +1,17 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=1 +PORT=12346 +MODEL_ARCH="dla46x_c" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/default_settings.yaml \ + --model_arch $MODEL_ARCH \ + --lr 1e-3 \ + --throughput + diff --git a/cv/classification/dla46x_c/logger.py b/cv/classification/dla46x_c/logger.py new file mode 120000 index 00000000..e1dc8ceb --- /dev/null +++ b/cv/classification/dla46x_c/logger.py @@ -0,0 +1 @@ +../logger.py \ No newline at end of file diff --git a/cv/classification/dla46x_c/lr_scheduler.py b/cv/classification/dla46x_c/lr_scheduler.py new file mode 120000 index 00000000..268fb140 --- /dev/null +++ b/cv/classification/dla46x_c/lr_scheduler.py @@ -0,0 +1 @@ +../lr_scheduler.py \ No newline at end of file diff --git a/cv/classification/dla46x_c/main.py b/cv/classification/dla46x_c/main.py new file mode 100644 index 00000000..c2d27d1b --- /dev/null +++ b/cv/classification/dla46x_c/main.py @@ -0,0 
+1,455 @@ +""" +Modified from https://github.com/microsoft/Swin-Transformer/blob/main/main.py +""" + +import os +import time +import argparse +import datetime +import numpy as np +import oneflow as flow +import oneflow.backends.cudnn as cudnn + +from flowvision.loss.cross_entropy import ( + LabelSmoothingCrossEntropy, + SoftTargetCrossEntropy, +) +from flowvision.utils.metrics import accuracy +from flowvision.models import ModelCreator + +from config import get_config +from data import build_loader +from lr_scheduler import build_scheduler +from optimizer import build_optimizer +from logger import create_logger +from utils import ( + load_checkpoint, + save_checkpoint, + get_grad_norm, + auto_resume_helper, + reduce_tensor, + AverageMeter, + TimeMeter +) + +def build_model(config): + model_arch = config.MODEL.ARCH + model = ModelCreator.create_model(model_arch, pretrained=config.MODEL.PRETRAINED) + return model + + +def parse_option(): + parser = argparse.ArgumentParser( + "Flowvision image classification training and evaluation script", add_help=False + ) + parser.add_argument( + "--model_arch", + type=str, + required=True, + default="swin_tiny_patch4_window7_224", + help="model for training", + ) + parser.add_argument( + "--cfg", type=str, required=True, metavar="FILE", help="path to config file", + ) + parser.add_argument( + "--opts", + help="Modify config options by adding 'KEY VALUE' pairs. 
", + default=None, + nargs="+", + ) + + # easy config modification + parser.add_argument( + "--synthetic-data", + action="store_true", + dest="synthetic_data", + help="Use synthetic data", + ) + parser.add_argument( + "--epochs", type=int, help="batch size for single GPU" + ) + parser.add_argument("--lr", type=float, help="initial learning rate") + parser.add_argument( + "--image-size", type=int, help="initial image size" + ) + parser.add_argument( + "--batch-size", type=int, help="batch size for single GPU" + ) + parser.add_argument("--data-path", type=str, help="path to dataset") + parser.add_argument( + "--zip", + action="store_true", + help="use zipped dataset instead of folder dataset", + ) + parser.add_argument( + "--cache-mode", + type=str, + default="part", + choices=["no", "full", "part"], + help="no: no cache, " + "full: cache all data, " + "part: sharding the dataset into nonoverlapping pieces and only cache one piece", + ) + parser.add_argument("--resume", help="resume from checkpoint") + parser.add_argument( + "--accumulation-steps", type=int, help="gradient accumulation steps" + ) + parser.add_argument( + "--use-checkpoint", + action="store_true", + help="whether to use gradient checkpointing to save memory", + ) + parser.add_argument( + "--output", + default="output", + type=str, + metavar="PATH", + help="root of output folder, the full path is // (default: output)", + ) + parser.add_argument("--tag", help="tag of experiment") + parser.add_argument("--eval", action="store_true", help="Perform evaluation only") + parser.add_argument( + "--throughput", action="store_true", help="Test throughput only" + ) + + # distributed training + parser.add_argument( + "--local_rank", + type=int, + default=0, + required=False, + help="local rank for DistributedDataParallel", + ) + + args, unparsed = parser.parse_known_args() + + config = get_config(args) + + return args, config + + +def main(config): + ( + dataset_train, + dataset_val, + data_loader_train, + 
data_loader_val, + mixup_fn, + ) = build_loader(config) + + logger.info(f"Creating model:{config.MODEL.ARCH}") + model = build_model(config) + model.cuda() + + optimizer = build_optimizer(config, model) + model = flow.nn.parallel.DistributedDataParallel(model, broadcast_buffers=False) + # FIXME: model with DDP wrapper doesn't have model.module + model_without_ddp = model + + n_parameters = sum(p.numel() for p in model.parameters() if p.requires_grad) + logger.info(f"number of params: {n_parameters}") + if hasattr(model_without_ddp, "flops"): + flops = model_without_ddp.flops() + logger.info(f"number of GFLOPs: {flops / 1e9}") + + lr_scheduler = build_scheduler(config, optimizer, len(data_loader_train)) + + if config.AUG.MIXUP > 0.0: + # smoothing is handled with mixup label transform + criterion = SoftTargetCrossEntropy() + elif config.MODEL.LABEL_SMOOTHING > 0.0: + criterion = LabelSmoothingCrossEntropy(smoothing=config.MODEL.LABEL_SMOOTHING) + else: + criterion = flow.nn.CrossEntropyLoss() + + max_accuracy = 0.0 + + if config.TRAIN.AUTO_RESUME: + resume_file = auto_resume_helper(config.OUTPUT) + if resume_file: + if config.MODEL.RESUME: + logger.warning( + f"auto-resume changing resume file from {config.MODEL.RESUME} to {resume_file}" + ) + config.defrost() + config.MODEL.RESUME = resume_file + config.freeze() + logger.info(f"auto resuming from {resume_file}") + else: + logger.info(f"no checkpoint found in {config.OUTPUT}, ignoring auto resume") + + if config.MODEL.RESUME: + print("resume called") + max_accuracy = load_checkpoint( + config, model_without_ddp, optimizer, lr_scheduler, logger + ) + acc1, acc5, loss = validate(config, data_loader_val, model) + logger.info( + f"Accuracy of the network on the {len(data_loader_val)} test images: {acc1:.1f}%" + ) + if config.EVAL_MODE: + return + + if config.THROUGHPUT_MODE: + acc1, acc5, loss = validate(config, data_loader_val, model) + logger.info( + f"Accuracy of the network on the {len(data_loader_val)} test images: 
{acc1:.1f}%" + ) + throughput(data_loader_val, model, logger) + return + + logger.info("Start training") + start_time = time.time() + for epoch in range(config.TRAIN.START_EPOCH, config.TRAIN.EPOCHS): + if not config.DATA.SYNTHETIC_DATA: + data_loader_train.sampler.set_epoch(epoch) + + train_one_epoch( + config, + model, + criterion, + data_loader_train, + optimizer, + epoch, + mixup_fn, + lr_scheduler, + ) + if flow.env.get_rank() == 0 and ( + epoch % config.SAVE_FREQ == 0 or epoch == (config.TRAIN.EPOCHS - 1) + ): + save_checkpoint( + config, + epoch, + model_without_ddp, + max_accuracy, + optimizer, + lr_scheduler, + logger, + ) + + # no validate + acc1, acc5, loss = validate(config, data_loader_val, model) + logger.info( + f"Accuracy of the network on the {len(data_loader_val)} test images: {acc1:.1f}%" + ) + max_accuracy = max(max_accuracy, acc1) + logger.info(f"Max accuracy: {max_accuracy:.2f}%") + + total_time = time.time() - start_time + total_time_str = str(datetime.timedelta(seconds=int(total_time))) + logger.info("Training time {}".format(total_time_str)) + + +def train_one_epoch( + config, model, criterion, data_loader, optimizer, epoch, mixup_fn, lr_scheduler +): + model.train() + optimizer.zero_grad() + + num_steps = len(data_loader) + one_sample_time = TimeMeter() + loss_meter = AverageMeter() + + start = time.time() + end = time.time() + for idx, (samples, targets) in enumerate(data_loader): + samples = samples.cuda() + targets = targets.cuda() + + if mixup_fn is not None: + samples, targets = mixup_fn(samples, targets) + + outputs = model(samples) + + if config.TRAIN.ACCUMULATION_STEPS > 1: + loss = criterion(outputs, targets) + loss = loss / config.TRAIN.ACCUMULATION_STEPS + loss.backward() + if config.TRAIN.CLIP_GRAD: + flow.nn.utils.clip_grad_norm_( + model.parameters(), config.TRAIN.CLIP_GRAD + ) + if (idx + 1) % config.TRAIN.ACCUMULATION_STEPS == 0: + optimizer.step() + optimizer.zero_grad() + lr_scheduler.step() + else: + loss = 
criterion(outputs, targets) + optimizer.zero_grad() + loss.backward() + if config.TRAIN.CLIP_GRAD: + flow.nn.utils.clip_grad_norm_( + model.parameters(), config.TRAIN.CLIP_GRAD + ) + optimizer.step() + lr_scheduler.step() + + one_sample_time.record(samples.size(0) * flow.env.get_world_size()) + loss_meter.record(loss.cpu().detach(), targets.size(0)) + + end = time.time() + + if idx % config.PRINT_FREQ == 0: + lr = optimizer.param_groups[0]["lr"] + loss, loss_avg = loss_meter.get() + throughput, throughput_avg = one_sample_time.get() + etas = (num_steps - idx) * samples.size(0) * flow.env.get_world_size() / throughput_avg + one_sample_time.reset() + + logger.info( + f"Train: [{epoch}/{config.TRAIN.EPOCHS}][{idx}/{num_steps}]\t" + f"eta {datetime.timedelta(seconds=int(etas))}\tlr {lr:.6f}\t" + f"time {samples.size(0) * flow.env.get_world_size() / throughput:.4f}s ({samples.size(0) * flow.env.get_world_size() / throughput_avg:.4f}s)\t" + f"rate {throughput:.4f}/s ({throughput_avg:.4f}/s)\t" + f"loss {loss:.4f} ({loss_avg:.4f})\t" + ) + + epoch_time = time.time() - start + logger.info( + f"EPOCH {epoch} training takes {datetime.timedelta(seconds=int(epoch_time))}" + ) + + +@flow.no_grad() +def validate(config, data_loader, model): + criterion = flow.nn.CrossEntropyLoss() + model.eval() + + batch_time = TimeMeter() + loss_meter = AverageMeter() + acc1_meter = AverageMeter() + acc5_meter = AverageMeter() + + end = time.time() + for idx, (images, target) in enumerate(data_loader): + images = images.cuda() + target = target.cuda() + + # compute output + output = model(images) + + # measure accuracy and record loss + loss = criterion(output, target) + acc1, acc5 = accuracy(output, target, topk=(1, 5)) + + acc1 = reduce_tensor(acc1) + acc5 = reduce_tensor(acc5) + loss = reduce_tensor(loss) + + batch_time.record(target.size(0) * flow.env.get_world_size()) + loss_meter.record(loss, target.size(0)) + acc1_meter.record(acc1, target.size(0)) + acc5_meter.record(acc5, 
target.size(0)) + + # measure elapsed time + end = time.time() + + if idx % config.PRINT_FREQ == 0: + acc1, acc1_avg = acc1_meter.get() + acc5, acc5_avg = acc5_meter.get() + loss, loss_avg = loss_meter.get() + throughput, throughput_avg = batch_time.get() + batch_time.reset() + + logger.info( + f"Test: [{idx}/{len(data_loader)}]\t" + f"Throughput {throughput:.3f} ({throughput_avg:.3f})\t" + f"Loss {loss:.4f} ({loss_avg:.4f})\t" + f"Acc@1 {acc1:.3f} ({acc1_avg:.3f})\t" + f"Acc@5 {acc5:.3f} ({acc5_avg:.3f})\t" + ) + + logger.info(f" * Acc@1 {acc1_avg:.3f} Acc@5 {acc5_avg:.3f}") + return acc1_avg, acc5_avg, loss_meter + + +@flow.no_grad() +def throughput(data_loader, model, logger): + model.eval() + + for idx, (images, _) in enumerate(data_loader): + images = images.cuda() + batch_size = images.shape[0] + for i in range(50): + model(images) + flow.cuda.synchronize() + # TODO: add flow.cuda.synchronize() + logger.info(f"throughput averaged with 30 times") + tic1 = time.time() + for i in range(30): + model(images) + + flow.cuda.synchronize() + tic2 = time.time() + logger.info( + f"batch_size {batch_size} throughput {30 * batch_size / (tic2 - tic1)}" + ) + return + + +if __name__ == "__main__": + _, config = parse_option() + + if "RANK" in os.environ and "WORLD_SIZE" in os.environ: + rank = flow.env.get_rank() + world_size = flow.env.get_world_size() + print(f"RANK and WORLD_SIZE in environ: {rank}/{world_size}") + else: + rank = -1 + world_size = -1 + + seed = config.SEED + flow.env.get_rank() + flow.manual_seed(seed) + np.random.seed(seed) + cudnn.benchmark = True + + # linear_scaled_lr = ( + # config.TRAIN.BASE_LR + # * config.DATA.BATCH_SIZE + # * flow.env.get_world_size() + # / 512.0 + # ) + # linear_scaled_warmup_lr = ( + # config.TRAIN.WARMUP_LR + # * config.DATA.BATCH_SIZE + # * flow.env.get_world_size() + # / 512.0 + # ) + linear_scaled_lr = config.TRAIN.BASE_LR + linear_scaled_warmup_lr = config.TRAIN.WARMUP_LR + + linear_scaled_min_lr = ( + config.TRAIN.MIN_LR 
* config.DATA.BATCH_SIZE * flow.env.get_world_size() / 512.0 + ) + + # gradient accumulation also need to scale the learning rate + if config.TRAIN.ACCUMULATION_STEPS > 1: + linear_scaled_lr = linear_scaled_lr * config.TRAIN.ACCUMULATION_STEPS + linear_scaled_warmup_lr = ( + linear_scaled_warmup_lr * config.TRAIN.ACCUMULATION_STEPS + ) + linear_scaled_min_lr = linear_scaled_min_lr * config.TRAIN.ACCUMULATION_STEPS + config.defrost() + config.TRAIN.BASE_LR = linear_scaled_lr + config.TRAIN.WARMUP_LR = linear_scaled_warmup_lr + config.TRAIN.MIN_LR = linear_scaled_min_lr + config.freeze() + + os.makedirs(config.OUTPUT, exist_ok=True) + logger = create_logger( + output_dir=config.OUTPUT, + dist_rank=flow.env.get_rank(), + name=f"{config.MODEL.ARCH}", + ) + + if flow.env.get_rank() == 0: + path = os.path.join(config.OUTPUT, "config.json") + with open(path, "w") as f: + f.write(config.dump()) + logger.info(f"Full config saved to {path}") + + # print config + logger.info(config.dump()) + + main(config) \ No newline at end of file diff --git a/cv/classification/dla46x_c/optimizer.py b/cv/classification/dla46x_c/optimizer.py new file mode 120000 index 00000000..e5dec61d --- /dev/null +++ b/cv/classification/dla46x_c/optimizer.py @@ -0,0 +1 @@ +../optimizer.py \ No newline at end of file diff --git a/cv/classification/dla46x_c/requirements.txt b/cv/classification/dla46x_c/requirements.txt new file mode 120000 index 00000000..dc833dd4 --- /dev/null +++ b/cv/classification/dla46x_c/requirements.txt @@ -0,0 +1 @@ +../requirements.txt \ No newline at end of file diff --git a/cv/classification/dla46x_c/train.sh b/cv/classification/dla46x_c/train.sh new file mode 100755 index 00000000..dff1fc9e --- /dev/null +++ b/cv/classification/dla46x_c/train.sh @@ -0,0 +1,16 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=8 +PORT=12346 +MODEL_ARCH="dla46x_c" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT 
\ + main.py \ + --cfg configs/default_settings.yaml \ + --model_arch $MODEL_ARCH \ + --lr 1e-3 + diff --git a/cv/classification/dla46x_c/utils.py b/cv/classification/dla46x_c/utils.py new file mode 120000 index 00000000..50fbc6d8 --- /dev/null +++ b/cv/classification/dla46x_c/utils.py @@ -0,0 +1 @@ +../utils.py \ No newline at end of file diff --git a/cv/classification/dla60/README.md b/cv/classification/dla60/README.md new file mode 100644 index 00000000..2237ac73 --- /dev/null +++ b/cv/classification/dla60/README.md @@ -0,0 +1,66 @@ +## dla60 + +### Installation +- Install the latest version of OneFlow +```bash +python3 -m pip install oneflow -f https://staging.oneflow.info/branch/master/[PLATFORM] +``` +Find more information on [install oneflow](https://github.com/Oneflow-Inc/oneflow#install-oneflow) + +- Install flowvision + +Then install the latest stable release of flowvision + +```bash +pip install flowvision==0.2.1 +``` + +- Install other requirements +```bash +python3 -m pip install -r requirements.txt +``` + +### Dataset +#### ImageNet +For ImageNet dataset, you can download it from http://image-net.org/. We provide the following two ways to load data: + +- For standard folder dataset, move validation images to labeled sub-folders. The file structure should look like: + ```bash + $ tree data + imagenet + ├── train + │ ├── class1 + │ │ ├── img1.jpeg + │ │ ├── img2.jpeg + │ │ └── ... + │ ├── class2 + │ │ ├── img3.jpeg + │ │ └── ... + │ └── ... + └── val + ├── class1 + │ ├── img4.jpeg + │ ├── img5.jpeg + │ └── ... + ├── class2 + │ ├── img6.jpeg + │ └── ... + └── ... + + +### Training + +You can use bash script `train.sh` to train this model. + +```bash +sh train.sh +``` + +### inference + +Bash script `infer.sh` is used to infer the trained model. 
+ +```bash +sh infer.sh +``` + diff --git a/cv/classification/dla60/config.py b/cv/classification/dla60/config.py new file mode 120000 index 00000000..3721332c --- /dev/null +++ b/cv/classification/dla60/config.py @@ -0,0 +1 @@ +../config.py \ No newline at end of file diff --git a/cv/classification/dla60/configs b/cv/classification/dla60/configs new file mode 120000 index 00000000..271eaab3 --- /dev/null +++ b/cv/classification/dla60/configs @@ -0,0 +1 @@ +../configs \ No newline at end of file diff --git a/cv/classification/dla60/data b/cv/classification/dla60/data new file mode 120000 index 00000000..4909e06e --- /dev/null +++ b/cv/classification/dla60/data @@ -0,0 +1 @@ +../data \ No newline at end of file diff --git a/cv/classification/dla60/infer.sh b/cv/classification/dla60/infer.sh new file mode 100755 index 00000000..d6e8e0b1 --- /dev/null +++ b/cv/classification/dla60/infer.sh @@ -0,0 +1,17 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=1 +PORT=12346 +MODEL_ARCH="dla60" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/default_settings.yaml \ + --model_arch $MODEL_ARCH \ + --lr 1e-3 \ + --throughput + diff --git a/cv/classification/dla60/logger.py b/cv/classification/dla60/logger.py new file mode 120000 index 00000000..e1dc8ceb --- /dev/null +++ b/cv/classification/dla60/logger.py @@ -0,0 +1 @@ +../logger.py \ No newline at end of file diff --git a/cv/classification/dla60/lr_scheduler.py b/cv/classification/dla60/lr_scheduler.py new file mode 120000 index 00000000..268fb140 --- /dev/null +++ b/cv/classification/dla60/lr_scheduler.py @@ -0,0 +1 @@ +../lr_scheduler.py \ No newline at end of file diff --git a/cv/classification/dla60/main.py b/cv/classification/dla60/main.py new file mode 100644 index 00000000..7ff436fd --- /dev/null +++ b/cv/classification/dla60/main.py @@ -0,0 +1,455 @@ +""" +Modified from 
https://github.com/microsoft/Swin-Transformer/blob/main/main.py +""" + +import os +import time +import argparse +import datetime +import numpy as np +import oneflow as flow +import oneflow.backends.cudnn as cudnn + +from flowvision.loss.cross_entropy import ( + LabelSmoothingCrossEntropy, + SoftTargetCrossEntropy, +) +from flowvision.utils.metrics import accuracy +from flowvision.models import ModelCreator + +from config import get_config +from data import build_loader +from lr_scheduler import build_scheduler +from optimizer import build_optimizer +from logger import create_logger +from utils import ( + load_checkpoint, + save_checkpoint, + get_grad_norm, + auto_resume_helper, + reduce_tensor, + AverageMeter, + TimeMeter +) + +def build_model(config): + model_arch = config.MODEL.ARCH + model = ModelCreator.create_model(model_arch, pretrained=config.MODEL.PRETRAINED) + return model + + +def parse_option(): + parser = argparse.ArgumentParser( + "Flowvision image classification training and evaluation script", add_help=False + ) + parser.add_argument( + "--model_arch", + type=str, + required=True, + default="swin_tiny_patch4_window7_224", + help="model for training", + ) + parser.add_argument( + "--cfg", type=str, required=True, metavar="FILE", help="path to config file", + ) + parser.add_argument( + "--opts", + help="Modify config options by adding 'KEY VALUE' pairs. 
", + default=None, + nargs="+", + ) + + # easy config modification + parser.add_argument( + "--synthetic-data", + action="store_true", + dest="synthetic_data", + help="Use synthetic data", + ) + parser.add_argument( + "--epochs", type=int, help="batch size for single GPU" + ) + parser.add_argument("--lr", type=float, help="initial learning rate") + parser.add_argument( + "--image-size", type=int, help="initial image size" + ) + parser.add_argument( + "--batch-size", type=int, help="batch size for single GPU" + ) + parser.add_argument("--data-path", type=str, help="path to dataset") + parser.add_argument( + "--zip", + action="store_true", + help="use zipped dataset instead of folder dataset", + ) + parser.add_argument( + "--cache-mode", + type=str, + default="part", + choices=["no", "full", "part"], + help="no: no cache, " + "full: cache all data, " + "part: sharding the dataset into nonoverlapping pieces and only cache one piece", + ) + parser.add_argument("--resume", help="resume from checkpoint") + parser.add_argument( + "--accumulation-steps", type=int, help="gradient accumulation steps" + ) + parser.add_argument( + "--use-checkpoint", + action="store_true", + help="whether to use gradient checkpointing to save memory", + ) + parser.add_argument( + "--output", + default="output", + type=str, + metavar="PATH", + help="root of output folder, the full path is // (default: output)", + ) + parser.add_argument("--tag", help="tag of experiment") + parser.add_argument("--eval", action="store_true", help="Perform evaluation only") + parser.add_argument( + "--throughput", action="store_true", help="Test throughput only" + ) + + # distributed training + parser.add_argument( + "--local_rank", + type=int, + default=0, + required=False, + help="local rank for DistributedDataParallel", + ) + + args, unparsed = parser.parse_known_args() + + config = get_config(args) + + return args, config + + +def main(config): + ( + dataset_train, + dataset_val, + data_loader_train, + 
data_loader_val, + mixup_fn, + ) = build_loader(config) + + logger.info(f"Creating model:{config.MODEL.ARCH}") + model = build_model(config) + model.cuda() + + optimizer = build_optimizer(config, model) + model = flow.nn.parallel.DistributedDataParallel(model, broadcast_buffers=False) + # FIXME: model with DDP wrapper doesn't have model.module + model_without_ddp = model + + n_parameters = sum(p.numel() for p in model.parameters() if p.requires_grad) + logger.info(f"number of params: {n_parameters}") + if hasattr(model_without_ddp, "flops"): + flops = model_without_ddp.flops() + logger.info(f"number of GFLOPs: {flops / 1e9}") + + lr_scheduler = build_scheduler(config, optimizer, len(data_loader_train)) + + if config.AUG.MIXUP > 0.0: + # smoothing is handled with mixup label transform + criterion = SoftTargetCrossEntropy() + elif config.MODEL.LABEL_SMOOTHING > 0.0: + criterion = LabelSmoothingCrossEntropy(smoothing=config.MODEL.LABEL_SMOOTHING) + else: + criterion = flow.nn.CrossEntropyLoss() + + max_accuracy = 0.0 + + if config.TRAIN.AUTO_RESUME: + resume_file = auto_resume_helper(config.OUTPUT) + if resume_file: + if config.MODEL.RESUME: + logger.warning( + f"auto-resume changing resume file from {config.MODEL.RESUME} to {resume_file}" + ) + config.defrost() + config.MODEL.RESUME = resume_file + config.freeze() + logger.info(f"auto resuming from {resume_file}") + else: + logger.info(f"no checkpoint found in {config.OUTPUT}, ignoring auto resume") + + if config.MODEL.RESUME: + print("resume called") + max_accuracy = load_checkpoint( + config, model_without_ddp, optimizer, lr_scheduler, logger + ) + acc1, acc5, loss = validate(config, data_loader_val, model) + logger.info( + f"Accuracy of the network on the {len(data_loader_val)} test images: {acc1:.1f}%" + ) + if config.EVAL_MODE: + return + + if config.THROUGHPUT_MODE: + acc1, acc5, loss = validate(config, data_loader_val, model) + logger.info( + f"Accuracy of the network on the {len(data_loader_val)} test images: 
{acc1:.1f}%" + ) + throughput(data_loader_val, model, logger) + return + + logger.info("Start training") + start_time = time.time() + for epoch in range(config.TRAIN.START_EPOCH, config.TRAIN.EPOCHS): + if not config.DATA.SYNTHETIC_DATA: + data_loader_train.sampler.set_epoch(epoch) + + train_one_epoch( + config, + model, + criterion, + data_loader_train, + optimizer, + epoch, + mixup_fn, + lr_scheduler, + ) + if flow.env.get_rank() == 0 and ( + epoch % config.SAVE_FREQ == 0 or epoch == (config.TRAIN.EPOCHS - 1) + ): + save_checkpoint( + config, + epoch, + model_without_ddp, + max_accuracy, + optimizer, + lr_scheduler, + logger, + ) + + # no validate + acc1, acc5, loss = validate(config, data_loader_val, model) + logger.info( + f"Accuracy of the network on the {len(data_loader_val)} test images: {acc1:.1f}%" + ) + max_accuracy = max(max_accuracy, acc1) + logger.info(f"Max accuracy: {max_accuracy:.2f}%") + + total_time = time.time() - start_time + total_time_str = str(datetime.timedelta(seconds=int(total_time))) + logger.info("Training time {}".format(total_time_str)) + + +def train_one_epoch( + config, model, criterion, data_loader, optimizer, epoch, mixup_fn, lr_scheduler +): + model.train() + optimizer.zero_grad() + + num_steps = len(data_loader) + one_sample_time = TimeMeter() + loss_meter = AverageMeter() + + start = time.time() + end = time.time() + for idx, (samples, targets) in enumerate(data_loader): + samples = samples.cuda() + targets = targets.cuda() + + if mixup_fn is not None: + samples, targets = mixup_fn(samples, targets) + + outputs = model(samples) + + if config.TRAIN.ACCUMULATION_STEPS > 1: + loss = criterion(outputs, targets) + loss = loss / config.TRAIN.ACCUMULATION_STEPS + loss.backward() + if config.TRAIN.CLIP_GRAD: + flow.nn.utils.clip_grad_norm_( + model.parameters(), config.TRAIN.CLIP_GRAD + ) + if (idx + 1) % config.TRAIN.ACCUMULATION_STEPS == 0: + optimizer.step() + optimizer.zero_grad() + lr_scheduler.step() + else: + loss = 
criterion(outputs, targets) + optimizer.zero_grad() + loss.backward() + if config.TRAIN.CLIP_GRAD: + flow.nn.utils.clip_grad_norm_( + model.parameters(), config.TRAIN.CLIP_GRAD + ) + optimizer.step() + lr_scheduler.step() + + one_sample_time.record(samples.size(0) * flow.env.get_world_size()) + loss_meter.record(loss.cpu().detach(), targets.size(0)) + + end = time.time() + + if idx % config.PRINT_FREQ == 0: + lr = optimizer.param_groups[0]["lr"] + loss, loss_avg = loss_meter.get() + throughput, throughput_avg = one_sample_time.get() + etas = (num_steps - idx) * samples.size(0) * flow.env.get_world_size() / throughput_avg + one_sample_time.reset() + + logger.info( + f"Train: [{epoch}/{config.TRAIN.EPOCHS}][{idx}/{num_steps}]\t" + f"eta {datetime.timedelta(seconds=int(etas))}\tlr {lr:.6f}\t" + f"time {samples.size(0) * flow.env.get_world_size() / throughput:.4f}s ({samples.size(0) * flow.env.get_world_size() / throughput_avg:.4f}s)\t" + f"rate {throughput:.4f}/s ({throughput_avg:.4f}/s)\t" + f"loss {loss:.4f} ({loss_avg:.4f})\t" + ) + + epoch_time = time.time() - start + logger.info( + f"EPOCH {epoch} training takes {datetime.timedelta(seconds=int(epoch_time))}" + ) + + +@flow.no_grad() +def validate(config, data_loader, model): + criterion = flow.nn.CrossEntropyLoss() + model.eval() + + batch_time = TimeMeter() + loss_meter = AverageMeter() + acc1_meter = AverageMeter() + acc5_meter = AverageMeter() + + end = time.time() + for idx, (images, target) in enumerate(data_loader): + images = images.cuda() + target = target.cuda() + + # compute output + output = model(images) + + # measure accuracy and record loss + loss = criterion(output, target) + acc1, acc5 = accuracy(output, target, topk=(1, 5)) + + acc1 = reduce_tensor(acc1) + acc5 = reduce_tensor(acc5) + loss = reduce_tensor(loss) + + batch_time.record(target.size(0) * flow.env.get_world_size()) + loss_meter.record(loss, target.size(0)) + acc1_meter.record(acc1, target.size(0)) + acc5_meter.record(acc5, 
target.size(0)) + + # measure elapsed time + end = time.time() + + if idx % config.PRINT_FREQ == 0: + acc1, acc1_avg = acc1_meter.get() + acc5, acc5_avg = acc5_meter.get() + loss, loss_avg = loss_meter.get() + throughput, throughput_avg = batch_time.get() + batch_time.reset() + + logger.info( + f"Test: [{idx}/{len(data_loader)}]\t" + f"Throughput {throughput:.3f} ({throughput_avg:.3f})\t" + f"Loss {loss:.4f} ({loss_avg:.4f})\t" + f"Acc@1 {acc1:.3f} ({acc1_avg:.3f})\t" + f"Acc@5 {acc5:.3f} ({acc5_avg:.3f})\t" + ) + + logger.info(f" * Acc@1 {acc1_avg:.3f} Acc@5 {acc5_avg:.3f}") + return acc1_avg, acc5_avg, loss_meter + + +@flow.no_grad() +def throughput(data_loader, model, logger): + model.eval() + + for idx, (images, _) in enumerate(data_loader): + images = images.cuda() + batch_size = images.shape[0] + for i in range(50): + model(images) + flow.cuda.synchronize() + # TODO: add flow.cuda.synchronize() + logger.info(f"throughput averaged with 30 times") + tic1 = time.time() + for i in range(30): + model(images) + + flow.cuda.synchronize() + tic2 = time.time() + logger.info( + f"batch_size {batch_size} throughput {30 * batch_size / (tic2 - tic1)}" + ) + return + + +if __name__ == "__main__": + _, config = parse_option() + + if "RANK" in os.environ and "WORLD_SIZE" in os.environ: + rank = flow.env.get_rank() + world_size = flow.env.get_world_size() + print(f"RANK and WORLD_SIZE in environ: {rank}/{world_size}") + else: + rank = -1 + world_size = -1 + + seed = config.SEED + flow.env.get_rank() + flow.manual_seed(seed) + np.random.seed(seed) + cudnn.benchmark = True + + # linear_scaled_lr = ( + # config.TRAIN.BASE_LR + # * config.DATA.BATCH_SIZE + # * flow.env.get_world_size() + # / 512.0 + # ) + # linear_scaled_warmup_lr = ( + # config.TRAIN.WARMUP_LR + # * config.DATA.BATCH_SIZE + # * flow.env.get_world_size() + # / 512.0 + # ) + linear_scaled_lr = config.TRAIN.BASE_LR + linear_scaled_warmup_lr = config.TRAIN.WARMUP_LR + + linear_scaled_min_lr = ( + config.TRAIN.MIN_LR 
* config.DATA.BATCH_SIZE * flow.env.get_world_size() / 512.0 + ) + + # gradient accumulation also need to scale the learning rate + if config.TRAIN.ACCUMULATION_STEPS > 1: + linear_scaled_lr = linear_scaled_lr * config.TRAIN.ACCUMULATION_STEPS + linear_scaled_warmup_lr = ( + linear_scaled_warmup_lr * config.TRAIN.ACCUMULATION_STEPS + ) + linear_scaled_min_lr = linear_scaled_min_lr * config.TRAIN.ACCUMULATION_STEPS + config.defrost() + config.TRAIN.BASE_LR = linear_scaled_lr + config.TRAIN.WARMUP_LR = linear_scaled_warmup_lr + config.TRAIN.MIN_LR = linear_scaled_min_lr + config.freeze() + + os.makedirs(config.OUTPUT, exist_ok=True) + logger = create_logger( + output_dir=config.OUTPUT, + dist_rank=flow.env.get_rank(), + name=f"{config.MODEL.ARCH}", + ) + + if flow.env.get_rank() == 0: + path = os.path.join(config.OUTPUT, "config.json") + with open(path, "w") as f: + f.write(config.dump()) + logger.info(f"Full config saved to {path}") + + # print config + logger.info(config.dump()) + + main(config) \ No newline at end of file diff --git a/cv/classification/dla60/optimizer.py b/cv/classification/dla60/optimizer.py new file mode 120000 index 00000000..e5dec61d --- /dev/null +++ b/cv/classification/dla60/optimizer.py @@ -0,0 +1 @@ +../optimizer.py \ No newline at end of file diff --git a/cv/classification/dla60/requirements.txt b/cv/classification/dla60/requirements.txt new file mode 120000 index 00000000..dc833dd4 --- /dev/null +++ b/cv/classification/dla60/requirements.txt @@ -0,0 +1 @@ +../requirements.txt \ No newline at end of file diff --git a/cv/classification/dla60/train.sh b/cv/classification/dla60/train.sh new file mode 100755 index 00000000..4d263fe3 --- /dev/null +++ b/cv/classification/dla60/train.sh @@ -0,0 +1,16 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=8 +PORT=12346 +MODEL_ARCH="dla60" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg 
configs/default_settings.yaml \ + --model_arch $MODEL_ARCH \ + --lr 1e-3 + diff --git a/cv/classification/dla60/utils.py b/cv/classification/dla60/utils.py new file mode 120000 index 00000000..50fbc6d8 --- /dev/null +++ b/cv/classification/dla60/utils.py @@ -0,0 +1 @@ +../utils.py \ No newline at end of file diff --git a/cv/classification/dla60x/README.md b/cv/classification/dla60x/README.md new file mode 100644 index 00000000..3ebeff15 --- /dev/null +++ b/cv/classification/dla60x/README.md @@ -0,0 +1,66 @@ +## dla60x + +### Installation +- Install the latest version of OneFlow +```bash +python3 -m pip install oneflow -f https://staging.oneflow.info/branch/master/[PLATFORM] +``` +Find more information on [install oneflow](https://github.com/Oneflow-Inc/oneflow#install-oneflow) + +- Install flowvision + +Then install the latest stable release of flowvision + +```bash +pip install flowvision==0.2.1 +``` + +- Install other requirements +```bash +python3 -m pip install -r requirements.txt +``` + +### Dataset +#### ImageNet +For ImageNet dataset, you can download it from http://image-net.org/. We provide the following two ways to load data: + +- For standard folder dataset, move validation images to labeled sub-folders. The file structure should look like: + ```bash + $ tree data + imagenet + ├── train + │ ├── class1 + │ │ ├── img1.jpeg + │ │ ├── img2.jpeg + │ │ └── ... + │ ├── class2 + │ │ ├── img3.jpeg + │ │ └── ... + │ └── ... + └── val + ├── class1 + │ ├── img4.jpeg + │ ├── img5.jpeg + │ └── ... + ├── class2 + │ ├── img6.jpeg + │ └── ... + └── ... + + +### Training + +You can use bash script `train.sh` to train this model. + +```bash +sh train.sh +``` + +### inference + +Bash script `infer.sh` is used to infer the trained model. 
+ +```bash +sh infer.sh +``` + diff --git a/cv/classification/dla60x/config.py b/cv/classification/dla60x/config.py new file mode 120000 index 00000000..3721332c --- /dev/null +++ b/cv/classification/dla60x/config.py @@ -0,0 +1 @@ +../config.py \ No newline at end of file diff --git a/cv/classification/dla60x/configs b/cv/classification/dla60x/configs new file mode 120000 index 00000000..271eaab3 --- /dev/null +++ b/cv/classification/dla60x/configs @@ -0,0 +1 @@ +../configs \ No newline at end of file diff --git a/cv/classification/dla60x/data b/cv/classification/dla60x/data new file mode 120000 index 00000000..4909e06e --- /dev/null +++ b/cv/classification/dla60x/data @@ -0,0 +1 @@ +../data \ No newline at end of file diff --git a/cv/classification/dla60x/infer.sh b/cv/classification/dla60x/infer.sh new file mode 100755 index 00000000..948fbaf2 --- /dev/null +++ b/cv/classification/dla60x/infer.sh @@ -0,0 +1,17 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=1 +PORT=12346 +MODEL_ARCH="dla60x" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/default_settings.yaml \ + --model_arch $MODEL_ARCH \ + --lr 1e-3 \ + --throughput + diff --git a/cv/classification/dla60x/logger.py b/cv/classification/dla60x/logger.py new file mode 120000 index 00000000..e1dc8ceb --- /dev/null +++ b/cv/classification/dla60x/logger.py @@ -0,0 +1 @@ +../logger.py \ No newline at end of file diff --git a/cv/classification/dla60x/lr_scheduler.py b/cv/classification/dla60x/lr_scheduler.py new file mode 120000 index 00000000..268fb140 --- /dev/null +++ b/cv/classification/dla60x/lr_scheduler.py @@ -0,0 +1 @@ +../lr_scheduler.py \ No newline at end of file diff --git a/cv/classification/dla60x/main.py b/cv/classification/dla60x/main.py new file mode 100644 index 00000000..c2d27d1b --- /dev/null +++ b/cv/classification/dla60x/main.py @@ -0,0 +1,455 @@ +""" +Modified from 
https://github.com/microsoft/Swin-Transformer/blob/main/main.py +""" + +import os +import time +import argparse +import datetime +import numpy as np +import oneflow as flow +import oneflow.backends.cudnn as cudnn + +from flowvision.loss.cross_entropy import ( + LabelSmoothingCrossEntropy, + SoftTargetCrossEntropy, +) +from flowvision.utils.metrics import accuracy +from flowvision.models import ModelCreator + +from config import get_config +from data import build_loader +from lr_scheduler import build_scheduler +from optimizer import build_optimizer +from logger import create_logger +from utils import ( + load_checkpoint, + save_checkpoint, + get_grad_norm, + auto_resume_helper, + reduce_tensor, + AverageMeter, + TimeMeter +) + +def build_model(config): + model_arch = config.MODEL.ARCH + model = ModelCreator.create_model(model_arch, pretrained=config.MODEL.PRETRAINED) + return model + + +def parse_option(): + parser = argparse.ArgumentParser( + "Flowvision image classification training and evaluation script", add_help=False + ) + parser.add_argument( + "--model_arch", + type=str, + required=True, + default="swin_tiny_patch4_window7_224", + help="model for training", + ) + parser.add_argument( + "--cfg", type=str, required=True, metavar="FILE", help="path to config file", + ) + parser.add_argument( + "--opts", + help="Modify config options by adding 'KEY VALUE' pairs. 
", + default=None, + nargs="+", + ) + + # easy config modification + parser.add_argument( + "--synthetic-data", + action="store_true", + dest="synthetic_data", + help="Use synthetic data", + ) + parser.add_argument( + "--epochs", type=int, help="batch size for single GPU" + ) + parser.add_argument("--lr", type=float, help="initial learning rate") + parser.add_argument( + "--image-size", type=int, help="initial image size" + ) + parser.add_argument( + "--batch-size", type=int, help="batch size for single GPU" + ) + parser.add_argument("--data-path", type=str, help="path to dataset") + parser.add_argument( + "--zip", + action="store_true", + help="use zipped dataset instead of folder dataset", + ) + parser.add_argument( + "--cache-mode", + type=str, + default="part", + choices=["no", "full", "part"], + help="no: no cache, " + "full: cache all data, " + "part: sharding the dataset into nonoverlapping pieces and only cache one piece", + ) + parser.add_argument("--resume", help="resume from checkpoint") + parser.add_argument( + "--accumulation-steps", type=int, help="gradient accumulation steps" + ) + parser.add_argument( + "--use-checkpoint", + action="store_true", + help="whether to use gradient checkpointing to save memory", + ) + parser.add_argument( + "--output", + default="output", + type=str, + metavar="PATH", + help="root of output folder, the full path is // (default: output)", + ) + parser.add_argument("--tag", help="tag of experiment") + parser.add_argument("--eval", action="store_true", help="Perform evaluation only") + parser.add_argument( + "--throughput", action="store_true", help="Test throughput only" + ) + + # distributed training + parser.add_argument( + "--local_rank", + type=int, + default=0, + required=False, + help="local rank for DistributedDataParallel", + ) + + args, unparsed = parser.parse_known_args() + + config = get_config(args) + + return args, config + + +def main(config): + ( + dataset_train, + dataset_val, + data_loader_train, + 
data_loader_val, + mixup_fn, + ) = build_loader(config) + + logger.info(f"Creating model:{config.MODEL.ARCH}") + model = build_model(config) + model.cuda() + + optimizer = build_optimizer(config, model) + model = flow.nn.parallel.DistributedDataParallel(model, broadcast_buffers=False) + # FIXME: model with DDP wrapper doesn't have model.module + model_without_ddp = model + + n_parameters = sum(p.numel() for p in model.parameters() if p.requires_grad) + logger.info(f"number of params: {n_parameters}") + if hasattr(model_without_ddp, "flops"): + flops = model_without_ddp.flops() + logger.info(f"number of GFLOPs: {flops / 1e9}") + + lr_scheduler = build_scheduler(config, optimizer, len(data_loader_train)) + + if config.AUG.MIXUP > 0.0: + # smoothing is handled with mixup label transform + criterion = SoftTargetCrossEntropy() + elif config.MODEL.LABEL_SMOOTHING > 0.0: + criterion = LabelSmoothingCrossEntropy(smoothing=config.MODEL.LABEL_SMOOTHING) + else: + criterion = flow.nn.CrossEntropyLoss() + + max_accuracy = 0.0 + + if config.TRAIN.AUTO_RESUME: + resume_file = auto_resume_helper(config.OUTPUT) + if resume_file: + if config.MODEL.RESUME: + logger.warning( + f"auto-resume changing resume file from {config.MODEL.RESUME} to {resume_file}" + ) + config.defrost() + config.MODEL.RESUME = resume_file + config.freeze() + logger.info(f"auto resuming from {resume_file}") + else: + logger.info(f"no checkpoint found in {config.OUTPUT}, ignoring auto resume") + + if config.MODEL.RESUME: + print("resume called") + max_accuracy = load_checkpoint( + config, model_without_ddp, optimizer, lr_scheduler, logger + ) + acc1, acc5, loss = validate(config, data_loader_val, model) + logger.info( + f"Accuracy of the network on the {len(data_loader_val)} test images: {acc1:.1f}%" + ) + if config.EVAL_MODE: + return + + if config.THROUGHPUT_MODE: + acc1, acc5, loss = validate(config, data_loader_val, model) + logger.info( + f"Accuracy of the network on the {len(data_loader_val)} test images: 
{acc1:.1f}%" + ) + throughput(data_loader_val, model, logger) + return + + logger.info("Start training") + start_time = time.time() + for epoch in range(config.TRAIN.START_EPOCH, config.TRAIN.EPOCHS): + if not config.DATA.SYNTHETIC_DATA: + data_loader_train.sampler.set_epoch(epoch) + + train_one_epoch( + config, + model, + criterion, + data_loader_train, + optimizer, + epoch, + mixup_fn, + lr_scheduler, + ) + if flow.env.get_rank() == 0 and ( + epoch % config.SAVE_FREQ == 0 or epoch == (config.TRAIN.EPOCHS - 1) + ): + save_checkpoint( + config, + epoch, + model_without_ddp, + max_accuracy, + optimizer, + lr_scheduler, + logger, + ) + + # no validate + acc1, acc5, loss = validate(config, data_loader_val, model) + logger.info( + f"Accuracy of the network on the {len(data_loader_val)} test images: {acc1:.1f}%" + ) + max_accuracy = max(max_accuracy, acc1) + logger.info(f"Max accuracy: {max_accuracy:.2f}%") + + total_time = time.time() - start_time + total_time_str = str(datetime.timedelta(seconds=int(total_time))) + logger.info("Training time {}".format(total_time_str)) + + +def train_one_epoch( + config, model, criterion, data_loader, optimizer, epoch, mixup_fn, lr_scheduler +): + model.train() + optimizer.zero_grad() + + num_steps = len(data_loader) + one_sample_time = TimeMeter() + loss_meter = AverageMeter() + + start = time.time() + end = time.time() + for idx, (samples, targets) in enumerate(data_loader): + samples = samples.cuda() + targets = targets.cuda() + + if mixup_fn is not None: + samples, targets = mixup_fn(samples, targets) + + outputs = model(samples) + + if config.TRAIN.ACCUMULATION_STEPS > 1: + loss = criterion(outputs, targets) + loss = loss / config.TRAIN.ACCUMULATION_STEPS + loss.backward() + if config.TRAIN.CLIP_GRAD: + flow.nn.utils.clip_grad_norm_( + model.parameters(), config.TRAIN.CLIP_GRAD + ) + if (idx + 1) % config.TRAIN.ACCUMULATION_STEPS == 0: + optimizer.step() + optimizer.zero_grad() + lr_scheduler.step() + else: + loss = 
criterion(outputs, targets) + optimizer.zero_grad() + loss.backward() + if config.TRAIN.CLIP_GRAD: + flow.nn.utils.clip_grad_norm_( + model.parameters(), config.TRAIN.CLIP_GRAD + ) + optimizer.step() + lr_scheduler.step() + + one_sample_time.record(samples.size(0) * flow.env.get_world_size()) + loss_meter.record(loss.cpu().detach(), targets.size(0)) + + end = time.time() + + if idx % config.PRINT_FREQ == 0: + lr = optimizer.param_groups[0]["lr"] + loss, loss_avg = loss_meter.get() + throughput, throughput_avg = one_sample_time.get() + etas = (num_steps - idx) * samples.size(0) * flow.env.get_world_size() / throughput_avg + one_sample_time.reset() + + logger.info( + f"Train: [{epoch}/{config.TRAIN.EPOCHS}][{idx}/{num_steps}]\t" + f"eta {datetime.timedelta(seconds=int(etas))}\tlr {lr:.6f}\t" + f"time {samples.size(0) * flow.env.get_world_size() / throughput:.4f}s ({samples.size(0) * flow.env.get_world_size() / throughput_avg:.4f}s)\t" + f"rate {throughput:.4f}/s ({throughput_avg:.4f}/s)\t" + f"loss {loss:.4f} ({loss_avg:.4f})\t" + ) + + epoch_time = time.time() - start + logger.info( + f"EPOCH {epoch} training takes {datetime.timedelta(seconds=int(epoch_time))}" + ) + + +@flow.no_grad() +def validate(config, data_loader, model): + criterion = flow.nn.CrossEntropyLoss() + model.eval() + + batch_time = TimeMeter() + loss_meter = AverageMeter() + acc1_meter = AverageMeter() + acc5_meter = AverageMeter() + + end = time.time() + for idx, (images, target) in enumerate(data_loader): + images = images.cuda() + target = target.cuda() + + # compute output + output = model(images) + + # measure accuracy and record loss + loss = criterion(output, target) + acc1, acc5 = accuracy(output, target, topk=(1, 5)) + + acc1 = reduce_tensor(acc1) + acc5 = reduce_tensor(acc5) + loss = reduce_tensor(loss) + + batch_time.record(target.size(0) * flow.env.get_world_size()) + loss_meter.record(loss, target.size(0)) + acc1_meter.record(acc1, target.size(0)) + acc5_meter.record(acc5, 
target.size(0)) + + # measure elapsed time + end = time.time() + + if idx % config.PRINT_FREQ == 0: + acc1, acc1_avg = acc1_meter.get() + acc5, acc5_avg = acc5_meter.get() + loss, loss_avg = loss_meter.get() + throughput, throughput_avg = batch_time.get() + batch_time.reset() + + logger.info( + f"Test: [{idx}/{len(data_loader)}]\t" + f"Throughput {throughput:.3f} ({throughput_avg:.3f})\t" + f"Loss {loss:.4f} ({loss_avg:.4f})\t" + f"Acc@1 {acc1:.3f} ({acc1_avg:.3f})\t" + f"Acc@5 {acc5:.3f} ({acc5_avg:.3f})\t" + ) + + logger.info(f" * Acc@1 {acc1_avg:.3f} Acc@5 {acc5_avg:.3f}") + return acc1_avg, acc5_avg, loss_meter + + +@flow.no_grad() +def throughput(data_loader, model, logger): + model.eval() + + for idx, (images, _) in enumerate(data_loader): + images = images.cuda() + batch_size = images.shape[0] + for i in range(50): + model(images) + flow.cuda.synchronize() + # TODO: add flow.cuda.synchronize() + logger.info(f"throughput averaged with 30 times") + tic1 = time.time() + for i in range(30): + model(images) + + flow.cuda.synchronize() + tic2 = time.time() + logger.info( + f"batch_size {batch_size} throughput {30 * batch_size / (tic2 - tic1)}" + ) + return + + +if __name__ == "__main__": + _, config = parse_option() + + if "RANK" in os.environ and "WORLD_SIZE" in os.environ: + rank = flow.env.get_rank() + world_size = flow.env.get_world_size() + print(f"RANK and WORLD_SIZE in environ: {rank}/{world_size}") + else: + rank = -1 + world_size = -1 + + seed = config.SEED + flow.env.get_rank() + flow.manual_seed(seed) + np.random.seed(seed) + cudnn.benchmark = True + + # linear_scaled_lr = ( + # config.TRAIN.BASE_LR + # * config.DATA.BATCH_SIZE + # * flow.env.get_world_size() + # / 512.0 + # ) + # linear_scaled_warmup_lr = ( + # config.TRAIN.WARMUP_LR + # * config.DATA.BATCH_SIZE + # * flow.env.get_world_size() + # / 512.0 + # ) + linear_scaled_lr = config.TRAIN.BASE_LR + linear_scaled_warmup_lr = config.TRAIN.WARMUP_LR + + linear_scaled_min_lr = ( + config.TRAIN.MIN_LR 
* config.DATA.BATCH_SIZE * flow.env.get_world_size() / 512.0 + ) + + # gradient accumulation also need to scale the learning rate + if config.TRAIN.ACCUMULATION_STEPS > 1: + linear_scaled_lr = linear_scaled_lr * config.TRAIN.ACCUMULATION_STEPS + linear_scaled_warmup_lr = ( + linear_scaled_warmup_lr * config.TRAIN.ACCUMULATION_STEPS + ) + linear_scaled_min_lr = linear_scaled_min_lr * config.TRAIN.ACCUMULATION_STEPS + config.defrost() + config.TRAIN.BASE_LR = linear_scaled_lr + config.TRAIN.WARMUP_LR = linear_scaled_warmup_lr + config.TRAIN.MIN_LR = linear_scaled_min_lr + config.freeze() + + os.makedirs(config.OUTPUT, exist_ok=True) + logger = create_logger( + output_dir=config.OUTPUT, + dist_rank=flow.env.get_rank(), + name=f"{config.MODEL.ARCH}", + ) + + if flow.env.get_rank() == 0: + path = os.path.join(config.OUTPUT, "config.json") + with open(path, "w") as f: + f.write(config.dump()) + logger.info(f"Full config saved to {path}") + + # print config + logger.info(config.dump()) + + main(config) \ No newline at end of file diff --git a/cv/classification/dla60x/optimizer.py b/cv/classification/dla60x/optimizer.py new file mode 120000 index 00000000..e5dec61d --- /dev/null +++ b/cv/classification/dla60x/optimizer.py @@ -0,0 +1 @@ +../optimizer.py \ No newline at end of file diff --git a/cv/classification/dla60x/requirements.txt b/cv/classification/dla60x/requirements.txt new file mode 120000 index 00000000..dc833dd4 --- /dev/null +++ b/cv/classification/dla60x/requirements.txt @@ -0,0 +1 @@ +../requirements.txt \ No newline at end of file diff --git a/cv/classification/dla60x/train.sh b/cv/classification/dla60x/train.sh new file mode 100755 index 00000000..f417aa3d --- /dev/null +++ b/cv/classification/dla60x/train.sh @@ -0,0 +1,16 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=8 +PORT=12346 +MODEL_ARCH="dla60x" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + 
--cfg configs/default_settings.yaml \ + --model_arch $MODEL_ARCH \ + --lr 1e-3 + diff --git a/cv/classification/dla60x/utils.py b/cv/classification/dla60x/utils.py new file mode 120000 index 00000000..50fbc6d8 --- /dev/null +++ b/cv/classification/dla60x/utils.py @@ -0,0 +1 @@ +../utils.py \ No newline at end of file diff --git a/cv/classification/dla60x_c/README.md b/cv/classification/dla60x_c/README.md new file mode 100644 index 00000000..bef6f4ac --- /dev/null +++ b/cv/classification/dla60x_c/README.md @@ -0,0 +1,66 @@ +## dla60x_c + +### Installation +- Install the latest version of OneFlow +```bash +python3 -m pip install oneflow -f https://staging.oneflow.info/branch/master/[PLATFORM] +``` +Find more information on [install oneflow](https://github.com/Oneflow-Inc/oneflow#install-oneflow) + +- Install flowvision + +Then install the latest stable release of flowvision + +```bash +pip install flowvision==0.2.1 +``` + +- Install other requirements +```bash +python3 -m pip install -r requirements.txt +``` + +### Dataset +#### ImageNet +For ImageNet dataset, you can download it from http://image-net.org/. We provide the following two ways to load data: + +- For standard folder dataset, move validation images to labeled sub-folders. The file structure should look like: + ```bash + $ tree data + imagenet + ├── train + │ ├── class1 + │ │ ├── img1.jpeg + │ │ ├── img2.jpeg + │ │ └── ... + │ ├── class2 + │ │ ├── img3.jpeg + │ │ └── ... + │ └── ... + └── val + ├── class1 + │ ├── img4.jpeg + │ ├── img5.jpeg + │ └── ... + ├── class2 + │ ├── img6.jpeg + │ └── ... + └── ... + + +### Training + +You can use bash script `train.sh` to train this model. + +```bash +sh train.sh +``` + +### inference + +Bash script `infer.sh` is used to infer the trained model. 
+ +```bash +sh infer.sh +``` + diff --git a/cv/classification/dla60x_c/config.py b/cv/classification/dla60x_c/config.py new file mode 120000 index 00000000..3721332c --- /dev/null +++ b/cv/classification/dla60x_c/config.py @@ -0,0 +1 @@ +../config.py \ No newline at end of file diff --git a/cv/classification/dla60x_c/configs b/cv/classification/dla60x_c/configs new file mode 120000 index 00000000..271eaab3 --- /dev/null +++ b/cv/classification/dla60x_c/configs @@ -0,0 +1 @@ +../configs \ No newline at end of file diff --git a/cv/classification/dla60x_c/data b/cv/classification/dla60x_c/data new file mode 120000 index 00000000..4909e06e --- /dev/null +++ b/cv/classification/dla60x_c/data @@ -0,0 +1 @@ +../data \ No newline at end of file diff --git a/cv/classification/dla60x_c/infer.sh b/cv/classification/dla60x_c/infer.sh new file mode 100755 index 00000000..72761fcd --- /dev/null +++ b/cv/classification/dla60x_c/infer.sh @@ -0,0 +1,17 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=1 +PORT=12346 +MODEL_ARCH="dla60x_c" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/default_settings.yaml \ + --model_arch $MODEL_ARCH \ + --lr 1e-3 \ + --throughput + diff --git a/cv/classification/dla60x_c/logger.py b/cv/classification/dla60x_c/logger.py new file mode 120000 index 00000000..e1dc8ceb --- /dev/null +++ b/cv/classification/dla60x_c/logger.py @@ -0,0 +1 @@ +../logger.py \ No newline at end of file diff --git a/cv/classification/dla60x_c/lr_scheduler.py b/cv/classification/dla60x_c/lr_scheduler.py new file mode 120000 index 00000000..268fb140 --- /dev/null +++ b/cv/classification/dla60x_c/lr_scheduler.py @@ -0,0 +1 @@ +../lr_scheduler.py \ No newline at end of file diff --git a/cv/classification/dla60x_c/main.py b/cv/classification/dla60x_c/main.py new file mode 100644 index 00000000..b9020995 --- /dev/null +++ b/cv/classification/dla60x_c/main.py @@ -0,0 
+1,455 @@ +""" +Modified from https://github.com/microsoft/Swin-Transformer/blob/main/main.py +""" + +import os +import time +import argparse +import datetime +import numpy as np +import oneflow as flow +import oneflow.backends.cudnn as cudnn + +from flowvision.loss.cross_entropy import ( + LabelSmoothingCrossEntropy, + SoftTargetCrossEntropy, +) +from flowvision.utils.metrics import accuracy +from flowvision.models import ModelCreator + +from config import get_config +from data import build_loader +from lr_scheduler import build_scheduler +from optimizer import build_optimizer +from logger import create_logger +from utils import ( + load_checkpoint, + save_checkpoint, + get_grad_norm, + auto_resume_helper, + reduce_tensor, + AverageMeter, + TimeMeter +) + +def build_model(config): + model_arch = config.MODEL.ARCH + model = ModelCreator.create_model(model_arch, pretrained=config.MODEL.PRETRAINED) + return model + + +def parse_option(): + parser = argparse.ArgumentParser( + "Flowvision image classification training and evaluation script", add_help=False + ) + parser.add_argument( + "--model_arch", + type=str, + required=True, + default="swin_tiny_patch4_window7_224", + help="model for training", + ) + parser.add_argument( + "--cfg", type=str, required=True, metavar="FILE", help="path to config file", + ) + parser.add_argument( + "--opts", + help="Modify config options by adding 'KEY VALUE' pairs. 
", + default=None, + nargs="+", + ) + + # easy config modification + parser.add_argument( + "--synthetic-data", + action="store_true", + dest="synthetic_data", + help="Use synthetic data", + ) + parser.add_argument( + "--epochs", type=int, help="batch size for single GPU" + ) + parser.add_argument("--lr", type=float, help="initial learning rate") + parser.add_argument( + "--image-size", type=int, help="initial image size" + ) + parser.add_argument( + "--batch-size", type=int, help="batch size for single GPU" + ) + parser.add_argument("--data-path", type=str, help="path to dataset") + parser.add_argument( + "--zip", + action="store_true", + help="use zipped dataset instead of folder dataset", + ) + parser.add_argument( + "--cache-mode", + type=str, + default="part", + choices=["no", "full", "part"], + help="no: no cache, " + "full: cache all data, " + "part: sharding the dataset into nonoverlapping pieces and only cache one piece", + ) + parser.add_argument("--resume", help="resume from checkpoint") + parser.add_argument( + "--accumulation-steps", type=int, help="gradient accumulation steps" + ) + parser.add_argument( + "--use-checkpoint", + action="store_true", + help="whether to use gradient checkpointing to save memory", + ) + parser.add_argument( + "--output", + default="output", + type=str, + metavar="PATH", + help="root of output folder, the full path is // (default: output)", + ) + parser.add_argument("--tag", help="tag of experiment") + parser.add_argument("--eval", action="store_true", help="Perform evaluation only") + parser.add_argument( + "--throughput", action="store_true", help="Test throughput only" + ) + + # distributed training + parser.add_argument( + "--local_rank", + type=int, + default=0, + required=False, + help="local rank for DistributedDataParallel", + ) + + args, unparsed = parser.parse_known_args() + + config = get_config(args) + + return args, config + + +def main(config): + ( + dataset_train, + dataset_val, + data_loader_train, + 
data_loader_val, + mixup_fn, + ) = build_loader(config) + + logger.info(f"Creating model:{config.MODEL.ARCH}") + model = build_model(config) + model.cuda() + + optimizer = build_optimizer(config, model) + model = flow.nn.parallel.DistributedDataParallel(model, broadcast_buffers=False) + # FIXME: model with DDP wrapper doesn't have model.module + model_without_ddp = model + + n_parameters = sum(p.numel() for p in model.parameters() if p.requires_grad) + logger.info(f"number of params: {n_parameters}") + if hasattr(model_without_ddp, "flops"): + flops = model_without_ddp.flops() + logger.info(f"number of GFLOPs: {flops / 1e9}") + + lr_scheduler = build_scheduler(config, optimizer, len(data_loader_train)) + + if config.AUG.MIXUP > 0.0: + # smoothing is handled with mixup label transform + criterion = SoftTargetCrossEntropy() + elif config.MODEL.LABEL_SMOOTHING > 0.0: + criterion = LabelSmoothingCrossEntropy(smoothing=config.MODEL.LABEL_SMOOTHING) + else: + criterion = flow.nn.CrossEntropyLoss() + + max_accuracy = 0.0 + + if config.TRAIN.AUTO_RESUME: + resume_file = auto_resume_helper(config.OUTPUT) + if resume_file: + if config.MODEL.RESUME: + logger.warning( + f"auto-resume changing resume file from {config.MODEL.RESUME} to {resume_file}" + ) + config.defrost() + config.MODEL.RESUME = resume_file + config.freeze() + logger.info(f"auto resuming from {resume_file}") + else: + logger.info(f"no checkpoint found in {config.OUTPUT}, ignoring auto resume") + + if config.MODEL.RESUME: + print("resume called") + max_accuracy = load_checkpoint( + config, model_without_ddp, optimizer, lr_scheduler, logger + ) + acc1, acc5, loss = validate(config, data_loader_val, model) + logger.info( + f"Accuracy of the network on the {len(data_loader_val)} test images: {acc1:.1f}%" + ) + if config.EVAL_MODE: + return + + if config.THROUGHPUT_MODE: + acc1, acc5, loss = validate(config, data_loader_val, model) + logger.info( + f"Accuracy of the network on the {len(data_loader_val)} test images: 
{acc1:.1f}%" + ) + throughput(data_loader_val, model, logger) + return + + logger.info("Start training") + start_time = time.time() + for epoch in range(config.TRAIN.START_EPOCH, config.TRAIN.EPOCHS): + if not config.DATA.SYNTHETIC_DATA: + data_loader_train.sampler.set_epoch(epoch) + + train_one_epoch( + config, + model, + criterion, + data_loader_train, + optimizer, + epoch, + mixup_fn, + lr_scheduler, + ) + if flow.env.get_rank() == 0 and ( + epoch % config.SAVE_FREQ == 0 or epoch == (config.TRAIN.EPOCHS - 1) + ): + save_checkpoint( + config, + epoch, + model_without_ddp, + max_accuracy, + optimizer, + lr_scheduler, + logger, + ) + + # no validate + acc1, acc5, loss = validate(config, data_loader_val, model) + logger.info( + f"Accuracy of the network on the {len(data_loader_val)} test images: {acc1:.1f}%" + ) + max_accuracy = max(max_accuracy, acc1) + logger.info(f"Max accuracy: {max_accuracy:.2f}%") + + total_time = time.time() - start_time + total_time_str = str(datetime.timedelta(seconds=int(total_time))) + logger.info("Training time {}".format(total_time_str)) + + +def train_one_epoch( + config, model, criterion, data_loader, optimizer, epoch, mixup_fn, lr_scheduler +): + model.train() + optimizer.zero_grad() + + num_steps = len(data_loader) + one_sample_time = TimeMeter() + loss_meter = AverageMeter() + + start = time.time() + end = time.time() + for idx, (samples, targets) in enumerate(data_loader): + samples = samples.cuda() + targets = targets.cuda() + + if mixup_fn is not None: + samples, targets = mixup_fn(samples, targets) + + outputs = model(samples) + + if config.TRAIN.ACCUMULATION_STEPS > 1: + loss = criterion(outputs, targets) + loss = loss / config.TRAIN.ACCUMULATION_STEPS + loss.backward() + if config.TRAIN.CLIP_GRAD: + flow.nn.utils.clip_grad_norm_( + model.parameters(), config.TRAIN.CLIP_GRAD + ) + if (idx + 1) % config.TRAIN.ACCUMULATION_STEPS == 0: + optimizer.step() + optimizer.zero_grad() + lr_scheduler.step() + else: + loss = 
criterion(outputs, targets) + optimizer.zero_grad() + loss.backward() + if config.TRAIN.CLIP_GRAD: + flow.nn.utils.clip_grad_norm_( + model.parameters(), config.TRAIN.CLIP_GRAD + ) + optimizer.step() + lr_scheduler.step() + + one_sample_time.record(samples.size(0) * flow.env.get_world_size()) + loss_meter.record(loss.cpu().detach(), targets.size(0)) + + end = time.time() + + if idx % config.PRINT_FREQ == 0: + lr = optimizer.param_groups[0]["lr"] + loss, loss_avg = loss_meter.get() + throughput, throughput_avg = one_sample_time.get() + etas = (num_steps - idx) * samples.size(0) * flow.env.get_world_size() / throughput_avg + one_sample_time.reset() + + logger.info( + f"Train: [{epoch}/{config.TRAIN.EPOCHS}][{idx}/{num_steps}]\t" + f"eta {datetime.timedelta(seconds=int(etas))}\tlr {lr:.6f}\t" + f"time {samples.size(0) * flow.env.get_world_size() / throughput:.4f}s ({samples.size(0) * flow.env.get_world_size() / throughput_avg:.4f}s)\t" + f"rate {throughput:.4f}/s ({throughput_avg:.4f}/s)\t" + f"loss {loss:.4f} ({loss_avg:.4f})\t" + ) + + epoch_time = time.time() - start + logger.info( + f"EPOCH {epoch} training takes {datetime.timedelta(seconds=int(epoch_time))}" + ) + + +@flow.no_grad() +def validate(config, data_loader, model): + criterion = flow.nn.CrossEntropyLoss() + model.eval() + + batch_time = TimeMeter() + loss_meter = AverageMeter() + acc1_meter = AverageMeter() + acc5_meter = AverageMeter() + + end = time.time() + for idx, (images, target) in enumerate(data_loader): + images = images.cuda() + target = target.cuda() + + # compute output + output = model(images) + + # measure accuracy and record loss + loss = criterion(output, target) + acc1, acc5 = accuracy(output, target, topk=(1, 5)) + + acc1 = reduce_tensor(acc1) + acc5 = reduce_tensor(acc5) + loss = reduce_tensor(loss) + + batch_time.record(target.size(0) * flow.env.get_world_size()) + loss_meter.record(loss, target.size(0)) + acc1_meter.record(acc1, target.size(0)) + acc5_meter.record(acc5, 
target.size(0)) + + # measure elapsed time + end = time.time() + + if idx % config.PRINT_FREQ == 0: + acc1, acc1_avg = acc1_meter.get() + acc5, acc5_avg = acc5_meter.get() + loss, loss_avg = loss_meter.get() + throughput, throughput_avg = batch_time.get() + batch_time.reset() + + logger.info( + f"Test: [{idx}/{len(data_loader)}]\t" + f"Throughput {throughput:.3f} ({throughput_avg:.3f})\t" + f"Loss {loss:.4f} ({loss_avg:.4f})\t" + f"Acc@1 {acc1:.3f} ({acc1_avg:.3f})\t" + f"Acc@5 {acc5:.3f} ({acc5_avg:.3f})\t" + ) + + logger.info(f" * Acc@1 {acc1_avg:.3f} Acc@5 {acc5_avg:.3f}") + return acc1_avg, acc5_avg, loss_meter + + +@flow.no_grad() +def throughput(data_loader, model, logger): + model.eval() + + for idx, (images, _) in enumerate(data_loader): + images = images.cuda() + batch_size = images.shape[0] + for i in range(50): + model(images) + flow.cuda.synchronize() + # TODO: add flow.cuda.synchronize() + logger.info(f"throughput averaged with 30 times") + tic1 = time.time() + for i in range(30): + model(images) + + flow.cuda.synchronize() + tic2 = time.time() + logger.info( + f"batch_size {batch_size} throughput {30 * batch_size / (tic2 - tic1)}" + ) + return + + +if __name__ == "__main__": + _, config = parse_option() + + if "RANK" in os.environ and "WORLD_SIZE" in os.environ: + rank = flow.env.get_rank() + world_size = flow.env.get_world_size() + print(f"RANK and WORLD_SIZE in environ: {rank}/{world_size}") + else: + rank = -1 + world_size = -1 + + seed = config.SEED + flow.env.get_rank() + flow.manual_seed(seed) + np.random.seed(seed) + cudnn.benchmark = True + + # linear_scaled_lr = ( + # config.TRAIN.BASE_LR + # * config.DATA.BATCH_SIZE + # * flow.env.get_world_size() + # / 512.0 + # ) + # linear_scaled_warmup_lr = ( + # config.TRAIN.WARMUP_LR + # * config.DATA.BATCH_SIZE + # * flow.env.get_world_size() + # / 512.0 + # ) + linear_scaled_lr = config.TRAIN.BASE_LR + linear_scaled_warmup_lr = config.TRAIN.WARMUP_LR + + linear_scaled_min_lr = ( + config.TRAIN.MIN_LR 
* config.DATA.BATCH_SIZE * flow.env.get_world_size() / 512.0 + ) + + # gradient accumulation also need to scale the learning rate + if config.TRAIN.ACCUMULATION_STEPS > 1: + linear_scaled_lr = linear_scaled_lr * config.TRAIN.ACCUMULATION_STEPS + linear_scaled_warmup_lr = ( + linear_scaled_warmup_lr * config.TRAIN.ACCUMULATION_STEPS + ) + linear_scaled_min_lr = linear_scaled_min_lr * config.TRAIN.ACCUMULATION_STEPS + config.defrost() + config.TRAIN.BASE_LR = linear_scaled_lr + config.TRAIN.WARMUP_LR = linear_scaled_warmup_lr + config.TRAIN.MIN_LR = linear_scaled_min_lr + config.freeze() + + os.makedirs(config.OUTPUT, exist_ok=True) + logger = create_logger( + output_dir=config.OUTPUT, + dist_rank=flow.env.get_rank(), + name=f"{config.MODEL.ARCH}", + ) + + if flow.env.get_rank() == 0: + path = os.path.join(config.OUTPUT, "config.json") + with open(path, "w") as f: + f.write(config.dump()) + logger.info(f"Full config saved to {path}") + + # print config + logger.info(config.dump()) + + main(config) \ No newline at end of file diff --git a/cv/classification/dla60x_c/optimizer.py b/cv/classification/dla60x_c/optimizer.py new file mode 120000 index 00000000..e5dec61d --- /dev/null +++ b/cv/classification/dla60x_c/optimizer.py @@ -0,0 +1 @@ +../optimizer.py \ No newline at end of file diff --git a/cv/classification/dla60x_c/requirements.txt b/cv/classification/dla60x_c/requirements.txt new file mode 120000 index 00000000..dc833dd4 --- /dev/null +++ b/cv/classification/dla60x_c/requirements.txt @@ -0,0 +1 @@ +../requirements.txt \ No newline at end of file diff --git a/cv/classification/dla60x_c/train.sh b/cv/classification/dla60x_c/train.sh new file mode 100755 index 00000000..185c762a --- /dev/null +++ b/cv/classification/dla60x_c/train.sh @@ -0,0 +1,16 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=8 +PORT=12346 +MODEL_ARCH="dla60x_c" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT 
\ + main.py \ + --cfg configs/default_settings.yaml \ + --model_arch $MODEL_ARCH \ + --lr 1e-3 + diff --git a/cv/classification/dla60x_c/utils.py b/cv/classification/dla60x_c/utils.py new file mode 120000 index 00000000..50fbc6d8 --- /dev/null +++ b/cv/classification/dla60x_c/utils.py @@ -0,0 +1 @@ +../utils.py \ No newline at end of file diff --git a/cv/classification/efficientnet_b0/README.md b/cv/classification/efficientnet_b0/README.md index fbeec40b..4ae4ef98 100644 --- a/cv/classification/efficientnet_b0/README.md +++ b/cv/classification/efficientnet_b0/README.md @@ -1,5 +1,5 @@ -## EfficientNet b0 - +## EfficientNet +EfficientNet is an efficient neural network architecture that achieves outstanding performance with relatively fewer parameters and computational costs by combining specific components and techniques in deep learning. The design of EfficientNet is based on the concept of Compound Scaling, which simultaneously adjusts the network's width, depth, and resolution across different dimensions to achieve a better balance between performance and efficiency. This enables EfficientNet to excel in various scenarios with limited computational resources, such as mobile devices and embedded systems, for tasks like image classification, object detection, and semantic segmentation. The emergence of EfficientNet has greatly advanced the efficient deployment and application of deep learning models, making it a significant breakthrough in the field. 
### Installation - Install the latest version of OneFlow ```bash diff --git a/cv/classification/efficientnet_b1/README.md b/cv/classification/efficientnet_b1/README.md new file mode 100644 index 00000000..4ae4ef98 --- /dev/null +++ b/cv/classification/efficientnet_b1/README.md @@ -0,0 +1,66 @@ +## EfficientNet +EfficientNet is an efficient neural network architecture that achieves outstanding performance with relatively fewer parameters and computational costs by combining specific components and techniques in deep learning. The design of EfficientNet is based on the concept of Compound Scaling, which simultaneously adjusts the network's width, depth, and resolution across different dimensions to achieve a better balance between performance and efficiency. This enables EfficientNet to excel in various scenarios with limited computational resources, such as mobile devices and embedded systems, for tasks like image classification, object detection, and semantic segmentation. The emergence of EfficientNet has greatly advanced the efficient deployment and application of deep learning models, making it a significant breakthrough in the field. +### Installation +- Install the latest version of OneFlow +```bash +python3 -m pip install oneflow -f https://staging.oneflow.info/branch/master/[PLATFORM] +``` +Find more information on [install oneflow](https://github.com/Oneflow-Inc/oneflow#install-oneflow) + +- Install flowvision + +Then install the latest stable release of flowvision + +```bash +pip install flowvision==0.2.1 +``` + +- Install other requirements +```bash +python3 -m pip install -r requirements.txt +``` + +### Dataset +#### ImageNet +For ImageNet dataset, you can download it from http://image-net.org/. We provide the following two ways to load data: + +- For standard folder dataset, move validation images to labeled sub-folders. 
The file structure should look like: + ```bash + $ tree data + imagenet + ├── train + │ ├── class1 + │ │ ├── img1.jpeg + │ │ ├── img2.jpeg + │ │ └── ... + │ ├── class2 + │ │ ├── img3.jpeg + │ │ └── ... + │ └── ... + └── val + ├── class1 + │ ├── img4.jpeg + │ ├── img5.jpeg + │ └── ... + ├── class2 + │ ├── img6.jpeg + │ └── ... + └── ... + + +### Training + +You can use bash script `train.sh` to train this model. + +```bash +sh train.sh +``` + +### inference + +Bash script `infer.sh` is used to infer the trained model. + +```bash +sh infer.sh +``` + diff --git a/cv/classification/efficientnet_b1/config.py b/cv/classification/efficientnet_b1/config.py new file mode 120000 index 00000000..3721332c --- /dev/null +++ b/cv/classification/efficientnet_b1/config.py @@ -0,0 +1 @@ +../config.py \ No newline at end of file diff --git a/cv/classification/efficientnet_b1/configs b/cv/classification/efficientnet_b1/configs new file mode 120000 index 00000000..271eaab3 --- /dev/null +++ b/cv/classification/efficientnet_b1/configs @@ -0,0 +1 @@ +../configs \ No newline at end of file diff --git a/cv/classification/efficientnet_b1/data b/cv/classification/efficientnet_b1/data new file mode 120000 index 00000000..4909e06e --- /dev/null +++ b/cv/classification/efficientnet_b1/data @@ -0,0 +1 @@ +../data \ No newline at end of file diff --git a/cv/classification/efficientnet_b1/infer.sh b/cv/classification/efficientnet_b1/infer.sh new file mode 100755 index 00000000..5cb30235 --- /dev/null +++ b/cv/classification/efficientnet_b1/infer.sh @@ -0,0 +1,16 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=1 +PORT=12346 +MODEL_ARCH="efficientnet_b1" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/default_settings.yaml \ + --model_arch $MODEL_ARCH \ + --throughput + diff --git a/cv/classification/efficientnet_b1/logger.py b/cv/classification/efficientnet_b1/logger.py new file 
mode 120000 index 00000000..e1dc8ceb --- /dev/null +++ b/cv/classification/efficientnet_b1/logger.py @@ -0,0 +1 @@ +../logger.py \ No newline at end of file diff --git a/cv/classification/efficientnet_b1/lr_scheduler.py b/cv/classification/efficientnet_b1/lr_scheduler.py new file mode 120000 index 00000000..268fb140 --- /dev/null +++ b/cv/classification/efficientnet_b1/lr_scheduler.py @@ -0,0 +1 @@ +../lr_scheduler.py \ No newline at end of file diff --git a/cv/classification/efficientnet_b1/main.py b/cv/classification/efficientnet_b1/main.py new file mode 120000 index 00000000..f05f7527 --- /dev/null +++ b/cv/classification/efficientnet_b1/main.py @@ -0,0 +1 @@ +../main.py \ No newline at end of file diff --git a/cv/classification/efficientnet_b1/optimizer.py b/cv/classification/efficientnet_b1/optimizer.py new file mode 120000 index 00000000..e5dec61d --- /dev/null +++ b/cv/classification/efficientnet_b1/optimizer.py @@ -0,0 +1 @@ +../optimizer.py \ No newline at end of file diff --git a/cv/classification/efficientnet_b1/requirements.txt b/cv/classification/efficientnet_b1/requirements.txt new file mode 120000 index 00000000..dc833dd4 --- /dev/null +++ b/cv/classification/efficientnet_b1/requirements.txt @@ -0,0 +1 @@ +../requirements.txt \ No newline at end of file diff --git a/cv/classification/efficientnet_b1/train.sh b/cv/classification/efficientnet_b1/train.sh new file mode 100755 index 00000000..c4b4ce83 --- /dev/null +++ b/cv/classification/efficientnet_b1/train.sh @@ -0,0 +1,15 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=8 +PORT=12346 +MODEL_ARCH="efficientnet_b1" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/default_settings.yaml \ + --model_arch $MODEL_ARCH + diff --git a/cv/classification/efficientnet_b1/utils.py b/cv/classification/efficientnet_b1/utils.py new file mode 120000 index 00000000..50fbc6d8 --- /dev/null +++ 
b/cv/classification/efficientnet_b1/utils.py @@ -0,0 +1 @@ +../utils.py \ No newline at end of file diff --git a/cv/classification/efficientnet_b2/README.md b/cv/classification/efficientnet_b2/README.md new file mode 100644 index 00000000..68fa82d8 --- /dev/null +++ b/cv/classification/efficientnet_b2/README.md @@ -0,0 +1,66 @@ +## EfficientNet b2 + +### Installation +- Install the latest version of OneFlow +```bash +python3 -m pip install oneflow -f https://staging.oneflow.info/branch/master/[PLATFORM] +``` +Find more information on [install oneflow](https://github.com/Oneflow-Inc/oneflow#install-oneflow) + +- Install flowvision + +Then install the latest stable release of flowvision + +```bash +pip install flowvision==0.2.1 +``` + +- Install other requirements +```bash +python3 -m pip install -r requirements.txt +``` + +### Dataset +#### ImageNet +For ImageNet dataset, you can download it from http://image-net.org/. We provide the following two ways to load data: + +- For standard folder dataset, move validation images to labeled sub-folders. The file structure should look like: + ```bash + $ tree data + imagenet + ├── train + │ ├── class1 + │ │ ├── img1.jpeg + │ │ ├── img2.jpeg + │ │ └── ... + │ ├── class2 + │ │ ├── img3.jpeg + │ │ └── ... + │ └── ... + └── val + ├── class1 + │ ├── img4.jpeg + │ ├── img5.jpeg + │ └── ... + ├── class2 + │ ├── img6.jpeg + │ └── ... + └── ... + + +### Training + +You can use bash script `train.sh` to train this model. + +```bash +sh train.sh +``` + +### inference + +Bash script `infer.sh` is used to infer the trained model. 
+ +```bash +sh infer.sh +``` + diff --git a/cv/classification/efficientnet_b2/config.py b/cv/classification/efficientnet_b2/config.py new file mode 120000 index 00000000..3721332c --- /dev/null +++ b/cv/classification/efficientnet_b2/config.py @@ -0,0 +1 @@ +../config.py \ No newline at end of file diff --git a/cv/classification/efficientnet_b2/configs b/cv/classification/efficientnet_b2/configs new file mode 120000 index 00000000..271eaab3 --- /dev/null +++ b/cv/classification/efficientnet_b2/configs @@ -0,0 +1 @@ +../configs \ No newline at end of file diff --git a/cv/classification/efficientnet_b2/data b/cv/classification/efficientnet_b2/data new file mode 120000 index 00000000..4909e06e --- /dev/null +++ b/cv/classification/efficientnet_b2/data @@ -0,0 +1 @@ +../data \ No newline at end of file diff --git a/cv/classification/efficientnet_b2/infer.sh b/cv/classification/efficientnet_b2/infer.sh new file mode 100755 index 00000000..59774164 --- /dev/null +++ b/cv/classification/efficientnet_b2/infer.sh @@ -0,0 +1,16 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=1 +PORT=12346 +MODEL_ARCH="efficientnet_b2" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/default_settings.yaml \ + --model_arch $MODEL_ARCH \ + --throughput + diff --git a/cv/classification/efficientnet_b2/logger.py b/cv/classification/efficientnet_b2/logger.py new file mode 120000 index 00000000..e1dc8ceb --- /dev/null +++ b/cv/classification/efficientnet_b2/logger.py @@ -0,0 +1 @@ +../logger.py \ No newline at end of file diff --git a/cv/classification/efficientnet_b2/lr_scheduler.py b/cv/classification/efficientnet_b2/lr_scheduler.py new file mode 120000 index 00000000..268fb140 --- /dev/null +++ b/cv/classification/efficientnet_b2/lr_scheduler.py @@ -0,0 +1 @@ +../lr_scheduler.py \ No newline at end of file diff --git a/cv/classification/efficientnet_b2/main.py 
b/cv/classification/efficientnet_b2/main.py new file mode 120000 index 00000000..f05f7527 --- /dev/null +++ b/cv/classification/efficientnet_b2/main.py @@ -0,0 +1 @@ +../main.py \ No newline at end of file diff --git a/cv/classification/efficientnet_b2/optimizer.py b/cv/classification/efficientnet_b2/optimizer.py new file mode 120000 index 00000000..e5dec61d --- /dev/null +++ b/cv/classification/efficientnet_b2/optimizer.py @@ -0,0 +1 @@ +../optimizer.py \ No newline at end of file diff --git a/cv/classification/efficientnet_b2/requirements.txt b/cv/classification/efficientnet_b2/requirements.txt new file mode 120000 index 00000000..dc833dd4 --- /dev/null +++ b/cv/classification/efficientnet_b2/requirements.txt @@ -0,0 +1 @@ +../requirements.txt \ No newline at end of file diff --git a/cv/classification/efficientnet_b2/train.sh b/cv/classification/efficientnet_b2/train.sh new file mode 100755 index 00000000..0afb7a5c --- /dev/null +++ b/cv/classification/efficientnet_b2/train.sh @@ -0,0 +1,15 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=8 +PORT=12346 +MODEL_ARCH="efficientnet_b2" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/default_settings.yaml \ + --model_arch $MODEL_ARCH + diff --git a/cv/classification/efficientnet_b2/utils.py b/cv/classification/efficientnet_b2/utils.py new file mode 120000 index 00000000..50fbc6d8 --- /dev/null +++ b/cv/classification/efficientnet_b2/utils.py @@ -0,0 +1 @@ +../utils.py \ No newline at end of file diff --git a/cv/classification/efficientnet_b3/README.md b/cv/classification/efficientnet_b3/README.md new file mode 100644 index 00000000..4ae4ef98 --- /dev/null +++ b/cv/classification/efficientnet_b3/README.md @@ -0,0 +1,66 @@ +## EfficientNet +EfficientNet is an efficient neural network architecture that achieves outstanding performance with relatively fewer parameters and computational costs by combining 
specific components and techniques in deep learning. The design of EfficientNet is based on the concept of Compound Scaling, which simultaneously adjusts the network's width, depth, and resolution across different dimensions to achieve a better balance between performance and efficiency. This enables EfficientNet to excel in various scenarios with limited computational resources, such as mobile devices and embedded systems, for tasks like image classification, object detection, and semantic segmentation. The emergence of EfficientNet has greatly advanced the efficient deployment and application of deep learning models, making it a significant breakthrough in the field. +### Installation +- Install the latest version of OneFlow +```bash +python3 -m pip install oneflow -f https://staging.oneflow.info/branch/master/[PLATFORM] +``` +Find more information on [install oneflow](https://github.com/Oneflow-Inc/oneflow#install-oneflow) + +- Install flowvision + +Then install the latest stable release of flowvision + +```bash +pip install flowvision==0.2.1 +``` + +- Install other requirements +```bash +python3 -m pip install -r requirements.txt +``` + +### Dataset +#### ImageNet +For ImageNet dataset, you can download it from http://image-net.org/. We provide the following two ways to load data: + +- For standard folder dataset, move validation images to labeled sub-folders. The file structure should look like: + ```bash + $ tree data + imagenet + ├── train + │ ├── class1 + │ │ ├── img1.jpeg + │ │ ├── img2.jpeg + │ │ └── ... + │ ├── class2 + │ │ ├── img3.jpeg + │ │ └── ... + │ └── ... + └── val + ├── class1 + │ ├── img4.jpeg + │ ├── img5.jpeg + │ └── ... + ├── class2 + │ ├── img6.jpeg + │ └── ... + └── ... + + +### Training + +You can use bash script `train.sh` to train this model. + +```bash +sh train.sh +``` + +### inference + +Bash script `infer.sh` is used to infer the trained model. 
+ +```bash +sh infer.sh +``` + diff --git a/cv/classification/efficientnet_b3/config.py b/cv/classification/efficientnet_b3/config.py new file mode 120000 index 00000000..3721332c --- /dev/null +++ b/cv/classification/efficientnet_b3/config.py @@ -0,0 +1 @@ +../config.py \ No newline at end of file diff --git a/cv/classification/efficientnet_b3/configs b/cv/classification/efficientnet_b3/configs new file mode 120000 index 00000000..271eaab3 --- /dev/null +++ b/cv/classification/efficientnet_b3/configs @@ -0,0 +1 @@ +../configs \ No newline at end of file diff --git a/cv/classification/efficientnet_b3/data b/cv/classification/efficientnet_b3/data new file mode 120000 index 00000000..4909e06e --- /dev/null +++ b/cv/classification/efficientnet_b3/data @@ -0,0 +1 @@ +../data \ No newline at end of file diff --git a/cv/classification/efficientnet_b3/infer.sh b/cv/classification/efficientnet_b3/infer.sh new file mode 100755 index 00000000..d80c7b60 --- /dev/null +++ b/cv/classification/efficientnet_b3/infer.sh @@ -0,0 +1,16 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=1 +PORT=12346 +MODEL_ARCH="efficientnet_b3" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/default_settings.yaml \ + --model_arch $MODEL_ARCH \ + --throughput + diff --git a/cv/classification/efficientnet_b3/logger.py b/cv/classification/efficientnet_b3/logger.py new file mode 120000 index 00000000..e1dc8ceb --- /dev/null +++ b/cv/classification/efficientnet_b3/logger.py @@ -0,0 +1 @@ +../logger.py \ No newline at end of file diff --git a/cv/classification/efficientnet_b3/lr_scheduler.py b/cv/classification/efficientnet_b3/lr_scheduler.py new file mode 120000 index 00000000..268fb140 --- /dev/null +++ b/cv/classification/efficientnet_b3/lr_scheduler.py @@ -0,0 +1 @@ +../lr_scheduler.py \ No newline at end of file diff --git a/cv/classification/efficientnet_b3/main.py 
b/cv/classification/efficientnet_b3/main.py new file mode 120000 index 00000000..f05f7527 --- /dev/null +++ b/cv/classification/efficientnet_b3/main.py @@ -0,0 +1 @@ +../main.py \ No newline at end of file diff --git a/cv/classification/efficientnet_b3/optimizer.py b/cv/classification/efficientnet_b3/optimizer.py new file mode 120000 index 00000000..e5dec61d --- /dev/null +++ b/cv/classification/efficientnet_b3/optimizer.py @@ -0,0 +1 @@ +../optimizer.py \ No newline at end of file diff --git a/cv/classification/efficientnet_b3/requirements.txt b/cv/classification/efficientnet_b3/requirements.txt new file mode 120000 index 00000000..dc833dd4 --- /dev/null +++ b/cv/classification/efficientnet_b3/requirements.txt @@ -0,0 +1 @@ +../requirements.txt \ No newline at end of file diff --git a/cv/classification/efficientnet_b3/train.sh b/cv/classification/efficientnet_b3/train.sh new file mode 100755 index 00000000..3a54b0f1 --- /dev/null +++ b/cv/classification/efficientnet_b3/train.sh @@ -0,0 +1,15 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=8 +PORT=12346 +MODEL_ARCH="efficientnet_b3" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/default_settings.yaml \ + --model_arch $MODEL_ARCH + diff --git a/cv/classification/efficientnet_b3/utils.py b/cv/classification/efficientnet_b3/utils.py new file mode 120000 index 00000000..50fbc6d8 --- /dev/null +++ b/cv/classification/efficientnet_b3/utils.py @@ -0,0 +1 @@ +../utils.py \ No newline at end of file diff --git a/cv/classification/efficientnet_b4/README.md b/cv/classification/efficientnet_b4/README.md new file mode 100644 index 00000000..4ae4ef98 --- /dev/null +++ b/cv/classification/efficientnet_b4/README.md @@ -0,0 +1,66 @@ +## EfficientNet +EfficientNet is an efficient neural network architecture that achieves outstanding performance with relatively fewer parameters and computational costs by combining 
specific components and techniques in deep learning. The design of EfficientNet is based on the concept of Compound Scaling, which simultaneously adjusts the network's width, depth, and resolution across different dimensions to achieve a better balance between performance and efficiency. This enables EfficientNet to excel in various scenarios with limited computational resources, such as mobile devices and embedded systems, for tasks like image classification, object detection, and semantic segmentation. The emergence of EfficientNet has greatly advanced the efficient deployment and application of deep learning models, making it a significant breakthrough in the field. +### Installation +- Install the latest version of OneFlow +```bash +python3 -m pip install oneflow -f https://staging.oneflow.info/branch/master/[PLATFORM] +``` +Find more information on [install oneflow](https://github.com/Oneflow-Inc/oneflow#install-oneflow) + +- Install flowvision + +Then install the latest stable release of flowvision + +```bash +pip install flowvision==0.2.1 +``` + +- Install other requirements +```bash +python3 -m pip install -r requirements.txt +``` + +### Dataset +#### ImageNet +For ImageNet dataset, you can download it from http://image-net.org/. We provide the following two ways to load data: + +- For standard folder dataset, move validation images to labeled sub-folders. The file structure should look like: + ```bash + $ tree data + imagenet + ├── train + │ ├── class1 + │ │ ├── img1.jpeg + │ │ ├── img2.jpeg + │ │ └── ... + │ ├── class2 + │ │ ├── img3.jpeg + │ │ └── ... + │ └── ... + └── val + ├── class1 + │ ├── img4.jpeg + │ ├── img5.jpeg + │ └── ... + ├── class2 + │ ├── img6.jpeg + │ └── ... + └── ... + + +### Training + +You can use bash script `train.sh` to train this model. + +```bash +sh train.sh +``` + +### inference + +Bash script `infer.sh` is used to infer the trained model. 
+ +```bash +sh infer.sh +``` + diff --git a/cv/classification/efficientnet_b4/config.py b/cv/classification/efficientnet_b4/config.py new file mode 120000 index 00000000..3721332c --- /dev/null +++ b/cv/classification/efficientnet_b4/config.py @@ -0,0 +1 @@ +../config.py \ No newline at end of file diff --git a/cv/classification/efficientnet_b4/configs b/cv/classification/efficientnet_b4/configs new file mode 120000 index 00000000..271eaab3 --- /dev/null +++ b/cv/classification/efficientnet_b4/configs @@ -0,0 +1 @@ +../configs \ No newline at end of file diff --git a/cv/classification/efficientnet_b4/data b/cv/classification/efficientnet_b4/data new file mode 120000 index 00000000..4909e06e --- /dev/null +++ b/cv/classification/efficientnet_b4/data @@ -0,0 +1 @@ +../data \ No newline at end of file diff --git a/cv/classification/efficientnet_b4/infer.sh b/cv/classification/efficientnet_b4/infer.sh new file mode 100755 index 00000000..7f3dc9bd --- /dev/null +++ b/cv/classification/efficientnet_b4/infer.sh @@ -0,0 +1,16 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=1 +PORT=12346 +MODEL_ARCH="efficientnet_b4" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/default_settings.yaml \ + --model_arch $MODEL_ARCH \ + --throughput + diff --git a/cv/classification/efficientnet_b4/logger.py b/cv/classification/efficientnet_b4/logger.py new file mode 120000 index 00000000..e1dc8ceb --- /dev/null +++ b/cv/classification/efficientnet_b4/logger.py @@ -0,0 +1 @@ +../logger.py \ No newline at end of file diff --git a/cv/classification/efficientnet_b4/lr_scheduler.py b/cv/classification/efficientnet_b4/lr_scheduler.py new file mode 120000 index 00000000..268fb140 --- /dev/null +++ b/cv/classification/efficientnet_b4/lr_scheduler.py @@ -0,0 +1 @@ +../lr_scheduler.py \ No newline at end of file diff --git a/cv/classification/efficientnet_b4/main.py 
b/cv/classification/efficientnet_b4/main.py new file mode 120000 index 00000000..f05f7527 --- /dev/null +++ b/cv/classification/efficientnet_b4/main.py @@ -0,0 +1 @@ +../main.py \ No newline at end of file diff --git a/cv/classification/efficientnet_b4/optimizer.py b/cv/classification/efficientnet_b4/optimizer.py new file mode 120000 index 00000000..e5dec61d --- /dev/null +++ b/cv/classification/efficientnet_b4/optimizer.py @@ -0,0 +1 @@ +../optimizer.py \ No newline at end of file diff --git a/cv/classification/efficientnet_b4/requirements.txt b/cv/classification/efficientnet_b4/requirements.txt new file mode 120000 index 00000000..dc833dd4 --- /dev/null +++ b/cv/classification/efficientnet_b4/requirements.txt @@ -0,0 +1 @@ +../requirements.txt \ No newline at end of file diff --git a/cv/classification/efficientnet_b4/train.sh b/cv/classification/efficientnet_b4/train.sh new file mode 100755 index 00000000..9fe98006 --- /dev/null +++ b/cv/classification/efficientnet_b4/train.sh @@ -0,0 +1,15 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=8 +PORT=12346 +MODEL_ARCH="efficientnet_b4" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/default_settings.yaml \ + --model_arch $MODEL_ARCH + diff --git a/cv/classification/efficientnet_b4/utils.py b/cv/classification/efficientnet_b4/utils.py new file mode 120000 index 00000000..50fbc6d8 --- /dev/null +++ b/cv/classification/efficientnet_b4/utils.py @@ -0,0 +1 @@ +../utils.py \ No newline at end of file diff --git a/cv/classification/efficientnet_b5/README.md b/cv/classification/efficientnet_b5/README.md new file mode 100644 index 00000000..08b26d07 --- /dev/null +++ b/cv/classification/efficientnet_b5/README.md @@ -0,0 +1,67 @@ +## EfficientNet +EfficientNet is an efficient neural network architecture that achieves outstanding performance with relatively fewer parameters and computational costs by combining 
specific components and techniques in deep learning. The design of EfficientNet is based on the concept of Compound Scaling, which simultaneously adjusts the network's width, depth, and resolution across different dimensions to achieve a better balance between performance and efficiency. This enables EfficientNet to excel in various scenarios with limited computational resources, such as mobile devices and embedded systems, for tasks like image classification, object detection, and semantic segmentation. The emergence of EfficientNet has greatly advanced the efficient deployment and application of deep learning models, making it a significant breakthrough in the field. + +### Installation +- Install the latest version of OneFlow +```bash +python3 -m pip install oneflow -f https://staging.oneflow.info/branch/master/[PLATFORM] +``` +Find more information on [install oneflow](https://github.com/Oneflow-Inc/oneflow#install-oneflow) + +- Install flowvision + +Then install the latest stable release of flowvision + +```bash +pip install flowvision==0.2.1 +``` + +- Install other requirements +```bash +python3 -m pip install -r requirements.txt +``` + +### Dataset +#### ImageNet +For ImageNet dataset, you can download it from http://image-net.org/. We provide the following two ways to load data: + +- For standard folder dataset, move validation images to labeled sub-folders. The file structure should look like: + ```bash + $ tree data + imagenet + ├── train + │ ├── class1 + │ │ ├── img1.jpeg + │ │ ├── img2.jpeg + │ │ └── ... + │ ├── class2 + │ │ ├── img3.jpeg + │ │ └── ... + │ └── ... + └── val + ├── class1 + │ ├── img4.jpeg + │ ├── img5.jpeg + │ └── ... + ├── class2 + │ ├── img6.jpeg + │ └── ... + └── ... + + +### Training + +You can use bash script `train.sh` to train this model. + +```bash +sh train.sh +``` + +### inference + +Bash script `infer.sh` is used to infer the trained model. 
+ +```bash +sh infer.sh +``` + diff --git a/cv/classification/efficientnet_b5/config.py b/cv/classification/efficientnet_b5/config.py new file mode 120000 index 00000000..3721332c --- /dev/null +++ b/cv/classification/efficientnet_b5/config.py @@ -0,0 +1 @@ +../config.py \ No newline at end of file diff --git a/cv/classification/efficientnet_b5/configs b/cv/classification/efficientnet_b5/configs new file mode 120000 index 00000000..271eaab3 --- /dev/null +++ b/cv/classification/efficientnet_b5/configs @@ -0,0 +1 @@ +../configs \ No newline at end of file diff --git a/cv/classification/efficientnet_b5/data b/cv/classification/efficientnet_b5/data new file mode 120000 index 00000000..4909e06e --- /dev/null +++ b/cv/classification/efficientnet_b5/data @@ -0,0 +1 @@ +../data \ No newline at end of file diff --git a/cv/classification/efficientnet_b5/infer.sh b/cv/classification/efficientnet_b5/infer.sh new file mode 100755 index 00000000..21b60106 --- /dev/null +++ b/cv/classification/efficientnet_b5/infer.sh @@ -0,0 +1,16 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=1 +PORT=12346 +MODEL_ARCH="efficientnet_b5" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/default_settings.yaml \ + --model_arch $MODEL_ARCH \ + --throughput + diff --git a/cv/classification/efficientnet_b5/logger.py b/cv/classification/efficientnet_b5/logger.py new file mode 120000 index 00000000..e1dc8ceb --- /dev/null +++ b/cv/classification/efficientnet_b5/logger.py @@ -0,0 +1 @@ +../logger.py \ No newline at end of file diff --git a/cv/classification/efficientnet_b5/lr_scheduler.py b/cv/classification/efficientnet_b5/lr_scheduler.py new file mode 120000 index 00000000..268fb140 --- /dev/null +++ b/cv/classification/efficientnet_b5/lr_scheduler.py @@ -0,0 +1 @@ +../lr_scheduler.py \ No newline at end of file diff --git a/cv/classification/efficientnet_b5/main.py 
b/cv/classification/efficientnet_b5/main.py new file mode 120000 index 00000000..f05f7527 --- /dev/null +++ b/cv/classification/efficientnet_b5/main.py @@ -0,0 +1 @@ +../main.py \ No newline at end of file diff --git a/cv/classification/efficientnet_b5/optimizer.py b/cv/classification/efficientnet_b5/optimizer.py new file mode 120000 index 00000000..e5dec61d --- /dev/null +++ b/cv/classification/efficientnet_b5/optimizer.py @@ -0,0 +1 @@ +../optimizer.py \ No newline at end of file diff --git a/cv/classification/efficientnet_b5/requirements.txt b/cv/classification/efficientnet_b5/requirements.txt new file mode 120000 index 00000000..dc833dd4 --- /dev/null +++ b/cv/classification/efficientnet_b5/requirements.txt @@ -0,0 +1 @@ +../requirements.txt \ No newline at end of file diff --git a/cv/classification/efficientnet_b5/train.sh b/cv/classification/efficientnet_b5/train.sh new file mode 100755 index 00000000..1906df7e --- /dev/null +++ b/cv/classification/efficientnet_b5/train.sh @@ -0,0 +1,15 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=8 +PORT=12346 +MODEL_ARCH="efficientnet_b5" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/default_settings.yaml \ + --model_arch $MODEL_ARCH + diff --git a/cv/classification/efficientnet_b5/utils.py b/cv/classification/efficientnet_b5/utils.py new file mode 120000 index 00000000..50fbc6d8 --- /dev/null +++ b/cv/classification/efficientnet_b5/utils.py @@ -0,0 +1 @@ +../utils.py \ No newline at end of file diff --git a/cv/classification/efficientnet_b6/README.md b/cv/classification/efficientnet_b6/README.md new file mode 100644 index 00000000..fbeec40b --- /dev/null +++ b/cv/classification/efficientnet_b6/README.md @@ -0,0 +1,66 @@ +## EfficientNet b6 + +### Installation +- Install the latest version of OneFlow +```bash +python3 -m pip install oneflow -f https://staging.oneflow.info/branch/master/[PLATFORM] +``` 
+Find more information on [install oneflow](https://github.com/Oneflow-Inc/oneflow#install-oneflow) + +- Install flowvision + +Then install the latest stable release of flowvision + +```bash +pip install flowvision==0.2.1 +``` + +- Install other requirements +```bash +python3 -m pip install -r requirements.txt +``` + +### Dataset +#### ImageNet +For ImageNet dataset, you can download it from http://image-net.org/. We provide the following two ways to load data: + +- For standard folder dataset, move validation images to labeled sub-folders. The file structure should look like: + ```bash + $ tree data + imagenet + ├── train + │ ├── class1 + │ │ ├── img1.jpeg + │ │ ├── img2.jpeg + │ │ └── ... + │ ├── class2 + │ │ ├── img3.jpeg + │ │ └── ... + │ └── ... + └── val + ├── class1 + │ ├── img4.jpeg + │ ├── img5.jpeg + │ └── ... + ├── class2 + │ ├── img6.jpeg + │ └── ... + └── ... + + +### Training + +You can use bash script `train.sh` to train this model. + +```bash +sh train.sh +``` + +### inference + +Bash script `infer.sh` is used to infer the trained model. 
+ +```bash +sh infer.sh +``` + diff --git a/cv/classification/efficientnet_b6/config.py b/cv/classification/efficientnet_b6/config.py new file mode 120000 index 00000000..3721332c --- /dev/null +++ b/cv/classification/efficientnet_b6/config.py @@ -0,0 +1 @@ +../config.py \ No newline at end of file diff --git a/cv/classification/efficientnet_b6/configs b/cv/classification/efficientnet_b6/configs new file mode 120000 index 00000000..271eaab3 --- /dev/null +++ b/cv/classification/efficientnet_b6/configs @@ -0,0 +1 @@ +../configs \ No newline at end of file diff --git a/cv/classification/efficientnet_b6/data b/cv/classification/efficientnet_b6/data new file mode 120000 index 00000000..4909e06e --- /dev/null +++ b/cv/classification/efficientnet_b6/data @@ -0,0 +1 @@ +../data \ No newline at end of file diff --git a/cv/classification/efficientnet_b6/infer.sh b/cv/classification/efficientnet_b6/infer.sh new file mode 100755 index 00000000..b666d015 --- /dev/null +++ b/cv/classification/efficientnet_b6/infer.sh @@ -0,0 +1,16 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=1 +PORT=12346 +MODEL_ARCH="efficientnet_b6" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/default_settings.yaml \ + --model_arch $MODEL_ARCH \ + --throughput + diff --git a/cv/classification/efficientnet_b6/logger.py b/cv/classification/efficientnet_b6/logger.py new file mode 120000 index 00000000..e1dc8ceb --- /dev/null +++ b/cv/classification/efficientnet_b6/logger.py @@ -0,0 +1 @@ +../logger.py \ No newline at end of file diff --git a/cv/classification/efficientnet_b6/lr_scheduler.py b/cv/classification/efficientnet_b6/lr_scheduler.py new file mode 120000 index 00000000..268fb140 --- /dev/null +++ b/cv/classification/efficientnet_b6/lr_scheduler.py @@ -0,0 +1 @@ +../lr_scheduler.py \ No newline at end of file diff --git a/cv/classification/efficientnet_b6/main.py 
b/cv/classification/efficientnet_b6/main.py new file mode 120000 index 00000000..f05f7527 --- /dev/null +++ b/cv/classification/efficientnet_b6/main.py @@ -0,0 +1 @@ +../main.py \ No newline at end of file diff --git a/cv/classification/efficientnet_b6/optimizer.py b/cv/classification/efficientnet_b6/optimizer.py new file mode 120000 index 00000000..e5dec61d --- /dev/null +++ b/cv/classification/efficientnet_b6/optimizer.py @@ -0,0 +1 @@ +../optimizer.py \ No newline at end of file diff --git a/cv/classification/efficientnet_b6/requirements.txt b/cv/classification/efficientnet_b6/requirements.txt new file mode 120000 index 00000000..dc833dd4 --- /dev/null +++ b/cv/classification/efficientnet_b6/requirements.txt @@ -0,0 +1 @@ +../requirements.txt \ No newline at end of file diff --git a/cv/classification/efficientnet_b6/train.sh b/cv/classification/efficientnet_b6/train.sh new file mode 100755 index 00000000..420b3bf4 --- /dev/null +++ b/cv/classification/efficientnet_b6/train.sh @@ -0,0 +1,15 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=8 +PORT=12346 +MODEL_ARCH="efficientnet_b6" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/default_settings.yaml \ + --model_arch $MODEL_ARCH + diff --git a/cv/classification/efficientnet_b6/utils.py b/cv/classification/efficientnet_b6/utils.py new file mode 120000 index 00000000..50fbc6d8 --- /dev/null +++ b/cv/classification/efficientnet_b6/utils.py @@ -0,0 +1 @@ +../utils.py \ No newline at end of file diff --git a/cv/classification/efficientnet_b7/README.md b/cv/classification/efficientnet_b7/README.md new file mode 100644 index 00000000..4ae4ef98 --- /dev/null +++ b/cv/classification/efficientnet_b7/README.md @@ -0,0 +1,66 @@ +## EfficientNet +EfficientNet is an efficient neural network architecture that achieves outstanding performance with relatively fewer parameters and computational costs by combining 
specific components and techniques in deep learning. The design of EfficientNet is based on the concept of Compound Scaling, which simultaneously adjusts the network's width, depth, and resolution across different dimensions to achieve a better balance between performance and efficiency. This enables EfficientNet to excel in various scenarios with limited computational resources, such as mobile devices and embedded systems, for tasks like image classification, object detection, and semantic segmentation. The emergence of EfficientNet has greatly advanced the efficient deployment and application of deep learning models, making it a significant breakthrough in the field. +### Installation +- Install the latest version of OneFlow +```bash +python3 -m pip install oneflow -f https://staging.oneflow.info/branch/master/[PLATFORM] +``` +Find more information on [install oneflow](https://github.com/Oneflow-Inc/oneflow#install-oneflow) + +- Install flowvision + +Then install the latest stable release of flowvision + +```bash +pip install flowvision==0.2.1 +``` + +- Install other requirements +```bash +python3 -m pip install -r requirements.txt +``` + +### Dataset +#### ImageNet +For ImageNet dataset, you can download it from http://image-net.org/. We provide the following two ways to load data: + +- For standard folder dataset, move validation images to labeled sub-folders. The file structure should look like: + ```bash + $ tree data + imagenet + ├── train + │ ├── class1 + │ │ ├── img1.jpeg + │ │ ├── img2.jpeg + │ │ └── ... + │ ├── class2 + │ │ ├── img3.jpeg + │ │ └── ... + │ └── ... + └── val + ├── class1 + │ ├── img4.jpeg + │ ├── img5.jpeg + │ └── ... + ├── class2 + │ ├── img6.jpeg + │ └── ... + └── ... + + +### Training + +You can use bash script `train.sh` to train this model. + +```bash +sh train.sh +``` + +### inference + +Bash script `infer.sh` is used to infer the trained model. 
+ +```bash +sh infer.sh +``` + diff --git a/cv/classification/efficientnet_b7/config.py b/cv/classification/efficientnet_b7/config.py new file mode 120000 index 00000000..3721332c --- /dev/null +++ b/cv/classification/efficientnet_b7/config.py @@ -0,0 +1 @@ +../config.py \ No newline at end of file diff --git a/cv/classification/efficientnet_b7/configs b/cv/classification/efficientnet_b7/configs new file mode 120000 index 00000000..271eaab3 --- /dev/null +++ b/cv/classification/efficientnet_b7/configs @@ -0,0 +1 @@ +../configs \ No newline at end of file diff --git a/cv/classification/efficientnet_b7/data b/cv/classification/efficientnet_b7/data new file mode 120000 index 00000000..4909e06e --- /dev/null +++ b/cv/classification/efficientnet_b7/data @@ -0,0 +1 @@ +../data \ No newline at end of file diff --git a/cv/classification/efficientnet_b7/infer.sh b/cv/classification/efficientnet_b7/infer.sh new file mode 100755 index 00000000..ee93e93e --- /dev/null +++ b/cv/classification/efficientnet_b7/infer.sh @@ -0,0 +1,16 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=1 +PORT=12346 +MODEL_ARCH="efficientnet_b7" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/default_settings.yaml \ + --model_arch $MODEL_ARCH \ + --throughput + diff --git a/cv/classification/efficientnet_b7/logger.py b/cv/classification/efficientnet_b7/logger.py new file mode 120000 index 00000000..e1dc8ceb --- /dev/null +++ b/cv/classification/efficientnet_b7/logger.py @@ -0,0 +1 @@ +../logger.py \ No newline at end of file diff --git a/cv/classification/efficientnet_b7/lr_scheduler.py b/cv/classification/efficientnet_b7/lr_scheduler.py new file mode 120000 index 00000000..268fb140 --- /dev/null +++ b/cv/classification/efficientnet_b7/lr_scheduler.py @@ -0,0 +1 @@ +../lr_scheduler.py \ No newline at end of file diff --git a/cv/classification/efficientnet_b7/main.py 
b/cv/classification/efficientnet_b7/main.py new file mode 120000 index 00000000..f05f7527 --- /dev/null +++ b/cv/classification/efficientnet_b7/main.py @@ -0,0 +1 @@ +../main.py \ No newline at end of file diff --git a/cv/classification/efficientnet_b7/optimizer.py b/cv/classification/efficientnet_b7/optimizer.py new file mode 120000 index 00000000..e5dec61d --- /dev/null +++ b/cv/classification/efficientnet_b7/optimizer.py @@ -0,0 +1 @@ +../optimizer.py \ No newline at end of file diff --git a/cv/classification/efficientnet_b7/requirements.txt b/cv/classification/efficientnet_b7/requirements.txt new file mode 120000 index 00000000..dc833dd4 --- /dev/null +++ b/cv/classification/efficientnet_b7/requirements.txt @@ -0,0 +1 @@ +../requirements.txt \ No newline at end of file diff --git a/cv/classification/efficientnet_b7/train.sh b/cv/classification/efficientnet_b7/train.sh new file mode 100755 index 00000000..ab9c2e89 --- /dev/null +++ b/cv/classification/efficientnet_b7/train.sh @@ -0,0 +1,15 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=8 +PORT=12346 +MODEL_ARCH="efficientnet_b7" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/default_settings.yaml \ + --model_arch $MODEL_ARCH + diff --git a/cv/classification/efficientnet_b7/utils.py b/cv/classification/efficientnet_b7/utils.py new file mode 120000 index 00000000..50fbc6d8 --- /dev/null +++ b/cv/classification/efficientnet_b7/utils.py @@ -0,0 +1 @@ +../utils.py \ No newline at end of file diff --git a/cv/classification/fan_base_16_p4_hybrid_in22k_1k_384/README.md b/cv/classification/fan_base_16_p4_hybrid_in22k_1k_384/README.md new file mode 100644 index 00000000..954849db --- /dev/null +++ b/cv/classification/fan_base_16_p4_hybrid_in22k_1k_384/README.md @@ -0,0 +1,66 @@ +## fan_base_16_p4_hybrid_in22k_1k_384 + +### Installation +- Install the latest version of OneFlow +```bash +python3 -m pip 
install oneflow -f https://staging.oneflow.info/branch/master/[PLATFORM] +``` +Find more information on [install oneflow](https://github.com/Oneflow-Inc/oneflow#install-oneflow) + +- Install flowvision + +Then install the latest stable release of flowvision + +```bash +pip install flowvision==0.2.1 +``` + +- Install other requirements +```bash +python3 -m pip install -r requirements.txt +``` + +### Dataset +#### ImageNet +For ImageNet dataset, you can download it from http://image-net.org/. We provide the following two ways to load data: + +- For standard folder dataset, move validation images to labeled sub-folders. The file structure should look like: + ```bash + $ tree data + imagenet + ├── train + │ ├── class1 + │ │ ├── img1.jpeg + │ │ ├── img2.jpeg + │ │ └── ... + │ ├── class2 + │ │ ├── img3.jpeg + │ │ └── ... + │ └── ... + └── val + ├── class1 + │ ├── img4.jpeg + │ ├── img5.jpeg + │ └── ... + ├── class2 + │ ├── img6.jpeg + │ └── ... + └── ... + + +### Training + +You can use bash script `train.sh` to train this model. + +```bash +sh train.sh +``` + +### inference + +Bash script `infer.sh` is used to infer the trained model. 
+ +```bash +sh infer.sh +``` + diff --git a/cv/classification/fan_base_16_p4_hybrid_in22k_1k_384/config.py b/cv/classification/fan_base_16_p4_hybrid_in22k_1k_384/config.py new file mode 120000 index 00000000..3721332c --- /dev/null +++ b/cv/classification/fan_base_16_p4_hybrid_in22k_1k_384/config.py @@ -0,0 +1 @@ +../config.py \ No newline at end of file diff --git a/cv/classification/fan_base_16_p4_hybrid_in22k_1k_384/configs b/cv/classification/fan_base_16_p4_hybrid_in22k_1k_384/configs new file mode 120000 index 00000000..271eaab3 --- /dev/null +++ b/cv/classification/fan_base_16_p4_hybrid_in22k_1k_384/configs @@ -0,0 +1 @@ +../configs \ No newline at end of file diff --git a/cv/classification/fan_base_16_p4_hybrid_in22k_1k_384/data b/cv/classification/fan_base_16_p4_hybrid_in22k_1k_384/data new file mode 120000 index 00000000..4909e06e --- /dev/null +++ b/cv/classification/fan_base_16_p4_hybrid_in22k_1k_384/data @@ -0,0 +1 @@ +../data \ No newline at end of file diff --git a/cv/classification/fan_base_16_p4_hybrid_in22k_1k_384/infer.sh b/cv/classification/fan_base_16_p4_hybrid_in22k_1k_384/infer.sh new file mode 100755 index 00000000..a9cdc012 --- /dev/null +++ b/cv/classification/fan_base_16_p4_hybrid_in22k_1k_384/infer.sh @@ -0,0 +1,17 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=1 +PORT=12346 +MODEL_ARCH="fan_base_16_p4_hybrid_in22k_1k_384" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/default_settings.yaml \ + --model_arch $MODEL_ARCH \ + --lr 1e-2 \ + --throughput + diff --git a/cv/classification/fan_base_16_p4_hybrid_in22k_1k_384/logger.py b/cv/classification/fan_base_16_p4_hybrid_in22k_1k_384/logger.py new file mode 120000 index 00000000..e1dc8ceb --- /dev/null +++ b/cv/classification/fan_base_16_p4_hybrid_in22k_1k_384/logger.py @@ -0,0 +1 @@ +../logger.py \ No newline at end of file diff --git 
a/cv/classification/fan_base_16_p4_hybrid_in22k_1k_384/lr_scheduler.py b/cv/classification/fan_base_16_p4_hybrid_in22k_1k_384/lr_scheduler.py new file mode 120000 index 00000000..268fb140 --- /dev/null +++ b/cv/classification/fan_base_16_p4_hybrid_in22k_1k_384/lr_scheduler.py @@ -0,0 +1 @@ +../lr_scheduler.py \ No newline at end of file diff --git a/cv/classification/fan_base_16_p4_hybrid_in22k_1k_384/main.py b/cv/classification/fan_base_16_p4_hybrid_in22k_1k_384/main.py new file mode 120000 index 00000000..f05f7527 --- /dev/null +++ b/cv/classification/fan_base_16_p4_hybrid_in22k_1k_384/main.py @@ -0,0 +1 @@ +../main.py \ No newline at end of file diff --git a/cv/classification/fan_base_16_p4_hybrid_in22k_1k_384/optimizer.py b/cv/classification/fan_base_16_p4_hybrid_in22k_1k_384/optimizer.py new file mode 120000 index 00000000..e5dec61d --- /dev/null +++ b/cv/classification/fan_base_16_p4_hybrid_in22k_1k_384/optimizer.py @@ -0,0 +1 @@ +../optimizer.py \ No newline at end of file diff --git a/cv/classification/fan_base_16_p4_hybrid_in22k_1k_384/requirements.txt b/cv/classification/fan_base_16_p4_hybrid_in22k_1k_384/requirements.txt new file mode 120000 index 00000000..dc833dd4 --- /dev/null +++ b/cv/classification/fan_base_16_p4_hybrid_in22k_1k_384/requirements.txt @@ -0,0 +1 @@ +../requirements.txt \ No newline at end of file diff --git a/cv/classification/fan_base_16_p4_hybrid_in22k_1k_384/train.sh b/cv/classification/fan_base_16_p4_hybrid_in22k_1k_384/train.sh new file mode 100755 index 00000000..2c7ddd92 --- /dev/null +++ b/cv/classification/fan_base_16_p4_hybrid_in22k_1k_384/train.sh @@ -0,0 +1,21 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=2 +PORT=12346 +MODEL_ARCH="fan_base_16_p4_hybrid_in22k_1k_384" +IMAGE_SIZE=384 +BATCH_SIZE=16 +LEARNING_RATE=1e-4 + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/default_settings.yaml \ + --model_arch 
$MODEL_ARCH \ + --batch-size $BATCH_SIZE \ + --image-size $IMAGE_SIZE \ + --lr $LEARNING_RATE + diff --git a/cv/classification/fan_base_16_p4_hybrid_in22k_1k_384/utils.py b/cv/classification/fan_base_16_p4_hybrid_in22k_1k_384/utils.py new file mode 120000 index 00000000..50fbc6d8 --- /dev/null +++ b/cv/classification/fan_base_16_p4_hybrid_in22k_1k_384/utils.py @@ -0,0 +1 @@ +../utils.py \ No newline at end of file diff --git a/cv/classification/fan_hybrid_base/README.md b/cv/classification/fan_hybrid_base/README.md new file mode 100644 index 00000000..e7b63e84 --- /dev/null +++ b/cv/classification/fan_hybrid_base/README.md @@ -0,0 +1,66 @@ +## FAN-Hybrid-base + +### Installation +- Install the latest version of OneFlow +```bash +python3 -m pip install oneflow -f https://staging.oneflow.info/branch/master/[PLATFORM] +``` +Find more information on [install oneflow](https://github.com/Oneflow-Inc/oneflow#install-oneflow) + +- Install flowvision + +Then install the latest stable release of flowvision + +```bash +pip install flowvision==0.2.1 +``` + +- Install other requirements +```bash +python3 -m pip install -r requirements.txt +``` + +### Dataset +#### ImageNet +For ImageNet dataset, you can download it from http://image-net.org/. We provide the following two ways to load data: + +- For standard folder dataset, move validation images to labeled sub-folders. The file structure should look like: + ```bash + $ tree data + imagenet + ├── train + │ ├── class1 + │ │ ├── img1.jpeg + │ │ ├── img2.jpeg + │ │ └── ... + │ ├── class2 + │ │ ├── img3.jpeg + │ │ └── ... + │ └── ... + └── val + ├── class1 + │ ├── img4.jpeg + │ ├── img5.jpeg + │ └── ... + ├── class2 + │ ├── img6.jpeg + │ └── ... + └── ... + + +### Training + +You can use bash script `train.sh` to train this model. + +```bash +sh train.sh +``` + +### inference + +Bash script `infer.sh` is used to infer the trained model. 
+ +```bash +sh infer.sh +``` + diff --git a/cv/classification/fan_hybrid_base/config.py b/cv/classification/fan_hybrid_base/config.py new file mode 120000 index 00000000..3721332c --- /dev/null +++ b/cv/classification/fan_hybrid_base/config.py @@ -0,0 +1 @@ +../config.py \ No newline at end of file diff --git a/cv/classification/fan_hybrid_base/configs b/cv/classification/fan_hybrid_base/configs new file mode 120000 index 00000000..271eaab3 --- /dev/null +++ b/cv/classification/fan_hybrid_base/configs @@ -0,0 +1 @@ +../configs \ No newline at end of file diff --git a/cv/classification/fan_hybrid_base/data b/cv/classification/fan_hybrid_base/data new file mode 120000 index 00000000..4909e06e --- /dev/null +++ b/cv/classification/fan_hybrid_base/data @@ -0,0 +1 @@ +../data \ No newline at end of file diff --git a/cv/classification/fan_hybrid_base/infer.sh b/cv/classification/fan_hybrid_base/infer.sh new file mode 100755 index 00000000..ebcff441 --- /dev/null +++ b/cv/classification/fan_hybrid_base/infer.sh @@ -0,0 +1,17 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=1 +PORT=12346 +MODEL_ARCH="fan_base_16_p4_hybrid" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/default_settings.yaml \ + --model_arch $MODEL_ARCH \ + --lr 1e-2 \ + --throughput + diff --git a/cv/classification/fan_hybrid_base/logger.py b/cv/classification/fan_hybrid_base/logger.py new file mode 120000 index 00000000..e1dc8ceb --- /dev/null +++ b/cv/classification/fan_hybrid_base/logger.py @@ -0,0 +1 @@ +../logger.py \ No newline at end of file diff --git a/cv/classification/fan_hybrid_base/lr_scheduler.py b/cv/classification/fan_hybrid_base/lr_scheduler.py new file mode 120000 index 00000000..268fb140 --- /dev/null +++ b/cv/classification/fan_hybrid_base/lr_scheduler.py @@ -0,0 +1 @@ +../lr_scheduler.py \ No newline at end of file diff --git a/cv/classification/fan_hybrid_base/main.py 
b/cv/classification/fan_hybrid_base/main.py new file mode 120000 index 00000000..f05f7527 --- /dev/null +++ b/cv/classification/fan_hybrid_base/main.py @@ -0,0 +1 @@ +../main.py \ No newline at end of file diff --git a/cv/classification/fan_hybrid_base/optimizer.py b/cv/classification/fan_hybrid_base/optimizer.py new file mode 120000 index 00000000..e5dec61d --- /dev/null +++ b/cv/classification/fan_hybrid_base/optimizer.py @@ -0,0 +1 @@ +../optimizer.py \ No newline at end of file diff --git a/cv/classification/fan_hybrid_base/requirements.txt b/cv/classification/fan_hybrid_base/requirements.txt new file mode 120000 index 00000000..dc833dd4 --- /dev/null +++ b/cv/classification/fan_hybrid_base/requirements.txt @@ -0,0 +1 @@ +../requirements.txt \ No newline at end of file diff --git a/cv/classification/fan_hybrid_base/train.sh b/cv/classification/fan_hybrid_base/train.sh new file mode 100755 index 00000000..c920ea24 --- /dev/null +++ b/cv/classification/fan_hybrid_base/train.sh @@ -0,0 +1,19 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=4 +PORT=12346 +MODEL_ARCH="fan_base_16_p4_hybrid" +BATCH_SIZE=64 +LEARNING_RATE=1e-4 + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/default_settings.yaml \ + --model_arch $MODEL_ARCH \ + --batch-size $BATCH_SIZE \ + --lr $LEARNING_RATE + diff --git a/cv/classification/fan_hybrid_base/utils.py b/cv/classification/fan_hybrid_base/utils.py new file mode 120000 index 00000000..50fbc6d8 --- /dev/null +++ b/cv/classification/fan_hybrid_base/utils.py @@ -0,0 +1 @@ +../utils.py \ No newline at end of file diff --git a/cv/classification/fan_hybrid_base_in22k_1k/README.md b/cv/classification/fan_hybrid_base_in22k_1k/README.md new file mode 100644 index 00000000..ba134a9e --- /dev/null +++ b/cv/classification/fan_hybrid_base_in22k_1k/README.md @@ -0,0 +1,66 @@ +## fan_hybrid_base_in22k_1k + +### Installation +- Install the 
latest version of OneFlow +```bash +python3 -m pip install oneflow -f https://staging.oneflow.info/branch/master/[PLATFORM] +``` +Find more information on [install oneflow](https://github.com/Oneflow-Inc/oneflow#install-oneflow) + +- Install flowvision + +Then install the latest stable release of flowvision + +```bash +pip install flowvision==0.2.1 +``` + +- Install other requirements +```bash +python3 -m pip install -r requirements.txt +``` + +### Dataset +#### ImageNet +For ImageNet dataset, you can download it from http://image-net.org/. We provide the following two ways to load data: + +- For standard folder dataset, move validation images to labeled sub-folders. The file structure should look like: + ```bash + $ tree data + imagenet + ├── train + │ ├── class1 + │ │ ├── img1.jpeg + │ │ ├── img2.jpeg + │ │ └── ... + │ ├── class2 + │ │ ├── img3.jpeg + │ │ └── ... + │ └── ... + └── val + ├── class1 + │ ├── img4.jpeg + │ ├── img5.jpeg + │ └── ... + ├── class2 + │ ├── img6.jpeg + │ └── ... + └── ... + + +### Training + +You can use bash script `train.sh` to train this model. + +```bash +sh train.sh +``` + +### inference + +Bash script `infer.sh` is used to infer the trained model. 
+ +```bash +sh infer.sh +``` + diff --git a/cv/classification/fan_hybrid_base_in22k_1k/config.py b/cv/classification/fan_hybrid_base_in22k_1k/config.py new file mode 120000 index 00000000..3721332c --- /dev/null +++ b/cv/classification/fan_hybrid_base_in22k_1k/config.py @@ -0,0 +1 @@ +../config.py \ No newline at end of file diff --git a/cv/classification/fan_hybrid_base_in22k_1k/configs b/cv/classification/fan_hybrid_base_in22k_1k/configs new file mode 120000 index 00000000..271eaab3 --- /dev/null +++ b/cv/classification/fan_hybrid_base_in22k_1k/configs @@ -0,0 +1 @@ +../configs \ No newline at end of file diff --git a/cv/classification/fan_hybrid_base_in22k_1k/data b/cv/classification/fan_hybrid_base_in22k_1k/data new file mode 120000 index 00000000..4909e06e --- /dev/null +++ b/cv/classification/fan_hybrid_base_in22k_1k/data @@ -0,0 +1 @@ +../data \ No newline at end of file diff --git a/cv/classification/fan_hybrid_base_in22k_1k/infer.sh b/cv/classification/fan_hybrid_base_in22k_1k/infer.sh new file mode 100755 index 00000000..63c92f40 --- /dev/null +++ b/cv/classification/fan_hybrid_base_in22k_1k/infer.sh @@ -0,0 +1,17 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=1 +PORT=12346 +MODEL_ARCH="fan_base_16_p4_hybrid_in22k_1k" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/default_settings.yaml \ + --model_arch $MODEL_ARCH \ + --lr 1e-2 \ + --throughput + diff --git a/cv/classification/fan_hybrid_base_in22k_1k/logger.py b/cv/classification/fan_hybrid_base_in22k_1k/logger.py new file mode 120000 index 00000000..e1dc8ceb --- /dev/null +++ b/cv/classification/fan_hybrid_base_in22k_1k/logger.py @@ -0,0 +1 @@ +../logger.py \ No newline at end of file diff --git a/cv/classification/fan_hybrid_base_in22k_1k/lr_scheduler.py b/cv/classification/fan_hybrid_base_in22k_1k/lr_scheduler.py new file mode 120000 index 00000000..268fb140 --- /dev/null +++ 
b/cv/classification/fan_hybrid_base_in22k_1k/lr_scheduler.py @@ -0,0 +1 @@ +../lr_scheduler.py \ No newline at end of file diff --git a/cv/classification/fan_hybrid_base_in22k_1k/main.py b/cv/classification/fan_hybrid_base_in22k_1k/main.py new file mode 120000 index 00000000..f05f7527 --- /dev/null +++ b/cv/classification/fan_hybrid_base_in22k_1k/main.py @@ -0,0 +1 @@ +../main.py \ No newline at end of file diff --git a/cv/classification/fan_hybrid_base_in22k_1k/optimizer.py b/cv/classification/fan_hybrid_base_in22k_1k/optimizer.py new file mode 120000 index 00000000..e5dec61d --- /dev/null +++ b/cv/classification/fan_hybrid_base_in22k_1k/optimizer.py @@ -0,0 +1 @@ +../optimizer.py \ No newline at end of file diff --git a/cv/classification/fan_hybrid_base_in22k_1k/requirements.txt b/cv/classification/fan_hybrid_base_in22k_1k/requirements.txt new file mode 120000 index 00000000..dc833dd4 --- /dev/null +++ b/cv/classification/fan_hybrid_base_in22k_1k/requirements.txt @@ -0,0 +1 @@ +../requirements.txt \ No newline at end of file diff --git a/cv/classification/fan_hybrid_base_in22k_1k/train.sh b/cv/classification/fan_hybrid_base_in22k_1k/train.sh new file mode 100755 index 00000000..2ebd0b9f --- /dev/null +++ b/cv/classification/fan_hybrid_base_in22k_1k/train.sh @@ -0,0 +1,19 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=4 +PORT=12346 +MODEL_ARCH="fan_base_16_p4_hybrid_in22k_1k" +BATCH_SIZE=32 +LEARNING_RATE=1e-4 + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/default_settings.yaml \ + --model_arch $MODEL_ARCH \ + --batch-size $BATCH_SIZE \ + --lr $LEARNING_RATE + diff --git a/cv/classification/fan_hybrid_base_in22k_1k/utils.py b/cv/classification/fan_hybrid_base_in22k_1k/utils.py new file mode 120000 index 00000000..50fbc6d8 --- /dev/null +++ b/cv/classification/fan_hybrid_base_in22k_1k/utils.py @@ -0,0 +1 @@ +../utils.py \ No newline at end of 
file diff --git a/cv/classification/fan_hybrid_small/README.md b/cv/classification/fan_hybrid_small/README.md new file mode 100644 index 00000000..45e3fef4 --- /dev/null +++ b/cv/classification/fan_hybrid_small/README.md @@ -0,0 +1,66 @@ +## FAN-Hybrid-small + +### Installation +- Install the latest version of OneFlow +```bash +python3 -m pip install oneflow -f https://staging.oneflow.info/branch/master/[PLATFORM] +``` +Find more information on [install oneflow](https://github.com/Oneflow-Inc/oneflow#install-oneflow) + +- Install flowvision + +Then install the latest stable release of flowvision + +```bash +pip install flowvision==0.2.1 +``` + +- Install other requirements +```bash +python3 -m pip install -r requirements.txt +``` + +### Dataset +#### ImageNet +For ImageNet dataset, you can download it from http://image-net.org/. We provide the following two ways to load data: + +- For standard folder dataset, move validation images to labeled sub-folders. The file structure should look like: + ```bash + $ tree data + imagenet + ├── train + │ ├── class1 + │ │ ├── img1.jpeg + │ │ ├── img2.jpeg + │ │ └── ... + │ ├── class2 + │ │ ├── img3.jpeg + │ │ └── ... + │ └── ... + └── val + ├── class1 + │ ├── img4.jpeg + │ ├── img5.jpeg + │ └── ... + ├── class2 + │ ├── img6.jpeg + │ └── ... + └── ... + + +### Training + +You can use bash script `train.sh` to train this model. + +```bash +sh train.sh +``` + +### inference + +Bash script `infer.sh` is used to infer the trained model. 
+ +```bash +sh infer.sh +``` + diff --git a/cv/classification/fan_hybrid_small/config.py b/cv/classification/fan_hybrid_small/config.py new file mode 120000 index 00000000..3721332c --- /dev/null +++ b/cv/classification/fan_hybrid_small/config.py @@ -0,0 +1 @@ +../config.py \ No newline at end of file diff --git a/cv/classification/fan_hybrid_small/configs b/cv/classification/fan_hybrid_small/configs new file mode 120000 index 00000000..271eaab3 --- /dev/null +++ b/cv/classification/fan_hybrid_small/configs @@ -0,0 +1 @@ +../configs \ No newline at end of file diff --git a/cv/classification/fan_hybrid_small/data b/cv/classification/fan_hybrid_small/data new file mode 120000 index 00000000..4909e06e --- /dev/null +++ b/cv/classification/fan_hybrid_small/data @@ -0,0 +1 @@ +../data \ No newline at end of file diff --git a/cv/classification/fan_hybrid_small/infer.sh b/cv/classification/fan_hybrid_small/infer.sh new file mode 100755 index 00000000..92d77bca --- /dev/null +++ b/cv/classification/fan_hybrid_small/infer.sh @@ -0,0 +1,17 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=1 +PORT=12346 +MODEL_ARCH="fan_small_12_p4_hybrid" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/default_settings.yaml \ + --model_arch $MODEL_ARCH \ + --lr 1e-2 \ + --throughput + diff --git a/cv/classification/fan_hybrid_small/logger.py b/cv/classification/fan_hybrid_small/logger.py new file mode 120000 index 00000000..e1dc8ceb --- /dev/null +++ b/cv/classification/fan_hybrid_small/logger.py @@ -0,0 +1 @@ +../logger.py \ No newline at end of file diff --git a/cv/classification/fan_hybrid_small/lr_scheduler.py b/cv/classification/fan_hybrid_small/lr_scheduler.py new file mode 120000 index 00000000..268fb140 --- /dev/null +++ b/cv/classification/fan_hybrid_small/lr_scheduler.py @@ -0,0 +1 @@ +../lr_scheduler.py \ No newline at end of file diff --git 
a/cv/classification/fan_hybrid_small/main.py b/cv/classification/fan_hybrid_small/main.py new file mode 120000 index 00000000..f05f7527 --- /dev/null +++ b/cv/classification/fan_hybrid_small/main.py @@ -0,0 +1 @@ +../main.py \ No newline at end of file diff --git a/cv/classification/fan_hybrid_small/optimizer.py b/cv/classification/fan_hybrid_small/optimizer.py new file mode 120000 index 00000000..e5dec61d --- /dev/null +++ b/cv/classification/fan_hybrid_small/optimizer.py @@ -0,0 +1 @@ +../optimizer.py \ No newline at end of file diff --git a/cv/classification/fan_hybrid_small/requirements.txt b/cv/classification/fan_hybrid_small/requirements.txt new file mode 120000 index 00000000..dc833dd4 --- /dev/null +++ b/cv/classification/fan_hybrid_small/requirements.txt @@ -0,0 +1 @@ +../requirements.txt \ No newline at end of file diff --git a/cv/classification/fan_hybrid_small/train.sh b/cv/classification/fan_hybrid_small/train.sh new file mode 100755 index 00000000..7e3dbf2c --- /dev/null +++ b/cv/classification/fan_hybrid_small/train.sh @@ -0,0 +1,19 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=4 +PORT=12346 +MODEL_ARCH="fan_small_12_p4_hybrid" +BATCH_SIZE=64 +LEARNING_RATE=1e-4 + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/default_settings.yaml \ + --model_arch $MODEL_ARCH \ + --batch-size $BATCH_SIZE \ + --lr $LEARNING_RATE + diff --git a/cv/classification/fan_hybrid_small/utils.py b/cv/classification/fan_hybrid_small/utils.py new file mode 120000 index 00000000..50fbc6d8 --- /dev/null +++ b/cv/classification/fan_hybrid_small/utils.py @@ -0,0 +1 @@ +../utils.py \ No newline at end of file diff --git a/cv/classification/fan_hybrid_tiny/README.md b/cv/classification/fan_hybrid_tiny/README.md new file mode 100644 index 00000000..d418672f --- /dev/null +++ b/cv/classification/fan_hybrid_tiny/README.md @@ -0,0 +1,66 @@ +## FAN-Hybrid-tiny + +### 
Installation +- Install the latest version of OneFlow +```bash +python3 -m pip install oneflow -f https://staging.oneflow.info/branch/master/[PLATFORM] +``` +Find more information on [install oneflow](https://github.com/Oneflow-Inc/oneflow#install-oneflow) + +- Install flowvision + +Then install the latest stable release of flowvision + +```bash +pip install flowvision==0.2.1 +``` + +- Install other requirements +```bash +python3 -m pip install -r requirements.txt +``` + +### Dataset +#### ImageNet +For ImageNet dataset, you can download it from http://image-net.org/. We provide the following two ways to load data: + +- For standard folder dataset, move validation images to labeled sub-folders. The file structure should look like: + ```bash + $ tree data + imagenet + ├── train + │ ├── class1 + │ │ ├── img1.jpeg + │ │ ├── img2.jpeg + │ │ └── ... + │ ├── class2 + │ │ ├── img3.jpeg + │ │ └── ... + │ └── ... + └── val + ├── class1 + │ ├── img4.jpeg + │ ├── img5.jpeg + │ └── ... + ├── class2 + │ ├── img6.jpeg + │ └── ... + └── ... + + +### Training + +You can use bash script `train.sh` to train this model. + +```bash +sh train.sh +``` + +### inference + +Bash script `infer.sh` is used to infer the trained model. 
+ +```bash +sh infer.sh +``` + diff --git a/cv/classification/fan_hybrid_tiny/config.py b/cv/classification/fan_hybrid_tiny/config.py new file mode 120000 index 00000000..3721332c --- /dev/null +++ b/cv/classification/fan_hybrid_tiny/config.py @@ -0,0 +1 @@ +../config.py \ No newline at end of file diff --git a/cv/classification/fan_hybrid_tiny/configs b/cv/classification/fan_hybrid_tiny/configs new file mode 120000 index 00000000..271eaab3 --- /dev/null +++ b/cv/classification/fan_hybrid_tiny/configs @@ -0,0 +1 @@ +../configs \ No newline at end of file diff --git a/cv/classification/fan_hybrid_tiny/data b/cv/classification/fan_hybrid_tiny/data new file mode 120000 index 00000000..4909e06e --- /dev/null +++ b/cv/classification/fan_hybrid_tiny/data @@ -0,0 +1 @@ +../data \ No newline at end of file diff --git a/cv/classification/fan_hybrid_tiny/infer.sh b/cv/classification/fan_hybrid_tiny/infer.sh new file mode 100755 index 00000000..63d892aa --- /dev/null +++ b/cv/classification/fan_hybrid_tiny/infer.sh @@ -0,0 +1,17 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=1 +PORT=12346 +MODEL_ARCH="fan_tiny_8_p4_hybrid" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/default_settings.yaml \ + --model_arch $MODEL_ARCH \ + --lr 1e-2 \ + --throughput + diff --git a/cv/classification/fan_hybrid_tiny/logger.py b/cv/classification/fan_hybrid_tiny/logger.py new file mode 120000 index 00000000..e1dc8ceb --- /dev/null +++ b/cv/classification/fan_hybrid_tiny/logger.py @@ -0,0 +1 @@ +../logger.py \ No newline at end of file diff --git a/cv/classification/fan_hybrid_tiny/lr_scheduler.py b/cv/classification/fan_hybrid_tiny/lr_scheduler.py new file mode 120000 index 00000000..268fb140 --- /dev/null +++ b/cv/classification/fan_hybrid_tiny/lr_scheduler.py @@ -0,0 +1 @@ +../lr_scheduler.py \ No newline at end of file diff --git a/cv/classification/fan_hybrid_tiny/main.py 
b/cv/classification/fan_hybrid_tiny/main.py new file mode 120000 index 00000000..f05f7527 --- /dev/null +++ b/cv/classification/fan_hybrid_tiny/main.py @@ -0,0 +1 @@ +../main.py \ No newline at end of file diff --git a/cv/classification/fan_hybrid_tiny/optimizer.py b/cv/classification/fan_hybrid_tiny/optimizer.py new file mode 120000 index 00000000..e5dec61d --- /dev/null +++ b/cv/classification/fan_hybrid_tiny/optimizer.py @@ -0,0 +1 @@ +../optimizer.py \ No newline at end of file diff --git a/cv/classification/fan_hybrid_tiny/requirements.txt b/cv/classification/fan_hybrid_tiny/requirements.txt new file mode 120000 index 00000000..dc833dd4 --- /dev/null +++ b/cv/classification/fan_hybrid_tiny/requirements.txt @@ -0,0 +1 @@ +../requirements.txt \ No newline at end of file diff --git a/cv/classification/fan_hybrid_tiny/train.sh b/cv/classification/fan_hybrid_tiny/train.sh new file mode 100755 index 00000000..b3421b95 --- /dev/null +++ b/cv/classification/fan_hybrid_tiny/train.sh @@ -0,0 +1,19 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=6 +PORT=12346 +MODEL_ARCH="fan_tiny_8_p4_hybrid" +BATCH_SIZE=32 +LEARNING_RATE=1e-4 + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/default_settings.yaml \ + --model_arch $MODEL_ARCH \ + --batch-size $BATCH_SIZE \ + --lr $LEARNING_RATE + diff --git a/cv/classification/fan_hybrid_tiny/utils.py b/cv/classification/fan_hybrid_tiny/utils.py new file mode 120000 index 00000000..50fbc6d8 --- /dev/null +++ b/cv/classification/fan_hybrid_tiny/utils.py @@ -0,0 +1 @@ +../utils.py \ No newline at end of file diff --git a/cv/classification/fan_large_16_p4_hybrid_in22k_1k/README.md b/cv/classification/fan_large_16_p4_hybrid_in22k_1k/README.md new file mode 100644 index 00000000..2331c51d --- /dev/null +++ b/cv/classification/fan_large_16_p4_hybrid_in22k_1k/README.md @@ -0,0 +1,66 @@ +## fan_large_16_p4_hybrid_in22k_1k + +### 
Installation +- Install the latest version of OneFlow +```bash +python3 -m pip install oneflow -f https://staging.oneflow.info/branch/master/[PLATFORM] +``` +Find more information on [install oneflow](https://github.com/Oneflow-Inc/oneflow#install-oneflow) + +- Install flowvision + +Then install the latest stable release of flowvision + +```bash +pip install flowvision==0.2.1 +``` + +- Install other requirements +```bash +python3 -m pip install -r requirements.txt +``` + +### Dataset +#### ImageNet +For ImageNet dataset, you can download it from http://image-net.org/. We provide the following two ways to load data: + +- For standard folder dataset, move validation images to labeled sub-folders. The file structure should look like: + ```bash + $ tree data + imagenet + ├── train + │ ├── class1 + │ │ ├── img1.jpeg + │ │ ├── img2.jpeg + │ │ └── ... + │ ├── class2 + │ │ ├── img3.jpeg + │ │ └── ... + │ └── ... + └── val + ├── class1 + │ ├── img4.jpeg + │ ├── img5.jpeg + │ └── ... + ├── class2 + │ ├── img6.jpeg + │ └── ... + └── ... + + +### Training + +You can use bash script `train.sh` to train this model. + +```bash +sh train.sh +``` + +### inference + +Bash script `infer.sh` is used to infer the trained model. 
+ +```bash +sh infer.sh +``` + diff --git a/cv/classification/fan_large_16_p4_hybrid_in22k_1k/config.py b/cv/classification/fan_large_16_p4_hybrid_in22k_1k/config.py new file mode 120000 index 00000000..3721332c --- /dev/null +++ b/cv/classification/fan_large_16_p4_hybrid_in22k_1k/config.py @@ -0,0 +1 @@ +../config.py \ No newline at end of file diff --git a/cv/classification/fan_large_16_p4_hybrid_in22k_1k/configs b/cv/classification/fan_large_16_p4_hybrid_in22k_1k/configs new file mode 120000 index 00000000..271eaab3 --- /dev/null +++ b/cv/classification/fan_large_16_p4_hybrid_in22k_1k/configs @@ -0,0 +1 @@ +../configs \ No newline at end of file diff --git a/cv/classification/fan_large_16_p4_hybrid_in22k_1k/data b/cv/classification/fan_large_16_p4_hybrid_in22k_1k/data new file mode 120000 index 00000000..4909e06e --- /dev/null +++ b/cv/classification/fan_large_16_p4_hybrid_in22k_1k/data @@ -0,0 +1 @@ +../data \ No newline at end of file diff --git a/cv/classification/fan_large_16_p4_hybrid_in22k_1k/infer.sh b/cv/classification/fan_large_16_p4_hybrid_in22k_1k/infer.sh new file mode 100755 index 00000000..2bce46aa --- /dev/null +++ b/cv/classification/fan_large_16_p4_hybrid_in22k_1k/infer.sh @@ -0,0 +1,17 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=1 +PORT=12346 +MODEL_ARCH="fan_large_16_p4_hybrid_in22k_1k" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/default_settings.yaml \ + --model_arch $MODEL_ARCH \ + --lr 1e-2 \ + --throughput + diff --git a/cv/classification/fan_large_16_p4_hybrid_in22k_1k/logger.py b/cv/classification/fan_large_16_p4_hybrid_in22k_1k/logger.py new file mode 120000 index 00000000..e1dc8ceb --- /dev/null +++ b/cv/classification/fan_large_16_p4_hybrid_in22k_1k/logger.py @@ -0,0 +1 @@ +../logger.py \ No newline at end of file diff --git a/cv/classification/fan_large_16_p4_hybrid_in22k_1k/lr_scheduler.py 
b/cv/classification/fan_large_16_p4_hybrid_in22k_1k/lr_scheduler.py new file mode 120000 index 00000000..268fb140 --- /dev/null +++ b/cv/classification/fan_large_16_p4_hybrid_in22k_1k/lr_scheduler.py @@ -0,0 +1 @@ +../lr_scheduler.py \ No newline at end of file diff --git a/cv/classification/fan_large_16_p4_hybrid_in22k_1k/main.py b/cv/classification/fan_large_16_p4_hybrid_in22k_1k/main.py new file mode 120000 index 00000000..f05f7527 --- /dev/null +++ b/cv/classification/fan_large_16_p4_hybrid_in22k_1k/main.py @@ -0,0 +1 @@ +../main.py \ No newline at end of file diff --git a/cv/classification/fan_large_16_p4_hybrid_in22k_1k/optimizer.py b/cv/classification/fan_large_16_p4_hybrid_in22k_1k/optimizer.py new file mode 120000 index 00000000..e5dec61d --- /dev/null +++ b/cv/classification/fan_large_16_p4_hybrid_in22k_1k/optimizer.py @@ -0,0 +1 @@ +../optimizer.py \ No newline at end of file diff --git a/cv/classification/fan_large_16_p4_hybrid_in22k_1k/requirements.txt b/cv/classification/fan_large_16_p4_hybrid_in22k_1k/requirements.txt new file mode 120000 index 00000000..dc833dd4 --- /dev/null +++ b/cv/classification/fan_large_16_p4_hybrid_in22k_1k/requirements.txt @@ -0,0 +1 @@ +../requirements.txt \ No newline at end of file diff --git a/cv/classification/fan_large_16_p4_hybrid_in22k_1k/train.sh b/cv/classification/fan_large_16_p4_hybrid_in22k_1k/train.sh new file mode 100755 index 00000000..e914de6d --- /dev/null +++ b/cv/classification/fan_large_16_p4_hybrid_in22k_1k/train.sh @@ -0,0 +1,19 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=4 +PORT=12346 +MODEL_ARCH="fan_large_16_p4_hybrid_in22k_1k" +BATCH_SIZE=32 +LEARNING_RATE=1e-4 + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/default_settings.yaml \ + --model_arch $MODEL_ARCH \ + --batch-size $BATCH_SIZE \ + --lr $LEARNING_RATE + diff --git 
a/cv/classification/fan_large_16_p4_hybrid_in22k_1k/utils.py b/cv/classification/fan_large_16_p4_hybrid_in22k_1k/utils.py new file mode 120000 index 00000000..50fbc6d8 --- /dev/null +++ b/cv/classification/fan_large_16_p4_hybrid_in22k_1k/utils.py @@ -0,0 +1 @@ +../utils.py \ No newline at end of file diff --git a/cv/classification/fan_large_16_p4_hybrid_in22k_1k_384/README.md b/cv/classification/fan_large_16_p4_hybrid_in22k_1k_384/README.md new file mode 100644 index 00000000..94b01d13 --- /dev/null +++ b/cv/classification/fan_large_16_p4_hybrid_in22k_1k_384/README.md @@ -0,0 +1,66 @@ +## fan_large_16_p4_hybrid_in22k_1k_384 + +### Installation +- Install the latest version of OneFlow +```bash +python3 -m pip install oneflow -f https://staging.oneflow.info/branch/master/[PLATFORM] +``` +Find more information on [install oneflow](https://github.com/Oneflow-Inc/oneflow#install-oneflow) + +- Install flowvision + +Then install the latest stable release of flowvision + +```bash +pip install flowvision==0.2.1 +``` + +- Install other requirements +```bash +python3 -m pip install -r requirements.txt +``` + +### Dataset +#### ImageNet +For ImageNet dataset, you can download it from http://image-net.org/. We provide the following two ways to load data: + +- For standard folder dataset, move validation images to labeled sub-folders. The file structure should look like: + ```bash + $ tree data + imagenet + ├── train + │ ├── class1 + │ │ ├── img1.jpeg + │ │ ├── img2.jpeg + │ │ └── ... + │ ├── class2 + │ │ ├── img3.jpeg + │ │ └── ... + │ └── ... + └── val + ├── class1 + │ ├── img4.jpeg + │ ├── img5.jpeg + │ └── ... + ├── class2 + │ ├── img6.jpeg + │ └── ... + └── ... + + +### Training + +You can use bash script `train.sh` to train this model. + +```bash +sh train.sh +``` + +### inference + +Bash script `infer.sh` is used to infer the trained model. 
+ +```bash +sh infer.sh +``` + diff --git a/cv/classification/fan_large_16_p4_hybrid_in22k_1k_384/config.py b/cv/classification/fan_large_16_p4_hybrid_in22k_1k_384/config.py new file mode 120000 index 00000000..3721332c --- /dev/null +++ b/cv/classification/fan_large_16_p4_hybrid_in22k_1k_384/config.py @@ -0,0 +1 @@ +../config.py \ No newline at end of file diff --git a/cv/classification/fan_large_16_p4_hybrid_in22k_1k_384/configs b/cv/classification/fan_large_16_p4_hybrid_in22k_1k_384/configs new file mode 120000 index 00000000..271eaab3 --- /dev/null +++ b/cv/classification/fan_large_16_p4_hybrid_in22k_1k_384/configs @@ -0,0 +1 @@ +../configs \ No newline at end of file diff --git a/cv/classification/fan_large_16_p4_hybrid_in22k_1k_384/data b/cv/classification/fan_large_16_p4_hybrid_in22k_1k_384/data new file mode 120000 index 00000000..4909e06e --- /dev/null +++ b/cv/classification/fan_large_16_p4_hybrid_in22k_1k_384/data @@ -0,0 +1 @@ +../data \ No newline at end of file diff --git a/cv/classification/fan_large_16_p4_hybrid_in22k_1k_384/infer.sh b/cv/classification/fan_large_16_p4_hybrid_in22k_1k_384/infer.sh new file mode 100755 index 00000000..26b95c52 --- /dev/null +++ b/cv/classification/fan_large_16_p4_hybrid_in22k_1k_384/infer.sh @@ -0,0 +1,17 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=1 +PORT=12346 +MODEL_ARCH="fan_large_16_p4_hybrid_in22k_1k_384" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/default_settings.yaml \ + --model_arch $MODEL_ARCH \ + --lr 1e-2 \ + --throughput + diff --git a/cv/classification/fan_large_16_p4_hybrid_in22k_1k_384/logger.py b/cv/classification/fan_large_16_p4_hybrid_in22k_1k_384/logger.py new file mode 120000 index 00000000..e1dc8ceb --- /dev/null +++ b/cv/classification/fan_large_16_p4_hybrid_in22k_1k_384/logger.py @@ -0,0 +1 @@ +../logger.py \ No newline at end of file diff --git 
a/cv/classification/fan_large_16_p4_hybrid_in22k_1k_384/lr_scheduler.py b/cv/classification/fan_large_16_p4_hybrid_in22k_1k_384/lr_scheduler.py new file mode 120000 index 00000000..268fb140 --- /dev/null +++ b/cv/classification/fan_large_16_p4_hybrid_in22k_1k_384/lr_scheduler.py @@ -0,0 +1 @@ +../lr_scheduler.py \ No newline at end of file diff --git a/cv/classification/fan_large_16_p4_hybrid_in22k_1k_384/main.py b/cv/classification/fan_large_16_p4_hybrid_in22k_1k_384/main.py new file mode 120000 index 00000000..f05f7527 --- /dev/null +++ b/cv/classification/fan_large_16_p4_hybrid_in22k_1k_384/main.py @@ -0,0 +1 @@ +../main.py \ No newline at end of file diff --git a/cv/classification/fan_large_16_p4_hybrid_in22k_1k_384/optimizer.py b/cv/classification/fan_large_16_p4_hybrid_in22k_1k_384/optimizer.py new file mode 120000 index 00000000..e5dec61d --- /dev/null +++ b/cv/classification/fan_large_16_p4_hybrid_in22k_1k_384/optimizer.py @@ -0,0 +1 @@ +../optimizer.py \ No newline at end of file diff --git a/cv/classification/fan_large_16_p4_hybrid_in22k_1k_384/requirements.txt b/cv/classification/fan_large_16_p4_hybrid_in22k_1k_384/requirements.txt new file mode 120000 index 00000000..dc833dd4 --- /dev/null +++ b/cv/classification/fan_large_16_p4_hybrid_in22k_1k_384/requirements.txt @@ -0,0 +1 @@ +../requirements.txt \ No newline at end of file diff --git a/cv/classification/fan_large_16_p4_hybrid_in22k_1k_384/train.sh b/cv/classification/fan_large_16_p4_hybrid_in22k_1k_384/train.sh new file mode 100755 index 00000000..734f87e8 --- /dev/null +++ b/cv/classification/fan_large_16_p4_hybrid_in22k_1k_384/train.sh @@ -0,0 +1,21 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=4 +PORT=12346 +MODEL_ARCH="fan_large_16_p4_hybrid_in22k_1k_384" +IMAGE_SIZE=384 +BATCH_SIZE=8 +LEARNING_RATE=1e-4 + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/default_settings.yaml \ + 
--model_arch $MODEL_ARCH \ + --batch-size $BATCH_SIZE \ + --image-size $IMAGE_SIZE \ + --lr $LEARNING_RATE + diff --git a/cv/classification/fan_large_16_p4_hybrid_in22k_1k_384/utils.py b/cv/classification/fan_large_16_p4_hybrid_in22k_1k_384/utils.py new file mode 120000 index 00000000..50fbc6d8 --- /dev/null +++ b/cv/classification/fan_large_16_p4_hybrid_in22k_1k_384/utils.py @@ -0,0 +1 @@ +../utils.py \ No newline at end of file diff --git a/cv/classification/fan_vit_small/README.md b/cv/classification/fan_vit_small/README.md new file mode 100644 index 00000000..53ef275c --- /dev/null +++ b/cv/classification/fan_vit_small/README.md @@ -0,0 +1,66 @@ +## FAN-ViT-small + +### Installation +- Install the latest version of OneFlow +```bash +python3 -m pip install oneflow -f https://staging.oneflow.info/branch/master/[PLATFORM] +``` +Find more information on [install oneflow](https://github.com/Oneflow-Inc/oneflow#install-oneflow) + +- Install flowvision + +Then install the latest stable release of flowvision + +```bash +pip install flowvision==0.2.1 +``` + +- Install other requirements +```bash +python3 -m pip install -r requirements.txt +``` + +### Dataset +#### ImageNet +For ImageNet dataset, you can download it from http://image-net.org/. We provide the following two ways to load data: + +- For standard folder dataset, move validation images to labeled sub-folders. The file structure should look like: + ```bash + $ tree data + imagenet + ├── train + │ ├── class1 + │ │ ├── img1.jpeg + │ │ ├── img2.jpeg + │ │ └── ... + │ ├── class2 + │ │ ├── img3.jpeg + │ │ └── ... + │ └── ... + └── val + ├── class1 + │ ├── img4.jpeg + │ ├── img5.jpeg + │ └── ... + ├── class2 + │ ├── img6.jpeg + │ └── ... + └── ... + + +### Training + +You can use bash script `train.sh` to train this model. + +```bash +sh train.sh +``` + +### inference + +Bash script `infer.sh` is used to infer the trained model. 
+ +```bash +sh infer.sh +``` + diff --git a/cv/classification/fan_vit_small/config.py b/cv/classification/fan_vit_small/config.py new file mode 120000 index 00000000..3721332c --- /dev/null +++ b/cv/classification/fan_vit_small/config.py @@ -0,0 +1 @@ +../config.py \ No newline at end of file diff --git a/cv/classification/fan_vit_small/configs b/cv/classification/fan_vit_small/configs new file mode 120000 index 00000000..271eaab3 --- /dev/null +++ b/cv/classification/fan_vit_small/configs @@ -0,0 +1 @@ +../configs \ No newline at end of file diff --git a/cv/classification/fan_vit_small/data b/cv/classification/fan_vit_small/data new file mode 120000 index 00000000..4909e06e --- /dev/null +++ b/cv/classification/fan_vit_small/data @@ -0,0 +1 @@ +../data \ No newline at end of file diff --git a/cv/classification/fan_vit_small/infer.sh b/cv/classification/fan_vit_small/infer.sh new file mode 100755 index 00000000..836e0a8b --- /dev/null +++ b/cv/classification/fan_vit_small/infer.sh @@ -0,0 +1,17 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=1 +PORT=12346 +MODEL_ARCH="fan_small_12_p16_224" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/default_settings.yaml \ + --model_arch $MODEL_ARCH \ + --lr 1e-2 \ + --throughput + diff --git a/cv/classification/fan_vit_small/logger.py b/cv/classification/fan_vit_small/logger.py new file mode 120000 index 00000000..e1dc8ceb --- /dev/null +++ b/cv/classification/fan_vit_small/logger.py @@ -0,0 +1 @@ +../logger.py \ No newline at end of file diff --git a/cv/classification/fan_vit_small/lr_scheduler.py b/cv/classification/fan_vit_small/lr_scheduler.py new file mode 120000 index 00000000..268fb140 --- /dev/null +++ b/cv/classification/fan_vit_small/lr_scheduler.py @@ -0,0 +1 @@ +../lr_scheduler.py \ No newline at end of file diff --git a/cv/classification/fan_vit_small/main.py 
b/cv/classification/fan_vit_small/main.py new file mode 120000 index 00000000..f05f7527 --- /dev/null +++ b/cv/classification/fan_vit_small/main.py @@ -0,0 +1 @@ +../main.py \ No newline at end of file diff --git a/cv/classification/fan_vit_small/optimizer.py b/cv/classification/fan_vit_small/optimizer.py new file mode 120000 index 00000000..e5dec61d --- /dev/null +++ b/cv/classification/fan_vit_small/optimizer.py @@ -0,0 +1 @@ +../optimizer.py \ No newline at end of file diff --git a/cv/classification/fan_vit_small/requirements.txt b/cv/classification/fan_vit_small/requirements.txt new file mode 120000 index 00000000..dc833dd4 --- /dev/null +++ b/cv/classification/fan_vit_small/requirements.txt @@ -0,0 +1 @@ +../requirements.txt \ No newline at end of file diff --git a/cv/classification/fan_vit_small/train.sh b/cv/classification/fan_vit_small/train.sh new file mode 100755 index 00000000..f8b96e32 --- /dev/null +++ b/cv/classification/fan_vit_small/train.sh @@ -0,0 +1,19 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=4 +PORT=12346 +MODEL_ARCH="fan_small_12_p16_224" +BATCH_SIZE=64 +LEARNING_RATE=1e-4 + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/default_settings.yaml \ + --model_arch $MODEL_ARCH \ + --batch-size $BATCH_SIZE \ + --lr $LEARNING_RATE + diff --git a/cv/classification/fan_vit_small/utils.py b/cv/classification/fan_vit_small/utils.py new file mode 120000 index 00000000..50fbc6d8 --- /dev/null +++ b/cv/classification/fan_vit_small/utils.py @@ -0,0 +1 @@ +../utils.py \ No newline at end of file diff --git a/cv/classification/fan_vit_tiny/README.md b/cv/classification/fan_vit_tiny/README.md new file mode 100644 index 00000000..89696f8f --- /dev/null +++ b/cv/classification/fan_vit_tiny/README.md @@ -0,0 +1,66 @@ +## FAN-ViT-tiny + +### Installation +- Install the latest version of OneFlow +```bash +python3 -m pip install oneflow -f 
https://staging.oneflow.info/branch/master/[PLATFORM] +``` +Find more information on [install oneflow](https://github.com/Oneflow-Inc/oneflow#install-oneflow) + +- Install flowvision + +Then install the latest stable release of flowvision + +```bash +pip install flowvision==0.2.1 +``` + +- Install other requirements +```bash +python3 -m pip install -r requirements.txt +``` + +### Dataset +#### ImageNet +For ImageNet dataset, you can download it from http://image-net.org/. We provide the following two ways to load data: + +- For standard folder dataset, move validation images to labeled sub-folders. The file structure should look like: + ```bash + $ tree data + imagenet + ├── train + │ ├── class1 + │ │ ├── img1.jpeg + │ │ ├── img2.jpeg + │ │ └── ... + │ ├── class2 + │ │ ├── img3.jpeg + │ │ └── ... + │ └── ... + └── val + ├── class1 + │ ├── img4.jpeg + │ ├── img5.jpeg + │ └── ... + ├── class2 + │ ├── img6.jpeg + │ └── ... + └── ... + + +### Training + +You can use bash script `train.sh` to train this model. + +```bash +sh train.sh +``` + +### inference + +Bash script `infer.sh` is used to infer the trained model. 
+ +```bash +sh infer.sh +``` + diff --git a/cv/classification/fan_vit_tiny/config.py b/cv/classification/fan_vit_tiny/config.py new file mode 120000 index 00000000..3721332c --- /dev/null +++ b/cv/classification/fan_vit_tiny/config.py @@ -0,0 +1 @@ +../config.py \ No newline at end of file diff --git a/cv/classification/fan_vit_tiny/configs b/cv/classification/fan_vit_tiny/configs new file mode 120000 index 00000000..271eaab3 --- /dev/null +++ b/cv/classification/fan_vit_tiny/configs @@ -0,0 +1 @@ +../configs \ No newline at end of file diff --git a/cv/classification/fan_vit_tiny/data b/cv/classification/fan_vit_tiny/data new file mode 120000 index 00000000..4909e06e --- /dev/null +++ b/cv/classification/fan_vit_tiny/data @@ -0,0 +1 @@ +../data \ No newline at end of file diff --git a/cv/classification/fan_vit_tiny/infer.sh b/cv/classification/fan_vit_tiny/infer.sh new file mode 100755 index 00000000..233145ff --- /dev/null +++ b/cv/classification/fan_vit_tiny/infer.sh @@ -0,0 +1,17 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=1 +PORT=12346 +MODEL_ARCH="fan_tiny_12_p16_224" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/default_settings.yaml \ + --model_arch $MODEL_ARCH \ + --lr 1e-2 \ + --throughput + diff --git a/cv/classification/fan_vit_tiny/logger.py b/cv/classification/fan_vit_tiny/logger.py new file mode 120000 index 00000000..e1dc8ceb --- /dev/null +++ b/cv/classification/fan_vit_tiny/logger.py @@ -0,0 +1 @@ +../logger.py \ No newline at end of file diff --git a/cv/classification/fan_vit_tiny/lr_scheduler.py b/cv/classification/fan_vit_tiny/lr_scheduler.py new file mode 120000 index 00000000..268fb140 --- /dev/null +++ b/cv/classification/fan_vit_tiny/lr_scheduler.py @@ -0,0 +1 @@ +../lr_scheduler.py \ No newline at end of file diff --git a/cv/classification/fan_vit_tiny/main.py b/cv/classification/fan_vit_tiny/main.py new file mode 
120000 index 00000000..f05f7527 --- /dev/null +++ b/cv/classification/fan_vit_tiny/main.py @@ -0,0 +1 @@ +../main.py \ No newline at end of file diff --git a/cv/classification/fan_vit_tiny/optimizer.py b/cv/classification/fan_vit_tiny/optimizer.py new file mode 120000 index 00000000..e5dec61d --- /dev/null +++ b/cv/classification/fan_vit_tiny/optimizer.py @@ -0,0 +1 @@ +../optimizer.py \ No newline at end of file diff --git a/cv/classification/fan_vit_tiny/requirements.txt b/cv/classification/fan_vit_tiny/requirements.txt new file mode 120000 index 00000000..dc833dd4 --- /dev/null +++ b/cv/classification/fan_vit_tiny/requirements.txt @@ -0,0 +1 @@ +../requirements.txt \ No newline at end of file diff --git a/cv/classification/fan_vit_tiny/train.sh b/cv/classification/fan_vit_tiny/train.sh new file mode 100755 index 00000000..14cf1c43 --- /dev/null +++ b/cv/classification/fan_vit_tiny/train.sh @@ -0,0 +1,19 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=4 +PORT=12346 +MODEL_ARCH="fan_tiny_12_p16_224" +BATCH_SIZE=32 +LEARNING_RATE=1e-4 + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/default_settings.yaml \ + --model_arch $MODEL_ARCH \ + --batch-size $BATCH_SIZE \ + --lr $LEARNING_RATE + diff --git a/cv/classification/fan_vit_tiny/utils.py b/cv/classification/fan_vit_tiny/utils.py new file mode 120000 index 00000000..50fbc6d8 --- /dev/null +++ b/cv/classification/fan_vit_tiny/utils.py @@ -0,0 +1 @@ +../utils.py \ No newline at end of file diff --git a/cv/classification/genet_normal/README.md b/cv/classification/genet_normal/README.md new file mode 100644 index 00000000..312ce8a3 --- /dev/null +++ b/cv/classification/genet_normal/README.md @@ -0,0 +1,66 @@ +## genet_normal + +### Installation +- Install the latest version of OneFlow +```bash +python3 -m pip install oneflow -f https://staging.oneflow.info/branch/master/[PLATFORM] +``` +Find more 
information on [install oneflow](https://github.com/Oneflow-Inc/oneflow#install-oneflow) + +- Install flowvision + +Then install the latest stable release of flowvision + +```bash +pip install flowvision==0.2.1 +``` + +- Install other requirements +```bash +python3 -m pip install -r requirements.txt +``` + +### Dataset +#### ImageNet +For ImageNet dataset, you can download it from http://image-net.org/. We provide the following two ways to load data: + +- For standard folder dataset, move validation images to labeled sub-folders. The file structure should look like: + ```bash + $ tree data + imagenet + ├── train + │ ├── class1 + │ │ ├── img1.jpeg + │ │ ├── img2.jpeg + │ │ └── ... + │ ├── class2 + │ │ ├── img3.jpeg + │ │ └── ... + │ └── ... + └── val + ├── class1 + │ ├── img4.jpeg + │ ├── img5.jpeg + │ └── ... + ├── class2 + │ ├── img6.jpeg + │ └── ... + └── ... + + +### Training + +You can use bash script `train.sh` to train this model. + +```bash +sh train.sh +``` + +### inference + +Bash script `infer.sh` is used to infer the trained model. 
+ +```bash +sh infer.sh +``` + diff --git a/cv/classification/genet_normal/config.py b/cv/classification/genet_normal/config.py new file mode 120000 index 00000000..3721332c --- /dev/null +++ b/cv/classification/genet_normal/config.py @@ -0,0 +1 @@ +../config.py \ No newline at end of file diff --git a/cv/classification/genet_normal/configs b/cv/classification/genet_normal/configs new file mode 120000 index 00000000..271eaab3 --- /dev/null +++ b/cv/classification/genet_normal/configs @@ -0,0 +1 @@ +../configs \ No newline at end of file diff --git a/cv/classification/genet_normal/data b/cv/classification/genet_normal/data new file mode 120000 index 00000000..4909e06e --- /dev/null +++ b/cv/classification/genet_normal/data @@ -0,0 +1 @@ +../data \ No newline at end of file diff --git a/cv/classification/genet_normal/infer.sh b/cv/classification/genet_normal/infer.sh new file mode 100755 index 00000000..97acce97 --- /dev/null +++ b/cv/classification/genet_normal/infer.sh @@ -0,0 +1,18 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=1 +PORT=12346 +MODEL_ARCH="genet_normal" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/default_settings.yaml \ + --model_arch $MODEL_ARCH \ + --lr 1e-2 \ + --image-size 192 \ + --throughput + diff --git a/cv/classification/genet_normal/logger.py b/cv/classification/genet_normal/logger.py new file mode 120000 index 00000000..e1dc8ceb --- /dev/null +++ b/cv/classification/genet_normal/logger.py @@ -0,0 +1 @@ +../logger.py \ No newline at end of file diff --git a/cv/classification/genet_normal/lr_scheduler.py b/cv/classification/genet_normal/lr_scheduler.py new file mode 120000 index 00000000..268fb140 --- /dev/null +++ b/cv/classification/genet_normal/lr_scheduler.py @@ -0,0 +1 @@ +../lr_scheduler.py \ No newline at end of file diff --git a/cv/classification/genet_normal/main.py b/cv/classification/genet_normal/main.py new 
file mode 120000 index 00000000..f05f7527 --- /dev/null +++ b/cv/classification/genet_normal/main.py @@ -0,0 +1 @@ +../main.py \ No newline at end of file diff --git a/cv/classification/genet_normal/optimizer.py b/cv/classification/genet_normal/optimizer.py new file mode 120000 index 00000000..e5dec61d --- /dev/null +++ b/cv/classification/genet_normal/optimizer.py @@ -0,0 +1 @@ +../optimizer.py \ No newline at end of file diff --git a/cv/classification/genet_normal/requirements.txt b/cv/classification/genet_normal/requirements.txt new file mode 120000 index 00000000..dc833dd4 --- /dev/null +++ b/cv/classification/genet_normal/requirements.txt @@ -0,0 +1 @@ +../requirements.txt \ No newline at end of file diff --git a/cv/classification/genet_normal/train.sh b/cv/classification/genet_normal/train.sh new file mode 100755 index 00000000..5a0434bc --- /dev/null +++ b/cv/classification/genet_normal/train.sh @@ -0,0 +1,17 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=8 +PORT=12346 +MODEL_ARCH="genet_normal" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/senet_default_settings.yaml \ + --model_arch $MODEL_ARCH \ + --lr 1e-2 \ + --image-size 192 + diff --git a/cv/classification/genet_normal/utils.py b/cv/classification/genet_normal/utils.py new file mode 120000 index 00000000..50fbc6d8 --- /dev/null +++ b/cv/classification/genet_normal/utils.py @@ -0,0 +1 @@ +../utils.py \ No newline at end of file diff --git a/cv/classification/genet_small/README.md b/cv/classification/genet_small/README.md new file mode 100644 index 00000000..e1db037d --- /dev/null +++ b/cv/classification/genet_small/README.md @@ -0,0 +1,66 @@ +## genet_small + +### Installation +- Install the latest version of OneFlow +```bash +python3 -m pip install oneflow -f https://staging.oneflow.info/branch/master/[PLATFORM] +``` +Find more information on [install 
oneflow](https://github.com/Oneflow-Inc/oneflow#install-oneflow) + +- Install flowvision + +Then install the latest stable release of flowvision + +```bash +pip install flowvision==0.2.1 +``` + +- Install other requirements +```bash +python3 -m pip install -r requirements.txt +``` + +### Dataset +#### ImageNet +For ImageNet dataset, you can download it from http://image-net.org/. We provide the following two ways to load data: + +- For standard folder dataset, move validation images to labeled sub-folders. The file structure should look like: + ```bash + $ tree data + imagenet + ├── train + │ ├── class1 + │ │ ├── img1.jpeg + │ │ ├── img2.jpeg + │ │ └── ... + │ ├── class2 + │ │ ├── img3.jpeg + │ │ └── ... + │ └── ... + └── val + ├── class1 + │ ├── img4.jpeg + │ ├── img5.jpeg + │ └── ... + ├── class2 + │ ├── img6.jpeg + │ └── ... + └── ... + + +### Training + +You can use bash script `train.sh` to train this model. + +```bash +sh train.sh +``` + +### inference + +Bash script `infer.sh` is used to infer the trained model. 
+ +```bash +sh infer.sh +``` + diff --git a/cv/classification/genet_small/config.py b/cv/classification/genet_small/config.py new file mode 120000 index 00000000..3721332c --- /dev/null +++ b/cv/classification/genet_small/config.py @@ -0,0 +1 @@ +../config.py \ No newline at end of file diff --git a/cv/classification/genet_small/configs b/cv/classification/genet_small/configs new file mode 120000 index 00000000..271eaab3 --- /dev/null +++ b/cv/classification/genet_small/configs @@ -0,0 +1 @@ +../configs \ No newline at end of file diff --git a/cv/classification/genet_small/data b/cv/classification/genet_small/data new file mode 120000 index 00000000..4909e06e --- /dev/null +++ b/cv/classification/genet_small/data @@ -0,0 +1 @@ +../data \ No newline at end of file diff --git a/cv/classification/genet_small/infer.sh b/cv/classification/genet_small/infer.sh new file mode 100755 index 00000000..8fee9ec7 --- /dev/null +++ b/cv/classification/genet_small/infer.sh @@ -0,0 +1,18 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=1 +PORT=12346 +MODEL_ARCH="genet_small" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/default_settings.yaml \ + --model_arch $MODEL_ARCH \ + --lr 1e-2 \ + --image-size 192 \ + --throughput + diff --git a/cv/classification/genet_small/logger.py b/cv/classification/genet_small/logger.py new file mode 120000 index 00000000..e1dc8ceb --- /dev/null +++ b/cv/classification/genet_small/logger.py @@ -0,0 +1 @@ +../logger.py \ No newline at end of file diff --git a/cv/classification/genet_small/lr_scheduler.py b/cv/classification/genet_small/lr_scheduler.py new file mode 120000 index 00000000..268fb140 --- /dev/null +++ b/cv/classification/genet_small/lr_scheduler.py @@ -0,0 +1 @@ +../lr_scheduler.py \ No newline at end of file diff --git a/cv/classification/genet_small/main.py b/cv/classification/genet_small/main.py new file mode 120000 
index 00000000..f05f7527 --- /dev/null +++ b/cv/classification/genet_small/main.py @@ -0,0 +1 @@ +../main.py \ No newline at end of file diff --git a/cv/classification/genet_small/optimizer.py b/cv/classification/genet_small/optimizer.py new file mode 120000 index 00000000..e5dec61d --- /dev/null +++ b/cv/classification/genet_small/optimizer.py @@ -0,0 +1 @@ +../optimizer.py \ No newline at end of file diff --git a/cv/classification/genet_small/requirements.txt b/cv/classification/genet_small/requirements.txt new file mode 120000 index 00000000..dc833dd4 --- /dev/null +++ b/cv/classification/genet_small/requirements.txt @@ -0,0 +1 @@ +../requirements.txt \ No newline at end of file diff --git a/cv/classification/genet_small/train.sh b/cv/classification/genet_small/train.sh new file mode 100755 index 00000000..6d51600e --- /dev/null +++ b/cv/classification/genet_small/train.sh @@ -0,0 +1,17 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=8 +PORT=12346 +MODEL_ARCH="genet_small" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/senet_default_settings.yaml \ + --model_arch $MODEL_ARCH \ + --lr 1e-2 \ + --image-size 192 + diff --git a/cv/classification/genet_small/utils.py b/cv/classification/genet_small/utils.py new file mode 120000 index 00000000..50fbc6d8 --- /dev/null +++ b/cv/classification/genet_small/utils.py @@ -0,0 +1 @@ +../utils.py \ No newline at end of file diff --git a/cv/classification/hrnet_w18_small/README.md b/cv/classification/hrnet_w18_small/README.md new file mode 100644 index 00000000..c9bd75e8 --- /dev/null +++ b/cv/classification/hrnet_w18_small/README.md @@ -0,0 +1,66 @@ +## hrnet_w18_small + +### Installation +- Install the latest version of OneFlow +```bash +python3 -m pip install oneflow -f https://staging.oneflow.info/branch/master/[PLATFORM] +``` +Find more information on [install 
oneflow](https://github.com/Oneflow-Inc/oneflow#install-oneflow) + +- Install flowvision + +Then install the latest stable release of flowvision + +```bash +pip install flowvision==0.2.1 +``` + +- Install other requirements +```bash +python3 -m pip install -r requirements.txt +``` + +### Dataset +#### ImageNet +For ImageNet dataset, you can download it from http://image-net.org/. We provide the following two ways to load data: + +- For standard folder dataset, move validation images to labeled sub-folders. The file structure should look like: + ```bash + $ tree data + imagenet + ├── train + │ ├── class1 + │ │ ├── img1.jpeg + │ │ ├── img2.jpeg + │ │ └── ... + │ ├── class2 + │ │ ├── img3.jpeg + │ │ └── ... + │ └── ... + └── val + ├── class1 + │ ├── img4.jpeg + │ ├── img5.jpeg + │ └── ... + ├── class2 + │ ├── img6.jpeg + │ └── ... + └── ... + + +### Training + +You can use bash script `train.sh` to train this model. + +```bash +sh train.sh +``` + +### inference + +Bash script `infer.sh` is used to infer the trained model. 
+ +```bash +sh infer.sh +``` + diff --git a/cv/classification/hrnet_w18_small/config.py b/cv/classification/hrnet_w18_small/config.py new file mode 120000 index 00000000..3721332c --- /dev/null +++ b/cv/classification/hrnet_w18_small/config.py @@ -0,0 +1 @@ +../config.py \ No newline at end of file diff --git a/cv/classification/hrnet_w18_small/configs b/cv/classification/hrnet_w18_small/configs new file mode 120000 index 00000000..271eaab3 --- /dev/null +++ b/cv/classification/hrnet_w18_small/configs @@ -0,0 +1 @@ +../configs \ No newline at end of file diff --git a/cv/classification/hrnet_w18_small/data b/cv/classification/hrnet_w18_small/data new file mode 120000 index 00000000..4909e06e --- /dev/null +++ b/cv/classification/hrnet_w18_small/data @@ -0,0 +1 @@ +../data \ No newline at end of file diff --git a/cv/classification/hrnet_w18_small/infer.sh b/cv/classification/hrnet_w18_small/infer.sh new file mode 100755 index 00000000..e3d587e1 --- /dev/null +++ b/cv/classification/hrnet_w18_small/infer.sh @@ -0,0 +1,17 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=1 +PORT=12346 +MODEL_ARCH="hrnet_w18_small" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/default_settings.yaml \ + --model_arch $MODEL_ARCH \ + --lr 1e-2 \ + --throughput + diff --git a/cv/classification/hrnet_w18_small/logger.py b/cv/classification/hrnet_w18_small/logger.py new file mode 120000 index 00000000..e1dc8ceb --- /dev/null +++ b/cv/classification/hrnet_w18_small/logger.py @@ -0,0 +1 @@ +../logger.py \ No newline at end of file diff --git a/cv/classification/hrnet_w18_small/lr_scheduler.py b/cv/classification/hrnet_w18_small/lr_scheduler.py new file mode 120000 index 00000000..268fb140 --- /dev/null +++ b/cv/classification/hrnet_w18_small/lr_scheduler.py @@ -0,0 +1 @@ +../lr_scheduler.py \ No newline at end of file diff --git a/cv/classification/hrnet_w18_small/main.py 
b/cv/classification/hrnet_w18_small/main.py new file mode 120000 index 00000000..f05f7527 --- /dev/null +++ b/cv/classification/hrnet_w18_small/main.py @@ -0,0 +1 @@ +../main.py \ No newline at end of file diff --git a/cv/classification/hrnet_w18_small/optimizer.py b/cv/classification/hrnet_w18_small/optimizer.py new file mode 120000 index 00000000..e5dec61d --- /dev/null +++ b/cv/classification/hrnet_w18_small/optimizer.py @@ -0,0 +1 @@ +../optimizer.py \ No newline at end of file diff --git a/cv/classification/hrnet_w18_small/requirements.txt b/cv/classification/hrnet_w18_small/requirements.txt new file mode 120000 index 00000000..dc833dd4 --- /dev/null +++ b/cv/classification/hrnet_w18_small/requirements.txt @@ -0,0 +1 @@ +../requirements.txt \ No newline at end of file diff --git a/cv/classification/hrnet_w18_small/train.sh b/cv/classification/hrnet_w18_small/train.sh new file mode 100755 index 00000000..e18b6bb4 --- /dev/null +++ b/cv/classification/hrnet_w18_small/train.sh @@ -0,0 +1,16 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=8 +PORT=12346 +MODEL_ARCH="hrnet_w18_small" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/default_settings.yaml \ + --model_arch $MODEL_ARCH \ + --lr 1e-2 + diff --git a/cv/classification/hrnet_w18_small/utils.py b/cv/classification/hrnet_w18_small/utils.py new file mode 120000 index 00000000..50fbc6d8 --- /dev/null +++ b/cv/classification/hrnet_w18_small/utils.py @@ -0,0 +1 @@ +../utils.py \ No newline at end of file diff --git a/cv/classification/hrnet_w18_small_v2/README.md b/cv/classification/hrnet_w18_small_v2/README.md new file mode 100644 index 00000000..e81b690d --- /dev/null +++ b/cv/classification/hrnet_w18_small_v2/README.md @@ -0,0 +1,66 @@ +## hrnet_w18_small_v2 + +### Installation +- Install the latest version of OneFlow +```bash +python3 -m pip install oneflow -f 
https://staging.oneflow.info/branch/master/[PLATFORM] +``` +Find more information on [install oneflow](https://github.com/Oneflow-Inc/oneflow#install-oneflow) + +- Install flowvision + +Then install the latest stable release of flowvision + +```bash +pip install flowvision==0.2.1 +``` + +- Install other requirements +```bash +python3 -m pip install -r requirements.txt +``` + +### Dataset +#### ImageNet +For ImageNet dataset, you can download it from http://image-net.org/. We provide the following two ways to load data: + +- For standard folder dataset, move validation images to labeled sub-folders. The file structure should look like: + ```bash + $ tree data + imagenet + ├── train + │ ├── class1 + │ │ ├── img1.jpeg + │ │ ├── img2.jpeg + │ │ └── ... + │ ├── class2 + │ │ ├── img3.jpeg + │ │ └── ... + │ └── ... + └── val + ├── class1 + │ ├── img4.jpeg + │ ├── img5.jpeg + │ └── ... + ├── class2 + │ ├── img6.jpeg + │ └── ... + └── ... + + +### Training + +You can use bash script `train.sh` to train this model. + +```bash +sh train.sh +``` + +### inference + +Bash script `infer.sh` is used to infer the trained model. 
+ +```bash +sh infer.sh +``` + diff --git a/cv/classification/hrnet_w18_small_v2/config.py b/cv/classification/hrnet_w18_small_v2/config.py new file mode 120000 index 00000000..3721332c --- /dev/null +++ b/cv/classification/hrnet_w18_small_v2/config.py @@ -0,0 +1 @@ +../config.py \ No newline at end of file diff --git a/cv/classification/hrnet_w18_small_v2/configs b/cv/classification/hrnet_w18_small_v2/configs new file mode 120000 index 00000000..271eaab3 --- /dev/null +++ b/cv/classification/hrnet_w18_small_v2/configs @@ -0,0 +1 @@ +../configs \ No newline at end of file diff --git a/cv/classification/hrnet_w18_small_v2/data b/cv/classification/hrnet_w18_small_v2/data new file mode 120000 index 00000000..4909e06e --- /dev/null +++ b/cv/classification/hrnet_w18_small_v2/data @@ -0,0 +1 @@ +../data \ No newline at end of file diff --git a/cv/classification/hrnet_w18_small_v2/infer.sh b/cv/classification/hrnet_w18_small_v2/infer.sh new file mode 100755 index 00000000..3c27ae8f --- /dev/null +++ b/cv/classification/hrnet_w18_small_v2/infer.sh @@ -0,0 +1,17 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=1 +PORT=12346 +MODEL_ARCH="hrnet_w18_small_v2" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/default_settings.yaml \ + --model_arch $MODEL_ARCH \ + --lr 1e-2 \ + --throughput + diff --git a/cv/classification/hrnet_w18_small_v2/logger.py b/cv/classification/hrnet_w18_small_v2/logger.py new file mode 120000 index 00000000..e1dc8ceb --- /dev/null +++ b/cv/classification/hrnet_w18_small_v2/logger.py @@ -0,0 +1 @@ +../logger.py \ No newline at end of file diff --git a/cv/classification/hrnet_w18_small_v2/lr_scheduler.py b/cv/classification/hrnet_w18_small_v2/lr_scheduler.py new file mode 120000 index 00000000..268fb140 --- /dev/null +++ b/cv/classification/hrnet_w18_small_v2/lr_scheduler.py @@ -0,0 +1 @@ +../lr_scheduler.py \ No newline at end of file 
diff --git a/cv/classification/hrnet_w18_small_v2/main.py b/cv/classification/hrnet_w18_small_v2/main.py new file mode 120000 index 00000000..f05f7527 --- /dev/null +++ b/cv/classification/hrnet_w18_small_v2/main.py @@ -0,0 +1 @@ +../main.py \ No newline at end of file diff --git a/cv/classification/hrnet_w18_small_v2/optimizer.py b/cv/classification/hrnet_w18_small_v2/optimizer.py new file mode 120000 index 00000000..e5dec61d --- /dev/null +++ b/cv/classification/hrnet_w18_small_v2/optimizer.py @@ -0,0 +1 @@ +../optimizer.py \ No newline at end of file diff --git a/cv/classification/hrnet_w18_small_v2/requirements.txt b/cv/classification/hrnet_w18_small_v2/requirements.txt new file mode 120000 index 00000000..dc833dd4 --- /dev/null +++ b/cv/classification/hrnet_w18_small_v2/requirements.txt @@ -0,0 +1 @@ +../requirements.txt \ No newline at end of file diff --git a/cv/classification/hrnet_w18_small_v2/train.sh b/cv/classification/hrnet_w18_small_v2/train.sh new file mode 100755 index 00000000..925d17e5 --- /dev/null +++ b/cv/classification/hrnet_w18_small_v2/train.sh @@ -0,0 +1,16 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=8 +PORT=12346 +MODEL_ARCH="hrnet_w18_small_v2" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/default_settings.yaml \ + --model_arch $MODEL_ARCH \ + --lr 1e-2 + diff --git a/cv/classification/hrnet_w18_small_v2/utils.py b/cv/classification/hrnet_w18_small_v2/utils.py new file mode 120000 index 00000000..50fbc6d8 --- /dev/null +++ b/cv/classification/hrnet_w18_small_v2/utils.py @@ -0,0 +1 @@ +../utils.py \ No newline at end of file diff --git a/cv/classification/hrnet_w30/README.md b/cv/classification/hrnet_w30/README.md new file mode 100644 index 00000000..8179be16 --- /dev/null +++ b/cv/classification/hrnet_w30/README.md @@ -0,0 +1,66 @@ +## hrnet_w30 + +### Installation +- Install the latest version of OneFlow +```bash 
+python3 -m pip install oneflow -f https://staging.oneflow.info/branch/master/[PLATFORM] +``` +Find more information on [install oneflow](https://github.com/Oneflow-Inc/oneflow#install-oneflow) + +- Install flowvision + +Then install the latest stable release of flowvision + +```bash +pip install flowvision==0.2.1 +``` + +- Install other requirements +```bash +python3 -m pip install -r requirements.txt +``` + +### Dataset +#### ImageNet +For ImageNet dataset, you can download it from http://image-net.org/. We provide the following two ways to load data: + +- For standard folder dataset, move validation images to labeled sub-folders. The file structure should look like: + ```bash + $ tree data + imagenet + ├── train + │ ├── class1 + │ │ ├── img1.jpeg + │ │ ├── img2.jpeg + │ │ └── ... + │ ├── class2 + │ │ ├── img3.jpeg + │ │ └── ... + │ └── ... + └── val + ├── class1 + │ ├── img4.jpeg + │ ├── img5.jpeg + │ └── ... + ├── class2 + │ ├── img6.jpeg + │ └── ... + └── ... + + +### Training + +You can use bash script `train.sh` to train this model. + +```bash +sh train.sh +``` + +### inference + +Bash script `infer.sh` is used to infer the trained model. 
+ +```bash +sh infer.sh +``` + diff --git a/cv/classification/hrnet_w30/config.py b/cv/classification/hrnet_w30/config.py new file mode 120000 index 00000000..3721332c --- /dev/null +++ b/cv/classification/hrnet_w30/config.py @@ -0,0 +1 @@ +../config.py \ No newline at end of file diff --git a/cv/classification/hrnet_w30/configs b/cv/classification/hrnet_w30/configs new file mode 120000 index 00000000..271eaab3 --- /dev/null +++ b/cv/classification/hrnet_w30/configs @@ -0,0 +1 @@ +../configs \ No newline at end of file diff --git a/cv/classification/hrnet_w30/data b/cv/classification/hrnet_w30/data new file mode 120000 index 00000000..4909e06e --- /dev/null +++ b/cv/classification/hrnet_w30/data @@ -0,0 +1 @@ +../data \ No newline at end of file diff --git a/cv/classification/hrnet_w30/infer.sh b/cv/classification/hrnet_w30/infer.sh new file mode 100755 index 00000000..a96092ea --- /dev/null +++ b/cv/classification/hrnet_w30/infer.sh @@ -0,0 +1,17 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=1 +PORT=12346 +MODEL_ARCH="hrnet_w30" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/default_settings.yaml \ + --model_arch $MODEL_ARCH \ + --lr 1e-2 \ + --throughput + diff --git a/cv/classification/hrnet_w30/logger.py b/cv/classification/hrnet_w30/logger.py new file mode 120000 index 00000000..e1dc8ceb --- /dev/null +++ b/cv/classification/hrnet_w30/logger.py @@ -0,0 +1 @@ +../logger.py \ No newline at end of file diff --git a/cv/classification/hrnet_w30/lr_scheduler.py b/cv/classification/hrnet_w30/lr_scheduler.py new file mode 120000 index 00000000..268fb140 --- /dev/null +++ b/cv/classification/hrnet_w30/lr_scheduler.py @@ -0,0 +1 @@ +../lr_scheduler.py \ No newline at end of file diff --git a/cv/classification/hrnet_w30/main.py b/cv/classification/hrnet_w30/main.py new file mode 120000 index 00000000..f05f7527 --- /dev/null +++ 
b/cv/classification/hrnet_w30/main.py @@ -0,0 +1 @@ +../main.py \ No newline at end of file diff --git a/cv/classification/hrnet_w30/optimizer.py b/cv/classification/hrnet_w30/optimizer.py new file mode 120000 index 00000000..e5dec61d --- /dev/null +++ b/cv/classification/hrnet_w30/optimizer.py @@ -0,0 +1 @@ +../optimizer.py \ No newline at end of file diff --git a/cv/classification/hrnet_w30/requirements.txt b/cv/classification/hrnet_w30/requirements.txt new file mode 120000 index 00000000..dc833dd4 --- /dev/null +++ b/cv/classification/hrnet_w30/requirements.txt @@ -0,0 +1 @@ +../requirements.txt \ No newline at end of file diff --git a/cv/classification/hrnet_w30/train.sh b/cv/classification/hrnet_w30/train.sh new file mode 100755 index 00000000..1cdd0e02 --- /dev/null +++ b/cv/classification/hrnet_w30/train.sh @@ -0,0 +1,16 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=8 +PORT=12346 +MODEL_ARCH="hrnet_w30" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/default_settings.yaml \ + --model_arch $MODEL_ARCH \ + --lr 1e-2 + diff --git a/cv/classification/hrnet_w30/utils.py b/cv/classification/hrnet_w30/utils.py new file mode 120000 index 00000000..50fbc6d8 --- /dev/null +++ b/cv/classification/hrnet_w30/utils.py @@ -0,0 +1 @@ +../utils.py \ No newline at end of file diff --git a/cv/classification/hrnet_w32/README.md b/cv/classification/hrnet_w32/README.md new file mode 100644 index 00000000..3d0584f3 --- /dev/null +++ b/cv/classification/hrnet_w32/README.md @@ -0,0 +1,66 @@ +## hrnet_w32 + +### Installation +- Install the latest version of OneFlow +```bash +python3 -m pip install oneflow -f https://staging.oneflow.info/branch/master/[PLATFORM] +``` +Find more information on [install oneflow](https://github.com/Oneflow-Inc/oneflow#install-oneflow) + +- Install flowvision + +Then install the latest stable release of flowvision + +```bash +pip 
install flowvision==0.2.1 +``` + +- Install other requirements +```bash +python3 -m pip install -r requirements.txt +``` + +### Dataset +#### ImageNet +For ImageNet dataset, you can download it from http://image-net.org/. We provide the following two ways to load data: + +- For standard folder dataset, move validation images to labeled sub-folders. The file structure should look like: + ```bash + $ tree data + imagenet + ├── train + │ ├── class1 + │ │ ├── img1.jpeg + │ │ ├── img2.jpeg + │ │ └── ... + │ ├── class2 + │ │ ├── img3.jpeg + │ │ └── ... + │ └── ... + └── val + ├── class1 + │ ├── img4.jpeg + │ ├── img5.jpeg + │ └── ... + ├── class2 + │ ├── img6.jpeg + │ └── ... + └── ... + + +### Training + +You can use bash script `train.sh` to train this model. + +```bash +sh train.sh +``` + +### inference + +Bash script `infer.sh` is used to infer the trained model. + +```bash +sh infer.sh +``` + diff --git a/cv/classification/hrnet_w32/config.py b/cv/classification/hrnet_w32/config.py new file mode 120000 index 00000000..3721332c --- /dev/null +++ b/cv/classification/hrnet_w32/config.py @@ -0,0 +1 @@ +../config.py \ No newline at end of file diff --git a/cv/classification/hrnet_w32/configs b/cv/classification/hrnet_w32/configs new file mode 120000 index 00000000..271eaab3 --- /dev/null +++ b/cv/classification/hrnet_w32/configs @@ -0,0 +1 @@ +../configs \ No newline at end of file diff --git a/cv/classification/hrnet_w32/data b/cv/classification/hrnet_w32/data new file mode 120000 index 00000000..4909e06e --- /dev/null +++ b/cv/classification/hrnet_w32/data @@ -0,0 +1 @@ +../data \ No newline at end of file diff --git a/cv/classification/hrnet_w32/infer.sh b/cv/classification/hrnet_w32/infer.sh new file mode 100755 index 00000000..ebf5f006 --- /dev/null +++ b/cv/classification/hrnet_w32/infer.sh @@ -0,0 +1,17 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=1 +PORT=12346 +MODEL_ARCH="hrnet_w32" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node 
$GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/default_settings.yaml \ + --model_arch $MODEL_ARCH \ + --lr 1e-2 \ + --throughput + diff --git a/cv/classification/hrnet_w32/logger.py b/cv/classification/hrnet_w32/logger.py new file mode 120000 index 00000000..e1dc8ceb --- /dev/null +++ b/cv/classification/hrnet_w32/logger.py @@ -0,0 +1 @@ +../logger.py \ No newline at end of file diff --git a/cv/classification/hrnet_w32/lr_scheduler.py b/cv/classification/hrnet_w32/lr_scheduler.py new file mode 120000 index 00000000..268fb140 --- /dev/null +++ b/cv/classification/hrnet_w32/lr_scheduler.py @@ -0,0 +1 @@ +../lr_scheduler.py \ No newline at end of file diff --git a/cv/classification/hrnet_w32/main.py b/cv/classification/hrnet_w32/main.py new file mode 120000 index 00000000..f05f7527 --- /dev/null +++ b/cv/classification/hrnet_w32/main.py @@ -0,0 +1 @@ +../main.py \ No newline at end of file diff --git a/cv/classification/hrnet_w32/optimizer.py b/cv/classification/hrnet_w32/optimizer.py new file mode 120000 index 00000000..e5dec61d --- /dev/null +++ b/cv/classification/hrnet_w32/optimizer.py @@ -0,0 +1 @@ +../optimizer.py \ No newline at end of file diff --git a/cv/classification/hrnet_w32/requirements.txt b/cv/classification/hrnet_w32/requirements.txt new file mode 120000 index 00000000..dc833dd4 --- /dev/null +++ b/cv/classification/hrnet_w32/requirements.txt @@ -0,0 +1 @@ +../requirements.txt \ No newline at end of file diff --git a/cv/classification/hrnet_w32/train.sh b/cv/classification/hrnet_w32/train.sh new file mode 100755 index 00000000..d8e106aa --- /dev/null +++ b/cv/classification/hrnet_w32/train.sh @@ -0,0 +1,16 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=8 +PORT=12346 +MODEL_ARCH="hrnet_w32" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/default_settings.yaml \ + --model_arch $MODEL_ARCH \ 
+ --lr 1e-2 + diff --git a/cv/classification/hrnet_w32/utils.py b/cv/classification/hrnet_w32/utils.py new file mode 120000 index 00000000..50fbc6d8 --- /dev/null +++ b/cv/classification/hrnet_w32/utils.py @@ -0,0 +1 @@ +../utils.py \ No newline at end of file diff --git a/cv/classification/hrnet_w40/README.md b/cv/classification/hrnet_w40/README.md new file mode 100644 index 00000000..1aad7879 --- /dev/null +++ b/cv/classification/hrnet_w40/README.md @@ -0,0 +1,66 @@ +## hrnet_w40 + +### Installation +- Install the latest version of OneFlow +```bash +python3 -m pip install oneflow -f https://staging.oneflow.info/branch/master/[PLATFORM] +``` +Find more information on [install oneflow](https://github.com/Oneflow-Inc/oneflow#install-oneflow) + +- Install flowvision + +Then install the latest stable release of flowvision + +```bash +pip install flowvision==0.2.1 +``` + +- Install other requirements +```bash +python3 -m pip install -r requirements.txt +``` + +### Dataset +#### ImageNet +For ImageNet dataset, you can download it from http://image-net.org/. We provide the following two ways to load data: + +- For standard folder dataset, move validation images to labeled sub-folders. The file structure should look like: + ```bash + $ tree data + imagenet + ├── train + │ ├── class1 + │ │ ├── img1.jpeg + │ │ ├── img2.jpeg + │ │ └── ... + │ ├── class2 + │ │ ├── img3.jpeg + │ │ └── ... + │ └── ... + └── val + ├── class1 + │ ├── img4.jpeg + │ ├── img5.jpeg + │ └── ... + ├── class2 + │ ├── img6.jpeg + │ └── ... + └── ... + + +### Training + +You can use bash script `train.sh` to train this model. + +```bash +sh train.sh +``` + +### inference + +Bash script `infer.sh` is used to infer the trained model. 
+ +```bash +sh infer.sh +``` + diff --git a/cv/classification/hrnet_w40/config.py b/cv/classification/hrnet_w40/config.py new file mode 120000 index 00000000..3721332c --- /dev/null +++ b/cv/classification/hrnet_w40/config.py @@ -0,0 +1 @@ +../config.py \ No newline at end of file diff --git a/cv/classification/hrnet_w40/configs b/cv/classification/hrnet_w40/configs new file mode 120000 index 00000000..271eaab3 --- /dev/null +++ b/cv/classification/hrnet_w40/configs @@ -0,0 +1 @@ +../configs \ No newline at end of file diff --git a/cv/classification/hrnet_w40/data b/cv/classification/hrnet_w40/data new file mode 120000 index 00000000..4909e06e --- /dev/null +++ b/cv/classification/hrnet_w40/data @@ -0,0 +1 @@ +../data \ No newline at end of file diff --git a/cv/classification/hrnet_w40/infer.sh b/cv/classification/hrnet_w40/infer.sh new file mode 100755 index 00000000..1d5a90cb --- /dev/null +++ b/cv/classification/hrnet_w40/infer.sh @@ -0,0 +1,17 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=1 +PORT=12346 +MODEL_ARCH="hrnet_w40" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/default_settings.yaml \ + --model_arch $MODEL_ARCH \ + --lr 1e-2 \ + --throughput + diff --git a/cv/classification/hrnet_w40/logger.py b/cv/classification/hrnet_w40/logger.py new file mode 120000 index 00000000..e1dc8ceb --- /dev/null +++ b/cv/classification/hrnet_w40/logger.py @@ -0,0 +1 @@ +../logger.py \ No newline at end of file diff --git a/cv/classification/hrnet_w40/lr_scheduler.py b/cv/classification/hrnet_w40/lr_scheduler.py new file mode 120000 index 00000000..268fb140 --- /dev/null +++ b/cv/classification/hrnet_w40/lr_scheduler.py @@ -0,0 +1 @@ +../lr_scheduler.py \ No newline at end of file diff --git a/cv/classification/hrnet_w40/main.py b/cv/classification/hrnet_w40/main.py new file mode 120000 index 00000000..f05f7527 --- /dev/null +++ 
b/cv/classification/hrnet_w40/main.py @@ -0,0 +1 @@ +../main.py \ No newline at end of file diff --git a/cv/classification/hrnet_w40/optimizer.py b/cv/classification/hrnet_w40/optimizer.py new file mode 120000 index 00000000..e5dec61d --- /dev/null +++ b/cv/classification/hrnet_w40/optimizer.py @@ -0,0 +1 @@ +../optimizer.py \ No newline at end of file diff --git a/cv/classification/hrnet_w40/requirements.txt b/cv/classification/hrnet_w40/requirements.txt new file mode 120000 index 00000000..dc833dd4 --- /dev/null +++ b/cv/classification/hrnet_w40/requirements.txt @@ -0,0 +1 @@ +../requirements.txt \ No newline at end of file diff --git a/cv/classification/hrnet_w40/train.sh b/cv/classification/hrnet_w40/train.sh new file mode 100755 index 00000000..372a548a --- /dev/null +++ b/cv/classification/hrnet_w40/train.sh @@ -0,0 +1,16 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=8 +PORT=12346 +MODEL_ARCH="hrnet_w40" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/default_settings.yaml \ + --model_arch $MODEL_ARCH \ + --lr 1e-2 + diff --git a/cv/classification/hrnet_w40/utils.py b/cv/classification/hrnet_w40/utils.py new file mode 120000 index 00000000..50fbc6d8 --- /dev/null +++ b/cv/classification/hrnet_w40/utils.py @@ -0,0 +1 @@ +../utils.py \ No newline at end of file diff --git a/cv/classification/hrnet_w44/README.md b/cv/classification/hrnet_w44/README.md new file mode 100644 index 00000000..a5fd2e4c --- /dev/null +++ b/cv/classification/hrnet_w44/README.md @@ -0,0 +1,66 @@ +## hrnet_w44 + +### Installation +- Install the latest version of OneFlow +```bash +python3 -m pip install oneflow -f https://staging.oneflow.info/branch/master/[PLATFORM] +``` +Find more information on [install oneflow](https://github.com/Oneflow-Inc/oneflow#install-oneflow) + +- Install flowvision + +Then install the latest stable release of flowvision + +```bash +pip 
install flowvision==0.2.1 +``` + +- Install other requirements +```bash +python3 -m pip install -r requirements.txt +``` + +### Dataset +#### ImageNet +For ImageNet dataset, you can download it from http://image-net.org/. We provide the following two ways to load data: + +- For standard folder dataset, move validation images to labeled sub-folders. The file structure should look like: + ```bash + $ tree data + imagenet + ├── train + │ ├── class1 + │ │ ├── img1.jpeg + │ │ ├── img2.jpeg + │ │ └── ... + │ ├── class2 + │ │ ├── img3.jpeg + │ │ └── ... + │ └── ... + └── val + ├── class1 + │ ├── img4.jpeg + │ ├── img5.jpeg + │ └── ... + ├── class2 + │ ├── img6.jpeg + │ └── ... + └── ... + + +### Training + +You can use bash script `train.sh` to train this model. + +```bash +sh train.sh +``` + +### inference + +Bash script `infer.sh` is used to infer the trained model. + +```bash +sh infer.sh +``` + diff --git a/cv/classification/hrnet_w44/config.py b/cv/classification/hrnet_w44/config.py new file mode 120000 index 00000000..3721332c --- /dev/null +++ b/cv/classification/hrnet_w44/config.py @@ -0,0 +1 @@ +../config.py \ No newline at end of file diff --git a/cv/classification/hrnet_w44/configs b/cv/classification/hrnet_w44/configs new file mode 120000 index 00000000..271eaab3 --- /dev/null +++ b/cv/classification/hrnet_w44/configs @@ -0,0 +1 @@ +../configs \ No newline at end of file diff --git a/cv/classification/hrnet_w44/data b/cv/classification/hrnet_w44/data new file mode 120000 index 00000000..4909e06e --- /dev/null +++ b/cv/classification/hrnet_w44/data @@ -0,0 +1 @@ +../data \ No newline at end of file diff --git a/cv/classification/hrnet_w44/infer.sh b/cv/classification/hrnet_w44/infer.sh new file mode 100755 index 00000000..03934a89 --- /dev/null +++ b/cv/classification/hrnet_w44/infer.sh @@ -0,0 +1,17 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=1 +PORT=12346 +MODEL_ARCH="hrnet_w44" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node 
$GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/default_settings.yaml \ + --model_arch $MODEL_ARCH \ + --lr 1e-2 \ + --throughput + diff --git a/cv/classification/hrnet_w44/logger.py b/cv/classification/hrnet_w44/logger.py new file mode 120000 index 00000000..e1dc8ceb --- /dev/null +++ b/cv/classification/hrnet_w44/logger.py @@ -0,0 +1 @@ +../logger.py \ No newline at end of file diff --git a/cv/classification/hrnet_w44/lr_scheduler.py b/cv/classification/hrnet_w44/lr_scheduler.py new file mode 120000 index 00000000..268fb140 --- /dev/null +++ b/cv/classification/hrnet_w44/lr_scheduler.py @@ -0,0 +1 @@ +../lr_scheduler.py \ No newline at end of file diff --git a/cv/classification/hrnet_w44/main.py b/cv/classification/hrnet_w44/main.py new file mode 120000 index 00000000..f05f7527 --- /dev/null +++ b/cv/classification/hrnet_w44/main.py @@ -0,0 +1 @@ +../main.py \ No newline at end of file diff --git a/cv/classification/hrnet_w44/optimizer.py b/cv/classification/hrnet_w44/optimizer.py new file mode 120000 index 00000000..e5dec61d --- /dev/null +++ b/cv/classification/hrnet_w44/optimizer.py @@ -0,0 +1 @@ +../optimizer.py \ No newline at end of file diff --git a/cv/classification/hrnet_w44/requirements.txt b/cv/classification/hrnet_w44/requirements.txt new file mode 120000 index 00000000..dc833dd4 --- /dev/null +++ b/cv/classification/hrnet_w44/requirements.txt @@ -0,0 +1 @@ +../requirements.txt \ No newline at end of file diff --git a/cv/classification/hrnet_w44/train.sh b/cv/classification/hrnet_w44/train.sh new file mode 100755 index 00000000..310a49aa --- /dev/null +++ b/cv/classification/hrnet_w44/train.sh @@ -0,0 +1,16 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=8 +PORT=12346 +MODEL_ARCH="hrnet_w44" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/default_settings.yaml \ + --model_arch $MODEL_ARCH \ 
+ --lr 1e-2 + diff --git a/cv/classification/hrnet_w44/utils.py b/cv/classification/hrnet_w44/utils.py new file mode 120000 index 00000000..50fbc6d8 --- /dev/null +++ b/cv/classification/hrnet_w44/utils.py @@ -0,0 +1 @@ +../utils.py \ No newline at end of file diff --git a/cv/classification/hrnet_w48/README.md b/cv/classification/hrnet_w48/README.md new file mode 100644 index 00000000..5dbbfe9a --- /dev/null +++ b/cv/classification/hrnet_w48/README.md @@ -0,0 +1,66 @@ +## hrnet_w48 + +### Installation +- Install the latest version of OneFlow +```bash +python3 -m pip install oneflow -f https://staging.oneflow.info/branch/master/[PLATFORM] +``` +Find more information on [install oneflow](https://github.com/Oneflow-Inc/oneflow#install-oneflow) + +- Install flowvision + +Then install the latest stable release of flowvision + +```bash +pip install flowvision==0.2.1 +``` + +- Install other requirements +```bash +python3 -m pip install -r requirements.txt +``` + +### Dataset +#### ImageNet +For ImageNet dataset, you can download it from http://image-net.org/. We provide the following two ways to load data: + +- For standard folder dataset, move validation images to labeled sub-folders. The file structure should look like: + ```bash + $ tree data + imagenet + ├── train + │ ├── class1 + │ │ ├── img1.jpeg + │ │ ├── img2.jpeg + │ │ └── ... + │ ├── class2 + │ │ ├── img3.jpeg + │ │ └── ... + │ └── ... + └── val + ├── class1 + │ ├── img4.jpeg + │ ├── img5.jpeg + │ └── ... + ├── class2 + │ ├── img6.jpeg + │ └── ... + └── ... + + +### Training + +You can use bash script `train.sh` to train this model. + +```bash +sh train.sh +``` + +### inference + +Bash script `infer.sh` is used to infer the trained model. 
+ +```bash +sh infer.sh +``` + diff --git a/cv/classification/hrnet_w48/config.py b/cv/classification/hrnet_w48/config.py new file mode 120000 index 00000000..3721332c --- /dev/null +++ b/cv/classification/hrnet_w48/config.py @@ -0,0 +1 @@ +../config.py \ No newline at end of file diff --git a/cv/classification/hrnet_w48/configs b/cv/classification/hrnet_w48/configs new file mode 120000 index 00000000..271eaab3 --- /dev/null +++ b/cv/classification/hrnet_w48/configs @@ -0,0 +1 @@ +../configs \ No newline at end of file diff --git a/cv/classification/hrnet_w48/data b/cv/classification/hrnet_w48/data new file mode 120000 index 00000000..4909e06e --- /dev/null +++ b/cv/classification/hrnet_w48/data @@ -0,0 +1 @@ +../data \ No newline at end of file diff --git a/cv/classification/hrnet_w48/infer.sh b/cv/classification/hrnet_w48/infer.sh new file mode 100755 index 00000000..9c913fb5 --- /dev/null +++ b/cv/classification/hrnet_w48/infer.sh @@ -0,0 +1,17 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=1 +PORT=12346 +MODEL_ARCH="hrnet_w48" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/default_settings.yaml \ + --model_arch $MODEL_ARCH \ + --lr 1e-2 \ + --throughput + diff --git a/cv/classification/hrnet_w48/logger.py b/cv/classification/hrnet_w48/logger.py new file mode 120000 index 00000000..e1dc8ceb --- /dev/null +++ b/cv/classification/hrnet_w48/logger.py @@ -0,0 +1 @@ +../logger.py \ No newline at end of file diff --git a/cv/classification/hrnet_w48/lr_scheduler.py b/cv/classification/hrnet_w48/lr_scheduler.py new file mode 120000 index 00000000..268fb140 --- /dev/null +++ b/cv/classification/hrnet_w48/lr_scheduler.py @@ -0,0 +1 @@ +../lr_scheduler.py \ No newline at end of file diff --git a/cv/classification/hrnet_w48/main.py b/cv/classification/hrnet_w48/main.py new file mode 120000 index 00000000..f05f7527 --- /dev/null +++ 
b/cv/classification/hrnet_w48/main.py @@ -0,0 +1 @@ +../main.py \ No newline at end of file diff --git a/cv/classification/hrnet_w48/optimizer.py b/cv/classification/hrnet_w48/optimizer.py new file mode 120000 index 00000000..e5dec61d --- /dev/null +++ b/cv/classification/hrnet_w48/optimizer.py @@ -0,0 +1 @@ +../optimizer.py \ No newline at end of file diff --git a/cv/classification/hrnet_w48/requirements.txt b/cv/classification/hrnet_w48/requirements.txt new file mode 120000 index 00000000..dc833dd4 --- /dev/null +++ b/cv/classification/hrnet_w48/requirements.txt @@ -0,0 +1 @@ +../requirements.txt \ No newline at end of file diff --git a/cv/classification/hrnet_w48/train.sh b/cv/classification/hrnet_w48/train.sh new file mode 100755 index 00000000..13936683 --- /dev/null +++ b/cv/classification/hrnet_w48/train.sh @@ -0,0 +1,16 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=4 +PORT=12346 +MODEL_ARCH="hrnet_w48" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/default_settings.yaml \ + --model_arch $MODEL_ARCH \ + --lr 1e-2 + diff --git a/cv/classification/hrnet_w48/utils.py b/cv/classification/hrnet_w48/utils.py new file mode 120000 index 00000000..50fbc6d8 --- /dev/null +++ b/cv/classification/hrnet_w48/utils.py @@ -0,0 +1 @@ +../utils.py \ No newline at end of file diff --git a/cv/classification/hrnet_w64/README.md b/cv/classification/hrnet_w64/README.md new file mode 100644 index 00000000..1c6dbe00 --- /dev/null +++ b/cv/classification/hrnet_w64/README.md @@ -0,0 +1,66 @@ +## hrnet_w64 + +### Installation +- Install the latest version of OneFlow +```bash +python3 -m pip install oneflow -f https://staging.oneflow.info/branch/master/[PLATFORM] +``` +Find more information on [install oneflow](https://github.com/Oneflow-Inc/oneflow#install-oneflow) + +- Install flowvision + +Then install the latest stable release of flowvision + +```bash +pip 
install flowvision==0.2.1 +``` + +- Install other requirements +```bash +python3 -m pip install -r requirements.txt +``` + +### Dataset +#### ImageNet +For ImageNet dataset, you can download it from http://image-net.org/. We provide the following two ways to load data: + +- For standard folder dataset, move validation images to labeled sub-folders. The file structure should look like: + ```bash + $ tree data + imagenet + ├── train + │ ├── class1 + │ │ ├── img1.jpeg + │ │ ├── img2.jpeg + │ │ └── ... + │ ├── class2 + │ │ ├── img3.jpeg + │ │ └── ... + │ └── ... + └── val + ├── class1 + │ ├── img4.jpeg + │ ├── img5.jpeg + │ └── ... + ├── class2 + │ ├── img6.jpeg + │ └── ... + └── ... + + +### Training + +You can use bash script `train.sh` to train this model. + +```bash +sh train.sh +``` + +### inference + +Bash script `infer.sh` is used to infer the trained model. + +```bash +sh infer.sh +``` + diff --git a/cv/classification/hrnet_w64/config.py b/cv/classification/hrnet_w64/config.py new file mode 120000 index 00000000..3721332c --- /dev/null +++ b/cv/classification/hrnet_w64/config.py @@ -0,0 +1 @@ +../config.py \ No newline at end of file diff --git a/cv/classification/hrnet_w64/configs b/cv/classification/hrnet_w64/configs new file mode 120000 index 00000000..271eaab3 --- /dev/null +++ b/cv/classification/hrnet_w64/configs @@ -0,0 +1 @@ +../configs \ No newline at end of file diff --git a/cv/classification/hrnet_w64/data b/cv/classification/hrnet_w64/data new file mode 120000 index 00000000..4909e06e --- /dev/null +++ b/cv/classification/hrnet_w64/data @@ -0,0 +1 @@ +../data \ No newline at end of file diff --git a/cv/classification/hrnet_w64/infer.sh b/cv/classification/hrnet_w64/infer.sh new file mode 100755 index 00000000..aa092d64 --- /dev/null +++ b/cv/classification/hrnet_w64/infer.sh @@ -0,0 +1,17 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=1 +PORT=12346 +MODEL_ARCH="hrnet_w64" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node 
$GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/default_settings.yaml \ + --model_arch $MODEL_ARCH \ + --lr 1e-2 \ + --throughput + diff --git a/cv/classification/hrnet_w64/logger.py b/cv/classification/hrnet_w64/logger.py new file mode 120000 index 00000000..e1dc8ceb --- /dev/null +++ b/cv/classification/hrnet_w64/logger.py @@ -0,0 +1 @@ +../logger.py \ No newline at end of file diff --git a/cv/classification/hrnet_w64/lr_scheduler.py b/cv/classification/hrnet_w64/lr_scheduler.py new file mode 120000 index 00000000..268fb140 --- /dev/null +++ b/cv/classification/hrnet_w64/lr_scheduler.py @@ -0,0 +1 @@ +../lr_scheduler.py \ No newline at end of file diff --git a/cv/classification/hrnet_w64/main.py b/cv/classification/hrnet_w64/main.py new file mode 120000 index 00000000..f05f7527 --- /dev/null +++ b/cv/classification/hrnet_w64/main.py @@ -0,0 +1 @@ +../main.py \ No newline at end of file diff --git a/cv/classification/hrnet_w64/optimizer.py b/cv/classification/hrnet_w64/optimizer.py new file mode 120000 index 00000000..e5dec61d --- /dev/null +++ b/cv/classification/hrnet_w64/optimizer.py @@ -0,0 +1 @@ +../optimizer.py \ No newline at end of file diff --git a/cv/classification/hrnet_w64/requirements.txt b/cv/classification/hrnet_w64/requirements.txt new file mode 120000 index 00000000..dc833dd4 --- /dev/null +++ b/cv/classification/hrnet_w64/requirements.txt @@ -0,0 +1 @@ +../requirements.txt \ No newline at end of file diff --git a/cv/classification/hrnet_w64/train.sh b/cv/classification/hrnet_w64/train.sh new file mode 100755 index 00000000..2f8010be --- /dev/null +++ b/cv/classification/hrnet_w64/train.sh @@ -0,0 +1,16 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=8 +PORT=12346 +MODEL_ARCH="hrnet_w64" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/default_settings.yaml \ + --model_arch $MODEL_ARCH \ 
+ --lr 1e-2 + diff --git a/cv/classification/hrnet_w64/utils.py b/cv/classification/hrnet_w64/utils.py new file mode 120000 index 00000000..50fbc6d8 --- /dev/null +++ b/cv/classification/hrnet_w64/utils.py @@ -0,0 +1 @@ +../utils.py \ No newline at end of file diff --git a/cv/classification/levit_128/README.md b/cv/classification/levit_128/README.md new file mode 100644 index 00000000..308536b5 --- /dev/null +++ b/cv/classification/levit_128/README.md @@ -0,0 +1,66 @@ +## levit_128 + +### Installation +- Install the latest version of OneFlow +```bash +python3 -m pip install oneflow -f https://staging.oneflow.info/branch/master/[PLATFORM] +``` +Find more information on [install oneflow](https://github.com/Oneflow-Inc/oneflow#install-oneflow) + +- Install flowvision + +Then install the latest stable release of flowvision + +```bash +pip install flowvision==0.2.1 +``` + +- Install other requirements +```bash +python3 -m pip install -r requirements.txt +``` + +### Dataset +#### ImageNet +For ImageNet dataset, you can download it from http://image-net.org/. We provide the following two ways to load data: + +- For standard folder dataset, move validation images to labeled sub-folders. The file structure should look like: + ```bash + $ tree data + imagenet + ├── train + │ ├── class1 + │ │ ├── img1.jpeg + │ │ ├── img2.jpeg + │ │ └── ... + │ ├── class2 + │ │ ├── img3.jpeg + │ │ └── ... + │ └── ... + └── val + ├── class1 + │ ├── img4.jpeg + │ ├── img5.jpeg + │ └── ... + ├── class2 + │ ├── img6.jpeg + │ └── ... + └── ... + + +### Training + +You can use bash script `train.sh` to train this model. + +```bash +sh train.sh +``` + +### inference + +Bash script `infer.sh` is used to infer the trained model. 
+ +```bash +sh infer.sh +``` + diff --git a/cv/classification/levit_128/config.py b/cv/classification/levit_128/config.py new file mode 120000 index 00000000..3721332c --- /dev/null +++ b/cv/classification/levit_128/config.py @@ -0,0 +1 @@ +../config.py \ No newline at end of file diff --git a/cv/classification/levit_128/configs b/cv/classification/levit_128/configs new file mode 120000 index 00000000..271eaab3 --- /dev/null +++ b/cv/classification/levit_128/configs @@ -0,0 +1 @@ +../configs \ No newline at end of file diff --git a/cv/classification/levit_128/data b/cv/classification/levit_128/data new file mode 120000 index 00000000..4909e06e --- /dev/null +++ b/cv/classification/levit_128/data @@ -0,0 +1 @@ +../data \ No newline at end of file diff --git a/cv/classification/levit_128/infer.sh b/cv/classification/levit_128/infer.sh new file mode 100755 index 00000000..43624d1f --- /dev/null +++ b/cv/classification/levit_128/infer.sh @@ -0,0 +1,16 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=1 +PORT=12346 +MODEL_ARCH="levit_128" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/default_settings.yaml \ + --model_arch $MODEL_ARCH \ + --throughput + diff --git a/cv/classification/levit_128/logger.py b/cv/classification/levit_128/logger.py new file mode 120000 index 00000000..e1dc8ceb --- /dev/null +++ b/cv/classification/levit_128/logger.py @@ -0,0 +1 @@ +../logger.py \ No newline at end of file diff --git a/cv/classification/levit_128/lr_scheduler.py b/cv/classification/levit_128/lr_scheduler.py new file mode 120000 index 00000000..268fb140 --- /dev/null +++ b/cv/classification/levit_128/lr_scheduler.py @@ -0,0 +1 @@ +../lr_scheduler.py \ No newline at end of file diff --git a/cv/classification/levit_128/main.py b/cv/classification/levit_128/main.py new file mode 100644 index 00000000..1c34db66 --- /dev/null +++ b/cv/classification/levit_128/main.py 
@@ -0,0 +1,462 @@ +""" +Modified from https://github.com/microsoft/Swin-Transformer/blob/main/main.py +""" + +import os +import time +import argparse +import datetime +import numpy as np +import oneflow as flow +import oneflow.backends.cudnn as cudnn + +from flowvision.loss.cross_entropy import ( + LabelSmoothingCrossEntropy, + SoftTargetCrossEntropy, +) +from flowvision.utils.metrics import accuracy +from flowvision.models import ModelCreator + +from config import get_config +from data import build_loader +from lr_scheduler import build_scheduler +from optimizer import build_optimizer +from logger import create_logger +from utils import ( + load_checkpoint, + save_checkpoint, + get_grad_norm, + auto_resume_helper, + reduce_tensor, + AverageMeter, + TimeMeter +) +import flowvision + +def build_model(config): + model_arch = config.MODEL.ARCH + model = ModelCreator.create_model(model_arch, pretrained=config.MODEL.PRETRAINED) + return model + + +def parse_option(): + parser = argparse.ArgumentParser( + "Flowvision image classification training and evaluation script", add_help=False + ) + parser.add_argument( + "--model_arch", + type=str, + required=True, + default="swin_tiny_patch4_window7_224", + help="model for training", + ) + parser.add_argument( + "--cfg", type=str, required=True, metavar="FILE", help="path to config file", + ) + parser.add_argument( + "--opts", + help="Modify config options by adding 'KEY VALUE' pairs. 
", + default=None, + nargs="+", + ) + + # easy config modification + parser.add_argument( + "--synthetic-data", + action="store_true", + dest="synthetic_data", + help="Use synthetic data", + ) + parser.add_argument( + "--epochs", type=int, help="batch size for single GPU" + ) + parser.add_argument("--lr", type=float, help="initial learning rate") + parser.add_argument( + "--image-size", type=int, help="initial image size" + ) + parser.add_argument( + "--batch-size", type=int, help="batch size for single GPU" + ) + parser.add_argument("--data-path", type=str, help="path to dataset") + parser.add_argument( + "--zip", + action="store_true", + help="use zipped dataset instead of folder dataset", + ) + parser.add_argument( + "--cache-mode", + type=str, + default="part", + choices=["no", "full", "part"], + help="no: no cache, " + "full: cache all data, " + "part: sharding the dataset into nonoverlapping pieces and only cache one piece", + ) + parser.add_argument("--resume", help="resume from checkpoint") + parser.add_argument( + "--accumulation-steps", type=int, help="gradient accumulation steps" + ) + parser.add_argument( + "--use-checkpoint", + action="store_true", + help="whether to use gradient checkpointing to save memory", + ) + parser.add_argument( + "--output", + default="output", + type=str, + metavar="PATH", + help="root of output folder, the full path is // (default: output)", + ) + parser.add_argument("--tag", help="tag of experiment") + parser.add_argument("--eval", action="store_true", help="Perform evaluation only") + parser.add_argument( + "--throughput", action="store_true", help="Test throughput only" + ) + + # distributed training + parser.add_argument( + "--local_rank", + type=int, + default=0, + required=False, + help="local rank for DistributedDataParallel", + ) + + args, unparsed = parser.parse_known_args() + + config = get_config(args) + + return args, config + + +def main(config): + ( + dataset_train, + dataset_val, + data_loader_train, + 
data_loader_val, + mixup_fn, + ) = build_loader(config) + + logger.info(f"Creating model:{config.MODEL.ARCH}") + model = build_model(config) + model.cuda() + + optimizer = build_optimizer(config, model) + model = flow.nn.parallel.DistributedDataParallel(model, broadcast_buffers=False) + # FIXME: model with DDP wrapper doesn't have model.module + model_without_ddp = model + + n_parameters = sum(p.numel() for p in model.parameters() if p.requires_grad) + logger.info(f"number of params: {n_parameters}") + if hasattr(model_without_ddp, "flops"): + flops = model_without_ddp.flops() + logger.info(f"number of GFLOPs: {flops / 1e9}") + + lr_scheduler = build_scheduler(config, optimizer, len(data_loader_train)) + + if config.AUG.MIXUP > 0.0: + # smoothing is handled with mixup label transform + criterion = SoftTargetCrossEntropy() + elif config.MODEL.LABEL_SMOOTHING > 0.0: + criterion = LabelSmoothingCrossEntropy(smoothing=config.MODEL.LABEL_SMOOTHING) + else: + criterion = flow.nn.CrossEntropyLoss() + + max_accuracy = 0.0 + + if config.TRAIN.AUTO_RESUME: + resume_file = auto_resume_helper(config.OUTPUT) + if resume_file: + if config.MODEL.RESUME: + logger.warning( + f"auto-resume changing resume file from {config.MODEL.RESUME} to {resume_file}" + ) + config.defrost() + config.MODEL.RESUME = resume_file + config.freeze() + logger.info(f"auto resuming from {resume_file}") + else: + logger.info(f"no checkpoint found in {config.OUTPUT}, ignoring auto resume") + + if config.MODEL.RESUME: + print("resume called") + max_accuracy = load_checkpoint( + config, model_without_ddp, optimizer, lr_scheduler, logger + ) + acc1, acc5, loss = validate(config, data_loader_val, model) + logger.info( + f"Accuracy of the network on the {len(data_loader_val)} test images: {acc1:.1f}%" + ) + if config.EVAL_MODE: + return + + if config.THROUGHPUT_MODE: + acc1, acc5, loss = validate(config, data_loader_val, model) + logger.info( + f"Accuracy of the network on the {len(data_loader_val)} test images: 
{acc1:.1f}%" + ) + throughput(data_loader_val, model, logger) + return + + logger.info("Start training") + start_time = time.time() + for epoch in range(config.TRAIN.START_EPOCH, config.TRAIN.EPOCHS): + if not config.DATA.SYNTHETIC_DATA: + data_loader_train.sampler.set_epoch(epoch) + + train_one_epoch( + config, + model, + criterion, + data_loader_train, + optimizer, + epoch, + mixup_fn, + lr_scheduler + ) + if flow.env.get_rank() == 0 and ( + epoch % config.SAVE_FREQ == 0 or epoch == (config.TRAIN.EPOCHS - 1) + ): + save_checkpoint( + config, + epoch, + model_without_ddp, + max_accuracy, + optimizer, + lr_scheduler, + logger, + ) + + # no validate + acc1, acc5, loss = validate(config, data_loader_val, model) + logger.info( + f"Accuracy of the network on the {len(data_loader_val)} test images: {acc1:.1f}%" + ) + max_accuracy = max(max_accuracy, acc1) + logger.info(f"Max accuracy: {max_accuracy:.2f}%") + + total_time = time.time() - start_time + total_time_str = str(datetime.timedelta(seconds=int(total_time))) + logger.info("Training time {}".format(total_time_str)) + + +def train_one_epoch( + config, model, criterion, data_loader, optimizer, epoch, mixup_fn, lr_scheduler +): + model.train() + optimizer.zero_grad() + + num_steps = len(data_loader) + one_sample_time = TimeMeter() + loss_meter = AverageMeter() + + start = time.time() + end = time.time() + for idx, (samples, targets) in enumerate(data_loader): + samples = samples.cuda() + targets = targets.cuda() + + if mixup_fn is not None: + samples, targets = mixup_fn(samples, targets) + + outputs = model(samples) + if type(outputs) == tuple: + outputs = outputs[0] + + + if config.TRAIN.ACCUMULATION_STEPS > 1: + loss = criterion(outputs, targets) + loss = loss / config.TRAIN.ACCUMULATION_STEPS + loss.backward() + if config.TRAIN.CLIP_GRAD: + flow.nn.utils.clip_grad_norm_( + model.parameters(), config.TRAIN.CLIP_GRAD + ) + if (idx + 1) % config.TRAIN.ACCUMULATION_STEPS == 0: + optimizer.step() + optimizer.zero_grad() 
+ lr_scheduler.step() + else: + loss = criterion(outputs, targets) + optimizer.zero_grad() + loss.backward() + if config.TRAIN.CLIP_GRAD: + flow.nn.utils.clip_grad_norm_( + model.parameters(), config.TRAIN.CLIP_GRAD + ) + optimizer.step() + lr_scheduler.step() + + one_sample_time.record(samples.size(0) * flow.env.get_world_size()) + loss_meter.record(loss.cpu().detach(), targets.size(0)) + + end = time.time() + + if idx % config.PRINT_FREQ == 0: + lr = optimizer.param_groups[0]["lr"] + loss, loss_avg = loss_meter.get() + throughput, throughput_avg = one_sample_time.get() + etas = (num_steps - idx) * samples.size(0) * flow.env.get_world_size() / throughput_avg + one_sample_time.reset() + + logger.info( + f"Train: [{epoch}/{config.TRAIN.EPOCHS}][{idx}/{num_steps}]\t" + f"eta {datetime.timedelta(seconds=int(etas))}\tlr {lr:.6f}\t" + f"time {samples.size(0) * flow.env.get_world_size() / throughput:.4f}s ({samples.size(0) * flow.env.get_world_size() / throughput_avg:.4f}s)\t" + f"rate {throughput:.4f}/s ({throughput_avg:.4f}/s)\t" + f"loss {loss:.4f} ({loss_avg:.4f})\t" + ) + + epoch_time = time.time() - start + logger.info( + f"EPOCH {epoch} training takes {datetime.timedelta(seconds=int(epoch_time))}" + ) + + +@flow.no_grad() +def validate(config, data_loader, model): + criterion = flow.nn.CrossEntropyLoss() + model.eval() + + batch_time = TimeMeter() + loss_meter = AverageMeter() + acc1_meter = AverageMeter() + acc5_meter = AverageMeter() + + end = time.time() + for idx, (images, target) in enumerate(data_loader): + images = images.cuda() + target = target.cuda() + + # compute output + output = model.forward(images) + if type(output) == tuple: + output = output[0] + + + # measure accuracy and record loss + loss = criterion(output, target) + acc1, acc5 = accuracy(output, target, topk=(1, 5)) + + acc1 = reduce_tensor(acc1) + acc5 = reduce_tensor(acc5) + loss = reduce_tensor(loss) + + batch_time.record(target.size(0) * flow.env.get_world_size()) + 
loss_meter.record(loss, target.size(0)) + acc1_meter.record(acc1, target.size(0)) + acc5_meter.record(acc5, target.size(0)) + + # measure elapsed time + end = time.time() + + if idx % config.PRINT_FREQ == 0: + acc1, acc1_avg = acc1_meter.get() + acc5, acc5_avg = acc5_meter.get() + loss, loss_avg = loss_meter.get() + throughput, throughput_avg = batch_time.get() + batch_time.reset() + + logger.info( + f"Test: [{idx}/{len(data_loader)}]\t" + f"Throughput {throughput:.3f} ({throughput_avg:.3f})\t" + f"Loss {loss:.4f} ({loss_avg:.4f})\t" + f"Acc@1 {acc1:.3f} ({acc1_avg:.3f})\t" + f"Acc@5 {acc5:.3f} ({acc5_avg:.3f})\t" + ) + + logger.info(f" * Acc@1 {acc1_avg:.3f} Acc@5 {acc5_avg:.3f}") + return acc1_avg, acc5_avg, loss_meter + + +@flow.no_grad() +def throughput(data_loader, model, logger): + model.eval() + + for idx, (images, _) in enumerate(data_loader): + images = images.cuda() + batch_size = images.shape[0] + for i in range(50): + model.forward(images) + flow.cuda.synchronize() + # TODO: add flow.cuda.synchronize() + logger.info(f"throughput averaged with 30 times") + tic1 = time.time() + for i in range(30): + model.forward(images) + + flow.cuda.synchronize() + tic2 = time.time() + logger.info( + f"batch_size {batch_size} throughput {30 * batch_size / (tic2 - tic1)}" + ) + return + + +if __name__ == "__main__": + _, config = parse_option() + + if "RANK" in os.environ and "WORLD_SIZE" in os.environ: + rank = flow.env.get_rank() + world_size = flow.env.get_world_size() + print(f"RANK and WORLD_SIZE in environ: {rank}/{world_size}") + else: + rank = -1 + world_size = -1 + + seed = config.SEED + flow.env.get_rank() + flow.manual_seed(seed) + np.random.seed(seed) + cudnn.benchmark = True + + # linear_scaled_lr = ( + # config.TRAIN.BASE_LR + # * config.DATA.BATCH_SIZE + # * flow.env.get_world_size() + # / 512.0 + # ) + # linear_scaled_warmup_lr = ( + # config.TRAIN.WARMUP_LR + # * config.DATA.BATCH_SIZE + # * flow.env.get_world_size() + # / 512.0 + # ) + linear_scaled_lr 
= config.TRAIN.BASE_LR + linear_scaled_warmup_lr = config.TRAIN.WARMUP_LR + + linear_scaled_min_lr = ( + config.TRAIN.MIN_LR * config.DATA.BATCH_SIZE * flow.env.get_world_size() / 512.0 + ) + + # gradient accumulation also need to scale the learning rate + if config.TRAIN.ACCUMULATION_STEPS > 1: + linear_scaled_lr = linear_scaled_lr * config.TRAIN.ACCUMULATION_STEPS + linear_scaled_warmup_lr = ( + linear_scaled_warmup_lr * config.TRAIN.ACCUMULATION_STEPS + ) + linear_scaled_min_lr = linear_scaled_min_lr * config.TRAIN.ACCUMULATION_STEPS + config.defrost() + config.TRAIN.BASE_LR = linear_scaled_lr + config.TRAIN.WARMUP_LR = linear_scaled_warmup_lr + config.TRAIN.MIN_LR = linear_scaled_min_lr + config.freeze() + + os.makedirs(config.OUTPUT, exist_ok=True) + logger = create_logger( + output_dir=config.OUTPUT, + dist_rank=flow.env.get_rank(), + name=f"{config.MODEL.ARCH}", + ) + + if flow.env.get_rank() == 0: + path = os.path.join(config.OUTPUT, "config.json") + with open(path, "w") as f: + f.write(config.dump()) + logger.info(f"Full config saved to {path}") + + # print config + logger.info(config.dump()) + + main(config) diff --git a/cv/classification/levit_128/optimizer.py b/cv/classification/levit_128/optimizer.py new file mode 120000 index 00000000..e5dec61d --- /dev/null +++ b/cv/classification/levit_128/optimizer.py @@ -0,0 +1 @@ +../optimizer.py \ No newline at end of file diff --git a/cv/classification/levit_128/requirements.txt b/cv/classification/levit_128/requirements.txt new file mode 120000 index 00000000..dc833dd4 --- /dev/null +++ b/cv/classification/levit_128/requirements.txt @@ -0,0 +1 @@ +../requirements.txt \ No newline at end of file diff --git a/cv/classification/levit_128/train.sh b/cv/classification/levit_128/train.sh new file mode 100755 index 00000000..248456ce --- /dev/null +++ b/cv/classification/levit_128/train.sh @@ -0,0 +1,15 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=8 +PORT=12346 +MODEL_ARCH="levit_128" + +python3 -m 
oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/default_settings.yaml \ + --model_arch $MODEL_ARCH + diff --git a/cv/classification/levit_128/utils.py b/cv/classification/levit_128/utils.py new file mode 120000 index 00000000..50fbc6d8 --- /dev/null +++ b/cv/classification/levit_128/utils.py @@ -0,0 +1 @@ +../utils.py \ No newline at end of file diff --git a/cv/classification/main.py b/cv/classification/main.py index aed5be02..b780d7e9 100755 --- a/cv/classification/main.py +++ b/cv/classification/main.py @@ -144,7 +144,7 @@ def main(config): model.cuda() optimizer = build_optimizer(config, model) - model = flow.nn.parallel.DistributedDataParallel(model, broadcast_buffers=False) + model = flow.nn.parallel.DistributedDataParallel(model, broadcast_buffers=False, use_bucket=False) # FIXME: model with DDP wrapper doesn't have model.module model_without_ddp = model @@ -453,4 +453,4 @@ def throughput(data_loader, model, logger): # print config logger.info(config.dump()) - main(config) \ No newline at end of file + main(config) diff --git a/cv/classification/mlp_mixer_l16_224/README.md b/cv/classification/mlp_mixer_l16_224/README.md new file mode 100644 index 00000000..b6c2caba --- /dev/null +++ b/cv/classification/mlp_mixer_l16_224/README.md @@ -0,0 +1,66 @@ +## mlp_mixer_l16_224 + +### Installation +- Install the latest version of OneFlow +```bash +python3 -m pip install oneflow -f https://staging.oneflow.info/branch/master/[PLATFORM] +``` +Find more information on [install oneflow](https://github.com/Oneflow-Inc/oneflow#install-oneflow) + +- Install flowvision + +Then install the latest stable release of flowvision + +```bash +pip install flowvision==0.2.1 +``` + +- Install other requirements +```bash +python3 -m pip install -r requirements.txt +``` + +### Dataset +#### ImageNet +For ImageNet dataset, you can download it from http://image-net.org/. 
We provide the following two ways to load data: + +- For standard folder dataset, move validation images to labeled sub-folders. The file structure should look like: + ```bash + $ tree data + imagenet + ├── train + │ ├── class1 + │ │ ├── img1.jpeg + │ │ ├── img2.jpeg + │ │ └── ... + │ ├── class2 + │ │ ├── img3.jpeg + │ │ └── ... + │ └── ... + └── val + ├── class1 + │ ├── img4.jpeg + │ ├── img5.jpeg + │ └── ... + ├── class2 + │ ├── img6.jpeg + │ └── ... + └── ... + + +### Training + +You can use bash script `train.sh` to train this model. + +```bash +sh train.sh +``` + +### inference + +Bash script `infer.sh` is used to infer the trained model. + +```bash +sh infer.sh +``` + diff --git a/cv/classification/mlp_mixer_l16_224/config.py b/cv/classification/mlp_mixer_l16_224/config.py new file mode 120000 index 00000000..3721332c --- /dev/null +++ b/cv/classification/mlp_mixer_l16_224/config.py @@ -0,0 +1 @@ +../config.py \ No newline at end of file diff --git a/cv/classification/mlp_mixer_l16_224/configs b/cv/classification/mlp_mixer_l16_224/configs new file mode 120000 index 00000000..271eaab3 --- /dev/null +++ b/cv/classification/mlp_mixer_l16_224/configs @@ -0,0 +1 @@ +../configs \ No newline at end of file diff --git a/cv/classification/mlp_mixer_l16_224/data b/cv/classification/mlp_mixer_l16_224/data new file mode 120000 index 00000000..4909e06e --- /dev/null +++ b/cv/classification/mlp_mixer_l16_224/data @@ -0,0 +1 @@ +../data \ No newline at end of file diff --git a/cv/classification/mlp_mixer_l16_224/infer.sh b/cv/classification/mlp_mixer_l16_224/infer.sh new file mode 100755 index 00000000..ea4b2283 --- /dev/null +++ b/cv/classification/mlp_mixer_l16_224/infer.sh @@ -0,0 +1,16 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=1 +PORT=12346 +MODEL_ARCH="mlp_mixer_l16_224" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/default_settings.yaml \ + 
--model_arch $MODEL_ARCH \ + --throughput + diff --git a/cv/classification/mlp_mixer_l16_224/logger.py b/cv/classification/mlp_mixer_l16_224/logger.py new file mode 120000 index 00000000..e1dc8ceb --- /dev/null +++ b/cv/classification/mlp_mixer_l16_224/logger.py @@ -0,0 +1 @@ +../logger.py \ No newline at end of file diff --git a/cv/classification/mlp_mixer_l16_224/lr_scheduler.py b/cv/classification/mlp_mixer_l16_224/lr_scheduler.py new file mode 120000 index 00000000..268fb140 --- /dev/null +++ b/cv/classification/mlp_mixer_l16_224/lr_scheduler.py @@ -0,0 +1 @@ +../lr_scheduler.py \ No newline at end of file diff --git a/cv/classification/mlp_mixer_l16_224/main.py b/cv/classification/mlp_mixer_l16_224/main.py new file mode 120000 index 00000000..f05f7527 --- /dev/null +++ b/cv/classification/mlp_mixer_l16_224/main.py @@ -0,0 +1 @@ +../main.py \ No newline at end of file diff --git a/cv/classification/mlp_mixer_l16_224/optimizer.py b/cv/classification/mlp_mixer_l16_224/optimizer.py new file mode 120000 index 00000000..e5dec61d --- /dev/null +++ b/cv/classification/mlp_mixer_l16_224/optimizer.py @@ -0,0 +1 @@ +../optimizer.py \ No newline at end of file diff --git a/cv/classification/mlp_mixer_l16_224/requirements.txt b/cv/classification/mlp_mixer_l16_224/requirements.txt new file mode 120000 index 00000000..dc833dd4 --- /dev/null +++ b/cv/classification/mlp_mixer_l16_224/requirements.txt @@ -0,0 +1 @@ +../requirements.txt \ No newline at end of file diff --git a/cv/classification/mlp_mixer_l16_224/train.sh b/cv/classification/mlp_mixer_l16_224/train.sh new file mode 100755 index 00000000..a41474a3 --- /dev/null +++ b/cv/classification/mlp_mixer_l16_224/train.sh @@ -0,0 +1,16 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=8 +PORT=12346 +MODEL_ARCH="mlp_mixer_l16_224" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/default_settings.yaml \ + 
--model_arch $MODEL_ARCH \ + --lr 0.001 + diff --git a/cv/classification/mlp_mixer_l16_224/utils.py b/cv/classification/mlp_mixer_l16_224/utils.py new file mode 120000 index 00000000..50fbc6d8 --- /dev/null +++ b/cv/classification/mlp_mixer_l16_224/utils.py @@ -0,0 +1 @@ +../utils.py \ No newline at end of file diff --git a/cv/classification/mnasnetx1_0/README.md b/cv/classification/mnasnetx1_0/README.md index 82b8f302..c4dd9019 100644 --- a/cv/classification/mnasnetx1_0/README.md +++ b/cv/classification/mnasnetx1_0/README.md @@ -90,14 +90,20 @@ For ImageNet dataset, you can download it from http://image-net.org/. We provide ``` - ### Training -- ddp training with simple bash file + +You can use bash script `train.sh` to train this model. + ```bash +sh train.sh +``` -bash train.sh +### inference + +Bash script `infer.sh` is used to infer the trained model. + +```bash +sh infer.sh ``` -## Reference -- [Swin-Transformer](https://github.com/microsoft/Swin-Transformer) diff --git a/cv/classification/poolformer_m36/README.md b/cv/classification/poolformer_m36/README.md new file mode 100644 index 00000000..cfbee165 --- /dev/null +++ b/cv/classification/poolformer_m36/README.md @@ -0,0 +1,65 @@ +## Poolformer-M36 + +### Installation +- Install the latest version of OneFlow +```bash +python3 -m pip install oneflow -f https://staging.oneflow.info/branch/master/[PLATFORM] +``` +Find more information on [install oneflow](https://github.com/Oneflow-Inc/oneflow#install-oneflow) + +- Install flowvision + +Then install the latest stable release of flowvision + +```bash +pip install flowvision==0.2.1 +``` + +- Install other requirements +```bash +python3 -m pip install -r requirements.txt +``` + + +### Dataset +#### ImageNet +For ImageNet dataset, you can download it from http://image-net.org/. + +### Code Structure + + + ``` + . + ├── configs -> ../configs + │ ├── ... 
+ │ └── default_settings.yaml + ├── data -> ../data + │ ├── __init__.py + │ ├── build.py + │ ├── cached_image_folder.py + │ ├── samplers.py + │ └── zipreader.py + ├── utils.py -> ../utils.py + ├── config.py -> ../config.py + ├── logger.py -> ../logger.py + ├── lr_scheduler.py -> ../lr_scheduler.py + ├── optimizer.py -> ../optimizer.py + ├── main.py + ├── train.sh + └── infer.sh + ``` + + + +### Training +You can use bash script `train.sh` to train this model. +``````` +sh train.sh +``````` + +### Inference + +Bash script `infer.sh` is used to infer the trained model. +``````` +sh infer.sh +``````` diff --git a/cv/classification/poolformer_m36/config.py b/cv/classification/poolformer_m36/config.py new file mode 120000 index 00000000..3721332c --- /dev/null +++ b/cv/classification/poolformer_m36/config.py @@ -0,0 +1 @@ +../config.py \ No newline at end of file diff --git a/cv/classification/poolformer_m36/configs b/cv/classification/poolformer_m36/configs new file mode 120000 index 00000000..bd0ab477 --- /dev/null +++ b/cv/classification/poolformer_m36/configs @@ -0,0 +1 @@ +../configs/ \ No newline at end of file diff --git a/cv/classification/poolformer_m36/data b/cv/classification/poolformer_m36/data new file mode 120000 index 00000000..eed2d0bc --- /dev/null +++ b/cv/classification/poolformer_m36/data @@ -0,0 +1 @@ +../data/ \ No newline at end of file diff --git a/cv/classification/poolformer_m36/infer.sh b/cv/classification/poolformer_m36/infer.sh new file mode 100755 index 00000000..0bfd4869 --- /dev/null +++ b/cv/classification/poolformer_m36/infer.sh @@ -0,0 +1,15 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=1 +PORT=12346 +MODEL_ARCH="poolformer_m36" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/default_settings.yaml \ + --model_arch $MODEL_ARCH \ + --throughput diff --git a/cv/classification/poolformer_m36/logger.py 
b/cv/classification/poolformer_m36/logger.py new file mode 120000 index 00000000..e1dc8ceb --- /dev/null +++ b/cv/classification/poolformer_m36/logger.py @@ -0,0 +1 @@ +../logger.py \ No newline at end of file diff --git a/cv/classification/poolformer_m36/lr_scheduler.py b/cv/classification/poolformer_m36/lr_scheduler.py new file mode 120000 index 00000000..268fb140 --- /dev/null +++ b/cv/classification/poolformer_m36/lr_scheduler.py @@ -0,0 +1 @@ +../lr_scheduler.py \ No newline at end of file diff --git a/cv/classification/poolformer_m36/main.py b/cv/classification/poolformer_m36/main.py new file mode 120000 index 00000000..f05f7527 --- /dev/null +++ b/cv/classification/poolformer_m36/main.py @@ -0,0 +1 @@ +../main.py \ No newline at end of file diff --git a/cv/classification/poolformer_m36/optimizer.py b/cv/classification/poolformer_m36/optimizer.py new file mode 120000 index 00000000..e5dec61d --- /dev/null +++ b/cv/classification/poolformer_m36/optimizer.py @@ -0,0 +1 @@ +../optimizer.py \ No newline at end of file diff --git a/cv/classification/poolformer_m36/requirements.txt b/cv/classification/poolformer_m36/requirements.txt new file mode 120000 index 00000000..dc833dd4 --- /dev/null +++ b/cv/classification/poolformer_m36/requirements.txt @@ -0,0 +1 @@ +../requirements.txt \ No newline at end of file diff --git a/cv/classification/poolformer_m36/train.sh b/cv/classification/poolformer_m36/train.sh new file mode 100755 index 00000000..8eb49f2d --- /dev/null +++ b/cv/classification/poolformer_m36/train.sh @@ -0,0 +1,19 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=4 +PORT=12346 +MODEL_ARCH="poolformer_m36" +BATCH_SIZE=64 +LEARNING_RATE=1e-4 + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/default_settings.yaml \ + --model_arch $MODEL_ARCH \ + --batch-size $BATCH_SIZE \ + --lr $LEARNING_RATE + diff --git 
a/cv/classification/poolformer_m36/utils.py b/cv/classification/poolformer_m36/utils.py new file mode 120000 index 00000000..50fbc6d8 --- /dev/null +++ b/cv/classification/poolformer_m36/utils.py @@ -0,0 +1 @@ +../utils.py \ No newline at end of file diff --git a/cv/classification/poolformer_m48/README.md b/cv/classification/poolformer_m48/README.md new file mode 100644 index 00000000..e8591bab --- /dev/null +++ b/cv/classification/poolformer_m48/README.md @@ -0,0 +1,65 @@ +## Poolformer-M48 + +### Installation +- Install the latest version of OneFlow +```bash +python3 -m pip install oneflow -f https://staging.oneflow.info/branch/master/[PLATFORM] +``` +Find more information on [install oneflow](https://github.com/Oneflow-Inc/oneflow#install-oneflow) + +- Install flowvision + +Then install the latest stable release of flowvision + +```bash +pip install flowvision==0.2.1 +``` + +- Install other requirements +```bash +python3 -m pip install -r requirements.txt +``` + + +### Dataset +#### ImageNet +For ImageNet dataset, you can download it from http://image-net.org/. + +### Code Structure + + + ``` + . + ├── configs -> ../configs + │ ├── ... + │ └── default_settings.yaml + ├── data -> ../data + │ ├── __init__.py + │ ├── build.py + │ ├── cached_image_folder.py + │ ├── samplers.py + │ └── zipreader.py + ├── utils.py -> ../utils.py + ├── config.py -> ../config.py + ├── logger.py -> ../logger.py + ├── lr_scheduler.py -> ../lr_scheduler.py + ├── optimizer.py -> ../optimizer.py + ├── main.py + ├── train.sh + └── infer.sh + ``` + + + +### Training +You can use bash script `train.sh` to train this model. +``````` +sh train.sh +``````` + +### Inference + +Bash script `infer.sh` is used to infer the trained model. 
+``````` +sh infer.sh +``````` diff --git a/cv/classification/poolformer_m48/config.py b/cv/classification/poolformer_m48/config.py new file mode 120000 index 00000000..3721332c --- /dev/null +++ b/cv/classification/poolformer_m48/config.py @@ -0,0 +1 @@ +../config.py \ No newline at end of file diff --git a/cv/classification/poolformer_m48/configs b/cv/classification/poolformer_m48/configs new file mode 120000 index 00000000..bd0ab477 --- /dev/null +++ b/cv/classification/poolformer_m48/configs @@ -0,0 +1 @@ +../configs/ \ No newline at end of file diff --git a/cv/classification/poolformer_m48/data b/cv/classification/poolformer_m48/data new file mode 120000 index 00000000..eed2d0bc --- /dev/null +++ b/cv/classification/poolformer_m48/data @@ -0,0 +1 @@ +../data/ \ No newline at end of file diff --git a/cv/classification/poolformer_m48/infer.sh b/cv/classification/poolformer_m48/infer.sh new file mode 100755 index 00000000..2f4c9f51 --- /dev/null +++ b/cv/classification/poolformer_m48/infer.sh @@ -0,0 +1,16 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=1 +PORT=12346 +MODEL_ARCH="poolformer_m48" +BATCH_SIZE=8 + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/default_settings.yaml \ + --model_arch $MODEL_ARCH \ + --throughput diff --git a/cv/classification/poolformer_m48/logger.py b/cv/classification/poolformer_m48/logger.py new file mode 120000 index 00000000..e1dc8ceb --- /dev/null +++ b/cv/classification/poolformer_m48/logger.py @@ -0,0 +1 @@ +../logger.py \ No newline at end of file diff --git a/cv/classification/poolformer_m48/lr_scheduler.py b/cv/classification/poolformer_m48/lr_scheduler.py new file mode 120000 index 00000000..268fb140 --- /dev/null +++ b/cv/classification/poolformer_m48/lr_scheduler.py @@ -0,0 +1 @@ +../lr_scheduler.py \ No newline at end of file diff --git a/cv/classification/poolformer_m48/main.py 
b/cv/classification/poolformer_m48/main.py new file mode 120000 index 00000000..f05f7527 --- /dev/null +++ b/cv/classification/poolformer_m48/main.py @@ -0,0 +1 @@ +../main.py \ No newline at end of file diff --git a/cv/classification/poolformer_m48/optimizer.py b/cv/classification/poolformer_m48/optimizer.py new file mode 120000 index 00000000..e5dec61d --- /dev/null +++ b/cv/classification/poolformer_m48/optimizer.py @@ -0,0 +1 @@ +../optimizer.py \ No newline at end of file diff --git a/cv/classification/poolformer_m48/requirements.txt b/cv/classification/poolformer_m48/requirements.txt new file mode 120000 index 00000000..dc833dd4 --- /dev/null +++ b/cv/classification/poolformer_m48/requirements.txt @@ -0,0 +1 @@ +../requirements.txt \ No newline at end of file diff --git a/cv/classification/poolformer_m48/train.sh b/cv/classification/poolformer_m48/train.sh new file mode 100755 index 00000000..07be5287 --- /dev/null +++ b/cv/classification/poolformer_m48/train.sh @@ -0,0 +1,19 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=2 +PORT=12346 +MODEL_ARCH="poolformer_m48" +BATCH_SIZE=64 +LEARNING_RATE=1e-4 + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/default_settings.yaml \ + --model_arch $MODEL_ARCH \ + --batch-size $BATCH_SIZE \ + --lr $LEARNING_RATE + diff --git a/cv/classification/poolformer_m48/utils.py b/cv/classification/poolformer_m48/utils.py new file mode 120000 index 00000000..50fbc6d8 --- /dev/null +++ b/cv/classification/poolformer_m48/utils.py @@ -0,0 +1 @@ +../utils.py \ No newline at end of file diff --git a/cv/classification/poolformer_s12/README.md b/cv/classification/poolformer_s12/README.md new file mode 100644 index 00000000..89f938de --- /dev/null +++ b/cv/classification/poolformer_s12/README.md @@ -0,0 +1,65 @@ +## Poolformer + +### Installation +- Install the latest version of OneFlow +```bash +python3 -m pip install 
oneflow -f https://staging.oneflow.info/branch/master/[PLATFORM] +``` +Find more information on [install oneflow](https://github.com/Oneflow-Inc/oneflow#install-oneflow) + +- Install flowvision + +Then install the latest stable release of flowvision + +```bash +pip install flowvision==0.2.1 +``` + +- Install other requirements +```bash +python3 -m pip install -r requirements.txt +``` + + +### Dataset +#### ImageNet +For ImageNet dataset, you can download it from http://image-net.org/. + +### Code Structure + + + ``` + . + ├── configs -> ../configs + │ ├── ... + │ └── default_settings.yaml + ├── data -> ../data + │ ├── __init__.py + │ ├── build.py + │ ├── cached_image_folder.py + │ ├── samplers.py + │ └── zipreader.py + ├── utils.py -> ../utils.py + ├── config.py -> ../config.py + ├── logger.py -> ../logger.py + ├── lr_scheduler.py -> ../lr_scheduler.py + ├── optimizer.py -> ../optimizer.py + ├── main.py + ├── train.sh + └── infer.sh + ``` + + + +### Training +You can use bash script `train.sh` to train this model. +``````` +sh train.sh +``````` + +### Inference + +Bash script `infer.sh` is used to infer the trained model. 
+``````` +sh infer.sh +``````` diff --git a/cv/classification/poolformer_s12/config.py b/cv/classification/poolformer_s12/config.py new file mode 120000 index 00000000..3721332c --- /dev/null +++ b/cv/classification/poolformer_s12/config.py @@ -0,0 +1 @@ +../config.py \ No newline at end of file diff --git a/cv/classification/poolformer_s12/configs b/cv/classification/poolformer_s12/configs new file mode 120000 index 00000000..bd0ab477 --- /dev/null +++ b/cv/classification/poolformer_s12/configs @@ -0,0 +1 @@ +../configs/ \ No newline at end of file diff --git a/cv/classification/poolformer_s12/data b/cv/classification/poolformer_s12/data new file mode 120000 index 00000000..eed2d0bc --- /dev/null +++ b/cv/classification/poolformer_s12/data @@ -0,0 +1 @@ +../data/ \ No newline at end of file diff --git a/cv/classification/poolformer_s12/infer.sh b/cv/classification/poolformer_s12/infer.sh new file mode 100755 index 00000000..9020bfd1 --- /dev/null +++ b/cv/classification/poolformer_s12/infer.sh @@ -0,0 +1,15 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=1 +PORT=12346 +MODEL_ARCH="poolformer_s12" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/default_settings.yaml \ + --model_arch $MODEL_ARCH \ + --throughput diff --git a/cv/classification/poolformer_s12/logger.py b/cv/classification/poolformer_s12/logger.py new file mode 120000 index 00000000..e1dc8ceb --- /dev/null +++ b/cv/classification/poolformer_s12/logger.py @@ -0,0 +1 @@ +../logger.py \ No newline at end of file diff --git a/cv/classification/poolformer_s12/lr_scheduler.py b/cv/classification/poolformer_s12/lr_scheduler.py new file mode 120000 index 00000000..268fb140 --- /dev/null +++ b/cv/classification/poolformer_s12/lr_scheduler.py @@ -0,0 +1 @@ +../lr_scheduler.py \ No newline at end of file diff --git a/cv/classification/poolformer_s12/main.py 
b/cv/classification/poolformer_s12/main.py new file mode 120000 index 00000000..f05f7527 --- /dev/null +++ b/cv/classification/poolformer_s12/main.py @@ -0,0 +1 @@ +../main.py \ No newline at end of file diff --git a/cv/classification/poolformer_s12/optimizer.py b/cv/classification/poolformer_s12/optimizer.py new file mode 120000 index 00000000..e5dec61d --- /dev/null +++ b/cv/classification/poolformer_s12/optimizer.py @@ -0,0 +1 @@ +../optimizer.py \ No newline at end of file diff --git a/cv/classification/poolformer_s12/requirements.txt b/cv/classification/poolformer_s12/requirements.txt new file mode 120000 index 00000000..dc833dd4 --- /dev/null +++ b/cv/classification/poolformer_s12/requirements.txt @@ -0,0 +1 @@ +../requirements.txt \ No newline at end of file diff --git a/cv/classification/poolformer_s12/train.sh b/cv/classification/poolformer_s12/train.sh new file mode 100755 index 00000000..b1207543 --- /dev/null +++ b/cv/classification/poolformer_s12/train.sh @@ -0,0 +1,15 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=8 +PORT=12346 +MODEL_ARCH="poolformer_s12" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/default_settings.yaml \ + --model_arch $MODEL_ARCH + diff --git a/cv/classification/poolformer_s12/utils.py b/cv/classification/poolformer_s12/utils.py new file mode 120000 index 00000000..50fbc6d8 --- /dev/null +++ b/cv/classification/poolformer_s12/utils.py @@ -0,0 +1 @@ +../utils.py \ No newline at end of file diff --git a/cv/classification/poolformer_s24/README.md b/cv/classification/poolformer_s24/README.md new file mode 100644 index 00000000..1e87679d --- /dev/null +++ b/cv/classification/poolformer_s24/README.md @@ -0,0 +1,65 @@ +## Poolformer-S24 + +### Installation +- Install the latest version of OneFlow +```bash +python3 -m pip install oneflow -f https://staging.oneflow.info/branch/master/[PLATFORM] +``` +Find more 
information on [install oneflow](https://github.com/Oneflow-Inc/oneflow#install-oneflow) + +- Install flowvision + +Then install the latest stable release of flowvision + +```bash +pip install flowvision==0.2.1 +``` + +- Install other requirements +```bash +python3 -m pip install -r requirements.txt +``` + + +### Dataset +#### ImageNet +For ImageNet dataset, you can download it from http://image-net.org/. + +### Code Structure + + + ``` + . + ├── configs -> ../configs + │ ├── ... + │ └── default_settings.yaml + ├── data -> ../data + │ ├── __init__.py + │ ├── build.py + │ ├── cached_image_folder.py + │ ├── samplers.py + │ └── zipreader.py + ├── utils.py -> ../utils.py + ├── config.py -> ../config.py + ├── logger.py -> ../logger.py + ├── lr_scheduler.py -> ../lr_scheduler.py + ├── optimizer.py -> ../optimizer.py + ├── main.py + ├── train.sh + └── infer.sh + ``` + + + +### Training +You can use bash script `train.sh` to train this model. +``````` +sh train.sh +``````` + +### Inference + +Bash script `infer.sh` is used to infer the trained model. 
+``````` +sh infer.sh +``````` diff --git a/cv/classification/poolformer_s24/config.py b/cv/classification/poolformer_s24/config.py new file mode 120000 index 00000000..3721332c --- /dev/null +++ b/cv/classification/poolformer_s24/config.py @@ -0,0 +1 @@ +../config.py \ No newline at end of file diff --git a/cv/classification/poolformer_s24/configs b/cv/classification/poolformer_s24/configs new file mode 120000 index 00000000..bd0ab477 --- /dev/null +++ b/cv/classification/poolformer_s24/configs @@ -0,0 +1 @@ +../configs/ \ No newline at end of file diff --git a/cv/classification/poolformer_s24/data b/cv/classification/poolformer_s24/data new file mode 120000 index 00000000..eed2d0bc --- /dev/null +++ b/cv/classification/poolformer_s24/data @@ -0,0 +1 @@ +../data/ \ No newline at end of file diff --git a/cv/classification/poolformer_s24/infer.sh b/cv/classification/poolformer_s24/infer.sh new file mode 100755 index 00000000..37c26043 --- /dev/null +++ b/cv/classification/poolformer_s24/infer.sh @@ -0,0 +1,15 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=1 +PORT=12346 +MODEL_ARCH="poolformer_s24" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/default_settings.yaml \ + --model_arch $MODEL_ARCH \ + --throughput diff --git a/cv/classification/poolformer_s24/logger.py b/cv/classification/poolformer_s24/logger.py new file mode 120000 index 00000000..e1dc8ceb --- /dev/null +++ b/cv/classification/poolformer_s24/logger.py @@ -0,0 +1 @@ +../logger.py \ No newline at end of file diff --git a/cv/classification/poolformer_s24/lr_scheduler.py b/cv/classification/poolformer_s24/lr_scheduler.py new file mode 120000 index 00000000..268fb140 --- /dev/null +++ b/cv/classification/poolformer_s24/lr_scheduler.py @@ -0,0 +1 @@ +../lr_scheduler.py \ No newline at end of file diff --git a/cv/classification/poolformer_s24/main.py 
b/cv/classification/poolformer_s24/main.py new file mode 120000 index 00000000..f05f7527 --- /dev/null +++ b/cv/classification/poolformer_s24/main.py @@ -0,0 +1 @@ +../main.py \ No newline at end of file diff --git a/cv/classification/poolformer_s24/optimizer.py b/cv/classification/poolformer_s24/optimizer.py new file mode 120000 index 00000000..e5dec61d --- /dev/null +++ b/cv/classification/poolformer_s24/optimizer.py @@ -0,0 +1 @@ +../optimizer.py \ No newline at end of file diff --git a/cv/classification/poolformer_s24/requirements.txt b/cv/classification/poolformer_s24/requirements.txt new file mode 120000 index 00000000..dc833dd4 --- /dev/null +++ b/cv/classification/poolformer_s24/requirements.txt @@ -0,0 +1 @@ +../requirements.txt \ No newline at end of file diff --git a/cv/classification/poolformer_s24/train.sh b/cv/classification/poolformer_s24/train.sh new file mode 100755 index 00000000..dadf3dcd --- /dev/null +++ b/cv/classification/poolformer_s24/train.sh @@ -0,0 +1,19 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=4 +PORT=12346 +MODEL_ARCH="poolformer_s24" +BATCH_SIZE=64 +LEARNING_RATE=1e-4 + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/default_settings.yaml \ + --model_arch $MODEL_ARCH \ + --batch-size $BATCH_SIZE \ + --lr $LEARNING_RATE + diff --git a/cv/classification/poolformer_s24/utils.py b/cv/classification/poolformer_s24/utils.py new file mode 120000 index 00000000..50fbc6d8 --- /dev/null +++ b/cv/classification/poolformer_s24/utils.py @@ -0,0 +1 @@ +../utils.py \ No newline at end of file diff --git a/cv/classification/poolformer_s36/README.md b/cv/classification/poolformer_s36/README.md new file mode 100644 index 00000000..eca5c079 --- /dev/null +++ b/cv/classification/poolformer_s36/README.md @@ -0,0 +1,65 @@ +## Poolformer-S36 + +### Installation +- Install the latest version of OneFlow +```bash +python3 -m pip install 
oneflow -f https://staging.oneflow.info/branch/master/[PLATFORM] +``` +Find more information on [install oneflow](https://github.com/Oneflow-Inc/oneflow#install-oneflow) + +- Install flowvision + +Then install the latest stable release of flowvision + +```bash +pip install flowvision==0.2.1 +``` + +- Install other requirements +```bash +python3 -m pip install -r requirements.txt +``` + + +### Dataset +#### ImageNet +For ImageNet dataset, you can download it from http://image-net.org/. + +### Code Structure + + + ``` + . + ├── configs -> ../configs + │ ├── ... + │ └── default_settings.yaml + ├── data -> ../data + │ ├── __init__.py + │ ├── build.py + │ ├── cached_image_folder.py + │ ├── samplers.py + │ └── zipreader.py + ├── utils.py -> ../utils.py + ├── config.py -> ../config.py + ├── logger.py -> ../logger.py + ├── lr_scheduler.py -> ../lr_scheduler.py + ├── optimizer.py -> ../optimizer.py + ├── main.py + ├── train.sh + └── infer.sh + ``` + + + +### Training +You can use bash script `train.sh` to train this model. +``````` +sh train.sh +``````` + +### Inference + +Bash script `infer.sh` is used to infer the trained model. 
+``````` +sh infer.sh +``````` diff --git a/cv/classification/poolformer_s36/config.py b/cv/classification/poolformer_s36/config.py new file mode 120000 index 00000000..3721332c --- /dev/null +++ b/cv/classification/poolformer_s36/config.py @@ -0,0 +1 @@ +../config.py \ No newline at end of file diff --git a/cv/classification/poolformer_s36/configs b/cv/classification/poolformer_s36/configs new file mode 120000 index 00000000..bd0ab477 --- /dev/null +++ b/cv/classification/poolformer_s36/configs @@ -0,0 +1 @@ +../configs/ \ No newline at end of file diff --git a/cv/classification/poolformer_s36/data b/cv/classification/poolformer_s36/data new file mode 120000 index 00000000..eed2d0bc --- /dev/null +++ b/cv/classification/poolformer_s36/data @@ -0,0 +1 @@ +../data/ \ No newline at end of file diff --git a/cv/classification/poolformer_s36/infer.sh b/cv/classification/poolformer_s36/infer.sh new file mode 100755 index 00000000..ddedfece --- /dev/null +++ b/cv/classification/poolformer_s36/infer.sh @@ -0,0 +1,15 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=1 +PORT=12346 +MODEL_ARCH="poolformer_s36" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/default_settings.yaml \ + --model_arch $MODEL_ARCH \ + --throughput diff --git a/cv/classification/poolformer_s36/logger.py b/cv/classification/poolformer_s36/logger.py new file mode 120000 index 00000000..e1dc8ceb --- /dev/null +++ b/cv/classification/poolformer_s36/logger.py @@ -0,0 +1 @@ +../logger.py \ No newline at end of file diff --git a/cv/classification/poolformer_s36/lr_scheduler.py b/cv/classification/poolformer_s36/lr_scheduler.py new file mode 120000 index 00000000..268fb140 --- /dev/null +++ b/cv/classification/poolformer_s36/lr_scheduler.py @@ -0,0 +1 @@ +../lr_scheduler.py \ No newline at end of file diff --git a/cv/classification/poolformer_s36/main.py 
b/cv/classification/poolformer_s36/main.py new file mode 120000 index 00000000..f05f7527 --- /dev/null +++ b/cv/classification/poolformer_s36/main.py @@ -0,0 +1 @@ +../main.py \ No newline at end of file diff --git a/cv/classification/poolformer_s36/optimizer.py b/cv/classification/poolformer_s36/optimizer.py new file mode 120000 index 00000000..e5dec61d --- /dev/null +++ b/cv/classification/poolformer_s36/optimizer.py @@ -0,0 +1 @@ +../optimizer.py \ No newline at end of file diff --git a/cv/classification/poolformer_s36/requirements.txt b/cv/classification/poolformer_s36/requirements.txt new file mode 120000 index 00000000..dc833dd4 --- /dev/null +++ b/cv/classification/poolformer_s36/requirements.txt @@ -0,0 +1 @@ +../requirements.txt \ No newline at end of file diff --git a/cv/classification/poolformer_s36/train.sh b/cv/classification/poolformer_s36/train.sh new file mode 100755 index 00000000..cb59d79a --- /dev/null +++ b/cv/classification/poolformer_s36/train.sh @@ -0,0 +1,19 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=4 +PORT=12346 +MODEL_ARCH="poolformer_s36" +BATCH_SIZE=64 +LEARNING_RATE=1e-4 + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/default_settings.yaml \ + --model_arch $MODEL_ARCH \ + --batch-size $BATCH_SIZE \ + --lr $LEARNING_RATE + diff --git a/cv/classification/poolformer_s36/utils.py b/cv/classification/poolformer_s36/utils.py new file mode 120000 index 00000000..50fbc6d8 --- /dev/null +++ b/cv/classification/poolformer_s36/utils.py @@ -0,0 +1 @@ +../utils.py \ No newline at end of file diff --git a/cv/classification/pvt_large/README.md b/cv/classification/pvt_large/README.md new file mode 100644 index 00000000..41546fcc --- /dev/null +++ b/cv/classification/pvt_large/README.md @@ -0,0 +1,66 @@ +## PVT_large + +### Installation +- Install the latest version of OneFlow +```bash +python3 -m pip install oneflow -f 
https://staging.oneflow.info/branch/master/[PLATFORM] +``` +Find more information on [install oneflow](https://github.com/Oneflow-Inc/oneflow#install-oneflow) + +- Install flowvision + +Then install the latest stable release of flowvision + +```bash +pip install flowvision==0.2.1 +``` + +- Install other requirements +```bash +python3 -m pip install -r requirements.txt +``` + +### Dataset +#### ImageNet +For ImageNet dataset, you can download it from http://image-net.org/. We provide the following two ways to load data: + +- For standard folder dataset, move validation images to labeled sub-folders. The file structure should look like: + ```bash + $ tree data + imagenet + ├── train + │ ├── class1 + │ │ ├── img1.jpeg + │ │ ├── img2.jpeg + │ │ └── ... + │ ├── class2 + │ │ ├── img3.jpeg + │ │ └── ... + │ └── ... + └── val + ├── class1 + │ ├── img4.jpeg + │ ├── img5.jpeg + │ └── ... + ├── class2 + │ ├── img6.jpeg + │ └── ... + └── ... + + +### Training + +You can use bash script `train.sh` to train this model. + +```bash +sh train.sh +``` + +### inference + +Bash script `infer.sh` is used to infer the trained model. 
+ +```bash +sh infer.sh +``` + diff --git a/cv/classification/pvt_large/config.py b/cv/classification/pvt_large/config.py new file mode 120000 index 00000000..3721332c --- /dev/null +++ b/cv/classification/pvt_large/config.py @@ -0,0 +1 @@ +../config.py \ No newline at end of file diff --git a/cv/classification/pvt_large/configs b/cv/classification/pvt_large/configs new file mode 120000 index 00000000..271eaab3 --- /dev/null +++ b/cv/classification/pvt_large/configs @@ -0,0 +1 @@ +../configs \ No newline at end of file diff --git a/cv/classification/pvt_large/data b/cv/classification/pvt_large/data new file mode 120000 index 00000000..4909e06e --- /dev/null +++ b/cv/classification/pvt_large/data @@ -0,0 +1 @@ +../data \ No newline at end of file diff --git a/cv/classification/pvt_large/infer.sh b/cv/classification/pvt_large/infer.sh new file mode 100755 index 00000000..0cd8b86a --- /dev/null +++ b/cv/classification/pvt_large/infer.sh @@ -0,0 +1,16 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=1 +PORT=12346 +MODEL_ARCH="pvt_large" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/default_settings.yaml \ + --model_arch $MODEL_ARCH \ + --throughput + diff --git a/cv/classification/pvt_large/logger.py b/cv/classification/pvt_large/logger.py new file mode 120000 index 00000000..e1dc8ceb --- /dev/null +++ b/cv/classification/pvt_large/logger.py @@ -0,0 +1 @@ +../logger.py \ No newline at end of file diff --git a/cv/classification/pvt_large/lr_scheduler.py b/cv/classification/pvt_large/lr_scheduler.py new file mode 120000 index 00000000..268fb140 --- /dev/null +++ b/cv/classification/pvt_large/lr_scheduler.py @@ -0,0 +1 @@ +../lr_scheduler.py \ No newline at end of file diff --git a/cv/classification/pvt_large/main.py b/cv/classification/pvt_large/main.py new file mode 120000 index 00000000..f05f7527 --- /dev/null +++ b/cv/classification/pvt_large/main.py 
@@ -0,0 +1 @@ +../main.py \ No newline at end of file diff --git a/cv/classification/pvt_large/optimizer.py b/cv/classification/pvt_large/optimizer.py new file mode 120000 index 00000000..e5dec61d --- /dev/null +++ b/cv/classification/pvt_large/optimizer.py @@ -0,0 +1 @@ +../optimizer.py \ No newline at end of file diff --git a/cv/classification/pvt_large/requirements.txt b/cv/classification/pvt_large/requirements.txt new file mode 120000 index 00000000..dc833dd4 --- /dev/null +++ b/cv/classification/pvt_large/requirements.txt @@ -0,0 +1 @@ +../requirements.txt \ No newline at end of file diff --git a/cv/classification/pvt_large/train.sh b/cv/classification/pvt_large/train.sh new file mode 100755 index 00000000..f239f94a --- /dev/null +++ b/cv/classification/pvt_large/train.sh @@ -0,0 +1,20 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=4 +PORT=12346 +MODEL_ARCH="pvt_large" +BATCH_SIZE=32 +LEARNING_RATE=1e-4 + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/default_settings.yaml \ + --model_arch $MODEL_ARCH \ + --batch-size $BATCH_SIZE \ + --lr $LEARNING_RATE + + diff --git a/cv/classification/pvt_large/utils.py b/cv/classification/pvt_large/utils.py new file mode 120000 index 00000000..50fbc6d8 --- /dev/null +++ b/cv/classification/pvt_large/utils.py @@ -0,0 +1 @@ +../utils.py \ No newline at end of file diff --git a/cv/classification/pvt_medium/README.md b/cv/classification/pvt_medium/README.md new file mode 100644 index 00000000..95f2011b --- /dev/null +++ b/cv/classification/pvt_medium/README.md @@ -0,0 +1,66 @@ +## PVT_medium + +### Installation +- Install the latest version of OneFlow +```bash +python3 -m pip install oneflow -f https://staging.oneflow.info/branch/master/[PLATFORM] +``` +Find more information on [install oneflow](https://github.com/Oneflow-Inc/oneflow#install-oneflow) + +- Install flowvision + +Then install the latest stable 
release of flowvision + +```bash +pip install flowvision==0.2.1 +``` + +- Install other requirements +```bash +python3 -m pip install -r requirements.txt +``` + +### Dataset +#### ImageNet +For ImageNet dataset, you can download it from http://image-net.org/. We provide the following two ways to load data: + +- For standard folder dataset, move validation images to labeled sub-folders. The file structure should look like: + ```bash + $ tree data + imagenet + ├── train + │ ├── class1 + │ │ ├── img1.jpeg + │ │ ├── img2.jpeg + │ │ └── ... + │ ├── class2 + │ │ ├── img3.jpeg + │ │ └── ... + │ └── ... + └── val + ├── class1 + │ ├── img4.jpeg + │ ├── img5.jpeg + │ └── ... + ├── class2 + │ ├── img6.jpeg + │ └── ... + └── ... + + +### Training + +You can use bash script `train.sh` to train this model. + +```bash +sh train.sh +``` + +### inference + +Bash script `infer.sh` is used to infer the trained model. + +```bash +sh infer.sh +``` + diff --git a/cv/classification/pvt_medium/config.py b/cv/classification/pvt_medium/config.py new file mode 120000 index 00000000..3721332c --- /dev/null +++ b/cv/classification/pvt_medium/config.py @@ -0,0 +1 @@ +../config.py \ No newline at end of file diff --git a/cv/classification/pvt_medium/configs b/cv/classification/pvt_medium/configs new file mode 120000 index 00000000..271eaab3 --- /dev/null +++ b/cv/classification/pvt_medium/configs @@ -0,0 +1 @@ +../configs \ No newline at end of file diff --git a/cv/classification/pvt_medium/data b/cv/classification/pvt_medium/data new file mode 120000 index 00000000..4909e06e --- /dev/null +++ b/cv/classification/pvt_medium/data @@ -0,0 +1 @@ +../data \ No newline at end of file diff --git a/cv/classification/pvt_medium/infer.sh b/cv/classification/pvt_medium/infer.sh new file mode 100755 index 00000000..09dc917e --- /dev/null +++ b/cv/classification/pvt_medium/infer.sh @@ -0,0 +1,16 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=1 +PORT=12346 +MODEL_ARCH="pvt_medium" + +python3 -m 
oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/default_settings.yaml \ + --model_arch $MODEL_ARCH \ + --throughput + diff --git a/cv/classification/pvt_medium/logger.py b/cv/classification/pvt_medium/logger.py new file mode 120000 index 00000000..e1dc8ceb --- /dev/null +++ b/cv/classification/pvt_medium/logger.py @@ -0,0 +1 @@ +../logger.py \ No newline at end of file diff --git a/cv/classification/pvt_medium/lr_scheduler.py b/cv/classification/pvt_medium/lr_scheduler.py new file mode 120000 index 00000000..268fb140 --- /dev/null +++ b/cv/classification/pvt_medium/lr_scheduler.py @@ -0,0 +1 @@ +../lr_scheduler.py \ No newline at end of file diff --git a/cv/classification/pvt_medium/main.py b/cv/classification/pvt_medium/main.py new file mode 120000 index 00000000..f05f7527 --- /dev/null +++ b/cv/classification/pvt_medium/main.py @@ -0,0 +1 @@ +../main.py \ No newline at end of file diff --git a/cv/classification/pvt_medium/optimizer.py b/cv/classification/pvt_medium/optimizer.py new file mode 120000 index 00000000..e5dec61d --- /dev/null +++ b/cv/classification/pvt_medium/optimizer.py @@ -0,0 +1 @@ +../optimizer.py \ No newline at end of file diff --git a/cv/classification/pvt_medium/requirements.txt b/cv/classification/pvt_medium/requirements.txt new file mode 120000 index 00000000..dc833dd4 --- /dev/null +++ b/cv/classification/pvt_medium/requirements.txt @@ -0,0 +1 @@ +../requirements.txt \ No newline at end of file diff --git a/cv/classification/pvt_medium/train.sh b/cv/classification/pvt_medium/train.sh new file mode 100755 index 00000000..e3feb936 --- /dev/null +++ b/cv/classification/pvt_medium/train.sh @@ -0,0 +1,20 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=4 +PORT=12346 +MODEL_ARCH="pvt_medium" +BATCH_SIZE=32 +LEARNING_RATE=1e-4 + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port 
$PORT \ + main.py \ + --cfg configs/default_settings.yaml \ + --model_arch $MODEL_ARCH \ + --batch-size $BATCH_SIZE \ + --lr $LEARNING_RATE + + diff --git a/cv/classification/pvt_medium/utils.py b/cv/classification/pvt_medium/utils.py new file mode 120000 index 00000000..50fbc6d8 --- /dev/null +++ b/cv/classification/pvt_medium/utils.py @@ -0,0 +1 @@ +../utils.py \ No newline at end of file diff --git a/cv/classification/pvt_small/README.md b/cv/classification/pvt_small/README.md new file mode 100644 index 00000000..6366d61e --- /dev/null +++ b/cv/classification/pvt_small/README.md @@ -0,0 +1,66 @@ +## PVT_small + +### Installation +- Install the latest version of OneFlow +```bash +python3 -m pip install oneflow -f https://staging.oneflow.info/branch/master/[PLATFORM] +``` +Find more information on [install oneflow](https://github.com/Oneflow-Inc/oneflow#install-oneflow) + +- Install flowvision + +Then install the latest stable release of flowvision + +```bash +pip install flowvision==0.2.1 +``` + +- Install other requirements +```bash +python3 -m pip install -r requirements.txt +``` + +### Dataset +#### ImageNet +For ImageNet dataset, you can download it from http://image-net.org/. We provide the following two ways to load data: + +- For standard folder dataset, move validation images to labeled sub-folders. The file structure should look like: + ```bash + $ tree data + imagenet + ├── train + │ ├── class1 + │ │ ├── img1.jpeg + │ │ ├── img2.jpeg + │ │ └── ... + │ ├── class2 + │ │ ├── img3.jpeg + │ │ └── ... + │ └── ... + └── val + ├── class1 + │ ├── img4.jpeg + │ ├── img5.jpeg + │ └── ... + ├── class2 + │ ├── img6.jpeg + │ └── ... + └── ... + + +### Training + +You can use bash script `train.sh` to train this model. + +```bash +sh train.sh +``` + +### inference + +Bash script `infer.sh` is used to infer the trained model. 
+ +```bash +sh infer.sh +``` + diff --git a/cv/classification/pvt_small/config.py b/cv/classification/pvt_small/config.py new file mode 120000 index 00000000..3721332c --- /dev/null +++ b/cv/classification/pvt_small/config.py @@ -0,0 +1 @@ +../config.py \ No newline at end of file diff --git a/cv/classification/pvt_small/configs b/cv/classification/pvt_small/configs new file mode 120000 index 00000000..271eaab3 --- /dev/null +++ b/cv/classification/pvt_small/configs @@ -0,0 +1 @@ +../configs \ No newline at end of file diff --git a/cv/classification/pvt_small/data b/cv/classification/pvt_small/data new file mode 120000 index 00000000..4909e06e --- /dev/null +++ b/cv/classification/pvt_small/data @@ -0,0 +1 @@ +../data \ No newline at end of file diff --git a/cv/classification/pvt_small/infer.sh b/cv/classification/pvt_small/infer.sh new file mode 100755 index 00000000..0c04930d --- /dev/null +++ b/cv/classification/pvt_small/infer.sh @@ -0,0 +1,16 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=1 +PORT=12346 +MODEL_ARCH="pvt_small" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/default_settings.yaml \ + --model_arch $MODEL_ARCH \ + --throughput + diff --git a/cv/classification/pvt_small/logger.py b/cv/classification/pvt_small/logger.py new file mode 120000 index 00000000..e1dc8ceb --- /dev/null +++ b/cv/classification/pvt_small/logger.py @@ -0,0 +1 @@ +../logger.py \ No newline at end of file diff --git a/cv/classification/pvt_small/lr_scheduler.py b/cv/classification/pvt_small/lr_scheduler.py new file mode 120000 index 00000000..268fb140 --- /dev/null +++ b/cv/classification/pvt_small/lr_scheduler.py @@ -0,0 +1 @@ +../lr_scheduler.py \ No newline at end of file diff --git a/cv/classification/pvt_small/main.py b/cv/classification/pvt_small/main.py new file mode 120000 index 00000000..f05f7527 --- /dev/null +++ b/cv/classification/pvt_small/main.py 
@@ -0,0 +1 @@ +../main.py \ No newline at end of file diff --git a/cv/classification/pvt_small/optimizer.py b/cv/classification/pvt_small/optimizer.py new file mode 120000 index 00000000..e5dec61d --- /dev/null +++ b/cv/classification/pvt_small/optimizer.py @@ -0,0 +1 @@ +../optimizer.py \ No newline at end of file diff --git a/cv/classification/pvt_small/requirements.txt b/cv/classification/pvt_small/requirements.txt new file mode 120000 index 00000000..dc833dd4 --- /dev/null +++ b/cv/classification/pvt_small/requirements.txt @@ -0,0 +1 @@ +../requirements.txt \ No newline at end of file diff --git a/cv/classification/pvt_small/train.sh b/cv/classification/pvt_small/train.sh new file mode 100755 index 00000000..9f0aac00 --- /dev/null +++ b/cv/classification/pvt_small/train.sh @@ -0,0 +1,15 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=8 +PORT=12346 +MODEL_ARCH="pvt_small" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/default_settings.yaml \ + --model_arch $MODEL_ARCH + diff --git a/cv/classification/pvt_small/utils.py b/cv/classification/pvt_small/utils.py new file mode 120000 index 00000000..50fbc6d8 --- /dev/null +++ b/cv/classification/pvt_small/utils.py @@ -0,0 +1 @@ +../utils.py \ No newline at end of file diff --git a/cv/classification/pvt_tiny/README.md b/cv/classification/pvt_tiny/README.md new file mode 100644 index 00000000..20eea371 --- /dev/null +++ b/cv/classification/pvt_tiny/README.md @@ -0,0 +1,66 @@ +## PVT_tiny + +### Installation +- Install the latest version of OneFlow +```bash +python3 -m pip install oneflow -f https://staging.oneflow.info/branch/master/[PLATFORM] +``` +Find more information on [install oneflow](https://github.com/Oneflow-Inc/oneflow#install-oneflow) + +- Install flowvision + +Then install the latest stable release of flowvision + +```bash +pip install flowvision==0.2.1 +``` + +- Install other requirements 
+```bash +python3 -m pip install -r requirements.txt +``` + +### Dataset +#### ImageNet +For ImageNet dataset, you can download it from http://image-net.org/. We provide the following two ways to load data: + +- For standard folder dataset, move validation images to labeled sub-folders. The file structure should look like: + ```bash + $ tree data + imagenet + ├── train + │ ├── class1 + │ │ ├── img1.jpeg + │ │ ├── img2.jpeg + │ │ └── ... + │ ├── class2 + │ │ ├── img3.jpeg + │ │ └── ... + │ └── ... + └── val + ├── class1 + │ ├── img4.jpeg + │ ├── img5.jpeg + │ └── ... + ├── class2 + │ ├── img6.jpeg + │ └── ... + └── ... + + +### Training + +You can use bash script `train.sh` to train this model. + +```bash +sh train.sh +``` + +### inference + +Bash script `infer.sh` is used to infer the trained model. + +```bash +sh infer.sh +``` + diff --git a/cv/classification/pvt_tiny/config.py b/cv/classification/pvt_tiny/config.py new file mode 120000 index 00000000..3721332c --- /dev/null +++ b/cv/classification/pvt_tiny/config.py @@ -0,0 +1 @@ +../config.py \ No newline at end of file diff --git a/cv/classification/pvt_tiny/configs b/cv/classification/pvt_tiny/configs new file mode 120000 index 00000000..271eaab3 --- /dev/null +++ b/cv/classification/pvt_tiny/configs @@ -0,0 +1 @@ +../configs \ No newline at end of file diff --git a/cv/classification/pvt_tiny/data b/cv/classification/pvt_tiny/data new file mode 120000 index 00000000..4909e06e --- /dev/null +++ b/cv/classification/pvt_tiny/data @@ -0,0 +1 @@ +../data \ No newline at end of file diff --git a/cv/classification/pvt_tiny/infer.sh b/cv/classification/pvt_tiny/infer.sh new file mode 100755 index 00000000..d887fc10 --- /dev/null +++ b/cv/classification/pvt_tiny/infer.sh @@ -0,0 +1,16 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=1 +PORT=12346 +MODEL_ARCH="pvt_tiny" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + 
--cfg configs/default_settings.yaml \ + --model_arch $MODEL_ARCH \ + --throughput + diff --git a/cv/classification/pvt_tiny/logger.py b/cv/classification/pvt_tiny/logger.py new file mode 120000 index 00000000..e1dc8ceb --- /dev/null +++ b/cv/classification/pvt_tiny/logger.py @@ -0,0 +1 @@ +../logger.py \ No newline at end of file diff --git a/cv/classification/pvt_tiny/lr_scheduler.py b/cv/classification/pvt_tiny/lr_scheduler.py new file mode 120000 index 00000000..268fb140 --- /dev/null +++ b/cv/classification/pvt_tiny/lr_scheduler.py @@ -0,0 +1 @@ +../lr_scheduler.py \ No newline at end of file diff --git a/cv/classification/pvt_tiny/main.py b/cv/classification/pvt_tiny/main.py new file mode 120000 index 00000000..f05f7527 --- /dev/null +++ b/cv/classification/pvt_tiny/main.py @@ -0,0 +1 @@ +../main.py \ No newline at end of file diff --git a/cv/classification/pvt_tiny/optimizer.py b/cv/classification/pvt_tiny/optimizer.py new file mode 120000 index 00000000..e5dec61d --- /dev/null +++ b/cv/classification/pvt_tiny/optimizer.py @@ -0,0 +1 @@ +../optimizer.py \ No newline at end of file diff --git a/cv/classification/pvt_tiny/requirements.txt b/cv/classification/pvt_tiny/requirements.txt new file mode 120000 index 00000000..dc833dd4 --- /dev/null +++ b/cv/classification/pvt_tiny/requirements.txt @@ -0,0 +1 @@ +../requirements.txt \ No newline at end of file diff --git a/cv/classification/pvt_tiny/train.sh b/cv/classification/pvt_tiny/train.sh new file mode 100755 index 00000000..fb0aa047 --- /dev/null +++ b/cv/classification/pvt_tiny/train.sh @@ -0,0 +1,20 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=2 +PORT=12346 +MODEL_ARCH="pvt_tiny" +BATCH_SIZE=64 +LEARNING_RATE=1e-4 + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/default_settings.yaml \ + --model_arch $MODEL_ARCH \ + --batch-size $BATCH_SIZE \ + --lr $LEARNING_RATE + + diff --git 
a/cv/classification/pvt_tiny/utils.py b/cv/classification/pvt_tiny/utils.py new file mode 120000 index 00000000..50fbc6d8 --- /dev/null +++ b/cv/classification/pvt_tiny/utils.py @@ -0,0 +1 @@ +../utils.py \ No newline at end of file diff --git a/cv/classification/regionvit_base_224/README.md b/cv/classification/regionvit_base_224/README.md new file mode 100644 index 00000000..21ce07db --- /dev/null +++ b/cv/classification/regionvit_base_224/README.md @@ -0,0 +1,66 @@ +## regionvit_base_224 + +### Installation +- Install the latest version of OneFlow +```bash +python3 -m pip install oneflow -f https://staging.oneflow.info/branch/master/[PLATFORM] +``` +Find more information on [install oneflow](https://github.com/Oneflow-Inc/oneflow#install-oneflow) + +- Install flowvision + +Then install the latest stable release of flowvision + +```bash +pip install flowvision==0.2.1 +``` + +- Install other requirements +```bash +python3 -m pip install -r requirements.txt +``` + +### Dataset +#### ImageNet +For ImageNet dataset, you can download it from http://image-net.org/. We provide the following two ways to load data: + +- For standard folder dataset, move validation images to labeled sub-folders. The file structure should look like: + ```bash + $ tree data + imagenet + ├── train + │ ├── class1 + │ │ ├── img1.jpeg + │ │ ├── img2.jpeg + │ │ └── ... + │ ├── class2 + │ │ ├── img3.jpeg + │ │ └── ... + │ └── ... + └── val + ├── class1 + │ ├── img4.jpeg + │ ├── img5.jpeg + │ └── ... + ├── class2 + │ ├── img6.jpeg + │ └── ... + └── ... + + +### Training + +You can use bash script `train.sh` to train this model. + +```bash +sh train.sh +``` + +### inference + +Bash script `infer.sh` is used to infer the trained model. 
+ +```bash +sh infer.sh +``` + diff --git a/cv/classification/regionvit_base_224/config.py b/cv/classification/regionvit_base_224/config.py new file mode 120000 index 00000000..3721332c --- /dev/null +++ b/cv/classification/regionvit_base_224/config.py @@ -0,0 +1 @@ +../config.py \ No newline at end of file diff --git a/cv/classification/regionvit_base_224/configs b/cv/classification/regionvit_base_224/configs new file mode 120000 index 00000000..271eaab3 --- /dev/null +++ b/cv/classification/regionvit_base_224/configs @@ -0,0 +1 @@ +../configs \ No newline at end of file diff --git a/cv/classification/regionvit_base_224/data b/cv/classification/regionvit_base_224/data new file mode 120000 index 00000000..4909e06e --- /dev/null +++ b/cv/classification/regionvit_base_224/data @@ -0,0 +1 @@ +../data \ No newline at end of file diff --git a/cv/classification/regionvit_base_224/infer.sh b/cv/classification/regionvit_base_224/infer.sh new file mode 100755 index 00000000..cf0fce22 --- /dev/null +++ b/cv/classification/regionvit_base_224/infer.sh @@ -0,0 +1,16 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=1 +PORT=12346 +MODEL_ARCH="regionvit_base_224" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/regionvit_settings.yaml \ + --model_arch $MODEL_ARCH \ + --throughput + diff --git a/cv/classification/regionvit_base_224/logger.py b/cv/classification/regionvit_base_224/logger.py new file mode 120000 index 00000000..e1dc8ceb --- /dev/null +++ b/cv/classification/regionvit_base_224/logger.py @@ -0,0 +1 @@ +../logger.py \ No newline at end of file diff --git a/cv/classification/regionvit_base_224/lr_scheduler.py b/cv/classification/regionvit_base_224/lr_scheduler.py new file mode 120000 index 00000000..268fb140 --- /dev/null +++ b/cv/classification/regionvit_base_224/lr_scheduler.py @@ -0,0 +1 @@ +../lr_scheduler.py \ No newline at end of file diff --git 
a/cv/classification/regionvit_base_224/main.py b/cv/classification/regionvit_base_224/main.py new file mode 120000 index 00000000..f05f7527 --- /dev/null +++ b/cv/classification/regionvit_base_224/main.py @@ -0,0 +1 @@ +../main.py \ No newline at end of file diff --git a/cv/classification/regionvit_base_224/optimizer.py b/cv/classification/regionvit_base_224/optimizer.py new file mode 120000 index 00000000..e5dec61d --- /dev/null +++ b/cv/classification/regionvit_base_224/optimizer.py @@ -0,0 +1 @@ +../optimizer.py \ No newline at end of file diff --git a/cv/classification/regionvit_base_224/requirements.txt b/cv/classification/regionvit_base_224/requirements.txt new file mode 120000 index 00000000..dc833dd4 --- /dev/null +++ b/cv/classification/regionvit_base_224/requirements.txt @@ -0,0 +1 @@ +../requirements.txt \ No newline at end of file diff --git a/cv/classification/regionvit_base_224/train.sh b/cv/classification/regionvit_base_224/train.sh new file mode 100755 index 00000000..e3b2f578 --- /dev/null +++ b/cv/classification/regionvit_base_224/train.sh @@ -0,0 +1,15 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=8 +PORT=12346 +MODEL_ARCH="regionvit_base_224" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/regionvit_settings.yaml \ + --model_arch $MODEL_ARCH + diff --git a/cv/classification/regionvit_base_224/utils.py b/cv/classification/regionvit_base_224/utils.py new file mode 120000 index 00000000..50fbc6d8 --- /dev/null +++ b/cv/classification/regionvit_base_224/utils.py @@ -0,0 +1 @@ +../utils.py \ No newline at end of file diff --git a/cv/classification/regnet_x_16gf/README.md b/cv/classification/regnet_x_16gf/README.md new file mode 100644 index 00000000..e011ed86 --- /dev/null +++ b/cv/classification/regnet_x_16gf/README.md @@ -0,0 +1,66 @@ +## RegNet_x_16gf + +### Installation +- Install the latest version of OneFlow +```bash +python3 
-m pip install oneflow -f https://staging.oneflow.info/branch/master/[PLATFORM] +``` +Find more information on [install oneflow](https://github.com/Oneflow-Inc/oneflow#install-oneflow) + +- Install flowvision + +Then install the latest stable release of flowvision + +```bash +pip install flowvision==0.2.1 +``` + +- Install other requirements +```bash +python3 -m pip install -r requirements.txt +``` + +### Dataset +#### ImageNet +For ImageNet dataset, you can download it from http://image-net.org/. We provide the following two ways to load data: + +- For standard folder dataset, move validation images to labeled sub-folders. The file structure should look like: + ```bash + $ tree data + imagenet + ├── train + │ ├── class1 + │ │ ├── img1.jpeg + │ │ ├── img2.jpeg + │ │ └── ... + │ ├── class2 + │ │ ├── img3.jpeg + │ │ └── ... + │ └── ... + └── val + ├── class1 + │ ├── img4.jpeg + │ ├── img5.jpeg + │ └── ... + ├── class2 + │ ├── img6.jpeg + │ └── ... + └── ... + + +### Training + +You can use bash script `train.sh` to train this model. + +```bash +sh train.sh +``` + +### inference + +Bash script `infer.sh` is used to infer the trained model. 
+ +```bash +sh infer.sh +``` + diff --git a/cv/classification/regnet_x_16gf/config.py b/cv/classification/regnet_x_16gf/config.py new file mode 120000 index 00000000..3721332c --- /dev/null +++ b/cv/classification/regnet_x_16gf/config.py @@ -0,0 +1 @@ +../config.py \ No newline at end of file diff --git a/cv/classification/regnet_x_16gf/configs b/cv/classification/regnet_x_16gf/configs new file mode 120000 index 00000000..271eaab3 --- /dev/null +++ b/cv/classification/regnet_x_16gf/configs @@ -0,0 +1 @@ +../configs \ No newline at end of file diff --git a/cv/classification/regnet_x_16gf/data b/cv/classification/regnet_x_16gf/data new file mode 120000 index 00000000..4909e06e --- /dev/null +++ b/cv/classification/regnet_x_16gf/data @@ -0,0 +1 @@ +../data \ No newline at end of file diff --git a/cv/classification/regnet_x_16gf/infer.sh b/cv/classification/regnet_x_16gf/infer.sh new file mode 100755 index 00000000..931bdd62 --- /dev/null +++ b/cv/classification/regnet_x_16gf/infer.sh @@ -0,0 +1,15 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux +GPU_NUMS=1 +PORT=12346 +MODEL_ARCH="regnet_x_16gf" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/regnet_settings.yaml \ + --model_arch $MODEL_ARCH \ + --throughput + diff --git a/cv/classification/regnet_x_16gf/logger.py b/cv/classification/regnet_x_16gf/logger.py new file mode 120000 index 00000000..e1dc8ceb --- /dev/null +++ b/cv/classification/regnet_x_16gf/logger.py @@ -0,0 +1 @@ +../logger.py \ No newline at end of file diff --git a/cv/classification/regnet_x_16gf/lr_scheduler.py b/cv/classification/regnet_x_16gf/lr_scheduler.py new file mode 120000 index 00000000..268fb140 --- /dev/null +++ b/cv/classification/regnet_x_16gf/lr_scheduler.py @@ -0,0 +1 @@ +../lr_scheduler.py \ No newline at end of file diff --git a/cv/classification/regnet_x_16gf/main.py b/cv/classification/regnet_x_16gf/main.py new file mode 120000 
index 00000000..f05f7527 --- /dev/null +++ b/cv/classification/regnet_x_16gf/main.py @@ -0,0 +1 @@ +../main.py \ No newline at end of file diff --git a/cv/classification/regnet_x_16gf/optimizer.py b/cv/classification/regnet_x_16gf/optimizer.py new file mode 120000 index 00000000..e5dec61d --- /dev/null +++ b/cv/classification/regnet_x_16gf/optimizer.py @@ -0,0 +1 @@ +../optimizer.py \ No newline at end of file diff --git a/cv/classification/regnet_x_16gf/requirements.txt b/cv/classification/regnet_x_16gf/requirements.txt new file mode 120000 index 00000000..dc833dd4 --- /dev/null +++ b/cv/classification/regnet_x_16gf/requirements.txt @@ -0,0 +1 @@ +../requirements.txt \ No newline at end of file diff --git a/cv/classification/regnet_x_16gf/train.sh b/cv/classification/regnet_x_16gf/train.sh new file mode 100755 index 00000000..ce60355e --- /dev/null +++ b/cv/classification/regnet_x_16gf/train.sh @@ -0,0 +1,15 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=1 +PORT=12346 +MODEL_ARCH="regnet_x_16gf" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/regnet_settings.yaml \ + --model_arch $MODEL_ARCH + diff --git a/cv/classification/regnet_x_16gf/utils.py b/cv/classification/regnet_x_16gf/utils.py new file mode 120000 index 00000000..50fbc6d8 --- /dev/null +++ b/cv/classification/regnet_x_16gf/utils.py @@ -0,0 +1 @@ +../utils.py \ No newline at end of file diff --git a/cv/classification/regnet_x_1_6gf/README.md b/cv/classification/regnet_x_1_6gf/README.md new file mode 100644 index 00000000..83454d1f --- /dev/null +++ b/cv/classification/regnet_x_1_6gf/README.md @@ -0,0 +1,66 @@ +## RegNet_x_1_6gf + +### Installation +- Install the latest version of OneFlow +```bash +python3 -m pip install oneflow -f https://staging.oneflow.info/branch/master/[PLATFORM] +``` +Find more information on [install 
oneflow](https://github.com/Oneflow-Inc/oneflow#install-oneflow) + +- Install flowvision + +Then install the latest stable release of flowvision + +```bash +pip install flowvision==0.2.1 +``` + +- Install other requirements +```bash +python3 -m pip install -r requirements.txt +``` + +### Dataset +#### ImageNet +For ImageNet dataset, you can download it from http://image-net.org/. We provide the following two ways to load data: + +- For standard folder dataset, move validation images to labeled sub-folders. The file structure should look like: + ```bash + $ tree data + imagenet + ├── train + │ ├── class1 + │ │ ├── img1.jpeg + │ │ ├── img2.jpeg + │ │ └── ... + │ ├── class2 + │ │ ├── img3.jpeg + │ │ └── ... + │ └── ... + └── val + ├── class1 + │ ├── img4.jpeg + │ ├── img5.jpeg + │ └── ... + ├── class2 + │ ├── img6.jpeg + │ └── ... + └── ... + + +### Training + +You can use bash script `train.sh` to train this model. + +```bash +sh train.sh +``` + +### inference + +Bash script `infer.sh` is used to infer the trained model. 
+ +```bash +sh infer.sh +``` + diff --git a/cv/classification/regnet_x_1_6gf/config.py b/cv/classification/regnet_x_1_6gf/config.py new file mode 120000 index 00000000..3721332c --- /dev/null +++ b/cv/classification/regnet_x_1_6gf/config.py @@ -0,0 +1 @@ +../config.py \ No newline at end of file diff --git a/cv/classification/regnet_x_1_6gf/configs b/cv/classification/regnet_x_1_6gf/configs new file mode 120000 index 00000000..271eaab3 --- /dev/null +++ b/cv/classification/regnet_x_1_6gf/configs @@ -0,0 +1 @@ +../configs \ No newline at end of file diff --git a/cv/classification/regnet_x_1_6gf/data b/cv/classification/regnet_x_1_6gf/data new file mode 120000 index 00000000..4909e06e --- /dev/null +++ b/cv/classification/regnet_x_1_6gf/data @@ -0,0 +1 @@ +../data \ No newline at end of file diff --git a/cv/classification/regnet_x_1_6gf/infer.sh b/cv/classification/regnet_x_1_6gf/infer.sh new file mode 100755 index 00000000..66083a85 --- /dev/null +++ b/cv/classification/regnet_x_1_6gf/infer.sh @@ -0,0 +1,16 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=1 +PORT=12346 +MODEL_ARCH="regnet_x_1_6gf" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/regnet_settings.yaml \ + --model_arch $MODEL_ARCH \ + --throughput + diff --git a/cv/classification/regnet_x_1_6gf/logger.py b/cv/classification/regnet_x_1_6gf/logger.py new file mode 120000 index 00000000..e1dc8ceb --- /dev/null +++ b/cv/classification/regnet_x_1_6gf/logger.py @@ -0,0 +1 @@ +../logger.py \ No newline at end of file diff --git a/cv/classification/regnet_x_1_6gf/lr_scheduler.py b/cv/classification/regnet_x_1_6gf/lr_scheduler.py new file mode 120000 index 00000000..268fb140 --- /dev/null +++ b/cv/classification/regnet_x_1_6gf/lr_scheduler.py @@ -0,0 +1 @@ +../lr_scheduler.py \ No newline at end of file diff --git a/cv/classification/regnet_x_1_6gf/main.py 
b/cv/classification/regnet_x_1_6gf/main.py new file mode 120000 index 00000000..f05f7527 --- /dev/null +++ b/cv/classification/regnet_x_1_6gf/main.py @@ -0,0 +1 @@ +../main.py \ No newline at end of file diff --git a/cv/classification/regnet_x_1_6gf/optimizer.py b/cv/classification/regnet_x_1_6gf/optimizer.py new file mode 120000 index 00000000..e5dec61d --- /dev/null +++ b/cv/classification/regnet_x_1_6gf/optimizer.py @@ -0,0 +1 @@ +../optimizer.py \ No newline at end of file diff --git a/cv/classification/regnet_x_1_6gf/requirements.txt b/cv/classification/regnet_x_1_6gf/requirements.txt new file mode 120000 index 00000000..dc833dd4 --- /dev/null +++ b/cv/classification/regnet_x_1_6gf/requirements.txt @@ -0,0 +1 @@ +../requirements.txt \ No newline at end of file diff --git a/cv/classification/regnet_x_1_6gf/train.sh b/cv/classification/regnet_x_1_6gf/train.sh new file mode 100755 index 00000000..30c0ba27 --- /dev/null +++ b/cv/classification/regnet_x_1_6gf/train.sh @@ -0,0 +1,15 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=8 +PORT=12346 +MODEL_ARCH="regnet_x_1_6gf" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/regnet_settings.yaml \ + --model_arch $MODEL_ARCH + diff --git a/cv/classification/regnet_x_1_6gf/utils.py b/cv/classification/regnet_x_1_6gf/utils.py new file mode 120000 index 00000000..50fbc6d8 --- /dev/null +++ b/cv/classification/regnet_x_1_6gf/utils.py @@ -0,0 +1 @@ +../utils.py \ No newline at end of file diff --git a/cv/classification/regnet_x_32gf/README.md b/cv/classification/regnet_x_32gf/README.md new file mode 100644 index 00000000..7ec01988 --- /dev/null +++ b/cv/classification/regnet_x_32gf/README.md @@ -0,0 +1,66 @@ +## regnet_x_32gf + +### Installation +- Install the latest version of OneFlow +```bash +python3 -m pip install oneflow -f https://staging.oneflow.info/branch/master/[PLATFORM] +``` +Find more information 
on [install oneflow](https://github.com/Oneflow-Inc/oneflow#install-oneflow) + +- Install flowvision + +Then install the latest stable release of flowvision + +```bash +pip install flowvision==0.2.1 +``` + +- Install other requirements +```bash +python3 -m pip install -r requirements.txt +``` + +### Dataset +#### ImageNet +For ImageNet dataset, you can download it from http://image-net.org/. We provide the following two ways to load data: + +- For standard folder dataset, move validation images to labeled sub-folders. The file structure should look like: + ```bash + $ tree data + imagenet + ├── train + │ ├── class1 + │ │ ├── img1.jpeg + │ │ ├── img2.jpeg + │ │ └── ... + │ ├── class2 + │ │ ├── img3.jpeg + │ │ └── ... + │ └── ... + └── val + ├── class1 + │ ├── img4.jpeg + │ ├── img5.jpeg + │ └── ... + ├── class2 + │ ├── img6.jpeg + │ └── ... + └── ... + + +### Training + +You can use bash script `train.sh` to train this model. + +```bash +sh train.sh +``` + +### inference + +Bash script `infer.sh` is used to infer the trained model. 
+ +```bash +sh infer.sh +``` + diff --git a/cv/classification/regnet_x_32gf/config.py b/cv/classification/regnet_x_32gf/config.py new file mode 120000 index 00000000..3721332c --- /dev/null +++ b/cv/classification/regnet_x_32gf/config.py @@ -0,0 +1 @@ +../config.py \ No newline at end of file diff --git a/cv/classification/regnet_x_32gf/configs b/cv/classification/regnet_x_32gf/configs new file mode 120000 index 00000000..271eaab3 --- /dev/null +++ b/cv/classification/regnet_x_32gf/configs @@ -0,0 +1 @@ +../configs \ No newline at end of file diff --git a/cv/classification/regnet_x_32gf/data b/cv/classification/regnet_x_32gf/data new file mode 120000 index 00000000..4909e06e --- /dev/null +++ b/cv/classification/regnet_x_32gf/data @@ -0,0 +1 @@ +../data \ No newline at end of file diff --git a/cv/classification/regnet_x_32gf/infer.sh b/cv/classification/regnet_x_32gf/infer.sh new file mode 100755 index 00000000..e94ac5ae --- /dev/null +++ b/cv/classification/regnet_x_32gf/infer.sh @@ -0,0 +1,15 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux +GPU_NUMS=1 +PORT=12346 +MODEL_ARCH="regnet_x_32gf" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/regnet_settings.yaml \ + --model_arch $MODEL_ARCH \ + --throughput + diff --git a/cv/classification/regnet_x_32gf/logger.py b/cv/classification/regnet_x_32gf/logger.py new file mode 120000 index 00000000..e1dc8ceb --- /dev/null +++ b/cv/classification/regnet_x_32gf/logger.py @@ -0,0 +1 @@ +../logger.py \ No newline at end of file diff --git a/cv/classification/regnet_x_32gf/lr_scheduler.py b/cv/classification/regnet_x_32gf/lr_scheduler.py new file mode 120000 index 00000000..268fb140 --- /dev/null +++ b/cv/classification/regnet_x_32gf/lr_scheduler.py @@ -0,0 +1 @@ +../lr_scheduler.py \ No newline at end of file diff --git a/cv/classification/regnet_x_32gf/main.py b/cv/classification/regnet_x_32gf/main.py new file mode 120000 
index 00000000..f05f7527 --- /dev/null +++ b/cv/classification/regnet_x_32gf/main.py @@ -0,0 +1 @@ +../main.py \ No newline at end of file diff --git a/cv/classification/regnet_x_32gf/optimizer.py b/cv/classification/regnet_x_32gf/optimizer.py new file mode 120000 index 00000000..e5dec61d --- /dev/null +++ b/cv/classification/regnet_x_32gf/optimizer.py @@ -0,0 +1 @@ +../optimizer.py \ No newline at end of file diff --git a/cv/classification/regnet_x_32gf/requirements.txt b/cv/classification/regnet_x_32gf/requirements.txt new file mode 120000 index 00000000..dc833dd4 --- /dev/null +++ b/cv/classification/regnet_x_32gf/requirements.txt @@ -0,0 +1 @@ +../requirements.txt \ No newline at end of file diff --git a/cv/classification/regnet_x_32gf/train.sh b/cv/classification/regnet_x_32gf/train.sh new file mode 100755 index 00000000..8acc042c --- /dev/null +++ b/cv/classification/regnet_x_32gf/train.sh @@ -0,0 +1,15 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=1 +PORT=12346 +MODEL_ARCH="regnet_x_32gf" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/regnet_settings.yaml \ + --model_arch $MODEL_ARCH + diff --git a/cv/classification/regnet_x_32gf/utils.py b/cv/classification/regnet_x_32gf/utils.py new file mode 120000 index 00000000..50fbc6d8 --- /dev/null +++ b/cv/classification/regnet_x_32gf/utils.py @@ -0,0 +1 @@ +../utils.py \ No newline at end of file diff --git a/cv/classification/regnet_x_3_2gf/README.md b/cv/classification/regnet_x_3_2gf/README.md new file mode 100644 index 00000000..106b9f6e --- /dev/null +++ b/cv/classification/regnet_x_3_2gf/README.md @@ -0,0 +1,66 @@ +## RegNet_x_3_2gf + +### Installation +- Install the latest version of OneFlow +```bash +python3 -m pip install oneflow -f https://staging.oneflow.info/branch/master/[PLATFORM] +``` +Find more information on [install 
oneflow](https://github.com/Oneflow-Inc/oneflow#install-oneflow) + +- Install flowvision + +Then install the latest stable release of flowvision + +```bash +pip install flowvision==0.2.1 +``` + +- Install other requirements +```bash +python3 -m pip install -r requirements.txt +``` + +### Dataset +#### ImageNet +For ImageNet dataset, you can download it from http://image-net.org/. We provide the following two ways to load data: + +- For standard folder dataset, move validation images to labeled sub-folders. The file structure should look like: + ```bash + $ tree data + imagenet + ├── train + │ ├── class1 + │ │ ├── img1.jpeg + │ │ ├── img2.jpeg + │ │ └── ... + │ ├── class2 + │ │ ├── img3.jpeg + │ │ └── ... + │ └── ... + └── val + ├── class1 + │ ├── img4.jpeg + │ ├── img5.jpeg + │ └── ... + ├── class2 + │ ├── img6.jpeg + │ └── ... + └── ... + + +### Training + +You can use bash script `train.sh` to train this model. + +```bash +sh train.sh +``` + +### inference + +Bash script `infer.sh` is used to infer the trained model. 
+ +```bash +sh infer.sh +``` + diff --git a/cv/classification/regnet_x_3_2gf/config.py b/cv/classification/regnet_x_3_2gf/config.py new file mode 120000 index 00000000..3721332c --- /dev/null +++ b/cv/classification/regnet_x_3_2gf/config.py @@ -0,0 +1 @@ +../config.py \ No newline at end of file diff --git a/cv/classification/regnet_x_3_2gf/configs b/cv/classification/regnet_x_3_2gf/configs new file mode 120000 index 00000000..271eaab3 --- /dev/null +++ b/cv/classification/regnet_x_3_2gf/configs @@ -0,0 +1 @@ +../configs \ No newline at end of file diff --git a/cv/classification/regnet_x_3_2gf/data b/cv/classification/regnet_x_3_2gf/data new file mode 120000 index 00000000..4909e06e --- /dev/null +++ b/cv/classification/regnet_x_3_2gf/data @@ -0,0 +1 @@ +../data \ No newline at end of file diff --git a/cv/classification/regnet_x_3_2gf/infer.sh b/cv/classification/regnet_x_3_2gf/infer.sh new file mode 100755 index 00000000..f03645fc --- /dev/null +++ b/cv/classification/regnet_x_3_2gf/infer.sh @@ -0,0 +1,16 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=1 +PORT=12346 +MODEL_ARCH="regnet_x_3_2gf" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/regnet_settings.yaml \ + --model_arch $MODEL_ARCH \ + --throughput + diff --git a/cv/classification/regnet_x_3_2gf/logger.py b/cv/classification/regnet_x_3_2gf/logger.py new file mode 120000 index 00000000..e1dc8ceb --- /dev/null +++ b/cv/classification/regnet_x_3_2gf/logger.py @@ -0,0 +1 @@ +../logger.py \ No newline at end of file diff --git a/cv/classification/regnet_x_3_2gf/lr_scheduler.py b/cv/classification/regnet_x_3_2gf/lr_scheduler.py new file mode 120000 index 00000000..268fb140 --- /dev/null +++ b/cv/classification/regnet_x_3_2gf/lr_scheduler.py @@ -0,0 +1 @@ +../lr_scheduler.py \ No newline at end of file diff --git a/cv/classification/regnet_x_3_2gf/main.py 
b/cv/classification/regnet_x_3_2gf/main.py new file mode 120000 index 00000000..f05f7527 --- /dev/null +++ b/cv/classification/regnet_x_3_2gf/main.py @@ -0,0 +1 @@ +../main.py \ No newline at end of file diff --git a/cv/classification/regnet_x_3_2gf/optimizer.py b/cv/classification/regnet_x_3_2gf/optimizer.py new file mode 120000 index 00000000..e5dec61d --- /dev/null +++ b/cv/classification/regnet_x_3_2gf/optimizer.py @@ -0,0 +1 @@ +../optimizer.py \ No newline at end of file diff --git a/cv/classification/regnet_x_3_2gf/requirements.txt b/cv/classification/regnet_x_3_2gf/requirements.txt new file mode 120000 index 00000000..dc833dd4 --- /dev/null +++ b/cv/classification/regnet_x_3_2gf/requirements.txt @@ -0,0 +1 @@ +../requirements.txt \ No newline at end of file diff --git a/cv/classification/regnet_x_3_2gf/train.sh b/cv/classification/regnet_x_3_2gf/train.sh new file mode 100755 index 00000000..76bb8bcf --- /dev/null +++ b/cv/classification/regnet_x_3_2gf/train.sh @@ -0,0 +1,15 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=8 +PORT=12346 +MODEL_ARCH="regnet_x_3_2gf" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/regnet_settings.yaml \ + --model_arch $MODEL_ARCH + diff --git a/cv/classification/regnet_x_3_2gf/utils.py b/cv/classification/regnet_x_3_2gf/utils.py new file mode 120000 index 00000000..50fbc6d8 --- /dev/null +++ b/cv/classification/regnet_x_3_2gf/utils.py @@ -0,0 +1 @@ +../utils.py \ No newline at end of file diff --git a/cv/classification/regnet_x_800mf/README.md b/cv/classification/regnet_x_800mf/README.md new file mode 100644 index 00000000..e91e97e3 --- /dev/null +++ b/cv/classification/regnet_x_800mf/README.md @@ -0,0 +1,67 @@ +## RegNet +RegNet is a neural network architecture designed for deep learning tasks, particularly in the field of computer vision. 
It was introduced in the paper titled "Designing Network Design Spaces" by Radosavovic et al. in 2020. RegNet is known for its efficiency, scalability, and strong performance on various computer vision tasks, making it a popular choice for researchers and practitioners in the field. + +### Installation +- Install the latest version of OneFlow +```bash +python3 -m pip install oneflow -f https://staging.oneflow.info/branch/master/[PLATFORM] +``` +Find more information on [install oneflow](https://github.com/Oneflow-Inc/oneflow#install-oneflow) + +- Install flowvision + +Then install the latest stable release of flowvision + +```bash +pip install flowvision==0.2.1 +``` + +- Install other requirements +```bash +python3 -m pip install -r requirements.txt +``` + +### Dataset +#### ImageNet +For ImageNet dataset, you can download it from http://image-net.org/. We provide the following two ways to load data: + +- For standard folder dataset, move validation images to labeled sub-folders. The file structure should look like: + ```bash + $ tree data + imagenet + ├── train + │ ├── class1 + │ │ ├── img1.jpeg + │ │ ├── img2.jpeg + │ │ └── ... + │ ├── class2 + │ │ ├── img3.jpeg + │ │ └── ... + │ └── ... + └── val + ├── class1 + │ ├── img4.jpeg + │ ├── img5.jpeg + │ └── ... + ├── class2 + │ ├── img6.jpeg + │ └── ... + └── ... + + +### Training + +You can use bash script `train.sh` to train this model. + +```bash +sh train.sh +``` + +### inference + +Bash script `infer.sh` is used to infer the trained model. 
+ +```bash +sh infer.sh +``` + diff --git a/cv/classification/regnet_x_800mf/config.py b/cv/classification/regnet_x_800mf/config.py new file mode 120000 index 00000000..3721332c --- /dev/null +++ b/cv/classification/regnet_x_800mf/config.py @@ -0,0 +1 @@ +../config.py \ No newline at end of file diff --git a/cv/classification/regnet_x_800mf/configs b/cv/classification/regnet_x_800mf/configs new file mode 120000 index 00000000..271eaab3 --- /dev/null +++ b/cv/classification/regnet_x_800mf/configs @@ -0,0 +1 @@ +../configs \ No newline at end of file diff --git a/cv/classification/regnet_x_800mf/data b/cv/classification/regnet_x_800mf/data new file mode 120000 index 00000000..4909e06e --- /dev/null +++ b/cv/classification/regnet_x_800mf/data @@ -0,0 +1 @@ +../data \ No newline at end of file diff --git a/cv/classification/regnet_x_800mf/infer.sh b/cv/classification/regnet_x_800mf/infer.sh new file mode 100755 index 00000000..8d2080b9 --- /dev/null +++ b/cv/classification/regnet_x_800mf/infer.sh @@ -0,0 +1,16 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=1 +PORT=12346 +MODEL_ARCH="regnet_x_800mf" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/regnet_settings.yaml \ + --model_arch $MODEL_ARCH \ + --throughput + diff --git a/cv/classification/regnet_x_800mf/logger.py b/cv/classification/regnet_x_800mf/logger.py new file mode 120000 index 00000000..e1dc8ceb --- /dev/null +++ b/cv/classification/regnet_x_800mf/logger.py @@ -0,0 +1 @@ +../logger.py \ No newline at end of file diff --git a/cv/classification/regnet_x_800mf/lr_scheduler.py b/cv/classification/regnet_x_800mf/lr_scheduler.py new file mode 120000 index 00000000..268fb140 --- /dev/null +++ b/cv/classification/regnet_x_800mf/lr_scheduler.py @@ -0,0 +1 @@ +../lr_scheduler.py \ No newline at end of file diff --git a/cv/classification/regnet_x_800mf/main.py 
b/cv/classification/regnet_x_800mf/main.py new file mode 120000 index 00000000..f05f7527 --- /dev/null +++ b/cv/classification/regnet_x_800mf/main.py @@ -0,0 +1 @@ +../main.py \ No newline at end of file diff --git a/cv/classification/regnet_x_800mf/optimizer.py b/cv/classification/regnet_x_800mf/optimizer.py new file mode 120000 index 00000000..e5dec61d --- /dev/null +++ b/cv/classification/regnet_x_800mf/optimizer.py @@ -0,0 +1 @@ +../optimizer.py \ No newline at end of file diff --git a/cv/classification/regnet_x_800mf/requirements.txt b/cv/classification/regnet_x_800mf/requirements.txt new file mode 120000 index 00000000..dc833dd4 --- /dev/null +++ b/cv/classification/regnet_x_800mf/requirements.txt @@ -0,0 +1 @@ +../requirements.txt \ No newline at end of file diff --git a/cv/classification/regnet_x_800mf/train.sh b/cv/classification/regnet_x_800mf/train.sh new file mode 100755 index 00000000..faaf46e3 --- /dev/null +++ b/cv/classification/regnet_x_800mf/train.sh @@ -0,0 +1,15 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=8 +PORT=12346 +MODEL_ARCH="regnet_x_800mf" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/regnet_settings.yaml \ + --model_arch $MODEL_ARCH + diff --git a/cv/classification/regnet_x_800mf/utils.py b/cv/classification/regnet_x_800mf/utils.py new file mode 120000 index 00000000..50fbc6d8 --- /dev/null +++ b/cv/classification/regnet_x_800mf/utils.py @@ -0,0 +1 @@ +../utils.py \ No newline at end of file diff --git a/cv/classification/regnet_x_8gf/README.md b/cv/classification/regnet_x_8gf/README.md new file mode 100644 index 00000000..0c671b98 --- /dev/null +++ b/cv/classification/regnet_x_8gf/README.md @@ -0,0 +1,66 @@ +## regnet_x_8gf + +### Installation +- Install the latest version of OneFlow +```bash +python3 -m pip install oneflow -f https://staging.oneflow.info/branch/master/[PLATFORM] +``` +Find more information on 
[install oneflow](https://github.com/Oneflow-Inc/oneflow#install-oneflow) + +- Install flowvision + +Then install the latest stable release of flowvision + +```bash +pip install flowvision==0.2.1 +``` + +- Install other requirements +```bash +python3 -m pip install -r requirements.txt +``` + +### Dataset +#### ImageNet +For ImageNet dataset, you can download it from http://image-net.org/. We provide the following two ways to load data: + +- For standard folder dataset, move validation images to labeled sub-folders. The file structure should look like: + ```bash + $ tree data + imagenet + ├── train + │ ├── class1 + │ │ ├── img1.jpeg + │ │ ├── img2.jpeg + │ │ └── ... + │ ├── class2 + │ │ ├── img3.jpeg + │ │ └── ... + │ └── ... + └── val + ├── class1 + │ ├── img4.jpeg + │ ├── img5.jpeg + │ └── ... + ├── class2 + │ ├── img6.jpeg + │ └── ... + └── ... + + +### Training + +You can use bash script `train.sh` to train this model. + +```bash +sh train.sh +``` + +### inference + +Bash script `infer.sh` is used to infer the trained model. 
+ +```bash +sh infer.sh +``` + diff --git a/cv/classification/regnet_x_8gf/config.py b/cv/classification/regnet_x_8gf/config.py new file mode 120000 index 00000000..3721332c --- /dev/null +++ b/cv/classification/regnet_x_8gf/config.py @@ -0,0 +1 @@ +../config.py \ No newline at end of file diff --git a/cv/classification/regnet_x_8gf/configs b/cv/classification/regnet_x_8gf/configs new file mode 120000 index 00000000..271eaab3 --- /dev/null +++ b/cv/classification/regnet_x_8gf/configs @@ -0,0 +1 @@ +../configs \ No newline at end of file diff --git a/cv/classification/regnet_x_8gf/data b/cv/classification/regnet_x_8gf/data new file mode 120000 index 00000000..4909e06e --- /dev/null +++ b/cv/classification/regnet_x_8gf/data @@ -0,0 +1 @@ +../data \ No newline at end of file diff --git a/cv/classification/regnet_x_8gf/infer.sh b/cv/classification/regnet_x_8gf/infer.sh new file mode 100755 index 00000000..44357a2b --- /dev/null +++ b/cv/classification/regnet_x_8gf/infer.sh @@ -0,0 +1,15 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux +GPU_NUMS=1 +PORT=12346 +MODEL_ARCH="regnet_x_8gf" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/regnet_settings.yaml \ + --model_arch $MODEL_ARCH \ + --throughput + diff --git a/cv/classification/regnet_x_8gf/logger.py b/cv/classification/regnet_x_8gf/logger.py new file mode 120000 index 00000000..e1dc8ceb --- /dev/null +++ b/cv/classification/regnet_x_8gf/logger.py @@ -0,0 +1 @@ +../logger.py \ No newline at end of file diff --git a/cv/classification/regnet_x_8gf/lr_scheduler.py b/cv/classification/regnet_x_8gf/lr_scheduler.py new file mode 120000 index 00000000..268fb140 --- /dev/null +++ b/cv/classification/regnet_x_8gf/lr_scheduler.py @@ -0,0 +1 @@ +../lr_scheduler.py \ No newline at end of file diff --git a/cv/classification/regnet_x_8gf/main.py b/cv/classification/regnet_x_8gf/main.py new file mode 120000 index 
00000000..f05f7527 --- /dev/null +++ b/cv/classification/regnet_x_8gf/main.py @@ -0,0 +1 @@ +../main.py \ No newline at end of file diff --git a/cv/classification/regnet_x_8gf/optimizer.py b/cv/classification/regnet_x_8gf/optimizer.py new file mode 120000 index 00000000..e5dec61d --- /dev/null +++ b/cv/classification/regnet_x_8gf/optimizer.py @@ -0,0 +1 @@ +../optimizer.py \ No newline at end of file diff --git a/cv/classification/regnet_x_8gf/requirements.txt b/cv/classification/regnet_x_8gf/requirements.txt new file mode 120000 index 00000000..dc833dd4 --- /dev/null +++ b/cv/classification/regnet_x_8gf/requirements.txt @@ -0,0 +1 @@ +../requirements.txt \ No newline at end of file diff --git a/cv/classification/regnet_x_8gf/train.sh b/cv/classification/regnet_x_8gf/train.sh new file mode 100755 index 00000000..d4f7d387 --- /dev/null +++ b/cv/classification/regnet_x_8gf/train.sh @@ -0,0 +1,15 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=1 +PORT=12346 +MODEL_ARCH="regnet_x_8gf" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/regnet_settings.yaml \ + --model_arch $MODEL_ARCH + diff --git a/cv/classification/regnet_x_8gf/utils.py b/cv/classification/regnet_x_8gf/utils.py new file mode 120000 index 00000000..50fbc6d8 --- /dev/null +++ b/cv/classification/regnet_x_8gf/utils.py @@ -0,0 +1 @@ +../utils.py \ No newline at end of file diff --git a/cv/classification/regnet_y_16gf/README.md b/cv/classification/regnet_y_16gf/README.md new file mode 100644 index 00000000..e47bfff8 --- /dev/null +++ b/cv/classification/regnet_y_16gf/README.md @@ -0,0 +1,66 @@ +## RegNet_y_16gf + +### Installation +- Install the latest version of OneFlow +```bash +python3 -m pip install oneflow -f https://staging.oneflow.info/branch/master/[PLATFORM] +``` +Find more information on [install oneflow](https://github.com/Oneflow-Inc/oneflow#install-oneflow) + +- Install 
flowvision + +Then install the latest stable release of flowvision + +```bash +pip install flowvision==0.2.1 +``` + +- Install other requirements +```bash +python3 -m pip install -r requirements.txt +``` + +### Dataset +#### ImageNet +For ImageNet dataset, you can download it from http://image-net.org/. We provide the following two ways to load data: + +- For standard folder dataset, move validation images to labeled sub-folders. The file structure should look like: + ```bash + $ tree data + imagenet + ├── train + │ ├── class1 + │ │ ├── img1.jpeg + │ │ ├── img2.jpeg + │ │ └── ... + │ ├── class2 + │ │ ├── img3.jpeg + │ │ └── ... + │ └── ... + └── val + ├── class1 + │ ├── img4.jpeg + │ ├── img5.jpeg + │ └── ... + ├── class2 + │ ├── img6.jpeg + │ └── ... + └── ... + + +### Training + +You can use bash script `train.sh` to train this model. + +```bash +sh train.sh +``` + +### inference + +Bash script `infer.sh` is used to infer the trained model. + +```bash +sh infer.sh +``` + diff --git a/cv/classification/regnet_y_16gf/config.py b/cv/classification/regnet_y_16gf/config.py new file mode 120000 index 00000000..3721332c --- /dev/null +++ b/cv/classification/regnet_y_16gf/config.py @@ -0,0 +1 @@ +../config.py \ No newline at end of file diff --git a/cv/classification/regnet_y_16gf/configs b/cv/classification/regnet_y_16gf/configs new file mode 120000 index 00000000..271eaab3 --- /dev/null +++ b/cv/classification/regnet_y_16gf/configs @@ -0,0 +1 @@ +../configs \ No newline at end of file diff --git a/cv/classification/regnet_y_16gf/data b/cv/classification/regnet_y_16gf/data new file mode 120000 index 00000000..4909e06e --- /dev/null +++ b/cv/classification/regnet_y_16gf/data @@ -0,0 +1 @@ +../data \ No newline at end of file diff --git a/cv/classification/regnet_y_16gf/infer.sh b/cv/classification/regnet_y_16gf/infer.sh new file mode 100755 index 00000000..92d163fb --- /dev/null +++ b/cv/classification/regnet_y_16gf/infer.sh @@ -0,0 +1,15 @@ +export 
PYTHONPATH=$PWD:$PYTHONPATH +set -aux +GPU_NUMS=1 +PORT=12346 +MODEL_ARCH="regnet_y_16gf" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/regnet_settings.yaml \ + --model_arch $MODEL_ARCH \ + --throughput + diff --git a/cv/classification/regnet_y_16gf/logger.py b/cv/classification/regnet_y_16gf/logger.py new file mode 120000 index 00000000..e1dc8ceb --- /dev/null +++ b/cv/classification/regnet_y_16gf/logger.py @@ -0,0 +1 @@ +../logger.py \ No newline at end of file diff --git a/cv/classification/regnet_y_16gf/lr_scheduler.py b/cv/classification/regnet_y_16gf/lr_scheduler.py new file mode 120000 index 00000000..268fb140 --- /dev/null +++ b/cv/classification/regnet_y_16gf/lr_scheduler.py @@ -0,0 +1 @@ +../lr_scheduler.py \ No newline at end of file diff --git a/cv/classification/regnet_y_16gf/main.py b/cv/classification/regnet_y_16gf/main.py new file mode 120000 index 00000000..f05f7527 --- /dev/null +++ b/cv/classification/regnet_y_16gf/main.py @@ -0,0 +1 @@ +../main.py \ No newline at end of file diff --git a/cv/classification/regnet_y_16gf/optimizer.py b/cv/classification/regnet_y_16gf/optimizer.py new file mode 120000 index 00000000..e5dec61d --- /dev/null +++ b/cv/classification/regnet_y_16gf/optimizer.py @@ -0,0 +1 @@ +../optimizer.py \ No newline at end of file diff --git a/cv/classification/regnet_y_16gf/requirements.txt b/cv/classification/regnet_y_16gf/requirements.txt new file mode 120000 index 00000000..dc833dd4 --- /dev/null +++ b/cv/classification/regnet_y_16gf/requirements.txt @@ -0,0 +1 @@ +../requirements.txt \ No newline at end of file diff --git a/cv/classification/regnet_y_16gf/train.sh b/cv/classification/regnet_y_16gf/train.sh new file mode 100755 index 00000000..2bdd2c43 --- /dev/null +++ b/cv/classification/regnet_y_16gf/train.sh @@ -0,0 +1,15 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=1 +PORT=12346 
+MODEL_ARCH="regnet_y_16gf" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/regnet_settings.yaml \ + --model_arch $MODEL_ARCH + diff --git a/cv/classification/regnet_y_16gf/utils.py b/cv/classification/regnet_y_16gf/utils.py new file mode 120000 index 00000000..50fbc6d8 --- /dev/null +++ b/cv/classification/regnet_y_16gf/utils.py @@ -0,0 +1 @@ +../utils.py \ No newline at end of file diff --git a/cv/classification/regnet_y_1_6gf/README.md b/cv/classification/regnet_y_1_6gf/README.md new file mode 100644 index 00000000..b2eb634e --- /dev/null +++ b/cv/classification/regnet_y_1_6gf/README.md @@ -0,0 +1,66 @@ +## RegNet_y_1_6gf + +### Installation +- Install the latest version of OneFlow +```bash +python3 -m pip install oneflow -f https://staging.oneflow.info/branch/master/[PLATFORM] +``` +Find more information on [install oneflow](https://github.com/Oneflow-Inc/oneflow#install-oneflow) + +- Install flowvision + +Then install the latest stable release of flowvision + +```bash +pip install flowvision==0.2.1 +``` + +- Install other requirements +```bash +python3 -m pip install -r requirements.txt +``` + +### Dataset +#### ImageNet +For ImageNet dataset, you can download it from http://image-net.org/. We provide the following two ways to load data: + +- For standard folder dataset, move validation images to labeled sub-folders. The file structure should look like: + ```bash + $ tree data + imagenet + ├── train + │ ├── class1 + │ │ ├── img1.jpeg + │ │ ├── img2.jpeg + │ │ └── ... + │ ├── class2 + │ │ ├── img3.jpeg + │ │ └── ... + │ └── ... + └── val + ├── class1 + │ ├── img4.jpeg + │ ├── img5.jpeg + │ └── ... + ├── class2 + │ ├── img6.jpeg + │ └── ... + └── ... + + +### Training + +You can use bash script `train.sh` to train this model. + +```bash +sh train.sh +``` + +### inference + +Bash script `infer.sh` is used to infer the trained model. 
+ +```bash +sh infer.sh +``` + diff --git a/cv/classification/regnet_y_1_6gf/config.py b/cv/classification/regnet_y_1_6gf/config.py new file mode 120000 index 00000000..3721332c --- /dev/null +++ b/cv/classification/regnet_y_1_6gf/config.py @@ -0,0 +1 @@ +../config.py \ No newline at end of file diff --git a/cv/classification/regnet_y_1_6gf/configs b/cv/classification/regnet_y_1_6gf/configs new file mode 120000 index 00000000..271eaab3 --- /dev/null +++ b/cv/classification/regnet_y_1_6gf/configs @@ -0,0 +1 @@ +../configs \ No newline at end of file diff --git a/cv/classification/regnet_y_1_6gf/data b/cv/classification/regnet_y_1_6gf/data new file mode 120000 index 00000000..4909e06e --- /dev/null +++ b/cv/classification/regnet_y_1_6gf/data @@ -0,0 +1 @@ +../data \ No newline at end of file diff --git a/cv/classification/regnet_y_1_6gf/infer.sh b/cv/classification/regnet_y_1_6gf/infer.sh new file mode 100755 index 00000000..1c91d260 --- /dev/null +++ b/cv/classification/regnet_y_1_6gf/infer.sh @@ -0,0 +1,16 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=1 +PORT=12346 +MODEL_ARCH="regnet_y_1_6gf" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/regnet_settings.yaml \ + --model_arch $MODEL_ARCH \ + --throughput + diff --git a/cv/classification/regnet_y_1_6gf/logger.py b/cv/classification/regnet_y_1_6gf/logger.py new file mode 120000 index 00000000..e1dc8ceb --- /dev/null +++ b/cv/classification/regnet_y_1_6gf/logger.py @@ -0,0 +1 @@ +../logger.py \ No newline at end of file diff --git a/cv/classification/regnet_y_1_6gf/lr_scheduler.py b/cv/classification/regnet_y_1_6gf/lr_scheduler.py new file mode 120000 index 00000000..268fb140 --- /dev/null +++ b/cv/classification/regnet_y_1_6gf/lr_scheduler.py @@ -0,0 +1 @@ +../lr_scheduler.py \ No newline at end of file diff --git a/cv/classification/regnet_y_1_6gf/main.py 
b/cv/classification/regnet_y_1_6gf/main.py new file mode 120000 index 00000000..f05f7527 --- /dev/null +++ b/cv/classification/regnet_y_1_6gf/main.py @@ -0,0 +1 @@ +../main.py \ No newline at end of file diff --git a/cv/classification/regnet_y_1_6gf/optimizer.py b/cv/classification/regnet_y_1_6gf/optimizer.py new file mode 120000 index 00000000..e5dec61d --- /dev/null +++ b/cv/classification/regnet_y_1_6gf/optimizer.py @@ -0,0 +1 @@ +../optimizer.py \ No newline at end of file diff --git a/cv/classification/regnet_y_1_6gf/requirements.txt b/cv/classification/regnet_y_1_6gf/requirements.txt new file mode 120000 index 00000000..dc833dd4 --- /dev/null +++ b/cv/classification/regnet_y_1_6gf/requirements.txt @@ -0,0 +1 @@ +../requirements.txt \ No newline at end of file diff --git a/cv/classification/regnet_y_1_6gf/train.sh b/cv/classification/regnet_y_1_6gf/train.sh new file mode 100755 index 00000000..bcbcf77c --- /dev/null +++ b/cv/classification/regnet_y_1_6gf/train.sh @@ -0,0 +1,15 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=8 +PORT=12346 +MODEL_ARCH="regnet_y_1_6gf" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/regnet_settings.yaml \ + --model_arch $MODEL_ARCH + diff --git a/cv/classification/regnet_y_1_6gf/utils.py b/cv/classification/regnet_y_1_6gf/utils.py new file mode 120000 index 00000000..50fbc6d8 --- /dev/null +++ b/cv/classification/regnet_y_1_6gf/utils.py @@ -0,0 +1 @@ +../utils.py \ No newline at end of file diff --git a/cv/classification/regnet_y_32gf/README.md b/cv/classification/regnet_y_32gf/README.md new file mode 100644 index 00000000..92f81e37 --- /dev/null +++ b/cv/classification/regnet_y_32gf/README.md @@ -0,0 +1,66 @@ +## RegNet_y_32gf + +### Installation +- Install the latest version of OneFlow +```bash +python3 -m pip install oneflow -f https://staging.oneflow.info/branch/master/[PLATFORM] +``` +Find more information 
on [install oneflow](https://github.com/Oneflow-Inc/oneflow#install-oneflow) + +- Install flowvision + +Then install the latest stable release of flowvision + +```bash +pip install flowvision==0.2.1 +``` + +- Install other requirements +```bash +python3 -m pip install -r requirements.txt +``` + +### Dataset +#### ImageNet +For ImageNet dataset, you can download it from http://image-net.org/. We provide the following two ways to load data: + +- For standard folder dataset, move validation images to labeled sub-folders. The file structure should look like: + ```bash + $ tree data + imagenet + ├── train + │ ├── class1 + │ │ ├── img1.jpeg + │ │ ├── img2.jpeg + │ │ └── ... + │ ├── class2 + │ │ ├── img3.jpeg + │ │ └── ... + │ └── ... + └── val + ├── class1 + │ ├── img4.jpeg + │ ├── img5.jpeg + │ └── ... + ├── class2 + │ ├── img6.jpeg + │ └── ... + └── ... + + +### Training + +You can use bash script `train.sh` to train this model. + +```bash +sh train.sh +``` + +### inference + +Bash script `infer.sh` is used to infer the trained model. 
+ +```bash +sh infer.sh +``` + diff --git a/cv/classification/regnet_y_32gf/config.py b/cv/classification/regnet_y_32gf/config.py new file mode 120000 index 00000000..3721332c --- /dev/null +++ b/cv/classification/regnet_y_32gf/config.py @@ -0,0 +1 @@ +../config.py \ No newline at end of file diff --git a/cv/classification/regnet_y_32gf/configs b/cv/classification/regnet_y_32gf/configs new file mode 120000 index 00000000..271eaab3 --- /dev/null +++ b/cv/classification/regnet_y_32gf/configs @@ -0,0 +1 @@ +../configs \ No newline at end of file diff --git a/cv/classification/regnet_y_32gf/data b/cv/classification/regnet_y_32gf/data new file mode 120000 index 00000000..4909e06e --- /dev/null +++ b/cv/classification/regnet_y_32gf/data @@ -0,0 +1 @@ +../data \ No newline at end of file diff --git a/cv/classification/regnet_y_32gf/infer.sh b/cv/classification/regnet_y_32gf/infer.sh new file mode 100755 index 00000000..2a6721f6 --- /dev/null +++ b/cv/classification/regnet_y_32gf/infer.sh @@ -0,0 +1,15 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux +GPU_NUMS=1 +PORT=12346 +MODEL_ARCH="regnet_y_32gf" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/regnet_settings.yaml \ + --model_arch $MODEL_ARCH \ + --throughput + diff --git a/cv/classification/regnet_y_32gf/logger.py b/cv/classification/regnet_y_32gf/logger.py new file mode 120000 index 00000000..e1dc8ceb --- /dev/null +++ b/cv/classification/regnet_y_32gf/logger.py @@ -0,0 +1 @@ +../logger.py \ No newline at end of file diff --git a/cv/classification/regnet_y_32gf/lr_scheduler.py b/cv/classification/regnet_y_32gf/lr_scheduler.py new file mode 120000 index 00000000..268fb140 --- /dev/null +++ b/cv/classification/regnet_y_32gf/lr_scheduler.py @@ -0,0 +1 @@ +../lr_scheduler.py \ No newline at end of file diff --git a/cv/classification/regnet_y_32gf/main.py b/cv/classification/regnet_y_32gf/main.py new file mode 120000 
index 00000000..f05f7527 --- /dev/null +++ b/cv/classification/regnet_y_32gf/main.py @@ -0,0 +1 @@ +../main.py \ No newline at end of file diff --git a/cv/classification/regnet_y_32gf/optimizer.py b/cv/classification/regnet_y_32gf/optimizer.py new file mode 120000 index 00000000..e5dec61d --- /dev/null +++ b/cv/classification/regnet_y_32gf/optimizer.py @@ -0,0 +1 @@ +../optimizer.py \ No newline at end of file diff --git a/cv/classification/regnet_y_32gf/requirements.txt b/cv/classification/regnet_y_32gf/requirements.txt new file mode 120000 index 00000000..dc833dd4 --- /dev/null +++ b/cv/classification/regnet_y_32gf/requirements.txt @@ -0,0 +1 @@ +../requirements.txt \ No newline at end of file diff --git a/cv/classification/regnet_y_32gf/train.sh b/cv/classification/regnet_y_32gf/train.sh new file mode 100755 index 00000000..937c13b8 --- /dev/null +++ b/cv/classification/regnet_y_32gf/train.sh @@ -0,0 +1,15 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=1 +PORT=12346 +MODEL_ARCH="regnet_y_32gf" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/regnet_settings.yaml \ + --model_arch $MODEL_ARCH + diff --git a/cv/classification/regnet_y_32gf/utils.py b/cv/classification/regnet_y_32gf/utils.py new file mode 120000 index 00000000..50fbc6d8 --- /dev/null +++ b/cv/classification/regnet_y_32gf/utils.py @@ -0,0 +1 @@ +../utils.py \ No newline at end of file diff --git a/cv/classification/regnet_y_3_2gf/README.md b/cv/classification/regnet_y_3_2gf/README.md new file mode 100644 index 00000000..4a9a1af1 --- /dev/null +++ b/cv/classification/regnet_y_3_2gf/README.md @@ -0,0 +1,66 @@ +## RegNet_y_3_2gf + +### Installation +- Install the latest version of OneFlow +```bash +python3 -m pip install oneflow -f https://staging.oneflow.info/branch/master/[PLATFORM] +``` +Find more information on [install 
oneflow](https://github.com/Oneflow-Inc/oneflow#install-oneflow) + +- Install flowvision + +Then install the latest stable release of flowvision + +```bash +pip install flowvision==0.2.1 +``` + +- Install other requirements +```bash +python3 -m pip install -r requirements.txt +``` + +### Dataset +#### ImageNet +For ImageNet dataset, you can download it from http://image-net.org/. We provide the following two ways to load data: + +- For standard folder dataset, move validation images to labeled sub-folders. The file structure should look like: + ```bash + $ tree data + imagenet + ├── train + │ ├── class1 + │ │ ├── img1.jpeg + │ │ ├── img2.jpeg + │ │ └── ... + │ ├── class2 + │ │ ├── img3.jpeg + │ │ └── ... + │ └── ... + └── val + ├── class1 + │ ├── img4.jpeg + │ ├── img5.jpeg + │ └── ... + ├── class2 + │ ├── img6.jpeg + │ └── ... + └── ... + + +### Training + +You can use bash script `train.sh` to train this model. + +```bash +sh train.sh +``` + +### inference + +Bash script `infer.sh` is used to infer the trained model. 
+ +```bash +sh infer.sh +``` + diff --git a/cv/classification/regnet_y_3_2gf/config.py b/cv/classification/regnet_y_3_2gf/config.py new file mode 120000 index 00000000..3721332c --- /dev/null +++ b/cv/classification/regnet_y_3_2gf/config.py @@ -0,0 +1 @@ +../config.py \ No newline at end of file diff --git a/cv/classification/regnet_y_3_2gf/configs b/cv/classification/regnet_y_3_2gf/configs new file mode 120000 index 00000000..271eaab3 --- /dev/null +++ b/cv/classification/regnet_y_3_2gf/configs @@ -0,0 +1 @@ +../configs \ No newline at end of file diff --git a/cv/classification/regnet_y_3_2gf/data b/cv/classification/regnet_y_3_2gf/data new file mode 120000 index 00000000..4909e06e --- /dev/null +++ b/cv/classification/regnet_y_3_2gf/data @@ -0,0 +1 @@ +../data \ No newline at end of file diff --git a/cv/classification/regnet_y_3_2gf/infer.sh b/cv/classification/regnet_y_3_2gf/infer.sh new file mode 100755 index 00000000..86174d81 --- /dev/null +++ b/cv/classification/regnet_y_3_2gf/infer.sh @@ -0,0 +1,16 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=1 +PORT=12346 +MODEL_ARCH="regnet_y_3_2gf" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/regnet_settings.yaml \ + --model_arch $MODEL_ARCH \ + --throughput + diff --git a/cv/classification/regnet_y_3_2gf/logger.py b/cv/classification/regnet_y_3_2gf/logger.py new file mode 120000 index 00000000..e1dc8ceb --- /dev/null +++ b/cv/classification/regnet_y_3_2gf/logger.py @@ -0,0 +1 @@ +../logger.py \ No newline at end of file diff --git a/cv/classification/regnet_y_3_2gf/lr_scheduler.py b/cv/classification/regnet_y_3_2gf/lr_scheduler.py new file mode 120000 index 00000000..268fb140 --- /dev/null +++ b/cv/classification/regnet_y_3_2gf/lr_scheduler.py @@ -0,0 +1 @@ +../lr_scheduler.py \ No newline at end of file diff --git a/cv/classification/regnet_y_3_2gf/main.py 
b/cv/classification/regnet_y_3_2gf/main.py new file mode 120000 index 00000000..f05f7527 --- /dev/null +++ b/cv/classification/regnet_y_3_2gf/main.py @@ -0,0 +1 @@ +../main.py \ No newline at end of file diff --git a/cv/classification/regnet_y_3_2gf/optimizer.py b/cv/classification/regnet_y_3_2gf/optimizer.py new file mode 120000 index 00000000..e5dec61d --- /dev/null +++ b/cv/classification/regnet_y_3_2gf/optimizer.py @@ -0,0 +1 @@ +../optimizer.py \ No newline at end of file diff --git a/cv/classification/regnet_y_3_2gf/requirements.txt b/cv/classification/regnet_y_3_2gf/requirements.txt new file mode 120000 index 00000000..dc833dd4 --- /dev/null +++ b/cv/classification/regnet_y_3_2gf/requirements.txt @@ -0,0 +1 @@ +../requirements.txt \ No newline at end of file diff --git a/cv/classification/regnet_y_3_2gf/train.sh b/cv/classification/regnet_y_3_2gf/train.sh new file mode 100755 index 00000000..ce7bc7e4 --- /dev/null +++ b/cv/classification/regnet_y_3_2gf/train.sh @@ -0,0 +1,15 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=8 +PORT=12346 +MODEL_ARCH="regnet_y_3_2gf" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/regnet_settings.yaml \ + --model_arch $MODEL_ARCH + diff --git a/cv/classification/regnet_y_3_2gf/utils.py b/cv/classification/regnet_y_3_2gf/utils.py new file mode 120000 index 00000000..50fbc6d8 --- /dev/null +++ b/cv/classification/regnet_y_3_2gf/utils.py @@ -0,0 +1 @@ +../utils.py \ No newline at end of file diff --git a/cv/classification/regnet_y_400mf/README.md b/cv/classification/regnet_y_400mf/README.md new file mode 100644 index 00000000..da0c8b87 --- /dev/null +++ b/cv/classification/regnet_y_400mf/README.md @@ -0,0 +1,66 @@ +## RegNet +RegNet is a neural network architecture designed for deep learning tasks, particularly in the field of computer vision. 
It was introduced in the paper titled "Designing Network Design Spaces" by Radosavovic et al. in 2020. RegNet is known for its efficiency, scalability, and strong performance on various computer vision tasks, making it a popular choice for researchers and practitioners in the field. +### Installation +- Install the latest version of OneFlow +```bash +python3 -m pip install oneflow -f https://staging.oneflow.info/branch/master/[PLATFORM] +``` +Find more information on [install oneflow](https://github.com/Oneflow-Inc/oneflow#install-oneflow) + +- Install flowvision + +Then install the latest stable release of flowvision + +```bash +pip install flowvision==0.2.1 +``` + +- Install other requirements +```bash +python3 -m pip install -r requirements.txt +``` + +### Dataset +#### ImageNet +For ImageNet dataset, you can download it from http://image-net.org/. We provide the following two ways to load data: + +- For standard folder dataset, move validation images to labeled sub-folders. The file structure should look like: + ```bash + $ tree data + imagenet + ├── train + │ ├── class1 + │ │ ├── img1.jpeg + │ │ ├── img2.jpeg + │ │ └── ... + │ ├── class2 + │ │ ├── img3.jpeg + │ │ └── ... + │ └── ... + └── val + ├── class1 + │ ├── img4.jpeg + │ ├── img5.jpeg + │ └── ... + ├── class2 + │ ├── img6.jpeg + │ └── ... + └── ... + + +### Training + +You can use bash script `train.sh` to train this model. + +```bash +sh train.sh +``` + +### inference + +Bash script `infer.sh` is used to infer the trained model. 
+ +```bash +sh infer.sh +``` + diff --git a/cv/classification/regnet_y_400mf/config.py b/cv/classification/regnet_y_400mf/config.py new file mode 120000 index 00000000..3721332c --- /dev/null +++ b/cv/classification/regnet_y_400mf/config.py @@ -0,0 +1 @@ +../config.py \ No newline at end of file diff --git a/cv/classification/regnet_y_400mf/configs b/cv/classification/regnet_y_400mf/configs new file mode 120000 index 00000000..271eaab3 --- /dev/null +++ b/cv/classification/regnet_y_400mf/configs @@ -0,0 +1 @@ +../configs \ No newline at end of file diff --git a/cv/classification/regnet_y_400mf/data b/cv/classification/regnet_y_400mf/data new file mode 120000 index 00000000..4909e06e --- /dev/null +++ b/cv/classification/regnet_y_400mf/data @@ -0,0 +1 @@ +../data \ No newline at end of file diff --git a/cv/classification/regnet_y_400mf/infer.sh b/cv/classification/regnet_y_400mf/infer.sh new file mode 100755 index 00000000..a55c6e23 --- /dev/null +++ b/cv/classification/regnet_y_400mf/infer.sh @@ -0,0 +1,16 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux +CUDA_VISIBLE_DEVICES=5 +GPU_NUMS=1 +PORT=12346 +MODEL_ARCH="regnet_y_400mf" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/regnet_settings.yaml \ + --model_arch $MODEL_ARCH \ + --throughput + diff --git a/cv/classification/regnet_y_400mf/logger.py b/cv/classification/regnet_y_400mf/logger.py new file mode 120000 index 00000000..e1dc8ceb --- /dev/null +++ b/cv/classification/regnet_y_400mf/logger.py @@ -0,0 +1 @@ +../logger.py \ No newline at end of file diff --git a/cv/classification/regnet_y_400mf/lr_scheduler.py b/cv/classification/regnet_y_400mf/lr_scheduler.py new file mode 120000 index 00000000..268fb140 --- /dev/null +++ b/cv/classification/regnet_y_400mf/lr_scheduler.py @@ -0,0 +1 @@ +../lr_scheduler.py \ No newline at end of file diff --git a/cv/classification/regnet_y_400mf/main.py 
b/cv/classification/regnet_y_400mf/main.py new file mode 120000 index 00000000..f05f7527 --- /dev/null +++ b/cv/classification/regnet_y_400mf/main.py @@ -0,0 +1 @@ +../main.py \ No newline at end of file diff --git a/cv/classification/regnet_y_400mf/optimizer.py b/cv/classification/regnet_y_400mf/optimizer.py new file mode 120000 index 00000000..e5dec61d --- /dev/null +++ b/cv/classification/regnet_y_400mf/optimizer.py @@ -0,0 +1 @@ +../optimizer.py \ No newline at end of file diff --git a/cv/classification/regnet_y_400mf/requirements.txt b/cv/classification/regnet_y_400mf/requirements.txt new file mode 120000 index 00000000..dc833dd4 --- /dev/null +++ b/cv/classification/regnet_y_400mf/requirements.txt @@ -0,0 +1 @@ +../requirements.txt \ No newline at end of file diff --git a/cv/classification/regnet_y_400mf/train.sh b/cv/classification/regnet_y_400mf/train.sh new file mode 100755 index 00000000..5293df1f --- /dev/null +++ b/cv/classification/regnet_y_400mf/train.sh @@ -0,0 +1,15 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=8 +PORT=12346 +MODEL_ARCH="regnet_y_400mf" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/regnet_settings.yaml \ + --model_arch $MODEL_ARCH + diff --git a/cv/classification/regnet_y_400mf/utils.py b/cv/classification/regnet_y_400mf/utils.py new file mode 120000 index 00000000..50fbc6d8 --- /dev/null +++ b/cv/classification/regnet_y_400mf/utils.py @@ -0,0 +1 @@ +../utils.py \ No newline at end of file diff --git a/cv/classification/regnet_y_800mf/README.md b/cv/classification/regnet_y_800mf/README.md new file mode 100644 index 00000000..e91e97e3 --- /dev/null +++ b/cv/classification/regnet_y_800mf/README.md @@ -0,0 +1,67 @@ +## RegNet +RegNet is a neural network architecture designed for deep learning tasks, particularly in the field of computer vision. 
It was introduced in the paper titled "Designing Network Design Spaces" by Touvron et al. in 2020. RegNet is known for its efficiency, scalability, and strong performance on various computer vision tasks, making it a popular choice for researchers and practitioners in the field. + +### Installation +- Install the latest version of OneFlow +```bash +python3 -m pip install oneflow -f https://staging.oneflow.info/branch/master/[PLATFORM] +``` +Find more information on [install oneflow](https://github.com/Oneflow-Inc/oneflow#install-oneflow) + +- Install flowvision + +Then install the latest stable release of flowvision + +```bash +pip install flowvision==0.2.1 +``` + +- Install other requirements +```bash +python3 -m pip install -r requirements.txt +``` + +### Dataset +#### ImageNet +For ImageNet dataset, you can download it from http://image-net.org/. We provide the following two ways to load data: + +- For standard folder dataset, move validation images to labeled sub-folders. The file structure should look like: + ```bash + $ tree data + imagenet + ├── train + │ ├── class1 + │ │ ├── img1.jpeg + │ │ ├── img2.jpeg + │ │ └── ... + │ ├── class2 + │ │ ├── img3.jpeg + │ │ └── ... + │ └── ... + └── val + ├── class1 + │ ├── img4.jpeg + │ ├── img5.jpeg + │ └── ... + ├── class2 + │ ├── img6.jpeg + │ └── ... + └── ... + + +### Training + +You can use bash script `train.sh` to train this model. + +```bash +sh train.sh +``` + +### inference + +Bash script `infer.sh` is used to infer the trained model. 
+ +```bash +sh infer.sh +``` + diff --git a/cv/classification/regnet_y_800mf/config.py b/cv/classification/regnet_y_800mf/config.py new file mode 120000 index 00000000..3721332c --- /dev/null +++ b/cv/classification/regnet_y_800mf/config.py @@ -0,0 +1 @@ +../config.py \ No newline at end of file diff --git a/cv/classification/regnet_y_800mf/configs b/cv/classification/regnet_y_800mf/configs new file mode 120000 index 00000000..271eaab3 --- /dev/null +++ b/cv/classification/regnet_y_800mf/configs @@ -0,0 +1 @@ +../configs \ No newline at end of file diff --git a/cv/classification/regnet_y_800mf/data b/cv/classification/regnet_y_800mf/data new file mode 120000 index 00000000..4909e06e --- /dev/null +++ b/cv/classification/regnet_y_800mf/data @@ -0,0 +1 @@ +../data \ No newline at end of file diff --git a/cv/classification/regnet_y_800mf/infer.sh b/cv/classification/regnet_y_800mf/infer.sh new file mode 100755 index 00000000..cd39d8e9 --- /dev/null +++ b/cv/classification/regnet_y_800mf/infer.sh @@ -0,0 +1,16 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=1 +PORT=12346 +MODEL_ARCH="regnet_y_800mf" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/regnet_settings.yaml \ + --model_arch $MODEL_ARCH \ + --throughput + diff --git a/cv/classification/regnet_y_800mf/logger.py b/cv/classification/regnet_y_800mf/logger.py new file mode 120000 index 00000000..e1dc8ceb --- /dev/null +++ b/cv/classification/regnet_y_800mf/logger.py @@ -0,0 +1 @@ +../logger.py \ No newline at end of file diff --git a/cv/classification/regnet_y_800mf/lr_scheduler.py b/cv/classification/regnet_y_800mf/lr_scheduler.py new file mode 120000 index 00000000..268fb140 --- /dev/null +++ b/cv/classification/regnet_y_800mf/lr_scheduler.py @@ -0,0 +1 @@ +../lr_scheduler.py \ No newline at end of file diff --git a/cv/classification/regnet_y_800mf/main.py 
b/cv/classification/regnet_y_800mf/main.py new file mode 120000 index 00000000..f05f7527 --- /dev/null +++ b/cv/classification/regnet_y_800mf/main.py @@ -0,0 +1 @@ +../main.py \ No newline at end of file diff --git a/cv/classification/regnet_y_800mf/optimizer.py b/cv/classification/regnet_y_800mf/optimizer.py new file mode 120000 index 00000000..e5dec61d --- /dev/null +++ b/cv/classification/regnet_y_800mf/optimizer.py @@ -0,0 +1 @@ +../optimizer.py \ No newline at end of file diff --git a/cv/classification/regnet_y_800mf/requirements.txt b/cv/classification/regnet_y_800mf/requirements.txt new file mode 120000 index 00000000..dc833dd4 --- /dev/null +++ b/cv/classification/regnet_y_800mf/requirements.txt @@ -0,0 +1 @@ +../requirements.txt \ No newline at end of file diff --git a/cv/classification/regnet_y_800mf/train.sh b/cv/classification/regnet_y_800mf/train.sh new file mode 100755 index 00000000..ef08a318 --- /dev/null +++ b/cv/classification/regnet_y_800mf/train.sh @@ -0,0 +1,15 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=8 +PORT=12346 +MODEL_ARCH="regnet_y_800mf" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/regnet_settings.yaml \ + --model_arch $MODEL_ARCH + diff --git a/cv/classification/regnet_y_800mf/utils.py b/cv/classification/regnet_y_800mf/utils.py new file mode 120000 index 00000000..50fbc6d8 --- /dev/null +++ b/cv/classification/regnet_y_800mf/utils.py @@ -0,0 +1 @@ +../utils.py \ No newline at end of file diff --git a/cv/classification/regnet_y_8gf/README.md b/cv/classification/regnet_y_8gf/README.md new file mode 100644 index 00000000..4a9a1af1 --- /dev/null +++ b/cv/classification/regnet_y_8gf/README.md @@ -0,0 +1,66 @@ +## RegNet_x_400mf + +### Installation +- Install the latest version of OneFlow +```bash +python3 -m pip install oneflow -f https://staging.oneflow.info/branch/master/[PLATFORM] +``` +Find more information on 
[install oneflow](https://github.com/Oneflow-Inc/oneflow#install-oneflow) + +- Install flowvision + +Then install the latest stable release of flowvision + +```bash +pip install flowvision==0.2.1 +``` + +- Install other requirements +```bash +python3 -m pip install -r requirements.txt +``` + +### Dataset +#### ImageNet +For ImageNet dataset, you can download it from http://image-net.org/. We provide the following two ways to load data: + +- For standard folder dataset, move validation images to labeled sub-folders. The file structure should look like: + ```bash + $ tree data + imagenet + ├── train + │ ├── class1 + │ │ ├── img1.jpeg + │ │ ├── img2.jpeg + │ │ └── ... + │ ├── class2 + │ │ ├── img3.jpeg + │ │ └── ... + │ └── ... + └── val + ├── class1 + │ ├── img4.jpeg + │ ├── img5.jpeg + │ └── ... + ├── class2 + │ ├── img6.jpeg + │ └── ... + └── ... + + +### Training + +You can use bash script `train.sh` to train this model. + +```bash +sh train.sh +``` + +### inference + +Bash script `infer.sh` is used to infer the trained model. 
+ +```bash +sh infer.sh +``` + diff --git a/cv/classification/regnet_y_8gf/config.py b/cv/classification/regnet_y_8gf/config.py new file mode 120000 index 00000000..3721332c --- /dev/null +++ b/cv/classification/regnet_y_8gf/config.py @@ -0,0 +1 @@ +../config.py \ No newline at end of file diff --git a/cv/classification/regnet_y_8gf/configs b/cv/classification/regnet_y_8gf/configs new file mode 120000 index 00000000..271eaab3 --- /dev/null +++ b/cv/classification/regnet_y_8gf/configs @@ -0,0 +1 @@ +../configs \ No newline at end of file diff --git a/cv/classification/regnet_y_8gf/data b/cv/classification/regnet_y_8gf/data new file mode 120000 index 00000000..4909e06e --- /dev/null +++ b/cv/classification/regnet_y_8gf/data @@ -0,0 +1 @@ +../data \ No newline at end of file diff --git a/cv/classification/regnet_y_8gf/infer.sh b/cv/classification/regnet_y_8gf/infer.sh new file mode 100755 index 00000000..defff6bd --- /dev/null +++ b/cv/classification/regnet_y_8gf/infer.sh @@ -0,0 +1,15 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux +GPU_NUMS=1 +PORT=12346 +MODEL_ARCH="regnet_y_8gf" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/regnet_settings.yaml \ + --model_arch $MODEL_ARCH \ + --throughput + diff --git a/cv/classification/regnet_y_8gf/logger.py b/cv/classification/regnet_y_8gf/logger.py new file mode 120000 index 00000000..e1dc8ceb --- /dev/null +++ b/cv/classification/regnet_y_8gf/logger.py @@ -0,0 +1 @@ +../logger.py \ No newline at end of file diff --git a/cv/classification/regnet_y_8gf/lr_scheduler.py b/cv/classification/regnet_y_8gf/lr_scheduler.py new file mode 120000 index 00000000..268fb140 --- /dev/null +++ b/cv/classification/regnet_y_8gf/lr_scheduler.py @@ -0,0 +1 @@ +../lr_scheduler.py \ No newline at end of file diff --git a/cv/classification/regnet_y_8gf/main.py b/cv/classification/regnet_y_8gf/main.py new file mode 120000 index 
00000000..f05f7527 --- /dev/null +++ b/cv/classification/regnet_y_8gf/main.py @@ -0,0 +1 @@ +../main.py \ No newline at end of file diff --git a/cv/classification/regnet_y_8gf/optimizer.py b/cv/classification/regnet_y_8gf/optimizer.py new file mode 120000 index 00000000..e5dec61d --- /dev/null +++ b/cv/classification/regnet_y_8gf/optimizer.py @@ -0,0 +1 @@ +../optimizer.py \ No newline at end of file diff --git a/cv/classification/regnet_y_8gf/requirements.txt b/cv/classification/regnet_y_8gf/requirements.txt new file mode 120000 index 00000000..dc833dd4 --- /dev/null +++ b/cv/classification/regnet_y_8gf/requirements.txt @@ -0,0 +1 @@ +../requirements.txt \ No newline at end of file diff --git a/cv/classification/regnet_y_8gf/train.sh b/cv/classification/regnet_y_8gf/train.sh new file mode 100755 index 00000000..17395412 --- /dev/null +++ b/cv/classification/regnet_y_8gf/train.sh @@ -0,0 +1,15 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=1 +PORT=12346 +MODEL_ARCH="regnet_y_8gf" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/regnet_settings.yaml \ + --model_arch $MODEL_ARCH + diff --git a/cv/classification/regnet_y_8gf/utils.py b/cv/classification/regnet_y_8gf/utils.py new file mode 120000 index 00000000..50fbc6d8 --- /dev/null +++ b/cv/classification/regnet_y_8gf/utils.py @@ -0,0 +1 @@ +../utils.py \ No newline at end of file diff --git a/cv/classification/rexnet_lite_1_0/README.md b/cv/classification/rexnet_lite_1_0/README.md new file mode 100644 index 00000000..ceb96578 --- /dev/null +++ b/cv/classification/rexnet_lite_1_0/README.md @@ -0,0 +1,66 @@ +## rexnet_lite_1_0 + +### Installation +- Install the latest version of OneFlow +```bash +python3 -m pip install oneflow -f https://staging.oneflow.info/branch/master/[PLATFORM] +``` +Find more information on [install oneflow](https://github.com/Oneflow-Inc/oneflow#install-oneflow) + +- Install 
flowvision + +Then install the latest stable release of flowvision + +```bash +pip install flowvision==0.2.1 +``` + +- Install other requirements +```bash +python3 -m pip install -r requirements.txt +``` + +### Dataset +#### ImageNet +For ImageNet dataset, you can download it from http://image-net.org/. We provide the following two ways to load data: + +- For standard folder dataset, move validation images to labeled sub-folders. The file structure should look like: + ```bash + $ tree data + imagenet + ├── train + │ ├── class1 + │ │ ├── img1.jpeg + │ │ ├── img2.jpeg + │ │ └── ... + │ ├── class2 + │ │ ├── img3.jpeg + │ │ └── ... + │ └── ... + └── val + ├── class1 + │ ├── img4.jpeg + │ ├── img5.jpeg + │ └── ... + ├── class2 + │ ├── img6.jpeg + │ └── ... + └── ... + + +### Training + +You can use bash script `train.sh` to train this model. + +```bash +sh train.sh +``` + +### inference + +Bash script `infer.sh` is used to infer the trained model. + +```bash +sh infer.sh +``` + diff --git a/cv/classification/rexnet_lite_1_0/config.py b/cv/classification/rexnet_lite_1_0/config.py new file mode 120000 index 00000000..3721332c --- /dev/null +++ b/cv/classification/rexnet_lite_1_0/config.py @@ -0,0 +1 @@ +../config.py \ No newline at end of file diff --git a/cv/classification/rexnet_lite_1_0/configs b/cv/classification/rexnet_lite_1_0/configs new file mode 120000 index 00000000..271eaab3 --- /dev/null +++ b/cv/classification/rexnet_lite_1_0/configs @@ -0,0 +1 @@ +../configs \ No newline at end of file diff --git a/cv/classification/rexnet_lite_1_0/data b/cv/classification/rexnet_lite_1_0/data new file mode 120000 index 00000000..4909e06e --- /dev/null +++ b/cv/classification/rexnet_lite_1_0/data @@ -0,0 +1 @@ +../data \ No newline at end of file diff --git a/cv/classification/rexnet_lite_1_0/infer.sh b/cv/classification/rexnet_lite_1_0/infer.sh new file mode 100755 index 00000000..fce00db4 --- /dev/null +++ b/cv/classification/rexnet_lite_1_0/infer.sh @@ -0,0 +1,16 @@ 
+export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=1 +PORT=12346 +MODEL_ARCH="rexnet_lite_1_0" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/rexnet_settings.yaml \ + --model_arch $MODEL_ARCH \ + --throughput + diff --git a/cv/classification/rexnet_lite_1_0/logger.py b/cv/classification/rexnet_lite_1_0/logger.py new file mode 120000 index 00000000..e1dc8ceb --- /dev/null +++ b/cv/classification/rexnet_lite_1_0/logger.py @@ -0,0 +1 @@ +../logger.py \ No newline at end of file diff --git a/cv/classification/rexnet_lite_1_0/lr_scheduler.py b/cv/classification/rexnet_lite_1_0/lr_scheduler.py new file mode 120000 index 00000000..268fb140 --- /dev/null +++ b/cv/classification/rexnet_lite_1_0/lr_scheduler.py @@ -0,0 +1 @@ +../lr_scheduler.py \ No newline at end of file diff --git a/cv/classification/rexnet_lite_1_0/main.py b/cv/classification/rexnet_lite_1_0/main.py new file mode 120000 index 00000000..f05f7527 --- /dev/null +++ b/cv/classification/rexnet_lite_1_0/main.py @@ -0,0 +1 @@ +../main.py \ No newline at end of file diff --git a/cv/classification/rexnet_lite_1_0/optimizer.py b/cv/classification/rexnet_lite_1_0/optimizer.py new file mode 120000 index 00000000..e5dec61d --- /dev/null +++ b/cv/classification/rexnet_lite_1_0/optimizer.py @@ -0,0 +1 @@ +../optimizer.py \ No newline at end of file diff --git a/cv/classification/rexnet_lite_1_0/requirements.txt b/cv/classification/rexnet_lite_1_0/requirements.txt new file mode 120000 index 00000000..dc833dd4 --- /dev/null +++ b/cv/classification/rexnet_lite_1_0/requirements.txt @@ -0,0 +1 @@ +../requirements.txt \ No newline at end of file diff --git a/cv/classification/rexnet_lite_1_0/train.sh b/cv/classification/rexnet_lite_1_0/train.sh new file mode 100755 index 00000000..f3f551d1 --- /dev/null +++ b/cv/classification/rexnet_lite_1_0/train.sh @@ -0,0 +1,15 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux 
+ +GPU_NUMS=8 +PORT=12346 +MODEL_ARCH="rexnet_lite_1_0" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/rexnet_settings.yaml \ + --model_arch $MODEL_ARCH + diff --git a/cv/classification/rexnet_lite_1_0/utils.py b/cv/classification/rexnet_lite_1_0/utils.py new file mode 120000 index 00000000..50fbc6d8 --- /dev/null +++ b/cv/classification/rexnet_lite_1_0/utils.py @@ -0,0 +1 @@ +../utils.py \ No newline at end of file diff --git a/cv/classification/rexnet_lite_1_3/README.md b/cv/classification/rexnet_lite_1_3/README.md new file mode 100644 index 00000000..44fdb5fc --- /dev/null +++ b/cv/classification/rexnet_lite_1_3/README.md @@ -0,0 +1,66 @@ +## rexnet_lite_1_3 + +### Installation +- Install the latest version of OneFlow +```bash +python3 -m pip install oneflow -f https://staging.oneflow.info/branch/master/[PLATFORM] +``` +Find more information on [install oneflow](https://github.com/Oneflow-Inc/oneflow#install-oneflow) + +- Install flowvision + +Then install the latest stable release of flowvision + +```bash +pip install flowvision==0.2.1 +``` + +- Install other requirements +```bash +python3 -m pip install -r requirements.txt +``` + +### Dataset +#### ImageNet +For ImageNet dataset, you can download it from http://image-net.org/. We provide the following two ways to load data: + +- For standard folder dataset, move validation images to labeled sub-folders. The file structure should look like: + ```bash + $ tree data + imagenet + ├── train + │ ├── class1 + │ │ ├── img1.jpeg + │ │ ├── img2.jpeg + │ │ └── ... + │ ├── class2 + │ │ ├── img3.jpeg + │ │ └── ... + │ └── ... + └── val + ├── class1 + │ ├── img4.jpeg + │ ├── img5.jpeg + │ └── ... + ├── class2 + │ ├── img6.jpeg + │ └── ... + └── ... + + +### Training + +You can use bash script `train.sh` to train this model. 
+ +```bash +sh train.sh +``` + +### inference + +Bash script `infer.sh` is used to infer the trained model. + +```bash +sh infer.sh +``` + diff --git a/cv/classification/rexnet_lite_1_3/config.py b/cv/classification/rexnet_lite_1_3/config.py new file mode 120000 index 00000000..3721332c --- /dev/null +++ b/cv/classification/rexnet_lite_1_3/config.py @@ -0,0 +1 @@ +../config.py \ No newline at end of file diff --git a/cv/classification/rexnet_lite_1_3/configs b/cv/classification/rexnet_lite_1_3/configs new file mode 120000 index 00000000..271eaab3 --- /dev/null +++ b/cv/classification/rexnet_lite_1_3/configs @@ -0,0 +1 @@ +../configs \ No newline at end of file diff --git a/cv/classification/rexnet_lite_1_3/data b/cv/classification/rexnet_lite_1_3/data new file mode 120000 index 00000000..4909e06e --- /dev/null +++ b/cv/classification/rexnet_lite_1_3/data @@ -0,0 +1 @@ +../data \ No newline at end of file diff --git a/cv/classification/rexnet_lite_1_3/infer.sh b/cv/classification/rexnet_lite_1_3/infer.sh new file mode 100755 index 00000000..1788a2bc --- /dev/null +++ b/cv/classification/rexnet_lite_1_3/infer.sh @@ -0,0 +1,16 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=1 +PORT=12346 +MODEL_ARCH="rexnet_lite_1_3" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/rexnet_settings.yaml \ + --model_arch $MODEL_ARCH \ + --throughput + diff --git a/cv/classification/rexnet_lite_1_3/logger.py b/cv/classification/rexnet_lite_1_3/logger.py new file mode 120000 index 00000000..e1dc8ceb --- /dev/null +++ b/cv/classification/rexnet_lite_1_3/logger.py @@ -0,0 +1 @@ +../logger.py \ No newline at end of file diff --git a/cv/classification/rexnet_lite_1_3/lr_scheduler.py b/cv/classification/rexnet_lite_1_3/lr_scheduler.py new file mode 120000 index 00000000..268fb140 --- /dev/null +++ b/cv/classification/rexnet_lite_1_3/lr_scheduler.py @@ -0,0 +1 @@ 
+../lr_scheduler.py \ No newline at end of file diff --git a/cv/classification/rexnet_lite_1_3/main.py b/cv/classification/rexnet_lite_1_3/main.py new file mode 120000 index 00000000..f05f7527 --- /dev/null +++ b/cv/classification/rexnet_lite_1_3/main.py @@ -0,0 +1 @@ +../main.py \ No newline at end of file diff --git a/cv/classification/rexnet_lite_1_3/optimizer.py b/cv/classification/rexnet_lite_1_3/optimizer.py new file mode 120000 index 00000000..e5dec61d --- /dev/null +++ b/cv/classification/rexnet_lite_1_3/optimizer.py @@ -0,0 +1 @@ +../optimizer.py \ No newline at end of file diff --git a/cv/classification/rexnet_lite_1_3/requirements.txt b/cv/classification/rexnet_lite_1_3/requirements.txt new file mode 120000 index 00000000..dc833dd4 --- /dev/null +++ b/cv/classification/rexnet_lite_1_3/requirements.txt @@ -0,0 +1 @@ +../requirements.txt \ No newline at end of file diff --git a/cv/classification/rexnet_lite_1_3/train.sh b/cv/classification/rexnet_lite_1_3/train.sh new file mode 100755 index 00000000..aa3563f4 --- /dev/null +++ b/cv/classification/rexnet_lite_1_3/train.sh @@ -0,0 +1,15 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=8 +PORT=12346 +MODEL_ARCH="rexnet_lite_1_3" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/rexnet_settings.yaml \ + --model_arch $MODEL_ARCH + diff --git a/cv/classification/rexnet_lite_1_3/utils.py b/cv/classification/rexnet_lite_1_3/utils.py new file mode 120000 index 00000000..50fbc6d8 --- /dev/null +++ b/cv/classification/rexnet_lite_1_3/utils.py @@ -0,0 +1 @@ +../utils.py \ No newline at end of file diff --git a/cv/classification/rexnet_lite_1_5/README.md b/cv/classification/rexnet_lite_1_5/README.md new file mode 100644 index 00000000..2c6ea58f --- /dev/null +++ b/cv/classification/rexnet_lite_1_5/README.md @@ -0,0 +1,66 @@ +## rexnet_lite_1_5 + +### Installation +- Install the latest version of OneFlow 
+```bash +python3 -m pip install oneflow -f https://staging.oneflow.info/branch/master/[PLATFORM] +``` +Find more information on [install oneflow](https://github.com/Oneflow-Inc/oneflow#install-oneflow) + +- Install flowvision + +Then install the latest stable release of flowvision + +```bash +pip install flowvision==0.2.1 +``` + +- Install other requirements +```bash +python3 -m pip install -r requirements.txt +``` + +### Dataset +#### ImageNet +For ImageNet dataset, you can download it from http://image-net.org/. We provide the following two ways to load data: + +- For standard folder dataset, move validation images to labeled sub-folders. The file structure should look like: + ```bash + $ tree data + imagenet + ├── train + │ ├── class1 + │ │ ├── img1.jpeg + │ │ ├── img2.jpeg + │ │ └── ... + │ ├── class2 + │ │ ├── img3.jpeg + │ │ └── ... + │ └── ... + └── val + ├── class1 + │ ├── img4.jpeg + │ ├── img5.jpeg + │ └── ... + ├── class2 + │ ├── img6.jpeg + │ └── ... + └── ... + + +### Training + +You can use bash script `train.sh` to train this model. + +```bash +sh train.sh +``` + +### inference + +Bash script `infer.sh` is used to infer the trained model. 
+ +```bash +sh infer.sh +``` + diff --git a/cv/classification/rexnet_lite_1_5/config.py b/cv/classification/rexnet_lite_1_5/config.py new file mode 120000 index 00000000..3721332c --- /dev/null +++ b/cv/classification/rexnet_lite_1_5/config.py @@ -0,0 +1 @@ +../config.py \ No newline at end of file diff --git a/cv/classification/rexnet_lite_1_5/configs b/cv/classification/rexnet_lite_1_5/configs new file mode 120000 index 00000000..271eaab3 --- /dev/null +++ b/cv/classification/rexnet_lite_1_5/configs @@ -0,0 +1 @@ +../configs \ No newline at end of file diff --git a/cv/classification/rexnet_lite_1_5/data b/cv/classification/rexnet_lite_1_5/data new file mode 120000 index 00000000..4909e06e --- /dev/null +++ b/cv/classification/rexnet_lite_1_5/data @@ -0,0 +1 @@ +../data \ No newline at end of file diff --git a/cv/classification/rexnet_lite_1_5/infer.sh b/cv/classification/rexnet_lite_1_5/infer.sh new file mode 100755 index 00000000..5716ba45 --- /dev/null +++ b/cv/classification/rexnet_lite_1_5/infer.sh @@ -0,0 +1,16 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=1 +PORT=12346 +MODEL_ARCH="rexnet_lite_1_5" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/rexnet_settings.yaml \ + --model_arch $MODEL_ARCH \ + --throughput + diff --git a/cv/classification/rexnet_lite_1_5/logger.py b/cv/classification/rexnet_lite_1_5/logger.py new file mode 120000 index 00000000..e1dc8ceb --- /dev/null +++ b/cv/classification/rexnet_lite_1_5/logger.py @@ -0,0 +1 @@ +../logger.py \ No newline at end of file diff --git a/cv/classification/rexnet_lite_1_5/lr_scheduler.py b/cv/classification/rexnet_lite_1_5/lr_scheduler.py new file mode 120000 index 00000000..268fb140 --- /dev/null +++ b/cv/classification/rexnet_lite_1_5/lr_scheduler.py @@ -0,0 +1 @@ +../lr_scheduler.py \ No newline at end of file diff --git a/cv/classification/rexnet_lite_1_5/main.py 
b/cv/classification/rexnet_lite_1_5/main.py new file mode 120000 index 00000000..f05f7527 --- /dev/null +++ b/cv/classification/rexnet_lite_1_5/main.py @@ -0,0 +1 @@ +../main.py \ No newline at end of file diff --git a/cv/classification/rexnet_lite_1_5/optimizer.py b/cv/classification/rexnet_lite_1_5/optimizer.py new file mode 120000 index 00000000..e5dec61d --- /dev/null +++ b/cv/classification/rexnet_lite_1_5/optimizer.py @@ -0,0 +1 @@ +../optimizer.py \ No newline at end of file diff --git a/cv/classification/rexnet_lite_1_5/requirements.txt b/cv/classification/rexnet_lite_1_5/requirements.txt new file mode 120000 index 00000000..dc833dd4 --- /dev/null +++ b/cv/classification/rexnet_lite_1_5/requirements.txt @@ -0,0 +1 @@ +../requirements.txt \ No newline at end of file diff --git a/cv/classification/rexnet_lite_1_5/train.sh b/cv/classification/rexnet_lite_1_5/train.sh new file mode 100755 index 00000000..ae8c4407 --- /dev/null +++ b/cv/classification/rexnet_lite_1_5/train.sh @@ -0,0 +1,15 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=8 +PORT=12346 +MODEL_ARCH="rexnet_lite_1_5" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/rexnet_settings.yaml \ + --model_arch $MODEL_ARCH + diff --git a/cv/classification/rexnet_lite_1_5/utils.py b/cv/classification/rexnet_lite_1_5/utils.py new file mode 120000 index 00000000..50fbc6d8 --- /dev/null +++ b/cv/classification/rexnet_lite_1_5/utils.py @@ -0,0 +1 @@ +../utils.py \ No newline at end of file diff --git a/cv/classification/rexnet_lite_2_0/README.md b/cv/classification/rexnet_lite_2_0/README.md new file mode 100644 index 00000000..b2b35ce2 --- /dev/null +++ b/cv/classification/rexnet_lite_2_0/README.md @@ -0,0 +1,66 @@ +## rexnet_lite_2_0 + +### Installation +- Install the latest version of OneFlow +```bash +python3 -m pip install oneflow -f https://staging.oneflow.info/branch/master/[PLATFORM] +``` 
+Find more information on [install oneflow](https://github.com/Oneflow-Inc/oneflow#install-oneflow) + +- Install flowvision + +Then install the latest stable release of flowvision + +```bash +pip install flowvision==0.2.1 +``` + +- Install other requirements +```bash +python3 -m pip install -r requirements.txt +``` + +### Dataset +#### ImageNet +For ImageNet dataset, you can download it from http://image-net.org/. We provide the following two ways to load data: + +- For standard folder dataset, move validation images to labeled sub-folders. The file structure should look like: + ```bash + $ tree data + imagenet + ├── train + │ ├── class1 + │ │ ├── img1.jpeg + │ │ ├── img2.jpeg + │ │ └── ... + │ ├── class2 + │ │ ├── img3.jpeg + │ │ └── ... + │ └── ... + └── val + ├── class1 + │ ├── img4.jpeg + │ ├── img5.jpeg + │ └── ... + ├── class2 + │ ├── img6.jpeg + │ └── ... + └── ... + + +### Training + +You can use bash script `train.sh` to train this model. + +```bash +sh train.sh +``` + +### inference + +Bash script `infer.sh` is used to infer the trained model. 
+ +```bash +sh infer.sh +``` + diff --git a/cv/classification/rexnet_lite_2_0/config.py b/cv/classification/rexnet_lite_2_0/config.py new file mode 120000 index 00000000..3721332c --- /dev/null +++ b/cv/classification/rexnet_lite_2_0/config.py @@ -0,0 +1 @@ +../config.py \ No newline at end of file diff --git a/cv/classification/rexnet_lite_2_0/configs b/cv/classification/rexnet_lite_2_0/configs new file mode 120000 index 00000000..271eaab3 --- /dev/null +++ b/cv/classification/rexnet_lite_2_0/configs @@ -0,0 +1 @@ +../configs \ No newline at end of file diff --git a/cv/classification/rexnet_lite_2_0/data b/cv/classification/rexnet_lite_2_0/data new file mode 120000 index 00000000..4909e06e --- /dev/null +++ b/cv/classification/rexnet_lite_2_0/data @@ -0,0 +1 @@ +../data \ No newline at end of file diff --git a/cv/classification/rexnet_lite_2_0/infer.sh b/cv/classification/rexnet_lite_2_0/infer.sh new file mode 100755 index 00000000..2b02d452 --- /dev/null +++ b/cv/classification/rexnet_lite_2_0/infer.sh @@ -0,0 +1,16 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=1 +PORT=12346 +MODEL_ARCH="rexnet_lite_2_0" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/rexnet_settings.yaml \ + --model_arch $MODEL_ARCH \ + --throughput + diff --git a/cv/classification/rexnet_lite_2_0/logger.py b/cv/classification/rexnet_lite_2_0/logger.py new file mode 120000 index 00000000..e1dc8ceb --- /dev/null +++ b/cv/classification/rexnet_lite_2_0/logger.py @@ -0,0 +1 @@ +../logger.py \ No newline at end of file diff --git a/cv/classification/rexnet_lite_2_0/lr_scheduler.py b/cv/classification/rexnet_lite_2_0/lr_scheduler.py new file mode 120000 index 00000000..268fb140 --- /dev/null +++ b/cv/classification/rexnet_lite_2_0/lr_scheduler.py @@ -0,0 +1 @@ +../lr_scheduler.py \ No newline at end of file diff --git a/cv/classification/rexnet_lite_2_0/main.py 
b/cv/classification/rexnet_lite_2_0/main.py new file mode 120000 index 00000000..f05f7527 --- /dev/null +++ b/cv/classification/rexnet_lite_2_0/main.py @@ -0,0 +1 @@ +../main.py \ No newline at end of file diff --git a/cv/classification/rexnet_lite_2_0/optimizer.py b/cv/classification/rexnet_lite_2_0/optimizer.py new file mode 120000 index 00000000..e5dec61d --- /dev/null +++ b/cv/classification/rexnet_lite_2_0/optimizer.py @@ -0,0 +1 @@ +../optimizer.py \ No newline at end of file diff --git a/cv/classification/rexnet_lite_2_0/requirements.txt b/cv/classification/rexnet_lite_2_0/requirements.txt new file mode 120000 index 00000000..dc833dd4 --- /dev/null +++ b/cv/classification/rexnet_lite_2_0/requirements.txt @@ -0,0 +1 @@ +../requirements.txt \ No newline at end of file diff --git a/cv/classification/rexnet_lite_2_0/train.sh b/cv/classification/rexnet_lite_2_0/train.sh new file mode 100755 index 00000000..c47071e8 --- /dev/null +++ b/cv/classification/rexnet_lite_2_0/train.sh @@ -0,0 +1,15 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=8 +PORT=12346 +MODEL_ARCH="rexnet_lite_2_0" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/rexnet_settings.yaml \ + --model_arch $MODEL_ARCH + diff --git a/cv/classification/rexnet_lite_2_0/utils.py b/cv/classification/rexnet_lite_2_0/utils.py new file mode 120000 index 00000000..50fbc6d8 --- /dev/null +++ b/cv/classification/rexnet_lite_2_0/utils.py @@ -0,0 +1 @@ +../utils.py \ No newline at end of file diff --git a/cv/classification/rexnetv1_1_3/README.md b/cv/classification/rexnetv1_1_3/README.md new file mode 100644 index 00000000..f2395663 --- /dev/null +++ b/cv/classification/rexnetv1_1_3/README.md @@ -0,0 +1,66 @@ +## RexNetv1_1_3 + +### Installation +- Install the latest version of OneFlow +```bash +python3 -m pip install oneflow -f https://staging.oneflow.info/branch/master/[PLATFORM] +``` +Find more 
information on [install oneflow](https://github.com/Oneflow-Inc/oneflow#install-oneflow) + +- Install flowvision + +Then install the latest stable release of flowvision + +```bash +pip install flowvision==0.2.1 +``` + +- Install other requirements +```bash +python3 -m pip install -r requirements.txt +``` + +### Dataset +#### ImageNet +For ImageNet dataset, you can download it from http://image-net.org/. We provide the following two ways to load data: + +- For standard folder dataset, move validation images to labeled sub-folders. The file structure should look like: + ```bash + $ tree data + imagenet + ├── train + │ ├── class1 + │ │ ├── img1.jpeg + │ │ ├── img2.jpeg + │ │ └── ... + │ ├── class2 + │ │ ├── img3.jpeg + │ │ └── ... + │ └── ... + └── val + ├── class1 + │ ├── img4.jpeg + │ ├── img5.jpeg + │ └── ... + ├── class2 + │ ├── img6.jpeg + │ └── ... + └── ... + + +### Training + +You can use bash script `train.sh` to train this model. + +```bash +sh train.sh +``` + +### inference + +Bash script `infer.sh` is used to infer the trained model. 
+ +```bash +sh infer.sh +``` + diff --git a/cv/classification/rexnetv1_1_3/config.py b/cv/classification/rexnetv1_1_3/config.py new file mode 120000 index 00000000..3721332c --- /dev/null +++ b/cv/classification/rexnetv1_1_3/config.py @@ -0,0 +1 @@ +../config.py \ No newline at end of file diff --git a/cv/classification/rexnetv1_1_3/configs b/cv/classification/rexnetv1_1_3/configs new file mode 120000 index 00000000..271eaab3 --- /dev/null +++ b/cv/classification/rexnetv1_1_3/configs @@ -0,0 +1 @@ +../configs \ No newline at end of file diff --git a/cv/classification/rexnetv1_1_3/data b/cv/classification/rexnetv1_1_3/data new file mode 120000 index 00000000..4909e06e --- /dev/null +++ b/cv/classification/rexnetv1_1_3/data @@ -0,0 +1 @@ +../data \ No newline at end of file diff --git a/cv/classification/rexnetv1_1_3/infer.sh b/cv/classification/rexnetv1_1_3/infer.sh new file mode 100755 index 00000000..91339c73 --- /dev/null +++ b/cv/classification/rexnetv1_1_3/infer.sh @@ -0,0 +1,16 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=1 +PORT=12346 +MODEL_ARCH="rexnetv1_1_3" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/rexnet_settings.yaml \ + --model_arch $MODEL_ARCH \ + --throughput + diff --git a/cv/classification/rexnetv1_1_3/logger.py b/cv/classification/rexnetv1_1_3/logger.py new file mode 120000 index 00000000..e1dc8ceb --- /dev/null +++ b/cv/classification/rexnetv1_1_3/logger.py @@ -0,0 +1 @@ +../logger.py \ No newline at end of file diff --git a/cv/classification/rexnetv1_1_3/lr_scheduler.py b/cv/classification/rexnetv1_1_3/lr_scheduler.py new file mode 120000 index 00000000..268fb140 --- /dev/null +++ b/cv/classification/rexnetv1_1_3/lr_scheduler.py @@ -0,0 +1 @@ +../lr_scheduler.py \ No newline at end of file diff --git a/cv/classification/rexnetv1_1_3/main.py b/cv/classification/rexnetv1_1_3/main.py new file mode 120000 index 
00000000..f05f7527 --- /dev/null +++ b/cv/classification/rexnetv1_1_3/main.py @@ -0,0 +1 @@ +../main.py \ No newline at end of file diff --git a/cv/classification/rexnetv1_1_3/optimizer.py b/cv/classification/rexnetv1_1_3/optimizer.py new file mode 120000 index 00000000..e5dec61d --- /dev/null +++ b/cv/classification/rexnetv1_1_3/optimizer.py @@ -0,0 +1 @@ +../optimizer.py \ No newline at end of file diff --git a/cv/classification/rexnetv1_1_3/requirements.txt b/cv/classification/rexnetv1_1_3/requirements.txt new file mode 120000 index 00000000..dc833dd4 --- /dev/null +++ b/cv/classification/rexnetv1_1_3/requirements.txt @@ -0,0 +1 @@ +../requirements.txt \ No newline at end of file diff --git a/cv/classification/rexnetv1_1_3/train.sh b/cv/classification/rexnetv1_1_3/train.sh new file mode 100755 index 00000000..a9fa6f6e --- /dev/null +++ b/cv/classification/rexnetv1_1_3/train.sh @@ -0,0 +1,15 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=8 +PORT=12346 +MODEL_ARCH="rexnetv1_1_3" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/rexnet_settings.yaml \ + --model_arch $MODEL_ARCH + diff --git a/cv/classification/rexnetv1_1_3/utils.py b/cv/classification/rexnetv1_1_3/utils.py new file mode 120000 index 00000000..50fbc6d8 --- /dev/null +++ b/cv/classification/rexnetv1_1_3/utils.py @@ -0,0 +1 @@ +../utils.py \ No newline at end of file diff --git a/cv/classification/rexnetv1_1_5/README.md b/cv/classification/rexnetv1_1_5/README.md new file mode 100644 index 00000000..6f924d4a --- /dev/null +++ b/cv/classification/rexnetv1_1_5/README.md @@ -0,0 +1,66 @@ +## RexNetv1_1_5 + +### Installation +- Install the latest version of OneFlow +```bash +python3 -m pip install oneflow -f https://staging.oneflow.info/branch/master/[PLATFORM] +``` +Find more information on [install oneflow](https://github.com/Oneflow-Inc/oneflow#install-oneflow) + +- Install flowvision + 
+Then install the latest stable release of flowvision + +```bash +pip install flowvision==0.2.1 +``` + +- Install other requirements +```bash +python3 -m pip install -r requirements.txt +``` + +### Dataset +#### ImageNet +For ImageNet dataset, you can download it from http://image-net.org/. We provide the following two ways to load data: + +- For standard folder dataset, move validation images to labeled sub-folders. The file structure should look like: + ```bash + $ tree data + imagenet + ├── train + │ ├── class1 + │ │ ├── img1.jpeg + │ │ ├── img2.jpeg + │ │ └── ... + │ ├── class2 + │ │ ├── img3.jpeg + │ │ └── ... + │ └── ... + └── val + ├── class1 + │ ├── img4.jpeg + │ ├── img5.jpeg + │ └── ... + ├── class2 + │ ├── img6.jpeg + │ └── ... + └── ... + + +### Training + +You can use bash script `train.sh` to train this model. + +```bash +sh train.sh +``` + +### inference + +Bash script `infer.sh` is used to infer the trained model. + +```bash +sh infer.sh +``` + diff --git a/cv/classification/rexnetv1_1_5/config.py b/cv/classification/rexnetv1_1_5/config.py new file mode 120000 index 00000000..3721332c --- /dev/null +++ b/cv/classification/rexnetv1_1_5/config.py @@ -0,0 +1 @@ +../config.py \ No newline at end of file diff --git a/cv/classification/rexnetv1_1_5/configs b/cv/classification/rexnetv1_1_5/configs new file mode 120000 index 00000000..271eaab3 --- /dev/null +++ b/cv/classification/rexnetv1_1_5/configs @@ -0,0 +1 @@ +../configs \ No newline at end of file diff --git a/cv/classification/rexnetv1_1_5/data b/cv/classification/rexnetv1_1_5/data new file mode 120000 index 00000000..4909e06e --- /dev/null +++ b/cv/classification/rexnetv1_1_5/data @@ -0,0 +1 @@ +../data \ No newline at end of file diff --git a/cv/classification/rexnetv1_1_5/infer.sh b/cv/classification/rexnetv1_1_5/infer.sh new file mode 100755 index 00000000..729e84bb --- /dev/null +++ b/cv/classification/rexnetv1_1_5/infer.sh @@ -0,0 +1,16 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + 
+GPU_NUMS=1 +PORT=12346 +MODEL_ARCH="rexnetv1_1_5" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/rexnet_settings.yaml \ + --model_arch $MODEL_ARCH \ + --throughput + diff --git a/cv/classification/rexnetv1_1_5/logger.py b/cv/classification/rexnetv1_1_5/logger.py new file mode 120000 index 00000000..e1dc8ceb --- /dev/null +++ b/cv/classification/rexnetv1_1_5/logger.py @@ -0,0 +1 @@ +../logger.py \ No newline at end of file diff --git a/cv/classification/rexnetv1_1_5/lr_scheduler.py b/cv/classification/rexnetv1_1_5/lr_scheduler.py new file mode 120000 index 00000000..268fb140 --- /dev/null +++ b/cv/classification/rexnetv1_1_5/lr_scheduler.py @@ -0,0 +1 @@ +../lr_scheduler.py \ No newline at end of file diff --git a/cv/classification/rexnetv1_1_5/main.py b/cv/classification/rexnetv1_1_5/main.py new file mode 120000 index 00000000..f05f7527 --- /dev/null +++ b/cv/classification/rexnetv1_1_5/main.py @@ -0,0 +1 @@ +../main.py \ No newline at end of file diff --git a/cv/classification/rexnetv1_1_5/optimizer.py b/cv/classification/rexnetv1_1_5/optimizer.py new file mode 120000 index 00000000..e5dec61d --- /dev/null +++ b/cv/classification/rexnetv1_1_5/optimizer.py @@ -0,0 +1 @@ +../optimizer.py \ No newline at end of file diff --git a/cv/classification/rexnetv1_1_5/requirements.txt b/cv/classification/rexnetv1_1_5/requirements.txt new file mode 120000 index 00000000..dc833dd4 --- /dev/null +++ b/cv/classification/rexnetv1_1_5/requirements.txt @@ -0,0 +1 @@ +../requirements.txt \ No newline at end of file diff --git a/cv/classification/rexnetv1_1_5/train.sh b/cv/classification/rexnetv1_1_5/train.sh new file mode 100755 index 00000000..7839299e --- /dev/null +++ b/cv/classification/rexnetv1_1_5/train.sh @@ -0,0 +1,15 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=8 +PORT=12346 +MODEL_ARCH="rexnetv1_1_5" + +python3 -m oneflow.distributed.launch \ + 
--nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/rexnet_settings.yaml \ + --model_arch $MODEL_ARCH + diff --git a/cv/classification/rexnetv1_1_5/utils.py b/cv/classification/rexnetv1_1_5/utils.py new file mode 120000 index 00000000..50fbc6d8 --- /dev/null +++ b/cv/classification/rexnetv1_1_5/utils.py @@ -0,0 +1 @@ +../utils.py \ No newline at end of file diff --git a/cv/classification/rexnetv1_2_0/README.md b/cv/classification/rexnetv1_2_0/README.md new file mode 100644 index 00000000..51d85873 --- /dev/null +++ b/cv/classification/rexnetv1_2_0/README.md @@ -0,0 +1,66 @@ +## RexNetv1_2_0 + +### Installation +- Install the latest version of OneFlow +```bash +python3 -m pip install oneflow -f https://staging.oneflow.info/branch/master/[PLATFORM] +``` +Find more information on [install oneflow](https://github.com/Oneflow-Inc/oneflow#install-oneflow) + +- Install flowvision + +Then install the latest stable release of flowvision + +```bash +pip install flowvision==0.2.1 +``` + +- Install other requirements +```bash +python3 -m pip install -r requirements.txt +``` + +### Dataset +#### ImageNet +For ImageNet dataset, you can download it from http://image-net.org/. We provide the following two ways to load data: + +- For standard folder dataset, move validation images to labeled sub-folders. The file structure should look like: + ```bash + $ tree data + imagenet + ├── train + │ ├── class1 + │ │ ├── img1.jpeg + │ │ ├── img2.jpeg + │ │ └── ... + │ ├── class2 + │ │ ├── img3.jpeg + │ │ └── ... + │ └── ... + └── val + ├── class1 + │ ├── img4.jpeg + │ ├── img5.jpeg + │ └── ... + ├── class2 + │ ├── img6.jpeg + │ └── ... + └── ... + + +### Training + +You can use bash script `train.sh` to train this model. + +```bash +sh train.sh +``` + +### inference + +Bash script `infer.sh` is used to infer the trained model. 
+ +```bash +sh infer.sh +``` + diff --git a/cv/classification/rexnetv1_2_0/config.py b/cv/classification/rexnetv1_2_0/config.py new file mode 120000 index 00000000..3721332c --- /dev/null +++ b/cv/classification/rexnetv1_2_0/config.py @@ -0,0 +1 @@ +../config.py \ No newline at end of file diff --git a/cv/classification/rexnetv1_2_0/configs b/cv/classification/rexnetv1_2_0/configs new file mode 120000 index 00000000..271eaab3 --- /dev/null +++ b/cv/classification/rexnetv1_2_0/configs @@ -0,0 +1 @@ +../configs \ No newline at end of file diff --git a/cv/classification/rexnetv1_2_0/data b/cv/classification/rexnetv1_2_0/data new file mode 120000 index 00000000..4909e06e --- /dev/null +++ b/cv/classification/rexnetv1_2_0/data @@ -0,0 +1 @@ +../data \ No newline at end of file diff --git a/cv/classification/rexnetv1_2_0/infer.sh b/cv/classification/rexnetv1_2_0/infer.sh new file mode 100755 index 00000000..60a8ebd1 --- /dev/null +++ b/cv/classification/rexnetv1_2_0/infer.sh @@ -0,0 +1,16 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=1 +PORT=12346 +MODEL_ARCH="rexnetv1_2_0" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/rexnet_settings.yaml \ + --model_arch $MODEL_ARCH \ + --throughput + diff --git a/cv/classification/rexnetv1_2_0/logger.py b/cv/classification/rexnetv1_2_0/logger.py new file mode 120000 index 00000000..e1dc8ceb --- /dev/null +++ b/cv/classification/rexnetv1_2_0/logger.py @@ -0,0 +1 @@ +../logger.py \ No newline at end of file diff --git a/cv/classification/rexnetv1_2_0/lr_scheduler.py b/cv/classification/rexnetv1_2_0/lr_scheduler.py new file mode 120000 index 00000000..268fb140 --- /dev/null +++ b/cv/classification/rexnetv1_2_0/lr_scheduler.py @@ -0,0 +1 @@ +../lr_scheduler.py \ No newline at end of file diff --git a/cv/classification/rexnetv1_2_0/main.py b/cv/classification/rexnetv1_2_0/main.py new file mode 120000 index 
00000000..f05f7527 --- /dev/null +++ b/cv/classification/rexnetv1_2_0/main.py @@ -0,0 +1 @@ +../main.py \ No newline at end of file diff --git a/cv/classification/rexnetv1_2_0/optimizer.py b/cv/classification/rexnetv1_2_0/optimizer.py new file mode 120000 index 00000000..e5dec61d --- /dev/null +++ b/cv/classification/rexnetv1_2_0/optimizer.py @@ -0,0 +1 @@ +../optimizer.py \ No newline at end of file diff --git a/cv/classification/rexnetv1_2_0/requirements.txt b/cv/classification/rexnetv1_2_0/requirements.txt new file mode 120000 index 00000000..dc833dd4 --- /dev/null +++ b/cv/classification/rexnetv1_2_0/requirements.txt @@ -0,0 +1 @@ +../requirements.txt \ No newline at end of file diff --git a/cv/classification/rexnetv1_2_0/train.sh b/cv/classification/rexnetv1_2_0/train.sh new file mode 100755 index 00000000..805674b6 --- /dev/null +++ b/cv/classification/rexnetv1_2_0/train.sh @@ -0,0 +1,15 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=4 +PORT=12346 +MODEL_ARCH="rexnetv1_2_0" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/rexnet_settings.yaml \ + --model_arch $MODEL_ARCH + diff --git a/cv/classification/rexnetv1_2_0/utils.py b/cv/classification/rexnetv1_2_0/utils.py new file mode 120000 index 00000000..50fbc6d8 --- /dev/null +++ b/cv/classification/rexnetv1_2_0/utils.py @@ -0,0 +1 @@ +../utils.py \ No newline at end of file diff --git a/cv/classification/rexnetv1_3_0/README.md b/cv/classification/rexnetv1_3_0/README.md new file mode 100644 index 00000000..5c631fdf --- /dev/null +++ b/cv/classification/rexnetv1_3_0/README.md @@ -0,0 +1,66 @@ +## RexNetv1_3_0 + +### Installation +- Install the latest version of OneFlow +```bash +python3 -m pip install oneflow -f https://staging.oneflow.info/branch/master/[PLATFORM] +``` +Find more information on [install oneflow](https://github.com/Oneflow-Inc/oneflow#install-oneflow) + +- Install flowvision + 
+Then install the latest stable release of flowvision + +```bash +pip install flowvision==0.2.1 +``` + +- Install other requirements +```bash +python3 -m pip install -r requirements.txt +``` + +### Dataset +#### ImageNet +For ImageNet dataset, you can download it from http://image-net.org/. We provide the following two ways to load data: + +- For standard folder dataset, move validation images to labeled sub-folders. The file structure should look like: + ```bash + $ tree data + imagenet + ├── train + │ ├── class1 + │ │ ├── img1.jpeg + │ │ ├── img2.jpeg + │ │ └── ... + │ ├── class2 + │ │ ├── img3.jpeg + │ │ └── ... + │ └── ... + └── val + ├── class1 + │ ├── img4.jpeg + │ ├── img5.jpeg + │ └── ... + ├── class2 + │ ├── img6.jpeg + │ └── ... + └── ... + + +### Training + +You can use bash script `train.sh` to train this model. + +```bash +sh train.sh +``` + +### inference + +Bash script `infer.sh` is used to infer the trained model. + +```bash +sh infer.sh +``` + diff --git a/cv/classification/rexnetv1_3_0/config.py b/cv/classification/rexnetv1_3_0/config.py new file mode 120000 index 00000000..3721332c --- /dev/null +++ b/cv/classification/rexnetv1_3_0/config.py @@ -0,0 +1 @@ +../config.py \ No newline at end of file diff --git a/cv/classification/rexnetv1_3_0/configs b/cv/classification/rexnetv1_3_0/configs new file mode 120000 index 00000000..271eaab3 --- /dev/null +++ b/cv/classification/rexnetv1_3_0/configs @@ -0,0 +1 @@ +../configs \ No newline at end of file diff --git a/cv/classification/rexnetv1_3_0/data b/cv/classification/rexnetv1_3_0/data new file mode 120000 index 00000000..4909e06e --- /dev/null +++ b/cv/classification/rexnetv1_3_0/data @@ -0,0 +1 @@ +../data \ No newline at end of file diff --git a/cv/classification/rexnetv1_3_0/infer.sh b/cv/classification/rexnetv1_3_0/infer.sh new file mode 100755 index 00000000..232bbef7 --- /dev/null +++ b/cv/classification/rexnetv1_3_0/infer.sh @@ -0,0 +1,16 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + 
+GPU_NUMS=1 +PORT=12346 +MODEL_ARCH="rexnetv1_3_0" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/rexnet_settings.yaml \ + --model_arch $MODEL_ARCH \ + --throughput + diff --git a/cv/classification/rexnetv1_3_0/logger.py b/cv/classification/rexnetv1_3_0/logger.py new file mode 120000 index 00000000..e1dc8ceb --- /dev/null +++ b/cv/classification/rexnetv1_3_0/logger.py @@ -0,0 +1 @@ +../logger.py \ No newline at end of file diff --git a/cv/classification/rexnetv1_3_0/lr_scheduler.py b/cv/classification/rexnetv1_3_0/lr_scheduler.py new file mode 120000 index 00000000..268fb140 --- /dev/null +++ b/cv/classification/rexnetv1_3_0/lr_scheduler.py @@ -0,0 +1 @@ +../lr_scheduler.py \ No newline at end of file diff --git a/cv/classification/rexnetv1_3_0/main.py b/cv/classification/rexnetv1_3_0/main.py new file mode 120000 index 00000000..f05f7527 --- /dev/null +++ b/cv/classification/rexnetv1_3_0/main.py @@ -0,0 +1 @@ +../main.py \ No newline at end of file diff --git a/cv/classification/rexnetv1_3_0/optimizer.py b/cv/classification/rexnetv1_3_0/optimizer.py new file mode 120000 index 00000000..e5dec61d --- /dev/null +++ b/cv/classification/rexnetv1_3_0/optimizer.py @@ -0,0 +1 @@ +../optimizer.py \ No newline at end of file diff --git a/cv/classification/rexnetv1_3_0/requirements.txt b/cv/classification/rexnetv1_3_0/requirements.txt new file mode 120000 index 00000000..dc833dd4 --- /dev/null +++ b/cv/classification/rexnetv1_3_0/requirements.txt @@ -0,0 +1 @@ +../requirements.txt \ No newline at end of file diff --git a/cv/classification/rexnetv1_3_0/train.sh b/cv/classification/rexnetv1_3_0/train.sh new file mode 100755 index 00000000..072be9c3 --- /dev/null +++ b/cv/classification/rexnetv1_3_0/train.sh @@ -0,0 +1,15 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=8 +PORT=12346 +MODEL_ARCH="rexnetv1_3_0" + +python3 -m oneflow.distributed.launch \ + 
--nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/rexnet_settings.yaml \ + --model_arch $MODEL_ARCH + diff --git a/cv/classification/rexnetv1_3_0/utils.py b/cv/classification/rexnetv1_3_0/utils.py new file mode 120000 index 00000000..50fbc6d8 --- /dev/null +++ b/cv/classification/rexnetv1_3_0/utils.py @@ -0,0 +1 @@ +../utils.py \ No newline at end of file diff --git a/cv/classification/shufflenet_v2x0_5/README.md b/cv/classification/shufflenet_v2x0_5/README.md index cdf879d8..1911ad6b 100644 --- a/cv/classification/shufflenet_v2x0_5/README.md +++ b/cv/classification/shufflenet_v2x0_5/README.md @@ -95,12 +95,20 @@ For ImageNet dataset, you can download it from http://image-net.org/. We provide ### Training -- ddp training with simple bash file + +You can use bash script `train.sh` to train this model. + ```bash +sh train.sh +``` + +### inference -bash train.sh +Bash script `infer.sh` is used to infer the trained model. + +```bash +sh infer.sh ``` -## Reference -- [Swin-Transformer](https://github.com/microsoft/Swin-Transformer) + diff --git a/cv/classification/shufflenet_v2x1_0/README.md b/cv/classification/shufflenet_v2x1_0/README.md index cf282c74..be029200 100644 --- a/cv/classification/shufflenet_v2x1_0/README.md +++ b/cv/classification/shufflenet_v2x1_0/README.md @@ -95,12 +95,20 @@ For ImageNet dataset, you can download it from http://image-net.org/. We provide ### Training -- ddp training with simple bash file + +You can use bash script `train.sh` to train this model. + ```bash +sh train.sh +``` + +### inference -bash train.sh +Bash script `infer.sh` is used to infer the trained model. 
+ +```bash +sh infer.sh ``` -## Reference -- [Swin-Transformer](https://github.com/microsoft/Swin-Transformer) + diff --git a/cv/classification/shufflenet_v2x1_5/README.md b/cv/classification/shufflenet_v2x1_5/README.md index cdf879d8..1911ad6b 100644 --- a/cv/classification/shufflenet_v2x1_5/README.md +++ b/cv/classification/shufflenet_v2x1_5/README.md @@ -95,12 +95,20 @@ For ImageNet dataset, you can download it from http://image-net.org/. We provide ### Training -- ddp training with simple bash file + +You can use bash script `train.sh` to train this model. + ```bash +sh train.sh +``` + +### inference -bash train.sh +Bash script `infer.sh` is used to infer the trained model. + +```bash +sh infer.sh ``` -## Reference -- [Swin-Transformer](https://github.com/microsoft/Swin-Transformer) + diff --git a/cv/classification/shufflenet_v2x2_0/README.md b/cv/classification/shufflenet_v2x2_0/README.md index cdf879d8..1911ad6b 100644 --- a/cv/classification/shufflenet_v2x2_0/README.md +++ b/cv/classification/shufflenet_v2x2_0/README.md @@ -95,12 +95,20 @@ For ImageNet dataset, you can download it from http://image-net.org/. We provide ### Training -- ddp training with simple bash file + +You can use bash script `train.sh` to train this model. + ```bash +sh train.sh +``` + +### inference -bash train.sh +Bash script `infer.sh` is used to infer the trained model. 
+ +```bash +sh infer.sh ``` -## Reference -- [Swin-Transformer](https://github.com/microsoft/Swin-Transformer) + diff --git a/cv/classification/van_base/README.md b/cv/classification/van_base/README.md new file mode 100644 index 00000000..fc28c391 --- /dev/null +++ b/cv/classification/van_base/README.md @@ -0,0 +1,66 @@ +## VAN + +### Installation +- Install the latest version of OneFlow +```bash +python3 -m pip install oneflow -f https://staging.oneflow.info/branch/master/[PLATFORM] +``` +Find more information on [install oneflow](https://github.com/Oneflow-Inc/oneflow#install-oneflow) + +- Install flowvision + +Then install the latest stable release of flowvision + +```bash +pip install flowvision==0.2.1 +``` + +- Install other requirements +```bash +python3 -m pip install -r requirements.txt +``` + +### Dataset +#### ImageNet +For ImageNet dataset, you can download it from http://image-net.org/. We provide the following two ways to load data: + +- For standard folder dataset, move validation images to labeled sub-folders. The file structure should look like: + ```bash + $ tree data + imagenet + ├── train + │ ├── class1 + │ │ ├── img1.jpeg + │ │ ├── img2.jpeg + │ │ └── ... + │ ├── class2 + │ │ ├── img3.jpeg + │ │ └── ... + │ └── ... + └── val + ├── class1 + │ ├── img4.jpeg + │ ├── img5.jpeg + │ └── ... + ├── class2 + │ ├── img6.jpeg + │ └── ... + └── ... + + +### Training + +You can use bash script `train.sh` to train this model. + +```bash +sh train.sh +``` + +### inference + +Bash script `infer.sh` is used to infer the trained model. 
+ +```bash +sh infer.sh +``` + diff --git a/cv/classification/van_base/config.py b/cv/classification/van_base/config.py new file mode 120000 index 00000000..3721332c --- /dev/null +++ b/cv/classification/van_base/config.py @@ -0,0 +1 @@ +../config.py \ No newline at end of file diff --git a/cv/classification/van_base/configs b/cv/classification/van_base/configs new file mode 120000 index 00000000..271eaab3 --- /dev/null +++ b/cv/classification/van_base/configs @@ -0,0 +1 @@ +../configs \ No newline at end of file diff --git a/cv/classification/van_base/data b/cv/classification/van_base/data new file mode 120000 index 00000000..4909e06e --- /dev/null +++ b/cv/classification/van_base/data @@ -0,0 +1 @@ +../data \ No newline at end of file diff --git a/cv/classification/van_base/infer.sh b/cv/classification/van_base/infer.sh new file mode 100755 index 00000000..569b98df --- /dev/null +++ b/cv/classification/van_base/infer.sh @@ -0,0 +1,16 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=1 +PORT=12346 +MODEL_ARCH="van_base" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/van_settings.yaml \ + --model_arch $MODEL_ARCH \ + --throughput + diff --git a/cv/classification/van_base/logger.py b/cv/classification/van_base/logger.py new file mode 120000 index 00000000..e1dc8ceb --- /dev/null +++ b/cv/classification/van_base/logger.py @@ -0,0 +1 @@ +../logger.py \ No newline at end of file diff --git a/cv/classification/van_base/lr_scheduler.py b/cv/classification/van_base/lr_scheduler.py new file mode 120000 index 00000000..268fb140 --- /dev/null +++ b/cv/classification/van_base/lr_scheduler.py @@ -0,0 +1 @@ +../lr_scheduler.py \ No newline at end of file diff --git a/cv/classification/van_base/main.py b/cv/classification/van_base/main.py new file mode 120000 index 00000000..f05f7527 --- /dev/null +++ b/cv/classification/van_base/main.py @@ -0,0 +1 @@ +../main.py 
\ No newline at end of file diff --git a/cv/classification/van_base/optimizer.py b/cv/classification/van_base/optimizer.py new file mode 120000 index 00000000..e5dec61d --- /dev/null +++ b/cv/classification/van_base/optimizer.py @@ -0,0 +1 @@ +../optimizer.py \ No newline at end of file diff --git a/cv/classification/van_base/requirements.txt b/cv/classification/van_base/requirements.txt new file mode 120000 index 00000000..dc833dd4 --- /dev/null +++ b/cv/classification/van_base/requirements.txt @@ -0,0 +1 @@ +../requirements.txt \ No newline at end of file diff --git a/cv/classification/van_base/train.sh b/cv/classification/van_base/train.sh new file mode 100755 index 00000000..107c1c29 --- /dev/null +++ b/cv/classification/van_base/train.sh @@ -0,0 +1,15 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=8 +PORT=12346 +MODEL_ARCH="van_base" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/van_settings.yaml \ + --model_arch $MODEL_ARCH + diff --git a/cv/classification/van_base/utils.py b/cv/classification/van_base/utils.py new file mode 120000 index 00000000..50fbc6d8 --- /dev/null +++ b/cv/classification/van_base/utils.py @@ -0,0 +1 @@ +../utils.py \ No newline at end of file diff --git a/cv/classification/vit/README.md b/cv/classification/vit_base_patch16_224/README.md similarity index 100% rename from cv/classification/vit/README.md rename to cv/classification/vit_base_patch16_224/README.md diff --git a/cv/classification/vit_base_patch16_224/config.py b/cv/classification/vit_base_patch16_224/config.py new file mode 120000 index 00000000..3721332c --- /dev/null +++ b/cv/classification/vit_base_patch16_224/config.py @@ -0,0 +1 @@ +../config.py \ No newline at end of file diff --git a/cv/classification/vit_base_patch16_224/configs b/cv/classification/vit_base_patch16_224/configs new file mode 120000 index 00000000..271eaab3 --- /dev/null +++ 
b/cv/classification/vit_base_patch16_224/configs @@ -0,0 +1 @@ +../configs \ No newline at end of file diff --git a/cv/classification/vit_base_patch16_224/data b/cv/classification/vit_base_patch16_224/data new file mode 120000 index 00000000..4909e06e --- /dev/null +++ b/cv/classification/vit_base_patch16_224/data @@ -0,0 +1 @@ +../data \ No newline at end of file diff --git a/cv/classification/vit/infer.sh b/cv/classification/vit_base_patch16_224/infer.sh similarity index 100% rename from cv/classification/vit/infer.sh rename to cv/classification/vit_base_patch16_224/infer.sh diff --git a/cv/classification/vit_base_patch16_224/logger.py b/cv/classification/vit_base_patch16_224/logger.py new file mode 120000 index 00000000..e1dc8ceb --- /dev/null +++ b/cv/classification/vit_base_patch16_224/logger.py @@ -0,0 +1 @@ +../logger.py \ No newline at end of file diff --git a/cv/classification/vit_base_patch16_224/lr_scheduler.py b/cv/classification/vit_base_patch16_224/lr_scheduler.py new file mode 120000 index 00000000..268fb140 --- /dev/null +++ b/cv/classification/vit_base_patch16_224/lr_scheduler.py @@ -0,0 +1 @@ +../lr_scheduler.py \ No newline at end of file diff --git a/cv/classification/vit_base_patch16_224/main.py b/cv/classification/vit_base_patch16_224/main.py new file mode 120000 index 00000000..f05f7527 --- /dev/null +++ b/cv/classification/vit_base_patch16_224/main.py @@ -0,0 +1 @@ +../main.py \ No newline at end of file diff --git a/cv/classification/vit_base_patch16_224/optimizer.py b/cv/classification/vit_base_patch16_224/optimizer.py new file mode 120000 index 00000000..e5dec61d --- /dev/null +++ b/cv/classification/vit_base_patch16_224/optimizer.py @@ -0,0 +1 @@ +../optimizer.py \ No newline at end of file diff --git a/cv/classification/vit_base_patch16_224/requirements.txt b/cv/classification/vit_base_patch16_224/requirements.txt new file mode 120000 index 00000000..dc833dd4 --- /dev/null +++ b/cv/classification/vit_base_patch16_224/requirements.txt @@ 
-0,0 +1 @@ +../requirements.txt \ No newline at end of file diff --git a/cv/classification/vit/train.sh b/cv/classification/vit_base_patch16_224/train.sh similarity index 100% rename from cv/classification/vit/train.sh rename to cv/classification/vit_base_patch16_224/train.sh diff --git a/cv/classification/vit_base_patch16_224/utils.py b/cv/classification/vit_base_patch16_224/utils.py new file mode 120000 index 00000000..50fbc6d8 --- /dev/null +++ b/cv/classification/vit_base_patch16_224/utils.py @@ -0,0 +1 @@ +../utils.py \ No newline at end of file diff --git a/cv/classification/vit_base_patch16_224_miil/README.md b/cv/classification/vit_base_patch16_224_miil/README.md new file mode 100644 index 00000000..6ea8dd4a --- /dev/null +++ b/cv/classification/vit_base_patch16_224_miil/README.md @@ -0,0 +1,66 @@ +## vit_base_patch16_224_miil + +### Installation +- Install the latest version of OneFlow +```bash +python3 -m pip install oneflow -f https://staging.oneflow.info/branch/master/[PLATFORM] +``` +Find more information on [install oneflow](https://github.com/Oneflow-Inc/oneflow#install-oneflow) + +- Install flowvision + +Then install the latest stable release of flowvision + +```bash +pip install flowvision==0.2.1 +``` + +- Install other requirements +```bash +python3 -m pip install -r requirements.txt +``` + +### Dataset +#### ImageNet +For ImageNet dataset, you can download it from http://image-net.org/. We provide the following two ways to load data: + +- For standard folder dataset, move validation images to labeled sub-folders. The file structure should look like: + ```bash + $ tree data + imagenet + ├── train + │ ├── class1 + │ │ ├── img1.jpeg + │ │ ├── img2.jpeg + │ │ └── ... + │ ├── class2 + │ │ ├── img3.jpeg + │ │ └── ... + │ └── ... + └── val + ├── class1 + │ ├── img4.jpeg + │ ├── img5.jpeg + │ └── ... + ├── class2 + │ ├── img6.jpeg + │ └── ... + └── ... + + +### Training + +You can use bash script `train.sh` to train this model. 
+ +```bash +sh train.sh +``` + +### inference + +Bash script `infer.sh` is used to infer the trained model. + +```bash +sh infer.sh +``` + diff --git a/cv/classification/vit_base_patch16_224_miil/config.py b/cv/classification/vit_base_patch16_224_miil/config.py new file mode 120000 index 00000000..3721332c --- /dev/null +++ b/cv/classification/vit_base_patch16_224_miil/config.py @@ -0,0 +1 @@ +../config.py \ No newline at end of file diff --git a/cv/classification/vit_base_patch16_224_miil/configs b/cv/classification/vit_base_patch16_224_miil/configs new file mode 120000 index 00000000..271eaab3 --- /dev/null +++ b/cv/classification/vit_base_patch16_224_miil/configs @@ -0,0 +1 @@ +../configs \ No newline at end of file diff --git a/cv/classification/vit_base_patch16_224_miil/data b/cv/classification/vit_base_patch16_224_miil/data new file mode 120000 index 00000000..4909e06e --- /dev/null +++ b/cv/classification/vit_base_patch16_224_miil/data @@ -0,0 +1 @@ +../data \ No newline at end of file diff --git a/cv/classification/vit_base_patch16_224_miil/infer.sh b/cv/classification/vit_base_patch16_224_miil/infer.sh new file mode 100755 index 00000000..ef3ffd2a --- /dev/null +++ b/cv/classification/vit_base_patch16_224_miil/infer.sh @@ -0,0 +1,16 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=1 +PORT=12346 +MODEL_ARCH="vit_base_patch16_224_miil" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/vit_settings.yaml \ + --model_arch $MODEL_ARCH \ + --throughput + diff --git a/cv/classification/vit_base_patch16_224_miil/logger.py b/cv/classification/vit_base_patch16_224_miil/logger.py new file mode 120000 index 00000000..e1dc8ceb --- /dev/null +++ b/cv/classification/vit_base_patch16_224_miil/logger.py @@ -0,0 +1 @@ +../logger.py \ No newline at end of file diff --git a/cv/classification/vit_base_patch16_224_miil/lr_scheduler.py 
b/cv/classification/vit_base_patch16_224_miil/lr_scheduler.py new file mode 120000 index 00000000..268fb140 --- /dev/null +++ b/cv/classification/vit_base_patch16_224_miil/lr_scheduler.py @@ -0,0 +1 @@ +../lr_scheduler.py \ No newline at end of file diff --git a/cv/classification/vit_base_patch16_224_miil/main.py b/cv/classification/vit_base_patch16_224_miil/main.py new file mode 120000 index 00000000..f05f7527 --- /dev/null +++ b/cv/classification/vit_base_patch16_224_miil/main.py @@ -0,0 +1 @@ +../main.py \ No newline at end of file diff --git a/cv/classification/vit_base_patch16_224_miil/optimizer.py b/cv/classification/vit_base_patch16_224_miil/optimizer.py new file mode 120000 index 00000000..e5dec61d --- /dev/null +++ b/cv/classification/vit_base_patch16_224_miil/optimizer.py @@ -0,0 +1 @@ +../optimizer.py \ No newline at end of file diff --git a/cv/classification/vit_base_patch16_224_miil/requirements.txt b/cv/classification/vit_base_patch16_224_miil/requirements.txt new file mode 120000 index 00000000..dc833dd4 --- /dev/null +++ b/cv/classification/vit_base_patch16_224_miil/requirements.txt @@ -0,0 +1 @@ +../requirements.txt \ No newline at end of file diff --git a/cv/classification/vit_base_patch16_224_miil/train.sh b/cv/classification/vit_base_patch16_224_miil/train.sh new file mode 100755 index 00000000..4933628b --- /dev/null +++ b/cv/classification/vit_base_patch16_224_miil/train.sh @@ -0,0 +1,19 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=4 +PORT=12346 +MODEL_ARCH="vit_base_patch16_224_miil" +BATCH_SIZE=32 +LEARNING_RATE=1e-4 + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/vit_settings.yaml \ + --model_arch $MODEL_ARCH \ + --batch-size $BATCH_SIZE \ + --lr $LEARNING_RATE + diff --git a/cv/classification/vit_base_patch16_224_miil/utils.py b/cv/classification/vit_base_patch16_224_miil/utils.py new file mode 120000 index 
00000000..50fbc6d8 --- /dev/null +++ b/cv/classification/vit_base_patch16_224_miil/utils.py @@ -0,0 +1 @@ +../utils.py \ No newline at end of file diff --git a/cv/classification/vit_base_patch16_224_sam/README.md b/cv/classification/vit_base_patch16_224_sam/README.md new file mode 100644 index 00000000..1173f776 --- /dev/null +++ b/cv/classification/vit_base_patch16_224_sam/README.md @@ -0,0 +1,66 @@ +## vit_base_patch16_224_sam + +### Installation +- Install the latest version of OneFlow +```bash +python3 -m pip install oneflow -f https://staging.oneflow.info/branch/master/[PLATFORM] +``` +Find more information on [install oneflow](https://github.com/Oneflow-Inc/oneflow#install-oneflow) + +- Install flowvision + +Then install the latest stable release of flowvision + +```bash +pip install flowvision==0.2.1 +``` + +- Install other requirements +```bash +python3 -m pip install -r requirements.txt +``` + +### Dataset +#### ImageNet +For ImageNet dataset, you can download it from http://image-net.org/. We provide the following two ways to load data: + +- For standard folder dataset, move validation images to labeled sub-folders. The file structure should look like: + ```bash + $ tree data + imagenet + ├── train + │ ├── class1 + │ │ ├── img1.jpeg + │ │ ├── img2.jpeg + │ │ └── ... + │ ├── class2 + │ │ ├── img3.jpeg + │ │ └── ... + │ └── ... + └── val + ├── class1 + │ ├── img4.jpeg + │ ├── img5.jpeg + │ └── ... + ├── class2 + │ ├── img6.jpeg + │ └── ... + └── ... + + +### Training + +You can use bash script `train.sh` to train this model. + +```bash +sh train.sh +``` + +### inference + +Bash script `infer.sh` is used to infer the trained model. 
+ +```bash +sh infer.sh +``` + diff --git a/cv/classification/vit_base_patch16_224_sam/config.py b/cv/classification/vit_base_patch16_224_sam/config.py new file mode 120000 index 00000000..3721332c --- /dev/null +++ b/cv/classification/vit_base_patch16_224_sam/config.py @@ -0,0 +1 @@ +../config.py \ No newline at end of file diff --git a/cv/classification/vit_base_patch16_224_sam/configs b/cv/classification/vit_base_patch16_224_sam/configs new file mode 120000 index 00000000..271eaab3 --- /dev/null +++ b/cv/classification/vit_base_patch16_224_sam/configs @@ -0,0 +1 @@ +../configs \ No newline at end of file diff --git a/cv/classification/vit_base_patch16_224_sam/data b/cv/classification/vit_base_patch16_224_sam/data new file mode 120000 index 00000000..4909e06e --- /dev/null +++ b/cv/classification/vit_base_patch16_224_sam/data @@ -0,0 +1 @@ +../data \ No newline at end of file diff --git a/cv/classification/vit_base_patch16_224_sam/infer.sh b/cv/classification/vit_base_patch16_224_sam/infer.sh new file mode 100755 index 00000000..d20847b6 --- /dev/null +++ b/cv/classification/vit_base_patch16_224_sam/infer.sh @@ -0,0 +1,16 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=1 +PORT=12346 +MODEL_ARCH="vit_base_patch16_224_sam" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/vit_settings.yaml \ + --model_arch $MODEL_ARCH \ + --throughput + diff --git a/cv/classification/vit_base_patch16_224_sam/logger.py b/cv/classification/vit_base_patch16_224_sam/logger.py new file mode 120000 index 00000000..e1dc8ceb --- /dev/null +++ b/cv/classification/vit_base_patch16_224_sam/logger.py @@ -0,0 +1 @@ +../logger.py \ No newline at end of file diff --git a/cv/classification/vit_base_patch16_224_sam/lr_scheduler.py b/cv/classification/vit_base_patch16_224_sam/lr_scheduler.py new file mode 120000 index 00000000..268fb140 --- /dev/null +++ 
b/cv/classification/vit_base_patch16_224_sam/lr_scheduler.py @@ -0,0 +1 @@ +../lr_scheduler.py \ No newline at end of file diff --git a/cv/classification/vit_base_patch16_224_sam/main.py b/cv/classification/vit_base_patch16_224_sam/main.py new file mode 120000 index 00000000..f05f7527 --- /dev/null +++ b/cv/classification/vit_base_patch16_224_sam/main.py @@ -0,0 +1 @@ +../main.py \ No newline at end of file diff --git a/cv/classification/vit_base_patch16_224_sam/optimizer.py b/cv/classification/vit_base_patch16_224_sam/optimizer.py new file mode 120000 index 00000000..e5dec61d --- /dev/null +++ b/cv/classification/vit_base_patch16_224_sam/optimizer.py @@ -0,0 +1 @@ +../optimizer.py \ No newline at end of file diff --git a/cv/classification/vit_base_patch16_224_sam/requirements.txt b/cv/classification/vit_base_patch16_224_sam/requirements.txt new file mode 120000 index 00000000..dc833dd4 --- /dev/null +++ b/cv/classification/vit_base_patch16_224_sam/requirements.txt @@ -0,0 +1 @@ +../requirements.txt \ No newline at end of file diff --git a/cv/classification/vit_base_patch16_224_sam/train.sh b/cv/classification/vit_base_patch16_224_sam/train.sh new file mode 100755 index 00000000..66189d31 --- /dev/null +++ b/cv/classification/vit_base_patch16_224_sam/train.sh @@ -0,0 +1,20 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=2 +PORT=12346 +MODEL_ARCH="vit_base_patch16_224_sam" +BATCH_SIZE=128 +LEARNING_RATE=1e-4 +export CUDA_VISIBLE_DEVICES=5,6 + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/vit_settings.yaml \ + --model_arch $MODEL_ARCH \ + --batch-size $BATCH_SIZE \ + --lr $LEARNING_RATE + diff --git a/cv/classification/vit_base_patch16_224_sam/utils.py b/cv/classification/vit_base_patch16_224_sam/utils.py new file mode 120000 index 00000000..50fbc6d8 --- /dev/null +++ b/cv/classification/vit_base_patch16_224_sam/utils.py @@ -0,0 +1 @@ +../utils.py \ 
No newline at end of file diff --git a/cv/classification/vit_base_patch16_384/README.md b/cv/classification/vit_base_patch16_384/README.md new file mode 100644 index 00000000..3b81ff70 --- /dev/null +++ b/cv/classification/vit_base_patch16_384/README.md @@ -0,0 +1,66 @@ +## vit_base_patch16_384 + +### Installation +- Install the latest version of OneFlow +```bash +python3 -m pip install oneflow -f https://staging.oneflow.info/branch/master/[PLATFORM] +``` +Find more information on [install oneflow](https://github.com/Oneflow-Inc/oneflow#install-oneflow) + +- Install flowvision + +Then install the latest stable release of flowvision + +```bash +pip install flowvision==0.2.1 +``` + +- Install other requirements +```bash +python3 -m pip install -r requirements.txt +``` + +### Dataset +#### ImageNet +For ImageNet dataset, you can download it from http://image-net.org/. We provide the following two ways to load data: + +- For standard folder dataset, move validation images to labeled sub-folders. The file structure should look like: + ```bash + $ tree data + imagenet + ├── train + │ ├── class1 + │ │ ├── img1.jpeg + │ │ ├── img2.jpeg + │ │ └── ... + │ ├── class2 + │ │ ├── img3.jpeg + │ │ └── ... + │ └── ... + └── val + ├── class1 + │ ├── img4.jpeg + │ ├── img5.jpeg + │ └── ... + ├── class2 + │ ├── img6.jpeg + │ └── ... + └── ... + + +### Training + +You can use bash script `train.sh` to train this model. + +```bash +sh train.sh +``` + +### inference + +Bash script `infer.sh` is used to infer the trained model. 
+ +```bash +sh infer.sh +``` + diff --git a/cv/classification/vit_base_patch16_384/config.py b/cv/classification/vit_base_patch16_384/config.py new file mode 120000 index 00000000..3721332c --- /dev/null +++ b/cv/classification/vit_base_patch16_384/config.py @@ -0,0 +1 @@ +../config.py \ No newline at end of file diff --git a/cv/classification/vit_base_patch16_384/configs b/cv/classification/vit_base_patch16_384/configs new file mode 120000 index 00000000..271eaab3 --- /dev/null +++ b/cv/classification/vit_base_patch16_384/configs @@ -0,0 +1 @@ +../configs \ No newline at end of file diff --git a/cv/classification/vit_base_patch16_384/data b/cv/classification/vit_base_patch16_384/data new file mode 120000 index 00000000..4909e06e --- /dev/null +++ b/cv/classification/vit_base_patch16_384/data @@ -0,0 +1 @@ +../data \ No newline at end of file diff --git a/cv/classification/vit_base_patch16_384/infer.sh b/cv/classification/vit_base_patch16_384/infer.sh new file mode 100755 index 00000000..f7e98a5e --- /dev/null +++ b/cv/classification/vit_base_patch16_384/infer.sh @@ -0,0 +1,18 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=1 +PORT=12346 +MODEL_ARCH="vit_base_patch16_384" +IMAGE_SIZE=384 + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/vit_settings.yaml \ + --model_arch $MODEL_ARCH \ + --image-size $IMAGE_SIZE \ + --throughput + diff --git a/cv/classification/vit_base_patch16_384/logger.py b/cv/classification/vit_base_patch16_384/logger.py new file mode 120000 index 00000000..e1dc8ceb --- /dev/null +++ b/cv/classification/vit_base_patch16_384/logger.py @@ -0,0 +1 @@ +../logger.py \ No newline at end of file diff --git a/cv/classification/vit_base_patch16_384/lr_scheduler.py b/cv/classification/vit_base_patch16_384/lr_scheduler.py new file mode 120000 index 00000000..268fb140 --- /dev/null +++ b/cv/classification/vit_base_patch16_384/lr_scheduler.py 
@@ -0,0 +1 @@ +../lr_scheduler.py \ No newline at end of file diff --git a/cv/classification/vit_base_patch16_384/main.py b/cv/classification/vit_base_patch16_384/main.py new file mode 120000 index 00000000..f05f7527 --- /dev/null +++ b/cv/classification/vit_base_patch16_384/main.py @@ -0,0 +1 @@ +../main.py \ No newline at end of file diff --git a/cv/classification/vit_base_patch16_384/optimizer.py b/cv/classification/vit_base_patch16_384/optimizer.py new file mode 120000 index 00000000..e5dec61d --- /dev/null +++ b/cv/classification/vit_base_patch16_384/optimizer.py @@ -0,0 +1 @@ +../optimizer.py \ No newline at end of file diff --git a/cv/classification/vit_base_patch16_384/requirements.txt b/cv/classification/vit_base_patch16_384/requirements.txt new file mode 120000 index 00000000..dc833dd4 --- /dev/null +++ b/cv/classification/vit_base_patch16_384/requirements.txt @@ -0,0 +1 @@ +../requirements.txt \ No newline at end of file diff --git a/cv/classification/vit_base_patch16_384/train.sh b/cv/classification/vit_base_patch16_384/train.sh new file mode 100755 index 00000000..e690471e --- /dev/null +++ b/cv/classification/vit_base_patch16_384/train.sh @@ -0,0 +1,21 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=4 +PORT=12346 +MODEL_ARCH="vit_base_patch16_384" +IMAGE_SIZE=384 +BATCH_SIZE=32 +LEARNING_RATE=1e-4 + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/vit_settings.yaml \ + --model_arch $MODEL_ARCH \ + --batch-size $BATCH_SIZE \ + --image-size $IMAGE_SIZE \ + --lr $LEARNING_RATE + diff --git a/cv/classification/vit_base_patch16_384/utils.py b/cv/classification/vit_base_patch16_384/utils.py new file mode 120000 index 00000000..50fbc6d8 --- /dev/null +++ b/cv/classification/vit_base_patch16_384/utils.py @@ -0,0 +1 @@ +../utils.py \ No newline at end of file diff --git a/cv/classification/vit_base_patch32_224/README.md 
b/cv/classification/vit_base_patch32_224/README.md new file mode 100644 index 00000000..019c52cf --- /dev/null +++ b/cv/classification/vit_base_patch32_224/README.md @@ -0,0 +1,66 @@ +## vit_base_patch32_224 + +### Installation +- Install the latest version of OneFlow +```bash +python3 -m pip install oneflow -f https://staging.oneflow.info/branch/master/[PLATFORM] +``` +Find more information on [install oneflow](https://github.com/Oneflow-Inc/oneflow#install-oneflow) + +- Install flowvision + +Then install the latest stable release of flowvision + +```bash +pip install flowvision==0.2.1 +``` + +- Install other requirements +```bash +python3 -m pip install -r requirements.txt +``` + +### Dataset +#### ImageNet +For ImageNet dataset, you can download it from http://image-net.org/. We provide the following two ways to load data: + +- For standard folder dataset, move validation images to labeled sub-folders. The file structure should look like: + ```bash + $ tree data + imagenet + ├── train + │ ├── class1 + │ │ ├── img1.jpeg + │ │ ├── img2.jpeg + │ │ └── ... + │ ├── class2 + │ │ ├── img3.jpeg + │ │ └── ... + │ └── ... + └── val + ├── class1 + │ ├── img4.jpeg + │ ├── img5.jpeg + │ └── ... + ├── class2 + │ ├── img6.jpeg + │ └── ... + └── ... + + +### Training + +You can use bash script `train.sh` to train this model. + +```bash +sh train.sh +``` + +### inference + +Bash script `infer.sh` is used to infer the trained model. 
+ +```bash +sh infer.sh +``` + diff --git a/cv/classification/vit_base_patch32_224/config.py b/cv/classification/vit_base_patch32_224/config.py new file mode 120000 index 00000000..3721332c --- /dev/null +++ b/cv/classification/vit_base_patch32_224/config.py @@ -0,0 +1 @@ +../config.py \ No newline at end of file diff --git a/cv/classification/vit_base_patch32_224/configs b/cv/classification/vit_base_patch32_224/configs new file mode 120000 index 00000000..271eaab3 --- /dev/null +++ b/cv/classification/vit_base_patch32_224/configs @@ -0,0 +1 @@ +../configs \ No newline at end of file diff --git a/cv/classification/vit_base_patch32_224/data b/cv/classification/vit_base_patch32_224/data new file mode 120000 index 00000000..4909e06e --- /dev/null +++ b/cv/classification/vit_base_patch32_224/data @@ -0,0 +1 @@ +../data \ No newline at end of file diff --git a/cv/classification/vit_base_patch32_224/infer.sh b/cv/classification/vit_base_patch32_224/infer.sh new file mode 100755 index 00000000..476e6d4f --- /dev/null +++ b/cv/classification/vit_base_patch32_224/infer.sh @@ -0,0 +1,16 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=1 +PORT=12346 +MODEL_ARCH="vit_base_patch32_224" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/vit_settings.yaml \ + --model_arch $MODEL_ARCH \ + --throughput + diff --git a/cv/classification/vit_base_patch32_224/logger.py b/cv/classification/vit_base_patch32_224/logger.py new file mode 120000 index 00000000..e1dc8ceb --- /dev/null +++ b/cv/classification/vit_base_patch32_224/logger.py @@ -0,0 +1 @@ +../logger.py \ No newline at end of file diff --git a/cv/classification/vit_base_patch32_224/lr_scheduler.py b/cv/classification/vit_base_patch32_224/lr_scheduler.py new file mode 120000 index 00000000..268fb140 --- /dev/null +++ b/cv/classification/vit_base_patch32_224/lr_scheduler.py @@ -0,0 +1 @@ +../lr_scheduler.py \ No 
newline at end of file diff --git a/cv/classification/vit_base_patch32_224/main.py b/cv/classification/vit_base_patch32_224/main.py new file mode 120000 index 00000000..f05f7527 --- /dev/null +++ b/cv/classification/vit_base_patch32_224/main.py @@ -0,0 +1 @@ +../main.py \ No newline at end of file diff --git a/cv/classification/vit_base_patch32_224/optimizer.py b/cv/classification/vit_base_patch32_224/optimizer.py new file mode 120000 index 00000000..e5dec61d --- /dev/null +++ b/cv/classification/vit_base_patch32_224/optimizer.py @@ -0,0 +1 @@ +../optimizer.py \ No newline at end of file diff --git a/cv/classification/vit_base_patch32_224/requirements.txt b/cv/classification/vit_base_patch32_224/requirements.txt new file mode 120000 index 00000000..dc833dd4 --- /dev/null +++ b/cv/classification/vit_base_patch32_224/requirements.txt @@ -0,0 +1 @@ +../requirements.txt \ No newline at end of file diff --git a/cv/classification/vit_base_patch32_224/train.sh b/cv/classification/vit_base_patch32_224/train.sh new file mode 100755 index 00000000..09ccc83a --- /dev/null +++ b/cv/classification/vit_base_patch32_224/train.sh @@ -0,0 +1,19 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=2 +PORT=12346 +MODEL_ARCH="vit_base_patch32_224" +BATCH_SIZE=256 +LEARNING_RATE=1e-4 + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/vit_settings.yaml \ + --model_arch $MODEL_ARCH \ + --batch-size $BATCH_SIZE \ + --lr $LEARNING_RATE + diff --git a/cv/classification/vit_base_patch32_224/utils.py b/cv/classification/vit_base_patch32_224/utils.py new file mode 120000 index 00000000..50fbc6d8 --- /dev/null +++ b/cv/classification/vit_base_patch32_224/utils.py @@ -0,0 +1 @@ +../utils.py \ No newline at end of file diff --git a/cv/classification/vit_base_patch32_224_sam/README.md b/cv/classification/vit_base_patch32_224_sam/README.md new file mode 100644 index 00000000..f3fd836e --- 
/dev/null +++ b/cv/classification/vit_base_patch32_224_sam/README.md @@ -0,0 +1,66 @@ +## vit_base_patch32_224_sam + +### Installation +- Install the latest version of OneFlow +```bash +python3 -m pip install oneflow -f https://staging.oneflow.info/branch/master/[PLATFORM] +``` +Find more information on [install oneflow](https://github.com/Oneflow-Inc/oneflow#install-oneflow) + +- Install flowvision + +Then install the latest stable release of flowvision + +```bash +pip install flowvision==0.2.1 +``` + +- Install other requirements +```bash +python3 -m pip install -r requirements.txt +``` + +### Dataset +#### ImageNet +For ImageNet dataset, you can download it from http://image-net.org/. We provide the following two ways to load data: + +- For standard folder dataset, move validation images to labeled sub-folders. The file structure should look like: + ```bash + $ tree data + imagenet + ├── train + │ ├── class1 + │ │ ├── img1.jpeg + │ │ ├── img2.jpeg + │ │ └── ... + │ ├── class2 + │ │ ├── img3.jpeg + │ │ └── ... + │ └── ... + └── val + ├── class1 + │ ├── img4.jpeg + │ ├── img5.jpeg + │ └── ... + ├── class2 + │ ├── img6.jpeg + │ └── ... + └── ... + + +### Training + +You can use bash script `train.sh` to train this model. + +```bash +sh train.sh +``` + +### inference + +Bash script `infer.sh` is used to infer the trained model. 
+ +```bash +sh infer.sh +``` + diff --git a/cv/classification/vit_base_patch32_224_sam/config.py b/cv/classification/vit_base_patch32_224_sam/config.py new file mode 120000 index 00000000..3721332c --- /dev/null +++ b/cv/classification/vit_base_patch32_224_sam/config.py @@ -0,0 +1 @@ +../config.py \ No newline at end of file diff --git a/cv/classification/vit_base_patch32_224_sam/configs b/cv/classification/vit_base_patch32_224_sam/configs new file mode 120000 index 00000000..271eaab3 --- /dev/null +++ b/cv/classification/vit_base_patch32_224_sam/configs @@ -0,0 +1 @@ +../configs \ No newline at end of file diff --git a/cv/classification/vit_base_patch32_224_sam/data b/cv/classification/vit_base_patch32_224_sam/data new file mode 120000 index 00000000..4909e06e --- /dev/null +++ b/cv/classification/vit_base_patch32_224_sam/data @@ -0,0 +1 @@ +../data \ No newline at end of file diff --git a/cv/classification/vit_base_patch32_224_sam/infer.sh b/cv/classification/vit_base_patch32_224_sam/infer.sh new file mode 100755 index 00000000..03a700e8 --- /dev/null +++ b/cv/classification/vit_base_patch32_224_sam/infer.sh @@ -0,0 +1,16 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=1 +PORT=12346 +MODEL_ARCH="vit_base_patch32_224_sam" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/vit_settings.yaml \ + --model_arch $MODEL_ARCH \ + --throughput + diff --git a/cv/classification/vit_base_patch32_224_sam/logger.py b/cv/classification/vit_base_patch32_224_sam/logger.py new file mode 120000 index 00000000..e1dc8ceb --- /dev/null +++ b/cv/classification/vit_base_patch32_224_sam/logger.py @@ -0,0 +1 @@ +../logger.py \ No newline at end of file diff --git a/cv/classification/vit_base_patch32_224_sam/lr_scheduler.py b/cv/classification/vit_base_patch32_224_sam/lr_scheduler.py new file mode 120000 index 00000000..268fb140 --- /dev/null +++ 
b/cv/classification/vit_base_patch32_224_sam/lr_scheduler.py @@ -0,0 +1 @@ +../lr_scheduler.py \ No newline at end of file diff --git a/cv/classification/vit_base_patch32_224_sam/main.py b/cv/classification/vit_base_patch32_224_sam/main.py new file mode 120000 index 00000000..f05f7527 --- /dev/null +++ b/cv/classification/vit_base_patch32_224_sam/main.py @@ -0,0 +1 @@ +../main.py \ No newline at end of file diff --git a/cv/classification/vit_base_patch32_224_sam/optimizer.py b/cv/classification/vit_base_patch32_224_sam/optimizer.py new file mode 120000 index 00000000..e5dec61d --- /dev/null +++ b/cv/classification/vit_base_patch32_224_sam/optimizer.py @@ -0,0 +1 @@ +../optimizer.py \ No newline at end of file diff --git a/cv/classification/vit_base_patch32_224_sam/requirements.txt b/cv/classification/vit_base_patch32_224_sam/requirements.txt new file mode 120000 index 00000000..dc833dd4 --- /dev/null +++ b/cv/classification/vit_base_patch32_224_sam/requirements.txt @@ -0,0 +1 @@ +../requirements.txt \ No newline at end of file diff --git a/cv/classification/vit_base_patch32_224_sam/train.sh b/cv/classification/vit_base_patch32_224_sam/train.sh new file mode 100755 index 00000000..aac164b7 --- /dev/null +++ b/cv/classification/vit_base_patch32_224_sam/train.sh @@ -0,0 +1,19 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=2 +PORT=12346 +MODEL_ARCH="vit_base_patch32_224_sam" +BATCH_SIZE=128 +LEARNING_RATE=1e-4 + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/vit_settings.yaml \ + --model_arch $MODEL_ARCH \ + --batch-size $BATCH_SIZE \ + --lr $LEARNING_RATE + diff --git a/cv/classification/vit_base_patch32_224_sam/utils.py b/cv/classification/vit_base_patch32_224_sam/utils.py new file mode 120000 index 00000000..50fbc6d8 --- /dev/null +++ b/cv/classification/vit_base_patch32_224_sam/utils.py @@ -0,0 +1 @@ +../utils.py \ No newline at end of file diff 
--git a/cv/classification/vit_base_patch32_384/README.md b/cv/classification/vit_base_patch32_384/README.md new file mode 100644 index 00000000..0fcc247d --- /dev/null +++ b/cv/classification/vit_base_patch32_384/README.md @@ -0,0 +1,66 @@ +## vit_base_patch32_384 + +### Installation +- Install the latest version of OneFlow +```bash +python3 -m pip install oneflow -f https://staging.oneflow.info/branch/master/[PLATFORM] +``` +Find more information on [install oneflow](https://github.com/Oneflow-Inc/oneflow#install-oneflow) + +- Install flowvision + +Then install the latest stable release of flowvision + +```bash +pip install flowvision==0.2.1 +``` + +- Install other requirements +```bash +python3 -m pip install -r requirements.txt +``` + +### Dataset +#### ImageNet +For ImageNet dataset, you can download it from http://image-net.org/. We provide the following two ways to load data: + +- For standard folder dataset, move validation images to labeled sub-folders. The file structure should look like: + ```bash + $ tree data + imagenet + ├── train + │ ├── class1 + │ │ ├── img1.jpeg + │ │ ├── img2.jpeg + │ │ └── ... + │ ├── class2 + │ │ ├── img3.jpeg + │ │ └── ... + │ └── ... + └── val + ├── class1 + │ ├── img4.jpeg + │ ├── img5.jpeg + │ └── ... + ├── class2 + │ ├── img6.jpeg + │ └── ... + └── ... + + +### Training + +You can use bash script `train.sh` to train this model. + +```bash +sh train.sh +``` + +### inference + +Bash script `infer.sh` is used to infer the trained model. 
+ +```bash +sh infer.sh +``` + diff --git a/cv/classification/vit_base_patch32_384/config.py b/cv/classification/vit_base_patch32_384/config.py new file mode 120000 index 00000000..3721332c --- /dev/null +++ b/cv/classification/vit_base_patch32_384/config.py @@ -0,0 +1 @@ +../config.py \ No newline at end of file diff --git a/cv/classification/vit_base_patch32_384/configs b/cv/classification/vit_base_patch32_384/configs new file mode 120000 index 00000000..271eaab3 --- /dev/null +++ b/cv/classification/vit_base_patch32_384/configs @@ -0,0 +1 @@ +../configs \ No newline at end of file diff --git a/cv/classification/vit_base_patch32_384/data b/cv/classification/vit_base_patch32_384/data new file mode 120000 index 00000000..4909e06e --- /dev/null +++ b/cv/classification/vit_base_patch32_384/data @@ -0,0 +1 @@ +../data \ No newline at end of file diff --git a/cv/classification/vit_base_patch32_384/infer.sh b/cv/classification/vit_base_patch32_384/infer.sh new file mode 100755 index 00000000..ef559941 --- /dev/null +++ b/cv/classification/vit_base_patch32_384/infer.sh @@ -0,0 +1,18 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=1 +PORT=12346 +MODEL_ARCH="vit_base_patch32_384" +IMAGE_SIZE=384 + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/vit_settings.yaml \ + --model_arch $MODEL_ARCH \ + --image-size $IMAGE_SIZE \ + --throughput + diff --git a/cv/classification/vit_base_patch32_384/logger.py b/cv/classification/vit_base_patch32_384/logger.py new file mode 120000 index 00000000..e1dc8ceb --- /dev/null +++ b/cv/classification/vit_base_patch32_384/logger.py @@ -0,0 +1 @@ +../logger.py \ No newline at end of file diff --git a/cv/classification/vit_base_patch32_384/lr_scheduler.py b/cv/classification/vit_base_patch32_384/lr_scheduler.py new file mode 120000 index 00000000..268fb140 --- /dev/null +++ b/cv/classification/vit_base_patch32_384/lr_scheduler.py 
@@ -0,0 +1 @@ +../lr_scheduler.py \ No newline at end of file diff --git a/cv/classification/vit_base_patch32_384/main.py b/cv/classification/vit_base_patch32_384/main.py new file mode 120000 index 00000000..f05f7527 --- /dev/null +++ b/cv/classification/vit_base_patch32_384/main.py @@ -0,0 +1 @@ +../main.py \ No newline at end of file diff --git a/cv/classification/vit_base_patch32_384/optimizer.py b/cv/classification/vit_base_patch32_384/optimizer.py new file mode 120000 index 00000000..e5dec61d --- /dev/null +++ b/cv/classification/vit_base_patch32_384/optimizer.py @@ -0,0 +1 @@ +../optimizer.py \ No newline at end of file diff --git a/cv/classification/vit_base_patch32_384/requirements.txt b/cv/classification/vit_base_patch32_384/requirements.txt new file mode 120000 index 00000000..dc833dd4 --- /dev/null +++ b/cv/classification/vit_base_patch32_384/requirements.txt @@ -0,0 +1 @@ +../requirements.txt \ No newline at end of file diff --git a/cv/classification/vit_base_patch32_384/train.sh b/cv/classification/vit_base_patch32_384/train.sh new file mode 100755 index 00000000..e481a789 --- /dev/null +++ b/cv/classification/vit_base_patch32_384/train.sh @@ -0,0 +1,21 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=2 +PORT=12346 +MODEL_ARCH="vit_base_patch32_384" +IMAGE_SIZE=384 +BATCH_SIZE=128 +LEARNING_RATE=1e-4 + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/vit_settings.yaml \ + --model_arch $MODEL_ARCH \ + --batch-size $BATCH_SIZE \ + --image-size $IMAGE_SIZE \ + --lr $LEARNING_RATE + diff --git a/cv/classification/vit_base_patch32_384/utils.py b/cv/classification/vit_base_patch32_384/utils.py new file mode 120000 index 00000000..50fbc6d8 --- /dev/null +++ b/cv/classification/vit_base_patch32_384/utils.py @@ -0,0 +1 @@ +../utils.py \ No newline at end of file diff --git a/cv/classification/vit_base_patch8_224/README.md 
b/cv/classification/vit_base_patch8_224/README.md new file mode 100644 index 00000000..4617058c --- /dev/null +++ b/cv/classification/vit_base_patch8_224/README.md @@ -0,0 +1,66 @@ +## vit_base_patch8_224 + +### Installation +- Install the latest version of OneFlow +```bash +python3 -m pip install oneflow -f https://staging.oneflow.info/branch/master/[PLATFORM] +``` +Find more information on [install oneflow](https://github.com/Oneflow-Inc/oneflow#install-oneflow) + +- Install flowvision + +Then install the latest stable release of flowvision + +```bash +pip install flowvision==0.2.1 +``` + +- Install other requirements +```bash +python3 -m pip install -r requirements.txt +``` + +### Dataset +#### ImageNet +For ImageNet dataset, you can download it from http://image-net.org/. We provide the following two ways to load data: + +- For standard folder dataset, move validation images to labeled sub-folders. The file structure should look like: + ```bash + $ tree data + imagenet + ├── train + │ ├── class1 + │ │ ├── img1.jpeg + │ │ ├── img2.jpeg + │ │ └── ... + │ ├── class2 + │ │ ├── img3.jpeg + │ │ └── ... + │ └── ... + └── val + ├── class1 + │ ├── img4.jpeg + │ ├── img5.jpeg + │ └── ... + ├── class2 + │ ├── img6.jpeg + │ └── ... + └── ... + + +### Training + +You can use bash script `train.sh` to train this model. + +```bash +sh train.sh +``` + +### inference + +Bash script `infer.sh` is used to infer the trained model. 
+ +```bash +sh infer.sh +``` + diff --git a/cv/classification/vit_base_patch8_224/config.py b/cv/classification/vit_base_patch8_224/config.py new file mode 120000 index 00000000..3721332c --- /dev/null +++ b/cv/classification/vit_base_patch8_224/config.py @@ -0,0 +1 @@ +../config.py \ No newline at end of file diff --git a/cv/classification/vit_base_patch8_224/configs b/cv/classification/vit_base_patch8_224/configs new file mode 120000 index 00000000..271eaab3 --- /dev/null +++ b/cv/classification/vit_base_patch8_224/configs @@ -0,0 +1 @@ +../configs \ No newline at end of file diff --git a/cv/classification/vit_base_patch8_224/data b/cv/classification/vit_base_patch8_224/data new file mode 120000 index 00000000..4909e06e --- /dev/null +++ b/cv/classification/vit_base_patch8_224/data @@ -0,0 +1 @@ +../data \ No newline at end of file diff --git a/cv/classification/vit_base_patch8_224/infer.sh b/cv/classification/vit_base_patch8_224/infer.sh new file mode 100755 index 00000000..c7c2b4d3 --- /dev/null +++ b/cv/classification/vit_base_patch8_224/infer.sh @@ -0,0 +1,16 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=1 +PORT=12346 +MODEL_ARCH="vit_base_patch8_224" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/vit_settings.yaml \ + --model_arch $MODEL_ARCH \ + --throughput + diff --git a/cv/classification/vit_base_patch8_224/logger.py b/cv/classification/vit_base_patch8_224/logger.py new file mode 120000 index 00000000..e1dc8ceb --- /dev/null +++ b/cv/classification/vit_base_patch8_224/logger.py @@ -0,0 +1 @@ +../logger.py \ No newline at end of file diff --git a/cv/classification/vit_base_patch8_224/lr_scheduler.py b/cv/classification/vit_base_patch8_224/lr_scheduler.py new file mode 120000 index 00000000..268fb140 --- /dev/null +++ b/cv/classification/vit_base_patch8_224/lr_scheduler.py @@ -0,0 +1 @@ +../lr_scheduler.py \ No newline at end of file 
diff --git a/cv/classification/vit_base_patch8_224/main.py b/cv/classification/vit_base_patch8_224/main.py new file mode 120000 index 00000000..f05f7527 --- /dev/null +++ b/cv/classification/vit_base_patch8_224/main.py @@ -0,0 +1 @@ +../main.py \ No newline at end of file diff --git a/cv/classification/vit_base_patch8_224/optimizer.py b/cv/classification/vit_base_patch8_224/optimizer.py new file mode 120000 index 00000000..e5dec61d --- /dev/null +++ b/cv/classification/vit_base_patch8_224/optimizer.py @@ -0,0 +1 @@ +../optimizer.py \ No newline at end of file diff --git a/cv/classification/vit_base_patch8_224/requirements.txt b/cv/classification/vit_base_patch8_224/requirements.txt new file mode 120000 index 00000000..dc833dd4 --- /dev/null +++ b/cv/classification/vit_base_patch8_224/requirements.txt @@ -0,0 +1 @@ +../requirements.txt \ No newline at end of file diff --git a/cv/classification/vit_base_patch8_224/train.sh b/cv/classification/vit_base_patch8_224/train.sh new file mode 100755 index 00000000..c7fc2c8e --- /dev/null +++ b/cv/classification/vit_base_patch8_224/train.sh @@ -0,0 +1,19 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=4 +PORT=12346 +MODEL_ARCH="vit_base_patch8_224" +BATCH_SIZE=16 +LEARNING_RATE=3e-5 + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/vit_settings.yaml \ + --model_arch $MODEL_ARCH \ + --batch-size $BATCH_SIZE \ + --lr $LEARNING_RATE + diff --git a/cv/classification/vit_base_patch8_224/utils.py b/cv/classification/vit_base_patch8_224/utils.py new file mode 120000 index 00000000..50fbc6d8 --- /dev/null +++ b/cv/classification/vit_base_patch8_224/utils.py @@ -0,0 +1 @@ +../utils.py \ No newline at end of file diff --git a/cv/classification/vit_large_patch16_224/README.md b/cv/classification/vit_large_patch16_224/README.md new file mode 100644 index 00000000..18ce3b8f --- /dev/null +++ 
b/cv/classification/vit_large_patch16_224/README.md @@ -0,0 +1,66 @@ +## vit_large_patch16_224 + +### Installation +- Install the latest version of OneFlow +```bash +python3 -m pip install oneflow -f https://staging.oneflow.info/branch/master/[PLATFORM] +``` +Find more information on [install oneflow](https://github.com/Oneflow-Inc/oneflow#install-oneflow) + +- Install flowvision + +Then install the latest stable release of flowvision + +```bash +pip install flowvision==0.2.1 +``` + +- Install other requirements +```bash +python3 -m pip install -r requirements.txt +``` + +### Dataset +#### ImageNet +For ImageNet dataset, you can download it from http://image-net.org/. We provide the following two ways to load data: + +- For standard folder dataset, move validation images to labeled sub-folders. The file structure should look like: + ```bash + $ tree data + imagenet + ├── train + │ ├── class1 + │ │ ├── img1.jpeg + │ │ ├── img2.jpeg + │ │ └── ... + │ ├── class2 + │ │ ├── img3.jpeg + │ │ └── ... + │ └── ... + └── val + ├── class1 + │ ├── img4.jpeg + │ ├── img5.jpeg + │ └── ... + ├── class2 + │ ├── img6.jpeg + │ └── ... + └── ... + + +### Training + +You can use bash script `train.sh` to train this model. + +```bash +sh train.sh +``` + +### inference + +Bash script `infer.sh` is used to infer the trained model. 
+ +```bash +sh infer.sh +``` + diff --git a/cv/classification/vit_large_patch16_224/config.py b/cv/classification/vit_large_patch16_224/config.py new file mode 120000 index 00000000..3721332c --- /dev/null +++ b/cv/classification/vit_large_patch16_224/config.py @@ -0,0 +1 @@ +../config.py \ No newline at end of file diff --git a/cv/classification/vit_large_patch16_224/configs b/cv/classification/vit_large_patch16_224/configs new file mode 120000 index 00000000..271eaab3 --- /dev/null +++ b/cv/classification/vit_large_patch16_224/configs @@ -0,0 +1 @@ +../configs \ No newline at end of file diff --git a/cv/classification/vit_large_patch16_224/data b/cv/classification/vit_large_patch16_224/data new file mode 120000 index 00000000..4909e06e --- /dev/null +++ b/cv/classification/vit_large_patch16_224/data @@ -0,0 +1 @@ +../data \ No newline at end of file diff --git a/cv/classification/vit_large_patch16_224/infer.sh b/cv/classification/vit_large_patch16_224/infer.sh new file mode 100755 index 00000000..79ab4d3f --- /dev/null +++ b/cv/classification/vit_large_patch16_224/infer.sh @@ -0,0 +1,16 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=1 +PORT=12346 +MODEL_ARCH="vit_large_patch16_224" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/vit_settings.yaml \ + --model_arch $MODEL_ARCH \ + --throughput + diff --git a/cv/classification/vit_large_patch16_224/logger.py b/cv/classification/vit_large_patch16_224/logger.py new file mode 120000 index 00000000..e1dc8ceb --- /dev/null +++ b/cv/classification/vit_large_patch16_224/logger.py @@ -0,0 +1 @@ +../logger.py \ No newline at end of file diff --git a/cv/classification/vit_large_patch16_224/lr_scheduler.py b/cv/classification/vit_large_patch16_224/lr_scheduler.py new file mode 120000 index 00000000..268fb140 --- /dev/null +++ b/cv/classification/vit_large_patch16_224/lr_scheduler.py @@ -0,0 +1 @@ 
+../lr_scheduler.py \ No newline at end of file diff --git a/cv/classification/vit_large_patch16_224/main.py b/cv/classification/vit_large_patch16_224/main.py new file mode 120000 index 00000000..f05f7527 --- /dev/null +++ b/cv/classification/vit_large_patch16_224/main.py @@ -0,0 +1 @@ +../main.py \ No newline at end of file diff --git a/cv/classification/vit_large_patch16_224/optimizer.py b/cv/classification/vit_large_patch16_224/optimizer.py new file mode 120000 index 00000000..e5dec61d --- /dev/null +++ b/cv/classification/vit_large_patch16_224/optimizer.py @@ -0,0 +1 @@ +../optimizer.py \ No newline at end of file diff --git a/cv/classification/vit_large_patch16_224/requirements.txt b/cv/classification/vit_large_patch16_224/requirements.txt new file mode 120000 index 00000000..dc833dd4 --- /dev/null +++ b/cv/classification/vit_large_patch16_224/requirements.txt @@ -0,0 +1 @@ +../requirements.txt \ No newline at end of file diff --git a/cv/classification/vit_large_patch16_224/train.sh b/cv/classification/vit_large_patch16_224/train.sh new file mode 100755 index 00000000..399c7931 --- /dev/null +++ b/cv/classification/vit_large_patch16_224/train.sh @@ -0,0 +1,19 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=2 +PORT=12346 +MODEL_ARCH="vit_large_patch16_224" +BATCH_SIZE=32 +LEARNING_RATE=1e-5 + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/vit_settings.yaml \ + --model_arch $MODEL_ARCH \ + --batch-size $BATCH_SIZE \ + --lr $LEARNING_RATE + diff --git a/cv/classification/vit_large_patch16_224/utils.py b/cv/classification/vit_large_patch16_224/utils.py new file mode 120000 index 00000000..50fbc6d8 --- /dev/null +++ b/cv/classification/vit_large_patch16_224/utils.py @@ -0,0 +1 @@ +../utils.py \ No newline at end of file diff --git a/cv/classification/vit_large_patch16_384/README.md b/cv/classification/vit_large_patch16_384/README.md new file mode 
100644 index 00000000..11b19143 --- /dev/null +++ b/cv/classification/vit_large_patch16_384/README.md @@ -0,0 +1,66 @@ +## vit_large_patch16_384 + +### Installation +- Install the latest version of OneFlow +```bash +python3 -m pip install oneflow -f https://staging.oneflow.info/branch/master/[PLATFORM] +``` +Find more information on [install oneflow](https://github.com/Oneflow-Inc/oneflow#install-oneflow) + +- Install flowvision + +Then install the latest stable release of flowvision + +```bash +pip install flowvision==0.2.1 +``` + +- Install other requirements +```bash +python3 -m pip install -r requirements.txt +``` + +### Dataset +#### ImageNet +For ImageNet dataset, you can download it from http://image-net.org/. We provide the following two ways to load data: + +- For standard folder dataset, move validation images to labeled sub-folders. The file structure should look like: + ```bash + $ tree data + imagenet + ├── train + │ ├── class1 + │ │ ├── img1.jpeg + │ │ ├── img2.jpeg + │ │ └── ... + │ ├── class2 + │ │ ├── img3.jpeg + │ │ └── ... + │ └── ... + └── val + ├── class1 + │ ├── img4.jpeg + │ ├── img5.jpeg + │ └── ... + ├── class2 + │ ├── img6.jpeg + │ └── ... + └── ... + + +### Training + +You can use bash script `train.sh` to train this model. + +```bash +sh train.sh +``` + +### inference + +Bash script `infer.sh` is used to infer the trained model. 
+ +```bash +sh infer.sh +``` + diff --git a/cv/classification/vit_large_patch16_384/config.py b/cv/classification/vit_large_patch16_384/config.py new file mode 120000 index 00000000..3721332c --- /dev/null +++ b/cv/classification/vit_large_patch16_384/config.py @@ -0,0 +1 @@ +../config.py \ No newline at end of file diff --git a/cv/classification/vit_large_patch16_384/configs b/cv/classification/vit_large_patch16_384/configs new file mode 120000 index 00000000..271eaab3 --- /dev/null +++ b/cv/classification/vit_large_patch16_384/configs @@ -0,0 +1 @@ +../configs \ No newline at end of file diff --git a/cv/classification/vit_large_patch16_384/data b/cv/classification/vit_large_patch16_384/data new file mode 120000 index 00000000..4909e06e --- /dev/null +++ b/cv/classification/vit_large_patch16_384/data @@ -0,0 +1 @@ +../data \ No newline at end of file diff --git a/cv/classification/vit_large_patch16_384/infer.sh b/cv/classification/vit_large_patch16_384/infer.sh new file mode 100755 index 00000000..73e26e6c --- /dev/null +++ b/cv/classification/vit_large_patch16_384/infer.sh @@ -0,0 +1,18 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=1 +PORT=12346 +MODEL_ARCH="vit_large_patch16_384" +IMAGE_SIZE=384 + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/vit_settings.yaml \ + --model_arch $MODEL_ARCH \ + --image-size $IMAGE_SIZE \ + --throughput + diff --git a/cv/classification/vit_large_patch16_384/logger.py b/cv/classification/vit_large_patch16_384/logger.py new file mode 120000 index 00000000..e1dc8ceb --- /dev/null +++ b/cv/classification/vit_large_patch16_384/logger.py @@ -0,0 +1 @@ +../logger.py \ No newline at end of file diff --git a/cv/classification/vit_large_patch16_384/lr_scheduler.py b/cv/classification/vit_large_patch16_384/lr_scheduler.py new file mode 120000 index 00000000..268fb140 --- /dev/null +++ 
b/cv/classification/vit_large_patch16_384/lr_scheduler.py @@ -0,0 +1 @@ +../lr_scheduler.py \ No newline at end of file diff --git a/cv/classification/vit_large_patch16_384/main.py b/cv/classification/vit_large_patch16_384/main.py new file mode 120000 index 00000000..f05f7527 --- /dev/null +++ b/cv/classification/vit_large_patch16_384/main.py @@ -0,0 +1 @@ +../main.py \ No newline at end of file diff --git a/cv/classification/vit_large_patch16_384/optimizer.py b/cv/classification/vit_large_patch16_384/optimizer.py new file mode 120000 index 00000000..e5dec61d --- /dev/null +++ b/cv/classification/vit_large_patch16_384/optimizer.py @@ -0,0 +1 @@ +../optimizer.py \ No newline at end of file diff --git a/cv/classification/vit_large_patch16_384/requirements.txt b/cv/classification/vit_large_patch16_384/requirements.txt new file mode 120000 index 00000000..dc833dd4 --- /dev/null +++ b/cv/classification/vit_large_patch16_384/requirements.txt @@ -0,0 +1 @@ +../requirements.txt \ No newline at end of file diff --git a/cv/classification/vit_large_patch16_384/train.sh b/cv/classification/vit_large_patch16_384/train.sh new file mode 100755 index 00000000..b1db623d --- /dev/null +++ b/cv/classification/vit_large_patch16_384/train.sh @@ -0,0 +1,21 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=4 +PORT=12346 +MODEL_ARCH="vit_large_patch16_384" +IMAGE_SIZE=384 +BATCH_SIZE=8 +LEARNING_RATE=1e-5 + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/vit_settings.yaml \ + --model_arch $MODEL_ARCH \ + --batch-size $BATCH_SIZE \ + --image-size $IMAGE_SIZE \ + --lr $LEARNING_RATE + diff --git a/cv/classification/vit_large_patch16_384/utils.py b/cv/classification/vit_large_patch16_384/utils.py new file mode 120000 index 00000000..50fbc6d8 --- /dev/null +++ b/cv/classification/vit_large_patch16_384/utils.py @@ -0,0 +1 @@ +../utils.py \ No newline at end of file diff --git 
a/cv/classification/vit_large_patch32_384/README.md b/cv/classification/vit_large_patch32_384/README.md new file mode 100644 index 00000000..35a5ef07 --- /dev/null +++ b/cv/classification/vit_large_patch32_384/README.md @@ -0,0 +1,66 @@ +## vit_large_patch32_384 + +### Installation +- Install the latest version of OneFlow +```bash +python3 -m pip install oneflow -f https://staging.oneflow.info/branch/master/[PLATFORM] +``` +Find more information on [install oneflow](https://github.com/Oneflow-Inc/oneflow#install-oneflow) + +- Install flowvision + +Then install the latest stable release of flowvision + +```bash +pip install flowvision==0.2.1 +``` + +- Install other requirements +```bash +python3 -m pip install -r requirements.txt +``` + +### Dataset +#### ImageNet +For ImageNet dataset, you can download it from http://image-net.org/. We provide the following two ways to load data: + +- For standard folder dataset, move validation images to labeled sub-folders. The file structure should look like: + ```bash + $ tree data + imagenet + ├── train + │ ├── class1 + │ │ ├── img1.jpeg + │ │ ├── img2.jpeg + │ │ └── ... + │ ├── class2 + │ │ ├── img3.jpeg + │ │ └── ... + │ └── ... + └── val + ├── class1 + │ ├── img4.jpeg + │ ├── img5.jpeg + │ └── ... + ├── class2 + │ ├── img6.jpeg + │ └── ... + └── ... + + +### Training + +You can use bash script `train.sh` to train this model. + +```bash +sh train.sh +``` + +### inference + +Bash script `infer.sh` is used to infer the trained model. 
+ +```bash +sh infer.sh +``` + diff --git a/cv/classification/vit_large_patch32_384/config.py b/cv/classification/vit_large_patch32_384/config.py new file mode 120000 index 00000000..3721332c --- /dev/null +++ b/cv/classification/vit_large_patch32_384/config.py @@ -0,0 +1 @@ +../config.py \ No newline at end of file diff --git a/cv/classification/vit_large_patch32_384/configs b/cv/classification/vit_large_patch32_384/configs new file mode 120000 index 00000000..271eaab3 --- /dev/null +++ b/cv/classification/vit_large_patch32_384/configs @@ -0,0 +1 @@ +../configs \ No newline at end of file diff --git a/cv/classification/vit_large_patch32_384/data b/cv/classification/vit_large_patch32_384/data new file mode 120000 index 00000000..4909e06e --- /dev/null +++ b/cv/classification/vit_large_patch32_384/data @@ -0,0 +1 @@ +../data \ No newline at end of file diff --git a/cv/classification/vit_large_patch32_384/infer.sh b/cv/classification/vit_large_patch32_384/infer.sh new file mode 100755 index 00000000..507f7cbb --- /dev/null +++ b/cv/classification/vit_large_patch32_384/infer.sh @@ -0,0 +1,18 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=1 +PORT=12346 +MODEL_ARCH="vit_large_patch32_384" +IMAGE_SIZE=384 + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/vit_settings.yaml \ + --model_arch $MODEL_ARCH \ + --image-size $IMAGE_SIZE \ + --throughput + diff --git a/cv/classification/vit_large_patch32_384/logger.py b/cv/classification/vit_large_patch32_384/logger.py new file mode 120000 index 00000000..e1dc8ceb --- /dev/null +++ b/cv/classification/vit_large_patch32_384/logger.py @@ -0,0 +1 @@ +../logger.py \ No newline at end of file diff --git a/cv/classification/vit_large_patch32_384/lr_scheduler.py b/cv/classification/vit_large_patch32_384/lr_scheduler.py new file mode 120000 index 00000000..268fb140 --- /dev/null +++ 
b/cv/classification/vit_large_patch32_384/lr_scheduler.py @@ -0,0 +1 @@ +../lr_scheduler.py \ No newline at end of file diff --git a/cv/classification/vit_large_patch32_384/main.py b/cv/classification/vit_large_patch32_384/main.py new file mode 120000 index 00000000..f05f7527 --- /dev/null +++ b/cv/classification/vit_large_patch32_384/main.py @@ -0,0 +1 @@ +../main.py \ No newline at end of file diff --git a/cv/classification/vit_large_patch32_384/optimizer.py b/cv/classification/vit_large_patch32_384/optimizer.py new file mode 120000 index 00000000..e5dec61d --- /dev/null +++ b/cv/classification/vit_large_patch32_384/optimizer.py @@ -0,0 +1 @@ +../optimizer.py \ No newline at end of file diff --git a/cv/classification/vit_large_patch32_384/requirements.txt b/cv/classification/vit_large_patch32_384/requirements.txt new file mode 120000 index 00000000..dc833dd4 --- /dev/null +++ b/cv/classification/vit_large_patch32_384/requirements.txt @@ -0,0 +1 @@ +../requirements.txt \ No newline at end of file diff --git a/cv/classification/vit_large_patch32_384/train.sh b/cv/classification/vit_large_patch32_384/train.sh new file mode 100755 index 00000000..31fe1e3a --- /dev/null +++ b/cv/classification/vit_large_patch32_384/train.sh @@ -0,0 +1,21 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=2 +PORT=12346 +MODEL_ARCH="vit_large_patch32_384" +IMAGE_SIZE=384 +BATCH_SIZE=64 +LEARNING_RATE=3e-5 +export CUDA_VISIBLE_DEVICES=6,7 +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/vit_settings.yaml \ + --model_arch $MODEL_ARCH \ + --batch-size $BATCH_SIZE \ + --image-size $IMAGE_SIZE \ + --lr $LEARNING_RATE + diff --git a/cv/classification/vit_large_patch32_384/utils.py b/cv/classification/vit_large_patch32_384/utils.py new file mode 120000 index 00000000..50fbc6d8 --- /dev/null +++ b/cv/classification/vit_large_patch32_384/utils.py @@ -0,0 +1 @@ +../utils.py \ No 
newline at end of file diff --git a/cv/classification/vit_small_patch16_224/README.md b/cv/classification/vit_small_patch16_224/README.md new file mode 100644 index 00000000..c447f43c --- /dev/null +++ b/cv/classification/vit_small_patch16_224/README.md @@ -0,0 +1,66 @@ +## vit_small_patch16_224 + +### Installation +- Install the latest version of OneFlow +```bash +python3 -m pip install oneflow -f https://staging.oneflow.info/branch/master/[PLATFORM] +``` +Find more information on [install oneflow](https://github.com/Oneflow-Inc/oneflow#install-oneflow) + +- Install flowvision + +Then install the latest stable release of flowvision + +```bash +pip install flowvision==0.2.1 +``` + +- Install other requirements +```bash +python3 -m pip install -r requirements.txt +``` + +### Dataset +#### ImageNet +For ImageNet dataset, you can download it from http://image-net.org/. We provide the following two ways to load data: + +- For standard folder dataset, move validation images to labeled sub-folders. The file structure should look like: + ```bash + $ tree data + imagenet + ├── train + │ ├── class1 + │ │ ├── img1.jpeg + │ │ ├── img2.jpeg + │ │ └── ... + │ ├── class2 + │ │ ├── img3.jpeg + │ │ └── ... + │ └── ... + └── val + ├── class1 + │ ├── img4.jpeg + │ ├── img5.jpeg + │ └── ... + ├── class2 + │ ├── img6.jpeg + │ └── ... + └── ... + + +### Training + +You can use bash script `train.sh` to train this model. + +```bash +sh train.sh +``` + +### inference + +Bash script `infer.sh` is used to infer the trained model. 
+ +```bash +sh infer.sh +``` + diff --git a/cv/classification/vit_small_patch16_224/config.py b/cv/classification/vit_small_patch16_224/config.py new file mode 120000 index 00000000..3721332c --- /dev/null +++ b/cv/classification/vit_small_patch16_224/config.py @@ -0,0 +1 @@ +../config.py \ No newline at end of file diff --git a/cv/classification/vit_small_patch16_224/configs b/cv/classification/vit_small_patch16_224/configs new file mode 120000 index 00000000..271eaab3 --- /dev/null +++ b/cv/classification/vit_small_patch16_224/configs @@ -0,0 +1 @@ +../configs \ No newline at end of file diff --git a/cv/classification/vit_small_patch16_224/data b/cv/classification/vit_small_patch16_224/data new file mode 120000 index 00000000..4909e06e --- /dev/null +++ b/cv/classification/vit_small_patch16_224/data @@ -0,0 +1 @@ +../data \ No newline at end of file diff --git a/cv/classification/vit_small_patch16_224/infer.sh b/cv/classification/vit_small_patch16_224/infer.sh new file mode 100755 index 00000000..6ccab069 --- /dev/null +++ b/cv/classification/vit_small_patch16_224/infer.sh @@ -0,0 +1,16 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=1 +PORT=12346 +MODEL_ARCH="vit_small_patch16_224" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/vit_settings.yaml \ + --model_arch $MODEL_ARCH \ + --throughput + diff --git a/cv/classification/vit_small_patch16_224/logger.py b/cv/classification/vit_small_patch16_224/logger.py new file mode 120000 index 00000000..e1dc8ceb --- /dev/null +++ b/cv/classification/vit_small_patch16_224/logger.py @@ -0,0 +1 @@ +../logger.py \ No newline at end of file diff --git a/cv/classification/vit_small_patch16_224/lr_scheduler.py b/cv/classification/vit_small_patch16_224/lr_scheduler.py new file mode 120000 index 00000000..268fb140 --- /dev/null +++ b/cv/classification/vit_small_patch16_224/lr_scheduler.py @@ -0,0 +1 @@ 
+../lr_scheduler.py \ No newline at end of file diff --git a/cv/classification/vit_small_patch16_224/main.py b/cv/classification/vit_small_patch16_224/main.py new file mode 120000 index 00000000..f05f7527 --- /dev/null +++ b/cv/classification/vit_small_patch16_224/main.py @@ -0,0 +1 @@ +../main.py \ No newline at end of file diff --git a/cv/classification/vit_small_patch16_224/optimizer.py b/cv/classification/vit_small_patch16_224/optimizer.py new file mode 120000 index 00000000..e5dec61d --- /dev/null +++ b/cv/classification/vit_small_patch16_224/optimizer.py @@ -0,0 +1 @@ +../optimizer.py \ No newline at end of file diff --git a/cv/classification/vit_small_patch16_224/requirements.txt b/cv/classification/vit_small_patch16_224/requirements.txt new file mode 120000 index 00000000..dc833dd4 --- /dev/null +++ b/cv/classification/vit_small_patch16_224/requirements.txt @@ -0,0 +1 @@ +../requirements.txt \ No newline at end of file diff --git a/cv/classification/vit_small_patch16_224/train.sh b/cv/classification/vit_small_patch16_224/train.sh new file mode 100755 index 00000000..43eb9221 --- /dev/null +++ b/cv/classification/vit_small_patch16_224/train.sh @@ -0,0 +1,19 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=2 +PORT=12346 +MODEL_ARCH="vit_small_patch16_224" +BATCH_SIZE=256 +LEARNING_RATE=3e-4 + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/vit_settings.yaml \ + --model_arch $MODEL_ARCH \ + --batch-size $BATCH_SIZE \ + --lr $LEARNING_RATE + diff --git a/cv/classification/vit_small_patch16_224/utils.py b/cv/classification/vit_small_patch16_224/utils.py new file mode 120000 index 00000000..50fbc6d8 --- /dev/null +++ b/cv/classification/vit_small_patch16_224/utils.py @@ -0,0 +1 @@ +../utils.py \ No newline at end of file diff --git a/cv/classification/vit_small_patch16_384/README.md b/cv/classification/vit_small_patch16_384/README.md new file mode 
100644 index 00000000..769ef052 --- /dev/null +++ b/cv/classification/vit_small_patch16_384/README.md @@ -0,0 +1,66 @@ +## vit_small_patch16_384 + +### Installation +- Install the latest version of OneFlow +```bash +python3 -m pip install oneflow -f https://staging.oneflow.info/branch/master/[PLATFORM] +``` +Find more information on [install oneflow](https://github.com/Oneflow-Inc/oneflow#install-oneflow) + +- Install flowvision + +Then install the latest stable release of flowvision + +```bash +pip install flowvision==0.2.1 +``` + +- Install other requirements +```bash +python3 -m pip install -r requirements.txt +``` + +### Dataset +#### ImageNet +For ImageNet dataset, you can download it from http://image-net.org/. We provide the following two ways to load data: + +- For standard folder dataset, move validation images to labeled sub-folders. The file structure should look like: + ```bash + $ tree data + imagenet + ├── train + │ ├── class1 + │ │ ├── img1.jpeg + │ │ ├── img2.jpeg + │ │ └── ... + │ ├── class2 + │ │ ├── img3.jpeg + │ │ └── ... + │ └── ... + └── val + ├── class1 + │ ├── img4.jpeg + │ ├── img5.jpeg + │ └── ... + ├── class2 + │ ├── img6.jpeg + │ └── ... + └── ... + + +### Training + +You can use bash script `train.sh` to train this model. + +```bash +sh train.sh +``` + +### inference + +Bash script `infer.sh` is used to infer the trained model. 
+ +```bash +sh infer.sh +``` + diff --git a/cv/classification/vit_small_patch16_384/config.py b/cv/classification/vit_small_patch16_384/config.py new file mode 120000 index 00000000..3721332c --- /dev/null +++ b/cv/classification/vit_small_patch16_384/config.py @@ -0,0 +1 @@ +../config.py \ No newline at end of file diff --git a/cv/classification/vit_small_patch16_384/configs b/cv/classification/vit_small_patch16_384/configs new file mode 120000 index 00000000..271eaab3 --- /dev/null +++ b/cv/classification/vit_small_patch16_384/configs @@ -0,0 +1 @@ +../configs \ No newline at end of file diff --git a/cv/classification/vit_small_patch16_384/data b/cv/classification/vit_small_patch16_384/data new file mode 120000 index 00000000..4909e06e --- /dev/null +++ b/cv/classification/vit_small_patch16_384/data @@ -0,0 +1 @@ +../data \ No newline at end of file diff --git a/cv/classification/vit_small_patch16_384/infer.sh b/cv/classification/vit_small_patch16_384/infer.sh new file mode 100755 index 00000000..adbc8ec7 --- /dev/null +++ b/cv/classification/vit_small_patch16_384/infer.sh @@ -0,0 +1,18 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=1 +PORT=12346 +MODEL_ARCH="vit_small_patch16_384" +IMAGE_SIZE=384 + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/vit_settings.yaml \ + --model_arch $MODEL_ARCH \ + --image-size $IMAGE_SIZE \ + --throughput + diff --git a/cv/classification/vit_small_patch16_384/logger.py b/cv/classification/vit_small_patch16_384/logger.py new file mode 120000 index 00000000..e1dc8ceb --- /dev/null +++ b/cv/classification/vit_small_patch16_384/logger.py @@ -0,0 +1 @@ +../logger.py \ No newline at end of file diff --git a/cv/classification/vit_small_patch16_384/lr_scheduler.py b/cv/classification/vit_small_patch16_384/lr_scheduler.py new file mode 120000 index 00000000..268fb140 --- /dev/null +++ 
b/cv/classification/vit_small_patch16_384/lr_scheduler.py @@ -0,0 +1 @@ +../lr_scheduler.py \ No newline at end of file diff --git a/cv/classification/vit_small_patch16_384/main.py b/cv/classification/vit_small_patch16_384/main.py new file mode 120000 index 00000000..f05f7527 --- /dev/null +++ b/cv/classification/vit_small_patch16_384/main.py @@ -0,0 +1 @@ +../main.py \ No newline at end of file diff --git a/cv/classification/vit_small_patch16_384/optimizer.py b/cv/classification/vit_small_patch16_384/optimizer.py new file mode 120000 index 00000000..e5dec61d --- /dev/null +++ b/cv/classification/vit_small_patch16_384/optimizer.py @@ -0,0 +1 @@ +../optimizer.py \ No newline at end of file diff --git a/cv/classification/vit_small_patch16_384/requirements.txt b/cv/classification/vit_small_patch16_384/requirements.txt new file mode 120000 index 00000000..dc833dd4 --- /dev/null +++ b/cv/classification/vit_small_patch16_384/requirements.txt @@ -0,0 +1 @@ +../requirements.txt \ No newline at end of file diff --git a/cv/classification/vit_small_patch16_384/train.sh b/cv/classification/vit_small_patch16_384/train.sh new file mode 100755 index 00000000..b9ce7eb2 --- /dev/null +++ b/cv/classification/vit_small_patch16_384/train.sh @@ -0,0 +1,21 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=2 +PORT=12346 +MODEL_ARCH="vit_small_patch16_384" +IMAGE_SIZE=384 +BATCH_SIZE=64 +LEARNING_RATE=1e-4 + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/vit_settings.yaml \ + --model_arch $MODEL_ARCH \ + --batch-size $BATCH_SIZE \ + --image-size $IMAGE_SIZE \ + --lr $LEARNING_RATE + diff --git a/cv/classification/vit_small_patch16_384/utils.py b/cv/classification/vit_small_patch16_384/utils.py new file mode 120000 index 00000000..50fbc6d8 --- /dev/null +++ b/cv/classification/vit_small_patch16_384/utils.py @@ -0,0 +1 @@ +../utils.py \ No newline at end of file diff --git 
a/cv/classification/vit_small_patch32_224/README.md b/cv/classification/vit_small_patch32_224/README.md new file mode 100644 index 00000000..d0de5357 --- /dev/null +++ b/cv/classification/vit_small_patch32_224/README.md @@ -0,0 +1,66 @@ +## vit_samll_patch32_224 + +### Installation +- Install the latest version of OneFlow +```bash +python3 -m pip install oneflow -f https://staging.oneflow.info/branch/master/[PLATFORM] +``` +Find more information on [install oneflow](https://github.com/Oneflow-Inc/oneflow#install-oneflow) + +- Install flowvision + +Then install the latest stable release of flowvision + +```bash +pip install flowvision==0.2.1 +``` + +- Install other requirements +```bash +python3 -m pip install -r requirements.txt +``` + +### Dataset +#### ImageNet +For ImageNet dataset, you can download it from http://image-net.org/. We provide the following two ways to load data: + +- For standard folder dataset, move validation images to labeled sub-folders. The file structure should look like: + ```bash + $ tree data + imagenet + ├── train + │ ├── class1 + │ │ ├── img1.jpeg + │ │ ├── img2.jpeg + │ │ └── ... + │ ├── class2 + │ │ ├── img3.jpeg + │ │ └── ... + │ └── ... + └── val + ├── class1 + │ ├── img4.jpeg + │ ├── img5.jpeg + │ └── ... + ├── class2 + │ ├── img6.jpeg + │ └── ... + └── ... + + +### Training + +You can use bash script `train.sh` to train this model. + +```bash +sh train.sh +``` + +### inference + +Bash script `infer.sh` is used to infer the trained model. 
+ +```bash +sh infer.sh +``` + diff --git a/cv/classification/vit_small_patch32_224/config.py b/cv/classification/vit_small_patch32_224/config.py new file mode 120000 index 00000000..3721332c --- /dev/null +++ b/cv/classification/vit_small_patch32_224/config.py @@ -0,0 +1 @@ +../config.py \ No newline at end of file diff --git a/cv/classification/vit_small_patch32_224/configs b/cv/classification/vit_small_patch32_224/configs new file mode 120000 index 00000000..271eaab3 --- /dev/null +++ b/cv/classification/vit_small_patch32_224/configs @@ -0,0 +1 @@ +../configs \ No newline at end of file diff --git a/cv/classification/vit_small_patch32_224/data b/cv/classification/vit_small_patch32_224/data new file mode 120000 index 00000000..4909e06e --- /dev/null +++ b/cv/classification/vit_small_patch32_224/data @@ -0,0 +1 @@ +../data \ No newline at end of file diff --git a/cv/classification/vit_small_patch32_224/infer.sh b/cv/classification/vit_small_patch32_224/infer.sh new file mode 100755 index 00000000..a1f48b37 --- /dev/null +++ b/cv/classification/vit_small_patch32_224/infer.sh @@ -0,0 +1,16 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=1 +PORT=12346 +MODEL_ARCH="vit_small_patch32_224" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/vit_settings.yaml \ + --model_arch $MODEL_ARCH \ + --throughput + diff --git a/cv/classification/vit_small_patch32_224/logger.py b/cv/classification/vit_small_patch32_224/logger.py new file mode 120000 index 00000000..e1dc8ceb --- /dev/null +++ b/cv/classification/vit_small_patch32_224/logger.py @@ -0,0 +1 @@ +../logger.py \ No newline at end of file diff --git a/cv/classification/vit_small_patch32_224/lr_scheduler.py b/cv/classification/vit_small_patch32_224/lr_scheduler.py new file mode 120000 index 00000000..268fb140 --- /dev/null +++ b/cv/classification/vit_small_patch32_224/lr_scheduler.py @@ -0,0 +1 @@ 
+../lr_scheduler.py \ No newline at end of file diff --git a/cv/classification/vit_small_patch32_224/main.py b/cv/classification/vit_small_patch32_224/main.py new file mode 120000 index 00000000..f05f7527 --- /dev/null +++ b/cv/classification/vit_small_patch32_224/main.py @@ -0,0 +1 @@ +../main.py \ No newline at end of file diff --git a/cv/classification/vit_small_patch32_224/optimizer.py b/cv/classification/vit_small_patch32_224/optimizer.py new file mode 120000 index 00000000..e5dec61d --- /dev/null +++ b/cv/classification/vit_small_patch32_224/optimizer.py @@ -0,0 +1 @@ +../optimizer.py \ No newline at end of file diff --git a/cv/classification/vit_small_patch32_224/requirements.txt b/cv/classification/vit_small_patch32_224/requirements.txt new file mode 120000 index 00000000..dc833dd4 --- /dev/null +++ b/cv/classification/vit_small_patch32_224/requirements.txt @@ -0,0 +1 @@ +../requirements.txt \ No newline at end of file diff --git a/cv/classification/vit_small_patch32_224/train.sh b/cv/classification/vit_small_patch32_224/train.sh new file mode 100755 index 00000000..f9193759 --- /dev/null +++ b/cv/classification/vit_small_patch32_224/train.sh @@ -0,0 +1,19 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=4 +PORT=12346 +MODEL_ARCH="vit_small_patch32_224" +BATCH_SIZE=256 +LEARNING_RATE=3e-4 + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/vit_settings.yaml \ + --model_arch $MODEL_ARCH \ + --batch-size $BATCH_SIZE \ + --lr $LEARNING_RATE + diff --git a/cv/classification/vit_small_patch32_224/utils.py b/cv/classification/vit_small_patch32_224/utils.py new file mode 120000 index 00000000..50fbc6d8 --- /dev/null +++ b/cv/classification/vit_small_patch32_224/utils.py @@ -0,0 +1 @@ +../utils.py \ No newline at end of file diff --git a/cv/classification/vit_small_patch32_384/README.md b/cv/classification/vit_small_patch32_384/README.md new file mode 
100644 index 00000000..b9356097 --- /dev/null +++ b/cv/classification/vit_small_patch32_384/README.md @@ -0,0 +1,66 @@ +## vit_samll_patch32_384 + +### Installation +- Install the latest version of OneFlow +```bash +python3 -m pip install oneflow -f https://staging.oneflow.info/branch/master/[PLATFORM] +``` +Find more information on [install oneflow](https://github.com/Oneflow-Inc/oneflow#install-oneflow) + +- Install flowvision + +Then install the latest stable release of flowvision + +```bash +pip install flowvision==0.2.1 +``` + +- Install other requirements +```bash +python3 -m pip install -r requirements.txt +``` + +### Dataset +#### ImageNet +For ImageNet dataset, you can download it from http://image-net.org/. We provide the following two ways to load data: + +- For standard folder dataset, move validation images to labeled sub-folders. The file structure should look like: + ```bash + $ tree data + imagenet + ├── train + │ ├── class1 + │ │ ├── img1.jpeg + │ │ ├── img2.jpeg + │ │ └── ... + │ ├── class2 + │ │ ├── img3.jpeg + │ │ └── ... + │ └── ... + └── val + ├── class1 + │ ├── img4.jpeg + │ ├── img5.jpeg + │ └── ... + ├── class2 + │ ├── img6.jpeg + │ └── ... + └── ... + + +### Training + +You can use bash script `train.sh` to train this model. + +```bash +sh train.sh +``` + +### inference + +Bash script `infer.sh` is used to infer the trained model. 
+ +```bash +sh infer.sh +``` + diff --git a/cv/classification/vit_small_patch32_384/config.py b/cv/classification/vit_small_patch32_384/config.py new file mode 120000 index 00000000..3721332c --- /dev/null +++ b/cv/classification/vit_small_patch32_384/config.py @@ -0,0 +1 @@ +../config.py \ No newline at end of file diff --git a/cv/classification/vit_small_patch32_384/configs b/cv/classification/vit_small_patch32_384/configs new file mode 120000 index 00000000..271eaab3 --- /dev/null +++ b/cv/classification/vit_small_patch32_384/configs @@ -0,0 +1 @@ +../configs \ No newline at end of file diff --git a/cv/classification/vit_small_patch32_384/data b/cv/classification/vit_small_patch32_384/data new file mode 120000 index 00000000..4909e06e --- /dev/null +++ b/cv/classification/vit_small_patch32_384/data @@ -0,0 +1 @@ +../data \ No newline at end of file diff --git a/cv/classification/vit_small_patch32_384/infer.sh b/cv/classification/vit_small_patch32_384/infer.sh new file mode 100755 index 00000000..9532b0af --- /dev/null +++ b/cv/classification/vit_small_patch32_384/infer.sh @@ -0,0 +1,18 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=1 +PORT=12346 +MODEL_ARCH="vit_small_patch32_384" +IMAGE_SIZE=384 + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/vit_settings.yaml \ + --model_arch $MODEL_ARCH \ + --image-size $IMAGE_SIZE \ + --throughput + diff --git a/cv/classification/vit_small_patch32_384/logger.py b/cv/classification/vit_small_patch32_384/logger.py new file mode 120000 index 00000000..e1dc8ceb --- /dev/null +++ b/cv/classification/vit_small_patch32_384/logger.py @@ -0,0 +1 @@ +../logger.py \ No newline at end of file diff --git a/cv/classification/vit_small_patch32_384/lr_scheduler.py b/cv/classification/vit_small_patch32_384/lr_scheduler.py new file mode 120000 index 00000000..268fb140 --- /dev/null +++ 
b/cv/classification/vit_small_patch32_384/lr_scheduler.py @@ -0,0 +1 @@ +../lr_scheduler.py \ No newline at end of file diff --git a/cv/classification/vit_small_patch32_384/main.py b/cv/classification/vit_small_patch32_384/main.py new file mode 120000 index 00000000..f05f7527 --- /dev/null +++ b/cv/classification/vit_small_patch32_384/main.py @@ -0,0 +1 @@ +../main.py \ No newline at end of file diff --git a/cv/classification/vit_small_patch32_384/optimizer.py b/cv/classification/vit_small_patch32_384/optimizer.py new file mode 120000 index 00000000..e5dec61d --- /dev/null +++ b/cv/classification/vit_small_patch32_384/optimizer.py @@ -0,0 +1 @@ +../optimizer.py \ No newline at end of file diff --git a/cv/classification/vit_small_patch32_384/requirements.txt b/cv/classification/vit_small_patch32_384/requirements.txt new file mode 120000 index 00000000..dc833dd4 --- /dev/null +++ b/cv/classification/vit_small_patch32_384/requirements.txt @@ -0,0 +1 @@ +../requirements.txt \ No newline at end of file diff --git a/cv/classification/vit_small_patch32_384/train.sh b/cv/classification/vit_small_patch32_384/train.sh new file mode 100755 index 00000000..20247819 --- /dev/null +++ b/cv/classification/vit_small_patch32_384/train.sh @@ -0,0 +1,21 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=4 +PORT=12346 +MODEL_ARCH="vit_small_patch32_384" +IMAGE_SIZE=384 +BATCH_SIZE=256 +LEARNING_RATE=3e-4 + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/vit_settings.yaml \ + --model_arch $MODEL_ARCH \ + --batch-size $BATCH_SIZE \ + --image-size $IMAGE_SIZE \ + --lr $LEARNING_RATE + diff --git a/cv/classification/vit_small_patch32_384/utils.py b/cv/classification/vit_small_patch32_384/utils.py new file mode 120000 index 00000000..50fbc6d8 --- /dev/null +++ b/cv/classification/vit_small_patch32_384/utils.py @@ -0,0 +1 @@ +../utils.py \ No newline at end of file diff --git 
a/cv/classification/vit_tiny_patch16_224/README.md b/cv/classification/vit_tiny_patch16_224/README.md new file mode 100644 index 00000000..12b5fdae --- /dev/null +++ b/cv/classification/vit_tiny_patch16_224/README.md @@ -0,0 +1,66 @@ +## vit_tiny_patch16_224 + +### Installation +- Install the latest version of OneFlow +```bash +python3 -m pip install oneflow -f https://staging.oneflow.info/branch/master/[PLATFORM] +``` +Find more information on [install oneflow](https://github.com/Oneflow-Inc/oneflow#install-oneflow) + +- Install flowvision + +Then install the latest stable release of flowvision + +```bash +pip install flowvision==0.2.1 +``` + +- Install other requirements +```bash +python3 -m pip install -r requirements.txt +``` + +### Dataset +#### ImageNet +For ImageNet dataset, you can download it from http://image-net.org/. We provide the following two ways to load data: + +- For standard folder dataset, move validation images to labeled sub-folders. The file structure should look like: + ```bash + $ tree data + imagenet + ├── train + │ ├── class1 + │ │ ├── img1.jpeg + │ │ ├── img2.jpeg + │ │ └── ... + │ ├── class2 + │ │ ├── img3.jpeg + │ │ └── ... + │ └── ... + └── val + ├── class1 + │ ├── img4.jpeg + │ ├── img5.jpeg + │ └── ... + ├── class2 + │ ├── img6.jpeg + │ └── ... + └── ... + + +### Training + +You can use bash script `train.sh` to train this model. + +```bash +sh train.sh +``` + +### inference + +Bash script `infer.sh` is used to infer the trained model. 
+ +```bash +sh infer.sh +``` + diff --git a/cv/classification/vit_tiny_patch16_224/config.py b/cv/classification/vit_tiny_patch16_224/config.py new file mode 120000 index 00000000..3721332c --- /dev/null +++ b/cv/classification/vit_tiny_patch16_224/config.py @@ -0,0 +1 @@ +../config.py \ No newline at end of file diff --git a/cv/classification/vit_tiny_patch16_224/configs b/cv/classification/vit_tiny_patch16_224/configs new file mode 120000 index 00000000..271eaab3 --- /dev/null +++ b/cv/classification/vit_tiny_patch16_224/configs @@ -0,0 +1 @@ +../configs \ No newline at end of file diff --git a/cv/classification/vit_tiny_patch16_224/data b/cv/classification/vit_tiny_patch16_224/data new file mode 120000 index 00000000..4909e06e --- /dev/null +++ b/cv/classification/vit_tiny_patch16_224/data @@ -0,0 +1 @@ +../data \ No newline at end of file diff --git a/cv/classification/vit_tiny_patch16_224/infer.sh b/cv/classification/vit_tiny_patch16_224/infer.sh new file mode 100755 index 00000000..f0a670e6 --- /dev/null +++ b/cv/classification/vit_tiny_patch16_224/infer.sh @@ -0,0 +1,16 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=1 +PORT=12346 +MODEL_ARCH="vit_tiny_patch16_224" + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/vit_settings.yaml \ + --model_arch $MODEL_ARCH \ + --throughput + diff --git a/cv/classification/vit_tiny_patch16_224/logger.py b/cv/classification/vit_tiny_patch16_224/logger.py new file mode 120000 index 00000000..e1dc8ceb --- /dev/null +++ b/cv/classification/vit_tiny_patch16_224/logger.py @@ -0,0 +1 @@ +../logger.py \ No newline at end of file diff --git a/cv/classification/vit_tiny_patch16_224/lr_scheduler.py b/cv/classification/vit_tiny_patch16_224/lr_scheduler.py new file mode 120000 index 00000000..268fb140 --- /dev/null +++ b/cv/classification/vit_tiny_patch16_224/lr_scheduler.py @@ -0,0 +1 @@ +../lr_scheduler.py \ No 
newline at end of file diff --git a/cv/classification/vit_tiny_patch16_224/main.py b/cv/classification/vit_tiny_patch16_224/main.py new file mode 120000 index 00000000..f05f7527 --- /dev/null +++ b/cv/classification/vit_tiny_patch16_224/main.py @@ -0,0 +1 @@ +../main.py \ No newline at end of file diff --git a/cv/classification/vit_tiny_patch16_224/optimizer.py b/cv/classification/vit_tiny_patch16_224/optimizer.py new file mode 120000 index 00000000..e5dec61d --- /dev/null +++ b/cv/classification/vit_tiny_patch16_224/optimizer.py @@ -0,0 +1 @@ +../optimizer.py \ No newline at end of file diff --git a/cv/classification/vit_tiny_patch16_224/requirements.txt b/cv/classification/vit_tiny_patch16_224/requirements.txt new file mode 120000 index 00000000..dc833dd4 --- /dev/null +++ b/cv/classification/vit_tiny_patch16_224/requirements.txt @@ -0,0 +1 @@ +../requirements.txt \ No newline at end of file diff --git a/cv/classification/vit_tiny_patch16_224/train.sh b/cv/classification/vit_tiny_patch16_224/train.sh new file mode 100755 index 00000000..b86c6329 --- /dev/null +++ b/cv/classification/vit_tiny_patch16_224/train.sh @@ -0,0 +1,19 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=4 +PORT=12346 +MODEL_ARCH="vit_tiny_patch16_224" +BATCH_SIZE=128 +LEARNING_RATE=3e-4 + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/vit_settings.yaml \ + --model_arch $MODEL_ARCH \ + --batch-size $BATCH_SIZE \ + --lr $LEARNING_RATE + diff --git a/cv/classification/vit_tiny_patch16_224/utils.py b/cv/classification/vit_tiny_patch16_224/utils.py new file mode 120000 index 00000000..50fbc6d8 --- /dev/null +++ b/cv/classification/vit_tiny_patch16_224/utils.py @@ -0,0 +1 @@ +../utils.py \ No newline at end of file diff --git a/cv/classification/vit_tiny_patch16_384/README.md b/cv/classification/vit_tiny_patch16_384/README.md new file mode 100644 index 00000000..43ff7266 --- 
/dev/null +++ b/cv/classification/vit_tiny_patch16_384/README.md @@ -0,0 +1,66 @@ +## vit_tiny_patch16_384 + +### Installation +- Install the latest version of OneFlow +```bash +python3 -m pip install oneflow -f https://staging.oneflow.info/branch/master/[PLATFORM] +``` +Find more information on [install oneflow](https://github.com/Oneflow-Inc/oneflow#install-oneflow) + +- Install flowvision + +Then install the latest stable release of flowvision + +```bash +pip install flowvision==0.2.1 +``` + +- Install other requirements +```bash +python3 -m pip install -r requirements.txt +``` + +### Dataset +#### ImageNet +For ImageNet dataset, you can download it from http://image-net.org/. We provide the following two ways to load data: + +- For standard folder dataset, move validation images to labeled sub-folders. The file structure should look like: + ```bash + $ tree data + imagenet + ├── train + │ ├── class1 + │ │ ├── img1.jpeg + │ │ ├── img2.jpeg + │ │ └── ... + │ ├── class2 + │ │ ├── img3.jpeg + │ │ └── ... + │ └── ... + └── val + ├── class1 + │ ├── img4.jpeg + │ ├── img5.jpeg + │ └── ... + ├── class2 + │ ├── img6.jpeg + │ └── ... + └── ... + + +### Training + +You can use bash script `train.sh` to train this model. + +```bash +sh train.sh +``` + +### inference + +Bash script `infer.sh` is used to infer the trained model. 
+ +```bash +sh infer.sh +``` + diff --git a/cv/classification/vit_tiny_patch16_384/config.py b/cv/classification/vit_tiny_patch16_384/config.py new file mode 120000 index 00000000..3721332c --- /dev/null +++ b/cv/classification/vit_tiny_patch16_384/config.py @@ -0,0 +1 @@ +../config.py \ No newline at end of file diff --git a/cv/classification/vit_tiny_patch16_384/configs b/cv/classification/vit_tiny_patch16_384/configs new file mode 120000 index 00000000..271eaab3 --- /dev/null +++ b/cv/classification/vit_tiny_patch16_384/configs @@ -0,0 +1 @@ +../configs \ No newline at end of file diff --git a/cv/classification/vit_tiny_patch16_384/data b/cv/classification/vit_tiny_patch16_384/data new file mode 120000 index 00000000..4909e06e --- /dev/null +++ b/cv/classification/vit_tiny_patch16_384/data @@ -0,0 +1 @@ +../data \ No newline at end of file diff --git a/cv/classification/vit_tiny_patch16_384/infer.sh b/cv/classification/vit_tiny_patch16_384/infer.sh new file mode 100755 index 00000000..9643466a --- /dev/null +++ b/cv/classification/vit_tiny_patch16_384/infer.sh @@ -0,0 +1,18 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=1 +PORT=12346 +MODEL_ARCH="vit_tiny_patch16_384" +IMAGE_SIZE=384 + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/vit_settings.yaml \ + --model_arch $MODEL_ARCH \ + --image-size $IMAGE_SIZE \ + --throughput + diff --git a/cv/classification/vit_tiny_patch16_384/logger.py b/cv/classification/vit_tiny_patch16_384/logger.py new file mode 120000 index 00000000..e1dc8ceb --- /dev/null +++ b/cv/classification/vit_tiny_patch16_384/logger.py @@ -0,0 +1 @@ +../logger.py \ No newline at end of file diff --git a/cv/classification/vit_tiny_patch16_384/lr_scheduler.py b/cv/classification/vit_tiny_patch16_384/lr_scheduler.py new file mode 120000 index 00000000..268fb140 --- /dev/null +++ b/cv/classification/vit_tiny_patch16_384/lr_scheduler.py 
@@ -0,0 +1 @@ +../lr_scheduler.py \ No newline at end of file diff --git a/cv/classification/vit_tiny_patch16_384/main.py b/cv/classification/vit_tiny_patch16_384/main.py new file mode 120000 index 00000000..f05f7527 --- /dev/null +++ b/cv/classification/vit_tiny_patch16_384/main.py @@ -0,0 +1 @@ +../main.py \ No newline at end of file diff --git a/cv/classification/vit_tiny_patch16_384/optimizer.py b/cv/classification/vit_tiny_patch16_384/optimizer.py new file mode 120000 index 00000000..e5dec61d --- /dev/null +++ b/cv/classification/vit_tiny_patch16_384/optimizer.py @@ -0,0 +1 @@ +../optimizer.py \ No newline at end of file diff --git a/cv/classification/vit_tiny_patch16_384/requirements.txt b/cv/classification/vit_tiny_patch16_384/requirements.txt new file mode 120000 index 00000000..dc833dd4 --- /dev/null +++ b/cv/classification/vit_tiny_patch16_384/requirements.txt @@ -0,0 +1 @@ +../requirements.txt \ No newline at end of file diff --git a/cv/classification/vit_tiny_patch16_384/train.sh b/cv/classification/vit_tiny_patch16_384/train.sh new file mode 100755 index 00000000..e0162c01 --- /dev/null +++ b/cv/classification/vit_tiny_patch16_384/train.sh @@ -0,0 +1,21 @@ +export PYTHONPATH=$PWD:$PYTHONPATH +set -aux + +GPU_NUMS=4 +PORT=12346 +MODEL_ARCH="vit_tiny_patch16_384" +IMAGE_SIZE=384 +BATCH_SIZE=128 +LEARNING_RATE=3e-4 + +python3 -m oneflow.distributed.launch \ + --nproc_per_node $GPU_NUMS \ + --master_addr 127.0.0.1 \ + --master_port $PORT \ + main.py \ + --cfg configs/vit_settings.yaml \ + --model_arch $MODEL_ARCH \ + --batch-size $BATCH_SIZE \ + --image-size $IMAGE_SIZE \ + --lr $LEARNING_RATE + diff --git a/cv/classification/vit_tiny_patch16_384/utils.py b/cv/classification/vit_tiny_patch16_384/utils.py new file mode 120000 index 00000000..50fbc6d8 --- /dev/null +++ b/cv/classification/vit_tiny_patch16_384/utils.py @@ -0,0 +1 @@ +../utils.py \ No newline at end of file diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 
00000000..dde3e365 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,4 @@ +yacs==0.1.8 +termcolor==2.3.0 +pycocotools==2.0.7 +flowvision==0.2.2 diff --git a/science/ldc/README.md b/science/ldc/README.md new file mode 100644 index 00000000..c839196d --- /dev/null +++ b/science/ldc/README.md @@ -0,0 +1,29 @@ +### Lid-driven cavity flow + +Lid-driven cavity flow is a classic fluid mechanics problem, where a square container has its top wall moving, creating a flow within the container. + +This flow problem is typically analyzed in a two-dimensional scenario, corresponding to a rectangular domain with solid boundaries. In this setup, the top wall is driven at a certain velocity while the other three walls are fixed. The fluid is constrained to flow within this rectangular domain. + +Lid-driven cavity flow finds broad applications in fluid dynamics, computational fluid dynamics, and numerical simulations. Due to its simple geometry and well-defined boundary conditions, it serves as a benchmark problem for studying incompressible fluid flow, boundary layers, flow stability, and validation of computational models. + +This problem can be solved by numerically simulating the fluid dynamics equations (usually the incompressible Navier-Stokes equations). The flow characteristics and behavior can be analyzed by computing parameters such as velocity, pressure, and vorticity of the flow field. + +Studying lid-driven cavity flow provides a deeper understanding of fluid flow behavior and serves as a foundation for analyzing and simulating more complex flow problems. + +### Training + +You can use bash script `train.sh` to train this model. + +```bash +sh train.sh +``` + +### Infer + +Bash script `infer.sh` is used to infer the trained model. + +```bash +sh infer.sh +``` + +After inference, three files with a suffix of vtu will be generated, representing the horizontal flow velocity, vertical flow velocity, and pressure distribution of the LDC problem respectively. 
You can use software such as Paraview to visualize and post-process the results. diff --git a/science/ldc/core/__init__.py b/science/ldc/core/__init__.py new file mode 100644 index 00000000..ed128e51 --- /dev/null +++ b/science/ldc/core/__init__.py @@ -0,0 +1,15 @@ +# Copyright (c) 2023 OneFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +## empty diff --git a/science/ldc/core/nn.py b/science/ldc/core/nn.py new file mode 100644 index 00000000..ede161af --- /dev/null +++ b/science/ldc/core/nn.py @@ -0,0 +1,72 @@ +# Copyright (c) 2023 OneFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import oneflow as flow +from .utils import cfg + + +class FC(flow.nn.Module): + + def __init__(self, num_ins, num_outs, num_layers, hiden_size, activiation): + super(FC, self).__init__() + if num_ins <= 0 or num_outs <= 0 or hiden_size <= 0 or num_layers <= 2: + raise ValueError( + 'Invalid value, num_ins/num_outs/hiden_size should be greater than 0 and num_layers should be greater than 2.' + ) + self.num_ins = num_ins + self.num_outs = num_outs + self.num_layers = num_layers + self.hiden_size = hiden_size + self.activiation = activiation + + layers = [flow.nn.Linear(num_ins, hiden_size), self.activiation] + for idx in range(num_layers - 2): + layers.append(flow.nn.Linear(hiden_size, hiden_size)) + layers.append(self.activiation) + layers.append(flow.nn.Linear(hiden_size, num_outs)) + self.layers = flow.nn.Sequential(*layers) + self.to(cfg.get_device()) + + def forward(self, ins): + return self.layers(ins) + + +class Loss(object): + + def __init__(self, name): + self.name = name + + def evaluate(self, items): + raise NotImplementedError("Implement in Loss subclass") + + +class WeightedL2(Loss): + + def __init__(self, weights): + super(WeightedL2, self).__init__('WeightedL2') + self.weights = weights + + def evaluate(self, items): + if len(items) != len(self.weights): + raise ValueError( + 'Invalid number of items, should be {} but get {}.'.format( + len(self.weights), len(items))) + losses = [] + loss = 0.0 + for item, weight in zip(items, self.weights): + item_loss = flow.sum(item**2) * weight / item.shape[0] + loss += item_loss + losses.append(item_loss) + + return losses, loss diff --git a/science/ldc/core/pinns.py b/science/ldc/core/pinns.py new file mode 100644 index 00000000..b1d384fe --- /dev/null +++ b/science/ldc/core/pinns.py @@ -0,0 +1,493 @@ +# Copyright (c) 2023 OneFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import warnings +import numpy as np +import oneflow as flow +from .utils import cfg, sample, cache_func, check_indexes, tensor, save_checkpoint, load_checkpoint, save_vtk + + +class Domain(object): + + def __init__(self, dim, time_dependent, discreted): + self.dim = dim + self.time_dependent = time_dependent + self.discreted = discreted + self.dtype = cfg.get_dtype_n() + + def get_dim(self): + return self.dim + + def is_discreted(self): + return self.discreted + + def is_time_dependent(self): + return self.time_dependent + + def get_dtype(self): + return self.dtype + + def get_interior_points(self, time=None): + if not self.discreted: + raise ValueError( + "This interface is only valid within dicreted domain.") + if time is not None and not self.time_dependent: + raise ValueError( + "Try get points at specific time on a time independent geometry domain." + ) + + if None == time: + return self.interior_points + else: + return self.interior_points[np.isclose(self.interior_points[:, 0], + time)] + + def get_boundary_points(self, time=None): + if not self.discreted: + raise ValueError("Try get points on a not dicreted domain.") + if time is not None and not self.time_dependent: + raise ValueError( + "Try get points at specific time on a time independent geometry domain." 
+ ) + + if None == time: + return self.boundary_points + else: + return self.boundary_points[np.isclose(self.boundary_points[:, 0], + time)] + + def get_initial_points(self): + if not self.discreted or not self.time_dependent: + raise ValueError( + "Try get points on a not dicreted or a time independent domain." + ) + + time = np.sort(self.timedomain.get_points(), axis=None)[0] + if self.timedomain.is_on_initial(time): + return self.get_points(time) + else: + raise ValueError("The earliest time is not on initial") + + def get_points(self, time=None): + if not self.discreted: + raise ValueError("Try get points on a not dicreted domain.") + if time is not None and not self.time_dependent: + raise ValueError( + "Try get points at specific time on a time independent geometry domain." + ) + + if None == time: + return self.points + else: + return self.points[np.isclose(self.points[:, 0], time)] + + def get_npoints(self): + if not self.discreted: + raise ValueError( + "This interface is only valid within dicreted domain.") + return self.points.size // ( + self.dim + + 1) if self.time_dependent else self.points.size // self.dim + + +class Rectangle(Domain): + + def __init__(self, origins, extents): + super(Rectangle, self).__init__(2, False, False) + self.origins = np.array(origins, dtype=self.dtype) + self.extents = np.array(extents, dtype=self.dtype) + + self.perimeter = 2 * np.sum(self.extents, dtype=self.dtype) + self.volume = np.prod(self.extents, dtype=self.dtype) + + def _not_close_corners(self, points): + l1 = self.extents[0] + l2 = l1 + self.extents[1] + l3 = l2 + l1 + + points = points[np.logical_not(np.isclose(points, + l1 / self.perimeter))] + points = points[np.logical_not(np.isclose(points, + l2 / self.perimeter))] + points = points[np.logical_not(np.isclose(points, + l3 / self.perimeter))] + + return points + + def _sample_interior_points(self, n, sampler): + x = sample(n, self.dim, sampler, self.dtype) + return self.origins + self.extents * x + + def 
_sample_boundary_points(self, n, sampler): + u = np.ravel(sample(n + 4, 1, sampler, self.dtype)) + u = self._not_close_corners(u) * self.perimeter + points_list = [np.zeros((0, 2), dtype=self.dtype)] + for l in u: + if l < self.extents[0]: + points_list.append((self.origins + [l, 0])[np.newaxis, :]) + elif l < np.sum(self.extents): + points_list.append( + (self.origins + + [self.extents[0], l - self.extents[0]])[np.newaxis, :]) + elif l < np.sum(self.extents) + self.extents[0]: + points_list.append((self.origins + [ + self.perimeter / 2 + self.extents[0] - l, self.extents[1] + ])[np.newaxis, :]) + else: + points_list.append( + (self.origins + [0, self.perimeter - l])[np.newaxis, :]) + ret = np.concatenate(points_list, axis=0).astype(self.dtype) + return ret + + def discrete(self, interior, boundary, sampler='uniform'): + # Step1: some checks. + if self.discreted: + raise ValueError( + "Do not support discrete a domian more than once.") + + if self.time_dependent: + raise ValueError( + "A domain should not be time dependent before discrete process." + ) + + if interior < 0 or boundary < 0: + raise ValueError( + "Invalid arguments, should sample at least 0 points from interior and boundary." + ) + + # Step2: discrete process + self.interior_points = self._sample_interior_points( + interior, sampler) if interior != 0 else np.zeros( + (0, self.dim), self.dtype) + if self.interior_points.shape[0] != interior: + warnings.warn( + "Expected {x} interior points, but get {y} interior points". + format(x=interior, y=self.interior_points.shape[0])) + self.boundary_points = self._sample_boundary_points( + boundary, sampler) if boundary != 0 else np.zeros( + (0, self.dim), self.dtype) + if self.boundary_points.shape[0] != boundary: + warnings.warn( + "Expected {x} boundary points, but get {y} boundary points". 
+ format(x=boundary, y=self.boundary_points.shape[0])) + + self.points = np.concatenate( + (self.interior_points, self.boundary_points), axis=0) + + self.discreted = True + return self + + +# Helper class to compute Jacobian matrix for tensor with shape x[bsize, m] and y[bsize, n] +class Jacobian(object): + + def __init__(self, x, y): + super(Jacobian, self).__init__() + # Ensure that the shapes of x and y match + if len(x.shape) != 2 or len(y.shape) != 2: + raise ValueError( + 'Invalid shape, the number of dimensions of x and y should be 2.' + ) + if x.shape[0] != y.shape[0]: + raise ValueError( + 'Invalid shape, the first dimension of x and y should be the same.' + ) + self.bsize = x.shape[0] + self.x = x + self.y = y + self.J = [None for _ in range(self.y.nelement() // self.bsize)] + + def _check_items(self, items): + if isinstance(items, int): + return True + elif isinstance(items, tuple) or isinstance(items, list): + if len(items) != 2: + return False + for item in items: + if not isinstance(item, int): + return False + else: + return False + return True + + def __getitem__(self, items): + if not self._check_items(items): + raise TypeError( + 'Invalid item, should be int or tuple/list of length 2.') + + # Compute the gradient only if it hasn't been computed yet + i = items if isinstance(items, int) else items[0] + if self.J[i] is None: + self.J[i], = flow.autograd.grad(self.y[:, i], + self.x, + flow.ones_like(self.y[:, i]), + create_graph=True, + retain_graph=True) + return self.J[items] if isinstance( + items, int) else self.J[items[0]][:, items[1]] + + +class PDEs(object): + + def __init__(self, num_pdes, variables=None): + self.num_pdes = num_pdes + self.variables = variables + + def get_num_pdes(self): + return self.num_pdes + + def get_variables(self): + return self.variables + + def evaluate(self, inputs, outputs, inputs_names=None, outputs_names=None): + raise NotImplementedError("Implement in PDEs subclass") + + +# Time independent +# du_dx + dv_dy = 0 
+# u * du_dx + v * du_dy - nu / rho * du_dx2 - nu / rho * du_dy2 + 1.0 / rho * dp_dx = 0 +# u * dv_dx + v * dv_dy - nu / rho * dv_dx2 - nu / rho * dv_dy2 + 1.0 / rho * dp_dy = 0 +class NavierStokes2D(PDEs): + + def __init__(self, nu=0.01, rho=1.0): + super(NavierStokes2D, self).__init__(2) + self.nu = nu + self.rho = rho + + def evaluate(self, + inputs, + outputs, + inputs_names=['x', 'y'], + outputs_names=['u', 'v', 'p']): + + pos_x = inputs_names.index('x') + pos_y = inputs_names.index('y') + pos_u = outputs_names.index('u') + pos_v = outputs_names.index('v') + pos_p = outputs_names.index('p') + u = outputs[:, pos_u] + v = outputs[:, pos_v] + jac = Jacobian(inputs, outputs) + hes_u = Jacobian(inputs, jac[pos_u]) + hes_v = Jacobian(inputs, jac[pos_v]) + + pde_1 = jac[pos_u, pos_x] + jac[pos_v, pos_y] + pde_2 = u * jac[pos_u, pos_x] + v * jac[pos_u, pos_y] - self.nu / self.rho * hes_u[pos_x, pos_x] - \ + self.nu / self.rho * hes_u[pos_y, pos_y] + 1.0 / self.rho * jac[pos_p, pos_x] + pde_3 = u * jac[pos_v, pos_x] + v * jac[pos_v, pos_y] - self.nu / self.rho * hes_v[pos_x, pos_x] - \ + self.nu / self.rho * hes_v[pos_y, pos_y] + 1.0 / self.rho * jac[pos_p, pos_y] + + return [pde_1, pde_2, pde_3] + + +class BC(object): + + def __init__(self, domain, constrain_func, value_func, boundary_points): + if not domain.is_discreted(): + raise ValueError("The geometry must be discreted") + self.domain = domain + self.value_func = cache_func(value_func) + self.constrain_func = constrain_func + self.boundary_points = boundary_points + + def evaluate(self, net, value_indexes): + raise NotImplementedError( + "Boundary condition evaluate not implement in {}".format( + self.__class__.__name__)) + + +@cache_func +def get_boundary_constrain_points(domain, + X, + constrain_func, + with_boundary_normal=False): + + if X is None: + boundary_points = domain.get_boundary_points() + constrain = constrain_func(boundary_points) + if with_boundary_normal: + boundary_points = 
boundary_points[constrain] + n = tensor(domain.boundary_normal(boundary_points)) + boundary_points = tensor(boundary_points) + boundary_points.requires_grad = True + return boundary_points, n + else: + return tensor(boundary_points[constrain]) + else: + constrain = domain.is_on_boundary(X) * constrain_func(X) + if with_boundary_normal: + boundary_points = X[constrain] + n = tensor(domain.boundary_normal(boundary_points)) + boundary_points = tensor(boundary_points) + boundary_points.requires_grad = True + return boundary_points, n + else: + return tensor(X[constrain]) + + +class DirichletBC(BC): + """Dirichlet boundary conditions: y(x) = func(x). + """ + + def __init__(self, + domain, + constrain_func, + value_func, + boundary_points=None): + super().__init__(domain, constrain_func, value_func, boundary_points) + + def evaluate(self, net, value_indexes): + if not check_indexes(value_indexes): + raise ValueError("value indexes should be list/tuple of int ") + + boundary_points = get_boundary_constrain_points( + self.domain, self.boundary_points, self.constrain_func, False) + + net_out = net(boundary_points) + gt = self.value_func(boundary_points) + diff = net_out[:, value_indexes] - gt + return diff + + +class AISolver(object): + + def __init__(self, algorithm, network, loss, optimizer, checkpoint_path): + self.algorithm = algorithm + self.network = network + self.loss = loss + self.optimizer = optimizer + self.checkpoint_path = checkpoint_path + + def set_optimizer(self, optimizer): + self.optimizer = optimizer + + def save_checkpoint(self, path): + save_checkpoint(path, self.network, self.optimizer) + + def load_checkpoint(self, path): + load_checkpoint(path, self.network, self.optimizer) + + def train(self): + raise NotImplementedError("Implement in AISolver subclass") + + def predict(self): + raise NotImplementedError("Implement in AISolver subclass") + + def visualize(self): + raise NotImplementedError("Implement in AISolver subclass") + + +class 
PINNSolver(AISolver): + + def __init__(self, + network, + loss, + optimizer, + domain, + pdes, + inputs_names, + outputs_names, + bcs=[], + bc_indexes=[], + ics=[], + ic_indexes=[], + sups=[], + checkpoint_path='./log'): + super(PINNSolver, self).__init__('PINNs', network, loss, optimizer, + checkpoint_path) + self.domain = domain + self.pdes = pdes + self.inputs_names = inputs_names + self.outputs_names = outputs_names + self.bcs = bcs + self.bc_indexes = bc_indexes + self.ics = ics + self.ic_indexes = ic_indexes + self.sups = sups + + def save_checkpoint(self, path): + save_checkpoint(path, self.network, self.optimizer, + self.pdes.get_variables()) + + def load_checkpoint(self, path): + load_checkpoint(path, self.network, self.optimizer, + self.pdes.get_variables()) + + def train(self, num_epoch, log_frequency=100, checkpoint_frequency=1000): + for idx in range(num_epoch): + self.optimizer.zero_grad() + # PDE items + inputs_interior = tensor(self.domain.get_interior_points()) + inputs_interior.requires_grad = True + outputs_interior = self.network(inputs_interior) + pde_items = self.pdes.evaluate(inputs_interior, outputs_interior, + self.inputs_names, + self.outputs_names) + + # bc items + bc_items = [ + bc.evaluate(self.network, indexes) + for bc, indexes in zip(self.bcs, self.bc_indexes) + ] + + # ic items + ic_items = [ + ic.evaluate(self.network, indexes) + for ic, indexes in zip(self.ics, self.ic_indexes) + ] + + # sup items + sup_items = [ + self.network(sup['x']) - sup['y'] for sup in self.sups + ] + + losses, loss = self.loss.evaluate(pde_items + bc_items + ic_items + + sup_items) + loss.backward() + self.optimizer.step() + + if (idx + 1) % log_frequency == 0: + print( + f"num_epoch: {idx + 1}, loss: {loss.detach().cpu().numpy():.7g}" + ) + print(f"sub losses:") + for loss_el in losses: + print(f"{loss_el.detach().cpu().numpy():.7g}") + + variables = self.pdes.get_variables() + if variables is not None: + print(f"variables:") + for name in 
variables.names: + print( + f"{name}: {getattr(variables, name).detach().cpu().numpy():.7g}" + ) + if (idx + 1) % checkpoint_frequency == 0: + self.save_checkpoint(self.checkpoint_path + '/checkpoint_' + + str(idx + 1) + '.pt') + + def evaluate(self, domain=None): + domain = self.domain if domain is None else domain + inputs = tensor(domain.get_points()) + return self.network(inputs).detach().cpu().numpy() + + def visualize(self, filename='outputs', domain=None, time=None): + domain = self.domain if domain is None else domain + inputs = tensor(domain.get_points()) + outputs = self.network(inputs).detach().cpu().numpy() + if outputs.shape[1] == 1: + save_vtk(filename, domain, outputs[:, 0], time) + else: + for idx in range(outputs.shape[1]): + save_vtk(filename + '_' + str(idx), domain, outputs[:, idx], + time) diff --git a/science/ldc/core/utils.py b/science/ldc/core/utils.py new file mode 100644 index 00000000..05e70cba --- /dev/null +++ b/science/ldc/core/utils.py @@ -0,0 +1,229 @@ +# Copyright (c) 2023 OneFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import os +import vtk +import itertools +import numpy as np +import oneflow as flow +from functools import wraps +from pyevtk.hl import pointsToVTK + + +class Config(object): + + def __init__(self): + super(Config, self).__init__() + self.allow_dtypes = ['float32', 'float64'] + self.dtype = 'float32' + self.dtype_n = np.float32 + self.dtype_t = flow.float32 + self.device = flow.device( + 'cuda:0' if flow.cuda.is_available() else 'cpu') + + self.seed = None + flow.set_default_tensor_type(flow.FloatTensor) + + def get_dtype(self): + return self.dtype + + def get_dtype_n(self): + return self.dtype_n + + def get_dtype_t(self): + return self.dtype_t + + def set_dtype(self, dtype): + if dtype not in self.allow_dtypes: + raise ValueError( + 'Invalid default dtype, should be float32 or float64.') + self.dtype = dtype + self.dtype_n = np.float32 if self.dtype == 'float32' else np.float64 + self.dtype_t = flow.float32 if self.dtype == 'float32' else flow.float64 + flow.set_default_tensor_type(flow.FloatTensor if self.dtype == + 'float32' else flow.DoubleTensor) + + def set_seed(self, seed): + self.seed = seed + np.random.seed(seed) + flow.manual_seed(seed) + + def set_device(self, device): + self.device = flow.device(device) + + def get_device(self): + return self.device + + +cfg = Config() + + +def sample(num_samples, dim, method, dtype): + if method is not 'uniform': + raise ValueError('Invalid method, only support uniform.') + + # do not include start point 0, and end point + n = int(np.ceil(num_samples**(1 / dim))) + sample_list = [ + np.linspace(0.0, 1.0, num=n + 1, endpoint=False, dtype=dtype)[1:] + for _ in range(dim) + ] + + ret = list(itertools.product(*sample_list)) + return np.array(ret, dtype=dtype).reshape(-1, dim) + + +def cache_func(func): + cache = {} + + @wraps(func) + def wrapper_cache(*args): + # id(args) will have error, will be the same + # generator for will have bug, will not be the same + key = ' '.join([str(id(arg)) for arg in args]) + if key not 
in cache: + cache[key] = func(*args) + return cache[key] + + return wrapper_cache + + +def check_indexes(indexes): + if isinstance(indexes, tuple) or isinstance(indexes, list): + for index in indexes: + if not isinstance(index, int): + return False + else: + return False + return True + + +def tensor(data, + dtype=None, + device=None, + requires_grad=False, + pin_memory=False): + + if dtype is None: + dtype = cfg.get_dtype_t() + if device is None: + device = cfg.get_device() + + data = flow.tensor(data, + dtype=dtype, + device=device, + requires_grad=requires_grad, + pin_memory=pin_memory) + return data + + +def ones(*size, dtype=None, device=None, requires_grad=False): + + if dtype is None: + dtype = cfg.get_dtype_t() + if device is None: + device = cfg.get_device() + + data = flow.ones(*size, + dtype=dtype, + device=device, + requires_grad=requires_grad) + return data + + +def zeros(*size, dtype=None, device=None, requires_grad=False): + + if dtype is None: + dtype = cfg.get_dtype_t() + if device is None: + device = cfg.get_device() + + data = flow.zeros(*size, + dtype=dtype, + device=device, + requires_grad=requires_grad) + return data + + +def save_checkpoint(path, net, opt, var=None): + if not os.path.exists(os.path.dirname(path)): + os.makedirs(os.path.dirname(path)) + if var is None: + flow.save( + { + 'net_state_dict': net.state_dict(), + 'opt_state_dict': opt.state_dict() + }, path) + else: + flow.save( + { + 'net_state_dict': net.state_dict(), + 'opt_state_dict': opt.state_dict(), + 'var_state_dict': var.state_dict() + }, path) + + +def load_checkpoint(path, net, opt, var=None): + checkpoint = flow.load(path) + net.load_state_dict(checkpoint['net_state_dict']) + opt.load_state_dict(checkpoint['opt_state_dict']) + if var is not None: + var.load_state_dict(checkpoint['var_state_dict']) + + +def save_vtk(filename, domain, value, time=None): + if domain.points.ndim != 2: + raise ValueError('Invalid grid shape, it should be 2-dimensional') + if 
domain.points.shape[0] != value.shape[0]: + raise ValueError( + 'The first dimension of grid and value must be the same') + if None == time: + if domain.points.shape[1] not in [1, 2, 3]: + raise ValueError( + 'Only 1D, 2D or 3D data is supported for visualization') + + x = np.copy(domain.points[:, 0]) + y = np.zeros(domain.points.shape[0], dtype=domain.points.dtype + ) if domain.points.shape[1] != 2 else np.copy( + domain.points[:, 1]) + z = np.zeros(domain.points.shape[0], dtype=domain.points.dtype + ) if domain.points.shape[1] != 3 else np.copy( + domain.points[:, 2]) + + pointsToVTK(filename, x, y, z, data={'v': np.copy(value)}) + else: + if not (isinstance(time, float) or isinstance(time, int)): + raise ValueError("The type of time should be int or float") + if domain.points.shape[1] not in [2, 3, 4]: + raise ValueError( + 'Only 2D or 3D data is supported for visualization') + x = np.copy(domain.get_points(time)[:, 1]) + y = np.zeros(domain.get_points(time)[:, 1].shape[0], + dtype=domain.points.dtype + ) if domain.points.shape[1] != 3 else np.copy( + domain.get_points(time)[:, 2]) + z = np.zeros(domain.get_points(time)[:, 1].shape[0], + dtype=domain.points.dtype + ) if domain.points.shape[1] != 4 else np.copy( + domain.get_points(time)[:, 3]) + + pointsToVTK(filename, + x, + y, + z, + data={ + 'v': + np.copy(value[np.isclose(domain.get_points()[:, 0], + time)]) + }) diff --git a/science/ldc/infer.sh b/science/ldc/infer.sh new file mode 100755 index 00000000..941bb345 --- /dev/null +++ b/science/ldc/infer.sh @@ -0,0 +1,15 @@ +# Copyright (c) 2023 OneFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +python3 main.py --type="infer" --pretrained=True diff --git a/science/ldc/main.py b/science/ldc/main.py new file mode 100644 index 00000000..d3a94db8 --- /dev/null +++ b/science/ldc/main.py @@ -0,0 +1,102 @@ +# Copyright (c) 2023 OneFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import numpy as np +import oneflow as flow +from core.utils import ones, zeros +from core.nn import FC, WeightedL2 +from core.pinns import Rectangle, NavierStokes2D, DirichletBC, PINNSolver +import argparse +import wget +import os + +parser = argparse.ArgumentParser(description='manual to this script') +parser.add_argument("--type", type=str, default="train") +parser.add_argument("--pretrained", type=bool, default=False) +args = parser.parse_args() + +# set bc +def bc_constrain_func_1(x): + return np.isclose(x[:, 1], np.ones(x.shape[0]) * (-0.05 + 0.1)) + + +def bc_value_func_1(x): + u = ones(x.shape[0]) + v = zeros(x.shape[0]) + return flow.stack((u, v), dim=1) + + +def bc_constrain_func_2(x): + return np.logical_not( + np.isclose(x[:, 1], + np.ones(x.shape[0]) * (-0.05 + 0.1))) + + +def build_model(): + # define geometry + domain = Rectangle(origins=[-0.05, -0.05], + extents=[0.1, 0.1]).discrete(interior=8000, boundary=400) + + # define pde + pde = NavierStokes2D(nu=0.01, rho=1.0) + + # define nn + net = FC(num_ins=2, + num_outs=3, + num_layers=5, + hiden_size=20, + activiation=flow.nn.Tanh()) + + # define loss + loss = WeightedL2(weights=[1, 1, 1, 100, 100]) + + # define optimizer + opt = flow.optim.Adam(net.parameters(), 0.001) + + bc1 = DirichletBC(domain, bc_constrain_func_1, bc_value_func_1) + + bc2 = DirichletBC(domain, bc_constrain_func_2, lambda x: zeros((x.shape[0], 2))) + + # define solver + solver = PINNSolver(network=net, + loss=loss, + optimizer=opt, + domain=domain, + pdes=pde, + inputs_names=['x', 'y'], + outputs_names=['u', 'v', 'p'], + bcs=[bc1, bc2], + bc_indexes=[[0, 1], [0, 1]]) + return solver + +if __name__ == "__main__": + solver = build_model() + # load pretrained + if args.pretrained: + print("Load checkpoint") + if not os.path.isfile('ldc.of'): + url="https://oneflow-public.oss-cn-beijing.aliyuncs.com/ldc.of" + wget.download(url,'ldc.of') + solver.load_checkpoint('ldc.of') + + if args.type=="train": + # train + print("Start train") + 
solver.train(num_epoch=30000, log_frequency=100, checkpoint_frequency=1000) + elif args.type=="infer": + # infer + print("Start infer") + solver.evaluate() + solver.visualize() + diff --git a/science/ldc/requirements.txt b/science/ldc/requirements.txt new file mode 100644 index 00000000..9e8b8f28 --- /dev/null +++ b/science/ldc/requirements.txt @@ -0,0 +1,4 @@ +numpy==1.24.3 +pyevtk==1.6.0 +vtk==9.2.6 +wget==3.2 diff --git a/science/ldc/train.sh b/science/ldc/train.sh new file mode 100755 index 00000000..a125badc --- /dev/null +++ b/science/ldc/train.sh @@ -0,0 +1,15 @@ +# Copyright (c) 2023 OneFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +python3 main.py --type="train" diff --git a/science/lorenz_system/README.md b/science/lorenz_system/README.md new file mode 100644 index 00000000..e84594bb --- /dev/null +++ b/science/lorenz_system/README.md @@ -0,0 +1,42 @@ +### Lorenz system + +The Lorenz system refers to a system of ordinary differential equations that displays chaotic behavior. It was first introduced by Edward Lorenz, a meteorologist, while studying atmospheric convection. + +The Lorenz system consists of three nonlinear differential equations: + +``` +dx/dt = C1 * (y - x) +dy/dt = x * (C2 - z) - y +dz/dt = x * y - C3 * z +``` + +In these equations, x, y, and z are variables representing the state of the system, and t represents time. 
The system also has three parameters: C1, C2, and C3, which control the behavior of the system. These parameters are typically set to specific values to observe interesting dynamics. + +The Lorenz system is known for its sensitivity to initial conditions, which leads to chaotic behavior. It exhibits complicated trajectories in phase space, characterized by strange attractors. These attractors are non-periodic and can have complex fractal structures. + +The Lorenz system has found applications in various fields, such as physics, mathematics, and engineering, serving as a prototypical example of chaotic behavior. It has contributed to the study of dynamical systems, chaos theory, and the exploration of nonlinear dynamics. + + +### Data + +Set the independent variable to be time t, and the dependent variables to be x, y, and z. We give a small amount of data when C1=10, C2=15, and C3=8/3 as training data. + + +### Training + +You can use bash script `train.sh` to train this model. + +```bash +sh train.sh +``` + +During the training process, it will print out the inversion results of the three parameters C1, C2, and C3 every 100 iters. + +### Infer + +Bash script `infer.sh` is used to infer the trained model. + +```bash +sh infer.sh +``` + diff --git a/science/lorenz_system/core/__init__.py b/science/lorenz_system/core/__init__.py new file mode 100644 index 00000000..ed128e51 --- /dev/null +++ b/science/lorenz_system/core/__init__.py @@ -0,0 +1,15 @@ +# Copyright (c) 2023 OneFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +## empty diff --git a/science/lorenz_system/core/nn.py b/science/lorenz_system/core/nn.py new file mode 100644 index 00000000..ede161af --- /dev/null +++ b/science/lorenz_system/core/nn.py @@ -0,0 +1,72 @@ +# Copyright (c) 2023 OneFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import oneflow as flow +from .utils import cfg + + +class FC(flow.nn.Module): + + def __init__(self, num_ins, num_outs, num_layers, hiden_size, activiation): + super(FC, self).__init__() + if num_ins <= 0 or num_outs <= 0 or hiden_size <= 0 or num_layers <= 2: + raise ValueError( + 'Invalid value, num_ins/num_outs/hiden_size should be greater than 0 and num_layers should be greater than 2.' 
+ ) + self.num_ins = num_ins + self.num_outs = num_outs + self.num_layers = num_layers + self.hiden_size = hiden_size + self.activiation = activiation + + layers = [flow.nn.Linear(num_ins, hiden_size), self.activiation] + for idx in range(num_layers - 2): + layers.append(flow.nn.Linear(hiden_size, hiden_size)) + layers.append(self.activiation) + layers.append(flow.nn.Linear(hiden_size, num_outs)) + self.layers = flow.nn.Sequential(*layers) + self.to(cfg.get_device()) + + def forward(self, ins): + return self.layers(ins) + + +class Loss(object): + + def __init__(self, name): + self.name = name + + def evaluate(self, items): + raise NotImplementedError("Implement in Loss subclass") + + +class WeightedL2(Loss): + + def __init__(self, weights): + super(WeightedL2, self).__init__('WeightedL2') + self.weights = weights + + def evaluate(self, items): + if len(items) != len(self.weights): + raise ValueError( + 'Invalid number of items, should be {} but get {}.'.format( + len(self.weights), len(items))) + losses = [] + loss = 0.0 + for item, weight in zip(items, self.weights): + item_loss = flow.sum(item**2) * weight / item.shape[0] + loss += item_loss + losses.append(item_loss) + + return losses, loss diff --git a/science/lorenz_system/core/pinns.py b/science/lorenz_system/core/pinns.py new file mode 100644 index 00000000..0dcd0954 --- /dev/null +++ b/science/lorenz_system/core/pinns.py @@ -0,0 +1,493 @@ +# Copyright (c) 2023 OneFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +import warnings +import numpy as np +import oneflow as flow +from .utils import cfg, sample, cache_func, check_indexes, tensor, save_checkpoint, load_checkpoint + + +class Domain(object): + + def __init__(self, dim, time_dependent, discreted): + self.dim = dim + self.time_dependent = time_dependent + self.discreted = discreted + self.dtype = cfg.get_dtype_n() + + def get_dim(self): + return self.dim + + def is_discreted(self): + return self.discreted + + def is_time_dependent(self): + return self.time_dependent + + def get_dtype(self): + return self.dtype + + def get_interior_points(self, time=None): + if not self.discreted: + raise ValueError( + "This interface is only valid within dicreted domain.") + if time is not None and not self.time_dependent: + raise ValueError( + "Try get points at specific time on a time independent geometry domain." + ) + + if None == time: + return self.interior_points + else: + return self.interior_points[np.isclose(self.interior_points[:, 0], + time)] + + def get_boundary_points(self, time=None): + if not self.discreted: + raise ValueError("Try get points on a not dicreted domain.") + if time is not None and not self.time_dependent: + raise ValueError( + "Try get points at specific time on a time independent geometry domain." + ) + + if None == time: + return self.boundary_points + else: + return self.boundary_points[np.isclose(self.boundary_points[:, 0], + time)] + + def get_initial_points(self): + if not self.discreted or not self.time_dependent: + raise ValueError( + "Try get points on a not dicreted or a time independent domain." 
+ ) + + time = np.sort(self.timedomain.get_points(), axis=None)[0] + if self.timedomain.is_on_initial(time): + return self.get_points(time) + else: + raise ValueError("The earliest time is not on initial") + + def get_points(self, time=None): + if not self.discreted: + raise ValueError("Try get points on a not dicreted domain.") + if time is not None and not self.time_dependent: + raise ValueError( + "Try get points at specific time on a time independent geometry domain." + ) + + if None == time: + return self.points + else: + return self.points[np.isclose(self.points[:, 0], time)] + + def get_npoints(self): + if not self.discreted: + raise ValueError( + "This interface is only valid within dicreted domain.") + return self.points.size // ( + self.dim + + 1) if self.time_dependent else self.points.size // self.dim + + +class Interval(Domain): + + def __init__(self, origin, extent): + super(Interval, self).__init__(1, False, False) + self.origin = np.array([origin], dtype=self.dtype) + self.extent = np.array([extent], dtype=self.dtype) + + def _sample_interior_points(self, n, sampler): + if 'uniform' == sampler: + points = np.linspace(0.0, + 1.0, + num=n + 1, + endpoint=False, + dtype=self.dtype)[1:].reshape(n, 1) + else: + points = sample(n, 1, sampler, self.dtype) + ret = points * self.extent + self.origin + return ret + + def _sample_boundary_points(self, n, sampler): + if 2 == n: + return np.array([self.origin, + self.origin + self.extent]).astype(self.dtype) + if 'uniform' == sampler: + xl = np.full(((n + 1) // 2, 1), self.origin).astype(self.dtype) + xr = np.full((n - (n + 1) // 2, 1), + self.origin + self.extent).astype(self.dtype) + return np.vstack((xl, xr)) + else: + rng = np.random.default_rng(cfg.seed) + return rng.choice([self.origin, self.origin + self.extent], + n).astype(self.dtype) + + def discrete(self, interior, boundary, sampler='uniform'): + # Step1: some checks. 
+ if self.discreted: + raise ValueError( + "Do not support discrete a domian more than once.") + + if self.time_dependent: + raise ValueError( + "A domain should not be time dependent before discrete process." + ) + + if interior < 0 or boundary < 0: + raise ValueError( + "Invalid arguments, should sample at least 0 points from interior and boundary." + ) + + # Step2: discrete process + self.interior_points = self._sample_interior_points( + interior, sampler) if interior != 0 else np.zeros( + (0, self.dim), self.dtype) + if self.interior_points.shape[0] != interior: + warnings.warn( + "Expected {x} interior points, but get {y} interior points". + format(x=interior, y=self.interior_points.shape[0])) + self.boundary_points = self._sample_boundary_points( + boundary, sampler) if boundary != 0 else np.zeros( + (0, self.dim), self.dtype) + if self.boundary_points.shape[0] != boundary: + warnings.warn( + "Expected {x} boundary points, but get {y} boundary points". + format(x=boundary, y=self.boundary_points.shape[0])) + + self.points = np.concatenate( + (self.interior_points, self.boundary_points), axis=0) + + self.discreted = True + return self + + +# Helper class to compute Jacobian matrix for tensor with shape x[bsize, m] and y[bsize, n] +class Jacobian(object): + + def __init__(self, x, y): + super(Jacobian, self).__init__() + # Ensure that the shapes of x and y match + if len(x.shape) != 2 or len(y.shape) != 2: + raise ValueError( + 'Invalid shape, the number of dimensions of x and y should be 2.' + ) + if x.shape[0] != y.shape[0]: + raise ValueError( + 'Invalid shape, the first dimension of x and y should be the same.' 
class Variables(flow.nn.Module):
    """Trainable named scalar parameters (e.g. unknown PDE coefficients).

    Each entry of ``name_value_map`` becomes a registered
    ``flow.nn.parameter.Parameter`` initialised from the given value and
    exposed as an attribute of this module, so an optimizer can learn it
    jointly with the network weights.
    """

    def __init__(self, name_value_map=None):
        super(Variables, self).__init__()
        # BUGFIX: the original used a mutable default argument
        # (``name_value_map=dict()``), which is shared across all calls.
        # Behavior is unchanged: ``Variables()`` still creates no parameters.
        if name_value_map is None:
            name_value_map = {}
        # Insertion-ordered list of parameter names, used by the solvers
        # when logging the learned variable values.
        self.names = []
        for key in name_value_map:
            self.names.append(key)
            self.register_parameter(
                key,
                flow.nn.parameter.Parameter(flow.tensor(name_value_map[key])))
        # Move all registered parameters to the globally configured device.
        self.to(cfg.get_device())
outputs_names.index('y') + pos_z = outputs_names.index('z') + x = outputs[:, pos_x] + y = outputs[:, pos_y] + z = outputs[:, pos_z] + C1 = self.variables.C1 + C2 = self.variables.C2 + C3 = self.variables.C3 + jac = Jacobian(inputs, outputs) + + pde_1 = jac[pos_x, pos_t] - C1 * (y - x) + pde_2 = jac[pos_y, pos_t] - x * (C2 - z) + y + pde_3 = jac[pos_z, pos_t] - x * y + C3 * z + return [pde_1, pde_2, pde_3] + + +class BC(object): + + def __init__(self, domain, constrain_func, value_func, boundary_points): + if not domain.is_discreted(): + raise ValueError("The geometry must be discreted") + self.domain = domain + self.value_func = cache_func(value_func) + self.constrain_func = constrain_func + self.boundary_points = boundary_points + + def evaluate(self, net, value_indexes): + raise NotImplementedError( + "Boundary condition evaluate not implement in {}".format( + self.__class__.__name__)) + + +@cache_func +def get_boundary_constrain_points(domain, + X, + constrain_func, + with_boundary_normal=False): + + if X is None: + boundary_points = domain.get_boundary_points() + constrain = constrain_func(boundary_points) + if with_boundary_normal: + boundary_points = boundary_points[constrain] + n = tensor(domain.boundary_normal(boundary_points)) + boundary_points = tensor(boundary_points) + boundary_points.requires_grad = True + return boundary_points, n + else: + return tensor(boundary_points[constrain]) + else: + constrain = domain.is_on_boundary(X) * constrain_func(X) + if with_boundary_normal: + boundary_points = X[constrain] + n = tensor(domain.boundary_normal(boundary_points)) + boundary_points = tensor(boundary_points) + boundary_points.requires_grad = True + return boundary_points, n + else: + return tensor(X[constrain]) + + +class DirichletBC(BC): + """Dirichlet boundary conditions: y(x) = func(x). 
+ """ + + def __init__(self, + domain, + constrain_func, + value_func, + boundary_points=None): + super().__init__(domain, constrain_func, value_func, boundary_points) + + def evaluate(self, net, value_indexes): + if not check_indexes(value_indexes): + raise ValueError("value indexes should be list/tuple of int ") + + boundary_points = get_boundary_constrain_points( + self.domain, self.boundary_points, self.constrain_func, False) + + net_out = net(boundary_points) + gt = self.value_func(boundary_points) + diff = net_out[:, value_indexes] - gt + return diff + + +class Supervision(object): + + def __init__(self, x, y): + self.dtype_t = cfg.get_dtype_t() + self.device = cfg.get_device() + self.x = flow.tensor(x, dtype=self.dtype_t, device=self.device) + self.y = flow.tensor(y, dtype=self.dtype_t, device=self.device) + + def __getitem__(self, item): + if item not in ['x', 'y']: + raise ValueError("Invalid item, only support x, y.") + + return getattr(self, item) + + +class AISolver(object): + + def __init__(self, algorithm, network, loss, optimizer, checkpoint_path): + self.algorithm = algorithm + self.network = network + self.loss = loss + self.optimizer = optimizer + self.checkpoint_path = checkpoint_path + + def set_optimizer(self, optimizer): + self.optimizer = optimizer + + def save_checkpoint(self, path): + save_checkpoint(path, self.network, self.optimizer) + + def load_checkpoint(self, path): + load_checkpoint(path, self.network, self.optimizer) + + def train(self): + raise NotImplementedError("Implement in AISolver subclass") + + def predict(self): + raise NotImplementedError("Implement in AISolver subclass") + + def visualize(self): + raise NotImplementedError("Implement in AISolver subclass") + + +class PINNSolver(AISolver): + + def __init__(self, + network, + loss, + optimizer, + domain, + pdes, + inputs_names, + outputs_names, + bcs=[], + bc_indexes=[], + ics=[], + ic_indexes=[], + sups=[], + checkpoint_path='./log'): + super(PINNSolver, 
self).__init__('PINNs', network, loss, optimizer, + checkpoint_path) + self.domain = domain + self.pdes = pdes + self.inputs_names = inputs_names + self.outputs_names = outputs_names + self.bcs = bcs + self.bc_indexes = bc_indexes + self.ics = ics + self.ic_indexes = ic_indexes + self.sups = sups + + def save_checkpoint(self, path): + save_checkpoint(path, self.network, self.optimizer, + self.pdes.get_variables()) + + def load_checkpoint(self, path): + load_checkpoint(path, self.network, self.optimizer, + self.pdes.get_variables()) + + def train(self, num_epoch, log_frequency=100, checkpoint_frequency=1000): + for idx in range(num_epoch): + self.optimizer.zero_grad() + # PDE items + inputs_interior = tensor(self.domain.get_interior_points()) + inputs_interior.requires_grad = True + outputs_interior = self.network(inputs_interior) + pde_items = self.pdes.evaluate(inputs_interior, outputs_interior, + self.inputs_names, + self.outputs_names) + + # bc items + bc_items = [ + bc.evaluate(self.network, indexes) + for bc, indexes in zip(self.bcs, self.bc_indexes) + ] + + # ic items + ic_items = [ + ic.evaluate(self.network, indexes) + for ic, indexes in zip(self.ics, self.ic_indexes) + ] + + # sup items + sup_items = [ + self.network(sup['x']) - sup['y'] for sup in self.sups + ] + + losses, loss = self.loss.evaluate(pde_items + bc_items + ic_items + + sup_items) + loss.backward() + self.optimizer.step() + + if (idx + 1) % log_frequency == 0: + print( + f"num_epoch: {idx + 1}, loss: {loss.detach().cpu().numpy():.7g}" + ) + print(f"sub losses:") + for loss_el in losses: + print(f"{loss_el.detach().cpu().numpy():.7g}") + + variables = self.pdes.get_variables() + if variables is not None: + print(f"variables:") + for name in variables.names: + print( + f"{name}: {getattr(variables, name).detach().cpu().numpy():.7g}" + ) + if (idx + 1) % checkpoint_frequency == 0: + self.save_checkpoint(self.checkpoint_path + '/checkpoint_' + + str(idx + 1) + '.pt') + + def evaluate(self): + 
variables = self.pdes.get_variables() + if variables is not None: + print(f"Evaluate variables:") + for name in variables.names: + print( + f"{name}: {getattr(variables, name).detach().cpu().numpy():.7g}" + ) diff --git a/science/lorenz_system/core/utils.py b/science/lorenz_system/core/utils.py new file mode 100644 index 00000000..7c614b1c --- /dev/null +++ b/science/lorenz_system/core/utils.py @@ -0,0 +1,180 @@ +# Copyright (c) 2023 OneFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
def sample(num_samples, dim, method, dtype):
    """Sample grid points from the open unit hypercube (0, 1)^dim.

    Takes ``n = ceil(num_samples ** (1 / dim))`` evenly spaced coordinates
    per dimension (excluding the endpoints 0.0 and 1.0) and returns their
    Cartesian product — so the actual number of returned points is
    ``n ** dim``, which may exceed ``num_samples``.

    Args:
        num_samples: requested (minimum) number of points.
        dim: dimensionality of the sampling domain.
        method: sampling strategy; only ``'uniform'`` is supported.
        dtype: numpy dtype of the returned array.

    Returns:
        ``np.ndarray`` of shape ``(n ** dim, dim)``.

    Raises:
        ValueError: if ``method`` is not ``'uniform'``.
    """
    # BUGFIX: compare strings with ``!=``, not ``is not``.  The original
    # identity check only worked by accident through CPython's interning
    # of short string literals, and raises a SyntaxWarning on Python 3.8+.
    if method != 'uniform':
        raise ValueError('Invalid method, only support uniform.')

    # Do not include the start point 0.0 or the end point 1.0.
    n = int(np.ceil(num_samples**(1 / dim)))
    per_axis = [
        np.linspace(0.0, 1.0, num=n + 1, endpoint=False, dtype=dtype)[1:]
        for _ in range(dim)
    ]
    grid = list(itertools.product(*per_axis))
    return np.array(grid, dtype=dtype).reshape(-1, dim)
def check_indexes(indexes):
    """Return True iff ``indexes`` is a tuple/list containing only ints.

    Any non-sequence input (including a bare int or a string) is rejected;
    an empty tuple/list is accepted.
    """
    if not isinstance(indexes, (tuple, list)):
        return False
    return all(isinstance(index, int) for index in indexes)
--git a/science/lorenz_system/infer.sh b/science/lorenz_system/infer.sh new file mode 100755 index 00000000..941bb345 --- /dev/null +++ b/science/lorenz_system/infer.sh @@ -0,0 +1,15 @@ +# Copyright (c) 2023 OneFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +python3 main.py --type="infer" --pretrained=True diff --git a/science/lorenz_system/main.py b/science/lorenz_system/main.py new file mode 100644 index 00000000..cf92ecbb --- /dev/null +++ b/science/lorenz_system/main.py @@ -0,0 +1,104 @@ +# Copyright (c) 2023 OneFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
import numpy as np
import oneflow as flow
from core.utils import ones
from core.nn import FC, WeightedL2
from core.pinns import Interval, Variables, UndeterminedLorenzSystem, DirichletBC, Supervision, PINNSolver
import argparse
import wget
import os


def _str2bool(value):
    """Parse a command-line boolean flag value.

    argparse's ``type=bool`` treats ANY non-empty string — including
    "False" — as True, so a dedicated parser is required.
    """
    return str(value).lower() in ('true', '1', 'yes')


parser = argparse.ArgumentParser(description='manual to this script')
parser.add_argument("--type", type=str, default="train")
# BUGFIX: the original ``type=bool`` made "--pretrained=False" parse as
# True.  ``_str2bool`` fixes that while keeping the existing
# "--pretrained=True" invocation (see infer.sh) working unchanged.
parser.add_argument("--pretrained", type=_str2bool, default=False)
args = parser.parse_args()


def bc_constrain_func(x):
    """Select boundary points at t == 0 (the initial time)."""
    return np.isclose(x[:, 0], np.zeros(x.shape[0]))


def bc_value_func(x):
    """Initial condition (x, y, z) = (-8, 7, 27) for every boundary point."""
    v1 = ones(x.shape[0]) * (-8.0)
    v2 = ones(x.shape[0]) * (7.0)
    v3 = ones(x.shape[0]) * (27.0)
    return flow.stack((v1, v2, v3), dim=1)


def build_model():
    """Assemble the PINN solver for the undetermined Lorenz system.

    Returns:
        A configured ``PINNSolver`` combining the time domain, the PDE with
        learnable coefficients C1/C2/C3, the initial-condition boundary
        constraint, supervised trajectory data, network, loss and optimizer.
    """
    # Time domain t in [0, 3], discretised into interior/boundary points.
    domain = Interval(0, 3).discrete(interior=500, boundary=2)

    # Unknown Lorenz coefficients, learned jointly with the network.
    var = Variables(name_value_map={'C1': 1.0, 'C2': 1.0, 'C3': 1.0})
    pde = UndeterminedLorenzSystem(var)

    bc = DirichletBC(domain, bc_constrain_func, bc_value_func)

    # Supervised trajectory data used to identify the coefficients.
    data = np.load("data/lorenz.npz")
    sup = Supervision(data['t'], data['y'])

    net = FC(num_ins=1,
             num_outs=3,
             num_layers=5,
             hiden_size=40,
             activiation=flow.nn.Tanh())

    # Per-term loss weights: 3 PDE residuals, 1 BC, 1 supervision (x100).
    loss = WeightedL2(weights=[1, 1, 1, 1, 100])

    opt = flow.optim.Adam(
        list(net.parameters()) + list(pde.get_variables().parameters()), 0.001)

    solver = PINNSolver(network=net,
                        loss=loss,
                        optimizer=opt,
                        domain=domain,
                        pdes=pde,
                        inputs_names=['t'],
                        outputs_names=['x', 'y', 'z'],
                        bcs=[bc],
                        bc_indexes=[
                            [0, 1, 2],
                        ],
                        sups=[sup])
    return solver


if __name__ == "__main__":
    solver = build_model()
    # Optionally restore a pretrained checkpoint, downloading it on demand.
    if args.pretrained:
        print("Load checkpoint")
        if not os.path.isfile('lorenz_system.of'):
            url = "https://oneflow-public.oss-cn-beijing.aliyuncs.com/lorenz_system.of"
            wget.download(url, 'lorenz_system.of')
solver.load_checkpoint('lorenz_system.of') + + if args.type=="train": + # train + print("Start train") + solver.train(num_epoch=20000, log_frequency=100, checkpoint_frequency=1000) + elif args.type=="infer": + # infer + print("Start infer") + solver.evaluate() \ No newline at end of file diff --git a/science/lorenz_system/requirements.txt b/science/lorenz_system/requirements.txt new file mode 100644 index 00000000..e7ef04f9 --- /dev/null +++ b/science/lorenz_system/requirements.txt @@ -0,0 +1,2 @@ +numpy==1.24.3 +wget==3.2 diff --git a/science/lorenz_system/train.sh b/science/lorenz_system/train.sh new file mode 100755 index 00000000..a125badc --- /dev/null +++ b/science/lorenz_system/train.sh @@ -0,0 +1,15 @@ +# Copyright (c) 2023 OneFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +python3 main.py --type="train" diff --git a/video/README.md b/video/README.md new file mode 100644 index 00000000..b28a9f1b --- /dev/null +++ b/video/README.md @@ -0,0 +1 @@ +# Video Models diff --git a/vidio/README.md b/vidio/README.md deleted file mode 100644 index c68f53e9..00000000 --- a/vidio/README.md +++ /dev/null @@ -1 +0,0 @@ -# Science Models