Skip to content

Commit

Permalink
Wheel skip nccl (#10452)
Browse files Browse the repository at this point in the history
Co-authored-by: oneflow-ci-bot <ci-bot@oneflow.org>
  • Loading branch information
jackalcooper and oneflow-ci-bot authored Mar 19, 2024
1 parent f111e83 commit a130148
Show file tree
Hide file tree
Showing 7 changed files with 37 additions and 27 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/canary.yml
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ jobs:
- name: Checkout Oneflow-Inc/oneflow
if: ${{ github.event.inputs.oneflow-ref == '' }}
uses: actions/checkout@v2
- uses: Oneflow-Inc/get-oneflow@support-py311-py312
- uses: Oneflow-Inc/get-oneflow@whl-skip-nccl
name: Build manylinux
id: build-cuda
with:
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/on_merge.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,6 @@ jobs:
if: github.event.pull_request.merged == true
runs-on: ubuntu-latest
steps:
- uses: Oneflow-Inc/get-oneflow/update-benchmark-history@support-py311-py312
- uses: Oneflow-Inc/get-oneflow/update-benchmark-history@whl-skip-nccl
name: Update benchmark history
timeout-minutes: 10
8 changes: 4 additions & 4 deletions .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ jobs:
ref: ${{ inputs.branch }}
repository: ${{ secrets.ONEFLOW_PRIV_ORG }}/oneflow
token: ${{ secrets.ONEFLOW_PRIV_GH_TOKEN }}
- uses: Oneflow-Inc/get-oneflow/cache-complete/matrix/build@support-py311-py312
- uses: Oneflow-Inc/get-oneflow/cache-complete/matrix/build@whl-skip-nccl
name: Find build cache
id: find-cache
timeout-minutes: 5
Expand Down Expand Up @@ -149,7 +149,7 @@ jobs:
if: ${{ inputs.is_priv }}
run: |
env
- uses: Oneflow-Inc/get-oneflow@support-py311-py312
- uses: Oneflow-Inc/get-oneflow@whl-skip-nccl
name: Build ${{ matrix.entry }}
if: ${{ matrix.entry =='cu118' || startsWith(matrix.entry, 'cu12') }}
with:
Expand All @@ -174,7 +174,7 @@ jobs:
3.10
3.9
3.8
- uses: Oneflow-Inc/get-oneflow@support-py311-py312
- uses: Oneflow-Inc/get-oneflow@whl-skip-nccl
name: Build ${{ matrix.entry }}
if: ${{ startsWith(matrix.entry, 'cu') && matrix.entry !='cu118' && !startsWith(matrix.entry, 'cu12') }}
with:
Expand All @@ -199,7 +199,7 @@ jobs:
3.10
3.9
3.8
- uses: Oneflow-Inc/get-oneflow@support-py311-py312
- uses: Oneflow-Inc/get-oneflow@whl-skip-nccl
name: Build ${{ matrix.entry }}
if: ${{ matrix.entry =='cpu' }}
with:
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/simple.yml
Original file line number Diff line number Diff line change
Expand Up @@ -244,7 +244,7 @@ jobs:
repository: Oneflow-Inc/conda-env
ref: 30a7f00eb48ee9009d85a848e720823e5054c66b
path: conda-env
- uses: Oneflow-Inc/get-oneflow@support-py311-py312
- uses: Oneflow-Inc/get-oneflow@whl-skip-nccl
name: Build with gcc7
if: ${{ matrix.build-type == 'gcc7'}}
with:
Expand All @@ -253,7 +253,7 @@ jobs:
oneflow-build-env: conda
conda-env-file: conda-env/dev/gcc7/environment-v2.yml
conda-env-name: oneflow-dev-gcc7-v2
- uses: Oneflow-Inc/get-oneflow@support-py311-py312
- uses: Oneflow-Inc/get-oneflow@whl-skip-nccl
name: Build with clang10
if: ${{ matrix.build-type == 'clang10'}}
with:
Expand Down
38 changes: 19 additions & 19 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,7 @@ jobs:
with:
ref: ${{ github.event.pull_request.head.sha }}
repository: ${{github.event.pull_request.head.repo.full_name}}
- uses: Oneflow-Inc/get-oneflow/cache-complete/matrix/build@support-py311-py312
- uses: Oneflow-Inc/get-oneflow/cache-complete/matrix/build@whl-skip-nccl
name: find cache
id: find-cache
timeout-minutes: 5
Expand Down Expand Up @@ -223,7 +223,7 @@ jobs:
with:
ref: ${{ github.event.pull_request.head.sha }}
repository: ${{github.event.pull_request.head.repo.full_name}}
- uses: Oneflow-Inc/get-oneflow/cache-complete@support-py311-py312
- uses: Oneflow-Inc/get-oneflow/cache-complete@whl-skip-nccl
name: Save cache if successful
id: save-cache
timeout-minutes: 5
Expand All @@ -237,7 +237,7 @@ jobs:
run: |
echo "::error file=test.yml,line=204,col=10::steps.save-cache.outputs.cache-hit != matrix.cache-hit"
exit 1
- uses: Oneflow-Inc/get-oneflow@support-py311-py312
- uses: Oneflow-Inc/get-oneflow@whl-skip-nccl
name: Build manylinux ${{ matrix.entry }}
id: build-cpu
if: ${{ matrix.entry =='cpu' && !matrix.cache-hit }}
Expand All @@ -259,7 +259,7 @@ jobs:
python-versions: |
3.7
3.8
- uses: Oneflow-Inc/get-oneflow@support-py311-py312
- uses: Oneflow-Inc/get-oneflow@whl-skip-nccl
name: Build manylinux ${{ matrix.entry }}
id: build-cpu-sanitizers
if: ${{ (matrix.entry == 'cpu-asan-ubsan' || matrix.entry == 'cpu-tsan') && !matrix.cache-hit && false }}
Expand All @@ -280,7 +280,7 @@ jobs:
clean-ccache: ${{ contains(github.event.pull_request.labels.*.name, 'need-clean-ccache') }}
python-versions: |
3.8
- uses: Oneflow-Inc/get-oneflow@support-py311-py312
- uses: Oneflow-Inc/get-oneflow@whl-skip-nccl
name: Build manylinux ${{ matrix.entry }}
id: build-cuda
if: ${{ matrix.entry =='cu116' && !matrix.cache-hit }}
Expand All @@ -300,7 +300,7 @@ jobs:
clean-ccache: ${{ contains(github.event.pull_request.labels.*.name, 'need-clean-ccache') }}
python-versions: |
3.7
- uses: Oneflow-Inc/get-oneflow@support-py311-py312
- uses: Oneflow-Inc/get-oneflow@whl-skip-nccl
name: Build ${{ matrix.entry }}
if: ${{ matrix.entry == 'llvm15' && !matrix.cache-hit }}
with:
Expand Down Expand Up @@ -339,7 +339,7 @@ jobs:
})
- name: Upload packed liboneflow
if: ${{ !fromJson(matrix.cache-hit) && matrix.entry != 'llvm15' && matrix.entry != 'cpu-asan-ubsan' && matrix.entry != 'cpu-tsan' }}
uses: Oneflow-Inc/get-oneflow/digest/upload@support-py311-py312
uses: Oneflow-Inc/get-oneflow/digest/upload@whl-skip-nccl
timeout-minutes: 10
with:
digest: ${{ steps.save-cache.outputs.build-digest }}
Expand All @@ -350,7 +350,7 @@ jobs:
dst-dir: cpack
- name: Upload whl
if: ${{ !fromJson(matrix.cache-hit) && matrix.entry != 'llvm15' && matrix.entry != 'cpu-asan-ubsan' && matrix.entry != 'cpu-tsan' }}
uses: Oneflow-Inc/get-oneflow/digest/upload@support-py311-py312
uses: Oneflow-Inc/get-oneflow/digest/upload@whl-skip-nccl
timeout-minutes: 10
with:
digest: ${{ steps.save-cache.outputs.build-digest }}
Expand All @@ -375,7 +375,7 @@ jobs:
with:
ref: ${{ github.event.pull_request.head.sha }}
repository: ${{github.event.pull_request.head.repo.full_name}}
- uses: Oneflow-Inc/get-oneflow/cache-complete/matrix/test@support-py311-py312
- uses: Oneflow-Inc/get-oneflow/cache-complete/matrix/test@whl-skip-nccl
name: find cache
id: find-cache
timeout-minutes: 5
Expand Down Expand Up @@ -406,7 +406,7 @@ jobs:
with:
ref: ${{ github.event.pull_request.head.sha }}
repository: ${{github.event.pull_request.head.repo.full_name}}
- uses: Oneflow-Inc/get-oneflow/cache-complete/matrix/test@support-py311-py312
- uses: Oneflow-Inc/get-oneflow/cache-complete/matrix/test@whl-skip-nccl
name: find cache
id: find-cache
timeout-minutes: 5
Expand Down Expand Up @@ -488,7 +488,7 @@ jobs:
if: ${{ contains(matrix.runs-on, 'self-hosted') }}
run: |
docker rm -f ${{ env.TEST_CONTAINER_NAME }} || true
- uses: Oneflow-Inc/get-oneflow/cache-complete@support-py311-py312
- uses: Oneflow-Inc/get-oneflow/cache-complete@whl-skip-nccl
name: Save cache if successful
id: save-cache
timeout-minutes: 5
Expand All @@ -504,7 +504,7 @@ jobs:
exit 1
- name: Download wheel and packed liboneflow
if: ${{ !fromJson(matrix.cache-hit) && contains(matrix.runs-on, 'self-hosted') }}
uses: Oneflow-Inc/get-oneflow/digest/download@support-py311-py312
uses: Oneflow-Inc/get-oneflow/digest/download@whl-skip-nccl
id: download-digest
timeout-minutes: 10
with:
Expand All @@ -514,7 +514,7 @@ jobs:
ssh-tank-path: ${{ env.SSH_TANK_PATH }}
- name: Get primary node
if: ${{ !fromJson(matrix.cache-hit) && contains(matrix.runs-on, 'self-hosted') }}
uses: Oneflow-Inc/get-oneflow/master-address@support-py311-py312
uses: Oneflow-Inc/get-oneflow/master-address@whl-skip-nccl
id: get-primary-node
with:
rank: ${{ matrix.rank }}
Expand Down Expand Up @@ -718,7 +718,7 @@ jobs:
if: ${{ contains(matrix.runs-on, 'self-hosted') }}
run: |
docker rm -f ${{ env.TEST_MANYLINUX_CONTAINER_NAME }} || true
- uses: Oneflow-Inc/get-oneflow/cache-complete@support-py311-py312
- uses: Oneflow-Inc/get-oneflow/cache-complete@whl-skip-nccl
name: Save cache if successful
id: save-cache
timeout-minutes: 5
Expand All @@ -734,7 +734,7 @@ jobs:
exit 1
- name: Download wheel and packed liboneflow
if: ${{ !fromJson(matrix.cache-hit) && contains(matrix.runs-on, 'self-hosted') }}
uses: Oneflow-Inc/get-oneflow/digest/download@support-py311-py312
uses: Oneflow-Inc/get-oneflow/digest/download@whl-skip-nccl
id: download-digest
timeout-minutes: 10
with:
Expand All @@ -744,7 +744,7 @@ jobs:
ssh-tank-path: ${{ env.SSH_TANK_PATH }}
- name: Download ASAN and UBSAN wheel and packed liboneflow
if: ${{ !fromJson(matrix.cache-hit) && contains(matrix.runs-on, 'self-hosted') && matrix.device == 'cpu' && false }}
uses: Oneflow-Inc/get-oneflow/digest/download@support-py311-py312
uses: Oneflow-Inc/get-oneflow/digest/download@whl-skip-nccl
id: asan-ubsan-download-digest
timeout-minutes: 10
with:
Expand All @@ -754,7 +754,7 @@ jobs:
ssh-tank-path: ${{ env.SSH_TANK_PATH }}
- name: Download TSAN wheel and packed liboneflow
if: ${{ !fromJson(matrix.cache-hit) && contains(matrix.runs-on, 'self-hosted') && matrix.device == 'cpu' && false }}
uses: Oneflow-Inc/get-oneflow/digest/download@support-py311-py312
uses: Oneflow-Inc/get-oneflow/digest/download@whl-skip-nccl
id: tsan-download-digest
timeout-minutes: 10
with:
Expand Down Expand Up @@ -1080,7 +1080,7 @@ jobs:
- name: Benchmark Test
timeout-minutes: 100
if: ${{ !fromJson(matrix.cache-hit) && matrix.test-type == 'benchmark' && matrix.device == 'cuda' }}
uses: Oneflow-Inc/get-oneflow/pytest-benchmark@support-py311-py312
uses: Oneflow-Inc/get-oneflow/pytest-benchmark@whl-skip-nccl
with:
collect-path: ${{ env.FLOW_VISION_SRC }}/benchmark
container-name: ${{ env.TEST_CONTAINER_NAME }}
Expand Down Expand Up @@ -1141,7 +1141,7 @@ jobs:
ref: ${{ github.event.pull_request.head.sha }}
repository: ${{github.event.pull_request.head.repo.full_name}}
fetch-depth: 0
- uses: Oneflow-Inc/get-oneflow/cache-complete@support-py311-py312
- uses: Oneflow-Inc/get-oneflow/cache-complete@whl-skip-nccl
name: Save cache if successful
id: save-cache
timeout-minutes: 5
Expand Down
4 changes: 4 additions & 0 deletions cmake/oneflow.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -400,6 +400,10 @@ if(BUILD_PYTHON)
pybind11_add_module(oneflow_internal ${PYBIND11_SRCS} ${of_pybind_obj_cc} ${PYBIND_REGISTRY_CC})
set_property(TARGET oneflow_internal APPEND PROPERTY BUILD_RPATH "\$ORIGIN/../nvidia/cublas/lib")
set_property(TARGET oneflow_internal APPEND PROPERTY BUILD_RPATH "\$ORIGIN/../nvidia/cudnn/lib")
set_property(TARGET oneflow_internal APPEND PROPERTY BUILD_RPATH "\$ORIGIN/../nvidia/nccl/lib")
set_property(TARGET oneflow_internal APPEND PROPERTY BUILD_RPATH
"\$ORIGIN/../nvidia/cusparse/lib")
set_property(TARGET oneflow_internal APPEND PROPERTY BUILD_RPATH "\$ORIGIN/../nvidia/cufft/lib")
set_compile_options_to_oneflow_target(oneflow_internal)
set_property(TARGET oneflow_internal PROPERTY CXX_VISIBILITY_PRESET "default")
add_dependencies(oneflow_internal of_functional_obj of_functional_tensor_obj of_op_schema)
Expand Down
6 changes: 6 additions & 0 deletions python/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,10 +65,16 @@ def get_version():
if "cu11" in ONEFLOW_VERSION and "cu112" not in ONEFLOW_VERSION:
REQUIRED_PACKAGES.append("nvidia-cudnn-cu11")
REQUIRED_PACKAGES.append("nvidia-cublas-cu11")
REQUIRED_PACKAGES.append("nvidia-nccl-cu11")
REQUIRED_PACKAGES.append("nvidia-cusparse-cu11")
REQUIRED_PACKAGES.append("nvidia-cufft-cu11")

if "cu12" in ONEFLOW_VERSION:
REQUIRED_PACKAGES.append("nvidia-cudnn-cu12")
REQUIRED_PACKAGES.append("nvidia-cublas-cu12")
REQUIRED_PACKAGES.append("nvidia-nccl-cu12")
REQUIRED_PACKAGES.append("nvidia-cusparse-cu12")
REQUIRED_PACKAGES.append("nvidia-cufft-cu12")

# if python version < 3.7.x, then need to pip install dataclasses
if sys.version_info.minor < 7:
Expand Down

0 comments on commit a130148

Please sign in to comment.