-
Notifications
You must be signed in to change notification settings - Fork 42
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #56 from archlitchi/dev-vgpu-1219
add implementation for libvgpu_so and corresponding CI
- Loading branch information
Showing
84 changed files
with
12,015 additions
and
9 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -129,3 +129,7 @@ kubernetes.tar.gz | |
# test coverage file | ||
coverage.txt | ||
|
||
updateso.sh | ||
|
||
lib/nvidia/libvgpu/build | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Binary file not shown.
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,76 @@ | ||
cmake_minimum_required (VERSION 2.8.11) | ||
project (libvgpu) | ||
|
||
# get_git_hash(<out-var>)
#
# Runs `git log -1 --pretty=format:%h` in CMAKE_CURRENT_SOURCE_DIR and stores
# the (whitespace-stripped) output in <out-var>. Git's stderr is discarded.
# When Git cannot be found, nothing is executed and <out-var> is left as is.
#
# This is a macro, so <out-var> is written directly in the caller's scope.
macro(get_git_hash _git_hash)
  find_package(Git QUIET)
  if(GIT_FOUND)
    execute_process(
      COMMAND ${GIT_EXECUTABLE} log -1 --pretty=format:%h
      WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
      OUTPUT_VARIABLE ${_git_hash}
      ERROR_QUIET
      OUTPUT_STRIP_TRAILING_WHITESPACE
    )
  endif()
endmacro()
|
||
# get_git_branch(<out-var>)
#
# Runs `git symbolic-ref --short -q HEAD` in CMAKE_CURRENT_SOURCE_DIR and
# stores the (whitespace-stripped) output in <out-var>. Unlike get_git_hash,
# ERROR_QUIET is not set here, so Git's stderr is not suppressed.
# When Git cannot be found, nothing is executed and <out-var> is left as is.
#
# This is a macro, so <out-var> is written directly in the caller's scope.
macro(get_git_branch _git_branch)
  find_package(Git QUIET)
  if(GIT_FOUND)
    execute_process(
      COMMAND ${GIT_EXECUTABLE} symbolic-ref --short -q HEAD
      WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
      OUTPUT_VARIABLE ${_git_branch}
      OUTPUT_STRIP_TRAILING_WHITESPACE
    )
  endif()
endmacro()
|
||
set(GIT_HASH "") | ||
get_git_hash(GIT_HASH) | ||
message(STATUS "Git hash is ${GIT_HASH}") | ||
|
||
# Branch name: take it from the CI_COMMIT_REF_NAME environment variable when
# that is defined; otherwise start from an empty value and query Git.
if(DEFINED ENV{CI_COMMIT_REF_NAME})
  set(GIT_BRANCH $ENV{CI_COMMIT_REF_NAME})
else()
  set(GIT_BRANCH "")
  get_git_branch(GIT_BRANCH)
endif()
|
||
if(GIT_FOUND) | ||
#string(REPLACE "." "_" GIT_BRANCH ${GIT_BRANCH}) | ||
#string(REPLACE "-" "_" GIT_BRANCH ${GIT_BRANCH}) | ||
#message(STATUS "Git branch is ${GIT_BRANCH}") | ||
endif() | ||
|
||
configure_file(src/static_config.h.in ${CMAKE_CURRENT_BINARY_DIR}/config/static_config.h) | ||
include_directories(${CMAKE_CURRENT_BINARY_DIR}/config) | ||
|
||
# CUDA toolkit root: honour the CUDA_HOME environment variable when defined,
# falling back to /usr/local/cuda otherwise.
if(DEFINED ENV{CUDA_HOME})
  set(CUDA_HOME $ENV{CUDA_HOME})
else()
  set(CUDA_HOME /usr/local/cuda)
endif()
|
||
|
||
# Compile flags for the hook library (presumably consumed by the
# subdirectories added further down -- confirm against src/ and test/).
if (CMAKE_BUILD_TYPE STREQUAL Debug)
  # Debug: emit debug info and keep default symbol visibility.
  set(LIBRARY_COMPILE_FLAGS -shared -fPIC -g -D_GNU_SOURCE -Wall)
else()
  # Non-debug: hide symbols that are not explicitly exported.
  set(LIBRARY_COMPILE_FLAGS -shared -fPIC -D_GNU_SOURCE -fvisibility=hidden -Wall)
endif()

# Test flags are the same for every build type.
# This used to be "-o1", which GCC/Clang parse as "write output to a file
# named 1" rather than as an optimisation level; "-O1" is the optimisation flag.
set(TEST_COMPILE_FLAGS -O1)
|
||
include_directories(${CUDA_HOME}/include) | ||
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/src) | ||
|
||
add_subdirectory(src) | ||
add_subdirectory(test) | ||
|
||
if (BUILD_DOCKER STREQUAL true) | ||
add_subdirectory(dockerfiles) | ||
endif() | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,88 @@ | ||
# Hook library for CUDA Environments | ||
|
||
![image](docs/images/sample_nvidia-smi.png) | ||
|
||
|
||
## Build | ||
|
||
```bash | ||
sh build.sh | ||
``` | ||
|
||
## Build in Docker | ||
|
||
```bash | ||
docker build . | ||
``` | ||
|
||
## Usage | ||
|
||
_CUDA_DEVICE_MEMORY_LIMIT_ sets the upper limit of device memory (e.g. 1g, 1024m, 1048576k, 1073741824) | ||
|
||
_CUDA_DEVICE_SM_LIMIT_ sets the maximum SM utilization percentage for each device | ||
|
||
```bash | ||
# Limit device memory to 1 GB and cap SM utilization at 50% for all devices | ||
export LD_PRELOAD=./libvgpu.so | ||
export CUDA_DEVICE_MEMORY_LIMIT=1g | ||
export CUDA_DEVICE_SM_LIMIT=50 | ||
``` | ||
|
||
## Docker Images | ||
```bash | ||
# Make docker image | ||
docker build . -f=dockerfiles/Dockerfile-tf1.8-cu90 | ||
|
||
# Launch the docker image | ||
export DEVICE_MOUNTS="--device /dev/nvidia0:/dev/nvidia0 --device /dev/nvidia-uvm:/dev/nvidia-uvm --device /dev/nvidiactl:/dev/nvidiactl" | ||
export LIBRARY_MOUNTS="-v /usr/cuda_files:/usr/cuda_files -v $(which nvidia-smi):/bin/nvidia-smi" | ||
|
||
docker run ${LIBRARY_MOUNTS} ${DEVICE_MOUNTS} -it \ | ||
-e CUDA_DEVICE_MEMORY_LIMIT=2g \ | ||
cuda_vmem:tf1.8-cu90 \ | ||
python -c "import tensorflow; tensorflow.Session()" | ||
``` | ||
|
||
## Log | ||
|
||
Use the environment variable LIBCUDA_LOG_LEVEL to control the verbosity of logs | ||
|
||
| LIBCUDA_LOG_LEVEL | description | | ||
| ----------------- | ----------- | | ||
| not set | errors,warnings,messages | | ||
| 3 | infos,errors,warnings,messages | | ||
| 4 | debugs,errors,warnings,messages | | ||
|
||
## Test with Frameworks | ||
|
||
Run operations that require at least 4 GB of device memory and will therefore run out of memory (OOM) under a 1 GB limit | ||
|
||
- TensorFlow | ||
|
||
```bash | ||
python test/python/limit_tensorflow.py --device=0 --tensor_shape=1024,1024,1024 | ||
``` | ||
|
||
- TensorFlow 2.0 | ||
|
||
```bash | ||
python test/python/limit_tensorflow2.py --device=0 --tensor_shape=1024,1024,1024 | ||
``` | ||
|
||
- Pytorch | ||
|
||
```bash | ||
python test/python/limit_pytorch.py --device=0 --tensor_shape=1024,1024,1024 | ||
``` | ||
|
||
- MxNet | ||
|
||
```bash | ||
python test/python/limit_mxnet.py --device=0 --tensor_shape=1024,1024,1024 | ||
``` | ||
|
||
## Test Raw APIs | ||
|
||
```bash | ||
./test/test_alloc | ||
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
#!/bin/bash
# Configure and build the project in a "build" directory next to this script.
#
# Environment overrides:
#   J              number of parallel make jobs (default: half the CPU count, rounded up)
#   CMAKE          cmake executable to invoke (default: cmake3)
#   CMAKE_OPTIONS  extra cmake options; the -D switches below are appended to it
#   CI_COMMIT_BRANCH, CI_COMMIT_SHA, BRANCH_NAME
#                  CI metadata; in this script they are only echoed
#
# The script stops with a non-zero status as soon as changing directory or
# the cmake configure step fails, instead of carrying on in the wrong place.

# Quote $0 and the resolved path so directories containing spaces work.
root_dir=$(cd "$(dirname "$0")" && pwd) || exit
cd "$root_dir" || exit

mkdir -p build && cd build || exit

[[ -z "$J" ]] && J=$(nproc | awk '{print int(($0 + 1)/ 2)}')
CMAKE=${CMAKE-"cmake3"}
CMAKE_OPTIONS=${CMAKE_OPTIONS-""}
#CMAKE_OPTIONS+=" -DMEMORY_LIMIT_DEBUG=1"
CMAKE_OPTIONS+=" -DDLSYM_HOOK_ENABLE=1"
#CMAKE_OPTIONS+=" -DDLSYM_HOOK_DEBUG=1"
CMAKE_OPTIONS+=" -DMULTIPROCESS_LIMIT_ENABLE=1"
CMAKE_OPTIONS+=" -DHOOK_MEMINFO_ENABLE=1"
CMAKE_OPTIONS+=" -DHOOK_NVML_ENABLE=1"
CMAKE_OPTIONS+=" -DCMAKE_BUILD_TYPE=Debug"

#CMAKE_OPTIONS+=" -DBUILD_DOCKER=true"
# gitlab ci
CI_COMMIT_BRANCH=${CI_COMMIT_BRANCH-""}
CI_COMMIT_SHA=${CI_COMMIT_SHA-""}

# jenkins
# Quoted tests: an unquoted value containing whitespace would break `[ -z ... ]`.
if [ -z "$CI_COMMIT_BRANCH" ]; then
    CI_COMMIT_BRANCH=${BRANCH_NAME-"unknown"}
fi
if [ -z "$CI_COMMIT_SHA" ]; then
    # Fall back to "unknown" when git cannot describe the current commit.
    if ! CI_COMMIT_SHA=$(git describe --abbrev=100 --always); then
        CI_COMMIT_SHA="unknown"
    fi
fi

echo "CI_COMMIT_BRANCH:${CI_COMMIT_BRANCH}"
echo "CI_COMMIT_SHA:${CI_COMMIT_SHA}"

# CMAKE and CMAKE_OPTIONS are left unquoted on purpose: they hold several
# space-separated words that must be split into separate arguments.
${CMAKE} .. ${CMAKE_OPTIONS} -DTEST_DEVICE_ID=0 || exit
make -j"$J"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
# Builds the project inside a CUDA 12.2 development image using a prebuilt
# CMake 3.19.8 for aarch64.
FROM nvidia/cuda:12.2.0-devel-ubuntu20.04
COPY . /libvgpu
WORKDIR /libvgpu
# Chain with && so a failed package-index update aborts the build instead of
# being ignored.
RUN apt-get -y update && apt-get -y install wget
RUN wget https://cmake.org/files/v3.19/cmake-3.19.8-Linux-aarch64.tar.gz
RUN tar -xf cmake-3.19.8-Linux-aarch64.tar.gz
# Make the downloaded binary also available under the name "cmake3".
RUN cp /libvgpu/cmake-3.19.8-Linux-aarch64/bin/cmake /libvgpu/cmake-3.19.8-Linux-aarch64/bin/cmake3
ENV PATH="/libvgpu/cmake-3.19.8-Linux-aarch64/bin:${PATH}"
# -f: do not fail when the copied context contains no build directory.
RUN rm -rf ./build
RUN apt-get -y install openssl libssl-dev
RUN bash ./build.sh
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
# Builds the project inside a CUDA 12.2 development image using a prebuilt
# CMake 3.19.8 for x86_64.
FROM nvidia/cuda:12.2.0-devel-ubuntu20.04
COPY . /libvgpu
WORKDIR /libvgpu
# Chain with && so a failed package-index update aborts the build instead of
# being ignored.
RUN apt-get -y update && apt-get -y install wget
RUN wget https://cmake.org/files/v3.19/cmake-3.19.8-Linux-x86_64.tar.gz
RUN tar -xf cmake-3.19.8-Linux-x86_64.tar.gz
# Make the downloaded binary also available under the name "cmake3".
RUN cp /libvgpu/cmake-3.19.8-Linux-x86_64/bin/cmake /libvgpu/cmake-3.19.8-Linux-x86_64/bin/cmake3
ENV PATH="/libvgpu/cmake-3.19.8-Linux-x86_64/bin:${PATH}"
# -f: do not fail when the copied context contains no build directory.
RUN rm -rf ./build
RUN apt-get -y install openssl libssl-dev
RUN bash ./build.sh
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
# Image that adds vgpu_test onto Alpine and runs ./vgpu_test.test as its entrypoint.
FROM alpine:latest | ||
|
||
# NOTE(review): the artifact is added at /vgpu_test, but the entrypoint below
# executes ./vgpu_test.test -- confirm that these two paths are meant to agree.
ADD vgpu_test /vgpu_test | ||
ENTRYPOINT ["./vgpu_test.test"] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
# vgpu_test | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
package clients | ||
|
||
import ( | ||
"bytes" | ||
"context" | ||
|
||
"github.com/docker/docker/api/types" | ||
"github.com/docker/docker/api/types/container" | ||
"github.com/docker/docker/api/types/mount" | ||
client "github.com/docker/docker/client" | ||
) | ||
|
||
func GetDockerClient() *client.Client { | ||
cli, err := client.NewClientWithOpts(client.FromEnv, client.WithAPIVersionNegotiation()) | ||
if err != nil { | ||
panic(err) | ||
} | ||
return cli | ||
} | ||
|
||
func RunContainer(ctx context.Context, image string, cmd []string, env []string, mount []mount.Mount, shmSize int64) string { | ||
client := GetDockerClient() | ||
resp, err := client.ContainerCreate(ctx, &container.Config{ | ||
Image: image, | ||
Cmd: cmd, | ||
Env: env, | ||
}, &container.HostConfig{ | ||
Mounts: mount, | ||
ShmSize: shmSize, | ||
}, nil, nil, "") | ||
if err != nil { | ||
panic(err) | ||
} | ||
if err := client.ContainerStart(ctx, resp.ID, types.ContainerStartOptions{}); err != nil { | ||
panic(err) | ||
} | ||
statusCh, errCh := client.ContainerWait(ctx, resp.ID, container.WaitConditionNotRunning) | ||
select { | ||
case err := <-errCh: | ||
if err != nil { | ||
panic(err) | ||
} | ||
case <-statusCh: | ||
} | ||
|
||
out, err := client.ContainerLogs(ctx, resp.ID, types.ContainerLogsOptions{ShowStdout: true}) | ||
if err != nil { | ||
panic(err) | ||
} | ||
buf := new(bytes.Buffer) | ||
buf.ReadFrom(out) | ||
return buf.String() | ||
} |
Oops, something went wrong.