Skip to content

Commit

Permalink
ODSC-47199/Adds supporting GPU image for the operators. (#352)
Browse files Browse the repository at this point in the history
  • Loading branch information
mrDzurb authored Sep 26, 2023
2 parents 8b5ff81 + eacc574 commit d0669f5
Show file tree
Hide file tree
Showing 7 changed files with 79 additions and 77 deletions.
6 changes: 3 additions & 3 deletions ads/common/object_storage_details.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

# Copyright (c) 2021, 2022 Oracle and/or its affiliates.
# Copyright (c) 2021, 2023 Oracle and/or its affiliates.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/

import json
Expand All @@ -15,7 +15,7 @@
from ads.common import oci_client


class InvalidObjectStoragePath(Exception): # pragma: no cover
class InvalidObjectStoragePath(Exception): # pragma: no cover
"""Invalid Object Storage Path."""

pass
Expand Down Expand Up @@ -137,4 +137,4 @@ def is_oci_path(uri: str = None) -> bool:
"""
if not uri:
return False
return uri.startswith("oci://")
return uri.lower().startswith("oci://")
12 changes: 8 additions & 4 deletions ads/opctl/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@
import ads.opctl.operator.cli
import ads.opctl.spark.cli
from ads.common import auth as authutil
from ads.common.auth import AuthContext, AuthType
from ads.common.auth import AuthType
from ads.common.object_storage_details import ObjectStorageDetails
from ads.opctl.cmds import activate as activate_cmd
from ads.opctl.cmds import apply as apply_cmd
from ads.opctl.cmds import cancel as cancel_cmd
Expand All @@ -30,7 +31,6 @@
from ads.opctl.cmds import run as run_cmd
from ads.opctl.cmds import run_diagnostics as run_diagnostics_cmd
from ads.opctl.cmds import watch as watch_cmd
from ads.opctl.config.base import ConfigProcessor
from ads.opctl.config.merger import ConfigMerger
from ads.opctl.constants import (
BACKEND_NAME,
Expand Down Expand Up @@ -779,7 +779,9 @@ def apply(debug: bool, **kwargs: Dict[str, Any]) -> None:
operator_spec = {}
backend = kwargs.pop("backend")

auth = authutil.default_signer()
auth = {}
if any(ObjectStorageDetails.is_oci_path(uri) for uri in (kwargs["file"], backend)):
auth = authutil.default_signer()

with fsspec.open(kwargs["file"], "r", **auth) as f:
operator_spec = suppress_traceback(debug)(yaml.safe_load)(f.read())
Expand All @@ -788,7 +790,9 @@ def apply(debug: bool, **kwargs: Dict[str, Any]) -> None:
with fsspec.open(backend, "r", **auth) as f:
backend = suppress_traceback(debug)(yaml.safe_load)(f.read())

suppress_traceback(debug)(apply_cmd)(operator_spec, backend, **kwargs)
suppress_traceback(debug)(apply_cmd)(
config=operator_spec, backend=backend, **kwargs
)


commands.add_command(ads.opctl.conda.cli.commands)
Expand Down
52 changes: 0 additions & 52 deletions ads/opctl/docker/merge_dependencies.py

This file was deleted.

15 changes: 8 additions & 7 deletions ads/opctl/docker/operator/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,13 @@

FROM ghcr.io/oracle/oraclelinux8-instantclient:21 as base

RUN rm -rf /var/cache/yum/* && yum clean all && yum install -y gcc make patch vim iproute net-tools git && rm -rf /var/cache/yum/*
RUN \
rm -rf /var/cache/yum/* && \
yum install -y gcc make patch vim iproute net-tools git && \
yum clean all && \
rm -rf /var/cache/yum/*

########################### CONDA INSTALLATION ########################################
RUN curl -L https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh >> miniconda.sh
RUN bash ./miniconda.sh -b -p /miniconda; rm ./miniconda.sh;
ENV PATH="/miniconda/bin:$PATH"
Expand All @@ -21,17 +27,12 @@ ENV PATH="/miniconda/envs/${CONDA_ENV_NAME}}/bin:$PATH"

RUN conda init bash && source ~/.bashrc && conda activate ${CONDA_ENV_NAME}

########################### SETUP WORKDIR ########################################
RUN mkdir ${OPERATOR_DIR}
# COPY ./artifacts/* ${OPERATOR_DIR}/

ENV OPERATOR_DIR=${OPERATOR_DIR}
ENV CONDA_ENV_NAME=${CONDA_ENV_NAME}

# RUN if [ -f ${OPERATOR_DIR}/oracle_ads*.whl ]; then \
# local_whl=$(find ${OPERATOR_DIR} -name "*.whl" -exec basename {} \; | head -n 1 ); \
# source ~/.bashrc && conda activate ${CONDA_ENV_NAME} && pip install ${OPERATOR_DIR}/$local_whl; \
# fi

WORKDIR ${OPERATOR_DIR}

RUN echo "conda activate $CONDA_ENV_NAME">>/root/.bashrc
Expand Down
59 changes: 52 additions & 7 deletions ads/opctl/docker/operator/Dockerfile.gpu
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,57 @@

FROM ghcr.io/oracle/oraclelinux8-instantclient:21 as base

RUN rm -rf /var/cache/yum/* && yum clean all && yum install -y gcc make patch vim iproute net-tools git && rm -rf /var/cache/yum/*
RUN \
rm -rf /var/cache/yum/* && \
yum install -y gcc make patch vim iproute net-tools git && \
yum clean all && \
rm -rf /var/cache/yum/*

########################### CUDA INSTALLATION ########################################
#Reference: https://gitlab.com/nvidia/container-images/cuda/-/blob/master/dist/10.1/centos7/runtime/cudnn7/Dockerfile
#Reference: https://gitlab.com/nvidia/container-images/cuda/-/blob/master/dist/10.1/centos7/runtime/Dockerfile
#Reference: https://gitlab.com/nvidia/container-images/cuda/-/blob/master/dist/10.1/centos7/base/Dockerfile

RUN NVIDIA_GPGKEY_SUM=d0664fbbdb8c32356d45de36c5984617217b2d0bef41b93ccecd326ba3b80c87 && \
curl -fsSL https://developer.download.nvidia.com/compute/cuda/repos/rhel7/x86_64/D42D0685.pub | sed '/^Version/d' > /etc/pki/rpm-gpg/RPM-GPG-KEY-NVIDIA && \
echo "$NVIDIA_GPGKEY_SUM /etc/pki/rpm-gpg/RPM-GPG-KEY-NVIDIA" | sha256sum -c --strict -

COPY cuda.repo /etc/yum.repos.d/cuda.repo

ENV CUDA_VERSION 10.1.243

ENV CUDA_PKG_VERSION 10-1-$CUDA_VERSION-1
# For libraries in the cuda-compat-* package: https://docs.nvidia.com/cuda/eula/index.html#attachment-a
RUN yum install -y \
cuda-cudart-$CUDA_PKG_VERSION \
cuda-compat-10-1 \
&& \
ln -s cuda-10.1 /usr/local/cuda && \
rm -rf /var/cache/yum/*

# nvidia-docker 1.0
RUN echo "/usr/local/nvidia/lib" >> /etc/ld.so.conf.d/nvidia.conf && \
echo "/usr/local/nvidia/lib64" >> /etc/ld.so.conf.d/nvidia.conf

ENV PATH /usr/local/nvidia/bin:/usr/local/cuda/bin:${PATH}
ENV LD_LIBRARY_PATH /lib64:/usr/local/nvidia/lib:/usr/local/nvidia/lib64

# nvidia-container-runtime
ENV NVIDIA_VISIBLE_DEVICES all
ENV NVIDIA_DRIVER_CAPABILITIES compute,utility
ENV NVIDIA_REQUIRE_CUDA "cuda>=10.1 brand=tesla,driver>=384,driver<385 brand=tesla,driver>=396,driver<397 brand=tesla,driver>=410,driver<411"

ENV CUDNN_VERSION 7.6.5.32
LABEL com.nvidia.cudnn.version="${CUDNN_VERSION}"

RUN CUDNN_DOWNLOAD_SUM=7eaec8039a2c30ab0bc758d303588767693def6bf49b22485a2c00bf2e136cb3 && \
curl -fsSL http://developer.download.nvidia.com/compute/redist/cudnn/v7.6.5/cudnn-10.1-linux-x64-v7.6.5.32.tgz -O && \
echo "$CUDNN_DOWNLOAD_SUM cudnn-10.1-linux-x64-v7.6.5.32.tgz" | sha256sum -c - && \
tar --no-same-owner -xzf cudnn-10.1-linux-x64-v7.6.5.32.tgz -C /usr/local --wildcards 'cuda/lib64/libcudnn.so.*' && \
rm cudnn-10.1-linux-x64-v7.6.5.32.tgz && \
ldconfig

########################### CONDA INSTALLATION ########################################
RUN curl -L https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh >> miniconda.sh
RUN bash ./miniconda.sh -b -p /miniconda; rm ./miniconda.sh;
ENV PATH="/miniconda/bin:$PATH"
Expand All @@ -21,17 +71,12 @@ ENV PATH="/miniconda/envs/${CONDA_ENV_NAME}}/bin:$PATH"

RUN conda init bash && source ~/.bashrc && conda activate ${CONDA_ENV_NAME}

########################### SETUP WORKDIR ########################################
RUN mkdir ${OPERATOR_DIR}
COPY ./artifacts/* ${OPERATOR_DIR}/

ENV OPERATOR_DIR=${OPERATOR_DIR}
ENV CONDA_ENV_NAME=${CONDA_ENV_NAME}

RUN if [ -f ${OPERATOR_DIR}/oracle_ads*.whl ]; then \
local_whl=$(find ${OPERATOR_DIR} -name "*.whl" -exec basename {} \; | head -n 1 ); \
source ~/.bashrc && conda activate ${CONDA_ENV_NAME} && pip install ${OPERATOR_DIR}/$local_whl; \
fi

WORKDIR ${OPERATOR_DIR}

RUN echo "conda activate $CONDA_ENV_NAME">>/root/.bashrc
Expand Down
6 changes: 6 additions & 0 deletions ads/opctl/docker/operator/cuda.repo
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
[cuda]
name=cuda
baseurl=http://developer.download.nvidia.com/compute/cuda/repos/rhel7/x86_64
enabled=1
gpgcheck=1
gpgkey=file:///etc/pki/rpm-gpg/RPM-GPG-KEY-NVIDIA
6 changes: 2 additions & 4 deletions ads/opctl/operator/cmd.py
Original file line number Diff line number Diff line change
Expand Up @@ -351,9 +351,7 @@ def build_image(

# load operator info
operator_info: OperatorInfo = OperatorLoader.from_uri(uri=name).load()
logger.info(
f"Building Docker image for the `{operator_info.name}` service operator."
)
logger.info(f"Building Docker image for the `{operator_info.name}` operator.")

# checks if GPU base image needs to be used.
gpu = operator_info.gpu or gpu
Expand Down Expand Up @@ -413,7 +411,7 @@ def build_image(
)

logger.info(
f"The operator image `{result_image_name}` has been successfully built."
f"The operator image `{result_image_name}` has been successfully built. "
"To publish the image to OCI Container Registry run the "
f"`ads opctl operator publish-image -n {result_image_name}` command"
)
Expand Down

0 comments on commit d0669f5

Please sign in to comment.