Merge branch 'master' into fix/rename_vllm_dockerfile
mreso authored Sep 24, 2024
2 parents fd71f07 + 6881ec5 commit 8394fd7
Showing 50 changed files with 776 additions and 258 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/benchmark_nightly.yml
@@ -55,7 +55,7 @@ jobs:
NEURON_RT_NUM_CORES: 1
run: python benchmarks/auto_benchmark.py --input benchmarks/benchmark_config_neuronx.yaml --skip false
- name: Save benchmark artifacts
uses: actions/upload-artifact@v2
uses: actions/upload-artifact@v4
with:
name: nightly ${{ matrix.hardware }} artifact
path: /tmp/ts_benchmark
@@ -72,7 +72,7 @@ jobs:
- name: Update benchmark artifacts for auto validation
run: python benchmarks/utils/update_artifacts.py --output /tmp/ts_artifacts/${{ matrix.hardware }}_benchmark_validation
- name: Upload the updated benchmark artifacts for auto validation
uses: actions/upload-artifact@v2
uses: actions/upload-artifact@v4
with:
name: ${{ matrix.hardware }}_benchmark_validation
path: /tmp/ts_artifacts
7 changes: 5 additions & 2 deletions .github/workflows/docker-nightly-build.yml
@@ -1,10 +1,11 @@
name: Push Docker Nightly

on:
# run every day at 1:15pm
# Run every day at 1:15pm
schedule:
- cron: "15 13 * * *"
workflow_dispatch:

jobs:
nightly:
runs-on: [self-hosted, ci-gpu]
@@ -32,12 +33,14 @@ jobs:
- name: Push Docker Nightly
run: |
cd docker
sudo apt-get update
docker buildx use multibuilder
python docker_nightly.py --cleanup
- name: Push KServe Docker Nightly
run: |
cd kubernetes/kserve
docker buildx use multibuilder
python docker_nightly.py --cleanup
- name: Open issue on failure
if: ${{ failure() && github.event_name == 'schedule' }}
uses: dacbd/create-issue-action@v1
2 changes: 2 additions & 0 deletions .github/workflows/official_release_docker.yml
@@ -42,9 +42,11 @@ jobs:
if: github.event.inputs.upload_docker == 'yes'
run: |
cd docker
docker buildx use multibuilder
python build_upload_release.py --cleanup
- name: Build & Upload pytorch/torchserve-kfs Docker images
if: github.event.inputs.upload_kfs == 'yes'
run: |
cd kubernetes/kserve
docker buildx use multibuilder
python build_upload_release.py --cleanup
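
Both release workflows now run `docker buildx use multibuilder` before invoking the build scripts, so they assume a buildx builder named `multibuilder` already exists on the runner. A minimal setup sketch (builder name taken from the workflows; the platform list is an assumption matching `ARCH` in `build_image.sh` below):

```bash
# One-time setup on the CI runner: create and select a multi-platform builder.
docker buildx create --name multibuilder --platform linux/amd64,linux/arm64 --use
```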
15 changes: 13 additions & 2 deletions README.md
@@ -62,12 +62,23 @@ Refer to [torchserve docker](docker/README.md) for details.

### 🤖 Quick Start LLM Deployment

#### VLLM Engine
```bash
# Make sure to install torchserve with pip or conda as described above and login with `huggingface-cli login`
python -m ts.llm_launcher --model_id meta-llama/Meta-Llama-3-8B-Instruct --disable_token_auth
python -m ts.llm_launcher --model_id meta-llama/Meta-Llama-3.1-8B-Instruct --disable_token_auth

# Try it out
curl -X POST -d '{"model":"meta-llama/Meta-Llama-3-8B-Instruct", "prompt":"Hello, my name is", "max_tokens": 200}' --header "Content-Type: application/json" "http://localhost:8080/predictions/model/1.0/v1/completions"
curl -X POST -d '{"model":"meta-llama/Meta-Llama-3.1-8B-Instruct", "prompt":"Hello, my name is", "max_tokens": 200}' --header "Content-Type: application/json" "http://localhost:8080/predictions/model/1.0/v1/completions"
```
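
The launcher registers the model under the name `model` (hence the URL above), and the `v1/completions` route follows the OpenAI-style completions schema. To inspect the reply, the same request can be piped through a pretty-printer (exact response fields depend on the vLLM version):

```bash
# Response is OpenAI-style JSON, roughly:
# {"id": ..., "object": "text_completion", "choices": [{"text": ...}], ...}
curl -s -X POST -d '{"model":"meta-llama/Meta-Llama-3.1-8B-Instruct", "prompt":"Hello, my name is", "max_tokens": 200}' \
  --header "Content-Type: application/json" \
  "http://localhost:8080/predictions/model/1.0/v1/completions" | python -m json.tool
```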

#### TRT-LLM Engine
```bash
# Make sure to install torchserve with python venv as described above and login with `huggingface-cli login`
# pip install -U --use-deprecated=legacy-resolver -r requirements/trt_llm.txt
python -m ts.llm_launcher --model_id meta-llama/Meta-Llama-3.1-8B-Instruct --engine trt_llm --disable_token_auth

# Try it out
curl -X POST -d '{"prompt":"count from 1 to 9 in french ", "max_tokens": 100}' --header "Content-Type: application/json" "http://localhost:8080/predictions/model"
```

### 🚢 Quick Start LLM Deployment with Docker
26 changes: 26 additions & 0 deletions benchmarks/utils/system_under_test.py
@@ -113,6 +113,7 @@ def start(self):
execute("torchserve --stop", wait=True)
click.secho("*Setting up model store...", fg="green")
self._prepare_local_dependency()
self._clear_neuron_cache_if_exists()
click.secho("*Starting local Torchserve instance...", fg="green")

ts_cmd = (
@@ -141,6 +142,31 @@ def start(self):
if "Model server started" in str(line).strip():
break

def _clear_neuron_cache_if_exists(self):
cache_dir = "/var/tmp/neuron-compile-cache/"

# Check if the directory exists
if os.path.exists(cache_dir) and os.path.isdir(cache_dir):
click.secho(
f"Directory {cache_dir} exists. Clearing contents...", fg="green"
)

# Remove the directory contents
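# (one entry at a time, rather than shutil.rmtree(cache_dir) wholesale, so
# the cache directory itself stays in place and a failure on any single
# entry is reported below without aborting the rest of the sweep)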
for filename in os.listdir(cache_dir):
file_path = os.path.join(cache_dir, filename)
try:
if os.path.isfile(file_path) or os.path.islink(file_path):
os.unlink(file_path)
elif os.path.isdir(file_path):
shutil.rmtree(file_path)
except Exception as e:
click.secho(f"Failed to delete {file_path}. Reason: {e}", fg="red")
click.secho(f"Cache cleared: {cache_dir}", fg="green")
else:
click.secho(
f"Directory {cache_dir} does not exist. No action taken.", fg="green"
)

def stop(self):
click.secho("*Terminating Torchserve instance...", fg="green")
execute("torchserve --stop", wait=True)
4 changes: 2 additions & 2 deletions docker/Dockerfile
@@ -73,7 +73,7 @@ COPY ./ serve
RUN \
if echo "$LOCAL_CHANGES" | grep -q "false"; then \
rm -rf serve;\
git clone --recursive $REPO_URL -b $BRANCH_NAME; \
git clone --recursive $REPO_URL -b $BRANCH_NAME serve; \
fi


@@ -238,7 +238,7 @@ COPY ./ serve
RUN \
if echo "$LOCAL_CHANGES" | grep -q "false"; then \
rm -rf serve;\
git clone --recursive $REPO_URL -b $BRANCH_NAME; \
git clone --recursive $REPO_URL -b $BRANCH_NAME serve; \
fi

COPY --from=compile-image /home/venv /home/venv
23 changes: 17 additions & 6 deletions docker/build_image.sh
@@ -18,6 +18,8 @@ BUILD_NIGHTLY=false
BUILD_FROM_SRC=false
LOCAL_CHANGES=true
PYTHON_VERSION=3.9
ARCH="linux/arm64,linux/amd64"
MULTI=false

for arg in "$@"
do
@@ -101,6 +103,10 @@
BUILD_CPP=true
shift
;;
-m|--multi)
MULTI=true
shift
;;
-n|--nightly)
BUILD_NIGHTLY=true
shift
@@ -214,12 +220,17 @@
fi
fi

if [ "${BUILD_TYPE}" == "production" ]
then
DOCKER_BUILDKIT=1 docker build --file Dockerfile --build-arg BASE_IMAGE="${BASE_IMAGE}" --build-arg USE_CUDA_VERSION="${CUDA_VERSION}" --build-arg PYTHON_VERSION="${PYTHON_VERSION}"\
--build-arg BUILD_NIGHTLY="${BUILD_NIGHTLY}" --build-arg BRANCH_NAME="${BRANCH_NAME}" --build-arg REPO_URL="${REPO_URL}" --build-arg BUILD_FROM_SRC="${BUILD_FROM_SRC}"\
--build-arg LOCAL_CHANGES="${LOCAL_CHANGES}" -t "${DOCKER_TAG}" --target production-image ../
elif [ "${BUILD_TYPE}" == "ci" ]
if [ "${BUILD_TYPE}" == "production" ]; then
if [ "${MULTI}" == "true" ]; then
DOCKER_BUILDKIT=1 docker buildx build --file Dockerfile --build-arg BASE_IMAGE="${BASE_IMAGE}" --build-arg USE_CUDA_VERSION="${CUDA_VERSION}" --build-arg PYTHON_VERSION="${PYTHON_VERSION}"\
--build-arg BUILD_NIGHTLY="${BUILD_NIGHTLY}" --build-arg BRANCH_NAME="${BRANCH_NAME}" --build-arg REPO_URL="${REPO_URL}" --build-arg BUILD_FROM_SRC="${BUILD_FROM_SRC}"\
--build-arg LOCAL_CHANGES="${LOCAL_CHANGES}" -t "${DOCKER_TAG}" --platform "${ARCH}" --target production-image ../ --push
else
DOCKER_BUILDKIT=1 docker buildx build --file Dockerfile --build-arg BASE_IMAGE="${BASE_IMAGE}" --build-arg USE_CUDA_VERSION="${CUDA_VERSION}" --build-arg PYTHON_VERSION="${PYTHON_VERSION}"\
--build-arg BUILD_NIGHTLY="${BUILD_NIGHTLY}" --build-arg BRANCH_NAME="${BRANCH_NAME}" --build-arg REPO_URL="${REPO_URL}" --build-arg BUILD_FROM_SRC="${BUILD_FROM_SRC}"\
--build-arg LOCAL_CHANGES="${LOCAL_CHANGES}" -t "${DOCKER_TAG}" --target production-image ../ --load
fi
elif [ "${BUILD_TYPE}" == "ci" ];
then
DOCKER_BUILDKIT=1 docker build --file Dockerfile --build-arg BASE_IMAGE="${BASE_IMAGE}" --build-arg USE_CUDA_VERSION="${CUDA_VERSION}" --build-arg PYTHON_VERSION="${PYTHON_VERSION}"\
--build-arg BUILD_NIGHTLY="${BUILD_NIGHTLY}" --build-arg BRANCH_NAME="${BRANCH_NAME}" --build-arg REPO_URL="${REPO_URL}" --build-arg BUILD_FROM_SRC="${BUILD_FROM_SRC}"\
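
The new `-m/--multi` flag switches the production build to a multi-platform `buildx` build over `ARCH` (`linux/arm64,linux/amd64`). Because a multi-arch result cannot be loaded into the single-platform local daemon, that path ends in `--push` (straight to the registry), while the default single-arch path keeps `--load`. A usage sketch (tag hypothetical; assumes the `multibuilder` builder from the workflows is active):

```bash
# Single-arch image, built and loaded into the local Docker daemon:
./build_image.sh -t myorg/torchserve:latest

# Multi-arch image (linux/arm64 + linux/amd64), pushed directly to the registry:
./build_image.sh -m -t myorg/torchserve:latest
```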
12 changes: 6 additions & 6 deletions docker/build_upload_release.py
@@ -31,7 +31,7 @@
organization = args.organization

# Upload pytorch/torchserve docker binaries
try_and_handle(f"./build_image.sh -t {organization}/torchserve:latest", dry_run)
try_and_handle(f"./build_image.sh -m -t {organization}/torchserve:latest", dry_run)
try_and_handle(
f"./build_image.sh -g -cv cu121 -t {organization}/torchserve:latest-gpu",
dry_run,
@@ -44,14 +44,17 @@
f"./build_image.sh -bt dev -g -cv cu121 -cpp -t {organization}/torchserve:latest-cpp-dev-gpu",
dry_run,
)

try_and_handle(
f"docker tag {organization}/torchserve:latest {organization}/torchserve:latest-cpu",
f"docker buildx imagetools create --tag {organization}/torchserve:latest-cpu {organization}/torchserve:latest",
dry_run,
)

try_and_handle(
f"docker tag {organization}/torchserve:latest {organization}/torchserve:{check_ts_version()}-cpu",
f"docker buildx imagetools create --tag {organization}/torchserve:{check_ts_version()}-cpu {organization}/torchserve:latest",
dry_run,
)

try_and_handle(
f"docker tag {organization}/torchserve:latest-gpu {organization}/torchserve:{check_ts_version()}-gpu",
dry_run,
@@ -66,12 +69,9 @@
)

for image in [
f"{organization}/torchserve:latest",
f"{organization}/torchserve:latest-cpu",
f"{organization}/torchserve:latest-gpu",
f"{organization}/torchserve:latest-cpp-dev-cpu",
f"{organization}/torchserve:latest-cpp-dev-gpu",
f"{organization}/torchserve:{check_ts_version()}-cpu",
f"{organization}/torchserve:{check_ts_version()}-gpu",
f"{organization}/torchserve:{check_ts_version()}-cpp-dev-cpu",
f"{organization}/torchserve:{check_ts_version()}-cpp-dev-gpu",
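
The move from `docker tag` + `docker push` to `docker buildx imagetools create` follows from the multi-arch build above: a local `docker tag` can only retag whatever single-platform image the daemon holds, whereas `imagetools create` assembles a new manifest list in the registry that references every architecture already pushed under the source tag. A sketch (organization hypothetical):

```bash
# Registry-side retag: latest-cpu becomes a manifest list covering all
# architectures already pushed under latest.
docker buildx imagetools create \
  --tag myorg/torchserve:latest-cpu myorg/torchserve:latest
```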
9 changes: 3 additions & 6 deletions docker/docker_nightly.py
@@ -39,7 +39,7 @@
cpp_dev_gpu_version = f"{project}:cpp-dev-gpu-{get_nightly_version()}"

# Build Nightly images and append the date in the name
try_and_handle(f"./build_image.sh -n -t {organization}/{cpu_version}", dry_run)
try_and_handle(f"./build_image.sh -m -n -t {organization}/{cpu_version}", dry_run)
try_and_handle(
f"./build_image.sh -g -cv cu121 -n -t {organization}/{gpu_version}",
dry_run,
@@ -54,18 +54,17 @@
)

# Push Nightly images to official PyTorch Dockerhub account
try_and_handle(f"docker push {organization}/{cpu_version}", dry_run)
try_and_handle(f"docker push {organization}/{gpu_version}", dry_run)
try_and_handle(f"docker push {organization}/{cpp_dev_cpu_version}", dry_run)
try_and_handle(f"docker push {organization}/{cpp_dev_gpu_version}", dry_run)

# Tag nightly images with latest
try_and_handle(
f"docker tag {organization}/{cpu_version} {organization}/{project}:latest-cpu",
f"docker buildx imagetools create --tag {organization}/{project}:latest-cpu {organization}/{cpu_version}",
dry_run,
)
try_and_handle(
f"docker tag {organization}/{gpu_version} {organization}/{project}:latest-gpu",
f"docker buildx imagetools create --tag {organization}/{project}:latest-gpu {organization}/{gpu_version}",
dry_run,
)
try_and_handle(
@@ -78,8 +77,6 @@
)

# Push images with latest tag
try_and_handle(f"docker push {organization}/{project}:latest-cpu", dry_run)
try_and_handle(f"docker push {organization}/{project}:latest-gpu", dry_run)
try_and_handle(f"docker push {organization}/{project}:latest-cpp-dev-cpu", dry_run)
try_and_handle(f"docker push {organization}/{project}:latest-cpp-dev-gpu", dry_run)

4 changes: 2 additions & 2 deletions docs/Troubleshooting.md
@@ -76,8 +76,8 @@ Relevant issues: [[#566](https://github.com/pytorch/serve/issues/566)]
#### How can I resolve model specific python dependency?
You can provide a requirements.txt while creating a mar file using "--requirements-file/ -r" flag. You can refer to the [waveglow text-to-speech-synthesizer](https://github.com/pytorch/serve/tree/master/examples/text_to_speech_synthesizer) example

- [waveglow mar creation script](https://github.com/pytorch/serve/blob/master/examples/text_to_speech_synthesizer/create_mar.sh)
- [waveglow handler](https://github.com/pytorch/serve/blob/2d9c7ccc316f592374943a1963c1057bbe232c9e/examples/text_to_speech_synthesizer/waveglow_handler.py#L49)
- [waveglow mar creation script](https://github.com/pytorch/serve/blob/master/examples/text_to_speech_synthesizer/WaveGlow/create_mar.sh)
- [waveglow handler](https://github.com/pytorch/serve/blob/2d9c7ccc316f592374943a1963c1057bbe232c9e/examples/text_to_speech_synthesizer/WaveGlow/waveglow_handler.py#L49)

Relevant issues: [[#566](https://github.com/pytorch/serve/issues/566)]
Refer [Torch model archiver cli](https://github.com/pytorch/serve/blob/master/model-archiver/README.md#torch-model-archiver-command-line-interface) for more details.
4 changes: 2 additions & 2 deletions docs/custom_service.md
@@ -250,7 +250,7 @@ class ModelHandler(BaseHandler):

```

Refer [waveglow_handler](https://github.com/pytorch/serve/blob/master/examples/text_to_speech_synthesizer/waveglow_handler.py) for more details.
Refer [waveglow_handler](https://github.com/pytorch/serve/blob/master/examples/text_to_speech_synthesizer/WaveGlow/waveglow_handler.py) for more details.

#### Captum explanations for custom handler

@@ -362,7 +362,7 @@ class CustomImageClassifier(ImageClassifier):
For more details refer following examples :
- [mnist digit classifier handler](https://github.com/pytorch/serve/tree/master/examples/image_classifier)
- [Huggingface transformer generalized handler](https://github.com/pytorch/serve/blob/master/examples/Huggingface_Transformers/Transformer_handler_generalized.py)
- [Waveglow text to speech synthesizer](https://github.com/pytorch/serve/blob/master/examples/text_to_speech_synthesizer/waveglow_handler.py)
- [Waveglow text to speech synthesizer](https://github.com/pytorch/serve/blob/master/examples/text_to_speech_synthesizer/WaveGlow/waveglow_handler.py)

## Creating a model archive with an entry point

58 changes: 58 additions & 0 deletions examples/custom_endpoint_plugin/ModelReady.java
@@ -0,0 +1,58 @@
package org.pytorch.serve.plugins.endpoint;

import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.Map;
import org.pytorch.serve.servingsdk.Context;
import org.pytorch.serve.servingsdk.Model;
import org.pytorch.serve.servingsdk.ModelServerEndpoint;
import org.pytorch.serve.servingsdk.Worker;
import org.pytorch.serve.servingsdk.annotations.Endpoint;
import org.pytorch.serve.servingsdk.annotations.helpers.EndpointTypes;
import org.pytorch.serve.servingsdk.http.Request;
import org.pytorch.serve.servingsdk.http.Response;

@Endpoint(
urlPattern = "model-ready",
endpointType = EndpointTypes.INFERENCE,
description = "Endpoint indicating registered model/s ready to serve inference requests")
public class ModelReady extends ModelServerEndpoint {
private boolean modelsLoaded(Context ctx) {
Map<String, Model> modelMap = ctx.getModels();

if (modelMap.isEmpty()) {
return false;
}

for (Map.Entry<String, Model> entry : modelMap.entrySet()) {
boolean workerReady = false;
for (Worker w : entry.getValue().getModelWorkers()) {
if (w.isRunning()) {
workerReady = true;
break;
}
}
if (!workerReady) {
return false;
}
}
return true;
}

@Override
public void doGet(Request req, Response rsp, Context ctx) throws IOException {
if (modelsLoaded(ctx)) {
rsp.setStatus(200, "Model/s ready");
rsp.getOutputStream()
.write(
"{\n\t\"Status\": \"Model/s ready\"\n}\n"
.getBytes(StandardCharsets.UTF_8));
} else {
rsp.setStatus(503, "Model/s not ready");
rsp.getOutputStream()
.write(
"{\n\t\"Status\": \"Model/s not ready\"\n}\n"
.getBytes(StandardCharsets.UTF_8));
}
}
}
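
Assuming the compiled plugin jar is on TorchServe's plugins path, the new endpoint is served on the inference port (8080 by default) and can be checked with a plain GET; the status and body match the `doGet` logic above:

```bash
# 200 "Model/s ready" once every registered model has at least one running
# worker; 503 "Model/s not ready" otherwise.
curl -i http://localhost:8080/model-ready
```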