Commit

Updated pr.
lu-ohai committed Dec 17, 2024
1 parent 3803600 commit e84715c
Showing 3 changed files with 74 additions and 44 deletions.
50 changes: 33 additions & 17 deletions ads/model/framework/embedding_onnx_model.py
@@ -3,7 +3,7 @@
# Copyright (c) 2024 Oracle and/or its affiliates.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/

-from typing import Dict
+from typing import Dict, Optional

from ads.model.extractor.embedding_onnx_extractor import EmbeddingONNXExtractor
from ads.model.generic_model import FrameworkSpecificModel
@@ -108,18 +108,26 @@ class EmbeddingONNXModel(FrameworkSpecificModel):
>>> from huggingface_hub import snapshot_download
>>> local_dir=tempfile.mkdtemp()
>>> # download sentence-transformers/all-MiniLM-L6-v2 from huggingface
+>>> allow_patterns=[
+...     "onnx/model.onnx",
+...     "config.json",
+...     "special_tokens_map.json",
+...     "tokenizer_config.json",
+...     "tokenizer.json",
+...     "vocab.txt"
+... ]
+>>> # download files needed for this demonstration to local folder
>>> snapshot_download(
... repo_id="sentence-transformers/all-MiniLM-L6-v2",
-...     local_dir=local_dir
+...     local_dir=local_dir,
+...     allow_patterns=allow_patterns
... )
->>> # copy all files from local_dir to artifact_dir
>>> artifact_dir = tempfile.mkdtemp()
->>> for root, dirs, files in os.walk(local_dir):
-...     for file in files:
-...         src_path = os.path.join(root, file)
-...         shutil.copy(src_path, artifact_dir)
+>>> # copy all downloaded files to artifact folder
+>>> for file in allow_patterns:
+...     shutil.copy(local_dir + "/" + file, artifact_dir)
>>> model = EmbeddingONNXModel(artifact_dir=artifact_dir)
>>> model.summary_status()
@@ -157,8 +165,8 @@ class EmbeddingONNXModel(FrameworkSpecificModel):

    def __init__(
        self,
-        artifact_dir: str | None = None,
-        auth: Dict | None = None,
+        artifact_dir: Optional[str] = None,
+        auth: Optional[Dict] = None,
        serialize: bool = False,
        **kwargs: dict,
    ):
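(The move from ``str | None`` to ``Optional[str]`` presumably keeps these annotations working on Python versions below 3.10, where the PEP 604 ``X | Y`` union syntax is not available.)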
@@ -191,18 +199,26 @@ def __init__(
>>> from huggingface_hub import snapshot_download
>>> local_dir=tempfile.mkdtemp()
>>> # download sentence-transformers/all-MiniLM-L6-v2 from huggingface
+>>> allow_patterns=[
+...     "onnx/model.onnx",
+...     "config.json",
+...     "special_tokens_map.json",
+...     "tokenizer_config.json",
+...     "tokenizer.json",
+...     "vocab.txt"
+... ]
+>>> # download files needed for this demonstration to local folder
>>> snapshot_download(
... repo_id="sentence-transformers/all-MiniLM-L6-v2",
-...     local_dir=local_dir
+...     local_dir=local_dir,
+...     allow_patterns=allow_patterns
... )
->>> # copy all files from subdirectory to artifact_dir
>>> artifact_dir = tempfile.mkdtemp()
->>> for root, dirs, files in os.walk(local_dir):
-...     for file in files:
-...         src_path = os.path.join(root, file)
-...         shutil.copy(src_path, artifact_dir)
+>>> # copy all downloaded files to artifact folder
+>>> for file in allow_patterns:
+...     shutil.copy(local_dir + "/" + file, artifact_dir)
>>> model = EmbeddingONNXModel(artifact_dir=artifact_dir)
>>> model.summary_status()
16 changes: 15 additions & 1 deletion ads/templates/score_embedding_onnx.jinja2
@@ -2,6 +2,7 @@
import os
import sys
import json
+import subprocess
from functools import lru_cache
import onnxruntime as ort
import jsonschema
@@ -33,13 +34,26 @@ def load_model(model_file_name=model_name):
    contents = os.listdir(model_dir)
    if model_file_name in contents:
        print(f'Start loading {model_file_name} from model directory {model_dir} ...')
-        model = ort.InferenceSession(os.path.join(model_dir, model_file_name), providers=['CUDAExecutionProvider','CPUExecutionProvider'])
+        providers = ['CPUExecutionProvider']
+        if is_gpu_available():
+            providers = ['CUDAExecutionProvider', 'CPUExecutionProvider']
+        model = ort.InferenceSession(os.path.join(model_dir, model_file_name), providers=providers)
        print("Model is successfully loaded.")
        return model
    else:
        raise Exception(f'{model_file_name} is not found in model directory {model_dir}')


+def is_gpu_available():
+    """Check if a GPU is available on the infrastructure."""
+    try:
+        result = subprocess.run(["nvidia-smi"], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+        if result.returncode == 0:
+            return True
+    except FileNotFoundError:
+        return False
+    # nvidia-smi present but returned non-zero: treat as no GPU
+    return False
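As an aside, ONNX Runtime can also report provider availability directly; a minimal alternative sketch (not part of this commit, relying only on the ort import already in this template) would be:

def is_gpu_available_alt():
    # 'CUDAExecutionProvider' appears only when a CUDA-enabled build and driver are present
    return "CUDAExecutionProvider" in ort.get_available_providers()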


@lru_cache(maxsize=1)
def load_tokenizer(model_full_name):

@@ -6,7 +6,7 @@ See `API Documentation <../../../ads.model.framework.html#ads.model.framework.em
Overview
========

-The ``ads.model.framework.embedding_onnx_model.EmbeddingONNXModel`` class in ADS is designed to rapidly get an Embedding ONNX Model into production. The ``.prepare()`` method creates the model artifacts that are needed without configuring it or writing code. However, you can customize the required ``score.py`` file.
+The ``ads.model.framework.embedding_onnx_model.EmbeddingONNXModel`` class in ADS is designed to rapidly get an Embedding ONNX Model into production. The ``.prepare()`` method creates the model artifacts that are needed without configuring it or writing code. ``EmbeddingONNXModel`` supports the `OpenAI spec <https://github.com/huggingface/text-embeddings-inference/blob/main/docs/openapi.json>`_ for the embeddings endpoint.
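For illustration, an embeddings request following that spec might look like this sketch (the deployment URL and payload are hypothetical placeholders, not part of this commit):

import requests

# hypothetical endpoint; substitute your model deployment's predict URI
response = requests.post(
    "https://<model-deployment-url>/predict",
    json={"input": ["What are activation functions?"]},
)
# OpenAI-style shape: {"data": [{"embedding": [...], "index": 0}], ...}
print(response.json())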

.. include:: ../_template/overview.rst

@@ -24,26 +24,26 @@ The following steps take the `sentence-transformers/all-MiniLM-L6-v2 <https://hu
local_dir = tempfile.mkdtemp()
+allow_patterns=[
+    "onnx/model.onnx",
+    "config.json",
+    "special_tokens_map.json",
+    "tokenizer_config.json",
+    "tokenizer.json",
+    "vocab.txt"
+]
# download files needed for this demonstration to local folder
snapshot_download(
    repo_id="sentence-transformers/all-MiniLM-L6-v2",
    local_dir=local_dir,
-    allow_patterns=[
-        "onnx/model.onnx",
-        "config.json",
-        "special_tokens_map.json",
-        "tokenizer_config.json",
-        "tokenizer.json",
-        "vocab.txt"
-    ]
+    allow_patterns=allow_patterns
)
artifact_dir = tempfile.mkdtemp()
# copy all downloaded files to artifact folder
-for root, dirs, files in os.walk(local_dir):
-    for file in files:
-        src_path = os.path.join(root, file)
-        shutil.copy(src_path, artifact_dir)
+for file in allow_patterns:
+    shutil.copy(local_dir + "/" + file, artifact_dir)
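A quick, purely illustrative sanity check that the expected files landed in the artifact folder:

import os
# model.onnx is copied out of the onnx/ subfolder into the artifact root
print(sorted(os.listdir(artifact_dir)))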
Install Conda Pack
@@ -213,26 +213,26 @@ Example
local_dir = tempfile.mkdtemp()
-# download files needed for the demostration to local folder
+allow_patterns=[
+    "onnx/model.onnx",
+    "config.json",
+    "special_tokens_map.json",
+    "tokenizer_config.json",
+    "tokenizer.json",
+    "vocab.txt"
+]
+# download files needed for this demonstration to local folder
snapshot_download(
    repo_id="sentence-transformers/all-MiniLM-L6-v2",
    local_dir=local_dir,
-    allow_patterns=[
-        "onnx/model.onnx",
-        "config.json",
-        "special_tokens_map.json",
-        "tokenizer_config.json",
-        "tokenizer.json",
-        "vocab.txt"
-    ]
+    allow_patterns=allow_patterns
)
artifact_dir = tempfile.mkdtemp()
# copy all downloaded files to artifact folder
-for root, dirs, files in os.walk(local_dir):
-    for file in files:
-        src_path = os.path.join(root, file)
-        shutil.copy(src_path, artifact_dir)
+for file in allow_patterns:
+    shutil.copy(local_dir + "/" + file, artifact_dir)
# initialize EmbeddingONNXModel instance and prepare score.py, runtime.yaml and openapi.json files.
embedding_onnx_model = EmbeddingONNXModel(artifact_dir=artifact_dir)
