oracle-devrel · Yuri-Rassokhin · Aug 2, 2024 · Aug 2, 2024 · Aug 2, 2024 · Aug 9, 2024
diff --git a/cloud-infrastructure/ai-infra-gpu/ai-infrastructure/basic-utility-scripts/LICENSE b/cloud-infrastructure/ai-infra-gpu/ai-infrastructure/basic-utility-scripts/LICENSE
@@ -0,0 +1,35 @@
+#Copyright (c) 2024 Oracle and/or its affiliates.
+#
+#The Universal Permissive License (UPL), Version 1.0
+#
+#Subject to the condition set forth below, permission is hereby granted to any
+#person obtaining a copy of this software, associated documentation and/or data
+#(collectively the "Software"), free of charge and under any and all copyright
+#rights in the Software, and any and all patent rights owned or freely
+#licensable by each licensor hereunder covering either (i) the unmodified
+#Software as contributed to or provided by such licensor, or (ii) the Larger
+#Works (as defined below), to deal in both
+#
+#(a) the Software, and
+#(b) any piece of software and/or hardware listed in the lrgrwrks.txt file if
+#one is included with the Software (each a "Larger Work" to which the Software
+#is contributed by such licensors),
+#
+#without restriction, including without limitation the rights to copy, create
+#derivative works of, display, perform, and distribute the Software and make,
+#use, sell, offer for sale, import, export, have made, and have sold the
+#Software and the Larger Work(s), and to sublicense the foregoing rights on
+#either these or other terms.
+#
+#This license is subject to the following condition:
+#The above copyright notice and either this complete permission notice or at
+#a minimum a reference to the UPL must be included in all copies or
+#substantial portions of the Software.
+#
+#THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+#IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+#FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+#AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+#LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+#OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+#SOFTWARE.
diff --git a/...d-infrastructure/ai-infra-gpu/ai-infrastructure/basic-utility-scripts/README.md b/...d-infrastructure/ai-infra-gpu/ai-infrastructure/basic-utility-scripts/README.md
@@ -0,0 +1,31 @@
+# Basic Utility Scripts
+
+This page contains useful scripts for basic actions, such as checking CUDA health or downloading an AI model.
+
+Reviewed: 08.06.2024
+
+# When to use this asset?
+
+When you are planning to install and run a pretrained AI model from Huggingface on an OCI GPU instance.
+The examples use Llama 3, for illustrative purposes, and the PyTorch library.
+
+# How to use this asset?
+
+1. Run `pip install -r requirements.txt`
+2. Pick a script from this directory based on its self-descriptive name, and review its content before using it. Some scripts contain placeholders, such as `<INSERT TOKEN>`, that must be replaced with your Hugging Face access token
+3. Run the script. As some scripts may need to download an AI model, its first invocation may take time
+
+## Prerequisites & Docs
+
+### Prerequisites
+
+* An OCI GPU instance
+* A Huggingface account with a valid Auth Token
+
+# License
+
+Copyright (c) 2024 Oracle and/or its affiliates.
+
+Licensed under the Universal Permissive License (UPL), Version 1.0.
+
+See [LICENSE](https://github.com/oracle-devrel/technology-engineering/blob/main/LICENSE) for more details.
diff --git a/...-infra-gpu/ai-infrastructure/basic-utility-scripts/download_and_test_huggingface_model.py b/...-infra-gpu/ai-infrastructure/basic-utility-scripts/download_and_test_huggingface_model.py
@@ -0,0 +1,72 @@
+
+#Copyright (c) 2024 Oracle and/or its affiliates.
+#
+#The Universal Permissive License (UPL), Version 1.0
+#
+#Subject to the condition set forth below, permission is hereby granted to any
+#person obtaining a copy of this software, associated documentation and/or data
+#(collectively the "Software"), free of charge and under any and all copyright
+#rights in the Software, and any and all patent rights owned or freely
+#licensable by each licensor hereunder covering either (i) the unmodified
+#Software as contributed to or provided by such licensor, or (ii) the Larger
+#Works (as defined below), to deal in both
+#
+#(a) the Software, and
+#(b) any piece of software and/or hardware listed in the lrgrwrks.txt file if
+#one is included with the Software (each a "Larger Work" to which the Software
+#is contributed by such licensors),
+#
+#without restriction, including without limitation the rights to copy, create
+#derivative works of, display, perform, and distribute the Software and make,
+#use, sell, offer for sale, import, export, have made, and have sold the
+#Software and the Larger Work(s), and to sublicense the foregoing rights on
+#either these or other terms.
+#
+#This license is subject to the following condition:
+#The above copyright notice and either this complete permission notice or at
+#a minimum a reference to the UPL must be included in all copies or
+#substantial portions of the Software.
+#
+#THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+#IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+#FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+#AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+#LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+#OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+#SOFTWARE.
+
+
+import transformers
+import torch
+
+model_id = "meta-llama/Meta-Llama-3-8B-Instruct"
+access_token = "<INSERT TOKEN>"  # Replace with your actual Hugging Face access token
+output_dir = "./"  # Specify your desired directory
+
+# Download the model and tokenizer
+model = transformers.AutoModelForCausalLM.from_pretrained(
+    model_id,
+    cache_dir=output_dir,
+    use_auth_token=access_token,  # Directly pass the token here
+    torch_dtype=torch.bfloat16
+)
+
+tokenizer = transformers.AutoTokenizer.from_pretrained(
+    model_id,
+    cache_dir=output_dir,
+    use_auth_token=access_token  # Directly pass the token here
+)
+
+# Save the model and tokenizer to the specified directory
+model.save_pretrained(output_dir)
+tokenizer.save_pretrained(output_dir)
+
+# Example usage
+pipeline = transformers.pipeline(
+  "text-generation",
+  model=model,
+  tokenizer=tokenizer,
+  device="cuda",
+)
+
+print(pipeline("Once upon a time")[0]['generated_text'])
diff --git a/cloud-infrastructure/ai-infra-gpu/ai-infrastructure/basic-utility-scripts/requirements.txt b/cloud-infrastructure/ai-infra-gpu/ai-infrastructure/basic-utility-scripts/requirements.txt
@@ -0,0 +1,2 @@
+torch
+transformers
diff --git a/cloud-infrastructure/ai-infra-gpu/ai-infrastructure/basic-utility-scripts/test_cuda_torch.py b/cloud-infrastructure/ai-infra-gpu/ai-infrastructure/basic-utility-scripts/test_cuda_torch.py
@@ -0,0 +1,53 @@
+#Copyright (c) 2024 Oracle and/or its affiliates.
+#
+#The Universal Permissive License (UPL), Version 1.0
+#
+#Subject to the condition set forth below, permission is hereby granted to any
+#person obtaining a copy of this software, associated documentation and/or data
+#(collectively the "Software"), free of charge and under any and all copyright
+#rights in the Software, and any and all patent rights owned or freely
+#licensable by each licensor hereunder covering either (i) the unmodified
+#Software as contributed to or provided by such licensor, or (ii) the Larger
+#Works (as defined below), to deal in both
+#
+#(a) the Software, and
+#(b) any piece of software and/or hardware listed in the lrgrwrks.txt file if
+#one is included with the Software (each a "Larger Work" to which the Software
+#is contributed by such licensors),
+#
+#without restriction, including without limitation the rights to copy, create
+#derivative works of, display, perform, and distribute the Software and make,
+#use, sell, offer for sale, import, export, have made, and have sold the
+#Software and the Larger Work(s), and to sublicense the foregoing rights on
+#either these or other terms.
+#
+#This license is subject to the following condition:
+#The above copyright notice and either this complete permission notice or at
+#a minimum a reference to the UPL must be included in all copies or
+#substantial portions of the Software.
+#
+#THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+#IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+#FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+#AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+#LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+#OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+#SOFTWARE.
+
+import torch
+
+print("CUDA available:", torch.cuda.is_available())
+
+print("number of GPUs:", torch.cuda.device_count())
+
+if torch.cuda.is_available():
+    try:
+        tensor = torch.tensor([1.0,2.0,3.0],device='cuda:0')
+        print("CUDA is valid:", tensor)
+    except Exception as e:
+        print("Error using CUDA device:", e)
+else:
+    print("CUDA device is unavailable")
+
+for i in range(torch.cuda.device_count()):
+       print(torch.cuda.get_device_properties(i).name)
diff --git a/cloud-infrastructure/ai-infra-gpu/ai-infrastructure/fastapi-ai-examples/LICENSE b/cloud-infrastructure/ai-infra-gpu/ai-infrastructure/fastapi-ai-examples/LICENSE
@@ -0,0 +1,35 @@
+#Copyright (c) 2024 Oracle and/or its affiliates.
+#
+#The Universal Permissive License (UPL), Version 1.0
+#
+#Subject to the condition set forth below, permission is hereby granted to any
+#person obtaining a copy of this software, associated documentation and/or data
+#(collectively the "Software"), free of charge and under any and all copyright
+#rights in the Software, and any and all patent rights owned or freely
+#licensable by each licensor hereunder covering either (i) the unmodified
+#Software as contributed to or provided by such licensor, or (ii) the Larger
+#Works (as defined below), to deal in both
+#
+#(a) the Software, and
+#(b) any piece of software and/or hardware listed in the lrgrwrks.txt file if
+#one is included with the Software (each a "Larger Work" to which the Software
+#is contributed by such licensors),
+#
+#without restriction, including without limitation the rights to copy, create
+#derivative works of, display, perform, and distribute the Software and make,
+#use, sell, offer for sale, import, export, have made, and have sold the
+#Software and the Larger Work(s), and to sublicense the foregoing rights on
+#either these or other terms.
+#
+#This license is subject to the following condition:
+#The above copyright notice and either this complete permission notice or at
+#a minimum a reference to the UPL must be included in all copies or
+#substantial portions of the Software.
+#
+#THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+#IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+#FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+#AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+#LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+#OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+#SOFTWARE.
diff --git a/cloud-infrastructure/ai-infra-gpu/ai-infrastructure/fastapi-ai-examples/README.md b/cloud-infrastructure/ai-infra-gpu/ai-infrastructure/fastapi-ai-examples/README.md
@@ -0,0 +1,3 @@
+### Examples of AI API created with FastAPI
+
+Python applications exposing AI models as HTTP endpoints, built with the FastAPI library.
diff --git a/...ture/ai-infra-gpu/ai-infrastructure/fastapi-ai-examples/llama3-fastapi-1xGPU-inference.py b/...ture/ai-infra-gpu/ai-infrastructure/fastapi-ai-examples/llama3-fastapi-1xGPU-inference.py
@@ -0,0 +1,107 @@
+#Copyright (c) 2024 Oracle and/or its affiliates.
+#
+#The Universal Permissive License (UPL), Version 1.0
+#
+#Subject to the condition set forth below, permission is hereby granted to any
+#person obtaining a copy of this software, associated documentation and/or data
+#(collectively the "Software"), free of charge and under any and all copyright
+#rights in the Software, and any and all patent rights owned or freely
+#licensable by each licensor hereunder covering either (i) the unmodified
+#Software as contributed to or provided by such licensor, or (ii) the Larger
+#Works (as defined below), to deal in both
+#
+#(a) the Software, and
+#(b) any piece of software and/or hardware listed in the lrgrwrks.txt file if
+#one is included with the Software (each a "Larger Work" to which the Software
+#is contributed by such licensors),
+#
+#without restriction, including without limitation the rights to copy, create
+#derivative works of, display, perform, and distribute the Software and make,
+#use, sell, offer for sale, import, export, have made, and have sold the
+#Software and the Larger Work(s), and to sublicense the foregoing rights on
+#either these or other terms.
+#
+#This license is subject to the following condition:
+#The above copyright notice and either this complete permission notice or at
+#a minimum a reference to the UPL must be included in all copies or
+#substantial portions of the Software.
+#
+#THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+#IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+#FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+#AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+#LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+#OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+#SOFTWARE.
+
+import time
+import torch
+import pynvml
+from fastapi import FastAPI, HTTPException
+from pydantic import BaseModel
+from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
+
+# Application object: the /infer route is registered on it and uvicorn serves it.
+app = FastAPI()
+
+def print_vram_usage():
+    """Prints the VRAM usage of the GPU."""
+    pynvml.nvmlInit()
+    handle = pynvml.nvmlDeviceGetHandleByIndex(0)
+    info = pynvml.nvmlDeviceGetMemoryInfo(handle)
+    print(f"Total VRAM: {info.total / 1024**2:.2f} MB")
+    print(f"Free VRAM: {info.free / 1024**2:.2f} MB")
+    print(f"Used VRAM: {info.used / 1024**2:.2f} MB")
+
+# Define the start time for the total initialization
+start_time = time.time()
+
+print("Loading tokenizer and model...")
+
+# Load the tokenizer and model from the local directory
+model_name = "/share/app/llama3-8b-instruct/"
+
+# Use torch.bfloat16 to reduce memory usage
+model_loading_start_time = time.time()
+model = AutoModelForCausalLM.from_pretrained(
+    model_name,
+    torch_dtype=torch.bfloat16
+)
+print(f"Model loaded in {time.time() - model_loading_start_time:.2f} seconds.")
+print_vram_usage()
+
+tokenizer_loading_start_time = time.time()
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+print(f"Tokenizer loaded in {time.time() - tokenizer_loading_start_time:.2f} seconds.")
+print_vram_usage()
+
+# Use the Hugging Face pipeline for text generation
+pipe = pipeline(
+    "text-generation",
+    model=model,
+    tokenizer=tokenizer,
+    device=0  # Use GPU
+)
+
+print(f"Model moved to GPU in {time.time() - model_loading_start_time:.2f} seconds.")
+print_vram_usage()
+
+# Request body accepted by the /infer endpoint.
+class InferenceRequest(BaseModel):
+    # Text that the endpoint passes to the text-generation pipeline.
+    input_text: str
+
+@app.post("/infer")
+async def infer(request: InferenceRequest):
+    try:
+        inference_start_time = time.time()
+        result = pipe(request.input_text)[0]['generated_text']
+        print(f"Generated text: {result}")
+        print(f"Inference completed in {time.time() - inference_start_time:.2f} seconds.")
+
+        return {"generated_text": result}
+    except Exception as e:
+        print(f"Error during inference: {e}")
+        raise HTTPException(status_code=500, detail=str(e))
+
+if __name__ == "__main__":
+    import uvicorn
+    print(f"Total initialization time: {time.time() - start_time:.2f} seconds.")
+    uvicorn.run(app, host="0.0.0.0", port=8000)