Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[CI-Examples] Add Candle ML framework example #31

Open
wants to merge 2 commits into
base: intel_tdx
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions CI-Examples/candle/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
/candle_matmul
/candle_quantized
/src

# model
/*.bin
/*.json
88 changes: 88 additions & 0 deletions CI-Examples/candle/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
# Copyright (C) 2024 Gramine contributors
# SPDX-License-Identifier: BSD-3-Clause

ARCH_LIBDIR ?= /lib/$(shell $(CC) -dumpmachine)

# Pin Candle to the release documented in README.md so builds are reproducible;
# an unpinned `cargo add --git` / `git clone` would silently track upstream master.
CANDLE_VERSION ?= 0.6.0

ifeq ($(DEBUG),1)
GRAMINE_LOG_LEVEL = debug
else
GRAMINE_LOG_LEVEL = error
endif

SRCDIR = src

.PHONY: all
all: candle_matmul candle_matmul.manifest candle_quantized candle_quantized.manifest
ifeq ($(SGX),1)
all: candle_matmul.manifest.sgx candle_matmul.sig candle_quantized.manifest.sgx candle_quantized.sig
endif

######################### Simple Matrix Multiplication #########################

# Generate a fresh Cargo project, add the pinned candle-core dependency, and
# drop in our prepared main.rs (the upstream "get started" example).
$(SRCDIR)/candle_matmul/target/debug/candle_matmul:
	mkdir -p $(SRCDIR) && cd $(SRCDIR) && \
	cargo new candle_matmul && cd candle_matmul && \
	cargo add --git https://github.com/huggingface/candle.git --tag $(CANDLE_VERSION) candle-core && \
	cp ../../prepared_matmul_src/main.rs ./src/main.rs && \
	cargo build

candle_matmul: $(SRCDIR)/candle_matmul/target/debug/candle_matmul
	cp $< $@

candle_matmul.manifest: candle_matmul.manifest.template
	gramine-manifest \
		-Dlog_level=$(GRAMINE_LOG_LEVEL) \
		-Darch_libdir=$(ARCH_LIBDIR) \
		$< > $@

# Both .manifest.sgx and .sig are produced by one gramine-sgx-sign invocation;
# the intermediate target prevents the recipe from running twice in parallel builds.
candle_matmul.manifest.sgx candle_matmul.sig: candle_matmul_sgx_sign
	@:

.INTERMEDIATE: candle_matmul_sgx_sign
candle_matmul_sgx_sign: candle_matmul.manifest candle_matmul
	gramine-sgx-sign \
		--manifest $< \
		--output $<.sgx

############################## Quantized LLaMA #################################

llama-2-7b.ggmlv3.q4_0.bin:
	../common_tools/download --output $@ \
		--sha256 bfa26d855e44629c4cf919985e90bd7fa03b77eea1676791519e39a4d45fd4d5 \
		--url https://huggingface.co/TheBloke/Llama-2-7B-GGML/resolve/main/$@

tokenizer.json:
	../common_tools/download --output $@ \
		--sha256 8eea70c4866c4f1320ba096fc986ac82038a8374dbe135212ba7628835b4a6f1 \
		--url https://huggingface.co/hf-internal-testing/llama-tokenizer/raw/main/$@

# Shallow-clone the pinned Candle release and build its bundled "quantized"
# example. The model/tokenizer prerequisites are runtime inputs, listed here so
# they are fetched as part of the default build.
$(SRCDIR)/candle_quantized/target/release/examples/quantized: llama-2-7b.ggmlv3.q4_0.bin tokenizer.json
	mkdir -p $(SRCDIR) && cd $(SRCDIR) && \
	git clone --branch $(CANDLE_VERSION) --depth 1 \
		https://github.com/huggingface/candle.git candle_quantized && \
	cd candle_quantized && \
	cargo build --example quantized --release

candle_quantized: $(SRCDIR)/candle_quantized/target/release/examples/quantized
	cp $< $@

candle_quantized.manifest: candle_quantized.manifest.template
	gramine-manifest \
		-Dlog_level=$(GRAMINE_LOG_LEVEL) \
		-Darch_libdir=$(ARCH_LIBDIR) \
		$< > $@

candle_quantized.manifest.sgx candle_quantized.sig: candle_quantized_sgx_sign
	@:

.INTERMEDIATE: candle_quantized_sgx_sign
candle_quantized_sgx_sign: candle_quantized.manifest candle_quantized
	gramine-sgx-sign \
		--manifest $< \
		--output $<.sgx

.PHONY: clean
clean:
	$(RM) *.token *.sig *.manifest.sgx *.manifest candle_matmul candle_quantized

.PHONY: distclean
distclean: clean
	$(RM) -r $(SRCDIR) *.tar.gz *.bin *.json
33 changes: 33 additions & 0 deletions CI-Examples/candle/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# Candle

Candle is a minimalist ML framework for Rust with a focus on performance
(including GPU support) and ease of use: https://github.com/huggingface/candle

This directory contains the Makefile and the template manifest for the most
recent version of Candle as of this writing (v0.6.0).

## Warning

The `candle_quantized` app will download ~4GB of data (model + tokenizer). This
happens automatically in the Makefile.

## Quick Start

```sh
# build Candle (uses Rust Cargo) and the final manifest
make SGX=1

# run simple matrix multiplication
# example taken from https://github.com/huggingface/candle/tree/0.6.0?tab=readme-ov-file#get-started
./candle_matmul
gramine-direct ./candle_matmul
gramine-sgx ./candle_matmul

# run Quantized LLaMA (quantized version of the LLaMA model)
# note that for Gramine, the cmdline args are already defined in the manifest file
# example taken from https://github.com/huggingface/candle/tree/0.6.0?tab=readme-ov-file#check-out-our-examples
RAYON_NUM_THREADS=36 ./candle_quantized \
--model llama-2-7b.ggmlv3.q4_0.bin --tokenizer tokenizer.json --sample-len 200
RAYON_NUM_THREADS=36 gramine-direct ./candle_quantized
RAYON_NUM_THREADS=36 gramine-sgx ./candle_quantized
```
25 changes: 25 additions & 0 deletions CI-Examples/candle/candle_matmul.manifest.template
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Copyright (C) 2024 Gramine contributors
# SPDX-License-Identifier: BSD-3-Clause

libos.entrypoint = "/candle_matmul"

loader.log_level = "{{ log_level }}"

loader.env.LD_LIBRARY_PATH = "/lib:{{ arch_libdir }}"

fs.mounts = [
{ path = "/candle_matmul", uri = "file:candle_matmul" },
{ path = "/lib", uri = "file:{{ gramine.runtimedir() }}" },
{ path = "{{ arch_libdir }}/libgcc_s.so.1", uri = "file:{{ arch_libdir }}/libgcc_s.so.1" },
]

sgx.debug = true
sgx.edmm_enable = {{ 'true' if env.get('EDMM', '0') == '1' else 'false' }}
sgx.max_threads = {{ '1' if env.get('EDMM', '0') == '1' else '16' }}
sgx.enclave_size = "1G"

sgx.trusted_files = [
"file:candle_matmul",
"file:{{ gramine.runtimedir() }}/",
"file:{{ arch_libdir }}/libgcc_s.so.1",
]
37 changes: 37 additions & 0 deletions CI-Examples/candle/candle_quantized.manifest.template
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# Copyright (C) 2024 Gramine contributors
# SPDX-License-Identifier: BSD-3-Clause

libos.entrypoint = "/candle_quantized"

loader.log_level = "{{ log_level }}"

loader.env.LD_LIBRARY_PATH = "/lib:{{ arch_libdir }}"
loader.env.RAYON_NUM_THREADS = { passthrough = true }

loader.argv = [ "candle_quantized", "--model", "llama-2-7b.ggmlv3.q4_0.bin",
"--tokenizer", "tokenizer.json", "--sample-len", "200" ]

fs.mounts = [
{ path = "/candle_quantized", uri = "file:candle_quantized" },
{ path = "/lib", uri = "file:{{ gramine.runtimedir() }}" },
{ path = "{{ arch_libdir }}", uri = "file:{{ arch_libdir }}" },

{ path = "/llama-2-7b.ggmlv3.q4_0.bin", uri = "file:llama-2-7b.ggmlv3.q4_0.bin" },
{ path = "/tokenizer.json", uri = "file:tokenizer.json" },
]

sgx.debug = true
sgx.edmm_enable = {{ 'true' if env.get('EDMM', '0') == '1' else 'false' }}
sgx.max_threads = {{ '1' if env.get('EDMM', '0') == '1' else '256' }}
sgx.enclave_size = "32G"

sgx.trusted_files = [
"file:candle_quantized",
"file:{{ gramine.runtimedir() }}/",
"file:{{ arch_libdir }}/libcrypto.so.3",
"file:{{ arch_libdir }}/libgcc_s.so.1",
"file:{{ arch_libdir }}/libssl.so.3",

"file:llama-2-7b.ggmlv3.q4_0.bin",
"file:tokenizer.json",
]
14 changes: 14 additions & 0 deletions CI-Examples/candle/prepared_matmul_src/main.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
// example taken from https://github.com/huggingface/candle/tree/0.6.0?tab=readme-ov-file#get-started
// (kept verbatim from upstream so it stays in sync with the cited example;
// only comments were added)

use candle_core::{Device, Tensor};

// Multiplies two small random matrices with Candle and prints the result.
// Acts as a minimal smoke test that Candle works under Gramine.
fn main() -> Result<(), Box<dyn std::error::Error>> {
    // CPU backend; no accelerator is used in this example.
    let device = Device::Cpu;

    // Random normal (mean 0, stddev 1) matrices: a is 2x3, b is 3x4.
    let a = Tensor::randn(0f32, 1., (2, 3), &device)?;
    let b = Tensor::randn(0f32, 1., (3, 4), &device)?;

    // (2x3) @ (3x4) -> 2x4 tensor, printed via Tensor's Display impl.
    let c = a.matmul(&b)?;
    println!("{c}");
    Ok(())
}
9 changes: 8 additions & 1 deletion libos/src/fs/sys/cache_info.c
Original file line number Diff line number Diff line change
Expand Up @@ -44,14 +44,21 @@ int sys_cache_load(struct libos_dentry* dent, char** out_data, size_t* out_size)
const struct pal_topo_info* topo = &g_pal_public_state->topo_info;
size_t cache_idx = topo->threads[thread_id].ids_of_caches[cache_class];
const struct pal_cache_info* cache = &topo->caches[cache_idx];
char str[PAL_SYSFS_MAP_FILESZ] = {'\0'};
char str[PAL_SYSFS_BUF_FILESZ] = {'\0'};
if (strcmp(name, "shared_cpu_map") == 0) {
struct callback_arg callback_arg = {
.cache_id_to_match = cache_idx,
.cache_class = cache_class,
};
ret = sys_print_as_bitmask(str, sizeof(str), topo->threads_cnt,
is_same_cache, &callback_arg);
} else if (strcmp(name, "shared_cpu_list") == 0) {
struct callback_arg callback_arg = {
.cache_id_to_match = cache_idx,
.cache_class = cache_class,
};
ret = sys_print_as_ranges(str, sizeof(str), topo->threads_cnt,
is_same_cache, &callback_arg);
} else if (strcmp(name, "level") == 0) {
ret = snprintf(str, sizeof(str), "%zu\n", cache->level);
} else if (strcmp(name, "type") == 0) {
Expand Down
1 change: 1 addition & 0 deletions libos/src/fs/sys/fs.c
Original file line number Diff line number Diff line change
Expand Up @@ -269,6 +269,7 @@ static void init_cpu_dir(struct pseudo_node* cpu) {
indexX->list_names = &sys_resource_list_names;

pseudo_add_str(indexX, "shared_cpu_map", &sys_cache_load);
pseudo_add_str(indexX, "shared_cpu_list", &sys_cache_load);
pseudo_add_str(indexX, "level", &sys_cache_load);
pseudo_add_str(indexX, "type", &sys_cache_load);
pseudo_add_str(indexX, "size", &sys_cache_load);
Expand Down