Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[CI-Examples] Add Candle ML framework example #31

Open
wants to merge 2 commits into
base: intel_tdx
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions CI-Examples/candle/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
/candle_matmul
/candle_quantized
/src

# model
/*.bin
/*.json
88 changes: 88 additions & 0 deletions CI-Examples/candle/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
# Copyright (C) 2024 Gramine contributors
# SPDX-License-Identifier: BSD-3-Clause

ARCH_LIBDIR ?= /lib/$(shell $(CC) -dumpmachine)

# Pin Candle to the release documented in README.md so builds are reproducible;
# an unpinned `cargo add --git` / `git clone` would silently track upstream master.
CANDLE_VERSION ?= 0.6.0

ifeq ($(DEBUG),1)
GRAMINE_LOG_LEVEL = debug
else
GRAMINE_LOG_LEVEL = error
endif

SRCDIR = src

.PHONY: all
all: candle_matmul candle_matmul.manifest candle_quantized candle_quantized.manifest
ifeq ($(SGX),1)
all: candle_matmul.manifest.sgx candle_matmul.sig candle_quantized.manifest.sgx candle_quantized.sig
endif

######################### Simple Matrix Multiplication #########################

# Generate a fresh Cargo project, add the pinned candle-core dependency, and
# drop in our prepared main.rs (the upstream "get started" example).
$(SRCDIR)/candle_matmul/target/debug/candle_matmul:
	mkdir -p $(SRCDIR) && cd $(SRCDIR) && \
	cargo new candle_matmul && cd candle_matmul && \
	cargo add --git https://github.com/huggingface/candle.git --tag $(CANDLE_VERSION) candle-core && \
	cp ../../prepared_matmul_src/main.rs ./src/main.rs && \
	cargo build

candle_matmul: $(SRCDIR)/candle_matmul/target/debug/candle_matmul
	cp $< $@

candle_matmul.manifest: candle_matmul.manifest.template
	gramine-manifest \
		-Dlog_level=$(GRAMINE_LOG_LEVEL) \
		-Darch_libdir=$(ARCH_LIBDIR) \
		$< > $@

# Both .manifest.sgx and .sig are produced by one gramine-sgx-sign invocation;
# the intermediate target prevents the recipe from running twice in parallel builds.
candle_matmul.manifest.sgx candle_matmul.sig: candle_matmul_sgx_sign
	@:

.INTERMEDIATE: candle_matmul_sgx_sign
candle_matmul_sgx_sign: candle_matmul.manifest candle_matmul
	gramine-sgx-sign \
		--manifest $< \
		--output $<.sgx

############################## Quantized LLaMA #################################

llama-2-7b.ggmlv3.q4_0.bin:
	../common_tools/download --output $@ \
		--sha256 bfa26d855e44629c4cf919985e90bd7fa03b77eea1676791519e39a4d45fd4d5 \
		--url https://huggingface.co/TheBloke/Llama-2-7B-GGML/resolve/main/$@

tokenizer.json:
	../common_tools/download --output $@ \
		--sha256 8eea70c4866c4f1320ba096fc986ac82038a8374dbe135212ba7628835b4a6f1 \
		--url https://huggingface.co/hf-internal-testing/llama-tokenizer/raw/main/$@

# Shallow-clone the pinned Candle release and build its bundled "quantized"
# example. The model/tokenizer prerequisites are runtime inputs, listed here so
# they are fetched as part of the default build.
$(SRCDIR)/candle_quantized/target/release/examples/quantized: llama-2-7b.ggmlv3.q4_0.bin tokenizer.json
	mkdir -p $(SRCDIR) && cd $(SRCDIR) && \
	git clone --branch $(CANDLE_VERSION) --depth 1 \
		https://github.com/huggingface/candle.git candle_quantized && \
	cd candle_quantized && \
	cargo build --example quantized --release

candle_quantized: $(SRCDIR)/candle_quantized/target/release/examples/quantized
	cp $< $@

candle_quantized.manifest: candle_quantized.manifest.template
	gramine-manifest \
		-Dlog_level=$(GRAMINE_LOG_LEVEL) \
		-Darch_libdir=$(ARCH_LIBDIR) \
		$< > $@

candle_quantized.manifest.sgx candle_quantized.sig: candle_quantized_sgx_sign
	@:

.INTERMEDIATE: candle_quantized_sgx_sign
candle_quantized_sgx_sign: candle_quantized.manifest candle_quantized
	gramine-sgx-sign \
		--manifest $< \
		--output $<.sgx

.PHONY: clean
clean:
	$(RM) *.token *.sig *.manifest.sgx *.manifest candle_matmul candle_quantized

.PHONY: distclean
distclean: clean
	$(RM) -r $(SRCDIR) *.tar.gz *.bin *.json
33 changes: 33 additions & 0 deletions CI-Examples/candle/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# Candle

Candle is a minimalist ML framework for Rust with a focus on performance
(including GPU support) and ease of use: https://github.com/huggingface/candle

This directory contains the Makefile and the template manifest for the most
recent version of Candle as of this writing (v0.6.0).

## Warning

The `candle_quantized` app will download ~4GB of data (model + tokenizer). This
happens automatically in the Makefile.

## Quick Start

```sh
# build Candle (uses Rust Cargo) and the final manifest
make SGX=1

# run simple matrix multiplication
# example taken from https://github.com/huggingface/candle/tree/0.6.0?tab=readme-ov-file#get-started
./candle_matmul
gramine-direct ./candle_matmul
gramine-sgx ./candle_matmul

# run Quantized LLaMA (quantized version of the LLaMA model)
# note that for Gramine, the cmdline args are already defined in the manifest file
# example taken from https://github.com/huggingface/candle/tree/0.6.0?tab=readme-ov-file#check-out-our-examples
RAYON_NUM_THREADS=36 ./candle_quantized \
--model llama-2-7b.ggmlv3.q4_0.bin --tokenizer tokenizer.json --sample-len 200
RAYON_NUM_THREADS=36 gramine-direct ./candle_quantized
RAYON_NUM_THREADS=36 gramine-sgx ./candle_quantized
```
25 changes: 25 additions & 0 deletions CI-Examples/candle/candle_matmul.manifest.template
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Copyright (C) 2024 Gramine contributors
# SPDX-License-Identifier: BSD-3-Clause

libos.entrypoint = "/candle_matmul"

loader.log_level = "{{ log_level }}"

loader.env.LD_LIBRARY_PATH = "/lib:{{ arch_libdir }}"

fs.mounts = [
{ path = "/candle_matmul", uri = "file:candle_matmul" },
{ path = "/lib", uri = "file:{{ gramine.runtimedir() }}" },
{ path = "{{ arch_libdir }}/libgcc_s.so.1", uri = "file:{{ arch_libdir }}/libgcc_s.so.1" },
]

sgx.debug = true
sgx.edmm_enable = {{ 'true' if env.get('EDMM', '0') == '1' else 'false' }}
sgx.max_threads = {{ '1' if env.get('EDMM', '0') == '1' else '16' }}
sgx.enclave_size = "1G"

sgx.trusted_files = [
"file:candle_matmul",
"file:{{ gramine.runtimedir() }}/",
"file:{{ arch_libdir }}/libgcc_s.so.1",
]
37 changes: 37 additions & 0 deletions CI-Examples/candle/candle_quantized.manifest.template
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# Copyright (C) 2024 Gramine contributors
# SPDX-License-Identifier: BSD-3-Clause

libos.entrypoint = "/candle_quantized"

loader.log_level = "{{ log_level }}"

loader.env.LD_LIBRARY_PATH = "/lib:{{ arch_libdir }}"
loader.env.RAYON_NUM_THREADS = { passthrough = true }

loader.argv = [ "candle_quantized", "--model", "llama-2-7b.ggmlv3.q4_0.bin",
"--tokenizer", "tokenizer.json", "--sample-len", "200" ]

fs.mounts = [
{ path = "/candle_quantized", uri = "file:candle_quantized" },
{ path = "/lib", uri = "file:{{ gramine.runtimedir() }}" },
{ path = "{{ arch_libdir }}", uri = "file:{{ arch_libdir }}" },

{ path = "/llama-2-7b.ggmlv3.q4_0.bin", uri = "file:llama-2-7b.ggmlv3.q4_0.bin" },
{ path = "/tokenizer.json", uri = "file:tokenizer.json" },
]

sgx.debug = true
sgx.edmm_enable = {{ 'true' if env.get('EDMM', '0') == '1' else 'false' }}
sgx.max_threads = {{ '1' if env.get('EDMM', '0') == '1' else '256' }}
sgx.enclave_size = "32G"

sgx.trusted_files = [
"file:candle_quantized",
"file:{{ gramine.runtimedir() }}/",
"file:{{ arch_libdir }}/libcrypto.so.3",
"file:{{ arch_libdir }}/libgcc_s.so.1",
"file:{{ arch_libdir }}/libssl.so.3",

"file:llama-2-7b.ggmlv3.q4_0.bin",
"file:tokenizer.json",
]
14 changes: 14 additions & 0 deletions CI-Examples/candle/prepared_matmul_src/main.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
// example taken from https://github.com/huggingface/candle/tree/0.6.0?tab=readme-ov-file#get-started
// (kept verbatim from upstream so it stays in sync with the cited example;
// only comments were added)

use candle_core::{Device, Tensor};

// Multiplies two small random matrices with Candle and prints the result.
// Acts as a minimal smoke test that Candle works under Gramine.
fn main() -> Result<(), Box<dyn std::error::Error>> {
    // CPU backend; no accelerator is used in this example.
    let device = Device::Cpu;

    // Random normal (mean 0, stddev 1) matrices: a is 2x3, b is 3x4.
    let a = Tensor::randn(0f32, 1., (2, 3), &device)?;
    let b = Tensor::randn(0f32, 1., (3, 4), &device)?;

    // (2x3) @ (3x4) -> 2x4 tensor, printed via Tensor's Display impl.
    let c = a.matmul(&b)?;
    println!("{c}");
    Ok(())
}
9 changes: 8 additions & 1 deletion libos/src/fs/sys/cache_info.c
Original file line number Diff line number Diff line change
Expand Up @@ -44,14 +44,21 @@ int sys_cache_load(struct libos_dentry* dent, char** out_data, size_t* out_size)
const struct pal_topo_info* topo = &g_pal_public_state->topo_info;
size_t cache_idx = topo->threads[thread_id].ids_of_caches[cache_class];
const struct pal_cache_info* cache = &topo->caches[cache_idx];
char str[PAL_SYSFS_MAP_FILESZ] = {'\0'};
char str[PAL_SYSFS_BUF_FILESZ] = {'\0'};
if (strcmp(name, "shared_cpu_map") == 0) {
struct callback_arg callback_arg = {
.cache_id_to_match = cache_idx,
.cache_class = cache_class,
};
ret = sys_print_as_bitmask(str, sizeof(str), topo->threads_cnt,
is_same_cache, &callback_arg);
} else if (strcmp(name, "shared_cpu_list") == 0) {
struct callback_arg callback_arg = {
.cache_id_to_match = cache_idx,
.cache_class = cache_class,
};
ret = sys_print_as_ranges(str, sizeof(str), topo->threads_cnt,
is_same_cache, &callback_arg);
} else if (strcmp(name, "level") == 0) {
ret = snprintf(str, sizeof(str), "%zu\n", cache->level);
} else if (strcmp(name, "type") == 0) {
Expand Down
1 change: 1 addition & 0 deletions libos/src/fs/sys/fs.c
Original file line number Diff line number Diff line change
Expand Up @@ -269,6 +269,7 @@ static void init_cpu_dir(struct pseudo_node* cpu) {
indexX->list_names = &sys_resource_list_names;

pseudo_add_str(indexX, "shared_cpu_map", &sys_cache_load);
pseudo_add_str(indexX, "shared_cpu_list", &sys_cache_load);
pseudo_add_str(indexX, "level", &sys_cache_load);
pseudo_add_str(indexX, "type", &sys_cache_load);
pseudo_add_str(indexX, "size", &sys_cache_load);
Expand Down