-
Notifications
You must be signed in to change notification settings - Fork 22
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Candle is a minimalist ML framework for Rust with a focus on performance and ease of use. This commit adds the Quantized LLaMA example. Signed-off-by: Dmitrii Kuvaiskii <[email protected]>
- Loading branch information
Dmitrii Kuvaiskii
committed
Jul 26, 2024
1 parent
ceba8e9
commit 5a3494d
Showing
4 changed files
with
130 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
# build artifacts: the copied example binary and the cloned Candle sources
/candle_quantized
/src

# model and tokenizer files downloaded by the Makefile
/*.bin
/*.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
# Copyright (C) 2024 Gramine contributors | ||
# SPDX-License-Identifier: BSD-3-Clause | ||
|
||
# Architecture-specific system library directory (e.g. /lib/x86_64-linux-gnu);
# overridable from the command line.
ARCH_LIBDIR ?= /lib/$(shell $(CC) -dumpmachine)

# `make DEBUG=1` turns on verbose Gramine logging.
ifeq ($(DEBUG),1)
GRAMINE_LOG_LEVEL = debug
else
GRAMINE_LOG_LEVEL = error
endif

# Directory into which the Candle sources are cloned.
SRCDIR = src
# Default goal: the app binary plus the rendered manifest; with SGX=1 also the
# SGX manifest and the enclave signature.
.PHONY: all
all: candle_quantized candle_quantized.manifest
ifeq ($(SGX),1)
all: candle_quantized.manifest.sgx candle_quantized.sig
endif
# Fetch the quantized LLaMA-2 7B weights (several GB) and verify their checksum.
llama-2-7b.ggmlv3.q4_0.bin:
	../common_tools/download --output $@ \
		--sha256 bfa26d855e44629c4cf919985e90bd7fa03b77eea1676791519e39a4d45fd4d5 \
		--url https://huggingface.co/TheBloke/Llama-2-7B-GGML/resolve/main/$@

# Fetch the LLaMA tokenizer and verify its checksum.
tokenizer.json:
	../common_tools/download --output $@ \
		--sha256 8eea70c4866c4f1320ba096fc986ac82038a8374dbe135212ba7628835b4a6f1 \
		--url https://huggingface.co/hf-internal-testing/llama-tokenizer/raw/main/$@
# Clone Candle pinned to the release documented in the README (v0.6.0) and build
# the `quantized` example in release mode. Pinning makes the build reproducible:
# an unpinned clone would silently pick up whatever upstream's default branch
# holds, changing the binary that gets measured into the enclave. `--depth 1`
# avoids downloading the full history. The model and tokenizer are listed as
# prerequisites so that a plain `make` fetches everything needed to run the app.
$(SRCDIR)/candle_quantized/target/release/examples/quantized: llama-2-7b.ggmlv3.q4_0.bin tokenizer.json
	mkdir -p $(SRCDIR) && cd $(SRCDIR) && \
	git clone --depth 1 --branch 0.6.0 \
		https://github.com/huggingface/candle.git candle_quantized && \
	cd candle_quantized && \
	cargo build --example quantized --release

# Copy the built example next to the manifest, under the name the manifest expects.
candle_quantized: $(SRCDIR)/candle_quantized/target/release/examples/quantized
	cp $< $@
# Render the manifest from the Jinja template, substituting the log level and
# the architecture-specific library directory.
candle_quantized.manifest: candle_quantized.manifest.template
	gramine-manifest \
		-Dlog_level=$(GRAMINE_LOG_LEVEL) \
		-Darch_libdir=$(ARCH_LIBDIR) \
		$< > $@

# gramine-sgx-sign emits both the .manifest.sgx and the .sig in a single
# invocation. Routing both targets through one .INTERMEDIATE pseudo-target
# makes the signing recipe run exactly once, including under `make -j`.
candle_quantized.manifest.sgx candle_quantized.sig: candle_quantized_sgx_sign
	@:

.INTERMEDIATE: candle_quantized_sgx_sign
candle_quantized_sgx_sign: candle_quantized.manifest candle_quantized
	gramine-sgx-sign \
		--manifest $< \
		--output $<.sgx
# Remove generated Gramine artifacts and the copied binary.
.PHONY: clean
clean:
	$(RM) *.token *.sig *.manifest.sgx *.manifest candle_quantized

# Additionally remove the cloned sources and all downloaded model/tokenizer files.
.PHONY: distclean
distclean: clean
	$(RM) -r $(SRCDIR) *.tar.gz *.bin *.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
# Candle | ||
|
||
[Candle](https://github.com/huggingface/candle) is a minimalist ML framework for | ||
Rust with a focus on performance (including GPU support) and ease of use. | ||
|
||
This directory contains the Makefile and the template manifest for the most | ||
recent version of Candle as of this writing (v0.6.0). | ||
|
||
# Warning | ||
|
||
The `candle_quantized` app will download ~4GB of data (model + tokenizer). This | ||
happens automatically in the Makefile. | ||
|
||
# Quick Start | ||
|
||
```sh | ||
# build Candle (uses Rust Cargo) and the final manifest | ||
make SGX=1 | ||
|
||
# run Quantized LLaMA (quantized version of the LLaMA model) | ||
# note that for Gramine, the cmdline args are already defined in the manifest file | ||
# example taken from https://github.com/huggingface/candle/tree/0.6.0?tab=readme-ov-file#check-out-our-examples | ||
RAYON_NUM_THREADS=36 ./candle_quantized \ | ||
--model llama-2-7b.ggmlv3.q4_0.bin --tokenizer tokenizer.json --sample-len 200 | ||
RAYON_NUM_THREADS=36 gramine-direct ./candle_quantized | ||
RAYON_NUM_THREADS=36 gramine-sgx ./candle_quantized | ||
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
# Copyright (C) 2024 Gramine contributors | ||
# SPDX-License-Identifier: BSD-3-Clause | ||
|
||
# Gramine LibOS library, and the application entrypoint as seen inside the enclave.
loader.entrypoint = "file:{{ gramine.libos }}"
libos.entrypoint = "/candle_quantized"

# Log level is substituted by `gramine-manifest -Dlog_level=...` in the Makefile.
loader.log_level = "{{ log_level }}"

loader.env.LD_LIBRARY_PATH = "/lib:{{ arch_libdir }}"
# Forward the host's RAYON_NUM_THREADS so the user can cap Rayon worker threads.
loader.env.RAYON_NUM_THREADS = { passthrough = true }

# Command-line arguments are fixed here; they cannot be overridden at run time
# (matches the README note that args are already defined in the manifest).
loader.argv = [ "candle_quantized", "--model", "llama-2-7b.ggmlv3.q4_0.bin",
                "--tokenizer", "tokenizer.json", "--sample-len", "200" ]

fs.mounts = [
  { path = "/candle_quantized", uri = "file:candle_quantized" },
  { path = "/lib", uri = "file:{{ gramine.runtimedir() }}" },
  { path = "{{ arch_libdir }}", uri = "file:{{ arch_libdir }}" },

  # model weights and tokenizer, downloaded by the Makefile
  { path = "/llama-2-7b.ggmlv3.q4_0.bin", uri = "file:llama-2-7b.ggmlv3.q4_0.bin" },
  { path = "/tokenizer.json", uri = "file:tokenizer.json" },
]

# NOTE(review): max_threads differs by EDMM mode — presumably because EDMM adds
# enclave threads dynamically while non-EDMM must preallocate them; confirm
# against Gramine docs before changing.
sgx.edmm_enable = {{ 'true' if env.get('EDMM', '0') == '1' else 'false' }}
sgx.max_threads = {{ '1' if env.get('EDMM', '0') == '1' else '256' }}
# Large enclave: the multi-gigabyte model plus inference buffers must fit.
sgx.enclave_size = "32G"

# Files whose hashes are baked into the SGX measurement at signing time.
sgx.trusted_files = [
  "file:candle_quantized",
  "file:{{ gramine.libos }}",
  "file:{{ gramine.runtimedir() }}/",
  "file:{{ arch_libdir }}/libcrypto.so.3",
  "file:{{ arch_libdir }}/libgcc_s.so.1",
  "file:{{ arch_libdir }}/libssl.so.3",

  "file:llama-2-7b.ggmlv3.q4_0.bin",
  "file:tokenizer.json",
]