Add Candle ML framework example
Candle is a minimalist ML framework for Rust with a focus on performance
and ease of use. This commit adds the Quantized LLaMA example.

Signed-off-by: Dmitrii Kuvaiskii <[email protected]>
Dmitrii Kuvaiskii committed Jul 26, 2024
1 parent ceba8e9 commit 5a3494d
Showing 4 changed files with 130 additions and 0 deletions.
6 changes: 6 additions & 0 deletions candle/.gitignore
@@ -0,0 +1,6 @@
/candle_quantized
/src

# model
/*.bin
/*.json
59 changes: 59 additions & 0 deletions candle/Makefile
@@ -0,0 +1,59 @@
# Copyright (C) 2024 Gramine contributors
# SPDX-License-Identifier: BSD-3-Clause

ARCH_LIBDIR ?= /lib/$(shell $(CC) -dumpmachine)

ifeq ($(DEBUG),1)
GRAMINE_LOG_LEVEL = debug
else
GRAMINE_LOG_LEVEL = error
endif

SRCDIR = src

.PHONY: all
all: candle_quantized candle_quantized.manifest
ifeq ($(SGX),1)
all: candle_quantized.manifest.sgx candle_quantized.sig
endif

# The model weights (~4GB) and the tokenizer are fetched from Hugging Face;
# the download helper verifies the SHA-256 digest of each file.
llama-2-7b.ggmlv3.q4_0.bin:
	../common_tools/download --output $@ \
		--sha256 bfa26d855e44629c4cf919985e90bd7fa03b77eea1676791519e39a4d45fd4d5 \
		--url https://huggingface.co/TheBloke/Llama-2-7B-GGML/resolve/main/$@

tokenizer.json:
	../common_tools/download --output $@ \
		--sha256 8eea70c4866c4f1320ba096fc986ac82038a8374dbe135212ba7628835b4a6f1 \
		--url https://huggingface.co/hf-internal-testing/llama-tokenizer/raw/main/$@

$(SRCDIR)/candle_quantized/target/release/examples/quantized: llama-2-7b.ggmlv3.q4_0.bin tokenizer.json
	mkdir -p $(SRCDIR) && cd $(SRCDIR) && \
	git clone https://github.com/huggingface/candle.git candle_quantized && \
	cd candle_quantized && \
	git checkout 0.6.0 && \
	cargo build --example quantized --release

candle_quantized: $(SRCDIR)/candle_quantized/target/release/examples/quantized
	cp $< $@

candle_quantized.manifest: candle_quantized.manifest.template
	gramine-manifest \
		-Dlog_level=$(GRAMINE_LOG_LEVEL) \
		-Darch_libdir=$(ARCH_LIBDIR) \
		$< > $@

candle_quantized.manifest.sgx candle_quantized.sig: candle_quantized_sgx_sign
	@:

.INTERMEDIATE: candle_quantized_sgx_sign
candle_quantized_sgx_sign: candle_quantized.manifest candle_quantized
	gramine-sgx-sign \
		--manifest $< \
		--output $<.sgx

.PHONY: clean
clean:
	$(RM) *.token *.sig *.manifest.sgx *.manifest candle_quantized

.PHONY: distclean
distclean: clean
	$(RM) -r $(SRCDIR) *.tar.gz *.bin *.json
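
The download rules above delegate to `../common_tools/download`, whose implementation is not part of this commit. A minimal sketch of what such a helper does, assuming plain `wget` plus GNU `sha256sum` (the URL and digest are copied verbatim from the Makefile):

```sh
# fetch the model weights, then fail loudly if the SHA-256 digest differs
wget --output-document llama-2-7b.ggmlv3.q4_0.bin \
    https://huggingface.co/TheBloke/Llama-2-7B-GGML/resolve/main/llama-2-7b.ggmlv3.q4_0.bin
echo "bfa26d855e44629c4cf919985e90bd7fa03b77eea1676791519e39a4d45fd4d5  llama-2-7b.ggmlv3.q4_0.bin" \
    | sha256sum --check --strict
```
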
27 changes: 27 additions & 0 deletions candle/README.md
@@ -0,0 +1,27 @@
# Candle

[Candle](https://github.com/huggingface/candle) is a minimalist ML framework for
Rust with a focus on performance (including GPU support) and ease of use.

This directory contains the Makefile and the template manifest for the most
recent version of Candle as of this writing (v0.6.0).

# Warning

Building the `candle_quantized` app downloads ~4GB of data (model weights plus
tokenizer). The download happens automatically as part of the Makefile build.

# Quick Start

```sh
# build Candle (uses Rust Cargo) and the final manifest
make SGX=1

# run Quantized LLaMA (quantized version of the LLaMA model)
# note that for Gramine, the cmdline args are already defined in the manifest file
# example taken from https://github.com/huggingface/candle/tree/0.6.0?tab=readme-ov-file#check-out-our-examples
RAYON_NUM_THREADS=36 ./candle_quantized \
--model llama-2-7b.ggmlv3.q4_0.bin --tokenizer tokenizer.json --sample-len 200
RAYON_NUM_THREADS=36 gramine-direct ./candle_quantized
RAYON_NUM_THREADS=36 gramine-sgx ./candle_quantized
```
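
Since the Gramine runs take their command line from `loader.argv` in the manifest (see the template below), changing an argument such as `--sample-len` means editing the template and regenerating the manifest. A hypothetical sketch (the `sed` edit is for illustration only; `make SGX=1` re-runs `gramine-manifest` and `gramine-sgx-sign` because the manifest depends on the template):

```sh
# bump the sample length from 200 to 400 tokens in the template,
# then regenerate and re-sign the manifest
sed -i 's/"200"/"400"/' candle_quantized.manifest.template
make SGX=1
RAYON_NUM_THREADS=36 gramine-sgx ./candle_quantized
```
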
38 changes: 38 additions & 0 deletions candle/candle_quantized.manifest.template
@@ -0,0 +1,38 @@
# Copyright (C) 2024 Gramine contributors
# SPDX-License-Identifier: BSD-3-Clause

loader.entrypoint = "file:{{ gramine.libos }}"
libos.entrypoint = "/candle_quantized"

loader.log_level = "{{ log_level }}"

loader.env.LD_LIBRARY_PATH = "/lib:{{ arch_libdir }}"
loader.env.RAYON_NUM_THREADS = { passthrough = true }

loader.argv = [ "candle_quantized", "--model", "llama-2-7b.ggmlv3.q4_0.bin",
                "--tokenizer", "tokenizer.json", "--sample-len", "200" ]

fs.mounts = [
  { path = "/candle_quantized", uri = "file:candle_quantized" },
  { path = "/lib", uri = "file:{{ gramine.runtimedir() }}" },
  { path = "{{ arch_libdir }}", uri = "file:{{ arch_libdir }}" },

  { path = "/llama-2-7b.ggmlv3.q4_0.bin", uri = "file:llama-2-7b.ggmlv3.q4_0.bin" },
  { path = "/tokenizer.json", uri = "file:tokenizer.json" },
]

sgx.edmm_enable = {{ 'true' if env.get('EDMM', '0') == '1' else 'false' }}
sgx.max_threads = {{ '1' if env.get('EDMM', '0') == '1' else '256' }}
sgx.enclave_size = "32G"

sgx.trusted_files = [
  "file:candle_quantized",
  "file:{{ gramine.libos }}",
  "file:{{ gramine.runtimedir() }}/",
  "file:{{ arch_libdir }}/libcrypto.so.3",
  "file:{{ arch_libdir }}/libgcc_s.so.1",
  "file:{{ arch_libdir }}/libssl.so.3",

  "file:llama-2-7b.ggmlv3.q4_0.bin",
  "file:tokenizer.json",
]
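
The `sgx.edmm_enable` and `sgx.max_threads` lines are rendered at manifest-generation time from the `EDMM` environment variable, assuming `gramine-manifest` exposes the process environment to the template as `env` (this is how the `env.get` lookups above are resolved). Under that assumption, an EDMM-enabled build could look like:

```sh
# regenerate the manifest with EDMM enabled, then run under SGX
make clean
EDMM=1 make SGX=1
RAYON_NUM_THREADS=36 gramine-sgx ./candle_quantized
```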
