Merge branch 'master' into default_miro

mudler · Jul 27, 2024 · a29b44c
2 parents 3ac4fe1 + 80652ab
commit a29b44c
Show file tree

Hide file tree

Showing 55 changed files with 1,533 additions and 531 deletions.
diff --git a/.github/workflows/checksum_checker.yaml b/.github/workflows/checksum_checker.yaml
@@ -41,7 +41,7 @@ jobs:
           token: ${{ secrets.UPDATE_BOT_TOKEN }}
           push-to-fork: ci-forks/LocalAI
           commit-message: ':arrow_up: Checksum updates in gallery/index.yaml'
-          title: 'models(gallery): :arrow_up: update checksum'
+          title: 'chore(model-gallery): :arrow_up: update checksum'
           branch: "update/checksum"
           body: Updating checksums in gallery/index.yaml
           signoff: true
diff --git a/.github/workflows/image-pr.yml b/.github/workflows/image-pr.yml
@@ -47,7 +47,7 @@ jobs:
           #   makeflags: "--jobs=3 --output-sync=target"
           - build-type: 'cublas'
             cuda-major-version: "12"
-            cuda-minor-version: "4"
+            cuda-minor-version: "0"
             platforms: 'linux/amd64'
             tag-latest: 'false'
             tag-suffix: '-cublas-cuda12-ffmpeg'
@@ -120,7 +120,7 @@ jobs:
           #   makeflags: "--jobs=3 --output-sync=target"
           # - build-type: 'cublas'
           #   cuda-major-version: "12"
-          #   cuda-minor-version: "4"
+          #   cuda-minor-version: "0"
           #   platforms: 'linux/amd64'
           #   tag-latest: 'false'
           #   tag-suffix: '-cublas-cuda12-ffmpeg-core'

diff --git a/.github/workflows/image.yml b/.github/workflows/image.yml
@@ -75,7 +75,7 @@ jobs:
             makeflags: "--jobs=3 --output-sync=target"
           - build-type: 'cublas'
             cuda-major-version: "12"
-            cuda-minor-version: "4"
+            cuda-minor-version: "0"
             platforms: 'linux/amd64'
             tag-latest: 'false'
             tag-suffix: '-cublas-cuda12'
@@ -100,7 +100,7 @@ jobs:
             makeflags: "--jobs=3 --output-sync=target"
           - build-type: 'cublas'
             cuda-major-version: "12"
-            cuda-minor-version: "4"
+            cuda-minor-version: "0"
             platforms: 'linux/amd64'
             tag-latest: 'auto'
             tag-suffix: '-cublas-cuda12-ffmpeg'
@@ -285,7 +285,7 @@ jobs:
             makeflags: "--jobs=4 --output-sync=target"
           - build-type: 'cublas'
             cuda-major-version: "12"
-            cuda-minor-version: "4"
+            cuda-minor-version: "0"
             platforms: 'linux/amd64'
             tag-latest: 'false'
             tag-suffix: '-cublas-cuda12-core'
@@ -307,7 +307,7 @@ jobs:
             makeflags: "--jobs=4 --output-sync=target"
           - build-type: 'cublas'
             cuda-major-version: "12"
-            cuda-minor-version: "4"
+            cuda-minor-version: "0"
             platforms: 'linux/amd64'
             tag-latest: 'false'
             tag-suffix: '-cublas-cuda12-ffmpeg-core'

diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml
@@ -4,6 +4,8 @@ on:
   push:
     branches:
       - master
+    tags:
+      - 'v*'
   pull_request:
 
 env:
@@ -29,11 +31,10 @@ jobs:
         with:
           go-version: '1.21.x'
           cache: false
-
       - name: Dependencies
         run: |
           sudo apt-get update
-          sudo apt-get install build-essential ffmpeg protobuf-compiler ccache gawk
+          sudo apt-get install build-essential ffmpeg protobuf-compiler ccache upx-ucl gawk
           sudo apt-get install -qy binutils-aarch64-linux-gnu gcc-aarch64-linux-gnu g++-aarch64-linux-gnu libgmock-dev
       - name: Install CUDA Dependencies
         run: |
@@ -149,7 +150,7 @@ jobs:
       - name: Dependencies
         run: |
           sudo apt-get update
-          sudo apt-get install -y wget curl build-essential ffmpeg protobuf-compiler ccache gawk cmake libgmock-dev
+          sudo apt-get install -y wget curl build-essential ffmpeg protobuf-compiler ccache upx-ucl gawk cmake libgmock-dev
       - name: Intel Dependencies
         run: |
           wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | gpg --dearmor | sudo tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null
@@ -250,7 +251,7 @@ jobs:
       - name: Dependencies
         run: |
           sudo apt-get update
-          sudo apt-get install -y --no-install-recommends libopencv-dev protobuf-compiler ccache
+          sudo apt-get install -y --no-install-recommends libopencv-dev protobuf-compiler ccache upx-ucl
           go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af
           go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2
       - name: Build stablediffusion

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
@@ -70,7 +70,7 @@ jobs:
       - name: Dependencies
         run: |
           sudo apt-get update
-          sudo apt-get install build-essential curl ffmpeg
+          sudo apt-get install build-essential ccache upx-ucl curl ffmpeg
           sudo apt-get install -y libgmock-dev
           curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
              sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \

diff --git a/Dockerfile b/Dockerfile
@@ -24,7 +24,7 @@ RUN apt-get update && \
         cmake \
         curl \
         git \
-        unzip && \
+        unzip upx-ucl && \
     apt-get clean && \
     rm -rf /var/lib/apt/lists/*
 
@@ -99,7 +99,7 @@ FROM requirements-${IMAGE_TYPE} AS requirements-drivers
 
 ARG BUILD_TYPE
 ARG CUDA_MAJOR_VERSION=12
-ARG CUDA_MINOR_VERSION=4
+ARG CUDA_MINOR_VERSION=0
 
 ENV BUILD_TYPE=${BUILD_TYPE}
 

diff --git a/Makefile b/Makefile
@@ -8,7 +8,7 @@ DETECT_LIBS?=true
 # llama.cpp versions
 GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp
 GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
-CPPLLAMA_VERSION?=705b7ecf60e667ced57c15d67aa86865e3cc7aa7
+CPPLLAMA_VERSION?=01245f5b1629075543bc4478418c7d72a0b4b3c7
 
 # gpt4all version
 GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all
@@ -58,7 +58,7 @@ RANDOM := $(shell bash -c 'echo $$RANDOM')
 
 VERSION?=$(shell git describe --always --tags || echo "dev" )
 # go tool nm ./local-ai | grep Commit
-LD_FLAGS?=
+LD_FLAGS?=-s -w
 override LD_FLAGS += -X "github.com/mudler/LocalAI/internal.Version=$(VERSION)"
 override LD_FLAGS += -X "github.com/mudler/LocalAI/internal.Commit=$(shell git rev-parse HEAD)"
 
@@ -72,6 +72,14 @@ WHITE  := $(shell tput -Txterm setaf 7)
 CYAN   := $(shell tput -Txterm setaf 6)
 RESET  := $(shell tput -Txterm sgr0)
 
+UPX?=
+# check if upx exists
+ifeq (, $(shell which upx))
+	UPX=
+else
+	UPX=$(shell which upx)
+endif
+
 # Default Docker bridge IP
 E2E_BRIDGE_IP?=172.17.0.1
 
@@ -377,6 +385,7 @@ build: prepare backend-assets grpcs ## Build the project
 	$(info ${GREEN}I BUILD_TYPE: ${YELLOW}$(BUILD_TYPE)${RESET})
 	$(info ${GREEN}I GO_TAGS: ${YELLOW}$(GO_TAGS)${RESET})
 	$(info ${GREEN}I LD_FLAGS: ${YELLOW}$(LD_FLAGS)${RESET})
+	$(info ${GREEN}I UPX: ${YELLOW}$(UPX)${RESET})
 ifneq ($(BACKEND_LIBS),)
 	$(MAKE) backend-assets/lib
 	cp -f $(BACKEND_LIBS) backend-assets/lib/
@@ -421,7 +430,7 @@ else
 endif
 
 dist-cross-linux-arm64:
-	CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_NATIVE=off" GRPC_BACKENDS="backend-assets/grpc/llama-cpp-fallback backend-assets/grpc/llama-cpp-grpc backend-assets/util/llama-cpp-rpc-server" \
+	CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_NATIVE=off" GRPC_BACKENDS="backend-assets/grpc/llama-cpp-fallback backend-assets/grpc/llama-cpp-grpc backend-assets/util/llama-cpp-rpc-server" GO_TAGS="p2p" \
 	STATIC=true $(MAKE) build
 	mkdir -p release
 # if BUILD_ID is empty, then we don't append it to the binary name
@@ -471,7 +480,7 @@ prepare-e2e:
 	mkdir -p $(TEST_DIR)
 	cp -rfv $(abspath ./tests/e2e-fixtures)/gpu.yaml $(TEST_DIR)/gpu.yaml
 	test -e $(TEST_DIR)/ggllm-test-model.bin || wget -q https://huggingface.co/TheBloke/CodeLlama-7B-Instruct-GGUF/resolve/main/codellama-7b-instruct.Q2_K.gguf -O $(TEST_DIR)/ggllm-test-model.bin
-	docker build --build-arg GRPC_BACKENDS="$(GRPC_BACKENDS)" --build-arg IMAGE_TYPE=core --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg CUDA_MAJOR_VERSION=12 --build-arg CUDA_MINOR_VERSION=4 --build-arg FFMPEG=true -t localai-tests .
+	docker build --build-arg GRPC_BACKENDS="$(GRPC_BACKENDS)" --build-arg IMAGE_TYPE=core --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg CUDA_MAJOR_VERSION=12 --build-arg CUDA_MINOR_VERSION=0 --build-arg FFMPEG=true -t localai-tests .
 
 run-e2e-image:
 	ls -liah $(abspath ./tests/e2e-fixtures)
@@ -733,13 +742,22 @@ backend-assets/grpc: protogen-go replace
 backend-assets/grpc/bert-embeddings: sources/go-bert.cpp sources/go-bert.cpp/libgobert.a backend-assets/grpc
 	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-bert.cpp LIBRARY_PATH=$(CURDIR)/sources/go-bert.cpp \
 	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/bert-embeddings ./backend/go/llm/bert/
+ifneq ($(UPX),)
+	$(UPX) backend-assets/grpc/bert-embeddings
+endif
 
 backend-assets/grpc/gpt4all: sources/gpt4all sources/gpt4all/gpt4all-bindings/golang/libgpt4all.a backend-assets/gpt4all backend-assets/grpc
 	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/gpt4all/gpt4all-bindings/golang/ LIBRARY_PATH=$(CURDIR)/sources/gpt4all/gpt4all-bindings/golang/ \
 	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/gpt4all ./backend/go/llm/gpt4all/
+ifneq ($(UPX),)
+	$(UPX) backend-assets/grpc/gpt4all
+endif
 
 backend-assets/grpc/huggingface: backend-assets/grpc
 	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/huggingface ./backend/go/llm/langchain/
+ifneq ($(UPX),)
+	$(UPX) backend-assets/grpc/huggingface
+endif
 
 backend/cpp/llama/llama.cpp:
 	LLAMA_VERSION=$(CPPLLAMA_VERSION) $(MAKE) -C backend/cpp/llama llama.cpp
@@ -765,6 +783,9 @@ else
 	echo "BUILD_GRPC_FOR_BACKEND_LLAMA is not defined."
 	LLAMA_VERSION=$(CPPLLAMA_VERSION) $(MAKE) -C backend/cpp/${VARIANT} grpc-server
 endif
+ifneq ($(UPX),)
+	$(UPX) backend/cpp/${VARIANT}/grpc-server
+endif
 
 # This target is for manually building a variant with-auto detected flags
 backend-assets/grpc/llama-cpp: backend-assets/grpc backend/cpp/llama/llama.cpp
@@ -837,33 +858,57 @@ backend-assets/grpc/llama-cpp-grpc: backend-assets/grpc backend/cpp/llama/llama.
 backend-assets/util/llama-cpp-rpc-server: backend-assets/grpc/llama-cpp-grpc
 	mkdir -p backend-assets/util/
 	cp -rf backend/cpp/llama-grpc/llama.cpp/build/bin/rpc-server backend-assets/util/llama-cpp-rpc-server
+ifneq ($(UPX),)
+	$(UPX) backend-assets/util/llama-cpp-rpc-server
+endif
 
 backend-assets/grpc/llama-ggml: sources/go-llama.cpp sources/go-llama.cpp/libbinding.a backend-assets/grpc
 	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-llama.cpp LIBRARY_PATH=$(CURDIR)/sources/go-llama.cpp \
 	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/llama-ggml ./backend/go/llm/llama-ggml/
+ifneq ($(UPX),)
+	$(UPX) backend-assets/grpc/llama-ggml
+endif
 
 backend-assets/grpc/piper: sources/go-piper sources/go-piper/libpiper_binding.a backend-assets/grpc backend-assets/espeak-ng-data
 	CGO_CXXFLAGS="$(PIPER_CGO_CXXFLAGS)" CGO_LDFLAGS="$(PIPER_CGO_LDFLAGS)" LIBRARY_PATH=$(CURDIR)/sources/go-piper \
 	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/piper ./backend/go/tts/
+ifneq ($(UPX),)
+	$(UPX) backend-assets/grpc/piper
+endif
 
 backend-assets/grpc/rwkv: sources/go-rwkv.cpp sources/go-rwkv.cpp/librwkv.a backend-assets/grpc
 	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-rwkv.cpp LIBRARY_PATH=$(CURDIR)/sources/go-rwkv.cpp \
 	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/rwkv ./backend/go/llm/rwkv
+ifneq ($(UPX),)
+	$(UPX) backend-assets/grpc/rwkv
+endif
 
 backend-assets/grpc/stablediffusion: sources/go-stable-diffusion sources/go-stable-diffusion/libstablediffusion.a backend-assets/grpc
 	CGO_LDFLAGS="$(CGO_LDFLAGS)" CPATH="$(CPATH):$(CURDIR)/sources/go-stable-diffusion/:/usr/include/opencv4" LIBRARY_PATH=$(CURDIR)/sources/go-stable-diffusion/ \
 	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/stablediffusion ./backend/go/image/stablediffusion
+ifneq ($(UPX),)
+	$(UPX) backend-assets/grpc/stablediffusion
+endif
 
 backend-assets/grpc/tinydream: sources/go-tiny-dream sources/go-tiny-dream/libtinydream.a backend-assets/grpc
 	CGO_LDFLAGS="$(CGO_LDFLAGS)" LIBRARY_PATH=$(CURDIR)/go-tiny-dream \
 	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/tinydream ./backend/go/image/tinydream
+ifneq ($(UPX),)
+	$(UPX) backend-assets/grpc/tinydream
+endif
 
 backend-assets/grpc/whisper: sources/whisper.cpp sources/whisper.cpp/libwhisper.a backend-assets/grpc
 	CGO_LDFLAGS="$(CGO_LDFLAGS) $(CGO_LDFLAGS_WHISPER)" C_INCLUDE_PATH="$(CURDIR)/sources/whisper.cpp/include:$(CURDIR)/sources/whisper.cpp/ggml/include" LIBRARY_PATH=$(CURDIR)/sources/whisper.cpp \
 	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/whisper ./backend/go/transcribe/
+ifneq ($(UPX),)
+	$(UPX) backend-assets/grpc/whisper
+endif
 
 backend-assets/grpc/local-store: backend-assets/grpc
 	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/local-store ./backend/go/stores/
+ifneq ($(UPX),)
+	$(UPX) backend-assets/grpc/local-store
+endif
 
 grpcs: prepare $(GRPC_BACKENDS)
 

diff --git a/backend/cpp/llama/grpc-server.cpp b/backend/cpp/llama/grpc-server.cpp
@@ -2259,7 +2259,6 @@ static void params_parse(const backend::ModelOptions* request,
      // get the directory of modelfile
      std::string model_dir = params.model.substr(0, params.model.find_last_of("/\\"));
      params.lora_adapter.push_back(std::make_tuple(model_dir + "/"+request->loraadapter(), scale_factor));
-     params.lora_base  =  model_dir + "/"+request->lorabase();
     }
     params.use_mlock = request->mlock();
     params.use_mmap = request->mmap();

diff --git a/backend/python/autogptq/requirements.txt b/backend/python/autogptq/requirements.txt
@@ -1,6 +1,6 @@
 accelerate
 auto-gptq==0.7.1
-grpcio==1.65.0
+grpcio==1.65.1
 protobuf
 torch
 certifi

diff --git a/backend/python/bark/requirements.txt b/backend/python/bark/requirements.txt
@@ -1,6 +1,6 @@
 accelerate
 bark==0.1.5
-grpcio==1.65.0
+grpcio==1.65.1
 protobuf
 certifi
 transformers
diff --git a/backend/python/common/template/requirements.txt b/backend/python/common/template/requirements.txt
@@ -1,2 +1,2 @@
-grpcio==1.65.0
+grpcio==1.65.1
 protobuf
diff --git a/backend/python/coqui/requirements.txt b/backend/python/coqui/requirements.txt
@@ -1,6 +1,6 @@
 accelerate
 TTS==0.22.0
-grpcio==1.65.0
+grpcio==1.65.1
 protobuf
 certifi
 transformers
diff --git a/backend/python/diffusers/requirements.txt b/backend/python/diffusers/requirements.txt
@@ -3,7 +3,7 @@ accelerate
 compel
 peft
 diffusers
-grpcio==1.65.0
+grpcio==1.65.1
 opencv-python
 pillow
 protobuf

diff --git a/backend/python/exllama2/requirements.txt b/backend/python/exllama2/requirements.txt
@@ -1,5 +1,5 @@
 accelerate
-grpcio==1.65.0
+grpcio==1.65.1
 protobuf
 certifi
 torch

diff --git a/backend/python/mamba/requirements.txt b/backend/python/mamba/requirements.txt
@@ -1,6 +1,6 @@
 causal-conv1d==1.4.0
 mamba-ssm==2.2.2
-grpcio==1.65.0
+grpcio==1.65.1
 protobuf
 certifi
 transformers
diff --git a/backend/python/openvoice/requirements-intel.txt b/backend/python/openvoice/requirements-intel.txt
@@ -2,7 +2,7 @@
 intel-extension-for-pytorch
 torch
 optimum[openvino]
-grpcio==1.64.1
+grpcio==1.65.1
 protobuf
 librosa==0.9.1
 faster-whisper==1.0.3

diff --git a/backend/python/openvoice/requirements.txt b/backend/python/openvoice/requirements.txt
@@ -1,4 +1,4 @@
-grpcio==1.65.0
+grpcio==1.65.1
 protobuf
 librosa
 faster-whisper

diff --git a/backend/python/parler-tts/requirements.txt b/backend/python/parler-tts/requirements.txt
@@ -1,5 +1,5 @@
 accelerate
-grpcio==1.65.0
+grpcio==1.65.1
 protobuf
 torch
 git+https://github.com/huggingface/parler-tts.git@10016fb0300c0dc31a0fb70e26f3affee7b62f16

diff --git a/backend/python/rerankers/requirements.txt b/backend/python/rerankers/requirements.txt
@@ -1,6 +1,6 @@
 accelerate
 rerankers[transformers]
-grpcio==1.65.0
+grpcio==1.65.1
 protobuf
 certifi
 transformers
diff --git a/backend/python/sentencetransformers/requirements-intel.txt b/backend/python/sentencetransformers/requirements-intel.txt
@@ -2,4 +2,4 @@
 intel-extension-for-pytorch
 torch
 optimum[openvino]
-setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406
+setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406
diff --git a/backend/python/sentencetransformers/requirements.txt b/backend/python/sentencetransformers/requirements.txt
@@ -1,6 +1,6 @@
 accelerate
 sentence-transformers==3.0.1
 transformers
-grpcio==1.65.0
+grpcio==1.65.1
 protobuf
 certifi
diff --git a/backend/python/transformers-musicgen/requirements-intel.txt b/backend/python/transformers-musicgen/requirements-intel.txt
@@ -2,4 +2,4 @@
 intel-extension-for-pytorch
 torch
 optimum[openvino]
-setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406
+setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406