diff --git a/.devcontainer-scripts/postcreate.sh b/.devcontainer-scripts/postcreate.sh
new file mode 100644
index 000000000000..3f9035090a35
--- /dev/null
+++ b/.devcontainer-scripts/postcreate.sh
@@ -0,0 +1,17 @@
+#!/bin/bash
+
+cd /workspace
+
+# Get the files into the volume without a bind mount
+if [ ! -d ".git" ]; then
+    git clone https://github.com/mudler/LocalAI.git .
+else
+    git fetch
+fi
+
+echo "Standard Post-Create script completed."
+
+if [ -f "/devcontainer-customization/postcreate.sh" ]; then
+    echo "Launching customization postcreate.sh"
+    bash "/devcontainer-customization/postcreate.sh"
+fi
\ No newline at end of file
diff --git a/.devcontainer-scripts/poststart.sh b/.devcontainer-scripts/poststart.sh
new file mode 100644
index 000000000000..196e821dbd65
--- /dev/null
+++ b/.devcontainer-scripts/poststart.sh
@@ -0,0 +1,16 @@
+#!/bin/bash
+
+cd /workspace
+
+# Grab the pre-stashed backend assets to avoid build issues
+cp -r /build/backend-assets /workspace/backend-assets
+
+# Ensures generated source files are present upon load
+make prepare
+
+echo "Standard Post-Start script completed."
+
+if [ -f "/devcontainer-customization/poststart.sh" ]; then
+    echo "Launching customization poststart.sh"
+    bash "/devcontainer-customization/poststart.sh"
+fi
\ No newline at end of file
diff --git a/.devcontainer-scripts/utils.sh b/.devcontainer-scripts/utils.sh
new file mode 100644
index 000000000000..02b588ae6ac6
--- /dev/null
+++ b/.devcontainer-scripts/utils.sh
@@ -0,0 +1,49 @@
+#!/bin/bash
+
+# This file contains some really simple functions that are useful when building up customization scripts.
+
+
+# Checks if the git config has a user registered - and sets it up if not.
+#
+# Param 1: name
+# Param 2: email
+#
+config_user() {
+    local gcn=$(git config --global user.name)
+    if [ -z "${gcn}" ]; then
+        echo "Setting up git user / remote"
+        git config --global user.name "$1"
+        git config --global user.email "$2"
+
+    fi
+}
+
+# Checks if the git remote is configured - and sets it up if not. Fetches either way.
+#
+# Param 1: remote name
+# Param 2: remote url
+#
+config_remote() {
+    local gr=$(git remote -v | grep $1)
+    if [ -z "${gr}" ]; then
+        git remote add $1 $2
+    fi
+    git fetch $1
+}
+
+# Setup special .ssh files
+#
+# Param 1: bash array, filenames relative to the customization directory that should be copied to ~/.ssh
+setup_ssh() {
+    local files=("$@")
+    for file in "${files[@]}"; do
+        local cfile="/devcontainer-customization/${file}"
+        local hfile="${HOME}/.ssh/${file}"
+        if [ ! -f "${hfile}" ]; then
+            echo "copying ${file}"
+            cp "${cfile}" "${hfile}"
+            chmod 600 "${hfile}"
+        fi
+    done
+    ls ~/.ssh
+}
diff --git a/.devcontainer/customization/README.md b/.devcontainer/customization/README.md
new file mode 100644
index 000000000000..89eb48e8da99
--- /dev/null
+++ b/.devcontainer/customization/README.md
@@ -0,0 +1,25 @@
+Place any additional resources your environment requires in this directory.
+
+Script hooks are currently called for:
+`postcreate.sh` and `poststart.sh`
+
+If files with those names exist here, they will be called at the end of the normal script.
+
+This is a good place to set things like `git config --global user.name`, and to handle any other files that are mounted via this directory.
+
+To assist in doing so, `source /.devcontainer-scripts/utils.sh` provides utility functions that may be useful - for example:
+
+```
+#!/bin/bash
+
+source "/.devcontainer-scripts/utils.sh"
+
+sshfiles=("config" "key.pub")
+
+setup_ssh "${sshfiles[@]}"
+
+config_user "YOUR NAME" "YOUR EMAIL"
+
+config_remote "REMOTE NAME" "REMOTE URL"
+
+```
\ No newline at end of file
diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json
new file mode 100644
index 000000000000..37c81ffc41da
--- /dev/null
+++ b/.devcontainer/devcontainer.json
@@ -0,0 +1,24 @@
+{
+    "$schema": "https://raw.githubusercontent.com/devcontainers/spec/main/schemas/devContainer.schema.json",
+    "name": "LocalAI",
+    "workspaceFolder": "/workspace",
+    "dockerComposeFile": [ "./docker-compose-devcontainer.yml" ],
+    "service": "api",
+    "shutdownAction": "stopCompose",
+    "customizations": {
+        "vscode": {
+            "extensions": [
+                "golang.go",
+                "ms-vscode.makefile-tools",
+                "ms-azuretools.vscode-docker",
+                "ms-python.python",
+                "ms-python.debugpy",
+                "wayou.vscode-todo-highlight",
+                "waderyan.gitblame"
+            ]
+        }
+    },
+    "forwardPorts": [8080, 3000],
+    "postCreateCommand": "bash /.devcontainer-scripts/postcreate.sh",
+    "postStartCommand": "bash /.devcontainer-scripts/poststart.sh"
+}
\ No newline at end of file
diff --git a/.devcontainer/docker-compose-devcontainer.yml b/.devcontainer/docker-compose-devcontainer.yml
new file mode 100644
index 000000000000..8795d64da169
--- /dev/null
+++ b/.devcontainer/docker-compose-devcontainer.yml
@@ -0,0 +1,48 @@
+services:
+  api:
+    build:
+      context: ..
+      dockerfile: Dockerfile
+      target: devcontainer
+      args:
+        - FFMPEG=true
+        - IMAGE_TYPE=extras
+        - GO_TAGS=stablediffusion p2p tts
+    env_file:
+      - ../.env
+    ports:
+      - 8080:8080
+    volumes:
+      - localai_workspace:/workspace
+      - ../models:/host-models
+      - ./customization:/devcontainer-customization
+    command: /bin/sh -c "while sleep 1000; do :; done"
+    cap_add:
+      - SYS_PTRACE
+    security_opt:
+      - seccomp:unconfined
+  prometheus:
+    image: prom/prometheus
+    container_name: prometheus
+    command:
+      - '--config.file=/etc/prometheus/prometheus.yml'
+    ports:
+      - 9090:9090
+    restart: unless-stopped
+    volumes:
+      - ./prometheus:/etc/prometheus
+      - prom_data:/prometheus
+  grafana:
+    image: grafana/grafana
+    container_name: grafana
+    ports:
+      - 3000:3000
+    restart: unless-stopped
+    environment:
+      - GF_SECURITY_ADMIN_USER=admin
+      - GF_SECURITY_ADMIN_PASSWORD=grafana
+    volumes:
+      - ./grafana:/etc/grafana/provisioning/datasources
+volumes:
+  prom_data:
+  localai_workspace:
\ No newline at end of file
diff --git a/.devcontainer/grafana/datasource.yml b/.devcontainer/grafana/datasource.yml
new file mode 100644
index 000000000000..1ed2fa3c2a28
--- /dev/null
+++ b/.devcontainer/grafana/datasource.yml
@@ -0,0 +1,10 @@
+
+apiVersion: 1
+
+datasources:
+- name: Prometheus
+  type: prometheus
+  url: http://prometheus:9090
+  isDefault: true
+  access: proxy
+  editable: true
diff --git a/.devcontainer/prometheus/prometheus.yml b/.devcontainer/prometheus/prometheus.yml
new file mode 100644
index 000000000000..18c44da71447
--- /dev/null
+++ b/.devcontainer/prometheus/prometheus.yml
@@ -0,0 +1,21 @@
+global:
+  scrape_interval: 15s
+  scrape_timeout: 10s
+  evaluation_interval: 15s
+alerting:
+  alertmanagers:
+  - static_configs:
+    - targets: []
+    scheme: http
+    timeout: 10s
+    api_version: v1
+scrape_configs:
+- job_name: prometheus
+  honor_timestamps: true
+  scrape_interval: 15s
+  scrape_timeout: 10s
+  metrics_path: /metrics
+  scheme: http
+  static_configs:
+  - 
targets: + - localhost:9090 \ No newline at end of file diff --git a/.dockerignore b/.dockerignore index 3954769f5c36..e91f0008f1c0 100644 --- a/.dockerignore +++ b/.dockerignore @@ -1,6 +1,7 @@ .idea .github .vscode +.devcontainer models examples/chatbot-ui/models examples/rwkv/models diff --git a/.env b/.env index 95a515bc850f..9e5dbd79ee4f 100644 --- a/.env +++ b/.env @@ -79,6 +79,9 @@ ### Enable to run parallel requests # LOCALAI_PARALLEL_REQUESTS=true +# Enable to allow p2p mode +# LOCALAI_P2P=true + ### Watchdog settings ### # Enables watchdog to kill backends that are inactive for too much time diff --git a/.github/bump_deps.sh b/.github/bump_deps.sh index d8fff4a3148d..66dea9a38ad2 100755 --- a/.github/bump_deps.sh +++ b/.github/bump_deps.sh @@ -6,4 +6,17 @@ VAR=$3 LAST_COMMIT=$(curl -s -H "Accept: application/vnd.github.VERSION.sha" "https://api.github.com/repos/$REPO/commits/$BRANCH") +# Read $VAR from Makefile (only first match) +set +e +CURRENT_COMMIT="$(grep -m1 "^$VAR?=" Makefile | cut -d'=' -f2)" +set -e + sed -i Makefile -e "s/$VAR?=.*/$VAR?=$LAST_COMMIT/" + +if [ -z "$CURRENT_COMMIT" ]; then + echo "Could not find $VAR in Makefile." + exit 0 +fi + +echo "Changes: https://github.com/$REPO/compare/${CURRENT_COMMIT}..${LAST_COMMIT}" >> "${VAR}_message.txt" +echo "${LAST_COMMIT}" >> "${VAR}_commit.txt" \ No newline at end of file diff --git a/.github/dependabot.yml b/.github/dependabot.yml index 91b06ba80287..5016ebdb0ee5 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -67,10 +67,6 @@ updates: directory: "/backend/python/parler-tts" schedule: interval: "weekly" - - package-ecosystem: "pip" - directory: "/backend/python/petals" - schedule: - interval: "weekly" - package-ecosystem: "pip" directory: "/backend/python/rerankers" schedule: diff --git a/.github/workflows/bump_deps.yaml b/.github/workflows/bump_deps.yaml index 5909c9812442..68cb81cbceca 100644 --- a/.github/workflows/bump_deps.yaml +++ b/.github/workflows/bump_deps.yaml @@ -40,17 +40,30 @@ jobs: steps: - uses: actions/checkout@v4 - name: Bump dependencies 🔧 + id: bump run: | bash .github/bump_deps.sh ${{ matrix.repository }} ${{ matrix.branch }} ${{ matrix.variable }} + { + echo 'message<> "$GITHUB_OUTPUT" + { + echo 'commit<> "$GITHUB_OUTPUT" + rm -rfv ${{ matrix.variable }}_message.txt + rm -rfv ${{ matrix.variable }}_commit.txt - name: Create Pull Request uses: peter-evans/create-pull-request@v6 with: token: ${{ secrets.UPDATE_BOT_TOKEN }} push-to-fork: ci-forks/LocalAI commit-message: ':arrow_up: Update ${{ matrix.repository }}' - title: 'chore: :arrow_up: Update ${{ matrix.repository }}' + title: 'chore: :arrow_up: Update ${{ matrix.repository }} to `${{ steps.bump.outputs.commit }}`' branch: "update/${{ matrix.variable }}" - body: Bump of ${{ matrix.repository }} version + body: ${{ steps.bump.outputs.message }} signoff: true diff --git a/.github/workflows/deploy-explorer.yaml b/.github/workflows/deploy-explorer.yaml new file mode 100644 index 000000000000..7b5c0484fe73 --- /dev/null +++ b/.github/workflows/deploy-explorer.yaml @@ -0,0 +1,64 @@ +name: Explorer deployment + +on: + push: + branches: + - master + tags: + - 'v*' + +concurrency: + group: ci-deploy-${{ github.head_ref || github.ref }}-${{ github.repository }} + +jobs: + build-linux: + runs-on: ubuntu-latest + steps: + - name: Clone + uses: actions/checkout@v4 + with: + submodules: true + - uses: actions/setup-go@v5 + with: + go-version: '1.21.x' + cache: false + - name: Dependencies + run: | + sudo apt-get update + sudo apt-get 
install -y wget curl build-essential ffmpeg protobuf-compiler ccache upx-ucl gawk cmake libgmock-dev + go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af + go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2 + make protogen-go + - name: Build api + run: | + CGO_ENABLED=0 make build-api + - name: rm + uses: appleboy/ssh-action@v1.0.3 + with: + host: ${{ secrets.EXPLORER_SSH_HOST }} + username: ${{ secrets.EXPLORER_SSH_USERNAME }} + key: ${{ secrets.EXPLORER_SSH_KEY }} + port: ${{ secrets.EXPLORER_SSH_PORT }} + script: | + sudo rm -rf local-ai/ || true + - name: copy file via ssh + uses: appleboy/scp-action@v0.1.7 + with: + host: ${{ secrets.EXPLORER_SSH_HOST }} + username: ${{ secrets.EXPLORER_SSH_USERNAME }} + key: ${{ secrets.EXPLORER_SSH_KEY }} + port: ${{ secrets.EXPLORER_SSH_PORT }} + source: "local-ai" + overwrite: true + rm: true + target: ./local-ai + - name: restarting + uses: appleboy/ssh-action@v1.0.3 + with: + host: ${{ secrets.EXPLORER_SSH_HOST }} + username: ${{ secrets.EXPLORER_SSH_USERNAME }} + key: ${{ secrets.EXPLORER_SSH_KEY }} + port: ${{ secrets.EXPLORER_SSH_PORT }} + script: | + sudo cp -rfv local-ai/local-ai /usr/bin/local-ai + sudo systemctl restart local-ai diff --git a/.github/workflows/test-extra.yml b/.github/workflows/test-extra.yml index e969a95fc01d..8b37b52ddb99 100644 --- a/.github/workflows/test-extra.yml +++ b/.github/workflows/test-extra.yml @@ -168,32 +168,6 @@ jobs: make --jobs=5 --output-sync=target -C backend/python/transformers-musicgen make --jobs=5 --output-sync=target -C backend/python/transformers-musicgen test - - - # tests-petals: - # runs-on: ubuntu-latest - # steps: - # - name: Clone - # uses: actions/checkout@v4 - # with: - # submodules: true - # - name: Dependencies - # run: | - # sudo apt-get update - # sudo apt-get install build-essential ffmpeg - # # Install UV - # curl -LsSf https://astral.sh/uv/install.sh | sh - # sudo apt-get install -y ca-certificates cmake curl patch python3-pip - # sudo apt-get install -y libopencv-dev - # pip install --user --no-cache-dir grpcio-tools==1.64.1 - - # - name: Test petals - # run: | - # make --jobs=5 --output-sync=target -C backend/python/petals - # make --jobs=5 --output-sync=target -C backend/python/petals test - - - # tests-bark: # runs-on: ubuntu-latest # steps: diff --git a/.gitignore b/.gitignore index 096689c50eb5..65eb92570f6f 100644 --- a/.gitignore +++ b/.gitignore @@ -54,3 +54,6 @@ docs/static/gallery.html # backend virtual environments **/venv + +# per-developer customization files for the development container +.devcontainer/customization/* \ No newline at end of file diff --git a/.vscode/launch.json b/.vscode/launch.json index 2727da924b0a..504934210b3b 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -3,12 +3,12 @@ "configurations": [ { "name": "Python: Current File", - "type": "python", + "type": "debugpy", "request": "launch", "program": "${file}", "console": "integratedTerminal", "justMyCode": false, - "cwd": "${workspaceFolder}/examples/langchain-chroma", + "cwd": "${fileDirname}", "env": { "OPENAI_API_BASE": "http://localhost:8080/v1", "OPENAI_API_KEY": "abc" @@ -19,15 +19,16 @@ "type": "go", "request": "launch", "mode": "debug", - "program": "${workspaceFolder}/main.go", - "args": [ - "api" - ], + "program": "${workspaceRoot}", + "args": [], "env": { - "C_INCLUDE_PATH": 
"${workspaceFolder}/go-llama:${workspaceFolder}/go-stable-diffusion/:${workspaceFolder}/gpt4all/gpt4all-bindings/golang/:${workspaceFolder}/go-gpt2:${workspaceFolder}/go-rwkv:${workspaceFolder}/whisper.cpp:${workspaceFolder}/go-bert:${workspaceFolder}/bloomz", - "LIBRARY_PATH": "${workspaceFolder}/go-llama:${workspaceFolder}/go-stable-diffusion/:${workspaceFolder}/gpt4all/gpt4all-bindings/golang/:${workspaceFolder}/go-gpt2:${workspaceFolder}/go-rwkv:${workspaceFolder}/whisper.cpp:${workspaceFolder}/go-bert:${workspaceFolder}/bloomz", - "DEBUG": "true" - } + "LOCALAI_LOG_LEVEL": "debug", + "LOCALAI_P2P": "true", + "LOCALAI_FEDERATED": "true" + }, + "buildFlags": ["-tags", "stablediffusion p2p tts", "-v"], + "envFile": "${workspaceFolder}/.env", + "cwd": "${workspaceRoot}" } ] } \ No newline at end of file diff --git a/Dockerfile b/Dockerfile index a0feadd9c7eb..9d6517604bfb 100644 --- a/Dockerfile +++ b/Dockerfile @@ -8,12 +8,12 @@ FROM ${BASE_IMAGE} AS requirements-core USER root -ARG GO_VERSION=1.22.5 +ARG GO_VERSION=1.22.6 ARG TARGETARCH ARG TARGETVARIANT ENV DEBIAN_FRONTEND=noninteractive -ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,huggingface-embeddings:/build/backend/python/sentencetransformers/run.sh,petals:/build/backend/python/petals/run.sh,transformers:/build/backend/python/transformers/run.sh,sentencetransformers:/build/backend/python/sentencetransformers/run.sh,rerankers:/build/backend/python/rerankers/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,exllama:/build/backend/python/exllama/run.sh,openvoice:/build/backend/python/openvoice/run.sh,vall-e-x:/build/backend/python/vall-e-x/run.sh,vllm:/build/backend/python/vllm/run.sh,mamba:/build/backend/python/mamba/run.sh,exllama2:/build/backend/python/exllama2/run.sh,transformers-musicgen:/build/backend/python/transformers-musicgen/run.sh,parler-tts:/build/backend/python/parler-tts/run.sh" +ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,huggingface-embeddings:/build/backend/python/sentencetransformers/run.sh,transformers:/build/backend/python/transformers/run.sh,sentencetransformers:/build/backend/python/sentencetransformers/run.sh,rerankers:/build/backend/python/rerankers/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,exllama:/build/backend/python/exllama/run.sh,openvoice:/build/backend/python/openvoice/run.sh,vall-e-x:/build/backend/python/vall-e-x/run.sh,vllm:/build/backend/python/vllm/run.sh,mamba:/build/backend/python/mamba/run.sh,exllama2:/build/backend/python/exllama2/run.sh,transformers-musicgen:/build/backend/python/transformers-musicgen/run.sh,parler-tts:/build/backend/python/parler-tts/run.sh" RUN apt-get update && \ @@ -30,7 +30,7 @@ RUN apt-get update && \ # Install Go RUN curl -L -s https://go.dev/dl/go${GO_VERSION}.linux-${TARGETARCH}.tar.gz | tar -C /usr/local -xz -ENV PATH $PATH:/root/go/bin:/usr/local/go/bin +ENV PATH=$PATH:/root/go/bin:/usr/local/go/bin # Install grpc compilers RUN go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2 && \ @@ -39,15 +39,18 @@ RUN go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2 && \ COPY --chmod=644 custom-ca-certs/* /usr/local/share/ca-certificates/ RUN update-ca-certificates +RUN test -n "$TARGETARCH" \ + || (echo 'warn: missing $TARGETARCH, either set this `ARG` manually, or run using `docker buildkit`') + # Use the 
variables in subsequent instructions RUN echo "Target Architecture: $TARGETARCH" RUN echo "Target Variant: $TARGETVARIANT" # Cuda -ENV PATH /usr/local/cuda/bin:${PATH} +ENV PATH=/usr/local/cuda/bin:${PATH} # HipBLAS requirements -ENV PATH /opt/rocm/bin:${PATH} +ENV PATH=/opt/rocm/bin:${PATH} # OpenBLAS requirements and stable diffusion RUN apt-get update && \ @@ -62,9 +65,6 @@ RUN ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2 WORKDIR /build -RUN test -n "$TARGETARCH" \ - || (echo 'warn: missing $TARGETARCH, either set this `ARG` manually, or run using `docker buildkit`') - ################################### ################################### @@ -81,7 +81,7 @@ RUN apt-get update && \ espeak \ python3-pip \ python-is-python3 \ - python3-dev \ + python3-dev llvm \ python3-venv && \ apt-get clean && \ rm -rf /var/lib/apt/lists/* && \ @@ -217,13 +217,14 @@ RUN git clone --recurse-submodules --jobs 4 -b ${GRPC_VERSION} --depth 1 --shall ################################### ################################### -# The builder target compiles LocalAI. This target is not the target that will be uploaded to the registry. -# Adjustments to the build process should likely be made here. -FROM requirements-drivers AS builder +# The builder-base target has the arguments, variables, and copies shared between full builder images and the uncompiled devcontainer + +FROM requirements-drivers AS builder-base ARG GO_TAGS="stablediffusion tts p2p" ARG GRPC_BACKENDS ARG MAKEFLAGS +ARG LD_FLAGS="-s -w" ENV GRPC_BACKENDS=${GRPC_BACKENDS} ENV GO_TAGS=${GO_TAGS} @@ -231,14 +232,12 @@ ENV MAKEFLAGS=${MAKEFLAGS} ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility ENV NVIDIA_REQUIRE_CUDA="cuda>=${CUDA_MAJOR_VERSION}.0" ENV NVIDIA_VISIBLE_DEVICES=all +ENV LD_FLAGS=${LD_FLAGS} -WORKDIR /build +RUN echo "GO_TAGS: $GO_TAGS" && echo "TARGETARCH: $TARGETARCH" -COPY . . -COPY .git . -RUN echo "GO_TAGS: $GO_TAGS" +WORKDIR /build -RUN make prepare # We need protoc installed, and the version in 22.04 is too old. We will create one as part installing the GRPC build below # but that will also being in a newer version of absl which stablediffusion cannot compile with. 
This version of protoc is only @@ -256,9 +255,30 @@ RUN <loraadapter(), scale_factor)); + params.lora_adapters.push_back({ model_dir + "/"+request->loraadapter(), scale_factor }); } params.use_mlock = request->mlock(); params.use_mmap = request->mmap(); diff --git a/backend/go/llm/gpt4all/gpt4all.go b/backend/go/llm/gpt4all/gpt4all.go deleted file mode 100644 index 9caab48c0dbb..000000000000 --- a/backend/go/llm/gpt4all/gpt4all.go +++ /dev/null @@ -1,62 +0,0 @@ -package main - -// This is a wrapper to statisfy the GRPC service interface -// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc) -import ( - "fmt" - - "github.com/mudler/LocalAI/pkg/grpc/base" - pb "github.com/mudler/LocalAI/pkg/grpc/proto" - gpt4all "github.com/nomic-ai/gpt4all/gpt4all-bindings/golang" -) - -type LLM struct { - base.SingleThread - - gpt4all *gpt4all.Model -} - -func (llm *LLM) Load(opts *pb.ModelOptions) error { - model, err := gpt4all.New(opts.ModelFile, - gpt4all.SetThreads(int(opts.Threads)), - gpt4all.SetLibrarySearchPath(opts.LibrarySearchPath)) - llm.gpt4all = model - return err -} - -func buildPredictOptions(opts *pb.PredictOptions) []gpt4all.PredictOption { - predictOptions := []gpt4all.PredictOption{ - gpt4all.SetTemperature(float64(opts.Temperature)), - gpt4all.SetTopP(float64(opts.TopP)), - gpt4all.SetTopK(int(opts.TopK)), - gpt4all.SetTokens(int(opts.Tokens)), - } - - if opts.Batch != 0 { - predictOptions = append(predictOptions, gpt4all.SetBatch(int(opts.Batch))) - } - return predictOptions -} - -func (llm *LLM) Predict(opts *pb.PredictOptions) (string, error) { - return llm.gpt4all.Predict(opts.Prompt, buildPredictOptions(opts)...) -} - -func (llm *LLM) PredictStream(opts *pb.PredictOptions, results chan string) error { - predictOptions := buildPredictOptions(opts) - - go func() { - llm.gpt4all.SetTokenCallback(func(token string) bool { - results <- token - return true - }) - _, err := llm.gpt4all.Predict(opts.Prompt, predictOptions...) 
- if err != nil { - fmt.Println("err: ", err) - } - llm.gpt4all.SetTokenCallback(nil) - close(results) - }() - - return nil -} diff --git a/backend/go/llm/gpt4all/main.go b/backend/go/llm/gpt4all/main.go deleted file mode 100644 index acf4408799e1..000000000000 --- a/backend/go/llm/gpt4all/main.go +++ /dev/null @@ -1,21 +0,0 @@ -package main - -// Note: this is started internally by LocalAI and a server is allocated for each model - -import ( - "flag" - - grpc "github.com/mudler/LocalAI/pkg/grpc" -) - -var ( - addr = flag.String("addr", "localhost:50051", "the address to connect to") -) - -func main() { - flag.Parse() - - if err := grpc.StartServer(*addr, &LLM{}); err != nil { - panic(err) - } -} diff --git a/backend/python/autogptq/requirements-cublas11.txt b/backend/python/autogptq/requirements-cublas11.txt new file mode 100644 index 000000000000..6461b696f4c6 --- /dev/null +++ b/backend/python/autogptq/requirements-cublas11.txt @@ -0,0 +1,2 @@ +--extra-index-url https://download.pytorch.org/whl/cu118 +torch diff --git a/backend/python/autogptq/requirements-cublas12.txt b/backend/python/autogptq/requirements-cublas12.txt new file mode 100644 index 000000000000..12c6d5d5eac2 --- /dev/null +++ b/backend/python/autogptq/requirements-cublas12.txt @@ -0,0 +1 @@ +torch diff --git a/backend/python/autogptq/requirements-intel.txt b/backend/python/autogptq/requirements-intel.txt index 635b4c31ee1a..755e19d854c5 100644 --- a/backend/python/autogptq/requirements-intel.txt +++ b/backend/python/autogptq/requirements-intel.txt @@ -2,4 +2,4 @@ intel-extension-for-pytorch torch optimum[openvino] -setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406 \ No newline at end of file +setuptools==72.1.0 # https://github.com/mudler/LocalAI/issues/2406 \ No newline at end of file diff --git a/backend/python/autogptq/requirements.txt b/backend/python/autogptq/requirements.txt index 7a1bf85f6ca3..53946f2398e5 100644 --- a/backend/python/autogptq/requirements.txt +++ b/backend/python/autogptq/requirements.txt @@ -1,7 +1,6 @@ accelerate auto-gptq==0.7.1 -grpcio==1.65.1 +grpcio==1.65.4 protobuf -torch certifi transformers \ No newline at end of file diff --git a/backend/python/bark/requirements-cpu.txt b/backend/python/bark/requirements-cpu.txt new file mode 100644 index 000000000000..0b2c3bc7ea88 --- /dev/null +++ b/backend/python/bark/requirements-cpu.txt @@ -0,0 +1,4 @@ +transformers +accelerate +torch +torchaudio \ No newline at end of file diff --git a/backend/python/bark/requirements-cublas11.txt b/backend/python/bark/requirements-cublas11.txt new file mode 100644 index 000000000000..71a6a93f442f --- /dev/null +++ b/backend/python/bark/requirements-cublas11.txt @@ -0,0 +1,5 @@ +--extra-index-url https://download.pytorch.org/whl/cu118 +torch +torchaudio +transformers +accelerate \ No newline at end of file diff --git a/backend/python/bark/requirements-cublas12.txt b/backend/python/bark/requirements-cublas12.txt new file mode 100644 index 000000000000..0fa270742a3d --- /dev/null +++ b/backend/python/bark/requirements-cublas12.txt @@ -0,0 +1,4 @@ +torch +torchaudio +transformers +accelerate \ No newline at end of file diff --git a/backend/python/bark/requirements-hipblas.txt b/backend/python/bark/requirements-hipblas.txt index 7bfc411bd51c..af9e820e9241 100644 --- a/backend/python/bark/requirements-hipblas.txt +++ b/backend/python/bark/requirements-hipblas.txt @@ -1,3 +1,5 @@ --extra-index-url https://download.pytorch.org/whl/rocm6.0 torch -torchaudio \ No newline at end of file +torchaudio 
+transformers +accelerate \ No newline at end of file diff --git a/backend/python/bark/requirements-intel.txt b/backend/python/bark/requirements-intel.txt index 5c4aa6a5ce68..9feb6eef3308 100644 --- a/backend/python/bark/requirements-intel.txt +++ b/backend/python/bark/requirements-intel.txt @@ -3,4 +3,6 @@ intel-extension-for-pytorch torch torchaudio optimum[openvino] -setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406 \ No newline at end of file +setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406 +transformers +accelerate \ No newline at end of file diff --git a/backend/python/bark/requirements.txt b/backend/python/bark/requirements.txt index d3f9f52be26a..08bfaec31ed4 100644 --- a/backend/python/bark/requirements.txt +++ b/backend/python/bark/requirements.txt @@ -1,6 +1,4 @@ -accelerate bark==0.1.5 -grpcio==1.65.1 +grpcio==1.65.5 protobuf -certifi -transformers \ No newline at end of file +certifi \ No newline at end of file diff --git a/backend/python/common/libbackend.sh b/backend/python/common/libbackend.sh index e8dfea03d56d..934b1fd37b14 100644 --- a/backend/python/common/libbackend.sh +++ b/backend/python/common/libbackend.sh @@ -18,10 +18,23 @@ # source $(dirname $0)/../common/libbackend.sh # function init() { + # Name of the backend (directory name) BACKEND_NAME=${PWD##*/} + + # Path where all backends files are MY_DIR=$(realpath `dirname $0`) + + # Build type BUILD_PROFILE=$(getBuildProfile) + # Environment directory + EDIR=${MY_DIR} + + # Allow to specify a custom env dir for shared environments + if [ "x${ENV_DIR}" != "x" ]; then + EDIR=${ENV_DIR} + fi + # If a backend has defined a list of valid build profiles... if [ ! -z "${LIMIT_TARGETS}" ]; then isValidTarget=$(checkTargets ${LIMIT_TARGETS}) @@ -74,13 +87,14 @@ function getBuildProfile() { # This function is idempotent, so you can call it as many times as you want and it will # always result in an activated virtual environment function ensureVenv() { - if [ ! -d "${MY_DIR}/venv" ]; then - uv venv ${MY_DIR}/venv + if [ ! 
-d "${EDIR}/venv" ]; then + uv venv ${EDIR}/venv echo "virtualenv created" fi - - if [ "x${VIRTUAL_ENV}" != "x${MY_DIR}/venv" ]; then - source ${MY_DIR}/venv/bin/activate + + # Source if we are not already in a Virtual env + if [ "x${VIRTUAL_ENV}" != "x${EDIR}/venv" ]; then + source ${EDIR}/venv/bin/activate echo "virtualenv activated" fi @@ -113,13 +127,24 @@ function installRequirements() { # These are the requirements files we will attempt to install, in order declare -a requirementFiles=( - "${MY_DIR}/requirements-install.txt" - "${MY_DIR}/requirements.txt" - "${MY_DIR}/requirements-${BUILD_TYPE}.txt" + "${EDIR}/requirements-install.txt" + "${EDIR}/requirements.txt" + "${EDIR}/requirements-${BUILD_TYPE}.txt" ) if [ "x${BUILD_TYPE}" != "x${BUILD_PROFILE}" ]; then - requirementFiles+=("${MY_DIR}/requirements-${BUILD_PROFILE}.txt") + requirementFiles+=("${EDIR}/requirements-${BUILD_PROFILE}.txt") + fi + + # if BUILD_TYPE is empty, we are a CPU build, so we should try to install the CPU requirements + if [ "x${BUILD_TYPE}" == "x" ]; then + requirementFiles+=("${EDIR}/requirements-cpu.txt") + fi + + requirementFiles+=("${EDIR}/requirements-after.txt") + + if [ "x${BUILD_TYPE}" != "x${BUILD_PROFILE}" ]; then + requirementFiles+=("${EDIR}/requirements-${BUILD_PROFILE}-after.txt") fi for reqFile in ${requirementFiles[@]}; do diff --git a/backend/python/common/template/requirements.txt b/backend/python/common/template/requirements.txt index 8d1e31513580..3517315535d5 100644 --- a/backend/python/common/template/requirements.txt +++ b/backend/python/common/template/requirements.txt @@ -1,2 +1,2 @@ -grpcio==1.65.1 +grpcio==1.65.5 protobuf \ No newline at end of file diff --git a/backend/python/coqui/requirements-cpu.txt b/backend/python/coqui/requirements-cpu.txt new file mode 100644 index 000000000000..bbcdc8cda704 --- /dev/null +++ b/backend/python/coqui/requirements-cpu.txt @@ -0,0 +1,3 @@ +transformers +accelerate +torch \ No newline at end of file diff --git a/backend/python/coqui/requirements-cublas11.txt b/backend/python/coqui/requirements-cublas11.txt new file mode 100644 index 000000000000..71a6a93f442f --- /dev/null +++ b/backend/python/coqui/requirements-cublas11.txt @@ -0,0 +1,5 @@ +--extra-index-url https://download.pytorch.org/whl/cu118 +torch +torchaudio +transformers +accelerate \ No newline at end of file diff --git a/backend/python/coqui/requirements-cublas12.txt b/backend/python/coqui/requirements-cublas12.txt new file mode 100644 index 000000000000..0fa270742a3d --- /dev/null +++ b/backend/python/coqui/requirements-cublas12.txt @@ -0,0 +1,4 @@ +torch +torchaudio +transformers +accelerate \ No newline at end of file diff --git a/backend/python/coqui/requirements-hipblas.txt b/backend/python/coqui/requirements-hipblas.txt index 7bfc411bd51c..af9e820e9241 100644 --- a/backend/python/coqui/requirements-hipblas.txt +++ b/backend/python/coqui/requirements-hipblas.txt @@ -1,3 +1,5 @@ --extra-index-url https://download.pytorch.org/whl/rocm6.0 torch -torchaudio \ No newline at end of file +torchaudio +transformers +accelerate \ No newline at end of file diff --git a/backend/python/coqui/requirements-intel.txt b/backend/python/coqui/requirements-intel.txt index 5c4aa6a5ce68..002a55c35c15 100644 --- a/backend/python/coqui/requirements-intel.txt +++ b/backend/python/coqui/requirements-intel.txt @@ -3,4 +3,6 @@ intel-extension-for-pytorch torch torchaudio optimum[openvino] -setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406 \ No newline at end of file +setuptools==72.1.0 # 
https://github.com/mudler/LocalAI/issues/2406 +transformers +accelerate \ No newline at end of file diff --git a/backend/python/coqui/requirements.txt b/backend/python/coqui/requirements.txt index e1cddaa3a06c..6125f7391aad 100644 --- a/backend/python/coqui/requirements.txt +++ b/backend/python/coqui/requirements.txt @@ -1,6 +1,4 @@ -accelerate TTS==0.22.0 -grpcio==1.65.1 +grpcio==1.65.5 protobuf -certifi -transformers \ No newline at end of file +certifi \ No newline at end of file diff --git a/backend/python/diffusers/backend.py b/backend/python/diffusers/backend.py index a348d290e7c6..8f42084822f2 100755 --- a/backend/python/diffusers/backend.py +++ b/backend/python/diffusers/backend.py @@ -18,13 +18,13 @@ import grpc from diffusers import StableDiffusion3Pipeline, StableDiffusionXLPipeline, StableDiffusionDepth2ImgPipeline, DPMSolverMultistepScheduler, StableDiffusionPipeline, DiffusionPipeline, \ - EulerAncestralDiscreteScheduler + EulerAncestralDiscreteScheduler, FluxPipeline, FluxTransformer2DModel from diffusers import StableDiffusionImg2ImgPipeline, AutoPipelineForText2Image, ControlNetModel, StableVideoDiffusionPipeline from diffusers.pipelines.stable_diffusion import safety_checker from diffusers.utils import load_image, export_to_video from compel import Compel, ReturnedEmbeddingsType - -from transformers import CLIPTextModel +from optimum.quanto import freeze, qfloat8, quantize +from transformers import CLIPTextModel, T5EncoderModel from safetensors.torch import load_file _ONE_DAY_IN_SECONDS = 60 * 60 * 24 @@ -163,6 +163,8 @@ def LoadModel(self, request, context): modelFile = request.Model self.cfg_scale = 7 + self.PipelineType = request.PipelineType + if request.CFGScale != 0: self.cfg_scale = request.CFGScale @@ -244,6 +246,30 @@ def LoadModel(self, request, context): torch_dtype=torchType, use_safetensors=True, variant=variant) + elif request.PipelineType == "FluxPipeline": + self.pipe = FluxPipeline.from_pretrained( + request.Model, + torch_dtype=torch.bfloat16) + if request.LowVRAM: + self.pipe.enable_model_cpu_offload() + elif request.PipelineType == "FluxTransformer2DModel": + dtype = torch.bfloat16 + # specify from environment or default to "ChuckMcSneed/FLUX.1-dev" + bfl_repo = os.environ.get("BFL_REPO", "ChuckMcSneed/FLUX.1-dev") + + transformer = FluxTransformer2DModel.from_single_file(modelFile, torch_dtype=dtype) + quantize(transformer, weights=qfloat8) + freeze(transformer) + text_encoder_2 = T5EncoderModel.from_pretrained(bfl_repo, subfolder="text_encoder_2", torch_dtype=dtype) + quantize(text_encoder_2, weights=qfloat8) + freeze(text_encoder_2) + + self.pipe = FluxPipeline.from_pretrained(bfl_repo, transformer=None, text_encoder_2=None, torch_dtype=dtype) + self.pipe.transformer = transformer + self.pipe.text_encoder_2 = text_encoder_2 + + if request.LowVRAM: + self.pipe.enable_model_cpu_offload() if CLIPSKIP and request.CLIPSkip != 0: self.clip_skip = request.CLIPSkip @@ -399,6 +425,13 @@ def GenerateImage(self, request, context): request.seed ) + if self.PipelineType == "FluxPipeline": + kwargs["max_sequence_length"] = 256 + + if self.PipelineType == "FluxTransformer2DModel": + kwargs["output_type"] = "pil" + kwargs["generator"] = torch.Generator("cpu").manual_seed(0) + if self.img2vid: # Load the conditioning image image = load_image(request.src) diff --git a/backend/python/diffusers/requirements-cpu.txt b/backend/python/diffusers/requirements-cpu.txt new file mode 100644 index 000000000000..235bb57e3d2f --- /dev/null +++ 
b/backend/python/diffusers/requirements-cpu.txt @@ -0,0 +1,9 @@ +diffusers +opencv-python +transformers +accelerate +compel +peft +sentencepiece +torch +optimum-quanto \ No newline at end of file diff --git a/backend/python/diffusers/requirements-cublas11.txt b/backend/python/diffusers/requirements-cublas11.txt new file mode 100644 index 000000000000..40e718cb1f7c --- /dev/null +++ b/backend/python/diffusers/requirements-cublas11.txt @@ -0,0 +1,10 @@ +--extra-index-url https://download.pytorch.org/whl/cu118 +torch +diffusers +opencv-python +transformers +accelerate +compel +peft +sentencepiece +optimum-quanto \ No newline at end of file diff --git a/backend/python/diffusers/requirements-cublas12.txt b/backend/python/diffusers/requirements-cublas12.txt new file mode 100644 index 000000000000..3bcc53972aa8 --- /dev/null +++ b/backend/python/diffusers/requirements-cublas12.txt @@ -0,0 +1,9 @@ +torch +diffusers +opencv-python +transformers +accelerate +compel +peft +sentencepiece +optimum-quanto \ No newline at end of file diff --git a/backend/python/diffusers/requirements-hipblas.txt b/backend/python/diffusers/requirements-hipblas.txt index 6c8da20d8c09..17cf72491555 100644 --- a/backend/python/diffusers/requirements-hipblas.txt +++ b/backend/python/diffusers/requirements-hipblas.txt @@ -1,3 +1,11 @@ --extra-index-url https://download.pytorch.org/whl/rocm6.0 -torch -torchvision \ No newline at end of file +torch==2.3.1+rocm6.0 +torchvision==0.18.1+rocm6.0 +diffusers +opencv-python +transformers +accelerate +compel +peft +sentencepiece +optimum-quanto \ No newline at end of file diff --git a/backend/python/diffusers/requirements-intel.txt b/backend/python/diffusers/requirements-intel.txt index c393b11896d0..1cc2e2a2bdb6 100644 --- a/backend/python/diffusers/requirements-intel.txt +++ b/backend/python/diffusers/requirements-intel.txt @@ -3,4 +3,12 @@ intel-extension-for-pytorch torch torchvision optimum[openvino] -setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406 \ No newline at end of file +setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406 +diffusers +opencv-python +transformers +accelerate +compel +peft +sentencepiece +optimum-quanto \ No newline at end of file diff --git a/backend/python/diffusers/requirements.txt b/backend/python/diffusers/requirements.txt index 6f04d677bc6a..b4195fc54c90 100644 --- a/backend/python/diffusers/requirements.txt +++ b/backend/python/diffusers/requirements.txt @@ -1,13 +1,5 @@ setuptools -accelerate -compel -peft -diffusers -grpcio==1.65.1 -opencv-python +grpcio==1.65.4 pillow protobuf -sentencepiece -torch -transformers certifi diff --git a/backend/python/exllama/requirements-cpu.txt b/backend/python/exllama/requirements-cpu.txt new file mode 100644 index 000000000000..bbcdc8cda704 --- /dev/null +++ b/backend/python/exllama/requirements-cpu.txt @@ -0,0 +1,3 @@ +transformers +accelerate +torch \ No newline at end of file diff --git a/backend/python/exllama/requirements-cublas11.txt b/backend/python/exllama/requirements-cublas11.txt new file mode 100644 index 000000000000..1dfb5b9854d2 --- /dev/null +++ b/backend/python/exllama/requirements-cublas11.txt @@ -0,0 +1,4 @@ +--extra-index-url https://download.pytorch.org/whl/cu118 +torch +transformers +accelerate \ No newline at end of file diff --git a/backend/python/exllama/requirements-cublas12.txt b/backend/python/exllama/requirements-cublas12.txt new file mode 100644 index 000000000000..1ec544cd1438 --- /dev/null +++ b/backend/python/exllama/requirements-cublas12.txt @@ -0,0 
+1,3 @@ +torch +transformers +accelerate \ No newline at end of file diff --git a/backend/python/exllama/requirements.txt b/backend/python/exllama/requirements.txt index 2aab2631cd9c..99b8109801ab 100644 --- a/backend/python/exllama/requirements.txt +++ b/backend/python/exllama/requirements.txt @@ -1,6 +1,4 @@ -grpcio==1.65.0 +grpcio==1.65.5 protobuf -torch -transformers certifi setuptools \ No newline at end of file diff --git a/backend/python/exllama2/requirements-cpu.txt b/backend/python/exllama2/requirements-cpu.txt new file mode 100644 index 000000000000..bbcdc8cda704 --- /dev/null +++ b/backend/python/exllama2/requirements-cpu.txt @@ -0,0 +1,3 @@ +transformers +accelerate +torch \ No newline at end of file diff --git a/backend/python/exllama2/requirements-cublas11.txt b/backend/python/exllama2/requirements-cublas11.txt new file mode 100644 index 000000000000..1dfb5b9854d2 --- /dev/null +++ b/backend/python/exllama2/requirements-cublas11.txt @@ -0,0 +1,4 @@ +--extra-index-url https://download.pytorch.org/whl/cu118 +torch +transformers +accelerate \ No newline at end of file diff --git a/backend/python/exllama2/requirements-cublas12.txt b/backend/python/exllama2/requirements-cublas12.txt new file mode 100644 index 000000000000..1ec544cd1438 --- /dev/null +++ b/backend/python/exllama2/requirements-cublas12.txt @@ -0,0 +1,3 @@ +torch +transformers +accelerate \ No newline at end of file diff --git a/backend/python/exllama2/requirements.txt b/backend/python/exllama2/requirements.txt index 6aae273c94cf..ce15b0b614e3 100644 --- a/backend/python/exllama2/requirements.txt +++ b/backend/python/exllama2/requirements.txt @@ -1,7 +1,5 @@ -accelerate -grpcio==1.65.1 +grpcio==1.65.4 protobuf certifi -torch wheel setuptools \ No newline at end of file diff --git a/backend/python/mamba/requirements-after.txt b/backend/python/mamba/requirements-after.txt new file mode 100644 index 000000000000..ea6890ebb1e5 --- /dev/null +++ b/backend/python/mamba/requirements-after.txt @@ -0,0 +1,2 @@ +causal-conv1d==1.4.0 +mamba-ssm==2.2.2 \ No newline at end of file diff --git a/backend/python/mamba/requirements-cpu.txt b/backend/python/mamba/requirements-cpu.txt new file mode 100644 index 000000000000..39dab0fdd98d --- /dev/null +++ b/backend/python/mamba/requirements-cpu.txt @@ -0,0 +1,2 @@ +torch +transformers \ No newline at end of file diff --git a/backend/python/mamba/requirements-cublas11.txt b/backend/python/mamba/requirements-cublas11.txt new file mode 100644 index 000000000000..7048a14f63b9 --- /dev/null +++ b/backend/python/mamba/requirements-cublas11.txt @@ -0,0 +1,3 @@ +--extra-index-url https://download.pytorch.org/whl/cu118 +torch +transformers \ No newline at end of file diff --git a/backend/python/mamba/requirements-cublas12.txt b/backend/python/mamba/requirements-cublas12.txt new file mode 100644 index 000000000000..39dab0fdd98d --- /dev/null +++ b/backend/python/mamba/requirements-cublas12.txt @@ -0,0 +1,2 @@ +torch +transformers \ No newline at end of file diff --git a/backend/python/mamba/requirements-install.txt b/backend/python/mamba/requirements-install.txt index 2fc9a07cda77..69d263f0b3ed 100644 --- a/backend/python/mamba/requirements-install.txt +++ b/backend/python/mamba/requirements-install.txt @@ -3,5 +3,4 @@ # https://github.com/Dao-AILab/causal-conv1d/issues/24 packaging setuptools -wheel -torch==2.3.1 \ No newline at end of file +wheel \ No newline at end of file diff --git a/backend/python/mamba/requirements.txt b/backend/python/mamba/requirements.txt index 
2aac2cda0800..920971ce0b1a 100644 --- a/backend/python/mamba/requirements.txt +++ b/backend/python/mamba/requirements.txt @@ -1,6 +1,3 @@ -causal-conv1d==1.4.0 -mamba-ssm==2.2.2 -grpcio==1.65.1 +grpcio==1.65.5 protobuf -certifi -transformers \ No newline at end of file +certifi \ No newline at end of file diff --git a/backend/python/openvoice/requirements-cpu.txt b/backend/python/openvoice/requirements-cpu.txt new file mode 100644 index 000000000000..08ed5eeb4b9f --- /dev/null +++ b/backend/python/openvoice/requirements-cpu.txt @@ -0,0 +1 @@ +torch \ No newline at end of file diff --git a/backend/python/openvoice/requirements-cublas11.txt b/backend/python/openvoice/requirements-cublas11.txt new file mode 100644 index 000000000000..6461b696f4c6 --- /dev/null +++ b/backend/python/openvoice/requirements-cublas11.txt @@ -0,0 +1,2 @@ +--extra-index-url https://download.pytorch.org/whl/cu118 +torch diff --git a/backend/python/openvoice/requirements-cublas12.txt b/backend/python/openvoice/requirements-cublas12.txt new file mode 100644 index 000000000000..12c6d5d5eac2 --- /dev/null +++ b/backend/python/openvoice/requirements-cublas12.txt @@ -0,0 +1 @@ +torch diff --git a/backend/python/openvoice/requirements-intel.txt b/backend/python/openvoice/requirements-intel.txt index bad088a912d6..25921f8f1a81 100644 --- a/backend/python/openvoice/requirements-intel.txt +++ b/backend/python/openvoice/requirements-intel.txt @@ -2,7 +2,7 @@ intel-extension-for-pytorch torch optimum[openvino] -grpcio==1.65.1 +grpcio==1.65.5 protobuf librosa==0.9.1 faster-whisper==1.0.3 diff --git a/backend/python/openvoice/requirements.txt b/backend/python/openvoice/requirements.txt index 86d16ec26a70..13ce9c28713e 100644 --- a/backend/python/openvoice/requirements.txt +++ b/backend/python/openvoice/requirements.txt @@ -1,4 +1,4 @@ -grpcio==1.65.1 +grpcio==1.65.5 protobuf librosa faster-whisper diff --git a/backend/python/openvoice/test.sh b/backend/python/openvoice/test.sh index 218c0dcd511f..6c0a840f5a1e 100755 --- a/backend/python/openvoice/test.sh +++ b/backend/python/openvoice/test.sh @@ -5,7 +5,7 @@ source $(dirname $0)/../common/libbackend.sh # Download checkpoints if not present if [ ! 
-d "checkpoints_v2" ]; then - wget https://myshell-public-repo-hosting.s3.amazonaws.com/openvoice/checkpoints_v2_0417.zip -O checkpoints_v2.zip + wget https://myshell-public-repo-host.s3.amazonaws.com/openvoice/checkpoints_v2_0417.zip -O checkpoints_v2.zip unzip checkpoints_v2.zip fi diff --git a/backend/python/parler-tts/requirements-after.txt b/backend/python/parler-tts/requirements-after.txt new file mode 100644 index 000000000000..63599411834c --- /dev/null +++ b/backend/python/parler-tts/requirements-after.txt @@ -0,0 +1 @@ +git+https://github.com/huggingface/parler-tts.git@8e465f1b5fcd223478e07175cb40494d19ffbe17 \ No newline at end of file diff --git a/backend/python/parler-tts/requirements-cpu.txt b/backend/python/parler-tts/requirements-cpu.txt new file mode 100644 index 000000000000..bbcdc8cda704 --- /dev/null +++ b/backend/python/parler-tts/requirements-cpu.txt @@ -0,0 +1,3 @@ +transformers +accelerate +torch \ No newline at end of file diff --git a/backend/python/parler-tts/requirements-cublas11.txt b/backend/python/parler-tts/requirements-cublas11.txt new file mode 100644 index 000000000000..71a6a93f442f --- /dev/null +++ b/backend/python/parler-tts/requirements-cublas11.txt @@ -0,0 +1,5 @@ +--extra-index-url https://download.pytorch.org/whl/cu118 +torch +torchaudio +transformers +accelerate \ No newline at end of file diff --git a/backend/python/parler-tts/requirements-cublas12.txt b/backend/python/parler-tts/requirements-cublas12.txt new file mode 100644 index 000000000000..0fa270742a3d --- /dev/null +++ b/backend/python/parler-tts/requirements-cublas12.txt @@ -0,0 +1,4 @@ +torch +torchaudio +transformers +accelerate \ No newline at end of file diff --git a/backend/python/parler-tts/requirements-hipblas.txt b/backend/python/parler-tts/requirements-hipblas.txt index 7bfc411bd51c..af9e820e9241 100644 --- a/backend/python/parler-tts/requirements-hipblas.txt +++ b/backend/python/parler-tts/requirements-hipblas.txt @@ -1,3 +1,5 @@ --extra-index-url https://download.pytorch.org/whl/rocm6.0 torch -torchaudio \ No newline at end of file +torchaudio +transformers +accelerate \ No newline at end of file diff --git a/backend/python/parler-tts/requirements-intel.txt b/backend/python/parler-tts/requirements-intel.txt index 5c4aa6a5ce68..002a55c35c15 100644 --- a/backend/python/parler-tts/requirements-intel.txt +++ b/backend/python/parler-tts/requirements-intel.txt @@ -3,4 +3,6 @@ intel-extension-for-pytorch torch torchaudio optimum[openvino] -setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406 \ No newline at end of file +setuptools==72.1.0 # https://github.com/mudler/LocalAI/issues/2406 +transformers +accelerate \ No newline at end of file diff --git a/backend/python/parler-tts/requirements.txt b/backend/python/parler-tts/requirements.txt index 147cad9a6a7d..1f17c8922c2e 100644 --- a/backend/python/parler-tts/requirements.txt +++ b/backend/python/parler-tts/requirements.txt @@ -1,7 +1,4 @@ -accelerate -grpcio==1.65.1 +grpcio==1.65.5 protobuf -torch -git+https://github.com/huggingface/parler-tts.git@10016fb0300c0dc31a0fb70e26f3affee7b62f16 certifi -transformers \ No newline at end of file +llvmlite==0.43.0 \ No newline at end of file diff --git a/backend/python/petals/Makefile b/backend/python/petals/Makefile deleted file mode 100644 index 81b06c2984fd..000000000000 --- a/backend/python/petals/Makefile +++ /dev/null @@ -1,31 +0,0 @@ -.PHONY: petals -petals: protogen - @echo "Creating virtual environment..." 
- bash install.sh "petals.yml" - @echo "Virtual environment created." - -.PHONY: run -run: protogen - @echo "Running petals..." - bash run.sh - @echo "petals run." - -.PHONY: test -test: protogen - @echo "Testing petals..." - bash test.sh - @echo "petals tested." - -.PHONY: protogen -protogen: backend_pb2_grpc.py backend_pb2.py - -.PHONY: protogen-clean -protogen-clean: - $(RM) backend_pb2_grpc.py backend_pb2.py - -backend_pb2_grpc.py backend_pb2.py: - python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto - -.PHONY: clean -clean: protogen-clean - rm -rf venv __pycache__ \ No newline at end of file diff --git a/backend/python/petals/backend.py b/backend/python/petals/backend.py deleted file mode 100755 index 73bcc4a0da0f..000000000000 --- a/backend/python/petals/backend.py +++ /dev/null @@ -1,140 +0,0 @@ -#!/usr/bin/env python3 -from concurrent import futures -import time -import argparse -import signal -import sys -import os - -import backend_pb2 -import backend_pb2_grpc - -import grpc -import torch -from transformers import AutoTokenizer -from petals import AutoDistributedModelForCausalLM - -_ONE_DAY_IN_SECONDS = 60 * 60 * 24 - -# If MAX_WORKERS are specified in the environment use it, otherwise default to 1 -MAX_WORKERS = int(os.environ.get('PYTHON_GRPC_MAX_WORKERS', '1')) - -# Implement the BackendServicer class with the service methods -class BackendServicer(backend_pb2_grpc.BackendServicer): - """ - A gRPC servicer that implements the Backend service defined in backend.proto. - """ - def Health(self, request, context): - """ - Returns a health check message. - - Args: - request: The health check request. - context: The gRPC context. - - Returns: - backend_pb2.Reply: The health check reply. - """ - return backend_pb2.Reply(message=bytes("OK", 'utf-8')) - - def LoadModel(self, request, context): - """ - Loads a language model. - - Args: - request: The load model request. - context: The gRPC context. - - Returns: - backend_pb2.Result: The load model result. - """ - try: - self.tokenizer = AutoTokenizer.from_pretrained(request.Model, use_fast=False, add_bos_token=False) - self.model = AutoDistributedModelForCausalLM.from_pretrained(request.Model) - self.cuda = False - if request.CUDA: - self.model = self.model.cuda() - self.cuda = True - - except Exception as err: - return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}") - return backend_pb2.Result(message="Model loaded successfully", success=True) - - def Predict(self, request, context): - """ - Generates text based on the given prompt and sampling parameters. - - Args: - request: The predict request. - context: The gRPC context. - - Returns: - backend_pb2.Result: The predict result. - """ - - inputs = self.tokenizer(request.Prompt, return_tensors="pt")["input_ids"] - if self.cuda: - inputs = inputs.cuda() - - if request.Tokens == 0: - # Max to max value if tokens are not specified - request.Tokens = 8192 - - # TODO: kwargs and map all parameters - outputs = self.model.generate(inputs, max_new_tokens=request.Tokens) - - generated_text = self.tokenizer.decode(outputs[0]) - # Remove prompt from response if present - if request.Prompt in generated_text: - generated_text = generated_text.replace(request.Prompt, "") - - return backend_pb2.Result(message=bytes(generated_text, encoding='utf-8')) - - def PredictStream(self, request, context): - """ - Generates text based on the given prompt and sampling parameters, and streams the results. 
- - Args: - request: The predict stream request. - context: The gRPC context. - - Returns: - backend_pb2.Result: The predict stream result. - """ - # Implement PredictStream RPC - #for reply in some_data_generator(): - # yield reply - # Not implemented yet - return self.Predict(request, context) - -def serve(address): - server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS)) - backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server) - server.add_insecure_port(address) - server.start() - print("Server started. Listening on: " + address, file=sys.stderr) - - # Define the signal handler function - def signal_handler(sig, frame): - print("Received termination signal. Shutting down...") - server.stop(0) - sys.exit(0) - - # Set the signal handlers for SIGINT and SIGTERM - signal.signal(signal.SIGINT, signal_handler) - signal.signal(signal.SIGTERM, signal_handler) - - try: - while True: - time.sleep(_ONE_DAY_IN_SECONDS) - except KeyboardInterrupt: - server.stop(0) - -if __name__ == "__main__": - parser = argparse.ArgumentParser(description="Run the gRPC server.") - parser.add_argument( - "--addr", default="localhost:50051", help="The address to bind the server to." - ) - args = parser.parse_args() - - serve(args.addr) diff --git a/backend/python/petals/install.sh b/backend/python/petals/install.sh deleted file mode 100755 index 36443ef1c559..000000000000 --- a/backend/python/petals/install.sh +++ /dev/null @@ -1,14 +0,0 @@ -#!/bin/bash -set -e - -source $(dirname $0)/../common/libbackend.sh - -# This is here because the Intel pip index is broken and returns 200 status codes for every package name, it just doesn't return any package links. -# This makes uv think that the package exists in the Intel pip index, and by default it stops looking at other pip indexes once it finds a match. 
-# We need uv to continue falling through to the pypi default index to find optimum[openvino] in the pypi index -# the --upgrade actually allows us to *downgrade* torch to the version provided in the Intel pip index -if [ "x${BUILD_PROFILE}" == "xintel" ]; then - EXTRA_PIP_INSTALL_FLAGS+=" --upgrade --index-strategy=unsafe-first-match" -fi - -installRequirements diff --git a/backend/python/petals/requirements-hipblas.txt b/backend/python/petals/requirements-hipblas.txt deleted file mode 100644 index 0331f106d614..000000000000 --- a/backend/python/petals/requirements-hipblas.txt +++ /dev/null @@ -1,2 +0,0 @@ ---extra-index-url https://download.pytorch.org/whl/rocm6.0 -torch diff --git a/backend/python/petals/requirements-intel.txt b/backend/python/petals/requirements-intel.txt deleted file mode 100644 index 635b4c31ee1a..000000000000 --- a/backend/python/petals/requirements-intel.txt +++ /dev/null @@ -1,5 +0,0 @@ ---extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ -intel-extension-for-pytorch -torch -optimum[openvino] -setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406 \ No newline at end of file diff --git a/backend/python/petals/requirements.txt b/backend/python/petals/requirements.txt deleted file mode 100644 index 10f5114e74ea..000000000000 --- a/backend/python/petals/requirements.txt +++ /dev/null @@ -1,3 +0,0 @@ -git+https://github.com/bigscience-workshop/petals -certifi -transformers \ No newline at end of file diff --git a/backend/python/petals/run.sh b/backend/python/petals/run.sh deleted file mode 100755 index 375c07e5f426..000000000000 --- a/backend/python/petals/run.sh +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -source $(dirname $0)/../common/libbackend.sh - -startBackend $@ \ No newline at end of file diff --git a/backend/python/petals/test.py b/backend/python/petals/test.py deleted file mode 100644 index 586d24437e16..000000000000 --- a/backend/python/petals/test.py +++ /dev/null @@ -1,58 +0,0 @@ -import unittest -import subprocess -import time -import backend_pb2 -import backend_pb2_grpc - -import grpc - -import unittest -import subprocess -import time -import grpc -import backend_pb2_grpc -import backend_pb2 - -class TestBackendServicer(unittest.TestCase): - """ - TestBackendServicer is the class that tests the gRPC service. - - This class contains methods to test the startup and shutdown of the gRPC service. 
- """ - def setUp(self): - self.service = subprocess.Popen(["python", "backend.py", "--addr", "localhost:50051"]) - time.sleep(10) - - def tearDown(self) -> None: - self.service.terminate() - self.service.wait() - - def test_server_startup(self): - try: - self.setUp() - with grpc.insecure_channel("localhost:50051") as channel: - stub = backend_pb2_grpc.BackendStub(channel) - response = stub.Health(backend_pb2.HealthMessage()) - self.assertEqual(response.message, b'OK') - except Exception as err: - print(err) - self.fail("Server failed to start") - finally: - self.tearDown() - def test_load_model(self): - """ - This method tests if the model is loaded successfully - """ - try: - self.setUp() - with grpc.insecure_channel("localhost:50051") as channel: - stub = backend_pb2_grpc.BackendStub(channel) - response = stub.LoadModel(backend_pb2.ModelOptions(Model="bigscience/bloom-560m")) - print(response) - self.assertTrue(response.success) - self.assertEqual(response.message, "Model loaded successfully") - except Exception as err: - print(err) - self.fail("LoadModel service failed") - finally: - self.tearDown() diff --git a/backend/python/petals/test.sh b/backend/python/petals/test.sh deleted file mode 100755 index 6940b0661df2..000000000000 --- a/backend/python/petals/test.sh +++ /dev/null @@ -1,6 +0,0 @@ -#!/bin/bash -set -e - -source $(dirname $0)/../common/libbackend.sh - -runUnittests diff --git a/backend/python/rerankers/requirements-cpu.txt b/backend/python/rerankers/requirements-cpu.txt new file mode 100644 index 000000000000..25a1d8ab8492 --- /dev/null +++ b/backend/python/rerankers/requirements-cpu.txt @@ -0,0 +1,4 @@ +transformers +accelerate +torch +rerankers[transformers] \ No newline at end of file diff --git a/backend/python/rerankers/requirements-cublas11.txt b/backend/python/rerankers/requirements-cublas11.txt new file mode 100644 index 000000000000..06c4b2cfb52c --- /dev/null +++ b/backend/python/rerankers/requirements-cublas11.txt @@ -0,0 +1,5 @@ +--extra-index-url https://download.pytorch.org/whl/cu118 +transformers +accelerate +torch +rerankers[transformers] \ No newline at end of file diff --git a/backend/python/rerankers/requirements-cublas12.txt b/backend/python/rerankers/requirements-cublas12.txt new file mode 100644 index 000000000000..25a1d8ab8492 --- /dev/null +++ b/backend/python/rerankers/requirements-cublas12.txt @@ -0,0 +1,4 @@ +transformers +accelerate +torch +rerankers[transformers] \ No newline at end of file diff --git a/backend/python/rerankers/requirements-hipblas.txt b/backend/python/rerankers/requirements-hipblas.txt index 76018445f448..961d150cd856 100644 --- a/backend/python/rerankers/requirements-hipblas.txt +++ b/backend/python/rerankers/requirements-hipblas.txt @@ -1,2 +1,5 @@ --extra-index-url https://download.pytorch.org/whl/rocm6.0 -torch \ No newline at end of file +transformers +accelerate +torch +rerankers[transformers] \ No newline at end of file diff --git a/backend/python/rerankers/requirements-intel.txt b/backend/python/rerankers/requirements-intel.txt index 635b4c31ee1a..1a39cf4fc05a 100644 --- a/backend/python/rerankers/requirements-intel.txt +++ b/backend/python/rerankers/requirements-intel.txt @@ -1,5 +1,8 @@ --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ intel-extension-for-pytorch +transformers +accelerate torch +rerankers[transformers] optimum[openvino] -setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406 \ No newline at end of file +setuptools==72.1.0 # 
https://github.com/mudler/LocalAI/issues/2406 \ No newline at end of file diff --git a/backend/python/rerankers/requirements.txt b/backend/python/rerankers/requirements.txt index 8b2ad4d0d9f0..2a8d18b10ce8 100644 --- a/backend/python/rerankers/requirements.txt +++ b/backend/python/rerankers/requirements.txt @@ -1,6 +1,3 @@ -accelerate -rerankers[transformers] -grpcio==1.65.1 +grpcio==1.65.4 protobuf -certifi -transformers \ No newline at end of file +certifi \ No newline at end of file diff --git a/backend/python/sentencetransformers/requirements-cpu.txt b/backend/python/sentencetransformers/requirements-cpu.txt new file mode 100644 index 000000000000..cd9924ef0748 --- /dev/null +++ b/backend/python/sentencetransformers/requirements-cpu.txt @@ -0,0 +1,6 @@ +torch +accelerate +transformers +bitsandbytes +sentence-transformers==3.0.1 +transformers \ No newline at end of file diff --git a/backend/python/sentencetransformers/requirements-cublas11.txt b/backend/python/sentencetransformers/requirements-cublas11.txt new file mode 100644 index 000000000000..1131f06624e5 --- /dev/null +++ b/backend/python/sentencetransformers/requirements-cublas11.txt @@ -0,0 +1,5 @@ +--extra-index-url https://download.pytorch.org/whl/cu118 +torch +accelerate +sentence-transformers==3.0.1 +transformers \ No newline at end of file diff --git a/backend/python/sentencetransformers/requirements-cublas12.txt b/backend/python/sentencetransformers/requirements-cublas12.txt new file mode 100644 index 000000000000..2936e17bc178 --- /dev/null +++ b/backend/python/sentencetransformers/requirements-cublas12.txt @@ -0,0 +1,4 @@ +torch +accelerate +sentence-transformers==3.0.1 +transformers \ No newline at end of file diff --git a/backend/python/sentencetransformers/requirements-hipblas.txt b/backend/python/sentencetransformers/requirements-hipblas.txt index 76018445f448..3b187c685f9c 100644 --- a/backend/python/sentencetransformers/requirements-hipblas.txt +++ b/backend/python/sentencetransformers/requirements-hipblas.txt @@ -1,2 +1,5 @@ --extra-index-url https://download.pytorch.org/whl/rocm6.0 -torch \ No newline at end of file +torch +accelerate +sentence-transformers==3.0.1 +transformers \ No newline at end of file diff --git a/backend/python/sentencetransformers/requirements-intel.txt b/backend/python/sentencetransformers/requirements-intel.txt index 95d4848c5d2c..806e3d475869 100644 --- a/backend/python/sentencetransformers/requirements-intel.txt +++ b/backend/python/sentencetransformers/requirements-intel.txt @@ -2,4 +2,7 @@ intel-extension-for-pytorch torch optimum[openvino] -setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406 \ No newline at end of file +setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406 +accelerate +sentence-transformers==3.0.1 +transformers \ No newline at end of file diff --git a/backend/python/sentencetransformers/requirements.txt b/backend/python/sentencetransformers/requirements.txt index 4ef4a28bd70d..920971ce0b1a 100644 --- a/backend/python/sentencetransformers/requirements.txt +++ b/backend/python/sentencetransformers/requirements.txt @@ -1,6 +1,3 @@ -accelerate -sentence-transformers==3.0.1 -transformers -grpcio==1.65.1 +grpcio==1.65.5 protobuf certifi \ No newline at end of file diff --git a/backend/python/transformers-musicgen/requirements-cpu.txt b/backend/python/transformers-musicgen/requirements-cpu.txt new file mode 100644 index 000000000000..bbcdc8cda704 --- /dev/null +++ b/backend/python/transformers-musicgen/requirements-cpu.txt @@ -0,0 +1,3 @@ 
+transformers +accelerate +torch \ No newline at end of file diff --git a/backend/python/transformers-musicgen/requirements-cublas11.txt b/backend/python/transformers-musicgen/requirements-cublas11.txt new file mode 100644 index 000000000000..191a6eefd4d4 --- /dev/null +++ b/backend/python/transformers-musicgen/requirements-cublas11.txt @@ -0,0 +1,4 @@ +--extra-index-url https://download.pytorch.org/whl/cu118 +transformers +accelerate +torch \ No newline at end of file diff --git a/backend/python/transformers-musicgen/requirements-cublas12.txt b/backend/python/transformers-musicgen/requirements-cublas12.txt new file mode 100644 index 000000000000..bbcdc8cda704 --- /dev/null +++ b/backend/python/transformers-musicgen/requirements-cublas12.txt @@ -0,0 +1,3 @@ +transformers +accelerate +torch \ No newline at end of file diff --git a/backend/python/transformers-musicgen/requirements-hipblas.txt b/backend/python/transformers-musicgen/requirements-hipblas.txt index 76018445f448..00f0a9464e51 100644 --- a/backend/python/transformers-musicgen/requirements-hipblas.txt +++ b/backend/python/transformers-musicgen/requirements-hipblas.txt @@ -1,2 +1,4 @@ --extra-index-url https://download.pytorch.org/whl/rocm6.0 +transformers +accelerate torch \ No newline at end of file diff --git a/backend/python/transformers-musicgen/requirements-intel.txt b/backend/python/transformers-musicgen/requirements-intel.txt index 95d4848c5d2c..89bfa6a20023 100644 --- a/backend/python/transformers-musicgen/requirements-intel.txt +++ b/backend/python/transformers-musicgen/requirements-intel.txt @@ -1,5 +1,7 @@ --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ intel-extension-for-pytorch +transformers +accelerate torch optimum[openvino] setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406 \ No newline at end of file diff --git a/backend/python/transformers-musicgen/requirements.txt b/backend/python/transformers-musicgen/requirements.txt index 8ffa3c317652..a0076112ed3e 100644 --- a/backend/python/transformers-musicgen/requirements.txt +++ b/backend/python/transformers-musicgen/requirements.txt @@ -1,7 +1,4 @@ -accelerate -transformers -grpcio==1.65.1 +grpcio==1.65.5 protobuf -torch scipy==1.14.0 certifi \ No newline at end of file diff --git a/backend/python/transformers/requirements-cpu.txt b/backend/python/transformers/requirements-cpu.txt new file mode 100644 index 000000000000..f1e6281bbf2a --- /dev/null +++ b/backend/python/transformers/requirements-cpu.txt @@ -0,0 +1,4 @@ +torch +accelerate +transformers +bitsandbytes \ No newline at end of file diff --git a/backend/python/transformers/requirements-cublas11.txt b/backend/python/transformers/requirements-cublas11.txt new file mode 100644 index 000000000000..0abd72d96503 --- /dev/null +++ b/backend/python/transformers/requirements-cublas11.txt @@ -0,0 +1,5 @@ +--extra-index-url https://download.pytorch.org/whl/cu118 +torch +accelerate +transformers +bitsandbytes \ No newline at end of file diff --git a/backend/python/transformers/requirements-cublas12.txt b/backend/python/transformers/requirements-cublas12.txt new file mode 100644 index 000000000000..f1e6281bbf2a --- /dev/null +++ b/backend/python/transformers/requirements-cublas12.txt @@ -0,0 +1,4 @@ +torch +accelerate +transformers +bitsandbytes \ No newline at end of file diff --git a/backend/python/transformers/requirements-hipblas.txt b/backend/python/transformers/requirements-hipblas.txt index 76018445f448..f6900af129b0 100644 --- 
a/backend/python/transformers/requirements-hipblas.txt +++ b/backend/python/transformers/requirements-hipblas.txt @@ -1,2 +1,5 @@ --extra-index-url https://download.pytorch.org/whl/rocm6.0 -torch \ No newline at end of file +torch +accelerate +transformers +bitsandbytes \ No newline at end of file diff --git a/backend/python/transformers/requirements-intel.txt b/backend/python/transformers/requirements-intel.txt index 8fc18a0ec3d2..5d9efb715dd1 100644 --- a/backend/python/transformers/requirements-intel.txt +++ b/backend/python/transformers/requirements-intel.txt @@ -2,3 +2,5 @@ intel-extension-for-pytorch torch optimum[openvino] +intel-extension-for-transformers +bitsandbytes \ No newline at end of file diff --git a/backend/python/transformers/requirements.txt b/backend/python/transformers/requirements.txt index 55925b329405..5531ea0ee199 100644 --- a/backend/python/transformers/requirements.txt +++ b/backend/python/transformers/requirements.txt @@ -1,9 +1,4 @@ -accelerate -transformers -grpcio==1.65.1 +grpcio==1.65.5 protobuf -torch certifi -intel-extension-for-transformers -bitsandbytes -setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406 +setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406 \ No newline at end of file diff --git a/backend/python/vall-e-x/requirements-cpu.txt b/backend/python/vall-e-x/requirements-cpu.txt new file mode 100644 index 000000000000..3a3304c0b7f9 --- /dev/null +++ b/backend/python/vall-e-x/requirements-cpu.txt @@ -0,0 +1,3 @@ +accelerate +torch +torchaudio \ No newline at end of file diff --git a/backend/python/vall-e-x/requirements-cublas11.txt b/backend/python/vall-e-x/requirements-cublas11.txt new file mode 100644 index 000000000000..4e0a151a26c6 --- /dev/null +++ b/backend/python/vall-e-x/requirements-cublas11.txt @@ -0,0 +1,4 @@ +--extra-index-url https://download.pytorch.org/whl/cu118 +accelerate +torch +torchaudio \ No newline at end of file diff --git a/backend/python/vall-e-x/requirements-cublas12.txt b/backend/python/vall-e-x/requirements-cublas12.txt new file mode 100644 index 000000000000..3a3304c0b7f9 --- /dev/null +++ b/backend/python/vall-e-x/requirements-cublas12.txt @@ -0,0 +1,3 @@ +accelerate +torch +torchaudio \ No newline at end of file diff --git a/backend/python/vall-e-x/requirements-hipblas.txt b/backend/python/vall-e-x/requirements-hipblas.txt index 7bfc411bd51c..fc43790a2e59 100644 --- a/backend/python/vall-e-x/requirements-hipblas.txt +++ b/backend/python/vall-e-x/requirements-hipblas.txt @@ -1,3 +1,4 @@ --extra-index-url https://download.pytorch.org/whl/rocm6.0 -torch -torchaudio \ No newline at end of file +accelerate +torch==2.3.0+rocm6.0 +torchaudio==2.3.0+rocm6.0 \ No newline at end of file diff --git a/backend/python/vall-e-x/requirements-intel.txt b/backend/python/vall-e-x/requirements-intel.txt index 5c4aa6a5ce68..6185314fe232 100644 --- a/backend/python/vall-e-x/requirements-intel.txt +++ b/backend/python/vall-e-x/requirements-intel.txt @@ -1,6 +1,7 @@ --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ intel-extension-for-pytorch +accelerate torch torchaudio optimum[openvino] -setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406 \ No newline at end of file +setuptools==72.1.0 # https://github.com/mudler/LocalAI/issues/2406 \ No newline at end of file diff --git a/backend/python/vall-e-x/requirements.txt b/backend/python/vall-e-x/requirements.txt index d1d0583e40c1..920971ce0b1a 100644 --- a/backend/python/vall-e-x/requirements.txt +++ 
b/backend/python/vall-e-x/requirements.txt @@ -1,4 +1,3 @@ -accelerate -grpcio==1.65.1 +grpcio==1.65.5 protobuf certifi \ No newline at end of file diff --git a/backend/python/vllm/requirements-after.txt b/backend/python/vllm/requirements-after.txt new file mode 100644 index 000000000000..76f11f154037 --- /dev/null +++ b/backend/python/vllm/requirements-after.txt @@ -0,0 +1 @@ +vllm \ No newline at end of file diff --git a/backend/python/vllm/requirements-cpu.txt b/backend/python/vllm/requirements-cpu.txt new file mode 100644 index 000000000000..765a1ef558e6 --- /dev/null +++ b/backend/python/vllm/requirements-cpu.txt @@ -0,0 +1,3 @@ +accelerate +torch +transformers \ No newline at end of file diff --git a/backend/python/vllm/requirements-cublas.txt b/backend/python/vllm/requirements-cublas11-after.txt similarity index 100% rename from backend/python/vllm/requirements-cublas.txt rename to backend/python/vllm/requirements-cublas11-after.txt diff --git a/backend/python/vllm/requirements-cublas11.txt b/backend/python/vllm/requirements-cublas11.txt new file mode 100644 index 000000000000..4381772756dd --- /dev/null +++ b/backend/python/vllm/requirements-cublas11.txt @@ -0,0 +1,4 @@ +--extra-index-url https://download.pytorch.org/whl/cu118 +accelerate +torch +transformers \ No newline at end of file diff --git a/backend/python/vllm/requirements-cublas12-after.txt b/backend/python/vllm/requirements-cublas12-after.txt new file mode 100644 index 000000000000..7bfe8efeb555 --- /dev/null +++ b/backend/python/vllm/requirements-cublas12-after.txt @@ -0,0 +1 @@ +flash-attn \ No newline at end of file diff --git a/backend/python/vllm/requirements-cublas12.txt b/backend/python/vllm/requirements-cublas12.txt new file mode 100644 index 000000000000..765a1ef558e6 --- /dev/null +++ b/backend/python/vllm/requirements-cublas12.txt @@ -0,0 +1,3 @@ +accelerate +torch +transformers \ No newline at end of file diff --git a/backend/python/vllm/requirements-hipblas.txt b/backend/python/vllm/requirements-hipblas.txt index 76018445f448..c73d8141d3a5 100644 --- a/backend/python/vllm/requirements-hipblas.txt +++ b/backend/python/vllm/requirements-hipblas.txt @@ -1,2 +1,4 @@ --extra-index-url https://download.pytorch.org/whl/rocm6.0 -torch \ No newline at end of file +accelerate +torch +transformers \ No newline at end of file diff --git a/backend/python/vllm/requirements-intel.txt b/backend/python/vllm/requirements-intel.txt index 635b4c31ee1a..7903282e84a5 100644 --- a/backend/python/vllm/requirements-intel.txt +++ b/backend/python/vllm/requirements-intel.txt @@ -1,5 +1,7 @@ --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ intel-extension-for-pytorch +accelerate torch +transformers optimum[openvino] setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406 \ No newline at end of file diff --git a/backend/python/vllm/requirements.txt b/backend/python/vllm/requirements.txt index 7c612a2f87f5..99b8109801ab 100644 --- a/backend/python/vllm/requirements.txt +++ b/backend/python/vllm/requirements.txt @@ -1,7 +1,4 @@ -accelerate -vllm -grpcio==1.65.1 +grpcio==1.65.5 protobuf certifi -transformers setuptools \ No newline at end of file diff --git a/core/cli/cli.go b/core/cli/cli.go index 0fed33fdf0df..2073778d747f 100644 --- a/core/cli/cli.go +++ b/core/cli/cli.go @@ -15,4 +15,5 @@ var CLI struct { Transcript TranscriptCMD `cmd:"" help:"Convert audio to text"` Worker worker.Worker `cmd:"" help:"Run workers to distribute workload (llama.cpp-only)"` Util UtilCMD `cmd:"" help:"Utility 
commands"` + Explorer ExplorerCMD `cmd:"" help:"Run p2p explorer"` } diff --git a/core/cli/explorer.go b/core/cli/explorer.go new file mode 100644 index 000000000000..67d25304165d --- /dev/null +++ b/core/cli/explorer.go @@ -0,0 +1,49 @@ +package cli + +import ( + "context" + "time" + + cliContext "github.com/mudler/LocalAI/core/cli/context" + "github.com/mudler/LocalAI/core/explorer" + "github.com/mudler/LocalAI/core/http" +) + +type ExplorerCMD struct { + Address string `env:"LOCALAI_ADDRESS,ADDRESS" default:":8080" help:"Bind address for the API server" group:"api"` + PoolDatabase string `env:"LOCALAI_POOL_DATABASE,POOL_DATABASE" default:"explorer.json" help:"Path to the pool database" group:"api"` + ConnectionTimeout string `env:"LOCALAI_CONNECTION_TIMEOUT,CONNECTION_TIMEOUT" default:"2m" help:"Connection timeout for the explorer" group:"api"` + ConnectionErrorThreshold int `env:"LOCALAI_CONNECTION_ERROR_THRESHOLD,CONNECTION_ERROR_THRESHOLD" default:"3" help:"Connection failure threshold for the explorer" group:"api"` + + WithSync bool `env:"LOCALAI_WITH_SYNC,WITH_SYNC" default:"false" help:"Enable sync with the network" group:"api"` + OnlySync bool `env:"LOCALAI_ONLY_SYNC,ONLY_SYNC" default:"false" help:"Only sync with the network" group:"api"` +} + +func (e *ExplorerCMD) Run(ctx *cliContext.Context) error { + + db, err := explorer.NewDatabase(e.PoolDatabase) + if err != nil { + return err + } + + dur, err := time.ParseDuration(e.ConnectionTimeout) + if err != nil { + return err + } + + if e.WithSync { + ds := explorer.NewDiscoveryServer(db, dur, e.ConnectionErrorThreshold) + go ds.Start(context.Background(), true) + } + + if e.OnlySync { + ds := explorer.NewDiscoveryServer(db, dur, e.ConnectionErrorThreshold) + ctx := context.Background() + + return ds.Start(ctx, false) + } + + appHTTP := http.Explorer(db) + + return appHTTP.Listen(e.Address) +} diff --git a/core/cli/federated.go b/core/cli/federated.go index 32f0fa879555..b917812ce5a8 100644 --- a/core/cli/federated.go +++ b/core/cli/federated.go @@ -8,14 +8,16 @@ import ( ) type FederatedCLI struct { - Address string `env:"LOCALAI_ADDRESS,ADDRESS" default:":8080" help:"Bind address for the API server" group:"api"` - Peer2PeerToken string `env:"LOCALAI_P2P_TOKEN,P2P_TOKEN,TOKEN" name:"p2ptoken" help:"Token for P2P mode (optional)" group:"p2p"` - LoadBalanced bool `env:"LOCALAI_LOAD_BALANCED,LOAD_BALANCED" default:"false" help:"Enable load balancing" group:"p2p"` + Address string `env:"LOCALAI_ADDRESS,ADDRESS" default:":8080" help:"Bind address for the API server" group:"api"` + Peer2PeerToken string `env:"LOCALAI_P2P_TOKEN,P2P_TOKEN,TOKEN" name:"p2ptoken" help:"Token for P2P mode (optional)" group:"p2p"` + RandomWorker bool `env:"LOCALAI_RANDOM_WORKER,RANDOM_WORKER" default:"false" help:"Select a random worker from the pool" group:"p2p"` + Peer2PeerNetworkID string `env:"LOCALAI_P2P_NETWORK_ID,P2P_NETWORK_ID" help:"Network ID for P2P mode, can be set arbitrarly by the user for grouping a set of instances." 
group:"p2p"` + TargetWorker string `env:"LOCALAI_TARGET_WORKER,TARGET_WORKER" help:"Target worker to run the federated server on" group:"p2p"` } func (f *FederatedCLI) Run(ctx *cliContext.Context) error { - fs := p2p.NewFederatedServer(f.Address, p2p.FederatedID, f.Peer2PeerToken, f.LoadBalanced) + fs := p2p.NewFederatedServer(f.Address, p2p.NetworkID(f.Peer2PeerNetworkID, p2p.FederatedID), f.Peer2PeerToken, !f.RandomWorker, f.TargetWorker) return fs.Start(context.Background()) } diff --git a/core/cli/models.go b/core/cli/models.go index 030470185e74..56d13fc7a12f 100644 --- a/core/cli/models.go +++ b/core/cli/models.go @@ -83,7 +83,9 @@ func (mi *ModelsInstall) Run(ctx *cliContext.Context) error { return err } - if !downloader.LooksLikeOCI(modelName) { + modelURI := downloader.URI(modelName) + + if !modelURI.LooksLikeOCI() { model := gallery.FindModel(models, modelName, mi.ModelsPath) if model == nil { log.Error().Str("model", modelName).Msg("model not found") diff --git a/core/cli/run.go b/core/cli/run.go index b3d9163223a2..c469f05fcfbc 100644 --- a/core/cli/run.go +++ b/core/cli/run.go @@ -54,6 +54,7 @@ type RunCMD struct { OpaqueErrors bool `env:"LOCALAI_OPAQUE_ERRORS" default:"false" help:"If true, all error responses are replaced with blank 500 errors. This is intended only for hardening against information leaks and is normally not recommended." group:"hardening"` Peer2Peer bool `env:"LOCALAI_P2P,P2P" name:"p2p" default:"false" help:"Enable P2P mode" group:"p2p"` Peer2PeerToken string `env:"LOCALAI_P2P_TOKEN,P2P_TOKEN,TOKEN" name:"p2ptoken" help:"Token for P2P mode (optional)" group:"p2p"` + Peer2PeerNetworkID string `env:"LOCALAI_P2P_NETWORK_ID,P2P_NETWORK_ID" help:"Network ID for P2P mode, can be set arbitrarly by the user for grouping a set of instances" group:"p2p"` ParallelRequests bool `env:"LOCALAI_PARALLEL_REQUESTS,PARALLEL_REQUESTS" help:"Enable backends to handle multiple requests in parallel if they support it (e.g.: llama.cpp or vllm)" group:"backends"` SingleActiveBackend bool `env:"LOCALAI_SINGLE_ACTIVE_BACKEND,SINGLE_ACTIVE_BACKEND" help:"Allow only one backend to be run at a time" group:"backends"` PreloadBackendOnly bool `env:"LOCALAI_PRELOAD_BACKEND_ONLY,PRELOAD_BACKEND_ONLY" default:"false" help:"Do not launch the API services, only the preloaded models / backends are started (useful for multi-node setups)" group:"backends"` @@ -63,6 +64,7 @@ type RunCMD struct { EnableWatchdogBusy bool `env:"LOCALAI_WATCHDOG_BUSY,WATCHDOG_BUSY" default:"false" help:"Enable watchdog for stopping backends that are busy longer than the watchdog-busy-timeout" group:"backends"` WatchdogBusyTimeout string `env:"LOCALAI_WATCHDOG_BUSY_TIMEOUT,WATCHDOG_BUSY_TIMEOUT" default:"5m" help:"Threshold beyond which a busy backend should be stopped" group:"backends"` Federated bool `env:"LOCALAI_FEDERATED,FEDERATED" help:"Enable federated instance" group:"federated"` + DisableGalleryEndpoint bool `env:"LOCALAI_DISABLE_GALLERY_ENDPOINT,DISABLE_GALLERY_ENDPOINT" help:"Disable the gallery endpoints" group:"api"` } func (r *RunCMD) Run(ctx *cliContext.Context) error { @@ -94,6 +96,7 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error { config.WithModelsURL(append(r.Models, r.ModelArgs...)...), config.WithOpaqueErrors(r.OpaqueErrors), config.WithEnforcedPredownloadScans(!r.DisablePredownloadScan), + config.WithP2PNetworkID(r.Peer2PeerNetworkID), } token := "" @@ -119,9 +122,9 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error { } log.Info().Msg("Starting P2P server discovery...") - if err := 
p2p.ServiceDiscoverer(context.Background(), node, token, "", func(serviceID string, node p2p.NodeData) { + if err := p2p.ServiceDiscoverer(context.Background(), node, token, p2p.NetworkID(r.Peer2PeerNetworkID, p2p.WorkerID), func(serviceID string, node p2p.NodeData) { var tunnelAddresses []string - for _, v := range p2p.GetAvailableNodes("") { + for _, v := range p2p.GetAvailableNodes(p2p.NetworkID(r.Peer2PeerNetworkID, p2p.WorkerID)) { if v.IsOnline() { tunnelAddresses = append(tunnelAddresses, v.TunnelAddress) } else { @@ -132,7 +135,7 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error { os.Setenv("LLAMACPP_GRPC_SERVERS", tunnelEnvVar) log.Debug().Msgf("setting LLAMACPP_GRPC_SERVERS to %s", tunnelEnvVar) - }); err != nil { + }, true); err != nil { return err } } @@ -142,14 +145,13 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error { if err != nil { return err } - if err := p2p.ExposeService(context.Background(), "localhost", port, token, p2p.FederatedID); err != nil { - return err - } - node, err := p2p.NewNode(token) + fedCtx := context.Background() + node, err := p2p.ExposeService(fedCtx, "localhost", port, token, p2p.NetworkID(r.Peer2PeerNetworkID, p2p.FederatedID)) if err != nil { return err } - if err := p2p.ServiceDiscoverer(context.Background(), node, token, p2p.FederatedID, nil); err != nil { + + if err := p2p.ServiceDiscoverer(fedCtx, node, token, p2p.NetworkID(r.Peer2PeerNetworkID, p2p.FederatedID), nil, false); err != nil { return err } } @@ -161,6 +163,10 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error { opts = append(opts, config.DisableWebUI) } + if r.DisableGalleryEndpoint { + opts = append(opts, config.DisableGalleryEndpoint) + } + if idleWatchDog || busyWatchDog { opts = append(opts, config.EnableWatchDog) if idleWatchDog { diff --git a/core/cli/util.go b/core/cli/util.go index a7204092bed2..b3e545d869e3 100644 --- a/core/cli/util.go +++ b/core/cli/util.go @@ -86,8 +86,8 @@ func (hfscmd *HFScanCMD) Run(ctx *cliContext.Context) error { var errs error = nil for _, uri := range hfscmd.ToScan { log.Info().Str("uri", uri).Msg("scanning specific uri") - scanResults, err := downloader.HuggingFaceScan(uri) - if err != nil && !errors.Is(err, downloader.ErrNonHuggingFaceFile) { + scanResults, err := downloader.HuggingFaceScan(downloader.URI(uri)) + if err != nil && errors.Is(err, downloader.ErrUnsafeFilesFound) { log.Error().Err(err).Strs("clamAV", scanResults.ClamAVInfectedFiles).Strs("pickles", scanResults.DangerousPickles).Msg("! WARNING ! 
A known-vulnerable model is included in this repo!") errs = errors.Join(errs, err) } diff --git a/core/cli/worker/worker_llamacpp.go b/core/cli/worker/worker_llamacpp.go index 5598a4857d90..2baf51ec4c1b 100644 --- a/core/cli/worker/worker_llamacpp.go +++ b/core/cli/worker/worker_llamacpp.go @@ -21,7 +21,7 @@ func (r *LLamaCPP) Run(ctx *cliContext.Context) error { err := assets.ExtractFiles(ctx.BackendAssets, r.BackendAssetsPath) log.Debug().Msgf("Extracting backend assets files to %s", r.BackendAssetsPath) if err != nil { - log.Warn().Msgf("Failed extracting backend assets files: %s (might be required for some backends to work properly, like gpt4all)", err) + log.Warn().Msgf("Failed extracting backend assets files: %s (might be required for some backends to work properly)", err) } if len(os.Args) < 4 { diff --git a/core/cli/worker/worker_p2p.go b/core/cli/worker/worker_p2p.go index 2eb5cb94bb7c..7c900667abf3 100644 --- a/core/cli/worker/worker_p2p.go +++ b/core/cli/worker/worker_p2p.go @@ -19,12 +19,13 @@ import ( ) type P2P struct { - WorkerFlags `embed:""` - Token string `env:"LOCALAI_TOKEN,LOCALAI_P2P_TOKEN,TOKEN" help:"P2P token to use"` - NoRunner bool `env:"LOCALAI_NO_RUNNER,NO_RUNNER" help:"Do not start the llama-cpp-rpc-server"` - RunnerAddress string `env:"LOCALAI_RUNNER_ADDRESS,RUNNER_ADDRESS" help:"Address of the llama-cpp-rpc-server"` - RunnerPort string `env:"LOCALAI_RUNNER_PORT,RUNNER_PORT" help:"Port of the llama-cpp-rpc-server"` - ExtraLLamaCPPArgs []string `env:"LOCALAI_EXTRA_LLAMA_CPP_ARGS,EXTRA_LLAMA_CPP_ARGS" help:"Extra arguments to pass to llama-cpp-rpc-server"` + WorkerFlags `embed:""` + Token string `env:"LOCALAI_TOKEN,LOCALAI_P2P_TOKEN,TOKEN" help:"P2P token to use"` + NoRunner bool `env:"LOCALAI_NO_RUNNER,NO_RUNNER" help:"Do not start the llama-cpp-rpc-server"` + RunnerAddress string `env:"LOCALAI_RUNNER_ADDRESS,RUNNER_ADDRESS" help:"Address of the llama-cpp-rpc-server"` + RunnerPort string `env:"LOCALAI_RUNNER_PORT,RUNNER_PORT" help:"Port of the llama-cpp-rpc-server"` + ExtraLLamaCPPArgs []string `env:"LOCALAI_EXTRA_LLAMA_CPP_ARGS,EXTRA_LLAMA_CPP_ARGS" help:"Extra arguments to pass to llama-cpp-rpc-server"` + Peer2PeerNetworkID string `env:"LOCALAI_P2P_NETWORK_ID,P2P_NETWORK_ID" help:"Network ID for P2P mode, can be set arbitrarly by the user for grouping a set of instances" group:"p2p"` } func (r *P2P) Run(ctx *cliContext.Context) error { @@ -32,7 +33,7 @@ func (r *P2P) Run(ctx *cliContext.Context) error { err := assets.ExtractFiles(ctx.BackendAssets, r.BackendAssetsPath) log.Debug().Msgf("Extracting backend assets files to %s", r.BackendAssetsPath) if err != nil { - log.Warn().Msgf("Failed extracting backend assets files: %s (might be required for some backends to work properly, like gpt4all)", err) + log.Warn().Msgf("Failed extracting backend assets files: %s (might be required for some backends to work properly)", err) } // Check if the token is set @@ -59,7 +60,7 @@ func (r *P2P) Run(ctx *cliContext.Context) error { p = r.RunnerPort } - err = p2p.ExposeService(context.Background(), address, p, r.Token, "") + _, err = p2p.ExposeService(context.Background(), address, p, r.Token, p2p.NetworkID(r.Peer2PeerNetworkID, p2p.WorkerID)) if err != nil { return err } @@ -99,7 +100,7 @@ func (r *P2P) Run(ctx *cliContext.Context) error { } }() - err = p2p.ExposeService(context.Background(), address, fmt.Sprint(port), r.Token, "") + _, err = p2p.ExposeService(context.Background(), address, fmt.Sprint(port), r.Token, p2p.NetworkID(r.Peer2PeerNetworkID, p2p.WorkerID)) if err 
!= nil { return err } diff --git a/core/config/application_config.go b/core/config/application_config.go index 7233d1ac0916..947c4f136ba5 100644 --- a/core/config/application_config.go +++ b/core/config/application_config.go @@ -34,6 +34,7 @@ type ApplicationConfig struct { EnforcePredownloadScans bool OpaqueErrors bool P2PToken string + P2PNetworkID string ModelLibraryURL string @@ -56,6 +57,8 @@ type ApplicationConfig struct { ModelsURL []string WatchDogBusyTimeout, WatchDogIdleTimeout time.Duration + + DisableGalleryEndpoint bool } type AppOption func(*ApplicationConfig) @@ -91,6 +94,12 @@ func WithCors(b bool) AppOption { } } +func WithP2PNetworkID(s string) AppOption { + return func(o *ApplicationConfig) { + o.P2PNetworkID = s + } +} + func WithCsrf(b bool) AppOption { return func(o *ApplicationConfig) { o.CSRF = b @@ -124,6 +133,10 @@ var EnableWatchDogIdleCheck = func(o *ApplicationConfig) { o.WatchDogIdle = true } +var DisableGalleryEndpoint = func(o *ApplicationConfig) { + o.DisableGalleryEndpoint = true +} + var EnableWatchDogBusyCheck = func(o *ApplicationConfig) { o.WatchDog = true o.WatchDogBusy = true diff --git a/core/config/backend_config.go b/core/config/backend_config.go index 561d4c3f2693..ab6a6cc6ea5c 100644 --- a/core/config/backend_config.go +++ b/core/config/backend_config.go @@ -8,7 +8,6 @@ import ( "github.com/mudler/LocalAI/core/schema" "github.com/mudler/LocalAI/pkg/downloader" "github.com/mudler/LocalAI/pkg/functions" - "github.com/mudler/LocalAI/pkg/utils" ) const ( @@ -72,9 +71,9 @@ type BackendConfig struct { } type File struct { - Filename string `yaml:"filename" json:"filename"` - SHA256 string `yaml:"sha256" json:"sha256"` - URI string `yaml:"uri" json:"uri"` + Filename string `yaml:"filename" json:"filename"` + SHA256 string `yaml:"sha256" json:"sha256"` + URI downloader.URI `yaml:"uri" json:"uri"` } type VallE struct { @@ -213,28 +212,32 @@ func (c *BackendConfig) ShouldCallSpecificFunction() bool { // MMProjFileName returns the filename of the MMProj file // If the MMProj is a URL, it will return the MD5 of the URL which is the filename func (c *BackendConfig) MMProjFileName() string { - modelURL := downloader.ConvertURL(c.MMProj) - if downloader.LooksLikeURL(modelURL) { - return utils.MD5(modelURL) + uri := downloader.URI(c.MMProj) + if uri.LooksLikeURL() { + f, _ := uri.FilenameFromUrl() + return f } return c.MMProj } func (c *BackendConfig) IsMMProjURL() bool { - return downloader.LooksLikeURL(downloader.ConvertURL(c.MMProj)) + uri := downloader.URI(c.MMProj) + return uri.LooksLikeURL() } func (c *BackendConfig) IsModelURL() bool { - return downloader.LooksLikeURL(downloader.ConvertURL(c.Model)) + uri := downloader.URI(c.Model) + return uri.LooksLikeURL() } // ModelFileName returns the filename of the model // If the model is a URL, it will return the MD5 of the URL which is the filename func (c *BackendConfig) ModelFileName() string { - modelURL := downloader.ConvertURL(c.Model) - if downloader.LooksLikeURL(modelURL) { - return utils.MD5(modelURL) + uri := downloader.URI(c.Model) + if uri.LooksLikeURL() { + f, _ := uri.FilenameFromUrl() + return f } return c.Model diff --git a/core/config/backend_config_loader.go b/core/config/backend_config_loader.go index 283dac52bd7d..45fe259e6417 100644 --- a/core/config/backend_config_loader.go +++ b/core/config/backend_config_loader.go @@ -244,7 +244,7 @@ func (bcl *BackendConfigLoader) Preload(modelPath string) error { // Create file path filePath := filepath.Join(modelPath, file.Filename) - if err := 
downloader.DownloadFile(file.URI, filePath, file.SHA256, i, len(config.DownloadFiles), status); err != nil { + if err := file.URI.DownloadFile(filePath, file.SHA256, i, len(config.DownloadFiles), status); err != nil { return err } } @@ -252,10 +252,10 @@ func (bcl *BackendConfigLoader) Preload(modelPath string) error { // If the model is an URL, expand it, and download the file if config.IsModelURL() { modelFileName := config.ModelFileName() - modelURL := downloader.ConvertURL(config.Model) + uri := downloader.URI(config.Model) // check if file exists if _, err := os.Stat(filepath.Join(modelPath, modelFileName)); errors.Is(err, os.ErrNotExist) { - err := downloader.DownloadFile(modelURL, filepath.Join(modelPath, modelFileName), "", 0, 0, status) + err := uri.DownloadFile(filepath.Join(modelPath, modelFileName), "", 0, 0, status) if err != nil { return err } @@ -269,10 +269,10 @@ func (bcl *BackendConfigLoader) Preload(modelPath string) error { if config.IsMMProjURL() { modelFileName := config.MMProjFileName() - modelURL := downloader.ConvertURL(config.MMProj) + uri := downloader.URI(config.MMProj) // check if file exists if _, err := os.Stat(filepath.Join(modelPath, modelFileName)); errors.Is(err, os.ErrNotExist) { - err := downloader.DownloadFile(modelURL, filepath.Join(modelPath, modelFileName), "", 0, 0, status) + err := uri.DownloadFile(filepath.Join(modelPath, modelFileName), "", 0, 0, status) if err != nil { return err } diff --git a/core/config/guesser.go b/core/config/guesser.go index 6c6ef4302991..b63dd051a32a 100644 --- a/core/config/guesser.go +++ b/core/config/guesser.go @@ -26,15 +26,17 @@ const ( type settingsConfig struct { StopWords []string TemplateConfig TemplateConfig + RepeatPenalty float64 } // default settings to adopt with a given model family var defaultsSettings map[familyType]settingsConfig = map[familyType]settingsConfig{ Gemma: { + RepeatPenalty: 1.0, StopWords: []string{"<|im_end|>", "", ""}, TemplateConfig: TemplateConfig{ - Chat: "{{.Input }}\n<|start_of_turn|>model\n", - ChatMessage: "<|start_of_turn|>{{if eq .RoleName \"assistant\" }}model{{else}}{{ .RoleName }}{{end}}\n{{ if .Content -}}\n{{.Content -}}\n{{ end -}}<|end_of_turn|>", + Chat: "{{.Input }}\nmodel\n", + ChatMessage: "{{if eq .RoleName \"assistant\" }}model{{else}}{{ .RoleName }}{{end}}\n{{ if .Content -}}\n{{.Content -}}\n{{ end -}}", Completion: "{{.Input}}", }, }, @@ -192,6 +194,9 @@ func guessDefaultsFromFile(cfg *BackendConfig, modelPath string) { if len(cfg.StopWords) == 0 { cfg.StopWords = settings.StopWords } + if cfg.RepeatPenalty == 0.0 { + cfg.RepeatPenalty = settings.RepeatPenalty + } } else { log.Debug().Any("family", family).Msgf("guessDefaultsFromFile: no template found for family") } @@ -219,7 +224,7 @@ func identifyFamily(f *gguf.GGUFFile) familyType { commandR := arch == "command-r" && eosTokenID == 255001 qwen2 := arch == "qwen2" phi3 := arch == "phi-3" - gemma := strings.HasPrefix(f.Model().Name, "gemma") + gemma := strings.HasPrefix(arch, "gemma") || strings.Contains(strings.ToLower(f.Model().Name), "gemma") deepseek2 := arch == "deepseek2" switch { diff --git a/core/dependencies_manager/manager.go b/core/dependencies_manager/manager.go index b86139e0f749..8434f721071c 100644 --- a/core/dependencies_manager/manager.go +++ b/core/dependencies_manager/manager.go @@ -37,7 +37,8 @@ func main() { // download the assets for _, asset := range assets { - if err := downloader.DownloadFile(asset.URL, filepath.Join(destPath, asset.FileName), asset.SHA, 1, 1, 
utils.DisplayDownloadFunction); err != nil { + uri := downloader.URI(asset.URL) + if err := uri.DownloadFile(filepath.Join(destPath, asset.FileName), asset.SHA, 1, 1, utils.DisplayDownloadFunction); err != nil { panic(err) } } diff --git a/core/explorer/database.go b/core/explorer/database.go new file mode 100644 index 000000000000..e24de0aad26b --- /dev/null +++ b/core/explorer/database.go @@ -0,0 +1,125 @@ +package explorer + +// A simple JSON database for storing and retrieving p2p network tokens and a name and description. + +import ( + "encoding/json" + "os" + "sort" + "sync" + + "github.com/gofrs/flock" +) + +// Database is a simple JSON database for storing and retrieving p2p network tokens and a name and description. +type Database struct { + path string + data map[string]TokenData + flock *flock.Flock + sync.Mutex +} + +// TokenData is a p2p network token with a name and description. +type TokenData struct { + Name string `json:"name"` + Description string `json:"description"` + Clusters []ClusterData + Failures int +} + +type ClusterData struct { + Workers []string + Type string + NetworkID string +} + +// NewDatabase creates a new Database with the given path. +func NewDatabase(path string) (*Database, error) { + fileLock := flock.New(path + ".lock") + db := &Database{ + data: make(map[string]TokenData), + path: path, + flock: fileLock, + } + return db, db.load() +} + +// Get retrieves a Token from the Database by its token. +func (db *Database) Get(token string) (TokenData, bool) { + db.flock.Lock() // we are making sure that the file is not being written to + defer db.flock.Unlock() + db.Lock() // we are making sure that is safe if called by another instance in the same process + defer db.Unlock() + db.load() + t, ok := db.data[token] + return t, ok +} + +// Set stores a Token in the Database by its token. +func (db *Database) Set(token string, t TokenData) error { + db.flock.Lock() + defer db.flock.Unlock() + db.Lock() + defer db.Unlock() + db.load() + db.data[token] = t + + return db.save() +} + +// Delete removes a Token from the Database by its token. +func (db *Database) Delete(token string) error { + db.flock.Lock() + defer db.flock.Unlock() + db.Lock() + defer db.Unlock() + db.load() + delete(db.data, token) + return db.save() +} + +func (db *Database) TokenList() []string { + db.flock.Lock() + defer db.flock.Unlock() + db.Lock() + defer db.Unlock() + db.load() + tokens := []string{} + for k := range db.data { + tokens = append(tokens, k) + } + + sort.Slice(tokens, func(i, j int) bool { + // sort by token + return tokens[i] < tokens[j] + }) + + return tokens +} + +// load reads the Database from disk. +func (db *Database) load() error { + if _, err := os.Stat(db.path); os.IsNotExist(err) { + return nil + } + + // Read the file from disk + // Unmarshal the JSON into db.data + f, err := os.ReadFile(db.path) + if err != nil { + return err + } + return json.Unmarshal(f, &db.data) +} + +// Save writes the Database to disk. +func (db *Database) save() error { + // Marshal db.data into JSON + // Write the JSON to the file + f, err := os.Create(db.path) + if err != nil { + return err + } + defer f.Close() + return json.NewEncoder(f).Encode(db.data) +} diff --git a/core/explorer/database_test.go b/core/explorer/database_test.go new file mode 100644 index 000000000000..7f2cbd268a36 --- /dev/null +++ b/core/explorer/database_test.go @@ -0,0 +1,92 @@ +package explorer_test + +import ( + "os" + + . "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" + + "github.com/mudler/LocalAI/core/explorer" +) + +var _ = Describe("Database", func() { + var ( + dbPath string + db *explorer.Database + err error + ) + + BeforeEach(func() { + // Create a temporary file path for the database + dbPath = "test_db.json" + db, err = explorer.NewDatabase(dbPath) + Expect(err).To(BeNil()) + }) + + AfterEach(func() { + // Clean up the temporary database file + os.Remove(dbPath) + }) + + Context("when managing tokens", func() { + It("should add and retrieve a token", func() { + token := "token123" + t := explorer.TokenData{Name: "TokenName", Description: "A test token"} + + err = db.Set(token, t) + Expect(err).To(BeNil()) + + retrievedToken, exists := db.Get(token) + Expect(exists).To(BeTrue()) + Expect(retrievedToken).To(Equal(t)) + }) + + It("should delete a token", func() { + token := "token123" + t := explorer.TokenData{Name: "TokenName", Description: "A test token"} + + err = db.Set(token, t) + Expect(err).To(BeNil()) + + err = db.Delete(token) + Expect(err).To(BeNil()) + + _, exists := db.Get(token) + Expect(exists).To(BeFalse()) + }) + + It("should persist data to disk", func() { + token := "token123" + t := explorer.TokenData{Name: "TokenName", Description: "A test token"} + + err = db.Set(token, t) + Expect(err).To(BeNil()) + + // Recreate the database object to simulate reloading from disk + db, err = explorer.NewDatabase(dbPath) + Expect(err).To(BeNil()) + + retrievedToken, exists := db.Get(token) + Expect(exists).To(BeTrue()) + Expect(retrievedToken).To(Equal(t)) + + // Check the token list + tokenList := db.TokenList() + Expect(tokenList).To(ContainElement(token)) + }) + }) + + Context("when loading an empty or non-existent file", func() { + It("should start with an empty database", func() { + dbPath = "empty_db.json" + db, err = explorer.NewDatabase(dbPath) + Expect(err).To(BeNil()) + + _, exists := db.Get("nonexistent") + Expect(exists).To(BeFalse()) + + // Clean up + os.Remove(dbPath) + }) + }) +}) diff --git a/core/explorer/discovery.go b/core/explorer/discovery.go new file mode 100644 index 000000000000..fe6470cb825d --- /dev/null +++ b/core/explorer/discovery.go @@ -0,0 +1,213 @@ +package explorer + +import ( + "context" + "fmt" + "strings" + "sync" + "time" + + "github.com/rs/zerolog/log" + + "github.com/mudler/LocalAI/core/p2p" + "github.com/mudler/edgevpn/pkg/blockchain" +) + +type DiscoveryServer struct { + sync.Mutex + database *Database + connectionTime time.Duration + errorThreshold int +} + +// NewDiscoveryServer creates a new DiscoveryServer with the given Database. 
+// it keeps the db state in sync with the network state +func NewDiscoveryServer(db *Database, dur time.Duration, failureThreshold int) *DiscoveryServer { + if dur == 0 { + dur = 50 * time.Second + } + if failureThreshold == 0 { + failureThreshold = 3 + } + return &DiscoveryServer{ + database: db, + connectionTime: dur, + errorThreshold: failureThreshold, + } +} + +type Network struct { + Clusters []ClusterData +} + +func (s *DiscoveryServer) runBackground() { + if len(s.database.TokenList()) == 0 { + time.Sleep(5 * time.Second) // avoid busy loop + return + } + + for _, token := range s.database.TokenList() { + c, cancel := context.WithTimeout(context.Background(), s.connectionTime) + defer cancel() + + // Connect to the network + // Get the number of nodes + // save it in the current state (mutex) + // do not do in parallel + n, err := p2p.NewNode(token) + if err != nil { + log.Err(err).Msg("Failed to create node") + s.failedToken(token) + continue + } + + err = n.Start(c) + if err != nil { + log.Err(err).Msg("Failed to start node") + s.failedToken(token) + continue + } + + ledger, err := n.Ledger() + if err != nil { + log.Err(err).Msg("Failed to start ledger") + s.failedToken(token) + continue + } + + networkData := make(chan ClusterData) + + // get the network data - it takes the whole timeout + // as we might not be connected to the network yet, + // and few attempts would have to be made before bailing out + go s.retrieveNetworkData(c, ledger, networkData) + + hasWorkers := false + ledgerK := []ClusterData{} + for key := range networkData { + ledgerK = append(ledgerK, key) + if len(key.Workers) > 0 { + hasWorkers = true + } + } + + log.Debug().Any("network", token).Msgf("Network has %d clusters", len(ledgerK)) + if len(ledgerK) != 0 { + for _, k := range ledgerK { + log.Debug().Any("network", token).Msgf("Clusterdata %+v", k) + } + } + + if hasWorkers { + s.Lock() + data, _ := s.database.Get(token) + (&data).Clusters = ledgerK + (&data).Failures = 0 + s.database.Set(token, data) + s.Unlock() + } else { + s.failedToken(token) + } + } + + s.deleteFailedConnections() +} + +func (s *DiscoveryServer) failedToken(token string) { + s.Lock() + defer s.Unlock() + data, _ := s.database.Get(token) + (&data).Failures++ + s.database.Set(token, data) +} + +func (s *DiscoveryServer) deleteFailedConnections() { + s.Lock() + defer s.Unlock() + for _, t := range s.database.TokenList() { + data, _ := s.database.Get(t) + if data.Failures > s.errorThreshold { + log.Info().Any("token", t).Msg("Token has been removed from the database") + s.database.Delete(t) + } + } +} + +func (s *DiscoveryServer) retrieveNetworkData(c context.Context, ledger *blockchain.Ledger, networkData chan ClusterData) { + clusters := map[string]ClusterData{} + + defer func() { + for _, n := range clusters { + networkData <- n + } + close(networkData) + }() + + for { + select { + case <-c.Done(): + return + default: + time.Sleep(5 * time.Second) + + data := ledger.LastBlock().Storage + LEDGER: + for d := range data { + toScanForWorkers := false + cd := ClusterData{} + isWorkerCluster := d == p2p.WorkerID || (strings.Contains(d, "_") && strings.Contains(d, p2p.WorkerID)) + isFederatedCluster := d == p2p.FederatedID || (strings.Contains(d, "_") && strings.Contains(d, p2p.FederatedID)) + switch { + case isWorkerCluster: + toScanForWorkers = true + cd.Type = "worker" + case isFederatedCluster: + toScanForWorkers = true + cd.Type = "federated" + } + + if strings.Contains(d, "_") { + cd.NetworkID = strings.Split(d, "_")[0] + } + + if 
!toScanForWorkers { + continue LEDGER + } + + atLeastOneWorker := false + DATA: + for _, v := range data[d] { + nd := &p2p.NodeData{} + if err := v.Unmarshal(nd); err != nil { + continue DATA + } + + if nd.IsOnline() { + atLeastOneWorker = true + (&cd).Workers = append(cd.Workers, nd.ID) + } + } + + if atLeastOneWorker { + clusters[d] = cd + } + } + } + } +} + +// Start the discovery server. This is meant to be run in to a goroutine. +func (s *DiscoveryServer) Start(ctx context.Context, keepRunning bool) error { + for { + select { + case <-ctx.Done(): + return fmt.Errorf("context cancelled") + default: + // Collect data + s.runBackground() + if !keepRunning { + return nil + } + } + } +} diff --git a/core/explorer/explorer_suite_test.go b/core/explorer/explorer_suite_test.go new file mode 100644 index 000000000000..fc718d5f8dfa --- /dev/null +++ b/core/explorer/explorer_suite_test.go @@ -0,0 +1,13 @@ +package explorer_test + +import ( + "testing" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" +) + +func TestExplorer(t *testing.T) { + RegisterFailHandler(Fail) + RunSpecs(t, "Explorer test suite") +} diff --git a/core/gallery/gallery.go b/core/gallery/gallery.go index d102eac8a7dd..6ced6244128f 100644 --- a/core/gallery/gallery.go +++ b/core/gallery/gallery.go @@ -131,7 +131,8 @@ func AvailableGalleryModels(galleries []config.Gallery, basePath string) ([]*Gal func findGalleryURLFromReferenceURL(url string, basePath string) (string, error) { var refFile string - err := downloader.DownloadAndUnmarshal(url, basePath, func(url string, d []byte) error { + uri := downloader.URI(url) + err := uri.DownloadAndUnmarshal(basePath, func(url string, d []byte) error { refFile = string(d) if len(refFile) == 0 { return fmt.Errorf("invalid reference file at url %s: %s", url, d) @@ -153,8 +154,9 @@ func getGalleryModels(gallery config.Gallery, basePath string) ([]*GalleryModel, return models, err } } + uri := downloader.URI(gallery.URL) - err := downloader.DownloadAndUnmarshal(gallery.URL, basePath, func(url string, d []byte) error { + err := uri.DownloadAndUnmarshal(basePath, func(url string, d []byte) error { return yaml.Unmarshal(d, &models) }) if err != nil { @@ -204,34 +206,33 @@ func DeleteModelFromSystem(basePath string, name string, additionalFiles []strin log.Error().Err(err).Msgf("failed to read gallery file %s", configFile) } + var filesToRemove []string + // Remove additional files if galleryconfig != nil { for _, f := range galleryconfig.Files { fullPath := filepath.Join(basePath, f.Filename) - log.Debug().Msgf("Removing file %s", fullPath) - if e := os.Remove(fullPath); e != nil { - err = errors.Join(err, fmt.Errorf("failed to remove file %s: %w", f.Filename, e)) - } + filesToRemove = append(filesToRemove, fullPath) } } for _, f := range additionalFiles { fullPath := filepath.Join(filepath.Join(basePath, f)) - log.Debug().Msgf("Removing additional file %s", fullPath) - if e := os.Remove(fullPath); e != nil { - err = errors.Join(err, fmt.Errorf("failed to remove file %s: %w", f, e)) - } + filesToRemove = append(filesToRemove, fullPath) } - log.Debug().Msgf("Removing model config file %s", configFile) + filesToRemove = append(filesToRemove, configFile) + filesToRemove = append(filesToRemove, galleryFile) - // Delete the model config file - if e := os.Remove(configFile); e != nil { - err = errors.Join(err, fmt.Errorf("failed to remove file %s: %w", configFile, e)) - } + // skip duplicates + filesToRemove = utils.Unique(filesToRemove) - // Delete gallery config file - 
os.Remove(galleryFile) + // Removing files + for _, f := range filesToRemove { + if e := os.Remove(f); e != nil { + err = errors.Join(err, fmt.Errorf("failed to remove file %s: %w", f, e)) + } + } return err } @@ -253,8 +254,8 @@ func SafetyScanGalleryModels(galleries []config.Gallery, basePath string) error func SafetyScanGalleryModel(galleryModel *GalleryModel) error { for _, file := range galleryModel.AdditionalFiles { - scanResults, err := downloader.HuggingFaceScan(file.URI) - if err != nil && !errors.Is(err, downloader.ErrNonHuggingFaceFile) { + scanResults, err := downloader.HuggingFaceScan(downloader.URI(file.URI)) + if err != nil && errors.Is(err, downloader.ErrUnsafeFilesFound) { log.Error().Str("model", galleryModel.Name).Strs("clamAV", scanResults.ClamAVInfectedFiles).Strs("pickles", scanResults.DangerousPickles).Msg("Contains unsafe file(s)!") return err } diff --git a/core/gallery/models.go b/core/gallery/models.go index 32460a9cb76d..dec6312eaf30 100644 --- a/core/gallery/models.go +++ b/core/gallery/models.go @@ -68,7 +68,8 @@ type PromptTemplate struct { func GetGalleryConfigFromURL(url string, basePath string) (Config, error) { var config Config - err := downloader.DownloadAndUnmarshal(url, basePath, func(url string, d []byte) error { + uri := downloader.URI(url) + err := uri.DownloadAndUnmarshal(basePath, func(url string, d []byte) error { return yaml.Unmarshal(d, &config) }) if err != nil { @@ -118,14 +119,14 @@ func InstallModel(basePath, nameOverride string, config *Config, configOverrides filePath := filepath.Join(basePath, file.Filename) if enforceScan { - scanResults, err := downloader.HuggingFaceScan(file.URI) - if err != nil && !errors.Is(err, downloader.ErrNonHuggingFaceFile) { + scanResults, err := downloader.HuggingFaceScan(downloader.URI(file.URI)) + if err != nil && errors.Is(err, downloader.ErrUnsafeFilesFound) { log.Error().Str("model", config.Name).Strs("clamAV", scanResults.ClamAVInfectedFiles).Strs("pickles", scanResults.DangerousPickles).Msg("Contains unsafe file(s)!") return err } } - - if err := downloader.DownloadFile(file.URI, filePath, file.SHA256, i, len(config.Files), downloadStatus); err != nil { + uri := downloader.URI(file.URI) + if err := uri.DownloadFile(filePath, file.SHA256, i, len(config.Files), downloadStatus); err != nil { return err } } diff --git a/core/http/app_test.go b/core/http/app_test.go index 3fb1658159d5..a837e20c01a4 100644 --- a/core/http/app_test.go +++ b/core/http/app_test.go @@ -73,8 +73,9 @@ func getModelStatus(url string) (response map[string]interface{}) { } func getModels(url string) (response []gallery.GalleryModel) { + uri := downloader.URI(url) // TODO: No tests currently seem to exercise file:// urls. Fix? 
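The refactor running through this diff replaces the free functions (`downloader.ConvertURL`, `LooksLikeURL`, `DownloadFile`, `DownloadAndUnmarshal`) with methods on the `downloader.URI` string type, as the hunks above show. A rough sketch of the resulting call pattern follows; it is illustrative only, the URLs are placeholders, and it assumes the `pkg/utils` import path used elsewhere in the repo for the stock progress callback.

```go
package main

import (
	"encoding/json"
	"fmt"

	"github.com/mudler/LocalAI/pkg/downloader"
	"github.com/mudler/LocalAI/pkg/utils"
)

func main() {
	// Wrap a raw model reference in the new URI string type.
	uri := downloader.URI("https://example.com/gallery/index.json") // illustrative URL

	if uri.LooksLikeURL() {
		// Fetch and unmarshal in one call, as getModels above now does.
		var entries []map[string]interface{}
		err := uri.DownloadAndUnmarshal("", func(url string, d []byte) error {
			return json.Unmarshal(d, &entries)
		})
		fmt.Println(len(entries), err)
	}

	// Single-file download (file 1 of 1) with the default progress callback.
	model := downloader.URI("https://example.com/model.gguf") // illustrative URL
	_ = model.DownloadFile("/tmp/model.gguf", "", 1, 1, utils.DisplayDownloadFunction)
}
```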
- downloader.DownloadAndUnmarshal(url, "", func(url string, i []byte) error { + uri.DownloadAndUnmarshal("", func(url string, i []byte) error { // Unmarshal YAML data into a struct return json.Unmarshal(i, &response) }) @@ -562,32 +563,6 @@ var _ = Describe("API test", func() { Expect(res["unit"]).To(Equal("celcius"), fmt.Sprint(res)) Expect(string(resp2.Choices[0].FinishReason)).To(Equal("function_call"), fmt.Sprint(resp2.Choices[0].FinishReason)) }) - - It("runs gpt4all", Label("gpt4all"), func() { - if runtime.GOOS != "linux" { - Skip("test supported only on linux") - } - - response := postModelApplyRequest("http://127.0.0.1:9090/models/apply", modelApplyRequest{ - URL: "github:go-skynet/model-gallery/gpt4all-j.yaml", - Name: "gpt4all-j", - }) - - Expect(response["uuid"]).ToNot(BeEmpty(), fmt.Sprint(response)) - - uuid := response["uuid"].(string) - - Eventually(func() bool { - response := getModelStatus("http://127.0.0.1:9090/models/jobs/" + uuid) - return response["processed"].(bool) - }, "960s", "10s").Should(Equal(true)) - - resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "gpt4all-j", Messages: []openai.ChatCompletionMessage{openai.ChatCompletionMessage{Role: "user", Content: "How are you?"}}}) - Expect(err).ToNot(HaveOccurred()) - Expect(len(resp.Choices)).To(Equal(1)) - Expect(resp.Choices[0].Message.Content).To(ContainSubstring("well")) - }) - }) }) @@ -791,20 +766,6 @@ var _ = Describe("API test", func() { Expect(resp.Choices[0].Message.Content).ToNot(BeEmpty()) }) - It("can generate completions from model configs", func() { - resp, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "gpt4all", Prompt: testPrompt}) - Expect(err).ToNot(HaveOccurred()) - Expect(len(resp.Choices)).To(Equal(1)) - Expect(resp.Choices[0].Text).ToNot(BeEmpty()) - }) - - It("can generate chat completions from model configs", func() { - resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "gpt4all-2", Messages: []openai.ChatCompletionMessage{openai.ChatCompletionMessage{Role: "user", Content: testPrompt}}}) - Expect(err).ToNot(HaveOccurred()) - Expect(len(resp.Choices)).To(Equal(1)) - Expect(resp.Choices[0].Message.Content).ToNot(BeEmpty()) - }) - It("returns errors", func() { _, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "foomodel", Prompt: testPrompt}) Expect(err).To(HaveOccurred()) diff --git a/core/http/elements/gallery.go b/core/http/elements/gallery.go index 3b3741d8f210..91a12310cf75 100644 --- a/core/http/elements/gallery.go +++ b/core/http/elements/gallery.go @@ -9,7 +9,6 @@ import ( "github.com/mudler/LocalAI/core/gallery" "github.com/mudler/LocalAI/core/p2p" "github.com/mudler/LocalAI/core/services" - "github.com/mudler/LocalAI/pkg/xsync" ) const ( @@ -372,7 +371,12 @@ func dropBadChars(s string) string { return strings.ReplaceAll(s, "@", "__") } -func ListModels(models []*gallery.GalleryModel, processing *xsync.SyncedMap[string, string], galleryService *services.GalleryService) string { +type ProcessTracker interface { + Exists(string) bool + Get(string) string +} + +func ListModels(models []*gallery.GalleryModel, processTracker ProcessTracker, galleryService *services.GalleryService) string { modelsElements := []elem.Node{} descriptionDiv := func(m *gallery.GalleryModel) elem.Node { return elem.Div( @@ -396,7 +400,7 @@ func ListModels(models []*gallery.GalleryModel, processing *xsync.SyncedMap[stri actionDiv := func(m *gallery.GalleryModel) 
elem.Node { galleryID := fmt.Sprintf("%s@%s", m.Gallery.Name, m.Name) - currentlyProcessing := processing.Exists(galleryID) + currentlyProcessing := processTracker.Exists(galleryID) jobID := "" isDeletionOp := false if currentlyProcessing { @@ -404,7 +408,7 @@ func ListModels(models []*gallery.GalleryModel, processing *xsync.SyncedMap[stri if status != nil && status.Deletion { isDeletionOp = true } - jobID = processing.Get(galleryID) + jobID = processTracker.Get(galleryID) // TODO: // case not handled, if status == nil : "Waiting" } @@ -497,8 +501,9 @@ func ListModels(models []*gallery.GalleryModel, processing *xsync.SyncedMap[stri }, elem.Img(attrs.Props{ // "class": "rounded-t-lg object-fit object-center h-96", - "class": "rounded-t-lg max-h-48 max-w-96 object-cover mt-3", - "src": m.Icon, + "class": "rounded-t-lg max-h-48 max-w-96 object-cover mt-3", + "src": m.Icon, + "loading": "lazy", }), ), ), diff --git a/core/http/endpoints/explorer/dashboard.go b/core/http/endpoints/explorer/dashboard.go new file mode 100644 index 000000000000..9c731d9a4f78 --- /dev/null +++ b/core/http/endpoints/explorer/dashboard.go @@ -0,0 +1,102 @@ +package explorer + +import ( + "encoding/base64" + "sort" + + "github.com/gofiber/fiber/v2" + "github.com/mudler/LocalAI/core/explorer" + "github.com/mudler/LocalAI/internal" +) + +func Dashboard() func(*fiber.Ctx) error { + return func(c *fiber.Ctx) error { + summary := fiber.Map{ + "Title": "LocalAI API - " + internal.PrintableVersion(), + "Version": internal.PrintableVersion(), + } + + if string(c.Context().Request.Header.ContentType()) == "application/json" || len(c.Accepts("html")) == 0 { + // The client expects a JSON response + return c.Status(fiber.StatusOK).JSON(summary) + } else { + // Render index + return c.Render("views/explorer", summary) + } + } +} + +type AddNetworkRequest struct { + Token string `json:"token"` + Name string `json:"name"` + Description string `json:"description"` +} + +type Network struct { + explorer.TokenData + Token string `json:"token"` +} + +func ShowNetworks(db *explorer.Database) func(*fiber.Ctx) error { + return func(c *fiber.Ctx) error { + results := []Network{} + for _, token := range db.TokenList() { + networkData, exists := db.Get(token) // get the token data + hasWorkers := false + for _, cluster := range networkData.Clusters { + if len(cluster.Workers) > 0 { + hasWorkers = true + break + } + } + if exists && hasWorkers { + results = append(results, Network{TokenData: networkData, Token: token}) + } + } + + // order by number of clusters + sort.Slice(results, func(i, j int) bool { + return len(results[i].Clusters) > len(results[j].Clusters) + }) + + return c.JSON(results) + } +} + +func AddNetwork(db *explorer.Database) func(*fiber.Ctx) error { + return func(c *fiber.Ctx) error { + request := new(AddNetworkRequest) + if err := c.BodyParser(request); err != nil { + return c.Status(fiber.StatusBadRequest).JSON(fiber.Map{"error": "Cannot parse JSON"}) + } + + if request.Token == "" { + return c.Status(fiber.StatusBadRequest).JSON(fiber.Map{"error": "Token is required"}) + } + + if request.Name == "" { + return c.Status(fiber.StatusBadRequest).JSON(fiber.Map{"error": "Name is required"}) + } + + if request.Description == "" { + return c.Status(fiber.StatusBadRequest).JSON(fiber.Map{"error": "Description is required"}) + } + + // TODO: check if token is valid, otherwise reject + // try to decode the token from base64 + _, err := base64.StdEncoding.DecodeString(request.Token) + if err != nil { + return 
c.Status(fiber.StatusBadRequest).JSON(fiber.Map{"error": "Invalid token"}) + } + + if _, exists := db.Get(request.Token); exists { + return c.Status(fiber.StatusBadRequest).JSON(fiber.Map{"error": "Token already exists"}) + } + err = db.Set(request.Token, explorer.TokenData{Name: request.Name, Description: request.Description}) + if err != nil { + return c.Status(fiber.StatusInternalServerError).JSON(fiber.Map{"error": "Cannot add token"}) + } + + return c.Status(fiber.StatusOK).JSON(fiber.Map{"message": "Token added"}) + } +} diff --git a/core/http/endpoints/localai/p2p.go b/core/http/endpoints/localai/p2p.go index cab0bb5daf59..bbcee8c801e1 100644 --- a/core/http/endpoints/localai/p2p.go +++ b/core/http/endpoints/localai/p2p.go @@ -11,12 +11,14 @@ import ( // @Summary Returns available P2P nodes // @Success 200 {object} []schema.P2PNodesResponse "Response" // @Router /api/p2p [get] -func ShowP2PNodes(c *fiber.Ctx) error { +func ShowP2PNodes(appConfig *config.ApplicationConfig) func(*fiber.Ctx) error { // Render index - return c.JSON(schema.P2PNodesResponse{ - Nodes: p2p.GetAvailableNodes(""), - FederatedNodes: p2p.GetAvailableNodes(p2p.FederatedID), - }) + return func(c *fiber.Ctx) error { + return c.JSON(schema.P2PNodesResponse{ + Nodes: p2p.GetAvailableNodes(p2p.NetworkID(appConfig.P2PNetworkID, p2p.WorkerID)), + FederatedNodes: p2p.GetAvailableNodes(p2p.NetworkID(appConfig.P2PNetworkID, p2p.FederatedID)), + }) + } } // ShowP2PToken returns the P2P token diff --git a/core/http/endpoints/localai/welcome.go b/core/http/endpoints/localai/welcome.go index 5d217173c3e5..396c4084b5e2 100644 --- a/core/http/endpoints/localai/welcome.go +++ b/core/http/endpoints/localai/welcome.go @@ -17,7 +17,10 @@ func WelcomeEndpoint(appConfig *config.ApplicationConfig, backendConfigs := cl.GetAllBackendConfigs() galleryConfigs := map[string]*gallery.Config{} + modelsWithBackendConfig := map[string]interface{}{} + for _, m := range backendConfigs { + modelsWithBackendConfig[m.Name] = nil cfg, err := gallery.GetLocalModelConfiguration(ml.ModelPath, m.Name) if err != nil { @@ -32,7 +35,7 @@ func WelcomeEndpoint(appConfig *config.ApplicationConfig, modelsWithoutConfig := []string{} for _, m := range models { - if _, ok := galleryConfigs[m]; !ok { + if _, ok := modelsWithBackendConfig[m]; !ok { modelsWithoutConfig = append(modelsWithoutConfig, m) } } diff --git a/core/http/endpoints/openai/chat.go b/core/http/endpoints/openai/chat.go index 86b75601bc45..12a14eace4fb 100644 --- a/core/http/endpoints/openai/chat.go +++ b/core/http/endpoints/openai/chat.go @@ -172,6 +172,14 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup funcs := input.Functions shouldUseFn := len(input.Functions) > 0 && config.ShouldUseFunctions() + strictMode := false + + for _, f := range input.Functions { + if f.Strict { + strictMode = true + break + } + } // Allow the user to set custom actions via config file // to be "embedded" in each model @@ -187,10 +195,33 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup if config.ResponseFormatMap != nil { d := schema.ChatCompletionResponseFormat{} - dat, _ := json.Marshal(config.ResponseFormatMap) - _ = json.Unmarshal(dat, &d) + dat, err := json.Marshal(config.ResponseFormatMap) + if err != nil { + return err + } + err = json.Unmarshal(dat, &d) + if err != nil { + return err + } if d.Type == "json_object" { input.Grammar = functions.JSONBNF + } else if d.Type == "json_schema" { + d := schema.JsonSchemaRequest{} + dat, err := 
json.Marshal(config.ResponseFormatMap) + if err != nil { + return err + } + err = json.Unmarshal(dat, &d) + if err != nil { + return err + } + fs := &functions.JSONFunctionStructure{ + AnyOf: []functions.Item{d.JsonSchema.Schema}, + } + g, err := fs.Grammar(config.FunctionsConfig.GrammarOptions()...) + if err == nil { + input.Grammar = g + } } } @@ -201,7 +232,7 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup } switch { - case !config.FunctionsConfig.GrammarConfig.NoGrammar && shouldUseFn: + case (!config.FunctionsConfig.GrammarConfig.NoGrammar || strictMode) && shouldUseFn: noActionGrammar := functions.Function{ Name: noActionName, Description: noActionDescription, diff --git a/core/http/explorer.go b/core/http/explorer.go new file mode 100644 index 000000000000..bdcb93b16d55 --- /dev/null +++ b/core/http/explorer.go @@ -0,0 +1,46 @@ +package http + +import ( + "net/http" + + "github.com/gofiber/fiber/v2" + "github.com/gofiber/fiber/v2/middleware/favicon" + "github.com/gofiber/fiber/v2/middleware/filesystem" + "github.com/mudler/LocalAI/core/explorer" + "github.com/mudler/LocalAI/core/http/routes" +) + +func Explorer(db *explorer.Database) *fiber.App { + + fiberCfg := fiber.Config{ + Views: renderEngine(), + // We disable the Fiber startup message as it does not conform to structured logging. + // We register a startup log line with connection information in the OnListen hook to keep things user friendly though + DisableStartupMessage: false, + // Override default error handler + } + + app := fiber.New(fiberCfg) + + routes.RegisterExplorerRoutes(app, db) + + httpFS := http.FS(embedDirStatic) + + app.Use(favicon.New(favicon.Config{ + URL: "/favicon.ico", + FileSystem: httpFS, + File: "static/favicon.ico", + })) + + app.Use("/static", filesystem.New(filesystem.Config{ + Root: httpFS, + PathPrefix: "static", + Browse: true, + })) + + // Define a custom 404 handler + // Note: keep this at the bottom! 
+ app.Use(notFoundHandler) + + return app +} diff --git a/core/http/routes/explorer.go b/core/http/routes/explorer.go new file mode 100644 index 000000000000..960b476b8ffc --- /dev/null +++ b/core/http/routes/explorer.go @@ -0,0 +1,13 @@ +package routes + +import ( + "github.com/gofiber/fiber/v2" + coreExplorer "github.com/mudler/LocalAI/core/explorer" + "github.com/mudler/LocalAI/core/http/endpoints/explorer" +) + +func RegisterExplorerRoutes(app *fiber.App, db *coreExplorer.Database) { + app.Get("/", explorer.Dashboard()) + app.Post("/network/add", explorer.AddNetwork(db)) + app.Get("/networks", explorer.ShowNetworks(db)) +} diff --git a/core/http/routes/localai.go b/core/http/routes/localai.go index b8a811b5faf0..105991e85904 100644 --- a/core/http/routes/localai.go +++ b/core/http/routes/localai.go @@ -21,17 +21,18 @@ func RegisterLocalAIRoutes(app *fiber.App, app.Get("/swagger/*", swagger.HandlerDefault) // default // LocalAI API endpoints + if !appConfig.DisableGalleryEndpoint { + modelGalleryEndpointService := localai.CreateModelGalleryEndpointService(appConfig.Galleries, appConfig.ModelPath, galleryService) + app.Post("/models/apply", auth, modelGalleryEndpointService.ApplyModelGalleryEndpoint()) + app.Post("/models/delete/:name", auth, modelGalleryEndpointService.DeleteModelGalleryEndpoint()) - modelGalleryEndpointService := localai.CreateModelGalleryEndpointService(appConfig.Galleries, appConfig.ModelPath, galleryService) - app.Post("/models/apply", auth, modelGalleryEndpointService.ApplyModelGalleryEndpoint()) - app.Post("/models/delete/:name", auth, modelGalleryEndpointService.DeleteModelGalleryEndpoint()) - - app.Get("/models/available", auth, modelGalleryEndpointService.ListModelFromGalleryEndpoint()) - app.Get("/models/galleries", auth, modelGalleryEndpointService.ListModelGalleriesEndpoint()) - app.Post("/models/galleries", auth, modelGalleryEndpointService.AddModelGalleryEndpoint()) - app.Delete("/models/galleries", auth, modelGalleryEndpointService.RemoveModelGalleryEndpoint()) - app.Get("/models/jobs/:uuid", auth, modelGalleryEndpointService.GetOpStatusEndpoint()) - app.Get("/models/jobs", auth, modelGalleryEndpointService.GetAllStatusEndpoint()) + app.Get("/models/available", auth, modelGalleryEndpointService.ListModelFromGalleryEndpoint()) + app.Get("/models/galleries", auth, modelGalleryEndpointService.ListModelGalleriesEndpoint()) + app.Post("/models/galleries", auth, modelGalleryEndpointService.AddModelGalleryEndpoint()) + app.Delete("/models/galleries", auth, modelGalleryEndpointService.RemoveModelGalleryEndpoint()) + app.Get("/models/jobs/:uuid", auth, modelGalleryEndpointService.GetOpStatusEndpoint()) + app.Get("/models/jobs", auth, modelGalleryEndpointService.GetAllStatusEndpoint()) + } app.Post("/tts", auth, localai.TTSEndpoint(cl, ml, appConfig)) @@ -59,7 +60,7 @@ func RegisterLocalAIRoutes(app *fiber.App, // p2p if p2p.IsP2PEnabled() { - app.Get("/api/p2p", auth, localai.ShowP2PNodes) + app.Get("/api/p2p", auth, localai.ShowP2PNodes(appConfig)) app.Get("/api/p2p/token", auth, localai.ShowP2PToken(appConfig)) } diff --git a/core/http/routes/ui.go b/core/http/routes/ui.go index 33706944fa2e..6dfb3f433df2 100644 --- a/core/http/routes/ui.go +++ b/core/http/routes/ui.go @@ -21,6 +21,40 @@ import ( "github.com/google/uuid" ) +type modelOpCache struct { + status *xsync.SyncedMap[string, string] +} + +func NewModelOpCache() *modelOpCache { + return &modelOpCache{ + status: xsync.NewSyncedMap[string, string](), + } +} + +func (m *modelOpCache) Set(key string, value 
string) { + m.status.Set(key, value) +} + +func (m *modelOpCache) Get(key string) string { + return m.status.Get(key) +} + +func (m *modelOpCache) DeleteUUID(uuid string) { + for _, k := range m.status.Keys() { + if m.status.Get(k) == uuid { + m.status.Delete(k) + } + } +} + +func (m *modelOpCache) Map() map[string]string { + return m.status.Map() +} + +func (m *modelOpCache) Exists(key string) bool { + return m.status.Exists(key) +} + func RegisterUIRoutes(app *fiber.App, cl *config.BackendConfigLoader, ml *model.ModelLoader, @@ -29,7 +63,7 @@ func RegisterUIRoutes(app *fiber.App, auth func(*fiber.Ctx) error) { // keeps the state of models that are being installed from the UI - var processingModels = xsync.NewSyncedMap[string, string]() + var processingModels = NewModelOpCache() // modelStatus returns the current status of the models being processed (installation or deletion) // it is called asynchonously from the UI @@ -62,6 +96,7 @@ func RegisterUIRoutes(app *fiber.App, //"FederatedNodes": p2p.GetAvailableNodes(p2p.FederatedID), "IsP2PEnabled": p2p.IsP2PEnabled(), "P2PToken": appConfig.P2PToken, + "NetworkID": appConfig.P2PNetworkID, } // Render index @@ -70,202 +105,202 @@ func RegisterUIRoutes(app *fiber.App, /* show nodes live! */ app.Get("/p2p/ui/workers", auth, func(c *fiber.Ctx) error { - return c.SendString(elements.P2PNodeBoxes(p2p.GetAvailableNodes(""))) + return c.SendString(elements.P2PNodeBoxes(p2p.GetAvailableNodes(p2p.NetworkID(appConfig.P2PNetworkID, p2p.WorkerID)))) }) app.Get("/p2p/ui/workers-federation", auth, func(c *fiber.Ctx) error { - return c.SendString(elements.P2PNodeBoxes(p2p.GetAvailableNodes(p2p.FederatedID))) + return c.SendString(elements.P2PNodeBoxes(p2p.GetAvailableNodes(p2p.NetworkID(appConfig.P2PNetworkID, p2p.FederatedID)))) }) app.Get("/p2p/ui/workers-stats", auth, func(c *fiber.Ctx) error { - return c.SendString(elements.P2PNodeStats(p2p.GetAvailableNodes(""))) + return c.SendString(elements.P2PNodeStats(p2p.GetAvailableNodes(p2p.NetworkID(appConfig.P2PNetworkID, p2p.WorkerID)))) }) app.Get("/p2p/ui/workers-federation-stats", auth, func(c *fiber.Ctx) error { - return c.SendString(elements.P2PNodeStats(p2p.GetAvailableNodes(p2p.FederatedID))) + return c.SendString(elements.P2PNodeStats(p2p.GetAvailableNodes(p2p.NetworkID(appConfig.P2PNetworkID, p2p.FederatedID)))) }) } - // Show the Models page (all models) - app.Get("/browse", auth, func(c *fiber.Ctx) error { - term := c.Query("term") + if !appConfig.DisableGalleryEndpoint { + + // Show the Models page (all models) + app.Get("/browse", auth, func(c *fiber.Ctx) error { + term := c.Query("term") - models, _ := gallery.AvailableGalleryModels(appConfig.Galleries, appConfig.ModelPath) + models, _ := gallery.AvailableGalleryModels(appConfig.Galleries, appConfig.ModelPath) - // Get all available tags - allTags := map[string]struct{}{} - tags := []string{} - for _, m := range models { - for _, t := range m.Tags { - allTags[t] = struct{}{} + // Get all available tags + allTags := map[string]struct{}{} + tags := []string{} + for _, m := range models { + for _, t := range m.Tags { + allTags[t] = struct{}{} + } } - } - for t := range allTags { - tags = append(tags, t) - } - sort.Strings(tags) + for t := range allTags { + tags = append(tags, t) + } + sort.Strings(tags) - if term != "" { - models = gallery.GalleryModels(models).Search(term) - } + if term != "" { + models = gallery.GalleryModels(models).Search(term) + } - // Get model statuses - processingModelsData, taskTypes := modelStatus() + // Get model 
statuses + processingModelsData, taskTypes := modelStatus() - summary := fiber.Map{ - "Title": "LocalAI - Models", - "Version": internal.PrintableVersion(), - "Models": template.HTML(elements.ListModels(models, processingModels, galleryService)), - "Repositories": appConfig.Galleries, - "AllTags": tags, - "ProcessingModels": processingModelsData, - "AvailableModels": len(models), - "IsP2PEnabled": p2p.IsP2PEnabled(), - - "TaskTypes": taskTypes, - // "ApplicationConfig": appConfig, - } + summary := fiber.Map{ + "Title": "LocalAI - Models", + "Version": internal.PrintableVersion(), + "Models": template.HTML(elements.ListModels(models, processingModels, galleryService)), + "Repositories": appConfig.Galleries, + "AllTags": tags, + "ProcessingModels": processingModelsData, + "AvailableModels": len(models), + "IsP2PEnabled": p2p.IsP2PEnabled(), + + "TaskTypes": taskTypes, + // "ApplicationConfig": appConfig, + } - // Render index - return c.Render("views/models", summary) - }) + // Render index + return c.Render("views/models", summary) + }) - // Show the models, filtered from the user input - // https://htmx.org/examples/active-search/ - app.Post("/browse/search/models", auth, func(c *fiber.Ctx) error { - form := struct { - Search string `form:"search"` - }{} - if err := c.BodyParser(&form); err != nil { - return c.Status(fiber.StatusBadRequest).SendString(err.Error()) - } + // Show the models, filtered from the user input + // https://htmx.org/examples/active-search/ + app.Post("/browse/search/models", auth, func(c *fiber.Ctx) error { + form := struct { + Search string `form:"search"` + }{} + if err := c.BodyParser(&form); err != nil { + return c.Status(fiber.StatusBadRequest).SendString(err.Error()) + } - models, _ := gallery.AvailableGalleryModels(appConfig.Galleries, appConfig.ModelPath) + models, _ := gallery.AvailableGalleryModels(appConfig.Galleries, appConfig.ModelPath) - return c.SendString(elements.ListModels(gallery.GalleryModels(models).Search(form.Search), processingModels, galleryService)) - }) + return c.SendString(elements.ListModels(gallery.GalleryModels(models).Search(form.Search), processingModels, galleryService)) + }) - /* + /* - Install routes + Install routes - */ + */ - // This route is used when the "Install" button is pressed, we submit here a new job to the gallery service - // https://htmx.org/examples/progress-bar/ - app.Post("/browse/install/model/:id", auth, func(c *fiber.Ctx) error { - galleryID := strings.Clone(c.Params("id")) // note: strings.Clone is required for multiple requests! - log.Debug().Msgf("UI job submitted to install : %+v\n", galleryID) + // This route is used when the "Install" button is pressed, we submit here a new job to the gallery service + // https://htmx.org/examples/progress-bar/ + app.Post("/browse/install/model/:id", auth, func(c *fiber.Ctx) error { + galleryID := strings.Clone(c.Params("id")) // note: strings.Clone is required for multiple requests! 
+ log.Debug().Msgf("UI job submitted to install : %+v\n", galleryID) - id, err := uuid.NewUUID() - if err != nil { - return err - } + id, err := uuid.NewUUID() + if err != nil { + return err + } - uid := id.String() + uid := id.String() - processingModels.Set(galleryID, uid) + processingModels.Set(galleryID, uid) - op := gallery.GalleryOp{ - Id: uid, - GalleryModelName: galleryID, - Galleries: appConfig.Galleries, - } - go func() { - galleryService.C <- op - }() + op := gallery.GalleryOp{ + Id: uid, + GalleryModelName: galleryID, + Galleries: appConfig.Galleries, + } + go func() { + galleryService.C <- op + }() - return c.SendString(elements.StartProgressBar(uid, "0", "Installation")) - }) + return c.SendString(elements.StartProgressBar(uid, "0", "Installation")) + }) - // This route is used when the "Install" button is pressed, we submit here a new job to the gallery service - // https://htmx.org/examples/progress-bar/ - app.Post("/browse/delete/model/:id", auth, func(c *fiber.Ctx) error { - galleryID := strings.Clone(c.Params("id")) // note: strings.Clone is required for multiple requests! - log.Debug().Msgf("UI job submitted to delete : %+v\n", galleryID) - var galleryName = galleryID - if strings.Contains(galleryID, "@") { - // if the galleryID contains a @ it means that it's a model from a gallery - // but we want to delete it from the local models which does not need - // a repository ID - galleryName = strings.Split(galleryID, "@")[1] - } + // This route is used when the "Install" button is pressed, we submit here a new job to the gallery service + // https://htmx.org/examples/progress-bar/ + app.Post("/browse/delete/model/:id", auth, func(c *fiber.Ctx) error { + galleryID := strings.Clone(c.Params("id")) // note: strings.Clone is required for multiple requests! 
+ log.Debug().Msgf("UI job submitted to delete : %+v\n", galleryID) + var galleryName = galleryID + if strings.Contains(galleryID, "@") { + // if the galleryID contains a @ it means that it's a model from a gallery + // but we want to delete it from the local models which does not need + // a repository ID + galleryName = strings.Split(galleryID, "@")[1] + } - id, err := uuid.NewUUID() - if err != nil { - return err - } + id, err := uuid.NewUUID() + if err != nil { + return err + } - uid := id.String() + uid := id.String() - // Track the deletion job by galleryID and galleryName - // The GalleryID contains information about the repository, - // while the GalleryName is ONLY the name of the model - processingModels.Set(galleryName, uid) - processingModels.Set(galleryID, uid) + // Track the deletion job by galleryID and galleryName + // The GalleryID contains information about the repository, + // while the GalleryName is ONLY the name of the model + processingModels.Set(galleryName, uid) + processingModels.Set(galleryID, uid) - op := gallery.GalleryOp{ - Id: uid, - Delete: true, - GalleryModelName: galleryName, - } - go func() { - galleryService.C <- op - cl.RemoveBackendConfig(galleryName) - }() + op := gallery.GalleryOp{ + Id: uid, + Delete: true, + GalleryModelName: galleryName, + } + go func() { + galleryService.C <- op + cl.RemoveBackendConfig(galleryName) + }() - return c.SendString(elements.StartProgressBar(uid, "0", "Deletion")) - }) + return c.SendString(elements.StartProgressBar(uid, "0", "Deletion")) + }) - // Display the job current progress status - // If the job is done, we trigger the /browse/job/:uid route - // https://htmx.org/examples/progress-bar/ - app.Get("/browse/job/progress/:uid", auth, func(c *fiber.Ctx) error { - jobUID := strings.Clone(c.Params("uid")) // note: strings.Clone is required for multiple requests! + // Display the job current progress status + // If the job is done, we trigger the /browse/job/:uid route + // https://htmx.org/examples/progress-bar/ + app.Get("/browse/job/progress/:uid", auth, func(c *fiber.Ctx) error { + jobUID := strings.Clone(c.Params("uid")) // note: strings.Clone is required for multiple requests! 
- status := galleryService.GetStatus(jobUID) - if status == nil { - //fmt.Errorf("could not find any status for ID") - return c.SendString(elements.ProgressBar("0")) - } + status := galleryService.GetStatus(jobUID) + if status == nil { + //fmt.Errorf("could not find any status for ID") + return c.SendString(elements.ProgressBar("0")) + } - if status.Progress == 100 { - c.Set("HX-Trigger", "done") // this triggers /browse/job/:uid (which is when the job is done) - return c.SendString(elements.ProgressBar("100")) - } - if status.Error != nil { - return c.SendString(elements.ErrorProgress(status.Error.Error(), status.GalleryModelName)) - } + if status.Progress == 100 { + c.Set("HX-Trigger", "done") // this triggers /browse/job/:uid (which is when the job is done) + return c.SendString(elements.ProgressBar("100")) + } + if status.Error != nil { + // TODO: instead of deleting the job, we should keep it in the cache and make it dismissable by the user + processingModels.DeleteUUID(jobUID) + return c.SendString(elements.ErrorProgress(status.Error.Error(), status.GalleryModelName)) + } - return c.SendString(elements.ProgressBar(fmt.Sprint(status.Progress))) - }) + return c.SendString(elements.ProgressBar(fmt.Sprint(status.Progress))) + }) - // this route is hit when the job is done, and we display the - // final state (for now just displays "Installation completed") - app.Get("/browse/job/:uid", auth, func(c *fiber.Ctx) error { - jobUID := strings.Clone(c.Params("uid")) // note: strings.Clone is required for multiple requests! + // this route is hit when the job is done, and we display the + // final state (for now just displays "Installation completed") + app.Get("/browse/job/:uid", auth, func(c *fiber.Ctx) error { + jobUID := strings.Clone(c.Params("uid")) // note: strings.Clone is required for multiple requests! 
- status := galleryService.GetStatus(jobUID) + status := galleryService.GetStatus(jobUID) - galleryID := "" - for _, k := range processingModels.Keys() { - if processingModels.Get(k) == jobUID { - galleryID = k - processingModels.Delete(k) + galleryID := "" + processingModels.DeleteUUID(jobUID) + if galleryID == "" { + log.Debug().Msgf("no processing model found for job : %+v\n", jobUID) } - } - if galleryID == "" { - log.Debug().Msgf("no processing model found for job : %+v\n", jobUID) - } - log.Debug().Msgf("JOB finished : %+v\n", status) - showDelete := true - displayText := "Installation completed" - if status.Deletion { - showDelete = false - displayText = "Deletion completed" - } + log.Debug().Msgf("JOB finished : %+v\n", status) + showDelete := true + displayText := "Installation completed" + if status.Deletion { + showDelete = false + displayText = "Deletion completed" + } - return c.SendString(elements.DoneProgress(galleryID, displayText, showDelete)) - }) + return c.SendString(elements.DoneProgress(galleryID, displayText, showDelete)) + }) + } // Show the Chat page app.Get("/chat/:model", auth, func(c *fiber.Ctx) error { diff --git a/core/http/static/p2panimation.js b/core/http/static/p2panimation.js new file mode 100644 index 000000000000..d5599c63b5d2 --- /dev/null +++ b/core/http/static/p2panimation.js @@ -0,0 +1,144 @@ +const canvas = document.getElementById('networkCanvas'); +const ctx = canvas.getContext('2d'); + +let particles = []; +let isDragging = false; +let dragParticle = null; +const maxParticles = 100; // Maximum number of particles +const dragAreaRadius = 10; // Increased area for easier dragging + +// Function to resize canvas based on aspect ratio +function resizeCanvas() { + canvas.width = window.innerWidth; + canvas.height = Math.min(window.innerHeight, 400); // Maintain a max height of 400px +} + +// Adjust the canvas size when the window resizes +window.addEventListener('resize', resizeCanvas); + +// Initialize canvas size +resizeCanvas(); + +class Particle { + constructor(x, y) { + this.x = x; + this.y = y; + this.radius = 4; + this.color = `rgba(0, 255, 204, 1)`; + this.speedX = (Math.random() - 0.5) * 2; // Random horizontal speed + this.speedY = (Math.random() - 0.5) * 2; // Random vertical speed + } + + update() { + if (!isDragging || dragParticle !== this) { + this.x += this.speedX; + this.y += this.speedY; + + // Bounce off the edges of the canvas + if (this.x < 0 || this.x > canvas.width) { + this.speedX *= -1; + } + if (this.y < 0 || this.y > canvas.height) { + this.speedY *= -1; + } + } + } + + draw() { + ctx.beginPath(); + ctx.arc(this.x, this.y, this.radius, 0, Math.PI * 2, false); + ctx.fillStyle = this.color; + ctx.fill(); + } + + isMouseOver(mouseX, mouseY) { + // Increase the draggable area by checking if the mouse is within a larger radius + return Math.hypot(mouseX - this.x, mouseY - this.y) < dragAreaRadius; + } +} + +function connectParticles() { + for (let i = 0; i < particles.length; i++) { + for (let j = i + 1; j < particles.length; j++) { + const distance = Math.hypot(particles[i].x - particles[j].x, particles[i].y - particles[j].y); + if (distance < 150) { + ctx.beginPath(); + ctx.moveTo(particles[i].x, particles[i].y); + ctx.lineTo(particles[j].x, particles[j].y); + ctx.strokeStyle = `rgba(0, 255, 204, ${1 - distance / 150})`; + ctx.stroke(); + } + } + } +} + +function initParticles(num) { + for (let i = 0; i < num; i++) { + particles.push(new Particle(Math.random() * canvas.width, Math.random() * canvas.height)); + } +} + +function 
animate() { + ctx.clearRect(0, 0, canvas.width, canvas.height); + + particles.forEach(particle => { + particle.update(); + particle.draw(); + }); + + connectParticles(); + + requestAnimationFrame(animate); +} + +// Handle mouse click to create a new particle +canvas.addEventListener('click', (e) => { + const rect = canvas.getBoundingClientRect(); + const mouseX = e.clientX - rect.left; + const mouseY = e.clientY - rect.top; + + const newParticle = new Particle(mouseX, mouseY); + particles.push(newParticle); + + // Limit the number of particles to the maximum + if (particles.length > maxParticles) { + particles.shift(); // Remove the oldest particle + } +}); + +// Handle mouse down for dragging +canvas.addEventListener('mousedown', (e) => { + const rect = canvas.getBoundingClientRect(); + const mouseX = e.clientX - rect.left; + const mouseY = e.clientY - rect.top; + + for (let particle of particles) { + if (particle.isMouseOver(mouseX, mouseY)) { + isDragging = true; + dragParticle = particle; + break; + } + } +}); + +// Handle mouse move for dragging +canvas.addEventListener('mousemove', (e) => { + if (isDragging && dragParticle) { + const rect = canvas.getBoundingClientRect(); + const mouseX = e.clientX - rect.left; + const mouseY = e.clientY - rect.top; + + dragParticle.x = mouseX; + dragParticle.y = mouseY; + } +}); + +// Handle mouse up to stop dragging +canvas.addEventListener('mouseup', () => { + isDragging = false; + dragParticle = null; +}); + +// Initialize and start the animation +initParticles(maxParticles); +animate(); \ No newline at end of file diff --git a/core/http/views/explorer.html b/core/http/views/explorer.html new file mode 100644 index 000000000000..9843b47b833f --- /dev/null +++ b/core/http/views/explorer.html @@ -0,0 +1,380 @@ + + + +{{template "views/partials/head" .}} + + + + +
+ {{template "views/partials/navbar_explorer" .}} +
+ +
+
+

+ Network Clusters Explorer + +

+

+ View the clusters and workers available in each network. + + + +

+ +
+
+
+ +
+ +
+ + The explorer is a global, community-driven tool to share network tokens and view the available clusters around the globe. Anyone can use the tokens to offload computation and use the available clusters, or share their own resources. This is provided without any warranty. Use it at your own risk. We are not responsible for any potential harm or misuse. Sharing tokens globally allows anyone on the internet to use your instances. Although the community will address bugs, this is experimental software and may be insecure to deploy on your hardware unless you take all necessary precautions.
+
+ + +
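For illustration only, here is a minimal Go sketch of how the two explorer endpoints registered in `core/http/routes/explorer.go` (`POST /network/add` and `GET /networks`) could be exercised. The host, token value, network name, and description below are placeholders, not part of this change; the JSON field names and the base64 requirement come from the `AddNetwork` handler above.

```go
package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"io"
	"net/http"
)

func main() {
	// Register a network token with the explorer.
	// The token must decode as base64, otherwise the API returns 400.
	payload, _ := json.Marshal(map[string]string{
		"token":       "BASE64-ENCODED-NETWORK-TOKEN", // placeholder
		"name":        "my-network",                   // placeholder
		"description": "Example shared cluster",       // placeholder
	})

	resp, err := http.Post("http://localhost:8080/network/add", "application/json", bytes.NewReader(payload))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()
	body, _ := io.ReadAll(resp.Body)
	fmt.Println(resp.Status, string(body))

	// List the registered networks that currently have at least one worker,
	// ordered by number of clusters (as implemented in ShowNetworks).
	resp2, err := http.Get("http://localhost:8080/networks")
	if err != nil {
		panic(err)
	}
	defer resp2.Body.Close()
	networks, _ := io.ReadAll(resp2.Body)
	fmt.Println(string(networks))
}
```

The "Add New Network" form below submits the same fields to the same endpoint.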
+ +
+

Add New Network

+
+ + +
+
+ + +
+
+ + +
+ + + +
+ + + + + + + + +
+ + + + {{template "views/partials/footer" .}} +
+ + + + diff --git a/core/http/views/p2p.html b/core/http/views/p2p.html index a8c51310e0fa..52548e33de64 100644 --- a/core/http/views/p2p.html +++ b/core/http/views/p2p.html @@ -1,21 +1,40 @@ {{template "views/partials/head" .}} - -
- +
+ {{template "views/partials/navbar" .}}
- -

- Distributed inference with P2P - - - -

+
+ +
+
+

+ Distributed inference with P2P +

+

+ Distribute computation by sharing and load balancing instances, or by sharding model weights. + + + +

+ +
+
+
LocalAI uses P2P technologies to enable distribution of work between peers. It is possible to share an instance with Federation and/or split the weights of a model across peers (only available with llama.cpp models). You can now share computational resources between your own devices or with your friends!
+ +
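As a usage sketch of the refactored P2P endpoint (the handler change in `core/http/endpoints/localai/p2p.go` above), the snippet below queries `GET /api/p2p`, which now returns the worker and federated nodes scoped to the configured P2P network ID. The host and API key are placeholders, and the exact JSON key names depend on `schema.P2PNodesResponse`.

```go
package main

import (
	"fmt"
	"io"
	"net/http"
)

func main() {
	req, err := http.NewRequest(http.MethodGet, "http://localhost:8080/api/p2p", nil)
	if err != nil {
		panic(err)
	}
	// The route is behind the auth middleware; if API keys are configured,
	// a bearer token is assumed to be required here.
	req.Header.Set("Authorization", "Bearer YOUR-API-KEY")

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	// Prints the P2PNodesResponse payload (available nodes and federated nodes).
	body, _ := io.ReadAll(resp.Body)
	fmt.Println(string(body))
}
```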
+

+ Network token + +

+ {{.P2PToken}}
+ The network token can be used either to share this instance or to join a federation or a worker network. Below you will find a few examples of how to start a new instance or a worker with the token, and you will be able to see the available workers and federated nodes. +
+ {{ if and .IsP2PEnabled (eq .P2PToken "") }}
@@ -40,7 +59,6 @@
LocalAI uses P2P technologies to enable distributi

Start a federated instance

-