diff --git a/.devcontainer-scripts/postcreate.sh b/.devcontainer-scripts/postcreate.sh
new file mode 100644
index 000000000000..3f9035090a35
--- /dev/null
+++ b/.devcontainer-scripts/postcreate.sh
@@ -0,0 +1,17 @@
+#!/bin/bash
+
+cd /workspace
+
+# Get the files into the volume without a bind mount
+if [ ! -d ".git" ]; then
+    git clone https://github.com/mudler/LocalAI.git .
+else
+    git fetch
+fi
+
+echo "Standard Post-Create script completed."
+
+if [ -f "/devcontainer-customization/postcreate.sh" ]; then
+    echo "Launching customization postcreate.sh"
+    bash "/devcontainer-customization/postcreate.sh"
+fi
\ No newline at end of file
diff --git a/.devcontainer-scripts/poststart.sh b/.devcontainer-scripts/poststart.sh
new file mode 100644
index 000000000000..196e821dbd65
--- /dev/null
+++ b/.devcontainer-scripts/poststart.sh
@@ -0,0 +1,16 @@
+#!/bin/bash
+
+cd /workspace
+
+# Grab the pre-stashed backend assets to avoid build issues
+cp -r /build/backend-assets /workspace/backend-assets
+
+# Ensures generated source files are present upon load
+make prepare
+
+echo "Standard Post-Start script completed."
+
+if [ -f "/devcontainer-customization/poststart.sh" ]; then
+    echo "Launching customization poststart.sh"
+    bash "/devcontainer-customization/poststart.sh"
+fi
\ No newline at end of file
diff --git a/.devcontainer-scripts/utils.sh b/.devcontainer-scripts/utils.sh
new file mode 100644
index 000000000000..02b588ae6ac6
--- /dev/null
+++ b/.devcontainer-scripts/utils.sh
@@ -0,0 +1,49 @@
+#!/bin/bash
+
+# This file contains some really simple functions that are useful when building up customization scripts.
+
+
+# Checks if the git config has a user registered - and sets it up if not.
+#
+# Param 1: name
+# Param 2: email
+#
+config_user() {
+    local gcn=$(git config --global user.name)
+    if [ -z "${gcn}" ]; then
+        echo "Setting up git user / remote"
+        git config --global user.name "$1"
+        git config --global user.email "$2"
+
+    fi
+}
+
+# Checks if the git remote is configured - and sets it up if not. Fetches either way.
+#
+# Param 1: remote name
+# Param 2: remote url
+#
+config_remote() {
+    local gr=$(git remote -v | grep $1)
+    if [ -z "${gr}" ]; then
+        git remote add $1 $2
+    fi
+    git fetch $1
+}
+
+# Setup special .ssh files
+#
+# Param 1: bash array, filenames relative to the customization directory that should be copied to ~/.ssh
+setup_ssh() {
+    local files=("$@")
+    for file in "${files[@]}"; do
+        local cfile="/devcontainer-customization/${file}"
+        local hfile="${HOME}/.ssh/${file}"
+        if [ ! -f "${hfile}" ]; then
+            echo "copying ${file}"
+            cp "${cfile}" "${hfile}"
+            chmod 600 "${hfile}"
+        fi
+    done
+    ls ~/.ssh
+}
diff --git a/.devcontainer/customization/README.md b/.devcontainer/customization/README.md
new file mode 100644
index 000000000000..89eb48e8da99
--- /dev/null
+++ b/.devcontainer/customization/README.md
@@ -0,0 +1,25 @@
+Place any additional resources your environment requires in this directory.
+
+Script hooks are currently called for:
+`postcreate.sh` and `poststart.sh`
+
+If files with those names exist here, they will be called at the end of the normal script.
+
+This is a good place to set things like `git config --global user.name`, and to handle any other files that are mounted via this directory.
+
+To assist in doing so, `source /.devcontainer-scripts/utils.sh` provides utility functions that may be useful - for example:
+
+```
+#!/bin/bash
+
+source "/.devcontainer-scripts/utils.sh"
+
+sshfiles=("config" "key.pub")
+
+setup_ssh "${sshfiles[@]}"
+
+config_user "YOUR NAME" "YOUR EMAIL"
+
+config_remote "REMOTE NAME" "REMOTE URL"
+
+```
\ No newline at end of file
diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json
new file mode 100644
index 000000000000..37c81ffc41da
--- /dev/null
+++ b/.devcontainer/devcontainer.json
@@ -0,0 +1,24 @@
+{
+    "$schema": "https://raw.githubusercontent.com/devcontainers/spec/main/schemas/devContainer.schema.json",
+    "name": "LocalAI",
+    "workspaceFolder": "/workspace",
+    "dockerComposeFile": [ "./docker-compose-devcontainer.yml" ],
+    "service": "api",
+    "shutdownAction": "stopCompose",
+    "customizations": {
+        "vscode": {
+            "extensions": [
+                "golang.go",
+                "ms-vscode.makefile-tools",
+                "ms-azuretools.vscode-docker",
+                "ms-python.python",
+                "ms-python.debugpy",
+                "wayou.vscode-todo-highlight",
+                "waderyan.gitblame"
+            ]
+        }
+    },
+    "forwardPorts": [8080, 3000],
+    "postCreateCommand": "bash /.devcontainer-scripts/postcreate.sh",
+    "postStartCommand": "bash /.devcontainer-scripts/poststart.sh"
+}
\ No newline at end of file
diff --git a/.devcontainer/docker-compose-devcontainer.yml b/.devcontainer/docker-compose-devcontainer.yml
new file mode 100644
index 000000000000..8795d64da169
--- /dev/null
+++ b/.devcontainer/docker-compose-devcontainer.yml
@@ -0,0 +1,48 @@
+services:
+  api:
+    build:
+      context: ..
+      dockerfile: Dockerfile
+      target: devcontainer
+      args:
+        - FFMPEG=true
+        - IMAGE_TYPE=extras
+        - GO_TAGS=stablediffusion p2p tts
+    env_file:
+      - ../.env
+    ports:
+      - 8080:8080
+    volumes:
+      - localai_workspace:/workspace
+      - ../models:/host-models
+      - ./customization:/devcontainer-customization
+    command: /bin/sh -c "while sleep 1000; do :; done"
+    cap_add:
+      - SYS_PTRACE
+    security_opt:
+      - seccomp:unconfined
+  prometheus:
+    image: prom/prometheus
+    container_name: prometheus
+    command:
+      - '--config.file=/etc/prometheus/prometheus.yml'
+    ports:
+      - 9090:9090
+    restart: unless-stopped
+    volumes:
+      - ./prometheus:/etc/prometheus
+      - prom_data:/prometheus
+  grafana:
+    image: grafana/grafana
+    container_name: grafana
+    ports:
+      - 3000:3000
+    restart: unless-stopped
+    environment:
+      - GF_SECURITY_ADMIN_USER=admin
+      - GF_SECURITY_ADMIN_PASSWORD=grafana
+    volumes:
+      - ./grafana:/etc/grafana/provisioning/datasources
+volumes:
+  prom_data:
+  localai_workspace:
\ No newline at end of file
diff --git a/.devcontainer/grafana/datasource.yml b/.devcontainer/grafana/datasource.yml
new file mode 100644
index 000000000000..1ed2fa3c2a28
--- /dev/null
+++ b/.devcontainer/grafana/datasource.yml
@@ -0,0 +1,10 @@
+
+apiVersion: 1
+
+datasources:
+- name: Prometheus
+  type: prometheus
+  url: http://prometheus:9090
+  isDefault: true
+  access: proxy
+  editable: true
diff --git a/.devcontainer/prometheus/prometheus.yml b/.devcontainer/prometheus/prometheus.yml
new file mode 100644
index 000000000000..18c44da71447
--- /dev/null
+++ b/.devcontainer/prometheus/prometheus.yml
@@ -0,0 +1,21 @@
+global:
+  scrape_interval: 15s
+  scrape_timeout: 10s
+  evaluation_interval: 15s
+alerting:
+  alertmanagers:
+  - static_configs:
+    - targets: []
+    scheme: http
+    timeout: 10s
+    api_version: v1
+scrape_configs:
+- job_name: prometheus
+  honor_timestamps: true
+  scrape_interval: 15s
+  scrape_timeout: 10s
+  metrics_path: /metrics
+  scheme: http
+  static_configs:
+  - 
targets: + - localhost:9090 \ No newline at end of file diff --git a/.dockerignore b/.dockerignore index 3954769f5c36..e91f0008f1c0 100644 --- a/.dockerignore +++ b/.dockerignore @@ -1,6 +1,7 @@ .idea .github .vscode +.devcontainer models examples/chatbot-ui/models examples/rwkv/models diff --git a/.env b/.env index 95a515bc850f..9e5dbd79ee4f 100644 --- a/.env +++ b/.env @@ -79,6 +79,9 @@ ### Enable to run parallel requests # LOCALAI_PARALLEL_REQUESTS=true +# Enable to allow p2p mode +# LOCALAI_P2P=true + ### Watchdog settings ### # Enables watchdog to kill backends that are inactive for too much time diff --git a/.github/bump_deps.sh b/.github/bump_deps.sh index d8fff4a3148d..66dea9a38ad2 100755 --- a/.github/bump_deps.sh +++ b/.github/bump_deps.sh @@ -6,4 +6,17 @@ VAR=$3 LAST_COMMIT=$(curl -s -H "Accept: application/vnd.github.VERSION.sha" "https://api.github.com/repos/$REPO/commits/$BRANCH") +# Read $VAR from Makefile (only first match) +set +e +CURRENT_COMMIT="$(grep -m1 "^$VAR?=" Makefile | cut -d'=' -f2)" +set -e + sed -i Makefile -e "s/$VAR?=.*/$VAR?=$LAST_COMMIT/" + +if [ -z "$CURRENT_COMMIT" ]; then + echo "Could not find $VAR in Makefile." + exit 0 +fi + +echo "Changes: https://github.com/$REPO/compare/${CURRENT_COMMIT}..${LAST_COMMIT}" >> "${VAR}_message.txt" +echo "${LAST_COMMIT}" >> "${VAR}_commit.txt" \ No newline at end of file diff --git a/.github/dependabot.yml b/.github/dependabot.yml index 91b06ba80287..5016ebdb0ee5 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -67,10 +67,6 @@ updates: directory: "/backend/python/parler-tts" schedule: interval: "weekly" - - package-ecosystem: "pip" - directory: "/backend/python/petals" - schedule: - interval: "weekly" - package-ecosystem: "pip" directory: "/backend/python/rerankers" schedule: diff --git a/.github/workflows/bump_deps.yaml b/.github/workflows/bump_deps.yaml index 5909c9812442..68cb81cbceca 100644 --- a/.github/workflows/bump_deps.yaml +++ b/.github/workflows/bump_deps.yaml @@ -40,17 +40,30 @@ jobs: steps: - uses: actions/checkout@v4 - name: Bump dependencies 🔧 + id: bump run: | bash .github/bump_deps.sh ${{ matrix.repository }} ${{ matrix.branch }} ${{ matrix.variable }} + { + echo 'message<> "$GITHUB_OUTPUT" + { + echo 'commit<> "$GITHUB_OUTPUT" + rm -rfv ${{ matrix.variable }}_message.txt + rm -rfv ${{ matrix.variable }}_commit.txt - name: Create Pull Request uses: peter-evans/create-pull-request@v6 with: token: ${{ secrets.UPDATE_BOT_TOKEN }} push-to-fork: ci-forks/LocalAI commit-message: ':arrow_up: Update ${{ matrix.repository }}' - title: 'chore: :arrow_up: Update ${{ matrix.repository }}' + title: 'chore: :arrow_up: Update ${{ matrix.repository }} to `${{ steps.bump.outputs.commit }}`' branch: "update/${{ matrix.variable }}" - body: Bump of ${{ matrix.repository }} version + body: ${{ steps.bump.outputs.message }} signoff: true diff --git a/.github/workflows/deploy-explorer.yaml b/.github/workflows/deploy-explorer.yaml new file mode 100644 index 000000000000..7b5c0484fe73 --- /dev/null +++ b/.github/workflows/deploy-explorer.yaml @@ -0,0 +1,64 @@ +name: Explorer deployment + +on: + push: + branches: + - master + tags: + - 'v*' + +concurrency: + group: ci-deploy-${{ github.head_ref || github.ref }}-${{ github.repository }} + +jobs: + build-linux: + runs-on: ubuntu-latest + steps: + - name: Clone + uses: actions/checkout@v4 + with: + submodules: true + - uses: actions/setup-go@v5 + with: + go-version: '1.21.x' + cache: false + - name: Dependencies + run: | + sudo apt-get update + sudo apt-get 
install -y wget curl build-essential ffmpeg protobuf-compiler ccache upx-ucl gawk cmake libgmock-dev + go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@1958fcbe2ca8bd93af633f11e97d44e567e945af + go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2 + make protogen-go + - name: Build api + run: | + CGO_ENABLED=0 make build-api + - name: rm + uses: appleboy/ssh-action@v1.0.3 + with: + host: ${{ secrets.EXPLORER_SSH_HOST }} + username: ${{ secrets.EXPLORER_SSH_USERNAME }} + key: ${{ secrets.EXPLORER_SSH_KEY }} + port: ${{ secrets.EXPLORER_SSH_PORT }} + script: | + sudo rm -rf local-ai/ || true + - name: copy file via ssh + uses: appleboy/scp-action@v0.1.7 + with: + host: ${{ secrets.EXPLORER_SSH_HOST }} + username: ${{ secrets.EXPLORER_SSH_USERNAME }} + key: ${{ secrets.EXPLORER_SSH_KEY }} + port: ${{ secrets.EXPLORER_SSH_PORT }} + source: "local-ai" + overwrite: true + rm: true + target: ./local-ai + - name: restarting + uses: appleboy/ssh-action@v1.0.3 + with: + host: ${{ secrets.EXPLORER_SSH_HOST }} + username: ${{ secrets.EXPLORER_SSH_USERNAME }} + key: ${{ secrets.EXPLORER_SSH_KEY }} + port: ${{ secrets.EXPLORER_SSH_PORT }} + script: | + sudo cp -rfv local-ai/local-ai /usr/bin/local-ai + sudo systemctl restart local-ai diff --git a/.github/workflows/test-extra.yml b/.github/workflows/test-extra.yml index e969a95fc01d..8b37b52ddb99 100644 --- a/.github/workflows/test-extra.yml +++ b/.github/workflows/test-extra.yml @@ -168,32 +168,6 @@ jobs: make --jobs=5 --output-sync=target -C backend/python/transformers-musicgen make --jobs=5 --output-sync=target -C backend/python/transformers-musicgen test - - - # tests-petals: - # runs-on: ubuntu-latest - # steps: - # - name: Clone - # uses: actions/checkout@v4 - # with: - # submodules: true - # - name: Dependencies - # run: | - # sudo apt-get update - # sudo apt-get install build-essential ffmpeg - # # Install UV - # curl -LsSf https://astral.sh/uv/install.sh | sh - # sudo apt-get install -y ca-certificates cmake curl patch python3-pip - # sudo apt-get install -y libopencv-dev - # pip install --user --no-cache-dir grpcio-tools==1.64.1 - - # - name: Test petals - # run: | - # make --jobs=5 --output-sync=target -C backend/python/petals - # make --jobs=5 --output-sync=target -C backend/python/petals test - - - # tests-bark: # runs-on: ubuntu-latest # steps: diff --git a/.gitignore b/.gitignore index 096689c50eb5..65eb92570f6f 100644 --- a/.gitignore +++ b/.gitignore @@ -54,3 +54,6 @@ docs/static/gallery.html # backend virtual environments **/venv + +# per-developer customization files for the development container +.devcontainer/customization/* \ No newline at end of file diff --git a/.vscode/launch.json b/.vscode/launch.json index 2727da924b0a..504934210b3b 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -3,12 +3,12 @@ "configurations": [ { "name": "Python: Current File", - "type": "python", + "type": "debugpy", "request": "launch", "program": "${file}", "console": "integratedTerminal", "justMyCode": false, - "cwd": "${workspaceFolder}/examples/langchain-chroma", + "cwd": "${fileDirname}", "env": { "OPENAI_API_BASE": "http://localhost:8080/v1", "OPENAI_API_KEY": "abc" @@ -19,15 +19,16 @@ "type": "go", "request": "launch", "mode": "debug", - "program": "${workspaceFolder}/main.go", - "args": [ - "api" - ], + "program": "${workspaceRoot}", + "args": [], "env": { - "C_INCLUDE_PATH": 
"${workspaceFolder}/go-llama:${workspaceFolder}/go-stable-diffusion/:${workspaceFolder}/gpt4all/gpt4all-bindings/golang/:${workspaceFolder}/go-gpt2:${workspaceFolder}/go-rwkv:${workspaceFolder}/whisper.cpp:${workspaceFolder}/go-bert:${workspaceFolder}/bloomz", - "LIBRARY_PATH": "${workspaceFolder}/go-llama:${workspaceFolder}/go-stable-diffusion/:${workspaceFolder}/gpt4all/gpt4all-bindings/golang/:${workspaceFolder}/go-gpt2:${workspaceFolder}/go-rwkv:${workspaceFolder}/whisper.cpp:${workspaceFolder}/go-bert:${workspaceFolder}/bloomz", - "DEBUG": "true" - } + "LOCALAI_LOG_LEVEL": "debug", + "LOCALAI_P2P": "true", + "LOCALAI_FEDERATED": "true" + }, + "buildFlags": ["-tags", "stablediffusion p2p tts", "-v"], + "envFile": "${workspaceFolder}/.env", + "cwd": "${workspaceRoot}" } ] } \ No newline at end of file diff --git a/Dockerfile b/Dockerfile index a0feadd9c7eb..9d6517604bfb 100644 --- a/Dockerfile +++ b/Dockerfile @@ -8,12 +8,12 @@ FROM ${BASE_IMAGE} AS requirements-core USER root -ARG GO_VERSION=1.22.5 +ARG GO_VERSION=1.22.6 ARG TARGETARCH ARG TARGETVARIANT ENV DEBIAN_FRONTEND=noninteractive -ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,huggingface-embeddings:/build/backend/python/sentencetransformers/run.sh,petals:/build/backend/python/petals/run.sh,transformers:/build/backend/python/transformers/run.sh,sentencetransformers:/build/backend/python/sentencetransformers/run.sh,rerankers:/build/backend/python/rerankers/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,exllama:/build/backend/python/exllama/run.sh,openvoice:/build/backend/python/openvoice/run.sh,vall-e-x:/build/backend/python/vall-e-x/run.sh,vllm:/build/backend/python/vllm/run.sh,mamba:/build/backend/python/mamba/run.sh,exllama2:/build/backend/python/exllama2/run.sh,transformers-musicgen:/build/backend/python/transformers-musicgen/run.sh,parler-tts:/build/backend/python/parler-tts/run.sh" +ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,huggingface-embeddings:/build/backend/python/sentencetransformers/run.sh,transformers:/build/backend/python/transformers/run.sh,sentencetransformers:/build/backend/python/sentencetransformers/run.sh,rerankers:/build/backend/python/rerankers/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,exllama:/build/backend/python/exllama/run.sh,openvoice:/build/backend/python/openvoice/run.sh,vall-e-x:/build/backend/python/vall-e-x/run.sh,vllm:/build/backend/python/vllm/run.sh,mamba:/build/backend/python/mamba/run.sh,exllama2:/build/backend/python/exllama2/run.sh,transformers-musicgen:/build/backend/python/transformers-musicgen/run.sh,parler-tts:/build/backend/python/parler-tts/run.sh" RUN apt-get update && \ @@ -30,7 +30,7 @@ RUN apt-get update && \ # Install Go RUN curl -L -s https://go.dev/dl/go${GO_VERSION}.linux-${TARGETARCH}.tar.gz | tar -C /usr/local -xz -ENV PATH $PATH:/root/go/bin:/usr/local/go/bin +ENV PATH=$PATH:/root/go/bin:/usr/local/go/bin # Install grpc compilers RUN go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2 && \ @@ -39,15 +39,18 @@ RUN go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.2 && \ COPY --chmod=644 custom-ca-certs/* /usr/local/share/ca-certificates/ RUN update-ca-certificates +RUN test -n "$TARGETARCH" \ + || (echo 'warn: missing $TARGETARCH, either set this `ARG` manually, or run using `docker buildkit`') + # Use the 
variables in subsequent instructions RUN echo "Target Architecture: $TARGETARCH" RUN echo "Target Variant: $TARGETVARIANT" # Cuda -ENV PATH /usr/local/cuda/bin:${PATH} +ENV PATH=/usr/local/cuda/bin:${PATH} # HipBLAS requirements -ENV PATH /opt/rocm/bin:${PATH} +ENV PATH=/opt/rocm/bin:${PATH} # OpenBLAS requirements and stable diffusion RUN apt-get update && \ @@ -62,9 +65,6 @@ RUN ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2 WORKDIR /build -RUN test -n "$TARGETARCH" \ - || (echo 'warn: missing $TARGETARCH, either set this `ARG` manually, or run using `docker buildkit`') - ################################### ################################### @@ -81,7 +81,7 @@ RUN apt-get update && \ espeak \ python3-pip \ python-is-python3 \ - python3-dev \ + python3-dev llvm \ python3-venv && \ apt-get clean && \ rm -rf /var/lib/apt/lists/* && \ @@ -217,13 +217,14 @@ RUN git clone --recurse-submodules --jobs 4 -b ${GRPC_VERSION} --depth 1 --shall ################################### ################################### -# The builder target compiles LocalAI. This target is not the target that will be uploaded to the registry. -# Adjustments to the build process should likely be made here. -FROM requirements-drivers AS builder +# The builder-base target has the arguments, variables, and copies shared between full builder images and the uncompiled devcontainer + +FROM requirements-drivers AS builder-base ARG GO_TAGS="stablediffusion tts p2p" ARG GRPC_BACKENDS ARG MAKEFLAGS +ARG LD_FLAGS="-s -w" ENV GRPC_BACKENDS=${GRPC_BACKENDS} ENV GO_TAGS=${GO_TAGS} @@ -231,14 +232,12 @@ ENV MAKEFLAGS=${MAKEFLAGS} ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility ENV NVIDIA_REQUIRE_CUDA="cuda>=${CUDA_MAJOR_VERSION}.0" ENV NVIDIA_VISIBLE_DEVICES=all +ENV LD_FLAGS=${LD_FLAGS} -WORKDIR /build +RUN echo "GO_TAGS: $GO_TAGS" && echo "TARGETARCH: $TARGETARCH" -COPY . . -COPY .git . -RUN echo "GO_TAGS: $GO_TAGS" +WORKDIR /build -RUN make prepare # We need protoc installed, and the version in 22.04 is too old. We will create one as part installing the GRPC build below # but that will also being in a newer version of absl which stablediffusion cannot compile with. 
This version of protoc is only @@ -256,9 +255,30 @@ RUN <loraadapter(), scale_factor)); + params.lora_adapters.push_back({ model_dir + "/"+request->loraadapter(), scale_factor }); } params.use_mlock = request->mlock(); params.use_mmap = request->mmap(); diff --git a/backend/go/llm/gpt4all/gpt4all.go b/backend/go/llm/gpt4all/gpt4all.go deleted file mode 100644 index 9caab48c0dbb..000000000000 --- a/backend/go/llm/gpt4all/gpt4all.go +++ /dev/null @@ -1,62 +0,0 @@ -package main - -// This is a wrapper to statisfy the GRPC service interface -// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc) -import ( - "fmt" - - "github.com/mudler/LocalAI/pkg/grpc/base" - pb "github.com/mudler/LocalAI/pkg/grpc/proto" - gpt4all "github.com/nomic-ai/gpt4all/gpt4all-bindings/golang" -) - -type LLM struct { - base.SingleThread - - gpt4all *gpt4all.Model -} - -func (llm *LLM) Load(opts *pb.ModelOptions) error { - model, err := gpt4all.New(opts.ModelFile, - gpt4all.SetThreads(int(opts.Threads)), - gpt4all.SetLibrarySearchPath(opts.LibrarySearchPath)) - llm.gpt4all = model - return err -} - -func buildPredictOptions(opts *pb.PredictOptions) []gpt4all.PredictOption { - predictOptions := []gpt4all.PredictOption{ - gpt4all.SetTemperature(float64(opts.Temperature)), - gpt4all.SetTopP(float64(opts.TopP)), - gpt4all.SetTopK(int(opts.TopK)), - gpt4all.SetTokens(int(opts.Tokens)), - } - - if opts.Batch != 0 { - predictOptions = append(predictOptions, gpt4all.SetBatch(int(opts.Batch))) - } - return predictOptions -} - -func (llm *LLM) Predict(opts *pb.PredictOptions) (string, error) { - return llm.gpt4all.Predict(opts.Prompt, buildPredictOptions(opts)...) -} - -func (llm *LLM) PredictStream(opts *pb.PredictOptions, results chan string) error { - predictOptions := buildPredictOptions(opts) - - go func() { - llm.gpt4all.SetTokenCallback(func(token string) bool { - results <- token - return true - }) - _, err := llm.gpt4all.Predict(opts.Prompt, predictOptions...) 
- if err != nil { - fmt.Println("err: ", err) - } - llm.gpt4all.SetTokenCallback(nil) - close(results) - }() - - return nil -} diff --git a/backend/go/llm/gpt4all/main.go b/backend/go/llm/gpt4all/main.go deleted file mode 100644 index acf4408799e1..000000000000 --- a/backend/go/llm/gpt4all/main.go +++ /dev/null @@ -1,21 +0,0 @@ -package main - -// Note: this is started internally by LocalAI and a server is allocated for each model - -import ( - "flag" - - grpc "github.com/mudler/LocalAI/pkg/grpc" -) - -var ( - addr = flag.String("addr", "localhost:50051", "the address to connect to") -) - -func main() { - flag.Parse() - - if err := grpc.StartServer(*addr, &LLM{}); err != nil { - panic(err) - } -} diff --git a/backend/python/autogptq/requirements-cublas11.txt b/backend/python/autogptq/requirements-cublas11.txt new file mode 100644 index 000000000000..6461b696f4c6 --- /dev/null +++ b/backend/python/autogptq/requirements-cublas11.txt @@ -0,0 +1,2 @@ +--extra-index-url https://download.pytorch.org/whl/cu118 +torch diff --git a/backend/python/autogptq/requirements-cublas12.txt b/backend/python/autogptq/requirements-cublas12.txt new file mode 100644 index 000000000000..12c6d5d5eac2 --- /dev/null +++ b/backend/python/autogptq/requirements-cublas12.txt @@ -0,0 +1 @@ +torch diff --git a/backend/python/autogptq/requirements-intel.txt b/backend/python/autogptq/requirements-intel.txt index 635b4c31ee1a..755e19d854c5 100644 --- a/backend/python/autogptq/requirements-intel.txt +++ b/backend/python/autogptq/requirements-intel.txt @@ -2,4 +2,4 @@ intel-extension-for-pytorch torch optimum[openvino] -setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406 \ No newline at end of file +setuptools==72.1.0 # https://github.com/mudler/LocalAI/issues/2406 \ No newline at end of file diff --git a/backend/python/autogptq/requirements.txt b/backend/python/autogptq/requirements.txt index 7a1bf85f6ca3..53946f2398e5 100644 --- a/backend/python/autogptq/requirements.txt +++ b/backend/python/autogptq/requirements.txt @@ -1,7 +1,6 @@ accelerate auto-gptq==0.7.1 -grpcio==1.65.1 +grpcio==1.65.4 protobuf -torch certifi transformers \ No newline at end of file diff --git a/backend/python/bark/requirements-cpu.txt b/backend/python/bark/requirements-cpu.txt new file mode 100644 index 000000000000..0b2c3bc7ea88 --- /dev/null +++ b/backend/python/bark/requirements-cpu.txt @@ -0,0 +1,4 @@ +transformers +accelerate +torch +torchaudio \ No newline at end of file diff --git a/backend/python/bark/requirements-cublas11.txt b/backend/python/bark/requirements-cublas11.txt new file mode 100644 index 000000000000..71a6a93f442f --- /dev/null +++ b/backend/python/bark/requirements-cublas11.txt @@ -0,0 +1,5 @@ +--extra-index-url https://download.pytorch.org/whl/cu118 +torch +torchaudio +transformers +accelerate \ No newline at end of file diff --git a/backend/python/bark/requirements-cublas12.txt b/backend/python/bark/requirements-cublas12.txt new file mode 100644 index 000000000000..0fa270742a3d --- /dev/null +++ b/backend/python/bark/requirements-cublas12.txt @@ -0,0 +1,4 @@ +torch +torchaudio +transformers +accelerate \ No newline at end of file diff --git a/backend/python/bark/requirements-hipblas.txt b/backend/python/bark/requirements-hipblas.txt index 7bfc411bd51c..af9e820e9241 100644 --- a/backend/python/bark/requirements-hipblas.txt +++ b/backend/python/bark/requirements-hipblas.txt @@ -1,3 +1,5 @@ --extra-index-url https://download.pytorch.org/whl/rocm6.0 torch -torchaudio \ No newline at end of file +torchaudio 
+transformers +accelerate \ No newline at end of file diff --git a/backend/python/bark/requirements-intel.txt b/backend/python/bark/requirements-intel.txt index 5c4aa6a5ce68..9feb6eef3308 100644 --- a/backend/python/bark/requirements-intel.txt +++ b/backend/python/bark/requirements-intel.txt @@ -3,4 +3,6 @@ intel-extension-for-pytorch torch torchaudio optimum[openvino] -setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406 \ No newline at end of file +setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406 +transformers +accelerate \ No newline at end of file diff --git a/backend/python/bark/requirements.txt b/backend/python/bark/requirements.txt index d3f9f52be26a..08bfaec31ed4 100644 --- a/backend/python/bark/requirements.txt +++ b/backend/python/bark/requirements.txt @@ -1,6 +1,4 @@ -accelerate bark==0.1.5 -grpcio==1.65.1 +grpcio==1.65.5 protobuf -certifi -transformers \ No newline at end of file +certifi \ No newline at end of file diff --git a/backend/python/common/libbackend.sh b/backend/python/common/libbackend.sh index e8dfea03d56d..934b1fd37b14 100644 --- a/backend/python/common/libbackend.sh +++ b/backend/python/common/libbackend.sh @@ -18,10 +18,23 @@ # source $(dirname $0)/../common/libbackend.sh # function init() { + # Name of the backend (directory name) BACKEND_NAME=${PWD##*/} + + # Path where all backends files are MY_DIR=$(realpath `dirname $0`) + + # Build type BUILD_PROFILE=$(getBuildProfile) + # Environment directory + EDIR=${MY_DIR} + + # Allow to specify a custom env dir for shared environments + if [ "x${ENV_DIR}" != "x" ]; then + EDIR=${ENV_DIR} + fi + # If a backend has defined a list of valid build profiles... if [ ! -z "${LIMIT_TARGETS}" ]; then isValidTarget=$(checkTargets ${LIMIT_TARGETS}) @@ -74,13 +87,14 @@ function getBuildProfile() { # This function is idempotent, so you can call it as many times as you want and it will # always result in an activated virtual environment function ensureVenv() { - if [ ! -d "${MY_DIR}/venv" ]; then - uv venv ${MY_DIR}/venv + if [ ! 
-d "${EDIR}/venv" ]; then + uv venv ${EDIR}/venv echo "virtualenv created" fi - - if [ "x${VIRTUAL_ENV}" != "x${MY_DIR}/venv" ]; then - source ${MY_DIR}/venv/bin/activate + + # Source if we are not already in a Virtual env + if [ "x${VIRTUAL_ENV}" != "x${EDIR}/venv" ]; then + source ${EDIR}/venv/bin/activate echo "virtualenv activated" fi @@ -113,13 +127,24 @@ function installRequirements() { # These are the requirements files we will attempt to install, in order declare -a requirementFiles=( - "${MY_DIR}/requirements-install.txt" - "${MY_DIR}/requirements.txt" - "${MY_DIR}/requirements-${BUILD_TYPE}.txt" + "${EDIR}/requirements-install.txt" + "${EDIR}/requirements.txt" + "${EDIR}/requirements-${BUILD_TYPE}.txt" ) if [ "x${BUILD_TYPE}" != "x${BUILD_PROFILE}" ]; then - requirementFiles+=("${MY_DIR}/requirements-${BUILD_PROFILE}.txt") + requirementFiles+=("${EDIR}/requirements-${BUILD_PROFILE}.txt") + fi + + # if BUILD_TYPE is empty, we are a CPU build, so we should try to install the CPU requirements + if [ "x${BUILD_TYPE}" == "x" ]; then + requirementFiles+=("${EDIR}/requirements-cpu.txt") + fi + + requirementFiles+=("${EDIR}/requirements-after.txt") + + if [ "x${BUILD_TYPE}" != "x${BUILD_PROFILE}" ]; then + requirementFiles+=("${EDIR}/requirements-${BUILD_PROFILE}-after.txt") fi for reqFile in ${requirementFiles[@]}; do diff --git a/backend/python/common/template/requirements.txt b/backend/python/common/template/requirements.txt index 8d1e31513580..3517315535d5 100644 --- a/backend/python/common/template/requirements.txt +++ b/backend/python/common/template/requirements.txt @@ -1,2 +1,2 @@ -grpcio==1.65.1 +grpcio==1.65.5 protobuf \ No newline at end of file diff --git a/backend/python/coqui/requirements-cpu.txt b/backend/python/coqui/requirements-cpu.txt new file mode 100644 index 000000000000..bbcdc8cda704 --- /dev/null +++ b/backend/python/coqui/requirements-cpu.txt @@ -0,0 +1,3 @@ +transformers +accelerate +torch \ No newline at end of file diff --git a/backend/python/coqui/requirements-cublas11.txt b/backend/python/coqui/requirements-cublas11.txt new file mode 100644 index 000000000000..71a6a93f442f --- /dev/null +++ b/backend/python/coqui/requirements-cublas11.txt @@ -0,0 +1,5 @@ +--extra-index-url https://download.pytorch.org/whl/cu118 +torch +torchaudio +transformers +accelerate \ No newline at end of file diff --git a/backend/python/coqui/requirements-cublas12.txt b/backend/python/coqui/requirements-cublas12.txt new file mode 100644 index 000000000000..0fa270742a3d --- /dev/null +++ b/backend/python/coqui/requirements-cublas12.txt @@ -0,0 +1,4 @@ +torch +torchaudio +transformers +accelerate \ No newline at end of file diff --git a/backend/python/coqui/requirements-hipblas.txt b/backend/python/coqui/requirements-hipblas.txt index 7bfc411bd51c..af9e820e9241 100644 --- a/backend/python/coqui/requirements-hipblas.txt +++ b/backend/python/coqui/requirements-hipblas.txt @@ -1,3 +1,5 @@ --extra-index-url https://download.pytorch.org/whl/rocm6.0 torch -torchaudio \ No newline at end of file +torchaudio +transformers +accelerate \ No newline at end of file diff --git a/backend/python/coqui/requirements-intel.txt b/backend/python/coqui/requirements-intel.txt index 5c4aa6a5ce68..002a55c35c15 100644 --- a/backend/python/coqui/requirements-intel.txt +++ b/backend/python/coqui/requirements-intel.txt @@ -3,4 +3,6 @@ intel-extension-for-pytorch torch torchaudio optimum[openvino] -setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406 \ No newline at end of file +setuptools==72.1.0 # 
https://github.com/mudler/LocalAI/issues/2406 +transformers +accelerate \ No newline at end of file diff --git a/backend/python/coqui/requirements.txt b/backend/python/coqui/requirements.txt index e1cddaa3a06c..6125f7391aad 100644 --- a/backend/python/coqui/requirements.txt +++ b/backend/python/coqui/requirements.txt @@ -1,6 +1,4 @@ -accelerate TTS==0.22.0 -grpcio==1.65.1 +grpcio==1.65.5 protobuf -certifi -transformers \ No newline at end of file +certifi \ No newline at end of file diff --git a/backend/python/diffusers/backend.py b/backend/python/diffusers/backend.py index a348d290e7c6..8f42084822f2 100755 --- a/backend/python/diffusers/backend.py +++ b/backend/python/diffusers/backend.py @@ -18,13 +18,13 @@ import grpc from diffusers import StableDiffusion3Pipeline, StableDiffusionXLPipeline, StableDiffusionDepth2ImgPipeline, DPMSolverMultistepScheduler, StableDiffusionPipeline, DiffusionPipeline, \ - EulerAncestralDiscreteScheduler + EulerAncestralDiscreteScheduler, FluxPipeline, FluxTransformer2DModel from diffusers import StableDiffusionImg2ImgPipeline, AutoPipelineForText2Image, ControlNetModel, StableVideoDiffusionPipeline from diffusers.pipelines.stable_diffusion import safety_checker from diffusers.utils import load_image, export_to_video from compel import Compel, ReturnedEmbeddingsType - -from transformers import CLIPTextModel +from optimum.quanto import freeze, qfloat8, quantize +from transformers import CLIPTextModel, T5EncoderModel from safetensors.torch import load_file _ONE_DAY_IN_SECONDS = 60 * 60 * 24 @@ -163,6 +163,8 @@ def LoadModel(self, request, context): modelFile = request.Model self.cfg_scale = 7 + self.PipelineType = request.PipelineType + if request.CFGScale != 0: self.cfg_scale = request.CFGScale @@ -244,6 +246,30 @@ def LoadModel(self, request, context): torch_dtype=torchType, use_safetensors=True, variant=variant) + elif request.PipelineType == "FluxPipeline": + self.pipe = FluxPipeline.from_pretrained( + request.Model, + torch_dtype=torch.bfloat16) + if request.LowVRAM: + self.pipe.enable_model_cpu_offload() + elif request.PipelineType == "FluxTransformer2DModel": + dtype = torch.bfloat16 + # specify from environment or default to "ChuckMcSneed/FLUX.1-dev" + bfl_repo = os.environ.get("BFL_REPO", "ChuckMcSneed/FLUX.1-dev") + + transformer = FluxTransformer2DModel.from_single_file(modelFile, torch_dtype=dtype) + quantize(transformer, weights=qfloat8) + freeze(transformer) + text_encoder_2 = T5EncoderModel.from_pretrained(bfl_repo, subfolder="text_encoder_2", torch_dtype=dtype) + quantize(text_encoder_2, weights=qfloat8) + freeze(text_encoder_2) + + self.pipe = FluxPipeline.from_pretrained(bfl_repo, transformer=None, text_encoder_2=None, torch_dtype=dtype) + self.pipe.transformer = transformer + self.pipe.text_encoder_2 = text_encoder_2 + + if request.LowVRAM: + self.pipe.enable_model_cpu_offload() if CLIPSKIP and request.CLIPSkip != 0: self.clip_skip = request.CLIPSkip @@ -399,6 +425,13 @@ def GenerateImage(self, request, context): request.seed ) + if self.PipelineType == "FluxPipeline": + kwargs["max_sequence_length"] = 256 + + if self.PipelineType == "FluxTransformer2DModel": + kwargs["output_type"] = "pil" + kwargs["generator"] = torch.Generator("cpu").manual_seed(0) + if self.img2vid: # Load the conditioning image image = load_image(request.src) diff --git a/backend/python/diffusers/requirements-cpu.txt b/backend/python/diffusers/requirements-cpu.txt new file mode 100644 index 000000000000..235bb57e3d2f --- /dev/null +++ 
b/backend/python/diffusers/requirements-cpu.txt @@ -0,0 +1,9 @@ +diffusers +opencv-python +transformers +accelerate +compel +peft +sentencepiece +torch +optimum-quanto \ No newline at end of file diff --git a/backend/python/diffusers/requirements-cublas11.txt b/backend/python/diffusers/requirements-cublas11.txt new file mode 100644 index 000000000000..40e718cb1f7c --- /dev/null +++ b/backend/python/diffusers/requirements-cublas11.txt @@ -0,0 +1,10 @@ +--extra-index-url https://download.pytorch.org/whl/cu118 +torch +diffusers +opencv-python +transformers +accelerate +compel +peft +sentencepiece +optimum-quanto \ No newline at end of file diff --git a/backend/python/diffusers/requirements-cublas12.txt b/backend/python/diffusers/requirements-cublas12.txt new file mode 100644 index 000000000000..3bcc53972aa8 --- /dev/null +++ b/backend/python/diffusers/requirements-cublas12.txt @@ -0,0 +1,9 @@ +torch +diffusers +opencv-python +transformers +accelerate +compel +peft +sentencepiece +optimum-quanto \ No newline at end of file diff --git a/backend/python/diffusers/requirements-hipblas.txt b/backend/python/diffusers/requirements-hipblas.txt index 6c8da20d8c09..17cf72491555 100644 --- a/backend/python/diffusers/requirements-hipblas.txt +++ b/backend/python/diffusers/requirements-hipblas.txt @@ -1,3 +1,11 @@ --extra-index-url https://download.pytorch.org/whl/rocm6.0 -torch -torchvision \ No newline at end of file +torch==2.3.1+rocm6.0 +torchvision==0.18.1+rocm6.0 +diffusers +opencv-python +transformers +accelerate +compel +peft +sentencepiece +optimum-quanto \ No newline at end of file diff --git a/backend/python/diffusers/requirements-intel.txt b/backend/python/diffusers/requirements-intel.txt index c393b11896d0..1cc2e2a2bdb6 100644 --- a/backend/python/diffusers/requirements-intel.txt +++ b/backend/python/diffusers/requirements-intel.txt @@ -3,4 +3,12 @@ intel-extension-for-pytorch torch torchvision optimum[openvino] -setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406 \ No newline at end of file +setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406 +diffusers +opencv-python +transformers +accelerate +compel +peft +sentencepiece +optimum-quanto \ No newline at end of file diff --git a/backend/python/diffusers/requirements.txt b/backend/python/diffusers/requirements.txt index 6f04d677bc6a..b4195fc54c90 100644 --- a/backend/python/diffusers/requirements.txt +++ b/backend/python/diffusers/requirements.txt @@ -1,13 +1,5 @@ setuptools -accelerate -compel -peft -diffusers -grpcio==1.65.1 -opencv-python +grpcio==1.65.4 pillow protobuf -sentencepiece -torch -transformers certifi diff --git a/backend/python/exllama/requirements-cpu.txt b/backend/python/exllama/requirements-cpu.txt new file mode 100644 index 000000000000..bbcdc8cda704 --- /dev/null +++ b/backend/python/exllama/requirements-cpu.txt @@ -0,0 +1,3 @@ +transformers +accelerate +torch \ No newline at end of file diff --git a/backend/python/exllama/requirements-cublas11.txt b/backend/python/exllama/requirements-cublas11.txt new file mode 100644 index 000000000000..1dfb5b9854d2 --- /dev/null +++ b/backend/python/exllama/requirements-cublas11.txt @@ -0,0 +1,4 @@ +--extra-index-url https://download.pytorch.org/whl/cu118 +torch +transformers +accelerate \ No newline at end of file diff --git a/backend/python/exllama/requirements-cublas12.txt b/backend/python/exllama/requirements-cublas12.txt new file mode 100644 index 000000000000..1ec544cd1438 --- /dev/null +++ b/backend/python/exllama/requirements-cublas12.txt @@ -0,0 
+1,3 @@ +torch +transformers +accelerate \ No newline at end of file diff --git a/backend/python/exllama/requirements.txt b/backend/python/exllama/requirements.txt index 2aab2631cd9c..99b8109801ab 100644 --- a/backend/python/exllama/requirements.txt +++ b/backend/python/exllama/requirements.txt @@ -1,6 +1,4 @@ -grpcio==1.65.0 +grpcio==1.65.5 protobuf -torch -transformers certifi setuptools \ No newline at end of file diff --git a/backend/python/exllama2/requirements-cpu.txt b/backend/python/exllama2/requirements-cpu.txt new file mode 100644 index 000000000000..bbcdc8cda704 --- /dev/null +++ b/backend/python/exllama2/requirements-cpu.txt @@ -0,0 +1,3 @@ +transformers +accelerate +torch \ No newline at end of file diff --git a/backend/python/exllama2/requirements-cublas11.txt b/backend/python/exllama2/requirements-cublas11.txt new file mode 100644 index 000000000000..1dfb5b9854d2 --- /dev/null +++ b/backend/python/exllama2/requirements-cublas11.txt @@ -0,0 +1,4 @@ +--extra-index-url https://download.pytorch.org/whl/cu118 +torch +transformers +accelerate \ No newline at end of file diff --git a/backend/python/exllama2/requirements-cublas12.txt b/backend/python/exllama2/requirements-cublas12.txt new file mode 100644 index 000000000000..1ec544cd1438 --- /dev/null +++ b/backend/python/exllama2/requirements-cublas12.txt @@ -0,0 +1,3 @@ +torch +transformers +accelerate \ No newline at end of file diff --git a/backend/python/exllama2/requirements.txt b/backend/python/exllama2/requirements.txt index 6aae273c94cf..ce15b0b614e3 100644 --- a/backend/python/exllama2/requirements.txt +++ b/backend/python/exllama2/requirements.txt @@ -1,7 +1,5 @@ -accelerate -grpcio==1.65.1 +grpcio==1.65.4 protobuf certifi -torch wheel setuptools \ No newline at end of file diff --git a/backend/python/mamba/requirements-after.txt b/backend/python/mamba/requirements-after.txt new file mode 100644 index 000000000000..ea6890ebb1e5 --- /dev/null +++ b/backend/python/mamba/requirements-after.txt @@ -0,0 +1,2 @@ +causal-conv1d==1.4.0 +mamba-ssm==2.2.2 \ No newline at end of file diff --git a/backend/python/mamba/requirements-cpu.txt b/backend/python/mamba/requirements-cpu.txt new file mode 100644 index 000000000000..39dab0fdd98d --- /dev/null +++ b/backend/python/mamba/requirements-cpu.txt @@ -0,0 +1,2 @@ +torch +transformers \ No newline at end of file diff --git a/backend/python/mamba/requirements-cublas11.txt b/backend/python/mamba/requirements-cublas11.txt new file mode 100644 index 000000000000..7048a14f63b9 --- /dev/null +++ b/backend/python/mamba/requirements-cublas11.txt @@ -0,0 +1,3 @@ +--extra-index-url https://download.pytorch.org/whl/cu118 +torch +transformers \ No newline at end of file diff --git a/backend/python/mamba/requirements-cublas12.txt b/backend/python/mamba/requirements-cublas12.txt new file mode 100644 index 000000000000..39dab0fdd98d --- /dev/null +++ b/backend/python/mamba/requirements-cublas12.txt @@ -0,0 +1,2 @@ +torch +transformers \ No newline at end of file diff --git a/backend/python/mamba/requirements-install.txt b/backend/python/mamba/requirements-install.txt index 2fc9a07cda77..69d263f0b3ed 100644 --- a/backend/python/mamba/requirements-install.txt +++ b/backend/python/mamba/requirements-install.txt @@ -3,5 +3,4 @@ # https://github.com/Dao-AILab/causal-conv1d/issues/24 packaging setuptools -wheel -torch==2.3.1 \ No newline at end of file +wheel \ No newline at end of file diff --git a/backend/python/mamba/requirements.txt b/backend/python/mamba/requirements.txt index 
2aac2cda0800..920971ce0b1a 100644 --- a/backend/python/mamba/requirements.txt +++ b/backend/python/mamba/requirements.txt @@ -1,6 +1,3 @@ -causal-conv1d==1.4.0 -mamba-ssm==2.2.2 -grpcio==1.65.1 +grpcio==1.65.5 protobuf -certifi -transformers \ No newline at end of file +certifi \ No newline at end of file diff --git a/backend/python/openvoice/requirements-cpu.txt b/backend/python/openvoice/requirements-cpu.txt new file mode 100644 index 000000000000..08ed5eeb4b9f --- /dev/null +++ b/backend/python/openvoice/requirements-cpu.txt @@ -0,0 +1 @@ +torch \ No newline at end of file diff --git a/backend/python/openvoice/requirements-cublas11.txt b/backend/python/openvoice/requirements-cublas11.txt new file mode 100644 index 000000000000..6461b696f4c6 --- /dev/null +++ b/backend/python/openvoice/requirements-cublas11.txt @@ -0,0 +1,2 @@ +--extra-index-url https://download.pytorch.org/whl/cu118 +torch diff --git a/backend/python/openvoice/requirements-cublas12.txt b/backend/python/openvoice/requirements-cublas12.txt new file mode 100644 index 000000000000..12c6d5d5eac2 --- /dev/null +++ b/backend/python/openvoice/requirements-cublas12.txt @@ -0,0 +1 @@ +torch diff --git a/backend/python/openvoice/requirements-intel.txt b/backend/python/openvoice/requirements-intel.txt index bad088a912d6..25921f8f1a81 100644 --- a/backend/python/openvoice/requirements-intel.txt +++ b/backend/python/openvoice/requirements-intel.txt @@ -2,7 +2,7 @@ intel-extension-for-pytorch torch optimum[openvino] -grpcio==1.65.1 +grpcio==1.65.5 protobuf librosa==0.9.1 faster-whisper==1.0.3 diff --git a/backend/python/openvoice/requirements.txt b/backend/python/openvoice/requirements.txt index 86d16ec26a70..13ce9c28713e 100644 --- a/backend/python/openvoice/requirements.txt +++ b/backend/python/openvoice/requirements.txt @@ -1,4 +1,4 @@ -grpcio==1.65.1 +grpcio==1.65.5 protobuf librosa faster-whisper diff --git a/backend/python/openvoice/test.sh b/backend/python/openvoice/test.sh index 218c0dcd511f..6c0a840f5a1e 100755 --- a/backend/python/openvoice/test.sh +++ b/backend/python/openvoice/test.sh @@ -5,7 +5,7 @@ source $(dirname $0)/../common/libbackend.sh # Download checkpoints if not present if [ ! 
-d "checkpoints_v2" ]; then - wget https://myshell-public-repo-hosting.s3.amazonaws.com/openvoice/checkpoints_v2_0417.zip -O checkpoints_v2.zip + wget https://myshell-public-repo-host.s3.amazonaws.com/openvoice/checkpoints_v2_0417.zip -O checkpoints_v2.zip unzip checkpoints_v2.zip fi diff --git a/backend/python/parler-tts/requirements-after.txt b/backend/python/parler-tts/requirements-after.txt new file mode 100644 index 000000000000..63599411834c --- /dev/null +++ b/backend/python/parler-tts/requirements-after.txt @@ -0,0 +1 @@ +git+https://github.com/huggingface/parler-tts.git@8e465f1b5fcd223478e07175cb40494d19ffbe17 \ No newline at end of file diff --git a/backend/python/parler-tts/requirements-cpu.txt b/backend/python/parler-tts/requirements-cpu.txt new file mode 100644 index 000000000000..bbcdc8cda704 --- /dev/null +++ b/backend/python/parler-tts/requirements-cpu.txt @@ -0,0 +1,3 @@ +transformers +accelerate +torch \ No newline at end of file diff --git a/backend/python/parler-tts/requirements-cublas11.txt b/backend/python/parler-tts/requirements-cublas11.txt new file mode 100644 index 000000000000..71a6a93f442f --- /dev/null +++ b/backend/python/parler-tts/requirements-cublas11.txt @@ -0,0 +1,5 @@ +--extra-index-url https://download.pytorch.org/whl/cu118 +torch +torchaudio +transformers +accelerate \ No newline at end of file diff --git a/backend/python/parler-tts/requirements-cublas12.txt b/backend/python/parler-tts/requirements-cublas12.txt new file mode 100644 index 000000000000..0fa270742a3d --- /dev/null +++ b/backend/python/parler-tts/requirements-cublas12.txt @@ -0,0 +1,4 @@ +torch +torchaudio +transformers +accelerate \ No newline at end of file diff --git a/backend/python/parler-tts/requirements-hipblas.txt b/backend/python/parler-tts/requirements-hipblas.txt index 7bfc411bd51c..af9e820e9241 100644 --- a/backend/python/parler-tts/requirements-hipblas.txt +++ b/backend/python/parler-tts/requirements-hipblas.txt @@ -1,3 +1,5 @@ --extra-index-url https://download.pytorch.org/whl/rocm6.0 torch -torchaudio \ No newline at end of file +torchaudio +transformers +accelerate \ No newline at end of file diff --git a/backend/python/parler-tts/requirements-intel.txt b/backend/python/parler-tts/requirements-intel.txt index 5c4aa6a5ce68..002a55c35c15 100644 --- a/backend/python/parler-tts/requirements-intel.txt +++ b/backend/python/parler-tts/requirements-intel.txt @@ -3,4 +3,6 @@ intel-extension-for-pytorch torch torchaudio optimum[openvino] -setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406 \ No newline at end of file +setuptools==72.1.0 # https://github.com/mudler/LocalAI/issues/2406 +transformers +accelerate \ No newline at end of file diff --git a/backend/python/parler-tts/requirements.txt b/backend/python/parler-tts/requirements.txt index 147cad9a6a7d..1f17c8922c2e 100644 --- a/backend/python/parler-tts/requirements.txt +++ b/backend/python/parler-tts/requirements.txt @@ -1,7 +1,4 @@ -accelerate -grpcio==1.65.1 +grpcio==1.65.5 protobuf -torch -git+https://github.com/huggingface/parler-tts.git@10016fb0300c0dc31a0fb70e26f3affee7b62f16 certifi -transformers \ No newline at end of file +llvmlite==0.43.0 \ No newline at end of file diff --git a/backend/python/petals/Makefile b/backend/python/petals/Makefile deleted file mode 100644 index 81b06c2984fd..000000000000 --- a/backend/python/petals/Makefile +++ /dev/null @@ -1,31 +0,0 @@ -.PHONY: petals -petals: protogen - @echo "Creating virtual environment..." 
- bash install.sh "petals.yml" - @echo "Virtual environment created." - -.PHONY: run -run: protogen - @echo "Running petals..." - bash run.sh - @echo "petals run." - -.PHONY: test -test: protogen - @echo "Testing petals..." - bash test.sh - @echo "petals tested." - -.PHONY: protogen -protogen: backend_pb2_grpc.py backend_pb2.py - -.PHONY: protogen-clean -protogen-clean: - $(RM) backend_pb2_grpc.py backend_pb2.py - -backend_pb2_grpc.py backend_pb2.py: - python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto - -.PHONY: clean -clean: protogen-clean - rm -rf venv __pycache__ \ No newline at end of file diff --git a/backend/python/petals/backend.py b/backend/python/petals/backend.py deleted file mode 100755 index 73bcc4a0da0f..000000000000 --- a/backend/python/petals/backend.py +++ /dev/null @@ -1,140 +0,0 @@ -#!/usr/bin/env python3 -from concurrent import futures -import time -import argparse -import signal -import sys -import os - -import backend_pb2 -import backend_pb2_grpc - -import grpc -import torch -from transformers import AutoTokenizer -from petals import AutoDistributedModelForCausalLM - -_ONE_DAY_IN_SECONDS = 60 * 60 * 24 - -# If MAX_WORKERS are specified in the environment use it, otherwise default to 1 -MAX_WORKERS = int(os.environ.get('PYTHON_GRPC_MAX_WORKERS', '1')) - -# Implement the BackendServicer class with the service methods -class BackendServicer(backend_pb2_grpc.BackendServicer): - """ - A gRPC servicer that implements the Backend service defined in backend.proto. - """ - def Health(self, request, context): - """ - Returns a health check message. - - Args: - request: The health check request. - context: The gRPC context. - - Returns: - backend_pb2.Reply: The health check reply. - """ - return backend_pb2.Reply(message=bytes("OK", 'utf-8')) - - def LoadModel(self, request, context): - """ - Loads a language model. - - Args: - request: The load model request. - context: The gRPC context. - - Returns: - backend_pb2.Result: The load model result. - """ - try: - self.tokenizer = AutoTokenizer.from_pretrained(request.Model, use_fast=False, add_bos_token=False) - self.model = AutoDistributedModelForCausalLM.from_pretrained(request.Model) - self.cuda = False - if request.CUDA: - self.model = self.model.cuda() - self.cuda = True - - except Exception as err: - return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}") - return backend_pb2.Result(message="Model loaded successfully", success=True) - - def Predict(self, request, context): - """ - Generates text based on the given prompt and sampling parameters. - - Args: - request: The predict request. - context: The gRPC context. - - Returns: - backend_pb2.Result: The predict result. - """ - - inputs = self.tokenizer(request.Prompt, return_tensors="pt")["input_ids"] - if self.cuda: - inputs = inputs.cuda() - - if request.Tokens == 0: - # Max to max value if tokens are not specified - request.Tokens = 8192 - - # TODO: kwargs and map all parameters - outputs = self.model.generate(inputs, max_new_tokens=request.Tokens) - - generated_text = self.tokenizer.decode(outputs[0]) - # Remove prompt from response if present - if request.Prompt in generated_text: - generated_text = generated_text.replace(request.Prompt, "") - - return backend_pb2.Result(message=bytes(generated_text, encoding='utf-8')) - - def PredictStream(self, request, context): - """ - Generates text based on the given prompt and sampling parameters, and streams the results. 
- - Args: - request: The predict stream request. - context: The gRPC context. - - Returns: - backend_pb2.Result: The predict stream result. - """ - # Implement PredictStream RPC - #for reply in some_data_generator(): - # yield reply - # Not implemented yet - return self.Predict(request, context) - -def serve(address): - server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS)) - backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server) - server.add_insecure_port(address) - server.start() - print("Server started. Listening on: " + address, file=sys.stderr) - - # Define the signal handler function - def signal_handler(sig, frame): - print("Received termination signal. Shutting down...") - server.stop(0) - sys.exit(0) - - # Set the signal handlers for SIGINT and SIGTERM - signal.signal(signal.SIGINT, signal_handler) - signal.signal(signal.SIGTERM, signal_handler) - - try: - while True: - time.sleep(_ONE_DAY_IN_SECONDS) - except KeyboardInterrupt: - server.stop(0) - -if __name__ == "__main__": - parser = argparse.ArgumentParser(description="Run the gRPC server.") - parser.add_argument( - "--addr", default="localhost:50051", help="The address to bind the server to." - ) - args = parser.parse_args() - - serve(args.addr) diff --git a/backend/python/petals/install.sh b/backend/python/petals/install.sh deleted file mode 100755 index 36443ef1c559..000000000000 --- a/backend/python/petals/install.sh +++ /dev/null @@ -1,14 +0,0 @@ -#!/bin/bash -set -e - -source $(dirname $0)/../common/libbackend.sh - -# This is here because the Intel pip index is broken and returns 200 status codes for every package name, it just doesn't return any package links. -# This makes uv think that the package exists in the Intel pip index, and by default it stops looking at other pip indexes once it finds a match. 
-# We need uv to continue falling through to the pypi default index to find optimum[openvino] in the pypi index -# the --upgrade actually allows us to *downgrade* torch to the version provided in the Intel pip index -if [ "x${BUILD_PROFILE}" == "xintel" ]; then - EXTRA_PIP_INSTALL_FLAGS+=" --upgrade --index-strategy=unsafe-first-match" -fi - -installRequirements diff --git a/backend/python/petals/requirements-hipblas.txt b/backend/python/petals/requirements-hipblas.txt deleted file mode 100644 index 0331f106d614..000000000000 --- a/backend/python/petals/requirements-hipblas.txt +++ /dev/null @@ -1,2 +0,0 @@ ---extra-index-url https://download.pytorch.org/whl/rocm6.0 -torch diff --git a/backend/python/petals/requirements-intel.txt b/backend/python/petals/requirements-intel.txt deleted file mode 100644 index 635b4c31ee1a..000000000000 --- a/backend/python/petals/requirements-intel.txt +++ /dev/null @@ -1,5 +0,0 @@ ---extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ -intel-extension-for-pytorch -torch -optimum[openvino] -setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406 \ No newline at end of file diff --git a/backend/python/petals/requirements.txt b/backend/python/petals/requirements.txt deleted file mode 100644 index 10f5114e74ea..000000000000 --- a/backend/python/petals/requirements.txt +++ /dev/null @@ -1,3 +0,0 @@ -git+https://github.com/bigscience-workshop/petals -certifi -transformers \ No newline at end of file diff --git a/backend/python/petals/run.sh b/backend/python/petals/run.sh deleted file mode 100755 index 375c07e5f426..000000000000 --- a/backend/python/petals/run.sh +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -source $(dirname $0)/../common/libbackend.sh - -startBackend $@ \ No newline at end of file diff --git a/backend/python/petals/test.py b/backend/python/petals/test.py deleted file mode 100644 index 586d24437e16..000000000000 --- a/backend/python/petals/test.py +++ /dev/null @@ -1,58 +0,0 @@ -import unittest -import subprocess -import time -import backend_pb2 -import backend_pb2_grpc - -import grpc - -import unittest -import subprocess -import time -import grpc -import backend_pb2_grpc -import backend_pb2 - -class TestBackendServicer(unittest.TestCase): - """ - TestBackendServicer is the class that tests the gRPC service. - - This class contains methods to test the startup and shutdown of the gRPC service. 
- """ - def setUp(self): - self.service = subprocess.Popen(["python", "backend.py", "--addr", "localhost:50051"]) - time.sleep(10) - - def tearDown(self) -> None: - self.service.terminate() - self.service.wait() - - def test_server_startup(self): - try: - self.setUp() - with grpc.insecure_channel("localhost:50051") as channel: - stub = backend_pb2_grpc.BackendStub(channel) - response = stub.Health(backend_pb2.HealthMessage()) - self.assertEqual(response.message, b'OK') - except Exception as err: - print(err) - self.fail("Server failed to start") - finally: - self.tearDown() - def test_load_model(self): - """ - This method tests if the model is loaded successfully - """ - try: - self.setUp() - with grpc.insecure_channel("localhost:50051") as channel: - stub = backend_pb2_grpc.BackendStub(channel) - response = stub.LoadModel(backend_pb2.ModelOptions(Model="bigscience/bloom-560m")) - print(response) - self.assertTrue(response.success) - self.assertEqual(response.message, "Model loaded successfully") - except Exception as err: - print(err) - self.fail("LoadModel service failed") - finally: - self.tearDown() diff --git a/backend/python/petals/test.sh b/backend/python/petals/test.sh deleted file mode 100755 index 6940b0661df2..000000000000 --- a/backend/python/petals/test.sh +++ /dev/null @@ -1,6 +0,0 @@ -#!/bin/bash -set -e - -source $(dirname $0)/../common/libbackend.sh - -runUnittests diff --git a/backend/python/rerankers/requirements-cpu.txt b/backend/python/rerankers/requirements-cpu.txt new file mode 100644 index 000000000000..25a1d8ab8492 --- /dev/null +++ b/backend/python/rerankers/requirements-cpu.txt @@ -0,0 +1,4 @@ +transformers +accelerate +torch +rerankers[transformers] \ No newline at end of file diff --git a/backend/python/rerankers/requirements-cublas11.txt b/backend/python/rerankers/requirements-cublas11.txt new file mode 100644 index 000000000000..06c4b2cfb52c --- /dev/null +++ b/backend/python/rerankers/requirements-cublas11.txt @@ -0,0 +1,5 @@ +--extra-index-url https://download.pytorch.org/whl/cu118 +transformers +accelerate +torch +rerankers[transformers] \ No newline at end of file diff --git a/backend/python/rerankers/requirements-cublas12.txt b/backend/python/rerankers/requirements-cublas12.txt new file mode 100644 index 000000000000..25a1d8ab8492 --- /dev/null +++ b/backend/python/rerankers/requirements-cublas12.txt @@ -0,0 +1,4 @@ +transformers +accelerate +torch +rerankers[transformers] \ No newline at end of file diff --git a/backend/python/rerankers/requirements-hipblas.txt b/backend/python/rerankers/requirements-hipblas.txt index 76018445f448..961d150cd856 100644 --- a/backend/python/rerankers/requirements-hipblas.txt +++ b/backend/python/rerankers/requirements-hipblas.txt @@ -1,2 +1,5 @@ --extra-index-url https://download.pytorch.org/whl/rocm6.0 -torch \ No newline at end of file +transformers +accelerate +torch +rerankers[transformers] \ No newline at end of file diff --git a/backend/python/rerankers/requirements-intel.txt b/backend/python/rerankers/requirements-intel.txt index 635b4c31ee1a..1a39cf4fc05a 100644 --- a/backend/python/rerankers/requirements-intel.txt +++ b/backend/python/rerankers/requirements-intel.txt @@ -1,5 +1,8 @@ --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ intel-extension-for-pytorch +transformers +accelerate torch +rerankers[transformers] optimum[openvino] -setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406 \ No newline at end of file +setuptools==72.1.0 # 
https://github.com/mudler/LocalAI/issues/2406 \ No newline at end of file diff --git a/backend/python/rerankers/requirements.txt b/backend/python/rerankers/requirements.txt index 8b2ad4d0d9f0..2a8d18b10ce8 100644 --- a/backend/python/rerankers/requirements.txt +++ b/backend/python/rerankers/requirements.txt @@ -1,6 +1,3 @@ -accelerate -rerankers[transformers] -grpcio==1.65.1 +grpcio==1.65.4 protobuf -certifi -transformers \ No newline at end of file +certifi \ No newline at end of file diff --git a/backend/python/sentencetransformers/requirements-cpu.txt b/backend/python/sentencetransformers/requirements-cpu.txt new file mode 100644 index 000000000000..cd9924ef0748 --- /dev/null +++ b/backend/python/sentencetransformers/requirements-cpu.txt @@ -0,0 +1,6 @@ +torch +accelerate +transformers +bitsandbytes +sentence-transformers==3.0.1 +transformers \ No newline at end of file diff --git a/backend/python/sentencetransformers/requirements-cublas11.txt b/backend/python/sentencetransformers/requirements-cublas11.txt new file mode 100644 index 000000000000..1131f06624e5 --- /dev/null +++ b/backend/python/sentencetransformers/requirements-cublas11.txt @@ -0,0 +1,5 @@ +--extra-index-url https://download.pytorch.org/whl/cu118 +torch +accelerate +sentence-transformers==3.0.1 +transformers \ No newline at end of file diff --git a/backend/python/sentencetransformers/requirements-cublas12.txt b/backend/python/sentencetransformers/requirements-cublas12.txt new file mode 100644 index 000000000000..2936e17bc178 --- /dev/null +++ b/backend/python/sentencetransformers/requirements-cublas12.txt @@ -0,0 +1,4 @@ +torch +accelerate +sentence-transformers==3.0.1 +transformers \ No newline at end of file diff --git a/backend/python/sentencetransformers/requirements-hipblas.txt b/backend/python/sentencetransformers/requirements-hipblas.txt index 76018445f448..3b187c685f9c 100644 --- a/backend/python/sentencetransformers/requirements-hipblas.txt +++ b/backend/python/sentencetransformers/requirements-hipblas.txt @@ -1,2 +1,5 @@ --extra-index-url https://download.pytorch.org/whl/rocm6.0 -torch \ No newline at end of file +torch +accelerate +sentence-transformers==3.0.1 +transformers \ No newline at end of file diff --git a/backend/python/sentencetransformers/requirements-intel.txt b/backend/python/sentencetransformers/requirements-intel.txt index 95d4848c5d2c..806e3d475869 100644 --- a/backend/python/sentencetransformers/requirements-intel.txt +++ b/backend/python/sentencetransformers/requirements-intel.txt @@ -2,4 +2,7 @@ intel-extension-for-pytorch torch optimum[openvino] -setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406 \ No newline at end of file +setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406 +accelerate +sentence-transformers==3.0.1 +transformers \ No newline at end of file diff --git a/backend/python/sentencetransformers/requirements.txt b/backend/python/sentencetransformers/requirements.txt index 4ef4a28bd70d..920971ce0b1a 100644 --- a/backend/python/sentencetransformers/requirements.txt +++ b/backend/python/sentencetransformers/requirements.txt @@ -1,6 +1,3 @@ -accelerate -sentence-transformers==3.0.1 -transformers -grpcio==1.65.1 +grpcio==1.65.5 protobuf certifi \ No newline at end of file diff --git a/backend/python/transformers-musicgen/requirements-cpu.txt b/backend/python/transformers-musicgen/requirements-cpu.txt new file mode 100644 index 000000000000..bbcdc8cda704 --- /dev/null +++ b/backend/python/transformers-musicgen/requirements-cpu.txt @@ -0,0 +1,3 @@ 
+transformers +accelerate +torch \ No newline at end of file diff --git a/backend/python/transformers-musicgen/requirements-cublas11.txt b/backend/python/transformers-musicgen/requirements-cublas11.txt new file mode 100644 index 000000000000..191a6eefd4d4 --- /dev/null +++ b/backend/python/transformers-musicgen/requirements-cublas11.txt @@ -0,0 +1,4 @@ +--extra-index-url https://download.pytorch.org/whl/cu118 +transformers +accelerate +torch \ No newline at end of file diff --git a/backend/python/transformers-musicgen/requirements-cublas12.txt b/backend/python/transformers-musicgen/requirements-cublas12.txt new file mode 100644 index 000000000000..bbcdc8cda704 --- /dev/null +++ b/backend/python/transformers-musicgen/requirements-cublas12.txt @@ -0,0 +1,3 @@ +transformers +accelerate +torch \ No newline at end of file diff --git a/backend/python/transformers-musicgen/requirements-hipblas.txt b/backend/python/transformers-musicgen/requirements-hipblas.txt index 76018445f448..00f0a9464e51 100644 --- a/backend/python/transformers-musicgen/requirements-hipblas.txt +++ b/backend/python/transformers-musicgen/requirements-hipblas.txt @@ -1,2 +1,4 @@ --extra-index-url https://download.pytorch.org/whl/rocm6.0 +transformers +accelerate torch \ No newline at end of file diff --git a/backend/python/transformers-musicgen/requirements-intel.txt b/backend/python/transformers-musicgen/requirements-intel.txt index 95d4848c5d2c..89bfa6a20023 100644 --- a/backend/python/transformers-musicgen/requirements-intel.txt +++ b/backend/python/transformers-musicgen/requirements-intel.txt @@ -1,5 +1,7 @@ --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ intel-extension-for-pytorch +transformers +accelerate torch optimum[openvino] setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406 \ No newline at end of file diff --git a/backend/python/transformers-musicgen/requirements.txt b/backend/python/transformers-musicgen/requirements.txt index 8ffa3c317652..a0076112ed3e 100644 --- a/backend/python/transformers-musicgen/requirements.txt +++ b/backend/python/transformers-musicgen/requirements.txt @@ -1,7 +1,4 @@ -accelerate -transformers -grpcio==1.65.1 +grpcio==1.65.5 protobuf -torch scipy==1.14.0 certifi \ No newline at end of file diff --git a/backend/python/transformers/requirements-cpu.txt b/backend/python/transformers/requirements-cpu.txt new file mode 100644 index 000000000000..f1e6281bbf2a --- /dev/null +++ b/backend/python/transformers/requirements-cpu.txt @@ -0,0 +1,4 @@ +torch +accelerate +transformers +bitsandbytes \ No newline at end of file diff --git a/backend/python/transformers/requirements-cublas11.txt b/backend/python/transformers/requirements-cublas11.txt new file mode 100644 index 000000000000..0abd72d96503 --- /dev/null +++ b/backend/python/transformers/requirements-cublas11.txt @@ -0,0 +1,5 @@ +--extra-index-url https://download.pytorch.org/whl/cu118 +torch +accelerate +transformers +bitsandbytes \ No newline at end of file diff --git a/backend/python/transformers/requirements-cublas12.txt b/backend/python/transformers/requirements-cublas12.txt new file mode 100644 index 000000000000..f1e6281bbf2a --- /dev/null +++ b/backend/python/transformers/requirements-cublas12.txt @@ -0,0 +1,4 @@ +torch +accelerate +transformers +bitsandbytes \ No newline at end of file diff --git a/backend/python/transformers/requirements-hipblas.txt b/backend/python/transformers/requirements-hipblas.txt index 76018445f448..f6900af129b0 100644 --- 
a/backend/python/transformers/requirements-hipblas.txt +++ b/backend/python/transformers/requirements-hipblas.txt @@ -1,2 +1,5 @@ --extra-index-url https://download.pytorch.org/whl/rocm6.0 -torch \ No newline at end of file +torch +accelerate +transformers +bitsandbytes \ No newline at end of file diff --git a/backend/python/transformers/requirements-intel.txt b/backend/python/transformers/requirements-intel.txt index 8fc18a0ec3d2..5d9efb715dd1 100644 --- a/backend/python/transformers/requirements-intel.txt +++ b/backend/python/transformers/requirements-intel.txt @@ -2,3 +2,5 @@ intel-extension-for-pytorch torch optimum[openvino] +intel-extension-for-transformers +bitsandbytes \ No newline at end of file diff --git a/backend/python/transformers/requirements.txt b/backend/python/transformers/requirements.txt index 55925b329405..5531ea0ee199 100644 --- a/backend/python/transformers/requirements.txt +++ b/backend/python/transformers/requirements.txt @@ -1,9 +1,4 @@ -accelerate -transformers -grpcio==1.65.1 +grpcio==1.65.5 protobuf -torch certifi -intel-extension-for-transformers -bitsandbytes -setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406 +setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406 \ No newline at end of file diff --git a/backend/python/vall-e-x/requirements-cpu.txt b/backend/python/vall-e-x/requirements-cpu.txt new file mode 100644 index 000000000000..3a3304c0b7f9 --- /dev/null +++ b/backend/python/vall-e-x/requirements-cpu.txt @@ -0,0 +1,3 @@ +accelerate +torch +torchaudio \ No newline at end of file diff --git a/backend/python/vall-e-x/requirements-cublas11.txt b/backend/python/vall-e-x/requirements-cublas11.txt new file mode 100644 index 000000000000..4e0a151a26c6 --- /dev/null +++ b/backend/python/vall-e-x/requirements-cublas11.txt @@ -0,0 +1,4 @@ +--extra-index-url https://download.pytorch.org/whl/cu118 +accelerate +torch +torchaudio \ No newline at end of file diff --git a/backend/python/vall-e-x/requirements-cublas12.txt b/backend/python/vall-e-x/requirements-cublas12.txt new file mode 100644 index 000000000000..3a3304c0b7f9 --- /dev/null +++ b/backend/python/vall-e-x/requirements-cublas12.txt @@ -0,0 +1,3 @@ +accelerate +torch +torchaudio \ No newline at end of file diff --git a/backend/python/vall-e-x/requirements-hipblas.txt b/backend/python/vall-e-x/requirements-hipblas.txt index 7bfc411bd51c..fc43790a2e59 100644 --- a/backend/python/vall-e-x/requirements-hipblas.txt +++ b/backend/python/vall-e-x/requirements-hipblas.txt @@ -1,3 +1,4 @@ --extra-index-url https://download.pytorch.org/whl/rocm6.0 -torch -torchaudio \ No newline at end of file +accelerate +torch==2.3.0+rocm6.0 +torchaudio==2.3.0+rocm6.0 \ No newline at end of file diff --git a/backend/python/vall-e-x/requirements-intel.txt b/backend/python/vall-e-x/requirements-intel.txt index 5c4aa6a5ce68..6185314fe232 100644 --- a/backend/python/vall-e-x/requirements-intel.txt +++ b/backend/python/vall-e-x/requirements-intel.txt @@ -1,6 +1,7 @@ --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ intel-extension-for-pytorch +accelerate torch torchaudio optimum[openvino] -setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406 \ No newline at end of file +setuptools==72.1.0 # https://github.com/mudler/LocalAI/issues/2406 \ No newline at end of file diff --git a/backend/python/vall-e-x/requirements.txt b/backend/python/vall-e-x/requirements.txt index d1d0583e40c1..920971ce0b1a 100644 --- a/backend/python/vall-e-x/requirements.txt +++ 
b/backend/python/vall-e-x/requirements.txt @@ -1,4 +1,3 @@ -accelerate -grpcio==1.65.1 +grpcio==1.65.5 protobuf certifi \ No newline at end of file diff --git a/backend/python/vllm/requirements-after.txt b/backend/python/vllm/requirements-after.txt new file mode 100644 index 000000000000..76f11f154037 --- /dev/null +++ b/backend/python/vllm/requirements-after.txt @@ -0,0 +1 @@ +vllm \ No newline at end of file diff --git a/backend/python/vllm/requirements-cpu.txt b/backend/python/vllm/requirements-cpu.txt new file mode 100644 index 000000000000..765a1ef558e6 --- /dev/null +++ b/backend/python/vllm/requirements-cpu.txt @@ -0,0 +1,3 @@ +accelerate +torch +transformers \ No newline at end of file diff --git a/backend/python/vllm/requirements-cublas.txt b/backend/python/vllm/requirements-cublas11-after.txt similarity index 100% rename from backend/python/vllm/requirements-cublas.txt rename to backend/python/vllm/requirements-cublas11-after.txt diff --git a/backend/python/vllm/requirements-cublas11.txt b/backend/python/vllm/requirements-cublas11.txt new file mode 100644 index 000000000000..4381772756dd --- /dev/null +++ b/backend/python/vllm/requirements-cublas11.txt @@ -0,0 +1,4 @@ +--extra-index-url https://download.pytorch.org/whl/cu118 +accelerate +torch +transformers \ No newline at end of file diff --git a/backend/python/vllm/requirements-cublas12-after.txt b/backend/python/vllm/requirements-cublas12-after.txt new file mode 100644 index 000000000000..7bfe8efeb555 --- /dev/null +++ b/backend/python/vllm/requirements-cublas12-after.txt @@ -0,0 +1 @@ +flash-attn \ No newline at end of file diff --git a/backend/python/vllm/requirements-cublas12.txt b/backend/python/vllm/requirements-cublas12.txt new file mode 100644 index 000000000000..765a1ef558e6 --- /dev/null +++ b/backend/python/vllm/requirements-cublas12.txt @@ -0,0 +1,3 @@ +accelerate +torch +transformers \ No newline at end of file diff --git a/backend/python/vllm/requirements-hipblas.txt b/backend/python/vllm/requirements-hipblas.txt index 76018445f448..c73d8141d3a5 100644 --- a/backend/python/vllm/requirements-hipblas.txt +++ b/backend/python/vllm/requirements-hipblas.txt @@ -1,2 +1,4 @@ --extra-index-url https://download.pytorch.org/whl/rocm6.0 -torch \ No newline at end of file +accelerate +torch +transformers \ No newline at end of file diff --git a/backend/python/vllm/requirements-intel.txt b/backend/python/vllm/requirements-intel.txt index 635b4c31ee1a..7903282e84a5 100644 --- a/backend/python/vllm/requirements-intel.txt +++ b/backend/python/vllm/requirements-intel.txt @@ -1,5 +1,7 @@ --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ intel-extension-for-pytorch +accelerate torch +transformers optimum[openvino] setuptools==70.3.0 # https://github.com/mudler/LocalAI/issues/2406 \ No newline at end of file diff --git a/backend/python/vllm/requirements.txt b/backend/python/vllm/requirements.txt index 7c612a2f87f5..99b8109801ab 100644 --- a/backend/python/vllm/requirements.txt +++ b/backend/python/vllm/requirements.txt @@ -1,7 +1,4 @@ -accelerate -vllm -grpcio==1.65.1 +grpcio==1.65.5 protobuf certifi -transformers setuptools \ No newline at end of file diff --git a/core/cli/cli.go b/core/cli/cli.go index 0fed33fdf0df..2073778d747f 100644 --- a/core/cli/cli.go +++ b/core/cli/cli.go @@ -15,4 +15,5 @@ var CLI struct { Transcript TranscriptCMD `cmd:"" help:"Convert audio to text"` Worker worker.Worker `cmd:"" help:"Run workers to distribute workload (llama.cpp-only)"` Util UtilCMD `cmd:"" help:"Utility 
commands"` + Explorer ExplorerCMD `cmd:"" help:"Run p2p explorer"` } diff --git a/core/cli/explorer.go b/core/cli/explorer.go new file mode 100644 index 000000000000..67d25304165d --- /dev/null +++ b/core/cli/explorer.go @@ -0,0 +1,49 @@ +package cli + +import ( + "context" + "time" + + cliContext "github.com/mudler/LocalAI/core/cli/context" + "github.com/mudler/LocalAI/core/explorer" + "github.com/mudler/LocalAI/core/http" +) + +type ExplorerCMD struct { + Address string `env:"LOCALAI_ADDRESS,ADDRESS" default:":8080" help:"Bind address for the API server" group:"api"` + PoolDatabase string `env:"LOCALAI_POOL_DATABASE,POOL_DATABASE" default:"explorer.json" help:"Path to the pool database" group:"api"` + ConnectionTimeout string `env:"LOCALAI_CONNECTION_TIMEOUT,CONNECTION_TIMEOUT" default:"2m" help:"Connection timeout for the explorer" group:"api"` + ConnectionErrorThreshold int `env:"LOCALAI_CONNECTION_ERROR_THRESHOLD,CONNECTION_ERROR_THRESHOLD" default:"3" help:"Connection failure threshold for the explorer" group:"api"` + + WithSync bool `env:"LOCALAI_WITH_SYNC,WITH_SYNC" default:"false" help:"Enable sync with the network" group:"api"` + OnlySync bool `env:"LOCALAI_ONLY_SYNC,ONLY_SYNC" default:"false" help:"Only sync with the network" group:"api"` +} + +func (e *ExplorerCMD) Run(ctx *cliContext.Context) error { + + db, err := explorer.NewDatabase(e.PoolDatabase) + if err != nil { + return err + } + + dur, err := time.ParseDuration(e.ConnectionTimeout) + if err != nil { + return err + } + + if e.WithSync { + ds := explorer.NewDiscoveryServer(db, dur, e.ConnectionErrorThreshold) + go ds.Start(context.Background(), true) + } + + if e.OnlySync { + ds := explorer.NewDiscoveryServer(db, dur, e.ConnectionErrorThreshold) + ctx := context.Background() + + return ds.Start(ctx, false) + } + + appHTTP := http.Explorer(db) + + return appHTTP.Listen(e.Address) +} diff --git a/core/cli/federated.go b/core/cli/federated.go index 32f0fa879555..b917812ce5a8 100644 --- a/core/cli/federated.go +++ b/core/cli/federated.go @@ -8,14 +8,16 @@ import ( ) type FederatedCLI struct { - Address string `env:"LOCALAI_ADDRESS,ADDRESS" default:":8080" help:"Bind address for the API server" group:"api"` - Peer2PeerToken string `env:"LOCALAI_P2P_TOKEN,P2P_TOKEN,TOKEN" name:"p2ptoken" help:"Token for P2P mode (optional)" group:"p2p"` - LoadBalanced bool `env:"LOCALAI_LOAD_BALANCED,LOAD_BALANCED" default:"false" help:"Enable load balancing" group:"p2p"` + Address string `env:"LOCALAI_ADDRESS,ADDRESS" default:":8080" help:"Bind address for the API server" group:"api"` + Peer2PeerToken string `env:"LOCALAI_P2P_TOKEN,P2P_TOKEN,TOKEN" name:"p2ptoken" help:"Token for P2P mode (optional)" group:"p2p"` + RandomWorker bool `env:"LOCALAI_RANDOM_WORKER,RANDOM_WORKER" default:"false" help:"Select a random worker from the pool" group:"p2p"` + Peer2PeerNetworkID string `env:"LOCALAI_P2P_NETWORK_ID,P2P_NETWORK_ID" help:"Network ID for P2P mode, can be set arbitrarly by the user for grouping a set of instances." 
group:"p2p"` + TargetWorker string `env:"LOCALAI_TARGET_WORKER,TARGET_WORKER" help:"Target worker to run the federated server on" group:"p2p"` } func (f *FederatedCLI) Run(ctx *cliContext.Context) error { - fs := p2p.NewFederatedServer(f.Address, p2p.FederatedID, f.Peer2PeerToken, f.LoadBalanced) + fs := p2p.NewFederatedServer(f.Address, p2p.NetworkID(f.Peer2PeerNetworkID, p2p.FederatedID), f.Peer2PeerToken, !f.RandomWorker, f.TargetWorker) return fs.Start(context.Background()) } diff --git a/core/cli/models.go b/core/cli/models.go index 030470185e74..56d13fc7a12f 100644 --- a/core/cli/models.go +++ b/core/cli/models.go @@ -83,7 +83,9 @@ func (mi *ModelsInstall) Run(ctx *cliContext.Context) error { return err } - if !downloader.LooksLikeOCI(modelName) { + modelURI := downloader.URI(modelName) + + if !modelURI.LooksLikeOCI() { model := gallery.FindModel(models, modelName, mi.ModelsPath) if model == nil { log.Error().Str("model", modelName).Msg("model not found") diff --git a/core/cli/run.go b/core/cli/run.go index b3d9163223a2..c469f05fcfbc 100644 --- a/core/cli/run.go +++ b/core/cli/run.go @@ -54,6 +54,7 @@ type RunCMD struct { OpaqueErrors bool `env:"LOCALAI_OPAQUE_ERRORS" default:"false" help:"If true, all error responses are replaced with blank 500 errors. This is intended only for hardening against information leaks and is normally not recommended." group:"hardening"` Peer2Peer bool `env:"LOCALAI_P2P,P2P" name:"p2p" default:"false" help:"Enable P2P mode" group:"p2p"` Peer2PeerToken string `env:"LOCALAI_P2P_TOKEN,P2P_TOKEN,TOKEN" name:"p2ptoken" help:"Token for P2P mode (optional)" group:"p2p"` + Peer2PeerNetworkID string `env:"LOCALAI_P2P_NETWORK_ID,P2P_NETWORK_ID" help:"Network ID for P2P mode, can be set arbitrarly by the user for grouping a set of instances" group:"p2p"` ParallelRequests bool `env:"LOCALAI_PARALLEL_REQUESTS,PARALLEL_REQUESTS" help:"Enable backends to handle multiple requests in parallel if they support it (e.g.: llama.cpp or vllm)" group:"backends"` SingleActiveBackend bool `env:"LOCALAI_SINGLE_ACTIVE_BACKEND,SINGLE_ACTIVE_BACKEND" help:"Allow only one backend to be run at a time" group:"backends"` PreloadBackendOnly bool `env:"LOCALAI_PRELOAD_BACKEND_ONLY,PRELOAD_BACKEND_ONLY" default:"false" help:"Do not launch the API services, only the preloaded models / backends are started (useful for multi-node setups)" group:"backends"` @@ -63,6 +64,7 @@ type RunCMD struct { EnableWatchdogBusy bool `env:"LOCALAI_WATCHDOG_BUSY,WATCHDOG_BUSY" default:"false" help:"Enable watchdog for stopping backends that are busy longer than the watchdog-busy-timeout" group:"backends"` WatchdogBusyTimeout string `env:"LOCALAI_WATCHDOG_BUSY_TIMEOUT,WATCHDOG_BUSY_TIMEOUT" default:"5m" help:"Threshold beyond which a busy backend should be stopped" group:"backends"` Federated bool `env:"LOCALAI_FEDERATED,FEDERATED" help:"Enable federated instance" group:"federated"` + DisableGalleryEndpoint bool `env:"LOCALAI_DISABLE_GALLERY_ENDPOINT,DISABLE_GALLERY_ENDPOINT" help:"Disable the gallery endpoints" group:"api"` } func (r *RunCMD) Run(ctx *cliContext.Context) error { @@ -94,6 +96,7 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error { config.WithModelsURL(append(r.Models, r.ModelArgs...)...), config.WithOpaqueErrors(r.OpaqueErrors), config.WithEnforcedPredownloadScans(!r.DisablePredownloadScan), + config.WithP2PNetworkID(r.Peer2PeerNetworkID), } token := "" @@ -119,9 +122,9 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error { } log.Info().Msg("Starting P2P server discovery...") - if err := 
p2p.ServiceDiscoverer(context.Background(), node, token, "", func(serviceID string, node p2p.NodeData) { + if err := p2p.ServiceDiscoverer(context.Background(), node, token, p2p.NetworkID(r.Peer2PeerNetworkID, p2p.WorkerID), func(serviceID string, node p2p.NodeData) { var tunnelAddresses []string - for _, v := range p2p.GetAvailableNodes("") { + for _, v := range p2p.GetAvailableNodes(p2p.NetworkID(r.Peer2PeerNetworkID, p2p.WorkerID)) { if v.IsOnline() { tunnelAddresses = append(tunnelAddresses, v.TunnelAddress) } else { @@ -132,7 +135,7 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error { os.Setenv("LLAMACPP_GRPC_SERVERS", tunnelEnvVar) log.Debug().Msgf("setting LLAMACPP_GRPC_SERVERS to %s", tunnelEnvVar) - }); err != nil { + }, true); err != nil { return err } } @@ -142,14 +145,13 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error { if err != nil { return err } - if err := p2p.ExposeService(context.Background(), "localhost", port, token, p2p.FederatedID); err != nil { - return err - } - node, err := p2p.NewNode(token) + fedCtx := context.Background() + node, err := p2p.ExposeService(fedCtx, "localhost", port, token, p2p.NetworkID(r.Peer2PeerNetworkID, p2p.FederatedID)) if err != nil { return err } - if err := p2p.ServiceDiscoverer(context.Background(), node, token, p2p.FederatedID, nil); err != nil { + + if err := p2p.ServiceDiscoverer(fedCtx, node, token, p2p.NetworkID(r.Peer2PeerNetworkID, p2p.FederatedID), nil, false); err != nil { return err } } @@ -161,6 +163,10 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error { opts = append(opts, config.DisableWebUI) } + if r.DisableGalleryEndpoint { + opts = append(opts, config.DisableGalleryEndpoint) + } + if idleWatchDog || busyWatchDog { opts = append(opts, config.EnableWatchDog) if idleWatchDog { diff --git a/core/cli/util.go b/core/cli/util.go index a7204092bed2..b3e545d869e3 100644 --- a/core/cli/util.go +++ b/core/cli/util.go @@ -86,8 +86,8 @@ func (hfscmd *HFScanCMD) Run(ctx *cliContext.Context) error { var errs error = nil for _, uri := range hfscmd.ToScan { log.Info().Str("uri", uri).Msg("scanning specific uri") - scanResults, err := downloader.HuggingFaceScan(uri) - if err != nil && !errors.Is(err, downloader.ErrNonHuggingFaceFile) { + scanResults, err := downloader.HuggingFaceScan(downloader.URI(uri)) + if err != nil && errors.Is(err, downloader.ErrUnsafeFilesFound) { log.Error().Err(err).Strs("clamAV", scanResults.ClamAVInfectedFiles).Strs("pickles", scanResults.DangerousPickles).Msg("! WARNING ! 
A known-vulnerable model is included in this repo!") errs = errors.Join(errs, err) } diff --git a/core/cli/worker/worker_llamacpp.go b/core/cli/worker/worker_llamacpp.go index 5598a4857d90..2baf51ec4c1b 100644 --- a/core/cli/worker/worker_llamacpp.go +++ b/core/cli/worker/worker_llamacpp.go @@ -21,7 +21,7 @@ func (r *LLamaCPP) Run(ctx *cliContext.Context) error { err := assets.ExtractFiles(ctx.BackendAssets, r.BackendAssetsPath) log.Debug().Msgf("Extracting backend assets files to %s", r.BackendAssetsPath) if err != nil { - log.Warn().Msgf("Failed extracting backend assets files: %s (might be required for some backends to work properly, like gpt4all)", err) + log.Warn().Msgf("Failed extracting backend assets files: %s (might be required for some backends to work properly)", err) } if len(os.Args) < 4 { diff --git a/core/cli/worker/worker_p2p.go b/core/cli/worker/worker_p2p.go index 2eb5cb94bb7c..7c900667abf3 100644 --- a/core/cli/worker/worker_p2p.go +++ b/core/cli/worker/worker_p2p.go @@ -19,12 +19,13 @@ import ( ) type P2P struct { - WorkerFlags `embed:""` - Token string `env:"LOCALAI_TOKEN,LOCALAI_P2P_TOKEN,TOKEN" help:"P2P token to use"` - NoRunner bool `env:"LOCALAI_NO_RUNNER,NO_RUNNER" help:"Do not start the llama-cpp-rpc-server"` - RunnerAddress string `env:"LOCALAI_RUNNER_ADDRESS,RUNNER_ADDRESS" help:"Address of the llama-cpp-rpc-server"` - RunnerPort string `env:"LOCALAI_RUNNER_PORT,RUNNER_PORT" help:"Port of the llama-cpp-rpc-server"` - ExtraLLamaCPPArgs []string `env:"LOCALAI_EXTRA_LLAMA_CPP_ARGS,EXTRA_LLAMA_CPP_ARGS" help:"Extra arguments to pass to llama-cpp-rpc-server"` + WorkerFlags `embed:""` + Token string `env:"LOCALAI_TOKEN,LOCALAI_P2P_TOKEN,TOKEN" help:"P2P token to use"` + NoRunner bool `env:"LOCALAI_NO_RUNNER,NO_RUNNER" help:"Do not start the llama-cpp-rpc-server"` + RunnerAddress string `env:"LOCALAI_RUNNER_ADDRESS,RUNNER_ADDRESS" help:"Address of the llama-cpp-rpc-server"` + RunnerPort string `env:"LOCALAI_RUNNER_PORT,RUNNER_PORT" help:"Port of the llama-cpp-rpc-server"` + ExtraLLamaCPPArgs []string `env:"LOCALAI_EXTRA_LLAMA_CPP_ARGS,EXTRA_LLAMA_CPP_ARGS" help:"Extra arguments to pass to llama-cpp-rpc-server"` + Peer2PeerNetworkID string `env:"LOCALAI_P2P_NETWORK_ID,P2P_NETWORK_ID" help:"Network ID for P2P mode, can be set arbitrarly by the user for grouping a set of instances" group:"p2p"` } func (r *P2P) Run(ctx *cliContext.Context) error { @@ -32,7 +33,7 @@ func (r *P2P) Run(ctx *cliContext.Context) error { err := assets.ExtractFiles(ctx.BackendAssets, r.BackendAssetsPath) log.Debug().Msgf("Extracting backend assets files to %s", r.BackendAssetsPath) if err != nil { - log.Warn().Msgf("Failed extracting backend assets files: %s (might be required for some backends to work properly, like gpt4all)", err) + log.Warn().Msgf("Failed extracting backend assets files: %s (might be required for some backends to work properly)", err) } // Check if the token is set @@ -59,7 +60,7 @@ func (r *P2P) Run(ctx *cliContext.Context) error { p = r.RunnerPort } - err = p2p.ExposeService(context.Background(), address, p, r.Token, "") + _, err = p2p.ExposeService(context.Background(), address, p, r.Token, p2p.NetworkID(r.Peer2PeerNetworkID, p2p.WorkerID)) if err != nil { return err } @@ -99,7 +100,7 @@ func (r *P2P) Run(ctx *cliContext.Context) error { } }() - err = p2p.ExposeService(context.Background(), address, fmt.Sprint(port), r.Token, "") + _, err = p2p.ExposeService(context.Background(), address, fmt.Sprint(port), r.Token, p2p.NetworkID(r.Peer2PeerNetworkID, p2p.WorkerID)) if err 
!= nil { return err } diff --git a/core/config/application_config.go b/core/config/application_config.go index 7233d1ac0916..947c4f136ba5 100644 --- a/core/config/application_config.go +++ b/core/config/application_config.go @@ -34,6 +34,7 @@ type ApplicationConfig struct { EnforcePredownloadScans bool OpaqueErrors bool P2PToken string + P2PNetworkID string ModelLibraryURL string @@ -56,6 +57,8 @@ type ApplicationConfig struct { ModelsURL []string WatchDogBusyTimeout, WatchDogIdleTimeout time.Duration + + DisableGalleryEndpoint bool } type AppOption func(*ApplicationConfig) @@ -91,6 +94,12 @@ func WithCors(b bool) AppOption { } } +func WithP2PNetworkID(s string) AppOption { + return func(o *ApplicationConfig) { + o.P2PNetworkID = s + } +} + func WithCsrf(b bool) AppOption { return func(o *ApplicationConfig) { o.CSRF = b @@ -124,6 +133,10 @@ var EnableWatchDogIdleCheck = func(o *ApplicationConfig) { o.WatchDogIdle = true } +var DisableGalleryEndpoint = func(o *ApplicationConfig) { + o.DisableGalleryEndpoint = true +} + var EnableWatchDogBusyCheck = func(o *ApplicationConfig) { o.WatchDog = true o.WatchDogBusy = true diff --git a/core/config/backend_config.go b/core/config/backend_config.go index 561d4c3f2693..ab6a6cc6ea5c 100644 --- a/core/config/backend_config.go +++ b/core/config/backend_config.go @@ -8,7 +8,6 @@ import ( "github.com/mudler/LocalAI/core/schema" "github.com/mudler/LocalAI/pkg/downloader" "github.com/mudler/LocalAI/pkg/functions" - "github.com/mudler/LocalAI/pkg/utils" ) const ( @@ -72,9 +71,9 @@ type BackendConfig struct { } type File struct { - Filename string `yaml:"filename" json:"filename"` - SHA256 string `yaml:"sha256" json:"sha256"` - URI string `yaml:"uri" json:"uri"` + Filename string `yaml:"filename" json:"filename"` + SHA256 string `yaml:"sha256" json:"sha256"` + URI downloader.URI `yaml:"uri" json:"uri"` } type VallE struct { @@ -213,28 +212,32 @@ func (c *BackendConfig) ShouldCallSpecificFunction() bool { // MMProjFileName returns the filename of the MMProj file // If the MMProj is a URL, it will return the MD5 of the URL which is the filename func (c *BackendConfig) MMProjFileName() string { - modelURL := downloader.ConvertURL(c.MMProj) - if downloader.LooksLikeURL(modelURL) { - return utils.MD5(modelURL) + uri := downloader.URI(c.MMProj) + if uri.LooksLikeURL() { + f, _ := uri.FilenameFromUrl() + return f } return c.MMProj } func (c *BackendConfig) IsMMProjURL() bool { - return downloader.LooksLikeURL(downloader.ConvertURL(c.MMProj)) + uri := downloader.URI(c.MMProj) + return uri.LooksLikeURL() } func (c *BackendConfig) IsModelURL() bool { - return downloader.LooksLikeURL(downloader.ConvertURL(c.Model)) + uri := downloader.URI(c.Model) + return uri.LooksLikeURL() } // ModelFileName returns the filename of the model // If the model is a URL, it will return the MD5 of the URL which is the filename func (c *BackendConfig) ModelFileName() string { - modelURL := downloader.ConvertURL(c.Model) - if downloader.LooksLikeURL(modelURL) { - return utils.MD5(modelURL) + uri := downloader.URI(c.Model) + if uri.LooksLikeURL() { + f, _ := uri.FilenameFromUrl() + return f } return c.Model diff --git a/core/config/backend_config_loader.go b/core/config/backend_config_loader.go index 283dac52bd7d..45fe259e6417 100644 --- a/core/config/backend_config_loader.go +++ b/core/config/backend_config_loader.go @@ -244,7 +244,7 @@ func (bcl *BackendConfigLoader) Preload(modelPath string) error { // Create file path filePath := filepath.Join(modelPath, file.Filename) - if err := 
downloader.DownloadFile(file.URI, filePath, file.SHA256, i, len(config.DownloadFiles), status); err != nil { + if err := file.URI.DownloadFile(filePath, file.SHA256, i, len(config.DownloadFiles), status); err != nil { return err } } @@ -252,10 +252,10 @@ func (bcl *BackendConfigLoader) Preload(modelPath string) error { // If the model is an URL, expand it, and download the file if config.IsModelURL() { modelFileName := config.ModelFileName() - modelURL := downloader.ConvertURL(config.Model) + uri := downloader.URI(config.Model) // check if file exists if _, err := os.Stat(filepath.Join(modelPath, modelFileName)); errors.Is(err, os.ErrNotExist) { - err := downloader.DownloadFile(modelURL, filepath.Join(modelPath, modelFileName), "", 0, 0, status) + err := uri.DownloadFile(filepath.Join(modelPath, modelFileName), "", 0, 0, status) if err != nil { return err } @@ -269,10 +269,10 @@ func (bcl *BackendConfigLoader) Preload(modelPath string) error { if config.IsMMProjURL() { modelFileName := config.MMProjFileName() - modelURL := downloader.ConvertURL(config.MMProj) + uri := downloader.URI(config.MMProj) // check if file exists if _, err := os.Stat(filepath.Join(modelPath, modelFileName)); errors.Is(err, os.ErrNotExist) { - err := downloader.DownloadFile(modelURL, filepath.Join(modelPath, modelFileName), "", 0, 0, status) + err := uri.DownloadFile(filepath.Join(modelPath, modelFileName), "", 0, 0, status) if err != nil { return err } diff --git a/core/config/guesser.go b/core/config/guesser.go index 6c6ef4302991..b63dd051a32a 100644 --- a/core/config/guesser.go +++ b/core/config/guesser.go @@ -26,15 +26,17 @@ const ( type settingsConfig struct { StopWords []string TemplateConfig TemplateConfig + RepeatPenalty float64 } // default settings to adopt with a given model family var defaultsSettings map[familyType]settingsConfig = map[familyType]settingsConfig{ Gemma: { + RepeatPenalty: 1.0, StopWords: []string{"<|im_end|>", "", ""}, TemplateConfig: TemplateConfig{ - Chat: "{{.Input }}\n<|start_of_turn|>model\n", - ChatMessage: "<|start_of_turn|>{{if eq .RoleName \"assistant\" }}model{{else}}{{ .RoleName }}{{end}}\n{{ if .Content -}}\n{{.Content -}}\n{{ end -}}<|end_of_turn|>", + Chat: "{{.Input }}\nmodel\n", + ChatMessage: "{{if eq .RoleName \"assistant\" }}model{{else}}{{ .RoleName }}{{end}}\n{{ if .Content -}}\n{{.Content -}}\n{{ end -}}", Completion: "{{.Input}}", }, }, @@ -192,6 +194,9 @@ func guessDefaultsFromFile(cfg *BackendConfig, modelPath string) { if len(cfg.StopWords) == 0 { cfg.StopWords = settings.StopWords } + if cfg.RepeatPenalty == 0.0 { + cfg.RepeatPenalty = settings.RepeatPenalty + } } else { log.Debug().Any("family", family).Msgf("guessDefaultsFromFile: no template found for family") } @@ -219,7 +224,7 @@ func identifyFamily(f *gguf.GGUFFile) familyType { commandR := arch == "command-r" && eosTokenID == 255001 qwen2 := arch == "qwen2" phi3 := arch == "phi-3" - gemma := strings.HasPrefix(f.Model().Name, "gemma") + gemma := strings.HasPrefix(arch, "gemma") || strings.Contains(strings.ToLower(f.Model().Name), "gemma") deepseek2 := arch == "deepseek2" switch { diff --git a/core/dependencies_manager/manager.go b/core/dependencies_manager/manager.go index b86139e0f749..8434f721071c 100644 --- a/core/dependencies_manager/manager.go +++ b/core/dependencies_manager/manager.go @@ -37,7 +37,8 @@ func main() { // download the assets for _, asset := range assets { - if err := downloader.DownloadFile(asset.URL, filepath.Join(destPath, asset.FileName), asset.SHA, 1, 1, 
utils.DisplayDownloadFunction); err != nil { + uri := downloader.URI(asset.URL) + if err := uri.DownloadFile(filepath.Join(destPath, asset.FileName), asset.SHA, 1, 1, utils.DisplayDownloadFunction); err != nil { panic(err) } } diff --git a/core/explorer/database.go b/core/explorer/database.go new file mode 100644 index 000000000000..e24de0aad26b --- /dev/null +++ b/core/explorer/database.go @@ -0,0 +1,125 @@ +package explorer + +// A simple JSON database for storing and retrieving p2p network tokens and a name and description. + +import ( + "encoding/json" + "os" + "sort" + "sync" + + "github.com/gofrs/flock" +) + +// Database is a simple JSON database for storing and retrieving p2p network tokens and a name and description. +type Database struct { + path string + data map[string]TokenData + flock *flock.Flock + sync.Mutex +} + +// TokenData is a p2p network token with a name and description. +type TokenData struct { + Name string `json:"name"` + Description string `json:"description"` + Clusters []ClusterData + Failures int +} + +type ClusterData struct { + Workers []string + Type string + NetworkID string +} + +// NewDatabase creates a new Database with the given path. +func NewDatabase(path string) (*Database, error) { + fileLock := flock.New(path + ".lock") + db := &Database{ + data: make(map[string]TokenData), + path: path, + flock: fileLock, + } + return db, db.load() +} + +// Get retrieves a Token from the Database by its token. +func (db *Database) Get(token string) (TokenData, bool) { + db.flock.Lock() // we are making sure that the file is not being written to + defer db.flock.Unlock() + db.Lock() // we are making sure that is safe if called by another instance in the same process + defer db.Unlock() + db.load() + t, ok := db.data[token] + return t, ok +} + +// Set stores a Token in the Database by its token. +func (db *Database) Set(token string, t TokenData) error { + db.flock.Lock() + defer db.flock.Unlock() + db.Lock() + defer db.Unlock() + db.load() + db.data[token] = t + + return db.save() +} + +// Delete removes a Token from the Database by its token. +func (db *Database) Delete(token string) error { + db.flock.Lock() + defer db.flock.Unlock() + db.Lock() + defer db.Unlock() + db.load() + delete(db.data, token) + return db.save() +} + +func (db *Database) TokenList() []string { + db.flock.Lock() + defer db.flock.Unlock() + db.Lock() + defer db.Unlock() + db.load() + tokens := []string{} + for k := range db.data { + tokens = append(tokens, k) + } + + sort.Slice(tokens, func(i, j int) bool { + // sort by token + return tokens[i] < tokens[j] + }) + + return tokens +} + +// load reads the Database from disk. +func (db *Database) load() error { + if _, err := os.Stat(db.path); os.IsNotExist(err) { + return nil + } + + // Read the file from disk + // Unmarshal the JSON into db.data + f, err := os.ReadFile(db.path) + if err != nil { + return err + } + return json.Unmarshal(f, &db.data) +} + +// Save writes the Database to disk. +func (db *Database) save() error { + // Marshal db.data into JSON + // Write the JSON to the file + f, err := os.Create(db.path) + if err != nil { + return err + } + defer f.Close() + return json.NewEncoder(f).Encode(db.data) +} diff --git a/core/explorer/database_test.go b/core/explorer/database_test.go new file mode 100644 index 000000000000..7f2cbd268a36 --- /dev/null +++ b/core/explorer/database_test.go @@ -0,0 +1,92 @@ +package explorer_test + +import ( + "os" + + . "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" + + "github.com/mudler/LocalAI/core/explorer" +) + +var _ = Describe("Database", func() { + var ( + dbPath string + db *explorer.Database + err error + ) + + BeforeEach(func() { + // Create a temporary file path for the database + dbPath = "test_db.json" + db, err = explorer.NewDatabase(dbPath) + Expect(err).To(BeNil()) + }) + + AfterEach(func() { + // Clean up the temporary database file + os.Remove(dbPath) + }) + + Context("when managing tokens", func() { + It("should add and retrieve a token", func() { + token := "token123" + t := explorer.TokenData{Name: "TokenName", Description: "A test token"} + + err = db.Set(token, t) + Expect(err).To(BeNil()) + + retrievedToken, exists := db.Get(token) + Expect(exists).To(BeTrue()) + Expect(retrievedToken).To(Equal(t)) + }) + + It("should delete a token", func() { + token := "token123" + t := explorer.TokenData{Name: "TokenName", Description: "A test token"} + + err = db.Set(token, t) + Expect(err).To(BeNil()) + + err = db.Delete(token) + Expect(err).To(BeNil()) + + _, exists := db.Get(token) + Expect(exists).To(BeFalse()) + }) + + It("should persist data to disk", func() { + token := "token123" + t := explorer.TokenData{Name: "TokenName", Description: "A test token"} + + err = db.Set(token, t) + Expect(err).To(BeNil()) + + // Recreate the database object to simulate reloading from disk + db, err = explorer.NewDatabase(dbPath) + Expect(err).To(BeNil()) + + retrievedToken, exists := db.Get(token) + Expect(exists).To(BeTrue()) + Expect(retrievedToken).To(Equal(t)) + + // Check the token list + tokenList := db.TokenList() + Expect(tokenList).To(ContainElement(token)) + }) + }) + + Context("when loading an empty or non-existent file", func() { + It("should start with an empty database", func() { + dbPath = "empty_db.json" + db, err = explorer.NewDatabase(dbPath) + Expect(err).To(BeNil()) + + _, exists := db.Get("nonexistent") + Expect(exists).To(BeFalse()) + + // Clean up + os.Remove(dbPath) + }) + }) +}) diff --git a/core/explorer/discovery.go b/core/explorer/discovery.go new file mode 100644 index 000000000000..fe6470cb825d --- /dev/null +++ b/core/explorer/discovery.go @@ -0,0 +1,213 @@ +package explorer + +import ( + "context" + "fmt" + "strings" + "sync" + "time" + + "github.com/rs/zerolog/log" + + "github.com/mudler/LocalAI/core/p2p" + "github.com/mudler/edgevpn/pkg/blockchain" +) + +type DiscoveryServer struct { + sync.Mutex + database *Database + connectionTime time.Duration + errorThreshold int +} + +// NewDiscoveryServer creates a new DiscoveryServer with the given Database. 
+// it keeps the db state in sync with the network state +func NewDiscoveryServer(db *Database, dur time.Duration, failureThreshold int) *DiscoveryServer { + if dur == 0 { + dur = 50 * time.Second + } + if failureThreshold == 0 { + failureThreshold = 3 + } + return &DiscoveryServer{ + database: db, + connectionTime: dur, + errorThreshold: failureThreshold, + } +} + +type Network struct { + Clusters []ClusterData +} + +func (s *DiscoveryServer) runBackground() { + if len(s.database.TokenList()) == 0 { + time.Sleep(5 * time.Second) // avoid busy loop + return + } + + for _, token := range s.database.TokenList() { + c, cancel := context.WithTimeout(context.Background(), s.connectionTime) + defer cancel() + + // Connect to the network + // Get the number of nodes + // save it in the current state (mutex) + // do not do in parallel + n, err := p2p.NewNode(token) + if err != nil { + log.Err(err).Msg("Failed to create node") + s.failedToken(token) + continue + } + + err = n.Start(c) + if err != nil { + log.Err(err).Msg("Failed to start node") + s.failedToken(token) + continue + } + + ledger, err := n.Ledger() + if err != nil { + log.Err(err).Msg("Failed to start ledger") + s.failedToken(token) + continue + } + + networkData := make(chan ClusterData) + + // get the network data - it takes the whole timeout + // as we might not be connected to the network yet, + // and few attempts would have to be made before bailing out + go s.retrieveNetworkData(c, ledger, networkData) + + hasWorkers := false + ledgerK := []ClusterData{} + for key := range networkData { + ledgerK = append(ledgerK, key) + if len(key.Workers) > 0 { + hasWorkers = true + } + } + + log.Debug().Any("network", token).Msgf("Network has %d clusters", len(ledgerK)) + if len(ledgerK) != 0 { + for _, k := range ledgerK { + log.Debug().Any("network", token).Msgf("Clusterdata %+v", k) + } + } + + if hasWorkers { + s.Lock() + data, _ := s.database.Get(token) + (&data).Clusters = ledgerK + (&data).Failures = 0 + s.database.Set(token, data) + s.Unlock() + } else { + s.failedToken(token) + } + } + + s.deleteFailedConnections() +} + +func (s *DiscoveryServer) failedToken(token string) { + s.Lock() + defer s.Unlock() + data, _ := s.database.Get(token) + (&data).Failures++ + s.database.Set(token, data) +} + +func (s *DiscoveryServer) deleteFailedConnections() { + s.Lock() + defer s.Unlock() + for _, t := range s.database.TokenList() { + data, _ := s.database.Get(t) + if data.Failures > s.errorThreshold { + log.Info().Any("token", t).Msg("Token has been removed from the database") + s.database.Delete(t) + } + } +} + +func (s *DiscoveryServer) retrieveNetworkData(c context.Context, ledger *blockchain.Ledger, networkData chan ClusterData) { + clusters := map[string]ClusterData{} + + defer func() { + for _, n := range clusters { + networkData <- n + } + close(networkData) + }() + + for { + select { + case <-c.Done(): + return + default: + time.Sleep(5 * time.Second) + + data := ledger.LastBlock().Storage + LEDGER: + for d := range data { + toScanForWorkers := false + cd := ClusterData{} + isWorkerCluster := d == p2p.WorkerID || (strings.Contains(d, "_") && strings.Contains(d, p2p.WorkerID)) + isFederatedCluster := d == p2p.FederatedID || (strings.Contains(d, "_") && strings.Contains(d, p2p.FederatedID)) + switch { + case isWorkerCluster: + toScanForWorkers = true + cd.Type = "worker" + case isFederatedCluster: + toScanForWorkers = true + cd.Type = "federated" + } + + if strings.Contains(d, "_") { + cd.NetworkID = strings.Split(d, "_")[0] + } + + if 
!toScanForWorkers { + continue LEDGER + } + + atLeastOneWorker := false + DATA: + for _, v := range data[d] { + nd := &p2p.NodeData{} + if err := v.Unmarshal(nd); err != nil { + continue DATA + } + + if nd.IsOnline() { + atLeastOneWorker = true + (&cd).Workers = append(cd.Workers, nd.ID) + } + } + + if atLeastOneWorker { + clusters[d] = cd + } + } + } + } +} + +// Start the discovery server. This is meant to be run in to a goroutine. +func (s *DiscoveryServer) Start(ctx context.Context, keepRunning bool) error { + for { + select { + case <-ctx.Done(): + return fmt.Errorf("context cancelled") + default: + // Collect data + s.runBackground() + if !keepRunning { + return nil + } + } + } +} diff --git a/core/explorer/explorer_suite_test.go b/core/explorer/explorer_suite_test.go new file mode 100644 index 000000000000..fc718d5f8dfa --- /dev/null +++ b/core/explorer/explorer_suite_test.go @@ -0,0 +1,13 @@ +package explorer_test + +import ( + "testing" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" +) + +func TestExplorer(t *testing.T) { + RegisterFailHandler(Fail) + RunSpecs(t, "Explorer test suite") +} diff --git a/core/gallery/gallery.go b/core/gallery/gallery.go index d102eac8a7dd..6ced6244128f 100644 --- a/core/gallery/gallery.go +++ b/core/gallery/gallery.go @@ -131,7 +131,8 @@ func AvailableGalleryModels(galleries []config.Gallery, basePath string) ([]*Gal func findGalleryURLFromReferenceURL(url string, basePath string) (string, error) { var refFile string - err := downloader.DownloadAndUnmarshal(url, basePath, func(url string, d []byte) error { + uri := downloader.URI(url) + err := uri.DownloadAndUnmarshal(basePath, func(url string, d []byte) error { refFile = string(d) if len(refFile) == 0 { return fmt.Errorf("invalid reference file at url %s: %s", url, d) @@ -153,8 +154,9 @@ func getGalleryModels(gallery config.Gallery, basePath string) ([]*GalleryModel, return models, err } } + uri := downloader.URI(gallery.URL) - err := downloader.DownloadAndUnmarshal(gallery.URL, basePath, func(url string, d []byte) error { + err := uri.DownloadAndUnmarshal(basePath, func(url string, d []byte) error { return yaml.Unmarshal(d, &models) }) if err != nil { @@ -204,34 +206,33 @@ func DeleteModelFromSystem(basePath string, name string, additionalFiles []strin log.Error().Err(err).Msgf("failed to read gallery file %s", configFile) } + var filesToRemove []string + // Remove additional files if galleryconfig != nil { for _, f := range galleryconfig.Files { fullPath := filepath.Join(basePath, f.Filename) - log.Debug().Msgf("Removing file %s", fullPath) - if e := os.Remove(fullPath); e != nil { - err = errors.Join(err, fmt.Errorf("failed to remove file %s: %w", f.Filename, e)) - } + filesToRemove = append(filesToRemove, fullPath) } } for _, f := range additionalFiles { fullPath := filepath.Join(filepath.Join(basePath, f)) - log.Debug().Msgf("Removing additional file %s", fullPath) - if e := os.Remove(fullPath); e != nil { - err = errors.Join(err, fmt.Errorf("failed to remove file %s: %w", f, e)) - } + filesToRemove = append(filesToRemove, fullPath) } - log.Debug().Msgf("Removing model config file %s", configFile) + filesToRemove = append(filesToRemove, configFile) + filesToRemove = append(filesToRemove, galleryFile) - // Delete the model config file - if e := os.Remove(configFile); e != nil { - err = errors.Join(err, fmt.Errorf("failed to remove file %s: %w", configFile, e)) - } + // skip duplicates + filesToRemove = utils.Unique(filesToRemove) - // Delete gallery config file - 
os.Remove(galleryFile) + // Removing files + for _, f := range filesToRemove { + if e := os.Remove(f); e != nil { + err = errors.Join(err, fmt.Errorf("failed to remove file %s: %w", f, e)) + } + } return err } @@ -253,8 +254,8 @@ func SafetyScanGalleryModels(galleries []config.Gallery, basePath string) error func SafetyScanGalleryModel(galleryModel *GalleryModel) error { for _, file := range galleryModel.AdditionalFiles { - scanResults, err := downloader.HuggingFaceScan(file.URI) - if err != nil && !errors.Is(err, downloader.ErrNonHuggingFaceFile) { + scanResults, err := downloader.HuggingFaceScan(downloader.URI(file.URI)) + if err != nil && errors.Is(err, downloader.ErrUnsafeFilesFound) { log.Error().Str("model", galleryModel.Name).Strs("clamAV", scanResults.ClamAVInfectedFiles).Strs("pickles", scanResults.DangerousPickles).Msg("Contains unsafe file(s)!") return err } diff --git a/core/gallery/models.go b/core/gallery/models.go index 32460a9cb76d..dec6312eaf30 100644 --- a/core/gallery/models.go +++ b/core/gallery/models.go @@ -68,7 +68,8 @@ type PromptTemplate struct { func GetGalleryConfigFromURL(url string, basePath string) (Config, error) { var config Config - err := downloader.DownloadAndUnmarshal(url, basePath, func(url string, d []byte) error { + uri := downloader.URI(url) + err := uri.DownloadAndUnmarshal(basePath, func(url string, d []byte) error { return yaml.Unmarshal(d, &config) }) if err != nil { @@ -118,14 +119,14 @@ func InstallModel(basePath, nameOverride string, config *Config, configOverrides filePath := filepath.Join(basePath, file.Filename) if enforceScan { - scanResults, err := downloader.HuggingFaceScan(file.URI) - if err != nil && !errors.Is(err, downloader.ErrNonHuggingFaceFile) { + scanResults, err := downloader.HuggingFaceScan(downloader.URI(file.URI)) + if err != nil && errors.Is(err, downloader.ErrUnsafeFilesFound) { log.Error().Str("model", config.Name).Strs("clamAV", scanResults.ClamAVInfectedFiles).Strs("pickles", scanResults.DangerousPickles).Msg("Contains unsafe file(s)!") return err } } - - if err := downloader.DownloadFile(file.URI, filePath, file.SHA256, i, len(config.Files), downloadStatus); err != nil { + uri := downloader.URI(file.URI) + if err := uri.DownloadFile(filePath, file.SHA256, i, len(config.Files), downloadStatus); err != nil { return err } } diff --git a/core/http/app_test.go b/core/http/app_test.go index 3fb1658159d5..a837e20c01a4 100644 --- a/core/http/app_test.go +++ b/core/http/app_test.go @@ -73,8 +73,9 @@ func getModelStatus(url string) (response map[string]interface{}) { } func getModels(url string) (response []gallery.GalleryModel) { + uri := downloader.URI(url) // TODO: No tests currently seem to exercise file:// urls. Fix? 
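The refactor running through this diff replaces the free functions (`downloader.ConvertURL`, `LooksLikeURL`, `DownloadFile`, `DownloadAndUnmarshal`) with methods on the `downloader.URI` string type, as the hunks above show. A rough sketch of the resulting call pattern follows; it is illustrative only, the URLs are placeholders, and it assumes the `pkg/utils` import path used elsewhere in the repo for the stock progress callback.

```go
package main

import (
	"encoding/json"
	"fmt"

	"github.com/mudler/LocalAI/pkg/downloader"
	"github.com/mudler/LocalAI/pkg/utils"
)

func main() {
	// Wrap a raw model reference in the new URI string type.
	uri := downloader.URI("https://example.com/gallery/index.json") // illustrative URL

	if uri.LooksLikeURL() {
		// Fetch and unmarshal in one call, as getModels above now does.
		var entries []map[string]interface{}
		err := uri.DownloadAndUnmarshal("", func(url string, d []byte) error {
			return json.Unmarshal(d, &entries)
		})
		fmt.Println(len(entries), err)
	}

	// Single-file download (file 1 of 1) with the default progress callback.
	model := downloader.URI("https://example.com/model.gguf") // illustrative URL
	_ = model.DownloadFile("/tmp/model.gguf", "", 1, 1, utils.DisplayDownloadFunction)
}
```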
- downloader.DownloadAndUnmarshal(url, "", func(url string, i []byte) error { + uri.DownloadAndUnmarshal("", func(url string, i []byte) error { // Unmarshal YAML data into a struct return json.Unmarshal(i, &response) }) @@ -562,32 +563,6 @@ var _ = Describe("API test", func() { Expect(res["unit"]).To(Equal("celcius"), fmt.Sprint(res)) Expect(string(resp2.Choices[0].FinishReason)).To(Equal("function_call"), fmt.Sprint(resp2.Choices[0].FinishReason)) }) - - It("runs gpt4all", Label("gpt4all"), func() { - if runtime.GOOS != "linux" { - Skip("test supported only on linux") - } - - response := postModelApplyRequest("http://127.0.0.1:9090/models/apply", modelApplyRequest{ - URL: "github:go-skynet/model-gallery/gpt4all-j.yaml", - Name: "gpt4all-j", - }) - - Expect(response["uuid"]).ToNot(BeEmpty(), fmt.Sprint(response)) - - uuid := response["uuid"].(string) - - Eventually(func() bool { - response := getModelStatus("http://127.0.0.1:9090/models/jobs/" + uuid) - return response["processed"].(bool) - }, "960s", "10s").Should(Equal(true)) - - resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "gpt4all-j", Messages: []openai.ChatCompletionMessage{openai.ChatCompletionMessage{Role: "user", Content: "How are you?"}}}) - Expect(err).ToNot(HaveOccurred()) - Expect(len(resp.Choices)).To(Equal(1)) - Expect(resp.Choices[0].Message.Content).To(ContainSubstring("well")) - }) - }) }) @@ -791,20 +766,6 @@ var _ = Describe("API test", func() { Expect(resp.Choices[0].Message.Content).ToNot(BeEmpty()) }) - It("can generate completions from model configs", func() { - resp, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "gpt4all", Prompt: testPrompt}) - Expect(err).ToNot(HaveOccurred()) - Expect(len(resp.Choices)).To(Equal(1)) - Expect(resp.Choices[0].Text).ToNot(BeEmpty()) - }) - - It("can generate chat completions from model configs", func() { - resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "gpt4all-2", Messages: []openai.ChatCompletionMessage{openai.ChatCompletionMessage{Role: "user", Content: testPrompt}}}) - Expect(err).ToNot(HaveOccurred()) - Expect(len(resp.Choices)).To(Equal(1)) - Expect(resp.Choices[0].Message.Content).ToNot(BeEmpty()) - }) - It("returns errors", func() { _, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "foomodel", Prompt: testPrompt}) Expect(err).To(HaveOccurred()) diff --git a/core/http/elements/gallery.go b/core/http/elements/gallery.go index 3b3741d8f210..91a12310cf75 100644 --- a/core/http/elements/gallery.go +++ b/core/http/elements/gallery.go @@ -9,7 +9,6 @@ import ( "github.com/mudler/LocalAI/core/gallery" "github.com/mudler/LocalAI/core/p2p" "github.com/mudler/LocalAI/core/services" - "github.com/mudler/LocalAI/pkg/xsync" ) const ( @@ -372,7 +371,12 @@ func dropBadChars(s string) string { return strings.ReplaceAll(s, "@", "__") } -func ListModels(models []*gallery.GalleryModel, processing *xsync.SyncedMap[string, string], galleryService *services.GalleryService) string { +type ProcessTracker interface { + Exists(string) bool + Get(string) string +} + +func ListModels(models []*gallery.GalleryModel, processTracker ProcessTracker, galleryService *services.GalleryService) string { modelsElements := []elem.Node{} descriptionDiv := func(m *gallery.GalleryModel) elem.Node { return elem.Div( @@ -396,7 +400,7 @@ func ListModels(models []*gallery.GalleryModel, processing *xsync.SyncedMap[stri actionDiv := func(m *gallery.GalleryModel) 
elem.Node { galleryID := fmt.Sprintf("%s@%s", m.Gallery.Name, m.Name) - currentlyProcessing := processing.Exists(galleryID) + currentlyProcessing := processTracker.Exists(galleryID) jobID := "" isDeletionOp := false if currentlyProcessing { @@ -404,7 +408,7 @@ func ListModels(models []*gallery.GalleryModel, processing *xsync.SyncedMap[stri if status != nil && status.Deletion { isDeletionOp = true } - jobID = processing.Get(galleryID) + jobID = processTracker.Get(galleryID) // TODO: // case not handled, if status == nil : "Waiting" } @@ -497,8 +501,9 @@ func ListModels(models []*gallery.GalleryModel, processing *xsync.SyncedMap[stri }, elem.Img(attrs.Props{ // "class": "rounded-t-lg object-fit object-center h-96", - "class": "rounded-t-lg max-h-48 max-w-96 object-cover mt-3", - "src": m.Icon, + "class": "rounded-t-lg max-h-48 max-w-96 object-cover mt-3", + "src": m.Icon, + "loading": "lazy", }), ), ), diff --git a/core/http/endpoints/explorer/dashboard.go b/core/http/endpoints/explorer/dashboard.go new file mode 100644 index 000000000000..9c731d9a4f78 --- /dev/null +++ b/core/http/endpoints/explorer/dashboard.go @@ -0,0 +1,102 @@ +package explorer + +import ( + "encoding/base64" + "sort" + + "github.com/gofiber/fiber/v2" + "github.com/mudler/LocalAI/core/explorer" + "github.com/mudler/LocalAI/internal" +) + +func Dashboard() func(*fiber.Ctx) error { + return func(c *fiber.Ctx) error { + summary := fiber.Map{ + "Title": "LocalAI API - " + internal.PrintableVersion(), + "Version": internal.PrintableVersion(), + } + + if string(c.Context().Request.Header.ContentType()) == "application/json" || len(c.Accepts("html")) == 0 { + // The client expects a JSON response + return c.Status(fiber.StatusOK).JSON(summary) + } else { + // Render index + return c.Render("views/explorer", summary) + } + } +} + +type AddNetworkRequest struct { + Token string `json:"token"` + Name string `json:"name"` + Description string `json:"description"` +} + +type Network struct { + explorer.TokenData + Token string `json:"token"` +} + +func ShowNetworks(db *explorer.Database) func(*fiber.Ctx) error { + return func(c *fiber.Ctx) error { + results := []Network{} + for _, token := range db.TokenList() { + networkData, exists := db.Get(token) // get the token data + hasWorkers := false + for _, cluster := range networkData.Clusters { + if len(cluster.Workers) > 0 { + hasWorkers = true + break + } + } + if exists && hasWorkers { + results = append(results, Network{TokenData: networkData, Token: token}) + } + } + + // order by number of clusters + sort.Slice(results, func(i, j int) bool { + return len(results[i].Clusters) > len(results[j].Clusters) + }) + + return c.JSON(results) + } +} + +func AddNetwork(db *explorer.Database) func(*fiber.Ctx) error { + return func(c *fiber.Ctx) error { + request := new(AddNetworkRequest) + if err := c.BodyParser(request); err != nil { + return c.Status(fiber.StatusBadRequest).JSON(fiber.Map{"error": "Cannot parse JSON"}) + } + + if request.Token == "" { + return c.Status(fiber.StatusBadRequest).JSON(fiber.Map{"error": "Token is required"}) + } + + if request.Name == "" { + return c.Status(fiber.StatusBadRequest).JSON(fiber.Map{"error": "Name is required"}) + } + + if request.Description == "" { + return c.Status(fiber.StatusBadRequest).JSON(fiber.Map{"error": "Description is required"}) + } + + // TODO: check if token is valid, otherwise reject + // try to decode the token from base64 + _, err := base64.StdEncoding.DecodeString(request.Token) + if err != nil { + return 
c.Status(fiber.StatusBadRequest).JSON(fiber.Map{"error": "Invalid token"}) + } + + if _, exists := db.Get(request.Token); exists { + return c.Status(fiber.StatusBadRequest).JSON(fiber.Map{"error": "Token already exists"}) + } + err = db.Set(request.Token, explorer.TokenData{Name: request.Name, Description: request.Description}) + if err != nil { + return c.Status(fiber.StatusInternalServerError).JSON(fiber.Map{"error": "Cannot add token"}) + } + + return c.Status(fiber.StatusOK).JSON(fiber.Map{"message": "Token added"}) + } +} diff --git a/core/http/endpoints/localai/p2p.go b/core/http/endpoints/localai/p2p.go index cab0bb5daf59..bbcee8c801e1 100644 --- a/core/http/endpoints/localai/p2p.go +++ b/core/http/endpoints/localai/p2p.go @@ -11,12 +11,14 @@ import ( // @Summary Returns available P2P nodes // @Success 200 {object} []schema.P2PNodesResponse "Response" // @Router /api/p2p [get] -func ShowP2PNodes(c *fiber.Ctx) error { +func ShowP2PNodes(appConfig *config.ApplicationConfig) func(*fiber.Ctx) error { // Render index - return c.JSON(schema.P2PNodesResponse{ - Nodes: p2p.GetAvailableNodes(""), - FederatedNodes: p2p.GetAvailableNodes(p2p.FederatedID), - }) + return func(c *fiber.Ctx) error { + return c.JSON(schema.P2PNodesResponse{ + Nodes: p2p.GetAvailableNodes(p2p.NetworkID(appConfig.P2PNetworkID, p2p.WorkerID)), + FederatedNodes: p2p.GetAvailableNodes(p2p.NetworkID(appConfig.P2PNetworkID, p2p.FederatedID)), + }) + } } // ShowP2PToken returns the P2P token diff --git a/core/http/endpoints/localai/welcome.go b/core/http/endpoints/localai/welcome.go index 5d217173c3e5..396c4084b5e2 100644 --- a/core/http/endpoints/localai/welcome.go +++ b/core/http/endpoints/localai/welcome.go @@ -17,7 +17,10 @@ func WelcomeEndpoint(appConfig *config.ApplicationConfig, backendConfigs := cl.GetAllBackendConfigs() galleryConfigs := map[string]*gallery.Config{} + modelsWithBackendConfig := map[string]interface{}{} + for _, m := range backendConfigs { + modelsWithBackendConfig[m.Name] = nil cfg, err := gallery.GetLocalModelConfiguration(ml.ModelPath, m.Name) if err != nil { @@ -32,7 +35,7 @@ func WelcomeEndpoint(appConfig *config.ApplicationConfig, modelsWithoutConfig := []string{} for _, m := range models { - if _, ok := galleryConfigs[m]; !ok { + if _, ok := modelsWithBackendConfig[m]; !ok { modelsWithoutConfig = append(modelsWithoutConfig, m) } } diff --git a/core/http/endpoints/openai/chat.go b/core/http/endpoints/openai/chat.go index 86b75601bc45..12a14eace4fb 100644 --- a/core/http/endpoints/openai/chat.go +++ b/core/http/endpoints/openai/chat.go @@ -172,6 +172,14 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup funcs := input.Functions shouldUseFn := len(input.Functions) > 0 && config.ShouldUseFunctions() + strictMode := false + + for _, f := range input.Functions { + if f.Strict { + strictMode = true + break + } + } // Allow the user to set custom actions via config file // to be "embedded" in each model @@ -187,10 +195,33 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup if config.ResponseFormatMap != nil { d := schema.ChatCompletionResponseFormat{} - dat, _ := json.Marshal(config.ResponseFormatMap) - _ = json.Unmarshal(dat, &d) + dat, err := json.Marshal(config.ResponseFormatMap) + if err != nil { + return err + } + err = json.Unmarshal(dat, &d) + if err != nil { + return err + } if d.Type == "json_object" { input.Grammar = functions.JSONBNF + } else if d.Type == "json_schema" { + d := schema.JsonSchemaRequest{} + dat, err := 
json.Marshal(config.ResponseFormatMap) + if err != nil { + return err + } + err = json.Unmarshal(dat, &d) + if err != nil { + return err + } + fs := &functions.JSONFunctionStructure{ + AnyOf: []functions.Item{d.JsonSchema.Schema}, + } + g, err := fs.Grammar(config.FunctionsConfig.GrammarOptions()...) + if err == nil { + input.Grammar = g + } } } @@ -201,7 +232,7 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup } switch { - case !config.FunctionsConfig.GrammarConfig.NoGrammar && shouldUseFn: + case (!config.FunctionsConfig.GrammarConfig.NoGrammar || strictMode) && shouldUseFn: noActionGrammar := functions.Function{ Name: noActionName, Description: noActionDescription, diff --git a/core/http/explorer.go b/core/http/explorer.go new file mode 100644 index 000000000000..bdcb93b16d55 --- /dev/null +++ b/core/http/explorer.go @@ -0,0 +1,46 @@ +package http + +import ( + "net/http" + + "github.com/gofiber/fiber/v2" + "github.com/gofiber/fiber/v2/middleware/favicon" + "github.com/gofiber/fiber/v2/middleware/filesystem" + "github.com/mudler/LocalAI/core/explorer" + "github.com/mudler/LocalAI/core/http/routes" +) + +func Explorer(db *explorer.Database) *fiber.App { + + fiberCfg := fiber.Config{ + Views: renderEngine(), + // We disable the Fiber startup message as it does not conform to structured logging. + // We register a startup log line with connection information in the OnListen hook to keep things user friendly though + DisableStartupMessage: false, + // Override default error handler + } + + app := fiber.New(fiberCfg) + + routes.RegisterExplorerRoutes(app, db) + + httpFS := http.FS(embedDirStatic) + + app.Use(favicon.New(favicon.Config{ + URL: "/favicon.ico", + FileSystem: httpFS, + File: "static/favicon.ico", + })) + + app.Use("/static", filesystem.New(filesystem.Config{ + Root: httpFS, + PathPrefix: "static", + Browse: true, + })) + + // Define a custom 404 handler + // Note: keep this at the bottom! 
+ app.Use(notFoundHandler) + + return app +} diff --git a/core/http/routes/explorer.go b/core/http/routes/explorer.go new file mode 100644 index 000000000000..960b476b8ffc --- /dev/null +++ b/core/http/routes/explorer.go @@ -0,0 +1,13 @@ +package routes + +import ( + "github.com/gofiber/fiber/v2" + coreExplorer "github.com/mudler/LocalAI/core/explorer" + "github.com/mudler/LocalAI/core/http/endpoints/explorer" +) + +func RegisterExplorerRoutes(app *fiber.App, db *coreExplorer.Database) { + app.Get("/", explorer.Dashboard()) + app.Post("/network/add", explorer.AddNetwork(db)) + app.Get("/networks", explorer.ShowNetworks(db)) +} diff --git a/core/http/routes/localai.go b/core/http/routes/localai.go index b8a811b5faf0..105991e85904 100644 --- a/core/http/routes/localai.go +++ b/core/http/routes/localai.go @@ -21,17 +21,18 @@ func RegisterLocalAIRoutes(app *fiber.App, app.Get("/swagger/*", swagger.HandlerDefault) // default // LocalAI API endpoints + if !appConfig.DisableGalleryEndpoint { + modelGalleryEndpointService := localai.CreateModelGalleryEndpointService(appConfig.Galleries, appConfig.ModelPath, galleryService) + app.Post("/models/apply", auth, modelGalleryEndpointService.ApplyModelGalleryEndpoint()) + app.Post("/models/delete/:name", auth, modelGalleryEndpointService.DeleteModelGalleryEndpoint()) - modelGalleryEndpointService := localai.CreateModelGalleryEndpointService(appConfig.Galleries, appConfig.ModelPath, galleryService) - app.Post("/models/apply", auth, modelGalleryEndpointService.ApplyModelGalleryEndpoint()) - app.Post("/models/delete/:name", auth, modelGalleryEndpointService.DeleteModelGalleryEndpoint()) - - app.Get("/models/available", auth, modelGalleryEndpointService.ListModelFromGalleryEndpoint()) - app.Get("/models/galleries", auth, modelGalleryEndpointService.ListModelGalleriesEndpoint()) - app.Post("/models/galleries", auth, modelGalleryEndpointService.AddModelGalleryEndpoint()) - app.Delete("/models/galleries", auth, modelGalleryEndpointService.RemoveModelGalleryEndpoint()) - app.Get("/models/jobs/:uuid", auth, modelGalleryEndpointService.GetOpStatusEndpoint()) - app.Get("/models/jobs", auth, modelGalleryEndpointService.GetAllStatusEndpoint()) + app.Get("/models/available", auth, modelGalleryEndpointService.ListModelFromGalleryEndpoint()) + app.Get("/models/galleries", auth, modelGalleryEndpointService.ListModelGalleriesEndpoint()) + app.Post("/models/galleries", auth, modelGalleryEndpointService.AddModelGalleryEndpoint()) + app.Delete("/models/galleries", auth, modelGalleryEndpointService.RemoveModelGalleryEndpoint()) + app.Get("/models/jobs/:uuid", auth, modelGalleryEndpointService.GetOpStatusEndpoint()) + app.Get("/models/jobs", auth, modelGalleryEndpointService.GetAllStatusEndpoint()) + } app.Post("/tts", auth, localai.TTSEndpoint(cl, ml, appConfig)) @@ -59,7 +60,7 @@ func RegisterLocalAIRoutes(app *fiber.App, // p2p if p2p.IsP2PEnabled() { - app.Get("/api/p2p", auth, localai.ShowP2PNodes) + app.Get("/api/p2p", auth, localai.ShowP2PNodes(appConfig)) app.Get("/api/p2p/token", auth, localai.ShowP2PToken(appConfig)) } diff --git a/core/http/routes/ui.go b/core/http/routes/ui.go index 33706944fa2e..6dfb3f433df2 100644 --- a/core/http/routes/ui.go +++ b/core/http/routes/ui.go @@ -21,6 +21,40 @@ import ( "github.com/google/uuid" ) +type modelOpCache struct { + status *xsync.SyncedMap[string, string] +} + +func NewModelOpCache() *modelOpCache { + return &modelOpCache{ + status: xsync.NewSyncedMap[string, string](), + } +} + +func (m *modelOpCache) Set(key string, value 
string) { + m.status.Set(key, value) +} + +func (m *modelOpCache) Get(key string) string { + return m.status.Get(key) +} + +func (m *modelOpCache) DeleteUUID(uuid string) { + for _, k := range m.status.Keys() { + if m.status.Get(k) == uuid { + m.status.Delete(k) + } + } +} + +func (m *modelOpCache) Map() map[string]string { + return m.status.Map() +} + +func (m *modelOpCache) Exists(key string) bool { + return m.status.Exists(key) +} + func RegisterUIRoutes(app *fiber.App, cl *config.BackendConfigLoader, ml *model.ModelLoader, @@ -29,7 +63,7 @@ func RegisterUIRoutes(app *fiber.App, auth func(*fiber.Ctx) error) { // keeps the state of models that are being installed from the UI - var processingModels = xsync.NewSyncedMap[string, string]() + var processingModels = NewModelOpCache() // modelStatus returns the current status of the models being processed (installation or deletion) // it is called asynchonously from the UI @@ -62,6 +96,7 @@ func RegisterUIRoutes(app *fiber.App, //"FederatedNodes": p2p.GetAvailableNodes(p2p.FederatedID), "IsP2PEnabled": p2p.IsP2PEnabled(), "P2PToken": appConfig.P2PToken, + "NetworkID": appConfig.P2PNetworkID, } // Render index @@ -70,202 +105,202 @@ func RegisterUIRoutes(app *fiber.App, /* show nodes live! */ app.Get("/p2p/ui/workers", auth, func(c *fiber.Ctx) error { - return c.SendString(elements.P2PNodeBoxes(p2p.GetAvailableNodes(""))) + return c.SendString(elements.P2PNodeBoxes(p2p.GetAvailableNodes(p2p.NetworkID(appConfig.P2PNetworkID, p2p.WorkerID)))) }) app.Get("/p2p/ui/workers-federation", auth, func(c *fiber.Ctx) error { - return c.SendString(elements.P2PNodeBoxes(p2p.GetAvailableNodes(p2p.FederatedID))) + return c.SendString(elements.P2PNodeBoxes(p2p.GetAvailableNodes(p2p.NetworkID(appConfig.P2PNetworkID, p2p.FederatedID)))) }) app.Get("/p2p/ui/workers-stats", auth, func(c *fiber.Ctx) error { - return c.SendString(elements.P2PNodeStats(p2p.GetAvailableNodes(""))) + return c.SendString(elements.P2PNodeStats(p2p.GetAvailableNodes(p2p.NetworkID(appConfig.P2PNetworkID, p2p.WorkerID)))) }) app.Get("/p2p/ui/workers-federation-stats", auth, func(c *fiber.Ctx) error { - return c.SendString(elements.P2PNodeStats(p2p.GetAvailableNodes(p2p.FederatedID))) + return c.SendString(elements.P2PNodeStats(p2p.GetAvailableNodes(p2p.NetworkID(appConfig.P2PNetworkID, p2p.FederatedID)))) }) } - // Show the Models page (all models) - app.Get("/browse", auth, func(c *fiber.Ctx) error { - term := c.Query("term") + if !appConfig.DisableGalleryEndpoint { + + // Show the Models page (all models) + app.Get("/browse", auth, func(c *fiber.Ctx) error { + term := c.Query("term") - models, _ := gallery.AvailableGalleryModels(appConfig.Galleries, appConfig.ModelPath) + models, _ := gallery.AvailableGalleryModels(appConfig.Galleries, appConfig.ModelPath) - // Get all available tags - allTags := map[string]struct{}{} - tags := []string{} - for _, m := range models { - for _, t := range m.Tags { - allTags[t] = struct{}{} + // Get all available tags + allTags := map[string]struct{}{} + tags := []string{} + for _, m := range models { + for _, t := range m.Tags { + allTags[t] = struct{}{} + } } - } - for t := range allTags { - tags = append(tags, t) - } - sort.Strings(tags) + for t := range allTags { + tags = append(tags, t) + } + sort.Strings(tags) - if term != "" { - models = gallery.GalleryModels(models).Search(term) - } + if term != "" { + models = gallery.GalleryModels(models).Search(term) + } - // Get model statuses - processingModelsData, taskTypes := modelStatus() + // Get model 
statuses + processingModelsData, taskTypes := modelStatus() - summary := fiber.Map{ - "Title": "LocalAI - Models", - "Version": internal.PrintableVersion(), - "Models": template.HTML(elements.ListModels(models, processingModels, galleryService)), - "Repositories": appConfig.Galleries, - "AllTags": tags, - "ProcessingModels": processingModelsData, - "AvailableModels": len(models), - "IsP2PEnabled": p2p.IsP2PEnabled(), - - "TaskTypes": taskTypes, - // "ApplicationConfig": appConfig, - } + summary := fiber.Map{ + "Title": "LocalAI - Models", + "Version": internal.PrintableVersion(), + "Models": template.HTML(elements.ListModels(models, processingModels, galleryService)), + "Repositories": appConfig.Galleries, + "AllTags": tags, + "ProcessingModels": processingModelsData, + "AvailableModels": len(models), + "IsP2PEnabled": p2p.IsP2PEnabled(), + + "TaskTypes": taskTypes, + // "ApplicationConfig": appConfig, + } - // Render index - return c.Render("views/models", summary) - }) + // Render index + return c.Render("views/models", summary) + }) - // Show the models, filtered from the user input - // https://htmx.org/examples/active-search/ - app.Post("/browse/search/models", auth, func(c *fiber.Ctx) error { - form := struct { - Search string `form:"search"` - }{} - if err := c.BodyParser(&form); err != nil { - return c.Status(fiber.StatusBadRequest).SendString(err.Error()) - } + // Show the models, filtered from the user input + // https://htmx.org/examples/active-search/ + app.Post("/browse/search/models", auth, func(c *fiber.Ctx) error { + form := struct { + Search string `form:"search"` + }{} + if err := c.BodyParser(&form); err != nil { + return c.Status(fiber.StatusBadRequest).SendString(err.Error()) + } - models, _ := gallery.AvailableGalleryModels(appConfig.Galleries, appConfig.ModelPath) + models, _ := gallery.AvailableGalleryModels(appConfig.Galleries, appConfig.ModelPath) - return c.SendString(elements.ListModels(gallery.GalleryModels(models).Search(form.Search), processingModels, galleryService)) - }) + return c.SendString(elements.ListModels(gallery.GalleryModels(models).Search(form.Search), processingModels, galleryService)) + }) - /* + /* - Install routes + Install routes - */ + */ - // This route is used when the "Install" button is pressed, we submit here a new job to the gallery service - // https://htmx.org/examples/progress-bar/ - app.Post("/browse/install/model/:id", auth, func(c *fiber.Ctx) error { - galleryID := strings.Clone(c.Params("id")) // note: strings.Clone is required for multiple requests! - log.Debug().Msgf("UI job submitted to install : %+v\n", galleryID) + // This route is used when the "Install" button is pressed, we submit here a new job to the gallery service + // https://htmx.org/examples/progress-bar/ + app.Post("/browse/install/model/:id", auth, func(c *fiber.Ctx) error { + galleryID := strings.Clone(c.Params("id")) // note: strings.Clone is required for multiple requests! 
+ log.Debug().Msgf("UI job submitted to install : %+v\n", galleryID) - id, err := uuid.NewUUID() - if err != nil { - return err - } + id, err := uuid.NewUUID() + if err != nil { + return err + } - uid := id.String() + uid := id.String() - processingModels.Set(galleryID, uid) + processingModels.Set(galleryID, uid) - op := gallery.GalleryOp{ - Id: uid, - GalleryModelName: galleryID, - Galleries: appConfig.Galleries, - } - go func() { - galleryService.C <- op - }() + op := gallery.GalleryOp{ + Id: uid, + GalleryModelName: galleryID, + Galleries: appConfig.Galleries, + } + go func() { + galleryService.C <- op + }() - return c.SendString(elements.StartProgressBar(uid, "0", "Installation")) - }) + return c.SendString(elements.StartProgressBar(uid, "0", "Installation")) + }) - // This route is used when the "Install" button is pressed, we submit here a new job to the gallery service - // https://htmx.org/examples/progress-bar/ - app.Post("/browse/delete/model/:id", auth, func(c *fiber.Ctx) error { - galleryID := strings.Clone(c.Params("id")) // note: strings.Clone is required for multiple requests! - log.Debug().Msgf("UI job submitted to delete : %+v\n", galleryID) - var galleryName = galleryID - if strings.Contains(galleryID, "@") { - // if the galleryID contains a @ it means that it's a model from a gallery - // but we want to delete it from the local models which does not need - // a repository ID - galleryName = strings.Split(galleryID, "@")[1] - } + // This route is used when the "Install" button is pressed, we submit here a new job to the gallery service + // https://htmx.org/examples/progress-bar/ + app.Post("/browse/delete/model/:id", auth, func(c *fiber.Ctx) error { + galleryID := strings.Clone(c.Params("id")) // note: strings.Clone is required for multiple requests! 
+ log.Debug().Msgf("UI job submitted to delete : %+v\n", galleryID) + var galleryName = galleryID + if strings.Contains(galleryID, "@") { + // if the galleryID contains a @ it means that it's a model from a gallery + // but we want to delete it from the local models which does not need + // a repository ID + galleryName = strings.Split(galleryID, "@")[1] + } - id, err := uuid.NewUUID() - if err != nil { - return err - } + id, err := uuid.NewUUID() + if err != nil { + return err + } - uid := id.String() + uid := id.String() - // Track the deletion job by galleryID and galleryName - // The GalleryID contains information about the repository, - // while the GalleryName is ONLY the name of the model - processingModels.Set(galleryName, uid) - processingModels.Set(galleryID, uid) + // Track the deletion job by galleryID and galleryName + // The GalleryID contains information about the repository, + // while the GalleryName is ONLY the name of the model + processingModels.Set(galleryName, uid) + processingModels.Set(galleryID, uid) - op := gallery.GalleryOp{ - Id: uid, - Delete: true, - GalleryModelName: galleryName, - } - go func() { - galleryService.C <- op - cl.RemoveBackendConfig(galleryName) - }() + op := gallery.GalleryOp{ + Id: uid, + Delete: true, + GalleryModelName: galleryName, + } + go func() { + galleryService.C <- op + cl.RemoveBackendConfig(galleryName) + }() - return c.SendString(elements.StartProgressBar(uid, "0", "Deletion")) - }) + return c.SendString(elements.StartProgressBar(uid, "0", "Deletion")) + }) - // Display the job current progress status - // If the job is done, we trigger the /browse/job/:uid route - // https://htmx.org/examples/progress-bar/ - app.Get("/browse/job/progress/:uid", auth, func(c *fiber.Ctx) error { - jobUID := strings.Clone(c.Params("uid")) // note: strings.Clone is required for multiple requests! + // Display the job current progress status + // If the job is done, we trigger the /browse/job/:uid route + // https://htmx.org/examples/progress-bar/ + app.Get("/browse/job/progress/:uid", auth, func(c *fiber.Ctx) error { + jobUID := strings.Clone(c.Params("uid")) // note: strings.Clone is required for multiple requests! 
- status := galleryService.GetStatus(jobUID) - if status == nil { - //fmt.Errorf("could not find any status for ID") - return c.SendString(elements.ProgressBar("0")) - } + status := galleryService.GetStatus(jobUID) + if status == nil { + //fmt.Errorf("could not find any status for ID") + return c.SendString(elements.ProgressBar("0")) + } - if status.Progress == 100 { - c.Set("HX-Trigger", "done") // this triggers /browse/job/:uid (which is when the job is done) - return c.SendString(elements.ProgressBar("100")) - } - if status.Error != nil { - return c.SendString(elements.ErrorProgress(status.Error.Error(), status.GalleryModelName)) - } + if status.Progress == 100 { + c.Set("HX-Trigger", "done") // this triggers /browse/job/:uid (which is when the job is done) + return c.SendString(elements.ProgressBar("100")) + } + if status.Error != nil { + // TODO: instead of deleting the job, we should keep it in the cache and make it dismissable by the user + processingModels.DeleteUUID(jobUID) + return c.SendString(elements.ErrorProgress(status.Error.Error(), status.GalleryModelName)) + } - return c.SendString(elements.ProgressBar(fmt.Sprint(status.Progress))) - }) + return c.SendString(elements.ProgressBar(fmt.Sprint(status.Progress))) + }) - // this route is hit when the job is done, and we display the - // final state (for now just displays "Installation completed") - app.Get("/browse/job/:uid", auth, func(c *fiber.Ctx) error { - jobUID := strings.Clone(c.Params("uid")) // note: strings.Clone is required for multiple requests! + // this route is hit when the job is done, and we display the + // final state (for now just displays "Installation completed") + app.Get("/browse/job/:uid", auth, func(c *fiber.Ctx) error { + jobUID := strings.Clone(c.Params("uid")) // note: strings.Clone is required for multiple requests! 
- status := galleryService.GetStatus(jobUID) + status := galleryService.GetStatus(jobUID) - galleryID := "" - for _, k := range processingModels.Keys() { - if processingModels.Get(k) == jobUID { - galleryID = k - processingModels.Delete(k) + galleryID := "" + processingModels.DeleteUUID(jobUID) + if galleryID == "" { + log.Debug().Msgf("no processing model found for job : %+v\n", jobUID) } - } - if galleryID == "" { - log.Debug().Msgf("no processing model found for job : %+v\n", jobUID) - } - log.Debug().Msgf("JOB finished : %+v\n", status) - showDelete := true - displayText := "Installation completed" - if status.Deletion { - showDelete = false - displayText = "Deletion completed" - } + log.Debug().Msgf("JOB finished : %+v\n", status) + showDelete := true + displayText := "Installation completed" + if status.Deletion { + showDelete = false + displayText = "Deletion completed" + } - return c.SendString(elements.DoneProgress(galleryID, displayText, showDelete)) - }) + return c.SendString(elements.DoneProgress(galleryID, displayText, showDelete)) + }) + } // Show the Chat page app.Get("/chat/:model", auth, func(c *fiber.Ctx) error { diff --git a/core/http/static/p2panimation.js b/core/http/static/p2panimation.js new file mode 100644 index 000000000000..d5599c63b5d2 --- /dev/null +++ b/core/http/static/p2panimation.js @@ -0,0 +1,144 @@ +const canvas = document.getElementById('networkCanvas'); +const ctx = canvas.getContext('2d'); + +let particles = []; +let isDragging = false; +let dragParticle = null; +const maxParticles = 100; // Maximum number of particles +const dragAreaRadius = 10; // Increased area for easier dragging + +// Function to resize canvas based on aspect ratio +function resizeCanvas() { + canvas.width = window.innerWidth; + canvas.height = Math.min(window.innerHeight, 400); // Maintain a max height of 400px +} + +// Adjust the canvas size when the window resizes +window.addEventListener('resize', resizeCanvas); + +// Initialize canvas size +resizeCanvas(); + +class Particle { + constructor(x, y) { + this.x = x; + this.y = y; + this.radius = 4; + this.color = `rgba(0, 255, 204, 1)`; + this.speedX = (Math.random() - 0.5) * 2; // Random horizontal speed + this.speedY = (Math.random() - 0.5) * 2; // Random vertical speed + } + + update() { + if (!isDragging || dragParticle !== this) { + this.x += this.speedX; + this.y += this.speedY; + + // Bounce off the edges of the canvas + if (this.x < 0 || this.x > canvas.width) { + this.speedX *= -1; + } + if (this.y < 0 || this.y > canvas.height) { + this.speedY *= -1; + } + } + } + + draw() { + ctx.beginPath(); + ctx.arc(this.x, this.y, this.radius, 0, Math.PI * 2, false); + ctx.fillStyle = this.color; + ctx.fill(); + } + + isMouseOver(mouseX, mouseY) { + // Increase the draggable area by checking if the mouse is within a larger radius + return Math.hypot(mouseX - this.x, mouseY - this.y) < dragAreaRadius; + } +} + +function connectParticles() { + for (let i = 0; i < particles.length; i++) { + for (let j = i + 1; j < particles.length; j++) { + const distance = Math.hypot(particles[i].x - particles[j].x, particles[i].y - particles[j].y); + if (distance < 150) { + ctx.beginPath(); + ctx.moveTo(particles[i].x, particles[i].y); + ctx.lineTo(particles[j].x, particles[j].y); + ctx.strokeStyle = `rgba(0, 255, 204, ${1 - distance / 150})`; + ctx.stroke(); + } + } + } +} + +function initParticles(num) { + for (let i = 0; i < num; i++) { + particles.push(new Particle(Math.random() * canvas.width, Math.random() * canvas.height)); + } +} + +function 
animate() { + ctx.clearRect(0, 0, canvas.width, canvas.height); + + particles.forEach(particle => { + particle.update(); + particle.draw(); + }); + + connectParticles(); + + requestAnimationFrame(animate); +} + +// Handle mouse click to create a new particle +canvas.addEventListener('click', (e) => { + const rect = canvas.getBoundingClientRect(); + const mouseX = e.clientX - rect.left; + const mouseY = e.clientY - rect.top; + + const newParticle = new Particle(mouseX, mouseY); + particles.push(newParticle); + + // Limit the number of particles to the maximum + if (particles.length > maxParticles) { + particles.shift(); // Remove the oldest particle + } +}); + +// Handle mouse down for dragging +canvas.addEventListener('mousedown', (e) => { + const rect = canvas.getBoundingClientRect(); + const mouseX = e.clientX - rect.left; + const mouseY = e.clientY - rect.top; + + for (let particle of particles) { + if (particle.isMouseOver(mouseX, mouseY)) { + isDragging = true; + dragParticle = particle; + break; + } + } +}); + +// Handle mouse move for dragging +canvas.addEventListener('mousemove', (e) => { + if (isDragging && dragParticle) { + const rect = canvas.getBoundingClientRect(); + const mouseX = e.clientX - rect.left; + const mouseY = e.clientY - rect.top; + + dragParticle.x = mouseX; + dragParticle.y = mouseY; + } +}); + +// Handle mouse up to stop dragging +canvas.addEventListener('mouseup', () => { + isDragging = false; + dragParticle = null; +}); + +// Initialize and start the animation +initParticles(maxParticles); +animate(); \ No newline at end of file diff --git a/core/http/views/explorer.html b/core/http/views/explorer.html new file mode 100644 index 000000000000..9843b47b833f --- /dev/null +++ b/core/http/views/explorer.html @@ -0,0 +1,380 @@ + + + +{{template "views/partials/head" .}} + + + + +
+ {{template "views/partials/navbar_explorer" .}} +
+ +
+
+

+ Network Clusters Explorer + +

+

+ View the clusters and workers available in each network. + + + +

+ +
+
+
+ +
+ +
+ + The explorer is a global, community-driven tool to share network tokens and view the available clusters around the globe. Anyone can use the tokens to offload computation and use the available clusters, or share their own resources. This is provided without any warranty. Use it at your own risk. We are not responsible for any potential harm or misuse. Sharing tokens globally allows anyone on the internet to use your instances. Although the community will address bugs, this is experimental software and may be insecure to deploy on your hardware unless you take all necessary precautions.
+
+ + +
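For illustration only, here is a minimal Go sketch of how the two explorer endpoints registered in `core/http/routes/explorer.go` (`POST /network/add` and `GET /networks`) could be exercised. The host, token value, network name, and description below are placeholders, not part of this change; the JSON field names and the base64 requirement come from the `AddNetwork` handler above.

```go
package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"io"
	"net/http"
)

func main() {
	// Register a network token with the explorer.
	// The token must decode as base64, otherwise the API returns 400.
	payload, _ := json.Marshal(map[string]string{
		"token":       "BASE64-ENCODED-NETWORK-TOKEN", // placeholder
		"name":        "my-network",                   // placeholder
		"description": "Example shared cluster",       // placeholder
	})

	resp, err := http.Post("http://localhost:8080/network/add", "application/json", bytes.NewReader(payload))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()
	body, _ := io.ReadAll(resp.Body)
	fmt.Println(resp.Status, string(body))

	// List the registered networks that currently have at least one worker,
	// ordered by number of clusters (as implemented in ShowNetworks).
	resp2, err := http.Get("http://localhost:8080/networks")
	if err != nil {
		panic(err)
	}
	defer resp2.Body.Close()
	networks, _ := io.ReadAll(resp2.Body)
	fmt.Println(string(networks))
}
```

The "Add New Network" form below submits the same fields to the same endpoint.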
+ +
+

Add New Network

+
+ + +
+
+ + +
+
+ + +
+ + + +
+ + + + + + + + +
+ + + + {{template "views/partials/footer" .}} +
+ + + + diff --git a/core/http/views/p2p.html b/core/http/views/p2p.html index a8c51310e0fa..52548e33de64 100644 --- a/core/http/views/p2p.html +++ b/core/http/views/p2p.html @@ -1,21 +1,40 @@ {{template "views/partials/head" .}} - -
- +
+ {{template "views/partials/navbar" .}}
- -

- Distributed inference with P2P - - - -

+
+ +
+
+

+ Distributed inference with P2P +

+

+ Distribute computation by sharing and load balancing instances, or by sharding model weights. + + + +

+ +
+
+
LocalAI uses P2P technologies to enable distribution of work between peers. It is possible to share an instance with Federation and/or split the weights of a model across peers (only available with llama.cpp models). You can now share computational resources between your own devices or with your friends!
+ +
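As a usage sketch of the refactored P2P endpoint (the handler change in `core/http/endpoints/localai/p2p.go` above), the snippet below queries `GET /api/p2p`, which now returns the worker and federated nodes scoped to the configured P2P network ID. The host and API key are placeholders, and the exact JSON key names depend on `schema.P2PNodesResponse`.

```go
package main

import (
	"fmt"
	"io"
	"net/http"
)

func main() {
	req, err := http.NewRequest(http.MethodGet, "http://localhost:8080/api/p2p", nil)
	if err != nil {
		panic(err)
	}
	// The route is behind the auth middleware; if API keys are configured,
	// a bearer token is assumed to be required here.
	req.Header.Set("Authorization", "Bearer YOUR-API-KEY")

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	// Prints the P2PNodesResponse payload (available nodes and federated nodes).
	body, _ := io.ReadAll(resp.Body)
	fmt.Println(string(body))
}
```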
+

+ Network token + +

+ {{.P2PToken}}
+ The network token can be used either to share this instance or to join a federation or a worker network. Below you will find a few examples of how to start a new instance or a worker with the token, and you will be able to see the available workers and federated nodes. +
+ {{ if and .IsP2PEnabled (eq .P2PToken "") }}
@@ -40,7 +59,6 @@
LocalAI uses P2P technologies to enable distributi

Start a federated instance

-