Comparing changes

Choose two branches to see what's changed or to start a new pull request.

base repository: robusta-dev/holmesgpt
base: 0.1.1
head repository: robusta-dev/holmesgpt
compare: master

Commits on May 30, 2024

  1. Stricter typing for API server requests (Robert Szefler committed May 30, 2024, 383d393)
  2. (hopefully) fix build (aantn committed May 30, 2024, 11f90e0)
  3. fix typo in build (aantn committed May 30, 2024, 0f67482)
  4. more work on fixing build (aantn committed May 30, 2024, 508f462)
  5. python3.10 compat (aantn committed May 30, 2024, b2c891d)
  6. Revert "python3.10 compat" (aantn committed May 30, 2024, 186ccaa)
     This reverts commit b2c891d.
  7. build stuff (aantn committed May 30, 2024, 9b32bc4)
  8. add version command (aantn committed May 30, 2024, 02ccf58)

Commits on May 31, 2024

  1. Update instructions for llama3 (aantn authored May 31, 2024, 860f33f)
  2. Merge pull request #4 from robusta-dev/llama-instructions (pavangudiwada authored May 31, 2024, 88063c3)
     Update instructions for llama3
  3. Update README.md (pavangudiwada authored May 31, 2024, 82a35ce)
  4. Merge pull request #5 from robusta-dev/jira-command-fix (aantn authored May 31, 2024, d9153cb)
     Update README.md
  5. e418e31 (commit message not shown)
  6. Update README.md (aantn authored May 31, 2024, 92ba710)
  7. Update README.md (aantn authored May 31, 2024, e246b2f)
  8. Merge pull request #6 from robusta-dev/function-calling-docs (pavangudiwada authored May 31, 2024, 4f20364)
     Add warning regarding function calling and self-hosted models
  9. c0a9490 (commit message not shown)
  10. Update custom_toolset.yaml (aantn authored May 31, 2024, 40f2a28)
  11. Update custom_toolset.yaml (aantn authored May 31, 2024, 065d0a9)

Commits on Jun 3, 2024

  1. Update README.md (saiyam1814 authored Jun 3, 2024, 5b8faf0)
  2. Merge pull request #8 from saiyam1814/patch-1 (aantn authored Jun 3, 2024, e9b480e)
     Update README.md
  3. docs: update README.md (eltociear authored Jun 3, 2024, 74853c9)
     recieve -> receive
  4. Update default prompts (aantn committed Jun 3, 2024, 71c04b8)
  5. 1989ff9 (commit message not shown)
  6. Update kubernetes.yaml (aantn committed Jun 3, 2024, da0a36f)
  7. Merge pull request #2 from robusta-dev/feature/api-server-context-typing (Robert Szefler authored Jun 3, 2024, 45fd7f8)
     Stricter typing for API server requests
  8. Bump version (aantn committed Jun 3, 2024, 0fe61d6)
  9. e998de1 (commit message not shown)
  10. e082e4b (commit message not shown)

Commits on Jun 4, 2024

  1. Env var-based configuration for the API server (Robert Szefler committed Jun 4, 2024, 518e9db)
  2. refactor config handling; standalone server start with configurable listen host/port (Robert Szefler committed Jun 4, 2024, 877152f)
  3. simplify env var setup (Robert Szefler committed Jun 4, 2024, ba0f32f)
  4. Merge pull request #11 from robusta-dev/feature/api-server-configurable-ai-provider (Robert Szefler authored Jun 4, 2024, 1b5130b)
     Env var-based configuration for the API server

Commits on Jun 5, 2024

  1. 8491c80 (commit message not shown)
  2. eab27a4 (commit message not shown)
  3. 3ac613d (commit message not shown)
  4. Merge pull request #14 from robusta-dev/fix-docker-image-reference (aantn authored Jun 5, 2024, 87ac4c9)
     Update docker-build-on-tag.yaml
  5. version bump (aantn committed Jun 5, 2024, 54c0a55)
  6. bdca42b (commit message not shown)

Commits on Jun 6, 2024

  1. Tiny refactor (remove file) (aantn committed Jun 6, 2024, 1174e41)
  2. Merge pull request #10 from eltociear/patch-1 (aantn authored Jun 6, 2024, 5313f0b)
     docs: update README.md
  3. Update tool_calling_llm.py (aantn committed Jun 6, 2024, 36eaee6)
  4. Merge pull request #16 from robusta-dev/remove-unused-llmresult-field (aantn authored Jun 6, 2024, ce1571f)
     Remove unused field in LLMResult
  5. added helm chart for running holmes as a deployment in the cluster (arikalon1 committed Jun 6, 2024, e747b5b)
     pulling robusta store configuration, from the existing secret, if exists
     fix some small bugs
  6. Merge pull request #19 from robusta-dev/holmes_helm_chart (arikalon1 authored Jun 6, 2024, fefdbfc)
     added helm chart for running holmes as a deployment in the cluster
  7. Update custom_toolset.yaml (aantn authored Jun 6, 2024, bd421cd)
     `cluster-name` doesn't work as it is parsed as cluster MINUS name
  8. Update README.md (milliyin authored Jun 6, 2024, 3db76aa)
     Added Setting up config file in Installation
  9. add github action to publish the chart on release publish (arikalon1 committed Jun 6, 2024, faa20f1)
     replace the chart version placeholders with the current release
  10. 0404727 (commit message not shown)
  11. Merge pull request #25 from robusta-dev/upload-charts-on-release (arikalon1 authored Jun 6, 2024, 96d88f2)
      Upload charts on release
Showing 504 changed files with 139,367 additions and 2,544 deletions.
1 change: 1 addition & 0 deletions .gitattributes
@@ -0,0 +1 @@
holmes/.git_archival.json export-subst
60 changes: 60 additions & 0 deletions .github/workflows/build-and-test.yaml
@@ -0,0 +1,60 @@
# this runs on every commit/PR to test that we are properly building binaries that work
# we test this on each commit/PR to catch build problems early
name: Build and test HolmesGPT

on: [push, pull_request, workflow_dispatch]

jobs:
check:
name: Pre-commit checks
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
- uses: pre-commit/action@v3.0.1

build:
needs: check
strategy:
matrix:
python-version: ["3.9", "3.10", "3.11", "3.12"]

runs-on: ubuntu-latest

steps:
- uses: actions/checkout@v2

- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v2
with:
python-version: ${{ matrix.python-version }}

- name: Install Python dependencies and build
# if you change something here, you must also change it in .github/workflows/build-binaries-and-brew.yaml
run: |
python -m pip install --upgrade pip setuptools pyinstaller
curl -sSL https://install.python-poetry.org | python3 - --version 1.4.0
poetry config virtualenvs.create false
poetry install --no-root
sudo apt-get install -y binutils
pyinstaller holmes.py --add-data 'holmes/plugins/runbooks/*:holmes/plugins/runbooks' --add-data 'holmes/plugins/prompts/*:holmes/plugins/prompts' --add-data 'holmes/plugins/toolsets/*:holmes/plugins/toolsets' --hidden-import=tiktoken_ext.openai_public --hidden-import=tiktoken_ext --hiddenimport litellm.llms.tokenizers --hiddenimport litellm.litellm_core_utils.tokenizers --collect-data litellm
ls dist
- name: Run tests
shell: bash
env:
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
run: |
poetry run pytest -m "not llm"
- name: Test the binary
shell: bash
run: |
dist/holmes/holmes version
if [ $? -ne 0 ]; then
echo "Binary test failed"
exit 1
fi
echo "Binary test passed"
178 changes: 178 additions & 0 deletions .github/workflows/build-binaries-and-brew.yaml
@@ -0,0 +1,178 @@
name: Build and Release

on:
release:
types: [created]

jobs:
build:
strategy:
matrix:
# We build on an older Ubuntu as pyinstaller binaries are forward-compatible not backwards-compatible
# See https://pyinstaller.org/en/stable/usage.html?highlight=glibc#making-gnu-linux-apps-forward-compatible:~:text=The%20solution%20is%20to%20always%20build%20your%20app%20on%20the%20oldest%20version%20of%20GNU/Linux%20you%20mean%20to%20support.%20It%20should%20continue%20to%20work%20with%20the%20libc%20found%20on%20newer%20versions.
# TODO: for similar reasons, we may want to build on older Windows/MacOS versions as well
os: [ubuntu-22.04, windows-latest, macos-latest]

runs-on: ${{ matrix.os }}

steps:
- uses: actions/checkout@v2

- name: Set up Python
uses: actions/setup-python@v2
with:
python-version: '3.11'

- name: Install dependencies
if: matrix.os != 'windows-latest'
run: |
python -m pip install --upgrade pip setuptools pyinstaller
curl -sSL https://install.python-poetry.org | python3 - --version 1.4.0
poetry config virtualenvs.create false
poetry install --no-root
- name: Install dependencies
if: matrix.os == 'windows-latest'
run: |
python -m pip install --upgrade pip setuptools pyinstaller
curl -sSL https://install.python-poetry.org | python3 - --version 1.4.0
C:\Users\runneradmin\AppData\Roaming\Python\Scripts\poetry config virtualenvs.create false
C:\Users\runneradmin\AppData\Roaming\Python\Scripts\poetry install --no-root
- name: Install dependencies (Linux)
if: matrix.os == 'ubuntu-22.04'
run: |
sudo apt-get install -y binutils
- name: Update package version (Linux)
if: matrix.os == 'ubuntu-22.04'
run: sed -i 's/__version__ = .*/__version__ = "${{ github.ref_name }}"/g' holmes/__init__.py

# mac has BSD style sed command where you specify -i '' and not just -i
- name: Update package version (macOS)
if: matrix.os == 'macos-latest'
run: sed -i '' 's/__version__ = .*/__version__ = "${{ github.ref_name }}"/g' holmes/__init__.py

# windows has no sed, so we use powershell
- name: Update package version (Windows)
if: matrix.os == 'windows-latest'
run: |
$filePath = 'holmes/__init__.py'
(Get-Content $filePath) -replace '__version__ = .+', '__version__ = "${{ github.ref_name }}"' | Set-Content $filePath
shell: pwsh

# if you change something here, you must also change it in .github/workflows/build-and-test.yaml
- name: Build with PyInstaller
shell: bash
# regarding the tiktoken part of the command, see https://github.com/openai/tiktoken/issues/80
# regarding the litellm part of the command, see https://github.com/pyinstaller/pyinstaller/issues/8620#issuecomment-2186540504
run: |
pyinstaller holmes.py --add-data 'holmes/plugins/runbooks/*:holmes/plugins/runbooks' --add-data 'holmes/plugins/prompts/*:holmes/plugins/prompts' --add-data 'holmes/plugins/toolsets/*:holmes/plugins/toolsets' --hidden-import=tiktoken_ext.openai_public --hidden-import=tiktoken_ext --hiddenimport litellm.llms.tokenizers --hiddenimport litellm.litellm_core_utils.tokenizers --collect-data litellm
ls dist
- name: Zip the application (Unix)
if: matrix.os == 'macos-latest' || matrix.os == 'ubuntu-22.04'
run: |
cd dist
zip -r holmes-${{ matrix.os }}-${{ github.ref_name }}.zip holmes
mv holmes-${{ matrix.os }}-${{ github.ref_name }}.zip ../
cd ..
- name: Zip the application (Windows)
if: matrix.os == 'windows-latest'
run: |
Set-Location -Path dist
Compress-Archive -Path holmes -DestinationPath holmes-${{ matrix.os }}-${{ github.ref_name }}.zip -Force
Move-Item -Path holmes-${{ matrix.os }}-${{ github.ref_name }}.zip -Destination ..\
Set-Location -Path ..
- name: Upload Release Asset
uses: actions/upload-release-asset@v1.0.2
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
with:
upload_url: ${{ github.event.release.upload_url }}
asset_path: ./holmes-${{ matrix.os }}-${{ github.ref_name }}.zip
asset_name: holmes-${{ matrix.os }}-${{ github.ref_name }}.zip
asset_content_type: application/octet-stream

- name: Upload build as artifact
uses: actions/upload-artifact@v4
with:
name: holmes-${{ matrix.os }}-${{ github.ref_name }}
path: ./holmes-${{ matrix.os }}-${{ github.ref_name }}.zip

check-latest:
needs: build
runs-on: ubuntu-22.04
outputs:
IS_LATEST: ${{ steps.check-latest.outputs.release == github.ref_name }}
steps:
- id: check-latest
uses: pozetroninc/github-action-get-latest-release@v0.7.0
with:
token: ${{ secrets.GITHUB_TOKEN }}
repository: ${{ github.repository }}
excludes: prerelease, draft

# Define MacOS hash job
mac-hash:
needs: check-latest
runs-on: ubuntu-latest
if: needs.check-latest.outputs.IS_LATEST
outputs:
MAC_BUILD_HASH: ${{ steps.calc-hash.outputs.MAC_BUILD_HASH }}
steps:
- name: Checkout Repository
uses: actions/checkout@v2
- name: Download MacOS artifact
uses: actions/download-artifact@v4
with:
name: holmes-macos-latest-${{ github.ref_name }}
- name: Calculate hash
id: calc-hash
run: echo "::set-output name=MAC_BUILD_HASH::$(sha256sum holmes-macos-latest-${{ github.ref_name }}.zip | awk '{print $1}')"

# Define Linux hash job
linux-hash:
needs: check-latest
runs-on: ubuntu-latest
if: needs.check-latest.outputs.IS_LATEST
outputs:
LINUX_BUILD_HASH: ${{ steps.calc-hash.outputs.LINUX_BUILD_HASH }}
steps:
- name: Checkout Repository
uses: actions/checkout@v2
- name: Download Linux artifact
uses: actions/download-artifact@v4
with:
name: holmes-ubuntu-22.04-${{ github.ref_name }}
- name: Calculate hash
id: calc-hash
run: echo "::set-output name=LINUX_BUILD_HASH::$(sha256sum holmes-ubuntu-22.04-${{ github.ref_name }}.zip | awk '{print $1}')"

# TODO: update homebrew formula
update-formula:
needs: [mac-hash, linux-hash]
runs-on: ubuntu-latest
steps:
- name: Checkout homebrew-holmesgpt repository
uses: actions/checkout@v2
with:
repository: robusta-dev/homebrew-holmesgpt
token: ${{ secrets.MULTIREPO_GITHUB_TOKEN }}
- name: Update holmesgpt.rb formula
run: |
MAC_BUILD_HASH=${{ needs.mac-hash.outputs.MAC_BUILD_HASH }}
LINUX_BUILD_HASH=${{ needs.linux-hash.outputs.LINUX_BUILD_HASH }}
TAG_NAME=${{ github.ref_name }}
awk 'NR==6{$0=" url \"https://github.com/robusta-dev/holmesgpt/releases/download/'"$TAG_NAME"'/holmes-macos-latest-'"$TAG_NAME"'.zip\""}1' ./Formula/holmesgpt.rb > temp && mv temp ./Formula/holmesgpt.rb
awk 'NR==7{$0=" sha256 \"'$MAC_BUILD_HASH'\""}1' ./Formula/holmesgpt.rb > temp && mv temp ./Formula/holmesgpt.rb
awk 'NR==9{$0=" url \"https://github.com/robusta-dev/holmesgpt/releases/download/'"$TAG_NAME"'/holmes-ubuntu-22.04-'"$TAG_NAME"'.zip\""}1' ./Formula/holmesgpt.rb > temp && mv temp ./Formula/holmesgpt.rb
awk 'NR==10{$0=" sha256 \"'$LINUX_BUILD_HASH'\""}1' ./Formula/holmesgpt.rb > temp && mv temp ./Formula/holmesgpt.rb
- name: Commit and push changes
run: |
git config --local user.email "action@github.com"
git config --local user.name "GitHub Action"
git commit -am "Update formula for release ${TAG_NAME}"
git push
96 changes: 96 additions & 0 deletions .github/workflows/build-docker-images.yaml
@@ -0,0 +1,96 @@
name: Docker Build on Tag

on:
release:
types: [published]

jobs:
build:

runs-on: ubuntu-latest

steps:
- uses: actions/checkout@v2

- name: Set up gcloud CLI
uses: google-github-actions/setup-gcloud@v0.2.0
with:
service_account_key: ${{ secrets.GCP_SA_KEY }}
project_id: genuine-flight-317411
export_default_credentials: true

# Configure Docker to use the gcloud command-line tool as a credential helper for authentication
- name: Configure Docker
run: |-
gcloud auth configure-docker us-central1-docker.pkg.dev
- name: Verify gcloud configuration
run: |-
gcloud config get-value project
- name: Update package version
run: |
sed -i 's/__version__ = .*/__version__ = "${{github.ref_name}}"/g' holmes/__init__.py
sed -i 's/0.0.0/${{github.ref_name}}/g' helm/holmes/Chart.yaml helm/holmes/values.yaml
sed -i 's/0.0.1/${{github.ref_name}}/g' helm/holmes/Chart.yaml
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v1

- name: Build and push Docker images
uses: docker/build-push-action@v2
with:
file: Dockerfile.dev
context: .
platforms: linux/arm64,linux/amd64
push: true
tags: us-central1-docker.pkg.dev/genuine-flight-317411/devel/holmes-dev:${{ github.ref_name }}
build-args: |
BUILDKIT_INLINE_CACHE=1
- name: Build and push Docker images
uses: docker/build-push-action@v2
with:
context: .
platforms: linux/arm64,linux/amd64
push: true
tags: us-central1-docker.pkg.dev/genuine-flight-317411/devel/holmes:${{ github.ref_name }}
build-args: |
BUILDKIT_INLINE_CACHE=1
- name: Login to Docker Hub
uses: docker/login-action@v1
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_PASSWORD }}

- name: Build and push Docker images Dockerhub
uses: docker/build-push-action@v2
with:
context: .
platforms: linux/arm64,linux/amd64
push: true
tags: robustadev/holmes:${{ github.ref_name }}
build-args: |
BUILDKIT_INLINE_CACHE=1
# Note: this ignores the "Set as latest release" checkbox in the GitHub UI
# it isn't possible to check whether that was set or not
# so if you do not want to override the "latest" tag, you should mark the release as a prerelease or a draft
# for prereleases and drafts we don't tag latest
- name: Tag and push Docker image as latest if applicable
if: ${{ github.event.release.prerelease == false && github.event.release.draft == false }}
run: |
docker pull us-central1-docker.pkg.dev/genuine-flight-317411/devel/holmes:${{ github.ref_name }}
docker tag us-central1-docker.pkg.dev/genuine-flight-317411/devel/holmes:${{ github.ref_name }} us-central1-docker.pkg.dev/genuine-flight-317411/devel/holmes:latest
docker push us-central1-docker.pkg.dev/genuine-flight-317411/devel/holmes:latest
- name: Save artifact with helm chart
uses: actions/upload-artifact@v4
with:
name: helm-chart
path: helm/holmes/

- name: Upload helm chart
run: |
cd helm && ./upload_chart.sh
145 changes: 0 additions & 145 deletions .github/workflows/build-on-release.yaml

This file was deleted.

43 changes: 0 additions & 43 deletions .github/workflows/docker-build-on-tag.yaml

This file was deleted.

94 changes: 94 additions & 0 deletions .github/workflows/llm-evaluation.yaml
@@ -0,0 +1,94 @@
name: Evaluate LLM test cases

on:
pull_request:
branches: ["*"]
push:
branches: [master]

permissions:
pull-requests: write
contents: read

jobs:
build:
strategy:
matrix:
python-version: ["3.12"]

runs-on: ubuntu-latest

steps:
- uses: actions/checkout@v4

- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}

- name: Install Python dependencies and build
# if you change something here, you must also change it in .github/workflows/build-binaries-and-brew.yaml
run: |
python -m pip install --upgrade pip setuptools pyinstaller
curl -sSL https://install.python-poetry.org | python3 - --version 1.4.0
poetry config virtualenvs.create false
poetry install --no-root
- name: Run tests
id: evals
continue-on-error: true
shell: bash
env:
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
BRAINTRUST_API_KEY: ${{ secrets.BRAINTRUST_API_KEY }}
UPLOAD_DATASET: "true"
PUSH_EVALS_TO_BRAINTRUST: "true"
EXPERIMENT_ID: github-${{ github.run_id }}.${{ github.run_number }}.${{ github.run_attempt }}
run: |
poetry run pytest tests/llm/test_ask_holmes.py tests/llm/test_investigate.py -n 6
- uses: actions/github-script@v7
if: always()
with:
retries: 3
github-token: ${{ secrets.GITHUB_TOKEN }}
script: |
const fs = require('fs');
try {
if(!context.issue || !context.issue.number) {
// Only comment on PR if the workflow is run as part of a PR
return
}
const reportContent = fs.readFileSync('evals_report.txt', 'utf8');
const comments = await github.rest.issues.listComments({
owner: context.repo.owner,
repo: context.repo.repo,
issue_number: context.issue.number
});
const botComment = comments.data.find(comment =>
comment.user.type === 'Bot' &&
comment.body.includes('## Results of HolmesGPT evals')
);
await github.rest.issues.createComment({
owner: context.repo.owner,
repo: context.repo.repo,
issue_number: context.issue.number,
body: reportContent
});
if (botComment) {
await github.rest.issues.deleteComment({
owner: context.repo.owner,
repo: context.repo.repo,
comment_id: botComment.id
});
}
} catch(e) {
console.log(e)
}
- name: Check test results
if: always()
run: if [[ ${{ steps.evals.outcome }} == 'success' ]]; then exit 0; else exit 1; fi
11 changes: 10 additions & 1 deletion .gitignore
@@ -154,4 +154,13 @@ cython_debug/
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
.idea/

.vscode

playwright.png
.deepeval*
pyrightconfig.json

*.AUTOGENERATED
evals_report.txt
21 changes: 21 additions & 0 deletions .pre-commit-config.yaml
@@ -0,0 +1,21 @@
repos:
- repo: https://github.com/python-poetry/poetry
rev: 1.8.4
hooks:
- id: poetry-check
- id: poetry-lock
pass_filenames: false
args:
- --no-update
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.7.2
hooks:
- id: ruff
entry: ruff check --fix
- id: ruff-format
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v5.0.0
hooks:
- id: detect-private-key
- id: end-of-file-fixer
- id: trailing-whitespace
132 changes: 132 additions & 0 deletions CODE_OF_CONDUCT.md
@@ -0,0 +1,132 @@
# Contributor Covenant Code of Conduct

## Our Pledge

We as members, contributors, and leaders pledge to make participation in our
community a harassment-free experience for everyone, regardless of age, body
size, visible or invisible disability, ethnicity, sex characteristics, gender
identity and expression, level of experience, education, socio-economic status,
nationality, personal appearance, race, caste, color, religion, or sexual
identity and orientation.

We pledge to act and interact in ways that contribute to an open, welcoming,
diverse, inclusive, and healthy community.

## Our Standards

Examples of behavior that contributes to a positive environment for our
community include:

* Demonstrating empathy and kindness toward other people
* Being respectful of differing opinions, viewpoints, and experiences
* Giving and gracefully accepting constructive feedback
* Accepting responsibility and apologizing to those affected by our mistakes,
and learning from the experience
* Focusing on what is best not just for us as individuals, but for the overall
community

Examples of unacceptable behavior include:

* The use of sexualized language or imagery, and sexual attention or advances of
any kind
* Trolling, insulting or derogatory comments, and personal or political attacks
* Public or private harassment
* Publishing others' private information, such as a physical or email address,
without their explicit permission
* Other conduct which could reasonably be considered inappropriate in a
professional setting

## Enforcement Responsibilities

Community leaders are responsible for clarifying and enforcing our standards of
acceptable behavior and will take appropriate and fair corrective action in
response to any behavior that they deem inappropriate, threatening, offensive,
or harmful.

Community leaders have the right and responsibility to remove, edit, or reject
comments, commits, code, wiki edits, issues, and other contributions that are
not aligned to this Code of Conduct, and will communicate reasons for moderation
decisions when appropriate.

## Scope

This Code of Conduct applies within all community spaces, and also applies when
an individual is officially representing the community in public spaces.
Examples of representing our community include using an official email address,
posting via an official social media account, or acting as an appointed
representative at an online or offline event.

## Enforcement

Instances of abusive, harassing, or otherwise unacceptable behavior may be
reported to the community leaders responsible for enforcement at
conduct@robusta.dev.
All complaints will be reviewed and investigated promptly and fairly.

All community leaders are obligated to respect the privacy and security of the
reporter of any incident.

## Enforcement Guidelines

Community leaders will follow these Community Impact Guidelines in determining
the consequences for any action they deem in violation of this Code of Conduct:

### 1. Correction

**Community Impact**: Use of inappropriate language or other behavior deemed
unprofessional or unwelcome in the community.

**Consequence**: A private, written warning from community leaders, providing
clarity around the nature of the violation and an explanation of why the
behavior was inappropriate. A public apology may be requested.

### 2. Warning

**Community Impact**: A violation through a single incident or series of
actions.

**Consequence**: A warning with consequences for continued behavior. No
interaction with the people involved, including unsolicited interaction with
those enforcing the Code of Conduct, for a specified period of time. This
includes avoiding interactions in community spaces as well as external channels
like social media. Violating these terms may lead to a temporary or permanent
ban.

### 3. Temporary Ban

**Community Impact**: A serious violation of community standards, including
sustained inappropriate behavior.

**Consequence**: A temporary ban from any sort of interaction or public
communication with the community for a specified period of time. No public or
private interaction with the people involved, including unsolicited interaction
with those enforcing the Code of Conduct, is allowed during this period.
Violating these terms may lead to a permanent ban.

### 4. Permanent Ban

**Community Impact**: Demonstrating a pattern of violation of community
standards, including sustained inappropriate behavior, harassment of an
individual, or aggression toward or disparagement of classes of individuals.

**Consequence**: A permanent ban from any sort of public interaction within the
community.

## Attribution

This Code of Conduct is adapted from the [Contributor Covenant][homepage],
version 2.1, available at
[https://www.contributor-covenant.org/version/2/1/code_of_conduct.html][v2.1].

Community Impact Guidelines were inspired by
[Mozilla's code of conduct enforcement ladder][Mozilla CoC].

For answers to common questions about this code of conduct, see the FAQ at
[https://www.contributor-covenant.org/faq][FAQ]. Translations are available at
[https://www.contributor-covenant.org/translations][translations].

[homepage]: https://www.contributor-covenant.org
[v2.1]: https://www.contributor-covenant.org/version/2/1/code_of_conduct.html
[Mozilla CoC]: https://github.com/mozilla/diversity
[FAQ]: https://www.contributor-covenant.org/faq
[translations]: https://www.contributor-covenant.org/translations
35 changes: 35 additions & 0 deletions CONTRIBUTING.md
@@ -0,0 +1,35 @@
# Contributing

## Before you get started

### Code of Conduct

Please make sure to read and observe our [Code of Conduct](https://github.com/robusta-dev/holmesgpt?tab=coc-ov-file).

### Install requirements
- Python `3.11`
- poetry `1.8.4` & up
- An LLM API key is required to use and test HolmesGPT
- OpenAI's `gpt-4o` is recommended.
- For details see [Getting an API Key](https://github.com/robusta-dev/holmesgpt?tab=readme-ov-file#getting-an-api-key).

## Reporting bugs

We encourage those interested to contribute code and also appreciate when issues are reported.

- Create a new issue and label it as `bug`
- Clearly state how to reproduce the bug:
- Which LLM you've used
- Which steps are required to reproduce
- As LLM answers may differ between runs, note whether the bug reproduces consistently or only occasionally


## Contributing Code

- Fork the repository and clone it locally.
- Create a new branch and make your changes
- Add or update tests to ensure your changes are covered.
- Run `pytest` to verify all tests pass.
- Keep pull requests small and focused. If you have multiple changes, open a separate PR for each (see the example flow after this list).
- Create a pull request back to the upstream repository.
- Wait for a review and address any comments
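
A typical end-to-end flow for the steps above might look like this (a sketch; the fork URL and branch name are placeholders):

```bash
# Fork robusta-dev/holmesgpt on GitHub first, then clone your fork
git clone https://github.com/<your-username>/holmesgpt.git
cd holmesgpt
git checkout -b my-fix

# Install dependencies and run the test suite
poetry install --no-root
poetry run pytest

# Push the branch to your fork and open a pull request against the upstream repository
git push -u origin my-fix
```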
173 changes: 114 additions & 59 deletions Dockerfile
@@ -1,84 +1,139 @@
# to build it:
# docker build -t robusta-ai .
# to use it:
# docker run -it --net=host -v $(pwd)/config.yaml:/app/config.yaml -v ~/.aws:/root/.aws -v ~/.config/gcloud:/root/.config/gcloud -v $HOME/.kube/config:/root/.kube/config robusta-ai ask "what pods are unhealthy and why?"
FROM python:3.11-slim

WORKDIR /app

# zscaler trust - uncomment for building image locally
#COPY zscaler.root.crt /usr/local/share/ca-certificates/
#RUN chmod 644 /usr/local/share/ca-certificates/*.crt && update-ca-certificates
# Build stage
FROM python:3.11-slim as builder
ENV PATH="/root/.local/bin/:$PATH"

RUN apt-get update && apt-get install -y \
RUN apt-get update \
&& apt-get install -y \
curl \
git \
apt-transport-https \
gnupg2 \
build-essential \
&& curl -fsSL https://pkgs.k8s.io/core:/stable:/v1.29/deb/Release.key | gpg --dearmor -o /etc/apt/keyrings/kubernetes-apt-keyring.gpg \
&& echo 'deb [signed-by=/etc/apt/keyrings/kubernetes-apt-keyring.gpg] https://pkgs.k8s.io/core:/stable:/v1.29/deb/ /' | tee /etc/apt/sources.list.d/kubernetes.list \
unzip \
&& apt-get purge -y --auto-remove \
&& rm -rf /var/lib/apt/lists/*

WORKDIR /app

# Create and activate virtual environment
RUN python -m venv /app/venv --upgrade-deps && \
. /app/venv/bin/activate

ENV VIRTUAL_ENV=/app/venv
ENV PATH="$VIRTUAL_ENV/bin:$PATH"

# Needed for kubectl
RUN curl -fsSL https://pkgs.k8s.io/core:/stable:/v1.32/deb/Release.key -o Release.key

# Set the architecture-specific kube lineage URLs
ARG KUBE_LINEAGE_ARM_URL=https://github.com/Avi-Robusta/kube-lineage/releases/download/v2.2.2/kube-lineage-macos-latest-v2.2.2
ARG KUBE_LINEAGE_AMD_URL=https://github.com/Avi-Robusta/kube-lineage/releases/download/v2.2.2/kube-lineage-ubuntu-latest-v2.2.2
# Define a build argument to identify the platform
ARG TARGETPLATFORM
# Conditional download based on the platform
RUN if [ "$TARGETPLATFORM" = "linux/arm64" ]; then \
curl -L -o kube-lineage $KUBE_LINEAGE_ARM_URL; \
elif [ "$TARGETPLATFORM" = "linux/amd64" ]; then \
curl -L -o kube-lineage $KUBE_LINEAGE_AMD_URL; \
else \
echo "Unsupported platform: $TARGETPLATFORM"; exit 1; \
fi
RUN chmod 777 kube-lineage
RUN ./kube-lineage --version

# Set the architecture-specific argocd URLs
# Freezing to argocd 2.13.5 as it includes fixes for CVE-2025-21613 and CVE-2025-21614.
# The argocd release 2.14.2 (latest as of 2025-02-19) is unfortunately still affected by these CVEs.
ARG ARGOCD_ARM_URL=https://github.com/argoproj/argo-cd/releases/download/v2.13.5/argocd-linux-arm64
ARG ARGOCD_AMD_URL=https://github.com/argoproj/argo-cd/releases/download/v2.13.5/argocd-linux-amd64
# Conditional download based on the platform
RUN if [ "$TARGETPLATFORM" = "linux/arm64" ]; then \
curl -L -o argocd $ARGOCD_ARM_URL; \
elif [ "$TARGETPLATFORM" = "linux/amd64" ]; then \
curl -L -o argocd $ARGOCD_AMD_URL; \
else \
echo "Unsupported platform: $TARGETPLATFORM"; exit 1; \
fi
RUN chmod 777 argocd
RUN ./argocd --help

# Install Helm
RUN curl https://baltocdn.com/helm/signing.asc | gpg --dearmor -o /usr/share/keyrings/helm.gpg \
&& echo "deb [arch=$(dpkg --print-architecture) signed-by=/usr/share/keyrings/helm.gpg] https://baltocdn.com/helm/stable/debian/ all main" \
| tee /etc/apt/sources.list.d/helm-stable-debian.list \
&& apt-get update \
&& apt-get install -y kubectl unzip\
&& rm -rf /var/lib/apt/lists/*

# Install AWS CLI v2 so kubectl works w/ remote eks clusters
# build-arg to choose architecture of the awscli binary, x86_64 or aarch64 - defaulting to x86_64
ARG ARCH=x86_64
RUN curl "https://awscli.amazonaws.com/awscli-exe-linux-${ARCH}.zip" -o "awscliv2.zip" \
&& unzip awscliv2.zip \
&& ./aws/install

# Install the Google Cloud CLI so kubectl works w/ remote gke clusters
RUN curl https://dl.google.com/dl/cloudsdk/release/google-cloud-sdk.tar.gz > /tmp/google-cloud-sdk.tar.gz
RUN mkdir -p /usr/local/gcloud \
&& tar -C /usr/local/gcloud -xvf /tmp/google-cloud-sdk.tar.gz \
&& /usr/local/gcloud/google-cloud-sdk/install.sh
ENV PATH $PATH:/usr/local/gcloud/google-cloud-sdk/bin
RUN gcloud components install gke-gcloud-auth-plugin

# Install Krew and add its installation directory to PATH
RUN sh -c "\
set -x; cd \$(mktemp -d) && \
OS=\$(uname | tr '[:upper:]' '[:lower:]') && \
ARCH=\$(uname -m | sed -e 's/x86_64/amd64/' -e 's/\\(arm\\)\\(64\\)\\?.*/\\1\\2/' -e 's/aarch64$/arm64/') && \
KREW=krew-\${OS}_\${ARCH} && \
curl -fsSLO \"https://github.com/kubernetes-sigs/krew/releases/latest/download/\${KREW}.tar.gz\" && \
tar zxvf \"\${KREW}.tar.gz\" && \
./\"\${KREW}\" install krew \
"

# Add Krew to PATH
ENV PATH="/root/.krew/bin:$PATH"

# Install kube-lineage via Krew
RUN kubectl krew install lineage

# Copy the poetry configuration files into the container at /app
COPY pyproject.toml poetry.lock* /app/
&& apt-get install -y helm \
&& rm -rf /var/lib/apt/lists/*

# Set up poetry
ARG PRIVATE_PACKAGE_REGISTRY="none"
RUN if [ "${PRIVATE_PACKAGE_REGISTRY}" != "none" ]; then \
pip config set global.index-url "${PRIVATE_PACKAGE_REGISTRY}"; \
fi \
&& pip install poetry

# Increase poetry timeout in case package registry times out
&& pip install poetry
ARG POETRY_REQUESTS_TIMEOUT
RUN poetry config virtualenvs.create false \
&& if [ "${PRIVATE_PACKAGE_REGISTRY}" != "none" ]; then \
RUN poetry config virtualenvs.create false
COPY pyproject.toml poetry.lock /app/
RUN if [ "${PRIVATE_PACKAGE_REGISTRY}" != "none" ]; then \
poetry source add --priority=primary artifactory "${PRIVATE_PACKAGE_REGISTRY}"; \
fi \
&& poetry install --no-interaction --no-ansi --no-root

#COPY config.yaml /app/
# Final stage
FROM python:3.11-slim

COPY . /app
ENV PYTHONUNBUFFERED=1
ENV PATH="/venv/bin:$PATH"
ENV PYTHONPATH=$PYTHONPATH:.:/app/holmes

WORKDIR /app

COPY --from=builder /app/venv /venv

# We install libexpat1 here to upgrade the package to a version that fixes 3 high-severity CVEs: CVE-2024-45491, CVE-2024-45490, CVE-2024-45492
RUN apt-get update \
&& apt-get install -y \
curl \
jq \
git \
apt-transport-https \
gnupg2 \
&& apt-get purge -y --auto-remove \
&& apt-get install -y --no-install-recommends libexpat1 \
&& rm -rf /var/lib/apt/lists/*

# Set up kubectl
COPY --from=builder /app/Release.key Release.key
RUN cat Release.key | gpg --dearmor -o /etc/apt/keyrings/kubernetes-apt-keyring.gpg \
&& echo 'deb [signed-by=/etc/apt/keyrings/kubernetes-apt-keyring.gpg] https://pkgs.k8s.io/core:/stable:/v1.32/deb/ /' | tee /etc/apt/sources.list.d/kubernetes.list \
&& apt-get update
RUN apt-get install -y kubectl

# Set up kube lineage
COPY --from=builder /app/kube-lineage /usr/local/bin
RUN kube-lineage --version

# Set up ArgoCD
COPY --from=builder /app/argocd /usr/local/bin/argocd
RUN argocd --help

# Set up Helm
COPY --from=builder /usr/bin/helm /usr/local/bin/helm
RUN chmod 555 /usr/local/bin/helm
RUN helm version

ARG AWS_DEFAULT_PROFILE
ARG AWS_DEFAULT_REGION
ARG AWS_PROFILE
ARG AWS_REGION

ENTRYPOINT ["poetry", "run", "--quiet", "python", "holmes.py"]
# Patching CVE-2024-32002
RUN git config --global core.symlinks false

# Remove setuptools-65.5.1 installed by the python:3.11-slim base image as a fix for CVE-2024-6345 until the image is updated
RUN rm -rf /usr/local/lib/python3.11/site-packages/setuptools-65.5.1.dist-info

COPY . /app

ENTRYPOINT ["python", "holmes.py"]
#CMD ["http://docker.for.mac.localhost:9093"]
134 changes: 134 additions & 0 deletions Dockerfile.dev
@@ -0,0 +1,134 @@
# Build stage
FROM python:3.11-slim as builder
ENV PATH="/root/.local/bin/:$PATH"

RUN apt-get update \
&& apt-get install -y \
curl \
git \
apt-transport-https \
gnupg2 \
build-essential \
unzip \
&& apt-get purge -y --auto-remove \
&& rm -rf /var/lib/apt/lists/*

WORKDIR /app


# Create and activate virtual environment
RUN python -m venv /app/venv --upgrade-deps && \
. /app/venv/bin/activate

ENV VIRTUAL_ENV=/app/venv
ENV PATH="$VIRTUAL_ENV/bin:$PATH"

# Needed for kubectl
RUN curl -fsSL https://pkgs.k8s.io/core:/stable:/v1.29/deb/Release.key -o Release.key

# Define a build argument to identify the platform
ARG TARGETPLATFORM

# Set the architecture-specific aws-cli
ARG AWS_CLI_ARM_URL=https://awscli.amazonaws.com/awscli-exe-linux-aarch64.zip
ARG AWS_CLI_AMD_URL=https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip
# Conditional download based on the platform
RUN if [ "$TARGETPLATFORM" = "linux/arm64" ]; then \
curl $AWS_CLI_ARM_URL -o "awscliv2.zip"; \
elif [ "$TARGETPLATFORM" = "linux/amd64" ]; then \
curl $AWS_CLI_AMD_URL -o "awscliv2.zip"; \
else \
echo "Unsupported platform: $TARGETPLATFORM"; exit 1; \
fi
RUN unzip awscliv2.zip && ./aws/install
RUN aws --version

# Set up packages for gcloud
RUN curl https://dl.google.com/dl/cloudsdk/release/google-cloud-sdk.tar.gz > /tmp/google-cloud-sdk.tar.gz
RUN mkdir -p /usr/local/gcloud \
&& tar -C /usr/local/gcloud -xvf /tmp/google-cloud-sdk.tar.gz \
&& /usr/local/gcloud/google-cloud-sdk/install.sh

# Set the architecture-specific kube lineage URLs
ARG ARM_URL=https://github.com/Avi-Robusta/kube-lineage/releases/download/v2.1/kube-lineage-macos-latest-v2.1
ARG AMD_URL=https://github.com/Avi-Robusta/kube-lineage/releases/download/v2.1/kube-lineage-ubuntu-latest-v2.1
# Conditional download based on the platform
RUN if [ "$TARGETPLATFORM" = "linux/arm64" ]; then \
curl -L -o kube-lineage $ARM_URL; \
elif [ "$TARGETPLATFORM" = "linux/amd64" ]; then \
curl -L -o kube-lineage $AMD_URL; \
else \
echo "Unsupported platform: $TARGETPLATFORM"; exit 1; \
fi
RUN chmod 777 kube-lineage
RUN ./kube-lineage --version

# Set up poetry
ARG PRIVATE_PACKAGE_REGISTRY="none"
RUN if [ "${PRIVATE_PACKAGE_REGISTRY}" != "none" ]; then \
pip config set global.index-url "${PRIVATE_PACKAGE_REGISTRY}"; \
fi \
&& pip install poetry
ARG POETRY_REQUESTS_TIMEOUT
RUN poetry config virtualenvs.create false
COPY pyproject.toml poetry.lock /app/
RUN if [ "${PRIVATE_PACKAGE_REGISTRY}" != "none" ]; then \
poetry source add --priority=primary artifactory "${PRIVATE_PACKAGE_REGISTRY}"; \
fi \
&& poetry install --no-interaction --no-ansi --no-root

# Final stage
FROM python:3.11-slim

ENV PYTHONUNBUFFERED=1
ENV PATH="/venv/bin:$PATH"
ENV PYTHONPATH=$PYTHONPATH:.:/app/holmes

WORKDIR /app

COPY --from=builder /app/venv /venv
COPY . /app


RUN apt-get update \
&& apt-get install -y \
git \
apt-transport-https \
gnupg2 \
curl \
jq \
&& apt-get purge -y --auto-remove \
&& rm -rf /var/lib/apt/lists/*

# Set up AWS CLI
COPY --from=builder /usr/local/aws-cli/ /usr/local/aws-cli/
ENV PATH $PATH:/usr/local/aws-cli/v2/current/bin
RUN aws --version

COPY --from=builder /usr/local/gcloud /usr/local/gcloud
ENV PATH $PATH:/usr/local/gcloud/google-cloud-sdk/bin
RUN gcloud components install gke-gcloud-auth-plugin

# Set up kubectl
# for details on what each step does see here https://kubernetes.io/docs/setup/production-environment/tools/kubeadm/install-kubeadm/#installing-kubeadm-kubelet-and-kubectl
COPY --from=builder /app/Release.key Release.key
RUN cat Release.key | gpg --dearmor -o /etc/apt/keyrings/kubernetes-apt-keyring.gpg \
&& echo 'deb [signed-by=/etc/apt/keyrings/kubernetes-apt-keyring.gpg] https://pkgs.k8s.io/core:/stable:/v1.29/deb/ /' | tee /etc/apt/sources.list.d/kubernetes.list \
&& apt-get update
RUN apt-get install -y kubectl

# Set up kube lineage
COPY --from=builder /app/kube-lineage /usr/local/bin
RUN kube-lineage --version


# Remove setuptools-65.5.1 installed by the python:3.11-slim base image as a fix for CVE-2024-6345 until the image is updated
RUN rm -rf /usr/local/lib/python3.11/site-packages/setuptools-65.5.1.dist-info

ARG AWS_DEFAULT_PROFILE
ARG AWS_DEFAULT_REGION
ARG AWS_PROFILE
ARG AWS_REGION

ENTRYPOINT ["python", "holmes.py"]
#CMD ["http://docker.for.mac.localhost:9093"]
19 changes: 19 additions & 0 deletions MAINTAINERS.md
@@ -0,0 +1,19 @@
# Maintainers

## Current

| Maintainer | GitHub ID |
| --------------------|---------------------------------------------------|
| Natan Yellin | [aantn](https://github.com/aantn) |
| Arik Alon | [arikalon1](https://github.com/arikalon1) |
| Avi Kotlicky | [Avi-Robusta](https://github.com/Avi-Robusta) |
| Dmytro Chievtaiev | [itisallgood](https://github.com/itisallgood) |
| Pavan Gudiwada | [pavangudiwada](https://github.com/pavangudiwada) |
| Nicolas Herment | [nherment](https://github.com/nherment) |
| Tomer Keshet | [Sheeproid](https://github.com/Sheeproid) |
| Roi Glinik | [RoiGlinik](https://github.com/RoiGlinik) |
| Moshe Morad | [moshemorad](https://github.com/moshemorad) |

## Emeritus

N/A
11 changes: 11 additions & 0 deletions Makefile
@@ -0,0 +1,11 @@


check:
poetry run pre-commit run -a


test-llm-investigate:
poetry run pytest tests/llm/test_investigate.py -n 6 -vv

test-llm-ask-holmes:
poetry run pytest tests/llm/test_ask_holmes.py -n 6 -vv
306 changes: 90 additions & 216 deletions README.md

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions build_with_arm.sh
@@ -0,0 +1 @@
docker buildx build --platform linux/arm64,linux/amd64 --tag $TAG --push .
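
This script expects the image reference in a `TAG` environment variable; a usage sketch (the registry, image name, and tag below are placeholders):

```bash
TAG=<registry>/<image>:<tag> ./build_with_arm.sh
```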
19 changes: 12 additions & 7 deletions config.example.yaml
@@ -1,3 +1,9 @@
#model: "gpt-4o"
#api_key: "..."

#model: "azure/model-deployment-name"
#api_key: "..."

# try adding your own tools here!
# e.g. query company-specific data, fetch logs from your existing observability tools, etc
#custom_toolsets: ["examples/custom_toolset.yaml"]
@@ -8,16 +14,15 @@
#jira_api_key: "..."
#jira_url: "https://your-company.atlassian.net"
#jira_query: "project = 'Natan Test Project' and Status = 'To Do'"

#slack_token: "..."
#slack_channel: "#general"

#llm: "openai"
#api_key: "..."

# llm: "azure"
#api_key: "..."
#azure_endpoint: "..."
#github_owner: "robusta-dev"
#github_pat: "..."
#github_url: "https://api.github.com" (default)
#github_repository: "holmesgpt"
#github_query: "is:issue is:open"

# give the LLM explicit instructions on how to investigate certain alerts
# try adding runbooks to get better results on known alerts
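# for example, point HolmesGPT at a runbook file (a sketch - see examples/custom_runbooks.yaml for the runbook format):
#custom_runbooks: ["examples/custom_runbooks.yaml"]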
163 changes: 163 additions & 0 deletions docs/api-keys.md
@@ -0,0 +1,163 @@
# Getting an LLM API Key for HolmesGPT

If you use HolmesGPT with Robusta SaaS, you can start using HolmesGPT right away, without bringing your own API key (such as an OpenAI key).

If you're running HolmesGPT standalone, you'll need to bring your own API Key for an AI model of your choice.

The most popular LLM provider is OpenAI, but you can use most [LiteLLM-compatible](https://docs.litellm.ai/docs/providers/) AI models with HolmesGPT. To use an LLM, set `--model` (e.g. `gpt-4o` or `bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0`) and `--api-key` (if necessary). Depending on the provider, you may need to set environment variables too.
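
For example (the model name and API key below are placeholders):

```bash
holmes ask "what pods are unhealthy and why?" --model=gpt-4o --api-key="<YOUR_API_KEY>"
```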

**Instructions for popular LLMs:**

<details>
<summary>OpenAI</summary>

To work with OpenAI's GPT 3.5 or GPT-4 models you need a paid [OpenAI API key](https://help.openai.com/en/articles/4936850-where-do-i-find-my-openai-api-key).

**Note**: This is different from being a "ChatGPT Plus" subscriber.

Pass your API key to holmes with the `--api-key` cli argument. Because OpenAI is the default LLM, the `--model` flag is optional for OpenAI (gpt-4o is the default).

```
holmes ask --api-key="..." "what pods are crashing in my cluster and why?"
```

If you prefer not to pass secrets on the cli, set the OPENAI_API_KEY environment variable or save the API key in a HolmesGPT config file.

</details>

<details>
<summary>Azure OpenAI</summary>

To work with Azure AI, you need an [Azure OpenAI resource](https://learn.microsoft.com/en-us/azure/ai-services/openai/how-to/create-resource?pivots=web-portal#create-a-resource) and to set the following environment variables:

* AZURE_API_VERSION - e.g. 2024-02-15-preview
* AZURE_API_BASE - e.g. https://my-org.openai.azure.com/
* AZURE_API_KEY (optional) - equivalent to the `--api-key` cli argument

Set those environment variables and run:

```bash
holmes ask "what pods are unhealthy and why?" --model=azure/<DEPLOYMENT_NAME> --api-key=<API_KEY>
```

Refer [LiteLLM Azure docs ↗](https://litellm.vercel.app/docs/providers/azure) for more details.
</details>

<details>
<summary>AWS Bedrock</summary>

Before running the below command you must run `pip install boto3>=1.28.57` and set the following environment variables:

* `AWS_REGION_NAME`
* `AWS_ACCESS_KEY_ID`
* `AWS_SECRET_ACCESS_KEY`
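
For example, export them in your shell before running HolmesGPT (a sketch; all values are placeholders):

```bash
export AWS_REGION_NAME="us-east-1"
export AWS_ACCESS_KEY_ID="<YOUR_ACCESS_KEY_ID>"
export AWS_SECRET_ACCESS_KEY="<YOUR_SECRET_ACCESS_KEY>"
```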

If the AWS cli is already configured on your machine, you may be able to find those parameters with:

```console
cat ~/.aws/credentials ~/.aws/config
```

Once everything is configured, run:
```console
holmes ask "what pods are unhealthy and why?" --model=bedrock/<MODEL_NAME>
```

Be sure to replace `MODEL_NAME` with a model you have access to - e.g. `anthropic.claude-3-5-sonnet-20240620-v1:0`. To list models your account can access:

```
aws bedrock list-foundation-models --region=us-east-1
```

Note that different models are available in different regions. For example, Claude Opus is only available in us-west-2.

Refer to [LiteLLM Bedrock docs ↗](https://litellm.vercel.app/docs/providers/bedrock) for more details.
</details>

<details>
<summary>Using Ollama</summary>
Ollama is supported, but buggy. We recommend using other models if you can, until Ollama tool-calling capabilities improve.
Specifically, Ollama often calls tools with non-existent or missing parameters.

If you'd like to try using Ollama anyway, see below:
```
export OLLAMA_API_BASE="http://localhost:11434"
holmes ask "what pods are unhealthy in my cluster?" --model="ollama_chat/llama3.1"
```

You can also connect to Ollama in the standard OpenAI format (this should be equivalent to the above):

```
# note the v1 at the end
export OPENAI_API_BASE="http://localhost:11434/v1"
# holmes requires OPENAI_API_KEY to be set but the value does not matter
export OPENAI_API_KEY=123
holmes ask "what pods are unhealthy in my cluster?" --model="openai/llama3.1"
```

</details>
<details>
<summary>Gemini/Google AI Studio</summary>

To use Gemini, set the `GEMINI_API_KEY` environment variable as follows:

```bash
export GEMINI_API_KEY="your-gemini-api-key"
```

Once the environment variable is set, you can run the following command to interact with Gemini:

```bash
holmes ask "what pods are unhealthy and why?" --model=gemini/<MODEL_NAME>
```

Be sure to replace `MODEL_NAME` with a model you have access to - e.g., `gemini-pro`, `gemini/gemini-1.5-flash`, etc.

</details>
<details>
<summary>Vertex AI Gemini</summary>

To use Vertex AI with Gemini models, set the following environment variables:

```bash
export VERTEXAI_PROJECT="your-project-id"
export VERTEXAI_LOCATION="us-central1"
export GOOGLE_APPLICATION_CREDENTIALS="path/to/your/service_account_key.json"
```

Once the environment variables are set, you can run the following command to interact with Vertex AI Gemini models:

```bash
poetry run python holmes.py ask "what pods are unhealthy and why?" --model "vertex_ai/<MODEL_NAME>"
```

Be sure to replace `MODEL_NAME` with a model you have access to - e.g., `gemini-pro`, `gemini-2.0-flash-exp`, etc.
Ensure you have the correct project, location, and credentials for accessing the desired Vertex AI model.

</details>
<details>
<summary>Using other OpenAI-compatible models</summary>

You will need an LLM with support for function-calling (tool-calling).

* Set the environment variable for your URL with `OPENAI_API_BASE`
* Set the model as `openai/<your-model-name>` (e.g., `llama3.1:latest`)
* Set your API key (if your URL doesn't require a key, then add a random value for `--api-key`)

```bash
export OPENAI_API_BASE=<URL_HERE>
holmes ask "what pods are unhealthy and why?" --model=openai/<MODEL_NAME> --api-key=<API_KEY_HERE>
```

**Important: Please verify that your model and inference server support function calling! HolmesGPT is currently unable to check if the LLM it was given supports function-calling or not. Some models that lack function-calling capabilities will hallucinate answers instead of reporting that they are unable to call functions. This behaviour depends on the model.**

In particular, note that [vLLM does not yet support function calling](https://github.com/vllm-project/vllm/issues/1869), whereas [llama-cpp does support it](https://github.com/abetlen/llama-cpp-python?tab=readme-ov-file#function-calling).

</details>

**Additional LLM Configuration:**

<details>
<summary>Trusting custom Certificate Authority (CA) certificate</summary>
If your LLM provider's URL uses a certificate from a custom CA, you can trust it by base64-encoding the certificate and storing it in an environment variable named <b>CERTIFICATE</b>.
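
For example (a sketch; the certificate path is a placeholder):

```bash
# base64-encode the CA certificate and expose it via the CERTIFICATE environment variable
export CERTIFICATE="$(base64 < /path/to/custom-ca.crt | tr -d '\n')"
holmes ask "what pods are unhealthy and why?"
```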
</details>
127 changes: 127 additions & 0 deletions docs/installation.md
@@ -0,0 +1,127 @@
# Installing HolmesGPT

## In-Cluster Installation (Recommended)

If you use Kubernetes, we recommend installing Holmes + [Robusta](https://github.com/robusta-dev/robusta) as a unified package:

- Forward alerts by webhook to Robusta
- Analyze alerts in a web UI and ask follow-up questions
- ChatGPT-like interface to query observability and K8s data in natural language
- Built-in integrations with **Prometheus alerts**, [Slack](https://docs.robusta.dev/master/configuration/ai-analysis.html), and more
- No need for an OpenAI API Key (allows bringing your own LLM if you prefer)
- Simple installation using `helm`

[Sign up for Robusta SaaS](https://platform.robusta.dev/signup/?utm_source=github&utm_medium=holmesgpt-readme&utm_content=ways_to_use_holmesgpt_section) (Kubernetes cluster required) or contact us about on-premise options.

## CLI Installation

You can install Holmes as a CLI tool and run it on your local machine:

<details>
<summary>Brew (Mac/Linux)</summary>

1. Add our tap:

```sh
brew tap robusta-dev/homebrew-holmesgpt
```

2. Install holmesgpt:

```sh
brew install holmesgpt
```

3. Check that the installation was successful (**this will take a few seconds on the first run - wait patiently**):

```sh
holmes --help
```

4. Apply an example Pod to Kubernetes with an error that Holmes can investigate:

```sh
kubectl apply -f https://raw.githubusercontent.com/robusta-dev/kubernetes-demos/main/pending_pods/pending_pod_node_selector.yaml
```

5. Run holmesgpt:

```sh
holmes ask "what is wrong with the user-profile-import pod?"
```
</details>


<details>
<summary>Docker Container</summary>

Run the prebuilt Docker container `us-central1-docker.pkg.dev/genuine-flight-317411/devel/holmes`, with extra flags to mount relevant config files (so that kubectl and other tools can access AWS/GCP resources using your local machine's credentials):

```bash
docker run -it --net=host -v ~/.holmes:/root/.holmes -v ~/.aws:/root/.aws -v ~/.config/gcloud:/root/.config/gcloud -v $HOME/.kube/config:/root/.kube/config us-central1-docker.pkg.dev/genuine-flight-317411/devel/holmes ask "what pods are unhealthy and why?"
```
</details>

<details>

<summary>Pip and Pipx</summary>

You can install HolmesGPT from the latest git version with pip or pipx.

We recommend using pipx because it guarantees that HolmesGPT is isolated from other python packages on your system, preventing dependency conflicts.

First install [Pipx](https://github.com/pypa/pipx) (skip this step if you are using pip).

Then install HolmesGPT from git with either pip or pipx:

```
pipx install "https://github.com/robusta-dev/holmesgpt/archive/refs/heads/master.zip"
```

Verify that HolmesGPT was installed by checking the version:

```
holmes version
```

To upgrade HolmesGPT with pipx, you can run:

```
pipx upgrade holmesgpt
```
</details>

<details>

<summary>From Source (Python Poetry)</summary>

First [install poetry (the python package manager)](https://python-poetry.org/docs/#installing-with-the-official-installer)

```
git clone https://github.com/robusta-dev/holmesgpt.git
cd holmesgpt
poetry install --no-root
poetry run python3 holmes.py ask "what pods are unhealthy and why?"
```
</details>

<details>
<summary>From Source (Docker)</summary>

Clone the project from github, and then run:

```bash
cd holmesgpt
docker build -t holmes . -f Dockerfile.dev
docker run -it --net=host -v ~/.holmes:/root/.holmes -v ~/.aws:/root/.aws -v ~/.config/gcloud:/root/.config/gcloud -v $HOME/.kube/config:/root/.kube/config holmes ask "what pods are unhealthy and why?"
```
</details>

<details>
<summary>Python API</summary>

You can use Holmes as a library and pass in your own LLM implementation. This is particularly useful if LiteLLM or the default Holmes implementation does not suit you.

See an example implementation [here](examples/custom_llm.py).

</details>
65 changes: 65 additions & 0 deletions docs/k9s.md
@@ -0,0 +1,65 @@
# Using HolmesGPT in K9s

Add the following contents to the K9s plugin file, typically `~/.config/k9s/plugins.yaml` on Linux and `~/Library/Application Support/k9s/plugins.yaml` on Mac. Read more about K9s plugins [here](https://k9scli.io/topics/plugins/) and check your plugin path [here](https://github.com/derailed/k9s?tab=readme-ov-file#k9s-configuration).

**Note**: HolmesGPT must be installed and configured for the K9s plugin to work.

Basic plugin to run an investigation on any Kubernetes object, using the shortcut `Shift + H`:

```yaml
plugins:
holmesgpt:
shortCut: Shift-H
description: Ask HolmesGPT
scopes:
- all
command: bash
background: false
confirm: false
args:
- -c
- |
holmes ask "why is $NAME of $RESOURCE_NAME in -n $NAMESPACE not working as expected"
echo "Press 'q' to exit"
while : ; do
read -n 1 k <&1
if [[ $k = q ]] ; then
break
fi
done
```
Advanced plugin that lets you modify the question HolmesGPT asks the LLM, using the shortcut `Shift + Q`. (E.g. you can change the question to "generate an HPA for this deployment" and the AI will follow those instructions and output an HPA configuration.)
```yaml
plugins:
custom-holmesgpt:
shortCut: Shift-Q
description: Custom HolmesGPT Ask
scopes:
- all
command: bash
args:
- -c
- |
INSTRUCTIONS="# Edit the line below. Lines starting with '#' will be ignored."
DEFAULT_ASK_COMMAND="why is $NAME of $RESOURCE_NAME in -n $NAMESPACE not working as expected"
QUESTION_FILE=$(mktemp)
echo "$INSTRUCTIONS" > "$QUESTION_FILE"
echo "$DEFAULT_ASK_COMMAND" >> "$QUESTION_FILE"
# Open the line in the default text editor
${EDITOR:-nano} "$QUESTION_FILE"
# Read the modified line, ignoring lines starting with '#'
user_input=$(grep -v '^#' "$QUESTION_FILE")
echo running: holmes ask "\"$user_input\""
holmes ask "$user_input"
echo "Press 'q' to exit"
while : ; do
read -n 1 k <&1
if [[ $k = q ]] ; then
break
fi
done
```
8 changes: 8 additions & 0 deletions docs/python.md
@@ -0,0 +1,8 @@
# Using HolmesGPT as a Python Library

You can use HolmesGPT as a library, if you want to:

* Use some custom LLM implementation not supported by HolmesGPT or LiteLLM
* Build a complex workflow not supported by HolmesGPT itself, while re-using all of Holmes's integrations and investigation capabilities

First [install the library](installation.md) using pip, pipx, or poetry. Then see an example implementation [here](../examples/custom_llm.py).
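
For reference, here is a minimal sketch of that flow, adapted from the bundled `examples/custom_llm.py`. It assumes `OPENAI_API_KEY` is set and uses the default LiteLLM-backed `DefaultLLM`; swap in your own `LLM` subclass if you need a custom implementation.

```python
# Minimal sketch: ask Holmes a question from Python using the built-in toolsets.
# Assumes OPENAI_API_KEY is set in the environment; the model name is just an example.
from holmes.core.llm import DefaultLLM
from holmes.core.tool_calling_llm import ToolCallingLLM
from holmes.core.tools import ToolExecutor
from holmes.plugins.toolsets import load_builtin_toolsets
from holmes.plugins.prompts import load_and_render_prompt

system_prompt = load_and_render_prompt(prompt="builtin://generic_ask.jinja2", context={})
tool_executor = ToolExecutor(load_builtin_toolsets())
ai = ToolCallingLLM(tool_executor, max_steps=10, llm=DefaultLLM("gpt-4o"))

response = ai.prompt_call(system_prompt, "what pods are unhealthy in my cluster?")
print(response.model_dump())
```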
64 changes: 64 additions & 0 deletions examples/custom_llm.py
@@ -0,0 +1,64 @@
from typing import Any, Dict, List, Optional, Type, Union
from holmes.core.llm import LLM
from litellm.types.utils import ModelResponse
from holmes.core.tool_calling_llm import ToolCallingLLM
from holmes.core.tools import Tool, ToolExecutor
from holmes.plugins.toolsets import load_builtin_toolsets
from pydantic import BaseModel
from holmes.plugins.prompts import load_and_render_prompt


class MyCustomLLM(LLM):
def get_context_window_size(self) -> int:
return 128000

def get_maximum_output_token(self) -> int:
return 4096

def count_tokens_for_message(self, messages: list[dict]) -> int:
return 1

def completion(
self,
messages: List[Dict[str, Any]],
tools: Optional[List[Tool]] = [],
tool_choice: Optional[Union[str, dict]] = None,
response_format: Optional[Union[dict, Type[BaseModel]]] = None,
temperature: Optional[float] = None,
drop_params: Optional[bool] = None,
) -> ModelResponse:
return ModelResponse(
choices=[
{
"finish_reason": "stop",
"index": 0,
"message": {
"role": "assistant",
"content": "There are no issues with your cluster",
},
}
],
usage={
"prompt_tokens": 0, # Integer
"completion_tokens": 0,
"total_tokens": 0,
},
)


def ask_holmes():
prompt = "what pods are unhealthy in my cluster?"

system_prompt = load_and_render_prompt(
prompt="builtin://generic_ask.jinja2", context={}
)

tool_executor = ToolExecutor(load_builtin_toolsets())
ai = ToolCallingLLM(tool_executor, max_steps=10, llm=MyCustomLLM())

response = ai.prompt_call(system_prompt, prompt)

print(response.model_dump())


ask_holmes()
2 changes: 1 addition & 1 deletion examples/custom_runbooks.yaml
@@ -4,4 +4,4 @@ runbooks:
instructions: >
      Analyze pod logs for errors and also read the mongodb logs
Correlate between the two logs and try to find the root cause of the issue.
Based on the logs, report the session ids of impacted transactions
Based on the logs, report the session ids of impacted transactions
30 changes: 25 additions & 5 deletions examples/custom_toolset.yaml
@@ -1,10 +1,30 @@
# Here is an example of how you can give the LLM your own tools to use (loaded with the `-t` command line argument)
# For more examples, refer to the builtin toolsets - https://github.com/robusta-dev/holmesgpt/blob/master/holmes/plugins/toolsets/

toolsets:
# Name of the toolset (for example "mycompany/internal-tools")
- name: "switch_clusters"
# list of tools available for the LLM to use during investigation
# each tool is a templated bash command using Jinja2 templates
# the LLM can only control parameters that you expose as template variables like {{ this_variable }}
# Used for informational purposes only (e.g. to print the name of the toolset if it can't be loaded)
switch_clusters:
# Description of the toolset. Used for display in the UI
description: "Set of tools for switching between kubernetes clusters"
# Documentation URL. Used for display in the UI
docs_url: "https://kubernetes.io/docs/home/"
# Icon URL. Used for display in the UI
icon_url: "https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcRPKA-U9m5BxYQDF1O7atMfj9EMMXEoGu4t0Q&s"
# Tags for categorizing toolsets, 'core' will be used for all Holmes features (both cli's commands and chats in UI).
    # The 'cluster' tag is used for UI functionality, while 'cli' is for command-line specific tools
tags:
- core
# Prerequisites is a list of conditions checked by Holmes before using the toolset
prerequisites:
- command: "kubectl version --client"
# List of tools the LLM can use - this is the important part
tools:
# Name is a unique identifier for the tool
- name: "switch_cluster"
# The LLM looks at this description when deciding what tools are relevant for each task
description: "Used to switch between multiple kubernetes contexts(clusters)"
command: "kubectl config use-context {cluster-name}"

# A templated bash command using Jinja2 templates
# The LLM can only control parameters that you expose as template variables like {{ this_variable }}
command: "kubectl config use-context {{ cluster_name }}"
23 changes: 23 additions & 0 deletions helm/holmes/.helmignore
@@ -0,0 +1,23 @@
# Patterns to ignore when building packages.
# This supports shell glob matching, relative path matching, and
# negation (prefixed with !). Only one pattern per line.
.DS_Store
# Common VCS dirs
.git/
.gitignore
.bzr/
.bzrignore
.hg/
.hgignore
.svn/
# Common backup files
*.swp
*.bak
*.tmp
*.orig
*~
# Various IDEs
.project
.idea/
*.tmproj
.vscode/
10 changes: 10 additions & 0 deletions helm/holmes/Chart.yaml
@@ -0,0 +1,10 @@
apiVersion: v2
name: holmes
description: HolmesGPT Helm chart for Kubernetes
type: application

# these are set to the right value by .github/workflows/release.yaml
# we use 0.0.1 as a placeholder for the version because Helm won't allow `0.0.0` and we want to be able to run
# `helm install` on development checkouts without updating this file. The version doesn't matter in that case anyway.
version: 0.0.1
appVersion: 0.0.0
114 changes: 114 additions & 0 deletions helm/holmes/templates/holmes.yaml
@@ -0,0 +1,114 @@
apiVersion: apps/v1
kind: Deployment
metadata:
name: {{ .Release.Name }}-holmes
namespace: {{ .Release.Namespace }}
labels:
app: holmes
spec:
replicas: 1
selector:
matchLabels:
app: holmes
template:
metadata:
labels:
app: holmes
annotations:
# checksum annotation triggering pod reload when .Values.toolsets changes by helm upgrade
checksum/toolset-config: {{ toYaml .Values.toolsets | sha256sum }}
spec:
{{- if .Values.customServiceAccountName }}
serviceAccountName: {{ .Values.customServiceAccountName }}
{{ else }}
serviceAccountName: {{ .Release.Name }}-holmes-service-account
{{- end }}
{{- if .Values.imagePullSecrets }}
imagePullSecrets:
{{- toYaml .Values.imagePullSecrets | nindent 6 }}
{{- end }}
{{- if .Values.nodeSelector }}
nodeSelector: {{ toYaml .Values.nodeSelector | nindent 8 }}
{{- end }}
{{- if .Values.affinity }}
affinity: {{ toYaml .Values.affinity | nindent 8 }}
{{- end }}
{{- if .Values.priorityClassName }}
priorityClassName: {{ .Values.priorityClassName }}
{{- end }}
containers:
- name: holmes
image: "{{ .Values.registry }}/{{ .Values.image }}"
imagePullPolicy: Always
command: ["python3", "-u", "server.py"]
env:
- name: LOG_LEVEL
value: {{ .Values.logLevel }}
- name: ENABLE_TELEMETRY
value: {{ .Values.enableTelemetry }}
- name: SENTRY_DSN
value: {{ .Values.sentryDSN }}
{{ if .Values.certificate -}}
- name: CERTIFICATE
value: {{ .Values.certificate }}
{{- end }}
{{ if .Values.enablePostProcessing -}}
- name: HOLMES_POST_PROCESSING_PROMPT
value: {{ .Values.postProcessingPrompt }}
{{- end }}
{{- if .Values.additionalEnvVars -}}
{{ toYaml .Values.additionalEnvVars | nindent 10 }}
{{- end }}
{{- if .Values.additional_env_vars -}}
{{ toYaml .Values.additional_env_vars | nindent 10 }}
{{- end }}
lifecycle:
preStop:
exec:
command: ["bash", "-c", "kill -SIGINT 1"]
volumeMounts:
- name: playbooks-config-secret
mountPath: /etc/robusta/config
- name: custom-toolsets-configmap
mountPath: /etc/holmes/config
{{- if .Values.additionalVolumeMounts -}}
{{ toYaml .Values.additionalVolumeMounts | nindent 10 }}
{{- end }}
resources:
requests:
cpu: {{ .Values.resources.requests.cpu }}
memory: {{ .Values.resources.requests.memory }}
limits:
memory: {{ .Values.resources.limits.memory }}
{{ if .Values.resources.limits.cpu }}cpu: {{ .Values.resources.limits.cpu | quote }}{{ end }}
{{- if .Values.tolerations }}
tolerations: {{ toYaml .Values.tolerations | nindent 8 }}
{{- end }}
volumes:
- name: playbooks-config-secret
secret:
secretName: robusta-playbooks-config-secret
optional: true
- name: custom-toolsets-configmap
configMap:
name: custom-toolsets-configmap
optional: true
{{- if .Values.additionalVolumes -}}
{{ toYaml .Values.additionalVolumes | nindent 8 }}
{{- end }}
---
apiVersion: v1
kind: Service
metadata:
name: {{ .Release.Name }}-holmes
namespace: {{ .Release.Namespace }}
labels:
app: holmes
spec:
selector:
app: holmes
ports:
- name: http
protocol: TCP
port: 80
targetPort: 5050
232 changes: 232 additions & 0 deletions helm/holmes/templates/holmesgpt-service-account.yaml
@@ -0,0 +1,232 @@
{{- if .Values.createServiceAccount }}
kind: ClusterRole
apiVersion: rbac.authorization.k8s.io/v1
metadata:
name: {{ .Release.Name }}-holmes-cluster-role
namespace : {{ .Release.Namespace }}
rules:
{{- if .Values.customClusterRoleRules }}
{{ toYaml .Values.customClusterRoleRules | indent 2 }}
{{- end }}
- apiGroups:
- "storage.k8s.io"
resources:
- storageclasses
verbs:
- list
- get
- watch
- apiGroups:
- "metrics.k8s.io"
resources:
- pods
- nodes
verbs:
- get
- list
- apiGroups:
- ""
resources:
- configmaps
- daemonsets
- deployments
- events
- namespaces
- persistentvolumes
- persistentvolumeclaims
- pods
- pods/status
- pods/log
- replicasets
- replicationcontrollers
- services
- serviceaccounts
- endpoints
verbs:
- get
- list
- watch

- apiGroups:
- ""
resources:
- nodes
verbs:
- get
- list
- watch

- apiGroups:
- "apiregistration.k8s.io"
resources:
- apiservices
verbs:
- get
- list

- apiGroups:
- "rbac.authorization.k8s.io"
resources:
- clusterroles
- clusterrolebindings
verbs:
- get
- list
- watch
- apiGroups:
- "autoscaling"
resources:
- horizontalpodautoscalers
verbs:
- get
- list
- watch

- apiGroups:
- apps
resources:
- daemonsets
- deployments
- deployments/scale
- replicasets
- replicasets/scale
- statefulsets
verbs:
- get
- list
- watch

- apiGroups:
- extensions
resources:
- daemonsets
- deployments
- deployments/scale
- ingresses
- replicasets
- replicasets/scale
- replicationcontrollers/scale
verbs:
- get
- list
- watch

- apiGroups:
- batch
resources:
- cronjobs
- jobs
verbs:
- get
- list
- watch

- apiGroups:
- "events.k8s.io"
resources:
- events
verbs:
- get
- list

- apiGroups:
- networking.k8s.io
resources:
- ingresses
- networkpolicies
verbs:
- get
- list
- watch
- apiGroups:
- autoscaling
resources:
- horizontalpodautoscalers
verbs:
- get
- list
- apiGroups:
- "policy"
resources:
- poddisruptionbudgets
- podsecuritypolicies
verbs:
- get
- list
- apiGroups:
- rbac.authorization.k8s.io
resources:
- clusterroles
- clusterrolebindings
- roles
- rolebindings
verbs:
- get
- list
{{- if .Values.openshift }}
- apiGroups:
- apps.openshift.io
resources:
- deploymentconfigs
verbs:
- get
- list
- watch
{{- end }}
# Prometheus CRDs
- apiGroups:
- monitoring.coreos.com
resources:
- alertmanagers
- alertmanagers/finalizers
- alertmanagers/status
- alertmanagerconfigs
- prometheuses
- prometheuses/finalizers
- prometheuses/status
- prometheusagents
- prometheusagents/finalizers
- prometheusagents/status
- thanosrulers
- thanosrulers/finalizers
- thanosrulers/status
- scrapeconfigs
- servicemonitors
- podmonitors
- probes
- prometheusrules
verbs:
- get
- list
- watch

---
apiVersion: v1
kind: ServiceAccount
metadata:
name: {{ .Release.Name }}-holmes-service-account
namespace: {{ .Release.Namespace }}
{{- if .Values.serviceAccount.annotations }}
annotations:
{{- with .Values.serviceAccount.annotations }}
{{- toYaml . | nindent 4}}
{{- end }}
{{- end }}
{{- if .Values.serviceAccount.imagePullSecrets }}
imagePullSecrets:
{{- toYaml .Values.serviceAccount.imagePullSecrets | nindent 2}}
{{- end }}
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: {{ .Release.Name }}-holmes-cluster-role-binding
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: {{ .Release.Name }}-holmes-cluster-role
subjects:
- kind: ServiceAccount
name: {{ .Release.Name }}-holmes-service-account
namespace: {{ .Release.Namespace }}
{{- end }}
10 changes: 10 additions & 0 deletions helm/holmes/templates/toolset-config.yaml
@@ -0,0 +1,10 @@
{{- if .Values.toolsets | not | empty }}
apiVersion: v1
kind: ConfigMap
metadata:
name: custom-toolsets-configmap
namespace: {{ .Release.Namespace }}
data:
custom_toolset.yaml: |-
toolsets: {{ toYaml .Values.toolsets | nindent 6 }}
{{- end }}
55 changes: 55 additions & 0 deletions helm/holmes/values.yaml
@@ -0,0 +1,55 @@
certificate: "" # base64 encoded
logLevel: INFO


additionalEnvVars: []
additional_env_vars: []
imagePullSecrets: []

image: holmes:0.0.0
registry: robustadev

sentryDSN: https://51f9cd9bd2fdee16144db08fc423cd3b@o1120648.ingest.us.sentry.io/4508799804702720
enableTelemetry: true

enableAccountsCreate: true

createServiceAccount: true
customServiceAccountName: ""

customClusterRoleRules: []

enablePostProcessing: false
postProcessingPrompt: "builtin://generic_post_processing.jinja2"
openshift: false

affinity: {}
nodeSelector: ~
tolerations: []

serviceAccount:
imagePullSecrets: []
annotations: {}

toolsets:
kubernetes/core:
enabled: true
kubernetes/logs:
enabled: true
robusta:
enabled: true
internet:
enabled: true


resources:
requests:
cpu: 100m
memory: 1024Mi
limits:
memory: 1024Mi

additionalVolumes: []
additionalVolumeMounts: []

priorityClassName: ""
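
As a sketch of how these values are consumed (the release name and override below are illustrative), the chart can be installed from a local checkout with ordinary Helm value overrides; the `toolset-config.yaml` template then renders the `toolsets` value into the `custom-toolsets-configmap`:

```bash
# Illustrative only: install from a local checkout and disable one of the default toolsets.
helm install holmes ./helm/holmes --set toolsets.internet.enabled=false

# Render the chart locally to inspect the generated ConfigMap without installing anything.
helm template holmes ./helm/holmes --show-only templates/toolset-config.yaml
```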
12 changes: 12 additions & 0 deletions helm/upload_chart.sh
@@ -0,0 +1,12 @@
rm -rf ./tmp
mkdir ./tmp
cd ./tmp
helm package ../holmes
mkdir holmes
mv *.tgz ./holmes
curl https://robusta-charts.storage.googleapis.com/index.yaml > index.yaml
helm repo index --merge index.yaml --url https://robusta-charts.storage.googleapis.com ./holmes
gsutil rsync -r holmes gs://robusta-charts
gsutil setmeta -h "Cache-Control:max-age=0" gs://robusta-charts/index.yaml
cd ../
rm -rf ./tmp
340 changes: 2 additions & 338 deletions holmes.py
@@ -1,340 +1,4 @@
# from holmes.ssh_utils import add_custom_certificate
# add_custom_certificate("cert goes here as a string (not path to the cert rather the cert itself)")

import logging
import re
import warnings
from pathlib import Path
from typing import List, Optional, Pattern

import typer
from rich.console import Console
from rich.logging import RichHandler
from rich.markdown import Markdown
from rich.prompt import Prompt
from rich.rule import Rule

from holmes.config import ConfigFile, LLMType
from holmes.plugins.destinations import DestinationType
from holmes.plugins.prompts import load_prompt

app = typer.Typer(add_completion=False, pretty_exceptions_show_locals=False)
investigate_app = typer.Typer(
add_completion=False,
name="investigate",
no_args_is_help=True,
help="Investigate firing alerts or tickets",
)
app.add_typer(investigate_app, name="investigate")

def init_logging(verbose = False):
logging.basicConfig(level=logging.DEBUG if verbose else logging.INFO, format="%(message)s", handlers=[RichHandler(show_level=False, show_time=False)])
# disable INFO logs from OpenAI
logging.getLogger("httpx").setLevel(logging.WARNING)
# when running in --verbose mode we don't want to see DEBUG logs from these libraries
logging.getLogger("openai._base_client").setLevel(logging.INFO)
logging.getLogger("httpcore").setLevel(logging.INFO)
logging.getLogger("markdown_it").setLevel(logging.INFO)
# Suppress UserWarnings from the slack_sdk module
warnings.filterwarnings("ignore", category=UserWarning, module="slack_sdk.*")
return Console()

# Common cli options
opt_llm: Optional[LLMType] = typer.Option(
LLMType.OPENAI,
help="Which LLM to use ('openai' or 'azure')",
)
opt_api_key: Optional[str] = typer.Option(
None,
help="API key to use for the LLM (if not given, uses environment variables OPENAI_API_KEY or AZURE_OPENAI_API_KEY)",
)
opt_azure_endpoint: Optional[str] = typer.Option(
None,
help="Endpoint to use for Azure AI (if not given, uses environment variable AZURE_OPENAI_ENDPOINT)",
)
opt_model: Optional[str] = typer.Option("gpt-4o", help="Model to use for the LLM")
opt_config_file: Optional[Path] = typer.Option(
None,
"--config",
help="Path to the config file. Defaults to config.yaml when it exists. Command line arguments take precedence over config file settings",
)
opt_custom_toolsets: Optional[List[Path]] = typer.Option(
[],
"--custom-toolsets",
"-t",
help="Path to a custom toolsets (can specify -t multiple times to add multiple toolsets)",
)
opt_allowed_toolsets: Optional[str] = typer.Option(
"*",
help="Toolsets the LLM is allowed to use to investigate (default is * for all available toolsets, can be comma separated list of toolset names)",
)
opt_custom_runbooks: Optional[List[Path]] = typer.Option(
[],
"--custom-runbooks",
"-r",
help="Path to a custom runbooks (can specify -r multiple times to add multiple runbooks)",
)
opt_max_steps: Optional[int] = typer.Option(
10,
"--max-steps",
help="Advanced. Maximum number of steps the LLM can take to investigate the issue",
)
opt_verbose: Optional[bool] = typer.Option(
False,
"--verbose",
"-v",
help="Verbose output",
)
opt_destination: Optional[DestinationType] = typer.Option(
DestinationType.CLI,
"--destination",
help="Destination for the results of the investigation (defaults to STDOUT)",
)
opt_slack_token: Optional[str] = typer.Option(
None,
"--slack-token",
help="Slack API key if --destination=slack (experimental). Can generate with `pip install robusta-cli && robusta integrations slack`",
)
opt_slack_channel: Optional[str] = typer.Option(
None,
"--slack-channel",
help="Slack channel if --destination=slack (experimental). E.g. #devops",
)

# Common help texts
system_prompt_help = "Advanced. System prompt for LLM. Values starting with builtin:// are loaded from holmes/plugins/prompts, values starting with file:// are loaded from the given path, other values are interpreted as a prompt string"


# TODO: add interactive interpreter mode
# TODO: add streaming output
@app.command()
def ask(
prompt: str = typer.Argument(help="What to ask the LLM (user prompt)"),
# common options
llm=opt_llm,
api_key: Optional[str] = opt_api_key,
azure_endpoint: Optional[str] = opt_azure_endpoint,
model: Optional[str] = opt_model,
config_file: Optional[str] = opt_config_file,
custom_toolsets: Optional[List[Path]] = opt_custom_toolsets,
allowed_toolsets: Optional[str] = opt_allowed_toolsets,
max_steps: Optional[int] = opt_max_steps,
verbose: Optional[bool] = opt_verbose,
# advanced options for this command
system_prompt: Optional[str] = typer.Option(
"builtin://generic_ask.jinja2", help=system_prompt_help
),
show_tool_output: bool = typer.Option(
False,
"--show-tool-output",
help="Advanced. Show the output of each tool that was called",
),
):
"""
Ask any question and answer using available tools
"""
console = init_logging(verbose)
config = ConfigFile.load(
config_file,
api_key=api_key,
llm=llm,
azure_endpoint=azure_endpoint,
model=model,
max_steps=max_steps,
custom_toolsets=custom_toolsets,
)
system_prompt = load_prompt(system_prompt)
ai = config.create_toolcalling_llm(console, allowed_toolsets)
console.print("[bold yellow]User:[/bold yellow] " + prompt)
response = ai.call(system_prompt, prompt)
text_result = Markdown(response.result)
if show_tool_output and response.tool_calls:
for tool_call in response.tool_calls:
console.print(f"[bold magenta]Used Tool:[/bold magenta]", end="")
# we need to print this separately with markup=False because it contains arbitrary text and we don't want console.print to interpret it
console.print(f"{tool_call.description}. Output=\n{tool_call.result}", markup=False)
console.print(f"[bold green]AI:[/bold green]", end=" ")
console.print(text_result)


@investigate_app.command()
def alertmanager(
alertmanager_url: Optional[str] = typer.Option(None, help="AlertManager url"),
alertname: Optional[str] = typer.Option(
None,
help="Investigate all alerts with this name (can be regex that matches multiple alerts). If not given, defaults to all firing alerts",
),
alertmanager_username: Optional[str] = typer.Option(
None, help="Username to use for basic auth"
),
alertmanager_password: Optional[str] = typer.Option(
None, help="Password to use for basic auth"
),
# common options
llm: Optional[LLMType] = opt_llm,
api_key: Optional[str] = opt_api_key,
azure_endpoint: Optional[str] = opt_azure_endpoint,
model: Optional[str] = opt_model,
config_file: Optional[str] = opt_config_file,
custom_toolsets: Optional[List[Path]] = opt_custom_toolsets,
allowed_toolsets: Optional[str] = opt_allowed_toolsets,
custom_runbooks: Optional[List[Path]] = opt_custom_runbooks,
max_steps: Optional[int] = opt_max_steps,
verbose: Optional[bool] = opt_verbose,
# advanced options for this command
destination: Optional[DestinationType] = opt_destination,
slack_token: Optional[str] = opt_slack_token,
slack_channel: Optional[str] = opt_slack_channel,
system_prompt: Optional[str] = typer.Option(
"builtin://generic_investigation.jinja2", help=system_prompt_help
),
):
"""
Investigate a Prometheus/Alertmanager alert
"""
console = init_logging(verbose)
config = ConfigFile.load(
config_file,
api_key=api_key,
llm=llm,
azure_endpoint=azure_endpoint,
model=model,
max_steps=max_steps,
alertmanager_url=alertmanager_url,
alertmanager_username=alertmanager_username,
alertmanager_password=alertmanager_password,
slack_token=slack_token,
slack_channel=slack_channel,
custom_toolsets=custom_toolsets,
custom_runbooks=custom_runbooks
)

if alertname:
alertname = re.compile(alertname)

system_prompt = load_prompt(system_prompt)
ai = config.create_issue_investigator(console, allowed_toolsets)

source = config.create_alertmanager_source()

if destination == DestinationType.SLACK:
slack = config.create_slack_destination()

try:
issues = source.fetch_issues(alertname)
except Exception as e:
logging.error(f"Failed to fetch issues from alertmanager: {e}")
return

if alertname is not None:
console.print(
f"[bold yellow]Analyzing {len(issues)} issues matching filter.[/bold yellow] [red]Press Ctrl+C to stop.[/red]"
)
else:
console.print(
f"[bold yellow]Analyzing all {len(issues)} issues. (Use --alertname to filter.)[/bold yellow] [red]Press Ctrl+C to stop.[/red]"
)
for i, issue in enumerate(issues):
console.print(
f"[bold yellow]Analyzing issue {i+1}/{len(issues)}: {issue.name}...[/bold yellow]"
)
result = ai.investigate(issue, system_prompt, console)

if destination == DestinationType.CLI:
console.print(Rule())
console.print("[bold green]AI:[/bold green]", end=" ")
console.print(
Markdown(result.result.replace("\n", "\n\n")), style="bold green"
)
console.print(Rule())
elif destination == DestinationType.SLACK:
slack.send_issue(issue, result)


@investigate_app.command()
def jira(
jira_url: Optional[str] = typer.Option(
None, help="Jira url - e.g. https://your-company.atlassian.net"
),
jira_username: Optional[str] = typer.Option(
None, help="The email address with which you log into Jira"
),
jira_api_key: str = typer.Option(
None,
),
jira_query: Optional[str] = typer.Option(
None,
help="Investigate tickets matching a JQL query (e.g. 'project=DEFAULT_PROJECT')",
),
update_ticket: Optional[bool] = typer.Option(
False, help="Update tickets with AI results"
),
# common options
llm: Optional[LLMType] = opt_llm,
api_key: Optional[str] = opt_api_key,
azure_endpoint: Optional[str] = opt_azure_endpoint,
model: Optional[str] = opt_model,
config_file: Optional[str] = opt_config_file,
custom_toolsets: Optional[List[Path]] = opt_custom_toolsets,
allowed_toolsets: Optional[str] = opt_allowed_toolsets,
custom_runbooks: Optional[List[Path]] = opt_custom_runbooks,
max_steps: Optional[int] = opt_max_steps,
verbose: Optional[bool] = opt_verbose,
# advanced options for this command
system_prompt: Optional[str] = typer.Option(
"builtin://generic_investigation.jinja2", help=system_prompt_help
),
):
"""
Investigate a Jira ticket
"""
console = init_logging(verbose)
config = ConfigFile.load(
config_file,
api_key=api_key,
llm=llm,
azure_endpoint=azure_endpoint,
model=model,
max_steps=max_steps,
jira_url=jira_url,
jira_username=jira_username,
jira_api_key=jira_api_key,
jira_query=jira_query,
custom_toolsets=custom_toolsets,
custom_runbooks=custom_runbooks
)

system_prompt = load_prompt(system_prompt)
ai = config.create_issue_investigator(console, allowed_toolsets)
source = config.create_jira_source()
try:
# TODO: allow passing issue ID
issues = source.fetch_issues()
except Exception as e:
logging.error(f"Failed to fetch issues from Jira: {e}")
return

console.print(
f"[bold yellow]Analyzing {len(issues)} Jira tickets.[/bold yellow] [red]Press Ctrl+C to stop.[/red]"
)
for i, issue in enumerate(issues):
console.print(
f"[bold yellow]Analyzing Jira ticket {i+1}/{len(issues)}: {issue.name}...[/bold yellow]"
)
result = ai.investigate(issue, system_prompt, console)

console.print(Rule())
console.print(f"[bold green]AI analysis of {issue.url}[/bold green]")
console.print(Markdown(result.result.replace("\n", "\n\n")), style="bold green")
console.print(Rule())
if update_ticket:
source.write_back_result(issue.id, result)
console.print(f"[bold]Updated ticket {issue.url}.[/bold]")
else:
console.print(
f"[bold]Not updating ticket {issue.url}. Use the --update-ticket option to do so.[/bold]"
)

from holmes.main import run

if __name__ == "__main__":
app()
run()
7 changes: 7 additions & 0 deletions holmes/.git_archival.json
@@ -0,0 +1,7 @@
{
"hash-full": "$Format:%H$",
"hash-short": "$Format:%h$",
"timestamp": "$Format:%cI$",
"refs": "$Format:%D$",
"describe": "$Format:%(describe:tags=true,match=v[0-9]*)$"
}
71 changes: 68 additions & 3 deletions holmes/__init__.py
@@ -1,3 +1,68 @@
# For relative imports to work in Python 3.6
# See https://stackoverflow.com/a/49375740
import os, sys; sys.path.append(os.path.dirname(os.path.realpath(__file__)))
import json
import os
import subprocess
import sys

# For relative imports to work in Python 3.6 - see https://stackoverflow.com/a/49375740
this_path = os.path.dirname(os.path.realpath(__file__))
sys.path.append(this_path)

# This is patched by github actions during release
__version__ = "0.0.0"


def get_version() -> str:
# the version string was patched by a release - return __version__ which will be correct
if not __version__.startswith("0.0.0"):
return __version__

# we are running from an unreleased dev version
try:
# Get the latest git tag
tag = (
subprocess.check_output(
["git", "describe", "--tags"], stderr=subprocess.STDOUT, cwd=this_path
)
.decode()
.strip()
)

# Get the current branch name
branch = (
subprocess.check_output(
["git", "rev-parse", "--abbrev-ref", "HEAD"],
stderr=subprocess.STDOUT,
cwd=this_path,
)
.decode()
.strip()
)

# Check if there are uncommitted changes
status = (
subprocess.check_output(
["git", "status", "--porcelain"],
stderr=subprocess.STDOUT,
cwd=this_path,
)
.decode()
.strip()
)
dirty = "-dirty" if status else ""

return f"{tag}-{branch}{dirty}"

except Exception:
pass

# we are running without git history, but we still might have git archival data (e.g. if we were pip installed)
archival_file_path = os.path.join(this_path, ".git_archival.json")
if os.path.exists(archival_file_path):
try:
with open(archival_file_path, "r") as f:
archival_data = json.load(f)
return f"{archival_data['refs']}-{archival_data['hash-short']}"
except Exception:
pass

return "dev-version"
33 changes: 33 additions & 0 deletions holmes/common/env_vars.py
@@ -0,0 +1,33 @@
import os
import json


def load_bool(env_var, default: bool):
s = os.environ.get(env_var, str(default))
return json.loads(s.lower())


ENABLED_BY_DEFAULT_TOOLSETS = os.environ.get(
"ENABLED_BY_DEFAULT_TOOLSETS", "kubernetes/core,kubernetes/logs,robusta,internet"
)
HOLMES_HOST = os.environ.get("HOLMES_HOST", "0.0.0.0")
HOLMES_PORT = int(os.environ.get("HOLMES_PORT", 5050))
ROBUSTA_CONFIG_PATH = os.environ.get(
"ROBUSTA_CONFIG_PATH", "/etc/robusta/config/active_playbooks.yaml"
)

ROBUSTA_ACCOUNT_ID = os.environ.get("ROBUSTA_ACCOUNT_ID", "")
STORE_URL = os.environ.get("STORE_URL", "")
STORE_API_KEY = os.environ.get("STORE_API_KEY", "")
STORE_EMAIL = os.environ.get("STORE_EMAIL", "")
STORE_PASSWORD = os.environ.get("STORE_PASSWORD", "")
HOLMES_POST_PROCESSING_PROMPT = os.environ.get("HOLMES_POST_PROCESSING_PROMPT", "")
ROBUSTA_AI = load_bool("ROBUSTA_AI", False)
ROBUSTA_API_ENDPOINT = os.environ.get("ROBUSTA_API_ENDPOINT", "https://api.robusta.dev")

LOG_PERFORMANCE = os.environ.get("LOG_PERFORMANCE", None)


ENABLE_TELEMETRY = load_bool("ENABLE_TELEMETRY", False)
SENTRY_DSN = os.environ.get("SENTRY_DSN", "")
SENTRY_TRACES_SAMPLE_RATE = float(os.environ.get("SENTRY_TRACES_SAMPLE_RATE", "0.0"))
600 changes: 510 additions & 90 deletions holmes/config.py

Large diffs are not rendered by default.

633 changes: 633 additions & 0 deletions holmes/core/conversations.py

Large diffs are not rendered by default.

51 changes: 51 additions & 0 deletions holmes/core/investigation.py
@@ -0,0 +1,51 @@
from holmes.common.env_vars import HOLMES_POST_PROCESSING_PROMPT
from holmes.config import Config
from holmes.core.investigation_structured_output import process_response_into_sections
from holmes.core.issue import Issue
from holmes.core.models import InvestigateRequest, InvestigationResult
from holmes.core.supabase_dal import SupabaseDal
from holmes.utils.robusta import load_robusta_api_key


def investigate_issues(
investigate_request: InvestigateRequest, dal: SupabaseDal, config: Config
):
load_robusta_api_key(dal=dal, config=config)
context = dal.get_issue_data(investigate_request.context.get("robusta_issue_id"))

resource_instructions = dal.get_resource_instructions(
"alert", investigate_request.context.get("issue_type")
)
global_instructions = dal.get_global_instructions_for_account()

raw_data = investigate_request.model_dump()
if context:
raw_data["extra_context"] = context

ai = config.create_issue_investigator(dal=dal)

issue = Issue(
id=context["id"] if context else "",
name=investigate_request.title,
source_type=investigate_request.source,
source_instance_id=investigate_request.source_instance_id,
raw=raw_data,
)

investigation = ai.investigate(
issue,
prompt=investigate_request.prompt_template,
post_processing_prompt=HOLMES_POST_PROCESSING_PROMPT,
instructions=resource_instructions,
global_instructions=global_instructions,
sections=investigate_request.sections,
)

(text_response, sections) = process_response_into_sections(investigation.result)

return InvestigationResult(
analysis=text_response,
sections=sections,
tool_calls=investigation.tool_calls or [],
instructions=investigation.instructions,
)
264 changes: 264 additions & 0 deletions holmes/core/investigation_structured_output.py
@@ -0,0 +1,264 @@
import logging
from typing import Any, Dict, Optional, Tuple
import json
import re
from contextlib import suppress
from holmes.common.env_vars import load_bool


REQUEST_STRUCTURED_OUTPUT_FROM_LLM = load_bool(
"REQUEST_STRUCTURED_OUTPUT_FROM_LLM", True
)
PARSE_INVESTIGATION_MARKDOWN_INTO_STRUCTURED_SECTIONS = load_bool(
"PARSE_INVESTIGATION_MARKDOWN_INTO_STRUCTURED_SECTIONS", True
)


InputSectionsDataType = Dict[str, str]

DEFAULT_SECTIONS: InputSectionsDataType = {
"Alert Explanation": '1-2 sentences explaining the alert itself - note don\'t say "The alert indicates a warning event related to a Kubernetes pod doing blah" rather just say "The pod XYZ did blah" because that is what the user actually cares about',
"Key Findings": "What you checked and found",
"Conclusions and Possible Root causes": "What conclusions can you reach based on the data you found? what are possible root causes (if you have enough conviction to say) or what uncertainty remains. Don't say root cause but 'possible root causes'. Be clear to distinguish between what you know for certain and what is a possible explanation",
"Next Steps": "What you would do next to troubleshoot this issue, any commands that could be run to fix it, or other ways to solve it (prefer giving precise bash commands when possible)",
"Related logs": "Truncate and share the most relevant logs, especially if these explain the root cause. For example: \nLogs from pod robusta-holmes:\n```\n<logs>```\n. Always embed the surroundding +/- 5 log lines to any relevant logs. ",
"App or Infra?": "Explain whether the issue is more likely an infrastructure or an application level issue and why you think that.",
"External links": "Provide links to external sources and a short sentence describing each link. For example provide links to relevant runbooks, etc. This section is a markdown formatted string.",
}


def get_output_format_for_investigation(
sections: InputSectionsDataType,
) -> Dict[str, Any]:
properties = {}
required_fields = []

for title, description in sections.items():
properties[title] = {"type": ["string", "null"], "description": description}
required_fields.append(title)

schema = {
"$schema": "http://json-schema.org/draft-07/schema#",
"type": "object",
"required": required_fields,
"properties": properties,
"additionalProperties": False,
}
output_format = {
"type": "json_schema",
"json_schema": {
"name": "InvestigationResult",
"schema": schema,
"strict": False,
},
}

return output_format


def combine_sections(sections: Dict) -> str:
content = ""
for section_title, section_content in sections.items():
if section_content:
content = content + f"\n# {section_title}\n{section_content}\n"
return content


def parse_markdown_into_sections_from_equal_sign(
markdown_content: str,
) -> Optional[Dict[str, Optional[str]]]:
"""Splits a markdown in different sections where the key is a top level title underlined with `====` and the value is the content
```
Header Title
===========
Content here
```
=>
{
"Header Title": "Content here"
}
"""
matches = re.split(r"(?:^|\n)([^\n]+)\n=+\n", markdown_content.strip())

# Remove any empty first element if the text starts with a header
if matches[0].strip() == "":
matches = matches[1:]

sections = {}

for i in range(0, len(matches), 2):
if i + 1 < len(matches):
header = matches[i]
content = matches[i + 1].strip()
sections[header] = content

if len(sections) > 0:
return sections
else:
return None


def parse_markdown_into_sections_from_hash_sign(
markdown_content: str,
) -> Optional[Dict[str, Optional[str]]]:
"""Splits a markdown in different sections where the key is a top level title underlined with `====` and the value is the content
```
# Header Title
Content here
```
=>
{
"Header Title": "Content here"
}
"""
# Split the text into sections based on headers (# Section)
matches = re.split(r"\n(?=# )", markdown_content.strip())

if not matches[0].startswith("#"):
matches = matches[1:]

sections = {}

for match in matches:
match = match.strip()
if match:
parts = match.split("\n", 1)

if len(parts) > 1:
# Remove the # from the title and use it as key
title = parts[0].replace("#", "").strip()
# Use the rest as content
content = parts[1].strip()
sections[title] = content
else:
# Handle case where section has no content
title = parts[0].replace("#", "").strip()
sections[title] = None

if len(sections) > 0:
return sections
else:
return None


def extract_within(content: str, from_idx: int, to_idx: int) -> str:
with suppress(Exception):
extracted_content = content[from_idx:to_idx]
parsed = json.loads(
extracted_content
) # if this parses as json, set the response as that.
if isinstance(parsed, dict):
logging.warning(
"The LLM did not return structured data but embedded the data into a markdown code block. This indicates the prompt is not optimised for that AI model."
)
content = extracted_content
return content


def pre_format_sections(response: Any) -> Any:
"""Pre-cleaning of the response for some known, specific use cases
prior to it being parsed for sections
"""
if isinstance(response, dict):
# No matter if the result is already structured, we want to go through the code below to validate the JSON
response = json.dumps(response)

if not isinstance(response, str):
# if it's not a string, we make it so as it'll be parsed later
response = str(response)

# In some cases, the LLM will not return a structured json but instead embed the JSON into a markdown code block
# This is not ideal and actually should not happen
if response.startswith("```json\n") and response.endswith("\n```"):
response = extract_within(response, 8, -3)

if response.startswith('"{') and response.endswith('}"'):
# Some Anthropic models embed the actual JSON dict inside a JSON string
# In that case it gets parsed once to get rid of the first level of marshalling
with suppress(Exception):
response = json.loads(response)
return response


def parse_json_sections(
response: Any,
) -> Tuple[str, Optional[Dict[str, Optional[str]]]]:
response = pre_format_sections(response)

with suppress(Exception):
parsed_json = json.loads(response)

if not isinstance(parsed_json, dict):
return (response, None)
sections = {}
for key, value in parsed_json.items():
if isinstance(value, list) and len(value) == 0:
value = None # For links, LLM returns '[]' which is unsightly when converted to markdown

if isinstance(value, list):
sections[key] = "\n\n".join(f"{str(item)}" for item in value)
elif value is not None:
sections[key] = str(
value
) # force to strings. We only expect markdown and don't want to give anything but a string to the UI
else:
sections[key] = value
if sections:
combined = combine_sections(sections)
return (combined, sections)

return (response, None)


def process_response_into_sections(
response: Any,
) -> Tuple[str, Optional[Dict[str, Optional[str]]]]:
sections = None

if REQUEST_STRUCTURED_OUTPUT_FROM_LLM:
(response, sections) = parse_json_sections(response)

if not sections and PARSE_INVESTIGATION_MARKDOWN_INTO_STRUCTURED_SECTIONS:
sections = parse_markdown_into_sections_from_hash_sign(response)
if not sections and PARSE_INVESTIGATION_MARKDOWN_INTO_STRUCTURED_SECTIONS:
sections = parse_markdown_into_sections_from_equal_sign(response)

return (response, sections)


def is_response_an_incorrect_tool_call(
sections: Optional[InputSectionsDataType], choice: dict
) -> bool:
"""Cf. https://github.com/BerriAI/litellm/issues/8241
This code detects when LiteLLM is incapable of handling both tool calls and structured output. This only happens when the LLM is returning a single tool call.
In that case the intention is to retry the LLM calls without structured output.
Post processing may still try to generate a structured output from a monolithic markdown.
"""
with suppress(Exception):
message = choice.get("message", {})
finish_reason = choice.get("finish_reason")
content = message.get("content")
tool_calls = message.get("tool_calls")
role = message.get("role")
if (
sections
and content
and (
# azure
finish_reason == "stop"
or
# bedrock
finish_reason == "tool_calls"
)
and role == "assistant"
and not tool_calls
):
if not isinstance(content, dict):
content = json.loads(content)
if not isinstance(content, dict):
return False
for section_title in sections:
if section_title in content:
return False
return True
return False
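
For illustration, here is a minimal sketch of how the parsing helpers above behave on a plain-markdown response (the input string is made up, not taken from a real investigation):

```python
# Sketch: markdown that is not valid JSON falls through parse_json_sections()
# and is split into sections by parse_markdown_into_sections_from_hash_sign().
from holmes.core.investigation_structured_output import process_response_into_sections

markdown = "# Alert Explanation\nThe pod crashed.\n\n# Next Steps\nCheck the logs."
text, sections = process_response_into_sections(markdown)
print(sections)
# {'Alert Explanation': 'The pod crashed.', 'Next Steps': 'Check the logs.'}
```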
22 changes: 10 additions & 12 deletions holmes/core/issue.py
@@ -1,42 +1,41 @@
from datetime import datetime
from enum import StrEnum
from strenum import StrEnum
from typing import Optional

from pydantic import BaseModel, ConfigDict


class IssueStatus (StrEnum):
class IssueStatus(StrEnum):
OPEN = "open"
CLOSED = "closed"


# TODO: look at finding in Robusta
class Issue(BaseModel):
model_config = ConfigDict(extra='forbid', validate_default=True)
model_config = ConfigDict(extra="forbid", validate_default=True)

# Identifier for the issue - source + issue_id should be unique
id: str
id: str

# Name of the issue - not necessarily unique
name: str
# Name of the issue - not necessarily unique
name: str

# Source of the issue - e.g. Jira
# Source of the issue - e.g. jira
source_type: str

# Identifier for the instance of the source - e.g. Jira project key
# Identifier for the instance of the source - e.g. Jira project key
source_instance_id: str

# Link to the issue, when available
url: Optional[str] = None

# Raw object from the source - e.g. a dict from the source's API
raw: dict = None

# these fields are all optional and used for visual presentation of the issue
# there may not be a 1:1 mapping between source fields and these fields, which is OK
# e.g. jira issues can have arbitrary statuses like 'closed' and 'resolved' whereas for presentation sake
# we want to classify as open/closed so we can color the issue red/green
# if these fields are not present, an LLM may be used to guess them
# if these fields are not present, an LLM may be used to guess them
presentation_status: Optional[IssueStatus] = None

# Markdown with key metadata about the issue. Suggested format is several lines each styled as "*X*: Y" and separated by \n
@@ -53,4 +52,3 @@ class Issue(BaseModel):
# created_at: Optional[datetime] = None # Timestamp of when the issue was created
# updated_at: Optional[datetime] = None # Timestamp of when the issue was last updated
# metadata: Optional[dict] = None # All additional metadata from the source (can be hierchical - e.g. dicts in dicts

205 changes: 205 additions & 0 deletions holmes/core/llm.py
@@ -0,0 +1,205 @@
import logging
from abc import abstractmethod
from typing import Any, Dict, List, Optional, Type, Union

from litellm.types.utils import ModelResponse
import sentry_sdk

from holmes.core.tools import Tool
from pydantic import BaseModel
import litellm
import os
from holmes.common.env_vars import ROBUSTA_AI, ROBUSTA_API_ENDPOINT


def environ_get_safe_int(env_var, default="0"):
try:
return max(int(os.environ.get(env_var, default)), 0)
except ValueError:
return int(default)


OVERRIDE_MAX_OUTPUT_TOKEN = environ_get_safe_int("OVERRIDE_MAX_OUTPUT_TOKEN")
OVERRIDE_MAX_CONTENT_SIZE = environ_get_safe_int("OVERRIDE_MAX_CONTENT_SIZE")


class LLM:
@abstractmethod
def __init__(self):
self.model: str

@abstractmethod
def get_context_window_size(self) -> int:
pass

@abstractmethod
def get_maximum_output_token(self) -> int:
pass

@abstractmethod
def count_tokens_for_message(self, messages: list[dict]) -> int:
pass

@abstractmethod
def completion(
self,
messages: List[Dict[str, Any]],
tools: Optional[List[Tool]] = [],
tool_choice: Optional[Union[str, dict]] = None,
response_format: Optional[Union[dict, Type[BaseModel]]] = None,
temperature: Optional[float] = None,
drop_params: Optional[bool] = None,
) -> ModelResponse:
pass


class DefaultLLM(LLM):
model: str
api_key: Optional[str]
base_url: Optional[str]

def __init__(self, model: str, api_key: Optional[str] = None):
self.model = model
self.api_key = api_key
self.base_url = None

if ROBUSTA_AI:
self.base_url = ROBUSTA_API_ENDPOINT

self.check_llm(self.model, self.api_key)

def check_llm(self, model: str, api_key: Optional[str]):
logging.debug(f"Checking LiteLLM model {model}")
# TODO: this WAS a hack to get around the fact that we can't pass in an api key to litellm.validate_environment
# so without this hack it always complains that the environment variable for the api key is missing
# to fix that, we always set an api key in the standard format that litellm expects (which is ${PROVIDER}_API_KEY)
# TODO: we can now handle this better - see https://github.com/BerriAI/litellm/issues/4375#issuecomment-2223684750
lookup = litellm.get_llm_provider(self.model)
if not lookup:
raise Exception(f"Unknown provider for model {model}")
provider = lookup[1]
if provider == "watsonx":
# NOTE: LiteLLM's validate_environment does not currently include checks for IBM WatsonX.
# The following WatsonX-specific variables are set based on documentation from:
# https://docs.litellm.ai/docs/providers/watsonx
# Required variables for WatsonX:
# - WATSONX_URL: Base URL of your WatsonX instance (required)
# - WATSONX_APIKEY or WATSONX_TOKEN: IBM Cloud API key or IAM auth token (one is required)
model_requirements = {"missing_keys": [], "keys_in_environment": True}
if api_key:
os.environ["WATSONX_APIKEY"] = api_key
if "WATSONX_URL" not in os.environ:
model_requirements["missing_keys"].append("WATSONX_URL")
model_requirements["keys_in_environment"] = False
if "WATSONX_APIKEY" not in os.environ and "WATSONX_TOKEN" not in os.environ:
model_requirements["missing_keys"].extend(
["WATSONX_APIKEY", "WATSONX_TOKEN"]
)
model_requirements["keys_in_environment"] = False
# WATSONX_PROJECT_ID is required because we don't let user pass it to completion call directly
if "WATSONX_PROJECT_ID" not in os.environ:
model_requirements["missing_keys"].append("WATSONX_PROJECT_ID")
model_requirements["keys_in_environment"] = False
# https://docs.litellm.ai/docs/providers/watsonx#usage---models-in-deployment-spaces
# using custom watsonx deployments might require to set WATSONX_DEPLOYMENT_SPACE_ID env
if "watsonx/deployment/" in self.model:
logging.warning(
"Custom WatsonX deployment detected. You may need to set the WATSONX_DEPLOYMENT_SPACE_ID "
"environment variable for proper functionality. For more information, refer to the documentation: "
"https://docs.litellm.ai/docs/providers/watsonx#usage---models-in-deployment-spaces"
)
else:
#
api_key_env_var = f"{provider.upper()}_API_KEY"
if api_key:
os.environ[api_key_env_var] = api_key
model_requirements = litellm.validate_environment(model=model)

if not model_requirements["keys_in_environment"]:
raise Exception(
f"model {model} requires the following environment variables: {model_requirements['missing_keys']}"
)

def _strip_model_prefix(self) -> str:
"""
Helper function to strip 'openai/' prefix from model name if it exists.
model cost is taken from here which does not have the openai prefix
https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json
"""
model_name = self.model
if model_name.startswith("openai/"):
model_name = model_name[len("openai/") :] # Strip the 'openai/' prefix
elif model_name.startswith("bedrock/"):
model_name = model_name[len("bedrock/") :] # Strip the 'bedrock/' prefix
elif model_name.startswith("vertex_ai/"):
model_name = model_name[
len("vertex_ai/") :
] # Strip the 'vertex_ai/' prefix

return model_name

# this unfortunately does not seem to work for azure if the deployment name is not a well-known model name
# if not litellm.supports_function_calling(model=model):
# raise Exception(f"model {model} does not support function calling. You must use HolmesGPT with a model that supports function calling.")

def get_context_window_size(self) -> int:
if OVERRIDE_MAX_CONTENT_SIZE:
logging.debug(
f"Using override OVERRIDE_MAX_CONTENT_SIZE {OVERRIDE_MAX_CONTENT_SIZE}"
)
return OVERRIDE_MAX_CONTENT_SIZE

model_name = os.environ.get("MODEL_TYPE", self._strip_model_prefix())
try:
return litellm.model_cost[model_name]["max_input_tokens"]
except Exception:
logging.warning(
f"Couldn't find model's name {model_name} in litellm's model list, fallback to 128k tokens for max_input_tokens"
)
return 128000

@sentry_sdk.trace
def count_tokens_for_message(self, messages: list[dict]) -> int:
return litellm.token_counter(model=self.model, messages=messages)

def completion(
self,
messages: List[Dict[str, Any]],
tools: Optional[List[Tool]] = [],
tool_choice: Optional[Union[str, dict]] = None,
response_format: Optional[Union[dict, Type[BaseModel]]] = None,
temperature: Optional[float] = None,
drop_params: Optional[bool] = None,
) -> ModelResponse:
result = litellm.completion(
model=self.model,
api_key=self.api_key,
messages=messages,
tools=tools,
tool_choice=tool_choice,
base_url=self.base_url,
temperature=temperature,
response_format=response_format,
drop_params=drop_params,
)

if isinstance(result, ModelResponse):
return result
else:
raise Exception(f"Unexpected type returned by the LLM {type(result)}")

def get_maximum_output_token(self) -> int:
if OVERRIDE_MAX_OUTPUT_TOKEN:
logging.debug(
f"Using OVERRIDE_MAX_OUTPUT_TOKEN {OVERRIDE_MAX_OUTPUT_TOKEN}"
)
return OVERRIDE_MAX_OUTPUT_TOKEN

model_name = os.environ.get("MODEL_TYPE", self._strip_model_prefix())
try:
return litellm.model_cost[model_name]["max_output_tokens"]
except Exception:
logging.warning(
f"Couldn't find model's name {model_name} in litellm's model list, fallback to 4096 tokens for max_output_tokens"
)
return 4096