From 4a963fe23b6adc1602b6c8978766937cbaadf3e6 Mon Sep 17 00:00:00 2001
From: Jeff Epler
Date: Sun, 24 Sep 2023 14:49:31 -0500
Subject: [PATCH 01/10] Add llama.cpp backend

Assumes your model wants Vicuna 1.5-style prompting. Happy to add other
styles. I had a decent experience with the Vicuna-13B-CoT.Q5_K_M.gguf
model, which fits in GPU on a 12GB RTX 3060.
---
 src/chap/backends/llama_cpp.py | 94 ++++++++++++++++++++++++++++++++++
 1 file changed, 94 insertions(+)
 create mode 100644 src/chap/backends/llama_cpp.py

diff --git a/src/chap/backends/llama_cpp.py b/src/chap/backends/llama_cpp.py
new file mode 100644
index 0000000..4d8d521
--- /dev/null
+++ b/src/chap/backends/llama_cpp.py
@@ -0,0 +1,94 @@
+# SPDX-FileCopyrightText: 2023 Jeff Epler
+#
+# SPDX-License-Identifier: MIT
+
+import asyncio
+import json
+from dataclasses import dataclass
+
+import httpx
+
+from ..session import Assistant, User
+
+
+class LlamaCpp:
+    @dataclass
+    class Parameters:
+        url: str = "http://localhost:8080/completion"
+        """The URL of a llama.cpp server's completion endpoint. See https://github.com/ggerganov/llama.cpp/blob/master/examples/server/README.md for more information."""
+
+    def __init__(self):
+        self.parameters = self.Parameters()
+
+    system_message = """\
+A dialog, where USER interacts with AI. AI is helpful, kind, obedient, honest, and knows its own limits.
+"""
+
+    def make_full_query(self, messages, max_query_size):
+        del messages[1:-max_query_size]
+        rows = []
+        for m in messages:
+            content = (m.content or "").strip()
+            if not content:
+                continue
+            if m.role == "system":
+                rows.append(f"ASSISTANT'S RULE: {content}\n")
+            elif m.role == "assistant":
+                rows.append(f"ASSISTANT: {content}\n")
+            elif m.role == "user":
+                rows.append(f"USER: {content}")
+        rows.append("ASSISTANT: ")
+        full_query = ("\n".join(rows)).rstrip()
+        return full_query
+
+    async def aask(
+        self, session, query, *, max_query_size=5, timeout=60
+    ):  # pylint: disable=unused-argument,too-many-locals,too-many-branches
+        params = {
+            "prompt": self.make_full_query(
+                session.session + [User(query)], max_query_size
+            ),
+            "stream": True,
+        }
+        new_content = []
+        try:
+            async with httpx.AsyncClient(timeout=timeout) as client:
+                async with client.stream(
+                    "POST",
+                    self.parameters.url,
+                    json=params,
+                ) as response:
+                    if response.status_code == 200:
+                        async for line in response.aiter_lines():
+                            if line.startswith("data:"):
+                                data = line.removeprefix("data:").strip()
+                                j = json.loads(data)
+                                content = j.get("content")
+                                if not new_content:
+                                    content = content.lstrip()
+                                if content:
+                                    new_content.append(content)
+                                    yield content
+                                if j.get("stop"):
+                                    break
+                    else:
+                        content = f"\nFailed with {response=!r}"
+                        new_content.append(content)
+                        yield content
+
+        except httpx.HTTPError as e:
+            content = f"\nException: {e!r}"
+            new_content.append(content)
+            yield content
+
+        session.session.extend([User(query), Assistant("".join(new_content))])
+
+    def ask(self, session, query, *, max_query_size=5, timeout=60):
+        async def drain():  # aask is an async generator, not a coroutine
+            return [part async for part in self.aask(session, query, max_query_size=max_query_size, timeout=timeout)]
+        asyncio.run(drain())
+        return session.session[-1].content
+
+
+def factory():
+    return LlamaCpp()
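To make the prompt layout above concrete, here is a standalone sketch of the transcript shape that `make_full_query` builds — Vicuna-style `USER:`/`ASSISTANT:` turns, with the system text rendered as an `ASSISTANT'S RULE:` header. The `Message` dataclass is an illustrative stand-in for chap's session entries, not the real class:

```python
from dataclasses import dataclass


@dataclass
class Message:
    role: str
    content: str


def render(messages):
    # Mirrors the row-building loop in make_full_query above
    rows = []
    for m in messages:
        content = (m.content or "").strip()
        if not content:
            continue
        if m.role == "system":
            rows.append(f"ASSISTANT'S RULE: {content}\n")
        elif m.role == "assistant":
            rows.append(f"ASSISTANT: {content}\n")
        elif m.role == "user":
            rows.append(f"USER: {content}")
    rows.append("ASSISTANT: ")
    return "\n".join(rows).rstrip()


history = [
    Message("system", "A dialog, where USER interacts with AI."),
    Message("user", "What is the capital of France?"),
]
print(render(history))
# ASSISTANT'S RULE: A dialog, where USER interacts with AI.
#
# USER: What is the capital of France?
# ASSISTANT:
```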
From d7ad89f411747f92bab2d27b390bc6e8ce94094a Mon Sep 17 00:00:00 2001
From: Jeff Epler
Date: Sun, 24 Sep 2023 14:50:09 -0500
Subject: [PATCH 02/10] Allow configuration of backend parameters from
 environment

---
 src/chap/core.py | 23 ++++++++++++++++++++++-
 1 file changed, 22 insertions(+), 1 deletion(-)

diff --git a/src/chap/core.py b/src/chap/core.py
index cc4b6bd..5de5b5f 100644
--- a/src/chap/core.py
+++ b/src/chap/core.py
@@ -5,6 +5,7 @@
 import datetime
 import importlib
+import os
 import pathlib
 import pkgutil
 import subprocess
 
@@ -35,8 +36,28 @@ def new_session_path(opt_path=None):
     )
 
 
+def configure_api_from_environment(api_name, api):
+    if not hasattr(api, "parameters"):
+        return
+
+    for field in fields(api.parameters):
+        envvar = f"CHAP_{api_name.upper()}_{field.name.upper()}"
+        value = os.environ.get(envvar)
+        if value is None:
+            continue
+        try:
+            tv = field.type(value)
+        except ValueError as e:
+            raise click.BadParameter(
+                f"Invalid value for {field.name} with value {value}: {e}"
+            ) from e
+        setattr(api.parameters, field.name, tv)
+
+
 def get_api(name="openai_chatgpt"):
-    return importlib.import_module(f"{__package__}.backends.{name}").factory()
+    result = importlib.import_module(f"{__package__}.backends.{name}").factory()
+    configure_api_from_environment(name, result)
+    return result
 
 
 def ask(*args, **kw):

From 1d24aa63816d8ce8393ffbcb14da35b10175754b Mon Sep 17 00:00:00 2001
From: Jeff Epler
Date: Sun, 24 Sep 2023 14:56:47 -0500
Subject: [PATCH 03/10] set default backend in environment

---
 src/chap/core.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/chap/core.py b/src/chap/core.py
index 5de5b5f..262ac5a 100644
--- a/src/chap/core.py
+++ b/src/chap/core.py
@@ -217,6 +217,7 @@ def command_uses_new_session(f):
         callback=set_backend,
         expose_value=False,
         is_eager=True,
+        envvar="CHAP_BACKEND",
     )(f)
     f = click.option(
         "--backend-help",

From 5d394b5fcfbcc4380abda604bdaa969107316048 Mon Sep 17 00:00:00 2001
From: Jeff Epler
Date: Sun, 24 Sep 2023 14:56:54 -0500
Subject: [PATCH 04/10] document llama_cpp and environment vars

---
 README.md | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/README.md b/README.md
index 5e9bfae..4abfaf4 100644
--- a/README.md
+++ b/README.md
@@ -49,9 +49,16 @@ You can set the "system message" with the `-S` flag.
 You can select the text generating backend with the `-b` flag:
 
  * openai\_chatgpt: the default, paid API, best quality results
+ * llama\_cpp: Works with [llama.cpp's HTTP server](https://github.com/ggerganov/llama.cpp/blob/master/examples/server/README.md) and can run locally with various models. Set the server URL with `-B url:...`.
  * textgen: Works with https://github.com/oobabooga/text-generation-webui and can run locally with various models, basic and low quality. Needs the server URL in *$configuration_directory/textgen\_url*.
  * lorem: local non-AI lorem generator for testing
 
+## Environment variables
+
+The backend can be set with `CHAP_BACKEND`.
+Backend settings can be set with `CHAP_<backend_name>_<parameter_name>`, with `backend_name` and `parameter_name` all in caps.
+For instance, `CHAP_LLAMA_CPP_URL=http://server.local:8080/completion` changes the default server URL for the llama\_cpp back-end.
+
 ## Importing from ChatGPT
 
 The userscript https://github.com/pionxzh/chatgpt-exporter can export chat logs from chat.openai.com in a json format.
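The coercion rule in patch 02 is easy to check in isolation: each `CHAP_<BACKEND>_<FIELD>` variable is converted with the dataclass field's own annotated type before being assigned. A minimal sketch — the `Parameters` class and its `timeout` field are illustrative stand-ins (the real llama_cpp backend only declares `url`), and note that `field.type` here is the evaluated annotation; under PEP 563 postponed annotations it would be a string instead:

```python
import os
from dataclasses import dataclass, fields


@dataclass
class Parameters:
    url: str = "http://localhost:8080/completion"
    timeout: float = 60.0


def configure_from_environment(backend_name, parameters):
    for field in fields(parameters):
        envvar = f"CHAP_{backend_name.upper()}_{field.name.upper()}"
        value = os.environ.get(envvar)
        if value is None:
            continue
        # str fields pass through unchanged; numeric fields raise
        # ValueError on junk input, which the patch turns into a
        # click.BadParameter error.
        setattr(parameters, field.name, field.type(value))


os.environ["CHAP_LLAMA_CPP_TIMEOUT"] = "120"
params = Parameters()
configure_from_environment("llama_cpp", params)
assert params.timeout == 120.0
```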
From 382a8fb520f75b90a81c7375df5f7da151a28be2 Mon Sep 17 00:00:00 2001
From: Jeff Epler
Date: Sun, 24 Sep 2023 15:14:06 -0500
Subject: [PATCH 05/10] Add a backend list command

---
 src/chap/backends/llama_cpp.py      |  1 +
 src/chap/backends/lorem.py          |  1 +
 src/chap/backends/openai_chatgpt.py |  1 +
 src/chap/backends/textgen.py        |  1 +
 src/chap/core.py                    | 31 ++++++++++++++++++++++++++++-
 5 files changed, 34 insertions(+), 1 deletion(-)

diff --git a/src/chap/backends/llama_cpp.py b/src/chap/backends/llama_cpp.py
index 4d8d521..f301c94 100644
--- a/src/chap/backends/llama_cpp.py
+++ b/src/chap/backends/llama_cpp.py
@@ -91,4 +91,5 @@ def ask(self, session, query, *, max_query_size=5, timeout=60):
 
 
 def factory():
+    """Uses the llama.cpp completion web API"""
     return LlamaCpp()
diff --git a/src/chap/backends/lorem.py b/src/chap/backends/lorem.py
index 563a3af..84a6b4f 100644
--- a/src/chap/backends/lorem.py
+++ b/src/chap/backends/lorem.py
@@ -56,4 +56,5 @@ def ask(
 
 
 def factory():
+    """Prints 'lorem' text. Useful for testing."""
     return Lorem()
diff --git a/src/chap/backends/openai_chatgpt.py b/src/chap/backends/openai_chatgpt.py
index b555764..f9627b2 100644
--- a/src/chap/backends/openai_chatgpt.py
+++ b/src/chap/backends/openai_chatgpt.py
@@ -175,4 +175,5 @@ def get_key(cls):
 
 
 def factory():
+    """Uses the OpenAI chat completion API"""
     return ChatGPT()
diff --git a/src/chap/backends/textgen.py b/src/chap/backends/textgen.py
index 830216b..c34e4f0 100644
--- a/src/chap/backends/textgen.py
+++ b/src/chap/backends/textgen.py
@@ -134,4 +134,5 @@ def ask(self, session, query, *, max_query_size=5, timeout=60):
 
 
 def factory():
+    """Uses the textgen completion API"""
     return Textgen()
diff --git a/src/chap/core.py b/src/chap/core.py
index 262ac5a..246262b 100644
--- a/src/chap/core.py
+++ b/src/chap/core.py
@@ -15,7 +15,7 @@
 import platformdirs
 from simple_parsing.docstring import get_attribute_docstring
 
-from . import commands  # pylint: disable=no-name-in-module
+from . import backends, commands  # pylint: disable=no-name-in-module
 from .session import Session
 
 conversations_path = platformdirs.user_state_path("chap") / "conversations"
@@ -110,6 +110,12 @@ def set_system_message(ctx, param, value):  # pylint: disable=unused-argument
 
 
 def set_backend(ctx, param, value):  # pylint: disable=unused-argument
+    if value == "list":
+        formatter = ctx.make_formatter()
+        format_backend_list(formatter)
+        click.utils.echo(formatter.getvalue().rstrip("\n"))
+        ctx.exit()
+
     try:
         ctx.obj.api = get_api(value)
     except ModuleNotFoundError as e:
@@ -171,6 +177,28 @@ def set_one_backend_option(kv):
         set_one_backend_option(kv)
 
 
+def format_backend_list(formatter):
+    all_backends = []
+    for pi in pkgutil.walk_packages(backends.__path__):
+        name = pi.name
+        if not name.startswith("__"):
+            all_backends.append(name)
+    all_backends.sort()
+
+    rows = []
+    for name in all_backends:
+        try:
+            factory = importlib.import_module(f"{__package__}.backends.{name}").factory
+        except ImportError as e:
+            rows.append((name, str(e)))
+        else:
+            doc = getattr(factory, "__doc__", None)
+            rows.append((name, doc or ""))
+
+    with formatter.section("Available backends"):
+        formatter.write_dl(rows)
+
+
 def uses_session(f):
     f = click.option(
         "--continue-session",
@@ -218,6 +246,7 @@ def command_uses_new_session(f):
         expose_value=False,
         is_eager=True,
         envvar="CHAP_BACKEND",
+        help="The back-end to use ('--backend list' for a list)",
     )(f)
     f = click.option(
         "--backend-help",

From b2aae88195058de1bd6edd48472344c71a781ed1 Mon Sep 17 00:00:00 2001
From: Jeff Epler
Date: Sun, 24 Sep 2023 15:14:21 -0500
Subject: [PATCH 06/10] drop backend-help, it's integrated

---
 src/chap/core.py | 24 ------------------------
 1 file changed, 24 deletions(-)

diff --git a/src/chap/core.py b/src/chap/core.py
index 246262b..63064dc 100644
--- a/src/chap/core.py
+++ b/src/chap/core.py
@@ -136,22 +136,6 @@ def format_backend_help(api, formatter):
     formatter.write_dl(rows)
 
 
-def backend_help(ctx, param, value):  # pylint: disable=unused-argument
-    if ctx.resilient_parsing or not value:
-        return
-
-    api = ctx.obj.api or get_api()
-
-    if not hasattr(api, "parameters"):
-        click.utils.echo(f"{api.__class__.__name__} does not support parameters")
-    else:
-        formatter = ctx.make_formatter()
-        format_backend_help(api, formatter)
-        click.utils.echo(formatter.getvalue().rstrip("\n"))
-
-    ctx.exit()
-
-
 def set_backend_option(ctx, param, opts):  # pylint: disable=unused-argument
     api = ctx.obj.api
     if not hasattr(api, "parameters"):
@@ -248,14 +232,6 @@ def command_uses_new_session(f):
         envvar="CHAP_BACKEND",
         help="The back-end to use ('--backend list' for a list)",
     )(f)
-    f = click.option(
-        "--backend-help",
-        is_flag=True,
-        is_eager=True,
-        callback=backend_help,
-        expose_value=False,
-        help="Show information about backend options",
-    )(f)
     f = click.option(
         "--backend-option",
         "-B",
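The listing added in patch 05 leans on click's own formatter rather than hand-rolled layout: `HelpFormatter.section()` writes an indented heading and `write_dl()` aligns the name/description pairs in two columns. A minimal sketch of those two calls, with illustrative backend names and docstrings:

```python
import click

formatter = click.HelpFormatter()
with formatter.section("Available backends"):
    # write_dl lays out (term, description) pairs as an aligned definition list
    formatter.write_dl(
        [
            ("llama_cpp", "Uses the llama.cpp completion web API"),
            ("lorem", "Prints 'lorem' text. Useful for testing."),
            ("openai_chatgpt", "Uses the OpenAI chat completion API"),
        ]
    )
print(formatter.getvalue().rstrip("\n"))
```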
From ec57f84eefe20a55c179e6a6ad567302cf027ec9 Mon Sep 17 00:00:00 2001
From: Jeff Epler
Date: Sun, 24 Sep 2023 15:27:19 -0500
Subject: [PATCH 07/10] don't diss textgen so hard

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 4abfaf4..1c259f5 100644
--- a/README.md
+++ b/README.md
@@ -50,7 +50,7 @@ You can set the "system message" with the `-S` flag.
 You can select the text generating backend with the `-b` flag:
 
  * openai\_chatgpt: the default, paid API, best quality results
  * llama\_cpp: Works with [llama.cpp's HTTP server](https://github.com/ggerganov/llama.cpp/blob/master/examples/server/README.md) and can run locally with various models. Set the server URL with `-B url:...`.
- * textgen: Works with https://github.com/oobabooga/text-generation-webui and can run locally with various models, basic and low quality. Needs the server URL in *$configuration_directory/textgen\_url*.
+ * textgen: Works with https://github.com/oobabooga/text-generation-webui and can run locally with various models. Needs the server URL in *$configuration_directory/textgen\_url*.
  * lorem: local non-AI lorem generator for testing
 
 ## Environment variables

From 80feb624a5ec084791bbcd54e751c5c7995d149d Mon Sep 17 00:00:00 2001
From: Jeff Epler
Date: Sun, 24 Sep 2023 15:27:24 -0500
Subject: [PATCH 08/10] markup

---
 README.md | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index 1c259f5..c3d8671 100644
--- a/README.md
+++ b/README.md
@@ -25,11 +25,11 @@ Put your OpenAI API key in the platform configuration directory for chap, e.g.,
 ## commandline usage
 
- * chap ask "What advice would you give a 20th century human visiting the 21st century for the first time?"
+ * `chap ask "What advice would you give a 20th century human visiting the 21st century for the first time?"`
 
- * chap render --last
+ * `chap render --last`
 
- * chap import chatgpt-style-chatlog.json
+ * `chap import chatgpt-style-chatlog.json` (for files from pionxzh/chatgpt-exporter)
 
 ## interactive terminal usage
 
  * chap tui

From cbcdec41fd67d7b0006902871ac2f23eb148c0b5 Mon Sep 17 00:00:00 2001
From: Jeff Epler
Date: Sun, 24 Sep 2023 15:27:50 -0500
Subject: [PATCH 09/10] Move --backend, -B to base command

---
 src/chap/commands/cat.py    |  5 +--
 src/chap/commands/render.py |  5 +--
 src/chap/core.py            | 77 +++++++++++++++++--------------------
 3 files changed, 40 insertions(+), 47 deletions(-)

diff --git a/src/chap/commands/cat.py b/src/chap/commands/cat.py
index 1eb3321..d7afc43 100644
--- a/src/chap/commands/cat.py
+++ b/src/chap/commands/cat.py
@@ -4,11 +4,10 @@
 
 import click
 
-from ..core import uses_existing_session
+from ..core import command_uses_existing_session
 
 
-@click.command
-@uses_existing_session
+@command_uses_existing_session
 @click.option("--no-system", is_flag=True)
 def main(obj, no_system):
     """Print session in plaintext"""
diff --git a/src/chap/commands/render.py b/src/chap/commands/render.py
index fe42d7c..122bba6 100644
--- a/src/chap/commands/render.py
+++ b/src/chap/commands/render.py
@@ -7,7 +7,7 @@
 from markdown_it import MarkdownIt
 from rich.markdown import Markdown
 
-from ..core import uses_existing_session
+from ..core import command_uses_existing_session
 
 
 def to_markdown(message):
@@ -25,8 +25,7 @@ def to_markdown(message):
     return m
 
 
-@click.command
-@uses_existing_session
+@command_uses_existing_session
 @click.option("--no-system", is_flag=True)
 def main(obj, no_system):
     """Print session with formatting"""
diff --git a/src/chap/core.py b/src/chap/core.py
index 63064dc..a41dd61 100644
--- a/src/chap/core.py
+++ b/src/chap/core.py
@@ -199,48 +199,14 @@ def uses_session(f):
     return f
 
 
-def uses_existing_session(f):
+def command_uses_existing_session(f):
     f = uses_session(f)
+    f = click.command()(f)
     return f
 
 
-class CommandWithBackendHelp(click.Command):
-    def format_options(self, ctx, formatter):
-        super().format_options(ctx, formatter)
-        api = ctx.obj.api or get_api()
-        if hasattr(api, "parameters"):
-            format_backend_help(api, formatter)
-
-
 def command_uses_new_session(f):
-    f = click.option(
-        "--system-message",
-        "-S",
-        type=str,
-        default=None,
-        callback=set_system_message,
-        expose_value=False,
-    )(f)
-    f = click.option(
-        "--backend",
-        "-b",
-        type=str,
-        default="openai_chatgpt",
-        callback=set_backend,
-        expose_value=False,
-        is_eager=True,
-        envvar="CHAP_BACKEND",
-        help="The back-end to use ('--backend list' for a list)",
-    )(f)
-    f = click.option(
-        "--backend-option",
-        "-B",
-        type=colonstr,
-        callback=set_backend_option,
-        expose_value=False,
-        multiple=True,
-    )(f)
-    f = uses_existing_session(f)
+    f = uses_session(f)
     f = click.option(
         "--new-session",
         "-n",
@@ -249,7 +215,7 @@
         callback=do_session_new,
         expose_value=False,
     )(f)
-    f = click.command(cls=CommandWithBackendHelp)(f)
+    f = click.command()(f)
     return f
 
 
@@ -285,8 +251,7 @@ class Obj:
 
 class MyCLI(click.MultiCommand):
     def make_context(self, info_name, args, parent=None, **extra):
-        result = super().make_context(info_name, args, parent, **extra)
-        result.obj = Obj()
+        result = super().make_context(info_name, args, parent, obj=Obj(), **extra)
         return result
 
     def list_commands(self, ctx):
@@ -304,6 +269,12 @@ def get_command(self, ctx, cmd_name):
         except ModuleNotFoundError as exc:
             raise click.UsageError(f"Invalid subcommand {cmd_name!r}", ctx) from exc
 
+    def format_options(self, ctx, formatter):
+        super().format_options(ctx, formatter)
+        api = ctx.obj.api or get_api()
+        if hasattr(api, "parameters"):
+            format_backend_help(api, formatter)
+
 
 main = MyCLI(
     help="Commandline interface to ChatGPT",
@@ -314,6 +285,30 @@
         is_eager=True,
         help="Show the version and exit",
         callback=version_callback,
-        )
+        ),
+        click.Option(
+            ("--system-message", "-S"),
+            type=str,
+            default=None,
+            callback=set_system_message,
+            expose_value=False,
+        ),
+        click.Option(
+            ("--backend", "-b"),
+            type=str,
+            default="openai_chatgpt",
+            callback=set_backend,
+            expose_value=False,
+            is_eager=True,
+            envvar="CHAP_BACKEND",
+            help="The back-end to use ('--backend list' for a list)",
+        ),
+        click.Option(
+            ("--backend-option", "-B"),
+            type=colonstr,
+            callback=set_backend_option,
+            expose_value=False,
+            multiple=True,
+        ),
     ],
 )

From 26912b1fe98a821740a6e23960b5b789f2537a9f Mon Sep 17 00:00:00 2001
From: Jeff Epler
Date: Sun, 24 Sep 2023 15:28:20 -0500
Subject: [PATCH 10/10] that link just doesn't format well in the docs

---
 src/chap/backends/llama_cpp.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/chap/backends/llama_cpp.py b/src/chap/backends/llama_cpp.py
index f301c94..4854b43 100644
--- a/src/chap/backends/llama_cpp.py
+++ b/src/chap/backends/llama_cpp.py
@@ -15,7 +15,7 @@ class LlamaCpp:
     @dataclass
     class Parameters:
         url: str = "http://localhost:8080/completion"
-        """The URL of a llama.cpp server's completion endpoint. See https://github.com/ggerganov/llama.cpp/blob/master/examples/server/README.md for more information."""
+        """The URL of a llama.cpp server's completion endpoint."""
 
     def __init__(self):
         self.parameters = self.Parameters()
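For reference, the wire format the new backend consumes can be exercised without chap at all: llama.cpp's `/completion` endpoint streams `data: {json}` lines whose `content` field carries a text fragment and whose `stop` flag marks the final chunk, which is exactly what patch 01's `aask` parses. A minimal synchronous sketch — the server URL and prompt are illustrative, and a llama.cpp server is assumed to already be running locally:

```python
import json

import httpx

URL = "http://localhost:8080/completion"
payload = {"prompt": "USER: Say hello.\nASSISTANT:", "stream": True}

with httpx.Client(timeout=60) as client:
    with client.stream("POST", URL, json=payload) as response:
        response.raise_for_status()
        for line in response.iter_lines():
            if not line.startswith("data:"):
                continue  # skip blank separator lines between events
            chunk = json.loads(line.removeprefix("data:").strip())
            # Each chunk carries a fragment of the completion text
            print(chunk.get("content") or "", end="", flush=True)
            if chunk.get("stop"):
                break
print()
```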