Merge pull request #12 from jepler/llama_cpp
Add llama.cpp support
jepler authored Sep 24, 2023
2 parents 02de0b3 + 26912b1, commit 1b700aa
Showing 8 changed files with 200 additions and 75 deletions.
README.md: 15 changes (11 additions & 4 deletions)
@@ -25,11 +25,11 @@ Put your OpenAI API key in the platform configuration directory for chap, e.g.,
 
 ## commandline usage
 
-* chap ask "What advice would you give a 20th century human visiting the 21st century for the first time?"
+* `chap ask "What advice would you give a 20th century human visiting the 21st century for the first time?"`
 
-* chap render --last
+* `chap render --last`
 
-* chap import chatgpt-style-chatlog.json
+* `chap import chatgpt-style-chatlog.json` (for files from pionxzh/chatgpt-exporter)
 
 ## interactive terminal usage
 * chap tui
@@ -49,9 +49,16 @@ You can set the "system message" with the `-S` flag.
 
 You can select the text generating backend with the `-b` flag:
 * openai\_chatgpt: the default, paid API, best quality results
-* textgen: Works with https://github.com/oobabooga/text-generation-webui and can run locally with various models, basic and low quality. Needs the server URL in *$configuration_directory/textgen\_url*.
+* llama_cpp: Works with [llama.cpp's http server](https://github.com/ggerganov/llama.cpp/blob/master/examples/server/README.md) and can run locally with various models. Set the server URL with `-B url:...`.
+* textgen: Works with https://github.com/oobabooga/text-generation-webui and can run locally with various models. Needs the server URL in *$configuration_directory/textgen\_url*.
 * lorem: local non-AI lorem generator for testing
 
+## Environment variables
+
+The backend can be set with `CHAP_BACKEND`.
+Backend settings can be set with `CHAP_<backend_name>_<parameter_name>`, with `backend_name` and `parameter_name` all in caps.
+For instance, `CHAP_LLAMA_CPP_URL=http://server.local:8080/completion` changes the default server URL for the llama_cpp back-end.
+
 ## Importing from ChatGPT
 
 The userscript https://github.com/pionxzh/chatgpt-exporter can export chat logs from chat.openai.com in a json format.
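
The environment-variable convention added above maps directly onto backend parameter names. A minimal sketch of that mapping (the helper below is hypothetical, not chap's actual code, and chap's real mechanism is not part of this diff):

import os

def parameter_from_env(backend_name: str, parameter_name: str, default: str) -> str:
    # CHAP_<BACKEND>_<PARAMETER>, all in caps, e.g. CHAP_LLAMA_CPP_URL
    # for the llama_cpp backend's "url" parameter.
    var = f"CHAP_{backend_name.upper()}_{parameter_name.upper()}"
    return os.environ.get(var, default)

# parameter_from_env("llama_cpp", "url", "http://localhost:8080/completion")
# returns CHAP_LLAMA_CPP_URL's value when it is set, the default otherwise.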
src/chap/backends/llama_cpp.py: 95 additions & 0 deletions (new file)
@@ -0,0 +1,95 @@
# SPDX-FileCopyrightText: 2023 Jeff Epler <[email protected]>
#
# SPDX-License-Identifier: MIT

import asyncio
import json
from dataclasses import dataclass

import httpx

from ..session import Assistant, User


class LlamaCpp:
    @dataclass
    class Parameters:
        url: str = "http://localhost:8080/completion"
        """The URL of a llama.cpp server's completion endpoint."""

    def __init__(self):
        self.parameters = self.Parameters()

    system_message = """\
A dialog, where USER interacts with AI. AI is helpful, kind, obedient, honest, and knows its own limits.
"""

    def make_full_query(self, messages, max_query_size):
        # Keep the first (system) message plus the most recent
        # max_query_size messages.
        del messages[1:-max_query_size]
        rows = []
        for m in messages:
            content = (m.content or "").strip()
            if not content:
                continue
            if m.role == "system":
                rows.append(f"ASSISTANT'S RULE: {content}\n")
            elif m.role == "assistant":
                rows.append(f"ASSISTANT: {content}\n")
            elif m.role == "user":
                rows.append(f"USER: {content}")
        rows.append("ASSISTANT: ")
        full_query = ("\n".join(rows)).rstrip()
        return full_query
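
    # Illustrative only: for the default system message, an earlier
    # exchange ("Hello" / "Hi!"), and a new query "What is 2+2?",
    # make_full_query() assembles a prompt like (rule wrapped here to fit):
    #
    #   ASSISTANT'S RULE: A dialog, where USER interacts with AI. AI is
    #   helpful, kind, obedient, honest, and knows its own limits.
    #
    #   USER: Hello
    #   ASSISTANT: Hi!
    #
    #   USER: What is 2+2?
    #   ASSISTANT: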

    async def aask(
        self, session, query, *, max_query_size=5, timeout=60
    ):  # pylint: disable=unused-argument,too-many-locals,too-many-branches
        params = {
            "prompt": self.make_full_query(
                session.session + [User(query)], max_query_size
            ),
            "stream": True,
        }
        new_content = []
        try:
            async with httpx.AsyncClient(timeout=timeout) as client:
                async with client.stream(
                    "POST",
                    self.parameters.url,
                    json=params,
                ) as response:
                    if response.status_code == 200:
                        async for line in response.aiter_lines():
                            if line.startswith("data:"):
                                data = line.removeprefix("data:").strip()
                                j = json.loads(data)
                                # Guard against a missing "content" field.
                                content = j.get("content") or ""
                                if not new_content:
                                    content = content.lstrip()
                                if content:
                                    new_content.append(content)
                                    yield content
                                if j.get("stop"):
                                    break
                    else:
                        content = f"\nFailed with {response=!r}"
                        new_content.append(content)
                        yield content

        except httpx.HTTPError as e:
            content = f"\nException: {e!r}"
            new_content.append(content)
            yield content

        session.session.extend([User(query), Assistant("".join(new_content))])
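
    # The llama.cpp server streams Server-Sent-Events-style lines, one JSON
    # payload per "data:" line. Illustrative examples of what the loop above
    # parses (field layout assumed from the .get("content") / .get("stop")
    # calls, not from the llama.cpp documentation):
    #   data: {"content": "Hello", "stop": false}
    #   data: {"content": "", "stop": true}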

    def ask(self, session, query, *, max_query_size=5, timeout=60):
        async def drain():
            # aask() is an async generator, which asyncio.run() cannot
            # execute directly; consume it so the session gets updated.
            async for _ in self.aask(
                session, query, max_query_size=max_query_size, timeout=timeout
            ):
                pass

        asyncio.run(drain())
        return session.session[-1].content


def factory():
    """Uses the llama.cpp completion web API"""
    return LlamaCpp()
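
A minimal sketch of driving this backend directly. The stand-in session below assumes only what aask() itself uses, a .session list of objects with .role and .content attributes, and a llama.cpp server listening on the default URL; both are assumptions, not chap's documented API:

import asyncio
from dataclasses import dataclass, field

from chap.backends.llama_cpp import LlamaCpp


@dataclass
class StubMessage:
    role: str
    content: str


@dataclass
class StubSession:
    # aask() only reads and extends this list.
    session: list = field(default_factory=list)


async def demo():
    backend = LlamaCpp()
    sess = StubSession([StubMessage("system", LlamaCpp.system_message)])
    async for token in backend.aask(sess, "Say hello in five words."):
        print(token, end="", flush=True)
    print()


asyncio.run(demo())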
src/chap/backends/lorem.py: 1 addition & 0 deletions

@@ -56,4 +56,5 @@ def ask(
 
 
 def factory():
+    """That just prints 'lorem' text. Useful for testing."""
     return Lorem()
src/chap/backends/openai_chatgpt.py: 1 addition & 0 deletions

@@ -175,4 +175,5 @@ def get_key(cls):
 
 
 def factory():
+    """Uses the OpenAI chat completion API"""
     return ChatGPT()
src/chap/backends/textgen.py: 1 addition & 0 deletions

@@ -134,4 +134,5 @@ def ask(self, session, query, *, max_query_size=5, timeout=60):
 
 
 def factory():
+    """Uses the textgen completion API"""
     return Textgen()
src/chap/commands/cat.py: 2 additions & 3 deletions

@@ -4,11 +4,10 @@
 
 import click
 
-from ..core import uses_existing_session
+from ..core import command_uses_existing_session
 
 
-@click.command
-@uses_existing_session
+@command_uses_existing_session
 @click.option("--no-system", is_flag=True)
 def main(obj, no_system):
     """Print session in plaintext"""
src/chap/commands/render.py: 2 additions & 3 deletions

@@ -7,7 +7,7 @@
 from markdown_it import MarkdownIt
 from rich.markdown import Markdown
 
-from ..core import uses_existing_session
+from ..core import command_uses_existing_session
 
 
 def to_markdown(message):
@@ -25,8 +25,7 @@ def to_markdown(message):
     return m
 
 
-@click.command
-@uses_existing_session
+@command_uses_existing_session
 @click.option("--no-system", is_flag=True)
 def main(obj, no_system):
     """Print session with formatting"""
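
The rename in these two commands suggests that command_uses_existing_session folds the former @click.command / @uses_existing_session stack into a single decorator; the deletion counts bear this out, since both old decorator lines disappear in favor of one. A hedged sketch of how such a consolidated decorator could be written (chap's real implementation in ..core is not part of this diff, and load_last_session is a made-up stand-in):

import functools

import click


def load_last_session():
    # Hypothetical stand-in for chap's session loading.
    return {"messages": []}


def uses_existing_session(f):
    # Stand-in for the original decorator: load the session and pass it
    # to the command as its first argument.
    @functools.wraps(f)
    def wrapper(**kwargs):
        return f(load_last_session(), **kwargs)

    return wrapper


def command_uses_existing_session(f):
    # One decorator equivalent to stacking @click.command on top of
    # @uses_existing_session. functools.wraps copies the function's
    # __dict__ (including click's __click_params__), so @click.option
    # decorators applied below it still register with the command.
    return click.command(uses_existing_session(f))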