-
Notifications
You must be signed in to change notification settings - Fork 8
/
ask-llm.js
executable file
·152 lines (136 loc) · 4.74 KB
/
ask-llm.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
#!/usr/bin/env node
const readline = require('readline');
// Runtime configuration, read once from the environment at start-up.

// Passed through unchanged; both stay undefined when the variable is not set.
// LLM_CHAT_MODEL selects the model name, LLM_DEBUG (any non-empty value)
// turns on the extra diagnostics.
const {
  LLM_CHAT_MODEL,
  LLM_DEBUG,
} = process.env;

// Root of the chat-completions compatible API; defaults to OpenAI.
const LLM_API_BASE_URL = process.env.LLM_API_BASE_URL || 'https://api.openai.com/v1';

// Bearer token: the generic variable wins, OPENAI_API_KEY is the fallback.
const LLM_API_KEY = process.env.LLM_API_KEY || process.env.OPENAI_API_KEY;

// Streaming is enabled unless LLM_STREAMING is exactly the string 'no'.
const LLM_STREAMING = process.env.LLM_STREAMING !== 'no';
/**
* Represents a chat message.
*
* @typedef {Object} Message
* @property {'system'|'user'|'assistant'} role
* @property {string} content
*/
/**
* A callback function to stream the completion.
*
* @callback CompletionHandler
* @param {string} text
* @returns {void}
*/
/**
 * Generates a chat completion using a RESTful LLM API service.
 *
 * The messages are POSTed to `${LLM_API_BASE_URL}/chat/completions`. When
 * LLM_STREAMING is enabled and `handler` is a function, the reply is requested
 * as a server-sent event stream and each text fragment is handed to `handler`
 * as soon as it arrives. Otherwise the whole reply is fetched in one piece and
 * passed to `handler` (if given) in a single call.
 *
 * @param {Array<Message>} messages - List of chat messages.
 * @param {CompletionHandler=} handler - An optional callback to stream the completion.
 * @returns {Promise<string>} The completion generated by the LLM.
 * @throws {Error} When the HTTP status is not OK, or when a non-streaming
 * reply carries no text content in its first choice.
 */
const chat = async (messages, handler) => {
  const url = `${LLM_API_BASE_URL}/chat/completions`;
  const auth = LLM_API_KEY ? { 'Authorization': `Bearer ${LLM_API_KEY}` } : {};
  const model = LLM_CHAT_MODEL || 'gpt-4o-mini';
  const stop = ['<|im_end|>', '<|end|>', '<|eot_id|>'];
  // snake_case on purpose: the shorthand property becomes the JSON key.
  const max_tokens = 200;
  const temperature = 0;
  const stream = LLM_STREAMING && typeof handler === 'function';

  const response = await fetch(url, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json', ...auth },
    body: JSON.stringify({ messages, model, stop, max_tokens, temperature, stream }),
  });
  if (!response.ok) {
    throw new Error(`HTTP error with the status: ${response.status} ${response.statusText}`);
  }

  if (!stream) {
    const data = await response.json();
    const content = data?.choices?.[0]?.message?.content;
    if (typeof content !== 'string') {
      throw new Error('Unexpected response: the first choice has no message content');
    }
    const answer = content.trim();
    if (handler) {
      handler(answer);
    }
    return answer;
  }

  let answer = '';

  // Appends one streamed fragment to the answer and forwards it to the handler.
  const emit = (fragment) => {
    // Only whitespace before the first visible character is dropped; trailing
    // whitespace must survive because it separates this fragment from the next.
    const text = answer.length > 0 ? fragment : fragment.trimStart();
    if (text.length === 0) {
      return;
    }
    answer += text;
    handler(text);
  };

  // Handles one complete line of the event stream.
  // Returns true once the `[DONE]` end marker has been seen.
  const consume = (rawLine) => {
    const line = rawLine.trim();
    // Blank separators, `:` comments and other fields (event:, id:, retry:)
    // carry no completion text.
    if (!line.startsWith('data:')) {
      return false;
    }
    const payload = line.substring(5).trim();
    if (payload === '[DONE]') {
      return true;
    }
    let fragment;
    try {
      fragment = JSON.parse(payload)?.choices?.[0]?.delta?.content;
    } catch (e) {
      // Best effort: a malformed event is skipped rather than aborting the
      // whole completion.
      return false;
    }
    if (typeof fragment === 'string') {
      emit(fragment);
    }
    return false;
  };

  const reader = response.body.getReader();
  const decoder = new TextDecoder();
  let pending = ''; // text received after the last newline, i.e. an incomplete line
  let finished = false;

  while (!finished) {
    const { value, done } = await reader.read();
    if (done) {
      break;
    }
    // `stream: true` keeps a multi-byte character that straddles two chunks
    // inside the decoder until its remaining bytes arrive.
    pending += decoder.decode(value, { stream: true });
    const lines = pending.split('\n');
    // The last piece has no terminating newline yet; keep it for the next chunk.
    pending = lines.pop();
    for (const line of lines) {
      if (consume(line)) {
        finished = true;
        break;
      }
    }
  }

  if (finished) {
    // Nothing after the end marker is of interest; release the connection.
    await reader.cancel();
  } else {
    // The stream ended without an end marker: flush the decoder and process a
    // final line that was not newline-terminated.
    pending += decoder.decode();
    consume(pending);
  }

  return answer;
};
// Instruction placed at the start of every conversation.
const SYSTEM_PROMPT = 'Answer the question politely and concisely.';

/**
 * Interactive entry point.
 *
 * Reads questions from standard input, sends the accumulated conversation to
 * the LLM via `chat`, and writes the reply to standard output as it streams
 * in. The loop stops once the readline interface is closed.
 */
(async () => {
  console.log(`Using LLM at ${LLM_API_BASE_URL}.`);
  console.log('Press Ctrl+D to exit.');
  console.log();

  // Conversation history, resent in full with every request.
  const messages = [];
  messages.push({ role: 'system', content: SYSTEM_PROMPT });

  let loop = true;
  const io = readline.createInterface({ input: process.stdin, output: process.stdout });
  io.on('close', () => { loop = false; });

  const qa = () => {
    io.question('>> ', async (question) => {
      messages.push({ role: 'user', content: question });
      const start = Date.now();
      try {
        const answer = await chat(messages, (str) => process.stdout.write(str));
        messages.push({ role: 'assistant', content: answer.trim() });
        console.log();
        const elapsed = Date.now() - start;
        LLM_DEBUG && console.log(`[${elapsed} ms]`);
      } catch (err) {
        // A failed request must not end the session through an unhandled
        // rejection. Drop the unanswered question so the history stays a
        // valid alternation of turns, report the problem, and keep prompting.
        messages.pop();
        console.log(); // terminate any partially streamed line
        console.error(`Error: ${err.message}`);
        // Non-interactive callers still see a failing exit status.
        process.exitCode = 1;
      }
      console.log();
      // Do not prompt again once the input has been closed.
      loop && qa();
    });
  };

  qa();
})();