-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathexample_server.py
178 lines (131 loc) · 4.01 KB
/
example_server.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
"""Local server to run a model locally and keep it loaded for faster prompting.
Usage guide:
## Run the server in the foreground (debug)
```bash
python examples/example_server.py server
```
## Run the server in the background
```bash
python examples/example_server.py start
```
You can test if the server is available by running the following command:
```bash
curl localhost:8000
watch -n 1 curl localhost:8000 # To check every second
```
The server may take a few seconds to start because it must load the model first.
## Stop the server
```bash
python examples/example_server.py stop
```
## Call the server
### With an inline prompt
```bash
python examples/example_server.py call -p "Hello, how are you ?"
```
### With a prompt from a file
```bash
python examples/example_server.py call -f prompt.txt
```
"""
import click
import httpx
PORT = 8000
@click.command("start")
def start():
    """Launches the local server"""
    # NOTE: click uses the docstring above as this command's --help text, so
    # the longer explanation lives in comments.
    #
    # Spawns `python -m examples.example_server server` as a child process and
    # appends its stdout/stderr to ./process.log. Does nothing if something
    # already answers HTTP on localhost:PORT.
    import subprocess
    import sys

    # Test if the server is already running: any HTTP answer on the port means
    # a server is listening, so a second one must not be spawned.
    try:
        httpx.get(f"http://localhost:{PORT}")
    except httpx.ConnectError:
        # Nothing is listening on the port: go on and launch the server.
        pass
    else:
        click.echo("Local server already running.")
        return

    # Use the interpreter that is running this CLI so the child process sees
    # the same environment (virtualenv, installed packages). A bare "python"
    # is resolved through PATH and may be a different interpreter.
    interpreter = sys.executable or "python"

    # Launch the server and write the logs to a logfile
    with open("process.log", "a") as log_file:
        process = subprocess.Popen(
            [
                interpreter,
                "-m",
                "examples.example_server",
                "server",
            ],
            stdout=log_file,
            stderr=log_file,
            universal_newlines=True,
        )
    click.echo(f"Model started (PID {process.pid}).")
@click.command("stop")
def stop():
    """Stops the local server"""
    # NOTE: click uses the docstring above as this command's --help text.
    #
    # Sends a GET to the server's /stop route and reports the outcome:
    #   * connection refused           -> no server was running;
    #   * connection dropped mid-reply -> the server process exited, which is
    #     the expected result of a successful stop;
    #   * a normal HTTP response       -> the server is still alive, so the
    #     stop request did not work.
    stop_url = f"http://localhost:{PORT}/stop"
    try:
        httpx.get(stop_url)
    except httpx.ConnectError:
        outcome = "Local server not running."
    except httpx.RemoteProtocolError:
        outcome = "Local server stopped."
    else:
        outcome = "Something went wrong."
    click.echo(outcome)
@click.command("call")
@click.option("--prompt", "-p", help="The prompt to send to the local server.")
@click.option(
    "--file",
    "-f",
    help="The file from which to read the prompt to send to the local server.",
)
def call(prompt: str | None, file: str | None):
    """Calls the local server"""
    # NOTE: click uses the docstring above as this command's --help text.
    #
    # Sends the prompt to the server's /call route and prints the response
    # body. The prompt comes from --prompt when given; otherwise it is read
    # from --file. If neither is provided, a hint is printed and nothing is
    # sent.
    if prompt is None:
        if file is None:
            click.echo("Please provide a prompt or a file.")
            return
        # The prompt is forwarded as JSON, so decode the file explicitly as
        # UTF-8 rather than with the platform-dependent default encoding.
        with open(file, "r", encoding="utf-8") as f:
            prompt = f.read()

    timeout_seconds = 40
    try:
        response = httpx.post(
            f"http://localhost:{PORT}/call",
            json={"text": prompt, "seed": None},
            timeout=timeout_seconds,
        )
    except httpx.ConnectError:
        click.echo("Local server not running.")
    except httpx.TimeoutException:
        # Report a timeout in one line instead of an httpx traceback.
        click.echo(f"Local server did not answer within {timeout_seconds} seconds.")
    else:
        click.echo(response.text)
@click.group()
def main() -> None:
    # Root click group of the script. It does no work itself; the
    # sub-commands are attached to it with main.add_command(...) at the bottom
    # of the file. Deliberately left without a docstring so the group's --help
    # output is unchanged.
    return None
###################################################################################################
# LLM SERVER #
###################################################################################################
@click.command("server")
@click.option(
    "--model-name",
    default="/mnt/datastore/models/mistralai/Mistral-7B-Instruct-v0.2",
    show_default=True,
    help="Value passed as model_name to MistralLocalModel.",
)
def server(model_name: str):
    """Start a Flask server to keep the LLM loaded"""
    # NOTE: click uses the docstring above as this command's --help text.
    #
    # Loads the model once, then serves three routes on localhost:PORT:
    #   GET  /      -> static banner, used as a liveness probe;
    #   POST /call  -> runs the model on the JSON payload;
    #   GET  /stop  -> terminates the server process.
    # Runs in the foreground until the process is stopped.

    # Heavy / optional dependencies are imported lazily so the other CLI
    # commands work without them, and before loading the model so a missing
    # package fails fast.
    from flask import Flask, request

    from llm_mediator_simulation.models.mistral_local_model import MistralLocalModel

    # Load the model
    model = MistralLocalModel(
        model_name=model_name,
        max_length=500,
        json=True,
    )

    app = Flask("LLM Server")

    @app.route("/")
    def home():
        # Liveness probe: the `start` command treats any answer on this port
        # as "server already running".
        return "Local LLM Server"

    @app.route("/call", methods=["POST"])
    def call():
        # Expects a JSON object with the keys "text" and "seed", as sent by
        # the `call` CLI command.
        data = request.get_json()
        text = data.get("text")
        seed = data.get("seed")
        # NOTE(review): the return value of model.sample is handed to Flask
        # as-is; presumably a str - confirm against MistralLocalModel.
        return model.sample(text, seed=seed)

    @app.route("/stop")
    def stop():
        """Stop the server"""
        import os

        # Exit immediately, without sending an HTTP response. The `stop` CLI
        # command treats the resulting dropped connection as success.
        os._exit(0)

    app.run(port=PORT)
# Attach every sub-command to the root group, in the original order.
for _command in (start, stop, call, server):
    main.add_command(_command)

# Run the CLI only when the file is executed as a script or with `python -m`.
if __name__ == "__main__":
    main()