Automatically set streaming when invoking requests, add support for running model endpoint. #25

Merged 1 commit on Aug 18, 2024
README.md (15 changes: 11 additions & 4 deletions)
@@ -42,7 +42,7 @@ The test cases do a good job of providing discrete examples for each of the API
- [Load a Model into Memory](#load-a-model-into-memory)
- [Pull, Copy, and Delete Models](#pull-copy-and-delete-models)
- [Retrieve Model Info](#retrieve-model-info)
- [List locally available models](#list-locally-available-models)
- [List locally-available and running models](#list-locally-available-and-running-models)
- [Exception Handling](#exception-handling)
- [Basic Generation](#basic-generation)
- [Using Options](#using-options)
@@ -161,14 +161,23 @@ nlohmann::json model_info = ollama::show_model_info("llama3:8b");
std::cout << "Model family is " << model_info["details"]["family"] << std::endl;
```

### List locally available models
### List locally-available and running models
You can query a list of locally-available models on your ollama server using the following call. The list is returned as a `std::vector` of `std::string`.

```C++
// List the models available locally in the ollama server
std::vector<std::string> models = ollama::list_models();
```

You can similarly query a list of currently-running models on your ollama server using:

```C++
// List the models currently running on the ollama server
std::vector<std::string> models = ollama::list_running_models();
```

For detailed parameters of these models, you can obtain verbose JSON model descriptions using `ollama::list_model_json()` and `ollama::running_model_json()`.
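
As a minimal sketch of working with those verbose descriptions, the example below assumes the returned JSON mirrors the Ollama `/api/tags` and `/api/ps` responses, where each entry under `models` carries `name` and `size` fields.

```C++
// Inspect the verbose JSON descriptions rather than just the model names.
nlohmann::json local_models = ollama::list_model_json();
nlohmann::json running_models = ollama::running_model_json();

std::cout << local_models["models"].size() << " models are available locally." << std::endl;
for (auto& model : running_models["models"])
    std::cout << model["name"] << " is loaded and using " << model["size"] << " bytes." << std::endl;
```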

### Exception Handling
Most calls will throw `ollama::exception` in the event of an error, with details on the exception that has occurred. Exceptions are enabled by default.

@@ -400,7 +409,6 @@ For those looking for greater control of the requests sent to the ollama server,
ollama::request request(ollama::message_type::generation);
request["model"]="mistral";
request["prompt"]="Why is the sky blue?";
request["stream"] = false;
request["system"] = "Talk like a pirate for the next reply."
std::cout << ollama::generate(request) << std::endl;
```
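
With this change, the request-based calls manage the `stream` field for you: the blocking call above forces it to `false`, and the callback overload sets it to `true`. The following is a rough sketch of the streaming variant; it assumes a free-function `ollama::generate(request, on_receive_token)` wrapper matching the class method in the header, and that `ollama::response` prints via `operator<<` as in the example above.

```C++
// Stream tokens from a manually-built request; "stream" is set to true automatically.
ollama::request request(ollama::message_type::generation);
request["model"] = "mistral";
request["prompt"] = "Why is the sky blue?";

auto on_receive_token = [](const ollama::response& token) { std::cout << token << std::flush; };
ollama::generate(request, on_receive_token);
```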
@@ -430,7 +438,6 @@ ollama::response response = ollama::generate("llama3.1:8b", "Why is the sky blue
ollama::request request(ollama::message_type::generation);
request["model"]="llama3.1:8b";
request["prompt"]="Why is the sky blue?";
request["stream"] = false;
request["context"] = response.as_json()["context"];
std::cout << ollama::generate(request) << std::endl;
```
include/ollama.hpp (49 changes: 45 additions & 4 deletions)
@@ -395,10 +395,11 @@ class Ollama
}

// Generate a non-streaming reply as a string.
ollama::response generate(const ollama::request& request)
ollama::response generate(ollama::request& request)
{
ollama::response response;

request["stream"] = false;
std::string request_string = request.dump();
if (ollama::log_requests) std::cout << request_string << std::endl;

@@ -435,6 +436,7 @@
// Generate a streaming reply where a user-defined callback function is invoked when each token is received.
bool generate(ollama::request& request, std::function<void(const ollama::response&)> on_receive_token)
{
request["stream"] = true;

std::string request_string = request.dump();
if (ollama::log_requests) std::cout << request_string << std::endl;
@@ -476,6 +478,7 @@
{
ollama::response response;

request["stream"] = false;
std::string request_string = request.dump();
if (ollama::log_requests) std::cout << request_string << std::endl;

@@ -504,7 +507,8 @@

bool chat(ollama::request& request, std::function<void(const ollama::response&)> on_receive_token)
{
ollama::response response;
request["stream"] = true;

std::string request_string = request.dump();
if (ollama::log_requests) std::cout << request_string << std::endl;
@@ -640,6 +644,33 @@ class Ollama
return models;
}

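// Return the verbose JSON description of the currently-running models, as reported by the server's /api/ps endpoint.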
json running_model_json()
{
json models;
if (auto res = cli->Get("/api/ps"))
{
if (ollama::log_replies) std::cout << res->body << std::endl;
models = json::parse(res->body);
}
else { if (ollama::use_exceptions) throw ollama::exception("No response returned from server when querying running models: "+httplib::to_string( res.error() ) );}

return models;
}

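// List the names of the models currently running on the ollama server.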
std::vector<std::string> list_running_models()
{
std::vector<std::string> models;

json json_response = running_model_json();

for (auto& model: json_response["models"])
{
models.push_back(model["name"]);
}

return models;
}

bool blob_exists(const std::string& digest)
{
if (auto res = cli->Head("/api/blobs/"+digest))
@@ -869,12 +900,12 @@ namespace ollama
return ollama.generate(model, prompt, options, images);
}

ollama::response generate(const std::string& model,const std::string& prompt, const ollama::response& context, const json& options=nullptr, const std::vector<std::string>& images=std::vector<std::string>())
inline ollama::response generate(const std::string& model,const std::string& prompt, const ollama::response& context, const json& options=nullptr, const std::vector<std::string>& images=std::vector<std::string>())
{
return ollama.generate(model, prompt, context, options, images);
}

inline ollama::response generate(const ollama::request& request)
inline ollama::response generate(ollama::request& request)
{
return ollama.generate(request);
}
@@ -944,6 +975,16 @@ namespace ollama
return ollama.list_model_json();
}

inline std::vector<std::string> list_running_models()
{
return ollama.list_running_models();
}

inline json running_model_json()
{
return ollama.running_model_json();
}

inline bool blob_exists(const std::string& digest)
{
return ollama.blob_exists(digest);
singleheader/ollama.hpp (49 changes: 45 additions & 4 deletions)
@@ -35185,10 +35185,11 @@ class Ollama
}

// Generate a non-streaming reply as a string.
ollama::response generate(const ollama::request& request)
ollama::response generate(ollama::request& request)
{
ollama::response response;

request["stream"] = false;
std::string request_string = request.dump();
if (ollama::log_requests) std::cout << request_string << std::endl;

@@ -35225,6 +35226,7 @@
// Generate a streaming reply where a user-defined callback function is invoked when each token is received.
bool generate(ollama::request& request, std::function<void(const ollama::response&)> on_receive_token)
{
request["stream"] = true;

std::string request_string = request.dump();
if (ollama::log_requests) std::cout << request_string << std::endl;
@@ -35266,6 +35268,7 @@
{
ollama::response response;

request["stream"] = false;
std::string request_string = request.dump();
if (ollama::log_requests) std::cout << request_string << std::endl;

@@ -35294,7 +35297,8 @@

bool chat(ollama::request& request, std::function<void(const ollama::response&)> on_receive_token)
{
ollama::response response;
request["stream"] = true;

std::string request_string = request.dump();
if (ollama::log_requests) std::cout << request_string << std::endl;
@@ -35430,6 +35434,33 @@ class Ollama
return models;
}

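// Return the verbose JSON description of the currently-running models, as reported by the server's /api/ps endpoint.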
json running_model_json()
{
json models;
if (auto res = cli->Get("/api/ps"))
{
if (ollama::log_replies) std::cout << res->body << std::endl;
models = json::parse(res->body);
}
else { if (ollama::use_exceptions) throw ollama::exception("No response returned from server when querying running models: "+httplib::to_string( res.error() ) );}

return models;
}

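// List the names of the models currently running on the ollama server.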
std::vector<std::string> list_running_models()
{
std::vector<std::string> models;

json json_response = running_model_json();

for (auto& model: json_response["models"])
{
models.push_back(model["name"]);
}

return models;
}

bool blob_exists(const std::string& digest)
{
if (auto res = cli->Head("/api/blobs/"+digest))
@@ -35659,12 +35690,12 @@ namespace ollama
return ollama.generate(model, prompt, options, images);
}

ollama::response generate(const std::string& model,const std::string& prompt, const ollama::response& context, const json& options=nullptr, const std::vector<std::string>& images=std::vector<std::string>())
inline ollama::response generate(const std::string& model,const std::string& prompt, const ollama::response& context, const json& options=nullptr, const std::vector<std::string>& images=std::vector<std::string>())
{
return ollama.generate(model, prompt, context, options, images);
}

inline ollama::response generate(const ollama::request& request)
inline ollama::response generate(ollama::request& request)
{
return ollama.generate(request);
}
@@ -35734,6 +35765,16 @@ namespace ollama
return ollama.list_model_json();
}

inline std::vector<std::string> list_running_models()
{
return ollama.list_running_models();
}

inline json running_model_json()
{
return ollama.running_model_json();
}

inline bool blob_exists(const std::string& digest)
{
return ollama.blob_exists(digest);
test/test.cpp (10 changes: 10 additions & 0 deletions)
@@ -88,6 +88,16 @@ TEST_SUITE("Ollama Tests") {
CHECK( contains_model );
}

TEST_CASE("List Running Models") {

// List the models currently running on the ollama server
std::vector<std::string> models = ollama::list_running_models();

bool contains_model = (std::find(models.begin(), models.end(), test_model) != models.end() );

CHECK( contains_model );
}

TEST_CASE("Exception Handling") {

bool exception_handled = false;