Merge pull request #25 from jmont-dev/manual_requests
Automatically set streaming when invoking requests, add support for running model endpoint.
jmont-dev authored Aug 18, 2024
2 parents 63632d7 + 584327d commit 7e100c9
Showing 4 changed files with 111 additions and 12 deletions.
15 changes: 11 additions & 4 deletions README.md
@@ -42,7 +42,7 @@ The test cases do a good job of providing discrete examples for each of the API
- [Load a Model into Memory](#load-a-model-into-memory)
- [Pull, Copy, and Delete Models](#pull-copy-and-delete-models)
- [Retrieve Model Info](#retrieve-model-info)
- [List locally available models](#list-locally-available-models)
- [List locally-available and running models](#list-locally-available-and-running-models)
- [Exception Handling](#exception-handling)
- [Basic Generation](#basic-generation)
- [Using Options](#using-options)
@@ -161,14 +161,23 @@ nlohmann::json model_info = ollama::show_model_info("llama3:8b");
std::cout << "Model family is " << model_info["details"]["family"] << std::endl;
```

### List locally available models
### List locally-available and running models
You can query a list of locally-available models on your ollama server using the following. This is returned as a `std::vector` of `std::string`.

```C++
// List the models available locally in the ollama server
std::vector<std::string> models = ollama::list_models();
```

You can similarly query a list of currently-running models on your ollama server using:

```C++
// List the models currently running on the ollama server
std::vector<std::string> models = ollama::list_running_models();
```
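
The returned names can be printed or searched directly. A minimal sketch (the output naturally depends on which models are currently loaded on your server):

```C++
// Print the name of each model currently loaded by the server
for (const std::string& name : ollama::list_running_models())
    std::cout << name << std::endl;
```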

For detailed parameters of these models, you can obtain verbose JSON model descriptions using `ollama::list_model_json()` and `ollama::running_model_json()`.
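
For example, the full JSON can be dumped for inspection; this is a brief sketch, and the exact fields returned depend on your ollama server version:

```C++
// Obtain and print the verbose JSON descriptions of local and running models
nlohmann::json local_models = ollama::list_model_json();
nlohmann::json running_models = ollama::running_model_json();

std::cout << local_models.dump(2) << std::endl;
std::cout << running_models.dump(2) << std::endl;
```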

### Exception Handling
Most calls will throw `ollama::exception` in the event of an error, with details describing what went wrong. Exceptions are enabled by default.

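A minimal sketch of catching these errors is shown below; the model name is a hypothetical placeholder, and this assumes `ollama::exception` exposes the standard `what()` accessor:

```C++
try {
    // Generating with a model that has not been pulled should throw an exception
    ollama::response response = ollama::generate("model-that-does-not-exist", "Why is the sky blue?");
} catch (const ollama::exception& e) {
    std::cout << e.what() << std::endl;
}
```
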
@@ -400,7 +409,6 @@ For those looking for greater control of the requests sent to the ollama server, you can
ollama::request request(ollama::message_type::generation);
request["model"]="mistral";
request["prompt"]="Why is the sky blue?";
request["stream"] = false;
request["system"] = "Talk like a pirate for the next reply."
std::cout << ollama::generate(request) << std::endl;
```
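
Streaming can be combined with manual requests in the same way. The sketch below assumes a namespace-level `ollama::generate` overload that accepts a token callback, mirroring the class method; the `"stream"` field is now set automatically by the call, so it does not need to be specified in the request:

```C++
// Hypothetical streaming variant of a manual request; the callback is invoked for each received token
ollama::request request(ollama::message_type::generation);
request["model"] = "mistral";
request["prompt"] = "Why is the sky blue?";

auto on_receive_token = [](const ollama::response& token)
{
    std::cout << token << std::flush;
};

ollama::generate(request, on_receive_token);
```
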
@@ -430,7 +438,6 @@ ollama::response response = ollama::generate("llama3.1:8b", "Why is the sky blue
ollama::request request(ollama::message_type::generation);
request["model"]="llama3.1:8b";
request["prompt"]="Why is the sky blue?";
request["stream"] = false;
request["context"] = response.as_json()["context"];
std::cout << ollama::generate(request) << std::endl;
```
49 changes: 45 additions & 4 deletions include/ollama.hpp
@@ -395,10 +395,11 @@ class Ollama
}

// Generate a non-streaming reply as a string.
ollama::response generate(const ollama::request& request)
ollama::response generate(ollama::request& request)
{
ollama::response response;

request["stream"] = false;
std::string request_string = request.dump();
if (ollama::log_requests) std::cout << request_string << std::endl;

@@ -435,6 +436,7 @@ class Ollama
// Generate a streaming reply where a user-defined callback function is invoked when each token is received.
bool generate(ollama::request& request, std::function<void(const ollama::response&)> on_receive_token)
{
request["stream"] = true;

std::string request_string = request.dump();
if (ollama::log_requests) std::cout << request_string << std::endl;
@@ -476,6 +478,7 @@
{
ollama::response response;

request["stream"] = false;
std::string request_string = request.dump();
if (ollama::log_requests) std::cout << request_string << std::endl;

@@ -504,7 +507,8 @@

bool chat(ollama::request& request, std::function<void(const ollama::response&)> on_receive_token)
{
ollama::response response;
ollama::response response;
request["stream"] = true;

std::string request_string = request.dump();
if (ollama::log_requests) std::cout << request_string << std::endl;
@@ -640,6 +644,33 @@ class Ollama
return models;
}

json running_model_json()
{
json models;
if (auto res = cli->Get("/api/ps"))
{
if (ollama::log_replies) std::cout << res->body << std::endl;
models = json::parse(res->body);
}
else { if (ollama::use_exceptions) throw ollama::exception("No response returned from server when querying running models: "+httplib::to_string( res.error() ) );}

return models;
}

std::vector<std::string> list_running_models()
{
std::vector<std::string> models;

json json_response = running_model_json();

for (auto& model: json_response["models"])
{
models.push_back(model["name"]);
}

return models;
}

bool blob_exists(const std::string& digest)
{
if (auto res = cli->Head("/api/blobs/"+digest))
@@ -869,12 +900,12 @@ namespace ollama
return ollama.generate(model, prompt, options, images);
}

ollama::response generate(const std::string& model,const std::string& prompt, const ollama::response& context, const json& options=nullptr, const std::vector<std::string>& images=std::vector<std::string>())
inline ollama::response generate(const std::string& model,const std::string& prompt, const ollama::response& context, const json& options=nullptr, const std::vector<std::string>& images=std::vector<std::string>())
{
return ollama.generate(model, prompt, context, options, images);
}

inline ollama::response generate(const ollama::request& request)
inline ollama::response generate(ollama::request& request)
{
return ollama.generate(request);
}
@@ -944,6 +975,16 @@ namespace ollama
return ollama.list_model_json();
}

inline std::vector<std::string> list_running_models()
{
return ollama.list_running_models();
}

inline json running_model_json()
{
return ollama.running_model_json();
}

inline bool blob_exists(const std::string& digest)
{
return ollama.blob_exists(digest);
49 changes: 45 additions & 4 deletions singleheader/ollama.hpp
@@ -35185,10 +35185,11 @@ class Ollama
}

// Generate a non-streaming reply as a string.
ollama::response generate(const ollama::request& request)
ollama::response generate(ollama::request& request)
{
ollama::response response;

request["stream"] = false;
std::string request_string = request.dump();
if (ollama::log_requests) std::cout << request_string << std::endl;

@@ -35225,6 +35226,7 @@ class Ollama
// Generate a streaming reply where a user-defined callback function is invoked when each token is received.
bool generate(ollama::request& request, std::function<void(const ollama::response&)> on_receive_token)
{
request["stream"] = true;

std::string request_string = request.dump();
if (ollama::log_requests) std::cout << request_string << std::endl;
@@ -35266,6 +35268,7 @@
{
ollama::response response;

request["stream"] = false;
std::string request_string = request.dump();
if (ollama::log_requests) std::cout << request_string << std::endl;

@@ -35294,7 +35297,8 @@

bool chat(ollama::request& request, std::function<void(const ollama::response&)> on_receive_token)
{
ollama::response response;
ollama::response response;
request["stream"] = true;

std::string request_string = request.dump();
if (ollama::log_requests) std::cout << request_string << std::endl;
@@ -35430,6 +35434,33 @@ class Ollama
return models;
}

json running_model_json()
{
json models;
if (auto res = cli->Get("/api/ps"))
{
if (ollama::log_replies) std::cout << res->body << std::endl;
models = json::parse(res->body);
}
else { if (ollama::use_exceptions) throw ollama::exception("No response returned from server when querying running models: "+httplib::to_string( res.error() ) );}

return models;
}

std::vector<std::string> list_running_models()
{
std::vector<std::string> models;

json json_response = running_model_json();

for (auto& model: json_response["models"])
{
models.push_back(model["name"]);
}

return models;
}

bool blob_exists(const std::string& digest)
{
if (auto res = cli->Head("/api/blobs/"+digest))
@@ -35659,12 +35690,12 @@ namespace ollama
return ollama.generate(model, prompt, options, images);
}

ollama::response generate(const std::string& model,const std::string& prompt, const ollama::response& context, const json& options=nullptr, const std::vector<std::string>& images=std::vector<std::string>())
inline ollama::response generate(const std::string& model,const std::string& prompt, const ollama::response& context, const json& options=nullptr, const std::vector<std::string>& images=std::vector<std::string>())
{
return ollama.generate(model, prompt, context, options, images);
}

inline ollama::response generate(const ollama::request& request)
inline ollama::response generate(ollama::request& request)
{
return ollama.generate(request);
}
@@ -35734,6 +35765,16 @@ namespace ollama
return ollama.list_model_json();
}

inline std::vector<std::string> list_running_models()
{
return ollama.list_running_models();
}

inline json running_model_json()
{
return ollama.running_model_json();
}

inline bool blob_exists(const std::string& digest)
{
return ollama.blob_exists(digest);
10 changes: 10 additions & 0 deletions test/test.cpp
@@ -88,6 +88,16 @@ TEST_SUITE("Ollama Tests") {
CHECK( contains_model );
}

TEST_CASE("List Running Models") {

// List the models currently running on the ollama server
std::vector<std::string> models = ollama::list_running_models();

bool contains_model = (std::find(models.begin(), models.end(), test_model) != models.end() );

CHECK( contains_model );
}

TEST_CASE("Exception Handling") {

bool exception_handled = false;