diff --git a/README.md b/README.md
index 95693f4..e345ab8 100644
--- a/README.md
+++ b/README.md
@@ -56,6 +56,7 @@ The test cases do a good job of providing discrete examples for each of the API
   - [Chat with Images](#chat-with-images)
   - [Embedding Generation](#embedding-generation)
   - [Debug Information](#debug-information)
+  - [Manual Requests](#manual-requests)
 - [Single-header vs Separate Headers](#single-header-vs-separate-headers)
 - [About this software:](#about-this-software)
 - [License](#license)
@@ -389,7 +390,20 @@ Debug logging for requests and replies to the server can easily be turned on and
 ollama::show_requests(true);
 ollama::show_replies(true);
 ```
-
+
+### Manual Requests
+For those looking for greater control over the requests sent to the ollama server, manual requests can be created through the `ollama::request` class. This class extends `nlohmann::json` and can be treated as a standard JSON object.
+
+```C++
+ollama::request request(ollama::message_type::generation);
+request["model"]="mistral";
+request["prompt"]="Why is the sky blue?";
+request["stream"] = false;
+request["system"] = "Talk like a pirate for the next reply.";
+std::cout << ollama::generate(request) << std::endl;
+```
+This provides the greatest degree of customization over the request. Users should take care to provide valid fields; otherwise, an exception will likely be thrown when the response is received. Manual requests can be made for the generate, chat, and embedding endpoints.
+
 ## Single-header vs Separate Headers
 For convenience, ollama-hpp includes a single-header version of the library in `singleheader/ollama.hpp` which bundles the core ollama.hpp code with single-header versions of nlohmann json, httplib, and base64.h. Each of these libraries is available under the MIT license and their respective licenses are included. The single-header include can be regenerated from these standalone files by running `./make_single_header.sh`
diff --git a/include/ollama.hpp b/include/ollama.hpp
index 2fed82d..a059682 100644
--- a/include/ollama.hpp
+++ b/include/ollama.hpp
@@ -377,12 +377,16 @@ class Ollama
     Ollama(): Ollama("http://localhost:11434") {}
     ~Ollama() { delete this->cli; }
 
-    // Generate a non-streaming reply as a string.
     ollama::response generate(const std::string& model,const std::string& prompt, const json& options=nullptr, const std::vector& images=std::vector())
     {
+        ollama::request request(model, prompt, options, false, images);
+        return generate(request);
+    }
+    // Generate a non-streaming reply as a string.
+    ollama::response generate(const ollama::request& request)
+    {
         ollama::response response;
-        ollama::request request(model, prompt, options, false, images);
 
         std::string request_string = request.dump();
         if (ollama::log_requests) std::cout << request_string << std::endl;
@@ -400,14 +404,19 @@ class Ollama
             if (ollama::use_exceptions) throw ollama::exception("No response returned from server "+this->server_url+". Error was: "+httplib::to_string( res.error() ));
         }
 
-        return response;
+        return response;
     }
 
-    // Generate a streaming reply where a user-defined callback function is invoked when each token is received.
     bool generate(const std::string& model,const std::string& prompt, std::function on_receive_token, const json& options=nullptr, const std::vector& images=std::vector())
     {
-
         ollama::request request(model, prompt, options, true, images);
+        return generate(request, on_receive_token);
+    }
+
+
+    // Generate a streaming reply where a user-defined callback function is invoked when each token is received.
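+    // This overload accepts a manually-constructed ollama::request, giving the caller full control over the fields sent to the server.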
+    bool generate(ollama::request& request, std::function on_receive_token)
+    {
 
         std::string request_string = request.dump();
         if (ollama::log_requests) std::cout << request_string << std::endl;
@@ -428,12 +437,17 @@ class Ollama
         return false;
     }
 
-    // Generate a non-streaming reply as a string.
     ollama::response chat(const std::string& model, const ollama::messages& messages, json options=nullptr, const std::string& format="json", const std::string& keep_alive_duration="5m")
     {
+        ollama::request request(model, messages, options, false, format, keep_alive_duration);
+        return chat(request);
+    }
+
+    // Generate a non-streaming reply as a string.
+    ollama::response chat(ollama::request& request)
+    {
         ollama::response response;
-        ollama::request request(model, messages, options, false, format, keep_alive_duration);
 
         std::string request_string = request.dump();
         if (ollama::log_requests) std::cout << request_string << std::endl;
@@ -456,9 +470,14 @@ class Ollama
     bool chat(const std::string& model, const ollama::messages& messages, std::function on_receive_token, const json& options=nullptr, const std::string& format="json", const std::string& keep_alive_duration="5m")
     {
+        ollama::request request(model, messages, options, true, format, keep_alive_duration);
+        return chat(request, on_receive_token);
+    }
+
+    bool chat(ollama::request& request, std::function on_receive_token)
+    {
         ollama::response response;
-        ollama::request request(model, messages, options, true, format, keep_alive_duration);
 
         std::string request_string = request.dump();
         if (ollama::log_requests) std::cout << request_string << std::endl;
@@ -719,6 +738,12 @@ class Ollama
     ollama::response generate_embeddings(const std::string& model, const std::string& input, const json& options=nullptr, bool truncate = true, const std::string& keep_alive_duration="5m")
     {
         ollama::request request = ollama::request::from_embedding(model, input, options, truncate, keep_alive_duration);
+        return generate_embeddings(request);
+    }
+
+
+    ollama::response generate_embeddings(ollama::request& request)
+    {
         ollama::response response;
 
         std::string request_string = request.dump();
@@ -806,21 +831,41 @@ namespace ollama
         return ollama.generate(model, prompt, options, images);
     }
 
+    inline ollama::response generate(const ollama::request& request)
+    {
+        return ollama.generate(request);
+    }
+
     inline bool generate(const std::string& model,const std::string& prompt, std::function on_receive_response, const json& options=nullptr, const std::vector& images=std::vector())
     {
         return ollama.generate(model, prompt, on_receive_response, options, images);
     }
 
+    inline bool generate(ollama::request& request, std::function on_receive_response)
+    {
+        return ollama.generate(request, on_receive_response);
+    }
+
     inline ollama::response chat(const std::string& model, const ollama::messages& messages, const json& options=nullptr, const std::string& format="json", const std::string& keep_alive_duration="5m")
     {
         return ollama.chat(model, messages, options, format, keep_alive_duration);
     }
 
+    inline ollama::response chat(ollama::request& request)
+    {
+        return ollama.chat(request);
+    }
+
     inline bool chat(const std::string& model, const ollama::messages& messages, std::function on_receive_response, const json& options=nullptr, const std::string& format="json", const std::string& keep_alive_duration="5m")
     {
         return ollama.chat(model, messages, on_receive_response, options, format, keep_alive_duration);
     }
 
+    inline bool chat(ollama::request& request, std::function on_receive_response)
+    {
+        return ollama.chat(request, on_receive_response);
+    }
+
     inline bool create(const std::string& modelName, const std::string& modelFile, bool loadFromFile=true)
     {
         return ollama.create_model(modelName, modelFile, loadFromFile);
@@ -891,6 +936,11 @@ namespace ollama
         return ollama.generate_embeddings(model, input, options, truncate, keep_alive_duration);
     }
 
+    inline ollama::response generate_embeddings(ollama::request& request)
+    {
+        return ollama.generate_embeddings(request);
+    }
+
     inline void setReadTimeout(const int& seconds)
     {
         ollama.setReadTimeout(seconds);
diff --git a/singleheader/ollama.hpp b/singleheader/ollama.hpp
index 95fb5f6..e347048 100644
--- a/singleheader/ollama.hpp
+++ b/singleheader/ollama.hpp
@@ -35167,12 +35167,16 @@ class Ollama
     Ollama(): Ollama("http://localhost:11434") {}
     ~Ollama() { delete this->cli; }
 
-    // Generate a non-streaming reply as a string.
     ollama::response generate(const std::string& model,const std::string& prompt, const json& options=nullptr, const std::vector& images=std::vector())
     {
+        ollama::request request(model, prompt, options, false, images);
+        return generate(request);
+    }
+    // Generate a non-streaming reply as a string.
+    ollama::response generate(const ollama::request& request)
+    {
         ollama::response response;
-        ollama::request request(model, prompt, options, false, images);
 
         std::string request_string = request.dump();
         if (ollama::log_requests) std::cout << request_string << std::endl;
@@ -35190,14 +35194,19 @@ class Ollama
             if (ollama::use_exceptions) throw ollama::exception("No response returned from server "+this->server_url+". Error was: "+httplib::to_string( res.error() ));
         }
 
-        return response;
+        return response;
     }
 
-    // Generate a streaming reply where a user-defined callback function is invoked when each token is received.
     bool generate(const std::string& model,const std::string& prompt, std::function on_receive_token, const json& options=nullptr, const std::vector& images=std::vector())
    {
-
         ollama::request request(model, prompt, options, true, images);
+        return generate(request, on_receive_token);
+    }
+
+
+    // Generate a streaming reply where a user-defined callback function is invoked when each token is received.
+    bool generate(ollama::request& request, std::function on_receive_token)
+    {
 
         std::string request_string = request.dump();
         if (ollama::log_requests) std::cout << request_string << std::endl;
@@ -35218,12 +35227,17 @@ class Ollama
         return false;
     }
 
-    // Generate a non-streaming reply as a string.
     ollama::response chat(const std::string& model, const ollama::messages& messages, json options=nullptr, const std::string& format="json", const std::string& keep_alive_duration="5m")
     {
+        ollama::request request(model, messages, options, false, format, keep_alive_duration);
+        return chat(request);
+    }
+
+    // Generate a non-streaming reply as a string.
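+    // This overload accepts a manually-constructed ollama::request, giving the caller full control over the fields sent to the server.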
+    ollama::response chat(ollama::request& request)
+    {
         ollama::response response;
-        ollama::request request(model, messages, options, false, format, keep_alive_duration);
 
         std::string request_string = request.dump();
         if (ollama::log_requests) std::cout << request_string << std::endl;
@@ -35246,9 +35260,14 @@ class Ollama
     bool chat(const std::string& model, const ollama::messages& messages, std::function on_receive_token, const json& options=nullptr, const std::string& format="json", const std::string& keep_alive_duration="5m")
     {
+        ollama::request request(model, messages, options, true, format, keep_alive_duration);
+        return chat(request, on_receive_token);
+    }
+
+    bool chat(ollama::request& request, std::function on_receive_token)
+    {
         ollama::response response;
-        ollama::request request(model, messages, options, true, format, keep_alive_duration);
 
         std::string request_string = request.dump();
         if (ollama::log_requests) std::cout << request_string << std::endl;
@@ -35509,6 +35528,12 @@ class Ollama
     ollama::response generate_embeddings(const std::string& model, const std::string& input, const json& options=nullptr, bool truncate = true, const std::string& keep_alive_duration="5m")
     {
         ollama::request request = ollama::request::from_embedding(model, input, options, truncate, keep_alive_duration);
+        return generate_embeddings(request);
+    }
+
+
+    ollama::response generate_embeddings(ollama::request& request)
+    {
         ollama::response response;
 
         std::string request_string = request.dump();
@@ -35596,21 +35621,41 @@ namespace ollama
         return ollama.generate(model, prompt, options, images);
     }
 
+    inline ollama::response generate(const ollama::request& request)
+    {
+        return ollama.generate(request);
+    }
+
     inline bool generate(const std::string& model,const std::string& prompt, std::function on_receive_response, const json& options=nullptr, const std::vector& images=std::vector())
     {
         return ollama.generate(model, prompt, on_receive_response, options, images);
     }
 
+    inline bool generate(ollama::request& request, std::function on_receive_response)
+    {
+        return ollama.generate(request, on_receive_response);
+    }
+
     inline ollama::response chat(const std::string& model, const ollama::messages& messages, const json& options=nullptr, const std::string& format="json", const std::string& keep_alive_duration="5m")
     {
         return ollama.chat(model, messages, options, format, keep_alive_duration);
     }
 
+    inline ollama::response chat(ollama::request& request)
+    {
+        return ollama.chat(request);
+    }
+
     inline bool chat(const std::string& model, const ollama::messages& messages, std::function on_receive_response, const json& options=nullptr, const std::string& format="json", const std::string& keep_alive_duration="5m")
     {
         return ollama.chat(model, messages, on_receive_response, options, format, keep_alive_duration);
     }
 
+    inline bool chat(ollama::request& request, std::function on_receive_response)
+    {
+        return ollama.chat(request, on_receive_response);
+    }
+
     inline bool create(const std::string& modelName, const std::string& modelFile, bool loadFromFile=true)
     {
         return ollama.create_model(modelName, modelFile, loadFromFile);
@@ -35681,6 +35726,11 @@ namespace ollama
         return ollama.generate_embeddings(model, input, options, truncate, keep_alive_duration);
     }
 
+    inline ollama::response generate_embeddings(ollama::request& request)
+    {
+        return ollama.generate_embeddings(request);
+    }
+
     inline void setReadTimeout(const int& seconds)
     {
         ollama.setReadTimeout(seconds);
diff --git a/test/test.cpp b/test/test.cpp
index 10cf894..58ebc11 100644
--- a/test/test.cpp
+++ b/test/test.cpp
@@ -241,6 +241,36 @@ TEST_SUITE("Ollama Tests") {
         CHECK(response.as_json().contains("embeddings") == true);
     }
 
+    TEST_CASE("Manual Requests") {
+
+        ollama::request request(ollama::message_type::generation);
+        request["model"] = test_model;
+        request["prompt"] = "Why is the sky blue?";
+        request["stream"] = false;
+        ollama::response response = ollama::generate(request);
+
+        CHECK(response.as_json().contains("response") == true);
+
+        request = ollama::request(ollama::message_type::chat);
+        request["model"] = test_model;
+        ollama::messages messages = { ollama::message("user","Why is the sky blue?") };
+        request["messages"] = messages.to_json();
+        request["stream"] = false;
+        response = ollama::chat(request);
+
+        CHECK(response.as_json().contains("message") == true);
+
+        request = ollama::request(ollama::message_type::embedding);
+        request["model"] = test_model;
+        request["input"] = "Why is the sky blue?";
+        request["stream"] = false;
+        response = ollama::generate_embeddings(request);
+
+        CHECK(response.as_json().contains("embeddings") == true);
+    }
+
+
     TEST_CASE("Enable Debug Logging") {
 
         ollama::show_requests(true);