diff --git a/include/ollama.hpp b/include/ollama.hpp
index a059682..1e4cce5 100644
--- a/include/ollama.hpp
+++ b/include/ollama.hpp
@@ -62,8 +62,10 @@
 #include "Base64.h"
 
 #include <string>
+#include <memory>
 #include <fstream>
 #include <iostream>
+#include <numeric>
 #include <functional>
 #include <exception>
 #include <vector>
@@ -421,13 +423,22 @@ class Ollama
         std::string request_string = request.dump();
         if (ollama::log_requests) std::cout << request_string << std::endl;
 
-        auto stream_callback = [on_receive_token](const char *data, size_t data_length)->bool{
+        std::shared_ptr<std::vector<std::string>> partial_responses = std::make_shared<std::vector<std::string>>();
+
+        auto stream_callback = [on_receive_token, partial_responses](const char *data, size_t data_length)->bool{
 
             std::string message(data, data_length);
             if (ollama::log_replies) std::cout << message << std::endl;
-            ollama::response response(message);
-            on_receive_token(response);
-
+            try
+            {
+                partial_responses->push_back(message);
+                std::string total_response = std::accumulate(partial_responses->begin(), partial_responses->end(), std::string(""));
+                ollama::response response(total_response);
+                partial_responses->clear();
+                on_receive_token(response);
+            }
+            catch (...) { /* Partial response was received. Will do nothing and attempt to concatenate with the next response. */ }
+
             return true;
         };
 
@@ -810,6 +821,7 @@ class Ollama
         return true;
     }
 
+    std::string server_url;
     httplib::Client *cli;
 
 
diff --git a/singleheader/ollama.hpp b/singleheader/ollama.hpp
index e347048..5325a59 100644
--- a/singleheader/ollama.hpp
+++ b/singleheader/ollama.hpp
@@ -34852,8 +34852,10 @@ class Base64 {
 */
 
 #include <string>
+#include <memory>
 #include <fstream>
 #include <iostream>
+#include <numeric>
 #include <functional>
 #include <exception>
 #include <vector>
@@ -35211,13 +35213,22 @@ class Ollama
         std::string request_string = request.dump();
         if (ollama::log_requests) std::cout << request_string << std::endl;
 
-        auto stream_callback = [on_receive_token](const char *data, size_t data_length)->bool{
+        std::shared_ptr<std::vector<std::string>> partial_responses = std::make_shared<std::vector<std::string>>();
+
+        auto stream_callback = [on_receive_token, partial_responses](const char *data, size_t data_length)->bool{
 
             std::string message(data, data_length);
             if (ollama::log_replies) std::cout << message << std::endl;
-            ollama::response response(message);
-            on_receive_token(response);
-
+            try
+            {
+                partial_responses->push_back(message);
+                std::string total_response = std::accumulate(partial_responses->begin(), partial_responses->end(), std::string(""));
+                ollama::response response(total_response);
+                partial_responses->clear();
+                on_receive_token(response);
+            }
+            catch (...) { /* Partial response was received. Will do nothing and attempt to concatenate with the next response. */ }
+
             return true;
         };
 
@@ -35600,6 +35611,7 @@ class Ollama
         return true;
     }
 
+    std::string server_url;
     httplib::Client *cli;
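
For reviewers, here is a minimal standalone sketch of the accumulate-and-retry idea the new stream_callback relies on: streamed chunks are buffered until their concatenation parses as complete JSON, then the buffer is cleared and the message is delivered. It uses nlohmann::json directly (the JSON parser this library bundles as json.hpp); the handle_chunk helper, the <nlohmann/json.hpp> include path, and the example chunk payloads are illustrative assumptions, not part of this patch.

#include <iostream>
#include <memory>
#include <numeric>
#include <string>
#include <vector>

#include <nlohmann/json.hpp>  // assumed install path; this library ships it as json.hpp

// Buffer chunks until their concatenation parses as a complete JSON object,
// then deliver it and clear the buffer -- the same strategy as the patched callback.
bool handle_chunk(const std::shared_ptr<std::vector<std::string>>& partial,
                  const std::string& chunk)
{
    partial->push_back(chunk);
    std::string total = std::accumulate(partial->begin(), partial->end(), std::string(""));
    try
    {
        nlohmann::json parsed = nlohmann::json::parse(total); // throws on incomplete JSON
        partial->clear();
        std::cout << parsed.value("response", "") << std::flush;
        return true;  // a complete message was delivered
    }
    catch (const nlohmann::json::parse_error&)
    {
        return false; // still partial; wait for the next chunk
    }
}

int main()
{
    auto partial = std::make_shared<std::vector<std::string>>();
    // One JSON reply split across two HTTP chunks, as can happen while streaming.
    handle_chunk(partial, R"({"model":"llama3","response":"Hel)");
    handle_chunk(partial, R"(lo","done":false})");
    std::cout << std::endl;
}

One observation on the patch itself: because on_receive_token(response) sits inside the try block, the blanket catch (...) will also swallow exceptions thrown by the user's callback; catching only the JSON parse failure would avoid that, at the cost of a slightly longer handler.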