diff --git a/PIPREADME.md b/PIPREADME.md
index 629eae9..e0c581b 100644
--- a/PIPREADME.md
+++ b/PIPREADME.md
@@ -19,36 +19,6 @@
 pip install intelli
 
 # Code Examples
 
-## Create AI Flows
-You can create a flow of tasks executed by different AI models. Here's an example of creating a blog post flow:
-- ChatGPT agent to write a post.
-- Google gemini agent to write image description.
-- Stable diffusion to generate images.
-
-```python
-from intelli.flow.agents.agent import Agent
-from intelli.flow.tasks.task import Task
-from intelli.flow.sequence_flow import SequenceFlow
-from intelli.flow.input.task_input import TextTaskInput
-from intelli.flow.processors.basic_processor import TextProcessor
-
-# define agents
-blog_agent = Agent(agent_type='text', provider='openai', mission='write blog posts', model_params={'key': YOUR_OPENAI_API_KEY, 'model': 'gpt-4'})
-copy_agent = Agent(agent_type='text', provider='gemini', mission='generate description', model_params={'key': YOUR_GEMINI_API_KEY, 'model': 'gemini'})
-artist_agent = Agent(agent_type='image', provider='stability', mission='generate image', model_params={'key': YOUR_STABILITY_API_KEY})
-
-# define tasks
-task1 = Task(TextTaskInput('blog post about electric cars'), blog_agent, log=True)
-task2 = Task(TextTaskInput('Generate short image description for image model'), copy_agent, pre_process=TextProcessor.text_head, log=True)
-task3 = Task(TextTaskInput('Generate cartoon style image'), artist_agent, log=True)
-
-# start sequence flow
-flow = SequenceFlow([task1, task2, task3], log=True)
-final_result = flow.start()
-```
-
-To build async AI flows with multiple paths, refer to the [flow tutorial](https://github.com/intelligentnode/Intelli/wiki/Flows).
-
 ## Create Chatbot
 Switch between multiple chatbot providers without changing your code.
 
@@ -62,7 +32,7 @@ def call_chatbot(provider, model=None):
     input.add_user_message("What is the capital of France?")
 
     # creating chatbot instance
-    openai_bot = Chatbot(YOUR_OPENAI_API_KEY, "openai")
+    openai_bot = Chatbot(YOUR_API_KEY, provider)
     response = openai_bot.chat(input)
 
     return response
@@ -73,11 +43,10 @@
 # call openai
 call_chatbot("openai", "gpt-4")
 
 # call mistralai
 call_chatbot("mistral", "mistral-medium")
 
-# call gooogle gemini
+# call google gemini
 call_chatbot("gemini")
 ```
 
-
 ## Connect Your Docs With Chatbot
 IntelliPy allows you to chat with your docs using multiple LLMs. To connect your data, visit the [IntelliNode App](https://app.intellinode.ai/), start a project using the Document option, upload your documents or images, and copy the generated One Key. This key will be used to connect the chatbot to your uploaded data.
@@ -110,6 +79,36 @@ wrapper = RemoteImageModel(your_api_key, provider)
 results = wrapper.generate_images(image_input)
 ```
 
+## Create AI Flows
+You can create a flow of tasks executed by different AI models. Here's an example of creating a blog post flow:
+- A ChatGPT agent to write the post.
+- A Google Gemini agent to write the image description.
+- Stable Diffusion to generate the images.
+
+```python
+from intelli.flow.agents.agent import Agent
+from intelli.flow.tasks.task import Task
+from intelli.flow.sequence_flow import SequenceFlow
+from intelli.flow.input.task_input import TextTaskInput
+from intelli.flow.processors.basic_processor import TextProcessor
+
+# define agents
+blog_agent = Agent(agent_type='text', provider='openai', mission='write blog posts', model_params={'key': YOUR_OPENAI_API_KEY, 'model': 'gpt-4'})
+copy_agent = Agent(agent_type='text', provider='gemini', mission='generate description', model_params={'key': YOUR_GEMINI_API_KEY, 'model': 'gemini'})
+artist_agent = Agent(agent_type='image', provider='stability', mission='generate image', model_params={'key': YOUR_STABILITY_API_KEY})
+
+# define tasks
+task1 = Task(TextTaskInput('blog post about electric cars'), blog_agent, log=True)
+task2 = Task(TextTaskInput('Generate short image description for image model'), copy_agent, pre_process=TextProcessor.text_head, log=True)
+task3 = Task(TextTaskInput('Generate cartoon style image'), artist_agent, log=True)
+
+# start sequence flow
+flow = SequenceFlow([task1, task2, task3], log=True)
+final_result = flow.start()
+```
+
+To build async AI flows with multiple paths, refer to the [flow tutorial](https://github.com/intelligentnode/Intelli/wiki/Flows).
+
 # Pillars
 - **The wrapper layer** provides low-level access to the latest AI models.
 - **The controller layer** offers a unified input to any AI model by handling the differences.
diff --git a/README.md b/README.md
index 04a0f9c..d81845f 100644
--- a/README.md
+++ b/README.md
@@ -28,6 +28,34 @@
 pip install intelli
 
 # Code Examples
 
+## Create Chatbot
+Switch between multiple chatbot providers without changing your code.
+
+```python
+from intelli.function.chatbot import Chatbot
+from intelli.model.input.chatbot_input import ChatModelInput
+
+def call_chatbot(provider, model=None):
+    # prepare common input
+    input = ChatModelInput("You are a helpful assistant.", model)
+    input.add_user_message("What is the capital of France?")
+
+    # creating chatbot instance
+    openai_bot = Chatbot(YOUR_API_KEY, provider)
+    response = openai_bot.chat(input)
+
+    return response
+
+# call openai
+call_chatbot("openai", "gpt-4")
+
+# call mistralai
+call_chatbot("mistral", "mistral-medium")
+
+# call google gemini
+call_chatbot("gemini")
+```
+
 ## Create AI Flows
 You can create a flow of tasks executed by different AI models. Here's an example of creating a blog post flow:
@@ -62,34 +90,6 @@ final_result = flow.start()
 
 To build async flows with multiple paths, refer to the [flow tutorial](https://github.com/intelligentnode/Intelli/wiki/Flows).
 
-## Create Chatbot
-Switch between multiple chatbot providers without changing your code.
-
-```python
-from intelli.function.chatbot import Chatbot
-from intelli.model.input.chatbot_input import ChatModelInput
-
-def call_chatbot(provider, model=None):
-    # prepare common input
-    input = ChatModelInput("You are a helpful assistant.", model)
-    input.add_user_message("What is the capital of France?")
-
-    # creating chatbot instance
-    openai_bot = Chatbot(YOUR_OPENAI_API_KEY, "openai")
-    response = openai_bot.chat(input)
-
-    return response
-
-# call openai
-call_chatbot("openai", "gpt-4")
-
-# call mistralai
-call_chatbot("mistral", "mistral-medium")
-
-# call google gemini
-call_chatbot("gemini")
-```
-
 ## Connect Your Docs With Chatbot
 IntelliPy allows you to chat with your docs using multiple LLMs.
 To connect your data, visit the [IntelliNode App](https://app.intellinode.ai/), start a project using the Document option, upload your documents or images, and copy the generated One Key. This key will be used to connect the chatbot to your uploaded data.
diff --git a/instructions/run_integration_text.sh b/instructions/run_integration_text.sh
index 3c21916..c735a4a 100644
--- a/instructions/run_integration_text.sh
+++ b/instructions/run_integration_text.sh
@@ -22,6 +22,9 @@ python3 -m unittest intelli.test.integration.test_remote_embed_model
 # images
 python3 -m unittest intelli.test.integration.test_remote_image_model
 
+# vision
+python3 -m unittest intelli.test.integration.test_remote_vision_model
+
 ## functions
 # chatbot
 python3 -m unittest intelli.test.integration.test_chatbot
diff --git a/intelli/controller/remote_vision_model.py b/intelli/controller/remote_vision_model.py
new file mode 100644
index 0000000..a62c36d
--- /dev/null
+++ b/intelli/controller/remote_vision_model.py
@@ -0,0 +1,44 @@
+from intelli.wrappers.openai_wrapper import OpenAIWrapper
+from intelli.wrappers.geminiai_wrapper import GeminiAIWrapper
+from intelli.model.input.vision_input import VisionModelInput
+
+
+class RemoteVisionModel:
+    supported_vision_models = {
+        "openai": OpenAIWrapper,
+        "gemini": GeminiAIWrapper,
+    }
+
+    def __init__(self, api_key, provider="openai"):
+        self.api_key = api_key
+
+        if provider in self.supported_vision_models:
+            self.provider = provider
+            self.provider_wrapper = self.supported_vision_models[provider](api_key)
+        else:
+            supported_providers = ", ".join(self.supported_vision_models.keys())
+            raise ValueError(f"The provider '{provider}' is not supported. Supported providers: {supported_providers}")
+
+    def image_to_text(self, vision_input):
+        if isinstance(vision_input, dict):
+            inputs = vision_input
+        elif isinstance(vision_input, VisionModelInput):
+            inputs = vision_input.get_provider_inputs(self.provider)
+        else:
+            raise ValueError("vision_input must be an instance of VisionModelInput or a dictionary.")
+
+        if self.provider == "openai":
+            return self.call_openai_vision(inputs)
+        elif self.provider == "gemini":
+            return self.call_gemini_vision(inputs)
+
+    def call_openai_vision(self, inputs):
+        data = self.provider_wrapper.image_to_text(inputs)
+        return " ".join(choice['message']['content'] for choice in data['choices'])
+
+    def call_gemini_vision(self, inputs):
+        data = self.provider_wrapper.image_to_text_params(inputs)
+        return " ".join(part['text'] for part in data['candidates'][0]['content']['parts'])
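A minimal usage sketch of the controller added above, mirroring the integration test that appears later in this patch; the environment variable and image path are placeholder names:

```python
# Sketch: calling the new RemoteVisionModel (illustration only;
# OPENAI_API_KEY and sample.png are placeholders).
import os

from intelli.controller.remote_vision_model import RemoteVisionModel
from intelli.model.input.vision_input import VisionModelInput

# VisionModelInput reads and base64-encodes the file internally
vision_input = VisionModelInput(
    content="Describe the image",
    file_path="./sample.png",
    model="gpt-4-vision-preview",
)

controller = RemoteVisionModel(os.getenv("OPENAI_API_KEY"), provider="openai")
print(controller.image_to_text(vision_input))  # joined text across choices
```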
diff --git a/intelli/flow/agents/agent.py b/intelli/flow/agents/agent.py
index bc438e8..f0b8d1c 100644
--- a/intelli/flow/agents/agent.py
+++ b/intelli/flow/agents/agent.py
@@ -6,6 +6,8 @@
 from intelli.model.input.chatbot_input import ChatModelInput
 from intelli.model.input.image_input import ImageModelInput
 from intelli.flow.input.agent_input import AgentInput, TextAgentInput, ImageAgentInput
+from intelli.controller.remote_vision_model import RemoteVisionModel
+from intelli.model.input.vision_input import VisionModelInput
 
 
 class BasicAgent(ABC):
@@ -31,14 +33,24 @@ def execute(self, agent_input: AgentInput):
 
         # Check the agent type and call the appropriate function
         if self.type == AgentTypes.TEXT.value:
-            chatbot = Chatbot(self.model_params['key'], self.provider, self.options)
             chat_input = ChatModelInput(self.mission, model=self.model_params.get('model'))
+
+            chatbot = Chatbot(self.model_params['key'], self.provider, self.options)
             chat_input.add_user_message(agent_input.desc)
             result = chatbot.chat(chat_input)[0]
         elif self.type == AgentTypes.IMAGE.value:
+            image_input = ImageModelInput(prompt=self.mission + ": " + agent_input.desc, model=self.model_params.get('model'))
+
             image_model = RemoteImageModel(self.model_params['key'], self.provider)
-            image_input = ImageModelInput(prompt=agent_input.desc, model=self.model_params.get('model'))
-            result = image_model.generate_images(image_input)
+            result = image_model.generate_images(image_input)[0]
+        elif self.type == AgentTypes.VISION.value:
+            vision_input = VisionModelInput(content=self.mission + ": " + agent_input.desc,
+                                            image_data=agent_input.img,
+                                            extension=self.model_params.get('extension', 'png'),
+                                            model=self.model_params['model'])
+
+            vision_model = RemoteVisionModel(self.model_params['key'], self.provider)
+            result = vision_model.image_to_text(vision_input)
         else:
             raise ValueError(f"Unsupported agent type: {self.type}.")
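The new VISION branch can also be exercised directly, outside a flow. A sketch under the same assumptions as above (placeholder key and file name):

```python
# Sketch: driving Agent.execute's vision branch directly (illustration only).
import base64

from intelli.flow.agents.agent import Agent
from intelli.flow.input.agent_input import ImageAgentInput

with open("diagram.png", "rb") as image_file:  # hypothetical local file
    img_data = base64.b64encode(image_file.read()).decode("utf-8")

vision_agent = Agent(
    agent_type="vision",
    provider="openai",
    mission="describe the image",
    model_params={
        "key": "YOUR_OPENAI_API_KEY",     # placeholder
        "model": "gpt-4-vision-preview",  # model is required for vision agents
        "extension": "png",
    },
)

# the mission is prefixed to the description; img carries the base64 data
description = vision_agent.execute(ImageAgentInput(desc="focus on the main elements", img=img_data))
```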
diff --git a/intelli/flow/flow.py b/intelli/flow/flow.py
index 6fba46f..cdf374f 100644
--- a/intelli/flow/flow.py
+++ b/intelli/flow/flow.py
@@ -1,6 +1,7 @@
 import asyncio
 import networkx as nx
 from intelli.utils.logging import Logger
+from intelli.flow.types import AgentTypes, InputTypes, Matcher
 from functools import partial
 
 
@@ -42,8 +43,14 @@ async def _execute_task(self, task_name):
                 print(f"Warning: Output for predecessor task '{pred}' not found. Skipping...")
 
         self.logger.log(f'The number of combined inputs for task {task_name} is {len(predecessor_outputs)}')
-        merged_input = " ".join(predecessor_outputs)
         merged_type = next(iter(predecessor_types)) if len(predecessor_types) == 1 else None
+        if merged_type == InputTypes.TEXT.value:
+            merged_input = " ".join(predecessor_outputs)
+        elif predecessor_outputs:
+            # non-text outputs cannot be joined as strings; pass the first one through
+            merged_input = predecessor_outputs[0]
+        else:
+            merged_input = None
 
         # Execute task with merged input
         loop = asyncio.get_event_loop()
diff --git a/intelli/flow/tasks/task.py b/intelli/flow/tasks/task.py
index 072a109..5aba697 100644
--- a/intelli/flow/tasks/task.py
+++ b/intelli/flow/tasks/task.py
@@ -1,5 +1,5 @@
 from intelli.flow.template.basic_template import TextInputTemplate
-from intelli.flow.types import AgentTypes, InputTypes
+from intelli.flow.types import AgentTypes, InputTypes, Matcher
 from intelli.utils.logging import Logger
 from intelli.flow.input.agent_input import AgentInput, TextAgentInput, ImageAgentInput
 
@@ -7,41 +7,75 @@ class Task:
     def __init__(self, task_input, agent, exclude=False, pre_process=None, post_process=None, template=None, log=False):
+        self.task_input = task_input
         self.desc = task_input.desc
         self.agent = agent
         self.pre_process = pre_process
         self.post_process = post_process
         self.exclude = exclude
         self.output = None
-        self.output_type = agent.type
+        self.output_type = Matcher.output[agent.type]
         self.template = template
         self.logger = Logger(log)
-        if not template and agent.type in [AgentTypes.TEXT.value, AgentTypes.IMAGE.value]:
+        if not template and Matcher.input[agent.type] in [InputTypes.TEXT.value]:
             self.template = TextInputTemplate(self.desc)
 
     def execute(self, input_data=None, input_type=None):
 
         # logging
-        if input_type in [InputTypes.TEXT.value, InputTypes.IMAGE.value]:
+        if input_type in [InputTypes.TEXT.value]:
             self.logger.log_head('- Inside the task with input data head: ', input_data)
-        elif input_type == InputTypes.IMAGE.value and self.agent.type in [AgentTypes.TEXT.value,
-                                                                          AgentTypes.IMAGE.value]:
-            self.logger.log_head('- Inside the task. the previous step input not supported')
+        elif input_type == InputTypes.IMAGE.value and self.agent.type in [AgentTypes.TEXT.value, AgentTypes.IMAGE.value]:
+            self.logger.log('- Inside the task. The previous step input is not supported.')
+        elif input_type == InputTypes.IMAGE.value:
+            self.logger.log('- Inside the task with previous image, size: ', len(input_data))
 
-        # Run task pre procesing
+        # Run task pre-processing
         if self.pre_process:
             input_data = self.pre_process(input_data)
 
-        # Apply template
-        if input_data and input_type in [InputTypes.TEXT.value, InputTypes.IMAGE.value]:
+        # Apply input template
+        if input_data and input_type in [InputTypes.TEXT.value]:
             agent_text = self.template.apply_input(input_data)
             # log
             self.logger.log_head('- Input data with template: ', agent_text)
         else:
             agent_text = self.desc
 
+        # Prepare the inputs
+        agent_inputs = []
+        if Matcher.input[self.agent.type] == InputTypes.IMAGE.value:
+            if self.task_input.img:
+                agent_input = ImageAgentInput(desc=agent_text, img=self.task_input.img)
+                agent_inputs.append(agent_input)
+
+            # add the previous output as a second image input, only when the agent output type supports it
+            if len(agent_inputs) == 0 or Matcher.output[self.agent.type] == InputTypes.TEXT.value:
+                if input_data and input_type == InputTypes.IMAGE.value:
+                    agent_input = ImageAgentInput(desc=agent_text, img=input_data)
+                    agent_inputs.append(agent_input)
+
+        elif Matcher.input[self.agent.type] == InputTypes.TEXT.value:
+            agent_input = TextAgentInput(agent_text)
+            agent_inputs.append(agent_input)
+
         # Check the agent type and call the appropriate function
-        result = self.agent.execute(TextAgentInput(agent_text))
+        combined_results = []
+        for current_agent_input in agent_inputs:
+            result = self.agent.execute(current_agent_input)
+
+            if isinstance(result, list):
+                combined_results.extend(result)
+            else:
+                combined_results.append(str(result))
+
+        if Matcher.output[self.agent.type] == InputTypes.TEXT.value:
+            result = " ".join(combined_results)
+        else:
+            # keep only the first result for non-text outputs
+            result = combined_results[0]
 
         # log
         if self.agent.type in [AgentTypes.TEXT.value]:
diff --git a/intelli/flow/types.py b/intelli/flow/types.py
index 04caf7e..194d51b 100644
--- a/intelli/flow/types.py
+++ b/intelli/flow/types.py
@@ -4,8 +4,23 @@ class AgentTypes(Enum):
     TEXT = 'text'
     IMAGE = 'image'
+    VISION = 'vision'
 
 
 class InputTypes(Enum):
     TEXT = 'text'
     IMAGE = 'image'
+    VISION = 'vision'
+
+
+class Matcher:
+    input = {
+        'text': 'text',
+        'image': 'text',
+        'vision': 'image'
+    }
+
+    output = {
+        'text': 'text',
+        'image': 'image',
+        'vision': 'text'
+    }
\ No newline at end of file
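To make the new routing concrete: tasks now consult the `Matcher` table instead of the raw agent type. A small illustration based only on the mapping added above:

```python
# Illustration of the Matcher lookup added in this patch: each agent type
# declares what it consumes and what it produces for the next task.
from intelli.flow.types import AgentTypes, InputTypes, Matcher

vision = AgentTypes.VISION.value
assert Matcher.input[vision] == InputTypes.IMAGE.value    # vision agents consume image data
assert Matcher.output[vision] == InputTypes.TEXT.value    # ...and emit text downstream

image = AgentTypes.IMAGE.value
assert Matcher.input[image] == InputTypes.TEXT.value      # image agents take a text prompt
assert Matcher.output[image] == InputTypes.IMAGE.value    # ...and produce an image
```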
diff --git a/intelli/function/chatbot.py b/intelli/function/chatbot.py
index 01b527a..15f8fdb 100644
--- a/intelli/function/chatbot.py
+++ b/intelli/function/chatbot.py
@@ -56,7 +56,13 @@ def _chat_mistral(self, params):
 
     def _chat_gemini(self, params):
         response = self.wrapper.generate_content(params)
-        return [candidate["content"]["parts"][0]["text"] for candidate in response["candidates"]]
+        output = []
+        for candidate in response.get("candidates", []):
+            if "content" in candidate:
+                output.append(candidate["content"]["parts"][0]["text"])
+            else:
+                raise Exception("Error when calling Gemini: {}".format(response))
+        return output
 
     def stream(self, chat_input):
         """
diff --git a/intelli/model/input/image_input.py b/intelli/model/input/image_input.py
index a967690..850f15b 100644
--- a/intelli/model/input/image_input.py
+++ b/intelli/model/input/image_input.py
@@ -17,9 +17,13 @@ def __init__(self, prompt, number_images=1, imageSize=None,
         self.engine = engine
         self.model = model
 
-        sizes_parts = imageSize.split('x') if imageSize else [None, None]
-        self.width = self.width or sizes_parts[0]
-        self.height = self.height or sizes_parts[1]
+        if imageSize and not width:
+            sizes_parts = imageSize.split('x')
+            self.width = self.width or sizes_parts[0]
+            self.height = self.height or sizes_parts[1]
+
+        if not self.imageSize:
+            self.imageSize = str(self.width) + 'x' + str(self.height)
 
     def get_openai_inputs(self):
         inputs = {
diff --git a/intelli/model/input/vision_input.py b/intelli/model/input/vision_input.py
new file mode 100644
index 0000000..8e4885a
--- /dev/null
+++ b/intelli/model/input/vision_input.py
@@ -0,0 +1,72 @@
+import os
+import base64
+
+
+class VisionModelInput:
+
+    def __init__(self, content, image_data=None, file_path=None, model=None, extension='png', max_tokens=300):
+        self.content = content
+        self.model = model
+        self.max_tokens = max_tokens
+        self.extension = extension
+
+        if file_path:
+            with open(file_path, "rb") as image_file:
+                self.image_data = base64.b64encode(image_file.read()).decode('utf-8')
+            self.extension = os.path.splitext(file_path)[-1].strip('.')
+        else:
+            self.image_data = image_data
+
+    def get_openai_inputs(self):
+        inputs = {
+            "model": self.model,
+            "messages": [
+                {
+                    "role": "user",
+                    "content": [
+                        {
+                            "type": "text",
+                            "text": self.content
+                        },
+                        {
+                            "type": "image_url",
+                            "image_url": {
+                                "url": f"data:image/{self.extension};base64,{self.image_data}"
+                            }
+                        }
+                    ]
+                }
+            ],
+            "max_tokens": self.max_tokens
+        }
+
+        return inputs
+
+    def get_gemini_inputs(self):
+        inputs = {
+            "contents": [
+                {
+                    "parts": [
+                        {"text": self.content},
+                        {
+                            "inline_data": {
+                                "mime_type": f"image/{self.extension}",
+                                "data": self.image_data,
+                            }
+                        }
+                    ]
+                }
+            ]
+        }
+
+        return inputs
+
+    def get_provider_inputs(self, provider):
+        if provider == "openai":
+            return self.get_openai_inputs()
+        elif provider == "gemini":
+            return self.get_gemini_inputs()
+        else:
+            raise ValueError(f"Invalid provider name: {provider}")
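A short sketch of the two payload shapes `VisionModelInput` serializes to; the base64 string below is a placeholder:

```python
# Illustration: the same content/image pair serializes to OpenAI's messages
# format or Gemini's contents/inline_data format.
from intelli.model.input.vision_input import VisionModelInput

v_input = VisionModelInput(
    content="Describe the image",
    image_data="<base64-bytes>",   # placeholder; pass file_path= to load a file instead
    extension="jpg",
    model="gpt-4-vision-preview",
)

openai_payload = v_input.get_provider_inputs("openai")  # messages with a data: image_url
gemini_payload = v_input.get_provider_inputs("gemini")  # contents with inline_data
```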
diff --git a/intelli/test/integration/test_flow_map.py b/intelli/test/integration/test_flow_map.py
index 5513598..8da76f1 100644
--- a/intelli/test/integration/test_flow_map.py
+++ b/intelli/test/integration/test_flow_map.py
@@ -15,18 +15,19 @@ def setUp(self):
         self.gemini_key = os.getenv("GEMINI_API_KEY")
         self.stability_key = os.getenv("STABILITY_API_KEY")
 
-    def create_agent_and_task(self, task_input_desc, agent_type, provider, mission, model_key, model):
+    def create_agent_and_task(self, task_input_desc, agent_type, provider, mission, model_key, model, log=True):
         task = Task(
             TextTaskInput(task_input_desc),
             Agent(agent_type, provider, mission, {"key": model_key, "model": model}),
-            log=True
+            log=log
         )
 
         if agent_type == "image":
             task.exclude = True
 
         return task
-
+
     async def async_test_blog_flow(self):
+        print("--- test blog flow ---")
         task1 = self.create_agent_and_task("identify requirements of building a blogging website about environment",
                                            "text", "gemini",
                                            "write specifications",
@@ -55,7 +56,7 @@ async def async_test_blog_flow(self):
 
         task6 = self.create_agent_and_task("generate code based on combined tasks",
                                            "text", "gemini",
                                            "code generation from specifications",
-                                           self.gemini_key, "gemini")
+                                           self.gemini_key, "gemini", log=True)
 
         flow = Flow(tasks = {
             "task1": task1,
@@ -75,7 +76,38 @@ async def async_test_blog_flow(self):
 
         output = await flow.start()
         print("Final output:", output)
+
+    async def async_test_vision_flow(self):
+        print("--- test vision flow ---")
+
+        task1 = self.create_agent_and_task(task_input_desc="generate arts",
+                                           agent_type="image",
+                                           provider="stability",
+                                           mission="generate a robot riding a taxi from the future.",
+                                           model_key=self.stability_key,
+                                           model="stable-diffusion-xl-1024-v1-0")
+
+        task2 = self.create_agent_and_task(task_input_desc="explain the image",
+                                           agent_type="vision",
+                                           provider="openai",
+                                           mission="generate description of the image elements",
+                                           model_key=self.openai_api_key,
+                                           model="gpt-4-vision-preview")
+
+        flow = Flow(tasks = {
+            "task1": task1,
+            "task2": task2
+        }, map_paths = {
+            "task1": ["task2"]
+        }, log=True)
+
+        output = await flow.start()
+        print("Final output:", output)
 
     def test_blog_flow(self):
         asyncio.run(self.async_test_blog_flow())
+
+    def test_vision_flow(self):
+        asyncio.run(self.async_test_vision_flow())
diff --git a/intelli/test/integration/test_flow_sequence.py b/intelli/test/integration/test_flow_sequence.py
index 0255856..3498d49 100644
--- a/intelli/test/integration/test_flow_sequence.py
+++ b/intelli/test/integration/test_flow_sequence.py
@@ -1,12 +1,15 @@
 import os
+import base64
 import unittest
+from intelli.flow.types import *
 from intelli.flow.agents.agent import Agent
-from intelli.flow.input.task_input import TextTaskInput
+from intelli.flow.input.task_input import TextTaskInput, ImageTaskInput
 from intelli.flow.processors.basic_processor import TextProcessor
 from intelli.flow.sequence_flow import SequenceFlow
 from intelli.flow.tasks.task import Task
 from dotenv import load_dotenv
+
 load_dotenv()
 
@@ -16,24 +19,25 @@ def setUp(self):
         self.openai_api_key = os.getenv("OPENAI_API_KEY")
         self.gemini_key = os.getenv("GEMINI_API_KEY")
         self.stability_key = os.getenv("STABILITY_API_KEY")
-
+
     def test_blog_post_flow(self):
         print("---- start blog post flow ----")
+
         # Define agents
         blog_agent = Agent(
-            agent_type="text",
+            agent_type=AgentTypes.TEXT.value,
             provider="openai",
             mission="write blog posts",
             model_params={"key": self.openai_api_key, "model": "gpt-3.5-turbo"},
         )
         description_agent = Agent(
-            agent_type="text",
+            agent_type=AgentTypes.TEXT.value,
             provider="gemini",
-            mission="generate description",
+            mission="generate description only",
             model_params={"key": self.gemini_key, "model": "gemini"},
         )
         image_agent = Agent(
-            agent_type="image",
+            agent_type=AgentTypes.IMAGE.value,
             provider="stability",
             mission="generate image",
             model_params={"key": self.stability_key},
@@ -44,7 +48,7 @@ def test_blog_post_flow(self):
             TextTaskInput("blog post about electric cars"), blog_agent, log=True
         )
         task2 = Task(
-            TextTaskInput("Generate short image description for image model"),
+            TextTaskInput("Write a short image description for the image generation model"),
             description_agent,
             pre_process=TextProcessor.text_head,
             log=True,
@@ -58,7 +62,41 @@ def test_blog_post_flow(self):
 
         final_result = flow.start()
         print("Final result:", final_result)
+
+    def test_flow_chart_image_flow(self):
+        print("---- start vision coder flow ----")
+
+        analyst = Agent(
+            agent_type=AgentTypes.VISION.value,
+            provider="openai",
+            mission="describe flow charts from images",
+            model_params={"key": self.openai_api_key, "extension": "jpg", "model": "gpt-4-vision-preview"},
+        )
+
+        coder = Agent(
+            agent_type=AgentTypes.TEXT.value,
+            provider="openai",
+            mission="write python code. respond only with the code, without explanation, extra text, or marks.",
+            model_params={"key": self.openai_api_key, "model": "gpt-3.5-turbo"},
+        )
+
+        # Define tasks
+        with open('../temp/code_flow_char.jpg', "rb") as image_file:
+            image_data = base64.b64encode(image_file.read()).decode('utf-8')
+
+        task1 = Task(
+            ImageTaskInput(desc="describe the steps of the code flow chart for an engineer.", img=image_data), agent=analyst, log=True
+        )
+
+        task2 = Task(
+            TextTaskInput("write python code from the provided context"), agent=coder, log=True
+        )
+
+        # Start SequenceFlow
+        flow = SequenceFlow([task1, task2], log=True)
+        final_result = flow.start()
+        print("Final result:", final_result)
 
 if __name__ == "__main__":
     unittest.main()
diff --git a/intelli/test/integration/test_geminiai_wrapper.py b/intelli/test/integration/test_geminiai_wrapper.py
index 68bfa8d..28e2a86 100644
--- a/intelli/test/integration/test_geminiai_wrapper.py
+++ b/intelli/test/integration/test_geminiai_wrapper.py
@@ -2,7 +2,7 @@ import os
 from dotenv import load_dotenv
 from intelli.wrappers.geminiai_wrapper import GeminiAIWrapper
-
+import base64
 
 load_dotenv()
 
 class TestGeminiAIWrapper(unittest.TestCase):
@@ -12,7 +12,7 @@ def setUpClass(cls):
         api_key = os.getenv("GEMINI_API_KEY")
         assert api_key is not None, "GEMINI_API_KEY is not set."
         cls.wrapper = GeminiAIWrapper(api_key)
-
+
     def test_generate_content(self):
         params = {
             "contents": [{
                 "parts": [{
                     "text": "Write a story about a magic backpack."
                 }]
             }]
@@ -27,10 +27,13 @@ def test_generate_content(self):
         self.assertIsNotNone(result['candidates'][0]['content']['parts'][0]['text'])
 
     def test_image_to_text(self):
-        file_path = 'temp/test_image_desc.png'
+        file_path = '../temp/test_image_desc.png'
 
         try:
-            result = self.wrapper.image_to_text('describe the image', file_path, 'png')
+            with open(file_path, "rb") as image_file:
+                image_data = base64.b64encode(image_file.read()).decode('utf-8')
+
+            result = self.wrapper.image_to_text('describe the image', image_data, 'png')
 
             self.assertTrue('candidates' in result, "The result should have a 'candidates' field.")
             self.assertIsInstance(result['candidates'], list, "Expected 'candidates' to be a list.")
@@ -44,7 +47,7 @@ def test_image_to_text(self):
 
         except Exception as error:
             self.fail(f'Gemini AI Error: {error}')
-
+
     def test_get_embeddings(self):
         text = "Write a story about a magic backpack."
         params = {
diff --git a/intelli/test/integration/test_remote_vision_model.py b/intelli/test/integration/test_remote_vision_model.py
new file mode 100644
index 0000000..10f3c4a
--- /dev/null
+++ b/intelli/test/integration/test_remote_vision_model.py
@@ -0,0 +1,39 @@
+import unittest
+import os
+from intelli.controller.remote_vision_model import RemoteVisionModel
+from intelli.model.input.vision_input import VisionModelInput
+from dotenv import load_dotenv
+
+load_dotenv()
+
+
+class TestRemoteVisionModel(unittest.TestCase):
+
+    def setUp(self):
+        self.openai_api_key = os.getenv('OPENAI_API_KEY')
+        self.gemini_api_key = os.getenv('GEMINI_API_KEY')
+
+        if not self.openai_api_key or not self.gemini_api_key:
+            raise unittest.SkipTest("Both OpenAI and Gemini keys are required for testing RemoteVisionModel")
+
+    def test_openai_image_descriptor(self):
+        print('--- call openai vision ---')
+        provider = "openai"
+        controller = RemoteVisionModel(self.openai_api_key, provider)
+
+        vision_input = VisionModelInput(content="Describe the image", file_path='../temp/test_image_desc.png', model="gpt-4-vision-preview")
+        result = controller.image_to_text(vision_input)
+
+        print(result)
+
+    def test_gemini_image_descriptor(self):
+        print('--- call gemini vision ---')
+        provider = "gemini"
+        controller = RemoteVisionModel(self.gemini_api_key, provider)
+
+        vision_input = VisionModelInput(content="Describe this image", file_path='../temp/test_image_desc.png', extension='png')
+        result = controller.image_to_text(vision_input)
+
+        print(result)
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/intelli/wrappers/geminiai_wrapper.py b/intelli/wrappers/geminiai_wrapper.py
index 3bb3a55..819fa1e 100644
--- a/intelli/wrappers/geminiai_wrapper.py
+++ b/intelli/wrappers/geminiai_wrapper.py
@@ -25,10 +25,8 @@ def generate_content(self, params, vision=False):
         except Exception as error:
             raise Exception(str(error))
 
-    def image_to_text(self, user_input, file_path, extension):
-        with open(file_path, "rb") as image_file:
-            image_data = base64.b64encode(image_file.read()).decode('utf-8')
-
+    def image_to_text(self, user_input, image_data, extension):
         params = {
             "contents": [
                 {
@@ -45,8 +43,12 @@ def image_to_text(self, user_input, image_data, extension):
             ]
         }
 
-        return self.generate_content(params, True)
+        return self.image_to_text_params(params=params)
+
+    def image_to_text_params(self, params):
+        return self.generate_content(params, True)
 
     def get_embeddings(self, params):
         url = f"{self.API_BASE_URL}{config['url']['gemini']['embeddingEndpoint']}"
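With this signature change, callers own the file I/O and hand the wrapper pre-encoded data. A minimal sketch (the key variable and file name are placeholders):

```python
# Sketch: the wrapper no longer reads files itself; callers base64-encode
# the image and pass the string (illustration only).
import base64
import os

from intelli.wrappers.geminiai_wrapper import GeminiAIWrapper

wrapper = GeminiAIWrapper(os.getenv("GEMINI_API_KEY"))

with open("photo.png", "rb") as image_file:  # hypothetical local file
    image_data = base64.b64encode(image_file.read()).decode("utf-8")

result = wrapper.image_to_text("describe the image", image_data, "png")
print(result['candidates'][0]['content']['parts'][0]['text'])
```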
diff --git a/requirements.txt b/requirements.txt
index 9677848..0f644a1 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,2 @@
-requests-mock==1.11.0
 python-dotenv==1.0.1
 networkx==3.2.1
diff --git a/setup.py b/setup.py
index 13028fb..54e5252 100644
--- a/setup.py
+++ b/setup.py
@@ -5,7 +5,7 @@
 setup(
     name="intelli",
-    version="0.0.7",
+    version="0.0.8",
     author="Intellinode",
     author_email="admin@intellinode.ai",
-    description="Create chatbots and AI agent work flows. Intelli allows to connect your data with multiple AI models like OpenAI, Gemini, and Mistral through a unified access layer.",
+    description="Create chatbots and AI agent workflows. Intelli allows you to connect your data with multiple AI models like OpenAI, Gemini, and Mistral through a unified access layer.",
@@ -18,6 +18,6 @@
     packages=find_packages(),
     python_requires='>=3.6',
     install_requires=[
-        "requests-mock==1.11.0", "python-dotenv==1.0.1", "networkx==3.2.1"
+        "python-dotenv==1.0.1", "networkx==3.2.1"
     ],
 )
\ No newline at end of file
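Taken together, the release lets an image-generation task feed a vision task through the type-aware flow. An end-to-end sketch modeled on the integration tests above; keys come from the environment, and the `Flow` import path is assumed from the test modules:

```python
# End-to-end sketch of the 0.0.8 image -> vision pipeline (illustration only;
# prompts are placeholders, model names follow the integration tests).
import asyncio
import os

from intelli.flow.agents.agent import Agent
from intelli.flow.flow import Flow  # assumed import path
from intelli.flow.input.task_input import TextTaskInput
from intelli.flow.tasks.task import Task

paint_task = Task(
    TextTaskInput("generate art"),
    Agent("image", "stability", "a futuristic city at sunset",
          {"key": os.getenv("STABILITY_API_KEY"), "model": "stable-diffusion-xl-1024-v1-0"}),
    log=True,
)

describe_task = Task(
    TextTaskInput("explain the image"),
    Agent("vision", "openai", "describe the image elements",
          {"key": os.getenv("OPENAI_API_KEY"), "model": "gpt-4-vision-preview"}),
    log=True,
)

# the image output of paint_task is routed as the vision input of describe_task
flow = Flow(tasks={"paint": paint_task, "describe": describe_task},
            map_paths={"paint": ["describe"]},
            log=True)

print(asyncio.run(flow.start()))
```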