diff --git a/PIPREADME.md b/PIPREADME.md
index 629eae9..e0c581b 100644
--- a/PIPREADME.md
+++ b/PIPREADME.md
@@ -19,36 +19,6 @@
 pip install intelli
 
 # Code Examples
 
-## Create AI Flows
-You can create a flow of tasks executed by different AI models. Here's an example of creating a blog post flow:
-- ChatGPT agent to write a post.
-- Google gemini agent to write image description.
-- Stable diffusion to generate images.
-
-```python
-from intelli.flow.agents.agent import Agent
-from intelli.flow.tasks.task import Task
-from intelli.flow.sequence_flow import SequenceFlow
-from intelli.flow.input.task_input import TextTaskInput
-from intelli.flow.processors.basic_processor import TextProcessor
-
-# define agents
-blog_agent = Agent(agent_type='text', provider='openai', mission='write blog posts', model_params={'key': YOUR_OPENAI_API_KEY, 'model': 'gpt-4'})
-copy_agent = Agent(agent_type='text', provider='gemini', mission='generate description', model_params={'key': YOUR_GEMINI_API_KEY, 'model': 'gemini'})
-artist_agent = Agent(agent_type='image', provider='stability', mission='generate image', model_params={'key': YOUR_STABILITY_API_KEY})
-
-# define tasks
-task1 = Task(TextTaskInput('blog post about electric cars'), blog_agent, log=True)
-task2 = Task(TextTaskInput('Generate short image description for image model'), copy_agent, pre_process=TextProcessor.text_head, log=True)
-task3 = Task(TextTaskInput('Generate cartoon style image'), artist_agent, log=True)
-
-# start sequence flow
-flow = SequenceFlow([task1, task2, task3], log=True)
-final_result = flow.start()
-```
-
-To build async AI flows with multiple paths, refer to the [flow tutorial](https://github.com/intelligentnode/Intelli/wiki/Flows).
-
 ## Create Chatbot
 Switch between multiple chatbot providers without changing your code.
 
@@ -62,7 +32,7 @@ def call_chatbot(provider, model=None):
     input.add_user_message("What is the capital of France?")
 
     # creating chatbot instance
-    openai_bot = Chatbot(YOUR_OPENAI_API_KEY, "openai")
+    openai_bot = Chatbot(YOUR_API_KEY, provider)
     response = openai_bot.chat(input)
 
     return response
@@ -73,11 +43,10 @@
 # call openai
 call_chatbot("openai", "gpt-4")
 
 # call mistralai
 call_chatbot("mistral", "mistral-medium")
 
-# call gooogle gemini
+# call google gemini
 call_chatbot("gemini")
 ```
 
-
 ## Connect Your Docs With Chatbot
 IntelliPy allows you to chat with your docs using multiple LLMs. To connect your data, visit the [IntelliNode App](https://app.intellinode.ai/), start a project using the Document option, upload your documents or images, and copy the generated One Key. This key will be used to connect the chatbot to your uploaded data.
@@ -110,6 +79,36 @@ wrapper = RemoteImageModel(your_api_key, provider)
 results = wrapper.generate_images(image_input)
 ```
 
+## Create AI Flows
+You can create a flow of tasks executed by different AI models. Here's an example of creating a blog post flow:
+- A ChatGPT agent to write the post.
+- A Google Gemini agent to write the image description.
+- Stable Diffusion to generate the images.
+
+```python
+from intelli.flow.agents.agent import Agent
+from intelli.flow.tasks.task import Task
+from intelli.flow.sequence_flow import SequenceFlow
+from intelli.flow.input.task_input import TextTaskInput
+from intelli.flow.processors.basic_processor import TextProcessor
+
+# define agents
+blog_agent = Agent(agent_type='text', provider='openai', mission='write blog posts', model_params={'key': YOUR_OPENAI_API_KEY, 'model': 'gpt-4'})
+copy_agent = Agent(agent_type='text', provider='gemini', mission='generate description', model_params={'key': YOUR_GEMINI_API_KEY, 'model': 'gemini'})
+artist_agent = Agent(agent_type='image', provider='stability', mission='generate image', model_params={'key': YOUR_STABILITY_API_KEY})
+
+# define tasks
+task1 = Task(TextTaskInput('blog post about electric cars'), blog_agent, log=True)
+task2 = Task(TextTaskInput('Generate short image description for image model'), copy_agent, pre_process=TextProcessor.text_head, log=True)
+task3 = Task(TextTaskInput('Generate cartoon style image'), artist_agent, log=True)
+
+# start sequence flow
+flow = SequenceFlow([task1, task2, task3], log=True)
+final_result = flow.start()
+```
+
+To build async AI flows with multiple paths, refer to the [flow tutorial](https://github.com/intelligentnode/Intelli/wiki/Flows).
+
 # Pillars
 - **The wrapper layer** provides low-level access to the latest AI models.
 - **The controller layer** offers a unified input to any AI model by handling the differences.
diff --git a/README.md b/README.md
index 04a0f9c..d81845f 100644
--- a/README.md
+++ b/README.md
@@ -28,6 +28,34 @@
 pip install intelli
 
 # Code Examples
 
+## Create Chatbot
+Switch between multiple chatbot providers without changing your code.
+
+```python
+from intelli.function.chatbot import Chatbot
+from intelli.model.input.chatbot_input import ChatModelInput
+
+def call_chatbot(provider, model=None):
+    # prepare common input
+    input = ChatModelInput("You are a helpful assistant.", model)
+    input.add_user_message("What is the capital of France?")
+
+    # creating chatbot instance
+    openai_bot = Chatbot(YOUR_API_KEY, provider)
+    response = openai_bot.chat(input)
+
+    return response
+
+# call openai
+call_chatbot("openai", "gpt-4")
+
+# call mistralai
+call_chatbot("mistral", "mistral-medium")
+
+# call google gemini
+call_chatbot("gemini")
+```
+
 ## Create AI Flows
 You can create a flow of tasks executed by different AI models. Here's an example of creating a blog post flow:
@@ -62,34 +90,6 @@ final_result = flow.start()
 
 To build async flows with multiple paths, refer to the [flow tutorial](https://github.com/intelligentnode/Intelli/wiki/Flows).
 
-## Create Chatbot
-Switch between multiple chatbot providers without changing your code.
-
-```python
-from intelli.function.chatbot import Chatbot
-from intelli.model.input.chatbot_input import ChatModelInput
-
-def call_chatbot(provider, model=None):
-    # prepare common input
-    input = ChatModelInput("You are a helpful assistant.", model)
-    input.add_user_message("What is the capital of France?")
-
-    # creating chatbot instance
-    openai_bot = Chatbot(YOUR_OPENAI_API_KEY, "openai")
-    response = openai_bot.chat(input)
-
-    return response
-
-# call openai
-call_chatbot("openai", "gpt-4")
-
-# call mistralai
-call_chatbot("mistral", "mistral-medium")
-
-# call google gemini
-call_chatbot("gemini")
-```
-
 ## Connect Your Docs With Chatbot
 IntelliPy allows you to chat with your docs using multiple LLMs.
 To connect your data, visit the [IntelliNode App](https://app.intellinode.ai/), start a project using the Document option, upload your documents or images, and copy the generated One Key. This key will be used to connect the chatbot to your uploaded data.
diff --git a/instructions/run_integration_text.sh b/instructions/run_integration_text.sh
index 3c21916..c735a4a 100644
--- a/instructions/run_integration_text.sh
+++ b/instructions/run_integration_text.sh
@@ -22,6 +22,9 @@ python3 -m unittest intelli.test.integration.test_remote_embed_model
 # images
 python3 -m unittest intelli.test.integration.test_remote_image_model
 
+# vision
+python3 -m unittest intelli.test.integration.test_remote_vision_model
+
 ## functions
 # chatbot
 python3 -m unittest intelli.test.integration.test_chatbot
diff --git a/intelli/controller/remote_vision_model.py b/intelli/controller/remote_vision_model.py
new file mode 100644
index 0000000..a62c36d
--- /dev/null
+++ b/intelli/controller/remote_vision_model.py
@@ -0,0 +1,44 @@
+from intelli.wrappers.openai_wrapper import OpenAIWrapper
+from intelli.wrappers.geminiai_wrapper import GeminiAIWrapper
+from intelli.model.input.vision_input import VisionModelInput
+
+
+class RemoteVisionModel:
+    supported_vision_models = {
+        "openai": OpenAIWrapper,
+        "gemini": GeminiAIWrapper,
+    }
+
+    def __init__(self, api_key, provider="openai"):
+        self.api_key = api_key
+
+        if provider in self.supported_vision_models:
+            self.provider = provider
+            self.provider_wrapper = self.supported_vision_models[provider](api_key)
+        else:
+            supported_providers = ", ".join(self.supported_vision_models.keys())
+            raise ValueError(f"The provider '{provider}' is not supported. Supported providers: {supported_providers}")
+
+    def image_to_text(self, vision_input):
+        if isinstance(vision_input, dict):
+            inputs = vision_input
+        elif isinstance(vision_input, VisionModelInput):
+            inputs = vision_input.get_provider_inputs(self.provider)
+        else:
+            raise ValueError("vision_input must be an instance of VisionModelInput or a dictionary.")
+
+        if self.provider == "openai":
+            return self.call_openai_vision(inputs)
+        elif self.provider == "gemini":
+            return self.call_gemini_vision(inputs)
+
+    def call_openai_vision(self, inputs):
+        data = self.provider_wrapper.image_to_text(inputs)
+        return " ".join(choice['message']['content'] for choice in data['choices'])
+
+    def call_gemini_vision(self, inputs):
+        data = self.provider_wrapper.image_to_text_params(inputs)
+        return " ".join(part['text'] for part in data['candidates'][0]['content']['parts'])
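A minimal usage sketch of the controller added above, mirroring the integration test that appears later in this patch; the environment variable and image path are placeholder names:

```python
# Sketch: calling the new RemoteVisionModel (illustration only;
# OPENAI_API_KEY and sample.png are placeholders).
import os

from intelli.controller.remote_vision_model import RemoteVisionModel
from intelli.model.input.vision_input import VisionModelInput

# VisionModelInput reads and base64-encodes the file internally
vision_input = VisionModelInput(
    content="Describe the image",
    file_path="./sample.png",
    model="gpt-4-vision-preview",
)

controller = RemoteVisionModel(os.getenv("OPENAI_API_KEY"), provider="openai")
print(controller.image_to_text(vision_input))  # joined text across choices
```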
diff --git a/intelli/flow/agents/agent.py b/intelli/flow/agents/agent.py
index bc438e8..f0b8d1c 100644
--- a/intelli/flow/agents/agent.py
+++ b/intelli/flow/agents/agent.py
@@ -6,6 +6,8 @@
 from intelli.model.input.chatbot_input import ChatModelInput
 from intelli.model.input.image_input import ImageModelInput
 from intelli.flow.input.agent_input import AgentInput, TextAgentInput, ImageAgentInput
+from intelli.controller.remote_vision_model import RemoteVisionModel
+from intelli.model.input.vision_input import VisionModelInput
 
 
 class BasicAgent(ABC):
@@ -31,14 +33,24 @@ def execute(self, agent_input: AgentInput):
 
         # Check the agent type and call the appropriate function
         if self.type == AgentTypes.TEXT.value:
-            chatbot = Chatbot(self.model_params['key'], self.provider, self.options)
             chat_input = ChatModelInput(self.mission, model=self.model_params.get('model'))
+
+            chatbot = Chatbot(self.model_params['key'], self.provider, self.options)
             chat_input.add_user_message(agent_input.desc)
             result = chatbot.chat(chat_input)[0]
         elif self.type == AgentTypes.IMAGE.value:
+            image_input = ImageModelInput(prompt=self.mission + ": " + agent_input.desc, model=self.model_params.get('model'))
+
             image_model = RemoteImageModel(self.model_params['key'], self.provider)
-            image_input = ImageModelInput(prompt=agent_input.desc, model=self.model_params.get('model'))
-            result = image_model.generate_images(image_input)
+            result = image_model.generate_images(image_input)[0]
+        elif self.type == AgentTypes.VISION.value:
+            vision_input = VisionModelInput(content=self.mission + ": " + agent_input.desc,
+                                            image_data=agent_input.img,
+                                            extension=self.model_params.get('extension', 'png'),
+                                            model=self.model_params['model'])
+
+            vision_model = RemoteVisionModel(self.model_params['key'], self.provider)
+            result = vision_model.image_to_text(vision_input)
         else:
             raise ValueError(f"Unsupported agent type: {self.type}.")
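The new VISION branch can also be exercised directly, outside a flow. A sketch under the same assumptions as above (placeholder key and file name):

```python
# Sketch: driving Agent.execute's vision branch directly (illustration only).
import base64

from intelli.flow.agents.agent import Agent
from intelli.flow.input.agent_input import ImageAgentInput

with open("diagram.png", "rb") as image_file:  # hypothetical local file
    img_data = base64.b64encode(image_file.read()).decode("utf-8")

vision_agent = Agent(
    agent_type="vision",
    provider="openai",
    mission="describe the image",
    model_params={
        "key": "YOUR_OPENAI_API_KEY",     # placeholder
        "model": "gpt-4-vision-preview",  # model is required for vision agents
        "extension": "png",
    },
)

# the mission is prefixed to the description; img carries the base64 data
description = vision_agent.execute(ImageAgentInput(desc="focus on the main elements", img=img_data))
```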
diff --git a/intelli/flow/flow.py b/intelli/flow/flow.py
index 6fba46f..cdf374f 100644
--- a/intelli/flow/flow.py
+++ b/intelli/flow/flow.py
@@ -1,6 +1,7 @@
 import asyncio
 import networkx as nx
 from intelli.utils.logging import Logger
+from intelli.flow.types import AgentTypes, InputTypes, Matcher
 from functools import partial
 
 
@@ -42,8 +43,14 @@ async def _execute_task(self, task_name):
                 print(f"Warning: Output for predecessor task '{pred}' not found. Skipping...")
 
         self.logger.log(f'The number of combined inputs for task {task_name} is {len(predecessor_outputs)}')
-        merged_input = " ".join(predecessor_outputs)
         merged_type = next(iter(predecessor_types)) if len(predecessor_types) == 1 else None
+        if merged_type == InputTypes.TEXT.value:
+            merged_input = " ".join(predecessor_outputs)
+        elif predecessor_outputs:
+            # non-text outputs cannot be joined as strings; pass the first one through
+            merged_input = predecessor_outputs[0]
+        else:
+            merged_input = None
 
         # Execute task with merged input
         loop = asyncio.get_event_loop()
diff --git a/intelli/flow/tasks/task.py b/intelli/flow/tasks/task.py
index 072a109..5aba697 100644
--- a/intelli/flow/tasks/task.py
+++ b/intelli/flow/tasks/task.py
@@ -1,5 +1,5 @@
 from intelli.flow.template.basic_template import TextInputTemplate
-from intelli.flow.types import AgentTypes, InputTypes
+from intelli.flow.types import AgentTypes, InputTypes, Matcher
 from intelli.utils.logging import Logger
 from intelli.flow.input.agent_input import AgentInput, TextAgentInput, ImageAgentInput
 
@@ -7,41 +7,75 @@ class Task:
     def __init__(self, task_input, agent, exclude=False, pre_process=None, post_process=None, template=None, log=False):
+        self.task_input = task_input
         self.desc = task_input.desc
         self.agent = agent
         self.pre_process = pre_process
         self.post_process = post_process
         self.exclude = exclude
         self.output = None
-        self.output_type = agent.type
+        self.output_type = Matcher.output[agent.type]
         self.template = template
         self.logger = Logger(log)
-        if not template and agent.type in [AgentTypes.TEXT.value, AgentTypes.IMAGE.value]:
+        if not template and Matcher.input[agent.type] in [InputTypes.TEXT.value]:
             self.template = TextInputTemplate(self.desc)
 
     def execute(self, input_data=None, input_type=None):
 
         # logging
-        if input_type in [InputTypes.TEXT.value, InputTypes.IMAGE.value]:
+        if input_type in [InputTypes.TEXT.value]:
             self.logger.log_head('- Inside the task with input data head: ', input_data)
-        elif input_type == InputTypes.IMAGE.value and self.agent.type in [AgentTypes.TEXT.value,
-                                                                          AgentTypes.IMAGE.value]:
-            self.logger.log_head('- Inside the task. the previous step input not supported')
+        elif input_type == InputTypes.IMAGE.value and self.agent.type in [AgentTypes.TEXT.value, AgentTypes.IMAGE.value]:
+            self.logger.log('- Inside the task. The previous step input is not supported.')
+        elif input_type == InputTypes.IMAGE.value:
+            self.logger.log('- Inside the task with previous image, size: ', len(input_data))
 
-        # Run task pre procesing
+        # Run task pre-processing
         if self.pre_process:
             input_data = self.pre_process(input_data)
 
-        # Apply template
-        if input_data and input_type in [InputTypes.TEXT.value, InputTypes.IMAGE.value]:
+        # Apply input template
+        if input_data and input_type in [InputTypes.TEXT.value]:
             agent_text = self.template.apply_input(input_data)
             # log
             self.logger.log_head('- Input data with template: ', agent_text)
         else:
             agent_text = self.desc
 
+        # Prepare the inputs
+        agent_inputs = []
+        if Matcher.input[self.agent.type] == InputTypes.IMAGE.value:
+            if self.task_input.img:
+                agent_input = ImageAgentInput(desc=agent_text, img=self.task_input.img)
+                agent_inputs.append(agent_input)
+
+            # add the previous output as a second image input, only when the agent output type supports it
+            if len(agent_inputs) == 0 or Matcher.output[self.agent.type] == InputTypes.TEXT.value:
+                if input_data and input_type == InputTypes.IMAGE.value:
+                    agent_input = ImageAgentInput(desc=agent_text, img=input_data)
+                    agent_inputs.append(agent_input)
+
+        elif Matcher.input[self.agent.type] == InputTypes.TEXT.value:
+            agent_input = TextAgentInput(agent_text)
+            agent_inputs.append(agent_input)
+
         # Check the agent type and call the appropriate function
-        result = self.agent.execute(TextAgentInput(agent_text))
+        combined_results = []
+        for current_agent_input in agent_inputs:
+            result = self.agent.execute(current_agent_input)
+
+            if isinstance(result, list):
+                combined_results.extend(result)
+            else:
+                combined_results.append(str(result))
+
+        if Matcher.output[self.agent.type] == InputTypes.TEXT.value:
+            result = " ".join(combined_results)
+        else:
+            # keep only the first result for non-text outputs
+            result = combined_results[0]
 
         # log
         if self.agent.type in [AgentTypes.TEXT.value]:
diff --git a/intelli/flow/types.py b/intelli/flow/types.py
index 04caf7e..194d51b 100644
--- a/intelli/flow/types.py
+++ b/intelli/flow/types.py
@@ -4,8 +4,23 @@ class AgentTypes(Enum):
     TEXT = 'text'
     IMAGE = 'image'
+    VISION = 'vision'
 
 
 class InputTypes(Enum):
     TEXT = 'text'
     IMAGE = 'image'
+    VISION = 'vision'
+
+
+class Matcher:
+    input = {
+        'text': 'text',
+        'image': 'text',
+        'vision': 'image'
+    }
+
+    output = {
+        'text': 'text',
+        'image': 'image',
+        'vision': 'text'
+    }
\ No newline at end of file
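To make the new routing concrete: tasks now consult the `Matcher` table instead of the raw agent type. A small illustration based only on the mapping added above:

```python
# Illustration of the Matcher lookup added in this patch: each agent type
# declares what it consumes and what it produces for the next task.
from intelli.flow.types import AgentTypes, InputTypes, Matcher

vision = AgentTypes.VISION.value
assert Matcher.input[vision] == InputTypes.IMAGE.value    # vision agents consume image data
assert Matcher.output[vision] == InputTypes.TEXT.value    # ...and emit text downstream

image = AgentTypes.IMAGE.value
assert Matcher.input[image] == InputTypes.TEXT.value      # image agents take a text prompt
assert Matcher.output[image] == InputTypes.IMAGE.value    # ...and produce an image
```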
diff --git a/intelli/function/chatbot.py b/intelli/function/chatbot.py
index 01b527a..15f8fdb 100644
--- a/intelli/function/chatbot.py
+++ b/intelli/function/chatbot.py
@@ -56,7 +56,13 @@ def _chat_mistral(self, params):
 
     def _chat_gemini(self, params):
         response = self.wrapper.generate_content(params)
-        return [candidate["content"]["parts"][0]["text"] for candidate in response["candidates"]]
+        output = []
+        for candidate in response.get("candidates", []):
+            if "content" in candidate:
+                output.append(candidate["content"]["parts"][0]["text"])
+            else:
+                raise Exception("Error when calling Gemini: {}".format(response))
+        return output
 
     def stream(self, chat_input):
         """
diff --git a/intelli/model/input/image_input.py b/intelli/model/input/image_input.py
index a967690..850f15b 100644
--- a/intelli/model/input/image_input.py
+++ b/intelli/model/input/image_input.py
@@ -17,9 +17,13 @@ def __init__(self, prompt, number_images=1, imageSize=None,
         self.engine = engine
         self.model = model
 
-        sizes_parts = imageSize.split('x') if imageSize else [None, None]
-        self.width = self.width or sizes_parts[0]
-        self.height = self.height or sizes_parts[1]
+        if imageSize and not width:
+            sizes_parts = imageSize.split('x')
+            self.width = self.width or sizes_parts[0]
+            self.height = self.height or sizes_parts[1]
+
+        if not self.imageSize:
+            self.imageSize = str(self.width) + 'x' + str(self.height)
 
     def get_openai_inputs(self):
         inputs = {
diff --git a/intelli/model/input/vision_input.py b/intelli/model/input/vision_input.py
new file mode 100644
index 0000000..8e4885a
--- /dev/null
+++ b/intelli/model/input/vision_input.py
@@ -0,0 +1,72 @@
+import os
+import base64
+
+
+class VisionModelInput:
+
+    def __init__(self, content, image_data=None, file_path=None, model=None, extension='png', max_tokens=300):
+        self.content = content
+        self.model = model
+        self.max_tokens = max_tokens
+        self.extension = extension
+
+        if file_path:
+            with open(file_path, "rb") as image_file:
+                self.image_data = base64.b64encode(image_file.read()).decode('utf-8')
+            self.extension = os.path.splitext(file_path)[-1].strip('.')
+        else:
+            self.image_data = image_data
+
+    def get_openai_inputs(self):
+        inputs = {
+            "model": self.model,
+            "messages": [
+                {
+                    "role": "user",
+                    "content": [
+                        {
+                            "type": "text",
+                            "text": self.content
+                        },
+                        {
+                            "type": "image_url",
+                            "image_url": {
+                                "url": f"data:image/{self.extension};base64,{self.image_data}"
+                            }
+                        }
+                    ]
+                }
+            ],
+            "max_tokens": self.max_tokens
+        }
+
+        return inputs
+
+    def get_gemini_inputs(self):
+        inputs = {
+            "contents": [
+                {
+                    "parts": [
+                        {"text": self.content},
+                        {
+                            "inline_data": {
+                                "mime_type": f"image/{self.extension}",
+                                "data": self.image_data,
+                            }
+                        }
+                    ]
+                }
+            ]
+        }
+
+        return inputs
+
+    def get_provider_inputs(self, provider):
+        if provider == "openai":
+            return self.get_openai_inputs()
+        elif provider == "gemini":
+            return self.get_gemini_inputs()
+        else:
+            raise ValueError(f"Invalid provider name: {provider}")
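A short sketch of the two payload shapes `VisionModelInput` serializes to; the base64 string below is a placeholder:

```python
# Illustration: the same content/image pair serializes to OpenAI's messages
# format or Gemini's contents/inline_data format.
from intelli.model.input.vision_input import VisionModelInput

v_input = VisionModelInput(
    content="Describe the image",
    image_data="<base64-bytes>",   # placeholder; pass file_path= to load a file instead
    extension="jpg",
    model="gpt-4-vision-preview",
)

openai_payload = v_input.get_provider_inputs("openai")  # messages with a data: image_url
gemini_payload = v_input.get_provider_inputs("gemini")  # contents with inline_data
```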
diff --git a/intelli/test/integration/test_flow_map.py b/intelli/test/integration/test_flow_map.py
index 5513598..8da76f1 100644
--- a/intelli/test/integration/test_flow_map.py
+++ b/intelli/test/integration/test_flow_map.py
@@ -15,18 +15,19 @@ def setUp(self):
         self.gemini_key = os.getenv("GEMINI_API_KEY")
         self.stability_key = os.getenv("STABILITY_API_KEY")
 
-    def create_agent_and_task(self, task_input_desc, agent_type, provider, mission, model_key, model):
+    def create_agent_and_task(self, task_input_desc, agent_type, provider, mission, model_key, model, log=True):
         task = Task(
             TextTaskInput(task_input_desc),
             Agent(agent_type, provider, mission, {"key": model_key, "model": model}),
-            log=True
+            log=log
         )
 
         if agent_type == "image":
             task.exclude = True
 
         return task
-
+
     async def async_test_blog_flow(self):
+        print("--- test blog flow ---")
         task1 = self.create_agent_and_task("identify requirements of building a blogging website about environment",
                                            "text", "gemini",
                                            "write specifications",
@@ -55,7 +56,7 @@ async def async_test_blog_flow(self):
 
         task6 = self.create_agent_and_task("generate code based on combined tasks",
                                            "text", "gemini",
                                            "code generation from specifications",
-                                           self.gemini_key, "gemini")
+                                           self.gemini_key, "gemini", log=True)
 
         flow = Flow(tasks = {
             "task1": task1,
@@ -75,7 +76,38 @@ async def async_test_blog_flow(self):
 
         output = await flow.start()
         print("Final output:", output)
+
+    async def async_test_vision_flow(self):
+        print("--- test vision flow ---")
+
+        task1 = self.create_agent_and_task(task_input_desc="generate arts",
+                                           agent_type="image",
+                                           provider="stability",
+                                           mission="generate a robot riding a taxi from the future.",
+                                           model_key=self.stability_key,
+                                           model="stable-diffusion-xl-1024-v1-0")
+
+        task2 = self.create_agent_and_task(task_input_desc="explain the image",
+                                           agent_type="vision",
+                                           provider="openai",
+                                           mission="generate description of the image elements",
+                                           model_key=self.openai_api_key,
+                                           model="gpt-4-vision-preview")
+
+        flow = Flow(tasks = {
+            "task1": task1,
+            "task2": task2
+        }, map_paths = {
+            "task1": ["task2"]
+        }, log=True)
+
+        output = await flow.start()
+        print("Final output:", output)
 
     def test_blog_flow(self):
         asyncio.run(self.async_test_blog_flow())
+
+    def test_vision_flow(self):
+        asyncio.run(self.async_test_vision_flow())
diff --git a/intelli/test/integration/test_flow_sequence.py b/intelli/test/integration/test_flow_sequence.py
index 0255856..3498d49 100644
--- a/intelli/test/integration/test_flow_sequence.py
+++ b/intelli/test/integration/test_flow_sequence.py
@@ -1,12 +1,15 @@
 import os
+import base64
 import unittest
+from intelli.flow.types import *
 from intelli.flow.agents.agent import Agent
-from intelli.flow.input.task_input import TextTaskInput
+from intelli.flow.input.task_input import TextTaskInput, ImageTaskInput
 from intelli.flow.processors.basic_processor import TextProcessor
 from intelli.flow.sequence_flow import SequenceFlow
 from intelli.flow.tasks.task import Task
 from dotenv import load_dotenv
+
 load_dotenv()
 
@@ -16,24 +19,25 @@ def setUp(self):
         self.openai_api_key = os.getenv("OPENAI_API_KEY")
         self.gemini_key = os.getenv("GEMINI_API_KEY")
         self.stability_key = os.getenv("STABILITY_API_KEY")
-
+
     def test_blog_post_flow(self):
         print("---- start blog post flow ----")
+
         # Define agents
         blog_agent = Agent(
-            agent_type="text",
+            agent_type=AgentTypes.TEXT.value,
             provider="openai",
             mission="write blog posts",
             model_params={"key": self.openai_api_key, "model": "gpt-3.5-turbo"},
         )
         description_agent = Agent(
-            agent_type="text",
+            agent_type=AgentTypes.TEXT.value,
             provider="gemini",
-            mission="generate description",
+            mission="generate description only",
             model_params={"key": self.gemini_key, "model": "gemini"},
         )
         image_agent = Agent(
-            agent_type="image",
+            agent_type=AgentTypes.IMAGE.value,
             provider="stability",
             mission="generate image",
             model_params={"key": self.stability_key},
@@ -44,7 +48,7 @@ def test_blog_post_flow(self):
             TextTaskInput("blog post about electric cars"), blog_agent, log=True
         )
         task2 = Task(
-            TextTaskInput("Generate short image description for image model"),
+            TextTaskInput("Write a short image description for the image generation model"),
             description_agent,
             pre_process=TextProcessor.text_head,
             log=True,
@@ -58,7 +62,41 @@ def test_blog_post_flow(self):
 
         final_result = flow.start()
         print("Final result:", final_result)
+
+    def test_flow_chart_image_flow(self):
+        print("---- start vision coder flow ----")
+
+        analyst = Agent(
+            agent_type=AgentTypes.VISION.value,
+            provider="openai",
+            mission="describe flow charts from images",
+            model_params={"key": self.openai_api_key, "extension": "jpg", "model": "gpt-4-vision-preview"},
+        )
+
+        coder = Agent(
+            agent_type=AgentTypes.TEXT.value,
+            provider="openai",
+            mission="write python code. respond only with the code, without explanation, extra text, or marks.",
+            model_params={"key": self.openai_api_key, "model": "gpt-3.5-turbo"},
+        )
+
+        # Define tasks
+        with open('../temp/code_flow_char.jpg', "rb") as image_file:
+            image_data = base64.b64encode(image_file.read()).decode('utf-8')
+
+        task1 = Task(
+            ImageTaskInput(desc="describe the steps of the code flow chart for an engineer.", img=image_data), agent=analyst, log=True
+        )
+
+        task2 = Task(
+            TextTaskInput("write python code from the provided context"), agent=coder, log=True
+        )
+
+        # Start SequenceFlow
+        flow = SequenceFlow([task1, task2], log=True)
+        final_result = flow.start()
+        print("Final result:", final_result)
 
 if __name__ == "__main__":
     unittest.main()
diff --git a/intelli/test/integration/test_geminiai_wrapper.py b/intelli/test/integration/test_geminiai_wrapper.py
index 68bfa8d..28e2a86 100644
--- a/intelli/test/integration/test_geminiai_wrapper.py
+++ b/intelli/test/integration/test_geminiai_wrapper.py
@@ -2,7 +2,7 @@ import os
 from dotenv import load_dotenv
 from intelli.wrappers.geminiai_wrapper import GeminiAIWrapper
-
+import base64
 
 load_dotenv()
 
 class TestGeminiAIWrapper(unittest.TestCase):
@@ -12,7 +12,7 @@ def setUpClass(cls):
         api_key = os.getenv("GEMINI_API_KEY")
         assert api_key is not None, "GEMINI_API_KEY is not set."
         cls.wrapper = GeminiAIWrapper(api_key)
-
+
     def test_generate_content(self):
         params = {
             "contents": [{
                 "parts": [{
                     "text": "Write a story about a magic backpack."
                 }]
             }]
@@ -27,10 +27,13 @@ def test_generate_content(self):
         self.assertIsNotNone(result['candidates'][0]['content']['parts'][0]['text'])
 
     def test_image_to_text(self):
-        file_path = 'temp/test_image_desc.png'
+        file_path = '../temp/test_image_desc.png'
 
         try:
-            result = self.wrapper.image_to_text('describe the image', file_path, 'png')
+            with open(file_path, "rb") as image_file:
+                image_data = base64.b64encode(image_file.read()).decode('utf-8')
+
+            result = self.wrapper.image_to_text('describe the image', image_data, 'png')
 
             self.assertTrue('candidates' in result, "The result should have a 'candidates' field.")
             self.assertIsInstance(result['candidates'], list, "Expected 'candidates' to be a list.")
@@ -44,7 +47,7 @@ def test_image_to_text(self):
 
         except Exception as error:
             self.fail(f'Gemini AI Error: {error}')
-
+
     def test_get_embeddings(self):
         text = "Write a story about a magic backpack."
         params = {
diff --git a/intelli/test/integration/test_remote_vision_model.py b/intelli/test/integration/test_remote_vision_model.py
new file mode 100644
index 0000000..10f3c4a
--- /dev/null
+++ b/intelli/test/integration/test_remote_vision_model.py
@@ -0,0 +1,39 @@
+import unittest
+import os
+from intelli.controller.remote_vision_model import RemoteVisionModel
+from intelli.model.input.vision_input import VisionModelInput
+from dotenv import load_dotenv
+
+load_dotenv()
+
+
+class TestRemoteVisionModel(unittest.TestCase):
+
+    def setUp(self):
+        self.openai_api_key = os.getenv('OPENAI_API_KEY')
+        self.gemini_api_key = os.getenv('GEMINI_API_KEY')
+
+        if not self.openai_api_key or not self.gemini_api_key:
+            raise unittest.SkipTest("Both OpenAI and Gemini keys are required for testing RemoteVisionModel")
+
+    def test_openai_image_descriptor(self):
+        print('--- call openai vision ---')
+        provider = "openai"
+        controller = RemoteVisionModel(self.openai_api_key, provider)
+
+        vision_input = VisionModelInput(content="Describe the image", file_path='../temp/test_image_desc.png', model="gpt-4-vision-preview")
+        result = controller.image_to_text(vision_input)
+
+        print(result)
+
+    def test_gemini_image_descriptor(self):
+        print('--- call gemini vision ---')
+        provider = "gemini"
+        controller = RemoteVisionModel(self.gemini_api_key, provider)
+
+        vision_input = VisionModelInput(content="Describe this image", file_path='../temp/test_image_desc.png', extension='png')
+        result = controller.image_to_text(vision_input)
+
+        print(result)
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/intelli/wrappers/geminiai_wrapper.py b/intelli/wrappers/geminiai_wrapper.py
index 3bb3a55..819fa1e 100644
--- a/intelli/wrappers/geminiai_wrapper.py
+++ b/intelli/wrappers/geminiai_wrapper.py
@@ -25,10 +25,8 @@ def generate_content(self, params, vision=False):
         except Exception as error:
             raise Exception(str(error))
 
-    def image_to_text(self, user_input, file_path, extension):
-        with open(file_path, "rb") as image_file:
-            image_data = base64.b64encode(image_file.read()).decode('utf-8')
-
+    def image_to_text(self, user_input, image_data, extension):
         params = {
             "contents": [
                 {
@@ -45,8 +43,12 @@ def image_to_text(self, user_input, image_data, extension):
             ]
         }
 
-        return self.generate_content(params, True)
+        return self.image_to_text_params(params=params)
+
+    def image_to_text_params(self, params):
+        return self.generate_content(params, True)
 
     def get_embeddings(self, params):
         url = f"{self.API_BASE_URL}{config['url']['gemini']['embeddingEndpoint']}"
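With this signature change, callers own the file I/O and hand the wrapper pre-encoded data. A minimal sketch (the key variable and file name are placeholders):

```python
# Sketch: the wrapper no longer reads files itself; callers base64-encode
# the image and pass the string (illustration only).
import base64
import os

from intelli.wrappers.geminiai_wrapper import GeminiAIWrapper

wrapper = GeminiAIWrapper(os.getenv("GEMINI_API_KEY"))

with open("photo.png", "rb") as image_file:  # hypothetical local file
    image_data = base64.b64encode(image_file.read()).decode("utf-8")

result = wrapper.image_to_text("describe the image", image_data, "png")
print(result['candidates'][0]['content']['parts'][0]['text'])
```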
diff --git a/requirements.txt b/requirements.txt
index 9677848..0f644a1 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,2 @@
-requests-mock==1.11.0
 python-dotenv==1.0.1
 networkx==3.2.1
diff --git a/setup.py b/setup.py
index 13028fb..54e5252 100644
--- a/setup.py
+++ b/setup.py
@@ -5,7 +5,7 @@
 setup(
     name="intelli",
-    version="0.0.7",
+    version="0.0.8",
     author="Intellinode",
     author_email="admin@intellinode.ai",
-    description="Create chatbots and AI agent work flows. Intelli allows to connect your data with multiple AI models like OpenAI, Gemini, and Mistral through a unified access layer.",
+    description="Create chatbots and AI agent workflows. Intelli allows you to connect your data with multiple AI models like OpenAI, Gemini, and Mistral through a unified access layer.",
@@ -18,6 +18,6 @@
     packages=find_packages(),
     python_requires='>=3.6',
     install_requires=[
-        "requests-mock==1.11.0", "python-dotenv==1.0.1", "networkx==3.2.1"
+        "python-dotenv==1.0.1", "networkx==3.2.1"
     ],
 )
\ No newline at end of file
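Taken together, the release lets an image-generation task feed a vision task through the type-aware flow. An end-to-end sketch modeled on the integration tests above; keys come from the environment, and the `Flow` import path is assumed from the test modules:

```python
# End-to-end sketch of the 0.0.8 image -> vision pipeline (illustration only;
# prompts are placeholders, model names follow the integration tests).
import asyncio
import os

from intelli.flow.agents.agent import Agent
from intelli.flow.flow import Flow  # assumed import path
from intelli.flow.input.task_input import TextTaskInput
from intelli.flow.tasks.task import Task

paint_task = Task(
    TextTaskInput("generate art"),
    Agent("image", "stability", "a futuristic city at sunset",
          {"key": os.getenv("STABILITY_API_KEY"), "model": "stable-diffusion-xl-1024-v1-0"}),
    log=True,
)

describe_task = Task(
    TextTaskInput("explain the image"),
    Agent("vision", "openai", "describe the image elements",
          {"key": os.getenv("OPENAI_API_KEY"), "model": "gpt-4-vision-preview"}),
    log=True,
)

# the image output of paint_task is routed as the vision input of describe_task
flow = Flow(tasks={"paint": paint_task, "describe": describe_task},
            map_paths={"paint": ["describe"]},
            log=True)

print(asyncio.run(flow.start()))
```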