Skip to content

Commit

Permalink
Merge pull request #52 from intelligentnode/3-create-unified-vision-c…
Browse files Browse the repository at this point in the history
…ontroller

3 create unified vision controller
  • Loading branch information
intelligentnode authored Feb 11, 2024
2 parents 00d4897 + 962148e commit 0e2f63a
Show file tree
Hide file tree
Showing 18 changed files with 410 additions and 104 deletions.
65 changes: 32 additions & 33 deletions PIPREADME.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,36 +19,6 @@ pip install intelli

# Code Examples

## Create AI Flows
You can create a flow of tasks executed by different AI models. Here's an example of creating a blog post flow:
- ChatGPT agent to write a post.
- Google Gemini agent to write an image description.
- Stable Diffusion agent to generate images.

```python
from intelli.flow.agents.agent import Agent
from intelli.flow.tasks.task import Task
from intelli.flow.sequence_flow import SequenceFlow
from intelli.flow.input.task_input import TextTaskInput
from intelli.flow.processors.basic_processor import TextProcessor

# define agents
blog_agent = Agent(agent_type='text', provider='openai', mission='write blog posts', model_params={'key': YOUR_OPENAI_API_KEY, 'model': 'gpt-4'})
copy_agent = Agent(agent_type='text', provider='gemini', mission='generate description', model_params={'key': YOUR_GEMINI_API_KEY, 'model': 'gemini'})
artist_agent = Agent(agent_type='image', provider='stability', mission='generate image', model_params={'key': YOUR_STABILITY_API_KEY})

# define tasks
task1 = Task(TextTaskInput('blog post about electric cars'), blog_agent, log=True)
task2 = Task(TextTaskInput('Generate short image description for image model'), copy_agent, pre_process=TextProcessor.text_head, log=True)
task3 = Task(TextTaskInput('Generate cartoon style image'), artist_agent, log=True)

# start sequence flow
flow = SequenceFlow([task1, task2, task3], log=True)
final_result = flow.start()
```

To build async AI flows with multiple paths, refer to the [flow tutorial](https://github.com/intelligentnode/Intelli/wiki/Flows).

## Create Chatbot
Switch between multiple chatbot providers without changing your code.

Expand All @@ -62,7 +32,7 @@ def call_chatbot(provider, model=None):
input.add_user_message("What is the capital of France?")

# creating chatbot instance
openai_bot = Chatbot(YOUR_OPENAI_API_KEY, "openai")
openai_bot = Chatbot(YOUR_API_KEY, provider)
response = openai_bot.chat(input)

return response
Expand All @@ -73,11 +43,10 @@ call_chatbot("openai", "gpt-4")
# call mistralai
call_chatbot("mistral", "mistral-medium")

# call gooogle gemini
# call google gemini
call_chatbot("gemini")
```


## Connect Your Docs With Chatbot
IntelliPy allows you to chat with your docs using multiple LLMs. To connect your data, visit the [IntelliNode App](https://app.intellinode.ai/), start a project using the Document option, upload your documents or images, and copy the generated One Key. This key will be used to connect the chatbot to your uploaded data.

Expand Down Expand Up @@ -110,6 +79,36 @@ wrapper = RemoteImageModel(your_api_key, provider)
results = wrapper.generate_images(image_input)
```

## Create AI Flows
You can create a flow of tasks executed by different AI models. Here's an example of creating a blog post flow:
- ChatGPT agent to write a post.
- Google Gemini agent to write an image description.
- Stable Diffusion agent to generate images.

```python
from intelli.flow.agents.agent import Agent
from intelli.flow.tasks.task import Task
from intelli.flow.sequence_flow import SequenceFlow
from intelli.flow.input.task_input import TextTaskInput
from intelli.flow.processors.basic_processor import TextProcessor

# define agents
blog_agent = Agent(agent_type='text', provider='openai', mission='write blog posts', model_params={'key': YOUR_OPENAI_API_KEY, 'model': 'gpt-4'})
copy_agent = Agent(agent_type='text', provider='gemini', mission='generate description', model_params={'key': YOUR_GEMINI_API_KEY, 'model': 'gemini'})
artist_agent = Agent(agent_type='image', provider='stability', mission='generate image', model_params={'key': YOUR_STABILITY_API_KEY})

# define tasks
task1 = Task(TextTaskInput('blog post about electric cars'), blog_agent, log=True)
task2 = Task(TextTaskInput('Generate short image description for image model'), copy_agent, pre_process=TextProcessor.text_head, log=True)
task3 = Task(TextTaskInput('Generate cartoon style image'), artist_agent, log=True)

# start sequence flow
flow = SequenceFlow([task1, task2, task3], log=True)
final_result = flow.start()
```

To build async AI flows with multiple paths, refer to the [flow tutorial](https://github.com/intelligentnode/Intelli/wiki/Flows).

# Pillars
- **The wrapper layer** provides low-level access to the latest AI models.
- **The controller layer** offers a unified input to any AI model by handling the differences.
Expand Down
56 changes: 28 additions & 28 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,34 @@ pip install intelli

# Code Examples

## Create Chatbot
Switch between multiple chatbot providers without changing your code.

```python
from intelli.function.chatbot import Chatbot
from intelli.model.input.chatbot_input import ChatModelInput

def call_chatbot(provider, model=None):
# prepare common input
input = ChatModelInput("You are a helpful assistant.", model)
input.add_user_message("What is the capital of France?")

# creating chatbot instance
openai_bot = Chatbot(YOUR_API_KEY, provider)
response = openai_bot.chat(input)

return response

# call openai
call_chatbot("openai", "gpt-4")

# call mistralai
call_chatbot("mistral", "mistral-medium")

# call google gemini
call_chatbot("gemini")
```

## Create AI Flows
You can create a flow of tasks executed by different AI models. Here's an example of creating a blog post flow:

Expand Down Expand Up @@ -62,34 +90,6 @@ final_result = flow.start()

To build async flows with multiple paths, refer to the [flow tutorial](https://github.com/intelligentnode/Intelli/wiki/Flows).

## Create Chatbot
Switch between multiple chatbot providers without changing your code.

```python
from intelli.function.chatbot import Chatbot
from intelli.model.input.chatbot_input import ChatModelInput

def call_chatbot(provider, model=None):
# prepare common input
input = ChatModelInput("You are a helpful assistant.", model)
input.add_user_message("What is the capital of France?")

# creating chatbot instance
openai_bot = Chatbot(YOUR_OPENAI_API_KEY, "openai")
response = openai_bot.chat(input)

return response

# call openai
call_chatbot("openai", "gpt-4")

# call mistralai
call_chatbot("mistral", "mistral-medium")

# call google gemini
call_chatbot("gemini")
```


## Connect Your Docs With Chatbot
IntelliPy allows you to chat with your docs using multiple LLMs. To connect your data, visit the [IntelliNode App](https://app.intellinode.ai/), start a project using the Document option, upload your documents or images, and copy the generated One Key. This key will be used to connect the chatbot to your uploaded data.
Expand Down
3 changes: 3 additions & 0 deletions instructions/run_integration_text.sh
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@ python3 -m unittest intelli.test.integration.test_remote_embed_model
# images
python3 -m unittest intelli.test.integration.test_remote_image_model

# vision
python3 -m unittest intelli.test.integration.test_remote_vision_model

## functions
# chatbot
python3 -m unittest intelli.test.integration.test_chatbot
Expand Down
44 changes: 44 additions & 0 deletions intelli/controller/remote_vision_model.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
from intelli.wrappers.openai_wrapper import OpenAIWrapper
from intelli.wrappers.geminiai_wrapper import GeminiAIWrapper
from intelli.model.input.vision_input import VisionModelInput

class RemoteVisionModel:
    """Unified image-to-text controller over the supported vision providers.

    The controller instantiates the wrapper registered for the chosen provider
    and exposes a single ``image_to_text`` entry point that returns plain text
    regardless of which provider answered.
    """

    # provider name -> wrapper class; the wrapper is constructed with the API key
    supported_vision_models = {
        "openai": OpenAIWrapper,
        "gemini": GeminiAIWrapper,
    }

    def __init__(self, api_key, provider="openai"):
        """Create the controller and the wrapper for ``provider``.

        Args:
            api_key: key passed to the provider wrapper's constructor.
            provider: one of the keys of ``supported_vision_models``.

        Raises:
            ValueError: if ``provider`` is not a supported provider.
        """
        if provider not in self.supported_vision_models:
            supported_models = ", ".join(self.supported_vision_models.keys())
            raise ValueError(f"The provided provider {provider} not supported. Supported providers: {supported_models}")

        self.api_key = api_key
        self.provider = provider
        self.provider_wrapper = self.supported_vision_models[provider](api_key)

    def image_to_text(self, vision_input):
        """Describe an image as text using the configured provider.

        Args:
            vision_input: either a ``VisionModelInput`` (converted through
                ``get_provider_inputs`` for the current provider) or a dict
                that is forwarded to the wrapper unchanged.

        Returns:
            The text extracted from the provider response.

        Raises:
            ValueError: if ``vision_input`` has an unsupported type, or if the
                configured provider has no vision handler in this class.
        """
        if isinstance(vision_input, dict):
            inputs = vision_input
        elif isinstance(vision_input, VisionModelInput):
            inputs = vision_input.get_provider_inputs(self.provider)
        else:
            raise ValueError("vision_input must be an instance of VisionModelInput or a dictionary.")

        if self.provider == "openai":
            return self.call_openai_vision(inputs)
        if self.provider == "gemini":
            return self.call_gemini_vision(inputs)

        # A provider can be registered in supported_vision_models without a
        # matching branch here; fail loudly instead of returning None.
        raise ValueError(f"No vision handler is implemented for provider {self.provider}.")

    def call_openai_vision(self, inputs):
        """Call the OpenAI wrapper and join the content of every returned choice.

        Reads ``data['choices'][i]['message']['content']`` from the wrapper
        response and joins the values with a single space.
        """
        data = self.provider_wrapper.image_to_text(inputs)
        return " ".join(choice['message']['content'] for choice in data['choices'])

    def call_gemini_vision(self, inputs):
        """Call the Gemini wrapper and join the text parts of the first candidate.

        Reads ``data['candidates'][0]['content']['parts'][i]['text']`` from the
        wrapper response and joins the values with a single space.
        """
        data = self.provider_wrapper.image_to_text_params(inputs)
        return " ".join(part['text'] for part in data['candidates'][0]['content']['parts'])
18 changes: 15 additions & 3 deletions intelli/flow/agents/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
from intelli.model.input.chatbot_input import ChatModelInput
from intelli.model.input.image_input import ImageModelInput
from intelli.flow.input.agent_input import AgentInput, TextAgentInput, ImageAgentInput
from intelli.controller.remote_vision_model import RemoteVisionModel
from intelli.model.input.vision_input import VisionModelInput


class BasicAgent(ABC):
Expand All @@ -31,14 +33,24 @@ def execute(self, agent_input: AgentInput):

# Check the agent type and call the appropriate function
if self.type == AgentTypes.TEXT.value:
chatbot = Chatbot(self.model_params['key'], self.provider, self.options)
chat_input = ChatModelInput(self.mission, model=self.model_params.get('model'))

chatbot = Chatbot(self.model_params['key'], self.provider, self.options)
chat_input.add_user_message(agent_input.desc)
result = chatbot.chat(chat_input)[0]
elif self.type == AgentTypes.IMAGE.value:
image_input = ImageModelInput(prompt=self.mission + ": " + agent_input.desc, model=self.model_params.get('model'))

image_model = RemoteImageModel(self.model_params['key'], self.provider)
image_input = ImageModelInput(prompt=agent_input.desc, model=self.model_params.get('model'))
result = image_model.generate_images(image_input)
result = image_model.generate_images(image_input)[0]
elif self.type == AgentTypes.VISION.value:
vision_input = VisionModelInput(content=self.mission + ": " + agent_input.desc,
image_data=agent_input.img,
extension=self.model_params.get('extension', 'png'),
model=self.model_params['model'])

vision_model = RemoteVisionModel(self.model_params['key'], self.provider)
result = vision_model.image_to_text(vision_input)
else:
raise ValueError(f"Unsupported agent type: {self.type}.")

Expand Down
9 changes: 8 additions & 1 deletion intelli/flow/flow.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import asyncio
import networkx as nx
from intelli.utils.logging import Logger
from intelli.flow.types import AgentTypes, InputTypes, Matcher
from functools import partial


Expand Down Expand Up @@ -42,8 +43,14 @@ async def _execute_task(self, task_name):
print(f"Warning: Output for predecessor task '{pred}' not found. Skipping...")

self.logger.log(f'The number of combined inputs for task {task_name} is {len(predecessor_outputs)}')
merged_input = " ".join(predecessor_outputs)
merged_type = next(iter(predecessor_types)) if len(predecessor_types) == 1 else None
if merged_type and merged_type == InputTypes.TEXT.value:
merged_input = " ".join(predecessor_outputs)
elif predecessor_outputs:
# predecessors are not all text, so they cannot be joined: use the first output only
merged_input = predecessor_outputs[0]
else:
merged_input = None

# Execute task with merged input
loop = asyncio.get_event_loop()
Expand Down
54 changes: 44 additions & 10 deletions intelli/flow/tasks/task.py
Original file line number Diff line number Diff line change
@@ -1,47 +1,81 @@
from intelli.flow.template.basic_template import TextInputTemplate
from intelli.flow.types import AgentTypes, InputTypes
from intelli.flow.types import AgentTypes, InputTypes, Matcher
from intelli.utils.logging import Logger
from intelli.flow.input.agent_input import AgentInput, TextAgentInput, ImageAgentInput


class Task:
def __init__(self, task_input, agent, exclude=False, pre_process=None,
post_process=None, template=None, log=False):
self.task_input = task_input
self.desc = task_input.desc
self.agent = agent
self.pre_process = pre_process
self.post_process = post_process
self.exclude = exclude
self.output = None
self.output_type = agent.type
self.output_type = Matcher.output[agent.type]
self.template = template
self.logger = Logger(log)
if not template and agent.type in [AgentTypes.TEXT.value, AgentTypes.IMAGE.value]:
if not template and Matcher.input[agent.type] in [InputTypes.TEXT.value]:
self.template = TextInputTemplate(self.desc)

def execute(self, input_data=None, input_type=None):

# logging
if input_type in [InputTypes.TEXT.value, InputTypes.IMAGE.value]:
if input_type in [InputTypes.TEXT.value]:
self.logger.log_head('- Inside the task with input data head: ', input_data)
elif input_type == InputTypes.IMAGE.value and self.agent.type in [AgentTypes.TEXT.value,
AgentTypes.IMAGE.value]:
self.logger.log_head('- Inside the task. the previous step input not supported')
elif input_type == InputTypes.IMAGE.value and self.agent.type in [AgentTypes.TEXT.value, AgentTypes.IMAGE.value]:
self.logger.log('- Inside the task. the previous step input not supported')
elif input_type == InputTypes.IMAGE.value:
self.logger.log('- Inside the task with previous image, size: ', len(input_data))

# Run task pre-processing
if self.pre_process:
input_data = self.pre_process(input_data)

# Apply template
if input_data and input_type in [InputTypes.TEXT.value, InputTypes.IMAGE.value]:
# Apply input template
if input_data and input_type in [InputTypes.TEXT.value]:
agent_text = self.template.apply_input(input_data)
# log
self.logger.log_head('- Input data with template: ', agent_text)
else:
agent_text = self.desc

# Prepare the input
agent_inputs = []
if Matcher.input[self.agent.type] == InputTypes.IMAGE.value:

if self.task_input.img:
agent_input = ImageAgentInput(desc=agent_text, img=self.task_input.img)
agent_inputs.append(agent_input)

# also pass the previous task's image output as an input when the task has no image of its own, or when the agent outputs text
if len(agent_inputs) == 0 or Matcher.output[self.agent.type] == InputTypes.TEXT.value:
if input_data and input_type == InputTypes.IMAGE.value:
agent_input = ImageAgentInput(desc=agent_text, img=input_data)
agent_inputs.append(agent_input)

elif Matcher.input[self.agent.type] == AgentTypes.TEXT.value:
agent_input = TextAgentInput(agent_text)
agent_inputs.append(agent_input)

# Check the agent type and call the appropriate function
result = self.agent.execute(TextAgentInput(agent_text))
combined_results = []
for current_agent_input in agent_inputs:

result = self.agent.execute(current_agent_input)

if isinstance(result, list):
combined_results.extend(result)
else:
combined_results.append(str(result))

if Matcher.output[self.agent.type] == InputTypes.TEXT.value:
result = " ".join(combined_results)
else:
# keep only the first result for non-text outputs
result = combined_results[0]

# log
if self.agent.type in [AgentTypes.TEXT.value]:
Expand Down
15 changes: 15 additions & 0 deletions intelli/flow/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,23 @@
class AgentTypes(Enum):
    """Kinds of agent that a flow task can run."""

    TEXT = 'text'
    IMAGE = 'image'
    VISION = 'vision'


class InputTypes(Enum):
    """Kinds of data that can be passed between flow tasks."""

    TEXT = 'text'
    IMAGE = 'image'
    VISION = 'vision'

class Matcher:
    """Lookup tables keyed by agent type value.

    ``input`` gives the kind of data an agent of that type consumes and
    ``output`` gives the kind of data it produces.
    """

    # agent type -> kind of input it accepts
    input = dict(text='text', image='text', vision='image')

    # agent type -> kind of output it returns
    output = dict(text='text', image='image', vision='text')
Loading

0 comments on commit 0e2f63a

Please sign in to comment.