Merge pull request #87 from intelligentnode/86-add-nvidia-models
Add Nvidia models
intelligentnode authored Feb 1, 2025
2 parents 670a129 + af65b74 commit 13e1a98
Showing 9 changed files with 236 additions and 0 deletions.
5 changes: 5 additions & 0 deletions intelli/config.py
@@ -73,6 +73,11 @@
"messages": "/v1/messages",
"version": "2023-06-01"
},
"nvidia": {
"base": "https://integrate.api.nvidia.com",
"chat": "/v1/chat/completions",
"embeddings": "/v1/embeddings"
},
},
"models": {
"replicate": {
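The wrapper added below joins these config entries into full endpoint URLs; a minimal sketch of what they resolve to:

from intelli.config import config

nvidia_cfg = config["url"]["nvidia"]
chat_url = nvidia_cfg["base"] + nvidia_cfg["chat"]         # https://integrate.api.nvidia.com/v1/chat/completions
embed_url = nvidia_cfg["base"] + nvidia_cfg["embeddings"]  # https://integrate.api.nvidia.com/v1/embeddings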
4 changes: 4 additions & 0 deletions intelli/controller/remote_embed_model.py
@@ -2,6 +2,7 @@
from intelli.wrappers.geminiai_wrapper import GeminiAIWrapper
from intelli.wrappers.mistralai_wrapper import MistralAIWrapper
from intelli.wrappers.openai_wrapper import OpenAIWrapper
from intelli.wrappers.nvidia_wrapper import NvidiaWrapper


class RemoteEmbedModel:
@@ -11,6 +12,7 @@ def __init__(self, api_key, provider_name):
'openai': OpenAIWrapper,
'mistral': MistralAIWrapper,
'gemini': GeminiAIWrapper,
'nvidia': NvidiaWrapper
}
if self.provider_name in providers:
self.provider = providers[self.provider_name](api_key)
@@ -30,6 +32,8 @@ def get_embeddings(self, embed_input):
params = embed_input.get_mistral_inputs()
elif self.provider_name == 'gemini':
params = embed_input.get_gemini_inputs()
elif self.provider_name == 'nvidia':
params = embed_input.get_nvidia_inputs()
else:
raise Exception("Invalid provider name.")

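A minimal usage sketch of the updated controller with the new provider, using the model name from the integration test below (assumes NVIDIA_API_KEY is set in the environment):

import os
from intelli.controller.remote_embed_model import RemoteEmbedModel
from intelli.model.input.embed_input import EmbedInput

embed_model = RemoteEmbedModel(os.getenv("NVIDIA_API_KEY"), "nvidia")
embed_input = EmbedInput(["What is the capital of France?"],
                         model="nvidia/llama-3.2-nv-embedqa-1b-v2")
result = embed_model.get_embeddings(embed_input)
print(result["data"][0]["embedding"][:5])  # response shape per the wrapper test below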
25 changes: 25 additions & 0 deletions intelli/function/chatbot.py
@@ -8,6 +8,7 @@
from intelli.wrappers.openai_wrapper import OpenAIWrapper
from intelli.wrappers.anthropic_wrapper import AnthropicWrapper
from intelli.wrappers.keras_wrapper import KerasWrapper
from intelli.wrappers.nvidia_wrapper import NvidiaWrapper
from enum import Enum

class ChatProvider(Enum):
@@ -16,6 +17,7 @@ class ChatProvider(Enum):
MISTRAL = "mistral"
ANTHROPIC = "anthropic"
KERAS = "keras"
NVIDIA = "nvidia"

class Chatbot:

@@ -58,6 +60,8 @@ def _initialize_provider(self):
return AnthropicWrapper(self.api_key)
elif self.provider == ChatProvider.KERAS.value:
return KerasWrapper(self.options['model_name'], self.options.get('model_params', {}))
elif self.provider == ChatProvider.NVIDIA.value:
return NvidiaWrapper(self.api_key)
else:
raise ValueError(f"Unsupported provider: {self.provider}")

@@ -104,6 +108,13 @@ def _chat_anthropic(self, params):
response = self.wrapper.generate_text(params)

return [message['text'] for message in response['content']]

def _chat_nvidia(self, params):
result = self.wrapper.generate_text(params)
choices = result.get("choices", [])
if not choices:
raise Exception("No choices returned from NVIDIA API")
return [choices[0]["message"]["content"]]

def stream(self, chat_input):
"""Streams responses from the selected provider for the given chat input."""
@@ -156,6 +167,20 @@ def _stream_anthropic(self, params):
except json.JSONDecodeError as e:
print("Error decoding JSON from stream:", e)

def _stream_nvidia(self, params):
params["stream"] = True
stream = self.wrapper.generate_text_stream(params)
for line in stream:
if line.strip() and line.startswith("data: ") and line != "data: [DONE]":
json_content = line[len("data: "):].strip()
try:
data_chunk = json.loads(json_content)
content = data_chunk.get("choices", [{}])[0].get("delta", {}).get("content", "")
if content:
yield content
except json.JSONDecodeError as e:
print("Error decoding JSON:", e)

# helpers
def _parse_openai_responses(self, results):
responses = []
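A usage sketch of the new provider path, mirroring the integration test further down (model name, constructor arguments, and response shapes are taken from that test, not guaranteed beyond it):

import os
from intelli.function.chatbot import Chatbot, ChatProvider
from intelli.model.input.chatbot_input import ChatModelInput

bot = Chatbot(os.getenv("NVIDIA_API_KEY"), ChatProvider.NVIDIA.value)

chat_input = ChatModelInput("You are a helpful assistant.",
                            model="deepseek-ai/deepseek-r1",
                            max_tokens=1024, temperature=0.6)
chat_input.add_user_message("What is the capital city of France?")

# Normal chat: _chat_nvidia returns a one-element list with the first choice's text.
print(bot.chat(chat_input))

# Streaming: _stream_nvidia yields incremental content parsed from the SSE lines.
for chunk in bot.stream(chat_input):
    print(chunk, end="")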
12 changes: 12 additions & 0 deletions intelli/model/input/chatbot_input.py
@@ -131,3 +131,15 @@ def get_keras_input(self):
**self.options
}
return params

def get_nvidia_input(self):
messages = [{'role': msg.role, 'content': msg.content} for msg in self.messages]
params = {
'model': self.model,
'messages': messages,
**({'temperature': self.temperature} if self.temperature is not None else {}),
**({'max_tokens': self.max_tokens} if self.max_tokens is not None else {}),
**self.options
}
return params
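
For reference, the payload this builder produces for the inputs used in the tests — a sketch that assumes the constructor's system prompt is stored as a 'system' message, as the other providers' builders suggest:

chat_input = ChatModelInput("You are a helpful assistant.",
                            model="deepseek-ai/deepseek-r1",
                            max_tokens=1024, temperature=0.6)
chat_input.add_user_message("Hello")
chat_input.get_nvidia_input()
# {'model': 'deepseek-ai/deepseek-r1',
#  'messages': [{'role': 'system', 'content': 'You are a helpful assistant.'},
#               {'role': 'user', 'content': 'Hello'}],
#  'temperature': 0.6,
#  'max_tokens': 1024}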

10 changes: 10 additions & 0 deletions intelli/model/input/embed_input.py
@@ -21,6 +21,16 @@ def get_gemini_inputs(self):
"parts": [{"text": text} for text in self.texts]
}
}

def get_nvidia_inputs(self):
inputs = {
"input": self.texts,
"model": self.model,
"input_type": "query",
"encoding_format": "float",
"truncate": "NONE"
}
return inputs

def set_default_values(self, provider):
if provider == "openai":
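With the inputs used in the tests below, get_nvidia_inputs returns the request body shown above verbatim:

embed_input = EmbedInput(["What is the capital of France?"],
                         model="nvidia/llama-3.2-nv-embedqa-1b-v2")
embed_input.get_nvidia_inputs()
# {'input': ['What is the capital of France?'],
#  'model': 'nvidia/llama-3.2-nv-embedqa-1b-v2',
#  'input_type': 'query',
#  'encoding_format': 'float',
#  'truncate': 'NONE'}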
45 changes: 45 additions & 0 deletions intelli/test/integration/test_chatbot_nvidia.py
@@ -0,0 +1,45 @@
import unittest
import os
import asyncio
from dotenv import load_dotenv
from intelli.function.chatbot import Chatbot, ChatProvider
from intelli.model.input.chatbot_input import ChatModelInput

load_dotenv()

class TestChatbotNvidiaChatAndStream(unittest.TestCase):
def setUp(self):
self.nvidia_api_key = os.getenv("NVIDIA_API_KEY")
assert self.nvidia_api_key, "NVIDIA_API_KEY is not set."
self.chatbot = Chatbot(self.nvidia_api_key, ChatProvider.NVIDIA.value)

def test_nvidia_chat_and_stream(self):

# Test normal chat
print("Testing Nvidia chat")
normal_input = ChatModelInput("You are a helpful assistant.", model="deepseek-ai/deepseek-r1", max_tokens=1024, temperature=0.6)
normal_input.add_user_message("What is the capital city of France?")
response = self.chatbot.chat(normal_input)
if isinstance(response, dict) and "result" in response:
normal_output = response["result"]
else:
normal_output = response
self.assertTrue(len(normal_output) > 0, "Nvidia normal chat response should not be empty")
print("Nvidia normal chat output:", normal_output)

# Test streaming chat
print("Testing Nvidia stream")
stream_input = ChatModelInput("You are a helpful assistant.", model="deepseek-ai/deepseek-r1", max_tokens=1024, temperature=0.6)
stream_input.add_user_message("What is the capital city of France?")
stream_output = asyncio.run(self.get_stream_output(stream_input))
self.assertTrue(len(stream_output) > 0, "Nvidia stream response should not be empty")
print("Nvidia stream output:", stream_output)

async def get_stream_output(self, chat_input):
output = ""
for chunk in self.chatbot.stream(chat_input):
output += chunk
return output

if __name__ == "__main__":
unittest.main()
68 changes: 68 additions & 0 deletions intelli/test/integration/test_nvidia_wrapper.py
@@ -0,0 +1,68 @@
import unittest
import os
from dotenv import load_dotenv
from intelli.wrappers.nvidia_wrapper import NvidiaWrapper

load_dotenv()


class TestNvidiaWrapper(unittest.TestCase):
@classmethod
def setUpClass(cls):
cls.api_key = os.getenv("NVIDIA_API_KEY")
assert cls.api_key, "NVIDIA_API_KEY is not set."
cls.wrapper = NvidiaWrapper(cls.api_key)

def test_generate_text_llama(self):
params = {
"model": "meta/llama-3.3-70b-instruct",
"messages": [
{"role": "user", "content": "Write a limerick about GPU computing."}
],
"max_tokens": 1024,
"temperature": 0.2,
"top_p": 0.7,
"stream": False,
}
response = self.wrapper.generate_text(params)
self.assertIn("choices", response)
self.assertGreater(len(response["choices"]), 0)
message = response["choices"][0]["message"]["content"]
self.assertTrue(isinstance(message, str) and len(message) > 0)

def test_generate_text_deepseek(self):
params = {
"model": "deepseek-ai/deepseek-r1",
"messages": [
{"role": "user", "content": "Which number is larger, 9.11 or 9.8?"}
],
"max_tokens": 4096,
"temperature": 0.6,
"top_p": 0.7,
"stream": False,
}
response = self.wrapper.generate_text(params)
self.assertIn("choices", response)
self.assertGreater(len(response["choices"]), 0)
message = response["choices"][0]["message"]["content"]
self.assertTrue(isinstance(message, str) and len(message) > 0)

def test_get_embeddings(self):
params = {
"input": ["What is the capital of France?"],
"model": "nvidia/llama-3.2-nv-embedqa-1b-v2",
"input_type": "query",
"encoding_format": "float",
"truncate": "NONE",
}
response = self.wrapper.get_embeddings(params)
self.assertIn("data", response)
self.assertGreater(len(response["data"]), 0)
self.assertIn("embedding", response["data"][0])
embedding = response["data"][0]["embedding"]
self.assertIsInstance(embedding, list)
self.assertGreater(len(embedding), 0)


if __name__ == "__main__":
unittest.main()
29 changes: 29 additions & 0 deletions intelli/test/integration/test_remote_embed_model_nvidia.py
@@ -0,0 +1,29 @@
import unittest
import os
from dotenv import load_dotenv
from intelli.model.input.embed_input import EmbedInput
from intelli.controller.remote_embed_model import RemoteEmbedModel

load_dotenv()

class TestRemoteEmbedModelNvidia(unittest.TestCase):
@classmethod
def setUpClass(cls):
cls.api_key = os.getenv("NVIDIA_API_KEY")
assert cls.api_key, "NVIDIA_API_KEY is not set."
cls.embed_model = RemoteEmbedModel(cls.api_key, "nvidia")

def test_get_embeddings(self):
text = "What is the capital of France?"
embed_input = EmbedInput([text], model="nvidia/llama-3.2-nv-embedqa-1b-v2")
result = self.embed_model.get_embeddings(embed_input)
self.assertIn("data", result)
self.assertGreater(len(result["data"]), 0)
self.assertIn("embedding", result["data"][0])
embedding = result["data"][0]["embedding"]
self.assertIsInstance(embedding, list)
self.assertGreater(len(embedding), 0)
print("Nvidia embedding sample:", embedding[:5])

if __name__ == "__main__":
unittest.main()
38 changes: 38 additions & 0 deletions intelli/wrappers/nvidia_wrapper.py
@@ -0,0 +1,38 @@
import requests
from intelli.config import config


class NvidiaWrapper:
def __init__(self, api_key: str):
self.api_key = api_key
self.base_url = config["url"]["nvidia"]["base"]
self.chat_endpoint = config["url"]["nvidia"]["chat"]
self.embeddings_endpoint = config["url"]["nvidia"]["embeddings"]
self.headers = {
"Content-Type": "application/json",
"Accept": "application/json",
"Authorization": f"Bearer {api_key}",
}

def generate_text(self, params: dict) -> dict:
if "stream" not in params:
params["stream"] = False
url = self.base_url + self.chat_endpoint
response = requests.post(url, json=params, headers=self.headers)
response.raise_for_status()
return response.json()

def generate_text_stream(self, params: dict):
params["stream"] = True
url = self.base_url + self.chat_endpoint
response = requests.post(url, json=params, headers=self.headers, stream=True)
response.raise_for_status()
for line in response.iter_lines(decode_unicode=True):
if line:
yield line

def get_embeddings(self, params: dict) -> dict:
url = self.base_url + self.embeddings_endpoint
response = requests.post(url, json=params, headers=self.headers)
response.raise_for_status()
return response.json()
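
A direct usage sketch of the wrapper. The raw stream lines shown in the comment are an assumption inferred from the SSE parsing in _stream_nvidia above ("data: ..." framing, "data: [DONE]" terminator):

import os
from intelli.wrappers.nvidia_wrapper import NvidiaWrapper

wrapper = NvidiaWrapper(os.getenv("NVIDIA_API_KEY"))

# Blocking call; generate_text defaults "stream" to False when unset.
response = wrapper.generate_text({
    "model": "meta/llama-3.3-70b-instruct",
    "messages": [{"role": "user", "content": "Write a limerick about GPU computing."}],
})
print(response["choices"][0]["message"]["content"])

# Streaming yields raw SSE lines, e.g. (assumed format):
#   data: {"choices": [{"delta": {"content": "Hel"}}]}
#   data: [DONE]
for line in wrapper.generate_text_stream({
    "model": "meta/llama-3.3-70b-instruct",
    "messages": [{"role": "user", "content": "Hi"}],
}):
    print(line)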
