diff --git a/demos/README.md b/demos/README.md index e3ac1848..f9fa6aff 100644 --- a/demos/README.md +++ b/demos/README.md @@ -11,6 +11,12 @@ Lightweight on-device AI assistant with function calling (web search) using Gran - [Python-Binding-Example](./Agent-Granite/Python-Binding-Example) - [Serve-Example](./Agent-Granite/Serve-Example) +### 🔧 Function-Calling + +Function calling capabilities with NexaAI VLM model, integrated with Google Calendar via MCP protocol. Supports multi-modal input (text, image, audio) with Web UI and CLI interfaces. + +- [Demo](./function-calling) + ### 📚 RAG-LLM End-to-end Retrieval-Augmented Generation pipeline with embeddings, reranking, and generation models. Query your own documents (PDFs, Word, text) locally on device. diff --git a/demos/function-calling/.gitignore b/demos/function-calling/.gitignore new file mode 100644 index 00000000..bf5507b2 --- /dev/null +++ b/demos/function-calling/.gitignore @@ -0,0 +1,3 @@ +__pycache__/ +*.json +app/uploads/ \ No newline at end of file diff --git a/demos/function-calling/README.md b/demos/function-calling/README.md new file mode 100644 index 00000000..20a573e0 --- /dev/null +++ b/demos/function-calling/README.md @@ -0,0 +1,70 @@ +# NexaAI VLM Function Call Demo with Google Calendar MCP + +Demonstrates function calling capabilities of NexaAI/OmniNeural-4B model, integrated with Google Calendar via MCP protocol. + +## Features + +- Function calling with NexaAI VLM model +- Google Calendar integration via MCP +- Automatic function call parsing and execution +- Multi-modal input support (text, image, audio) +- Web UI and command-line interfaces + +## Prerequisites + +- **Python 3** (arm64 architecture recommended) + - See [bindings/python/notebook/windows(arm64).ipynb](../../bindings/python/notebook/windows(arm64).ipynb) for setup +- **Node.js and npm** (for MCP server) + ```powershell + winget install OpenJS.NodeJS.LTS + ``` + Restart terminal/IDE after installation. 
+ +## Installation + +```bash +pip install -r requirements.txt +``` + +## Google Calendar Setup + +1. Go to [Google Cloud Console](https://console.cloud.google.com) +2. Create/select a project and enable [Google Calendar API](https://console.cloud.google.com/apis/library/calendar-json.googleapis.com) +3. Go to [OAuth consent screen](https://console.cloud.google.com/apis/credentials/consent) and follow the instructions to configure your consent screen. +4. Create OAuth 2.0 credentials: + - Go to [Credentials](https://console.cloud.google.com/apis/credentials) + - Click "Create Credentials" > "OAuth client ID" > Select "Desktop app" > click "Create" + - Click "Download JSON" and save as `gcp-oauth.keys.json` in the same directory as this README.md +5. Add your email as a test user in [Audience](https://console.cloud.google.com/auth/audience) + - Click "Add User" > enter your email address > click "Save" + - Note: Test mode tokens expire after 1 week + +6. Authentication (only needs to be done once) +```powershell +$env:GOOGLE_OAUTH_CREDENTIALS="gcp-oauth.keys.json" +npx @cocal/google-calendar-mcp auth +``` +Follow the instructions to authorize the application to access your Google Calendar. + +**Tip**: Ensure the OAuth client ID is enabled for Calendar API at [Credentials](https://console.cloud.google.com/apis/api/calendar-json.googleapis.com/credentials) +For detailed setup, see: https://github.com/nspady/google-calendar-mcp?tab=readme-ov-file#google-cloud-setup + +## Usage + +### Command Line + +```bash +# Text only +python main.py --text "what is the time now?" 
+ +# Image with text +python main.py --image image.png --text "help me add this event to my calendar" + +# Audio with text +python main.py --audio audio.mp3 --text "transcribe and add to calendar" +``` +### Web UI + +```powershell +python .\app\flask_ui.py +``` diff --git a/demos/function-calling/app/flask_ui.py b/demos/function-calling/app/flask_ui.py new file mode 100644 index 00000000..7436d6b9 --- /dev/null +++ b/demos/function-calling/app/flask_ui.py @@ -0,0 +1,231 @@ +from flask import Flask, render_template, request, jsonify, send_from_directory +from pathlib import Path +from datetime import datetime +from image_utils import image_to_base64 +import uuid + +import sys +sys.path.insert(0, str(Path(__file__).resolve().parents[1])) +from main import call_agent_wrapper, FunctionCallAgentResult + +app = Flask(__name__) +app.config['MAX_CONTENT_LENGTH'] = 16 * 1024 * 1024 # 16MB max file size +app.config['UPLOAD_FOLDER'] = Path(__file__).resolve().parent / 'uploads' +app.config['UPLOAD_FOLDER'].mkdir(parents=True, exist_ok=True) + +chat_history = [] +processing_tasks = {} + +def allowed_file(filename): + """check allowed file types""" + allowed_extensions = {'png', 'jpg', 'jpeg', 'gif', 'webp', 'bmp'} + return '.' 
in filename and filename.rsplit('.', 1)[1].lower() in allowed_extensions + +def process_message(text_content=None, image_path=None): + """handle user message""" + message = { + 'type': 'user', + 'timestamp': datetime.now().isoformat(), + 'text': text_content if text_content else None, + 'image': None + } + + if image_path: + try: + message['image'] = image_to_base64(image_path) + except Exception as e: + return None, f"Image handle error: {str(e)}" + + chat_history.append(message) + return message, None + +def add_bot_response(response_type='text', content=None): + bot_message = { + 'type': 'bot', + 'timestamp': datetime.now().isoformat(), + 'response_type': response_type, # 'text', 'event' + 'content': content + } + chat_history.append(bot_message) + return bot_message + +@app.route('/') +def index(): + """chat page""" + return render_template('chat.html') + +@app.route('/api/send-message', methods=['POST']) +def send_message(): + """ + handle user message + inputs: text and/or image + return: task_id + """ + try: + text_content = request.form.get('message', '').strip() + image_file = request.files.get('image') + + if not text_content and not image_file: + return jsonify({'error': 'Please provide text or image'}), 400 + + image_path = None + if image_file and image_file.filename: + if not allowed_file(image_file.filename): + return jsonify({'error': 'not allowed file'}), 400 + + # save uploaded image + timestamp = datetime.now().strftime('%Y%m%d_%H%M%S_') + filename = timestamp + image_file.filename + image_path = app.config['UPLOAD_FOLDER'] / filename + image_file.save(image_path) + + # handle user message + user_message, error = process_message(text_content, image_path) + if error: + return jsonify({'error': error}), 400 + + # create task id + task_id = str(uuid.uuid4()) + # register task so client can poll /api/get-response/ + processing_tasks[task_id] = { + 'status': 'pending', + 'user_text': text_content, + 'image_path': str(image_path) if image_path else 
None + } + + return jsonify({ + 'user_message': user_message, + 'task_id': task_id, + 'success': True + }) + + except Exception as e: + return jsonify({'error': f'Serve error: {str(e)}'}), 500 + +@app.route('/api/get-response/', methods=['GET']) +async def get_response(task_id): + if task_id in processing_tasks: + task = processing_tasks[task_id] + if task['status'] == 'done': + # clear task + del processing_tasks[task_id] + return jsonify(task['result']) + elif task['status'] == 'processing': + return jsonify({'status': 'processing'}) + elif task['status'] == 'pending': + # process now (synchronous handling for simplicity) + processing_tasks[task_id]['status'] = 'processing' + try: + response = await call_agent_wrapper( + text=task['user_text'], + image=task['image_path'] + ) + + if not response: + bot_response = add_bot_response( + response_type='text', + content="Sorry, I couldn't process your request." + ) + result = {'status': 'done', 'bot_response': bot_response} + processing_tasks[task_id]['status'] = 'done' + processing_tasks[task_id]['result'] = result + return jsonify(result) + + if response.func_name == "create-event" and response.func_result is not None: + import json + data = json.loads(response.func_result) + is_error = bool(data.get("isError")) + content = data.get("content") or [] + bot_response = None + if is_error: + bot_response = add_bot_response( + response_type='text', + content=content[0]["text"] + ) + else: + text = json.loads(content[0]["text"]) + event = text["event"] + print(f"[info] Event detected: {event}") + summary=event.get("summary", "No Title") + date = "N/A" + if event.get("start"): + start_time = event["start"].get("dateTime", "N/A") + date = start_time.split("T")[0] + start_time = start_time.split("T")[1] if "T" in start_time else "N/A" + else: + start_time = "N/A" + if event.get("end"): + end_time = event["end"].get("dateTime", "N/A") + end_time = end_time.split("T")[1] if "T" in end_time else "N/A" + else: + end_time = "N/A" + 
venue = event.get("location", "N/A") + description = event.get("description", summary) + address = event.get("address", "N/A") + htmlLink = event.get("htmlLink", "") + bot_response = add_bot_response( + response_type='event', + content={ + 'event_name': summary, + 'date': date, + 'start_time': start_time, + 'end_time': end_time, + 'venue': venue, + 'address': address, + 'description': description, + 'htmlLink': htmlLink + } + ) + result = {'status': 'done', 'bot_response': bot_response} + processing_tasks[task_id]['status'] = 'done' + processing_tasks[task_id]['result'] = result + # return and let client clear on next poll + return jsonify(result) + elif response.response_text is not None: + bot_response = add_bot_response( + response_type='text', + content=response.response_text + ) + result = {'status': 'done', 'bot_response': bot_response} + processing_tasks[task_id]['status'] = 'done' + processing_tasks[task_id]['result'] = result + return jsonify(result) + else: + bot_response = add_bot_response( + response_type='text', + content="Sorry, I couldn't process your request." 
+ ) + result = {'status': 'done', 'bot_response': bot_response} + processing_tasks[task_id]['status'] = 'done' + processing_tasks[task_id]['result'] = result + return jsonify(result) + except Exception as e: + bot_response = add_bot_response( + response_type='text', + content=str(e) + ) + result = {'status': 'done', 'bot_response': bot_response} + processing_tasks[task_id]['status'] = 'done' + processing_tasks[task_id]['result'] = result + return jsonify(result) + + return jsonify({ + 'status': 'done' + }) + +@app.route('/api/chat-history', methods=['GET']) +def get_chat_history(): + return jsonify(chat_history) + +@app.route('/api/clear-history', methods=['POST']) +def clear_history(): + global chat_history + chat_history = [] + return jsonify({'success': True}) + +@app.route('/uploads/') +def serve_upload(filename): + return send_from_directory(app.config['UPLOAD_FOLDER'], filename) + +if __name__ == '__main__': + app.run(debug=True, host='0.0.0.0', port=3000) diff --git a/demos/function-calling/app/image_utils.py b/demos/function-calling/app/image_utils.py new file mode 100644 index 00000000..02b8ab24 --- /dev/null +++ b/demos/function-calling/app/image_utils.py @@ -0,0 +1,29 @@ + +import base64 +import os +from pathlib import Path +from typing import Union + + +def image_to_base64(image_path: Union[str, Path]) -> str: + image_path = Path(image_path) + if not image_path.exists(): + raise FileNotFoundError(f"Image not exists: {image_path}") + suffix = image_path.suffix.lower() + mime_types = { + '.jpg': 'image/jpeg', + '.jpeg': 'image/jpeg', + '.png': 'image/png', + '.gif': 'image/gif', + '.webp': 'image/webp', + '.bmp': 'image/bmp', + } + + if suffix not in mime_types: + raise ValueError(f"Not support: {suffix}") + + mime_type = mime_types[suffix] + with open(image_path, 'rb') as f: + image_data = f.read() + base64_data = base64.b64encode(image_data).decode('utf-8') + return f"data:{mime_type};base64,{base64_data}" \ No newline at end of file diff --git 
a/demos/function-calling/app/static/calendar.png b/demos/function-calling/app/static/calendar.png new file mode 100644 index 00000000..bdc9e327 Binary files /dev/null and b/demos/function-calling/app/static/calendar.png differ diff --git a/demos/function-calling/app/static/check.png b/demos/function-calling/app/static/check.png new file mode 100644 index 00000000..76e206ab Binary files /dev/null and b/demos/function-calling/app/static/check.png differ diff --git a/demos/function-calling/app/static/close.png b/demos/function-calling/app/static/close.png new file mode 100644 index 00000000..9168c609 Binary files /dev/null and b/demos/function-calling/app/static/close.png differ diff --git a/demos/function-calling/app/static/image.png b/demos/function-calling/app/static/image.png new file mode 100644 index 00000000..f5e0c957 Binary files /dev/null and b/demos/function-calling/app/static/image.png differ diff --git a/demos/function-calling/app/static/nexaai.png b/demos/function-calling/app/static/nexaai.png new file mode 100644 index 00000000..9f6f603b Binary files /dev/null and b/demos/function-calling/app/static/nexaai.png differ diff --git a/demos/function-calling/app/static/plus.png b/demos/function-calling/app/static/plus.png new file mode 100644 index 00000000..05c3359e Binary files /dev/null and b/demos/function-calling/app/static/plus.png differ diff --git a/demos/function-calling/app/static/send.png b/demos/function-calling/app/static/send.png new file mode 100644 index 00000000..5262b0a7 Binary files /dev/null and b/demos/function-calling/app/static/send.png differ diff --git a/demos/function-calling/app/templates/chat.html b/demos/function-calling/app/templates/chat.html new file mode 100644 index 00000000..66349f22 --- /dev/null +++ b/demos/function-calling/app/templates/chat.html @@ -0,0 +1,950 @@ + + + + + + NexaAI + + + +
+ +
+ Logo +
+ + + + + +
+ +
+
Hi!
+
How can I help?
+
+ + +
+ + +
+
+
+ +
+
+ + +
+ +
+
+
+
+ + + + diff --git a/demos/function-calling/image.png b/demos/function-calling/image.png new file mode 100644 index 00000000..f55ba9f2 Binary files /dev/null and b/demos/function-calling/image.png differ diff --git a/demos/function-calling/main.py b/demos/function-calling/main.py new file mode 100644 index 00000000..b5561544 --- /dev/null +++ b/demos/function-calling/main.py @@ -0,0 +1,402 @@ +#!/usr/bin/env python3 +"""NexaAI VLM Function Call Demo with Google Calendar MCP""" + +import asyncio +import json +import os +import sys +import argparse +import re +from dataclasses import dataclass +from typing import List, Dict, Any, Optional + +from nexaai import GenerationConfig, ModelConfig, VlmChatMessage, VlmContent, setup_logging +from nexaai.vlm import VLM +from mcp import ClientSession, StdioServerParameters +from mcp.client.stdio import stdio_client + + +def mcp_tool_to_openai_format(tool) -> Dict[str, Any]: + """Convert MCP tool to OpenAI function calling format.""" + properties = {} + required = [] + + if tool.inputSchema and "properties" in tool.inputSchema: + for prop_name, prop_schema in tool.inputSchema["properties"].items(): + properties[prop_name] = { + "type": prop_schema.get("type", "string"), + "description": prop_schema.get("description", ""), + } + if tool.inputSchema.get("required") and prop_name in tool.inputSchema["required"]: + required.append(prop_name) + + return { + "type": "function", + "function": { + "name": tool.name, + "description": tool.description or "", + "parameters": { + "type": "object", + "properties": properties, + "required": required, + } + } + } + + +async def get_mcp_tools(session: ClientSession) -> List[Dict[str, Any]]: + """Get tools from MCP server and convert to OpenAI format.""" + result = await session.list_tools() + return [mcp_tool_to_openai_format(tool) for tool in result.tools] + + +def normalize_tool_name(tool_name: str, available_tools: List[Dict[str, Any]]) -> str: + """Normalize tool name to match available 
tools.""" + name_mappings = { + "create_calendar_event": "create-event", + "create-event": "create-event", + "list_calendar_events": "list-events", + "list-events": "list-events", + "update_calendar_event": "update-event", + "update-event": "update-event", + "delete_calendar_event": "delete-event", + "delete-event": "delete-event", + "get_current_time": "get-current-time", + "get-current-time": "get-current-time", + } + + tool_names = [t.get("function", {}).get("name", "") for t in available_tools] + + if tool_name in name_mappings: + normalized = name_mappings[tool_name] + if normalized in tool_names: + return normalized + + if tool_name in tool_names: + return tool_name + + normalized = tool_name.replace("_", "-") + return normalized if normalized in tool_names else tool_name + + +async def execute_mcp_tool(session: ClientSession, tool_name: str, arguments: Dict[str, Any], + available_tools: Optional[List[Dict[str, Any]]] = None) -> str: + """Execute a tool call via MCP server.""" + try: + if available_tools: + tool_name = normalize_tool_name(tool_name, available_tools) + result = await session.call_tool(tool_name, arguments=arguments) + return result.model_dump_json(indent=2) + except Exception as e: + return f"Error: {str(e)}" + + +def create_calendar_server(credentials: str) -> StdioServerParameters: + """Create Google Calendar MCP server parameters.""" + if not os.path.exists(credentials): + raise FileNotFoundError( + f"Credentials file not found: {credentials}\n" + f"Please create the OAuth credentials file at: {os.path.abspath(credentials)}" + ) + return StdioServerParameters( + command="npx", + args=["-y", "@cocal/google-calendar-mcp"], + env={"GOOGLE_OAUTH_CREDENTIALS": os.path.abspath(credentials)}, + ) + + +def extract_function_call(text: str): + """Extract function call JSON from LLM response.""" + if not text: + return None + + text = re.sub(r"<\|[^|]+\|>", "", text.strip()) + + try: + parsed = json.loads(text) + if isinstance(parsed, dict) and "name" 
in parsed: + return parsed.get("name"), parsed.get("arguments", {}) + except json.JSONDecodeError: + json_start = text.find('{') + if json_start == -1: + return None + + brace_count = 0 + for i in range(json_start, len(text)): + if text[i] == '{': + brace_count += 1 + elif text[i] == '}': + brace_count -= 1 + if brace_count == 0: + json_str = text[json_start:i + 1] + try: + parsed = json.loads(json_str) + if isinstance(parsed, dict) and "name" in parsed: + return parsed.get("name"), parsed.get("arguments", {}) + except json.JSONDecodeError: + pass + break + + return None + + +def build_system_prompt(tools: list) -> str: + """Build system prompt from tool schemas.""" + tools_descriptions = [] + for t in tools: + func = t.get('function', {}) + name = func.get('name', '') + desc = func.get('description', '') + params = func.get('parameters', {}) + props = params.get('properties', {}) + required = params.get('required', []) + + param_list = [] + for param_name, param_info in props.items(): + param_type = param_info.get('type', 'string') + param_desc = param_info.get('description', '') + is_required = param_name in required + req_mark = " (required)" if is_required else "" + if param_desc: + param_list.append(f" {param_name} ({param_type}){req_mark}: {param_desc}") + else: + param_list.append(f" {param_name} ({param_type}){req_mark}") + + params_str = "\n".join(param_list) if param_list else " (no parameters)" + tools_descriptions.append(f"{name}: {desc}\n{params_str}") + + tools_list = "\n\n".join([f"{i+1}. {td}" for i, td in enumerate(tools_descriptions)]) + return f"""You are a calendar assistant. 
When the user requests calendar actions, respond with ONLY a JSON object in this format: + +{{"name": "function_name", "arguments": {{"param": "value"}}}} + +Available functions: +{tools_list} + +Rules: +- Output ONLY valid JSON, no other text +- Use exact function and parameter names (case-sensitive) +- Include all required parameters +- For calendar events, use create-event +- For current time, use get-current-time +""" + + +def _handle_function_call_error(error_text: str, func_args: Dict[str, Any]) -> bool: + """Handle function call errors and auto-fix parameters. Returns True if should retry.""" + # Auto-fix account errors + if "Account" in error_text and "not found" in error_text and "Available accounts:" in error_text: + match = re.search(r'Available accounts:\s*(\w+)', error_text) + if match: + func_args['account'] = match.group(1) + return True + elif 'account' in func_args: + del func_args['account'] + return True + + # Auto-fix eventId errors + if "Invalid event ID" in error_text or ("event ID" in error_text.lower() and "invalid" in error_text.lower()): + if 'eventId' in func_args: + del func_args['eventId'] + return True + + # Auto-remove optional parameters that cause errors + if "validation error" in error_text.lower() or "invalid" in error_text.lower(): + optional_params = ['account', 'eventId', 'timeZone', 'fields'] + for param in optional_params: + if param in func_args: + del func_args[param] + return True + + return False + + +async def _execute_with_retry(session: ClientSession, func_name: str, func_args: Dict[str, Any], + tools: List[Dict[str, Any]], max_retries: int = 3) -> str: + """Execute function call with automatic error handling and retry.""" + retry_count = 0 + func_result = "" + + while retry_count <= max_retries: + func_result = await execute_mcp_tool(session, func_name, func_args, tools) + + try: + result_data = json.loads(func_result) if isinstance(func_result, str) else func_result + if result_data.get('isError', False): + 
error_text = "" + if isinstance(result_data.get('content'), list): + for item in result_data['content']: + if item.get('type') == 'text': + error_text = item.get('text', '') + break + + if retry_count < max_retries and _handle_function_call_error(error_text, func_args): + retry_count += 1 + continue + break + except Exception: + break + + return func_result or "" + + +def init_vlm(tools: List[Dict[str, Any]]) -> VLM: + """Initialize VLM with tools.""" + system_prompt = build_system_prompt(tools) + print('[debug] system_prompt:', system_prompt) + return VLM.from_("NexaAI/OmniNeural-4B", config=ModelConfig( + system_prompt=system_prompt, + n_ctx=4096, n_threads=0, n_threads_batch=0, n_batch=0, + n_ubatch=0, n_seq_max=0, n_gpu_layers=999 + )) + + + +@dataclass +class FunctionCallAgentResult: + """Result of function call agent execution.""" + func_name: Optional[str] + func_result: Optional[str] + response_text: str + +async def call_agent( + vlm: VLM, + session: ClientSession, + tools: List[Dict[str, Any]], + text: Optional[str] = None, + image: Optional[str] = None, + audio: Optional[str] = None +) -> FunctionCallAgentResult: + if not text and not image and not audio: + raise ValueError("At least one of text, image, or audio must be provided") + + contents = [] + image_paths = [] + audio_paths = [] + + if image: + image_path = os.path.abspath(image) + if not os.path.exists(image_path): + raise FileNotFoundError(f"Image file not found: {image_path}") + image_paths.append(image_path) + contents.append(VlmContent(type="image", text=image_path)) + + if audio: + audio_path = os.path.abspath(audio) + if not os.path.exists(audio_path): + raise FileNotFoundError(f"Audio file not found: {audio_path}") + audio_paths.append(audio_path) + contents.append(VlmContent(type="audio", text=audio_path)) + + if text: + contents.append(VlmContent(type="text", text=text)) + + conversation = [VlmChatMessage(role="user", contents=contents)] + + # Generate initial response + prompt = 
vlm.apply_chat_template(conversation) + print('[debug] prompt:', prompt) + print('[debug] generate_stream...') + response_text = "" + for token in vlm.generate_stream(prompt, config=GenerationConfig( + max_tokens=2048, image_paths=image_paths or None, + audio_paths=audio_paths or None, image_max_length=512 + )): + print(token, end="", flush=True) + response_text += token + print() + print('[debug] response_text:', response_text) + func_call = extract_function_call(response_text) + if not func_call: + print(f"[error] Failed to extract function call from response") + return FunctionCallAgentResult( + func_name=None, + func_result=None, + response_text=response_text + ) + + func_name, func_args = func_call + if func_name and isinstance(func_name, str): + print('[debug] calling function:', func_name) + func_result = await _execute_with_retry(session, func_name, func_args, tools) + print('[debug] func_result:', func_result) + followup = conversation + [ + VlmChatMessage(role="user", contents=[VlmContent(type="text", + text=f"You called {func_name} with {func_args}. Result: {func_result}. " + f"Provide a natural language response. 
Do NOT call any function again.")]) + ] + followup_response = "" + for token in vlm.generate_stream( + vlm.apply_chat_template(followup, enable_thinking=False), + config=GenerationConfig(max_tokens=2048) + ): + followup_response += token + return FunctionCallAgentResult( + func_name=func_name, + func_result=func_result, + response_text=followup_response + ) + + return FunctionCallAgentResult( + func_name=None, + func_result=None, + response_text=response_text + ) + + +async def call_agent_wrapper( + text: Optional[str] = None, + image: Optional[str] = None, + audio: Optional[str] = None, + credentials: str = "gcp-oauth.keys.json" +) -> FunctionCallAgentResult: + setup_logging() + + if not text and not image and not audio: + raise ValueError("At least one of text, image, or audio must be provided") + + server = create_calendar_server(credentials) + async with stdio_client(server) as (read, write): + async with ClientSession(read, write) as session: + await session.initialize() + tools = await get_mcp_tools(session) + tools = [t for t in tools if t.get('function', {}).get('name', '') in + ['create-event', 'get-current-time']] + + vlm = init_vlm(tools) + result = await call_agent(vlm, session, tools, text, image, audio) + return result + + +async def main(): + """Command-line interface for the agent.""" + setup_logging() + + parser = argparse.ArgumentParser() + parser.add_argument("--credentials", default="gcp-oauth.keys.json") + parser.add_argument("--text", help="Text input") + parser.add_argument("--image", help="Image file path") + parser.add_argument("--audio", help="Audio file path") + args = parser.parse_args() + + if not args.text and not args.image and not args.audio: + parser.print_help() + return + + server = create_calendar_server(args.credentials) + async with stdio_client(server) as (read, write): + async with ClientSession(read, write) as session: + await session.initialize() + tools = await get_mcp_tools(session) + tools = [t for t in tools if 
t.get('function', {}).get('name', '') in + ['create-event', 'get-current-time']] + + vlm = init_vlm(tools) + result = await call_agent(vlm, session, tools, args.text, args.image, args.audio) + + if result.response_text: + print(result.response_text) + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/demos/function-calling/requirements.txt b/demos/function-calling/requirements.txt new file mode 100644 index 00000000..0829fbc2 --- /dev/null +++ b/demos/function-calling/requirements.txt @@ -0,0 +1,3 @@ +nexaai==1.0.35rc11 +mcp +Flask[async] \ No newline at end of file