Merge pull request #72 from ghd3v/main

init27 · web-flow · commit 69db1a2d749d · 2025-09-29T12:46:08.000-07:00
adding required provider; keeping default as vllm but adding sample a…
diff --git a/use-cases/adding_reasoning_to_llama_3/cot_tools_config.yaml b/use-cases/adding_reasoning_to_llama_3/cot_tools_config.yaml
@@ -1,11 +1,23 @@
 # Custom configuration for tool use Chain of Thought enhancement
+llm:
+  # Provider selection: "vllm" or "api-endpoint" (each value matches the top-level section of the same name in this file)
+  provider: "vllm"
 
 vllm:
   api_base: "http://localhost:8000/v1"
   model: "unsloth/Meta-Llama-3.3-70B-Instruct"
   max_retries: 3
   retry_delay: 1.0
 
+# API endpoint configuration (the section named by provider: "api-endpoint")
+api-endpoint:
+  api_base: "https://api.llama.com/v1"  # Optional base URL for API endpoint (null for default API)
+  api_key: "llama_api_key"  # Placeholder value: replace with a key for the API endpoint or compatible service, or use an env var instead of committing a real key
+  model: "Llama-4-Maverick-17B-128E-Instruct-FP8"  # Default model to use
+  max_retries: 3  # Number of retries for API calls
+  retry_delay: 1.0  # Initial delay between retries (seconds)
+  sleep_time: 0.5  # Small delay in seconds between batches to avoid rate limits
+
 generation:
   temperature: 0.2   # Lower temperature for more consistent reasoning
   top_p: 0.95
@@ -14,7 +26,7 @@ generation:
 # The most important part - our custom Chain of Thought prompt
 prompts:
   cot_enhancement: |
-        You are a highly intelligent AI with an IQ of 170, and your job is to enhance existing conversation examples. Remember to return the entire conversation as is, BUT
+    You are a highly intelligent AI with an IQ of 170, and your job is to enhance existing conversation examples. Remember to return the entire conversation as is,
     BUT, we will add Chain of Thought and planning to "Assistant" messages whenever they return a tool call.
     Remember, ONLY when an assistant message returns a tool call will we add thinking and reasoning traces before it to add logic. Otherwise, we don't touch the conversation history.
     Remember to return the entire message, but only enhance the assistant messages whenever a tool is called in the conversation by adding thoughts.