Merge pull request #17 from hjvogel/main
fixed data_writer and MD data framing
seanchatmangpt authored Aug 2, 2024
2 parents efec925 + 54736b0 commit 27fc30e
Showing 9 changed files with 89 additions and 74 deletions.
28 changes: 18 additions & 10 deletions src/dspygen/experiments/done/chatbots.py
@@ -1,18 +1,26 @@
from dspygen.modules.gen_keyword_arguments_module import invoke
from dspygen.utils.dspy_tools import init_dspy


def main():
    chat("bot: Hello world")

from dspygen.utils.dspy_tools import init_dspy, init_ol

def chat(message: str):
    response = chat(invoke(chat, f"{message}\nbot:"))
    print(response)
    # Function to handle the chat logic
    print(f"Received message: {message}")
    return f"Response to: {message}"

def invoke_response(message: str):
    # Provide the prompt as a formatted string
    prompt = f"{message}\nbot:"
    print(f"Generated prompt: {prompt}") # Debug statement to check prompt
    try:
        response = invoke(chat, prompt=prompt)
    except ValueError as e:
        print(f"Error while invoking: {e}") # Catch and print the error
        raise
    return response

def main():
    response = invoke_response("bot: Hello world")
    print(response)

if __name__ == '__main__':
    init_dspy()

    init_ol()
    main()
4 changes: 2 additions & 2 deletions src/dspygen/experiments/done/code_generator_agent.py
@@ -1,10 +1,10 @@
from dspygen.modules.gen_keyword_arguments_module import invoke
from dspygen.modules.python_expert_module import python_expert_call
from dspygen.utils.dspy_tools import init_dspy
from dspygen.utils.dspy_tools import init_ol


def main():
    init_dspy()
    init_ol()
    result = invoke(python_expert_call, "User Story: FastAPI CRUD routes for Fire Alarm IoT")
    print(result)

6 changes: 4 additions & 2 deletions src/dspygen/experiments/self_coding/interview_processing.py
@@ -1,7 +1,7 @@
import dspy

from dspygen.lm.groq_lm import Groq
from dspygen.utils.dspy_tools import init_dspy
from dspygen.utils.dspy_tools import init_dspy, init_ol

class ContextEstablishment(dspy.Signature):
    """Sets the stage for the interaction, providing necessary background."""
@@ -33,10 +33,12 @@ class FeedbackAndRetry(dspy.Signature):

def main2():
    """Main function"""
    init_dspy(lm_class=Groq, max_tokens=1000, model="llama3-70b-8192") # for Groq you must pass the Groq existing model
    #init_dspy(lm_class=Groq, max_tokens=1000, model="llama3-70b-8192") # for Groq you must pass the Groq existing model
    init_ol()

    story = ("You are a software engineer preparing for a technical interview. "
             "You have been given a coding challenge to solve. The challenge involves a NuxtJS frontend with a Convex API backend. ")
    print(story)

    # Establish the context for the interaction
    context = dspy.ChainOfThought(ContextEstablishment)(story=story).context
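For readers skimming the diff, this file builds on dspy's Signature / ChainOfThought pattern: a Signature subclass declares named input and output fields, and ChainOfThought wraps it into a callable whose prediction exposes those outputs as attributes. A minimal sketch of that pattern, assuming field definitions along these lines (the actual fields of ContextEstablishment are defined further down in the file and are not shown in this hunk):

import dspy

from dspygen.utils.dspy_tools import init_ol


class ContextEstablishment(dspy.Signature):
    """Sets the stage for the interaction, providing necessary background."""
    story = dspy.InputField(desc="Scenario the interaction is grounded in.")
    context = dspy.OutputField(desc="Background extracted from the story.")


def main():
    init_ol()  # local Ollama, using the default model from dspy_tools
    story = "You are a software engineer preparing for a technical interview."
    # The prediction exposes each OutputField as an attribute, hence .context
    context = dspy.ChainOfThought(ContextEstablishment)(story=story).context
    print(context)


if __name__ == "__main__":
    main()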
2 changes: 1 addition & 1 deletion src/dspygen/lm/groq_lm.py
@@ -15,7 +15,7 @@ def __init__(self, model=default_model, **kwargs): #model="mixtral-8x7b-32768",
        #model="llama3-70b-8192" # this is a fix cs somewhere the the model getting still set to openai gpt-3.5-turbo-instruct
        super().__init__(model)

        print("Groq model used today: " + model)
        #print("Groq model used today: " + model)
        self.provider = "default"
        self.history = []
        groq_api_key = os.environ.get("GROQ_API_KEY")
4 changes: 2 additions & 2 deletions src/dspygen/lm/ollama_lm.py
@@ -16,7 +16,7 @@ def __init__(self, model=default_ollama_model, **kwargs):
        super().__init__(model)

        # Print which model is being used
        print("Ollama model used today: " + model)
        #print("Ollama model used today: " + model)
        self.provider = "default"
        self.history = []

Expand All @@ -41,7 +41,7 @@ def __call__(self, prompt, **kwargs):
# Main function to initialize dspy with Ollama and run a prediction
def main():
    # Initialize dspy with the Ollama class and specified model
    init_dspy(Ollama, model=default_ollama_model, max_tokens=8000)
    init_dspy(lm_class=Ollama, model=default_ollama_model, max_tokens=8000)

    # Generate prediction for a specific prompt
    pred = dspy.Predict("prompt -> code")(prompt="Fast API CRUD endpoint for fire alarm global IoT network")
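The fix above is about argument binding rather than behavior: as the init_dspy signature in dspy_tools.py (later in this diff) shows, its first positional parameter is model, so passing Ollama positionally makes it collide with the explicit model= keyword. A minimal sketch of the difference, assuming the imports resolve as in this repository:

from dspygen.lm.ollama_lm import Ollama, default_ollama_model
from dspygen.utils.dspy_tools import init_dspy

# Positional call: Ollama lands in the `model` parameter and then clashes
# with the model= keyword, raising a "multiple values for 'model'" TypeError.
# init_dspy(Ollama, model=default_ollama_model, max_tokens=8000)

# Keyword call: the class goes to lm_class and the model name stays a string.
init_dspy(lm_class=Ollama, model=default_ollama_model, max_tokens=8000)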
9 changes: 5 additions & 4 deletions src/dspygen/modules/blog_module.py
Expand Up @@ -66,14 +66,15 @@ async def blog_route(data: dict):
def main():
    #init_dspy(lm_class=Groq, model="llama3-70b-8192", max_tokens=8000) # with Groq you must set the model!
    #init_ol("codellama:python", max_tokens=12000)
    init_ol("phi3:medium", max_tokens=5000 , timeout=500)
    init_ol( max_tokens=5000 , timeout=500)

    #init_dspy(Ollama, model="llama3:8b-instruct-q5_1", max_tokens=8000) # with Ollama you must set the model! -- llama3:70b-instruct ollama run llama3:70b-instruct-q3_K_M
    subject = "The Tetris Game, simple but working : in 100 lines" # 300 did not end ok with ollama mistral
    subject = "The Qix Atari Arcade Game logic , simple but working : in 100 lines" # 300 did not end ok with ollama mistral
    #( pls do not run into those issues here: TypeError: unsupported operand type(s) for +=: 'int' and 'NoneType')"
    print(blog_call(subject=subject))
    data = blog_call(subject=subject)
    print(data)
    # manually created the output to src\dspygen\experiments\blog\Tetris_1.md
    data_writer(data=subject, file_path="./Tetris_Blog_Phi3Med.md",)
    data_writer.DataWriter(data=data, file_path="./data/Qix_Atari_Blog_qwen2_7b-instruct.md",).forward()


if __name__ == "__main__":
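The last changed line is the heart of the "MD data framing" fix named in the commit message: the generated blog text (data) is now handed to data_writer.DataWriter and written via forward(), instead of the old call that passed only the subject string. A minimal sketch of the new call pattern, assuming the DataWriter interface shown in data_writer.py at the end of this diff (the sample string and the ./data directory are illustrative):

from dspygen.writer.data_writer import DataWriter

# For .md targets DataWriter expects a plain string and writes it verbatim;
# anything else raises ValueError (see data_writer.py below). The target
# directory is assumed to exist.
blog_post = "# Qix Atari Arcade Game\n\nGenerated blog content goes here."
DataWriter(data=blog_post, file_path="./data/Qix_Atari_Blog.md").forward()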
4 changes: 2 additions & 2 deletions src/dspygen/tutorials/natural_lang_to_sql/nl_to_sql.py
@@ -1,8 +1,8 @@
import dspy

from dspygen.utils.dspy_tools import init_dspy
from dspygen.utils.dspy_tools import init_ol

init_dspy()
#init_ol() - breaks auto poe tests >> TBD move into main


class NLtoSQL(dspy.Signature):
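The module-level init_ol() call is commented out here because it ran at import time, which the inline note says breaks the automated poe tests; the TBD is to move it into main. A minimal sketch of that follow-up, assuming a plain string signature rather than the NLtoSQL class defined further down in the file:

import dspy

from dspygen.utils.dspy_tools import init_ol


def main():
    # Deferring LM setup to main() keeps importing this module side-effect
    # free, so test collection no longer opens an Ollama connection.
    init_ol()
    result = dspy.Predict("question -> sql")(question="List all customers from Berlin")
    print(result.sql)


if __name__ == "__main__":
    main()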
2 changes: 1 addition & 1 deletion src/dspygen/utils/dspy_tools.py
@@ -11,7 +11,7 @@ def init_dspy(model: str = "gpt-3.5-turbo-instruct", lm_class=dspy.OpenAI, max_t
    return lm


def init_ol(model: str = "phi3:instruct", base_url="http://localhost:11434", max_tokens: int = 800, lm_instance=None, lm_class=dspy.OllamaLocal, timeout=100, temperature=0.6):
def init_ol(model: str = "qwen2:7b-instruct", base_url="http://localhost:11434", max_tokens: int = 800, lm_instance=None, lm_class=dspy.OllamaLocal, timeout=100, temperature=0.6):
    if lm_instance:
        dspy.settings.configure(lm=lm_instance)
        return lm_instance
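Changing the init_ol default model is what lets callers such as blog_module.py (earlier in this diff) drop their explicit model argument. A minimal usage sketch, assuming a local Ollama server is reachable at the default base_url and already has the model pulled:

from dspygen.utils.dspy_tools import init_ol

# Picks up the new default, qwen2:7b-instruct, from the signature above.
init_ol(max_tokens=5000, timeout=500)

# An explicit model still overrides the default on a per-call basis.
init_ol(model="phi3:instruct", max_tokens=800)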
104 changes: 54 additions & 50 deletions src/dspygen/writer/data_writer.py
@@ -1,19 +1,39 @@
import os

from dspygen.utils.pydantic_tools import InstanceMixin

import pandas as pd
from pydantic import BaseModel, Field
from io import StringIO

class DataWriter:
    def __init__(self, data, file_path="", write_options=None):
        print("init " + file_path)

        if write_options is None:
            write_options = {}
        self.file_path = file_path
        self.df = pd.DataFrame(data)

        # Determine file extension
        _, file_extension = os.path.splitext(self.file_path)
        file_extension = file_extension.lower()

        # Handle different data formats
        if file_extension == '.csv':
            if isinstance(data, dict) and all(isinstance(v, list) for v in data.values()):
                self.df = pd.DataFrame(data)
            else:
                raise ValueError("For CSV files, data must be a dictionary of lists.")
        elif file_extension == '.md':
            if isinstance(data, str):
                self.md_content = data
            else:
                raise ValueError("For Markdown files, data must be a string.")
        else:
            raise ValueError(f"Unsupported file type: {file_extension}")

        self.write_options = write_options

    def get_file_path(self):
        context_generator = DataFrameContextGenerator()
        context_string = context_generator.generate_context(self.df)
        context_string = context_generator.generate_context(self.df) if hasattr(self, 'df') else ""

        inst = FileNameModel.to_inst("Create a filename that fits \n" + context_string)
        return inst.file_name
Expand All @@ -25,27 +45,32 @@ def forward(self, **kwargs):
        _, file_extension = os.path.splitext(self.file_path)
        file_extension = file_extension.lower()

        write_functions = {
            '.csv': self.df.to_csv,
            # Add more mappings for different file types
        }

        if file_extension in write_functions:
            write_function = write_functions[file_extension]
        if file_extension == '.csv':
            write_functions = {
                '.csv': self.df.to_csv,
                # Add more mappings for different file types
            }
            print("write " + self.file_path)
            if file_extension in write_functions:
                write_function = write_functions[file_extension]
                try:
                    write_function(self.file_path, **self.write_options)
                except Exception as e:
                    raise ValueError(f"Failed to write to {self.file_path} due to: {e}")
            else:
                raise ValueError(f"Unsupported file type: {file_extension}")

        elif file_extension == '.md':
            print("write " + self.file_path)
            try:
                write_function(self.file_path, **self.write_options)
                with open(self.file_path, 'w') as file:
                    file.write(self.md_content)
            except Exception as e:
                raise ValueError(f"Failed to write to {self.file_path} due to: {e}")

        else:
            raise ValueError(f"Unsupported file type: {file_extension}")


from pydantic import BaseModel, Field
import pandas as pd
from typing import List, Dict, Any
from io import StringIO


class DataFrameContextGenerator(BaseModel):
    descriptive_stats: bool = True
    dtypes_info: bool = True
Expand All @@ -55,64 +80,43 @@ class Config:
        arbitrary_types_allowed = True

    def generate_context(self, df) -> str:
        # Convert the input data to a pandas DataFrame

        # Initialize a buffer for DataFrame info
        buffer = StringIO()
        df.info(buf=buffer)
        info_str = buffer.getvalue()

        context_parts = []

        # Optionally include descriptive statistics
        if self.descriptive_stats:
            desc_stats = df.describe().to_string()
            context_parts.append(desc_stats)

        # Optionally include data types information
        if self.dtypes_info:
            dtypes_str = df.dtypes.to_string()
            context_parts.append(dtypes_str)

        # Concatenate all parts to form the complete context
        context = "\n".join(context_parts)
        self.context = context
        return self.context


class FileNameModel(BaseModel, InstanceMixin):
class FileNameModel(BaseModel):
    file_name: str = Field(..., description="Unique CSV filename based on the data provided.")
    extension: str = Field("csv", description="File extension for the output file.")


def main():
    from dspygen.utils.dspy_tools import init_dspy
    init_dspy()
    # Example Usage
    # data = [
    # {'Date': '2023-01-01', 'Temperature': 22, 'Humidity': 80},
    # {'Date': '2023-01-02', 'Temperature': 25, 'Humidity': 75},
    # {'Date': '2023-01-03', 'Temperature': 21, 'Humidity': 85},
    # ]

    data = {
    # Example Usage for CSV
    data_csv = {
        'Book Title': ['The Great Gatsby', '1984', 'Brave New World', 'The Catcher in the Rye'],
        'Author': ['F. Scott Fitzgerald', 'George Orwell', 'Aldous Huxley', 'J.D. Salinger'],
        'Price': [10.99, 9.99, 8.99, 12.99],
        'Sold Copies': [500, 800, 650, 450]
    }
    writer_csv = DataWriter(file_path="./data/Book_Title_Author_Price_Sold_Copies.csv", data=data_csv)
    writer_csv.forward()

    # DataWriter(data).forward()
    from dspygen.rm.data_retriever import DataRetriever
    print(DataRetriever("/Users/sac/dev/dspygen/src/dspygen/writer/Book_Title_Author_Price_Sold_Copies.csv").forward())

    # Example Usage for Markdown
    data_md = "# Book List\n\n- The Great Gatsby\n- 1984\n- Brave New World\n- The Catcher in the Rye"
    writer_md = DataWriter(file_path="./data/Tetris_Blog_Phi3Med.md", data=data_md)
    writer_md.forward()

# Usage example
if __name__ == "__main__":
    main()
# file_path = 'output_data.csv'
# data = pd.DataFrame({'id': [1, 2], 'value': ['A', 'B']})
# writer = DataWriter(file_path, data)
# writer.write()
#