Merge pull request #17 from hjvogel/main
fixed data_writer and MD data framing
seanchatmangpt authored Aug 2, 2024
2 parents efec925 + 54736b0 commit 27fc30e
Showing 9 changed files with 89 additions and 74 deletions.
28 changes: 18 additions & 10 deletions src/dspygen/experiments/done/chatbots.py
@@ -1,18 +1,26 @@
from dspygen.modules.gen_keyword_arguments_module import invoke
from dspygen.utils.dspy_tools import init_dspy


def main():
    chat("bot: Hello world")

from dspygen.utils.dspy_tools import init_dspy, init_ol

def chat(message: str):
    response = chat(invoke(chat, f"{message}\nbot:"))
    print(response)
    # Function to handle the chat logic
    print(f"Received message: {message}")
    return f"Response to: {message}"

def invoke_response(message: str):
    # Provide the prompt as a formatted string
    prompt = f"{message}\nbot:"
    print(f"Generated prompt: {prompt}") # Debug statement to check prompt
    try:
        response = invoke(chat, prompt=prompt)
    except ValueError as e:
        print(f"Error while invoking: {e}") # Catch and print the error
        raise
    return response

def main():
    response = invoke_response("bot: Hello world")
    print(response)

if __name__ == '__main__':
    init_dspy()

    init_ol()
    main()
4 changes: 2 additions & 2 deletions src/dspygen/experiments/done/code_generator_agent.py
@@ -1,10 +1,10 @@
from dspygen.modules.gen_keyword_arguments_module import invoke
from dspygen.modules.python_expert_module import python_expert_call
from dspygen.utils.dspy_tools import init_dspy
from dspygen.utils.dspy_tools import init_ol


def main():
    init_dspy()
    init_ol()
    result = invoke(python_expert_call, "User Story: FastAPI CRUD routes for Fire Alarm IoT")
    print(result)

6 changes: 4 additions & 2 deletions src/dspygen/experiments/self_coding/interview_processing.py
@@ -1,7 +1,7 @@
import dspy

from dspygen.lm.groq_lm import Groq
from dspygen.utils.dspy_tools import init_dspy
from dspygen.utils.dspy_tools import init_dspy, init_ol

class ContextEstablishment(dspy.Signature):
    """Sets the stage for the interaction, providing necessary background."""
@@ -33,10 +33,12 @@ class FeedbackAndRetry(dspy.Signature):

def main2():
    """Main function"""
    init_dspy(lm_class=Groq, max_tokens=1000, model="llama3-70b-8192") # for Groq you must pass the Groq existing model
    #init_dspy(lm_class=Groq, max_tokens=1000, model="llama3-70b-8192") # for Groq you must pass the Groq existing model
    init_ol()

    story = ("You are a software engineer preparing for a technical interview. "
             "You have been given a coding challenge to solve. The challenge involves a NuxtJS frontend with a Convex API backend. ")
    print(story)

    # Establish the context for the interaction
    context = dspy.ChainOfThought(ContextEstablishment)(story=story).context
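For readers skimming the diff, this file builds on dspy's Signature / ChainOfThought pattern: a Signature subclass declares named input and output fields, and ChainOfThought wraps it into a callable whose prediction exposes those outputs as attributes. A minimal sketch of that pattern, assuming field definitions along these lines (the actual fields of ContextEstablishment are defined further down in the file and are not shown in this hunk):

import dspy

from dspygen.utils.dspy_tools import init_ol


class ContextEstablishment(dspy.Signature):
    """Sets the stage for the interaction, providing necessary background."""
    story = dspy.InputField(desc="Scenario the interaction is grounded in.")
    context = dspy.OutputField(desc="Background extracted from the story.")


def main():
    init_ol()  # local Ollama, using the default model from dspy_tools
    story = "You are a software engineer preparing for a technical interview."
    # The prediction exposes each OutputField as an attribute, hence .context
    context = dspy.ChainOfThought(ContextEstablishment)(story=story).context
    print(context)


if __name__ == "__main__":
    main()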
2 changes: 1 addition & 1 deletion src/dspygen/lm/groq_lm.py
@@ -15,7 +15,7 @@ def __init__(self, model=default_model, **kwargs): #model="mixtral-8x7b-32768",
        #model="llama3-70b-8192" # this is a fix cs somewhere the the model getting still set to openai gpt-3.5-turbo-instruct
        super().__init__(model)

        print("Groq model used today: " + model)
        #print("Groq model used today: " + model)
        self.provider = "default"
        self.history = []
        groq_api_key = os.environ.get("GROQ_API_KEY")
4 changes: 2 additions & 2 deletions src/dspygen/lm/ollama_lm.py
@@ -16,7 +16,7 @@ def __init__(self, model=default_ollama_model, **kwargs):
        super().__init__(model)

        # Print which model is being used
        print("Ollama model used today: " + model)
        #print("Ollama model used today: " + model)
        self.provider = "default"
        self.history = []

Expand All @@ -41,7 +41,7 @@ def __call__(self, prompt, **kwargs):
# Main function to initialize dspy with Ollama and run a prediction
def main():
    # Initialize dspy with the Ollama class and specified model
    init_dspy(Ollama, model=default_ollama_model, max_tokens=8000)
    init_dspy(lm_class=Ollama, model=default_ollama_model, max_tokens=8000)

    # Generate prediction for a specific prompt
    pred = dspy.Predict("prompt -> code")(prompt="Fast API CRUD endpoint for fire alarm global IoT network")
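The fix above is about argument binding rather than behavior: as the init_dspy signature in dspy_tools.py (later in this diff) shows, its first positional parameter is model, so passing Ollama positionally makes it collide with the explicit model= keyword. A minimal sketch of the difference, assuming the imports resolve as in this repository:

from dspygen.lm.ollama_lm import Ollama, default_ollama_model
from dspygen.utils.dspy_tools import init_dspy

# Positional call: Ollama lands in the `model` parameter and then clashes
# with the model= keyword, raising a "multiple values for 'model'" TypeError.
# init_dspy(Ollama, model=default_ollama_model, max_tokens=8000)

# Keyword call: the class goes to lm_class and the model name stays a string.
init_dspy(lm_class=Ollama, model=default_ollama_model, max_tokens=8000)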
9 changes: 5 additions & 4 deletions src/dspygen/modules/blog_module.py
Expand Up @@ -66,14 +66,15 @@ async def blog_route(data: dict):
def main():
    #init_dspy(lm_class=Groq, model="llama3-70b-8192", max_tokens=8000) # with Groq you must set the model!
    #init_ol("codellama:python", max_tokens=12000)
    init_ol("phi3:medium", max_tokens=5000 , timeout=500)
    init_ol( max_tokens=5000 , timeout=500)

    #init_dspy(Ollama, model="llama3:8b-instruct-q5_1", max_tokens=8000) # with Ollama you must set the model! -- llama3:70b-instruct ollama run llama3:70b-instruct-q3_K_M
    subject = "The Tetris Game, simple but working : in 100 lines" # 300 did not end ok with ollama mistral
    subject = "The Qix Atari Arcade Game logic , simple but working : in 100 lines" # 300 did not end ok with ollama mistral
    #( pls do not run into those issues here: TypeError: unsupported operand type(s) for +=: 'int' and 'NoneType')"
    print(blog_call(subject=subject))
    data = blog_call(subject=subject)
    print(data)
    # manually created the output to src\dspygen\experiments\blog\Tetris_1.md
    data_writer(data=subject, file_path="./Tetris_Blog_Phi3Med.md",)
    data_writer.DataWriter(data=data, file_path="./data/Qix_Atari_Blog_qwen2_7b-instruct.md",).forward()


if __name__ == "__main__":
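The last changed line is the heart of the "MD data framing" fix named in the commit message: the generated blog text (data) is now handed to data_writer.DataWriter and written via forward(), instead of the old call that passed only the subject string. A minimal sketch of the new call pattern, assuming the DataWriter interface shown in data_writer.py at the end of this diff (the sample string and the ./data directory are illustrative):

from dspygen.writer.data_writer import DataWriter

# For .md targets DataWriter expects a plain string and writes it verbatim;
# anything else raises ValueError (see data_writer.py below). The target
# directory is assumed to exist.
blog_post = "# Qix Atari Arcade Game\n\nGenerated blog content goes here."
DataWriter(data=blog_post, file_path="./data/Qix_Atari_Blog.md").forward()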
4 changes: 2 additions & 2 deletions src/dspygen/tutorials/natural_lang_to_sql/nl_to_sql.py
@@ -1,8 +1,8 @@
import dspy

from dspygen.utils.dspy_tools import init_dspy
from dspygen.utils.dspy_tools import init_ol

init_dspy()
#init_ol() - breaks auto poe tests >> TBD move into main


class NLtoSQL(dspy.Signature):
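The module-level init_ol() call is commented out here because it ran at import time, which the inline note says breaks the automated poe tests; the TBD is to move it into main. A minimal sketch of that follow-up, assuming a plain string signature rather than the NLtoSQL class defined further down in the file:

import dspy

from dspygen.utils.dspy_tools import init_ol


def main():
    # Deferring LM setup to main() keeps importing this module side-effect
    # free, so test collection no longer opens an Ollama connection.
    init_ol()
    result = dspy.Predict("question -> sql")(question="List all customers from Berlin")
    print(result.sql)


if __name__ == "__main__":
    main()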
2 changes: 1 addition & 1 deletion src/dspygen/utils/dspy_tools.py
@@ -11,7 +11,7 @@ def init_dspy(model: str = "gpt-3.5-turbo-instruct", lm_class=dspy.OpenAI, max_t
    return lm


def init_ol(model: str = "phi3:instruct", base_url="http://localhost:11434", max_tokens: int = 800, lm_instance=None, lm_class=dspy.OllamaLocal, timeout=100, temperature=0.6):
def init_ol(model: str = "qwen2:7b-instruct", base_url="http://localhost:11434", max_tokens: int = 800, lm_instance=None, lm_class=dspy.OllamaLocal, timeout=100, temperature=0.6):
    if lm_instance:
        dspy.settings.configure(lm=lm_instance)
        return lm_instance
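Changing the init_ol default model is what lets callers such as blog_module.py (earlier in this diff) drop their explicit model argument. A minimal usage sketch, assuming a local Ollama server is reachable at the default base_url and already has the model pulled:

from dspygen.utils.dspy_tools import init_ol

# Picks up the new default, qwen2:7b-instruct, from the signature above.
init_ol(max_tokens=5000, timeout=500)

# An explicit model still overrides the default on a per-call basis.
init_ol(model="phi3:instruct", max_tokens=800)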
104 changes: 54 additions & 50 deletions src/dspygen/writer/data_writer.py
@@ -1,19 +1,39 @@
import os

from dspygen.utils.pydantic_tools import InstanceMixin

import pandas as pd
from pydantic import BaseModel, Field
from io import StringIO

class DataWriter:
    def __init__(self, data, file_path="", write_options=None):
        print("init " + file_path)

        if write_options is None:
            write_options = {}
        self.file_path = file_path
        self.df = pd.DataFrame(data)

        # Determine file extension
        _, file_extension = os.path.splitext(self.file_path)
        file_extension = file_extension.lower()

        # Handle different data formats
        if file_extension == '.csv':
            if isinstance(data, dict) and all(isinstance(v, list) for v in data.values()):
                self.df = pd.DataFrame(data)
            else:
                raise ValueError("For CSV files, data must be a dictionary of lists.")
        elif file_extension == '.md':
            if isinstance(data, str):
                self.md_content = data
            else:
                raise ValueError("For Markdown files, data must be a string.")
        else:
            raise ValueError(f"Unsupported file type: {file_extension}")

        self.write_options = write_options

    def get_file_path(self):
        context_generator = DataFrameContextGenerator()
        context_string = context_generator.generate_context(self.df)
        context_string = context_generator.generate_context(self.df) if hasattr(self, 'df') else ""

        inst = FileNameModel.to_inst("Create a filename that fits \n" + context_string)
        return inst.file_name
Expand All @@ -25,27 +45,32 @@ def forward(self, **kwargs):
        _, file_extension = os.path.splitext(self.file_path)
        file_extension = file_extension.lower()

        write_functions = {
            '.csv': self.df.to_csv,
            # Add more mappings for different file types
        }

        if file_extension in write_functions:
            write_function = write_functions[file_extension]
        if file_extension == '.csv':
            write_functions = {
                '.csv': self.df.to_csv,
                # Add more mappings for different file types
            }
            print("write " + self.file_path)
            if file_extension in write_functions:
                write_function = write_functions[file_extension]
                try:
                    write_function(self.file_path, **self.write_options)
                except Exception as e:
                    raise ValueError(f"Failed to write to {self.file_path} due to: {e}")
            else:
                raise ValueError(f"Unsupported file type: {file_extension}")

        elif file_extension == '.md':
            print("write " + self.file_path)
            try:
                write_function(self.file_path, **self.write_options)
                with open(self.file_path, 'w') as file:
                    file.write(self.md_content)
            except Exception as e:
                raise ValueError(f"Failed to write to {self.file_path} due to: {e}")

        else:
            raise ValueError(f"Unsupported file type: {file_extension}")


from pydantic import BaseModel, Field
import pandas as pd
from typing import List, Dict, Any
from io import StringIO


class DataFrameContextGenerator(BaseModel):
    descriptive_stats: bool = True
    dtypes_info: bool = True
Expand All @@ -55,64 +80,43 @@ class Config:
        arbitrary_types_allowed = True

    def generate_context(self, df) -> str:
        # Convert the input data to a pandas DataFrame

        # Initialize a buffer for DataFrame info
        buffer = StringIO()
        df.info(buf=buffer)
        info_str = buffer.getvalue()

        context_parts = []

        # Optionally include descriptive statistics
        if self.descriptive_stats:
            desc_stats = df.describe().to_string()
            context_parts.append(desc_stats)

        # Optionally include data types information
        if self.dtypes_info:
            dtypes_str = df.dtypes.to_string()
            context_parts.append(dtypes_str)

        # Concatenate all parts to form the complete context
        context = "\n".join(context_parts)
        self.context = context
        return self.context


class FileNameModel(BaseModel, InstanceMixin):
class FileNameModel(BaseModel):
    file_name: str = Field(..., description="Unique CSV filename based on the data provided.")
    extension: str = Field("csv", description="File extension for the output file.")


def main():
    from dspygen.utils.dspy_tools import init_dspy
    init_dspy()
    # Example Usage
    # data = [
    # {'Date': '2023-01-01', 'Temperature': 22, 'Humidity': 80},
    # {'Date': '2023-01-02', 'Temperature': 25, 'Humidity': 75},
    # {'Date': '2023-01-03', 'Temperature': 21, 'Humidity': 85},
    # ]

    data = {
    # Example Usage for CSV
    data_csv = {
        'Book Title': ['The Great Gatsby', '1984', 'Brave New World', 'The Catcher in the Rye'],
        'Author': ['F. Scott Fitzgerald', 'George Orwell', 'Aldous Huxley', 'J.D. Salinger'],
        'Price': [10.99, 9.99, 8.99, 12.99],
        'Sold Copies': [500, 800, 650, 450]
    }
    writer_csv = DataWriter(file_path="./data/Book_Title_Author_Price_Sold_Copies.csv", data=data_csv)
    writer_csv.forward()

    # DataWriter(data).forward()
    from dspygen.rm.data_retriever import DataRetriever
    print(DataRetriever("/Users/sac/dev/dspygen/src/dspygen/writer/Book_Title_Author_Price_Sold_Copies.csv").forward())

    # Example Usage for Markdown
    data_md = "# Book List\n\n- The Great Gatsby\n- 1984\n- Brave New World\n- The Catcher in the Rye"
    writer_md = DataWriter(file_path="./data/Tetris_Blog_Phi3Med.md", data=data_md)
    writer_md.forward()

# Usage example
if __name__ == "__main__":
    main()
# file_path = 'output_data.csv'
# data = pd.DataFrame({'id': [1, 2], 'value': ['A', 'B']})
# writer = DataWriter(file_path, data)
# writer.write()
#