Mlp softmax buzzer #6

Open · wants to merge 11 commits into base: master
Binary file added feateng/.DS_Store
Binary file not shown.
86 changes: 86 additions & 0 deletions feateng/analyze_mlp_buzzer.py
@@ -0,0 +1,86 @@
import pickle
import torch
import torch.nn as nn

# Define a utility to infer architecture from state_dict
def infer_architecture(state_dict):
    hidden_dims = []
    input_dim = None
    for key, tensor in state_dict.items():
        if "weight" in key:  # Look at weight matrices
            if input_dim is None:
                input_dim = tensor.shape[1]  # First layer input dimension
            hidden_dims.append(tensor.shape[0])  # Layer output dimension
    return input_dim, hidden_dims[:-1]  # Exclude the final output layer
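# Illustrative example (hypothetical shapes): a state_dict whose weight tensors have
# shapes (128, 40), (64, 128), and (1, 64) would yield input_dim=40 and
# hidden_dims=[128, 64]; the final output layer's dimension is excluded.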

# Define the MLP model structure
class SampleMLP(nn.Module):
    def __init__(self, input_dim, hidden_dims):
        super(SampleMLP, self).__init__()
        layers = []
        prev_dim = input_dim
        for hidden_dim in hidden_dims:
            layers.append(nn.Linear(prev_dim, hidden_dim))
            layers.append(nn.ReLU())
            prev_dim = hidden_dim
        layers.append(nn.Linear(prev_dim, 1))
        layers.append(nn.Sigmoid())
        self.layers = nn.Sequential(*layers)  # Directly define as layers

    def forward(self, x):
        return self.layers(x)
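# The network is a plain feed-forward stack: ReLU hidden layers followed by a single
# sigmoid unit, so the forward pass returns a value in [0, 1] that can be read as a
# buzz probability.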

# Load the model from .pkl file
def load_model(file_path, input_dim, hidden_dims):
    with open(file_path, 'rb') as f:
        state_dict = pickle.load(f)

    # Recreate the model structure
    model = SampleMLP(input_dim, hidden_dims)

    # Remap keys to match the model's architecture
    remapped_state_dict = {f"layers.{k}": v for k, v in state_dict.items()}
    model.load_state_dict(remapped_state_dict)
    model.eval()  # Set the model to evaluation mode
    return model
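# Assumption: the pickled state_dict was saved from a bare nn.Sequential, so its keys
# look like "0.weight" / "0.bias"; prefixing them with "layers." lines them up with
# SampleMLP's self.layers module. If the keys were saved under a different prefix,
# the remapping above would need to change.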

# Analyze model weights
def analyze_model_weights(model):
    analysis = {}
    for name, param in model.named_parameters():
        analysis[name] = {
            "shape": tuple(param.shape),
            "sample_values": param.detach().cpu().numpy().flatten()[:5].tolist()  # Display first 5 values
        }
    return analysis

# Test inference
def test_inference(model, input_dim):
    dummy_input = torch.rand((1, input_dim))  # Create a random input tensor
    output = model(dummy_input)
    return output

# Example usage
if __name__ == "__main__":
    # Path to your .pkl file
    model_file = "models/mlp_no_features.model.pkl"  # Replace with the actual file path

    # Load the state_dict and infer architecture
    with open(model_file, 'rb') as f:
        state_dict = pickle.load(f)
    input_dim, hidden_dims = infer_architecture(state_dict)
    print(f"Inferred Input Dimension: {input_dim}")
    print(f"Inferred Hidden Dimensions: {hidden_dims}")

    # Load the model
    model = load_model(model_file, input_dim, hidden_dims)

    # Analyze weights
    weight_analysis = analyze_model_weights(model)
    print("\nModel Weights Analysis:")
    for layer, info in weight_analysis.items():
        print(f"{layer}: Shape={info['shape']}, Sample Values={info['sample_values']}")

    # Test inference
    output = test_inference(model, input_dim)
    print(f"\nSample Output from Inference: {output}")
7 changes: 7 additions & 0 deletions feateng/buzzer.py
@@ -15,6 +15,10 @@

from guesser import add_guesser_params
from features import LengthFeature
from features import ContextualMatchFeature
from features import FrequencyFeature
from features import PreviousGuessFeature
from features import CategoryFeature
from params import add_buzzer_params, add_question_params, load_guesser, load_buzzer, load_questions, add_general_params, setup_logging

def runs(text, run_length):
@@ -136,6 +140,9 @@ def featurize(self, question, run_text, guess_history, guesses=None):
features["%s_%s" % (ff.name, feat)] = val

assert guess is not None
print(run_text)
print(f"Guess: {guess}")
print(f"Features: {features}")
return guess, features

def finalize(self):
222 changes: 222 additions & 0 deletions feateng/compare_buzzers.py
@@ -0,0 +1,222 @@
import itertools
import os
import subprocess
import sys
import pandas as pd
import json
import time
from datetime import datetime

LOSS_FUNCTIONS = {
    "MLP": "BuzzLoss",
    "LogisticBuzzer": "Logistic Loss",
}

# Define the features to use in generating the power set
features = ["Length", "Frequency", "Category", "ContextualMatch", "PreviousGuess"]

# DataFrame to store results
results_df = pd.DataFrame(columns=[
"Features", "Buzzer Type", "Filename Stem", "Loss Function", "Training Limit", "Testing Limit",
"Training Dataset", "Test Dataset", "Evaluation",
"best %", "timid %", "hit %", "close %", "miss %", "aggressive %", "waiting %",
"Questions Right", "Total", "Accuracy", "Buzz Ratio", "Buzz Position"
])

# Function to generate the filename stem based on the subset of features
def generate_filename_stem(subset, buzzer_type="LogisticBuzzer"):
    buzzer_str = "logit" if buzzer_type == "LogisticBuzzer" else buzzer_type.lower()
    if not subset:
        return f"{buzzer_str}_no_features"
    elif set(subset) == set(features):
        return f"{buzzer_str}_with_all_features"
    else:
        return f"{buzzer_str}_with_" + "_".join(subset).lower()

# Function to validate JSON output
def validate_json_output(json_path):
    try:
        if not os.path.exists(json_path):
            raise FileNotFoundError(f"Output JSON file not found: {json_path}")

        if os.path.getsize(json_path) == 0:
            raise ValueError(f"Output JSON file is empty: {json_path}")

        with open(json_path, "r") as f:
            data = json.load(f)
            if not data:
                raise ValueError(f"Output JSON file contains invalid or empty data: {json_path}")

        return data

    except (FileNotFoundError, ValueError, json.JSONDecodeError) as e:
        return str(e)  # Return error message for logging purposes
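# Design note: on success this returns the parsed dict; on failure it returns the error
# message as a string, so callers can distinguish the two outcomes with
# isinstance(result, dict).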

# Generate the power set of features
feature_subsets = list(itertools.chain.from_iterable(itertools.combinations(features, r) for r in range(len(features)+1)))
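# With the five features above, the power set contains 2**5 = 32 subsets, from the
# empty set up to all five features.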

# Set values for the parameters
training_limit = 50
testing_limit = 25
training_dataset = "../data/qanta.buzztrain.json.gz"
test_dataset = "../data/qanta.buzzdev.json.gz"
evaluation = "buzzer"
guesser_model_train = "../models/buzztrain_gpt4o_cache"
guesser_model_test = "../models/buzzdev_gpt4o_cache"

# List of buzzer models
buzzer_models = ["MLP", "LogisticBuzzer"]
feature_subsets = [["Length", "Frequency", "Category", "ContextualMatch", "PreviousGuess"]]

# Main loop to iterate over buzzer models and feature subsets
for buzzer_type in buzzer_models:
print(f"Running for buzzer type: {buzzer_type}")

for subset in feature_subsets:

# Determine the filename stem based on the subset
filename_stem = generate_filename_stem(subset, buzzer_type)


# TRAINING SPECS
# Construct the `buzzer.py` command using sys.executable
buzzer_command = [
sys.executable, 'buzzer.py', '--guesser_type=Gpr', '--limit', str(training_limit),
'--GprGuesser_filename', guesser_model_train,
'--questions', training_dataset, '--buzzer_guessers', 'Gpr',
'--buzzer_type', buzzer_type
]



        # TESTING SPECS
        # Construct the `eval.py` command using sys.executable
        output_json = f"summary/eval_output_{filename_stem}.json"
        eval_command = [
            sys.executable, 'eval.py', '--guesser_type=Gpr',
            '--TfidfGuesser_filename=models/TfidfGuesser', '--limit', str(testing_limit),
            '--questions', test_dataset, '--buzzer_guessers', 'Gpr',
            '--GprGuesser_filename', guesser_model_test,
            # '--LogisticBuzzer_filename=models/' + filename_stem,
            '--evaluate', evaluation,
            # '--buzzer_type', buzzer_type,
            '--output_json', output_json  # Include output_json flag to specify unique output
        ]
        if buzzer_type == "MLP":
            buzzer_filename_flag = ['--MLPBuzzer_filename=models/' + filename_stem]
            buzzer_command.extend(buzzer_filename_flag)
            eval_command.extend(buzzer_filename_flag)
        else:
            buzzer_filename_flag = ['--LogisticBuzzer_filename=models/' + filename_stem]
            buzzer_command.extend(buzzer_filename_flag)
            eval_command.extend(buzzer_filename_flag)

        # Only add --features if subset is not empty
        if subset:
            feature_flag = ['--features'] + list(subset)
            buzzer_command.extend(feature_flag)
            eval_command.extend(feature_flag)

        error_log_file = f"summary/error_log_{filename_stem}.txt"

        try:
            # Log start of commands
            print(f"Running with feature subset: {subset} -> {filename_stem}")
            time.sleep(1)

            # Run the buzzer.py command
            subprocess.run(buzzer_command, check=True)

            # Add an explicit delay to ensure I/O has sufficient time to complete
            time.sleep(2)

            eval_output_log = f"evals/eval_output_{filename_stem}.txt"
            with open(eval_output_log, "w") as out_f, open(error_log_file, "w") as err_f:
                subprocess.run(eval_command, stdout=out_f, stderr=err_f, check=True)

            # Add an explicit delay before checking output
            time.sleep(2)

            # Retry logic for validating the output
            max_retries = 3
            retry_delay = 2  # seconds
            for attempt in range(max_retries):
                validation_result = validate_json_output(output_json)
                if isinstance(validation_result, dict):
                    # Successfully validated
                    eval_results = validation_result
                    break
                else:
                    # Log the retry attempt
                    with open(error_log_file, "a") as err_f:
                        err_f.write(f"Attempt {attempt + 1}: {validation_result}\n")
                    time.sleep(retry_delay)
            else:
                # If all retries fail, raise an error
                raise ValueError(f"Failed to validate JSON output after {max_retries} attempts: {output_json}")

            loss_function = LOSS_FUNCTIONS.get(buzzer_type, "Unknown")

            # Create a DataFrame for the new row
            new_row_df = pd.DataFrame([{
                "Features": list(subset),
                "Buzzer Type": buzzer_type,
                "Filename Stem": filename_stem,
                "Loss Function": loss_function,  # Include the loss function dynamically
                "Training Limit": training_limit,
                "Testing Limit": testing_limit,
                "Training Dataset": training_dataset,
                "Test Dataset": test_dataset,
                "Evaluation": evaluation,
                **eval_results["outcome_percentages"],
                "Questions Right": eval_results["questions_right"],
                "Total": eval_results["total"],
                "Accuracy": eval_results["accuracy"],
                "Buzz Ratio": eval_results["buzz_ratio"],
                "Buzz Position": eval_results["buzz_position"]
            }])

            # Validate that the new row is not a duplicate of existing rows
            columns_to_check = results_df.columns[results_df.columns.get_loc("waiting %"):]
            if not results_df[columns_to_check].duplicated().any():
                # Use pd.concat to add the new row to results_df
                results_df = pd.concat([results_df, new_row_df], ignore_index=True)
            else:
                print(f"Warning: Duplicate row detected for subset {subset}. Skipping row addition.")

        except Exception as e:
            # Detailed error logging
            with open(error_log_file, "a") as err_file:
                err_file.write(f"Error for subset {subset}: {e}\n")
                err_file.write(f"Buzzer command: {' '.join(buzzer_command)}\n")
                err_file.write(f"Eval command: {' '.join(eval_command)}\n")
                if os.path.exists(output_json) and os.path.getsize(output_json) > 0:
                    err_file.write("Output JSON file was partially written or corrupted.\n")
                else:
                    err_file.write("Output JSON file was empty or not generated.\n")

            print(f"Subset {subset} generated an exception: {e}. Check {error_log_file} for details.")
            continue

# Export results
# Sort the DataFrame by descending order of Buzz Ratio
if not results_df.empty:
    results_df = results_df.sort_values(by="Buzz Ratio", ascending=False)
    columns_to_check = results_df.columns[results_df.columns.get_loc("waiting %"):]
    output_stem = '_'.join(buzzer_models)

    # Validate and remove duplicate rows
    duplicates = results_df.duplicated(subset=columns_to_check, keep=False)
    if duplicates.any():
        print("Warning: Duplicate rows found in the CSV output.")
        duplicate_rows = results_df[duplicates]
        duplicate_log_path = f"summary/{output_stem}_duplicate_rows_log.csv"
        duplicate_rows.to_csv(duplicate_log_path, index=False)
        print(f"Duplicate rows have been saved to {duplicate_log_path}")

        # Remove duplicates and save a new CSV without them
        results_df.drop_duplicates(subset=columns_to_check, keep='first', inplace=True)

    results_df.to_csv(f"summary/{output_stem}_eval_summary.csv", index=False)
else:
    print("No results generated.")