From a9226acf3057e0fbdcc3bb8b829756088ef953b2 Mon Sep 17 00:00:00 2001
From: Liam
Date: Thu, 17 Oct 2024 21:33:34 -0400
Subject: [PATCH 1/2] Get logistic regression running on EC2

---
 plm_interpretability/sae_model.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/plm_interpretability/sae_model.py b/plm_interpretability/sae_model.py
index c79af51..9b8f1bd 100644
--- a/plm_interpretability/sae_model.py
+++ b/plm_interpretability/sae_model.py
@@ -7,7 +7,8 @@ import torch.nn as nn
 from torch.nn import functional as F
 from transformers import PreTrainedModel, PreTrainedTokenizer
 
-from utils import get_layer_activations
+
+from plm_interpretability.utils import get_layer_activations
 
 
 class SparseAutoencoder(nn.Module):
From 9f16d594cefc33bc1ac2a649d9053590f344fe89 Mon Sep 17 00:00:00 2001
From: Liam
Date: Thu, 17 Oct 2024 23:32:19 -0400
Subject: [PATCH 2/2] Add script

---
 .../scripts/run_all_probes.sh | 75 +++++++++++++++++++
 1 file changed, 75 insertions(+)
 create mode 100755 plm_interpretability/scripts/run_all_probes.sh

diff --git a/plm_interpretability/scripts/run_all_probes.sh b/plm_interpretability/scripts/run_all_probes.sh
new file mode 100755
index 0000000..40b61d0
--- /dev/null
+++ b/plm_interpretability/scripts/run_all_probes.sh
@@ -0,0 +1,75 @@
+#!/bin/bash
+
+# Run all logistic regression probes against a single SAE checkpoint.
+# Results are written to sub-directories of <checkpoint name>_probe_results/.
+
+# Check if all required arguments are provided
+if [ $# -lt 3 ]; then
+    echo "Error: Insufficient arguments provided."
+    echo "Usage: $0 <checkpoint_file> <sae_dim> <plm_layer>"
+    exit 1
+fi
+
+# Assign arguments to variables
+checkpoint_file="$1"
+sae_dim="$2"
+plm_layer="$3"
+swissprot_tsv="swissprot_full_annotations.tsv"
+
+echo "Checkpoint file: $checkpoint_file"
+echo "SAE dimension: $sae_dim"
+echo "PLM layer: $plm_layer"
+
+# Check if the SwissProt annotations exist. If not, download them.
+if [ ! -f "$swissprot_tsv" ]; then
+    echo "$swissprot_tsv not found. Downloading..."
+    # The URL is quoted so the shell never treats "?" as a glob character.
+    # Stop early if the file did not arrive: every probe below needs it.
+    gdown "https://drive.google.com/uc?id=1TmbZGKt81Php8NT4s4OfbIwh05h-GJDS"
+    if [ ! -f "$swissprot_tsv" ]; then
+        echo "Error: failed to download $swissprot_tsv."
+        exit 1
+    fi
+    echo "Download complete."
+else
+    echo "$swissprot_tsv already exists. Skipping download."
+fi
+
+# Derive the output directory name from the checkpoint file name (directory
+# and extension removed). $checkpoint_file itself is left untouched so the
+# probes still receive the path the caller passed in.
+checkpoint_basename=$(basename -- "$checkpoint_file")
+checkpoint_name="${checkpoint_basename%.*}"
+
+# Create the output directory
+output_dir="${checkpoint_name}_probe_results"
+mkdir -p "$output_dir"
+
+# Run the logistic regression probes, one sub-directory per configuration
+logistic_regression_probe single-latent \
+    --sae-checkpoint "$checkpoint_file" \
+    --sae-dim "$sae_dim" \
+    --plm-dim 1280 \
+    --plm-layer "$plm_layer" \
+    --swissprot-tsv "$swissprot_tsv" \
+    --output-dir "$output_dir/single_latent_single_residue"
+
+logistic_regression_probe single-latent \
+    --sae-checkpoint "$checkpoint_file" \
+    --sae-dim "$sae_dim" \
+    --plm-dim 1280 \
+    --plm-layer "$plm_layer" \
+    --swissprot-tsv "$swissprot_tsv" \
+    --pool-over-annotation True \
+    --output-dir "$output_dir/single_latent_pool_over_annotation"
+
+logistic_regression_probe all-latents \
+    --sae-checkpoint "$checkpoint_file" \
+    --sae-dim "$sae_dim" \
+    --plm-dim 1280 \
+    --plm-layer "$plm_layer" \
+    --swissprot-tsv "$swissprot_tsv" \
+    --pool-over-annotation True \
+    --output-dir "$output_dir/all_latents"
+
+echo "Finished running all probes. Results saved in $output_dir"