From a9226acf3057e0fbdcc3bb8b829756088ef953b2 Mon Sep 17 00:00:00 2001
From: Liam <liambai2000@gmail.com>
Date: Thu, 17 Oct 2024 21:33:34 -0400
Subject: [PATCH 1/2] Get logistic regression running on EC2

---
 plm_interpretability/sae_model.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/plm_interpretability/sae_model.py b/plm_interpretability/sae_model.py
index c79af51..9b8f1bd 100644
--- a/plm_interpretability/sae_model.py
+++ b/plm_interpretability/sae_model.py
@@ -7,7 +7,8 @@
 import torch.nn as nn
 from torch.nn import functional as F
 from transformers import PreTrainedModel, PreTrainedTokenizer
-from utils import get_layer_activations
+
+from plm_interpretability.utils import get_layer_activations
 
 
 class SparseAutoencoder(nn.Module):

From 9f16d594cefc33bc1ac2a649d9053590f344fe89 Mon Sep 17 00:00:00 2001
From: Liam <liambai2000@gmail.com>
Date: Thu, 17 Oct 2024 23:32:19 -0400
Subject: [PATCH 2/2] Add script

---
 .../scripts/run_all_probes.sh                 | 67 +++++++++++++++++++
 1 file changed, 67 insertions(+)
 create mode 100755 plm_interpretability/scripts/run_all_probes.sh

diff --git a/plm_interpretability/scripts/run_all_probes.sh b/plm_interpretability/scripts/run_all_probes.sh
new file mode 100755
index 0000000..40b61d0
--- /dev/null
+++ b/plm_interpretability/scripts/run_all_probes.sh
@@ -0,0 +1,67 @@
+#!/bin/bash
+#
+# Run every logistic regression probe configuration for one SAE checkpoint.
+#
+# Usage: run_all_probes.sh <path_to_checkpoint_file> <sae_dim> <plm_layer>
+#
+# All results are written under "<checkpoint name>_probe_results/" in the
+# current directory, one subdirectory per probe configuration.
+
+# Abort on the first failing command, unset variable, or failed pipeline stage
+# so a failed download or probe is never reported as a success.
+set -euo pipefail
+
+# Check if all required arguments are provided
+if [ $# -lt 3 ]; then
+    echo "Error: Insufficient arguments provided." >&2
+    echo "Usage: $0 <path_to_checkpoint_file> <sae_dim> <plm_layer>" >&2
+    exit 1
+fi
+
+checkpoint_file="$1"
+sae_dim="$2"
+plm_layer="$3"
+swissprot_tsv="swissprot_full_annotations.tsv"
+
+echo "Checkpoint file: $checkpoint_file"
+echo "SAE dimension: $sae_dim"
+echo "PLM layer: $plm_layer"
+
+# Download the SwissProt annotations only if they are not already present.
+if [ ! -f "$swissprot_tsv" ]; then
+    echo "$swissprot_tsv not found. Downloading..."
+    # -O pins the output name to the one checked above and used by the probes.
+    gdown "https://drive.google.com/uc?id=1TmbZGKt81Php8NT4s4OfbIwh05h-GJDS" -O "$swissprot_tsv"
+    echo "Download complete."
+else
+    echo "$swissprot_tsv already exists. Skipping download."
+fi
+
+# Name the output directory after the checkpoint file (directory and extension
+# stripped). $checkpoint_file itself keeps the full path for the probes.
+checkpoint_name="$(basename "$checkpoint_file")"
+checkpoint_name="${checkpoint_name%.*}"
+output_dir="${checkpoint_name}_probe_results"
+mkdir -p "$output_dir"
+
+# Options shared by every probe run.
+common_args=(
+    --sae-checkpoint "$checkpoint_file"
+    --sae-dim "$sae_dim"
+    --plm-dim 1280
+    --plm-layer "$plm_layer"
+    --swissprot-tsv "$swissprot_tsv"
+)
+
+logistic_regression_probe single-latent "${common_args[@]}" \
+    --output-dir "$output_dir/single_latent_single_residue"
+
+logistic_regression_probe single-latent "${common_args[@]}" \
+    --pool-over-annotation True \
+    --output-dir "$output_dir/single_latent_pool_over_annotation"
+
+logistic_regression_probe all-latents "${common_args[@]}" \
+    --pool-over-annotation True \
+    --output-dir "$output_dir/all_latents"
+
+echo "Finished running all probes. Results saved in $output_dir"