Skip to content

Commit

Permalink
Clean up SAE configs and add HuggingFace links
Browse files Browse the repository at this point in the history
  • Loading branch information
liambai committed Feb 15, 2025
1 parent 87d381c commit 285ad51
Show file tree
Hide file tree
Showing 3 changed files with 80 additions and 28 deletions.
60 changes: 38 additions & 22 deletions viz/src/SAEConfigs.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,9 @@ export type CuratedFeature = {

export type SAEConfig = {
storagePath: string;
description: string;
baseModel: string;
huggingFaceModelName?: string;
trainingData: string;
numHiddenDims: number;
plmLayer: number;
searchExamples?: { [key: string]: AminoAcidSequence | PDBID };
Expand All @@ -28,11 +30,16 @@ export const CONTRIBUTORS: Record<string, string> = {
/**
 * Root URL of the raw GitHub content in the `plm-interp-viz-data` repository.
 * NOTE(review): presumably each config's `storagePath` is resolved against
 * this root — confirm at the call sites.
 */
export const STORAGE_ROOT_URL =
  "https://raw.githubusercontent.com/liambai/plm-interp-viz-data/refs/heads/main";

// Common prefix of the HuggingFace repository that the two URLs below point into.
const HUGGINGFACE_REPO_BASE = "https://huggingface.co/liambai/InterProt-ESM2-SAEs";

/** Prefix for a file's page in the HuggingFace repo; append `/<file name>`. */
export const HUGGINGFACE_REPO_URL = `${HUGGINGFACE_REPO_BASE}/blob/main`;

/** Prefix for a file's download link in the HuggingFace repo; append `/<file name>`. */
export const HUGGINGFACE_DOWNLOAD_URL = `${HUGGINGFACE_REPO_BASE}/resolve/main`;

export const SAE_CONFIGS: Record<string, SAEConfig> = {
"SAE4096-L24": {
storagePath: "esm2_plm1280_l24_sae4096_100Kseqs",
description:
"This SAE was trained on layer 24 of [ESM2-650M](https://huggingface.co/facebook/esm2_t33_650M_UR50D) using sequences from [UniRef50](https://www.uniprot.org/help/uniref) and has 4096 hidden dimensions. Click on a feature below to visualize its activation pattern.",
baseModel: "[ESM-2 650M](https://huggingface.co/facebook/esm2_t33_650M_UR50D)",
huggingFaceModelName: "esm2_plm1280_l24_sae4096_100k.safetensors",
trainingData: "[UniRef50](https://www.uniprot.org/help/uniref)",
numHiddenDims: 4096,
plmLayer: 24,
searchExamples: {
Expand Down Expand Up @@ -416,8 +423,9 @@ export const SAE_CONFIGS: Record<string, SAEConfig> = {
},
"SAE4096-L4": {
storagePath: "4096_layer_sweep/esm2_plm1280_l4_sae4096_k64_auxk640",
description:
"This SAE was trained on layer 4 of [ESM2-650M](https://huggingface.co/facebook/esm2_t33_650M_UR50D) using sequences from [UniRef50](https://www.uniprot.org/help/uniref) and has 4096 hidden dimensions. Click on a feature below to visualize its activation pattern.",
huggingFaceModelName: "esm2_plm1280_l4_sae4096.safetensors",
baseModel: "[ESM2-650M](https://huggingface.co/facebook/esm2_t33_650M_UR50D)",
trainingData: "[UniRef50](https://www.uniprot.org/help/uniref)",
numHiddenDims: 4096,
plmLayer: 4,
defaultDim: 0,
Expand All @@ -426,8 +434,9 @@ export const SAE_CONFIGS: Record<string, SAEConfig> = {
},
"SAE4096-L8": {
storagePath: "4096_layer_sweep/esm2_plm1280_l8_sae4096_k64_auxk640",
description:
"This SAE was trained on layer 8 of [ESM2-650M](https://huggingface.co/facebook/esm2_t33_650M_UR50D) using sequences from [UniRef50](https://www.uniprot.org/help/uniref) and has 4096 hidden dimensions. Click on a feature below to visualize its activation pattern.",
huggingFaceModelName: "esm2_plm1280_l8_sae4096.safetensors",
baseModel: "[ESM2-650M](https://huggingface.co/facebook/esm2_t33_650M_UR50D)",
trainingData: "[UniRef50](https://www.uniprot.org/help/uniref)",
numHiddenDims: 4096,
plmLayer: 8,
defaultDim: 0,
Expand All @@ -436,8 +445,9 @@ export const SAE_CONFIGS: Record<string, SAEConfig> = {
},
"SAE4096-L12": {
storagePath: "4096_layer_sweep/esm2_plm1280_l12_sae4096_k64_auxk640",
description:
"This SAE was trained on layer 12 of [ESM2-650M](https://huggingface.co/facebook/esm2_t33_650M_UR50D) using sequences from [UniRef50](https://www.uniprot.org/help/uniref) and has 4096 hidden dimensions. Click on a feature below to visualize its activation pattern.",
huggingFaceModelName: "esm2_plm1280_l12_sae4096.safetensors",
baseModel: "[ESM2-650M](https://huggingface.co/facebook/esm2_t33_650M_UR50D)",
trainingData: "[UniRef50](https://www.uniprot.org/help/uniref)",
numHiddenDims: 4096,
plmLayer: 12,
defaultDim: 0,
Expand All @@ -446,8 +456,9 @@ export const SAE_CONFIGS: Record<string, SAEConfig> = {
},
"SAE4096-L16": {
storagePath: "4096_layer_sweep/esm2_plm1280_l16_sae4096_k64_auxk640",
description:
"This SAE was trained on layer 16 of [ESM2-650M](https://huggingface.co/facebook/esm2_t33_650M_UR50D) using sequences from [UniRef50](https://www.uniprot.org/help/uniref) and has 4096 hidden dimensions. Click on a feature below to visualize its activation pattern.",
huggingFaceModelName: "esm2_plm1280_l16_sae4096.safetensors",
baseModel: "[ESM2-650M](https://huggingface.co/facebook/esm2_t33_650M_UR50D)",
trainingData: "[UniRef50](https://www.uniprot.org/help/uniref)",
numHiddenDims: 4096,
plmLayer: 16,
defaultDim: 0,
Expand All @@ -456,8 +467,9 @@ export const SAE_CONFIGS: Record<string, SAEConfig> = {
},
"SAE4096-L20": {
storagePath: "4096_layer_sweep/esm2_plm1280_l20_sae4096_k64_auxk640",
description:
"This SAE was trained on layer 20 of [ESM2-650M](https://huggingface.co/facebook/esm2_t33_650M_UR50D) using sequences from [UniRef50](https://www.uniprot.org/help/uniref) and has 4096 hidden dimensions. Click on a feature below to visualize its activation pattern.",
huggingFaceModelName: "esm2_plm1280_l20_sae4096.safetensors",
baseModel: "[ESM2-650M](https://huggingface.co/facebook/esm2_t33_650M_UR50D)",
trainingData: "[UniRef50](https://www.uniprot.org/help/uniref)",
numHiddenDims: 4096,
plmLayer: 20,
defaultDim: 0,
Expand All @@ -466,8 +478,9 @@ export const SAE_CONFIGS: Record<string, SAEConfig> = {
},
"SAE4096-L28": {
storagePath: "4096_layer_sweep/esm2_plm1280_l28_sae4096_k64_auxk640",
description:
"This SAE was trained on layer 28 of [ESM2-650M](https://huggingface.co/facebook/esm2_t33_650M_UR50D) using sequences from [UniRef50](https://www.uniprot.org/help/uniref) and has 4096 hidden dimensions. Click on a feature below to visualize its activation pattern.",
huggingFaceModelName: "esm2_plm1280_l28_sae4096.safetensors",
baseModel: "[ESM2-650M](https://huggingface.co/facebook/esm2_t33_650M_UR50D)",
trainingData: "[UniRef50](https://www.uniprot.org/help/uniref)",
numHiddenDims: 4096,
plmLayer: 28,
defaultDim: 0,
Expand All @@ -476,8 +489,9 @@ export const SAE_CONFIGS: Record<string, SAEConfig> = {
},
"SAE4096-L32": {
storagePath: "4096_layer_sweep/esm2_plm1280_l32_sae4096_k64_auxk640",
description:
"This SAE was trained on layer 32 of [ESM2-650M](https://huggingface.co/facebook/esm2_t33_650M_UR50D) using sequences from [UniRef50](https://www.uniprot.org/help/uniref) and has 4096 hidden dimensions. Click on a feature below to visualize its activation pattern.",
huggingFaceModelName: "esm2_plm1280_l32_sae4096.safetensors",
baseModel: "[ESM2-650M](https://huggingface.co/facebook/esm2_t33_650M_UR50D)",
trainingData: "[UniRef50](https://www.uniprot.org/help/uniref)",
numHiddenDims: 4096,
plmLayer: 32,
defaultDim: 0,
Expand All @@ -486,8 +500,9 @@ export const SAE_CONFIGS: Record<string, SAEConfig> = {
},
"SAE4096-L33": {
storagePath: "4096_layer_sweep/esm2_plm1280_l33_sae4096_k64_auxk640",
description:
"This SAE was trained on layer 33 of [ESM2-650M](https://huggingface.co/facebook/esm2_t33_650M_UR50D) using sequences from [UniRef50](https://www.uniprot.org/help/uniref) and has 4096 hidden dimensions. Click on a feature below to visualize its activation pattern.",
huggingFaceModelName: "esm2_plm1280_l33_sae4096.safetensors",
baseModel: "[ESM2-650M](https://huggingface.co/facebook/esm2_t33_650M_UR50D)",
trainingData: "[UniRef50](https://www.uniprot.org/help/uniref)",
numHiddenDims: 4096,
plmLayer: 33,
defaultDim: 0,
Expand All @@ -496,8 +511,8 @@ export const SAE_CONFIGS: Record<string, SAEConfig> = {
},
"SAE4096-L24-ab": {
storagePath: "esm2_plm1280_l24_sae4096_k128_auxk512_antibody_seqs",
description:
"This SAE was trained on layer 24 of [ESM2-650M](https://huggingface.co/facebook/esm2_t33_650M_UR50D) using antibody sequences from [PLAbDab](https://opig.stats.ox.ac.uk/webapps/plabdab/) and has 4096 hidden dimensions. Click on a feature below to visualize its activation pattern.",
baseModel: "[ESM2-650M](https://huggingface.co/facebook/esm2_t33_650M_UR50D)",
trainingData: "[PLAbDab](https://opig.stats.ox.ac.uk/webapps/plabdab/)",
numHiddenDims: 4096,
plmLayer: 24,
searchExamples: {
Expand Down Expand Up @@ -660,7 +675,8 @@ export const SAE_CONFIGS: Record<string, SAEConfig> = {
},
"SAE8192-L24": {
storagePath: "k_sweep/esm2_plm1280_l24_sae8192_k16_auxk640",
description: "",
baseModel: "[ESM2-650M](https://huggingface.co/facebook/esm2_t33_650M_UR50D)",
trainingData: "[UniRef50](https://www.uniprot.org/help/uniref)",
numHiddenDims: 8192,
plmLayer: 24,
defaultDim: 0,
Expand Down
2 changes: 1 addition & 1 deletion viz/src/SAEVisualizerPage.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ const SAEVisualizerPage: React.FC = () => {
</TooltipTrigger>
<TooltipContent className="max-w-[300px]">
We precomputed SAE activations on 75,000 sequences from SwissProt clustered at
30% sequence identity. This feature activated on this percentage of those
30% sequence identity. The feature activated on this percentage of those
sequences.
</TooltipContent>
</Tooltip>
Expand Down
46 changes: 41 additions & 5 deletions viz/src/components/SAESidebar.tsx
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
import { useContext } from "react";
import { CuratedFeature, SAE_CONFIGS } from "../SAEConfigs";
import {
CuratedFeature,
HUGGINGFACE_DOWNLOAD_URL,
HUGGINGFACE_REPO_URL,
SAE_CONFIGS,
} from "../SAEConfigs";
import { Link } from "react-router-dom";
import {
Select,
Expand Down Expand Up @@ -76,19 +81,50 @@ export default function SAESidebar() {
))}
</SelectContent>
</Select>
<div className="text-sm text-left px-3 mb-2">
<Markdown>{SAEConfig.description}</Markdown>
<div className="text-sm text-left px-2 mb-4 space-y-3">
<table className="w-full mb-2">
<tbody>
<tr>
<td className="font-medium pr-2">Base model:</td>
<td>
<Markdown>{SAEConfig.baseModel}</Markdown>
</td>
</tr>
<tr>
<td className="font-medium pr-2">Base model layer:</td>
<td>{SAEConfig.plmLayer}</td>
</tr>
<tr>
<td className="font-medium pr-2">SAE dimension:</td>
<td>{SAEConfig.numHiddenDims}</td>
</tr>
<tr>
<td className="font-medium pr-2">SAE training data:</td>
<td>
<Markdown>{SAEConfig.trainingData}</Markdown>
</td>
</tr>
</tbody>
</table>
{SAEConfig.huggingFaceModelName && (
<p>
<Markdown>
{`The SAE model weights are available on [HuggingFace](${HUGGINGFACE_REPO_URL}/${SAEConfig.huggingFaceModelName}) ([download](${HUGGINGFACE_DOWNLOAD_URL}/${SAEConfig.huggingFaceModelName})).`}
</Markdown>
</p>
)}
<p>Click on a feature below to visualize its activation pattern.</p>
</div>
<Link
to={`/sae-viz/${model}/${Math.floor(Math.random() * SAEConfig.numHiddenDims)}`}
className="mb-3 mx-3 py-2 flex items-center justify-center text-sm border rounded-md hover:bg-accent hover:text-accent-foreground bg-background shadow-sm transition-colors"
className="mb-3 mx-2 py-2 flex items-center justify-center text-sm border rounded-md hover:bg-accent hover:text-accent-foreground bg-background shadow-sm transition-colors"
onClick={() => setOpenMobile(false)}
>
<Dices className="w-4 h-4 mr-2" /> Random Feature
</Link>
<Link
to={`/sae-viz/${model}`}
className="mb-3 mx-3 py-2 flex items-center justify-center text-sm border rounded-md hover:bg-accent hover:text-accent-foreground bg-background shadow-sm transition-colors"
className="mb-3 mx-2 py-2 flex items-center justify-center text-sm border rounded-md hover:bg-accent hover:text-accent-foreground bg-background shadow-sm transition-colors"
onClick={() => setOpenMobile(false)}
>
<Search className="w-4 h-4 mr-2 shrink-0" />
Expand Down

0 comments on commit 285ad51

Please sign in to comment.