stanfordnlp · frankaging · Mar 14, 2024 · Mar 14, 2024 · Mar 14, 2024
diff --git a/README.md b/README.md
@@ -38,31 +38,41 @@ pip install pyvene
 ```
 
 ## _Wrap_ , _Intervene_ and _Share_
-You can intervene with supported models as,
+You can intervene with any HuggingFace model as,
 ```python
 import torch
 import pyvene as pv
+from transformers import AutoTokenizer, AutoModelForCausalLM
 
-_, tokenizer, gpt2 = pv.create_gpt2()
+model_name = "meta-llama/Llama-2-7b-hf" # your HF model name.
+model = AutoModelForCausalLM.from_pretrained(
+    model_name, torch_dtype=torch.bfloat16, device_map="cuda")
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+
+def zeroout_intervention_fn(b, s): 
+    b[:,3] = 0. # position index 3 (0-based, i.e. the 4th token)
+    return b
 
-pv_gpt2 = pv.IntervenableModel({
-    "layer": 0, "component": "block_output",
-    "source_representation": torch.zeros(gpt2.config.n_embd)
-}, model=gpt2)
+pv_model = pv.IntervenableModel({
+    "component": "model.layers[15].mlp.output", # string access
+    "intervention": zeroout_intervention_fn}, model=model)
 
-orig_outputs, intervened_outputs = pv_gpt2(
-    base = tokenizer("The capital of Spain is", return_tensors="pt"), 
-    unit_locations={"base": 3}
+# run the intervened forward pass
+orig_outputs, intervened_outputs = pv_model(
+    tokenizer("The capital of Spain is", return_tensors="pt").to('cuda'),
+    output_original_output=True
 )
-print(intervened_outputs.last_hidden_state - orig_outputs.last_hidden_state)
+print(intervened_outputs.logits - orig_outputs.logits)
 ```
 which returns,
 ```
 tensor([[[ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
          [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
          [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
-         [ 0.0483, -0.1212, -0.2816,  ...,  0.1958,  0.0830,  0.0784],
-         [ 0.0519,  0.2547, -0.1631,  ...,  0.0050, -0.0453, -0.1624]]])
+         [ 0.4375,  1.0625,  0.3750,  ..., -0.1562,  0.4844,  0.2969],
+         [ 0.0938,  0.1250,  0.1875,  ...,  0.2031,  0.0625,  0.2188],
+         [ 0.0000, -0.0625, -0.0312,  ...,  0.0000,  0.0000, -0.0156]]],
+       device='cuda:0')
 ```
 
 ## _IntervenableModel_ Loaded from HuggingFace Directly

diff --git a/pyvene_101.ipynb b/pyvene_101.ipynb
@@ -126,7 +126,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 14,
+   "execution_count": 2,
    "id": "17c7f2f6-b0d3-4fe2-8e4f-c044b93f3ef0",
    "metadata": {},
    "outputs": [],
@@ -135,27 +135,19 @@
     "from transformers import AutoTokenizer, AutoModelForCausalLM\n",
     "\n",
     "model_name = \"gpt2\"\n",
-    "model = AutoModelForCausalLM.from_pretrained(model_name)\n",
+    "gpt2 = AutoModelForCausalLM.from_pretrained(model_name)\n",
     "tokenizer = AutoTokenizer.from_pretrained(model_name)\n",
     "\n",
-    "# create a dict-based intervention config\n",
-    "pv_config = pv.IntervenableConfig({\n",
-    "  \"component\": \"transformer.h[0].mlp.output\"},\n",
-    "  intervention_types=pv.VanillaIntervention\n",
-    ")\n",
-    "# wrap your model with the config\n",
-    "pv_gpt2 = pv.IntervenableModel(pv_config, model=model)\n",
+    "pv_gpt2 = pv.IntervenableModel({\n",
+    "    \"layer\": 10,\n",
+    "    \"component\": \"attention_weight\",\n",
+    "    \"intervention_type\": pv.CollectIntervention}, model=gpt2)\n",
     "\n",
-    "# run an interchange intervention (activation swap between two examples)\n",
-    "intervened_outputs = pv_gpt2(\n",
-    "  # the base input\n",
-    "  base=tokenizer(\"The capital of Spain is\", return_tensors = \"pt\"), \n",
-    "  # the source input\n",
-    "  sources=tokenizer(\"The capital of Italy is\", return_tensors = \"pt\"), \n",
-    "  # the location to intervene at (3rd token)\n",
-    "  unit_locations={\"sources->base\": 3},\n",
-    "  output_original_output=True # False then the first element in the tuple is None\n",
-    ")"
+    "base = \"When John and Mary went to the shops, Mary gave the bag to\"\n",
+    "collected_attn_w = pv_gpt2(\n",
+    "    base = tokenizer(base, return_tensors=\"pt\"\n",
+    "    ), unit_locations={\"base\": [h for h in range(12)]}\n",
+    ")[0][-1][0]"
    ]
   },
   {
@@ -166,53 +158,6 @@
     "#### Get Attention Weights with Direct Access String"
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": 12,
-   "id": "1ef4a1db-5187-4457-9878-f1dc03e9859b",
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "GPT2LMHeadModel(\n",
-       "  (transformer): GPT2Model(\n",
-       "    (wte): Embedding(50257, 768)\n",
-       "    (wpe): Embedding(1024, 768)\n",
-       "    (drop): Dropout(p=0.1, inplace=False)\n",
-       "    (h): ModuleList(\n",
-       "      (0-11): 12 x GPT2Block(\n",
-       "        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n",
-       "        (attn): GPT2Attention(\n",
-       "          (c_attn): Conv1D()\n",
-       "          (c_proj): Conv1D()\n",
-       "          (attn_dropout): Dropout(p=0.1, inplace=False)\n",
-       "          (resid_dropout): Dropout(p=0.1, inplace=False)\n",
-       "        )\n",
-       "        (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n",
-       "        (mlp): GPT2MLP(\n",
-       "          (c_fc): Conv1D()\n",
-       "          (c_proj): Conv1D()\n",
-       "          (act): NewGELUActivation()\n",
-       "          (dropout): Dropout(p=0.1, inplace=False)\n",
-       "        )\n",
-       "      )\n",
-       "    )\n",
-       "    (ln_f): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n",
-       "  )\n",
-       "  (lm_head): Linear(in_features=768, out_features=50257, bias=False)\n",
-       ")"
-      ]
-     },
-     "execution_count": 12,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "model"
-   ]
-  },
   {
    "cell_type": "code",
    "execution_count": 19,
@@ -231,6 +176,7 @@
     "import torch\n",
     "import pyvene as pv\n",
     "\n",
+    "# helper that loads the GPT-2 tokenizer and model from HuggingFace\n",
     "_, tokenizer, gpt2 = pv.create_gpt2()\n",
     "\n",
     "pv_gpt2 = pv.IntervenableModel({\n",
@@ -724,7 +670,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": 2,
    "id": "7f058ecd",
    "metadata": {},
    "outputs": [