From 3c6b13ac75813db15d9bab9de222e98d65c2eedc Mon Sep 17 00:00:00 2001
From: Liam
Date: Fri, 1 Nov 2024 12:26:55 -0400
Subject: [PATCH] Improve steering UX
---
viz/src/components/CustomSeqPlayground.tsx | 120 ++++++++++++---------
1 file changed, 67 insertions(+), 53 deletions(-)
diff --git a/viz/src/components/CustomSeqPlayground.tsx b/viz/src/components/CustomSeqPlayground.tsx
index 3b1efd4..6370d6f 100644
--- a/viz/src/components/CustomSeqPlayground.tsx
+++ b/viz/src/components/CustomSeqPlayground.tsx
@@ -206,74 +206,88 @@ const CustomSeqPlayground = ({ feature }: CustomSeqPlaygroundProps) => {
href="https://transformer-circuits.pub/2024/scaling-monosemanticity/index.html#appendix-methods-steering"
className="underline"
>
- their implementation
+ their approach
, we reconstruct the input sequence with the SAE "spliced into" ESM2 at layer 24.
With steering multiplier N, the SAE activation at every residue in the sequence is
set to N * (max activation along the sequence). So,
-
N = 0 {String.fromCharCode(8594)} setting this feature to 0
+
N = 0 {String.fromCharCode(8594)} setting the feature's activation to 0
- N = 1 {String.fromCharCode(8594)} amplifying this feature by setting its
- activation at each residue to the max activation along the sequence
+ N = 1 {String.fromCharCode(8594)} amplifying the feature by setting its activation
+ at each residue to its max activation along the sequence
- Check out{" "}
-
- this explanation
- {" "}
- from Anthropic for more technical details. We're experimenting with different
- methods of steering and will make them available soon!
+ We're experimenting with different methods of steering and will make them available
+ soon!