Skip to content

Commit

Permalink
Trim
Browse files Browse the repository at this point in the history
  • Loading branch information
liambai committed Oct 15, 2024
1 parent 669d8cb commit ec78ba9
Showing 1 changed file with 2 additions and 4 deletions.
6 changes: 2 additions & 4 deletions plm_interpretability/latent_probe/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,6 @@ class ResidueAnnotation:
]


- # @functools.lru_cache(maxsize=5000)
def get_sae_acts(
seq: str,
tokenizer: AutoTokenizer,
Expand Down Expand Up @@ -154,14 +153,13 @@ def get_annotation_entries_for_class(
# The note field is sometimes like "Homeobox", "Homeobox 1", etc.,
# so use string `in` to check.
entries = [e for e in entries if class_name in e.get("note", "")]
- if len(entries) > 0:
+ if len(entries) > 0 and len(seq) < 2000:
seq_to_annotation_entries[seq] = entries
seq_lengths.append(len(seq))

logger.info(
f"Found {len(seq_to_annotation_entries)} sequences with class {class_name}."
- f"Sequence length min: {min(seq_lengths)}, max: {max(seq_lengths)}, "
- f"mean: {np.mean(seq_lengths)}."
+ f"Mean sequence length: {np.mean(seq_lengths):.2f}."
)

if len(seq_to_annotation_entries) > max_seqs_per_task:
Expand Down

0 comments on commit ec78ba9

Please sign in to comment.