remove mention of NVTE from tests/docs/etc
jstjohn committed Nov 12, 2024
1 parent 2db37b7 commit 2f05f9c
Showing 4 changed files with 1 addition and 18 deletions.
6 changes: 0 additions & 6 deletions README.md
@@ -186,9 +186,6 @@ export MY_DATA_SOURCE="pbss"
 
 ```bash
-# The fastest transformer engine environment variables in testing were the following two
-export NVTE_FUSED_ATTN=1
-export NVTE_FLASH_ATTN=0
 
 TEST_DATA_DIR=$(download_bionemo_data esm2/testdata_esm2_pretrain:2.0 --source $MY_DATA_SOURCE); \
 ESM2_650M_CKPT=$(download_bionemo_data esm2/650m:2.0 --source $MY_DATA_SOURCE); \
 python \
@@ -248,9 +245,6 @@ and DataModule types.
 > ⚠️ **Warning:** This setup does NO configuration of Weights and Biases. Edit your config JSON and populate it with your WandB details.
 ```
-export NVTE_FUSED_ATTN=1
-export NVTE_FLASH_ATTN=0
 bionemo-esm2-train \
 --data-config-t bionemo.esm2.run.config_models.ESM2DataConfig \
 --model-config-t bionemo.esm2.run.config_models.ExposedESM2PretrainConfig \
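The two variables removed above are standard Transformer Engine toggles (the old comment called them the fastest combination in testing). For anyone who wants to reproduce the previous behavior, here is a minimal sketch, assuming it runs before Transformer Engine selects its attention kernels, of setting the same toggles from Python rather than the shell:

```python
import os

# Optional and not part of the updated README: re-apply the Transformer Engine toggles
# that the old example exported from the shell. Must be set before attention kernels
# are chosen, i.e. before model construction.
os.environ["NVTE_FUSED_ATTN"] = "1"   # prefer the fused-attention backend
os.environ["NVTE_FLASH_ATTN"] = "0"   # disable the flash-attention backend
```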
3 changes: 0 additions & 3 deletions docs/docs/user-guide/examples/bionemo-esm2/pretrain.md
@@ -280,9 +280,6 @@ llm.train(
 Or simply call `esm2_pretrain.py` directly.
 ```bash
-# Enable fused attention in transformer engine for speed-up
-export NVTE_FUSED_ATTN=1
-export NVTE_FLASH_ATTN=0
 
 DATA_DIR=$(download_bionemo_data esm2/testdata_esm2_pretrain:2.0 --source ngc)
 
 python scripts/protein/esm2/esm2_pretrain.py \
7 changes: 0 additions & 7 deletions scripts/protein/esm2/test_esm2_pretrain.py
@@ -90,8 +90,6 @@ def test_main_runs(monkeypatch, tmpdir, dummy_protein_dataset, dummy_parquet_tra
     result_dir = Path(tmpdir.mkdir("results"))
 
     with megatron_parallel_state_utils.distributed_model_parallel_state():
-        monkeypatch.setenv("NVTE_FUSED_ATTN", "1")
-        monkeypatch.setenv("NVTE_FLASH_ATTN", "0")
         main(
             train_cluster_path=train_cluster_path,
             train_database_path=dummy_protein_dataset,
@@ -159,8 +157,6 @@ def test_val_dataloader_in_main_runs_with_limit_val_batches(
     result_dir = Path(tmpdir.mkdir("results"))
 
     with megatron_parallel_state_utils.distributed_model_parallel_state():
-        monkeypatch.setenv("NVTE_FUSED_ATTN", "1")
-        monkeypatch.setenv("NVTE_FLASH_ATTN", "0")
         main(
             train_cluster_path=train_cluster_path,
             train_database_path=dummy_protein_dataset,
@@ -239,9 +235,6 @@ def test_pretrain_cli(tmpdir, dummy_protein_dataset, dummy_parquet_train_val_inp
     # a local copy of the environment
     env = dict(**os.environ)
     env["MASTER_PORT"] = str(open_port)
-    env["NVTE_FUSED_ATTN"] = "1"
-    env["NVTE_FLASH_ATTN"] = "0"
-
     cmd = shlex.split(cmd_str)
     result = subprocess.run(
         cmd,
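The CLI test above still forwards a copy of the parent environment to the launched process; only the NVTE entries are gone. A minimal sketch of that forwarding pattern, with a hypothetical command and port standing in for the test's cmd_str and open_port:

```python
import os
import shlex
import subprocess

# Sketch of the env-forwarding pattern used in test_pretrain_cli (command and port are
# placeholders, not the test's real values).
env = dict(**os.environ)        # local copy so the parent environment stays untouched
env["MASTER_PORT"] = "29500"    # hypothetical port; the real test derives one from open_port

cmd = shlex.split("python -c 'import os; print(os.environ[\"MASTER_PORT\"])'")
result = subprocess.run(cmd, env=env, capture_output=True, text=True, check=True)
print(result.stdout.strip())    # prints: 29500
```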
3 changes: 1 addition & 2 deletions
@@ -525,8 +525,7 @@ def configure_model(self, tokenizer: AutoTokenizer) -> MegatronBioBertModelType:
             self.num_layers // p_size
         ) % vp_size == 0, "Make sure the number of model chunks is the same across all pipeline stages."
 
-        # The local specs all require the standard full attention mask. For transformer engine only the NVTE_FLASH_ATTN=0
-        # option requires this full attention mask.
+        # The local specs all require the standard full attention mask.
         use_full_attention_mask: bool = "transformer_engine" not in self.biobert_spec_option
         do_next_sentence = False
         if self.model_cls is None:
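With the NVTE_FLASH_ATTN caveat gone, the surviving comment and the `use_full_attention_mask` line carry the whole rule. A standalone sketch of the same check, assuming the spec option behaves like a string whose name contains "transformer_engine" for TE-based specs (the spec names below are illustrative, not the repo's real enum values):

```python
# Standalone sketch of the check shown in configure_model above.
def needs_full_attention_mask(biobert_spec_option: str) -> bool:
    # Local (non-Transformer-Engine) layer specs always need the standard full attention
    # mask; Transformer Engine specs construct their masking internally.
    return "transformer_engine" not in biobert_spec_option


# Illustrative usage with hypothetical spec names:
assert needs_full_attention_mask("bert_layer_local_spec")
assert not needs_full_attention_mask("bert_layer_with_transformer_engine_spec")
```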
