From f56e88a17e707afc97fa55b587e714824f98c1da Mon Sep 17 00:00:00 2001
From: YeonwooSung
Date: Sun, 4 Aug 2024 22:33:02 +0900
Subject: [PATCH] refactor: Rename the internal variable for clearness

---
 Transformers/llama/llama3_implementation.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/Transformers/llama/llama3_implementation.py b/Transformers/llama/llama3_implementation.py
index b1a4bef..6dfd30b 100644
--- a/Transformers/llama/llama3_implementation.py
+++ b/Transformers/llama/llama3_implementation.py
@@ -251,16 +251,16 @@ def forward(self, tokens: torch.Tensor, start_pos: int):
 
         mask = None
         if seq_len > 1:
-            mask = torch.full((seq_len, seq_len), float("-inf"), device=tokens.device)
+            mask_for_kvcache = torch.full((seq_len, seq_len), float("-inf"), device=tokens.device)
 
-            mask = torch.triu(mask, diagonal=1)
+            mask_for_kvcache = torch.triu(mask_for_kvcache, diagonal=1)
 
             # When performing key-value caching, we compute the attention scores
             # only for the new sequence. Thus, the matrix of scores is of size
             # (seqlen, cache_len + seqlen), and the only masked entries are (i, j) for
             # j > cache_len + i, since row i corresponds to token cache_len + i.
             mask = torch.hstack(
-                [torch.zeros((seq_len, start_pos), device=tokens.device), mask]
+                [torch.zeros((seq_len, start_pos), device=tokens.device), mask_for_kvcache]
             ).type_as(h)
 
         for layer in self.layers:
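
For reference, a minimal standalone sketch of the mask construction this hunk renames, using made-up values for start_pos (tokens already in the KV cache) and seq_len (new tokens in this forward pass); the .type_as(h) cast is omitted because h (the token embeddings) is not available outside the model.

import torch

# Illustrative values only: 4 cached positions, 3 new tokens being decoded.
start_pos, seq_len = 4, 3
device = "cpu"

mask = None
if seq_len > 1:
    # Square causal mask over the new tokens: -inf strictly above the diagonal.
    mask_for_kvcache = torch.full((seq_len, seq_len), float("-inf"), device=device)
    mask_for_kvcache = torch.triu(mask_for_kvcache, diagonal=1)

    # Prepend start_pos zero columns: every new token may attend to all cached
    # tokens, so only entries (i, j) with j > start_pos + i stay masked.
    # Final shape: (seq_len, start_pos + seq_len).
    mask = torch.hstack(
        [torch.zeros((seq_len, start_pos), device=device), mask_for_kvcache]
    )

print(mask)
# tensor([[0., 0., 0., 0., 0., -inf, -inf],
#         [0., 0., 0., 0., 0., 0., -inf],
#         [0., 0., 0., 0., 0., 0., 0.]])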