From f56e88a17e707afc97fa55b587e714824f98c1da Mon Sep 17 00:00:00 2001
From: YeonwooSung
Date: Sun, 4 Aug 2024 22:33:02 +0900
Subject: [PATCH] refactor: Rename the internal variable for clearness

---
 Transformers/llama/llama3_implementation.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/Transformers/llama/llama3_implementation.py b/Transformers/llama/llama3_implementation.py
index b1a4bef..6dfd30b 100644
--- a/Transformers/llama/llama3_implementation.py
+++ b/Transformers/llama/llama3_implementation.py
@@ -251,16 +251,16 @@ def forward(self, tokens: torch.Tensor, start_pos: int):
 
         mask = None
         if seq_len > 1:
-            mask = torch.full((seq_len, seq_len), float("-inf"), device=tokens.device)
+            mask_for_kvcache = torch.full((seq_len, seq_len), float("-inf"), device=tokens.device)
 
-            mask = torch.triu(mask, diagonal=1)
+            mask_for_kvcache = torch.triu(mask_for_kvcache, diagonal=1)
 
             # When performing key-value caching, we compute the attention scores
             # only for the new sequence. Thus, the matrix of scores is of size
             # (seqlen, cache_len + seqlen), and the only masked entries are (i, j) for
             # j > cache_len + i, since row i corresponds to token cache_len + i.
             mask = torch.hstack(
-                [torch.zeros((seq_len, start_pos), device=tokens.device), mask]
+                [torch.zeros((seq_len, start_pos), device=tokens.device), mask_for_kvcache]
             ).type_as(h)
 
         for layer in self.layers:
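
For reference, a minimal standalone sketch of the mask construction this hunk renames, using made-up values for start_pos (tokens already in the KV cache) and seq_len (new tokens in this forward pass); the .type_as(h) cast is omitted because h (the token embeddings) is not available outside the model.

import torch

# Illustrative values only: 4 cached positions, 3 new tokens being decoded.
start_pos, seq_len = 4, 3
device = "cpu"

mask = None
if seq_len > 1:
    # Square causal mask over the new tokens: -inf strictly above the diagonal.
    mask_for_kvcache = torch.full((seq_len, seq_len), float("-inf"), device=device)
    mask_for_kvcache = torch.triu(mask_for_kvcache, diagonal=1)

    # Prepend start_pos zero columns: every new token may attend to all cached
    # tokens, so only entries (i, j) with j > start_pos + i stay masked.
    # Final shape: (seq_len, start_pos + seq_len).
    mask = torch.hstack(
        [torch.zeros((seq_len, start_pos), device=device), mask_for_kvcache]
    )

print(mask)
# tensor([[0., 0., 0., 0., 0., -inf, -inf],
#         [0., 0., 0., 0., 0., 0., -inf],
#         [0., 0., 0., 0., 0., 0., 0.]])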