-
Notifications
You must be signed in to change notification settings - Fork 29
Open
Description
Hello, Javi Rando!
I have adapted and optimized GuidedPasswordGeneration.ipynb so it can be run from the terminal; call the script generate_conditional.py:
import os
import argparse
import torch
from transformers import GPT2LMHeadModel
from transformers import RobertaTokenizerFast
import string
def get_tokens(tokenizer, symbols):
    """Return one token-id list per symbol, encoded without special tokens."""
    encoding = tokenizer(symbols, add_special_tokens=False)
    return encoding.input_ids


def create_token_dict(tokenizer):
    """Map template class characters to the token ids of their character sets.

    Keys: "l" (lowercase), "u" (uppercase), "d" (digits), "p" (punctuation).
    Each value is a list of single-token id lists, suitable for building
    `bad_words_ids` constraints for `model.generate`.
    """
    character_classes = {
        "l": string.ascii_lowercase,
        "u": string.ascii_uppercase,
        "d": string.digits,
        "p": string.punctuation,
    }
    return {
        key: get_tokens(tokenizer, list(chars))
        for key, chars in character_classes.items()
    }
def conditional_generation(template, num_generations=1):
    """Sample passwords that match a character-class template.

    Each template position is one of "l"/"u"/"d"/"p" (see `create_token_dict`)
    or any other character (e.g. "*") meaning "any token". Generation proceeds
    one token at a time, masking out every token that does not belong to the
    requested class via `bad_words_ids`.

    NOTE(review): relies on module-level globals set in `__main__`:
    `model`, `tokenizer`, `args`, `token_dict`, `all_tokens`.

    Returns a tensor of shape (num_generations, len(template)) with the
    leading BOS column stripped.
    """
    generations = []
    while len(generations) < num_generations:
        # Start every candidate from a single BOS token, batch of 1.
        generation = torch.tensor([tokenizer.bos_token_id]).unsqueeze(0)
        current_length = 1
        for char in template:
            if char in token_dict:
                # Forbid every vocabulary token outside the requested class.
                bad_tokens = [i for i in all_tokens if i not in token_dict[char]]
            else:
                # Wildcard position: only forbid a premature end-of-sequence.
                bad_tokens = [[tokenizer.eos_token_id]]
            generation = model.generate(
                generation.to(args.device),
                do_sample=True,
                max_length=current_length + 1,
                pad_token_id=tokenizer.pad_token_id,
                num_return_sequences=1,
                bad_words_ids=bad_tokens,
            )
            current_length += 1
        # Keep only sequences that did not terminate early. The original
        # hard-coded token id 2 here; use the tokenizer's actual EOS id.
        if tokenizer.eos_token_id not in generation.flatten():
            generations.append(generation)
    # Concatenate the batch and drop the BOS column.
    return torch.cat(generations, 0)[:, 1:]
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Conditional generating passwords using PassGPT.")
parser.add_argument("--model_path", type=str, help="Path to PassGPT model checkpoint", required=True)
parser.add_argument("--tokenizer_path", type=str, help="Path to tokenizer checkpoint", required=True)
parser.add_argument("--device", type=str, default='cuda', help="Device to run execution")
parser.add_argument("--template", type=str, help="Password template (e.g., 'lluu**dd')")
parser.add_argument("--maxchars", type=int, default=10, help="Maximum length of the passwords")
parser.add_argument("--num_generations", type=int, default=1, help="Number of passwords to generate")
args = parser.parse_args()
model = GPT2LMHeadModel.from_pretrained(args.model_path).eval().to(args.device)
tokenizer = RobertaTokenizerFast.from_pretrained(args.tokenizer_path,
max_len=args.maxchars+2,
padding="max_length",
truncation=True,
do_lower_case=False,
strip_accents=False,
mask_token="<mask>",
unk_token="<unk>",
pad_token="<pad>",
truncation_side="right")
token_dict = create_token_dict(tokenizer)
all_tokens = [[i] for i in range(len(tokenizer))]
generations = conditional_generation(args.template, args.num_generations)
decoded_passwords = tokenizer.batch_decode(generations)
for i, password in enumerate(decoded_passwords):
print(f"Generated Password {i+1}: {password}")Run command, example:
python src/generate_conditional.py --model_path output_dir/last/ --tokenizer_path tokenizers_folder/byte_bpe_tokenizer_99/ --template "ullldp*" --maxchars 10 --num_generations 5

Output:
Generated Password 1: Josi0!M
Generated Password 2: Meek2--
Generated Password 3: Sant0$S
Generated Password 4: Mana1**
Generated Password 5: Tomh8&&
Metadata
Metadata
Assignees
Labels
No labels