Skip to content

Commit

Permalink
fix: improve word generation cyclic file reading (#54)
Browse files Browse the repository at this point in the history
This pull request includes several improvements to the word_generator function in the bing_rewards/__init__.py file. The changes enhance error handling, ensure uniform random distribution, and improve code readability.

Enhancements to word_generator function:
- Type hints: specified the return type as Generator[str, None, None].
- Added yields and potential exceptions in the docstring.
- Implemented error handling for FileNotFoundError and IOError, with appropriate error messages and re-raising of exceptions.
- Ensured the function handles empty keyword files by raising a ValueError if the file size is zero. (Useful when implementing a feature to add your own .txt files for searches)
- Improved the random starting position logic (to capture the whole txt file) and ensured clean line boundaries by discarding partial lines and skipping empty lines.
  • Loading branch information
znarfm authored Feb 9, 2025
1 parent 56d92c8 commit 4f018ca
Showing 1 changed file with 46 additions and 13 deletions.
59 changes: 46 additions & 13 deletions bing_rewards/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,25 +44,58 @@
import bing_rewards.options as app_options


def word_generator() -> Generator[str]:
def word_generator() -> Generator[str, None, None]:
"""Infinitely generate terms from the word file.
Starts reading from a random position in the file.
If end of file is reached, close and restart.
Handles file operations safely and ensures uniform random distribution.
Yields:
str: A random keyword from the file, stripped of whitespace.
Raises:
OSError: If there are issues accessing or reading the file.
"""
word_data = resources.files('bing_rewards').joinpath('data', 'keywords.txt')
while True:
with (
resources.as_file(word_data) as p,
p.open(mode='r', encoding='utf-8') as fh,
):
fh.seek(0, SEEK_END)
size = fh.tell() # Get the filesize of the Keywords file
# Start at a random position in the stream
fh.seek(random.randint(0, (size * 3 // 4)), SEEK_SET)
for line in fh:
# Use the built in file handler generator
yield line.strip()

try:
while True:
with (
resources.as_file(word_data) as p,
p.open(mode='r', encoding='utf-8') as fh,
):
# Get the file size of the Keywords file
fh.seek(0, SEEK_END)
size = fh.tell()

if size == 0:
raise ValueError('Keywords file is empty')

# Start at a random position in the stream
fh.seek(random.randint(0, size - 1), SEEK_SET)

# Read and discard partial line to ensure we start at a clean line boundary
fh.readline()

# Read lines until EOF
for raw_line in fh:
stripped_line = raw_line.strip()
if stripped_line: # Skip empty lines
yield stripped_line

# If we hit EOF, seek back to start and continue until we've yielded enough words
fh.seek(0)
for raw_line in fh:
stripped_line = raw_line.strip()
if stripped_line:
yield stripped_line
except OSError as e:
print(f'Error accessing keywords file: {e}')
raise
except Exception as e:
print(f'Unexpected error in word generation: {e}')
raise


def browser_cmd(exe: Path, agent: str, profile: str = '') -> list[str]:
Expand Down

0 comments on commit 4f018ca

Please sign in to comment.