diff --git a/tests/test_rate_limit_manager.py b/tests/test_rate_limit_manager.py new file mode 100644 index 000000000..39d6237b8 --- /dev/null +++ b/tests/test_rate_limit_manager.py @@ -0,0 +1,156 @@ +"""Tests for rate limit manager.""" +import pytest +from datetime import datetime, timedelta, timezone +from utils.rate_limit_manager import ( + RateLimitManager, + RateLimitConfig, + RateLimitException, + BackoffStrategy +) + +class MockResponse: + """Mock response object with headers.""" + def __init__(self, headers): + self.headers = headers + +def test_backoff_strategy(): + """Test backoff delay calculations.""" + strategy = BackoffStrategy( + initial_delay=1.0, + max_delay=10.0, + factor=2.0, + enable_jitter=False + ) + + assert strategy.get_delay(0) == 1.0 + assert strategy.get_delay(1) == 2.0 + assert strategy.get_delay(2) == 4.0 + assert strategy.get_delay(3) == 8.0 + assert strategy.get_delay(4) == 10.0 # Max delay + +def test_rate_limit_manager_initialization(): + """Test manager initialization with custom config.""" + config = RateLimitConfig( + max_retries=3, + initial_delay=2.0, + max_delay=30.0 + ) + manager = RateLimitManager(config) + + assert manager.config.max_retries == 3 + assert manager.config.initial_delay == 2.0 + assert manager.config.max_delay == 30.0 + +def test_parse_rate_limit_headers(): + """Test parsing of GitHub API rate limit headers.""" + manager = RateLimitManager() + now = datetime.now(timezone.utc) + reset_time = int(now.timestamp()) + + headers = { + 'X-RateLimit-Limit': '5000', + 'X-RateLimit-Remaining': '4999', + 'X-RateLimit-Reset': str(reset_time), + 'X-RateLimit-Used': '1' + } + + info = manager._parse_rate_limit_headers(headers) + + assert info['limit'] == 5000 + assert info['remaining'] == 4999 + assert info['used'] == 1 + assert isinstance(info['reset'], datetime) + +def test_should_retry_logic(): + """Test retry decision logic.""" + manager = RateLimitManager( + RateLimitConfig(rate_limit_threshold=0.1) + ) + + # Set up 
test case + operation_id = 'test_op' + manager.retry_counts[operation_id] = 0 + + # Test case: should retry (low remaining ratio) + rate_info = {'remaining': 50, 'limit': 5000} + assert manager._should_retry(operation_id, rate_info) + + # Test case: should not retry (high remaining ratio) + rate_info = {'remaining': 4000, 'limit': 5000} + assert not manager._should_retry(operation_id, rate_info) + + # Test case: should not retry (max retries reached) + manager.retry_counts[operation_id] = 5 + assert not manager._should_retry(operation_id, rate_info) + +def test_execute_with_retry_success(): + """Test successful execution with no retries needed.""" + manager = RateLimitManager() + + # Mock successful operation + def operation(): + return MockResponse({ + 'X-RateLimit-Limit': '5000', + 'X-RateLimit-Remaining': '4999', + 'X-RateLimit-Reset': str(int(datetime.now(timezone.utc).timestamp())), + 'X-RateLimit-Used': '1' + }) + + result = manager.execute_with_retry(operation, 'test_op') + assert isinstance(result, MockResponse) + +def test_execute_with_retry_rate_limit(): + """Test retry behavior when rate limit is exceeded.""" + manager = RateLimitManager( + RateLimitConfig( + max_retries=2, + initial_delay=0.1, + max_delay=0.2 + ) + ) + + # Mock operation that fails with rate limit error + attempt = 0 + def operation(): + nonlocal attempt + attempt += 1 + if attempt < 2: + raise Exception("rate limit exceeded") + return MockResponse({ + 'X-RateLimit-Limit': '5000', + 'X-RateLimit-Remaining': '4999', + 'X-RateLimit-Reset': str(int(datetime.now(timezone.utc).timestamp())), + 'X-RateLimit-Used': '1' + }) + + result = manager.execute_with_retry(operation, 'test_op') + assert isinstance(result, MockResponse) + assert attempt == 2 + +def test_rate_limit_exception(): + """Test rate limit exception creation and message.""" + reset_time = datetime.now(timezone.utc) + exception = RateLimitException(reset_time, 0, 5000) + + assert str(exception).startswith("Rate limit exceeded") + 
assert "0/5000 remaining" in str(exception) + assert reset_time.isoformat() in str(exception) + +def test_reset_counts(): + """Test retry count reset functionality.""" + manager = RateLimitManager() + + # Set up some retry counts + manager.retry_counts = { + 'op1': 2, + 'op2': 3 + } + + # Test resetting specific operation + manager.reset_counts('op1') + assert 'op1' not in manager.retry_counts + assert manager.retry_counts['op2'] == 3 + + # Test resetting all operations + manager.reset_counts() + assert len(manager.retry_counts) == 0 \ No newline at end of file diff --git a/utils/README.md b/utils/README.md new file mode 100644 index 000000000..435b64acf --- /dev/null +++ b/utils/README.md @@ -0,0 +1,83 @@ +# Rate Limit Manager + +A robust rate limit management system for GitHub API interactions with intelligent retry logic and exponential backoff. + +## Features + +- Intelligent rate limit detection and monitoring +- Exponential backoff with jitter +- Configurable retry strategies +- Comprehensive logging +- Pre-emptive throttling +- Detailed rate limit tracking + +## Usage + +```python +from utils.rate_limit_manager import RateLimitManager, RateLimitConfig + +# Create a manager with custom configuration +config = RateLimitConfig( + max_retries=5, + initial_delay=1.0, + max_delay=3600.0, + backoff_factor=2.0, + rate_limit_threshold=0.1 +) +manager = RateLimitManager(config) + +# Use the manager to execute API operations +def github_api_call(): + response = requests.get('https://api.github.com/user') + return response + +result = manager.execute_with_retry(github_api_call, 'get_user') +``` + +## Configuration + +The `RateLimitConfig` class supports the following parameters: + +- `max_retries`: Maximum number of retry attempts (default: 5) +- `initial_delay`: Initial delay in seconds (default: 1.0) +- `max_delay`: Maximum delay in seconds (default: 3600.0) +- `backoff_factor`: Multiplication factor for exponential backoff (default: 2.0) +- `rate_limit_threshold`: 
Threshold for pre-emptive throttling (default: 0.1) +- `enable_jitter`: Whether to add random jitter to delays (default: True) +- `jitter_factor`: Maximum jitter as a fraction of delay (default: 0.1) + +## Testing + +Run the tests using pytest: + +```bash +pytest tests/test_rate_limit_manager.py +``` + +## Implementation Details + +### Rate Limit Detection + +The manager monitors GitHub API rate limits through response headers: +- X-RateLimit-Limit +- X-RateLimit-Remaining +- X-RateLimit-Reset +- X-RateLimit-Used + +### Backoff Strategy + +The exponential backoff is implemented with the following formula: +```python +delay = min(initial_delay * (backoff_factor ** attempt), max_delay) +if enable_jitter: + delay += random.uniform(0, jitter_factor * delay) +``` + +### Pre-emptive Throttling + +The manager will start throttling requests when the remaining rate limit falls below the configured threshold: +```python +remaining_ratio = rate_info['remaining'] / rate_info['limit'] +if remaining_ratio <= rate_limit_threshold: + # Apply backoff +``` \ No newline at end of file diff --git a/utils/rate_limit_manager.py b/utils/rate_limit_manager.py new file mode 100644 index 000000000..9342d4592 --- /dev/null +++ b/utils/rate_limit_manager.py @@ -0,0 +1,218 @@ +""" +Rate limit manager for GitHub API interactions. +Implements intelligent retry logic with exponential backoff. 
import logging
import random
import time
from dataclasses import dataclass
from datetime import datetime, timezone
from typing import Any, Callable, Dict, Optional, Type, TypeVar

logger = logging.getLogger(__name__)

T = TypeVar('T')


@dataclass
class RateLimitConfig:
    """Configuration for rate limit handling."""
    # Upper bound on consecutive rate-limited attempts per operation id.
    max_retries: int = 5
    initial_delay: float = 1.0  # seconds
    max_delay: float = 3600.0  # 1 hour
    backoff_factor: float = 2.0
    # Pre-emptive throttling starts when remaining/limit <= this ratio.
    rate_limit_threshold: float = 0.1  # 10% remaining
    enable_jitter: bool = True
    # Maximum jitter, expressed as a fraction of the computed delay.
    jitter_factor: float = 0.1


class RateLimitException(Exception):
    """Exception raised when rate limit is exceeded.

    Attributes:
        reset_time: Moment at which the rate limit window resets.
        remaining: Number of requests still available.
        limit: Total number of requests allowed in the window.
    """

    def __init__(self, reset_time: datetime, remaining: int, limit: int):
        self.reset_time = reset_time
        self.remaining = remaining
        self.limit = limit
        super().__init__(
            f"Rate limit exceeded. {remaining}/{limit} remaining. "
            f"Resets at {reset_time.isoformat()}"
        )


class BackoffStrategy:
    """Implements exponential backoff with optional jitter."""

    def __init__(
        self,
        initial_delay: float,
        max_delay: float,
        factor: float,
        enable_jitter: bool = True,
        jitter_factor: float = 0.1
    ):
        self.initial_delay = initial_delay
        self.max_delay = max_delay
        self.factor = factor
        self.enable_jitter = enable_jitter
        self.jitter_factor = jitter_factor

    def get_delay(self, attempt: int) -> float:
        """Calculate the delay in seconds for the given attempt number.

        The base delay is ``initial_delay * factor ** attempt`` capped at
        ``max_delay``. When jitter is enabled, a random amount in
        ``[0, jitter_factor * delay]`` is added on top of the capped value.
        """
        try:
            raw_delay = self.initial_delay * (self.factor ** attempt)
        except OverflowError:
            # The exponential no longer fits in a float; it is certainly
            # beyond the cap, so use the cap directly.
            raw_delay = self.max_delay

        delay = min(raw_delay, self.max_delay)

        if self.enable_jitter:
            delay += random.uniform(0, self.jitter_factor * delay)

        return delay


class RateLimitManager:
    """Manages GitHub API rate limits with retry logic.

    ``retry_counts`` tracks, per operation id, the number of consecutive
    rate-limited attempts. It is cleared when an operation succeeds and
    stays at ``max_retries`` after exhaustion until ``reset_counts`` is
    called.
    """

    def __init__(self, config: Optional[RateLimitConfig] = None):
        self.config = config or RateLimitConfig()
        self.backoff = BackoffStrategy(
            initial_delay=self.config.initial_delay,
            max_delay=self.config.max_delay,
            factor=self.config.backoff_factor,
            enable_jitter=self.config.enable_jitter,
            jitter_factor=self.config.jitter_factor
        )
        self.rate_limits: Dict[str, Dict[str, Any]] = {}
        self.retry_counts: Dict[str, int] = {}

    def _parse_rate_limit_headers(self, headers: Dict[str, str]) -> Dict[str, Any]:
        """Parse GitHub API rate limit headers.

        Returns a dict with ``limit``, ``remaining``, ``reset`` (aware UTC
        datetime) and ``used``, or an empty dict when the response carries
        no ``X-RateLimit-Limit`` header or the values cannot be parsed.
        """
        # Without a limit header there is no rate limit information at all;
        # fabricating zeros would look like an exhausted quota.
        if not headers or headers.get('X-RateLimit-Limit') is None:
            return {}

        try:
            return {
                'limit': int(headers.get('X-RateLimit-Limit', 0)),
                'remaining': int(headers.get('X-RateLimit-Remaining', 0)),
                'reset': datetime.fromtimestamp(
                    int(headers.get('X-RateLimit-Reset', 0)),
                    timezone.utc
                ),
                'used': int(headers.get('X-RateLimit-Used', 0))
            }
        except (ValueError, TypeError, OverflowError, OSError) as e:
            # OverflowError/OSError cover out-of-range timestamps.
            logger.warning("Error parsing rate limit headers: %s", e)
            return {}

    def _should_retry(self, operation_id: str, rate_info: Dict[str, Any]) -> bool:
        """Determine whether the operation should back off.

        Returns True when the retry budget is not exhausted and the
        remaining/limit ratio is at or below the configured threshold.
        Unknown operation ids count as zero retries; missing or
        non-positive limits never trigger a backoff.
        """
        if self.retry_counts.get(operation_id, 0) >= self.config.max_retries:
            return False

        limit = rate_info.get('limit', 0)
        if limit <= 0:
            # No usable quota information; avoid dividing by zero.
            return False

        remaining_ratio = rate_info.get('remaining', 0) / limit
        return remaining_ratio <= self.config.rate_limit_threshold

    def _wait_for_reset(self, reset_time: datetime) -> None:
        """Sleep until ``reset_time``; return immediately if it has passed."""
        now = datetime.now(timezone.utc)
        if reset_time > now:
            wait_seconds = (reset_time - now).total_seconds()
            logger.info(
                "Waiting %.2f seconds for rate limit reset", wait_seconds
            )
            time.sleep(wait_seconds)

    @staticmethod
    def _is_rate_limit_error(
        error: Exception,
        retry_on: Optional[Type[Exception]]
    ) -> bool:
        """Classify ``error`` by ``retry_on`` type, else by its message."""
        if retry_on is not None:
            return isinstance(error, retry_on)
        return "rate limit exceeded" in str(error).lower()

    def _record_rate_info(self, op_id: str, result: Any) -> None:
        """Store rate limit headers from ``result`` and throttle if needed."""
        headers = getattr(result, 'headers', None)
        if not headers:
            return

        rate_info = self._parse_rate_limit_headers(headers)
        if not rate_info:
            return

        self.rate_limits[op_id] = rate_info

        logger.info(
            "Rate limit status for %s: %s/%s remaining (Reset: %s)",
            op_id,
            rate_info['remaining'],
            rate_info['limit'],
            rate_info['reset'].isoformat()
        )

        # Slow down before the quota is actually exhausted.
        if self._should_retry(op_id, rate_info):
            delay = self.backoff.get_delay(self.retry_counts.get(op_id, 0))
            logger.info("Preemptive backoff for %.2f seconds", delay)
            time.sleep(delay)

    def _back_off_after_rate_limit(self, op_id: str) -> None:
        """Wait before the next attempt following a rate limit error."""
        reset_time = self.rate_limits.get(op_id, {}).get('reset')
        attempt = self.retry_counts[op_id]

        # Only trust a reset time that lies in the future; a stale one would
        # mean no wait at all and a hot retry loop.
        if reset_time is not None and reset_time > datetime.now(timezone.utc):
            logger.warning(
                "Rate limit exceeded. Attempt %d of %d",
                attempt,
                self.config.max_retries
            )
            self._wait_for_reset(reset_time)
            return

        delay = self.backoff.get_delay(attempt)
        logger.warning(
            "Rate limit exceeded with unknown or past reset time. "
            "Backing off for %.2f seconds",
            delay
        )
        time.sleep(delay)

    def execute_with_retry(
        self,
        operation: Callable[[], T],
        operation_id: Optional[str] = None,
        *,
        retry_on: Optional[Type[Exception]] = None
    ) -> T:
        """Execute operation with retry logic.

        The operation is attempted until it succeeds or the number of
        consecutive rate-limited attempts for this operation id reaches
        ``config.max_retries``.

        Args:
            operation: Callable that returns type T
            operation_id: Optional identifier for the operation
            retry_on: Optional exception type to retry on (default: None,
                retries on any error whose message contains
                "rate limit exceeded", case-insensitively)

        Returns:
            The result of type T from the operation

        Raises:
            RuntimeError: If max retries exceeded; the last rate limit
                error is attached as ``__cause__``
            Exception: Any exception from operation that is not classified
                as a rate limit error
        """
        op_id = operation_id or str(hash(operation))
        self.retry_counts.setdefault(op_id, 0)
        last_error: Optional[Exception] = None

        while self.retry_counts[op_id] < self.config.max_retries:
            try:
                result = operation()
            except Exception as e:
                if not self._is_rate_limit_error(e, retry_on):
                    # Not retried, so it must not consume retry budget.
                    logger.error("Non-rate-limit error in %s: %s", op_id, e)
                    raise

                self.retry_counts[op_id] += 1
                last_error = e

                # Do not sleep when no further attempt will be made.
                if self.retry_counts[op_id] >= self.config.max_retries:
                    break
                self._back_off_after_rate_limit(op_id)
                continue

            # Header bookkeeping happens outside the try block so that its
            # failures are never mistaken for a failure of the operation.
            self._record_rate_info(op_id, result)
            self.retry_counts[op_id] = 0
            return result

        error_msg = f"Operation {op_id} failed after {self.config.max_retries} retries"
        logger.error(error_msg)
        raise RuntimeError(error_msg) from last_error

    def reset_counts(self, operation_id: Optional[str] = None) -> None:
        """Reset retry counts for given operation or all operations."""
        if operation_id:
            self.retry_counts.pop(operation_id, None)
        else:
            self.retry_counts.clear()

    def get_rate_limit_info(self, operation_id: str) -> Dict[str, Any]:
        """Get rate limit information for operation (empty dict if none)."""
        return self.rate_limits.get(operation_id, {})