"""Request caching module for Reddit Scraper.""" import hashlib from functools import lru_cache from typing import Dict, Any, Optional from datetime import datetime, timedelta class RequestCache: """In-memory LRU cache with TTL for scraping results.""" def __init__(self, default_ttl_seconds: int = 300): """ Initialize the cache. Args: default_ttl_seconds: Time-to-live for cached entries (default: 5 minutes) """ self.default_ttl = timedelta(seconds=default_ttl_seconds) self._cache: Dict[str, Dict[str, Any]] = {} def _generate_key(self, **kwargs) -> str: """Generate a cache key from query parameters.""" # Sort kwargs to ensure consistent ordering sorted_kwargs = sorted(kwargs.items()) key_str = str(sorted_kwargs) return hashlib.md5(key_str.encode()).hexdigest() def get(self, **kwargs) -> Optional[Dict[str, Any]]: """ Get cached result if still valid. Args: kwargs: Query parameters to match Returns: Cached result or None if not found/expired """ key = self._generate_key(**kwargs) if key not in self._cache: return None entry = self._cache[key] # Check expiration if datetime.now() > entry['expires_at']: del self._cache[key] return None return entry['result'] def set(self, result: Dict[str, Any], ttl_seconds: Optional[int] = None, **kwargs): """ Cache a scraping result. Args: result: The scraping result to cache ttl_seconds: Override default TTL (optional) kwargs: Query parameters as key=value pairs """ key = self._generate_key(**kwargs) # Calculate expiration time (handle both int seconds and timedelta objects) if isinstance(ttl_seconds, int): ttl_delta = timedelta(seconds=ttl_seconds) else: ttl_delta = ttl_seconds or self.default_ttl expires_at = datetime.now() + ttl_delta self._cache[key] = { 'result': result, 'expires_at': expires_at, 'cached_at': datetime.now() } def clear(self): """Clear all cached entries.""" self._cache.clear() # Global cache instance (shared across scraper instances) _cache_instance: Optional[RequestCache] = None def get_cache(ttl_seconds: int = 300) -> RequestCache: """Get or create global cache instance.""" global _cache_instance if _cache_instance is None: _cache_instance = RequestCache(default_ttl_seconds=ttl_seconds) return _cache_instance