"""
Memory-Aware Queue Management System
====================================

Provides intelligent queue management based on memory usage and file sizes
rather than simple task count limits. Supports multiple users with fair
queuing and capacity management.

Features:
- Memory-based queue limits (not just task count)
- Fair queuing across multiple users
- Upload capacity checking with user feedback
- Graceful degradation under load
- Service-specific memory tracking
"""

import os
import time
import json
import uuid
import logging
from typing import Dict, List, Optional, Any, Tuple
from dataclasses import dataclass, asdict
from enum import Enum

import redis
import psutil

from .redis_manager import get_redis_manager

logger = logging.getLogger(__name__)


class QueueStatus(Enum):
    """Operating states of the queue."""

    ACCEPTING = "accepting"      # Normal operation
    BUSY = "busy"                # High load, warn users
    OVERLOADED = "overloaded"    # Reject new uploads
    MAINTENANCE = "maintenance"  # Manual override


@dataclass
class MemoryConfig:
    """Memory configuration for queue management."""

    max_total_memory_mb: int = 2048          # 2GB total queue memory
    max_user_memory_mb: int = 512            # 512MB per user
    max_file_size_mb: int = 100              # 100MB max file size
    memory_warning_threshold: float = 0.8    # Warn at 80%
    memory_reject_threshold: float = 0.95    # Reject at 95%


@dataclass
class QueuedFile:
    """Represents a file waiting in the queue.

    ``queued_at`` defaults to the current time when left at 0, and
    ``memory_estimate_mb`` is always recomputed from ``size_bytes`` and
    ``mime_type`` in ``__post_init__`` (any value passed in is overwritten).
    """

    file_id: str
    user_id: str
    filename: str
    size_bytes: int
    mime_type: str
    cabinet_id: str
    priority: int = 1
    queued_at: float = 0
    estimated_processing_time: int = 300  # seconds
    memory_estimate_mb: float = 0

    def __post_init__(self):
        if self.queued_at == 0:
            self.queued_at = time.time()
        # Estimate memory usage (rough heuristic)
        self.memory_estimate_mb = self._estimate_memory_usage()

    def _estimate_memory_usage(self) -> float:
        """Estimate memory usage (in MB) for this file during processing.

        The estimate is the file size in MB times a multiplier chosen by
        MIME type. A missing (``None``) MIME type is treated like any other
        non-PDF, non-image document.
        """
        base_mb = self.size_bytes / (1024 * 1024)
        # Tolerate a missing MIME type instead of failing on .startswith().
        mime_type = self.mime_type or ""

        # Processing multipliers based on operations
        if mime_type == 'application/pdf':
            # PDF: original + extracted text + images + thumbnails
            return base_mb * 3.5
        if mime_type.startswith('image/'):
            # Images: original + resized variants + OCR text
            return base_mb * 2.5
        # Other docs: original + PDF conversion + processing
        return base_mb * 4.0


class MemoryAwareQueue:
    """Memory-aware queue management system.

    State lives in Redis: a sorted set holds the queued files (JSON encoded),
    and per-user / per-service float counters track estimated memory in MB.
    """

    # Service-specific memory limits as a fraction of the total budget.
    # Dict order also defines the order services are reported in.
    SERVICE_MEMORY_SHARES: Dict[str, float] = {
        'docling': 0.4,            # 40% for Docling (memory-intensive)
        'tika': 0.2,               # 20% for Tika
        'llm': 0.3,                # 30% for LLM processing
        'document_analysis': 0.1,  # 10% for document analysis
    }
    # Share granted to a service that is not listed above.
    DEFAULT_SERVICE_SHARE = 0.1

    def __init__(self, environment: str = "dev"):
        """Connect to Redis for ``environment`` and load limits from env vars."""
        self.redis_manager = get_redis_manager(environment)
        self.redis_client = self.redis_manager.client
        self.config = self._load_config()

        # Redis keys
        self.upload_queue_key = "upload_queue"
        self.processing_memory_key = "processing_memory"
        self.user_quota_key = "user_quotas"
        self.system_status_key = "system_status"

        logger.info(f"🧠 Memory-aware queue initialized (max: {self.config.max_total_memory_mb}MB)")

    def _load_config(self) -> MemoryConfig:
        """Load memory configuration from environment."""
        return MemoryConfig(
            max_total_memory_mb=int(os.getenv('QUEUE_MAX_MEMORY_MB', '2048')),
            max_user_memory_mb=int(os.getenv('QUEUE_MAX_USER_MEMORY_MB', '512')),
            max_file_size_mb=int(os.getenv('MAX_FILE_SIZE_MB', '100')),
            memory_warning_threshold=float(os.getenv('MEMORY_WARNING_THRESHOLD', '0.8')),
            memory_reject_threshold=float(os.getenv('MEMORY_REJECT_THRESHOLD', '0.95'))
        )

    @staticmethod
    def _ratio(used: float, limit: float) -> float:
        """Return ``used / limit``, or 0 when ``limit`` is not positive."""
        return used / limit if limit > 0 else 0

    def check_upload_capacity(self, user_id: str, file_size_bytes: int,
                              mime_type: str) -> Tuple[bool, str, Dict[str, Any]]:
        """
        Check if system can accept a new upload.

        Checks, in order: the per-file size limit, the user's quota, and the
        system-wide reject threshold. Nothing is written to Redis.

        Returns:
            (can_accept, message, queue_info)
        """
        # Create temporary QueuedFile to estimate memory
        temp_file = QueuedFile(
            file_id="temp",
            user_id=user_id,
            filename="temp",
            size_bytes=file_size_bytes,
            mime_type=mime_type,
            cabinet_id="temp"
        )
        file_memory_mb = temp_file.memory_estimate_mb

        # Check file size limit
        if file_size_bytes > (self.config.max_file_size_mb * 1024 * 1024):
            return False, f"File too large (max: {self.config.max_file_size_mb}MB)", {}

        # Get current memory usage
        current_memory = self._get_current_memory_usage()
        user_memory = self._get_user_memory_usage(user_id)

        # Check user quota
        if user_memory + file_memory_mb > self.config.max_user_memory_mb:
            return False, f"User quota exceeded (limit: {self.config.max_user_memory_mb}MB)", {
                'user_current': user_memory,
                'user_limit': self.config.max_user_memory_mb
            }

        # Check system capacity
        total_after = current_memory + file_memory_mb
        max_memory = self.config.max_total_memory_mb

        if total_after > (max_memory * self.config.memory_reject_threshold):
            queue_info = self._get_queue_info()
            return False, "System overloaded. Please try again later.", {
                'current_memory': current_memory,
                'max_memory': max_memory,
                # Guarded: a zero memory budget must not raise here.
                'utilization': self._ratio(current_memory, max_memory),
                'queue_position': queue_info['total_queued'] + 1
            }

        # Calculate wait time estimate
        wait_estimate = self._estimate_wait_time(user_id)

        status = "ready"
        message = "Upload accepted"

        if total_after > (max_memory * self.config.memory_warning_threshold):
            status = "busy"
            message = f"System busy. Estimated wait: {wait_estimate // 60}m {wait_estimate % 60}s"

        return True, message, {
            'status': status,
            'estimated_wait_seconds': wait_estimate,
            'memory_usage': {
                'current': current_memory,
                'after_upload': total_after,
                'limit': max_memory,
                'utilization': self._ratio(total_after, max_memory)
            },
            'user_quota': {
                'used': user_memory,
                'after_upload': user_memory + file_memory_mb,
                'limit': self.config.max_user_memory_mb
            }
        }

    def enqueue_file(self, file_id: str, user_id: str, filename: str,
                     size_bytes: int, mime_type: str, cabinet_id: str,
                     priority: int = 1) -> Dict[str, Any]:
        """
        Add file to upload queue.

        The file is stored as JSON in the Redis sorted set and its memory
        estimate is added to the user's quota counter.

        Returns:
            Queue information including position and estimated wait time
        """
        queued_file = QueuedFile(
            file_id=file_id,
            user_id=user_id,
            filename=filename,
            size_bytes=size_bytes,
            mime_type=mime_type,
            cabinet_id=cabinet_id,
            priority=priority
        )

        # Serialize and add to Redis queue (priority queue: higher priority = lower score)
        score = time.time() - (priority * 1000000)  # Priority affects score significantly

        self.redis_client.zadd(
            self.upload_queue_key,
            {json.dumps(asdict(queued_file)): score}
        )

        # Update user quota tracking
        self._update_user_quota(user_id, queued_file.memory_estimate_mb, increment=True)

        # Get queue position and wait estimate
        position = self._get_queue_position(file_id)
        wait_estimate = self._estimate_wait_time(user_id)

        logger.info(f"📋 Queued file {file_id} for user {user_id} (pos: {position}, wait: {wait_estimate}s)")

        return {
            'queued': True,
            'file_id': file_id,
            'queue_position': position,
            'estimated_wait_seconds': wait_estimate,
            'memory_estimate_mb': queued_file.memory_estimate_mb
        }

    def dequeue_next_file(self, service_name: str) -> Optional[QueuedFile]:
        """
        Get next file from queue for processing.

        Args:
            service_name: The service requesting work (for capacity management)

        Returns:
            The dequeued file, or None when the service is at capacity, the
            queue is empty, the head item does not fit in the service's
            remaining budget, or another worker claimed the head item first.
        """
        # Check if service has capacity
        service_memory = self._get_service_memory_usage(service_name)
        service_limit = self._get_service_memory_limit(service_name)

        if service_memory >= service_limit:
            logger.debug(f"Service {service_name} at capacity ({service_memory}/{service_limit}MB)")
            return None

        # Get next item from priority queue (lowest score first)
        items = self.redis_client.zrange(self.upload_queue_key, 0, 0, withscores=True)

        if not items:
            return None

        file_data_json, _score = items[0]
        file_data = json.loads(file_data_json)
        queued_file = QueuedFile(**file_data)

        # Check if this file would exceed service memory limit.
        # NOTE: only the head of the queue is inspected, so an oversized head
        # item is retried on later calls rather than skipped over.
        if service_memory + queued_file.memory_estimate_mb > service_limit:
            logger.debug(f"File {queued_file.file_id} too large for {service_name} capacity")
            return None

        # Remove from queue. ZREM reports how many members it removed; zero
        # means another worker claimed this item between our read and our
        # remove, so we must not process it (or adjust counters) a second time.
        removed = self.redis_client.zrem(self.upload_queue_key, file_data_json)
        if not removed:
            logger.debug(f"File {queued_file.file_id} was already claimed by another worker")
            return None

        # Update tracking
        self._update_user_quota(queued_file.user_id, queued_file.memory_estimate_mb, increment=False)
        self._update_service_memory(service_name, queued_file.memory_estimate_mb, increment=True)

        logger.info(f"🎯 Dequeued file {queued_file.file_id} for {service_name} processing")

        return queued_file

    def complete_processing(self, service_name: str, file_id: str, memory_used_mb: float):
        """Mark file processing as complete and free memory."""
        self._update_service_memory(service_name, memory_used_mb, increment=False)
        logger.info(f"✅ Completed processing {file_id} in {service_name} (freed {memory_used_mb}MB)")

    def _get_current_memory_usage(self) -> float:
        """Get current total memory usage across all services."""
        total = 0
        for service in self.SERVICE_MEMORY_SHARES:
            total += self._get_service_memory_usage(service)
        return total

    def _get_user_memory_usage(self, user_id: str) -> float:
        """Get current memory usage for a specific user (0 if untracked)."""
        user_key = f"{self.user_quota_key}:{user_id}"
        return float(self.redis_client.get(user_key) or 0)

    def _get_service_memory_usage(self, service_name: str) -> float:
        """Get current memory usage for a service (0 if untracked)."""
        service_key = f"{self.processing_memory_key}:{service_name}"
        return float(self.redis_client.get(service_key) or 0)

    def _get_service_memory_limit(self, service_name: str) -> float:
        """Get memory limit (MB) for a service.

        Unknown services get ``DEFAULT_SERVICE_SHARE`` of the total budget.
        """
        percentage = self.SERVICE_MEMORY_SHARES.get(service_name, self.DEFAULT_SERVICE_SHARE)
        return self.config.max_total_memory_mb * percentage

    def _adjust_counter(self, key: str, memory_mb: float, increment: bool,
                        ttl_seconds: int) -> None:
        """Add or subtract ``memory_mb`` on a Redis float counter.

        Both directions use INCRBYFLOAT so the adjustment itself is a single
        Redis command. A counter that would go negative is reset to 0. The
        key's expiry is refreshed to ``ttl_seconds`` on every update.
        """
        delta = memory_mb if increment else -memory_mb
        new_value = float(self.redis_client.incrbyfloat(key, delta))
        if new_value < 0:
            # Never report negative usage (e.g. freeing more than was tracked).
            self.redis_client.set(key, 0)
        # Set expiration for cleanup
        self.redis_client.expire(key, ttl_seconds)

    def _update_user_quota(self, user_id: str, memory_mb: float, increment: bool):
        """Update user memory quota tracking (counter expires after 24 hours)."""
        user_key = f"{self.user_quota_key}:{user_id}"
        self._adjust_counter(user_key, memory_mb, increment, ttl_seconds=86400)  # 24 hours

    def _update_service_memory(self, service_name: str, memory_mb: float, increment: bool):
        """Update service memory usage tracking (counter expires after 1 hour)."""
        service_key = f"{self.processing_memory_key}:{service_name}"
        self._adjust_counter(service_key, memory_mb, increment, ttl_seconds=3600)  # 1 hour

    def _get_queue_position(self, file_id: str) -> int:
        """Get 1-based position of file in queue, or 0 if it is not queued."""
        items = self.redis_client.zrange(self.upload_queue_key, 0, -1)

        for position, item in enumerate(items, start=1):
            if json.loads(item)['file_id'] == file_id:
                return position

        return 0

    def _estimate_wait_time(self, user_id: str) -> int:
        """Estimate wait time (seconds) for user's next file.

        ``user_id`` is currently unused; the estimate depends only on the
        total queue length.
        """
        # Simple estimation based on queue position and average processing time
        queue_size = self.redis_client.zcard(self.upload_queue_key)
        avg_processing_time = 300  # 5 minutes average

        return int(queue_size * avg_processing_time * 0.5)  # Assume parallel processing

    def _get_queue_info(self) -> Dict[str, Any]:
        """Get comprehensive queue information."""
        total_queued = self.redis_client.zcard(self.upload_queue_key)
        current_memory = self._get_current_memory_usage()
        max_memory = self.config.max_total_memory_mb

        return {
            'total_queued': total_queued,
            'memory_usage': {
                'current_mb': current_memory,
                'max_mb': max_memory,
                'utilization': self._ratio(current_memory, max_memory)
            },
            'status': self._determine_system_status(current_memory, max_memory)
        }

    def _determine_system_status(self, current_memory: float, max_memory: float) -> str:
        """Determine current system status based on memory usage.

        Returns "overloaded", "busy" or "ready".
        """
        utilization = self._ratio(current_memory, max_memory)

        if utilization >= self.config.memory_reject_threshold:
            return "overloaded"
        if utilization >= self.config.memory_warning_threshold:
            return "busy"
        return "ready"

    def get_system_status(self) -> Dict[str, Any]:
        """Get comprehensive system status for monitoring."""
        queue_info = self._get_queue_info()

        # Service-specific info
        services = {}
        for service_name in self.SERVICE_MEMORY_SHARES:
            used_mb = self._get_service_memory_usage(service_name)
            limit_mb = self._get_service_memory_limit(service_name)
            services[service_name] = {
                'memory_used_mb': used_mb,
                'memory_limit_mb': limit_mb,
                # Guarded: a zero memory budget gives a zero limit.
                'utilization': self._ratio(used_mb, limit_mb)
            }

        return {
            'status': queue_info['status'],
            'queue': queue_info,
            'services': services,
            'config': asdict(self.config)
        }


# Convenience functions
def get_memory_queue(environment: str = "dev") -> MemoryAwareQueue:
    """Get memory-aware queue instance (a new one is created on each call)."""
    return MemoryAwareQueue(environment)


def check_upload_capacity(user_id: str, file_size: int, mime_type: str,
                          environment: str = "dev") -> Tuple[bool, str, Dict]:
    """Quick capacity check for upload."""
    queue = get_memory_queue(environment)
    return queue.check_upload_capacity(user_id, file_size, mime_type)