api/archive/auto_processing/memory_aware_queue.py
2025-11-14 14:47:19 +00:00

412 lines
16 KiB
Python

"""
Memory-Aware Queue Management System
====================================
Provides intelligent queue management based on memory usage and file sizes
rather than simple task count limits. Supports multiple users with fair
queuing and capacity management.
Features:
- Memory-based queue limits (not just task count)
- Fair queuing across multiple users
- Upload capacity checking with user feedback
- Graceful degradation under load
- Service-specific memory tracking
"""
import os
import time
import json
import uuid
import logging
from typing import Dict, List, Optional, Any, Tuple
from dataclasses import dataclass, asdict
from enum import Enum
import redis
from .redis_manager import get_redis_manager
import psutil
logger = logging.getLogger(__name__)
class QueueStatus(Enum):
    """Operational states reported for the upload queue."""
    ACCEPTING = "accepting"      # Normal operation
    BUSY = "busy"                # High load, warn users
    OVERLOADED = "overloaded"    # Reject new uploads
    MAINTENANCE = "maintenance"  # Manual override
@dataclass
class MemoryConfig:
    """Memory configuration for queue management.

    All limits are in megabytes; thresholds are fractions of
    max_total_memory_mb at which the system warns or rejects uploads.
    """
    max_total_memory_mb: int = 2048         # 2GB total queue memory
    max_user_memory_mb: int = 512           # 512MB per user
    max_file_size_mb: int = 100             # 100MB max file size
    memory_warning_threshold: float = 0.8   # Warn at 80% utilization
    memory_reject_threshold: float = 0.95   # Reject at 95% utilization
@dataclass
class QueuedFile:
    """A file waiting in the upload queue, with a derived memory estimate."""
    file_id: str
    user_id: str
    filename: str
    size_bytes: int
    mime_type: str
    cabinet_id: str
    priority: int = 1
    queued_at: float = 0
    estimated_processing_time: int = 300  # seconds
    memory_estimate_mb: float = 0

    def __post_init__(self):
        """Stamp the enqueue time (if unset) and compute the memory estimate."""
        if not self.queued_at:
            self.queued_at = time.time()
        # Rough heuristic for peak processing memory, derived from MIME type.
        self.memory_estimate_mb = self._estimate_memory_usage()

    def _estimate_memory_usage(self) -> float:
        """Estimate peak memory (MB) this file will need while processing."""
        size_mb = self.size_bytes / (1024 * 1024)
        if self.mime_type == 'application/pdf':
            # PDF: original + extracted text + images + thumbnails
            multiplier = 3.5
        elif self.mime_type.startswith('image/'):
            # Images: original + resized variants + OCR text
            multiplier = 2.5
        else:
            # Other docs: original + PDF conversion + processing
            multiplier = 4.0
        return size_mb * multiplier
class MemoryAwareQueue:
    """Memory-aware queue management system.

    Backed by Redis: a sorted set holds serialized QueuedFile entries
    (score encodes priority + enqueue time), and plain keys hold per-user
    and per-service memory counters. Admission control is based on
    estimated memory usage rather than task count.
    """

    def __init__(self, environment: str = "dev"):
        """Connect to Redis for *environment* and load memory limits."""
        self.redis_manager = get_redis_manager(environment)
        self.redis_client = self.redis_manager.client
        self.config = self._load_config()
        # Redis keys
        self.upload_queue_key = "upload_queue"            # sorted set: JSON QueuedFile -> score
        self.processing_memory_key = "processing_memory"  # prefix for per-service counters
        self.user_quota_key = "user_quotas"               # prefix for per-user counters
        self.system_status_key = "system_status"          # NOTE(review): not referenced by any method in this class
        logger.info(f"🧠 Memory-aware queue initialized (max: {self.config.max_total_memory_mb}MB)")

    def _load_config(self) -> MemoryConfig:
        """Load memory configuration from environment variables (with defaults)."""
        return MemoryConfig(
            max_total_memory_mb=int(os.getenv('QUEUE_MAX_MEMORY_MB', '2048')),
            max_user_memory_mb=int(os.getenv('QUEUE_MAX_USER_MEMORY_MB', '512')),
            max_file_size_mb=int(os.getenv('MAX_FILE_SIZE_MB', '100')),
            memory_warning_threshold=float(os.getenv('MEMORY_WARNING_THRESHOLD', '0.8')),
            memory_reject_threshold=float(os.getenv('MEMORY_REJECT_THRESHOLD', '0.95'))
        )

    def check_upload_capacity(self, user_id: str, file_size_bytes: int,
                              mime_type: str) -> Tuple[bool, str, Dict[str, Any]]:
        """
        Check if system can accept a new upload.

        Validates, in order: the per-file size limit, the user's memory
        quota, and the system-wide reject threshold. Does not enqueue.

        Returns:
            (can_accept, message, queue_info)
        """
        # Create temporary QueuedFile to estimate memory
        temp_file = QueuedFile(
            file_id="temp",
            user_id=user_id,
            filename="temp",
            size_bytes=file_size_bytes,
            mime_type=mime_type,
            cabinet_id="temp"
        )
        file_memory_mb = temp_file.memory_estimate_mb
        # Check file size limit
        if file_size_bytes > (self.config.max_file_size_mb * 1024 * 1024):
            return False, f"File too large (max: {self.config.max_file_size_mb}MB)", {}
        # Get current memory usage
        current_memory = self._get_current_memory_usage()
        user_memory = self._get_user_memory_usage(user_id)
        # Check user quota
        if user_memory + file_memory_mb > self.config.max_user_memory_mb:
            return False, f"User quota exceeded (limit: {self.config.max_user_memory_mb}MB)", {
                'user_current': user_memory,
                'user_limit': self.config.max_user_memory_mb
            }
        # Check system capacity
        total_after = current_memory + file_memory_mb
        max_memory = self.config.max_total_memory_mb
        if total_after > (max_memory * self.config.memory_reject_threshold):
            queue_info = self._get_queue_info()
            return False, "System overloaded. Please try again later.", {
                'current_memory': current_memory,
                'max_memory': max_memory,
                'utilization': current_memory / max_memory,
                'queue_position': queue_info['total_queued'] + 1
            }
        # Calculate wait time estimate
        wait_estimate = self._estimate_wait_time(user_id)
        status = "ready"
        message = "Upload accepted"
        # Accepted but above the warning threshold: flag as busy with an ETA.
        if total_after > (max_memory * self.config.memory_warning_threshold):
            status = "busy"
            message = f"System busy. Estimated wait: {wait_estimate // 60}m {wait_estimate % 60}s"
        return True, message, {
            'status': status,
            'estimated_wait_seconds': wait_estimate,
            'memory_usage': {
                'current': current_memory,
                'after_upload': total_after,
                'limit': max_memory,
                'utilization': total_after / max_memory
            },
            'user_quota': {
                'used': user_memory,
                'after_upload': user_memory + file_memory_mb,
                'limit': self.config.max_user_memory_mb
            }
        }

    def enqueue_file(self, file_id: str, user_id: str, filename: str,
                     size_bytes: int, mime_type: str, cabinet_id: str,
                     priority: int = 1) -> Dict[str, Any]:
        """
        Add file to upload queue.

        Also increments the user's memory quota by the file's estimate;
        the quota is decremented again when the file is dequeued.

        Returns:
            Queue information including position and estimated wait time
        """
        queued_file = QueuedFile(
            file_id=file_id,
            user_id=user_id,
            filename=filename,
            size_bytes=size_bytes,
            mime_type=mime_type,
            cabinet_id=cabinet_id,
            priority=priority
        )
        # Serialize and add to Redis queue (priority queue: higher priority = lower score)
        score = time.time() - (priority * 1000000)  # Priority affects score significantly
        self.redis_client.zadd(
            self.upload_queue_key,
            {json.dumps(asdict(queued_file)): score}
        )
        # Update user quota tracking
        self._update_user_quota(user_id, queued_file.memory_estimate_mb, increment=True)
        # Get queue position and wait estimate
        position = self._get_queue_position(file_id)
        wait_estimate = self._estimate_wait_time(user_id)
        logger.info(f"📋 Queued file {file_id} for user {user_id} (pos: {position}, wait: {wait_estimate}s)")
        return {
            'queued': True,
            'file_id': file_id,
            'queue_position': position,
            'estimated_wait_seconds': wait_estimate,
            'memory_estimate_mb': queued_file.memory_estimate_mb
        }

    def dequeue_next_file(self, service_name: str) -> Optional[QueuedFile]:
        """
        Get next file from queue for processing.

        Returns None when the service is at capacity, the queue is empty,
        or the head-of-queue file would exceed the service's memory limit.

        Args:
            service_name: The service requesting work (for capacity management)
        """
        # Check if service has capacity
        service_memory = self._get_service_memory_usage(service_name)
        service_limit = self._get_service_memory_limit(service_name)
        if service_memory >= service_limit:
            logger.debug(f"Service {service_name} at capacity ({service_memory}/{service_limit}MB)")
            return None
        # Get next item from priority queue (lowest score first)
        items = self.redis_client.zrange(self.upload_queue_key, 0, 0, withscores=True)
        if not items:
            return None
        file_data_json, score = items[0]
        file_data = json.loads(file_data_json)
        queued_file = QueuedFile(**file_data)
        # Check if this file would exceed service memory limit
        if service_memory + queued_file.memory_estimate_mb > service_limit:
            # Skip this file for now, try smaller ones later
            # NOTE(review): this returns None immediately rather than scanning
            # further down the queue for a smaller file — confirm intent.
            logger.debug(f"File {queued_file.file_id} too large for {service_name} capacity")
            return None
        # Remove from queue
        self.redis_client.zrem(self.upload_queue_key, file_data_json)
        # Update tracking: release the user's queued quota, charge the service.
        self._update_user_quota(queued_file.user_id, queued_file.memory_estimate_mb, increment=False)
        self._update_service_memory(service_name, queued_file.memory_estimate_mb, increment=True)
        logger.info(f"🎯 Dequeued file {queued_file.file_id} for {service_name} processing")
        return queued_file

    def complete_processing(self, service_name: str, file_id: str, memory_used_mb: float):
        """Mark file processing as complete and free memory."""
        self._update_service_memory(service_name, memory_used_mb, increment=False)
        logger.info(f"✅ Completed processing {file_id} in {service_name} (freed {memory_used_mb}MB)")

    def _get_current_memory_usage(self) -> float:
        """Get current total memory usage (MB) across all known services."""
        services = ['docling', 'tika', 'llm', 'document_analysis']
        total = 0
        for service in services:
            service_key = f"{self.processing_memory_key}:{service}"
            # Missing key counts as 0 MB.
            memory = float(self.redis_client.get(service_key) or 0)
            total += memory
        return total

    def _get_user_memory_usage(self, user_id: str) -> float:
        """Get current queued-memory usage (MB) for a specific user."""
        user_key = f"{self.user_quota_key}:{user_id}"
        return float(self.redis_client.get(user_key) or 0)

    def _get_service_memory_usage(self, service_name: str) -> float:
        """Get current memory usage (MB) for a service."""
        service_key = f"{self.processing_memory_key}:{service_name}"
        return float(self.redis_client.get(service_key) or 0)

    def _get_service_memory_limit(self, service_name: str) -> float:
        """Get memory limit (MB) for a service.

        Unknown services fall back to the 10% share.
        """
        # Service-specific memory limits as percentage of total
        limits = {
            'docling': 0.4,            # 40% for Docling (memory-intensive)
            'tika': 0.2,               # 20% for Tika
            'llm': 0.3,                # 30% for LLM processing
            'document_analysis': 0.1   # 10% for document analysis
        }
        percentage = limits.get(service_name, 0.1)
        return self.config.max_total_memory_mb * percentage

    def _update_user_quota(self, user_id: str, memory_mb: float, increment: bool):
        """Update user memory quota tracking (clamped at 0 on decrement)."""
        user_key = f"{self.user_quota_key}:{user_id}"
        if increment:
            self.redis_client.incrbyfloat(user_key, memory_mb)
        else:
            # Read-modify-write so the counter never goes negative.
            current = float(self.redis_client.get(user_key) or 0)
            new_value = max(0, current - memory_mb)
            self.redis_client.set(user_key, new_value)
        # Set expiration for cleanup
        self.redis_client.expire(user_key, 86400)  # 24 hours

    def _update_service_memory(self, service_name: str, memory_mb: float, increment: bool):
        """Update service memory usage tracking (clamped at 0 on decrement)."""
        service_key = f"{self.processing_memory_key}:{service_name}"
        if increment:
            self.redis_client.incrbyfloat(service_key, memory_mb)
        else:
            current = float(self.redis_client.get(service_key) or 0)
            new_value = max(0, current - memory_mb)
            self.redis_client.set(service_key, new_value)
        # Set expiration for cleanup
        self.redis_client.expire(service_key, 3600)  # 1 hour

    def _get_queue_position(self, file_id: str) -> int:
        """Get 1-based position of file in queue; 0 if not found.

        Linear scan over the whole sorted set — O(n) per call.
        """
        items = self.redis_client.zrange(self.upload_queue_key, 0, -1)
        for i, item in enumerate(items):
            file_data = json.loads(item)
            if file_data['file_id'] == file_id:
                return i + 1
        return 0

    def _estimate_wait_time(self, user_id: str) -> int:
        """Estimate wait time (seconds) for user's next file.

        NOTE(review): *user_id* is currently unused — the estimate is based
        only on total queue size and a fixed average processing time.
        """
        # Simple estimation based on queue position and average processing time
        queue_size = self.redis_client.zcard(self.upload_queue_key)
        avg_processing_time = 300  # 5 minutes average
        return int(queue_size * avg_processing_time * 0.5)  # Assume parallel processing

    def _get_queue_info(self) -> Dict[str, Any]:
        """Get comprehensive queue information (size, memory, status)."""
        total_queued = self.redis_client.zcard(self.upload_queue_key)
        current_memory = self._get_current_memory_usage()
        max_memory = self.config.max_total_memory_mb
        return {
            'total_queued': total_queued,
            'memory_usage': {
                'current_mb': current_memory,
                'max_mb': max_memory,
                'utilization': current_memory / max_memory if max_memory > 0 else 0
            },
            'status': self._determine_system_status(current_memory, max_memory)
        }

    def _determine_system_status(self, current_memory: float, max_memory: float) -> str:
        """Map memory utilization to "ready" / "busy" / "overloaded"."""
        utilization = current_memory / max_memory if max_memory > 0 else 0
        if utilization >= self.config.memory_reject_threshold:
            return "overloaded"
        elif utilization >= self.config.memory_warning_threshold:
            return "busy"
        else:
            return "ready"

    def get_system_status(self) -> Dict[str, Any]:
        """Get comprehensive system status for monitoring."""
        queue_info = self._get_queue_info()
        # Service-specific info
        services = {}
        for service_name in ['docling', 'tika', 'llm', 'document_analysis']:
            services[service_name] = {
                'memory_used_mb': self._get_service_memory_usage(service_name),
                'memory_limit_mb': self._get_service_memory_limit(service_name),
                # NOTE(review): divides by the service limit without a zero
                # guard — fails if max_total_memory_mb is configured as 0.
                'utilization': self._get_service_memory_usage(service_name) / self._get_service_memory_limit(service_name)
            }
        return {
            'status': queue_info['status'],
            'queue': queue_info,
            'services': services,
            'config': asdict(self.config)
        }
# Convenience functions
def get_memory_queue(environment: str = "dev") -> MemoryAwareQueue:
    """Build and return a MemoryAwareQueue bound to *environment*."""
    queue = MemoryAwareQueue(environment)
    return queue
def check_upload_capacity(user_id: str, file_size: int, mime_type: str, environment: str = "dev") -> Tuple[bool, str, Dict]:
    """Convenience wrapper: run a capacity check on a fresh queue instance."""
    return get_memory_queue(environment).check_upload_capacity(user_id, file_size, mime_type)