412 lines
16 KiB
Python
412 lines
16 KiB
Python
"""
|
|
Memory-Aware Queue Management System
|
|
====================================
|
|
|
|
Provides intelligent queue management based on memory usage and file sizes
|
|
rather than simple task count limits. Supports multiple users with fair
|
|
queuing and capacity management.
|
|
|
|
Features:
|
|
- Memory-based queue limits (not just task count)
|
|
- Fair queuing across multiple users
|
|
- Upload capacity checking with user feedback
|
|
- Graceful degradation under load
|
|
- Service-specific memory tracking
|
|
"""
|
|
|
|
import os
|
|
import time
|
|
import json
|
|
import uuid
|
|
import logging
|
|
from typing import Dict, List, Optional, Any, Tuple
|
|
from dataclasses import dataclass, asdict
|
|
from enum import Enum
|
|
import redis
|
|
from .redis_manager import get_redis_manager
|
|
import psutil
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
class QueueStatus(Enum):
    """Lifecycle states for the upload queue's admission policy."""
    ACCEPTING = "accepting"        # normal operation, uploads flow freely
    BUSY = "busy"                  # high load; accept but warn users
    OVERLOADED = "overloaded"      # at capacity; reject new uploads
    MAINTENANCE = "maintenance"    # manually forced offline
|
|
|
|
@dataclass
class MemoryConfig:
    """Memory limits and thresholds governing queue admission."""
    max_total_memory_mb: int = 2048          # 2GB total queue memory
    max_user_memory_mb: int = 512            # 512MB cap per user
    max_file_size_mb: int = 100              # 100MB cap per single file
    memory_warning_threshold: float = 0.8    # warn users at 80% utilization
    memory_reject_threshold: float = 0.95    # refuse uploads at 95% utilization
|
|
|
|
@dataclass
class QueuedFile:
    """A file waiting in the upload queue."""
    file_id: str
    user_id: str
    filename: str
    size_bytes: int
    mime_type: str
    cabinet_id: str
    priority: int = 1
    queued_at: float = 0                    # 0 means "stamp with current time"
    estimated_processing_time: int = 300    # seconds
    memory_estimate_mb: float = 0           # always recomputed in __post_init__

    def __post_init__(self):
        # Stamp the enqueue time unless the caller supplied one.
        if not self.queued_at:
            self.queued_at = time.time()

        # Derive the processing-memory estimate from size and MIME type
        # (rough heuristic; overrides any value passed in).
        self.memory_estimate_mb = self._estimate_memory_usage()

    def _estimate_memory_usage(self) -> float:
        """Estimate peak memory (MB) needed while processing this file."""
        size_mb = self.size_bytes / (1024 * 1024)

        # Multipliers reflect the intermediate artifacts each format creates:
        #   PDF:    original + extracted text + images + thumbnails
        #   images: original + resized variants + OCR text
        #   other:  original + PDF conversion + processing
        if self.mime_type == 'application/pdf':
            multiplier = 3.5
        elif self.mime_type.startswith('image/'):
            multiplier = 2.5
        else:
            multiplier = 4.0

        return size_mb * multiplier
|
|
|
|
class MemoryAwareQueue:
    """Memory-aware queue management system.

    Admission is based on the estimated memory footprint of each file —
    per user and system-wide — rather than a simple task count. All
    bookkeeping lives in Redis so every worker shares one view of the
    queue. Memory figures are heuristic estimates (see QueuedFile), not
    measured RSS.
    """

    # Services whose in-flight processing memory is tracked.
    SERVICES = ('docling', 'tika', 'llm', 'document_analysis')

    # Per-service memory budget as a fraction of total queue memory.
    SERVICE_MEMORY_SHARE = {
        'docling': 0.4,             # 40% for Docling (memory-intensive)
        'tika': 0.2,                # 20% for Tika
        'llm': 0.3,                 # 30% for LLM processing
        'document_analysis': 0.1,   # 10% for document analysis
    }

    def __init__(self, environment: str = "dev"):
        """Connect to Redis and load memory limits for *environment*."""
        self.redis_manager = get_redis_manager(environment)
        self.redis_client = self.redis_manager.client
        self.config = self._load_config()

        # Redis key names shared by all workers in this environment.
        self.upload_queue_key = "upload_queue"
        self.processing_memory_key = "processing_memory"
        self.user_quota_key = "user_quotas"
        self.system_status_key = "system_status"

        logger.info(f"🧠 Memory-aware queue initialized (max: {self.config.max_total_memory_mb}MB)")

    def _load_config(self) -> MemoryConfig:
        """Load memory configuration from environment variables."""
        return MemoryConfig(
            max_total_memory_mb=int(os.getenv('QUEUE_MAX_MEMORY_MB', '2048')),
            max_user_memory_mb=int(os.getenv('QUEUE_MAX_USER_MEMORY_MB', '512')),
            max_file_size_mb=int(os.getenv('MAX_FILE_SIZE_MB', '100')),
            memory_warning_threshold=float(os.getenv('MEMORY_WARNING_THRESHOLD', '0.8')),
            memory_reject_threshold=float(os.getenv('MEMORY_REJECT_THRESHOLD', '0.95'))
        )

    def check_upload_capacity(self, user_id: str, file_size_bytes: int,
                              mime_type: str) -> Tuple[bool, str, Dict[str, Any]]:
        """Check whether the system can accept a new upload.

        Args:
            user_id: Owner of the upload, for per-user quota checks.
            file_size_bytes: Raw file size on disk.
            mime_type: MIME type, used to estimate processing memory.

        Returns:
            (can_accept, message, queue_info)
        """
        # Hard per-file size limit — cheapest rejection, check it first.
        if file_size_bytes > (self.config.max_file_size_mb * 1024 * 1024):
            return False, f"File too large (max: {self.config.max_file_size_mb}MB)", {}

        # Reuse QueuedFile's heuristic to estimate processing memory.
        temp_file = QueuedFile(
            file_id="temp",
            user_id=user_id,
            filename="temp",
            size_bytes=file_size_bytes,
            mime_type=mime_type,
            cabinet_id="temp"
        )
        file_memory_mb = temp_file.memory_estimate_mb

        current_memory = self._get_current_memory_usage()
        user_memory = self._get_user_memory_usage(user_id)

        # Per-user quota check.
        if user_memory + file_memory_mb > self.config.max_user_memory_mb:
            return False, f"User quota exceeded (limit: {self.config.max_user_memory_mb}MB)", {
                'user_current': user_memory,
                'user_limit': self.config.max_user_memory_mb
            }

        # System-wide capacity check.
        total_after = current_memory + file_memory_mb
        max_memory = self.config.max_total_memory_mb

        if total_after > (max_memory * self.config.memory_reject_threshold):
            queue_info = self._get_queue_info()
            return False, "System overloaded. Please try again later.", {
                'current_memory': current_memory,
                'max_memory': max_memory,
                # Guard against a zero limit, consistent with _get_queue_info().
                'utilization': current_memory / max_memory if max_memory > 0 else 0,
                'queue_position': queue_info['total_queued'] + 1
            }

        wait_estimate = self._estimate_wait_time(user_id)

        status = "ready"
        message = "Upload accepted"
        if total_after > (max_memory * self.config.memory_warning_threshold):
            status = "busy"
            message = f"System busy. Estimated wait: {wait_estimate // 60}m {wait_estimate % 60}s"

        return True, message, {
            'status': status,
            'estimated_wait_seconds': wait_estimate,
            'memory_usage': {
                'current': current_memory,
                'after_upload': total_after,
                'limit': max_memory,
                'utilization': total_after / max_memory if max_memory > 0 else 0
            },
            'user_quota': {
                'used': user_memory,
                'after_upload': user_memory + file_memory_mb,
                'limit': self.config.max_user_memory_mb
            }
        }

    def enqueue_file(self, file_id: str, user_id: str, filename: str,
                     size_bytes: int, mime_type: str, cabinet_id: str,
                     priority: int = 1) -> Dict[str, Any]:
        """Add a file to the upload queue.

        Returns:
            Queue information including position and estimated wait time.
        """
        queued_file = QueuedFile(
            file_id=file_id,
            user_id=user_id,
            filename=filename,
            size_bytes=size_bytes,
            mime_type=mime_type,
            cabinet_id=cabinet_id,
            priority=priority
        )

        # Priority queue via a Redis sorted set: lower score is served
        # first, so higher priority subtracts a large offset from the
        # enqueue timestamp.
        score = time.time() - (priority * 1000000)

        self.redis_client.zadd(
            self.upload_queue_key,
            {json.dumps(asdict(queued_file)): score}
        )

        # Reserve the estimated memory against the user's quota.
        self._update_user_quota(user_id, queued_file.memory_estimate_mb, increment=True)

        position = self._get_queue_position(file_id)
        wait_estimate = self._estimate_wait_time(user_id)

        logger.info(f"📋 Queued file {file_id} for user {user_id} (pos: {position}, wait: {wait_estimate}s)")

        return {
            'queued': True,
            'file_id': file_id,
            'queue_position': position,
            'estimated_wait_seconds': wait_estimate,
            'memory_estimate_mb': queued_file.memory_estimate_mb
        }

    def dequeue_next_file(self, service_name: str) -> Optional[QueuedFile]:
        """Get the next file from the queue for processing.

        Args:
            service_name: The service requesting work; used to enforce
                its per-service memory budget.

        Returns:
            The next QueuedFile, or None when the queue is empty or the
            service lacks capacity for the head-of-queue file.
        """
        # Refuse work if the service is already at its memory budget.
        service_memory = self._get_service_memory_usage(service_name)
        service_limit = self._get_service_memory_limit(service_name)
        if service_memory >= service_limit:
            logger.debug(f"Service {service_name} at capacity ({service_memory}/{service_limit}MB)")
            return None

        # Peek at the head of the priority queue (lowest score first).
        items = self.redis_client.zrange(self.upload_queue_key, 0, 0, withscores=True)
        if not items:
            return None

        file_data_json, score = items[0]
        file_data = json.loads(file_data_json)
        queued_file = QueuedFile(**file_data)

        # NOTE: only the head is considered; if it does not fit we return
        # None rather than scanning the queue for a smaller file.
        if service_memory + queued_file.memory_estimate_mb > service_limit:
            logger.debug(f"File {queued_file.file_id} too large for {service_name} capacity")
            return None

        # Claim the file: remove it from the queue and move its memory
        # estimate from the user's queued quota to the service's
        # in-flight total.
        self.redis_client.zrem(self.upload_queue_key, file_data_json)
        self._update_user_quota(queued_file.user_id, queued_file.memory_estimate_mb, increment=False)
        self._update_service_memory(service_name, queued_file.memory_estimate_mb, increment=True)

        logger.info(f"🎯 Dequeued file {queued_file.file_id} for {service_name} processing")
        return queued_file

    def complete_processing(self, service_name: str, file_id: str, memory_used_mb: float):
        """Mark file processing as complete and free the service memory."""
        self._update_service_memory(service_name, memory_used_mb, increment=False)
        logger.info(f"✅ Completed processing {file_id} in {service_name} (freed {memory_used_mb}MB)")

    def _get_current_memory_usage(self) -> float:
        """Sum the tracked in-flight memory (MB) across all services."""
        total = 0.0
        for service in self.SERVICES:
            service_key = f"{self.processing_memory_key}:{service}"
            total += float(self.redis_client.get(service_key) or 0)
        return total

    def _get_user_memory_usage(self, user_id: str) -> float:
        """Get the tracked queued-memory total (MB) for one user."""
        user_key = f"{self.user_quota_key}:{user_id}"
        return float(self.redis_client.get(user_key) or 0)

    def _get_service_memory_usage(self, service_name: str) -> float:
        """Get the tracked in-flight memory (MB) for one service."""
        service_key = f"{self.processing_memory_key}:{service_name}"
        return float(self.redis_client.get(service_key) or 0)

    def _get_service_memory_limit(self, service_name: str) -> float:
        """Memory budget (MB) for a service as a share of total memory."""
        # Unknown services fall back to a conservative 10% share.
        percentage = self.SERVICE_MEMORY_SHARE.get(service_name, 0.1)
        return self.config.max_total_memory_mb * percentage

    def _adjust_memory_counter(self, key: str, memory_mb: float,
                               increment: bool, ttl_seconds: int):
        """Adjust a Redis memory counter, clamping decrements at zero.

        The key's TTL is refreshed on every update so stale counters from
        crashed workers eventually expire instead of pinning memory.
        """
        if increment:
            self.redis_client.incrbyfloat(key, memory_mb)
        else:
            current = float(self.redis_client.get(key) or 0)
            self.redis_client.set(key, max(0, current - memory_mb))
        self.redis_client.expire(key, ttl_seconds)

    def _update_user_quota(self, user_id: str, memory_mb: float, increment: bool):
        """Update per-user memory quota tracking (24h TTL)."""
        user_key = f"{self.user_quota_key}:{user_id}"
        self._adjust_memory_counter(user_key, memory_mb, increment, 86400)

    def _update_service_memory(self, service_name: str, memory_mb: float, increment: bool):
        """Update per-service in-flight memory tracking (1h TTL)."""
        service_key = f"{self.processing_memory_key}:{service_name}"
        self._adjust_memory_counter(service_key, memory_mb, increment, 3600)

    def _get_queue_position(self, file_id: str) -> int:
        """Get the 1-based position of a file in the queue (0 if absent)."""
        items = self.redis_client.zrange(self.upload_queue_key, 0, -1)
        for i, item in enumerate(items):
            file_data = json.loads(item)
            if file_data['file_id'] == file_id:
                return i + 1
        return 0

    def _estimate_wait_time(self, user_id: str) -> int:
        """Estimate wait time (seconds) for the user's next file.

        Currently a queue-wide estimate; *user_id* is accepted for
        interface stability but not yet used in the calculation.
        """
        queue_size = self.redis_client.zcard(self.upload_queue_key)
        avg_processing_time = 300  # 5 minutes average
        # Halved under the assumption of parallel processing.
        return int(queue_size * avg_processing_time * 0.5)

    def _get_queue_info(self) -> Dict[str, Any]:
        """Get comprehensive queue information."""
        total_queued = self.redis_client.zcard(self.upload_queue_key)
        current_memory = self._get_current_memory_usage()
        max_memory = self.config.max_total_memory_mb

        return {
            'total_queued': total_queued,
            'memory_usage': {
                'current_mb': current_memory,
                'max_mb': max_memory,
                'utilization': current_memory / max_memory if max_memory > 0 else 0
            },
            'status': self._determine_system_status(current_memory, max_memory)
        }

    def _determine_system_status(self, current_memory: float, max_memory: float) -> str:
        """Classify system load as 'ready', 'busy', or 'overloaded'."""
        utilization = current_memory / max_memory if max_memory > 0 else 0

        if utilization >= self.config.memory_reject_threshold:
            return "overloaded"
        elif utilization >= self.config.memory_warning_threshold:
            return "busy"
        else:
            return "ready"

    def get_system_status(self) -> Dict[str, Any]:
        """Get comprehensive system status for monitoring."""
        queue_info = self._get_queue_info()

        # Per-service usage/limits; fetch each value once to avoid
        # duplicate Redis round-trips, and guard a zero limit.
        services = {}
        for service_name in self.SERVICES:
            used_mb = self._get_service_memory_usage(service_name)
            limit_mb = self._get_service_memory_limit(service_name)
            services[service_name] = {
                'memory_used_mb': used_mb,
                'memory_limit_mb': limit_mb,
                'utilization': used_mb / limit_mb if limit_mb > 0 else 0
            }

        return {
            'status': queue_info['status'],
            'queue': queue_info,
            'services': services,
            'config': asdict(self.config)
        }
|
|
|
|
# Convenience functions
|
|
def get_memory_queue(environment: str = "dev") -> MemoryAwareQueue:
    """Build a MemoryAwareQueue bound to the given environment."""
    queue = MemoryAwareQueue(environment)
    return queue
|
|
|
|
def check_upload_capacity(user_id: str, file_size: int, mime_type: str, environment: str = "dev") -> Tuple[bool, str, Dict]:
    """One-shot capacity check for an upload against a fresh queue instance."""
    return get_memory_queue(environment).check_upload_capacity(user_id, file_size, mime_type)
|