""" Queue Management API Provides endpoints for monitoring and managing the document processing queue. """ from fastapi import APIRouter, HTTPException, Depends from typing import Dict, Any, List, Optional from modules.queue_system import get_queue, TaskPriority, ServiceType from modules.task_processors import get_processor from modules.auth.supabase_bearer import SupabaseBearer import os router = APIRouter() auth = SupabaseBearer() @router.get("/queue/stats") def get_queue_stats(payload: Dict[str, Any] = Depends(auth)): """Get comprehensive queue statistics.""" queue = get_queue() stats = queue.get_queue_stats() return stats @router.get("/queue/health") def queue_health(): """Check queue health status.""" try: queue = get_queue() stats = queue.get_queue_stats() # Basic health checks total_processing = stats['total_processing'] total_queued = sum(stats['queues'].values()) dead_letter_count = stats['dead_letter_count'] status = "healthy" issues = [] if dead_letter_count > 10: issues.append(f"High dead letter count: {dead_letter_count}") status = "degraded" if total_processing == 0 and total_queued > 0: issues.append("Tasks queued but no workers processing") status = "degraded" # Check Redis connectivity queue.redis_client.ping() return { "status": status, "total_processing": total_processing, "total_queued": total_queued, "dead_letter_count": dead_letter_count, "issues": issues, "timestamp": queue.redis_client.time()[0] } except Exception as e: return { "status": "unhealthy", "error": str(e), "timestamp": None } @router.post("/queue/workers/start") def start_workers( worker_count: int = 1, services: Optional[List[str]] = None, payload: Dict[str, Any] = Depends(auth) ): """Start queue workers.""" if not payload.get('role') == 'service_role': # Admin only raise HTTPException(status_code=403, detail="Admin access required") processor = get_processor() # Convert service names to enums service_enums = [] if services: for service_name in services: try: 
service_enums.append(ServiceType(service_name)) except ValueError: raise HTTPException(status_code=400, detail=f"Invalid service: {service_name}") else: service_enums = list(ServiceType) worker_ids = [] for i in range(worker_count): worker_id = processor.start_worker(services=service_enums) worker_ids.append(worker_id) return { "message": f"Started {worker_count} workers", "worker_ids": worker_ids, "services": [s.value for s in service_enums] } @router.post("/queue/workers/stop") def stop_workers(timeout: int = 30, payload: Dict[str, Any] = Depends(auth)): """Stop all queue workers.""" if not payload.get('role') == 'service_role': # Admin only raise HTTPException(status_code=403, detail="Admin access required") processor = get_processor() processor.shutdown(timeout=timeout) return {"message": "Workers shutdown initiated"} @router.get("/queue/tasks/{task_id}") def get_task_status(task_id: str, payload: Dict[str, Any] = Depends(auth)): """Get status of a specific task.""" queue = get_queue() # Get task data from Redis task_key = queue._get_task_key(task_id) task_data = queue.redis_client.hgetall(task_key) if not task_data: raise HTTPException(status_code=404, detail="Task not found") return task_data @router.get("/queue/tasks/by-file/{file_id}") def list_tasks_by_file(file_id: str, limit: int = 200, payload: Dict[str, Any] = Depends(auth)): """List recent queue tasks for a given file_id (best-effort scan).""" queue = get_queue() results: List[Dict[str, Any]] = [] count = 0 import json try: for key in queue.redis_client.scan_iter(match="task:*"): task_data = queue.redis_client.hgetall(key) if not task_data: continue if task_data.get('file_id') != file_id: continue # Build a brief try: payload_raw = task_data.get('payload') or '{}' payload_obj = json.loads(payload_raw) except Exception: payload_obj = {} results.append({ 'id': task_data.get('id') or key.split(':', 1)[-1], 'service': task_data.get('service'), 'task_type': task_data.get('task_type'), 'status': 
task_data.get('status', 'pending'), 'priority': task_data.get('priority'), 'created_at': float(task_data.get('created_at') or 0), 'scheduled_at': float(task_data.get('scheduled_at') or 0), 'depends_on': payload_obj.get('depends_on') or [] }) count += 1 if count >= max(1, int(limit)): break except Exception as e: raise HTTPException(status_code=500, detail=f"Failed scanning tasks: {e}") # Sort by created_at desc results.sort(key=lambda x: x.get('created_at', 0), reverse=True) return { 'file_id': file_id, 'count': len(results), 'tasks': results } @router.get("/queue/tasks/{task_id}/dependencies") def get_task_dependencies(task_id: str, payload: Dict[str, Any] = Depends(auth)): """Inspect a task's dependency state (direct depends_on only).""" queue = get_queue() task_key = queue._get_task_key(task_id) task_data = queue.redis_client.hgetall(task_key) if not task_data: raise HTTPException(status_code=404, detail="Task not found") # Parse payload JSON stored in Redis import json try: payload_raw = task_data.get('payload') or '{}' payload_obj = json.loads(payload_raw) except Exception: payload_obj = {} depends_on = payload_obj.get('depends_on') or [] if not isinstance(depends_on, list): depends_on = [] details: List[Dict[str, Any]] = [] missing: List[str] = [] all_completed = True for dep_id in depends_on: if not dep_id: continue dep_key = queue._get_task_key(dep_id) dep_data = queue.redis_client.hgetall(dep_key) if not dep_data: missing.append(dep_id) all_completed = False details.append({ 'task_id': dep_id, 'status': 'missing' }) continue status = dep_data.get('status', 'pending') if status != 'completed': all_completed = False details.append({ 'task_id': dep_id, 'status': status, 'service': dep_data.get('service'), 'task_type': dep_data.get('task_type'), 'created_at': dep_data.get('created_at'), 'scheduled_at': dep_data.get('scheduled_at') }) return { 'task_id': task_id, 'depends_on': depends_on, 'all_completed': all_completed, 'missing': missing, 'details': details } 
@router.delete("/queue/dead-letter/{task_id}")
def remove_dead_task(task_id: str, payload: Dict[str, Any] = Depends(auth)):
    """Remove a task from the dead letter queue.

    Removes one occurrence of ``task_id`` from the dead-letter list and then
    deletes the task's hash.

    Raises:
        HTTPException: 403 when the caller is not an admin, 404 when the task
            id is not present in the dead letter queue.
    """
    if payload.get('role') != 'service_role':  # Admin only
        raise HTTPException(status_code=403, detail="Admin access required")

    queue = get_queue()

    removed = queue.redis_client.lrem(queue.dead_letter_key, 1, task_id)
    if removed == 0:
        raise HTTPException(status_code=404, detail="Task not found in dead letter queue")

    # Clean up task data
    queue.redis_client.delete(queue._get_task_key(task_id))

    return {"message": f"Removed task {task_id} from dead letter queue"}


def _coerce_priority(raw_priority):
    """Convert a stored priority value into a ``TaskPriority`` member.

    Tries the raw value first, then its integer form.
    NOTE(review): the integer fallback assumes priorities may be stored as
    numeric strings in the Redis hash; confirm against TaskPriority's values.
    """
    try:
        return TaskPriority(raw_priority)
    except ValueError:
        return TaskPriority(int(raw_priority))


@router.post("/queue/dead-letter/{task_id}/retry")
def retry_dead_task(task_id: str, payload: Dict[str, Any] = Depends(auth)):
    """Retry a task from the dead letter queue.

    Resets the task's ``attempts``, ``status`` and ``scheduled_at`` fields and
    pushes its id onto the queue for its stored priority.

    Raises:
        HTTPException: 403 when the caller is not an admin; 404 when the task
            hash does not exist or the id is not in the dead letter queue;
            500 when the stored priority cannot be mapped to a queue (the
            task is left untouched in the dead letter queue in that case).
    """
    if payload.get('role') != 'service_role':  # Admin only
        raise HTTPException(status_code=403, detail="Admin access required")

    queue = get_queue()

    task_key = queue._get_task_key(task_id)
    task_data = queue.redis_client.hgetall(task_key)
    if not task_data:
        raise HTTPException(status_code=404, detail="Task not found")

    # Resolve the destination queue BEFORE mutating anything: if this failed
    # after the lrem below, the task would be in neither the dead letter
    # queue nor a work queue and would be silently lost.
    try:
        queue_key = queue.queue_keys[_coerce_priority(task_data['priority'])]
    except (KeyError, TypeError, ValueError) as exc:
        raise HTTPException(
            status_code=500,
            detail=f"Task {task_id} has a missing or invalid priority; left in dead letter queue",
        ) from exc

    removed = queue.redis_client.lrem(queue.dead_letter_key, 1, task_id)
    if removed == 0:
        raise HTTPException(status_code=404, detail="Task not found in dead letter queue")

    # Reset task for retry
    queue.redis_client.hset(
        task_key,
        mapping={
            'attempts': 0,
            'status': 'pending',
            'scheduled_at': queue.redis_client.time()[0],
        },
    )

    # Re-queue task
    queue.redis_client.lpush(queue_key, task_id)

    return {"message": f"Task {task_id} requeued for retry"}


@router.get("/queue/metrics")
def get_queue_metrics(hours: int = 1, payload: Dict[str, Any] = Depends(auth)):
    """Get queue processing metrics over time.

    Scans counter keys of the form
    ``<metrics_key>:action:service:priority:timestamp_minute`` and sums the
    counters whose minute falls within the last ``hours`` hours (measured
    against the Redis server clock). Keys that do not fit this shape are
    skipped rather than failing the request.

    This is a simplified version - in production you'd want more
    sophisticated metrics.

    Returns:
        A dict with ``time_range_hours`` and ``metrics``, a mapping of
        ``"{action}_{service}_{priority}"`` to the summed count.
    """
    queue = get_queue()

    current_time = queue.redis_client.time()[0]
    earliest_minute = (current_time - (hours * 3600)) // 60

    # Strip the prefix instead of splitting the whole key, so a metrics_key
    # that itself contains ':' does not shift the field positions.
    prefix = f"{queue.metrics_key}:"
    metrics = {}

    for key in queue.redis_client.scan_iter(match=f"{prefix}*"):
        if not key.startswith(prefix):
            continue
        fields = key[len(prefix):].split(':')
        if len(fields) < 4:
            continue
        action, service, priority, minute_raw = fields[:4]

        try:
            timestamp_minute = int(minute_raw)
        except ValueError:
            # Not a metric counter key in the expected format.
            continue
        if timestamp_minute < earliest_minute:
            continue

        # The key may have expired between scan and get; treat that as 0.
        count = int(queue.redis_client.get(key) or 0)
        metric_key = f"{action}_{service}_{priority}"
        metrics[metric_key] = metrics.get(metric_key, 0) + count

    return {
        "time_range_hours": hours,
        "metrics": metrics,
    }