"""
|
|
Queue Management API
|
|
|
|
Provides endpoints for monitoring and managing the document processing queue.
|
|
"""
|
|
|
|
from fastapi import APIRouter, HTTPException, Depends
|
|
from typing import Dict, Any, List, Optional
|
|
from modules.queue_system import get_queue, TaskPriority, ServiceType
|
|
from modules.task_processors import get_processor
|
|
from modules.auth.supabase_bearer import SupabaseBearer
|
|
import os
|
|
|
|
# Router is mounted by the application; all queue endpoints hang off it.
router = APIRouter()

# Supabase JWT bearer dependency — endpoints receive the decoded token
# payload dict via Depends(auth); admin endpoints check payload['role'].
auth = SupabaseBearer()
|
|
|
|
@router.get("/queue/stats")
def get_queue_stats(payload: Dict[str, Any] = Depends(auth)):
    """Return the full statistics snapshot reported by the queue system."""
    return get_queue().get_queue_stats()
|
|
|
|
@router.get("/queue/health")
def queue_health():
    """Report queue health: backlog, worker activity, dead letters, Redis reachability.

    Never raises — any failure is folded into an "unhealthy" response so
    external monitors always get a well-formed body.
    """
    try:
        queue = get_queue()
        stats = queue.get_queue_stats()

        processing = stats['total_processing']
        queued = sum(stats['queues'].values())
        dead = stats['dead_letter_count']

        issues = []
        if dead > 10:
            issues.append(f"High dead letter count: {dead}")
        if processing == 0 and queued > 0:
            issues.append("Tasks queued but no workers processing")
        # Any recorded issue downgrades the overall status.
        status = "degraded" if issues else "healthy"

        # Confirm Redis is actually reachable before reporting.
        queue.redis_client.ping()

        return {
            "status": status,
            "total_processing": processing,
            "total_queued": queued,
            "dead_letter_count": dead,
            "issues": issues,
            "timestamp": queue.redis_client.time()[0]
        }
    except Exception as exc:
        # Redis down, malformed stats, etc. — report rather than raise.
        return {
            "status": "unhealthy",
            "error": str(exc),
            "timestamp": None
        }
|
|
|
|
@router.post("/queue/workers/start")
def start_workers(
    worker_count: int = 1,
    services: Optional[List[str]] = None,
    payload: Dict[str, Any] = Depends(auth)
):
    """Start queue workers.

    Args:
        worker_count: Number of workers to launch; must be >= 1.
        services: Optional list of service names each worker handles;
            defaults to every ServiceType when omitted.
        payload: Decoded auth token; must carry role == 'service_role'.

    Returns:
        Dict with a summary message, the new worker ids, and the
        service names the workers were bound to.

    Raises:
        HTTPException: 403 for non-admin callers; 400 for an invalid
            worker_count or an unrecognized service name.
    """
    if payload.get('role') != 'service_role':  # Admin only
        raise HTTPException(status_code=403, detail="Admin access required")

    # Previously 0/negative counts silently reported "Started 0 workers";
    # reject them explicitly instead.
    if worker_count < 1:
        raise HTTPException(status_code=400, detail="worker_count must be >= 1")

    processor = get_processor()

    # Convert service names to enums, rejecting unknown names up front.
    if services:
        service_enums = []
        for service_name in services:
            try:
                service_enums.append(ServiceType(service_name))
            except ValueError:
                raise HTTPException(status_code=400, detail=f"Invalid service: {service_name}")
    else:
        service_enums = list(ServiceType)

    worker_ids = [
        processor.start_worker(services=service_enums)
        for _ in range(worker_count)
    ]

    return {
        "message": f"Started {worker_count} workers",
        "worker_ids": worker_ids,
        "services": [s.value for s in service_enums]
    }
|
|
|
|
@router.post("/queue/workers/stop")
def stop_workers(timeout: int = 30, payload: Dict[str, Any] = Depends(auth)):
    """Initiate shutdown of all queue workers, waiting up to `timeout` seconds."""
    if payload.get('role') != 'service_role':  # Admin only
        raise HTTPException(status_code=403, detail="Admin access required")

    get_processor().shutdown(timeout=timeout)

    return {"message": "Workers shutdown initiated"}
|
|
|
|
@router.get("/queue/tasks/{task_id}")
def get_task_status(task_id: str, payload: Dict[str, Any] = Depends(auth)):
    """Return the raw Redis hash stored for one task, or 404 if absent."""
    queue = get_queue()

    task_data = queue.redis_client.hgetall(queue._get_task_key(task_id))
    if not task_data:
        raise HTTPException(status_code=404, detail="Task not found")

    return task_data
|
|
|
|
@router.get("/queue/tasks/by-file/{file_id}")
def list_tasks_by_file(file_id: str, limit: int = 200, payload: Dict[str, Any] = Depends(auth)):
    """List recent queue tasks for a given file_id (best-effort scan).

    Performs an O(keys) SCAN over "task:*" hashes and keeps at most
    `limit` matches (clamped to at least 1), newest first.

    Raises:
        HTTPException: 500 if the Redis scan fails.
    """
    import json

    queue = get_queue()
    results: List[Dict[str, Any]] = []
    # Clamp once up front instead of recomputing max(1, int(limit))
    # on every iteration of the scan loop.
    max_results = max(1, int(limit))

    try:
        for key in queue.redis_client.scan_iter(match="task:*"):
            task_data = queue.redis_client.hgetall(key)
            if not task_data or task_data.get('file_id') != file_id:
                continue

            # Payload is stored as a JSON string; tolerate missing/corrupt blobs.
            try:
                payload_obj = json.loads(task_data.get('payload') or '{}')
            except Exception:
                payload_obj = {}

            results.append({
                # Fall back to the id embedded in the Redis key.
                'id': task_data.get('id') or key.split(':', 1)[-1],
                'service': task_data.get('service'),
                'task_type': task_data.get('task_type'),
                'status': task_data.get('status', 'pending'),
                'priority': task_data.get('priority'),
                'created_at': float(task_data.get('created_at') or 0),
                'scheduled_at': float(task_data.get('scheduled_at') or 0),
                'depends_on': payload_obj.get('depends_on') or []
            })
            if len(results) >= max_results:
                break
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Failed scanning tasks: {e}")

    # Sort by created_at desc (most recent first).
    results.sort(key=lambda x: x.get('created_at', 0), reverse=True)
    return {
        'file_id': file_id,
        'count': len(results),
        'tasks': results
    }
|
|
|
|
@router.get("/queue/tasks/{task_id}/dependencies")
def get_task_dependencies(task_id: str, payload: Dict[str, Any] = Depends(auth)):
    """Inspect a task's dependency state (direct depends_on only)."""
    import json

    queue = get_queue()
    task_data = queue.redis_client.hgetall(queue._get_task_key(task_id))
    if not task_data:
        raise HTTPException(status_code=404, detail="Task not found")

    # The task payload lives in Redis as a JSON string; fall back to {}
    # on anything unparseable.
    try:
        payload_obj = json.loads(task_data.get('payload') or '{}')
    except Exception:
        payload_obj = {}

    depends_on = payload_obj.get('depends_on') or []
    if not isinstance(depends_on, list):
        depends_on = []

    details: List[Dict[str, Any]] = []
    missing: List[str] = []
    all_completed = True

    for dep_id in depends_on:
        if not dep_id:
            continue  # skip empty/null entries defensively

        dep_data = queue.redis_client.hgetall(queue._get_task_key(dep_id))
        if not dep_data:
            # Dependency hash no longer exists in Redis.
            missing.append(dep_id)
            all_completed = False
            details.append({'task_id': dep_id, 'status': 'missing'})
            continue

        dep_status = dep_data.get('status', 'pending')
        all_completed = all_completed and dep_status == 'completed'
        details.append({
            'task_id': dep_id,
            'status': dep_status,
            'service': dep_data.get('service'),
            'task_type': dep_data.get('task_type'),
            'created_at': dep_data.get('created_at'),
            'scheduled_at': dep_data.get('scheduled_at')
        })

    return {
        'task_id': task_id,
        'depends_on': depends_on,
        'all_completed': all_completed,
        'missing': missing,
        'details': details
    }
|
|
|
|
@router.delete("/queue/dead-letter/{task_id}")
def remove_dead_task(task_id: str, payload: Dict[str, Any] = Depends(auth)):
    """Drop a task from the dead letter queue and delete its stored hash."""
    if payload.get('role') != 'service_role':  # Admin only
        raise HTTPException(status_code=403, detail="Admin access required")

    queue = get_queue()

    # LREM returns how many entries it removed; 0 means the task
    # was never in the dead letter queue.
    if queue.redis_client.lrem(queue.dead_letter_key, 1, task_id) == 0:
        raise HTTPException(status_code=404, detail="Task not found in dead letter queue")

    # Clean up the task's hash as well.
    queue.redis_client.delete(queue._get_task_key(task_id))

    return {"message": f"Removed task {task_id} from dead letter queue"}
|
|
|
|
@router.post("/queue/dead-letter/{task_id}/retry")
def retry_dead_task(task_id: str, payload: Dict[str, Any] = Depends(auth)):
    """Move a dead-lettered task back onto its priority queue for another attempt."""
    if payload.get('role') != 'service_role':  # Admin only
        raise HTTPException(status_code=403, detail="Admin access required")

    queue = get_queue()
    task_key = queue._get_task_key(task_id)

    # The task hash must still exist before we touch the dead letter list.
    task_data = queue.redis_client.hgetall(task_key)
    if not task_data:
        raise HTTPException(status_code=404, detail="Task not found")

    # Pull it out of the dead letter queue; 0 removals means it wasn't there.
    if queue.redis_client.lrem(queue.dead_letter_key, 1, task_id) == 0:
        raise HTTPException(status_code=404, detail="Task not found in dead letter queue")

    # Reset bookkeeping so workers treat the task as fresh.
    queue.redis_client.hset(
        task_key,
        mapping={
            'attempts': 0,
            'status': 'pending',
            'scheduled_at': queue.redis_client.time()[0]  # Redis server clock, seconds
        }
    )

    # Re-queue onto the list matching the task's original priority.
    # NOTE(review): assumes TaskPriority accepts the raw value hgetall
    # returns for 'priority' — confirm decoding matches the enum's value type.
    priority = TaskPriority(task_data['priority'])
    queue.redis_client.lpush(queue.queue_keys[priority], task_id)

    return {"message": f"Task {task_id} requeued for retry"}
|
|
|
|
@router.get("/queue/metrics")
def get_queue_metrics(hours: int = 1, payload: Dict[str, Any] = Depends(auth)):
    """Get queue processing metrics over time.

    Aggregates the per-minute counters stored under
    "<metrics_key>:action:service:priority:timestamp_minute" into totals
    keyed "action_service_priority" for the trailing `hours` window.

    This is a simplified version — in production you'd want more
    sophisticated metrics.
    """
    queue = get_queue()

    # Use the Redis server clock so the window lines up with how the
    # counters were timestamped when written.
    current_time = queue.redis_client.time()[0]
    start_time = current_time - (hours * 3600)
    cutoff_minute = start_time // 60  # hoisted: invariant across the scan

    metrics: Dict[str, int] = {}
    pattern = f"{queue.metrics_key}:*"

    for key in queue.redis_client.scan_iter(match=pattern):
        # Parse key format: metrics:action:service:priority:timestamp_minute
        parts = key.split(':')
        if len(parts) < 5:
            continue
        action, service, priority = parts[1], parts[2], parts[3]
        try:
            timestamp_minute = int(parts[4])
        except ValueError:
            # A single malformed key previously 500'd the whole endpoint;
            # skip it instead.
            continue

        if timestamp_minute >= cutoff_minute:
            count = int(queue.redis_client.get(key) or 0)
            metric_key = f"{action}_{service}_{priority}"
            metrics[metric_key] = metrics.get(metric_key, 0) + count

    return {
        "time_range_hours": hours,
        "metrics": metrics
    }
|