api/archive/auto_processing/enhanced_upload.py
2025-11-14 14:47:19 +00:00

143 lines
9.3 KiB
Python

"""
Enhanced Upload Router with Memory-Aware Queuing
===============================================
Provides intelligent upload endpoints with capacity checking,
queue management, and real-time status updates.
Endpoints:
- POST /upload/check-capacity - Pre-check if upload is possible
- POST /upload/with-queue - Upload with intelligent queuing
- GET /upload/status/{file_id} - Get processing status
- GET /upload/queue-status - Get overall queue status
- WebSocket /ws/upload-status/{file_id} - Real-time status updates
"""
import asyncio
import json
import logging
import os
import time
import uuid
from typing import Dict, List, Optional, Any

from fastapi import APIRouter, Depends, HTTPException, UploadFile, File, Form, WebSocket, WebSocketDisconnect
from fastapi.responses import JSONResponse

from modules.auth.supabase_bearer import SupabaseBearer
from modules.enhanced_upload_handler import get_upload_handler
from modules.memory_aware_queue import get_memory_queue
from modules.logger_tool import initialise_logger
# Router on which every endpoint in this module is registered.
router = APIRouter()
# Auth dependency; the HTTP endpoints receive its result via Depends(auth).
auth = SupabaseBearer()
# Module logger. The LOG_LEVEL and LOG_PATH environment values are passed
# straight through to initialise_logger; the meaning of the trailing
# 'default', True arguments is defined by that helper.
logger = initialise_logger(__name__, os.getenv("LOG_LEVEL"), os.getenv("LOG_PATH"), 'default', True)
# WebSocket connection manager for real-time updates
class ConnectionManager:
    """Track open WebSocket connections grouped by the file they watch.

    ``active_connections`` maps a ``file_id`` to the list of sockets
    registered for it. A key is present only while at least one socket is
    registered for that file.
    """

    def __init__(self):
        self.active_connections: Dict[str, List["WebSocket"]] = {}

    async def connect(self, websocket: "WebSocket", file_id: str):
        """Accept the WebSocket handshake and register the socket under *file_id*."""
        await websocket.accept()
        self.active_connections.setdefault(file_id, []).append(websocket)

    def disconnect(self, websocket: "WebSocket", file_id: str):
        """Unregister *websocket* from *file_id*.

        Idempotent: calling it for an unknown ``file_id`` or for a socket that
        was already removed (for example by a failed broadcast) is a no-op
        instead of raising ``ValueError``.
        """
        connections = self.active_connections.get(file_id)
        if connections is None:
            return
        try:
            connections.remove(websocket)
        except ValueError:
            # Already removed elsewhere; nothing left to do for this socket.
            pass
        if not connections:
            # Drop the key so the mapping does not accumulate empty lists.
            self.active_connections.pop(file_id, None)

    async def broadcast_to_file(self, file_id: str, message: dict):
        """Send *message* as JSON to every socket registered for *file_id*.

        Delivery is best-effort: a socket whose send raises is unregistered
        and the broadcast continues with the remaining sockets.
        """
        # Iterate over a snapshot: each send awaits, so the live list may be
        # mutated by connect()/disconnect() while the broadcast is in flight.
        for connection in list(self.active_connections.get(file_id, ())):
            try:
                await connection.send_json(message)
            except Exception:
                # ``Exception`` rather than a bare ``except`` so that task
                # cancellation (a BaseException) still propagates.
                self.disconnect(connection, file_id)
# Module-level ConnectionManager instance; the WebSocket status endpoint in
# this module registers and unregisters its sockets through it.
manager = ConnectionManager()
@router.post("/upload/check-capacity")
async def check_upload_capacity(
    file_size: int = Form(...),
    mime_type: str = Form(...),
    payload: Dict[str, Any] = Depends(auth)
):
    """
    Check if user can upload a file of given size and type.
    Returns capacity information and recommendations.

    Args:
        file_size: Size of the prospective upload, taken from the form
            (presumably bytes - confirm against check_upload_eligibility).
        mime_type: MIME type of the prospective upload, taken from the form.
        payload: Result of the auth dependency. The user id is read from
            'sub', then 'user_id', falling back to 'anonymous'.

    Returns:
        JSONResponse with the keys 'eligible', 'message', 'details' and
        'timestamp'. The HTTP status is 200 when eligible, otherwise 429.

    Raises:
        HTTPException: re-raised unchanged when raised while handling the
            request; any other error is reported as a 500 whose detail is
            the error text.
    """
    try:
        user_id = payload.get('sub') or payload.get('user_id', 'anonymous')

        # Determine environment (defaults to 'dev' when BACKEND_DEV_MODE is unset)
        environment = 'dev' if os.getenv('BACKEND_DEV_MODE', 'true').lower() == 'true' else 'prod'
        upload_handler = get_upload_handler(environment)

        eligible, message, details = upload_handler.check_upload_eligibility(
            user_id, file_size, mime_type
        )

        response = {
            'eligible': eligible,
            'message': message,
            'details': details,
            'timestamp': time.time()
        }

        status_code = 200 if eligible else 429  # Too Many Requests if not eligible

        logger.info(f"📋 Capacity check for user {user_id}: {eligible} - {message}")

        return JSONResponse(content=response, status_code=status_code)

    except HTTPException:
        # Same passthrough as the other endpoints in this module: keep the
        # original status code instead of collapsing it into a 500.
        raise
    except Exception as e:
        logger.error(f"Capacity check error: {e}")
        raise HTTPException(status_code=500, detail=str(e)) from e
@router.post("/upload/with-queue")
async def upload_with_queue(
    cabinet_id: str = Form(...),
    path: str = Form(...),
    scope: str = Form(...),
    priority: int = Form(1),
    file: UploadFile = File(...),
    payload: Dict[str, Any] = Depends(auth)
):
    """
    Upload file with intelligent queuing and capacity management.
    Returns queue information and processing status.

    Args:
        cabinet_id: Forwarded to the upload handler.
        path: Used only as the fallback filename when the upload has none.
        scope: Required form field; not used by this endpoint.
        priority: Forwarded to the upload handler (default 1).
        file: The uploaded file. Its whole content is read into memory.
        payload: Result of the auth dependency. The user id is read from
            'sub', then 'user_id', falling back to 'anonymous'.

    Returns:
        Whatever ``handle_upload_with_queue`` returns for the new file id.

    Raises:
        HTTPException: 501 when UPLOAD_QUEUE_ENABLED is not 'true' (legacy
            mode is not implemented here); 500 for any unexpected error.
    """
    try:
        user_id = payload.get('sub') or payload.get('user_id', 'anonymous')

        # Read file content
        file_bytes = await file.read()
        file_size = len(file_bytes)
        mime_type = file.content_type or 'application/octet-stream'
        filename = file.filename or path

        logger.info(f"📤 Upload request: {filename} ({file_size} bytes) for user {user_id}")

        # Determine environment
        environment = 'dev' if os.getenv('BACKEND_DEV_MODE', 'true').lower() == 'true' else 'prod'
        upload_handler = get_upload_handler(environment)

        # Check if upload queuing is enabled
        if os.getenv('UPLOAD_QUEUE_ENABLED', 'true').lower() == 'true':
            # Use new queue-based upload
            file_id = str(uuid.uuid4())

            result = await upload_handler.handle_upload_with_queue(
                file_id=file_id,
                user_id=user_id,
                filename=filename,
                file_bytes=file_bytes,
                mime_type=mime_type,
                cabinet_id=cabinet_id,
                priority=priority
            )

            return result

        else:
            # Fall back to immediate processing (legacy mode)
            logger.warning("Using legacy immediate processing mode")
            # TODO: Call original upload_file function
            raise HTTPException(status_code=501, detail="Legacy mode not implemented in this endpoint")

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Upload error: {e}")
        raise HTTPException(status_code=500, detail=str(e)) from e


@router.get("/upload/status/{file_id}")
async def get_upload_status(
    file_id: str,
    payload: Dict[str, Any] = Depends(auth)
):
    """
    Get current processing status for an uploaded file.

    ``payload`` is not read in the body, so it only forces the auth
    dependency to run; no per-user ownership check is made here.

    Raises:
        HTTPException: 404 when the handler reports the status 'not_found';
            500 for any unexpected error.
    """
    try:
        # Determine environment
        environment = 'dev' if os.getenv('BACKEND_DEV_MODE', 'true').lower() == 'true' else 'prod'
        upload_handler = get_upload_handler(environment)

        status = upload_handler.get_processing_status(file_id)

        if status.get('status') == 'not_found':
            raise HTTPException(status_code=404, detail="File not found")

        return status

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Status check error for {file_id}: {e}")
        raise HTTPException(status_code=500, detail=str(e)) from e


@router.get("/upload/queue-status")
async def get_queue_status(
    payload: Dict[str, Any] = Depends(auth)
):
    """
    Get overall queue status and system capacity information.

    Returns:
        Dict with the keys 'system_status' (from the memory queue),
        'timestamp' and 'environment'.

    Raises:
        HTTPException: re-raised unchanged when raised while handling the
            request; 500 for any other error.
    """
    try:
        # Determine environment
        environment = 'dev' if os.getenv('BACKEND_DEV_MODE', 'true').lower() == 'true' else 'prod'
        memory_queue = get_memory_queue(environment)

        system_status = memory_queue.get_system_status()

        return {
            'system_status': system_status,
            'timestamp': time.time(),
            'environment': environment
        }

    except HTTPException:
        # Same passthrough as the other endpoints in this module.
        raise
    except Exception as e:
        logger.error(f"Queue status error: {e}")
        raise HTTPException(status_code=500, detail=str(e)) from e


# Seconds between two status pushes on the WebSocket endpoint.
_STATUS_POLL_INTERVAL_SECONDS = 5


@router.websocket("/ws/upload-status/{file_id}")
async def websocket_upload_status(websocket: WebSocket, file_id: str):
    """
    WebSocket endpoint for real-time upload status updates.

    Sends the current processing status once on connect and then again every
    ``_STATUS_POLL_INTERVAL_SECONDS`` seconds until the client disconnects
    or an error occurs.

    NOTE(review): unlike the HTTP endpoints, this route has no auth
    dependency - confirm that is intended.
    """
    await manager.connect(websocket, file_id)

    try:
        # Send initial status
        environment = 'dev' if os.getenv('BACKEND_DEV_MODE', 'true').lower() == 'true' else 'prod'
        upload_handler = get_upload_handler(environment)
        initial_status = upload_handler.get_processing_status(file_id)

        await websocket.send_json({
            'type': 'status_update',
            'data': initial_status
        })

        # Keep connection alive and listen for updates
        while True:
            # In a real implementation, you'd have a background task
            # that pushes updates when file status changes
            await asyncio.sleep(_STATUS_POLL_INTERVAL_SECONDS)

            # Check for status updates
            current_status = upload_handler.get_processing_status(file_id)
            await websocket.send_json({
                'type': 'status_update',
                'data': current_status
            })

    except WebSocketDisconnect:
        # Normal client disconnect; cleanup happens in ``finally``.
        pass
    except Exception as e:
        logger.error(f"WebSocket error for {file_id}: {e}")
    finally:
        # ``finally`` also covers task cancellation (e.g. server shutdown),
        # which the two handlers above do not catch, so the socket is always
        # unregistered exactly once.
        manager.disconnect(websocket, file_id)


# Strong references to fire-and-forget tasks. The event loop keeps only weak
# references, so an unreferenced task may be garbage-collected mid-run.
_background_tasks = set()


def _on_background_task_done(task):
    """Release a finished background task and log it if it failed."""
    _background_tasks.discard(task)
    if task.cancelled():
        return
    error = task.exception()
    if error is not None:
        logger.error(f"Upload queue processor stopped unexpectedly: {error}")


# Background task to process upload queue
@router.on_event("startup")
async def start_queue_processor():
    """Start background queue processor.

    Does nothing when UPLOAD_QUEUE_ENABLED is not 'true'. Otherwise schedules
    ``process_queued_files("document_processor")`` on the running event loop
    and keeps a reference to the task until it finishes.
    """
    if os.getenv('UPLOAD_QUEUE_ENABLED', 'true').lower() != 'true':
        logger.info("📋 Upload queue disabled, skipping queue processor")
        return

    environment = 'dev' if os.getenv('BACKEND_DEV_MODE', 'true').lower() == 'true' else 'prod'
    upload_handler = get_upload_handler(environment)

    # Start background processor
    task = asyncio.create_task(upload_handler.process_queued_files("document_processor"))
    _background_tasks.add(task)
    task.add_done_callback(_on_background_task_done)

    logger.info("🚀 Upload queue processor started")