api/routers/database/files/files_simplified.py
kcar c69451fba2
Some checks failed
api-ci-deploy / test-build-deploy (push) Has been cancelled
[verified] add upload size and MIME guards
(cherry picked from commit f5e05376f637f55b73e474cac8199529682ca398)
2026-06-08 01:18:39 +00:00

278 lines
10 KiB
Python

"""
Simplified Files Router
======================
Simplified version of the files router with auto-processing removed.
Keeps only essential functionality for file management and manual processing triggers.
This replaces the complex auto-processing system with simple file storage.
"""
import os
import uuid
import logging
from typing import Dict, List, Optional, Any
from pathlib import Path
from fastapi import APIRouter, Depends, HTTPException, UploadFile, File, Form, BackgroundTasks
from fastapi.responses import JSONResponse
from modules.auth.supabase_bearer import SupabaseBearer
from modules.database.supabase.utils.client import SupabaseServiceRoleClient
from modules.database.supabase.utils.storage import StorageAdmin
from modules.upload_validation import read_upload_bytes
from modules.logger_tool import initialise_logger
# Router that the endpoint decorators in this module register their routes on.
router = APIRouter()
# Bearer-auth dependency; endpoints receive its result as `payload` via Depends(auth).
auth = SupabaseBearer()
# Module logger, configured from the LOG_LEVEL and LOG_PATH environment variables.
logger = initialise_logger(__name__, os.getenv("LOG_LEVEL"), os.getenv("LOG_PATH"), 'default', True)
def _user_id_from_payload(payload: Dict[str, Any]) -> str:
user_id = payload.get('sub') or payload.get('user_id')
if not user_id:
raise HTTPException(status_code=401, detail="Invalid token payload")
return user_id
def _cabinet_visible_to_user(client: SupabaseServiceRoleClient, cabinet_id: str, user_id: str) -> bool:
    """Report whether ``user_id`` owns the cabinet ``cabinet_id``.

    Used as an ownership gate before the service-role client touches file
    metadata. Looks for at most one ``file_cabinets`` row matching both the
    cabinet id and the user id.

    Returns:
        True when a matching row exists, False otherwise.
    """
    ownership_query = client.supabase.table('file_cabinets').select('id')
    ownership_query = ownership_query.eq('id', cabinet_id).eq('user_id', user_id)
    result = ownership_query.limit(1).execute()
    return True if result.data else False
def _choose_bucket(scope: str, user_id: str, school_id: Optional[str]) -> str:
"""Choose appropriate bucket based on scope - matches old system logic."""
scope = (scope or 'teacher').lower()
if scope == 'school' and school_id:
return f"cc.institutes.{school_id}.private"
# teacher / student fall back to users bucket for now
return 'cc.users'
def _remove_orphaned_upload(storage: StorageAdmin, bucket: str, storage_path: str) -> None:
    """Best-effort removal of a stored object whose database record was not created."""
    try:
        storage.delete_file(bucket, storage_path)
    except Exception as cleanup_error:
        # Cleanup must never mask the original failure, but it should not vanish silently.
        logger.warning(f"Failed to clean up orphaned upload {bucket}/{storage_path}: {cleanup_error}")


@router.post("/files/upload")
async def upload_file(
    cabinet_id: str = Form(...),
    path: str = Form(...),
    scope: str = Form(...),
    file: UploadFile = File(...),
    payload: Dict[str, Any] = Depends(auth)
):
    """
    SIMPLIFIED file upload - no automatic processing.
    Just stores the file and creates a database record.
    This is the legacy endpoint maintained for backward compatibility.

    Args:
        cabinet_id: Cabinet the file is stored under; must be owned by the caller.
        path: Client-supplied path, used as the file name when the upload has none.
        scope: Accepted for backward compatibility; the bucket is currently
            always chosen with the 'teacher' scope.
        file: The uploaded file.
        payload: Decoded auth payload supplying the caller's user id.

    Returns:
        A dict with the created file record and static status fields.

    Raises:
        HTTPException: 401 without a user id, 404 when the cabinet is not
            owned by the caller, 500 on storage or database failure; errors
            raised by read_upload_bytes propagate unchanged.
    """
    try:
        user_id = payload.get('sub') or payload.get('user_id')
        if not user_id:
            raise HTTPException(status_code=401, detail="User ID required")

        # The service-role client is used for the insert below, so enforce
        # cabinet ownership here, the same gate list_files applies.
        client = SupabaseServiceRoleClient()
        if not _cabinet_visible_to_user(client, cabinet_id, user_id):
            raise HTTPException(status_code=404, detail="Cabinet not found")

        # Validate MIME/type and read file content with a hard size limit.
        file_bytes, mime_type = await read_upload_bytes(file)
        file_size = len(file_bytes)
        filename = file.filename or path
        logger.info(f"📤 Simplified upload: {filename} ({file_size} bytes) for user {user_id}")

        storage = StorageAdmin()

        # Generate file ID and storage path
        file_id = str(uuid.uuid4())
        # Use same bucket logic as old system for consistency
        bucket = _choose_bucket('teacher', user_id, None)
        storage_path = f"{cabinet_id}/{file_id}/{filename}"

        # Store file in Supabase storage
        try:
            storage.upload_file(bucket, storage_path, file_bytes, mime_type, upsert=True)
        except Exception as e:
            logger.error(f"Storage upload failed for {file_id}: {e}")
            raise HTTPException(status_code=500, detail=f"Storage upload failed: {str(e)}")

        # Create database record. Only the insert call is guarded, so the
        # "no rows" HTTPException below is not re-caught and rewrapped.
        try:
            insert_res = client.supabase.table('files').insert({
                'id': file_id,
                'name': filename,
                'cabinet_id': cabinet_id,
                'bucket': bucket,
                'path': storage_path,
                'mime_type': mime_type,
                'uploaded_by': user_id,
                'size_bytes': file_size,
                'source': 'classroomcopilot-web',
                'is_directory': False,
                'processing_status': 'uploaded',  # No auto-processing
                'relative_path': filename
            }).execute()
        except Exception as e:
            logger.error(f"Database insert failed for {file_id}: {e}")
            _remove_orphaned_upload(storage, bucket, storage_path)
            raise HTTPException(status_code=500, detail=f"Database error: {str(e)}")

        if not insert_res.data:
            logger.error(f"Database insert returned no rows for {file_id}")
            # Clean up storage on DB failure
            _remove_orphaned_upload(storage, bucket, storage_path)
            raise HTTPException(status_code=500, detail="Failed to create file record")
        file_record = insert_res.data[0]

        logger.info(f"✅ Simplified upload completed: {file_id}")
        return {
            'status': 'success',
            'message': 'File uploaded successfully (no auto-processing)',
            'file': file_record,
            'auto_processing_disabled': True,
            'next_steps': 'Use manual processing endpoints if needed'
        }
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Upload error: {e}")
        raise HTTPException(status_code=500, detail=f"Upload failed: {str(e)}")
@router.get("/files")
def list_files(cabinet_id: str, payload: Dict[str, Any] = Depends(auth)):
    """Return the file records of a cabinet owned by the caller.

    Returns an empty list when the caller does not own the cabinet.
    Raises a 401 HTTPException (via _user_id_from_payload) when the payload
    carries no user id.
    """
    caller_id = _user_id_from_payload(payload)
    service_client = SupabaseServiceRoleClient()
    if _cabinet_visible_to_user(service_client, cabinet_id, caller_id):
        files_query = service_client.supabase.table('files').select('*').eq('cabinet_id', cabinet_id)
        return files_query.execute().data
    return []
@router.get("/files/{file_id}")
def get_file(file_id: str, payload: Dict[str, Any] = Depends(auth)):
    """Get file details.

    The record is returned only when the caller owns the cabinet the file
    belongs to, the same gate list_files applies.

    Raises:
        HTTPException: 401 when the payload carries no user id; 404 when the
            file does not exist or its cabinet is not owned by the caller
            (the two cases are deliberately indistinguishable).
    """
    user_id = _user_id_from_payload(payload)
    client = SupabaseServiceRoleClient()
    # limit(1) rather than single(): a missing row is then an empty list we
    # can turn into a 404, instead of an error raised by the client.
    res = client.supabase.table('files').select('*').eq('id', file_id).limit(1).execute()
    rows = res.data or []
    if not rows:
        raise HTTPException(status_code=404, detail="File not found")
    file_record = rows[0]
    cabinet_id = file_record.get('cabinet_id')
    if not cabinet_id or not _cabinet_visible_to_user(client, cabinet_id, user_id):
        raise HTTPException(status_code=404, detail="File not found")
    return file_record
@router.delete("/files/{file_id}")
def delete_file(file_id: str, payload: Dict[str, Any] = Depends(auth)):
    """Delete a file.

    Removes the stored object (best effort) and then the database record,
    but only when the caller owns the cabinet the file belongs to.

    Raises:
        HTTPException: 401 when the payload carries no user id; 404 when the
            file does not exist or its cabinet is not owned by the caller.
    """
    user_id = _user_id_from_payload(payload)
    client = SupabaseServiceRoleClient()

    # Get file info first. limit(1) rather than single(): a missing row is an
    # empty list we can turn into a 404, instead of an error raised by the client.
    res = client.supabase.table('files').select('*').eq('id', file_id).limit(1).execute()
    rows = res.data or []
    if not rows:
        raise HTTPException(status_code=404, detail="File not found")
    file_data = rows[0]

    # The service-role client performs the delete, so ownership must be
    # checked explicitly before anything is removed.
    cabinet_id = file_data.get('cabinet_id')
    if not cabinet_id or not _cabinet_visible_to_user(client, cabinet_id, user_id):
        raise HTTPException(status_code=404, detail="File not found")

    storage = StorageAdmin()
    # Delete from storage; a failure is logged and the record is still removed.
    try:
        storage.delete_file(file_data['bucket'], file_data['path'])
    except Exception as e:
        logger.warning(f"Failed to delete file from storage: {e}")

    # Delete from database
    client.supabase.table('files').delete().eq('id', file_id).execute()
    logger.info(f"🗑️ Deleted file: {file_id}")
    return {
        'status': 'success',
        'message': 'File deleted successfully'
    }
@router.post("/files/{file_id}/process-manual")
async def trigger_manual_processing(
    file_id: str,
    processing_type: str = Form('basic'),  # basic, advanced, custom
    payload: Dict[str, Any] = Depends(auth)
):
    """
    Acknowledge a manual processing request for a file.

    Placeholder endpoint: it logs the request and returns an 'accepted'
    response without starting any processing.
    """
    # TODO: Implement manual processing triggers
    # This would call the archived processing logic when the user explicitly requests it
    logger.info(f"🔧 Manual processing requested for file {file_id} (type: {processing_type})")
    queued_message = f'Manual processing queued for file {file_id}'
    placeholder_note = 'Manual processing not yet implemented - will use archived auto-processing logic'
    acknowledgement = {
        'status': 'accepted',
        'message': queued_message,
        'processing_type': processing_type,
        'note': placeholder_note,
    }
    return acknowledgement
@router.get("/files/{file_id}/status")
def get_processing_status(file_id: str, payload: Dict[str, Any] = Depends(auth)):
    """Get processing status for a file.

    The status is returned only when the caller owns the cabinet the file
    belongs to.

    Returns:
        A dict with 'file_id', 'status', 'error' and 'details' taken from the
        file's processing_status, error_message and extra columns.

    Raises:
        HTTPException: 401 when the payload carries no user id; 404 when the
            file does not exist or its cabinet is not owned by the caller.
    """
    user_id = _user_id_from_payload(payload)
    client = SupabaseServiceRoleClient()
    # cabinet_id is selected only for the ownership check and is not returned.
    # limit(1) rather than single(): a missing row is an empty list we can
    # turn into a 404, instead of an error raised by the client.
    res = (
        client.supabase.table('files')
        .select('processing_status, error_message, extra, cabinet_id')
        .eq('id', file_id)
        .limit(1)
        .execute()
    )
    rows = res.data or []
    if not rows:
        raise HTTPException(status_code=404, detail="File not found")
    record = rows[0]
    cabinet_id = record.get('cabinet_id')
    if not cabinet_id or not _cabinet_visible_to_user(client, cabinet_id, user_id):
        raise HTTPException(status_code=404, detail="File not found")
    return {
        'file_id': file_id,
        'status': record.get('processing_status', 'unknown'),
        'error': record.get('error_message'),
        'details': record.get('extra', {})
    }
# Keep existing artefacts endpoints for backward compatibility
@router.get("/files/{file_id}/artefacts")
def list_file_artefacts(file_id: str, payload: Dict[str, Any] = Depends(auth)):
    """List artefacts for a file.

    Returns an empty list when the file does not exist or its cabinet is not
    owned by the caller, mirroring how list_files treats cabinets the caller
    does not own.

    Raises:
        HTTPException: 401 when the payload carries no user id.
    """
    user_id = _user_id_from_payload(payload)
    client = SupabaseServiceRoleClient()

    # The service-role client is used for the read, so resolve the file's
    # cabinet and check ownership before returning anything.
    file_res = client.supabase.table('files').select('cabinet_id').eq('id', file_id).limit(1).execute()
    file_rows = file_res.data or []
    cabinet_id = file_rows[0].get('cabinet_id') if file_rows else None
    if not cabinet_id or not _cabinet_visible_to_user(client, cabinet_id, user_id):
        return []

    res = client.supabase.table('document_artefacts').select('*').eq('file_id', file_id).execute()
    return res.data or []
@router.get("/files/{file_id}/viewer-artefacts")
def list_viewer_artefacts(file_id: str, payload: Dict[str, Any] = Depends(auth)):
    """List artefacts organized for the viewer.

    Artefacts are grouped by their 'type' value into 'document_analysis',
    'processing_bundles' and 'raw_data'. All three groups are returned empty
    when the file does not exist or its cabinet is not owned by the caller.

    Raises:
        HTTPException: 401 when the payload carries no user id.
    """
    user_id = _user_id_from_payload(payload)
    client = SupabaseServiceRoleClient()

    # Simple organization - no complex bundle logic
    organized = {
        'document_analysis': [],
        'processing_bundles': [],
        'raw_data': []
    }

    # The service-role client is used for the read, so resolve the file's
    # cabinet and check ownership before returning anything.
    file_res = client.supabase.table('files').select('cabinet_id').eq('id', file_id).limit(1).execute()
    file_rows = file_res.data or []
    cabinet_id = file_rows[0].get('cabinet_id') if file_rows else None
    if not cabinet_id or not _cabinet_visible_to_user(client, cabinet_id, user_id):
        return organized

    # Get all artefacts
    res = client.supabase.table('document_artefacts').select('*').eq('file_id', file_id).execute()
    artefacts = res.data or []

    for artefact in artefacts:
        # `or ''` covers a null type: get('type', '') would return None there
        # and the .lower() call below would raise.
        artefact_type = artefact.get('type') or ''
        if 'analysis' in artefact_type.lower():
            organized['document_analysis'].append(artefact)
        # NOTE(review): unlike the 'analysis' test, this match is case-sensitive - confirm intended.
        elif any(bundle_type in artefact_type for bundle_type in ['docling', 'bundle']):
            organized['processing_bundles'].append(artefact)
        else:
            organized['raw_data'].append(artefact)
    return organized