[verified] add upload size and MIME guards
Some checks failed
api-ci-deploy / test-build-deploy (push) Has been cancelled
Some checks failed
api-ci-deploy / test-build-deploy (push) Has been cancelled
(cherry picked from commit f5e05376f637f55b73e474cac8199529682ca398)
This commit is contained in:
parent
e98fed661f
commit
c69451fba2
99
modules/upload_validation.py
Normal file
99
modules/upload_validation.py
Normal file
@ -0,0 +1,99 @@
|
||||
"""Upload boundary validation shared by file-upload endpoints.
|
||||
|
||||
E3 hardening: keep user-facing upload routes from buffering arbitrary data and
|
||||
from accepting arbitrary MIME/types into Supabase storage.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
from typing import Iterable, Optional
|
||||
|
||||
from fastapi import HTTPException, UploadFile
|
||||
|
||||
# Conservative defaults: Classroom Copilot uploads are user documents/images.
|
||||
# Exam scan uploads already have their own 50 MB PDF-only guard in routers.exam.batches.
|
||||
MAX_UPLOAD_BYTES = int(os.getenv("CC_UPLOAD_MAX_BYTES", str(25 * 1024 * 1024)))
|
||||
UPLOAD_CHUNK_BYTES = 1024 * 1024
|
||||
|
||||
ALLOWED_UPLOAD_MIME_TYPES = frozenset(
|
||||
mt.strip().lower()
|
||||
for mt in os.getenv(
|
||||
"CC_UPLOAD_ALLOWED_MIME_TYPES",
|
||||
",".join(
|
||||
[
|
||||
"application/pdf",
|
||||
"image/png",
|
||||
"image/jpeg",
|
||||
"image/webp",
|
||||
"image/gif",
|
||||
"text/plain",
|
||||
"text/csv",
|
||||
"text/markdown",
|
||||
"application/msword",
|
||||
"application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
||||
"application/vnd.ms-powerpoint",
|
||||
"application/vnd.openxmlformats-officedocument.presentationml.presentation",
|
||||
"application/vnd.ms-excel",
|
||||
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
|
||||
]
|
||||
),
|
||||
).split(",")
|
||||
if mt.strip()
|
||||
)
|
||||
|
||||
_PDF_MIME_TYPES = {"application/pdf", "application/x-pdf"}
|
||||
|
||||
|
||||
def allowed_upload_mime_types_csv() -> str:
|
||||
"""Stable display string for evidence/errors without leaking config internals."""
|
||||
return ", ".join(sorted(ALLOWED_UPLOAD_MIME_TYPES))
|
||||
|
||||
|
||||
def _declared_mime(upload: UploadFile) -> str:
|
||||
return (upload.content_type or "application/octet-stream").split(";", 1)[0].strip().lower()
|
||||
|
||||
|
||||
def validate_upload_mime(upload: UploadFile, *, allowed_mime_types: Optional[Iterable[str]] = None) -> str:
|
||||
"""Validate client-declared upload MIME/type and return its normalised value."""
|
||||
declared = _declared_mime(upload)
|
||||
allowed = {mt.lower() for mt in (allowed_mime_types or ALLOWED_UPLOAD_MIME_TYPES)}
|
||||
if declared not in allowed:
|
||||
raise HTTPException(
|
||||
status_code=415,
|
||||
detail=(
|
||||
f"Unsupported upload type '{declared}'. Allowed MIME types: "
|
||||
f"{', '.join(sorted(allowed))}"
|
||||
),
|
||||
)
|
||||
return declared
|
||||
|
||||
|
||||
async def read_upload_bytes(
|
||||
upload: UploadFile,
|
||||
*,
|
||||
max_bytes: int = MAX_UPLOAD_BYTES,
|
||||
allowed_mime_types: Optional[Iterable[str]] = None,
|
||||
) -> tuple[bytes, str]:
|
||||
"""Validate MIME and read an UploadFile with a hard size ceiling."""
|
||||
mime_type = validate_upload_mime(upload, allowed_mime_types=allowed_mime_types)
|
||||
chunks: list[bytes] = []
|
||||
total = 0
|
||||
while True:
|
||||
chunk = await upload.read(UPLOAD_CHUNK_BYTES)
|
||||
if not chunk:
|
||||
break
|
||||
total += len(chunk)
|
||||
if total > max_bytes:
|
||||
raise HTTPException(status_code=413, detail=f"Upload exceeds max size ({max_bytes} bytes)")
|
||||
chunks.append(chunk)
|
||||
return b"".join(chunks), mime_type
|
||||
|
||||
|
||||
async def read_pdf_upload_bytes(upload: UploadFile, *, max_bytes: int = MAX_UPLOAD_BYTES) -> bytes:
|
||||
"""Read a PDF-only upload with size and lightweight magic-header validation."""
|
||||
data, _mime_type = await read_upload_bytes(upload, max_bytes=max_bytes, allowed_mime_types=_PDF_MIME_TYPES)
|
||||
if not data:
|
||||
raise HTTPException(status_code=400, detail="Uploaded PDF is empty")
|
||||
if not data.startswith(b"%PDF-"):
|
||||
raise HTTPException(status_code=415, detail="Uploaded file is not a valid PDF")
|
||||
return data
|
||||
@ -12,6 +12,7 @@ from modules.auth.supabase_bearer import SupabaseBearer, verify_supabase_jwt_str
|
||||
from modules.logger_tool import initialise_logger
|
||||
from modules.database.supabase.utils.client import SupabaseServiceRoleClient
|
||||
from modules.database.supabase.utils.storage import StorageAdmin
|
||||
from modules.upload_validation import read_upload_bytes
|
||||
from modules.document_processor import DocumentProcessor
|
||||
from modules.queue_system import (
|
||||
enqueue_tika_task, enqueue_docling_task, enqueue_split_map_task,
|
||||
@ -88,13 +89,13 @@ async def upload_file(
|
||||
# Stage DB row to get file_id
|
||||
staged_path = f"{cabinet_id}/staging/{uuid.uuid4()}"
|
||||
name = _safe_filename(path or file.filename)
|
||||
file_bytes = await file.read()
|
||||
file_bytes, mime_type = await read_upload_bytes(file)
|
||||
insert_res = client.supabase.table('files').insert({
|
||||
'cabinet_id': cabinet_id,
|
||||
'name': name,
|
||||
'path': staged_path,
|
||||
'bucket': bucket,
|
||||
'mime_type': file.content_type,
|
||||
'mime_type': mime_type,
|
||||
'uploaded_by': user_id,
|
||||
'size_bytes': len(file_bytes),
|
||||
'source': 'classroomcopilot-web'
|
||||
@ -107,7 +108,7 @@ async def upload_file(
|
||||
# Final storage path: bucket/cabinet_id/file_id/file
|
||||
final_storage_path = f"{cabinet_id}/{file_id}/{name}"
|
||||
try:
|
||||
storage.upload_file(bucket, final_storage_path, file_bytes, file.content_type or 'application/octet-stream', upsert=True)
|
||||
storage.upload_file(bucket, final_storage_path, file_bytes, mime_type, upsert=True)
|
||||
except Exception as e:
|
||||
# cleanup staged row
|
||||
client.supabase.table('files').delete().eq('id', file_id).execute()
|
||||
|
||||
@ -19,6 +19,7 @@ from fastapi.responses import JSONResponse
|
||||
from modules.auth.supabase_bearer import SupabaseBearer
|
||||
from modules.database.supabase.utils.client import SupabaseServiceRoleClient
|
||||
from modules.database.supabase.utils.storage import StorageAdmin
|
||||
from modules.upload_validation import read_upload_bytes
|
||||
from modules.logger_tool import initialise_logger
|
||||
|
||||
router = APIRouter()
|
||||
@ -72,10 +73,9 @@ async def upload_file(
|
||||
if not user_id:
|
||||
raise HTTPException(status_code=401, detail="User ID required")
|
||||
|
||||
# Read file content
|
||||
file_bytes = await file.read()
|
||||
# Validate MIME/type and read file content with a hard size limit.
|
||||
file_bytes, mime_type = await read_upload_bytes(file)
|
||||
file_size = len(file_bytes)
|
||||
mime_type = file.content_type or 'application/octet-stream'
|
||||
filename = file.filename or path
|
||||
|
||||
logger.info(f"📤 Simplified upload: {filename} ({file_size} bytes) for user {user_id}")
|
||||
|
||||
@ -28,6 +28,7 @@ from api.services.docling.regions import detect_response_regions_from_pdf
|
||||
from modules.database.services.exam_projection import project_template, project_template_safe
|
||||
from modules.database.supabase.utils.client import SupabaseServiceRoleClient
|
||||
from modules.database.supabase.utils.storage import StorageAdmin
|
||||
from modules.upload_validation import read_pdf_upload_bytes
|
||||
from modules.logger_tool import initialise_logger
|
||||
from routers.exam.dependencies import ExamContext, get_exam_context, lookup_exam_code
|
||||
from routers.exam.schemas import (
|
||||
@ -164,11 +165,7 @@ async def _upload_template_source_file(
|
||||
institute_id: str,
|
||||
upload: UploadFile,
|
||||
) -> str:
|
||||
file_bytes = await upload.read()
|
||||
if not file_bytes:
|
||||
raise HTTPException(status_code=400, detail="Uploaded PDF is empty")
|
||||
if upload.content_type and upload.content_type != "application/pdf":
|
||||
raise HTTPException(status_code=400, detail="Uploaded file must be a PDF")
|
||||
file_bytes = await read_pdf_upload_bytes(upload)
|
||||
|
||||
service = SupabaseServiceRoleClient()
|
||||
storage = StorageAdmin()
|
||||
|
||||
@ -26,6 +26,7 @@ from fastapi.responses import JSONResponse
|
||||
from modules.auth.supabase_bearer import SupabaseBearer
|
||||
from modules.database.supabase.utils.client import SupabaseServiceRoleClient
|
||||
from modules.database.supabase.utils.storage import StorageAdmin
|
||||
from modules.upload_validation import read_upload_bytes
|
||||
from modules.logger_tool import initialise_logger
|
||||
|
||||
router = APIRouter()
|
||||
@ -59,10 +60,9 @@ async def upload_single_file(
|
||||
if not user_id:
|
||||
raise HTTPException(status_code=401, detail="User ID required")
|
||||
|
||||
# Read file content
|
||||
file_bytes = await file.read()
|
||||
# Validate MIME/type and read file content with a hard size limit.
|
||||
file_bytes, mime_type = await read_upload_bytes(file)
|
||||
file_size = len(file_bytes)
|
||||
mime_type = file.content_type or 'application/octet-stream'
|
||||
filename = file.filename or path
|
||||
|
||||
logger.info(f"📤 Simple upload: {filename} ({file_size} bytes) for user {user_id}")
|
||||
@ -234,10 +234,9 @@ async def upload_directory(
|
||||
# Process each file
|
||||
for i, (file, relative_path) in enumerate(zip(files, relative_paths)):
|
||||
try:
|
||||
# Read file content
|
||||
file_bytes = await file.read()
|
||||
# Validate MIME/type and read file content with a hard size limit.
|
||||
file_bytes, mime_type = await read_upload_bytes(file)
|
||||
file_size = len(file_bytes)
|
||||
mime_type = file.content_type or 'application/octet-stream'
|
||||
filename = file.filename or f"file_{i}"
|
||||
|
||||
total_size += file_size
|
||||
@ -291,6 +290,8 @@ async def upload_directory(
|
||||
|
||||
logger.info(f"📄 Uploaded file {i+1}/{len(files)}: {relative_path}")
|
||||
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to upload file {relative_path}: {e}")
|
||||
# Continue with other files, don't fail entire upload
|
||||
|
||||
54
tests/test_upload_validation.py
Normal file
54
tests/test_upload_validation.py
Normal file
@ -0,0 +1,54 @@
|
||||
import asyncio
|
||||
|
||||
import pytest
|
||||
from fastapi import HTTPException
|
||||
|
||||
from modules.upload_validation import MAX_UPLOAD_BYTES, read_pdf_upload_bytes, read_upload_bytes
|
||||
|
||||
|
||||
class FakeUpload:
|
||||
def __init__(self, data: bytes, content_type: str, filename: str = "file.bin"):
|
||||
self._data = data
|
||||
self._pos = 0
|
||||
self.content_type = content_type
|
||||
self.filename = filename
|
||||
|
||||
async def read(self, size: int = -1) -> bytes:
|
||||
if self._pos >= len(self._data):
|
||||
return b""
|
||||
if size is None or size < 0:
|
||||
size = len(self._data) - self._pos
|
||||
chunk = self._data[self._pos : self._pos + size]
|
||||
self._pos += len(chunk)
|
||||
return chunk
|
||||
|
||||
|
||||
def run(coro):
|
||||
return asyncio.run(coro)
|
||||
|
||||
|
||||
def test_valid_pdf_upload_passes_and_returns_mime():
|
||||
data, mime = run(read_upload_bytes(FakeUpload(b"%PDF-1.7\n", "application/pdf")))
|
||||
assert data.startswith(b"%PDF-")
|
||||
assert mime == "application/pdf"
|
||||
|
||||
|
||||
def test_disallowed_mime_rejected_with_415():
|
||||
with pytest.raises(HTTPException) as exc:
|
||||
run(read_upload_bytes(FakeUpload(b"print(1)", "application/x-python")))
|
||||
assert exc.value.status_code == 415
|
||||
assert "Unsupported upload type" in exc.value.detail
|
||||
|
||||
|
||||
def test_oversize_upload_rejected_with_413():
|
||||
with pytest.raises(HTTPException) as exc:
|
||||
run(read_upload_bytes(FakeUpload(b"x" * (MAX_UPLOAD_BYTES + 1), "text/plain")))
|
||||
assert exc.value.status_code == 413
|
||||
assert "exceeds max size" in exc.value.detail
|
||||
|
||||
|
||||
def test_pdf_helper_rejects_spoofed_pdf_mime():
|
||||
with pytest.raises(HTTPException) as exc:
|
||||
run(read_pdf_upload_bytes(FakeUpload(b"not a pdf", "application/pdf")))
|
||||
assert exc.value.status_code == 415
|
||||
assert "not a valid PDF" in exc.value.detail
|
||||
Loading…
x
Reference in New Issue
Block a user