api/routers/dev/pdf_utils.py
2025-07-11 13:52:19 +00:00

46 lines
1.3 KiB
Python

from fastapi import APIRouter, UploadFile, File, HTTPException
from typing import Dict
from pathlib import Path
import shutil
import tempfile
from modules.pdf_utils import PDFUtils
# Router instance that the PDF endpoints in this module register on
# through their @router.post(...) decorators.
router = APIRouter()
@router.post("/extract-text")
async def extract_text(
    pdf_file: UploadFile = File(...)
):
    """
    Extract text content from a PDF file.

    The upload is spooled into a private temporary directory, handed to
    ``PDFUtils.extract_text_from_pdf`` as a filesystem path, and the
    directory is removed again before the response is returned.

    Args:
        pdf_file: The uploaded PDF (multipart form field ``pdf_file``).

    Returns:
        A dict of the form ``{"text": <result of the extractor>}``.

    Raises:
        HTTPException: status 500 with the underlying error message as
            ``detail`` if saving or processing the upload fails.
    """
    try:
        # The filename comes straight from the client and must not be trusted:
        # joining "../../x" or an absolute path onto temp_dir would write
        # outside it. Keep only the final path component, and fall back to a
        # fixed name when that is missing or is a directory reference.
        safe_name = Path(pdf_file.filename or "").name
        if safe_name in ("", ".", ".."):
            safe_name = "upload.pdf"

        with tempfile.TemporaryDirectory() as temp_dir:
            temp_file = Path(temp_dir) / safe_name
            # NOTE(review): this is a synchronous copy inside an async
            # handler; large uploads will hold the event loop while copying.
            with temp_file.open("wb") as buffer:
                shutil.copyfileobj(pdf_file.file, buffer)
            # Extraction must happen while the temporary directory still exists.
            text = PDFUtils.extract_text_from_pdf(temp_file)
        return {"text": text}
    except Exception as e:
        # Endpoint boundary: surface any failure as a 500, keeping the cause
        # chained for server-side tracebacks.
        raise HTTPException(status_code=500, detail=str(e)) from e
@router.post("/metadata")
async def get_metadata(
    pdf_file: UploadFile = File(...)
):
    """
    Get metadata from a PDF file.

    The upload is spooled into a private temporary directory, handed to
    ``PDFUtils.get_pdf_metadata`` as a filesystem path, and the directory
    is removed again before the response is returned.

    Args:
        pdf_file: The uploaded PDF (multipart form field ``pdf_file``).

    Returns:
        Whatever ``PDFUtils.get_pdf_metadata`` returns, passed through
        unchanged.

    Raises:
        HTTPException: status 500 with the underlying error message as
            ``detail`` if saving or processing the upload fails.
    """
    try:
        # The filename comes straight from the client and must not be trusted:
        # joining "../../x" or an absolute path onto temp_dir would write
        # outside it. Keep only the final path component, and fall back to a
        # fixed name when that is missing or is a directory reference.
        safe_name = Path(pdf_file.filename or "").name
        if safe_name in ("", ".", ".."):
            safe_name = "upload.pdf"

        with tempfile.TemporaryDirectory() as temp_dir:
            temp_file = Path(temp_dir) / safe_name
            # NOTE(review): this is a synchronous copy inside an async
            # handler; large uploads will hold the event loop while copying.
            with temp_file.open("wb") as buffer:
                shutil.copyfileobj(pdf_file.file, buffer)
            # Metadata must be read while the temporary directory still exists.
            metadata = PDFUtils.get_pdf_metadata(temp_file)
        return metadata
    except Exception as e:
        # Endpoint boundary: surface any failure as a 500, keeping the cause
        # chained for server-side tracebacks.
        raise HTTPException(status_code=500, detail=str(e)) from e