46 lines
1.3 KiB
Python
46 lines
1.3 KiB
Python
from fastapi import APIRouter, UploadFile, File, HTTPException
|
|
from typing import Dict
|
|
from pathlib import Path
|
|
import shutil
|
|
import tempfile
|
|
from modules.pdf_utils import PDFUtils
|
|
|
|
router = APIRouter()
|
|
|
|
@router.post("/extract-text")
async def extract_text(
    pdf_file: UploadFile = File(...)
):
    """Extract text content from an uploaded PDF file.

    The upload is copied into a file inside a temporary directory and that
    path is passed to ``PDFUtils.extract_text_from_pdf``. The temporary
    directory is removed when the ``with`` block exits.

    Args:
        pdf_file: The uploaded file, sent as a multipart form field.

    Returns:
        A dict of the form ``{"text": <result of the extraction>}``.

    Raises:
        HTTPException: With status 500 and the error message as detail when
            saving or processing the upload fails. An ``HTTPException``
            raised while processing is passed through unchanged.
    """
    try:
        with tempfile.TemporaryDirectory() as temp_dir:
            # The filename comes from the client and is untrusted: it may be
            # missing, absolute, or contain ".." components. Keep only the
            # final path component so the file always lands inside temp_dir.
            safe_name = Path(pdf_file.filename or "").name
            if safe_name in ("", ".", ".."):
                safe_name = "upload.pdf"
            temp_file = Path(temp_dir) / safe_name

            with temp_file.open("wb") as buffer:
                shutil.copyfileobj(pdf_file.file, buffer)

            text = PDFUtils.extract_text_from_pdf(temp_file)
            return {"text": text}
    except HTTPException:
        # Preserve a deliberate HTTP error instead of masking it as a 500.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
|
|
|
|
@router.post("/metadata")
async def get_metadata(
    pdf_file: UploadFile = File(...)
):
    """Get metadata from an uploaded PDF file.

    The upload is copied into a file inside a temporary directory and that
    path is passed to ``PDFUtils.get_pdf_metadata``. The temporary directory
    is removed when the ``with`` block exits.

    Args:
        pdf_file: The uploaded file, sent as a multipart form field.

    Returns:
        Whatever ``PDFUtils.get_pdf_metadata`` returns for the saved file.

    Raises:
        HTTPException: With status 500 and the error message as detail when
            saving or processing the upload fails. An ``HTTPException``
            raised while processing is passed through unchanged.
    """
    try:
        with tempfile.TemporaryDirectory() as temp_dir:
            # The filename comes from the client and is untrusted: it may be
            # missing, absolute, or contain ".." components. Keep only the
            # final path component so the file always lands inside temp_dir.
            safe_name = Path(pdf_file.filename or "").name
            if safe_name in ("", ".", ".."):
                safe_name = "upload.pdf"
            temp_file = Path(temp_dir) / safe_name

            with temp_file.open("wb") as buffer:
                shutil.copyfileobj(pdf_file.file, buffer)

            metadata = PDFUtils.get_pdf_metadata(temp_file)
            return metadata
    except HTTPException:
        # Preserve a deliberate HTTP error instead of masking it as a 500.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e