from fastapi import APIRouter, UploadFile, File, HTTPException
from typing import List, Optional, Dict
from pathlib import Path
import shutil
import tempfile
from pydantic import BaseModel
from modules.document_processor import DocumentProcessor
import os


class BatchConvertRequest(BaseModel):
    """Request body for the recursive batch conversion endpoint."""

    # Directory that is searched recursively for documents to convert.
    directory: str
    # Optional output root; when omitted, each PDF is written next to its source.
    output_dir: Optional[str] = None


router = APIRouter()
doc_processor = DocumentProcessor()


def _safe_upload_name(filename: Optional[str], fallback: str) -> str:
    """Return only the final path component of a client-supplied filename.

    The filename of an upload is controlled by the client, so it may be
    missing, absolute, or contain ``..`` segments. Joining it onto a
    directory unchanged would let the write escape that directory.
    ``fallback`` is returned when no usable component remains.
    """
    if not filename:
        return fallback
    name = Path(filename).name
    # Path("..").name is "..", and Path("/").name is "", so both need a guard.
    if name in ("", ".", ".."):
        return fallback
    return name


@router.post("/convert-to-pdf")
async def convert_to_pdf(
    files: List[UploadFile] = File(...),
    output_format: str = "pdf"
):
    """
    Convert uploaded documents to PDF format.

    Each upload is saved into a temporary directory, converted with
    ``doc_processor.convert_to_pdf`` and reported individually, so one
    failing file does not abort the others.

    Args:
        files: Uploaded documents to convert.
        output_format: Accepted for API compatibility; currently unused.

    Returns:
        A list with one dict per upload. Successful entries contain
        ``filename``, ``converted_content`` and ``status == "success"``;
        failed entries contain ``filename``, ``error`` and
        ``status == "error"``.
    """
    results = []

    with tempfile.TemporaryDirectory() as temp_dir:
        for index, file in enumerate(files):
            try:
                # Strip directory components so the write stays in temp_dir.
                safe_name = _safe_upload_name(file.filename, f"upload_{index}")
                temp_file = Path(temp_dir) / safe_name

                # Save uploaded file to temp directory
                with temp_file.open("wb") as buffer:
                    shutil.copyfileobj(file.file, buffer)

                # Process the document
                pdf_content = doc_processor.convert_to_pdf(temp_file)
                results.append({
                    "filename": file.filename,
                    "converted_content": pdf_content,
                    "status": "success"
                })
            except Exception as e:
                # Per-file boundary: record the failure and keep going.
                results.append({
                    "filename": file.filename,
                    "error": str(e),
                    "status": "error"
                })

    return results


@router.post("/batch-convert")
async def batch_convert(
    directory: str,
    output_format: str = "pdf"
):
    """
    Convert all documents in a directory to PDF format.

    Args:
        directory: Directory passed to ``doc_processor.batch_convert_directory``.
        output_format: Accepted for API compatibility; currently unused.

    Returns:
        Whatever ``doc_processor.batch_convert_directory`` returns.

    Raises:
        HTTPException: 500 with the error text if the conversion raises.
    """
    try:
        results = doc_processor.batch_convert_directory(directory)
        return results
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))


@router.post("/batch-convert-recursive")
async def batch_convert_recursive(request_data: BatchConvertRequest):
    """
    Convert all documents in a directory and its subdirectories to PDF.

    Files are matched by the keys of ``doc_processor.supported_extensions``.
    When ``output_dir`` is given, the source directory structure is mirrored
    beneath it; otherwise each PDF is written next to its source file.

    Args:
        request_data: Source directory and optional output directory.

    Returns:
        A dict with ``total_files``, ``successful``, ``failed`` and the
        per-file ``results`` list.

    Raises:
        HTTPException: 404 if the source directory does not exist; 500 for
            any other failure outside the per-file conversion loop.
    """
    try:
        directory_path = Path(request_data.directory)
        if not directory_path.exists():
            raise HTTPException(status_code=404, detail=f"Directory not found: {request_data.directory}")

        output_path = None
        if request_data.output_dir:
            output_path = Path(request_data.output_dir)
            output_path.mkdir(parents=True, exist_ok=True)

        results = []
        supported_extensions = doc_processor.supported_extensions.keys()

        # Debug: Print processing info
        print(f"Processing directory: {directory_path}")
        print(f"Output directory: {output_path}")
        print(f"Supported extensions: {list(supported_extensions)}")

        # Collect every matching file up front so the total can be reported.
        all_files = []
        for ext in supported_extensions:
            all_files.extend(directory_path.rglob(f"*.{ext}"))

        print(f"Found {len(all_files)} files to process")

        for file_path in all_files:
            try:
                print(f"Processing: {file_path}")

                # Convert the document
                pdf_content = doc_processor.convert_to_pdf(file_path)

                # Determine output path
                if output_path:
                    # Preserve directory structure in output_dir
                    rel_path = file_path.relative_to(directory_path)
                    out_path = output_path / rel_path.with_suffix('.pdf')
                    out_path.parent.mkdir(parents=True, exist_ok=True)
                else:
                    out_path = file_path.with_suffix('.pdf')

                # Save the PDF
                with open(out_path, 'wb') as f:
                    f.write(pdf_content)

                results.append({
                    "source_file": str(file_path),
                    "output_file": str(out_path),
                    "status": "success"
                })
                print(f"Successfully converted: {file_path} -> {out_path}")
            except Exception as e:
                # Per-file boundary: record the failure and keep going.
                print(f"Error converting {file_path}: {str(e)}")
                results.append({
                    "source_file": str(file_path),
                    "status": "error",
                    "error": str(e)
                })

        response_data = {
            "total_files": len(results),
            "successful": sum(1 for r in results if r["status"] == "success"),
            "failed": sum(1 for r in results if r["status"] == "error"),
            "results": results
        }
        print(f"Conversion complete: {response_data['successful']} successful, {response_data['failed']} failed")
        return response_data

    except HTTPException:
        # Let deliberate HTTP errors (the 404 above) through unchanged;
        # the generic handler below would otherwise turn them into a 500.
        raise
    except Exception as e:
        print(f"Error in batch conversion: {str(e)}")
        raise HTTPException(status_code=500, detail=str(e))