139 lines
4.9 KiB
Python
139 lines
4.9 KiB
Python
from fastapi import APIRouter, UploadFile, File, HTTPException
|
|
from typing import List, Optional, Dict
|
|
from pathlib import Path
|
|
import shutil
|
|
import tempfile
|
|
from pydantic import BaseModel
|
|
from modules.document_processor import DocumentProcessor
|
|
import os
|
|
|
|
class BatchConvertRequest(BaseModel):
    """Request body accepted by the recursive batch-conversion endpoint."""

    # Root directory that is searched (including subdirectories) for documents.
    directory: str

    # Optional directory that receives the generated PDFs; when omitted or
    # empty, each PDF is written next to its source document.
    output_dir: Optional[str] = None
|
|
|
|
# Router on which the conversion endpoints of this module are registered.
router = APIRouter()

# Module-level DocumentProcessor instance used by the endpoint handlers.
doc_processor = DocumentProcessor()
|
|
|
|
@router.post("/convert-to-pdf")
async def convert_to_pdf(
    files: List[UploadFile] = File(...),
    output_format: str = "pdf"
):
    """
    Convert uploaded documents to PDF format.

    Each upload is saved into a temporary directory, converted with
    ``doc_processor.convert_to_pdf`` and reported individually, so a failure
    on one file does not abort the rest of the batch.

    Args:
        files: The uploaded documents.
        output_format: Accepted for interface compatibility; currently unused.

    Returns:
        A list with one dict per upload. Successful entries carry
        ``filename``, ``converted_content`` and ``status == "success"``;
        failed entries carry ``filename``, ``error`` and ``status == "error"``.
    """
    results = []

    with tempfile.TemporaryDirectory() as temp_dir:
        temp_root = Path(temp_dir)

        for index, file in enumerate(files):
            try:
                # The filename is client-controlled: keep only its final path
                # component so values such as "../../x" or "/etc/x" cannot
                # make us write outside the temporary directory.
                safe_name = Path(file.filename or "").name
                if safe_name in ("", ".", ".."):
                    safe_name = f"upload_{index}"
                temp_file = temp_root / safe_name

                # Save uploaded file to temp directory. This happens inside
                # the try so an I/O failure is reported for this file only.
                with temp_file.open("wb") as buffer:
                    shutil.copyfileobj(file.file, buffer)

                # Process the document
                pdf_content = doc_processor.convert_to_pdf(temp_file)

                # NOTE(review): if convert_to_pdf returns raw bytes, confirm
                # how this value is encoded in the JSON response (binary PDF
                # data is generally not valid UTF-8) -- consider base64.
                results.append({
                    "filename": file.filename,
                    "converted_content": pdf_content,
                    "status": "success"
                })
            except Exception as e:
                results.append({
                    "filename": file.filename,
                    "error": str(e),
                    "status": "error"
                })

    return results
|
|
|
|
@router.post("/batch-convert")
async def batch_convert(
    directory: str,
    output_format: str = "pdf"
):
    """
    Convert all documents in a directory to PDF format.

    Delegates to ``doc_processor.batch_convert_directory`` and returns its
    result unchanged. Any exception raised by that call is reported to the
    client as an HTTP 500 whose detail is the exception message.
    """
    try:
        converted = doc_processor.batch_convert_directory(directory)
    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc))
    else:
        return converted
|
|
|
|
def _pdf_destination(source_file: Path, source_root: Path, output_root: Optional[Path]) -> Path:
    """Return the path the converted PDF for ``source_file`` is written to."""
    if output_root is None:
        # No output directory requested: write next to the source document.
        return source_file.with_suffix('.pdf')
    # Preserve the directory structure of the source tree in the output dir.
    return output_root / source_file.relative_to(source_root).with_suffix('.pdf')


@router.post("/batch-convert-recursive")
async def batch_convert_recursive(request_data: BatchConvertRequest):
    """
    Convert all documents in a directory and its subdirectories to PDF.

    Files are discovered with one recursive glob per key of
    ``doc_processor.supported_extensions``. Each file is converted
    independently; a failure is recorded in the results and does not stop
    the remaining conversions.

    Args:
        request_data: ``directory`` is the root to scan. ``output_dir``, when
            given, receives the PDFs with the source directory structure
            mirrored; otherwise each PDF is written next to its source.

    Returns:
        A dict with ``total_files``, ``successful``, ``failed`` and the
        per-file ``results`` list.

    Raises:
        HTTPException: 404 when ``directory`` is not an existing directory;
            500 for any other unexpected failure outside per-file handling.
    """
    try:
        directory_path = Path(request_data.directory)
        # is_dir() is False for missing paths as well as for regular files,
        # so a file passed as "directory" is rejected too.
        if not directory_path.is_dir():
            raise HTTPException(status_code=404, detail=f"Directory not found: {request_data.directory}")

        output_path = None
        if request_data.output_dir:
            output_path = Path(request_data.output_dir)
            output_path.mkdir(parents=True, exist_ok=True)

        results = []
        supported_extensions = doc_processor.supported_extensions.keys()

        # Debug: Print processing info
        print(f"Processing directory: {directory_path}")
        print(f"Output directory: {output_path}")
        print(f"Supported extensions: {list(supported_extensions)}")

        # Collect every matching file up front so the total can be reported.
        # Directories whose name happens to match a pattern are skipped.
        all_files = []
        for ext in supported_extensions:
            all_files.extend(
                match for match in directory_path.rglob(f"*.{ext}") if match.is_file()
            )
        print(f"Found {len(all_files)} files to process")

        for file_path in all_files:
            try:
                print(f"Processing: {file_path}")
                # Convert the document
                pdf_content = doc_processor.convert_to_pdf(file_path)

                # NOTE(review): sources that differ only by extension
                # (e.g. a.docx and a.txt) map to the same a.pdf and the
                # later one overwrites the earlier -- confirm this is OK.
                out_path = _pdf_destination(file_path, directory_path, output_path)
                # No-op when the PDF is written next to its source.
                out_path.parent.mkdir(parents=True, exist_ok=True)

                # Save the PDF
                out_path.write_bytes(pdf_content)

                results.append({
                    "source_file": str(file_path),
                    "output_file": str(out_path),
                    "status": "success"
                })
                print(f"Successfully converted: {file_path} -> {out_path}")

            except Exception as e:
                # Per-file failures are reported, not propagated.
                print(f"Error converting {file_path}: {e}")
                results.append({
                    "source_file": str(file_path),
                    "status": "error",
                    "error": str(e)
                })

        successful = sum(1 for r in results if r["status"] == "success")
        response_data = {
            "total_files": len(results),
            "successful": successful,
            # Every entry is either "success" or "error".
            "failed": len(results) - successful,
            "results": results
        }
        print(f"Conversion complete: {response_data['successful']} successful, {response_data['failed']} failed")
        return response_data

    except HTTPException:
        # Deliberate HTTP errors (e.g. the 404 above) must reach the client
        # unchanged instead of being rewrapped as a 500 below.
        raise
    except Exception as e:
        print(f"Error in batch conversion: {e}")
        raise HTTPException(status_code=500, detail=str(e)) from e