api/start_queue_workers.py
2025-11-14 14:47:19 +00:00

139 lines
4.6 KiB
Python

#!/usr/bin/env python3
"""
Queue Workers Startup Script
This script starts document processing queue workers.
Run this alongside your main FastAPI application to process queued tasks.
Usage:
python start_queue_workers.py --workers 4 --services tika,docling
python start_queue_workers.py --help
"""
import argparse
import signal
import sys
import time
import os
import redis
from typing import List
# Add the current directory to Python path so we can import modules
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
from modules.task_processors import get_processor
from modules.queue_system import ServiceType
from modules.logger_tool import initialise_logger
# Module-level logger; level and output directory come from LOG_LEVEL / LOG_PATH
# environment variables (defaults: INFO, ./data/logs/).
logger = initialise_logger(__name__, os.getenv("LOG_LEVEL", "INFO"), os.getenv("LOG_PATH", "./data/logs/"), 'default', True)
def parse_services(services_str: str) -> List[ServiceType]:
    """Parse a comma-separated string of service names into ServiceType enums.

    Args:
        services_str: e.g. "tika,docling". Names are case-insensitive and
            surrounding whitespace is ignored. An empty string selects all
            services.

    Returns:
        List of ServiceType values; every ServiceType when the input is empty.

    Exits the process with status 1 if any name is not a valid ServiceType.
    """
    if not services_str:
        return list(ServiceType)
    services: List[ServiceType] = []
    for raw_name in services_str.split(','):
        service_name = raw_name.strip().lower()
        if not service_name:
            # Tolerate stray separators such as "tika,,docling" or a
            # trailing comma instead of dying on an empty segment.
            continue
        try:
            services.append(ServiceType(service_name))
        except ValueError:
            logger.error(f"Invalid service name: {service_name}")
            logger.error(f"Valid services: {[s.value for s in ServiceType]}")
            sys.exit(1)
    return services
def _log_queue_stats(processor) -> None:
    """Fetch queue statistics from the processor and log a one-line summary.

    Errors from the stats call are logged and swallowed so a transient
    Redis hiccup does not kill the monitoring loop.
    """
    try:
        stats = processor.get_queue_stats()
        total_queued = sum(stats['queues'].values())
        total_processing = stats['total_processing']
        dead_letter = stats['dead_letter_count']
        logger.info(f"Queue status - Queued: {total_queued}, Processing: {total_processing}, Dead: {dead_letter}")
        # Log service-specific stats
        for service in ServiceType:
            processing_count = stats['processing'].get(service.value, 0)
            limit = processor.service_limits[service]
            if processing_count > 0:
                logger.debug(f"{service.value}: {processing_count}/{limit} processing")
        # Warn about dead letters
        if dead_letter > 10:
            logger.warning(f"High number of dead letter tasks: {dead_letter}")
    except Exception as e:
        logger.error(f"Error checking queue stats: {e}")


def main():
    """Entry point: start document-processing queue workers and monitor them.

    Parses CLI options, starts the requested number of workers via the
    shared processor, installs SIGINT/SIGTERM handlers for graceful
    shutdown, then loops forever logging queue health every
    --check-interval seconds. Redis connection settings come from the
    REDIS_HOST/REDIS_PORT environment variables (read by the processor's
    constructor, not here).
    """
    parser = argparse.ArgumentParser(description='Start document processing queue workers')
    parser.add_argument(
        '--workers', '-w',
        type=int,
        default=2,
        help='Number of worker threads to start (default: 2)'
    )
    parser.add_argument(
        '--services', '-s',
        type=str,
        default='',
        help='Comma-separated list of services to handle (default: all). Options: tika,docling,llm,split_map'
    )
    # No longer accept REDIS_URL; rely on REDIS_HOST/REDIS_PORT envs
    parser.add_argument(
        '--check-interval',
        type=int,
        default=30,
        help='Health check interval in seconds (default: 30)'
    )
    args = parser.parse_args()

    # Reject values that would silently do nothing (0 workers) or busy-spin
    # the monitor loop (non-positive sleep interval).
    if args.workers < 1:
        parser.error("--workers must be at least 1")
    if args.check_interval < 1:
        parser.error("--check-interval must be at least 1 second")

    # Parse services
    services = parse_services(args.services)
    logger.info(f"Starting {args.workers} queue workers")
    logger.info(f"Services: {[s.value for s in services]}")
    logger.info(f"Redis: {os.getenv('REDIS_HOST', '127.0.0.1')}:{os.getenv('REDIS_PORT', '6379')}")

    # Get processor instance; it reads REDIS_HOST/REDIS_PORT in its constructor.
    processor = get_processor()

    # Start workers
    worker_ids = [
        processor.start_worker(worker_id=f"cli-worker-{i+1}", services=services)
        for i in range(args.workers)
    ]
    logger.info(f"Started workers: {worker_ids}")

    # Set up signal handlers for graceful shutdown
    def signal_handler(signum, frame):
        logger.info(f"Received signal {signum}, shutting down workers...")
        processor.shutdown(timeout=30)
        logger.info("Workers shut down. Exiting.")
        sys.exit(0)

    signal.signal(signal.SIGINT, signal_handler)
    signal.signal(signal.SIGTERM, signal_handler)

    # Main loop - monitor workers and queue health
    try:
        while True:
            time.sleep(args.check_interval)
            _log_queue_stats(processor)
    except KeyboardInterrupt:
        # Fallback path: normally the SIGINT handler exits first, but shut
        # down cleanly if the interrupt surfaces as an exception instead.
        logger.info("Keyboard interrupt received, shutting down...")
        processor.shutdown(timeout=30)
        logger.info("Workers shut down. Exiting.")


if __name__ == "__main__":
    main()