#!/usr/bin/env python3
"""
Queue Workers Startup Script

This script starts document processing queue workers.
Run it alongside your main FastAPI application to process queued tasks.

Usage:
    python start_queue_workers.py --workers 4 --services tika,docling
    python start_queue_workers.py --help
"""

import argparse
import signal
import sys
import time
import os
import redis
from typing import List

# Add the current directory to the Python path so we can import modules
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))

from modules.task_processors import get_processor
from modules.queue_system import ServiceType
from modules.logger_tool import initialise_logger

logger = initialise_logger(__name__, os.getenv("LOG_LEVEL", "INFO"), os.getenv("LOG_PATH", "./data/logs/"), 'default', True)


def parse_services(services_str: str) -> List[ServiceType]:
    """Parse comma-separated service names into ServiceType enums."""
    if not services_str:
        return list(ServiceType)

    services = []
    for service_name in services_str.split(','):
        service_name = service_name.strip().lower()
        try:
            services.append(ServiceType(service_name))
        except ValueError:
            logger.error(f"Invalid service name: {service_name}")
            logger.error(f"Valid services: {[s.value for s in ServiceType]}")
            sys.exit(1)

    return services


def main():
    parser = argparse.ArgumentParser(description='Start document processing queue workers')
    parser.add_argument(
        '--workers', '-w',
        type=int,
        default=2,
        help='Number of worker threads to start (default: 2)'
    )
    parser.add_argument(
        '--services', '-s',
        type=str,
        default='',
        help='Comma-separated list of services to handle (default: all). Options: tika,docling,llm,split_map'
    )
    # No longer accept REDIS_URL; rely on REDIS_HOST/REDIS_PORT envs
    parser.add_argument(
        '--check-interval',
        type=int,
        default=30,
        help='Health check interval in seconds (default: 30)'
    )

    args = parser.parse_args()

    # Parse services
    services = parse_services(args.services)

    logger.info(f"Starting {args.workers} queue workers")
    logger.info(f"Services: {[s.value for s in services]}")
    logger.info(f"Redis: {os.getenv('REDIS_HOST', '127.0.0.1')}:{os.getenv('REDIS_PORT', '6379')}")

    # Get processor instance (reads REDIS_HOST/REDIS_PORT in its constructor)
    processor = get_processor()

    # Start workers
    worker_ids = []
    for i in range(args.workers):
        worker_id = processor.start_worker(
            worker_id=f"cli-worker-{i+1}",
            services=services
        )
        worker_ids.append(worker_id)

    logger.info(f"Started workers: {worker_ids}")

    # Set up signal handlers for graceful shutdown
    def signal_handler(signum, frame):
        logger.info(f"Received signal {signum}, shutting down workers...")
        processor.shutdown(timeout=30)
        logger.info("Workers shut down. Exiting.")
        sys.exit(0)

    signal.signal(signal.SIGINT, signal_handler)
    signal.signal(signal.SIGTERM, signal_handler)

    # Main loop - monitor workers and queue health
    try:
        while True:
            time.sleep(args.check_interval)

            # Check queue stats
            try:
                stats = processor.get_queue_stats()
                total_queued = sum(stats['queues'].values())
                total_processing = stats['total_processing']
                dead_letter = stats['dead_letter_count']

                logger.info(f"Queue status - Queued: {total_queued}, Processing: {total_processing}, Dead: {dead_letter}")

                # Log service-specific stats
                for service in ServiceType:
                    processing_count = stats['processing'].get(service.value, 0)
                    limit = processor.service_limits[service]
                    if processing_count > 0:
                        logger.debug(f"{service.value}: {processing_count}/{limit} processing")

                # Warn about dead letters
                if dead_letter > 10:
                    logger.warning(f"High number of dead letter tasks: {dead_letter}")

            except Exception as e:
                logger.error(f"Error checking queue stats: {e}")

    except KeyboardInterrupt:
        logger.info("Keyboard interrupt received, shutting down...")
        processor.shutdown(timeout=30)
        logger.info("Workers shut down. Exiting.")


if __name__ == "__main__":
    main()