#!/usr/bin/env python3
"""
Queue Workers Startup Script

This script starts document processing queue workers.
Run this alongside your main FastAPI application to process queued tasks.

Usage:
    python start_queue_workers.py --workers 4 --services tika,docling
    python start_queue_workers.py --help
"""
|
|
|
|
import argparse
|
|
import signal
|
|
import sys
|
|
import time
|
|
import os
|
|
import redis
|
|
from typing import List
|
|
|
|
# Put this script's own directory first on sys.path so the project-local
# imports that follow resolve when the file is run directly.
_script_dir = os.path.dirname(os.path.abspath(__file__))
sys.path.insert(0, _script_dir)
|
|
|
|
from modules.task_processors import get_processor
|
|
from modules.queue_system import ServiceType
|
|
from modules.logger_tool import initialise_logger
|
|
|
|
# Module-level logger. The level and path arguments are read from the
# LOG_LEVEL and LOG_PATH environment variables, with the defaults shown.
logger = initialise_logger(
    __name__,
    os.getenv("LOG_LEVEL", "INFO"),
    os.getenv("LOG_PATH", "./data/logs/"),
    'default',
    True,
)
|
|
|
|
def parse_services(services_str: str) -> List[ServiceType]:
    """Parse comma-separated service names into ServiceType enums.

    Args:
        services_str: Comma-separated service names, e.g. ``"tika, docling"``.
            Each name is stripped and lower-cased before lookup. An empty,
            blank, or comma-only string selects every service.

    Returns:
        The requested services in the order given, without duplicates, or
        every member of ``ServiceType`` when no names were supplied.

    Exits:
        Logs the offending name and the valid values, then calls
        ``sys.exit(1)`` when a name is not a valid ``ServiceType`` value.
    """
    # Drop blank entries so a trailing or doubled comma ("tika,,docling,")
    # is not reported as an invalid service named "".
    cleaned = (part.strip().lower() for part in (services_str or "").split(','))
    names = [name for name in cleaned if name]
    if not names:
        return list(ServiceType)

    services: List[ServiceType] = []
    for service_name in names:
        try:
            service = ServiceType(service_name)
        except ValueError:
            logger.error(f"Invalid service name: {service_name}")
            logger.error(f"Valid services: {[s.value for s in ServiceType]}")
            sys.exit(1)
        # Repeating a name on the command line should not register it twice.
        if service not in services:
            services.append(service)

    return services
|
|
|
|
def _positive_int(value: str) -> int:
    """argparse ``type=`` converter that accepts only integers >= 1."""
    try:
        number = int(value)
    except ValueError:
        raise argparse.ArgumentTypeError(f"invalid int value: {value!r}") from None
    if number < 1:
        raise argparse.ArgumentTypeError(f"must be a positive integer, got {number}")
    return number


def _log_queue_health(processor) -> None:
    """Log one snapshot of queued, in-flight and dead-letter task counts."""
    stats = processor.get_queue_stats()
    total_queued = sum(stats['queues'].values())
    total_processing = stats['total_processing']
    dead_letter = stats['dead_letter_count']

    logger.info(f"Queue status - Queued: {total_queued}, Processing: {total_processing}, Dead: {dead_letter}")

    # Log service-specific stats
    for service in ServiceType:
        processing_count = stats['processing'].get(service.value, 0)
        if processing_count > 0:
            # Only look the limit up when it is actually reported, so an idle
            # service with no entry in service_limits cannot abort the check.
            limit = processor.service_limits[service]
            logger.debug(f"{service.value}: {processing_count}/{limit} processing")

    # Warn about dead letters
    if dead_letter > 10:
        logger.warning(f"High number of dead letter tasks: {dead_letter}")


def main(argv=None) -> None:
    """Start the queue workers and monitor queue health until signalled.

    Parses the command line, starts ``--workers`` workers on the shared
    processor for the selected services, then logs queue statistics every
    ``--check-interval`` seconds. SIGINT and SIGTERM end the loop; the
    workers are shut down (30 s timeout) on any exit path once the processor
    exists.

    Args:
        argv: Optional list of command-line arguments. ``None`` (the default)
            makes argparse read ``sys.argv[1:]``, as before.

    Raises:
        SystemExit: With code 2 on invalid arguments (including non-positive
            ``--workers`` / ``--check-interval``), and with code 0 after a
            SIGINT/SIGTERM-triggered shutdown.
    """
    parser = argparse.ArgumentParser(description='Start document processing queue workers')
    parser.add_argument(
        '--workers', '-w',
        type=_positive_int,
        default=2,
        help='Number of worker threads to start (default: 2)'
    )
    parser.add_argument(
        '--services', '-s',
        type=str,
        default='',
        help='Comma-separated list of services to handle (default: all). Options: tika,docling,llm,split_map'
    )
    # No longer accept REDIS_URL; rely on REDIS_HOST/REDIS_PORT envs
    parser.add_argument(
        '--check-interval',
        # Zero would spin the monitor loop and a negative value makes
        # time.sleep() raise, so both are rejected at parse time.
        type=_positive_int,
        default=30,
        help='Health check interval in seconds (default: 30)'
    )

    args = parser.parse_args(argv)

    # Parse services
    services = parse_services(args.services)

    logger.info(f"Starting {args.workers} queue workers")
    logger.info(f"Services: {[s.value for s in services]}")
    logger.info(f"Redis: {os.getenv('REDIS_HOST', '127.0.0.1')}:{os.getenv('REDIS_PORT', '6379')}")

    # Get processor instance
    # (per the original note, the processor reads REDIS_HOST/REDIS_PORT itself)
    processor = get_processor()

    def signal_handler(signum, frame):
        logger.info(f"Received signal {signum}, shutting down workers...")
        # Unwind through SystemExit; the finally block below stops the workers.
        sys.exit(0)

    # Install the handlers before any worker starts so a signal that arrives
    # during startup still leads to an orderly shutdown. With a custom SIGINT
    # handler in place Python no longer raises KeyboardInterrupt, so no
    # separate except-branch is needed for Ctrl+C.
    signal.signal(signal.SIGINT, signal_handler)
    signal.signal(signal.SIGTERM, signal_handler)

    try:
        # Start workers
        worker_ids = [
            processor.start_worker(
                worker_id=f"cli-worker-{index}",
                services=services
            )
            for index in range(1, args.workers + 1)
        ]
        logger.info(f"Started workers: {worker_ids}")

        # Main loop - monitor workers and queue health
        while True:
            time.sleep(args.check_interval)

            # Monitoring is best-effort: a failed stats call is logged and
            # retried on the next tick rather than stopping the workers.
            try:
                _log_queue_health(processor)
            except Exception as e:
                logger.error(f"Error checking queue stats: {e}")
    finally:
        # Runs for signals and unexpected errors alike, so workers are never
        # left running behind a dead monitor loop.
        processor.shutdown(timeout=30)
        logger.info("Workers shut down. Exiting.")
|
|
|
|
# Run the CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|