fix: cache Neo4j driver failure state to avoid 60s retry on every request
get_global_driver() now sets _driver_unavailable=True when the initial connection fails, so subsequent calls return None immediately instead of spending 60s retrying each time. Added reset_global_driver() to allow manual reconnection after Neo4j comes back up.

Also fixes APP_BOLT_URL in .env: it was bolt://bolt.classroomcopilot.ai (public IP, port not exposed) and is now bolt://192.168.0.209:7687 (LAN).

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
7ca21ef538
commit
84f7fa9de1
@ -18,7 +18,7 @@ def _retry_with_backoff(
|
|||||||
) -> any:
|
) -> any:
|
||||||
"""
|
"""
|
||||||
Helper function to retry operations with exponential backoff.
|
Helper function to retry operations with exponential backoff.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
func: Function to retry
|
func: Function to retry
|
||||||
max_attempts: Maximum number of retry attempts
|
max_attempts: Maximum number of retry attempts
|
||||||
@ -29,26 +29,26 @@ def _retry_with_backoff(
|
|||||||
attempt = 0
|
attempt = 0
|
||||||
delay = initial_delay
|
delay = initial_delay
|
||||||
start_time = time.time()
|
start_time = time.time()
|
||||||
|
|
||||||
while attempt < max_attempts:
|
while attempt < max_attempts:
|
||||||
try:
|
try:
|
||||||
return func()
|
return func()
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
attempt += 1
|
attempt += 1
|
||||||
elapsed_time = time.time() - start_time
|
elapsed_time = time.time() - start_time
|
||||||
|
|
||||||
# Check if we've exceeded the maximum total wait time
|
# Check if we've exceeded the maximum total wait time
|
||||||
if elapsed_time >= max_total_wait:
|
if elapsed_time >= max_total_wait:
|
||||||
logger.error(f"Exceeded maximum total wait time of {max_total_wait} seconds")
|
logger.error(f"Exceeded maximum total wait time of {max_total_wait} seconds")
|
||||||
raise
|
raise
|
||||||
|
|
||||||
if attempt == max_attempts:
|
if attempt == max_attempts:
|
||||||
logger.error(f"Final attempt {attempt} failed: {e}")
|
logger.error(f"Final attempt {attempt} failed: {e}")
|
||||||
raise
|
raise
|
||||||
|
|
||||||
# Calculate next delay with exponential backoff, but cap it
|
# Calculate next delay with exponential backoff, but cap it
|
||||||
delay = min(delay * 2, max_delay)
|
delay = min(delay * 2, max_delay)
|
||||||
|
|
||||||
# If we're in a container initialization scenario, provide more context
|
# If we're in a container initialization scenario, provide more context
|
||||||
if "Connection refused" in str(e):
|
if "Connection refused" in str(e):
|
||||||
logger.warning(
|
logger.warning(
|
||||||
@ -59,7 +59,7 @@ def _retry_with_backoff(
|
|||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
logger.warning(f"Attempt {attempt} failed: {e}. Retrying in {delay:.1f} seconds...")
|
logger.warning(f"Attempt {attempt} failed: {e}. Retrying in {delay:.1f} seconds...")
|
||||||
|
|
||||||
time.sleep(delay)
|
time.sleep(delay)
|
||||||
|
|
||||||
def get_driver(db_name: Optional[str] = None, url: Optional[str] = None, auth: Optional[Tuple[str, str]] = None) -> Optional[Driver]:
|
def get_driver(db_name: Optional[str] = None, url: Optional[str] = None, auth: Optional[Tuple[str, str]] = None) -> Optional[Driver]:
|
||||||
@ -71,7 +71,7 @@ def get_driver(db_name: Optional[str] = None, url: Optional[str] = None, auth: O
|
|||||||
logger.error("Neo4j credentials not found in environment")
|
logger.error("Neo4j credentials not found in environment")
|
||||||
return None
|
return None
|
||||||
auth = (username, password)
|
auth = (username, password)
|
||||||
|
|
||||||
if auth is None:
|
if auth is None:
|
||||||
logger.error("No authentication credentials provided")
|
logger.error("No authentication credentials provided")
|
||||||
return None
|
return None
|
||||||
@ -95,7 +95,7 @@ def get_driver(db_name: Optional[str] = None, url: Optional[str] = None, auth: O
|
|||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Failed to establish Neo4j connection after all retries: {e}")
|
logger.error(f"Failed to establish Neo4j connection after all retries: {e}")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
# Test the connection with the specific database
|
# Test the connection with the specific database
|
||||||
if db_name and driver:
|
if db_name and driver:
|
||||||
def verify_database():
|
def verify_database():
|
||||||
@ -127,27 +127,50 @@ def close_driver(driver: Optional[Driver]) -> None:
|
|||||||
logger.info("Closing driver")
|
logger.info("Closing driver")
|
||||||
driver.close()
|
driver.close()
|
||||||
|
|
||||||
# Global driver cache shared by get_global_driver() and reset_global_driver().
#   _driver             -- the cached Driver, or None when nothing is cached
#   _driver_unavailable -- set to True once get_driver() has returned None;
#                          cleared again only by reset_global_driver()
_driver: Optional[Driver] = None
_driver_unavailable: bool = False


def get_global_driver() -> Optional[Driver]:
    """Get or create the global Neo4j driver instance.

    Both outcomes of the first connection attempt are cached. A successful
    driver is reused on every call. A failed attempt sets
    ``_driver_unavailable`` so that later calls return ``None`` immediately
    instead of repeating the slow retry loop inside ``get_driver()``.

    The failure state is not permanent: ``reset_global_driver()`` clears it
    and the next call here attempts a fresh connection.

    Returns:
        The shared driver, or ``None`` when the connection attempt failed
        (now or on an earlier call).
    """
    global _driver, _driver_unavailable

    # Fast-fail: a previous attempt already failed, so do not retry here.
    if _driver_unavailable:
        return None

    if _driver is None:
        _driver = get_driver()
        if _driver is None:
            # Remember the failure so later callers skip the retry loop.
            _driver_unavailable = True
            logger.error(
                "Neo4j driver unavailable — all subsequent Neo4j calls will fail fast "
                "until reset_global_driver() is called or the process restarts"
            )

    return _driver
|
||||||
|
|
||||||
|
def reset_global_driver() -> None:
    """Reset the cached driver, forcing a reconnection attempt on the next call.

    Call this if Neo4j becomes available after the process started.

    The cached state is cleared *before* the old driver is closed. If
    closing raises, the exception still propagates to the caller, but the
    module no longer holds a reference to the stale driver and the
    fail-fast flag is already cleared, so the next ``get_global_driver()``
    call attempts a fresh connection.
    """
    global _driver, _driver_unavailable

    # Detach the old driver first so a failing close() cannot leave it cached.
    stale_driver = _driver
    _driver = None
    _driver_unavailable = False

    if stale_driver is not None:
        close_driver(stale_driver)

    logger.info("Global Neo4j driver reset — will reconnect on next request")
|
||||||
|
|
||||||
@contextmanager
def get_session(database: Optional[str] = None) -> Generator[Session, None, None]:
    """Yield a Neo4j session opened on the global driver.

    Args:
        database: Name of the database passed through to
            ``driver.session``; ``None`` is forwarded unchanged.

    Yields:
        The session, which is closed when the ``with`` block exits,
        whether normally or through an exception.

    Raises:
        Exception: If ``get_global_driver()`` returns ``None``.
    """
    neo4j_driver = get_global_driver()
    if neo4j_driver is None:
        raise Exception("Failed to get Neo4j driver")

    # Open before entering the try block: if opening fails there is
    # nothing to close and the error simply propagates.
    active_session = neo4j_driver.session(database=database)
    try:
        yield active_session
    finally:
        if active_session:
            active_session.close()
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user