Compare commits

...

3 Commits

Author SHA1 Message Date
JamesFlare1212
f21a400c82 fix(auth): prevent cookie loss during remote server timeout storms
Server timeouts caused orphaned fetchActivityData calls to fire clearCookieCache()
asynchronously, destroying cookies for all concurrent callers. Three fixes:

1. Replace Promise.race timeout with AbortController to properly cancel
   orphaned fetches and prevent delayed clearCookieCache() calls
2. Add cookie backup/restore — backupCookies() before clearCookieCache(),
   restoreCookieBackup() if re-login fails, so cookies are never lost
3. Add 15s auth failure throttle to block thundering herd re-logins when
   server slowdowns generate many 500 errors simultaneously
2026-04-23 03:06:15 -04:00
JamesFlare1212
73e953f579 fix(auth): distinguish 500 (cookie expired) from other 5xx (real outage)
KEY INSIGHT:
- 500 = cookie expiration (early signal, re-login immediately)
- 502/503/504 = real server outage (bad gateway, service unavailable, gateway timeout)

BEHAVIOR:
- On 500: throw AuthenticationError → immediate re-login
- On 502/503/504: preserve cache, don't re-login (server is down)
- On 401/403: throw AuthenticationError → re-login

This prevents unnecessary re-login attempts during actual server outages
while still handling cookie expiration immediately.
2026-04-11 11:43:35 -04:00
JamesFlare1212
71116f9f6e fix(auth): treat 5xx as cookie expiration and re-login immediately
KEY DISCOVERY: 5xx errors are early signs of cookie expiration.
The backend returns 500 when cookie is expired but session not yet invalidated.
It takes several hours before it returns 401/403.

CHANGES:
1. On 5xx: throw AuthenticationError to trigger immediate re-login
2. Removed cookie validation logic (no longer needed)
3. Cache still preserved during re-login process
4. Re-login happens within same request, not on next request

This fixes the issue where expired cookies would cause 5xx errors
for hours before any re-login attempt was made.
2026-04-11 10:55:31 -04:00
3 changed files with 144 additions and 110 deletions

View File

@@ -1,12 +1,15 @@
// engage-api/get-activity.ts // engage-api/get-activity.ts
import axios from 'axios'; import axios, { type AxiosRequestConfig } from 'axios';
import { logger } from '../utils/logger'; import { logger } from '../utils/logger';
import { import {
ensureSingleLogin, ensureSingleLogin,
loadCachedCookies,
saveCookiesToCache, saveCookiesToCache,
clearCookieCache, clearCookieCache,
getCachedCookieString getCachedCookieString,
backupCookies,
restoreCookieBackup,
tryAcquireAuthLock,
releaseAuthCooldown
} from '../services/playwright-auth'; } from '../services/playwright-auth';
// Define interfaces for our data structures // Define interfaces for our data structures
@@ -51,7 +54,8 @@ async function getActivityDetailsRaw(
activityId: string, activityId: string,
cookies: string, cookies: string,
maxRetries: number = 3, maxRetries: number = 3,
timeoutMilliseconds: number = 10000 timeoutMilliseconds: number = 10000,
signal?: AbortSignal
): Promise<string | null> { ): Promise<string | null> {
const url = 'https://engage.nkcswx.cn/Services/ActivitiesService.asmx/GetActivityDetails'; const url = 'https://engage.nkcswx.cn/Services/ActivitiesService.asmx/GetActivityDetails';
const headers = { const headers = {
@@ -65,13 +69,17 @@ async function getActivityDetailsRaw(
}; };
for (let attempt = 0; attempt < maxRetries; attempt++) { for (let attempt = 0; attempt < maxRetries; attempt++) {
if (signal?.aborted) {
logger.debug(`Activity ${activityId} aborted before attempt ${attempt + 1}`);
return null;
}
try { try {
logger.debug(`Attempt ${attempt + 1}/${maxRetries} for activity ${activityId} - Sending POST request to ${url}`); logger.debug(`Attempt ${attempt + 1}/${maxRetries} for activity ${activityId} - Sending POST request to ${url}`);
const response = await axios.post(url, payload, { const response = await axios.post(url, payload, {
headers, headers,
timeout: timeoutMilliseconds, timeout: timeoutMilliseconds,
responseType: 'text', responseType: 'text',
// Add additional timeout safety signal,
maxRedirects: 5 maxRedirects: 5
}); });
@@ -79,14 +87,19 @@ async function getActivityDetailsRaw(
if (response.status !== 200) { if (response.status !== 200) {
logger.error(`Non-200 status ${response.status} for activity ${activityId}. NOT updating cache to preserve local data.`); logger.error(`Non-200 status ${response.status} for activity ${activityId}. NOT updating cache to preserve local data.`);
// On 5xx errors, set flag to validate cookie on next request // IMPORTANT: Only 500 is cookie expiration. Other 5xx (502/503/504) are real server outages.
// Backend may be in degraded state and invalidated sessions // The backend returns 500 when cookie is expired but session not yet invalidated.
if (response.status >= 500 && response.status < 600) { // It takes several hours before it returns 401/403.
logger.warn(`Server error ${response.status} - will validate cookie on next request.`); // 502/503/504 are real server errors (bad gateway, service unavailable, gateway timeout)
shouldValidateCookieOnNextRequest = true; if (response.status === 500) {
logger.warn(`Server error 500 - this is cookie expiration. Throwing AuthenticationError to trigger immediate re-login.`);
throw new AuthenticationError(`Received 500 for activity ${activityId} - expired cookie`, 500);
} else if (response.status >= 500 && response.status < 600) {
// Real server outage (502/503/504), preserve cache and don't re-login
logger.error(`Real server outage ${response.status} - preserving local cache, not re-login.`);
} }
// Return null immediately on non-200, don't retry // Return null immediately on non-200 errors
return null; return null;
} }
@@ -115,12 +128,15 @@ async function getActivityDetailsRaw(
if (error.response) { if (error.response) {
logger.error(`Status: ${error.response.status}, Data (getActivityDetailsRaw): ${ String(error.response.data).slice(0,100)}...`); logger.error(`Status: ${error.response.status}, Data (getActivityDetailsRaw): ${ String(error.response.data).slice(0,100)}...`);
// CRITICAL: 5xx errors should NOT update cache, return null immediately // IMPORTANT: Only 500 is cookie expiration. Other 5xx (502/503/504) are real server outages.
if (error.response.status >= 500 && error.response.status < 600) { // The backend returns 500 when cookie is expired but session not yet invalidated.
logger.error(`Server error ${error.response.status} - preserving local cache, not updating.`); // 502/503/504 are real server errors (bad gateway, service unavailable, gateway timeout)
// Set flag to validate cookie on next request if (error.response.status === 500) {
shouldValidateCookieOnNextRequest = true; logger.warn(`Server error 500 - this is cookie expiration. Throwing AuthenticationError to trigger immediate re-login.`);
return null; throw new AuthenticationError(`Received 500 for activity ${activityId} - expired cookie`, 500);
} else if (error.response.status >= 500 && error.response.status < 600) {
// Real server outage (502/503/504), preserve cache and don't re-login
logger.error(`Real server outage ${error.response.status} - preserving local cache, not re-login.`);
} }
} }
if (attempt === maxRetries - 1) { if (attempt === maxRetries - 1) {
@@ -134,9 +150,6 @@ async function getActivityDetailsRaw(
return null; return null;
} }
// Flag to track if we need to validate cookies after server errors
let shouldValidateCookieOnNextRequest = false;
/** /**
* Main exported function. Handles cookie caching, validation, re-authentication, and fetches activity details. * Main exported function. Handles cookie caching, validation, re-authentication, and fetches activity details.
* @param activityId - The ID of the activity to fetch. * @param activityId - The ID of the activity to fetch.
@@ -150,6 +163,7 @@ export async function fetchActivityData(
userName: string, userName: string,
userPwd: string, userPwd: string,
forceLogin: boolean = false, forceLogin: boolean = false,
signal?: AbortSignal
): Promise<any | null> { ): Promise<any | null> {
let currentCookie = forceLogin ? null : await getCachedCookieString(); let currentCookie = forceLogin ? null : await getCachedCookieString();
@@ -159,17 +173,10 @@ export async function fetchActivityData(
currentCookie = null; currentCookie = null;
} }
// Optimization: Skip pre-validation, directly request data
// Only validate/re-login when we get 4xx error OR after 5xx (backend may be in degraded state)
if (!currentCookie) { if (!currentCookie) {
logger.info('No cached cookie found. Attempting login...'); logger.info('No cached cookie found. Attempting login...');
try { try {
currentCookie = await getCompleteCookies(userName, userPwd); currentCookie = await getCompleteCookies(userName, userPwd);
const cookies = await loadCachedCookies();
if (cookies) {
await saveCookiesToCache(cookies);
}
} catch (loginError) { } catch (loginError) {
logger.error(`Login process failed: ${(loginError as Error).message}`); logger.error(`Login process failed: ${(loginError as Error).message}`);
return null; return null;
@@ -181,101 +188,42 @@ export async function fetchActivityData(
return null; return null;
} }
// Validate cookie after previous 5xx error (backend may have invalidated sessions)
if (shouldValidateCookieOnNextRequest) {
logger.info('Validating cookie after previous server error...');
shouldValidateCookieOnNextRequest = false;
// Simple validation: try to fetch a known activity (ID 3350)
const testResponse = await getActivityDetailsRaw('3350', currentCookie, 1, 5000);
if (!testResponse) {
// Check if this is still a server error (5xx) - if so, it's an outage, don't re-login
// Just preserve existing cookie and return null
logger.warn('Cookie validation returned null. Checking if server is still down...');
// Try one more time with explicit status check
try {
const url = 'https://engage.nkcswx.cn/Services/ActivitiesService.asmx/GetActivityDetails';
const headers = {
'Content-Type': 'application/json; charset=UTF-8',
'Cookie': currentCookie,
'User-Agent': 'Mozilla/5.0 (Bun DSAS-CCA Cookie Validator)',
'X-Requested-With': 'XMLHttpRequest'
};
const payload = { "activityID": "3350" };
const validationResponse = await axios.post(url, payload, {
headers,
timeout: 5000,
responseType: 'text'
});
if (validationResponse.status >= 500 && validationResponse.status < 600) {
// Server still returning 5xx - it's an outage, preserve cookie and don't re-login
logger.warn('Server still returning 5xx during validation - treating as server outage, preserving cookie.');
return null;
} else if (validationResponse.status === 401 || validationResponse.status === 403) {
// Cookie is invalid, re-login
logger.warn('Cookie validation failed with 4xx. Re-login required.');
await clearCookieCache();
try {
currentCookie = await getCompleteCookies(userName, userPwd);
const cookies = await loadCachedCookies();
if (cookies) {
await saveCookiesToCache(cookies);
}
logger.info('Re-login completed due to cookie validation failure.');
} catch (loginError) {
logger.error(`Re-login failed: ${(loginError as Error).message}`);
return null;
}
} else {
// Some other error, preserve cookie
logger.warn('Cookie validation failed with unexpected status. Preserving existing cookie.');
return null;
}
} catch (validationError: any) {
// Network error or timeout during validation - treat as server issue, preserve cookie
if (validationError.response && validationError.response.status >= 500) {
logger.warn('Server error during cookie validation - treating as outage, preserving cookie.');
} else {
logger.warn(`Network error during cookie validation: ${validationError.message}. Preserving existing cookie.`);
}
return null;
}
} else {
logger.info('Cookie validation successful after server error.');
}
}
logger.debug('Using cached cookie for API request.'); logger.debug('Using cached cookie for API request.');
try { try {
logger.debug(`Calling getActivityDetailsRaw for activity ${activityId}...`); logger.debug(`Calling getActivityDetailsRaw for activity ${activityId}...`);
const rawActivityDetailsString = await getActivityDetailsRaw(activityId, currentCookie); const rawActivityDetailsString = await getActivityDetailsRaw(activityId, currentCookie, 3, 10000, signal);
logger.debug(`getActivityDetailsRaw returned for activity ${activityId}`); logger.debug(`getActivityDetailsRaw returned for activity ${activityId}`);
if (rawActivityDetailsString) { if (rawActivityDetailsString) {
const parsedOuter = JSON.parse(rawActivityDetailsString); const parsedOuter = JSON.parse(rawActivityDetailsString);
return JSON.parse(parsedOuter.d); return JSON.parse(parsedOuter.d);
} }
// Check if this was a 5xx error and set flag for cookie validation
logger.warn(`No data returned from getActivityDetailsRaw for activity ${activityId}, but no authentication error was thrown.`); logger.warn(`No data returned from getActivityDetailsRaw for activity ${activityId}, but no authentication error was thrown.`);
return null; return null;
} catch (error) { } catch (error) {
if (signal?.aborted) {
logger.debug(`Activity ${activityId} fetch aborted.`);
return null;
}
if (error instanceof AuthenticationError) { if (error instanceof AuthenticationError) {
// Cookie returned 4xx, now validate and re-login // Throttle: prevent thundering herd from multiple 500 errors
logger.warn(`API returned 4xx error (Status: ${error.status}). Cookie may be invalid. Attempting re-login and retry.`); if (!tryAcquireAuthLock()) {
logger.info(`Auth throttled for activity ${activityId}. Reusing current cookies — likely still valid.`);
return null;
}
// Backup cookies before clearing so we can restore on re-login failure
backupCookies();
await clearCookieCache(); await clearCookieCache();
try { try {
logger.info('Attempting re-login due to authentication failure...'); logger.info('Attempting re-login due to authentication failure...');
currentCookie = await getCompleteCookies(userName, userPwd); currentCookie = await getCompleteCookies(userName, userPwd);
releaseAuthCooldown();
const cookies = await loadCachedCookies();
if (cookies) {
await saveCookiesToCache(cookies);
}
logger.info('Re-login successful. Retrying request for activity details...'); logger.info('Re-login successful. Retrying request for activity details...');
const rawActivityDetailsStringRetry = await getActivityDetailsRaw(activityId, currentCookie); const rawActivityDetailsStringRetry = await getActivityDetailsRaw(activityId, currentCookie, 1, 10000, signal);
if (rawActivityDetailsStringRetry) { if (rawActivityDetailsStringRetry) {
const parsedOuterRetry = JSON.parse(rawActivityDetailsStringRetry); const parsedOuterRetry = JSON.parse(rawActivityDetailsStringRetry);
return JSON.parse(parsedOuterRetry.d); return JSON.parse(parsedOuterRetry.d);
@@ -283,7 +231,9 @@ export async function fetchActivityData(
logger.warn(`Still no details for activity ${activityId} after re-login and retry.`); logger.warn(`Still no details for activity ${activityId} after re-login and retry.`);
return null; return null;
} catch (retryLoginOrFetchError) { } catch (retryLoginOrFetchError) {
logger.error(`Error during re-login or retry fetch for activity ${activityId}: ${(retryLoginOrFetchError as Error).message}`); logger.error(`Re-login or retry failed for activity ${activityId}: ${(retryLoginOrFetchError as Error).message}`);
// Restore old cookies instead of leaving cache empty
await restoreCookieBackup();
return null; return null;
} }
} else { } else {

View File

@@ -52,14 +52,33 @@ async function processAndCacheActivity(activityId: string, forceUpdate: boolean
throw new Error('API username or password not configured'); throw new Error('API username or password not configured');
} }
// Add timeout protection for the entire fetch operation // Add timeout protection via AbortController - properly cancels orphaned fetches
logger.debug(`Fetching activity data for ID: ${activityId}`); logger.debug(`Fetching activity data for ID: ${activityId}`);
const activityJson = await Promise.race([ const controller = new AbortController();
fetchActivityData(activityId, USERNAME, PASSWORD, false), const timeoutId = setTimeout(
new Promise((_, reject) => () => controller.abort(),
setTimeout(() => reject(new Error(`Timeout fetching activity ${activityId} after ${CRAWLER_REQUEST_TIMEOUT_MS}ms`)), CRAWLER_REQUEST_TIMEOUT_MS + 5000) CRAWLER_REQUEST_TIMEOUT_MS + 5000
) );
]);
let activityJson: any = null;
try {
activityJson = await fetchActivityData(
activityId,
USERNAME,
PASSWORD,
false,
controller.signal
);
} finally {
clearTimeout(timeoutId);
}
if (controller.signal.aborted) {
logger.warn(`Request for activity ${activityId} timed out after ${CRAWLER_REQUEST_TIMEOUT_MS + 5000}ms. Cancelling orphaned fetch.`);
// Preserve existing cache on timeout
const existingData = await getActivityData(activityId);
return existingData || { lastCheck: new Date().toISOString(), error: `Timeout after ${CRAWLER_REQUEST_TIMEOUT_MS + 5000}ms` };
}
let structuredActivity: ActivityData; let structuredActivity: ActivityData;
if (!activityJson) { if (!activityJson) {

View File

@@ -11,6 +11,39 @@ let _inMemoryCookies: Cookie[] | null = null;
// Login lock to prevent concurrent login attempts // Login lock to prevent concurrent login attempts
let _loginLock: Promise<Cookie[]> | null = null; let _loginLock: Promise<Cookie[]> | null = null;
// Cookie backup: preserved before clearCookieCache, restored on re-login failure
let _cookieBackup: Cookie[] | null = null;
// Auth failure throttle: debounce consecutive re-login triggers from 500 errors
// Prevents thundering herd when server is slow and returns many 500s
let _authFailureCooldownUntil = 0;
const AUTH_FAILURE_COOLDOWN_MS = 15000; // 15s cooldown between re-login cycles
/**
* Put all callers to wait during auth cooldown window.
* Returns true if auth is allowed (outside cooldown), false if throttled.
*/
export function tryAcquireAuthLock(): boolean {
const now = Date.now();
if (now < _authFailureCooldownUntil) {
const remaining = _authFailureCooldownUntil - now;
logger.warn(
`Re-login throttled: ${Math.round(remaining / 1000)}s cooldown remaining. ` +
`Existing cookies are likely still valid — server 500 is a temporary slowdown.`
);
return false;
}
return true;
}
/**
* Called after a successful re-login to release the cooldown.
*/
export function releaseAuthCooldown(): void {
_authFailureCooldownUntil = Date.now() + AUTH_FAILURE_COOLDOWN_MS;
logger.info(`Auth cooldown set: ${AUTH_FAILURE_COOLDOWN_MS}ms to prevent thundering herd re-logins.`);
}
/** /**
* Ensure only one login process runs at a time * Ensure only one login process runs at a time
*/ */
@@ -178,8 +211,40 @@ export async function saveCookiesToCache(cookies: Cookie[]): Promise<void> {
} }
} }
/**
* Backup current cookies before clearing. Restored if re-login fails.
*/
export function backupCookies(): Cookie[] | null {
if (_inMemoryCookies) {
_cookieBackup = [..._inMemoryCookies];
logger.info('Cookies backed up before clear.');
}
return _cookieBackup;
}
/**
* Restore cookies from backup after failed re-login.
*/
export async function restoreCookieBackup(): Promise<boolean> {
if (_cookieBackup) {
_inMemoryCookies = _cookieBackup;
try {
await fs.promises.writeFile(COOKIE_FILE_PATH, JSON.stringify(_cookieBackup, null, 2), 'utf-8');
logger.info('Cookies restored from backup successfully.');
_cookieBackup = null;
return true;
} catch (error: any) {
logger.error('Failed to restore cookies from backup:', error.message);
return false;
}
}
logger.warn('No cookie backup available for restore.');
return false;
}
/** /**
* Clear cookie cache * Clear cookie cache
* Prefer backupAndClearCookieCache() instead to preserve old cookies.
*/ */
export async function clearCookieCache(): Promise<void> { export async function clearCookieCache(): Promise<void> {
_inMemoryCookies = null; _inMemoryCookies = null;