Compare commits

..

5 Commits

Author SHA1 Message Date
JamesFlare1212
f21a400c82 fix(auth): prevent cookie loss during remote server timeout storms
Server timeouts caused orphaned fetchActivityData calls to fire clearCookieCache()
asynchronously, destroying cookies for all concurrent callers. Three fixes:

1. Replace Promise.race timeout with AbortController to properly cancel
   orphaned fetches and prevent delayed clearCookieCache() calls
2. Add cookie backup/restore — backupCookies() before clearCookieCache(),
   restoreCookieBackup() if re-login fails, so cookies are never lost
3. Add 15s auth failure throttle to block thundering herd re-logins when
   server slowdowns generate many 500 errors simultaneously
2026-04-23 03:06:15 -04:00
JamesFlare1212
73e953f579 fix(auth): distinguish 500 (cookie expired) from other 5xx (real outage)
KEY INSIGHT:
- 500 = cookie expiration (early signal, re-login immediately)
- 502/503/504 = real server outage (bad gateway, service unavailable, gateway timeout)

BEHAVIOR:
- On 500: throw AuthenticationError → immediate re-login
- On 502/503/504: preserve cache, don't re-login (server is down)
- On 401/403: throw AuthenticationError → re-login

This prevents unnecessary re-login attempts during actual server outages
while still handling cookie expiration immediately.
2026-04-11 11:43:35 -04:00
JamesFlare1212
71116f9f6e fix(auth): treat 5xx as cookie expiration and re-login immediately
KEY DISCOVERY: 5xx errors are early signs of cookie expiration.
The backend returns 500 when cookie is expired but session not yet invalidated.
It takes several hours before it returns 401/403.

CHANGES:
1. On 5xx: throw AuthenticationError to trigger immediate re-login
2. Removed cookie validation logic (no longer needed)
3. Cache still preserved during re-login process
4. Re-login happens within same request, not on next request

This fixes the issue where expired cookies would cause 5xx errors
for hours before any re-login attempt was made.
2026-04-11 10:55:31 -04:00
JamesFlare1212
13eccdd3cc fix(validation): use activity 3350 and detect server outage
Cookie validation improvements:
1. Validate with activity ID 3350 (more reliable test endpoint)
2. Distinguish 5xx (outage) from 4xx (invalid cookie) during validation
3. On 5xx during validation: preserve cookie, don't re-login (server outage)
4. On 401/403 during validation: clear cookie and re-login
5. On network error during validation: preserve cookie (treat as server issue)

This prevents unnecessary re-logins during server outages.
2026-04-10 23:41:51 -04:00
JamesFlare1212
c447dc51ee fix(cache): prevent data loss on 5xx and validate cookies
Critical fixes:
1. getActivityDetailsRaw never throws on 5xx - returns null immediately
2. Cache-manager preserves existing data when fetch returns null
3. After 5xx error, validate cookie on next request (backend may invalidate sessions)
4. Cookie validation: fetch activity ID 1 to test, re-login if fails

This prevents local cache corruption during server outages.
2026-04-10 23:41:18 -04:00
5 changed files with 559 additions and 98 deletions

View File

@@ -1,12 +1,15 @@
// engage-api/get-activity.ts
import axios from 'axios';
import axios, { type AxiosRequestConfig } from 'axios';
import { logger } from '../utils/logger';
import {
ensureSingleLogin,
loadCachedCookies,
saveCookiesToCache,
clearCookieCache,
getCachedCookieString
getCachedCookieString,
backupCookies,
restoreCookieBackup,
tryAcquireAuthLock,
releaseAuthCooldown
} from '../services/playwright-auth';
// Define interfaces for our data structures
@@ -51,7 +54,8 @@ async function getActivityDetailsRaw(
activityId: string,
cookies: string,
maxRetries: number = 3,
timeoutMilliseconds: number = 10000
timeoutMilliseconds: number = 10000,
signal?: AbortSignal
): Promise<string | null> {
const url = 'https://engage.nkcswx.cn/Services/ActivitiesService.asmx/GetActivityDetails';
const headers = {
@@ -65,22 +69,37 @@ async function getActivityDetailsRaw(
};
for (let attempt = 0; attempt < maxRetries; attempt++) {
if (signal?.aborted) {
logger.debug(`Activity ${activityId} aborted before attempt ${attempt + 1}`);
return null;
}
try {
logger.debug(`Attempt ${attempt + 1}/${maxRetries} for activity ${activityId} - Sending POST request to ${url}`);
const response = await axios.post(url, payload, {
headers,
timeout: timeoutMilliseconds,
responseType: 'text',
// Add additional timeout safety
signal,
maxRedirects: 5
});
// CRITICAL: Only accept HTTP 200. Reject all other status codes including 5xx
if (response.status !== 200) {
logger.error(`Non-200 status ${response.status} for activity ${activityId}. NOT updating cache to preserve local data.`);
if (attempt === maxRetries - 1) {
logger.error(`All ${maxRetries} retries failed with non-200 status for activity ${activityId}.`);
// IMPORTANT: Only 500 is cookie expiration. Other 5xx (502/503/504) are real server outages.
// The backend returns 500 when cookie is expired but session not yet invalidated.
// It takes several hours before it returns 401/403.
// 502/503/504 are real server errors (bad gateway, service unavailable, gateway timeout)
if (response.status === 500) {
logger.warn(`Server error 500 - this is cookie expiration. Throwing AuthenticationError to trigger immediate re-login.`);
throw new AuthenticationError(`Received 500 for activity ${activityId} - expired cookie`, 500);
} else if (response.status >= 500 && response.status < 600) {
// Real server outage (502/503/504), preserve cache and don't re-login
logger.error(`Real server outage ${response.status} - preserving local cache, not re-login.`);
}
// Return null immediately on non-200 errors
return null;
}
@@ -97,6 +116,7 @@ async function getActivityDetailsRaw(
logger.error(`Unexpected API response structure for activity ${activityId}.`);
}
} catch (error: any) {
// Only treat 401 (Unauthorized) and 403 (Forbidden) as authentication errors
// 404 (Not Found) is valid - activity doesn't exist
// Other 4xx/5xx errors should not trigger re-authentication
@@ -108,14 +128,21 @@ async function getActivityDetailsRaw(
if (error.response) {
logger.error(`Status: ${error.response.status}, Data (getActivityDetailsRaw): ${ String(error.response.data).slice(0,100)}...`);
// CRITICAL: 5xx errors should NOT update cache
if (error.response.status >= 500 && error.response.status < 600) {
logger.error(`Server error ${error.response.status} - preserving local cache, not updating.`);
// IMPORTANT: Only 500 is cookie expiration. Other 5xx (502/503/504) are real server outages.
// The backend returns 500 when cookie is expired but session not yet invalidated.
// 502/503/504 are real server errors (bad gateway, service unavailable, gateway timeout)
if (error.response.status === 500) {
logger.warn(`Server error 500 - this is cookie expiration. Throwing AuthenticationError to trigger immediate re-login.`);
throw new AuthenticationError(`Received 500 for activity ${activityId} - expired cookie`, 500);
} else if (error.response.status >= 500 && error.response.status < 600) {
// Real server outage (502/503/504), preserve cache and don't re-login
logger.error(`Real server outage ${error.response.status} - preserving local cache, not re-login.`);
}
}
if (attempt === maxRetries - 1) {
logger.error(`All ${maxRetries} retries failed for activity ${activityId}.`);
throw error;
// Don't throw on network/timeout errors, just return null to preserve cache
return null;
}
await new Promise(resolve => setTimeout(resolve, 1000 * (attempt + 1)));
}
@@ -136,6 +163,7 @@ export async function fetchActivityData(
userName: string,
userPwd: string,
forceLogin: boolean = false,
signal?: AbortSignal
): Promise<any | null> {
let currentCookie = forceLogin ? null : await getCachedCookieString();
@@ -145,17 +173,10 @@ export async function fetchActivityData(
currentCookie = null;
}
// Optimization: Skip pre-validation, directly request data
// Only validate/re-login when we get 4xx error (fail-fast strategy)
if (!currentCookie) {
logger.info('No cached cookie found. Attempting login...');
try {
currentCookie = await getCompleteCookies(userName, userPwd);
const cookies = await loadCachedCookies();
if (cookies) {
await saveCookiesToCache(cookies);
}
} catch (loginError) {
logger.error(`Login process failed: ${(loginError as Error).message}`);
return null;
@@ -167,11 +188,12 @@ export async function fetchActivityData(
return null;
}
logger.debug('Using cached cookie for API request.');
try {
logger.debug(`Calling getActivityDetailsRaw for activity ${activityId}...`);
const rawActivityDetailsString = await getActivityDetailsRaw(activityId, currentCookie);
const rawActivityDetailsString = await getActivityDetailsRaw(activityId, currentCookie, 3, 10000, signal);
logger.debug(`getActivityDetailsRaw returned for activity ${activityId}`);
if (rawActivityDetailsString) {
const parsedOuter = JSON.parse(rawActivityDetailsString);
@@ -180,22 +202,28 @@ export async function fetchActivityData(
logger.warn(`No data returned from getActivityDetailsRaw for activity ${activityId}, but no authentication error was thrown.`);
return null;
} catch (error) {
if (signal?.aborted) {
logger.debug(`Activity ${activityId} fetch aborted.`);
return null;
}
if (error instanceof AuthenticationError) {
// Cookie returned 4xx, now validate and re-login
logger.warn(`API returned 4xx error (Status: ${error.status}). Cookie may be invalid. Attempting re-login and retry.`);
// Throttle: prevent thundering herd from multiple 500 errors
if (!tryAcquireAuthLock()) {
logger.info(`Auth throttled for activity ${activityId}. Reusing current cookies — likely still valid.`);
return null;
}
// Backup cookies before clearing so we can restore on re-login failure
backupCookies();
await clearCookieCache();
try {
logger.info('Attempting re-login due to authentication failure...');
currentCookie = await getCompleteCookies(userName, userPwd);
const cookies = await loadCachedCookies();
if (cookies) {
await saveCookiesToCache(cookies);
}
releaseAuthCooldown();
logger.info('Re-login successful. Retrying request for activity details...');
const rawActivityDetailsStringRetry = await getActivityDetailsRaw(activityId, currentCookie);
const rawActivityDetailsStringRetry = await getActivityDetailsRaw(activityId, currentCookie, 1, 10000, signal);
if (rawActivityDetailsStringRetry) {
const parsedOuterRetry = JSON.parse(rawActivityDetailsStringRetry);
return JSON.parse(parsedOuterRetry.d);
@@ -203,7 +231,9 @@ export async function fetchActivityData(
logger.warn(`Still no details for activity ${activityId} after re-login and retry.`);
return null;
} catch (retryLoginOrFetchError) {
logger.error(`Error during re-login or retry fetch for activity ${activityId}: ${(retryLoginOrFetchError as Error).message}`);
logger.error(`Re-login or retry failed for activity ${activityId}: ${(retryLoginOrFetchError as Error).message}`);
// Restore old cookies instead of leaving cache empty
await restoreCookieBackup();
return null;
}
} else {

View File

@@ -14,7 +14,7 @@ import {
import { uploadImageFromBase64, listS3Objects, constructS3Url } from './s3-service';
import { extractBase64Image } from '../utils/image-processor';
import { logger } from '../utils/logger';
import { BatchProcessor } from '../utils/semaphore';
import type { ActivityData } from '../models/activity';
@@ -30,38 +30,8 @@ const STAFF_UPDATE_INTERVAL_MINS = parseInt(process.env.STAFF_UPDATE_INTERVAL_MI
const FIXED_STAFF_ACTIVITY_ID = process.env.FIXED_STAFF_ACTIVITY_ID;
const S3_IMAGE_PREFIX = (process.env.S3_PUBLIC_URL_PREFIX || 'files').replace(/\/$/, '');
/**
* Extracts S3 object key from a public URL.
* Handles both S3_PUBLIC_URL and S3_ENDPOINT formats.
*
* Examples:
* - https://cdn.example.com/bucket/files/abc123.avif → files/abc123.avif
* - https://s3.amazonaws.com/bucket/files/abc123.avif → files/abc123.avif
*
* @param url - The full S3 public URL
* @returns The object key (e.g., "files/abc123.avif") or null if parsing fails
*/
function extractObjectKeyFromUrl(url: string): string | null {
try {
const urlObj = new URL(url);
const path = urlObj.pathname;
const parts = path.replace(/^\//, '').split('/').filter(p => p);
if (parts.length >= 2) {
const key = parts.slice(1).join('/');
return key;
}
logger.warn(`Failed to extract object key from URL: ${url} - insufficient path parts`);
return null;
} catch (error) {
logger.warn(`Failed to parse URL: ${url}`, error);
return null;
}
}
// Crawler configuration
// Crawler concurrency configuration
const CONCURRENT_API_CALLS = parseInt(process.env.CONCURRENT_API_CALLS || '8', 10);
const CRAWLER_REQUEST_TIMEOUT_MS = parseInt(process.env.CRAWLER_REQUEST_TIMEOUT_MS || '25000', 10);
const CRAWLER_MAX_RETRIES = parseInt(process.env.CRAWLER_MAX_RETRIES || '3', 10);
const CRAWLER_RETRY_DELAY_MS = parseInt(process.env.CRAWLER_RETRY_DELAY_MS || '1000', 10);
@@ -82,14 +52,33 @@ async function processAndCacheActivity(activityId: string, forceUpdate: boolean
throw new Error('API username or password not configured');
}
// Add timeout protection for the entire fetch operation
// Add timeout protection via AbortController - properly cancels orphaned fetches
logger.debug(`Fetching activity data for ID: ${activityId}`);
const activityJson = await Promise.race([
fetchActivityData(activityId, USERNAME, PASSWORD, false),
new Promise((_, reject) =>
setTimeout(() => reject(new Error(`Timeout fetching activity ${activityId} after ${CRAWLER_REQUEST_TIMEOUT_MS}ms`)), CRAWLER_REQUEST_TIMEOUT_MS + 5000)
)
]);
const controller = new AbortController();
const timeoutId = setTimeout(
() => controller.abort(),
CRAWLER_REQUEST_TIMEOUT_MS + 5000
);
let activityJson: any = null;
try {
activityJson = await fetchActivityData(
activityId,
USERNAME,
PASSWORD,
false,
controller.signal
);
} finally {
clearTimeout(timeoutId);
}
if (controller.signal.aborted) {
logger.warn(`Request for activity ${activityId} timed out after ${CRAWLER_REQUEST_TIMEOUT_MS + 5000}ms. Cancelling orphaned fetch.`);
// Preserve existing cache on timeout
const existingData = await getActivityData(activityId);
return existingData || { lastCheck: new Date().toISOString(), error: `Timeout after ${CRAWLER_REQUEST_TIMEOUT_MS + 5000}ms` };
}
let structuredActivity: ActivityData;
if (!activityJson) {
@@ -172,10 +161,11 @@ async function processSingleActivity(activityId: string): Promise<void> {
/**
* Initialize the club cache by scanning through all activity IDs
* Processed sequentially
* Processed concurrently with controlled parallelism
*/
export async function initializeClubCache(): Promise<void> {
logger.info(`Starting initial club cache population from ID ${MIN_ACTIVITY_ID_SCAN} to ${MAX_ACTIVITY_ID_SCAN}`);
logger.info(`Concurrency: ${CONCURRENT_API_CALLS} parallel requests`);
const totalIds = MAX_ACTIVITY_ID_SCAN - MIN_ACTIVITY_ID_SCAN + 1;
let successCount = 0;
@@ -188,23 +178,30 @@ export async function initializeClubCache(): Promise<void> {
(_, i) => String(MIN_ACTIVITY_ID_SCAN + i)
);
// Process all activities sequentially
for (let i = 0; i < activityIds.length; i++) {
const activityId = activityIds[i]!;
try {
// Create batch processor with concurrency control
const processor = new BatchProcessor(
async (activityId: string) => {
await processSingleActivity(activityId);
successCount++;
} catch (error) {
errorCount++;
logger.error(`Error processing activity ID ${activityId}:`, error);
return activityId;
},
CONCURRENT_API_CALLS,
{
onError: (error, activityId) => {
errorCount++;
logger.error(`Error processing activity ID ${activityId}:`, error);
},
onProgress: (completed, total) => {
if (completed % 100 === 0 || completed === total) {
const mem = process.memoryUsage();
logger.info(`Progress: ${completed}/${total} (${Math.round(completed/total*100)}%) - Success: ${successCount}, Skipped: ${skippedCount}, Errors: ${errorCount} | Heap: ${Math.round(mem.heapUsed/1024/1024)}MB | Concurrent: ${CONCURRENT_API_CALLS}`);
}
}
}
// Log progress every 100 activities or at completion
if ((i + 1) % 100 === 0 || i === activityIds.length - 1) {
const mem = process.memoryUsage();
logger.info(`Progress: ${i + 1}/${totalIds} (${Math.round((i + 1) / totalIds * 100)}%) - Success: ${successCount}, Skipped: ${skippedCount}, Errors: ${errorCount} | Heap: ${Math.round(mem.heapUsed / 1024 / 1024)}MB`);
}
}
);
// Process all activities concurrently
const results = await processor.process(activityIds);
successCount = results.length;
logger.info(`Initial club cache population finished.`);
logger.info(`Summary: Total: ${totalIds}, Processed: ${activityIds.length}, Success: ${successCount}, Skipped: ${skippedCount}, Errors: ${errorCount}`);
@@ -212,10 +209,11 @@ export async function initializeClubCache(): Promise<void> {
/**
* Update stale clubs in the cache
* Processed sequentially
* Processed concurrently with controlled parallelism
*/
export async function updateStaleClubs(): Promise<void> {
logger.info('Starting stale club check...');
logger.info(`Concurrency: ${CONCURRENT_API_CALLS} parallel requests`);
const now = Date.now();
const updateIntervalMs = CLUB_UPDATE_INTERVAL_MINS * 60 * 1000;
const activityKeys = await getAllActivityKeys();
@@ -245,21 +243,28 @@ export async function updateStaleClubs(): Promise<void> {
logger.info(`Found ${staleActivityIds.length} stale activities to update.`);
// Process stale activities sequentially
for (let i = 0; i < staleActivityIds.length; i++) {
const activityId = staleActivityIds[i]!;
try {
// Create batch processor for concurrent updates
const processor = new BatchProcessor(
async (activityId: string) => {
logger.debug(`Updating stale activity ${activityId}`);
await processAndCacheActivity(activityId);
} catch (error) {
logger.error(`Error updating stale activity ${activityId}:`, error);
return activityId;
},
CONCURRENT_API_CALLS,
{
onError: (error, activityId) => {
logger.error(`Error updating stale activity ${activityId}:`, error);
},
onProgress: (completed, total) => {
if (completed % 10 === 0 || completed === total) {
logger.info(`Update progress: ${completed}/${total} (${Math.round(completed/total*100)}%)`);
}
}
}
// Log progress every 10 activities or at completion
if ((i + 1) % 10 === 0 || i === staleActivityIds.length - 1) {
logger.info(`Update progress: ${i + 1}/${staleActivityIds.length} (${Math.round((i + 1) / staleActivityIds.length * 100)}%)`);
}
}
);
// Process stale activities concurrently
await processor.process(staleActivityIds);
logger.info('Stale club check finished.');
}

View File

@@ -11,6 +11,39 @@ let _inMemoryCookies: Cookie[] | null = null;
// Login lock to prevent concurrent login attempts
let _loginLock: Promise<Cookie[]> | null = null;
// Cookie backup: preserved before clearCookieCache, restored on re-login failure
let _cookieBackup: Cookie[] | null = null;
// Auth failure throttle: debounce consecutive re-login triggers from 500 errors
// Prevents thundering herd when server is slow and returns many 500s
let _authFailureCooldownUntil = 0;
const AUTH_FAILURE_COOLDOWN_MS = 15000; // 15s cooldown between re-login cycles
/**
* Put all callers to wait during auth cooldown window.
* Returns true if auth is allowed (outside cooldown), false if throttled.
*/
export function tryAcquireAuthLock(): boolean {
const now = Date.now();
if (now < _authFailureCooldownUntil) {
const remaining = _authFailureCooldownUntil - now;
logger.warn(
`Re-login throttled: ${Math.round(remaining / 1000)}s cooldown remaining. ` +
`Existing cookies are likely still valid — server 500 is a temporary slowdown.`
);
return false;
}
return true;
}
/**
* Called after a successful re-login to release the cooldown.
*/
export function releaseAuthCooldown(): void {
_authFailureCooldownUntil = Date.now() + AUTH_FAILURE_COOLDOWN_MS;
logger.info(`Auth cooldown set: ${AUTH_FAILURE_COOLDOWN_MS}ms to prevent thundering herd re-logins.`);
}
/**
* Ensure only one login process runs at a time
*/
@@ -178,8 +211,40 @@ export async function saveCookiesToCache(cookies: Cookie[]): Promise<void> {
}
}
/**
* Backup current cookies before clearing. Restored if re-login fails.
*/
export function backupCookies(): Cookie[] | null {
if (_inMemoryCookies) {
_cookieBackup = [..._inMemoryCookies];
logger.info('Cookies backed up before clear.');
}
return _cookieBackup;
}
/**
* Restore cookies from backup after failed re-login.
*/
export async function restoreCookieBackup(): Promise<boolean> {
if (_cookieBackup) {
_inMemoryCookies = _cookieBackup;
try {
await fs.promises.writeFile(COOKIE_FILE_PATH, JSON.stringify(_cookieBackup, null, 2), 'utf-8');
logger.info('Cookies restored from backup successfully.');
_cookieBackup = null;
return true;
} catch (error: any) {
logger.error('Failed to restore cookies from backup:', error.message);
return false;
}
}
logger.warn('No cookie backup available for restore.');
return false;
}
/**
* Clear cookie cache
* Prefer backupAndClearCookieCache() instead to preserve old cookies.
*/
export async function clearCookieCache(): Promise<void> {
_inMemoryCookies = null;

141
test/test-concurrency.ts Normal file
View File

@@ -0,0 +1,141 @@
// test/test-concurrency.ts
/**
* Test script for concurrency features
* Run with: bun run test/test-concurrency.ts
*/
import { Semaphore, executeWithConcurrency, BatchProcessor } from '../utils/semaphore';
// Simulate API call
function simulateApiCall(id: number, delay: number = 100): Promise<{ id: number; result: string }> {
return new Promise((resolve) => {
setTimeout(() => {
resolve({ id, result: `Result for ${id}` });
}, delay);
});
}
async function testSemaphore(): Promise<void> {
console.log('\n=== Test 1: Basic Semaphore ===');
const semaphore = new Semaphore(3);
const start = Date.now();
const promises = [];
for (let i = 1; i <= 10; i++) {
const id = i;
promises.push(
(async () => {
await semaphore.acquire();
console.log(`[${id}] Acquired permit (available: ${semaphore.getAvailablePermits()})`);
await simulateApiCall(id, 200);
console.log(`[${id}] Releasing permit`);
semaphore.release();
})()
);
}
await Promise.all(promises);
const duration = Date.now() - start;
console.log(`\n✓ Completed 10 tasks with max 3 concurrent in ${duration}ms`);
console.log(` (Sequential would take ~2000ms, parallel should be ~700-800ms)`);
}
async function testExecuteWithConcurrency(): Promise<void> {
console.log('\n=== Test 2: executeWithConcurrency ===');
const tasks = Array.from({ length: 10 }, (_, i) => () => simulateApiCall(i + 1, 100));
const start = Date.now();
const results = await executeWithConcurrency(tasks, 5);
const duration = Date.now() - start;
console.log(`✓ Completed ${results.length} tasks with max 5 concurrent in ${duration}ms`);
console.log(` Results: ${results.map(r => r.id).join(', ')}`);
}
async function testBatchProcessor(): Promise<void> {
console.log('\n=== Test 3: BatchProcessor ===');
const items = Array.from({ length: 20 }, (_, i) => ({ id: i + 1, name: `Item ${i + 1}` }));
let processedCount = 0;
const processor = new BatchProcessor(
async (item: { id: number; name: string }) => {
await simulateApiCall(item.id, 50);
processedCount++;
return { ...item, processed: true };
},
4, // concurrency
{
onProgress: (completed, total) => {
if (completed % 5 === 0 || completed === total) {
console.log(` Progress: ${completed}/${total} (${Math.round(completed / total * 100)}%)`);
}
},
onError: (error, item) => {
console.error(` Error processing item ${item.id}:`, error.message);
}
}
);
const start = Date.now();
const results = await processor.process(items);
const duration = Date.now() - start;
console.log(`✓ Processed ${results.length} items with max 4 concurrent in ${duration}ms`);
console.log(` Expected ~500ms (20 items / 4 concurrent * 50ms each)`);
}
async function testErrorHandling(): Promise<void> {
console.log('\n=== Test 4: Error Handling ===');
const items = [1, 2, 3, 4, 5];
let errorCount = 0;
const processor = new BatchProcessor(
async (id: number) => {
if (id % 2 === 0) {
throw new Error(`Simulated error for ${id}`);
}
return { id, success: true };
},
3,
{
onError: (error, item) => {
errorCount++;
console.log(` Caught error for item ${item}: ${error.message}`);
}
}
);
const results = await processor.process(items);
console.log(`✓ Completed: ${results.length} success, ${errorCount} errors (errors handled gracefully)`);
}
async function main(): Promise<void> {
console.log('╔═══════════════════════════════════════════════════╗');
console.log('║ Concurrency Module Test Suite ║');
console.log('╚═══════════════════════════════════════════════════╝');
try {
await testSemaphore();
await testExecuteWithConcurrency();
await testBatchProcessor();
await testErrorHandling();
console.log('\n╔═══════════════════════════════════════════════════╗');
console.log('║ All tests passed! ✓ ║');
console.log('╚═══════════════════════════════════════════════════╝');
console.log('\n📝 Configuration:');
console.log(' - Set CONCURRENT_API_CALLS in .env to control parallelism');
console.log(' - Current default: 8 concurrent requests');
console.log(' - Example: CONCURRENT_API_CALLS=16 for faster crawling');
console.log('');
} catch (error) {
console.error('\n❌ Test failed:', error);
process.exit(1);
}
}
main();

220
utils/semaphore.ts Normal file
View File

@@ -0,0 +1,220 @@
// utils/semaphore.ts
/**
* Semaphore implementation for controlling concurrent operations
* Based on patterns from civitai/civitai and p-queue
*/
export class Semaphore {
private capacity: number;
private permits: number;
private queue: Array<() => void> = [];
constructor(capacity: number) {
if (capacity < 1) {
throw new Error('Semaphore capacity must be at least 1');
}
this.capacity = capacity;
this.permits = capacity;
}
/**
* Acquire a permit. If none available, waits until one is released.
*/
async acquire(): Promise<void> {
return new Promise<void>((resolve) => {
if (this.permits > 0) {
this.permits--;
resolve();
} else {
// Queue the release callback
this.queue.push(() => {
resolve();
});
}
});
}
/**
* Release a permit and wake up a waiting task if any.
*/
release(): void {
if (this.queue.length > 0) {
const next = this.queue.shift();
if (next) {
next();
}
} else {
this.permits++;
}
}
/**
* Get current available permits.
*/
getAvailablePermits(): number {
return this.permits;
}
/**
* Get total capacity.
*/
getCapacity(): number {
return this.capacity;
}
/**
* Get number of waiting tasks.
*/
getWaitingCount(): number {
return this.queue.length;
}
}
/**
* Execute async tasks with concurrency limit
* @param tasks Array of async task functions
* @param concurrency Maximum concurrent tasks
* @returns Promise that resolves with all results when complete
*/
export async function executeWithConcurrency<T>(
tasks: Array<() => Promise<T>>,
concurrency: number
): Promise<T[]> {
const semaphore = new Semaphore(concurrency);
const results: T[] = new Array(tasks.length);
const promises = tasks.map(async (task, index) => {
await semaphore.acquire();
try {
results[index] = await task();
return results[index];
} finally {
semaphore.release();
}
});
return Promise.all(promises);
}
/**
* Execute async tasks with concurrency limit and progress callback
* @param tasks Array of async task functions
* @param concurrency Maximum concurrent tasks
* @param onProgress Callback with (completed, total, result) after each task
* @returns Promise that resolves with all results when complete
*/
export async function executeWithConcurrencyAndProgress<T>(
tasks: Array<() => Promise<T>>,
concurrency: number,
onProgress?: (completed: number, total: number, result: T, error?: Error) => void
): Promise<T[]> {
const semaphore = new Semaphore(concurrency);
const results: T[] = new Array(tasks.length);
let completed = 0;
const total = tasks.length;
const promises = tasks.map(async (task, index) => {
await semaphore.acquire();
try {
results[index] = await task();
completed++;
onProgress?.(completed, total, results[index]!);
return results[index];
} catch (error) {
completed++;
onProgress?.(completed, total, undefined as T, error as Error);
throw error;
} finally {
semaphore.release();
}
});
return Promise.all(promises);
}
/**
* Batch processor with concurrency control
* Useful for processing large arrays in chunks with controlled concurrency
*/
export class BatchProcessor<T, R> {
private semaphore: Semaphore;
private processor: (item: T, index: number) => Promise<R>;
private onError?: (error: Error, item: T, index: number) => void;
private onProgress?: (completed: number, total: number) => void;
constructor(
processor: (item: T, index: number) => Promise<R>,
concurrency: number,
options?: {
onError?: (error: Error, item: T, index: number) => void;
onProgress?: (completed: number, total: number) => void;
}
) {
this.processor = processor;
this.semaphore = new Semaphore(concurrency);
this.onError = options?.onError;
this.onProgress = options?.onProgress;
}
/**
* Process an array of items with concurrency control
* Only returns successful results, errors are handled by onError callback
*/
async process(items: T[]): Promise<Awaited<R>[]> {
const results: (Awaited<R> | undefined)[] = new Array(items.length);
let completed = 0;
const total = items.length;
const promises = items.map(async (item, index) => {
await this.semaphore.acquire();
try {
const result = await this.processor(item, index);
completed++;
this.onProgress?.(completed, total);
return result;
} catch (error) {
completed++;
this.onProgress?.(completed, total);
this.onError?.(error as Error, item, index);
return undefined;
} finally {
this.semaphore.release();
}
});
const allResults = await Promise.all(promises);
return allResults.filter((r): r is Awaited<R> => r !== undefined);
}
/**
* Process an array and return both results and errors
*/
async processWithErrors(items: T[]): Promise<{
results: R[];
errors: Array<{ error: Error; item: T; index: number }>;
}> {
const results: R[] = [];
const errors: Array<{ error: Error; item: T; index: number }> = [];
let completed = 0;
const total = items.length;
const promises = items.map(async (item, index) => {
await this.semaphore.acquire();
try {
const result = await this.processor(item, index);
results.push(result);
completed++;
this.onProgress?.(completed, total);
} catch (error) {
completed++;
this.onProgress?.(completed, total);
errors.push({ error: error as Error, item, index });
} finally {
this.semaphore.release();
}
});
await Promise.all(promises);
return { results, errors };
}
}