Files
dsas-cca-backend-bun/utils/semaphore.ts
JamesFlare1212 0a133159e8 重构 scan: 实现多线程并发爬虫功能
- 新增 Semaphore 信号量类控制并发数
- 新增 BatchProcessor 批量处理器带进度回调
- 重构 initializeClubCache 和 updateStaleClubs 为并发模式
- 修复 Cookie 4xx 判断逻辑(仅 401/403 触发重新登录)
- 添加环境变量配置:CONCURRENT_API_CALLS 等
- 新增并发功能测试脚本 test-concurrency.ts

性能提升:从串行处理提升至可配置的并发处理(默认 8 线程)
修复问题:404 错误不再误判为认证失败
2026-04-07 18:18:18 -04:00

221 lines
5.8 KiB
TypeScript

// utils/semaphore.ts
/**
* Semaphore implementation for controlling concurrent operations
* Based on patterns from civitai/civitai and p-queue
*/
export class Semaphore {
private capacity: number;
private permits: number;
private queue: Array<() => void> = [];
constructor(capacity: number) {
if (capacity < 1) {
throw new Error('Semaphore capacity must be at least 1');
}
this.capacity = capacity;
this.permits = capacity;
}
/**
* Acquire a permit. If none available, waits until one is released.
*/
async acquire(): Promise<void> {
return new Promise<void>((resolve) => {
if (this.permits > 0) {
this.permits--;
resolve();
} else {
// Queue the release callback
this.queue.push(() => {
resolve();
});
}
});
}
/**
* Release a permit and wake up a waiting task if any.
*/
release(): void {
if (this.queue.length > 0) {
const next = this.queue.shift();
if (next) {
next();
}
} else {
this.permits++;
}
}
/**
* Get current available permits.
*/
getAvailablePermits(): number {
return this.permits;
}
/**
* Get total capacity.
*/
getCapacity(): number {
return this.capacity;
}
/**
* Get number of waiting tasks.
*/
getWaitingCount(): number {
return this.queue.length;
}
}
/**
* Execute async tasks with concurrency limit
* @param tasks Array of async task functions
* @param concurrency Maximum concurrent tasks
* @returns Promise that resolves with all results when complete
*/
export async function executeWithConcurrency<T>(
tasks: Array<() => Promise<T>>,
concurrency: number
): Promise<T[]> {
const semaphore = new Semaphore(concurrency);
const results: T[] = new Array(tasks.length);
const promises = tasks.map(async (task, index) => {
await semaphore.acquire();
try {
results[index] = await task();
return results[index];
} finally {
semaphore.release();
}
});
return Promise.all(promises);
}
/**
* Execute async tasks with concurrency limit and progress callback
* @param tasks Array of async task functions
* @param concurrency Maximum concurrent tasks
* @param onProgress Callback with (completed, total, result) after each task
* @returns Promise that resolves with all results when complete
*/
export async function executeWithConcurrencyAndProgress<T>(
tasks: Array<() => Promise<T>>,
concurrency: number,
onProgress?: (completed: number, total: number, result: T, error?: Error) => void
): Promise<T[]> {
const semaphore = new Semaphore(concurrency);
const results: T[] = new Array(tasks.length);
let completed = 0;
const total = tasks.length;
const promises = tasks.map(async (task, index) => {
await semaphore.acquire();
try {
results[index] = await task();
completed++;
onProgress?.(completed, total, results[index]!);
return results[index];
} catch (error) {
completed++;
onProgress?.(completed, total, undefined as T, error as Error);
throw error;
} finally {
semaphore.release();
}
});
return Promise.all(promises);
}
/**
* Batch processor with concurrency control
* Useful for processing large arrays in chunks with controlled concurrency
*/
export class BatchProcessor<T, R> {
private semaphore: Semaphore;
private processor: (item: T, index: number) => Promise<R>;
private onError?: (error: Error, item: T, index: number) => void;
private onProgress?: (completed: number, total: number) => void;
constructor(
processor: (item: T, index: number) => Promise<R>,
concurrency: number,
options?: {
onError?: (error: Error, item: T, index: number) => void;
onProgress?: (completed: number, total: number) => void;
}
) {
this.processor = processor;
this.semaphore = new Semaphore(concurrency);
this.onError = options?.onError;
this.onProgress = options?.onProgress;
}
/**
* Process an array of items with concurrency control
* Only returns successful results, errors are handled by onError callback
*/
async process(items: T[]): Promise<Awaited<R>[]> {
const results: (Awaited<R> | undefined)[] = new Array(items.length);
let completed = 0;
const total = items.length;
const promises = items.map(async (item, index) => {
await this.semaphore.acquire();
try {
const result = await this.processor(item, index);
completed++;
this.onProgress?.(completed, total);
return result;
} catch (error) {
completed++;
this.onProgress?.(completed, total);
this.onError?.(error as Error, item, index);
return undefined;
} finally {
this.semaphore.release();
}
});
const allResults = await Promise.all(promises);
return allResults.filter((r): r is Awaited<R> => r !== undefined);
}
/**
* Process an array and return both results and errors
*/
async processWithErrors(items: T[]): Promise<{
results: R[];
errors: Array<{ error: Error; item: T; index: number }>;
}> {
const results: R[] = [];
const errors: Array<{ error: Error; item: T; index: number }> = [];
let completed = 0;
const total = items.length;
const promises = items.map(async (item, index) => {
await this.semaphore.acquire();
try {
const result = await this.processor(item, index);
results.push(result);
completed++;
this.onProgress?.(completed, total);
} catch (error) {
completed++;
this.onProgress?.(completed, total);
errors.push({ error: error as Error, item, index });
} finally {
this.semaphore.release();
}
});
await Promise.all(promises);
return { results, errors };
}
}