fix(provider-pool): 优化错误计数逻辑,避免并发请求导致误判 unhealthy
问题:
- 多个项目并发请求时,临时性 500 错误会快速累计
- 即使内部重试成功,错误计数已经触发 unhealthy 标记

解决方案:
- 引入 10 秒滑动窗口机制
- 超过窗口期的错误重置计数,而不是累加
- 只有窗口期内连续失败才标记 unhealthy
This commit is contained in:
parent
8b018b562e
commit
46ea3e707b
1 changed file with 13 additions and 3 deletions
|
|
@ -398,11 +398,21 @@ export class ProviderPoolManager {
|
|||
|
||||
const provider = this._findProvider(providerType, providerConfig.uuid);
|
||||
if (provider) {
|
||||
provider.config.errorCount++;
|
||||
const now = Date.now();
|
||||
const lastErrorTime = provider.config.lastErrorTime ? new Date(provider.config.lastErrorTime).getTime() : 0;
|
||||
const errorWindowMs = 10000; // 10 秒窗口期
|
||||
|
||||
// 如果距离上次错误超过窗口期,重置错误计数
|
||||
if (now - lastErrorTime > errorWindowMs) {
|
||||
provider.config.errorCount = 1;
|
||||
} else {
|
||||
provider.config.errorCount++;
|
||||
}
|
||||
|
||||
provider.config.lastErrorTime = new Date().toISOString();
|
||||
// 更新 lastUsed 时间,避免因 LRU 策略导致失败节点被重复选中
|
||||
provider.config.lastUsed = new Date().toISOString();
|
||||
|
||||
|
||||
// 保存错误信息
|
||||
if (errorMessage) {
|
||||
provider.config.lastErrorMessage = errorMessage;
|
||||
|
|
@ -414,7 +424,7 @@ export class ProviderPoolManager {
|
|||
} else {
|
||||
this._log('warn', `Provider ${providerConfig.uuid} for type ${providerType} error count: ${provider.config.errorCount}/${this.maxErrorCount}. Still healthy.`);
|
||||
}
|
||||
|
||||
|
||||
this._debouncedSave(providerType);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in a new issue