feat: improve team model availability diagnostics
This commit is contained in:
parent
d0cfabca48
commit
63b89fcd39
25 changed files with 1632 additions and 150 deletions
|
|
@ -1 +0,0 @@
|
|||
{"taskId":"351e2899-3aba-4992-9250-bf85dccb4399","teamName":"ember-collective","provider":"codex","source":"codex-native-trace","updatedAt":"2026-05-09T07:59:53.638Z"}
|
||||
|
|
@ -1 +0,0 @@
|
|||
{"taskId":"351e2899","teamName":"ember-collective","provider":"codex","source":"codex-native-trace","updatedAt":"2026-05-09T08:00:39.185Z"}
|
||||
1
.gitignore
vendored
1
.gitignore
vendored
|
|
@ -49,3 +49,4 @@ eslint-fix/
|
|||
remotion/*
|
||||
|
||||
.home/
|
||||
.board-task-log-freshness/
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@ import { mkdir, mkdtemp, rm, writeFile } from 'fs/promises';
|
|||
import os from 'os';
|
||||
import path from 'path';
|
||||
import { afterEach, describe, expect, it } from 'vitest';
|
||||
|
||||
import { MemberRuntimeLogTailReader } from '../MemberRuntimeLogTailReader';
|
||||
|
||||
const tempDirs: string[] = [];
|
||||
|
|
|
|||
|
|
@ -110,6 +110,7 @@ import {
|
|||
buildProviderPrepareRuntimeStatusSignature,
|
||||
} from './providerPrepareRequestSignature';
|
||||
import {
|
||||
getShortLivedProviderPrepareModelIssueReasons,
|
||||
getShortLivedProviderPrepareModelResults,
|
||||
storeShortLivedProviderPrepareModelResults,
|
||||
} from './providerPrepareShortLivedCache';
|
||||
|
|
@ -682,6 +683,45 @@ export const CreateTeamDialog = ({
|
|||
selectedProviderId,
|
||||
]
|
||||
);
|
||||
const shortLivedModelIssueReasons = useMemo(() => {
|
||||
const modelIssueReasonByProvider: Partial<Record<TeamProviderId, Record<string, string>>> = {};
|
||||
const modelUnavailableReasonByProvider: Partial<
|
||||
Record<TeamProviderId, Record<string, string>>
|
||||
> = {};
|
||||
|
||||
for (const providerId of selectedMemberProviders) {
|
||||
const backendSummary = runtimeBackendSummaryByProvider.get(providerId) ?? null;
|
||||
const cacheKey = buildProviderPrepareModelCacheKey({
|
||||
cwd: effectiveCwd,
|
||||
providerId,
|
||||
backendSummary,
|
||||
limitContext: effectiveAnthropicRuntimeLimitContext,
|
||||
runtimeStatusSignature: prepareRuntimeStatusSignature,
|
||||
});
|
||||
const issueReasons = getShortLivedProviderPrepareModelIssueReasons({
|
||||
providerId,
|
||||
cacheKey,
|
||||
});
|
||||
if (Object.keys(issueReasons.modelIssueReasonByValue).length > 0) {
|
||||
modelIssueReasonByProvider[providerId] = issueReasons.modelIssueReasonByValue;
|
||||
}
|
||||
if (Object.keys(issueReasons.modelUnavailableReasonByValue).length > 0) {
|
||||
modelUnavailableReasonByProvider[providerId] = issueReasons.modelUnavailableReasonByValue;
|
||||
}
|
||||
}
|
||||
|
||||
return {
|
||||
modelIssueReasonByProvider,
|
||||
modelUnavailableReasonByProvider,
|
||||
};
|
||||
}, [
|
||||
effectiveAnthropicRuntimeLimitContext,
|
||||
effectiveCwd,
|
||||
prepareChecks,
|
||||
prepareRuntimeStatusSignature,
|
||||
runtimeBackendSummaryByProvider,
|
||||
selectedMemberProviders,
|
||||
]);
|
||||
|
||||
useEffect(() => {
|
||||
if (multimodelEnabled) {
|
||||
|
|
@ -1860,6 +1900,10 @@ export const CreateTeamDialog = ({
|
|||
leadModelIssueText={leadModelIssueText}
|
||||
memberWarningById={teammateRuntimeCompatibility.memberWarningById}
|
||||
memberModelIssueById={memberModelIssueById}
|
||||
modelIssueReasonByProvider={shortLivedModelIssueReasons.modelIssueReasonByProvider}
|
||||
modelUnavailableReasonByProvider={
|
||||
shortLivedModelIssueReasons.modelUnavailableReasonByProvider
|
||||
}
|
||||
headerTop={
|
||||
<div className="flex items-center gap-2">
|
||||
<Checkbox
|
||||
|
|
|
|||
|
|
@ -115,6 +115,7 @@ import {
|
|||
buildProviderPrepareRuntimeStatusSignature,
|
||||
} from './providerPrepareRequestSignature';
|
||||
import {
|
||||
getShortLivedProviderPrepareModelIssueReasons,
|
||||
getShortLivedProviderPrepareModelResults,
|
||||
storeShortLivedProviderPrepareModelResults,
|
||||
} from './providerPrepareShortLivedCache';
|
||||
|
|
@ -1417,6 +1418,53 @@ export const LaunchTeamDialog = (props: LaunchTeamDialogProps): React.JSX.Elemen
|
|||
selectedProviderId,
|
||||
]
|
||||
);
|
||||
const shortLivedModelIssueReasons = useMemo(() => {
|
||||
const modelIssueReasonByProvider: Partial<Record<TeamProviderId, Record<string, string>>> = {};
|
||||
const modelUnavailableReasonByProvider: Partial<
|
||||
Record<TeamProviderId, Record<string, string>>
|
||||
> = {};
|
||||
|
||||
if (!isLaunchMode) {
|
||||
return {
|
||||
modelIssueReasonByProvider,
|
||||
modelUnavailableReasonByProvider,
|
||||
};
|
||||
}
|
||||
|
||||
for (const providerId of selectedMemberProviders) {
|
||||
const backendSummary = runtimeBackendSummaryByProvider.get(providerId) ?? null;
|
||||
const cacheKey = buildProviderPrepareModelCacheKey({
|
||||
cwd: effectiveCwd,
|
||||
providerId,
|
||||
backendSummary,
|
||||
limitContext: effectiveAnthropicRuntimeLimitContext,
|
||||
runtimeStatusSignature: prepareRuntimeStatusSignature,
|
||||
});
|
||||
const issueReasons = getShortLivedProviderPrepareModelIssueReasons({
|
||||
providerId,
|
||||
cacheKey,
|
||||
});
|
||||
if (Object.keys(issueReasons.modelIssueReasonByValue).length > 0) {
|
||||
modelIssueReasonByProvider[providerId] = issueReasons.modelIssueReasonByValue;
|
||||
}
|
||||
if (Object.keys(issueReasons.modelUnavailableReasonByValue).length > 0) {
|
||||
modelUnavailableReasonByProvider[providerId] = issueReasons.modelUnavailableReasonByValue;
|
||||
}
|
||||
}
|
||||
|
||||
return {
|
||||
modelIssueReasonByProvider,
|
||||
modelUnavailableReasonByProvider,
|
||||
};
|
||||
}, [
|
||||
effectiveAnthropicRuntimeLimitContext,
|
||||
effectiveCwd,
|
||||
isLaunchMode,
|
||||
prepareChecks,
|
||||
prepareRuntimeStatusSignature,
|
||||
runtimeBackendSummaryByProvider,
|
||||
selectedMemberProviders,
|
||||
]);
|
||||
|
||||
// Clear stale provisioning error when dialog opens
|
||||
useEffect(() => {
|
||||
|
|
@ -2563,6 +2611,12 @@ export const LaunchTeamDialog = (props: LaunchTeamDialogProps): React.JSX.Elemen
|
|||
memberInfoById={memberWorktreeContinuationInfoById}
|
||||
leadModelIssueText={leadModelIssueText}
|
||||
memberModelIssueById={memberModelIssueById}
|
||||
modelIssueReasonByProvider={
|
||||
shortLivedModelIssueReasons.modelIssueReasonByProvider
|
||||
}
|
||||
modelUnavailableReasonByProvider={
|
||||
shortLivedModelIssueReasons.modelUnavailableReasonByProvider
|
||||
}
|
||||
softDeleteMembers
|
||||
disableGeminiOption={isGeminiUiFrozen()}
|
||||
headerBottom={
|
||||
|
|
|
|||
|
|
@ -19,13 +19,9 @@ import {
|
|||
GEMINI_UI_DISABLED_REASON,
|
||||
isGeminiUiFrozen,
|
||||
} from '@renderer/utils/geminiUiFreeze';
|
||||
import {
|
||||
compareOpenCodeTeamModelRecommendations,
|
||||
getOpenCodeTeamModelRecommendation,
|
||||
isOpenCodeTeamModelRecommended,
|
||||
} from '@renderer/utils/openCodeModelRecommendations';
|
||||
import {
|
||||
getAvailableTeamProviderModelOptions,
|
||||
getOpenCodeOpenAiRouteAuthUnavailableReason,
|
||||
getTeamModelUiDisabledReason,
|
||||
isTeamProviderModelVerificationPending,
|
||||
normalizeTeamModelForUi,
|
||||
|
|
@ -41,6 +37,11 @@ import {
|
|||
isAnthropicHaikuTeamModel,
|
||||
} from '@renderer/utils/teamModelCatalog';
|
||||
import { extractProviderScopedBaseModel } from '@renderer/utils/teamModelContext';
|
||||
import {
|
||||
compareTeamModelRecommendations,
|
||||
getTeamModelRecommendation,
|
||||
isTeamModelRecommended,
|
||||
} from '@renderer/utils/teamModelRecommendations';
|
||||
import { resolveAnthropicLaunchModel } from '@shared/utils/anthropicLaunchModel';
|
||||
import { getAnthropicDefaultTeamModel } from '@shared/utils/anthropicModelDefaults';
|
||||
import { isTeamProviderId } from '@shared/utils/teamProvider';
|
||||
|
|
@ -156,6 +157,7 @@ export interface TeamModelSelectorProps {
|
|||
providerDisabledReasonById?: Partial<Record<TeamProviderId, string | null | undefined>>;
|
||||
providerDisabledBadgeLabelById?: Partial<Record<TeamProviderId, string | null | undefined>>;
|
||||
modelIssueReasonByValue?: Partial<Record<string, string | null | undefined>>;
|
||||
modelUnavailableReasonByValue?: Partial<Record<string, string | null | undefined>>;
|
||||
}
|
||||
|
||||
export const TeamModelSelector: React.FC<TeamModelSelectorProps> = ({
|
||||
|
|
@ -168,6 +170,7 @@ export const TeamModelSelector: React.FC<TeamModelSelectorProps> = ({
|
|||
providerDisabledReasonById,
|
||||
providerDisabledBadgeLabelById,
|
||||
modelIssueReasonByValue,
|
||||
modelUnavailableReasonByValue,
|
||||
}) => {
|
||||
const multimodelEnabled = useStore((s) => s.appConfig?.general?.multimodelEnabled ?? true);
|
||||
const [recommendedOnly, setRecommendedOnly] = useState(false);
|
||||
|
|
@ -315,7 +318,7 @@ export const TeamModelSelector: React.FC<TeamModelSelectorProps> = ({
|
|||
const hasRecommendedOpenCodeModels = useMemo(
|
||||
() =>
|
||||
effectiveProviderId === 'opencode' &&
|
||||
modelOptions.some((option) => isOpenCodeTeamModelRecommended(option.value)),
|
||||
modelOptions.some((option) => isTeamModelRecommended(effectiveProviderId, option.value)),
|
||||
[effectiveProviderId, modelOptions]
|
||||
);
|
||||
|
||||
|
|
@ -335,10 +338,7 @@ export const TeamModelSelector: React.FC<TeamModelSelectorProps> = ({
|
|||
if (!normalizedModelQuery) {
|
||||
return true;
|
||||
}
|
||||
const modelRecommendation =
|
||||
effectiveProviderId === 'opencode'
|
||||
? getOpenCodeTeamModelRecommendation(option.value)
|
||||
: null;
|
||||
const modelRecommendation = getTeamModelRecommendation(effectiveProviderId, option.value);
|
||||
return [
|
||||
option.value,
|
||||
option.label,
|
||||
|
|
@ -358,10 +358,14 @@ export const TeamModelSelector: React.FC<TeamModelSelectorProps> = ({
|
|||
const concreteOptions = modelOptions
|
||||
.filter((option) => option.value.trim().length > 0)
|
||||
.map((option, index) => ({ option, index }))
|
||||
.filter(({ option }) => !recommendedOnly || isOpenCodeTeamModelRecommended(option.value))
|
||||
.filter(
|
||||
({ option }) =>
|
||||
!recommendedOnly || isTeamModelRecommended(effectiveProviderId, option.value)
|
||||
)
|
||||
.filter(({ option }) => matchesModelQuery(option))
|
||||
.sort((left, right) => {
|
||||
const recommendationOrder = compareOpenCodeTeamModelRecommendations(
|
||||
const recommendationOrder = compareTeamModelRecommendations(
|
||||
effectiveProviderId,
|
||||
left.option.value,
|
||||
right.option.value
|
||||
);
|
||||
|
|
@ -517,25 +521,44 @@ export const TeamModelSelector: React.FC<TeamModelSelectorProps> = ({
|
|||
opt.value === '' ? 'available' : (opt.availabilityStatus ?? 'available');
|
||||
const availabilityReason =
|
||||
opt.value === '' ? null : (opt.availabilityReason ?? null);
|
||||
const runtimeUnavailableReason =
|
||||
opt.value !== '' && availabilityStatus === 'unavailable'
|
||||
? (availabilityReason ?? 'Unavailable in current runtime')
|
||||
: null;
|
||||
const modelIssueReason =
|
||||
opt.value === '' ? null : (modelIssueReasonByValue?.[opt.value] ?? null);
|
||||
const hasModelIssue = Boolean(modelIssueReason);
|
||||
const modelUnavailableReason =
|
||||
opt.value === ''
|
||||
? null
|
||||
: (modelUnavailableReasonByValue?.[opt.value] ??
|
||||
getOpenCodeOpenAiRouteAuthUnavailableReason(
|
||||
effectiveProviderId,
|
||||
opt.value,
|
||||
runtimeProviderStatus
|
||||
) ??
|
||||
runtimeUnavailableReason);
|
||||
const hasModelIssue = Boolean(modelIssueReason || modelUnavailableReason);
|
||||
const modelSelectable =
|
||||
activeProviderSelectable &&
|
||||
!modelUnavailableReason &&
|
||||
!modelDisabledReason &&
|
||||
(opt.value === '' ||
|
||||
availabilityStatus == null ||
|
||||
availabilityStatus === 'available');
|
||||
const modelStatusMessage =
|
||||
modelIssueReason ?? modelDisabledReason ?? availabilityReason ?? null;
|
||||
modelUnavailableReason ??
|
||||
modelIssueReason ??
|
||||
modelDisabledReason ??
|
||||
availabilityReason ??
|
||||
null;
|
||||
const sourceBadgeLabel =
|
||||
effectiveProviderId === 'opencode' && opt.value !== ''
|
||||
? opt.badgeLabel?.trim() || null
|
||||
: null;
|
||||
const modelRecommendation =
|
||||
effectiveProviderId === 'opencode'
|
||||
? getOpenCodeTeamModelRecommendation(opt.value)
|
||||
: null;
|
||||
const modelRecommendation = getTeamModelRecommendation(
|
||||
effectiveProviderId,
|
||||
opt.value
|
||||
);
|
||||
|
||||
return (
|
||||
<button
|
||||
|
|
@ -637,10 +660,10 @@ export const TeamModelSelector: React.FC<TeamModelSelectorProps> = ({
|
|||
{hasModelIssue && (
|
||||
<span
|
||||
className="flex items-center justify-center gap-1 text-[10px] font-normal text-red-300"
|
||||
title={modelIssueReason ?? undefined}
|
||||
title={modelStatusMessage ?? undefined}
|
||||
>
|
||||
<AlertTriangle className="size-3 shrink-0" />
|
||||
<span>Issue</span>
|
||||
<span>{modelUnavailableReason ? 'Unavailable' : 'Issue'}</span>
|
||||
<TooltipProvider delayDuration={200}>
|
||||
<Tooltip>
|
||||
<TooltipTrigger
|
||||
|
|
@ -650,7 +673,7 @@ export const TeamModelSelector: React.FC<TeamModelSelectorProps> = ({
|
|||
<Info className="size-3 shrink-0 opacity-50 transition-opacity hover:opacity-80" />
|
||||
</TooltipTrigger>
|
||||
<TooltipContent side="top" className="max-w-[240px] text-xs">
|
||||
{modelIssueReason}
|
||||
{modelStatusMessage}
|
||||
</TooltipContent>
|
||||
</Tooltip>
|
||||
</TooltipProvider>
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@ import type { ProviderPrepareDiagnosticsModelResult } from './providerPrepareDia
|
|||
import type { TeamProviderId } from '@shared/types';
|
||||
|
||||
const OPENCODE_DEEP_VERIFY_SUCCESS_CACHE_TTL_MS = 45_000;
|
||||
const OPENCODE_MODEL_ISSUE_CACHE_TTL_MS = 90_000;
|
||||
|
||||
interface ShortLivedProviderPrepareCacheEntry {
|
||||
expiresAt: number;
|
||||
|
|
@ -9,15 +10,24 @@ interface ShortLivedProviderPrepareCacheEntry {
|
|||
}
|
||||
|
||||
const shortLivedProviderPrepareCache = new Map<string, ShortLivedProviderPrepareCacheEntry>();
|
||||
const shortLivedProviderPrepareIssueCache = new Map<string, ShortLivedProviderPrepareCacheEntry>();
|
||||
|
||||
function pruneExpiredEntries(now: number): void {
|
||||
for (const [cacheKey, entry] of shortLivedProviderPrepareCache.entries()) {
|
||||
function pruneExpiredEntries(
|
||||
cache: Map<string, ShortLivedProviderPrepareCacheEntry>,
|
||||
now: number
|
||||
): void {
|
||||
for (const [cacheKey, entry] of cache.entries()) {
|
||||
if (entry.expiresAt <= now) {
|
||||
shortLivedProviderPrepareCache.delete(cacheKey);
|
||||
cache.delete(cacheKey);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Derives a display reason from a single model diagnostics result.
 *
 * Looks at the trimmed `result.line` for a trailing ` - unavailable` or
 * ` - check failed` marker (case-insensitive), optionally followed by
 * ` - <detail>`. Preference order for the returned text:
 *   1. the trimmed `<detail>` captured after the marker,
 *   2. the trimmed `result.warningLine`,
 *   3. the trimmed `result.line` itself.
 *
 * @param result - Diagnostics result whose `line` / `warningLine` are inspected.
 * @returns The first non-empty candidate above, or `null` when all are empty.
 */
function getIssueReason(result: ProviderPrepareDiagnosticsModelResult): string | null {
|
||||
const match = /\s-\s(?:unavailable|check failed)(?:\s-\s(.+))?$/i.exec(result.line.trim());
|
||||
// `||` (not `??`) is used so that empty strings fall through to the next candidate.
return match?.[1]?.trim() || result.warningLine?.trim() || result.line.trim() || null;
|
||||
}
|
||||
|
||||
export function getShortLivedProviderPrepareModelResults({
|
||||
providerId,
|
||||
cacheKey,
|
||||
|
|
@ -30,7 +40,7 @@ export function getShortLivedProviderPrepareModelResults({
|
|||
}
|
||||
|
||||
const now = Date.now();
|
||||
pruneExpiredEntries(now);
|
||||
pruneExpiredEntries(shortLivedProviderPrepareCache, now);
|
||||
const entry = shortLivedProviderPrepareCache.get(cacheKey);
|
||||
if (!entry) {
|
||||
return {};
|
||||
|
|
@ -39,6 +49,53 @@ export function getShortLivedProviderPrepareModelResults({
|
|||
return { ...entry.modelResultsById };
|
||||
}
|
||||
|
||||
/**
 * Reads the short-lived issue cache for `cacheKey` and splits the cached
 * per-model results into two maps keyed by model id.
 *
 * - Results with status `'failed'` go into `modelUnavailableReasonByValue`.
 * - Results with status `'notes'` go into `modelIssueReasonByValue`.
 * - Results with any other status, or for which no reason text can be
 *   derived, are omitted.
 *
 * Both maps are empty when `providerId` is not `'opencode'` or when the issue
 * cache holds no entry for `cacheKey` after expired entries are pruned.
 *
 * @param providerId - Provider the lookup is for; only `'opencode'` is served.
 * @param cacheKey - Key of the issue cache entry to read.
 * @returns Freshly built maps from model id to reason text.
 */
export function getShortLivedProviderPrepareModelIssueReasons({
|
||||
providerId,
|
||||
cacheKey,
|
||||
}: {
|
||||
providerId: TeamProviderId;
|
||||
cacheKey: string;
|
||||
}): {
|
||||
modelIssueReasonByValue: Record<string, string>;
|
||||
modelUnavailableReasonByValue: Record<string, string>;
|
||||
} {
|
||||
// Non-OpenCode providers always get empty maps.
if (providerId !== 'opencode') {
|
||||
return {
|
||||
modelIssueReasonByValue: {},
|
||||
modelUnavailableReasonByValue: {},
|
||||
};
|
||||
}
|
||||
|
||||
const now = Date.now();
|
||||
// Prune the issue cache before the lookup so an expired entry is never returned.
pruneExpiredEntries(shortLivedProviderPrepareIssueCache, now);
|
||||
const entry = shortLivedProviderPrepareIssueCache.get(cacheKey);
|
||||
if (!entry) {
|
||||
return {
|
||||
modelIssueReasonByValue: {},
|
||||
modelUnavailableReasonByValue: {},
|
||||
};
|
||||
}
|
||||
|
||||
const modelIssueReasonByValue: Record<string, string> = {};
|
||||
const modelUnavailableReasonByValue: Record<string, string> = {};
|
||||
for (const [modelId, result] of Object.entries(entry.modelResultsById)) {
|
||||
const reason = getIssueReason(result);
|
||||
if (!reason) {
|
||||
continue;
|
||||
}
|
||||
// 'failed' marks the model as unavailable; 'notes' is surfaced as a (non-blocking) issue.
if (result.status === 'failed') {
|
||||
modelUnavailableReasonByValue[modelId] = reason;
|
||||
} else if (result.status === 'notes') {
|
||||
modelIssueReasonByValue[modelId] = reason;
|
||||
}
|
||||
}
|
||||
|
||||
return {
|
||||
modelIssueReasonByValue,
|
||||
modelUnavailableReasonByValue,
|
||||
};
|
||||
}
|
||||
|
||||
export function storeShortLivedProviderPrepareModelResults({
|
||||
providerId,
|
||||
cacheKey,
|
||||
|
|
@ -52,15 +109,18 @@ export function storeShortLivedProviderPrepareModelResults({
|
|||
return;
|
||||
}
|
||||
|
||||
const issueResultsById = Object.fromEntries(
|
||||
Object.entries(modelResultsById).filter(([, result]) => result.status !== 'ready')
|
||||
);
|
||||
const readyResultsById = Object.fromEntries(
|
||||
Object.entries(modelResultsById).filter(([, result]) => result.status === 'ready')
|
||||
);
|
||||
if (Object.keys(readyResultsById).length === 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
const now = Date.now();
|
||||
pruneExpiredEntries(now);
|
||||
pruneExpiredEntries(shortLivedProviderPrepareCache, now);
|
||||
pruneExpiredEntries(shortLivedProviderPrepareIssueCache, now);
|
||||
|
||||
if (Object.keys(readyResultsById).length > 0) {
|
||||
const existingEntry = shortLivedProviderPrepareCache.get(cacheKey);
|
||||
shortLivedProviderPrepareCache.set(cacheKey, {
|
||||
expiresAt: now + OPENCODE_DEEP_VERIFY_SUCCESS_CACHE_TTL_MS,
|
||||
|
|
@ -71,6 +131,40 @@ export function storeShortLivedProviderPrepareModelResults({
|
|||
});
|
||||
}
|
||||
|
||||
if (Object.keys(issueResultsById).length > 0) {
|
||||
const existingIssueEntry = shortLivedProviderPrepareIssueCache.get(cacheKey);
|
||||
const nextIssueResultsById = {
|
||||
...(existingIssueEntry?.modelResultsById ?? {}),
|
||||
...issueResultsById,
|
||||
};
|
||||
for (const modelId of Object.keys(readyResultsById)) {
|
||||
delete nextIssueResultsById[modelId];
|
||||
}
|
||||
shortLivedProviderPrepareIssueCache.set(cacheKey, {
|
||||
expiresAt: now + OPENCODE_MODEL_ISSUE_CACHE_TTL_MS,
|
||||
modelResultsById: nextIssueResultsById,
|
||||
});
|
||||
} else if (Object.keys(readyResultsById).length > 0) {
|
||||
const existingIssueEntry = shortLivedProviderPrepareIssueCache.get(cacheKey);
|
||||
if (!existingIssueEntry) {
|
||||
return;
|
||||
}
|
||||
const nextIssueResultsById = { ...existingIssueEntry.modelResultsById };
|
||||
for (const modelId of Object.keys(readyResultsById)) {
|
||||
delete nextIssueResultsById[modelId];
|
||||
}
|
||||
if (Object.keys(nextIssueResultsById).length > 0) {
|
||||
shortLivedProviderPrepareIssueCache.set(cacheKey, {
|
||||
expiresAt: existingIssueEntry.expiresAt,
|
||||
modelResultsById: nextIssueResultsById,
|
||||
});
|
||||
} else {
|
||||
shortLivedProviderPrepareIssueCache.delete(cacheKey);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Empties both module-level caches (the ready-result cache and the issue
 * cache). Named as a test-only hook; it takes no arguments and returns nothing.
 */
export function __resetShortLivedProviderPrepareCacheForTests(): void {
|
||||
shortLivedProviderPrepareCache.clear();
|
||||
shortLivedProviderPrepareIssueCache.clear();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -46,6 +46,8 @@ interface LeadModelRowProps {
|
|||
warningText?: string | null;
|
||||
disableGeminiOption?: boolean;
|
||||
modelIssueText?: string | null;
|
||||
modelIssueReasonByValue?: Partial<Record<string, string | null | undefined>>;
|
||||
modelUnavailableReasonByValue?: Partial<Record<string, string | null | undefined>>;
|
||||
showAnthropicContextLimit?: boolean;
|
||||
disableAnthropicContextLimit?: boolean;
|
||||
}
|
||||
|
|
@ -64,6 +66,8 @@ export const LeadModelRow = ({
|
|||
warningText,
|
||||
disableGeminiOption = false,
|
||||
modelIssueText,
|
||||
modelIssueReasonByValue,
|
||||
modelUnavailableReasonByValue,
|
||||
showAnthropicContextLimit = providerId === 'anthropic',
|
||||
disableAnthropicContextLimit,
|
||||
}: LeadModelRowProps): React.JSX.Element => {
|
||||
|
|
@ -74,7 +78,17 @@ export const LeadModelRow = ({
|
|||
? getProviderScopedTeamModelLabel(providerId, model.trim())
|
||||
: 'Default';
|
||||
const modelButtonAriaLabel = `${getTeamProviderLabel(providerId)} provider, ${modelButtonLabel}`;
|
||||
const hasModelIssue = Boolean(modelIssueText);
|
||||
const selectedModelIssueText =
|
||||
model.trim() && modelIssueReasonByValue?.[model.trim()]
|
||||
? modelIssueReasonByValue[model.trim()]
|
||||
: null;
|
||||
const selectedModelUnavailableText =
|
||||
model.trim() && modelUnavailableReasonByValue?.[model.trim()]
|
||||
? modelUnavailableReasonByValue[model.trim()]
|
||||
: null;
|
||||
const currentModelIssueText =
|
||||
modelIssueText ?? selectedModelUnavailableText ?? selectedModelIssueText ?? null;
|
||||
const hasModelIssue = Boolean(currentModelIssueText);
|
||||
const showSonnetExtraUsageWarning =
|
||||
providerId === 'anthropic' &&
|
||||
!limitContext &&
|
||||
|
|
@ -179,7 +193,11 @@ export const LeadModelRow = ({
|
|||
onValueChange={onModelChange}
|
||||
id="lead-model"
|
||||
disableGeminiOption={disableGeminiOption}
|
||||
modelIssueReasonByValue={model.trim() ? { [model.trim()]: modelIssueText } : undefined}
|
||||
modelIssueReasonByValue={{
|
||||
...(modelIssueReasonByValue ?? {}),
|
||||
...(model.trim() && modelIssueText ? { [model.trim()]: modelIssueText } : {}),
|
||||
}}
|
||||
modelUnavailableReasonByValue={modelUnavailableReasonByValue}
|
||||
/>
|
||||
<EffortLevelSelector
|
||||
value={effort ?? ''}
|
||||
|
|
|
|||
|
|
@ -78,6 +78,12 @@ interface MemberDraftRowProps {
|
|||
infoText?: string | null;
|
||||
disableGeminiOption?: boolean;
|
||||
modelIssueText?: string | null;
|
||||
modelIssueReasonByProvider?: Partial<
|
||||
Record<TeamProviderId, Partial<Record<string, string | null | undefined>>>
|
||||
>;
|
||||
modelUnavailableReasonByProvider?: Partial<
|
||||
Record<TeamProviderId, Partial<Record<string, string | null | undefined>>>
|
||||
>;
|
||||
showWorktreeIsolationControls?: boolean;
|
||||
worktreeIsolationDisabledReason?: string | null;
|
||||
onWorktreeIsolationChange?: (id: string, enabled: boolean) => void;
|
||||
|
|
@ -128,6 +134,8 @@ export const MemberDraftRow = ({
|
|||
infoText,
|
||||
disableGeminiOption = false,
|
||||
modelIssueText,
|
||||
modelIssueReasonByProvider,
|
||||
modelUnavailableReasonByProvider,
|
||||
showWorktreeIsolationControls = false,
|
||||
worktreeIsolationDisabledReason,
|
||||
onWorktreeIsolationChange,
|
||||
|
|
@ -226,7 +234,19 @@ export const MemberDraftRow = ({
|
|||
: undefined;
|
||||
const worktreeIsolationDisabled =
|
||||
isRemoved || Boolean(worktreeIsolationDisabledReason && member.isolation !== 'worktree');
|
||||
const hasModelIssue = Boolean(modelIssueText);
|
||||
const effectiveModelKey = effectiveModel?.trim() ?? '';
|
||||
const selectedModelIssueText =
|
||||
effectiveModelKey && modelIssueReasonByProvider?.[effectiveProviderId]?.[effectiveModelKey]
|
||||
? modelIssueReasonByProvider[effectiveProviderId]?.[effectiveModelKey]
|
||||
: null;
|
||||
const selectedModelUnavailableText =
|
||||
effectiveModelKey &&
|
||||
modelUnavailableReasonByProvider?.[effectiveProviderId]?.[effectiveModelKey]
|
||||
? modelUnavailableReasonByProvider[effectiveProviderId]?.[effectiveModelKey]
|
||||
: null;
|
||||
const currentModelIssueText =
|
||||
modelIssueText ?? selectedModelUnavailableText ?? selectedModelIssueText ?? null;
|
||||
const hasModelIssue = Boolean(currentModelIssueText);
|
||||
const hasCustomProviderOrModel =
|
||||
!forceInheritedModelSettings && Boolean(member.providerId || member.model?.trim());
|
||||
const showSonnetExtraUsageWarning =
|
||||
|
|
@ -353,11 +373,15 @@ export const MemberDraftRow = ({
|
|||
</Button>
|
||||
</span>
|
||||
</TooltipTrigger>
|
||||
{modelTooltipText || modelIssueText ? (
|
||||
{modelTooltipText || currentModelIssueText ? (
|
||||
<TooltipContent side="top" className="max-w-64 text-xs leading-relaxed">
|
||||
{modelIssueText ? <p className="text-red-300">{modelIssueText}</p> : null}
|
||||
{currentModelIssueText ? (
|
||||
<p className="text-red-300">{currentModelIssueText}</p>
|
||||
) : null}
|
||||
{modelTooltipText ? (
|
||||
<p className={modelIssueText ? 'mt-1 border-t border-white/10 pt-1' : ''}>
|
||||
<p
|
||||
className={currentModelIssueText ? 'mt-1 border-t border-white/10 pt-1' : ''}
|
||||
>
|
||||
{modelTooltipText}
|
||||
</p>
|
||||
) : null}
|
||||
|
|
@ -524,8 +548,14 @@ export const MemberDraftRow = ({
|
|||
}}
|
||||
id={`member-${member.id}-model`}
|
||||
disableGeminiOption={disableGeminiOption}
|
||||
modelIssueReasonByValue={
|
||||
effectiveModel?.trim() ? { [effectiveModel.trim()]: modelIssueText } : undefined
|
||||
modelIssueReasonByValue={{
|
||||
...(modelIssueReasonByProvider?.[effectiveProviderId] ?? {}),
|
||||
...(effectiveModelKey && modelIssueText
|
||||
? { [effectiveModelKey]: modelIssueText }
|
||||
: {}),
|
||||
}}
|
||||
modelUnavailableReasonByValue={
|
||||
modelUnavailableReasonByProvider?.[effectiveProviderId]
|
||||
}
|
||||
/>
|
||||
<EffortLevelSelector
|
||||
|
|
|
|||
|
|
@ -114,6 +114,12 @@ export interface MembersEditorSectionProps {
|
|||
memberInfoById?: Record<string, string | null | undefined>;
|
||||
disableGeminiOption?: boolean;
|
||||
memberModelIssueById?: Record<string, string | null | undefined>;
|
||||
modelIssueReasonByProvider?: Partial<
|
||||
Record<TeamProviderId, Partial<Record<string, string | null | undefined>>>
|
||||
>;
|
||||
modelUnavailableReasonByProvider?: Partial<
|
||||
Record<TeamProviderId, Partial<Record<string, string | null | undefined>>>
|
||||
>;
|
||||
disableAddMember?: boolean;
|
||||
addMemberLockReason?: string;
|
||||
showWorktreeIsolationControls?: boolean;
|
||||
|
|
@ -153,6 +159,8 @@ export const MembersEditorSection = ({
|
|||
memberInfoById,
|
||||
disableGeminiOption = false,
|
||||
memberModelIssueById,
|
||||
modelIssueReasonByProvider,
|
||||
modelUnavailableReasonByProvider,
|
||||
disableAddMember = false,
|
||||
addMemberLockReason,
|
||||
showWorktreeIsolationControls = false,
|
||||
|
|
@ -428,6 +436,8 @@ export const MembersEditorSection = ({
|
|||
infoText={memberInfoById?.[member.id] ?? null}
|
||||
disableGeminiOption={disableGeminiOption}
|
||||
modelIssueText={memberModelIssueById?.[member.id] ?? null}
|
||||
modelIssueReasonByProvider={modelIssueReasonByProvider}
|
||||
modelUnavailableReasonByProvider={modelUnavailableReasonByProvider}
|
||||
/>
|
||||
))}
|
||||
{softDeleteMembers && removedMembers.length > 0 ? (
|
||||
|
|
|
|||
|
|
@ -49,6 +49,12 @@ interface TeamRosterEditorSectionProps {
|
|||
disableGeminiOption?: boolean;
|
||||
leadModelIssueText?: string | null;
|
||||
memberModelIssueById?: Record<string, string | null | undefined>;
|
||||
modelIssueReasonByProvider?: Partial<
|
||||
Record<TeamProviderId, Partial<Record<string, string | null | undefined>>>
|
||||
>;
|
||||
modelUnavailableReasonByProvider?: Partial<
|
||||
Record<TeamProviderId, Partial<Record<string, string | null | undefined>>>
|
||||
>;
|
||||
showWorktreeIsolationControls?: boolean;
|
||||
teammateWorktreeDefault?: boolean;
|
||||
worktreeIsolationDisabledReason?: string | null;
|
||||
|
|
@ -95,6 +101,8 @@ export const TeamRosterEditorSection = ({
|
|||
disableGeminiOption = false,
|
||||
leadModelIssueText,
|
||||
memberModelIssueById,
|
||||
modelIssueReasonByProvider,
|
||||
modelUnavailableReasonByProvider,
|
||||
showWorktreeIsolationControls = false,
|
||||
teammateWorktreeDefault = false,
|
||||
worktreeIsolationDisabledReason,
|
||||
|
|
@ -153,6 +161,8 @@ export const TeamRosterEditorSection = ({
|
|||
softDeleteMembers={softDeleteMembers}
|
||||
disableGeminiOption={disableGeminiOption}
|
||||
memberModelIssueById={memberModelIssueById}
|
||||
modelIssueReasonByProvider={modelIssueReasonByProvider}
|
||||
modelUnavailableReasonByProvider={modelUnavailableReasonByProvider}
|
||||
showWorktreeIsolationControls={showWorktreeIsolationControls}
|
||||
teammateWorktreeDefault={teammateWorktreeDefault}
|
||||
worktreeIsolationDisabledReason={worktreeIsolationDisabledReason}
|
||||
|
|
@ -174,6 +184,8 @@ export const TeamRosterEditorSection = ({
|
|||
warningText={leadWarningText}
|
||||
disableGeminiOption={disableGeminiOption}
|
||||
modelIssueText={leadModelIssueText}
|
||||
modelIssueReasonByValue={modelIssueReasonByProvider?.[providerId]}
|
||||
modelUnavailableReasonByValue={modelUnavailableReasonByProvider?.[providerId]}
|
||||
showAnthropicContextLimit={hasAnthropicRuntime}
|
||||
disableAnthropicContextLimit={disableAnthropicContextLimit}
|
||||
/>
|
||||
|
|
|
|||
|
|
@ -26,9 +26,7 @@ const PASSED_GAUNTLET_WITH_LIMITS_REASON =
|
|||
|
||||
const OPENCODE_TEAM_RECOMMENDED_MODELS = new Set<string>(['opencode/big-pickle']);
|
||||
|
||||
const OPENCODE_TEAM_RECOMMENDED_WITH_LIMITS_MODELS = new Set<string>([
|
||||
'opencode/minimax-m2.5-free',
|
||||
]);
|
||||
const OPENCODE_TEAM_RECOMMENDED_WITH_LIMITS_MODELS = new Set<string>([]);
|
||||
|
||||
const OPENCODE_TEAM_TESTED_MODELS = new Set<string>([
|
||||
'openrouter/anthropic/claude-haiku-4.5',
|
||||
|
|
@ -54,7 +52,14 @@ const OPENCODE_TEAM_TESTED_MODELS = new Set<string>([
|
|||
'openrouter/z-ai/glm-5.1',
|
||||
]);
|
||||
|
||||
const OPENCODE_TEAM_TESTED_WITH_LIMITS_MODELS = new Set<string>([]);
|
||||
const OPENCODE_TEAM_TESTED_WITH_LIMITS_MODELS = new Set<string>(['opencode/minimax-m2.5-free']);
|
||||
|
||||
const OPENCODE_TEAM_TESTED_WITH_LIMITS_REASONS = new Map<string, string>([
|
||||
[
|
||||
'opencode/minimax-m2.5-free',
|
||||
'This exact free model route passed simple OpenCode Agent Teams provider stress, but a deeper repeated gauntlet hit duplicate or missing reply tokens. Keep it below Recommended until a clean repeated gauntlet passes.',
|
||||
],
|
||||
]);
|
||||
|
||||
const OPENCODE_TEAM_UNAVAILABLE_MODELS = new Map<string, string>([
|
||||
[
|
||||
|
|
@ -1254,7 +1259,9 @@ export function getOpenCodeTeamModelRecommendation(
|
|||
return {
|
||||
level: 'tested-with-limits',
|
||||
label: 'Tested with limits',
|
||||
reason: PASSED_FREE_ROUTE_REAL_AGENT_TEAMS_E2E_REASON,
|
||||
reason:
|
||||
OPENCODE_TEAM_TESTED_WITH_LIMITS_REASONS.get(normalizedModelId) ??
|
||||
PASSED_FREE_ROUTE_REAL_AGENT_TEAMS_E2E_REASON,
|
||||
};
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -35,6 +35,9 @@ export {
|
|||
|
||||
type SupportedProviderId = CliProviderId | TeamProviderId;
|
||||
|
||||
export const OPENCODE_OPENAI_AUTH_UNAVAILABLE_REASON =
|
||||
'OpenCode OpenAI provider authentication failed. Reconnect OpenAI in provider settings, then refresh runtime status.';
|
||||
|
||||
export type TeamModelRuntimeProviderStatus = Pick<
|
||||
CliProviderStatus,
|
||||
| 'providerId'
|
||||
|
|
@ -47,6 +50,9 @@ export type TeamModelRuntimeProviderStatus = Pick<
|
|||
| 'backend'
|
||||
| 'authenticated'
|
||||
| 'supported'
|
||||
| 'detailMessage'
|
||||
| 'availableBackends'
|
||||
| 'externalRuntimeDiagnostics'
|
||||
> &
|
||||
Partial<Pick<CliProviderStatus, 'verificationState' | 'statusMessage'>>;
|
||||
|
||||
|
|
@ -61,6 +67,58 @@ export interface TeamProviderModelVerificationCounts {
|
|||
verifying: boolean;
|
||||
}
|
||||
|
||||
/**
 * Decides whether an OpenCode model routed through OpenAI should be treated
 * as unavailable because OpenAI authentication appears to have failed.
 *
 * The check only applies when `providerId` is `'opencode'`, `model` (trimmed,
 * lower-cased) starts with `openai/`, and a `providerStatus` is supplied;
 * otherwise `null` is returned immediately.
 *
 * Two heuristics are tried in order:
 *   1. Any available-backend entry mentioning "openai" either reports state
 *      `'authentication-required'`, or is not available and has a status or
 *      detail message matching an auth-related keyword.
 *   2. The combined diagnostic text (provider-level messages, OpenAI backend
 *      messages, and external runtime diagnostics labelled "openai") mentions
 *      "openai" and matches one of the auth-failure phrases.
 *
 * @param providerId - Provider the model belongs to.
 * @param model - Model id to evaluate.
 * @param providerStatus - Runtime status to inspect for auth failures.
 * @returns `OPENCODE_OPENAI_AUTH_UNAVAILABLE_REASON` when either heuristic
 *   fires, otherwise `null`.
 */
export function getOpenCodeOpenAiRouteAuthUnavailableReason(
|
||||
providerId: SupportedProviderId | undefined,
|
||||
model: string | undefined,
|
||||
providerStatus?: TeamModelRuntimeProviderStatus | null
|
||||
): string | null {
|
||||
if (
|
||||
providerId !== 'opencode' ||
|
||||
!model?.trim().toLowerCase().startsWith('openai/') ||
|
||||
!providerStatus
|
||||
) {
|
||||
return null;
|
||||
}
|
||||
|
||||
// Backends whose id, label, or description contains "openai" as a whole word.
const openAiBackends = (providerStatus.availableBackends ?? []).filter((backend) =>
|
||||
[backend.id, backend.label, backend.description].some((value) => /\bopenai\b/i.test(value))
|
||||
);
|
||||
// Heuristic 1: explicit auth-required state, or unavailable with an auth-looking message.
const backendRequiresAuth = openAiBackends.some(
|
||||
(backend) =>
|
||||
backend.state === 'authentication-required' ||
|
||||
(!backend.available &&
|
||||
[backend.statusMessage, backend.detailMessage].some((value) =>
|
||||
/auth|token|api key|401|403/i.test(value ?? '')
|
||||
))
|
||||
);
|
||||
if (backendRequiresAuth) {
|
||||
return OPENCODE_OPENAI_AUTH_UNAVAILABLE_REASON;
|
||||
}
|
||||
|
||||
// Heuristic 2: join every relevant non-empty message into one text blob and scan it.
const diagnosticText = [
|
||||
providerStatus.statusMessage,
|
||||
providerStatus.detailMessage,
|
||||
...openAiBackends.flatMap((backend) => [backend.statusMessage, backend.detailMessage]),
|
||||
...(providerStatus.externalRuntimeDiagnostics ?? [])
|
||||
.filter((diagnostic) => /\bopenai\b/i.test(diagnostic.label))
|
||||
.flatMap((diagnostic) => [diagnostic.statusMessage, diagnostic.detailMessage]),
|
||||
]
|
||||
.map((value) => value?.trim() ?? '')
|
||||
.filter(Boolean)
|
||||
.join('\n');
|
||||
|
||||
// NOTE(review): both patterns are tested against the whole blob, so "openai" and the
// auth phrase may come from different messages — confirm this is intended.
if (
|
||||
/\bopenai\b/i.test(diagnosticText) &&
|
||||
/token refresh failed|token.*invalid|invalid.*token|not[_\s-]?authenticated|not authenticated|unauthorized|forbidden|\b401\b|\b403\b|invalid api key|api key.*invalid|authentication required/i.test(
|
||||
diagnosticText
|
||||
)
|
||||
) {
|
||||
return OPENCODE_OPENAI_AUTH_UNAVAILABLE_REASON;
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
export function getTeamModelUiDisabledReason(
|
||||
providerId: SupportedProviderId | undefined,
|
||||
model: string | undefined,
|
||||
|
|
@ -277,6 +335,10 @@ function getRuntimeModelAvailability(
|
|||
if (!visibleModels.includes(model)) {
|
||||
return null;
|
||||
}
|
||||
const runtimeAvailability = getModelAvailabilityMap(providerStatus).get(model)?.status ?? null;
|
||||
if (runtimeAvailability === 'unavailable') {
|
||||
return 'unavailable';
|
||||
}
|
||||
return 'available';
|
||||
}
|
||||
|
||||
|
|
@ -360,7 +422,11 @@ export function getAvailableTeamProviderModelOptions(
|
|||
...visibleModels.map((model) => {
|
||||
const catalogOption = getRuntimeCatalogModelOption(providerId, model, providerStatus);
|
||||
if (catalogOption) {
|
||||
return catalogOption;
|
||||
return {
|
||||
...catalogOption,
|
||||
availabilityStatus: getRuntimeModelAvailability(providerId, model, providerStatus),
|
||||
availabilityReason: getRuntimeModelAvailabilityReason(model, providerStatus),
|
||||
};
|
||||
}
|
||||
return {
|
||||
value: model,
|
||||
|
|
@ -464,6 +530,15 @@ export function getTeamModelSelectionError(
|
|||
return `Model "${trimmed}" is disabled. ${disabledReason}`;
|
||||
}
|
||||
|
||||
const dynamicUnavailableReason = getOpenCodeOpenAiRouteAuthUnavailableReason(
|
||||
providerId,
|
||||
trimmed,
|
||||
providerStatus
|
||||
);
|
||||
if (dynamicUnavailableReason) {
|
||||
return `Model "${trimmed}" is not available for the current ${getTeamProviderLabel(providerId) ?? providerId} runtime. ${dynamicUnavailableReason}`;
|
||||
}
|
||||
|
||||
if (providerId === 'anthropic') {
|
||||
return isTeamModelAvailableForUi(providerId, trimmed, providerStatus)
|
||||
? null
|
||||
|
|
@ -483,5 +558,11 @@ export function getTeamModelSelectionError(
|
|||
return `Model "${trimmed}" is not available for the current ${getTeamProviderLabel(providerId) ?? providerId} runtime. Pick one of the listed models or use Default.`;
|
||||
}
|
||||
|
||||
const availability = getRuntimeModelAvailability(providerId, trimmed, providerStatus);
|
||||
if (availability !== 'available') {
|
||||
const reason = getRuntimeModelAvailabilityReason(trimmed, providerStatus);
|
||||
return `Model "${trimmed}" is not available for the current ${getTeamProviderLabel(providerId) ?? providerId} runtime.${reason ? ` ${reason}` : ''} Pick one of the listed models or use Default.`;
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
|
|
|||
96
src/renderer/utils/teamModelRecommendations.ts
Normal file
96
src/renderer/utils/teamModelRecommendations.ts
Normal file
|
|
@ -0,0 +1,96 @@
|
|||
import {
|
||||
getOpenCodeTeamModelRecommendation,
|
||||
getOpenCodeTeamModelRecommendationSortRank,
|
||||
} from '@renderer/utils/openCodeModelRecommendations';
|
||||
import { isSupportedAnthropicTeamModel } from '@renderer/utils/teamModelCatalog';
|
||||
|
||||
import type {
|
||||
OpenCodeTeamModelRecommendation,
|
||||
OpenCodeTeamModelRecommendationLevel,
|
||||
} from '@renderer/utils/openCodeModelRecommendations';
|
||||
import type { TeamProviderId } from '@shared/types';
|
||||
|
||||
export type TeamModelRecommendationLevel = OpenCodeTeamModelRecommendationLevel;
|
||||
export type TeamModelRecommendation = OpenCodeTeamModelRecommendation;
|
||||
|
||||
/**
 * Codex model ids that are labelled "Recommended". Entries are lower-case
 * because lookups use the normalized (trimmed, lower-cased) model id.
 */
const CODEX_TEAM_RECOMMENDED_MODELS = new Set<string>([
  'gpt-5.4-mini',
  'gpt-5.3-codex',
  'gpt-5.5',
]);

/** Explanation attached to recommended Codex models. */
const CODEX_RECOMMENDED_REASON =
  'This Codex model passed real Agent Teams launch and task-flow stress testing and is selected for stable team-agent behavior.';

/** Explanation attached to recommended Anthropic (Claude) models. */
const ANTHROPIC_RECOMMENDED_REASON =
  'This Claude model passed real Agent Teams launch, restart, and teammate-workflow stress testing.';
|
||||
|
||||
/**
 * Canonicalizes a model id for lookups: surrounding whitespace is removed and
 * the result is lower-cased.
 *
 * @param modelId Raw model id; may be `null` or `undefined`.
 * @returns The normalized id, or an empty string when no id was supplied.
 */
function normalizeTeamModelId(modelId: string | null | undefined): string {
  if (modelId === null || modelId === undefined) {
    return '';
  }
  return modelId.trim().toLowerCase();
}
|
||||
|
||||
/**
 * Builds a recommendation entry at the `'recommended'` level.
 *
 * @param reason Human-readable justification shown with the recommendation.
 * @returns A recommendation with level `'recommended'`, label `'Recommended'`,
 *   and the given reason.
 */
function getRecommendedRecommendation(reason: string): TeamModelRecommendation {
  const recommendation: TeamModelRecommendation = {
    level: 'recommended',
    label: 'Recommended',
    reason,
  };
  return recommendation;
}
|
||||
|
||||
/**
 * Resolves the recommendation (if any) for a model under a given provider.
 *
 * The model id is normalized (trimmed, lower-cased) before any lookup.
 * - `opencode`: delegated to `getOpenCodeTeamModelRecommendation`.
 * - `codex`: recommended when the id is in `CODEX_TEAM_RECOMMENDED_MODELS`.
 * - `anthropic`: recommended when `isSupportedAnthropicTeamModel` accepts the id.
 * - any other provider: no recommendation.
 *
 * @param providerId Provider the model belongs to.
 * @param modelId Raw model id; empty, `null`, or `undefined` yields `null`.
 * @returns The matching recommendation, or `null` when there is none.
 */
export function getTeamModelRecommendation(
  providerId: TeamProviderId,
  modelId: string | null | undefined
): TeamModelRecommendation | null {
  const normalizedModelId = normalizeTeamModelId(modelId);
  if (normalizedModelId === '') {
    return null;
  }

  switch (providerId) {
    case 'opencode':
      return getOpenCodeTeamModelRecommendation(normalizedModelId);
    case 'codex':
      return CODEX_TEAM_RECOMMENDED_MODELS.has(normalizedModelId)
        ? getRecommendedRecommendation(CODEX_RECOMMENDED_REASON)
        : null;
    case 'anthropic':
      return isSupportedAnthropicTeamModel(normalizedModelId)
        ? getRecommendedRecommendation(ANTHROPIC_RECOMMENDED_REASON)
        : null;
    default:
      return null;
  }
}
|
||||
|
||||
/**
 * Tells whether a model carries a recommendation at the `'recommended'` or
 * `'recommended-with-limits'` level for the given provider.
 *
 * @param providerId Provider the model belongs to.
 * @param modelId Raw model id; may be `null` or `undefined`.
 * @returns `true` for the two recommended levels, `false` for every other
 *   level or when there is no recommendation at all.
 */
export function isTeamModelRecommended(
  providerId: TeamProviderId,
  modelId: string | null | undefined
): boolean {
  const level = getTeamModelRecommendation(providerId, modelId)?.level;
  return level === 'recommended' || level === 'recommended-with-limits';
}
|
||||
|
||||
/**
 * Computes the sort rank of a model for recommendation ordering; lower ranks
 * sort first.
 *
 * OpenCode models use `getOpenCodeTeamModelRecommendationSortRank`, which
 * receives the model id exactly as passed in. For every other provider the
 * rank is `0` when the model is at the `'recommended'` level and `4` otherwise.
 *
 * @param providerId Provider the model belongs to.
 * @param modelId Raw model id; may be `null` or `undefined`.
 * @returns The numeric sort rank.
 */
function getTeamModelRecommendationSortRank(
  providerId: TeamProviderId,
  modelId: string | null | undefined
): number {
  if (providerId !== 'opencode') {
    const level = getTeamModelRecommendation(providerId, modelId)?.level;
    return level === 'recommended' ? 0 : 4;
  }

  return getOpenCodeTeamModelRecommendationSortRank(modelId);
}
|
||||
|
||||
/**
 * Comparator that orders two models of the same provider by recommendation
 * sort rank.
 *
 * @param providerId Provider both models belong to.
 * @param leftModelId First model id to compare.
 * @param rightModelId Second model id to compare.
 * @returns A negative number when the left model ranks first, a positive
 *   number when the right model ranks first, and `0` when the ranks are equal.
 */
export function compareTeamModelRecommendations(
  providerId: TeamProviderId,
  leftModelId: string | null | undefined,
  rightModelId: string | null | undefined
): number {
  const leftRank = getTeamModelRecommendationSortRank(providerId, leftModelId);
  const rightRank = getTeamModelRecommendationSortRank(providerId, rightModelId);
  return leftRank === rightRank ? 0 : leftRank - rightRank;
}
|
||||
|
|
@ -1,9 +1,9 @@
|
|||
{
|
||||
"generatedAt": "2026-05-08T22:48:31.416Z",
|
||||
"runsPerModel": 1,
|
||||
"generatedAt": "2026-05-09T23:16:07.760Z",
|
||||
"runsPerModel": 3,
|
||||
"qualification": {
|
||||
"minimumAverageScore": 80,
|
||||
"minimumSuccessfulRuns": 1,
|
||||
"minimumAverageScore": 90,
|
||||
"minimumSuccessfulRuns": 3,
|
||||
"minimumConsistencyScore": 85,
|
||||
"requireNoHardFailures": true
|
||||
},
|
||||
|
|
@ -11,93 +11,93 @@
|
|||
{
|
||||
"model": "opencode/big-pickle",
|
||||
"verdict": "recommended",
|
||||
"confidence": "low",
|
||||
"confidence": "high",
|
||||
"qualified": true,
|
||||
"readinessScore": 100,
|
||||
"averageScore": 100,
|
||||
"consistencyScore": 100,
|
||||
"behavioralAverageScore": 100,
|
||||
"minScore": 100,
|
||||
"successfulRuns": 1,
|
||||
"countedRuns": 1,
|
||||
"successfulRuns": 3,
|
||||
"countedRuns": 3,
|
||||
"hardFailures": 0,
|
||||
"providerInfraFailures": 0,
|
||||
"runtimeTransportFailures": 0,
|
||||
"modelBehaviorFailures": 0,
|
||||
"harnessFailures": 0,
|
||||
"p50DurationMs": 118757,
|
||||
"p95DurationMs": 118757,
|
||||
"p50DurationMs": 112355,
|
||||
"p95DurationMs": 116891,
|
||||
"stagePassRates": {
|
||||
"launchBootstrap": {
|
||||
"passed": 1,
|
||||
"total": 1,
|
||||
"passed": 3,
|
||||
"total": 3,
|
||||
"rate": 100
|
||||
},
|
||||
"directReply": {
|
||||
"passed": 1,
|
||||
"total": 1,
|
||||
"passed": 3,
|
||||
"total": 3,
|
||||
"rate": 100
|
||||
},
|
||||
"peerRelayAB": {
|
||||
"passed": 1,
|
||||
"total": 1,
|
||||
"passed": 3,
|
||||
"total": 3,
|
||||
"rate": 100
|
||||
},
|
||||
"peerRelayBC": {
|
||||
"passed": 1,
|
||||
"total": 1,
|
||||
"passed": 3,
|
||||
"total": 3,
|
||||
"rate": 100
|
||||
},
|
||||
"concurrentReplies": {
|
||||
"passed": 1,
|
||||
"total": 1,
|
||||
"passed": 3,
|
||||
"total": 3,
|
||||
"rate": 100
|
||||
},
|
||||
"taskRefs": {
|
||||
"passed": 1,
|
||||
"total": 1,
|
||||
"passed": 3,
|
||||
"total": 3,
|
||||
"rate": 100
|
||||
},
|
||||
"cleanTranscript": {
|
||||
"passed": 1,
|
||||
"total": 1,
|
||||
"passed": 3,
|
||||
"total": 3,
|
||||
"rate": 100
|
||||
},
|
||||
"noDuplicateTokens": {
|
||||
"passed": 1,
|
||||
"total": 1,
|
||||
"passed": 3,
|
||||
"total": 3,
|
||||
"rate": 100
|
||||
},
|
||||
"latencyStable": {
|
||||
"passed": 1,
|
||||
"total": 1,
|
||||
"passed": 3,
|
||||
"total": 3,
|
||||
"rate": 100
|
||||
}
|
||||
},
|
||||
"taskRefPassRates": {
|
||||
"directReply": {
|
||||
"passed": 1,
|
||||
"total": 1,
|
||||
"passed": 3,
|
||||
"total": 3,
|
||||
"rate": 100
|
||||
},
|
||||
"peerRelayAB": {
|
||||
"passed": 1,
|
||||
"total": 1,
|
||||
"passed": 3,
|
||||
"total": 3,
|
||||
"rate": 100
|
||||
},
|
||||
"peerRelayBC": {
|
||||
"passed": 1,
|
||||
"total": 1,
|
||||
"passed": 3,
|
||||
"total": 3,
|
||||
"rate": 100
|
||||
},
|
||||
"concurrentBob": {
|
||||
"passed": 1,
|
||||
"total": 1,
|
||||
"passed": 3,
|
||||
"total": 3,
|
||||
"rate": 100
|
||||
},
|
||||
"concurrentTom": {
|
||||
"passed": 1,
|
||||
"total": 1,
|
||||
"passed": 3,
|
||||
"total": 3,
|
||||
"rate": 100
|
||||
}
|
||||
},
|
||||
|
|
@ -112,8 +112,8 @@
|
|||
"failedRuns": 0,
|
||||
"weightedLoss": 0,
|
||||
"passRate": {
|
||||
"passed": 1,
|
||||
"total": 1,
|
||||
"passed": 3,
|
||||
"total": 3,
|
||||
"rate": 100
|
||||
}
|
||||
},
|
||||
|
|
@ -122,8 +122,8 @@
|
|||
"failedRuns": 0,
|
||||
"weightedLoss": 0,
|
||||
"passRate": {
|
||||
"passed": 1,
|
||||
"total": 1,
|
||||
"passed": 3,
|
||||
"total": 3,
|
||||
"rate": 100
|
||||
}
|
||||
},
|
||||
|
|
@ -132,8 +132,8 @@
|
|||
"failedRuns": 0,
|
||||
"weightedLoss": 0,
|
||||
"passRate": {
|
||||
"passed": 1,
|
||||
"total": 1,
|
||||
"passed": 3,
|
||||
"total": 3,
|
||||
"rate": 100
|
||||
}
|
||||
},
|
||||
|
|
@ -142,8 +142,8 @@
|
|||
"failedRuns": 0,
|
||||
"weightedLoss": 0,
|
||||
"passRate": {
|
||||
"passed": 1,
|
||||
"total": 1,
|
||||
"passed": 3,
|
||||
"total": 3,
|
||||
"rate": 100
|
||||
}
|
||||
},
|
||||
|
|
@ -152,8 +152,8 @@
|
|||
"failedRuns": 0,
|
||||
"weightedLoss": 0,
|
||||
"passRate": {
|
||||
"passed": 1,
|
||||
"total": 1,
|
||||
"passed": 3,
|
||||
"total": 3,
|
||||
"rate": 100
|
||||
}
|
||||
},
|
||||
|
|
@ -162,8 +162,8 @@
|
|||
"failedRuns": 0,
|
||||
"weightedLoss": 0,
|
||||
"passRate": {
|
||||
"passed": 1,
|
||||
"total": 1,
|
||||
"passed": 3,
|
||||
"total": 3,
|
||||
"rate": 100
|
||||
}
|
||||
},
|
||||
|
|
@ -172,8 +172,8 @@
|
|||
"failedRuns": 0,
|
||||
"weightedLoss": 0,
|
||||
"passRate": {
|
||||
"passed": 1,
|
||||
"total": 1,
|
||||
"passed": 3,
|
||||
"total": 3,
|
||||
"rate": 100
|
||||
}
|
||||
},
|
||||
|
|
@ -182,8 +182,8 @@
|
|||
"failedRuns": 0,
|
||||
"weightedLoss": 0,
|
||||
"passRate": {
|
||||
"passed": 1,
|
||||
"total": 1,
|
||||
"passed": 3,
|
||||
"total": 3,
|
||||
"rate": 100
|
||||
}
|
||||
},
|
||||
|
|
@ -192,14 +192,14 @@
|
|||
"failedRuns": 0,
|
||||
"weightedLoss": 0,
|
||||
"passRate": {
|
||||
"passed": 1,
|
||||
"total": 1,
|
||||
"passed": 3,
|
||||
"total": 3,
|
||||
"rate": 100
|
||||
}
|
||||
}
|
||||
],
|
||||
"scoreStability": {
|
||||
"sampleSize": 1,
|
||||
"sampleSize": 3,
|
||||
"minScore": 100,
|
||||
"maxScore": 100,
|
||||
"spread": 0,
|
||||
|
|
@ -217,16 +217,16 @@
|
|||
"outcome": "passed",
|
||||
"failureCategory": "none",
|
||||
"primaryFailure": null,
|
||||
"durationMs": 118757,
|
||||
"durationMs": 112344,
|
||||
"hardFailure": false,
|
||||
"stageDurationsMs": {
|
||||
"setup": 225,
|
||||
"launchBootstrap": 20591,
|
||||
"materializeTasks": 36,
|
||||
"directReply": 14820,
|
||||
"peerRelayAB": 32039,
|
||||
"peerRelayBC": 27306,
|
||||
"concurrentReplies": 15426,
|
||||
"setup": 183,
|
||||
"launchBootstrap": 19933,
|
||||
"materializeTasks": 35,
|
||||
"directReply": 15430,
|
||||
"peerRelayAB": 25001,
|
||||
"peerRelayBC": 28154,
|
||||
"concurrentReplies": 15551,
|
||||
"hygiene": 1
|
||||
},
|
||||
"stageFailures": {},
|
||||
|
|
@ -253,7 +253,455 @@
|
|||
"latencyStable": true
|
||||
},
|
||||
"diagnostics": [
|
||||
"runId=44f5aa40-e169-49ed-9ea3-4c72aaf4a9f1"
|
||||
"runId=d9d27eb0-2798-4980-a0fa-f082a6edd705"
|
||||
]
|
||||
},
|
||||
{
|
||||
"runIndex": 2,
|
||||
"passed": true,
|
||||
"score": 100,
|
||||
"countedForRecommendation": true,
|
||||
"outcome": "passed",
|
||||
"failureCategory": "none",
|
||||
"primaryFailure": null,
|
||||
"durationMs": 112355,
|
||||
"hardFailure": false,
|
||||
"stageDurationsMs": {
|
||||
"setup": 11,
|
||||
"launchBootstrap": 18682,
|
||||
"materializeTasks": 36,
|
||||
"directReply": 15126,
|
||||
"peerRelayAB": 24835,
|
||||
"peerRelayBC": 28580,
|
||||
"concurrentReplies": 17164,
|
||||
"hygiene": 1
|
||||
},
|
||||
"stageFailures": {},
|
||||
"taskRefChecks": {
|
||||
"directReply": true,
|
||||
"peerRelayAB": true,
|
||||
"peerRelayBC": true,
|
||||
"concurrentBob": true,
|
||||
"concurrentTom": true
|
||||
},
|
||||
"protocolViolations": {
|
||||
"badMessages": 0,
|
||||
"duplicateOrMissingTokens": []
|
||||
},
|
||||
"stages": {
|
||||
"launchBootstrap": true,
|
||||
"directReply": true,
|
||||
"peerRelayAB": true,
|
||||
"peerRelayBC": true,
|
||||
"concurrentReplies": true,
|
||||
"taskRefs": true,
|
||||
"cleanTranscript": true,
|
||||
"noDuplicateTokens": true,
|
||||
"latencyStable": true
|
||||
},
|
||||
"diagnostics": [
|
||||
"runId=97364154-e06d-460c-94ae-65b73cb1b6f9"
|
||||
]
|
||||
},
|
||||
{
|
||||
"runIndex": 3,
|
||||
"passed": true,
|
||||
"score": 100,
|
||||
"countedForRecommendation": true,
|
||||
"outcome": "passed",
|
||||
"failureCategory": "none",
|
||||
"primaryFailure": null,
|
||||
"durationMs": 116891,
|
||||
"hardFailure": false,
|
||||
"stageDurationsMs": {
|
||||
"setup": 8,
|
||||
"launchBootstrap": 18926,
|
||||
"materializeTasks": 31,
|
||||
"directReply": 17061,
|
||||
"peerRelayAB": 27842,
|
||||
"peerRelayBC": 27262,
|
||||
"concurrentReplies": 15437,
|
||||
"hygiene": 1
|
||||
},
|
||||
"stageFailures": {},
|
||||
"taskRefChecks": {
|
||||
"directReply": true,
|
||||
"peerRelayAB": true,
|
||||
"peerRelayBC": true,
|
||||
"concurrentBob": true,
|
||||
"concurrentTom": true
|
||||
},
|
||||
"protocolViolations": {
|
||||
"badMessages": 0,
|
||||
"duplicateOrMissingTokens": []
|
||||
},
|
||||
"stages": {
|
||||
"launchBootstrap": true,
|
||||
"directReply": true,
|
||||
"peerRelayAB": true,
|
||||
"peerRelayBC": true,
|
||||
"concurrentReplies": true,
|
||||
"taskRefs": true,
|
||||
"cleanTranscript": true,
|
||||
"noDuplicateTokens": true,
|
||||
"latencyStable": true
|
||||
},
|
||||
"diagnostics": [
|
||||
"runId=7bdd4b2e-dbd6-4474-a8a0-9418df433671"
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"model": "opencode/minimax-m2.5-free",
|
||||
"verdict": "strong-candidate",
|
||||
"confidence": "high",
|
||||
"qualified": false,
|
||||
"readinessScore": 88.6,
|
||||
"averageScore": 98.3,
|
||||
"consistencyScore": 93.1,
|
||||
"behavioralAverageScore": 98.3,
|
||||
"minScore": 95,
|
||||
"successfulRuns": 2,
|
||||
"countedRuns": 3,
|
||||
"hardFailures": 1,
|
||||
"providerInfraFailures": 0,
|
||||
"runtimeTransportFailures": 0,
|
||||
"modelBehaviorFailures": 1,
|
||||
"harnessFailures": 0,
|
||||
"p50DurationMs": 108862,
|
||||
"p95DurationMs": 118757,
|
||||
"stagePassRates": {
|
||||
"launchBootstrap": {
|
||||
"passed": 3,
|
||||
"total": 3,
|
||||
"rate": 100
|
||||
},
|
||||
"directReply": {
|
||||
"passed": 3,
|
||||
"total": 3,
|
||||
"rate": 100
|
||||
},
|
||||
"peerRelayAB": {
|
||||
"passed": 3,
|
||||
"total": 3,
|
||||
"rate": 100
|
||||
},
|
||||
"peerRelayBC": {
|
||||
"passed": 3,
|
||||
"total": 3,
|
||||
"rate": 100
|
||||
},
|
||||
"concurrentReplies": {
|
||||
"passed": 3,
|
||||
"total": 3,
|
||||
"rate": 100
|
||||
},
|
||||
"taskRefs": {
|
||||
"passed": 3,
|
||||
"total": 3,
|
||||
"rate": 100
|
||||
},
|
||||
"cleanTranscript": {
|
||||
"passed": 3,
|
||||
"total": 3,
|
||||
"rate": 100
|
||||
},
|
||||
"noDuplicateTokens": {
|
||||
"passed": 2,
|
||||
"total": 3,
|
||||
"rate": 66.7
|
||||
},
|
||||
"latencyStable": {
|
||||
"passed": 3,
|
||||
"total": 3,
|
||||
"rate": 100
|
||||
}
|
||||
},
|
||||
"taskRefPassRates": {
|
||||
"directReply": {
|
||||
"passed": 3,
|
||||
"total": 3,
|
||||
"rate": 100
|
||||
},
|
||||
"peerRelayAB": {
|
||||
"passed": 3,
|
||||
"total": 3,
|
||||
"rate": 100
|
||||
},
|
||||
"peerRelayBC": {
|
||||
"passed": 3,
|
||||
"total": 3,
|
||||
"rate": 100
|
||||
},
|
||||
"concurrentBob": {
|
||||
"passed": 3,
|
||||
"total": 3,
|
||||
"rate": 100
|
||||
},
|
||||
"concurrentTom": {
|
||||
"passed": 3,
|
||||
"total": 3,
|
||||
"rate": 100
|
||||
}
|
||||
},
|
||||
"protocolViolationTotals": {
|
||||
"badMessages": 0,
|
||||
"duplicateOrMissingTokens": 2,
|
||||
"affectedRuns": 1
|
||||
},
|
||||
"stageFailureImpact": [
|
||||
{
|
||||
"stage": "noDuplicateTokens",
|
||||
"failedRuns": 1,
|
||||
"weightedLoss": 5,
|
||||
"passRate": {
|
||||
"passed": 2,
|
||||
"total": 3,
|
||||
"rate": 66.7
|
||||
}
|
||||
},
|
||||
{
|
||||
"stage": "cleanTranscript",
|
||||
"failedRuns": 0,
|
||||
"weightedLoss": 0,
|
||||
"passRate": {
|
||||
"passed": 3,
|
||||
"total": 3,
|
||||
"rate": 100
|
||||
}
|
||||
},
|
||||
{
|
||||
"stage": "concurrentReplies",
|
||||
"failedRuns": 0,
|
||||
"weightedLoss": 0,
|
||||
"passRate": {
|
||||
"passed": 3,
|
||||
"total": 3,
|
||||
"rate": 100
|
||||
}
|
||||
},
|
||||
{
|
||||
"stage": "directReply",
|
||||
"failedRuns": 0,
|
||||
"weightedLoss": 0,
|
||||
"passRate": {
|
||||
"passed": 3,
|
||||
"total": 3,
|
||||
"rate": 100
|
||||
}
|
||||
},
|
||||
{
|
||||
"stage": "latencyStable",
|
||||
"failedRuns": 0,
|
||||
"weightedLoss": 0,
|
||||
"passRate": {
|
||||
"passed": 3,
|
||||
"total": 3,
|
||||
"rate": 100
|
||||
}
|
||||
},
|
||||
{
|
||||
"stage": "launchBootstrap",
|
||||
"failedRuns": 0,
|
||||
"weightedLoss": 0,
|
||||
"passRate": {
|
||||
"passed": 3,
|
||||
"total": 3,
|
||||
"rate": 100
|
||||
}
|
||||
},
|
||||
{
|
||||
"stage": "peerRelayAB",
|
||||
"failedRuns": 0,
|
||||
"weightedLoss": 0,
|
||||
"passRate": {
|
||||
"passed": 3,
|
||||
"total": 3,
|
||||
"rate": 100
|
||||
}
|
||||
},
|
||||
{
|
||||
"stage": "peerRelayBC",
|
||||
"failedRuns": 0,
|
||||
"weightedLoss": 0,
|
||||
"passRate": {
|
||||
"passed": 3,
|
||||
"total": 3,
|
||||
"rate": 100
|
||||
}
|
||||
},
|
||||
{
|
||||
"stage": "taskRefs",
|
||||
"failedRuns": 0,
|
||||
"weightedLoss": 0,
|
||||
"passRate": {
|
||||
"passed": 3,
|
||||
"total": 3,
|
||||
"rate": 100
|
||||
}
|
||||
}
|
||||
],
|
||||
"scoreStability": {
|
||||
"sampleSize": 3,
|
||||
"minScore": 95,
|
||||
"maxScore": 100,
|
||||
"spread": 5,
|
||||
"standardDeviation": 2.4,
|
||||
"consistencyScore": 93.1
|
||||
},
|
||||
"dominantFailureCategory": "model-behavior",
|
||||
"recommendationBlockers": [
|
||||
"successful runs 2 < 3",
|
||||
"hard failures 1",
|
||||
"model-behavior failures 1",
|
||||
"highest weighted stage loss noDuplicateTokens=5",
|
||||
"protocol violations in 1 runs"
|
||||
],
|
||||
"runs": [
|
||||
{
|
||||
"runIndex": 1,
|
||||
"passed": true,
|
||||
"score": 100,
|
||||
"countedForRecommendation": true,
|
||||
"outcome": "passed",
|
||||
"failureCategory": "none",
|
||||
"primaryFailure": null,
|
||||
"durationMs": 91530,
|
||||
"hardFailure": false,
|
||||
"stageDurationsMs": {
|
||||
"setup": 10,
|
||||
"launchBootstrap": 18716,
|
||||
"materializeTasks": 31,
|
||||
"directReply": 11557,
|
||||
"peerRelayAB": 16323,
|
||||
"peerRelayBC": 27370,
|
||||
"concurrentReplies": 9606,
|
||||
"hygiene": 1
|
||||
},
|
||||
"stageFailures": {},
|
||||
"taskRefChecks": {
|
||||
"directReply": true,
|
||||
"peerRelayAB": true,
|
||||
"peerRelayBC": true,
|
||||
"concurrentBob": true,
|
||||
"concurrentTom": true
|
||||
},
|
||||
"protocolViolations": {
|
||||
"badMessages": 0,
|
||||
"duplicateOrMissingTokens": []
|
||||
},
|
||||
"stages": {
|
||||
"launchBootstrap": true,
|
||||
"directReply": true,
|
||||
"peerRelayAB": true,
|
||||
"peerRelayBC": true,
|
||||
"concurrentReplies": true,
|
||||
"taskRefs": true,
|
||||
"cleanTranscript": true,
|
||||
"noDuplicateTokens": true,
|
||||
"latencyStable": true
|
||||
},
|
||||
"diagnostics": [
|
||||
"runId=23ae85d2-e79d-41c9-93a6-e843acea6d9e"
|
||||
]
|
||||
},
|
||||
{
|
||||
"runIndex": 2,
|
||||
"passed": true,
|
||||
"score": 100,
|
||||
"countedForRecommendation": true,
|
||||
"outcome": "passed",
|
||||
"failureCategory": "none",
|
||||
"primaryFailure": null,
|
||||
"durationMs": 108862,
|
||||
"hardFailure": false,
|
||||
"stageDurationsMs": {
|
||||
"setup": 10,
|
||||
"launchBootstrap": 18359,
|
||||
"materializeTasks": 35,
|
||||
"directReply": 7236,
|
||||
"peerRelayAB": 30664,
|
||||
"peerRelayBC": 26124,
|
||||
"concurrentReplies": 18477,
|
||||
"hygiene": 0
|
||||
},
|
||||
"stageFailures": {},
|
||||
"taskRefChecks": {
|
||||
"directReply": true,
|
||||
"peerRelayAB": true,
|
||||
"peerRelayBC": true,
|
||||
"concurrentBob": true,
|
||||
"concurrentTom": true
|
||||
},
|
||||
"protocolViolations": {
|
||||
"badMessages": 0,
|
||||
"duplicateOrMissingTokens": []
|
||||
},
|
||||
"stages": {
|
||||
"launchBootstrap": true,
|
||||
"directReply": true,
|
||||
"peerRelayAB": true,
|
||||
"peerRelayBC": true,
|
||||
"concurrentReplies": true,
|
||||
"taskRefs": true,
|
||||
"cleanTranscript": true,
|
||||
"noDuplicateTokens": true,
|
||||
"latencyStable": true
|
||||
},
|
||||
"diagnostics": [
|
||||
"runId=c3a55d8a-4028-4af7-9e1a-8ae8c87a95e5"
|
||||
]
|
||||
},
|
||||
{
|
||||
"runIndex": 3,
|
||||
"passed": false,
|
||||
"score": 95,
|
||||
"countedForRecommendation": true,
|
||||
"outcome": "behavioral-fail",
|
||||
"failureCategory": "model-behavior",
|
||||
"primaryFailure": "duplicateOrMissingTokens=GAUNTLET_JACK_USER_OK_3,GAUNTLET_TOM_USER_OK_3",
|
||||
"durationMs": 118757,
|
||||
"hardFailure": true,
|
||||
"stageDurationsMs": {
|
||||
"setup": 9,
|
||||
"launchBootstrap": 19986,
|
||||
"materializeTasks": 37,
|
||||
"directReply": 8036,
|
||||
"peerRelayAB": 37430,
|
||||
"peerRelayBC": 36219,
|
||||
"concurrentReplies": 8551,
|
||||
"hygiene": 0
|
||||
},
|
||||
"stageFailures": {},
|
||||
"taskRefChecks": {
|
||||
"directReply": true,
|
||||
"peerRelayAB": true,
|
||||
"peerRelayBC": true,
|
||||
"concurrentBob": true,
|
||||
"concurrentTom": true
|
||||
},
|
||||
"protocolViolations": {
|
||||
"badMessages": 0,
|
||||
"duplicateOrMissingTokens": [
|
||||
"GAUNTLET_JACK_USER_OK_3",
|
||||
"GAUNTLET_TOM_USER_OK_3"
|
||||
]
|
||||
},
|
||||
"stages": {
|
||||
"launchBootstrap": true,
|
||||
"directReply": true,
|
||||
"peerRelayAB": true,
|
||||
"peerRelayBC": true,
|
||||
"concurrentReplies": true,
|
||||
"taskRefs": true,
|
||||
"cleanTranscript": true,
|
||||
"noDuplicateTokens": false,
|
||||
"latencyStable": true
|
||||
},
|
||||
"diagnostics": [
|
||||
"runId=2b0610e0-7b10-49fc-88dd-ab30b37abce9",
|
||||
"duplicateOrMissingTokens=GAUNTLET_JACK_USER_OK_3,GAUNTLET_TOM_USER_OK_3"
|
||||
]
|
||||
}
|
||||
]
|
||||
|
|
|
|||
|
|
@ -1,9 +1,9 @@
|
|||
# OpenCode Model Gauntlet Results
|
||||
|
||||
Generated: 2026-05-08T22:48:31.416Z
|
||||
Generated: 2026-05-09T23:16:07.760Z
|
||||
|
||||
Runs per model: 1
|
||||
Recommended threshold: average >= 80, successful runs >= 1, consistency >= 85, hard failures = 0
|
||||
Runs per model: 3
|
||||
Recommended threshold: average >= 90, successful runs >= 3, consistency >= 85, hard failures = 0
|
||||
|
||||
Provider-infra runs are reported separately and are not counted as model behavior. They still block a Recommended verdict until rerun succeeds.
|
||||
|
||||
|
|
@ -13,25 +13,50 @@ Scoring weights: launchBootstrap=15, directReply=10, peerRelayAB=15, peerRelayBC
|
|||
|
||||
| Model | Verdict | Confidence | Readiness | Consistency | Score Spread | Behavior Avg | Overall Avg | Counted | Pass Runs | Weakest Stage | Weakest TaskRef | Dominant Failure | Blockers | Provider Infra | Runtime Transport | Model Fails | Protocol Runs | p50 | p95 |
|
||||
| --- | --- | --- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | --- | --- | --- | --- | ---: | ---: | ---: | ---: | ---: | ---: |
|
||||
| `opencode/big-pickle` | Recommended | low | 100 | 100 | 0 | 100 | 100 | 1/1 | 1/1 | cleanTranscript 1/1 (100%) | concurrentBob 1/1 (100%) | none | - | 0 | 0 | 0 | 0 | 118757ms | 118757ms |
|
||||
| `opencode/big-pickle` | Recommended | high | 100 | 100 | 0 | 100 | 100 | 3/3 | 3/3 | cleanTranscript 3/3 (100%) | concurrentBob 3/3 (100%) | none | - | 0 | 0 | 0 | 0 | 112355ms | 116891ms |
|
||||
| `opencode/minimax-m2.5-free` | Strong candidate | high | 88.6 | 93.1 | 5 | 98.3 | 98.3 | 3/3 | 2/3 | noDuplicateTokens 2/3 (66.7%) | concurrentBob 3/3 (100%) | model-behavior | successful runs 2 < 3; hard failures 1; model-behavior failures 1; highest weighted stage loss noDuplicateTokens=5; protocol violations in 1 runs | 0 | 0 | 1 | 1 | 108862ms | 118757ms |
|
||||
|
||||
## opencode/big-pickle
|
||||
|
||||
Readiness score: 100.
|
||||
|
||||
Score stability: consistency=100, min=100, max=100, spread=0, stdDev=0, samples=1.
|
||||
Score stability: consistency=100, min=100, max=100, spread=0, stdDev=0, samples=3.
|
||||
|
||||
Recommendation blockers: -.
|
||||
|
||||
Weighted stage impact: -.
|
||||
|
||||
Stage pass rates: launchBootstrap:1/1 (100%), directReply:1/1 (100%), peerRelayAB:1/1 (100%), peerRelayBC:1/1 (100%), concurrentReplies:1/1 (100%), taskRefs:1/1 (100%), cleanTranscript:1/1 (100%), noDuplicateTokens:1/1 (100%), latencyStable:1/1 (100%).
|
||||
Stage pass rates: launchBootstrap:3/3 (100%), directReply:3/3 (100%), peerRelayAB:3/3 (100%), peerRelayBC:3/3 (100%), concurrentReplies:3/3 (100%), taskRefs:3/3 (100%), cleanTranscript:3/3 (100%), noDuplicateTokens:3/3 (100%), latencyStable:3/3 (100%).
|
||||
|
||||
TaskRef pass rates: directReply:1/1 (100%), peerRelayAB:1/1 (100%), peerRelayBC:1/1 (100%), concurrentBob:1/1 (100%), concurrentTom:1/1 (100%).
|
||||
TaskRef pass rates: directReply:3/3 (100%), peerRelayAB:3/3 (100%), peerRelayBC:3/3 (100%), concurrentBob:3/3 (100%), concurrentTom:3/3 (100%).
|
||||
|
||||
Protocol totals: badMessages=0, duplicateOrMissingTokens=0, affectedRuns=0.
|
||||
|
||||
| Run | Outcome | Category | Score | Counted | Duration | Failed Stages | Slowest Stage | TaskRefs | Protocol | Diagnostics |
|
||||
| ---: | --- | --- | ---: | --- | ---: | --- | --- | --- | --- | --- |
|
||||
| 1 | passed | none | 100 | yes | 118757ms | - | peerRelayAB:32039ms | directReply:ok, peerRelayAB:ok, peerRelayBC:ok, concurrentBob:ok, concurrentTom:ok | - | runId=44f5aa40-e169-49ed-9ea3-4c72aaf4a9f1 |
|
||||
| 1 | passed | none | 100 | yes | 112344ms | - | peerRelayBC:28154ms | directReply:ok, peerRelayAB:ok, peerRelayBC:ok, concurrentBob:ok, concurrentTom:ok | - | runId=d9d27eb0-2798-4980-a0fa-f082a6edd705 |
|
||||
| 2 | passed | none | 100 | yes | 112355ms | - | peerRelayBC:28580ms | directReply:ok, peerRelayAB:ok, peerRelayBC:ok, concurrentBob:ok, concurrentTom:ok | - | runId=97364154-e06d-460c-94ae-65b73cb1b6f9 |
|
||||
| 3 | passed | none | 100 | yes | 116891ms | - | peerRelayAB:27842ms | directReply:ok, peerRelayAB:ok, peerRelayBC:ok, concurrentBob:ok, concurrentTom:ok | - | runId=7bdd4b2e-dbd6-4474-a8a0-9418df433671 |
|
||||
|
||||
## opencode/minimax-m2.5-free
|
||||
|
||||
Readiness score: 88.6.
|
||||
|
||||
Score stability: consistency=93.1, min=95, max=100, spread=5, stdDev=2.4, samples=3.
|
||||
|
||||
Recommendation blockers: successful runs 2 < 3; hard failures 1; model-behavior failures 1; highest weighted stage loss noDuplicateTokens=5; protocol violations in 1 runs.
|
||||
|
||||
Weighted stage impact: noDuplicateTokens:loss=5, failed=1, pass=2/3 (66.7%).
|
||||
|
||||
Stage pass rates: launchBootstrap:3/3 (100%), directReply:3/3 (100%), peerRelayAB:3/3 (100%), peerRelayBC:3/3 (100%), concurrentReplies:3/3 (100%), taskRefs:3/3 (100%), cleanTranscript:3/3 (100%), noDuplicateTokens:2/3 (66.7%), latencyStable:3/3 (100%).
|
||||
|
||||
TaskRef pass rates: directReply:3/3 (100%), peerRelayAB:3/3 (100%), peerRelayBC:3/3 (100%), concurrentBob:3/3 (100%), concurrentTom:3/3 (100%).
|
||||
|
||||
Protocol totals: badMessages=0, duplicateOrMissingTokens=2, affectedRuns=1.
|
||||
|
||||
| Run | Outcome | Category | Score | Counted | Duration | Failed Stages | Slowest Stage | TaskRefs | Protocol | Diagnostics |
|
||||
| ---: | --- | --- | ---: | --- | ---: | --- | --- | --- | --- | --- |
|
||||
| 1 | passed | none | 100 | yes | 91530ms | - | peerRelayBC:27370ms | directReply:ok, peerRelayAB:ok, peerRelayBC:ok, concurrentBob:ok, concurrentTom:ok | - | runId=23ae85d2-e79d-41c9-93a6-e843acea6d9e |
|
||||
| 2 | passed | none | 100 | yes | 108862ms | - | peerRelayAB:30664ms | directReply:ok, peerRelayAB:ok, peerRelayBC:ok, concurrentBob:ok, concurrentTom:ok | - | runId=c3a55d8a-4028-4af7-9e1a-8ae8c87a95e5 |
|
||||
| 3 | behavioral-fail | model-behavior | 95 | yes | 118757ms | noDuplicateTokens | peerRelayAB:37430ms | directReply:ok, peerRelayAB:ok, peerRelayBC:ok, concurrentBob:ok, concurrentTom:ok | token=GAUNTLET_JACK_USER_OK_3+GAUNTLET_TOM_USER_OK_3 | duplicateOrMissingTokens=GAUNTLET_JACK_USER_OK_3,GAUNTLET_TOM_USER_OK_3 |
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,62 @@
|
|||
{
|
||||
"generatedAt": "2026-05-09T22:48:19.222Z",
|
||||
"models": [
|
||||
{
|
||||
"model": "opencode/big-pickle",
|
||||
"passed": true,
|
||||
"score": 100,
|
||||
"durationMs": 67267,
|
||||
"stages": {
|
||||
"launchBootstrap": true,
|
||||
"directReply": true,
|
||||
"peerRelay": true,
|
||||
"taskRefs": true,
|
||||
"longPrompt": true,
|
||||
"latencyStable": true
|
||||
},
|
||||
"diagnostics": [
|
||||
"runId=f44e2547-3689-41a1-9a0a-75d38bdb1097",
|
||||
"directDelivery={\"delivered\":true,\"accepted\":true,\"responsePending\":false,\"responseState\":\"responded_visible_message\",\"ledgerStatus\":\"responded\",\"visibleReplyMessageId\":\"ad750c26-d9bd-4028-9936-754cbf7aef7b\",\"visibleReplyCorrelation\":\"relayOfMessageId\",\"diagnostics\":[\"OpenCode app MCP was reattached before message delivery.\",\"Reattached stale OpenCode app MCP server: existing app MCP config does not expose environment; CLAUDE_TEAM_CONTROL_URL cannot be verified\",\"opencode_direct_user_delivery_inline_observe_attempt_1\",\"opencode_message_send_tool_error_inline_observe\"]}",
|
||||
"peerDelivery={\"delivered\":true,\"accepted\":true,\"responsePending\":true,\"responseState\":\"responded_visible_message\",\"ledgerStatus\":\"retry_scheduled\",\"reason\":\"visible_reply_destination_not_found_yet\",\"visibleReplyCorrelation\":\"direct_child_message_send\",\"diagnostics\":[\"OpenCode bootstrap MCP did not complete required tools before assistant response: runtime_bootstrap_checkin, member_briefing\",\"Recreated OpenCode session before message delivery because bootstrap MCP failed in the stored session.\",\"OpenCode app MCP was reattached before message delivery.\",\"Reattached stale OpenCode app MCP server: existing app MCP config does not expose environment; CLAUDE_TEAM_CONTROL_URL cannot be verified\"]}"
|
||||
]
|
||||
},
|
||||
{
|
||||
"model": "opencode/minimax-m2.5-free",
|
||||
"passed": true,
|
||||
"score": 100,
|
||||
"durationMs": 66426,
|
||||
"stages": {
|
||||
"launchBootstrap": true,
|
||||
"directReply": true,
|
||||
"peerRelay": true,
|
||||
"taskRefs": true,
|
||||
"longPrompt": true,
|
||||
"latencyStable": true
|
||||
},
|
||||
"diagnostics": [
|
||||
"runId=1659a3ab-ba64-432b-95ec-ab1d88371ebf",
|
||||
"directDelivery={\"delivered\":true,\"accepted\":true,\"responsePending\":false,\"responseState\":\"responded_visible_message\",\"ledgerStatus\":\"responded\",\"visibleReplyMessageId\":\"f03532ac-cccf-450d-a951-63a98d02125d\",\"visibleReplyCorrelation\":\"relayOfMessageId\",\"diagnostics\":[\"OpenCode app MCP was reattached before message delivery.\",\"Reattached stale OpenCode app MCP server: existing app MCP config does not expose environment; CLAUDE_TEAM_CONTROL_URL cannot be verified\"]}",
|
||||
"peerDelivery={\"delivered\":true,\"accepted\":true,\"responsePending\":true,\"responseState\":\"empty_assistant_turn\",\"ledgerStatus\":\"retry_scheduled\",\"reason\":\"empty_assistant_turn\",\"diagnostics\":[\"OpenCode bootstrap MCP did not complete required tools before assistant response: runtime_bootstrap_checkin, member_briefing\",\"Recreated OpenCode session before message delivery because bootstrap MCP failed in the stored session.\",\"OpenCode app MCP was reattached before message delivery.\",\"Reattached stale OpenCode app MCP server: existing app MCP config does not expose environment; CLAUDE_TEAM_CONTROL_URL cannot be verified\"]}"
|
||||
]
|
||||
},
|
||||
{
|
||||
"model": "opencode/nemotron-3-super-free",
|
||||
"passed": true,
|
||||
"score": 100,
|
||||
"durationMs": 77760,
|
||||
"stages": {
|
||||
"launchBootstrap": true,
|
||||
"directReply": true,
|
||||
"peerRelay": true,
|
||||
"taskRefs": true,
|
||||
"longPrompt": true,
|
||||
"latencyStable": true
|
||||
},
|
||||
"diagnostics": [
|
||||
"runId=0bd03f13-bac1-4220-96cc-7753944e4ff0",
|
||||
"directDelivery={\"delivered\":true,\"accepted\":true,\"responsePending\":false,\"responseState\":\"responded_visible_message\",\"ledgerStatus\":\"responded\",\"visibleReplyMessageId\":\"247aa6f8-3c53-4353-b9b2-0e3cdc7d5b34\",\"visibleReplyCorrelation\":\"relayOfMessageId\",\"diagnostics\":[\"OpenCode app MCP was reattached before message delivery.\",\"Reattached stale OpenCode app MCP server: existing app MCP config does not expose environment; CLAUDE_TEAM_CONTROL_URL cannot be verified\",\"opencode_direct_user_delivery_inline_observe_attempt_1\"]}",
|
||||
"peerDelivery={\"delivered\":true,\"accepted\":true,\"responsePending\":true,\"responseState\":\"empty_assistant_turn\",\"ledgerStatus\":\"retry_scheduled\",\"reason\":\"empty_assistant_turn\",\"diagnostics\":[\"OpenCode bootstrap MCP did not complete required tools before assistant response: runtime_bootstrap_checkin, member_briefing\",\"Recreated OpenCode session before message delivery because bootstrap MCP failed in the stored session.\",\"OpenCode app MCP was reattached before message delivery.\",\"Reattached stale OpenCode app MCP server: existing app MCP config does not expose environment; CLAUDE_TEAM_CONTROL_URL cannot be verified\"]}"
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
|
|
@ -2,7 +2,7 @@ import { createHash } from 'crypto';
|
|||
import { mkdtemp, mkdir, rm, stat, writeFile } from 'fs/promises';
|
||||
import { tmpdir } from 'os';
|
||||
import * as path from 'path';
|
||||
import { afterEach, describe, expect, it, vi } from 'vitest';
|
||||
import { afterAll, afterEach, beforeAll, describe, expect, it, vi } from 'vitest';
|
||||
|
||||
import {
|
||||
shouldIgnoreLogSourceWatcherPath,
|
||||
|
|
@ -12,6 +12,9 @@ import {
|
|||
import type { TeamMemberLogsFinder } from '../../../../src/main/services/team/TeamMemberLogsFinder';
|
||||
import type { TeamChangeEvent } from '../../../../src/shared/types';
|
||||
|
||||
const originalChokidarUsePolling = process.env.CHOKIDAR_USEPOLLING;
|
||||
const originalChokidarInterval = process.env.CHOKIDAR_INTERVAL;
|
||||
|
||||
function safeTaskIdSegment(taskId: string): string {
|
||||
return `task-id-${createHash('sha256').update(taskId).digest('hex').slice(0, 32)}`;
|
||||
}
|
||||
|
|
@ -19,6 +22,11 @@ function safeTaskIdSegment(taskId: string): string {
|
|||
describe('TeamLogSourceTracker', () => {
|
||||
let tempDir: string | null = null;
|
||||
|
||||
beforeAll(() => {
|
||||
process.env.CHOKIDAR_USEPOLLING = '1';
|
||||
process.env.CHOKIDAR_INTERVAL = '25';
|
||||
});
|
||||
|
||||
afterEach(async () => {
|
||||
if (tempDir) {
|
||||
await rm(tempDir, { recursive: true, force: true });
|
||||
|
|
@ -26,6 +34,19 @@ describe('TeamLogSourceTracker', () => {
|
|||
}
|
||||
});
|
||||
|
||||
afterAll(() => {
|
||||
if (originalChokidarUsePolling === undefined) {
|
||||
delete process.env.CHOKIDAR_USEPOLLING;
|
||||
} else {
|
||||
process.env.CHOKIDAR_USEPOLLING = originalChokidarUsePolling;
|
||||
}
|
||||
if (originalChokidarInterval === undefined) {
|
||||
delete process.env.CHOKIDAR_INTERVAL;
|
||||
} else {
|
||||
process.env.CHOKIDAR_INTERVAL = originalChokidarInterval;
|
||||
}
|
||||
});
|
||||
|
||||
it('emits task-log-change for matching runtime freshness signals without broad log-source-change', async () => {
|
||||
tempDir = await mkdtemp(path.join(tmpdir(), 'team-log-source-tracker-'));
|
||||
|
||||
|
|
|
|||
|
|
@ -285,7 +285,7 @@ describe('TeamModelSelector disabled Codex models', () => {
|
|||
expect(host.textContent).toContain('mistralai/codestral-2508');
|
||||
expect(host.textContent).toContain('Tested');
|
||||
expect(host.textContent).toContain('minimax-m2.5-free');
|
||||
expect(host.textContent).toContain('Recommended with limits');
|
||||
expect(host.textContent).toContain('Tested with limits');
|
||||
expect(host.textContent).toContain('openai/gpt-oss-120b:free');
|
||||
expect(host.textContent).toContain('big-pickle');
|
||||
expect(host.textContent).toContain('qwen/qwen3-coder-plus');
|
||||
|
|
@ -313,8 +313,8 @@ describe('TeamModelSelector disabled Codex models', () => {
|
|||
expect(limitedIndex).toBeGreaterThanOrEqual(0);
|
||||
expect(testedIndex).toBeGreaterThanOrEqual(0);
|
||||
expect(limitedIndex).toBeGreaterThan(recommendedIndex);
|
||||
expect(testedIndex).toBeGreaterThan(limitedIndex);
|
||||
expect(unavailableIndex).toBeGreaterThan(testedIndex);
|
||||
expect(testedIndex).toBeGreaterThan(recommendedIndex);
|
||||
expect(unavailableIndex).toBeGreaterThan(limitedIndex);
|
||||
expect(notRecommendedIndex).toBeGreaterThan(unavailableIndex);
|
||||
|
||||
expect(host.textContent).toContain('Recommended only');
|
||||
|
|
@ -325,6 +325,223 @@ describe('TeamModelSelector disabled Codex models', () => {
|
|||
});
|
||||
});
|
||||
|
||||
it('shows short-lived OpenCode preflight failures as unavailable model tiles', async () => {
|
||||
vi.stubGlobal('IS_REACT_ACT_ENVIRONMENT', true);
|
||||
storeState.cliStatus = {
|
||||
flavor: 'agent_teams_orchestrator',
|
||||
providers: [
|
||||
{
|
||||
providerId: 'opencode',
|
||||
authMethod: 'opencode_managed',
|
||||
backend: {
|
||||
kind: 'opencode-cli',
|
||||
label: 'OpenCode CLI',
|
||||
endpointLabel: 'opencode',
|
||||
},
|
||||
authenticated: true,
|
||||
supported: true,
|
||||
capabilities: {
|
||||
teamLaunch: true,
|
||||
},
|
||||
models: ['openai/gpt-5.4', 'opencode/big-pickle'],
|
||||
modelVerificationState: 'idle',
|
||||
modelAvailability: [],
|
||||
},
|
||||
],
|
||||
};
|
||||
|
||||
const host = document.createElement('div');
|
||||
document.body.appendChild(host);
|
||||
const root = createRoot(host);
|
||||
const onValueChange = vi.fn();
|
||||
|
||||
await act(async () => {
|
||||
root.render(
|
||||
React.createElement(TeamModelSelector, {
|
||||
providerId: 'opencode',
|
||||
onProviderChange: () => undefined,
|
||||
value: '',
|
||||
onValueChange,
|
||||
modelUnavailableReasonByValue: {
|
||||
'openai/gpt-5.4': 'OpenCode provider authentication failed',
|
||||
},
|
||||
})
|
||||
);
|
||||
await Promise.resolve();
|
||||
});
|
||||
|
||||
const unavailableButton = Array.from(host.querySelectorAll('button')).find((button) =>
|
||||
button.textContent?.includes('GPT-5.4')
|
||||
);
|
||||
expect(unavailableButton).not.toBeNull();
|
||||
expect(unavailableButton?.getAttribute('aria-disabled')).toBe('true');
|
||||
expect(unavailableButton?.textContent).toContain('Unavailable');
|
||||
expect(unavailableButton?.getAttribute('title')).toContain(
|
||||
'OpenCode provider authentication failed'
|
||||
);
|
||||
|
||||
await act(async () => {
|
||||
unavailableButton?.dispatchEvent(new MouseEvent('click', { bubbles: true }));
|
||||
await Promise.resolve();
|
||||
});
|
||||
|
||||
expect(onValueChange).not.toHaveBeenCalled();
|
||||
|
||||
await act(async () => {
|
||||
root.unmount();
|
||||
await Promise.resolve();
|
||||
});
|
||||
});
|
||||
|
||||
it('shows short-lived OpenCode preflight notes as selectable issue tiles', async () => {
|
||||
vi.stubGlobal('IS_REACT_ACT_ENVIRONMENT', true);
|
||||
storeState.cliStatus = {
|
||||
flavor: 'agent_teams_orchestrator',
|
||||
providers: [
|
||||
{
|
||||
providerId: 'opencode',
|
||||
authMethod: 'opencode_managed',
|
||||
backend: {
|
||||
kind: 'opencode-cli',
|
||||
label: 'OpenCode CLI',
|
||||
endpointLabel: 'opencode',
|
||||
},
|
||||
authenticated: true,
|
||||
supported: true,
|
||||
capabilities: {
|
||||
teamLaunch: true,
|
||||
},
|
||||
models: ['openai/gpt-5.4', 'opencode/big-pickle'],
|
||||
modelVerificationState: 'idle',
|
||||
modelAvailability: [],
|
||||
},
|
||||
],
|
||||
};
|
||||
|
||||
const host = document.createElement('div');
|
||||
document.body.appendChild(host);
|
||||
const root = createRoot(host);
|
||||
const onValueChange = vi.fn();
|
||||
|
||||
await act(async () => {
|
||||
root.render(
|
||||
React.createElement(TeamModelSelector, {
|
||||
providerId: 'opencode',
|
||||
onProviderChange: () => undefined,
|
||||
value: '',
|
||||
onValueChange,
|
||||
modelIssueReasonByValue: {
|
||||
'openai/gpt-5.4': 'Model verification timed out',
|
||||
},
|
||||
})
|
||||
);
|
||||
await Promise.resolve();
|
||||
});
|
||||
|
||||
const issueButton = Array.from(host.querySelectorAll('button')).find((button) =>
|
||||
button.textContent?.includes('GPT-5.4')
|
||||
);
|
||||
expect(issueButton).not.toBeNull();
|
||||
expect(issueButton?.getAttribute('aria-disabled')).toBe('false');
|
||||
expect(issueButton?.textContent).toContain('Issue');
|
||||
expect(issueButton?.getAttribute('title')).toContain('Model verification timed out');
|
||||
|
||||
await act(async () => {
|
||||
issueButton?.dispatchEvent(new MouseEvent('click', { bubbles: true }));
|
||||
await Promise.resolve();
|
||||
});
|
||||
|
||||
expect(onValueChange).toHaveBeenCalledWith('openai/gpt-5.4');
|
||||
|
||||
await act(async () => {
|
||||
root.unmount();
|
||||
await Promise.resolve();
|
||||
});
|
||||
});
|
||||
|
||||
it('dynamically disables OpenCode openai routes when OpenAI auth is invalid', async () => {
|
||||
vi.stubGlobal('IS_REACT_ACT_ENVIRONMENT', true);
|
||||
storeState.cliStatus = {
|
||||
flavor: 'agent_teams_orchestrator',
|
||||
providers: [
|
||||
{
|
||||
providerId: 'opencode',
|
||||
authMethod: 'opencode_managed',
|
||||
backend: {
|
||||
kind: 'opencode-cli',
|
||||
label: 'OpenCode CLI',
|
||||
endpointLabel: 'opencode',
|
||||
},
|
||||
authenticated: true,
|
||||
supported: true,
|
||||
capabilities: {
|
||||
teamLaunch: true,
|
||||
},
|
||||
statusMessage: 'OpenAI token invalid',
|
||||
detailMessage: 'OpenAI token refresh failed: 401',
|
||||
models: ['openai/gpt-5.4', 'opencode/big-pickle'],
|
||||
availableBackends: [
|
||||
{
|
||||
id: 'openai',
|
||||
label: 'OpenAI',
|
||||
description: 'OpenAI route',
|
||||
selectable: false,
|
||||
recommended: false,
|
||||
available: false,
|
||||
state: 'authentication-required',
|
||||
statusMessage: 'Authentication required',
|
||||
detailMessage: 'Token refresh failed: 401',
|
||||
},
|
||||
],
|
||||
modelVerificationState: 'idle',
|
||||
modelAvailability: [],
|
||||
},
|
||||
],
|
||||
};
|
||||
|
||||
const host = document.createElement('div');
|
||||
document.body.appendChild(host);
|
||||
const root = createRoot(host);
|
||||
const onValueChange = vi.fn();
|
||||
|
||||
await act(async () => {
|
||||
root.render(
|
||||
React.createElement(TeamModelSelector, {
|
||||
providerId: 'opencode',
|
||||
onProviderChange: () => undefined,
|
||||
value: '',
|
||||
onValueChange,
|
||||
})
|
||||
);
|
||||
await Promise.resolve();
|
||||
});
|
||||
|
||||
const openAiButton = Array.from(host.querySelectorAll('button')).find((button) =>
|
||||
button.textContent?.includes('GPT-5.4')
|
||||
);
|
||||
const bigPickleButton = Array.from(host.querySelectorAll('button')).find((button) =>
|
||||
button.textContent?.includes('big-pickle')
|
||||
);
|
||||
|
||||
expect(openAiButton).not.toBeNull();
|
||||
expect(openAiButton?.getAttribute('aria-disabled')).toBe('true');
|
||||
expect(openAiButton?.textContent).toContain('Unavailable');
|
||||
expect(bigPickleButton).not.toBeNull();
|
||||
expect(bigPickleButton?.getAttribute('aria-disabled')).toBe('false');
|
||||
|
||||
await act(async () => {
|
||||
openAiButton?.dispatchEvent(new MouseEvent('click', { bubbles: true }));
|
||||
await Promise.resolve();
|
||||
});
|
||||
|
||||
expect(onValueChange).not.toHaveBeenCalled();
|
||||
|
||||
await act(async () => {
|
||||
root.unmount();
|
||||
await Promise.resolve();
|
||||
});
|
||||
});
|
||||
|
||||
it('constrains long runtime model lists so the selector scrolls', async () => {
|
||||
vi.stubGlobal('IS_REACT_ACT_ENVIRONMENT', true);
|
||||
storeState.cliStatus = {
|
||||
|
|
@ -842,13 +1059,15 @@ describe('TeamModelSelector disabled Codex models', () => {
|
|||
const modelButtons = Array.from(host.querySelectorAll('button')).map(
|
||||
(button) => button.textContent?.trim() ?? ''
|
||||
);
|
||||
const hasModelButtonStartingWith = (label: string): boolean =>
|
||||
modelButtons.some((text) => text.startsWith(label));
|
||||
|
||||
expect(modelButtons.some((text) => text.startsWith('Default'))).toBe(true);
|
||||
expect(modelButtons).toContain('Opus 4.8');
|
||||
expect(modelButtons).toContain('Opus 4.6');
|
||||
expect(modelButtons).toContain('Sonnet 4.7');
|
||||
expect(modelButtons).toContain('Haiku 4.6');
|
||||
expect(modelButtons).not.toContain('Opus 4.8 (1M)');
|
||||
expect(hasModelButtonStartingWith('Default')).toBe(true);
|
||||
expect(hasModelButtonStartingWith('Opus 4.8')).toBe(true);
|
||||
expect(hasModelButtonStartingWith('Opus 4.6')).toBe(true);
|
||||
expect(hasModelButtonStartingWith('Sonnet 4.7')).toBe(true);
|
||||
expect(hasModelButtonStartingWith('Haiku 4.6')).toBe(true);
|
||||
expect(hasModelButtonStartingWith('Opus 4.8 (1M)')).toBe(false);
|
||||
|
||||
await act(async () => {
|
||||
root.unmount();
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@ import { afterEach, describe, expect, it, vi } from 'vitest';
|
|||
|
||||
import {
|
||||
__resetShortLivedProviderPrepareCacheForTests,
|
||||
getShortLivedProviderPrepareModelIssueReasons,
|
||||
getShortLivedProviderPrepareModelResults,
|
||||
storeShortLivedProviderPrepareModelResults,
|
||||
} from '@renderer/components/team/dialogs/providerPrepareShortLivedCache';
|
||||
|
|
@ -42,6 +43,17 @@ describe('providerPrepareShortLivedCache', () => {
|
|||
warningLine: null,
|
||||
},
|
||||
});
|
||||
expect(
|
||||
getShortLivedProviderPrepareModelIssueReasons({
|
||||
providerId: 'opencode',
|
||||
cacheKey: 'key-1',
|
||||
})
|
||||
).toEqual({
|
||||
modelIssueReasonByValue: {
|
||||
'opencode/nemotron-3-super-free': 'timed out',
|
||||
},
|
||||
modelUnavailableReasonByValue: {},
|
||||
});
|
||||
});
|
||||
|
||||
it('expires cached OpenCode results after the short-lived TTL', () => {
|
||||
|
|
@ -68,6 +80,100 @@ describe('providerPrepareShortLivedCache', () => {
|
|||
).toEqual({});
|
||||
});
|
||||
|
||||
it('stores short-lived OpenCode failed model results as blocking unavailable issues', () => {
|
||||
storeShortLivedProviderPrepareModelResults({
|
||||
providerId: 'opencode',
|
||||
cacheKey: 'key-4',
|
||||
modelResultsById: {
|
||||
'openai/gpt-5.4': {
|
||||
status: 'failed',
|
||||
line: 'GPT-5.4 - unavailable - OpenCode provider authentication failed',
|
||||
warningLine: null,
|
||||
},
|
||||
},
|
||||
});
|
||||
|
||||
expect(
|
||||
getShortLivedProviderPrepareModelResults({
|
||||
providerId: 'opencode',
|
||||
cacheKey: 'key-4',
|
||||
})
|
||||
).toEqual({});
|
||||
expect(
|
||||
getShortLivedProviderPrepareModelIssueReasons({
|
||||
providerId: 'opencode',
|
||||
cacheKey: 'key-4',
|
||||
})
|
||||
).toEqual({
|
||||
modelIssueReasonByValue: {},
|
||||
modelUnavailableReasonByValue: {
|
||||
'openai/gpt-5.4': 'OpenCode provider authentication failed',
|
||||
},
|
||||
});
|
||||
});
|
||||
|
||||
it('clears a short-lived issue when a later result verifies the same model', () => {
|
||||
storeShortLivedProviderPrepareModelResults({
|
||||
providerId: 'opencode',
|
||||
cacheKey: 'key-5',
|
||||
modelResultsById: {
|
||||
'openai/gpt-5.4': {
|
||||
status: 'failed',
|
||||
line: 'GPT-5.4 - unavailable - OpenCode provider authentication failed',
|
||||
warningLine: null,
|
||||
},
|
||||
},
|
||||
});
|
||||
storeShortLivedProviderPrepareModelResults({
|
||||
providerId: 'opencode',
|
||||
cacheKey: 'key-5',
|
||||
modelResultsById: {
|
||||
'openai/gpt-5.4': {
|
||||
status: 'ready',
|
||||
line: 'GPT-5.4 - verified',
|
||||
warningLine: null,
|
||||
},
|
||||
},
|
||||
});
|
||||
|
||||
expect(
|
||||
getShortLivedProviderPrepareModelIssueReasons({
|
||||
providerId: 'opencode',
|
||||
cacheKey: 'key-5',
|
||||
})
|
||||
).toEqual({
|
||||
modelIssueReasonByValue: {},
|
||||
modelUnavailableReasonByValue: {},
|
||||
});
|
||||
});
|
||||
|
||||
it('expires short-lived OpenCode issues after the issue TTL', () => {
|
||||
vi.useFakeTimers();
|
||||
storeShortLivedProviderPrepareModelResults({
|
||||
providerId: 'opencode',
|
||||
cacheKey: 'key-6',
|
||||
modelResultsById: {
|
||||
'openai/gpt-5.4': {
|
||||
status: 'failed',
|
||||
line: 'GPT-5.4 - unavailable - OpenCode provider authentication failed',
|
||||
warningLine: null,
|
||||
},
|
||||
},
|
||||
});
|
||||
|
||||
vi.advanceTimersByTime(90_001);
|
||||
|
||||
expect(
|
||||
getShortLivedProviderPrepareModelIssueReasons({
|
||||
providerId: 'opencode',
|
||||
cacheKey: 'key-6',
|
||||
})
|
||||
).toEqual({
|
||||
modelIssueReasonByValue: {},
|
||||
modelUnavailableReasonByValue: {},
|
||||
});
|
||||
});
|
||||
|
||||
it('does not store short-lived cache for non-OpenCode providers', () => {
|
||||
storeShortLivedProviderPrepareModelResults({
|
||||
providerId: 'codex',
|
||||
|
|
@ -87,5 +193,14 @@ describe('providerPrepareShortLivedCache', () => {
|
|||
cacheKey: 'key-3',
|
||||
})
|
||||
).toEqual({});
|
||||
expect(
|
||||
getShortLivedProviderPrepareModelIssueReasons({
|
||||
providerId: 'codex',
|
||||
cacheKey: 'key-3',
|
||||
})
|
||||
).toEqual({
|
||||
modelIssueReasonByValue: {},
|
||||
modelUnavailableReasonByValue: {},
|
||||
});
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -643,7 +643,7 @@ describe('RuntimeProviderManagementPanelView', () => {
|
|||
expect(host.textContent).toContain('Not recommended');
|
||||
expect(host.textContent).toContain('Unavailable in OpenCode');
|
||||
expect(host.textContent).toContain('Tested');
|
||||
expect(host.textContent).toContain('Recommended with limits');
|
||||
expect(host.textContent).toContain('Tested with limits');
|
||||
expect(host.textContent).toContain('Recommended only');
|
||||
expect(host.textContent).not.toContain('Set OpenCode default');
|
||||
expect(
|
||||
|
|
@ -687,8 +687,11 @@ describe('RuntimeProviderManagementPanelView', () => {
|
|||
expect((host.textContent ?? '').indexOf('opencode/big-pickle')).toBeLessThan(
|
||||
(host.textContent ?? '').indexOf('minimax-m2.5-free')
|
||||
);
|
||||
expect((host.textContent ?? '').indexOf('mistralai/codestral-2508')).toBeLessThan(
|
||||
(host.textContent ?? '').indexOf('minimax-m2.5-free')
|
||||
);
|
||||
expect((host.textContent ?? '').indexOf('minimax-m2.5-free')).toBeLessThan(
|
||||
(host.textContent ?? '').indexOf('mistralai/codestral-2508')
|
||||
(host.textContent ?? '').indexOf('qwen/qwen3-coder-plus')
|
||||
);
|
||||
expect((host.textContent ?? '').indexOf('qwen/qwen3-coder-plus')).toBeLessThan(
|
||||
(host.textContent ?? '').indexOf('openrouter/openai/gpt-oss-20b:free')
|
||||
|
|
|
|||
|
|
@ -26,10 +26,12 @@ describe('getOpenCodeTeamModelRecommendation', () => {
|
|||
});
|
||||
|
||||
it('marks models that passed real OpenCode Agent Teams smoke E2E as tested', () => {
|
||||
expect(getOpenCodeTeamModelRecommendation('openrouter/mistralai/codestral-2508')).toMatchObject({
|
||||
expect(getOpenCodeTeamModelRecommendation('openrouter/mistralai/codestral-2508')).toMatchObject(
|
||||
{
|
||||
level: 'tested',
|
||||
label: 'Tested',
|
||||
});
|
||||
}
|
||||
);
|
||||
expect(
|
||||
getOpenCodeTeamModelRecommendation(' OPENROUTER/GOOGLE/GEMINI-3-FLASH-PREVIEW ')
|
||||
).toMatchObject({
|
||||
|
|
@ -101,10 +103,11 @@ describe('getOpenCodeTeamModelRecommendation', () => {
|
|||
|
||||
it('keeps similarly named models distinct when real E2E disagreed', () => {
|
||||
expect(getOpenCodeTeamModelRecommendation('opencode/minimax-m2.5-free')).toMatchObject({
|
||||
level: 'recommended-with-limits',
|
||||
label: 'Recommended with limits',
|
||||
level: 'tested-with-limits',
|
||||
label: 'Tested with limits',
|
||||
reason: expect.stringContaining('duplicate or missing reply tokens'),
|
||||
});
|
||||
expect(isOpenCodeTeamModelRecommended('opencode/minimax-m2.5-free')).toBe(true);
|
||||
expect(isOpenCodeTeamModelRecommended('opencode/minimax-m2.5-free')).toBe(false);
|
||||
expect(
|
||||
getOpenCodeTeamModelRecommendation('openrouter/minimax/minimax-m2.5:free')
|
||||
).toMatchObject({
|
||||
|
|
@ -815,9 +818,9 @@ describe('getOpenCodeTeamModelRecommendation', () => {
|
|||
[...models].sort((left, right) => compareOpenCodeTeamModelRecommendations(left, right))
|
||||
).toEqual([
|
||||
'opencode/big-pickle',
|
||||
'opencode/minimax-m2.5-free',
|
||||
'openrouter/mistralai/codestral-2508',
|
||||
'openrouter/anthropic/claude-sonnet-4.6',
|
||||
'opencode/minimax-m2.5-free',
|
||||
'openrouter/qwen/qwen3-coder-plus',
|
||||
'openrouter/openai/gpt-oss-20b:free',
|
||||
]);
|
||||
|
|
|
|||
|
|
@ -126,6 +126,34 @@ describe('teamModelAvailability', () => {
|
|||
]);
|
||||
});
|
||||
|
||||
it('treats runtime-reported unavailable models as non-selectable', () => {
|
||||
const providerStatus = createCodexProviderStatus(['gpt-5.4'], {
|
||||
modelAvailability: [
|
||||
{
|
||||
modelId: 'gpt-5.4',
|
||||
status: 'unavailable',
|
||||
reason: 'No access for this account',
|
||||
checkedAt: null,
|
||||
},
|
||||
],
|
||||
});
|
||||
|
||||
expect(getAvailableTeamProviderModels('codex', providerStatus)).toEqual([]);
|
||||
expect(normalizeTeamModelForUi('codex', 'gpt-5.4', providerStatus)).toBe('');
|
||||
expect(getTeamModelSelectionError('codex', 'gpt-5.4', providerStatus)).toContain(
|
||||
'No access for this account'
|
||||
);
|
||||
expect(getAvailableTeamProviderModelOptions('codex', providerStatus)).toEqual([
|
||||
{ value: '', label: 'Default', badgeLabel: 'Default' },
|
||||
{
|
||||
value: 'gpt-5.4',
|
||||
label: '5.4',
|
||||
availabilityStatus: 'unavailable',
|
||||
availabilityReason: 'No access for this account',
|
||||
},
|
||||
]);
|
||||
});
|
||||
|
||||
it('keeps OpenCode raw ids intact while exposing readable labels and source badges', () => {
|
||||
const providerStatus = createOpenCodeProviderStatus([
|
||||
'openai/gpt-5.4',
|
||||
|
|
@ -168,6 +196,33 @@ describe('teamModelAvailability', () => {
|
|||
).toBe('openrouter/moonshotai/kimi-k2');
|
||||
});
|
||||
|
||||
it('reports OpenCode openai routes unavailable when OpenAI auth is invalid', () => {
|
||||
const providerStatus = createOpenCodeProviderStatus(['openai/gpt-5.4', 'opencode/big-pickle'], {
|
||||
statusMessage: 'OpenAI token invalid',
|
||||
detailMessage: 'OpenAI token refresh failed: 401',
|
||||
availableBackends: [
|
||||
{
|
||||
id: 'openai',
|
||||
label: 'OpenAI',
|
||||
description: 'OpenAI route',
|
||||
selectable: false,
|
||||
recommended: false,
|
||||
available: false,
|
||||
state: 'authentication-required',
|
||||
statusMessage: 'Authentication required',
|
||||
detailMessage: 'Token refresh failed: 401',
|
||||
},
|
||||
],
|
||||
});
|
||||
|
||||
expect(getTeamModelSelectionError('opencode', 'openai/gpt-5.4', providerStatus)).toContain(
|
||||
'OpenCode OpenAI provider authentication failed'
|
||||
);
|
||||
expect(
|
||||
getTeamModelSelectionError('opencode', 'opencode/big-pickle', providerStatus)
|
||||
).toBeNull();
|
||||
});
|
||||
|
||||
it('clears stale Codex selections when runtime no longer reports that model', () => {
|
||||
const providerStatus = createCodexProviderStatus(['gpt-5.4', 'gpt-5.3-codex']);
|
||||
|
||||
|
|
@ -304,9 +359,7 @@ describe('teamModelAvailability', () => {
|
|||
|
||||
it('keeps known Anthropic full model ids selectable without runtime verification', () => {
|
||||
expect(normalizeTeamModelForUi('anthropic', 'claude-opus-4-7')).toBe('claude-opus-4-7');
|
||||
expect(normalizeTeamModelForUi('anthropic', 'claude-opus-4-7[1m]')).toBe(
|
||||
'claude-opus-4-7[1m]'
|
||||
);
|
||||
expect(normalizeTeamModelForUi('anthropic', 'claude-opus-4-7[1m]')).toBe('claude-opus-4-7[1m]');
|
||||
expect(normalizeTeamModelForUi('anthropic', 'claude-haiku-4-5-20251001')).toBe(
|
||||
'claude-haiku-4-5-20251001'
|
||||
);
|
||||
|
|
|
|||
64
test/renderer/utils/teamModelRecommendations.test.ts
Normal file
64
test/renderer/utils/teamModelRecommendations.test.ts
Normal file
|
|
@ -0,0 +1,64 @@
|
|||
import { describe, expect, it } from 'vitest';
|
||||
|
||||
import {
|
||||
getTeamModelRecommendation,
|
||||
isTeamModelRecommended,
|
||||
} from '@renderer/utils/teamModelRecommendations';
|
||||
|
||||
describe('getTeamModelRecommendation', () => {
|
||||
it('marks only the selected Codex Agent Teams models as recommended', () => {
|
||||
for (const modelId of ['gpt-5.4-mini', 'gpt-5.3-codex', 'gpt-5.5']) {
|
||||
expect(getTeamModelRecommendation('codex', modelId)).toMatchObject({
|
||||
level: 'recommended',
|
||||
label: 'Recommended',
|
||||
});
|
||||
expect(isTeamModelRecommended('codex', modelId)).toBe(true);
|
||||
}
|
||||
|
||||
for (const modelId of ['gpt-5.4', 'gpt-5.2', 'gpt-5.3-codex-spark']) {
|
||||
expect(getTeamModelRecommendation('codex', modelId)).toBeNull();
|
||||
expect(isTeamModelRecommended('codex', modelId)).toBe(false);
|
||||
}
|
||||
});
|
||||
|
||||
it('marks supported Claude aliases and full ids as recommended but leaves default unbadged', () => {
|
||||
for (const modelId of [
|
||||
'haiku',
|
||||
'sonnet',
|
||||
'sonnet[1m]',
|
||||
'opus',
|
||||
'opus[1m]',
|
||||
'claude-haiku-4-5',
|
||||
'claude-haiku-4-5-20251001',
|
||||
'claude-sonnet-4-6',
|
||||
'claude-sonnet-4-6[1m]',
|
||||
'claude-opus-4-7',
|
||||
'claude-opus-4-7[1m]',
|
||||
'claude-opus-4-6',
|
||||
'claude-opus-4-6[1m]',
|
||||
]) {
|
||||
expect(getTeamModelRecommendation('anthropic', modelId)).toMatchObject({
|
||||
level: 'recommended',
|
||||
label: 'Recommended',
|
||||
});
|
||||
expect(isTeamModelRecommended('anthropic', modelId)).toBe(true);
|
||||
}
|
||||
|
||||
expect(getTeamModelRecommendation('anthropic', '')).toBeNull();
|
||||
expect(getTeamModelRecommendation('anthropic', 'default')).toBeNull();
|
||||
});
|
||||
|
||||
it('delegates OpenCode verdicts and keeps MiniMax below recommended', () => {
|
||||
expect(getTeamModelRecommendation('opencode', 'opencode/big-pickle')).toMatchObject({
|
||||
level: 'recommended',
|
||||
label: 'Recommended',
|
||||
});
|
||||
expect(isTeamModelRecommended('opencode', 'opencode/big-pickle')).toBe(true);
|
||||
|
||||
expect(getTeamModelRecommendation('opencode', 'opencode/minimax-m2.5-free')).toMatchObject({
|
||||
level: 'tested-with-limits',
|
||||
label: 'Tested with limits',
|
||||
});
|
||||
expect(isTeamModelRecommended('opencode', 'opencode/minimax-m2.5-free')).toBe(false);
|
||||
});
|
||||
});
|
||||
Loading…
Reference in a new issue