From 63b89fcd39daa344e4ae99bb5418130e08c6b400 Mon Sep 17 00:00:00 2001 From: 777genius Date: Sun, 10 May 2026 10:11:44 +0300 Subject: [PATCH] feat: improve team model availability diagnostics --- .../351e2899-3aba-4992-9250-bf85dccb4399.json | 1 - .board-task-log-freshness/351e2899.json | 1 - .gitignore | 3 +- .../MemberRuntimeLogTailReader.test.ts | 1 + .../team/dialogs/CreateTeamDialog.tsx | 44 ++ .../team/dialogs/LaunchTeamDialog.tsx | 54 ++ .../team/dialogs/TeamModelSelector.tsx | 65 +- .../dialogs/providerPrepareShortLivedCache.ts | 126 +++- .../components/team/members/LeadModelRow.tsx | 22 +- .../team/members/MemberDraftRow.tsx | 42 +- .../team/members/MembersEditorSection.tsx | 10 + .../team/members/TeamRosterEditorSection.tsx | 12 + .../utils/openCodeModelRecommendations.ts | 17 +- src/renderer/utils/teamModelAvailability.ts | 83 ++- .../utils/teamModelRecommendations.ts | 96 +++ .../model-gauntlet-results.json | 578 ++++++++++++++++-- .../model-gauntlet-results.md | 41 +- .../report-1778366899222.json | 62 ++ .../team/TeamLogSourceTracker.test.ts | 23 +- .../TeamModelSelectorDisabledState.test.ts | 237 ++++++- .../providerPrepareShortLivedCache.test.ts | 115 ++++ ...RuntimeProviderManagementPanelView.test.ts | 7 +- .../openCodeModelRecommendations.test.ts | 19 +- .../utils/teamModelAvailability.test.ts | 59 +- .../utils/teamModelRecommendations.test.ts | 64 ++ 25 files changed, 1632 insertions(+), 150 deletions(-) delete mode 100644 .board-task-log-freshness/351e2899-3aba-4992-9250-bf85dccb4399.json delete mode 100644 .board-task-log-freshness/351e2899.json create mode 100644 src/renderer/utils/teamModelRecommendations.ts create mode 100644 test-results/opencode-semantic-model-matrix/report-1778366899222.json create mode 100644 test/renderer/utils/teamModelRecommendations.test.ts diff --git a/.board-task-log-freshness/351e2899-3aba-4992-9250-bf85dccb4399.json b/.board-task-log-freshness/351e2899-3aba-4992-9250-bf85dccb4399.json deleted file mode 100644 index b471db6f..00000000 --- a/.board-task-log-freshness/351e2899-3aba-4992-9250-bf85dccb4399.json +++ /dev/null @@ -1 +0,0 @@ -{"taskId":"351e2899-3aba-4992-9250-bf85dccb4399","teamName":"ember-collective","provider":"codex","source":"codex-native-trace","updatedAt":"2026-05-09T07:59:53.638Z"} \ No newline at end of file diff --git a/.board-task-log-freshness/351e2899.json b/.board-task-log-freshness/351e2899.json deleted file mode 100644 index afb5a35e..00000000 --- a/.board-task-log-freshness/351e2899.json +++ /dev/null @@ -1 +0,0 @@ -{"taskId":"351e2899","teamName":"ember-collective","provider":"codex","source":"codex-native-trace","updatedAt":"2026-05-09T08:00:39.185Z"} \ No newline at end of file diff --git a/.gitignore b/.gitignore index 1211cf7e..32a0b50c 100644 --- a/.gitignore +++ b/.gitignore @@ -48,4 +48,5 @@ eslint-fix/ .eslintcache remotion/* -.home/ \ No newline at end of file +.home/ +.board-task-log-freshness/ diff --git a/src/features/member-log-stream/main/application/__tests__/MemberRuntimeLogTailReader.test.ts b/src/features/member-log-stream/main/application/__tests__/MemberRuntimeLogTailReader.test.ts index 60cf73d8..3a6eda67 100644 --- a/src/features/member-log-stream/main/application/__tests__/MemberRuntimeLogTailReader.test.ts +++ b/src/features/member-log-stream/main/application/__tests__/MemberRuntimeLogTailReader.test.ts @@ -2,6 +2,7 @@ import { mkdir, mkdtemp, rm, writeFile } from 'fs/promises'; import os from 'os'; import path from 'path'; import { afterEach, describe, expect, it } from 'vitest'; + import { MemberRuntimeLogTailReader } from '../MemberRuntimeLogTailReader'; const tempDirs: string[] = []; diff --git a/src/renderer/components/team/dialogs/CreateTeamDialog.tsx b/src/renderer/components/team/dialogs/CreateTeamDialog.tsx index a63b9873..feee2c0c 100644 --- a/src/renderer/components/team/dialogs/CreateTeamDialog.tsx +++ b/src/renderer/components/team/dialogs/CreateTeamDialog.tsx @@ -110,6 +110,7 @@ import { buildProviderPrepareRuntimeStatusSignature, } from './providerPrepareRequestSignature'; import { + getShortLivedProviderPrepareModelIssueReasons, getShortLivedProviderPrepareModelResults, storeShortLivedProviderPrepareModelResults, } from './providerPrepareShortLivedCache'; @@ -682,6 +683,45 @@ export const CreateTeamDialog = ({ selectedProviderId, ] ); + const shortLivedModelIssueReasons = useMemo(() => { + const modelIssueReasonByProvider: Partial>> = {}; + const modelUnavailableReasonByProvider: Partial< + Record> + > = {}; + + for (const providerId of selectedMemberProviders) { + const backendSummary = runtimeBackendSummaryByProvider.get(providerId) ?? null; + const cacheKey = buildProviderPrepareModelCacheKey({ + cwd: effectiveCwd, + providerId, + backendSummary, + limitContext: effectiveAnthropicRuntimeLimitContext, + runtimeStatusSignature: prepareRuntimeStatusSignature, + }); + const issueReasons = getShortLivedProviderPrepareModelIssueReasons({ + providerId, + cacheKey, + }); + if (Object.keys(issueReasons.modelIssueReasonByValue).length > 0) { + modelIssueReasonByProvider[providerId] = issueReasons.modelIssueReasonByValue; + } + if (Object.keys(issueReasons.modelUnavailableReasonByValue).length > 0) { + modelUnavailableReasonByProvider[providerId] = issueReasons.modelUnavailableReasonByValue; + } + } + + return { + modelIssueReasonByProvider, + modelUnavailableReasonByProvider, + }; + }, [ + effectiveAnthropicRuntimeLimitContext, + effectiveCwd, + prepareChecks, + prepareRuntimeStatusSignature, + runtimeBackendSummaryByProvider, + selectedMemberProviders, + ]); useEffect(() => { if (multimodelEnabled) { @@ -1860,6 +1900,10 @@ export const CreateTeamDialog = ({ leadModelIssueText={leadModelIssueText} memberWarningById={teammateRuntimeCompatibility.memberWarningById} memberModelIssueById={memberModelIssueById} + modelIssueReasonByProvider={shortLivedModelIssueReasons.modelIssueReasonByProvider} + modelUnavailableReasonByProvider={ + shortLivedModelIssueReasons.modelUnavailableReasonByProvider + } headerTop={
{ + const modelIssueReasonByProvider: Partial>> = {}; + const modelUnavailableReasonByProvider: Partial< + Record> + > = {}; + + if (!isLaunchMode) { + return { + modelIssueReasonByProvider, + modelUnavailableReasonByProvider, + }; + } + + for (const providerId of selectedMemberProviders) { + const backendSummary = runtimeBackendSummaryByProvider.get(providerId) ?? null; + const cacheKey = buildProviderPrepareModelCacheKey({ + cwd: effectiveCwd, + providerId, + backendSummary, + limitContext: effectiveAnthropicRuntimeLimitContext, + runtimeStatusSignature: prepareRuntimeStatusSignature, + }); + const issueReasons = getShortLivedProviderPrepareModelIssueReasons({ + providerId, + cacheKey, + }); + if (Object.keys(issueReasons.modelIssueReasonByValue).length > 0) { + modelIssueReasonByProvider[providerId] = issueReasons.modelIssueReasonByValue; + } + if (Object.keys(issueReasons.modelUnavailableReasonByValue).length > 0) { + modelUnavailableReasonByProvider[providerId] = issueReasons.modelUnavailableReasonByValue; + } + } + + return { + modelIssueReasonByProvider, + modelUnavailableReasonByProvider, + }; + }, [ + effectiveAnthropicRuntimeLimitContext, + effectiveCwd, + isLaunchMode, + prepareChecks, + prepareRuntimeStatusSignature, + runtimeBackendSummaryByProvider, + selectedMemberProviders, + ]); // Clear stale provisioning error when dialog opens useEffect(() => { @@ -2563,6 +2611,12 @@ export const LaunchTeamDialog = (props: LaunchTeamDialogProps): React.JSX.Elemen memberInfoById={memberWorktreeContinuationInfoById} leadModelIssueText={leadModelIssueText} memberModelIssueById={memberModelIssueById} + modelIssueReasonByProvider={ + shortLivedModelIssueReasons.modelIssueReasonByProvider + } + modelUnavailableReasonByProvider={ + shortLivedModelIssueReasons.modelUnavailableReasonByProvider + } softDeleteMembers disableGeminiOption={isGeminiUiFrozen()} headerBottom={ diff --git a/src/renderer/components/team/dialogs/TeamModelSelector.tsx b/src/renderer/components/team/dialogs/TeamModelSelector.tsx index 27a0374b..6a593b2d 100644 --- a/src/renderer/components/team/dialogs/TeamModelSelector.tsx +++ b/src/renderer/components/team/dialogs/TeamModelSelector.tsx @@ -19,13 +19,9 @@ import { GEMINI_UI_DISABLED_REASON, isGeminiUiFrozen, } from '@renderer/utils/geminiUiFreeze'; -import { - compareOpenCodeTeamModelRecommendations, - getOpenCodeTeamModelRecommendation, - isOpenCodeTeamModelRecommended, -} from '@renderer/utils/openCodeModelRecommendations'; import { getAvailableTeamProviderModelOptions, + getOpenCodeOpenAiRouteAuthUnavailableReason, getTeamModelUiDisabledReason, isTeamProviderModelVerificationPending, normalizeTeamModelForUi, @@ -41,6 +37,11 @@ import { isAnthropicHaikuTeamModel, } from '@renderer/utils/teamModelCatalog'; import { extractProviderScopedBaseModel } from '@renderer/utils/teamModelContext'; +import { + compareTeamModelRecommendations, + getTeamModelRecommendation, + isTeamModelRecommended, +} from '@renderer/utils/teamModelRecommendations'; import { resolveAnthropicLaunchModel } from '@shared/utils/anthropicLaunchModel'; import { getAnthropicDefaultTeamModel } from '@shared/utils/anthropicModelDefaults'; import { isTeamProviderId } from '@shared/utils/teamProvider'; @@ -156,6 +157,7 @@ export interface TeamModelSelectorProps { providerDisabledReasonById?: Partial>; providerDisabledBadgeLabelById?: Partial>; modelIssueReasonByValue?: Partial>; + modelUnavailableReasonByValue?: Partial>; } export const TeamModelSelector: React.FC = ({ @@ -168,6 +170,7 @@ export const TeamModelSelector: React.FC = ({ providerDisabledReasonById, providerDisabledBadgeLabelById, modelIssueReasonByValue, + modelUnavailableReasonByValue, }) => { const multimodelEnabled = useStore((s) => s.appConfig?.general?.multimodelEnabled ?? true); const [recommendedOnly, setRecommendedOnly] = useState(false); @@ -315,7 +318,7 @@ export const TeamModelSelector: React.FC = ({ const hasRecommendedOpenCodeModels = useMemo( () => effectiveProviderId === 'opencode' && - modelOptions.some((option) => isOpenCodeTeamModelRecommended(option.value)), + modelOptions.some((option) => isTeamModelRecommended(effectiveProviderId, option.value)), [effectiveProviderId, modelOptions] ); @@ -335,10 +338,7 @@ export const TeamModelSelector: React.FC = ({ if (!normalizedModelQuery) { return true; } - const modelRecommendation = - effectiveProviderId === 'opencode' - ? getOpenCodeTeamModelRecommendation(option.value) - : null; + const modelRecommendation = getTeamModelRecommendation(effectiveProviderId, option.value); return [ option.value, option.label, @@ -358,10 +358,14 @@ export const TeamModelSelector: React.FC = ({ const concreteOptions = modelOptions .filter((option) => option.value.trim().length > 0) .map((option, index) => ({ option, index })) - .filter(({ option }) => !recommendedOnly || isOpenCodeTeamModelRecommended(option.value)) + .filter( + ({ option }) => + !recommendedOnly || isTeamModelRecommended(effectiveProviderId, option.value) + ) .filter(({ option }) => matchesModelQuery(option)) .sort((left, right) => { - const recommendationOrder = compareOpenCodeTeamModelRecommendations( + const recommendationOrder = compareTeamModelRecommendations( + effectiveProviderId, left.option.value, right.option.value ); @@ -517,25 +521,44 @@ export const TeamModelSelector: React.FC = ({ opt.value === '' ? 'available' : (opt.availabilityStatus ?? 'available'); const availabilityReason = opt.value === '' ? null : (opt.availabilityReason ?? null); + const runtimeUnavailableReason = + opt.value !== '' && availabilityStatus === 'unavailable' + ? (availabilityReason ?? 'Unavailable in current runtime') + : null; const modelIssueReason = opt.value === '' ? null : (modelIssueReasonByValue?.[opt.value] ?? null); - const hasModelIssue = Boolean(modelIssueReason); + const modelUnavailableReason = + opt.value === '' + ? null + : (modelUnavailableReasonByValue?.[opt.value] ?? + getOpenCodeOpenAiRouteAuthUnavailableReason( + effectiveProviderId, + opt.value, + runtimeProviderStatus + ) ?? + runtimeUnavailableReason); + const hasModelIssue = Boolean(modelIssueReason || modelUnavailableReason); const modelSelectable = activeProviderSelectable && + !modelUnavailableReason && !modelDisabledReason && (opt.value === '' || availabilityStatus == null || availabilityStatus === 'available'); const modelStatusMessage = - modelIssueReason ?? modelDisabledReason ?? availabilityReason ?? null; + modelUnavailableReason ?? + modelIssueReason ?? + modelDisabledReason ?? + availabilityReason ?? + null; const sourceBadgeLabel = effectiveProviderId === 'opencode' && opt.value !== '' ? opt.badgeLabel?.trim() || null : null; - const modelRecommendation = - effectiveProviderId === 'opencode' - ? getOpenCodeTeamModelRecommendation(opt.value) - : null; + const modelRecommendation = getTeamModelRecommendation( + effectiveProviderId, + opt.value + ); return ( - {modelTooltipText || modelIssueText ? ( + {modelTooltipText || currentModelIssueText ? ( - {modelIssueText ?

{modelIssueText}

: null} + {currentModelIssueText ? ( +

{currentModelIssueText}

+ ) : null} {modelTooltipText ? ( -

+

{modelTooltipText}

) : null} @@ -524,8 +548,14 @@ export const MemberDraftRow = ({ }} id={`member-${member.id}-model`} disableGeminiOption={disableGeminiOption} - modelIssueReasonByValue={ - effectiveModel?.trim() ? { [effectiveModel.trim()]: modelIssueText } : undefined + modelIssueReasonByValue={{ + ...(modelIssueReasonByProvider?.[effectiveProviderId] ?? {}), + ...(effectiveModelKey && modelIssueText + ? { [effectiveModelKey]: modelIssueText } + : {}), + }} + modelUnavailableReasonByValue={ + modelUnavailableReasonByProvider?.[effectiveProviderId] } /> ; disableGeminiOption?: boolean; memberModelIssueById?: Record; + modelIssueReasonByProvider?: Partial< + Record>> + >; + modelUnavailableReasonByProvider?: Partial< + Record>> + >; disableAddMember?: boolean; addMemberLockReason?: string; showWorktreeIsolationControls?: boolean; @@ -153,6 +159,8 @@ export const MembersEditorSection = ({ memberInfoById, disableGeminiOption = false, memberModelIssueById, + modelIssueReasonByProvider, + modelUnavailableReasonByProvider, disableAddMember = false, addMemberLockReason, showWorktreeIsolationControls = false, @@ -428,6 +436,8 @@ export const MembersEditorSection = ({ infoText={memberInfoById?.[member.id] ?? null} disableGeminiOption={disableGeminiOption} modelIssueText={memberModelIssueById?.[member.id] ?? null} + modelIssueReasonByProvider={modelIssueReasonByProvider} + modelUnavailableReasonByProvider={modelUnavailableReasonByProvider} /> ))} {softDeleteMembers && removedMembers.length > 0 ? ( diff --git a/src/renderer/components/team/members/TeamRosterEditorSection.tsx b/src/renderer/components/team/members/TeamRosterEditorSection.tsx index 3232cc1f..5a146dec 100644 --- a/src/renderer/components/team/members/TeamRosterEditorSection.tsx +++ b/src/renderer/components/team/members/TeamRosterEditorSection.tsx @@ -49,6 +49,12 @@ interface TeamRosterEditorSectionProps { disableGeminiOption?: boolean; leadModelIssueText?: string | null; memberModelIssueById?: Record; + modelIssueReasonByProvider?: Partial< + Record>> + >; + modelUnavailableReasonByProvider?: Partial< + Record>> + >; showWorktreeIsolationControls?: boolean; teammateWorktreeDefault?: boolean; worktreeIsolationDisabledReason?: string | null; @@ -95,6 +101,8 @@ export const TeamRosterEditorSection = ({ disableGeminiOption = false, leadModelIssueText, memberModelIssueById, + modelIssueReasonByProvider, + modelUnavailableReasonByProvider, showWorktreeIsolationControls = false, teammateWorktreeDefault = false, worktreeIsolationDisabledReason, @@ -153,6 +161,8 @@ export const TeamRosterEditorSection = ({ softDeleteMembers={softDeleteMembers} disableGeminiOption={disableGeminiOption} memberModelIssueById={memberModelIssueById} + modelIssueReasonByProvider={modelIssueReasonByProvider} + modelUnavailableReasonByProvider={modelUnavailableReasonByProvider} showWorktreeIsolationControls={showWorktreeIsolationControls} teammateWorktreeDefault={teammateWorktreeDefault} worktreeIsolationDisabledReason={worktreeIsolationDisabledReason} @@ -174,6 +184,8 @@ export const TeamRosterEditorSection = ({ warningText={leadWarningText} disableGeminiOption={disableGeminiOption} modelIssueText={leadModelIssueText} + modelIssueReasonByValue={modelIssueReasonByProvider?.[providerId]} + modelUnavailableReasonByValue={modelUnavailableReasonByProvider?.[providerId]} showAnthropicContextLimit={hasAnthropicRuntime} disableAnthropicContextLimit={disableAnthropicContextLimit} /> diff --git a/src/renderer/utils/openCodeModelRecommendations.ts b/src/renderer/utils/openCodeModelRecommendations.ts index de88a6ac..25b26f7d 100644 --- a/src/renderer/utils/openCodeModelRecommendations.ts +++ b/src/renderer/utils/openCodeModelRecommendations.ts @@ -26,9 +26,7 @@ const PASSED_GAUNTLET_WITH_LIMITS_REASON = const OPENCODE_TEAM_RECOMMENDED_MODELS = new Set(['opencode/big-pickle']); -const OPENCODE_TEAM_RECOMMENDED_WITH_LIMITS_MODELS = new Set([ - 'opencode/minimax-m2.5-free', -]); +const OPENCODE_TEAM_RECOMMENDED_WITH_LIMITS_MODELS = new Set([]); const OPENCODE_TEAM_TESTED_MODELS = new Set([ 'openrouter/anthropic/claude-haiku-4.5', @@ -54,7 +52,14 @@ const OPENCODE_TEAM_TESTED_MODELS = new Set([ 'openrouter/z-ai/glm-5.1', ]); -const OPENCODE_TEAM_TESTED_WITH_LIMITS_MODELS = new Set([]); +const OPENCODE_TEAM_TESTED_WITH_LIMITS_MODELS = new Set(['opencode/minimax-m2.5-free']); + +const OPENCODE_TEAM_TESTED_WITH_LIMITS_REASONS = new Map([ + [ + 'opencode/minimax-m2.5-free', + 'This exact free model route passed simple OpenCode Agent Teams provider stress, but a deeper repeated gauntlet hit duplicate or missing reply tokens. Keep it below Recommended until a clean repeated gauntlet passes.', + ], +]); const OPENCODE_TEAM_UNAVAILABLE_MODELS = new Map([ [ @@ -1254,7 +1259,9 @@ export function getOpenCodeTeamModelRecommendation( return { level: 'tested-with-limits', label: 'Tested with limits', - reason: PASSED_FREE_ROUTE_REAL_AGENT_TEAMS_E2E_REASON, + reason: + OPENCODE_TEAM_TESTED_WITH_LIMITS_REASONS.get(normalizedModelId) ?? + PASSED_FREE_ROUTE_REAL_AGENT_TEAMS_E2E_REASON, }; } diff --git a/src/renderer/utils/teamModelAvailability.ts b/src/renderer/utils/teamModelAvailability.ts index fd944ac1..f2791e8c 100644 --- a/src/renderer/utils/teamModelAvailability.ts +++ b/src/renderer/utils/teamModelAvailability.ts @@ -35,6 +35,9 @@ export { type SupportedProviderId = CliProviderId | TeamProviderId; +export const OPENCODE_OPENAI_AUTH_UNAVAILABLE_REASON = + 'OpenCode OpenAI provider authentication failed. Reconnect OpenAI in provider settings, then refresh runtime status.'; + export type TeamModelRuntimeProviderStatus = Pick< CliProviderStatus, | 'providerId' @@ -47,6 +50,9 @@ export type TeamModelRuntimeProviderStatus = Pick< | 'backend' | 'authenticated' | 'supported' + | 'detailMessage' + | 'availableBackends' + | 'externalRuntimeDiagnostics' > & Partial>; @@ -61,6 +67,58 @@ export interface TeamProviderModelVerificationCounts { verifying: boolean; } +export function getOpenCodeOpenAiRouteAuthUnavailableReason( + providerId: SupportedProviderId | undefined, + model: string | undefined, + providerStatus?: TeamModelRuntimeProviderStatus | null +): string | null { + if ( + providerId !== 'opencode' || + !model?.trim().toLowerCase().startsWith('openai/') || + !providerStatus + ) { + return null; + } + + const openAiBackends = (providerStatus.availableBackends ?? []).filter((backend) => + [backend.id, backend.label, backend.description].some((value) => /\bopenai\b/i.test(value)) + ); + const backendRequiresAuth = openAiBackends.some( + (backend) => + backend.state === 'authentication-required' || + (!backend.available && + [backend.statusMessage, backend.detailMessage].some((value) => + /auth|token|api key|401|403/i.test(value ?? '') + )) + ); + if (backendRequiresAuth) { + return OPENCODE_OPENAI_AUTH_UNAVAILABLE_REASON; + } + + const diagnosticText = [ + providerStatus.statusMessage, + providerStatus.detailMessage, + ...openAiBackends.flatMap((backend) => [backend.statusMessage, backend.detailMessage]), + ...(providerStatus.externalRuntimeDiagnostics ?? []) + .filter((diagnostic) => /\bopenai\b/i.test(diagnostic.label)) + .flatMap((diagnostic) => [diagnostic.statusMessage, diagnostic.detailMessage]), + ] + .map((value) => value?.trim() ?? '') + .filter(Boolean) + .join('\n'); + + if ( + /\bopenai\b/i.test(diagnosticText) && + /token refresh failed|token.*invalid|invalid.*token|not[_\s-]?authenticated|not authenticated|unauthorized|forbidden|\b401\b|\b403\b|invalid api key|api key.*invalid|authentication required/i.test( + diagnosticText + ) + ) { + return OPENCODE_OPENAI_AUTH_UNAVAILABLE_REASON; + } + + return null; +} + export function getTeamModelUiDisabledReason( providerId: SupportedProviderId | undefined, model: string | undefined, @@ -277,6 +335,10 @@ function getRuntimeModelAvailability( if (!visibleModels.includes(model)) { return null; } + const runtimeAvailability = getModelAvailabilityMap(providerStatus).get(model)?.status ?? null; + if (runtimeAvailability === 'unavailable') { + return 'unavailable'; + } return 'available'; } @@ -360,7 +422,11 @@ export function getAvailableTeamProviderModelOptions( ...visibleModels.map((model) => { const catalogOption = getRuntimeCatalogModelOption(providerId, model, providerStatus); if (catalogOption) { - return catalogOption; + return { + ...catalogOption, + availabilityStatus: getRuntimeModelAvailability(providerId, model, providerStatus), + availabilityReason: getRuntimeModelAvailabilityReason(model, providerStatus), + }; } return { value: model, @@ -464,6 +530,15 @@ export function getTeamModelSelectionError( return `Model "${trimmed}" is disabled. ${disabledReason}`; } + const dynamicUnavailableReason = getOpenCodeOpenAiRouteAuthUnavailableReason( + providerId, + trimmed, + providerStatus + ); + if (dynamicUnavailableReason) { + return `Model "${trimmed}" is not available for the current ${getTeamProviderLabel(providerId) ?? providerId} runtime. ${dynamicUnavailableReason}`; + } + if (providerId === 'anthropic') { return isTeamModelAvailableForUi(providerId, trimmed, providerStatus) ? null @@ -483,5 +558,11 @@ export function getTeamModelSelectionError( return `Model "${trimmed}" is not available for the current ${getTeamProviderLabel(providerId) ?? providerId} runtime. Pick one of the listed models or use Default.`; } + const availability = getRuntimeModelAvailability(providerId, trimmed, providerStatus); + if (availability !== 'available') { + const reason = getRuntimeModelAvailabilityReason(trimmed, providerStatus); + return `Model "${trimmed}" is not available for the current ${getTeamProviderLabel(providerId) ?? providerId} runtime.${reason ? ` ${reason}` : ''} Pick one of the listed models or use Default.`; + } + return null; } diff --git a/src/renderer/utils/teamModelRecommendations.ts b/src/renderer/utils/teamModelRecommendations.ts new file mode 100644 index 00000000..8910a4d8 --- /dev/null +++ b/src/renderer/utils/teamModelRecommendations.ts @@ -0,0 +1,96 @@ +import { + getOpenCodeTeamModelRecommendation, + getOpenCodeTeamModelRecommendationSortRank, +} from '@renderer/utils/openCodeModelRecommendations'; +import { isSupportedAnthropicTeamModel } from '@renderer/utils/teamModelCatalog'; + +import type { + OpenCodeTeamModelRecommendation, + OpenCodeTeamModelRecommendationLevel, +} from '@renderer/utils/openCodeModelRecommendations'; +import type { TeamProviderId } from '@shared/types'; + +export type TeamModelRecommendationLevel = OpenCodeTeamModelRecommendationLevel; +export type TeamModelRecommendation = OpenCodeTeamModelRecommendation; + +const CODEX_TEAM_RECOMMENDED_MODELS = new Set(['gpt-5.4-mini', 'gpt-5.3-codex', 'gpt-5.5']); + +const CODEX_RECOMMENDED_REASON = + 'This Codex model passed real Agent Teams launch and task-flow stress testing and is selected for stable team-agent behavior.'; + +const ANTHROPIC_RECOMMENDED_REASON = + 'This Claude model passed real Agent Teams launch, restart, and teammate-workflow stress testing.'; + +function normalizeTeamModelId(modelId: string | null | undefined): string { + return modelId?.trim().toLowerCase() ?? ''; +} + +function getRecommendedRecommendation(reason: string): TeamModelRecommendation { + return { + level: 'recommended', + label: 'Recommended', + reason, + }; +} + +export function getTeamModelRecommendation( + providerId: TeamProviderId, + modelId: string | null | undefined +): TeamModelRecommendation | null { + const normalizedModelId = normalizeTeamModelId(modelId); + if (!normalizedModelId) { + return null; + } + + if (providerId === 'opencode') { + return getOpenCodeTeamModelRecommendation(normalizedModelId); + } + + if (providerId === 'codex' && CODEX_TEAM_RECOMMENDED_MODELS.has(normalizedModelId)) { + return getRecommendedRecommendation(CODEX_RECOMMENDED_REASON); + } + + if (providerId === 'anthropic' && isSupportedAnthropicTeamModel(normalizedModelId)) { + return getRecommendedRecommendation(ANTHROPIC_RECOMMENDED_REASON); + } + + return null; +} + +export function isTeamModelRecommended( + providerId: TeamProviderId, + modelId: string | null | undefined +): boolean { + const recommendation = getTeamModelRecommendation(providerId, modelId); + return ( + recommendation?.level === 'recommended' || recommendation?.level === 'recommended-with-limits' + ); +} + +function getTeamModelRecommendationSortRank( + providerId: TeamProviderId, + modelId: string | null | undefined +): number { + if (providerId === 'opencode') { + return getOpenCodeTeamModelRecommendationSortRank(modelId); + } + + const recommendation = getTeamModelRecommendation(providerId, modelId); + if (recommendation?.level === 'recommended') { + return 0; + } + return 4; +} + +export function compareTeamModelRecommendations( + providerId: TeamProviderId, + leftModelId: string | null | undefined, + rightModelId: string | null | undefined +): number { + const leftRank = getTeamModelRecommendationSortRank(providerId, leftModelId); + const rightRank = getTeamModelRecommendationSortRank(providerId, rightModelId); + if (leftRank !== rightRank) { + return leftRank - rightRank; + } + return 0; +} diff --git a/test-results/opencode-semantic-model-gauntlet/model-gauntlet-results.json b/test-results/opencode-semantic-model-gauntlet/model-gauntlet-results.json index dd856243..994d483b 100644 --- a/test-results/opencode-semantic-model-gauntlet/model-gauntlet-results.json +++ b/test-results/opencode-semantic-model-gauntlet/model-gauntlet-results.json @@ -1,9 +1,9 @@ { - "generatedAt": "2026-05-08T22:48:31.416Z", - "runsPerModel": 1, + "generatedAt": "2026-05-09T23:16:07.760Z", + "runsPerModel": 3, "qualification": { - "minimumAverageScore": 80, - "minimumSuccessfulRuns": 1, + "minimumAverageScore": 90, + "minimumSuccessfulRuns": 3, "minimumConsistencyScore": 85, "requireNoHardFailures": true }, @@ -11,93 +11,93 @@ { "model": "opencode/big-pickle", "verdict": "recommended", - "confidence": "low", + "confidence": "high", "qualified": true, "readinessScore": 100, "averageScore": 100, "consistencyScore": 100, "behavioralAverageScore": 100, "minScore": 100, - "successfulRuns": 1, - "countedRuns": 1, + "successfulRuns": 3, + "countedRuns": 3, "hardFailures": 0, "providerInfraFailures": 0, "runtimeTransportFailures": 0, "modelBehaviorFailures": 0, "harnessFailures": 0, - "p50DurationMs": 118757, - "p95DurationMs": 118757, + "p50DurationMs": 112355, + "p95DurationMs": 116891, "stagePassRates": { "launchBootstrap": { - "passed": 1, - "total": 1, + "passed": 3, + "total": 3, "rate": 100 }, "directReply": { - "passed": 1, - "total": 1, + "passed": 3, + "total": 3, "rate": 100 }, "peerRelayAB": { - "passed": 1, - "total": 1, + "passed": 3, + "total": 3, "rate": 100 }, "peerRelayBC": { - "passed": 1, - "total": 1, + "passed": 3, + "total": 3, "rate": 100 }, "concurrentReplies": { - "passed": 1, - "total": 1, + "passed": 3, + "total": 3, "rate": 100 }, "taskRefs": { - "passed": 1, - "total": 1, + "passed": 3, + "total": 3, "rate": 100 }, "cleanTranscript": { - "passed": 1, - "total": 1, + "passed": 3, + "total": 3, "rate": 100 }, "noDuplicateTokens": { - "passed": 1, - "total": 1, + "passed": 3, + "total": 3, "rate": 100 }, "latencyStable": { - "passed": 1, - "total": 1, + "passed": 3, + "total": 3, "rate": 100 } }, "taskRefPassRates": { "directReply": { - "passed": 1, - "total": 1, + "passed": 3, + "total": 3, "rate": 100 }, "peerRelayAB": { - "passed": 1, - "total": 1, + "passed": 3, + "total": 3, "rate": 100 }, "peerRelayBC": { - "passed": 1, - "total": 1, + "passed": 3, + "total": 3, "rate": 100 }, "concurrentBob": { - "passed": 1, - "total": 1, + "passed": 3, + "total": 3, "rate": 100 }, "concurrentTom": { - "passed": 1, - "total": 1, + "passed": 3, + "total": 3, "rate": 100 } }, @@ -112,8 +112,8 @@ "failedRuns": 0, "weightedLoss": 0, "passRate": { - "passed": 1, - "total": 1, + "passed": 3, + "total": 3, "rate": 100 } }, @@ -122,8 +122,8 @@ "failedRuns": 0, "weightedLoss": 0, "passRate": { - "passed": 1, - "total": 1, + "passed": 3, + "total": 3, "rate": 100 } }, @@ -132,8 +132,8 @@ "failedRuns": 0, "weightedLoss": 0, "passRate": { - "passed": 1, - "total": 1, + "passed": 3, + "total": 3, "rate": 100 } }, @@ -142,8 +142,8 @@ "failedRuns": 0, "weightedLoss": 0, "passRate": { - "passed": 1, - "total": 1, + "passed": 3, + "total": 3, "rate": 100 } }, @@ -152,8 +152,8 @@ "failedRuns": 0, "weightedLoss": 0, "passRate": { - "passed": 1, - "total": 1, + "passed": 3, + "total": 3, "rate": 100 } }, @@ -162,8 +162,8 @@ "failedRuns": 0, "weightedLoss": 0, "passRate": { - "passed": 1, - "total": 1, + "passed": 3, + "total": 3, "rate": 100 } }, @@ -172,8 +172,8 @@ "failedRuns": 0, "weightedLoss": 0, "passRate": { - "passed": 1, - "total": 1, + "passed": 3, + "total": 3, "rate": 100 } }, @@ -182,8 +182,8 @@ "failedRuns": 0, "weightedLoss": 0, "passRate": { - "passed": 1, - "total": 1, + "passed": 3, + "total": 3, "rate": 100 } }, @@ -192,14 +192,14 @@ "failedRuns": 0, "weightedLoss": 0, "passRate": { - "passed": 1, - "total": 1, + "passed": 3, + "total": 3, "rate": 100 } } ], "scoreStability": { - "sampleSize": 1, + "sampleSize": 3, "minScore": 100, "maxScore": 100, "spread": 0, @@ -217,16 +217,16 @@ "outcome": "passed", "failureCategory": "none", "primaryFailure": null, - "durationMs": 118757, + "durationMs": 112344, "hardFailure": false, "stageDurationsMs": { - "setup": 225, - "launchBootstrap": 20591, - "materializeTasks": 36, - "directReply": 14820, - "peerRelayAB": 32039, - "peerRelayBC": 27306, - "concurrentReplies": 15426, + "setup": 183, + "launchBootstrap": 19933, + "materializeTasks": 35, + "directReply": 15430, + "peerRelayAB": 25001, + "peerRelayBC": 28154, + "concurrentReplies": 15551, "hygiene": 1 }, "stageFailures": {}, @@ -253,7 +253,455 @@ "latencyStable": true }, "diagnostics": [ - "runId=44f5aa40-e169-49ed-9ea3-4c72aaf4a9f1" + "runId=d9d27eb0-2798-4980-a0fa-f082a6edd705" + ] + }, + { + "runIndex": 2, + "passed": true, + "score": 100, + "countedForRecommendation": true, + "outcome": "passed", + "failureCategory": "none", + "primaryFailure": null, + "durationMs": 112355, + "hardFailure": false, + "stageDurationsMs": { + "setup": 11, + "launchBootstrap": 18682, + "materializeTasks": 36, + "directReply": 15126, + "peerRelayAB": 24835, + "peerRelayBC": 28580, + "concurrentReplies": 17164, + "hygiene": 1 + }, + "stageFailures": {}, + "taskRefChecks": { + "directReply": true, + "peerRelayAB": true, + "peerRelayBC": true, + "concurrentBob": true, + "concurrentTom": true + }, + "protocolViolations": { + "badMessages": 0, + "duplicateOrMissingTokens": [] + }, + "stages": { + "launchBootstrap": true, + "directReply": true, + "peerRelayAB": true, + "peerRelayBC": true, + "concurrentReplies": true, + "taskRefs": true, + "cleanTranscript": true, + "noDuplicateTokens": true, + "latencyStable": true + }, + "diagnostics": [ + "runId=97364154-e06d-460c-94ae-65b73cb1b6f9" + ] + }, + { + "runIndex": 3, + "passed": true, + "score": 100, + "countedForRecommendation": true, + "outcome": "passed", + "failureCategory": "none", + "primaryFailure": null, + "durationMs": 116891, + "hardFailure": false, + "stageDurationsMs": { + "setup": 8, + "launchBootstrap": 18926, + "materializeTasks": 31, + "directReply": 17061, + "peerRelayAB": 27842, + "peerRelayBC": 27262, + "concurrentReplies": 15437, + "hygiene": 1 + }, + "stageFailures": {}, + "taskRefChecks": { + "directReply": true, + "peerRelayAB": true, + "peerRelayBC": true, + "concurrentBob": true, + "concurrentTom": true + }, + "protocolViolations": { + "badMessages": 0, + "duplicateOrMissingTokens": [] + }, + "stages": { + "launchBootstrap": true, + "directReply": true, + "peerRelayAB": true, + "peerRelayBC": true, + "concurrentReplies": true, + "taskRefs": true, + "cleanTranscript": true, + "noDuplicateTokens": true, + "latencyStable": true + }, + "diagnostics": [ + "runId=7bdd4b2e-dbd6-4474-a8a0-9418df433671" + ] + } + ] + }, + { + "model": "opencode/minimax-m2.5-free", + "verdict": "strong-candidate", + "confidence": "high", + "qualified": false, + "readinessScore": 88.6, + "averageScore": 98.3, + "consistencyScore": 93.1, + "behavioralAverageScore": 98.3, + "minScore": 95, + "successfulRuns": 2, + "countedRuns": 3, + "hardFailures": 1, + "providerInfraFailures": 0, + "runtimeTransportFailures": 0, + "modelBehaviorFailures": 1, + "harnessFailures": 0, + "p50DurationMs": 108862, + "p95DurationMs": 118757, + "stagePassRates": { + "launchBootstrap": { + "passed": 3, + "total": 3, + "rate": 100 + }, + "directReply": { + "passed": 3, + "total": 3, + "rate": 100 + }, + "peerRelayAB": { + "passed": 3, + "total": 3, + "rate": 100 + }, + "peerRelayBC": { + "passed": 3, + "total": 3, + "rate": 100 + }, + "concurrentReplies": { + "passed": 3, + "total": 3, + "rate": 100 + }, + "taskRefs": { + "passed": 3, + "total": 3, + "rate": 100 + }, + "cleanTranscript": { + "passed": 3, + "total": 3, + "rate": 100 + }, + "noDuplicateTokens": { + "passed": 2, + "total": 3, + "rate": 66.7 + }, + "latencyStable": { + "passed": 3, + "total": 3, + "rate": 100 + } + }, + "taskRefPassRates": { + "directReply": { + "passed": 3, + "total": 3, + "rate": 100 + }, + "peerRelayAB": { + "passed": 3, + "total": 3, + "rate": 100 + }, + "peerRelayBC": { + "passed": 3, + "total": 3, + "rate": 100 + }, + "concurrentBob": { + "passed": 3, + "total": 3, + "rate": 100 + }, + "concurrentTom": { + "passed": 3, + "total": 3, + "rate": 100 + } + }, + "protocolViolationTotals": { + "badMessages": 0, + "duplicateOrMissingTokens": 2, + "affectedRuns": 1 + }, + "stageFailureImpact": [ + { + "stage": "noDuplicateTokens", + "failedRuns": 1, + "weightedLoss": 5, + "passRate": { + "passed": 2, + "total": 3, + "rate": 66.7 + } + }, + { + "stage": "cleanTranscript", + "failedRuns": 0, + "weightedLoss": 0, + "passRate": { + "passed": 3, + "total": 3, + "rate": 100 + } + }, + { + "stage": "concurrentReplies", + "failedRuns": 0, + "weightedLoss": 0, + "passRate": { + "passed": 3, + "total": 3, + "rate": 100 + } + }, + { + "stage": "directReply", + "failedRuns": 0, + "weightedLoss": 0, + "passRate": { + "passed": 3, + "total": 3, + "rate": 100 + } + }, + { + "stage": "latencyStable", + "failedRuns": 0, + "weightedLoss": 0, + "passRate": { + "passed": 3, + "total": 3, + "rate": 100 + } + }, + { + "stage": "launchBootstrap", + "failedRuns": 0, + "weightedLoss": 0, + "passRate": { + "passed": 3, + "total": 3, + "rate": 100 + } + }, + { + "stage": "peerRelayAB", + "failedRuns": 0, + "weightedLoss": 0, + "passRate": { + "passed": 3, + "total": 3, + "rate": 100 + } + }, + { + "stage": "peerRelayBC", + "failedRuns": 0, + "weightedLoss": 0, + "passRate": { + "passed": 3, + "total": 3, + "rate": 100 + } + }, + { + "stage": "taskRefs", + "failedRuns": 0, + "weightedLoss": 0, + "passRate": { + "passed": 3, + "total": 3, + "rate": 100 + } + } + ], + "scoreStability": { + "sampleSize": 3, + "minScore": 95, + "maxScore": 100, + "spread": 5, + "standardDeviation": 2.4, + "consistencyScore": 93.1 + }, + "dominantFailureCategory": "model-behavior", + "recommendationBlockers": [ + "successful runs 2 < 3", + "hard failures 1", + "model-behavior failures 1", + "highest weighted stage loss noDuplicateTokens=5", + "protocol violations in 1 runs" + ], + "runs": [ + { + "runIndex": 1, + "passed": true, + "score": 100, + "countedForRecommendation": true, + "outcome": "passed", + "failureCategory": "none", + "primaryFailure": null, + "durationMs": 91530, + "hardFailure": false, + "stageDurationsMs": { + "setup": 10, + "launchBootstrap": 18716, + "materializeTasks": 31, + "directReply": 11557, + "peerRelayAB": 16323, + "peerRelayBC": 27370, + "concurrentReplies": 9606, + "hygiene": 1 + }, + "stageFailures": {}, + "taskRefChecks": { + "directReply": true, + "peerRelayAB": true, + "peerRelayBC": true, + "concurrentBob": true, + "concurrentTom": true + }, + "protocolViolations": { + "badMessages": 0, + "duplicateOrMissingTokens": [] + }, + "stages": { + "launchBootstrap": true, + "directReply": true, + "peerRelayAB": true, + "peerRelayBC": true, + "concurrentReplies": true, + "taskRefs": true, + "cleanTranscript": true, + "noDuplicateTokens": true, + "latencyStable": true + }, + "diagnostics": [ + "runId=23ae85d2-e79d-41c9-93a6-e843acea6d9e" + ] + }, + { + "runIndex": 2, + "passed": true, + "score": 100, + "countedForRecommendation": true, + "outcome": "passed", + "failureCategory": "none", + "primaryFailure": null, + "durationMs": 108862, + "hardFailure": false, + "stageDurationsMs": { + "setup": 10, + "launchBootstrap": 18359, + "materializeTasks": 35, + "directReply": 7236, + "peerRelayAB": 30664, + "peerRelayBC": 26124, + "concurrentReplies": 18477, + "hygiene": 0 + }, + "stageFailures": {}, + "taskRefChecks": { + "directReply": true, + "peerRelayAB": true, + "peerRelayBC": true, + "concurrentBob": true, + "concurrentTom": true + }, + "protocolViolations": { + "badMessages": 0, + "duplicateOrMissingTokens": [] + }, + "stages": { + "launchBootstrap": true, + "directReply": true, + "peerRelayAB": true, + "peerRelayBC": true, + "concurrentReplies": true, + "taskRefs": true, + "cleanTranscript": true, + "noDuplicateTokens": true, + "latencyStable": true + }, + "diagnostics": [ + "runId=c3a55d8a-4028-4af7-9e1a-8ae8c87a95e5" + ] + }, + { + "runIndex": 3, + "passed": false, + "score": 95, + "countedForRecommendation": true, + "outcome": "behavioral-fail", + "failureCategory": "model-behavior", + "primaryFailure": "duplicateOrMissingTokens=GAUNTLET_JACK_USER_OK_3,GAUNTLET_TOM_USER_OK_3", + "durationMs": 118757, + "hardFailure": true, + "stageDurationsMs": { + "setup": 9, + "launchBootstrap": 19986, + "materializeTasks": 37, + "directReply": 8036, + "peerRelayAB": 37430, + "peerRelayBC": 36219, + "concurrentReplies": 8551, + "hygiene": 0 + }, + "stageFailures": {}, + "taskRefChecks": { + "directReply": true, + "peerRelayAB": true, + "peerRelayBC": true, + "concurrentBob": true, + "concurrentTom": true + }, + "protocolViolations": { + "badMessages": 0, + "duplicateOrMissingTokens": [ + "GAUNTLET_JACK_USER_OK_3", + "GAUNTLET_TOM_USER_OK_3" + ] + }, + "stages": { + "launchBootstrap": true, + "directReply": true, + "peerRelayAB": true, + "peerRelayBC": true, + "concurrentReplies": true, + "taskRefs": true, + "cleanTranscript": true, + "noDuplicateTokens": false, + "latencyStable": true + }, + "diagnostics": [ + "runId=2b0610e0-7b10-49fc-88dd-ab30b37abce9", + "duplicateOrMissingTokens=GAUNTLET_JACK_USER_OK_3,GAUNTLET_TOM_USER_OK_3" ] } ] diff --git a/test-results/opencode-semantic-model-gauntlet/model-gauntlet-results.md b/test-results/opencode-semantic-model-gauntlet/model-gauntlet-results.md index 79a6cd17..d41c64b7 100644 --- a/test-results/opencode-semantic-model-gauntlet/model-gauntlet-results.md +++ b/test-results/opencode-semantic-model-gauntlet/model-gauntlet-results.md @@ -1,9 +1,9 @@ # OpenCode Model Gauntlet Results -Generated: 2026-05-08T22:48:31.416Z +Generated: 2026-05-09T23:16:07.760Z -Runs per model: 1 -Recommended threshold: average >= 80, successful runs >= 1, consistency >= 85, hard failures = 0 +Runs per model: 3 +Recommended threshold: average >= 90, successful runs >= 3, consistency >= 85, hard failures = 0 Provider-infra runs are reported separately and are not counted as model behavior. They still block a Recommended verdict until rerun succeeds. @@ -13,25 +13,50 @@ Scoring weights: launchBootstrap=15, directReply=10, peerRelayAB=15, peerRelayBC | Model | Verdict | Confidence | Readiness | Consistency | Score Spread | Behavior Avg | Overall Avg | Counted | Pass Runs | Weakest Stage | Weakest TaskRef | Dominant Failure | Blockers | Provider Infra | Runtime Transport | Model Fails | Protocol Runs | p50 | p95 | | --- | --- | --- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | --- | --- | --- | --- | ---: | ---: | ---: | ---: | ---: | ---: | -| `opencode/big-pickle` | Recommended | low | 100 | 100 | 0 | 100 | 100 | 1/1 | 1/1 | cleanTranscript 1/1 (100%) | concurrentBob 1/1 (100%) | none | - | 0 | 0 | 0 | 0 | 118757ms | 118757ms | +| `opencode/big-pickle` | Recommended | high | 100 | 100 | 0 | 100 | 100 | 3/3 | 3/3 | cleanTranscript 3/3 (100%) | concurrentBob 3/3 (100%) | none | - | 0 | 0 | 0 | 0 | 112355ms | 116891ms | +| `opencode/minimax-m2.5-free` | Strong candidate | high | 88.6 | 93.1 | 5 | 98.3 | 98.3 | 3/3 | 2/3 | noDuplicateTokens 2/3 (66.7%) | concurrentBob 3/3 (100%) | model-behavior | successful runs 2 < 3; hard failures 1; model-behavior failures 1; highest weighted stage loss noDuplicateTokens=5; protocol violations in 1 runs | 0 | 0 | 1 | 1 | 108862ms | 118757ms | ## opencode/big-pickle Readiness score: 100. -Score stability: consistency=100, min=100, max=100, spread=0, stdDev=0, samples=1. +Score stability: consistency=100, min=100, max=100, spread=0, stdDev=0, samples=3. Recommendation blockers: -. Weighted stage impact: -. -Stage pass rates: launchBootstrap:1/1 (100%), directReply:1/1 (100%), peerRelayAB:1/1 (100%), peerRelayBC:1/1 (100%), concurrentReplies:1/1 (100%), taskRefs:1/1 (100%), cleanTranscript:1/1 (100%), noDuplicateTokens:1/1 (100%), latencyStable:1/1 (100%). +Stage pass rates: launchBootstrap:3/3 (100%), directReply:3/3 (100%), peerRelayAB:3/3 (100%), peerRelayBC:3/3 (100%), concurrentReplies:3/3 (100%), taskRefs:3/3 (100%), cleanTranscript:3/3 (100%), noDuplicateTokens:3/3 (100%), latencyStable:3/3 (100%). -TaskRef pass rates: directReply:1/1 (100%), peerRelayAB:1/1 (100%), peerRelayBC:1/1 (100%), concurrentBob:1/1 (100%), concurrentTom:1/1 (100%). +TaskRef pass rates: directReply:3/3 (100%), peerRelayAB:3/3 (100%), peerRelayBC:3/3 (100%), concurrentBob:3/3 (100%), concurrentTom:3/3 (100%). Protocol totals: badMessages=0, duplicateOrMissingTokens=0, affectedRuns=0. | Run | Outcome | Category | Score | Counted | Duration | Failed Stages | Slowest Stage | TaskRefs | Protocol | Diagnostics | | ---: | --- | --- | ---: | --- | ---: | --- | --- | --- | --- | --- | -| 1 | passed | none | 100 | yes | 118757ms | - | peerRelayAB:32039ms | directReply:ok, peerRelayAB:ok, peerRelayBC:ok, concurrentBob:ok, concurrentTom:ok | - | runId=44f5aa40-e169-49ed-9ea3-4c72aaf4a9f1 | +| 1 | passed | none | 100 | yes | 112344ms | - | peerRelayBC:28154ms | directReply:ok, peerRelayAB:ok, peerRelayBC:ok, concurrentBob:ok, concurrentTom:ok | - | runId=d9d27eb0-2798-4980-a0fa-f082a6edd705 | +| 2 | passed | none | 100 | yes | 112355ms | - | peerRelayBC:28580ms | directReply:ok, peerRelayAB:ok, peerRelayBC:ok, concurrentBob:ok, concurrentTom:ok | - | runId=97364154-e06d-460c-94ae-65b73cb1b6f9 | +| 3 | passed | none | 100 | yes | 116891ms | - | peerRelayAB:27842ms | directReply:ok, peerRelayAB:ok, peerRelayBC:ok, concurrentBob:ok, concurrentTom:ok | - | runId=7bdd4b2e-dbd6-4474-a8a0-9418df433671 | + +## opencode/minimax-m2.5-free + +Readiness score: 88.6. + +Score stability: consistency=93.1, min=95, max=100, spread=5, stdDev=2.4, samples=3. + +Recommendation blockers: successful runs 2 < 3; hard failures 1; model-behavior failures 1; highest weighted stage loss noDuplicateTokens=5; protocol violations in 1 runs. + +Weighted stage impact: noDuplicateTokens:loss=5, failed=1, pass=2/3 (66.7%). + +Stage pass rates: launchBootstrap:3/3 (100%), directReply:3/3 (100%), peerRelayAB:3/3 (100%), peerRelayBC:3/3 (100%), concurrentReplies:3/3 (100%), taskRefs:3/3 (100%), cleanTranscript:3/3 (100%), noDuplicateTokens:2/3 (66.7%), latencyStable:3/3 (100%). + +TaskRef pass rates: directReply:3/3 (100%), peerRelayAB:3/3 (100%), peerRelayBC:3/3 (100%), concurrentBob:3/3 (100%), concurrentTom:3/3 (100%). + +Protocol totals: badMessages=0, duplicateOrMissingTokens=2, affectedRuns=1. + +| Run | Outcome | Category | Score | Counted | Duration | Failed Stages | Slowest Stage | TaskRefs | Protocol | Diagnostics | +| ---: | --- | --- | ---: | --- | ---: | --- | --- | --- | --- | --- | +| 1 | passed | none | 100 | yes | 91530ms | - | peerRelayBC:27370ms | directReply:ok, peerRelayAB:ok, peerRelayBC:ok, concurrentBob:ok, concurrentTom:ok | - | runId=23ae85d2-e79d-41c9-93a6-e843acea6d9e | +| 2 | passed | none | 100 | yes | 108862ms | - | peerRelayAB:30664ms | directReply:ok, peerRelayAB:ok, peerRelayBC:ok, concurrentBob:ok, concurrentTom:ok | - | runId=c3a55d8a-4028-4af7-9e1a-8ae8c87a95e5 | +| 3 | behavioral-fail | model-behavior | 95 | yes | 118757ms | noDuplicateTokens | peerRelayAB:37430ms | directReply:ok, peerRelayAB:ok, peerRelayBC:ok, concurrentBob:ok, concurrentTom:ok | token=GAUNTLET_JACK_USER_OK_3+GAUNTLET_TOM_USER_OK_3 | duplicateOrMissingTokens=GAUNTLET_JACK_USER_OK_3,GAUNTLET_TOM_USER_OK_3 | diff --git a/test-results/opencode-semantic-model-matrix/report-1778366899222.json b/test-results/opencode-semantic-model-matrix/report-1778366899222.json new file mode 100644 index 00000000..d0660a4a --- /dev/null +++ b/test-results/opencode-semantic-model-matrix/report-1778366899222.json @@ -0,0 +1,62 @@ +{ + "generatedAt": "2026-05-09T22:48:19.222Z", + "models": [ + { + "model": "opencode/big-pickle", + "passed": true, + "score": 100, + "durationMs": 67267, + "stages": { + "launchBootstrap": true, + "directReply": true, + "peerRelay": true, + "taskRefs": true, + "longPrompt": true, + "latencyStable": true + }, + "diagnostics": [ + "runId=f44e2547-3689-41a1-9a0a-75d38bdb1097", + "directDelivery={\"delivered\":true,\"accepted\":true,\"responsePending\":false,\"responseState\":\"responded_visible_message\",\"ledgerStatus\":\"responded\",\"visibleReplyMessageId\":\"ad750c26-d9bd-4028-9936-754cbf7aef7b\",\"visibleReplyCorrelation\":\"relayOfMessageId\",\"diagnostics\":[\"OpenCode app MCP was reattached before message delivery.\",\"Reattached stale OpenCode app MCP server: existing app MCP config does not expose environment; CLAUDE_TEAM_CONTROL_URL cannot be verified\",\"opencode_direct_user_delivery_inline_observe_attempt_1\",\"opencode_message_send_tool_error_inline_observe\"]}", + "peerDelivery={\"delivered\":true,\"accepted\":true,\"responsePending\":true,\"responseState\":\"responded_visible_message\",\"ledgerStatus\":\"retry_scheduled\",\"reason\":\"visible_reply_destination_not_found_yet\",\"visibleReplyCorrelation\":\"direct_child_message_send\",\"diagnostics\":[\"OpenCode bootstrap MCP did not complete required tools before assistant response: runtime_bootstrap_checkin, member_briefing\",\"Recreated OpenCode session before message delivery because bootstrap MCP failed in the stored session.\",\"OpenCode app MCP was reattached before message delivery.\",\"Reattached stale OpenCode app MCP server: existing app MCP config does not expose environment; CLAUDE_TEAM_CONTROL_URL cannot be verified\"]}" + ] + }, + { + "model": "opencode/minimax-m2.5-free", + "passed": true, + "score": 100, + "durationMs": 66426, + "stages": { + "launchBootstrap": true, + "directReply": true, + "peerRelay": true, + "taskRefs": true, + "longPrompt": true, + "latencyStable": true + }, + "diagnostics": [ + "runId=1659a3ab-ba64-432b-95ec-ab1d88371ebf", + "directDelivery={\"delivered\":true,\"accepted\":true,\"responsePending\":false,\"responseState\":\"responded_visible_message\",\"ledgerStatus\":\"responded\",\"visibleReplyMessageId\":\"f03532ac-cccf-450d-a951-63a98d02125d\",\"visibleReplyCorrelation\":\"relayOfMessageId\",\"diagnostics\":[\"OpenCode app MCP was reattached before message delivery.\",\"Reattached stale OpenCode app MCP server: existing app MCP config does not expose environment; CLAUDE_TEAM_CONTROL_URL cannot be verified\"]}", + "peerDelivery={\"delivered\":true,\"accepted\":true,\"responsePending\":true,\"responseState\":\"empty_assistant_turn\",\"ledgerStatus\":\"retry_scheduled\",\"reason\":\"empty_assistant_turn\",\"diagnostics\":[\"OpenCode bootstrap MCP did not complete required tools before assistant response: runtime_bootstrap_checkin, member_briefing\",\"Recreated OpenCode session before message delivery because bootstrap MCP failed in the stored session.\",\"OpenCode app MCP was reattached before message delivery.\",\"Reattached stale OpenCode app MCP server: existing app MCP config does not expose environment; CLAUDE_TEAM_CONTROL_URL cannot be verified\"]}" + ] + }, + { + "model": "opencode/nemotron-3-super-free", + "passed": true, + "score": 100, + "durationMs": 77760, + "stages": { + "launchBootstrap": true, + "directReply": true, + "peerRelay": true, + "taskRefs": true, + "longPrompt": true, + "latencyStable": true + }, + "diagnostics": [ + "runId=0bd03f13-bac1-4220-96cc-7753944e4ff0", + "directDelivery={\"delivered\":true,\"accepted\":true,\"responsePending\":false,\"responseState\":\"responded_visible_message\",\"ledgerStatus\":\"responded\",\"visibleReplyMessageId\":\"247aa6f8-3c53-4353-b9b2-0e3cdc7d5b34\",\"visibleReplyCorrelation\":\"relayOfMessageId\",\"diagnostics\":[\"OpenCode app MCP was reattached before message delivery.\",\"Reattached stale OpenCode app MCP server: existing app MCP config does not expose environment; CLAUDE_TEAM_CONTROL_URL cannot be verified\",\"opencode_direct_user_delivery_inline_observe_attempt_1\"]}", + "peerDelivery={\"delivered\":true,\"accepted\":true,\"responsePending\":true,\"responseState\":\"empty_assistant_turn\",\"ledgerStatus\":\"retry_scheduled\",\"reason\":\"empty_assistant_turn\",\"diagnostics\":[\"OpenCode bootstrap MCP did not complete required tools before assistant response: runtime_bootstrap_checkin, member_briefing\",\"Recreated OpenCode session before message delivery because bootstrap MCP failed in the stored session.\",\"OpenCode app MCP was reattached before message delivery.\",\"Reattached stale OpenCode app MCP server: existing app MCP config does not expose environment; CLAUDE_TEAM_CONTROL_URL cannot be verified\"]}" + ] + } + ] +} diff --git a/test/main/services/team/TeamLogSourceTracker.test.ts b/test/main/services/team/TeamLogSourceTracker.test.ts index 7f5e0f0a..c36e724f 100644 --- a/test/main/services/team/TeamLogSourceTracker.test.ts +++ b/test/main/services/team/TeamLogSourceTracker.test.ts @@ -2,7 +2,7 @@ import { createHash } from 'crypto'; import { mkdtemp, mkdir, rm, stat, writeFile } from 'fs/promises'; import { tmpdir } from 'os'; import * as path from 'path'; -import { afterEach, describe, expect, it, vi } from 'vitest'; +import { afterAll, afterEach, beforeAll, describe, expect, it, vi } from 'vitest'; import { shouldIgnoreLogSourceWatcherPath, @@ -12,6 +12,9 @@ import { import type { TeamMemberLogsFinder } from '../../../../src/main/services/team/TeamMemberLogsFinder'; import type { TeamChangeEvent } from '../../../../src/shared/types'; +const originalChokidarUsePolling = process.env.CHOKIDAR_USEPOLLING; +const originalChokidarInterval = process.env.CHOKIDAR_INTERVAL; + function safeTaskIdSegment(taskId: string): string { return `task-id-${createHash('sha256').update(taskId).digest('hex').slice(0, 32)}`; } @@ -19,6 +22,11 @@ function safeTaskIdSegment(taskId: string): string { describe('TeamLogSourceTracker', () => { let tempDir: string | null = null; + beforeAll(() => { + process.env.CHOKIDAR_USEPOLLING = '1'; + process.env.CHOKIDAR_INTERVAL = '25'; + }); + afterEach(async () => { if (tempDir) { await rm(tempDir, { recursive: true, force: true }); @@ -26,6 +34,19 @@ describe('TeamLogSourceTracker', () => { } }); + afterAll(() => { + if (originalChokidarUsePolling === undefined) { + delete process.env.CHOKIDAR_USEPOLLING; + } else { + process.env.CHOKIDAR_USEPOLLING = originalChokidarUsePolling; + } + if (originalChokidarInterval === undefined) { + delete process.env.CHOKIDAR_INTERVAL; + } else { + process.env.CHOKIDAR_INTERVAL = originalChokidarInterval; + } + }); + it('emits task-log-change for matching runtime freshness signals without broad log-source-change', async () => { tempDir = await mkdtemp(path.join(tmpdir(), 'team-log-source-tracker-')); diff --git a/test/renderer/components/team/TeamModelSelectorDisabledState.test.ts b/test/renderer/components/team/TeamModelSelectorDisabledState.test.ts index d69721dd..3114124d 100644 --- a/test/renderer/components/team/TeamModelSelectorDisabledState.test.ts +++ b/test/renderer/components/team/TeamModelSelectorDisabledState.test.ts @@ -285,7 +285,7 @@ describe('TeamModelSelector disabled Codex models', () => { expect(host.textContent).toContain('mistralai/codestral-2508'); expect(host.textContent).toContain('Tested'); expect(host.textContent).toContain('minimax-m2.5-free'); - expect(host.textContent).toContain('Recommended with limits'); + expect(host.textContent).toContain('Tested with limits'); expect(host.textContent).toContain('openai/gpt-oss-120b:free'); expect(host.textContent).toContain('big-pickle'); expect(host.textContent).toContain('qwen/qwen3-coder-plus'); @@ -313,8 +313,8 @@ describe('TeamModelSelector disabled Codex models', () => { expect(limitedIndex).toBeGreaterThanOrEqual(0); expect(testedIndex).toBeGreaterThanOrEqual(0); expect(limitedIndex).toBeGreaterThan(recommendedIndex); - expect(testedIndex).toBeGreaterThan(limitedIndex); - expect(unavailableIndex).toBeGreaterThan(testedIndex); + expect(testedIndex).toBeGreaterThan(recommendedIndex); + expect(unavailableIndex).toBeGreaterThan(limitedIndex); expect(notRecommendedIndex).toBeGreaterThan(unavailableIndex); expect(host.textContent).toContain('Recommended only'); @@ -325,6 +325,223 @@ describe('TeamModelSelector disabled Codex models', () => { }); }); + it('shows short-lived OpenCode preflight failures as unavailable model tiles', async () => { + vi.stubGlobal('IS_REACT_ACT_ENVIRONMENT', true); + storeState.cliStatus = { + flavor: 'agent_teams_orchestrator', + providers: [ + { + providerId: 'opencode', + authMethod: 'opencode_managed', + backend: { + kind: 'opencode-cli', + label: 'OpenCode CLI', + endpointLabel: 'opencode', + }, + authenticated: true, + supported: true, + capabilities: { + teamLaunch: true, + }, + models: ['openai/gpt-5.4', 'opencode/big-pickle'], + modelVerificationState: 'idle', + modelAvailability: [], + }, + ], + }; + + const host = document.createElement('div'); + document.body.appendChild(host); + const root = createRoot(host); + const onValueChange = vi.fn(); + + await act(async () => { + root.render( + React.createElement(TeamModelSelector, { + providerId: 'opencode', + onProviderChange: () => undefined, + value: '', + onValueChange, + modelUnavailableReasonByValue: { + 'openai/gpt-5.4': 'OpenCode provider authentication failed', + }, + }) + ); + await Promise.resolve(); + }); + + const unavailableButton = Array.from(host.querySelectorAll('button')).find((button) => + button.textContent?.includes('GPT-5.4') + ); + expect(unavailableButton).not.toBeNull(); + expect(unavailableButton?.getAttribute('aria-disabled')).toBe('true'); + expect(unavailableButton?.textContent).toContain('Unavailable'); + expect(unavailableButton?.getAttribute('title')).toContain( + 'OpenCode provider authentication failed' + ); + + await act(async () => { + unavailableButton?.dispatchEvent(new MouseEvent('click', { bubbles: true })); + await Promise.resolve(); + }); + + expect(onValueChange).not.toHaveBeenCalled(); + + await act(async () => { + root.unmount(); + await Promise.resolve(); + }); + }); + + it('shows short-lived OpenCode preflight notes as selectable issue tiles', async () => { + vi.stubGlobal('IS_REACT_ACT_ENVIRONMENT', true); + storeState.cliStatus = { + flavor: 'agent_teams_orchestrator', + providers: [ + { + providerId: 'opencode', + authMethod: 'opencode_managed', + backend: { + kind: 'opencode-cli', + label: 'OpenCode CLI', + endpointLabel: 'opencode', + }, + authenticated: true, + supported: true, + capabilities: { + teamLaunch: true, + }, + models: ['openai/gpt-5.4', 'opencode/big-pickle'], + modelVerificationState: 'idle', + modelAvailability: [], + }, + ], + }; + + const host = document.createElement('div'); + document.body.appendChild(host); + const root = createRoot(host); + const onValueChange = vi.fn(); + + await act(async () => { + root.render( + React.createElement(TeamModelSelector, { + providerId: 'opencode', + onProviderChange: () => undefined, + value: '', + onValueChange, + modelIssueReasonByValue: { + 'openai/gpt-5.4': 'Model verification timed out', + }, + }) + ); + await Promise.resolve(); + }); + + const issueButton = Array.from(host.querySelectorAll('button')).find((button) => + button.textContent?.includes('GPT-5.4') + ); + expect(issueButton).not.toBeNull(); + expect(issueButton?.getAttribute('aria-disabled')).toBe('false'); + expect(issueButton?.textContent).toContain('Issue'); + expect(issueButton?.getAttribute('title')).toContain('Model verification timed out'); + + await act(async () => { + issueButton?.dispatchEvent(new MouseEvent('click', { bubbles: true })); + await Promise.resolve(); + }); + + expect(onValueChange).toHaveBeenCalledWith('openai/gpt-5.4'); + + await act(async () => { + root.unmount(); + await Promise.resolve(); + }); + }); + + it('dynamically disables OpenCode openai routes when OpenAI auth is invalid', async () => { + vi.stubGlobal('IS_REACT_ACT_ENVIRONMENT', true); + storeState.cliStatus = { + flavor: 'agent_teams_orchestrator', + providers: [ + { + providerId: 'opencode', + authMethod: 'opencode_managed', + backend: { + kind: 'opencode-cli', + label: 'OpenCode CLI', + endpointLabel: 'opencode', + }, + authenticated: true, + supported: true, + capabilities: { + teamLaunch: true, + }, + statusMessage: 'OpenAI token invalid', + detailMessage: 'OpenAI token refresh failed: 401', + models: ['openai/gpt-5.4', 'opencode/big-pickle'], + availableBackends: [ + { + id: 'openai', + label: 'OpenAI', + description: 'OpenAI route', + selectable: false, + recommended: false, + available: false, + state: 'authentication-required', + statusMessage: 'Authentication required', + detailMessage: 'Token refresh failed: 401', + }, + ], + modelVerificationState: 'idle', + modelAvailability: [], + }, + ], + }; + + const host = document.createElement('div'); + document.body.appendChild(host); + const root = createRoot(host); + const onValueChange = vi.fn(); + + await act(async () => { + root.render( + React.createElement(TeamModelSelector, { + providerId: 'opencode', + onProviderChange: () => undefined, + value: '', + onValueChange, + }) + ); + await Promise.resolve(); + }); + + const openAiButton = Array.from(host.querySelectorAll('button')).find((button) => + button.textContent?.includes('GPT-5.4') + ); + const bigPickleButton = Array.from(host.querySelectorAll('button')).find((button) => + button.textContent?.includes('big-pickle') + ); + + expect(openAiButton).not.toBeNull(); + expect(openAiButton?.getAttribute('aria-disabled')).toBe('true'); + expect(openAiButton?.textContent).toContain('Unavailable'); + expect(bigPickleButton).not.toBeNull(); + expect(bigPickleButton?.getAttribute('aria-disabled')).toBe('false'); + + await act(async () => { + openAiButton?.dispatchEvent(new MouseEvent('click', { bubbles: true })); + await Promise.resolve(); + }); + + expect(onValueChange).not.toHaveBeenCalled(); + + await act(async () => { + root.unmount(); + await Promise.resolve(); + }); + }); + it('constrains long runtime model lists so the selector scrolls', async () => { vi.stubGlobal('IS_REACT_ACT_ENVIRONMENT', true); storeState.cliStatus = { @@ -842,13 +1059,15 @@ describe('TeamModelSelector disabled Codex models', () => { const modelButtons = Array.from(host.querySelectorAll('button')).map( (button) => button.textContent?.trim() ?? '' ); + const hasModelButtonStartingWith = (label: string): boolean => + modelButtons.some((text) => text.startsWith(label)); - expect(modelButtons.some((text) => text.startsWith('Default'))).toBe(true); - expect(modelButtons).toContain('Opus 4.8'); - expect(modelButtons).toContain('Opus 4.6'); - expect(modelButtons).toContain('Sonnet 4.7'); - expect(modelButtons).toContain('Haiku 4.6'); - expect(modelButtons).not.toContain('Opus 4.8 (1M)'); + expect(hasModelButtonStartingWith('Default')).toBe(true); + expect(hasModelButtonStartingWith('Opus 4.8')).toBe(true); + expect(hasModelButtonStartingWith('Opus 4.6')).toBe(true); + expect(hasModelButtonStartingWith('Sonnet 4.7')).toBe(true); + expect(hasModelButtonStartingWith('Haiku 4.6')).toBe(true); + expect(hasModelButtonStartingWith('Opus 4.8 (1M)')).toBe(false); await act(async () => { root.unmount(); diff --git a/test/renderer/components/team/dialogs/providerPrepareShortLivedCache.test.ts b/test/renderer/components/team/dialogs/providerPrepareShortLivedCache.test.ts index e2a30896..a2c21fb3 100644 --- a/test/renderer/components/team/dialogs/providerPrepareShortLivedCache.test.ts +++ b/test/renderer/components/team/dialogs/providerPrepareShortLivedCache.test.ts @@ -2,6 +2,7 @@ import { afterEach, describe, expect, it, vi } from 'vitest'; import { __resetShortLivedProviderPrepareCacheForTests, + getShortLivedProviderPrepareModelIssueReasons, getShortLivedProviderPrepareModelResults, storeShortLivedProviderPrepareModelResults, } from '@renderer/components/team/dialogs/providerPrepareShortLivedCache'; @@ -42,6 +43,17 @@ describe('providerPrepareShortLivedCache', () => { warningLine: null, }, }); + expect( + getShortLivedProviderPrepareModelIssueReasons({ + providerId: 'opencode', + cacheKey: 'key-1', + }) + ).toEqual({ + modelIssueReasonByValue: { + 'opencode/nemotron-3-super-free': 'timed out', + }, + modelUnavailableReasonByValue: {}, + }); }); it('expires cached OpenCode results after the short-lived TTL', () => { @@ -68,6 +80,100 @@ describe('providerPrepareShortLivedCache', () => { ).toEqual({}); }); + it('stores short-lived OpenCode failed model results as blocking unavailable issues', () => { + storeShortLivedProviderPrepareModelResults({ + providerId: 'opencode', + cacheKey: 'key-4', + modelResultsById: { + 'openai/gpt-5.4': { + status: 'failed', + line: 'GPT-5.4 - unavailable - OpenCode provider authentication failed', + warningLine: null, + }, + }, + }); + + expect( + getShortLivedProviderPrepareModelResults({ + providerId: 'opencode', + cacheKey: 'key-4', + }) + ).toEqual({}); + expect( + getShortLivedProviderPrepareModelIssueReasons({ + providerId: 'opencode', + cacheKey: 'key-4', + }) + ).toEqual({ + modelIssueReasonByValue: {}, + modelUnavailableReasonByValue: { + 'openai/gpt-5.4': 'OpenCode provider authentication failed', + }, + }); + }); + + it('clears a short-lived issue when a later result verifies the same model', () => { + storeShortLivedProviderPrepareModelResults({ + providerId: 'opencode', + cacheKey: 'key-5', + modelResultsById: { + 'openai/gpt-5.4': { + status: 'failed', + line: 'GPT-5.4 - unavailable - OpenCode provider authentication failed', + warningLine: null, + }, + }, + }); + storeShortLivedProviderPrepareModelResults({ + providerId: 'opencode', + cacheKey: 'key-5', + modelResultsById: { + 'openai/gpt-5.4': { + status: 'ready', + line: 'GPT-5.4 - verified', + warningLine: null, + }, + }, + }); + + expect( + getShortLivedProviderPrepareModelIssueReasons({ + providerId: 'opencode', + cacheKey: 'key-5', + }) + ).toEqual({ + modelIssueReasonByValue: {}, + modelUnavailableReasonByValue: {}, + }); + }); + + it('expires short-lived OpenCode issues after the issue TTL', () => { + vi.useFakeTimers(); + storeShortLivedProviderPrepareModelResults({ + providerId: 'opencode', + cacheKey: 'key-6', + modelResultsById: { + 'openai/gpt-5.4': { + status: 'failed', + line: 'GPT-5.4 - unavailable - OpenCode provider authentication failed', + warningLine: null, + }, + }, + }); + + vi.advanceTimersByTime(90_001); + + expect( + getShortLivedProviderPrepareModelIssueReasons({ + providerId: 'opencode', + cacheKey: 'key-6', + }) + ).toEqual({ + modelIssueReasonByValue: {}, + modelUnavailableReasonByValue: {}, + }); + }); + it('does not store short-lived cache for non-OpenCode providers', () => { storeShortLivedProviderPrepareModelResults({ providerId: 'codex', @@ -87,5 +193,14 @@ describe('providerPrepareShortLivedCache', () => { cacheKey: 'key-3', }) ).toEqual({}); + expect( + getShortLivedProviderPrepareModelIssueReasons({ + providerId: 'codex', + cacheKey: 'key-3', + }) + ).toEqual({ + modelIssueReasonByValue: {}, + modelUnavailableReasonByValue: {}, + }); }); }); diff --git a/test/renderer/features/runtime-provider-management/RuntimeProviderManagementPanelView.test.ts b/test/renderer/features/runtime-provider-management/RuntimeProviderManagementPanelView.test.ts index 6978000f..8dcdae41 100644 --- a/test/renderer/features/runtime-provider-management/RuntimeProviderManagementPanelView.test.ts +++ b/test/renderer/features/runtime-provider-management/RuntimeProviderManagementPanelView.test.ts @@ -643,7 +643,7 @@ describe('RuntimeProviderManagementPanelView', () => { expect(host.textContent).toContain('Not recommended'); expect(host.textContent).toContain('Unavailable in OpenCode'); expect(host.textContent).toContain('Tested'); - expect(host.textContent).toContain('Recommended with limits'); + expect(host.textContent).toContain('Tested with limits'); expect(host.textContent).toContain('Recommended only'); expect(host.textContent).not.toContain('Set OpenCode default'); expect( @@ -687,8 +687,11 @@ describe('RuntimeProviderManagementPanelView', () => { expect((host.textContent ?? '').indexOf('opencode/big-pickle')).toBeLessThan( (host.textContent ?? '').indexOf('minimax-m2.5-free') ); + expect((host.textContent ?? '').indexOf('mistralai/codestral-2508')).toBeLessThan( + (host.textContent ?? '').indexOf('minimax-m2.5-free') + ); expect((host.textContent ?? '').indexOf('minimax-m2.5-free')).toBeLessThan( - (host.textContent ?? '').indexOf('mistralai/codestral-2508') + (host.textContent ?? '').indexOf('qwen/qwen3-coder-plus') ); expect((host.textContent ?? '').indexOf('qwen/qwen3-coder-plus')).toBeLessThan( (host.textContent ?? '').indexOf('openrouter/openai/gpt-oss-20b:free') diff --git a/test/renderer/utils/openCodeModelRecommendations.test.ts b/test/renderer/utils/openCodeModelRecommendations.test.ts index 804a8767..78b4b95e 100644 --- a/test/renderer/utils/openCodeModelRecommendations.test.ts +++ b/test/renderer/utils/openCodeModelRecommendations.test.ts @@ -26,10 +26,12 @@ describe('getOpenCodeTeamModelRecommendation', () => { }); it('marks models that passed real OpenCode Agent Teams smoke E2E as tested', () => { - expect(getOpenCodeTeamModelRecommendation('openrouter/mistralai/codestral-2508')).toMatchObject({ - level: 'tested', - label: 'Tested', - }); + expect(getOpenCodeTeamModelRecommendation('openrouter/mistralai/codestral-2508')).toMatchObject( + { + level: 'tested', + label: 'Tested', + } + ); expect( getOpenCodeTeamModelRecommendation(' OPENROUTER/GOOGLE/GEMINI-3-FLASH-PREVIEW ') ).toMatchObject({ @@ -101,10 +103,11 @@ describe('getOpenCodeTeamModelRecommendation', () => { it('keeps similarly named models distinct when real E2E disagreed', () => { expect(getOpenCodeTeamModelRecommendation('opencode/minimax-m2.5-free')).toMatchObject({ - level: 'recommended-with-limits', - label: 'Recommended with limits', + level: 'tested-with-limits', + label: 'Tested with limits', + reason: expect.stringContaining('duplicate or missing reply tokens'), }); - expect(isOpenCodeTeamModelRecommended('opencode/minimax-m2.5-free')).toBe(true); + expect(isOpenCodeTeamModelRecommended('opencode/minimax-m2.5-free')).toBe(false); expect( getOpenCodeTeamModelRecommendation('openrouter/minimax/minimax-m2.5:free') ).toMatchObject({ @@ -815,9 +818,9 @@ describe('getOpenCodeTeamModelRecommendation', () => { [...models].sort((left, right) => compareOpenCodeTeamModelRecommendations(left, right)) ).toEqual([ 'opencode/big-pickle', - 'opencode/minimax-m2.5-free', 'openrouter/mistralai/codestral-2508', 'openrouter/anthropic/claude-sonnet-4.6', + 'opencode/minimax-m2.5-free', 'openrouter/qwen/qwen3-coder-plus', 'openrouter/openai/gpt-oss-20b:free', ]); diff --git a/test/renderer/utils/teamModelAvailability.test.ts b/test/renderer/utils/teamModelAvailability.test.ts index cb7f3540..cba8f8c3 100644 --- a/test/renderer/utils/teamModelAvailability.test.ts +++ b/test/renderer/utils/teamModelAvailability.test.ts @@ -126,6 +126,34 @@ describe('teamModelAvailability', () => { ]); }); + it('treats runtime-reported unavailable models as non-selectable', () => { + const providerStatus = createCodexProviderStatus(['gpt-5.4'], { + modelAvailability: [ + { + modelId: 'gpt-5.4', + status: 'unavailable', + reason: 'No access for this account', + checkedAt: null, + }, + ], + }); + + expect(getAvailableTeamProviderModels('codex', providerStatus)).toEqual([]); + expect(normalizeTeamModelForUi('codex', 'gpt-5.4', providerStatus)).toBe(''); + expect(getTeamModelSelectionError('codex', 'gpt-5.4', providerStatus)).toContain( + 'No access for this account' + ); + expect(getAvailableTeamProviderModelOptions('codex', providerStatus)).toEqual([ + { value: '', label: 'Default', badgeLabel: 'Default' }, + { + value: 'gpt-5.4', + label: '5.4', + availabilityStatus: 'unavailable', + availabilityReason: 'No access for this account', + }, + ]); + }); + it('keeps OpenCode raw ids intact while exposing readable labels and source badges', () => { const providerStatus = createOpenCodeProviderStatus([ 'openai/gpt-5.4', @@ -168,6 +196,33 @@ describe('teamModelAvailability', () => { ).toBe('openrouter/moonshotai/kimi-k2'); }); + it('reports OpenCode openai routes unavailable when OpenAI auth is invalid', () => { + const providerStatus = createOpenCodeProviderStatus(['openai/gpt-5.4', 'opencode/big-pickle'], { + statusMessage: 'OpenAI token invalid', + detailMessage: 'OpenAI token refresh failed: 401', + availableBackends: [ + { + id: 'openai', + label: 'OpenAI', + description: 'OpenAI route', + selectable: false, + recommended: false, + available: false, + state: 'authentication-required', + statusMessage: 'Authentication required', + detailMessage: 'Token refresh failed: 401', + }, + ], + }); + + expect(getTeamModelSelectionError('opencode', 'openai/gpt-5.4', providerStatus)).toContain( + 'OpenCode OpenAI provider authentication failed' + ); + expect( + getTeamModelSelectionError('opencode', 'opencode/big-pickle', providerStatus) + ).toBeNull(); + }); + it('clears stale Codex selections when runtime no longer reports that model', () => { const providerStatus = createCodexProviderStatus(['gpt-5.4', 'gpt-5.3-codex']); @@ -304,9 +359,7 @@ describe('teamModelAvailability', () => { it('keeps known Anthropic full model ids selectable without runtime verification', () => { expect(normalizeTeamModelForUi('anthropic', 'claude-opus-4-7')).toBe('claude-opus-4-7'); - expect(normalizeTeamModelForUi('anthropic', 'claude-opus-4-7[1m]')).toBe( - 'claude-opus-4-7[1m]' - ); + expect(normalizeTeamModelForUi('anthropic', 'claude-opus-4-7[1m]')).toBe('claude-opus-4-7[1m]'); expect(normalizeTeamModelForUi('anthropic', 'claude-haiku-4-5-20251001')).toBe( 'claude-haiku-4-5-20251001' ); diff --git a/test/renderer/utils/teamModelRecommendations.test.ts b/test/renderer/utils/teamModelRecommendations.test.ts new file mode 100644 index 00000000..030f7d46 --- /dev/null +++ b/test/renderer/utils/teamModelRecommendations.test.ts @@ -0,0 +1,64 @@ +import { describe, expect, it } from 'vitest'; + +import { + getTeamModelRecommendation, + isTeamModelRecommended, +} from '@renderer/utils/teamModelRecommendations'; + +describe('getTeamModelRecommendation', () => { + it('marks only the selected Codex Agent Teams models as recommended', () => { + for (const modelId of ['gpt-5.4-mini', 'gpt-5.3-codex', 'gpt-5.5']) { + expect(getTeamModelRecommendation('codex', modelId)).toMatchObject({ + level: 'recommended', + label: 'Recommended', + }); + expect(isTeamModelRecommended('codex', modelId)).toBe(true); + } + + for (const modelId of ['gpt-5.4', 'gpt-5.2', 'gpt-5.3-codex-spark']) { + expect(getTeamModelRecommendation('codex', modelId)).toBeNull(); + expect(isTeamModelRecommended('codex', modelId)).toBe(false); + } + }); + + it('marks supported Claude aliases and full ids as recommended but leaves default unbadged', () => { + for (const modelId of [ + 'haiku', + 'sonnet', + 'sonnet[1m]', + 'opus', + 'opus[1m]', + 'claude-haiku-4-5', + 'claude-haiku-4-5-20251001', + 'claude-sonnet-4-6', + 'claude-sonnet-4-6[1m]', + 'claude-opus-4-7', + 'claude-opus-4-7[1m]', + 'claude-opus-4-6', + 'claude-opus-4-6[1m]', + ]) { + expect(getTeamModelRecommendation('anthropic', modelId)).toMatchObject({ + level: 'recommended', + label: 'Recommended', + }); + expect(isTeamModelRecommended('anthropic', modelId)).toBe(true); + } + + expect(getTeamModelRecommendation('anthropic', '')).toBeNull(); + expect(getTeamModelRecommendation('anthropic', 'default')).toBeNull(); + }); + + it('delegates OpenCode verdicts and keeps MiniMax below recommended', () => { + expect(getTeamModelRecommendation('opencode', 'opencode/big-pickle')).toMatchObject({ + level: 'recommended', + label: 'Recommended', + }); + expect(isTeamModelRecommended('opencode', 'opencode/big-pickle')).toBe(true); + + expect(getTeamModelRecommendation('opencode', 'opencode/minimax-m2.5-free')).toMatchObject({ + level: 'tested-with-limits', + label: 'Tested with limits', + }); + expect(isTeamModelRecommended('opencode', 'opencode/minimax-m2.5-free')).toBe(false); + }); +});