feat: improve team model availability diagnostics

This commit is contained in:
777genius 2026-05-10 10:11:44 +03:00
parent d0cfabca48
commit 63b89fcd39
25 changed files with 1632 additions and 150 deletions

View file

@ -1 +0,0 @@
{"taskId":"351e2899-3aba-4992-9250-bf85dccb4399","teamName":"ember-collective","provider":"codex","source":"codex-native-trace","updatedAt":"2026-05-09T07:59:53.638Z"}

View file

@ -1 +0,0 @@
{"taskId":"351e2899","teamName":"ember-collective","provider":"codex","source":"codex-native-trace","updatedAt":"2026-05-09T08:00:39.185Z"}

3
.gitignore vendored
View file

@ -48,4 +48,5 @@ eslint-fix/
.eslintcache
remotion/*
.home/
.home/
.board-task-log-freshness/

View file

@ -2,6 +2,7 @@ import { mkdir, mkdtemp, rm, writeFile } from 'fs/promises';
import os from 'os';
import path from 'path';
import { afterEach, describe, expect, it } from 'vitest';
import { MemberRuntimeLogTailReader } from '../MemberRuntimeLogTailReader';
const tempDirs: string[] = [];

View file

@ -110,6 +110,7 @@ import {
buildProviderPrepareRuntimeStatusSignature,
} from './providerPrepareRequestSignature';
import {
getShortLivedProviderPrepareModelIssueReasons,
getShortLivedProviderPrepareModelResults,
storeShortLivedProviderPrepareModelResults,
} from './providerPrepareShortLivedCache';
@ -682,6 +683,45 @@ export const CreateTeamDialog = ({
selectedProviderId,
]
);
// Per-provider lookup of cached model issue / unavailable reasons for the providers
// currently selected for team members. A provider only gets an entry when the cache
// returned at least one reason for it, so consumers can treat a missing key as "no issues".
const shortLivedModelIssueReasons = useMemo(() => {
const modelIssueReasonByProvider: Partial<Record<TeamProviderId, Record<string, string>>> = {};
const modelUnavailableReasonByProvider: Partial<
Record<TeamProviderId, Record<string, string>>
> = {};
for (const providerId of selectedMemberProviders) {
const backendSummary = runtimeBackendSummaryByProvider.get(providerId) ?? null;
// The cache key is derived from cwd, provider, backend summary, limit context and
// runtime status signature.
const cacheKey = buildProviderPrepareModelCacheKey({
cwd: effectiveCwd,
providerId,
backendSummary,
limitContext: effectiveAnthropicRuntimeLimitContext,
runtimeStatusSignature: prepareRuntimeStatusSignature,
});
const issueReasons = getShortLivedProviderPrepareModelIssueReasons({
providerId,
cacheKey,
});
if (Object.keys(issueReasons.modelIssueReasonByValue).length > 0) {
modelIssueReasonByProvider[providerId] = issueReasons.modelIssueReasonByValue;
}
if (Object.keys(issueReasons.modelUnavailableReasonByValue).length > 0) {
modelUnavailableReasonByProvider[providerId] = issueReasons.modelUnavailableReasonByValue;
}
}
return {
modelIssueReasonByProvider,
modelUnavailableReasonByProvider,
};
}, [
effectiveAnthropicRuntimeLimitContext,
effectiveCwd,
// NOTE(review): prepareChecks is not read inside the callback; presumably it is listed so
// the memo re-reads the cache after prepare checks change — confirm.
prepareChecks,
prepareRuntimeStatusSignature,
runtimeBackendSummaryByProvider,
selectedMemberProviders,
]);
useEffect(() => {
if (multimodelEnabled) {
@ -1860,6 +1900,10 @@ export const CreateTeamDialog = ({
leadModelIssueText={leadModelIssueText}
memberWarningById={teammateRuntimeCompatibility.memberWarningById}
memberModelIssueById={memberModelIssueById}
modelIssueReasonByProvider={shortLivedModelIssueReasons.modelIssueReasonByProvider}
modelUnavailableReasonByProvider={
shortLivedModelIssueReasons.modelUnavailableReasonByProvider
}
headerTop={
<div className="flex items-center gap-2">
<Checkbox

View file

@ -115,6 +115,7 @@ import {
buildProviderPrepareRuntimeStatusSignature,
} from './providerPrepareRequestSignature';
import {
getShortLivedProviderPrepareModelIssueReasons,
getShortLivedProviderPrepareModelResults,
storeShortLivedProviderPrepareModelResults,
} from './providerPrepareShortLivedCache';
@ -1417,6 +1418,53 @@ export const LaunchTeamDialog = (props: LaunchTeamDialogProps): React.JSX.Elemen
selectedProviderId,
]
);
// Per-provider lookup of cached model issue / unavailable reasons for the providers
// currently selected for team members. Both maps stay empty outside launch mode, and a
// provider only gets an entry when the cache returned at least one reason for it.
const shortLivedModelIssueReasons = useMemo(() => {
const modelIssueReasonByProvider: Partial<Record<TeamProviderId, Record<string, string>>> = {};
const modelUnavailableReasonByProvider: Partial<
Record<TeamProviderId, Record<string, string>>
> = {};
// Outside launch mode the cache is not consulted at all.
if (!isLaunchMode) {
return {
modelIssueReasonByProvider,
modelUnavailableReasonByProvider,
};
}
for (const providerId of selectedMemberProviders) {
const backendSummary = runtimeBackendSummaryByProvider.get(providerId) ?? null;
// The cache key is derived from cwd, provider, backend summary, limit context and
// runtime status signature.
const cacheKey = buildProviderPrepareModelCacheKey({
cwd: effectiveCwd,
providerId,
backendSummary,
limitContext: effectiveAnthropicRuntimeLimitContext,
runtimeStatusSignature: prepareRuntimeStatusSignature,
});
const issueReasons = getShortLivedProviderPrepareModelIssueReasons({
providerId,
cacheKey,
});
if (Object.keys(issueReasons.modelIssueReasonByValue).length > 0) {
modelIssueReasonByProvider[providerId] = issueReasons.modelIssueReasonByValue;
}
if (Object.keys(issueReasons.modelUnavailableReasonByValue).length > 0) {
modelUnavailableReasonByProvider[providerId] = issueReasons.modelUnavailableReasonByValue;
}
}
return {
modelIssueReasonByProvider,
modelUnavailableReasonByProvider,
};
}, [
effectiveAnthropicRuntimeLimitContext,
effectiveCwd,
isLaunchMode,
// NOTE(review): prepareChecks is not read inside the callback; presumably it is listed so
// the memo re-reads the cache after prepare checks change — confirm.
prepareChecks,
prepareRuntimeStatusSignature,
runtimeBackendSummaryByProvider,
selectedMemberProviders,
]);
// Clear stale provisioning error when dialog opens
useEffect(() => {
@ -2563,6 +2611,12 @@ export const LaunchTeamDialog = (props: LaunchTeamDialogProps): React.JSX.Elemen
memberInfoById={memberWorktreeContinuationInfoById}
leadModelIssueText={leadModelIssueText}
memberModelIssueById={memberModelIssueById}
modelIssueReasonByProvider={
shortLivedModelIssueReasons.modelIssueReasonByProvider
}
modelUnavailableReasonByProvider={
shortLivedModelIssueReasons.modelUnavailableReasonByProvider
}
softDeleteMembers
disableGeminiOption={isGeminiUiFrozen()}
headerBottom={

View file

@ -19,13 +19,9 @@ import {
GEMINI_UI_DISABLED_REASON,
isGeminiUiFrozen,
} from '@renderer/utils/geminiUiFreeze';
import {
compareOpenCodeTeamModelRecommendations,
getOpenCodeTeamModelRecommendation,
isOpenCodeTeamModelRecommended,
} from '@renderer/utils/openCodeModelRecommendations';
import {
getAvailableTeamProviderModelOptions,
getOpenCodeOpenAiRouteAuthUnavailableReason,
getTeamModelUiDisabledReason,
isTeamProviderModelVerificationPending,
normalizeTeamModelForUi,
@ -41,6 +37,11 @@ import {
isAnthropicHaikuTeamModel,
} from '@renderer/utils/teamModelCatalog';
import { extractProviderScopedBaseModel } from '@renderer/utils/teamModelContext';
import {
compareTeamModelRecommendations,
getTeamModelRecommendation,
isTeamModelRecommended,
} from '@renderer/utils/teamModelRecommendations';
import { resolveAnthropicLaunchModel } from '@shared/utils/anthropicLaunchModel';
import { getAnthropicDefaultTeamModel } from '@shared/utils/anthropicModelDefaults';
import { isTeamProviderId } from '@shared/utils/teamProvider';
@ -156,6 +157,7 @@ export interface TeamModelSelectorProps {
providerDisabledReasonById?: Partial<Record<TeamProviderId, string | null | undefined>>;
providerDisabledBadgeLabelById?: Partial<Record<TeamProviderId, string | null | undefined>>;
modelIssueReasonByValue?: Partial<Record<string, string | null | undefined>>;
modelUnavailableReasonByValue?: Partial<Record<string, string | null | undefined>>;
}
export const TeamModelSelector: React.FC<TeamModelSelectorProps> = ({
@ -168,6 +170,7 @@ export const TeamModelSelector: React.FC<TeamModelSelectorProps> = ({
providerDisabledReasonById,
providerDisabledBadgeLabelById,
modelIssueReasonByValue,
modelUnavailableReasonByValue,
}) => {
const multimodelEnabled = useStore((s) => s.appConfig?.general?.multimodelEnabled ?? true);
const [recommendedOnly, setRecommendedOnly] = useState(false);
@ -315,7 +318,7 @@ export const TeamModelSelector: React.FC<TeamModelSelectorProps> = ({
const hasRecommendedOpenCodeModels = useMemo(
() =>
effectiveProviderId === 'opencode' &&
modelOptions.some((option) => isOpenCodeTeamModelRecommended(option.value)),
modelOptions.some((option) => isTeamModelRecommended(effectiveProviderId, option.value)),
[effectiveProviderId, modelOptions]
);
@ -335,10 +338,7 @@ export const TeamModelSelector: React.FC<TeamModelSelectorProps> = ({
if (!normalizedModelQuery) {
return true;
}
const modelRecommendation =
effectiveProviderId === 'opencode'
? getOpenCodeTeamModelRecommendation(option.value)
: null;
const modelRecommendation = getTeamModelRecommendation(effectiveProviderId, option.value);
return [
option.value,
option.label,
@ -358,10 +358,14 @@ export const TeamModelSelector: React.FC<TeamModelSelectorProps> = ({
const concreteOptions = modelOptions
.filter((option) => option.value.trim().length > 0)
.map((option, index) => ({ option, index }))
.filter(({ option }) => !recommendedOnly || isOpenCodeTeamModelRecommended(option.value))
.filter(
({ option }) =>
!recommendedOnly || isTeamModelRecommended(effectiveProviderId, option.value)
)
.filter(({ option }) => matchesModelQuery(option))
.sort((left, right) => {
const recommendationOrder = compareOpenCodeTeamModelRecommendations(
const recommendationOrder = compareTeamModelRecommendations(
effectiveProviderId,
left.option.value,
right.option.value
);
@ -517,25 +521,44 @@ export const TeamModelSelector: React.FC<TeamModelSelectorProps> = ({
opt.value === '' ? 'available' : (opt.availabilityStatus ?? 'available');
const availabilityReason =
opt.value === '' ? null : (opt.availabilityReason ?? null);
const runtimeUnavailableReason =
opt.value !== '' && availabilityStatus === 'unavailable'
? (availabilityReason ?? 'Unavailable in current runtime')
: null;
const modelIssueReason =
opt.value === '' ? null : (modelIssueReasonByValue?.[opt.value] ?? null);
const hasModelIssue = Boolean(modelIssueReason);
const modelUnavailableReason =
opt.value === ''
? null
: (modelUnavailableReasonByValue?.[opt.value] ??
getOpenCodeOpenAiRouteAuthUnavailableReason(
effectiveProviderId,
opt.value,
runtimeProviderStatus
) ??
runtimeUnavailableReason);
const hasModelIssue = Boolean(modelIssueReason || modelUnavailableReason);
const modelSelectable =
activeProviderSelectable &&
!modelUnavailableReason &&
!modelDisabledReason &&
(opt.value === '' ||
availabilityStatus == null ||
availabilityStatus === 'available');
const modelStatusMessage =
modelIssueReason ?? modelDisabledReason ?? availabilityReason ?? null;
modelUnavailableReason ??
modelIssueReason ??
modelDisabledReason ??
availabilityReason ??
null;
const sourceBadgeLabel =
effectiveProviderId === 'opencode' && opt.value !== ''
? opt.badgeLabel?.trim() || null
: null;
const modelRecommendation =
effectiveProviderId === 'opencode'
? getOpenCodeTeamModelRecommendation(opt.value)
: null;
const modelRecommendation = getTeamModelRecommendation(
effectiveProviderId,
opt.value
);
return (
<button
@ -637,10 +660,10 @@ export const TeamModelSelector: React.FC<TeamModelSelectorProps> = ({
{hasModelIssue && (
<span
className="flex items-center justify-center gap-1 text-[10px] font-normal text-red-300"
title={modelIssueReason ?? undefined}
title={modelStatusMessage ?? undefined}
>
<AlertTriangle className="size-3 shrink-0" />
<span>Issue</span>
<span>{modelUnavailableReason ? 'Unavailable' : 'Issue'}</span>
<TooltipProvider delayDuration={200}>
<Tooltip>
<TooltipTrigger
@ -650,7 +673,7 @@ export const TeamModelSelector: React.FC<TeamModelSelectorProps> = ({
<Info className="size-3 shrink-0 opacity-50 transition-opacity hover:opacity-80" />
</TooltipTrigger>
<TooltipContent side="top" className="max-w-[240px] text-xs">
{modelIssueReason}
{modelStatusMessage}
</TooltipContent>
</Tooltip>
</TooltipProvider>

View file

@ -2,6 +2,7 @@ import type { ProviderPrepareDiagnosticsModelResult } from './providerPrepareDia
import type { TeamProviderId } from '@shared/types';
const OPENCODE_DEEP_VERIFY_SUCCESS_CACHE_TTL_MS = 45_000;
const OPENCODE_MODEL_ISSUE_CACHE_TTL_MS = 90_000;
interface ShortLivedProviderPrepareCacheEntry {
expiresAt: number;
@ -9,15 +10,24 @@ interface ShortLivedProviderPrepareCacheEntry {
}
const shortLivedProviderPrepareCache = new Map<string, ShortLivedProviderPrepareCacheEntry>();
const shortLivedProviderPrepareIssueCache = new Map<string, ShortLivedProviderPrepareCacheEntry>();
function pruneExpiredEntries(now: number): void {
for (const [cacheKey, entry] of shortLivedProviderPrepareCache.entries()) {
function pruneExpiredEntries(
cache: Map<string, ShortLivedProviderPrepareCacheEntry>,
now: number
): void {
for (const [cacheKey, entry] of cache.entries()) {
if (entry.expiresAt <= now) {
shortLivedProviderPrepareCache.delete(cacheKey);
cache.delete(cacheKey);
}
}
}
/**
 * Picks the most specific human-readable reason for a non-ready model result.
 *
 * Preference order:
 *   1. the detail after a trailing " - unavailable - <detail>" or
 *      " - check failed - <detail>" suffix on the result line,
 *   2. the trimmed warning line,
 *   3. the trimmed result line itself.
 *
 * @param result - Diagnostics result whose `line` / `warningLine` are inspected.
 * @returns The first non-empty candidate, or `null` when every candidate is blank.
 */
function getIssueReason(result: ProviderPrepareDiagnosticsModelResult): string | null {
  const trimmedLine = result.line.trim();

  const suffixDetail = /\s-\s(?:unavailable|check failed)(?:\s-\s(.+))?$/i
    .exec(trimmedLine)?.[1]
    ?.trim();
  if (suffixDetail) {
    return suffixDetail;
  }

  const trimmedWarning = result.warningLine?.trim();
  if (trimmedWarning) {
    return trimmedWarning;
  }

  return trimmedLine || null;
}
export function getShortLivedProviderPrepareModelResults({
providerId,
cacheKey,
@ -30,7 +40,7 @@ export function getShortLivedProviderPrepareModelResults({
}
const now = Date.now();
pruneExpiredEntries(now);
pruneExpiredEntries(shortLivedProviderPrepareCache, now);
const entry = shortLivedProviderPrepareCache.get(cacheKey);
if (!entry) {
return {};
@ -39,6 +49,53 @@ export function getShortLivedProviderPrepareModelResults({
return { ...entry.modelResultsById };
}
/**
 * Reads the short-lived issue cache and splits the cached results for `cacheKey`
 * into two reason maps keyed by model id.
 *
 * - Results with status `'failed'` go to `modelUnavailableReasonByValue`.
 * - Results with status `'notes'` go to `modelIssueReasonByValue`.
 * - Results with any other status, or without a derivable reason, are skipped.
 *
 * Expired issue-cache entries are pruned before the lookup. Providers other than
 * `'opencode'` always get two empty maps and never touch the cache.
 *
 * @param providerId - Provider the lookup is for.
 * @param cacheKey - Key under which the results were stored.
 * @returns Freshly allocated reason maps (possibly empty).
 */
export function getShortLivedProviderPrepareModelIssueReasons({
  providerId,
  cacheKey,
}: {
  providerId: TeamProviderId;
  cacheKey: string;
}): {
  modelIssueReasonByValue: Record<string, string>;
  modelUnavailableReasonByValue: Record<string, string>;
} {
  const issueReasons: Record<string, string> = {};
  const unavailableReasons: Record<string, string> = {};
  const reasons = {
    modelIssueReasonByValue: issueReasons,
    modelUnavailableReasonByValue: unavailableReasons,
  };

  if (providerId !== 'opencode') {
    return reasons;
  }

  pruneExpiredEntries(shortLivedProviderPrepareIssueCache, Date.now());
  const cachedEntry = shortLivedProviderPrepareIssueCache.get(cacheKey);
  if (!cachedEntry) {
    return reasons;
  }

  Object.entries(cachedEntry.modelResultsById).forEach(([modelId, result]) => {
    const reason = getIssueReason(result);
    if (!reason) {
      return;
    }
    switch (result.status) {
      case 'failed':
        unavailableReasons[modelId] = reason;
        break;
      case 'notes':
        issueReasons[modelId] = reason;
        break;
      default:
        break;
    }
  });

  return reasons;
}
export function storeShortLivedProviderPrepareModelResults({
providerId,
cacheKey,
@ -52,25 +109,62 @@ export function storeShortLivedProviderPrepareModelResults({
return;
}
const issueResultsById = Object.fromEntries(
Object.entries(modelResultsById).filter(([, result]) => result.status !== 'ready')
);
const readyResultsById = Object.fromEntries(
Object.entries(modelResultsById).filter(([, result]) => result.status === 'ready')
);
if (Object.keys(readyResultsById).length === 0) {
return;
}
const now = Date.now();
pruneExpiredEntries(now);
const existingEntry = shortLivedProviderPrepareCache.get(cacheKey);
shortLivedProviderPrepareCache.set(cacheKey, {
expiresAt: now + OPENCODE_DEEP_VERIFY_SUCCESS_CACHE_TTL_MS,
modelResultsById: {
...(existingEntry?.modelResultsById ?? {}),
...readyResultsById,
},
});
pruneExpiredEntries(shortLivedProviderPrepareCache, now);
pruneExpiredEntries(shortLivedProviderPrepareIssueCache, now);
if (Object.keys(readyResultsById).length > 0) {
const existingEntry = shortLivedProviderPrepareCache.get(cacheKey);
shortLivedProviderPrepareCache.set(cacheKey, {
expiresAt: now + OPENCODE_DEEP_VERIFY_SUCCESS_CACHE_TTL_MS,
modelResultsById: {
...(existingEntry?.modelResultsById ?? {}),
...readyResultsById,
},
});
}
if (Object.keys(issueResultsById).length > 0) {
const existingIssueEntry = shortLivedProviderPrepareIssueCache.get(cacheKey);
const nextIssueResultsById = {
...(existingIssueEntry?.modelResultsById ?? {}),
...issueResultsById,
};
for (const modelId of Object.keys(readyResultsById)) {
delete nextIssueResultsById[modelId];
}
shortLivedProviderPrepareIssueCache.set(cacheKey, {
expiresAt: now + OPENCODE_MODEL_ISSUE_CACHE_TTL_MS,
modelResultsById: nextIssueResultsById,
});
} else if (Object.keys(readyResultsById).length > 0) {
const existingIssueEntry = shortLivedProviderPrepareIssueCache.get(cacheKey);
if (!existingIssueEntry) {
return;
}
const nextIssueResultsById = { ...existingIssueEntry.modelResultsById };
for (const modelId of Object.keys(readyResultsById)) {
delete nextIssueResultsById[modelId];
}
if (Object.keys(nextIssueResultsById).length > 0) {
shortLivedProviderPrepareIssueCache.set(cacheKey, {
expiresAt: existingIssueEntry.expiresAt,
modelResultsById: nextIssueResultsById,
});
} else {
shortLivedProviderPrepareIssueCache.delete(cacheKey);
}
}
}
export function __resetShortLivedProviderPrepareCacheForTests(): void {
shortLivedProviderPrepareCache.clear();
shortLivedProviderPrepareIssueCache.clear();
}

View file

@ -46,6 +46,8 @@ interface LeadModelRowProps {
warningText?: string | null;
disableGeminiOption?: boolean;
modelIssueText?: string | null;
modelIssueReasonByValue?: Partial<Record<string, string | null | undefined>>;
modelUnavailableReasonByValue?: Partial<Record<string, string | null | undefined>>;
showAnthropicContextLimit?: boolean;
disableAnthropicContextLimit?: boolean;
}
@ -64,6 +66,8 @@ export const LeadModelRow = ({
warningText,
disableGeminiOption = false,
modelIssueText,
modelIssueReasonByValue,
modelUnavailableReasonByValue,
showAnthropicContextLimit = providerId === 'anthropic',
disableAnthropicContextLimit,
}: LeadModelRowProps): React.JSX.Element => {
@ -74,7 +78,17 @@ export const LeadModelRow = ({
? getProviderScopedTeamModelLabel(providerId, model.trim())
: 'Default';
const modelButtonAriaLabel = `${getTeamProviderLabel(providerId)} provider, ${modelButtonLabel}`;
const hasModelIssue = Boolean(modelIssueText);
const selectedModelIssueText =
model.trim() && modelIssueReasonByValue?.[model.trim()]
? modelIssueReasonByValue[model.trim()]
: null;
const selectedModelUnavailableText =
model.trim() && modelUnavailableReasonByValue?.[model.trim()]
? modelUnavailableReasonByValue[model.trim()]
: null;
const currentModelIssueText =
modelIssueText ?? selectedModelUnavailableText ?? selectedModelIssueText ?? null;
const hasModelIssue = Boolean(currentModelIssueText);
const showSonnetExtraUsageWarning =
providerId === 'anthropic' &&
!limitContext &&
@ -179,7 +193,11 @@ export const LeadModelRow = ({
onValueChange={onModelChange}
id="lead-model"
disableGeminiOption={disableGeminiOption}
modelIssueReasonByValue={model.trim() ? { [model.trim()]: modelIssueText } : undefined}
modelIssueReasonByValue={{
...(modelIssueReasonByValue ?? {}),
...(model.trim() && modelIssueText ? { [model.trim()]: modelIssueText } : {}),
}}
modelUnavailableReasonByValue={modelUnavailableReasonByValue}
/>
<EffortLevelSelector
value={effort ?? ''}

View file

@ -78,6 +78,12 @@ interface MemberDraftRowProps {
infoText?: string | null;
disableGeminiOption?: boolean;
modelIssueText?: string | null;
modelIssueReasonByProvider?: Partial<
Record<TeamProviderId, Partial<Record<string, string | null | undefined>>>
>;
modelUnavailableReasonByProvider?: Partial<
Record<TeamProviderId, Partial<Record<string, string | null | undefined>>>
>;
showWorktreeIsolationControls?: boolean;
worktreeIsolationDisabledReason?: string | null;
onWorktreeIsolationChange?: (id: string, enabled: boolean) => void;
@ -128,6 +134,8 @@ export const MemberDraftRow = ({
infoText,
disableGeminiOption = false,
modelIssueText,
modelIssueReasonByProvider,
modelUnavailableReasonByProvider,
showWorktreeIsolationControls = false,
worktreeIsolationDisabledReason,
onWorktreeIsolationChange,
@ -226,7 +234,19 @@ export const MemberDraftRow = ({
: undefined;
const worktreeIsolationDisabled =
isRemoved || Boolean(worktreeIsolationDisabledReason && member.isolation !== 'worktree');
const hasModelIssue = Boolean(modelIssueText);
const effectiveModelKey = effectiveModel?.trim() ?? '';
const selectedModelIssueText =
effectiveModelKey && modelIssueReasonByProvider?.[effectiveProviderId]?.[effectiveModelKey]
? modelIssueReasonByProvider[effectiveProviderId]?.[effectiveModelKey]
: null;
const selectedModelUnavailableText =
effectiveModelKey &&
modelUnavailableReasonByProvider?.[effectiveProviderId]?.[effectiveModelKey]
? modelUnavailableReasonByProvider[effectiveProviderId]?.[effectiveModelKey]
: null;
const currentModelIssueText =
modelIssueText ?? selectedModelUnavailableText ?? selectedModelIssueText ?? null;
const hasModelIssue = Boolean(currentModelIssueText);
const hasCustomProviderOrModel =
!forceInheritedModelSettings && Boolean(member.providerId || member.model?.trim());
const showSonnetExtraUsageWarning =
@ -353,11 +373,15 @@ export const MemberDraftRow = ({
</Button>
</span>
</TooltipTrigger>
{modelTooltipText || modelIssueText ? (
{modelTooltipText || currentModelIssueText ? (
<TooltipContent side="top" className="max-w-64 text-xs leading-relaxed">
{modelIssueText ? <p className="text-red-300">{modelIssueText}</p> : null}
{currentModelIssueText ? (
<p className="text-red-300">{currentModelIssueText}</p>
) : null}
{modelTooltipText ? (
<p className={modelIssueText ? 'mt-1 border-t border-white/10 pt-1' : ''}>
<p
className={currentModelIssueText ? 'mt-1 border-t border-white/10 pt-1' : ''}
>
{modelTooltipText}
</p>
) : null}
@ -524,8 +548,14 @@ export const MemberDraftRow = ({
}}
id={`member-${member.id}-model`}
disableGeminiOption={disableGeminiOption}
modelIssueReasonByValue={
effectiveModel?.trim() ? { [effectiveModel.trim()]: modelIssueText } : undefined
modelIssueReasonByValue={{
...(modelIssueReasonByProvider?.[effectiveProviderId] ?? {}),
...(effectiveModelKey && modelIssueText
? { [effectiveModelKey]: modelIssueText }
: {}),
}}
modelUnavailableReasonByValue={
modelUnavailableReasonByProvider?.[effectiveProviderId]
}
/>
<EffortLevelSelector

View file

@ -114,6 +114,12 @@ export interface MembersEditorSectionProps {
memberInfoById?: Record<string, string | null | undefined>;
disableGeminiOption?: boolean;
memberModelIssueById?: Record<string, string | null | undefined>;
modelIssueReasonByProvider?: Partial<
Record<TeamProviderId, Partial<Record<string, string | null | undefined>>>
>;
modelUnavailableReasonByProvider?: Partial<
Record<TeamProviderId, Partial<Record<string, string | null | undefined>>>
>;
disableAddMember?: boolean;
addMemberLockReason?: string;
showWorktreeIsolationControls?: boolean;
@ -153,6 +159,8 @@ export const MembersEditorSection = ({
memberInfoById,
disableGeminiOption = false,
memberModelIssueById,
modelIssueReasonByProvider,
modelUnavailableReasonByProvider,
disableAddMember = false,
addMemberLockReason,
showWorktreeIsolationControls = false,
@ -428,6 +436,8 @@ export const MembersEditorSection = ({
infoText={memberInfoById?.[member.id] ?? null}
disableGeminiOption={disableGeminiOption}
modelIssueText={memberModelIssueById?.[member.id] ?? null}
modelIssueReasonByProvider={modelIssueReasonByProvider}
modelUnavailableReasonByProvider={modelUnavailableReasonByProvider}
/>
))}
{softDeleteMembers && removedMembers.length > 0 ? (

View file

@ -49,6 +49,12 @@ interface TeamRosterEditorSectionProps {
disableGeminiOption?: boolean;
leadModelIssueText?: string | null;
memberModelIssueById?: Record<string, string | null | undefined>;
modelIssueReasonByProvider?: Partial<
Record<TeamProviderId, Partial<Record<string, string | null | undefined>>>
>;
modelUnavailableReasonByProvider?: Partial<
Record<TeamProviderId, Partial<Record<string, string | null | undefined>>>
>;
showWorktreeIsolationControls?: boolean;
teammateWorktreeDefault?: boolean;
worktreeIsolationDisabledReason?: string | null;
@ -95,6 +101,8 @@ export const TeamRosterEditorSection = ({
disableGeminiOption = false,
leadModelIssueText,
memberModelIssueById,
modelIssueReasonByProvider,
modelUnavailableReasonByProvider,
showWorktreeIsolationControls = false,
teammateWorktreeDefault = false,
worktreeIsolationDisabledReason,
@ -153,6 +161,8 @@ export const TeamRosterEditorSection = ({
softDeleteMembers={softDeleteMembers}
disableGeminiOption={disableGeminiOption}
memberModelIssueById={memberModelIssueById}
modelIssueReasonByProvider={modelIssueReasonByProvider}
modelUnavailableReasonByProvider={modelUnavailableReasonByProvider}
showWorktreeIsolationControls={showWorktreeIsolationControls}
teammateWorktreeDefault={teammateWorktreeDefault}
worktreeIsolationDisabledReason={worktreeIsolationDisabledReason}
@ -174,6 +184,8 @@ export const TeamRosterEditorSection = ({
warningText={leadWarningText}
disableGeminiOption={disableGeminiOption}
modelIssueText={leadModelIssueText}
modelIssueReasonByValue={modelIssueReasonByProvider?.[providerId]}
modelUnavailableReasonByValue={modelUnavailableReasonByProvider?.[providerId]}
showAnthropicContextLimit={hasAnthropicRuntime}
disableAnthropicContextLimit={disableAnthropicContextLimit}
/>

View file

@ -26,9 +26,7 @@ const PASSED_GAUNTLET_WITH_LIMITS_REASON =
const OPENCODE_TEAM_RECOMMENDED_MODELS = new Set<string>(['opencode/big-pickle']);
const OPENCODE_TEAM_RECOMMENDED_WITH_LIMITS_MODELS = new Set<string>([
'opencode/minimax-m2.5-free',
]);
const OPENCODE_TEAM_RECOMMENDED_WITH_LIMITS_MODELS = new Set<string>([]);
const OPENCODE_TEAM_TESTED_MODELS = new Set<string>([
'openrouter/anthropic/claude-haiku-4.5',
@ -54,7 +52,14 @@ const OPENCODE_TEAM_TESTED_MODELS = new Set<string>([
'openrouter/z-ai/glm-5.1',
]);
const OPENCODE_TEAM_TESTED_WITH_LIMITS_MODELS = new Set<string>([]);
const OPENCODE_TEAM_TESTED_WITH_LIMITS_MODELS = new Set<string>(['opencode/minimax-m2.5-free']);
// Model-specific explanation text for "tested with limits" OpenCode models, keyed by model id.
// NOTE(review): the lookup site appears to fall back to a generic reason when a model has no
// entry here — confirm against getOpenCodeTeamModelRecommendation.
const OPENCODE_TEAM_TESTED_WITH_LIMITS_REASONS = new Map<string, string>([
[
'opencode/minimax-m2.5-free',
'This exact free model route passed simple OpenCode Agent Teams provider stress, but a deeper repeated gauntlet hit duplicate or missing reply tokens. Keep it below Recommended until a clean repeated gauntlet passes.',
],
]);
const OPENCODE_TEAM_UNAVAILABLE_MODELS = new Map<string, string>([
[
@ -1254,7 +1259,9 @@ export function getOpenCodeTeamModelRecommendation(
return {
level: 'tested-with-limits',
label: 'Tested with limits',
reason: PASSED_FREE_ROUTE_REAL_AGENT_TEAMS_E2E_REASON,
reason:
OPENCODE_TEAM_TESTED_WITH_LIMITS_REASONS.get(normalizedModelId) ??
PASSED_FREE_ROUTE_REAL_AGENT_TEAMS_E2E_REASON,
};
}

View file

@ -35,6 +35,9 @@ export {
type SupportedProviderId = CliProviderId | TeamProviderId;
/**
 * Reason text returned by `getOpenCodeOpenAiRouteAuthUnavailableReason` when an OpenCode
 * `openai/` model route is treated as unavailable because OpenAI authentication looks broken.
 */
export const OPENCODE_OPENAI_AUTH_UNAVAILABLE_REASON =
'OpenCode OpenAI provider authentication failed. Reconnect OpenAI in provider settings, then refresh runtime status.';
export type TeamModelRuntimeProviderStatus = Pick<
CliProviderStatus,
| 'providerId'
@ -47,6 +50,9 @@ export type TeamModelRuntimeProviderStatus = Pick<
| 'backend'
| 'authenticated'
| 'supported'
| 'detailMessage'
| 'availableBackends'
| 'externalRuntimeDiagnostics'
> &
Partial<Pick<CliProviderStatus, 'verificationState' | 'statusMessage'>>;
@ -61,6 +67,58 @@ export interface TeamProviderModelVerificationCounts {
verifying: boolean;
}
/**
 * Decides whether an OpenCode `openai/` model route should be treated as unavailable
 * because the runtime status indicates an OpenAI authentication problem.
 *
 * The check only applies when `providerId` is `'opencode'`, the trimmed model id starts
 * with `openai/` (case-insensitive) and a provider status is supplied. It then looks at:
 *   1. OpenAI-looking backends (id, label or description mentions "openai") that either
 *      report `authentication-required` or are unavailable with an auth-like message;
 *   2. the combined status/detail messages of the provider, those backends and
 *      OpenAI-labelled external diagnostics, which must mention "openai" and also match
 *      an auth-failure phrase.
 *
 * @param providerId - Provider the model belongs to.
 * @param model - Model id as selected in the UI.
 * @param providerStatus - Runtime status snapshot for the provider, if any.
 * @returns `OPENCODE_OPENAI_AUTH_UNAVAILABLE_REASON` when an auth problem is detected,
 *          otherwise `null`.
 */
export function getOpenCodeOpenAiRouteAuthUnavailableReason(
  providerId: SupportedProviderId | undefined,
  model: string | undefined,
  providerStatus?: TeamModelRuntimeProviderStatus | null
): string | null {
  if (providerId !== 'opencode') {
    return null;
  }
  if (!model?.trim().toLowerCase().startsWith('openai/')) {
    return null;
  }
  if (!providerStatus) {
    return null;
  }

  const mentionsOpenAi = (text: string): boolean => /\bopenai\b/i.test(text);

  const allBackends = providerStatus.availableBackends ?? [];
  const openAiBackends = allBackends.filter((backend) =>
    [backend.id, backend.label, backend.description].some(mentionsOpenAi)
  );

  // Step 1: an OpenAI backend that explicitly needs auth, or is down with an auth-like message.
  for (const backend of openAiBackends) {
    if (backend.state === 'authentication-required') {
      return OPENCODE_OPENAI_AUTH_UNAVAILABLE_REASON;
    }
    if (backend.available) {
      continue;
    }
    const hasAuthLikeMessage = [backend.statusMessage, backend.detailMessage].some((message) =>
      /auth|token|api key|401|403/i.test(message ?? '')
    );
    if (hasAuthLikeMessage) {
      return OPENCODE_OPENAI_AUTH_UNAVAILABLE_REASON;
    }
  }

  // Step 2: scan every relevant free-text message as one newline-joined blob.
  const openAiDiagnostics = (providerStatus.externalRuntimeDiagnostics ?? []).filter(
    (diagnostic) => mentionsOpenAi(diagnostic.label)
  );
  const messages = [
    providerStatus.statusMessage,
    providerStatus.detailMessage,
    ...openAiBackends.flatMap((backend) => [backend.statusMessage, backend.detailMessage]),
    ...openAiDiagnostics.flatMap((diagnostic) => [
      diagnostic.statusMessage,
      diagnostic.detailMessage,
    ]),
  ];
  const diagnosticText = messages
    .map((message) => message?.trim() ?? '')
    .filter(Boolean)
    .join('\n');

  if (!mentionsOpenAi(diagnosticText)) {
    return null;
  }
  const reportsAuthFailure =
    /token refresh failed|token.*invalid|invalid.*token|not[_\s-]?authenticated|not authenticated|unauthorized|forbidden|\b401\b|\b403\b|invalid api key|api key.*invalid|authentication required/i.test(
      diagnosticText
    );
  return reportsAuthFailure ? OPENCODE_OPENAI_AUTH_UNAVAILABLE_REASON : null;
}
export function getTeamModelUiDisabledReason(
providerId: SupportedProviderId | undefined,
model: string | undefined,
@ -277,6 +335,10 @@ function getRuntimeModelAvailability(
if (!visibleModels.includes(model)) {
return null;
}
const runtimeAvailability = getModelAvailabilityMap(providerStatus).get(model)?.status ?? null;
if (runtimeAvailability === 'unavailable') {
return 'unavailable';
}
return 'available';
}
@ -360,7 +422,11 @@ export function getAvailableTeamProviderModelOptions(
...visibleModels.map((model) => {
const catalogOption = getRuntimeCatalogModelOption(providerId, model, providerStatus);
if (catalogOption) {
return catalogOption;
return {
...catalogOption,
availabilityStatus: getRuntimeModelAvailability(providerId, model, providerStatus),
availabilityReason: getRuntimeModelAvailabilityReason(model, providerStatus),
};
}
return {
value: model,
@ -464,6 +530,15 @@ export function getTeamModelSelectionError(
return `Model "${trimmed}" is disabled. ${disabledReason}`;
}
const dynamicUnavailableReason = getOpenCodeOpenAiRouteAuthUnavailableReason(
providerId,
trimmed,
providerStatus
);
if (dynamicUnavailableReason) {
return `Model "${trimmed}" is not available for the current ${getTeamProviderLabel(providerId) ?? providerId} runtime. ${dynamicUnavailableReason}`;
}
if (providerId === 'anthropic') {
return isTeamModelAvailableForUi(providerId, trimmed, providerStatus)
? null
@ -483,5 +558,11 @@ export function getTeamModelSelectionError(
return `Model "${trimmed}" is not available for the current ${getTeamProviderLabel(providerId) ?? providerId} runtime. Pick one of the listed models or use Default.`;
}
const availability = getRuntimeModelAvailability(providerId, trimmed, providerStatus);
if (availability !== 'available') {
const reason = getRuntimeModelAvailabilityReason(trimmed, providerStatus);
return `Model "${trimmed}" is not available for the current ${getTeamProviderLabel(providerId) ?? providerId} runtime.${reason ? ` ${reason}` : ''} Pick one of the listed models or use Default.`;
}
return null;
}

View file

@ -0,0 +1,96 @@
import {
getOpenCodeTeamModelRecommendation,
getOpenCodeTeamModelRecommendationSortRank,
} from '@renderer/utils/openCodeModelRecommendations';
import { isSupportedAnthropicTeamModel } from '@renderer/utils/teamModelCatalog';
import type {
OpenCodeTeamModelRecommendation,
OpenCodeTeamModelRecommendationLevel,
} from '@renderer/utils/openCodeModelRecommendations';
import type { TeamProviderId } from '@shared/types';
// Provider-agnostic aliases over the OpenCode recommendation types.
export type TeamModelRecommendationLevel = OpenCodeTeamModelRecommendationLevel;
export type TeamModelRecommendation = OpenCodeTeamModelRecommendation;
// Codex model ids that get a "Recommended" result; looked up with trimmed, lower-cased ids.
const CODEX_TEAM_RECOMMENDED_MODELS = new Set<string>(['gpt-5.4-mini', 'gpt-5.3-codex', 'gpt-5.5']);
// Reason text attached to the recommendation returned for recommended Codex models.
const CODEX_RECOMMENDED_REASON =
'This Codex model passed real Agent Teams launch and task-flow stress testing and is selected for stable team-agent behavior.';
// Reason text attached to the recommendation returned for supported Anthropic models.
const ANTHROPIC_RECOMMENDED_REASON =
'This Claude model passed real Agent Teams launch, restart, and teammate-workflow stress testing.';
/**
 * Canonicalises a model id for lookups: trims surrounding whitespace and lower-cases it.
 *
 * @param modelId - Raw model id, possibly missing.
 * @returns The normalised id, or an empty string when `modelId` is `null`/`undefined`.
 */
function normalizeTeamModelId(modelId: string | null | undefined): string {
  if (modelId == null) {
    return '';
  }
  return modelId.trim().toLowerCase();
}
/**
 * Builds a top-tier recommendation record carrying the given explanation.
 *
 * @param reason - Explanation text to attach to the recommendation.
 * @returns A recommendation with level `'recommended'` and label `'Recommended'`.
 */
function getRecommendedRecommendation(reason: string): TeamModelRecommendation {
  const recommendation: TeamModelRecommendation = {
    level: 'recommended',
    label: 'Recommended',
    reason,
  };
  return recommendation;
}
/**
 * Resolves the recommendation (if any) for a model under a given provider.
 *
 * The model id is normalised (trimmed, lower-cased) first; a blank id yields `null`.
 * - `opencode`: delegates to `getOpenCodeTeamModelRecommendation`.
 * - `codex`: "Recommended" when the id is in `CODEX_TEAM_RECOMMENDED_MODELS`.
 * - `anthropic`: "Recommended" when `isSupportedAnthropicTeamModel` accepts the id.
 * - any other provider: `null`.
 *
 * @param providerId - Provider the model belongs to.
 * @param modelId - Raw model id, possibly missing.
 * @returns The recommendation, or `null` when none applies.
 */
export function getTeamModelRecommendation(
  providerId: TeamProviderId,
  modelId: string | null | undefined
): TeamModelRecommendation | null {
  const normalizedModelId = normalizeTeamModelId(modelId);
  if (normalizedModelId.length === 0) {
    return null;
  }

  switch (providerId) {
    case 'opencode':
      return getOpenCodeTeamModelRecommendation(normalizedModelId);
    case 'codex':
      return CODEX_TEAM_RECOMMENDED_MODELS.has(normalizedModelId)
        ? getRecommendedRecommendation(CODEX_RECOMMENDED_REASON)
        : null;
    case 'anthropic':
      return isSupportedAnthropicTeamModel(normalizedModelId)
        ? getRecommendedRecommendation(ANTHROPIC_RECOMMENDED_REASON)
        : null;
    default:
      return null;
  }
}
/**
 * Reports whether a model counts as recommended for a provider.
 *
 * Both the `recommended` and `recommended-with-limits` levels count.
 *
 * @param providerId - Provider the model belongs to.
 * @param modelId - Raw model id; may be `null` or `undefined`.
 * @returns `true` for either recommended level, otherwise `false`.
 */
export function isTeamModelRecommended(
  providerId: TeamProviderId,
  modelId: string | null | undefined
): boolean {
  const level = getTeamModelRecommendation(providerId, modelId)?.level;
  if (level === 'recommended') {
    return true;
  }
  return level === 'recommended-with-limits';
}
/**
 * Returns the sort rank of a model; lower ranks sort first.
 *
 * OpenCode models use the OpenCode rank helper, which receives the id exactly
 * as passed in. For every other provider, a `recommended` model ranks 0 and
 * anything else ranks 4.
 * NOTE(review): 4 presumably lines up with the lowest tier of the OpenCode rank
 * scale; confirm against the OpenCode helper.
 *
 * @param providerId - Provider the model belongs to.
 * @param modelId - Raw model id; may be `null` or `undefined`.
 * @returns Numeric rank used for ordering.
 */
function getTeamModelRecommendationSortRank(
  providerId: TeamProviderId,
  modelId: string | null | undefined
): number {
  if (providerId === 'opencode') {
    return getOpenCodeTeamModelRecommendationSortRank(modelId);
  }

  const level = getTeamModelRecommendation(providerId, modelId)?.level;
  return level === 'recommended' ? 0 : 4;
}
/**
 * Comparator that orders two models of one provider by recommendation rank.
 *
 * @param providerId - Provider both models belong to.
 * @param leftModelId - Id of the first model.
 * @param rightModelId - Id of the second model.
 * @returns A negative number when the left model ranks first, a positive number
 *   when the right model ranks first, and 0 when the ranks are equal.
 */
export function compareTeamModelRecommendations(
  providerId: TeamProviderId,
  leftModelId: string | null | undefined,
  rightModelId: string | null | undefined
): number {
  const rankOfLeft = getTeamModelRecommendationSortRank(providerId, leftModelId);
  const rankOfRight = getTeamModelRecommendationSortRank(providerId, rightModelId);
  // Equal ranks short-circuit to 0 instead of going through the subtraction.
  return rankOfLeft === rankOfRight ? 0 : rankOfLeft - rankOfRight;
}

View file

@ -1,9 +1,9 @@
{
"generatedAt": "2026-05-08T22:48:31.416Z",
"runsPerModel": 1,
"generatedAt": "2026-05-09T23:16:07.760Z",
"runsPerModel": 3,
"qualification": {
"minimumAverageScore": 80,
"minimumSuccessfulRuns": 1,
"minimumAverageScore": 90,
"minimumSuccessfulRuns": 3,
"minimumConsistencyScore": 85,
"requireNoHardFailures": true
},
@ -11,93 +11,93 @@
{
"model": "opencode/big-pickle",
"verdict": "recommended",
"confidence": "low",
"confidence": "high",
"qualified": true,
"readinessScore": 100,
"averageScore": 100,
"consistencyScore": 100,
"behavioralAverageScore": 100,
"minScore": 100,
"successfulRuns": 1,
"countedRuns": 1,
"successfulRuns": 3,
"countedRuns": 3,
"hardFailures": 0,
"providerInfraFailures": 0,
"runtimeTransportFailures": 0,
"modelBehaviorFailures": 0,
"harnessFailures": 0,
"p50DurationMs": 118757,
"p95DurationMs": 118757,
"p50DurationMs": 112355,
"p95DurationMs": 116891,
"stagePassRates": {
"launchBootstrap": {
"passed": 1,
"total": 1,
"passed": 3,
"total": 3,
"rate": 100
},
"directReply": {
"passed": 1,
"total": 1,
"passed": 3,
"total": 3,
"rate": 100
},
"peerRelayAB": {
"passed": 1,
"total": 1,
"passed": 3,
"total": 3,
"rate": 100
},
"peerRelayBC": {
"passed": 1,
"total": 1,
"passed": 3,
"total": 3,
"rate": 100
},
"concurrentReplies": {
"passed": 1,
"total": 1,
"passed": 3,
"total": 3,
"rate": 100
},
"taskRefs": {
"passed": 1,
"total": 1,
"passed": 3,
"total": 3,
"rate": 100
},
"cleanTranscript": {
"passed": 1,
"total": 1,
"passed": 3,
"total": 3,
"rate": 100
},
"noDuplicateTokens": {
"passed": 1,
"total": 1,
"passed": 3,
"total": 3,
"rate": 100
},
"latencyStable": {
"passed": 1,
"total": 1,
"passed": 3,
"total": 3,
"rate": 100
}
},
"taskRefPassRates": {
"directReply": {
"passed": 1,
"total": 1,
"passed": 3,
"total": 3,
"rate": 100
},
"peerRelayAB": {
"passed": 1,
"total": 1,
"passed": 3,
"total": 3,
"rate": 100
},
"peerRelayBC": {
"passed": 1,
"total": 1,
"passed": 3,
"total": 3,
"rate": 100
},
"concurrentBob": {
"passed": 1,
"total": 1,
"passed": 3,
"total": 3,
"rate": 100
},
"concurrentTom": {
"passed": 1,
"total": 1,
"passed": 3,
"total": 3,
"rate": 100
}
},
@ -112,8 +112,8 @@
"failedRuns": 0,
"weightedLoss": 0,
"passRate": {
"passed": 1,
"total": 1,
"passed": 3,
"total": 3,
"rate": 100
}
},
@ -122,8 +122,8 @@
"failedRuns": 0,
"weightedLoss": 0,
"passRate": {
"passed": 1,
"total": 1,
"passed": 3,
"total": 3,
"rate": 100
}
},
@ -132,8 +132,8 @@
"failedRuns": 0,
"weightedLoss": 0,
"passRate": {
"passed": 1,
"total": 1,
"passed": 3,
"total": 3,
"rate": 100
}
},
@ -142,8 +142,8 @@
"failedRuns": 0,
"weightedLoss": 0,
"passRate": {
"passed": 1,
"total": 1,
"passed": 3,
"total": 3,
"rate": 100
}
},
@ -152,8 +152,8 @@
"failedRuns": 0,
"weightedLoss": 0,
"passRate": {
"passed": 1,
"total": 1,
"passed": 3,
"total": 3,
"rate": 100
}
},
@ -162,8 +162,8 @@
"failedRuns": 0,
"weightedLoss": 0,
"passRate": {
"passed": 1,
"total": 1,
"passed": 3,
"total": 3,
"rate": 100
}
},
@ -172,8 +172,8 @@
"failedRuns": 0,
"weightedLoss": 0,
"passRate": {
"passed": 1,
"total": 1,
"passed": 3,
"total": 3,
"rate": 100
}
},
@ -182,8 +182,8 @@
"failedRuns": 0,
"weightedLoss": 0,
"passRate": {
"passed": 1,
"total": 1,
"passed": 3,
"total": 3,
"rate": 100
}
},
@ -192,14 +192,14 @@
"failedRuns": 0,
"weightedLoss": 0,
"passRate": {
"passed": 1,
"total": 1,
"passed": 3,
"total": 3,
"rate": 100
}
}
],
"scoreStability": {
"sampleSize": 1,
"sampleSize": 3,
"minScore": 100,
"maxScore": 100,
"spread": 0,
@ -217,16 +217,16 @@
"outcome": "passed",
"failureCategory": "none",
"primaryFailure": null,
"durationMs": 118757,
"durationMs": 112344,
"hardFailure": false,
"stageDurationsMs": {
"setup": 225,
"launchBootstrap": 20591,
"materializeTasks": 36,
"directReply": 14820,
"peerRelayAB": 32039,
"peerRelayBC": 27306,
"concurrentReplies": 15426,
"setup": 183,
"launchBootstrap": 19933,
"materializeTasks": 35,
"directReply": 15430,
"peerRelayAB": 25001,
"peerRelayBC": 28154,
"concurrentReplies": 15551,
"hygiene": 1
},
"stageFailures": {},
@ -253,7 +253,455 @@
"latencyStable": true
},
"diagnostics": [
"runId=44f5aa40-e169-49ed-9ea3-4c72aaf4a9f1"
"runId=d9d27eb0-2798-4980-a0fa-f082a6edd705"
]
},
{
"runIndex": 2,
"passed": true,
"score": 100,
"countedForRecommendation": true,
"outcome": "passed",
"failureCategory": "none",
"primaryFailure": null,
"durationMs": 112355,
"hardFailure": false,
"stageDurationsMs": {
"setup": 11,
"launchBootstrap": 18682,
"materializeTasks": 36,
"directReply": 15126,
"peerRelayAB": 24835,
"peerRelayBC": 28580,
"concurrentReplies": 17164,
"hygiene": 1
},
"stageFailures": {},
"taskRefChecks": {
"directReply": true,
"peerRelayAB": true,
"peerRelayBC": true,
"concurrentBob": true,
"concurrentTom": true
},
"protocolViolations": {
"badMessages": 0,
"duplicateOrMissingTokens": []
},
"stages": {
"launchBootstrap": true,
"directReply": true,
"peerRelayAB": true,
"peerRelayBC": true,
"concurrentReplies": true,
"taskRefs": true,
"cleanTranscript": true,
"noDuplicateTokens": true,
"latencyStable": true
},
"diagnostics": [
"runId=97364154-e06d-460c-94ae-65b73cb1b6f9"
]
},
{
"runIndex": 3,
"passed": true,
"score": 100,
"countedForRecommendation": true,
"outcome": "passed",
"failureCategory": "none",
"primaryFailure": null,
"durationMs": 116891,
"hardFailure": false,
"stageDurationsMs": {
"setup": 8,
"launchBootstrap": 18926,
"materializeTasks": 31,
"directReply": 17061,
"peerRelayAB": 27842,
"peerRelayBC": 27262,
"concurrentReplies": 15437,
"hygiene": 1
},
"stageFailures": {},
"taskRefChecks": {
"directReply": true,
"peerRelayAB": true,
"peerRelayBC": true,
"concurrentBob": true,
"concurrentTom": true
},
"protocolViolations": {
"badMessages": 0,
"duplicateOrMissingTokens": []
},
"stages": {
"launchBootstrap": true,
"directReply": true,
"peerRelayAB": true,
"peerRelayBC": true,
"concurrentReplies": true,
"taskRefs": true,
"cleanTranscript": true,
"noDuplicateTokens": true,
"latencyStable": true
},
"diagnostics": [
"runId=7bdd4b2e-dbd6-4474-a8a0-9418df433671"
]
}
]
},
{
"model": "opencode/minimax-m2.5-free",
"verdict": "strong-candidate",
"confidence": "high",
"qualified": false,
"readinessScore": 88.6,
"averageScore": 98.3,
"consistencyScore": 93.1,
"behavioralAverageScore": 98.3,
"minScore": 95,
"successfulRuns": 2,
"countedRuns": 3,
"hardFailures": 1,
"providerInfraFailures": 0,
"runtimeTransportFailures": 0,
"modelBehaviorFailures": 1,
"harnessFailures": 0,
"p50DurationMs": 108862,
"p95DurationMs": 118757,
"stagePassRates": {
"launchBootstrap": {
"passed": 3,
"total": 3,
"rate": 100
},
"directReply": {
"passed": 3,
"total": 3,
"rate": 100
},
"peerRelayAB": {
"passed": 3,
"total": 3,
"rate": 100
},
"peerRelayBC": {
"passed": 3,
"total": 3,
"rate": 100
},
"concurrentReplies": {
"passed": 3,
"total": 3,
"rate": 100
},
"taskRefs": {
"passed": 3,
"total": 3,
"rate": 100
},
"cleanTranscript": {
"passed": 3,
"total": 3,
"rate": 100
},
"noDuplicateTokens": {
"passed": 2,
"total": 3,
"rate": 66.7
},
"latencyStable": {
"passed": 3,
"total": 3,
"rate": 100
}
},
"taskRefPassRates": {
"directReply": {
"passed": 3,
"total": 3,
"rate": 100
},
"peerRelayAB": {
"passed": 3,
"total": 3,
"rate": 100
},
"peerRelayBC": {
"passed": 3,
"total": 3,
"rate": 100
},
"concurrentBob": {
"passed": 3,
"total": 3,
"rate": 100
},
"concurrentTom": {
"passed": 3,
"total": 3,
"rate": 100
}
},
"protocolViolationTotals": {
"badMessages": 0,
"duplicateOrMissingTokens": 2,
"affectedRuns": 1
},
"stageFailureImpact": [
{
"stage": "noDuplicateTokens",
"failedRuns": 1,
"weightedLoss": 5,
"passRate": {
"passed": 2,
"total": 3,
"rate": 66.7
}
},
{
"stage": "cleanTranscript",
"failedRuns": 0,
"weightedLoss": 0,
"passRate": {
"passed": 3,
"total": 3,
"rate": 100
}
},
{
"stage": "concurrentReplies",
"failedRuns": 0,
"weightedLoss": 0,
"passRate": {
"passed": 3,
"total": 3,
"rate": 100
}
},
{
"stage": "directReply",
"failedRuns": 0,
"weightedLoss": 0,
"passRate": {
"passed": 3,
"total": 3,
"rate": 100
}
},
{
"stage": "latencyStable",
"failedRuns": 0,
"weightedLoss": 0,
"passRate": {
"passed": 3,
"total": 3,
"rate": 100
}
},
{
"stage": "launchBootstrap",
"failedRuns": 0,
"weightedLoss": 0,
"passRate": {
"passed": 3,
"total": 3,
"rate": 100
}
},
{
"stage": "peerRelayAB",
"failedRuns": 0,
"weightedLoss": 0,
"passRate": {
"passed": 3,
"total": 3,
"rate": 100
}
},
{
"stage": "peerRelayBC",
"failedRuns": 0,
"weightedLoss": 0,
"passRate": {
"passed": 3,
"total": 3,
"rate": 100
}
},
{
"stage": "taskRefs",
"failedRuns": 0,
"weightedLoss": 0,
"passRate": {
"passed": 3,
"total": 3,
"rate": 100
}
}
],
"scoreStability": {
"sampleSize": 3,
"minScore": 95,
"maxScore": 100,
"spread": 5,
"standardDeviation": 2.4,
"consistencyScore": 93.1
},
"dominantFailureCategory": "model-behavior",
"recommendationBlockers": [
"successful runs 2 < 3",
"hard failures 1",
"model-behavior failures 1",
"highest weighted stage loss noDuplicateTokens=5",
"protocol violations in 1 runs"
],
"runs": [
{
"runIndex": 1,
"passed": true,
"score": 100,
"countedForRecommendation": true,
"outcome": "passed",
"failureCategory": "none",
"primaryFailure": null,
"durationMs": 91530,
"hardFailure": false,
"stageDurationsMs": {
"setup": 10,
"launchBootstrap": 18716,
"materializeTasks": 31,
"directReply": 11557,
"peerRelayAB": 16323,
"peerRelayBC": 27370,
"concurrentReplies": 9606,
"hygiene": 1
},
"stageFailures": {},
"taskRefChecks": {
"directReply": true,
"peerRelayAB": true,
"peerRelayBC": true,
"concurrentBob": true,
"concurrentTom": true
},
"protocolViolations": {
"badMessages": 0,
"duplicateOrMissingTokens": []
},
"stages": {
"launchBootstrap": true,
"directReply": true,
"peerRelayAB": true,
"peerRelayBC": true,
"concurrentReplies": true,
"taskRefs": true,
"cleanTranscript": true,
"noDuplicateTokens": true,
"latencyStable": true
},
"diagnostics": [
"runId=23ae85d2-e79d-41c9-93a6-e843acea6d9e"
]
},
{
"runIndex": 2,
"passed": true,
"score": 100,
"countedForRecommendation": true,
"outcome": "passed",
"failureCategory": "none",
"primaryFailure": null,
"durationMs": 108862,
"hardFailure": false,
"stageDurationsMs": {
"setup": 10,
"launchBootstrap": 18359,
"materializeTasks": 35,
"directReply": 7236,
"peerRelayAB": 30664,
"peerRelayBC": 26124,
"concurrentReplies": 18477,
"hygiene": 0
},
"stageFailures": {},
"taskRefChecks": {
"directReply": true,
"peerRelayAB": true,
"peerRelayBC": true,
"concurrentBob": true,
"concurrentTom": true
},
"protocolViolations": {
"badMessages": 0,
"duplicateOrMissingTokens": []
},
"stages": {
"launchBootstrap": true,
"directReply": true,
"peerRelayAB": true,
"peerRelayBC": true,
"concurrentReplies": true,
"taskRefs": true,
"cleanTranscript": true,
"noDuplicateTokens": true,
"latencyStable": true
},
"diagnostics": [
"runId=c3a55d8a-4028-4af7-9e1a-8ae8c87a95e5"
]
},
{
"runIndex": 3,
"passed": false,
"score": 95,
"countedForRecommendation": true,
"outcome": "behavioral-fail",
"failureCategory": "model-behavior",
"primaryFailure": "duplicateOrMissingTokens=GAUNTLET_JACK_USER_OK_3,GAUNTLET_TOM_USER_OK_3",
"durationMs": 118757,
"hardFailure": true,
"stageDurationsMs": {
"setup": 9,
"launchBootstrap": 19986,
"materializeTasks": 37,
"directReply": 8036,
"peerRelayAB": 37430,
"peerRelayBC": 36219,
"concurrentReplies": 8551,
"hygiene": 0
},
"stageFailures": {},
"taskRefChecks": {
"directReply": true,
"peerRelayAB": true,
"peerRelayBC": true,
"concurrentBob": true,
"concurrentTom": true
},
"protocolViolations": {
"badMessages": 0,
"duplicateOrMissingTokens": [
"GAUNTLET_JACK_USER_OK_3",
"GAUNTLET_TOM_USER_OK_3"
]
},
"stages": {
"launchBootstrap": true,
"directReply": true,
"peerRelayAB": true,
"peerRelayBC": true,
"concurrentReplies": true,
"taskRefs": true,
"cleanTranscript": true,
"noDuplicateTokens": false,
"latencyStable": true
},
"diagnostics": [
"runId=2b0610e0-7b10-49fc-88dd-ab30b37abce9",
"duplicateOrMissingTokens=GAUNTLET_JACK_USER_OK_3,GAUNTLET_TOM_USER_OK_3"
]
}
]

View file

@ -1,9 +1,9 @@
# OpenCode Model Gauntlet Results
Generated: 2026-05-08T22:48:31.416Z
Generated: 2026-05-09T23:16:07.760Z
Runs per model: 1
Recommended threshold: average >= 80, successful runs >= 1, consistency >= 85, hard failures = 0
Runs per model: 3
Recommended threshold: average >= 90, successful runs >= 3, consistency >= 85, hard failures = 0
Provider-infra runs are reported separately and are not counted as model behavior. They still block a Recommended verdict until rerun succeeds.
@ -13,25 +13,50 @@ Scoring weights: launchBootstrap=15, directReply=10, peerRelayAB=15, peerRelayBC
| Model | Verdict | Confidence | Readiness | Consistency | Score Spread | Behavior Avg | Overall Avg | Counted | Pass Runs | Weakest Stage | Weakest TaskRef | Dominant Failure | Blockers | Provider Infra | Runtime Transport | Model Fails | Protocol Runs | p50 | p95 |
| --- | --- | --- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | --- | --- | --- | --- | ---: | ---: | ---: | ---: | ---: | ---: |
| `opencode/big-pickle` | Recommended | low | 100 | 100 | 0 | 100 | 100 | 1/1 | 1/1 | cleanTranscript 1/1 (100%) | concurrentBob 1/1 (100%) | none | - | 0 | 0 | 0 | 0 | 118757ms | 118757ms |
| `opencode/big-pickle` | Recommended | high | 100 | 100 | 0 | 100 | 100 | 3/3 | 3/3 | cleanTranscript 3/3 (100%) | concurrentBob 3/3 (100%) | none | - | 0 | 0 | 0 | 0 | 112355ms | 116891ms |
| `opencode/minimax-m2.5-free` | Strong candidate | high | 88.6 | 93.1 | 5 | 98.3 | 98.3 | 3/3 | 2/3 | noDuplicateTokens 2/3 (66.7%) | concurrentBob 3/3 (100%) | model-behavior | successful runs 2 < 3; hard failures 1; model-behavior failures 1; highest weighted stage loss noDuplicateTokens=5; protocol violations in 1 runs | 0 | 0 | 1 | 1 | 108862ms | 118757ms |
## opencode/big-pickle
Readiness score: 100.
Score stability: consistency=100, min=100, max=100, spread=0, stdDev=0, samples=1.
Score stability: consistency=100, min=100, max=100, spread=0, stdDev=0, samples=3.
Recommendation blockers: -.
Weighted stage impact: -.
Stage pass rates: launchBootstrap:1/1 (100%), directReply:1/1 (100%), peerRelayAB:1/1 (100%), peerRelayBC:1/1 (100%), concurrentReplies:1/1 (100%), taskRefs:1/1 (100%), cleanTranscript:1/1 (100%), noDuplicateTokens:1/1 (100%), latencyStable:1/1 (100%).
Stage pass rates: launchBootstrap:3/3 (100%), directReply:3/3 (100%), peerRelayAB:3/3 (100%), peerRelayBC:3/3 (100%), concurrentReplies:3/3 (100%), taskRefs:3/3 (100%), cleanTranscript:3/3 (100%), noDuplicateTokens:3/3 (100%), latencyStable:3/3 (100%).
TaskRef pass rates: directReply:1/1 (100%), peerRelayAB:1/1 (100%), peerRelayBC:1/1 (100%), concurrentBob:1/1 (100%), concurrentTom:1/1 (100%).
TaskRef pass rates: directReply:3/3 (100%), peerRelayAB:3/3 (100%), peerRelayBC:3/3 (100%), concurrentBob:3/3 (100%), concurrentTom:3/3 (100%).
Protocol totals: badMessages=0, duplicateOrMissingTokens=0, affectedRuns=0.
| Run | Outcome | Category | Score | Counted | Duration | Failed Stages | Slowest Stage | TaskRefs | Protocol | Diagnostics |
| ---: | --- | --- | ---: | --- | ---: | --- | --- | --- | --- | --- |
| 1 | passed | none | 100 | yes | 118757ms | - | peerRelayAB:32039ms | directReply:ok, peerRelayAB:ok, peerRelayBC:ok, concurrentBob:ok, concurrentTom:ok | - | runId=44f5aa40-e169-49ed-9ea3-4c72aaf4a9f1 |
| 1 | passed | none | 100 | yes | 112344ms | - | peerRelayBC:28154ms | directReply:ok, peerRelayAB:ok, peerRelayBC:ok, concurrentBob:ok, concurrentTom:ok | - | runId=d9d27eb0-2798-4980-a0fa-f082a6edd705 |
| 2 | passed | none | 100 | yes | 112355ms | - | peerRelayBC:28580ms | directReply:ok, peerRelayAB:ok, peerRelayBC:ok, concurrentBob:ok, concurrentTom:ok | - | runId=97364154-e06d-460c-94ae-65b73cb1b6f9 |
| 3 | passed | none | 100 | yes | 116891ms | - | peerRelayAB:27842ms | directReply:ok, peerRelayAB:ok, peerRelayBC:ok, concurrentBob:ok, concurrentTom:ok | - | runId=7bdd4b2e-dbd6-4474-a8a0-9418df433671 |
## opencode/minimax-m2.5-free
Readiness score: 88.6.
Score stability: consistency=93.1, min=95, max=100, spread=5, stdDev=2.4, samples=3.
Recommendation blockers: successful runs 2 < 3; hard failures 1; model-behavior failures 1; highest weighted stage loss noDuplicateTokens=5; protocol violations in 1 runs.
Weighted stage impact: noDuplicateTokens:loss=5, failed=1, pass=2/3 (66.7%).
Stage pass rates: launchBootstrap:3/3 (100%), directReply:3/3 (100%), peerRelayAB:3/3 (100%), peerRelayBC:3/3 (100%), concurrentReplies:3/3 (100%), taskRefs:3/3 (100%), cleanTranscript:3/3 (100%), noDuplicateTokens:2/3 (66.7%), latencyStable:3/3 (100%).
TaskRef pass rates: directReply:3/3 (100%), peerRelayAB:3/3 (100%), peerRelayBC:3/3 (100%), concurrentBob:3/3 (100%), concurrentTom:3/3 (100%).
Protocol totals: badMessages=0, duplicateOrMissingTokens=2, affectedRuns=1.
| Run | Outcome | Category | Score | Counted | Duration | Failed Stages | Slowest Stage | TaskRefs | Protocol | Diagnostics |
| ---: | --- | --- | ---: | --- | ---: | --- | --- | --- | --- | --- |
| 1 | passed | none | 100 | yes | 91530ms | - | peerRelayBC:27370ms | directReply:ok, peerRelayAB:ok, peerRelayBC:ok, concurrentBob:ok, concurrentTom:ok | - | runId=23ae85d2-e79d-41c9-93a6-e843acea6d9e |
| 2 | passed | none | 100 | yes | 108862ms | - | peerRelayAB:30664ms | directReply:ok, peerRelayAB:ok, peerRelayBC:ok, concurrentBob:ok, concurrentTom:ok | - | runId=c3a55d8a-4028-4af7-9e1a-8ae8c87a95e5 |
| 3 | behavioral-fail | model-behavior | 95 | yes | 118757ms | noDuplicateTokens | peerRelayAB:37430ms | directReply:ok, peerRelayAB:ok, peerRelayBC:ok, concurrentBob:ok, concurrentTom:ok | token=GAUNTLET_JACK_USER_OK_3+GAUNTLET_TOM_USER_OK_3 | duplicateOrMissingTokens=GAUNTLET_JACK_USER_OK_3,GAUNTLET_TOM_USER_OK_3 |

View file

@ -0,0 +1,62 @@
{
"generatedAt": "2026-05-09T22:48:19.222Z",
"models": [
{
"model": "opencode/big-pickle",
"passed": true,
"score": 100,
"durationMs": 67267,
"stages": {
"launchBootstrap": true,
"directReply": true,
"peerRelay": true,
"taskRefs": true,
"longPrompt": true,
"latencyStable": true
},
"diagnostics": [
"runId=f44e2547-3689-41a1-9a0a-75d38bdb1097",
"directDelivery={\"delivered\":true,\"accepted\":true,\"responsePending\":false,\"responseState\":\"responded_visible_message\",\"ledgerStatus\":\"responded\",\"visibleReplyMessageId\":\"ad750c26-d9bd-4028-9936-754cbf7aef7b\",\"visibleReplyCorrelation\":\"relayOfMessageId\",\"diagnostics\":[\"OpenCode app MCP was reattached before message delivery.\",\"Reattached stale OpenCode app MCP server: existing app MCP config does not expose environment; CLAUDE_TEAM_CONTROL_URL cannot be verified\",\"opencode_direct_user_delivery_inline_observe_attempt_1\",\"opencode_message_send_tool_error_inline_observe\"]}",
"peerDelivery={\"delivered\":true,\"accepted\":true,\"responsePending\":true,\"responseState\":\"responded_visible_message\",\"ledgerStatus\":\"retry_scheduled\",\"reason\":\"visible_reply_destination_not_found_yet\",\"visibleReplyCorrelation\":\"direct_child_message_send\",\"diagnostics\":[\"OpenCode bootstrap MCP did not complete required tools before assistant response: runtime_bootstrap_checkin, member_briefing\",\"Recreated OpenCode session before message delivery because bootstrap MCP failed in the stored session.\",\"OpenCode app MCP was reattached before message delivery.\",\"Reattached stale OpenCode app MCP server: existing app MCP config does not expose environment; CLAUDE_TEAM_CONTROL_URL cannot be verified\"]}"
]
},
{
"model": "opencode/minimax-m2.5-free",
"passed": true,
"score": 100,
"durationMs": 66426,
"stages": {
"launchBootstrap": true,
"directReply": true,
"peerRelay": true,
"taskRefs": true,
"longPrompt": true,
"latencyStable": true
},
"diagnostics": [
"runId=1659a3ab-ba64-432b-95ec-ab1d88371ebf",
"directDelivery={\"delivered\":true,\"accepted\":true,\"responsePending\":false,\"responseState\":\"responded_visible_message\",\"ledgerStatus\":\"responded\",\"visibleReplyMessageId\":\"f03532ac-cccf-450d-a951-63a98d02125d\",\"visibleReplyCorrelation\":\"relayOfMessageId\",\"diagnostics\":[\"OpenCode app MCP was reattached before message delivery.\",\"Reattached stale OpenCode app MCP server: existing app MCP config does not expose environment; CLAUDE_TEAM_CONTROL_URL cannot be verified\"]}",
"peerDelivery={\"delivered\":true,\"accepted\":true,\"responsePending\":true,\"responseState\":\"empty_assistant_turn\",\"ledgerStatus\":\"retry_scheduled\",\"reason\":\"empty_assistant_turn\",\"diagnostics\":[\"OpenCode bootstrap MCP did not complete required tools before assistant response: runtime_bootstrap_checkin, member_briefing\",\"Recreated OpenCode session before message delivery because bootstrap MCP failed in the stored session.\",\"OpenCode app MCP was reattached before message delivery.\",\"Reattached stale OpenCode app MCP server: existing app MCP config does not expose environment; CLAUDE_TEAM_CONTROL_URL cannot be verified\"]}"
]
},
{
"model": "opencode/nemotron-3-super-free",
"passed": true,
"score": 100,
"durationMs": 77760,
"stages": {
"launchBootstrap": true,
"directReply": true,
"peerRelay": true,
"taskRefs": true,
"longPrompt": true,
"latencyStable": true
},
"diagnostics": [
"runId=0bd03f13-bac1-4220-96cc-7753944e4ff0",
"directDelivery={\"delivered\":true,\"accepted\":true,\"responsePending\":false,\"responseState\":\"responded_visible_message\",\"ledgerStatus\":\"responded\",\"visibleReplyMessageId\":\"247aa6f8-3c53-4353-b9b2-0e3cdc7d5b34\",\"visibleReplyCorrelation\":\"relayOfMessageId\",\"diagnostics\":[\"OpenCode app MCP was reattached before message delivery.\",\"Reattached stale OpenCode app MCP server: existing app MCP config does not expose environment; CLAUDE_TEAM_CONTROL_URL cannot be verified\",\"opencode_direct_user_delivery_inline_observe_attempt_1\"]}",
"peerDelivery={\"delivered\":true,\"accepted\":true,\"responsePending\":true,\"responseState\":\"empty_assistant_turn\",\"ledgerStatus\":\"retry_scheduled\",\"reason\":\"empty_assistant_turn\",\"diagnostics\":[\"OpenCode bootstrap MCP did not complete required tools before assistant response: runtime_bootstrap_checkin, member_briefing\",\"Recreated OpenCode session before message delivery because bootstrap MCP failed in the stored session.\",\"OpenCode app MCP was reattached before message delivery.\",\"Reattached stale OpenCode app MCP server: existing app MCP config does not expose environment; CLAUDE_TEAM_CONTROL_URL cannot be verified\"]}"
]
}
]
}

View file

@ -2,7 +2,7 @@ import { createHash } from 'crypto';
import { mkdtemp, mkdir, rm, stat, writeFile } from 'fs/promises';
import { tmpdir } from 'os';
import * as path from 'path';
import { afterEach, describe, expect, it, vi } from 'vitest';
import { afterAll, afterEach, beforeAll, describe, expect, it, vi } from 'vitest';
import {
shouldIgnoreLogSourceWatcherPath,
@ -12,6 +12,9 @@ import {
import type { TeamMemberLogsFinder } from '../../../../src/main/services/team/TeamMemberLogsFinder';
import type { TeamChangeEvent } from '../../../../src/shared/types';
// Snapshot of the chokidar-related environment variables taken at module load,
// so the suite can put them back after overriding them.
const originalChokidarUsePolling = process.env.CHOKIDAR_USEPOLLING;
const originalChokidarInterval = process.env.CHOKIDAR_INTERVAL;
/**
 * Derives a fixed-shape segment from a task id: the literal prefix `task-id-`
 * followed by the first 32 hex characters of the id's SHA-256 digest.
 *
 * @param taskId - Task id to hash.
 * @returns The prefixed, truncated hex digest.
 */
function safeTaskIdSegment(taskId: string): string {
  const fullDigest = createHash('sha256').update(taskId).digest('hex');
  const shortDigest = fullDigest.slice(0, 32);
  return 'task-id-' + shortDigest;
}
@ -19,6 +22,11 @@ function safeTaskIdSegment(taskId: string): string {
describe('TeamLogSourceTracker', () => {
let tempDir: string | null = null;
beforeAll(() => {
  // Override the chokidar environment variables for the whole suite.
  // NOTE(review): presumably this forces polling-based watching at a 25 ms interval
  // so file-change detection is deterministic in tests; confirm against the
  // chokidar version in use.
  Object.assign(process.env, {
    CHOKIDAR_USEPOLLING: '1',
    CHOKIDAR_INTERVAL: '25',
  });
});
afterEach(async () => {
if (tempDir) {
await rm(tempDir, { recursive: true, force: true });
@ -26,6 +34,19 @@ describe('TeamLogSourceTracker', () => {
}
});
afterAll(() => {
  // Put each chokidar variable back to its pre-suite state. A variable that was
  // unset before the suite is deleted rather than assigned, because assigning
  // `undefined` to process.env would store the string "undefined".
  const savedValues: [string, string | undefined][] = [
    ['CHOKIDAR_USEPOLLING', originalChokidarUsePolling],
    ['CHOKIDAR_INTERVAL', originalChokidarInterval],
  ];
  for (const [variableName, savedValue] of savedValues) {
    if (savedValue === undefined) {
      delete process.env[variableName];
    } else {
      process.env[variableName] = savedValue;
    }
  }
});
it('emits task-log-change for matching runtime freshness signals without broad log-source-change', async () => {
tempDir = await mkdtemp(path.join(tmpdir(), 'team-log-source-tracker-'));

View file

@ -285,7 +285,7 @@ describe('TeamModelSelector disabled Codex models', () => {
expect(host.textContent).toContain('mistralai/codestral-2508');
expect(host.textContent).toContain('Tested');
expect(host.textContent).toContain('minimax-m2.5-free');
expect(host.textContent).toContain('Recommended with limits');
expect(host.textContent).toContain('Tested with limits');
expect(host.textContent).toContain('openai/gpt-oss-120b:free');
expect(host.textContent).toContain('big-pickle');
expect(host.textContent).toContain('qwen/qwen3-coder-plus');
@ -313,8 +313,8 @@ describe('TeamModelSelector disabled Codex models', () => {
expect(limitedIndex).toBeGreaterThanOrEqual(0);
expect(testedIndex).toBeGreaterThanOrEqual(0);
expect(limitedIndex).toBeGreaterThan(recommendedIndex);
expect(testedIndex).toBeGreaterThan(limitedIndex);
expect(unavailableIndex).toBeGreaterThan(testedIndex);
expect(testedIndex).toBeGreaterThan(recommendedIndex);
expect(unavailableIndex).toBeGreaterThan(limitedIndex);
expect(notRecommendedIndex).toBeGreaterThan(unavailableIndex);
expect(host.textContent).toContain('Recommended only');
@ -325,6 +325,223 @@ describe('TeamModelSelector disabled Codex models', () => {
});
});
// A model listed in `modelUnavailableReasonByValue` must render as a disabled
// "Unavailable" tile that exposes the reason in its title and ignores clicks.
it('shows short-lived OpenCode preflight failures as unavailable model tiles', async () => {
vi.stubGlobal('IS_REACT_ACT_ENVIRONMENT', true);
// Provider status itself reports no per-model availability problems; the only
// signal that a model is unavailable comes from the prop passed below.
storeState.cliStatus = {
flavor: 'agent_teams_orchestrator',
providers: [
{
providerId: 'opencode',
authMethod: 'opencode_managed',
backend: {
kind: 'opencode-cli',
label: 'OpenCode CLI',
endpointLabel: 'opencode',
},
authenticated: true,
supported: true,
capabilities: {
teamLaunch: true,
},
models: ['openai/gpt-5.4', 'opencode/big-pickle'],
modelVerificationState: 'idle',
modelAvailability: [],
},
],
};
const host = document.createElement('div');
document.body.appendChild(host);
const root = createRoot(host);
const onValueChange = vi.fn();
await act(async () => {
root.render(
React.createElement(TeamModelSelector, {
providerId: 'opencode',
onProviderChange: () => undefined,
value: '',
onValueChange,
modelUnavailableReasonByValue: {
'openai/gpt-5.4': 'OpenCode provider authentication failed',
},
})
);
await Promise.resolve();
});
// Locate the tile by its display label rather than by the raw model id.
const unavailableButton = Array.from(host.querySelectorAll('button')).find((button) =>
button.textContent?.includes('GPT-5.4')
);
expect(unavailableButton).not.toBeNull();
expect(unavailableButton?.getAttribute('aria-disabled')).toBe('true');
expect(unavailableButton?.textContent).toContain('Unavailable');
expect(unavailableButton?.getAttribute('title')).toContain(
'OpenCode provider authentication failed'
);
// Clicking a disabled tile must not select the model.
await act(async () => {
unavailableButton?.dispatchEvent(new MouseEvent('click', { bubbles: true }));
await Promise.resolve();
});
expect(onValueChange).not.toHaveBeenCalled();
await act(async () => {
root.unmount();
await Promise.resolve();
});
});
// A model listed in `modelIssueReasonByValue` must render as an "Issue" tile
// that shows the reason in its title but stays enabled and selectable.
it('shows short-lived OpenCode preflight notes as selectable issue tiles', async () => {
vi.stubGlobal('IS_REACT_ACT_ENVIRONMENT', true);
// Provider status itself reports no per-model availability problems; the only
// issue signal comes from the prop passed below.
storeState.cliStatus = {
flavor: 'agent_teams_orchestrator',
providers: [
{
providerId: 'opencode',
authMethod: 'opencode_managed',
backend: {
kind: 'opencode-cli',
label: 'OpenCode CLI',
endpointLabel: 'opencode',
},
authenticated: true,
supported: true,
capabilities: {
teamLaunch: true,
},
models: ['openai/gpt-5.4', 'opencode/big-pickle'],
modelVerificationState: 'idle',
modelAvailability: [],
},
],
};
const host = document.createElement('div');
document.body.appendChild(host);
const root = createRoot(host);
const onValueChange = vi.fn();
await act(async () => {
root.render(
React.createElement(TeamModelSelector, {
providerId: 'opencode',
onProviderChange: () => undefined,
value: '',
onValueChange,
modelIssueReasonByValue: {
'openai/gpt-5.4': 'Model verification timed out',
},
})
);
await Promise.resolve();
});
// Locate the tile by its display label rather than by the raw model id.
const issueButton = Array.from(host.querySelectorAll('button')).find((button) =>
button.textContent?.includes('GPT-5.4')
);
expect(issueButton).not.toBeNull();
expect(issueButton?.getAttribute('aria-disabled')).toBe('false');
expect(issueButton?.textContent).toContain('Issue');
expect(issueButton?.getAttribute('title')).toContain('Model verification timed out');
// Unlike an unavailable tile, an issue tile still selects the model on click.
await act(async () => {
issueButton?.dispatchEvent(new MouseEvent('click', { bubbles: true }));
await Promise.resolve();
});
expect(onValueChange).toHaveBeenCalledWith('openai/gpt-5.4');
await act(async () => {
root.unmount();
await Promise.resolve();
});
});
// With no per-model reason props at all, the selector must still disable the
// `openai/...` model when the provider status reports the OpenAI backend as
// requiring authentication, while leaving the `opencode/...` model selectable.
it('dynamically disables OpenCode openai routes when OpenAI auth is invalid', async () => {
vi.stubGlobal('IS_REACT_ACT_ENVIRONMENT', true);
storeState.cliStatus = {
flavor: 'agent_teams_orchestrator',
providers: [
{
providerId: 'opencode',
authMethod: 'opencode_managed',
backend: {
kind: 'opencode-cli',
label: 'OpenCode CLI',
endpointLabel: 'opencode',
},
authenticated: true,
supported: true,
capabilities: {
teamLaunch: true,
},
statusMessage: 'OpenAI token invalid',
detailMessage: 'OpenAI token refresh failed: 401',
models: ['openai/gpt-5.4', 'opencode/big-pickle'],
// The OpenAI backend is reported as unavailable and needing authentication.
availableBackends: [
{
id: 'openai',
label: 'OpenAI',
description: 'OpenAI route',
selectable: false,
recommended: false,
available: false,
state: 'authentication-required',
statusMessage: 'Authentication required',
detailMessage: 'Token refresh failed: 401',
},
],
modelVerificationState: 'idle',
modelAvailability: [],
},
],
};
const host = document.createElement('div');
document.body.appendChild(host);
const root = createRoot(host);
const onValueChange = vi.fn();
await act(async () => {
root.render(
React.createElement(TeamModelSelector, {
providerId: 'opencode',
onProviderChange: () => undefined,
value: '',
onValueChange,
})
);
await Promise.resolve();
});
const openAiButton = Array.from(host.querySelectorAll('button')).find((button) =>
button.textContent?.includes('GPT-5.4')
);
const bigPickleButton = Array.from(host.querySelectorAll('button')).find((button) =>
button.textContent?.includes('big-pickle')
);
expect(openAiButton).not.toBeNull();
expect(openAiButton?.getAttribute('aria-disabled')).toBe('true');
expect(openAiButton?.textContent).toContain('Unavailable');
expect(bigPickleButton).not.toBeNull();
expect(bigPickleButton?.getAttribute('aria-disabled')).toBe('false');
// Clicking the disabled OpenAI-routed tile must not select it.
await act(async () => {
openAiButton?.dispatchEvent(new MouseEvent('click', { bubbles: true }));
await Promise.resolve();
});
expect(onValueChange).not.toHaveBeenCalled();
await act(async () => {
root.unmount();
await Promise.resolve();
});
});
it('constrains long runtime model lists so the selector scrolls', async () => {
vi.stubGlobal('IS_REACT_ACT_ENVIRONMENT', true);
storeState.cliStatus = {
@ -842,13 +1059,15 @@ describe('TeamModelSelector disabled Codex models', () => {
const modelButtons = Array.from(host.querySelectorAll('button')).map(
(button) => button.textContent?.trim() ?? ''
);
const hasModelButtonStartingWith = (label: string): boolean =>
modelButtons.some((text) => text.startsWith(label));
expect(modelButtons.some((text) => text.startsWith('Default'))).toBe(true);
expect(modelButtons).toContain('Opus 4.8');
expect(modelButtons).toContain('Opus 4.6');
expect(modelButtons).toContain('Sonnet 4.7');
expect(modelButtons).toContain('Haiku 4.6');
expect(modelButtons).not.toContain('Opus 4.8 (1M)');
expect(hasModelButtonStartingWith('Default')).toBe(true);
expect(hasModelButtonStartingWith('Opus 4.8')).toBe(true);
expect(hasModelButtonStartingWith('Opus 4.6')).toBe(true);
expect(hasModelButtonStartingWith('Sonnet 4.7')).toBe(true);
expect(hasModelButtonStartingWith('Haiku 4.6')).toBe(true);
expect(hasModelButtonStartingWith('Opus 4.8 (1M)')).toBe(false);
await act(async () => {
root.unmount();

View file

@ -2,6 +2,7 @@ import { afterEach, describe, expect, it, vi } from 'vitest';
import {
__resetShortLivedProviderPrepareCacheForTests,
getShortLivedProviderPrepareModelIssueReasons,
getShortLivedProviderPrepareModelResults,
storeShortLivedProviderPrepareModelResults,
} from '@renderer/components/team/dialogs/providerPrepareShortLivedCache';
@ -42,6 +43,17 @@ describe('providerPrepareShortLivedCache', () => {
warningLine: null,
},
});
expect(
getShortLivedProviderPrepareModelIssueReasons({
providerId: 'opencode',
cacheKey: 'key-1',
})
).toEqual({
modelIssueReasonByValue: {
'opencode/nemotron-3-super-free': 'timed out',
},
modelUnavailableReasonByValue: {},
});
});
it('expires cached OpenCode results after the short-lived TTL', () => {
@ -68,6 +80,100 @@ describe('providerPrepareShortLivedCache', () => {
).toEqual({});
});
it('stores short-lived OpenCode failed model results as blocking unavailable issues', () => {
  const failedModelId = 'openai/gpt-5.4';

  // Seed the cache with one failed verification for the OpenCode provider.
  storeShortLivedProviderPrepareModelResults({
    providerId: 'opencode',
    cacheKey: 'key-4',
    modelResultsById: {
      [failedModelId]: {
        status: 'failed',
        line: 'GPT-5.4 - unavailable - OpenCode provider authentication failed',
        warningLine: null,
      },
    },
  });

  // The failed entry must not be returned by the model-results getter...
  const cachedResults = getShortLivedProviderPrepareModelResults({
    providerId: 'opencode',
    cacheKey: 'key-4',
  });
  expect(cachedResults).toEqual({});

  // ...and must be reported only through the "unavailable" reason map.
  const cachedIssues = getShortLivedProviderPrepareModelIssueReasons({
    providerId: 'opencode',
    cacheKey: 'key-4',
  });
  expect(cachedIssues).toEqual({
    modelIssueReasonByValue: {},
    modelUnavailableReasonByValue: {
      [failedModelId]: 'OpenCode provider authentication failed',
    },
  });
});
it('clears a short-lived issue when a later result verifies the same model', () => {
  // Records one verification outcome for the same model under the same cache key.
  const recordOutcome = (status: 'failed' | 'ready', line: string): void => {
    storeShortLivedProviderPrepareModelResults({
      providerId: 'opencode',
      cacheKey: 'key-5',
      modelResultsById: {
        'openai/gpt-5.4': { status, line, warningLine: null },
      },
    });
  };

  // First the model fails, then a later run verifies it.
  recordOutcome('failed', 'GPT-5.4 - unavailable - OpenCode provider authentication failed');
  recordOutcome('ready', 'GPT-5.4 - verified');

  // After the later "ready" result, no issue of either kind may remain.
  const remainingIssues = getShortLivedProviderPrepareModelIssueReasons({
    providerId: 'opencode',
    cacheKey: 'key-5',
  });
  expect(remainingIssues).toEqual({
    modelIssueReasonByValue: {},
    modelUnavailableReasonByValue: {},
  });
});
it('expires short-lived OpenCode issues after the issue TTL', () => {
  // NOTE(review): 90_001 ms presumably sits just past a 90 s issue TTL — confirm against the cache module.
  const elapsedMs = 90_001;

  // Fake timers are enabled before the write, as in the original test.
  vi.useFakeTimers();

  storeShortLivedProviderPrepareModelResults({
    providerId: 'opencode',
    cacheKey: 'key-6',
    modelResultsById: {
      'openai/gpt-5.4': {
        status: 'failed',
        line: 'GPT-5.4 - unavailable - OpenCode provider authentication failed',
        warningLine: null,
      },
    },
  });

  vi.advanceTimersByTime(elapsedMs);

  // Once the time has elapsed, both reason maps must be empty.
  const issuesAfterExpiry = getShortLivedProviderPrepareModelIssueReasons({
    providerId: 'opencode',
    cacheKey: 'key-6',
  });
  expect(issuesAfterExpiry).toEqual({
    modelIssueReasonByValue: {},
    modelUnavailableReasonByValue: {},
  });
});
it('does not store short-lived cache for non-OpenCode providers', () => {
storeShortLivedProviderPrepareModelResults({
providerId: 'codex',
@ -87,5 +193,14 @@ describe('providerPrepareShortLivedCache', () => {
cacheKey: 'key-3',
})
).toEqual({});
expect(
getShortLivedProviderPrepareModelIssueReasons({
providerId: 'codex',
cacheKey: 'key-3',
})
).toEqual({
modelIssueReasonByValue: {},
modelUnavailableReasonByValue: {},
});
});
});

View file

@ -643,7 +643,7 @@ describe('RuntimeProviderManagementPanelView', () => {
expect(host.textContent).toContain('Not recommended');
expect(host.textContent).toContain('Unavailable in OpenCode');
expect(host.textContent).toContain('Tested');
expect(host.textContent).toContain('Recommended with limits');
expect(host.textContent).toContain('Tested with limits');
expect(host.textContent).toContain('Recommended only');
expect(host.textContent).not.toContain('Set OpenCode default');
expect(
@ -687,8 +687,11 @@ describe('RuntimeProviderManagementPanelView', () => {
expect((host.textContent ?? '').indexOf('opencode/big-pickle')).toBeLessThan(
(host.textContent ?? '').indexOf('minimax-m2.5-free')
);
expect((host.textContent ?? '').indexOf('mistralai/codestral-2508')).toBeLessThan(
(host.textContent ?? '').indexOf('minimax-m2.5-free')
);
expect((host.textContent ?? '').indexOf('minimax-m2.5-free')).toBeLessThan(
(host.textContent ?? '').indexOf('mistralai/codestral-2508')
(host.textContent ?? '').indexOf('qwen/qwen3-coder-plus')
);
expect((host.textContent ?? '').indexOf('qwen/qwen3-coder-plus')).toBeLessThan(
(host.textContent ?? '').indexOf('openrouter/openai/gpt-oss-20b:free')

View file

@ -26,10 +26,12 @@ describe('getOpenCodeTeamModelRecommendation', () => {
});
it('marks models that passed real OpenCode Agent Teams smoke E2E as tested', () => {
expect(getOpenCodeTeamModelRecommendation('openrouter/mistralai/codestral-2508')).toMatchObject({
level: 'tested',
label: 'Tested',
});
expect(getOpenCodeTeamModelRecommendation('openrouter/mistralai/codestral-2508')).toMatchObject(
{
level: 'tested',
label: 'Tested',
}
);
expect(
getOpenCodeTeamModelRecommendation(' OPENROUTER/GOOGLE/GEMINI-3-FLASH-PREVIEW ')
).toMatchObject({
@ -101,10 +103,11 @@ describe('getOpenCodeTeamModelRecommendation', () => {
it('keeps similarly named models distinct when real E2E disagreed', () => {
expect(getOpenCodeTeamModelRecommendation('opencode/minimax-m2.5-free')).toMatchObject({
level: 'recommended-with-limits',
label: 'Recommended with limits',
level: 'tested-with-limits',
label: 'Tested with limits',
reason: expect.stringContaining('duplicate or missing reply tokens'),
});
expect(isOpenCodeTeamModelRecommended('opencode/minimax-m2.5-free')).toBe(true);
expect(isOpenCodeTeamModelRecommended('opencode/minimax-m2.5-free')).toBe(false);
expect(
getOpenCodeTeamModelRecommendation('openrouter/minimax/minimax-m2.5:free')
).toMatchObject({
@ -815,9 +818,9 @@ describe('getOpenCodeTeamModelRecommendation', () => {
[...models].sort((left, right) => compareOpenCodeTeamModelRecommendations(left, right))
).toEqual([
'opencode/big-pickle',
'opencode/minimax-m2.5-free',
'openrouter/mistralai/codestral-2508',
'openrouter/anthropic/claude-sonnet-4.6',
'opencode/minimax-m2.5-free',
'openrouter/qwen/qwen3-coder-plus',
'openrouter/openai/gpt-oss-20b:free',
]);

View file

@ -126,6 +126,34 @@ describe('teamModelAvailability', () => {
]);
});
it('treats runtime-reported unavailable models as non-selectable', () => {
  const blockedModelId = 'gpt-5.4';
  const blockedReason = 'No access for this account';

  // The runtime lists the model but marks it unavailable with a reason.
  const providerStatus = createCodexProviderStatus([blockedModelId], {
    modelAvailability: [
      {
        modelId: blockedModelId,
        status: 'unavailable',
        reason: blockedReason,
        checkedAt: null,
      },
    ],
  });

  // The model is excluded from the available list and a stored selection normalizes to ''.
  expect(getAvailableTeamProviderModels('codex', providerStatus)).toEqual([]);
  expect(normalizeTeamModelForUi('codex', blockedModelId, providerStatus)).toBe('');

  // The selection error carries the runtime-provided reason.
  const selectionError = getTeamModelSelectionError('codex', blockedModelId, providerStatus);
  expect(selectionError).toContain(blockedReason);

  // The option list still contains the model, annotated with status and reason.
  const options = getAvailableTeamProviderModelOptions('codex', providerStatus);
  expect(options).toEqual([
    { value: '', label: 'Default', badgeLabel: 'Default' },
    {
      value: blockedModelId,
      label: '5.4',
      availabilityStatus: 'unavailable',
      availabilityReason: blockedReason,
    },
  ]);
});
it('keeps OpenCode raw ids intact while exposing readable labels and source badges', () => {
const providerStatus = createOpenCodeProviderStatus([
'openai/gpt-5.4',
@ -168,6 +196,33 @@ describe('teamModelAvailability', () => {
).toBe('openrouter/moonshotai/kimi-k2');
});
it('reports OpenCode openai routes unavailable when OpenAI auth is invalid', () => {
  const openAiRouteModel = 'openai/gpt-5.4';
  const nativeModel = 'opencode/big-pickle';

  // Provider status whose only listed backend (openai) requires authentication.
  const providerStatus = createOpenCodeProviderStatus([openAiRouteModel, nativeModel], {
    statusMessage: 'OpenAI token invalid',
    detailMessage: 'OpenAI token refresh failed: 401',
    availableBackends: [
      {
        id: 'openai',
        label: 'OpenAI',
        description: 'OpenAI route',
        selectable: false,
        recommended: false,
        available: false,
        state: 'authentication-required',
        statusMessage: 'Authentication required',
        detailMessage: 'Token refresh failed: 401',
      },
    ],
  });

  // The openai/* model reports an authentication error...
  const openAiError = getTeamModelSelectionError('opencode', openAiRouteModel, providerStatus);
  expect(openAiError).toContain('OpenCode OpenAI provider authentication failed');

  // ...while the opencode/* model reports no error.
  const nativeError = getTeamModelSelectionError('opencode', nativeModel, providerStatus);
  expect(nativeError).toBeNull();
});
it('clears stale Codex selections when runtime no longer reports that model', () => {
const providerStatus = createCodexProviderStatus(['gpt-5.4', 'gpt-5.3-codex']);
@ -304,9 +359,7 @@ describe('teamModelAvailability', () => {
it('keeps known Anthropic full model ids selectable without runtime verification', () => {
expect(normalizeTeamModelForUi('anthropic', 'claude-opus-4-7')).toBe('claude-opus-4-7');
expect(normalizeTeamModelForUi('anthropic', 'claude-opus-4-7[1m]')).toBe(
'claude-opus-4-7[1m]'
);
expect(normalizeTeamModelForUi('anthropic', 'claude-opus-4-7[1m]')).toBe('claude-opus-4-7[1m]');
expect(normalizeTeamModelForUi('anthropic', 'claude-haiku-4-5-20251001')).toBe(
'claude-haiku-4-5-20251001'
);

View file

@ -0,0 +1,64 @@
import { describe, expect, it } from 'vitest';
import {
getTeamModelRecommendation,
isTeamModelRecommended,
} from '@renderer/utils/teamModelRecommendations';
describe('getTeamModelRecommendation', () => {
  // Shape every "recommended" verdict is matched against in this suite.
  const recommendedVerdict = { level: 'recommended', label: 'Recommended' };

  it('marks only the selected Codex Agent Teams models as recommended', () => {
    const recommendedCodexModels = ['gpt-5.4-mini', 'gpt-5.3-codex', 'gpt-5.5'];
    const unbadgedCodexModels = ['gpt-5.4', 'gpt-5.2', 'gpt-5.3-codex-spark'];

    for (const modelId of recommendedCodexModels) {
      expect(getTeamModelRecommendation('codex', modelId)).toMatchObject(recommendedVerdict);
      expect(isTeamModelRecommended('codex', modelId)).toBe(true);
    }

    // Every other Codex model id carries no recommendation at all.
    for (const modelId of unbadgedCodexModels) {
      expect(getTeamModelRecommendation('codex', modelId)).toBeNull();
      expect(isTeamModelRecommended('codex', modelId)).toBe(false);
    }
  });

  it('marks supported Claude aliases and full ids as recommended but leaves default unbadged', () => {
    const claudeAliases = ['haiku', 'sonnet', 'sonnet[1m]', 'opus', 'opus[1m]'];
    const claudeFullIds = [
      'claude-haiku-4-5',
      'claude-haiku-4-5-20251001',
      'claude-sonnet-4-6',
      'claude-sonnet-4-6[1m]',
      'claude-opus-4-7',
      'claude-opus-4-7[1m]',
      'claude-opus-4-6',
      'claude-opus-4-6[1m]',
    ];

    // Aliases are checked first, then full ids, in the same order as one combined list.
    for (const modelId of [...claudeAliases, ...claudeFullIds]) {
      expect(getTeamModelRecommendation('anthropic', modelId)).toMatchObject(recommendedVerdict);
      expect(isTeamModelRecommended('anthropic', modelId)).toBe(true);
    }

    // The default selection ('' or 'default') gets no badge.
    expect(getTeamModelRecommendation('anthropic', '')).toBeNull();
    expect(getTeamModelRecommendation('anthropic', 'default')).toBeNull();
  });

  it('delegates OpenCode verdicts and keeps MiniMax below recommended', () => {
    const bigPickle = 'opencode/big-pickle';
    const miniMaxFree = 'opencode/minimax-m2.5-free';

    expect(getTeamModelRecommendation('opencode', bigPickle)).toMatchObject(recommendedVerdict);
    expect(isTeamModelRecommended('opencode', bigPickle)).toBe(true);

    // MiniMax is "tested with limits" and does not count as recommended.
    expect(getTeamModelRecommendation('opencode', miniMaxFree)).toMatchObject({
      level: 'tested-with-limits',
      label: 'Tested with limits',
    });
    expect(isTeamModelRecommended('opencode', miniMaxFree)).toBe(false);
  });
});