feat: enhance team control API with retry logic and fallback mechanisms

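- Introduced `resolveControlBaseUrls`, which returns every candidate control base URL (explicit flag, published state file, then `CLAUDE_TEAM_CONTROL_URL`) so a request can fall back from one endpoint to the next.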
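- Implemented retryable error handling for control API requests (timeouts, network failures, empty or non-JSON responses, and HTTP 404/408/429/5xx), improving robustness against transient failures and stale endpoints.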
- Updated provisioning and runtime state retrieval functions to utilize the new fallback logic.
- Enhanced tests to validate the new behavior, ensuring proper functionality under various scenarios.
- Added utility functions for managing retryable errors and control API state, improving code clarity and maintainability.
This commit is contained in:
iliya 2026-03-12 19:23:48 +02:00
parent 4a0b1aa698
commit 81ac59e46b
10 changed files with 428 additions and 48 deletions

View file

@ -7,6 +7,7 @@ const MIN_WAIT_TIMEOUT_MS = 1000;
const MAX_WAIT_TIMEOUT_MS = 10 * 60 * 1000;
const POLL_INTERVAL_MS = 1000;
const TEAM_CONTROL_API_STATE_FILE = 'team-control-api.json';
const RETRYABLE_CONTROL_ERROR = 'retryableControlError';
function sleep(ms) {
return new Promise((resolve) => setTimeout(resolve, ms));
@ -41,21 +42,46 @@ function readControlApiState(context) {
}
}
function resolveControlBaseUrl(context, flags = {}) {
/**
 * Collects the distinct, non-blank strings from `items`, preserving first-seen order.
 *
 * Values are trimmed before they are compared and returned, so the same entry
 * written with and without surrounding whitespace is reported only once.
 * Non-string entries and blank strings are ignored.
 *
 * @param {Iterable<unknown>} items Candidate values (may contain non-strings).
 * @returns {string[]} Trimmed, de-duplicated strings in first-seen order.
 */
function uniqueNonEmpty(items) {
  const seen = new Set();
  for (const item of items) {
    if (typeof item !== 'string') {
      continue;
    }
    const trimmed = item.trim();
    if (trimmed) {
      // Set insertion order keeps the caller's priority order intact.
      seen.add(trimmed);
    }
  }
  return [...seen];
}
function resolveControlBaseUrls(context, flags = {}) {
const explicit =
(typeof flags.controlUrl === 'string' && flags.controlUrl.trim()) ||
(typeof flags['control-url'] === 'string' && flags['control-url'].trim()) ||
'';
const stateFileUrl = readControlApiState(context) || '';
const envUrl =
(typeof process.env.CLAUDE_TEAM_CONTROL_URL === 'string' &&
process.env.CLAUDE_TEAM_CONTROL_URL.trim()) ||
readControlApiState(context);
'';
const candidates = uniqueNonEmpty([explicit, stateFileUrl, envUrl]);
if (!explicit) {
if (candidates.length === 0) {
throw new Error(
'Team control API is unavailable. Start the desktop app team runtime first so it can publish CLAUDE_TEAM_CONTROL_URL.'
);
}
return explicit;
return candidates;
}
/**
 * Builds an Error flagged as retryable for the control API fallback logic.
 *
 * @param {string} message Human-readable failure description.
 * @param {unknown} [cause] Underlying error; attached as `cause` only when truthy.
 * @returns {Error} Error carrying the retryable marker property set to `true`.
 */
function makeRetryableControlError(message, cause) {
  const retryable = Object.assign(new Error(message), { [RETRYABLE_CONTROL_ERROR]: true });
  if (!cause) {
    return retryable;
  }
  retryable.cause = cause;
  return retryable;
}
/**
 * Tells whether `error` carries the retryable marker set to exactly `true`.
 *
 * @param {unknown} error Any thrown value.
 * @returns {boolean} `true` only for truthy values whose marker property is strictly `true`.
 */
function isRetryableControlError(error) {
  if (!error) {
    return false;
  }
  return error[RETRYABLE_CONTROL_ERROR] === true;
}
/**
 * Classifies an HTTP status code as retryable: 404, 408, 429, or any code of 500 and above.
 *
 * @param {number} statusCode HTTP response status.
 * @returns {boolean} Whether the request may be retried against another endpoint.
 */
function isRetryableStatusCode(statusCode) {
  if (statusCode >= 500) {
    return true;
  }
  return [404, 408, 429].includes(statusCode);
}
async function requestJson(baseUrl, pathname, options = {}) {
@ -86,13 +112,28 @@ async function requestJson(baseUrl, pathname, options = {}) {
payload && typeof payload.error === 'string' && payload.error.trim()
? payload.error.trim()
: `${response.status} ${response.statusText}`.trim();
if (isRetryableStatusCode(response.status)) {
throw makeRetryableControlError(
`Team control API ${response.status} at ${baseUrl}${pathname}: ${detail || 'request failed'}`
);
}
throw new Error(detail || 'Team control API request failed');
}
if (payload == null) {
throw makeRetryableControlError(`Team control API returned empty or non-JSON response at ${baseUrl}${pathname}`);
}
return payload;
} catch (error) {
if (error && error.name === 'AbortError') {
throw new Error(`Timed out calling team control API: ${pathname}`);
throw makeRetryableControlError(`Timed out calling team control API: ${pathname}`, error);
}
if (error && error.name === 'TypeError') {
throw makeRetryableControlError(
`Failed to reach team control API at ${baseUrl}: ${error.message || 'fetch failed'}`,
error
);
}
throw error;
} finally {
@ -100,6 +141,24 @@ async function requestJson(baseUrl, pathname, options = {}) {
}
}
/**
 * Issues the request against each base URL in order and returns the first successful payload.
 *
 * A failure moves on to the next base URL only when it is a retryable control
 * error and another candidate remains; otherwise the failure is rethrown as-is.
 *
 * @param {string[]} baseUrls Candidate control API base URLs, in priority order.
 * @param {string} pathname Request path passed through to `requestJson`.
 * @param {object} [options] Request options passed through to `requestJson`.
 * @returns {Promise<unknown>} The payload returned by `requestJson`.
 * @throws {Error} The last failure, or a generic error when `baseUrls` is empty.
 */
async function requestJsonWithFallback(baseUrls, pathname, options = {}) {
  const lastIndex = baseUrls.length - 1;
  let position = 0;
  while (position <= lastIndex) {
    try {
      return await requestJson(baseUrls[position], pathname, options);
    } catch (failure) {
      const exhausted = position === lastIndex;
      if (exhausted || !isRetryableControlError(failure)) {
        throw failure;
      }
    }
    position += 1;
  }
  // Only reachable when no base URL was supplied at all.
  throw new Error('Team control API request failed');
}
function buildLaunchRequest(flags = {}) {
const cwd = typeof flags.cwd === 'string' ? flags.cwd.trim() : '';
if (!cwd) {
@ -159,14 +218,18 @@ function shouldWaitForStop(flags = {}) {
return true;
}
async function waitForProvisioningState(baseUrl, teamName, runId, timeoutMs) {
async function waitForProvisioningState(baseUrls, teamName, runId, timeoutMs) {
const startedAt = Date.now();
let lastProgress = null;
while (Date.now() - startedAt <= timeoutMs) {
const progress = await requestJson(baseUrl, `/api/teams/provisioning/${encodeURIComponent(runId)}`, {
timeoutMs: Math.min(timeoutMs, 10000),
});
const progress = await requestJsonWithFallback(
baseUrls,
`/api/teams/provisioning/${encodeURIComponent(runId)}`,
{
timeoutMs: Math.min(timeoutMs, 10000),
}
);
lastProgress = progress;
if (progress && READY_STATES.has(progress.state)) {
@ -194,12 +257,12 @@ async function waitForProvisioningState(baseUrl, teamName, runId, timeoutMs) {
throw new Error(`Timed out waiting for team ${teamName} to become ready${stateLabel}`);
}
async function waitForStopped(baseUrl, teamName, timeoutMs) {
async function waitForStopped(baseUrls, teamName, timeoutMs) {
const startedAt = Date.now();
while (Date.now() - startedAt <= timeoutMs) {
const runtime = await requestJson(
baseUrl,
const runtime = await requestJsonWithFallback(
baseUrls,
`/api/teams/${encodeURIComponent(teamName)}/runtime`,
{ timeoutMs: Math.min(timeoutMs, 10000) }
);
@ -215,12 +278,16 @@ async function waitForStopped(baseUrl, teamName, timeoutMs) {
}
async function launchTeam(context, flags = {}) {
const baseUrl = resolveControlBaseUrl(context, flags);
const baseUrls = resolveControlBaseUrls(context, flags);
const request = buildLaunchRequest(flags);
const launch = await requestJson(baseUrl, `/api/teams/${encodeURIComponent(context.teamName)}/launch`, {
method: 'POST',
body: request,
});
const launch = await requestJsonWithFallback(
baseUrls,
`/api/teams/${encodeURIComponent(context.teamName)}/launch`,
{
method: 'POST',
body: request,
}
);
if (!shouldWaitForReady(flags)) {
return {
@ -231,7 +298,7 @@ async function launchTeam(context, flags = {}) {
}
return waitForProvisioningState(
baseUrl,
baseUrls,
context.teamName,
launch.runId,
normalizeTimeoutMs(flags.waitTimeoutMs || flags['wait-timeout-ms'])
@ -239,25 +306,29 @@ async function launchTeam(context, flags = {}) {
}
async function stopTeam(context, flags = {}) {
const baseUrl = resolveControlBaseUrl(context, flags);
const stopped = await requestJson(baseUrl, `/api/teams/${encodeURIComponent(context.teamName)}/stop`, {
method: 'POST',
});
const baseUrls = resolveControlBaseUrls(context, flags);
const stopped = await requestJsonWithFallback(
baseUrls,
`/api/teams/${encodeURIComponent(context.teamName)}/stop`,
{
method: 'POST',
}
);
if (!shouldWaitForStop(flags)) {
return stopped;
}
return waitForStopped(
baseUrl,
baseUrls,
context.teamName,
normalizeTimeoutMs(flags.waitTimeoutMs || flags['wait-timeout-ms'])
);
}
async function getRuntimeState(context, flags = {}) {
const baseUrl = resolveControlBaseUrl(context, flags);
return requestJson(baseUrl, `/api/teams/${encodeURIComponent(context.teamName)}/runtime`);
const baseUrls = resolveControlBaseUrls(context, flags);
return requestJsonWithFallback(baseUrls, `/api/teams/${encodeURIComponent(context.teamName)}/runtime`);
}
module.exports = {

View file

@ -58,6 +58,13 @@ describe('agent-teams-controller API', () => {
};
}
/**
 * Test helper: writes `team-control-api.json` into `claudeDir` with the given
 * base URL and the current time as `updatedAt`, pretty-printed with 2 spaces.
 */
function writeControlApiState(claudeDir, baseUrl) {
  const statePath = path.join(claudeDir, 'team-control-api.json');
  const state = { baseUrl, updatedAt: new Date().toISOString() };
  fs.writeFileSync(statePath, JSON.stringify(state, null, 2));
}
it('creates tasks and exposes grouped controller modules', () => {
const claudeDir = makeClaudeDir();
const controller = createController({ teamName: 'my-team', claudeDir });
@ -680,4 +687,140 @@ describe('agent-teams-controller API', () => {
await server.close();
}
});
// Scenario: the published state file points at a live control server while
// CLAUDE_TEAM_CONTROL_URL points at http://127.0.0.1:1 (presumably nothing listens there).
// NOTE(review): the assertions only check that the launch succeeded against the live
// server; they do not by themselves prove which candidate was tried first.
it('prefers the published control endpoint over a stale env URL', async () => {
const claudeDir = makeClaudeDir();
const controller = createController({ teamName: 'my-team', claudeDir });
// Remember the caller's env value so the finally block can restore it.
const previousUrl = process.env.CLAUDE_TEAM_CONTROL_URL;
// Live server: answers the launch call and reports the run as ready; everything else is 404.
const server = await startControlServer(async ({ method, url }) => {
if (method === 'POST' && url === '/api/teams/my-team/launch') {
return { body: { runId: 'run-fresh' } };
}
if (method === 'GET' && url === '/api/teams/provisioning/run-fresh') {
return {
body: {
runId: 'run-fresh',
teamName: 'my-team',
state: 'ready',
message: 'Ready',
startedAt: '2026-03-12T00:00:00.000Z',
updatedAt: '2026-03-12T00:00:01.000Z',
},
};
}
return { statusCode: 404, body: { error: `Unhandled ${method} ${url}` } };
});
try {
process.env.CLAUDE_TEAM_CONTROL_URL = 'http://127.0.0.1:1';
writeControlApiState(claudeDir, server.baseUrl);
const launched = await controller.runtime.launchTeam({
cwd: '/tmp/project',
});
expect(launched.runId).toBe('run-fresh');
expect(launched.progress.state).toBe('ready');
} finally {
// Restore the env var exactly: delete it if it was unset before the test.
if (previousUrl === undefined) {
delete process.env.CLAUDE_TEAM_CONTROL_URL;
} else {
process.env.CLAUDE_TEAM_CONTROL_URL = previousUrl;
}
await server.close();
}
});
// Scenario: mirror image of the previous case. The state file holds
// http://127.0.0.1:1 (presumably unreachable) and CLAUDE_TEAM_CONTROL_URL points at
// the live server, so the launch can only succeed by falling back to the env URL.
it('falls back to the env endpoint when the published control file is stale', async () => {
const claudeDir = makeClaudeDir();
const controller = createController({ teamName: 'my-team', claudeDir });
// Remember the caller's env value so the finally block can restore it.
const previousUrl = process.env.CLAUDE_TEAM_CONTROL_URL;
// Live server: answers the launch call and reports the run as ready; everything else is 404.
const server = await startControlServer(async ({ method, url }) => {
if (method === 'POST' && url === '/api/teams/my-team/launch') {
return { body: { runId: 'run-env' } };
}
if (method === 'GET' && url === '/api/teams/provisioning/run-env') {
return {
body: {
runId: 'run-env',
teamName: 'my-team',
state: 'ready',
message: 'Ready',
startedAt: '2026-03-12T00:00:00.000Z',
updatedAt: '2026-03-12T00:00:01.000Z',
},
};
}
return { statusCode: 404, body: { error: `Unhandled ${method} ${url}` } };
});
try {
process.env.CLAUDE_TEAM_CONTROL_URL = server.baseUrl;
writeControlApiState(claudeDir, 'http://127.0.0.1:1');
const launched = await controller.runtime.launchTeam({
cwd: '/tmp/project',
});
expect(launched.runId).toBe('run-env');
expect(launched.progress.state).toBe('ready');
} finally {
// Restore the env var exactly: delete it if it was unset before the test.
if (previousUrl === undefined) {
delete process.env.CLAUDE_TEAM_CONTROL_URL;
} else {
process.env.CLAUDE_TEAM_CONTROL_URL = previousUrl;
}
await server.close();
}
});
// Scenario: both endpoints are reachable, but the one in the state file answers
// every request with 404. The launch must succeed through the env endpoint.
it('falls back to the next control endpoint when the first one responds with 404', async () => {
const claudeDir = makeClaudeDir();
const controller = createController({ teamName: 'my-team', claudeDir });
// Remember the caller's env value so the finally block can restore it.
const previousUrl = process.env.CLAUDE_TEAM_CONTROL_URL;
// Stale server: reachable, but replies 404 to everything.
const staleServer = await startControlServer(async () => {
return { statusCode: 404, body: { error: 'Not found' } };
});
// Live server: answers the launch call and reports the run as ready.
const liveServer = await startControlServer(async ({ method, url }) => {
if (method === 'POST' && url === '/api/teams/my-team/launch') {
return { body: { runId: 'run-live' } };
}
if (method === 'GET' && url === '/api/teams/provisioning/run-live') {
return {
body: {
runId: 'run-live',
teamName: 'my-team',
state: 'ready',
message: 'Ready',
startedAt: '2026-03-12T00:00:00.000Z',
updatedAt: '2026-03-12T00:00:01.000Z',
},
};
}
return { statusCode: 404, body: { error: `Unhandled ${method} ${url}` } };
});
try {
writeControlApiState(claudeDir, staleServer.baseUrl);
process.env.CLAUDE_TEAM_CONTROL_URL = liveServer.baseUrl;
const launched = await controller.runtime.launchTeam({
cwd: '/tmp/project',
});
expect(launched.runId).toBe('run-live');
expect(launched.progress.state).toBe('ready');
} finally {
// Restore the env var exactly: delete it if it was unset before the test.
if (previousUrl === undefined) {
delete process.env.CLAUDE_TEAM_CONTROL_URL;
} else {
process.env.CLAUDE_TEAM_CONTROL_URL = previousUrl;
}
// NOTE(review): the closes run sequentially; if staleServer.close() rejects,
// liveServer.close() is never awaited.
await staleServer.close();
await liveServer.close();
}
});
});

View file

@ -222,6 +222,50 @@ describe('agent-teams-mcp tools', () => {
}
});
// Scenario: no control URL is passed to the tool; the only endpoint this test
// provides is written to team-control-api.json inside claudeDir.
// NOTE(review): CLAUDE_TEAM_CONTROL_URL is neither set nor cleared here, so the
// outcome presumably also depends on the ambient environment — confirm.
it('discovers the control endpoint from the published state file', async () => {
const claudeDir = makeClaudeDir();
const statePath = path.join(claudeDir, 'team-control-api.json');
// Live server: answers the launch call and reports the run as ready; everything else is 404.
const server = await startControlServer(async ({ method, url }) => {
if (method === 'POST' && url === '/api/teams/alpha/launch') {
return { body: { runId: 'run-state-file' } };
}
if (method === 'GET' && url === '/api/teams/provisioning/run-state-file') {
return {
body: {
runId: 'run-state-file',
teamName: 'alpha',
state: 'ready',
message: 'Ready',
startedAt: '2026-03-12T00:00:00.000Z',
updatedAt: '2026-03-12T00:00:02.000Z',
},
};
}
return { statusCode: 404, body: { error: `Unhandled ${method} ${url}` } };
});
try {
// Publish the server's base URL the same way the state file is laid out elsewhere in these tests.
fs.writeFileSync(
statePath,
JSON.stringify({ baseUrl: server.baseUrl, updatedAt: new Date().toISOString() }, null, 2)
);
const launched = parseJsonToolResult(
await getTool('team_launch').execute({
teamName: 'alpha',
claudeDir,
cwd: '/tmp/project',
})
);
expect(launched.runId).toBe('run-state-file');
expect(launched.progress.state).toBe('ready');
} finally {
await server.close();
}
});
it('covers task lifecycle, attachments, relationships, kanban, and review flows', async () => {
const claudeDir = makeClaudeDir();
const teamName = 'alpha';

View file

@ -54,7 +54,9 @@ export function registerHttpRoutes(
registerSessionRoutes(app, services);
registerSearchRoutes(app, services);
registerSubagentRoutes(app, services);
registerTeamRoutes(app, services);
if (services.teamProvisioningService) {
registerTeamRoutes(app, services);
}
registerNotificationRoutes(app);
registerConfigRoutes(app);
registerValidationRoutes(app);

View file

@ -14,14 +14,29 @@ type LaunchBody = Omit<TeamLaunchRequest, 'teamName'>;
const EFFORT_LEVELS = new Set<EffortLevel>(['low', 'medium', 'high']);
/** Marks a failure caused by invalid client input; getStatusCode maps it to HTTP 400 and shouldLogError skips logging for it. */
class HttpBadRequestError extends Error {}
/** Marks a request for team runtime control when no teamProvisioningService is configured; getStatusCode maps it to HTTP 501 and shouldLogError skips logging for it. */
class HttpFeatureUnavailableError extends Error {}
function getTeamProvisioningService(services: HttpServices) {
if (!services.teamProvisioningService) {
throw new Error('Team runtime control is not available in this mode');
throw new HttpFeatureUnavailableError('Team runtime control is not available in this mode');
}
return services.teamProvisioningService;
}
/**
 * Maps a thrown value to the HTTP status code the route should reply with.
 *
 * @param error - The caught value.
 * @param fallback - Status used for any error that is not one of the known HTTP error classes.
 * @returns 400 for bad-request errors, 501 for feature-unavailable errors, otherwise `fallback`.
 */
function getStatusCode(error: unknown, fallback: number = 500): number {
  return error instanceof HttpBadRequestError
    ? 400
    : error instanceof HttpFeatureUnavailableError
      ? 501
      : fallback;
}
/**
 * Decides whether a caught error deserves an error-level log entry.
 *
 * @param error - The caught value.
 * @returns `false` for bad-request and feature-unavailable errors, `true` for everything else.
 */
function shouldLogError(error: unknown): boolean {
  const isExpectedClientFacingError =
    error instanceof HttpBadRequestError || error instanceof HttpFeatureUnavailableError;
  return !isExpectedClientFacingError;
}
function assertAbsoluteCwd(cwd: unknown): string {
if (typeof cwd !== 'string' || cwd.trim().length === 0) {
throw new HttpBadRequestError('cwd must be a non-empty string');
@ -126,8 +141,8 @@ export function registerTeamRoutes(app: FastifyInstance, services: HttpServices)
);
return reply.send(response);
} catch (error) {
const statusCode = error instanceof HttpBadRequestError ? 400 : 500;
if (!(error instanceof HttpBadRequestError)) {
const statusCode = getStatusCode(error);
if (shouldLogError(error)) {
logger.error(
`Error in POST /api/teams/${request.params.teamName}/launch:`,
getErrorMessage(error)
@ -151,11 +166,13 @@ export function registerTeamRoutes(app: FastifyInstance, services: HttpServices)
teamProvisioningService.stopTeam(validatedTeamName.value!);
return reply.send(teamProvisioningService.getRuntimeState(validatedTeamName.value!));
} catch (error) {
logger.error(
`Error in POST /api/teams/${request.params.teamName}/stop:`,
getErrorMessage(error)
);
return reply.status(500).send({ error: getErrorMessage(error) });
if (shouldLogError(error)) {
logger.error(
`Error in POST /api/teams/${request.params.teamName}/stop:`,
getErrorMessage(error)
);
}
return reply.status(getStatusCode(error)).send({ error: getErrorMessage(error) });
}
}
);
@ -173,11 +190,13 @@ export function registerTeamRoutes(app: FastifyInstance, services: HttpServices)
getTeamProvisioningService(services).getRuntimeState(validatedTeamName.value!)
);
} catch (error) {
logger.error(
`Error in GET /api/teams/${request.params.teamName}/runtime:`,
getErrorMessage(error)
);
return reply.status(500).send({ error: getErrorMessage(error) });
if (shouldLogError(error)) {
logger.error(
`Error in GET /api/teams/${request.params.teamName}/runtime:`,
getErrorMessage(error)
);
}
return reply.status(getStatusCode(error)).send({ error: getErrorMessage(error) });
}
}
);
@ -194,8 +213,10 @@ export function registerTeamRoutes(app: FastifyInstance, services: HttpServices)
return reply.send(await getTeamProvisioningService(services).getProvisioningStatus(runId));
} catch (error) {
const message = getErrorMessage(error);
const statusCode = message === 'Unknown runId' ? 404 : 500;
logger.error(`Error in GET /api/teams/provisioning/${request.params.runId}:`, message);
const statusCode = message === 'Unknown runId' ? 404 : getStatusCode(error);
if (shouldLogError(error) && statusCode !== 404) {
logger.error(`Error in GET /api/teams/provisioning/${request.params.runId}:`, message);
}
return reply.status(statusCode).send({ error: message });
}
}
@ -209,8 +230,10 @@ export function registerTeamRoutes(app: FastifyInstance, services: HttpServices)
.map((teamName) => teamProvisioningService.getRuntimeState(teamName));
return reply.send(runtimeStates);
} catch (error) {
logger.error('Error in GET /api/teams/runtime/alive:', getErrorMessage(error));
return reply.status(500).send({ error: getErrorMessage(error) });
if (shouldLogError(error)) {
logger.error('Error in GET /api/teams/runtime/alive:', getErrorMessage(error));
}
return reply.status(getStatusCode(error)).send({ error: getErrorMessage(error) });
}
});
}

View file

@ -812,6 +812,9 @@ function initializeServices(): void {
onClaudeRootPathUpdated: (_claudeRootPath: string | null) => {
reconfigureLocalContextForClaudeRoot();
void schedulerService?.reloadForClaudeRootChange();
if (httpServer?.isRunning()) {
void syncTeamControlApiState().catch(() => undefined);
}
},
},
{
@ -858,7 +861,7 @@ function initializeServices(): void {
// Start HTTP server if enabled in config
const appConfig = configManager.getConfig();
if (appConfig.httpServer?.enabled) {
void startHttpServer(handleModeSwitch);
void startHttpServer(handleModeSwitch).catch(() => undefined);
}
logger.info('Services initialized successfully');
@ -897,6 +900,7 @@ async function startHttpServer(
} catch (error) {
await clearTeamControlApiState().catch(() => undefined);
logger.error('Failed to start HTTP server:', error);
throw error;
}
}

View file

@ -63,9 +63,6 @@ async function handleStart(): Promise<{
error?: string;
}> {
try {
if (httpServer.isRunning()) {
return { success: true, data: { running: true, port: httpServer.getPort() } };
}
await startServer();
configManager.updateConfig('httpServer', { enabled: true, port: httpServer.getPort() });
return { success: true, data: { running: true, port: httpServer.getPort() } };

View file

@ -9,6 +9,7 @@ import { Plus } from 'lucide-react';
import { MembersJsonEditor } from '../dialogs/MembersJsonEditor';
import { MemberDraftRow } from './MemberDraftRow';
import { getNextSuggestedMemberName } from './memberNameSets';
import { createMemberDraft, getWorkflowForExport } from './membersEditorUtils';
import type { MemberDraft } from './membersEditorTypes';
@ -145,7 +146,8 @@ export const MembersEditorSection = ({
};
const addMember = (): void => {
onChange([...members, createMemberDraft()]);
const suggestedName = getNextSuggestedMemberName(members.map((member) => member.name));
onChange([...members, createMemberDraft({ name: suggestedName })]);
};
const names = members.map((m) => m.name.trim().toLowerCase()).filter(Boolean);

View file

@ -0,0 +1,65 @@
/**
 * Themed groups of lowercase base names offered as default member names.
 * Each inner tuple is one set; `as const` keeps the entries as readonly literal types.
 */
const MEMBER_NAME_SETS = [
['atlas', 'nova', 'echo', 'vector', 'ember', 'pixel'],
['scout', 'forge', 'quill', 'signal', 'patch', 'guard'],
['aurora', 'cipher', 'relay', 'kernel', 'beacon', 'sable'],
['orbit', 'flux', 'delta', 'prism', 'comet', 'node'],
] as const;
/** Canonical comparison form of a member name: surrounding whitespace removed, then lowercased. */
function normalizeMemberName(name: string): string {
  const trimmed = name.trim();
  return trimmed.toLowerCase();
}
/**
 * Checks whether `name` is `baseName` itself or a dash-suffixed variant of it
 * (for example `atlas-2` for base `atlas`), after normalizing `name`.
 */
function belongsToBaseName(name: string, baseName: string): boolean {
  const candidate = normalizeMemberName(name);
  if (candidate === baseName) {
    return true;
  }
  return candidate.startsWith(`${baseName}-`);
}
/**
 * Picks the first name set that already has at least one of its base names
 * (or a suffixed variant) among `existingNames`; defaults to the first set.
 */
function getPreferredNameSet(existingNames: readonly string[]): readonly string[] {
  const isAlreadyInUse = (candidate: string): boolean =>
    existingNames.some((existing) => belongsToBaseName(existing, candidate));

  const matchingSet = MEMBER_NAME_SETS.find((nameSet) =>
    nameSet.some((candidate) => isAlreadyInUse(candidate))
  );
  return matchingSet ?? MEMBER_NAME_SETS[0];
}
/**
 * Returns `baseName` when it is free; otherwise the first `baseName-N`
 * (N counting up from 2) that is not among the normalized existing names.
 */
function createUniqueName(baseName: string, existingNames: readonly string[]): string {
  const taken = new Set<string>();
  for (const existing of existingNames) {
    taken.add(normalizeMemberName(existing));
  }

  if (!taken.has(baseName)) {
    return baseName;
  }

  for (let suffix = 2; ; suffix += 1) {
    const candidate = `${baseName}-${suffix}`;
    if (!taken.has(candidate)) {
      return candidate;
    }
  }
}
/**
 * Suggests a name for the next team member.
 *
 * Tries, in order: an unused name from the set the team already draws from,
 * then any unused name from any set, and finally a suffixed variant of a name
 * from the preferred set chosen by the current member count.
 *
 * @param existingNames - Names already assigned (compared trimmed and case-insensitively).
 * @returns A name that does not collide with any normalized existing name.
 */
export function getNextSuggestedMemberName(existingNames: readonly string[]): string {
  const taken = new Set<string>();
  for (const raw of existingNames) {
    const normalized = normalizeMemberName(raw);
    if (normalized) {
      taken.add(normalized);
    }
  }
  const isFree = (candidate: string): boolean => !taken.has(candidate);

  const preferredSet = getPreferredNameSet(existingNames);
  const freeInPreferred = preferredSet.find(isFree);
  if (freeInPreferred !== undefined) {
    return freeInPreferred;
  }

  for (const nameSet of MEMBER_NAME_SETS) {
    for (const candidate of nameSet) {
      if (isFree(candidate)) {
        return candidate;
      }
    }
  }

  // Every base name is in use: rotate through the preferred set and add a numeric suffix.
  const fallbackIndex = existingNames.length % preferredSet.length;
  return createUniqueName(preferredSet[fallbackIndex] ?? 'agent', existingNames);
}
export { MEMBER_NAME_SETS };

View file

@ -205,4 +205,33 @@ describe('HTTP team runtime routes', () => {
await app.close();
}
});
// Scenario: registerTeamRoutes is called directly with a services object that has no
// teamProvisioningService, so a team runtime request must answer 501 with the
// "not available in this mode" message.
it('returns 501 when team runtime routes are registered without a runtime service', async () => {
const app = Fastify();
registerTeamRoutes(
app,
{
// Empty stand-ins cast to the required service types; teamProvisioningService is deliberately omitted.
projectScanner: {} as HttpServices['projectScanner'],
sessionParser: {} as HttpServices['sessionParser'],
subagentResolver: {} as HttpServices['subagentResolver'],
chunkBuilder: {} as HttpServices['chunkBuilder'],
dataCache: {} as HttpServices['dataCache'],
updaterService: {} as HttpServices['updaterService'],
sshConnectionManager: {} as HttpServices['sshConnectionManager'],
} satisfies HttpServices
);
await app.ready();
try {
const response = await app.inject({
method: 'GET',
url: '/api/teams/runtime/alive',
});
expect(response.statusCode).toBe(501);
expect(response.json()).toEqual({ error: 'Team runtime control is not available in this mode' });
} finally {
await app.close();
}
});
});