fix(team): include runtime logs in launch failure artifacts
Refs: 777genius/agent-teams-ai#140
This commit is contained in:
parent
9518ce920a
commit
b9cbdde502
2 changed files with 133 additions and 3 deletions
|
|
@ -26,6 +26,9 @@ const MAX_CLI_LOG_CHARS = 256_000;
|
|||
const MAX_TRACE_CHARS = 128_000;
|
||||
const MAX_COPIED_FILE_BYTES = 256 * 1024;
|
||||
const MAX_DIAGNOSTICS_COPY_FILE_BYTES = 128 * 1024;
|
||||
const RUNTIME_ARTIFACT_FILE_PATTERN = /^[^/\\]+(?:\.runtime\.jsonl|\.stdout\.log|\.stderr\.log)$/;
|
||||
const RUNTIME_ARTIFACT_LABEL_PATTERN =
|
||||
/^runtime\/[^/\\]+(?:\.runtime\.jsonl|\.stdout\.log|\.stderr\.log)$/;
|
||||
|
||||
type JsonRecord = Record<string, unknown>;
|
||||
|
||||
|
|
@ -63,6 +66,7 @@ export type LaunchFailureArtifactClassificationCode =
|
|||
| 'stdin_missing'
|
||||
| 'provider_quota'
|
||||
| 'provider_auth'
|
||||
| 'process_readiness_timeout'
|
||||
| 'model_no_bootstrap'
|
||||
| 'process_exited'
|
||||
| 'opencode_protocol'
|
||||
|
|
@ -289,6 +293,12 @@ export function classifyLaunchFailureArtifact(
|
|||
pattern:
|
||||
/401 unauthorized|not_logged_in|login required|auth(?:entication)? failed|api key.*(?:missing|invalid)|token refresh failed/i,
|
||||
},
|
||||
{
|
||||
code: 'process_readiness_timeout',
|
||||
confidence: 0.9,
|
||||
pattern:
|
||||
/did not become (?:runtime_ready|inbox_poller_ready)|timed out waiting for (?:runtime_ready|inbox_poller_ready)/i,
|
||||
},
|
||||
{
|
||||
code: 'opencode_protocol',
|
||||
confidence: 0.84,
|
||||
|
|
@ -431,6 +441,15 @@ function getRecord(value: unknown): JsonRecord | null {
|
|||
return value && typeof value === 'object' && !Array.isArray(value) ? (value as JsonRecord) : null;
|
||||
}
|
||||
|
||||
/**
 * Extracts the runtime artifact labels recorded in a launch-failure manifest.
 *
 * Only string entries of `manifestJson.artifactFiles` that match
 * `RUNTIME_ARTIFACT_LABEL_PATTERN` are kept; anything else (non-strings,
 * labels outside `runtime/`, labels containing path separators) is ignored.
 * Repeated entries are collapsed so a caller iterating the result handles
 * each label exactly once.
 *
 * @param manifestJson Parsed manifest object, or `null` when none is available.
 * @returns The unique matching labels in default `Array.prototype.sort` order;
 *   an empty array when `artifactFiles` is missing or not an array.
 */
function getRuntimeArtifactLabels(manifestJson: JsonRecord | null): string[] {
  const artifactFiles = manifestJson?.artifactFiles;
  if (!Array.isArray(artifactFiles)) return [];

  // The manifest is parsed from disk, so its entries are not trusted to be
  // well-formed or unique; a Set drops duplicates.
  const uniqueLabels = new Set<string>();
  for (const item of artifactFiles) {
    if (typeof item === 'string' && RUNTIME_ARTIFACT_LABEL_PATTERN.test(item)) {
      uniqueLabels.add(item);
    }
  }
  return [...uniqueLabels].sort();
}
|
||||
|
||||
function resolveArtifactManifestPath(
|
||||
teamDir: string,
|
||||
latestJson: JsonRecord | null,
|
||||
|
|
@ -489,6 +508,18 @@ export async function readTeamLaunchFailureDiagnosticsBundle(
|
|||
});
|
||||
}
|
||||
|
||||
if (resolvedManifest.path) {
|
||||
const artifactDirectory = path.dirname(resolvedManifest.path);
|
||||
for (const artifactName of getRuntimeArtifactLabels(manifestJson)) {
|
||||
files.push(
|
||||
await readDiagnosticsCopyFile(
|
||||
`launch-failure-artifacts/${artifactName}`,
|
||||
path.join(artifactDirectory, artifactName)
|
||||
)
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
files.push(
|
||||
await readDiagnosticsCopyFile(
|
||||
'bootstrap-journal.jsonl',
|
||||
|
|
@ -559,10 +590,32 @@ export async function readTeamLaunchFailureDiagnosticsBundle(
|
|||
};
|
||||
}
|
||||
|
||||
function getKnownLaunchArtifactSourceFiles(teamName: string): CopiedArtifactFile[] {
|
||||
/**
 * Enumerates the runtime log files directly inside `<teamDir>/runtime` that
 * should be copied into a launch-failure artifact pack.
 *
 * Only regular files whose names match `RUNTIME_ARTIFACT_FILE_PATTERN` are
 * returned; each is given the artifact name `runtime/<file name>`.
 *
 * @param teamDir Directory that contains the `runtime` subdirectory.
 * @returns Matching files ordered by artifact name (`localeCompare`). Resolves
 *   to an empty array when the directory does not exist (`ENOENT`); any other
 *   enumeration error is logged as a warning and also yields an empty array.
 */
async function getRuntimeLaunchArtifactSourceFiles(teamDir: string): Promise<CopiedArtifactFile[]> {
  const runtimeDir = path.join(teamDir, 'runtime');
  try {
    const collected: CopiedArtifactFile[] = [];
    const dirEntries = await fs.promises.readdir(runtimeDir, { withFileTypes: true });
    for (const dirEntry of dirEntries) {
      if (!dirEntry.isFile()) continue;
      if (!RUNTIME_ARTIFACT_FILE_PATTERN.test(dirEntry.name)) continue;
      collected.push({
        sourcePath: path.join(runtimeDir, dirEntry.name),
        artifactName: `runtime/${dirEntry.name}`,
      });
    }
    collected.sort((a, b) => a.artifactName.localeCompare(b.artifactName));
    return collected;
  } catch (error) {
    // A missing runtime directory is expected and stays silent; anything else
    // is reported but never fails the caller.
    const isMissingDirectory = (error as NodeJS.ErrnoException).code === 'ENOENT';
    if (!isMissingDirectory) {
      logger.warn('[ArtifactPack] Failed to enumerate runtime artifacts', {
        teamDir,
        error: String(error),
      });
    }
    return [];
  }
}
|
||||
|
||||
async function getKnownLaunchArtifactSourceFiles(teamName: string): Promise<CopiedArtifactFile[]> {
|
||||
const bootstrapStatePath = getTeamBootstrapStatePath(teamName);
|
||||
const teamDir = path.dirname(bootstrapStatePath);
|
||||
return [
|
||||
const launchFiles: CopiedArtifactFile[] = [
|
||||
{
|
||||
sourcePath: getTeamLaunchStatePath(teamName),
|
||||
artifactName: 'launch-state.json',
|
||||
|
|
@ -584,6 +637,7 @@ function getKnownLaunchArtifactSourceFiles(teamName: string): CopiedArtifactFile
|
|||
artifactName: 'bootstrap-lock-metadata.json',
|
||||
},
|
||||
];
|
||||
return [...launchFiles, ...(await getRuntimeLaunchArtifactSourceFiles(teamDir))];
|
||||
}
|
||||
|
||||
async function writeArtifactTextFile(
|
||||
|
|
@ -593,6 +647,8 @@ async function writeArtifactTextFile(
|
|||
files: string[]
|
||||
): Promise<void> {
|
||||
const targetPath = path.join(directory, artifactName);
|
||||
assertPathWithin(directory, targetPath);
|
||||
await fs.promises.mkdir(path.dirname(targetPath), { recursive: true });
|
||||
await atomicWriteAsync(targetPath, `${redactLaunchFailureArtifactText(rawText).trimEnd()}\n`);
|
||||
files.push(artifactName);
|
||||
}
|
||||
|
|
@ -639,7 +695,7 @@ export async function writeTeamLaunchFailureArtifactPack(
|
|||
);
|
||||
}
|
||||
|
||||
for (const source of getKnownLaunchArtifactSourceFiles(input.teamName)) {
|
||||
for (const source of await getKnownLaunchArtifactSourceFiles(input.teamName)) {
|
||||
const read = await readBoundedTextFile(source.sourcePath);
|
||||
if (read.text !== undefined) {
|
||||
await writeArtifactTextFile(directory, source.artifactName, read.text, files);
|
||||
|
|
|
|||
|
|
@ -146,6 +146,80 @@ describe('TeamLaunchFailureArtifactPack', () => {
|
|||
expect(launchStateContent).not.toContain('sk-ant-');
|
||||
});
|
||||
|
||||
it('copies runtime process logs into the launch failure artifact pack', async () => {
  const teamName = 'runtime-artifact-team';
  const runId = 'run-readiness-timeout';
  const runtimeDir = path.join(getTeamsBasePath(), teamName, 'runtime');
  await fs.mkdir(runtimeDir, { recursive: true });

  // Fixture files seeded into <team>/runtime: three recognised runtime logs
  // (two of them carrying secrets) plus one file that must be ignored.
  const runtimeFixtures: Array<[fileName: string, contents: string]> = [
    ['alice.runtime.jsonl', '{"type":"runtime_ready","token":"abcdefghijklmnopqrstuvwxyz123456"}\n'],
    ['alice.stdout.log', 'stdout OPENAI_API_KEY=sk-proj-cccccccccccccccccccccccccccccccccccccccc\n'],
    [
      'alice.stderr.log',
      'stderr Teammate process alice did not become inbox_poller_ready: timed out waiting for inbox_poller_ready\n',
    ],
    ['ignored.txt', 'ignore me\n'],
  ];
  for (const [fileName, contents] of runtimeFixtures) {
    await fs.writeFile(path.join(runtimeDir, fileName), contents, 'utf8');
  }

  const result = await writeTeamLaunchFailureArtifactPack({
    teamName,
    runId,
    reason: 'launch_progress_failed',
    memberSpawnStatuses: {
      alice: {
        status: 'error',
        launchState: 'failed_to_start',
        hardFailureReason:
          'Teammate process alice did not become inbox_poller_ready: timed out waiting for inbox_poller_ready',
        updatedAt: '2026-05-09T00:01:00.000Z',
      },
    },
  });

  // The manifest classifies the failure and lists only the recognised runtime logs.
  const manifestText = await fs.readFile(result.manifestPath, 'utf8');
  const manifest = JSON.parse(manifestText) as {
    artifactFiles: string[];
    classification: { code: string };
  };
  expect(manifest.classification.code).toBe('process_readiness_timeout');
  const expectedManifestEntries = [
    'runtime/alice.runtime.jsonl',
    'runtime/alice.stdout.log',
    'runtime/alice.stderr.log',
  ];
  for (const entry of expectedManifestEntries) {
    expect(manifest.artifactFiles).toContain(entry);
  }
  expect(manifest.artifactFiles).not.toContain('runtime/ignored.txt');

  // Copied files must have their secrets redacted.
  const readCopiedRuntimeFile = (fileName: string): Promise<string> =>
    fs.readFile(path.join(result.directory, 'runtime', fileName), 'utf8');

  const copiedStdout = await readCopiedRuntimeFile('alice.stdout.log');
  expect(copiedStdout).toContain('OPENAI_API_KEY=[REDACTED]');
  expect(copiedStdout).not.toContain('sk-proj-');

  const copiedEvents = await readCopiedRuntimeFile('alice.runtime.jsonl');
  expect(copiedEvents).toContain('"token":"[REDACTED]"');
  expect(copiedEvents).not.toContain('abcdefghijklmnopqrstuvwxyz123456');

  // The diagnostics bundle exposes the same runtime logs, still redacted.
  const bundle = await readTeamLaunchFailureDiagnosticsBundle(teamName, runId);
  const labels = bundle.files.map((file) => file.label);
  const expectedBundleLabels = [
    'launch-failure-artifacts/runtime/alice.runtime.jsonl',
    'launch-failure-artifacts/runtime/alice.stdout.log',
    'launch-failure-artifacts/runtime/alice.stderr.log',
  ];
  for (const label of expectedBundleLabels) {
    expect(labels).toContain(label);
  }
  const bundledStdout = bundle.files.find(
    (file) => file.label === 'launch-failure-artifacts/runtime/alice.stdout.log'
  );
  expect(bundledStdout?.content).toContain('OPENAI_API_KEY=[REDACTED]');
});
|
||||
|
||||
it('redacts common bearer and token-shaped secrets', () => {
|
||||
const redacted = redactLaunchFailureArtifactText(
|
||||
'Authorization: Bearer abcdefghijklmnopqrstuvwxyz123456 token: abcdefghijklmnopqrstuvwxyz123456 ANTHROPIC_AUTH_TOKEN=lmstudio CODEX_API_KEY="quoted-codex-token" OPENROUTER_API_KEY=\'quoted-router-token\''
|
||||
|
|
|
|||
Loading…
Reference in a new issue