262 lines
6.1 KiB
JSON
262 lines
6.1 KiB
JSON
{
|
|
"generatedAt": "2026-05-27T08:11:47.513Z",
|
|
"runsPerModel": 1,
|
|
"qualification": {
|
|
"minimumAverageScore": 80,
|
|
"minimumSuccessfulRuns": 1,
|
|
"minimumConsistencyScore": 85,
|
|
"requireNoHardFailures": true
|
|
},
|
|
"models": [
|
|
{
|
|
"model": "opencode/big-pickle",
|
|
"verdict": "recommended",
|
|
"confidence": "low",
|
|
"qualified": true,
|
|
"readinessScore": 100,
|
|
"averageScore": 100,
|
|
"consistencyScore": 100,
|
|
"behavioralAverageScore": 100,
|
|
"minScore": 100,
|
|
"successfulRuns": 1,
|
|
"countedRuns": 1,
|
|
"hardFailures": 0,
|
|
"providerInfraFailures": 0,
|
|
"runtimeTransportFailures": 0,
|
|
"modelBehaviorFailures": 0,
|
|
"harnessFailures": 0,
|
|
"p50DurationMs": 129420,
|
|
"p95DurationMs": 129420,
|
|
"stagePassRates": {
|
|
"launchBootstrap": {
|
|
"passed": 1,
|
|
"total": 1,
|
|
"rate": 100
|
|
},
|
|
"directReply": {
|
|
"passed": 1,
|
|
"total": 1,
|
|
"rate": 100
|
|
},
|
|
"peerRelayAB": {
|
|
"passed": 1,
|
|
"total": 1,
|
|
"rate": 100
|
|
},
|
|
"peerRelayBC": {
|
|
"passed": 1,
|
|
"total": 1,
|
|
"rate": 100
|
|
},
|
|
"concurrentReplies": {
|
|
"passed": 1,
|
|
"total": 1,
|
|
"rate": 100
|
|
},
|
|
"taskRefs": {
|
|
"passed": 1,
|
|
"total": 1,
|
|
"rate": 100
|
|
},
|
|
"cleanTranscript": {
|
|
"passed": 1,
|
|
"total": 1,
|
|
"rate": 100
|
|
},
|
|
"noDuplicateTokens": {
|
|
"passed": 1,
|
|
"total": 1,
|
|
"rate": 100
|
|
},
|
|
"latencyStable": {
|
|
"passed": 1,
|
|
"total": 1,
|
|
"rate": 100
|
|
}
|
|
},
|
|
"taskRefPassRates": {
|
|
"directReply": {
|
|
"passed": 1,
|
|
"total": 1,
|
|
"rate": 100
|
|
},
|
|
"peerRelayAB": {
|
|
"passed": 1,
|
|
"total": 1,
|
|
"rate": 100
|
|
},
|
|
"peerRelayBC": {
|
|
"passed": 1,
|
|
"total": 1,
|
|
"rate": 100
|
|
},
|
|
"concurrentBob": {
|
|
"passed": 1,
|
|
"total": 1,
|
|
"rate": 100
|
|
},
|
|
"concurrentTom": {
|
|
"passed": 1,
|
|
"total": 1,
|
|
"rate": 100
|
|
}
|
|
},
|
|
"protocolViolationTotals": {
|
|
"badMessages": 0,
|
|
"duplicateOrMissingTokens": 0,
|
|
"affectedRuns": 0
|
|
},
|
|
"stageFailureImpact": [
|
|
{
|
|
"stage": "cleanTranscript",
|
|
"failedRuns": 0,
|
|
"weightedLoss": 0,
|
|
"passRate": {
|
|
"passed": 1,
|
|
"total": 1,
|
|
"rate": 100
|
|
}
|
|
},
|
|
{
|
|
"stage": "concurrentReplies",
|
|
"failedRuns": 0,
|
|
"weightedLoss": 0,
|
|
"passRate": {
|
|
"passed": 1,
|
|
"total": 1,
|
|
"rate": 100
|
|
}
|
|
},
|
|
{
|
|
"stage": "directReply",
|
|
"failedRuns": 0,
|
|
"weightedLoss": 0,
|
|
"passRate": {
|
|
"passed": 1,
|
|
"total": 1,
|
|
"rate": 100
|
|
}
|
|
},
|
|
{
|
|
"stage": "latencyStable",
|
|
"failedRuns": 0,
|
|
"weightedLoss": 0,
|
|
"passRate": {
|
|
"passed": 1,
|
|
"total": 1,
|
|
"rate": 100
|
|
}
|
|
},
|
|
{
|
|
"stage": "launchBootstrap",
|
|
"failedRuns": 0,
|
|
"weightedLoss": 0,
|
|
"passRate": {
|
|
"passed": 1,
|
|
"total": 1,
|
|
"rate": 100
|
|
}
|
|
},
|
|
{
|
|
"stage": "noDuplicateTokens",
|
|
"failedRuns": 0,
|
|
"weightedLoss": 0,
|
|
"passRate": {
|
|
"passed": 1,
|
|
"total": 1,
|
|
"rate": 100
|
|
}
|
|
},
|
|
{
|
|
"stage": "peerRelayAB",
|
|
"failedRuns": 0,
|
|
"weightedLoss": 0,
|
|
"passRate": {
|
|
"passed": 1,
|
|
"total": 1,
|
|
"rate": 100
|
|
}
|
|
},
|
|
{
|
|
"stage": "peerRelayBC",
|
|
"failedRuns": 0,
|
|
"weightedLoss": 0,
|
|
"passRate": {
|
|
"passed": 1,
|
|
"total": 1,
|
|
"rate": 100
|
|
}
|
|
},
|
|
{
|
|
"stage": "taskRefs",
|
|
"failedRuns": 0,
|
|
"weightedLoss": 0,
|
|
"passRate": {
|
|
"passed": 1,
|
|
"total": 1,
|
|
"rate": 100
|
|
}
|
|
}
|
|
],
|
|
"scoreStability": {
|
|
"sampleSize": 1,
|
|
"minScore": 100,
|
|
"maxScore": 100,
|
|
"spread": 0,
|
|
"standardDeviation": 0,
|
|
"consistencyScore": 100
|
|
},
|
|
"dominantFailureCategory": "none",
|
|
"recommendationBlockers": [],
|
|
"runs": [
|
|
{
|
|
"runIndex": 1,
|
|
"passed": true,
|
|
"score": 100,
|
|
"countedForRecommendation": true,
|
|
"outcome": "passed",
|
|
"failureCategory": "none",
|
|
"primaryFailure": null,
|
|
"durationMs": 129420,
|
|
"hardFailure": false,
|
|
"stageDurationsMs": {
|
|
"setup": 168,
|
|
"launchBootstrap": 31364,
|
|
"materializeTasks": 29,
|
|
"directReply": 15080,
|
|
"peerRelayAB": 31900,
|
|
"peerRelayBC": 29178,
|
|
"concurrentReplies": 20867,
|
|
"hygiene": 1
|
|
},
|
|
"stageFailures": {},
|
|
"taskRefChecks": {
|
|
"directReply": true,
|
|
"peerRelayAB": true,
|
|
"peerRelayBC": true,
|
|
"concurrentBob": true,
|
|
"concurrentTom": true
|
|
},
|
|
"protocolViolations": {
|
|
"badMessages": 0,
|
|
"duplicateOrMissingTokens": []
|
|
},
|
|
"stages": {
|
|
"launchBootstrap": true,
|
|
"directReply": true,
|
|
"peerRelayAB": true,
|
|
"peerRelayBC": true,
|
|
"concurrentReplies": true,
|
|
"taskRefs": true,
|
|
"cleanTranscript": true,
|
|
"noDuplicateTokens": true,
|
|
"latencyStable": true
|
|
},
|
|
"diagnostics": [
|
|
"runId=37f103a7-cae5-4d48-b578-56cbabb466d9"
|
|
]
|
|
}
|
|
]
|
|
}
|
|
]
|
|
}
|