test(opencode): update live semantic model results

This commit is contained in:
777genius 2026-05-27 12:14:56 +03:00
parent ebcc0e717f
commit 5046d80fdf
3 changed files with 39 additions and 15 deletions

View file

@ -1,5 +1,5 @@
{
"generatedAt": "2026-05-20T15:44:19.975Z",
"generatedAt": "2026-05-27T08:11:47.513Z",
"runsPerModel": 1,
"qualification": {
"minimumAverageScore": 80,
@ -25,8 +25,8 @@
"runtimeTransportFailures": 0,
"modelBehaviorFailures": 0,
"harnessFailures": 0,
"p50DurationMs": 201184,
"p95DurationMs": 201184,
"p50DurationMs": 129420,
"p95DurationMs": 129420,
"stagePassRates": {
"launchBootstrap": {
"passed": 1,
@ -217,16 +217,16 @@
"outcome": "passed",
"failureCategory": "none",
"primaryFailure": null,
"durationMs": 201184,
"durationMs": 129420,
"hardFailure": false,
"stageDurationsMs": {
"setup": 322,
"launchBootstrap": 44102,
"materializeTasks": 40,
"directReply": 20838,
"peerRelayAB": 41022,
"peerRelayBC": 47832,
"concurrentReplies": 29138,
"setup": 168,
"launchBootstrap": 31364,
"materializeTasks": 29,
"directReply": 15080,
"peerRelayAB": 31900,
"peerRelayBC": 29178,
"concurrentReplies": 20867,
"hygiene": 1
},
"stageFailures": {},
@ -253,7 +253,7 @@
"latencyStable": true
},
"diagnostics": [
"runId=85e7ecb6-0767-4606-90d2-c926937b22f5"
"runId=37f103a7-cae5-4d48-b578-56cbabb466d9"
]
}
]

View file

@ -1,6 +1,6 @@
# OpenCode Model Gauntlet Results
Generated: 2026-05-20T15:44:19.975Z
Generated: 2026-05-27T08:11:47.513Z
Runs per model: 1
Recommended threshold: average >= 80, successful runs >= 1, consistency >= 85, hard failures = 0
@ -13,7 +13,7 @@ Scoring weights: launchBootstrap=15, directReply=10, peerRelayAB=15, peerRelayBC
| Model | Verdict | Confidence | Readiness | Consistency | Score Spread | Behavior Avg | Overall Avg | Counted | Pass Runs | Weakest Stage | Weakest TaskRef | Dominant Failure | Blockers | Provider Infra | Runtime Transport | Model Fails | Protocol Runs | p50 | p95 |
| --- | --- | --- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | --- | --- | --- | --- | ---: | ---: | ---: | ---: | ---: | ---: |
| `opencode/big-pickle` | Recommended | low | 100 | 100 | 0 | 100 | 100 | 1/1 | 1/1 | cleanTranscript 1/1 (100%) | concurrentBob 1/1 (100%) | none | - | 0 | 0 | 0 | 0 | 201184ms | 201184ms |
| `opencode/big-pickle` | Recommended | low | 100 | 100 | 0 | 100 | 100 | 1/1 | 1/1 | cleanTranscript 1/1 (100%) | concurrentBob 1/1 (100%) | none | - | 0 | 0 | 0 | 0 | 129420ms | 129420ms |
## opencode/big-pickle
@ -33,5 +33,5 @@ Protocol totals: badMessages=0, duplicateOrMissingTokens=0, affectedRuns=0.
| Run | Outcome | Category | Score | Counted | Duration | Failed Stages | Slowest Stage | TaskRefs | Protocol | Diagnostics |
| ---: | --- | --- | ---: | --- | ---: | --- | --- | --- | --- | --- |
| 1 | passed | none | 100 | yes | 201184ms | - | peerRelayBC:47832ms | directReply:ok, peerRelayAB:ok, peerRelayBC:ok, concurrentBob:ok, concurrentTom:ok | - | runId=85e7ecb6-0767-4606-90d2-c926937b22f5 |
| 1 | passed | none | 100 | yes | 129420ms | - | peerRelayAB:31900ms | directReply:ok, peerRelayAB:ok, peerRelayBC:ok, concurrentBob:ok, concurrentTom:ok | - | runId=37f103a7-cae5-4d48-b578-56cbabb466d9 |

View file

@ -0,0 +1,24 @@
{
"generatedAt": "2026-05-27T08:11:34.489Z",
"models": [
{
"model": "opencode/big-pickle",
"passed": true,
"score": 100,
"durationMs": 86233,
"stages": {
"launchBootstrap": true,
"directReply": true,
"peerRelay": true,
"taskRefs": true,
"longPrompt": true,
"latencyStable": true
},
"diagnostics": [
"runId=5a90cf2a-d00e-4e26-a514-5efecf1914af",
"directDelivery={\"delivered\":true,\"accepted\":true,\"responsePending\":false,\"responseState\":\"responded_visible_message\",\"ledgerStatus\":\"responded\",\"visibleReplyMessageId\":\"d4065728-b244-4e53-a8cf-d33d7de62a6f\",\"visibleReplyCorrelation\":\"relayOfMessageId\",\"diagnostics\":[\"OpenCode app MCP is connected for message delivery.\",\"OpenCode prompt_async accepted after a turn-settled guard; response observation remains delegated to durable app-side ledger reconciliation.\"]}",
"peerDelivery={\"delivered\":true,\"accepted\":true,\"responsePending\":false,\"responseState\":\"responded_visible_message\",\"ledgerStatus\":\"responded\",\"visibleReplyMessageId\":\"37187280-1220-44da-a5d3-a3fdf812cc3a\",\"visibleReplyCorrelation\":\"relayOfMessageId\",\"diagnostics\":[\"OpenCode app MCP is connected for message delivery.\",\"OpenCode prompt_async accepted after a turn-settled guard; response observation remains delegated to durable app-side ledger reconciliation.\"]}"
]
}
]
}