refactor(kiro): 优化流式响应，立即发送 message_start，移除对 contextUsagePercentage 的等待逻辑

1. 立即发送 message_start 和 content_block_start 事件，不再等待 contextUsagePercentage
2. 使用 estimateInputTokens 预估输入 token 作为保底值
3. 移除 bufferedEvents 缓冲机制和 messageStartSent 状态标记
4. 简化 content 事件处理逻辑，直接 yield，不再做条件判断
5. 移除未收到 contextUsagePercentage 时抛出错误的逻辑
6. 更新 estimateInputTokens 方法注释，将其标记为备用方案而非废弃
2026-01-09 14:41:54 +08:00 · 2026-01-09 14:41:54 +08:00 · 876b92cc0e
commit 876b92cc0e
parent 2776adaa09
1 changed files with 30 additions and 54 deletions
--- a/src/claude/claude-kiro.js
+++ b/src/claude/claude-kiro.js
@ -1459,12 +1459,33 @@ export class KiroApiService {
        const finalModel = MODEL_MAPPING[model] ? model : this.modelName;
        console.log(`[Kiro] Calling generateContentStream with model: ${finalModel} (real streaming)`);

-        let inputTokens = 0;
-        let contextUsagePercentage = null;
+        // 预先估算 inputTokens 作为保底值，如果收到 contextUsagePercentage 会被覆盖
+        let inputTokens = this.estimateInputTokens(requestBody);
        const messageId = `${uuidv4()}`;

-        let messageStartSent = false;
-        const bufferedEvents = [];
+        // 立即发送 message_start，不等待 contextUsagePercentage
+        yield {
+            type: "message_start",
+            message: {
+                id: messageId,
+                type: "message",
+                role: "assistant",
+                model: model,
+                usage: {
+                    input_tokens: 0,
+                    output_tokens: 0,
+                    cache_creation_input_tokens: 0,
+                    cache_read_input_tokens: 0
+                },
+                content: []
+            }
+        };
+
+        yield {
+            type: "content_block_start",
+            index: 0,
+            content_block: { type: "text", text: "" }
+        };

        try {
            let totalContent = '';
@ -1474,54 +1495,16 @@ export class KiroApiService {

            for await (const event of this.streamApiReal('', finalModel, requestBody)) {
                if (event.type === 'contextUsage' && event.percentage) {
-                    contextUsagePercentage = event.percentage;
-                    inputTokens = this.calculateInputTokensFromPercentage(contextUsagePercentage);
-
-                    if (!messageStartSent) {
-                        yield {
-                            type: "message_start",
-                            message: {
-                                id: messageId,
-                                type: "message",
-                                role: "assistant",
-                                model: model,
-                                usage: {
-                                    input_tokens: inputTokens,
-                                    output_tokens: 0,
-                                    cache_creation_input_tokens: 0,
-                                    cache_read_input_tokens: 0
-                                },
-                                content: []
-                            }
-                        };
-
-                        yield {
-                            type: "content_block_start",
-                            index: 0,
-                            content_block: { type: "text", text: "" }
-                        };
-
-                        messageStartSent = true;
-
-                        for (const buffered of bufferedEvents) {
-                            yield buffered;
-                        }
-                        bufferedEvents.length = 0;
-                    }
+                    // 收到 contextUsagePercentage 时更新 inputTokens，用于最终的 message_delta
+                    inputTokens = this.calculateInputTokensFromPercentage(event.percentage);
                } else if (event.type === 'content' && event.content) {
                    totalContent += event.content;

-                    const contentEvent = {
+                    yield {
                        type: "content_block_delta",
                        index: 0,
                        delta: { type: "text_delta", text: event.content }
                    };
-
-                    if (messageStartSent) {
-                        yield contentEvent;
-                    } else {
-                        bufferedEvents.push(contentEvent);
-                    }
                } else if (event.type === 'toolUse') {
                    const tc = event.toolUse;
                    // 工具调用事件（包含 name 和 toolUseId）
@ -1585,12 +1568,6 @@ export class KiroApiService {
                currentToolCall = null;
            }

-            // Fallback: 如果 contextUsagePercentage 没有收到，抛出错误
-            if (!messageStartSent) {
-                console.error('[Kiro Stream] contextUsagePercentage not received from API - cannot calculate accurate input tokens');
-                throw new Error('Failed to receive contextUsagePercentage from Kiro API. Input token calculation requires this data.');
-            }
-
            // 检查文本内容中的 bracket 格式工具调用
            const bracketToolCalls = parseBracketToolCalls(totalContent);
            if (bracketToolCalls && bracketToolCalls.length > 0) {
@ -1697,11 +1674,10 @@ export class KiroApiService {
    }

    /**
-     * @deprecated Use contextUsagePercentage from API response instead
-     * Calculate input tokens from request body using Claude's official tokenizer
+     * Estimate input tokens from request body using Claude's official tokenizer
+     * Used as fallback when contextUsagePercentage is not available from API
     */
    estimateInputTokens(requestBody) {
-        console.warn('[Kiro] estimateInputTokens() is deprecated. Use contextUsagePercentage from API response instead.');
        let totalTokens = 0;

        // Count system prompt tokens