From fcf63898563ff4f91b43e37083b7f7c0e2eaeff1 Mon Sep 17 00:00:00 2001 From: hex2077 Date: Wed, 30 Jul 2025 16:37:07 +0800 Subject: [PATCH] =?UTF-8?q?feat(api):=20=E6=94=AF=E6=8C=81=E9=80=9A?= =?UTF-8?q?=E8=BF=87=E8=B7=AF=E5=BE=84=E6=AE=B5=E6=8C=87=E5=AE=9AMODEL=5FP?= =?UTF-8?q?ROVIDER=E5=B9=B6=E4=BC=98=E5=8C=96=E7=B3=BB=E7=BB=9F=E6=8F=90?= =?UTF-8?q?=E7=A4=BA=E8=AF=8D=E6=8F=90=E5=8F=96=E9=80=BB=E8=BE=91?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 本次提交主要包含以下变更: 1. 在API服务器中新增通过URL路径段自动识别和切换MODEL_PROVIDER的功能,提升接口灵活性 2. 重构各提供商策略中的系统提示词提取逻辑,统一使用common.js中新增的extractSystemPromptFromRequestBody工具函数,提高代码复用性和维护性 3. 更新README文档,完善架构设计模式说明、数据流处理流程以及多模态内容和不同提供商的使用示例 --- README-EN.md | 69 +++++++++++++++++++++++++++++++++-- README.md | 69 +++++++++++++++++++++++++++++++++-- src/api-server.js | 33 ++++++++++++----- src/claude/claude-kiro.js | 2 +- src/claude/claude-strategy.js | 16 ++------ src/common.js | 63 ++++++++++++++++++++++++++++++++ src/gemini/gemini-strategy.js | 22 ++--------- src/openai/openai-strategy.js | 18 ++------- 8 files changed, 230 insertions(+), 62 deletions(-) diff --git a/README-EN.md b/README-EN.md index 620af3f..b66eeae 100644 --- a/README-EN.md +++ b/README-EN.md @@ -54,19 +54,36 @@ Leaving behind the simple structure of the past, we have introduced a more profe * Stores shared constants, utility functions, and common handlers for the project, making the code cleaner and more efficient. * **`src/gemini/`, `src/openai/`, `src/claude/`**: 📦 **Provider Implementation Directories** - * Each directory contains the core logic, API calls, and strategy implementations for the corresponding service provider, with a clear structure that makes it easy for you to add more new service providers in the future. Among them, `src/openai/openai-kiro.js` provides a special implementation for the Kiro API. 
+ * Each directory contains the core logic, API calls, and strategy implementations for the corresponding service provider, with a clear structure that makes it easy for you to add more new service providers in the future. Among them, `src/claude/claude-kiro.js` provides a special implementation for the Kiro API. * **`tests/`**: 🧪 **Test Directory** * Contains a complete integration test suite covering all API endpoints, authentication methods, and error handling scenarios to ensure project stability and reliability. ---- +### 🏗️ Architecture Design Patterns + +The project adopts multiple modern design patterns to ensure code maintainability and extensibility: + +* **Adapter Pattern**: `src/adapter.js` provides a unified interface for different AI services +* **Strategy Pattern**: `src/provider-strategies.js` handles request/response conversion for different protocols +* **Factory Pattern**: Dynamically creates and manages service adapter instances +* **Singleton Pattern**: Caching and reusing service adapter instances + +### 🔄 Data Flow Processing + +1. **Request Reception**: HTTP server receives client requests +2. **Authentication Verification**: Unified verification of multiple authentication methods +3. **Protocol Recognition**: Identifies client protocol based on endpoint and request headers +4. **Format Conversion**: Converts requests to target provider format +5. **Service Call**: Calls specific AI service through adapter +6. **Response Conversion**: Converts service response back to client expected format +7. **Streaming Processing**: Supports real-time streaming response transmission ### 🔧 Usage Instructions * **MCP Support**: While the built-in command functions of the original Gemini CLI are not available, this project perfectly supports MCP (Model Context Protocol) and can work with MCP-compatible clients for more powerful functionality extensions. 
* **Multimodal Capabilities**: Supports multimodal inputs such as images and documents, providing you with a richer interactive experience. * **Latest Model Support**: Supports the latest **Kimi K2** and **GLM-4.5** models. Simply configure the corresponding OpenAI or Claude compatible interfaces in `config.json` to use them. -* **Kiro API**: Using Kiro API requires [Download Kiro client](https://aibook.ren/archives/kiro-install) and completing authorized login to generate kiro-auth-token.json. **Recommended for use with Claude Code for the best experience**. . +* **Kiro API**: Using the Kiro API requires [downloading the Kiro client](https://aibook.ren/archives/kiro-install) and completing an authorized login to generate kiro-auth-token.json. **Recommended for use with Claude Code for the best experience**. --- @@ -81,9 +98,19 @@ Leaving behind the simple structure of the past, we have introduced a more profe #### OpenAI Compatible Interface (`/v1/...`) * 🌍 **Perfect Compatibility**: Implements the core `/v1/models` and `/v1/chat/completions` endpoints. -* 🔄 **Automatic Format Conversion**: Internally and seamlessly converts requests/responses between different model formats and the OpenAI format. +* 🔄 **Automatic Format Conversion**: Internally and seamlessly converts requests/responses between different model formats and the OpenAI format, supporting multimodal content. * 💨 **Streaming Support**: Fully supports OpenAI's streaming responses (`"stream": true`), providing a typewriter-like real-time experience. +#### Gemini Native Interface (`/v1beta/...`) +* 🎯 **Native Support**: Complete support for Gemini API's native format and features. +* 🔧 **Advanced Features**: Supports system instructions, tool calls, multimodal input and other advanced features. +* 📊 **Detailed Statistics**: Provides complete token usage statistics and model information. + +#### Claude Native Interface (`/v1/messages`) +* 🤖 **Claude Dedicated**: Complete support for Claude Messages API format. 
+* 🛠️ **Tool Integration**: Supports Claude's tool usage and function calling features. +* 🎨 **Multimodal**: Supports images, audio and other input formats. + --- ## 📦 Installation Guide @@ -206,6 +233,40 @@ All requests use the standard OpenAI format. ] }' ``` + +* **Multimodal Content Generation** + ```bash + curl http://localhost:3000/v1/chat/completions \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer 123456" \ + -d '{ + "model": "gemini-2.5-flash", + "messages": [ + { + "role": "user", + "content": [ + {"type": "text", "text": "Describe this image"}, + {"type": "image_url", "image_url": {"url": "data:image/jpeg;base64,..."}} + ] + } + ] + }' + ``` + +* **Using Different Providers (via Path)** + ```bash + # Using Gemini + curl http://localhost:3000/gemini-cli-oauth/v1/chat/completions \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer 123456" \ + -d '{"model": "gemini-2.5-flash", "messages": [{"role": "user", "content": "Hello"}]}' + + # Using Claude + curl http://localhost:3000/claude-custom/v1/chat/completions \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer 123456" \ + -d '{"model": "claude-3-opus-20240229", "messages": [{"role": "user", "content": "Hello"}]}' + ``` * **Stream Generate Content** ```bash curl http://localhost:3000/v1/chat/completions \ diff --git a/README.md b/README.md index 213a11c..fefc337 100644 --- a/README.md +++ b/README.md @@ -54,10 +54,29 @@ * 存放着项目共享的常量、工具函数和通用处理器,让代码更加整洁和高效。 * **`src/gemini/`, `src/openai/`, `src/claude/`**: 📦 **提供商实现目录** - * 每个目录都包含了对应服务商的核心逻辑、API 调用和策略实现,结构清晰,便于您未来添加更多新的服务商。其中 `src/openai/openai-kiro.js` 提供了 Kiro API 的特殊实现。 + * 每个目录都包含了对应服务商的核心逻辑、API 调用和策略实现,结构清晰,便于您未来添加更多新的服务商。其中 `src/claude/claude-kiro.js` 提供了 Kiro API 的特殊实现。 * **`tests/`**: 🧪 **测试目录** - * 包含完整的集成测试套件,覆盖所有API端点、认证方式和错误处理场景,确保项目的稳定性和可靠性。 + * 包含完整的集成测试套件,覆盖所有API端点、认证方式和错误处理场景,确保项目的稳定性和可靠性。支持针对不同提供商的独立测试和完整的HTTP集成测试。 + +### 🏗️ 架构设计模式 + +项目采用多种现代设计模式,确保代码的可维护性和扩展性: + +* **适配器模式 (Adapter 
Pattern)**: `src/adapter.js` 为不同的 AI 服务提供统一接口 +* **策略模式 (Strategy Pattern)**: `src/provider-strategies.js` 处理不同协议的请求/响应转换 +* **工厂模式 (Factory Pattern)**: 动态创建和管理服务适配器实例 +* **单例模式 (Singleton Pattern)**: 服务适配器实例的缓存和复用 + +### 🔄 数据流处理 + +1. **请求接收**: HTTP 服务器接收客户端请求 +2. **认证验证**: 多种认证方式的统一验证 +3. **协议识别**: 根据端点和请求头识别客户端协议 +4. **格式转换**: 将请求转换为目标提供商格式 +5. **服务调用**: 通过适配器调用具体的 AI 服务 +6. **响应转换**: 将服务响应转换回客户端期望格式 +7. **流式处理**: 支持实时流式响应传输 --- @@ -81,9 +100,19 @@ #### OpenAI 兼容接口 (`/v1/...`) * 🌍 **完美兼容**: 实现了 `/v1/models` 和 `/v1/chat/completions` 核心端点。 -* 🔄 **自动格式转换**: 在内部自动将不同模型的请求/响应与 OpenAI 格式进行无缝转换。 +* 🔄 **自动格式转换**: 在内部自动将不同模型的请求/响应与 OpenAI 格式进行无缝转换,支持多模态内容。 * 💨 **流式传输支持**: 完全支持 OpenAI 的流式响应 (`"stream": true`),提供打字机般的实时体验。 +#### Gemini 原生接口 (`/v1beta/...`) +* 🎯 **原生支持**: 完整支持 Gemini API 的原生格式和功能。 +* 🔧 **高级功能**: 支持系统指令、工具调用、多模态输入等高级特性。 +* 📊 **详细统计**: 提供完整的 token 使用统计和模型信息。 + +#### Claude 原生接口 (`/v1/messages`) +* 🤖 **Claude 专用**: 完整支持 Claude Messages API 格式。 +* 🛠️ **工具集成**: 支持 Claude 的工具使用和函数调用功能。 +* 🎨 **多模态**: 支持图片、音频等多种输入格式。 + --- ## 📦 安装指南 @@ -206,6 +235,40 @@ ] }' ``` + +* **多模态内容生成** + ```bash + curl http://localhost:3000/v1/chat/completions \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer 123456" \ + -d '{ + "model": "gemini-2.5-flash", + "messages": [ + { + "role": "user", + "content": [ + {"type": "text", "text": "描述这张图片"}, + {"type": "image_url", "image_url": {"url": "data:image/jpeg;base64,..."}} + ] + } + ] + }' + ``` + +* **使用不同提供商 (通过路径)** + ```bash + # 使用 Gemini + curl http://localhost:3000/gemini-cli-oauth/v1/chat/completions \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer 123456" \ + -d '{"model": "gemini-2.5-flash", "messages": [{"role": "user", "content": "Hello"}]}' + + # 使用 Claude + curl http://localhost:3000/claude-custom/v1/chat/completions \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer 123456" \ + -d '{"model": "claude-3-opus-20240229", "messages": [{"role": "user", "content": 
"Hello"}]}' + ``` * **流式生成内容** ```bash curl http://localhost:3000/v1/chat/completions \ diff --git a/src/api-server.js b/src/api-server.js index 5585d58..e17276a 100644 --- a/src/api-server.js +++ b/src/api-server.js @@ -348,14 +348,9 @@ async function getSystemPromptFileContent(filePath) { async function initApiService(config) { // Make getApiService exportable and accept config // Initialize all known service adapters at startup - const providers = [ - MODEL_PROVIDER.OPENAI_CUSTOM, - MODEL_PROVIDER.GEMINI_CLI, - MODEL_PROVIDER.CLAUDE_CUSTOM, - MODEL_PROVIDER.KIRO_API - ]; - for (const provider of providers) { + for (const provider of Object.values(MODEL_PROVIDER)) { try { + console.log(`[Initialization] Initializing service adapter for ${provider}...`); getServiceAdapter({ ...config, MODEL_PROVIDER: provider }); } catch (error) { console.warn(`[Initialization Warning] Failed to initialize service adapter for ${provider}: ${error.message}`); @@ -392,11 +387,29 @@ function createRequestHandler(config) { delete req.headers['model-provider']; } - const apiService = await getApiService(currentConfig); const requestUrl = new URL(req.url, `http://${req.headers.host}`); - const path = requestUrl.pathname; - const method = req.method; + let path = requestUrl.pathname; + // Check if the first path segment matches a MODEL_PROVIDER and switch if it does + const pathSegments = path.split('/').filter(segment => segment.length > 0); + if (pathSegments.length > 0) { + const firstSegment = pathSegments[0]; + // Check if firstSegment is a valid MODEL_PROVIDER value + const isValidProvider = Object.values(MODEL_PROVIDER).includes(firstSegment); + if (firstSegment && isValidProvider) { + currentConfig.MODEL_PROVIDER = firstSegment; + console.log(`[Config] MODEL_PROVIDER overridden by path segment to: ${currentConfig.MODEL_PROVIDER}`); + // Remove the first segment from the path to maintain routing consistency + pathSegments.shift(); + path = '/' + pathSegments.join('/'); + // Update 
the requestUrl pathname as well + requestUrl.pathname = path; + } else if (firstSegment && !isValidProvider) { + console.log(`[Config] Ignoring invalid MODEL_PROVIDER in path segment: ${firstSegment}`); + } + } + const apiService = await getApiService(currentConfig); + const method = req.method; if (method === 'OPTIONS') { res.writeHead(200, { 'Content-Type': 'application/json' }); console.log("OPTIONS REQUEST SUCCESS"); diff --git a/src/claude/claude-kiro.js b/src/claude/claude-kiro.js index cd2c885..3dbcb1b 100644 --- a/src/claude/claude-kiro.js +++ b/src/claude/claude-kiro.js @@ -462,7 +462,7 @@ async initializeAuth(forceRefresh = false) { .map(part => part.text) .join(''); } - return String(message.content || ''); + return String(message.content || message); } /** diff --git a/src/claude/claude-strategy.js b/src/claude/claude-strategy.js index 4ff62a3..0152113 100644 --- a/src/claude/claude-strategy.js +++ b/src/claude/claude-strategy.js @@ -1,4 +1,5 @@ import { ProviderStrategy } from '../provider-strategy.js'; +import { extractSystemPromptFromRequestBody, MODEL_PROTOCOL_PREFIX } from '../common.js'; /** * Claude provider strategy implementation. @@ -51,10 +52,7 @@ class ClaudeStrategy extends ProviderStrategy { return requestBody; } - let existingSystemText = ''; - if (requestBody.system) { - existingSystemText = requestBody.system; - } + const existingSystemText = extractSystemPromptFromRequestBody(requestBody, MODEL_PROTOCOL_PREFIX.CLAUDE); const newSystemText = config.SYSTEM_PROMPT_MODE === 'append' && existingSystemText ? 
`${existingSystemText}\n${filePromptContent}` @@ -67,14 +65,8 @@ class ClaudeStrategy extends ProviderStrategy { } async manageSystemPrompt(requestBody) { - let incomingSystemText = ''; - if (typeof requestBody.system === 'string') { - incomingSystemText = requestBody.system; - } - if (typeof requestBody.system === 'object') { - incomingSystemText = JSON.stringify(requestBody.system); - } - await this._updateSystemPromptFile(incomingSystemText, 'claude'); + const incomingSystemText = extractSystemPromptFromRequestBody(requestBody, MODEL_PROTOCOL_PREFIX.CLAUDE); + await this._updateSystemPromptFile(incomingSystemText, MODEL_PROTOCOL_PREFIX.CLAUDE); } } diff --git a/src/common.js b/src/common.js index d86d9e1..6b93ef5 100644 --- a/src/common.js +++ b/src/common.js @@ -475,3 +475,66 @@ export function handleError(res, error) { }; res.end(JSON.stringify(errorPayload)); } + +/** + * 从请求体中提取系统提示词。 + * @param {Object} requestBody - 请求体对象。 + * @param {string} provider - 提供商类型('openai', 'gemini', 'claude')。 + * @returns {string} 提取到的系统提示词字符串。 + */ +export function extractSystemPromptFromRequestBody(requestBody, provider) { + let incomingSystemText = ''; + switch (provider) { + case MODEL_PROTOCOL_PREFIX.OPENAI: + const openaiSystemMessage = requestBody.messages?.find(m => m.role === 'system'); + if (openaiSystemMessage?.content) { + incomingSystemText = openaiSystemMessage.content; + } else if (requestBody.messages?.length > 0) { + // Fallback to first user message if no system message + const userMessage = requestBody.messages.find(m => m.role === 'user'); + if (userMessage) { + incomingSystemText = userMessage.content; + } + } + break; + case MODEL_PROTOCOL_PREFIX.GEMINI: + const geminiSystemInstruction = requestBody.system_instruction || requestBody.systemInstruction; + if (geminiSystemInstruction?.parts) { + incomingSystemText = geminiSystemInstruction.parts + .filter(p => p?.text) + .map(p => p.text) + .join('\n'); + } else if (requestBody.contents?.length > 0) { + // 
Fallback to first user content if no system instruction + const userContent = requestBody.contents[0]; + if (userContent?.parts) { + incomingSystemText = userContent.parts + .filter(p => p?.text) + .map(p => p.text) + .join('\n'); + } + } + break; + case MODEL_PROTOCOL_PREFIX.CLAUDE: + if (typeof requestBody.system === 'string') { + incomingSystemText = requestBody.system; + } else if (typeof requestBody.system === 'object') { + incomingSystemText = JSON.stringify(requestBody.system); + } else if (requestBody.messages?.length > 0) { + // Fallback to first user message if no system property + const userMessage = requestBody.messages.find(m => m.role === 'user'); + if (userMessage) { + if (Array.isArray(userMessage.content)) { + incomingSystemText = userMessage.content.map(block => block.text).join(''); + } else { + incomingSystemText = userMessage.content; + } + } + } + break; + default: + console.warn(`[System Prompt] Unknown provider: ${provider}`); + break; + } + return incomingSystemText; +} diff --git a/src/gemini/gemini-strategy.js b/src/gemini/gemini-strategy.js index 3b1cbcc..23cc342 100644 --- a/src/gemini/gemini-strategy.js +++ b/src/gemini/gemini-strategy.js @@ -1,4 +1,4 @@ -import { API_ACTIONS } from '../common.js'; +import { API_ACTIONS, extractSystemPromptFromRequestBody, MODEL_PROTOCOL_PREFIX } from '../common.js'; import { ProviderStrategy } from '../provider-strategy.js'; /** @@ -46,14 +46,7 @@ class GeminiStrategy extends ProviderStrategy { return requestBody; } - let existingSystemText = ''; - const currentSystemInstruction = requestBody.system_instruction || requestBody.systemInstruction; - if (currentSystemInstruction?.parts) { - existingSystemText = currentSystemInstruction.parts - .filter(p => p?.text) - .map(p => p.text) - .join('\n'); - } + const existingSystemText = extractSystemPromptFromRequestBody(requestBody, MODEL_PROTOCOL_PREFIX.GEMINI); const newSystemText = config.SYSTEM_PROMPT_MODE === 'append' && existingSystemText ? 
`${existingSystemText}\n${filePromptContent}` @@ -69,15 +62,8 @@ class GeminiStrategy extends ProviderStrategy { } async manageSystemPrompt(requestBody) { - let incomingSystemText = ''; - const geminiSystemInstruction = requestBody.system_instruction || requestBody.systemInstruction; - if (geminiSystemInstruction?.parts) { - incomingSystemText = geminiSystemInstruction.parts - .filter(p => p?.text) - .map(p => p.text) - .join('\n'); - } - await this._updateSystemPromptFile(incomingSystemText, 'gemini'); + const incomingSystemText = extractSystemPromptFromRequestBody(requestBody, MODEL_PROTOCOL_PREFIX.GEMINI); + await this._updateSystemPromptFile(incomingSystemText, MODEL_PROTOCOL_PREFIX.GEMINI); } } diff --git a/src/openai/openai-strategy.js b/src/openai/openai-strategy.js index 971af20..dbf4774 100644 --- a/src/openai/openai-strategy.js +++ b/src/openai/openai-strategy.js @@ -1,4 +1,5 @@ import { ProviderStrategy } from '../provider-strategy.js'; +import { extractSystemPromptFromRequestBody, MODEL_PROTOCOL_PREFIX } from '../common.js'; /** * OpenAI provider strategy implementation. @@ -51,11 +52,7 @@ class OpenAIStrategy extends ProviderStrategy { return requestBody; } - let existingSystemText = ''; - const systemMessage = requestBody.messages?.find(m => m.role === 'system'); - if (systemMessage) { - existingSystemText = systemMessage.content || ''; - } + const existingSystemText = extractSystemPromptFromRequestBody(requestBody, MODEL_PROTOCOL_PREFIX.OPENAI); const newSystemText = config.SYSTEM_PROMPT_MODE === 'append' && existingSystemText ? 
`${existingSystemText}\n${filePromptContent}` @@ -77,15 +74,8 @@ class OpenAIStrategy extends ProviderStrategy { async manageSystemPrompt(requestBody) { //console.log('[System Prompt] Managing system prompt for provider "openai".', requestBody); - let incomingSystemText = ''; - const systemMessage = requestBody.messages?.find(m => m.role === 'system'); - if (systemMessage?.content) { - incomingSystemText = systemMessage.content; - } - if (!incomingSystemText) { - incomingSystemText = requestBody.messages.filter(m => m.role === 'user')[0].content; - } - await this._updateSystemPromptFile(incomingSystemText, 'openai'); + const incomingSystemText = extractSystemPromptFromRequestBody(requestBody, MODEL_PROTOCOL_PREFIX.OPENAI); + await this._updateSystemPromptFile(incomingSystemText, MODEL_PROTOCOL_PREFIX.OPENAI); } }