diff --git a/src/converters/strategies/ClaudeConverter.js b/src/converters/strategies/ClaudeConverter.js index 89e5fea..15a0bd9 100644 --- a/src/converters/strategies/ClaudeConverter.js +++ b/src/converters/strategies/ClaudeConverter.js @@ -8,7 +8,15 @@ import { BaseConverter } from '../BaseConverter.js'; import { checkAndAssignOrDefault, cleanJsonSchemaProperties as cleanJsonSchema, - determineReasoningEffortFromBudget + determineReasoningEffortFromBudget, + OPENAI_DEFAULT_MAX_TOKENS, + OPENAI_DEFAULT_TEMPERATURE, + OPENAI_DEFAULT_TOP_P, + GEMINI_DEFAULT_MAX_TOKENS, + GEMINI_DEFAULT_TEMPERATURE, + GEMINI_DEFAULT_TOP_P, + GEMINI_DEFAULT_INPUT_TOKEN_LIMIT, + GEMINI_DEFAULT_OUTPUT_TOKEN_LIMIT } from '../utils.js'; import { MODEL_PROTOCOL_PREFIX } from '../../common.js'; import { @@ -202,9 +210,9 @@ export class ClaudeConverter extends BaseConverter { const openaiRequest = { model: claudeRequest.model, messages: openaiMessages, - max_tokens: checkAndAssignOrDefault(claudeRequest.max_tokens, 8192), - temperature: checkAndAssignOrDefault(claudeRequest.temperature, 1), - top_p: checkAndAssignOrDefault(claudeRequest.top_p, 0.95), + max_tokens: checkAndAssignOrDefault(claudeRequest.max_tokens, OPENAI_DEFAULT_MAX_TOKENS), + temperature: checkAndAssignOrDefault(claudeRequest.temperature, OPENAI_DEFAULT_TEMPERATURE), + top_p: checkAndAssignOrDefault(claudeRequest.top_p, OPENAI_DEFAULT_TOP_P), stream: claudeRequest.stream, }; @@ -619,8 +627,8 @@ export class ClaudeConverter extends BaseConverter { version: m.version || "1.0.0", displayName: m.displayName || m.id || m.name, description: m.description || `A generative model for text and chat generation. 
ID: ${m.id || m.name}`, - inputTokenLimit: m.inputTokenLimit || 32768, - outputTokenLimit: m.outputTokenLimit || 8192, + inputTokenLimit: m.inputTokenLimit || GEMINI_DEFAULT_INPUT_TOKEN_LIMIT, + outputTokenLimit: m.outputTokenLimit || GEMINI_DEFAULT_OUTPUT_TOKEN_LIMIT, supportedGenerationMethods: m.supportedGenerationMethods || ["generateContent", "streamGenerateContent"] })) }; @@ -788,9 +796,9 @@ export class ClaudeConverter extends BaseConverter { // 添加生成配置 const generationConfig = {}; - generationConfig.maxOutputTokens = checkAndAssignOrDefault(claudeRequest.max_tokens, 65535); - generationConfig.temperature = checkAndAssignOrDefault(claudeRequest.temperature, 1); - generationConfig.topP = checkAndAssignOrDefault(claudeRequest.top_p, 0.95); + generationConfig.maxOutputTokens = checkAndAssignOrDefault(claudeRequest.max_tokens, GEMINI_DEFAULT_MAX_TOKENS); + generationConfig.temperature = checkAndAssignOrDefault(claudeRequest.temperature, GEMINI_DEFAULT_TEMPERATURE); + generationConfig.topP = checkAndAssignOrDefault(claudeRequest.top_p, GEMINI_DEFAULT_TOP_P); if (Object.keys(generationConfig).length > 0) { geminiRequest.generationConfig = generationConfig; @@ -1119,9 +1127,9 @@ export class ClaudeConverter extends BaseConverter { // 转换为OpenAI Responses格式 const responsesRequest = { model: claudeRequest.model, - max_tokens: checkAndAssignOrDefault(claudeRequest.max_tokens, 8192), - temperature: checkAndAssignOrDefault(claudeRequest.temperature, 1), - top_p: checkAndAssignOrDefault(claudeRequest.top_p, 0.95), + max_tokens: checkAndAssignOrDefault(claudeRequest.max_tokens, OPENAI_DEFAULT_MAX_TOKENS), + temperature: checkAndAssignOrDefault(claudeRequest.temperature, OPENAI_DEFAULT_TEMPERATURE), + top_p: checkAndAssignOrDefault(claudeRequest.top_p, OPENAI_DEFAULT_TOP_P), }; // 处理系统指令 diff --git a/src/converters/strategies/GeminiConverter.js b/src/converters/strategies/GeminiConverter.js index 1ebd950..4e092a9 100644 --- a/src/converters/strategies/GeminiConverter.js +++ 
b/src/converters/strategies/GeminiConverter.js @@ -6,7 +6,13 @@ import { v4 as uuidv4 } from 'uuid'; import { BaseConverter } from '../BaseConverter.js'; import { - checkAndAssignOrDefault + checkAndAssignOrDefault, + OPENAI_DEFAULT_MAX_TOKENS, + OPENAI_DEFAULT_TEMPERATURE, + OPENAI_DEFAULT_TOP_P, + CLAUDE_DEFAULT_MAX_TOKENS, + CLAUDE_DEFAULT_TEMPERATURE, + CLAUDE_DEFAULT_TOP_P } from '../utils.js'; import { MODEL_PROTOCOL_PREFIX } from '../../common.js'; import { @@ -102,9 +108,9 @@ export class GeminiConverter extends BaseConverter { const openaiRequest = { messages: [], model: geminiRequest.model, - max_tokens: checkAndAssignOrDefault(geminiRequest.max_tokens, 8192), - temperature: checkAndAssignOrDefault(geminiRequest.temperature, 1), - top_p: checkAndAssignOrDefault(geminiRequest.top_p, 0.95), + max_tokens: checkAndAssignOrDefault(geminiRequest.max_tokens, OPENAI_DEFAULT_MAX_TOKENS), + temperature: checkAndAssignOrDefault(geminiRequest.temperature, OPENAI_DEFAULT_TEMPERATURE), + top_p: checkAndAssignOrDefault(geminiRequest.top_p, OPENAI_DEFAULT_TOP_P), }; // 处理系统指令 @@ -373,9 +379,9 @@ export class GeminiConverter extends BaseConverter { const claudeRequest = { model: geminiRequest.model || 'claude-3-opus', messages: [], - max_tokens: checkAndAssignOrDefault(geminiRequest.generationConfig?.maxOutputTokens, 8192), - temperature: checkAndAssignOrDefault(geminiRequest.generationConfig?.temperature, 1), - top_p: checkAndAssignOrDefault(geminiRequest.generationConfig?.topP, 0.95), + max_tokens: checkAndAssignOrDefault(geminiRequest.generationConfig?.maxOutputTokens, CLAUDE_DEFAULT_MAX_TOKENS), + temperature: checkAndAssignOrDefault(geminiRequest.generationConfig?.temperature, CLAUDE_DEFAULT_TEMPERATURE), + top_p: checkAndAssignOrDefault(geminiRequest.generationConfig?.topP, CLAUDE_DEFAULT_TOP_P), }; // 处理系统指令 @@ -678,9 +684,9 @@ export class GeminiConverter extends BaseConverter { toOpenAIResponsesRequest(geminiRequest) { const responsesRequest = { model: 
geminiRequest.model, - max_tokens: checkAndAssignOrDefault(geminiRequest.generationConfig?.maxOutputTokens, 8192), - temperature: checkAndAssignOrDefault(geminiRequest.generationConfig?.temperature, 1), - top_p: checkAndAssignOrDefault(geminiRequest.generationConfig?.topP, 0.95), + max_tokens: checkAndAssignOrDefault(geminiRequest.generationConfig?.maxOutputTokens, OPENAI_DEFAULT_MAX_TOKENS), + temperature: checkAndAssignOrDefault(geminiRequest.generationConfig?.temperature, OPENAI_DEFAULT_TEMPERATURE), + top_p: checkAndAssignOrDefault(geminiRequest.generationConfig?.topP, OPENAI_DEFAULT_TOP_P), }; // 处理系统指令 diff --git a/src/converters/strategies/OllamaConverter.js b/src/converters/strategies/OllamaConverter.js index 69bed9d..3bd8ffd 100644 --- a/src/converters/strategies/OllamaConverter.js +++ b/src/converters/strategies/OllamaConverter.js @@ -7,6 +7,76 @@ import { v4 as uuidv4 } from 'uuid'; import { createHash } from 'crypto'; import { BaseConverter } from '../BaseConverter.js'; import { MODEL_PROTOCOL_PREFIX } from '../../common.js'; +import { + OLLAMA_DEFAULT_CONTEXT_LENGTH, + OLLAMA_DEFAULT_MAX_OUTPUT_TOKENS, + OLLAMA_CLAUDE_DEFAULT_CONTEXT_LENGTH, + OLLAMA_CLAUDE_SONNET_45_CONTEXT_LENGTH, + OLLAMA_CLAUDE_SONNET_45_MAX_OUTPUT_TOKENS, + OLLAMA_CLAUDE_HAIKU_45_CONTEXT_LENGTH, + OLLAMA_CLAUDE_HAIKU_45_MAX_OUTPUT_TOKENS, + OLLAMA_CLAUDE_OPUS_41_CONTEXT_LENGTH, + OLLAMA_CLAUDE_OPUS_41_MAX_OUTPUT_TOKENS, + OLLAMA_CLAUDE_SONNET_40_CONTEXT_LENGTH, + OLLAMA_CLAUDE_SONNET_40_MAX_OUTPUT_TOKENS, + OLLAMA_CLAUDE_SONNET_37_CONTEXT_LENGTH, + OLLAMA_CLAUDE_SONNET_37_MAX_OUTPUT_TOKENS, + OLLAMA_CLAUDE_OPUS_40_CONTEXT_LENGTH, + OLLAMA_CLAUDE_OPUS_40_MAX_OUTPUT_TOKENS, + OLLAMA_CLAUDE_HAIKU_35_CONTEXT_LENGTH, + OLLAMA_CLAUDE_HAIKU_35_MAX_OUTPUT_TOKENS, + OLLAMA_CLAUDE_HAIKU_30_CONTEXT_LENGTH, + OLLAMA_CLAUDE_HAIKU_30_MAX_OUTPUT_TOKENS, + OLLAMA_CLAUDE_SONNET_35_CONTEXT_LENGTH, + OLLAMA_CLAUDE_SONNET_35_MAX_OUTPUT_TOKENS, + OLLAMA_CLAUDE_OPUS_30_CONTEXT_LENGTH, + 
OLLAMA_CLAUDE_OPUS_30_MAX_OUTPUT_TOKENS, + OLLAMA_GEMINI_25_PRO_CONTEXT_LENGTH, + OLLAMA_GEMINI_25_PRO_MAX_OUTPUT_TOKENS, + OLLAMA_GEMINI_25_FLASH_CONTEXT_LENGTH, + OLLAMA_GEMINI_25_FLASH_MAX_OUTPUT_TOKENS, + OLLAMA_GEMINI_25_IMAGE_CONTEXT_LENGTH, + OLLAMA_GEMINI_25_IMAGE_MAX_OUTPUT_TOKENS, + OLLAMA_GEMINI_25_LIVE_CONTEXT_LENGTH, + OLLAMA_GEMINI_25_LIVE_MAX_OUTPUT_TOKENS, + OLLAMA_GEMINI_25_TTS_CONTEXT_LENGTH, + OLLAMA_GEMINI_25_TTS_MAX_OUTPUT_TOKENS, + OLLAMA_GEMINI_20_FLASH_CONTEXT_LENGTH, + OLLAMA_GEMINI_20_FLASH_MAX_OUTPUT_TOKENS, + OLLAMA_GEMINI_20_IMAGE_CONTEXT_LENGTH, + OLLAMA_GEMINI_20_IMAGE_MAX_OUTPUT_TOKENS, + OLLAMA_GEMINI_15_PRO_CONTEXT_LENGTH, + OLLAMA_GEMINI_15_PRO_MAX_OUTPUT_TOKENS, + OLLAMA_GEMINI_15_FLASH_CONTEXT_LENGTH, + OLLAMA_GEMINI_15_FLASH_MAX_OUTPUT_TOKENS, + OLLAMA_GEMINI_DEFAULT_CONTEXT_LENGTH, + OLLAMA_GEMINI_DEFAULT_MAX_OUTPUT_TOKENS, + OLLAMA_GPT4_TURBO_CONTEXT_LENGTH, + OLLAMA_GPT4_TURBO_MAX_OUTPUT_TOKENS, + OLLAMA_GPT4_32K_CONTEXT_LENGTH, + OLLAMA_GPT4_32K_MAX_OUTPUT_TOKENS, + OLLAMA_GPT4_BASE_CONTEXT_LENGTH, + OLLAMA_GPT4_BASE_MAX_OUTPUT_TOKENS, + OLLAMA_GPT35_16K_CONTEXT_LENGTH, + OLLAMA_GPT35_16K_MAX_OUTPUT_TOKENS, + OLLAMA_GPT35_BASE_CONTEXT_LENGTH, + OLLAMA_GPT35_BASE_MAX_OUTPUT_TOKENS, + OLLAMA_QWEN_CODER_PLUS_CONTEXT_LENGTH, + OLLAMA_QWEN_CODER_PLUS_MAX_OUTPUT_TOKENS, + OLLAMA_QWEN_VL_PLUS_CONTEXT_LENGTH, + OLLAMA_QWEN_VL_PLUS_MAX_OUTPUT_TOKENS, + OLLAMA_QWEN_CODER_FLASH_CONTEXT_LENGTH, + OLLAMA_QWEN_CODER_FLASH_MAX_OUTPUT_TOKENS, + OLLAMA_QWEN_DEFAULT_CONTEXT_LENGTH, + OLLAMA_QWEN_DEFAULT_MAX_OUTPUT_TOKENS, + OLLAMA_DEFAULT_FILE_TYPE, + OLLAMA_DEFAULT_QUANTIZATION_VERSION, + OLLAMA_DEFAULT_ROPE_FREQ_BASE, + OLLAMA_DEFAULT_TEMPERATURE, + OLLAMA_DEFAULT_TOP_P, + OLLAMA_DEFAULT_QUANTIZATION_LEVEL, + OLLAMA_SHOW_QUANTIZATION_LEVEL +} from '../utils.js'; @@ -360,7 +430,7 @@ export class OllamaConverter extends BaseConverter { family: modelOwner, // "Ollama" with capital O families: [modelOwner], parameter_size: '0B', // As in the 
old patch - quantization_level: 'Q4_0' + quantization_level: OLLAMA_DEFAULT_QUANTIZATION_LEVEL } }); }); @@ -374,8 +444,8 @@ export class OllamaConverter extends BaseConverter { */ toOllamaShowResponse(modelName) { // Minimal implementation, as in the old patch - let contextLength = 8192; - let maxOutputTokens = 4096; + let contextLength = OLLAMA_DEFAULT_CONTEXT_LENGTH; + let maxOutputTokens = OLLAMA_DEFAULT_MAX_OUTPUT_TOKENS; let family = 'Ollama'; // ВАЖНО: С большой буквы, как ожидает Copilot! let architecture = 'transformer'; @@ -385,62 +455,62 @@ export class OllamaConverter extends BaseConverter { // Claude models if (lowerName.includes('claude')) { architecture = 'claude'; - contextLength = 200000; // Default 200K + contextLength = OLLAMA_CLAUDE_DEFAULT_CONTEXT_LENGTH; // Default 200K // Claude Sonnet 4.5 if (lowerName.includes('sonnet-4-5') || lowerName.includes('sonnet-4.5')) { - contextLength = 200000; // 200K (1M beta available) - maxOutputTokens = 64000; // 64K output + contextLength = OLLAMA_CLAUDE_SONNET_45_CONTEXT_LENGTH; // 200K (1M beta available) + maxOutputTokens = OLLAMA_CLAUDE_SONNET_45_MAX_OUTPUT_TOKENS; // 64K output } // Claude Haiku 4.5 else if (lowerName.includes('haiku-4-5') || lowerName.includes('haiku-4.5')) { - contextLength = 200000; // 200K - maxOutputTokens = 64000; // 64K output + contextLength = OLLAMA_CLAUDE_HAIKU_45_CONTEXT_LENGTH; // 200K + maxOutputTokens = OLLAMA_CLAUDE_HAIKU_45_MAX_OUTPUT_TOKENS; // 64K output } // Claude Opus 4.1 else if (lowerName.includes('opus-4-1') || lowerName.includes('opus-4.1')) { - contextLength = 200000; // 200K - maxOutputTokens = 32000; // 32K output + contextLength = OLLAMA_CLAUDE_OPUS_41_CONTEXT_LENGTH; // 200K + maxOutputTokens = OLLAMA_CLAUDE_OPUS_41_MAX_OUTPUT_TOKENS; // 32K output } // Claude Sonnet 4.0 (legacy) else if (lowerName.includes('sonnet-4-0') || lowerName.includes('sonnet-4.0') || lowerName.includes('sonnet-4-20')) { - contextLength = 200000; // 200K (1M beta available) - 
maxOutputTokens = 64000; // 64K output + contextLength = OLLAMA_CLAUDE_SONNET_40_CONTEXT_LENGTH; // 200K (1M beta available) + maxOutputTokens = OLLAMA_CLAUDE_SONNET_40_MAX_OUTPUT_TOKENS; // 64K output } // Claude Sonnet 3.7 (legacy) else if (lowerName.includes('3-7') || lowerName.includes('3.7')) { - contextLength = 200000; // 200K - maxOutputTokens = 64000; // 64K output (128K beta available) + contextLength = OLLAMA_CLAUDE_SONNET_37_CONTEXT_LENGTH; // 200K + maxOutputTokens = OLLAMA_CLAUDE_SONNET_37_MAX_OUTPUT_TOKENS; // 64K output (128K beta available) } // Claude Opus 4.0 (legacy) else if (lowerName.includes('opus-4-0') || lowerName.includes('opus-4.0') || lowerName.includes('opus-4-20')) { - contextLength = 200000; // 200K - maxOutputTokens = 32000; // 32K output + contextLength = OLLAMA_CLAUDE_OPUS_40_CONTEXT_LENGTH; // 200K + maxOutputTokens = OLLAMA_CLAUDE_OPUS_40_MAX_OUTPUT_TOKENS; // 32K output } // Claude Haiku 3.5 (legacy) else if (lowerName.includes('haiku-3-5') || lowerName.includes('haiku-3.5')) { - contextLength = 200000; // 200K - maxOutputTokens = 8192; // 8K output + contextLength = OLLAMA_CLAUDE_HAIKU_35_CONTEXT_LENGTH; // 200K + maxOutputTokens = OLLAMA_CLAUDE_HAIKU_35_MAX_OUTPUT_TOKENS; // 8K output } // Claude Haiku 3.0 (legacy) else if (lowerName.includes('haiku-3-0') || lowerName.includes('haiku-3.0') || lowerName.includes('haiku-20240307')) { - contextLength = 200000; // 200K - maxOutputTokens = 4096; // 4K output + contextLength = OLLAMA_CLAUDE_HAIKU_30_CONTEXT_LENGTH; // 200K + maxOutputTokens = OLLAMA_CLAUDE_HAIKU_30_MAX_OUTPUT_TOKENS; // 4K output } // Claude Sonnet 3.5 (legacy) else if (lowerName.includes('sonnet-3-5') || lowerName.includes('sonnet-3.5')) { - contextLength = 200000; // 200K - maxOutputTokens = 8192; // 8K output + contextLength = OLLAMA_CLAUDE_SONNET_35_CONTEXT_LENGTH; // 200K + maxOutputTokens = OLLAMA_CLAUDE_SONNET_35_MAX_OUTPUT_TOKENS; // 8K output } // Claude Opus 3.0 (legacy) else if 
(lowerName.includes('opus-3-0') || lowerName.includes('opus-3.0') || lowerName.includes('opus') && lowerName.includes('20240229')) { - contextLength = 200000; // 200K - maxOutputTokens = 4096; // 4K output + contextLength = OLLAMA_CLAUDE_OPUS_30_CONTEXT_LENGTH; // 200K + maxOutputTokens = OLLAMA_CLAUDE_OPUS_30_MAX_OUTPUT_TOKENS; // 4K output } // Default for Claude else { - contextLength = 200000; // 200K - maxOutputTokens = 8192; // 8K output + contextLength = OLLAMA_CLAUDE_DEFAULT_CONTEXT_LENGTH; // 200K + maxOutputTokens = OLLAMA_CLAUDE_HAIKU_35_MAX_OUTPUT_TOKENS; // 8K output } } // Gemini models @@ -449,53 +519,53 @@ export class OllamaConverter extends BaseConverter { // Gemini 2.5 Pro if (lowerName.includes('2.5') && lowerName.includes('pro')) { - contextLength = 1048576; // 1M input tokens - maxOutputTokens = 65536; // 65K output tokens + contextLength = OLLAMA_GEMINI_25_PRO_CONTEXT_LENGTH; // 1M input tokens + maxOutputTokens = OLLAMA_GEMINI_25_PRO_MAX_OUTPUT_TOKENS; // 65K output tokens } // Gemini 2.5 Flash / Flash-Lite else if (lowerName.includes('2.5') && (lowerName.includes('flash') || lowerName.includes('lite'))) { - contextLength = 1048576; // 1M input tokens - maxOutputTokens = 65536; // 65K output tokens + contextLength = OLLAMA_GEMINI_25_FLASH_CONTEXT_LENGTH; // 1M input tokens + maxOutputTokens = OLLAMA_GEMINI_25_FLASH_MAX_OUTPUT_TOKENS; // 65K output tokens } // Gemini 2.5 Flash Image else if (lowerName.includes('2.5') && lowerName.includes('image')) { - contextLength = 65536; // 65K input tokens - maxOutputTokens = 32768; // 32K output tokens + contextLength = OLLAMA_GEMINI_25_IMAGE_CONTEXT_LENGTH; // 65K input tokens + maxOutputTokens = OLLAMA_GEMINI_25_IMAGE_MAX_OUTPUT_TOKENS; // 32K output tokens } // Gemini 2.5 Flash Live / Native Audio else if (lowerName.includes('2.5') && (lowerName.includes('live') || lowerName.includes('native-audio'))) { - contextLength = 131072; // 131K input tokens - maxOutputTokens = 8192; // 8K output tokens + 
contextLength = OLLAMA_GEMINI_25_LIVE_CONTEXT_LENGTH; // 131K input tokens + maxOutputTokens = OLLAMA_GEMINI_25_LIVE_MAX_OUTPUT_TOKENS; // 8K output tokens } // Gemini 2.5 TTS else if (lowerName.includes('2.5') && lowerName.includes('tts')) { - contextLength = 8192; // 8K input tokens - maxOutputTokens = 16384; // 16K output tokens + contextLength = OLLAMA_GEMINI_25_TTS_CONTEXT_LENGTH; // 8K input tokens + maxOutputTokens = OLLAMA_GEMINI_25_TTS_MAX_OUTPUT_TOKENS; // 16K output tokens } // Gemini 2.0 Flash else if (lowerName.includes('2.0') && lowerName.includes('flash')) { - contextLength = 1048576; // 1M input tokens - maxOutputTokens = 8192; // 8K output tokens + contextLength = OLLAMA_GEMINI_20_FLASH_CONTEXT_LENGTH; // 1M input tokens + maxOutputTokens = OLLAMA_GEMINI_20_FLASH_MAX_OUTPUT_TOKENS; // 8K output tokens } // Gemini 2.0 Flash Image else if (lowerName.includes('2.0') && lowerName.includes('image')) { - contextLength = 32768; // 32K input tokens - maxOutputTokens = 8192; // 8K output tokens + contextLength = OLLAMA_GEMINI_20_IMAGE_CONTEXT_LENGTH; // 32K input tokens + maxOutputTokens = OLLAMA_GEMINI_20_IMAGE_MAX_OUTPUT_TOKENS; // 8K output tokens } // Gemini 1.5 Pro (legacy) else if (lowerName.includes('1.5') && lowerName.includes('pro')) { - contextLength = 2097152; // 2M tokens - maxOutputTokens = 8192; + contextLength = OLLAMA_GEMINI_15_PRO_CONTEXT_LENGTH; // 2M tokens + maxOutputTokens = OLLAMA_GEMINI_15_PRO_MAX_OUTPUT_TOKENS; } // Gemini 1.5 Flash (legacy) else if (lowerName.includes('1.5') && lowerName.includes('flash')) { - contextLength = 1048576; // 1M tokens - maxOutputTokens = 8192; + contextLength = OLLAMA_GEMINI_15_FLASH_CONTEXT_LENGTH; // 1M tokens + maxOutputTokens = OLLAMA_GEMINI_15_FLASH_MAX_OUTPUT_TOKENS; } // Default for Gemini else { - contextLength = 1048576; // 1M tokens - maxOutputTokens = 8192; + contextLength = OLLAMA_GEMINI_DEFAULT_CONTEXT_LENGTH; // 1M tokens + maxOutputTokens = OLLAMA_GEMINI_DEFAULT_MAX_OUTPUT_TOKENS; } } // 
GPT-4 models @@ -503,14 +573,14 @@ export class OllamaConverter extends BaseConverter { architecture = 'gpt'; if (lowerName.includes('turbo') || lowerName.includes('preview')) { - contextLength = 128000; // GPT-4 Turbo - maxOutputTokens = 4096; + contextLength = OLLAMA_GPT4_TURBO_CONTEXT_LENGTH; // GPT-4 Turbo + maxOutputTokens = OLLAMA_GPT4_TURBO_MAX_OUTPUT_TOKENS; } else if (lowerName.includes('32k')) { - contextLength = 32768; - maxOutputTokens = 4096; + contextLength = OLLAMA_GPT4_32K_CONTEXT_LENGTH; + maxOutputTokens = OLLAMA_GPT4_32K_MAX_OUTPUT_TOKENS; } else { - contextLength = 8192; // GPT-4 base - maxOutputTokens = 4096; + contextLength = OLLAMA_GPT4_BASE_CONTEXT_LENGTH; // GPT-4 base + maxOutputTokens = OLLAMA_GPT4_BASE_MAX_OUTPUT_TOKENS; } } // GPT-3.5 models @@ -518,11 +588,11 @@ export class OllamaConverter extends BaseConverter { architecture = 'gpt'; if (lowerName.includes('16k')) { - contextLength = 16385; - maxOutputTokens = 4096; + contextLength = OLLAMA_GPT35_16K_CONTEXT_LENGTH; + maxOutputTokens = OLLAMA_GPT35_16K_MAX_OUTPUT_TOKENS; } else { - contextLength = 4096; - maxOutputTokens = 4096; + contextLength = OLLAMA_GPT35_BASE_CONTEXT_LENGTH; + maxOutputTokens = OLLAMA_GPT35_BASE_MAX_OUTPUT_TOKENS; } } // Qwen models @@ -531,23 +601,23 @@ export class OllamaConverter extends BaseConverter { // Qwen3 Coder Plus (coder-model) if (lowerName.includes('coder-plus') || lowerName.includes('coder_plus') || lowerName.includes('coder-model')) { - contextLength = 128000; // 128K tokens - maxOutputTokens = 65536; // 65K output + contextLength = OLLAMA_QWEN_CODER_PLUS_CONTEXT_LENGTH; // 128K tokens + maxOutputTokens = OLLAMA_QWEN_CODER_PLUS_MAX_OUTPUT_TOKENS; // 65K output } // Qwen3 VL Plus (vision-model) else if (lowerName.includes('vl-plus') || lowerName.includes('vl_plus') || lowerName.includes('vision-model')) { - contextLength = 262144; // 256K tokens - maxOutputTokens = 32768; // 32K output + contextLength = OLLAMA_QWEN_VL_PLUS_CONTEXT_LENGTH; // 256K 
tokens + maxOutputTokens = OLLAMA_QWEN_VL_PLUS_MAX_OUTPUT_TOKENS; // 32K output } // Qwen3 Coder Flash else if (lowerName.includes('coder-flash') || lowerName.includes('coder_flash')) { - contextLength = 128000; // 128K tokens - maxOutputTokens = 65536; // 65K output + contextLength = OLLAMA_QWEN_CODER_FLASH_CONTEXT_LENGTH; // 128K tokens + maxOutputTokens = OLLAMA_QWEN_CODER_FLASH_MAX_OUTPUT_TOKENS; // 65K output } // Default for Qwen else { - contextLength = 32768; // 32K tokens - maxOutputTokens = 8192; + contextLength = OLLAMA_QWEN_DEFAULT_CONTEXT_LENGTH; // 32K tokens + maxOutputTokens = OLLAMA_QWEN_DEFAULT_MAX_OUTPUT_TOKENS; } } @@ -557,7 +627,7 @@ export class OllamaConverter extends BaseConverter { return { license: '', modelfile: `# Modelfile for ${modelName}\nFROM ${modelName}`, - parameters: `num_ctx ${contextLength}\nnum_predict ${maxOutputTokens}\ntemperature 0.7\ntop_p 0.9`, + parameters: `num_ctx ${contextLength}\nnum_predict ${maxOutputTokens}\ntemperature ${OLLAMA_DEFAULT_TEMPERATURE}\ntop_p ${OLLAMA_DEFAULT_TOP_P}`, template: '{{ if .System }}{{ .System }}\n{{ end }}{{ .Prompt }}', details: { parent_model: '', @@ -565,16 +635,16 @@ export class OllamaConverter extends BaseConverter { family: family, families: [family], parameter_size: parameterSize, - quantization_level: 'Q4_K_M' + quantization_level: OLLAMA_SHOW_QUANTIZATION_LEVEL }, model_info: { 'general.architecture': architecture, - 'general.file_type': 2, + 'general.file_type': OLLAMA_DEFAULT_FILE_TYPE, 'general.parameter_count': 0, - 'general.quantization_version': 2, + 'general.quantization_version': OLLAMA_DEFAULT_QUANTIZATION_VERSION, 'general.context_length': contextLength, 'llama.context_length': contextLength, - 'llama.rope.freq_base': 10000.0 + 'llama.rope.freq_base': OLLAMA_DEFAULT_ROPE_FREQ_BASE }, capabilities: ['tools', 'vision', 'completion'] // Indicate that the model supports tool calling }; diff --git a/src/converters/strategies/OpenAIConverter.js 
b/src/converters/strategies/OpenAIConverter.js index 0b64b7d..d970f22 100644 --- a/src/converters/strategies/OpenAIConverter.js +++ b/src/converters/strategies/OpenAIConverter.js @@ -12,7 +12,15 @@ import { checkAndAssignOrDefault, extractThinkingFromOpenAIText, mapFinishReason, - cleanJsonSchemaProperties as cleanJsonSchema + cleanJsonSchemaProperties as cleanJsonSchema, + CLAUDE_DEFAULT_MAX_TOKENS, + CLAUDE_DEFAULT_TEMPERATURE, + CLAUDE_DEFAULT_TOP_P, + GEMINI_DEFAULT_MAX_TOKENS, + GEMINI_DEFAULT_TEMPERATURE, + GEMINI_DEFAULT_TOP_P, + OPENAI_DEFAULT_INPUT_TOKEN_LIMIT, + OPENAI_DEFAULT_OUTPUT_TOKEN_LIMIT } from '../utils.js'; import { MODEL_PROTOCOL_PREFIX } from '../../common.js'; import { @@ -225,9 +233,9 @@ export class OpenAIConverter extends BaseConverter { const claudeRequest = { model: openaiRequest.model, messages: mergedClaudeMessages, - max_tokens: checkAndAssignOrDefault(openaiRequest.max_tokens, 8192), - temperature: checkAndAssignOrDefault(openaiRequest.temperature, 1), - top_p: checkAndAssignOrDefault(openaiRequest.top_p, 0.95), + max_tokens: checkAndAssignOrDefault(openaiRequest.max_tokens, CLAUDE_DEFAULT_MAX_TOKENS), + temperature: checkAndAssignOrDefault(openaiRequest.temperature, CLAUDE_DEFAULT_TEMPERATURE), + top_p: checkAndAssignOrDefault(openaiRequest.top_p, CLAUDE_DEFAULT_TOP_P), }; if (systemInstruction) { @@ -514,8 +522,8 @@ export class OpenAIConverter extends BaseConverter { version: m.version || "1.0.0", displayName: m.displayName || m.id, description: m.description || `A generative model for text and chat generation. 
ID: ${m.id}`, - inputTokenLimit: m.inputTokenLimit || 32768, - outputTokenLimit: m.outputTokenLimit || 8192, + inputTokenLimit: m.inputTokenLimit || OPENAI_DEFAULT_INPUT_TOKEN_LIMIT, + outputTokenLimit: m.outputTokenLimit || OPENAI_DEFAULT_OUTPUT_TOKEN_LIMIT, supportedGenerationMethods: m.supportedGenerationMethods || ["generateContent", "streamGenerateContent"] })) }; @@ -713,9 +721,9 @@ export class OpenAIConverter extends BaseConverter { */ buildGeminiGenerationConfig({ temperature, max_tokens, top_p, stop, tools, response_format }, model) { const config = {}; - config.temperature = checkAndAssignOrDefault(temperature, 1); - config.maxOutputTokens = checkAndAssignOrDefault(max_tokens, 65535); - config.topP = checkAndAssignOrDefault(top_p, 0.95); + config.temperature = checkAndAssignOrDefault(temperature, GEMINI_DEFAULT_TEMPERATURE); + config.maxOutputTokens = checkAndAssignOrDefault(max_tokens, GEMINI_DEFAULT_MAX_TOKENS); + config.topP = checkAndAssignOrDefault(top_p, GEMINI_DEFAULT_TOP_P); if (stop !== undefined) config.stopSequences = Array.isArray(stop) ? 
stop : [stop]; // Handle response_format diff --git a/src/converters/strategies/OpenAIResponsesConverter.js b/src/converters/strategies/OpenAIResponsesConverter.js index 1c635d8..2c926dd 100644 --- a/src/converters/strategies/OpenAIResponsesConverter.js +++ b/src/converters/strategies/OpenAIResponsesConverter.js @@ -7,7 +7,10 @@ import { BaseConverter } from '../BaseConverter.js'; import { MODEL_PROTOCOL_PREFIX } from '../../common.js'; import { extractAndProcessSystemMessages as extractSystemMessages, - extractTextFromMessageContent as extractText + extractTextFromMessageContent as extractText, + CLAUDE_DEFAULT_MAX_TOKENS, + GEMINI_DEFAULT_INPUT_TOKEN_LIMIT, + GEMINI_DEFAULT_OUTPUT_TOKEN_LIMIT } from '../utils.js'; /** @@ -227,7 +230,7 @@ export class OpenAIResponsesConverter extends BaseConverter { const claudeRequest = { model: responsesRequest.model, messages: [], - max_tokens: responsesRequest.max_tokens || 4096, + max_tokens: responsesRequest.max_tokens || CLAUDE_DEFAULT_MAX_TOKENS, stream: responsesRequest.stream || false }; @@ -561,8 +564,8 @@ export class OpenAIResponsesConverter extends BaseConverter { version: m.version || "1.0.0", displayName: m.displayName || m.id || m.name, description: m.description || `A generative model for text and chat generation. 
ID: ${m.id || m.name}`, - inputTokenLimit: m.inputTokenLimit || 32768, - outputTokenLimit: m.outputTokenLimit || 8192, + inputTokenLimit: m.inputTokenLimit || GEMINI_DEFAULT_INPUT_TOKEN_LIMIT, + outputTokenLimit: m.outputTokenLimit || GEMINI_DEFAULT_OUTPUT_TOKEN_LIMIT, supportedGenerationMethods: m.supportedGenerationMethods || ["generateContent", "streamGenerateContent"] })) };
diff --git a/src/converters/utils.js b/src/converters/utils.js
index 4a621f9..a7501c4 100644
--- a/src/converters/utils.js
+++ b/src/converters/utils.js
@@ -9,11 +9,127 @@ import { v4 as uuidv4 } from 'uuid';
 // 常量定义
 // =============================================================================
 
+// General defaults (DEFAULT_GEMINI_MAX_TOKENS below is kept so existing importers do not break)
 export const DEFAULT_MAX_TOKENS = 8192;
 export const DEFAULT_GEMINI_MAX_TOKENS = 65535;
 export const DEFAULT_TEMPERATURE = 1;
 export const DEFAULT_TOP_P = 0.95;
 
+// =============================================================================
+// OpenAI constants: fallbacks applied when the incoming request omits a value
+// =============================================================================
+export const OPENAI_DEFAULT_MAX_TOKENS = 8192; // equals the literal the converters used before extraction
+export const OPENAI_DEFAULT_TEMPERATURE = 1;
+export const OPENAI_DEFAULT_TOP_P = 0.95;
+export const OPENAI_DEFAULT_INPUT_TOKEN_LIMIT = 32768;
+export const OPENAI_DEFAULT_OUTPUT_TOKEN_LIMIT = 8192; // model-list fallback, same as the replaced literal
+
+// =============================================================================
+// Claude constants: fallbacks applied when the incoming request omits a value
+// =============================================================================
+export const CLAUDE_DEFAULT_MAX_TOKENS = 8192; // equals the literal the converters used before extraction
+export const CLAUDE_DEFAULT_TEMPERATURE = 1;
+export const CLAUDE_DEFAULT_TOP_P = 0.95;
+
+// =============================================================================
+// Gemini constants: fallbacks applied when the incoming request omits a value
+// =============================================================================
+export const GEMINI_DEFAULT_MAX_TOKENS = 65535; // same value as DEFAULT_GEMINI_MAX_TOKENS
+export const GEMINI_DEFAULT_TEMPERATURE = 1;
+export const GEMINI_DEFAULT_TOP_P = 0.95;
+export const GEMINI_DEFAULT_INPUT_TOKEN_LIMIT = 32768;
+export const GEMINI_DEFAULT_OUTPUT_TOKEN_LIMIT = 8192; // model-list fallback, same as the replaced literal
+
+// =============================================================================
+// OpenAI Responses constants
+// =============================================================================
+export const OPENAI_RESPONSES_DEFAULT_MAX_TOKENS = 4096;
+export const OPENAI_RESPONSES_DEFAULT_TEMPERATURE = 1;
+export const OPENAI_RESPONSES_DEFAULT_TOP_P = 0.95;
+export const OPENAI_RESPONSES_DEFAULT_INPUT_TOKEN_LIMIT = 32768;
+export const OPENAI_RESPONSES_DEFAULT_OUTPUT_TOKEN_LIMIT = 8192; // NOTE(review): aligned with the other output-limit fallbacks; confirm
+
+// =============================================================================
+// Ollama constants: values reported by the Ollama-compatible show/tags responses
+// =============================================================================
+export const OLLAMA_DEFAULT_CONTEXT_LENGTH = 8192; // used when the model name matches no known family
+export const OLLAMA_DEFAULT_MAX_OUTPUT_TOKENS = 4096;
+
+// Claude models: context length and max output tokens per model
+export const OLLAMA_CLAUDE_DEFAULT_CONTEXT_LENGTH = 200000;
+export const OLLAMA_CLAUDE_SONNET_45_CONTEXT_LENGTH = 200000;
+export const OLLAMA_CLAUDE_SONNET_45_MAX_OUTPUT_TOKENS = 64000;
+export const OLLAMA_CLAUDE_HAIKU_45_CONTEXT_LENGTH = 200000;
+export const OLLAMA_CLAUDE_HAIKU_45_MAX_OUTPUT_TOKENS = 64000;
+export const OLLAMA_CLAUDE_OPUS_41_CONTEXT_LENGTH = 200000;
+export const OLLAMA_CLAUDE_OPUS_41_MAX_OUTPUT_TOKENS = 32000;
+export const OLLAMA_CLAUDE_SONNET_40_CONTEXT_LENGTH = 200000;
+export const OLLAMA_CLAUDE_SONNET_40_MAX_OUTPUT_TOKENS = 64000;
+export const OLLAMA_CLAUDE_SONNET_37_CONTEXT_LENGTH = 200000;
+export const OLLAMA_CLAUDE_SONNET_37_MAX_OUTPUT_TOKENS = 64000;
+export const OLLAMA_CLAUDE_OPUS_40_CONTEXT_LENGTH = 200000;
+export const OLLAMA_CLAUDE_OPUS_40_MAX_OUTPUT_TOKENS = 32000;
+export const OLLAMA_CLAUDE_HAIKU_35_CONTEXT_LENGTH = 200000;
+export const OLLAMA_CLAUDE_HAIKU_35_MAX_OUTPUT_TOKENS = 8192; // 8K output; also read by the generic Claude fallback branch
+export const OLLAMA_CLAUDE_HAIKU_30_CONTEXT_LENGTH = 200000;
+export const OLLAMA_CLAUDE_HAIKU_30_MAX_OUTPUT_TOKENS = 4096;
+export const OLLAMA_CLAUDE_SONNET_35_CONTEXT_LENGTH = 200000;
+export const OLLAMA_CLAUDE_SONNET_35_MAX_OUTPUT_TOKENS = 8192; // 8K output
+export const OLLAMA_CLAUDE_OPUS_30_CONTEXT_LENGTH = 200000;
+export const OLLAMA_CLAUDE_OPUS_30_MAX_OUTPUT_TOKENS = 4096;
+
+// Gemini models: context length and max output tokens per model
+export const OLLAMA_GEMINI_25_PRO_CONTEXT_LENGTH = 1048576;
+export const OLLAMA_GEMINI_25_PRO_MAX_OUTPUT_TOKENS = 65536;
+export const OLLAMA_GEMINI_25_FLASH_CONTEXT_LENGTH = 1048576;
+export const OLLAMA_GEMINI_25_FLASH_MAX_OUTPUT_TOKENS = 65536;
+export const OLLAMA_GEMINI_25_IMAGE_CONTEXT_LENGTH = 65536;
+export const OLLAMA_GEMINI_25_IMAGE_MAX_OUTPUT_TOKENS = 32768;
+export const OLLAMA_GEMINI_25_LIVE_CONTEXT_LENGTH = 131072;
+export const OLLAMA_GEMINI_25_LIVE_MAX_OUTPUT_TOKENS = 8192; // 8K output tokens
+export const OLLAMA_GEMINI_25_TTS_CONTEXT_LENGTH = 8192; // 8K input tokens
+export const OLLAMA_GEMINI_25_TTS_MAX_OUTPUT_TOKENS = 16384;
+export const OLLAMA_GEMINI_20_FLASH_CONTEXT_LENGTH = 1048576;
+export const OLLAMA_GEMINI_20_FLASH_MAX_OUTPUT_TOKENS = 8192; // 8K output tokens
+export const OLLAMA_GEMINI_20_IMAGE_CONTEXT_LENGTH = 32768;
+export const OLLAMA_GEMINI_20_IMAGE_MAX_OUTPUT_TOKENS = 8192; // 8K output tokens
+export const OLLAMA_GEMINI_15_PRO_CONTEXT_LENGTH = 2097152;
+export const OLLAMA_GEMINI_15_PRO_MAX_OUTPUT_TOKENS = 8192;
+export const OLLAMA_GEMINI_15_FLASH_CONTEXT_LENGTH = 1048576;
+export const OLLAMA_GEMINI_15_FLASH_MAX_OUTPUT_TOKENS = 8192;
+export const OLLAMA_GEMINI_DEFAULT_CONTEXT_LENGTH = 1048576;
+export const OLLAMA_GEMINI_DEFAULT_MAX_OUTPUT_TOKENS = 8192;
+
+// GPT models: context length and max output tokens per model
+export const OLLAMA_GPT4_TURBO_CONTEXT_LENGTH = 128000;
+export const OLLAMA_GPT4_TURBO_MAX_OUTPUT_TOKENS = 4096;
+export const OLLAMA_GPT4_32K_CONTEXT_LENGTH = 32768;
+export const OLLAMA_GPT4_32K_MAX_OUTPUT_TOKENS = 4096;
+export const OLLAMA_GPT4_BASE_CONTEXT_LENGTH = 8192; // GPT-4 base
+export const OLLAMA_GPT4_BASE_MAX_OUTPUT_TOKENS = 4096;
+export const OLLAMA_GPT35_16K_CONTEXT_LENGTH = 16385;
+export const OLLAMA_GPT35_16K_MAX_OUTPUT_TOKENS = 4096;
+export const OLLAMA_GPT35_BASE_CONTEXT_LENGTH = 4096;
+export const OLLAMA_GPT35_BASE_MAX_OUTPUT_TOKENS = 4096;
+
+// Qwen models: context length and max output tokens per model
+export const OLLAMA_QWEN_CODER_PLUS_CONTEXT_LENGTH = 128000;
+export const OLLAMA_QWEN_CODER_PLUS_MAX_OUTPUT_TOKENS = 65536;
+export const OLLAMA_QWEN_VL_PLUS_CONTEXT_LENGTH = 262144;
+export const OLLAMA_QWEN_VL_PLUS_MAX_OUTPUT_TOKENS = 32768;
+export const OLLAMA_QWEN_CODER_FLASH_CONTEXT_LENGTH = 128000;
+export const OLLAMA_QWEN_CODER_FLASH_MAX_OUTPUT_TOKENS = 65536;
+export const OLLAMA_QWEN_DEFAULT_CONTEXT_LENGTH = 32768;
+export const OLLAMA_QWEN_DEFAULT_MAX_OUTPUT_TOKENS = 8192;
+
+export const OLLAMA_DEFAULT_FILE_TYPE = 2;
+export const OLLAMA_DEFAULT_QUANTIZATION_VERSION = 2;
+export const OLLAMA_DEFAULT_ROPE_FREQ_BASE = 10000.0;
+export const OLLAMA_DEFAULT_TEMPERATURE = 0.7;
+export const OLLAMA_DEFAULT_TOP_P = 0.9;
+export const OLLAMA_DEFAULT_QUANTIZATION_LEVEL = 'Q4_0'; // reported in the model list response
+export const OLLAMA_SHOW_QUANTIZATION_LEVEL = 'Q4_K_M'; // reported in the show response
+
 // =============================================================================
 // 通用辅助函数
 // =============================================================================