增加生图工具
This commit is contained in:
parent
7be373b82e
commit
c337eacf4d
3 changed files with 306 additions and 1 deletions
41
src/index.ts
41
src/index.ts
|
|
@ -36,6 +36,9 @@ interface AppConfig {
|
|||
smtpPass?: string;
|
||||
smtpFrom?: string;
|
||||
tavilyApiKey?: string;
|
||||
imageApiKey?: string;
|
||||
imageBaseUrl?: string;
|
||||
imageModel?: string;
|
||||
autoConfirm?: boolean;
|
||||
feishuWebhook?: string;
|
||||
feishuKeyword?: string;
|
||||
|
|
@ -143,6 +146,12 @@ async function runSetup(options: any = {}) {
|
|||
message: 'Enter default Model:',
|
||||
default: currentConfig.model || 'gpt-4o'
|
||||
},
|
||||
{
|
||||
type: 'confirm',
|
||||
name: 'configureImage',
|
||||
message: 'Do you want to configure a separate Image Generation Service (DALL-E)?',
|
||||
default: !!currentConfig.imageApiKey
|
||||
},
|
||||
{
|
||||
type: 'confirm',
|
||||
name: 'configureEmail',
|
||||
|
|
@ -166,6 +175,37 @@ async function runSetup(options: any = {}) {
|
|||
// Resolve sensitive values (Keep old if empty)
|
||||
const finalApiKey = answers.apiKey || currentConfig.apiKey;
|
||||
|
||||
let imageConfig: any = {};
|
||||
if (answers.configureImage) {
|
||||
const imageAnswers = await inquirer.prompt([
|
||||
{
|
||||
type: 'password',
|
||||
name: 'imageApiKey',
|
||||
message: currentConfig.imageApiKey
|
||||
? `Enter Image Service API Key (Leave empty to keep ${maskSecret(currentConfig.imageApiKey)}, or leave empty to use main API key):`
|
||||
: 'Enter Image Service API Key (Leave empty to use main API key):',
|
||||
mask: '*'
|
||||
},
|
||||
{
|
||||
type: 'input',
|
||||
name: 'imageBaseUrl',
|
||||
message: 'Enter Image Service Base URL:',
|
||||
default: currentConfig.imageBaseUrl || currentConfig.baseUrl || 'https://api.openai.com/v1'
|
||||
},
|
||||
{
|
||||
type: 'input',
|
||||
name: 'imageModel',
|
||||
message: 'Default Image Model:',
|
||||
default: currentConfig.imageModel || 'dall-e-3'
|
||||
}
|
||||
]);
|
||||
imageConfig = {
|
||||
imageApiKey: imageAnswers.imageApiKey || currentConfig.imageApiKey,
|
||||
imageBaseUrl: imageAnswers.imageBaseUrl,
|
||||
imageModel: imageAnswers.imageModel
|
||||
};
|
||||
}
|
||||
|
||||
let emailConfig: any = {};
|
||||
if (answers.configureEmail) {
|
||||
const emailAnswers = await inquirer.prompt([
|
||||
|
|
@ -282,6 +322,7 @@ async function runSetup(options: any = {}) {
|
|||
apiKey: finalApiKey,
|
||||
baseUrl: answers.baseUrl,
|
||||
model: answers.model,
|
||||
...imageConfig,
|
||||
...emailConfig,
|
||||
...searchConfig,
|
||||
...notifyConfig
|
||||
|
|
|
|||
262
src/tools/image.ts
Normal file
262
src/tools/image.ts
Normal file
|
|
@ -0,0 +1,262 @@
|
|||
import OpenAI from 'openai';
|
||||
import * as fs from 'fs';
|
||||
import * as path from 'path';
|
||||
import { ToolModule } from './interface.js';
|
||||
|
||||
const toolDefinition = {
|
||||
type: "function",
|
||||
function: {
|
||||
name: "generate_image",
|
||||
description: "Generates or edits images using AI models (DALL-E 3/2). Supports text-to-image, image variation, and image editing. Allows control over size, resolution (quality), and model selection.",
|
||||
parameters: {
|
||||
type: "object",
|
||||
properties: {
|
||||
prompt: {
|
||||
type: "string",
|
||||
description: "Text description of the desired image. Required for text-to-image and edit modes."
|
||||
},
|
||||
image_path: {
|
||||
type: "string",
|
||||
description: "Path to an existing image file (local path). Required for variation and editing modes."
|
||||
},
|
||||
mask_path: {
|
||||
type: "string",
|
||||
description: "Path to a mask image file (local path). Optional, used only for editing."
|
||||
},
|
||||
mode: {
|
||||
type: "string",
|
||||
enum: ["text-to-image", "variation", "edit"],
|
||||
description: "Operation mode. Inferred if not provided."
|
||||
},
|
||||
model: {
|
||||
type: "string",
|
||||
enum: ["dall-e-3", "dall-e-2"],
|
||||
description: "The AI model to use. 'dall-e-3' for high quality (default), 'dall-e-2' for faster/smaller generation or editing.",
|
||||
default: "dall-e-3"
|
||||
},
|
||||
n: {
|
||||
type: "integer",
|
||||
description: "Number of images to generate. Default is 1.",
|
||||
default: 1
|
||||
},
|
||||
size: {
|
||||
type: "string",
|
||||
description: "Image resolution/size. DALL-E 3: '1024x1024', '1024x1792' (Portrait), '1792x1024' (Landscape). DALL-E 2: '256x256', '512x512', '1024x1024'.",
|
||||
default: "1024x1024"
|
||||
},
|
||||
quality: {
|
||||
type: "string",
|
||||
enum: ["standard", "hd"],
|
||||
description: "Image quality (DALL-E 3 only). 'hd' creates more detailed images. Default is 'standard'.",
|
||||
default: "standard"
|
||||
},
|
||||
style: {
|
||||
type: "string",
|
||||
enum: ["vivid", "natural"],
|
||||
description: "Image style (DALL-E 3 only). Default is 'vivid'.",
|
||||
default: "vivid"
|
||||
},
|
||||
output_dir: {
|
||||
type: "string",
|
||||
description: "Directory to save the generated images. Defaults to current directory."
|
||||
}
|
||||
},
|
||||
required: []
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
async function downloadImage(url: string, destPath: string): Promise<void> {
|
||||
const response = await fetch(url);
|
||||
if (!response.ok) throw new Error(`Failed to download image: ${response.statusText}`);
|
||||
const buffer = await response.arrayBuffer();
|
||||
fs.writeFileSync(destPath, Buffer.from(buffer));
|
||||
}
|
||||
|
||||
const handler = async (args: any, config: any): Promise<string> => {
|
||||
const apiKey = config.imageApiKey || config.apiKey || process.env.OPENAI_API_KEY;
|
||||
const baseURL = config.imageBaseUrl || config.baseUrl || process.env.OPENAI_BASE_URL;
|
||||
|
||||
if (!apiKey) {
|
||||
return "Error: Image Service API Key is missing. Please configure it in .autoclaw/setting.json (imageApiKey or apiKey).";
|
||||
}
|
||||
|
||||
const client = new OpenAI({
|
||||
apiKey: apiKey,
|
||||
baseURL: baseURL
|
||||
});
|
||||
|
||||
const {
|
||||
prompt,
|
||||
image_path,
|
||||
mask_path,
|
||||
n = 1,
|
||||
size = "1024x1024",
|
||||
quality = "standard",
|
||||
style = "vivid",
|
||||
output_dir = "."
|
||||
} = args;
|
||||
|
||||
let mode = args.mode;
|
||||
let model = args.model || config.imageModel || "dall-e-3";
|
||||
|
||||
// Infer mode if not provided
|
||||
if (!mode) {
|
||||
if (image_path && mask_path) mode = "edit";
|
||||
else if (image_path) mode = "variation";
|
||||
else mode = "text-to-image";
|
||||
}
|
||||
|
||||
// Model-specific validations
|
||||
if (mode === "text-to-image") {
|
||||
// DALL-E 3 Validation
|
||||
if (model === "dall-e-3") {
|
||||
const validSizes = ["1024x1024", "1024x1792", "1792x1024"];
|
||||
if (!validSizes.includes(size)) {
|
||||
return `Error: Invalid size '${size}' for DALL-E 3. Supported sizes are: ${validSizes.join(", ")}.`;
|
||||
}
|
||||
}
|
||||
// DALL-E 2 Validation
|
||||
else if (model === "dall-e-2") {
|
||||
const validSizes = ["256x256", "512x512", "1024x1024"];
|
||||
if (!validSizes.includes(size)) {
|
||||
return `Error: Invalid size '${size}' for DALL-E 2. Supported sizes are: ${validSizes.join(", ")}.`;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// Variation and Edit only support DALL-E 2 currently
|
||||
if (model === "dall-e-3") {
|
||||
console.log("Note: DALL-E 3 does not support variation/edit. Falling back to DALL-E 2.");
|
||||
model = "dall-e-2";
|
||||
}
|
||||
}
|
||||
|
||||
// Resolve output directory
|
||||
const resolvedOutputDir = path.resolve(process.cwd(), output_dir);
|
||||
if (!fs.existsSync(resolvedOutputDir)) {
|
||||
fs.mkdirSync(resolvedOutputDir, { recursive: true });
|
||||
}
|
||||
|
||||
const generatedFiles: string[] = [];
|
||||
|
||||
try {
|
||||
if (mode === "text-to-image") {
|
||||
if (!prompt) return "Error: 'prompt' is required for text-to-image mode.";
|
||||
|
||||
console.log(`Generating ${n} image(s) with ${model} (${size}, ${quality})...`);
|
||||
|
||||
if (model === "dall-e-3") {
|
||||
for (let i = 0; i < n; i++) {
|
||||
const response = await client.images.generate({
|
||||
model: "dall-e-3",
|
||||
prompt: prompt,
|
||||
n: 1, // DALL-E 3 constraint
|
||||
size: size as any,
|
||||
quality: quality as any,
|
||||
style: style as any,
|
||||
response_format: "url"
|
||||
});
|
||||
|
||||
const imageUrl = response.data?.[0]?.url;
|
||||
if (imageUrl) {
|
||||
const fileName = `generated-${Date.now()}-${i + 1}.png`;
|
||||
const filePath = path.join(resolvedOutputDir, fileName);
|
||||
await downloadImage(imageUrl, filePath);
|
||||
generatedFiles.push(filePath);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// DALL-E 2
|
||||
const response = await client.images.generate({
|
||||
model: "dall-e-2",
|
||||
prompt: prompt,
|
||||
n: n,
|
||||
size: size as any,
|
||||
response_format: "url"
|
||||
});
|
||||
|
||||
const data = response.data || [];
|
||||
for (let i = 0; i < data.length; i++) {
|
||||
const imageUrl = data[i].url;
|
||||
if (imageUrl) {
|
||||
const fileName = `generated-${Date.now()}-${i + 1}.png`;
|
||||
const filePath = path.join(resolvedOutputDir, fileName);
|
||||
await downloadImage(imageUrl, filePath);
|
||||
generatedFiles.push(filePath);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} else if (mode === "variation") {
|
||||
if (!image_path) return "Error: 'image_path' is required for variation mode.";
|
||||
if (!fs.existsSync(image_path)) return `Error: Image file not found at ${image_path}`;
|
||||
|
||||
console.log(`Generating ${n} variation(s) with ${model}...`);
|
||||
|
||||
const response = await client.images.createVariation({
|
||||
image: fs.createReadStream(image_path),
|
||||
n: n,
|
||||
model: "dall-e-2", // Explicitly set model just in case, though it's the default/only option
|
||||
size: size as any,
|
||||
response_format: "url"
|
||||
});
|
||||
|
||||
const data = response.data || [];
|
||||
for (let i = 0; i < data.length; i++) {
|
||||
const imageUrl = data[i].url;
|
||||
if (imageUrl) {
|
||||
const fileName = `variation-${Date.now()}-${i + 1}.png`;
|
||||
const filePath = path.join(resolvedOutputDir, fileName);
|
||||
await downloadImage(imageUrl, filePath);
|
||||
generatedFiles.push(filePath);
|
||||
}
|
||||
}
|
||||
|
||||
} else if (mode === "edit") {
|
||||
if (!image_path) return "Error: 'image_path' is required for edit mode.";
|
||||
if (!prompt) return "Error: 'prompt' is required for edit mode.";
|
||||
if (!fs.existsSync(image_path)) return `Error: Image file not found at ${image_path}`;
|
||||
|
||||
console.log(`Editing image with ${model}...`);
|
||||
|
||||
const params: any = {
|
||||
image: fs.createReadStream(image_path),
|
||||
prompt: prompt,
|
||||
n: n,
|
||||
model: "dall-e-2",
|
||||
size: size as any,
|
||||
response_format: "url"
|
||||
};
|
||||
|
||||
if (mask_path && fs.existsSync(mask_path)) {
|
||||
params.mask = fs.createReadStream(mask_path);
|
||||
}
|
||||
|
||||
const response = await client.images.edit(params);
|
||||
|
||||
const data = response.data || [];
|
||||
for (let i = 0; i < data.length; i++) {
|
||||
const imageUrl = data[i].url;
|
||||
if (imageUrl) {
|
||||
const fileName = `edited-${Date.now()}-${i + 1}.png`;
|
||||
const filePath = path.join(resolvedOutputDir, fileName);
|
||||
await downloadImage(imageUrl, filePath);
|
||||
generatedFiles.push(filePath);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
return `Error: Unknown mode '${mode}'.`;
|
||||
}
|
||||
|
||||
return `Successfully generated ${generatedFiles.length} image(s):\n${generatedFiles.join('\n')}`;
|
||||
|
||||
} catch (error: any) {
|
||||
return `Error generating image: ${error.message}`;
|
||||
}
|
||||
};
|
||||
|
||||
export const ImageTool: ToolModule = {
|
||||
name: "Image Generation",
|
||||
definition: toolDefinition as any,
|
||||
handler: handler
|
||||
};
|
||||
|
|
@ -5,6 +5,7 @@ import { SearchTool } from './search.js';
|
|||
import { NotifyTool } from './notify.js';
|
||||
import { BrowserTool } from './browser.js';
|
||||
import { ScreenshotTool } from './screenshot.js';
|
||||
import { ImageTool } from './image.js';
|
||||
|
||||
// Central Registry of all available tools
|
||||
export const toolRegistry: ToolModule[] = [
|
||||
|
|
@ -16,7 +17,8 @@ export const toolRegistry: ToolModule[] = [
|
|||
SearchTool,
|
||||
NotifyTool,
|
||||
BrowserTool,
|
||||
ScreenshotTool
|
||||
ScreenshotTool,
|
||||
ImageTool
|
||||
];
|
||||
|
||||
export function getToolDefinitions() {
|
||||
|
|
|
|||
Loading…
Reference in a new issue