diff --git a/README.md b/README.md index c61ea1d..dd98afb 100644 --- a/README.md +++ b/README.md @@ -192,7 +192,7 @@ wewrite/ │ ├── theme.py # YAML 主题引擎 │ ├── publisher.py # 微信草稿箱 API + 小绿书图片帖 │ ├── wechat_api.py # access_token / 图片上传 -│ ├── image_gen.py # AI 图片生成(doubao / OpenAI) +│ ├── image_gen.py # AI 图片生成(doubao / OpenAI / Gemini) │ └── themes/ # 16+ 排版主题(含暗黑模式,可从文章学习新增) │ ├── personas/ # 5 套写作人格预设(含朱雀实测数据) diff --git a/config.example.yaml b/config.example.yaml index 3553c32..9562d9f 100644 --- a/config.example.yaml +++ b/config.example.yaml @@ -9,7 +9,7 @@ wechat: # AI 图片生成 image: - # 可选 provider: doubao | openai + # 可选 provider: doubao | openai | gemini provider: "doubao" api_key: "your_api_key" @@ -24,5 +24,11 @@ image: # model: "dall-e-3" # base_url: "https://api.openai.com/v1" + # Google Gemini Imagen + # provider: "gemini" + # api_key: "AIza..." + # 获取 API key: https://aistudio.google.com/apikey + # model: "gemini-3.1-flash-image-preview" + # 默认排版主题 theme: "professional-clean" diff --git a/dist/openclaw/config.example.yaml b/dist/openclaw/config.example.yaml index 3553c32..9562d9f 100644 --- a/dist/openclaw/config.example.yaml +++ b/dist/openclaw/config.example.yaml @@ -9,7 +9,7 @@ wechat: # AI 图片生成 image: - # 可选 provider: doubao | openai + # 可选 provider: doubao | openai | gemini provider: "doubao" api_key: "your_api_key" @@ -24,5 +24,11 @@ image: # model: "dall-e-3" # base_url: "https://api.openai.com/v1" + # Google Gemini Imagen + # provider: "gemini" + # api_key: "AIza..." + # 获取 API key: https://aistudio.google.com/apikey + # model: "gemini-3.1-flash-image-preview" + # 默认排版主题 theme: "professional-clean" diff --git a/dist/openclaw/toolkit/image_gen.py b/dist/openclaw/toolkit/image_gen.py index 089e566..5651de3 100644 --- a/dist/openclaw/toolkit/image_gen.py +++ b/dist/openclaw/toolkit/image_gen.py @@ -5,12 +5,13 @@ AI image generation module for WeWrite. 
class GeminiProvider(ImageProvider):
    """Image provider backed by the Gemini ``generateContent`` REST endpoint.

    The prompt is sent as a single text part with TEXT and IMAGE response
    modalities requested; the first inline image part of the first candidate
    is decoded from base64 and returned as raw bytes.
    """

    provider_key = "gemini"

    # Values accepted by ``generationConfig.imageConfig.aspectRatio``.
    _ASPECT_RATIOS = (
        "1:1", "2:3", "3:2", "3:4", "4:3", "4:5", "5:4", "9:16", "16:9", "21:9",
    )

    def __init__(self, api_key: str, model: str = "gemini-3.1-flash-image-preview",
                 base_url: str = "https://generativelanguage.googleapis.com/v1beta"):
        """Store the credentials and endpoint settings used by ``generate``.

        Args:
            api_key: Gemini API key, sent with every request.
            model: Model name interpolated into the request URL.
            base_url: API root; a trailing slash is tolerated.
        """
        self._api_key = api_key
        self._model = model
        self._base_url = base_url

    @classmethod
    def _aspect_ratio_for(cls, size):
        """Return the supported aspect ratio closest to a ``"WxH"`` string.

        Returns ``None`` when *size* is not two positive integers joined by
        ``x``, so the caller can omit the setting and keep the API default.
        """
        try:
            width_text, height_text = str(size).lower().split("x")
            width, height = int(width_text), int(height_text)
        except (TypeError, ValueError):
            return None
        if width <= 0 or height <= 0:
            return None

        target = width / height

        def distance(ratio):
            ratio_w, ratio_h = ratio.split(":")
            return abs(target - int(ratio_w) / int(ratio_h))

        return min(cls._ASPECT_RATIOS, key=distance)

    def generate(self, prompt: str, size: str) -> bytes:
        """Generate one image for *prompt* and return its raw bytes.

        Args:
            prompt: Text prompt sent as the only content part.
            size: ``"WxH"`` pixel size. The API takes an aspect ratio rather
                than pixel dimensions, so this is mapped to the nearest
                supported ratio; an unparsable value is ignored.

        Returns:
            The decoded bytes of the first ``image/*`` inline part.

        Raises:
            ValueError: On a non-200 status, a non-JSON body, a response
                without candidates, or a response without an image part.
        """
        generation_config = {"responseModalities": ["TEXT", "IMAGE"]}
        aspect_ratio = self._aspect_ratio_for(size)
        if aspect_ratio is not None:
            generation_config["imageConfig"] = {"aspectRatio": aspect_ratio}
        body = {
            "contents": [{"parts": [{"text": prompt}]}],
            "generationConfig": generation_config,
        }

        url = f"{self._base_url.rstrip('/')}/models/{self._model}:generateContent"
        # The key travels in a header rather than the query string so it does
        # not end up in URLs printed by logs, proxies, or requests exceptions.
        headers = {
            "Content-Type": "application/json",
            "x-goog-api-key": self._api_key,
        }
        with requests.Session() as session:
            # Ignore proxy/netrc settings taken from the environment.
            session.trust_env = False
            resp = session.post(url, headers=headers, json=body, timeout=120)

        try:
            data = resp.json()
        except ValueError:
            # Gateways can answer with HTML or plain text; report the status
            # and a short excerpt instead of a bare JSON decode error.
            raise ValueError(
                f"Gemini API returned a non-JSON response "
                f"({resp.status_code}): {resp.text[:200]}"
            ) from None

        if resp.status_code != 200:
            error = data.get("error", {})
            msg = error.get("message", json.dumps(data, ensure_ascii=False))
            raise ValueError(f"Gemini API error ({resp.status_code}): {msg}")

        candidates = data.get("candidates", [])
        if not candidates:
            block_reason = (data.get("promptFeedback") or {}).get("blockReason")
            detail = f" (blockReason: {block_reason})" if block_reason else ""
            raise ValueError(f"No candidates in Gemini response{detail}")

        parts = (candidates[0].get("content") or {}).get("parts") or []
        for part in parts:
            inline_data = part.get("inlineData") or {}
            encoded = inline_data.get("data")
            if encoded and inline_data.get("mimeType", "").startswith("image/"):
                return base64.b64decode(encoded)
        raise ValueError("No image found in Gemini response parts")
class GeminiProvider(ImageProvider):
    """Image provider backed by the Gemini ``generateContent`` REST endpoint.

    The prompt is sent as a single text part with TEXT and IMAGE response
    modalities requested; the first inline image part of the first candidate
    is decoded from base64 and returned as raw bytes.
    """

    provider_key = "gemini"

    # Values accepted by ``generationConfig.imageConfig.aspectRatio``.
    _ASPECT_RATIOS = (
        "1:1", "2:3", "3:2", "3:4", "4:3", "4:5", "5:4", "9:16", "16:9", "21:9",
    )

    def __init__(self, api_key: str, model: str = "gemini-3.1-flash-image-preview",
                 base_url: str = "https://generativelanguage.googleapis.com/v1beta"):
        """Store the credentials and endpoint settings used by ``generate``.

        Args:
            api_key: Gemini API key, sent with every request.
            model: Model name interpolated into the request URL.
            base_url: API root; a trailing slash is tolerated.
        """
        self._api_key = api_key
        self._model = model
        self._base_url = base_url

    @classmethod
    def _aspect_ratio_for(cls, size):
        """Return the supported aspect ratio closest to a ``"WxH"`` string.

        Returns ``None`` when *size* is not two positive integers joined by
        ``x``, so the caller can omit the setting and keep the API default.
        """
        try:
            width_text, height_text = str(size).lower().split("x")
            width, height = int(width_text), int(height_text)
        except (TypeError, ValueError):
            return None
        if width <= 0 or height <= 0:
            return None

        target = width / height

        def distance(ratio):
            ratio_w, ratio_h = ratio.split(":")
            return abs(target - int(ratio_w) / int(ratio_h))

        return min(cls._ASPECT_RATIOS, key=distance)

    def generate(self, prompt: str, size: str) -> bytes:
        """Generate one image for *prompt* and return its raw bytes.

        Args:
            prompt: Text prompt sent as the only content part.
            size: ``"WxH"`` pixel size. The API takes an aspect ratio rather
                than pixel dimensions, so this is mapped to the nearest
                supported ratio; an unparsable value is ignored.

        Returns:
            The decoded bytes of the first ``image/*`` inline part.

        Raises:
            ValueError: On a non-200 status, a non-JSON body, a response
                without candidates, or a response without an image part.
        """
        generation_config = {"responseModalities": ["TEXT", "IMAGE"]}
        aspect_ratio = self._aspect_ratio_for(size)
        if aspect_ratio is not None:
            generation_config["imageConfig"] = {"aspectRatio": aspect_ratio}
        body = {
            "contents": [{"parts": [{"text": prompt}]}],
            "generationConfig": generation_config,
        }

        url = f"{self._base_url.rstrip('/')}/models/{self._model}:generateContent"
        # The key travels in a header rather than the query string so it does
        # not end up in URLs printed by logs, proxies, or requests exceptions.
        headers = {
            "Content-Type": "application/json",
            "x-goog-api-key": self._api_key,
        }
        with requests.Session() as session:
            # Ignore proxy/netrc settings taken from the environment.
            session.trust_env = False
            resp = session.post(url, headers=headers, json=body, timeout=120)

        try:
            data = resp.json()
        except ValueError:
            # Gateways can answer with HTML or plain text; report the status
            # and a short excerpt instead of a bare JSON decode error.
            raise ValueError(
                f"Gemini API returned a non-JSON response "
                f"({resp.status_code}): {resp.text[:200]}"
            ) from None

        if resp.status_code != 200:
            error = data.get("error", {})
            msg = error.get("message", json.dumps(data, ensure_ascii=False))
            raise ValueError(f"Gemini API error ({resp.status_code}): {msg}")

        candidates = data.get("candidates", [])
        if not candidates:
            block_reason = (data.get("promptFeedback") or {}).get("blockReason")
            detail = f" (blockReason: {block_reason})" if block_reason else ""
            raise ValueError(f"No candidates in Gemini response{detail}")

        parts = (candidates[0].get("content") or {}).get("parts") or []
        for part in parts:
            inline_data = part.get("inlineData") or {}
            encoded = inline_data.get("data")
            if encoded and inline_data.get("mimeType", "").startswith("image/"):
                return base64.b64decode(encoded)
        raise ValueError("No image found in Gemini response parts")
Default: from config.yaml", ) args = parser.parse_args()