Merge pull request #10 from ystyleb/feat/gemini-image
Reviewed and improved: added per-provider API key support, moved key to header, fixed size param, improved error handling.
This commit is contained in:
commit
c2e0256a4f
5 changed files with 133 additions and 18 deletions
|
|
@ -192,7 +192,7 @@ wewrite/
|
|||
│ ├── theme.py # YAML 主题引擎
|
||||
│ ├── publisher.py # 微信草稿箱 API + 小绿书图片帖
|
||||
│ ├── wechat_api.py # access_token / 图片上传
|
||||
│ ├── image_gen.py # AI 图片生成(doubao / OpenAI)
|
||||
│ ├── image_gen.py # AI 图片生成(doubao / OpenAI / Gemini)
|
||||
│ └── themes/ # 16+ 排版主题(含暗黑模式,可从文章学习新增)
|
||||
│
|
||||
├── personas/ # 5 套写作人格预设(含朱雀实测数据)
|
||||
|
|
|
|||
|
|
@ -9,7 +9,7 @@ wechat:
|
|||
|
||||
# AI 图片生成
|
||||
image:
|
||||
# 可选 provider: doubao | openai
|
||||
# 可选 provider: doubao | openai | gemini
|
||||
provider: "doubao"
|
||||
api_key: "your_api_key"
|
||||
|
||||
|
|
@ -24,5 +24,11 @@ image:
|
|||
# model: "dall-e-3"
|
||||
# base_url: "https://api.openai.com/v1"
|
||||
|
||||
# Google Gemini (native image generation via the generateContent API)
|
||||
# provider: "gemini"
|
||||
# api_key: "AIza..."
|
||||
# 获取 API key: https://aistudio.google.com/apikey
|
||||
# model: "gemini-3.1-flash-image-preview"
|
||||
|
||||
# 默认排版主题
|
||||
theme: "professional-clean"
|
||||
|
|
|
|||
8
dist/openclaw/config.example.yaml
vendored
8
dist/openclaw/config.example.yaml
vendored
|
|
@ -9,7 +9,7 @@ wechat:
|
|||
|
||||
# AI 图片生成
|
||||
image:
|
||||
# 可选 provider: doubao | openai
|
||||
# 可选 provider: doubao | openai | gemini
|
||||
provider: "doubao"
|
||||
api_key: "your_api_key"
|
||||
|
||||
|
|
@ -24,5 +24,11 @@ image:
|
|||
# model: "dall-e-3"
|
||||
# base_url: "https://api.openai.com/v1"
|
||||
|
||||
# Google Gemini (native image generation via the generateContent API)
|
||||
# provider: "gemini"
|
||||
# api_key: "AIza..."
|
||||
# 获取 API key: https://aistudio.google.com/apikey
|
||||
# model: "gemini-3.1-flash-image-preview"
|
||||
|
||||
# 默认排版主题
|
||||
theme: "professional-clean"
|
||||
|
|
|
|||
67
dist/openclaw/toolkit/image_gen.py
vendored
67
dist/openclaw/toolkit/image_gen.py
vendored
|
|
@ -5,12 +5,13 @@ AI image generation module for WeWrite.
|
|||
Supports multiple providers via a simple abstraction:
|
||||
- doubao-seedream (Volcengine Ark) — default, good for Chinese prompts
|
||||
- openai (DALL-E 3) — broad availability
|
||||
- gemini (Google Gemini Imagen) — multimodal image generation
|
||||
- Custom providers via ImageProvider base class
|
||||
|
||||
Usage as CLI:
|
||||
python3 image_gen.py --prompt "描述" --output cover.png
|
||||
python3 image_gen.py --prompt "描述" --output cover.png --size cover
|
||||
python3 image_gen.py --prompt "描述" --output cover.png --provider openai
|
||||
python3 image_gen.py --prompt "描述" --output cover.png --provider gemini
|
||||
|
||||
Usage as module:
|
||||
from image_gen import generate_image
|
||||
|
|
@ -19,6 +20,7 @@ Usage as module:
|
|||
|
||||
import abc
|
||||
import argparse
|
||||
import base64
|
||||
import json
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
|
@ -50,10 +52,10 @@ def _load_config() -> dict:
|
|||
# Article: 16:9 横版内文配图
|
||||
# Vertical: 9:16 竖版
|
||||
SIZE_PRESETS = {
|
||||
"cover": {"doubao": "2952x1256", "openai": "1792x1024"},
|
||||
"article": {"doubao": "2560x1440", "openai": "1792x1024"},
|
||||
"vertical": {"doubao": "1088x2560", "openai": "1024x1792"},
|
||||
"square": {"doubao": "2048x2048", "openai": "1024x1024"},
|
||||
"cover": {"doubao": "2952x1256", "openai": "1792x1024", "gemini": "1792x1024"},
|
||||
"article": {"doubao": "2560x1440", "openai": "1792x1024", "gemini": "1792x1024"},
|
||||
"vertical": {"doubao": "1088x2560", "openai": "1024x1792", "gemini": "1024x1792"},
|
||||
"square": {"doubao": "2048x2048", "openai": "1024x1024", "gemini": "1024x1024"},
|
||||
}
|
||||
|
||||
MAX_FILE_SIZE = 5 * 1024 * 1024 # 5MB
|
||||
|
|
@ -211,14 +213,63 @@ class OpenAIProvider(ImageProvider):
|
|||
return img_resp.content
|
||||
|
||||
|
||||
class GeminiProvider(ImageProvider):
    """Image provider that calls the Gemini ``generateContent`` REST endpoint.

    The API key is sent in the ``x-goog-api-key`` request header, so it never
    appears in the request URL.
    """

    provider_key = "gemini"

    def __init__(self, api_key: str, model: str = "gemini-3.1-flash-image-preview",
                 base_url: str = "https://generativelanguage.googleapis.com/v1beta"):
        """Store the credentials and endpoint settings used by ``generate``.

        Args:
            api_key: Key sent in the ``x-goog-api-key`` header.
            model: Model identifier interpolated into the request URL.
            base_url: API root; a trailing slash is tolerated.
        """
        self._api_key = api_key
        self._model = model
        # Strip trailing slashes so the joined URL never contains "//models".
        self._base_url = base_url.rstrip("/")

    @staticmethod
    def _error_message(resp) -> str:
        """Extract a short, human-readable message from a non-200 response."""
        fallback = resp.text[:200]
        try:
            payload = resp.json()
        except ValueError:
            # Body is not JSON (e.g. an HTML error page from a proxy).
            return fallback
        error = payload.get("error") if isinstance(payload, dict) else None
        if isinstance(error, dict):
            return error.get("message") or fallback
        if isinstance(error, str) and error:
            return error
        return fallback

    def generate(self, prompt: str, size: str) -> bytes:
        """Generate one image and return its decoded bytes.

        Args:
            prompt: Text description of the image.
            size: ``"<width>x<height>"`` string. It is forwarded to the model
                as a prompt instruction; any other value is ignored.

        Returns:
            The bytes of the first ``image/*`` inline part in the response.

        Raises:
            ValueError: On a non-200 status, a response without candidates,
                or a response that contains no usable image part.
        """
        # The request body built below carries no size field, so the requested
        # dimensions are conveyed to the model as a plain-text instruction.
        if size and "x" in size:
            w, h = size.split("x", 1)
            prompt = f"{prompt}\n\nGenerate this image at {w}x{h} resolution."

        body = {
            "contents": [{"parts": [{"text": prompt}]}],
            "generationConfig": {"responseModalities": ["TEXT", "IMAGE"]},
        }
        resp = requests.post(
            f"{self._base_url}/models/{self._model}:generateContent",
            headers={
                "Content-Type": "application/json",
                "x-goog-api-key": self._api_key,
            },
            json=body,
            timeout=120,
        )
        if resp.status_code != 200:
            raise ValueError(
                f"Gemini API error ({resp.status_code}): {self._error_message(resp)}"
            )

        data = resp.json()
        if not isinstance(data, dict):
            raise ValueError("Unexpected Gemini response format")

        candidates = data.get("candidates") or []
        if not candidates:
            # A blocked prompt yields no candidates; surface the reason if given.
            feedback = data.get("promptFeedback")
            reason = feedback.get("blockReason") if isinstance(feedback, dict) else None
            detail = f" (blockReason: {reason})" if reason else ""
            raise ValueError(f"No candidates in Gemini response{detail}")

        candidate = candidates[0] if isinstance(candidates[0], dict) else {}
        content = candidate.get("content")
        parts = content.get("parts") if isinstance(content, dict) else None

        for part in parts or []:
            inline_data = part.get("inlineData") if isinstance(part, dict) else None
            if not isinstance(inline_data, dict):
                continue
            mime_type = inline_data.get("mimeType") or ""
            encoded = inline_data.get("data")
            # Skip malformed parts instead of failing with KeyError/TypeError.
            if mime_type.startswith("image/") and encoded:
                return base64.b64decode(encoded)

        finish_reason = candidate.get("finishReason")
        detail = f" (finishReason: {finish_reason})" if finish_reason else ""
        raise ValueError(f"No image found in Gemini response parts{detail}")
|
||||
|
||||
|
||||
# --- Provider registry ---
|
||||
|
||||
PROVIDERS = {
|
||||
"doubao": DoubaoProvider,
|
||||
"openai": OpenAIProvider,
|
||||
"gemini": GeminiProvider,
|
||||
}
|
||||
|
||||
|
||||
def _build_provider(config: dict) -> ImageProvider:
|
||||
"""Build an ImageProvider from config.yaml's image section."""
|
||||
img_cfg = config.get("image", {})
|
||||
|
|
@ -287,7 +338,7 @@ def generate_image(
|
|||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(
|
||||
description="Generate images using AI (doubao-seedream, OpenAI DALL-E, etc.)"
|
||||
description="Generate images using AI (doubao-seedream, OpenAI DALL-E, Gemini Imagen, etc.)"
|
||||
)
|
||||
parser.add_argument("--prompt", required=True, help="Image generation prompt")
|
||||
parser.add_argument("--output", required=True, help="Output file path")
|
||||
|
|
@ -299,7 +350,7 @@ def main():
|
|||
parser.add_argument(
|
||||
"--provider",
|
||||
default=None,
|
||||
help="Override provider (doubao, openai). Default: from config.yaml",
|
||||
help="Override provider (doubao, openai, gemini). Default: from config.yaml",
|
||||
)
|
||||
args = parser.parse_args()
|
||||
|
||||
|
|
|
|||
|
|
@ -5,12 +5,13 @@ AI image generation module for WeWrite.
|
|||
Supports multiple providers via a simple abstraction:
|
||||
- doubao-seedream (Volcengine Ark) — default, good for Chinese prompts
|
||||
- openai (DALL-E 3) — broad availability
|
||||
- gemini (Google Gemini Imagen) — multimodal image generation
|
||||
- Custom providers via ImageProvider base class
|
||||
|
||||
Usage as CLI:
|
||||
python3 image_gen.py --prompt "描述" --output cover.png
|
||||
python3 image_gen.py --prompt "描述" --output cover.png --size cover
|
||||
python3 image_gen.py --prompt "描述" --output cover.png --provider openai
|
||||
python3 image_gen.py --prompt "描述" --output cover.png --provider gemini
|
||||
|
||||
Usage as module:
|
||||
from image_gen import generate_image
|
||||
|
|
@ -19,6 +20,7 @@ Usage as module:
|
|||
|
||||
import abc
|
||||
import argparse
|
||||
import base64
|
||||
import json
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
|
@ -50,10 +52,10 @@ def _load_config() -> dict:
|
|||
# Article: 16:9 横版内文配图
|
||||
# Vertical: 9:16 竖版
|
||||
SIZE_PRESETS = {
|
||||
"cover": {"doubao": "2952x1256", "openai": "1792x1024"},
|
||||
"article": {"doubao": "2560x1440", "openai": "1792x1024"},
|
||||
"vertical": {"doubao": "1088x2560", "openai": "1024x1792"},
|
||||
"square": {"doubao": "2048x2048", "openai": "1024x1024"},
|
||||
"cover": {"doubao": "2952x1256", "openai": "1792x1024", "gemini": "1792x1024"},
|
||||
"article": {"doubao": "2560x1440", "openai": "1792x1024", "gemini": "1792x1024"},
|
||||
"vertical": {"doubao": "1088x2560", "openai": "1024x1792", "gemini": "1024x1792"},
|
||||
"square": {"doubao": "2048x2048", "openai": "1024x1024", "gemini": "1024x1024"},
|
||||
}
|
||||
|
||||
MAX_FILE_SIZE = 5 * 1024 * 1024 # 5MB
|
||||
|
|
@ -211,11 +213,61 @@ class OpenAIProvider(ImageProvider):
|
|||
return img_resp.content
|
||||
|
||||
|
||||
class GeminiProvider(ImageProvider):
    """Image provider that calls the Gemini ``generateContent`` REST endpoint.

    The API key is sent in the ``x-goog-api-key`` request header, so it never
    appears in the request URL.
    """

    provider_key = "gemini"

    def __init__(self, api_key: str, model: str = "gemini-3.1-flash-image-preview",
                 base_url: str = "https://generativelanguage.googleapis.com/v1beta"):
        """Store the credentials and endpoint settings used by ``generate``.

        Args:
            api_key: Key sent in the ``x-goog-api-key`` header.
            model: Model identifier interpolated into the request URL.
            base_url: API root; a trailing slash is tolerated.
        """
        self._api_key = api_key
        self._model = model
        # Strip trailing slashes so the joined URL never contains "//models".
        self._base_url = base_url.rstrip("/")

    @staticmethod
    def _error_message(resp) -> str:
        """Extract a short, human-readable message from a non-200 response."""
        fallback = resp.text[:200]
        try:
            payload = resp.json()
        except ValueError:
            # Body is not JSON (e.g. an HTML error page from a proxy).
            return fallback
        error = payload.get("error") if isinstance(payload, dict) else None
        if isinstance(error, dict):
            return error.get("message") or fallback
        if isinstance(error, str) and error:
            return error
        return fallback

    def generate(self, prompt: str, size: str) -> bytes:
        """Generate one image and return its decoded bytes.

        Args:
            prompt: Text description of the image.
            size: ``"<width>x<height>"`` string. It is forwarded to the model
                as a prompt instruction; any other value is ignored.

        Returns:
            The bytes of the first ``image/*`` inline part in the response.

        Raises:
            ValueError: On a non-200 status, a response without candidates,
                or a response that contains no usable image part.
        """
        # The request body built below carries no size field, so the requested
        # dimensions are conveyed to the model as a plain-text instruction.
        if size and "x" in size:
            w, h = size.split("x", 1)
            prompt = f"{prompt}\n\nGenerate this image at {w}x{h} resolution."

        body = {
            "contents": [{"parts": [{"text": prompt}]}],
            "generationConfig": {"responseModalities": ["TEXT", "IMAGE"]},
        }
        resp = requests.post(
            f"{self._base_url}/models/{self._model}:generateContent",
            headers={
                "Content-Type": "application/json",
                "x-goog-api-key": self._api_key,
            },
            json=body,
            timeout=120,
        )
        if resp.status_code != 200:
            raise ValueError(
                f"Gemini API error ({resp.status_code}): {self._error_message(resp)}"
            )

        data = resp.json()
        if not isinstance(data, dict):
            raise ValueError("Unexpected Gemini response format")

        candidates = data.get("candidates") or []
        if not candidates:
            # A blocked prompt yields no candidates; surface the reason if given.
            feedback = data.get("promptFeedback")
            reason = feedback.get("blockReason") if isinstance(feedback, dict) else None
            detail = f" (blockReason: {reason})" if reason else ""
            raise ValueError(f"No candidates in Gemini response{detail}")

        candidate = candidates[0] if isinstance(candidates[0], dict) else {}
        content = candidate.get("content")
        parts = content.get("parts") if isinstance(content, dict) else None

        for part in parts or []:
            inline_data = part.get("inlineData") if isinstance(part, dict) else None
            if not isinstance(inline_data, dict):
                continue
            mime_type = inline_data.get("mimeType") or ""
            encoded = inline_data.get("data")
            # Skip malformed parts instead of failing with KeyError/TypeError.
            if mime_type.startswith("image/") and encoded:
                return base64.b64decode(encoded)

        finish_reason = candidate.get("finishReason")
        detail = f" (finishReason: {finish_reason})" if finish_reason else ""
        raise ValueError(f"No image found in Gemini response parts{detail}")
|
||||
|
||||
|
||||
# --- Provider registry ---
|
||||
|
||||
PROVIDERS = {
|
||||
"doubao": DoubaoProvider,
|
||||
"openai": OpenAIProvider,
|
||||
"gemini": GeminiProvider,
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -287,7 +339,7 @@ def generate_image(
|
|||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(
|
||||
description="Generate images using AI (doubao-seedream, OpenAI DALL-E, etc.)"
|
||||
description="Generate images using AI (doubao-seedream, OpenAI DALL-E, Gemini Imagen, etc.)"
|
||||
)
|
||||
parser.add_argument("--prompt", required=True, help="Image generation prompt")
|
||||
parser.add_argument("--output", required=True, help="Output file path")
|
||||
|
|
@ -299,7 +351,7 @@ def main():
|
|||
parser.add_argument(
|
||||
"--provider",
|
||||
default=None,
|
||||
help="Override provider (doubao, openai). Default: from config.yaml",
|
||||
help="Override provider (doubao, openai, gemini). Default: from config.yaml",
|
||||
)
|
||||
args = parser.parse_args()
|
||||
|
||||
|
|
|
|||
Loading…
Reference in a new issue