Merge pull request #10 from ystyleb/feat/gemini-image
Reviewed and improved: added per-provider API key support, moved key to header, fixed size param, improved error handling.
This commit is contained in:
commit
c2e0256a4f
5 changed files with 133 additions and 18 deletions
|
|
@ -192,7 +192,7 @@ wewrite/
|
||||||
│ ├── theme.py # YAML 主题引擎
|
│ ├── theme.py # YAML 主题引擎
|
||||||
│ ├── publisher.py # 微信草稿箱 API + 小绿书图片帖
|
│ ├── publisher.py # 微信草稿箱 API + 小绿书图片帖
|
||||||
│ ├── wechat_api.py # access_token / 图片上传
|
│ ├── wechat_api.py # access_token / 图片上传
|
||||||
│ ├── image_gen.py # AI 图片生成(doubao / OpenAI)
|
│ ├── image_gen.py # AI 图片生成(doubao / OpenAI / Gemini)
|
||||||
│ └── themes/ # 16+ 排版主题(含暗黑模式,可从文章学习新增)
|
│ └── themes/ # 16+ 排版主题(含暗黑模式,可从文章学习新增)
|
||||||
│
|
│
|
||||||
├── personas/ # 5 套写作人格预设(含朱雀实测数据)
|
├── personas/ # 5 套写作人格预设(含朱雀实测数据)
|
||||||
|
|
|
||||||
|
|
@ -9,7 +9,7 @@ wechat:
|
||||||
|
|
||||||
# AI 图片生成
|
# AI 图片生成
|
||||||
image:
|
image:
|
||||||
# 可选 provider: doubao | openai
|
# 可选 provider: doubao | openai | gemini
|
||||||
provider: "doubao"
|
provider: "doubao"
|
||||||
api_key: "your_api_key"
|
api_key: "your_api_key"
|
||||||
|
|
||||||
|
|
@ -24,5 +24,11 @@ image:
|
||||||
# model: "dall-e-3"
|
# model: "dall-e-3"
|
||||||
# base_url: "https://api.openai.com/v1"
|
# base_url: "https://api.openai.com/v1"
|
||||||
|
|
||||||
|
# Google Gemini Imagen
|
||||||
|
# provider: "gemini"
|
||||||
|
# api_key: "AIza..."
|
||||||
|
# 获取 API key: https://aistudio.google.com/apikey
|
||||||
|
# model: "gemini-3.1-flash-image-preview"
|
||||||
|
|
||||||
# 默认排版主题
|
# 默认排版主题
|
||||||
theme: "professional-clean"
|
theme: "professional-clean"
|
||||||
|
|
|
||||||
8
dist/openclaw/config.example.yaml
vendored
8
dist/openclaw/config.example.yaml
vendored
|
|
@ -9,7 +9,7 @@ wechat:
|
||||||
|
|
||||||
# AI 图片生成
|
# AI 图片生成
|
||||||
image:
|
image:
|
||||||
# 可选 provider: doubao | openai
|
# 可选 provider: doubao | openai | gemini
|
||||||
provider: "doubao"
|
provider: "doubao"
|
||||||
api_key: "your_api_key"
|
api_key: "your_api_key"
|
||||||
|
|
||||||
|
|
@ -24,5 +24,11 @@ image:
|
||||||
# model: "dall-e-3"
|
# model: "dall-e-3"
|
||||||
# base_url: "https://api.openai.com/v1"
|
# base_url: "https://api.openai.com/v1"
|
||||||
|
|
||||||
|
# Google Gemini Imagen
|
||||||
|
# provider: "gemini"
|
||||||
|
# api_key: "AIza..."
|
||||||
|
# 获取 API key: https://aistudio.google.com/apikey
|
||||||
|
# model: "gemini-3.1-flash-image-preview"
|
||||||
|
|
||||||
# 默认排版主题
|
# 默认排版主题
|
||||||
theme: "professional-clean"
|
theme: "professional-clean"
|
||||||
|
|
|
||||||
67
dist/openclaw/toolkit/image_gen.py
vendored
67
dist/openclaw/toolkit/image_gen.py
vendored
|
|
@ -5,12 +5,13 @@ AI image generation module for WeWrite.
|
||||||
Supports multiple providers via a simple abstraction:
|
Supports multiple providers via a simple abstraction:
|
||||||
- doubao-seedream (Volcengine Ark) — default, good for Chinese prompts
|
- doubao-seedream (Volcengine Ark) — default, good for Chinese prompts
|
||||||
- openai (DALL-E 3) — broad availability
|
- openai (DALL-E 3) — broad availability
|
||||||
|
- gemini (Google Gemini Imagen) — multimodal image generation
|
||||||
- Custom providers via ImageProvider base class
|
- Custom providers via ImageProvider base class
|
||||||
|
|
||||||
Usage as CLI:
|
Usage as CLI:
|
||||||
python3 image_gen.py --prompt "描述" --output cover.png
|
python3 image_gen.py --prompt "描述" --output cover.png
|
||||||
python3 image_gen.py --prompt "描述" --output cover.png --size cover
|
python3 image_gen.py --prompt "描述" --output cover.png --size cover
|
||||||
python3 image_gen.py --prompt "描述" --output cover.png --provider openai
|
python3 image_gen.py --prompt "描述" --output cover.png --provider gemini
|
||||||
|
|
||||||
Usage as module:
|
Usage as module:
|
||||||
from image_gen import generate_image
|
from image_gen import generate_image
|
||||||
|
|
@ -19,6 +20,7 @@ Usage as module:
|
||||||
|
|
||||||
import abc
|
import abc
|
||||||
import argparse
|
import argparse
|
||||||
|
import base64
|
||||||
import json
|
import json
|
||||||
import sys
|
import sys
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
@ -50,10 +52,10 @@ def _load_config() -> dict:
|
||||||
# Article: 16:9 横版内文配图
|
# Article: 16:9 横版内文配图
|
||||||
# Vertical: 9:16 竖版
|
# Vertical: 9:16 竖版
|
||||||
SIZE_PRESETS = {
|
SIZE_PRESETS = {
|
||||||
"cover": {"doubao": "2952x1256", "openai": "1792x1024"},
|
"cover": {"doubao": "2952x1256", "openai": "1792x1024", "gemini": "1792x1024"},
|
||||||
"article": {"doubao": "2560x1440", "openai": "1792x1024"},
|
"article": {"doubao": "2560x1440", "openai": "1792x1024", "gemini": "1792x1024"},
|
||||||
"vertical": {"doubao": "1088x2560", "openai": "1024x1792"},
|
"vertical": {"doubao": "1088x2560", "openai": "1024x1792", "gemini": "1024x1792"},
|
||||||
"square": {"doubao": "2048x2048", "openai": "1024x1024"},
|
"square": {"doubao": "2048x2048", "openai": "1024x1024", "gemini": "1024x1024"},
|
||||||
}
|
}
|
||||||
|
|
||||||
MAX_FILE_SIZE = 5 * 1024 * 1024 # 5MB
|
MAX_FILE_SIZE = 5 * 1024 * 1024 # 5MB
|
||||||
|
|
@ -211,14 +213,63 @@ class OpenAIProvider(ImageProvider):
|
||||||
return img_resp.content
|
return img_resp.content
|
||||||
|
|
||||||
|
|
||||||
|
class GeminiProvider(ImageProvider):
    """Google Gemini image generation provider (``generateContent`` REST API).

    Posts the prompt to ``{base_url}/models/{model}:generateContent`` and
    returns the first inline ``image/*`` part found in the response.
    """

    provider_key = "gemini"

    def __init__(self, api_key: str, model: str = "gemini-3.1-flash-image-preview",
                 base_url: str = "https://generativelanguage.googleapis.com/v1beta"):
        """Store credentials and endpoint settings.

        Args:
            api_key: API key, sent in the ``x-goog-api-key`` request header.
            model: Model name interpolated into the request URL.
            base_url: API root; trailing slashes are tolerated.
        """
        self._api_key = api_key
        self._model = model
        # Strip trailing slashes so the URL built in generate() never
        # contains "//models".
        self._base_url = base_url.rstrip("/")

    @staticmethod
    def _error_message(resp) -> str:
        """Return a short human-readable message for a failed response."""
        fallback = resp.text[:200]
        try:
            payload = resp.json()
        except ValueError:
            # Body is not JSON (e.g. an HTML error page from a proxy).
            return fallback
        # Guard the shape: a non-dict body or a non-dict "error" value would
        # otherwise raise AttributeError and mask the real HTTP failure.
        error = payload.get("error") if isinstance(payload, dict) else None
        if isinstance(error, dict):
            return error.get("message", fallback)
        return fallback

    def generate(self, prompt: str, size: str) -> bytes:
        """Generate one image and return its decoded bytes.

        Args:
            prompt: Text description of the image.
            size: Size string such as ``"1792x1024"``. No size field is sent
                in the request body; when the string contains ``"x"`` it is
                appended to the prompt as a resolution hint instead.

        Returns:
            The decoded bytes of the first ``image/*`` inline part.

        Raises:
            ValueError: On a non-200 response, an unparsable or unexpectedly
                shaped response body, a response without candidates, or a
                response that carries no image.
        """
        # The size is conveyed through the prompt text only.
        if "x" in size:
            w, h = size.split("x", 1)
            prompt = f"{prompt}\n\nGenerate this image at {w}x{h} resolution."

        body = {
            "contents": [{"parts": [{"text": prompt}]}],
            "generationConfig": {"responseModalities": ["TEXT", "IMAGE"]},
        }
        resp = requests.post(
            f"{self._base_url}/models/{self._model}:generateContent",
            headers={
                "Content-Type": "application/json",
                "x-goog-api-key": self._api_key,
            },
            json=body,
            timeout=120,
        )
        if resp.status_code != 200:
            msg = self._error_message(resp)
            raise ValueError(f"Gemini API error ({resp.status_code}): {msg}")

        try:
            data = resp.json()
        except ValueError as exc:
            raise ValueError(
                f"Gemini API returned invalid JSON: {resp.text[:200]}"
            ) from exc
        if not isinstance(data, dict):
            raise ValueError("Unexpected Gemini response format")

        candidates = data.get("candidates")
        if not isinstance(candidates, list) or not candidates:
            # Surface the block reason when the response provides one, so the
            # caller can tell a rejected prompt from an empty reply.
            feedback = data.get("promptFeedback")
            reason = feedback.get("blockReason") if isinstance(feedback, dict) else None
            if reason:
                raise ValueError(
                    f"No candidates in Gemini response (prompt blocked: {reason})"
                )
            raise ValueError("No candidates in Gemini response")

        first = candidates[0] if isinstance(candidates[0], dict) else {}
        content = first.get("content")
        parts = content.get("parts") if isinstance(content, dict) else None

        text_notes = []
        for part in parts or []:
            if not isinstance(part, dict):
                continue
            inline_data = part.get("inlineData")
            if (
                isinstance(inline_data, dict)
                and str(inline_data.get("mimeType", "")).startswith("image/")
                and inline_data.get("data")
            ):
                return base64.b64decode(inline_data["data"])
            if part.get("text"):
                # Keep any text the model returned; it usually explains why
                # no image was produced.
                text_notes.append(str(part["text"]))

        details = []
        finish_reason = first.get("finishReason")
        if finish_reason:
            details.append(f"finishReason={finish_reason}")
        if text_notes:
            snippet = " ".join(text_notes)[:200]
            details.append(f"text={snippet}")
        suffix = " (" + "; ".join(details) + ")" if details else ""
        raise ValueError("No image found in Gemini response parts" + suffix)
|
||||||
|
|
||||||
|
|
||||||
# --- Provider registry ---
|
# --- Provider registry ---
|
||||||
|
|
||||||
PROVIDERS = {
|
PROVIDERS = {
|
||||||
"doubao": DoubaoProvider,
|
"doubao": DoubaoProvider,
|
||||||
"openai": OpenAIProvider,
|
"openai": OpenAIProvider,
|
||||||
|
"gemini": GeminiProvider,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
def _build_provider(config: dict) -> ImageProvider:
|
def _build_provider(config: dict) -> ImageProvider:
|
||||||
"""Build an ImageProvider from config.yaml's image section."""
|
"""Build an ImageProvider from config.yaml's image section."""
|
||||||
img_cfg = config.get("image", {})
|
img_cfg = config.get("image", {})
|
||||||
|
|
@ -287,7 +338,7 @@ def generate_image(
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
parser = argparse.ArgumentParser(
|
parser = argparse.ArgumentParser(
|
||||||
description="Generate images using AI (doubao-seedream, OpenAI DALL-E, etc.)"
|
description="Generate images using AI (doubao-seedream, OpenAI DALL-E, Gemini Imagen, etc.)"
|
||||||
)
|
)
|
||||||
parser.add_argument("--prompt", required=True, help="Image generation prompt")
|
parser.add_argument("--prompt", required=True, help="Image generation prompt")
|
||||||
parser.add_argument("--output", required=True, help="Output file path")
|
parser.add_argument("--output", required=True, help="Output file path")
|
||||||
|
|
@ -299,7 +350,7 @@ def main():
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--provider",
|
"--provider",
|
||||||
default=None,
|
default=None,
|
||||||
help="Override provider (doubao, openai). Default: from config.yaml",
|
help="Override provider (doubao, openai, gemini). Default: from config.yaml",
|
||||||
)
|
)
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -5,12 +5,13 @@ AI image generation module for WeWrite.
|
||||||
Supports multiple providers via a simple abstraction:
|
Supports multiple providers via a simple abstraction:
|
||||||
- doubao-seedream (Volcengine Ark) — default, good for Chinese prompts
|
- doubao-seedream (Volcengine Ark) — default, good for Chinese prompts
|
||||||
- openai (DALL-E 3) — broad availability
|
- openai (DALL-E 3) — broad availability
|
||||||
|
- gemini (Google Gemini Imagen) — multimodal image generation
|
||||||
- Custom providers via ImageProvider base class
|
- Custom providers via ImageProvider base class
|
||||||
|
|
||||||
Usage as CLI:
|
Usage as CLI:
|
||||||
python3 image_gen.py --prompt "描述" --output cover.png
|
python3 image_gen.py --prompt "描述" --output cover.png
|
||||||
python3 image_gen.py --prompt "描述" --output cover.png --size cover
|
python3 image_gen.py --prompt "描述" --output cover.png --size cover
|
||||||
python3 image_gen.py --prompt "描述" --output cover.png --provider openai
|
python3 image_gen.py --prompt "描述" --output cover.png --provider gemini
|
||||||
|
|
||||||
Usage as module:
|
Usage as module:
|
||||||
from image_gen import generate_image
|
from image_gen import generate_image
|
||||||
|
|
@ -19,6 +20,7 @@ Usage as module:
|
||||||
|
|
||||||
import abc
|
import abc
|
||||||
import argparse
|
import argparse
|
||||||
|
import base64
|
||||||
import json
|
import json
|
||||||
import sys
|
import sys
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
@ -50,10 +52,10 @@ def _load_config() -> dict:
|
||||||
# Article: 16:9 横版内文配图
|
# Article: 16:9 横版内文配图
|
||||||
# Vertical: 9:16 竖版
|
# Vertical: 9:16 竖版
|
||||||
SIZE_PRESETS = {
|
SIZE_PRESETS = {
|
||||||
"cover": {"doubao": "2952x1256", "openai": "1792x1024"},
|
"cover": {"doubao": "2952x1256", "openai": "1792x1024", "gemini": "1792x1024"},
|
||||||
"article": {"doubao": "2560x1440", "openai": "1792x1024"},
|
"article": {"doubao": "2560x1440", "openai": "1792x1024", "gemini": "1792x1024"},
|
||||||
"vertical": {"doubao": "1088x2560", "openai": "1024x1792"},
|
"vertical": {"doubao": "1088x2560", "openai": "1024x1792", "gemini": "1024x1792"},
|
||||||
"square": {"doubao": "2048x2048", "openai": "1024x1024"},
|
"square": {"doubao": "2048x2048", "openai": "1024x1024", "gemini": "1024x1024"},
|
||||||
}
|
}
|
||||||
|
|
||||||
MAX_FILE_SIZE = 5 * 1024 * 1024 # 5MB
|
MAX_FILE_SIZE = 5 * 1024 * 1024 # 5MB
|
||||||
|
|
@ -211,11 +213,61 @@ class OpenAIProvider(ImageProvider):
|
||||||
return img_resp.content
|
return img_resp.content
|
||||||
|
|
||||||
|
|
||||||
|
class GeminiProvider(ImageProvider):
    """Google Gemini image generation provider (``generateContent`` REST API).

    Posts the prompt to ``{base_url}/models/{model}:generateContent`` and
    returns the first inline ``image/*`` part found in the response.
    """

    provider_key = "gemini"

    def __init__(self, api_key: str, model: str = "gemini-3.1-flash-image-preview",
                 base_url: str = "https://generativelanguage.googleapis.com/v1beta"):
        """Store credentials and endpoint settings.

        Args:
            api_key: API key, sent in the ``x-goog-api-key`` request header.
            model: Model name interpolated into the request URL.
            base_url: API root; trailing slashes are tolerated.
        """
        self._api_key = api_key
        self._model = model
        # Strip trailing slashes so the URL built in generate() never
        # contains "//models".
        self._base_url = base_url.rstrip("/")

    @staticmethod
    def _error_message(resp) -> str:
        """Return a short human-readable message for a failed response."""
        fallback = resp.text[:200]
        try:
            payload = resp.json()
        except ValueError:
            # Body is not JSON (e.g. an HTML error page from a proxy).
            return fallback
        # Guard the shape: a non-dict body or a non-dict "error" value would
        # otherwise raise AttributeError and mask the real HTTP failure.
        error = payload.get("error") if isinstance(payload, dict) else None
        if isinstance(error, dict):
            return error.get("message", fallback)
        return fallback

    def generate(self, prompt: str, size: str) -> bytes:
        """Generate one image and return its decoded bytes.

        Args:
            prompt: Text description of the image.
            size: Size string such as ``"1792x1024"``. No size field is sent
                in the request body; when the string contains ``"x"`` it is
                appended to the prompt as a resolution hint instead.

        Returns:
            The decoded bytes of the first ``image/*`` inline part.

        Raises:
            ValueError: On a non-200 response, an unparsable or unexpectedly
                shaped response body, a response without candidates, or a
                response that carries no image.
        """
        # The size is conveyed through the prompt text only.
        if "x" in size:
            w, h = size.split("x", 1)
            prompt = f"{prompt}\n\nGenerate this image at {w}x{h} resolution."

        body = {
            "contents": [{"parts": [{"text": prompt}]}],
            "generationConfig": {"responseModalities": ["TEXT", "IMAGE"]},
        }
        resp = requests.post(
            f"{self._base_url}/models/{self._model}:generateContent",
            headers={
                "Content-Type": "application/json",
                "x-goog-api-key": self._api_key,
            },
            json=body,
            timeout=120,
        )
        if resp.status_code != 200:
            msg = self._error_message(resp)
            raise ValueError(f"Gemini API error ({resp.status_code}): {msg}")

        try:
            data = resp.json()
        except ValueError as exc:
            raise ValueError(
                f"Gemini API returned invalid JSON: {resp.text[:200]}"
            ) from exc
        if not isinstance(data, dict):
            raise ValueError("Unexpected Gemini response format")

        candidates = data.get("candidates")
        if not isinstance(candidates, list) or not candidates:
            # Surface the block reason when the response provides one, so the
            # caller can tell a rejected prompt from an empty reply.
            feedback = data.get("promptFeedback")
            reason = feedback.get("blockReason") if isinstance(feedback, dict) else None
            if reason:
                raise ValueError(
                    f"No candidates in Gemini response (prompt blocked: {reason})"
                )
            raise ValueError("No candidates in Gemini response")

        first = candidates[0] if isinstance(candidates[0], dict) else {}
        content = first.get("content")
        parts = content.get("parts") if isinstance(content, dict) else None

        text_notes = []
        for part in parts or []:
            if not isinstance(part, dict):
                continue
            inline_data = part.get("inlineData")
            if (
                isinstance(inline_data, dict)
                and str(inline_data.get("mimeType", "")).startswith("image/")
                and inline_data.get("data")
            ):
                return base64.b64decode(inline_data["data"])
            if part.get("text"):
                # Keep any text the model returned; it usually explains why
                # no image was produced.
                text_notes.append(str(part["text"]))

        details = []
        finish_reason = first.get("finishReason")
        if finish_reason:
            details.append(f"finishReason={finish_reason}")
        if text_notes:
            snippet = " ".join(text_notes)[:200]
            details.append(f"text={snippet}")
        suffix = " (" + "; ".join(details) + ")" if details else ""
        raise ValueError("No image found in Gemini response parts" + suffix)
|
||||||
|
|
||||||
|
|
||||||
# --- Provider registry ---
|
# --- Provider registry ---
|
||||||
|
|
||||||
PROVIDERS = {
|
PROVIDERS = {
|
||||||
"doubao": DoubaoProvider,
|
"doubao": DoubaoProvider,
|
||||||
"openai": OpenAIProvider,
|
"openai": OpenAIProvider,
|
||||||
|
"gemini": GeminiProvider,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -287,7 +339,7 @@ def generate_image(
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
parser = argparse.ArgumentParser(
|
parser = argparse.ArgumentParser(
|
||||||
description="Generate images using AI (doubao-seedream, OpenAI DALL-E, etc.)"
|
description="Generate images using AI (doubao-seedream, OpenAI DALL-E, Gemini Imagen, etc.)"
|
||||||
)
|
)
|
||||||
parser.add_argument("--prompt", required=True, help="Image generation prompt")
|
parser.add_argument("--prompt", required=True, help="Image generation prompt")
|
||||||
parser.add_argument("--output", required=True, help="Output file path")
|
parser.add_argument("--output", required=True, help="Output file path")
|
||||||
|
|
@ -299,7 +351,7 @@ def main():
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--provider",
|
"--provider",
|
||||||
default=None,
|
default=None,
|
||||||
help="Override provider (doubao, openai). Default: from config.yaml",
|
help="Override provider (doubao, openai, gemini). Default: from config.yaml",
|
||||||
)
|
)
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue