add: gemini-web-generate skill(整合 CLI + skill)

This commit is contained in:
2026-05-14 14:45:51 +08:00
parent a23b9a5272
commit 5df8934000
11 changed files with 3498 additions and 0 deletions
+557
View File
@@ -0,0 +1,557 @@
#!/usr/bin/env node
import { connectBrowser, navigateToGemini, log, checkLogin, listSessions, closeSession, captureChatUrl, findSessionByChatUrl } from './src/browser.js';
import { generate } from './src/generator.js';
import { downloadViaButtons, takeScreenshot } from './src/screenshot.js';
import { checkStatus } from './src/status.js';
import { config } from './config.js';
import fs from 'fs';
import path from 'path';
// Returns a promise that resolves (with no value) after `ms` milliseconds.
const sleep = (ms) => new Promise((resolve) => {
  setTimeout(resolve, ms);
});
/**
 * Output-mode flag read by emit() and output().
 * When true, events are written to stdout as newline-delimited JSON (NDJSON);
 * when false, human-oriented output is produced instead.
 * It is assigned in main() from the --json command line flag.
 */
let jsonMode = false;
/**
 * Report an event to the caller.
 *
 * JSON mode: every event is serialized as `{ type, ...data }` and written to
 * stdout as a single NDJSON line; nothing else is done.
 * Human mode: progress messages are passed to log(); success payloads are
 * pretty-printed to stdout; errors are passed to log() and also pretty-printed
 * to stdout. Progress and error events that carry no `message` produce no
 * output in human mode.
 *
 * @param {string} type - "progress" | "success" | "error"
 * @param {object} data - event payload (a `type` key inside `data` overrides
 *   the `type` argument in the serialized object, because it is spread last)
 */
function emit(type, data) {
  if (jsonMode) {
    const line = JSON.stringify({ type, ...data });
    process.stdout.write(line + '\n');
    return;
  }

  switch (type) {
    case 'progress':
      if (data.message) {
        log(data.message);
      }
      break;
    case 'success':
      console.log(JSON.stringify(data, null, 2));
      break;
    case 'error':
      if (data.message) {
        log('ERROR:', data.message);
        console.log(JSON.stringify({ type, ...data }, null, 2));
      }
      break;
    default:
      // Unknown event types are ignored in human mode.
      break;
  }
}
/**
 * Write one JSON object to stdout.
 * JSON mode produces a compact single line; otherwise the object is
 * pretty-printed with two-space indentation.
 * @param {object} data
 */
function output(data) {
  if (!jsonMode) {
    console.log(JSON.stringify(data, null, 2));
    return;
  }
  const line = JSON.stringify(data);
  process.stdout.write(line + '\n');
}
/**
 * Emit an error event carrying `message`, then terminate the process with
 * exit code 1. This function does not return.
 * @param {string} message
 */
function error(message) {
  const payload = { message };
  emit('error', payload);
  process.exit(1);
}
/**
 * Parse command line arguments from process.argv.
 *
 * The first argument is the command. Every following `--key` token becomes an
 * entry in `args`: its value is the next token, or `true` when the next token
 * is missing or is itself a `--flag`. Tokens that do not start with `--` and
 * are not consumed as a value are ignored.
 *
 * Kebab-case keys are additionally stored under their camelCase spelling
 * (`--prompt-file` -> `args['prompt-file']` and `args.promptFile`) so that
 * options documented with dashes can be read as plain properties.
 *
 * @returns {{command: string, args: object}}
 */
function parseArgs() {
  const [command, ...rest] = process.argv.slice(2);
  const args = {};

  for (let i = 0; i < rest.length; i++) {
    const token = rest[i];
    if (!token.startsWith('--')) continue;

    const key = token.slice(2);
    const next = rest[i + 1];
    const hasValue = next !== undefined && !next.startsWith('--');
    const value = hasValue ? next : true;

    args[key] = value;
    // Alias: "download-timeout" -> "downloadTimeout".
    const camelKey = key.replace(/-([a-z0-9])/g, (_, ch) => ch.toUpperCase());
    if (camelKey !== key) {
      args[camelKey] = value;
    }

    // Skip the token that was consumed as this option's value.
    if (hasValue) i++;
  }

  return { command, args };
}
/**
 * Read all of stdin as UTF-8 text.
 *
 * Resolves with the trimmed input once stdin ends. If nothing at all has
 * arrived within one second, resolves with an empty string instead (this
 * treats "no piped input" as empty). Rejects if the stream errors.
 *
 * @returns {Promise<string>}
 */
function readStdin() {
  return new Promise((resolve, reject) => {
    let data = '';
    let timer;

    process.stdin.setEncoding('utf8');
    process.stdin.on('data', (chunk) => { data += chunk; });
    process.stdin.on('end', () => {
      // Input finished: the fallback timer is no longer needed.
      clearTimeout(timer);
      resolve(data.trim());
    });
    process.stdin.on('error', (err) => {
      clearTimeout(timer);
      reject(err);
    });

    // Timeout if no data within 1s — treat as empty input
    timer = setTimeout(() => {
      if (!data) {
        // Stop reading so an idle stdin does not keep the stream flowing.
        process.stdin.pause();
        resolve('');
      }
    }, 1000);
  });
}
/**
 * Turn a possibly-relative file path into an absolute one.
 * Absolute paths are returned unchanged; relative paths are resolved against
 * the current working directory.
 * @param {string} imagePath
 * @returns {string}
 */
function resolveImagePath(imagePath) {
  return path.isAbsolute(imagePath)
    ? imagePath
    : path.resolve(process.cwd(), imagePath);
}
/**
 * Verify that the page is logged in.
 * When it is not, an error event describing the required action is emitted
 * and the process exits with code 0. When the login status carries an account,
 * a progress event naming that account is emitted.
 * @param {import('puppeteer-core').Page} page
 */
async function requireLogin(page) {
  const { loggedIn, account } = await checkLogin(page);

  if (!loggedIn) {
    const notLoggedIn = {
      type: 'not_logged_in',
      message: 'Not logged into Gemini. Please log in and try again.',
      action_required: 'open_browser_and_login',
      url: config.geminiUrl,
    };
    emit('error', notLoggedIn);
    process.exit(0);
  }

  if (account) {
    emit('progress', { step: 'login', message: `Logged in as: ${account}` });
  }
}
/**
 * Parse an integer CLI option, returning `fallback` when the value is absent
 * or not numeric (for example a bare `--timeout` flag, which parseArgs stores
 * as `true`).
 * @param {*} value
 * @param {number|undefined} fallback
 * @returns {number|undefined}
 */
function parseIntOption(value, fallback) {
  const parsed = Number.parseInt(value, 10);
  return Number.isNaN(parsed) ? fallback : parsed;
}

/**
 * CLI entry point: parses the command line and dispatches to one of the
 * commands (help, sessions, find_session, close, generate, download, status).
 *
 * Commands that drive a page connect through connectBrowser(); the connection
 * is released with browser.disconnect() in the finally block — the browser
 * itself is never closed here.
 */
async function main() {
  const { command, args } = parseArgs();

  // The help text documents these options in kebab-case; accept both spellings.
  const promptFile = args.promptFile ?? args['prompt-file'];
  const downloadTimeoutArg = args.downloadTimeout ?? args['download-timeout'];

  // Check for JSON mode flag
  jsonMode = args.json === true || args.json === 'true';
  if (jsonMode) {
    process.env.GEMINI_JSON_MODE = '1';
  }

  if (!command || command === 'help' || command === '--help' || command === '-h') {
    const helpText = `
gemini-web-cli — 通过 CLI 驱动 Gemini 网页 AI 生图
用法:
node cli.js <command> [options]
命令:
generate 向 Gemini 发送提示词(可选参考图),等待图片生成
download 从当前 Gemini 标签页下载已生成的图片
status 检查页面状态(空闲 / 生成中 / 完成 / 异常)
sessions 列出所有活跃的 Gemini 会话(标签页)
find_session 通过 Gemini 对话链接找回已丢失的 session
close 关闭指定会话的标签页
generate 参数:
--prompt "文本" 发送给 Gemini 的提示词(必填),支持 "stdin" 从管道读取
--prompt-file <路径> 从文件读取提示词(支持换行)
--image <路径> 单张参考图片路径
--images <路径1,路径2,...> 多张参考图片,逗号分隔,最多 10 张
--session <id> 复用已有会话的标签页
--chatUrl <url> 打开指定的 Gemini 对话链接(创建新会话)
--mode <single|multi> "single" 生成后关闭标签页,"multi" 保持标签页打开(默认 multi)
--tool <工具名> 发送前选择指定工具(默认选择"制作图片")
--timeout <毫秒> 生成最大等待时间(默认 300000ms = 5分钟)
--download-timeout <毫秒> 下载最大等待时间(默认 120000ms = 2分钟)
--screenshot 出错或超时时截图(低质量 JPEG),保存路径写入输出
--json 启用 NDJSON 模式:每行输出一个 JSON 事件
download 参数:
--timeout <毫秒> 下载最大等待时间(默认 120000ms = 2分钟)
--session <id> 复用已有会话的标签页
status 参数:
--session <id> 检查指定会话的标签页
--wait 持续轮询,直到完成 / 出错
--timeout <毫秒> --wait 模式下的最大等待时间(默认 300000ms)
--screenshot 状态异常时截图(低质量 JPEG),保存路径写入输出
sessions(无参数)
find_session 参数:
--chatUrl <url> Gemini 对话链接(必填)
--open 如果没找到匹配的标签页,自动打开新标签页并导航
close 参数:
--session <id> 要关闭的会话 ID(必填)
示例:
# 提示词 + 参考图生图
node cli.js generate --prompt "一只可爱的猫" --image ./cat.png
# NDJSON 模式(适合程序解析)
node cli.js generate --prompt "一只可爱的猫" --image ./cat.png --json
# single 模式,生成后自动关闭标签页
node cli.js generate --prompt "日落风景" --mode single
# 轮询状态,直到完成
node cli.js status --session 7ac6cfff --wait
# 快速检查状态(立即返回)
node cli.js status --session 7ac6cfff
# 出错/超时时自动截图(路径写入输出 JSON)
node cli.js generate --prompt "日落风景" --screenshot
node cli.js status --session 7ac6cfff --wait --screenshot
# 下载已生成的图片
node cli.js download --session 7ac6cfff
# 列出所有活跃的会话
node cli.js sessions
# 通过对话链接找回 session
node cli.js find_session --chatUrl "https://gemini.google.com/app/4c089f364e1cf745"
# 没找到时自动打开新标签页
node cli.js find_session --chatUrl "https://gemini.google.com/app/4c089f364e1cf745" --open
# 关闭指定会话的标签页
node cli.js close --session 7ac6cfff
# 通过之前的对话链接继续生图
node cli.js generate --chatUrl "https://gemini.google.com/app/4c089f364e1cf745" --prompt "换成晚上场景"
环境变量:
CDP_URL Chrome DevTools Protocol 连接地址(默认 http://127.0.0.1:9222
`.trim();
    process.stdout.write(helpText + '\n');
    return;
  }

  // Sessions command
  if (command === 'sessions') {
    const sessions = await listSessions();
    output({ type: 'sessions', sessions });
    return;
  }

  // Find session command — recover a lost session by matching chatUrl
  if (command === 'find_session') {
    if (!args.chatUrl) {
      error('--chatUrl is required');
    }
    const openIfNotFound = args.open === true || args.open === 'true';
    const result = await findSessionByChatUrl({ chatUrl: args.chatUrl, openIfNotFound });
    output({ type: 'find_session', ...result });
    return;
  }

  // Close command
  if (command === 'close') {
    if (!args.session) {
      error('--session is required to close a tab');
    }
    const result = await closeSession(args.session);
    output({ type: 'close', ...result });
    return;
  }

  // Ensure output directories exist
  fs.mkdirSync(config.outputDir, { recursive: true });
  fs.mkdirSync(config.screenshotDir, { recursive: true });
  fs.mkdirSync(config.downloadDir, { recursive: true });

  let browser;
  try {
    emit('progress', { step: 'start', message: `Command: ${command}` });
    const sessionId = args.session || undefined;
    // Default to 'multi' mode (keep tab open for subsequent calls)
    const mode = (args.mode === 'multi' || args.mode === 'single') ? args.mode : 'multi';

    switch (command) {
      case 'generate': {
        // Resolve prompt: --prompt-file > --prompt (no value = stdin) > --prompt literal
        let prompt = args.prompt;
        if (promptFile) {
          const p = resolveImagePath(promptFile);
          if (!fs.existsSync(p)) {
            error(`Prompt file not found: ${p}`);
          }
          prompt = fs.readFileSync(p, 'utf8').trim();
        } else if (prompt === true || prompt === 'stdin') {
          // --prompt was given without a value, or explicitly as "stdin":
          // read the prompt from a pipe / heredoc.
          prompt = await readStdin();
        }
        if (!prompt) {
          error('--prompt is required');
        }

        // Support --images path1,path2,... or --image single
        let imagePaths = [];
        const imagesArg = args.images;
        if (imagesArg) {
          imagePaths = imagesArg.split(',').map(p => resolveImagePath(p.trim()));
        } else if (args.image) {
          imagePaths = [resolveImagePath(args.image)];
        }
        // Only the first 10 reference images are used.
        if (imagePaths.length > 10) {
          imagePaths = imagePaths.slice(0, 10);
        }
        for (const p of imagePaths) {
          if (!fs.existsSync(p)) {
            error(`Image file not found: ${p}`);
          }
        }

        // A missing or non-numeric --timeout falls back to the configured default.
        const timeout = parseIntOption(args.timeout, config.timeout);

        emit('progress', { step: 'connect', message: 'Connecting to browser...' });
        const { browser: b, page, cdp, sessionId: sid, isNew } = await connectBrowser({ sessionId });
        browser = b;

        let continuedSession = false;
        if (sessionId && !isNew) {
          emit('progress', { step: 'connect', message: `Continuing session: ${sessionId} (mode: ${mode})` });
          // Don't navigate — stay on the current chat page for multi-round
          continuedSession = true;
        } else if (args.chatUrl) {
          emit('progress', { step: 'navigate', message: `Navigating to chat URL: ${args.chatUrl}` });
          await page.goto(args.chatUrl, { waitUntil: 'domcontentloaded', timeout: 60000 });
          // Wait for conversation content to load — check for download buttons or existing images.
          // A timeout here is tolerated; the flow continues after the fixed delay below.
          await page.waitForFunction(
            () => document.querySelectorAll('button.generated-image-button').length > 0 ||
              document.querySelectorAll('[contenteditable="true"]').length > 0,
            { timeout: 30000 }
          ).catch(() => {});
          await sleep(3000);
          // Chat URL opens an existing conversation, treat as continued
          continuedSession = true;
        } else {
          emit('progress', { step: 'navigate', message: `New session created (mode: ${mode})` });
          await navigateToGemini(page);
        }

        emit('progress', { step: 'login', message: 'Checking login status...' });
        const loginStatus = await checkLogin(page);
        if (!loginStatus.loggedIn) {
          emit('error', {
            message: 'Not logged into Gemini. Please log in at ' + config.geminiUrl + ' and try again.',
            action_required: 'open_browser_and_login',
            sessionId: sid,
          });
          return;
        }
        if (loginStatus.account) {
          emit('progress', { step: 'login', message: `Logged in as: ${loginStatus.account}` });
        }

        // generate() handles its own progress events
        const genStartTime = Date.now();
        const result = await generate(page, {
          prompt: prompt,
          images: imagePaths,
          timeout,
          isContinuedSession: continuedSession,
          tool: args.tool,
        });

        // Capture the Gemini chat URL for recovery
        const chatUrl = await captureChatUrl(sid, page);
        const elapsed = ((Date.now() - genStartTime) / 1000).toFixed(1);

        // Add session info to result
        result.sessionId = sid;
        result.chatUrl = chatUrl;
        result.mode = mode;

        if (result.status === 'success') {
          emit('progress', {
            step: 'generate',
            state: 'DONE',
            elapsed: parseFloat(elapsed),
            message: `Generation completed (${elapsed}s)`,
          });
          // Download is best-effort: a failure is reported as progress events
          // and the success event is still emitted (with no images).
          try {
            const downloadTimeout = parseInt(downloadTimeoutArg, 10) || 120000;
            const downloaded = await downloadViaButtons(page, cdp, {
              existingButtonCount: result.existingButtonCount || 0,
              timeout: downloadTimeout,
            });
            if (downloaded.length > 0) {
              result.images = downloaded;
              for (const img of downloaded) {
                const fileName = path.basename(img.path);
                emit('progress', { step: 'download', message: `Downloaded: ${fileName}` });
              }
            }
          } catch (e) {
            emit('progress', { step: 'warning', message: `Failed to download generated images: ${e.message}` });
            emit('progress', { step: 'download', message: `Download failed: ${e.message}` });
          }
          // Remove internal fields from output
          delete result.existingButtonCount;
          // Emit final success event
          emit('success', {
            sessionId: sid,
            chatUrl,
            mode,
            images: result.images?.map(i => i.path) || [],
          });
        } else {
          // Capture the screenshot BEFORE the terminal error event so that its
          // path can be included in that event's payload.
          let screenshotPath;
          if (args.screenshot && result.status) {
            try {
              const ss = await takeScreenshot(page);
              screenshotPath = ss.path;
              result.screenshot = ss.path;
            } catch (e) {
              emit('progress', { step: 'warning', message: `Failed to take screenshot: ${e.message}` });
            }
          }

          // Text-only response means image generation failed; any other
          // status is an error or timeout. Both end with an error event.
          const failure = result.status === 'text_only'
            ? {
              message: result.message,
              sessionId: sid,
              status: 'text_only',
            }
            : {
              message: result.message,
              sessionId: sid,
              status: result.status,
              timeout: result.timeout,
            };
          if (screenshotPath) {
            failure.screenshot = screenshotPath;
          }
          emit('error', failure);
        }

        // Single mode: close the tab
        if (mode === 'single') {
          await closeSession(sid);
        }
        break;
      }

      case 'download': {
        emit('progress', { step: 'connect', message: 'Connecting to browser...' });
        const { browser: b, page, cdp, sessionId: sid, isNew } = await connectBrowser({ sessionId });
        browser = b;
        // Don't navigate — stay on the current chat page
        if (isNew) {
          emit('progress', { step: 'navigate', message: 'Navigating to Gemini...' });
          await navigateToGemini(page);
        }

        emit('progress', { step: 'login', message: 'Checking login status...' });
        await requireLogin(page);

        emit('progress', { step: 'download', message: 'Downloading generated images...' });
        // --download-timeout wins over --timeout; 120000 ms is the final fallback.
        const downloadTimeout = parseInt(downloadTimeoutArg, 10) || parseInt(args.timeout, 10) || 120000;
        const downloadResult = await downloadViaButtons(page, cdp, { newestOnly: true, timeout: downloadTimeout });
        if (downloadResult.length === 0) {
          error('No new images found to download.');
        }
        for (const img of downloadResult) {
          const fileName = path.basename(img.path);
          emit('progress', { step: 'download', message: `Downloaded: ${fileName}` });
        }
        emit('success', {
          sessionId: sid,
          path: downloadResult[0].path,
        });

        if (mode === 'single') {
          await closeSession(sid);
        }
        break;
      }

      case 'status': {
        emit('progress', { step: 'connect', message: 'Connecting to browser...' });
        const { browser: b, page, sessionId: sid, isNew } = await connectBrowser({ sessionId });
        browser = b;
        // Navigate if a new tab was created (either no session requested,
        // or requested session was lost and replaced)
        if (isNew) {
          emit('progress', { step: 'navigate', message: 'Navigating to Gemini...' });
          await navigateToGemini(page);
        }

        emit('progress', { step: 'login', message: 'Checking login status...' });
        const loginStatus = await checkLogin(page);
        if (!loginStatus.loggedIn) {
          emit('error', {
            message: 'Not logged into Gemini',
            action_required: 'open_browser_and_login',
            sessionId: sid,
          });
          return;
        }

        const waitForCompletion = args.wait === true || args.wait === 'true';
        // undefined lets checkStatus() apply its own default timeout.
        const pollTimeout = parseIntOption(args.timeout, undefined);
        if (waitForCompletion) {
          emit('progress', { step: 'status', message: 'Waiting for generation to complete...' });
        }
        const result = await checkStatus(page, {
          waitForCompletion,
          timeout: pollTimeout,
          onStateChange: (state, elapsed) => {
            emit('progress', { step: 'status', state, elapsed, message: `State: ${state} (${elapsed}s)` });
          },
        });
        result.sessionId = sid;

        // Capture screenshot on error or unclear states
        if (args.screenshot && result.state && ['error', 'page_error', 'generating', 'idle'].includes(result.state)) {
          try {
            const ss = await takeScreenshot(page);
            result.screenshot = ss.path;
          } catch (e) {
            emit('progress', { step: 'warning', message: `Failed to take screenshot: ${e.message}` });
          }
        }
        output({ type: 'status', ...result });
        break;
      }

      default:
        error(`Unknown command: ${command}. Available commands: generate, download, status, sessions, find_session, close`);
    }
  } catch (err) {
    // CDP connection failures
    if (err.message.includes('ECONNREFUSED') || err.message.includes('connect')) {
      error(`Cannot connect to browser at ${config.cdpUrl}. Make sure Chrome is running with --remote-debugging-port.`);
    }
    error(err.message);
  } finally {
    // Don't close the browser - it's managed externally (OpenClaw, etc.)
    // Just release the CDP connection
    if (browser) {
      try {
        await browser.disconnect();
      } catch {
        // Already disconnected
      }
    }
  }
}
// Run the CLI. Rejections that escape main() (for example from the sessions /
// find_session / close commands, which run outside its try block) are routed
// through error() so they are reported as an error event with exit code 1
// instead of surfacing as an unhandled promise rejection.
main().catch((err) => {
  error(err instanceof Error ? err.message : String(err));
});