Files
liaoxin-skills/skills/gemini-web-generate/scripts/cli.js
T

619 lines
22 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env node
import { connectBrowser, navigateToGemini, log, checkLogin, listSessions, closeSession, captureChatUrl, findSessionByChatUrl } from './src/browser.js';
import { generate } from './src/generator.js';
import { downloadViaButtons, takeScreenshot } from './src/screenshot.js';
import { checkStatus } from './src/status.js';
import { config } from './config.js';
import fs from 'fs';
import path from 'path';
/**
 * Pause for a fixed delay.
 * @param {number} ms - delay in milliseconds
 * @returns {Promise<void>} settles (with no value) once the timer fires
 */
const sleep = (ms) =>
  new Promise((resolve) => {
    setTimeout(resolve, ms);
  });
/**
 * Output-mode switch read by emit() and output().
 * - false (default): human mode — emit() reports through log() and pretty-printed JSON via console.log.
 * - true: NDJSON mode — each event/result is one compact JSON line written to stdout.
 * Assigned in main() from the --json flag.
 */
let jsonMode = false;
/**
 * Emit one event of the CLI's progress/result protocol.
 *
 * JSON mode: every event is written to stdout as a single NDJSON line
 * `{ type, ...data }` (a `type` key inside `data` overrides the event type),
 * and nothing else is printed.
 *
 * Human mode:
 *   - "progress": the message (if any) is passed to log(); no JSON is printed.
 *   - "success":  `data` is pretty-printed with console.log.
 *   - "error":    an "ERROR:" line is passed to log() and `{ type, ...data }` is
 *                 pretty-printed. This happens even when `data.message` is
 *                 missing, so a terminal error is never dropped silently.
 *   - any other type is ignored.
 *
 * @param {string} type - "progress" | "success" | "error"
 * @param {object} [data] - event payload; treated as `{}` when omitted or null
 */
function emit(type, data = {}) {
  const payload = data ?? {};

  if (jsonMode) {
    process.stdout.write(`${JSON.stringify({ type, ...payload })}\n`);
    return;
  }

  switch (type) {
    case 'progress':
      if (payload.message) {
        log(payload.message);
      }
      break;
    case 'success':
      console.log(JSON.stringify(payload, null, 2));
      break;
    case 'error':
      // Terminal event: always surface it, with a placeholder when no message was supplied.
      log('ERROR:', payload.message || '(no message)');
      console.log(JSON.stringify({ type, ...payload }, null, 2));
      break;
    default:
      break;
  }
}
/**
 * Print a single JSON document on stdout.
 * Human mode pretty-prints it with a 2-space indent; JSON mode writes it
 * compactly on one line followed by a newline.
 * @param {object} data - JSON-serialisable payload
 */
function output(data) {
  if (!jsonMode) {
    console.log(JSON.stringify(data, null, 2));
    return;
  }
  const line = JSON.stringify(data);
  process.stdout.write(`${line}\n`);
}
/**
 * Report a fatal problem as an "error" event, then terminate the process
 * with exit status 1.
 * @param {string} message - text carried in the event's `message` field
 */
function error(message) {
  const payload = { message };
  emit('error', payload);
  process.exit(1);
}
/**
 * Parse `process.argv` into a command name and an options object.
 *
 * The first argument is the command. Every later token starting with `--`
 * becomes an option: it takes the following token as its value unless that
 * token is missing or itself starts with `--`, in which case the option is
 * the boolean `true`. Tokens that are neither an option nor an option's
 * value are ignored.
 *
 * Kebab-case options are stored under both spellings, so `--prompt-file x`
 * yields `args['prompt-file']` and `args.promptFile`. The rest of this file
 * reads the camelCase names while the help text documents the kebab-case ones.
 *
 * @returns {{command: string|undefined, args: Object<string, string|true>}}
 */
function parseArgs() {
  const [command, ...rest] = process.argv.slice(2);
  const parsed = { command, args: {} };

  for (let i = 0; i < rest.length; i++) {
    const token = rest[i];
    if (!token.startsWith('--')) continue;

    const key = token.slice(2);
    const next = rest[i + 1];
    const hasValue = next !== undefined && !next.startsWith('--');
    const value = hasValue ? next : true;

    parsed.args[key] = value;
    // Alias: "download-timeout" -> "downloadTimeout".
    const camelKey = key.replace(/-+([a-zA-Z0-9])/g, (_, ch) => ch.toUpperCase());
    if (camelKey !== key) {
      parsed.args[camelKey] = value;
    }

    if (hasValue) i++; // the value token has been consumed
  }

  return parsed;
}
/**
 * Read all of stdin as UTF-8 text and return it trimmed.
 *
 * If nothing at all has arrived within 1 second the input is treated as
 * empty and the promise resolves with ''. Once any data has arrived the
 * timeout no longer applies and the function waits for 'end'.
 *
 * All listeners and the timer are removed as soon as the promise settles.
 * On the empty-input timeout stdin is also paused, so an idle stdin is no
 * longer left in flowing mode once the caller has its answer.
 *
 * @returns {Promise<string>} trimmed stdin contents, or '' when no input arrived
 */
function readStdin() {
  const stdin = process.stdin;

  return new Promise((resolve, reject) => {
    let data = '';
    let timer;

    const cleanup = () => {
      clearTimeout(timer);
      stdin.off('data', onData);
      stdin.off('end', onEnd);
      stdin.off('error', onError);
    };
    const onData = (chunk) => {
      data += chunk;
    };
    const onEnd = () => {
      cleanup();
      resolve(data.trim());
    };
    const onError = (err) => {
      cleanup();
      reject(err);
    };

    stdin.setEncoding('utf8');
    stdin.on('data', onData);
    stdin.on('end', onEnd);
    stdin.on('error', onError);

    // Timeout if no data within 1s — treat as empty input
    timer = setTimeout(() => {
      if (data) return; // input has started arriving; keep waiting for 'end'
      cleanup();
      stdin.pause();
      resolve('');
    }, 1000);
  });
}
/**
 * Turn a user-supplied file path into an absolute one.
 * Absolute paths are returned untouched (no normalisation); relative paths
 * are resolved against the current working directory.
 * @param {string} imagePath - path as given on the command line
 * @returns {string} absolute path
 */
function resolveImagePath(imagePath) {
  return path.isAbsolute(imagePath)
    ? imagePath
    : path.resolve(process.cwd(), imagePath);
}
/**
 * Gate a command on the user being signed in to Gemini.
 *
 * Calls checkLogin(page). When the result is not logged in, an "error" event
 * describing the required action is emitted and the process exits with
 * status 0. When an account name is reported, it is announced as a
 * "progress" event.
 *
 * @param {import('puppeteer-core').Page} page - tab to inspect
 * @returns {Promise<void>}
 */
async function requireLogin(page) {
  const { loggedIn, account } = await checkLogin(page);

  if (!loggedIn) {
    const notLoggedIn = {
      type: 'not_logged_in',
      message: 'Not logged into Gemini. Please log in and try again.',
      action_required: 'open_browser_and_login',
      url: config.geminiUrl,
    };
    emit('error', notLoggedIn);
    process.exit(0);
  }

  if (account) {
    const message = `Logged in as: ${account}`;
    emit('progress', { step: 'login', message });
  }
}
/**
 * Parse a CLI option as a positive integer.
 * @param {string|boolean|undefined} value - raw option value (`true` when the flag had no value)
 * @param {number|undefined} fallback - returned when `value` is missing, non-numeric, or <= 0
 * @returns {number|undefined}
 */
function parsePositiveInt(value, fallback) {
  const parsedValue = Number.parseInt(value, 10);
  return Number.isFinite(parsedValue) && parsedValue > 0 ? parsedValue : fallback;
}

/**
 * Best-effort screenshot: returns the saved file path, or undefined after
 * emitting a warning progress event when the capture fails.
 * @param {import('puppeteer-core').Page} page
 * @returns {Promise<string|undefined>}
 */
async function tryScreenshot(page) {
  try {
    const shot = await takeScreenshot(page);
    return shot.path;
  } catch (e) {
    emit('progress', { step: 'warning', message: `Failed to take screenshot: ${e.message}` });
    return undefined;
  }
}

/**
 * CLI entry point: parses argv and dispatches to one of the commands
 * help / sessions / find_session / close / generate / download / status.
 *
 * help, sessions, find_session and close are handled up front. generate,
 * download and status first ensure the configured output directories exist,
 * then connect to the browser via connectBrowser(). The connection is
 * released with browser.disconnect() when the command returns; the browser
 * itself is never closed here.
 *
 * Failures are reported through error(), which exits the process with status 1.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const { command, args } = parseArgs();

  // Check for JSON mode flag. The env var is set for other modules
  // (presumably read by the imported helpers — not visible in this file).
  jsonMode = args.json === true || args.json === 'true';
  if (jsonMode) {
    process.env.GEMINI_JSON_MODE = '1';
  }

  if (!command || command === 'help' || command === '--help' || command === '-h') {
    const helpText = `
gemini-web-cli — 通过 CLI 驱动 Gemini 网页 AI 生图
用法:
node cli.js <command> [options]
命令:
generate 向 Gemini 发送提示词(可选参考图),等待图片生成
download 从当前 Gemini 标签页下载已生成的图片
status 检查页面状态(空闲 / 生成中 / 完成 / 异常)
sessions 列出所有活跃的 Gemini 会话(标签页)
find_session 通过 Gemini 对话链接找回已丢失的 session
close 关闭指定会话的标签页
generate 参数:
--prompt "文本" 发送给 Gemini 的提示词(必填),支持 "stdin" 从管道读取
--prompt-file <路径> 从文件读取提示词(支持换行)
--image <路径> 单张参考图片路径
--images <路径1,路径2,...> 多张参考图片,逗号分隔,最多 10 张
--session <id> 复用已有会话的标签页
--chatUrl <url> 打开指定的 Gemini 对话链接(创建新会话)
--mode <single|multi> "single" 生成后关闭标签页,"multi" 保持标签页打开(默认 multi)
--tool <工具名> 发送前选择指定工具(默认选择"制作图片")
--timeout <毫秒> 生成最大等待时间(默认 300000ms = 5分钟)
--download-timeout <毫秒> 下载最大等待时间(默认 120000ms = 2分钟)
--screenshot 出错或超时时截图(低质量 JPEG),保存路径写入输出
--json 启用 NDJSON 模式:每行输出一个 JSON 事件
download 参数:
--timeout <毫秒> 下载最大等待时间(默认 120000ms = 2分钟)
--session <id> 复用已有会话的标签页
status 参数:
--session <id> 检查指定会话的标签页
--wait 持续轮询,直到完成 / 出错
--timeout <毫秒> --wait 模式下的最大等待时间(默认 300000ms)
--screenshot 状态异常时截图(低质量 JPEG),保存路径写入输出
sessions(无参数)
find_session 参数:
--chatUrl <url> Gemini 对话链接(必填)
--open 如果没找到匹配的标签页,自动打开新标签页并导航
close 参数:
--session <id> 要关闭的会话 ID(必填)
示例:
# 提示词 + 参考图生图
node cli.js generate --prompt "一只可爱的猫" --image ./cat.png
# NDJSON 模式(适合程序解析)
node cli.js generate --prompt "一只可爱的猫" --image ./cat.png --json
# single 模式,生成后自动关闭标签页
node cli.js generate --prompt "日落风景" --mode single
# 轮询状态,直到完成
node cli.js status --session 7ac6cfff --wait
# 快速检查状态(立即返回)
node cli.js status --session 7ac6cfff
# 出错/超时时自动截图(路径写入输出 JSON)
node cli.js generate --prompt "日落风景" --screenshot
node cli.js status --session 7ac6cfff --wait --screenshot
# 下载已生成的图片
node cli.js download --session 7ac6cfff
# 列出所有活跃的会话
node cli.js sessions
# 通过对话链接找回 session
node cli.js find_session --chatUrl "https://gemini.google.com/app/4c089f364e1cf745"
# 没找到时自动打开新标签页
node cli.js find_session --chatUrl "https://gemini.google.com/app/4c089f364e1cf745" --open
# 关闭指定会话的标签页
node cli.js close --session 7ac6cfff
# 通过之前的对话链接继续生图
node cli.js generate --chatUrl "https://gemini.google.com/app/4c089f364e1cf745" --prompt "换成晚上场景"
# 仅下载已有图片(chatUrl 不加 --prompt,直接下载)
node cli.js generate --chatUrl "https://gemini.google.com/app/4c089f364e1cf745" --mode single
环境变量:
CDP_URL Chrome DevTools Protocol 连接地址(默认 http://127.0.0.1:9222
`.trim();
    process.stdout.write(helpText + '\n');
    return;
  }

  // Sessions command
  if (command === 'sessions') {
    const sessions = await listSessions();
    output({ type: 'sessions', sessions });
    return;
  }

  // Find session command — recover a lost session by matching chatUrl
  if (command === 'find_session') {
    if (!args.chatUrl) {
      error('--chatUrl is required');
    }
    const openIfNotFound = args.open === true || args.open === 'true';
    const result = await findSessionByChatUrl({ chatUrl: args.chatUrl, openIfNotFound });
    output({ type: 'find_session', ...result });
    return;
  }

  // Close command
  if (command === 'close') {
    if (!args.session) {
      error('--session is required to close a tab');
    }
    const result = await closeSession(args.session);
    output({ type: 'close', ...result });
    return;
  }

  // Ensure output directories exist
  fs.mkdirSync(config.outputDir, { recursive: true });
  fs.mkdirSync(config.screenshotDir, { recursive: true });
  fs.mkdirSync(config.downloadDir, { recursive: true });

  let browser;
  try {
    emit('progress', { step: 'start', message: `Command: ${command}` });
    const sessionId = args.session || undefined;
    // Default to 'multi' mode (keep tab open for subsequent calls)
    const mode = (args.mode === 'multi' || args.mode === 'single') ? args.mode : 'multi';
    // The help text documents the kebab-case spellings; accept both so the
    // options work regardless of how the parser keyed them.
    const promptFileArg = args.promptFile ?? args['prompt-file'];
    const downloadTimeoutArg = args.downloadTimeout ?? args['download-timeout'];

    switch (command) {
      case 'generate': {
        // Resolve prompt: --prompt-file > --prompt (no value = stdin) > --prompt literal
        let prompt = args.prompt;
        let isDownloadOnly = false;
        if (promptFileArg) {
          const promptPath = resolveImagePath(promptFileArg);
          if (!fs.existsSync(promptPath)) {
            error(`Prompt file not found: ${promptPath}`);
          }
          prompt = fs.readFileSync(promptPath, 'utf8').trim();
        } else if (prompt === true || prompt === 'stdin') {
          // --prompt with no value, or the literal "stdin": read from a pipe/heredoc
          prompt = await readStdin();
        }
        if (!prompt) {
          if (args.chatUrl) {
            // --chatUrl without --prompt: enter download-only mode
            isDownloadOnly = true;
          } else {
            error('--prompt is required');
          }
        }

        // Support --images path1,path2,... or --image single
        let imagePaths = [];
        const imagesArg = args.images;
        if (imagesArg) {
          imagePaths = imagesArg.split(',').map((p) => resolveImagePath(p.trim()));
        } else if (args.image) {
          imagePaths = [resolveImagePath(args.image)];
        }
        if (imagePaths.length > 10) {
          // Tell the caller instead of dropping the extra images silently.
          emit('progress', {
            step: 'warning',
            message: `Too many reference images (${imagePaths.length}); only the first 10 will be used`,
          });
          imagePaths = imagePaths.slice(0, 10);
        }
        for (const p of imagePaths) {
          if (!fs.existsSync(p)) {
            error(`Image file not found: ${p}`);
          }
        }

        const timeout = parsePositiveInt(args.timeout, config.timeout);

        emit('progress', { step: 'connect', message: 'Connecting to browser...' });
        const { browser: b, page, cdp, sessionId: sid, isNew } = await connectBrowser({ sessionId });
        browser = b;

        let continuedSession = false;
        if (sessionId && !isNew) {
          emit('progress', { step: 'connect', message: `Continuing session: ${sessionId} (mode: ${mode})` });
          // Don't navigate — stay on the current chat page for multi-round
          continuedSession = true;
        } else if (args.chatUrl) {
          emit('progress', { step: 'navigate', message: `Navigating to chat URL: ${args.chatUrl}` });
          await page.goto(args.chatUrl, { waitUntil: 'domcontentloaded', timeout: 60000 });
          // Wait for conversation content to load — check for download buttons or an editable input.
          // Best effort: a timeout here is deliberately ignored and the flow continues.
          await page.waitForFunction(
            () => document.querySelectorAll('button.generated-image-button').length > 0 ||
              document.querySelectorAll('[contenteditable="true"]').length > 0,
            { timeout: 30000 }
          ).catch(() => {});
          await sleep(3000);
          // Chat URL opens an existing conversation, treat as continued
          continuedSession = true;
        } else {
          emit('progress', { step: 'navigate', message: `New session created (mode: ${mode})` });
          await navigateToGemini(page);
        }

        emit('progress', { step: 'login', message: 'Checking login status...' });
        const loginStatus = await checkLogin(page);
        if (!loginStatus.loggedIn) {
          emit('error', {
            message: 'Not logged into Gemini. Please log in at ' + config.geminiUrl + ' and try again.',
            action_required: 'open_browser_and_login',
            sessionId: sid,
          });
          return;
        }
        if (loginStatus.account) {
          emit('progress', { step: 'login', message: `Logged in as: ${loginStatus.account}` });
        }

        // Download-only mode: --chatUrl without --prompt, download existing images directly
        if (isDownloadOnly) {
          emit('progress', { step: 'download', message: 'Download-only mode: downloading existing images...' });
          // Wait a moment for the page to fully load embedded images
          await sleep(3000);
          // Count visible download buttons
          const btnCount = await page.evaluate(() => {
            const buttons = document.querySelectorAll('button.generated-image-button');
            return Array.from(buttons).filter((btn) => btn.offsetParent !== null).length;
          });
          if (btnCount === 0) {
            emit('error', {
              message: 'No existing images found to download on this page.',
              sessionId: sid,
              status: 'no_images',
            });
            if (mode === 'single') await closeSession(sid);
            break;
          }
          emit('progress', { step: 'download', message: `Found ${btnCount} download button(s), downloading all...` });
          try {
            const downloadTimeout = parsePositiveInt(downloadTimeoutArg, 120000);
            const downloaded = await downloadViaButtons(page, cdp, {
              existingButtonCount: 0,
              timeout: downloadTimeout,
            });
            const chatUrl = await captureChatUrl(sid, page);
            emit('success', {
              sessionId: sid,
              chatUrl,
              mode,
              images: downloaded.map((i) => i.path),
            });
          } catch (e) {
            emit('error', {
              message: `Download failed: ${e.message}`,
              sessionId: sid,
              status: 'download_failed',
            });
          }
          if (mode === 'single') await closeSession(sid);
          break;
        }

        // generate() handles its own progress events
        const genStartTime = Date.now();
        const result = await generate(page, {
          prompt,
          images: imagePaths,
          timeout,
          isContinuedSession: continuedSession,
          tool: args.tool,
        });
        // Capture the Gemini chat URL for recovery
        const chatUrl = await captureChatUrl(sid, page);
        const elapsed = ((Date.now() - genStartTime) / 1000).toFixed(1);

        if (result.status === 'success') {
          emit('progress', {
            step: 'generate',
            state: 'DONE',
            elapsed: parseFloat(elapsed),
            message: `Generation completed (${elapsed}s)`,
          });
          // A download failure is reported as progress events only; the run still ends in "success".
          try {
            const downloadTimeout = parsePositiveInt(downloadTimeoutArg, 120000);
            const downloaded = await downloadViaButtons(page, cdp, {
              existingButtonCount: result.existingButtonCount || 0,
              timeout: downloadTimeout,
            });
            if (downloaded.length > 0) {
              result.images = downloaded;
              for (const img of downloaded) {
                const fileName = path.basename(img.path);
                emit('progress', { step: 'download', message: `Downloaded: ${fileName}` });
              }
            }
          } catch (e) {
            emit('progress', { step: 'warning', message: `Failed to download generated images: ${e.message}` });
            emit('progress', { step: 'download', message: `Download failed: ${e.message}` });
          }
          // Emit final success event
          emit('success', {
            sessionId: sid,
            chatUrl,
            mode,
            images: result.images?.map((i) => i.path) || [],
          });
        } else {
          // Take the screenshot BEFORE the terminal error event so its path is
          // part of the output, as the --screenshot help text promises.
          const screenshot = args.screenshot ? await tryScreenshot(page) : undefined;
          if (result.status === 'text_only') {
            // Text-only response means image generation failed
            emit('error', {
              message: result.message,
              sessionId: sid,
              status: 'text_only',
              screenshot,
            });
          } else {
            // Error or timeout — emit error as terminal event
            emit('error', {
              message: result.message,
              sessionId: sid,
              status: result.status,
              timeout: result.timeout,
              screenshot,
            });
          }
        }

        // Single mode: close the tab
        if (mode === 'single') {
          await closeSession(sid);
        }
        break;
      }

      case 'download': {
        emit('progress', { step: 'connect', message: 'Connecting to browser...' });
        const { browser: b, page, cdp, sessionId: sid, isNew } = await connectBrowser({ sessionId });
        browser = b;
        // Only a freshly created tab is navigated; an existing one stays on its current chat page
        if (isNew) {
          emit('progress', { step: 'navigate', message: 'Navigating to Gemini...' });
          await navigateToGemini(page);
        }
        emit('progress', { step: 'login', message: 'Checking login status...' });
        await requireLogin(page);

        emit('progress', { step: 'download', message: 'Downloading generated images...' });
        // --download-timeout wins over --timeout; both fall back to 2 minutes.
        const downloadTimeout = parsePositiveInt(downloadTimeoutArg, parsePositiveInt(args.timeout, 120000));
        const downloadResult = await downloadViaButtons(page, cdp, { newestOnly: true, timeout: downloadTimeout });
        if (downloadResult.length === 0) {
          error('No new images found to download.');
        }
        for (const img of downloadResult) {
          const fileName = path.basename(img.path);
          emit('progress', { step: 'download', message: `Downloaded: ${fileName}` });
        }
        emit('success', {
          sessionId: sid,
          path: downloadResult[0].path,
        });
        if (mode === 'single') {
          await closeSession(sid);
        }
        break;
      }

      case 'status': {
        emit('progress', { step: 'connect', message: 'Connecting to browser...' });
        const { browser: b, page, sessionId: sid, isNew } = await connectBrowser({ sessionId });
        browser = b;
        // Navigate if a new tab was created (either no session requested,
        // or requested session was lost and replaced)
        if (isNew) {
          emit('progress', { step: 'navigate', message: 'Navigating to Gemini...' });
          await navigateToGemini(page);
        }
        emit('progress', { step: 'login', message: 'Checking login status...' });
        const loginStatus = await checkLogin(page);
        if (!loginStatus.loggedIn) {
          emit('error', {
            message: 'Not logged into Gemini',
            action_required: 'open_browser_and_login',
            sessionId: sid,
          });
          return;
        }

        const waitForCompletion = args.wait === true || args.wait === 'true';
        // undefined leaves the default to checkStatus()
        const pollTimeout = parsePositiveInt(args.timeout, undefined);
        if (waitForCompletion) {
          emit('progress', { step: 'status', message: 'Waiting for generation to complete...' });
        }
        const result = await checkStatus(page, {
          waitForCompletion,
          timeout: pollTimeout,
          onStateChange: (state, elapsed) => {
            emit('progress', { step: 'status', state, elapsed, message: `State: ${state} (${elapsed}s)` });
          },
        });
        result.sessionId = sid;
        // Capture screenshot on error or unclear states
        if (args.screenshot && result.state && ['error', 'page_error', 'generating', 'idle'].includes(result.state)) {
          const screenshot = await tryScreenshot(page);
          if (screenshot !== undefined) {
            result.screenshot = screenshot;
          }
        }
        output({ type: 'status', ...result });
        break;
      }

      default:
        error(`Unknown command: ${command}. Available commands: generate, download, status, sessions, find_session, close`);
    }
  } catch (err) {
    // Thrown values are not guaranteed to be Error objects.
    const message = err instanceof Error ? err.message : String(err);
    // CDP connection failures
    if (message.includes('ECONNREFUSED') || message.includes('connect')) {
      error(`Cannot connect to browser at ${config.cdpUrl}. Make sure Chrome is running with --remote-debugging-port.`);
    }
    error(message);
  } finally {
    // Don't close the browser - it's managed externally (OpenClaw, etc.)
    // Just release the CDP connection
    if (browser) {
      try {
        await browser.disconnect();
      } catch {
        // Already disconnected
      }
    }
  }
}
// Run the CLI. Anything main() throws outside its own try/catch is reported
// through error(), so the caller gets a structured error event and exit
// status 1 instead of an unhandled promise rejection.
main().catch((err) => {
  error(err instanceof Error ? err.message : String(err));
});