From 5df89340004bcdf5ebfeb1046b21e0d2e75a4b1b Mon Sep 17 00:00:00 2001 From: liaoxin Date: Thu, 14 May 2026 14:45:51 +0800 Subject: [PATCH] =?UTF-8?q?add:=20gemini-web-generate=20skill=EF=BC=88?= =?UTF-8?q?=E6=95=B4=E5=90=88=20CLI=20+=20skill=EF=BC=89?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .gitignore | 2 + skills/gemini-web-generate/SKILL.md | 166 ++++ skills/gemini-web-generate/scripts/.gitignore | 1 + skills/gemini-web-generate/scripts/cli.js | 557 +++++++++++ skills/gemini-web-generate/scripts/config.js | 15 + .../scripts/package-lock.json | 936 ++++++++++++++++++ .../gemini-web-generate/scripts/package.json | 15 + .../scripts/src/browser.js | 412 ++++++++ .../scripts/src/generator.js | 847 ++++++++++++++++ .../scripts/src/screenshot.js | 156 +++ .../gemini-web-generate/scripts/src/status.js | 391 ++++++++ 11 files changed, 3498 insertions(+) create mode 100644 skills/gemini-web-generate/SKILL.md create mode 100644 skills/gemini-web-generate/scripts/.gitignore create mode 100644 skills/gemini-web-generate/scripts/cli.js create mode 100644 skills/gemini-web-generate/scripts/config.js create mode 100644 skills/gemini-web-generate/scripts/package-lock.json create mode 100644 skills/gemini-web-generate/scripts/package.json create mode 100644 skills/gemini-web-generate/scripts/src/browser.js create mode 100644 skills/gemini-web-generate/scripts/src/generator.js create mode 100644 skills/gemini-web-generate/scripts/src/screenshot.js create mode 100644 skills/gemini-web-generate/scripts/src/status.js diff --git a/.gitignore b/.gitignore index 996cdd0..33403d5 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,5 @@ *.skill __pycache__/ .DS_Store +node_modules/ +output/ diff --git a/skills/gemini-web-generate/SKILL.md b/skills/gemini-web-generate/SKILL.md new file mode 100644 index 0000000..58ddaa2 --- /dev/null +++ b/skills/gemini-web-generate/SKILL.md @@ -0,0 +1,166 @@ +--- +name: gemini-web-generate 
+description: "Generate images via Gemini web interface using a headless browser automation CLI. Supports text-to-image, image-to-image, multi-image reference, multi-turn conversations, and session management. Use when: (1) User wants to generate images with Gemini, (2) User says 'generate with Gemini' or 'Gemini 生图', (3) Image-to-image or style transfer tasks, (4) Continuing an existing Gemini image conversation." +--- + +# Gemini Web Image Generation + +Generate images through Gemini's web interface using a bundled Puppeteer-based CLI that automates the full workflow: navigation, image upload, prompt submission, download, and cleanup. + +## Architecture + +| Component | Path | +|------|------| +| CLI entry | `scripts/cli.js` | +| Node binary | `/home/dazhi/.nvm/versions/node/v22.22.0/bin/node` | +| Browser CDP | `http://127.0.0.1:9223` (managed by `browser` tool) | + +## Quick Start + +All commands use absolute paths for reliability (replace `<skill_dir>` with the absolute path of this skill's directory): + +```bash +NODE="/home/dazhi/.nvm/versions/node/v22.22.0/bin/node" +CLI="<skill_dir>/scripts/cli.js" +``` + +### Text-to-Image (default: single mode) + +```bash +$NODE $CLI generate --prompt "a cute cat" --mode single +``` + +### Image-to-Image + +```bash +$NODE $CLI generate --prompt "convert to watercolor style" --image /path/to/ref.png --mode single +``` + +### Multi-image Reference + +```bash +$NODE $CLI generate --prompt "blend these images" --images "/path/a.png,/path/b.png" --mode single +``` + +### Multi-turn Conversation + +```bash +# First turn (session stays open; note the sessionId in the output) +$NODE $CLI generate --prompt "draw a sunset" +# Continue +$NODE $CLI generate --session <id> --prompt "add a boat" +# Last turn (auto-close) +$NODE $CLI generate --session <id> --prompt "make it night" --mode single +``` + +## Core Workflow + +### 1. Ensure Browser is Running + +``` +browser action=start +``` + +### 2. Execute Generation + +Default to `--mode single` (auto-close after download). Only omit `--mode` for multi-turn sessions. 
+ +```bash +$NODE $CLI generate --prompt "..." --mode single +``` + +CLI handles: open tab → navigate to Gemini → paste reference images → type prompt → submit → wait for generation → download to `output/originals/` → close tab (single mode). + +Default timeouts: generation 300s (5min), download 120s. + +### 3. Handle Results + +**On success**: Move downloaded image and clean up: + +```bash +# Downloads land in output/originals/ next to cli.js +ORIGINALS="$(dirname "$CLI")/output/originals" + +# Move latest generated image +LATEST=$(ls -t "$ORIGINALS" | head -1) +mv "$ORIGINALS/$LATEST" ~/.openclaw/workspace/media/generated/ + +# Clean up moved files +for f in Gemini_Generated_Image_*.png generated-*.png; do + if [ -f "/home/dazhi/.openclaw/workspace/media/generated/$f" ]; then + rm "$ORIGINALS/$f" 2>/dev/null + fi +done +``` + +**On timeout or error**: Check status: + +```bash +$NODE $CLI status --session <id> --wait +``` + +If status returns `done`, run download: + +```bash +$NODE $CLI download --session <id> +``` + +**Add `--screenshot` for diagnostics**: When errors/timeouts are expected, add `--screenshot` to auto-capture a screenshot on failure. + +### 4. Deliver to User + +Use `message` tool with `media` parameter to send the final image. 
+ +## Secondary Workflows + +### Continue from Chat URL + +```bash +$NODE $CLI generate --chatUrl "https://gemini.google.com/app/xxxx" --prompt "new instruction" +``` + +### List Active Sessions + +```bash +$NODE $CLI sessions +``` + +### Find Lost Session + +```bash +$NODE $CLI find_session --chatUrl "https://gemini.google.com/app/xxxx" --open +``` + +### Close a Session + +```bash +$NODE $CLI close --session <id> +``` + +## Error Handling + +| Status | Action | +|------|------| +| CLI exits `success` | Move image from `output/originals/` | +| CLI exits `timeout` | Run `status --session <id> --wait` to check if still generating | +| Status returns `done` | Run `download --session <id>` | +| Status returns `error` | Report error, suggest retry with modified prompt | +| Status returns `generating` | Continue waiting with `--wait` | +| Session lost | Use `find_session --chatUrl <url> --open` to recover | +| Browser not started | Run `browser action=start` first | +| Downloaded image not found | Check `output/originals/` directory, verify filename and size | + +## Critical Rules + +1. **Browser management**: Use `browser` tool for lifecycle; CLI only handles Gemini interaction +2. **Always use absolute paths** for node and CLI paths +3. **Default to `--mode single`**: Auto-close after download unless in multi-turn conversation +4. **Multi-turn continuation**: Do NOT add `--mode` when continuing; close with `--mode single` on last turn or `close` command +5. **Reference image path handling**: + - ❌ Never use absolute paths with spaces — shell splits them + - ✅ `cd` to reference image directory first, then use relative paths + - ✅ Use `ls | grep` to dynamically get filenames +6. **Avoid numbered reference images** (e.g., `01-xxx.jpg`) — Gemini may rate-limit repeated use +7. **Stop after 3 consecutive failures** — investigate root cause instead of retrying +8. **Always move downloaded images** to `~/.openclaw/workspace/media/generated/` and clean up originals +9. 
**Verify downloads**: Check filename and file size in originals before moving +10. **Use `--screenshot`** for diagnostic capture on errors +11. **No `--json` needed**: Read CLI text output directly +12. **Don't use screenshot previews**: CLI downloads original images directly diff --git a/skills/gemini-web-generate/scripts/.gitignore b/skills/gemini-web-generate/scripts/.gitignore new file mode 100644 index 0000000..ea1472e --- /dev/null +++ b/skills/gemini-web-generate/scripts/.gitignore @@ -0,0 +1 @@ +output/ diff --git a/skills/gemini-web-generate/scripts/cli.js b/skills/gemini-web-generate/scripts/cli.js new file mode 100644 index 0000000..b5063f2 --- /dev/null +++ b/skills/gemini-web-generate/scripts/cli.js @@ -0,0 +1,557 @@ +#!/usr/bin/env node +import { connectBrowser, navigateToGemini, log, checkLogin, listSessions, closeSession, captureChatUrl, findSessionByChatUrl } from './src/browser.js'; +import { generate } from './src/generator.js'; +import { downloadViaButtons, takeScreenshot } from './src/screenshot.js'; +import { checkStatus } from './src/status.js'; +import { config } from './config.js'; +import fs from 'fs'; +import path from 'path'; + +const sleep = (ms) => new Promise(r => setTimeout(r, ms)); + +/** + * NDJSON event emitter. + * In JSON mode, emits structured progress events to stdout as newline-delimited JSON. + * Human-readable logs always go to stderr. + */ + +let jsonMode = false; + +/** + * Emit an NDJSON event to stdout. + * In JSON mode, only stdout is used (no stderr logging). + * In human mode, only stderr logging is used (no JSON). 
/**
 * Emit one CLI event.
 *
 * JSON mode (`--json`): the event is written to stdout as a single NDJSON line
 * and nothing else is logged.
 * Human mode: progress messages are passed to `log`; terminal `success` and
 * `error` events are additionally pretty-printed to stdout as JSON.
 *
 * @param {string} type - "progress" | "success" | "error"
 * @param {object} data - Event payload. Its keys are spread after `type`, so a
 *   `type` key inside `data` overrides the event type in the serialized output.
 */
function emit(type, data) {
  if (jsonMode) {
    process.stdout.write(JSON.stringify({ type, ...data }) + '\n');
    return;
  }

  if (type === 'progress' && data.message) {
    log(data.message);
  } else if (type === 'success') {
    console.log(JSON.stringify(data, null, 2));
  } else if (type === 'error' && data.message) {
    log('ERROR:', data.message);
    console.log(JSON.stringify({ type, ...data }, null, 2));
  }
}

/**
 * Write a single JSON object to stdout: compact (one line) in JSON mode,
 * pretty-printed otherwise.
 *
 * @param {object} data
 */
function output(data) {
  if (jsonMode) {
    process.stdout.write(JSON.stringify(data) + '\n');
  } else {
    console.log(JSON.stringify(data, null, 2));
  }
}

/**
 * Emit an `error` event and terminate the process with exit code 1.
 * Callers rely on this function never returning.
 *
 * @param {string} message
 */
function error(message) {
  emit('error', { message });
  process.exit(1);
}

/**
 * Parse command line arguments of the form `<command> [--key [value]] ...`.
 *
 * - `--key value` stores the string value; `--key` followed by another `--flag`
 *   (or by nothing) stores `true`.
 * - `--key=value` is accepted as an alternative spelling.
 * - Kebab-case keys are stored under BOTH their literal name and a camelCase
 *   alias (`--prompt-file` -> `args['prompt-file']` and `args.promptFile`).
 *   The rest of the CLI reads the camelCase form, so without the alias the
 *   documented `--prompt-file` / `--download-timeout` options were ignored.
 * - Tokens that do not start with `--` and are not consumed as a value are skipped.
 *
 * @param {string[]} [argv] - Arguments without the node/script prefix
 *   (defaults to `process.argv.slice(2)`).
 * @returns {{command: string|undefined, args: Object<string, string|true>}}
 */
function parseArgs(argv = process.argv.slice(2)) {
  const [command, ...rest] = argv;
  const parsed = { command, args: {} };

  const setOption = (rawKey, value) => {
    parsed.args[rawKey] = value;
    const camelKey = rawKey.replace(/-+([a-zA-Z0-9])/g, (_, ch) => ch.toUpperCase());
    if (camelKey !== rawKey) {
      parsed.args[camelKey] = value;
    }
  };

  for (let i = 0; i < rest.length; i++) {
    const token = rest[i];
    if (!token.startsWith('--')) continue;

    const name = token.slice(2);
    const eq = name.indexOf('=');
    if (eq > 0) {
      // --key=value: split on the first '=' only so values may contain '='.
      setOption(name.slice(0, eq), name.slice(eq + 1));
      continue;
    }

    const next = rest[i + 1];
    if (next === undefined || next.startsWith('--')) {
      setOption(name, true);
    } else {
      setOption(name, next);
      i++; // the value token has been consumed
    }
  }

  return parsed;
}

/**
 * Read all of stdin as UTF-8 text and resolve with the trimmed content.
 *
 * If no data at all has arrived within 1 second the input is treated as empty
 * and the promise resolves with ''. In that case stdin is paused so the idle
 * stream does not keep being read; the timer is cleared as soon as the
 * promise settles.
 *
 * @returns {Promise<string>}
 */
function readStdin() {
  return new Promise((resolve, reject) => {
    let data = '';
    let settled = false;
    let timer = null;

    const settle = (fn, value) => {
      if (settled) return;
      settled = true;
      clearTimeout(timer);
      fn(value);
    };

    process.stdin.setEncoding('utf8');
    process.stdin.on('data', (chunk) => { data += chunk; });
    process.stdin.on('end', () => settle(resolve, data.trim()));
    process.stdin.on('error', (err) => settle(reject, err));

    // Timeout if no data within 1s — treat as empty input.
    timer = setTimeout(() => {
      if (!data) {
        process.stdin.pause();
        settle(resolve, '');
      }
    }, 1000);
  });
}
/**
 * Resolve a user-supplied file path to an absolute path.
 * Absolute paths are returned unchanged; relative paths are resolved against
 * the current working directory.
 *
 * @param {string} imagePath - Path as given on the command line.
 * @returns {string} Absolute path.
 */
function resolveImagePath(imagePath) {
  return path.isAbsolute(imagePath) ? imagePath : path.resolve(process.cwd(), imagePath);
}

/**
 * Check login and terminate the process if the page is not logged in.
 * Emits an error event carrying `action_required` and then exits with code 0.
 * The payload's own `type: 'not_logged_in'` key is kept as in the original code.
 *
 * @param {import('puppeteer-core').Page} page
 */
async function requireLogin(page) {
  const loginStatus = await checkLogin(page);
  if (!loginStatus.loggedIn) {
    emit('error', {
      type: 'not_logged_in',
      message: 'Not logged into Gemini. Please log in and try again.',
      action_required: 'open_browser_and_login',
      url: config.geminiUrl,
    });
    process.exit(0);
  }
  if (loginStatus.account) {
    emit('progress', { step: 'login', message: `Logged in as: ${loginStatus.account}` });
  }
}

/** Default maximum wait for image downloads, in milliseconds. */
const DEFAULT_DOWNLOAD_TIMEOUT_MS = 120000;

/** `status` states for which `--screenshot` captures the page. */
const SCREENSHOT_STATES = ['error', 'page_error', 'generating', 'idle'];

/**
 * Parse a millisecond option value.
 *
 * @param {string|boolean|undefined} value - Raw CLI value (`true` when the flag had no value).
 * @param {number|undefined} fallback - Returned when `value` is missing or not a positive integer.
 * @returns {number|undefined}
 */
function parseTimeoutMs(value, fallback) {
  const parsed = Number.parseInt(value, 10);
  return Number.isFinite(parsed) && parsed > 0 ? parsed : fallback;
}

/**
 * Take a diagnostic screenshot, best effort.
 * A failure is reported as a progress warning and never aborts the command.
 *
 * @param {import('puppeteer-core').Page} page
 * @returns {Promise<string|undefined>} Screenshot path, or undefined if capture failed.
 */
async function captureFailureScreenshot(page) {
  try {
    const shot = await takeScreenshot(page);
    return shot.path;
  } catch (e) {
    emit('progress', { step: 'warning', message: `Failed to take screenshot: ${e.message}` });
    return undefined;
  }
}

/** Print the usage text to stdout. */
function printHelp() {
  const helpText = `
gemini-web-cli — 通过 CLI 驱动 Gemini 网页 AI 生图

用法:
  node cli.js <command> [options]

命令:

  generate       向 Gemini 发送提示词(可选参考图),等待图片生成
  download       从当前 Gemini 标签页下载已生成的图片
  status         检查页面状态(空闲 / 生成中 / 完成 / 异常)
  sessions       列出所有活跃的 Gemini 会话(标签页)
  find_session   通过 Gemini 对话链接找回已丢失的 session
  close          关闭指定会话的标签页

generate 参数:
  --prompt "文本"               发送给 Gemini 的提示词(必填),支持 "stdin" 从管道读取
  --prompt-file <路径>          从文件读取提示词(支持换行)
  --image <路径>                单张参考图片路径
  --images <路径1,路径2,...>    多张参考图片,逗号分隔,最多 10 张
  --session <id>                复用已有会话的标签页
  --chatUrl <url>               打开指定的 Gemini 对话链接(创建新会话)
  --mode <mode>                 "single" 生成后关闭标签页,"multi" 保持标签页打开(默认 multi)
  --tool <工具名>               发送前选择指定工具(默认选择"制作图片")
  --timeout <毫秒>              生成最大等待时间(默认 300000ms = 5分钟)
  --download-timeout <毫秒>     下载最大等待时间(默认 120000ms = 2分钟)
  --screenshot                  出错或超时时截图(低质量 JPEG),保存路径写入输出
  --json                        启用 NDJSON 模式:每行输出一个 JSON 事件

download 参数:
  --timeout <毫秒>              下载最大等待时间(默认 120000ms = 2分钟)
  --session <id>                复用已有会话的标签页

status 参数:
  --session <id>                检查指定会话的标签页
  --wait                        持续轮询,直到完成 / 出错
  --timeout <毫秒>              --wait 模式下的最大等待时间(默认 300000ms)
  --screenshot                  状态异常时截图(低质量 JPEG),保存路径写入输出

sessions(无参数)

find_session 参数:
  --chatUrl <url>               Gemini 对话链接(必填)
  --open                        如果没找到匹配的标签页,自动打开新标签页并导航

close 参数:
  --session <id>                要关闭的会话 ID(必填)

示例:

  # 提示词 + 参考图生图
  node cli.js generate --prompt "一只可爱的猫" --image ./cat.png

  # NDJSON 模式(适合程序解析)
  node cli.js generate --prompt "一只可爱的猫" --image ./cat.png --json

  # single 模式,生成后自动关闭标签页
  node cli.js generate --prompt "日落风景" --mode single

  # 轮询状态,直到完成
  node cli.js status --session 7ac6cfff --wait

  # 快速检查状态(立即返回)
  node cli.js status --session 7ac6cfff

  # 出错/超时时自动截图(路径写入输出 JSON)
  node cli.js generate --prompt "日落风景" --screenshot
  node cli.js status --session 7ac6cfff --wait --screenshot

  # 下载已生成的图片
  node cli.js download --session 7ac6cfff

  # 列出所有活跃的会话
  node cli.js sessions

  # 通过对话链接找回 session
  node cli.js find_session --chatUrl "https://gemini.google.com/app/4c089f364e1cf745"

  # 没找到时自动打开新标签页
  node cli.js find_session --chatUrl "https://gemini.google.com/app/4c089f364e1cf745" --open

  # 关闭指定会话的标签页
  node cli.js close --session 7ac6cfff

  # 通过之前的对话链接继续生图
  node cli.js generate --chatUrl "https://gemini.google.com/app/4c089f364e1cf745" --prompt "换成晚上场景"

环境变量:
  CDP_URL   Chrome DevTools Protocol 连接地址(默认 http://127.0.0.1:9223)
`.trim();
  process.stdout.write(helpText + '\n');
}

/**
 * CLI entry point: parse arguments and dispatch to the requested command.
 *
 * `sessions`, `find_session` and `close` run without creating output
 * directories. `generate`, `download` and `status` attach to a browser tab via
 * `connectBrowser` and always release the CDP connection afterwards; the
 * browser itself is managed externally and is never closed here.
 */
async function main() {
  const { command, args } = parseArgs();

  // Check for JSON mode flag
  jsonMode = args.json === true || args.json === 'true';
  if (jsonMode) {
    process.env.GEMINI_JSON_MODE = '1';
  }

  if (!command || command === 'help' || command === '--help' || command === '-h') {
    printHelp();
    return;
  }

  // Sessions command
  if (command === 'sessions') {
    const sessions = await listSessions();
    output({ type: 'sessions', sessions });
    return;
  }

  // Find session command — recover a lost session by matching chatUrl
  if (command === 'find_session') {
    if (!args.chatUrl) {
      error('--chatUrl is required');
    }
    const openIfNotFound = args.open === true || args.open === 'true';
    const result = await findSessionByChatUrl({ chatUrl: args.chatUrl, openIfNotFound });
    output({ type: 'find_session', ...result });
    return;
  }

  // Close command
  if (command === 'close') {
    if (!args.session) {
      error('--session is required to close a tab');
    }
    const result = await closeSession(args.session);
    output({ type: 'close', ...result });
    return;
  }

  // Ensure output directories exist
  fs.mkdirSync(config.outputDir, { recursive: true });
  fs.mkdirSync(config.screenshotDir, { recursive: true });
  fs.mkdirSync(config.downloadDir, { recursive: true });

  // Kebab-case flags are read under both spellings so they work regardless of
  // how the argument parser keys them.
  const promptFileArg = args.promptFile ?? args['prompt-file'];
  const downloadTimeoutArg = args.downloadTimeout ?? args['download-timeout'];

  let browser;
  try {
    emit('progress', { step: 'start', message: `Command: ${command}` });

    const sessionId = args.session || undefined;
    // Default to 'multi' mode (keep tab open for subsequent calls)
    const mode = (args.mode === 'multi' || args.mode === 'single') ? args.mode : 'multi';

    switch (command) {
      case 'generate': {
        // Resolve prompt: --prompt-file > --prompt (no value = stdin) > --prompt literal
        let prompt = args.prompt;
        if (promptFileArg) {
          if (typeof promptFileArg !== 'string') {
            error('--prompt-file requires a file path');
          }
          const promptPath = resolveImagePath(promptFileArg);
          if (!fs.existsSync(promptPath)) {
            error(`Prompt file not found: ${promptPath}`);
          }
          prompt = fs.readFileSync(promptPath, 'utf8').trim();
        } else if (prompt === true || prompt === 'stdin') {
          // --prompt given without a value, or explicitly "stdin": read from pipe/heredoc
          prompt = await readStdin();
        }

        if (!prompt) {
          error('--prompt is required');
        }

        // Support --images path1,path2,... or --image single
        let imagePaths = [];
        if (args.images) {
          if (typeof args.images !== 'string') {
            error('--images requires a comma-separated list of paths');
          }
          imagePaths = args.images
            .split(',')
            .map((p) => p.trim())
            .filter(Boolean) // ignore empty segments such as a trailing comma
            .map((p) => resolveImagePath(p));
        } else if (args.image) {
          if (typeof args.image !== 'string') {
            error('--image requires a file path');
          }
          imagePaths = [resolveImagePath(args.image)];
        }
        if (imagePaths.length > 10) {
          imagePaths = imagePaths.slice(0, 10);
        }
        for (const p of imagePaths) {
          if (!fs.existsSync(p)) {
            error(`Image file not found: ${p}`);
          }
        }

        const timeout = parseTimeoutMs(args.timeout, config.timeout);

        emit('progress', { step: 'connect', message: 'Connecting to browser...' });
        const { browser: b, page, cdp, sessionId: sid, isNew } = await connectBrowser({ sessionId });
        browser = b;

        let continuedSession = false;
        if (sessionId && !isNew) {
          emit('progress', { step: 'connect', message: `Continuing session: ${sessionId} (mode: ${mode})` });
          // Don't navigate — stay on the current chat page for multi-round
          continuedSession = true;
        } else if (args.chatUrl) {
          emit('progress', { step: 'navigate', message: `Navigating to chat URL: ${args.chatUrl}` });
          await page.goto(args.chatUrl, { waitUntil: 'domcontentloaded', timeout: 60000 });
          // Wait for conversation content to load — check for download buttons or an input box.
          // Best effort: a timeout here is deliberately ignored and followed by a fixed delay.
          await page.waitForFunction(
            () => document.querySelectorAll('button.generated-image-button').length > 0 ||
                  document.querySelectorAll('[contenteditable="true"]').length > 0,
            { timeout: 30000 }
          ).catch(() => {});
          await sleep(3000);
          // Chat URL opens an existing conversation, treat as continued
          continuedSession = true;
        } else {
          emit('progress', { step: 'navigate', message: `New session created (mode: ${mode})` });
          await navigateToGemini(page);
        }

        emit('progress', { step: 'login', message: 'Checking login status...' });
        const loginStatus = await checkLogin(page);
        if (!loginStatus.loggedIn) {
          emit('error', {
            message: `Not logged into Gemini. Please log in at ${config.geminiUrl} and try again.`,
            action_required: 'open_browser_and_login',
            sessionId: sid,
          });
          return;
        }
        if (loginStatus.account) {
          emit('progress', { step: 'login', message: `Logged in as: ${loginStatus.account}` });
        }

        // generate() handles its own progress events
        const genStartTime = Date.now();

        const result = await generate(page, {
          prompt,
          images: imagePaths,
          timeout,
          isContinuedSession: continuedSession,
          tool: args.tool,
        });

        // Capture the Gemini chat URL for recovery
        const chatUrl = await captureChatUrl(sid, page);
        const elapsed = ((Date.now() - genStartTime) / 1000).toFixed(1);

        // Add session info to result
        result.sessionId = sid;
        result.chatUrl = chatUrl;
        result.mode = mode;

        if (result.status === 'success') {
          emit('progress', {
            step: 'generate',
            state: 'DONE',
            elapsed: parseFloat(elapsed),
            message: `Generation completed (${elapsed}s)`,
          });

          // Download failures are reported as warnings; the success event is still emitted.
          try {
            const downloaded = await downloadViaButtons(page, cdp, {
              existingButtonCount: result.existingButtonCount || 0,
              timeout: parseTimeoutMs(downloadTimeoutArg, DEFAULT_DOWNLOAD_TIMEOUT_MS),
            });
            if (downloaded.length > 0) {
              result.images = downloaded;
              for (const img of downloaded) {
                emit('progress', { step: 'download', message: `Downloaded: ${path.basename(img.path)}` });
              }
            }
          } catch (e) {
            emit('progress', { step: 'warning', message: `Failed to download generated images: ${e.message}` });
            emit('progress', { step: 'download', message: `Download failed: ${e.message}` });
          }

          // Remove internal fields from output
          delete result.existingButtonCount;

          // Emit final success event
          emit('success', {
            sessionId: sid,
            chatUrl,
            mode,
            images: result.images?.map((i) => i.path) || [],
          });
        } else {
          // text_only (image generation produced only text), error or timeout.
          const failure = {
            message: result.message,
            sessionId: sid,
            status: result.status,
          };
          if (result.status !== 'text_only') {
            failure.timeout = result.timeout;
          }

          // Take the screenshot BEFORE the terminal event so its path is part of the output.
          if (args.screenshot && result.status) {
            const screenshot = await captureFailureScreenshot(page);
            if (screenshot) {
              failure.screenshot = screenshot;
            }
          }

          emit('error', failure);
        }

        // Single mode: close the tab
        if (mode === 'single') {
          await closeSession(sid);
        }
        break;
      }

      case 'download': {
        emit('progress', { step: 'connect', message: 'Connecting to browser...' });
        const { browser: b, page, cdp, sessionId: sid, isNew } = await connectBrowser({ sessionId });
        browser = b;

        // Don't navigate — stay on the current chat page (unless a fresh tab was created)
        if (isNew) {
          emit('progress', { step: 'navigate', message: 'Navigating to Gemini...' });
          await navigateToGemini(page);
        }

        emit('progress', { step: 'login', message: 'Checking login status...' });
        await requireLogin(page);

        emit('progress', { step: 'download', message: 'Downloading generated images...' });
        // --download-timeout wins over --timeout, which wins over the default.
        const downloadTimeout = parseTimeoutMs(
          downloadTimeoutArg,
          parseTimeoutMs(args.timeout, DEFAULT_DOWNLOAD_TIMEOUT_MS)
        );
        const downloadResult = await downloadViaButtons(page, cdp, { newestOnly: true, timeout: downloadTimeout });
        if (downloadResult.length === 0) {
          error('No new images found to download.');
        }

        for (const img of downloadResult) {
          emit('progress', { step: 'download', message: `Downloaded: ${path.basename(img.path)}` });
        }

        emit('success', {
          sessionId: sid,
          path: downloadResult[0].path,
        });

        if (mode === 'single') {
          await closeSession(sid);
        }
        break;
      }

      case 'status': {
        emit('progress', { step: 'connect', message: 'Connecting to browser...' });
        const { browser: b, page, sessionId: sid, isNew } = await connectBrowser({ sessionId });
        browser = b;

        // Navigate if a new tab was created (either no session requested,
        // or requested session was lost and replaced)
        if (isNew) {
          emit('progress', { step: 'navigate', message: 'Navigating to Gemini...' });
          await navigateToGemini(page);
        }

        emit('progress', { step: 'login', message: 'Checking login status...' });
        const loginStatus = await checkLogin(page);
        if (!loginStatus.loggedIn) {
          emit('error', {
            message: 'Not logged into Gemini',
            action_required: 'open_browser_and_login',
            sessionId: sid,
          });
          return;
        }

        const waitForCompletion = args.wait === true || args.wait === 'true';
        const pollTimeout = parseTimeoutMs(args.timeout, undefined);

        if (waitForCompletion) {
          emit('progress', { step: 'status', message: 'Waiting for generation to complete...' });
        }

        const result = await checkStatus(page, {
          waitForCompletion,
          timeout: pollTimeout,
          onStateChange: (state, elapsed) => {
            emit('progress', { step: 'status', state, elapsed, message: `State: ${state} (${elapsed}s)` });
          },
        });
        result.sessionId = sid;

        // Capture screenshot on error or unclear states
        if (args.screenshot && SCREENSHOT_STATES.includes(result.state)) {
          const screenshot = await captureFailureScreenshot(page);
          if (screenshot) {
            result.screenshot = screenshot;
          }
        }

        output({ type: 'status', ...result });
        break;
      }

      default:
        error(`Unknown command: ${command}. Available commands: generate, download, status, sessions, find_session, close`);
    }
  } catch (err) {
    const message = err?.message ?? String(err);
    // CDP connection failures
    if (message.includes('ECONNREFUSED') || message.includes('connect')) {
      error(`Cannot connect to browser at ${config.cdpUrl}. Make sure Chrome is running with --remote-debugging-port.`);
    }
    error(message);
  } finally {
    // Don't close the browser - it's managed externally (OpenClaw, etc.)
    // Just release the CDP connection
    if (browser) {
      try {
        await browser.disconnect();
      } catch {
        // Already disconnected
      }
    }
  }
}

// Failures outside main()'s own try block (sessions / find_session / close)
// are reported as a structured error instead of an unhandled rejection.
main().catch((err) => error(err?.message ?? String(err)));
+ // Just release the CDP connection + if (browser) { + try { + await browser.disconnect(); + } catch { + // Already disconnected + } + } + } +} + +main(); diff --git a/skills/gemini-web-generate/scripts/config.js b/skills/gemini-web-generate/scripts/config.js new file mode 100644 index 0000000..588d0a3 --- /dev/null +++ b/skills/gemini-web-generate/scripts/config.js @@ -0,0 +1,15 @@ +import path from 'path'; +import { fileURLToPath } from 'url'; + +const __filename = fileURLToPath(import.meta.url); +const __dirname = path.dirname(__filename); + +export const config = { + geminiUrl: 'https://gemini.google.com/app', + cdpUrl: process.env.CDP_URL || 'http://127.0.0.1:9223', + timeout: 300000, + pollInterval: 2000, + outputDir: path.join(__dirname, 'output'), + screenshotDir: path.join(__dirname, 'output', 'screenshots'), + downloadDir: path.join(__dirname, 'output', 'originals'), +}; diff --git a/skills/gemini-web-generate/scripts/package-lock.json b/skills/gemini-web-generate/scripts/package-lock.json new file mode 100644 index 0000000..5ffb26a --- /dev/null +++ b/skills/gemini-web-generate/scripts/package-lock.json @@ -0,0 +1,936 @@ +{ + "name": "gemini-web-generate", + "version": "1.0.0", + "lockfileVersion": 3, + "requires": true, + "packages": { + "": { + "name": "gemini-web-generate", + "version": "1.0.0", + "dependencies": { + "puppeteer-core": "^24.0.0" + } + }, + "node_modules/@puppeteer/browsers": { + "version": "2.13.0", + "resolved": "https://registry.npmmirror.com/@puppeteer/browsers/-/browsers-2.13.0.tgz", + "integrity": "sha512-46BZJYJjc/WwmKjsvDFykHtXrtomsCIrwYQPOP7VfMJoZY2bsDF9oROBABR3paDjDcmkUye1Pb1BqdcdiipaWA==", + "license": "Apache-2.0", + "dependencies": { + "debug": "^4.4.3", + "extract-zip": "^2.0.1", + "progress": "^2.0.3", + "proxy-agent": "^6.5.0", + "semver": "^7.7.4", + "tar-fs": "^3.1.1", + "yargs": "^17.7.2" + }, + "bin": { + "browsers": "lib/cjs/main-cli.js" + }, + "engines": { + "node": ">=18" + } + }, + 
"node_modules/@tootallnate/quickjs-emscripten": { + "version": "0.23.0", + "resolved": "https://registry.npmmirror.com/@tootallnate/quickjs-emscripten/-/quickjs-emscripten-0.23.0.tgz", + "integrity": "sha512-C5Mc6rdnsaJDjO3UpGW/CQTHtCKaYlScZTly4JIu97Jxo/odCiH0ITnDXSJPTOrEKk/ycSZ0AOgTmkDtkOsvIA==", + "license": "MIT" + }, + "node_modules/@types/node": { + "version": "25.6.0", + "resolved": "https://registry.npmmirror.com/@types/node/-/node-25.6.0.tgz", + "integrity": "sha512-+qIYRKdNYJwY3vRCZMdJbPLJAtGjQBudzZzdzwQYkEPQd+PJGixUL5QfvCLDaULoLv+RhT3LDkwEfKaAkgSmNQ==", + "license": "MIT", + "optional": true, + "dependencies": { + "undici-types": "~7.19.0" + } + }, + "node_modules/@types/yauzl": { + "version": "2.10.3", + "resolved": "https://registry.npmmirror.com/@types/yauzl/-/yauzl-2.10.3.tgz", + "integrity": "sha512-oJoftv0LSuaDZE3Le4DbKX+KS9G36NzOeSap90UIK0yMA/NhKJhqlSGtNDORNRaIbQfzjXDrQa0ytJ6mNRGz/Q==", + "license": "MIT", + "optional": true, + "dependencies": { + "@types/node": "*" + } + }, + "node_modules/agent-base": { + "version": "7.1.4", + "resolved": "https://registry.npmmirror.com/agent-base/-/agent-base-7.1.4.tgz", + "integrity": "sha512-MnA+YT8fwfJPgBx3m60MNqakm30XOkyIoH1y6huTQvC0PwZG7ki8NacLBcrPbNoo8vEZy7Jpuk7+jMO+CUovTQ==", + "license": "MIT", + "engines": { + "node": ">= 14" + } + }, + "node_modules/ansi-regex": { + "version": "5.0.1", + "resolved": "https://registry.npmmirror.com/ansi-regex/-/ansi-regex-5.0.1.tgz", + "integrity": "sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==", + "license": "MIT", + "engines": { + "node": ">=8" + } + }, + "node_modules/ansi-styles": { + "version": "4.3.0", + "resolved": "https://registry.npmmirror.com/ansi-styles/-/ansi-styles-4.3.0.tgz", + "integrity": "sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg==", + "license": "MIT", + "dependencies": { + "color-convert": "^2.0.1" + }, + "engines": { + "node": ">=8" + }, + 
"funding": { + "url": "https://github.com/chalk/ansi-styles?sponsor=1" + } + }, + "node_modules/ast-types": { + "version": "0.13.4", + "resolved": "https://registry.npmmirror.com/ast-types/-/ast-types-0.13.4.tgz", + "integrity": "sha512-x1FCFnFifvYDDzTaLII71vG5uvDwgtmDTEVWAxrgeiR8VjMONcCXJx7E+USjDtHlwFmt9MysbqgF9b9Vjr6w+w==", + "license": "MIT", + "dependencies": { + "tslib": "^2.0.1" + }, + "engines": { + "node": ">=4" + } + }, + "node_modules/b4a": { + "version": "1.8.0", + "resolved": "https://registry.npmmirror.com/b4a/-/b4a-1.8.0.tgz", + "integrity": "sha512-qRuSmNSkGQaHwNbM7J78Wwy+ghLEYF1zNrSeMxj4Kgw6y33O3mXcQ6Ie9fRvfU/YnxWkOchPXbaLb73TkIsfdg==", + "license": "Apache-2.0", + "peerDependencies": { + "react-native-b4a": "*" + }, + "peerDependenciesMeta": { + "react-native-b4a": { + "optional": true + } + } + }, + "node_modules/bare-events": { + "version": "2.8.2", + "resolved": "https://registry.npmmirror.com/bare-events/-/bare-events-2.8.2.tgz", + "integrity": "sha512-riJjyv1/mHLIPX4RwiK+oW9/4c3TEUeORHKefKAKnZ5kyslbN+HXowtbaVEqt4IMUB7OXlfixcs6gsFeo/jhiQ==", + "license": "Apache-2.0", + "peerDependencies": { + "bare-abort-controller": "*" + }, + "peerDependenciesMeta": { + "bare-abort-controller": { + "optional": true + } + } + }, + "node_modules/bare-fs": { + "version": "4.7.0", + "resolved": "https://registry.npmmirror.com/bare-fs/-/bare-fs-4.7.0.tgz", + "integrity": "sha512-xzqKsCFxAek9aezYhjJuJRXBIaYlg/0OGDTZp+T8eYmYMlm66cs6cYko02drIyjN2CBbi+I6L7YfXyqpqtKRXA==", + "license": "Apache-2.0", + "dependencies": { + "bare-events": "^2.5.4", + "bare-path": "^3.0.0", + "bare-stream": "^2.6.4", + "bare-url": "^2.2.2", + "fast-fifo": "^1.3.2" + }, + "engines": { + "bare": ">=1.16.0" + }, + "peerDependencies": { + "bare-buffer": "*" + }, + "peerDependenciesMeta": { + "bare-buffer": { + "optional": true + } + } + }, + "node_modules/bare-os": { + "version": "3.8.7", + "resolved": "https://registry.npmmirror.com/bare-os/-/bare-os-3.8.7.tgz", + "integrity": 
"sha512-G4Gr1UsGeEy2qtDTZwL7JFLo2wapUarz7iTMcYcMFdS89AIQuBoyjgXZz0Utv7uHs3xA9LckhVbeBi8lEQrC+w==", + "license": "Apache-2.0", + "engines": { + "bare": ">=1.14.0" + } + }, + "node_modules/bare-path": { + "version": "3.0.0", + "resolved": "https://registry.npmmirror.com/bare-path/-/bare-path-3.0.0.tgz", + "integrity": "sha512-tyfW2cQcB5NN8Saijrhqn0Zh7AnFNsnczRcuWODH0eYAXBsJ5gVxAUuNr7tsHSC6IZ77cA0SitzT+s47kot8Mw==", + "license": "Apache-2.0", + "dependencies": { + "bare-os": "^3.0.1" + } + }, + "node_modules/bare-stream": { + "version": "2.13.0", + "resolved": "https://registry.npmmirror.com/bare-stream/-/bare-stream-2.13.0.tgz", + "integrity": "sha512-3zAJRZMDFGjdn+RVnNpF9kuELw+0Fl3lpndM4NcEOhb9zwtSo/deETfuIwMSE5BXanA0FrN1qVjffGwAg2Y7EA==", + "license": "Apache-2.0", + "dependencies": { + "streamx": "^2.25.0", + "teex": "^1.0.1" + }, + "peerDependencies": { + "bare-abort-controller": "*", + "bare-buffer": "*", + "bare-events": "*" + }, + "peerDependenciesMeta": { + "bare-abort-controller": { + "optional": true + }, + "bare-buffer": { + "optional": true + }, + "bare-events": { + "optional": true + } + } + }, + "node_modules/bare-url": { + "version": "2.4.0", + "resolved": "https://registry.npmmirror.com/bare-url/-/bare-url-2.4.0.tgz", + "integrity": "sha512-NSTU5WN+fy/L0DDenfE8SXQna4voXuW0FHM7wH8i3/q9khUSchfPbPezO4zSFMnDGIf9YE+mt/RWhZgNRKRIXA==", + "license": "Apache-2.0", + "dependencies": { + "bare-path": "^3.0.0" + } + }, + "node_modules/basic-ftp": { + "version": "5.2.2", + "resolved": "https://registry.npmmirror.com/basic-ftp/-/basic-ftp-5.2.2.tgz", + "integrity": "sha512-1tDrzKsdCg70WGvbFss/ulVAxupNauGnOlgpyjKzeQxzyllBLS0CGLV7tjIXTK3ZQA9/FBEm9qyFFN1bciA6pw==", + "license": "MIT", + "engines": { + "node": ">=10.0.0" + } + }, + "node_modules/buffer-crc32": { + "version": "0.2.13", + "resolved": "https://registry.npmmirror.com/buffer-crc32/-/buffer-crc32-0.2.13.tgz", + "integrity": 
"sha512-VO9Ht/+p3SN7SKWqcrgEzjGbRSJYTx+Q1pTQC0wrWqHx0vpJraQ6GtHx8tvcg1rlK1byhU5gccxgOgj7B0TDkQ==", + "license": "MIT", + "engines": { + "node": "*" + } + }, + "node_modules/chromium-bidi": { + "version": "14.0.0", + "resolved": "https://registry.npmmirror.com/chromium-bidi/-/chromium-bidi-14.0.0.tgz", + "integrity": "sha512-9gYlLtS6tStdRWzrtXaTMnqcM4dudNegMXJxkR0I/CXObHalYeYcAMPrL19eroNZHtJ8DQmu1E+ZNOYu/IXMXw==", + "license": "Apache-2.0", + "dependencies": { + "mitt": "^3.0.1", + "zod": "^3.24.1" + }, + "peerDependencies": { + "devtools-protocol": "*" + } + }, + "node_modules/cliui": { + "version": "8.0.1", + "resolved": "https://registry.npmmirror.com/cliui/-/cliui-8.0.1.tgz", + "integrity": "sha512-BSeNnyus75C4//NQ9gQt1/csTXyo/8Sb+afLAkzAptFuMsod9HFokGNudZpi/oQV73hnVK+sR+5PVRMd+Dr7YQ==", + "license": "ISC", + "dependencies": { + "string-width": "^4.2.0", + "strip-ansi": "^6.0.1", + "wrap-ansi": "^7.0.0" + }, + "engines": { + "node": ">=12" + } + }, + "node_modules/color-convert": { + "version": "2.0.1", + "resolved": "https://registry.npmmirror.com/color-convert/-/color-convert-2.0.1.tgz", + "integrity": "sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ==", + "license": "MIT", + "dependencies": { + "color-name": "~1.1.4" + }, + "engines": { + "node": ">=7.0.0" + } + }, + "node_modules/color-name": { + "version": "1.1.4", + "resolved": "https://registry.npmmirror.com/color-name/-/color-name-1.1.4.tgz", + "integrity": "sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==", + "license": "MIT" + }, + "node_modules/data-uri-to-buffer": { + "version": "6.0.2", + "resolved": "https://registry.npmmirror.com/data-uri-to-buffer/-/data-uri-to-buffer-6.0.2.tgz", + "integrity": "sha512-7hvf7/GW8e86rW0ptuwS3OcBGDjIi6SZva7hCyWC0yYry2cOPmLIjXAUHI6DK2HsnwJd9ifmt57i8eV2n4YNpw==", + "license": "MIT", + "engines": { + "node": ">= 14" + } + }, + "node_modules/debug": { + "version": "4.4.3", + 
"resolved": "https://registry.npmmirror.com/debug/-/debug-4.4.3.tgz", + "integrity": "sha512-RGwwWnwQvkVfavKVt22FGLw+xYSdzARwm0ru6DhTVA3umU5hZc28V3kO4stgYryrTlLpuvgI9GiijltAjNbcqA==", + "license": "MIT", + "dependencies": { + "ms": "^2.1.3" + }, + "engines": { + "node": ">=6.0" + }, + "peerDependenciesMeta": { + "supports-color": { + "optional": true + } + } + }, + "node_modules/degenerator": { + "version": "5.0.1", + "resolved": "https://registry.npmmirror.com/degenerator/-/degenerator-5.0.1.tgz", + "integrity": "sha512-TllpMR/t0M5sqCXfj85i4XaAzxmS5tVA16dqvdkMwGmzI+dXLXnw3J+3Vdv7VKw+ThlTMboK6i9rnZ6Nntj5CQ==", + "license": "MIT", + "dependencies": { + "ast-types": "^0.13.4", + "escodegen": "^2.1.0", + "esprima": "^4.0.1" + }, + "engines": { + "node": ">= 14" + } + }, + "node_modules/devtools-protocol": { + "version": "0.0.1581282", + "resolved": "https://registry.npmmirror.com/devtools-protocol/-/devtools-protocol-0.0.1581282.tgz", + "integrity": "sha512-nv7iKtNZQshSW2hKzYNr46nM/Cfh5SEvE2oV0/SEGgc9XupIY5ggf84Cz8eJIkBce7S3bmTAauFD6aysMpnqsQ==", + "license": "BSD-3-Clause" + }, + "node_modules/emoji-regex": { + "version": "8.0.0", + "resolved": "https://registry.npmmirror.com/emoji-regex/-/emoji-regex-8.0.0.tgz", + "integrity": "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==", + "license": "MIT" + }, + "node_modules/end-of-stream": { + "version": "1.4.5", + "resolved": "https://registry.npmmirror.com/end-of-stream/-/end-of-stream-1.4.5.tgz", + "integrity": "sha512-ooEGc6HP26xXq/N+GCGOT0JKCLDGrq2bQUZrQ7gyrJiZANJ/8YDTxTpQBXGMn+WbIQXNVpyWymm7KYVICQnyOg==", + "license": "MIT", + "dependencies": { + "once": "^1.4.0" + } + }, + "node_modules/escalade": { + "version": "3.2.0", + "resolved": "https://registry.npmmirror.com/escalade/-/escalade-3.2.0.tgz", + "integrity": "sha512-WUj2qlxaQtO4g6Pq5c29GTcWGDyd8itL8zTlipgECz3JesAiiOKotd8JU6otB3PACgG6xkJUyVhboMS+bje/jA==", + "license": "MIT", + "engines": { + "node": ">=6" + } + }, + 
"node_modules/escodegen": { + "version": "2.1.0", + "resolved": "https://registry.npmmirror.com/escodegen/-/escodegen-2.1.0.tgz", + "integrity": "sha512-2NlIDTwUWJN0mRPQOdtQBzbUHvdGY2P1VXSyU83Q3xKxM7WHX2Ql8dKq782Q9TgQUNOLEzEYu9bzLNj1q88I5w==", + "license": "BSD-2-Clause", + "dependencies": { + "esprima": "^4.0.1", + "estraverse": "^5.2.0", + "esutils": "^2.0.2" + }, + "bin": { + "escodegen": "bin/escodegen.js", + "esgenerate": "bin/esgenerate.js" + }, + "engines": { + "node": ">=6.0" + }, + "optionalDependencies": { + "source-map": "~0.6.1" + } + }, + "node_modules/esprima": { + "version": "4.0.1", + "resolved": "https://registry.npmmirror.com/esprima/-/esprima-4.0.1.tgz", + "integrity": "sha512-eGuFFw7Upda+g4p+QHvnW0RyTX/SVeJBDM/gCtMARO0cLuT2HcEKnTPvhjV6aGeqrCB/sbNop0Kszm0jsaWU4A==", + "license": "BSD-2-Clause", + "bin": { + "esparse": "bin/esparse.js", + "esvalidate": "bin/esvalidate.js" + }, + "engines": { + "node": ">=4" + } + }, + "node_modules/estraverse": { + "version": "5.3.0", + "resolved": "https://registry.npmmirror.com/estraverse/-/estraverse-5.3.0.tgz", + "integrity": "sha512-MMdARuVEQziNTeJD8DgMqmhwR11BRQ/cBP+pLtYdSTnf3MIO8fFeiINEbX36ZdNlfU/7A9f3gUw49B3oQsvwBA==", + "license": "BSD-2-Clause", + "engines": { + "node": ">=4.0" + } + }, + "node_modules/esutils": { + "version": "2.0.3", + "resolved": "https://registry.npmmirror.com/esutils/-/esutils-2.0.3.tgz", + "integrity": "sha512-kVscqXk4OCp68SZ0dkgEKVi6/8ij300KBWTJq32P/dYeWTSwK41WyTxalN1eRmA5Z9UU/LX9D7FWSmV9SAYx6g==", + "license": "BSD-2-Clause", + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/events-universal": { + "version": "1.0.1", + "resolved": "https://registry.npmmirror.com/events-universal/-/events-universal-1.0.1.tgz", + "integrity": "sha512-LUd5euvbMLpwOF8m6ivPCbhQeSiYVNb8Vs0fQ8QjXo0JTkEHpz8pxdQf0gStltaPpw0Cca8b39KxvK9cfKRiAw==", + "license": "Apache-2.0", + "dependencies": { + "bare-events": "^2.7.0" + } + }, + "node_modules/extract-zip": { + "version": "2.0.1", + "resolved": 
"https://registry.npmmirror.com/extract-zip/-/extract-zip-2.0.1.tgz", + "integrity": "sha512-GDhU9ntwuKyGXdZBUgTIe+vXnWj0fppUEtMDL0+idd5Sta8TGpHssn/eusA9mrPr9qNDym6SxAYZjNvCn/9RBg==", + "license": "BSD-2-Clause", + "dependencies": { + "debug": "^4.1.1", + "get-stream": "^5.1.0", + "yauzl": "^2.10.0" + }, + "bin": { + "extract-zip": "cli.js" + }, + "engines": { + "node": ">= 10.17.0" + }, + "optionalDependencies": { + "@types/yauzl": "^2.9.1" + } + }, + "node_modules/fast-fifo": { + "version": "1.3.2", + "resolved": "https://registry.npmmirror.com/fast-fifo/-/fast-fifo-1.3.2.tgz", + "integrity": "sha512-/d9sfos4yxzpwkDkuN7k2SqFKtYNmCTzgfEpz82x34IM9/zc8KGxQoXg1liNC/izpRM/MBdt44Nmx41ZWqk+FQ==", + "license": "MIT" + }, + "node_modules/fd-slicer": { + "version": "1.1.0", + "resolved": "https://registry.npmmirror.com/fd-slicer/-/fd-slicer-1.1.0.tgz", + "integrity": "sha512-cE1qsB/VwyQozZ+q1dGxR8LBYNZeofhEdUNGSMbQD3Gw2lAzX9Zb3uIU6Ebc/Fmyjo9AWWfnn0AUCHqtevs/8g==", + "license": "MIT", + "dependencies": { + "pend": "~1.2.0" + } + }, + "node_modules/get-caller-file": { + "version": "2.0.5", + "resolved": "https://registry.npmmirror.com/get-caller-file/-/get-caller-file-2.0.5.tgz", + "integrity": "sha512-DyFP3BM/3YHTQOCUL/w0OZHR0lpKeGrxotcHWcqNEdnltqFwXVfhEBQ94eIo34AfQpo0rGki4cyIiftY06h2Fg==", + "license": "ISC", + "engines": { + "node": "6.* || 8.* || >= 10.*" + } + }, + "node_modules/get-stream": { + "version": "5.2.0", + "resolved": "https://registry.npmmirror.com/get-stream/-/get-stream-5.2.0.tgz", + "integrity": "sha512-nBF+F1rAZVCu/p7rjzgA+Yb4lfYXrpl7a6VmJrU8wF9I1CKvP/QwPNZHnOlwbTkY6dvtFIzFMSyQXbLoTQPRpA==", + "license": "MIT", + "dependencies": { + "pump": "^3.0.0" + }, + "engines": { + "node": ">=8" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/get-uri": { + "version": "6.0.5", + "resolved": "https://registry.npmmirror.com/get-uri/-/get-uri-6.0.5.tgz", + "integrity": 
"sha512-b1O07XYq8eRuVzBNgJLstU6FYc1tS6wnMtF1I1D9lE8LxZSOGZ7LhxN54yPP6mGw5f2CkXY2BQUL9Fx41qvcIg==", + "license": "MIT", + "dependencies": { + "basic-ftp": "^5.0.2", + "data-uri-to-buffer": "^6.0.2", + "debug": "^4.3.4" + }, + "engines": { + "node": ">= 14" + } + }, + "node_modules/http-proxy-agent": { + "version": "7.0.2", + "resolved": "https://registry.npmmirror.com/http-proxy-agent/-/http-proxy-agent-7.0.2.tgz", + "integrity": "sha512-T1gkAiYYDWYx3V5Bmyu7HcfcvL7mUrTWiM6yOfa3PIphViJ/gFPbvidQ+veqSOHci/PxBcDabeUNCzpOODJZig==", + "license": "MIT", + "dependencies": { + "agent-base": "^7.1.0", + "debug": "^4.3.4" + }, + "engines": { + "node": ">= 14" + } + }, + "node_modules/https-proxy-agent": { + "version": "7.0.6", + "resolved": "https://registry.npmmirror.com/https-proxy-agent/-/https-proxy-agent-7.0.6.tgz", + "integrity": "sha512-vK9P5/iUfdl95AI+JVyUuIcVtd4ofvtrOr3HNtM2yxC9bnMbEdp3x01OhQNnjb8IJYi38VlTE3mBXwcfvywuSw==", + "license": "MIT", + "dependencies": { + "agent-base": "^7.1.2", + "debug": "4" + }, + "engines": { + "node": ">= 14" + } + }, + "node_modules/ip-address": { + "version": "10.1.0", + "resolved": "https://registry.npmmirror.com/ip-address/-/ip-address-10.1.0.tgz", + "integrity": "sha512-XXADHxXmvT9+CRxhXg56LJovE+bmWnEWB78LB83VZTprKTmaC5QfruXocxzTZ2Kl0DNwKuBdlIhjL8LeY8Sf8Q==", + "license": "MIT", + "engines": { + "node": ">= 12" + } + }, + "node_modules/is-fullwidth-code-point": { + "version": "3.0.0", + "resolved": "https://registry.npmmirror.com/is-fullwidth-code-point/-/is-fullwidth-code-point-3.0.0.tgz", + "integrity": "sha512-zymm5+u+sCsSWyD9qNaejV3DFvhCKclKdizYaJUuHA83RLjb7nSuGnddCHGv0hk+KY7BMAlsWeK4Ueg6EV6XQg==", + "license": "MIT", + "engines": { + "node": ">=8" + } + }, + "node_modules/lru-cache": { + "version": "7.18.3", + "resolved": "https://registry.npmmirror.com/lru-cache/-/lru-cache-7.18.3.tgz", + "integrity": "sha512-jumlc0BIUrS3qJGgIkWZsyfAM7NCWiBcCDhnd+3NNM5KbBmLTgHVfWBcg6W+rLUsIpzpERPsvwUP7CckAQSOoA==", + "license": "ISC", + 
"engines": { + "node": ">=12" + } + }, + "node_modules/mitt": { + "version": "3.0.1", + "resolved": "https://registry.npmmirror.com/mitt/-/mitt-3.0.1.tgz", + "integrity": "sha512-vKivATfr97l2/QBCYAkXYDbrIWPM2IIKEl7YPhjCvKlG3kE2gm+uBo6nEXK3M5/Ffh/FLpKExzOQ3JJoJGFKBw==", + "license": "MIT" + }, + "node_modules/ms": { + "version": "2.1.3", + "resolved": "https://registry.npmmirror.com/ms/-/ms-2.1.3.tgz", + "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==", + "license": "MIT" + }, + "node_modules/netmask": { + "version": "2.1.1", + "resolved": "https://registry.npmmirror.com/netmask/-/netmask-2.1.1.tgz", + "integrity": "sha512-eonl3sLUha+S1GzTPxychyhnUzKyeQkZ7jLjKrBagJgPla13F+uQ71HgpFefyHgqrjEbCPkDArxYsjY8/+gLKA==", + "license": "MIT", + "engines": { + "node": ">= 0.4.0" + } + }, + "node_modules/once": { + "version": "1.4.0", + "resolved": "https://registry.npmmirror.com/once/-/once-1.4.0.tgz", + "integrity": "sha512-lNaJgI+2Q5URQBkccEKHTQOPaXdUxnZZElQTZY0MFUAuaEqe1E+Nyvgdz/aIyNi6Z9MzO5dv1H8n58/GELp3+w==", + "license": "ISC", + "dependencies": { + "wrappy": "1" + } + }, + "node_modules/pac-proxy-agent": { + "version": "7.2.0", + "resolved": "https://registry.npmmirror.com/pac-proxy-agent/-/pac-proxy-agent-7.2.0.tgz", + "integrity": "sha512-TEB8ESquiLMc0lV8vcd5Ql/JAKAoyzHFXaStwjkzpOpC5Yv+pIzLfHvjTSdf3vpa2bMiUQrg9i6276yn8666aA==", + "license": "MIT", + "dependencies": { + "@tootallnate/quickjs-emscripten": "^0.23.0", + "agent-base": "^7.1.2", + "debug": "^4.3.4", + "get-uri": "^6.0.1", + "http-proxy-agent": "^7.0.0", + "https-proxy-agent": "^7.0.6", + "pac-resolver": "^7.0.1", + "socks-proxy-agent": "^8.0.5" + }, + "engines": { + "node": ">= 14" + } + }, + "node_modules/pac-resolver": { + "version": "7.0.1", + "resolved": "https://registry.npmmirror.com/pac-resolver/-/pac-resolver-7.0.1.tgz", + "integrity": "sha512-5NPgf87AT2STgwa2ntRMr45jTKrYBGkVU36yT0ig/n/GMAa3oPqhZfIQ2kMEimReg0+t9kZViDVZ83qfVUlckg==", + 
"license": "MIT", + "dependencies": { + "degenerator": "^5.0.0", + "netmask": "^2.0.2" + }, + "engines": { + "node": ">= 14" + } + }, + "node_modules/pend": { + "version": "1.2.0", + "resolved": "https://registry.npmmirror.com/pend/-/pend-1.2.0.tgz", + "integrity": "sha512-F3asv42UuXchdzt+xXqfW1OGlVBe+mxa2mqI0pg5yAHZPvFmY3Y6drSf/GQ1A86WgWEN9Kzh/WrgKa6iGcHXLg==", + "license": "MIT" + }, + "node_modules/progress": { + "version": "2.0.3", + "resolved": "https://registry.npmmirror.com/progress/-/progress-2.0.3.tgz", + "integrity": "sha512-7PiHtLll5LdnKIMw100I+8xJXR5gW2QwWYkT6iJva0bXitZKa/XMrSbdmg3r2Xnaidz9Qumd0VPaMrZlF9V9sA==", + "license": "MIT", + "engines": { + "node": ">=0.4.0" + } + }, + "node_modules/proxy-agent": { + "version": "6.5.0", + "resolved": "https://registry.npmmirror.com/proxy-agent/-/proxy-agent-6.5.0.tgz", + "integrity": "sha512-TmatMXdr2KlRiA2CyDu8GqR8EjahTG3aY3nXjdzFyoZbmB8hrBsTyMezhULIXKnC0jpfjlmiZ3+EaCzoInSu/A==", + "license": "MIT", + "dependencies": { + "agent-base": "^7.1.2", + "debug": "^4.3.4", + "http-proxy-agent": "^7.0.1", + "https-proxy-agent": "^7.0.6", + "lru-cache": "^7.14.1", + "pac-proxy-agent": "^7.1.0", + "proxy-from-env": "^1.1.0", + "socks-proxy-agent": "^8.0.5" + }, + "engines": { + "node": ">= 14" + } + }, + "node_modules/proxy-from-env": { + "version": "1.1.0", + "resolved": "https://registry.npmmirror.com/proxy-from-env/-/proxy-from-env-1.1.0.tgz", + "integrity": "sha512-D+zkORCbA9f1tdWRK0RaCR3GPv50cMxcrz4X8k5LTSUD1Dkw47mKJEZQNunItRTkWwgtaUSo1RVFRIG9ZXiFYg==", + "license": "MIT" + }, + "node_modules/pump": { + "version": "3.0.4", + "resolved": "https://registry.npmmirror.com/pump/-/pump-3.0.4.tgz", + "integrity": "sha512-VS7sjc6KR7e1ukRFhQSY5LM2uBWAUPiOPa/A3mkKmiMwSmRFUITt0xuj+/lesgnCv+dPIEYlkzrcyXgquIHMcA==", + "license": "MIT", + "dependencies": { + "end-of-stream": "^1.1.0", + "once": "^1.3.1" + } + }, + "node_modules/puppeteer-core": { + "version": "24.40.0", + "resolved": 
"https://registry.npmmirror.com/puppeteer-core/-/puppeteer-core-24.40.0.tgz", + "integrity": "sha512-MWL3XbUCfVgGR0gRsidzT6oKJT2QydPLhMITU6HoVWiiv4gkb6gJi3pcdAa8q4HwjBTbqISOWVP4aJiiyUJvag==", + "license": "Apache-2.0", + "dependencies": { + "@puppeteer/browsers": "2.13.0", + "chromium-bidi": "14.0.0", + "debug": "^4.4.3", + "devtools-protocol": "0.0.1581282", + "typed-query-selector": "^2.12.1", + "webdriver-bidi-protocol": "0.4.1", + "ws": "^8.19.0" + }, + "engines": { + "node": ">=18" + } + }, + "node_modules/require-directory": { + "version": "2.1.1", + "resolved": "https://registry.npmmirror.com/require-directory/-/require-directory-2.1.1.tgz", + "integrity": "sha512-fGxEI7+wsG9xrvdjsrlmL22OMTTiHRwAMroiEeMgq8gzoLC/PQr7RsRDSTLUg/bZAZtF+TVIkHc6/4RIKrui+Q==", + "license": "MIT", + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/semver": { + "version": "7.7.4", + "resolved": "https://registry.npmmirror.com/semver/-/semver-7.7.4.tgz", + "integrity": "sha512-vFKC2IEtQnVhpT78h1Yp8wzwrf8CM+MzKMHGJZfBtzhZNycRFnXsHk6E5TxIkkMsgNS7mdX3AGB7x2QM2di4lA==", + "license": "ISC", + "bin": { + "semver": "bin/semver.js" + }, + "engines": { + "node": ">=10" + } + }, + "node_modules/smart-buffer": { + "version": "4.2.0", + "resolved": "https://registry.npmmirror.com/smart-buffer/-/smart-buffer-4.2.0.tgz", + "integrity": "sha512-94hK0Hh8rPqQl2xXc3HsaBoOXKV20MToPkcXvwbISWLEs+64sBq5kFgn2kJDHb1Pry9yrP0dxrCI9RRci7RXKg==", + "license": "MIT", + "engines": { + "node": ">= 6.0.0", + "npm": ">= 3.0.0" + } + }, + "node_modules/socks": { + "version": "2.8.7", + "resolved": "https://registry.npmmirror.com/socks/-/socks-2.8.7.tgz", + "integrity": "sha512-HLpt+uLy/pxB+bum/9DzAgiKS8CX1EvbWxI4zlmgGCExImLdiad2iCwXT5Z4c9c3Eq8rP2318mPW2c+QbtjK8A==", + "license": "MIT", + "dependencies": { + "ip-address": "^10.0.1", + "smart-buffer": "^4.2.0" + }, + "engines": { + "node": ">= 10.0.0", + "npm": ">= 3.0.0" + } + }, + "node_modules/socks-proxy-agent": { + "version": "8.0.5", + "resolved": 
"https://registry.npmmirror.com/socks-proxy-agent/-/socks-proxy-agent-8.0.5.tgz", + "integrity": "sha512-HehCEsotFqbPW9sJ8WVYB6UbmIMv7kUUORIF2Nncq4VQvBfNBLibW9YZR5dlYCSUhwcD628pRllm7n+E+YTzJw==", + "license": "MIT", + "dependencies": { + "agent-base": "^7.1.2", + "debug": "^4.3.4", + "socks": "^2.8.3" + }, + "engines": { + "node": ">= 14" + } + }, + "node_modules/source-map": { + "version": "0.6.1", + "resolved": "https://registry.npmmirror.com/source-map/-/source-map-0.6.1.tgz", + "integrity": "sha512-UjgapumWlbMhkBgzT7Ykc5YXUT46F0iKu8SGXq0bcwP5dz/h0Plj6enJqjz1Zbq2l5WaqYnrVbwWOWMyF3F47g==", + "license": "BSD-3-Clause", + "optional": true, + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/streamx": { + "version": "2.25.0", + "resolved": "https://registry.npmmirror.com/streamx/-/streamx-2.25.0.tgz", + "integrity": "sha512-0nQuG6jf1w+wddNEEXCF4nTg3LtufWINB5eFEN+5TNZW7KWJp6x87+JFL43vaAUPyCfH1wID+mNVyW6OHtFamg==", + "license": "MIT", + "dependencies": { + "events-universal": "^1.0.0", + "fast-fifo": "^1.3.2", + "text-decoder": "^1.1.0" + } + }, + "node_modules/string-width": { + "version": "4.2.3", + "resolved": "https://registry.npmmirror.com/string-width/-/string-width-4.2.3.tgz", + "integrity": "sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==", + "license": "MIT", + "dependencies": { + "emoji-regex": "^8.0.0", + "is-fullwidth-code-point": "^3.0.0", + "strip-ansi": "^6.0.1" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/strip-ansi": { + "version": "6.0.1", + "resolved": "https://registry.npmmirror.com/strip-ansi/-/strip-ansi-6.0.1.tgz", + "integrity": "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==", + "license": "MIT", + "dependencies": { + "ansi-regex": "^5.0.1" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/tar-fs": { + "version": "3.1.2", + "resolved": "https://registry.npmmirror.com/tar-fs/-/tar-fs-3.1.2.tgz", + "integrity": 
"sha512-QGxxTxxyleAdyM3kpFs14ymbYmNFrfY+pHj7Z8FgtbZ7w2//VAgLMac7sT6nRpIHjppXO2AwwEOg0bPFVRcmXw==", + "license": "MIT", + "dependencies": { + "pump": "^3.0.0", + "tar-stream": "^3.1.5" + }, + "optionalDependencies": { + "bare-fs": "^4.0.1", + "bare-path": "^3.0.0" + } + }, + "node_modules/tar-stream": { + "version": "3.1.8", + "resolved": "https://registry.npmmirror.com/tar-stream/-/tar-stream-3.1.8.tgz", + "integrity": "sha512-U6QpVRyCGHva435KoNWy9PRoi2IFYCgtEhq9nmrPPpbRacPs9IH4aJ3gbrFC8dPcXvdSZ4XXfXT5Fshbp2MtlQ==", + "license": "MIT", + "dependencies": { + "b4a": "^1.6.4", + "bare-fs": "^4.5.5", + "fast-fifo": "^1.2.0", + "streamx": "^2.15.0" + } + }, + "node_modules/teex": { + "version": "1.0.1", + "resolved": "https://registry.npmmirror.com/teex/-/teex-1.0.1.tgz", + "integrity": "sha512-eYE6iEI62Ni1H8oIa7KlDU6uQBtqr4Eajni3wX7rpfXD8ysFx8z0+dri+KWEPWpBsxXfxu58x/0jvTVT1ekOSg==", + "license": "MIT", + "dependencies": { + "streamx": "^2.12.5" + } + }, + "node_modules/text-decoder": { + "version": "1.2.7", + "resolved": "https://registry.npmmirror.com/text-decoder/-/text-decoder-1.2.7.tgz", + "integrity": "sha512-vlLytXkeP4xvEq2otHeJfSQIRyWxo/oZGEbXrtEEF9Hnmrdly59sUbzZ/QgyWuLYHctCHxFF4tRQZNQ9k60ExQ==", + "license": "Apache-2.0", + "dependencies": { + "b4a": "^1.6.4" + } + }, + "node_modules/tslib": { + "version": "2.8.1", + "resolved": "https://registry.npmmirror.com/tslib/-/tslib-2.8.1.tgz", + "integrity": "sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w==", + "license": "0BSD" + }, + "node_modules/typed-query-selector": { + "version": "2.12.1", + "resolved": "https://registry.npmmirror.com/typed-query-selector/-/typed-query-selector-2.12.1.tgz", + "integrity": "sha512-uzR+FzI8qrUEIu96oaeBJmd9E7CFEiQ3goA5qCVgc4s5llSubcfGHq9yUstZx/k4s9dXHVKsE35YWoFyvEqEHA==", + "license": "MIT" + }, + "node_modules/undici-types": { + "version": "7.19.2", + "resolved": "https://registry.npmmirror.com/undici-types/-/undici-types-7.19.2.tgz", 
+ "integrity": "sha512-qYVnV5OEm2AW8cJMCpdV20CDyaN3g0AjDlOGf1OW4iaDEx8MwdtChUp4zu4H0VP3nDRF/8RKWH+IPp9uW0YGZg==", + "license": "MIT", + "optional": true + }, + "node_modules/webdriver-bidi-protocol": { + "version": "0.4.1", + "resolved": "https://registry.npmmirror.com/webdriver-bidi-protocol/-/webdriver-bidi-protocol-0.4.1.tgz", + "integrity": "sha512-ARrjNjtWRRs2w4Tk7nqrf2gBI0QXWuOmMCx2hU+1jUt6d00MjMxURrhxhGbrsoiZKJrhTSTzbIrc554iKI10qw==", + "license": "Apache-2.0" + }, + "node_modules/wrap-ansi": { + "version": "7.0.0", + "resolved": "https://registry.npmmirror.com/wrap-ansi/-/wrap-ansi-7.0.0.tgz", + "integrity": "sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q==", + "license": "MIT", + "dependencies": { + "ansi-styles": "^4.0.0", + "string-width": "^4.1.0", + "strip-ansi": "^6.0.0" + }, + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/chalk/wrap-ansi?sponsor=1" + } + }, + "node_modules/wrappy": { + "version": "1.0.2", + "resolved": "https://registry.npmmirror.com/wrappy/-/wrappy-1.0.2.tgz", + "integrity": "sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ==", + "license": "ISC" + }, + "node_modules/ws": { + "version": "8.20.0", + "resolved": "https://registry.npmmirror.com/ws/-/ws-8.20.0.tgz", + "integrity": "sha512-sAt8BhgNbzCtgGbt2OxmpuryO63ZoDk/sqaB/znQm94T4fCEsy/yV+7CdC1kJhOU9lboAEU7R3kquuycDoibVA==", + "license": "MIT", + "engines": { + "node": ">=10.0.0" + }, + "peerDependencies": { + "bufferutil": "^4.0.1", + "utf-8-validate": ">=5.0.2" + }, + "peerDependenciesMeta": { + "bufferutil": { + "optional": true + }, + "utf-8-validate": { + "optional": true + } + } + }, + "node_modules/y18n": { + "version": "5.0.8", + "resolved": "https://registry.npmmirror.com/y18n/-/y18n-5.0.8.tgz", + "integrity": "sha512-0pfFzegeDWJHJIAmTLRP2DwHjdF5s7jo9tuztdQxAhINCdvS+3nGINqPd00AphqJR/0LhANUS6/+7SCb98YOfA==", + "license": "ISC", + "engines": { + "node": 
">=10" + } + }, + "node_modules/yargs": { + "version": "17.7.2", + "resolved": "https://registry.npmmirror.com/yargs/-/yargs-17.7.2.tgz", + "integrity": "sha512-7dSzzRQ++CKnNI/krKnYRV7JKKPUXMEh61soaHKg9mrWEhzFWhFnxPxGl+69cD1Ou63C13NUPCnmIcrvqCuM6w==", + "license": "MIT", + "dependencies": { + "cliui": "^8.0.1", + "escalade": "^3.1.1", + "get-caller-file": "^2.0.5", + "require-directory": "^2.1.1", + "string-width": "^4.2.3", + "y18n": "^5.0.5", + "yargs-parser": "^21.1.1" + }, + "engines": { + "node": ">=12" + } + }, + "node_modules/yargs-parser": { + "version": "21.1.1", + "resolved": "https://registry.npmmirror.com/yargs-parser/-/yargs-parser-21.1.1.tgz", + "integrity": "sha512-tVpsJW7DdjecAiFpbIB1e3qxIQsE6NoPc5/eTdrbbIC4h0LVsWhnoa3g+m2HclBIujHzsxZ4VJVA+GUuc2/LBw==", + "license": "ISC", + "engines": { + "node": ">=12" + } + }, + "node_modules/yauzl": { + "version": "2.10.0", + "resolved": "https://registry.npmmirror.com/yauzl/-/yauzl-2.10.0.tgz", + "integrity": "sha512-p4a9I6X6nu6IhoGmBqAcbJy1mlC4j27vEPZX9F4L4/vZT3Lyq1VkFHw/V/PUcB9Buo+DG3iHkT0x3Qya58zc3g==", + "license": "MIT", + "dependencies": { + "buffer-crc32": "~0.2.3", + "fd-slicer": "~1.1.0" + } + }, + "node_modules/zod": { + "version": "3.25.76", + "resolved": "https://registry.npmmirror.com/zod/-/zod-3.25.76.tgz", + "integrity": "sha512-gzUt/qt81nXsFGKIFcC3YnfEAx5NkunCfnDlvuBSSFS02bcXu4Lmea0AFIUwbLWxWPx3d9p8S5QoaujKcNQxcQ==", + "license": "MIT", + "funding": { + "url": "https://github.com/sponsors/colinhacks" + } + } + } +} diff --git a/skills/gemini-web-generate/scripts/package.json b/skills/gemini-web-generate/scripts/package.json new file mode 100644 index 0000000..bf483a3 --- /dev/null +++ b/skills/gemini-web-generate/scripts/package.json @@ -0,0 +1,15 @@ +{ + "name": "gemini-web-cli", + "version": "1.0.0", + "description": "CLI tool for AI image generation and session management via Gemini web interface", + "type": "module", + "main": "cli.js", + "scripts": { + "generate": "node cli.js generate", + 
const sleep = (ms) => new Promise(r => setTimeout(r, ms));
const SESSIONS_FILE = path.join(process.cwd(), 'sessions.json');

/**
 * Session management for multi-tab concurrency.
 * Each session maps to a unique Gemini chat tab.
 */

/**
 * Load the sessions registry from SESSIONS_FILE.
 *
 * A missing file yields an empty registry. An unreadable, corrupted, or
 * non-object file also yields an empty registry, but a warning is logged so
 * the data loss is visible instead of being silently swallowed.
 *
 * @returns {Record<string, {tabId: string, url: string, chatUrl: string}>}
 */
function loadSessions() {
  let raw;
  try {
    raw = fs.readFileSync(SESSIONS_FILE, 'utf8');
  } catch (err) {
    // ENOENT simply means no session has been saved yet.
    if (err.code !== 'ENOENT') {
      log('Warning: cannot read sessions file', SESSIONS_FILE, '-', err.message);
    }
    return {};
  }

  try {
    const parsed = JSON.parse(raw);
    // Callers index the registry by session ID, so only a plain object is usable.
    if (parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed)) {
      return parsed;
    }
    log('Warning: sessions file', SESSIONS_FILE, 'does not contain an object, ignoring it');
  } catch (err) {
    log('Warning: sessions file', SESSIONS_FILE, 'is corrupted, ignoring it -', err.message);
  }
  return {};
}

/**
 * Save the sessions registry.
 *
 * The JSON is written to a process-specific temp file and then renamed over
 * SESSIONS_FILE, so another CLI process reading the registry never observes a
 * half-written file.
 *
 * @param {Record<string, {tabId: string, url: string, chatUrl: string}>} sessions
 */
function saveSessions(sessions) {
  const tmpFile = `${SESSIONS_FILE}.${process.pid}.tmp`;
  try {
    fs.writeFileSync(tmpFile, JSON.stringify(sessions, null, 2));
    fs.renameSync(tmpFile, SESSIONS_FILE);
  } catch (err) {
    // Do not leave the temp file behind when the write or rename fails.
    fs.rmSync(tmpFile, { force: true });
    throw err;
  }
}

/**
 * Generate a unique session ID.
 * @returns {string} 8 lowercase hex characters (4 random bytes)
 */
function generateSessionId() {
  return crypto.randomBytes(4).toString('hex');
}
// A conversation URL has a non-empty ID segment after /app/ (".../app/" alone is the landing page).
const CONVERSATION_URL_RE = /\/app\/[^/?#]+/;

/**
 * Anti-detection: hide automation signals from websites (especially Gemini).
 * The script is registered to run on every new document loaded in the tab.
 * @param {import('puppeteer-core').Page} page
 */
async function hideAutomationSignals(page) {
  await page.evaluateOnNewDocument(() => {
    Object.defineProperty(navigator, 'webdriver', { get: () => false });
    Object.defineProperty(navigator, 'plugins', { get: () => [1, 2, 3, 4, 5] });
    Object.defineProperty(navigator, 'languages', { get: () => ['zh-CN', 'zh', 'en-US', 'en'] });
    window.chrome = { runtime: {} };
  });
}

/**
 * Resolve (reuse, recover, or create) the tab for a session on an already
 * connected browser and record it in the sessions registry.
 * @param {import('puppeteer-core').Browser} browser
 * @param {{sessionId?: string, newTab?: boolean}} options
 */
async function openSessionTab(browser, { sessionId, newTab }) {
  const sessions = loadSessions();
  // Own-property check so IDs such as "constructor" never hit Object.prototype.
  const info = sessionId && Object.hasOwn(sessions, sessionId) ? sessions[sessionId] : undefined;

  if (info && !newTab) {
    // Reuse existing session: find the matching tab.
    // NOTE(review): `_targetId` is an underscore-prefixed puppeteer field — confirm it survives upgrades.
    const pages = await browser.pages();
    const existing = pages.find((p) => p.target()._targetId === info.tabId);

    if (existing) {
      const cdp = await existing.createCDPSession();
      await setupDownloadBehavior(cdp);
      log('Reused session', sessionId);
      return { browser, page: existing, cdp, sessionId, isNew: false };
    }

    // Tab lost - try to recover via chatUrl
    if (info.chatUrl) {
      log('Session', sessionId, 'tab lost, recovering via chat URL');
      const page = await browser.newPage();
      // Same stealth setup as a fresh tab; must be installed before navigation.
      await hideAutomationSignals(page);
      const cdp = await page.createCDPSession();
      await setupDownloadBehavior(cdp);

      let recovered = true;
      try {
        await page.goto(info.chatUrl, { waitUntil: 'domcontentloaded', timeout: 60000 });
        await sleep(3000);
      } catch (err) {
        recovered = false;
        log('Chat URL recovery failed, creating fresh tab:', err.message);
      }

      // Either way the session now lives in the newly opened tab.
      info.tabId = page.target()._targetId;
      if (recovered) {
        info.url = info.chatUrl;
        log('Recovered session', sessionId, 'via chat URL');
      }
      saveSessions(sessions);
      return { browser, page, cdp, sessionId, isNew: !recovered };
    }

    log('Session', sessionId, 'tab not found, creating new one');
  }

  // Always create a new tab to avoid context pollution from previous conversations
  const page = await browser.newPage();
  await hideAutomationSignals(page);

  const cdp = await page.createCDPSession();
  await setupDownloadBehavior(cdp);

  // Generate new session ID if not provided
  const id = sessionId || generateSessionId();
  const tabId = page.target()._targetId;
  sessions[id] = { tabId, url: page.url(), chatUrl: '' };
  saveSessions(sessions);

  log('Connected to tab', tabId, '(new tab)');
  return { browser, page, cdp, sessionId: id, isNew: true };
}

/**
 * Connect to an existing Chrome instance via CDP.
 * Does NOT launch or manage the browser.
 *
 * Resolution order: reuse the session's tab if it is still open; otherwise
 * reopen the session's saved chat URL in a new tab; otherwise open a fresh tab
 * and register it. If anything fails after the connection is established, the
 * connection is released before the error is rethrown, because the caller never
 * receives a `browser` handle it could disconnect itself.
 *
 * @param {object} options
 * @param {string} [options.sessionId] - reuse an existing session/tab
 * @param {boolean} [options.newTab] - force create a new tab
 * @returns {Promise<{browser, page, cdp, sessionId: string, isNew: boolean}>}
 */
export async function connectBrowser({ sessionId, newTab } = {}) {
  const browser = await puppeteer.connect({
    browserURL: config.cdpUrl,
    defaultViewport: null,
  });

  try {
    return await openSessionTab(browser, { sessionId, newTab });
  } catch (err) {
    // Best-effort cleanup: a disconnect failure must not mask the original error.
    await browser.disconnect().catch(() => {});
    throw err;
  }
}

/**
 * Extract and save the Gemini chat URL from the current page.
 * After a response, Gemini navigates to a conversation URL like:
 *   https://gemini.google.com/app/{conversation_id}
 *
 * The session's `url` is always refreshed; `chatUrl` is only updated when the
 * URL carries a conversation ID segment after `/app/`. Unknown session IDs are
 * ignored.
 *
 * @param {string} sessionId
 * @param {import('puppeteer-core').Page} page
 * @returns {Promise<string>} the page's current URL
 */
export async function captureChatUrl(sessionId, page) {
  const url = page.url();
  const sessions = loadSessions();
  const entry = Object.hasOwn(sessions, sessionId) ? sessions[sessionId] : undefined;
  if (entry) {
    entry.url = url;
    // Only capture if it looks like a conversation URL
    if (CONVERSATION_URL_RE.test(url)) {
      entry.chatUrl = url;
    }
    saveSessions(sessions);
  }
  return url;
}
// Captures the conversation ID from a Gemini chat URL (.../app/{id}).
const CONVERSATION_ID_RE = /\/app\/([a-f0-9]+)/i;

/**
 * Extract the conversation ID from a Gemini chat URL.
 * @param {string} url
 * @returns {string|undefined} the ID, or undefined when the URL has none
 */
function conversationIdOf(url) {
  return String(url ?? '').match(CONVERSATION_ID_RE)?.[1];
}

/**
 * Close a session's tab and remove from registry.
 *
 * The tab is closed if it is still open; the registry entry is removed either
 * way. The registry is re-read right before the entry is deleted so updates
 * written by other processes while the tab was closing are not overwritten.
 *
 * @param {string} sessionId
 * @returns {Promise<{success: boolean, message: string}>}
 */
export async function closeSession(sessionId) {
  const sessions = loadSessions();
  const info = Object.hasOwn(sessions, sessionId) ? sessions[sessionId] : undefined;
  if (!info) {
    return { success: false, message: `Session ${sessionId} not found` };
  }

  const browser = await puppeteer.connect({
    browserURL: config.cdpUrl,
    defaultViewport: null,
  });

  try {
    const pages = await browser.pages();
    const page = pages.find((p) => p.target()._targetId === info.tabId);
    if (page) {
      await page.close();
    }

    const latest = loadSessions();
    delete latest[sessionId];
    saveSessions(latest);
    log('Session closed:', sessionId);
    return { success: true, message: `Session ${sessionId} closed` };
  } finally {
    await browser.disconnect();
  }
}

/**
 * Find an open tab matching a chatUrl and register it as a new session.
 * Useful when the session record was lost but the browser tab is still open.
 *
 * Tabs are matched by conversation ID. When no tab matches and
 * `openIfNotFound` is set, the chat URL is opened in a new tab; if that
 * navigation fails the new tab is closed again before the error propagates.
 *
 * @param {object} options
 * @param {string} options.chatUrl - Full Gemini chat URL to match against
 * @param {boolean} [options.openIfNotFound] - open a new tab if no match found
 * @returns {Promise<{success: boolean, sessionId?: string, tabId?: string, url?: string, message?: string, isNew?: boolean}>}
 */
export async function findSessionByChatUrl({ chatUrl, openIfNotFound }) {
  const browser = await puppeteer.connect({
    browserURL: config.cdpUrl,
    defaultViewport: null,
  });

  try {
    // The wanted ID does not depend on the tab, so compute it once.
    const wantedId = conversationIdOf(chatUrl);

    if (wantedId) {
      // Try exact match on conversation ID
      const pages = await browser.pages();
      for (const page of pages) {
        const url = page.url();
        if (!url.includes('gemini.google.com/app/')) continue;
        if (conversationIdOf(url) !== wantedId) continue;

        const tabId = page.target()._targetId;
        const sessionId = generateSessionId();
        const sessions = loadSessions();
        sessions[sessionId] = { tabId, url, chatUrl };
        saveSessions(sessions);
        return { success: true, sessionId, tabId, url };
      }
    }

    // Not found — optionally open a new tab
    if (openIfNotFound) {
      const page = await browser.newPage();
      try {
        const cdp = await page.createCDPSession();
        await setupDownloadBehavior(cdp);
        log('Opening new tab for chatUrl:', chatUrl);
        await page.goto(chatUrl, { waitUntil: 'domcontentloaded', timeout: 60000 });
        await sleep(3000);
      } catch (err) {
        // Do not leave an unregistered tab open; cleanup failure must not mask the error.
        await page.close().catch(() => {});
        throw err;
      }

      const tabId = page.target()._targetId;
      const sessionId = generateSessionId();
      const sessions = loadSessions();
      sessions[sessionId] = { tabId, url: chatUrl, chatUrl };
      saveSessions(sessions);
      return { success: true, sessionId, tabId, url: chatUrl, isNew: true };
    }

    return {
      success: false,
      message: `No open tab found matching chatUrl: ${chatUrl}`,
    };
  } finally {
    await browser.disconnect();
  }
}

/**
 * List all registered sessions.
 *
 * `active` tells whether the session's tab is currently among the browser's
 * targets. When the browser cannot be queried, the registered sessions are
 * still returned, all marked inactive, and a warning is logged.
 *
 * @returns {Promise<Array<{sessionId: string, url: string, chatUrl: string, active: boolean}>>}
 */
export async function listSessions() {
  const sessions = loadSessions();
  let liveTabIds = new Set();

  try {
    const browser = await puppeteer.connect({
      browserURL: config.cdpUrl,
      defaultViewport: null,
    });
    try {
      const targets = await browser.targets();
      liveTabIds = new Set(targets.map((t) => t._targetId));
    } finally {
      await browser.disconnect();
    }
  } catch (err) {
    log('Warning: could not query browser tabs, reporting sessions as inactive:', err.message);
  }

  return Object.entries(sessions).map(([sessionId, info]) => ({
    sessionId,
    url: info.url || 'navigating',
    chatUrl: info.chatUrl || '',
    active: liveTabIds.has(info.tabId),
  }));
}

/**
 * Ask the browser to allow downloads into `config.downloadDir` and to emit
 * download events. Best-effort: a failure is logged and otherwise ignored.
 * @param {import('puppeteer-core').CDPSession} cdp
 */
async function setupDownloadBehavior(cdp) {
  try {
    await cdp.send('Browser.setDownloadBehavior', {
      behavior: 'allow',
      downloadPath: config.downloadDir,
      eventsEnabled: true,
    });
  } catch (err) {
    // May fail if not supported
    log('Warning: could not configure download behavior:', err.message);
  }
}
/**
 * Check if user is logged into Gemini.
 *
 * The page counts as logged out when it is a Google sign-in page, or when it
 * shows sign-in wording and no Google account button. Otherwise it counts as
 * logged in, and the avatar button's aria-label (if any) is reported as the
 * account.
 *
 * @param {import('puppeteer-core').Page} page
 * @returns {Promise<{loggedIn: boolean, account?: string}>}
 */
export async function checkLogin(page) {
  // NOTE: callbacks passed to page.evaluate are handed to the page, so they
  // must be self-contained and cannot use variables from this module.
  const signals = await page.evaluate(() => {
    // Check for Google account avatar / menu
    const accountBtn = document.querySelector(
      '[aria-label="Google 账号"], [aria-label="Google Account"], ' +
      'div[data-account-index], c-wiz[jsmodel][data-l] img'
    );

    // Check for sign-in prompts; read the text once, tolerating a missing body
    const bodyText = document.body?.innerText ?? '';
    const hasSignInPrompt = ['Sign in', '登录', '登入'].some((phrase) => bodyText.includes(phrase));

    // Check URL for sign-in redirect
    const href = window.location.href;
    const isSignInPage = href.includes('accounts.google.com') || href.includes('ServiceLogin');

    return { hasAccountBtn: !!accountBtn, hasSignInPrompt, isSignInPage };
  });

  if (signals.isSignInPage || (signals.hasSignInPrompt && !signals.hasAccountBtn)) {
    return { loggedIn: false };
  }

  // Try to get account name from the aria-label on the avatar button
  const account = await page.evaluate(() => {
    const avatarBtn = document.querySelector('button img[jsname]')?.closest('button');
    return avatarBtn ? avatarBtn.getAttribute('aria-label') || '' : '';
  });

  return {
    loggedIn: true,
    account: account || undefined,
  };
}
+ *
+ * If the page URL already contains gemini.google.com no navigation happens;
+ * otherwise the page is sent to config.geminiUrl. In both cases a missing
+ * editor is only logged, never thrown, and a fixed settle delay follows.
+ * @param {import('puppeteer-core').Page} page
+ * @returns {Promise<void>}
+ */
+export async function navigateToGemini(page) {
+  // If already on Gemini, don't navigate away
+  if (page.url().includes('gemini.google.com')) {
+    // Wait for editor to be ready
+    try {
+      await page.waitForSelector('[contenteditable="true"]', { timeout: 15000, visible: true });
+    } catch {
+      // Editor not found, let the caller handle it
+      log('Warning: Editor not found on existing Gemini page');
+    }
+    await sleep(2000);
+    return;
+  }
+
+  // New tab: navigate to Gemini with relaxed timeout
+  await page.goto(config.geminiUrl, {
+    waitUntil: 'domcontentloaded',
+    timeout: 60000,
+  });
+  // Wait for editor to be ready
+  try {
+    await page.waitForSelector('[contenteditable="true"]', { timeout: 30000, visible: true });
+    log('Editor found after navigation');
+  } catch {
+    log('Warning: Editor not found after navigation, page may need more time');
+  }
+  // Wait for page to settle
+  await sleep(3000);
+}
+
+/**
+ * Write a diagnostic line to stderr, prefixed with "[gemini-web-cli] ".
+ * Arguments are joined with single spaces. Does nothing when the
+ * GEMINI_JSON_MODE environment variable is "1".
+ * @param {...*} args - values to join into the message
+ */
+export function log(...args) {
+  // In JSON mode, suppress stderr logs — all output goes to stdout as NDJSON
+  if (process.env.GEMINI_JSON_MODE === '1') return;
+  process.stderr.write('[gemini-web-cli] ' + args.join(' ') + '\n');
+}
+
+/**
+ * Emit a structured progress event as NDJSON to stdout.
+ * In JSON mode: writes NDJSON line to stdout.
+ * In human mode: writes human-readable message to stderr via log().
+ * Events without a message produce no output in human mode.
+ * @param {object} event - { type: "progress"|"success"|"error", step?, state?, message?, ... }
+ */
+export function emit(event) {
+  if (process.env.GEMINI_JSON_MODE === '1') {
+    process.stdout.write(JSON.stringify(event) + '\n');
+    return;
+  }
+  // Human mode: just log the message
+  if (event.message) {
+    log(event.message);
+  }
+}
diff --git a/skills/gemini-web-generate/scripts/src/generator.js b/skills/gemini-web-generate/scripts/src/generator.js
new file mode 100644
index 0000000..197d01f
--- /dev/null
+++ b/skills/gemini-web-generate/scripts/src/generator.js
@@ -0,0 +1,847 @@
+import { pageStatus, detectPageState } from './status.js';
+import { log, emit } from './browser.js';
+
+const sleep = (ms) => new Promise(r => setTimeout(r, ms));
+
+/**
+ * Wait for the Gemini chat editor to be visible.
+ * On failure a short DOM summary (title, URL, editor count, first 500
+ * characters of body text) is logged before the error is thrown.
+ * @param {import('puppeteer-core').Page} page
+ * @param {number} timeout - max wait in ms (default 30000)
+ * @returns {Promise<import('puppeteer-core').ElementHandle>} handle of the first visible contenteditable element
+ * @throws {Error} when no visible editor appears within the timeout
+ */
+async function waitForEditor(page, timeout = 30000) {
+  try {
+    const el = await page.waitForSelector('[contenteditable="true"]', { timeout, visible: true });
+    return el;
+  } catch {
+    // Dump DOM state for debugging
+    const dump = await page.evaluate(() => {
+      const body = document.body;
+      return {
+        title: document.title,
+        url: window.location.href,
+        hasContentEditable: document.querySelectorAll('[contenteditable="true"]').length,
+        bodyText: body.innerText.substring(0, 500),
+      };
+    });
+    log('Editor not found. DOM state:', JSON.stringify(dump, null, 2));
+    throw new Error('Gemini chat editor not found. The page may not be fully loaded.');
+  }
+}
+
+/**
+ * Type prompt into Gemini chat input.
+ *
+ * The first visible input among textarea / contenteditable candidates is
+ * triple-clicked (selecting its current content) and the prompt is typed over
+ * it. Line breaks in the prompt are entered with Shift+Enter so they do not
+ * submit the form.
+ * @param {import('puppeteer-core').Page} page
+ * @param {string} prompt - may contain \n, \r\n or \r line breaks
+ * @returns {Promise<void>}
+ * @throws {Error} when the editor never appears or no candidate input is usable
+ */
+export async function typePrompt(page, prompt) {
+  // Wait for editor to be ready first
+  await waitForEditor(page, 15000);
+
+  const selectors = [
+    'textarea',
+    '[contenteditable="true"]',
+    'div[contenteditable]',
+  ];
+
+  // Typing a raw \n or \r presses Enter, which submits the form.
+  // Split on every line-break style (a bare split('\n') would leave the \r of
+  // CRLF input inside each line) and use Shift+Enter between lines.
+  const lines = prompt.split(/\r\n|\r|\n/);
+
+  for (const selector of selectors) {
+    try {
+      const el = await page.$(selector);
+      if (el && await el.isVisible()) {
+        await el.click({ clickCount: 3 });
+        for (let i = 0; i < lines.length; i++) {
+          if (lines[i]) {
+            await page.keyboard.type(lines[i], { delay: 20 });
+          }
+          if (i < lines.length - 1) {
+            await page.keyboard.down('Shift');
+            await page.keyboard.press('Enter');
+            await page.keyboard.up('Shift');
+          }
+        }
+        return;
+      }
+    } catch (e) {
+      // try next selector
+    }
+  }
+
+  throw new Error('Chat input element not found');
+}
+
+/**
+ * Upload reference images by dispatching paste events on the Quill clipboard div.
+ * Gemini's editor uses Quill, which has a .ql-clipboard element for paste handling.
+ *
+ * Each file is read from disk, base64-encoded, rebuilt as a File inside the
+ * page and delivered through a synthetic paste event. Only the first 5 paths
+ * are used; a warning is logged when more are supplied.
+ *
+ * @param {import('puppeteer-core').Page} page
+ * @param {string[]} imagePaths - array of local file paths (max 5)
+ * @returns {Promise<void>}
+ * @throws {Error} when the editor is missing, a paste cannot be dispatched,
+ *   or the attachments are not confirmed ready in time
+ */
+export async function uploadImages(page, imagePaths) {
+  const MAX_IMAGES = 5;
+
+  // Compare the ORIGINAL length: after slicing, the length can never exceed the cap.
+  if (imagePaths.length > MAX_IMAGES) {
+    log(`Warning: Only first ${MAX_IMAGES} images will be used`);
+  }
+  const paths = imagePaths.slice(0, MAX_IMAGES);
+
+  // Wait for editor to be ready
+  const editor = await waitForEditor(page);
+  await editor.click();
+  await sleep(1000);
+
+  // Loaded once up front instead of on every loop iteration
+  const { readFileSync } = await import('fs');
+
+  // Dispatch paste for each image
+  for (let i = 0; i < paths.length; i++) {
+    const imagePath = paths[i];
+    const imageBuffer = readFileSync(imagePath);
+    const base64 = imageBuffer.toString('base64');
+    const ext = imagePath.split('.').pop().toLowerCase();
+    // Unknown extensions fall back to image/png
+    const mimeType = ext === 'png' ? 'image/png' :
+      ext === 'jpg' || ext === 'jpeg' ? 'image/jpeg' :
+      ext === 'gif' ? 'image/gif' :
+      ext === 'webp' ? 'image/webp' : 'image/png';
+    const fileName = imagePath.split('/').pop();
+
+    // Dispatch paste on both the editor and the ql-clipboard
+    const result = await page.evaluate(({ base64, mimeType, fileName }) => {
+      const binary = atob(base64);
+      const array = new Uint8Array(binary.length);
+      for (let i = 0; i < binary.length; i++) {
+        array[i] = binary.charCodeAt(i);
+      }
+      const blob = new Blob([array], { type: mimeType });
+      const file = new File([blob], fileName, { type: mimeType });
+
+      const dt = new DataTransfer();
+      dt.items.add(file);
+
+      // Dispatch on the contenteditable editor
+      const editor = document.querySelector('[contenteditable="true"]');
+      if (!editor) return 'no editor';
+
+      // Also dispatch on the Quill clipboard div
+      const clipboard = document.querySelector('.ql-clipboard');
+
+      const pasteEvent = new ClipboardEvent('paste', {
+        clipboardData: dt,
+        bubbles: true,
+        cancelable: true,
+      });
+
+      editor.dispatchEvent(pasteEvent);
+      if (clipboard) {
+        clipboard.dispatchEvent(pasteEvent);
+      }
+
+      return 'ok';
+    }, { base64, mimeType, fileName });
+
+    if (result !== 'ok') {
+      throw new Error('Failed to paste image: ' + result);
+    }
+
+    emit({ type: 'progress', step: 'paste', message: `Pasted image ${i + 1}/${paths.length} (${fileName})` });
+    // Wait between pastes for Gemini to process each one
+    await sleep(2000);
+  }
+
+  // Wait for ALL attachments to appear and Gemini to fully register them
+  log('Waiting for attachments to be ready...');
+  const ok = await waitForAttachmentsReady(page, paths.length);
+  if (!ok) {
+    // Collect a DOM summary so a failed upload can be diagnosed from the log
+    const dump = await page.evaluate(() => {
+      const editor = document.querySelector('[contenteditable="true"]');
+      const clipboard = document.querySelector('.ql-clipboard');
+      let result = `editor children: ${editor?.children.length || 0}\n`;
+      result += `editor HTML: ${editor?.innerHTML.substring(0, 500) || 'none'}\n`;
+      result += `clipboard HTML: ${clipboard?.innerHTML.substring(0, 500) || 'none'}\n`;
+      result += `.attachment-preview-wrapper: ${document.querySelectorAll('.attachment-preview-wrapper').length}\n`;
+      result += `uploader-file-preview: ${document.querySelectorAll('uploader-file-preview').length}\n`;
+      result += `[class*="uploader"]: ${document.querySelectorAll('[class*="uploader"]').length}\n`;
+      result += `.file-preview-chip: ${document.querySelectorAll('.file-preview-chip').length}\n`;
+      return result;
+    });
+    log('DOM dump:\n' + dump);
+    throw new Error(`Image upload failed: expected ${paths.length} but none appeared`);
+  }
+  emit({ type: 'progress', step: 'attachments_ready', message: `All ${paths.length} image(s) confirmed attached and rendered` });
+}
+
+/**
+ * Poll until attachment previews appear and Gemini has finished processing them.
+ * The key indicator: each image shows a cancel-button AND the preview img has
+ * a valid blob: src — meaning Gemini has fully registered the attachment.
+ * The page is polled every 500 ms; once ready, a further 2 s settle delay is
+ * applied before returning.
+ * @param {import('puppeteer-core').Page} page
+ * @param {number} expectedCount - number of attachments that must be ready
+ * @param {number} timeout - max wait in ms (default 30000)
+ * @returns {Promise<boolean>} true when ready, false when the timeout elapsed
+ */
+async function waitForAttachmentsReady(page, expectedCount, timeout = 30000) {
+  const startTime = Date.now();
+  while (Date.now() - startTime < timeout) {
+    const ready = await page.evaluate((expected) => {
+      // Get all file preview containers
+      const previews = document.querySelectorAll('uploader-file-preview');
+      // NOTE(review): this early return also skips the wrapper-level fallback
+      // below, so the fallback only runs when enough uploader-file-preview
+      // elements already exist — confirm that is intended.
+      if (previews.length < expected) {
+        return false;
+      }
+
+      // Each preview must have BOTH: a cancel button and an img with a valid blob: src
+      let readyCount = 0;
+      for (const preview of previews) {
+        const hasCancel = preview.querySelector('button[data-test-id="cancel-button"]');
+        const img = preview.querySelector('img[data-test-id="image-preview"]');
+        const hasBlobSrc = img && img.src && img.src.startsWith('blob:');
+        if (hasCancel && hasBlobSrc) {
+          readyCount++;
+        }
+      }
+      if (readyCount >= expected) return true;
+
+      // Fallback: check the wrapper-level structure
+      const wrappers = document.querySelectorAll('.attachment-preview-wrapper');
+      const fallbackCount = Array.from(wrappers).filter(w => {
+        const hasCancel = w.querySelector('[data-test-id="cancel-button"], button[aria-label*="移除文件"]');
+        const img = w.querySelector('img[data-test-id="image-preview"]');
+        const hasBlobSrc = img && img.src && img.src.startsWith('blob:');
+        return hasCancel && hasBlobSrc;
+      }).length;
+      if (fallbackCount >= expected) return true;
+
+      return false;
+    }, expectedCount);
+
+    if (ready) {
+      // Extra settle time after all buttons detected
+      await sleep(2000);
+      return true;
+    }
+
+    await sleep(500);
+  }
+  return false;
+}
+
+/**
+ * Click the send button to submit the prompt.
+ *
+ * Three strategies are tried in order inside the page: a visible, enabled
+ * button whose aria-label/title mentions Send/发送/Arrow; any visible, enabled
+ * button containing an SVG whose top edge is within 100px of the input's top
+ * edge; and finally a synthetic Enter keydown on a textarea.
+ * @param {import('puppeteer-core').Page} page
+ * @returns {Promise<void>}
+ * @throws {Error} when none of the strategies finds a target
+ */
+export async function sendPrompt(page) {
+  const sent = await page.evaluate(() => {
+    // Priority 1: Send button by aria-label
+    const buttons = Array.from(document.querySelectorAll('button'));
+    for (const b of buttons) {
+      const label = b.getAttribute('aria-label') || b.getAttribute('title') || '';
+      if (label.includes('Send') || label.includes('发送') ||
+          label.includes('Arrow') || label.includes('arrow')) {
+        // Check if button is enabled (not disabled)
+        if (!b.disabled && b.offsetParent !== null) {
+          b.click();
+          return 'clicked';
+        }
+      }
+    }
+
+    // Priority 2: SVG-based send button (common in Material Design)
+    for (const b of buttons) {
+      const svg = b.querySelector('svg');
+      if (svg && b.offsetParent !== null && !b.disabled) {
+        // Check if near the textarea
+        const textarea = document.querySelector('textarea') ||
+          document.querySelector('[contenteditable="true"]');
+        if (textarea) {
+          const textareaRect = textarea.getBoundingClientRect();
+          const btnRect = b.getBoundingClientRect();
+          // Only vertical distance is compared; horizontal position is ignored
+          const dist = Math.abs(textareaRect.top - btnRect.top);
+          if (dist < 100) {
+            b.click();
+            return 'clicked';
+          }
+        }
+      }
+    }
+
+    // Priority 3: Press Enter in the textarea
+    // NOTE(review): only <textarea> is considered here, not the contenteditable
+    // editor used elsewhere in this file — confirm this fallback is reachable.
+    const textarea = document.querySelector('textarea');
+    if (textarea) {
+      textarea.focus();
+      textarea.dispatchEvent(new KeyboardEvent('keydown', {
+        key: 'Enter',
+        code: 'Enter',
+        keyCode: 13,
+        bubbles: true,
+      }));
+      return 'enter';
+    }
+
+    return 'none';
+  });
+
+  if (sent === 'none') {
+    throw new Error('Could not find send button or input area');
+  }
+
+  // Fixed pause after submitting before the caller starts polling
+  await sleep(1500);
+}
+
+/**
+ * Select the image generation tool from the toolbox drawer.
+ * Uses the icon name "photo_prints" to identify the tool, which is language-independent.
+ * Falls back to text matching for "制作图片" / "Generate image" / etc.
+ * If the toolbox button already carries the "has-selected-item" class the
+ * drawer is not opened and true is returned immediately.
+ * @param {import('puppeteer-core').Page} page
+ * @returns {Promise<boolean>} - whether the tool was selected successfully
+ */
+export async function selectTool(page) {
+  const result = await page.evaluate(() => {
+    const toolboxBtn = document.querySelector('.toolbox-drawer-button');
+    if (!toolboxBtn) {
+      return { success: false, error: 'Toolbox button not found' };
+    }
+
+    // Check if already selected
+    if (toolboxBtn.className.includes('has-selected-item')) {
+      return { success: true, alreadySelected: true };
+    }
+
+    // Click to open toolbox drawer
+    toolboxBtn.click();
+    return { success: true, opened: true };
+  });
+
+  if (!result.success) {
+    log('selectTool failed:', result.error);
+    return false;
+  }
+  if (result.alreadySelected) {
+    return true;
+  }
+
+  // Wait for the menu to open
+  await sleep(500);
+
+  // Click the image tool item — match by icon first (language-independent), then by text
+  const selected = await page.evaluate(() => {
+    const toolItems = Array.from(document.querySelectorAll('.toolbox-drawer-item-list-button'));
+    for (const item of toolItems) {
+      const icon = item.querySelector('mat-icon[data-mat-icon-name="photo_prints"]');
+      if (icon) {
+        item.click();
+        return 'icon';
+      }
+    }
+    // Fallback: match by text patterns
+    for (const item of toolItems) {
+      const text = item.innerText?.trim().toLowerCase();
+      if (text && (text.includes('制作图片') || text.includes('生成图片') ||
+          text.includes('generate image') || text.includes('create image') ||
+          text.includes('imagen'))) {
+        item.click();
+        return 'text';
+      }
+    }
+    return null;
+  });
+
+  if (selected) {
+    // Give the UI a moment to apply the selection
+    await sleep(1000);
+    return true;
+  }
+
+  log('Image generation tool not found in toolbox drawer');
+  return false;
+}
+
+/**
+ * Select a specific tool by name from the toolbox drawer.
+ * The name is matched as a case-sensitive substring of each item's text.
+ * @param {import('puppeteer-core').Page} page
+ * @param {string} toolName - e.g. "制作图片", "Canvas", "Deep Research"
+ * @returns {Promise<boolean>} - whether a tool is selected after the call
+ */
+export async function selectToolByName(page, toolName) {
+  const result = await page.evaluate(() => {
+    const toolboxBtn = document.querySelector('.toolbox-drawer-button');
+    if (!toolboxBtn) {
+      return { success: false, error: 'Toolbox button not found' };
+    }
+    // Check if already selected
+    // NOTE(review): this reports success for ANY already-selected tool without
+    // checking that it is the one named by toolName — confirm that is intended.
+    if (toolboxBtn.className.includes('has-selected-item')) {
+      return { success: true, alreadySelected: true };
+    }
+    toolboxBtn.click();
+    return { success: true, opened: true };
+  });
+
+  if (!result.success) {
+    log('selectToolByName failed:', result.error);
+    return false;
+  }
+  if (result.alreadySelected) {
+    return true;
+  }
+
+  // Wait for the drawer to open
+  await sleep(500);
+
+  const selected = await page.evaluate((toolName) => {
+    const toolItems = Array.from(document.querySelectorAll('.toolbox-drawer-item-list-button'));
+    for (const item of toolItems) {
+      const text = item.innerText?.trim();
+      if (text && text.includes(toolName)) {
+        item.click();
+        return true;
+      }
+    }
+    return false;
+  }, toolName);
+
+  if (selected) {
+    await sleep(1000);
+    return true;
+  }
+
+  log(`Tool "${toolName}" not found in toolbox drawer`);
+  return false;
+}
+
+/**
+ * Full generation flow.
+ * Order: 1) upload images → 2) select image tool → 3) type prompt → 4) send
+ * The download buttons present before the run are snapshotted first, and their
+ * count is attached to the result as existingButtonCount.
+ * @param {import('puppeteer-core').Page} page
+ * @param {{prompt: string, images?: string[], timeout?: number, isContinuedSession?: boolean, tool?: string}} options
+ *   isContinuedSession is forwarded to waitForGenerationComplete; tool, when
+ *   set, is selected by name instead of the default image tool.
+ * @returns {Promise<{status: string, message: string, preview?: string, existingButtonCount: number}>}
+ */
+export async function generate(page, { prompt, images, timeout, isContinuedSession, tool }) {
+  // Capture existing download buttons before this generation (for continued sessions)
+  const btnSnapshot = await snapshotDownloadButtons(page);
+
+  // Step 1: Upload images first
+  if (images && images.length > 0) {
+    await uploadImages(page, images);
+  }
+
+  // Step 2: Select image generation tool (default), or specified tool
+  // NOTE(review): the boolean result of tool selection is ignored, so the flow
+  // continues even when no tool could be selected — confirm that is intended.
+  if (tool) {
+    await selectToolByName(page, tool);
+  } else {
+    await selectTool(page);
+  }
+
+  // Step 3: Type prompt after images are attached
+  emit({ type: 'progress', step: 'typing', message: 'Typing prompt...' });
+  await typePrompt(page, prompt);
+
+  // Step 4: Send
+  emit({ type: 'progress', step: 'sending', message: 'Sending prompt...' });
+  await sendPrompt(page);
+
+  emit({ type: 'progress', step: 'waiting', message: 'Waiting for generation to complete...' });
+  const result = await waitForGenerationComplete(page, timeout, isContinuedSession, btnSnapshot.count, btnSnapshot.bottomY);
+
+  // Return the pre-generation button count so the caller knows how many
+  // buttons existed before this generation. This is needed to distinguish
+  // old download buttons from newly generated ones.
+  result.existingButtonCount = btnSnapshot.count;
+
+  return result;
+}
+
+/**
+ * Wait for generation to complete.
+ *
+ * Two polling strategies, both on a 2 s interval: continued sessions inspect
+ * the DOM directly for new buttons/images; fresh sessions rely on
+ * detectPageState(). The result status is one of 'success', 'text_only',
+ * 'error', 'page_error' or 'timeout'.
+ * @param {import('puppeteer-core').Page} page
+ * @param {number} timeout - overall limit in ms (default 120000)
+ * @param {boolean} isContinuedSession - if true, wait for NEW images below existing ones
+ * @param {number} [prevImgCount] - count of visible download buttons before this
+ *   generation (generate() passes the button snapshot count here)
+ * @param {number} [prevBtnBottomY] - bottom Y position of existing download buttons
+ * @returns {Promise<{status: string, message: string}>}
+ */
+async function waitForGenerationComplete(page, timeout = 120000, isContinuedSession = false, prevImgCount, prevBtnBottomY) {
+  const startTime = Date.now();
+  let prevState = pageStatus.IDLE;
+
+  // For continued sessions: Gemini keeps ALL previous responses visible, including
+  // their download buttons. We can't wait for old buttons to disappear — they stay.
+  // Instead, poll for NEW content appearing below the existing results.
+  if (isContinuedSession) {
+    log(`Monitoring for new generation (existing images: ${prevImgCount || 0}, btn bottomY: ${prevBtnBottomY?.toFixed(0) || 'none'})`);
+    const maxWait = timeout;
+
+    while (Date.now() - startTime < maxWait) {
+      const elapsed = ((Date.now() - startTime) / 1000).toFixed(1);
+
+      const info = await page.evaluate((prevCount, prevBottomY) => {
+        // Count all generated images
+        const allImages = document.querySelectorAll('img');
+        let generatedImageCount = 0;
+        let maxImgBottom = 0;
+        let newImageBelowOld = false;
+
+        for (const img of allImages) {
+          // Ignore small images (icons, avatars, thumbnails)
+          if (img.naturalWidth < 300 || img.naturalHeight < 300) continue;
+          const isGenerated = img.closest('single-image.generated-image') ||
+            img.closest('.generated-image') ||
+            img.closest('generated-image') ||
+            img.closest('[class*="generated-image"]');
+          const isReference = img.closest('.attachment-preview-wrapper') ||
+            img.closest('uploader-file-preview') ||
+            img.closest('[class*="file-preview"]');
+          if (isGenerated && !isReference) {
+            generatedImageCount++;
+            const rect = img.getBoundingClientRect();
+            if (rect.bottom > maxImgBottom) maxImgBottom = rect.bottom;
+            // Check if this image is below the old bottom boundary
+            // NOTE(review): getBoundingClientRect() is viewport-relative, so this
+            // comparison against a pre-generation Y value presumably drifts if
+            // the conversation scrolls — verify.
+            if (prevBottomY > 0 && rect.top > prevBottomY + 200) {
+              newImageBelowOld = true;
+            }
+          }
+        }
+
+        // Count download buttons
+        const allBtns = document.querySelectorAll('button.generated-image-button');
+        const visibleBtns = Array.from(allBtns).filter(b => b.offsetParent !== null);
+        let newBtnBelowOld = false;
+        let maxBtnBottom = 0;
+        for (const btn of visibleBtns) {
+          const rect = btn.getBoundingClientRect();
+          if (rect.bottom > maxBtnBottom) maxBtnBottom = rect.bottom;
+          if (prevBottomY > 0 && rect.top > prevBottomY + 100) {
+            newBtnBelowOld = true;
+          }
+        }
+
+        // Check if we see a new download button that wasn't there before
+        const btnCountIncreased = prevCount !== undefined && visibleBtns.length > prevCount;
+
+        return {
+          generatedImageCount,
+          btnCount: visibleBtns.length,
+          btnCountIncreased,
+          newBtnBelowOld,
+          newImageBelowOld,
+          maxImgBottom,
+          maxBtnBottom,
+          hasNewBtn: visibleBtns.length > 0,
+          inputReady: !!(document.querySelector('[contenteditable="true"]') ||
+            document.querySelector('textarea')),
+          // Check if the LATEST response footer shows "complete"
+          // In multi-turn sessions there may be multiple responses — we need the bottom one.
+          isComplete: (() => {
+            const footers = Array.from(document.querySelectorAll('.response-footer'));
+            const latest = footers[footers.length - 1];
+            return !!(latest && latest.classList.contains('complete'));
+          })(),
+          // Check for generated-image DOM elements in the latest response
+          hasGeneratedImageElement: (() => {
+            const latestResponse = document.querySelector('model-response:last-of-type, .response-container:last-of-type');
+            const container = latestResponse || document;
+            return container.querySelectorAll('generated-image').length > 0;
+          })(),
+          // Check if generation is still in progress
+          isGenerating: !!document.querySelector('bard-avatar .thinking, bard-avatar.thinking') ||
+            !!document.querySelector('.avatar_spinner_animation[style*="opacity: 1"], .avatar_spinner_animation[style*="visibility: visible"]') ||
+            !!document.querySelector('.send-button.stop, .stop-icon, button[aria-label="停止回答"]'),
+          // Check for error indicators — only from conversation area
+          // NOTE(review): only the first 200 characters of the conversation text
+          // are kept, so an error shown further down would not match — verify.
+          text: (() => { const e = document.querySelector('.content-container, .main-content, main'); return (e ? e.innerText : document.body.innerText).substring(0, 200); })(),
+          title: document.title,
+        };
+      }, prevImgCount || 0, prevBtnBottomY || 0);
+
+      // Check for errors — use specific regex patterns to avoid false positives
+      // from generic UI text like tooltips, footers, and sidebar links
+      const errorPatterns = [
+        /Something went wrong/i,
+        /出了点问题/,
+        /Unable to generate/i,
+        /Something unexpected happened/i,
+        /发生意外情况/,
+      ];
+      let hasError = false;
+      let errorType = '';
+      for (const pattern of errorPatterns) {
+        if (info.text.match(pattern)) {
+          hasError = true;
+          errorType = pattern.source;
+          break;
+        }
+      }
+      if (hasError) {
+        log(`Detected error: ${errorType}`);
+        return {
+          status: 'error',
+          // The page title wins whenever it is non-empty; the matched pattern
+          // is only used as a fallback.
+          message: info.title || `Generation failed: ${errorType}`,
+        };
+      }
+
+      // Determine state for logging
+      let state = prevState;
+      if (info.btnCountIncreased || info.newBtnBelowOld) {
+        state = 'DONE';
+      } else if (info.newImageBelowOld) {
+        state = 'GENERATING';
+      }
+
+      if (state !== prevState) {
+        emit({ type: 'progress', step: 'state', prevState, state, elapsed: parseFloat(elapsed),
+          message: `State: ${prevState} → ${state} (${elapsed}s)` });
+        prevState = state;
+      }
+
+      // DONE: new download button appeared below old content AND input is ready
+      if (info.btnCountIncreased || info.newBtnBelowOld) {
+        if (info.inputReady) {
+          log(`Generation completed - new button detected (count: ${prevImgCount || 0}→${info.btnCount})`);
+          await waitForAllButtonsReady(page, info.btnCount, 15000);
+          return {
+            status: 'success',
+            message: 'Image generation completed',
+          };
+        }
+      }
+
+      // DONE: response footer shows "complete" AND generated-image elements exist AND input is ready
+      // NOTE(review): btnCount also counts buttons from earlier turns, so this
+      // branch could presumably fire on the previous response if the new
+      // response footer has not rendered yet — verify.
+      if (info.isComplete && info.inputReady && (info.btnCount > 0 || info.hasGeneratedImageElement)) {
+        log(`Generation completed (footer complete, btnCount: ${info.btnCount}, hasGenElement: ${info.hasGeneratedImageElement})`);
+        await waitForAllButtonsReady(page, info.btnCount, 15000);
+        return {
+          status: 'success',
+          message: 'Image generation completed',
+        };
+      }
+
+      // TEXT_ONLY: input is ready, generation is NOT in progress (stop button gone),
+      // footer shows "complete", and NO generated-image elements exist
+      // AND no new images appeared
+      const isStillGenerating = info.isGenerating;
+      if (info.inputReady && !isStillGenerating && info.isComplete && !info.hasGeneratedImageElement &&
+          !info.btnCountIncreased && !info.newBtnBelowOld && !info.newImageBelowOld) {
+        const text = await getLatestResponseText(page);
+        // Responses of 50 characters or fewer are not treated as a text-only result
+        if (text && text.length > 50) {
+          return {
+            status: 'text_only',
+            message: 'Only text was generated, no images. Response: ' + text,
+          };
+        }
+      }
+
+      await sleep(2000);
+    }
+
+    return {
+      status: 'timeout',
+      message: `Generation did not complete within ${timeout / 1000}s`,
+    };
+  }
+
+  // Fresh session: detectPageState() decides the state on each poll
+  while (Date.now() - startTime < timeout) {
+    const state = await detectPageState(page);
+    const elapsed = ((Date.now() - startTime) / 1000).toFixed(1);
+
+    if (state !== prevState) {
+      emit({ type: 'progress', step: 'state', prevState, state, elapsed: parseFloat(elapsed),
+        message: `State: ${prevState} → ${state} (${elapsed}s)` });
+      prevState = state;
+    }
+
+    if (state === pageStatus.DONE) {
+      emit({ type: 'progress', step: 'complete', message: 'Generation completed successfully' });
+      // Wait for all download buttons to stabilize before returning
+      await waitForAllButtonsReady(page, 0, 15000);
+      return {
+        status: 'success',
+        message: 'Image generation completed',
+      };
+    }
+
+    if (state === pageStatus.TEXT_ONLY) {
+      const text = await getLatestResponseText(page);
+      return {
+        status: 'text_only',
+        message: 'Only text was generated, no images. Response: ' + text,
+      };
+    }
+
+    if (state === pageStatus.ERROR) {
+      log('Generation failed with error');
+      return {
+        status: 'error',
+        message: await getErrorMessage(page),
+      };
+    }
+
+    if (state === pageStatus.PAGE_ERROR) {
+      log('Page encountered an error');
+      return {
+        status: 'page_error',
+        message: 'The page encountered an error. It may need to be refreshed.',
+      };
+    }
+
+    await sleep(2000);
+  }
+
+  log('Generation timed out');
+  return {
+    status: 'timeout',
+    message: `Generation did not complete within ${timeout / 1000}s`,
+  };
+}
+
+/**
+ * Snapshot the current state of generated images on the page.
+ * Call this BEFORE starting a new generation to distinguish
+ * old images from newly generated ones.
+ * Only images at least 300x300 that sit inside a generated-image container
+ * (and not inside an upload preview) are included.
+ * @param {import('puppeteer-core').Page} page
+ * @returns {Promise<string[]>} - de-duplicated array (not a Set) of image srcs that existed before generation
+ */
+export async function snapshotExistingImages(page) {
+  return page.evaluate(() => {
+    const images = document.querySelectorAll('img');
+    const srcs = new Set();
+    for (const img of images) {
+      if (img.naturalWidth < 300 || img.naturalHeight < 300) continue;
+      const isGenerated = img.closest('single-image.generated-image') ||
+        img.closest('.generated-image') ||
+        img.closest('generated-image') ||
+        img.closest('[class*="generated-image"]');
+      const isReferenceUpload = img.closest('.attachment-preview-wrapper') ||
+        img.closest('uploader-file-preview') ||
+        img.closest('[class*="file-preview"]');
+      if (isGenerated && !isReferenceUpload && img.src) {
+        srcs.add(img.src);
+      }
+    }
+    // Spread into an array before returning the value out of the page
+    return [...srcs];
+  });
+}
+
+/**
+ * Wait until the count of download buttons stabilizes — i.e., no new buttons
+ * appear within the settle window. This ensures all generated images have
+ * finished rendering their download buttons before we proceed to the download phase.
+ * The visible button count is sampled once per second; the wait ends when the
+ * count has not grown for 2 s or when the timeout elapses, whichever is first.
+ * @param {import('puppeteer-core').Page} page
+ * @param {number} initialCount - button count when DONE was first detected
+ * @param {number} timeout - max time to wait for buttons to stabilize
+ * @returns {Promise<void>}
+ */
+async function waitForAllButtonsReady(page, initialCount, timeout = 15000) {
+  const startTime = Date.now();
+  let lastCount = initialCount;
+  let stableSince = Date.now();
+  const STABLE_WINDOW = 2000; // 2s with no new buttons = stable
+
+  while (Date.now() - startTime < timeout) {
+    await sleep(1000);
+    const count = await page.evaluate(() => {
+      const buttons = document.querySelectorAll('button.generated-image-button');
+      return Array.from(buttons).filter(b => b.offsetParent !== null).length;
+    });
+
+    if (count > lastCount) {
+      // A new button appeared: restart the stability window
+      lastCount = count;
+      stableSince = Date.now();
+    } else if (Date.now() - stableSince >= STABLE_WINDOW) {
+      break; // stable for long enough
+    }
+  }
+}
+
+/**
+ * Count existing download buttons and their vertical positions.
+ * Used to detect when NEW buttons appear below the existing ones.
+ * Only visible buttons are counted; bottomY is the largest viewport-relative
+ * bottom edge among them, or 0 when there are none.
+ * @param {import('puppeteer-core').Page} page
+ * @returns {Promise<{count: number, bottomY: number}>}
+ */
+export async function snapshotDownloadButtons(page) {
+  return page.evaluate(() => {
+    const buttons = document.querySelectorAll('button.generated-image-button');
+    const visible = Array.from(buttons).filter(b => b.offsetParent !== null);
+    if (visible.length === 0) {
+      return { count: 0, bottomY: 0 };
+    }
+    const bottomY = Math.max(...visible.map(b => b.getBoundingClientRect().bottom));
+    return { count: visible.length, bottomY };
+  });
+}
+
+/**
+ * Extract generated image URLs from the page.
+ * Only images at least 300x300 inside a generated-image container (and not
+ * inside an upload preview) are returned.
+ * @param {import('puppeteer-core').Page} page
+ * @param {Set<string>|string[]} [existingSrcs] - srcs of images that existed before this generation
+ * @returns {Promise<Array<{url: string, width: number, height: number}>>}
+ */
+export async function extractGeneratedImages(page, existingSrcs) {
+  // Arguments to page.evaluate are serialized, and a Set does not survive that
+  // (it arrives as a plain object, which new Set() cannot iterate). Convert to
+  // an array here so both a Set and an array work.
+  const excludeList = [...(existingSrcs || [])];
+
+  return page.evaluate((excludeSrcs) => {
+    const images = document.querySelectorAll('img');
+    const exclude = new Set(excludeSrcs || []);
+    const results = [];
+
+    for (const img of images) {
+      // Ignore small images (icons, avatars, thumbnails)
+      if (img.naturalWidth < 300 || img.naturalHeight < 300) continue;
+
+      const isGenerated = img.closest('single-image.generated-image') ||
+        img.closest('.generated-image') ||
+        img.closest('generated-image') ||
+        img.closest('[class*="generated-image"]');
+
+      const isReferenceUpload = img.closest('.attachment-preview-wrapper') ||
+        img.closest('uploader-file-preview') ||
+        img.closest('[class*="file-preview"]');
+
+      if (isGenerated && !isReferenceUpload) {
+        // Skip images that existed before this generation
+        if (exclude.has(img.src)) continue;
+        results.push({
+          url: img.src,
+          width: img.naturalWidth,
+          height: img.naturalHeight,
+        });
+      }
+    }
+
+    return results;
+  }, excludeList);
+}
+
+/**
+ * Get error message from page.
+ * Returns the page title when it contains a content-policy keyword, otherwise
+ * a generic message.
+ * @param {import('puppeteer-core').Page} page
+ * @returns {Promise<string>}
+ */
+async function getErrorMessage(page) {
+  return page.evaluate(() => {
+    const title = document.title;
+
+    // Content policy blocks - use title as message
+    if (title.includes('限制') || title.includes('blocked') ||
+        title.includes('policy') || title.includes('Blocked')) {
+      return title;
+    }
+
+    return 'Unknown error during generation';
+  });
+}
+
+/**
+ * Extract the latest AI response text from the page.
+ * Walks through message containers and returns the last substantial response.
+ * "Substantial" means more than 20 characters after trimming. If no container
+ * qualifies, the trimmed text of the conversation area (or of the whole body)
+ * is returned instead.
+ * @param {import('puppeteer-core').Page} page
+ * @returns {Promise<string>} response text, or '' when nothing was found
+ */
+async function getLatestResponseText(page) {
+  return page.evaluate(() => {
+    // Try Gemini's response containers first
+    const responseContainers = document.querySelectorAll(
+      '[class*="message-content"], [class*="response"], [class*="assistant"], ' +
+      'mat-card, c-wiz[jsname]'
+    );
+
+    // Get the last one with substantial text
+    for (let i = responseContainers.length - 1; i >= 0; i--) {
+      const text = responseContainers[i].innerText?.trim();
+      if (text && text.length > 20) {
+        return text;
+      }
+    }
+
+    // Fallback: conversation area text only, excluding sidebar
+    const el = document.querySelector('.content-container, .main-content, main');
+    return (el ? el.innerText : document.body.innerText)?.trim() || '';
+  });
+}
+
+// Re-exported so callers can get the status helpers from this module
+export { pageStatus, detectPageState };
diff --git a/skills/gemini-web-generate/scripts/src/screenshot.js b/skills/gemini-web-generate/scripts/src/screenshot.js
new file mode 100644
index 0000000..a319b48
--- /dev/null
+++ b/skills/gemini-web-generate/scripts/src/screenshot.js
@@ -0,0 +1,156 @@
+import path from 'path';
+import fs from 'fs';
+import { config } from '../config.js';
+import { log, emit } from './browser.js';
+
+const sleep = (ms) => new Promise(r => setTimeout(r, ms));
+
+/**
+ * Take a screenshot of the current Gemini page.
+ * The image is always written as JPEG. Without outputPath the file goes to
+ * config.screenshotDir under a timestamped name.
+ * @param {import('puppeteer-core').Page} page
+ * @param {object} options
+ * @param {boolean} [options.full] - full page screenshot
+ * @param {string} [options.outputPath] - custom output path
+ * @param {number} [options.quality] - JPEG quality passed to page.screenshot (default 60)
+ * @returns {Promise<{path: string}>}
+ */
+export async function takeScreenshot(page, { full = false, outputPath, quality = 60 } = {}) {
+  // NOTE(review): only config.screenshotDir is created; the directory of a
+  // custom outputPath is not — presumably the caller must ensure it exists.
+  fs.mkdirSync(config.screenshotDir, { recursive: true });
+
+  // ':' and '.' are replaced so the ISO timestamp is safe in a file name
+  const timestamp = new Date().toISOString().replace(/[:.]/g, '-');
+  const filename = `screenshot-${timestamp}.jpg`;
+  const filePath = outputPath || path.join(config.screenshotDir, filename);
+
+  await page.screenshot({
+    path: filePath,
+    fullPage: full,
+    type: 'jpeg',
+    quality,
+  });
+
+  return { path: filePath };
+}
+
+/**
+ * Count existing download buttons and return their count.
+ * Only buttons for which isVisible() resolves true are counted.
+ * @param {import('puppeteer-core').Page} page
+ * @returns {Promise<{count: number}>}
+ */
+export async function snapshotDownloadButtons(page) {
+  const buttons = await page.$$('button.generated-image-button');
+  const visible = [];
+  for (const btn of buttons) {
+    if (await btn.isVisible()) {
+      visible.push(btn);
+    }
+  }
+  return { count: visible.length };
+}
+
+/**
+ * Download generated images by clicking download buttons.
+ * Waits for all images to finish loading before clicking, then scrolls
+ * gently to each button one at a time.
+ * @param {import('puppeteer-core').Page} page
+ * @param {import('puppeteer-core').CDPSession} cdp - kept for interface compatibility; not used by this function
+ * @param {object} [options]
+ * @param {number} [options.existingButtonCount] - number of download buttons before this generation;
+ *   that many leading buttons are skipped (default 0)
+ * @param {boolean} [options.newestOnly] - click only the last button in the DOM and ignore
+ *   existingButtonCount (default false)
+ * @param {number} [options.timeout] - max wait per download in ms (default 120000); also bounds
+ *   the initial wait for images to finish decoding
+ * @returns {Promise<Array<{path: string}>>} one entry per downloaded file, in click order
+ * @throws {Error} when no download button exists, or a download does not show up within timeout
+ */
+export async function downloadViaButtons(page, cdp, { existingButtonCount = 0, newestOnly = false, timeout = 120000 } = {}) {
+  fs.mkdirSync(config.downloadDir, { recursive: true });
+
+  // Files already present before we click anything; whatever is not in this
+  // set later is treated as a fresh download.
+  const knownFiles = new Set(fs.readdirSync(config.downloadDir));
+
+  // Wait for all images on the page to finish loading and decoding.
+  // This is critical: Gemini generates multiple large images, and interacting
+  // with the page while they're still decoding can cause tab crashes.
+  log('Waiting for all images to finish loading...');
+  try {
+    const allDecoded = await page.evaluate(async (maxWaitMs) => {
+      const pending = Array.from(document.images)
+        .filter(img => img.src && !img.complete)
+        .map(img => img.decode().catch(() => {}));
+      // Bound the wait so a stuck image cannot block the download forever.
+      return Promise.race([
+        Promise.all(pending).then(() => true),
+        new Promise(resolve => setTimeout(() => resolve(false), maxWaitMs)),
+      ]);
+    }, timeout);
+    if (allDecoded) {
+      emit({ type: 'progress', step: 'images_ready', message: 'All images loaded' });
+    } else {
+      log('Image loading check timed out, proceeding anyway');
+    }
+  } catch (err) {
+    // Best effort only: a failed check must not prevent the download attempt.
+    log(`Image loading check failed (${err?.message || err}), proceeding anyway`);
+  }
+
+  // Find all download buttons — both visible and in the DOM
+  const allBtns = await page.$$('button.generated-image-button');
+
+  if (allBtns.length === 0) {
+    throw new Error(
+      'No generated image download button found. ' +
+      'Make sure an image has been generated first.'
+    );
+  }
+
+  // Either just the newest button, or everything added since the snapshot.
+  const targetBtns = newestOnly ? allBtns.slice(-1) : allBtns.slice(existingButtonCount);
+
+  if (targetBtns.length === 0) {
+    log(`No new download buttons detected (existing count: ${existingButtonCount})`);
+    return [];
+  }
+
+  log(`Clicking ${targetBtns.length} download button(s) (total in DOM: ${allBtns.length})`);
+  emit({ type: 'progress', step: 'download', buttonCount: targetBtns.length,
+    message: `Clicking ${targetBtns.length} download button(s)` });
+
+  // Click each button and wait for the download to complete
+  const results = [];
+  for (let i = 0; i < targetBtns.length; i++) {
+    const btn = targetBtns[i];
+    log(`Clicking download button ${i + 1}/${targetBtns.length}...`);
+
+    // Gently scroll the button into view before clicking
+    await btn.evaluate(el => el.scrollIntoView({ behavior: 'auto', block: 'center' }));
+    await sleep(500);
+
+    await btn.click();
+
+    const downloadPath = await waitForNewDownload(config.downloadDir, knownFiles, timeout);
+    results.push({ path: downloadPath });
+    log(`Downloaded: ${downloadPath}`);
+
+    // Small delay between downloads
+    await sleep(500);
+  }
+
+  return results;
+}
+
+/**
+ * Poll a directory until a file appears that is not in knownFiles and is not
+ * a partial Chrome download (.crdownload). Newly seen files are added to
+ * knownFiles so the next call tracks the next download separately.
+ * @param {string} dir - directory to watch
+ * @param {Set<string>} knownFiles - file names already accounted for (mutated)
+ * @param {number} timeout - max wait in ms
+ * @returns {Promise<string>} full path of the first new file found
+ */
+function waitForNewDownload(dir, knownFiles, timeout) {
+  return new Promise((resolve, reject) => {
+    let poll;
+
+    const timer = setTimeout(() => {
+      // Stop polling on timeout; otherwise the interval keeps running (and
+      // keeps the process alive) after the promise has been rejected.
+      clearInterval(poll);
+      reject(new Error(`Download timed out after ${timeout / 1000}s`));
+    }, timeout);
+
+    poll = setInterval(() => {
+      let newFiles;
+      try {
+        newFiles = fs.readdirSync(dir)
+          .filter(f => !knownFiles.has(f) && !f.endsWith('.crdownload'));
+      } catch (err) {
+        // Surface filesystem errors through the promise instead of as an
+        // uncaught exception inside the timer callback.
+        clearInterval(poll);
+        clearTimeout(timer);
+        reject(err);
+        return;
+      }
+      if (newFiles.length === 0) {
+        return;
+      }
+      clearInterval(poll);
+      clearTimeout(timer);
+      // Wait a bit for file to finish writing
+      setTimeout(() => {
+        for (const f of newFiles) {
+          knownFiles.add(f);
+        }
+        resolve(path.join(dir, newFiles[0]));
+      }, 1000);
+    }, 500);
+  });
+}
diff --git a/skills/gemini-web-generate/scripts/src/status.js b/skills/gemini-web-generate/scripts/src/status.js
new file mode 100644
index 0000000..cf724d7
--- /dev/null
+++ b/skills/gemini-web-generate/scripts/src/status.js
@@ -0,0 +1,391 @@
+// Promise-based delay helper.
+const sleep = (ms) => new Promise(r => setTimeout(r, ms));
+
+/**
+ * Names of the page states used by this module.
+ * detectPageState returns every value listed here except TIMEOUT.
+ */
+export const pageStatus = {
+  IDLE: 'IDLE',
+  GENERATING: 'GENERATING',
+  DONE: 'DONE',
+  TEXT_ONLY: 'TEXT_ONLY',
+  ERROR: 'ERROR',
+  PAGE_ERROR: 'PAGE_ERROR',
+  TIMEOUT: 'TIMEOUT',
+};
+
+/**
+ * Detect current page state — the single source of truth.
+ *
+ * Checks run in this order and the first match wins: page crash, blocked
+ * title, page reset, DOM "generating" indicators, text "generating"
+ * indicators, completed footer, error text, image + download button,
+ * text-only response, fallback.
+ *
+ * @param {import('puppeteer-core').Page} page
+ * @returns {Promise<string>} one of 'IDLE', 'GENERATING', 'DONE', 'TEXT_ONLY', 'ERROR', 'PAGE_ERROR'
+ */
+export async function detectPageState(page) {
+  return page.evaluate(() => {
+    // Everything in this callback runs inside the browser page, so helpers
+    // are defined here rather than at module level.
+    const findInput = () =>
+      document.querySelector('[contenteditable="true"]') || document.querySelector('textarea');
+
+    // True for a loaded, reasonably large image that sits inside a
+    // generated-image container and is not a preview of an uploaded reference.
+    const isGeneratedImage = (img) => {
+      if (img.naturalWidth < 300 || img.naturalHeight < 300) return false;
+      const generated = img.closest('single-image.generated-image') ||
+        img.closest('.generated-image') ||
+        img.closest('generated-image') ||
+        img.closest('[class*="generated-image"]');
+      const referenceUpload = img.closest('.attachment-preview-wrapper') ||
+        img.closest('uploader-file-preview') ||
+        img.closest('[class*="file-preview"]');
+      return Boolean(generated) && !referenceUpload;
+    };
+
+    // Only extract text from the main conversation area, excluding the sidebar.
+    // Sidebar lives outside .content-container (its parent is chat-app).
+    const mainContentEl = document.querySelector('.content-container, .main-content, main');
+    const mainText = mainContentEl ? (mainContentEl.innerText || '') : document.body.innerText;
+
+    // Page-level errors (check full page title for crashes)
+    if (document.title.includes('Crash') || document.title.includes('Aw, Snap') ||
+        document.querySelector('.crashed') || document.querySelector('.error-page')) {
+      return 'PAGE_ERROR';
+    }
+
+    // Content policy / safety blocks (these show in page title)
+    if (document.title.includes('限制') || document.title.includes('blocked') ||
+        document.title.includes('policy') || document.title.includes('Blocked')) {
+      return 'ERROR';
+    }
+
+    // Check if the prompt was returned to the input box with no conversation.
+    // This happens when Gemini rejects/resets the page — text is in the input
+    // but no user message was recorded in the chat area.
+    const inputEl = findInput();
+    const inputText = (inputEl?.innerText || inputEl?.value || '').trim();
+    const hasConversation = document.querySelector('[id*="user-query"], [id*="model-response"], [class*="message-content"]');
+    if (inputText && !hasConversation) {
+      return 'ERROR';
+    }
+
+    // Text of the latest response-like container with substantial content;
+    // falls back to the whole main area when none can be isolated.
+    const responseContainers = Array.from(document.querySelectorAll(
+      '[class*="message-content"], [class*="response"], [class*="assistant"], ' +
+      'mat-card, c-wiz[jsname]'
+    ));
+    let chatText = '';
+    for (let i = responseContainers.length - 1; i >= 0; i--) {
+      const text = responseContainers[i].innerText?.trim();
+      if (text && text.length > 20) {
+        chatText = text;
+        break;
+      }
+    }
+    if (!chatText) {
+      chatText = mainText;
+    }
+
+    // Generation in progress — check DOM-based indicators first, then fall back to text.
+    const thinkingAvatar = document.querySelector('bard-avatar .thinking, bard-avatar.thinking');
+    const visibleSpinner = document.querySelector('.avatar_spinner_animation[style*="opacity: 1"], .avatar_spinner_animation[style*="visibility: visible"]');
+    if (thinkingAvatar || visibleSpinner) {
+      return 'GENERATING';
+    }
+
+    // Check send button state: during generation it becomes a "stop" button
+    const stopBtn = document.querySelector('.send-button.stop, .stop-icon, button[aria-label="停止回答"]');
+    if (stopBtn) {
+      return 'GENERATING';
+    }
+
+    // NOTE(review): these substrings are matched before the footer check, so a
+    // finished response whose text contains e.g. "Loading" keeps reporting
+    // GENERATING — confirm this ordering is intended.
+    const generatingIndicators = [
+      'Generating', '生成中', 'Thinking', '思考中',
+      'Creating', '创作中', 'Loading', '加载中',
+      'Working on it', '正在处理',
+    ];
+    if (generatingIndicators.some(indicator => chatText.includes(indicator))) {
+      return 'GENERATING';
+    }
+
+    const hasGeneratedImage = Array.from(document.querySelectorAll('img')).some(isGeneratedImage);
+    const inputReady = Boolean(inputEl);
+
+    // Check if the LATEST response footer has the "complete" class.
+    // In multi-turn sessions there may be multiple responses — we need the bottom one.
+    const responseFooters = Array.from(document.querySelectorAll('.response-footer'));
+    const latestFooter = responseFooters[responseFooters.length - 1];
+    const isComplete = latestFooter && latestFooter.classList.contains('complete');
+
+    // If the footer shows "complete" and input is ready → DONE or still rendering
+    if (isComplete && inputReady) {
+      if (hasGeneratedImage) {
+        return 'DONE';
+      }
+      // A generated-image element can exist before its <img> has loaded.
+      // Scope the lookup to the latest response so elements from older turns
+      // do not count. The last match in document order is taken explicitly:
+      // querySelector with :last-of-type returns the FIRST element matching
+      // the selector list, which can belong to the oldest response.
+      const modelResponses = document.querySelectorAll('model-response');
+      const responseWrappers = document.querySelectorAll('.response-container');
+      const latestResponse = modelResponses[modelResponses.length - 1] ||
+        responseWrappers[responseWrappers.length - 1];
+      const scope = latestResponse || document;
+      if (scope.querySelectorAll('generated-image').length > 0) {
+        // Element exists but image not loaded yet — keep polling
+        return 'GENERATING';
+      }
+      // No images and no element — text-only response
+      return 'TEXT_ONLY';
+    }
+
+    // Generation errors — use specific, multi-word patterns to avoid false positives
+    // from generic UI text like tooltips and footer links.
+    const errorIndicators = [
+      'Something went wrong',
+      '出了点问题',
+      'Unable to generate',
+      'Something unexpected happened',
+      '发生意外情况',
+    ];
+    if (errorIndicators.some(indicator => chatText.includes(indicator))) {
+      return 'ERROR';
+    }
+
+    // Only the first download button in the DOM is inspected for visibility.
+    const downloadBtn = document.querySelector('button.generated-image-button');
+    const hasDownloadButton = Boolean(downloadBtn) && downloadBtn.offsetParent !== null;
+
+    // Only DONE when both generated image AND download button exist
+    if (hasGeneratedImage && hasDownloadButton && inputReady) {
+      return 'DONE';
+    }
+
+    // Text-only generation: input is ready but no images appeared.
+    // Generating and error phrases were already ruled out above, so a
+    // substantive response here means Gemini answered with text only.
+    if (!hasGeneratedImage && inputReady && chatText.length > 50) {
+      return 'TEXT_ONLY';
+    }
+
+    if (hasGeneratedImage) {
+      return 'GENERATING';
+    }
+
+    return 'IDLE';
+  });
+}
+
+/**
+ * Check the current state of the Gemini page.
+ * @param {import('puppeteer-core').Page} page
+ * @param {object} [options]
+ * @param {boolean} [options.waitForCompletion] - poll until the state is done, text_only,
+ *   error or page_error, or until the timeout elapses; when falsy a single status is returned
+ * @param {number} [options.timeout] - max ms to wait (default 300000)
+ * @param {function} [options.onStateChange] - callback(state, elapsed) called on each state
+ *   transition; elapsed is in seconds
+ * @returns {Promise<{state: string, message: string, imageCount?: number, hasDownloadButton?: boolean, polled?: boolean, polls?: number, elapsedMs?: number}>}
+ *   state is lower-case. polled, polls and elapsedMs are only present when polling was
+ *   requested. On timeout the state is 'generating' and the message says it timed out.
+ */
+export async function checkStatus(page, { waitForCompletion, timeout, onStateChange } = {}) {
+  if (!waitForCompletion) {
+    return getFullStatus(page);
+  }
+
+  // States that end the polling loop.
+  const terminalStates = new Set(['done', 'text_only', 'error', 'page_error']);
+  const maxTime = timeout || 300000;
+  const startTime = Date.now();
+  let polls = 0;
+  let lastState = '';
+
+  while (Date.now() - startTime < maxTime) {
+    const result = await getFullStatus(page);
+    polls++;
+
+    if (result.state !== lastState) {
+      const elapsed = ((Date.now() - startTime) / 1000).toFixed(1);
+      process.stderr.write(`[gemini-web-cli] Status: ${lastState || 'unknown'} → ${result.state} (${elapsed}s)\n`);
+      onStateChange?.(result.state, parseFloat(elapsed));
+      lastState = result.state;
+    }
+
+    if (terminalStates.has(result.state)) {
+      result.polled = true;
+      result.polls = polls;
+      result.elapsedMs = Date.now() - startTime;
+      return result;
+    }
+
+    await sleep(3000);
+  }
+
+  return {
+    state: 'generating',
+    message: `Still generating after ${maxTime / 1000}s, timed out`,
+    polled: true,
+    polls,
+    // Measured time, like the terminal-state path above, rather than the
+    // configured budget: the last poll can overshoot maxTime.
+    elapsedMs: Date.now() - startTime,
+  };
+}
+
+/**
+ * Get a full status report from the page.
+ * Scrolls the page to the top, waits one second, then runs detectPageState
+ * and getDetails together.
+ * @param {import('puppeteer-core').Page} page
+ * @returns {Promise<{state: string, message: string, imageCount?: number, hasDownloadButton?: boolean}>}
+ *   state is the detected state in lower case; imageCount and
+ *   hasDownloadButton are only set when the state is 'done'
+ */
+async function getFullStatus(page) {
+  // Scroll to top to trigger lazy-loaded images into the DOM
+  await page.evaluate(() => {
+    window.scrollTo({ top: 0, behavior: 'instant' });
+  });
+  await sleep(1000);
+
+  const [state, details] = await Promise.all([
+    detectPageState(page),
+    getDetails(page),
+  ]);
+
+  const messages = {
+    IDLE: 'Page is idle, waiting for input',
+    GENERATING: 'AI is currently generating',
+    DONE: 'Image generation completed successfully',
+    TEXT_ONLY: 'Only text was generated, no images',
+    ERROR: 'An error occurred during generation',
+    PAGE_ERROR: 'The page encountered an error',
+  };
+
+  // getDetails scans the whole conversation, so it can pick up error text
+  // left over from an earlier turn. Only let it replace the default message
+  // when the detected state is itself a failure.
+  const failureStates = ['ERROR', 'PAGE_ERROR', 'TEXT_ONLY'];
+  const errorMessage = failureStates.includes(state) ? details.errorMessage : '';
+
+  const result = {
+    state: state.toLowerCase(),
+    message: errorMessage || messages[state] || 'Unknown state',
+  };
+
+  if (state === 'DONE') {
+    result.imageCount = details.imageCount;
+    result.hasDownloadButton = details.hasDownloadButton;
+  }
+
+  return result;
+}
+
+/**
+ * Get details from the page: image count, download button, error message.
+ * @param {import('puppeteer-core').Page} page
+ * @returns {Promise<{imageCount: number, hasDownloadButton: boolean, errorMessage: string}>}
+ *   errorMessage is '' when nothing matched
+ */
+async function getDetails(page) {
+  return page.evaluate(() => {
+    // Runs inside the browser page; helpers must live in this callback.
+    const isGeneratedImage = (img) => {
+      if (img.naturalWidth < 300 || img.naturalHeight < 300) return false;
+      const generated = img.closest('single-image.generated-image') ||
+        img.closest('.generated-image') ||
+        img.closest('generated-image') ||
+        img.closest('[class*="generated-image"]');
+      const referenceUpload = img.closest('.attachment-preview-wrapper') ||
+        img.closest('uploader-file-preview') ||
+        img.closest('[class*="file-preview"]');
+      return Boolean(generated) && !referenceUpload;
+    };
+
+    const imageCount = Array.from(document.querySelectorAll('img')).filter(isGeneratedImage).length;
+
+    // Only the first download button in the DOM is inspected for visibility.
+    const downloadBtn = document.querySelector('button.generated-image-button');
+    const hasDownloadButton = Boolean(downloadBtn) && downloadBtn.offsetParent !== null;
+
+    // Extract error text from the conversation area only, excluding sidebar
+    const contentEl = document.querySelector('.content-container, .main-content, main');
+    const text = contentEl ? (contentEl.innerText || '') : document.body.innerText;
+
+    // Each pattern captures whatever follows the phrase on the same line.
+    // The separator class also accepts the full-width colon used in Chinese
+    // text so it does not end up at the start of the captured message.
+    const errorPatterns = [
+      /Something went wrong[:：\s]*(.*)/i,
+      /出了点问题[:：\s]*(.*)/,
+      /Unable to generate[:：\s]*(.*)/i,
+      /无法生成[:：\s]*(.*)/,
+      /Network error[:：\s]*(.*)/i,
+      /网络错误[:：\s]*(.*)/,
+      /Server error[:：\s]*(.*)/i,
+      /服务器错误[:：\s]*(.*)/,
+      /Too many requests[:：\s]*(.*)/i,
+      /请求过多[:：\s]*(.*)/,
+      /生成限制[:：\s]*(.*)/,
+      /内容政策[:：\s]*(.*)/,
+      /安全政策[:：\s]*(.*)/,
+      /content policy[:：\s]*(.*)/i,
+    ];
+    let errorMessage = '';
+    for (const pattern of errorPatterns) {
+      const match = text.match(pattern);
+      if (match && match[1]) {
+        errorMessage = match[1].trim().substring(0, 200);
+        break;
+      }
+    }
+
+    // Fall back to the page title when it signals a block.
+    if (!errorMessage && (document.title.includes('限制') || document.title.includes('blocked') ||
+        document.title.includes('policy') || document.title.includes('Blocked'))) {
+      errorMessage = document.title;
+    }
+
+    // If no error text found and the page was reset (input has text, no conversation),
+    // use the input content as the error context
+    const inputEl = document.querySelector('[contenteditable="true"]') || document.querySelector('textarea');
+    const inputText = (inputEl?.innerText || inputEl?.value || '').trim();
+    const hasConversation = document.querySelector('[id*="user-query"], [id*="model-response"], [class*="message-content"]');
+    if (!errorMessage && inputText && !hasConversation) {
+      errorMessage = `Page was reset, prompt returned to input: ${inputText.substring(0, 100)}`;
+    }
+
+    return { imageCount, hasDownloadButton, errorMessage };
+  });
+}