diff --git a/fetchWithPuppeteer.js b/fetchWithPuppeteer.js
new file mode 100644
index 0000000000000000000000000000000000000000..566cd7152503a9f5b6f7735d58bdc9c0ca02a9f8
--- /dev/null
+++ b/fetchWithPuppeteer.js
@@ -0,0 +1,559 @@
+//fetchWithPuppeteer.js
+const puppeteer = require('puppeteer-extra');
+const StealthPlugin = require('puppeteer-extra-plugin-stealth');
+const { execSync } = require('child_process');
+
+puppeteer.use(StealthPlugin());
+
+const fs = require('fs');
+const path = require('path');
+
+const readline = require('readline');
+
+// ============= Shared state (exposed to the HTTP layer through the exported getters) =============
+let pendingVerify = false; // true while the front end should ask the user for the SMS code
+let pageGlobal = null; // page on which submitVerifyCode() types the SMS code
+let qrCodeContent = ''; // base64 QR code captured from the get_qrcode response
+let userInfo = "{}"; // the string "{}" until login succeeds, then the parsed user-info payload
+
+let scanStatus = ''; // new | scanned | verify
+let scanUser = {}; // scan_user_info reported for the account that scanned the QR code
+let lastVerifyError = ''; // description of the most recent rejected SMS code
+
+let browserInstance = null;
+let verifyFlowRunning = false; // true while an SMS-verification loop is already in progress
+
+const SCREENSHOT_DIR = 'img';
+const COOKIE_DIR = 'cookies';
+// The front end is asked for the SMS code once the resend countdown is at or below this value.
+const PROMPT_THRESHOLD_SECONDS = 50;
+
+/**
+ * Resolve after `ms` milliseconds.
+ * Used instead of page.waitForTimeout(), which newer Puppeteer releases no longer provide.
+ * @param {number} ms
+ * @returns {Promise<void>}
+ */
+function sleep(ms) {
+  return new Promise((resolve) => setTimeout(resolve, ms));
+}
+
+/**
+ * Query elements by XPath on both older (page.$x) and newer (xpath/ selector prefix) Puppeteer.
+ * @param {import('puppeteer').Page} page
+ * @param {string} expression XPath expression
+ * @returns {Promise<Array<import('puppeteer').ElementHandle>>}
+ */
+async function findByXPath(page, expression) {
+  if (typeof page.$x === 'function') {
+    return page.$x(expression);
+  }
+  return page.$$(`xpath/${expression}`);
+}
+
+/**
+ * Best-effort screenshot: creates the target directory when missing and never throws,
+ * so a failed debug screenshot cannot abort the login flow.
+ * @param {import('puppeteer').Page} page
+ * @param {string} filename
+ * @returns {Promise<boolean>} true when the screenshot was written
+ */
+async function takeScreenshot(page, filename) {
+  try {
+    fs.mkdirSync(path.dirname(filename), { recursive: true });
+    await page.screenshot({ path: filename });
+    return true;
+  } catch (err) {
+    console.error(`❌ 截图失败 (${filename}):`, err.message);
+    return false;
+  }
+}
+
+/**
+ * Build the cookie file path for an account.
+ * Path separators in the account name are replaced so the file always stays inside COOKIE_DIR.
+ * @param {string} accountName
+ * @returns {string}
+ */
+function cookieFilePath(accountName) {
+  const safeName = String(accountName).replace(/[\\/]/g, '_');
+  return path.join(COOKIE_DIR, `cookies_${safeName}.json`);
+}
+
+/**
+ * Close the running browser (if any) and reset the shared state so a new session can start.
+ * @returns {Promise<void>}
+ */
+async function stopBot() {
+  const browser = browserInstance;
+  browserInstance = null; // reset first so a failing close() cannot block a restart
+
+  if (browser) {
+    try {
+      console.log(`🛑 正在关闭 Puppeteer...`);
+      await browser.close();
+      console.log(`✅ Puppeteer 已关闭`);
+    } catch (err) {
+      console.log(`❌ 关闭 Puppeteer 失败:`, err);
+    }
+  } else {
+    console.log(`⚠️ 没有运行中的 Puppeteer`);
+  }
+
+  pendingVerify = false;
+  pageGlobal = null;
+  qrCodeContent = '';
+  scanStatus = '';
+  scanUser = {};
+  lastVerifyError = '';
+  userInfo = "{}";
+  console.log(`♻️ 状态已清理,允许重新启动`);
+}
+
+/**
+ * Type the SMS code received from the front end into the waiting page and press 【验证】.
+ * @param {string|number} code verification code entered by the user
+ * @returns {Promise<void>}
+ * @throws {Error} when the code is malformed or no page is waiting for a code
+ */
+async function submitVerifyCode(code) {
+  const normalized = String(code ?? '').trim();
+  // The code arrives from outside this module: accept only short alphanumeric values.
+  if (!/^[0-9A-Za-z]{1,12}$/.test(normalized)) {
+    throw new Error('验证码格式不正确');
+  }
+  const page = pageGlobal; // stopBot() may clear the global while we are awaiting
+  if (!page) {
+    throw new Error('当前没有等待验证码的页面');
+  }
+
+  console.log(`✅ 收到验证码:${normalized}`);
+  await page.keyboard.type(normalized, { delay: 500 });
+  const [confirmButton] = await findByXPath(page, `//div[text()="验证"]`);
+  if (confirmButton) {
+    await confirmButton.click();
+  } else {
+    console.log('⚠️ 未找到【验证】按钮,验证码已输入但未点击');
+  }
+  pendingVerify = false;
+
+  // The code itself is deliberately kept out of the file name.
+  await takeScreenshot(page, `${SCREENSHOT_DIR}/after_input_${getTimestamp()}.png`);
+  console.log('✅ 收到验证码,已输入验证码,已截图');
+}
+
+/**
+ * Load the cookies previously saved for an account, if any.
+ * A missing, corrupt or empty cookie file is not an error: the flow falls back to QR login.
+ * @param {import('puppeteer').Page} page
+ * @param {string} accountName
+ * @returns {Promise<void>}
+ */
+async function loadCookies(page, accountName) {
+  const cookiePath = cookieFilePath(accountName);
+  if (!fs.existsSync(cookiePath)) {
+    console.log(`⚠️ 未找到账号【${accountName}】的 Cookie 文件,准备扫码登录`);
+    return;
+  }
+
+  let cookies;
+  try {
+    cookies = JSON.parse(fs.readFileSync(cookiePath, 'utf8'));
+  } catch (err) {
+    console.error(`❌ 账号【${accountName}】的 Cookie 文件无法解析,准备扫码登录:`, err.message);
+    return;
+  }
+  if (!Array.isArray(cookies) || cookies.length === 0) {
+    console.log(`⚠️ 账号【${accountName}】的 Cookie 文件为空,准备扫码登录`);
+    return;
+  }
+
+  await page.setCookie(...cookies);
+  console.log(`🍪 已加载账号【${accountName}】的 Cookie`);
+}
+
+/**
+ * Persist the page's current cookies for an account.
+ * @param {import('puppeteer').Page} page
+ * @param {string} accountName
+ * @returns {Promise<void>}
+ */
+async function saveCookies(page, accountName) {
+  const cookies = await page.cookies();
+  fs.mkdirSync(COOKIE_DIR, { recursive: true });
+  const cookiePath = cookieFilePath(accountName);
+  fs.writeFileSync(cookiePath, JSON.stringify(cookies, null, 2));
+  console.log(`💾 已保存账号【${accountName}】的 Cookie: ${cookiePath}`);
+}
+
+/**
+ * Format the current local time as YYYYMMDD_hhmmss (used in screenshot file names).
+ * @returns {string}
+ */
+function getTimestamp() {
+  const now = new Date();
+  const pad = (value) => String(value).padStart(2, '0');
+  const date = `${now.getFullYear()}${pad(now.getMonth() + 1)}${pad(now.getDate())}`;
+  const time = `${pad(now.getHours())}${pad(now.getMinutes())}${pad(now.getSeconds())}`;
+  return `${date}_${time}`;
+}
+
+/**
+ * Locate a Chrome/Chromium executable on PATH.
+ * @returns {string} path printed by `which` for the first command that is found
+ * @throws {Error} when none of the known commands is installed
+ */
+function detectChromePath() {
+  const candidates = ['google-chrome', 'google-chrome-stable', 'chromium-browser', 'chromium'];
+  for (const cmd of candidates) {
+    try {
+      // stderr is silenced; the result is named chromePath so it does not shadow the `path` module.
+      const chromePath = execSync(`which ${cmd}`, { stdio: ['ignore', 'pipe', 'ignore'] })
+        .toString()
+        .trim();
+      if (chromePath) {
+        console.log(`✅ 检测到 Chrome 路径: ${chromePath}`);
+        return chromePath;
+      }
+    } catch (e) {
+      // `which` exits non-zero when the command is missing: try the next candidate.
+    }
+  }
+  throw new Error("❌ 未检测到可用的 Chrome/Chromium,请先安装!");
+}
+
+/**
+ * Wait for a verification option to appear, click it, then run the countdown/verification loop.
+ * @param {import('puppeteer').Page} page Puppeteer page
+ * @param {string} optionText button text, e.g. '接收短信验证码'
+ * @param {number} timeout maximum wait in milliseconds (default 30 s)
+ * @param {number} interval polling interval in milliseconds (default 1000 ms)
+ * @returns {Promise<boolean>} true when the option was found and clicked
+ */
+async function clickVerifyOptionAutoRetry(page, optionText, timeout = 30000, interval = 1000) {
+  const xpath = `//div[contains(text(), "${optionText}")]`;
+  const start = Date.now();
+  console.log(`🟢 开始等待【${optionText}】按钮出现...`);
+
+  while (Date.now() - start < timeout) {
+    if (page.isClosed()) {
+      console.log(`⚠️ 页面已关闭,停止等待【${optionText}】按钮`);
+      return false;
+    }
+
+    const [button] = await findByXPath(page, xpath);
+    if (button) {
+      await button.click();
+      console.log(`✅ 已点击【${optionText}】按钮`);
+      const filename = `${SCREENSHOT_DIR}/click_${optionText}_${getTimestamp()}.png`;
+      if (await takeScreenshot(page, filename)) {
+        console.log(`📸 点击后已截图保存为 ${filename}`);
+      }
+
+      // Wait for the user-supplied SMS code; the code itself is typed by submitVerifyCode().
+      await checkTimerAndVerifyAutoRetry(page, undefined, 60000, 1000);
+      return true;
+    }
+
+    console.log(`⏳ 【${optionText}】按钮暂未出现,继续等待...`);
+    await sleep(interval);
+  }
+
+  console.error(`❌ 超时未找到【${optionText}】按钮`);
+  return false;
+}
+
+/**
+ * Poll the "resend" countdown. Once it is at or below PROMPT_THRESHOLD_SECONDS the front end is
+ * asked for the SMS code (pendingVerify = true). The loop ends when pendingVerify has been
+ * cleared again after being raised, when the page is closed, or when the timeout elapses.
+ * @param {import('puppeteer').Page} page Puppeteer page
+ * @param {string} verifyCode unused; kept so existing call sites keep working
+ * @param {number} timeout maximum wait in milliseconds
+ * @param {number} interval polling interval in milliseconds
+ * @returns {Promise<boolean>} true when a code was requested and pendingVerify was then cleared
+ */
+async function checkTimerAndVerifyAutoRetry(page, verifyCode = '888888', timeout = 60000, interval = 3000) {
+  const xpathForTimer = `//span[contains(text(), "后重新发送")]`;
+  const start = Date.now();
+  let promptRaised = pendingVerify; // the caller may already have asked the front end for a code
+
+  console.log(`🟢 [开始轮询] 检测倒计时...`);
+  pageGlobal = page; // submitVerifyCode() types the code on this page
+
+  while (Date.now() - start < timeout) {
+    if (page.isClosed()) {
+      console.log('⚠️ [已停止] 页面已关闭,结束倒计时轮询');
+      return false;
+    }
+
+    const [timerElement] = await findByXPath(page, xpathForTimer);
+    if (!timerElement) {
+      console.log('⏳ [未找到] 倒计时元素,继续等待...');
+    } else {
+      const timerText = await page.evaluate((el) => el.textContent, timerElement);
+      console.log(`⏳ [倒计时文本] ${timerText}`);
+
+      const match = /(\d+)s/.exec(timerText ?? '');
+      if (!match) {
+        console.log('⚠️ [未匹配] 秒数格式,继续等待...');
+      } else {
+        const remaining = Number.parseInt(match[1], 10);
+        console.log(`⏳ [剩余秒数] ${remaining}s`);
+
+        if (!promptRaised && remaining <= PROMPT_THRESHOLD_SECONDS) {
+          promptRaised = true;
+          pendingVerify = true; // picked up by the front end through the pendingVerify() getter
+          console.log('⚡ 已挂 pendingVerify = true,前端页面可以轮询到');
+          await takeScreenshot(page, `${SCREENSHOT_DIR}/wait_input_${getTimestamp()}.png`);
+        } else if (!promptRaised) {
+          console.log(`⏳ [继续等待] 剩余秒数 ${remaining} > ${PROMPT_THRESHOLD_SECONDS}`);
+        }
+      }
+    }
+
+    await sleep(interval);
+
+    if (promptRaised && !pendingVerify) {
+      console.log('✅ [完成] 验证码已提交,结束倒计时轮询');
+      return true;
+    }
+  }
+
+  await takeScreenshot(page, `${SCREENSHOT_DIR}/before_[超时]_${getTimestamp()}.png`);
+  const reason = promptRaised ? '未收到验证码' : `未检测到倒计时 <= ${PROMPT_THRESHOLD_SECONDS}`;
+  console.log(`❌ [超时] (${timeout / 1000}s) ${reason}`);
+  return false;
+}
+
+/**
+ * Handle a get_qrcode response: remember the QR code (base64) for the front end.
+ * @param {import('puppeteer').Page} page
+ * @param {import('puppeteer').HTTPResponse} response
+ * @returns {Promise<string>} raw response body
+ */
+async function handleQrCodeResponse(page, response) {
+  const text = await response.text();
+  console.log('📡 原始响应:', text);
+  console.log("🎯 截获二维码接口返回:");
+
+  try {
+    const base64 = JSON.parse(text)?.data?.qrcode;
+    if (base64) {
+      qrCodeContent = base64;
+      console.log('✅ 已保存二维码 base64:', `${base64.substring(0, 30)}...`);
+    }
+  } catch (err) {
+    console.error('❌ 解析响应失败:', err.message);
+  }
+
+  const filename = `${SCREENSHOT_DIR}/scree_${getTimestamp()}.png`;
+  if (await takeScreenshot(page, filename)) {
+    console.log(`📸 二维码截图已保存为 ${filename}`);
+  }
+  return text;
+}
+
+/**
+ * Handle a QR status response (check_qrconnect / check_qrcode): track the scan state and start
+ * the SMS-verification flow when the response asks for it.
+ * @param {import('puppeteer').Page} page
+ * @param {import('puppeteer').HTTPResponse} response
+ * @returns {Promise<void>}
+ */
+async function handleScanStatusResponse(page, response) {
+  const text = await response.text();
+  console.log('📡 原始响应:', text);
+  const data = JSON.parse(text)?.data;
+  if (!data) {
+    return;
+  }
+
+  if (data.status === 'new') {
+    scanStatus = 'new';
+    pendingVerify = false;
+  }
+
+  if (data.status === 'scanned') {
+    scanStatus = 'scanned';
+    scanUser = data.scan_user_info || {};
+    pendingVerify = false;
+  }
+
+  // This handler runs for every status response, so only one verification flow may run at a time.
+  if (data.account_flow === 'verify') {
+    scanStatus = 'verify';
+    if (!verifyFlowRunning) {
+      verifyFlowRunning = true;
+      pendingVerify = true;
+      try {
+        await clickVerifyOptionAutoRetry(page, '接收短信验证码');
+      } finally {
+        verifyFlowRunning = false;
+      }
+    }
+  }
+
+  // Expired: the response carries a fresh QR code, so publish it and reload the page.
+  if (data.status === 'expired' && data.qrcode) {
+    qrCodeContent = data.qrcode;
+    console.log('♻️ 二维码已过期,已更新 base64');
+    await page.reload({ waitUntil: 'networkidle2' });
+  }
+}
+
+/**
+ * Handle a validate_code response: record why the SMS code was rejected, if it was.
+ * @param {import('puppeteer').HTTPResponse} response
+ * @returns {Promise<void>}
+ */
+async function handleValidateCodeResponse(response) {
+  const text = await response.text();
+  console.log('📡 验证码校验响应:', text);
+  try {
+    const json = JSON.parse(text);
+    if (json?.message === 'error') {
+      lastVerifyError = json?.data?.description || '验证码验证失败';
+      pendingVerify = true; // ask the front end for the code again
+      console.log(`❌ [验证码错误] ${lastVerifyError}`);
+    } else {
+      lastVerifyError = '';
+    }
+  } catch (e) {
+    console.error('❌ validate_code 解析失败', e);
+  }
+}
+
+/**
+ * Handle a support/user/info response, which this module treats as a successful login:
+ * save the cookies and publish the user info.
+ * @param {import('puppeteer').Page} page
+ * @param {import('puppeteer').HTTPResponse} response
+ * @param {string} accountName
+ * @returns {Promise<void>}
+ */
+async function handleUserInfoResponse(page, response, accountName) {
+  console.log('📡 url:', response.url());
+  const text = await response.text();
+  console.log('📡 原始响应:', text);
+
+  console.log(`✅ [扫码成功] 准备保存账号【${accountName}】的 Cookie`);
+  await saveCookies(page, accountName);
+
+  const filename = `${SCREENSHOT_DIR}/scree_${getTimestamp()}.png`;
+  if (await takeScreenshot(page, filename)) {
+    console.log(`📸 扫码成功 已保存为 ${filename}`);
+  }
+
+  const json = JSON.parse(text);
+  userInfo = json;
+  console.log(`✅ 已保存用户信息: ${json?.basic?.nickname || ''}`);
+}
+
+/**
+ * Launch a fresh browser, open the login entry page and watch the network traffic for the QR
+ * code, scan status, SMS verification and login result. The browser is left running so the
+ * login flow can continue after this function returns; call stopBot() to close it.
+ * @param {string} entryUrl login page URL
+ * @param {string} accountName account key used for the cookie file
+ * @returns {Promise<string|null>} raw body of the get_qrcode response, or null if none was seen
+ */
+async function fetchQrFromPage(entryUrl, accountName = 'userTest') {
+  if (browserInstance) {
+    console.log(`♻️ 已有 Puppeteer,先关闭`);
+    await stopBot();
+  }
+
+  const chromePath = detectChromePath();
+  const browser = await puppeteer.launch({
+    headless: 'new',
+    executablePath: chromePath,
+    args: [
+      '--no-sandbox',
+      '--disable-setuid-sandbox',
+      '--disable-blink-features=AutomationControlled',
+    ],
+    defaultViewport: {
+      width: 1280,
+      height: 800,
+    },
+  });
+  browserInstance = browser;
+
+  let page;
+  try {
+    page = await browser.newPage();
+    await page.setUserAgent(
+      'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
+    );
+    await page.setExtraHTTPHeaders({
+      'Accept-Language': 'zh-CN,zh;q=0.9',
+    });
+    await loadCookies(page, accountName);
+  } catch (err) {
+    await stopBot(); // do not leave an orphaned Chrome process behind
+    throw err;
+  }
+
+  let hookResult = null;
+
+  // Dispatch each response to its handler; a URL may match more than one handler.
+  const onResponse = async (response) => {
+    const url = response.url();
+    if (url.includes('get_qrcode')) {
+      console.log('📡 url:', url);
+      hookResult = await handleQrCodeResponse(page, response);
+    }
+    if (url.includes('check_qrconnect') || url.includes('check_qrcode')) {
+      await handleScanStatusResponse(page, response);
+    }
+    if (url.includes('validate_code')) {
+      await handleValidateCodeResponse(response);
+    }
+    if (url.includes('support/user/info')) {
+      await handleUserInfoResponse(page, response, accountName);
+    }
+  };
+  page.on('response', (response) => {
+    // Event emitters ignore returned promises, so rejections must be handled here.
+    onResponse(response).catch((err) => {
+      console.error(`❌ 处理响应失败 (${response.url()}):`, err.message);
+    });
+  });
+
+  page.on('framenavigated', (frame) => {
+    console.log('当前 Frame URL:', frame.url());
+  });
+
+  try {
+    console.log(`🟡 正在打开入口页面:${entryUrl}`);
+    const res = await page.goto(entryUrl, {
+      waitUntil: 'networkidle2',
+      timeout: 15000,
+    });
+
+    if (!res || !res.ok()) {
+      throw new Error(`页面加载失败: ${res?.status()}`);
+    }
+  } catch (err) {
+    console.error("❌ 页面加载出错:", err.message);
+    if (await takeScreenshot(page, 'error_screenshot.png')) {
+      console.log("📸 错误截图已保存为 error_screenshot.png");
+    }
+  }
+
+  return hookResult;
+}
+
+module.exports = {
+  stopBot,
+  fetchQrFromPage,
+  pendingVerify: () => pendingVerify,
+  submitVerifyCode,
+  qrCodeContent: () => qrCodeContent,
+  userInfo: () => userInfo,
+  scanStatus: () => scanStatus,
+  scanUser: () => scanUser,
+  verifyError: () => lastVerifyError,
+};
+
diff --git a/index.js b/index.js
deleted file mode 100644
index 2d7e6834fb6366b3120c7a37cc5f637bc4a33928..0000000000000000000000000000000000000000
--- a/index.js
+++ /dev/null
@@ -1 +0,0 @@
-console.log("欢迎来到 InsCode");
\ No newline at end of file
diff --git a/package.json b/package.json
index 72caa1750a1c44c18460a496d258fbd3c51c673a..4cec3e34813f38ca124c40d08d980212cd25d0b3 100644
--- a/package.json
+++ b/package.json
@@ -1,18 +1,15 @@
 {
-  "name": "nodejs",
-  "version": "1.0.0",
-  "description": "",
-  "main": "index.js",
-  "scripts": {
-    "dev": "node index.js",
-    "test": "echo \"Error: no test specified\" && exit 1"
-  },
-  "keywords": [],
-  "author": "",
-  "license": "ISC",
-  "dependencies": {
-    "@types/node": "^18.0.6",
-    "node-fetch": "^3.2.6"
-  }
+  "name": "debot-fetch",
+  "version": "1.0.0",
+  "description": "Headless Puppeteer scraper for debot.ai",
+  "main": "run.js",
+  "scripts": {
+    "start": "node run.js"
+  },
+  "dependencies": {
+    "express": "^5.1.0",
+    "puppeteer": "^21.3.8",
+    "puppeteer-extra": "^3.3.6",
+    "puppeteer-extra-plugin-stealth": "^2.11.1"
   }
-  
\ No newline at end of file
+}
diff --git a/public/index.html b/public/index.html
new file mode 100644
index 0000000000000000000000000000000000000000..7b56cf5e34d04d1b8bb9ba0d673aaa8cbae284de
--- /dev/null
+++ b/public/index.html
@@ -0,0 +1,158 @@
+
+
+
+ ++
昵称:
+