// Node.js: using the Puppeteer headless browser, with anti-detection measures
const puppeteer = require('puppeteer');
// Anti-detection setup.
/**
 * Registers init scripts and a User-Agent override on a page so that common
 * automation fingerprints are masked before any script of the page runs.
 *
 * Call this before navigating: scripts registered through
 * `evaluateOnNewDocument` only affect documents created afterwards.
 *
 * Every callback handed to `evaluateOnNewDocument` is executed inside the
 * browser page, so it must be self-contained and may only use browser globals.
 *
 * @param {object} page - Puppeteer page (anything exposing
 *   `evaluateOnNewDocument(fn)` and `setUserAgent(ua)`).
 * @param {string} [userAgent] - User-Agent string to report; defaults to a
 *   desktop Chrome 122 on Windows 10 string.
 * @returns {Promise<void>} Resolves once all overrides are registered.
 */
async function setBrowserPage(
  page,
  userAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36"
) {
  // Report `navigator.webdriver` as false instead of the automation default.
  await page.evaluateOnNewDocument(() => {
    Object.defineProperty(navigator, 'webdriver', {
      get: () => false,
    });
  });

  await page.setUserAgent(userAgent);

  // Expose a fixed, non-empty plugin list.
  // NOTE(review): this list mirrors an older Chrome plugin set; confirm it
  // still matches what the Chrome version in the User-Agent reports.
  await page.evaluateOnNewDocument(() => {
    // Each MIME type entry points back at its owning plugin through
    // `enabledPlugin`, as in a real browser.
    const buildPlugin = (name, filename, description, mimeTypes) => {
      const plugin = {
        description: description,
        filename: filename,
        length: mimeTypes.length,
        name: name,
      };
      mimeTypes.forEach((mimeType, index) => {
        plugin[index] = Object.assign({}, mimeType, { enabledPlugin: plugin });
      });
      return plugin;
    };

    // Built once so repeated reads of `navigator.plugins` return the same
    // object; a fresh array per read is itself an observable anomaly.
    const fakePlugins = [
      buildPlugin("Chrome PDF Plugin", "internal-pdf-viewer", "Portable Document Format", [
        {type: "application/x-google-chrome-pdf", suffixes: "pdf", description: "Portable Document Format"},
      ]),
      buildPlugin("Chrome PDF Viewer", "mhjfbmdgcfjbbpaeojofohoefgiehjai", "", [
        {type: "application/pdf", suffixes: "pdf", description: ""},
      ]),
      buildPlugin("Native Client", "internal-nacl-plugin", "", [
        {type: "application/x-nacl", suffixes: "", description: "Native Client Executable"},
        {type: "application/x-pnacl", suffixes: "", description: "Portable Native Client Executable"},
      ]),
    ];

    Object.defineProperty(navigator, 'plugins', {
      get: () => fakePlugins,
    });
  });

  // Provide a stub `window.chrome` object when the browser does not expose
  // one. The stub lives on `window` (not on `navigator`), and an existing
  // genuine object is left untouched.
  await page.evaluateOnNewDocument(() => {
    if (!window.chrome) {
      window.chrome = {
        runtime: {},
        loadTimes: function() {},
        csi: function() {},
        app: {}
      };
    }
  });

  // Request interception is not enabled here; the init scripts above already
  // run before any script of the page.
}
/**
 * Demo entry point: launches a headless browser, applies the anti-detection
 * setup from `setBrowserPage`, opens the URL given as the first command-line
 * argument, injects a helper script into the page, waits for an element,
 * reads the page HTML and takes screenshots.
 *
 * The browser is always closed, even when a step fails, and any failure is
 * logged and reflected in the process exit code.
 */
(async () => {
  // Target page, taken from the command line: `node <script> <url>`.
  const url = process.argv[2];
  if (!url) {
    throw new Error('Missing target URL. Usage: node <script> <url>');
  }

  // 600000 ms = 10 minutes.
  const NAVIGATION_TIMEOUT_MS = 600000;

  // Launch the browser.
  // `devtools` is deliberately not enabled: Puppeteer forces a visible
  // (non-headless) window when it is, which defeats `headless: true`.
  const browser = await puppeteer.launch({
    headless: true,
    args: ['--disable-gpu', '--disable-setuid-sandbox', '--no-sandbox', '--no-zygote'],
    ignoreDefaultArgs: ["--enable-automation"],
    defaultViewport: {
      width: 1500,
      height: 800
    }
  });

  try {
    // Open a new tab.
    const page = await browser.newPage();
    page.setDefaultNavigationTimeout(NAVIGATION_TIMEOUT_MS);

    // Must run before navigation so the overrides apply to the loaded page;
    // `navigator.webdriver` is masked there, before any page script executes.
    await setBrowserPage(page);
    await page.goto(url);

    await page.evaluate(() => {
      // Everything in this callback executes inside the page, not in Node.
      // Inject an external script; the load is not awaited (fire-and-forget).
      const script = document.createElement('script');
      // The timestamp query parameter is presumably a cache-buster.
      script.src = `http://127.0.0.1:8789/static/js/xxx.js?t=${Date.now()}`;
      script.onload = () => {
        // Runs once the injected script has finished loading.
        console.log('脚本已加载');
      };
      document.head.appendChild(script);
    });

    // Wait until the target element appears.
    await page.waitForSelector('#id');

    // Read the page HTML (the result is not used in this demo).
    await page.content();

    // Save a screenshot to disk.
    await page.screenshot({path: 'test.png'});
    // Without a `path` the image is only returned in memory (unused here).
    await page.screenshot();

    // Close the page.
    await page.close();
  } finally {
    // Close the browser even if a step above failed.
    await browser.close();
  }
})().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
