使用 Chrome Headless(透過 Puppeteer)爬取 HTTP 訪問記錄,代碼如下:
const Puppeteer = require("puppeteer");
/**
 * Launches headless Chrome through Puppeteer, opens https://www.baidu.com and
 * prints the URL of every request, response, failed request and finished
 * request to stdout.
 *
 * The browser is always closed in a `finally` block so a navigation or
 * interception error cannot leak the Chrome process. Any error (including a
 * failed launch) is reported on stderr and reflected in the exit code.
 */
(async () => {
  // If launch() rejects there is no browser to close; the rejection is
  // handled by the trailing .catch() below.
  const browser = await Puppeteer.launch({
    headless: true,
  });

  try {
    const page = await browser.newPage();
    await page.setRequestInterception(true);

    page.on('request', (request) => {
      console.log(request.url());
      // With interception enabled every request stalls until it is explicitly
      // continued. continue() returns a promise that can reject (e.g. when the
      // page is already closing), so handle it instead of leaving it floating.
      request.continue().catch((err) => {
        console.error(`Failed to continue request ${request.url()}:`, err);
      });
    });
    page.on('response', (response) => {
      console.log(response.url());
    });
    page.on('requestfailed', (request) => {
      console.log(request.url());
    });
    page.on('requestfinished', (request) => {
      console.log(request.url());
    });

    await page.goto('https://www.baidu.com');
  } finally {
    // Runs on both success and failure so the Chrome process is never leaked.
    await browser.close();
  }
})().catch((err) => {
  console.error('Crawl failed:', err);
  process.exitCode = 1;
});
以上是一個簡單的例子。
基於以上思路,可以實現靜態資源與 API 的爬取,再對 URL 做相似度分析並存儲,供漏洞掃描使用。此功能仍在迭代中,敬請期待。