您好,登錄后才能下訂單哦!
有這麼一個需求:首先從 CSV 文件中讀取要解析的 URL 數據,然後使用 puppeteer 和 puppeteer-har 來獲取瀏覽器的 HAR 數據。在調試的過程中,發現在 for 循環中怎麼操作都是異步的,最後找到了一個解決方案,也算在此記錄。
const fs = require("fs");
const path = require("path");
const csv =require('csv');
const parse = require('csv-parse/lib/sync')
const iconv = require('iconv-lite');
/*
npm install iconv-lite
*/
/**
 * Read a CSV file from disk and parse it into an array of row objects.
 *
 * The file is read synchronously as UTF-8 and passed to `parse` with
 * `columns: true`, so every record is an object keyed by the header row.
 * Sample record taken from the original author's data:
 *
 *   { ' ': '142',
 *     AREA_NAME: '湖北',
 *     SITE_LINK: 'www.banggo.com',
 *     BEARING_MODE: '移動接入',
 *     SITE_NAME: '邦購',
 *     MENU_TYPE: '二級' }
 *
 * @param {string} csvpath - Path of the CSV file to read.
 * @returns {Array<Object>} The value returned by `parse` for the file content.
 */
function readUrlRecord(csvpath) {
    console.log('開始解析文件:' + csvpath);

    // Header row becomes the object keys; blank lines are dropped.
    const parseOptions = {
        columns: true,
        skip_empty_lines: true,
        delimiter: ',',
    };

    const content = fs.readFileSync(csvpath, 'utf8');
    return parse(content, parseOptions);
}
//readUrlRecord('../top300.csv') ;
exports.readUrlRecord = readUrlRecord;
const fs = require('fs');
const { promisify } = require('util');
const path = require("path");
const puppeteer = require('puppeteer');
const { harFromMessages } = require('chrome-har');
const logger=require("./log");
const log = logger.getPuppeteerHarEventRecordLogger() ;
//https://michaljanaszek.com/blog/generate-har-with-puppeteer
//https://www.npmjs.com/package/chrome-har
// Names of the DevTools-protocol events to listen for. Every event received
// under one of these names is recorded and later fed to the HAR converter.
const observe = [
    // Page-level events
    'Page.loadEventFired',
    'Page.domContentEventFired',
    'Page.frameStartedLoading',
    'Page.frameAttached',

    // Network-level events
    'Network.requestWillBeSent',
    'Network.requestServedFromCache',
    'Network.dataReceived',
    'Network.responseReceived',
    'Network.resourceChangedPriority',
    'Network.loadingFinished',
    'Network.loadingFailed',
];
/*
啟動瀏覽器
*/
/**
 * Launch a browser through puppeteer using this module's fixed option set.
 *
 * @returns {Promise<Object>} Resolves with whatever `puppeteer.launch` yields.
 */
async function launchBrowser() {
    // Command-line switches passed to the browser
    // (see https://peter.sh/experiments/chromium-command-line-switches/).
    const browserArgs = [
        "--disk-cache-size=0",
        "--disable-cache",
        '--disable-infobars',
        '--window-size=800,600',
        '--ignore-certificate-errors',
        // NOTE(review): looks like a misspelling of "--enable-features";
        // kept as-is to preserve behavior — confirm and fix or remove.
        '--enable-feaures',
    ];

    const launchOptions = {
        // A manually downloaded Chromium would need `executablePath` here;
        // without it puppeteer uses its own bundled download.
        // Ignore HTTPS errors when visiting https pages.
        ignoreHTTPSErrors: true,
        // Run headless, i.e. without opening a visible browser window.
        headless: true,
        args: browserArgs,
        // Do not auto-open a DevTools panel per tab (true would force headless off).
        devtools: false,
        // Launch timeout; defaults to 30000 ms, 0 disables it.
        timeout: 0,
        // `slowMo` could be added here to slow puppeteer down while debugging.
    };

    return puppeteer.launch(launchOptions);
}
/**
 * Open `url` in a freshly launched browser, record the DevTools-protocol
 * events listed in `observe`, convert them to HAR and write the result as
 * JSON to `dirPath/filename`.
 *
 * A navigation failure is logged and does not reject: whatever events were
 * captured up to that point are still written. Failures while preparing the
 * page/CDP session, or while writing the file, reject the returned promise.
 * In every case the browser that was launched is closed again.
 *
 * @param {string} url - Site to load; "http://" is prepended when no
 *   http/https scheme is present.
 * @param {string} dirPath - Directory the HAR file is written into.
 * @param {string} filename - Name of the HAR file inside `dirPath`.
 * @returns {Promise<void>} Resolves once the HAR file has been written.
 */
async function saveHarlog(url, dirPath, filename) {
    // Destination of the HAR file.
    const harFilePath = path.join(dirPath, filename);

    // Normalize the address without reassigning the parameter.
    const hasScheme = url.startsWith('http://') || url.startsWith('https://');
    const targetUrl = hasScheme ? url : "http://" + url;

    // Collected { method, params } pairs that are converted to HAR afterwards.
    const events = [];

    const browser = await launchBrowser();
    try {
        // Reuse the tab the browser starts with instead of opening a new one.
        const page = (await browser.pages())[0];

        // Subscribe to the protocol events before navigating.
        const client = await page.target().createCDPSession();
        await client.send('Page.enable');
        await client.send('Network.enable');
        observe.forEach(method => {
            client.on(method, params => {
                events.push({ method, params });
            });
        });

        try {
            // timeout: 0 disables puppeteer's navigation timeout.
            await page.goto(targetUrl, {
                timeout: 0
            });
        } catch (error) {
            // Best effort: keep the partial capture and report the failure.
            log.info('resovle error :' + targetUrl + "; error message:" + error);
        }
    } finally {
        // Covers the setup steps too, so a failed CDP setup no longer
        // leaves a browser process running.
        await browser.close();
    }

    const har = harFromMessages(events);
    await promisify(fs.writeFile)(harFilePath, JSON.stringify(har));
}
exports.launchBrowser = launchBrowser;
exports.saveHarlog = saveHarlog;
const fs = require("fs");
const path = require("path");
const moment = require("moment");
const schedule = require('node-schedule');
const cvsresovler=require("./module/cvsresovle");
const mhar=require("./module/puppeteerhar-event");
/*
cnpm install --save moment
cnpm install --save csv
cnpm install --save node-schedule
cnpm install --save puppeteer
cnpm install --save puppeteer-har
cnpm install --save iconv-lite
cnpm install --save chrome-har
cnpm install --save grpc
*/
/**
 * Make sure the output directory for one run exists.
 * Creates it, including missing parent directories, when it is absent.
 *
 * @param {string} dirPath - Directory that must exist afterwards.
 * @param {string} label - Text appended to the "creating folder" log line.
 */
function ensureHarDirectory(dirPath, label) {
    const alreadyDirectory = fs.existsSync(dirPath) && fs.lstatSync(dirPath).isDirectory();
    if (!alreadyDirectory) {
        console.log("創(chuàng)建文件夾" + label);
        // recursive: also creates a missing "harlogs" parent directory.
        fs.mkdirSync(dirPath, { recursive: true });
    }
}

/**
 * Capture a HAR file for every record, strictly one after another.
 * Records without a usable SITE_LINK are skipped; a failure for one site is
 * logged and does not stop the remaining sites.
 *
 * @param {Array<Object>} dataArr - Parsed CSV records (SITE_LINK is read).
 * @param {string} dirPath - Directory the HAR files are written into.
 * @returns {Promise<void>} Resolves after the last record was processed.
 */
async function captureSitesSequentially(dataArr, dirPath) {
    for (let i = 0; i < dataArr.length; i++) {
        const data = dataArr[i];
        const rawLink = data ? data['SITE_LINK'] : undefined;
        const url = typeof rawLink === 'string' ? rawLink.trim() : '';
        if (!url) {
            continue;
        }
        // Slashes and backslashes would be read as path separators.
        const filename = url.replace(/\//g, '-').replace(/\\/g, '-') + '.har';
        console.log((i + 1) + "-starting to resovle url :" + url);
        try {
            // Awaiting inside a plain loop keeps the captures sequential.
            await mhar.saveHarlog(url, dirPath, "N" + "-" + filename);
        } catch (error) {
            console.log(error);
        }
    }
}

/**
 * Register the scheduled job that captures HAR files for every URL listed in
 * `top300.csv` (next to this script) into `harlogs/<YYYYMMDDHHmm>/`.
 */
function init() {
    console.log('初始化調(diào)度器');
    // Six-field cron spec (second minute hour day month weekday):
    // fires once a day at 08:55:00.
    schedule.scheduleJob('0 55 8 * * *', async () => {
        try {
            const ftime = moment().format('YYYYMMDDHHmm');
            console.log('當(dāng)前調(diào)度時間為:' + ftime);
            const dirPath = path.join(__dirname, 'harlogs', ftime);
            console.log("創(chuàng)建目錄:" + dirPath);
            ensureHarDirectory(dirPath, ftime);

            // Load the list of URLs to process.
            const dataArr = cvsresovler.readUrlRecord(path.join(__dirname, 'top300.csv'));
            console.log("解析出URL共計" + dataArr.length + "條");

            await captureSitesSequentially(dataArr, dirPath);
        } catch (error) {
            // Keep the scheduler alive for the next run; report the failure.
            console.log(error);
        }
    });
    console.log('應(yīng)用程序啟動完成');
}
//執(zhí)行
init();
/**
用于測試的方法
*/
/**
 * Manual smoke test: capture a HAR file for a single URL.
 *
 * The URL is taken from the first command-line argument and falls back to
 * "www.baidu.com". Output goes to `harlogs/<YYYYMMDDHHmm>/NT-<name>.har`
 * next to this script.
 *
 * @returns {Promise<void>} Resolves when the capture attempt has finished;
 *   a capture failure is logged, not thrown.
 */
async function test() {
    const ftime = moment().format('YYYYMMDDHHmm');
    console.log('當(dāng)前執(zhí)行時間為:' + ftime);
    const dirPath = path.join(__dirname, 'harlogs', ftime);
    console.log("創(chuàng)建目錄:" + dirPath);

    const dirExists = fs.existsSync(dirPath) && fs.lstatSync(dirPath).isDirectory();
    if (!dirExists) {
        console.log("創(chuàng)建文件夾" + ftime);
        // recursive: also creates a missing "harlogs" parent directory.
        fs.mkdirSync(dirPath, { recursive: true });
    }

    // slice (not splice) leaves process.argv untouched; the local is not
    // called `arguments`, which cannot be declared in strict mode.
    const cliArgs = process.argv.slice(2);
    let url = cliArgs.length > 0 ? cliArgs[0] : "www.baidu.com";
    url = url.trim();
    if (!url) {
        return;
    }

    // Derive the file name before the scheme is added; same ".har" suffix
    // as the scheduled job uses.
    const filename = url.replace(/\//g, '-').replace(/\\/g, '-') + '.har';
    if (!(url.startsWith('http://') || url.startsWith('https://'))) {
        url = "http://" + url;
    }
    console.log("starting to resovle test url :" + url);
    try {
        await mhar.saveHarlog(url, dirPath, "NT" + "-" + filename);
    } catch (error) {
        console.log(error);
    }
}
//運行測試
//test() ;
免責聲明:本站發布的內容(圖片、視頻和文字)以原創、轉載和分享為主,文章觀點不代表本網站立場,如果涉及侵權請聯繫站長郵箱:is@yisu.com進行舉報,並提供相關證據,一經查實,將立刻刪除涉嫌侵權內容。