初始化:
1.確認已安裝 node
2.新建一個文件夾
3.在該文件夾中初始化node應用
npm init
安裝依賴:
- 使用express框架
- 使用superagent庫:
superagent 是一個輕量級、漸進式的請求庫,內部依賴 nodejs 原生的請求 api,適用於 nodejs 環境
- 使用cheerio庫:
cheerio 是 nodejs 的頁面(HTML)解析模塊,是爲服務器特別定製的、快速、靈活、精簡的 jQuery 核心實現,適合各種 Web 爬蟲程序,可以看作 node.js 版的 jQuery。
npm i express cheerio superagent -D
代碼展示:
1.首先引入模塊
const express = require("express")
const app = express()
const fs = require("fs")
const superagent = require("superagent")
const cheerio = require("cheerio")
2.聲明要爬的網站URL
// Base URL of the listing pages to crawl; the page number is appended to it
// when the request URL is built.
const lagouURL = "https://www.lagou.com/guangzhou-zhaopin/webqianduan/";
// Path/query suffix appended after the page number.
// NOTE(review): `sid` looks like a session-specific id — confirm it is still valid before running.
const code = "/?filterOption=3&sid=b87c46399fd24f618b97b395f945ab1b";
3.請求數據
// Request the page at `url`. A failed request is only logged; a successful
// response is parsed by getHotData and its records are appended to ListData.
superagent.get(url).end((err, res) => {
  if (err) {
    console.log("獲取失敗")
    return
  }
  const pageRecords = getHotData(res, i)
  ListData = ListData.concat(pageRecords)
})
4.分析數據
/**
 * Extracts the job entries from one fetched listing page.
 *
 * @param {{text: string}} res - response whose `text` holds the page HTML.
 * @param {number} i - 1-based page number; used only to compute record ids.
 * @returns {Array<Object>} one record per `.con_list_item` element found
 *   under `#s_position_list ul`, in document order.
 */
let getHotData = (res, i) => {
  const $ = cheerio.load(res.text)
  const records = []
  // Ids are running numbers that assume 15 entries on every page.
  const pageOffset = (i - 1) * 15

  $("#s_position_list ul>.con_list_item").each((index, element) => {
    // All lookups start from the entry's direct children.
    const $item = $(element).children()
    const textOf = (selector) => $item.find(selector).text()
    const attrOf = (selector, name) => $item.find(selector).attr(name)

    records.push({
      id: pageOffset + index + 1,
      position: textOf(".list_item_top .position .p_top a h3"),
      region: textOf(".list_item_top .position .p_top a .add"),
      title: textOf(".list_item_top .position .p_top a .format-time"),
      link: attrOf(".list_item_top .position .p_top a", "href"),
      money: textOf(".list_item_top .position .p_bot .li_b_l"),
      education: textOf(".list_item_top .position .p_bot .li_b_l span"),
      company_name: textOf(".list_item_top .company .company_name a"),
      com_link: attrOf(".list_item_top .com_logo a", "href"),
      com_logo: attrOf(".list_item_top .com_logo a img", "src"),
      industry: textOf(".list_item_top .company .industry"),
      skill: textOf(".list_item_bot .li_b_l span"),
      guarantee: textOf(".list_item_bot .li_b_r"),
    })
  })

  return records
}
5.保存數據
// Save the collected records as JSON next to this script.
// fs.writeFileSync is synchronous: it accepts no callback and reports a
// failure by throwing, so the error has to be caught with try/catch.
try {
  fs.writeFileSync(`${__dirname}/data.json`, JSON.stringify(ListData), 'utf-8')
} catch (err) {
  console.log(err)
}
6.完整代碼
const express = require("express")
const app = express()
const fs = require("fs")
const superagent = require("superagent")
const cheerio = require("cheerio")
// Base URL of the listing pages to crawl; the page number is appended to it
// when the request URL is built.
const lagouURL = "https://www.lagou.com/guangzhou-zhaopin/webqianduan/";
// Path/query suffix appended after the page number.
// NOTE(review): `sid` looks like a session-specific id — confirm it is still valid before running.
const code = "/?filterOption=3&sid=b87c46399fd24f618b97b395f945ab1b";
/**
 * GET / — starts one crawl run.
 *
 * Once per second the next listing page (1..29) is requested. After every
 * issued request has been answered, the collected records are sorted by id,
 * written to data.json next to this script (only when at least one record
 * was collected) and sent back as the HTTP response.
 *
 * NOTE(review): the counter stops at 30 before requesting, so page 30 itself
 * is never fetched — confirm that this is intended.
 *
 * @param req - Express request (unused).
 * @param key - Express response; receives the collected records.
 */
app.get("/", (req, key) => {
  let i = 0
  let pending = 0        // requests issued but not yet answered
  let allIssued = false  // true once the last page has been requested
  let finished = false   // guards against finishing twice
  let ListData = []

  const finish = () => {
    if (finished) {
      return
    }
    finished = true
    // Responses can arrive out of order; sort so the output is deterministic.
    ListData.sort((a, b) => a.id - b.id)
    if (ListData.length > 0) {
      // writeFileSync takes no callback and throws on failure.
      try {
        fs.writeFileSync(`${__dirname}/data.json`, JSON.stringify(ListData), 'utf-8')
      } catch (err) {
        console.log(err)
      }
    }
    // Answer the request so the client is not left waiting forever.
    key.send(ListData)
  }

  let timer = setInterval(() => {
    i++
    if (i >= 30) {
      clearInterval(timer)
      allIssued = true
      if (pending === 0) {
        finish()
      }
      return
    }

    // Freeze the page number for this request: the shared counter may have
    // advanced by the time the response callback runs.
    const page = i
    const url = lagouURL + page + code
    console.log(url, page)
    pending++
    // The timeout ensures a stalled request cannot block the final write.
    superagent.get(url).timeout(10000).end((err, res) => {
      if (err) {
        console.log("獲取失敗")
      } else {
        let data = getHotData(res, page)
        ListData = ListData.concat(data)
      }
      pending--
      if (allIssued && pending === 0) {
        finish()
      }
    })
  }, 1000)
})
/**
 * Extracts the job entries from one fetched listing page.
 *
 * @param {{text: string}} res - response whose `text` holds the page HTML.
 * @param {number} i - 1-based page number; used only to compute record ids.
 * @returns {Array<Object>} one record per `.con_list_item` element found
 *   under `#s_position_list ul`, in document order.
 */
let getHotData = (res, i) => {
  const $ = cheerio.load(res.text)
  const records = []
  // Ids are running numbers that assume 15 entries on every page.
  const pageOffset = (i - 1) * 15

  $("#s_position_list ul>.con_list_item").each((index, element) => {
    // All lookups start from the entry's direct children.
    const $item = $(element).children()
    const textOf = (selector) => $item.find(selector).text()
    const attrOf = (selector, name) => $item.find(selector).attr(name)

    records.push({
      id: pageOffset + index + 1,
      position: textOf(".list_item_top .position .p_top a h3"),
      region: textOf(".list_item_top .position .p_top a .add"),
      title: textOf(".list_item_top .position .p_top a .format-time"),
      link: attrOf(".list_item_top .position .p_top a", "href"),
      money: textOf(".list_item_top .position .p_bot .li_b_l"),
      education: textOf(".list_item_top .position .p_bot .li_b_l span"),
      company_name: textOf(".list_item_top .company .company_name a"),
      com_link: attrOf(".list_item_top .com_logo a", "href"),
      com_logo: attrOf(".list_item_top .com_logo a img", "src"),
      industry: textOf(".list_item_top .company .industry"),
      skill: textOf(".list_item_bot .li_b_l span"),
      guarantee: textOf(".list_item_bot .li_b_r"),
    })
  })

  return records
}
// Start the HTTP server on port 3000 and log a message once it is listening.
app.listen(3000, () => console.log("啓動成功"))