// 連接使用數據庫
import {connection} from './../../common/dbConnect.js'
import {senderMsg} from './../../common/mailer.js'
// 引入爬蟲需要的網絡請求模塊
const https = require('https')
const fs = require('fs')
// URL of the site to crawl
const url = 'https://douban.fm/j/v2/songlist/explore?type=hot&genre=0&limit=20&sample_cnt=5' // API endpoint the data is fetched from (requested by getWebData)
const urlDir = 'https://douban.fm/explore/songlists' // URL of the page as visited in a browser (not referenced by the code visible in this file)
// Rows whose INSERT failed (filled by fillbd)
let errorInfoGroup = []
// Rows whose INSERT succeeded (filled by fillbd)
let successInfoGroup = []
// Handle of the polling timer that runs endConnection; assigned once the inserts have been issued
let testInterval
// Log file written by writeLog and truncated by clearLog; a relative path, so it is resolved against the process working directory
const logPath = '../../../../log.js'
// Message object handed to senderMsg.sendMail by postMail; postMail overwrites `text` before each send
let receiverMsg = {
from: '。。。@qq.com',
to: '。。。@163.com',
subject: '測試發郵件',
text: '你能不能收到我發的郵件呢',
html: '',
attachments: []
}
/**
 * Fetch the data from the crawl endpoint (`url`).
 *
 * The response body is buffered, parsed as JSON and reversed before it is
 * passed on.
 *
 * @returns {Promise<Array>} Resolves with the parsed list in reversed order.
 *   Rejects when the request or the response stream fails, when the body is
 *   not valid JSON, or when the parsed value is not an array.
 */
let getWebData = function () {
  console.log('程序開始執行......')
  return new Promise((resolve, reject) => {
    const request = https.get(url, (res) => {
      const chunks = []
      res.on('data', (chunk) => {
        chunks.push(chunk)
      })
      res.on('error', reject)
      res.on('end', () => {
        // This runs in an event callback: an exception thrown here would escape
        // the promise and take the process down, so turn it into a rejection.
        try {
          const list = JSON.parse(Buffer.concat(chunks).toString())
          if (!Array.isArray(list)) {
            reject(new Error('Unexpected response: expected a JSON array'))
            return
          }
          resolve(list.reverse())
        } catch (err) {
          reject(err)
        }
      })
    })
    // Without this listener a DNS/connection failure is an unhandled 'error' event.
    request.on('error', reject)
  })
}
/**
 * Derive the CREATE TABLE statement for `doubanAlbum` from the fields of the
 * first fetched record.
 *
 * Every field becomes a VARCHAR(255) column except `id`, which becomes the
 * INT primary key.
 *
 * @param {Array<Object>} html - Records returned by the crawl endpoint.
 * @returns {Promise<Array>} Resolves with `[createString, html]`; a promise
 *   carries a single value, so both are packed into one array. Rejects when
 *   there is no first record to inspect or when it has no `id` field.
 */
let getTableSkelon = function (html) {
  return new Promise((resolve, reject) => {
    const sample = Array.isArray(html) ? html[0] : undefined
    if (sample === null || typeof sample !== 'object') {
      reject(new Error('Cannot derive table structure: no records were returned'))
      return
    }
    const columns = Object.keys(sample)
    if (columns.indexOf('id') === -1) {
      reject(new Error('Cannot derive table structure: first record has no "id" field'))
      return
    }
    // Column names come from the remote payload. Backtick-quote them (doubling
    // any embedded backtick) so reserved words or odd characters cannot break
    // or alter the statement.
    const quote = (name) => '`' + String(name).replace(/`/g, '``') + '`'
    const definitions = columns.map((name) => {
      const columnType = name === 'id' ? ' INT (10) NOT NULL' : ' VARCHAR (255) DEFAULT NULL'
      return quote(name) + columnType
    })
    const createString = 'CREATE TABLE IF NOT EXISTS doubanAlbum (' +
      definitions.join(', ') + ', PRIMARY KEY (`id`))'
    resolve([createString, html])
  })
}
/**
 * Run the CREATE TABLE statement.
 *
 * @param {Array} createString - `[sql, rows]` pair as produced by getTableSkelon.
 * @returns {Promise<Array>} Resolves with `rows` (the second element) once the
 *   statement has run; rejects with the query error if it fails.
 */
let createTable = function (createString) {
  return new Promise((resolve, reject) => {
    const [sql, rows] = createString
    connection.query(sql, (err) => {
      if (err) {
        // Throwing inside the query callback would bypass the promise and
        // surface as an uncaught exception; reject so callers can handle it.
        reject(err)
        return
      }
      resolve(rows)
    })
  })
}
/**
 * Flatten one field value into something that fits a single table column.
 *
 * - null and non-object values are returned unchanged
 * - the `sample_songs` array becomes a comma-separated list of its items' `sid`
 * - any other array becomes its JSON text
 * - an object becomes its own `id`, or null when it has none
 */
const normalizeFieldValue = function (key, value) {
  if (value === null || typeof value !== 'object') {
    return value
  }
  if (Array.isArray(value)) {
    return key === 'sample_songs'
      ? value.map((song) => song.sid).join(',')
      : JSON.stringify(value)
  }
  return Object.prototype.hasOwnProperty.call(value, 'id') ? value.id : null
}

/**
 * Normalize a fetched record in place and hand it to fillbd for insertion.
 *
 * @param {Object} newInfo - One fetched record; its fields are overwritten
 *   with their flattened values.
 * @returns {Promise<Object>} Resolves with the normalized record. It settles
 *   as soon as the record has been normalized, not when the INSERT completes;
 *   the insert outcome is recorded by fillbd.
 */
let fillTable = function (newInfo) {
  const fillPromise = new Promise((resolve) => {
    Object.keys(newInfo).forEach((key) => {
      newInfo[key] = normalizeFieldValue(key, newInfo[key])
    })
    // The promise previously never settled, so anything awaiting it hung forever.
    resolve(newInfo)
  })
  fillbd(newInfo)
  return fillPromise
}
/**
 * Insert one normalized record into `doubanAlbum` and record the outcome.
 *
 * The record is appended to `errorInfoGroup` when the query reports an error
 * and to `successInfoGroup` otherwise.
 *
 * @param {Object} newInfo - Column/value map passed as the query's values.
 */
let fillbd = function (newInfo) {
  const recordOutcome = (err) => {
    const bucket = err ? errorInfoGroup : successInfoGroup
    bucket.push(newInfo)
  }
  connection.query('INSERT INTO doubanAlbum SET ?', newInfo, recordOutcome)
}
/**
 * Send `text` by mail using the shared `receiverMsg` message.
 *
 * Overwrites `receiverMsg.text` with `text`, then passes the message to
 * `senderMsg.sendMail` and logs either the error or the message id and
 * response.
 *
 * @param {string} text - Body to place in the message's `text` field.
 */
let postMail = function (text) {
  const reportResult = (error, info) => {
    if (error) {
      console.log(error)
      return
    }
    console.log(`Message: ${info.messageId}`)
    console.log(`sent: ${info.response}`)
  }

  console.log('郵件發送中')
  receiverMsg.text = text
  senderMsg.sendMail(receiverMsg, reportResult)
}
/**
 * Append `string` to the log file at `logPath`.
 *
 * Failures are logged instead of thrown: an exception raised inside an fs
 * callback cannot be caught by the caller and would end the process.
 *
 * @param {string} string - Text to append.
 */
let writeLog = function (string) {
  // appendFile opens in append mode, writes and always closes the descriptor,
  // so nothing leaks when the write fails.
  fs.appendFile(logPath, string, (err) => {
    if (err) {
      console.log(err)
      return
    }
    console.log('已成功寫入log')
  })
}
/**
 * Empty the log file at `logPath` by overwriting it with an empty string.
 *
 * Logs the error when the write fails, otherwise logs a confirmation.
 */
let clearLog = function () {
  const report = (err) => {
    console.log(err ? err : '清空成功')
  }
  fs.writeFile(logPath, '', 'utf8', report)
}
/**
 * Polling step run after the inserts have been issued.
 *
 * While some inserts are still outstanding it does nothing, so the next tick
 * checks again. Once every record is accounted for (succeeded + failed covers
 * `html.length`) it stops the polling timer, reports the failed records (console,
 * log file and mail) if there are any, and closes the database connection.
 *
 * Previously the report was sent on every tick while inserts were still
 * pending and never once they had finished.
 *
 * @param {Array} html - The records that were submitted for insertion.
 * @returns {Promise<boolean>} Resolves with true when the run was finalized,
 *   false when inserts are still pending.
 */
let endConnection = function (html) {
  return new Promise((resolve) => {
    const settled = successInfoGroup.length + errorInfoGroup.length
    if (settled < html.length) {
      resolve(false)
      return
    }

    clearInterval(testInterval)
    if (errorInfoGroup.length > 0) {
      console.log('錯誤數據')
      console.log(errorInfoGroup)
      const report = JSON.stringify(errorInfoGroup)
      clearLog()
      writeLog(report)
      postMail(report)
    }
    connection.end()
    resolve(true)
  })
}
// Pipeline: fetch the data, derive and create the table, issue one insert per
// record, then poll endConnection every second until all inserts have settled.
getWebData()
  .then((list) => getTableSkelon(list))
  .then((tableSpec) => createTable(tableSpec))
  .then((rows) => {
    rows.forEach((row) => {
      fillTable(row)
    })
    testInterval = setInterval(endConnection, 1000, rows)
  })
  .catch((err) => {
    // Without a rejection handler a failure in any step surfaced as an
    // unhandled rejection and left the connection open.
    console.log(err)
    clearInterval(testInterval)
    // NOTE(review): assumes connection.end accepts a completion callback, as
    // the mysql driver's connection does; confirm against dbConnect.js.
    connection.end((endErr) => {
      if (endErr) {
        console.log(endErr)
      }
    })
  })