node + mysql 爬取網頁數據並寫入數據庫(Promise 優化,發郵件優化)

// 連接使用數據庫
import {connection} from './../../common/dbConnect.js'
import {senderMsg} from './../../common/mailer.js'
// 引入爬蟲需要的網絡請求模塊
const https = require('https')
const fs = require('fs')

// URLs of the site being crawled
const url = 'https://douban.fm/j/v2/songlist/explore?type=hot&genre=0&limit=20&sample_cnt=5' // JSON API endpoint that the crawler requests
const urlDir = 'https://douban.fm/explore/songlists' // page URL as opened in a browser; not referenced by the visible code

// Rows whose INSERT failed / succeeded; filled by fillbd and counted by endConnection
let errorInfoGroup = []
let successInfoGroup = []
// Handle of the polling timer created with setInterval at the end of the pipeline
let testInterval

// Log file that writeLog appends to and clearLog truncates.
// NOTE(review): the path is relative, so it presumably resolves against the process working directory — confirm
const logPath = '../../../../log.js'

// Mail options handed to senderMsg.sendMail; postMail overwrites `text` before every send
let receiverMsg = {
  from: '。。。@qq.com',
  to: '。。。@163.com',
  subject: '測試發郵件',
  text: '你能不能收到我發的郵件呢',
  html: '',
  attachments: []
}

/**
 * Download the song-list data from the API endpoint stored in `url`.
 *
 * Every failure path rejects the returned promise instead of throwing from
 * inside an event handler (which would crash the process and leave the
 * promise pending forever).
 *
 * @returns {Promise<Array>} Resolves with the parsed JSON array in reversed
 *   order. Rejects on a request/response stream error, a non-2xx status
 *   code, or a body that is not a JSON array.
 */
let getWebData = function () {
  console.log('程序開始執行......')
  return new Promise((resolve, reject) => {
    const request = https.get(url, (res) => {
      if (res.statusCode < 200 || res.statusCode >= 300) {
        // Drain the unread body so the underlying socket can be released
        res.resume()
        reject(new Error(`Request failed with status code ${res.statusCode}`))
        return
      }
      const chunks = []
      res.on('data', (chunk) => {
        chunks.push(chunk)
      })
      res.on('error', reject)
      res.on('end', () => {
        try {
          const parsed = JSON.parse(Buffer.concat(chunks).toString())
          if (!Array.isArray(parsed)) {
            throw new TypeError('Expected the response body to be a JSON array')
          }
          resolve(parsed.reverse())
        } catch (err) {
          // Malformed or unexpected payload
          reject(err)
        }
      })
    })
    // DNS failures, refused connections, TLS errors, ...
    request.on('error', reject)
  })
}

/**
 * Build the CREATE TABLE statement for `doubanAlbum` from the field names of
 * the first fetched row.
 *
 * `id` becomes the INT primary key; every other field becomes a nullable
 * VARCHAR(255) column. Column names come from remote data, so they are
 * backtick-quoted (embedded backticks doubled) to keep reserved words and
 * odd characters from breaking the statement.
 *
 * @param {Array<Object>} html Rows returned by the API; only html[0] is inspected.
 * @returns {Promise<Array>} Resolves with `[createString, html]` (a promise
 *   carries a single value, hence the array). Rejects with a TypeError when
 *   `html` has no usable first row or that row has no `id` field.
 */
let getTableSkelon = function (html) {
  return new Promise((resolve, reject) => {
    const sample = Array.isArray(html) ? html[0] : undefined
    if (sample === null || typeof sample !== 'object') {
      reject(new TypeError('getTableSkelon expects a non-empty array of row objects'))
      return
    }
    if (!Object.prototype.hasOwnProperty.call(sample, 'id')) {
      // PRIMARY KEY (id) would reference a column that does not exist
      reject(new TypeError('getTableSkelon requires the first row to have an "id" field'))
      return
    }

    const quote = (name) => '`' + String(name).replace(/`/g, '``') + '`'
    const columns = Object.keys(sample).map((key) => {
      if (key === 'id') {
        return '`id` INT (10) NOT NULL'
      }
      return quote(key) + ' VARCHAR (255) DEFAULT NULL'
    })

    const createString =
      'CREATE TABLE IF NOT EXISTS doubanAlbum (' + columns.join(', ') + ', PRIMARY KEY (`id`))'
    resolve([createString, html])
  })
}


/**
 * Run the CREATE TABLE statement produced by getTableSkelon.
 *
 * @param {Array} createString Pair `[sql, rows]`: element 0 is the statement
 *   to execute, element 1 is passed through untouched to the next step.
 * @returns {Promise<Array>} Resolves with `createString[1]` once the query
 *   succeeds; rejects with the driver error otherwise.
 */
let createTable = function (createString) {
  return new Promise((resolve, reject) => {
    connection.query(createString[0], (err) => {
      if (err) {
        // Throwing here would escape the promise (the callback runs later,
        // outside the executor) and surface as an uncaught exception.
        reject(err)
        return
      }
      resolve(createString[1])
    })
  })
}

/**
 * Flatten one fetched row into scalar column values and hand it to fillbd
 * for insertion.
 *
 * Conversion rules per field:
 *   - `sample_songs` array  -> comma-separated list of each song's `sid`
 *   - any other array       -> its JSON text
 *   - object with own `id`  -> that `id`
 *   - object without `id`   -> null
 *   - everything else (including null) is left untouched
 *
 * The row is modified in place, exactly as before, so the object later
 * stored in the success/error groups is the flattened one.
 *
 * @param {Object} newInfo Raw row from the API; mutated in place.
 * @returns {Promise<Object>} Resolves with the flattened row once it has been
 *   passed to fillbd (previously this promise never settled). Rejects if
 *   flattening throws; in that case fillbd is not called.
 */
let fillTable = function (newInfo) {
  // Map a single raw field value to the value stored in its column
  const toColumnValue = (key, value) => {
    if (value === null || typeof value !== 'object') {
      return value
    }
    if (Array.isArray(value)) {
      return key === 'sample_songs'
        ? value.map((song) => song.sid).join(',')
        : JSON.stringify(value)
    }
    return Object.prototype.hasOwnProperty.call(value, 'id') ? value.id : null
  }

  return new Promise((resolve) => {
    Object.keys(newInfo).forEach((key) => {
      newInfo[key] = toColumnValue(key, newInfo[key])
    })
    fillbd(newInfo)
    resolve(newInfo)
  })
}

/**
 * Insert one flattened row into `doubanAlbum` and record the outcome.
 *
 * The row is appended to errorInfoGroup when the query reports an error and
 * to successInfoGroup otherwise; the error object itself is not kept.
 *
 * @param {Object} newInfo Column-name -> value map for the new row.
 */
let fillbd = function (newInfo) {
  const insertSql = 'INSERT INTO doubanAlbum SET ?'
  const recordOutcome = (err, _result) => {
    const bucket = err ? errorInfoGroup : successInfoGroup
    bucket.push(newInfo)
  }
  connection.query(insertSql, newInfo, recordOutcome)
}


/**
 * Send a mail whose plain-text body is `text`.
 *
 * Overwrites `receiverMsg.text` and passes the shared `receiverMsg` options
 * object to `senderMsg.sendMail`; the send result (or error) is only logged.
 *
 * @param {string} text Body of the mail.
 */
let postMail = function (text) {
  console.log('郵件發送中')
  receiverMsg.text = text

  const onSendFinished = (error, info) => {
    if (error) {
      console.log(error)
      return
    }
    console.log(`Message: ${info.messageId}`)
    console.log(`sent: ${info.response}`)
  }

  senderMsg.sendMail(receiverMsg, onSendFinished)
}

/**
 * Append `string` to the log file at `logPath`.
 *
 * Uses fs.appendFile, which opens, writes and closes the file in one call,
 * so the descriptor cannot leak when the write fails. Failures are logged
 * and forwarded to the optional callback instead of being thrown from an
 * asynchronous callback, where no caller could catch them.
 *
 * @param {string} string Text to append.
 * @param {function(?Error): void} [callback] Optional; called once the append
 *   has finished, with the error (or null) as its only argument.
 */
let writeLog = function (string, callback) {
  fs.appendFile(logPath, string, (err) => {
    if (err) {
      console.log(err)
    } else {
      console.log('已成功寫入log')
    }
    if (typeof callback === 'function') {
      callback(err)
    }
  })
}

/**
 * Empty the log file at `logPath` by overwriting it with an empty string.
 * The outcome is only reported on the console.
 */
let clearLog = function () {
  const onTruncated = (err) => {
    if (err) {
      console.log(err)
      return
    }
    console.log('清空成功')
  }
  fs.writeFile(logPath, '', 'utf8', onTruncated)
}


/**
 * Polling step: once every row has been accounted for, close the database
 * connection, stop the polling timer and report any failed rows.
 *
 * A row is accounted for when it appears in successInfoGroup or
 * errorInfoGroup. While rows are still pending nothing happens; previously
 * the error log was rewritten and a mail was sent on every tick while
 * waiting, and failures were never reported after completion.
 *
 * @param {Array} html The rows that were submitted for insertion; only its
 *   length is used.
 * @returns {Promise<boolean>} Resolves with `false` while inserts are still
 *   pending and `true` once shutdown and reporting have been triggered.
 */
let endConnection = function (html) {
  return new Promise((resolve) => {
    const accountedFor = successInfoGroup.length + errorInfoGroup.length
    if (accountedFor < html.length) {
      resolve(false)
      return
    }

    connection.end()
    clearInterval(testInterval)

    if (errorInfoGroup.length > 0) {
      console.log('錯誤數據')
      console.log(errorInfoGroup)
      const report = JSON.stringify(errorInfoGroup)
      // NOTE(review): clearLog and writeLog are both asynchronous and are not
      // sequenced here, so the truncate could in principle land after the
      // append — confirm whether that ordering matters
      clearLog()
      writeLog(report)
      postMail(report)
    }
    resolve(true)
  })
}


// Pipeline: fetch the rows -> derive the table schema -> create the table ->
// insert every row, then poll once per second until all inserts have been
// accounted for (endConnection closes the connection and stops the timer).
getWebData().then(res => {
  return getTableSkelon(res)
}).then(res => {
  return createTable(res)
}).then(res => {
  res.forEach((item) => {
    fillTable(item)
  })
  testInterval = setInterval(endConnection, 1000, res)
}).catch(err => {
  // Without this handler any failure above is an unhandled rejection and the
  // database connection is left open
  console.error(err)
  connection.end()
})

 

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章