Node.js 之爬蟲實現

數據引用自慕課網(imooc)

const http = require('http');
const cheerio = require('cheerio');
let url = 'http://www.imooc.com/learn/348'


/**
 * Extract the chapter/video outline from a course page.
 *
 * Every `.chapter` element becomes one entry; its title is the text of the
 * `h3` inside it, and its videos are read from the `li` children of the
 * chapter's `ul`, using the `.J-media-item` link found in each `li`.
 *
 * @param {string} html - Raw HTML of the course page.
 * @returns {Array<{chapterTitle: string, videos: Array<{title: string, id: (string|undefined)}>}>}
 *   One object per chapter, in document order. `id` is whatever follows
 *   `video/` in the link's href, or `undefined` if the href has no such part.
 */
function filterChapters(html) {
    const $ = cheerio.load(html);
    const courseData = [];

    $('.chapter').each(function () {
        const chapter = $(this);
        const chapterData = {
            chapterTitle: chapter.find('h3').text(),
            videos: [],
        };

        chapter.find('ul').children('li').each(function () {
            const video = $(this).find('.J-media-item');
            const href = video.attr('href');

            // A list item without a media link has no href; calling .split on
            // the resulting undefined would throw and abort the whole parse,
            // so such items are skipped instead.
            if (typeof href !== 'string') {
                return;
            }

            chapterData.videos.push({
                title: video.text().trim(),
                id: href.split('video/')[1],
            });
        });

        courseData.push(chapterData);
    });

    return courseData;
}

/**
 * Print the course outline to the console.
 *
 * Each chapter title is logged on its own, followed by one indented line per
 * video in the form `【id】title`. Every logged string ends with an extra
 * newline, so entries are separated by a blank line.
 *
 * @param {Array<{chapterTitle: string, videos: Array<{id: string, title: string}>}>} courseData
 *   Chapters to print, in display order.
 */
function printCourseInfo(courseData) {
    for (const { chapterTitle, videos } of courseData) {
        console.log(`${chapterTitle}\n`);
        for (const { id, title } of videos) {
            console.log(`   【${id}】${title}\n`);
        }
    }
}
// Download the course page, then parse it and print the chapter/video outline.
http.get(url, function (res) {
    // Only a 200 response carries the course page; parsing a redirect or
    // error body would silently produce an empty or misleading outline.
    if (res.statusCode !== 200) {
        console.log('獲取課程數據出錯', `HTTP ${res.statusCode}`);
        res.resume(); // discard the body so the connection is released
        return;
    }

    // Decode at the stream level: without this each Buffer chunk is converted
    // to a string on its own, which corrupts any multi-byte character that
    // happens to straddle a chunk boundary.
    // NOTE(review): assumes the page is served as UTF-8 — confirm.
    res.setEncoding('utf8');

    let html = '';
    res.on('data', function (chunk) {
        html += chunk;
    });
    res.on('end', function () {
        const courseData = filterChapters(html);
        printCourseInfo(courseData);
    });
}).on('error', function (err) {
    // Include the underlying reason instead of discarding the error object.
    console.log('獲取課程數據出錯', err.message);
});
發佈了80 篇原創文章 · 獲贊 5 · 訪問量 4萬+
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章