// 初學 node,爬一下優酷的視頻,寫得比較粗糙
var http = require('http');
var cheerio = require("cheerio");
// Address of the page to download and scrape.
var url = "http://www.youku.com/";
// Shared result list: fenxi() appends one {title, link} record per element it visits.
var all = [];

// Download the page, hand the complete HTML to fenxi(), then print every
// collected record followed by the total record count.
http.get(url, function (res) {
    var html = "";
    // Decode at the stream level so a multi-byte UTF-8 character that is split
    // across two chunks is reassembled instead of being corrupted by
    // per-chunk Buffer-to-string conversion.
    res.setEncoding("utf8");
    res.on("data", function (chunk) {
        html += chunk;
    });
    res.on("end", function () {
        fenxi(html);
        all.forEach(function (e) {
            console.log(" 【 " + e.title + " " + e.link + " 】" + "\n");
        });
        console.log(all.length);
    });
    // Errors raised while the response body is being read.
    res.on("error", function () {
        console.log("獲取錯誤");
    });
}).on("error", function () {
    // Errors raised by the request itself (DNS failure, refused connection, ...).
    // Without a listener on the request object these would be thrown as an
    // unhandled 'error' event and terminate the process.
    console.log("獲取錯誤");
});
/**
 * Parse the page HTML with cheerio and collect video links.
 *
 * Traversal is three levels deep: every element with class `yk-row`, each of
 * its direct children, and each child of those. For every element at the
 * innermost level, the `title` and `href` attributes of the first `a` found
 * inside it are appended to the file-level `all` array as `{title, link}`.
 * An element that contains no `a` still produces a record, whose fields are
 * then undefined.
 *
 * @param {string} html - Full HTML text of the downloaded page.
 * @returns {undefined} Results are delivered by appending to `all`.
 */
function fenxi(html) {
    // Unlike jQuery in a browser there is no ambient DOM; the document has to
    // be loaded explicitly before it can be queried.
    var $ = cheerio.load(html);
    // Each block of videos is wrapped in a div with class "yk-row".
    var $rows = $(".yk-row");
    $rows.each(function () {
        var $columns = $(this).children();
        // The nested loops must stay inside this callback: $columns only
        // exists per row.
        $columns.each(function () {
            var $items = $(this).children();
            $items.each(function () {
                // Look the anchor up once and read both attributes from it.
                var $anchor = $(this).find("a");
                all.push({
                    title: $anchor.attr("title"),
                    link: $anchor.attr("href")
                });
            });
        });
    });
}
// 輪播圖部分還爬不出來。。。