pyspider採集例子(js)

#!/usr/bin/env python
# -*- encoding: utf-8 -*-
# Created on 2015-01-04 03:35:01
# Project: tutorial_pinterest

from pyspider.libs.base_handler import *


class Handler(BaseHandler):
    """
    This is a sample scrape script for: Level 3: Render with PhantomJS
    http://docs.pyspider.org/en/latest/tutorial/Render-with-PhantomJS/#running-javascript-on-page

    ``on_start`` schedules one JavaScript-rendered fetch of a Pinterest
    category page, and ``index_page`` turns the fetched page into a single
    result dict describing the pins found on it.
    """

    def on_start(self):
        """Schedule the fetch of the Pinterest "popular" category page.

        The request is made with ``fetch_type='js'`` and carries a small
        JavaScript function that scrolls the window to the bottom of the
        document. The response is handed to ``index_page``.
        """
        # NOTE(review): scrolling to the bottom presumably triggers the page's
        # lazy loading so more pins are present in the rendered HTML - confirm.
        self.crawl('http://www.pinterest.com/categories/popular/',
                   fetch_type='js', js_script="""
                   function() {
                       window.scrollTo(0,document.body.scrollHeight);
                   }
                   """, callback=self.index_page)

    def index_page(self, response):
        """Extract the pins from the fetched category page.

        Args:
            response: the response object passed to the callback; this method
                reads ``response.url`` and queries ``response.doc`` with CSS
                selectors.

        Returns:
            A dict with two keys:
              - ``"url"``: the value of ``response.url``.
              - ``"images"``: a list with one dict per ``.item`` element that
                has a ``.pinImg`` match, each holding ``"title"``, ``"img"``
                (the ``src`` attribute of ``.pinImg``) and ``"author"``.
        """
        return {
            "url": response.url,
            "images": [{
                "title": x('.richPinGridTitle').text(),
                "img": x('.pinImg').attr('src'),
                "author": x('.creditName').text(),
            # Items without a '.pinImg' match are skipped, so every entry
            # is built from an item that actually has an image element.
            } for x in response.doc('.item').items() if x('.pinImg')]
        }


發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼?請在上方評論欄輸入並且點擊發布。
相關文章