pyspider 抓包實例

#!/usr/bin/env python
# -*- encoding: utf-8 -*-
# Created on 2019-09-01 19:58:54
# Project: localhost

from pyspider.libs.base_handler import *
from pyspider.libs.utils import md5string
import random

# Module-level state shared between Handler.detail_page (writer) and
# Handler.get_taskid (reader). No `global` statement is needed here: names
# assigned at module scope are already module globals.

# Fingerprint source hashed by Handler.get_taskid. Starts as the placeholder
# string "0" and is rebound to a list of cell texts by Handler.detail_page.
preStr = "0"

# Most recent non-empty result list produced by Handler.detail_page; starts
# as a one-element placeholder list.
preResult = ["0"]
class Handler(BaseHandler):
    """pyspider handler that scrapes table rows from a locally served page.

    The parsed rows are returned as the task result, and their cell texts are
    also stored in the module-level ``preStr`` so that ``get_taskid`` can
    derive the task id from the last scraped content.
    """

    # No project-wide crawl options are set.
    crawl_config = {}

    @every(minutes=1)
    def on_start(self):
        """Entry point: fetch the local page and hand it to ``detail_page``."""
        start_url = 'http://localhost:8080/'
        self.crawl(start_url, callback=self.detail_page)

    @config(age=10 * 24 * 60 * 60)
    def index_page(self, response):
        """Queue every absolute ``http`` link on the page for ``detail_page``.

        NOTE(review): nothing in this class passes ``index_page`` as a
        callback, so it appears to be unused template code -- confirm.
        """
        links = response.doc('a[href^="http"]')
        for link in links.items():
            self.crawl(link.attr.href, callback=self.detail_page)

    @config(priority=2, age=1)
    def detail_page(self, response):
        """Extract the first three cells of each ``tr.odd`` / ``tr.even`` row.

        Rows whose first cell is empty or contains the text '以上信息' are
        skipped. Side effects: ``preStr`` is rebound to the flat list of all
        kept cell texts on every call, and ``preResult`` is rebound to the
        result list whenever at least one row is kept.

        Returns:
            A list of dicts with the keys ``week``, ``vs`` and ``tidian``.
        """
        global preResult
        global preStr
        rows = []
        cell_texts = []
        for row in response.doc('tr.odd,tr.even').items():
            first = row('td:eq(0)').text()
            # Guard clause: drop the '以上信息' row and rows with no text in
            # the first cell.
            if first.find('以上信息') >= 0 or len(first) <= 0:
                continue
            second = row('td:eq(1)').text()
            third = row('td:eq(2)').text()
            rows.append({
                "week": first,
                "vs": second,
                "tidian": third,
            })
            cell_texts.extend((first, second, third))
            # Only updated when a row is kept, so an empty scrape leaves the
            # previous value in place.
            preResult = rows
        preStr = cell_texts
        return rows

    def get_taskid(self, task):
        """Return the MD5 of the last scraped content as the task id.

        The ``task`` argument is ignored, so every task created by this
        handler gets the same id until ``detail_page`` changes ``preStr``.

        NOTE(review): ``preStr`` is a list after the first ``detail_page``
        run; this relies on ``md5string`` accepting non-string input --
        confirm.
        """
        return md5string(preStr)
    

 

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章