#!/usr/bin/env python
# -*- encoding: utf-8 -*-
# Created on 2019-09-01 19:58:54
# Project: localhost
from pyspider.libs.base_handler import *
from pyspider.libs.utils import md5string
import random
# Module-level state shared between Handler.detail_page (writer) and
# Handler.get_taskid (reader).
#   preStr    -- starts as the placeholder string "0"; detail_page later
#                rebinds it to a flat list of scraped cell texts.
#   preResult -- starts as the placeholder list ["0"]; detail_page later
#                rebinds it to the list of scraped row dicts.
global preStr, preResult
preStr, preResult = "0", ["0"]
class Handler(BaseHandler):
    """Crawler script that scrapes table rows from http://localhost:8080/.

    ``detail_page`` stores what it scraped in the module-level globals
    ``preResult`` and ``preStr``; ``get_taskid`` derives task ids from
    ``preStr``.
    """

    crawl_config = {}

    @every(minutes=1)
    def on_start(self):
        """Queue the local page and have it parsed by ``detail_page``."""
        self.crawl('http://localhost:8080/', callback=self.detail_page)

    @config(age=10 * 24 * 60 * 60)
    def index_page(self, response):
        """Queue every ``a`` element whose href starts with "http".

        Each link is handed to ``detail_page``. Nothing in this class
        registers ``index_page`` as a callback.
        """
        for link in response.doc('a[href^="http"]').items():
            self.crawl(link.attr.href, callback=self.detail_page)

    @config(priority=2, age=1)
    def detail_page(self, response):
        """Extract the first three cells of each ``tr.odd`` / ``tr.even`` row.

        Rows whose first cell is empty or contains '以上信息' are skipped
        (presumably a footer/notice row -- confirm against the page).

        Side effects: rebinds the globals ``preResult`` (to the returned
        list) and ``preStr`` (to a flat list of the kept cell texts, three
        per row, in row order).

        Returns:
            A list of dicts with the keys "week", "vs" and "tidian".
        """
        global preResult, preStr

        rows = []
        cell_texts = []
        for row in response.doc('tr.odd,tr.even').items():
            first = row('td:eq(0)').text()
            # Guard clause: ignore blank rows and rows carrying the marker.
            if not first or '以上信息' in first:
                continue
            second = row('td:eq(1)').text()
            third = row('td:eq(2)').text()
            rows.append({"week": first, "vs": second, "tidian": third})
            cell_texts.extend((first, second, third))

        # Publish the latest scrape for get_taskid to pick up.
        preResult = rows
        preStr = cell_texts
        return rows

    def get_taskid(self, task):
        """Return ``md5string`` of the current global ``preStr``.

        ``task`` is not consulted, so the id depends only on what
        ``detail_page`` last stored (or the initial "0").
        NOTE(review): this makes every task created from the same state
        share one id -- looks intentional for change detection; confirm.
        """
        return md5string(preStr)