移动端 App 没有后台数据,没办法,只能自己动手写;先凑合着用,日子也凑合着过,于是慢慢开始写爬虫了。
这是一个 Python Scrapy 项目,关于 Scrapy 的具体知识请自行扩展学习。
首先来一张自己稍稍总结的导图:
代码不做过多解释
功能:获取动态请求数据的 URL
class HomeNewsSpidersSpider(scrapy.Spider):
    """Spider that reads a JavaScript variable's value from the CCTV news home page.

    The page embeds the value inside an inline ``<script>`` element. The
    script source is converted to XML with ``js2xml`` so the variable can
    be located with an XPath query.
    """

    name = 'home_news_spiders'
    allowed_domains = ['news.cctv.com']
    start_urls = ['http://news.cctv.com/']

    def parse(self, response):
        """Extract the string assigned to the second ``var`` of the inline script.

        Args:
            response: The Scrapy response for one of ``start_urls``.

        Returns:
            None. The extracted value is printed to stdout.
        """
        # Text of the second <script> under the container with this id.
        # NOTE(review): the id looks auto-generated by the site's CMS and may
        # change between page versions -- confirm before relying on it.
        script_text = response.xpath(
            '//*[@id="SUBD1563517622685109"]/script[2]/text()'
        ).extract_first()
        if script_text is None:
            # extract_first() yields None when nothing matches; passing that
            # on to js2xml.parse would fail, so stop here with a clear message.
            self.logger.warning(
                'Inline script not found on %s; page layout may have changed',
                response.url,
            )
            return

        # Convert the JavaScript source into an XML tree, then serialize it.
        js_tree = js2xml.parse(script_text, encoding='utf-8', debug=False)
        xml_text = js2xml.pretty_print(js_tree)

        # Query the serialized XML with XPath. The path starts at /html/body,
        # presumably because Selector(text=...) parses its input as HTML and
        # wraps the <program> root accordingly -- TODO confirm.
        selector = Selector(text=xml_text)
        content = selector.xpath(
            "/html/body/program/var[2]/string/text()"
        ).extract_first()
        print('content=', content)