scrapy-亞馬遜

import time

import scrapy
from scrapy import Request


class MobileSpider(scrapy.Spider):
    """Scrape Amazon China search results for "mobile phone".

    For every results page the spider yields one dict per listing with the
    keys ``"title"``, ``"hrefs"`` (absolute product URL) and ``"price"``,
    then follows the pagination "next" link until there is none left.
    """

    name = 'mobile'
    # Must match the host used in start_urls: with 'amazon.com' here the
    # offsite filter silently drops every follow-up request to amazon.cn,
    # so the crawl would stop after the first page.
    allowed_domains = ['amazon.cn']
    start_urls = ['https://www.amazon.cn/s?k=mobile+phone&s=price-desc-rank&__mk_zh_CN=%E4%BA%9A%E9%A9%AC%E9%80%8A%E7%BD%91%E7%AB%99&crid=215CPRDHDI9WF&qid=1584240877&sprefix=mobile%2Caps%2C182&ref=sr_st_price-desc-rank']

    # Throttle in the downloader rather than calling time.sleep() inside
    # parse(): sleeping in a callback blocks the Twisted reactor and stalls
    # every other in-flight request.
    custom_settings = {'DOWNLOAD_DELAY': 5}

    def parse(self, response):
        """Extract listings from one results page and queue the next page.

        Args:
            response: The downloaded search-results page.

        Yields:
            dict: ``{"title": ..., "hrefs": ..., "price": ...}`` per listing.
            Request: A request for the next results page, when one exists.
        """
        self.logger.debug('Parsing %s', response.url)

        titles = response.xpath('//div[@class="sg-col-inner"]//h2//span[contains(@class,"a-size-base-plus")]/text()').extract()
        hrefs = response.xpath('//div[@class="sg-col-inner"]//h2/a/@href').extract()
        prices = response.xpath('//div[@class="a-section a-spacing-none a-spacing-top-small"]//span[@class="a-price"]/span[@class="a-offscreen"]/text()').extract()

        # The three lists are extracted independently and paired purely by
        # position; zip() stops at the shortest one.
        # NOTE(review): a listing without a price would shift every later
        # price onto the wrong title -- confirm against live markup.
        for title, href, price in zip(titles, hrefs, prices):
            yield {
                "title": title,
                # urljoin resolves site-relative hrefs against the page URL
                # and leaves already-absolute hrefs untouched.
                "hrefs": response.urljoin(href),
                "price": price,
            }

        # extract_first() returns None when there is no "next" link (last
        # page); stop cleanly instead of concatenating a string with None.
        next_href = response.xpath('//ul[@class="a-pagination"]/li[@class="a-last"]/a/@href').extract_first()
        if next_href:
            # No callback is given, so the response is handled by parse().
            yield Request(response.urljoin(next_href))
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章