Python3-Spyder-urllib.request抓取搜haohuo平臺信息-保存到csv

Python3-Spyder-urllib.request抓取搜haohuo平臺信息-保存到csv

# -*- coding: utf-8 -*-

"""
Created on Sat Nov 9 10:15:32 2019

@author: Administrator
"""
import urllib.request as request
import lxml.html as html
import csv
import time
import codecs
import random

# Scrape company name, contact name and phone number from the search results
# of s.912688.com and append one row per company page to soumeng.csv.

# Browser-like User-Agent sent with every request.
headers = {'User-Agent': ' Mozilla/5.0 (Windows NT 6.1; Win64; x64)AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.70 Safari/537.36'}

# Proxy pool. A dict literal keeps only ONE value per key, so listing several
# proxies under the same 'http' key silently discards all but the last one.
# Keep them in a list instead and build one opener per proxy.
PROXIES = [
    '182.35.80.197:9999',
    '117.69.201.81:9999',
    '182.34.34.201:9999',
    '113.120.33.49:9999',
    '222.89.32.187:9999',
]
# NOTE: only the 'http' scheme is mapped, so requests to https:// URLs
# (including the search URL below) are not routed through these proxies.
openers = [
    request.build_opener(request.ProxyHandler({'http': proxy}))
    for proxy in PROXIES
]

SEARCH_URL = 'https://s.912688.com/comp/dy/search?kw=%E5%BE%B7%E5%B7%9E&page='
FIRST_PAGE = 1
LAST_PAGE = 82

# Links from a search-result page to the individual company pages.
xpath = '//a[@class="list-item-button list-item-see L"]/@href'

# Company pages come in two layouts; the "...2" expressions are the fallback
# used when the primary expression matches nothing.
xpathGs = "//p[@class=\"shop-card-cname\"]/a/text()"
xpathXm = "//p[@class=\"shop-card-link\"]/a/text()"
xpathNb = "//p[@class=\"shop-card-num\"]/span/text()"
xpathGs2 = "//span[@class=\"com-name m-btm\"]/a/text()"
xpathXm2 = "//span[@class=\"name\"]/a/text()"
xpathNb2 = "//span[@class=\"phone\"]/text()"


def _fetch(url):
    """Return the body of *url* decoded as UTF-8.

    The request carries the shared ``headers`` and is sent through an opener
    picked at random from ``openers``.
    """
    req = request.Request(url=url, headers=headers)
    with random.choice(openers).open(req) as res:
        return res.read().decode("utf-8")


# codecs.open with an explicit encoding avoids garbled Chinese text in the
# output; the ``with`` block closes the file even if a request fails midway.
with codecs.open('soumeng.csv', 'a', encoding='utf-8') as f:
    csv_writer = csv.writer(f)

    for a in range(FIRST_PAGE, LAST_PAGE + 1):
        url = SEARCH_URL + str(a)
        print(url)
        print(a)

        # Fetch each search page exactly once.
        dom = html.document_fromstring(_fetch(url))
        links = dom.xpath(xpath)

        for link in links:
            dom = html.document_fromstring(_fetch(link))

            Gongsi = dom.xpath(xpathGs)
            if not Gongsi:
                print("plan B")
                Gongsi = dom.xpath(xpathGs2)
            Xingming = dom.xpath(xpathXm)
            if not Xingming:
                Xingming = dom.xpath(xpathXm2)
            Nub = dom.xpath(xpathNb)
            if not Nub:
                Nub = dom.xpath(xpathNb2)

            # Each cell is a list of xpath matches; csv writes its str() form.
            csv_writer.writerow([Gongsi, Xingming, Nub])
            print(link)
            print(str(Gongsi)+'-'+str(Xingming)+'-'+str(Nub))

        # Pause a random whole number of seconds (0 to 5) between pages.
        time.sleep(random.randint(0, 5))

print("結束")

發佈了13 篇原創文章 · 獲贊 14 · 訪問量 4萬+
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章