# -*- coding: utf-8 -*-
"""
Created on Mon Nov 18 14:58:57 2019
@author: Administrator
"""
import urllib.request as request
import lxml.html as html
import xlsxwriter
import time
import random
# Request headers that imitate a desktop Chrome browser.
headers = {
    'User-Agent': ' Mozilla/5.0 (Windows NT 6.1; Win64; x64)AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.70 Safari/537.36',
}

# HTTP proxy handler. Alternative proxies that were tried in its place:
#   114.239.42.226:9999, 183.154.55.73:9999,
#   1.198.73.115:9999, 183.166.125.221:9999
# The handler is only used once an opener is built from it, e.g.
#   opener = request.build_opener(px)
px = request.ProxyHandler({'http': '182.35.84.97:9999'})

# XPath expressions evaluated against the listing pages (div.chanpin) and the
# product detail pages (div.place / div.content).
xpathNm = '//div[@class = "chanpin"]/ul/li/a/p/text()'    # <p> text of each listing entry
xpathNm2 = '//div[@class = "place"]/text()'               # text nodes directly inside div.place
xpathUrl = '//div[@class = "chanpin"]/ul/li/a/@href'      # link target of each listing entry
xpathImg = '//div[@class = "chanpin"]/ul/li/a/img/@src'   # image source of each listing entry
xpathFenlei = '//div[@class = "place"]/a/text()'          # link texts inside div.place
xpathJieshao = '//div[@class = "content"]/text()'         # text nodes directly inside div.content

# Number of the first listing page to crawl.
a = 1
def jiexi(url, xpath):
    """Download *url*, parse it as HTML and evaluate *xpath* on the document.

    Args:
        url: Address of the page to fetch.
        xpath: XPath expression to run against the parsed document.

    Returns:
        Whatever ``dom.xpath(xpath)`` yields; for the text/attribute
        expressions used in this script that is a list of strings.

    Raises:
        urllib.error.URLError: If the page cannot be fetched.
        UnicodeDecodeError: If the response body is not valid UTF-8.
    """
    # The request is sent directly. To route it through the proxy instead,
    # build an opener from ``px`` and open
    # ``request.Request(url=url, headers=headers)`` with it.
    #
    # The ``with`` block closes the HTTP response (and its socket) as soon as
    # the body has been read, instead of leaving that to garbage collection.
    with request.urlopen(url, timeout=500) as response:
        data = response.read().decode("utf-8")
    dom = html.document_fromstring(data)
    return dom.xpath(xpath)
# Forward-slash spelling of the output location (not used by the code below).
excelPath = "C:/Users/Administrator/pachong/excel_1967416.xls"

# Workbook that collects the scraped rows, plus its single worksheet.
# NOTE(review): xlsxwriter produces XLSX data, yet the file name ends in
# ".xls" - confirm whether the extension should be ".xlsx".
xl = xlsxwriter.Workbook(
    r'C:\Users\Administrator\pachong\excel_1967416.xls'
)
sheet = xl.add_worksheet()
def write_excel_xls_append(x, y, value):
    """Write *value* as a string into one cell of the module-level ``sheet``.

    Args:
        x: Column part of the cell reference (e.g. ``'C'``).
        y: Row part of the cell reference; converted with ``str``.
        value: Text to store in the cell.
    """
    # Join column and row into a single reference such as "C2".
    cell_ref = x + str(y)
    sheet.write_string(cell_ref, value)
# Crawl the listing pages numbered ``a`` .. LAST_PAGE and store, per product,
# its image URL (column C) and the fields scraped from its detail page
# (columns A, B, D and E).
SITE_ROOT = "http://www.sdblygc.com"
LAST_PAGE = 12

# Next free row for the image column and for the detail columns. The two are
# advanced independently; both start at row 2.
img_row = 2
detail_row = 2

try:
    while a <= LAST_PAGE:
        url = "{}/cpzx/list_2_{}.html".format(SITE_ROOT, a)

        # Column C: absolute URL of every product image on the listing page.
        for imgUrl in jiexi(url, xpathImg):
            write_excel_xls_append('C', img_row, SITE_ROOT + imgUrl)
            img_row += 1

        # Follow every product link on the listing page.
        # NOTE(review): each jiexi() call below downloads the detail page
        # again, so every product page is fetched three times.
        for linkUrl in jiexi(url, xpathUrl):
            linkUrl = SITE_ROOT + linkUrl

            # Column D: fourth text node of div.place
            # (presumably the product name - confirm against the page).
            name = jiexi(linkUrl, xpathNm2)
            write_excel_xls_append('D', detail_row, name[3])
            print(name[3])

            # Columns A/B: third and fourth link texts of div.place
            # (presumably the category path - confirm against the page).
            fenlei = jiexi(linkUrl, xpathFenlei)
            write_excel_xls_append('A', detail_row, fenlei[2])
            write_excel_xls_append('B', detail_row, fenlei[3])

            # Column E: second text node of div.content
            # (presumably the product description - confirm against the page).
            jieshao = jiexi(linkUrl, xpathJieshao)
            write_excel_xls_append('E', detail_row, jieshao[1])

            detail_row += 1

        a += 1
        # Random pause between listing pages.
        time.sleep(random.randint(3, 5))
finally:
    # Close the workbook even when a request or an index lookup fails, so the
    # rows gathered so far are still written out.
    xl.close()
# Practice script, saved for reference; originally left without comments.