Python抓取淘寶手機銷量排行榜

#!/usr/bin/env python  
# encoding: utf-8  
""" 
@version: v1.0 
@author: W_H_J
@license: Apache Licence  
@contact: [email protected] 
@site:  
@software: PyCharm 
@file: taoBaoSpider.py 
@time: 2017/12/25 15:07 
@describe:淘寶部分頁面分析

"""
import pandas
import re
import requests
import sys
reload(sys)
sys.setdefaultencoding("utf-8")
for ii in range(1,10):  # 翻頁
    mn = 44 * (ii-1)
    url = 'https://s.taobao.com/search?q=%E6%89%8B%E6%9C%BA&imgfile=&commend=all&ssid=s5-e&search_type=item&sourceId=tb.index&spm=a21bo.2017.201856-taobao-item.1&ie=utf8&initiative_id=tbindexz_20170306'
    header = {'user-agent':'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.84 Safari/537.36',
              'cookie':'thw=cn; t=be73ea5ec1ffbeb254d0a3535dd00415; cna=HqWrEpIZeG4CAbYSAEIb6bav; hng=CN%7Czh-CN%7CCNY%7C156; miid=596160490770762658; lgc=%5Cu5815%5Cu843D%5Cu4E4B%5Cu6CEAa; tracknick=%5Cu5815%5Cu843D%5Cu4E4B%5Cu6CEAa; tg=0; uc2=wuf=https%3A%2F%2Ftrade.tmall.com%2Fdetail%2ForderDetail.htm%3Fbiz_order_id%3D104827474284154168%26forward_action%3D; x=e%3D1%26p%3D*%26s%3D0%26c%3D0%26f%3D0%26g%3D0%26t%3D0%26__ll%3D-1%26_ato%3D0; uc3=sg2=VWxidJMT8gLCYBc%2BxP5FJdYe9%2FXfUvq2%2Byf0cFWq90Q%3D&nk2=1RSXayUHM0Sl&id2=UUpkvTJ9k5HsSA%3D%3D&vt3=F8dBzLbVzPYkPml1NZk%3D&lg2=W5iHLLyFOGW7aA%3D%3D; uss=VvioJOfdaT365u5YugXSKrRnG47jUQQG9UQvstfUu5fjcHD0zxGQLEmn; _cc_=VFC%2FuZ9ajQ%3D%3D; mt=ci=67_1; tk_trace=oTRxOWSBNwn9dPy4KVJVbutfzK5InlkjwbWpxHegXyGxPdWTLVRjn23RuZzZtB1ZgD6Khe0jl%2BAoo68rryovRBE2Yp933GccTPwH%2FTbWVnqEfudSt0ozZPG%2BkA1iKeVv2L5C1tkul3c1pEAfoOzBoBsNsJySQJwqIKz2kX83uPP5e4iE9t1ZpHdHZkk218jfUuTKISIEGrGMtBctY%2B2vMCmzCRVhIqleLIl%2BRRQHs4ekW3wNcZhDfwkkQzp9RF7kjYiNbNLTbo2mRCr3Wf97aW%2FfC72uuEf9Tcc6cNT9QCiB0y7NxqzS4M5NvMkxl5KoKbA%2BorLqu5Y9jpCfT31RlA%3D%3D; cookie2=1c16eb46ef00c015dd101f731c258d77; _tb_token_=8de4c4560b63; v=0; alitrackid=www.taobao.com; lastalitrackid=www.taobao.com; swfstore=107855; JSESSIONID=ED726367865542B7BA84D801D1C72812; isg=AhcXOlKpAS4SKIXa0x_6AhsZpovNTcSrwSKOp2lEKOZNmDfacSx7DtWyjg59; uc1=cookie14=UoTdf1DFLRnICg%3D%3D',
              'accept':'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
              'path':'/search?q=%E6%89%8B%E6%9C%BA&imgfile=&commend=all&ssid=s5-e&search_type=item&sourceId=tb.index&spm=a21bo.2017.201856-taobao-item.1&ie=utf8&initiative_id=tbindexz_20170306'}
    html = requests.request('GET', url, headers=header)
    # print html.text
    # ren = re.compile('"title":"(.*?)","pic_url":"(.*?)","price":"(.*?)","importantKey":"(.*?)","month_sales":"(.*?)"')
    ren = re.compile('"title":"(.*?)","pic_url":"(.*?)","price":"(.*?)","trace":"(.*?)","month_sales":"(.*?)"')
    data = re.findall(ren, html.text)
    # print data
    data2 = pandas.DataFrame(data)
    print data2
    data2.to_csv(r'taobao.csv', header=False, index=False, mode='a+',encoding='utf-8')
銷量信息可視化
#!/usr/bin/env python  
# encoding: utf-8  
""" 
@version: v1.0 
@author: W_H_J
@license: Apache Licence  
@contact: [email protected] 
@site:  
@software: PyCharm 
@file: taobaoPlot.py 
@time: 2017/12/25 16:12 
@describe:淘寶信息分析

"""
import pandas
import matplotlib as mpl
import matplotlib.pyplot as plt

mpl.rcParams["font.sans-serif"] = ['SimHei'] #配置字體
# 繪圖格式
plt.rcParams["axes.labelsize"] = 16
plt.rcParams["xtick.labelsize"] = 10
# print plt.rcParams.keys()
plt.rcParams["ytick.labelsize"] = 10
plt.rcParams["legend.fontsize"] = 10  # 圖例字體大小
plt.rcParams["figure.figsize"] = [15,12]
# plt.rcParams['patch.facecolor'] = 'red'
def1 =pandas.read_csv('taobao.csv')
# print def1
TBdata =pandas.DataFrame(list(zip(def1['A'], def1['E'])))
# 可視化
DD = TBdata.groupby([0]).sum()
DD[1].plot(color='r')
DD[1].plot(kind='bar', rot=90)
DD[1].plot(rot=90)



plt.show()

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章