我用 Python 分析了一波熱賣年貨,原來大家都在買這些東西?

{"type":"doc","content":[{"type":"blockquote","content":[{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"作者:Cherich_sun","attrs":{}}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"來源:公衆號「傑哥的IT之旅」ID:Jake_Internet","attrs":{}}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"原文鏈接:[我用 Python 分析了一波熱賣年貨,原來大家都在買這些東西?](https://mp.weixin.qq.com/s/71hnlEIhr9mb06_5WirVCw)","attrs":{}}]}],"attrs":{}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"今年不知道有多少小夥伴留在原地過年,雖然今年過年不能回老家,但這個年也得過,也得買年貨,給家人長輩送禮。於是我出於好奇心的想法利用爬蟲獲取某寶數據,並結合 Python 數據分析和第三方可視化平臺來分析一下大家過年都買了哪些東西,分析結果大屏如下:","attrs":{}}]},{"type":"image","attrs":{"src":"https://static001.geekbang.org/infoq/ec/ecd13b1bad4333561aea6d7b4a966277.png","alt":null,"title":null,"style":[{"key":"width","value":"75%"},{"key":"bordertype","value":"none"}],"href":null,"fromPaste":true,"pastePass":true}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"上面使用清洗好的數據後用 finebi 第三方可視化工具完成的。接下來是用 Python 
的實現過程,對於本文的敘述,主要分爲以下五步:","attrs":{}}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"bulletedlist","content":[{"type":"listitem","content":[{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"分析思路","attrs":{}}]}],"attrs":{}},{"type":"listitem","content":[{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"爬蟲部分","attrs":{}}]}],"attrs":{}},{"type":"listitem","content":[{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"數據清洗","attrs":{}}]}],"attrs":{}},{"type":"listitem","content":[{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"數據可視化及分析","attrs":{}}]}],"attrs":{}},{"type":"listitem","content":[{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"結論與建議","attrs":{}}]}],"attrs":{}}],"attrs":{}},{"type":"heading","attrs":{"align":null,"level":3},"content":[{"type":"text","text":"一、分析思路","attrs":{}}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"其實就今天的數據來講,我們主要做的是探索性分析;首先梳理已有的字段,有標題(提取出品類)、價格、銷量、店鋪名、發貨地。下面來做一下詳細的維度拆分以及可視化圖形選擇:","attrs":{}}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","marks":[{"type":"strong","attrs":{}}],"text":"品類:","attrs":{}}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"bulletedlist","content":[{"type":"listitem","content":[{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"品類銷量的 TOP 10 
有哪些?(表格或者橫向條形圖)","attrs":{}}]}],"attrs":{}},{"type":"listitem","content":[{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"熱門(出現次數最多)品類展示;(詞雲)","attrs":{}}]}],"attrs":{}}],"attrs":{}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","marks":[{"type":"strong","attrs":{}}],"text":"價格:","attrs":{}},{"type":"text","text":"年貨的價格區間分佈情況;(圓環圖,觀察佔比)","attrs":{}}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","marks":[{"type":"strong","attrs":{}}],"text":"銷量、店鋪名:","attrs":{}}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"bulletedlist","content":[{"type":"listitem","content":[{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"店鋪銷量最高的 TOP 10 有哪些?(條形圖)","attrs":{}}]}],"attrs":{}},{"type":"listitem","content":[{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"結合品類做聯動,比如點堅果,對應展示銷量排名的店鋪;(聯動,利用三方工具)","attrs":{}}]}],"attrs":{}}],"attrs":{}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","marks":[{"type":"strong","attrs":{}}],"text":"發貨地:","attrs":{}},{"type":"text","text":"銷量最高的城市有哪些?(地圖)","attrs":{}}]},{"type":"heading","attrs":{"align":null,"level":3},"content":[{"type":"text","text":"二、爬取數據","attrs":{}}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"爬取主要利用 selenium 模擬點擊瀏覽器,前提是已經安裝 selenium 和瀏覽器驅動,這裏我是用的 Google 
瀏覽器,找到對應的版本號後並下載對應的版本驅動,一定要對應瀏覽器的版本號。","attrs":{}}]},{"type":"codeblock","attrs":{"lang":"text"},"content":[{"type":"text","text":"pip install selenium","attrs":{}}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"image","attrs":{"src":"https://static001.geekbang.org/infoq/45/4594c7c6ad8ebcd9a45eaea1a5b41b7b.png","alt":null,"title":null,"style":[{"key":"width","value":"75%"},{"key":"bordertype","value":"none"}],"href":null,"fromPaste":true,"pastePass":true}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"安裝成功後,運行如下代碼,輸入關鍵字\"年貨\",進行掃碼就可以了,等着程序慢慢採集。","attrs":{}}]},{"type":"codeblock","attrs":{"lang":"text"},"content":[{"type":"text","text":"# coding=utf8\nimport re\nfrom selenium.webdriver.chrome.options import Options\nfrom selenium import webdriver\nimport time\nimport csv\n\n\n# 搜索商品,獲取商品頁碼\ndef search_product(key_word):\n # 定位輸入框\n browser.find_element_by_id(\"q\").send_keys(key_word)\n # 定義點擊按鈕,並點擊\n browser.find_element_by_class_name('btn-search').click()\n # 最大化窗口:爲了方便我們掃碼\n browser.maximize_window()\n # 等待15秒,給足時間我們掃碼\n time.sleep(15)\n # 定位這個“頁碼”,獲取“共100頁這個文本”\n page_info = browser.find_element_by_xpath('//div[@class=\"total\"]').text\n # 需要注意的是:findall()返回的是一個列表,雖然此時只有一個元素它也是一個列表。\n page = re.findall(\"(\\d+)\", page_info)[0]\n return page\n\n\n# 獲取數據\ndef get_data():\n # 通過頁面分析發現:所有的信息都在items節點下\n items = browser.find_elements_by_xpath('//div[@class=\"items\"]/div[@class=\"item J_MouserOnverReq \"]')\n for item in items:\n # 參數信息\n pro_desc = item.find_element_by_xpath('.//div[@class=\"row row-2 title\"]/a').text\n # 價格\n pro_price = item.find_element_by_xpath('.//strong').text\n # 付款人數\n buy_num = item.find_element_by_xpath('.//div[@class=\"deal-cnt\"]').text\n # 旗艦店\n shop = item.find_element_by_xpath('.//div[@class=\"shop\"]/a').text\n # 發貨地\n address = item.find_element_by_xpath('.//div[@class=\"location\"]').text\n # print(pro_desc, pro_price, 
buy_num, shop, address)\n with open('{}.csv'.format(key_word), mode='a', newline='', encoding='utf-8-sig') as f:\n csv_writer = csv.writer(f, delimiter=',')\n csv_writer.writerow([pro_desc, pro_price, buy_num, shop, address])\n\n\ndef main():\n browser.get('https://www.taobao.com/')\n page = search_product(key_word)\n print(page)\n get_data()\n page_num = 1\n while int(page) != page_num:\n print(\"*\" * 100)\n print(\"正在爬取第{}頁\".format(page_num + 1))\n browser.get('https://s.taobao.com/search?q={}&s={}'.format(key_word, page_num * 44))\n browser.implicitly_wait(25)\n get_data()\n page_num += 1\n print(\"數據爬取完畢!\")\n\n\nif __name__ == '__main__':\n key_word = input(\"請輸入你要搜索的商品:\")\n option = Options()\n browser = webdriver.Chrome(chrome_options=option,\n executable_path=r\"C:\\Users\\cherich\\AppData\\Local\\Google\\Chrome\\Application\\chromedriver.exe\")\n main()","attrs":{}}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"採集結果如下:","attrs":{}}]},{"type":"image","attrs":{"src":"https://static001.geekbang.org/infoq/04/04ac92cdc8789390eeecdd23ca84c796.png","alt":null,"title":null,"style":[{"key":"width","value":"75%"},{"key":"bordertype","value":"none"}],"href":null,"fromPaste":true,"pastePass":true}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"數據準備完成,中間從標題裏提取類別過程比較耗時,建議大家直接用整理好的數據。","attrs":{}}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"大概思路是對標題進行分詞,命名實體識別,標記出名詞,找出類別名稱,比如堅果、茶葉等。","attrs":{}}]},{"type":"heading","attrs":{"align":null,"level":3},"content":[{"type":"text","text":"三、數據清洗","attrs":{}}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","te
xt":"這裏的文件清洗幾乎用 Excel 搞定,數據集小,用 Excel 效率很高,比如這裏做了一個價格區間。到現在數據清洗已經完成(可以用三方工具做可視化了),如果大家愛折騰,可以接着往下看用 Python 如何進行分析。","attrs":{}}]},{"type":"heading","attrs":{"align":null,"level":3},"content":[{"type":"text","text":"四、數據可視化及分析","attrs":{}}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","marks":[{"type":"strong","attrs":{}}],"text":"1、讀取文件","attrs":{}}]},{"type":"codeblock","attrs":{"lang":"text"},"content":[{"type":"text","text":"import pandas as pd\nimport matplotlib as mpl\nmpl.rcParams['font.family'] = 'SimHei'\nfrom wordcloud import WordCloud\nfrom ast import literal_eval\nimport matplotlib.pyplot as plt\ndatas = pd.read_csv('./年貨.csv',encoding='gbk')\ndatas","attrs":{}}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"image","attrs":{"src":"https://static001.geekbang.org/infoq/38/38f40d113564a1a4a5f3d70e6b37437b.png","alt":null,"title":null,"style":[{"key":"width","value":"75%"},{"key":"bordertype","value":"none"}],"href":null,"fromPaste":true,"pastePass":true}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","marks":[{"type":"strong","attrs":{}}],"text":"2、可視化:詞雲圖","attrs":{}}]},{"type":"codeblock","attrs":{"lang":"text"},"content":[{"type":"text","text":"li = []\nfor each in datas['關鍵詞'].values:\n new_list = str(each).split(',')\n li.extend(new_list)\ndef func_pd(words):\n count_result = pd.Series(words).value_counts()\n return count_result.to_dict()\n\nfrequencies = func_pd(li)\nfrequencies.pop('其他')\n\nplt.figure(figsize = (10,4),dpi=80)\nwordcloud = WordCloud(font_path=\"STSONG.TTF\",background_color='white', 
width=700,height=350).fit_words(frequencies)\nplt.imshow(wordcloud)\nplt.axis(\"off\")\nplt.show()","attrs":{}}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"image","attrs":{"src":"https://static001.geekbang.org/infoq/96/96d2bfbb5ded95073e0fa38282576a26.png","alt":null,"title":null,"style":[{"key":"width","value":"75%"},{"key":"bordertype","value":"none"}],"href":null,"fromPaste":true,"pastePass":true}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"圖表說明:我們可以看到詞雲圖,熱門(出現次數最多)品類字體最大,依次是:堅果、茶葉、糕點等。","attrs":{}}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","marks":[{"type":"strong","attrs":{}}],"text":"3、可視化:繪製圓環圖","attrs":{}}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"codeblock","attrs":{"lang":"text"},"content":[{"type":"text","text":"# plt.pie(x,lables,autopct,shadow,startangle,colors,explode)\nfood_type = datas.groupby('價格區間').size()\nplt.figure(figsize=(8,4),dpi=80)\nexplodes= [0,0,0,0,0.2,0.1]\nsize = 0.3\nplt.pie(food_type, radius=1,labels=food_type.index, autopct='%.2f%%', colors=['#F4A460','#D2691E','#CDCD00','#FFD700','#EEE5DE'],\n wedgeprops=dict(width=size, edgecolor='w'))\nplt.title('年貨價格區間佔比情況',fontsize=18)\nplt.legend(food_type.index,bbox_to_anchor=(1.5, 
1.0))\nplt.show()","attrs":{}}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"image","attrs":{"src":"https://static001.geekbang.org/infoq/89/890a05b583be158c309f8df401d22750.png","alt":null,"title":null,"style":[{"key":"width","value":"75%"},{"key":"bordertype","value":"none"}],"href":null,"fromPaste":true,"pastePass":true}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"圖表說明:圓環圖和餅圖類似,代表部分相對於整體的佔比情況,可以看到0 ~ 100元的年貨大概33%左右,100 ~ 200元也是33%。說明大部分的年貨的價格趨於200以內。","attrs":{}}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","marks":[{"type":"strong","attrs":{}}],"text":"4、可視化:繪製條形圖","attrs":{}}]},{"type":"image","attrs":{"src":"https://static001.geekbang.org/infoq/19/19984e767fe59b571646e560061a61fd.png","alt":null,"title":null,"style":[{"key":"width","value":"75%"},{"key":"bordertype","value":"none"}],"href":null,"fromPaste":true,"pastePass":true}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"圖表說明:以上是店鋪按銷量排名情況,可以看到第一名是三隻松鼠旗艦店,看來過年大家都喜歡喫乾貨。","attrs":{}}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","marks":[{"type":"strong","attrs":{}}],"text":"5、可視化:繪製橫向條形圖","attrs":{}}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"codeblock","attrs":{"lang":"text"},"content":[{"type":"text","text":"foods = datas.groupby(by='類別')['銷量'].sum().sort_values(ascending=False).head(10)\nfoods.sort_values(ascending=True,inplace=True)\nplt.figure(figsize = (10,4),dpi=80)\nplt.xlabel('銷量')\nplt.title('年貨推薦購買排行榜',fontsize=18)\ncolors = ['#F4A460','#D2691E','#CDCD00','#CD96CD','#EEE5DE', '#EEB4B4', 
'#FFA07A', '#FFD700']\nplt.barh(foods.index,foods.values, color=colors,height=1)\nplt.show()","attrs":{}}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"image","attrs":{"src":"https://static001.geekbang.org/infoq/8f/8fa7efa17c97ff8baaf0e550fe424fd4.png","alt":null,"title":null,"style":[{"key":"width","value":"75%"},{"key":"bordertype","value":"none"}],"href":null,"fromPaste":true,"pastePass":true}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","text":"圖表說明:根據類別銷量排名,排名第一是堅果,驗證了上面的假設,大家喜歡喫堅果。","attrs":{}}]},{"type":"heading","attrs":{"align":null,"level":3},"content":[{"type":"text","text":"五、結論與建議","attrs":{}}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","marks":[{"type":"strong","attrs":{}}],"text":"淘寶熱賣年貨:","attrs":{}},{"type":"text","text":" 堅果,茶葉,糕點,餅乾,糖果,白酒,核桃,羊肉,海蔘,枸杞;","attrs":{}}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","marks":[{"type":"strong","attrs":{}}],"text":"年貨推薦清單(按銷量):","attrs":{}},{"type":"text","text":"堅果、零食、糕點、餅乾、茶葉、糖果、松子、紅棗、蛋糕、滷味、瓜子、牛奶、核桃;","attrs":{}}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","marks":[{"type":"strong","attrs":{}}],"text":"年貨價格參考:","attrs":{}},{"type":"text","text":"66%以上的年貨價格在0~200元之間;","attrs":{}}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","marks":[{"type":"strong","attrs":{}}],"text":"熱門店鋪","attrs":{}},{"type":"text","text":":三隻松鼠、天貓超市、百草味、良品鋪子;","attrs":{}}]},
{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null},"content":[{"type":"text","marks":[{"type":"strong","attrs":{}}],"text":"公衆號:傑哥的IT之旅,後臺回覆:「年貨」,即可獲取本文完整數據。","attrs":{}}]},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"image","attrs":{"src":"https://static001.geekbang.org/infoq/01/019b65e5ec6661e8bbcaec704494797d.jpeg","alt":null,"title":"","style":[{"key":"width","value":"100%"},{"key":"bordertype","value":"none"}],"href":"","fromPaste":false,"pastePass":false}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}},{"type":"paragraph","attrs":{"indent":0,"number":0,"align":null,"origin":null}}]}
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章