爲了關注某位主播熱度的變化情況,特敲下以下代碼。
本次程序導入了requests模塊、time模塊、lxml模塊、matplotlib模塊。
代碼思維是:
- 根據輸入的分區名稱(area),向鬥魚(https://www.douyu.com/directory/all)發送請求獲取響應,用xpath截取分區對應的鏈接;
- 向獲取的新鏈接發送請求獲取響應,用xpath截取對應主播的熱度數據並用列表記錄下來;
- 根據輸入的間隔時間(interval,建議爲60s)對步驟2進行循環,當主播下播時,則停止循環;
- 根據記錄下來的數據,使用matplotlib繪製折線圖,並保存圖片;
- 主播下播後每隔一小時向網頁發送請求,直到主播上播,繼續循環2、3、4步驟。
代碼如下,歡迎學習交流:
# coding=utf-8
import requests
import time
from lxml import etree
from matplotlib import font_manager
from matplotlib import pyplot as plt
from math import ceil
class Douyu_Spider:
    """Poll a Douyu category page for one streamer's popularity and chart it.

    On construction the directory page is fetched once to resolve the
    category title (``area``) to the category URL. ``run`` then polls that
    URL every ``interval`` seconds, records the streamer's popularity value
    together with a timestamp, and saves a line chart when the streamer is no
    longer listed on the page.

    Attributes set by ``__init__``:
        area, name, interval: the constructor arguments, stored unchanged.
        headers: HTTP headers sent with every request.
        hot_list: popularity samples (floats) of the current session.
        time_list: "H:M:S" timestamps, one per entry of ``hot_list``.
        url_part: the category href exactly as found on the directory page.
        url: absolute category URL that is polled.
    """

    # Font file used for the chart labels. Same path the script always used;
    # override this attribute on systems where the file does not exist.
    FONT_PATH = r'C:\Windows\Fonts\msyh.ttc'
    # Upper bound aimed for when choosing how many x-axis labels to draw.
    MAX_TICKS = 40
    # Seconds to wait after a session ends before polling again.
    OFFLINE_WAIT = 3600
    # Seconds after which a hanging HTTP request is abandoned.
    REQUEST_TIMEOUT = 10

    def __init__(self, area, name, interval):
        """Store the settings and resolve ``area`` to its category URL.

        Args:
            area: title of the category as shown on the directory page.
            name: streamer name to look for on the category page.
            interval: seconds between two polls while the streamer is listed.

        Raises:
            IndexError: no link titled ``area`` exists on the directory page.
            requests.RequestException: the directory page cannot be fetched.
        """
        self.area = area
        self.name = name
        self.interval = interval
        self.headers = {
            "user-agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.3",
        }
        self.hot_list = []
        self.time_list = []
        response = requests.get(
            "https://www.douyu.com/directory/all",
            headers=self.headers,
            timeout=self.REQUEST_TIMEOUT,
        )
        html = etree.HTML(response.content.decode())
        # Pass the title as an XPath variable so quotes in user input cannot
        # break (or alter) the expression.
        hrefs = html.xpath('//a[@title=$area]/@href', area=self.area)
        if not hrefs:
            raise IndexError(
                "no category titled {!r} on the directory page".format(self.area)
            )
        self.url_part = hrefs[0]
        # The href may already start with "/"; avoid producing "//" in the URL.
        self.url = "https://www.douyu.com/{}".format(self.url_part.lstrip("/"))

    @staticmethod
    def _now_str():
        """Return the local time as "H:M:S" (fields are not zero-padded)."""
        now = time.localtime()
        return "{}:{}:{}".format(now.tm_hour, now.tm_min, now.tm_sec)

    @staticmethod
    def _parse_hot(text):
        """Convert a popularity label such as "12.3萬" or "9876" to a float.

        A trailing "萬" (or its simplified form "万") multiplies the number by
        10000. Any other single non-digit suffix is dropped. A plain number is
        parsed as-is instead of losing its last digit.

        Raises:
            ValueError: the remaining text is not a number.
        """
        cleaned = text.strip()
        if cleaned.endswith(("萬", "万")):
            return float(cleaned[:-1]) * 10000
        if cleaned and not cleaned[-1].isdigit():
            cleaned = cleaned[:-1]
        return float(cleaned)

    @classmethod
    def _tick_step(cls, count):
        """Return the slice step that keeps ``count`` labels within MAX_TICKS.

        The result is never below 1, so it is always a valid slice step.
        """
        return max(1, ceil(count / cls.MAX_TICKS))

    def parse_url(self, url):
        """Fetch ``url`` and return the decoded body.

        The request time is printed, and appended to ``time_list`` only once
        the page has been fetched and decoded, so a failed request leaves no
        orphan timestamp behind.
        """
        time_str = self._now_str()
        print(time_str)
        response = requests.get(
            url, headers=self.headers, timeout=self.REQUEST_TIMEOUT
        )
        html_str = response.content.decode()
        self.time_list.append(time_str)
        return html_str

    def get_info(self, html_str):
        """Extract the streamer's popularity from a category page.

        Returns:
            False when a value was found (it is appended to ``hot_list`` and
            printed); True when the streamer is not on the page, in which
            case the timestamp recorded for this poll is discarded.
        """
        html = etree.HTML(html_str)
        found = html.xpath(
            '//h2[text()=$name]/..//span[@class="DyListCover-hot is-template"]/text()',
            name=self.name,
        )
        if not found:
            # Only drop a timestamp that has no matching sample, so the two
            # lists stay the same length.
            if len(self.time_list) > len(self.hot_list):
                self.time_list.pop()
            return True
        hot = self._parse_hot(found[0])
        self.hot_list.append(hot)
        print(hot)
        return False

    def plot_hot(self):
        """Draw the recorded samples as a line chart and save it as a PNG.

        Does nothing when no samples have been recorded. The file name is
        built from the streamer name and the first/last timestamps, with ":"
        replaced by "_".
        """
        if not self.time_list or not self.hot_list:
            return
        label_font = font_manager.FontProperties(fname=self.FONT_PATH, size=18)
        tick_font = font_manager.FontProperties(fname=self.FONT_PATH, size=10)
        figure = plt.figure(figsize=(20, 8), dpi=80)
        try:
            x = range(len(self.time_list))
            plt.plot(x, self.hot_list)
            # Thin the labels so that roughly MAX_TICKS at most are drawn.
            step = self._tick_step(len(self.time_list))
            plt.xticks(
                list(x[::step]),
                self.time_list[::step],
                fontproperties=tick_font,
                rotation=45,
            )
            plt.xlabel('時間軸', fontproperties=label_font)
            plt.ylabel('主播熱度', fontproperties=label_font)
            plt.title(
                "鬥魚主播《{}》的熱度變化圖{}-{}".format(
                    self.name, self.time_list[0], self.time_list[-1]
                ),
                fontproperties=label_font,
            )
            plt.grid(alpha=0.3)
            file_name = "鬥魚主播《{}》的熱度變化圖{}-{}.png".format(
                self.name,
                self.time_list[0].replace(":", "_"),
                self.time_list[-1].replace(":", "_"),
            )
            plt.savefig(file_name)
        finally:
            # One figure is created per session; release it so figures do
            # not pile up while the tracker keeps running.
            plt.close(figure)

    def run(self):
        """Track the streamer forever: poll, chart the session, wait, repeat.

        While the streamer is listed, a sample is taken every ``interval``
        seconds. When the streamer is not listed, the samples collected so
        far (if any) are charted, the lists are cleared, and the next check
        happens after OFFLINE_WAIT seconds. This method never returns.
        """
        while True:
            print("跟蹤:{}".format(self._now_str()))
            while True:
                # 1. Request the category page.
                try:
                    html_str = self.parse_url(self.url)
                except requests.RequestException as exc:
                    # A transient network failure should not end the
                    # tracker; skip this sample and try again later.
                    print("請求失敗:{}".format(exc))
                    time.sleep(self.interval)
                    continue
                # 2. Extract one popularity sample per poll.
                if self.get_info(html_str):
                    # self.interval is intentionally left untouched here so
                    # the next session polls at the configured rate.
                    if self.hot_list:
                        print("主播已下播")
                    else:
                        print("主播未上線")
                    break
                time.sleep(self.interval)
            # 3. Chart and save the finished session, then start afresh.
            if self.hot_list:
                self.plot_hot()
            self.hot_list = []
            self.time_list = []
            time.sleep(self.OFFLINE_WAIT)
if __name__ == "__main__":
    # Script entry point: ask for the category first, then the streamer,
    # and start tracking with one poll every 60 seconds.
    target_area = input('請輸入遊戲區域:')
    streamer = input('請輸入主播名字:')
    poll_seconds = 60
    Douyu_Spider(target_area, streamer, poll_seconds).run()