原創文章,歡迎轉載。轉載請註明:轉載自 祥的博客
原文鏈接:https://blog.csdn.net/humanking7/article/details/90176191
Python獲取起點小說網的更新情況
1. 原因
每次想知道網絡小說的更新情況,就得去貼吧逛逛,然後去起點網瞧瞧,這樣很浪費時間,尤其是我這種一逛貼吧就出不來的選手。所以寫了一個腳本,用來獲取小說的更新情況:不需要點開網頁,直接運行 bat 文件就 OK 了。
對於不同的小說,改一下代碼中的 url 就行了。
那個,我也覺得看網絡小說不好,而且也在看一些嚴肅文學作品,但是無奈啊,我就是一個脫離不了快餐文學的人。。。
2. 代碼
2.1. Python文件代碼
文件名:getYuanZunInfo.py
# 獲取元尊的數據
# url = 'https://book.qidian.com/info/1014920025'
import requests
import re
from bs4 import BeautifulSoup
import bs4
import os
import time
class getYuanZunInfo():
    """Fetch and print the latest-chapter information of a novel page.

    The page at ``url`` is downloaded with ``requests`` and parsed with
    BeautifulSoup. Three values are extracted: the chapter count (digits of
    the ``#J-catalogCount`` element), the title of the newest chapter and the
    time of the last update (both read from the ``li.update`` element).

    Args:
        url: Address of the novel's info page.
        timeout: Timeout in seconds passed to ``requests.get``.
        filePath: Path of the text file used by ``writeText2File``.
        mode: Mode used to open ``filePath`` (``'a'`` appends).
        fileEncode: Text encoding used when writing ``filePath``.
    """

    def __init__(self,
                 url,
                 timeout=10,
                 filePath='yuanZun.txt',
                 mode='a',
                 fileEncode='utf-8'):
        self.url = url
        self.timeout = timeout
        self.filePath = filePath
        self.mode = mode
        self.fileEncode = fileEncode
        # [chapter count, chapter title, last update time] of the latest parse.
        self.li_dataInfo = []

    def writeText2File(self, content):
        """Write ``content`` to ``self.filePath``.

        The file is opened with ``self.mode`` and ``self.fileEncode``; the
        context manager guarantees it is closed even if the write fails.
        """
        with open(self.filePath, self.mode, encoding=self.fileEncode) as f:
            f.write(content)

    def getHTMLText(self):
        """Download the page at ``self.url``.

        Returns:
            A ``(ok, text)`` tuple. On success ``ok`` is True and ``text`` is
            the decoded page body. On any request failure (timeout,
            connection error, HTTP error status, ...) ``ok`` is False and
            ``text`` is the error message.
        """
        try:
            headers = {'user-agent': 'Mozilla/5.0'}
            response = requests.get(
                self.url, headers=headers, timeout=self.timeout)
            # Turn 4xx/5xx status codes into an HTTPError.
            response.raise_for_status()
            # Decode with the encoding guessed from the body content.
            response.encoding = response.apparent_encoding
            return True, response.text
        except requests.exceptions.RequestException as e:
            # RequestException is the base class of Timeout, ConnectionError
            # and HTTPError, so every request failure yields (False, message)
            # instead of some of them escaping to the caller.
            return False, str(e)

    def parseYuanZun(self):
        """Download the page, extract the update info, and display it.

        On success ``self.li_dataInfo`` is replaced by
        ``[chapter count, chapter title, last update time]`` and
        ``show_save_Info`` is called. If the download fails or an expected
        element is missing from the page, a message is printed and
        ``self.li_dataInfo`` is left untouched.
        """
        ok, html = self.getHTMLText()
        if not ok:
            # On failure `html` holds the error message, not a page.
            print('獲取html失敗: %s' % html)
            return

        soup = BeautifulSoup(html, "html.parser")
        countTag = soup.find(id='J-catalogCount')
        updateTag = soup.find('li', attrs={"class": "update"})
        titleTag = None
        timeTag = None
        if updateTag is not None:
            titleTag = updateTag.find('a', attrs={"class": "blue"})
            timeTag = updateTag.find('em', attrs={"class": "time"})

        # find() returns None for a missing element; check explicitly rather
        # than hiding every possible error behind a bare except.
        if countTag is None or titleTag is None or timeTag is None:
            print('解析html失敗!')
            return

        # Keep only the digits of the catalog counter text.
        chapterID = re.sub(r"\D", "", countTag.text)
        chapterTitle = titleTag.get("title")
        lastUpdateTime = timeTag.text

        # Replace (not extend) the stored data so that repeated calls always
        # show the most recent result.
        self.li_dataInfo = [chapterID, chapterTitle, lastUpdateTime]
        self.show_save_Info()

    def show_save_Info(self):
        """Print the chapter count, chapter title and last update time.

        Reads the first three items of ``self.li_dataInfo``. Saving to the
        text file is currently disabled; pass the formatted string to
        ``writeText2File`` to enable it.
        """
        separator = "=================="
        str_show = "更新到: %s 章\n%s\n\n最新更新時間: %s" % (
            self.li_dataInfo[0], self.li_dataInfo[1], self.li_dataInfo[2])
        print(separator)
        print(str_show)
        print(separator)
if __name__ == "__main__":
    # Info page of the novel to check; change this URL to follow another one.
    book_url = 'https://book.qidian.com/info/1014920025'
    # Download the page, parse it and print the latest update information.
    getYuanZunInfo(book_url).parseYuanZun()
2.2. 批處理文件代碼
文件名:getYuanZun.bat
:: Run the Python script that prints the novel's latest update information.
python getYuanZunInfo.py
:: NOTE(review): presumably meant to keep the console window open after the
:: script finishes; the conventional spelling is plain `pause` - verify that
:: the parenthesised form behaves as intended.
pause()