# Biquge novel-site scraper
# 1. Simulate a search on the site
# 2. Look up a book and its chapter list
# 3. Fetch each chapter's content
# 4. Store locally: txt, mysql
def searchBook():
    """Search Biquge for a book by name and cache name -> URL in book_dict.

    Prompts the user for a title, queries the site's search endpoint,
    parses every result card and stores each complete hit in the global
    ``book_dict``. Retries (recursively) on a non-200 response.
    """
    print("****************筆趣閣小說圖書下載*****************")
    print("****************作者:一個低調的人*****************")
    bookName = input("請輸入圖書的名稱: ")
    # The site expects GBK-encoded query parameters; sending raw unicode
    # (UTF-8) garbles Chinese titles in the URL.
    bookName = bookName.encode("gbk")
    resp = requests.get(url=url_one, params={"searchkey": bookName},
                        headers=head, timeout=10)
    if resp.status_code == 200:
        # The response body is GBK as well.
        resp.encoding = "gbk"
        soup = BeautifulSoup(resp.text, "html.parser")
        # Each search hit sits in a <div class="caption"> card.
        div_list = soup.find_all(name="div", attrs={"class": "caption"})
        for div in div_list:
            bookname = div.h4.a.string
            bookurl = div.h4.a.attrs.get("href")
            bookauthor = div.small.string
            bookdir = div.p.string
            # Keep only complete entries: every field must be present.
            if (bookname is not None and bookurl is not None
                    and bookauthor is not None and bookdir is not None):
                # BUG FIX: str.replace returns a new string; the original
                # discarded the result, so spaces were never stripped.
                bookname = bookname.replace(" ", "")
                bookurl = bookurl.replace(" ", "")
                bookauthor = bookauthor.replace(" ", "")
                bookdir = bookdir.replace(" ", "")
                print(bookname + "\n", bookurl + "\n",
                      bookauthor + "\n", bookdir + "\n")
                # Cache the hit for getBookChapter().
                book_dict[bookname] = bookurl
    else:
        print("錯誤!重新開始")
        searchBook()
def getBookChapter():
    """Ask for a previously-found book name and download all its chapters.

    Looks the name up in the global ``book_dict``, fetches the book page,
    extracts every chapter link (<dd class="col-md-3">) and hands each one
    to getBookChapterContent(). Re-prompts (recursively) on unknown names.
    """
    bookname = input("請輸入已找到的圖書的名稱: ")
    # Only books cached by searchBook() can be downloaded.
    if bookname in book_dict:
        bookUrl = book_dict[bookname]
        resp = requests.get(url=bookUrl, headers=head, timeout=time)
        if resp.status_code == 200:
            resp.encoding = "gbk"
            soup = BeautifulSoup(resp.text, "html.parser")
            # BUG FIX: soup.title.string is already a plain string; the
            # original then printed title.string -> AttributeError.
            title = soup.title.string
            print(title)
            dd_list = soup.find_all(name="dd", attrs={"class": "col-md-3"})
            for dd in dd_list:
                try:
                    chapter = dd.a.attrs.get("title")
                    chapterUrl = dd.a.attrs.get("href")
                    print(chapter, chapterUrl)
                    getBookChapterContent(chapter, chapterUrl, bookUrl, bookname)
                except Exception:
                    # Best-effort: skip malformed chapter entries.
                    continue
    else:
        print("錯誤!重新開始")
        getBookChapter()
def getBookChapterContent(chapter, chapterUrl, bookUrl, bookname):
    """Fetch one chapter page, extract its text, and persist it.

    Saves the text via saveTxt() and caches it in the global chapter_dict.
    """
    # Relative chapter links need the book page URL prepended.
    if "http" not in chapterUrl:
        chapterUrl = bookUrl + chapterUrl
    resp = requests.get(url=chapterUrl)
    # BUG FIX: the original read resp.ststus_code (typo), which raised
    # AttributeError on every single call.
    if resp.status_code == 200:
        resp.encoding = "gbk"
        soup4 = BeautifulSoup(resp.text, "html.parser")
        # find() returns one Tag (or None), not a list.
        # NOTE(review): attrs={"div": "htmlContent"} matches an attribute
        # literally named `div` -- presumably {"id": "htmlContent"} was
        # intended; verify against the site's markup before changing.
        div = soup4.find(name="div", attrs={"div": "htmlContent"})
        # Guard: original dereferenced div.text without a None check.
        if div is not None and div.text:
            text = div.text.replace("<br/", "\n")  # normalize line breaks
            saveTxt(text, bookname, chapter)       # persist to disk
            chapter_dict[chapter] = text           # cache for saveCsv()
    else:
        print(bookname + "下載失敗!")
def saveTxt(text, bookname, chapter):
    """Write one chapter to 小說<bookname>/<chapter>.txt as UTF-8 text.

    Args:
        text: chapter body to write.
        bookname: book title; used to build the output directory name.
        chapter: chapter title; used as the file name.
    """
    path = "小說" + bookname
    if not os.path.exists(path):
        # BUG FIX: the original called os.mkdir(path) and then
        # os.makedirs(path) on the same path, so the second call always
        # raised FileExistsError. One makedirs() covers both cases.
        os.makedirs(path)
    # BUG FIX: open(..., "wb", encoding="utf-8") is a ValueError (binary
    # mode takes no encoding) and write(str) would TypeError in "wb".
    # Text mode + a context manager also guarantees the handle is closed.
    with open(path + "/" + chapter + ".txt", "w", encoding="utf-8") as file:
        file.write(text)
# csv可與數據庫之間互相導入
def saveCsv(data=None):
    """Export downloaded chapters to test.csv (chapter name, content).

    Args:
        data: optional {chapter_name: text} mapping. Defaults to the
              global ``chapter_dict`` for backward compatibility.
    """
    if data is None:
        data = chapter_dict
    # BUG FIX: the header row must be an ordered list -- the original used
    # a set, which has no stable column order.
    headers = ["章節名稱", "內容"]
    # BUG FIX: the original used rows = {} (a dict) and then called
    # .append() on it -> AttributeError.
    rows = []
    # newline="" keeps the csv module from emitting blank lines on Windows;
    # the context manager guarantees the file is flushed and closed.
    with open("test.csv", "w", encoding="utf-8", newline="") as file:
        # BUG FIX: csv.file.write(file) does not exist; csv.writer(file)
        # is the API that converts a file handle into a row writer.
        f_csv = csv.writer(file)
        for key, text in data.items():
            row = [key, text]
            rows.append(row)
            print("存儲中", row)
        f_csv.writerow(headers)   # single header row: []
        # BUG FIX: writerow(rows) wrote the 2-D list as one malformed row;
        # writerows() writes a list of rows: [[], [], ...].
        f_csv.writerows(rows)
    print("創建表格成功")
#python 入口
if __name__ == '__main__':
    # Search endpoint of the Biquge site.
    url_one = "https://www.biqukan.cc/modules/article/search.php"
    # Impersonate a desktop Chrome browser so the site serves normal pages.
    head = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.88 Safari/537.36"
    }
    time = 15  # request timeout in seconds (read by getBookChapter)

    import requests                 # HTTP client
    from bs4 import BeautifulSoup   # HTML parser
    import os                       # directory creation for saveTxt
    import csv                      # CSV export for saveCsv

    book_dict = {}     # book name    -> book page URL
    chapter_dict = {}  # chapter name -> chapter text

    searchBook()
    getBookChapter()
    # saveCsv()
# 1.4 Crawler example: downloading novels from Biquge
# NOTE: the lines below were blog-page boilerplate (comment-form text)
# accidentally captured along with the code; kept here as comments so the
# file remains valid Python.
# "Post a comment / All comments / No one has commented yet — be the first:
#  type in the comment box above and click publish."