# Biquge (笔趣阁) novel-site scraper
# 1. Simulate a search
# 2. Book lookup -> chapter list
# 3. Fetch chapter content
# 4. Local storage: txt, mysql, ...
def searchBook():
    """Prompt for a book title, search the site, and cache every match.

    Each matching book's name -> detail-page URL is stored in the global
    ``book_dict`` for the chapter-download step.  On a failed HTTP request
    the function recurses to prompt again.
    """
    print("****************笔趣阁小说图书下载*****************")
    print("****************作者:一个低调的人*****************")
    bookName = input("请输入图书的名称: ")
    # Encode to GBK bytes first: the site expects GBK-encoded query
    # parameters, otherwise Chinese characters are garbled in the URL.
    bookName = bookName.encode("gbk")
    resp = requests.get(url=url_one, params={"searchkey": bookName},
                        headers=head, timeout=10)
    if resp.status_code == 200:
        resp.encoding = "gbk"  # site serves GBK-encoded HTML
        soup = BeautifulSoup(resp.text, "html.parser")
        # Each search result sits in a <div class="caption"> element.
        div_list = soup.find_all(name="div", attrs={"class": "caption"})
        for div in div_list:
            # Guard against missing child tags -- div.h4 / div.small /
            # div.p can be None, and attribute access on None would raise.
            if div.h4 is None or div.h4.a is None:
                continue
            bookname = div.h4.a.string
            bookurl = div.h4.a.attrs.get("href")
            bookauthor = div.small.string if div.small is not None else None
            bookdir = div.p.string if div.p is not None else None
            # All four fields must be present for a usable entry.
            if None not in (bookname, bookurl, bookauthor, bookdir):
                # str.replace returns a NEW string; the original code
                # discarded the result, so the cleanup never happened.
                bookname = bookname.replace(" ", "")
                bookurl = bookurl.replace(" ", "")
                bookauthor = bookauthor.replace(" ", "")
                bookdir = bookdir.replace(" ", "")
                print(bookname + "\n", bookurl + "\n",
                      bookauthor + "\n", bookdir + "\n")
                # Remember name -> URL for getBookChapter().
                book_dict[bookname] = bookurl
    else:
        print("错误!重新开始")
        searchBook()
def getBookChapter():
    """Prompt for a previously-found book name and fetch its chapter list.

    Looks the name up in the global ``book_dict``; for every chapter link
    found, delegates the download to ``getBookChapterContent``.  Recurses to
    re-prompt when the name is unknown.
    """
    bookname = input("请输入已找到的图书的名称: ")
    # Plain membership test -- iterating .keys() explicitly is redundant.
    if bookname in book_dict:
        resp = requests.get(url=book_dict[bookname], headers=head, timeout=time)
        if resp.status_code == 200:
            resp.encoding = "gbk"  # site serves GBK-encoded HTML
            soup = BeautifulSoup(resp.text, "html.parser")
            title = soup.title.string  # already a plain string
            # BUG FIX: the original printed title.string on the string
            # itself, which raises AttributeError.
            print(title)
            # Chapter links live in <dd class="col-md-3"> elements.
            dd_list = soup.find_all(name="dd", attrs={"class": "col-md-3"})
            bookUrl = book_dict[bookname]  # hoisted loop invariant
            for dd in dd_list:
                try:
                    chapter = dd.a.attrs.get("title")
                    chapterUrl = dd.a.attrs.get("href")
                    print(chapter, chapterUrl)
                    getBookChapterContent(chapter, chapterUrl, bookUrl, bookname)
                except Exception as e:
                    # Best-effort per chapter, but report instead of the
                    # original silent `pass` that hid every failure.
                    print("章节下载失败:", e)
                    continue
    else:
        print("错误!重新开始")
        getBookChapter()
def getBookChapterContent(chapter, chapterUrl, bookUrl, bookname):
    """Download one chapter's text, save it to disk, and cache it.

    :param chapter:    chapter title (becomes the .txt file name)
    :param chapterUrl: absolute or site-relative URL of the chapter page
    :param bookUrl:    the book's base URL, used to absolutize chapterUrl
    :param bookname:   book title (becomes the folder name)
    """
    # Relative links need the book URL prepended.
    if "http" not in chapterUrl:
        chapterUrl = bookUrl + chapterUrl
    # Use the same headers/timeout as the sibling requests for consistency.
    resp = requests.get(url=chapterUrl, headers=head, timeout=time)
    # BUG FIX: original read resp.ststus_code (typo) -- AttributeError.
    if resp.status_code == 200:
        resp.encoding = "gbk"
        soup4 = BeautifulSoup(resp.text, "html.parser")
        # find() returns a single Tag (or None), not a list.
        # NOTE(review): the original matched attrs={"div": "htmlContent"},
        # i.e. an attribute literally named "div", which matches nothing;
        # presumably the content container is id="htmlContent" -- confirm
        # against the live page.
        div = soup4.find(name="div", attrs={"id": "htmlContent"})
        if div is None:  # content container not found on this page
            print(bookname + "下载失败!")
            return
        text = div.text
        if text:  # skip empty chapters
            text = text.replace("<br/", "\n")  # normalize stray <br/> residue
            saveTxt(text, bookname, chapter)   # persist to disk
            chapter_dict[chapter] = text       # cache for saveCsv()
    else:
        print(bookname + "下载失败!")
def saveTxt(text, bookname, chapter):
    """Write one chapter's text to 小说<bookname>/<chapter>.txt (UTF-8).

    :param text:     chapter body
    :param bookname: book title, used to build the directory name
    :param chapter:  chapter title, used as the file name
    """
    path = "小说" + bookname
    # Create the directory tree idempotently.  The original called both
    # os.mkdir and os.makedirs on the same path: the second call raised
    # FileExistsError immediately after the first succeeded.
    os.makedirs(path, exist_ok=True)
    # Text mode with explicit encoding.  The original combined "wb" with
    # encoding="utf-8", which raises ValueError (binary mode accepts no
    # encoding argument) and would then TypeError on writing a str.
    # `with` guarantees flush + close even on error.
    with open(os.path.join(path, chapter + ".txt"), "w", encoding="utf-8") as file:
        file.write(text)
# CSV can be imported into / exported from databases.
def saveCsv():
    """Dump every cached chapter (global ``chapter_dict``) to test.csv.

    Layout: one header row ["章节名称", "内容"], then one [chapter, text]
    row per cached chapter.
    """
    headers = ["章节名称", "内容"]  # was a set -- column order was undefined
    rows = []  # was a dict, which has no .append -- AttributeError
    # newline="" is required by the csv module so rows are not separated by
    # blank lines on Windows; `with` guarantees the file is closed.
    with open("test.csv", "w", encoding="utf-8", newline="") as file:
        # BUG FIX: csv.file.write does not exist; csv.writer is the API.
        f_csv = csv.writer(file)
        for key, text in chapter_dict.items():
            row = [key, text]
            rows.append(row)
            print("存储中", row)
        f_csv.writerow(headers)   # single header row
        # BUG FIX: writerow(rows) would mangle the nested list; writerows
        # writes one CSV row per inner list.
        f_csv.writerows(rows)
    print("创建表格成功")
# Script entry point.
if __name__ == '__main__':
    import requests                 # HTTP requests
    from bs4 import BeautifulSoup   # HTML parsing
    import os                       # directory management
    import csv                      # CSV output

    # Search endpoint and browser-like headers for the target site.
    url_one = "https://www.biqukan.cc/modules/article/search.php"
    head = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.88 Safari/537.36"
    }
    time = 15  # request timeout in seconds

    book_dict = {}     # book name -> detail-page URL
    chapter_dict = {}  # chapter title -> chapter text

    searchBook()
    getBookChapter()
    # saveCsv()
# 1.4 爬虫-笔趣阁获取小说例子 (Crawler example: fetching novels from Biquge)
# --- blog-scrape residue below, commented out so the file parses ---
# 發表評論
# 所有評論
# 還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.