# 爬一本小說的代碼操作:
from urllib.parse import urljoin

import requests
from pyquery import PyQuery
# Target address: URL of a single chapter page.
chapter1_url = "http://www.biquyun.com/14_14055/9194140.html"
def get_one_chapter(chapter_url, output_path='三寸人間.txt', timeout=10):
    """Download one chapter page and append its title and text to a file.

    The page's ``<h1>`` text is used as the chapter title and the text of
    the ``#content`` element as the chapter body. Both are printed and
    then appended to ``output_path``.

    Args:
        chapter_url: URL of the chapter page to fetch.
        output_path: Text file (UTF-8) the chapter is appended to.
        timeout: Seconds to wait for the server before giving up.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    # Without a timeout, requests waits indefinitely on a stalled server.
    response = requests.get(url=chapter_url, timeout=timeout)
    # Fail loudly rather than appending an error page to the book.
    response.raise_for_status()
    # Decode using the encoding detected from the body itself, which
    # sidesteps pages whose declared charset is missing or wrong.
    response.encoding = response.apparent_encoding

    # Parse the HTML text so it can be queried with CSS selectors.
    doc = PyQuery(response.text)
    title = doc("h1").text()
    print(title)
    content = doc('#content').text()
    print(content)

    # Append-only: the file is never read back, so plain "a" is enough.
    with open(file=output_path, encoding='utf-8', mode="a") as f:
        f.write(title + '\n' + content + '\n\n\n')
# Table-of-contents page of the book.
index_url = 'http://www.biquyun.com/14_14055/'
# A timeout keeps a stalled server from hanging the script forever.
response = requests.get(url=index_url, timeout=10)
# Stop here on an HTTP error instead of parsing an error page for links.
response.raise_for_status()
# Decode using the encoding detected from the body itself.
response.encoding = response.apparent_encoding
doc = PyQuery(response.text)
# Every link inside the chapter list.
list_dd = doc('#list > dl > dd a')
for dd in list_dd.items():
    href = dd.attr('href')
    if not href:
        # An anchor without href has nothing to fetch.
        continue
    # urljoin resolves root-relative, relative and absolute hrefs alike,
    # unlike plain concatenation with the host name.
    get_one_chapter(urljoin(index_url, href))