import requests
from fake_useragent import UserAgent
import re
# Listing-page URL template; `{}` is filled with the 1-based page number.
url = 'https://www.qiushibaike.com/text/page/{}/'
headers = {
    # Random Chrome User-Agent string so requests look like a real browser
    # (the site blocks the default python-requests UA).
    'User-Agent': UserAgent().chrome
}
def get_data(page):
    """Fetch one listing page of jokes and append them to ``duanzi.txt``.

    :param page: 1-based page number substituted into the module-level
        ``url`` template.
    :raises requests.HTTPError: if the server answers with a 4xx/5xx status.
    """
    print("正在爬取第{}頁".format(page))
    # timeout prevents the scraper from hanging forever on a stalled server.
    response = requests.get(url.format(page), headers=headers, timeout=10)
    # Fail loudly on error pages instead of silently scraping their HTML.
    response.raise_for_status()
    # The declared charset can be wrong; let requests sniff the body so the
    # Chinese text decodes correctly.
    response.encoding = response.apparent_encoding
    html = response.text
    # Non-greedy capture + DOTALL: the original greedy `(.+)` merged every
    # joke sharing one physical line and missed jokes spanning several lines.
    jokes = re.findall(r'<div class="content">\s*<span>\s*(.+?)\s*</span>',
                       html, re.S)
    with open('duanzi.txt', 'a+', encoding='utf-8') as f:
        for joke in jokes:
            # Bug fix: str.replace("\s", "") only removed the literal
            # backslash-s pair; the intent was to strip whitespace, which
            # needs a regex substitution.
            joke = re.sub(r'\s+', '', joke)
            f.write(joke + "\n\n")
if __name__ == "__main__":
    # Guard the crawl behind the standard entry check so importing this
    # module (e.g. to reuse get_data) does not kick off 13 HTTP requests.
    for page in range(1, 14):
        get_data(page)