前言
因爲喜歡的妹子特別喜歡推理書籍,因此就想用python把豆瓣上的推理書籍給爬下來,讓她驚喜驚喜,但是好像沒達到預期的目的。先上一張爬到的數據圖片。
代碼
我的代碼比較簡單,只實現了基本的功能,大家可以自己改。
# -*- coding: utf8 -*-
from bs4 import BeautifulSoup
import requests
from openpyxl import Workbook
import sys
# Python 2-only workaround: `reload` is a builtin only on Python 2, and
# `sys.setdefaultencoding` is only reachable after reloading `sys`.
# Presumably used so the Chinese byte-string labels and the unicode values
# concatenated in get_con() can be mixed without a UnicodeDecodeError.
# NOTE(review): these two lines fail on Python 3 - confirm the target version.
reload(sys)
sys.setdefaultencoding('utf8')
# Module-level output file, opened in append mode at import time.
# get_con() writes the scraped records to it and main() closes it.
f = open("D:\\python\\demos\\crawler\\DouBanSpider\\dybooks.txt", "a")
def get_html(url):
    """Download ``url`` and return the raw response body.

    The request is sent with a desktop Firefox ``User-Agent`` header.

    Args:
        url: Address of the page to fetch.

    Returns:
        The undecoded response body (``Response.content``).

    Raises:
        requests.exceptions.RequestException: If the request fails or no
            data arrives within the timeout.
    """
    header = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:47.0) Gecko/20100101 Firefox/47.0'}
    # requests waits forever by default; bound the wait so that a single
    # stalled connection cannot hang the whole crawl.
    response = requests.get(url, headers=header, timeout=30)
    return response.content
def _last_child_string(tag):
    """Return the ``.string`` of the last child of ``tag``, or "" if absent."""
    if tag is None:
        return ""
    text = ""
    for child in tag:
        text = child.string
    # ``.string`` is None for a child that itself has several children.
    return text or ""


def get_con(html):
    """Parse one listing page and append every book entry to the file ``f``.

    Each ``<li class="subject-item">`` yields one record. The title and URL
    come from the second ``<a>`` of the item, the publication info from the
    element with class ``pub`` and the score from the element with class
    ``rating_nums``. A missing ``pub``/``rating_nums`` element or a missing
    attribute is written as an empty value instead of raising.

    Args:
        html: Markup of the listing page, as returned by ``get_html``.

    Returns:
        None. The records are written to the module-level file ``f``.
    """
    soup = BeautifulSoup(html, 'html.parser')
    for item in soup.find_all('li', class_='subject-item'):
        anchors = item.find_all("a")
        if len(anchors) < 2:
            # Without the second anchor there is no title or link; skip the
            # entry rather than reuse the values of the previous one.
            continue
        title_link = anchors[1]
        links = title_link.get('href') or ""
        name = title_link.get('title') or ""
        details = _last_child_string(item.find(class_='pub'))
        rating = _last_child_string(item.find(class_='rating_nums'))
        f.write("書籍名稱: "+name+"\n")
        f.write("信息: "+details+"\n")
        f.write("評分: "+rating+"\n")
        f.write("鏈接: "+links+"\n")
        f.write("\n\n")
def main():
    """Crawl 90 listing pages of the tag and store their book entries.

    Pages are requested with ``start`` offsets 0, 20, ..., 1780; each page is
    downloaded with ``get_html`` and handed to ``get_con``. The module-level
    output file ``f`` is closed when the crawl ends, including when a page
    raises an exception.
    """
    url = 'https://book.douban.com/tag/推理?start='
    try:
        for i in range(0, 90):
            urls = url + str(i * 20) + "&type=T"
            html = get_html(urls)
            get_con(html)
    finally:
        # Close the file even if a request or a parse fails part-way, so the
        # records written so far are flushed to disk.
        f.close()
# Start the crawl only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()