Python爬蟲實現豆瓣圖書搜索

import requests
import time
from bs4 import BeautifulSoup
import re

# Shared accumulator: parse_page() appends one [title, publication info] pair
# per scraped book, so the list grows across successive pages.
book = []
def search(url, retries=3, timeout=10):
    """Download a page with an HTTP GET and return its body as text.

    Args:
        url: Address to fetch.
        retries: Maximum number of attempts before giving up (at least one
            attempt is always made).
        timeout: Seconds to wait for the server on each attempt.

    Returns:
        The decoded response body.

    Raises:
        requests.RequestException: If every attempt fails; the error from the
            final attempt is re-raised.
    """
    attempts = max(1, retries)
    for attempt in range(1, attempts + 1):
        try:
            return requests.get(url, timeout=timeout).text
        except requests.RequestException:
            # Only network/HTTP-layer errors are retried, so Ctrl-C and
            # programming errors are no longer swallowed. Once the attempt
            # budget is spent the failure is surfaced instead of looping
            # forever or silently returning None.
            if attempt == attempts:
                raise

def parse_page(url, k):
    """Fetch one listing page, store its books in ``book`` and print them.

    Titles are taken from ``title="..."`` attributes and publication details
    from ``<div class="pub">`` blocks; they are paired in document order.

    Args:
        url: Address of the listing page to download via ``search``.
        k: Page index supplied by the caller. Retained for interface
            compatibility; numbering now starts from the number of books
            already stored, so it no longer relies on a fixed page size.

    Raises:
        SystemExit: When the page yields no titles, which is treated as the
            end of the listing (a summary is printed first).
    """
    # search() may hand back None after a failed download; treat that the
    # same as a page without any books rather than crashing inside re.
    html = search(url) or ''
    book_info = re.findall(r'<div class="pub">(.*?)</div>', html, re.S)
    book_name = re.findall(r'title="(.*?)"', html, re.S)

    if not book_name:
        print('爬取結束')
        print('共'+str(len(book))+'本')
        raise SystemExit

    # Remember where this page's entries begin so exactly the new ones are
    # printed, however many the page actually contained.
    first_new = len(book)

    # zip() stops at the shorter list, so every matched pair is kept (the
    # last book included) and a title without an info block cannot raise
    # IndexError.
    for name, info in zip(book_name, book_info):
        book.append([name, info.strip()])

    for i in range(first_new, len(book)):
        print('第'+str(i+1)+'本')
        print('書名:《'+book[i][0]+'》'+'  圖書信息:'+book[i][1])
        print('-----------------------------------------------------')


def main(k, sname, start):
    """Build the tag listing URL for one page and hand it to ``parse_page``.

    Args:
        k: Page index, forwarded unchanged to ``parse_page``.
        sname: Tag text to look up; inserted into the URL path.
        start: Value of the ``start`` query parameter (result offset).
    """
    # Local import keeps this function self-contained.
    from urllib.parse import quote

    # Percent-encode the tag so characters such as '#', '?', '/' or spaces
    # stay part of the path segment instead of altering the URL structure.
    tag = quote(sname, safe='')
    url = 'https://book.douban.com/tag/' + tag + '?start=' + str(start) + '&type=T'
    parse_page(url, k)

if __name__ == '__main__':
    # Ask for the tag to look up, announce the start, and pause briefly.
    sname = input("請輸入圖書信息:")
    print('-------------------------開始爬取-------------------------')
    time.sleep(1)
    # Request ten consecutive pages; the offsets advance in steps of 20
    # (0, 20, ..., 180) alongside the page index.
    for page, offset in enumerate(range(0, 200, 20)):
        main(page, sname, start=offset)

 

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章