Python3 爬取豆瓣書籍 Xpath bs4 寫入文件

#coding:utf8
import time
from urllib import request
from bs4 import BeautifulSoup
# Where the scraped titles are appended (one line per book).
OUTPUT_PATH = '/root/桌面/豆瓣/1.txt'
# Series listing URL; filled with (series id, page number).
SERIES_URL_TEMPLATE = 'https://book.douban.com/series/%d?page=%d'
# Page fetched once at start-up and dumped to stdout as a sanity check.
PREVIEW_URL = 'https://book.douban.com/series/128?page=1'
# NOTE(review): the original iterates over the literal tuple (100, 400, 15).
# The start/stop/step shape suggests range(100, 400, 15) may have been
# intended -- confirm with the author before changing which series are crawled.
SERIES_IDS = (100, 400, 15)
PAGE_NUMBERS = range(1, 3)
# Pause between requests so the site is not hammered.
REQUEST_DELAY_SECONDS = 2


def fetch_soup(url):
    """Download *url* and return it parsed with the lxml parser.

    The HTTP response is closed as soon as the document has been parsed.
    """
    with request.urlopen(url) as response:
        return BeautifulSoup(response, 'lxml')


def format_title(title):
    """Return *title* wrapped in the ``<<...>>`` markers used in the output."""
    return "<<" + title + ">>"


def book_titles(h2_list):
    """Return the formatted titles found in a list of ``<h2>`` nodes.

    Each node is expected to expose its first link as ``.a`` and that link's
    attributes as ``.attrs``. Headings without a link, or whose link has no
    non-empty ``title`` attribute, are skipped instead of aborting the crawl.
    """
    titles = []
    for h2_node in h2_list:
        a_node = h2_node.a
        if a_node is None:
            continue
        title = a_node.attrs.get("title")
        if not title:
            continue
        titles.append(format_title(title))
    return titles


def summary_lines(start_time, end_time, next_num):
    """Return the two summary lines printed when the crawl finishes.

    *next_num* is the number that would be given to the next book, so the
    amount actually scraped is ``next_num - 1``.
    """
    # Elapsed time is end minus start; the reverse yields a negative value.
    duration_time = end_time - start_time
    return [
        '運行時間一共%.2f: ' % duration_time + '秒',
        # Parenthesised: "%" binds tighter than "-", so the unparenthesised
        # form subtracts 1 from the formatted string and raises TypeError.
        '共抓到%d本書籍' % (next_num - 1),
    ]


def main():
    """Crawl the configured series pages and append every title to a file."""
    num = 1  # running number of the next book to be written
    start_time = time.time()  # reference point for the total run time

    print(fetch_soup(PREVIEW_URL))

    # Explicit UTF-8 so the Chinese titles can be written under any locale;
    # the context manager closes the file even if a request fails.
    with open(OUTPUT_PATH, 'a', encoding='utf-8') as f:
        for i in PAGE_NUMBERS:
            for e in SERIES_IDS:
                bsObj = fetch_soup(SERIES_URL_TEMPLATE % (e, i))
                print("=============" + "第%d頁" % i + "==============")
                h2_list = bsObj.find_all('h2')

                print(h2_list)

                for title in book_titles(h2_list):
                    print("第%d本書籍" % num, title, file=f)
                    num += 1
                time.sleep(REQUEST_DELAY_SECONDS)

    end_time = time.time()
    for line in summary_lines(start_time, end_time, num):
        print(line)


if __name__ == "__main__":
    main()

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章