20200223——起點文學免費小說爬取

這個單子爬取的是起點文學的免費小說。最開始由於只需要傳入兩個參數,我就手動處理了,結果被坑慘了——只要手動操作量大於 50,就一定要寫代碼完成!
在這裏插入圖片描述

from lxml import etree
import requests
import pandas as pd
# Listing layout: presumably 20 books per page (pageSize=20) times 5 pages.
urls='https://www.qidian.com/free/all?orderId=&vip=hidden&style=1&pageSize=20&siteid=1&pubflag=0&hiddenField=1&page=1'
# A timeout keeps a stalled connection from hanging the script forever.
res=requests.get(url=urls, timeout=10).content.decode('utf-8')
ele=etree.HTML(res)
# Book titles; can be reused as directory names in the next stage.
name=ele.xpath('//*[@id="free-channel-wrap"]/div/div/div[2]/div[2]/div/ul/li/div[2]/h4/a/text()')
# XPath of a single entry, kept for reference only. It used to sit here as
# bare text, which is a syntax error in Python:
# //*[@id="free-channel-wrap"]/div/div/div[2]/div[2]/div/ul/li[1]/div[2]/h4/a
# Protocol-relative links to each book's page; input for the next stage.
next_url=ele.xpath('//*[@id="free-channel-wrap"]/div/div/div[2]/div[2]/div/ul/li/div[2]/h4/a/@href')
# Absolute catalog URLs built from those links (the trailing number is the book id).
# The loop variable is not called `ele`, so it cannot be confused with the parsed tree.
next_url2=['http:'+ href +'#Catalog' for href in next_url]
# Authors of the novels.
author=ele.xpath('//*[@id="free-channel-wrap"]/div/div/div[2]/div[2]/div/ul/li/div[2]/p[1]/a[1]/text()')
# Category and sub-category of the novels.
label1=ele.xpath('//*[@id="free-channel-wrap"]/div/div/div[2]/div[2]/div/ul/li/div[2]/p[1]/a[2]/text()')
label2=ele.xpath('//*[@id="free-channel-wrap"]/div/div/div[2]/div[2]/div/ul/li/div[2]/p[1]/a[3]/text()')
filename = 'test.txt'
# 'w' creates the file if needed and truncates any existing content.
# The encoding is explicit so Chinese text is written the same way on every platform.
with open(filename,'w',encoding='utf-8') as f:
    # The format string needs a placeholder; without one the titles were never written.
    a=("書名{0}").format(name)
    f.write(a)
    f.write("\n--------------------------")
# 'a' appends after the existing content without clearing it.
with open(filename,'a',encoding='utf-8') as f:
    # The original line read an undefined `test` DataFrame left over from an
    # unrelated demo and raised NameError; append the scraped authors instead.
    b=('作者名{0}').format(author)
    f.write("\n")
    f.write(b)
import os
import re
import urllib.request

# Covers are saved into this folder. Create it up front: urlretrieve opens the
# target file directly and fails if the directory is missing.
path='image/'
os.makedirs(path, exist_ok=True)
# Walk listing pages 1..5; each page is requested with pageSize=20.
for page in range(1,6):
    urls='https://www.qidian.com/free/all?orderId=&vip=hidden&style=1&pageSize=20&siteid=1&pubflag=0&hiddenField=1&page={0}'.format(page)
    res=requests.get(url=urls, timeout=10).content.decode('utf-8')
    ele=etree.HTML(res)
    # A separate index for the entries; the original reused `i` for both loops.
    for idx in range(1,21):
        # Common XPath prefix of the idx-th book entry on the page.
        item='//*[@id="free-channel-wrap"]/div/div/div[2]/div[2]/div/ul/li[{0}]'.format(idx)
        # Book title; can be reused as a directory name in the next stage.
        name=ele.xpath(item+'/div[2]/h4/a/text()')
        print(name)
        # Protocol-relative link to the book page.
        next_url=ele.xpath(item+'/div[2]/h4/a/@href')
        print(next_url)
        # Author of the novel.
        author=ele.xpath(item+'/div[2]/p[1]/a[1]/text()')
        # Category and sub-category of the novel.
        label1=ele.xpath(item+'/div[2]/p[1]/a[2]/text()')
        label2=ele.xpath(item+'/div[2]/p[1]/a[3]/text()')
        # Disabled: per-book info file.
#         filename = '{0}.txt'.format(name)
#         with open(filename,'w') as f:
#             a=("書名{0}******作者名{1}******分類{2}{3}").format(name,author,label1,label2)
#             f.write(a)
#             f.write("\n--------------------------")
        # Cover image of the novel.
        img=ele.xpath(item+'/div[1]/a/img/@src')
        # A page may hold fewer than 20 entries; skip empty slots instead of
        # crashing with IndexError on the [0] lookups below.
        if not name or not img:
            continue
        img2='http:'+ img[0]
        print(img2)
        # xpath() returns a list, so use its first element as the title (the
        # original formatted the whole list and saved "['title'].jpg"), and
        # drop characters that are not allowed in file names.
        title=re.sub(r'[\\/:*?"<>|]', '', name[0])
        urllib.request.urlretrieve(img2, os.path.join(path, '{0}.jpg'.format(title)))
['數風流人物']
['//book.qidian.com/info/1017596768']
http://bookcover.yuewen.com/qdbimg/349573/1017596768/150
['蓋世雙諧']
['//book.qidian.com/info/1017371184']
http://bookcover.yuewen.com/qdbimg/349573/1017371184/150
['我是神話創世主']
['//book.qidian.com/info/1017514184']
http://bookcover.yuewen.com/qdbimg/349573/1017514184/150
['重生創業時代']
['//book.qidian.com/info/1016320879']
http://bookcover.yuewen.com/qdbimg/349573/1016320879/150
['基本劍術']
['//book.qidian.com/info/1016096305']
http://bookcover.yuewen.com/qdbimg/349573/1016096305/150
['庶族無名']
['//book.qidian.com/info/1017361973']
http://bookcover.yuewen.com/qdbimg/349573/1017361973/150
['從火影開始掌控時間']
['//book.qidian.com/info/1017621987']
http://bookcover.yuewen.com/qdbimg/349573/1017621987/150
['妖靈保護協會']
['//book.qidian.com/info/1017549911']
http://bookcover.yuewen.com/qdbimg/349573/1017549911/150
['我在足壇瘋狂刷錢']
['//book.qidian.com/info/1017503698']
http://bookcover.yuewen.com/qdbimg/349573/1017503698/150
['萬族之劫']
['//book.qidian.com/info/1018027842']
http://bookcover.yuewen.com/qdbimg/349573/1018027842/150
['從九龍奪嫡開始']
['//book.qidian.com/info/1017625899']
http://bookcover.yuewen.com/qdbimg/349573/1017625899/150
['戰爭工坊']
['//book.qidian.com/info/1017512762']
http://bookcover.yuewen.com/qdbimg/349573/1017512762/150
['我真不想當惡魔']
['//book.qidian.com/info/1017390905']
http://bookcover.yuewen.com/qdbimg/349573/1017390905/150
['我的帝國無雙']
['//book.qidian.com/info/1017737715']
http://bookcover.yuewen.com/qdbimg/349573/1017737715/150
['仙魔編輯器']
['//book.qidian.com/info/1017696480']
http://bookcover.yuewen.com/qdbimg/349573/1017696480/150
['都市大進化時代']
['//book.qidian.com/info/1017661402']
http://bookcover.yuewen.com/qdbimg/349573/1017661402/150
['文娛璀璨']
['//book.qidian.com/info/1012621275']
http://bookcover.yuewen.com/qdbimg/349573/1012621275/150
['大國重坦']
['//book.qidian.com/info/1017558916']
http://bookcover.yuewen.com/qdbimg/349573/1017558916/150
['我有一座藏武樓']
['//book.qidian.com/info/1017434674']
http://bookcover.yuewen.com/qdbimg/349573/1017434674/150
['位面練級大師']
['//book.qidian.com/info/1017559230']
http://bookcover.yuewen.com/qdbimg/349573/1017559230/150
['木葉之波風家的崛起']
['//book.qidian.com/info/1017556476']
http://bookcover.yuewen.com/qdbimg/349573/1017556476/150
['大宋很野蠻']
['//book.qidian.com/info/1017512048']
http://bookcover.yuewen.com/qdbimg/349573/1017512048/150
['科技樹保姆']
['//book.qidian.com/info/1017675321']
http://bookcover.yuewen.com/qdbimg/349573/1017675321/150
['諸天最強女主']
['//book.qidian.com/info/1017702416']
http://bookcover.yuewen.com/qdbimg/349573/1017702416/150
['孤島諜戰']
['//book.qidian.com/info/1017496873']
http://bookcover.yuewen.com/qdbimg/349573/1017496873/150
['開局50歲我還可以火三年']
['//book.qidian.com/info/1017570591']
http://bookcover.yuewen.com/qdbimg/349573/1017570591/150
['超品命師']
['//book.qidian.com/info/1017694591']
http://bookcover.yuewen.com/qdbimg/349573/1017694591/150
['深宵酒館']
['//book.qidian.com/info/1017180943']
http://bookcover.yuewen.com/qdbimg/349573/1017180943/150
['我投籃實在太準了']
['//book.qidian.com/info/1018126504']
http://bookcover.yuewen.com/qdbimg/349573/1018126504/150
['洪荒之我不是哪吒']
['//book.qidian.com/info/1017374273']
http://bookcover.yuewen.com/qdbimg/349573/1017374273/150
['賽博英雄傳']
['//book.qidian.com/info/1018180913']
http://bookcover.yuewen.com/qdbimg/349573/1018180913/150
['鋼鐵城市']
['//book.qidian.com/info/1017639935']
http://bookcover.yuewen.com/qdbimg/349573/1017639935/150
['西南崛起']
['//book.qidian.com/info/1016182259']
http://bookcover.yuewen.com/qdbimg/349573/1016182259/150
['詭異遊戲空間']
['//book.qidian.com/info/1017664765']
http://bookcover.yuewen.com/qdbimg/349573/1017664765/150
['楚門狼']
['//book.qidian.com/info/1017589032']
http://bookcover.yuewen.com/qdbimg/349573/1017589032/150
['神祕讓我強大']
['//book.qidian.com/info/1018023786']
http://bookcover.yuewen.com/qdbimg/349573/1018023786/150
['我要做閣老']
['//book.qidian.com/info/1018002065']
http://bookcover.yuewen.com/qdbimg/349573/1018002065/150
['以力服人']
['//book.qidian.com/info/1018164861']
http://bookcover.yuewen.com/qdbimg/349573/1018164861/150
['放怪物一條生路不行嗎']
['//book.qidian.com/info/1018171817']
http://bookcover.yuewen.com/qdbimg/349573/1018171817/150
['獨行諸天末日']
['//book.qidian.com/info/1018044323']
http://bookcover.yuewen.com/qdbimg/349573/1018044323/150
['帝國梟色']
['//book.qidian.com/info/1017690656']
http://bookcover.yuewen.com/qdbimg/349573/1017690656/150
['我的1999年']
['//book.qidian.com/info/1017749812']
http://bookcover.yuewen.com/qdbimg/349573/1017749812/150
['楚氏贅婿']
['//book.qidian.com/info/1018195792']
http://bookcover.yuewen.com/qdbimg/349573/1018195792/150
['從靈氣復甦到末法時代']
['//book.qidian.com/info/1018230018']
http://bookcover.yuewen.com/qdbimg/349573/1018230018/150
['我能拉低別人的智商']
['//book.qidian.com/info/1018219521']
http://bookcover.yuewen.com/qdbimg/349573/1018219521/150
['我真沒想成大佬']
['//book.qidian.com/info/1018094243']
http://bookcover.yuewen.com/qdbimg/349573/1018094243/150
['天命螢惑']
['//book.qidian.com/info/1017494444']
http://bookcover.yuewen.com/qdbimg/349573/1017494444/150
['無限地球衛士']
['//book.qidian.com/info/1018337057']
http://bookcover.yuewen.com/qdbimg/349573/1018337057/150
['百歲大爺激活修仙系統']
['//book.qidian.com/info/1018198459']
http://bookcover.yuewen.com/qdbimg/349573/1018198459/150
['我的分身是玉皇大帝']
['//book.qidian.com/info/1018339789']
http://bookcover.yuewen.com/qdbimg/349573/1018339789/150
['日本戰國走一遭']
['//book.qidian.com/info/1012757932']
http://bookcover.yuewen.com/qdbimg/349573/1012757932/150
['重生寫推理小說']
['//book.qidian.com/info/1016350338']
http://bookcover.yuewen.com/qdbimg/349573/1016350338/150
['廢土修真的日常']
['//book.qidian.com/info/1016234812']
http://bookcover.yuewen.com/qdbimg/349573/1016234812/150
['食戟之蓋世龍廚']
['//book.qidian.com/info/1016075145']
http://bookcover.yuewen.com/qdbimg/349573/1016075145/150
['卡塞爾裏的混血君王']
['//book.qidian.com/info/1015940062']
http://bookcover.yuewen.com/qdbimg/349573/1015940062/150
['大田園']
['//book.qidian.com/info/1017249858']
http://bookcover.yuewen.com/qdbimg/349573/1017249858/150
['開局八百個火影']
['//book.qidian.com/info/1017469084']
http://bookcover.yuewen.com/qdbimg/349573/1017469084/150
['柯南之我不是蛇精病']
['//book.qidian.com/info/1017470457']
http://bookcover.yuewen.com/qdbimg/349573/1017470457/150
['我死了也變強了']
['//book.qidian.com/info/1016937210']
http://bookcover.yuewen.com/qdbimg/349573/1016937210/150
['我真不是仙二代']
['//book.qidian.com/info/1017596129']
http://bookcover.yuewen.com/qdbimg/349573/1017596129/150
['李朝萬古一逆賊']
['//book.qidian.com/info/1015407245']
http://bookcover.yuewen.com/qdbimg/349573/1015407245/150
['我本初唐']
['//book.qidian.com/info/1013429012']
http://bookcover.yuewen.com/qdbimg/349573/1013429012/150
['我的火影真是太穩健了']
['//book.qidian.com/info/1017256868']
http://bookcover.yuewen.com/qdbimg/349573/1017256868/150
['我有好多復活幣']
['//book.qidian.com/info/1017380601']
http://bookcover.yuewen.com/qdbimg/349573/1017380601/150
['我的生活能開掛']
['//book.qidian.com/info/1016519510']
http://bookcover.yuewen.com/qdbimg/349573/1016519510/150
['從火影開始的鍛造師']
['//book.qidian.com/info/1017501005']
http://bookcover.yuewen.com/qdbimg/349573/1017501005/150
['砂隱的崛起之路']
['//book.qidian.com/info/1017387894']
http://bookcover.yuewen.com/qdbimg/349573/1017387894/150
['回到明朝做昏君']
['//book.qidian.com/info/1017224028']
http://bookcover.yuewen.com/qdbimg/349573/1017224028/150
['渾沌記']
['//book.qidian.com/info/3267635']
http://bookcover.yuewen.com/qdbimg/349573/3267635/150
['我真不想當聖師']
['//book.qidian.com/info/1017456326']
http://bookcover.yuewen.com/qdbimg/349573/1017456326/150
['苦境武學系統']
['//book.qidian.com/info/1017381974']
http://bookcover.yuewen.com/qdbimg/349573/1017381974/150
['漫威之我是防火女']
['//book.qidian.com/info/1017523110']
http://bookcover.yuewen.com/qdbimg/349573/1017523110/150
['機戰世界']
['//book.qidian.com/info/1017442662']
http://bookcover.yuewen.com/qdbimg/349573/1017442662/150
['這款遊戲絕對有問題']
['//book.qidian.com/info/1017371463']
http://bookcover.yuewen.com/qdbimg/349573/1017371463/150
['民國之遠東鉅商']
['//book.qidian.com/info/1017287530']
http://bookcover.yuewen.com/qdbimg/349573/1017287530/150
['湖人有個孫大聖']
['//book.qidian.com/info/1017349860']
http://bookcover.yuewen.com/qdbimg/349573/1017349860/150
['刺客伍六七之劍客陸九']
['//book.qidian.com/info/1017483282']
http://bookcover.yuewen.com/qdbimg/349573/1017483282/150
['阿拉德的不正經救世主']
['//book.qidian.com/info/1017377808']
http://bookcover.yuewen.com/qdbimg/349573/1017377808/150
['在超神學院的那些年']
['//book.qidian.com/info/1017546226']
http://bookcover.yuewen.com/qdbimg/349573/1017546226/150
['有個沙雕血族老婆是什麼體驗']
['//book.qidian.com/info/1017435917']
http://bookcover.yuewen.com/qdbimg/349573/1017435917/150
['當醫生遇上不正經系統']
['//book.qidian.com/info/1017087484']
http://bookcover.yuewen.com/qdbimg/349573/1017087484/150
['如何在推理番中裝好人']
['//book.qidian.com/info/1017596228']
http://bookcover.yuewen.com/qdbimg/349573/1017596228/150
['三國從救曹操老爹開始']
['//book.qidian.com/info/1017374000']
http://bookcover.yuewen.com/qdbimg/349573/1017374000/150
['回檔在2008']
['//book.qidian.com/info/1017562274']
http://bookcover.yuewen.com/qdbimg/349573/1017562274/150
['從主播開始成爲巨星']
['//book.qidian.com/info/1016506925']
http://bookcover.yuewen.com/qdbimg/349573/1016506925/150
['神豪從願望成真開始']
['//book.qidian.com/info/1017422310']
http://bookcover.yuewen.com/qdbimg/349573/1017422310/150
['文體之路']
['//book.qidian.com/info/1017205414']
http://bookcover.yuewen.com/qdbimg/349573/1017205414/150
['時停在玄幻世界']
['//book.qidian.com/info/1017165730']
http://bookcover.yuewen.com/qdbimg/349573/1017165730/150
['漫威裏的旅法師']
['//book.qidian.com/info/1017341840']
http://bookcover.yuewen.com/qdbimg/349573/1017341840/150
['漫威之電影大破壞']
['//book.qidian.com/info/1017497343']
http://bookcover.yuewen.com/qdbimg/349573/1017497343/150
['我可以無限裝備']
['//book.qidian.com/info/1017500099']
http://bookcover.yuewen.com/qdbimg/349573/1017500099/150
['無限流生存遊戲']
['//book.qidian.com/info/1017257783']
http://bookcover.yuewen.com/qdbimg/349573/1017257783/150
['諸天配角交流羣']
['//book.qidian.com/info/1016861661']
http://bookcover.yuewen.com/qdbimg/349573/1016861661/150
['這個刺客有毛病']
['//book.qidian.com/info/1017433918']
http://bookcover.yuewen.com/qdbimg/349573/1017433918/150
['我真的重生了']
['//book.qidian.com/info/1017596673']
http://bookcover.yuewen.com/qdbimg/349573/1017596673/150
['從UP主開始']
['//book.qidian.com/info/1016419324']
http://bookcover.yuewen.com/qdbimg/349573/1016419324/150
['老婆的神級陪練']
['//book.qidian.com/info/1017442346']
http://bookcover.yuewen.com/qdbimg/349573/1017442346/150
['大明王冠']
['//book.qidian.com/info/1016942258']
http://bookcover.yuewen.com/qdbimg/349573/1016942258/150
['我是自己的頭號黑粉']
['//book.qidian.com/info/1016566684']
http://bookcover.yuewen.com/qdbimg/349573/1016566684/150
['鬥羅之黃猿鬥羅']
['//book.qidian.com/info/1017795366']
http://bookcover.yuewen.com/qdbimg/349573/1017795366/150

以上和以下是兩段獨立的代碼,需要分開運行。第二段只需要傳入兩個參數:number_book(起點文學每本書都有的 id 號)和 name(書名,主要用於創建文件夾)。

import requests
import re
from bs4 import BeautifulSoup
from requests.exceptions import *
import random
import json
import time
import os
import sys
# ID of the book to scrape; it is appended to the book.qidian.com / read.qidian.com URLs below.
number_book='1016350338'
# Book title; used as the top-level output folder that holds the per-volume directories.
name='重生寫推理小說'
def random_user_agent():
    """Return one User-Agent string picked at random from a fixed pool.

    The pool holds six desktop Chrome User-Agent strings; every call draws
    one of them uniformly.

    :return: a User-Agent header value (str)
    """
    # Named `user_agents` rather than `list` so the builtin is not shadowed.
    user_agents = (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36',
        'Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML like Gecko) Chrome/44.0.2403.155 Safari/537.36',
        'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.0 Safari/537.36',
        'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2226.0 Safari/537.36',
        'Mozilla/5.0 (Windows NT 6.4; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2225.0 Safari/537.36',
        'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2225.0 Safari/537.36',
    )
    return random.choice(user_agents)
 
def getJson():
    '''
    Fetch the volume list of the book identified by ``number_book``.

    Requests the ``ajax/book/category`` endpoint and reads the volume entries
    stored under ``data`` -> ``vs`` in the JSON reply.

    :return: dict with two parallel lists of strings, ``VolumeId_List`` (the
             ``vId`` of every volume) and ``VolumeNum_List`` (the ``cCnt`` of
             every volume), or None when the request fails, the status is not
             200, or the reply does not have the expected structure
    '''
    url = 'https://book.qidian.com/ajax/book/category?_csrfToken=lpPyO6EWOZoz5LggKFp43eq7jbMf8WfSF2ndrCca&bookId='+number_book
    headers = {'User-Agent': random_user_agent(),
               'Referer': 'https://book.qidian.com/info/'+number_book,
               'Cookie': '_csrfToken=BXnzDKmnJamNAgLu4O3GknYVL2YuNX5EE86tTBAm;newstatisticUUID=1564467217_1193332262; qdrs=0%7C3%7C0%7C0%7C1; showSectionCommentGuide=1; qdgd=1; lrbc=1013637116%7C436231358%7C0%2C1003541158%7C309402995%7C0; rcr=1013637116%2C1003541158; bc=1003541158%2C1013637116; e1=%7B%22pid%22%3A%22qd_P_limitfree%22%2C%22eid%22%3A%22qd_E01%22%2C%22l1%22%3A4%7D; e2=%7B%22pid%22%3A%22qd_P_free%22%2C%22eid%22%3A%22qd_A18%22%2C%22l1%22%3A3%7D'
    }
    try:
        # Send the dict as HTTP headers. Passing it as `params` appended
        # User-Agent/Referer/Cookie to the query string instead.
        # The timeout is what makes the ReadTimeout branch reachable at all.
        res = requests.get(url=url, headers=headers, timeout=10)
    except ReadTimeout:
        print("ReadTimeout!")
        return None
    except RequestException:
        print("請求頁面出錯!")
        return None

    if res.status_code != 200:
        print('No response')
        return None

    try:
        volumes = json.loads(res.text)['data']['vs']
        # Read the fields straight from the parsed JSON instead of re-serialising
        # each volume and regex-scraping it (that crashed whenever the key was
        # the last one in the object). Values are kept as strings because the
        # caller concatenates the ids into URLs.
        # NOTE(review): assumes vId and cCnt are plain scalars - confirm against a live reply.
        return {
            'VolumeId_List': [str(volume['vId']) for volume in volumes],
            'VolumeNum_List': [str(volume['cCnt']) for volume in volumes],
        }
    except (ValueError, KeyError, TypeError):
        # Not JSON, or JSON without the expected data/vs/vId/cCnt structure.
        print('Unexpected response format')
        return None
 
def getPage(VolId_List, VolNum_List, max_volumes=2):
    '''
    Download the reading page of each volume and hand it to parsePage.

    For every processed volume a directory ``<name>//卷<n>`` is created, the
    page ``https://read.qidian.com/hankread/<number_book>/<volume id>`` is
    fetched and its HTML is passed to parsePage. The function stops at the
    first failed request. It sleeps 3 seconds after each volume.

    :param VolId_List: list of volume ids
    :param VolNum_List: list with the number of chapters of each volume
    :param max_volumes: how many leading volumes to process; the default 2
                        matches the previously hard-coded limit, None means all
    :return: None
    '''
    # Pair ids with chapter counts and cap the amount, so a book with fewer
    # than two volumes no longer raises IndexError.
    volumes = list(zip(VolId_List, VolNum_List))
    if max_volumes is not None:
        volumes = volumes[:max_volumes]

    for i, (vol_id, chap_num) in enumerate(volumes):
        path = name+'//卷' + str(i + 1)
        mkdir(path)
        url = 'https://read.qidian.com/hankread/'+number_book+'/'+str(vol_id)
        print('\n當前訪問路徑:'+url)
        headers = {
            'User-Agent': random_user_agent(),
            # Refer to the book actually being scraped, as getJson does,
            # instead of a hard-coded unrelated book id.
            'Referer': 'https://book.qidian.com/info/'+number_book,
            'Cookie': 'e1=%7B%22pid%22%3A%22qd_P_hankRead%22%2C%22eid%22%3A%22%22%2C%22l1%22%3A3%7D; e2=%7B%22pid%22%3A%22qd_P_hankRead%22%2C%22eid%22%3A%22%22%2C%22l1%22%3A2%7D; _csrfToken=BXnzDKmnJamNAgLu4O3GknYVL2YuNX5EE86tTBAm; newstatisticUUID=1564467217_1193332262; qdrs=0%7C3%7C0%7C0%7C1; showSectionCommentGuide=1; qdgd=1; e1=%7B%22pid%22%3A%22qd_P_limitfree%22%2C%22eid%22%3A%22qd_E01%22%2C%22l1%22%3A4%7D; e2=%7B%22pid%22%3A%22qd_P_free%22%2C%22eid%22%3A%22qd_A18%22%2C%22l1%22%3A3%7D; rcr=3144877%2C1013637116%2C1003541158; lrbc=3144877%7C52472447%7C0%2C1013637116%7C436231358%7C0%2C1003541158%7C309402995%7C0; bc=3144877'
        }
        try:
            # Send the dict as HTTP headers (it used to be passed as `params`,
            # i.e. as query-string arguments). The timeout makes the
            # ReadTimeout branch reachable.
            res = requests.get(url=url, headers=headers, timeout=10)
            if res.status_code == 200:
                print('第'+str(i+1)+'卷已開始爬取:')
                parsePage(res.text, url, path, int(chap_num))
            else:
                print('No response')
                return None
        except ReadTimeout:
            print("ReadTimeout!")
            return None
        except RequestException:
            print("請求頁面出錯!")
            return None
        # Pause between volumes to avoid hammering the server.
        time.sleep(3)
 
def parsePage(html, url, path, chapNum):
    '''
    Parse a volume page and append every chapter to its own txt file inside
    the volume directory, printing a progress percentage while doing so.

    :param html: HTML text of the volume page (None is reported and ignored)
    :param url: URL the page came from; only used in the empty-page message
    :param path: volume directory path
    :param chapNum: expected number of chapters; denominator of the progress percentage
    :return: None
    '''
    if html is None:
        print('訪問路徑爲'+url+'的頁面爲空')
        return

    def show_progress(done):
        # chapNum may be 0 for an empty volume; avoid ZeroDivisionError.
        percent = done / chapNum * 100 if chapNum else 0.0
        sys.stdout.write('\r已爬取{0}'.format('%.2f%%' % percent))
        sys.stdout.flush()

    # Characters that are not allowed in file names. Raw string: the old
    # pattern used the invalid escape '\/' and never matched a backslash.
    illegal_chars = re.compile(r'[\\/:*?"<>|]')
    # Inner markup of an attribute-less <p> element.
    paragraph_re = re.compile('.*?<p>(.*?)</p>', re.S)

    soup = BeautifulSoup(html, 'lxml')
    ChapInfoList = soup.find_all('div', attrs={'class': 'main-text-wrap'})
    alreadySpiderNum = 0
    for i, chapter in enumerate(ChapInfoList):
        show_progress(alreadySpiderNum)
        time.sleep(0.5)
        # Search inside the already parsed tag; re-parsing str(chapter) is unnecessary.
        title_tag = chapter.find('h3', attrs={'class': 'j_chapterName'})
        content_tag = chapter.find('div', attrs={'class': 'read-content j_readContent'})
        if content_tag is None:
            # Nothing to save for this block; skip it instead of crashing.
            continue
        title_span = title_tag.span if title_tag is not None else None
        raw_title = title_span.string if title_span is not None else None
        # A missing or unreadable title is handled like an untitled chapter.
        ChapName = illegal_chars.sub('', raw_title) if raw_title else ''
        if not ChapName or ChapName == '無題':
            ChapName = '第'+str(i+1)+'章 無題'
        filename = path+'//'+ChapName+'.txt'
        paragraphs = []
        for item in content_tag.find_all('p'):
            found = paragraph_re.search(str(item))
            # Fall back to the plain text when the <p> carries attributes and
            # the pattern therefore does not match.
            paragraphs.append(found.group(1) if found else item.get_text())
        if paragraphs:
            # One write per chapter; the result is identical to appending each
            # paragraph followed by a newline.
            save2file(filename, '\n'.join(paragraphs))
        alreadySpiderNum += 1
    show_progress(alreadySpiderNum)
 
 
def save2file(filename, content):
    '''
    Append ``content`` followed by a newline to ``filename`` (UTF-8).
    The file is created when it does not exist yet.

    :param filename: path of the target file
    :param content: text to append
    :return: None
    '''
    # The with-statement closes the file; no explicit close() is needed.
    with open(filename, 'a', encoding='utf-8') as f:
        f.write(content+'\n')
 
def mkdir(path):
    '''
    Make sure the volume directory exists.
    An already existing path is only reported; otherwise the directory is
    created together with any missing parent directories.

    :param path: directory path to create
    :return: None
    '''
    if os.path.exists(path):
        print('路徑'+path+'已存在')
        return
    os.makedirs(path)
 
def main():
    '''
    Entry point: fetch the volume list and download the volumes.

    :return: None
    '''
    response = getJson()
    if response is None:
        # Stop here; previously the completion message below was printed
        # even though nothing had been scraped.
        print('無法爬取該小說!')
        return
    getPage(response['VolumeId_List'], response['VolumeNum_List'])
    print("小說爬取完畢!")


if __name__ == '__main__':
    main()
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章