Python 下載小說

使用 BeautifulSoup 爬取小說,並整合到 txt 中。

"""
======================
@Author:CacheYu
@Time:2019/9/16:16:09
======================
"""
# -*- coding:utf-8 -*-
import urllib.error
import urllib.parse
import urllib.request

import bs4
from bs4 import BeautifulSoup

def readdown(url):
    """Download one chapter page and return its title and body as text.

    The page is expected to contain a ``<table id="tabletxt">`` whose first
    ``<i>`` element holds the chapter title and whose first
    ``<div class="txt">`` holds the chapter body. The title is also printed
    to stdout as a progress indicator.

    Args:
        url: URL of the chapter page to fetch.

    Returns:
        The chapter title, a newline, and the whitespace-stripped body text.

    Raises:
        ValueError: If the expected table, title or body element is missing.
        urllib.error.URLError: If the page cannot be fetched.
    """
    # Close the HTTP response as soon as the document has been parsed.
    with urllib.request.urlopen(url) as response:
        soup = BeautifulSoup(response, 'html.parser')

    table = soup.find('table', attrs={'id': 'tabletxt'})
    if table is None:
        raise ValueError('no <table id="tabletxt"> found at {}'.format(url))

    title_tag = table.find('i')
    if title_tag is None:
        raise ValueError('no <i> title element found at {}'.format(url))
    # get_text() always yields a str, whereas .string is None when the tag
    # has more than one child, which would break the concatenation below.
    title = title_tag.get_text()
    print(title)

    body_tag = table.find('div', attrs={'class': 'txt'})
    if body_tag is None:
        raise ValueError('no <div class="txt"> body found at {}'.format(url))

    return title + '\n' + body_tag.get_text().strip()

# Index page listing every chapter of the book, and the output text file.
page_url = 'https://www.dushiyanqing.net/book/90/90659/index.html'
book = r'E:\story\誰把風聲聽成離別歌.txt'

# Fetch and parse the chapter index; the response is closed once parsed.
with urllib.request.urlopen(page_url) as index_response:
    soup = BeautifulSoup(index_response, 'html.parser')

table = soup.find('table')
if isinstance(table, bs4.element.Tag):
    # Chapter links live in <td class="k4"> cells of the first table.
    tds = table.find_all('td', attrs={'class': 'k4'})
    default_encode = 'utf-8'

    print('開始寫入,請稍等……')
    # 'w' creates the file when it does not exist and truncates any previous
    # content; 'r+' required an existing file and left stale trailing bytes.
    with open(book, 'w', encoding=default_encode) as target_file_writer:
        for td in tds:
            a = td.find('a')
            if a is None:
                continue
            link = a.get('href')
            if not link:
                # An anchor without a target cannot be downloaded.
                continue
            # urljoin resolves site-rooted, relative and absolute hrefs alike.
            href = urllib.parse.urljoin(page_url, link)
            # Blank line between chapters so the next title does not run
            # into the last line of the previous chapter.
            target_file_writer.write(readdown(href) + '\n\n')
            # time.sleep(random.randint(5, 10))
    print('已完成!\n目錄地址爲:', book)
else:
    # Report the failure instead of exiting silently with nothing written.
    print('未找到章節目錄表格,沒有寫入任何內容。')

 

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章