多線程使用 python

代碼:

# -*- coding: utf-8 -*-
# @Time : 2020/4/19 21:54
# @Author : Oneqq
# @File : 24.threads的使用.py
# @Software: PyCharm

from queue import Empty
from queue import Queue
from threading import Thread

import requests
from fake_useragent import UserAgent
from lxml import etree


class CrawlInof(Thread):
    """Downloader thread: pulls page URLs off ``url_queue``, fetches each
    page, and pushes the HTML text of 200-OK responses onto ``html_queue``."""

    def __init__(self, url_queue, html_queue):
        Thread.__init__(self)
        self.url_queue = url_queue    # queue.Queue of URL strings still to fetch
        self.html_queue = html_queue  # queue.Queue receiving downloaded HTML text

    def run(self):
        headers = {
            # A random User-Agent per thread reduces the chance of the site
            # rejecting repeated identical requests.
            "User-Agent": UserAgent().random
        }
        # Bug fix: the original body read the module-level globals
        # `url_queue` / `html_queue` instead of the instance attributes,
        # so the class only worked by accident when run as this script.
        #
        # Also, `empty()` followed by `get()` is racy with several worker
        # threads (another thread can drain the queue in between); use the
        # atomic get_nowait() and treat queue.Empty as the stop signal.
        while True:
            try:
                url = self.url_queue.get_nowait()
            except Empty:
                break
            response = requests.get(url, headers=headers)
            if response.status_code == 200:
                self.html_queue.put(response.text)


class ParseInfo(Thread):
    """Parser thread: pulls HTML documents off ``html_queue``, extracts the
    story text from each page, and appends it to ``duanzi.txt``."""

    def __init__(self, html_queue):
        Thread.__init__(self)
        self.html_queue = html_queue  # queue.Queue of HTML documents to parse

    def run(self):
        # `empty()` followed by `get()` is racy with several consumer
        # threads; get_nowait() with queue.Empty as the stop signal is
        # the atomic equivalent.
        while True:
            try:
                html = self.html_queue.get_nowait()
            except Empty:
                break
            tree = etree.HTML(html)
            # The first <span> under each content div holds the story body.
            span_contents = tree.xpath('//div[@class="content"]/span[1]')
            with open('duanzi.txt', 'a', encoding='utf-8') as f:
                for span in span_contents:
                    # string(.) flattens all descendant text into one string.
                    f.write(span.xpath('string(.)'))


if __name__ == '__main__':
    page_template = "https://www.qiushibaike.com/text/page/{}/"
    url_queue = Queue()
    html_queue = Queue()

    # Seed the work queue with pages 1..13.
    for page in range(1, 14):
        url_queue.put(page_template.format(page))

    # Phase 1: three downloader threads drain the URL queue into html_queue.
    crawlers = [CrawlInof(url_queue, html_queue) for _ in range(3)]
    for worker in crawlers:
        worker.start()
    for worker in crawlers:
        worker.join()

    # Phase 2: once downloading is complete, three parser threads drain
    # the HTML queue and write the extracted text to disk.
    parsers = [ParseInfo(html_queue) for _ in range(3)]
    for worker in parsers:
        worker.start()
    for worker in parsers:
        worker.join()

結果:

（註：原文此處應為程序輸出的段子文本；以上抓取到的是頁面評論欄的佔位文字，並非實際運行結果。）