Python threading demo - threads



Python multithreading and multiprocessing

The difference between threads and child processes

Processes and threads differ in granularity: processes cannot directly access each other's variables (that is, each other's memory), while threads can, and a thread always runs inside some process. For example, on Windows you open an IE browser; that browser is a process. If you then use the browser to open a PDF, IE calls Acrobat to open it, and Acrobat runs as an independent process, a child process of IE. Meanwhile, if IE itself opens two web pages in the same process and runs the scripts on both pages at the same time, IE does that with two threads. Note that those threads are still part of IE, whereas the child process Acrobat strictly speaking no longer belongs to IE: it is a separate program, and it is only called a child process of IE because IE launched it.
The Linux implementation blurs the difference between a pure process and a pure thread; under Linux the two are essentially the same thing.
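
A minimal sketch of this difference (the counter and bump names are just for illustration): a thread can change a global variable in the main process's memory, while a child process only changes its own copy.

# -*- coding: utf-8 -*-
import threading
import multiprocessing

counter = 0

def bump():
    global counter
    counter += 1

if __name__ == '__main__':
    t = threading.Thread(target=bump)
    t.start()
    t.join()
    print(counter)   # 1 -> the thread shares the main process's memory

    p = multiprocessing.Process(target=bump)
    p.start()
    p.join()
    print(counter)   # still 1 -> the child process changed only its own copy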

Workload types
https://blog.csdn.net/qq_33020901/article/details/80207594

CPU-bound (compute-intensive)
I/O-bound

Concurrency vs. parallelism
https://blog.csdn.net/qq_27825451/article/details/78850336
Concurrency: tasks take turns within the same time interval
Parallelism: tasks run at the same instant
The core conceptual difference between concurrency and parallelism is whether things happen "at the same time".

Parallelism is truly simultaneous.

[Multithreading in Python 3]
https://www.cnblogs.com/smallmars/p/7149507.html
Conclusion:
Use multithreading for I/O-bound work, e.g. sockets, crawlers, web services.
Use multiprocessing for CPU-bound work, e.g. financial analysis.
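
As a rough sketch of that rule of thumb (io_task and cpu_task below are made-up placeholder jobs), the standard concurrent.futures module offers a thread pool for I/O-bound work and a process pool for CPU-bound work:

# -*- coding: utf-8 -*-
from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor
import time

def io_task(n):
    time.sleep(0.5)                         # stands in for waiting on network / disk
    return n

def cpu_task(n):
    return sum(i * i for i in range(n))     # pure computation

if __name__ == '__main__':
    # I/O-bound: threads overlap the waiting, so 4 tasks take about 0.5s instead of 2s
    with ThreadPoolExecutor(max_workers=4) as pool:
        print(list(pool.map(io_task, range(4))))

    # CPU-bound: separate processes sidestep the GIL and can use several cores
    with ProcessPoolExecutor(max_workers=4) as pool:
        print(list(pool.map(cpu_task, [10 ** 6] * 4)))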



demo_1


# -*- coding: utf-8 -*-

import threading, time


def run(num):
    print("subThread({}) is starting...".format(threading.current_thread().name))

    time.sleep(0.5)
    print(num)
    time.sleep(0.5)

    print("subThread({}) is stopping.".format(threading.current_thread().name))


if __name__ == '__main__':
    print("mainThreading({}) is starting...".format(threading.current_thread().name))

    # create subThreading
    """
    target=function
    name = subThradingName
    args = subThreading input args(tupe)
    """
    subThreading = threading.Thread(target=run, name='runThreading', args=(1,))
    subThreading.start()

    # block the main thread until subThreading finishes
    subThreading.join()  # without this line, the main thread may finish before subThreading does

    print("mainThreading({}) is stop.".format(threading.current_thread().name))

demo_2 Multithreading without a lock

  1. Define a global variable so the threads share the resource.
  2. No lock is defined, so the two threads race on the shared number and the final output is unpredictable.
  3. Define the thread function and declare the shared global variable with the global keyword.
# -*- coding: utf-8 -*-

import threading

num = 100

def run(n):
    global num

    for i in range(1000000):
        num = num + n
        num = num - n


if __name__ == '__main__':
    t1 = threading.Thread(target=run, args=(6, ))
    t2 = threading.Thread(target=run, args=(9, ))

    t1.start()
    t2.start()

    t1.join()
    t2.join()

    print("num = {}".format(num))
    print("mainThreading({}) is stop.".format(threading.current_thread().name))

demo_3 Multithreading with a Lock

  1. Define a global variable so the threads share the resource.
  2. Define a thread lock. The lock object is shared by the threads, but only one thread can hold it at a time; it is released as soon as that thread leaves the with block.
  3. Define the thread function, declare the shared global variable with the global keyword, and wrap the critical section in with lock:.
# -*- coding: utf-8 -*-

import threading

num = 100
lock = threading.Lock()

def run(n):
    global num

    for i in range(1000000):
        with lock:
            num = num + n
            num = num - n


if __name__ == '__main__':
    t1 = threading.Thread(target=run, args=(6, ))
    t2 = threading.Thread(target=run, args=(9, ))

    t1.start()
    t2.start()

    t1.join()
    t2.join()

    print("num = {}".format(num))
    print("mainThreading({}) is stop.".format(threading.current_thread().name))

demo_4 Crossing the road together

  1. Only once 3 people have gathered can they cross the road:
    bar = threading.Barrier(3)
# -*- coding: utf-8 -*-

import threading, time

# only when 3 threads have reached the barrier may they proceed
bar = threading.Barrier(3)


def run():
    print("{} is starting...".format(threading.current_thread().name))

    time.sleep(1)
    bar.wait()

    print("{} is end.".format(threading.current_thread().name))


if __name__ == '__main__':
    for i in range(5):
        threading.Thread(target=run).start()
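
Note that this demo starts 5 threads against a Barrier(3): the first 3 are released together, but the remaining 2 stay blocked in bar.wait() forever. One possible workaround (a sketch only, not the only option) is to wait with a timeout so the stragglers give up instead of hanging:

# sketch: the same run(), but waiting at most 5 seconds at the barrier
def run():
    print("{} is starting...".format(threading.current_thread().name))

    time.sleep(1)
    try:
        bar.wait(timeout=5)
    except threading.BrokenBarrierError:
        # the barrier never filled up again, so stop waiting
        print("{} gave up at the barrier.".format(threading.current_thread().name))
        return

    print("{} is done.".format(threading.current_thread().name))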

demo_5 threading.Timer

A timer thread: run a function after a fixed delay.

# -*- coding: utf-8 -*-

import threading

def run():
    print('-*' * 10)
    print("runThreading is end.")


if __name__ == '__main__':
    print("mainThreading is start...")
    timeThreading = threading.Timer(3, run)
    timeThreading.start()
    timeThreading.join()
    print("mainThreading is end.")




Multithreaded crawler demo

Workflow

#!/usr/bin/python3
# -*- coding:utf-8 -*-
# @Software  : PyCharm
# @CreateTime: 2019-12-23 09:36
# @Author    : spider
# @File      : pyThread

import sys
import time
import pprint
import json
import re
from lxml import etree

import requests

import threading
from queue import Queue

"""
2類線程:3個下載,3個解析
內容隊列:下載線程往隊列中put數據,解析線程從隊列中get數據
url隊列: 下載線程從url隊列get數據
寫數據:上鎖,保證文件不能同時被修改
"""

g_crawl_list = []
g_parse_list = []

class CrawThread(threading.Thread):
    def __init__(self, name, page_queue, data_queue):
        super(CrawThread, self).__init__()
        self.name = name
        self.page_queue = page_queue
        self.data_queue = data_queue
        self.url = r"{}"
        self.headers = {}

    def run(self):
        print("{} ---------- crawl_thread start".format(self.name))
        while True:
            if self.page_queue.empty():
                break
            # take a page number from the queue
            page = self.page_queue.get()

            # build the url and send the request
            url = self.url.format(page)
            res = requests.get(url, headers=self.headers)
            if res.ok:
                # put the response body into data_queue
                self.data_queue.put(res.text)
        print("{} ---------- crawl_thread stop".format(self.name))



class ParserThread(threading.Thread):
    def __init__(self, name, data_queue, fp, lock):
        super(ParserThread, self).__init__()
        self.name = name
        self.data_queue = data_queue
        self.lock = lock
        self.fp = fp

    def run(self):
        print("{} ---------- parse_thread start".format(self.name))
        while True:
            if self.data_queue.empty():
                break
            # take one page of data from data_queue
            pageSourceHtml = self.data_queue.get()
            # parse the content
            self.parse_content(pageSourceHtml)
        print("{} ---------- parse_thread stop".format(self.name))

    def parse_content(self, pageSourceHtml):
        tree = etree.HTML(pageSourceHtml)
        li_list = tree.xpath("//li")

        items = []
        for oli in li_list:
            # placeholder values; the real xpath extraction would go here
            title = 'title'
            imgLink = 'imgLink'
            item = {
                'title':title,
                'imgLink':imgLink,
            }
            items.append(item)
        # write to the json file under the lock
        self.lock.acquire()
        self.fp.write(json.dumps(items, ensure_ascii=False))
        self.lock.release()

def function():
    print("in function...")
    print("-=" * 90)
    try:
        pass
    except Exception as e:
        print('\nLine_{:0>5d} in {} - {}'.format(
            sys._getframe().f_lineno, __file__, e))
    finally:
        pass

def create_queue():
    # create the page-number queue
    page_queue = Queue()
    for page in range(1, 11):
        page_queue.put(page)

    # create the content queue
    data_queue = Queue()
    return page_queue, data_queue

def create_crawl_thread(page_queue, data_queue):
    crawl_name = ['crawlThread1', 'crawlThread2', 'crawlThread3']
    for name in crawl_name:
        tCrawl = CrawThread(name, page_queue, data_queue)
        g_crawl_list.append(tCrawl)

def create_parser_thread(data_queue, fp, lock):
    crawl_name = ['parseThread1', 'parseThread2', 'parseThread3']
    for name in crawl_name:
        tParse = ParserThread(name, data_queue, fp, lock)
        g_parse_list.append(tParse)

def main():
    # open the output file
    fp = open('jian.json', 'a', encoding='utf8')
    # create the lock
    lock = threading.Lock()
    # create the queues
    page_queue, data_queue = create_queue()

    # create the crawl threads
    create_crawl_thread(page_queue, data_queue)
    # create the parser threads
    create_parser_thread(data_queue, fp, lock)

    # start all crawl threads
    for tCrawl in g_crawl_list:
        tCrawl.start()
    # start all parser threads
    for tParse in g_parse_list:
        tParse.start()

    # the main thread waits for the worker threads to finish
    for tCrawl in g_crawl_list:
        tCrawl.join()
    for tParse in g_parse_list:
        tParse.join()

    # close the output file
    fp.close()



if __name__ == '__main__':
    print("in startMain...")
    print("-=" * 90)
    main()