ThreadPool線程池的幾種姿勢比較

from multiprocessing.pool import ThreadPool
#from multiprocessing.dummy import Pool as ThreadPool
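# multiprocessing.dummy.Pool() simply returns a multiprocessing.pool.ThreadPool instance, so both imports yield the same kind of pool with identical methods.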
import time

def test1(x):
    print("x1:", x)
    try:
        time.sleep(0.1)
        if x == 8:
            time.sleep(10)
        elif x == 12:
            time.sleep(5)
        elif x == 49:
            time.sleep(20)
            print('這裏等待時間最長!')
        else:
            return
    except Exception as e:
        print('error1:', e)
    raise Exception('timeout error1 ', x)  # 只要沒走return都會觸發此異常

def test2(x):
    print("x2:", x)
    try:
        time.sleep(0.1)
        if x == 9:
            time.sleep(1)
        elif x == 11:
            time.sleep(5)
        elif x == 40:
            time.sleep(10)
        else:
            return
    except Exception as e:
        print('error2:', e)
    raise Exception('timeout error2 ', x)

def task(x):
    """Run test1 then test2 on x; this is the unit of work submitted to the pools.

    Returns None when both steps returned None, otherwise 1. With the
    test1/test2 defined in this file each step either returns None or raises,
    so the ``return 1`` branch is never actually taken: an exception from
    either step propagates out of this function before the check runs.
    """
    first = test1(x)
    second = test2(x)
    print('ret1 and ret2: ', first, second)
    if first is not None or second is not None:
        return 1
    return None



#apply_async---error
def dome0():
    """Demo of the *wrong* way to use apply_async: results are never inspected.

    Submits task(0..49) to a 20-thread pool, then only closes and joins the
    pool. Because get() is never called on the AsyncResult objects, an
    exception raised inside a worker is not re-raised in the main thread;
    the function just prints the elapsed time.
    """
    started = time.time()
    pool = ThreadPool(20)
    # The handles are kept but deliberately never queried.
    pending = [pool.apply_async(func=task, args=(n,)) for n in range(50)]

    pool.close()
    pool.join()
    print("time:", time.time() - started)


#apply_async
def dome1():
    """Demo: apply_async with per-result wait()/get() so worker errors surface.

    Submits task(0..49) to a 20-thread pool and then walks the AsyncResult
    handles in submission order. Each handle is waited on (up to 60s); once
    ready, its status is printed and, if successful, its value is collected.
    get() re-raises a worker's exception, which ends the whole loop.

    Notes recorded by the original author:
      * The first error reported is ('timeout error1 ', 8): handles are
        consumed in submission order, so the lowest failing x wins
        regardless of how long each task sleeps.
      * Values of the tasks before the failing one are still collected
        (eight None values), which the map-style calls cannot offer.
      * The other workers are not interrupted by the failure.
      * The pool is intentionally not closed/joined here: calling
        close()/join() before the loop makes the run wait for every task
        (about 20s instead of about 10s).
      * An ``if``-based variant (wait(), then ready()/successful() before
        get()) behaves the same; the ``while`` adds nothing.
    """
    started = time.time()
    collected = []
    pool = ThreadPool(20)
    pending = [pool.apply_async(func=task, args=(n,)) for n in range(50)]

    print('=====while====2')
    try:
        for handle in pending:
            handle.wait(timeout=60)
            # ready() is True once the call has completed (successfully or not).
            while handle.ready():
                print("ready %s" % handle.ready())
                print("successful %s" % handle.successful())
                # get() re-raises the worker's exception, leaving the loop via except.
                print("i.get %s" % handle.get())
                if not handle.successful():
                    continue
                collected.append(handle.get())
                break
    except Exception as e:
        print("error is %s" % str(e))
    print('=====while====2')

    print("result2 and len: ", (collected, len(collected)))
    print("time:", time.time() - started)

#map_async
def dome2():
    """Demo: map_async followed by explicit ready()/successful()/get() checks.

    Runs task over range(50) on a 20-thread pool, closes and joins the pool,
    waits on the single MapResult and prints its status. get() re-raises the
    stored worker exception, so the ``if`` section below is only reached when
    every task succeeded; in that case the status can be queried repeatedly
    with identical answers.

    Notes recorded by the original author:
      * error is ('timeout error2 ', 9) and result2 stays empty: the map
        result carries the first failure to occur (x == 9, shortest sleep),
        not the lowest x.
      * The other workers keep running after the failure, so the total time
        is still about 20s (longest sleep).
      * Once any task fails, get() yields no data at all.
      * A ``while result.ready()`` loop is unsuitable here: the result stays
        ready forever, so it would need successful()/get() called up front
        to break out on failure.
    """
    def report(async_result):
        # Print the three status lines; get() raises if any task failed.
        print("ready %s" % async_result.ready())
        print("successful %s" % async_result.successful())
        print("i.get %s" % async_result.get())

    collected = []
    started = time.time()
    pool = ThreadPool(20)
    inputs = range(50)
    try:
        outcome = pool.map_async(task, inputs)
        # close/join already block until all tasks finish; wait() is then a no-op.
        pool.close()
        pool.join()
        outcome.wait()

        # Querying here surfaces a worker failure early, with its cause.
        report(outcome)

        print('=====if====1')
        if outcome.ready() and outcome.successful():
            collected.append(outcome.get())
            # On success the status may be read any number of times.
            for _ in range(2):
                report(outcome)
        print('=====if====1')
    except Exception as e:
        print("error is %s" % str(e))
    print("result2 and len: ", (collected, len(collected)))
    print("time:", time.time() - started)

#map
def dome3():
    """Demo: blocking pool.map over range(50) on a 20-thread pool.

    Prints the result list on success, or the first worker exception that
    map() re-raises, followed by the elapsed time. The pool is closed and
    joined on every path so the worker threads are released even when
    map() raises.

    Notes recorded by the original author:
      * error is ('timeout error2 ', 9), time about 20.8s.
      * Tasks do run concurrently; x == 9 fails first because it has the
        shortest sleep.
      * The remaining workers are not interrupted, so the run still lasts
        as long as the longest sleep (20s).
      * When any task fails, map() raises and no result data is returned.
    """
    started = time.time()
    pool = ThreadPool(20)
    try:
        result = pool.map(task, range(50))
        print('result : ', result)
    except Exception as e:
        print("error is %s" % str(e))
    finally:
        # Release the worker threads whether or not map() raised.
        pool.close()
        pool.join()
    print("time:", time.time() - started)

# apply
def dome4():
    """Demo: synchronous pool.apply called in a loop for x in 0..49.

    Each apply() blocks until its task finishes, so the tasks run one after
    another despite the 20-thread pool. The first task that raises ends the
    loop; its exception is printed, then the elapsed time. The pool is
    closed and joined on every path so its worker threads are released.

    Notes recorded by the original author:
      * error is ('timeout error1 ', 8), time about 11.9s.
      * Execution is step by step with no concurrency, essentially like a
        single thread, and nothing after the failing task is run.
    """
    started = time.time()
    pool = ThreadPool(20)
    results = []
    try:
        for i in range(50):
            results.append(pool.apply(task, (i,)))
        print(results)
    except Exception as e:
        print("error is %s" % str(e))
    finally:
        # apply() is synchronous, so no work is outstanding here; this only
        # shuts down the idle worker threads.
        pool.close()
        pool.join()
    print("time:", time.time() - started)

#map_async---上面的map_async其實不需要if判斷的
def dome5():
    """Demo: the minimal map_async pattern, without the extra ``if`` checks of dome2.

    Runs task over range(50) on a 20-thread pool, closes and joins the pool,
    waits on the MapResult and prints ready()/successful()/get(). Calling
    get() is what makes a worker exception visible in the main thread;
    without it the failure would go unnoticed. The collected list is never
    filled and is printed empty.
    """
    collected = []
    started = time.time()
    pool = ThreadPool(20)
    inputs = range(50)
    try:
        outcome = pool.map_async(task, inputs)
        # close/join already block until all tasks finish; wait() is then a no-op.
        pool.close()
        pool.join()
        outcome.wait()
        # get() re-raises the first stored worker exception, if any.
        for label, query in (
            ("ready %s", outcome.ready),
            ("successful %s", outcome.successful),
            ("i.get %s", outcome.get),
        ):
            print(label % query())
    except Exception as e:
        print("error is %s" % str(e))
    print("result2 and len: ", (collected, len(collected)))
    print("time:", time.time() - started)

# Script entry point: only the dome0 demo is run; call dome1()..dome5() here
# instead to try the other variants.
if __name__ == "__main__":
    dome0()

總結:

  1. 不建議使用while,因爲和if基本一樣的功能,主線程會等待子線程,不需要輪詢
  2. apply_async:主線程是按提交順序逐個對結果做 wait/get 的,所以最先看到的異常來自 x 最小的失敗任務(x=8,sleep(10)),而不是最早拋出異常的任務,與 sleep 時長無關;get 只能收集到該異常之前(x<8)那些線程的返回值。異常只會中斷拋出它的那個線程,其他線程不受影響、會繼續運行。不加 join 時,總消耗時間以這個 x=8 的等待時間爲準(約10s);加上 join 後主線程會等所有線程都運行完,總消耗時間以最長的 sleep(20) 爲參考(約20s),但 get 的結果仍然只有第一個異常觸發前 x<8 的那些線程的結果。
  3. map_async中加不加join一樣。map總是把全部線程都執行,異常會發生在sleep最短(x=9,sleep(1))的線程中,返回結果時以sleep最短的異常爲準,但總消耗時間以sleep最長的爲準,發生異常就get不到結果
  4. 無論是map還是apply,若只是多線程發送請求不關注結果,可以不用加result.wait()、result.successful()、result.get(),如果要關注就必須加上,這樣才能接收子線程的異常,這就是異步。
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章