import time
import threading
from queue import Queue
class Multithreading(object):
    """Three-stage producer/consumer pipeline built on threads and queues.

    Stage 1 (``get_list``) enqueues page numbers, stage 2 (``get_detail``)
    turns each page number into a detail string after a simulated I/O
    delay, and stage 3 (``save``) prints each detail string. The stages
    hand work to each other through two bounded ``queue.Queue`` objects.
    """

    def __init__(self, page_count=1001, io_delay=0.1):
        """Create the two bounded hand-off queues.

        Args:
            page_count: Number of list pages to enqueue; pages are numbered
                ``0 .. page_count - 1``.
            io_delay: Seconds each detail fetch sleeps to simulate I/O
                latency.
        """
        self.page_count = page_count
        self.io_delay = io_delay
        # Bounded queues: ``put`` blocks once 100 items are pending, which
        # throttles a fast producer. The limit can be tuned here.
        self.list_page_queue = Queue(maxsize=100)
        self.detail_page_queue = Queue(maxsize=100)

    def get_list(self):
        """Produce list pages: enqueue every page number, then return."""
        for page in range(self.page_count):
            self.list_page_queue.put(page)

    def get_detail(self):
        """Consume page numbers forever and enqueue their detail strings.

        Never returns; it is meant to run in a daemon thread.
        """
        while True:
            i = self.list_page_queue.get()
            ret = f'第{i}頁的詳情頁'
            time.sleep(self.io_delay)  # simulated I/O latency
            # Hand the result downstream BEFORE marking the page done, so
            # that once ``list_page_queue.join()`` returns every detail is
            # already counted by ``detail_page_queue``.
            self.detail_page_queue.put(ret)
            self.list_page_queue.task_done()

    def save(self):
        """Consume detail strings forever and print each one.

        Never returns; it is meant to run in a daemon thread.
        """
        while True:
            ret = self.detail_page_queue.get()
            print(f'成功保存{ret}')
            self.detail_page_queue.task_done()

    def run(self):
        """Run the whole pipeline and block until every page is saved.

        Starts 1 list producer, 10 detail workers and 5 savers, waits for
        the producer to finish enqueueing, then waits for both queues to
        drain before printing the completion message.
        """
        # Every thread is a daemon: the detail/save workers loop forever,
        # so they must be killed automatically when the main thread exits.
        producers = [
            threading.Thread(target=self.get_list, daemon=True)
            for _ in range(1)  # single list-page thread
        ]
        workers = [
            threading.Thread(target=self.get_detail, daemon=True)
            for _ in range(10)  # 10 detail-page threads
        ]
        workers += [
            threading.Thread(target=self.save, daemon=True)
            for _ in range(5)  # 5 saver threads
        ]

        for t in producers + workers:
            t.start()

        # Wait until the producer has enqueued everything. Without this,
        # ``Queue.join()`` below could run before the first ``put`` and
        # return immediately (no unfinished tasks yet), ending the program
        # with work still undone. The workers are already running, so the
        # producer cannot stay blocked on a full queue.
        for t in producers:
            t.join()

        # Order matters: once the list queue has drained, every detail has
        # been put on the detail queue, so joining it second is sufficient.
        for q in (self.list_page_queue, self.detail_page_queue):
            q.join()
        print("任務完成主線程結束!")
if __name__ == '__main__':
    # Script entry point: build the pipeline and run it to completion.
    Multithreading().run()