Multi-process Crawler Example
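The example below reads a list of hostnames from a text file, prepends http:// to each, and pushes the resulting URLs into a multiprocessing.Queue shared by three worker processes. Each worker repeatedly pulls a URL from the queue and fetches it with requests until the queue is drained.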

import time
from multiprocessing import Process, Queue
from queue import Empty

import requests

class MyProcess(Process):
    def __init__(self, q):
        Process.__init__(self)
        self.q = q

    def run(self):
        print('start', self.pid)
        while not self.q.empty():
            crawler(self.q)
        print('exiting', self.pid)

def crawler(q):
    # q.empty() in run() is only a hint: another worker may drain the
    # queue between the check and the get(), so get() can still raise
    # queue.Empty after the timeout.
    try:
        url = q.get(timeout=2)
    except Empty:
        return
    try:
        r = requests.get(url, timeout=20)
        print(q.qsize(), r.status_code, url)
    except Exception as e:
        print(q.qsize(), url, 'Error', e)

if __name__ == '__main__':
    # Keep the file I/O inside the __main__ guard: on Windows,
    # multiprocessing spawns fresh interpreters that re-import this
    # module, so module-level code would run once per child process.
    link_head = 'http://'
    link_list = []
    with open(r'C:\Users\Heisenberg\Desktop\newfile.txt', 'r') as f:
        web_list = f.readlines()
    for each in web_list[0:10]:
        link_list.append(link_head + each.strip())

    start = time.time()
    work_queue = Queue(10)  # bounded queue sized to the (at most 10) links
    for url in link_list:
        work_queue.put(url)

    # Start all three workers first, then join them. Calling join()
    # inside the start loop would block until each worker finished,
    # running the processes one at a time instead of in parallel.
    processes = []
    for i in range(3):
        p = MyProcess(work_queue)
        processes.append(p)
        p.start()
    for p in processes:
        p.join()

    end = time.time()
    print('total time', end - start)
    print('main process exit')
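For a fan-out this simple, the standard library's multiprocessing.Pool can replace the hand-rolled worker class, since it manages starting and joining the workers itself. A minimal sketch under the same assumptions; the link_list here is a hypothetical placeholder standing in for the list built from the file above:

from multiprocessing import Pool

import requests

def fetch(url):
    # Fetch one URL; report a worker-side exception instead of raising,
    # so one bad link does not abort the whole pool job.
    try:
        r = requests.get(url, timeout=20)
        return r.status_code, url
    except Exception as e:
        return 'Error {}'.format(e), url

if __name__ == '__main__':
    link_list = ['http://www.example.com']  # hypothetical placeholder; reuse the list built above
    with Pool(processes=3) as pool:
        # imap_unordered yields results as workers finish them,
        # mirroring the first-come-first-served behavior of the queue.
        for status, url in pool.imap_unordered(fetch, link_list):
            print(status, url)

The queue-based version gives finer control (for example, qsize() progress reporting), while the Pool version is shorter and avoids the empty()/get() race entirely.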

 
