python多線程與單線程之間的差距

對於python多線程與單線程之間有多少差距呢?今天用一個小例子比較一下。

說明:先從代理 IP 網站爬取代理 IP 列表,再逐一通過這些代理 IP 訪問某個網站,比較兩個版本執行完畢所花的時間。

單線程版
# -*- coding: utf-8 -*-
import requests
from bs4 import BeautifulSoup as bs
import re
import time
def proxy_list(mbUrl):
    """Scrape the proxy listing page and test every proxy sequentially.

    Downloads the hard-coded xicidaili listing, reads the IP, port and
    protocol cells of each selected table row and hands them to
    ``proxy_test`` one at a time.

    Args:
        mbUrl: Target URL that each proxy is asked to fetch.

    Raises:
        requests.RequestException: If the listing page cannot be fetched.
    """
    headers={'User-Agent':"Mozilla/5.0 (Windows NT 6.3; WOW64; rv:51.0) Gecko/20100101 Firefox/51.0"}
    url='http://www.xicidaili.com/nn/'
    # Without a timeout a stalled listing server would hang the script forever.
    r=requests.get(url=url,headers=headers,timeout=10)
    # Name the parser explicitly so the result does not depend on which
    # optional parsers happen to be installed.
    soup=bs(r.content,'html.parser')
    # NOTE(review): the pattern's leading empty alternative matches any
    # string, so this filter presumably accepts every <tr> that has a class
    # attribute, not only the non-"odd" rows -- confirm the intent.
    rows=soup.find_all(name='tr',attrs={'class':re.compile('|[^odd]')})
    for row in rows:
        # Search the row tag directly; no need to serialise and re-parse it.
        cells=row.find_all(name='td')
        if len(cells)<6:
            # Indices 1, 2 and 5 are read below; skip rows that are too short.
            continue
        ip=str(cells[1].string)
        port=str(cells[2].string)
        types=str(cells[5].string)
        proxy_test(mbUrl,ip,port,types)
def proxy_test(url,ip,port,types):
    """Fetch ``url`` through one proxy and print the proxy if it works.

    The proxy counts as working when the first ``[...]`` group in the
    response body equals ``ip``. Proxies that fail or time out are skipped
    without any output.

    Args:
        url: Page to request through the proxy.
        ip: Proxy IP address as text.
        port: Proxy port as text.
        types: Protocol label of the proxy; lower-cased and used as the key
            of the ``proxies`` mapping passed to ``requests.get``.
    """
    proxy={types.lower():'%s:%s'%(ip,port)}
    try:
        # Every candidate proxy is tried once; give up after 3 seconds.
        r=requests.get(url,proxies=proxy,timeout=3)
    except (requests.RequestException,ValueError):
        # Dead or slow proxies are the expected common case. A malformed
        # address scraped from the page may also be rejected with ValueError;
        # both simply mean "this proxy does not work".
        return
    # The first bracketed group in the body is compared with the proxy IP.
    match=re.search(r'\[(.*?)\]',r.text)
    if match is not None and match.group(1)==ip:
        print(proxy)
# Quick benchmark: time one full sequential scan of the proxy list.
if __name__ == '__main__':
    started_at = time.time()
    proxy_list("http://1212.ip138.com/ic.asp")
    elapsed = time.time() - started_at
    # Wall-clock seconds taken by the whole run.
    print(elapsed)

運行結果:

{'http': '39.85.13.253:9999'}

{'http': '113.124.9.229:8998'}
{'http': '124.88.67.52:843'}
{'http': '123.132.170.185:9999'}
{'http': '123.132.179.153:9999'}
{'http': '106.46.136.145:808'}
{'http': '111.124.245.121:9999'}
{'http': '218.17.43.228:3128'}
{'http': '58.23.103.106:9999'}
{'http': '42.84.103.140:9999'}
{'http': '140.240.245.167:9999'}
{'http': '106.46.136.96:808'}

85.9160001278


-------------------------------------------------------------------

多線程版
# -*- coding: utf-8 -*-
import requests
from bs4 import BeautifulSoup as bs
import re
import threading,thread
import time
def proxy_list(mbUrl):
    """Scrape the proxy listing page and test every proxy in its own thread.

    Downloads the hard-coded xicidaili listing, builds one
    ``threading.Thread`` running ``proxy_test`` per selected table row,
    starts them all and then waits for all of them to finish.

    Args:
        mbUrl: Target URL that each proxy is asked to fetch.

    Raises:
        requests.RequestException: If the listing page cannot be fetched.
    """
    headers={'User-Agent':"Mozilla/5.0 (Windows NT 6.3; WOW64; rv:51.0) Gecko/20100101 Firefox/51.0"}
    url='http://www.xicidaili.com/nn/'
    # Without a timeout a stalled listing server would hang the script forever.
    r=requests.get(url=url,headers=headers,timeout=10)
    # Name the parser explicitly so the result does not depend on which
    # optional parsers happen to be installed.
    soup=bs(r.content,'html.parser')
    # NOTE(review): the pattern's leading empty alternative matches any
    # string, so this filter presumably accepts every <tr> that has a class
    # attribute, not only the non-"odd" rows -- confirm the intent.
    rows=soup.find_all(name='tr',attrs={'class':re.compile('|[^odd]')})
    threads=[]
    for row in rows:
        # Search the row tag directly; no need to serialise and re-parse it.
        cells=row.find_all(name='td')
        if len(cells)<6:
            # Indices 1, 2 and 5 are read below; skip rows that are too short.
            continue
        ip=str(cells[1].string)
        port=str(cells[2].string)
        types=str(cells[5].string)
        threads.append(threading.Thread(target=proxy_test,args=(mbUrl,ip,port,types)))
    # Start every worker before joining any, so all requests run concurrently.
    for t in threads:
        t.start()
    for t in threads:
        t.join()
def proxy_test(url,ip,port,types):
    """Fetch ``url`` through one proxy and print the proxy if it works.

    The proxy counts as working when the first ``[...]`` group in the
    response body equals ``ip``. Proxies that fail or time out are skipped
    without any output. Printing is done while holding the module-level
    ``lock``, which this file creates in its ``__main__`` block before
    calling ``proxy_list``.

    Args:
        url: Page to request through the proxy.
        ip: Proxy IP address as text.
        port: Proxy port as text.
        types: Protocol label of the proxy; lower-cased and used as the key
            of the ``proxies`` mapping passed to ``requests.get``.
    """
    proxy={types.lower():'%s:%s'%(ip,port)}
    try:
        # Every candidate proxy is tried once; give up after 3 seconds.
        r=requests.get(url,proxies=proxy,timeout=3)
    except (requests.RequestException,ValueError):
        # Dead or slow proxies are the expected common case. A malformed
        # address scraped from the page may also be rejected with ValueError;
        # both simply mean "this proxy does not work".
        return
    # The first bracketed group in the body is compared with the proxy IP.
    match=re.search(r'\[(.*?)\]',r.text)
    if match is not None and match.group(1)==ip:
        # ``with`` releases the lock even if printing raises.
        with lock:
            print(proxy)
# Quick benchmark: time one full multi-threaded scan of the proxy list.
if __name__ == '__main__':
    started_at = time.time()
    # Module-global lock; proxy_test holds it while printing a working proxy.
    lock = thread.allocate_lock()
    proxy_list("http://1212.ip138.com/ic.asp")
    elapsed = time.time() - started_at
    # Wall-clock seconds taken by the whole run.
    print(elapsed)

運行結果:
{'http': '123.132.170.185:9999'}
{'http': '180.107.249.162:9999'}
{'http': '110.244.202.201:9999'}
{'http': '123.52.220.243:9999'}
{'http': '121.31.143.237:8123'}
{'http': '110.73.2.236:8123'}
{'http': '106.46.136.24:808'}
{'http': '110.72.41.108:8123'}
{'http': '140.240.245.167:9999'}
{'http': '218.17.43.228:3128'}
{'http': '220.166.241.213:8118'}
3.36299991608

多線程版僅用了約3.4秒,而單線程版卻用了約86秒,足足相差約26倍,差距一目瞭然。這是因為此任務的時間幾乎都花在等待網絡回應上(I/O 密集型),多線程能讓這些等待同時進行。

發佈了55 篇原創文章 · 獲贊 67 · 訪問量 9萬+
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章