多線程截取html中相應的數據

多線程截取html中相應的數據

#coding=gbk
import re,urllib,time
import linecache,threading
from bs4 import BeautifulSoup as soup
# Lock held by get_content while it updates the shared list `a` and appends
# to the log file, so worker threads do not interleave those steps.
mlock = threading.Lock()
# Host names already written to the log; shared by every worker thread and
# used by get_content to skip duplicates.
a = []
def get_content(ip_content):
    """Fetch the Bing ``ip:`` search page for one IP and log newly seen hosts.

    The result page is parsed for ``<div class="sb_meta">`` entries; the text
    of each entry's ``<cite>`` up to the first ``/`` is taken as the host.
    Hosts not yet present in the module-level list ``a`` are appended to it
    and written, one per line, to ``c:\\mylog.txt``.

    Args:
        ip_content: The IP address to search for. Surrounding whitespace
            (such as the newline kept when reading a list file) is stripped;
            a blank value is ignored.

    Returns:
        None.
    """
    ip = ip_content.strip()
    if not ip:
        # Blank line in the input list: nothing to query.
        return

    # Put the requested IP into the query itself. Previously the query was
    # hard-coded to a single address and the argument only reached the "pq"
    # parameter, with a malformed "%" escape.
    url = ("http://hk.bing.com/search?q=ip%3A{0}&"
           "qs=n&form=QBLH&filt=all&pq=ip%3A{0}&sc=0-2&sp=-1&sk=").format(ip)

    pythoner = urllib.urlopen(url)
    try:
        content = pythoner.read()
    finally:
        # Close the HTTP handle even if the read fails.
        pythoner.close()

    results = soup(content).find_all("div", {"class": "sb_meta"})

    # "with" releases the lock even if parsing or writing raises, so one
    # failing thread can no longer block every other worker forever.
    with mlock:
        fresh = []
        for x in results:
            if x.cite is None:
                # Result entry without a <cite> tag: no host to extract.
                continue
            da = x.cite.text.split('/', 1)[0]
            if da not in a:
                a.append(da)
                fresh.append(da)
        if fresh:
            # Open the log once per call and close it deterministically
            # instead of leaking one handle per new host.
            with open(r'c:\mylog.txt', 'a') as log:
                for da in fresh:
                    log.write(da + '\n')
                
                
                
def thread_geturl(process,info):
    """Start one thread per entry of ``info``, each running ``process(entry)``.

    The threads are started in iteration order and are not joined here;
    the function returns None once every thread has been started.
    """
    for entry in info:
        worker = threading.Thread(target=process, args=(entry,))
        worker.start()
                
                
if __name__ == '__main__':
    # Load every line of the IP list file and launch one worker thread
    # per line, each running get_content on that line.
    thread_geturl(get_content, list(linecache.getlines(r'c:\iplist.txt')))

IP_list

155641384.jpg

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章