多線程截取html中相應的數據
# coding=gbk
# Multi-threaded scraper: for every IP address listed in c:\iplist.txt, query
# Bing's "ip:" search and append each newly seen host name to c:\mylog.txt.
import contextlib
import linecache
import re
import threading
import time
import urllib

try:  # Python 3 location of urlopen
    from urllib.request import urlopen
except ImportError:  # Python 2 fallback
    from urllib import urlopen

from bs4 import BeautifulSoup as soup

# {0} is the IP address being looked up; it must appear in ``q`` (the actual
# query), not only in ``pq`` (the "previous query" echo parameter).
SEARCH_URL = (
    "http://hk.bing.com/search?q=ip%3A{0}"
    "&qs=n&form=QBLH&filt=all&pq=ip%3A{0}&sc=0-2&sp=-1&sk="
)
LOG_PATH = r'c:\mylog.txt'
IP_LIST_PATH = r'c:\iplist.txt'

# Guards both the shared ``a`` list and appends to the log file.
mlock = threading.Lock()
# Host names already recorded, shared by all worker threads.
a = []


def get_content(ip_content):
    """Fetch the Bing result page for one IP and record new host names.

    The page is searched for ``<div class="sb_meta">`` elements; the text of
    each one's ``<cite>`` child, up to the first ``/``, is taken as the host
    name.  Hosts not yet present in the module-level list ``a`` are appended
    to it and written, one per line, to ``LOG_PATH``.

    Args:
        ip_content: IP address as text; surrounding whitespace (such as the
            trailing newline of a line read from a file) is ignored.  An
            empty value is skipped without making a request.
    """
    ip = ip_content.strip()
    if not ip:
        return

    # closing() guarantees the connection is released even if read() fails
    # (the Python 2 response object is not itself a context manager).
    with contextlib.closing(urlopen(SEARCH_URL.format(ip))) as response:
        content = response.read()
    results = soup(content).find_all("div", {"class": "sb_meta"})

    # ``with`` releases the lock even when an exception is raised, so one
    # failing worker can no longer block all the others forever.
    with mlock:
        fresh = []
        for result in results:
            if result.cite is None:
                # Result entry without a <cite> child: nothing to record.
                continue
            host = result.cite.text.split('/', 1)[0]
            if host not in a:
                a.append(host)
                fresh.append(host)
        if fresh:
            # Open the log once per call and close it deterministically.
            with open(LOG_PATH, 'a') as log:
                log.writelines(host + '\n' for host in fresh)


def thread_geturl(process, info):
    """Start one thread per item of ``info``, each running ``process(item)``.

    Args:
        process: Callable taking a single positional argument.
        info: Iterable of arguments, one per thread.

    Returns:
        The list of started ``threading.Thread`` objects, so that callers
        may ``join()`` them if they need to wait for completion.
    """
    workers = []
    for item in info:
        worker = threading.Thread(target=process, args=(item,))
        worker.start()
        workers.append(worker)
    return workers


if __name__ == '__main__':
    # linecache.getlines() yields lines with their newline still attached
    # (and an empty list if the file cannot be read); strip them and drop
    # blank lines so no thread is started for an empty address.
    ip_list = [
        line.strip()
        for line in linecache.getlines(IP_LIST_PATH)
        if line.strip()
    ]
    thread_geturl(get_content, ip_list)
IP_list