代理IP獲取器
很簡單的原理：找一個提供代理IP的網站，然後爬取。用得到的代理IP嘗試訪問百度，如果訪問成功就說明該代理可用。攢夠10個代理IP後輸出並退出程序。
代碼:
"""Free-proxy harvester.

Scrapes the free-proxy listing pages at kuaidaili.com, validates each
candidate by fetching Baidu *through* it, and stops after collecting
10 working proxies, which are then printed.
"""
import requests
from bs4 import BeautifulSoup

# Working proxies collected so far, as [ip, port] pairs.
ls = []


def makeSoup(url):
    """Fetch *url* and return a parsed BeautifulSoup tree.

    Returns None on any network/HTTP failure instead of raising, so the
    caller can simply skip the page.
    """
    try:
        r = requests.get(url, timeout=30)
        r.raise_for_status()
        return BeautifulSoup(r.text, 'lxml')
    except requests.RequestException:
        return None


def check(ip, port):
    """Return 1 if Baidu is reachable through proxy ip:port, else 0.

    BUG FIX: the original built the proxies mapping as
    ``{'http:' + port: ip}`` — an invalid scheme key that requests
    silently ignores, so every request went direct and every proxy
    "passed".  The correct mapping is ``{'http': 'http://ip:port'}``.
    """
    url = 'http://www.baidu.com/'
    kv = {'http': 'http://{}:{}'.format(ip, port)}
    try:
        r = requests.get(url, proxies=kv, timeout=30)
        r.raise_for_status()
        return 1
    except requests.RequestException:
        print('×')
        return 0


def getList(soup):
    """Extract ip/port pairs from one listing page; append working ones to ``ls``."""
    if soup is None:  # makeSoup failed for this page — skip it
        return
    try:
        trs = soup.tbody.find_all('tr')
    except AttributeError:  # no <tbody> — page layout changed or blocked
        return
    for tr in trs:
        td = tr.find_all('td')
        ip = td[0].string
        port = td[1].string
        # td[*].string can be None for nested markup; guard before use.
        if ip and port and check(ip, port):
            ls.append([ip, port])
            print('√√√√√√√√√√√√√√√√√√√√√√√√√√√√√√√√')


def main():
    """Walk the listing pages until 10 working proxies are collected, then print them."""
    for page in range(1, 3000):
        print('第', page, '頁')
        url = 'http://www.kuaidaili.com/free/inha/' + str(page) + '/'
        getList(makeSoup(url))
        # BUG FIX: the original tested len(ls) > 10, which collects 11+
        # proxies; >= 10 matches the stated goal of exactly 10.
        if len(ls) >= 10:
            break
    for ip in ls:
        print(ip)


if __name__ == '__main__':
    main()