代理IP获取器
原理很简单:找一个提供免费代理IP的网站并爬取,用得到的代理IP尝试访问百度,访问成功即认为该代理可用;攒够10个代理IP后输出并退出程序。
代码:
import requests, bs4
from bs4 import BeautifulSoup

# Working proxies collected so far; each entry is [ip, port].
ls = []


def makeSoup(url):
    """Fetch *url* and return its parsed BeautifulSoup tree, or None on any request error."""
    try:
        r = requests.get(url, timeout=30)
        r.raise_for_status()
        return BeautifulSoup(r.text, 'lxml')
    except requests.RequestException:
        # Best-effort: a failed page fetch just yields no soup for this page.
        return None


def check(ip, port):
    """Return 1 if the proxy ip:port can reach Baidu over HTTP, else 0.

    Bug fix: the proxies mapping must be {scheme: 'scheme://host:port'}.
    The original {'http:' + port: ip} produced a key like 'http:8080',
    which requests silently ignores — every request went out directly,
    so every candidate proxy appeared to work.
    """
    url = 'http://www.baidu.com/'
    kv = {'http': 'http://' + ip + ':' + port}
    try:
        r = requests.get(url, proxies=kv, timeout=30)
        r.raise_for_status()
        return 1
    except requests.RequestException:
        print('×')
        return 0


def getList(soup):
    """Scan one listing page for <tr> rows, validate each ip/port, append working ones to ls."""
    if soup is None:
        # makeSoup failed for this page — nothing to parse.
        return
    try:
        trs = soup.tbody.find_all('tr')
    except AttributeError:
        # Page has no <tbody> (layout changed or anti-scrape page) — skip it.
        return
    for tr in trs:
        td = tr.find_all('td')
        ip = td[0].string
        port = td[1].string
        if check(ip, port):
            ls.append([ip, port])
            print('√√√√√√√√√√√√√√√√√√√√√√√√√√√√√√√√')


if __name__ == '__main__':
    # Crawl listing pages until 10 verified proxies are collected.
    for page in range(1, 3000):
        print('第', page, '页')
        url = 'http://www.kuaidaili.com/free/inha/' + str(page) + '/'
        soup = makeSoup(url)
        getList(soup)
        # Bug fix: '>' collected 11 proxies; the stated goal is 10.
        if len(ls) >= 10:
            break
    for ip in ls:
        print(ip)