背景
selenium 配置代理需要在啓動的時候配置,如果代理失效或者不可用,切換需要重啓,浪費時間。
解決思路
可以搭建一個代理中轉服務,讓 selenium 連接固定的中轉服務,再由中轉服務選擇可用的代理。
代碼實現
代碼實現就簡單了,中轉服務將收到的網絡請求,直接轉發給可用的代理就行,如果代理連接失敗,則切換到新的代理。
以下是python實現
import socket
import _thread
from threading import Thread
from redis.client import Redis
from adsl_proxy_pool import ADSLProxyPool
# Redis connection handed to the ADSL proxy pool.
redis_client = Redis(
    host="127.0.0.12",
    port=6383,
    password="password",
    db=5,
)

# Pool object that get_proxy() asks for upstream proxy addresses.
adsl_proxy_pool = ADSLProxyPool(redis_client=redis_client)

# Last proxy string handed out; handle() resets it to None when connecting fails.
cache_proxy = None
def communicate(sock1, sock2, bufsize=1024):
    """
    Relay bytes from one socket to another until the source is exhausted.

    Data is read from ``sock1`` in chunks and written to ``sock2``. When
    ``sock1`` reports end-of-stream, the write side of ``sock2`` is shut
    down so the peer behind ``sock2`` also sees end-of-stream instead of
    waiting for its own timeout.

    :param sock1: connected socket to read from
    :param sock2: connected socket to write to
    :param bufsize: maximum number of bytes requested per ``recv`` call
    :return: None
    """
    try:
        while True:
            data = sock1.recv(bufsize)
            if not data:
                # Empty read means sock1's peer finished sending; pass the
                # half-close on so the other end is not left hanging.
                sock2.shutdown(socket.SHUT_WR)
                return
            sock2.sendall(data)
    except OSError:
        # Timeouts, resets and already-closed sockets simply end this
        # direction of the relay (best effort). Only socket-level errors
        # are swallowed; KeyboardInterrupt/SystemExit still propagate.
        return
def _close_quietly(sock):
    """Shut down and close ``sock``, ignoring errors from sockets that are already dead."""
    try:
        # shutdown() wakes up a thread that is still blocked in recv() on this socket.
        sock.shutdown(socket.SHUT_RDWR)
    except OSError:
        pass
    sock.close()


def handle(client):
    """
    Serve one accepted client connection by tunnelling it through an upstream proxy.

    Keeps asking ``get_proxy()`` for an address until a TCP connection to it
    succeeds, then relays traffic in both directions with ``communicate``.
    Both sockets are closed once the upstream-to-client direction finishes.

    :param client: accepted client socket
    :return: None
    """
    global cache_proxy
    timeout = 60
    client.settimeout(timeout)
    proxy_server = None
    try:
        while proxy_server is None:
            # A socket whose connect() failed is not reliably reusable, so
            # every attempt gets a fresh one.
            upstream = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            try:
                # Set the timeout before connecting so a dead proxy cannot
                # block the connect call indefinitely.
                upstream.settimeout(timeout)
                upstream.connect(get_proxy())
            except Exception:
                # Any failure (pool lookup, address parsing, connect) means
                # this proxy is unusable: drop the cached one and try again.
                upstream.close()
                cache_proxy = None
            else:
                proxy_server = upstream
        # Forward client -> proxy in a background thread ...
        _thread.start_new_thread(communicate, (client, proxy_server))
        # ... and relay proxy -> client in this thread.
        communicate(proxy_server, client)
    finally:
        # Release both file descriptors; this also ends the background relay.
        if proxy_server is not None:
            _close_quietly(proxy_server)
        _close_quietly(client)
def get_proxy():
    """
    Return the upstream proxy address as a ``(host, port)`` tuple.

    The cached proxy string is reused when one is set; otherwise the pool is
    polled until it returns a non-empty value, which is then cached in the
    module-level ``cache_proxy``.

    :return: tuple of host (str) and port (int)
    :raises IndexError: if the proxy string contains no ``:`` separator
    :raises ValueError: if the port part is not an integer
    """
    global cache_proxy
    proxy = cache_proxy
    # Poll in a loop rather than recursing: an empty pool would otherwise
    # exhaust the interpreter's recursion limit.
    while not proxy:
        proxy = adsl_proxy_pool.get_proxy()
    cache_proxy = proxy
    # Strip the scheme, then split "host:port".
    parts = proxy.replace("http://", "").split(":")
    return parts[0], int(parts[1])
def serve(ip, port):
    """
    Run the local relay proxy: accept connections forever and hand each to ``handle``.

    This call blocks. The listening socket is closed if the accept loop is
    left because of an exception.

    :param ip: local address to bind to
    :param port: local TCP port to listen on
    :return: None (only returns by raising)
    """
    # The context manager guarantees the listening socket is released when
    # bind/listen/accept raises or the loop is interrupted.
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
        # Allow quick restarts on the same port.
        s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        s.bind((ip, port))
        s.listen(10)
        print('\n local proxy server started...\n')
        while True:
            conn, _addr = s.accept()
            # One thread per client connection.
            _thread.start_new_thread(handle, (conn,))
def start_local_proxy_server(server="127.0.0.1", port=8081):
    """
    Launch ``serve`` on a background daemon thread and return immediately.

    :param server: local address the relay proxy binds to
    :param port: local TCP port the relay proxy listens on
    :return: None
    """
    # Daemon thread: it does not keep the interpreter alive on exit.
    Thread(
        target=serve,
        name='APScheduler',
        args=(server, port),
        daemon=True,
    ).start()
if __name__ == '__main__':
    # Run the relay proxy in the foreground when executed as a script.
    IP, PORT = "127.0.0.1", 8080
    serve(IP, PORT)
selenium 直接配置本地代理
# Start the local relay proxy in the background before creating the browser.
start_local_proxy_server("127.0.0.1", 8081)
# Point the browser at the fixed local relay address (same host/port as above).
# NOTE(review): Agent and get_chrome_opt_list are defined elsewhere in the
# project; their exact semantics are not visible here.
agent = Agent(chrome_args=get_chrome_opt_list(),
              proxy="http://127.0.0.1:8081",
              verbose=True,
              chrome_driver_path='/usr/local/bin/chromedriver')
-----------
# Excerpt from a method of the browser wrapper (its header is not shown here).
# NOTE(review): `uc` is presumably undetected_chromedriver; confirm against the imports.
options = uc.ChromeOptions()
if self.proxy:
    # Only add the proxy flag when a proxy URL was configured.
    self.logger.info("use proxy:{}".format(self.proxy))
    options.add_argument(f'--proxy-server={self.proxy}')
總結
一開始考慮使用 nginx 來中轉代理服務,但想到 nginx 本質上就是一個中轉,從方便性和可控性考慮,直接用代碼實現更好。