某360查詢網址在同一ip頻繁訪問時會彈出驗證碼。
該驗證碼的特點:通過get獲取的驗證碼圖片和網頁上展現的不一致(所以無法通過圖像識別破解);該驗證碼是針對ip的,輸入驗證碼後會得到一個有效期大約一天的cookie,在有效期內就可以大量頻繁訪問了。
獲取cookie:谷歌瀏覽器(輸入驗證碼之後) >> F12 >> Application(Network右邊) >> Cookies >> 然後最好從Network也獲取一份cookies對比一下,再測試哪幾個是關鍵的,去掉多餘的 >> 我這裏篩選出的有QiHooGUID、__guid、HSPK >> 然後就可以在get請求裏使用這個cookie了。此cookie只對相同ip下的訪問生效。
# Pool of cookie sets passed to requests.get(); one entry is picked at random
# per request. Every entry carries the same three cookie names, in the same
# order: QiHooGUID, __guid, HSPK.
# NOTE(review): these look like captured browser session values; per the
# accompanying notes they expire after roughly a day and are tied to the IP
# that solved the captcha -- confirm before relying on them.
cookiesList = [
    {
        'QiHooGUID': '4B8401DA0CF874A18E4BFF757EB90AE1.1559194913641',
        '__guid': '15484592.1937232903330800000.1559194915338.2622',
        'HSPK': 'e233796b2ddb62b46b356063af241c0.1559194953.1',
    },
    {
        'QiHooGUID': '0EFD4F5BD0052491974CAED4F706DF45.1559179861562',
        '__guid': '15484592.814590072574967200.1559179861246.2769',
        'HSPK': '160ff6e1afae22e60d0321036f692f9.1559179998.1',
    },
    {
        'QiHooGUID': '117C9371BC637FDC5C410F7B05C04CBD.1559179860932',
        '__guid': '15484592.1180757161332586200.1559179861157.9478',
        'HSPK': '96db7fb23d1c854555fba80e4c827bc.1559180056.1',
    },
    {
        'QiHooGUID': '397CF97029E2031C7FE185F323BC4591.1559179862317',
        '__guid': '15484592.4234635624280693000.1559179861608.329',
        'HSPK': '788f84234c87aff1807a434c968aa67.1559180114.1',
    },
    {
        'QiHooGUID': '7677BC494C9AEB59BF338C34BE065908.1559179860461',
        '__guid': '15484592.772992706594705000.1559179860839.9106',
        'HSPK': '054120f3987b7a26dff500e099e238a.1559180167.1',
    },
]
# Pool of User-Agent strings; one is picked at random for each request's
# "User-Agent" header. Entries are kept verbatim (including trailing spaces
# and the version-less "Chrome/ " token) and in their original order.
headersPool = [
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36",
    "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.133 Safari/537.36",
    "Mozilla/5.0 (Windows; U; MSIE 9.0; Windows NT 9.0; en-US)",
    "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Win64; x64; Trident/5.0; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET CLR 2.0.50727; Media Center PC 6.0)",
    "Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0; WOW64; Trident/4.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET CLR 1.0.3705; .NET CLR 1.1.4322)",
    "Mozilla/4.0 (compatible; MSIE 7.0b; Windows NT 5.2; .NET CLR 1.1.4322; .NET CLR 2.0.50727; InfoPath.2; .NET CLR 3.0.04506.30)",
    "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN) AppleWebKit/523.15 (KHTML, like Gecko, Safari/419.3) Arora/0.3 (Change: 287 c9dfb30)",
    "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.2pre) Gecko/20070215 K-Ninja/2.1.1",
    "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; rv:1.9) Gecko/20080705 Firefox/3.0 Kapiko/3.0",
    "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.8) Gecko Fedora/1.9.0.8-1.fc10 Kazehakase/0.5.6",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11",
    "Mozilla/5.0 (Windows; U; Windows NT 5.2) Gecko/2008070208 Firefox/3.0.1",
    "Mozilla/5.0 (Windows; U; Windows NT 5.1) Gecko/20070309 Firefox/2.0.0.3",
    "Mozilla/5.0 (Windows; U; Windows NT 5.1) Gecko/20070803 Firefox/1.5.0.12",
    "Opera/9.27 (Windows NT 5.2; U; zh-cn)",
    "Mozilla/5.0 (Windows; U; Windows NT 5.2) AppleWebKit/525.13 (KHTML, like Gecko) Version/3.1 Safari/525.13",
    "Mozilla/5.0 (Windows; U; Windows NT 5.2) AppleWebKit/525.13 (KHTML, like Gecko) Chrome/0.2.149.27 ",
    "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_5_6; en-US) AppleWebKit/530.9 (KHTML, like Gecko) Chrome/ Safari/530.9 ",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11",
    "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; 360SE)",
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.11 (KHTML, like Gecko) Ubuntu/11.10 Chromium/27.0.1453.93 Chrome/27.0.1453.93 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.93 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.94 Safari/537.36",
]
while True:
try:
urlAim = 'https://www.so.com/s?q={0}'.format(telNbr)
print(urlAim)
headers = {"User-Agent": random.choice(headersPool)}
cookies = random.choice(cookiesList)
response = requests.get(urlAim, cookies=cookies, headers=headers)
selector = etree.HTML(response.text)
response.close()
telNbrShow = telAttribution = telType = telMarkNbr = ''
seeyzm = selector.xpath('//*[@id="container"]/div[1]/p[1]/text()') # 判斷是否出現驗證碼
# ['親,系統檢測到您操作過於頻繁。']
if seeyzm:
print('請檢查第 {0} cookie是否過期...'.format(cookiesList.index(cookies)))
else:
print('當前cookie {0} '.format(cookiesList.index(cookies)))