# 我們知道,即使把 header 換成某個瀏覽器的 User-Agent,如果一直用同一個瀏覽器訪問,仍然會被認定為爬蟲,所以要及時更換瀏覽器(User-Agent)。
import urllib.request
import random
def load_baidu():
    """Build a request for the Baidu home page with a random User-Agent.

    A User-Agent string is picked at random from a small pool of browser
    identities and attached to a ``urllib.request.Request`` object, so that
    repeated runs do not always present the same browser. The request is
    only constructed, not sent; its headers are printed for inspection.

    Returns:
        None. The request headers are written to standard output.
    """
    url = "http://www.baidu.com"
    # Pool of browser identities to rotate through.
    user_agent_list = [
        # The original entry had stray spaces inside the tokens
        # ("Mozilla / 5.0(Windows;U;WindowsNT6.1;en - us) ..."), which does
        # not look like any real browser; restored to the well-formed string.
        "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.101 Safari/537.36",
        "Opera/9.80 (Windows NT 6.1; U; en) Presto/2.8.131 Version/11.11",
    ]
    random_user_agent = random.choice(user_agent_list)
    request = urllib.request.Request(url)
    # Fixed: the original called `requests.add_header`, an undefined name
    # that raised NameError; the Request object is bound to `request`.
    request.add_header("User-Agent", random_user_agent)
    # urllib normalises header names with str.capitalize(), so the key is
    # shown as "User-agent" in the printed dict.
    print(request.headers)
# Script entry: invoke load_baidu() at module level, so it runs whenever this file is executed or imported.
load_baidu()