一、安裝
執行命令:pip install fake-useragent(下方代碼直接使用 fake_useragent 庫自行編寫中介軟體,並未用到 scrapy-fake-useragent 套件提供的現成中介軟體)
二、使用
1、在middlewares.py中編寫如下類:
from fake_useragent import UserAgent
class RandomUserAgent(object):
    """Downloader middleware that attaches a random User-Agent to each request.

    The UA string is drawn from the ``fake_useragent`` package; which family
    of UA strings to use ('random', 'chrome', 'firefox', ...) is read from
    the ``RANDOM_UA_TYPE`` setting.
    """

    def __init__(self, crawler):
        super(RandomUserAgent, self).__init__()
        # UserAgent comes from fake_useragent (imported at the top of the file).
        self.ua = UserAgent()
        # UA family to draw from; defaults to 'random' when the setting is absent.
        self.ua_type = crawler.settings.get('RANDOM_UA_TYPE', 'random')

    @classmethod
    def from_crawler(cls, crawler):
        # Scrapy entry point: construct the middleware with access to settings.
        return cls(crawler)

    def process_request(self, request, spider):
        # Called by Scrapy for every outgoing request.
        # Bug fix: the HTTP header is 'User-Agent' (hyphen). The original used
        # 'User_Agent', which is a different key and is never sent as the real
        # User-Agent header. setdefault keeps any UA a spider set explicitly.
        request.headers.setdefault('User-Agent', getattr(self.ua, self.ua_type))
2、在 settings.py 中啟用
# Which fake_useragent attribute RandomUserAgent should draw from.
RANDOM_UA_TYPE = 'random'

# Disable Scrapy's built-in User-Agent middleware (priority None) so it
# cannot overwrite ours, and register our middleware at priority 200.
DOWNLOADER_MIDDLEWARES = {
    'scrapy.downloadermiddlewares.useragent.UserAgentMiddleware': None,
    'test.middlewares.RandomUserAgent': 200,
}
三、測試
1、測試代碼
def parse(self, response):
    """Print the User-Agent header the outgoing request carried.

    Bug fix: read 'User-Agent' (hyphen), the real HTTP header name; the
    original key 'User_Agent' is a distinct key and would miss the header
    set by the middleware.
    """
    header = response.request.headers['User-Agent']
    print(header)
2、測試結果
b'Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.17 Safari/537.36'
2019-08-28 19:11:56 [scrapy.core.engine] INFO: Closing spider (finished)