重寫下載中間件:首次登錄時先以有頭(有界面)模式啓動瀏覽器,保存 cookies 之後,再改以無頭模式運行。
示例:
class SeleniumDownloaderMiddleware(object):
    """Scrapy downloader middleware that renders Taobao pages with Selenium.

    Workflow, as far as the visible code shows:

    * No ``cookies.json`` on disk: the browser opens the start page, the
      middleware waits so a login can be completed by hand, the session
      cookies are written to ``cookies.json`` and the browser window is
      closed.
    * ``cookies.json`` present: the saved cookies are injected into the
      browser once, then a fixed search-and-click sequence is driven and the
      resulting page is handed back to Scrapy as an ``HtmlResponse``.

    NOTE(review): the indentation of this snippet was lost where it was
    pasted; the nesting used here is a reconstruction -- confirm it against
    the original project.
    """

    # File the login cookies are persisted to (relative to the working dir).
    COOKIE_FILE = 'cookies.json'
    # Seconds to wait before reading cookies on the first run; presumably
    # the time allowed for a manual login -- tune as needed.
    LOGIN_WAIT_SECONDS = 30

    def __init__(self):
        # Headless options are prepared here but only take effect when they
        # are passed to webdriver.Chrome (see the commented-out line below).
        self.chrome_options = Options()
        self.chrome_options.add_argument('--headless')
        self.chrome_options.add_argument('--disable-gpu')
        self.url = 'http://www.taobao.com'
        path = r'E:\LiuLanQi\chromedriver.exe'
        # Headless variant, to be enabled once the cookies have been saved:
        # self.driver = webdriver.Chrome(executable_path=path, chrome_options=self.chrome_options)
        self.driver = webdriver.Chrome(executable_path=path)
        # Becomes True once the saved cookies have been added to the driver.
        self.load_cookies = False

    # NOTE: closing the driver on teardown was disabled in the original:
    # def __del__(self):
    #     self.driver.close()

    def process_request(self, request, spider):
        """Render the request in the browser when ``meta['page'] == '0'``.

        Args:
            request: The Scrapy request being downloaded; its ``meta`` dict
                is inspected for a ``'page'`` entry.
            spider: The spider that issued the request (unused).

        Returns:
            An ``HtmlResponse`` built from the browser's current URL and
            page source, or ``None`` when the request is not handled here
            (no ``'page'`` entry, a value other than ``'0'``, or the
            first run that only saves cookies).
        """
        # .get() rather than [...]: a request without a 'page' entry in its
        # meta is passed through instead of raising KeyError.
        if request.meta.get('page') != '0':
            return None

        self.driver.get(url=self.url)
        if not os.path.exists(self.COOKIE_FILE):
            self._save_cookies_after_login()
            return None

        if not self.load_cookies:
            self._restore_cookies()
        return self._fetch_review_page(request)

    def _save_cookies_after_login(self):
        """Wait, dump the browser's cookies to COOKIE_FILE, close the window."""
        time.sleep(self.LOGIN_WAIT_SECONDS)
        cookies = self.driver.get_cookies()
        # After the login is done, persist the cookies to a local file.
        with open(self.COOKIE_FILE, 'w') as f:
            json.dump(cookies, f)
        self.driver.close()

    def _restore_cookies(self):
        """Replace the browser's cookies with the ones stored in COOKIE_FILE."""
        # Drop the cookies created when the connection was first opened.
        self.driver.delete_all_cookies()
        # Read back the cookies that were stored at login time.
        with open(self.COOKIE_FILE, 'r') as f:
            saved_cookies = json.load(f)
        for cookie in saved_cookies:
            self.driver.add_cookie({
                'domain': '.taobao.com',  # the domain needs the leading dot
                'name': cookie['name'],
                'value': cookie['value'],
                'path': '/',
                'expires': None,
            })
        self.load_cookies = True

    def _fetch_review_page(self, request):
        """Drive the search/click sequence and wrap the final page for Scrapy."""
        driver = self.driver

        # Visiting the page again with the cookies in place skips the login.
        driver.get('https://www.taobao.com')
        time.sleep(5)

        # Type the search keyword into the search box.
        search_box = driver.find_element_by_css_selector('#q')
        search_box.send_keys('口紅')
        time.sleep(3)

        search_button = driver.find_element_by_css_selector(
            '#J_TSearchForm > div.search-button > button')
        search_button.click()
        time.sleep(5)

        brand_link = driver.find_element_by_link_text('紀梵希')
        brand_link.click()
        time.sleep(10)

        first_item = driver.find_element_by_css_selector(
            '#mainsrp-itemlist > div > div > div:nth-child(1) > div:nth-child(1)')
        first_item.click()
        time.sleep(20)

        reviews_tab = driver.find_element_by_css_selector('#J_TabBar > li:nth-child(2)')
        reviews_tab.click()
        time.sleep(5)

        # The text itself is not used afterwards; the lookup is kept because
        # it raises when the first review is not present on the page.
        ping = driver.find_element_by_css_selector('#J_Reviews > div > div.rate-grid > table > tbody > tr:nth-child(1) > td.tm-col-master > div.tm-rate-content > div.tm-rate-fulltxt').text

        url = driver.current_url
        page_source = driver.page_source
        time.sleep(5)
        # Build the HtmlResponse from the page source; the body is text, so
        # the character encoding is given explicitly.
        return HtmlResponse(url=url, body=page_source, encoding='utf-8', request=request)