selenium很常見的一個用法:通過cookie跳過登陸和圖片/短信/驗證碼驗證等,直接處於登陸狀態。
看似簡單,但操作起來cookie卻總有問題。
首先用豆瓣做測試,整個思路就是:
1. 先使用selenium加載一個webdriver,通過driver加載豆瓣首頁,點擊登陸進行登陸驗證,登陸成功後會保存用戶信息到cookie
2. 將瀏覽器cookie保存到本地,然後刪除瀏覽器cookie,刷新頁面,發現用戶已經登出
3. 將本地cookie添加進瀏覽器cookie中,再次刷新頁面,用戶已經處於登陸狀態
踩到的坑有:
1. 豆瓣登陸頁面內嵌了一個iframe,因此要獲取到這個frame中的元素,要先切換一下框架,用switch_to.frame()就可以
2. 從瀏覽器中取出的cookie格式爲
[{"domain": "", "path": "", "expiry": "", "name": "", "httpOnly": "", "secure": "", "value": ""},
{"domain": "", "path": "", "expiry": "", "name": "", "httpOnly": "", "secure": "", "value": ""},
{"domain": "", "path": "", "expiry": "", "name": "", "httpOnly": "", "secure": "", "value": ""},
{"domain": "", "path": "", "expiry": "", "name": "", "httpOnly": "", "secure": "", "value": ""},
......]
類似這種格式,有很多個cookie,不知道哪個纔是新生成的,試着取第一個或者最後一個添加,都沒有用,所以這種情況下只好全部添加進cookie,相當於原封不動地還原cookie
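3. 原封不動還原cookie又遇到了格式問題,取出cookie後寫入文件"C:\Users\Lulu\Desktop\cookie.txt"中,是將cookies列表中的cookie一個個取出單獨寫入的,一個cookie爲一行(每行是一個JSON對象),格式如下:
{"domain": "", "path": "", "expiry": "", "name": "", "httpOnly": "", "secure": "", "value": ""}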
因此讀出時必須一行行讀取,然後每個cookie單獨做添加處理
4. 此時又出現了問題,cookie的expiry字段總是在添加的時候報錯“invalid argument: invalid 'expiry'”,這個錯誤是由於expiry格式不符合要求引起的,說格式不能爲float格式,網上有人將expiry置爲空“”,試了一下格式依然不對。所以我又抱着測試的心態把expiry都截成整數,雖然不報格式錯誤了,但不出所料,這樣添加的cookie不能起到認證用戶的作用,豆瓣依然是未登陸狀態。
5. 到這裏已經有點崩潰了,明明網上搜“cookie登陸”代碼一大堆,大家的方法大同小異,爲什麼一樣的代碼我卻不可以?我覺得根源還是出在expiry上,不知道是不是瀏覽器版本的原因,其他人很少有遇到invalid "expiry"這種錯誤。最後終於在stackoverflow上找到一個解決辦法,stackoverflow的同道中人,最後一個回答,答主表示他通過刪掉expiry這個字段解決了問題......這個方法雖然很那個,但嘗試了一下果然可以......
完整代碼如下,註釋掉的是我走過的彎路
import json
from selenium import webdriver
from time import sleep
import pytest
class Test_selenimu1:
    """Save Douban login cookies to disk, then replay them to skip the login form.

    ``test_cookies`` records the cookies of a logged-in session, one JSON
    object per line; ``test_case2`` loads that file into a fresh browser.
    """

    def setup_method(self):
        """Start Chrome and open the Douban home page before each test.

        Uses pytest's xunit-style ``setup_method`` hook instead of the
        nose-style ``setup``, which pytest 8 no longer calls.
        """
        self.driver = webdriver.Chrome(r"D:\Program Files\chromedriver.exe")
        self.driver.get(r"https://douban.com")

    # The skip marker's parameter is ``reason``; ``msg`` is not accepted.
    @pytest.mark.skip(reason="先不用")
    def test_cookies(self):
        """Log in to Douban and dump the session cookies to a text file."""
        driver = self.driver
        driver.implicitly_wait(10)
        # The login form lives inside an iframe, so switch into the first
        # iframe on the page before looking up the form elements.
        driver.switch_to.frame(driver.find_elements_by_tag_name("iframe")[0])
        # Presumably selects the password-login tab -- TODO confirm.
        driver.find_element_by_xpath("/html/body/div[1]/div[1]/ul[1]/li[2]").click()
        driver.find_element_by_id("username").send_keys("***********")
        driver.find_element_by_id("password").send_keys("***********")
        # Wait 10 seconds so the login can be completed by hand.
        sleep(10)
        cookies = driver.get_cookies()
        # One cookie per line, so the reader can restore them one at a time.
        with open(r"C:\Users\Lulu\Desktop\cookie.txt", 'w') as file1:
            for cookie in cookies:
                file1.write(json.dumps(cookie) + "\n")

    # To disable this test as well: @pytest.mark.skip(reason="先不用")
    def test_case2(self):
        """Restore the saved cookies and reload the page."""
        driver = self.driver
        # A Douban page is loaded before the cookies are added.
        driver.get(r"https://douban.com")
        driver.implicitly_wait(10)
        with open(r"C:\Users\Lulu\Desktop\cookie.txt", 'r') as file2:
            for line in file2:
                if not line.strip():
                    # Tolerate blank lines instead of failing in json.loads.
                    continue
                cookie = json.loads(line)
                # Adding a cookie that still carries "expiry" was rejected
                # with "invalid argument: invalid 'expiry'", so the field
                # is dropped before the cookie is added.
                cookie.pop("expiry", None)
                driver.add_cookie(cookie)
        # Abandoned attempts, kept for reference:
        #   - json.loads(line.strip()) followed by add_cookie() with the
        #     cookie unchanged (hit the 'expiry' error above);
        #   - reading only a single line with file2.readline().
        driver.refresh()
        sleep(3)

    def teardown_method(self):
        """Close the browser after each test."""
        self.driver.quit()
遺留問題:從fiddler和瀏覽器控制檯讀到的cookie跟通過selenium接口取出來的cookie不一樣,網上找到一篇文章https://www.jianshu.com/p/32e4a7cc2ddd,說這些格式不同的cookie是可以互相轉化的,雖然還不知道要怎麼轉化|= =,以後再研究吧
順帶,又測試了一下百度和b站,這種刪掉expiry的方法都行得通,反正也不是重要字段,只是過期時間就無所謂吧
代碼都扔上來備份吧
from selenium import webdriver
from time import sleep
import json
import pytest
class Test_bilibili:
    """Log in to bilibili, wipe the cookies, then restore them in one session."""

    def setup_method(self):
        """Start Chrome and open the bilibili login page before each test.

        Uses pytest's xunit-style ``setup_method`` hook instead of the
        nose-style ``setup``, which pytest 8 no longer calls.
        """
        self.driver = webdriver.Chrome(r"D:\Program Files\chromedriver.exe")
        self.driver.get(r"https://passport.bilibili.com/login")

    def test_1(self):
        """Save the login cookies, delete them, re-add them and open a link."""
        driver = self.driver
        driver.implicitly_wait(10)
        driver.find_element_by_id("login-username").send_keys("**********")
        driver.find_element_by_id("login-passwd").send_keys("***********")
        driver.find_element_by_xpath('//li[@class="btn-box"]/a').click()
        # Fixed pause after submitting the form -- presumably to allow any
        # manual verification step to be completed; TODO confirm.
        sleep(10)
        cookies = driver.get_cookies()
        # Back up the whole cookie list as a single JSON array.
        with open(r"C:\Users\Lulu\Desktop\bilibili_cookies.txt", "w") as file:
            file.write(json.dumps(cookies))
        # Abandoned attempt, kept for reference: rebuilding each cookie as a
        # new dict with a fixed "domain" of "bilibili.com", "path" of "/",
        # and "expiry" either cast to int or left out.
        driver.delete_all_cookies()
        driver.refresh()
        sleep(10)
        for t in cookies:
            # Drop "expiry" before re-adding the cookie; every other field
            # is passed back unchanged.
            t.pop("expiry", None)
            driver.add_cookie(t)
        driver.refresh()
        sleep(5)
        # NOTE(review): bilibili's UI is presumably in Simplified Chinese, in
        # which case this link text would need to be "历史" -- confirm.
        driver.find_element_by_link_text("歷史").click()
        sleep(5)

    def teardown_method(self):
        """Close the browser after each test."""
        self.driver.quit()
from selenium import webdriver
from time import sleep
import json
import pytest
class TestBaidu:
    """Log in to Baidu, wipe the cookies, then restore them in one session."""

    def setup_method(self):
        """Start Chrome and open the Baidu home page before each test.

        Uses pytest's xunit-style ``setup_method`` hook instead of the
        nose-style ``setup``, which pytest 8 no longer calls.
        """
        self.driver = webdriver.Chrome(r"D:\Program Files\chromedriver.exe")
        self.driver.get(r"https://www.baidu.com")

    def test_baidu_cookie(self):
        """Capture the login cookies, delete them, and add them back."""
        driver = self.driver
        driver.implicitly_wait(10)
        # Open the login dialog, then fill in and submit the credentials.
        driver.find_element_by_xpath("//*[@id='u1']/a[7]").click()
        driver.find_element_by_id("TANGRAM__PSP_10__footerULoginBtn").click()
        driver.find_element_by_id("TANGRAM__PSP_10__userName").send_keys("**********")
        driver.find_element_by_id("TANGRAM__PSP_10__password").send_keys("**********")
        sleep(5)
        driver.find_element_by_id("TANGRAM__PSP_10__submit").click()
        # Long fixed pause after submitting -- presumably to allow manual
        # verification to be completed; TODO confirm.
        sleep(40)
        cookies = driver.get_cookies()
        print(cookies)
        driver.delete_all_cookies()
        driver.refresh()
        sleep(5)
        for t in cookies:
            # Drop "expiry" before re-adding the cookie; every other field
            # is passed back unchanged.
            t.pop("expiry", None)
            driver.add_cookie(t)
        driver.refresh()
        sleep(5)

    def teardown_method(self):
        """Close the browser after each test."""
        self.driver.quit()