背景
在1688官網裏面有很多信息是需要登錄才能看得到的,比如商家的聯繫電話等等。那麼我們在抓取它的網頁的時候,肯定是需要維持登錄狀態才能得到對應的內容。這裏面就會涉及到自動登錄的問題。
登錄地址
https://login.1688.com/member/signin.htm
自動登錄方法
1、找到對應的元素,賬號、密碼框。
2、把賬號、密碼值帶進去。
// Load the candidate accounts from the local JSON file.
string file = "1688Account.json";
if (!File.Exists(file))
{
    throw new ArgumentException("1688Account not found");
}
string data = File.ReadAllText(file, Encoding.UTF8);
var account1688list = JsonConvert.DeserializeObject<List<Account1688Item>>(data);
// DeserializeObject returns null for empty/"null" content; an empty list would
// otherwise fail later with an opaque index-out-of-range error.
if (account1688list == null || account1688list.Count == 0)
{
    throw new ArgumentException("1688Account is empty");
}
List<string> logininfolist = account1688list.Select(o => o.AccountPassword).ToList();
// Pick one account at random.
Random rdinfo = new Random();
var modelinfo = logininfolist[rdinfo.Next(logininfolist.Count)];
// Each entry is "account+password". Split only on the FIRST '+', so a password
// that itself contains '+' is not truncated.
string[] loginparts = (modelinfo ?? string.Empty).Split(new[] { '+' }, 2);
if (loginparts.Length != 2)
{
    throw new ArgumentException("1688Account entry must be in the form account+password");
}
// Type the credentials into the login form.
driver.FindElement(By.Id("fm-login-id")).SendKeys(loginparts[0]);
driver.FindElement(By.Id("fm-login-password")).SendKeys(loginparts[1]);
3、模擬點擊提交按鈕操作。
// Click the submit button, then pause 30 seconds before continuing
// (presumably to leave time for any manual step such as a slider captcha - confirm).
var submitButton = driver.FindElement(By.ClassName("password-login"));
submitButton.Click();
Thread.Sleep(TimeSpan.FromSeconds(30));
4、進入控制檯,驗證是否登錄成功,並記錄Cookies。下次再來的時候,直接使用現成的Cookies,有效期一般可以維持一天左右。
// Open the work console and give the page five seconds to settle.
const string workConsoleUrl = "https://work.1688.com/?tracelog=login_target_is_blank_1688";
driver.Navigate().GoToUrl(workConsoleUrl);
Thread.Sleep(TimeSpan.FromSeconds(5));

// Ending up on the login host is treated as a failed login:
// drop any stored cookies and signal the caller to log in again.
bool bouncedToLogin = driver.Url.Contains("login.1688.com");
if (bouncedToLogin)
{
    Console.WriteLine("登錄失敗");
    CookieHelp.DeleteCookies();
    Console.WriteLine("2");
    throw new Exception("重新登錄");
}

// Login accepted: refresh once, then persist the session cookies for later runs.
driver.Navigate().Refresh();
var sessionCookies = driver.Manage().Cookies.AllCookies;
CookieHelp.WriteCookies(sessionCookies);
5、判斷是否有現成的登錄cookies(在完整代碼中,這一步最先執行;有現成cookies時就不需要再走上面的登錄流程)。
// Open the site first, then clear whatever cookies the browser currently holds.
driver.Navigate().GoToUrl("https://www.1688.com/");
driver.Manage().Cookies.DeleteAllCookies();
var listCookie = CookieHelp.GetCookie();
if (listCookie != null)
{
    // Saved cookies exist: reset the login-attempt counter and load them into the browser.
    logintry = 0;
    Console.WriteLine("有現成cookies" + DateTime.UtcNow);
    foreach (var item in listCookie)
    {
        driver.Manage().Cookies.AddCookie(new Cookie(item.Name, item.Value, item.Domain, item.Path, item.Expiry));
    }
    Thread.Sleep(2000);
    // Open the work console with the restored cookies.
    driver.Navigate().GoToUrl("https://work.1688.com/?tracelog=login_target_is_blank_1688");
    Thread.Sleep(1000 * 2);
}
完整代碼
/// <summary>
/// Ensures the browser session is logged in to 1688.
/// Saved cookies are reused when available; otherwise a form login is performed with a
/// randomly chosen account from <c>1688Account.json</c> and the resulting cookies are saved.
/// </summary>
/// <param name="_reptilesImageSearchService">Not used by this method.</param>
/// <param name="driver">The Selenium driver whose session is to be logged in.</param>
/// <exception cref="ArgumentException">
/// The account file is missing or empty, or the chosen entry is not in the form <c>account+password</c>.
/// </exception>
/// <exception cref="Exception">
/// The saved cookies have expired, the login failed (message "重新登錄"),
/// or the login attempt limit was exceeded.
/// </exception>
public void Implement(IReptilesImageSearchService _reptilesImageSearchService, IWebDriver driver)
{
    // Open the site first, then clear whatever cookies the browser currently holds.
    driver.Navigate().GoToUrl("https://www.1688.com/");
    driver.Manage().Cookies.DeleteAllCookies();
    var listCookie = CookieHelp.GetCookie();
    if (listCookie != null)
    {
        // Saved cookies exist: reset the attempt counter and load them into the browser.
        logintry = 0;
        Console.WriteLine("有現成cookies" + DateTime.UtcNow);
        foreach (var item in listCookie)
        {
            driver.Manage().Cookies.AddCookie(new Cookie(item.Name, item.Value, item.Domain, item.Path, item.Expiry));
        }
        Thread.Sleep(2000);
        driver.Navigate().GoToUrl("https://work.1688.com/?tracelog=login_target_is_blank_1688");
        Thread.Sleep(1000 * 2);
        // Ending up on the login host means the saved cookies were not accepted.
        if (driver.Url.Contains("login.1688.com"))
        {
            Console.WriteLine("cookies過期了");
            CookieHelp.DeleteCookies();
            Console.WriteLine("1");
            throw new Exception("重新登錄");
        }
    }
    else
    {
        // No saved cookies: perform a fresh login, at most five attempts in a row.
        if (logintry > 4)
        {
            Console.WriteLine("登陸次數超出:" + logintry);
            throw new Exception("登陸次數超出,退出");
        }
        logintry++;
        Console.WriteLine("無現成cookies" + DateTime.UtcNow);
        driver.Navigate().GoToUrl("https://login.1688.com/member/signin.htm");

        #region Login action
        // The login form is looked up inside the first frame of the sign-in page.
        driver.SwitchTo().Frame(0);

        // Run the local stealth.min.js before interacting with the form. The result is
        // deliberately ignored: casting it to string could throw if the script returned
        // a non-string value.
        IJavaScriptExecutor js = (IJavaScriptExecutor)driver;
        string jsfile = File.ReadAllText("stealth.min.js", Encoding.UTF8);
        js.ExecuteScript(jsfile);

        string[] loginparts = PickRandomLoginInfo();
        driver.FindElement(By.Id("fm-login-id")).SendKeys(loginparts[0]);
        driver.FindElement(By.Id("fm-login-password")).SendKeys(loginparts[1]);
        driver.FindElement(By.ClassName("password-login")).Click();
        // Wait 30 seconds after submitting
        // (presumably to leave time for manual steps such as a slider captcha - confirm).
        Thread.Sleep(1000 * 30);
        #endregion Login action

        driver.Navigate().GoToUrl("https://work.1688.com/?tracelog=login_target_is_blank_1688");
        Thread.Sleep(1000 * 5);
        // Ending up on the login host means the login did not succeed.
        if (driver.Url.Contains("login.1688.com"))
        {
            Console.WriteLine("登錄失敗");
            CookieHelp.DeleteCookies();
            Console.WriteLine("2");
            throw new Exception("重新登錄");
        }
        // Login accepted: refresh once, then persist the session cookies for later runs.
        driver.Navigate().Refresh();
        CookieHelp.WriteCookies(driver.Manage().Cookies.AllCookies);
    }
    Thread.Sleep(1000);
}

/// <summary>
/// Reads <c>1688Account.json</c>, picks one entry at random and splits it into account and password.
/// </summary>
/// <returns>A two-element array: <c>[0]</c> is the account, <c>[1]</c> is the password.</returns>
/// <exception cref="ArgumentException">
/// The file is missing or contains no accounts, or the chosen entry is not in the form <c>account+password</c>.
/// </exception>
private static string[] PickRandomLoginInfo()
{
    string file = "1688Account.json";
    if (!File.Exists(file))
    {
        throw new ArgumentException("1688Account not found");
    }
    string data = File.ReadAllText(file, Encoding.UTF8);
    var account1688list = JsonConvert.DeserializeObject<List<Account1688Item>>(data);
    // DeserializeObject returns null for empty/"null" content; an empty list would
    // otherwise fail below with an opaque index-out-of-range error.
    if (account1688list == null || account1688list.Count == 0)
    {
        throw new ArgumentException("1688Account is empty");
    }
    List<string> logininfolist = account1688list.Select(o => o.AccountPassword).ToList();
    Random rdinfo = new Random();
    string modelinfo = logininfolist[rdinfo.Next(logininfolist.Count)];
    // Split only on the FIRST '+', so a password that itself contains '+' is not truncated.
    string[] loginparts = (modelinfo ?? string.Empty).Split(new[] { '+' }, 2);
    if (loginparts.Length != 2)
    {
        throw new ArgumentException("1688Account entry must be in the form account+password");
    }
    return loginparts;
}
注意事項
1、登錄的時候,有時候會出現滑塊驗證碼,這時候一般是使用其他賬號重試或者是在當前的機器手工登錄一次,後面基本就會被信任。
2、使用下面這段代碼(在頁面中執行 stealth.min.js),目的是讓自動化瀏覽器的行爲更接近真實用戶,最大限度地減少驗證碼出現的機率。
// Run the local stealth.min.js in the current page.
// A simpler alternative is to hide only navigator.webdriver:
//   js.ExecuteScript("Object.defineProperties(navigator, {webdriver:{get:()=>undefined}});");
IJavaScriptExecutor js = (IJavaScriptExecutor)driver;
string jsfile = File.ReadAllText("stealth.min.js", Encoding.UTF8);
// The result is deliberately ignored: casting it to string could throw
// if the script returned a non-string value.
js.ExecuteScript(jsfile);