首先模擬登錄:
// Simulated login: POST the credentials to the login endpoint and store the
// cookies the server issues in a cookie file for use by later requests.
$username = "abcde";
$pwd = "123123";
$url = "https://example.com/Login/index";
$fields = array(
    "username" => $username,
    "userpwd" => $pwd,
);

// Cookie file that receives the cookies set by the login response.
$cookie_file = "pic.cookie";

$ch = curl_init();
curl_setopt_array($ch, array(
    CURLOPT_URL => $url,
    CURLOPT_POST => true,
    // Send the form as application/x-www-form-urlencoded; handing cURL the
    // raw array would produce a multipart/form-data body instead.
    CURLOPT_POSTFIELDS => http_build_query($fields, '', '&'),
    // Set the Referer header automatically when a redirect is followed.
    CURLOPT_AUTOREFERER => true,
    // Verify the server certificate and host name: this request carries the
    // password, so it must not be sent to an unverified peer.
    CURLOPT_SSL_VERIFYPEER => true,
    CURLOPT_SSL_VERIFYHOST => 2,
    // Abort the transfer after 30 seconds so the script cannot hang.
    CURLOPT_TIMEOUT => 30,
    // Do not include the response headers in the returned body.
    CURLOPT_HEADER => false,
    // Return the response from curl_exec() instead of printing it, so that
    // $html actually holds the page content.
    CURLOPT_RETURNTRANSFER => true,
    CURLOPT_COOKIEJAR => $cookie_file,
));

// Only send a User-Agent when the caller has configured one; reading a
// missing $GLOBALS key would raise a warning.
if (isset($GLOBALS['user_agent'])) {
    curl_setopt($ch, CURLOPT_USERAGENT, $GLOBALS['user_agent']);
}

$html = curl_exec($ch);
if ($html === false) {
    // Print the cURL error message for the failed transfer.
    echo curl_error($ch);
} else {
    // Other operations on the login response in $html go here.
}

// Release the handle explicitly: the cookie jar file is written when the
// handle is closed/destroyed, and it must exist before the next request.
unset($ch);
登錄成功後,抓取需要登錄才能訪問的頁面:
// Fetch a page that requires an authenticated session, sending the cookies
// previously saved in the cookie file.
$url = "https://www.example.com/user/index";
$cookie_file = "pic.cookie"; // cookie file to read the saved cookies from

$ch = curl_init();
curl_setopt_array($ch, array(
    CURLOPT_URL => $url,
    // Set the Referer header automatically when a redirect is followed.
    CURLOPT_AUTOREFERER => true,
    // Verify the server certificate and host name: this request carries the
    // session cookies, so it must not be sent to an unverified peer.
    CURLOPT_SSL_VERIFYPEER => true,
    CURLOPT_SSL_VERIFYHOST => 2,
    // Abort the transfer after 30 seconds so the script cannot hang.
    CURLOPT_TIMEOUT => 30,
    // Do not include the response headers in the returned body.
    CURLOPT_HEADER => false,
    // Load the cookies to send from the cookie file.
    CURLOPT_COOKIEFILE => $cookie_file,
    // Return the response from curl_exec() instead of printing it.
    CURLOPT_RETURNTRANSFER => true,
));

// Only send a User-Agent when the caller has configured one; reading a
// missing $GLOBALS key would raise a warning.
if (isset($GLOBALS['user_agent'])) {
    curl_setopt($ch, CURLOPT_USERAGENT, $GLOBALS['user_agent']);
}

$html = curl_exec($ch);
if ($html === false) {
    // Print the cURL error message for the failed transfer.
    echo curl_error($ch);
} else {
    // Other operations on the fetched page in $html go here.
}

// Release the cURL handle now that the transfer is finished.
unset($ch);