首先模拟登录:
// Step 1: simulate the login form submission and persist the session
// cookies to a file so that later requests can reuse the logged-in session.
$username = "abcde";
$pwd = "123123";
$url = "https://example.com/Login/index";
$fields = array(
    "username" => $username,
    "userpwd" => $pwd
);
// Cookie jar: cookies received during login are written to this file and
// read back when fetching pages that require a login. The path is relative,
// so it is resolved against the current working directory.
$cookie_file = "pic.cookie";

$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_POST, true);
// Encode the fields as application/x-www-form-urlencoded. Passing the raw
// array would send multipart/form-data instead, and on PHP < 5.6 a value
// beginning with "@" would be interpreted as a file upload.
curl_setopt($ch, CURLOPT_POSTFIELDS, http_build_query($fields));
// Send a browser-like User-Agent only when one has been configured; reading
// a missing $GLOBALS key would otherwise raise a warning.
if (isset($GLOBALS['user_agent'])) {
    curl_setopt($ch, CURLOPT_USERAGENT, $GLOBALS['user_agent']);
}
// Set the Referer header automatically when a redirect is followed.
curl_setopt($ch, CURLOPT_AUTOREFERER, 1);
// NOTE(review): TLS peer verification is disabled, so the server certificate
// is not checked and the credentials above are exposed to man-in-the-middle
// attacks. Enable verification outside of local testing.
curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, 0);
// Abort the transfer after 30 seconds so the script cannot hang forever.
curl_setopt($ch, CURLOPT_TIMEOUT, 30);
// 0 = do not include the response headers in the returned body.
curl_setopt($ch, CURLOPT_HEADER, 0);
// Return the response body from curl_exec() instead of printing it, so that
// $html really contains the page.
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
// Write all cookies received to $cookie_file when the handle is released.
curl_setopt($ch, CURLOPT_COOKIEJAR, $cookie_file);

$html = curl_exec($ch);
if ($html !== false) {
    // Other operations on the login response ($html) go here.
} else {
    // Print the human-readable cURL error message.
    echo curl_error($ch);
}
// Release the handle now: the cookie jar is only flushed to disk when the
// handle is destroyed, and the next request needs to read that file.
unset($ch);
登录成功后,抓取需要登录才能访问的页面:
// Step 2: fetch a page that is only accessible after logging in, sending the
// session cookies that were previously saved to the cookie file.
$url = "https://www.example.com/user/index";
$ch = curl_init();
// File holding the saved cookies; the relative path is resolved against the
// current working directory.
$cookie_file = "pic.cookie";

curl_setopt($ch, CURLOPT_URL, $url);
// Send a browser-like User-Agent only when one has been configured; reading
// a missing $GLOBALS key would otherwise raise a warning.
if (isset($GLOBALS['user_agent'])) {
    curl_setopt($ch, CURLOPT_USERAGENT, $GLOBALS['user_agent']);
}
// Set the Referer header automatically when a redirect is followed.
curl_setopt($ch, CURLOPT_AUTOREFERER, 1);
// NOTE(review): TLS peer verification is disabled, so the server certificate
// is not checked and the session cookie is exposed to man-in-the-middle
// attacks. Enable verification outside of local testing.
curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, 0);
// Abort the transfer after 30 seconds so the script cannot hang forever.
curl_setopt($ch, CURLOPT_TIMEOUT, 30);
// 0 = do not include the response headers in the returned body.
curl_setopt($ch, CURLOPT_HEADER, 0);
// Load the cookies from $cookie_file and send the matching ones with the
// request.
curl_setopt($ch, CURLOPT_COOKIEFILE, $cookie_file);
// Return the response body from curl_exec() instead of printing it.
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);

$html = curl_exec($ch);
if ($html !== false) {
    // Other operations on the fetched page ($html) go here.
} else {
    // Print the human-readable cURL error message.
    echo curl_error($ch);
}