採集論壇的第一步就是要模擬登錄。由於各個站點的登錄表單各不相同,驗證方式又多種多樣,所以直接把用戶名和密碼提交到登錄頁面就比較繁瑣。
因此,採用cookie來模擬登錄無疑是最佳捷徑。
對象:www.discuz.net
用戶名:liuyuntest
密碼:123456
一 首先我們手工登錄下,記錄下cookie
dznet_cookietime=2592000;dznet_onlineusernum=7816;dznet_sid=QtlC87;
dznet_auth=6bbeCQrzGv4eliNMLgU%2FlGZSpzbrsauGO1l0OBp6VQw5p0bcEg0xd4slYCM2ks%2FL0YCVYSO7XP2z8GMaxkPDUbXZCWft;
checkpm=1
(目前主流瀏覽器都有插件可以查看cookie)
二 採用snoopy模擬登錄
- // Load the Snoopy HTTP client class and create a client instance.
- include("snoopy.php");
- $snoopy = new Snoopy;
- // Browser identification: use the User-Agent of the same browser the cookie was captured with
- // (PS: the browser information can be inspected through $_SERVER).
- $snoopy->agent = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; TheWorld)";
- $snoopy->referer = "http://www.discuz.net/";
- // Cookie header value recorded after the manual login. It is built by concatenation so the
- // value stays on a single line: a line break inside the string literal would become part of
- // the header that is sent and corrupt it.
- $snoopy->rawheaders["COOKIE"] = "dznet_cookietime=2592000; dznet_onlineusernum=7816; dznet_sid=QtlC87; "
-     . "dznet_auth=6bbeCQrzGv4eliNMLgU%2FlGZSpzbrsauGO1l0OBp6VQw5p0bcEg0xd4slYCM2ks%2FL0YCVYSO7XP2z8GMaxkPDUbXZCWft; "
-     . "checkpm=1";
- // Fetch the new-thread posting page.
- $snoopy->fetch("http://www.discuz.net/post.php?action=newthread&fid=2&extra=page%3D1");
- // Keep the response body in $tempCn and print it.
- echo $tempCn = $snoopy->results;
三 採用curl 模擬登錄
- // Page to request using the saved login cookie.
- $url = "http://www.discuz.net/";
- // User-Agent string sent with the request.
- $useragent = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; TheWorld)";
- // Cookie header value recorded after the manual login. It is built by concatenation so the
- // value stays on a single line: a line break inside the string literal would become part of
- // the header that is sent and corrupt it.
- $cookie = "dznet_cookietime=2592000; dznet_onlineusernum=7816; dznet_sid=QtlC87; "
-     . "dznet_auth=6bbeCQrzGv4eliNMLgU%2FlGZSpzbrsauGO1l0OBp6VQw5p0bcEg0xd4slYCM2ks%2FL0YCVYSO7XP2z8GMaxkPDUbXZCWft; "
-     . "checkpm=1";
- $ch= curl_init();
- curl_setopt($ch, CURLOPT_URL,$url);
- curl_setopt($ch, CURLOPT_COOKIE, $cookie);
- curl_setopt($ch, CURLOPT_REFERER, "http://www.discuz.net/index.php");
- curl_setopt($ch, CURLOPT_HEADER, false);
- curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
- curl_setopt ($ch, CURLOPT_USERAGENT, $useragent);