PHP模擬登陸

PHP模擬登陸抓取,不使用cookieJar文件保存cookie。這是第一次嘗試用面向對象的方式來寫,Mark一下,自己留着看。其中有幾個難點:password的加密方法、cookie的接連獲取,以及巧用substr()和strpos()取值。

<?php
// Make every date()/strtotime() call in this script use the Asia/Shanghai zone.
date_default_timezone_set('Asia/Shanghai');
// Cap the script's execution time at 120 seconds.
set_time_limit(120);
/**
 * Logs in to the monitoring site with cURL and fetches the micro-blog count
 * for one keyword over one time window.
 *
 * Cookies are carried between requests in a plain "name=value; name=value"
 * string instead of a cookie-jar file.
 */
class yingji{
    // Cookie request-header value accumulated by prelogin() and dologin().
    private $cookie="";
    // Credentials posted by dologin(). $password has to hold the value exactly
    // as the login form submits it; the value previously hard-coded in
    // dologin() was 32 hex characters, so presumably a client-side hash
    // -- TODO confirm against the site's login script.
    private $username="email";
    private $password="password";
    private $url ="https://host/login";
    private $loginaction="https://host/loginAction";
    private $getcloneEidurl="https://host/monitor/pad/addAttention";
    private $targeturl="https://host/monitor/query-micro-blogs-count";
    // Header name => value. headerlines() turns these into the "Name: value"
    // list cURL expects; Accept-Encoding is applied through CURLOPT_ENCODING.
    private $request_headers=array(
            'Host' => 'host',
            'User-Agent' => 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Safari/537.36',
            'Accept' => '*/*',
            'Accept-Language' => 'zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3',
            'Accept-Encoding' => 'gzip, deflate',
            'X-Requested-With'=>'XMLHttpRequest',
            'Connection' => 'keep-alive'
        );
    // Value sent as the "cloneEid" field of the count query; set by getcloneEid().
    private $cloneEid;
    public $keyword;
    public $fromdate;
    public $todate;

    /**
     * @param string $keyword  search keyword
     * @param mixed  $fromdate sent verbatim as the query's startTime field
     * @param mixed  $todate   sent verbatim as the query's endTime field
     */
    public function __construct($keyword,$fromdate,$todate){
        $this->keyword=$keyword;
        $this->fromdate=$fromdate;
        $this->todate=$todate;
    }

    /**
     * Logs in when no cookie is held yet, registers the keyword, then runs
     * the count query, trying it at most three times.
     *
     * @return string the count text extracted from the response, or "0" when
     *                all three attempts failed
     */
    public function geturl(){
        if($this->cookie=="" || $this->cookie===NULL){
            $this->dologin();
        }
        $this->getcloneEid();
        // again() reports failure as FALSE, so a genuine count of "0" is
        // returned immediately instead of being retried as if it were an error.
        for($attempt=1;$attempt<=3;$attempt++){
            $count=$this->again();
            if($count!==FALSE){
                return $count;
            }
        }
        return "0";
    }

    /**
     * Runs the count query once.
     *
     * @return string|false the extracted count text, or FALSE when the request
     *                      failed, the status is not 200, or the response
     *                      carries no "count" field
     */
    private function again(){
        $post_data="viewType=day&startTime=".$this->fromdate."&endTime=".$this->todate."&dt=&dtt=day&st=MICRO_BLOG_ALL&fq=%7B%22blogType%22%3A0%2C%22blogStatus%22%3A0%2C%22content%22%3A%22%22%2C%22bloggerVipType%22%3A-1%2C%22minFans%22%3A%220%22%2C%22maxFans%22%3A%22-1%22%2C%22bloggerType%22%3A0%2C%22platformType%22%3A%22MICRO_BLOG_ALL%22%7D&q=".urlencode($this->keyword)."&cloneEid=".$this->cloneEid;
        $result=$this->docurl($this->targeturl,TRUE,$post_data,$this->cookie);
        if($this->statusof($result)!=="200"){
            return FALSE;
        }
        $at=strpos($result,"count");
        if($at===FALSE){
            return FALSE;
        }
        // The value starts 8 bytes after the "c" of "count" and the response
        // ends with two closing bytes. NOTE(review): these offsets are
        // inherited as-is; the response format is not visible here -- confirm.
        return substr($result,$at+8,-2);
    }

    /**
     * Registers the keyword with the site and stores the id from the reply;
     * that id is one of the fields posted with the final count query.
     */
    private function getcloneEid(){
        $post="at=EVENT&st=MICRO_BLOG_ALL&name=".urlencode($this->keyword)."&keywords=".urlencode($this->keyword);
        $output=$this->docurl($this->getcloneEidurl,TRUE,$post,$this->cookie);
        $this->cloneEid="";
        if(!is_string($output)){
            return;
        }
        // Look in the body first so header text containing "id" cannot be
        // picked up. The id is presumably a 36-character UUID -- TODO confirm.
        $parts=explode("\r\n\r\n",$output,2);
        $body=isset($parts[1]) ? $parts[1] : "";
        if(preg_match('/id"\s*:\s*"([^"]{36})"/',$body,$hit)){
            $this->cloneEid=$hit[1];
            return;
        }
        // Fall back to the fixed-offset extraction on the whole response.
        $at=strpos($output,"id");
        if($at!==FALSE){
            $this->cloneEid=(string)substr($output,$at+5,36);
        }
    }

    /**
     * Collects the pre-login cookies, posts the credentials and merges the
     * cookies set by the login response into $this->cookie.
     */
    private function dologin(){
        $this->prelogin();
        // http_build_query() encodes spaces as "+" and "@" as "%40".
        $post=http_build_query(array(
            'client_screen'=>'1440 x 900',
            'langCode'=>'',
            'username'=>$this->username,
            'password'=>$this->password
        ),'','&');
        $result=$this->docurl($this->loginaction,TRUE,$post,$this->cookie);
        $this->cookie=$this->mergecookies($this->cookie,$result);
    }

    /**
     * Requests the login page and keeps the cookies it sets, so the login
     * POST can send them back.
     */
    private function prelogin(){
        $result=$this->docurl($this->url,FALSE,"",$this->cookie);
        $this->cookie=$this->mergecookies($this->cookie,$result);
    }

    /**
     * Applies the Set-Cookie headers of a raw response to a cookie string.
     *
     * @param string       $current  existing "name=value; name=value" string
     * @param string|false $response raw docurl() output (headers, blank line, body)
     * @return string the updated cookie string; cookies set again are
     *                overwritten, cookies sent with Max-Age=0 are removed
     */
    private function mergecookies($current,$response){
        $jar=array();
        foreach(explode(";",(string)$current) as $pair){
            $pair=trim($pair);
            $eq=strpos($pair,"=");
            if($eq!==FALSE && $eq>0){
                $jar[substr($pair,0,$eq)]=(string)substr($pair,$eq+1);
            }
        }
        if(is_string($response)){
            // Only the header section may contribute cookies, never the body.
            $parts=explode("\r\n\r\n",$response,2);
            // Case-insensitive because header names can arrive in lowercase.
            if(preg_match_all('/^Set-Cookie:[ \t]*([^=;\s]+)=([^;\r\n]*)([^\r\n]*)/mi',$parts[0],$found,PREG_SET_ORDER)){
                foreach($found as $hit){
                    if(preg_match('/;\s*Max-Age=0\s*(;|$)/i',$hit[3])){
                        unset($jar[$hit[1]]);
                    }else{
                        $jar[$hit[1]]=trim($hit[2]);
                    }
                }
            }
        }
        $pairs=array();
        foreach($jar as $name=>$value){
            $pairs[]=$name."=".$value;
        }
        return implode("; ",$pairs);
    }

    /**
     * Returns the three-digit status code of the first status line of a raw
     * response, or "" when there is none (for example after a failed request).
     */
    private function statusof($response){
        if(is_string($response) && preg_match('#^HTTP/[\d.]+[ \t]+(\d{3})#',$response,$hit)){
            return $hit[1];
        }
        return "";
    }

    /**
     * Builds the "Name: value" list for CURLOPT_HTTPHEADER from
     * $request_headers, leaving out Accept-Encoding (see docurl()).
     */
    private function headerlines(){
        $lines=array();
        foreach($this->request_headers as $name=>$value){
            if(strcasecmp($name,'Accept-Encoding')===0){
                continue;
            }
            $lines[]=$name.': '.$value;
        }
        return $lines;
    }

    /**
     * Performs one HTTP request.
     *
     * @param string $url      target URL
     * @param bool   $is_post  send a POST with $postdata when true, otherwise a GET
     * @param string $postdata url-encoded request body
     * @param string $cookie   Cookie header value; nothing is sent when empty
     * @return string|false response headers and body as one string, or FALSE
     *                      when the transfer failed
     */
    private function docurl($url,$is_post=FALSE,$postdata="",$cookie=""){
        $ch=curl_init();
        curl_setopt($ch, CURLOPT_URL, $url);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, TRUE);
        // NOTE(review): certificate verification is switched off, so the
        // credentials travel over an unauthenticated TLS connection. Enable
        // CURLOPT_SSL_VERIFYPEER once the host's certificate chain is confirmed.
        curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, FALSE);
        curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, 2);
        // Keep the response headers in the output; the cookie and status
        // parsing above reads them.
        curl_setopt($ch, CURLOPT_HEADER, 1);
        curl_setopt($ch, CURLOPT_HTTPHEADER, $this->headerlines());
        if(isset($this->request_headers['Accept-Encoding'])){
            // Lets cURL both advertise the encodings and decode the body, so
            // the substr()/regex parsing never sees compressed bytes.
            curl_setopt($ch, CURLOPT_ENCODING, $this->request_headers['Accept-Encoding']);
        }
        if($is_post){
            curl_setopt($ch, CURLOPT_POST, 1);
            curl_setopt($ch, CURLOPT_POSTFIELDS, $postdata);
        }
        if($cookie!=""){
            curl_setopt($ch, CURLOPT_COOKIE, $cookie);
        }
        $output=curl_exec($ch);
        curl_close($ch);
        return $output;
    }
}
// The crawl tasks live in the database so they can be added or removed
// without touching this script.
$conn=mysqli_connect('localhost','','');
if(!$conn){
    // No link exists after a failed connect, so the connect-specific error
    // function has to be used.
    die("連接數據庫失敗".mysqli_connect_error());
}
mysqli_select_db($conn,"");
mysqli_set_charset($conn,"utf8");
$result=mysqli_query($conn,"SELECT * FROM keywords");
if(!$result){
    die("查詢失敗".mysqli_error($conn));
}
// Prepared once and executed per keyword; binding the values keeps keywords
// that contain quotes from breaking or altering the statement.
$save=mysqli_prepare($conn,"UPDATE keywords SET tempdate='1', daycount=? WHERE words=?");
if(!$save){
    die("查詢失敗".mysqli_error($conn));
}
$savecount="";
$savewords="";
mysqli_stmt_bind_param($save,"ss",$savecount,$savewords);
while ($row=mysqli_fetch_object($result)) {
    echo "正在抓取".htmlspecialchars($row->words,ENT_QUOTES,'UTF-8')."...<br/>";
    // Stays NULL when this keyword needs no update on this run.
    $daycount=NULL;
    if($row->tempdate=='1'){
        // tempdate '1' is what the UPDATE above writes, so this row has been
        // saved before: append the count for yesterday only.
        $today=strtotime(date("Y-m-d",time()));
        $todate=$today-1;
        // Equivalent to the former "!= || >" test, whose second clause could
        // never change the outcome.
        // NOTE(review): nothing records that yesterday was already appended,
        // so running the script twice in one day appends the count twice.
        if($today!=strtotime($row->Fromdate)){
            echo "fromdate:".date("Y-m-d H:i:s",($todate+1-86400))." todate ".date("Y-m-d H:i:s",($todate));
            // The crawler is given its window bounds in milliseconds.
            $a=new yingji($row->words,(1000*($todate+1-86400)),(1000*$todate));
            $daycount=$row->daycount.",".$a->geturl();
            unset($a);
        }
    }else{
        // First crawl of this keyword: one query per whole day from Fromdate
        // up to now.
        $fromdate=strtotime($row->Fromdate);
        if($fromdate===FALSE){
            // An unparseable date would otherwise start the loop at 1970.
            echo "Fromdate無效<br/>";
            continue;
        }
        $times=floor((time()-$fromdate)/86400);
        // Started fresh for every keyword so counts of a keyword whose save
        // failed cannot leak into the next one.
        $daycount="begin";
        for ($i=1; $i <=$times ; $i++) {
            $a=new yingji($row->words,(1000*$fromdate),(1000*($fromdate+86400-1)));
            $count= $a->geturl();
            echo date("Y-m-d H:i:s",$fromdate)."  to ".date("Y-m-d H:i:s",($fromdate+86400-1))."<br/>";
            $fromdate+=86400;
            $daycount.=",".$count;
            unset($a);
            // ob_flush() raises a notice when no output buffer is active.
            if(ob_get_level()>0){
                ob_flush();
            }
            flush();
            sleep(1);
        }
    }
    if($daycount!==NULL){
        $savecount=$daycount;
        $savewords=$row->words;
        if(!mysqli_stmt_execute($save)){
            echo "更新失敗".mysqli_stmt_error($save)."<br/>";
        }
    }
}
mysqli_stmt_close($save);
mysqli_free_result($result);
mysqli_close($conn);

Mark給自己看

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章