PHP 伪装IP地址 数据采集 GET、POST请求

大家都知道,采集别人的网站时,对方可能会封掉你的IP地址,所以不能直接暴露自己的真实IP地址。今天我们就浅谈如何用PHP伪装IP地址,进行数据采集!

首先,我用的是TP5框架,把IP伪装及数据采集封装到common.php中了,代码如下:

<?php
// +----------------------------------------------------------------------
// | ThinkPHP [ WE CAN DO IT JUST THINK ]
// +----------------------------------------------------------------------
// | Copyright (c) 2006-2016 http://thinkphp.cn All rights reserved.
// +----------------------------------------------------------------------
// | Licensed ( http://www.apache.org/licenses/LICENSE-2.0 )
// +----------------------------------------------------------------------
// | Author: 流年 <[email protected]>
// +----------------------------------------------------------------------

// 应用公共文件
use think\Db;

/*************************************  封装公共方法 *****************************************/
/**
 * Fetch a URL with a GET request while sending spoofed client-IP headers.
 *
 * A random address is built from a fixed list of two-octet prefixes and sent
 * as the CLIENT-IP and X-FORWARDED-FOR headers, together with a randomly
 * chosen desktop User-Agent and a fixed Baidu referer. No proxy or outgoing
 * interface is configured here, so only the request headers are forged.
 *
 * @param string $url Address to fetch.
 * @return mixed Whatever curl_exec() returns: the response body as a string,
 *               or false when the transfer fails.
 */
function pretendIpData($url){
    // Prefixes must be quoted strings: a bare 119.120 is a float literal and
    // would be concatenated as "119.12", producing a different address range.
    $prefixes = [
        '119.120',
        '124.174',
        '116.249',
        '118.125',
        '42.175',
        '124.162',
        '211.167',
        '58.206',
        '117.24',
        '203.93',
    ];
    // Pick one prefix at random and append two random octets.
    $ip = $prefixes[array_rand($prefixes)].'.'.rand(1,255).'.'.rand(1,255);
    // Referer presented to the target site.
    $referUrl = "http://www.baidu.com";
    // Desktop User-Agent strings; the keys are labels only and are never sent.
    $agentArray=[
        "safari 5.1 – MAC"=>"Mozilla/5.0 (Windows NT 6.1) AppleWebKit/536.11 (KHTML, like Gecko) Chrome/20.0.1132.57 Safari/536.11",
        "safari 5.1 – Windows"=>"Mozilla/5.0 (Windows; U; Windows NT 6.1; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50",
        "Firefox 38esr"=>"Mozilla/5.0 (Windows NT 10.0; WOW64; rv:38.0) Gecko/20100101 Firefox/38.0",
        "IE 11"=>"Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; .NET4.0C; .NET4.0E; .NET CLR 2.0.50727; .NET CLR 3.0.30729; .NET CLR 3.5.30729; InfoPath.3; rv:11.0) like Gecko",
        "IE 9.0"=>"Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0",
        "IE 8.0"=>"Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0)",
        "IE 7.0"=>"Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0)",
        "IE 6.0"=>"Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)",
        "Firefox 4.0.1 – MAC"=>"Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:2.0.1) Gecko/20100101 Firefox/4.0.1",
        "Firefox 4.0.1 – Windows"=>"Mozilla/5.0 (Windows NT 6.1; rv:2.0.1) Gecko/20100101 Firefox/4.0.1",
        "Opera 11.11 – MAC"=>"Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; en) Presto/2.8.131 Version/11.11",
        "Opera 11.11 – Windows"=>"Opera/9.80 (Windows NT 6.1; U; en) Presto/2.8.131 Version/11.11",
        "Chrome 17.0 – MAC"=>"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11",
        "傲游(Maxthon)"=>"Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Maxthon 2.0)",
        "腾讯TT"=>"Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; TencentTraveler 4.0)",
        "世界之窗(The World) 2.x"=>"Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)",
        "世界之窗(The World) 3.x"=>"Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; The World)",
        "360浏览器"=>"Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; 360SE)",
        "搜狗浏览器 1.x"=>"Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Trident/4.0; SE 2.X MetaSr 1.0; SE 2.X MetaSr 1.0; .NET CLR 2.0.50727; SE 2.X MetaSr 1.0)",
        "Avant"=>"Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Avant Browser)",
        "Green Browser"=>"Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)",
    ];
    // array_rand() returns a key, which is then used to look up the UA string.
    $userAgent=$agentArray[array_rand($agentArray,1)];
    // Headers carrying the forged client address.
    $header = [
        'CLIENT-IP:'.$ip,
        'X-FORWARDED-FOR:'.$ip,
    ];
    $curl = curl_init();
    curl_setopt($curl, CURLOPT_URL, $url); // target address
    curl_setopt($curl, CURLOPT_HTTPHEADER, $header);
    curl_setopt($curl, CURLOPT_RETURNTRANSFER,true); // return the body instead of printing it
    curl_setopt($curl, CURLOPT_REFERER, $referUrl);
    curl_setopt($curl, CURLOPT_USERAGENT, $userAgent);
    // NOTE(review): certificate verification is switched off, so HTTPS peers are not authenticated.
    curl_setopt($curl, CURLOPT_SSL_VERIFYPEER, false);
    $info = curl_exec($curl);
    // Release the handle; the original never closed it.
    curl_close($curl);
    return $info;
}

/**
 * Send a POST request to a URL while sending spoofed client-IP headers.
 *
 * A random address is built from a fixed list of two-octet prefixes and sent
 * as the CLIENT-IP and X-FORWARDED-FOR headers, together with a randomly
 * chosen desktop User-Agent and a fixed Baidu referer. No proxy or outgoing
 * interface is configured here, so only the request headers are forged.
 *
 * When the transfer fails, the cURL error text is echoed to the output and
 * the failed result of curl_exec() is still returned.
 *
 * @param string $url  Address to post to.
 * @param array  $data Fields handed to CURLOPT_POSTFIELDS unchanged.
 * @return mixed Whatever curl_exec() returns: the raw response body as a
 *               string (not decoded here), or false when the transfer fails.
 */
function getPostIpData($url,$data=array()){
    // Prefixes must be quoted strings: a bare 119.120 is a float literal and
    // would be concatenated as "119.12", producing a different address range.
    $prefixes = [
        '119.120',
        '124.174',
        '116.249',
        '118.125',
        '42.175',
        '124.162',
        '211.167',
        '58.206',
        '117.24',
        '203.93',
    ];
    // Pick one prefix at random and append two random octets.
    $ip = $prefixes[array_rand($prefixes)].'.'.rand(1,255).'.'.rand(1,255);
    // Referer presented to the target site.
    $referUrl = "http://www.baidu.com";
    // Desktop User-Agent strings; the keys are labels only and are never sent.
    $agentArray=[
        "safari 5.1 – MAC"=>"Mozilla/5.0 (Windows NT 6.1) AppleWebKit/536.11 (KHTML, like Gecko) Chrome/20.0.1132.57 Safari/536.11",
        "safari 5.1 – Windows"=>"Mozilla/5.0 (Windows; U; Windows NT 6.1; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50",
        "Firefox 38esr"=>"Mozilla/5.0 (Windows NT 10.0; WOW64; rv:38.0) Gecko/20100101 Firefox/38.0",
        "IE 11"=>"Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; .NET4.0C; .NET4.0E; .NET CLR 2.0.50727; .NET CLR 3.0.30729; .NET CLR 3.5.30729; InfoPath.3; rv:11.0) like Gecko",
        "IE 9.0"=>"Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0",
        "IE 8.0"=>"Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0)",
        "IE 7.0"=>"Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0)",
        "IE 6.0"=>"Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)",
        "Firefox 4.0.1 – MAC"=>"Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:2.0.1) Gecko/20100101 Firefox/4.0.1",
        "Firefox 4.0.1 – Windows"=>"Mozilla/5.0 (Windows NT 6.1; rv:2.0.1) Gecko/20100101 Firefox/4.0.1",
        "Opera 11.11 – MAC"=>"Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; en) Presto/2.8.131 Version/11.11",
        "Opera 11.11 – Windows"=>"Opera/9.80 (Windows NT 6.1; U; en) Presto/2.8.131 Version/11.11",
        "Chrome 17.0 – MAC"=>"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11",
        "傲游(Maxthon)"=>"Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Maxthon 2.0)",
        "腾讯TT"=>"Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; TencentTraveler 4.0)",
        "世界之窗(The World) 2.x"=>"Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)",
        "世界之窗(The World) 3.x"=>"Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; The World)",
        "360浏览器"=>"Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; 360SE)",
        "搜狗浏览器 1.x"=>"Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Trident/4.0; SE 2.X MetaSr 1.0; SE 2.X MetaSr 1.0; .NET CLR 2.0.50727; SE 2.X MetaSr 1.0)",
        "Avant"=>"Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Avant Browser)",
        "Green Browser"=>"Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)",
    ];
    // array_rand() returns a key, which is then used to look up the UA string.
    $userAgent=$agentArray[array_rand($agentArray,1)];
    // Headers carrying the forged client address.
    $header = [
        'CLIENT-IP:'.$ip,
        'X-FORWARDED-FOR:'.$ip,
    ];
    $curl = curl_init(); // start a cURL session
    curl_setopt($curl, CURLOPT_URL, $url); // target address
    curl_setopt($curl, CURLOPT_HTTPHEADER, $header);
    curl_setopt($curl, CURLOPT_RETURNTRANSFER,true); // return the body instead of printing it
    curl_setopt($curl, CURLOPT_REFERER, $referUrl);
    // NOTE(review): both certificate checks are switched off, so HTTPS peers are not authenticated.
    curl_setopt($curl, CURLOPT_SSL_VERIFYPEER, 0);
    curl_setopt($curl, CURLOPT_SSL_VERIFYHOST, 0);
    curl_setopt($curl, CURLOPT_USERAGENT, $userAgent);
    curl_setopt($curl, CURLOPT_FOLLOWLOCATION, 1); // follow redirects
    curl_setopt($curl, CURLOPT_AUTOREFERER, 1); // set the Referer automatically on redirects
    curl_setopt($curl, CURLOPT_POST, 1); // regular HTTP POST
    // Per the PHP manual, an array value here is sent as multipart/form-data.
    curl_setopt($curl, CURLOPT_POSTFIELDS, $data);
    curl_setopt($curl, CURLOPT_TIMEOUT, 30); // cap the whole transfer at 30 seconds
    $info = curl_exec($curl);
    if (curl_errno($curl)) {
        // Existing behavior kept: the error text is printed, not thrown or logged.
        echo 'Errno'.curl_error($curl);
    }
    curl_close($curl); // release the handle
    return $info;
}

/**
 * Format a Unix timestamp as "Y-m-d H:i:s".
 *
 * Only the first 10 characters of the value are used as the seconds part, so
 * a 13-digit millisecond timestamp loses its millisecond digits. The result
 * is rendered in the default timezone used by date().
 *
 * @param int|string $time Timestamp in seconds or milliseconds.
 * @return false|string Formatted date, or false when the leading 10
 *                      characters are not numeric.
 */
function timeDate($time)
{
    // Keep the seconds portion only.
    $seconds = substr((string)$time, 0, 10);
    // date() requires an integer; reject junk explicitly instead of letting
    // date() raise a warning or TypeError on a non-numeric string.
    if (!is_numeric($seconds)) {
        return false;
    }
    return date('Y-m-d H:i:s', (int)$seconds);
}

/**
 * Add up the numbers in a comma-separated string.
 *
 * The string is split on "," and the pieces are summed with array_sum().
 *
 * @param string $numberString Comma-separated numbers, e.g. "1,2,3".
 * @return float|int Sum of the pieces.
 */
function strSum ($numberString) {
    return array_sum(explode(',', $numberString));
}

/**
 * Draw distinct random integers from 1..10 and return them in ascending order.
 *
 * @param int $num Length passed to array_slice(); at most 10 values exist.
 * @return array The drawn numbers, sorted ascending and indexed from 0.
 */
function randStr($num)
{
    // Candidate pool 1..10 in random order.
    $pool = range(1, 10);
    shuffle($pool);
    // Take the leading slice of the shuffled pool.
    $picked = array_slice($pool, 0, $num);
    // Order the picks numerically, smallest first.
    sort($picked, SORT_NUMERIC);
    return $picked;
}


/**
 * Classify a number as odd or even.
 *
 * Integer-valued numeric strings and floats ("4", 4.0) are classified like
 * the corresponding integer rather than being reported as 0.
 *
 * @param mixed $num Value to classify.
 * @return bool|int 1 for odd, 2 for even, 0 for a numeric value that is not
 *                  a whole number, false when the value is not numeric.
 */
function isDouble($num){
    if(!is_numeric($num)){
        return false;
    }
    // Normalise numeric strings to int or float.
    $value = $num + 0;
    if(is_float($value)){
        // Fractions, INF and NAN have no parity.
        if(!is_finite($value) || floor($value) !== $value){
            return 0;
        }
        // fmod() keeps large whole floats out of the integer % operator.
        return fmod($value, 2.0) == 0.0 ? 2 : 1;
    }
    return $value % 2 == 0 ? 2 : 1;
}


GET请求,我们要在控制器中,调用此方法,控制器代码如下:
<?php

namespace app\index\controller\choose;

use think\Controller;
use think\Db;

/**
 * Demo controller showing how to call the GET scraping helper.
 */
class ChooseGd extends Controller
{
    /**
     * Fetch the target URL with pretendIpData(), decode the body as JSON into
     * an associative array, dump it and stop the request.
     */
    public function collection(){
        $url = '你要采集的网址';
        // The helper is named pretendIpData(); pretendData() is not defined.
        $data = json_decode(pretendIpData($url),true);
        dump($data);die;
    }
}

最后,看看打印结果:

POST请求,我们要在控制器中,调用此方法,控制器代码如下:

这个是我们要抓取的POST请求,接下来调用上面封装的POST请求方法,控制器代码如下:

<?php

namespace app\index\controller\quick;

use think\Controller;
use think\Db;

/**
 * Demo controller showing how to call the POST scraping helper.
 */
class QuickXx extends Controller
{
    /**
     * Post a single form field to the target URL with getPostIpData(), decode
     * the body as JSON into an associative array, dump it and stop the request.
     */
    public function collection(){
        // Address to scrape.
        $url = '你要抓取的网址';
        // Form fields expected by the target endpoint.
        $postData = ['lotterytype' => 'GP_K3_JiLin'];
        $response = getPostIpData($url,$postData);
        $data = json_decode($response,true);
        dump($data);
        exit;
    }
}

结果打印如下:(顺便打印一下header头)

希望能帮到你!

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章