php curl 異步併發請求http

先來看下同步的代碼以及請求時間。

$start_time=date("h:i:sa");
for ($i=0; $i <100 ; $i++) { 
	$urls[]="http://www.downxia.com/downinfo/2315".$i.".html";
	GetTitle(geturl("http://www.downxia.com/downinfo/2315".$i.".html"));
}
function geturl($url){
       
        $ch = curl_init();
        curl_setopt($ch, CURLOPT_URL, $url);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
        
        $output = curl_exec($ch);
        curl_close($ch);

        return $output;
}
function GetTitle($output){

	preg_match('/<title>.*<\/title>/i',$output,$matches);
	var_dump($matches[0]);
}
$end_time=date("h:i:sa");
echo '開始時間是:'.$start_time;
echo '結束時間是:'.$end_time;

最下面可以看到時間花了27秒。

接下來看下php curl 異步併發請求http的代碼以及花費時間。

$start_time=date("h:i:sa");

$urls=[];
for ($i=0; $i <100 ; $i++) { 
	$urls[]="http://www.downxia.com/downinfo/2315".$i.".html";
}
var_dump($urls);
// GetTitle('klasjdkla<title>313asds12</title>');

rolling_curl($urls,'GetTitle');

function GetTitle($output){

	preg_match('/<title>.*<\/title>/i',$output,$matches);
	var_dump($matches[0]);
}
$end_time=date("h:i:sa");

echo '開始時間是:'.$start_time;
echo '結束時間是:'.$end_time;

function rolling_curl($urls, $callback, $custom_options = null)
{//多個url訪問

    // make sure the rolling window isn't greater than the # of urls
    $rolling_window = 5;
    $rolling_window = (sizeof($urls) < $rolling_window) ? sizeof($urls) : $rolling_window;

    $master   = curl_multi_init();
    $curl_arr = array();

    // add additional curl options here
    $std_options = array(
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_FOLLOWLOCATION => true,
        CURLOPT_MAXREDIRS      => 5
        );
    $options = ($custom_options) ? ($std_options + $custom_options) : $std_options;

    // start the first batch of requests
    for ($i = 0; $i < $rolling_window; $i++) {
        $ch                   = curl_init();
        $options[CURLOPT_URL] = $urls[$i];
        curl_setopt_array($ch, $options);
        curl_multi_add_handle($master, $ch);
    }

    do {
        while (($execrun = curl_multi_exec($master, $running)) == CURLM_CALL_MULTI_PERFORM);
        if ($execrun != CURLM_OK) {
            break;
        }

        // a request was just completed -- find out which one
        while ($done = curl_multi_info_read($master)) {
            $info = curl_getinfo($done['handle']);
            if ($info['http_code'] == 200) {
                $output = curl_multi_getcontent($done['handle']);

                // request successful.  process output using the callback function.
                $callback($output);

                // start a new request (it's important to do this before removing the old one)
                $ch                   = curl_init();
                $options[CURLOPT_URL] = $urls[$i++]; // increment i
                curl_setopt_array($ch, $options);
                curl_multi_add_handle($master, $ch);

                // remove the curl handle that just completed
                curl_multi_remove_handle($master, $done['handle']);
            } else {
                // request failed.  add error handling.
            }
        }
    } while ($running);

    curl_multi_close($master);
    return true;
}

才花了3秒?實際上我感覺應該是花了5秒,因爲啓動比同步要慢,開始的時候卡了2秒。

http請求效率,毋庸置疑是異步遠遠高於同步。

核心請求代碼如下:(這是老外寫的,有點小問題,最後的提示undefined offset)

function rolling_curl($urls, $callback, $custom_options = null)
{//多個url訪問

    // make sure the rolling window isn't greater than the # of urls
    $rolling_window = 5;
    $rolling_window = (sizeof($urls) < $rolling_window) ? sizeof($urls) : $rolling_window;

    $master   = curl_multi_init();
    $curl_arr = array();

    // add additional curl options here
    $std_options = array(
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_FOLLOWLOCATION => true,
        CURLOPT_MAXREDIRS      => 5
        );
    $options = ($custom_options) ? ($std_options + $custom_options) : $std_options;

    // start the first batch of requests
    for ($i = 0; $i < $rolling_window; $i++) {
        $ch                   = curl_init();
        $options[CURLOPT_URL] = $urls[$i];
        curl_setopt_array($ch, $options);
        curl_multi_add_handle($master, $ch);
    }

    do {
        while (($execrun = curl_multi_exec($master, $running)) == CURLM_CALL_MULTI_PERFORM);
        if ($execrun != CURLM_OK) {
            break;
        }

        // a request was just completed -- find out which one
        while ($done = curl_multi_info_read($master)) {
            $info = curl_getinfo($done['handle']);
            if ($info['http_code'] == 200) {
                $output = curl_multi_getcontent($done['handle']);

                // request successful.  process output using the callback function.
                $callback($output);

                // start a new request (it's important to do this before removing the old one)
                $ch                   = curl_init();
                $options[CURLOPT_URL] = $urls[$i++]; // increment i
                curl_setopt_array($ch, $options);
                curl_multi_add_handle($master, $ch);

                // remove the curl handle that just completed
                curl_multi_remove_handle($master, $done['handle']);
            } else {
                // request failed.  add error handling.
            }
        }
    } while ($running);

    curl_multi_close($master);
    return true;
}

修改一下。只要在新增url的時候加個判斷就好了。// 當$i等於$urls數組大小時不用再增加了。

function rolling_curl($urls, $callback, $custom_options = null)
{//多個url訪問

    // make sure the rolling window isn't greater than the # of urls
    $rolling_window = 5;
    $rolling_window = (sizeof($urls) < $rolling_window) ? sizeof($urls) : $rolling_window;

    $master   = curl_multi_init();
    $curl_arr = array();

    // add additional curl options here
    $std_options = array(
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_FOLLOWLOCATION => true,
        CURLOPT_MAXREDIRS      => 5
        );
    $options = ($custom_options) ? ($std_options + $custom_options) : $std_options;

    // start the first batch of requests
    for ($i = 0; $i < $rolling_window; $i++) {
        $ch                   = curl_init();
        $options[CURLOPT_URL] = $urls[$i];
        curl_setopt_array($ch, $options);
        curl_multi_add_handle($master, $ch);
    }

    do {
        while (($execrun = curl_multi_exec($master, $running)) == CURLM_CALL_MULTI_PERFORM);
        if ($execrun != CURLM_OK) {
            break;
        }

        // a request was just completed -- find out which one
        while ($done = curl_multi_info_read($master)) {
            $info = curl_getinfo($done['handle']);
            if ($info['http_code'] == 200) {
                $output = curl_multi_getcontent($done['handle']);

                // request successful.  process output using the callback function.
                $callback($output);

                // start a new request (it's important to do this before removing the old one)
                // 當$i等於$urls數組大小時不用再增加了
                if($i<sizeof($urls)){
                    $ch                   = curl_init();
                    $options[CURLOPT_URL] = $urls[$i++]; // increment i
                    curl_setopt_array($ch, $options);
                    curl_multi_add_handle($master, $ch);
                }
                // remove the curl handle that just completed
                curl_multi_remove_handle($master, $done['handle']);
            } else {
                // request failed.  add error handling.
            }
        }
    } while ($running);

    curl_multi_close($master);
    return true;
}

以上,結束。記錄一下,以免自己忘記。需要下載Cmder的可以點這裏下載。

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章