先來看下同步的代碼以及請求時間。
$start_time=date("h:i:sa");
for ($i=0; $i <100 ; $i++) {
$urls[]="http://www.downxia.com/downinfo/2315".$i.".html";
GetTitle(geturl("http://www.downxia.com/downinfo/2315".$i.".html"));
}
function geturl($url){
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
$output = curl_exec($ch);
curl_close($ch);
return $output;
}
function GetTitle($output){
preg_match('/<title>.*<\/title>/i',$output,$matches);
var_dump($matches[0]);
}
$end_time=date("h:i:sa");
echo '開始時間是:'.$start_time;
echo '結束時間是:'.$end_time;
最下面可以看到時間花了27秒。
接下來看下php curl 異步併發請求http的代碼以及花費時間。
$start_time=date("h:i:sa");
$urls=[];
for ($i=0; $i <100 ; $i++) {
$urls[]="http://www.downxia.com/downinfo/2315".$i.".html";
}
var_dump($urls);
// GetTitle('klasjdkla<title>313asds12</title>');
rolling_curl($urls,'GetTitle');
function GetTitle($output){
preg_match('/<title>.*<\/title>/i',$output,$matches);
var_dump($matches[0]);
}
$end_time=date("h:i:sa");
echo '開始時間是:'.$start_time;
echo '結束時間是:'.$end_time;
function rolling_curl($urls, $callback, $custom_options = null)
{//多個url訪問
// make sure the rolling window isn't greater than the # of urls
$rolling_window = 5;
$rolling_window = (sizeof($urls) < $rolling_window) ? sizeof($urls) : $rolling_window;
$master = curl_multi_init();
$curl_arr = array();
// add additional curl options here
$std_options = array(
CURLOPT_RETURNTRANSFER => true,
CURLOPT_FOLLOWLOCATION => true,
CURLOPT_MAXREDIRS => 5
);
$options = ($custom_options) ? ($std_options + $custom_options) : $std_options;
// start the first batch of requests
for ($i = 0; $i < $rolling_window; $i++) {
$ch = curl_init();
$options[CURLOPT_URL] = $urls[$i];
curl_setopt_array($ch, $options);
curl_multi_add_handle($master, $ch);
}
do {
while (($execrun = curl_multi_exec($master, $running)) == CURLM_CALL_MULTI_PERFORM);
if ($execrun != CURLM_OK) {
break;
}
// a request was just completed -- find out which one
while ($done = curl_multi_info_read($master)) {
$info = curl_getinfo($done['handle']);
if ($info['http_code'] == 200) {
$output = curl_multi_getcontent($done['handle']);
// request successful. process output using the callback function.
$callback($output);
// start a new request (it's important to do this before removing the old one)
$ch = curl_init();
$options[CURLOPT_URL] = $urls[$i++]; // increment i
curl_setopt_array($ch, $options);
curl_multi_add_handle($master, $ch);
// remove the curl handle that just completed
curl_multi_remove_handle($master, $done['handle']);
} else {
// request failed. add error handling.
}
}
} while ($running);
curl_multi_close($master);
return true;
}
才花了3秒?實際上我感覺應該是花了5秒,因爲啓動比同步要慢,開始的時候卡了2秒。
http請求效率,毋庸置疑是異步遠遠高於同步。
核心請求代碼如下:(這是老外寫的,有點小問題,最後的提示undefined offset)
function rolling_curl($urls, $callback, $custom_options = null)
{//多個url訪問
// make sure the rolling window isn't greater than the # of urls
$rolling_window = 5;
$rolling_window = (sizeof($urls) < $rolling_window) ? sizeof($urls) : $rolling_window;
$master = curl_multi_init();
$curl_arr = array();
// add additional curl options here
$std_options = array(
CURLOPT_RETURNTRANSFER => true,
CURLOPT_FOLLOWLOCATION => true,
CURLOPT_MAXREDIRS => 5
);
$options = ($custom_options) ? ($std_options + $custom_options) : $std_options;
// start the first batch of requests
for ($i = 0; $i < $rolling_window; $i++) {
$ch = curl_init();
$options[CURLOPT_URL] = $urls[$i];
curl_setopt_array($ch, $options);
curl_multi_add_handle($master, $ch);
}
do {
while (($execrun = curl_multi_exec($master, $running)) == CURLM_CALL_MULTI_PERFORM);
if ($execrun != CURLM_OK) {
break;
}
// a request was just completed -- find out which one
while ($done = curl_multi_info_read($master)) {
$info = curl_getinfo($done['handle']);
if ($info['http_code'] == 200) {
$output = curl_multi_getcontent($done['handle']);
// request successful. process output using the callback function.
$callback($output);
// start a new request (it's important to do this before removing the old one)
$ch = curl_init();
$options[CURLOPT_URL] = $urls[$i++]; // increment i
curl_setopt_array($ch, $options);
curl_multi_add_handle($master, $ch);
// remove the curl handle that just completed
curl_multi_remove_handle($master, $done['handle']);
} else {
// request failed. add error handling.
}
}
} while ($running);
curl_multi_close($master);
return true;
}
修改一下。只要在新增url的時候加個判斷就好了。// 當$i等於$urls數組大小時不用再增加了。
function rolling_curl($urls, $callback, $custom_options = null)
{//多個url訪問
// make sure the rolling window isn't greater than the # of urls
$rolling_window = 5;
$rolling_window = (sizeof($urls) < $rolling_window) ? sizeof($urls) : $rolling_window;
$master = curl_multi_init();
$curl_arr = array();
// add additional curl options here
$std_options = array(
CURLOPT_RETURNTRANSFER => true,
CURLOPT_FOLLOWLOCATION => true,
CURLOPT_MAXREDIRS => 5
);
$options = ($custom_options) ? ($std_options + $custom_options) : $std_options;
// start the first batch of requests
for ($i = 0; $i < $rolling_window; $i++) {
$ch = curl_init();
$options[CURLOPT_URL] = $urls[$i];
curl_setopt_array($ch, $options);
curl_multi_add_handle($master, $ch);
}
do {
while (($execrun = curl_multi_exec($master, $running)) == CURLM_CALL_MULTI_PERFORM);
if ($execrun != CURLM_OK) {
break;
}
// a request was just completed -- find out which one
while ($done = curl_multi_info_read($master)) {
$info = curl_getinfo($done['handle']);
if ($info['http_code'] == 200) {
$output = curl_multi_getcontent($done['handle']);
// request successful. process output using the callback function.
$callback($output);
// start a new request (it's important to do this before removing the old one)
// 當$i等於$urls數組大小時不用再增加了
if($i<sizeof($urls)){
$ch = curl_init();
$options[CURLOPT_URL] = $urls[$i++]; // increment i
curl_setopt_array($ch, $options);
curl_multi_add_handle($master, $ch);
}
// remove the curl handle that just completed
curl_multi_remove_handle($master, $done['handle']);
} else {
// request failed. add error handling.
}
}
} while ($running);
curl_multi_close($master);
return true;
}
以上,結束。記錄一下,以免自己忘記。需要下載Cmder的可以點這裏下載。