querylist 下載 composer require jaeger/querylist:V3.2.1
<?php
include "vendor/autoload.php";
use \QL\QueryList;
// Report only fatal run-time errors: 1 is the value of E_ERROR, so warnings,
// notices and deprecations raised by the code below are not reported.
error_reporting(1);
/**
 * Download a file (used here for images) and store it on disk.
 *
 * The source is read completely into memory with file_get_contents() and
 * then written out with file_put_contents().
 *
 * @param string $file_url URL (or local path) of the file to fetch.
 * @param string $save_to  Destination path the content is written to.
 *
 * @return int|false Number of bytes written, or false when the source could
 *                   not be read or the destination could not be written.
 */
function dlfile($file_url, $save_to){
    $content = file_get_contents($file_url);

    // Stop on a failed read: handing false on to file_put_contents() would
    // create an empty file at $save_to and report 0 bytes instead of a failure.
    if ($content === false) {
        return false;
    }

    return file_put_contents($save_to, $content);
}
// Page handed to QueryList::Query() further down as the scrape target.
$url = "https://www.php.cn/course/type/2/level_3.html?p=1";
/**
 * Turn a scraped href into an absolute URL on https://www.php.cn/.
 *
 * Referenced by name ('check_url') in the 'href' scraping rule, so it is
 * applied to each extracted href value.
 *
 * - Links that already carry a scheme (e.g. "https://...") are returned as-is.
 * - Protocol-relative links ("//host/path") only get the "https:" scheme.
 * - Everything else is treated as a path on www.php.cn; leading slashes are
 *   dropped so the result never contains "https://www.php.cn//...".
 *
 * @param string $a Href value taken from the page.
 *
 * @return string Absolute URL.
 */
function check_url( $a){
    $a = (string) $a;

    // Already absolute: prefixing the site root would corrupt the link.
    if (preg_match('#^[a-z][a-z0-9+.\-]*://#i', $a)) {
        return $a;
    }

    // Protocol-relative: keep the host, just add the scheme.
    if (strncmp($a, '//', 2) === 0) {
        return 'https:' . $a;
    }

    return 'https://www.php.cn/' . ltrim($a, '/');
}
// Scraping rules, keyed by the field name used in each result row. Every rule
// starts with a CSS selector and the attribute to read; the 'href' rule also
// names check_url as the callback for the extracted value.
$reg = array(
    'img'   => array('.layui-col-md12 ul li a img', 'data-original'),
    'title' => array('.layui-col-md12 ul li a h3', 'text'),
    'href'  => array('.course-list-col ul li a', 'href', '', 'check_url'),
);
$data = QueryList::Query($url, $reg)->data;

// Directory the images are written to. Single quotes keep every backslash
// literal; only the trailing "\\" is an escape (for one backslash).
$save_dir = 'E:\WWW\bin\PHPTutorial\WWW\querylist\imgs\\';

$success = 0;
$fail = 0;
foreach ($data as $v) {
    $img_url = isset($v['img']) ? (string) $v['img'] : '';

    // Name the local file after the last path segment of the URL. Going
    // through parse_url() keeps a query string out of the file name, and
    // basename() avoids mistaking a dot in the host name for the extension.
    $img_path = parse_url($img_url, PHP_URL_PATH);
    $file_name = basename(is_string($img_path) ? $img_path : $img_url);

    // Nothing usable was scraped for this row: count it as a failure
    // without attempting a download.
    $result = ($file_name !== '') ? dlfile($img_url, $save_dir . $file_name) : false;

    // dlfile() yields the number of bytes written, or false / 0 on failure,
    // so a truthiness check covers both failure values.
    if ($result) {
        ++$success;
        echo $file_name . "下載成功<br/>";
    } else {
        ++$fail;
        echo $file_name . "下載失敗<br/>";
    }
}

echo "失敗" . $fail . "個" . "<br/>";
echo "成功" . $success . "個" . "<br/>";
// Parentheses are required: before PHP 8 "." and "+" share the same
// precedence, so the unparenthesised form added the label string to $fail.
echo "下載總數:" . ($success + $fail) . "<br/>";

echo '<pre>';
print_r($data);
// Execution stops here; any code after this statement is never reached.
exit;
// Multi-page scrape through QueryList's 'Multi' plugin.
// NOTE: the unconditional `exit;` earlier in this script stops execution
// before this call is reached.
QueryList::run('Multi', array(
    // Collection of links to scrape
    'list' => array(
        'http://cms.querylist.cc/news/it/547.html',
        'http://cms.querylist.cc/news/it/545.html',
        'http://cms.querylist.cc/news/it/543.html',
        // more links to scrape ...
    ),
    'curl' => array(
        // Set the cURL options here according to your own needs
        'opt' => array(
            CURLOPT_SSL_VERIFYPEER => false,
            CURLOPT_SSL_VERIFYHOST => false,
            CURLOPT_FOLLOWLOCATION => true,
            CURLOPT_AUTOREFERER => true,
            // ........
        ),
        // Number of threads
        'maxThread' => 100,
        // Maximum number of attempts
        'maxTry' => 3,
    ),
    'success' => function ($page) {
        // Scraping rules
        $rules = array(
            // Article title
            'title' => array('h1', 'text'),
            // Article body: the filter removes the hyperlinks but keeps their
            // text, and drops the copyright notice, JS code and other noise
            'content' => array('.post_content', 'html', 'a -.content_copyright -script'),
        );

        // Print the result; in real use this is where the data would be
        // written to the database
        print_r(QueryList::Query($page['content'], $rules, '.content')->getData());
    },
));
?>