首先定義一個 cg_http 類，
使用 cURL 抓取指定 URL 的網頁內容，之後再將抓取到的內容寫入指定的文件夾：
/**
 * Small cURL wrapper that fetches one URL and returns the response body.
 *
 * Typical use: construct with a URL (or call set_url() later), then call
 * send_request(). The same object may be reused for several requests; the
 * underlying handle is released when the object is destroyed.
 *
 * Note: $post_fields, $post_files, $headers, $cookies and $has_file are only
 * read by this class; nothing here populates them, so with the code as shown
 * a request is a plain GET unless $data is passed to send_request().
 */
class cg_http
{
    /** Default User-Agent header sent with every request. */
    private $agent = 'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.63 Safari/537.36';
    /** Form fields to POST (key => value); an empty array means no form POST. */
    private $post_fields = array();
    /** Not referenced anywhere in this class. */
    private $post_files = array();
    /** Extra request headers, passed as-is to CURLOPT_HTTPHEADER. */
    private $headers = array();
    /** Cookies: either a ready-made cookie string or a list of "name=value" items. */
    private $cookies = array();
    /** The cURL handle created in the constructor. */
    private $curl;
    /** Target URL; an empty string means "not configured yet". */
    private $url = '';
    /**
     * Connect timeout in seconds. The property used to be read without ever
     * being declared, which raised an "Undefined property" warning.
     */
    private $timeout = 30;
    /**
     * True when $post_fields carries file uploads and must be handed to cURL
     * as an array (multipart) instead of a URL-encoded string. Previously
     * undeclared as well.
     */
    private $has_file = false;
    /** Message of the most recent cURL failure, '' if the last request succeeded. */
    private $last_error = '';

    /**
     * @param string $url Optional target URL; when given, the handle is configured immediately.
     */
    public function __construct($url = '')
    {
        $this->curl = curl_init();
        if ($url != '')
        {
            $this->set_url($url);
        }
    }

    /**
     * Releases the handle on PHP versions where it is still a resource.
     * On PHP 8+ the handle is an object and is freed automatically.
     */
    public function __destruct()
    {
        if (is_resource($this->curl))
        {
            curl_close($this->curl);
        }
    }

    /**
     * Sets the target URL and applies the default transfer options.
     *
     * @param string $url Address to fetch.
     */
    public function set_url($url)
    {
        $this->url = $url;
        curl_setopt($this->curl, CURLOPT_URL, $this->url);
        // Return the body from curl_exec() instead of printing it.
        curl_setopt($this->curl, CURLOPT_RETURNTRANSFER, true);
        curl_setopt($this->curl, CURLOPT_FOLLOWLOCATION, true);
        // NOTE(review): certificate verification is disabled, so HTTPS peers
        // are not authenticated. Kept for backward compatibility.
        curl_setopt($this->curl, CURLOPT_SSL_VERIFYPEER, false);
        curl_setopt($this->curl, CURLOPT_AUTOREFERER, true);
        curl_setopt($this->curl, CURLOPT_CONNECTTIMEOUT, $this->timeout);
        // Do not include response headers in the returned body.
        curl_setopt($this->curl, CURLOPT_HEADER, false);
        $this->set_agent($this->agent);
    }

    /**
     * Overrides the User-Agent header for subsequent requests.
     *
     * @param string $agent User-Agent string.
     */
    public function set_agent($agent)
    {
        curl_setopt($this->curl, CURLOPT_USERAGENT, $agent);
    }

    /**
     * Returns the cURL error message of the last send_request() call.
     *
     * @return string Error text, or '' when the last request succeeded.
     */
    public function get_error()
    {
        return $this->last_error;
    }

    /**
     * Performs the request and returns the response body.
     *
     * Terminates the script with "url is empty" when no URL was configured.
     *
     * @param mixed $data        Raw request body; when non-empty it is set as
     *                           CURLOPT_POSTFIELDS and overrides $post_fields.
     * @param bool  $transcoding When true, a body that is not valid UTF-8 is
     *                           converted from GBK to UTF-8.
     * @return string|false Response body, or false on failure (see get_error()).
     */
    public function send_request($data = '', $transcoding = false)
    {
        if ($this->url == '')
        {
            die('url is empty');
        }
        $this->last_error = '';

        if (is_string($this->cookies))
        {
            curl_setopt($this->curl, CURLOPT_COOKIE, $this->cookies);
        }
        elseif (is_array($this->cookies) && count($this->cookies) > 0)
        {
            curl_setopt($this->curl, CURLOPT_COOKIE, implode(';', $this->cookies));
        }

        if (count($this->post_fields) > 0)
        {
            curl_setopt($this->curl, CURLOPT_POST, true);
            // File uploads need the raw array (multipart/form-data);
            // plain fields are sent URL-encoded.
            $body = $this->has_file ? $this->post_fields : http_build_query($this->post_fields);
            curl_setopt($this->curl, CURLOPT_POSTFIELDS, $body);
        }

        if (count($this->headers) > 0)
        {
            curl_setopt($this->curl, CURLOPT_HTTPHEADER, $this->headers);
        }

        if (!empty($data))
        {
            curl_setopt($this->curl, CURLOPT_POSTFIELDS, $data);
        }

        // An empty string makes cURL accept every encoding it supports
        // (gzip, deflate, ...) and decode the response transparently.
        curl_setopt($this->curl, CURLOPT_ENCODING, '');

        $result = curl_exec($this->curl);
        if ($result === false)
        {
            // Keep the reason available instead of silently discarding it.
            $this->last_error = curl_error($this->curl);
            return false;
        }

        // Only convert bodies that are not already valid UTF-8. Guessing the
        // charset from a GB2312-first candidate list can misreport UTF-8 text
        // and corrupt it during conversion.
        if ($transcoding && !mb_check_encoding($result, 'UTF-8'))
        {
            $converted = iconv('gbk', 'utf-8//IGNORE', $result);
            if ($converted !== false)
            {
                $result = $converted;
            }
        }

        // The handle is intentionally left open so the object can be reused.
        return $result;
    }
}
// $web_url is the address of the page to download.
$web_url = "http://www.baidu.com?curl.php";
$webpage = new cg_http($web_url);
$content = $webpage->send_request();
// basename() of this URL still contains "?", which is not allowed in Windows
// file names, so every character outside a conservative safe set becomes "_".
$filename = preg_replace('/[^A-Za-z0-9._-]/', '_', basename($web_url));
// Store the downloaded page inside the aa folder.
$target_dir = "D://aa";
if (!is_dir($target_dir))
{
    mkdir($target_dir, 0777, true);
}
// send_request() yields false when the transfer failed; write nothing then.
if (is_string($content))
{
    // file_put_contents() takes (path, data); the folder and the file name
    // must be joined into one path.
    file_put_contents($target_dir . '/' . $filename, $content);
}
如果有多個 URL 地址，也可以用迴圈批量處理：
// Put the URLs to process into $arr_url before this point. It is not defined
// in the code shown here, so fall back to an empty list instead of looping
// over an undefined variable.
$arr_url = isset($arr_url) ? $arr_url : array();
foreach ($arr_url as $key => $urls)
{
    $webpage = new cg_http($urls);
    $content = $webpage->send_request();
    // Skip failed downloads rather than writing an empty file.
    if (!is_string($content))
    {
        continue;
    }
    // Each page is saved under its array key.
    $filename = (string) $key;
    // The separator is required; without it the files are created next to
    // the aa folder (D://aa0, D://aa1, ...) instead of inside it.
    file_put_contents("D://aa/" . $filename, $content);
}
讀取先前寫入 D://aa 文件夾的文件：
// Walk every file saved in the download folder and run a regular expression
// over its contents.
$dir = "D://aa";
// Placeholder: replace PCRE with the expression to match. The delimiters are
// mandatory; a bare "PCRE" is rejected by preg_match_all().
$pattern = '/PCRE/';
$handler = opendir($dir);
if ($handler !== false)
{
    // Strict comparison: an entry named "0" must not end the loop early.
    while (($file = readdir($handler)) !== false)
    {
        if ($file == ".." || $file == ".")
        {
            continue;
        }
        // Directory and entry name need a separator between them.
        $path = $dir . '/' . $file;
        if (!is_file($path))
        {
            continue;
        }
        $content = file_get_contents($path);
        if ($content === false)
        {
            continue;
        }
        // Strip newlines, tabs and carriage returns from the page source so
        // the pattern can match across what used to be line breaks.
        $content = str_replace(array("\n", "\t", "\r"), '', $content);
        // $matches holds the result for the current file only; it is
        // overwritten on the next iteration.
        preg_match_all($pattern, $content, $matches);
    }
    closedir($handler);
}