#include <stdio.h>
#include <string.h>

#include <iostream>
#include <string>

#include <curl/curl.h>
#include <pcre.h>
/* Number of ints in the offset vector handed to pcre_exec();
 * should be a multiple of 3. */
#define OVECCOUNT 30
/* NOTE(review): EBUFLEN and BUFLEN are not referenced in the visible part of
 * this file; they are kept in case other code depends on them. */
#define EBUFLEN 128
#define BUFLEN 10240
using namespace std;
/**
 * libcurl write callback: appends one received chunk to a std::string.
 *
 * @param buffer pointer to the received bytes (treated as raw data, not as a
 *               NUL-terminated string)
 * @param size   size in bytes of one element
 * @param nmemb  number of elements available in buffer
 * @param str    user pointer; must point to the std::string that collects
 *               the data
 * @return the number of bytes consumed (size * nmemb). If either pointer is
 *         NULL, (size_t)-1 is returned; libcurl treats any value other than
 *         size * nmemb as a write error and aborts the transfer.
 */
size_t onWriteData(void *buffer, size_t size, size_t nmemb, void *str)
{
    if (!str || !buffer) {
        return static_cast<size_t>(-1);
    }

    const size_t bytes = size * nmemb;
    std::string *result = static_cast<std::string *>(str);
    result->append(static_cast<const char *>(buffer), bytes);

    /* Report bytes, not element count, so the contract holds for any size. */
    return bytes;
}
//獲取頁面
int getWeb(string url,string &result)
{
long code =0;
string htmlpage;
CURL *curl =curl_easy_init();
curl_easy_setopt(curl,CURLOPT_URL,url.c_str());//設置url
curl_easy_setopt(curl,CURLOPT_POST,0);//設置請求方法
curl_easy_setopt(curl,CURLOPT_USERAGENT,"Mozilla/5.");//僞裝客戶端
curl_easy_setopt(curl,CURLOPT_WRITEDATA,&htmlpage);//設置接受返回結果字符串
curl_easy_setopt(curl,CURLOPT_WRITEFUNCTION,onWriteData);//設置處理方法
curl_easy_perform(curl);//請求
curl_easy_getinfo(curl,CURLINFO_RESPONSE_CODE,&code);
if(code==200)
{
cout<<"request success"<<endl;
result = htmlpage;
//cout<<htmlpage<<endl;
}
curl_easy_cleanup(curl);
return code;
}
int main(int argc,char **argv)
{
pcre *re;
constchar *error;
int erroffset;
int ovector[OVECCOUNT];
int rc, i;
string url ="http://www.dianping.com/search/category/212/10/g103";
string html;
getWeb(url,html);
//char src[] = " ";
//char pattern[] = "(<a>.+?</a>)";
constchar *src = html.c_str();
char pattern[] ="(<li class=\"\"[\\s\\S]*?</li>)";
printf("String : %s\n", src);
printf("Pattern: \"%s\"\n", pattern);
re = pcre_compile(pattern,0, &error, &erroffset, NULL);
if (re ==NULL) {
printf("PCRE compilation failed at offset %d: %s\n", erroffset, error);
return1;
}
char *p = (char*)src;
while ( ( rc =pcre_exec(re, NULL, p,strlen(p), 0,0, ovector, OVECCOUNT)) !=PCRE_ERROR_NOMATCH )
{
printf("\nOK, %d matched ...\n\n",rc);
for (i =0; i < rc-1; i++)
{
char *substring_start = p + ovector[2*i];
int substring_length = ovector[2*i+1] - ovector[2*i];
char matched[10240];
memset( matched,0, 10240);
strncpy( matched, substring_start, substring_length );
printf("match:%s\n", matched );
}
p += ovector[1];
if ( !p )
{
break;
}
}
pcre_free(re);
return0;
}