// 今天有時間來完善一下!
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
#include<malloc.h>
#include<windows.h>
#include <wininet.h>
#define MAXBLOCKSIZE 1024
#pragma comment(lib, "wininet.lib")
/*
 * Convert a NUL-terminated UTF-8 string to the system ANSI code page.
 *
 * The text is first widened to UTF-16 with MultiByteToWideChar(CP_UTF8) and
 * then narrowed with WideCharToMultiByte(CP_ACP).  CP_ACP is whatever ANSI
 * code page the machine is configured for, so the result is GB2312/GBK only
 * on a Simplified-Chinese system.
 *
 * pcUtf8     - NUL-terminated UTF-8 input; NULL is treated as a failed
 *              conversion.
 * strGB2312  - destination buffer supplied by the caller.  The function has
 *              no way to know its capacity, so the caller must make it large
 *              enough for the converted text plus the terminator.
 *
 * On any failure (NULL input, allocation failure, conversion error) the
 * destination is set to the empty string.  The input is fully copied into a
 * temporary wide buffer before the destination is written.
 */
__inline void Utf8ToGb2312(const char *pcUtf8, char *strGB2312)
{
    wchar_t *pcUnicode = NULL;
    int nUnicodeLen = 0;
    int converted = 0;

    if (strGB2312 == NULL)
        return;

    if (pcUtf8 != NULL)
        /* With a length of -1 the returned count includes the terminator. */
        nUnicodeLen = MultiByteToWideChar(CP_UTF8, 0, pcUtf8, -1, NULL, 0);

    if (nUnicodeLen > 0)
        pcUnicode = (wchar_t *)malloc((size_t)nUnicodeLen * sizeof *pcUnicode);

    if (pcUnicode != NULL &&
        MultiByteToWideChar(CP_UTF8, 0, pcUtf8, -1, pcUnicode, nUnicodeLen) > 0)
    {
        int nGb2312Len = WideCharToMultiByte(CP_ACP, 0, pcUnicode, -1, NULL, 0, NULL, NULL);

        /* Narrow straight into the caller's buffer; the terminator is part
         * of nGb2312Len, so no extra copy or strcpy is needed. */
        if (nGb2312Len > 0 &&
            WideCharToMultiByte(CP_ACP, 0, pcUnicode, -1, strGB2312, nGb2312Len, NULL, NULL) > 0)
        {
            converted = 1;
        }
    }

    if (!converted)
        strGB2312[0] = '\0';

    free(pcUnicode);
}
void GetWebSrcCode(const char *Url)//發送請求,下載自己需要的頁面
{
HINTERNET hSession = InternetOpen("down", INTERNET_OPEN_TYPE_PRECONFIG, NULL, NULL, 0);
if (hSession != NULL)
{
HINTERNET hURL = InternetOpenUrl(hSession, Url, NULL, 0, INTERNET_FLAG_DONT_CACHE, 0);
if (hURL != NULL)
{
char Temp[MAXBLOCKSIZE] = {0};
ULONG Number = 1;
FILE *stream;
if( (stream = fopen( "E:\\test.html", "w" )) != NULL )
{
while (Number > 0)
{
InternetReadFile(hURL, Temp, MAXBLOCKSIZE - 1, &Number);
fwrite(Temp, sizeof (char), Number , stream);
}
fclose( stream );
}
InternetCloseHandle(hURL);
hURL = NULL;
}
else
printf("網絡連接錯誤!請查看網絡是否連接!");
InternetCloseHandle(hSession);
hSession = NULL;
}
}
enum {
    /* How far past the marker a link may start ("http") and end ("mp3"). */
    GETURL_SCAN_WINDOW = 200,
    /* Room for a link spanning the whole window plus "mp3\n" and the NUL. */
    GETURL_LINK_MAX = GETURL_SCAN_WINDOW + 8
};

/*
 * Look for "http ... mp3" in the text that follows a marker and copy it,
 * followed by a newline, into out.
 *
 * "http" must start within the first GETURL_SCAN_WINDOW bytes of text and
 * the terminating "mp3" must start no later than GETURL_SCAN_WINDOW bytes in.
 * The search stops at the string terminator, so it never reads past the text.
 *
 * Returns 1 when a link was written to out, 0 otherwise.
 */
static int geturl_extract_link(const char *text, char *out, size_t out_size)
{
    static const char tail[] = "mp3\n";
    const char *start = strstr(text, "http");
    const char *stop;
    size_t prefix_len;

    if (start == NULL || start - text >= GETURL_SCAN_WINDOW)
        return 0;

    stop = strstr(start + 4, "mp3");
    if (stop == NULL || stop - text > GETURL_SCAN_WINDOW)
        return 0;

    prefix_len = (size_t)(stop - start);
    if (prefix_len + sizeof tail > out_size)
        return 0;

    memcpy(out, start, prefix_len);
    memcpy(out + prefix_len, tail, sizeof tail);   /* copies the NUL too */
    return 1;
}

/*
 * Scan the second half of E:\test.html for the marker text and extract the
 * first mp3 link that follows an occurrence of it.
 *
 * The bytes read from the file are passed through Utf8ToGb2312 before being
 * searched.  When a link is found it is printed to stdout and appended, with
 * a trailing newline, to E:\test.txt; the search then stops.  Marker
 * occurrences that are not followed by a link are skipped.
 *
 * Always returns 0.
 */
int geturl()
{
    static const char m[] = "歌曲出處";
    FILE *fp = fopen("E:\\test.html", "r");
    char *src = NULL;
    char *src2 = NULL;
    long L = -1;
    long half;

    if (fp == NULL)
        return 0;

    if (fseek(fp, 0, SEEK_END) == 0)
        L = ftell(fp);
    half = L / 2;

    if (half > 0 && fseek(fp, half, SEEK_SET) == 0)
    {
        /* One extra byte each so both strings are always NUL-terminated.
         * NOTE(review): this assumes the converted text is never longer than
         * the UTF-8 input - confirm for the target code page. */
        src = (char *)malloc((size_t)half + 1);
        src2 = (char *)malloc((size_t)half + 1);
    }

    if (src != NULL && src2 != NULL)
    {
        size_t got = fread(src, 1, (size_t)half, fp);
        const char *cursor;
        const char *hit;

        src[got] = '\0';
        src2[0] = '\0';
        Utf8ToGb2312(src, src2);

        cursor = src2;
        while ((hit = strstr(cursor, m)) != NULL)
        {
            const char *after = hit + strlen(m);
            char link[GETURL_LINK_MAX];

            if (geturl_extract_link(after, link, sizeof link))
            {
                FILE *wr;

                printf("%s", link);
                wr = fopen("E:\\test.txt", "a");
                if (wr != NULL)
                {
                    fwrite(link, strlen(link), 1, wr);
                    fclose(wr);
                }
                break;
            }
            cursor = after;   /* no link here; try the next marker */
        }
    }

    free(src2);
    free(src);
    fclose(fp);
    return 0;
}
/*
 * For every occurrence of mode in res, look back over the text that ends at
 * the end of the match for a single-quoted string, treat it as a path on
 * music.yahoo.cn, download that page with GetWebSrcCode and run geturl on it.
 *
 * res  - NUL-terminated text to search.
 * mode - NUL-terminated text to look for.
 *
 * The look-back window is the last 180 bytes up to the end of the match,
 * shortened when the match lies closer than that to the start of res.  Only
 * the first quoted string inside the window is used; an occurrence whose
 * window does not hold both quotes is skipped.
 *
 * Always returns 0.  A NULL argument or an empty mode is treated as
 * "nothing to find".
 */
int findstr(char *res,char *mode)
{
    enum { FINDSTR_LOOKBACK = 180 };
    size_t mode_len;
    const char *cursor;
    const char *hit;

    if (res == NULL || mode == NULL || mode[0] == '\0')
        return 0;

    mode_len = strlen(mode);
    cursor = res;
    while ((hit = strstr(cursor, mode)) != NULL)
    {
        const char *match_end = hit + mode_len;
        const char *window = res;
        const char *open_q;
        const char *close_q = NULL;

        /* Never let the window start before the beginning of res. */
        if (match_end - res > FINDSTR_LOOKBACK)
            window = match_end - FINDSTR_LOOKBACK;

        open_q = (const char *)memchr(window, '\'', (size_t)(match_end - window));
        if (open_q != NULL)
            close_q = (const char *)memchr(open_q + 1, '\'',
                                           (size_t)(match_end - (open_q + 1)));

        if (close_q != NULL)
        {
            /* Both buffers are sized from the window, so neither can overflow. */
            char p[FINDSTR_LOOKBACK + 1];
            char buff[sizeof "http://music.yahoo.cn/" + FINDSTR_LOOKBACK];
            size_t path_len = (size_t)(close_q - (open_q + 1));

            memcpy(p, open_q + 1, path_len);
            p[path_len] = '\0';   /* fresh terminator for every match */

            snprintf(buff, sizeof buff, "http://music.yahoo.cn/%s", p);
            GetWebSrcCode(buff);
            geturl();
        }

        cursor = match_end;
    }
    return 0;
}
/*
 * Entry point: download the Yahoo music search results for a fixed keyword,
 * convert the saved page with Utf8ToGb2312 and hand it to findstr.
 *
 * Returns 0 when the page was loaded and searched, 1 when the saved page
 * could not be opened, measured or held in memory.
 */
int main()
{
    char f[]="愛情";
    char *text = NULL;
    char *t = NULL;
    FILE *fp;
    long len = -1;
    int status = 1;

    /* The query URL comes from analysing Yahoo's search page; adapt it to
     * whatever site is being scraped. */
    GetWebSrcCode("http://music.yahoo.cn/s?from=suggest&q=愛情");

    fp = fopen("E:\\test.html","r");
    if (fp == NULL)
        return status;

    if (fseek(fp, 0, SEEK_END) == 0)
        len = ftell(fp);

    /* Skip the first third of the page and load everything after it. */
    if (len > 0 && fseek(fp, len*1/3, SEEK_SET) == 0)
    {
        text = (char *)malloc((size_t)len + 1);
        t = (char *)malloc((size_t)len + 1);
        if (text != NULL && t != NULL)
        {
            /* Zero-filled and one byte larger than the file, so whatever
             * fread stores is always followed by a terminator. */
            memset(text, 0, (size_t)len + 1);
            fread(text, 1, (size_t)len, fp);

            /* Converting the keyword to UTF-8 and searching the raw page
             * would be faster; the page is converted instead because its
             * contents are used further on.  A regular expression (or an
             * embedded Lua script) would also simplify the matching. */
            Utf8ToGb2312(text, t);
            findstr(t, f);
            status = 0;
        }
    }

    free(t);
    free(text);
    fclose(fp);
    return status;
}
// 寫得比較粗糙,有事要做!
// 下下來記着,有時間了來整理!