C# 遠程網頁抓取器 (beta 1.0)

using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Text;
using System.Windows.Forms;
using System.Net;
using System.IO;
using System.Diagnostics;
namespace frmTest
{
    /// <summary>
    /// Main window of the page fetcher: downloads the page named in <c>txtWeb</c>,
    /// saves each snapshot as an HTML file under the directory in <c>txtFilePath</c>,
    /// and repeats on <c>timer1</c> until paused or stopped.
    /// The controls referenced here are not declared in this part of the partial
    /// class (presumably they live in the designer-generated part).
    /// </summary>
    public partial class Form1 : Form
    {
        /// <summary>Creates the form and builds its controls.</summary>
        public Form1()
        {
            InitializeComponent();
        }

        /// <summary>Sets the initial values of the input controls and tooltips.</summary>
        private void Form1_Load(object sender, EventArgs e)
        {
            // Default the save directory to the directory the application was started from.
            txtFilePath.Text = Application.StartupPath;
            txtWeb.Text = "http://www.cfchina.cn";
            cboEnCode.SelectedIndex = 0;
            timer1.Enabled = false;
            lblResult.Text = "";
            initToolTip();
        }

        /// <summary>Lets the user pick the directory the fetched pages are saved to.</summary>
        private void btnSelect_Click(object sender, EventArgs e)
        {
            if (folderBrowserDlg.ShowDialog() == DialogResult.OK)
            {
                txtFilePath.Text = folderBrowserDlg.SelectedPath;
            }
        }

        /// <summary>
        /// Fetches the page once immediately and then starts the periodic timer.
        /// Nothing is started when the interval in <c>txtTime</c> is invalid.
        /// </summary>
        private void btnStart_Click(object sender, EventArgs e)
        {
            lblResult.Text = "正在初始化...";

            // Apply the user's interval before the timer starts; otherwise the first
            // tick would fire after whatever interval the timer happened to have.
            if (!TryApplyTimerInterval())
            {
                return;
            }

            getUrl();
            timer1.Enabled = true;
        }

        /// <summary>
        /// Downloads the page at <c>txtWeb.Text</c>, decodes it with the encoding chosen
        /// in <c>cboEnCode</c>, shows the markup in <c>textBox1</c>, writes it to a
        /// time-stamped .html file and adds that file's path to <c>lst1</c>.
        /// Expected download / path / file failures are reported in <c>lblResult</c>
        /// instead of being thrown.
        /// </summary>
        private void getUrl()
        {
            try
            {
                // A fixed, culture-invariant pattern keeps date separators such as '/'
                // out of the file name regardless of the regional settings.
                string stamp = DateTime.Now.ToString(
                    "yyyyMMddHHmmss",
                    System.Globalization.CultureInfo.InvariantCulture);
                string fileName = Path.Combine(txtFilePath.Text, stamp + ".html");

                lblResult.Text = "正在嘗試從指定網址下載數據...";

                byte[] pagedata;
                using (WebClient client = new WebClient())
                {
                    pagedata = client.DownloadData(txtWeb.Text);
                }

                // Ask for GB2312 by name: Encoding.Default is the machine's ANSI code
                // page and is only GB2312 on a Simplified Chinese system.
                // NOTE(review): on runtimes without the code-page encodings registered
                // GetEncoding throws ArgumentException, which is reported below.
                object selectedEncoding = cboEnCode.SelectedItem;
                bool isGb2312 = selectedEncoding != null
                    && selectedEncoding.ToString() == "GB2312";
                Encoding sourceEncoding = isGb2312
                    ? Encoding.GetEncoding("GB2312")
                    : Encoding.UTF8;

                // The file is written as UTF-8 (StreamWriter's default), so rewrite a
                // "gb2312" charset declaration in the markup to match; without this the
                // saved page does not display correctly.
                string pagehtml = sourceEncoding.GetString(pagedata).Replace("gb2312", "utf-8");

                textBox1.Text = pagehtml;
                using (StreamWriter sw = new StreamWriter(fileName))
                {
                    sw.WriteLine(pagehtml);
                }

                // Only list the file once it has actually been written.
                lst1.Items.Add(fileName);
            }
            catch (Exception ex)
            {
                if (!IsExpectedFetchFailure(ex))
                {
                    throw;
                }
                ReportError(ex);
            }
        }

        /// <summary>
        /// Timer callback: re-reads the interval from <c>txtTime</c> and fetches the page.
        /// When the interval text is invalid the fetch is skipped for this round and the
        /// timer keeps its previous interval, so a corrected value is picked up next tick.
        /// </summary>
        private void timer1_Tick(object sender, EventArgs e)
        {
            if (!TryApplyTimerInterval())
            {
                return;
            }

            getUrl();
        }

        /// <summary>Stops the timer, deletes the saved pages and empties the list.</summary>
        private void btnEnd_Click(object sender, EventArgs e)
        {
            lblResult.Text = "已停止";
            timer1.Enabled = false;
            delmyFile();
            lst1.Items.Clear();
        }

        /// <summary>Opens the selected saved page in Internet Explorer.</summary>
        private void lst1_DoubleClick(object sender, EventArgs e)
        {
            // SelectedItem is null both for an empty list and for a double-click
            // that did not land on an entry.
            object selected = lst1.SelectedItem;
            if (selected == null)
            {
                return;
            }

            try
            {
                using (Process p = new Process())
                {
                    p.StartInfo.FileName = "IEXPLORE.EXE";
                    // Quote the path so a save directory containing spaces is passed
                    // as a single argument.
                    p.StartInfo.Arguments = "\"" + selected + "\"";
                    p.Start();
                }
            }
            catch (Win32Exception ex)
            {
                // Raised when the executable cannot be found or started.
                ReportError(ex);
            }
        }

        /// <summary>
        /// Deletes every file listed in <c>lst1</c> that still exists. A file that cannot
        /// be deleted is reported in <c>lblResult</c> and the remaining files are still tried.
        /// </summary>
        private void delmyFile()
        {
            foreach (object item in lst1.Items)
            {
                string filename = item.ToString();
                try
                {
                    if (File.Exists(filename))
                    {
                        File.Delete(filename);
                    }
                }
                catch (IOException ex)
                {
                    ReportError(ex);
                }
                catch (UnauthorizedAccessException ex)
                {
                    ReportError(ex);
                }
            }
        }

        /// <summary>Assigns the tooltip text of every control.</summary>
        private void initToolTip()
        {
            toolTip1.SetToolTip(lst1, "抓取並保存後的網頁名稱列表\n雙擊可直接用IE打開抓取下來的網頁");
            toolTip1.SetToolTip(btnEnd, "停止網頁抓取,並刪除抓取下來的網頁");
            toolTip1.SetToolTip(btnStart, "開始抓取指定的網頁");
            toolTip1.SetToolTip(btnSelect, "選擇保存的文件路徑");
            toolTip1.SetToolTip(txtTime, "定時器間隔秒數,即每隔多少秒抓取一次網頁");
            toolTip1.SetToolTip(lblTime, "定時器間隔秒數,即每隔多少秒抓取一次網頁");
            toolTip1.SetToolTip(textBox1, "顯示抓取下來的網頁代碼內容");
            toolTip1.SetToolTip(txtWeb, "輸入要抓取的網址,必須以http://開頭");
            toolTip1.SetToolTip(cboEnCode, "選取網頁的保存編碼,必須與網站源代碼一致,否則抓取下來後可能顯示不正常");
            toolTip1.SetToolTip(txtFilePath, "輸入文件保存目錄,抓下來的網頁將保存在這裏");
            toolTip1.SetToolTip(btnPause, "暫停抓取");
        }

        /// <summary>Pauses fetching by stopping the timer; saved pages are kept.</summary>
        private void btnPause_Click(object sender, EventArgs e)
        {
            timer1.Enabled = false;
        }

        /// <summary>
        /// Parses <c>txtTime.Text</c> as a whole number of seconds and applies it to
        /// <c>timer1.Interval</c> (milliseconds).
        /// </summary>
        /// <returns>
        /// <c>true</c> when the interval was applied; <c>false</c> when the text is not a
        /// positive integer small enough to convert to milliseconds, in which case the
        /// problem is shown in <c>lblResult</c> and the timer is left unchanged.
        /// </returns>
        private bool TryApplyTimerInterval()
        {
            int seconds;
            bool valid = int.TryParse(txtTime.Text, out seconds)
                && seconds >= 1
                && seconds <= int.MaxValue / 1000;

            if (!valid)
            {
                lblResult.Text = "錯誤:" + "定時器間隔秒數無效";
                return false;
            }

            timer1.Interval = seconds * 1000;
            return true;
        }

        /// <summary>Shows an exception's message in the status label.</summary>
        private void ReportError(Exception ex)
        {
            lblResult.Text = "錯誤:" + ex.Message;
        }

        /// <summary>
        /// Tells whether an exception is one of the failures a fetch can reasonably hit
        /// (network, bad URL or path, file system) and should be reported rather than rethrown.
        /// </summary>
        private static bool IsExpectedFetchFailure(Exception ex)
        {
            return ex is WebException
                || ex is IOException
                || ex is UnauthorizedAccessException
                || ex is ArgumentException
                || ex is NotSupportedException
                || ex is UriFormatException;
        }
    }
}


 

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章