C#遠程抓取網頁

using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Text;
using System.Windows.Forms;
using System.Net;
using System.IO;
using System.Diagnostics;

namespace frmTest
{
    public partial class Form1 : Form
    {
        public Form1()
        {
            InitializeComponent();
        }

        private void Form1_Load(object sender, EventArgs e)
        {
            //初始化
            txtFilePath.Text = Application.StartupPath;//初始化保存目錄爲應用程序所在的目錄
            txtWeb.Text = "http://www.cfchina.cn";
            cboEnCode.SelectedIndex = 0;
            timer1.Enabled = false;
            lblResult.Text = "";
            initToolTip();
        }


        private void btnSelect_Click(object sender, EventArgs e)
        {
            //選擇文件的保存路徑
            if (folderBrowserDlg.ShowDialog()==DialogResult.OK) {
                txtFilePath.Text = folderBrowserDlg.SelectedPath;
            }
        }

        private void btnStart_Click(object sender, EventArgs e)
        {

            lblResult.Text = "正在初始化...";
            getUrl();
            timer1.Enabled = true;//啓動定時器
        }

        //抓取網頁並保存
        private void getUrl() {
            try
            {
                string fileName;
                fileName = txtFilePath.Text + "//" + DateTime.Now.ToString().Replace(" ", "").Replace(":", "").Replace("-", "") + ".html";
                WebClient mywebclient = new WebClient();

                //從指定網址下載數據
                lblResult.Text = "正在嘗試從指定網址下載數據...";
                byte[] pagedata = mywebclient.DownloadData(txtWeb.Text);
                string pagehtml = "";

                //設置編碼
                if (cboEnCode.SelectedItem.ToString() == "GB2312")
                {
                    pagehtml = Encoding.Default.GetString(pagedata);
                }
                else
                {
                    pagehtml = Encoding.UTF8.GetString(pagedata);
                }              

                using (StreamWriter sw = new StreamWriter(fileName))
                {
                    textBox1.Text = pagehtml;
                    textBox1.Text = textBox1.Text.Replace("gb2312", "utf-8");//默認抓取下的內容就是utf-8編碼的,沒這一行,抓取下來的網頁無法正常顯示
                    //很奇怪用pagehtml = pagehtml.Repalce("gb2312","utf-8")沒用????可能pagehtml內部已經是utf-8了,根本找不到"gb2312"這幾個字
                    sw.WriteLine(textBox1.Text);                  
                    lst1.Items.Add(fileName);                  
                }
            }
            catch (WebException webEx)
            {
                lblResult.Text = "錯誤:" + webEx.Message.ToString();

            }
        }

        //定時器事件
        private void timer1_Tick(object sender, EventArgs e)
        {          
            timer1.Interval = Convert.ToInt16(txtTime.Text) * 1000;
            getUrl();
        }

        private void btnEnd_Click(object sender, EventArgs e)
        {
            lblResult.Text = "已停止";
            timer1.Enabled = false;//關閉定時器
            delmyFile();//刪除生成的網頁
            lst1.Items.Clear();
        }

        //雙擊打開生成的網頁
        private void lst1_DoubleClick(object sender, EventArgs e)
        {
            if (lst1.Items.Count == 0)
            {
                return;
            }
            else
            {
                using (Process p = new Process())
                {
                    p.StartInfo.FileName = "IEXPLORE.EXE";
                    p.StartInfo.Arguments = lst1.SelectedItem.ToString();
                    p.Start();
                }
            }
        }

 

        //刪除生成的網頁
        private void delmyFile() {           
            string filename;
            int i = 0;
            for (i = 0; i <= lst1.Items.Count - 1; i++) {
                filename = lst1.Items[i].ToString();
                if (File.Exists(filename)){
                        File.Delete(filename);
                }
            }
        }


        //初始化toolTip
        private void initToolTip()
        {
            toolTip1.SetToolTip(lst1, "抓取並保存後的網頁名稱列表/n雙擊可直接用IE打開抓取下來的網頁");
            toolTip1.SetToolTip(btnEnd, "停止網頁抓取,並刪除抓取下來的網頁");
            toolTip1.SetToolTip(btnStart, "開始抓取指定的網頁");
            toolTip1.SetToolTip(btnSelect, "選擇保存的文件路徑");
            toolTip1.SetToolTip(txtTime, "定時器間隔秒數,即每隔多少秒抓取一次網頁");
            toolTip1.SetToolTip(lblTime, "定時器間隔秒數,即每隔多少秒抓取一次網頁");
            toolTip1.SetToolTip(textBox1, "顯示抓取下來的網頁代碼內容");
            toolTip1.SetToolTip(txtWeb, "輸入要抓取的網址,必須以http://開頭");
            toolTip1.SetToolTip(cboEnCode, "選取網頁的保存編碼,必須與網站源代碼一致,否則抓取下來後可能顯示不正常");
            toolTip1.SetToolTip(txtFilePath, "輸入文件保存目錄,抓下來的網頁將保存在這裏");
            toolTip1.SetToolTip(btnPause, "暫時抓取");
        }


        //抓取暫停
        private void btnPause_Click(object sender, EventArgs e)
        {
            timer1.Enabled = false;
        }
    }
}

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章