[C#]基於HttpWebRequest和HttpWebResponse的自動登錄採集

 

C#中實現POST的方法很多,常用的是WebBrowser、WebClient、HttpWebRequest和HttpWebResponse。

1、WebBrowser基本上是在DocumentCompleted事件中分析HtmlDocument;

2、WebClient是對HttpWebRequest和HttpWebResponse的封裝,用起來更方便,但是靈活性還是不及HttpWebRequest和HttpWebResponse;

3、HttpWebRequest和HttpWebResponse更底層,靈活度更好,不過代碼更多,我做了一個簡單的封裝,將GET改成分段讀取,並加入代理、進度條和錯誤重試處理。代碼如下:

using System;
using System.IO;
using System.Net;
using System.Text;

namespace Van.Base
{
    /// <summary>
    /// Small wrapper around <see cref="HttpWebRequest"/> / <see cref="HttpWebResponse"/>
    /// that performs GET and POST requests through an optional proxy, shares one
    /// <see cref="CookieContainer"/> between all requests made by the same instance,
    /// reads response bodies in chunks while reporting progress, and retries a
    /// request when it fails with a connection-level error.
    /// </summary>
    public class HttpHelper
    {
        /// <summary>
        /// Chunk size, in bytes, used when the caller does not supply one.
        /// </summary>
        private const int DefaultBufferLength = 1024;

        #region Delegates and events
        /// <summary>
        /// Signature of the progress callback.
        /// </summary>
        /// <param name="Value">Number of body bytes read so far for the current response.</param>
        public delegate void dgtProgValueChanged(long Value);
        /// <summary>
        /// Raised every time <see cref="ProgValue"/> is assigned, i.e. after each chunk
        /// of a text response has been read.
        /// </summary>
        public event dgtProgValueChanged OnProgValueChanged;
        #endregion

        #region Properties
        /// <summary>
        /// Proxy assigned to every request. It is assigned as-is, so a null value is
        /// also passed on to the request.
        /// </summary>
        public WebProxy Proxy { get; set; }
        /// <summary>
        /// Cookie container shared by all requests of this instance. Created on first
        /// use when it has not been set by the caller.
        /// </summary>
        public CookieContainer UserCookie { get; set; }
        /// <summary>
        /// Maximum number of retries allowed for one public GetHTML / PostPage call.
        /// </summary>
        public int IAfreshTime { get; set; }
        /// <summary>
        /// Number of retries consumed by the most recent GetHTML / PostPage call.
        /// </summary>
        public int IErrorTime { get; private set; }

        long m_ProgValue = 0;
        /// <summary>
        /// Number of body bytes read so far. Assigning it raises
        /// <see cref="OnProgValueChanged"/>.
        /// </summary>
        public long ProgValue
        {
            get { return m_ProgValue; }
            private set
            {
                m_ProgValue = value;
                // Copy the delegate first so that an unsubscribe between the null
                // check and the invocation cannot cause a NullReferenceException.
                dgtProgValueChanged handler = OnProgValueChanged;
                if (handler != null)
                {
                    handler(value);
                }
            }
        }
        /// <summary>
        /// Content length reported by the current response
        /// (<see cref="HttpWebResponse.ContentLength"/>).
        /// </summary>
        public long ProgMaximum { get; private set; }

        #endregion

        #region Methods
        #region Shared helpers
        /// <summary>
        /// Creates a request for <paramref name="URL"/> with this instance's proxy and
        /// cookie container applied.
        /// </summary>
        /// <param name="URL">Absolute request address.</param>
        /// <returns>The configured request.</returns>
        private HttpWebRequest CreateRequest(string URL)
        {
            HttpWebRequest request = (HttpWebRequest)HttpWebRequest.Create(URL);
            request.Proxy = Proxy;
            if (UserCookie == null)
            {
                UserCookie = new CookieContainer();
            }
            request.CookieContainer = UserCookie;
            return request;
        }

        /// <summary>
        /// Decides whether a failed attempt may be retried and, if so, counts the retry.
        /// </summary>
        /// <param name="error">Exception thrown by the failed attempt.</param>
        /// <returns>true when the caller should try again; false when it should rethrow.</returns>
        private bool TryConsumeRetry(Exception error)
        {
            if (IErrorTime >= IAfreshTime || !IsConnectionFailure(error))
            {
                return false;
            }
            IErrorTime++;
            return true;
        }

        /// <summary>
        /// Tells whether an exception represents a connection-level failure.
        /// </summary>
        /// <remarks>
        /// The status code is inspected instead of the exception message because
        /// framework messages are localized, so a text match only works under one
        /// UI language.
        /// </remarks>
        /// <param name="error">Exception to classify.</param>
        /// <returns>true for connection-level failures; otherwise false.</returns>
        private static bool IsConnectionFailure(Exception error)
        {
            // A broken transport while streaming the body surfaces as IOException.
            if (error is IOException)
            {
                return true;
            }
            WebException webError = error as WebException;
            if (webError == null)
            {
                return false;
            }
            switch (webError.Status)
            {
                case WebExceptionStatus.ConnectFailure:
                case WebExceptionStatus.ConnectionClosed:
                case WebExceptionStatus.KeepAliveFailure:
                case WebExceptionStatus.PipelineFailure:
                case WebExceptionStatus.ReceiveFailure:
                case WebExceptionStatus.SendFailure:
                    return true;
                default:
                    return false;
            }
        }
        #endregion

        #region Get
        /// <summary>
        /// Downloads a page with GET and decodes it as UTF-8.
        /// </summary>
        /// <param name="URL">Absolute request address.</param>
        /// <param name="Accept">Value of the Accept request header.</param>
        /// <returns>The decoded body, or null when the body is empty.</returns>
        public string GetHTML(string URL, string Accept)
        {
            return GetHTML(URL, Accept, System.Text.Encoding.UTF8);
        }
        /// <summary>
        /// Downloads a page with GET using a 1024-byte read buffer.
        /// </summary>
        /// <param name="URL">Absolute request address.</param>
        /// <param name="Accept">Value of the Accept request header.</param>
        /// <param name="encoding">Character encoding used to decode the body.</param>
        /// <returns>The decoded body, or null when the body is empty.</returns>
        public string GetHTML(string URL, string Accept, Encoding encoding)
        {
            return GetHTML(URL, Accept, encoding, DefaultBufferLength);
        }
        /// <summary>
        /// Downloads a page with GET, reading the body in chunks of
        /// <paramref name="bufflen"/> bytes and retrying up to
        /// <see cref="IAfreshTime"/> times on connection failures.
        /// </summary>
        /// <param name="URL">Absolute request address.</param>
        /// <param name="Accept">Value of the Accept request header.</param>
        /// <param name="encoding">Character encoding used to decode the body.</param>
        /// <param name="bufflen">Read buffer size in bytes; must be positive.</param>
        /// <returns>The decoded body, or null when the body is empty.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="encoding"/> is null.</exception>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="bufflen"/> is not positive.</exception>
        public string GetHTML(string URL, string Accept, Encoding encoding, int bufflen)
        {
            // Reject bad arguments before any network traffic happens.
            if (encoding == null)
            {
                throw new ArgumentNullException("encoding");
            }
            if (bufflen <= 0)
            {
                throw new ArgumentOutOfRangeException("bufflen", bufflen, "The buffer length must be greater than zero.");
            }
            IErrorTime = 0;
            return _GetHTML(URL, Accept, encoding, bufflen);
        }
        /// <summary>
        /// Performs the GET request, repeating it while retries remain and the
        /// failure is connection-related.
        /// </summary>
        /// <param name="URL">Absolute request address.</param>
        /// <param name="Accept">Value of the Accept request header.</param>
        /// <param name="encoding">Character encoding used to decode the body.</param>
        /// <param name="bufflen">Read buffer size in bytes.</param>
        /// <returns>The decoded body, or null when the body is empty.</returns>
        private string _GetHTML(string URL, string Accept, Encoding encoding, int bufflen)
        {
            while (true)
            {
                try
                {
                    HttpWebRequest request = CreateRequest(URL);
                    request.Accept = Accept;
                    using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
                    {
                        return _GetHTML(response, encoding, bufflen);
                    }
                }
                catch (Exception error)
                {
                    if (!TryConsumeRetry(error))
                    {
                        throw;
                    }
                }
            }
        }
        /// <summary>
        /// Reads the whole response body in chunks, updating
        /// <see cref="ProgMaximum"/> and <see cref="ProgValue"/> along the way, and
        /// decodes it to text.
        /// </summary>
        /// <param name="MyResponse">Response whose body is read.</param>
        /// <param name="encoding">Character encoding used to decode the body.</param>
        /// <param name="bufflen">Read buffer size in bytes.</param>
        /// <returns>The decoded body, or null when no bytes were received.</returns>
        private string _GetHTML(HttpWebResponse MyResponse, Encoding encoding, int bufflen)
        {
            using (Stream body = MyResponse.GetResponseStream())
            {
                ProgMaximum = MyResponse.ContentLength;

                // A stateful decoder keeps the trailing bytes of a character that is
                // split across two chunks; decoding every chunk on its own would turn
                // such a character into replacement characters.
                Decoder decoder = encoding.GetDecoder();
                byte[] buffer = new byte[bufflen];
                char[] chars = new char[encoding.GetMaxCharCount(bufflen)];
                StringBuilder text = new StringBuilder();
                long totalRead = 0;

                int read;
                while ((read = body.Read(buffer, 0, buffer.Length)) > 0)
                {
                    totalRead += read;
                    int decoded = decoder.GetChars(buffer, 0, read, chars, 0, false);
                    text.Append(chars, 0, decoded);
                    ProgValue = totalRead;
                }

                if (totalRead == 0)
                {
                    // Callers have always received null for an empty body.
                    return null;
                }

                // Flush whatever incomplete byte sequence is still buffered.
                int tail = decoder.GetChars(buffer, 0, 0, chars, 0, true);
                text.Append(chars, 0, tail);
                return text.ToString();
            }
        }
        #endregion


        #region GetImg

        /// <summary>
        /// Downloads an image with GET. No retry is performed for images.
        /// </summary>
        /// <param name="URL">Absolute request address.</param>
        /// <param name="Accept">Value of the Accept request header.</param>
        /// <returns>The downloaded bitmap.</returns>
        public System.Drawing.Bitmap Getimg(string URL, string Accept)
        {
            return _GetBit(URL, Accept);
        }
        /// <summary>
        /// Sends the GET request for an image and decodes the response.
        /// </summary>
        /// <param name="URL">Absolute request address.</param>
        /// <param name="Accept">Value of the Accept request header.</param>
        /// <returns>The downloaded bitmap.</returns>
        private System.Drawing.Bitmap _GetBit(string URL, string Accept)
        {
            HttpWebRequest request = CreateRequest(URL);
            request.Accept = Accept;
            using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
            {
                return _GetBit(response);
            }
        }

        /// <summary>
        /// Builds a bitmap from the response body.
        /// </summary>
        /// <param name="MyResponse">Response whose body contains the image data.</param>
        /// <returns>A bitmap that stays usable after the response has been closed.</returns>
        private System.Drawing.Bitmap _GetBit(HttpWebResponse MyResponse)
        {
            // GDI+ requires the source stream to remain open for the lifetime of the
            // Bitmap, so the body is copied into a MemoryStream that is deliberately
            // left undisposed and handed to the bitmap; the network stream itself can
            // then be closed immediately.
            MemoryStream imageData = new MemoryStream();
            using (Stream body = MyResponse.GetResponseStream())
            {
                byte[] buffer = new byte[DefaultBufferLength];
                int read;
                while ((read = body.Read(buffer, 0, buffer.Length)) > 0)
                {
                    imageData.Write(buffer, 0, read);
                }
            }
            imageData.Position = 0;
            return new System.Drawing.Bitmap(imageData);
        }
        #endregion

        #region Post
        /// <summary>
        /// Posts data using UTF-8 and the default content type.
        /// </summary>
        /// <param name="URL">Absolute address to post to.</param>
        /// <param name="PostData">Request body.</param>
        /// <returns>The decoded response body, or null when it is empty.</returns>
        public string PostPage(string URL, string PostData)
        {
            return PostPage(URL, PostData, System.Text.Encoding.UTF8);
        }
        /// <summary>
        /// Posts data using the default content type.
        /// </summary>
        /// <param name="URL">Absolute address to post to.</param>
        /// <param name="PostData">Request body.</param>
        /// <param name="encoding">Encoding used for both the request body and the response.</param>
        /// <returns>The decoded response body, or null when it is empty.</returns>
        public string PostPage(string URL, string PostData, Encoding encoding)
        {
            return PostPage(URL, PostData, encoding, null);
        }
        /// <summary>
        /// Posts data, retrying up to <see cref="IAfreshTime"/> times on connection
        /// failures.
        /// </summary>
        /// <param name="URL">Absolute address to post to.</param>
        /// <param name="PostData">Request body.</param>
        /// <param name="encoding">Encoding used for both the request body and the response.</param>
        /// <param name="ContentType">
        /// Content-Type header; null selects "application/x-www-form-urlencoded".
        /// </param>
        /// <returns>The decoded response body, or null when it is empty.</returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="PostData"/> or <paramref name="encoding"/> is null.
        /// </exception>
        public string PostPage(string URL, string PostData, Encoding encoding, string ContentType)
        {
            // Reject bad arguments before any network traffic happens.
            if (PostData == null)
            {
                throw new ArgumentNullException("PostData");
            }
            if (encoding == null)
            {
                throw new ArgumentNullException("encoding");
            }
            IErrorTime = 0;
            return _PostPage(URL, PostData, encoding, ContentType);
        }
        /// <summary>
        /// Performs the POST request, repeating it while retries remain and the
        /// failure is connection-related.
        /// </summary>
        /// <param name="URL">Absolute address to post to.</param>
        /// <param name="PostData">Request body.</param>
        /// <param name="encoding">Encoding used for both the request body and the response.</param>
        /// <param name="ContentType">
        /// Content-Type header; null selects "application/x-www-form-urlencoded".
        /// </param>
        /// <returns>The decoded response body, or null when it is empty.</returns>
        private string _PostPage(string URL, string PostData, Encoding encoding, string ContentType)
        {
            if (ContentType == null)
            {
                ContentType = "application/x-www-form-urlencoded";
            }
            byte[] payload = encoding.GetBytes(PostData);

            while (true)
            {
                try
                {
                    HttpWebRequest request = CreateRequest(URL);
                    request.Method = "POST";
                    request.ContentType = ContentType;
                    request.ContentLength = payload.Length;
                    using (Stream requestBody = request.GetRequestStream())
                    {
                        // A failed write is allowed to propagate: hiding it would
                        // only produce a less specific error from GetResponse below.
                        requestBody.Write(payload, 0, payload.Length);
                    }
                    using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
                    {
                        return _GetHTML(response, encoding, DefaultBufferLength);
                    }
                }
                catch (Exception error)
                {
                    if (!TryConsumeRetry(error))
                    {
                        throw;
                    }
                }
            }
        }
        #endregion
        #endregion
    }
}



 

 

調用便很方便了,如下:

 

// Usage example: route requests through an authenticated proxy, then POST and GET
// with the same HttpHelper instance so both requests share one cookie container.
string strProxyAdd = "www.Proxyxxxx.com", UserName = "XXXX", PassWord = "XXX";
int port = 8080;


HttpHelper http = new HttpHelper();
http.Proxy = new WebProxy(); // enable a proxy
http.Proxy.Address = new Uri(string.Format("http://{0}:{1}", strProxyAdd, port)); // proxy host and port
http.Proxy.Credentials = new NetworkCredential(UserName, PassWord); // proxy user name and password
// Assign null to http.Proxy instead to stop using the proxy.

// The address must be absolute (including the scheme), otherwise
// HttpWebRequest.Create throws UriFormatException.
var strPHtml = http.PostPage("http://www.xxxx.com", "User=ABCD&Pwd=DEF"); // POST User=ABCD&Pwd=DEF

var strHtml = http.GetHTML("http://www.xxxx.com", "*/*", Encoding.UTF8, 20480); // GET the page, decode as UTF-8, 20480-byte read buffer

由於Cookie在一個實例中是共用的,在一些場景下可以先POST登錄後再GET需要登錄才能進入的頁面信息。

來源:http://www.cnblogs.com/vanjoge/archive/2011/11/18/2253937.html#commentform

發佈了7 篇原創文章 · 獲贊 4 · 訪問量 3萬+
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章