C# 網站生成靜態頁面


在一些需要經常更新頁面數據的網站中,訪問量不大時,一般直接發佈帶後臺代碼的動態頁面,每次訪問都要與數據庫交互。但是一旦訪問量增加,服務器的開銷成本就必須考慮進來了。以文章為例:後臺編輯後,文章內容存入數據庫;如果有1000人訪問,而每次仍然去讀數據庫,那這1000次IO訪問的開銷就比較大了。一個好的方法是:文章確定之後,把它做成靜態頁面,而生成的工作交給程序來完成——遞歸遍歷整個網站,將網站內容都訪問一遍,生成這些頁面對應的靜態文本頁面,再將這些頁面發佈。這樣對瀏覽者而言,他看到的還是同一個地址、同一份文章,只是這份內容是靜態的而已。這樣既提升了網站的效率,又節約了資源。

下面附上一份用 C# 遍歷網站內容並生成靜態頁面的代碼:


// URLs of the pages that have already been saved as static HTML.
// A hash set gives O(1) membership checks, so the crawl does not degrade
// to O(n^2) as the number of visited pages grows; string equality is
// ordinal, matching the previous ArrayList.Contains behaviour.
private HashSet<string> htmlCreatedList = new HashSet<string>();

        /// <summary>
        /// Saves the page at <paramref name="urlString"/> as a static HTML file, then
        /// recursively does the same for every link found in that page.
        /// </summary>
        /// <remarks>
        /// A URL that is already in <c>htmlCreatedList</c> is skipped. The URL is recorded
        /// before its links are followed, so pages that link to each other do not recurse
        /// forever. Exceptions thrown while downloading or writing a page are not caught
        /// here and abort the crawl.
        /// </remarks>
        /// <param name="urlString">Absolute URL of the page to download and save.</param>
        public void SaveHtmlCode(string urlString)
        {
            if (htmlCreatedList.Contains(urlString))
            {
                return;
            }

            string htmlCode = GetHtmlCodeFromUrl(urlString);
            string htmlPath = urlString.ToPhysicalPath();

            // CreateDirectory is a no-op when the directory already exists.
            Directory.CreateDirectory(Path.GetDirectoryName(htmlPath));
            File.WriteAllText(htmlPath, htmlCode);
            htmlCreatedList.Add(urlString);

            foreach (string link in GetUrlLinkFromHtmlCode(htmlCode))
            {
                // Each link is the full attribute text (e.g. href="/a/b"), so strip the
                // attribute name and the surrounding quotes. The link extractor matches
                // case-insensitively and accepts both quote styles, so do the same here;
                // otherwise HREF='/a' would leak "HREF=" and the quotes into the URL.
                string relativeUrl = Regex.Replace(link, "href\\s*=\\s*", "", RegexOptions.IgnoreCase);
                relativeUrl = relativeUrl.Replace("\"", "").Replace("'", "");
                relativeUrl = relativeUrl.Replace("\\", "/");

                // NOTE(review): UrlPrefix is presumably the site's base URL - confirm in WebConfigInfo.
                SaveHtmlCode(WebConfigInfo.UrlPrefix + relativeUrl);
            }
        }

        /// <summary>
        /// Downloads the page at <paramref name="urlString"/> with an HTTP request and
        /// returns the response body decoded as UTF-8.
        /// </summary>
        /// <param name="urlString">URL of the page to download.</param>
        /// <returns>The HTML text of the response.</returns>
        private string GetHtmlCodeFromUrl(string urlString)
        {
            HttpWebRequest hwRequest = (HttpWebRequest)WebRequest.Create(urlString);
            // NOTE(review): this value repeats the header name and has no closing ')';
            // kept as-is because changing it alters what the server receives.
            hwRequest.UserAgent = "User-Agent:Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.2; .NET CLR 1.0.3705";
            hwRequest.Accept = "*/*";
            hwRequest.KeepAlive = true;
            hwRequest.Headers.Add("Accept-Language", "zh-cn,en-us;q=0.5");

            // using blocks release the response, stream and reader even when reading throws,
            // so a failed download does not leak the underlying connection.
            using (HttpWebResponse hwResponse = (HttpWebResponse)hwRequest.GetResponse())
            using (Stream streamResponse = hwResponse.GetResponseStream())
            using (StreamReader readerOfStream = new StreamReader(streamResponse, System.Text.Encoding.GetEncoding("utf-8")))
            {
                return readerOfStream.ReadToEnd();
            }
        }

        ///<summary>
        /// Finds the quoted href attributes in a piece of HTML with a regular expression.
        ///</summary>
        ///<remarks>
        /// Only attribute values that contain no quote, '.', '#' or ':' characters are matched,
        /// which excludes absolute URLs, links with a file extension and fragment links.
        /// Each returned item is the whole attribute text (for example <c>href="/a?x=1&amp;y=2"</c>),
        /// with the HTML entity <c>&amp;amp;</c> decoded to <c>&amp;</c>.
        ///</remarks>
        ///<param name="htmlCode">HTML text to search for hyperlinks.</param>
        ///<returns>The distinct matched href attribute strings, in order of first appearance.</returns>
        private IEnumerable<string> GetUrlLinkFromHtmlCode(string htmlCode)
        {
            string strRegex = "href\\s*=\\s*(?:[\"'](?<1>[^\"'.#:]*)[\"'])";
            Regex hrefPattern = new Regex(strRegex, RegexOptions.IgnoreCase);

            // Query separators are written as &amp; inside HTML attributes; decode them so the
            // resulting URL carries a plain '&'.
            return hrefPattern.Matches(htmlCode)
                              .Cast<Match>()
                              .Select(match => match.Value.Replace("&amp;", "&"))
                              .Distinct();
        }
    }

另外給 string 擴展了一個 ToPhysicalPath 方法,用於把頁面的 URL 轉換成靜態文件的物理路徑:
 /// <summary>
 /// Converts a page URL into the path of its static HTML file under the application's
 /// <c>Html</c> folder.
 /// </summary>
 /// <remarks>
 /// The URL path becomes the directory. The file name is the query string with its
 /// '?', '&amp;' and '=' separators removed plus ".html", or "Index.html" when the URL
 /// has no query. Requires a current <c>HttpContext</c>.
 /// </remarks>
 /// <param name="urlString">Absolute URL of the page.</param>
 /// <returns>The backslash-separated file path for the page's static copy.</returns>
 public static string ToPhysicalPath(this string urlString)
        {
            var pageUri = new System.Uri(urlString);
            string appRoot = System.Web.HttpContext.Current.Request.PhysicalApplicationPath;

            // Query names and values are concatenated without their separators to form the file name.
            string[] queryTokens = pageUri.Query.Split(new[] { '?', '&', '=' }, StringSplitOptions.RemoveEmptyEntries);
            string fileName = queryTokens.Length == 0
                ? "Index.html"
                : string.Concat(queryTokens) + ".html";

            string rawPath = appRoot + "\\Html\\" + pageUri.AbsolutePath + "\\" + fileName;

            // Normalise URL slashes to backslashes, then collapse the doubled separators
            // produced where the pieces were joined.
            return rawPath.Replace("/", "\\").Replace("\\\\", "\\");
        }


發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章