C#正則清理網絡字符

  /// <summary>
  /// Reduces an HTML fragment to plain text: script blocks, tags and comment
  /// remnants are removed, a fixed set of character entities is decoded or
  /// dropped, and any remaining angle brackets and CRLF pairs are stripped.
  /// </summary>
  /// <param name="Htmlstring">The HTML text to clean. Must not be null.</param>
  /// <returns>The cleaned text, containing no angle brackets.</returns>
  /// <exception cref="System.ArgumentNullException">
  /// Thrown by <c>Regex.Replace</c> when <paramref name="Htmlstring"/> is null.
  /// </exception>
  public static string NoHTML(string Htmlstring)
        {
            // Remove script blocks first so their bodies are not left behind as text.
            // Singleline lets '.' cross the line breaks inside a multi-line script.
            Htmlstring = Regex.Replace(Htmlstring, @"<script[^>]*?>.*?</script>", "", RegexOptions.IgnoreCase | RegexOptions.Singleline);

            // Remove every remaining tag.
            Htmlstring = Regex.Replace(Htmlstring, @"<(.[^>]*)>", "", RegexOptions.IgnoreCase);

            // Collapse a line break together with the whitespace that follows it.
            Htmlstring = Regex.Replace(Htmlstring, @"([\r\n])[\s]+", "", RegexOptions.IgnoreCase);

            // Remove leftover comment terminators and unterminated comment openers
            // (the opener is removed up to the end of its line).
            Htmlstring = Regex.Replace(Htmlstring, @"-->", "", RegexOptions.IgnoreCase);
            Htmlstring = Regex.Replace(Htmlstring, @"<!--.*", "", RegexOptions.IgnoreCase);

            // Named / numeric entities: quotes are dropped, the rest are decoded.
            Htmlstring = Regex.Replace(Htmlstring, @"&(quot|#34);", "", RegexOptions.IgnoreCase);
            Htmlstring = Regex.Replace(Htmlstring, @"&(amp|#38);", "&", RegexOptions.IgnoreCase);
            Htmlstring = Regex.Replace(Htmlstring, @"&(lt|#60);", "<", RegexOptions.IgnoreCase);
            Htmlstring = Regex.Replace(Htmlstring, @"&(gt|#62);", ">", RegexOptions.IgnoreCase);
            Htmlstring = Regex.Replace(Htmlstring, @"&(nbsp|#160);", " ", RegexOptions.IgnoreCase);
            Htmlstring = Regex.Replace(Htmlstring, @"&(iexcl|#161);", "\u00A1", RegexOptions.IgnoreCase);
            Htmlstring = Regex.Replace(Htmlstring, @"&(cent|#162);", "\u00A2", RegexOptions.IgnoreCase);
            Htmlstring = Regex.Replace(Htmlstring, @"&(pound|#163);", "\u00A3", RegexOptions.IgnoreCase);
            Htmlstring = Regex.Replace(Htmlstring, @"&(copy|#169);", "\u00A9", RegexOptions.IgnoreCase);

            // Any other numeric character reference is dropped.
            Htmlstring = Regex.Replace(Htmlstring, @"&#(\d+);", "", RegexOptions.IgnoreCase);

            // Typographic entities are dropped rather than decoded.
            Htmlstring = Regex.Replace(Htmlstring, @"&rdquo;", "", RegexOptions.IgnoreCase);
            Htmlstring = Regex.Replace(Htmlstring, @"&ldquo;", "", RegexOptions.IgnoreCase);
            Htmlstring = Regex.Replace(Htmlstring, @"&mdash;", "", RegexOptions.IgnoreCase);
            Htmlstring = Regex.Replace(Htmlstring, @"&hellip;", "", RegexOptions.IgnoreCase);

            // Strings are immutable: the result of Replace must be assigned back,
            // otherwise these three calls have no effect.
            Htmlstring = Htmlstring.Replace("<", "");
            Htmlstring = Htmlstring.Replace(">", "");
            Htmlstring = Htmlstring.Replace("\r\n", "");

            return Htmlstring;
        }

        /// <summary>
        /// Strips font and span formatting markup and rewrites styled paragraph
        /// openers as plain div openers (presumably markup left behind by pasting
        /// from Microsoft Word - confirm against callers).
        /// </summary>
        /// <param name="Htmlstring">The HTML text to clean. Must not be null.</param>
        /// <returns>The HTML with the listed tags removed or rewritten.</returns>
        public static string CleanWORD(string Htmlstring)
        {
            // Each row is { pattern, replacement }. Rows are applied from top to
            // bottom, all case-insensitively.
            string[,] rules =
            {
                { @"<font size[^>]*?>", "" },
                { @"<font face[^>]*?>", "" },
                { @"<font>", "" },
                { @"</font>", "" },
                { @"<span style[^>]*?>", "" },
                { @"<span lang[^>]*?>", "" },
                { @"</span>", "" },
                { @"<span>", "" },
                { @"<div style[^>]*?>", "<div>" },
                { @"<p style[^>]*?>", "<div>" },
                { @"<p class[^>]*?>", "<div>" },
                { @"<p align", "<div align" },
                { @"<st1[^>]*?>", "" },
                { @"</st1[^>]*?>", "" },
                { @"<o:p>", "" },
                { @"</o:p>", "" }
            };

            string cleaned = Htmlstring;
            for (int row = 0; row < rules.GetLength(0); row++)
            {
                cleaned = Regex.Replace(cleaned, rules[row, 0], rules[row, 1], RegexOptions.IgnoreCase);
            }

            return cleaned;
        }

        /// <summary>
        /// Replaces every non-breaking-space entity (named or numeric form,
        /// matched case-insensitively) with an ordinary space.
        /// </summary>
        /// <param name="Htmlstring">The HTML text to clean. Must not be null.</param>
        /// <returns>The text with those entities turned into spaces.</returns>
        public static string CleanWORDA(string Htmlstring)
        {
            const string nonBreakingSpaceEntity = @"&(nbsp|#160);";

            return Regex.Replace(Htmlstring, nonBreakingSpaceEntity, " ", RegexOptions.IgnoreCase);
        }

        /// <summary>
        /// Removes empty paragraph, div and strong elements, turns each
        /// "br" tag into a paragraph break, and then rewrites paragraph tags
        /// as div tags, dropping any div that ends up empty.
        /// </summary>
        /// <param name="Htmlstring">The HTML text to clean. Must not be null.</param>
        /// <returns>The HTML with blank elements removed and paragraphs rewritten as divs.</returns>
        public static string CleanWORDB(string Htmlstring)
        {
            // Each row is { pattern, replacement }. Order matters: line breaks
            // become paragraph tags before paragraph tags become div tags.
            string[,] rules =
            {
                { @"<p>[ ]*</p>", "" },
                { @"<div>[ ]*</div>", "" },
                { @"<strong>[ ]*</strong>", "" },
                { @"<br />", "</p><p>" },
                { @"<p>", "<div>" },
                { @"</p>", "</div>" },
                { @"<div align[^>]*?></div>", "" },
                { @"<div></div>", "" }
            };

            string cleaned = Htmlstring;
            for (int row = 0; row < rules.GetLength(0); row++)
            {
                cleaned = Regex.Replace(cleaned, rules[row, 0], rules[row, 1], RegexOptions.IgnoreCase);
            }

            return cleaned;
        }

        /// <summary>
        /// Removes empty div elements: first those whose opening tag starts
        /// with an align attribute, then bare empty divs.
        /// </summary>
        /// <param name="Htmlstring">The HTML text to clean. Must not be null.</param>
        /// <returns>The HTML without those empty div elements.</returns>
        public static string CleanCSS(string Htmlstring)
        {
            const RegexOptions options = RegexOptions.IgnoreCase;

            string withoutAlignedDivs = Regex.Replace(Htmlstring, @"<div align[^>]*?></div>", "", options);

            return Regex.Replace(withoutAlignedDivs, @"<div></div>", "", options);
        }

        /// <summary>
        /// Removes hyperlink (anchor) opening and closing tags while keeping
        /// the text between them.
        /// </summary>
        /// <param name="Htmlstring">The HTML text to clean. Must not be null.</param>
        /// <returns>The HTML with anchor tags removed.</returns>
        /// <exception cref="System.ArgumentNullException">
        /// Thrown by <c>Regex.Replace</c> when <paramref name="Htmlstring"/> is null.
        /// </exception>
        public static string CleanHTML(string Htmlstring)
        {
            // The tag name must be exactly "a": the lookahead requires whitespace,
            // '/' or '>' right after it, so elements such as abbr, article, address
            // or audio keep their opening tags instead of being half-stripped.
            Htmlstring = Regex.Replace(Htmlstring, @"<a(?=[\s/>])[^>]*>", "", RegexOptions.IgnoreCase);

            // Closing tag, tolerating whitespace before the bracket.
            Htmlstring = Regex.Replace(Htmlstring, @"</a\s*>", "", RegexOptions.IgnoreCase);

            return Htmlstring;
        }

        /// <summary>
        /// Replaces every case-insensitive match of a caller-supplied regular
        /// expression with a caller-supplied replacement string.
        /// </summary>
        /// <param name="Htmlstring">The text to search. Must not be null.</param>
        /// <param name="rule">The regular expression pattern to match.</param>
        /// <param name="newstr">The replacement; regex substitution syntax such as $1 applies.</param>
        /// <returns>The text with all matches replaced.</returns>
        public static string CleanX(string Htmlstring, string rule, string newstr)
        {
            return Regex.Replace(Htmlstring, rule, newstr, RegexOptions.IgnoreCase);
        }

 

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章