深感找喫的地方不方便,於是萌生了把水木food版的文章搬到手機上去的想法

深感找喫的地方不方便,於是萌生了把水木food版的文章搬到手機上去的想法。
不過E60找不到軟件支持類似"桌面搜索"的文檔內容搜索功能,這樣面對上千篇從food版批量down下來的htm文章,找起來就相當喫力了。翻來弄去發現手機本身的自帶搜索功能可以搜短信和郵件,而且內容搜索也符合我的要求。
於是今天就弄了一天,怎麼把那些food版的批量下載後的htm文件,變成存在我手機上的電子郵件,以便出門在外也可以搜索。

整個過程還算順利:
第一步,先架一個smtp和pop3服務器。
Windows 2003可以架,邊開我的2003虛擬機邊在網上搜有沒有更快的方法。
找到一個Foxmail Server for Windows 公開測試版,看起來直接就能用,就直接把我的虛擬機關掉了,太卡了。
然後按提示裝下來,拿outlook測一下,好用。贊~
第二步,把一個個文件用smtp發出爲一封封信。開始msdn找啊找,找到System.Net.Mail,搞定。
第三步,html格式不爽,想把那些tag都去掉,直接就能用手機看了。繼續msdn,找到一個System.Web.RegularExpressions下的TextRegex,完全不是那麼回事。再找,找到System.Web.HttpUtility.HtmlDecode(string),還有那麼點用,但tag還是刪不完。最後google之,找到了篇文章,rob代碼過來用了。
第四步,跑一下工程,發了一千多封信,outlook收一下正常。不過在手機上收就不那麼順利了,數字就不支持四位數的,而且一下子收太多的信會超時。只好100封100封地發,再用手機一點一點地收。

總算弄完了,希望以後會有用。睡了。zzzzzZZZZZ

using System;
using System.Collections.Generic;
using System.Collections;
using System.Text.RegularExpressions;
using System.Text;
using System.IO;
using System.Web;
using System.Web.RegularExpressions;
using System.Xml;
using System.Net.Mail;

namespace GetHtmlTitleFromFolder
{
    /// <summary>
    /// Console tool that mails a batch of saved HTML articles from a folder
    /// through a local SMTP server, one plain-text message per file. The
    /// subject is the file name (without extension) followed by the cleaned
    /// page title; the body is the article text with the markup stripped.
    /// </summary>
    class Program
    {
        /// <summary>Host name of the SMTP server the messages are sent through.</summary>
        private const string SmtpHost = "localhost";

        /// <summary>Port of the SMTP server.</summary>
        private const int SmtpPort = 8025;

        /// <summary>Folder holding the downloaded HTML articles.</summary>
        private const string ArticleFolder = @"C:/Documents and Settings/Administrator/My Documents/Food";

        // NOTE(review): both addresses were redacted to "[email protected]" in the
        // listing this code was recovered from. They are kept verbatim because
        // the real values are unknown; replace them with valid addresses before
        // running, otherwise sending will fail on address parsing.
        private const string SenderAddress = "[email protected]";
        private const string RecipientAddress = "[email protected]";

        /// <summary>Number of files mailed per run.</summary>
        private const int BatchSize = 100;

        /// <summary>
        /// Placeholder that stands in for "\n" while tags are being stripped,
        /// so that the original line structure survives the single-line regexes.
        /// </summary>
        private const string LineBreakMarker = "====n====";

        /// <summary>
        /// Captures the text between &lt;title&gt; and &lt;/title&gt;. Built once
        /// instead of once per file; case-insensitive so &lt;TITLE&gt; is accepted.
        /// </summary>
        private static readonly Regex TitleRegex =
            new Regex("<title>(.*)</title>", RegexOptions.IgnoreCase);

        /// <summary>
        /// Sends one batch of articles. The batch is selected by position in the
        /// directory listing; change <c>step</c> between runs to move to the next batch.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            SmtpClient client = new SmtpClient(SmtpHost, SmtpPort);

            DirectoryInfo dir = new DirectoryInfo(ArticleFolder);
            Hashtable titles = GetFileTitle(dir);

            // NOTE(review): batches are chosen by array index, so this relies on
            // GetFiles() returning the same order on every run - confirm, or sort.
            FileInfo[] files = dir.GetFiles();

            int step = 700;
            int startNum = 301 + step;

            // The original loop ran "i < 400 + step", which mailed 99 files and
            // skipped one index between consecutive runs. Send a full batch of
            // BatchSize files instead, and never index past the end of the listing.
            int endNum = Math.Min(startNum + BatchSize, files.Length);

            for (int i = startNum; i < endNum; i++)
            {
                FileInfo file = files[i];
                string fileName = file.Name;

                // Works for any extension length, unlike chopping the last four characters.
                string title = Path.GetFileNameWithoutExtension(fileName)
                               + (titles[fileName] as string);

                // ReadAllText closes the file; the original StreamReader was never disposed.
                string html = File.ReadAllText(file.FullName, Encoding.Default);
                string body = StripHTML(html);

                client.Send(SenderAddress, RecipientAddress, title, body);
            }
        }

        /// <summary>
        /// Reads every file in <paramref name="dir"/> and extracts a cleaned-up
        /// version of its HTML title.
        /// </summary>
        /// <param name="dir">Directory whose files are scanned.</param>
        /// <returns>
        /// A table mapping each file name (<see cref="FileSystemInfo.Name"/>) to its
        /// cleaned title string. Files without a title element map to an empty string.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="dir"/> is null.</exception>
        public static Hashtable GetFileTitle(DirectoryInfo dir)
        {
            if (dir == null)
            {
                throw new ArgumentNullException("dir");
            }

            Hashtable titles = new Hashtable();

            foreach (FileInfo file in dir.GetFiles())
            {
                string text = File.ReadAllText(file.FullName, Encoding.Default);

                // The original sliced the match with Substring(7, Length - 15), which
                // throws when no title is found; a capture group yields the same text
                // and lets a missing title fall back to an empty string.
                Match match = TitleRegex.Match(text);
                string rawTitle = match.Success ? match.Groups[1].Value : string.Empty;

                titles.Add(file.Name, CleanTitle(rawTitle));
            }

            return titles;
        }

        /// <summary>
        /// Applies the fixed sequence of title substitutions. The order matters:
        /// "Re:" must be removed before the shorter "e:".
        /// </summary>
        private static string CleanTitle(string rawTitle)
        {
            StringBuilder sb = new StringBuilder(rawTitle);
            sb.Replace("\"", ".");
            sb.Replace(" ", "");
            sb.Replace("?", "");
            sb.Replace("/", "");
            sb.Replace("zz", "");
            sb.Replace("Re:", "");
            sb.Replace("e:", "");
            sb.Replace("*", "=");
            return sb.ToString();
        }

        /// <summary>
        /// Converts an HTML document to plain text: drops the head, script and style
        /// sections, turns structural tags into tabs / line breaks, removes all other
        /// tags, decodes a handful of entities and collapses redundant whitespace.
        /// Line breaks in the result are "\r" characters.
        /// </summary>
        /// <remarks>
        /// NOTE(review): the listing this method was recovered from had lost its
        /// escape sequences and argument separators. The "\r", "\n" and "\t" literals
        /// below are reconstructed from the surrounding comments. Four string
        /// replacements in the "make line breaking consistent" section had operands
        /// that could not be recovered and are omitted; restoring the line-break
        /// marker to "\r" (rather than "\n") is likewise an assumption. Confirm
        /// against the author's original if it is available.
        /// </remarks>
        /// <param name="source">HTML text; may be null or empty.</param>
        /// <returns>
        /// The plain text with its first character removed, or
        /// <paramref name="source"/> unchanged when it is null/empty or when
        /// processing fails.
        /// </returns>
        public static string StripHTML(string source)
        {
            if (string.IsNullOrEmpty(source))
            {
                return source;
            }

            try
            {
                // Flatten the document to a single line so that "." in the regexes
                // below can span what used to be several lines. "\n" is parked in a
                // marker so the original line structure can be restored afterwards.
                string result = source.Replace("\r", " ");
                result = result.Replace("\n", LineBreakMarker);
                result = result.Replace("\t", string.Empty);

                // Browsers ignore repeated spaces.
                result = Regex.Replace(result, @"( )+", " ");

                // Remove the header (normalise the tags first by clearing attributes).
                result = ReplaceIgnoreCase(result, @"<( )*head([^>])*>", "<head>");
                result = ReplaceIgnoreCase(result, @"(<( )*(/)( )*head( )*>)", "</head>");
                result = ReplaceIgnoreCase(result, "(<head>).*(</head>)", string.Empty);

                // Remove everything from the first aligned paragraph to the last </P>.
                result = ReplaceIgnoreCase(result, @"(<P ALIGN=).*(</P>)", string.Empty);

                // Remove all scripts (normalise the tags first).
                result = ReplaceIgnoreCase(result, @"<( )*script([^>])*>", "<script>");
                result = ReplaceIgnoreCase(result, @"(<( )*(/)( )*script( )*>)", "</script>");
                result = ReplaceIgnoreCase(result, @"(<script>).*(</script>)", string.Empty);

                // Remove all styles (normalise the tags first).
                result = ReplaceIgnoreCase(result, @"<( )*style([^>])*>", "<style>");
                result = ReplaceIgnoreCase(result, @"(<( )*(/)( )*style( )*>)", "</style>");
                result = ReplaceIgnoreCase(result, "(<style>).*(</style>)", string.Empty);

                // Table cells become tabs.
                result = ReplaceIgnoreCase(result, @"<( )*td([^>])*>", "\t");

                // <BR> and <LI> become a line break.
                result = ReplaceIgnoreCase(result, @"<( )*br( )*>", "\r");
                result = ReplaceIgnoreCase(result, @"<( )*li( )*>", "\r");

                // <DIV>, <TR> and <P> become a paragraph break (two line breaks).
                result = ReplaceIgnoreCase(result, @"<( )*div([^>])*>", "\r\r");
                result = ReplaceIgnoreCase(result, @"<( )*tr([^>])*>", "\r\r");
                result = ReplaceIgnoreCase(result, @"<( )*p([^>])*>", "\r\r");

                // Remove every remaining tag, comment, etc. - anything inside < >.
                result = ReplaceIgnoreCase(result, @"<[^>]*>", string.Empty);

                // Decode the entities we know about.
                result = ReplaceIgnoreCase(result, @"&nbsp;", " ");
                result = ReplaceIgnoreCase(result, @"&bull;", " * ");
                result = ReplaceIgnoreCase(result, @"&lsaquo;", "<");
                result = ReplaceIgnoreCase(result, @"&rsaquo;", ">");
                result = ReplaceIgnoreCase(result, @"&trade;", "(tm)");
                result = ReplaceIgnoreCase(result, @"&frasl;", "/");
                result = ReplaceIgnoreCase(result, @"&lt;", "<");
                result = ReplaceIgnoreCase(result, @"&gt;", ">");
                result = ReplaceIgnoreCase(result, @"&copy;", "(c)");
                result = ReplaceIgnoreCase(result, @"&reg;", "(r)");

                // Drop any other entity.
                result = ReplaceIgnoreCase(result, @"&(.{2,6});", string.Empty);

                // Bring back the source's own line breaks, using the same "\r"
                // convention as the tag-derived breaks so the clean-up below
                // treats both alike.
                result = result.Replace(LineBreakMarker, "\r");

                // Remove every ASCII colon from the text.
                result = result.Replace(":", "");

                // Remove spaces trapped between line breaks / tabs.
                result = ReplaceIgnoreCase(result, "(\r)( )+(\r)", "\r\r");
                result = ReplaceIgnoreCase(result, "(\t)( )+(\t)", "\t\t");
                result = ReplaceIgnoreCase(result, "(\t)( )+(\r)", "\t\r");
                result = ReplaceIgnoreCase(result, "(\r)( )+(\t)", "\r\t");

                // Remove tabs trapped between line breaks.
                result = ReplaceIgnoreCase(result, "(\r)(\t)+(\r)", "\r\r");

                // Several tabs after a line break become a single tab.
                result = ReplaceIgnoreCase(result, "(\r)(\t)+", "\r\t");

                // Cap runs at two line breaks and four tabs. The original did this
                // with a loop of result.Length iterations over ever-longer patterns,
                // which is quadratic and leaves e.g. a run of four breaks at three.
                result = Regex.Replace(result, "\r{3,}", "\r\r");
                result = Regex.Replace(result, "\t{5,}", "\t\t\t\t");

                // The first character is dropped, as in the original. Guard the empty
                // case: Substring(1) on "" used to throw and, via the catch below,
                // hand the raw HTML back to the caller.
                // NOTE(review): presumably the first character is always leading
                // whitespace in the author's data - confirm.
                return result.Length > 0 ? result.Substring(1) : result;
            }
            catch (Exception ex)
            {
                // Best effort, as before: report and fall back to the raw input.
                // Written to stderr rather than a modal dialog so that an unattended
                // batch run is not blocked.
                Console.Error.WriteLine("Error: " + ex.Message);
                return source;
            }
        }

        /// <summary>Case-insensitive <see cref="Regex.Replace(string, string, string, RegexOptions)"/>.</summary>
        private static string ReplaceIgnoreCase(string input, string pattern, string replacement)
        {
            return Regex.Replace(input, pattern, replacement, RegexOptions.IgnoreCase);
        }
    }
}

 
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章