書籤解析程序

使用 HtmlAgilityPack 解析書籤網站導出的 HTML 書籤文件,頁面代碼如下:

using HtmlAgilityPack;
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Web;
using System.Web.UI;
using System.Web.UI.WebControls;

namespace 書籤解析程序
{
    /// <summary>
    /// Page that parses an exported bookmark file into a flat list of
    /// <see cref="BookMark"/> entities.
    /// This example uses the bookmark format exported by the atavi.com bookmark site.
    /// </summary>
    public partial class index : System.Web.UI.Page
    {
        /// <summary>
        /// Reads "a.html" (resolved through Server.MapPath), parses it with
        /// HtmlAgilityPack and collects one <see cref="BookMark"/> per anchor found
        /// under the root-level dl element.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event arguments (unused).</param>
        protected void Page_Load(object sender, EventArgs e)
        {
            List<BookMark> bookList = new List<BookMark>();

            // Read the bookmark file content.
            var docText = File.ReadAllText(Server.MapPath("a.html"));
            var doc = new HtmlDocument();
            doc.LoadHtml(docText);

            // "/dl" is an XPath expression; it selects the root-level dl element.
            var res = doc.DocumentNode.SelectSingleNode(@"/dl");

            // Each dt under the root is either a group (h3 + nested dl) or a single bookmark.
            foreach (var item in SelectChildren(res, @"dt"))
            {
                var group = item.SelectSingleNode(@"h3");
                if (group != null) // bookmarks that belong to a group
                {
                    // Group data is the same for every bookmark in it, so read it once.
                    string groupName = group.InnerHtml;
                    string groupAddDate = group.GetAttributeValue("add_date", string.Empty);

                    var groupBody = item.SelectSingleNode("dl");
                    foreach (var node in SelectChildren(groupBody, "dt"))
                    {
                        // A dt without a direct anchor (e.g. a nested sub-group) is skipped
                        // instead of crashing the whole page.
                        AddBookMark(bookList, node.SelectSingleNode("a"), groupName, groupAddDate);
                    }
                }
                else // bookmarks without a group
                {
                    AddBookMark(bookList, item.SelectSingleNode("a"), "默認分組", "");
                }
            }

            // bookList now holds the parsed entities and can be used for further processing.
            // ...
        }

        /// <summary>
        /// Returns the nodes matching <paramref name="xpath"/> relative to
        /// <paramref name="parent"/>, or an empty sequence when the parent is null
        /// or nothing matches (SelectNodes returns null in that case).
        /// </summary>
        private static IEnumerable<HtmlNode> SelectChildren(HtmlNode parent, string xpath)
        {
            if (parent == null)
            {
                return Enumerable.Empty<HtmlNode>();
            }

            var nodes = parent.SelectNodes(xpath);
            if (nodes == null)
            {
                return Enumerable.Empty<HtmlNode>();
            }

            return nodes;
        }

        /// <summary>
        /// Builds a <see cref="BookMark"/> from an anchor node and appends it to
        /// <paramref name="target"/>. Does nothing when <paramref name="anchor"/> is null.
        /// Missing add_date / href attributes are stored as empty strings.
        /// </summary>
        private static void AddBookMark(List<BookMark> target, HtmlNode anchor, string groupName, string groupAddDate)
        {
            if (anchor == null)
            {
                return;
            }

            BookMark book = new BookMark();
            book.groupAddDate = groupAddDate;
            book.groupName = groupName;
            book.bookMarkAddDate = anchor.GetAttributeValue("add_date", string.Empty).Trim();
            book.bookMarkHref = anchor.GetAttributeValue("href", string.Empty).Trim();
            // NOTE(review): InnerHtml keeps HTML entities (e.g. "&amp;") as-is; confirm
            // whether callers expect the decoded text instead.
            book.bookMarkTitle = anchor.InnerHtml;
            target.Add(book);
        }
    }
}

書籤實體類(上面代碼中用到的 BookMark):

using System;
using System.Collections.Generic;
using System.Linq;
using System.Web;

namespace 書籤解析程序
{
    /// <summary>
    /// Plain data holder for one parsed bookmark together with the group it belongs to.
    /// All values are kept as the raw strings read from the bookmark file.
    /// </summary>
    public class BookMark
    {
        /// <summary>Title of the bookmark.</summary>
        public string bookMarkTitle { get; set; }

        /// <summary>Target address (href) of the bookmark.</summary>
        public string bookMarkHref { get; set; }

        /// <summary>Raw add_date value of the bookmark.</summary>
        public string bookMarkAddDate { get; set; }

        /// <summary>Name of the group the bookmark belongs to.</summary>
        public string groupName { get; set; }

        /// <summary>Raw add_date value of the group.</summary>
        public string groupAddDate { get; set; }
    }
}

 

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章