使用HtmlAgilityPack解析
using HtmlAgilityPack;
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Web;
using System.Web.UI;
using System.Web.UI.WebControls;
namespace 書籤解析程序
{
    /// <summary>
    /// Web Forms page that parses an exported bookmark file with HtmlAgilityPack
    /// and flattens it into a list of <see cref="BookMark"/> entries.
    /// This example targets the bookmark export format produced by atavi.com.
    /// </summary>
    public partial class index : System.Web.UI.Page
    {
        /// <summary>
        /// Reads "a.html" (resolved through Server.MapPath), walks the root
        /// &lt;dl&gt; element's &lt;dt&gt; children and collects one
        /// <see cref="BookMark"/> per link found.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        protected void Page_Load(object sender, EventArgs e)
        {
            List<BookMark> bookList = new List<BookMark>();

            // Read the exported bookmark file.
            var docText = File.ReadAllText(Server.MapPath("a.html"));
            var doc = new HtmlDocument();
            doc.LoadHtml(docText);

            // "/dl" is an XPath expression selecting the root <dl> element.
            var root = doc.DocumentNode.SelectSingleNode(@"/dl");

            // HtmlAgilityPack returns null (not an empty collection) when an
            // XPath query matches nothing, so every lookup is null-checked.
            var entries = root == null ? null : root.SelectNodes(@"dt");
            if (entries != null)
            {
                foreach (var item in entries)
                {
                    var group = item.SelectSingleNode(@"h3");
                    if (group != null)
                    {
                        // <dt> with an <h3>: a folder whose links live in a nested <dl>.
                        AddGroupedBookMarks(bookList, item, group);
                    }
                    else
                    {
                        // <dt> without an <h3>: a bookmark that belongs to no folder.
                        var anchor = item.SelectSingleNode("a");
                        if (anchor != null)
                        {
                            bookList.Add(CreateBookMark(anchor, "默認分組", ""));
                        }
                    }
                }
            }

            // bookList now holds the parsed entities and is ready for further processing.
            // ...
        }

        /// <summary>
        /// Appends one BookMark for every direct link inside the folder's nested &lt;dl&gt;.
        /// </summary>
        /// <param name="target">List that receives the parsed bookmarks.</param>
        /// <param name="folderItem">The &lt;dt&gt; node that represents the folder.</param>
        /// <param name="header">The folder's &lt;h3&gt; node (name and add_date).</param>
        private static void AddGroupedBookMarks(List<BookMark> target, HtmlNode folderItem, HtmlNode header)
        {
            var container = folderItem.SelectSingleNode("dl");
            var children = container == null ? null : container.SelectNodes("dt");
            if (children == null)
            {
                // Folder without a nested list, or an empty folder: nothing to add.
                return;
            }

            string groupName = header.InnerHtml;
            string groupAddDate = header.GetAttributeValue("add_date", "");

            foreach (var child in children)
            {
                var anchor = child.SelectSingleNode("a");
                if (anchor == null)
                {
                    // Not a plain link (e.g. a nested sub-folder); skip instead of crashing.
                    continue;
                }

                target.Add(CreateBookMark(anchor, groupName, groupAddDate));
            }
        }

        /// <summary>
        /// Builds a BookMark from an &lt;a&gt; node; missing attributes become empty strings.
        /// </summary>
        /// <param name="anchor">The &lt;a&gt; node holding href, add_date and the title.</param>
        /// <param name="groupName">Name of the group the bookmark is filed under.</param>
        /// <param name="groupAddDate">The group's add_date value, or an empty string.</param>
        /// <returns>A populated <see cref="BookMark"/>.</returns>
        private static BookMark CreateBookMark(HtmlNode anchor, string groupName, string groupAddDate)
        {
            BookMark book = new BookMark();
            book.groupAddDate = groupAddDate;
            book.groupName = groupName;
            book.bookMarkAddDate = anchor.GetAttributeValue("add_date", "").Trim();
            book.bookMarkHref = anchor.GetAttributeValue("href", "").Trim();
            book.bookMarkTitle = anchor.InnerHtml;
            return book;
        }
    }
}
書籤實體:
using System;
using System.Collections.Generic;
using System.Linq;
using System.Web;
namespace 書籤解析程序
{
    /// <summary>
    /// Plain data holder for one parsed bookmark together with the group it is filed under.
    /// All values are kept as strings.
    /// </summary>
    public class BookMark
    {
        /// <summary>Title of the bookmark.</summary>
        public string bookMarkTitle
        {
            get;
            set;
        }

        /// <summary>Target address (href) of the bookmark.</summary>
        public string bookMarkHref
        {
            get;
            set;
        }

        /// <summary>Add-date value of the bookmark, stored as text.</summary>
        public string bookMarkAddDate
        {
            get;
            set;
        }

        /// <summary>Name of the group the bookmark belongs to.</summary>
        public string groupName
        {
            get;
            set;
        }

        /// <summary>Add-date value of the group, stored as text.</summary>
        public string groupAddDate
        {
            get;
            set;
        }
    }
}