全文檢索:分詞,索引

本文主要使用 Lucene.Net 索引技術及盤古(PanGu)分詞技術,提供創建索引、修改索引、刪除索引及搜索的全套代碼。

此代碼可直接複製調用,但請注意程序會在應用程序根目錄下生成索引文件夾(SearchIndex)。如果大家有什麼不明白的地方,可以直接通過 QQ 問我:715417165。

主要業務調用:

using Lucene.api;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Web;
using System.Web.UI;
using System.Web.UI.WebControls;

namespace Lucene
{
    public partial class lucene : System.Web.UI.Page
    {
        public List<MySearchUnit> searchlist_pangu = null;
        /// <summary>
        /// Page entry point: reads the "action" and "words" request parameters and
        /// runs the matching index operation.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event arguments (unused).</param>
        protected void Page_Load(object sender, EventArgs e)
        {
            string requestedAction = Request.Params["action"];
            string searchWords = Request.Params["words"];

            if (requestedAction == "createIndex")
            {
                CreateIndex();
            }
            else if (requestedAction == "deleteAll")
            {
                DeleteAll();
            }
            else if (requestedAction == "search")
            {
                Search(searchWords);
            }
            // Any other (or missing) action does nothing.
        }

        /// <summary>
        /// Runs a paged full-text search (flag "1", first page, two hits per page, no highlighting)
        /// and stores the hits in <see cref="searchlist_pangu"/>.
        /// </summary>
        /// <param name="words">
        /// Search text taken from the request; it is HTML-decoded before searching.
        /// Null, empty or whitespace-only input is ignored.
        /// </param>
        public void Search(string words)
        {
            // Request.Params["words"] is null when the parameter is absent, so guard
            // before calling Trim() to avoid a NullReferenceException.
            if (words == null || words.Trim().Length == 0)
            {
                return;
            }

            int count = 0;
            int pageIndex = 1;
            int pageSize = 2;
            StringBuilder str = new StringBuilder();
            searchlist_pangu = PanGuLuceneHelper.instance.Search("1", Server.HtmlDecode(words), pageIndex, pageSize, false, out count);
            if (searchlist_pangu == null || searchlist_pangu.Count == 0)
            {
                // NOTE(review): this message is built but never written to the response.
                str.Append("檢索結果:當前檢索無任何值");
            }
        }

        /// <summary>
        /// Rebuilds the index from a fixed set of sample units.
        /// </summary>
        public void CreateIndex()
        {
            List<MySearchUnit> sampleUnits = new List<MySearchUnit>
            {
                new MySearchUnit("329366", "超級兵王1", "他是僱傭兵世界的王者1,他是令各國元首頭疼的兵王!爲朋友,他甘願兩肋插刀;爲親人,不惜血濺五步!是龍,終要翱翔於九天之上,攜風雲之勢,一路高歌猛進,混的風生水起。", "1", "", ""),
                new MySearchUnit("80000437", "絕世邪神", "重生異世,放蕩不羈的葉楚面對衆多絕世天才,傾世紅顏。他如何踏破蒼穹,撼動諸天,世人仰望!", "1", "", ""),
                new MySearchUnit("357332", "錯入豪門:老公別碰我", "他的殘忍,情人的挑釁,最終將她折磨的遍體鱗傷就在要放棄的時候,他卻溫柔對待。以爲他愛上自己的時候,他卻挽着別的女人高調結婚,甩給她一張離婚協議書!", "1", "", ""),
                new MySearchUnit("358089", "一吻成癮", "那一夜,她大膽熱辣,纏綿過後,本以爲兩人不會再有交集,卻在回國後再次重逢,而他的未婚妻,竟是自己同父異母的姐姐!", "", "", ""),
                new MySearchUnit("80000556", "腹黑謀少法醫妻", "她是隨時可能失業的前任女法醫,他是京城貴少,胸有謀略,卻黑心無比。她急於把自己嫁出去,擺脫麻煩;他需要娶個女人,給他老子找點麻煩。他掩着眸中的邪惡,“你是自願和我結婚的吧?”她點頭,很認真地和他辦了結婚證。她以爲,結婚不過是各取所需擺個形式罷了,可他居然……賀鎏陽,你個黑心土匪!", "1", "", ""),
                new MySearchUnit("80000789", "無敵萌妻限量版", "他是C城翻手爲雲覆手爲雨的南宮集團首席執行官,身家過億,卻偏偏被當成牛郎睡了,爲報此仇,他勢要將此人大卸八塊拿去喂狗。她是一枚小小設計師,胸懷夢想,無奈現實骨感,可不小心睡了上司之後竟然走了狗屎運成爲頂尖設計師!她抱着總裁大腿,感激涕零。爲了看牢這個金大腿,她一路過關斬將,棄渣竹馬,鬥情敵,終於穩固自己總裁夫人的頭銜。", "1", "", "")
            };

            // Passing true asks the helper to discard the existing index and write it afresh.
            PanGuLuceneHelper.instance.CreateIndex(sampleUnits, true);
        }

        /// <summary>
        /// Removes every document from the index.
        /// </summary>
        public void DeleteAll()
        {
            PanGuLuceneHelper helper = PanGuLuceneHelper.instance;
            helper.DeleteAll();
        }

        /// <summary>
        /// Removes the index entry whose "id" field equals the given key.
        /// </summary>
        /// <param name="id">Id of the unit to remove from the index.</param>
        public void DeleteID(string id)
        {
            PanGuLuceneHelper helper = PanGuLuceneHelper.instance;
            helper.Delete(id);
        }

    }
}


核心處理類:

using Lucene.Net.Analysis;
using Lucene.Net.Documents;
using Lucene.Net.Index;
using Lucene.Net.QueryParsers;
using Lucene.Net.Search;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Reflection;
using System.Web;

namespace Lucene.api
{

    /// <summary>
    /// 盤古分詞在lucene.net中的使用幫助類
    /// 調用PanGuLuceneHelper.instance
    /// </summary>
    public class PanGuLuceneHelper
    {
        /// <summary>
        /// Private constructor: the helper is only obtained through <see cref="instance"/>.
        /// </summary>
        private PanGuLuceneHelper() { }

        #region Singleton instance
        // Created by the static field initializer, which the runtime executes at most once,
        // so concurrent requests cannot race through a lazy null check and build two helpers.
        private static readonly PanGuLuceneHelper _instance = new PanGuLuceneHelper();
        /// <summary>
        /// The single shared helper instance.
        /// </summary>
        public static PanGuLuceneHelper instance
        {
            get
            {
                return _instance;
            }
        }
        #endregion

        #region 分詞測試
        /// <summary>
        /// Segmentation test: tokenizes <paramref name="keyword"/> with the configured analyzer.
        /// </summary>
        /// <param name="keyword">Text to tokenize.</param>
        /// <returns>
        /// The terms in stream order, each followed by "|"; an empty string when
        /// <paramref name="keyword"/> is null or empty.
        /// </returns>
        public string Token(string keyword)
        {
            if (String.IsNullOrEmpty(keyword)) return "";

            // The analyzer property builds a new analyzer on every access, so hold on to one
            // instance: the analyzer closed below must be the one that produced the stream.
            Analyzer tokenAnalyzer = analyzer;
            System.Text.StringBuilder terms = new System.Text.StringBuilder();
            try
            {
                // The first TokenStream argument is the field name; the keyword is passed
                // there as the original code did (presumably ignored by the analyzer — TODO confirm).
                using (System.IO.StringReader reader = new System.IO.StringReader(keyword))
                using (Lucene.Net.Analysis.TokenStream ts = tokenAnalyzer.TokenStream(keyword, reader))
                {
                    while (ts.IncrementToken())
                    {
                        Lucene.Net.Analysis.Tokenattributes.ITermAttribute ita =
                            ts.GetAttribute<Lucene.Net.Analysis.Tokenattributes.ITermAttribute>();
                        terms.Append(ita.Term).Append("|");
                    }
                }
            }
            finally
            {
                tokenAnalyzer.Close();
            }
            return terms.ToString();
        }
        #endregion

        #region 創建索引
        /// <summary>
        /// Writes every unit in <paramref name="datalist"/> to the index, then optimizes the
        /// index and releases the writer.
        /// </summary>
        /// <param name="datalist">Units to index; ids are expected to be unique.</param>
        /// <param name="indexAdd">
        /// Passed as the IndexWriter "create" flag: true discards the existing index and
        /// rewrites it, false appends to it.
        /// </param>
        /// <returns>true once all units were handed to the writer; false when <paramref name="datalist"/> is null.</returns>
        public bool CreateIndex(List<MySearchUnit> datalist, bool indexAdd)
        {
            // Bail out before touching the index so a null list cannot leave a writer open.
            if (datalist == null) return false;

            // The analyzer property builds a new analyzer per access; reuse one for both attempts.
            Analyzer indexAnalyzer = analyzer;
            IndexWriter writer = null;
            try
            {
                writer = new IndexWriter(directory_luce, indexAnalyzer, indexAdd, IndexWriter.MaxFieldLength.LIMITED);
            }
            catch
            {
                // Fallback: retry in "create" mode, presumably for the case where no index
                // exists yet and append mode cannot open it.
                // NOTE(review): this also retries (and recreates the index) on any other failure — confirm that is intended.
                writer = new IndexWriter(directory_luce, indexAnalyzer, true, IndexWriter.MaxFieldLength.LIMITED);
            }

            try
            {
                foreach (MySearchUnit data in datalist)
                {
                    CreateIndex(writer, data);
                }
                writer.Optimize();
            }
            finally
            {
                // Always release the writer, even when indexing throws; otherwise it is left
                // open and later writers may be unable to open the index.
                writer.Dispose();
            }
            return true;
        }

        /// <summary>
        /// Appends every unit in <paramref name="datalist"/> to the existing index.
        /// </summary>
        /// <param name="datalist">Units to index.</param>
        /// <returns>The result of <c>CreateIndex(datalist, false)</c>.</returns>
        public bool CreateIndex(List<MySearchUnit> datalist)
        {
            // Same steps as the two-argument overload in append mode; delegate instead of
            // keeping a second copy of the writer handling.
            return CreateIndex(datalist, false);
        }

        /// <summary>
        /// Adds one unit to the index as a document with one stored field per public property.
        /// </summary>
        /// <param name="writer">Open index writer that receives the document.</param>
        /// <param name="data">Unit to index; null is skipped.</param>
        /// <returns>true when the document was added; false when <paramref name="data"/> is null.</returns>
        public bool CreateIndex(IndexWriter writer, MySearchUnit data)
        {
            if (data == null) return false;

            Document doc = new Document();
            foreach (PropertyInfo pi in data.GetType().GetProperties())
            {
                string name = pi.Name;
                object objval = pi.GetValue(data, null);
                string value = objval == null ? "" : objval.ToString();

                // "id" and "flag" must be indexed as single, un-tokenized terms so that
                // exact-match deletion and filtering work; every other field is analyzed.
                bool exactMatchField = name == "id" || name == "flag";
                Field.Index indexMode = exactMatchField ? Field.Index.NOT_ANALYZED : Field.Index.ANALYZED;
                doc.Add(new Field(name, value, Field.Store.YES, indexMode));
            }

            // Exceptions propagate unchanged; the former catch/"throw fnfe" only reset the stack trace.
            writer.AddDocument(doc);
            return true;
        }
        #endregion

        #region 在title和content字段中查詢數據
        /// <summary>
        /// Searches the "title" and "content" fields for <paramref name="keyword"/> and returns
        /// at most 1000 hits, with matches wrapped in a red font tag.
        /// </summary>
        /// <param name="keyword">Query text; it is parsed by the Lucene query parser.</param>
        /// <returns>
        /// The matching units, or null when <paramref name="keyword"/> is null/empty or nothing matches.
        /// Hits whose fields cannot be read or highlighted are skipped.
        /// </returns>
        public List<MySearchUnit> Search(string keyword)
        {
            // Nothing to parse: report "no results" the same way an empty hit list is reported.
            if (String.IsNullOrEmpty(keyword)) return null;

            string[] fileds = { "title", "content" };
            QueryParser parser = new MultiFieldQueryParser(version, fileds, analyzer);
            Query query = parser.Parse(keyword);
            const int maxHits = 1000;

            IndexSearcher searcher = new IndexSearcher(directory_luce, true); // true = read-only
            try
            {
                TopDocs docs = searcher.Search(query, (Filter)null, maxHits);
                if (docs == null || docs.TotalHits == 0)
                {
                    return null;
                }

                // One highlighter serves every hit; its settings do not change between hits.
                PanGu.HighLight.SimpleHTMLFormatter simpleHTMLFormatter = new PanGu.HighLight.SimpleHTMLFormatter("<font color=\"red\">", "</font>");
                PanGu.HighLight.Highlighter highlighter = new PanGu.HighLight.Highlighter(simpleHTMLFormatter, new PanGu.Segment());
                highlighter.FragmentSize = 50;

                List<MySearchUnit> list = new List<MySearchUnit>();
                foreach (ScoreDoc sd in docs.ScoreDocs)
                {
                    try
                    {
                        Document doc = searcher.Doc(sd.Doc);
                        string id = doc.Get("id");
                        string title = doc.Get("title");
                        string content = doc.Get("content");
                        string flag = doc.Get("flag");
                        string imageurl = doc.Get("imageurl");
                        string updatetime = doc.Get("updatetime");

                        content = highlighter.GetBestFragment(keyword, content);
                        // Keep the stored title when the highlighter yields no fragment for it.
                        string titlehighlight = highlighter.GetBestFragment(keyword, title);
                        if (titlehighlight != "") title = titlehighlight;
                        list.Add(new MySearchUnit(id, title, content, flag, imageurl, updatetime));
                    }
                    catch (Exception ex)
                    {
                        // Best effort: a hit that fails is reported and skipped.
                        Console.WriteLine(ex.Message);
                    }
                }
                return list;
            }
            finally
            {
                // Release the searcher on every path; it was previously never disposed.
                searcher.Dispose();
            }
        }
        #endregion

        #region 在不同的分類下再根據title和content字段中查詢數據(分頁)
        /// <summary>
        /// Paged search: optionally restricts hits to one category ("flag") and matches
        /// <paramref name="keyword"/> against the "title" and "content" fields.
        /// </summary>
        /// <param name="_flag">Category to match exactly; null or empty applies no category restriction.</param>
        /// <param name="keyword">Query text parsed by the Lucene query parser; null or empty adds no keyword clause.</param>
        /// <param name="PageIndex">1-based page number; values below 1 are treated as 1.</param>
        /// <param name="PageSize">Number of hits per page.</param>
        /// <param name="Highlight">true to wrap matches in a red font tag; false to extract fragments without markup.</param>
        /// <param name="TotalCount">Receives the total number of hits across all pages.</param>
        /// <returns>
        /// The units on the requested page, or null when nothing matches.
        /// Hits whose fields cannot be read or highlighted are skipped.
        /// </returns>
        public List<MySearchUnit> Search(string _flag, string keyword, int PageIndex, int PageSize, bool Highlight, out int TotalCount)
        {
            if (PageIndex < 1) PageIndex = 1;

            BooleanQuery bq = new BooleanQuery();
            if (!String.IsNullOrEmpty(_flag))
            {
                // "flag" is indexed NOT_ANALYZED, so match it as one exact term instead of
                // running it through the analyzer, which could split or rewrite the value.
                bq.Add(new TermQuery(new Term("flag", _flag)), Occur.MUST);
            }
            if (!String.IsNullOrEmpty(keyword))
            {
                string[] fileds = { "title", "content" };
                QueryParser parser = new MultiFieldQueryParser(version, fileds, analyzer);
                bq.Add(parser.Parse(keyword), Occur.MUST);
            }

            // Collect enough top hits to reach the end of the requested page.
            TopScoreDocCollector collector = TopScoreDocCollector.Create(PageIndex * PageSize, false);
            IndexSearcher searcher = new IndexSearcher(directory_luce, true); // true = read-only
            try
            {
                searcher.Search(bq, collector);
                TotalCount = collector.TotalHits;
                if (TotalCount == 0)
                {
                    return null;
                }

                int start = PageSize * (PageIndex - 1);
                ScoreDoc[] hits = collector.TopDocs(start, PageSize).ScoreDocs;

                // One highlighter serves every hit; without Highlight the tags are empty strings.
                string font_0 = Highlight ? "<font color=\"red\">" : "";
                string font_1 = Highlight ? "</font>" : "";
                PanGu.HighLight.SimpleHTMLFormatter simpleHTMLFormatter = new PanGu.HighLight.SimpleHTMLFormatter(font_0, font_1);
                PanGu.HighLight.Highlighter highlighter = new PanGu.HighLight.Highlighter(simpleHTMLFormatter, new PanGu.Segment());
                highlighter.FragmentSize = 50;
                // A null keyword is highlighted like an empty one.
                string highlightKeyword = keyword ?? "";

                List<MySearchUnit> list = new List<MySearchUnit>();
                foreach (ScoreDoc sd in hits)
                {
                    try
                    {
                        Document doc = searcher.Doc(sd.Doc);
                        string id = doc.Get("id");
                        string title = doc.Get("title");
                        string content = doc.Get("content");
                        string flag = doc.Get("flag");
                        string imageurl = doc.Get("imageurl");
                        string updatetime = doc.Get("updatetime");

                        content = highlighter.GetBestFragment(highlightKeyword, content);
                        // Keep the stored title when the highlighter yields no fragment for it.
                        string titlehighlight = highlighter.GetBestFragment(highlightKeyword, title);
                        if (titlehighlight != "") title = titlehighlight;
                        list.Add(new MySearchUnit(id, title, content, flag, imageurl, updatetime));
                    }
                    catch (Exception ex)
                    {
                        // Best effort: a hit that fails is reported and skipped.
                        Console.WriteLine(ex.Message);
                    }
                }
                return list;
            }
            finally
            {
                // Release the searcher on every path; it was previously never disposed.
                searcher.Dispose();
            }
        }
        #endregion

        #region 刪除索引數據(根據id)
        /// <summary>
        /// Deletes the documents whose "id" field equals <paramref name="id"/>.
        /// </summary>
        /// <param name="id">Id to delete; null deletes nothing.</param>
        /// <returns>
        /// The value of the writer's HasDeletions() after the commit; false when
        /// <paramref name="id"/> is null.
        /// </returns>
        public bool Delete(string id)
        {
            if (id == null) return false;

            Term term = new Term("id", id);
            IndexWriter writer = new IndexWriter(directory_luce, analyzer, false, IndexWriter.MaxFieldLength.LIMITED);
            try
            {
                writer.DeleteDocuments(term);
                writer.Commit();
                return writer.HasDeletions();
            }
            finally
            {
                // Always release the writer, even when the delete or commit throws.
                writer.Dispose();
            }
        }
        #endregion

        #region 刪除全部索引數據
        /// <summary>
        /// Deletes every document in the index.
        /// </summary>
        /// <returns>true when the delete was committed; false when any step threw.</returns>
        public bool DeleteAll()
        {
            try
            {
                IndexWriter writer = new IndexWriter(directory_luce, analyzer, false, IndexWriter.MaxFieldLength.LIMITED);
                try
                {
                    writer.DeleteAll();
                    writer.Commit();
                }
                finally
                {
                    // Always release the writer, even when the delete or commit throws.
                    writer.Dispose();
                }

                // Success means "committed without error". The previous HasDeletions() check
                // presumably reports false once the whole index has been dropped, which made a
                // successful wipe look like a failure — NOTE(review): confirm against the Lucene.Net version in use.
                return true;
            }
            catch
            {
                // Deliberate best effort: failures are reported through the return value.
                return false;
            }
        }
        #endregion

        #region directory_luce
        // Cached Lucene directory; opened on first access.
        private Lucene.Net.Store.Directory _directory_luce = null;
        /// <summary>
        /// Lucene.Net directory over the on-disk index folder, opened once and then reused.
        /// </summary>
        public Lucene.Net.Store.Directory directory_luce
        {
            get
            {
                return _directory_luce ?? (_directory_luce = Lucene.Net.Store.FSDirectory.Open(directory));
            }
        }
        #endregion

        #region directory
        // Cached handle to the index folder; resolved on first access.
        private System.IO.DirectoryInfo _directory = null;
        /// <summary>
        /// The on-disk index folder: "SearchIndex" under the application's base directory.
        /// The folder is created on first access when it does not exist yet.
        /// </summary>
        public System.IO.DirectoryInfo directory
        {
            get
            {
                if (_directory == null)
                {
                    // Path.Combine inserts the separator when the base directory lacks a trailing
                    // one; plain concatenation would then yield a sibling path like "...appSearchIndex".
                    string dirPath = System.IO.Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "SearchIndex");
                    // CreateDirectory returns the DirectoryInfo whether or not the folder already exists.
                    _directory = System.IO.Directory.CreateDirectory(dirPath);
                }
                return _directory;
            }
        }
        #endregion

        #region analyzer
        // Holds the analyzer most recently handed out by the property below.
        private Analyzer _analyzer = null;
        /// <summary>
        /// The analyzer used for indexing and querying. A new PanGuAnalyzer is built on
        /// every access; the result is not reused between calls.
        /// </summary>
        public Analyzer analyzer
        {
            get
            {
                _analyzer = new Lucene.Net.Analysis.PanGu.PanGuAnalyzer();
                return _analyzer;
            }
        }
        #endregion

        #region version
        // Lucene compatibility version shared by all parsers created in this class.
        private static Lucene.Net.Util.Version _version = Lucene.Net.Util.Version.LUCENE_30;
        /// <summary>
        /// The Lucene version constant passed to the query parsers (LUCENE_30).
        /// </summary>
        public Lucene.Net.Util.Version version
        {
            get { return _version; }
        }
        #endregion
    }

    #region 索引的一個行單元,相當於數據庫中的一行數據
    /// <summary>
    /// One indexable record, comparable to a single database row. Each public
    /// property holds one string value of the record.
    /// </summary>
    public class MySearchUnit
    {
        /// <summary>
        /// Creates a unit with all of its values set.
        /// </summary>
        /// <param name="_id">Unique id.</param>
        /// <param name="_title">Title text.</param>
        /// <param name="_content">Body text.</param>
        /// <param name="_flag">Additional information stored with the record.</param>
        /// <param name="_imageurl">Image path.</param>
        /// <param name="_updatetime">Time value, kept as a string.</param>
        public MySearchUnit(string _id, string _title, string _content, string _flag, string _imageurl, string _updatetime)
        {
            id = _id;
            title = _title;
            content = _content;
            flag = _flag;
            imageurl = _imageurl;
            updatetime = _updatetime;
        }

        /// <summary>Unique id of the record.</summary>
        public string id { get; set; }

        /// <summary>Title.</summary>
        public string title { get; set; }

        /// <summary>Content.</summary>
        public string content { get; set; }

        /// <summary>Other information.</summary>
        public string flag { get; set; }

        /// <summary>Image path.</summary>
        public string imageurl { get; set; }

        /// <summary>Time.</summary>
        public string updatetime { get; set; }
    }
    #endregion
}




發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章