lucene.net是.Net下的全文檢索的工具包,不是應用,只是個類庫,完成了全文檢索的功能;預先把數據拆分成原子(字/詞),保存到磁盤中;查詢時把關鍵字也拆分成原子(字/詞),再根據(字/詞)進行匹配,返回結果。
Nuget安裝“Lucene.Net”和“Lucene.Net.Analysis.PanGu”(盤古分詞,一個第三方的分詞器)
一、lucene.net七大對象
1、Analysis:分詞器,負責把字符串拆分成原子,內置了標準分詞(例如直接按空格拆分)。項目中用的是盤古中文分詞。
2、Document:數據結構,定義存儲數據的格式
3、Index:索引的讀寫類
4、QueryParser:查詢解析器,負責解析查詢語句
5、Search:包含各種查詢類,查詢語句解析後得到的就是查詢類
6、Store:索引存儲類,負責文件夾等等
7、Util:常見工具類庫
二、常用的查詢方式
1、TermQuery:單元查詢 ; new Term("title","張三") =》 title:張三
2、BooleanQuery:多條件查詢;new Term("title","張三") and new Term("title","李四") =》 +title:張三 +title:李四
new Term("title","張三") or new Term("title","李四") =》 title:張三 title:李四
3、WildcardQuery:通配符查詢 ;new Term("title","張?") =》 title:張?
new Term("title","張*") =》 title:張*
4、PrefixQuery:前綴查詢; 以xx開頭 title:張*
5、PhraseQuery:短語查詢(可設間隔距離);包含“我”和“人”,且“我”和“人”之間的間隔不超過5個詞位(slop=5): title:"我 人"~5
6、FuzzyQuery:近似查詢,ibhone----iphone title:ibhone~
7、NumericRangeQuery:範圍查詢; [1,100] 為閉區間(包含邊界值),{1,100} 為開區間(不包含邊界值)
三、讀寫範例
1、寫
// Write example: builds the product index on disk, one Lucene Document per Commodity.
List<Commodity> commodityList = GetList();
// NOTE(review): `k` is used below but is not declared anywhere in this snippet —
// presumably an outer batch/loop counter from the original project; confirm before compiling.
// Both the directory and the writer hold file handles, so both are disposed here.
using (FSDirectory directory = FSDirectory.Open("d://Test/productIndex")) // index folder
using (IndexWriter writer = new IndexWriter(directory, new PanGuAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED)) // index writer; `true` = create/overwrite the index
{
    foreach (Commodity commodity in commodityList)
    {
        Document doc = new Document(); // one Document corresponds to one record

        // One Field corresponds to one column: name, value, whether the original value is stored,
        // and whether the value is analyzed. NOT_ANALYZED indexes the whole value as a single term.
        // "id" is stored so that search results can read it back with doc.Get("id").
        doc.Add(new Field("id", commodity.Id.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED));
        doc.Add(new Field("title", commodity.Title, Field.Store.YES, Field.Index.ANALYZED));
        doc.Add(new Field("url", commodity.Url, Field.Store.NO, Field.Index.NOT_ANALYZED));
        doc.Add(new Field("imageurl", commodity.ImageUrl, Field.Store.NO, Field.Index.NOT_ANALYZED));
        doc.Add(new Field("content", "this is lucene working,powerful tool " + k, Field.Store.YES, Field.Index.ANALYZED));

        // Numeric fields: "price" is encoded as a double, "time" as an int in yyyyMMdd form.
        // Range filters and sort fields used at search time must use the same numeric types.
        doc.Add(new NumericField("price", Field.Store.YES, true).SetDoubleValue((double)(commodity.Price + k)));
        //doc.Add(new NumericField("time", Field.Store.YES, true).SetLongValue(DateTime.Now.ToFileTimeUtc()));
        doc.Add(new NumericField("time", Field.Store.YES, true).SetIntValue(int.Parse(DateTime.Now.ToString("yyyyMMdd")) + k));

        writer.AddDocument(doc); // buffered in memory until flushed/committed
    }

    writer.Optimize(); // optimize, i.e. merge index segments
}
2、讀取
// Read example: opens the index and runs three searches against the "title" field.
// The directory and the searcher are disposed when the block ends so index files are released.
using (FSDirectory dir = FSDirectory.Open(StaticConstant.TestIndexPath))
using (IndexSearcher searcher = new IndexSearcher(dir)) // searcher over the index directory
{
    // 1. Search directly by a single term.
    {
        TermQuery query = new TermQuery(new Term("title", "圖書館")); // title contains this term
        TopDocs docs = searcher.Search(query, null, 10000); // matched documents (at most 10000)
        foreach (ScoreDoc hit in docs.ScoreDocs)
        {
            Document doc = searcher.Doc(hit.Doc);
            Console.WriteLine(string.Format("id={0}", doc.Get("id")));
            Console.WriteLine(string.Format("title={0}", doc.Get("title")));
        }
    }

    // 2. Let a QueryParser analyze a keyword sentence into a query.
    QueryParser parser = new QueryParser(Lucene.Net.Util.Version.LUCENE_30, "title", new PanGuAnalyzer()); // parser for keywords, default field "title"
    {
        string keyword = "這是一隻真實的二哈呀";
        {
            Query query = parser.Parse(keyword);
            TopDocs docs = searcher.Search(query, null, 10000); // matched documents (at most 10000)
            foreach (ScoreDoc hit in docs.ScoreDocs)
            {
                Document doc = searcher.Doc(hit.Doc);
                Console.WriteLine(string.Format("id={0}", doc.Get("id")));
                Console.WriteLine(string.Format("title={0}", doc.Get("title")));
            }
        }

        // 3. Filter on the price field (300 to 1200, both ends inclusive) and sort by time, then price.
        {
            Query query = parser.Parse(keyword);

            // "price" is written with SetDoubleValue, so the range filter must be a double range;
            // an int range would not match the double-encoded terms.
            NumericRangeFilter<double> priceFilter = NumericRangeFilter.NewDoubleRange("price", 300d, 1200d, true, true);

            // The third SortField argument is `reverse`: false = ascending, true = descending.
            SortField sortPrice = new SortField("price", SortField.DOUBLE, false); // ascending
            SortField sortTime = new SortField("time", SortField.INT, true); // descending
            Sort sort = new Sort(sortTime, sortPrice); // argument order sets priority: time first, then price

            TopDocs docs = searcher.Search(query, priceFilter, 10000, sort); // matched documents (at most 10000)

            // Simple paging: print only the hits that fall inside the requested page.
            const int pageIndex = 0;
            const int pageSize = 20;
            int first = pageIndex * pageSize;
            int last = Math.Min(first + pageSize, docs.ScoreDocs.Length);
            for (int i = first; i < last; i++)
            {
                Document doc = searcher.Doc(docs.ScoreDocs[i].Doc);
                Console.WriteLine(string.Format("id={0}", doc.Get("id")));
                Console.WriteLine(string.Format("title={0}", doc.Get("title")));
            }
        }
    }
}