Lucene.Net用了又忘...由於現在信息量爆炸,用過的東西用完就忘,只好自己寫個筆記來記錄一下了...
1: 需要DLL
Lucene.Net.dll
PanGu.dll
PanGu.HighLight.dll
PanGu.Lucene.Analyzer.dll
沒有的話,可以去我的資源包裏面下,地址如下: http://download.csdn.net/download/kimizhou_blog/10016313
2: 生成索引
// Root folder (under App_Data) where all index files are stored.
string indexPath = Context.Server.MapPath("~/App_Data/IndexData");
// Sub-folder that holds the points-mall product index.
string commonProductIndexPath = indexPath + "/" + "commonProduct";
// Build the points-mall product index.
CreateCommonProductIndex(commonProductIndexPath);
然後看看CreateCommonProductIndex方法
/// <summary>
/// Rebuilds the points-mall product index in the given directory:
/// every existing document is deleted and one document per product is added.
/// </summary>
/// <param name="indexPath">File-system directory that holds the index files.</param>
private void CreateCommonProductIndex(string indexPath)
{
    // Load the products first so that a data-access failure happens before
    // the existing index is touched.
    IList<ProductInfoByIndex> list = Product.GetProductListByIndex();

    // Bind the index directory.
    FSDirectory directory = FSDirectory.Open(new DirectoryInfo(indexPath), new NativeFSLockFactory());
    try
    {
        bool isExist = IndexReader.IndexExists(directory);
        if (isExist && IndexWriter.IsLocked(directory))
        {
            // Force-release a write lock found on the existing index.
            // NOTE(review): this assumes no other writer is active — confirm.
            IndexWriter.Unlock(directory);
        }

        // The third argument creates a brand-new index only when none exists yet.
        IndexWriter writer = new IndexWriter(directory, new PanGuAnalyzer(), !isExist, IndexWriter.MaxFieldLength.UNLIMITED);
        try
        {
            // Drop the previous index content before re-adding everything.
            writer.DeleteAll();

            foreach (var item in list)
            {
                Document document = new Document();

                // All field values are stored as strings. The id is an identifier that is
                // also used as a sort key, so it is indexed as a single untokenized term.
                document.Add(new Field("id", item.ProductID.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED));

                // Searchable product name; term vectors with positions and offsets are kept.
                string Content = string.Format("{0}", item.ProductName);
                document.Add(new Field("Content", Content, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));

                // Write the document into the index.
                writer.AddDocument(document);
            }
        }
        finally
        {
            // Closing the writer also releases the write lock, even when indexing failed.
            writer.Close();
        }
    }
    finally
    {
        // Always close the directory; otherwise the new index content may not be searchable.
        directory.Close();
    }
}
其中
IList<ProductInfoByIndex> list = Product.GetProductListByIndex();這一行是去數據庫中讀取這個list對象,這裏代碼就不貼出來了。到這裏你的索引已經創建出來了,那麼接下來需要查詢和顯示
查詢是最困難的,各種匹配
3:查詢索引並且顯示出來
GetProductIndex方法就是獲取索引代碼如下:
/// <summary>
/// Searches the points-mall product index for the words of the request's
/// "SearchKey" value, stores the hits in productResultList and appends the
/// hit count to Message.
/// </summary>
private void GetProductIndex()
{
    string indexPath = Context.Server.MapPath("~/App_Data/IndexData"); // root folder of the index data
    string commonProductIndexPath = string.Format("{0}/{1}", indexPath, "commonProduct"); // points-mall product index

    FSDirectory directory = FSDirectory.Open(new DirectoryInfo(commonProductIndexPath), new NoLockFactory());
    IndexReader reader = null;
    IndexSearcher searcher = null;
    try
    {
        reader = IndexReader.Open(directory, true); // true = open read-only
        searcher = new IndexSearcher(reader);

        // Every word of the search key must occur somewhere in "Content"
        // (comparable to SQL: Content LIKE '%word%').
        BooleanQuery bQuery = new BooleanQuery();
        foreach (string word in SplitContent.SplitWords(Request["SearchKey"]))
        {
            Query queryUseringNatrue = new WildcardQuery(new Term("Content", "*" + word + "*"));
            bQuery.Add(queryUseringNatrue, BooleanClause.Occur.MUST); // MUST = the clause is required
        }

        // The id is an integer, so sort it as INT (FLOAT would lose precision on large ids); true = descending.
        Sort sort = new Sort(new SortField("id", SortField.INT, true));
        TopDocs docs = searcher.Search(bQuery, (Filter)null, 9999999, sort);

        // Iterate over the hits that were actually returned; totalHits may be
        // larger than the number of entries in scoreDocs.
        List<ProductInfoByIndex> proList = new List<ProductInfoByIndex>(docs.scoreDocs.Length);
        for (int i = 0; i < docs.scoreDocs.Length; i++)
        {
            Document doc = searcher.Doc(docs.scoreDocs[i].doc);
            ProductInfoByIndex product = new ProductInfoByIndex();
            product.ProductID = System.Convert.ToInt32(doc.Get("id"));
            product.ProductName = doc.Get("Content");
            //product.ProductName = SplitContent.HightLight(Request["SearchKey"], doc.Get("Content"));
            proList.Add(product);
        }

        productResultList = proList;
        this.Message += string.Format("|{0}條積分商城產品", docs.totalHits);
    }
    finally
    {
        // An IndexSearcher built on an existing reader does not close that reader,
        // so release searcher, reader and directory explicitly.
        if (searcher != null)
        {
            searcher.Close();
        }
        if (reader != null)
        {
            reader.Close();
        }
        directory.Close();
    }

    // Alternative implementation kept for reference: a PhraseQuery with slop
    // instead of the wildcard queries above.
    //PhraseQuery query = new PhraseQuery();
    //foreach (string word in SplitContent.SplitWords(Request["SearchKey"]))
    //{
    // query.Add(new Term("Content", word));
    //}
    //query.SetSlop(100);
    //TopScoreDocCollector collector = TopScoreDocCollector.create(1000, true);
    //searcher.Search(query, null, collector);
    //ScoreDoc[] docs = collector.TopDocs(0, collector.GetTotalHits()).scoreDocs;
    //List<ProductInfoByIndex> proList = new List<ProductInfoByIndex>();
    //for (int i = 0; i < docs.Length; i++)
    //{
    // int docId = docs[i].doc;// id of the hit document (assigned internally by Lucene)
    // Document doc = searcher.Doc(docId);// load the Document object by its document id
    // ProductInfoByIndex product = new ProductInfoByIndex();
    // product.ProductID = System.Convert.ToInt32(doc.Get("id"));
    // //book.ContentDescription = doc.Get("content");// without highlighting
    // // highlight the search keywords using the highlighter plug-in shipped with PanGu
    // product.ProductName = SplitContent.HightLight(Request["SearchKey"], doc.Get("Content"));
    // proList.Add(product);
    //}
    // productResultList = proList;
    //this.Message += string.Format("|{0}條積分商城產品", docs.Length);
}
其中我註釋掉的,是另外一種方法,這裏我用的效率比較慢的模糊查詢
Query queryUseringNatrue = new WildcardQuery(new Term("Content", "*" + word + "*"));
這個類似 數據庫的like '%關鍵字%'
到這裏就已經獲取到了所有的索引資料了,是不是很簡單,你get到了嗎?最後我再給大家介紹索引的幾種查詢方式:
第1種:
// Commented-out example: analyzer-based (tokenized) query on the "UseringNatrue" field.
//string keyWordUseringNatrue = "營運";
//if (!string.IsNullOrWhiteSpace(keyWordUseringNatrue))
//{
// QueryParser parseUseringNatrue = new QueryParser("UseringNatrue", new PanGuAnalyzer());
// // The default operator has to be set before Parse(); setting it afterwards does not affect the already parsed query.
// parseUseringNatrue.SetDefaultOperator(QueryParser.Operator.AND);
// Query query = parseUseringNatrue.Parse(keyWordUseringNatrue);
// bQuery.Add(query, BooleanClause.Occur.MUST);
//}
// Alternative: match the keyword "營運" with a WildcardQuery.
//Query queryUseringNatrue = new WildcardQuery(new Term("UseringNatrue", "營運"));
//bQuery.Add(queryUseringNatrue, BooleanClause.Occur.MUST);// MUST = the clause is required
這個查詢是什麼呢?是一般的(基於分詞的)查詢,會查詢出“營運”相關的結果。但是它和like不一樣,它跟分詞有關,比如說,搜索“愛”就查詢不出“可愛”。pangu有自己的分詞,這種方式比較常用。下面彙總一下其它的查詢:
其它查詢彙總:
//介紹各種Query
//TermQuery: 首先介紹最基本的查詢,如果你想執行一個這樣的查詢:“在content字段中查詢包含‘劉備’的document”,那麼你可以用TermQuery:
// Term t = new Term("content", "劉備");
// Query query = new TermQuery(t);
//BooleanQuery :如果你想這麼查詢:“在content字段中包含‘劉備’並且在title字段包含‘三國’的document”,那麼你可以建立兩個TermQuery並把它們用BooleanQuery連接起來(“並且”要用MUST;如果只需滿足其中之一,則改用SHOULD):
//1 TermQuery termQuery1 = new TermQuery(new Term("content", "劉備"));
//2 TermQuery termQuery2 = new TermQuery(new Term("title", "三國"));
//3 BooleanQuery booleanQuery = new BooleanQuery();
//4 booleanQuery.Add(termQuery1, BooleanClause.Occur.MUST);
//5 booleanQuery.Add(termQuery2, BooleanClause.Occur.MUST);
//WildcardQuery :如果你想對某單詞進行通配符查詢,你可以用WildcardQuery,通配符包括’?’匹配一個任意字符和’*’匹配零個或多個任意字符,例如你搜索’三國*’,你可能找到’三國演義’或者’三國志’:
//1 Query query = new WildcardQuery(new Term("content", "三國*"));
//PhraseQuery :你可能對中日關係比較感興趣,想查找‘中’和‘日’捱得比較近(5個字的距離內)的文章,超過這個距離的不予考慮,你可以用PhraseQuery:
//1 PhraseQuery query = new PhraseQuery();
//2 query.SetSlop(5);
//3 query.Add(new Term("content", "中"));
//4 query.Add(new Term("content", "日"));
//那麼它可能搜到“中日合作……”、“中方和日方……”,但是搜不到“中國某高層領導說日本欠扁”
//PrefixQuery :如果你想搜以‘中’開頭的詞語,你可以用PrefixQuery:
//1 PrefixQuery query = new PrefixQuery(new Term("content", "中"));
//FuzzyQuery :FuzzyQuery用來搜索相似的term,使用Levenshtein算法。假設你想搜索跟‘wuzza’相似的詞語,你可以:
//1 Query query = new FuzzyQuery(new Term("content", "wuzza"));
//你可能得到‘fuzzy’和‘wuzzy’。
//RangeQuery: 另一個常用的Query是RangeQuery,你也許想搜索時間域從20060101到20060130之間的document,你可以用RangeQuery:
//1 RangeQuery query = new RangeQuery(new Term("time","20060101"), new Term("time","20060130"), true);
//最後的true表示用閉合區間。
因爲各個版本,他們使用的都不太一樣,下面介紹一種常用的讀取以後顯示的方式,其中Sort就是排序
// Time the search. The stopwatch has to be started, otherwise the elapsed time is always 0.
Stopwatch stopwath = Stopwatch.StartNew();
// Sort by the "CarPrice" field (the price) as FLOAT; true = descending.
// SortField.DOC, by contrast, sorts by document number, i.e. index order.
Sort sort = new Sort(new SortField("CarPrice", SortField.FLOAT, true));
TopDocs docs = searcher.Search(bQuery, (Filter)null, 9999999, sort);
stopwath.Stop();
long lSearchTime = stopwath.ElapsedMilliseconds; // search duration in milliseconds

// Iterate over the hits that were actually returned; totalHits may be
// larger than the number of entries in scoreDocs.
List<CarSourceInfoByIndex> carSourceResult = new List<CarSourceInfoByIndex>(docs.scoreDocs.Length);
for (int i = 0; i < docs.scoreDocs.Length; i++)
{
    Document doc = searcher.Doc(docs.scoreDocs[i].doc);
    CarSourceInfoByIndex carSource = new CarSourceInfoByIndex()
    {
        Id = int.Parse(doc.Get("Id")),
        CarPrice = System.Convert.ToDouble(doc.Get("CarPrice")),
        // Stored content with the search keywords highlighted.
        Recommended = SplitContent.HightLight(Request["SearchKey"], doc.Get("Content"))
    };
    carSourceResult.Add(carSource);
}
carSourceResultList2 = carSourceResult;
this.Message += string.Format("{0}條測試", docs.totalHits);
就到這裏了,不懂的可以加我QQ 10200454諮詢