創建索引 Lucene 3.0+
package test;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.Date;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.DateTools;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.SimpleFSDirectory;
import org.apache.lucene.util.Version;
import org.apache.poi.hwpf.extractor.WordExtractor;
/**
* 創建索引 Lucene 3.0+
* @author Administrator
*
*/
/**
 * Creates a Lucene 3.0+ index from the Word (.doc) files found in a source
 * directory, storing the index on local disk.
 *
 * @author Administrator
 */
public class LuceneTest {
    /**
     * @param args unused
     * @throws IOException if the index directory cannot be opened/written or a
     *         source document cannot be read
     */
    public static void main(String[] args) throws IOException {
        // Directory where the index files will be stored.
        String indexDir = "D:\\indexDir";
        // Directory containing the Word documents to index.
        String dateDir = "D:\\dateDir";
        // Filesystem-backed Directory over the index folder.
        Directory dir = new SimpleFSDirectory(new File(indexDir));
        // IndexWriter args: target directory, analyzer, create flag (true =
        // create a new index; false = append to/modify an existing one), and
        // the max-field-length policy (UNLIMITED = never truncate a field;
        // e.g. new MaxFieldLength(2) would index at most two terms).
        IndexWriter indexWriter = new IndexWriter(dir,
                new StandardAnalyzer(Version.LUCENE_30), true,
                IndexWriter.MaxFieldLength.UNLIMITED);
        try {
            File[] files = new File(dateDir).listFiles();
            // FIX: listFiles() returns null when the path does not exist or is
            // not a directory — the original would NPE on files.length.
            if (files == null) {
                System.out.println("No such directory: " + dateDir);
                return;
            }
            for (int i = 0; i < files.length; i++) {
                Document doc = new Document();
                InputStream in = new FileInputStream(files[i]);
                try {
                    // Extract the plain text of the .doc file via POI.
                    WordExtractor w = new WordExtractor(in);
                    // Build the Fields and add them to the Document.
                    doc.add(new Field("contents", w.getText(),
                            Field.Store.YES, Field.Index.ANALYZED));
                    doc.add(new Field("filename", files[i].getName(),
                            Field.Store.YES, Field.Index.NOT_ANALYZED));
                    doc.add(new Field("indexDate",
                            DateTools.dateToString(new Date(), DateTools.Resolution.DAY),
                            Field.Store.YES, Field.Index.NOT_ANALYZED));
                    // Hand the document to the IndexWriter.
                    indexWriter.addDocument(doc);
                } finally {
                    // FIX: the original never closed the per-file stream,
                    // leaking one file handle per indexed document.
                    in.close();
                }
            }
            // Report how many documents the index now contains.
            System.out.println("numDocs" + indexWriter.numDocs());
        } finally {
            // FIX: close in finally so the index write lock is released even
            // when indexing throws (the original leaked it on any exception).
            indexWriter.close();
        }
    }
}
搜索索引 Lucene 3.0+
package test;
import java.io.File;
import java.io.IOException;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.SimpleFSDirectory;
import org.apache.lucene.util.Version;
/**
* 搜索索引 Lucene 3.0+
* @author Administrator
*
*/
/**
 * Searches a Lucene 3.0+ index previously built by {@code LuceneTest},
 * printing the file names of the matching documents.
 *
 * @author Administrator
 */
public class LuceneSearch {
    /**
     * @param args unused
     * @throws IOException if the index directory cannot be opened or read
     * @throws ParseException if the query string cannot be parsed
     */
    public static void main(String[] args) throws IOException, ParseException {
        // Directory where the index files are stored.
        String indexDir = "D:\\indexDir";
        Directory dir = new SimpleFSDirectory(new File(indexDir));
        // Unlike IndexWriter, an IndexSearcher only needs the index directory.
        IndexSearcher indexSearch = new IndexSearcher(dir);
        try {
            // QueryParser args: Lucene version, the Field to search, and the
            // analyzer used to tokenize the query text.
            QueryParser queryParser = new QueryParser(Version.LUCENE_30,
                    "contents", new StandardAnalyzer(Version.LUCENE_30));
            // Build the Query object from the raw query string.
            Query query = queryParser.parse("高鐵");
            // TopDocs carries a scoreDocs[] array holding the matching doc ids.
            TopDocs hits = indexSearch.search(query, 10);
            // totalHits is the total number of matching documents.
            System.out.println("找到了" + hits.totalHits + "個");
            // Resolve each ScoreDoc back to its stored Document and print the
            // stored "filename" field.
            for (int i = 0; i < hits.scoreDocs.length; i++) {
                ScoreDoc sdoc = hits.scoreDocs[i];
                Document doc = indexSearch.doc(sdoc.doc);
                System.out.println(doc.get("filename"));
            }
        } finally {
            // FIX: close in finally so the searcher (and its underlying index
            // reader) is released even when parse/search throws — the original
            // skipped close() on any exception.
            indexSearch.close();
        }
    }
}
用到的jar包:
lucene-core-3.0.3.jar
poi-3.7-20101029.jar
poi-ooxml-3.7-20101029.jar
poi-ooxml-schemas-3.7-20101029.jar
poi-scratchpad-3.7-20101029.jar