1.使用方式
1.1 構建索引
/**
 * Builds a Lucene index for a text file, storing one document per line.
 *
 * <p>Each line becomes a document with two stored fields: {@code lineNum}
 * (1-based line number, indexed as a single untokenized term) and
 * {@code contents} (the raw line, tokenized by the writer's analyzer).
 *
 * <p>If {@code indexDir} already exists as a directory it is taken to be a
 * finished index and nothing is rebuilt. If building fails with an
 * {@link IOException}, the stack trace is printed, the partially written
 * index directory is removed so that a later call can retry, and
 * {@code false} is returned.
 *
 * @param filePath path of the text file to index
 * @param indexDir path of the directory that holds (or will hold) the index
 * @return {@code true} if the index directory already existed or the index
 *         was built successfully; {@code false} if a regular file with the
 *         same name as {@code indexDir} exists or the build failed with an
 *         I/O error
 * @throws Exception declared because {@code getWriter} declares it; I/O
 *         failures are handled here and reported through the return value
 */
public boolean build_index(String filePath,String indexDir) throws Exception
{
    File fileIndex = new File(indexDir);
    if (fileIndex.exists()) {
        if (fileIndex.isDirectory()) {
            System.out.println("The index has been built.");
            return true;
        }
        System.out.println("Sorry, there exits a file with the same name.");
        return false;
    }

    long startMillis = System.currentTimeMillis();
    // mkdirs() rather than mkdir(): also creates missing parent folders.
    fileIndex.mkdirs();

    boolean built = false;
    try {
        // try-with-resources closes writer, directory and reader (in that
        // order) even when indexing fails half-way through.
        try (BufferedReader bf = new BufferedReader(new FileReader(filePath));
             Directory dir = FSDirectory.open(Paths.get(indexDir));
             IndexWriter writer = getWriter(dir)) {
            String str;
            int lineNum = 1;
            while ((str = bf.readLine()) != null) {
                Document doc = new Document();
                doc.add(new StringField("lineNum", lineNum+"", Field.Store.YES));
                doc.add(new TextField("contents", str, Field.Store.YES));
                writer.addDocument(doc);
                lineNum++;
            }
        }
        // Only reached once the writer has been closed without error.
        built = true;
        long elapsedMillis = System.currentTimeMillis() - startMillis;
        System.out.println("Step One:finish building index---------------------------------------------------- total time:"+elapsedMillis+"ms \n");
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        if (!built) {
            // Otherwise the next call would see the directory and wrongly
            // report that the index has already been built.
            deletePartialIndex(fileIndex);
        }
    }
    return built;
}

/**
 * Best-effort removal of an index directory created by a failed build.
 * Deletes the files directly inside {@code indexDir} and then the directory
 * itself; deletion failures are ignored.
 */
private void deletePartialIndex(File indexDir) {
    File[] leftovers = indexDir.listFiles();
    if (leftovers != null) {
        for (File leftover : leftovers) {
            leftover.delete();
        }
    }
    indexDir.delete();
}
/**
 * Creates an {@link IndexWriter} on the given directory, configured with a
 * {@link StandardAnalyzer} (the standard tokenizer).
 *
 * @param dir directory the index is written to
 * @return a new writer bound to {@code dir}
 * @throws Exception if the writer cannot be created
 */
private IndexWriter getWriter(Directory dir) throws Exception {
    return new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()));
}
1.2 進行搜索
/**
 * Searches the {@code contents} field of the index stored in
 * {@code indexDir} for {@code queryWord}.
 *
 * <p>The query string is parsed with a {@link QueryParser} backed by a
 * {@link StandardAnalyzer}. At most {@code max_return} hits are fetched;
 * the line number and content of each hit are printed to standard output.
 *
 * @param indexDir path of the index directory
 * @param queryWord query string to search for
 * @return the stored {@code contents} value of every returned hit; an empty
 *         list when nothing matches
 * @throws IOException if the index cannot be opened or read
 * @throws ParseException if {@code queryWord} is not a valid query
 */
public List<String> searchByGeneName(String indexDir,String queryWord) throws IOException, ParseException{
    List<String> results = new ArrayList<String>();
    // try-with-resources: the reader and the directory are released even
    // when parsing or searching throws.
    try (Directory dir = FSDirectory.open(Paths.get(indexDir));
         IndexReader reader = DirectoryReader.open(dir)) {
        IndexSearcher is = new IndexSearcher(reader);
        Analyzer analyzer = new StandardAnalyzer();
        QueryParser parser = new QueryParser("contents", analyzer);
        Query query = parser.parse(queryWord);
        System.out.println("Begin to search the word:"+queryWord);
        long startMillis = System.currentTimeMillis();
        TopDocs hits = is.search(query,max_return);
        long elapsedMillis = System.currentTimeMillis() - startMillis;
        System.out.println("Step Two:finish searching---------------------------------------------------- total time::" + elapsedMillis + "ms" + "\nwe find "+ hits.totalHits + "results:");
        for (ScoreDoc scoreDoc : hits.scoreDocs) {
            // Load the stored document by its internal document id.
            Document doc = is.doc(scoreDoc.doc);
            String contents = doc.get("contents");
            System.out.println("line Number:"+doc.get("lineNum"));
            System.out.println("line Content "+contents);
            results.add(contents);
        }
    }
    return results;
}
1.3 進行測試
/**
 * Small demo: builds (or reuses) the index for a sample file, then searches
 * it for one gene name and prints every matching line.
 */
public class Test {
    public static void main(String[] args) throws Exception {
        final String sourceFile = "E:\\工作內容\\20190410_建立索引\\liugang_variant_statistics.txt";
        final String indexFolder = "E:\\工作內容\\20190410_建立索引\\liugang_variant_statistics";

        BuildIndex indexer = new BuildIndex();

        // Build the index for liugang_variant_statistics.txt; stop if that is not possible.
        if (!indexer.build_index(sourceFile, indexFolder)) {
            return;
        }

        // Run the search and print each hit (prints nothing for an empty result).
        List<String> matches = indexer.searchByGeneName(indexFolder, "LOC101929372");
        for (String match : matches) {
            System.out.println(match);
        }
    }
}
2.參考鏈接
1.https://www.cnblogs.com/huangfox/archive/2010/10/16/1853086.html
2.https://www.cnblogs.com/lirenzhujiu/p/5912243.html