1.使用方式
1.1 构建索引
/**
 * Builds a Lucene index over the lines of a text file.
 *
 * <p>Every line of {@code filePath} becomes one document with two stored fields:
 * {@code lineNum} (the 1-based line number, indexed as a single untokenized term)
 * and {@code contents} (the line text, indexed as analyzed text).
 *
 * <p>NOTE: a build that fails part-way leaves {@code indexDir} on disk, so a later
 * call will report the index as already built. Delete the directory before retrying.
 *
 * @param filePath path of the text file to index
 * @param indexDir path of the directory that holds the index
 * @return {@code true} if {@code indexDir} already exists as a directory (nothing is
 *         rebuilt) or if the index was built successfully; {@code false} if a regular
 *         file named {@code indexDir} exists or if an I/O error interrupted the build
 * @throws Exception if creating the index writer fails with a non-I/O error
 */
public boolean build_index(String filePath,String indexDir) throws Exception
{
    File fileIndex = new File(indexDir);
    if (fileIndex.exists()) {
        if (fileIndex.isDirectory()) {
            System.out.println("The index has been built.");
            return true;
        }
        System.out.println("Sorry, there exits a file with the same name.");
        return false;
    }

    long startMillis = System.currentTimeMillis();
    // Open the input first: if it is missing we fail before creating an empty index
    // directory that a later call would mistake for a finished index.
    try (BufferedReader reader = new BufferedReader(new FileReader(filePath))) {
        fileIndex.mkdir();
        // try-with-resources closes the writer (committing the index) and the
        // directory even when reading or indexing throws.
        try (Directory dir = FSDirectory.open(Paths.get(indexDir));
             IndexWriter writer = getWriter(dir)) {
            String line;
            int lineNum = 1;
            while ((line = reader.readLine()) != null) {
                Document doc = new Document();
                doc.add(new StringField("lineNum", lineNum + "", Field.Store.YES));
                doc.add(new TextField("contents", line, Field.Store.YES));
                writer.addDocument(doc);
                lineNum++;
            }
        }
    } catch (IOException e) {
        e.printStackTrace();
        // Report the failure instead of claiming the index is usable.
        return false;
    }

    // Measured after the writer is closed so the commit is included in the timing.
    long elapsedMillis = System.currentTimeMillis() - startMillis;
    System.out.println("Step One:finish building index---------------------------------------------------- total time:"+elapsedMillis+"ms \n");
    return true;
}
/**
 * Creates an {@link IndexWriter} on the given directory, configured with a
 * {@link StandardAnalyzer}.
 *
 * @param dir directory the writer will write the index into
 * @return a new writer; the caller is responsible for closing it
 * @throws Exception if the writer cannot be created
 */
private IndexWriter getWriter(Directory dir) throws Exception {
    // Standard analyzer (tokenizer) wrapped in an otherwise default writer configuration.
    return new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()));
}
1.2 进行搜索
/**
 * Searches the {@code contents} field of the index in {@code indexDir} for
 * {@code queryWord} and returns the stored text of every hit.
 *
 * <p>{@code queryWord} is parsed with Lucene's {@link QueryParser}, so it is
 * interpreted as query syntax rather than as a literal string. The number of hits
 * requested from the searcher is {@code max_return} (declared outside this method).
 * The line number and content of each hit are also printed to standard output.
 *
 * @param indexDir  path of the directory that holds the index
 * @param queryWord the query text to search for
 * @return the stored {@code contents} value of each hit, in the order the searcher
 *         returned them; an empty list when nothing matches
 * @throws IOException    if the index cannot be opened or read
 * @throws ParseException if {@code queryWord} is not a valid query
 */
public List<String> searchByGeneName(String indexDir,String queryWord) throws IOException, ParseException{
    List<String> results = new ArrayList<String>();
    // try-with-resources releases the directory, reader and analyzer even when
    // parsing or searching throws.
    try (Directory dir = FSDirectory.open(Paths.get(indexDir));
         IndexReader reader = DirectoryReader.open(dir);
         Analyzer analyzer = new StandardAnalyzer()) {
        IndexSearcher is = new IndexSearcher(reader);
        QueryParser parser = new QueryParser("contents", analyzer);
        Query query = parser.parse(queryWord);
        System.out.println("Begin to search the word:"+queryWord);

        long startMillis = System.currentTimeMillis();
        TopDocs hits = is.search(query,max_return);
        long elapsedMillis = System.currentTimeMillis() - startMillis;
        System.out.println("Step Two:finish searching---------------------------------------------------- total time::" + elapsedMillis + "ms" + "\nwe find "+ hits.totalHits + "results:");

        for (ScoreDoc scoreDoc : hits.scoreDocs) {
            // Load the stored document by its internal document id.
            Document doc = is.doc(scoreDoc.doc);
            String contents = doc.get("contents");
            System.out.println("line Number:"+doc.get("lineNum"));
            System.out.println("line Content "+contents);
            results.add(contents);
        }
    }
    return results;
}
1.3 进行测试
/**
 * Demo driver: builds the index for a sample file (if it does not exist yet) and
 * then runs one search against it, printing every returned line.
 */
public class Test {
    public static void main(String[] args) throws Exception {
        String sourceFile = "E:\\工作内容\\20190410_建立索引\\liugang_variant_statistics.txt";
        String indexFolder = "E:\\工作内容\\20190410_建立索引\\liugang_variant_statistics";

        BuildIndex test = new BuildIndex();
        // Build the index for liugang_variant_statistics.txt; stop if it is not available.
        if (!test.build_index(sourceFile, indexFolder)) {
            return;
        }

        // Run the search and print each matching line (nothing is printed when there are no hits).
        List<String> results = test.searchByGeneName(indexFolder, "LOC101929372");
        for (String line : results) {
            System.out.println(line);
        }
    }
}
2.参考链接
1.https://www.cnblogs.com/huangfox/archive/2010/10/16/1853086.html
2.https://www.cnblogs.com/lirenzhujiu/p/5912243.html