Lucene 初次使用

1.使用方式

1.1 構建索引

   /**
     * Builds a Lucene index under {@code indexDir} from the lines of the text file at
     * {@code filePath}.
     *
     * <p>Every input line becomes one document with two stored fields: {@code lineNum}
     * (the 1-based line number, added as a {@code StringField}) and {@code contents}
     * (the whole line, added as a {@code TextField}).
     *
     * <p>If {@code indexDir} already exists nothing is (re)built: the method only reports
     * whether the existing path is a directory.
     *
     * @param filePath path of the text file to index
     * @param indexDir path of the directory that holds the index
     * @return {@code true} if {@code indexDir} already existed as a directory or the index was
     *         built by this call; {@code false} if {@code indexDir} exists but is not a directory
     * @throws Exception if the input file cannot be read, the index directory cannot be created,
     *         or writing the index fails (such failures were previously printed and ignored,
     *         which made a failed build look successful)
     */
	public boolean build_index(String filePath,String indexDir) throws Exception
	{
		File fileIndex = new File(indexDir);

		if (fileIndex.exists()) {
			if (fileIndex.isDirectory()) {
				System.out.println("The index has been built.");
				return true;
			}
			System.out.println("Sorry, there exits a file with the same name.");
			return false;
		}

		long startMillis = System.currentTimeMillis();

		// Open the input BEFORE creating the index directory: a missing or unreadable input
		// must not leave an empty directory behind that later calls would treat as a built index.
		try (BufferedReader lines = new BufferedReader(new FileReader(filePath))) {
			if (!fileIndex.mkdirs()) {
				throw new IOException("Cannot create index directory: " + indexDir);
			}

			// Resources close in reverse order: writer first, then the directory, then the reader.
			try (Directory dir = FSDirectory.open(Paths.get(indexDir));
					IndexWriter writer = getWriter(dir)) {
				String line;
				int lineNum = 1;
				while ((line = lines.readLine()) != null) {
					Document doc = new Document();
					doc.add(new StringField("lineNum", String.valueOf(lineNum), Field.Store.YES));
					doc.add(new TextField("contents", line, Field.Store.YES));
					writer.addDocument(doc);
					lineNum++;
				}
				// As before, the reported time excludes closing the writer.
				long elapsedMillis = System.currentTimeMillis() - startMillis;
				System.out.println("Step One:finish building index---------------------------------------------------- total time:"+elapsedMillis+"ms \n");
			}
			// NOTE(review): if indexing fails part-way, the partially written directory is left
			// on disk; remove it manually before retrying, otherwise it is reported as built.
		}

		return true;
	}

    /**
     * Creates an {@code IndexWriter} on {@code dir} that analyzes text with a
     * {@code StandardAnalyzer} and otherwise uses the {@code IndexWriterConfig} defaults.
     *
     * @param dir directory the index is written to
     * @return a new writer; the caller is responsible for closing it
     * @throws Exception if the writer cannot be opened on {@code dir}
     */
    private IndexWriter getWriter(Directory dir) throws Exception {
        return new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()));
    }

1.2 進行搜索

    /**
     * Searches the index stored under {@code indexDir} for {@code queryWord} in the
     * {@code contents} field and returns the stored contents of every hit.
     *
     * <p>{@code queryWord} is parsed with {@code QueryParser}, so query-syntax characters in it
     * are interpreted as query syntax. At most {@code max_return} hits are fetched. The line
     * number and contents of each hit are also printed to standard output.
     *
     * @param indexDir  path of the directory that holds the index
     * @param queryWord the term (or query expression) to search for
     * @return the {@code contents} value of each hit, in the order returned by the searcher;
     *         an empty list when nothing matches
     * @throws IOException    if the index cannot be opened or read
     * @throws ParseException if {@code queryWord} is not a valid query
     */
	public List<String> searchByGeneName(String indexDir,String queryWord) throws IOException, ParseException{
		List<String> results = new ArrayList<String>();

		// try-with-resources so the reader, directory and analyzer are released even when
		// parsing or searching throws.
		try (Directory dir = FSDirectory.open(Paths.get(indexDir));
				IndexReader reader = DirectoryReader.open(dir);
				Analyzer analyzer = new StandardAnalyzer()) {
			IndexSearcher searcher = new IndexSearcher(reader);
			Query query = new QueryParser("contents", analyzer).parse(queryWord);

			System.out.println("Begin to search the word:"+queryWord);
			long startMillis = System.currentTimeMillis();
			TopDocs hits = searcher.search(query,max_return);
			long elapsedMillis = System.currentTimeMillis() - startMillis;
			System.out.println("Step Two:finish searching---------------------------------------------------- total time::" + elapsedMillis + "ms" + "\nwe find "+ hits.totalHits + "results:");

			for (ScoreDoc scoreDoc : hits.scoreDocs) {
				// Load the stored document by its internal document id.
				Document doc = searcher.doc(scoreDoc.doc);
				String contents = doc.get("contents");
				System.out.println("line Number:"+doc.get("lineNum"));
				System.out.println("line Content "+contents);
				results.add(contents);
			}
		}

		return results;
	}

1.3 進行測試

/**
 * Manual smoke test: makes sure the index for the sample file exists, then searches it for one
 * gene name and prints every matching line.
 */
public class Test {

	public static void main(String[] args) throws Exception {
		BuildIndex indexer = new BuildIndex();

		// Input file and the directory that holds its index.
		String sourceFile = "E:\\工作內容\\20190410_建立索引\\liugang_variant_statistics.txt";
		String indexDir = "E:\\工作內容\\20190410_建立索引\\liugang_variant_statistics";

		// build_index returns false when the index path is unusable; nothing to search then.
		if (!test(indexer, sourceFile, indexDir)) {
			return;
		}

		// An empty result list simply prints nothing.
		for (String hit : indexer.searchByGeneName(indexDir, "LOC101929372")) {
			System.out.println(hit);
		}
	}

	/** Builds the index if it does not exist yet and reports whether it can be searched. */
	private static boolean test(BuildIndex indexer, String sourceFile, String indexDir) throws Exception {
		return indexer.build_index(sourceFile, indexDir);
	}

}

2.參考鏈接

1.https://www.cnblogs.com/huangfox/archive/2010/10/16/1853086.html

2.https://www.cnblogs.com/lirenzhujiu/p/5912243.html

3.https://blog.csdn.net/wpw1205/article/details/79416771

4.https://www.cnblogs.com/xiaobai1226/p/7652093.html

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章