Lucene3.6小例子

        最近在学习Lucene。Lucene是一个开放源代码的全文检索引擎工具包。我在网上找了一些小例子,无奈网上的资料大部分是针对2.0版本的,而我下载的Lucene版本是3.6.2,很多API已经变了,照搬这些例子程序会报错。于是我去官网查了Apache Lucene 3.6的API文档,又查阅了网上的一些资料,终于把问题解决。在这里贴上一个基于Lucene 3.6的比较简单的例子,希望可以帮到像我一样处于初学阶段的人。
    Lucene主要分为两部分,一部分是建立索引文件,另一部分是根据索引文件进行搜索查询。

    先来看看建立索引的部分

import  java.io.BufferedReader;   
import  java.io.File;   
import  java.io.FileInputStream;   
import  java.io.IOException;   
import  java.io.InputStreamReader;   
import  org.apache.lucene.analysis.standard.StandardAnalyzer;
import  org.apache.lucene.document.Document;
import  org.apache.lucene.document.Field;
import  org.apache.lucene.document.Field.Index;
import  org.apache.lucene.document.Field.Store;
import  org.apache.lucene.index.IndexWriter;
import  org.apache.lucene.index.IndexWriterConfig;
import  org.apache.lucene.store.FSDirectory;
import  org.apache.lucene.util.Version;

/**
 * Minimal Lucene 3.6 indexing example.
 *
 * <p>Reads every {@code .txt} file found directly inside a source directory and
 * adds it to an on-disk index as one document per file, with a stored
 * (non-indexed) {@code path} field and a stored, analyzed {@code content} field.
 */
public class LuceneTest {
    /** Character encoding used to decode the text files that are indexed. */
    private static final String ENCODE = "GBK";

    /**
     * Reads a text file from the local disk, decoding it with {@link #ENCODE}.
     *
     * <p>The file is read line by line and every line is followed by a single
     * {@code "\n"} in the result, regardless of the terminator used in the file.
     *
     * @param szFileName the file to read
     * @return the decoded file content, or an empty string when
     *         {@code szFileName} is {@code null} or the file cannot be read
     */
    public static String openFile(File szFileName) {
        if (szFileName == null) {
            return "";
        }
        FileInputStream in = null;
        try {
            in = new FileInputStream(szFileName);
            BufferedReader reader = new BufferedReader(new InputStreamReader(in, ENCODE));
            // StringBuilder avoids the quadratic copying of repeated String concatenation.
            StringBuilder content = new StringBuilder();
            String line;
            while ((line = reader.readLine()) != null) {
                content.append(line).append('\n');
            }
            return content.toString();
        } catch (IOException e) {
            // Best effort: an unreadable file yields empty content, but the
            // failure is reported instead of being hidden.
            System.err.println("Failed to read " + szFileName + ": " + e);
            return "";
        } finally {
            // Closing the underlying stream releases the file handle even when
            // reading failed part-way through.
            if (in != null) {
                try {
                    in.close();
                } catch (IOException ignored) {
                    // Nothing useful can be done if close itself fails.
                }
            }
        }
    }

    /**
     * Builds the index: each {@code .txt} file in the source directory becomes
     * one document in the index directory.
     *
     * @throws IOException if the source directory cannot be listed, or the
     *         index cannot be opened or written
     */
    public void test() throws IOException {
        // Directory that holds the index files.
        File indexDir = new File("G:/Workspaces/Eclipse/LuceneTest/indexdata");
        // Directory that holds the files to be indexed.
        File sourceDir = new File("G:/Workspaces/Eclipse/Heritrix/jobs/360BuyIndexDatabase");

        // listFiles() returns null when the path is not a directory or cannot be read.
        File[] textFiles = sourceDir.listFiles();
        if (textFiles == null) {
            throw new IOException("Cannot list files in " + sourceDir
                    + " (missing, not a directory, or unreadable)");
        }

        FSDirectory directory = FSDirectory.open(indexDir);
        try {
            // Writer configuration used to create the index.
            IndexWriterConfig conf =
                    new IndexWriterConfig(Version.LUCENE_36, new StandardAnalyzer(Version.LUCENE_36));
            IndexWriter writer = new IndexWriter(directory, conf);
            try {
                for (int i = 0; i < textFiles.length; i++) {
                    File textFile = textFiles[i];
                    if (!textFile.isFile() || !textFile.getName().endsWith(".txt")) {
                        continue;
                    }
                    System.out.println("File" + textFile.getCanonicalPath() + "正在被索引");
                    String szContent = openFile(textFile);

                    Document doc = new Document();
                    // The path is stored for display only; it is not searchable.
                    Field fieldPath = new Field("path", textFile.getPath(), Store.YES, Index.NO);
                    // The content is both stored and tokenized for full-text search.
                    Field fieldBody = new Field("content", szContent, Store.YES, Index.ANALYZED);
                    System.out.println(szContent);
                    doc.add(fieldPath);
                    doc.add(fieldBody);
                    writer.addDocument(doc);
                }
            } finally {
                // The index is only written to disk once the writer is closed;
                // closing in finally also releases the index write lock on failure.
                writer.close();
            }
        } finally {
            directory.close();
        }
    }

    /**
     * Entry point: runs the indexing example.
     *
     * @param args unused
     * @throws IOException if indexing fails
     */
    public static void main(String[] args) throws IOException {
        LuceneTest indexer = new LuceneTest();
        indexer.test();
    }
}


           下面是根据已经建立的索引文件进行搜索查询的部分:

          

import java.io.File;
import java.io.IOException;
import java.util.Date; 

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.queryParser.ParseException;   
import org.apache.lucene.queryParser.QueryParser; 
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.util.Version;
import org.apache.lucene.search.Query;


/**
 * Minimal Lucene 3.6 search example.
 *
 * <p>Opens an existing on-disk index, runs a fixed query against the
 * {@code content} field and prints the stored content of the best matches.
 */
public class LuceneSearch {
    /**
     * Searches the index for a fixed term and prints, for each of the top five
     * hits, the stored {@code content} field followed by the time in
     * milliseconds it took to load that document.
     *
     * <p>If the query text cannot be parsed, the problem is reported on
     * standard error and nothing is printed to standard output.
     *
     * @throws IOException if the index cannot be opened or read
     */
    public void Search() throws IOException {
        // Directory that holds the index files.
        File indexDir = new File("G:/Workspaces/Eclipse/LuceneTest/indexdata");
        FSDirectory directory = FSDirectory.open(indexDir);
        try {
            IndexReader reader = IndexReader.open(directory);
            try {
                // Closing a searcher built from a reader does not close that
                // reader, so both are released explicitly.
                IndexSearcher searcher = new IndexSearcher(reader);
                try {
                    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_36);
                    QueryParser qp = new QueryParser(Version.LUCENE_36, "content", analyzer);
                    Query query = qp.parse("程序"); // text to search for
                    // Keep only the five best-matching documents.
                    ScoreDoc[] docs = searcher.search(query, 5).scoreDocs;

                    for (int i = 0; i < docs.length; i++) {
                        long start = System.currentTimeMillis();
                        String content = searcher.doc(docs[i].doc).get("content");
                        long end = System.currentTimeMillis();
                        System.out.println(content + (end - start) + "ms");
                    }
                } catch (ParseException e) {
                    // Report the malformed query instead of silently printing nothing.
                    System.err.println("Failed to parse query: " + e);
                } finally {
                    searcher.close();
                }
            } finally {
                reader.close();
            }
        } finally {
            directory.close();
        }
    }

    /**
     * Entry point: runs the search example.
     *
     * @param args unused
     * @throws IOException if the index cannot be opened or read
     */
    public static void main(String[] args) throws IOException {
        LuceneSearch ls = new LuceneSearch();
        ls.Search();
    }
}

搜索结果:


發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章