Lucene 4.0 Tutorial

This article uses Lucene 4.0.0 together with the IKAnalyzer Chinese analyzer.

1. Creating the index

package com.jmj.project.web;

import java.io.File;
import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.wltea.analyzer.lucene.IKAnalyzer;

public class Abc {
	private Logger logger = LoggerFactory.getLogger(getClass());

	public String createIndex() {
		String[] ids = { "1", "2" };
		String[] contents = { "我是第一條內容,i am lucene IKAnalyzer", "我在北京天安門廣場吃炸雞" };
		String[] citys = { "北京", "上海" };
		String indexPath = "/home/iflow/luceneindex"; // directory where the index files are stored
		// new IKAnalyzer() defaults to the finest-grained segmentation; new IKAnalyzer(true) uses smart segmentation
		Analyzer analyzer = new IKAnalyzer();
		IndexWriter indexWriter = null;
		Directory directory = null;
		try {
			directory = FSDirectory.open(new File(indexPath));
			indexWriter = getIndexWriter(directory, analyzer);
		} catch (Exception e) {
			// bail out early; otherwise indexWriter stays null and addDocument() throws a NullPointerException
			logger.error("Failed to open the index directory!", e);
			return null;
		}
		// add documents to the index
		try {
			Document document = null;
			for (int i=0; i < ids.length; i++) {
				document = new Document();

				document.add(new StringField("id", ids[i], Field.Store.YES));
				Field content = new TextField("content", contents[i], Field.Store.YES);
				Field city = new TextField("city", citys[i], Field.Store.YES);

				document.add(content);
				document.add(city);

				// a higher boost ranks matches on that field higher in the results
				content.setBoost(2.0f);
				city.setBoost(1.0f);
				indexWriter.addDocument(document);
			}
			// commit once after the loop rather than once per document
			indexWriter.commit();
		} catch (IOException e1) {
			logger.info("索引創建異常!");
		}
		try {
			closeWriter(indexWriter);
		} catch (Exception e) {
			logger.info("索引關閉異常!");
		}
		logger.info("索引創建成功!");
		return null;
	}

	/**
	 * Obtain an IndexWriter for the given directory and analyzer.
	 * 
	 * @param dir the index directory
	 * @param analyzer the analyzer used for tokenization
	 * @return a configured IndexWriter
	 * @throws IOException
	 */
	private IndexWriter getIndexWriter(Directory dir, Analyzer analyzer) throws IOException {
		IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_40, analyzer);
		return new IndexWriter(dir, iwc);
	}

	/**
	 * Close the IndexWriter.
	 * 
	 * @throws IOException
	 */
	private void closeWriter(IndexWriter indexWriter) throws IOException {
		if (indexWriter != null) {
			indexWriter.close();
		}
	}
}
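
The constructor flag mentioned in the comment above decides how IKAnalyzer segments Chinese text: the no-argument constructor produces the finest-grained tokens, while new IKAnalyzer(true) switches to smart segmentation. The following standalone snippet is a minimal sketch of how to compare the two modes (the class name TokenizerDemo and the reuse of the second sample sentence are my own choices, not part of the original listing):

import java.io.StringReader;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.wltea.analyzer.lucene.IKAnalyzer;

public class TokenizerDemo {
	public static void main(String[] args) throws Exception {
		printTokens(new IKAnalyzer(), "fine-grained"); // default mode
		printTokens(new IKAnalyzer(true), "smart");    // smart segmentation
	}

	private static void printTokens(Analyzer analyzer, String mode) throws Exception {
		// tokenize the sample sentence and print every term the analyzer emits
		TokenStream ts = analyzer.tokenStream("content", new StringReader("我在北京天安門廣場吃炸雞"));
		CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
		ts.reset();
		StringBuilder sb = new StringBuilder(mode + ": ");
		while (ts.incrementToken()) {
			sb.append(term.toString()).append(" | ");
		}
		ts.end();
		ts.close();
		System.out.println(sb);
	}
}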

2. Searching the index

package com.jmj.project.web;

import java.io.File;
import java.io.IOException;
import java.util.Map;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryparser.classic.MultiFieldQueryParser;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.wltea.analyzer.lucene.IKAnalyzer;

public class Abc {

	private Logger logger = LoggerFactory.getLogger(getClass());

	public void indexSearch(Map<String, Object> filterMap)
			throws ParseException, IOException, InvalidTokenOffsetsException, java.text.ParseException {
		String indexPath = "/home/iflow/luceneindex"; // directory where the index files are stored
		Analyzer analyzer = new IKAnalyzer();
		Directory directory = null;
		try {
			directory = FSDirectory.open(new File(indexPath));
		} catch (Exception e) {
			// without a directory there is nothing to search, so return early
			logger.error("Failed to open the index directory!", e);
			return;
		}
		IndexReader ireader = null;
		IndexSearcher isSearcher = null;
		try {
			// in Lucene 4.0 a reader over a directory is obtained via DirectoryReader.open
			ireader = DirectoryReader.open(directory);
		} catch (IOException e) {
			logger.error("Failed to open the index reader!", e);
			return;
		}
		isSearcher = new IndexSearcher(ireader);
		BooleanQuery booleanQuery = new BooleanQuery();

		// e.g. querying several fields at once
//		String text = "北京";
//		String[] fields = { "content", "city" };
//		QueryParser qp = new MultiFieldQueryParser(Version.LUCENE_40, fields, analyzer);
//		Query query = qp.parse(text);
//		booleanQuery.add(query, Occur.MUST);

		// e.g. querying a single field
		String text = "北京";
		QueryParser qp = new QueryParser(Version.LUCENE_40, "content", analyzer);
		Query query = qp.parse(text);
		booleanQuery.add(query, Occur.MUST);

		// fetch the 100 most relevant matches
		TopDocs topDocs = isSearcher.search(booleanQuery, 100);
		// print the results
		ScoreDoc[] scoreDocs = topDocs.scoreDocs;
		for (int i = 0; i < scoreDocs.length; i++) {
			Document targetDoc = isSearcher.doc(scoreDocs[i].doc);
			System.out.println(targetDoc.get("id") + " " + targetDoc.get("content") + " " + targetDoc.get("city"));
		}
		ireader.close();
		directory.close();
	}
}
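
The pom below also pulls in lucene-highlighter, and indexSearch already declares InvalidTokenOffsetsException, yet the listing never highlights anything. The following fragment is a minimal sketch of how the matched terms could be wrapped in <em> tags inside the result loop; the tag strings and the fragment size of 100 characters are arbitrary choices, not something from the original code. It reuses the query, analyzer, and targetDoc variables defined in the method above:

// additional imports:
// import org.apache.lucene.search.highlight.Highlighter;
// import org.apache.lucene.search.highlight.QueryScorer;
// import org.apache.lucene.search.highlight.SimpleFragmenter;
// import org.apache.lucene.search.highlight.SimpleHTMLFormatter;

// wrap matched terms in <em> tags and score fragments against the parsed query
SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<em>", "</em>");
Highlighter highlighter = new Highlighter(formatter, new QueryScorer(query));
highlighter.setTextFragmenter(new SimpleFragmenter(100));

// inside the for loop, instead of printing the raw field value:
String snippet = highlighter.getBestFragment(analyzer, "content", targetDoc.get("content"));
System.out.println(targetDoc.get("id") + " " + (snippet != null ? snippet : targetDoc.get("content")));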

3. pom.xml dependencies

		<dependency>
			<groupId>org.apache.lucene</groupId>
			<artifactId>lucene-core</artifactId>
			<version>4.0.0</version>
		</dependency>

		<dependency>
			<groupId>org.apache.lucene</groupId>
			<artifactId>lucene-queryparser</artifactId>
			<version>4.0.0</version>
		</dependency>

		<dependency>
			<groupId>org.apache.lucene</groupId>
			<artifactId>lucene-highlighter</artifactId>
			<version>4.0.0</version>
		</dependency>

		<dependency>
			<groupId>com.lucene</groupId>
			<artifactId>ikAnalyzer</artifactId>
			<version>RELEASE</version>
		</dependency>
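
Note: unlike the Lucene artifacts, IKAnalyzer is generally not available from Maven Central under the com.lucene:ikAnalyzer coordinates used above, so the jar usually has to be installed into the local repository first, e.g. with mvn install:install-file -Dfile=IKAnalyzer2012FF_u1.jar -DgroupId=com.lucene -DartifactId=ikAnalyzer -Dversion=2012FF_u1 -Dpackaging=jar, with the <version> element changed to match. The jar file name and version here are only examples; adjust them to the IKAnalyzer build you actually downloaded.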