Lucene 的特點及作用文章裏就不說了,網上有的是。我就簡單說下自己的理解:
正常 SQL 查詢時:name like '%繼中%',想必咱們一定明白這樣是不會走索引的,於是在數據量很大時查詢響應時間會很慢,對吧,因爲數據庫在一行行掃描呢。所以咱們自然會想到:怎樣能讓它走索引?
解決方案之一:lucene出來了。
其實它就是幫你把文章拆分成若干個關鍵字,以便按關鍵字查詢時能通過關鍵字直接確定哪些文章匹配該關鍵字並快速返回。說得再直白點,就是 SQL 語句的查詢不必用 like,而是 name = '繼中',這樣就走索引了,所以查詢就快了。
下面來說正題:Spring 框架下配置 Lucene,Lucene 版本:3.0.3。直接上代碼,通過代碼我來分享下各行的作用。
mvc-config.xml:
複製代碼
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns:mvc="http://www.springframework.org/schema/mvc"
xmlns:context="http://www.springframework.org/schema/context"
xmlns:util="http://www.springframework.org/schema/util"
xsi:schemaLocation="
http://www.springframework.org/schema/beans http://www.springframework.org/schema/beans/spring-beans-3.0.xsd
http://www.springframework.org/schema/mvc http://www.springframework.org/schema/mvc/spring-mvc-3.0.xsd
http://www.springframework.org/schema/context http://www.springframework.org/schema/context/spring-context-3.0.xsd
http://www.springframework.org/schema/util http://www.springframework.org/schema/util/spring-util-3.0.xsd"
default-autowire="byName" >
text/plain;charset=UTF-8
複製代碼
以上是 Spring 配置文件中關於 Lucene 的代碼片段,看起來是不是很簡單?
咱們繼續看代碼:
複製代碼
package com.jizhong.mmmmm.controller;
import java.io.IOException;
import java.io.StringReader;
import javax.servlet.http.HttpServletRequest;
import org.apache.log4j.Logger;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.NumericField;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.util.Version;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Controller;
import org.springframework.ui.ModelMap;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RequestMethod;
/**
 * Spring MVC controller that demonstrates basic Lucene index operations:
 * creating, updating and deleting documents, optimizing the index, running
 * several fixed demo searches, and printing the tokens an analyzer produces.
 * The demo handlers print their results to standard output.
 */
@Controller
public class LuceneController {
private static Logger logger = Logger.getLogger(LuceneController.class);
// required = false marks the dependency as optional. The original author reports
// that omitting it caused an error (the cause is not shown in this file) and
// asked readers for a better solution.
@Autowired(required = false)
private Analyzer myAnalyzer;
// Writer used by the create/update/delete/optimize handlers.
@Autowired(required = false)
private IndexWriter indexWriter;
// Searcher used by the search methods; they replace it when the index has changed.
@Autowired(required = false)
private IndexSearcher searcher;
/**
 * Handles {@code GET search.do} by running the fixed demo query in {@link #search()}.
 *
 * @param request  current HTTP request (not read)
 * @param modelMap view model (not modified)
 * @return the view name {@code "test"}
 * @throws Exception if the search fails
 */
@RequestMapping(method = RequestMethod.GET, value = "search.do")
public String testsSearch(HttpServletRequest request, ModelMap modelMap) throws Exception {
    this.search();
    final String viewName = "test";
    return viewName;
}
/**
 * Handles {@code GET idSearch.do} by running the fixed id lookup in {@link #idSearch()}.
 *
 * @param request  current HTTP request (not read)
 * @param modelMap view model (not modified)
 * @return the view name {@code "test"}
 * @throws Exception if the lookup fails
 */
@RequestMapping(method = RequestMethod.GET, value = "idSearch.do")
public String idSearch(HttpServletRequest request, ModelMap modelMap) throws Exception {
    this.idSearch();
    final String viewName = "test";
    return viewName;
}
/**
 * Handles {@code GET moreSearch.do} by running the combined demo query in
 * {@link #searchMore()}.
 *
 * @param request  current HTTP request (not read)
 * @param modelMap view model (not modified)
 * @return the view name {@code "test"}
 * @throws Exception if the search fails
 */
@RequestMapping(method = RequestMethod.GET, value = "moreSearch.do")
public String moreSearch(HttpServletRequest request, ModelMap modelMap) throws Exception {
    this.searchMore();
    final String viewName = "test";
    return viewName;
}
/**
 * Handles {@code GET create.do} by bulk-indexing demo documents through
 * {@link #create(String)}.
 *
 * <p>The name fragment is hard-coded; the request's {@code name} parameter is
 * currently not read.
 *
 * @param request  current HTTP request (not read)
 * @param modelMap view model (not modified)
 * @return the view name {@code "test"}
 * @throws Exception if indexing fails
 */
@RequestMapping(method = RequestMethod.GET, value = "create.do")
public String testsCreate(HttpServletRequest request, ModelMap modelMap) throws Exception {
    final String nameFragment = "×××值增加";
    this.create(nameFragment);
    return "test";
}
/**
 * Handles {@code GET delete.do}: deletes the documents whose {@code id} term
 * equals the request's {@code id} parameter.
 *
 * @param request  current HTTP request; its {@code id} parameter is the term text
 * @param modelMap view model (not modified)
 * @return the view name {@code "test"}
 * @throws Exception if the deletion or commit fails
 */
@RequestMapping(method = RequestMethod.GET, value = "delete.do")
public String delete(HttpServletRequest request, ModelMap modelMap) throws Exception {
    final String idText = request.getParameter("id");
    this.delete("id", idText);
    return "test";
}
/**
 * Handles {@code GET optimize.do} by asking the index writer to optimize the index.
 *
 * <p>Author's advice: optimizing is time-consuming, so do not call it often;
 * running it once in a while is enough.
 *
 * @param request  current HTTP request (not read)
 * @param modelMap view model (not modified)
 * @return the view name {@code "test"}
 * @throws Exception if the optimization fails
 */
@RequestMapping(method = RequestMethod.GET, value = "optimize.do")
public String optimize(HttpServletRequest request, ModelMap modelMap) throws Exception {
    this.indexWriter.optimize();
    final String viewName = "test";
    return viewName;
}
/**
 * Handles {@code GET update.do}: replaces the document whose {@code id} term is
 * {@code "1999991"} with a fixed demo document.
 *
 * <p>{@code updateDocument} replaces the whole document (the author describes it
 * as delete-then-add), so every field is supplied again even if only one changes.
 *
 * @param request  current HTTP request (not read)
 * @param modelMap view model (not modified)
 * @return the view name {@code "test"}
 * @throws Exception if the update or commit fails
 */
@RequestMapping(method = RequestMethod.GET, value = "update.do")
public String update(HttpServletRequest request, ModelMap modelMap) throws Exception {
    final String docId = String.valueOf(1999991);
    final String levelValue = String.valueOf(555555);
    Term idTerm = new Term("id", docId);

    Document replacement = new Document();
    replacement.add(new Field("id", docId, Store.YES, Index.NOT_ANALYZED));
    replacement.add(new Field("name", 555555 + "555555" + 555555, Store.YES, Index.ANALYZED));
    for (String levelField : new String[] { "level1", "level2", "level3" }) {
        replacement.add(new Field(levelField, levelValue, Store.YES, Index.NOT_ANALYZED));
    }
    replacement.add(new Field("brand_id", String.valueOf(555555 + 100000), Store.YES, Index.NOT_ANALYZED));

    indexWriter.updateDocument(idTerm, replacement);
    // Every change to the index must be committed before it takes effect.
    indexWriter.commit();
    return "test";
}
/**
 * Deletes every document containing the given term and commits the change.
 *
 * @param field name of the indexed field to match
 * @param text  exact term text to match in that field
 * @throws CorruptIndexException if the index is corrupt
 * @throws IOException           on a low-level I/O error
 */
private void delete(String field, String text) throws CorruptIndexException, IOException {
    indexWriter.deleteDocuments(new Term(field, text));
    indexWriter.commit();
}
/**
 * Bulk-indexes 10,000 demo documents (ids 6,040,000 to 6,049,999) and commits them.
 *
 * <p>Each document gets the fields {@code id}, {@code name}, {@code level1},
 * {@code level2}, {@code level3}, {@code brand_id} and {@code hehe}. Progress and
 * the elapsed time in whole seconds are printed to standard output.
 *
 * @param string text placed between two copies of the id to form the {@code name} field
 * @throws Exception if adding a document or committing fails
 */
public void create(String string) throws Exception {
    long begin = System.currentTimeMillis();
    for (int m = 604; m < 605; m++) {
        for (int i = m * 10000; i < (m + 1) * 10000; i++) {
            Document doc = new Document();
            // Index the id as an untokenized string term. The previous
            // NumericField("id", 6, Store.YES, false) only stored the value
            // (index = false), so Term-based delete/update, the id query and the
            // id sort could never match documents created here.
            doc.add(new Field("id", String.valueOf(i), Store.YES, Index.NOT_ANALYZED_NO_NORMS));
            // Indexing strategy: analyze (tokenize) only fields that need fuzzy
            // matching; index everything else as a single untokenized term.
            doc.add(new Field("name", i + string + i, Store.YES, Index.ANALYZED));
            doc.add(new Field("level1", String.valueOf(3), Store.YES, Index.NOT_ANALYZED_NO_NORMS));
            doc.add(new Field("level2", String.valueOf(2), Store.YES, Index.NOT_ANALYZED_NO_NORMS));
            doc.add(new Field("level3", String.valueOf(1), Store.YES, Index.NOT_ANALYZED_NO_NORMS));
            doc.add(new Field("brand_id", String.valueOf(i + 100000), Store.YES, Index.NOT_ANALYZED_NO_NORMS));
            doc.add(new Field("hehe", String.valueOf(i + 100000), Store.YES, Index.NOT_ANALYZED_NO_NORMS));
            indexWriter.addDocument(doc);
        }
        System.out.println(m);
    }
    // Added documents become visible to readers only after the commit.
    indexWriter.commit();
    System.out.println("create cost:" + (System.currentTimeMillis() - begin) / 1000 + "s");
}
/**
 * Runs a fixed demo query: documents whose {@code name} matches "×××" AND whose
 * {@code level3} matches "1", sorted by {@code id} in descending order, at most
 * 53 hits. The hits and the elapsed time in whole seconds are printed to
 * standard output.
 *
 * @throws Exception if parsing the query or searching the index fails
 */
public void search() throws Exception {
    final long startedAt = System.currentTimeMillis();

    // Field names and query texts are paired by position, so keep both arrays aligned.
    String[] searchTexts = { "×××", "1" };
    String[] searchFields = { "name", "level3" };
    // MUST on both clauses gives an AND relationship; see the Lucene docs for other policies.
    BooleanClause.Occur[] occurs = { BooleanClause.Occur.MUST, BooleanClause.Occur.MUST };
    Query parsed = MultiFieldQueryParser.parse(Version.LUCENE_30, searchTexts, searchFields, occurs, myAnalyzer);

    // If the index has changed since the reader was opened, switch to a reopened
    // reader so that freshly committed documents can be found.
    IndexReader current = searcher.getIndexReader();
    if (!current.isCurrent()) {
        searcher = new IndexSearcher(current.reopen());
    }
    System.out.println(searcher.maxDoc());

    // Sort strategy: by the integer value of "id", reversed.
    Sort byIdDescending = new Sort(new SortField("id", SortField.INT, true));
    TopDocs hits = searcher.search(parsed, null, 53, byIdDescending);
    ScoreDoc[] scored = hits.scoreDocs;
    for (int n = 0; n < scored.length; n++) {
        Document hit = searcher.doc(scored[n].doc);
        System.out.println("id:" + hit.get("id"));
        System.out.println("name:" + hit.get("name"));
        System.out.println("level3:" + hit.get("level3"));
        System.out.println("new field:" + hit.get("hehe"));
    }

    long elapsedSeconds = (System.currentTimeMillis() - startedAt) / 1000;
    System.out.println("search cost:" + elapsedSeconds + "s");
}
private void idSearch() throws ParseException, CorruptIndexException, IOException {
long begin = System.currentTimeMillis();
QueryParser qp = new QueryParser(Version.LUCENE_30, "id", myAnalyzer);
Query query = qp.parse("4040011");
IndexReader readerNow = searcher.getIndexReader();
if (!readerNow.isCurrent()) {
searcher = new IndexSearcher(readerNow.reopen());
}
TopDocs topDocs = searcher.search(query, null, 53);
for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
Document doc = searcher.doc(scoreDoc.doc);
System.out.println("id:" + doc.get("id"));
System.out.println("name:" + doc.get("name"));
System.out.println("level3:" + doc.get("level3"));
System.out.println("new field:" + doc.get("hehe"));
* @param url = "http://www.shoudashou.com","plus");
* @param url = "http://www.fanselang.com","plus");
* @param url = "http://www.3h5.cn","plus");
* @param url = "http://www.4lunwen.cn","plus");
* @param url = "http://www.zx1234.cn","plus");
* @param url = "http://www.penbar.cn","plus");
* @param url = "http://www.lunjin.net","plus");
* @param url = "http://www.ssstyle.cn","plus");
* @param url = "http://www.91fish.cn","plus");
}
System.out.println("search cost:" + (System.currentTimeMillis() - begin) / 1000 + "s");
}
/**
 * Runs a fixed compound demo query and prints the hits.
 *
 * <p>Two sub-queries are built over the fields {@code name} and {@code level2},
 * each with SHOULD/SHOULD clauses: ("kkk", "222222") and ("99980", "222222").
 * Both sub-queries are then required (MUST) in an enclosing boolean query.
 * Results are sorted by {@code id} in descending order, at most 53 hits. The hits
 * and the elapsed time in whole seconds are printed to standard output.
 *
 * @throws Exception if parsing a query or searching the index fails
 */
public void searchMore() throws Exception {
    final long startedAt = System.currentTimeMillis();

    String[] firstTexts = { "kkk", "222222" };
    String[] secondTexts = { "99980", "222222" };
    String[] searchFields = { "name", "level2" };
    BooleanClause.Occur[] eitherField = { BooleanClause.Occur.SHOULD, BooleanClause.Occur.SHOULD };
    Query first = MultiFieldQueryParser.parse(Version.LUCENE_30, firstTexts, searchFields, eitherField, myAnalyzer);
    Query second = MultiFieldQueryParser.parse(Version.LUCENE_30, secondTexts, searchFields, eitherField, myAnalyzer);

    // Both sub-queries must match.
    BooleanQuery combined = new BooleanQuery();
    combined.add(first, BooleanClause.Occur.MUST);
    combined.add(second, BooleanClause.Occur.MUST);

    // If the index has changed since the reader was opened, switch to a reopened
    // reader so that freshly committed documents can be found.
    IndexReader current = searcher.getIndexReader();
    if (!current.isCurrent()) {
        searcher = new IndexSearcher(current.reopen());
    }
    System.out.println(searcher.maxDoc());

    Sort byIdDescending = new Sort(new SortField("id", SortField.INT, true));
    TopDocs hits = searcher.search(combined, null, 53, byIdDescending);
    ScoreDoc[] scored = hits.scoreDocs;
    for (int n = 0; n < scored.length; n++) {
        Document hit = searcher.doc(scored[n].doc);
        System.out.println("id:" + hit.get("id"));
        System.out.println("name:" + hit.get("name"));
        System.out.println("level3:" + hit.get("level3"));
        System.out.println("new field:" + hit.get("hehe"));
    }

    long elapsedSeconds = (System.currentTimeMillis() - startedAt) / 1000;
    System.out.println("search cost:" + elapsedSeconds + "s");
}
/**
 * Handles {@code GET result.do}: analyzes the fixed sample text "愛國者mp3" as
 * the {@code name} field and prints each resulting term to standard output, so
 * the configured analyzer's tokenization can be inspected.
 *
 * @throws IOException if the token stream fails while reading or closing
 */
@RequestMapping(value = "result.do", method = RequestMethod.GET)
public void getAnalyzerResult() throws IOException {
    StringReader reader = new StringReader("愛國者mp3");
    TokenStream ts = myAnalyzer.tokenStream("name", reader);
    try {
        // addAttribute returns the attribute instance, which the stream updates
        // in place on every incrementToken(), so it is fetched once.
        TermAttribute ta = ts.addAttribute(TermAttribute.class);
        while (ts.incrementToken()) {
            System.out.println(ta.term());
        }
        ts.end();
    } finally {
        // Release the stream and its underlying reader even if tokenization fails.
        ts.close();
    }
}
複製代碼
}