这几天用 Spring Boot 和 MyBatis 框架搭建了一个博客系统,并整合了 Lucene 全文检索和定时器功能。其他的就不多说了,下面看看如何整合 Lucene。
1.在pom.xml中添加全文检索的jar包
<!-- Lucene core library -->
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-core</artifactId>
<version>5.5.0</version>
</dependency>
<!-- Lucene query parser -->
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-queryparser</artifactId>
<version>5.5.0</version>
</dependency>
<!-- Lucene common analyzers -->
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-analyzers-common</artifactId>
<version>5.5.0</version>
</dependency>
<!-- Chinese word segmentation (SmartChineseAnalyzer) -->
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-analyzers-smartcn</artifactId>
<version>5.5.0</version>
</dependency>
<!-- Keyword highlighting in search results -->
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-highlighter</artifactId>
<version>5.5.0</version>
</dependency>
2.创建实体类
package com.wchstrife.entity.common;
import org.apache.lucene.store.Directory;
/**
 * Mutable holder for the article data that is fed into the Lucene index.
 *
 * <p>The three arrays are stored and returned by reference; no copies are made.
 *
 * Created by T430 on 2017/8/18.
 */
public class Indexer {

    /** Article primary-key IDs. */
    private Integer[] ids;

    /** Article titles. */
    private String[] titles;

    /** Article body texts. */
    private String[] contents;

    /** Lucene directory associated with this data. */
    private Directory dir;

    // ---- ids ----

    public Integer[] getIds() {
        return this.ids;
    }

    public void setIds(Integer[] ids) {
        this.ids = ids;
    }

    // ---- titles ----

    public String[] getTitles() {
        return this.titles;
    }

    public void setTitles(String[] titles) {
        this.titles = titles;
    }

    // ---- contents ----

    public String[] getContents() {
        return this.contents;
    }

    public void setContents(String[] contents) {
        this.contents = contents;
    }

    // ---- dir ----

    public Directory getDir() {
        return this.dir;
    }

    public void setDir(Directory dir) {
        this.dir = dir;
    }
}
3. 创建 service 层
package com.wchstrife.service;
import com.wchstrife.entity.article;
import com.wchstrife.entity.common.Indexer;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.Directory;
import java.io.IOException;
import java.util.List;
/**
 * Service contract for building and querying the full-text index.
 *
 * Created by T430 on 2017/8/18.
 */
public interface IndexService {

    /**
     * Returns the article data to be indexed.
     */
    Indexer getIndexer();

    /**
     * Obtains an {@link IndexWriter} instance for the given directory.
     *
     * @param directory the Lucene directory the writer operates on
     */
    IndexWriter getWriter(Directory directory) throws Exception;

    /**
     * Builds the index.
     *
     * @param indexDir location where the index is saved
     */
    void index(String indexDir) throws Exception;

    /**
     * Runs a full-text search.
     *
     * @param indexDir location of the index
     * @param q        the search keywords
     * @return the matching articles
     */
    List<article> search(String indexDir, String q) throws Exception;

    /**
     * Closes the given index writer.
     *
     * @param writer the writer to close
     */
    void closed(IndexWriter writer) throws IOException;
}
4. 创建 serviceImpl 层
package com.wchstrife.service.impl;
import com.wchstrife.dao.BlogArticleMapper;
import com.wchstrife.entity.article;
import com.wchstrife.entity.common.Indexer;
import com.wchstrife.service.BaseServiceImpl;
import com.wchstrife.service.IndexService;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.document.*;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.*;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
import java.io.IOException;
import java.io.StringReader;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.List;
/**
* 全文索引的实现层
* Created by T430 on 2017/8/18.
*/
@Service
public class IndexServiceImpl extends BaseServiceImpl implements IndexService{
@Autowired
BlogArticleMapper blogArticleMapper;
//获取Indexr
public Indexer getIndexer(){
Indexer dir=new Indexer();
List<article> arclist= blogArticleMapper.getAllArticles();
Integer[] ids=new Integer[arclist.size()];//设置对应的ID数组长度
String[] titles=new String[arclist.size()];//设置对应的title数组长度
String[] contents=new String[arclist.size()];//设置对应的contents数组长度
if (arclist!=null && arclist.size()>0){
for (int i = 0; i < arclist.size(); i++) {
ids[i]=arclist.get(i).getId();
titles[i]=arclist.get(i).getTitle();
contents[i]=arclist.get(i).getContent();
}
}
dir.setIds(ids);
dir.setTitles(titles);
dir.setContents(contents);
System.out.println("分词是:===="+dir.getIds()[1]);
return dir;
}
/**
* 获取inderWriter示例
* @return
* @throws Exception
*/
@Override
public IndexWriter getWriter(Directory directory) throws Exception{
//中文分词器
SmartChineseAnalyzer analyzer= new SmartChineseAnalyzer();
IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
IndexWriter writer=new IndexWriter(directory,iwc);//把分的词和字典都写入进去
return writer;
}
/**
* 创建索引
* @param indexDir
* @throws Exception
*/
@Override
public void index(String indexDir) throws Exception{
Directory dir = FSDirectory.open(Paths.get(indexDir));//获取字典
IndexWriter writer = getWriter(dir);
Integer[] ids=getIndexer().getIds();
for (int i = 0; i <ids.length ; i++) {
Document doc=new Document();//这里选用的是luncene的document,别选错了哦
/**
* 这个是把ID加进去,因为ID是int类型的,所以需要用到的是IntField,由于占的空间不大,所以选择YES保存进去
*/
doc.add(new IntField("id",ids[i], Field.Store.YES));
/**
* 这个是把String类型的保存进去,算是标签吧(个人理解),所占空间不大,所以也选择YES保存进去
*/
doc.add(new StringField("title",getIndexer().getTitles()[i],Field.Store.YES));
/**
* 这个是把内容保存进去,大字段的所以选择的是TextField
*/
doc.add(new TextField("content",getIndexer().getContents()[i],Field.Store.YES));
//添加文档
writer.addDocument(doc);
}
closed(writer);//写完以后就要关闭流,保证性能
}
/**
* 查询
* @param indexDir 索引路径
* @param q 查询用的关键词
* @throws Exception
*/
public List<article> search(String indexDir, String q) throws Exception{
List<article> articlesList=new ArrayList<article>();//存放搜索到的文章
Directory dir= FSDirectory.open(Paths.get(indexDir));//获取字典内容
IndexReader reader = DirectoryReader.open(dir);//读出字典
//索引查询器
IndexSearcher is= new IndexSearcher(reader);
// Analyzer analyzer =new StandardAnalyzer();//标准分词器
SmartChineseAnalyzer analyzer=new SmartChineseAnalyzer();//中文分词器
//查询解析
QueryParser parser= new QueryParser("content",analyzer);
//格式化查询
Query query=parser.parse(q);
//查询前的时间
long start= System.currentTimeMillis();
//查询,返回前十的文档
TopDocs hits=is.search(query,10);
//查询后的时间
long end=System.currentTimeMillis();
System.out.println("匹配:"+q+",中共花费了:"+(end-start
)+"毫秒");
QueryScorer scorer= new QueryScorer(query);//片段得分,计算得分,把得分高的片段计算出来
Fragmenter fragmenter= new SimpleSpanFragmenter(scorer);//把得分放进去,进行格式化
//设置成html的格式,默认的是粗体, 咱们可以给他改成粗体,红色
SimpleHTMLFormatter simpleHTMLFormatter= new SimpleHTMLFormatter("<b><FONT COLOR='RED'>","</FONT></b>");
Highlighter highlighter=new Highlighter(simpleHTMLFormatter,scorer);//高亮显示片段得分高的部分
highlighter.setTextFragmenter(fragmenter);//把得分的摘要设置成text显示出来
//查询到的文档
for (ScoreDoc scoreDoc: hits.scoreDocs){
//根据主键ID获取文档
Document doc= is.doc(scoreDoc.doc);
Integer id= Integer.valueOf(doc.get("id"));//获取文章的ID
String title=doc.get("title");
System.out.println(title);//输出标题
String desc=doc.get("content");//完整的content数据
// System.out.println(desc);//输出完整的desc
/**
* 如果查询得到的desc不为空,则进行高亮,片段显示
*/
if (desc!=null){
//获取很多的片段
TokenStream tokenStream= analyzer.tokenStream("content",new StringReader(desc));
String ZhaiYao= highlighter.getBestFragment(tokenStream,desc);//把权重高的片段摘要显示出来
article al=new article();
al.setId(id);
al.setTitle(title);
al.setContent(ZhaiYao);
System.out.println("显示高亮的关键词片段:===》"+ZhaiYao);
articlesList.add(al);
}
}
reader.close();//关闭
return articlesList;
}
/**
* 关闭索引
* @param writer
*/
public void closed(IndexWriter writer) throws IOException {
if (writer !=null){
writer.close();
}
}
}
5. 创建搜索的控制层。因为文章会经常更新,检索用的索引库也需要随之更新,所以我在这里加了一个定时器,用来定时重建索引。
package com.wchstrife.controller;
import com.wchstrife.entity.CategorySuper;
import com.wchstrife.entity.article;
import com.wchstrife.service.BlogArticleService;
import org.apache.lucene.index.IndexWriter;
import org.springframework.stereotype.Controller;
import org.springframework.ui.Model;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RequestParam;
import java.io.File;
import java.util.List;
import java.util.Timer;
import java.util.TimerTask;
/**
 * Article controller exposing the full-text search endpoint.
 *
 * <p>The first search request starts a single background timer that
 * periodically rebuilds the Lucene index, so that newly published articles
 * become searchable.
 *
 * Created by T430 on 2017/8/18.
 */
@Controller
@RequestMapping("/article")
public class BlogArticleController extends BaseController {

    /** File-system location of the Lucene index. */
    private static final String INDEX_DIR = "E:\\luncene7";

    /** Delay before the first index rebuild, in milliseconds. */
    private static final long INDEX_REFRESH_DELAY_MS = 8000;

    /** Interval between index rebuilds, in milliseconds. */
    private static final long INDEX_REFRESH_PERIOD_MS = 80000;

    /** Guards {@link #indexTimer} so that only one timer is ever started. */
    private static final Object INDEX_TIMER_LOCK = new Object();

    /** The single index-refresh timer; {@code null} until the first search request. */
    private static Timer indexTimer;

    /**
     * Full-text search.
     *
     * <p>The model attribute {@code artiles} always holds a (possibly empty)
     * list of hits. When nothing was found, or the search failed, a
     * human-readable notice is additionally exposed as {@code message}.
     *
     * @param searchParam the search keywords
     * @param model       the view model
     * @return the name of the search result view
     */
    @RequestMapping("/search")
    public String SearchArticles(@RequestParam("searchParam") String searchParam, Model model) {
        ensureIndexRefreshScheduled();

        System.out.print("关键词:===========" + searchParam);
        List<article> list = null;
        try {
            list = indexService.search(INDEX_DIR, searchParam);
        } catch (Exception e) {
            e.printStackTrace();
        }
        if (list == null) {
            // The search failed (e.g. the index has not been built yet);
            // treat it as "no results" rather than dereferencing null below.
            list = java.util.Collections.<article>emptyList();
        }
        if (list.isEmpty()) {
            model.addAttribute("message", "抱歉,没有找到");
        }
        model.addAttribute("artiles", list);
        for (article a : list) {
            System.out.println("标题:" + a.getTitle());
            System.out.println("摘要:" + a.getContent());
        }
        return "search";
    }

    /**
     * Starts the periodic index rebuild exactly once.
     *
     * <p>Creating a timer per request would spawn one never-ending thread for
     * every search, each of them deleting and rebuilding the same index.
     */
    private void ensureIndexRefreshScheduled() {
        synchronized (INDEX_TIMER_LOCK) {
            if (indexTimer != null) {
                return;
            }
            // Daemon thread: must not keep the JVM alive on application shutdown.
            Timer timer = new Timer("lucene-index-refresh", true);
            timer.schedule(new TimerTask() {
                @Override
                public void run() {
                    rebuildIndex();
                }
            }, INDEX_REFRESH_DELAY_MS, INDEX_REFRESH_PERIOD_MS);
            indexTimer = timer;
        }
    }

    /**
     * Recreates the index directory and rebuilds the index from scratch.
     * Failures are logged and swallowed so that the timer keeps running.
     */
    private void rebuildIndex() {
        File dir = new File(INDEX_DIR);
        try {
            if (!dir.exists()) {
                System.out.println("文件夹不存在,创建");
                dir.mkdirs();
            } else {
                System.out.println("文件夹存在,先删除,再创建");
                if (deleteDir(dir)) {
                    System.out.println("Successfully deleted empty directory: " + dir);
                    dir.mkdirs();
                } else {
                    System.out.println("Failed to delete empty directory: " + dir);
                }
            }
            indexService.index(INDEX_DIR);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Recursively deletes a file or a directory together with everything below it.
     *
     * @param file the file or directory to delete
     * @return {@code true} if all deletions were successful; if a deletion
     *         fails, the method stops attempting to delete and returns {@code false}
     */
    private static boolean deleteDir(File file) {
        // listFiles() returns null for regular files, missing paths and I/O errors.
        File[] children = file.listFiles();
        if (children != null) {
            for (File child : children) {
                if (!deleteDir(child)) {
                    return false;
                }
            }
        }
        return file.delete();
    }
}
到此为止, 全文检索功能已经全部实现,下面给出在页面显示的效果
红色的字,就是关键字搜索出来的哦!