這幾天用 Spring Boot、MyBatis 框架搭建了一個博客系統,並整合了 Lucene 全文檢索(對數據庫中的全部文章建立索引)和定時器功能。其他不多說了,下面看看如何整合 Lucene 吧。
1.在pom.xml中添加全文檢索的jar包
<!-- Lucene core library -->
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-core</artifactId>
<version>5.5.0</version>
</dependency>
<!-- Lucene query parser -->
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-queryparser</artifactId>
<version>5.5.0</version>
</dependency>
<!-- Lucene common analyzers -->
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-analyzers-common</artifactId>
<version>5.5.0</version>
</dependency>
<!-- Chinese word segmentation (SmartChineseAnalyzer) -->
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-analyzers-smartcn</artifactId>
<version>5.5.0</version>
</dependency>
<!-- Keyword highlighting in search results -->
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-highlighter</artifactId>
<version>5.5.0</version>
</dependency>
2.創建實體類
package com.wchstrife.entity.common;
import org.apache.lucene.store.Directory;
/**
 * Plain data holder for the material that is fed into the Chinese full-text index:
 * article ids, titles and contents, plus the Lucene {@link Directory} to use.
 * Created by T430 on 2017/8/18.
 */
public class Indexer {

    /** Primary-key ids of the articles. */
    private Integer[] ids;

    /** Article titles. */
    private String[] titles;

    /** Article bodies. */
    private String[] contents;

    /** Lucene directory associated with the index. */
    private Directory dir;

    // ---- ids ----

    public Integer[] getIds() {
        return this.ids;
    }

    public void setIds(Integer[] ids) {
        this.ids = ids;
    }

    // ---- titles ----

    public String[] getTitles() {
        return this.titles;
    }

    public void setTitles(String[] titles) {
        this.titles = titles;
    }

    // ---- contents ----

    public String[] getContents() {
        return this.contents;
    }

    public void setContents(String[] contents) {
        this.contents = contents;
    }

    // ---- directory ----

    public Directory getDir() {
        return this.dir;
    }

    public void setDir(Directory dir) {
        this.dir = dir;
    }
}
3.創建service層
package com.wchstrife.service;
import com.wchstrife.entity.article;
import com.wchstrife.entity.common.Indexer;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.Directory;
import java.io.IOException;
import java.util.List;
/**
 * Service contract for building and querying the Lucene full-text index of articles.
 * Created by T430 on 2017/8/18.
 */
public interface IndexService {
// Collects the ids, titles and contents of the articles to be indexed.
Indexer getIndexer();
// Obtains an IndexWriter instance that writes into the given directory.
IndexWriter getWriter( Directory directory)throws Exception;
// Builds the index; indexDir is the path where the index is stored.
void index(String indexDir)throws Exception;
// Full-text search: looks up q in the index stored at indexDir and returns the matching articles.
List<article> search(String indexDir, String q) throws Exception;
// Closes the given index writer.
void closed(IndexWriter writer) throws IOException;
}
4.創建serviceImpl層
package com.wchstrife.service.impl;
import com.wchstrife.dao.BlogArticleMapper;
import com.wchstrife.entity.article;
import com.wchstrife.entity.common.Indexer;
import com.wchstrife.service.BaseServiceImpl;
import com.wchstrife.service.IndexService;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.document.*;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.*;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
import java.io.IOException;
import java.io.StringReader;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.List;
/**
 * Lucene-backed implementation of {@link IndexService}: builds a full-text index from
 * all blog articles and runs highlighted keyword searches against it.
 * Created by T430 on 2017/8/18.
 */
@Service
public class IndexServiceImpl extends BaseServiceImpl implements IndexService {

    @Autowired
    BlogArticleMapper blogArticleMapper;

    /**
     * Loads every article and copies its id, title and content into three arrays
     * that share the same index positions.
     *
     * @return an {@link Indexer} with one array slot per article; the arrays are empty
     *         when the mapper returns {@code null} or no rows
     */
    @Override
    public Indexer getIndexer() {
        List<article> articles = blogArticleMapper.getAllArticles();
        // Resolve the size only after the null check so a null result yields empty arrays.
        int count = (articles == null) ? 0 : articles.size();

        Integer[] ids = new Integer[count];
        String[] titles = new String[count];
        String[] contents = new String[count];
        for (int i = 0; i < count; i++) {
            article current = articles.get(i);
            ids[i] = current.getId();
            titles[i] = current.getTitle();
            contents[i] = current.getContent();
        }

        Indexer indexer = new Indexer();
        indexer.setIds(ids);
        indexer.setTitles(titles);
        indexer.setContents(contents);
        return indexer;
    }

    /**
     * Creates an {@link IndexWriter} that tokenizes text with the Chinese analyzer.
     * The caller owns the returned writer and must close it.
     *
     * @param directory the Lucene directory the writer stores the index in
     * @return a new writer bound to {@code directory}
     * @throws Exception if the writer cannot be opened
     */
    @Override
    public IndexWriter getWriter(Directory directory) throws Exception {
        // Chinese word-segmentation analyzer.
        SmartChineseAnalyzer analyzer = new SmartChineseAnalyzer();
        IndexWriterConfig config = new IndexWriterConfig(analyzer);
        return new IndexWriter(directory, config);
    }

    /**
     * Builds the index: writes one Lucene document per article into {@code indexDir}.
     * Documents are added with the writer's default open mode, so a caller that wants
     * a fresh index has to clear the directory beforehand.
     *
     * @param indexDir file-system path where the index is stored
     * @throws Exception if the articles cannot be loaded or the index cannot be written
     */
    @Override
    public void index(String indexDir) throws Exception {
        // Query the articles once so ids, titles and contents come from the same snapshot
        // instead of hitting the database again for every document.
        Indexer indexer = getIndexer();
        Integer[] ids = indexer.getIds();
        String[] titles = indexer.getTitles();
        String[] contents = indexer.getContents();

        try (Directory dir = FSDirectory.open(Paths.get(indexDir))) {
            IndexWriter writer = getWriter(dir);
            try {
                for (int i = 0; i < ids.length; i++) {
                    // Note: this is Lucene's Document type.
                    Document doc = new Document();
                    // The id is an int, hence IntField; it is small, so it is stored.
                    doc.add(new IntField("id", ids[i], Field.Store.YES));
                    // Lucene fields reject null values, so absent titles/contents are skipped.
                    if (titles[i] != null) {
                        // StringField: indexed as a single un-tokenized term, and stored.
                        doc.add(new StringField("title", titles[i], Field.Store.YES));
                    }
                    if (contents[i] != null) {
                        // TextField: the large body text, tokenized for full-text search.
                        doc.add(new TextField("content", contents[i], Field.Store.YES));
                    }
                    writer.addDocument(doc);
                }
            } finally {
                // Always release the writer, even when a document fails to index.
                closed(writer);
            }
        }
    }

    /**
     * Searches the {@code content} field and returns up to ten matching articles whose
     * content is replaced by the best-scoring highlighted fragment.
     *
     * @param indexDir path of the index
     * @param q        keyword(s) to search for, in query-parser syntax
     * @return the matching articles (id, title, highlighted excerpt); never {@code null}
     * @throws Exception if the index cannot be read or the query cannot be parsed
     */
    @Override
    public List<article> search(String indexDir, String q) throws Exception {
        List<article> articlesList = new ArrayList<article>();

        // try-with-resources closes analyzer, reader and directory on every exit path.
        try (Directory dir = FSDirectory.open(Paths.get(indexDir));
             IndexReader reader = DirectoryReader.open(dir);
             SmartChineseAnalyzer analyzer = new SmartChineseAnalyzer()) {

            IndexSearcher searcher = new IndexSearcher(reader);
            QueryParser parser = new QueryParser("content", analyzer);
            Query query = parser.parse(q);

            long start = System.currentTimeMillis();
            // Fetch the top ten hits.
            TopDocs hits = searcher.search(query, 10);
            long end = System.currentTimeMillis();
            System.out.println("匹配:" + q + ",中共花費了:" + (end - start) + "毫秒");

            // Score fragments against the query so the best one can be picked as the excerpt.
            QueryScorer scorer = new QueryScorer(query);
            Fragmenter fragmenter = new SimpleSpanFragmenter(scorer);
            // Wrap matched terms in bold red instead of the formatter's default markup.
            SimpleHTMLFormatter formatter =
                    new SimpleHTMLFormatter("<b><FONT COLOR='RED'>", "</FONT></b>");
            Highlighter highlighter = new Highlighter(formatter, scorer);
            highlighter.setTextFragmenter(fragmenter);

            for (ScoreDoc scoreDoc : hits.scoreDocs) {
                Document doc = searcher.doc(scoreDoc.doc);
                Integer id = Integer.valueOf(doc.get("id"));
                String title = doc.get("title");
                System.out.println(title);
                String desc = doc.get("content");

                // Only documents that carry content can be highlighted and returned.
                if (desc != null) {
                    TokenStream tokenStream =
                            analyzer.tokenStream("content", new StringReader(desc));
                    String excerpt = highlighter.getBestFragment(tokenStream, desc);

                    article found = new article();
                    found.setId(id);
                    found.setTitle(title);
                    found.setContent(excerpt);
                    System.out.println("顯示高亮的關鍵詞片段:===》" + excerpt);
                    articlesList.add(found);
                }
            }
        }
        return articlesList;
    }

    /**
     * Closes the index writer.
     *
     * @param writer the writer to close; {@code null} is ignored
     * @throws IOException if closing fails
     */
    @Override
    public void closed(IndexWriter writer) throws IOException {
        if (writer != null) {
            writer.close();
        }
    }
}
5.創建搜索的控制層。在這裏我加了一個定時器,用來定時重建全文檢索的索引庫:因爲文章會經常更新,所以索引也要跟着更新。
package com.wchstrife.controller;
import com.wchstrife.entity.CategorySuper;
import com.wchstrife.entity.article;
import com.wchstrife.service.BlogArticleService;
import org.apache.lucene.index.IndexWriter;
import org.springframework.stereotype.Controller;
import org.springframework.ui.Model;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RequestParam;
import java.io.File;
import java.util.List;
import java.util.Timer;
import java.util.TimerTask;
/**
 * Article controller; serves the full-text search page and keeps the Lucene index
 * refreshed in the background.
 * Created by T430 on 2017/8/18.
 */
@Controller
@RequestMapping("/article")
public class BlogArticleController extends BaseController {

    /** Directory on disk where the Lucene index is stored. */
    private static final String INDEX_DIR = "E:\\luncene7";

    /** Delay in milliseconds before the first index rebuild. */
    private static final long REBUILD_DELAY_MS = 8000;

    /** Interval in milliseconds between two index rebuilds. */
    private static final long REBUILD_PERIOD_MS = 80000;

    /** The one timer that drives the periodic rebuild; created on the first search. */
    private Timer indexTimer;

    /**
     * Full-text search.
     *
     * <p>The model attribute {@code artiles} holds the list of matching articles, or a
     * "nothing found" message when the search fails or yields no result.
     *
     * @param searchParam the keyword to search for
     * @param model       view model that receives the result
     * @return the name of the search view
     */
    @RequestMapping("/search")
    public String SearchArticles(@RequestParam("searchParam") String searchParam, Model model) {
        ensureIndexRebuildScheduled();

        System.out.print("關鍵詞:===========" + searchParam);
        List<article> list = null;
        try {
            list = indexService.search(INDEX_DIR, searchParam);
        } catch (Exception e) {
            // E.g. the index has not been built yet; handled below as "nothing found".
            e.printStackTrace();
        }

        if (list == null || list.isEmpty()) {
            model.addAttribute("artiles", "抱歉,沒有找到");
            return "search";
        }

        model.addAttribute("artiles", list);
        for (article a : list) {
            System.out.println("標題:" + a.getTitle());
            System.out.println("摘要:" + a.getContent());
        }
        return "search";
    }

    /**
     * Starts the periodic index rebuild exactly once. Scheduling a new timer per request
     * would pile up one timer thread and one repeating rebuild task for every search.
     */
    private synchronized void ensureIndexRebuildScheduled() {
        if (indexTimer != null) {
            return;
        }
        // Daemon thread so the timer never keeps the JVM alive on shutdown.
        indexTimer = new Timer("lucene-index-rebuild", true);
        indexTimer.schedule(new TimerTask() {
            @Override
            public void run() {
                rebuildIndex();
            }
        }, REBUILD_DELAY_MS, REBUILD_PERIOD_MS);
    }

    /**
     * Recreates the index directory and re-indexes all articles.
     * NOTE(review): the directory is wiped while searches may be reading it; a search
     * that overlaps a rebuild can fail and is then reported as "nothing found".
     */
    private void rebuildIndex() {
        File indexFolder = new File(INDEX_DIR);
        try {
            if (!indexFolder.exists()) {
                System.out.println("文件夾不存在,創建");
                indexFolder.mkdir();
            } else {
                System.out.println("文件夾存在,先刪除,再創建");
                if (deleteDir(indexFolder)) {
                    System.out.println("Successfully deleted empty directory: " + indexFolder);
                    indexFolder.mkdir();
                } else {
                    System.out.println("Failed to delete empty directory: " + indexFolder);
                }
            }
            indexService.index(INDEX_DIR);
        } catch (Exception e) {
            // Must not escape: an exception thrown from a TimerTask terminates the timer.
            e.printStackTrace();
        }
    }

    /**
     * Recursively deletes a file, or a directory together with everything below it.
     *
     * @param file the file or directory to delete
     * @return {@code true} if everything was deleted; {@code false} as soon as one
     *         deletion fails, in which case no further deletions are attempted
     */
    private static boolean deleteDir(File file) {
        // listFiles() yields null for a regular file (or on an I/O error): nothing to descend into.
        File[] children = file.listFiles();
        if (children != null) {
            for (File child : children) {
                if (!deleteDir(child)) {
                    return false;
                }
            }
        }
        // Delete exactly once; a second delete() on the same path would report false.
        return file.delete();
    }
}
到此爲止, 全文檢索功能已經全部實現,下面給出在頁面顯示的效果
紅色的字,就是關鍵字搜索出來的哦!