利用Spring Boot、MyBatis和Lucene整合创建博客系统(只给出全数据库搜索并在搜索页面显示结果的部分)

这几天搭建了一个博客系统,使用Spring Boot、MyBatis框架,并整合了Lucene全数据库检索和定时器功能。其他的不多说,下面看看如何整合Lucene吧

1.在pom.xml中添加全文检索的jar包


      <!--  Lucene核心包 -->
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-core</artifactId>
            <version>5.5.0</version>
        </dependency>
        <!-- Lucene查询解析包 -->
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-queryparser</artifactId>
            <version>5.5.0</version>
        </dependency>
        <!--  lucene 公共包分析器 -->
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-analyzers-common</artifactId>
            <version>5.5.0</version>
        </dependency>
        <!--  中文分词 -->
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-analyzers-smartcn</artifactId>
            <version>5.5.0</version>
        </dependency>
        <!--  关键词高亮显示 -->
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-highlighter</artifactId>
            <version>5.5.0</version>
        </dependency>



2.创建实体类

package com.wchstrife.entity.common;

import org.apache.lucene.store.Directory;

/**
 * Plain data holder that carries the article data to be indexed as three
 * parallel arrays (ids, titles, contents), plus a Lucene {@link Directory}.
 * Created by T430 on 2017/8/18.
 */
public class Indexer {

    /** Primary-key ids of the articles. */
    private Integer[] ids;

    /** Article titles. */
    private String[] titles;

    /** Article contents. */
    private String[] contents;

    /** Lucene directory handle. */
    private Directory dir;

    /** @return the array of article ids, exactly as it was last set */
    public Integer[] getIds() {
        return this.ids;
    }

    /** @param ids the array of article ids to store */
    public void setIds(Integer[] ids) {
        this.ids = ids;
    }

    /** @return the array of article titles, exactly as it was last set */
    public String[] getTitles() {
        return this.titles;
    }

    /** @param titles the array of article titles to store */
    public void setTitles(String[] titles) {
        this.titles = titles;
    }

    /** @return the array of article contents, exactly as it was last set */
    public String[] getContents() {
        return this.contents;
    }

    /** @param contents the array of article contents to store */
    public void setContents(String[] contents) {
        this.contents = contents;
    }

    /** @return the Lucene directory, or {@code null} if none was set */
    public Directory getDir() {
        return this.dir;
    }

    /** @param dir the Lucene directory to store */
    public void setDir(Directory dir) {
        this.dir = dir;
    }
}

3,创建service层

package com.wchstrife.service;

import com.wchstrife.entity.article;
import com.wchstrife.entity.common.Indexer;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.Directory;

import java.io.IOException;
import java.util.List;

/**
 * Service contract for building and querying the full-text article index.
 * Created by T430 on 2017/8/18.
 */
public interface IndexService {

    /**
     * Collects the article data that is to be indexed.
     *
     * @return an {@link Indexer} holding the ids, titles and contents
     */
    Indexer getIndexer();

    /**
     * Obtains an {@link IndexWriter} instance for the given directory.
     *
     * @param directory the Lucene directory the writer operates on
     * @return the index writer
     * @throws Exception if the writer cannot be created
     */
    IndexWriter getWriter(Directory directory) throws Exception;

    /**
     * Writes the articles into the index stored at {@code indexDir}.
     *
     * @param indexDir file-system location where the index is saved
     * @throws Exception if indexing fails
     */
    void index(String indexDir) throws Exception;

    /**
     * Runs a full-text search against the index stored at {@code indexDir}.
     *
     * @param indexDir file-system location of the index
     * @param q        the query keywords
     * @return the matching articles
     * @throws Exception if the index cannot be read or the query is invalid
     */
    List<article> search(String indexDir, String q) throws Exception;

    /**
     * Closes the given index writer.
     *
     * @param writer the writer to close
     * @throws IOException if closing fails
     */
    void closed(IndexWriter writer) throws IOException;
}

4,创建serviceImpl层

package com.wchstrife.service.impl;

import com.wchstrife.dao.BlogArticleMapper;
import com.wchstrife.entity.article;
import com.wchstrife.entity.common.Indexer;
import com.wchstrife.service.BaseServiceImpl;

import com.wchstrife.service.IndexService;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.document.*;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.*;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

import org.springframework.beans.factory.annotation.Autowired;

import org.springframework.stereotype.Service;

import java.io.IOException;
import java.io.StringReader;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.List;

/**
 * Lucene-backed implementation of {@link IndexService}: builds a full-text
 * index from all articles returned by {@link BlogArticleMapper} and searches
 * it with highlighted result snippets.
 * Created by T430 on 2017/8/18.
 */
@Service
public class IndexServiceImpl extends BaseServiceImpl implements IndexService{

    @Autowired
    BlogArticleMapper blogArticleMapper;

    /**
     * Loads every article from the mapper and copies id, title and content
     * into three parallel arrays.
     *
     * @return an {@link Indexer} whose arrays all have one entry per article;
     *         the arrays are empty (never {@code null}) when there are no articles
     */
    @Override
    public Indexer getIndexer(){
        List<article> articles = blogArticleMapper.getAllArticles();
        // Resolve the size once, after the null check, so a null result from
        // the mapper yields empty arrays instead of a NullPointerException.
        int count = (articles == null) ? 0 : articles.size();

        Integer[] ids = new Integer[count];
        String[] titles = new String[count];
        String[] contents = new String[count];
        for (int i = 0; i < count; i++) {
            article current = articles.get(i);
            ids[i] = current.getId();
            titles[i] = current.getTitle();
            contents[i] = current.getContent();
        }

        Indexer indexer = new Indexer();
        indexer.setIds(ids);
        indexer.setTitles(titles);
        indexer.setContents(contents);
        // Debug output of the second id; guarded because it used to throw
        // ArrayIndexOutOfBoundsException with fewer than two articles.
        if (ids.length > 1) {
            System.out.println("分词是:===="+ids[1]);
        }
        return indexer;
    }

    /**
     * Creates an {@link IndexWriter} on the given directory, configured with
     * the Chinese-aware {@link SmartChineseAnalyzer}.
     *
     * @param directory the Lucene directory to write to
     * @return a new writer; the caller is responsible for closing it
     * @throws Exception if the writer cannot be opened
     */
    @Override
    public IndexWriter getWriter(Directory directory) throws Exception{
        SmartChineseAnalyzer analyzer = new SmartChineseAnalyzer();
        IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
        return new IndexWriter(directory, iwc);
    }

    /**
     * Rebuilds the index at {@code indexDir} from the current articles.
     * Existing documents in the index are removed first, so calling this
     * method repeatedly does not produce duplicate hits.
     *
     * @param indexDir file-system path of the index directory
     * @throws Exception if the directory cannot be opened or writing fails
     */
    @Override
    public void index(String indexDir) throws Exception{
        // Load the article data once; the original re-queried the database
        // twice per article inside the loop.
        Indexer data = getIndexer();
        Integer[] ids = data.getIds();
        String[] titles = data.getTitles();
        String[] contents = data.getContents();

        Directory dir = FSDirectory.open(Paths.get(indexDir));
        try {
            IndexWriter writer = getWriter(dir);
            try {
                // This method always writes the complete article set, so drop
                // whatever an earlier run left behind. The removal becomes
                // visible together with the new documents when the writer is closed.
                writer.deleteAll();

                for (int i = 0; i < ids.length; i++) {
                    Document doc = new Document(); // org.apache.lucene.document.Document
                    // Numeric id, stored so search() can read it back.
                    doc.add(new IntField("id", ids[i], Field.Store.YES));
                    // StringField/TextField reject null values, so fields
                    // without data are simply left out of the document.
                    if (titles[i] != null) {
                        // Indexed as a single un-tokenized term and stored.
                        doc.add(new StringField("title", titles[i], Field.Store.YES));
                    }
                    if (contents[i] != null) {
                        // Tokenized for full-text search; stored for highlighting.
                        doc.add(new TextField("content", contents[i], Field.Store.YES));
                    }
                    writer.addDocument(doc);
                }
            } finally {
                // Always release the writer (and its write lock), even when
                // adding a document failed.
                closed(writer);
            }
        } finally {
            dir.close();
        }
    }

    /**
     * Searches the {@code content} field of the index at {@code indexDir} and
     * returns up to ten hits. Each returned article carries the stored id and
     * title, and its content is replaced by the best-matching fragment with
     * the matched terms wrapped in bold red HTML markup.
     *
     * @param indexDir file-system path of the index directory
     * @param q        the query string, in Lucene classic query syntax
     * @return the matching articles; empty when {@code q} is null or blank
     * @throws Exception if the index cannot be read or the query cannot be parsed
     */
    @Override
    public List<article> search(String indexDir, String q) throws Exception{
        List<article> articlesList = new ArrayList<article>();
        if (q == null || q.trim().isEmpty()) {
            // Nothing to search for; avoid handing null/blank text to the parser.
            return articlesList;
        }

        // try-with-resources guarantees the directory, reader and analyzer are
        // released even when parsing or searching throws.
        try (Directory dir = FSDirectory.open(Paths.get(indexDir));
             IndexReader reader = DirectoryReader.open(dir);
             SmartChineseAnalyzer analyzer = new SmartChineseAnalyzer()) {

            IndexSearcher searcher = new IndexSearcher(reader);
            QueryParser parser = new QueryParser("content", analyzer);
            Query query = parser.parse(q);

            long start = System.currentTimeMillis();
            TopDocs hits = searcher.search(query, 10); // top ten documents
            long end = System.currentTimeMillis();
            System.out.println("匹配:"+q+",中共花费了:"+(end-start
            )+"毫秒");

            // Scores fragments against the query so the best one can be picked.
            QueryScorer scorer = new QueryScorer(query);
            Fragmenter fragmenter = new SimpleSpanFragmenter(scorer);
            // Wrap matched terms in bold red instead of the default plain bold.
            SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<b><FONT COLOR='RED'>","</FONT></b>");
            Highlighter highlighter = new Highlighter(formatter, scorer);
            highlighter.setTextFragmenter(fragmenter);

            for (ScoreDoc scoreDoc : hits.scoreDocs) {
                Document doc = searcher.doc(scoreDoc.doc);

                Integer id = Integer.valueOf(doc.get("id"));
                String title = doc.get("title");
                System.out.println(title);
                String desc = doc.get("content"); // full stored content

                // Documents without stored content cannot be highlighted and
                // are skipped.
                if (desc != null) {
                    TokenStream tokenStream = analyzer.tokenStream("content", new StringReader(desc));
                    String summary = highlighter.getBestFragment(tokenStream, desc);

                    article hit = new article();
                    hit.setId(id);
                    hit.setTitle(title);
                    hit.setContent(summary);
                    System.out.println("显示高亮的关键词片段:===》"+summary);
                    articlesList.add(hit);
                }
            }
        }
        return articlesList;
    }

    /**
     * Closes the given writer if it is not {@code null}.
     *
     * @param writer the writer to close; may be {@code null}
     * @throws IOException if closing fails
     */
    @Override
    public void closed(IndexWriter writer) throws IOException {
        if (writer != null) {
            writer.close();
        }
    }

}

5,创建搜索的控制层。因为文章会经常更新,检索用的索引也要随之更新,所以我在这里加了一个定时器,定时重建全文索引

package com.wchstrife.controller;

import com.wchstrife.entity.CategorySuper;
import com.wchstrife.entity.article;
import com.wchstrife.service.BlogArticleService;
import org.apache.lucene.index.IndexWriter;
import org.springframework.stereotype.Controller;
import org.springframework.ui.Model;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RequestParam;

import java.io.File;
import java.util.List;
import java.util.Timer;
import java.util.TimerTask;

/**
 * Article controller: serves the full-text search page and keeps the Lucene
 * index refreshed by a periodic background rebuild.
 * Created by T430 on 2017/8/18.
 */
@Controller
@RequestMapping("/article")
public class BlogArticleController extends BaseController {

    /** File-system location of the Lucene index. */
    private static final String INDEX_DIR = "E:\\luncene7";

    /** Delay before the first index rebuild, in milliseconds. */
    private static final long REBUILD_INITIAL_DELAY_MS = 8000;

    /** Interval between two index rebuilds, in milliseconds. */
    private static final long REBUILD_PERIOD_MS = 80000;

    /** The single rebuild timer; created on first use, guarded by the class lock. */
    private static Timer rebuildTimer;

    /**
     * Full-text search.
     *
     * @param searchParam the search keywords
     * @param model       receives the result list under the key {@code "artiles"}
     * @return the name of the search view
     */
    @RequestMapping("/search")
    public String SearchArticles(@RequestParam("searchParam") String searchParam, Model model) {
        // The original created and scheduled a brand-new Timer on every
        // request, leaking one thread per search and running ever more
        // concurrent rebuilds. Schedule the periodic rebuild exactly once.
        ensureIndexRebuildScheduled();

        System.out.print("关键词:==========="+searchParam);
        List<article> list = null;
        try {
            list = indexService.search(INDEX_DIR, searchParam);
        } catch (Exception e) {
            e.printStackTrace();
        }

        if (list == null) {
            // The search failed (for example the index has not been built
            // yet); render an empty result instead of throwing a
            // NullPointerException below.
            list = java.util.Collections.<article>emptyList();
        }
        // NOTE(review): the original also put a "not found" text under this
        // same key when the list was empty, but always overwrote it with the
        // list on the next line. The view therefore only ever received the
        // list, which is kept here.
        model.addAttribute("artiles", list);

        for (article a : list) {
            System.out.println("标题:"+a.getTitle());
            System.out.println("摘要:"+a.getContent());
        }

        return "search";
    }

    /**
     * Starts the periodic index rebuild if it is not running yet. Safe to
     * call from concurrent requests; only the first call schedules the task.
     */
    private void ensureIndexRebuildScheduled() {
        synchronized (BlogArticleController.class) {
            if (rebuildTimer != null) {
                return;
            }
            // Daemon thread so the timer never keeps the JVM alive on shutdown.
            rebuildTimer = new Timer("lucene-index-rebuild", true);
            rebuildTimer.schedule(new TimerTask() {
                @Override
                public void run() {
                    rebuildIndex();
                }
            }, REBUILD_INITIAL_DELAY_MS, REBUILD_PERIOD_MS);
        }
    }

    /**
     * Clears the index directory (creating it if necessary) and asks the
     * index service to write a fresh index into it. Failures are reported on
     * stderr and do not stop later rebuilds.
     */
    private void rebuildIndex() {
        File indexFolder = new File(INDEX_DIR);
        try {
            if (!indexFolder.exists()) {
                System.out.println("文件夹不存在,创建");
                indexFolder.mkdir();
            } else {
                System.out.println("文件夹存在,先删除,再创建");
                if (deleteDir(indexFolder)) {
                    System.out.println("Successfully deleted empty directory: " + indexFolder);
                    // Deleted successfully, so create it again.
                    indexFolder.mkdir();
                } else {
                    System.out.println("Failed to delete empty directory: " + indexFolder);
                }
            }
            indexService.index(INDEX_DIR);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Recursively deletes a file, or a directory together with everything
     * below it.
     *
     * @param file the file or directory to delete
     * @return {@code true} if every deletion succeeded; {@code false} as soon
     *         as one deletion fails, in which case no further deletions are
     *         attempted
     */
    private static boolean deleteDir(File file) {
        // listFiles() returns null for a regular file (and on I/O errors), so
        // this also covers the cases where the original threw a
        // NullPointerException on file.list().length.
        File[] children = file.listFiles();
        if (children != null) {
            for (File child : children) {
                if (!deleteDir(child)) {
                    return false;
                }
            }
        }
        // Delete exactly once: the original deleted plain files and empty
        // directories twice and returned the (always false) second result.
        return file.delete();
    }

}




到此为止, 全文检索功能已经全部实现,下面给出在页面显示的效果



红色的字,就是关键字搜索出来的哦!




發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章