Lucene2.0升级3.3版本开发笔记之一------建索引

今天本来是要写OSCache剩下部分的内容的。
由于项目急需将搜索引擎升级为新版本,所以优先级发生了变化。
之所以升级到3.3而不是3.5,是因为3.5才出来2个月,而3.3已经出来半年了。
所以项目认为3.3比3.5稳定。。。

2.0升级到3.3只有很小的区别。
使用到的jar:
lucene-core-3.3.0.jar
IKAnalyzer3.2.5Stable.jar
lucene-highlighter-3.0.1.jar
以前我们做2.0版本的时候引入了很多jar,现在lucene只需要一个core jar就够了
IK也同样升级到匹配的3.2.5稳定版。
高亮也升级到了3.0以上的版本。根据jar包名称可以在网上轻松找到jar包的下载地址。

建索引:

使用的对象和基本步骤:

Analyzer,解析器。

IndexWriter,需要对象IndexWriter来进行索引的创建与更新。

Document,写入的文档,是IndexWriter的基本对象。(一条报警可以用一个文档表示)

Field,一个Document可以有多个Field,这是我们存储的基本单位。(PCIP等都可以视为Field)注:field默认域名区分大小写,最好统一。

A. 创建写对象IndexWriter,它依赖于Analyzer、存储路径,可通过IndexWriterConfig对其进行参数设置。

B. 创建空文档:Document doc = new Document();

C. 向空文档里面添加若干个Field:doc.add(new Field("PCIP", fields[0], Field.Store.YES, Field.Index.ANALYZED_NO_NORMS));

注:

Field参数STORE:与索引无关,表示是否额外存储原文;存储后可以在搜索结果中取出。

NO不额外存储;

YES,额外存储。

Field参数INDEX

NO,不索引;

ANALYZED,分词后索引;

NOT_ANALYZED,不分词索引

ANALYZED_NO_NORMS,分词索引,不存储NORMS

NOT_ANALYZED_NO_NORMS,不分词,索引,不存储NORMS

除了NO外都算索引,可以搜索。NORMS存储了boost所需信息,包含了NORM可能会占用更多内存。

D. IndexWriter添加Document:writer.addDocument(doc);

E. 优化索引(优化相对比较慢,可以选择进行;优化之后可以达到最大查询速度):writer.optimize();


需要引入的包:

import org.apache.lucene.analysis.Analyzer;

import org.apache.lucene.document.Document;

import org.apache.lucene.document.Field;

import org.apache.lucene.index.CorruptIndexException;

import org.apache.lucene.index.IndexWriter;

import org.apache.lucene.index.IndexWriterConfig;

import org.apache.lucene.index.Term;

import org.apache.lucene.store.Directory;

import org.apache.lucene.store.FSDirectory;

import org.apache.lucene.store.LockObtainFailedException;

import org.apache.lucene.util.Version;


我使用当中主要发生的变化已经用红色标记出

   @SuppressWarnings("deprecation")

   public void BuildLawyerPublic(ResponseList<SearchLawyer> lawyerList,

           String path, boolean overwrite, Date start, Date end) {

       IndexWriter indexWriter = null;

      

      try {

           try {

             Analyzer analyzer = new IKAnalyzer();//分词类变为新的

               IndexWriterConfig indexWriterConfig = new IndexWriterConfig(Version.LUCENE_33, analyzer);//indexWrite配置新的

                Directory dir =FSDirectory.open(new File(path)) ;//地址

               indexWriterConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE);//总是重新创建

 

             indexWriter = new IndexWriter(dir,indexWriterConfig);//新的方法

           } catch (IOException e) {

              e.printStackTrace();

             this.BuildAll();

             return;

           }

 

           long count = 0;

           long startTime = new Date().getTime();

           for (int i = 0; i < lawyerList.size(); i++) {

             // 按小时或天建立索弄17

             if (path == hoursDir || path == dailyDir) {

                  SearchLawyer lawyerBean = new SearchLawyer();

                  BeanUtils.copyProperties(lawyerList.get(i), lawyerBean);

                 // 更新索引

                 if (((SearchLawyer) lawyerList.get(i)).getJoinDate()

                         .before(start)) {

                     Term term = new Term("consultID", String

                            .valueOf(((SearchLawyer) lawyerList.get(i))

                                   .getLawyerID()));

                     indexWriter.updateDocument(term,

                            convert2Doc(lawyerBean));

                    update_count++;

                  } else {// 新增索引

                     indexWriter.addDocument(convert2Doc(lawyerBean));

                    add_count++;

                  }

              } else {

                 // 第一次建立索弄17

                  SearchLawyer lawyerBean = (SearchLawyer) lawyerList.get(i);

                 if (lawyerBean != null) {

                     Document hdoc = convert2Doc(lawyerBean);

                     indexWriter.addDocument(hdoc);

                  }

                  count++;

              }

           }

           indexWriter.optimize();

           indexWriter.close();

           long endTime = new Date().getTime();

           logger.debug("It takes " + (endTime - startTime)

                  + "ms index count :" + count

                  + " consultpublic count:==============="

                  + lawyerList.size());

 

       } catch(CorruptIndexException e) {

           logger.error("{}", e);

           e.printStackTrace();

       } catch(LockObtainFailedException e) {

           logger.error("{}", e);

           e.printStackTrace();

       } catch (IOException e) {

           logger.error("{}", e);

           e.printStackTrace();

       }

    }

 

 

 

   //bean 2

   public Document convert2Doc(SearchLawyer lawyer) {

       Document doc = new Document();

      try {

           // 律师主键

           doc.add(new Field("lawyerID", String.valueOf(lawyer.getLawyerID()),

                  Field.Store.YES, Field.Index.NO));//NO,不索引$17

           // 律师姓名

           doc.add(new Field("fullName", String.valueOf(lawyer.getFullName()),

                 Field.Store.YES, Field.Index.ANALYZED));//ANALYZED,分词后索引

           // 律师电话

           doc.add(new Field("phone", String.valueOf(lawyer.getPhone()),

                  Field.Store.YES, Field.Index.NO));

           // 律师头像

           doc.add(new Field("photo", String.valueOf(lawyer

                  .getPhoto()), Field.Store.YES, Field.Index.NO));

           // 律师箄1717

           doc.add(new Field("lawyer_Intro", String.valueOf(lawyer

                         .getLawyer_Intro()), Field.Store.YES,

                         Field.Index.ANALYZED));

           // 律所名称

           doc.add(new Field("officeName", String

                  .valueOf(lawyer.getOfficeName()), Field.Store.YES,

                 Field.Index.NOT_ANALYZED));//NOT_ANALYZED,不分词索引

           // 律师扄17在省

           doc.add(new Field("provinceName", String.valueOf(lawyer

                         .getProvinceName()), Field.Store.YES,

                         Field.Index.NOT_ANALYZED));

           // 律师扄17在市

           doc.add(new Field("cityName", String.valueOf(lawyer.getCityName()),

                  Field.Store.YES, Field.Index.NOT_ANALYZED));

           // 专注类别

           doc.add(new Field("specialtyName", String

                  .valueOf(lawyer.getSpecialtyName()), Field.Store.YES,

                  Field.Index.ANALYZED));

           // 律师添加的时闄17

           doc.add(new Field("joinDate", DateUtil.getDateTime(

                 "yyyy-MM-dd hh:mm:ss", lawyer.getJoinDate()),

                  Field.Store.YES, Field.Index.NO));

 

       } catch (Exception e) {

           logger.error("{}", e);

           e.printStackTrace();

       }

      return doc;

    }

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章