Lucene 學習開發

package com.highcom.hcgip.lucenesearch;

import javax.servlet.*;
import javax.servlet.http.*;
import java.io.*;
import java.util.*;
import com.highcom.hcgip.cm.deal.admin.*;
import com.highcom.hcgip.cm.dbmap.admin.*;
import com.highcom.hcgip.cm.model.*;
import com.highcom.hcgip.cm.util.*;
import com.highcom.hcgip.basic.common.*;
import com.highcom.hcgip.cm.constant.*;

/**
 * <p>Title: </p>
 * <p>Description: </p>
 * <p>Copyright: Copyright (c) 2004</p>
 * <p>Company: Highcom</p>
 * @author 李新博
 * @version 1.0
 */

public class Searcher
    extends HttpServlet {
  private static final String CONTENT_TYPE = "text/html; charset=GBK";
  //Initialize global variables
  public void init() throws ServletException {
  }

  //Process the HTTP Post request
  public void doPost(HttpServletRequest request, HttpServletResponse response) throws
      ServletException, IOException {
    PrintWriter out = response.getWriter();
    String id = request.getParameter("classid");
    String q = request.getParameter("searchKeys");
    q = Uncode.exChinese(q);
    CategoryDBInterface category = new CategoryDBMap();
    if (id != null && id.equals("0")) {
      Date start = new Date();
      Vector v = category.getCategory();
      String[] indexDir = new String[v.size()];
      CategoryBean tempBean = null;
      for (int i = 0; i < v.size(); i++) {
        tempBean = (CategoryBean) v.get(i);
        indexDir[i] =tempBean.getIndexpath();
        }
      Vector it = Search.getSearch(indexDir,
                                   q);
      Date end = new Date();
      long time = end.getTime() - start.getTime();
      String t = time + "";
      //分頁參數
      int pageCounter = 0;
      int currentPageIndex = 0;
      String pageIndex = (request.getParameter("pageIndex") == null) ? "0" :
          request.getParameter("pageIndex");
      currentPageIndex = Integer.parseInt(pageIndex);
      int totalRecord = 0;
      //分頁結束
      Vector temp = new Vector();
      SplitPage ph = new SplitPage();
      ph.setPage(it, 20, currentPageIndex);
      pageCounter = ph.getTotalPages();
      totalRecord = ph.getTotalLines();
      request.setAttribute("totalRecord", new Integer(totalRecord));
      request.setAttribute("pageCounter", new Integer(pageCounter));
      request.setAttribute("currentPageIndex",
                           new Integer(currentPageIndex));
      temp = ph.getPage(currentPageIndex); //分頁後的查詢內容
      request.setAttribute("result", temp);
      request.setAttribute("searchfor", "全部分類");
      request.setAttribute("time", t);
      request.setAttribute("searchKeys", q);
      request.setAttribute("classid", id);
    }
    else {
      Date start = new Date();
      Vector v = category.getCategory(Integer.parseInt(id));
      CategoryBean tempBean = (CategoryBean) v.get(0);
      Vector it = Search.getSearch(tempBean.getIndexpath(),
                                   q);
      Date end = new Date();
      long time = end.getTime() - start.getTime();
      String t = time + "";
      //分頁參數
      int pageCounter = 0;
      int currentPageIndex = 0;
      String pageIndex = (request.getParameter("pageIndex") == null) ? "0" :
          request.getParameter("pageIndex");
      currentPageIndex = Integer.parseInt(pageIndex);
      int totalRecord = 0;
      //分頁結束
      Vector temp = new Vector();
      SplitPage ph = new SplitPage();
      ph.setPage(it, 10, currentPageIndex);
      pageCounter = ph.getTotalPages();
      totalRecord = ph.getTotalLines();
      request.setAttribute("totalRecord", new Integer(totalRecord));
      request.setAttribute("pageCounter", new Integer(pageCounter));
      request.setAttribute("currentPageIndex",
                           new Integer(currentPageIndex));
      temp = ph.getPage(currentPageIndex); //分頁後的查詢內容
      request.setAttribute("result", temp);
      request.setAttribute("searchfor", tempBean.getCategory());
      request.setAttribute("time", t);
      request.setAttribute("searchKeys", q);
      request.setAttribute("classid", id);

    }
    dispatch(request, response,ConstantList.PAGE_BASIC_SEARCHRESULT);
  }

  protected void dispatch(HttpServletRequest request,
                          HttpServletResponse response,
                          String page) throws javax.servlet.ServletException,
      IOException {
    RequestDispatcher dispatcher =
        getServletContext().getRequestDispatcher(page);
    dispatcher.forward(request, response);
  }

  //Clean up resources
  public void destroy() {
  }
}

package com.highcom.hcgip.lucenesearch;

import java.io.IOException;
import java.util.*;
import java.io.BufferedReader;
import java.io.InputStreamReader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.cn.*;
import org.apache.lucene.document.Document;
import org.apache.lucene.search.Searcher;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Hits;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.analysis.cjk.*;
import com.highcom.hcgip.basic.common.*;
import org.apache.lucene.queryParser.*;
import org.apache.lucene.search.*;
import java.io.*;

public class Search {

  public static Vector getSearch(String indexDir, String q) {
    Hits hits = null;
    Vector it = new Vector();
    File segments = new File(indexDir + File.separator + "segments");
    if(segments.exists()){
      try {
        Searcher searcher = new IndexSearcher(indexDir); //構建搜索器,並指向索引目錄
        Analyzer analyzer = new CJKAnalyzer(); //構建語言分析器
        Query contentsquery = QueryParser.parse(q, "contents", analyzer); //查詢解析器:使用和索引同樣的語言分析器StandardAnalyzer
        Query proposalquery = QueryParser.parse(q, "proposal", analyzer);
        BooleanQuery comboQuery = new BooleanQuery();
        comboQuery.add(contentsquery, false, false);
        comboQuery.add(proposalquery, false, false);
        hits = searcher.search(comboQuery); //搜索結果使用Hits存儲,類似於數據庫返回的記錄集
        for (int i = 0; i < hits.length(); i++) {
          Document doc = hits.doc(i);
          LuceneBean luceneBean = new LuceneBean();
          luceneBean.setPath(doc.get("path"));
          luceneBean.setTitle(doc.get("title"));
          luceneBean.setDate(doc.get("date"));
          luceneBean.setType(doc.get("type"));
          luceneBean.setRealpath(doc.get("realpath"));
          it.add(luceneBean);
        }
        searcher.close();
      }
      catch (ParseException ex) {
        Log.debug(ex.toString());
      }
      catch (IOException ex) {
        Log.debug(ex.toString());
      }
    }

    return it;
  }

  public static Vector getSearch(String[] indexDir, String q) {
    Hits hits = null;
    Vector it = new Vector();
    Analyzer analyzer = new CJKAnalyzer(); //構建語言分析器
    try {
      for (int j = 0; j < indexDir.length; j++) {
        File segments = new File(indexDir[j] + File.separator + "segments");
        if(!segments.exists()){
          continue;
        }
        Searcher searcher = new IndexSearcher(indexDir[j]); //構建搜索器,並指向索引目錄
        Query contentsquery = QueryParser.parse(q, "contents", analyzer); //查詢解析器:使用和索引同樣的語言分析器StandardAnalyzer
        Query proposalquery = QueryParser.parse(q, "proposal", analyzer);
        BooleanQuery comboQuery = new BooleanQuery();
        comboQuery.add(contentsquery, false, false);
        comboQuery.add(proposalquery, false, false);
        hits = searcher.search(comboQuery); //搜索結果使用Hits存儲,類似於數據庫返回的記錄集
        for (int i = 0; i < hits.length(); i++) {
          Document doc = hits.doc(i);
          LuceneBean luceneBean = new LuceneBean();
          luceneBean.setPath(doc.get("path"));
          luceneBean.setTitle(doc.get("title"));
          luceneBean.setDate(doc.get("date"));
          luceneBean.setType(doc.get("type"));
          luceneBean.setRealpath(doc.get("realpath"));
          it.add(luceneBean);
        }
        searcher.close();
      }
    }
    catch (ParseException ex) {
      Log.debug(ex.toString());
    }
    catch (IOException ex) {
      Log.debug(ex.toString());
    }
    return it;
  }

  public static void main(String[] args) {

    Vector it = Search.getSearch("d://bb", "請求");
    LuceneBean luceneBean = new LuceneBean();
    System.out.println(it.size());
    if (it != null && it.size() > 0) {
      for (int i = 0; i < it.size(); i++) {
        luceneBean = (LuceneBean) it.get(i);
        System.out.print("<a href='" + luceneBean.getTitle() + "'>" +
                         luceneBean.getTitle() + "</a>");
        System.out.print(luceneBean.getDate());

      }
    }


  }
}

package com.highcom.hcgip.lucenesearch;

/**
 * <p>Title: LuceneBean</p>
 * <p>Description: Plain value holder for one search hit. Every property is
 * a String with a getter and a setter; all properties start out as null.</p>
 * <p>Copyright: Copyright (c) 2004</p>
 * <p>Company: Highcom</p>
 * @author 李新博
 * @version 1.0
 */

public class LuceneBean {

  // Stored values; the accessors below expose them unchanged.
  private String path;
  private String realpath;
  private String title;
  private String date;
  private String type;

  /** Creates a bean whose properties are all null. */
  public LuceneBean() {
    super();
  }

  // ---- getters ----

  /** @return the value last passed to {@link #setPath(String)} */
  public String getPath() {
    return this.path;
  }

  /** @return the value last passed to {@link #setRealpath(String)} */
  public String getRealpath() {
    return this.realpath;
  }

  /** @return the value last passed to {@link #setTitle(String)} */
  public String getTitle() {
    return this.title;
  }

  /** @return the value last passed to {@link #setDate(String)} */
  public String getDate() {
    return this.date;
  }

  /** @return the value last passed to {@link #setType(String)} */
  public String getType() {
    return this.type;
  }

  // ---- setters ----

  /** @param path the new path value */
  public void setPath(String path) {
    this.path = path;
  }

  /** @param realpath the new realpath value */
  public void setRealpath(String realpath) {
    this.realpath = realpath;
  }

  /** @param title the new title value */
  public void setTitle(String title) {
    this.title = title;
  }

  /** @param date the new date value */
  public void setDate(String date) {
    this.date = date;
  }

  /** @param type the new type value */
  public void setType(String type) {
    this.type = type;
  }
}

package com.highcom.hcgip.lucenesearch;

import org.apache.lucene.analysis.cn.*;
import org.apache.lucene.index.IndexWriter;
import java.io.File;
import java.io.Reader;
import java.io.FileInputStream;
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.util.Date;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.DateField;
import org.apache.lucene.analysis.cjk.*;
import java.io.*;
import com.highcom.hcgip.basic.common.*;
import com.highcom.hcgip.cm.util.*;
import org.htmlparser.beans.*;

/**
 * 建立索引文件
 * <p>Title: </p>
 * <p>Description: </p>
 * <p>Copyright: Copyright (c) 2004</p>
 * <p>Company: Highcom</p>
 * @author 李新博
 * @version 1.0
 */
public class IndexRunner {
  /**
   * 創建索引文件
   * @param filePath 要創建索引文件的原文件,或者是文件夾
   * @param indexDir 索引文件存放的目錄
   * @param showPath 檢索出來的路徑
   * @param it boolean類型,是否追加索引
   */
  public static synchronized void createIndex(String filePath, String indexDir,
                                 String showPath, String type, boolean it) {
    IndexWriter writer = null;
    try {
      writer = new IndexWriter(indexDir, new CJKAnalyzer(), it);
      indexDocs(writer, new File(filePath), showPath, type); //數據源,文件形式
      writer.optimize();
      writer.close();
    }
    catch (IOException ex) {
      Log.debug(" public static void createIndex===" + ex.toString());
    }
  }

  private static void indexDocs(IndexWriter writer, File file, String showPath,
                                String type) {
    if (file.isDirectory()) {
      String[] files = file.list();
      for (int i = 0; i < files.length; i++)
        indexDocs(writer, new File(file, files[i]), showPath, type);
    }
    else {
      if (file.getPath().endsWith(".html") || file.getPath().endsWith(".htm") ||
          file.getPath().endsWith(".txt")) {

        try {
          writer.addDocument(Document(file, showPath, type));

        }
        catch (IOException ex) {
          Log.debug(" private static void indexDocs==" + ex.toString());
        }
      }
    }
  }

  private static Document Document(File f, String showPath, String type) {
    Document doc = null;
    try {
      doc = null;
      doc = new Document(); //創建lucene接受的數據格式Document
      doc.add(Field.UnIndexed("realpath",f.getPath())); //創建字段名爲path的字段,不索引,只存儲,
      doc.add(Field.UnIndexed("path", showPath + f.getName())); //創建字段名爲path的字段,不索引,只存儲,
      doc.add(Field.Keyword("date", DateFormat.dateFormat2(new Date())));
      doc.add(Field.Keyword("type", type));
      doc.add(Field.Text("title", f.getName().split("_")[0]));
      doc.add(Field.Text("proposal",PropositionParser.getProposition(f.getPath())));
      ///FileInputStream is = new FileInputStream(f);
      //Reader reader = new BufferedReader(new InputStreamReader(is));
      //doc.add(Field.Text("contents", reader)); //創建字段名爲contents的字段來存需用索引的內容
      StringBean sb = new StringBean();
      sb.setLinks(false);
      sb.setCollapse(true);
      sb.setURL(f.getPath());
      sb.setReplaceNonBreakingSpaces(true);
      doc.add(Field.Text("contents",sb.getStrings()));

    }
    catch (Exception ex) {
      Log.debug("private static Document Document==" + ex.toString());
    }
    return doc;
  }
 
}

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章