全文檢索引擎 Lucene 的研究和使用（一）

// 建立索引
	/**
	 * Builds a Lucene index over the files found in an attachment directory.
	 *
	 * <p>Each regular file becomes one document holding its extracted text,
	 * a type tag derived from the file extension, its base name, its absolute
	 * path and its last-modified date.
	 */
	@Repository
	@Transactional
	public class IndexerJpa implements Serializable, IndexerDAO {
		private static final long serialVersionUID = 1L;

		/**
		 * (Re)creates the index described by a properties file.
		 *
		 * <p>The properties file must define {@code AttachmentDir} (directory
		 * whose files are indexed) and {@code IndexDir} (directory the index is
		 * written to). The index is opened with {@code OpenMode.CREATE}, so any
		 * index already present in {@code IndexDir} is replaced.
		 *
		 * <p>Failures are reported with {@code printStackTrace()} and are not
		 * propagated to the caller; the writer is closed in every case.
		 *
		 * @param path location of the properties file, passed to
		 *             {@code PropertiesHelper}
		 */
		public void create_index(String path) {
			IndexWriter indexWriter = null;
			Directory dir = null;
			try {
				PropertiesHelper p = new PropertiesHelper(path);
				String attachmentDir = p.getValue("AttachmentDir");
				String indexDir = p.getValue("IndexDir");

				// listFiles() returns null when the path is not a directory or
				// cannot be read; check before the existing index is touched.
				File[] files = new File(attachmentDir).listFiles();
				if (files == null) {
					throw new IOException(
							"AttachmentDir is not a readable directory: "
									+ attachmentDir);
				}

				dir = new SimpleFSDirectory(new File(indexDir));
				indexWriter = new IndexWriter(dir,
						new IndexWriterConfig(LuceneUtil.VERSION_CURRENT,
								LuceneUtil.ANALYZER_CURRENT)
								.setOpenMode(OpenMode.CREATE));

				for (File file : files) {
					// Sub-directories cannot be opened as a stream; skip them.
					if (!file.isFile()) {
						continue;
					}
					indexWriter.addDocument(buildDocument(file));
				}
				// One commit for the whole batch instead of one per document.
				indexWriter.commit();
			} catch (Exception e) {
				e.printStackTrace();
			} finally {
				// Always release the writer (and its lock), even on failure.
				if (indexWriter != null) {
					try {
						indexWriter.close();
					} catch (IOException e) {
						e.printStackTrace();
					}
				}
				if (dir != null) {
					try {
						dir.close();
					} catch (IOException e) {
						e.printStackTrace();
					}
				}
			}
		}

		/**
		 * Builds the index document for a single file.
		 *
		 * <p>The name is split at the last dot, so {@code a.b.doc} has base
		 * name {@code a.b} and extension {@code doc}; a name without a dot has
		 * an empty extension.
		 */
		private static Document buildDocument(File file) throws Exception {
			String fileName = file.getName();
			int dot = fileName.lastIndexOf('.');
			String baseName = dot >= 0 ? fileName.substring(0, dot) : fileName;
			String extension = dot >= 0 ? fileName.substring(dot + 1) : "";

			Document doc = new Document();
			doc.add(new Field(LuceneUtil.FILE_CONTENT,
					extractContent(file, extension), Field.Store.YES,
					Field.Index.ANALYZED));
			doc.add(new Field(LuceneUtil.FILE_TYPE, typeTag(extension),
					Field.Store.YES, Field.Index.NOT_ANALYZED));
			doc.add(new Field(LuceneUtil.FILE_NAME, baseName,
					Field.Store.YES, Field.Index.ANALYZED));
			doc.add(new Field(LuceneUtil.FILE_PATH, file.getAbsolutePath(),
					Field.Store.YES, Field.Index.NOT_ANALYZED));
			doc.add(new Field(LuceneUtil.FILE_DATE,
					DateUtil.getDate(file.lastModified()), Field.Store.YES,
					Field.Index.NOT_ANALYZED));
			return doc;
		}

		/**
		 * Maps an extension to the stored type tag: the 2007 formats share the
		 * tag of their legacy counterpart, anything else is stored as written.
		 */
		private static String typeTag(String extension) {
			if (extension.equalsIgnoreCase("docx")) {
				return "doc";
			}
			if (extension.equalsIgnoreCase("xlsx")) {
				return "xls";
			}
			if (extension.equalsIgnoreCase("pptx")) {
				return "ppt";
			}
			return extension;
		}

		/**
		 * Extracts the text of a file with the extractor matching its
		 * extension; unknown extensions are read with {@code Text}.
		 */
		private static String extractContent(File file, String extension)
				throws Exception {
			// These two extractors take a file path rather than a stream.
			if (extension.equalsIgnoreCase("docx")) {
				return Word.getContent2007(file.toString());
			}
			if (extension.equalsIgnoreCase("pptx")) {
				return PowerPoint.getContent2007(file.toString());
			}

			InputStream inputStream = new FileInputStream(file);
			try {
				if (extension.equalsIgnoreCase("doc")) {
					return Word.getContent(inputStream);
				}
				if (extension.equalsIgnoreCase("xls")) {
					return Excel.getContent(inputStream);
				}
				if (extension.equalsIgnoreCase("xlsx")) {
					return Excel.getContent2007(inputStream);
				}
				if (extension.equalsIgnoreCase("ppt")) {
					return PowerPoint.getContent(inputStream);
				}
				if (extension.equalsIgnoreCase("pdf")) {
					return PDF.getContent(inputStream);
				}
				return Text.getContent(inputStream);
			} finally {
				// Release the file handle whether or not extraction succeeded.
				inputStream.close();
			}
		}

	}

發佈了30 篇原創文章 · 獲贊 6 · 訪問量 6萬+
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章