/**
 * Builds the Lucene full-text index over the files of an attachment directory.
 *
 * <p>Every regular file found directly inside the attachment directory becomes one
 * Lucene document holding its extracted text, its type, its name, its absolute path
 * and its last-modified date.
 */
@Repository
@Transactional
public class IndexerJpa implements Serializable, IndexerDAO {
    private static final long serialVersionUID = 1L;

    /**
     * Recreates the index from scratch ({@code OpenMode.CREATE}) from the attachment files.
     *
     * <p>Failures are reported on standard error and never propagated. A file whose
     * content cannot be extracted is skipped so that the remaining files still get
     * indexed. The index writer and its directory are always closed, so the index
     * write lock is released even when indexing fails.
     *
     * @param path argument handed to {@code PropertiesHelper}; the loaded properties
     *             must provide the {@code AttachmentDir} and {@code IndexDir} values
     */
    public void create_index(String path) {
        IndexWriter indexWriter = null;
        Directory dir = null;
        try {
            PropertiesHelper p = new PropertiesHelper(path);
            String attachmentDir = p.getValue("AttachmentDir");
            String indexDir = p.getValue("IndexDir");

            // listFiles() returns null when the path is not a readable directory.
            // Check before opening the writer so an invalid setting leaves the
            // existing index untouched.
            File[] files = new File(attachmentDir).listFiles();
            if (files == null) {
                System.err.println("Attachment directory cannot be listed: " + attachmentDir);
                return;
            }

            dir = new SimpleFSDirectory(new File(indexDir));
            indexWriter = new IndexWriter(dir,
                    new IndexWriterConfig(LuceneUtil.VERSION_CURRENT,
                            LuceneUtil.ANALYZER_CURRENT)
                            .setOpenMode(OpenMode.CREATE));

            for (int i = 0; i < files.length; i++) {
                if (!files[i].isFile()) {
                    // Sub-directories cannot be read as a stream.
                    continue;
                }
                Document doc;
                try {
                    doc = buildDocument(files[i]);
                } catch (Exception e) {
                    // One unreadable attachment must not abort the whole run.
                    e.printStackTrace();
                    continue;
                }
                indexWriter.addDocument(doc);
            }
            // A single commit for the whole batch instead of one per document.
            indexWriter.commit();
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            if (indexWriter != null) {
                try {
                    indexWriter.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
            if (dir != null) {
                try {
                    dir.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }

    /**
     * Creates the Lucene document describing one attachment file.
     *
     * <p>The name is split at its last dot, so {@code a.b.doc} has base name
     * {@code a.b} and extension {@code doc}; a name without a dot has an empty extension.
     */
    private static Document buildDocument(File file) throws Exception {
        String fileName = file.getName();
        int dot = fileName.lastIndexOf('.');
        String baseName = dot < 0 ? fileName : fileName.substring(0, dot);
        String extension = dot < 0 ? "" : fileName.substring(dot + 1);

        Document doc = new Document();
        doc.add(new Field(LuceneUtil.FILE_CONTENT, extractContent(file, extension),
                Field.Store.YES, Field.Index.ANALYZED));
        doc.add(new Field(LuceneUtil.FILE_TYPE, indexedType(extension),
                Field.Store.YES, Field.Index.NOT_ANALYZED));
        doc.add(new Field(LuceneUtil.FILE_NAME, baseName,
                Field.Store.YES, Field.Index.ANALYZED));
        doc.add(new Field(LuceneUtil.FILE_PATH, file.getAbsolutePath(),
                Field.Store.YES, Field.Index.NOT_ANALYZED));
        doc.add(new Field(LuceneUtil.FILE_DATE, DateUtil.getDate(file.lastModified()),
                Field.Store.YES, Field.Index.NOT_ANALYZED));
        return doc;
    }

    /**
     * Extracts the text of a file with the extractor matching its extension;
     * unknown extensions are read with the plain-text extractor.
     */
    private static String extractContent(File file, String extension) throws Exception {
        // These two extractors take the file path rather than a stream.
        if ("docx".equalsIgnoreCase(extension)) {
            return Word.getContent2007(file.toString());
        }
        if ("pptx".equalsIgnoreCase(extension)) {
            return PowerPoint.getContent2007(file.toString());
        }

        InputStream inputStream = new FileInputStream(file);
        try {
            if ("doc".equalsIgnoreCase(extension)) {
                return Word.getContent(inputStream);
            } else if ("xls".equalsIgnoreCase(extension)) {
                return Excel.getContent(inputStream);
            } else if ("xlsx".equalsIgnoreCase(extension)) {
                return Excel.getContent2007(inputStream);
            } else if ("ppt".equalsIgnoreCase(extension)) {
                return PowerPoint.getContent(inputStream);
            } else if ("pdf".equalsIgnoreCase(extension)) {
                return PDF.getContent(inputStream);
            } else {
                return Text.getContent(inputStream);
            }
        } finally {
            try {
                inputStream.close();
            } catch (IOException ignored) {
                // The content has already been extracted (or extraction already
                // failed); a failure while closing carries no further information.
            }
        }
    }

    /**
     * Returns the value stored in the file-type field: the 2007 Office extensions
     * are stored under their legacy name, any other extension is stored unchanged.
     */
    private static String indexedType(String extension) {
        if ("docx".equalsIgnoreCase(extension)) {
            return "doc";
        }
        if ("xlsx".equalsIgnoreCase(extension)) {
            return "xls";
        }
        if ("pptx".equalsIgnoreCase(extension)) {
            return "ppt";
        }
        return extension;
    }
}
全文檢索引擎 Lucene 的研究和使用(一)
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.