此图展示了 Lucene 获取数据并建立索引的过程(Understanding the indexing process)
lucene 建立索引步骤
1,Extracting text and creating the document 提取数据,创建文档
2,Analysis 分析获取的内容:先分词,再过滤停用词(stop word,即非关键字)
3,add to the index 添加分析后的结果到 index
在这一步,索引会被分块(分段,segment)存储,以提高检索效率;这大概就是 Lucene 索引目录中有那么多小文件的原因
代码体现:
// Fixture data: parallel arrays, one entry per document; setUp() reads index i
// of each array to build document i.

// Values for the "id" field (stored, indexed without analysis in setUp()).
protected String[] ids = {"1", "2"};
// Values for the "country" field (stored but not indexed in setUp()).
protected String[] unindexed = {"Netherlands", "Italy"};
// Values for the "contents" field (analyzed but not stored in setUp()).
protected String[] unstored = {"Amsterdam has lots of bridges",
"Venice has lots of canals"};
// Values for the "city" field (stored and analyzed in setUp()).
protected String[] text = {"Amsterdam", "Venice"};
// Directory holding the index; assigned a new RAMDirectory in setUp().
private Directory directory;
/**
 * Builds the test index: creates a new {@code RAMDirectory}, then adds one
 * document per entry of {@code ids}, with four fields each.
 *
 * <ul>
 *   <li>{@code id}       - stored, indexed as a single un-analyzed token</li>
 *   <li>{@code country}  - stored, not indexed</li>
 *   <li>{@code contents} - analyzed, not stored</li>
 *   <li>{@code city}     - stored and analyzed</li>
 * </ul>
 *
 * The arrays {@code unindexed}, {@code unstored} and {@code text} are read at
 * the same index as {@code ids}, so they must be at least as long as
 * {@code ids}.
 *
 * @throws Exception if obtaining the writer, adding a document, or closing
 *                   the writer fails
 */
protected void setUp() throws Exception {
    directory = new RAMDirectory(); // in-memory Directory
    // Indexing is an I/O operation, so it goes through a writer.
    // getWriter() is defined elsewhere in this class.
    IndexWriter writer = getWriter();
    try {
        for (int i = 0; i < ids.length; i++) {
            // Step 1: extract the data and create the document.
            Document doc = new Document();
            doc.add(new Field("id", ids[i],
                    Field.Store.YES,
                    Field.Index.NOT_ANALYZED)); // an id does not need tokenizing
            doc.add(new Field("country", unindexed[i],
                    Field.Store.YES,
                    Field.Index.NO));
            doc.add(new Field("contents", unstored[i],
                    Field.Store.NO,
                    Field.Index.ANALYZED)); // contents are tokenized
            // Step 2: ANALYZED marks the field for analysis.
            doc.add(new Field("city", text[i],
                    Field.Store.YES,
                    Field.Index.ANALYZED));
            // Step 3: add the document to the index.
            writer.addDocument(doc);
        }
    } finally {
        // Close even when adding a document fails, so the writer is never
        // left open; on the normal path this is what produces the index.
        writer.close();
    }
}