這裏用的solr4.7
首先搭建環境
創建一個新core
這裏有詳細的資料
http://blog.csdn.net/clj198606061111/article/details/21288499/
修改core0裏面的xml
schema.xml
加入
<!-- Text field type "text_general" (solr.TextField) with separate analyzer chains for index time and query time. -->
<fieldType name="text_general" class="solr.TextField" positionIncrementGap="100">
<!-- Index time: standard tokenizer, then stop-word removal (stopwords.txt, case-insensitive), then lower-casing. -->
<analyzer type="index">
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" />
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
<!-- Query time: same chain as index time, plus synonym expansion from synonyms.txt applied before lower-casing. -->
<analyzer type="query">
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" />
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
</fieldType>
<!-- Indexed and stored field of type text_general; the /update/extract handler defaults in solrconfig.xml set fmap.content to this name. -->
<field name="text" type="text_general" indexed="true" stored="true"/>
<!-- NOTE(review): the "long" and "string" types are not defined in this snippet; presumably they already exist in the core's schema.xml - confirm. -->
<field name="_version_" type="long" indexed="true" stored="true"/>
<!-- Multi-valued dynamic field matching any name that starts with "attr_" (the same prefix the extract handler uses as uprefix). -->
<dynamicField name="attr_*" type="text_general" indexed="true" stored="true" multiValued="true"/>
<!-- General document fields: single-valued strings; only "id" is required. -->
<field name="id" type="string" indexed="true" stored="true" multiValued="false" required="true"/>
<field name="type" type="string" indexed="true" stored="true" multiValued="false" />
<field name="name" type="string" indexed="true" stored="true" multiValued="false" />
<field name="fileName" type="string" indexed="true" stored="true" multiValued="false" />
<field name="path" type="string" indexed="true" stored="true" multiValued="false" />
修改solrconfig.xml
<!-- Registers the extracting request handler (solr.extraction.ExtractingRequestHandler) at /update/extract. -->
<requestHandler name="/update/extract" class="solr.extraction.ExtractingRequestHandler" >
<!-- Default request parameters; a request may override them by sending the same parameter name. -->
<lst name="defaults">
<!-- Per the Solr Cell documentation: fmap.content renames the extracted "content" field to "text". -->
<str name="fmap.content">text</str>
<!-- Per the Solr Cell documentation: field names are lower-cased before being matched against the schema. -->
<str name="lowernames">true</str>
<!-- Per the Solr Cell documentation: fields not defined in the schema get this prefix, so they land in the attr_* dynamic field. -->
<str name="uprefix">attr_</str>
<!-- Per the Solr Cell documentation: attributes of the extracted XHTML elements are captured into separate fields. -->
<str name="captureAttr">true</str>
</lst>
</requestHandler>
加入相應的jar包
solr-4.7.2\dist solr-cell-4.7.2
solr-4.7.2\contrib\extraction 所有的jar包
然後引入jar包
在solrconfig.xml文件
<lib dir="../extract" regex=".*\.jar" />
重啓tomcat
數據準備
然後代碼編寫
/**
 * Entry point: submits every regular file located directly under
 * {@code G:/document/} to Solr through {@code indexFilesSolrCell}.
 * A failure on one file is printed and does not stop the remaining files.
 *
 * @param args command-line arguments (unused)
 */
public static void main(String[] args) {
    File parentFile = new File("G:/document/");
    // listFiles() returns null when the path does not exist, is not a
    // directory, or cannot be read; there is nothing to index in that case.
    File[] files = parentFile.listFiles();
    if (files == null) {
        return;
    }
    for (File file : files) {
        // Sub-directories cannot be uploaded as a content stream; skip them.
        if (!file.isFile()) {
            continue;
        }
        try {
            indexFilesSolrCell(file.getName(), file.getPath());
        } catch (IOException e) {
            e.printStackTrace();
        } catch (SolrServerException e) {
            e.printStackTrace();
        }
    }
}
/**
 * Sends one file to the {@code /update/extract} handler of the Solr core at
 * {@code http://localhost:8080/solr/core0} and commits it, then runs a
 * {@code *:*} query and prints the {@code attr_filename} value of each
 * returned document to standard output.
 *
 * @param fileName file name; sent as {@code literal.id} and
 *                 {@code literal.fileName}, and used to choose the content type
 * @param path     path of the file to upload; also sent as {@code literal.path}
 * @throws IOException         if reading the file or communicating with Solr fails
 * @throws SolrServerException if the Solr client reports an error
 */
public static void indexFilesSolrCell(String fileName, String path)
        throws IOException, SolrServerException {
    // Connect to the Solr core.
    String urlString = "http://localhost:8080/solr/core0";
    SolrServer solr = new HttpSolrServer(urlString);
    try {
        ContentStreamUpdateRequest up = new ContentStreamUpdateRequest(
                "/update/extract");
        String contentType = getFileContentType(fileName);
        up.addFile(new File(path), contentType);
        up.setParam("literal.id", fileName);
        up.setParam("literal.path", path);
        up.setParam("literal.fileName", fileName);
        // Overrides the handler's default fmap.content mapping for this request.
        up.setParam("fmap.content", "attr_content");
        up.setAction(AbstractUpdateRequest.ACTION.COMMIT, true, true);
        /*
         * Further literal.* parameters can be attached in the same way, e.g.:
         * up.addFile(file, contenttype); up.setParam("literal.id", id);
         * up.setParam("literal.mytitle", mytitle);
         * up.setParam("literal.mytime", dataTurntoLong(savetime));
         * up.setParam("literal.myindextype", myindextype);
         * up.setParam("literal.myyears", myyears); up.setParam("fmap.content",
         * "content");
         */
        solr.request(up);

        // Read back what is in the index and print the stored file names.
        QueryResponse rsp = solr.query(new SolrQuery("*:*"));
        for (SolrDocument solrDocument : rsp.getResults()) {
            System.out.println(solrDocument.getFieldValue("attr_filename"));
        }
    } finally {
        // A new client is created on every call; release its HTTP resources.
        solr.shutdown();
    }
}
/**
 * Returns the content type that corresponds to a file name's extension.
 *
 * <p>The extension is the text after the last {@code '.'} and is compared
 * case-insensitively, so {@code "a.PDF"} and {@code "a.pdf"} give the same
 * result. A name without any {@code '.'} has no extension.
 *
 * @param filename file name to inspect; must not be {@code null}
 * @return the MIME type for xlsx, pdf, doc, txt, xls, docx, ppt and pptx
 *         files, or {@code "othertype"} for any other or missing extension
 * @throws NullPointerException if {@code filename} is {@code null}
 */
public static String getFileContentType(String filename) {
    int dot = filename.lastIndexOf('.');
    if (dot < 0) {
        // No extension at all: do not treat the whole name as an extension.
        return "othertype";
    }
    String extension = filename.substring(dot + 1);
    if (extension.equalsIgnoreCase("xlsx")) {
        return "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet";
    }
    if (extension.equalsIgnoreCase("pdf")) {
        return "application/pdf";
    }
    if (extension.equalsIgnoreCase("doc")) {
        return "application/msword";
    }
    if (extension.equalsIgnoreCase("txt")) {
        return "text/plain";
    }
    if (extension.equalsIgnoreCase("xls")) {
        return "application/vnd.ms-excel";
    }
    if (extension.equalsIgnoreCase("docx")) {
        return "application/vnd.openxmlformats-officedocument.wordprocessingml.document";
    }
    if (extension.equalsIgnoreCase("ppt")) {
        return "application/vnd.ms-powerpoint";
    }
    if (extension.equalsIgnoreCase("pptx")) {
        return "application/vnd.openxmlformats-officedocument.presentationml.presentation";
    }
    return "othertype";
}
solr操作