Solr搜索服務器直接部署到Web容器之中,如果想要在服務器外部管理索引(創建、更新、刪除),需要向Solr服務器進程發送待處理的請求數據或者命令,實際索引變更是在Solr服務器程序中完成,自然而然底層是調用Lucene的API來實現這一操作的。爲了便於平時在開發中,隨時向Solr服務器發送索引更新請求,來觀察實際執行狀況,或驗證Solr的最新功能,我們基於Solr自帶的SimplePostTool,增加了聚合數據源的接口,實現了一個簡易的小工具。
工具類圖
首先,我們先看一下類圖及其類之間的關係,如圖所示:
編碼與實現
AbstractPostServer抽象類:該類表示一個要與Solr搜索服務器通信的實體,其中實現了一些與服務器基於HTTP協議進行通信的邏輯,在子類實現時,可以直接調用這些相關方法,而具體通過什麼方式、以什麼樣的數據格式則留給子類實現。抽象類代碼如下所示:
package org.shirdrn.solr.tools;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.Serializable;
import java.io.UnsupportedEncodingException;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.util.HashSet;
import java.util.Objects;
import java.util.Set;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Abstract post tool: generic HTTP plumbing for talking to a Solr update
 * handler. Subclasses choose the wire format (XML, JSON, ...) and implement
 * the concrete index-management operations.
 *
 * @author shirdrn
 * @date 2011-12-07
 */
public abstract class AbstractPostServer {

    protected static final Logger LOG = LoggerFactory.getLogger(AbstractPostServer.class);

    /** Parsed Solr update URL, built from {@link PostConfig#postUrl}. */
    protected URL solrUrl;
    protected PostConfig postConfig;
    protected HttpURLConnection httpConn;
    /** HTTP status of the most recent request; 503 until the first request completes. */
    protected int responseCode = HttpURLConnection.HTTP_UNAVAILABLE;
    /** Pluggable record source; may be replaced via {@link #setDataLoader(DataLoader)}. */
    protected DataLoader dataLoader;

    /**
     * Creates a post server bound to the given configuration.
     *
     * @param postConfig post settings (URL, HTTP method, content type, ...)
     * @param dataLoader source of records to index; may be {@code null} and injected later
     * @throws IllegalArgumentException if the configured post URL is malformed
     */
    public AbstractPostServer(PostConfig postConfig, DataLoader dataLoader) {
        super();
        this.postConfig = postConfig;
        this.dataLoader = dataLoader;
        try {
            this.solrUrl = new URL(postConfig.postUrl);
        } catch (MalformedURLException e) {
            // Fail fast: swallowing the exception here would leave solrUrl null
            // and surface later as an obscure NPE inside post().
            throw new IllegalArgumentException("Bad Solr post URL: " + postConfig.postUrl, e);
        }
    }

    /** @return a human-readable message describing the outcome of the last operation */
    public abstract String getResponseMessage();

    /** @return HTTP status code of the last request sent to the Solr server */
    public int getServerResponseCode() {
        return responseCode;
    }

    /**
     * Sends {@code data} to the Solr server and pipes the server's response
     * into {@code output}. Streams and the connection are released even when
     * the transfer fails part-way.
     *
     * @param data   request body to send
     * @param length exact body length for fixed-length streaming, or {@code null} to skip it
     * @param output sink for the server response; {@code null} discards the response
     * @throws IOException on any transport error
     */
    protected void post(InputStream data, Integer length, OutputStream output) throws IOException {
        httpConn = (HttpURLConnection) solrUrl.openConnection();
        try {
            httpConn.setRequestMethod(postConfig.postMethod);
            httpConn.setDoOutput(true);
            httpConn.setDoInput(true);
            httpConn.setUseCaches(false);
            httpConn.setAllowUserInteraction(false);
            httpConn.setRequestProperty("Content-type", postConfig.contentType);
            if (null != length) {
                httpConn.setFixedLengthStreamingMode(length);
            }
            OutputStream out = httpConn.getOutputStream();
            try {
                pipe(data, out);
            } finally {
                out.close();
            }
            responseCode = httpConn.getResponseCode();
            if (HttpURLConnection.HTTP_OK != responseCode) {
                LOG.error("Solr server error: " + responseCode + " " + httpConn.getResponseMessage());
            }
            InputStream in = httpConn.getInputStream();
            try {
                pipe(in, output);
            } finally {
                in.close();
            }
        } finally {
            // always release the connection, even if writing or reading failed
            httpConn.disconnect();
        }
    }

    /** Copies everything from dataIn into dataOut; a null dataOut just drains dataIn. */
    private void pipe(InputStream dataIn, OutputStream dataOut) throws IOException {
        byte[] buf = new byte[1024];
        int read = 0;
        while ((read = dataIn.read(buf)) >= 0) {
            if (null != dataOut) {
                dataOut.write(buf, 0, read);
            }
        }
        if (null != dataOut) {
            dataOut.flush();
        }
    }

    /** Wraps a string as a UTF-8 encoded input stream. */
    protected InputStream stringToStream(String s) {
        // StandardCharsets.UTF_8 avoids the impossible UnsupportedEncodingException
        // thrown by the String-named getBytes overload.
        return new ByteArrayInputStream(s.getBytes(StandardCharsets.UTF_8));
    }

    public void setDataLoader(DataLoader dataLoader) {
        this.dataLoader = dataLoader;
    }

    /**
     * Solr post configuration, convenient usage.
     *
     * @author shirdrn
     * @date 2011-12-07
     */
    public static final class PostConfig implements Serializable {

        private static final long serialVersionUID = 6389419734694067683L;

        private String postUrl = "http://localhost:8080/solr/test/update";
        private String postMethod = "POST";
        private String contentType = "application/xml";
        /** Number of documents batched into a single request before a commit is issued. */
        private int maxCommitCount = 100;
        private String uniqueKey;
        /** Record keys that are allowed into the index. */
        private Set<String> indexFieldSet = new HashSet<String>();
        /** Fields posted verbatim, i.e. exempt from special-character purging. */
        private Set<String> finalFieldSet = new HashSet<String>();

        public PostConfig(String postUrl, String postMethod, String contentType, String uniqueKey,
                String[] indexFields, String[] finalFields, int maxCommitCount) {
            super();
            // null arguments fall back to the field defaults declared above
            this.postUrl = (postUrl == null ? this.postUrl : postUrl);
            this.postMethod = (postMethod == null ? this.postMethod : postMethod);
            this.contentType = (contentType == null ? this.contentType : contentType);
            this.uniqueKey = uniqueKey;
            setIndexFieldSet(indexFields);
            setFinalFieldSet(finalFields);
            this.maxCommitCount = maxCommitCount;
        }

        public int getMaxCommitCount() {
            return maxCommitCount;
        }

        public String getUniqueKey() {
            return uniqueKey;
        }

        public Set<String> getIndexFieldSet() {
            return indexFieldSet;
        }

        private void setIndexFieldSet(String[] indexFields) {
            setFieldSet(indexFields, indexFieldSet);
        }

        private void setFinalFieldSet(String[] finalFields) {
            setFieldSet(finalFields, finalFieldSet);
        }

        public Set<String> getFinalFieldSet() {
            return finalFieldSet;
        }

        /** Copies trimmed, non-empty field names into the target set; a null array is a no-op. */
        private void setFieldSet(String[] fields, Set<String> fieldSet) {
            if (fields != null) {
                for (String field : fields) {
                    if (!field.isEmpty()) {
                        fieldSet.add(field.trim());
                    }
                }
            }
        }

        @Override
        public boolean equals(Object obj) {
            if (this == obj) {
                return true;
            }
            if (!(obj instanceof PostConfig)) {
                // the previous unchecked cast threw ClassCastException/NPE here
                return false;
            }
            PostConfig other = (PostConfig) obj;
            // NOTE(review): uniqueKey is excluded from equality, as in the
            // original implementation — confirm this is intentional.
            return postMethod.equalsIgnoreCase(other.postMethod)
                    && contentType.equalsIgnoreCase(other.contentType)
                    && postUrl.equalsIgnoreCase(other.postUrl)
                    && maxCommitCount == other.maxCommitCount
                    && indexFieldSet.equals(other.indexFieldSet)
                    && finalFieldSet.equals(other.finalFieldSet);
        }

        /** Kept consistent with equals(): case-insensitive on the string fields. */
        @Override
        public int hashCode() {
            return Objects.hash(postMethod.toLowerCase(), contentType.toLowerCase(),
                    postUrl.toLowerCase(), maxCommitCount, indexFieldSet, finalFieldSet);
        }

        @Override
        public String toString() {
            StringBuilder config = new StringBuilder();
            config.append("[postUrl=").append(postUrl)
                    .append(", postMethod=").append(postMethod)
                    .append(", contentType=").append(contentType)
                    .append(", maxCommitCount=").append(maxCommitCount)
                    .append(", indexFieldSet=").append(indexFieldSet)
                    .append(", finalFieldSet=").append(finalFieldSet)
                    .append("]");
            return config.toString();
        }
    }
}
AbstractPostServer類中使用了接口DataLoader,在子類中只需要注入DataLoader接口的實現類即可以完成加載數據的功能。DataLoader接口是對不同數據源的抽象,定義如下所示:
package org.shirdrn.solr.tools;
import java.util.Map;
/**
 * Abstraction over a data source that feeds records to a post server.
 * Implementations are expected to wrap an iterator over the source so that
 * each {@code fetchOne()} call yields the next record.
 */
public interface DataLoader {
/**
 * Returns the next record as a field-name/value map, one document per call.
 * NOTE(review): MongoDataLoader returns {@code null} once the source is
 * exhausted — callers should be prepared for that.
 */
public Map<Object, Object> fetchOne();
/** Returns the total number of records available from this source. */
public int getRecordCount();
}
fetchOne方法是供AbstractPostServer類的子類去調用的,每次調用獲取數據源中一條記錄,我們知道,在索引的時候邏輯上通常對應着一個文檔(Document),如一篇文章、一段文字等等。這也就暗示了,實現DataLoader接口,必須提供一個基於數據源的迭代器,每次在fetchOne方法中調用一次迭代器(如next方法),獲取到一條記錄,然後在AbstractPostServer類的子類中轉換成Lucene的Document(實際間接地通過Solr來轉換的,而我們只是規劃好每個字段及其對應的內容)。
這裏,我給出了一個DataLoader接口的實現,數據存儲在MongoDB中,具體實現對應於MongoDataLoader類,代碼如下所示:
package org.shirdrn.solr.tools;
import java.io.Serializable;
import java.util.Map;
import com.mongodb.BasicDBObject;
import com.mongodb.DBCollection;
import com.mongodb.DBCursor;
import com.mongodb.DBObject;
import com.mongodb.Mongo;
import com.mongodb.MongoException;
/**
 * Load data being indexed from Mongo DB. Opens a cursor over the configured
 * collection (optionally filtered by query conditions) and hands records out
 * one at a time through {@link #fetchOne()}.
 *
 * @author shirdrn
 * @date 2011-12-07
 */
public class MongoDataLoader implements DataLoader {

    private MongoConfig mongoConfig;
    private DBCollection collection;
    /** Optional Mongo query filter; {@code null} means "load the whole collection". */
    private Map<Object, Object> conditions;
    private DBCursor cursor;
    private int recordCount;

    /**
     * @param mongoConfig connection settings (host, port, db, collection)
     * @param conditions  query filter, or {@code null} for all documents
     */
    public MongoDataLoader(MongoConfig mongoConfig, Map<Object, Object> conditions) {
        super();
        this.mongoConfig = mongoConfig;
        this.conditions = conditions;
        initialize();
    }

    /** Opens the collection and positions a cursor over the matching documents. */
    private void initialize() {
        DBObject q = new BasicDBObject();
        if (conditions != null) {
            q.putAll(conditions);
        }
        try {
            if (collection == null) {
                collection = MongoHelper.newHelper(mongoConfig).getCollection(mongoConfig.collectionName);
            }
            cursor = collection.find(q);
            recordCount = cursor.size();
        } catch (MongoException e) {
            // Best-effort: a failed connection leaves recordCount at 0 and
            // fetchOne() returning null rather than aborting the tool.
            e.printStackTrace();
        }
    }

    /**
     * Returns the next document as a map, with Mongo's "_id" mirrored into a
     * string "id" entry (the Solr unique key); returns {@code null} and closes
     * the cursor when the result set is exhausted.
     */
    @SuppressWarnings("unchecked")
    @Override
    public Map<Object, Object> fetchOne() {
        Map<Object, Object> m = null;
        try {
            if (cursor.hasNext()) {
                m = cursor.next().toMap();
                m.put("id", m.get("_id").toString());
            } else {
                cursor.close();
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
        return m;
    }

    @Override
    public int getRecordCount() {
        return recordCount;
    }

    /** Connection settings for a single MongoDB collection. */
    public static class MongoConfig implements Serializable {

        private static final long serialVersionUID = -3028092758346115702L;

        private String host;
        private int port;
        private String dbname;
        private String collectionName;

        public MongoConfig(String host, int port, String dbname, String collectionName) {
            super();
            this.host = host;
            this.port = port;
            this.dbname = dbname;
            this.collectionName = collectionName;
        }

        @Override
        public boolean equals(Object obj) {
            if (this == obj) {
                return true;
            }
            if (!(obj instanceof MongoConfig)) {
                // the previous unchecked cast threw ClassCastException/NPE here
                return false;
            }
            MongoConfig other = (MongoConfig) obj;
            return host.equals(other.host) && port == other.port
                    && dbname.equals(other.dbname) && collectionName.equals(other.collectionName);
        }

        /** Added alongside the fixed equals() to honor the equals/hashCode contract. */
        @Override
        public int hashCode() {
            int h = host.hashCode();
            h = 31 * h + port;
            h = 31 * h + dbname.hashCode();
            h = 31 * h + collectionName.hashCode();
            return h;
        }
    }

    /**
     * Lazy singleton around a single shared {@link Mongo} connection, closed
     * via a JVM shutdown hook. NOTE(review): the first caller's config wins —
     * a later call with a different MongoConfig still returns the helper built
     * for the first one; confirm only one Mongo endpoint is ever used per JVM.
     */
    static class MongoHelper {

        private static Mongo mongo;
        private static MongoHelper helper;
        private MongoConfig mongoConfig;

        private MongoHelper(MongoConfig mongoConfig) {
            super();
            this.mongoConfig = mongoConfig;
        }

        public synchronized static MongoHelper newHelper(MongoConfig mongoConfig) {
            try {
                if (helper == null) {
                    helper = new MongoHelper(mongoConfig);
                    mongo = new Mongo(mongoConfig.host, mongoConfig.port);
                    // ensure the connection is released when the JVM exits
                    Runtime.getRuntime().addShutdownHook(new Thread() {
                        @Override
                        public void run() {
                            if (mongo != null) {
                                mongo.close();
                            }
                        }
                    });
                }
            } catch (Exception e) {
                e.printStackTrace();
            }
            return helper;
        }

        /** Opens the named collection, or returns {@code null} if the lookup fails. */
        public DBCollection getCollection(String collectionName) {
            DBCollection c = null;
            try {
                c = mongo.getDB(mongoConfig.dbname).getCollection(collectionName);
            } catch (Exception e) {
                e.printStackTrace();
            }
            return c;
        }
    }
}
上面爲了方便,定義了MongoConfig類封裝了與MongoDB相關的一些數據,而MongoHelper類實現了與MongoDB數據庫進行交互的一些操作。實際上,在打開一個DBCollection實例以後,便可以根據查詢條件獲取到查詢結果集的一個遊標(DBCursor),通過遊標就可以迭代整個結果集的記錄,亦即需要進行索引的數據。在我們要實現一個真正提供與Solr搜索服務器進行交互服務的AbstractPostServer之前,我們先看一下,都需要提供哪些基本服務,這是在PostService接口中定義的。PostService接口中定義了管理索引最基本的功能,代碼如下所示:
package org.shirdrn.solr.tools;
import java.io.IOException;
import java.io.OutputStream;
/**
 * Basic index-management operations a post server must offer: batched
 * add/delete plus explicit commit and optimize requests.
 */
public interface PostService {
/** Sends a commit request to the server, piping the response into {@code output}. */
public void commit(OutputStream output) throws IOException;
/** Sends an optimize request to the server, piping the response into {@code output}. */
public void optimize(OutputStream output) throws IOException;
/** Posts all records from the data loader as index updates; optionally optimizes afterwards. */
public void postUpdate(boolean autoOptimize) throws Exception;
/** Posts delete requests built from the data loader; optionally optimizes afterwards. */
public void postDelete(boolean autoOptimize) throws Exception;
/** Returns how many records have been processed so far. */
public int getPostCount();
}
實現一個基於JSON數據格式的AbstractPostServer,需要繼承自AbstractPostServer抽象類,同時實現接口PostService,對應的JsonPostServer實現類的代碼如下所示:
package org.shirdrn.solr.tools;
import java.io.IOException;
import java.io.OutputStream;
import java.util.Iterator;
import java.util.Map;
import java.util.Map.Entry;
import net.sf.json.JSONObject;
/**
 * Json style post server. Acts as a client-side proxy for a Solr search
 * server: it builds JSON update/delete messages from the records supplied by
 * a {@link DataLoader} and posts them in batches to the configured update
 * handler, committing every {@code maxCommitCount} documents.
 *
 * @author shirdrn
 * @date 2011-12-07
 */
public class JsonPostServer extends AbstractPostServer implements PostService {

    /** Total number of records processed by postUpdate()/postDelete(). */
    private int postCount;
    /** "OK" unless a transport error occurred; then the exception message. */
    private String responseMessage = "OK";

    public JsonPostServer(PostConfig postConfig) {
        super(postConfig, null);
    }

    public JsonPostServer(PostConfig postConfig, DataLoader dataLoader) {
        super(postConfig, dataLoader);
    }

    /**
     * Builds {@code {"add":{"doc":{...}}, ...}} batches from the data loader
     * and posts them; {@link #increment} flushes and commits each full batch.
     *
     * @param autoOptimize whether to send an optimize request after all batches
     */
    @Override
    public void postUpdate(boolean autoOptimize) throws Exception {
        try {
            StringBuilder data = new StringBuilder("{");
            int recordCount = dataLoader.getRecordCount();
            for (int i = 0; i < recordCount; i++) {
                Map<Object, Object> record = dataLoader.fetchOne();
                if (record != null) {
                    JSONObject fields = new JSONObject();
                    for (Entry<Object, Object> entry : record.entrySet()) {
                        if (postConfig.getIndexFieldSet().contains(entry.getKey())) {
                            if (postConfig.getFinalFieldSet().contains(entry.getKey())) {
                                // "final" fields (e.g. URLs) are posted verbatim
                                fields.put(entry.getKey(), entry.getValue().toString());
                            } else {
                                fields.put(entry.getKey(), purgeJsonSpecifiedCharacters(entry.getValue().toString()));
                            }
                        }
                    }
                    JSONObject doc = new JSONObject();
                    doc.put("doc", fields);
                    JSONObject op = new JSONObject();
                    op.put("add", doc);
                    String opJson = op.toString();
                    // strip the outer braces so the ops can be joined into one JSON object
                    data.append(opJson.substring(1, opJson.length() - 1));
                    increment(i + 1, recordCount, data);
                }
            }
            if (autoOptimize) {
                optimize(System.out);
            }
        } catch (IOException e) {
            responseMessage = e.getMessage();
        }
    }

    /**
     * Bookkeeping after each appended record: when the batch is full
     * (maxCommitCount) or the last record was appended, closes the JSON
     * object, posts it and commits; otherwise just appends a separating comma.
     */
    private void increment(int i, int recordCount, StringBuilder data) throws IOException {
        ++postCount;
        if (i % postConfig.getMaxCommitCount() == 0 || i == recordCount) {
            data.append("}");
            post(stringToStream(data.toString()), null, System.out);
            commit(System.out);
            if (i != recordCount) {
                // start the next batch
                data.delete(0, data.length());
                data.append("{");
            }
        } else {
            data.append(",");
        }
    }

    /**
     * Replaces characters that are special in JSON with spaces, then trims.
     * NOTE(review): json-lib already escapes values when serializing; this
     * additionally blanks '/' and control characters, which mangles URLs —
     * hence the finalFieldSet bypass in postUpdate(). Confirm it is still needed.
     */
    private String purgeJsonSpecifiedCharacters(String data) {
        StringBuilder buffer = new StringBuilder();
        for (int i = 0; i < data.length(); i++) {
            switch (data.charAt(i)) {
                case '\"':
                case '\\':
                case '/':
                case '\b':
                case '\f':
                case '\n':
                case '\r':
                case '\t':
                    buffer.append(" ");
                    break;
                default:
                    buffer.append(data.charAt(i));
            }
        }
        return buffer.toString().trim();
    }

    /**
     * Builds {@code {"delete":{...}, ...}} batches keyed either by the
     * configured unique key or by a "query" entry, and posts them. Only the
     * first entry of each record is inspected.
     *
     * @param autoOptimize whether to send an optimize request after all batches
     */
    @Override
    public void postDelete(boolean autoOptimize) throws Exception {
        try {
            StringBuilder data = new StringBuilder("{");
            int recordCount = dataLoader.getRecordCount();
            for (int i = 0; i < recordCount; i++) {
                Map<Object, Object> record = dataLoader.fetchOne();
                Iterator<Entry<Object, Object>> iter = record.entrySet().iterator();
                Entry<Object, Object> entry = iter.next();
                if (postConfig.getUniqueKey().equals(entry.getKey()) || entry.getKey().equals("query")) {
                    JSONObject jo = new JSONObject();
                    jo.put(entry.getKey(), entry.getValue());
                    JSONObject op = new JSONObject();
                    op.put("delete", jo);
                    String opJson = op.toString();
                    data.append(opJson.substring(1, opJson.length() - 1));
                    increment(i + 1, recordCount, data);
                }
                // NOTE(review): a non-matching record skips increment(), so the
                // final batch may never be flushed — confirm inputs always match.
            }
            if (autoOptimize) {
                optimize(System.out);
            }
        } catch (IOException e) {
            responseMessage = e.getMessage();
        }
    }

    /** Posts a {@code {"commit":{}}} request. */
    @Override
    public void commit(OutputStream output) throws IOException {
        JSONObject commit = new JSONObject();
        commit.put("commit", new JSONObject());
        post(stringToStream(commit.toString()), null, output);
        LOG.debug("Commit done: " + commit.toString());
    }

    /** Posts an optimize request without waiting for flush or a new searcher. */
    @Override
    public void optimize(OutputStream output) throws IOException {
        JSONObject optimizer = new JSONObject();
        JSONObject jo = new JSONObject();
        jo.put("waitFlush", false);
        jo.put("waitSearcher", false);
        optimizer.put("optimize", jo);
        post(stringToStream(optimizer.toString()), null, output);
        LOG.debug("Optimize done: " + optimizer.toString());
    }

    @Override
    public String getResponseMessage() {
        return responseMessage;
    }

    @Override
    public int getPostCount() {
        return postCount;
    }
}
測試用例
測試用例,我們主要測試postUpdate和postDelete,代碼如下所示:
package org.shirdrn.solr.tools;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.Map.Entry;
import junit.framework.TestCase;
import org.shirdrn.solr.tools.AbstractPostServer.PostConfig;
import org.shirdrn.solr.tools.MongoDataLoader.MongoConfig;
/**
 * Test post server based on JSON style data. Exercises postUpdate against a
 * live MongoDB-backed loader and postDelete against an in-memory loader;
 * both tests expect reachable Solr/Mongo instances.
 *
 * @author shirdrn
 * @date 2011-12-08
 */
public class TestJsonPostServer extends TestCase {

    private PostService server;

    @Override
    protected void setUp() throws Exception {
        // Five index fields; "url" is marked final so it is posted verbatim.
        String[] indexFields = new String[] {"id", "title", "content", "pubdate", "url"};
        String[] finalFields = new String[] {"url"};
        PostConfig postConfig = new PostConfig(
                "http://192.168.0.195:8080/solr35/core0/update",
                "POST", "application/json",
                "id",
                indexFields,
                finalFields,
                2);
        server = new JsonPostServer(postConfig);
    }

    public void testPostUpdate() {
        // Pull every document from the "page" collection and post it.
        MongoConfig mongoConfig = new MongoConfig("192.168.0.195", 27017, "pagedb", "page");
        ((AbstractPostServer) server).setDataLoader(new MongoDataLoader(mongoConfig, null));
        try {
            server.postUpdate(true);
        } catch (Exception e) {
            e.printStackTrace();
        }
        assertEquals(5, server.getPostCount());
        assertEquals("OK", ((AbstractPostServer) server).getResponseMessage());
        assertEquals(200, ((AbstractPostServer) server).getServerResponseCode());
    }

    public void testPostDelete() {
        // One delete by unique key, one delete by query.
        final Map<Object, Object> conditions = new HashMap<Object, Object>();
        conditions.put("id", "4eded53abf3bfa0014000002");
        conditions.put("query", "title:孟加拉國");
        final Iterator<Entry<Object, Object>> pending = conditions.entrySet().iterator();
        ((AbstractPostServer) server).setDataLoader(new DataLoader() {
            @Override
            public Map<Object, Object> fetchOne() {
                // Each call wraps the next condition entry as a single-field record.
                Map<Object, Object> record = new HashMap<Object, Object>();
                if (pending.hasNext()) {
                    Entry<Object, Object> next = pending.next();
                    record.put(next.getKey(), next.getValue());
                }
                return record;
            }
            @Override
            public int getRecordCount() {
                return conditions.size();
            }
        });
        try {
            server.postDelete(false);
        } catch (Exception e) {
            e.printStackTrace();
        }
        assertEquals(2, server.getPostCount());
        assertEquals("OK", ((AbstractPostServer) server).getResponseMessage());
        assertEquals(200, ((AbstractPostServer) server).getServerResponseCode());
    }

    @Override
    protected void tearDown() throws Exception {
        super.tearDown();
    }
}
附錄說明
索引字段
Solr搜索服務器的schema.xml定義的索引字段如下所示:
<?xml version="1.0" ?>
<schema name="example core zero" version="1.1">
<!-- Field types: a plain string type plus a Chinese-analyzed "text" type.
     Indexing applies SmartChinese tokenization and a stopword filter;
     querying applies the same tokenization plus synonym expansion. -->
<types>
<fieldtype name="string" class="solr.StrField" sortMissingLast="true" omitNorms="true" />
<fieldType name="text" class="solr.TextField" positionIncrementGap="100">
<analyzer type="index">
<tokenizer class="solr.SmartChineseSentenceTokenizerFactory" />
<filter class="solr.SmartChineseWordTokenFilterFactory" />
<filter class="solr.PositionFilterFactory" />
<filter class="solr.StandardFilterFactory" />
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" />
</analyzer>
<analyzer type="query">
<tokenizer class="solr.SmartChineseSentenceTokenizerFactory" />
<filter class="solr.SmartChineseWordTokenFilterFactory" />
<filter class="solr.PositionFilterFactory" />
<filter class="solr.StandardFilterFactory" />
<filter class="solr.SynonymFilterFactory" synonyms="./synonyms.txt" ignoreCase="false" expand="true" />
</analyzer>
</fieldType>
</types>
<!-- Fields match the indexFieldSet used by the post tool; "id" is the
     unique key mirrored from Mongo's _id. -->
<fields>
<field name="id" type="string" indexed="true" stored="true" multiValued="false" required="true" />
<field name="content" type="text" indexed="true" stored="true" multiValued="true" />
<field name="pubdate" type="string" indexed="true" stored="true" multiValued="false" />
<field name="title" type="text" indexed="true" stored="true" multiValued="true" />
<field name="url" type="string" indexed="true" stored="true" multiValued="false" />
</fields>
<uniqueKey>id</uniqueKey>
<defaultSearchField>title</defaultSearchField>
<solrQueryParser defaultOperator="OR" />
</schema>
我把整個測試用schema.xml文件的內容都附在上面了。
依賴jar庫
commons-beanutils-1.7.0.jar
commons-collections-3.2.1.jar
commons-lang-2.4.jar
commons-logging-1.1.1.jar
ezmorph-1.0.6.jar
json-lib-2.4-jdk15.jar
mongo-2.5.3.jar
slf4j-api-1.5.5.jar
slf4j-jdk14-1.5.5.jar