Solrj是Solr搜索服務器的一個比較基礎的客戶端工具,可以非常方便地與Solr搜索服務器進行交互,最基本的功能就是管理Solr索引,包括添加、更新、刪除和查詢等。對於一些比較基礎的應用,用Solrj基本夠用,而且你可以非常容易地通過使用Solrj的API實現與Solr搜索服務器進行交互,實現對Solr的基本管理功能。如果你的應用比較複雜,可以擴展Solrj來滿足需要。
下面是一個使用Solrj的API實現與Solr服務器交互的工具類SolrPostServer,能夠實現索引的添加、更新、刪除和查詢功能。SolrPostServer類中的兩個內部類分別是訪問MongoDB所需的配置類和工具類。
在實際應用中,對於是否進行commit,可以有兩種方式:
- 一種是直接在客戶端進行計算,亦即,進行索引時計算添加的文檔數,滿足設置的值則進行手動commit,這種方式比較靈活,你可以根據搜索服務器的運行狀況選擇合理的commit文檔數量;
- 另一種是,直接在Solr搜索服務器上進行配置。一般來說,對索引進行大批量更新,不會選擇在搜索服務器業務繁忙的時候進行,所以能夠自動進行commit也便利了對索引的管理,更新文檔完全可以實現自動化處理。
<!-- Server-side configuration for automatic commits, as an alternative to committing from the client. -->
<!-- NOTE(review): stock solrconfig.xml files declare autoCommit (and maxPendingDeletes) under the
     updateHandler element (class solr.DirectUpdateHandler2), not under a requestHandler;
     confirm that these settings actually take effect in this position. -->
<requestHandler name="/update" class="solr.XmlUpdateRequestHandler">
<maxPendingDeletes>10000</maxPendingDeletes>
<autoCommit>
<!-- Number of pending documents after which a commit is executed. -->
<maxDocs>20</maxDocs>
<!-- Time interval after which a commit is executed. -->
<maxTime>86000</maxTime>
</autoCommit>
</requestHandler>
上面autoCommit中的maxDocs指定了pending多少個文檔後執行一次commit,而maxTime指定了多長時間間隔(單位爲毫秒)進行一次commit,一般這兩個選項只需要配置一個即可滿足需要。另外,每次commit會使最近的更新生效,但是如果一次commit操作尚未完成,又達到了下一次commit的時刻,就會嚴重影響索引的吞吐量,因此commit不宜過於頻繁。實現代碼如下所示:
package org.shirdrn.solr.solrj;
import java.io.IOException;
import java.io.Serializable;
import java.net.MalformedURLException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import org.apache.commons.httpclient.HttpClient;
import org.apache.log4j.Logger;
import org.apache.lucene.document.Document;
import org.apache.solr.client.solrj.ResponseParser;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.impl.CommonsHttpSolrServer;
import org.apache.solr.client.solrj.impl.XMLResponseParser;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.params.SolrParams;
import com.mongodb.BasicDBObject;
import com.mongodb.DBCollection;
import com.mongodb.DBCursor;
import com.mongodb.DBObject;
import com.mongodb.Mongo;
import com.mongodb.MongoException;
/**
 * Solr client for index operations: reads records from MongoDB collections, converts them to
 * {@link SolrInputDocument}s and posts them to a Solr server through SolrJ. It also exposes
 * delete, query, commit and optimize operations on the same server.
 *
 * <p>Instances keep a mutable buffer of pending documents and are not designed for concurrent use.
 *
 * @author shirdrn
 * @date 2011-12-20
 */
public class SolrPostServer {

    private static final Logger LOG = Logger.getLogger(SolrPostServer.class);

    private CommonsHttpSolrServer server;
    private ResponseParser responseParser;
    private MongoConfig mongoConfig;
    private String[] collectionNames;
    /** Number of buffered documents that triggers a batch add followed by a commit. */
    private int maxCommitCount = 100;
    private boolean manualOptimize = true;
    private boolean manualCommit = false;
    /** Documents collected for the next batch add; replaced by a fresh list after each flush. */
    private Collection<SolrInputDocument> docContainer = new ArrayList<SolrInputDocument>();
    /** Running count of added documents; static, so it is shared by all instances and never reset. */
    private static int totalCount = 0;

    /**
     * Creates a client bound to the given Solr URL.
     *
     * @param url base URL of the Solr server (core)
     * @param httpClient HTTP client to use; when {@code null} a default client is created and
     *        configured with the timeouts and connection limits below
     * @param mongoConfig connection settings of the MongoDB instance the documents are read from
     * @throws IllegalArgumentException if {@code url} is not a well-formed URL
     */
    public SolrPostServer(String url, HttpClient httpClient, MongoConfig mongoConfig) {
        try {
            if (httpClient == null) {
                server = new CommonsHttpSolrServer(url);
                server.setSoTimeout(500000); // socket read timeout
                server.setConnectionTimeout(5000);
                server.setDefaultMaxConnectionsPerHost(10);
                server.setMaxTotalConnections(100);
                server.setAllowCompression(true);
                server.setMaxRetries(1); // defaults to 0. > 1 not recommended.
            } else {
                server = new CommonsHttpSolrServer(url, httpClient);
            }
        } catch (MalformedURLException e) {
            // Fail fast: continuing with a null server would only surface later as an obscure NPE.
            throw new IllegalArgumentException("Malformed Solr server URL: " + url, e);
        }
        this.mongoConfig = mongoConfig;
        initialize();
    }

    /**
     * Applies the configured {@link ResponseParser} to the underlying
     * {@link CommonsHttpSolrServer}, falling back to {@link XMLResponseParser} when none is set.
     */
    private void initialize() {
        if (responseParser != null) {
            server.setParser(responseParser);
        } else {
            server.setParser(new XMLResponseParser());
        }
    }

    /**
     * Reads every record of every configured MongoDB collection and adds it to the Solr index,
     * then flushes the remaining buffered documents and optionally optimizes the index.
     *
     * <p>When {@code manualCommit} is {@code true} each document is sent on its own and this class
     * issues no commit for it; when {@code false} documents are buffered and committed in batches
     * of {@code maxCommitCount}.
     * NOTE(review): the flag name suggests the opposite mapping - confirm the intended semantics.
     *
     * <p>An {@link IOException} on a single document is logged and the document is skipped; a
     * {@link SolrServerException} or {@link MongoException} aborts the run and is logged.
     */
    @SuppressWarnings("unchecked")
    public void postUpdate() {
        if (collectionNames == null || collectionNames.length == 0) {
            LOG.warn("No MongoDB collection names configured, nothing to index.");
            return;
        }
        MongoHelper mongoHelper = MongoHelper.newHelper(mongoConfig);
        if (mongoHelper == null) {
            LOG.error("MongoDB is not available, nothing was indexed.");
            return;
        }
        DBCursor cursor = null;
        try {
            for (String c : collectionNames) {
                LOG.info("MongoDB collection name: " + c);
                DBCollection collection = mongoHelper.getCollection(c);
                if (collection == null) {
                    LOG.error("Cannot open MongoDB collection: " + c);
                    continue;
                }
                DBObject q = new BasicDBObject(); // empty query: match every record
                cursor = collection.find(q);
                while (cursor.hasNext()) {
                    try {
                        Map<Object, Object> m = cursor.next().toMap();
                        add(m, manualCommit);
                        ++totalCount;
                        LOG.info("Add fragment: _id = " + m.get("_id"));
                    } catch (IOException e) {
                        LOG.error("I/O error while adding a document, skipping it.", e);
                    }
                }
                cursor.close();
                cursor = null;
            }
            LOG.info("Add totalCount: " + totalCount);
            finallyCommit();
            optimize(manualOptimize);
        } catch (MongoException e) {
            LOG.error("MongoDB error while posting updates.", e);
        } catch (SolrServerException e) {
            LOG.error("Solr error while posting updates.", e);
        } catch (IOException e) {
            LOG.error("I/O error while posting updates.", e);
        } finally {
            // Release the cursor when the loop was left through an exception.
            if (cursor != null) {
                cursor.close();
            }
        }
    }

    /**
     * Deletes lucene {@link Document}s by IDs. Failures are logged, not rethrown.
     *
     * @param strings IDs of the documents to delete; {@code null} or empty is a no-op
     */
    public void deleteById(List<String> strings) {
        if (strings == null || strings.isEmpty()) {
            return;
        }
        try {
            server.deleteById(strings);
        } catch (SolrServerException e) {
            LOG.error("Solr error while deleting by id.", e);
        } catch (IOException e) {
            LOG.error("I/O error while deleting by id.", e);
        }
    }

    /**
     * Deletes lucene {@link Document}s matching a query. Failures are logged, not rethrown.
     *
     * @param query Solr query selecting the documents to delete
     */
    public void deleteByQuery(String query) {
        try {
            server.deleteByQuery(query);
        } catch (SolrServerException e) {
            LOG.error("Solr error while deleting by query: " + query, e);
        } catch (IOException e) {
            LOG.error("I/O error while deleting by query: " + query, e);
        }
    }

    /**
     * Runs a query and projects every hit onto the requested fields.
     *
     * @param params query parameters passed to the Solr server
     * @param fields names of the fields to copy from each hit
     * @return one map (field name to field value) per hit; empty when the query fails, in which
     *         case the failure is logged
     */
    public List<Map<String, Object>> query(SolrParams params, String[] fields) {
        List<Map<String, Object>> results = new ArrayList<Map<String, Object>>();
        try {
            SolrDocumentList documents = server.query(params).getResults();
            for (SolrDocument doc : documents) {
                Map<String, Object> map = new HashMap<String, Object>();
                for (String field : fields) {
                    map.put(field, doc.getFieldValue(field));
                }
                results.add(map);
            }
        } catch (SolrServerException e) {
            LOG.error("Solr error while querying.", e);
        }
        return results;
    }

    /**
     * When controlling the committing action at client side, sends the documents that are still
     * buffered and commits them.
     *
     * @throws SolrServerException if the Solr server rejects the batch add
     * @throws IOException if the batch add fails at the transport level
     */
    private void finallyCommit() throws SolrServerException, IOException {
        if (!docContainer.isEmpty()) {
            server.add(docContainer);
            // Start a fresh buffer so a later run does not send the same documents again.
            docContainer = new ArrayList<SolrInputDocument>();
            commit(false, false);
        }
    }

    /**
     * Commits pending updates. Failures are logged, not rethrown.
     *
     * @param waitFlush passed through to the Solr server's commit call
     * @param waitSearcher passed through to the Solr server's commit call
     */
    public void commit(boolean waitFlush, boolean waitSearcher) {
        try {
            server.commit(waitFlush, waitSearcher);
        } catch (SolrServerException e) {
            LOG.error("Solr error while committing.", e);
        } catch (IOException e) {
            LOG.error("I/O error while committing.", e);
        }
    }

    /**
     * Optimizes the index and then commits. If optimizing fails the error is logged and a
     * rollback is attempted; nothing is rethrown.
     *
     * @param waitFlush passed through to the Solr server's optimize and commit calls
     * @param waitSearcher passed through to the Solr server's optimize and commit calls
     */
    public void optimize(boolean waitFlush, boolean waitSearcher) {
        try {
            server.optimize(waitFlush, waitSearcher);
            commit(waitFlush, waitSearcher);
        } catch (Exception e) {
            LOG.error("Encounter error when optimizing.", e);
            try {
                server.rollback();
            } catch (SolrServerException e1) {
                LOG.error("Solr error while rolling back.", e1);
            } catch (IOException e1) {
                LOG.error("I/O error while rolling back.", e1);
            }
        }
    }

    /**
     * Optimizes the index (waiting for flush and searcher) only when requested.
     *
     * @param optimize whether to optimize at all
     */
    private void optimize(boolean optimize) {
        if (optimize) {
            optimize(true, true);
        }
    }

    /**
     * Converts a MongoDB record to a {@link SolrInputDocument} and either sends it immediately
     * or buffers it; a full buffer is sent as one batch and committed.
     *
     * @param m the MongoDB record as a map
     * @param oneByOne {@code true} to send the document on its own (no commit is issued here),
     *        {@code false} to buffer it for batch updating
     * @throws SolrServerException if the Solr server rejects the add or the commit
     * @throws IOException if the add or the commit fails at the transport level
     */
    private void add(Map<Object, Object> m, boolean oneByOne) throws SolrServerException, IOException {
        SolrInputDocument doc = createDocument(m);
        if (oneByOne) {
            server.add(doc);
            return;
        }
        docContainer.add(doc);
        // ">=" so that a batch holds exactly maxCommitCount documents, not one more.
        if (docContainer.size() >= maxCommitCount) {
            server.add(docContainer);
            server.commit(false, false);
            docContainer = new ArrayList<SolrInputDocument>();
        }
    }

    /**
     * Creates a {@link SolrInputDocument} from a MongoDB record.
     *
     * @param record the MongoDB record as a map; must contain {@code _id}
     * @return the document to index
     */
    private SolrInputDocument createDocument(Map<Object, Object> record) {
        String id = record.get("_id").toString();
        String articleId = (String) record.get("articleId");
        String title = (String) record.get("title");
        String url = (String) record.get("url");
        String spiderName = (String) record.get("spiderName");
        String fragment = makeFragment((BasicDBObject) record.get("fragment"));
        String word = (String) record.get("word");
        int pictureCount = toInt(record.get("pictureCount"));
        int selectedCount = toInt(record.get("selectedCount"));
        int fragmentSize = toInt(record.get("fragmentSize"));
        // Avoid emitting NaN/Infinity into the payload when the fragment size is missing or zero.
        float coverage = fragmentSize > 0 ? (float) selectedCount / fragmentSize : 0.0f;

        SolrInputDocument doc = new SolrInputDocument();
        doc.addField("_id", id, 1.0f);
        doc.addField("articleId", articleId, 1.0f);
        doc.addField("title", title, 1.0f);
        doc.addField("url", url, 1.0f);
        doc.addField("spiderName", spiderName, 1.0f);
        doc.addField("fragment", fragment, 1.0f);
        doc.addField("word", word, 1.0f);
        // Additional processing for lucene payload metadata.
        doc.addField("pictureCount", word + "|" + pictureCount);
        doc.addField("coverage", word + "|" + coverage);
        return doc;
    }

    /**
     * Reads a numeric record field as an {@code int}.
     *
     * @param value the raw field value; any {@link Number} is accepted
     * @return the integer value, or 0 when the field is absent
     */
    private static int toInt(Object value) {
        if (value == null) {
            return 0;
        }
        return ((Number) value).intValue();
    }

    /**
     * Joins the trimmed values of a fragment object, appending {@code <br>} after each one.
     *
     * @param fragment the fragment sub-document; may be {@code null}
     * @return the joined text, empty when there is no fragment
     */
    private String makeFragment(BasicDBObject fragment) {
        StringBuilder builder = new StringBuilder();
        if (fragment == null) {
            return builder.toString();
        }
        for (Object value : fragment.toMap().values()) {
            if (value != null) {
                builder.append(value.toString().trim()).append("<br>");
            }
        }
        return builder.toString();
    }

    /**
     * Set {@link ResponseParser}, default value is {@link XMLResponseParser}. The parser is
     * applied to the underlying server immediately; {@code null} restores the default.
     *
     * @param responseParser the parser to use
     */
    public void setResponseParser(ResponseParser responseParser) {
        this.responseParser = responseParser;
        initialize();
    }

    /**
     * Pulling document resource from multiple collections of MongoDB.
     *
     * @param collectionNames names of the collections read by {@link #postUpdate()}
     */
    public void setCollectionNames(String[] collectionNames) {
        this.collectionNames = collectionNames;
    }

    /**
     * @param maxCommitCount number of buffered documents that triggers a batch add and commit
     */
    public void setMaxCommitCount(int maxCommitCount) {
        this.maxCommitCount = maxCommitCount;
    }

    /**
     * @param manualCommit {@code true} to send documents one by one, {@code false} to batch them
     */
    public void setManualCommit(boolean manualCommit) {
        this.manualCommit = manualCommit;
    }

    /**
     * @param manualOptimize whether {@link #postUpdate()} optimizes the index when it finishes
     */
    public void setManualOptimize(boolean manualOptimize) {
        this.manualOptimize = manualOptimize;
    }

    /**
     * Mongo database configuration.
     *
     * @author shirdrn
     * @date 2011-12-20
     */
    public static class MongoConfig implements Serializable {

        private static final long serialVersionUID = -3028092758346115702L;

        private String host;
        private int port;
        private String dbname;
        private String collectionName;

        public MongoConfig(String host, int port, String dbname, String collectionName) {
            super();
            this.host = host;
            this.port = port;
            this.dbname = dbname;
            this.collectionName = collectionName;
        }

        /**
         * Two configurations are equal when host, port, database name and collection name match.
         * Safe for {@code null}, foreign types and {@code null} fields.
         */
        @Override
        public boolean equals(Object obj) {
            if (this == obj) {
                return true;
            }
            if (!(obj instanceof MongoConfig)) {
                return false;
            }
            MongoConfig other = (MongoConfig) obj;
            return port == other.port
                    && same(host, other.host)
                    && same(dbname, other.dbname)
                    && same(collectionName, other.collectionName);
        }

        /** Consistent with {@link #equals(Object)}: built from the same four fields. */
        @Override
        public int hashCode() {
            int result = 17;
            result = 31 * result + (host == null ? 0 : host.hashCode());
            result = 31 * result + port;
            result = 31 * result + (dbname == null ? 0 : dbname.hashCode());
            result = 31 * result + (collectionName == null ? 0 : collectionName.hashCode());
            return result;
        }

        /** Null-safe equality of two field values. */
        private static boolean same(Object a, Object b) {
            return a == null ? b == null : a.equals(b);
        }
    }

    /**
     * Mongo database utility. Caches a single helper (and its connection) for the most recently
     * requested configuration and closes the connection on JVM shutdown.
     *
     * @author shirdrn
     * @date 2011-12-20
     */
    static class MongoHelper {

        private static MongoHelper helper;
        private static boolean shutdownHookRegistered = false;

        private final MongoConfig mongoConfig;
        private final Mongo mongo;

        private MongoHelper(MongoConfig mongoConfig, Mongo mongo) {
            super();
            this.mongoConfig = mongoConfig;
            this.mongo = mongo;
        }

        /**
         * Returns the cached helper, connecting first when there is none yet or when the
         * requested configuration differs from the cached one (the previous connection is then
         * closed, so helpers obtained earlier must no longer be used).
         *
         * @param mongoConfig the connection settings
         * @return the helper; when connecting fails the error is logged and the previously cached
         *         helper is returned, which is {@code null} if none was ever created
         */
        public synchronized static MongoHelper newHelper(MongoConfig mongoConfig) {
            try {
                if (helper == null || !helper.mongoConfig.equals(mongoConfig)) {
                    // Connect before publishing, so a failed connection never leaves a helper
                    // without a usable Mongo instance behind.
                    Mongo connection = new Mongo(mongoConfig.host, mongoConfig.port);
                    MongoHelper replacement = new MongoHelper(mongoConfig, connection);
                    if (helper != null) {
                        helper.mongo.close();
                    }
                    helper = replacement;
                    registerShutdownHook();
                }
            } catch (Exception e) {
                LOG.error("Cannot connect to MongoDB.", e);
            }
            return helper;
        }

        /** Registers, at most once, a JVM shutdown hook that closes the cached connection. */
        private static void registerShutdownHook() {
            if (shutdownHookRegistered) {
                return;
            }
            Runtime.getRuntime().addShutdownHook(new Thread() {
                @Override
                public void run() {
                    closeCurrent();
                }
            });
            shutdownHookRegistered = true;
        }

        /** Closes the connection of the currently cached helper, if any. */
        private static synchronized void closeCurrent() {
            if (helper != null) {
                helper.mongo.close();
            }
        }

        /**
         * Looks up a collection in the configured database.
         *
         * @param collectionName name of the collection
         * @return the collection, or {@code null} when the lookup fails (the error is logged)
         */
        public DBCollection getCollection(String collectionName) {
            DBCollection c = null;
            try {
                c = mongo.getDB(mongoConfig.dbname).getCollection(collectionName);
            } catch (Exception e) {
                LOG.error("Cannot get MongoDB collection: " + collectionName, e);
            }
            return c;
        }
    }
}
下面,我們可以通過寫一個測試用例測試一下。
首先,我的Solr搜索服務器已經部署好並啓動成功,對應的url爲http://192.168.0.197:8080/server/fragment/。測試用例如下所示:
package org.shirdrn.solr.solrj;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import junit.framework.TestCase;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.request.MapSolrParams;
import org.shirdrn.solr.solrj.SolrPostServer.MongoConfig;
/**
 * Integration tests for {@link SolrPostServer}. They talk to the live Solr and MongoDB servers
 * whose addresses are set in {@link #setUp()}.
 */
@SuppressWarnings("deprecation")
public class TestSolrPostServer extends TestCase {

    SolrPostServer myServer;
    MongoConfig config;
    String url;
    String[] collectionNames;

    /** Builds the client under test with a batch size of 100. */
    @Override
    protected void setUp() throws Exception {
        this.url = "http://192.168.0.197:8080/server/fragment/";
        this.config = new MongoConfig("192.168.0.184", 27017, "fragment", "");
        this.myServer = new SolrPostServer(this.url, null, this.config);
        this.myServer.setMaxCommitCount(100);
    }

    @Override
    protected void tearDown() throws Exception {
        super.tearDown();
    }

    /** Indexes all listed MongoDB collections, one document at a time, without optimizing. */
    public void testPostUpdate() {
        String[] names = {
            "sina",
            "lvping",
            "daodao",
            "go2eu",
            "mafengwo",
            "lotour",
            "17u",
            "sohu",
            "baseSe",
            "bytravel"
        };
        this.collectionNames = names;
        myServer.setCollectionNames(names);
        myServer.setManualCommit(true);
        myServer.setManualOptimize(false);
        myServer.postUpdate();
    }

    /** Deletes a fixed set of documents by id, then commits and optimizes. */
    public void testPostDelete() {
        String[] ids = {
            "4ef051342c4117a38f63ee97",
            "4ef051322c4117a38f63ee36",
            "4ef051a42c4117a38f63fb51",
            "4ef050d92c4117a38f63dda4",
            "4ef051fe2c4117a38f640bc8",
            "4ef048ef2c4117a38f6207ce",
            "4ef049062c4117a38f620e13",
            "4ef046f12c4117a38f6185c0"
        };
        List<String> strings = new ArrayList<String>();
        for (String id : ids) {
            strings.add(id);
        }
        myServer.deleteById(strings);
        myServer.commit(false, false);
        myServer.optimize(true, false);
    }

    /** Queries the first ten hits for a filtered search and expects a full page of results. */
    public void testQuery() {
        Map<String, String> map = new HashMap<String, String>();
        map.put(CommonParams.Q, "法國");
        map.put(CommonParams.START, "0");
        map.put(CommonParams.ROWS, "10");
        map.put(CommonParams.FQ, "word:盧浮宮");
        SolrParams params = new MapSolrParams(map);
        String[] fields = new String[] {"_id", "title", "url"};
        List<Map<String, Object>> results = myServer.query(params, fields);
        assertEquals(10, results.size());
    }
}
在實際開發的過程中,使用Solrj客戶端可以非常容易爲測試做一些基本操作,如創建索引,測試Solr基本參數及其開發定製Solr相關接口(Retrieval、Highlighting、Faceted Search、Clustering等等)。