如何在Mahout中支持mongodb

目前,NoSQL 的運用越來越廣,大量的日誌數據都存儲到 MongoDB 了。但是 Mahout 對 MongoDB 的支持並不好,使用起來十分複雜。
例如 Mahout 0.9 中的 MongoDBDataModel:

public final class MongoDBDataModel implements DataModel {
    //.....此處省略
  /**
   * Reads every document of the preference collection and rebuilds the in-memory
   * {@code delegate} model from it.
   *
   * <p>Side effects visible in this method: resets several instance flags/counters,
   * ensures two indexes on the mapping collection, issues a remove with an empty query
   * against the mapping collection, and updates {@code mongoTimestamp} to the newest
   * {@code created_at} value seen.
   *
   * <p>NOTE(review): neither the {@code Mongo} instance nor the cursor is closed inside
   * this method.
   *
   * @throws UnknownHostException declared for the {@code Mongo} constructor call
   */
  private void buildModel() throws UnknownHostException {
    // Reset state held in fields (declared outside this excerpt).
    userIsObject = false;
    itemIsObject = false;
    idCounter = 0;
    preferenceIsString = true;
    Mongo mongoDDBB = new Mongo(mongoHost, mongoPort);
    DB db = mongoDDBB.getDB(mongoDB);
    // Start from the epoch so that any "created_at" value compares as newer.
    mongoTimestamp = new Date(0);
    // user ID -> all preferences expressed by that user
    FastByIDMap<Collection<Preference>> userIDPrefMap = new FastByIDMap<Collection<Preference>>();
    // Proceed when authentication is not required or when it succeeds.
    // If it fails, the whole block is skipped and the delegate is built from an empty map.
    if (!mongoAuth || db.authenticate(mongoUsername, mongoPassword.toCharArray())) {
      collection = db.getCollection(mongoCollection);
      collectionMap = db.getCollection(mongoMapCollection);
      // Ascending index on "element_id" in the mapping collection.
      DBObject indexObj = new BasicDBObject();
      indexObj.put("element_id", 1);
      collectionMap.ensureIndex(indexObj);
      // Ascending index on "long_value" in the mapping collection.
      indexObj = new BasicDBObject();
      indexObj.put("long_value", 1);
      collectionMap.ensureIndex(indexObj);
      // Remove with an empty query object — presumably clears the whole mapping
      // collection before it is repopulated; confirm against the driver docs.
      collectionMap.remove(new BasicDBObject());
      DBCursor cursor = collection.find();
      while (cursor.hasNext()) {
        Map<String,Object> user = (Map<String,Object>) cursor.next().toMap();
        // Documents carrying a "deleted_at" key are ignored.
        if (!user.containsKey("deleted_at")) {
          // getID/fromIdToLong are defined outside this excerpt; the result is parsed as a long.
          long userID = Long.parseLong(fromIdToLong(getID(user.get(mongoUserID), true), true));
          long itemID = Long.parseLong(fromIdToLong(getID(user.get(mongoItemID), false), false));
          float ratingValue = getPreference(user.get(mongoPreference));
          // Lazily create the per-user preference list.
          Collection<Preference> userPrefs = userIDPrefMap.get(userID);
          if (userPrefs == null) {
            userPrefs = Lists.newArrayListWithCapacity(2);
            userIDPrefMap.put(userID, userPrefs);
          }
          userPrefs.add(new GenericPreference(userID, itemID, ratingValue));
          // Track the most recent "created_at" seen so far.
          if (user.containsKey("created_at")
              && mongoTimestamp.compareTo(getDate(user.get("created_at"))) < 0) {
            mongoTimestamp = getDate(user.get("created_at"));
          }
        }
      }
    }
    // Wrap the collected preferences in a GenericDataModel that serves all later calls.
    delegate = new GenericDataModel(GenericDataModel.toDataMap(userIDPrefMap, true));
  }
  // ......
  }

在這個實現中,ensureIndex 是 MongoDB 2.x 時代的 API,在新版驅動中已被 createIndex 取代並標記為過時,早就不該再用了。
另外邏輯混亂,明明只需要把數據讀出來、構造一個 GenericDataModel 並賦值給 delegate 就好了。
fromIdToLong 函數的實現也明顯有 bug。

所以我改寫了一版,採用組合(composition)的方式來實現:類內部持有一個 GenericDataModel,並通過 DataModel() 方法對外提供。
MongoModel.java

package com.resc.datamodel;

import org.apache.mahout.cf.taste.model.DataModel;
import com.resc.datamodel.MongoModel ; 
import com.google.common.collect.Lists;
import com.mongodb.BasicDBObject;
import com.mongodb.DBObject;
import com.mongodb.Mongo;
import com.resc.main.MainProcess;
import com.mongodb.DB;
import com.mongodb.DBCollection;
import com.mongodb.DBCursor;
import org.apache.mahout.cf.taste.model.DataModel;
import org.apache.mahout.cf.taste.model.Preference;
import org.apache.mahout.cf.taste.model.PreferenceArray;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.mahout.cf.taste.common.NoSuchUserException;
import org.apache.mahout.cf.taste.impl.common.FastByIDMap;
import org.apache.mahout.cf.taste.impl.model.GenericDataModel;
import org.apache.mahout.cf.taste.impl.model.GenericPreference;
import org.apache.mahout.cf.taste.common.NoSuchItemException;
import java.net.UnknownHostException;
import java.util.Collection;
import java.util.Map;

/**
 * Loads (user, item, score) triples from a MongoDB collection into an in-memory Mahout
 * {@link GenericDataModel} and exposes it through {@link #DataModel()}.
 *
 * <p>The collection is read once, in the constructor; later changes in MongoDB are not
 * reflected. The connection is closed as soon as loading finishes.
 */
public final class MongoModel implements CustomDataModel{
    private static final Logger log = LoggerFactory.getLogger(MongoModel.class);
    private static final String DEFAULT_USER_FIELD = "user";
    private static final String DEFAULT_ITEM_FIELD = "item";
    private static final String DEFAULT_RATING_FIELD = "score";
    private static final String DEFAULT_IPADDR = "127.0.0.1";
    private static final int DEFAULT_PORT = 27017;
    private static final String DEFAULT_DB_NAME = "recommend";
    private static final String DEFAULT_TABLE_NAME = "preference";
    /** Rating assigned to a document whose score field is missing. */
    private static final float DEFAULT_PREFERENCE = 0.5f;

    private final String m_strHost;
    private final int m_iPort;
    private final String m_strDBName;
    private final String m_strTableName;
    private final String m_strUserFieldName;
    private final String m_strItemFieldName;
    private final String m_strScoreFieldName;
    private final DataModel m_oDataModel;

    /**
     * Connects to MongoDB, reads the whole preference collection and builds the model.
     * Any string argument that is {@code null} or empty, and any non-positive port,
     * falls back to the corresponding default.
     *
     * @param strHost           MongoDB host (default {@code 127.0.0.1})
     * @param port              MongoDB port (default {@code 27017})
     * @param strDBName         database name (default {@code recommend})
     * @param strTableName      collection name (default {@code preference})
     * @param strUserFiledName  field holding the user id (default {@code user})
     * @param strItemFiledName  field holding the item id (default {@code item})
     * @param strScoreFiledName field holding the rating (default {@code score})
     * @throws UnknownHostException if the host cannot be resolved
     */
    public MongoModel(String strHost, int port, String strDBName, String strTableName, String strUserFiledName,
            String strItemFiledName, String strScoreFiledName) throws UnknownHostException {
        // Strings must be compared by content: `!= ""` only compares references and
        // lets both null and runtime-built empty strings through.
        m_strHost = orDefault(strHost, DEFAULT_IPADDR);
        m_iPort = port > 0 ? port : DEFAULT_PORT;
        m_strDBName = orDefault(strDBName, DEFAULT_DB_NAME);
        m_strTableName = orDefault(strTableName, DEFAULT_TABLE_NAME);
        m_strUserFieldName = orDefault(strUserFiledName, DEFAULT_USER_FIELD);
        m_strItemFieldName = orDefault(strItemFiledName, DEFAULT_ITEM_FIELD);
        m_strScoreFieldName = orDefault(strScoreFiledName, DEFAULT_RATING_FIELD);
        m_oDataModel = buildModel();
    }

    /**
     * Reads every document of the configured collection and groups the preferences by user.
     * Documents lacking a user or item id are skipped, because mapping them to id 0 would
     * merge unrelated records into one fake user/item.
     *
     * @return the populated in-memory model
     * @throws UnknownHostException if the host cannot be resolved
     */
    private DataModel buildModel() throws UnknownHostException {
        FastByIDMap<Collection<Preference>> userIDPrefMap = new FastByIDMap<Collection<Preference>>();
        Mongo mongo = new Mongo(m_strHost, m_iPort);
        try {
            DB db = mongo.getDB(m_strDBName);
            DBCollection collection = db.getCollection(m_strTableName);
            DBCursor cursor = collection.find();
            try {
                while (cursor.hasNext()) {
                    DBObject doc = cursor.next();
                    Object rawUser = doc.get(m_strUserFieldName);
                    Object rawItem = doc.get(m_strItemFieldName);
                    if (rawUser == null || rawItem == null) {
                        log.warn("Skipping document without user/item id: {}", doc);
                        continue;
                    }
                    long uid = toLong(rawUser);
                    long item = toLong(rawItem);
                    float ratingValue = getPreference(doc.get(m_strScoreFieldName));
                    log.info("{}\t\t{}\t\t{}", uid, item, ratingValue);

                    Collection<Preference> userPrefs = userIDPrefMap.get(uid);
                    if (userPrefs == null) {
                        userPrefs = Lists.newArrayListWithCapacity(2);
                        userIDPrefMap.put(uid, userPrefs);
                    }
                    userPrefs.add(new GenericPreference(uid, item, ratingValue));
                }
            } finally {
                cursor.close();
            }
        } finally {
            // Everything is in memory now; release the connection.
            mongo.close();
        }
        return new GenericDataModel(GenericDataModel.toDataMap(userIDPrefMap, true));
    }

    /**
     * Returns the in-memory model built at construction time.
     *
     * @return the Mahout data model backing this instance
     */
    public DataModel DataModel() {
        return m_oDataModel;
    }

    /** Returns {@code value} unless it is {@code null} or empty, in which case {@code fallback}. */
    private static String orDefault(String value, String fallback) {
        return (value == null || value.isEmpty()) ? fallback : value;
    }

    /**
     * Converts a non-null id value to {@code long}. Numbers are truncated via
     * {@link Number#longValue()}, so integral ids above 2^53 keep full precision;
     * anything else is parsed from its string form.
     *
     * @throws NumberFormatException if the string form is not numeric
     */
    private static long toLong(Object o) {
        if (o instanceof Number) {
            return ((Number) o).longValue();
        }
        String text = o.toString().trim();
        try {
            return Long.parseLong(text);
        } catch (NumberFormatException ignored) {
            // Not a plain integer (e.g. "12.0"): fall back to decimal parsing and truncate.
            return Double.valueOf(text).longValue();
        }
    }

    /**
     * Converts a score value to {@code float}; a missing score yields
     * {@link #DEFAULT_PREFERENCE}.
     *
     * @throws NumberFormatException if a non-numeric value's string form cannot be parsed
     */
    private static float getPreference(Object value) {
        if (value == null) {
            return DEFAULT_PREFERENCE;
        }
        if (value instanceof Number) {
            return ((Number) value).floatValue();
        }
        return Float.parseFloat(value.toString());
    }

}

這樣使用的時候就方便很多了:
CustomDataModel _d = new MongoModel(“xxxxxx”, 5713 , “數據庫名稱” , “評價表” , “存儲user的列的名稱” ,”存儲item的列的名稱” ,”存儲評分的列的名稱”) ;
DataModel oDataModel = _d.DataModel() ;
UserSimilarity userSimilarity = new PearsonCorrelationSimilarity(oDataModel);
……

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章