Mahout基於用戶的協同過濾算法的例子

每行測試數據以逗號分隔,依次表示用戶id(uid)、物品id(itemid)、評分(rating)、評分時間(time,Unix時間戳)
3464,2502,3,973282547
3464,3160,2,973282494
3464,2505,3,967175070
3464,1703,2,967248043
3464,1704,5,967246680
3464,3163,1,967174266
3464,2369,4,973282339
3464,1569,4,967247436
3464,896,3,967247557
3464,3316,3,973282934
3464,2517,3,967174139
3464,3174,4,967174266
3464,3175,2,973282421
3464,3176,3,967174298
3464,1573,3,967247865
3464,3178,4,967247587
3464,105,3,967248019
3464,3325,4,973282547
3464,1721,3,967247042
3464,3327,4,973282892
3464,3185,3,967174298
3464,1727,4,967248268
3464,111,5,967174438
3464,3186,4,967242949
3464,1729,3,967247165
3464,1584,3,967247078
3464,2387,3,967247884
3464,2389,4,967175256
3464,1589,4,967248019
3464,1732,4,967247306
3464,2391,4,967246935
3464,2395,4,973282625
3464,2396,5,967246752
3464,1597,4,967174960
3464,2541,3,967247865

package userBased;

import java.io.File;
import java.util.List;

import org.apache.mahout.cf.taste.impl.model.file.FileDataModel;
import org.apache.mahout.cf.taste.impl.neighborhood.NearestNUserNeighborhood;
import org.apache.mahout.cf.taste.impl.recommender.GenericUserBasedRecommender;
import org.apache.mahout.cf.taste.impl.similarity.PearsonCorrelationSimilarity;
import org.apache.mahout.cf.taste.model.DataModel;
import org.apache.mahout.cf.taste.neighborhood.UserNeighborhood;
import org.apache.mahout.cf.taste.recommender.RecommendedItem;
import org.apache.mahout.cf.taste.recommender.Recommender;
import org.apache.mahout.cf.taste.similarity.UserSimilarity;

/**
 * Example of user-based collaborative filtering with Apache Mahout (Taste).
 *
 * <p>Loads a ratings file, builds a Pearson-correlation user similarity, picks the
 * nearest-N user neighborhood and prints the top recommendations for one user.
 *
 * <p>Usage: {@code UserBased [ratingsFile [userId [howMany]]]}. Every argument is
 * optional; omitted arguments fall back to the defaults declared below.
 */
public class UserBased {

    /** Ratings file used when no path is given on the command line. */
    private static final String DEFAULT_RATINGS_PATH = "F:/ml-1m/ratings.dat";

    /** User to recommend for when no user id is given on the command line. */
    private static final long DEFAULT_USER_ID = 100L;

    /** Number of recommendations requested when no count is given on the command line. */
    private static final int DEFAULT_HOW_MANY = 2;

    /** Number of most-similar users that form the neighborhood. */
    private static final int NEIGHBORHOOD_SIZE = 500;

    /**
     * Runs the recommender and prints each recommended item to standard output.
     *
     * @param args optional {@code ratingsFile}, {@code userId}, {@code howMany}, in that order
     * @throws NumberFormatException if {@code userId} or {@code howMany} is supplied but is
     *         not a valid number
     * @throws Exception if the data model cannot be loaded or the recommendation fails
     */
    public static void main(String[] args) throws Exception {
        String ratingsPath = argOrDefault(args, 0, DEFAULT_RATINGS_PATH);
        long userId = Long.parseLong(argOrDefault(args, 1, Long.toString(DEFAULT_USER_ID)));
        int howMany = Integer.parseInt(argOrDefault(args, 2, Integer.toString(DEFAULT_HOW_MANY)));

        // NOTE(review): the stock MovieLens 1M ratings.dat is "::"-delimited; the sample
        // rows accompanying this example are comma-separated, so the file was presumably
        // converted beforehand -- confirm before running against the raw download.
        DataModel model = new FileDataModel(new File(ratingsPath));

        // Similarity measures suggested for preference data that includes ratings:
        //   Euclidean distance   : EuclideanDistanceSimilarity
        //   Pearson correlation  : PearsonCorrelationSimilarity
        //   Cosine               : UncenteredCosineSimilarity
        // Similarity measures suggested for preference data without ratings:
        //   Manhattan distance   : CityBlockSimilarity
        //   Log-likelihood       : LogLikelihoodSimilarity
        UserSimilarity similarity = new PearsonCorrelationSimilarity(model);

        // Neighborhood choices:
        //   NearestNUserNeighborhood(n, similarity, model)
        //     keeps the n most similar users; n trades recommendation quality
        //     against computation cost.
        //   ThresholdUserNeighborhood(threshold, similarity, model)
        //     keeps every user whose similarity meets or exceeds the threshold
        //     (a similarity cut-off, not a percentage of users).
        UserNeighborhood neighborhood =
                new NearestNUserNeighborhood(NEIGHBORHOOD_SIZE, similarity, model);

        // Build the recommender from the data model, the neighborhood and the similarity.
        Recommender recommender = new GenericUserBasedRecommender(model, neighborhood, similarity);

        // Ask for the top `howMany` items for `userId` and print each result.
        List<RecommendedItem> recommendations = recommender.recommend(userId, howMany);
        for (RecommendedItem recommendation : recommendations) {
            System.out.println(recommendation);
        }
    }

    /** Returns {@code args[index]} when present, otherwise {@code fallback}. */
    private static String argOrDefault(String[] args, int index, String fallback) {
        return (args != null && args.length > index) ? args[index] : fallback;
    }
}
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章