Flink用戶畫像(二)

Flink用戶畫像

創建用戶畫像偏愛品牌標籤

創建一個商品品牌標籤類

/**
 * Brand-preference tag record that flows through the map, reduce and sink
 * stages of the brand statistics job.
 */
@Data
public class Brand {
    // Id of the user who scanned the product.
    private Long userId;
    // Id of the scanned product.
    private Long productId;
    // Brand name of the product.
    private String brand;
    // Click counter; the reduce functions add these up.
    private Long nums = 0L;
    // Grouping string used as the keyBy key of the stream.
    private String groupField;
    // Time bucket of the record (the mappers fill it from DateUntil.getCurrentHourStart).
    private Long timeInfo;
}

一個BrandMap實現MapFunction接口的轉換類

/**
 * Turns a JSON scan event into a {@link Brand} record carrying a click count
 * of one and a grouping key built from time bucket, user id and brand.
 */
public class BrandMap implements MapFunction<String,Brand> {
    @Override
    public Brand map(String value) throws Exception {
        ScanOpertor event = JSONObject.parseObject(value, ScanOpertor.class);
        Long userId = event.getUserId();
        Long productId = event.getProductId();

        // Read the brand of the scanned product from HBase
        // (table "product", row = product id, column info:product_brand).
        String brandName = HbaseUtil.getdata("product", String.valueOf(productId),
                "info", "product_brand");

        Long hourStart = DateUntil.getCurrentHourStart(System.currentTimeMillis());

        Brand record = new Brand();
        record.setBrand(brandName);
        record.setGroupField("brand==" + hourStart + "==" + userId + "==" + brandName);
        record.setTimeInfo(hourStart);
        record.setUserId(userId);
        record.setProductId(productId);
        record.setNums(1L);
        return record;
    }
}

當我們新增一個商品

INSERT INTO `product` VALUES (1, 1, '大牛洗髮水', '洗髮水', 23.2000000000, 23, '2021-11-01 19:13:44', '2021-11-01 19:13:51', '內蒙古包頭市', '大牛', 10.0000000000, 'RF-666')

可以在HBase中查看到該商品的品牌

scan 'product',{COLUMNS=>'info:product_brand'}
ROW                   COLUMN+CELL                                               
 1                    column=info:product_brand, timestamp=1636809177766, value=
                      \xE5\xA4\xA7\xE7\x89\x9B

當用戶瀏覽該商品時就會留下瀏覽痕跡。此處是爲了存儲用戶每小時點擊過的品牌和點擊次數。

一個BrandReduce實現了ReduceFunction接口的統計類

/**
 * Combines two {@link Brand} records of the same group into a fresh record:
 * identifying fields are taken from the first record, the click counts are summed.
 */
public class BrandReduce implements ReduceFunction<Brand> {
    @Override
    public Brand reduce(Brand value1, Brand value2) throws Exception {
        Brand merged = new Brand();
        merged.setUserId(value1.getUserId());
        merged.setBrand(value1.getBrand());
        merged.setGroupField(value1.getGroupField());
        merged.setTimeInfo(value1.getTimeInfo());
        // Only the counter is combined from both inputs.
        merged.setNums(value1.getNums() + value2.getNums());
        return merged;
    }
}

一個BrandSink實現了SinkFunction的存儲類

/**
 * Sink that writes an aggregated {@link Brand} record to the "brand_info"
 * table through {@code ClickUntil}. Null records are ignored.
 */
public class BrandSink implements SinkFunction<Brand> {
    private ClickUntil clickUntil = ClickUntilFactory.createClickUntil();

    @Override
    public void invoke(Brand value, Context context) throws Exception {
        if (value == null) {
            return;
        }
        Map<String, String> row = new HashMap<>();
        row.put("timeinfo", String.valueOf(value.getTimeInfo()));
        row.put("brandlabel", value.getBrand());
        row.put("numbers", String.valueOf(value.getNums()));

        // Column-name set handed to saveData; presumably marks the numeric
        // columns (the string column is left out) - confirm against ClickUntil.
        Set<String> columns = new HashSet<>();
        columns.add("timeinfo");
        columns.add("numbers");

        clickUntil.saveData("brand_info", row, columns);
    }
}

一個UserBrandSaveMap實現了MapFunction接口的轉換類

/**
 * Updates the per-user preferred-brand ranking stored in HBase
 * (table "user_info", column info:brandlist) and emits a {@link Brand} record
 * keyed by time bucket and brand for the downstream per-brand count.
 *
 * <p>The ranking is stored as a JSON array of {"key": brand, "value": count}
 * objects, sorted by count descending and truncated to the top entries.
 */
public class UserBrandSaveMap implements MapFunction<Brand,Brand> {
    /** Maximum number of preferred brands kept per user. */
    private static final int MAX_BRANDS = 5;

    /**
     * @param value aggregated brand record of one user
     * @return a new record with the count set to one, grouped by time bucket and brand
     * @throws Exception if the HBase access or the JSON handling fails
     */
    @Override
    public Brand map(Brand value) throws Exception {
        Long userId = value.getUserId();
        String brandString = value.getBrand();
        Long timeInfo = value.getTimeInfo();
        String tablename = "user_info";
        String rowkey = userId + "";
        String famliyname = "info";
        String colum = "brandlist";
        // Load the user's stored brand ranking.
        String brandListString = HbaseUtil.getdata(tablename, rowkey, famliyname, colum);
        List<Map> temp = new ArrayList<>();
        List<Map<String,String>> result = new ArrayList<>();
        if (StringUtils.isNotBlank(brandListString)) {
            temp = JSONObject.parseArray(brandListString,Map.class);
        }
        boolean found = false;
        for (Map map : temp) {
            String brandStr = map.get("key").toString();
            Long value1 = Long.parseLong(map.get("value").toString());
            // Same brand as the incoming record: preference + 1.
            // brandStr is never null here, so this comparison is null-safe.
            if (brandStr.equals(brandString)) {
                value1++;
                found = true;
            }
            // Always write the count back as a String so the comparator below
            // can read it as such, whatever type the JSON parser produced.
            map.put("value", value1 + "");
            result.add(map);
        }
        // Fix: a brand that is not in the stored list yet must be added,
        // otherwise the list starts empty and stays empty forever.
        if (!found && brandString != null) {
            Map<String,String> entry = new java.util.HashMap<>();
            entry.put("key", brandString);
            entry.put("value", "1");
            result.add(entry);
        }
        // Sort by preference, highest first, and keep the top brands only.
        // NOTE(review): because only the truncated list is persisted, a new
        // brand cannot displace an existing one once the list is full - confirm
        // whether that is the intended ranking behaviour.
        Collections.sort(result,(o1, o2) ->
                Long.compare(Long.parseLong(o2.get("value")), Long.parseLong(o1.get("value"))));
        if (result.size() > MAX_BRANDS) {
            result = result.subList(0, MAX_BRANDS);
        }
        String data = JSONObject.toJSONString(result);
        HbaseUtil.putdata(tablename,rowkey,famliyname,colum,data);
        Brand brand = new Brand();
        String groupField = "brandBy==" + timeInfo + "==" + brandString;
        brand.setGroupField(groupField);
        brand.setTimeInfo(timeInfo);
        brand.setBrand(brandString);
        brand.setNums(1L);

        return brand;
    }
}

此處是爲了存儲用戶最爲偏愛前5名的品牌的排名,用戶每點擊一次該品牌,就會使用戶對該品牌的偏愛度+1,並重新排序存儲。

一個UserBrandReduce實現了ReduceFunction接口的統計類

/**
 * Combines two {@link Brand} records of the same brand group into a fresh
 * record: brand, grouping key and time bucket come from the first record,
 * the counts are summed.
 */
public class UserBrandReduce implements ReduceFunction<Brand> {
    @Override
    public Brand reduce(Brand value1, Brand value2) throws Exception {
        Brand merged = new Brand();
        merged.setBrand(value1.getBrand());
        merged.setGroupField(value1.getGroupField());
        merged.setTimeInfo(value1.getTimeInfo());
        merged.setNums(value1.getNums() + value2.getNums());
        return merged;
    }
}

一個UserBrandSink實現了SinkFunction接口的存儲類

/**
 * Sink that writes an aggregated per-brand {@link Brand} record to the
 * "user_brand_info" table through {@code ClickUntil}. Null records are ignored.
 */
public class UserBrandSink implements SinkFunction<Brand> {
    private ClickUntil clickUntil = ClickUntilFactory.createClickUntil();

    @Override
    public void invoke(Brand value, Context context) throws Exception {
        if (value == null) {
            return;
        }
        Map<String, String> row = new HashMap<>();
        row.put("timeinfo", String.valueOf(value.getTimeInfo()));
        row.put("userbrandlabel", value.getBrand());
        row.put("numbers", String.valueOf(value.getNums()));

        // Column-name set handed to saveData; presumably marks the numeric
        // columns (the string column is left out) - confirm against ClickUntil.
        Set<String> columns = new HashSet<>();
        columns.add("timeinfo");
        columns.add("numbers");

        clickUntil.saveData("user_brand_info", row, columns);
    }
}

然後是Flink的流處理

/**
 * Flink job that reads scan events from the Kafka topic "scan" and produces
 * two hourly statistics: clicks per user and brand ({@link BrandSink}) and,
 * derived from that, a count per brand ({@link UserBrandSink}).
 */
public class BrandAnaly {
    /**
     * Builds and runs the streaming pipeline.
     *
     * @param args unused
     * @throws Exception if the job fails
     */
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        Properties properties = new Properties();
        properties.setProperty("bootstrap.servers","127.0.0.1:9092");
        properties.setProperty("group.id","portrait");
        FlinkKafkaConsumer<String> myConsumer = new FlinkKafkaConsumer<>("scan",
                new SimpleStringSchema(),properties);
        DataStreamSource<String> data = env.addSource(myConsumer);
        env.enableCheckpointing(5000);
        DataStream<Brand> map = data.map(new BrandMap());
        // Keyed window: timeWindowAll would ignore the keyBy and reduce every
        // record of the hour into one, mixing different users and brands.
        DataStream<Brand> reduce = map.keyBy(Brand::getGroupField)
                .timeWindow(Time.hours(1))
                .reduce(new BrandReduce());
        reduce.addSink(new BrandSink());

        // Second stage: update each user's brand ranking, then count per brand.
        DataStream<Brand> userMap = reduce.map(new UserBrandSaveMap());
        DataStream<Brand> userReduce = userMap.keyBy(Brand::getGroupField)
                .timeWindow(Time.hours(1))
                .reduce(new UserBrandReduce());
        userReduce.addSink(new UserBrandSink());

        env.execute("portrait brand type");
    }
}

推薦部分

熱門商品統計

所謂熱門商品就是用戶購買數量最多的商品。

創建一個歷史熱門商品類

/**
 * All-time hot-product tag record: a purchase counter for one product.
 */
@Data
public class HistoryHotProduct {
    // Id of the purchased product.
    private Long productId;
    // Id of the product's type.
    private Long productTypeId;
    // Purchase counter; summed by the reduce function.
    private Long numbers;
    // Grouping string used as the keyBy key of the stream.
    private String groupField;
}

一個HistoryHotProductMap實現了FlatMapFunction接口的轉換類

/**
 * Converts a JSON order message into a {@link HistoryHotProduct} record with
 * a count of one. Orders whose pay status is not greater than zero are dropped.
 */
public class HistoryHotProductMap implements FlatMapFunction<String,HistoryHotProduct> {
    /**
     * @param value JSON representation of an order
     * @param out   collector receiving at most one record per order
     * @throws Exception if the JSON cannot be parsed
     */
    @Override
    public void flatMap(String value, Collector<HistoryHotProduct> out) throws Exception {
        Order order = JSONObject.parseObject(value,Order.class);
        // Skip payloads that yield no order object instead of failing the job.
        if (order == null) {
            return;
        }
        Integer payStatus = order.getPayStatus();
        // A missing pay status is treated like "not paid"; comparing a null
        // Integer with > would throw a NullPointerException on unboxing.
        if (payStatus == null || payStatus <= 0) {
            return;
        }
        HistoryHotProduct historyHotProduct = new HistoryHotProduct();
        historyHotProduct.setProductId(order.getProductId());
        historyHotProduct.setProductTypeId(order.getProductTypeId());
        historyHotProduct.setNumbers(1L);
        String groupFiled = "HistoryHotProduct==" + order.getProductId() + "=="
                + order.getProductTypeId();
        historyHotProduct.setGroupField(groupFiled);
        out.collect(historyHotProduct);
    }
}

一個HistoryHotProductReduce實現了ReduceFunction接口的統計類

/**
 * Adds the count of the second record onto the first record and returns the
 * (mutated) first record.
 */
public class HistoryHotProductReduce implements ReduceFunction<HistoryHotProduct> {
    @Override
    public HistoryHotProduct reduce(HistoryHotProduct value1, HistoryHotProduct value2) throws Exception {
        final long total = value1.getNumbers() + value2.getNumbers();
        value1.setNumbers(total);
        return value1;
    }
}

一個HistoryHotProductSink實現了SinkFunction接口的存儲類

/**
 * Sink that accumulates purchase counts per product in the
 * "history_hot_product" table: the count already stored for the product
 * (if any) is added to the incoming count before the row is saved.
 */
public class HistoryHotProductSink implements SinkFunction<HistoryHotProduct> {
    private ClickUntil clickUntil = ClickUntilFactory.createClickUntil();

    /**
     * @param value   aggregated record of one window; null records are ignored
     * @param context sink context (unused)
     * @throws Exception if the query or the save fails
     */
    @Override
    public void invoke(HistoryHotProduct value, Context context) throws Exception {
        if (value == null) {
            return;
        }
        Long productId = value.getProductId();
        Long productTypeId = value.getProductTypeId();
        Long numbers = value.getNumbers();

        // try-with-resources: the result set was never closed before.
        try (ResultSet queryResult = clickUntil.getQueryResult("test", "select * from history_hot_product " +
                "where product_id=" + productId)) {
            if (queryResult.next()) {
                // Column 3 of "select *" - presumably the stored count; confirm against the table schema.
                numbers += queryResult.getLong(3);
            }
        }

        Map<String, String> data = new HashMap<>();
        data.put("productId", productId + "");
        data.put("productTypeId", productTypeId + "");
        data.put("numbers", numbers + "");
        String tablename = "history_hot_product";
        Set<String> fields = new HashSet<>();
        fields.add("productId");
        fields.add("productTypeId");
        fields.add("numbers");
        // Both former branches ended in the same save call.
        clickUntil.saveData(tablename, data, fields);
    }
}

然後是Flink的流處理

/**
 * Flink job that counts paid orders per product from the Kafka topic "order"
 * in five-hour windows and hands the result to {@link HistoryHotProductSink}.
 */
public class HistoryHotProductAnaly {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        Properties kafkaProps = new Properties();
        kafkaProps.setProperty("bootstrap.servers","127.0.0.1:9092");
        kafkaProps.setProperty("group.id","portrait");

        FlinkKafkaConsumer<String> consumer =
                new FlinkKafkaConsumer<>("order", new SimpleStringSchema(), kafkaProps);
        // Start reading from the latest offset.
        consumer.setStartFromLatest();

        DataStreamSource<String> source = env.addSource(consumer);
        env.enableCheckpointing(5000);

        DataStream<HistoryHotProduct> products = source.flatMap(new HistoryHotProductMap());
        DataStream<HistoryHotProduct> counted = products
                .keyBy(HistoryHotProduct::getGroupField)
                .timeWindow(Time.hours(5))
                .reduce(new HistoryHotProductReduce());
        counted.addSink(new HistoryHotProductSink());

        env.execute("history hot product");
    }
}

 歷史評分統計

在數據庫中新建評價表

-- Recreate the evaluate (product review) table; any existing table is dropped first.
DROP TABLE IF EXISTS `evaluate`;
CREATE TABLE `evaluate` (
  -- review id (primary key)
  `id` bigint(20) NOT NULL,
  -- ids of the reviewing user, the order, the product and the product type
  `user_id` bigint(20) DEFAULT NULL,
  `order_id` bigint(20) DEFAULT NULL,
  `product_id` bigint(20) DEFAULT NULL,
  `product_type_id` bigint(20) DEFAULT NULL,
  -- time of the review
  `evaluate_time` datetime DEFAULT NULL,
  -- review score
  `score` int(255) DEFAULT NULL,
  -- review text
  `content` varchar(500) DEFAULT NULL,
  PRIMARY KEY (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;

對應實體類

/**
 * Entity for a product review; its fields mirror the columns of the
 * `evaluate` table.
 */
@Data
public class Evaluate {
    private Long id;
    private Long userId;
    private Long orderId;
    private Long productId;
    private Long productTypeId;
    private Date evaluateTime;
    private Integer score; // 1-5: 1-2 negative, 3-4 neutral, 5 positive
    private String content; // review text
}

在HBase中執行

create 'evaluate','info'

在Kafka的bin目錄下執行

./kafka-topics.sh --create --zookeeper localhost:2181 --replication-factor 1 --partitions 1 --topic evaluate

一個評分標籤實體類

/**
 * Score tag record: a counter of qualifying reviews for one product.
 */
@Data
public class Score {
    // Id of the reviewed product.
    private Long productId;
    // Id of the product's type.
    private Long productTypeId;
    // Review counter; summed by the reduce function.
    private Long numbers;
    // Grouping string used as the keyBy key of the stream.
    private String groupField;
}

一個ScoreMap實現了FlatMapFunction接口的轉換類

/**
 * Converts a JSON review message into a {@link Score} record with a count of
 * one. Only reviews with a score greater than 2 are emitted.
 */
public class ScoreMap implements FlatMapFunction<String,Score> {
    /**
     * @param value JSON representation of a review
     * @param out   collector receiving at most one record per review
     * @throws Exception if the JSON cannot be parsed
     */
    @Override
    public void flatMap(String value, Collector<Score> out) throws Exception {
        Evaluate evaluate = JSONObject.parseObject(value,Evaluate.class);
        // Skip payloads that yield no review object instead of failing the job.
        if (evaluate == null) {
            return;
        }
        Integer score = evaluate.getScore();
        // A review without a score cannot qualify; comparing a null Integer
        // with > would throw a NullPointerException on unboxing.
        if (score == null || score <= 2) {
            return;
        }
        Score scoreResult = new Score();
        Long productId = evaluate.getProductId();
        Long productTypeId = evaluate.getProductTypeId();
        scoreResult.setProductId(productId);
        scoreResult.setProductTypeId(productTypeId);
        scoreResult.setNumbers(1L);
        String groupField = "score==" + productId + "==" + productTypeId;
        scoreResult.setGroupField(groupField);
        out.collect(scoreResult);
    }
}

一個ScoreReduce實現了ReduceFunction的統計類

/**
 * Adds the count of the second record onto the first record and returns the
 * (mutated) first record.
 */
public class ScoreReduce implements ReduceFunction<Score> {
    @Override
    public Score reduce(Score value1, Score value2) throws Exception {
        final long total = value1.getNumbers() + value2.getNumbers();
        value1.setNumbers(total);
        return value1;
    }
}

一個ScoreSink實現了SinkFunction接口的存儲類

/**
 * Sink that accumulates review counts per product in the "score" table: the
 * count already stored for the product (if any) is added to the incoming
 * count before the row is saved.
 */
public class ScoreSink implements SinkFunction<Score> {
    private ClickUntil clickUntil = ClickUntilFactory.createClickUntil();

    /**
     * @param value   aggregated record of one window; null records are ignored
     * @param context sink context (unused)
     * @throws Exception if the query or the save fails
     */
    @Override
    public void invoke(Score value, Context context) throws Exception {
        if (value == null) {
            return;
        }
        Long productId = value.getProductId();
        Long productTypeId = value.getProductTypeId();
        Long numbers = value.getNumbers();

        // try-with-resources: the result set was never closed before.
        try (ResultSet queryResult = clickUntil.getQueryResult("test", "select * from score " +
                "where product_id=" + productId)) {
            if (queryResult.next()) {
                // Column 3 of "select *" - presumably the stored count; confirm against the table schema.
                numbers += queryResult.getLong(3);
            }
        }

        Map<String, String> data = new HashMap<>();
        data.put("productId", productId + "");
        data.put("productTypeId", productTypeId + "");
        data.put("numbers", numbers + "");
        String tablename = "score";
        Set<String> fields = new HashSet<>();
        fields.add("productId");
        fields.add("productTypeId");
        fields.add("numbers");
        // Both former branches ended in the same save call.
        clickUntil.saveData(tablename, data, fields);
    }
}

然後是Flink的流處理

/**
 * Flink job that counts qualifying reviews per product from the Kafka topic
 * "evaluate" in five-hour windows and hands the result to {@link ScoreSink}.
 */
public class ScoreAnaly {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        Properties kafkaProps = new Properties();
        kafkaProps.setProperty("bootstrap.servers","127.0.0.1:9092");
        kafkaProps.setProperty("group.id","portrait");

        FlinkKafkaConsumer<String> consumer =
                new FlinkKafkaConsumer<>("evaluate", new SimpleStringSchema(), kafkaProps);

        DataStreamSource<String> source = env.addSource(consumer);
        env.enableCheckpointing(5000);

        DataStream<Score> scores = source.flatMap(new ScoreMap());
        DataStream<Score> counted = scores
                .keyBy(Score::getGroupField)
                .timeWindow(Time.hours(5))
                .reduce(new ScoreReduce());
        counted.addSink(new ScoreSink());

        env.execute("portrait score");
    }
}

近期熱門商品統計

近期熱門指的是最近10天的熱門商品

創建一個近期熱門商品標籤實體類

/**
 * Recent hot-product tag record: a purchase counter for one product on one day.
 */
@Data
public class RecentHotProduct {
    // Id of the purchased product.
    private Long productId;
    // Id of the product's type.
    private Long productTypeId;
    // Purchase counter; summed by the reduce function.
    private Long numbers;
    // Day of the order, formatted as yyyyMMdd by the mapper.
    private String dateTime;
    // Grouping string used as the keyBy key of the stream.
    private String groupField;
}

DateUntil增加一個靜態方法

/**
 * Formats a date with the given {@link SimpleDateFormat} pattern.
 *
 * @param date          the date to format
 * @param dateFormatStr the format pattern, e.g. "yyyyMMdd"
 * @return the formatted date string
 */
public static String transferDate(Date date,String dateFormatStr) {
    return new SimpleDateFormat(dateFormatStr).format(date);
}

一個RecentHotProductMap實現了FlatMapFunction接口的轉換類

/**
 * Converts a JSON order message into a {@link RecentHotProduct} record with a
 * count of one, grouped by product, product type and order day (yyyyMMdd).
 * Orders whose pay status is not greater than zero are dropped.
 */
public class RecentHotProductMap implements FlatMapFunction<String,RecentHotProduct> {
    /**
     * @param value JSON representation of an order
     * @param out   collector receiving at most one record per order
     * @throws Exception if the JSON cannot be parsed
     */
    @Override
    public void flatMap(String value, Collector<RecentHotProduct> out) throws Exception {
        Order order = JSONObject.parseObject(value,Order.class);
        // Skip payloads that yield no order object instead of failing the job.
        if (order == null) {
            return;
        }
        Integer payStatus = order.getPayStatus();
        // A missing pay status is treated like "not paid"; comparing a null
        // Integer with > would throw a NullPointerException on unboxing.
        if (payStatus == null || payStatus <= 0) {
            return;
        }
        Date date = order.getCreateTime();
        // Without a creation time the order cannot be assigned to a day.
        if (date == null) {
            return;
        }
        RecentHotProduct recentHotProduct = new RecentHotProduct();
        Long productId = order.getProductId();
        Long productTypeId = order.getProductTypeId();
        String dateString = DateUntil.transferDate(date,"yyyyMMdd");
        String groupField = "RecentHotProduct==" + productId + "=="
                + productTypeId + "==" + dateString;
        recentHotProduct.setDateTime(dateString);
        recentHotProduct.setGroupField(groupField);
        recentHotProduct.setProductId(productId);
        recentHotProduct.setProductTypeId(productTypeId);
        recentHotProduct.setNumbers(1L);
        out.collect(recentHotProduct);
    }
}

一個RecentHotProductReduce實現了ReduceFunction接口的統計類

/**
 * Adds the count of the second record onto the first record and returns the
 * (mutated) first record.
 */
public class RecentHotProductReduce implements ReduceFunction<RecentHotProduct> {
    @Override
    public RecentHotProduct reduce(RecentHotProduct value1, RecentHotProduct value2) throws Exception {
        final long total = value1.getNumbers() + value2.getNumbers();
        value1.setNumbers(total);
        return value1;
    }
}

一個RecentHotProductSink實現了SinkFunction接口的存儲類

/**
 * Sink that accumulates purchase counts per product and day in the
 * "recent_hot_product" table: the count already stored for the product and
 * day (if any) is added to the incoming count before the row is saved.
 */
public class RecentHotProductSink implements SinkFunction<RecentHotProduct> {
    private ClickUntil clickUntil = ClickUntilFactory.createClickUntil();

    /**
     * @param value   aggregated record of one window; null records are ignored
     * @param context sink context (unused)
     * @throws Exception if the query or the save fails
     */
    @Override
    public void invoke(RecentHotProduct value, Context context) throws Exception {
        if (value == null) {
            return;
        }
        Long productId = value.getProductId();
        Long productTypeId = value.getProductTypeId();
        Long numbers = value.getNumbers();
        String dateTime = value.getDateTime();

        // try-with-resources: the result set was never closed before.
        try (ResultSet queryResult = clickUntil.getQueryResult("test", "select * from recent_hot_product " +
                "where product_id=" + productId + " and date_time='" + dateTime +"'")) {
            if (queryResult.next()) {
                // Column 3 of "select *" - presumably the stored count; confirm against the table schema.
                numbers += queryResult.getLong(3);
            }
        }

        Map<String, String> data = new HashMap<>();
        data.put("productId", productId + "");
        data.put("productTypeId", productTypeId + "");
        data.put("numbers", numbers + "");
        data.put("dateTime",dateTime);
        String tablename = "recent_hot_product";
        Set<String> fields = new HashSet<>();
        fields.add("productId");
        fields.add("productTypeId");
        fields.add("numbers");
        // Both former branches ended in the same save call.
        clickUntil.saveData(tablename, data, fields);
    }
}

然後是Flink的流處理

/**
 * Flink job that counts paid orders per product and day from the Kafka topic
 * "order" in five-hour windows and hands the result to
 * {@link RecentHotProductSink}.
 */
public class RecentHotProductAnaly {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        Properties kafkaProps = new Properties();
        kafkaProps.setProperty("bootstrap.servers","127.0.0.1:9092");
        kafkaProps.setProperty("group.id","portrait");

        FlinkKafkaConsumer<String> consumer =
                new FlinkKafkaConsumer<>("order", new SimpleStringSchema(), kafkaProps);
        // Start reading from the latest offset.
        consumer.setStartFromLatest();

        DataStreamSource<String> source = env.addSource(consumer);
        env.enableCheckpointing(5000);

        DataStream<RecentHotProduct> products = source.flatMap(new RecentHotProductMap());
        DataStream<RecentHotProduct> counted = products
                .keyBy(RecentHotProduct::getGroupField)
                .timeWindow(Time.hours(5))
                .reduce(new RecentHotProductReduce());
        counted.addSink(new RecentHotProductSink());

        env.execute("recent hot product");
    }
}

優質商品統計

創建一個優質商品標籤實體類

/**
 * High-quality-product tag record: review count and summed score for one
 * product on one day.
 */
@Data
public class HighQualityProduct {
    // Id of the reviewed product.
    private Long productId;
    // Id of the product's type.
    private Long productTypeId;
    // Review counter; summed by the reduce function.
    private Long numbers;
    // Sum of the review scores; summed by the reduce function.
    private Integer scoreTotal;
    // Day of the review, formatted as yyyyMMdd by the mapper.
    private String dateTime;
    // Grouping string used as the keyBy key of the stream.
    private String groupField;
}

一個HighQualityProductMap實現了FlatMapFunction接口的轉換類

/**
 * Converts a JSON review message into a {@link HighQualityProduct} record
 * carrying a count of one and the review's score, grouped by product,
 * product type and review day (yyyyMMdd).
 */
public class HighQualityProductMap implements FlatMapFunction<String,HighQualityProduct> {
    @Override
    public void flatMap(String value, Collector<HighQualityProduct> out) throws Exception {
        Evaluate review = JSONObject.parseObject(value, Evaluate.class);
        Integer score = review.getScore();
        String day = DateUntil.transferDate(review.getEvaluateTime(),"yyyyMMdd");
        Long productId = review.getProductId();
        Long productTypeId = review.getProductTypeId();

        HighQualityProduct record = new HighQualityProduct();
        record.setProductId(productId);
        record.setProductTypeId(productTypeId);
        record.setNumbers(1L);
        record.setScoreTotal(score);
        record.setDateTime(day);
        record.setGroupField("HighQualityProduct==" + productId + "==" + productTypeId
                + "==" + day);
        out.collect(record);
    }
}

一個HighQualityProductReduce實現了ReduceFunction接口的統計類

/**
 * Adds the review count and the score sum of the second record onto the
 * first record and returns the (mutated) first record.
 */
public class HighQualityProductReduce implements ReduceFunction<HighQualityProduct> {
    @Override
    public HighQualityProduct reduce(HighQualityProduct value1, HighQualityProduct value2) throws Exception {
        Long leftCount = value1.getNumbers();
        Long rightCount = value2.getNumbers();
        Integer leftScore = value1.getScoreTotal();
        Integer rightScore = value2.getScoreTotal();

        final long countSum = leftCount + rightCount;
        value1.setNumbers(countSum);
        final int scoreSum = leftScore + rightScore;
        value1.setScoreTotal(scoreSum);
        return value1;
    }
}

一個HighQualityProductSink實現了SinkFunction接口的存儲類

/**
 * Sink that accumulates review count and score sum per product and day in
 * the "high_quality_product" table: values already stored for the product
 * and day (if any) are added to the incoming values before the row is saved.
 */
public class HighQualityProductSink implements SinkFunction<HighQualityProduct> {
    private ClickUntil clickUntil = ClickUntilFactory.createClickUntil();

    /**
     * @param value   aggregated record of one window; null records are ignored
     * @param context sink context (unused)
     * @throws Exception if the query or the save fails
     */
    @Override
    public void invoke(HighQualityProduct value, Context context) throws Exception {
        if (value == null) {
            return;
        }
        Long productId = value.getProductId();
        Long productTypeId = value.getProductTypeId();
        Long numbers = value.getNumbers();
        Integer scoreTotal = value.getScoreTotal();
        String dateTime = value.getDateTime();

        // try-with-resources: the result set was never closed before.
        try (ResultSet queryResult = clickUntil.getQueryResult("test", "select * from high_quality_product " +
                "where product_id=" + productId + " and date_time='" + dateTime +"'")) {
            if (queryResult.next()) {
                // Columns 3 and 4 of "select *" - presumably the stored count and
                // score sum; confirm against the table schema.
                numbers += queryResult.getLong(3);
                scoreTotal += queryResult.getInt(4);
            }
        }

        Map<String, String> data = new HashMap<>();
        data.put("productId", productId + "");
        data.put("productTypeId", productTypeId + "");
        data.put("numbers", numbers + "");
        data.put("scoreTotal",scoreTotal + "");
        data.put("dateTime",dateTime);
        String tablename = "high_quality_product";
        Set<String> fields = new HashSet<>();
        fields.add("productId");
        fields.add("productTypeId");
        fields.add("numbers");
        fields.add("scoreTotal");
        // Both former branches ended in the same save call.
        clickUntil.saveData(tablename, data, fields);
    }
}

然後是Flink的流處理

/**
 * Flink job that sums review counts and scores per product and day from the
 * Kafka topic "evaluate" in five-hour windows and hands the result to
 * {@link HighQualityProductSink}.
 */
public class HighQualityProductAnaly {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        Properties kafkaProps = new Properties();
        kafkaProps.setProperty("bootstrap.servers","127.0.0.1:9092");
        kafkaProps.setProperty("group.id","portrait");

        FlinkKafkaConsumer<String> consumer =
                new FlinkKafkaConsumer<>("evaluate", new SimpleStringSchema(), kafkaProps);
        // Start reading from the latest offset.
        consumer.setStartFromLatest();

        DataStreamSource<String> source = env.addSource(consumer);
        env.enableCheckpointing(5000);

        DataStream<HighQualityProduct> reviews = source.flatMap(new HighQualityProductMap());
        DataStream<HighQualityProduct> summed = reviews
                .keyBy(HighQualityProduct::getGroupField)
                .timeWindow(Time.hours(5))
                .reduce(new HighQualityProductReduce());
        summed.addSink(new HighQualityProductSink());

        env.execute("high quality product");
    }
}

瀏覽次數統計

創建一個瀏覽次數標籤實體類

/**
 * Scan-count tag record: a browse counter for one product on one day.
 */
@Data
public class ScanTimes {
    // Id of the scanned product.
    private Long productId;
    // Id of the product's type.
    private Long productTypeId;
    // Scan counter; summed by the reduce function.
    private Long numbers;
    // Day of the scan, formatted as yyyyMMdd by the mapper.
    private String dateTime;
    // Grouping string used as the keyBy key of the stream.
    private String groupField;
}

DateUntil增加一個靜態方法

/**
 * Wraps an epoch-millisecond timestamp in a {@link Date}.
 *
 * @param visitTime timestamp in milliseconds
 * @return the corresponding date
 */
public static Date getCurrentTime(Long visitTime) {
    final long epochMillis = visitTime;
    return new Date(epochMillis);
}

一個ScanTimesMap實現了FlatMapFunction接口的轉換類

/**
 * Converts a JSON scan event into a {@link ScanTimes} record with a count of
 * one, grouped by product, product type and scan day (yyyyMMdd).
 */
public class ScanTimesMap implements FlatMapFunction<String,ScanTimes> {
    @Override
    public void flatMap(String value, Collector<ScanTimes> out) throws Exception {
        ScanOpertor event = JSONObject.parseObject(value,ScanOpertor.class);
        Long productId = event.getProductId();
        Long productTypeId = event.getProductTypeId();
        // Derive the day bucket from the event's scan timestamp.
        String day = DateUntil.transferDate(
                DateUntil.getCurrentTime(event.getScanTime()),"yyyyMMdd");

        ScanTimes record = new ScanTimes();
        record.setProductId(productId);
        record.setProductTypeId(productTypeId);
        record.setDateTime(day);
        record.setGroupField("ScanTimes==" + productId + "==" + productTypeId
                + "==" + day);
        record.setNumbers(1L);
        out.collect(record);
    }
}

一個ScanTimesReduce實現了ReduceFunction接口的統計類

/**
 * Adds the count of the second record onto the first record and returns the
 * (mutated) first record.
 */
public class ScanTimesReduce implements ReduceFunction<ScanTimes> {
    @Override
    public ScanTimes reduce(ScanTimes value1, ScanTimes value2) throws Exception {
        final long total = value1.getNumbers() + value2.getNumbers();
        value1.setNumbers(total);
        return value1;
    }
}

一個ScanTimesSink實現了SinkFunction接口的存儲類

/**
 * Sink that accumulates scan counts per product and day in the "scan_times"
 * table: the count already stored for the product and day (if any) is added
 * to the incoming count before the row is saved.
 */
public class ScanTimesSink implements SinkFunction<ScanTimes> {
    private ClickUntil clickUntil = ClickUntilFactory.createClickUntil();

    /**
     * @param value   aggregated record of one window; null records are ignored
     * @param context sink context (unused)
     * @throws Exception if the query or the save fails
     */
    @Override
    public void invoke(ScanTimes value, Context context) throws Exception {
        if (value == null) {
            return;
        }
        Long productId = value.getProductId();
        Long productTypeId = value.getProductTypeId();
        Long numbers = value.getNumbers();
        String dateTime = value.getDateTime();

        // try-with-resources: the result set was never closed before.
        // NOTE(review): this query filters on "dateTime" while the sibling sinks
        // use "date_time" next to "product_id" - confirm the column name
        // against the scan_times schema.
        try (ResultSet queryResult = clickUntil.getQueryResult("test", "select * from scan_times " +
                "where product_id=" + productId + " and dateTime='" + dateTime + "'")) {
            if (queryResult.next()) {
                // Column 3 of "select *" - presumably the stored count; confirm against the table schema.
                numbers += queryResult.getLong(3);
            }
        }

        Map<String, String> data = new HashMap<>();
        data.put("productId", productId + "");
        data.put("productTypeId", productTypeId + "");
        data.put("numbers", numbers + "");
        data.put("dateTime",dateTime);
        String tablename = "scan_times";
        Set<String> fields = new HashSet<>();
        fields.add("productId");
        fields.add("productTypeId");
        fields.add("numbers");
        // Both former branches ended in the same save call.
        clickUntil.saveData(tablename, data, fields);
    }
}

然後是Flink的流處理

/**
 * Flink job that counts scans per product and day from the Kafka topic
 * "scan" in five-hour windows and hands the result to {@link ScanTimesSink}.
 */
public class ScanTimesAnaly {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        Properties kafkaProps = new Properties();
        kafkaProps.setProperty("bootstrap.servers","127.0.0.1:9092");
        kafkaProps.setProperty("group.id","portrait");

        FlinkKafkaConsumer<String> consumer =
                new FlinkKafkaConsumer<>("scan", new SimpleStringSchema(), kafkaProps);

        DataStreamSource<String> source = env.addSource(consumer);
        env.enableCheckpointing(5000);

        DataStream<ScanTimes> scans = source.flatMap(new ScanTimesMap());
        DataStream<ScanTimes> counted = scans
                .keyBy(ScanTimes::getGroupField)
                .timeWindow(Time.hours(5))
                .reduce(new ScanTimesReduce());
        counted.addSink(new ScanTimesSink());

        env.execute("scan times");
    }
}

收藏次數統計

創建一個收藏標籤實體類

/**
 * Favourite-count tag record: a "collect" (add to favourites) counter for one
 * product on one day.
 * NOTE(review): the simple name is the same as java.util.Collection, so the
 * two cannot both be imported by simple name in one source file.
 */
@Data
public class Collection {
    // Id of the collected product.
    private Long productId;
    // Id of the product's type.
    private Long productTypeId;
    // Collect counter; summed by the reduce function.
    private Long numbers;
    // Day of the operation, formatted as yyyyMMdd by the mapper.
    private String dateTime;
    // Grouping string used as the keyBy key of the stream.
    private String groupField;
}

一個CollectionTimesMap實現了FlatMapFunction接口的轉換類

/**
 * Converts a JSON collect event into a {@link Collection} record with a count
 * of one, grouped by product, product type and operation day (yyyyMMdd).
 */
public class CollectionTimesMap implements FlatMapFunction<String,Collection> {
    @Override
    public void flatMap(String value, Collector<Collection> out) throws Exception {
        CollectOpertor event = JSONObject.parseObject(value,CollectOpertor.class);
        Long productId = event.getProductId();
        Long productTypeId = event.getProductTypeId();
        // Derive the day bucket from the event's operation timestamp.
        String day = DateUntil.transferDate(
                DateUntil.getCurrentTime(event.getOpertorTime()),"yyyyMMdd");

        Collection record = new Collection();
        record.setProductId(productId);
        record.setProductTypeId(productTypeId);
        record.setDateTime(day);
        record.setGroupField("CollectionTimes==" + productId + "==" + productTypeId
                + "==" + day);
        record.setNumbers(1L);
        out.collect(record);
    }
}

一個CollectionTimesReduce實現了ReduceFunction接口的統計類

/**
 * Merges two favorite records of the same group by summing their counters.
 * The first record is updated in place and returned.
 */
public class CollectionTimesReduce implements ReduceFunction<Collection> {
    @Override
    public Collection reduce(Collection value1, Collection value2) throws Exception {
        long total = value1.getNumbers() + value2.getNumbers();
        value1.setNumbers(total);
        return value1;
    }
}

一個CollectionTimesSink實現了SinkFunction接口的存儲類

/**
 * Sink that stores windowed favorite counts in the {@code collection_times} table.
 * If a row for the same product and day already exists, its counter is added to the
 * incoming one before the record is saved.
 */
public class CollectionTimesSink implements SinkFunction<Collection> {
    private ClickUntil clickUntil = ClickUntilFactory.createClickUntil();

    /**
     * Persists one aggregated favorite record.
     *
     * @param value   aggregated record for one group; {@code null} values are ignored
     * @param context sink context (unused)
     * @throws Exception if the lookup query or the save fails
     */
    @Override
    public void invoke(Collection value, Context context) throws Exception {
        if (value == null) {
            return;
        }
        Long productId = value.getProductId();
        Long productTypeId = value.getProductTypeId();
        Long numbers = value.getNumbers();
        String dateTime = value.getDateTime();

        // The ResultSet is closed as soon as the previous count has been read;
        // the original never closed it, leaking one result set per invocation.
        // NOTE(review): the filter uses column "product_id" while the saved key is
        // "productId" - confirm against the collection_times table definition.
        try (ResultSet queryResult = clickUntil.getQueryResult("test", "select * from collection_times " +
                "where product_id=" + productId + " and dateTime='" + dateTime + "'")) {
            if (queryResult.next()) {
                // Third column of the existing row is read as the previous count
                // (position-based; presumably "numbers" - verify column order).
                numbers += queryResult.getLong(3);
            }
        }

        Map<String, String> data = new HashMap<>();
        data.put("productId", productId + "");
        data.put("productTypeId", productTypeId + "");
        data.put("numbers", numbers + "");
        data.put("dateTime", dateTime);

        // Presumably the columns saveData writes as numeric (unquoted) values;
        // dateTime is intentionally not listed, as in the original.
        Set<String> fields = new HashSet<>();
        fields.add("productId");
        fields.add("productTypeId");
        fields.add("numbers");

        clickUntil.saveData("collection_times", data, fields);
    }
}

然後是Flink的流處理

/**
 * Streaming job that counts favorite events: reads JSON events from the Kafka topic
 * "collection", groups them by the record's group field, sums them per 5-hour window
 * and hands every windowed result to {@link CollectionTimesSink}.
 */
public class CollectionTimesAnaly {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // Kafka connection settings.
        Properties kafkaProps = new Properties();
        kafkaProps.setProperty("bootstrap.servers","127.0.0.1:9092");
        kafkaProps.setProperty("group.id","portrait");

        FlinkKafkaConsumer<String> consumer = new FlinkKafkaConsumer<>("collection",
                new SimpleStringSchema(), kafkaProps);
        DataStreamSource<String> rawEvents = env.addSource(consumer);
        env.enableCheckpointing(5000);

        // parse -> key by group field -> 5 hour window -> sum -> store
        rawEvents.flatMap(new CollectionTimesMap())
                .keyBy(Collection::getGroupField)
                .timeWindow(Time.hours(5))
                .reduce(new CollectionTimesReduce())
                .addSink(new CollectionTimesSink());

        env.execute("collection times");
    }
}

 基於商品內容的embedding相似度推薦

對於電商系統來說,商品的數量一般會遠遠小於用戶數量,因此我們需要開發一個自動整理全部商品的任務調度功能,定時增量地將所有商品的部分內容存儲到一個csv文件中。我們假定該文件的格式如下

product_id,product_type,product_title,product_name,product_desc
1====牛奶====風情牛奶====蒙牛牛奶====美麗的大草原上盛產風情牛奶
2====冰箱====酷炫冰箱====海兒冰箱====新一代年輕人喜愛的智能現代化冰箱
3====手機====智能手機====華爲手機====智能拍照手機,你值得擁有
4====汽水====元氣汽水====森林汽水====無糖茶葉氣泡水

在ClickHouse中創建一個表

create table product_similar(productId UInt32,productIdSimilar UInt32,similarValue Float32,create_date date)ENGINE=MergeTree(create_date,(productId),8192); 

CREATE TABLE product_similar
(
    `productId` UInt32,
    `productIdSimilar` UInt32,
    `similarValue` Float32,
    `create_date` date
)
ENGINE = MergeTree(create_date, productId, 8192)

Query id: d9bd5af1-d2a7-4623-93dc-df8559990c1e

Ok.

0 rows in set. Elapsed: 0.118 sec. 

對所有商品進行兩兩相似度計算

/**
 * Product similarity job.
 * Reads the product CSV, extracts keywords from each product description and, for every
 * product, stores the (at most) 10 other products whose keywords are most similar
 * (cosine metric) in the {@code product_similar} table.
 */
public class ProductSimilarAnaly {
    private static ClickUntil clickUntil = ClickUntilFactory.createClickUntil();

    /**
     * Runs the whole batch computation.
     *
     * @param args unused
     * @throws Exception if reading the CSV, running the operators or saving fails
     */
    public static void main(String[] args) throws Exception {
        String path = "/Users/admin/Documents/商品表.csv";
        String schema = "productId long,productType string,title string," +
                "name string,desc string";
        CsvSourceBatchOp csvSourceBatchOp = new CsvSourceBatchOp()
                .setIgnoreFirstLine(true)
                .setFilePath(path)
                .setSchemaStr(schema)
                .setFieldDelimiter("====");
        // Word segmentation of the description.
        BatchOperator<?> segment = new SegmentBatchOp()
                .setSelectedCol("desc")
                .setOutputCol("segment")
                .linkFrom(csvSourceBatchOp);
        // Stop-word removal.
        BatchOperator<?> stopWord = new StopWordsRemoverBatchOp()
                .setSelectedCol("segment")
                .setOutputCol("remove")
                .linkFrom(segment);
        // Keyword extraction (top 10 keywords per product).
        BatchOperator<?> keyWord = new KeywordsExtractionBatchOp()
                .setSelectedCol("remove")
                .setOutputCol("keywords")
                .setTopN(10)
                .linkFrom(stopWord);
        List<Row> list = keyWord.getDataSet().collect();
        for (Row rowOut : list) {
            // Invariant for the inner loop, so looked up once per outer product.
            Long productIdOut = (Long) rowOut.getField("productId");
            String keywordsOut = (String) rowOut.getField("keywords");
            List<Row> dataRow = new ArrayList<>();
            for (Row rowInner : list) {
                Long productIdInner = (Long) rowInner.getField("productId");
                // Compare by value: "==" on boxed Long compares references and only
                // happens to work for small cached values.
                if (sameId(productIdOut, productIdInner)) {
                    continue;
                }
                String keywordsInner = (String) rowInner.getField("keywords");
                // Pair the keywords of two different products.
                dataRow.add(Row.of(productIdOut,productIdInner,
                        keywordsOut,keywordsInner));
            }
            if (dataRow.isEmpty()) {
                // No other product to compare with; nothing to compute or store.
                continue;
            }
            // Text similarity between the keyword strings of every pair.
            BatchOperator<?> inOp = new MemSourceBatchOp(dataRow,"productId long," +
                    "productIdSimilar long,keywords1 string,keywords2 string");
            BatchOperator<?> similar = new TextSimilarityPairwiseBatchOp()
                    .setSelectedCols("keywords1","keywords2")
                    .setMetric("COSINE")
                    .setOutputCol("similarValue")
                    .linkFrom(inOp);
            List<Row> dataRowSimilar = similar.getDataSet().collect();
            // Highest similarity first.
            Collections.sort(dataRowSimilar,(o1,o2) -> {
                Double similarValue2 = (Double) o2.getField("similarValue");
                Double similarValue1 = (Double) o1.getField("similarValue");
                return similarValue2.compareTo(similarValue1);
            });
            // Keep only the 10 most similar products.
            if (dataRowSimilar.size() > 10) {
                dataRowSimilar = dataRowSimilar.subList(0, 10);
            }
            for (Row row : dataRowSimilar) {
                String tablename = "product_similar";
                Map<String,String> dataMap = new HashMap<>();
                dataMap.put("productId",row.getField("productId").toString());
                dataMap.put("productIdSimilar",row.getField("productIdSimilar").toString());
                dataMap.put("similarValue",row.getField("similarValue").toString());
                Set<String> fields = new HashSet<>();
                fields.add("productId");
                fields.add("productIdSimilar");
                fields.add("similarValue");
                clickUntil.saveData(tablename,dataMap,fields);
            }
        }
    }

    /** Null-safe value comparison of two product ids. */
    private static boolean sameId(Long left, Long right) {
        return left == null ? right == null : left.equals(right);
    }
}

運行結果

select * from product_similar;

SELECT *
FROM product_similar

Query id: 8ef7bc91-d972-488e-a56f-0bb5e2dedca8

┌─productId─┬─productIdSimilar─┬─similarValue─┬─create_date─┐
│         1 │                2 │            0 │  1970-01-01 │
└───────────┴──────────────────┴──────────────┴─────────────┘
┌─productId─┬─productIdSimilar─┬─similarValue─┬─create_date─┐
│         1 │                4 │            0 │  1970-01-01 │
│         2 │                4 │            0 │  1970-01-01 │
│         2 │                3 │            0 │  1970-01-01 │
│         2 │                1 │            0 │  1970-01-01 │
│         3 │                4 │            0 │  1970-01-01 │
│         3 │                2 │            0 │  1970-01-01 │
│         3 │                1 │            0 │  1970-01-01 │
│         4 │                2 │            0 │  1970-01-01 │
│         4 │                3 │            0 │  1970-01-01 │
│         4 │                1 │            0 │  1970-01-01 │
└───────────┴──────────────────┴──────────────┴─────────────┘
┌─productId─┬─productIdSimilar─┬─similarValue─┬─create_date─┐
│         1 │                3 │            0 │  1970-01-01 │
└───────────┴──────────────────┴──────────────┴─────────────┘

12 rows in set. Elapsed: 0.056 sec. 

雖然這裏的幾個商品彼此之間的關鍵詞的相似度都爲0,不過這個不重要,只要構建好足夠相似的商品,它們的相似度就會顯現出來。

 離線商品評分推薦

首先我們需要將用戶對商品的真實評分記錄整理到這樣一個文件中作爲訓練數據集,格式如下

user_id,product_id,score
1,1,5
1,5,4
2,1,3
2,5,4
3,1,2
3,2,5
4,3,3
4,2,5
5,3,4

然後我們還需要一份用戶沒有對商品做出過評分的測試數據文件

user_id,product_id
1,2
1,3
2,2
2,3
3,3
3,5
4,1
4,5
5,1

在ClickHouse中創建一個表

create table score_recommend(userId UInt32,productId UInt32,predictScore Float32,create_date date)ENGINE=MergeTree(create_date,(userId),8192); 

CREATE TABLE score_recommend
(
    `userId` UInt32,
    `productId` UInt32,
    `predictScore` Float32,
    `create_date` date
)
ENGINE = MergeTree(create_date, userId, 8192)

Query id: 64c7f961-f134-454a-9de6-f9108271a41c

Connecting to database test at localhost:9000 as user default.
Connected to ClickHouse server version 21.11.2 revision 54450.

Ok.

0 rows in set. Elapsed: 0.086 sec. 

對訓練數據集進行訓練,並對測試數據集進行評分預測

/**
 * Offline rating prediction: trains an ALS model on the rating CSV, predicts a score for
 * every user/product pair of the test CSV and stores the predictions in the
 * {@code score_recommend} table.
 */
public class ALSOutLineAnaly {
    private static ClickUntil clickUntil = ClickUntilFactory.createClickUntil();

    public static void main(String[] args) throws Exception {
        CsvSourceBatchOp trainingSource = commaSeparatedCsv(
                "/Users/admin/Documents/評分表.csv",
                "userId long,productId long,score double");
        CsvSourceBatchOp testSource = commaSeparatedCsv(
                "/Users/admin/Documents/評分測試表.csv",
                "userId long,productId long");

        // Train the ALS model on the known ratings.
        BatchOperator<?> model = new AlsTrainBatchOp()
                .setUserCol("userId")
                .setItemCol("productId")
                .setRateCol("score")
                .setNumIter(10)
                .setRank(10)
                .setLambda(0.01)
                .linkFrom(trainingSource);

        // Predict a score for every pair in the test set.
        BatchOperator<?> prediction = new AlsRateRecommBatchOp()
                .setUserCol("userId")
                .setItemCol("productId")
                .setRecommCol("predictScore")
                .linkFrom(model, testSource);

        List<Row> predictedRows = prediction.getDataSet().collect();
        for (Row predicted : predictedRows) {
            Long userId = (Long) predicted.getField("userId");
            Long productId = (Long) predicted.getField("productId");
            Double predictScore = (Double) predicted.getField("predictScore");

            Map<String,String> dataMap = new HashMap<>();
            dataMap.put("userId", String.valueOf(userId));
            dataMap.put("productId", String.valueOf(productId));
            dataMap.put("predictScore", String.valueOf(predictScore));

            Set<String> fields = new HashSet<>();
            fields.add("userId");
            fields.add("productId");
            fields.add("predictScore");

            clickUntil.saveData("score_recommend", dataMap, fields);
        }
    }

    /** Builds a comma-delimited CSV source that skips the header line. */
    private static CsvSourceBatchOp commaSeparatedCsv(String path, String schema) {
        return new CsvSourceBatchOp()
                .setIgnoreFirstLine(true)
                .setFilePath(path)
                .setSchemaStr(schema)
                .setFieldDelimiter(",");
    }
}

運行結果

select * from score_recommend;

SELECT *
FROM score_recommend

Query id: 3dd93421-23e3-404c-bee9-381446589471

┌─userId─┬─productId─┬─predictScore─┬─create_date─┐
│      1 │         3 │     5.605932 │  1970-01-01 │
│      1 │         2 │    2.0335705 │  1970-01-01 │
│      2 │         3 │    3.9875667 │  1970-01-01 │
│      2 │         2 │    1.5447733 │  1970-01-01 │
│      3 │         5 │    2.3882878 │  1970-01-01 │
│      3 │         3 │    2.6757712 │  1970-01-01 │
│      4 │         5 │     2.639401 │  1970-01-01 │
│      4 │         1 │     2.265847 │  1970-01-01 │
│      5 │         1 │    3.0576394 │  1970-01-01 │
└────────┴───────────┴──────────────┴─────────────┘

9 rows in set. Elapsed: 0.014 sec. 

商品協同過濾相似度實時推薦

現在我們需要將上面的離線預測的評分數據整理成一個新的文件

user_id,product_id,result
1,2,2.0335705
1,3,5.605932
2,2,1.5447733
2,3,3.9875667
3,3,2.6757712
3,5,2.3882878
4,1,2.265847
4,5,2.639401
5,1,3.0576394

ClickHouse創建一個表

create table product_recommend(productId UInt32,productIdsRecommend String,scores String,create_date date)ENGINE=MergeTree(create_date,(productId),8192);

CREATE TABLE product_recommend
(
    `productId` UInt32,
    `productIdsRecommend` String,
    `scores` String,
    `create_date` date
)
ENGINE = MergeTree(create_date, productId, 8192)

Query id: 6e1cf305-5e4f-4f0b-9e0d-a7d9be84a231

Connecting to database test at localhost:9000 as user default.
Connected to ClickHouse server version 21.11.2 revision 54450.

Ok.

0 rows in set. Elapsed: 0.076 sec. 

基於商品協同過濾相似度進行推薦

/**
 * Item-based collaborative-filtering job: trains an ALS model on the predicted-rating CSV
 * and stores, for each product, the ids and scores of its most similar products in the
 * {@code product_recommend} table.
 */
public class ALSProductSimilarAnaly {
    private static ClickUntil clickUntil = ClickUntilFactory.createClickUntil();

    /**
     * Runs the batch computation.
     *
     * @param args unused
     * @throws Exception if reading the CSV, running the operators or saving fails
     */
    public static void main(String[] args) throws Exception {
        String pathResult = "/Users/admin/Documents/評分預測表.csv";
        String schema = "userId long,productId long,result double";
        CsvSourceBatchOp csvSourceBatchOp = new CsvSourceBatchOp()
                .setIgnoreFirstLine(true)
                .setFilePath(pathResult)
                .setSchemaStr(schema)
                .setFieldDelimiter(",");
        BatchOperator<?> model = new AlsTrainBatchOp()
                .setUserCol("userId")
                .setItemCol("productId")
                .setRateCol("result")
                .setNumIter(10)
                .setRank(10)
                .setLambda(0.01)
                .linkFrom(csvSourceBatchOp);
        // Recommend similar items for the productId of every input row.
        BatchOperator<?> result = new AlsSimilarItemsRecommBatchOp()
                .setItemCol("productId")
                .setRecommCol("productRecommend")
                .setK(10)
                .setReservedCols("productId")
                .linkFrom(model,csvSourceBatchOp);
        List<Row> list = result.getDataSet().collect();
        // The input has one row per (user, product) rating, so the same product appears
        // several times in the result. Save each product once instead of inserting
        // identical rows for every rating.
        Set<Long> savedProductIds = new HashSet<>();
        for (Row row : list) {
            Long productId = (Long) row.getField("productId");
            if (!savedProductIds.add(productId)) {
                continue;
            }
            JSONObject jsonObject = JSONObject.parseObject(row.getField("productRecommend").toString());
            JSONArray productIdsRecommend = jsonObject.getJSONArray("productId");
            JSONArray scores = jsonObject.getJSONArray("score");
            String tablename = "product_recommend";
            Map<String,String> dataMap = new HashMap<>();
            dataMap.put("productId",productId + "");
            dataMap.put("productIdsRecommend",productIdsRecommend.toJSONString());
            dataMap.put("scores",scores.toJSONString());
            // Only productId is listed here; the two JSON columns are String columns.
            Set<String> fields = new HashSet<>();
            fields.add("productId");
            clickUntil.saveData(tablename,dataMap,fields);
        }
    }
}

運行結果

select * from product_recommend;

SELECT *
FROM product_recommend

Query id: 9695ecdf-207d-4931-a7de-6ee76c603588

┌─productId─┬─productIdsRecommend─┬─scores──────────────────────────────────────────────────┬─create_date─┐
│         5 │ [3,1,2]             │ [4.11977117110399,3.152251384796194,1.4976801931900996] │  1970-01-01 │
└───────────┴─────────────────────┴─────────────────────────────────────────────────────────┴─────────────┘
┌─productId─┬─productIdsRecommend─┬─scores─────────────────────────────────────────────────────┬─create_date─┐
│         2 │ [3,5,1]             │ [1.7338918392755318,1.4976801931900996,1.2717521355519668] │  1970-01-01 │
└───────────┴─────────────────────┴────────────────────────────────────────────────────────────┴─────────────┘
┌─productId─┬─productIdsRecommend─┬─scores────────────────────────────────────────────────────┬─create_date─┐
│         1 │ [3,5,2]             │ [3.5011045336610236,3.152251384796194,1.2717521355519668] │  1970-01-01 │
│         1 │ [3,5,2]             │ [3.5011045336610236,3.152251384796194,1.2717521355519668] │  1970-01-01 │
│         3 │ [5,1,2]             │ [4.11977117110399,3.5011045336610236,1.7338918392755318]  │  1970-01-01 │
│         3 │ [5,1,2]             │ [4.11977117110399,3.5011045336610236,1.7338918392755318]  │  1970-01-01 │
│         3 │ [5,1,2]             │ [4.11977117110399,3.5011045336610236,1.7338918392755318]  │  1970-01-01 │
│         5 │ [3,1,2]             │ [4.11977117110399,3.152251384796194,1.4976801931900996]   │  1970-01-01 │
└───────────┴─────────────────────┴───────────────────────────────────────────────────────────┴─────────────┘
┌─productId─┬─productIdsRecommend─┬─scores─────────────────────────────────────────────────────┬─create_date─┐
│         2 │ [3,5,1]             │ [1.7338918392755318,1.4976801931900996,1.2717521355519668] │  1970-01-01 │
└───────────┴─────────────────────┴────────────────────────────────────────────────────────────┴─────────────┘

9 rows in set. Elapsed: 0.040 sec. 

用戶最近評分商品實時推薦

添加一個Redis依賴

<dependency>
   <groupId>redis.clients</groupId>
   <artifactId>jedis</artifactId>
   <version>2.9.0</version>
</dependency>

增加一個Redis工具類

/**
 * Redis helper that keeps each user's evaluations in a sorted set named
 * {@code "evaluate" + userId}, scored by the evaluation timestamp in milliseconds.
 */
public class RedisUntil {
    private static JedisPool jedisPool;

    static {
        JedisPoolConfig jedisConfig = new JedisPoolConfig();
        jedisConfig.setMaxIdle(8);
        jedisConfig.setMaxWaitMillis(-1);
        jedisPool = new JedisPool(jedisConfig,"127.0.0.1",6379,
                10000,"*****");
    }

    /**
     * Adds the evaluation, serialized as JSON, to the user's sorted set.
     * Redis failures are printed and otherwise ignored (best effort).
     *
     * @param evaluate evaluation to store; its user id and evaluation time are read
     */
    public static void saveEvaluate(Evaluate evaluate) {
        Long userId = evaluate.getUserId();
        Long time = evaluate.getEvaluateTime().getTime();
        String result = JSONObject.toJSONString(evaluate);
        // try-with-resources returns the connection to the pool.
        try (Jedis jedis = jedisPool.getResource()) {
            jedis.zadd("evaluate" + userId,time,result);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Loads the user's stored evaluations whose score lies between 0 and {@code numbers}.
     * Note that {@code numbers} is passed to Redis as the maximum score (a timestamp bound),
     * not as a maximum element count.
     *
     * @param userId  user whose evaluations are read
     * @param numbers upper score bound, inclusive
     * @return the matching evaluations; empty if none exist or the Redis call failed
     */
    public static List<Evaluate> getByUser(long userId,long numbers) {
        List<Evaluate> result = new ArrayList<>();
        Set<Tuple> setByScore = null;
        try (Jedis jedis = jedisPool.getResource()) {
            setByScore = jedis.zrangeByScoreWithScores("evaluate" + userId,0,numbers);
        } catch (Exception e) {
            e.printStackTrace();
        }
        if (setByScore == null) {
            // The lookup failed; previously this fell through to a NullPointerException.
            return result;
        }
        for (Tuple tuple : setByScore) {
            result.add(JSONObject.parseObject(tuple.getElement(),Evaluate.class));
        }
        return result;
    }
}

修改ScoreMap,將每次獲取的評分對象放入Redis

/**
 * Parses a JSON evaluation, stores it in Redis, and emits a {@link Score} record
 * (counted once) only when the evaluation's score is greater than 2.
 */
public class ScoreMap implements FlatMapFunction<String,Score> {
    @Override
    public void flatMap(String value, Collector<Score> out) throws Exception {
        Evaluate evaluate = JSONObject.parseObject(value, Evaluate.class);
        Integer score = evaluate.getScore();
        // Every evaluation is stored, regardless of its score.
        RedisUntil.saveEvaluate(evaluate);
        if (score <= 2) {
            return;
        }

        Long productId = evaluate.getProductId();
        Long productTypeId = evaluate.getProductTypeId();

        Score emitted = new Score();
        emitted.setProductId(productId);
        emitted.setProductTypeId(productTypeId);
        emitted.setNumbers(1L);
        emitted.setGroupField("score==" + productId + "==" + productTypeId);
        out.collect(emitted);
    }
}

創建一個MapUntil工具類

/**
 * Map helpers.
 */
public class MapUntil {
    /**
     * Returns the entries of {@code data} with the highest values.
     * The returned map iterates in descending value order, so callers that serialize its
     * keys get a ranked list. (Copying the sorted entries into a plain HashMap, as before,
     * discarded that order.)
     *
     * @param data    values keyed by id; not modified
     * @param numbers maximum number of entries to keep
     * @return a new map holding at most {@code numbers} entries, highest value first
     */
    public static Map<Long,Double> top(Map<Long,Double> data,Integer numbers) {
        List<Map.Entry<Long,Double>> ranked = new ArrayList<>(data.entrySet());
        // Descending by value.
        ranked.sort((left, right) -> right.getValue().compareTo(left.getValue()));
        int limit = Math.min(ranked.size(), numbers);
        // Insertion-ordered map keeps the ranking; fully qualified to avoid a new import.
        Map<Long,Double> result = new java.util.LinkedHashMap<>();
        for (Map.Entry<Long,Double> entry : ranked.subList(0, limit)) {
            result.put(entry.getKey(), entry.getValue());
        }
        return result;
    }
}

然後就是Flink流的實時推薦

/**
 * Real-time recommendation job.
 * For every evaluation read from the Kafka topic "evaluate" it looks up the products
 * recommended for the evaluated product, weights them with the user's stored evaluations
 * and saves the resulting top 5 product ids in the {@code user_recommend} table.
 */
public class RealTimeRecommendationAnaly {
    private static ClickUntil clickUntil = ClickUntilFactory.createClickUntil();

    /**
     * Base-10 logarithm of a counter, clamped at 1 so that an empty counter contributes 0.
     * Without the clamp, log(0) is -Infinity and the difference of two empty counters is NaN.
     */
    private static Double log(Long number) {
        return Math.log10(Math.max(number, 1L));
    }

    /** Null-safe value comparison of two product ids. */
    private static boolean sameId(Long left, Long right) {
        return left == null ? right == null : left.equals(right);
    }

    /**
     * Builds and runs the streaming job.
     *
     * @param args unused
     * @throws Exception if the job fails
     */
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        Properties properties = new Properties();
        properties.setProperty("bootstrap.servers","127.0.0.1:9092");
        properties.setProperty("group.id","portrait");
        FlinkKafkaConsumer<String> myConsumer = new FlinkKafkaConsumer<>("evaluate",
                new SimpleStringSchema(),properties);
        myConsumer.setStartFromLatest();
        DataStreamSource<String> dataSource = env.addSource(myConsumer);
        env.enableCheckpointing(5000);

        // Nothing is emitted downstream; the function writes its result to ClickHouse itself.
        dataSource.flatMap(new FlatMapFunction<String, Object>() {
            @Override
            public void flatMap(String value, Collector<Object> out) throws Exception {
                Evaluate evaluate = JSONObject.parseObject(value,Evaluate.class);
                Long userId = evaluate.getUserId();
                Long productId = evaluate.getProductId();

                // Products recommended for the evaluated product, with their scores.
                List<Long> recommendProductIds;
                List<Double> recommendScores;
                // The ResultSet is closed once both columns have been read.
                try (ResultSet queryResult = clickUntil.getQueryResult("test", "select productIdsRecommend,scores " +
                        "from product_recommend where productId=" + productId)) {
                    if (!queryResult.next()) {
                        return;
                    }
                    recommendProductIds = JSONObject.parseArray(
                            queryResult.getString("productIdsRecommend"),Long.class);
                    recommendScores = JSONObject.parseArray(
                            queryResult.getString("scores"),Double.class);
                }

                // All evaluations stored for this user.
                List<Evaluate> currentEvaluates = RedisUntil.getByUser(userId, Long.MAX_VALUE);
                Map<Long,Double> finalMap = new HashMap<>();
                for (int i = 0; i < recommendProductIds.size(); i++) {
                    Long prodId = recommendProductIds.get(i);
                    // Ids and scores are parallel lists; pair them by position.
                    Double score = recommendScores.get(i);
                    long numbers = 0L;
                    double totalSimilar = 0.0;
                    long addNumber = 0L;
                    long descrNumber = 0L;
                    for (Evaluate eva : currentEvaluates) {
                        Integer evaluateScore = eva.getScore();
                        // Only evaluations of this recommended product count. Compare by
                        // value: "==" on boxed Long compares references.
                        if (!sameId(prodId, eva.getProductId())) {
                            continue;
                        }
                        // Accumulate score * rating when the recommendation score is at least 0.55.
                        if (score >= 0.55) {
                            numbers++;
                            totalSimilar += evaluateScore * score;
                        }
                        // Ratings above 3 strengthen the candidate.
                        if (evaluateScore > 3) {
                            addNumber++;
                        }
                        // Ratings below 2 weaken the candidate.
                        if (evaluateScore < 2) {
                            descrNumber++;
                        }
                    }
                    if (numbers > 0L) {
                        finalMap.put(prodId,
                                totalSimilar / numbers + log(addNumber) - log(descrNumber));
                    }
                }
                if (finalMap.isEmpty()) {
                    return;
                }

                // Save once, after all candidates are scored. Saving inside the loop wrote
                // one partial list per candidate.
                Set<Long> productIdSimilarList = MapUntil.top(finalMap,5).keySet();
                String tablename = "user_recommend";
                Map<String,String> dataMap = new HashMap<>();
                dataMap.put("userId",userId + "");
                dataMap.put("productId",productId + "");
                dataMap.put("productIdSimilarList",JSONObject.toJSONString(productIdSimilarList));
                Set<String> fields = new HashSet<>();
                fields.add("userId");
                fields.add("productId");
                clickUntil.saveData(tablename,dataMap,fields);
            }
        });
        env.execute("realTime recommendation");
    }
}
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章