Flink用户画像(二)

Flink用户画像

创建用户画像偏爱品牌标签

创建一个商品品牌标签类

/**
 * Brand click record that flows through the brand-preference pipeline
 * (map, reduce and sink stages). Accessors are generated by Lombok's {@code @Data}.
 */
@Data
public class Brand {
    // Id of the user the record belongs to.
    private Long userId;
    // Id of the product that was viewed.
    private Long productId;
    // Brand name of the product.
    private String brand;
    // Click count carried by this record; starts at 0.
    private Long nums = 0L;
    // Composite string used as the grouping key of the stream.
    private String groupField;
    // Time bucket of the record (presumably an hour-start timestamp; confirm against DateUntil).
    private Long timeInfo;
}

一个BrandMap实现MapFunction接口的转换类

/**
 * Turns one raw scan event (a JSON string) into a {@link Brand} record with a click count of 1.
 * The brand name is looked up in the HBase {@code product} table, column {@code info:product_brand},
 * using the product id as row key.
 */
public class BrandMap implements MapFunction<String,Brand> {
    @Override
    public Brand map(String value) throws Exception {
        ScanOpertor scan = JSONObject.parseObject(value, ScanOpertor.class);
        Long uid = scan.getUserId();
        Long pid = scan.getProductId();

        // Fetch the brand of the scanned product from HBase.
        String brandName = HbaseUtil.getdata("product", String.valueOf(pid), "info", "product_brand");

        // The bucket is derived from the wall clock at processing time, not from the event itself.
        Long hourBucket = DateUntil.getCurrentHourStart(System.currentTimeMillis());

        Brand record = new Brand();
        record.setBrand(brandName);
        // Grouping key: one group per (hour bucket, user, brand).
        record.setGroupField("brand==" + hourBucket + "==" + uid + "==" + brandName);
        record.setTimeInfo(hourBucket);
        record.setUserId(uid);
        record.setProductId(pid);
        record.setNums(1L);
        return record;
    }
}

当我们新增一个商品

INSERT INTO `product` VALUES (1, 1, '大牛洗发水', '洗发水', 23.2000000000, 23, '2021-11-01 19:13:44', '2021-11-01 19:13:51', '内蒙古包头市', '大牛', 10.0000000000, 'RF-666')

可以在HBase中查看到该商品的品牌

scan 'product',{COLUMNS=>'info:product_brand'}
ROW                   COLUMN+CELL                                               
 1                    column=info:product_brand, timestamp=1636809177766, value=
                      \xE5\xA4\xA7\xE7\x89\x9B

当用户浏览该商品时就会留下浏览痕迹。此处是为了存储用户每小时点击过的品牌和点击次数。

一个BrandReduce实现了ReduceFunction接口的统计类

/**
 * Combines two {@link Brand} records of the same group into a fresh record whose count is the
 * sum of both inputs. User, brand, grouping key and time bucket are taken from the first
 * record; the product id is not carried over.
 */
public class BrandReduce implements ReduceFunction<Brand> {
    @Override
    public Brand reduce(Brand value1, Brand value2) throws Exception {
        Brand merged = new Brand();
        merged.setUserId(value1.getUserId());
        merged.setBrand(value1.getBrand());
        merged.setGroupField(value1.getGroupField());
        merged.setTimeInfo(value1.getTimeInfo());
        merged.setNums(value1.getNums() + value2.getNums());
        return merged;
    }
}

一个BrandSink实现了SinkFunction接口的存储类

/**
 * Writes each aggregated {@link Brand} record to the {@code brand_info} table through
 * {@code ClickUntil}. Null records are ignored.
 */
public class BrandSink implements SinkFunction<Brand> {
    private ClickUntil clickUntil = ClickUntilFactory.createClickUntil();

    @Override
    public void invoke(Brand value, Context context) throws Exception {
        if (value == null) {
            return;
        }

        // Column name -> value, everything passed as text.
        Map<String, String> row = new HashMap<>();
        row.put("timeinfo", String.valueOf(value.getTimeInfo()));
        row.put("brandlabel", value.getBrand());
        row.put("numbers", String.valueOf(value.getNums()));

        // Field-name set handed to saveData (its exact meaning is defined by ClickUntil).
        Set<String> fieldNames = new HashSet<>();
        fieldNames.add("timeinfo");
        fieldNames.add("numbers");

        clickUntil.saveData("brand_info", row, fieldNames);
    }
}

一个UserBrandSaveMap实现了MapFunction接口的转换类

/**
 * Maintains each user's ranked list of preferred brands in HBase ({@code user_info}, column
 * {@code info:brandlist}) and emits a per-brand record for the downstream brand count.
 *
 * <p>The stored value is a JSON array of {@code {"key": brand, "value": count}} objects,
 * sorted by count in descending order and capped at five entries.
 */
public class UserBrandSaveMap implements MapFunction<Brand,Brand> {
    /**
     * Updates the stored brand ranking of the record's user and returns a new record keyed by
     * time bucket and brand, with a count of 1.
     *
     * @param value aggregated brand record of one user
     * @return record grouped by {@code "brandBy==<timeInfo>==<brand>"}
     * @throws Exception if the HBase access or the JSON handling fails
     */
    @Override
    public Brand map(Brand value) throws Exception {
        Long userId = value.getUserId();
        String brandString = value.getBrand();
        Long timeInfo = value.getTimeInfo();
        String tablename = "user_info";
        String rowkey = userId + "";
        String famliyname = "info";
        String colum = "brandlist";

        // Load the user's stored brand preferences and merge the current brand into them.
        String brandListString = HbaseUtil.getdata(tablename, rowkey, famliyname, colum);
        List<Map<String,String>> result = mergePreference(brandListString, brandString);

        String data = JSONObject.toJSONString(result);
        HbaseUtil.putdata(tablename,rowkey,famliyname,colum,data);

        Brand brand = new Brand();
        String groupField = "brandBy==" + timeInfo + "==" + brandString;
        brand.setGroupField(groupField);
        brand.setTimeInfo(timeInfo);
        brand.setBrand(brandString);
        brand.setNums(1L);

        return brand;
    }

    /** Parses the stored ranking, bumps (or adds) the given brand and returns the top five. */
    private static List<Map<String,String>> mergePreference(String storedJson, String brand) {
        List<Map<String,String>> merged = new ArrayList<>();
        boolean known = false;

        if (StringUtils.isNotBlank(storedJson)) {
            List<Map> stored = JSONObject.parseArray(storedJson, Map.class);
            if (stored != null) {
                for (Map<?,?> item : stored) {
                    Object name = item.get("key");
                    Object count = item.get("value");
                    if (name == null || count == null) {
                        // Malformed entry: nothing usable to rank, so leave it out.
                        continue;
                    }
                    String brandStr = name.toString();
                    long preference = Long.parseLong(count.toString());
                    // Same brand as the incoming record: raise its preference by one.
                    // NOTE(review): the incoming record may carry an aggregated count in
                    // getNums(); confirm whether the increment should be that count instead.
                    if (brandStr.equals(brand)) {
                        preference++;
                        known = true;
                    }
                    merged.add(entry(brandStr, preference));
                }
            }
        }

        // A brand seen for the first time has to be added, otherwise the list can never grow.
        if (!known && StringUtils.isNotBlank(brand)) {
            merged.add(entry(brand, 1L));
        }

        // Highest preference first; the sort is stable, so ties keep their stored order.
        merged.sort((a, b) -> Long.compare(Long.parseLong(b.get("value")), Long.parseLong(a.get("value"))));
        if (merged.size() > 5) {
            // Copy, so a plain list rather than a subList view gets serialized.
            merged = new ArrayList<>(merged.subList(0, 5));
        }
        return merged;
    }

    /** Builds one ranking entry in the stored {@code key}/{@code value} text format. */
    private static Map<String,String> entry(String brand, long preference) {
        Map<String,String> item = new java.util.LinkedHashMap<>();
        item.put("key", brand);
        item.put("value", String.valueOf(preference));
        return item;
    }
}

此处是为了存储用户最为偏爱前5名的品牌的排名,用户每点击一次该品牌,就会使用户对该品牌的偏爱度+1,并重新排序存储。

一个UserBrandReduce实现了ReduceFunction接口的统计类

/**
 * Combines two per-brand {@link Brand} records into a fresh record whose count is the sum of
 * both inputs. Brand, grouping key and time bucket are taken from the first record.
 */
public class UserBrandReduce implements ReduceFunction<Brand> {
    @Override
    public Brand reduce(Brand value1, Brand value2) throws Exception {
        Brand merged = new Brand();
        merged.setBrand(value1.getBrand());
        merged.setGroupField(value1.getGroupField());
        merged.setTimeInfo(value1.getTimeInfo());
        merged.setNums(value1.getNums() + value2.getNums());
        return merged;
    }
}

一个UserBrandSink实现了SinkFunction接口的存储类

/**
 * Writes each aggregated per-brand record to the {@code user_brand_info} table through
 * {@code ClickUntil}. Null records are ignored.
 */
public class UserBrandSink implements SinkFunction<Brand> {
    private ClickUntil clickUntil = ClickUntilFactory.createClickUntil();

    @Override
    public void invoke(Brand value, Context context) throws Exception {
        if (value == null) {
            return;
        }

        // Column name -> value, everything passed as text.
        Map<String, String> row = new HashMap<>();
        row.put("timeinfo", String.valueOf(value.getTimeInfo()));
        row.put("userbrandlabel", value.getBrand());
        row.put("numbers", String.valueOf(value.getNums()));

        // Field-name set handed to saveData (its exact meaning is defined by ClickUntil).
        Set<String> fieldNames = new HashSet<>();
        fieldNames.add("timeinfo");
        fieldNames.add("numbers");

        clickUntil.saveData("user_brand_info", row, fieldNames);
    }
}

然后是Flink的流处理

/**
 * Flink job for the brand-preference label. It reads scan events from the Kafka topic
 * {@code scan}, counts clicks per group in one-hour windows, stores the counts, updates each
 * user's brand ranking and finally stores per-brand counts.
 */
public class BrandAnaly {
    /**
     * Builds and runs the streaming job.
     *
     * @param args unused
     * @throws Exception if the job cannot be built or fails while running
     */
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        Properties properties = new Properties();
        properties.setProperty("bootstrap.servers","127.0.0.1:9092");
        properties.setProperty("group.id","portrait");
        FlinkKafkaConsumer<String> myConsumer = new FlinkKafkaConsumer<>("scan",
                new SimpleStringSchema(),properties);
        DataStreamSource<String> data = env.addSource(myConsumer);
        env.enableCheckpointing(5000);
        DataStream<Brand> map = data.map(new BrandMap());
        // Use the keyed window (timeWindow). timeWindowAll ignores the preceding keyBy and
        // would reduce the records of every user and brand into a single result.
        DataStream<Brand> reduce = map.keyBy(Brand::getGroupField)
                .timeWindow(Time.hours(1))
                .reduce(new BrandReduce());
        reduce.addSink(new BrandSink());

        // Second stage: refresh the per-user ranking, then count per brand and hour.
        DataStream<Brand> userMap = reduce.map(new UserBrandSaveMap());
        DataStream<Brand> userReduce = userMap.keyBy(Brand::getGroupField)
                .timeWindow(Time.hours(1))
                .reduce(new UserBrandReduce());
        userReduce.addSink(new UserBrandSink());

        env.execute("portrait brand type");
    }
}

推荐部分

热门商品统计

所谓热门商品就是用户购买数量最多的商品。

创建一个历史热门商品类

/**
 * Purchase counter for one product, used by the all-time hot-product statistic.
 * Accessors are generated by Lombok's {@code @Data}.
 */
@Data
public class HistoryHotProduct {
    // Id of the purchased product.
    private Long productId;
    // Id of the product's type.
    private Long productTypeId;
    // Number of orders counted in this record.
    private Long numbers;
    // Composite string used as the grouping key of the stream.
    private String groupField;
}

一个HistoryHotProductMap实现了FlatMapFunction接口的转换类

/**
 * Turns one order message (JSON) into a {@link HistoryHotProduct} with a count of 1.
 * Only orders whose pay status is greater than 0 (i.e. not unpaid) are emitted.
 */
public class HistoryHotProductMap implements FlatMapFunction<String,HistoryHotProduct> {
    /**
     * @param value order message as a JSON string
     * @param out   collector receiving at most one record per order
     * @throws Exception if the JSON cannot be parsed
     */
    @Override
    public void flatMap(String value, Collector<HistoryHotProduct> out) throws Exception {
        Order order = JSONObject.parseObject(value,Order.class);
        // A missing order or pay status would fail on unboxing and take the job down; such
        // records carry nothing countable, so they are dropped.
        if (order == null || order.getPayStatus() == null) {
            return;
        }
        // Skip orders that are still unpaid.
        if (order.getPayStatus() <= 0) {
            return;
        }
        HistoryHotProduct historyHotProduct = new HistoryHotProduct();
        historyHotProduct.setProductId(order.getProductId());
        historyHotProduct.setProductTypeId(order.getProductTypeId());
        historyHotProduct.setNumbers(1L);
        // Grouping key: one group per (product, product type).
        String groupFiled = "HistoryHotProduct==" + order.getProductId() + "=="
                + order.getProductTypeId();
        historyHotProduct.setGroupField(groupFiled);
        out.collect(historyHotProduct);
    }
}

一个HistoryHotProductReduce实现了ReduceFunction接口的统计类

/**
 * Adds the count of the second record onto the first record and returns the first record.
 */
public class HistoryHotProductReduce implements ReduceFunction<HistoryHotProduct> {
    @Override
    public HistoryHotProduct reduce(HistoryHotProduct value1, HistoryHotProduct value2) throws Exception {
        // The accumulator (value1) is updated in place and handed back.
        long total = value1.getNumbers() + value2.getNumbers();
        value1.setNumbers(total);
        return value1;
    }
}

一个HistoryHotProductSink实现了SinkFunction接口的存储类

/**
 * Stores the purchase count of a product in {@code history_hot_product}. The count already
 * stored for the product, if any, is added to the incoming count before saving.
 */
public class HistoryHotProductSink implements SinkFunction<HistoryHotProduct> {
    private ClickUntil clickUntil = ClickUntilFactory.createClickUntil();

    /**
     * @param value   windowed purchase count of one product; null is ignored
     * @param context sink context (unused)
     * @throws Exception if the query or the save fails
     */
    @Override
    public void invoke(HistoryHotProduct value, Context context) throws Exception {
        if (value == null) {
            return;
        }
        Long productId = value.getProductId();
        Long productTypeId = value.getProductTypeId();
        Long numbers = value.getNumbers();

        // Close the result set once the stored count has been read.
        try (ResultSet queryResult = clickUntil.getQueryResult("test", "select * from history_hot_product " +
                "where product_id=" + productId)) {
            if (queryResult != null && queryResult.next()) {
                // NOTE(review): positional access relies on the table's column order (3 = numbers).
                numbers += queryResult.getLong(3);
            }
        }

        Map<String, String> data = new HashMap<>();
        data.put("productId", productId + "");
        data.put("productTypeId", productTypeId + "");
        data.put("numbers", numbers + "");
        String tablename = "history_hot_product";
        Set<String> fields = new HashSet<>();
        fields.add("productId");
        fields.add("productTypeId");
        fields.add("numbers");
        // Same save call whether or not a row already existed.
        clickUntil.saveData(tablename, data, fields);
    }
}

然后是Flink的流处理

/**
 * Flink job for the all-time hot-product statistic: reads the Kafka topic {@code order},
 * counts paid orders per product in five-hour windows and stores the result.
 */
public class HistoryHotProductAnaly {
    public static void main(String[] args) throws Exception {
        Properties kafkaProps = new Properties();
        kafkaProps.setProperty("bootstrap.servers","127.0.0.1:9092");
        kafkaProps.setProperty("group.id","portrait");

        FlinkKafkaConsumer<String> orderConsumer = new FlinkKafkaConsumer<>("order",
                new SimpleStringSchema(), kafkaProps);
        // Start reading from the latest offset.
        orderConsumer.setStartFromLatest();

        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        DataStreamSource<String> orders = env.addSource(orderConsumer);
        env.enableCheckpointing(5000);

        orders.flatMap(new HistoryHotProductMap())
                .keyBy(HistoryHotProduct::getGroupField)
                .timeWindow(Time.hours(5))
                .reduce(new HistoryHotProductReduce())
                .addSink(new HistoryHotProductSink());

        env.execute("history hot product");
    }
}

历史评分统计

在数据库中新建评价表

-- Recreates the `evaluate` table, which holds one row per product review.
DROP TABLE IF EXISTS `evaluate`;
CREATE TABLE `evaluate` (
  `id` bigint(20) NOT NULL,
  `user_id` bigint(20) DEFAULT NULL,
  `order_id` bigint(20) DEFAULT NULL,
  `product_id` bigint(20) DEFAULT NULL,
  `product_type_id` bigint(20) DEFAULT NULL,
  `evaluate_time` datetime DEFAULT NULL,
  -- Rating value. NOTE(review): 255 is only the display width, not a value range.
  `score` int(255) DEFAULT NULL,
  -- Free-text review, at most 500 characters.
  `content` varchar(500) DEFAULT NULL,
  PRIMARY KEY (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;

对应实体类

/**
 * Product review entity with the same columns as the {@code evaluate} table.
 * Accessors are generated by Lombok's {@code @Data}.
 */
@Data
public class Evaluate {
    // Primary key of the review.
    private Long id;
    // User who wrote the review.
    private Long userId;
    // Order the review refers to.
    private Long orderId;
    // Reviewed product.
    private Long productId;
    // Type of the reviewed product.
    private Long productTypeId;
    // Time the review was written.
    private Date evaluateTime;
    private Integer score; // 1-5: 1-2 negative, 3-4 neutral, 5 positive
    private String content; // review text
}

在HBase中执行

create 'evaluate','info'

在Kafka的bin目录下执行

./kafka-topics.sh --create --zookeeper localhost:2181 --replication-factor 1 --partitions 1 --topic evaluate

一个评分标签实体类

/**
 * Review counter for one product, used by the historical score statistic.
 * Accessors are generated by Lombok's {@code @Data}.
 */
@Data
public class Score {
    // Id of the reviewed product.
    private Long productId;
    // Id of the product's type.
    private Long productTypeId;
    // Number of reviews counted in this record.
    private Long numbers;
    // Composite string used as the grouping key of the stream.
    private String groupField;
}

一个ScoreMap实现了FlatMapFunction接口的转换类

/**
 * Turns one review message (JSON) into a {@link Score} with a count of 1.
 * Only reviews with a score above 2 are emitted.
 */
public class ScoreMap implements FlatMapFunction<String,Score> {
    /**
     * @param value review message as a JSON string
     * @param out   collector receiving at most one record per review
     * @throws Exception if the JSON cannot be parsed
     */
    @Override
    public void flatMap(String value, Collector<Score> out) throws Exception {
        Evaluate evaluate = JSONObject.parseObject(value,Evaluate.class);
        // A missing review or score would fail on unboxing and take the job down; such
        // records carry nothing countable, so they are dropped.
        if (evaluate == null || evaluate.getScore() == null) {
            return;
        }
        if (evaluate.getScore() <= 2) {
            return;
        }
        Score scoreResult = new Score();
        Long productId = evaluate.getProductId();
        Long productTypeId = evaluate.getProductTypeId();
        scoreResult.setProductId(productId);
        scoreResult.setProductTypeId(productTypeId);
        scoreResult.setNumbers(1L);
        // Grouping key: one group per (product, product type).
        String groupField = "score==" + productId + "==" + productTypeId;
        scoreResult.setGroupField(groupField);
        out.collect(scoreResult);
    }
}

一个ScoreReduce实现了ReduceFunction接口的统计类

/**
 * Adds the count of the second record onto the first record and returns the first record.
 */
public class ScoreReduce implements ReduceFunction<Score> {
    @Override
    public Score reduce(Score value1, Score value2) throws Exception {
        // The accumulator (value1) is updated in place and handed back.
        long total = value1.getNumbers() + value2.getNumbers();
        value1.setNumbers(total);
        return value1;
    }
}

一个ScoreSink实现了SinkFunction接口的存储类

/**
 * Stores the review count of a product in {@code score}. The count already stored for the
 * product, if any, is added to the incoming count before saving.
 */
public class ScoreSink implements SinkFunction<Score> {
    private ClickUntil clickUntil = ClickUntilFactory.createClickUntil();

    /**
     * @param value   windowed review count of one product; null is ignored
     * @param context sink context (unused)
     * @throws Exception if the query or the save fails
     */
    @Override
    public void invoke(Score value, Context context) throws Exception {
        if (value == null) {
            return;
        }
        Long productId = value.getProductId();
        Long productTypeId = value.getProductTypeId();
        Long numbers = value.getNumbers();

        // Close the result set once the stored count has been read.
        try (ResultSet queryResult = clickUntil.getQueryResult("test", "select * from score " +
                "where product_id=" + productId)) {
            if (queryResult != null && queryResult.next()) {
                // NOTE(review): positional access relies on the table's column order (3 = numbers).
                numbers += queryResult.getLong(3);
            }
        }

        Map<String, String> data = new HashMap<>();
        data.put("productId", productId + "");
        data.put("productTypeId", productTypeId + "");
        data.put("numbers", numbers + "");
        String tablename = "score";
        Set<String> fields = new HashSet<>();
        fields.add("productId");
        fields.add("productTypeId");
        fields.add("numbers");
        // Same save call whether or not a row already existed.
        clickUntil.saveData(tablename, data, fields);
    }
}

然后是Flink的流处理

/**
 * Flink job for the historical score statistic: reads the Kafka topic {@code evaluate},
 * counts qualifying reviews per product in five-hour windows and stores the result.
 */
public class ScoreAnaly {
    public static void main(String[] args) throws Exception {
        Properties kafkaProps = new Properties();
        kafkaProps.setProperty("bootstrap.servers","127.0.0.1:9092");
        kafkaProps.setProperty("group.id","portrait");

        FlinkKafkaConsumer<String> reviewConsumer = new FlinkKafkaConsumer<>("evaluate",
                new SimpleStringSchema(), kafkaProps);

        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        DataStreamSource<String> reviews = env.addSource(reviewConsumer);
        env.enableCheckpointing(5000);

        reviews.flatMap(new ScoreMap())
                .keyBy(Score::getGroupField)
                .timeWindow(Time.hours(5))
                .reduce(new ScoreReduce())
                .addSink(new ScoreSink());

        env.execute("portrait score");
    }
}

近期热门商品统计

近期热门指的是最近10天的热门商品

创建一个近期热门商品标签实体类

/**
 * Per-day purchase counter for one product, used by the recent hot-product statistic.
 * Accessors are generated by Lombok's {@code @Data}.
 */
@Data
public class RecentHotProduct {
    // Id of the purchased product.
    private Long productId;
    // Id of the product's type.
    private Long productTypeId;
    // Number of orders counted in this record.
    private Long numbers;
    // Day of the order as a formatted date string.
    private String dateTime;
    // Composite string used as the grouping key of the stream.
    private String groupField;
}

DateUntil增加一个静态方法

/**
 * Formats a date with the given {@link SimpleDateFormat} pattern.
 *
 * @param date          date to format
 * @param dateFormatStr pattern, e.g. {@code "yyyyMMdd"}
 * @return the formatted date text
 */
public static String transferDate(Date date,String dateFormatStr) {
    // A new formatter per call, so no formatter instance is shared between threads.
    return new SimpleDateFormat(dateFormatStr).format(date);
}

一个RecentHotProductMap实现了FlatMapFunction接口的转换类

/**
 * Turns one order message (JSON) into a {@link RecentHotProduct} with a count of 1, tagged
 * with the order's creation day ({@code yyyyMMdd}). Only orders whose pay status is greater
 * than 0 are emitted.
 */
public class RecentHotProductMap implements FlatMapFunction<String,RecentHotProduct> {
    /**
     * @param value order message as a JSON string
     * @param out   collector receiving at most one record per order
     * @throws Exception if the JSON cannot be parsed
     */
    @Override
    public void flatMap(String value, Collector<RecentHotProduct> out) throws Exception {
        Order order = JSONObject.parseObject(value,Order.class);
        // A missing order or pay status would fail on unboxing and take the job down.
        if (order == null || order.getPayStatus() == null) {
            return;
        }
        if (order.getPayStatus() <= 0) {
            return;
        }
        Date date = order.getCreateTime();
        if (date == null) {
            // Without a creation time the order cannot be assigned to a day.
            return;
        }
        RecentHotProduct recentHotProduct = new RecentHotProduct();
        Long productId = order.getProductId();
        Long productTypeId = order.getProductTypeId();
        String dateString = DateUntil.transferDate(date,"yyyyMMdd");
        // Grouping key: one group per (product, product type, day).
        String groupField = "RecentHotProduct==" + productId + "=="
                + productTypeId + "==" + dateString;
        recentHotProduct.setDateTime(dateString);
        recentHotProduct.setGroupField(groupField);
        recentHotProduct.setProductId(productId);
        recentHotProduct.setProductTypeId(productTypeId);
        recentHotProduct.setNumbers(1L);
        out.collect(recentHotProduct);
    }
}

一个RecentHotProductReduce实现了ReduceFunction接口的统计类

/**
 * Adds the count of the second record onto the first record and returns the first record.
 */
public class RecentHotProductReduce implements ReduceFunction<RecentHotProduct> {
    @Override
    public RecentHotProduct reduce(RecentHotProduct value1, RecentHotProduct value2) throws Exception {
        // The accumulator (value1) is updated in place and handed back.
        long total = value1.getNumbers() + value2.getNumbers();
        value1.setNumbers(total);
        return value1;
    }
}

一个RecentHotProductSink实现了SinkFunction接口的存储类

/**
 * Stores the per-day purchase count of a product in {@code recent_hot_product}. The count
 * already stored for the same product and day, if any, is added to the incoming count.
 */
public class RecentHotProductSink implements SinkFunction<RecentHotProduct> {
    private ClickUntil clickUntil = ClickUntilFactory.createClickUntil();

    /**
     * @param value   windowed per-day purchase count of one product; null is ignored
     * @param context sink context (unused)
     * @throws Exception if the query or the save fails
     */
    @Override
    public void invoke(RecentHotProduct value, Context context) throws Exception {
        if (value == null) {
            return;
        }
        Long productId = value.getProductId();
        Long productTypeId = value.getProductTypeId();
        Long numbers = value.getNumbers();
        String dateTime = value.getDateTime();

        // Close the result set once the stored count has been read.
        try (ResultSet queryResult = clickUntil.getQueryResult("test", "select * from recent_hot_product " +
                "where product_id=" + productId + " and date_time='" + dateTime +"'")) {
            if (queryResult != null && queryResult.next()) {
                // NOTE(review): positional access relies on the table's column order (3 = numbers).
                numbers += queryResult.getLong(3);
            }
        }

        Map<String, String> data = new HashMap<>();
        data.put("productId", productId + "");
        data.put("productTypeId", productTypeId + "");
        data.put("numbers", numbers + "");
        data.put("dateTime",dateTime);
        String tablename = "recent_hot_product";
        Set<String> fields = new HashSet<>();
        fields.add("productId");
        fields.add("productTypeId");
        fields.add("numbers");
        // Same save call whether or not a row already existed.
        clickUntil.saveData(tablename, data, fields);
    }
}

然后是Flink的流处理

/**
 * Flink job for the recent hot-product statistic: reads the Kafka topic {@code order},
 * counts paid orders per product and day in five-hour windows and stores the result.
 */
public class RecentHotProductAnaly {
    public static void main(String[] args) throws Exception {
        Properties kafkaProps = new Properties();
        kafkaProps.setProperty("bootstrap.servers","127.0.0.1:9092");
        kafkaProps.setProperty("group.id","portrait");

        FlinkKafkaConsumer<String> orderConsumer = new FlinkKafkaConsumer<>("order",
                new SimpleStringSchema(), kafkaProps);
        // Start reading from the latest offset.
        orderConsumer.setStartFromLatest();

        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        DataStreamSource<String> orders = env.addSource(orderConsumer);
        env.enableCheckpointing(5000);

        orders.flatMap(new RecentHotProductMap())
                .keyBy(RecentHotProduct::getGroupField)
                .timeWindow(Time.hours(5))
                .reduce(new RecentHotProductReduce())
                .addSink(new RecentHotProductSink());

        env.execute("recent hot product");
    }
}

优质商品统计

创建一个优质商品标签实体类

/**
 * Per-day review counter and score total for one product, used by the high-quality product
 * statistic. Accessors are generated by Lombok's {@code @Data}.
 */
@Data
public class HighQualityProduct {
    // Id of the reviewed product.
    private Long productId;
    // Id of the product's type.
    private Long productTypeId;
    // Number of reviews counted in this record.
    private Long numbers;
    // Sum of the review scores counted in this record.
    private Integer scoreTotal;
    // Day of the review as a formatted date string.
    private String dateTime;
    // Composite string used as the grouping key of the stream.
    private String groupField;
}

一个HighQualityProductMap实现了FlatMapFunction接口的转换类

/**
 * Turns one review message (JSON) into a {@link HighQualityProduct} carrying a count of 1,
 * the review's score and the review day ({@code yyyyMMdd}).
 */
public class HighQualityProductMap implements FlatMapFunction<String,HighQualityProduct> {
    /**
     * @param value review message as a JSON string
     * @param out   collector receiving at most one record per review
     * @throws Exception if the JSON cannot be parsed
     */
    @Override
    public void flatMap(String value, Collector<HighQualityProduct> out) throws Exception {
        Evaluate evaluate = JSONObject.parseObject(value, Evaluate.class);
        // A review without score or time cannot be aggregated: a null score would fail in
        // the reducer's addition and a null date would fail in formatting. Drop it here.
        if (evaluate == null || evaluate.getScore() == null || evaluate.getEvaluateTime() == null) {
            return;
        }
        Integer score = evaluate.getScore();
        Date date = evaluate.getEvaluateTime();
        String dateTime = DateUntil.transferDate(date,"yyyyMMdd");
        HighQualityProduct highQualityProduct = new HighQualityProduct();
        Long productId = evaluate.getProductId();
        Long productTypeId = evaluate.getProductTypeId();
        highQualityProduct.setProductId(productId);
        highQualityProduct.setProductTypeId(productTypeId);
        highQualityProduct.setNumbers(1L);
        highQualityProduct.setScoreTotal(score);
        highQualityProduct.setDateTime(dateTime);
        // Grouping key: one group per (product, product type, day).
        String groupField = "HighQualityProduct==" + productId + "==" + productTypeId
                + "==" + dateTime;
        highQualityProduct.setGroupField(groupField);
        out.collect(highQualityProduct);
    }
}

一个HighQualityProductReduce实现了ReduceFunction接口的统计类

/**
 * Adds the review count and the score total of the second record onto the first record and
 * returns the first record.
 */
public class HighQualityProductReduce implements ReduceFunction<HighQualityProduct> {
    @Override
    public HighQualityProduct reduce(HighQualityProduct value1, HighQualityProduct value2) throws Exception {
        // Read all four operands first, then update the accumulator (value1) in place.
        Long countLeft = value1.getNumbers();
        Long countRight = value2.getNumbers();
        Integer scoreLeft = value1.getScoreTotal();
        Integer scoreRight = value2.getScoreTotal();

        long countSum = countLeft + countRight;
        value1.setNumbers(countSum);
        int scoreSum = scoreLeft + scoreRight;
        value1.setScoreTotal(scoreSum);
        return value1;
    }
}

一个HighQualityProductSink实现了SinkFunction接口的存储类

/**
 * Stores the per-day review count and score total of a product in
 * {@code high_quality_product}. Values already stored for the same product and day, if any,
 * are added to the incoming values before saving.
 */
public class HighQualityProductSink implements SinkFunction<HighQualityProduct> {
    private ClickUntil clickUntil = ClickUntilFactory.createClickUntil();

    /**
     * @param value   windowed per-day review statistics of one product; null is ignored
     * @param context sink context (unused)
     * @throws Exception if the query or the save fails
     */
    @Override
    public void invoke(HighQualityProduct value, Context context) throws Exception {
        if (value == null) {
            return;
        }
        Long productId = value.getProductId();
        Long productTypeId = value.getProductTypeId();
        Long numbers = value.getNumbers();
        Integer scoreTotal = value.getScoreTotal();
        String dateTime = value.getDateTime();

        // Close the result set once the stored values have been read.
        try (ResultSet queryResult = clickUntil.getQueryResult("test", "select * from high_quality_product " +
                "where product_id=" + productId + " and date_time='" + dateTime +"'")) {
            if (queryResult != null && queryResult.next()) {
                // NOTE(review): positional access relies on the table's column order
                // (3 = numbers, 4 = scoreTotal).
                numbers += queryResult.getLong(3);
                scoreTotal += queryResult.getInt(4);
            }
        }

        Map<String, String> data = new HashMap<>();
        data.put("productId", productId + "");
        data.put("productTypeId", productTypeId + "");
        data.put("numbers", numbers + "");
        data.put("scoreTotal",scoreTotal + "");
        data.put("dateTime",dateTime);
        String tablename = "high_quality_product";
        Set<String> fields = new HashSet<>();
        fields.add("productId");
        fields.add("productTypeId");
        fields.add("numbers");
        fields.add("scoreTotal");
        // Same save call whether or not a row already existed.
        clickUntil.saveData(tablename, data, fields);
    }
}

然后是Flink的流处理

/**
 * Flink job for the high-quality product statistic: reads the Kafka topic {@code evaluate},
 * sums review counts and scores per product and day in five-hour windows and stores them.
 */
public class HighQualityProductAnaly {
    public static void main(String[] args) throws Exception {
        Properties kafkaProps = new Properties();
        kafkaProps.setProperty("bootstrap.servers","127.0.0.1:9092");
        kafkaProps.setProperty("group.id","portrait");

        FlinkKafkaConsumer<String> reviewConsumer = new FlinkKafkaConsumer<>("evaluate",
                new SimpleStringSchema(), kafkaProps);
        // Start reading from the latest offset.
        reviewConsumer.setStartFromLatest();

        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        DataStreamSource<String> reviews = env.addSource(reviewConsumer);
        env.enableCheckpointing(5000);

        reviews.flatMap(new HighQualityProductMap())
                .keyBy(HighQualityProduct::getGroupField)
                .timeWindow(Time.hours(5))
                .reduce(new HighQualityProductReduce())
                .addSink(new HighQualityProductSink());

        env.execute("high quality product");
    }
}

浏览次数统计

创建一个浏览次数标签实体类

/**
 * Per-day view counter for one product, used by the scan-times statistic.
 * Accessors are generated by Lombok's {@code @Data}.
 */
@Data
public class ScanTimes {
    // Id of the viewed product.
    private Long productId;
    // Id of the product's type.
    private Long productTypeId;
    // Number of views counted in this record.
    private Long numbers;
    // Day of the view as a formatted date string.
    private String dateTime;
    // Composite string used as the grouping key of the stream.
    private String groupField;
}

DateUntil增加一个静态方法

/**
 * Converts an epoch-millisecond timestamp into a {@link Date}.
 *
 * @param visitTime timestamp in milliseconds since the epoch; must not be null
 * @return the corresponding date
 */
public static Date getCurrentTime(Long visitTime) {
    long epochMillis = visitTime;
    return new Date(epochMillis);
}

一个ScanTimesMap实现了FlatMapFunction接口的转换类

/**
 * Turns one scan event (JSON) into a {@link ScanTimes} with a count of 1, tagged with the
 * day of the scan ({@code yyyyMMdd}).
 */
public class ScanTimesMap implements FlatMapFunction<String,ScanTimes> {
    /**
     * @param value scan event as a JSON string
     * @param out   collector receiving at most one record per event
     * @throws Exception if the JSON cannot be parsed
     */
    @Override
    public void flatMap(String value, Collector<ScanTimes> out) throws Exception {
        ScanOpertor scanOpertor = JSONObject.parseObject(value,ScanOpertor.class);
        // An event without a scan time cannot be assigned to a day and would fail on
        // unboxing when converted to a Date; drop it rather than fail the job.
        if (scanOpertor == null || scanOpertor.getScanTime() == null) {
            return;
        }
        ScanTimes scanTimes = new ScanTimes();
        Long productId = scanOpertor.getProductId();
        Long productTypeId = scanOpertor.getProductTypeId();
        Long time = scanOpertor.getScanTime();
        Date date = DateUntil.getCurrentTime(time);
        String dateTime = DateUntil.transferDate(date,"yyyyMMdd");
        scanTimes.setProductId(productId);
        scanTimes.setProductTypeId(productTypeId);
        scanTimes.setDateTime(dateTime);
        // Grouping key: one group per (product, product type, day).
        String groupField = "ScanTimes==" + productId + "==" + productTypeId
                + "==" + dateTime;
        scanTimes.setGroupField(groupField);
        scanTimes.setNumbers(1L);
        out.collect(scanTimes);
    }
}

一个ScanTimesReduce实现了ReduceFunction接口的统计类

/**
 * Adds the count of the second record onto the first record and returns the first record.
 */
public class ScanTimesReduce implements ReduceFunction<ScanTimes> {
    @Override
    public ScanTimes reduce(ScanTimes value1, ScanTimes value2) throws Exception {
        // The accumulator (value1) is updated in place and handed back.
        long total = value1.getNumbers() + value2.getNumbers();
        value1.setNumbers(total);
        return value1;
    }
}

一个ScanTimesSink实现了SinkFunction接口的存储类

/**
 * Stores the per-day view count of a product in {@code scan_times}. The count already stored
 * for the same product and day, if any, is added to the incoming count before saving.
 */
public class ScanTimesSink implements SinkFunction<ScanTimes> {
    private ClickUntil clickUntil = ClickUntilFactory.createClickUntil();

    /**
     * @param value   windowed per-day view count of one product; null is ignored
     * @param context sink context (unused)
     * @throws Exception if the query or the save fails
     */
    @Override
    public void invoke(ScanTimes value, Context context) throws Exception {
        if (value == null) {
            return;
        }
        Long productId = value.getProductId();
        Long productTypeId = value.getProductTypeId();
        Long numbers = value.getNumbers();
        String dateTime = value.getDateTime();

        // NOTE(review): this query filters on column "dateTime", while RecentHotProductSink
        // and HighQualityProductSink filter on "date_time"; confirm the real column name.
        // Close the result set once the stored count has been read.
        try (ResultSet queryResult = clickUntil.getQueryResult("test", "select * from scan_times " +
                "where product_id=" + productId + " and dateTime='" + dateTime + "'")) {
            if (queryResult != null && queryResult.next()) {
                // NOTE(review): positional access relies on the table's column order (3 = numbers).
                numbers += queryResult.getLong(3);
            }
        }

        Map<String, String> data = new HashMap<>();
        data.put("productId", productId + "");
        data.put("productTypeId", productTypeId + "");
        data.put("numbers", numbers + "");
        data.put("dateTime",dateTime);
        String tablename = "scan_times";
        Set<String> fields = new HashSet<>();
        fields.add("productId");
        fields.add("productTypeId");
        fields.add("numbers");
        // Same save call whether or not a row already existed.
        clickUntil.saveData(tablename, data, fields);
    }
}

然后是Flink的流处理

/**
 * Flink job for the scan-times statistic: reads the Kafka topic {@code scan}, counts views
 * per product and day in five-hour windows and stores the result.
 */
public class ScanTimesAnaly {
    public static void main(String[] args) throws Exception {
        Properties kafkaProps = new Properties();
        kafkaProps.setProperty("bootstrap.servers","127.0.0.1:9092");
        kafkaProps.setProperty("group.id","portrait");

        FlinkKafkaConsumer<String> scanConsumer = new FlinkKafkaConsumer<>("scan",
                new SimpleStringSchema(), kafkaProps);

        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        DataStreamSource<String> scans = env.addSource(scanConsumer);
        env.enableCheckpointing(5000);

        scans.flatMap(new ScanTimesMap())
                .keyBy(ScanTimes::getGroupField)
                .timeWindow(Time.hours(5))
                .reduce(new ScanTimesReduce())
                .addSink(new ScanTimesSink());

        env.execute("scan times");
    }
}

收藏次数统计

创建一个收藏标签实体类

/**
 * Per-day favourite ("collect") counter for one product, used by the collection-times
 * statistic. Accessors are generated by Lombok's {@code @Data}.
 * NOTE(review): the simple name is the same as java.util.Collection, so the two cannot both
 * be imported into one file.
 */
@Data
public class Collection {
    // Id of the collected product.
    private Long productId;
    // Id of the product's type.
    private Long productTypeId;
    // Number of collect operations counted in this record.
    private Long numbers;
    // Day of the operation as a formatted date string.
    private String dateTime;
    // Composite string used as the grouping key of the stream.
    private String groupField;
}

一个CollectionTimesMap实现了FlatMapFunction接口的转换类

/**
 * Turns one collect event (JSON) into a {@link Collection} with a count of 1, tagged with
 * the day of the operation ({@code yyyyMMdd}).
 */
public class CollectionTimesMap implements FlatMapFunction<String,Collection> {
    /**
     * @param value collect event as a JSON string
     * @param out   collector receiving at most one record per event
     * @throws Exception if the JSON cannot be parsed
     */
    @Override
    public void flatMap(String value, Collector<Collection> out) throws Exception {
        CollectOpertor collectOpertor = JSONObject.parseObject(value,CollectOpertor.class);
        // An event without an operation time cannot be assigned to a day and would fail on
        // unboxing when converted to a Date; drop it rather than fail the job.
        if (collectOpertor == null || collectOpertor.getOpertorTime() == null) {
            return;
        }
        Long productId = collectOpertor.getProductId();
        Long productTypeId = collectOpertor.getProductTypeId();
        Long time = collectOpertor.getOpertorTime();
        Date date = DateUntil.getCurrentTime(time);
        String dateTime = DateUntil.transferDate(date,"yyyyMMdd");
        Collection collection = new Collection();
        collection.setProductId(productId);
        collection.setProductTypeId(productTypeId);
        collection.setDateTime(dateTime);
        // Grouping key: one group per (product, product type, day).
        String groupField = "CollectionTimes==" + productId + "==" + productTypeId
                + "==" + dateTime;
        collection.setGroupField(groupField);
        collection.setNumbers(1L);
        out.collect(collection);
    }
}

一个CollectionTimesReduce实现了ReduceFunction接口的统计类

/**
 * Sums the counters of two {@code Collection} records that share a group key.
 */
public class CollectionTimesReduce implements ReduceFunction<Collection> {
    /**
     * Adds the count of {@code value2} onto {@code value1} and returns {@code value1}.
     */
    @Override
    public Collection reduce(Collection value1, Collection value2) throws Exception {
        Long left = value1.getNumbers();
        Long right = value2.getNumbers();
        long total = left + right;
        // The first record is reused as the accumulator
        value1.setNumbers(total);
        return value1;
    }
}

一个CollectionTimesSink实现了SinkFunction接口的存储类

/**
 * Sink that stores per-product, per-day collect counts in the
 * {@code collection_times} table.
 *
 * <p>Before saving, the previously stored count for the same product and day
 * is looked up and added to the incoming count.
 */
public class CollectionTimesSink implements SinkFunction<Collection> {
    private ClickUntil clickUntil = ClickUntilFactory.createClickUntil();

    /**
     * Saves one aggregated record; {@code null} records are ignored.
     *
     * @param value   aggregated collect counter
     * @param context sink context (unused)
     * @throws Exception if the lookup query or the save fails
     */
    @Override
    public void invoke(Collection value, Context context) throws Exception {
        if (value == null) {
            return;
        }
        Long productId = value.getProductId();
        Long productTypeId = value.getProductTypeId();
        Long numbers = value.getNumbers();
        String dateTime = value.getDateTime();

        // NOTE(review): the filter uses column "product_id" while the saved key is
        // "productId" -- confirm against the collection_times table definition.
        // try-with-resources closes the ResultSet, which was previously leaked.
        try (ResultSet queryResult = clickUntil.getQueryResult("test", "select * from collection_times " +
                "where product_id=" + productId + " and dateTime='" + dateTime + "'")) {
            if (queryResult.next()) {
                // Third column of "select *" is read as the stored count
                // (presumably "numbers" -- depends on the table's column order).
                numbers += queryResult.getLong(3);
            }
        }

        Map<String, String> data = new HashMap<>();
        data.put("productId", productId + "");
        data.put("productTypeId", productTypeId + "");
        data.put("numbers", numbers + "");
        data.put("dateTime",dateTime);

        Set<String> fields = new HashSet<>();
        fields.add("productId");
        fields.add("productTypeId");
        fields.add("numbers");

        // Both branches of the former if/else saved identically, so save once here
        String tablename = "collection_times";
        clickUntil.saveData(tablename, data, fields);
    }
}

然后是Flink的流处理

/**
 * Flink streaming job: reads collect events from the Kafka topic "collection",
 * counts them per group key in 5-hour windows and writes the result through
 * {@code CollectionTimesSink}.
 */
public class CollectionTimesAnaly {
    public static void main(String[] args) throws Exception {
        // Kafka connection settings
        Properties kafkaProps = new Properties();
        kafkaProps.setProperty("bootstrap.servers","127.0.0.1:9092");
        kafkaProps.setProperty("group.id","portrait");

        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        FlinkKafkaConsumer<String> kafkaSource = new FlinkKafkaConsumer<>("collection",
                new SimpleStringSchema(),kafkaProps);
        DataStreamSource<String> rawEvents = env.addSource(kafkaSource);
        env.enableCheckpointing(5000);

        // JSON -> counting record -> keyed 5-hour window sum
        DataStream<Collection> counted = rawEvents.flatMap(new CollectionTimesMap());
        DataStream<Collection> windowed = counted
                .keyBy(Collection::getGroupField)
                .timeWindow(Time.hours(5))
                .reduce(new CollectionTimesReduce());
        windowed.addSink(new CollectionTimesSink());

        env.execute("collection times");
    }
}

 基于商品内容的embedding相似度推荐

对于电商系统来说,商品的数量一般会远远小于用户数量。因此我们需要开发一个自动整理全部商品的任务调度功能,定时增量地将所有商品的部分内容存储到一个csv文件中,我们假定该文件的格式如下

product_id,product_type,product_title,product_name,product_desc
1====牛奶====风情牛奶====蒙牛牛奶====美丽的大草原上盛产风情牛奶
2====冰箱====酷炫冰箱====海儿冰箱====新一代年轻人喜爱的智能现代化冰箱
3====手机====智能手机====华为手机====智能拍照手机,你值得拥有
4====汽水====元气汽水====森林汽水====无糖茶叶气泡水

在ClickHouse中创建一个表

create table product_similar(productId UInt32,productIdSimilar UInt32,similarValue Float32,create_date date)ENGINE=MergeTree(create_date,(productId),8192); 

CREATE TABLE product_similar
(
    `productId` UInt32,
    `productIdSimilar` UInt32,
    `similarValue` Float32,
    `create_date` date
)
ENGINE = MergeTree(create_date, productId, 8192)

Query id: d9bd5af1-d2a7-4623-93dc-df8559990c1e

Ok.

0 rows in set. Elapsed: 0.118 sec. 

对所有商品进行两两相似度计算

/**
 * Product similarity calculation.
 *
 * <p>Reads the product CSV, extracts keywords from each product description
 * (segmentation, stop-word removal, keyword extraction), computes the pairwise
 * cosine text similarity between every product and all other products, and
 * stores up to the 10 most similar products per product in the
 * {@code product_similar} table.
 */
public class ProductSimilarAnaly {
    private static ClickUntil clickUntil = ClickUntilFactory.createClickUntil();

    public static void main(String[] args) throws Exception {
        String path = "/Users/admin/Documents/商品表.csv";
        String schema = "productId long,productType string,title string," +
                "name string,desc string";
        CsvSourceBatchOp csvSourceBatchOp = new CsvSourceBatchOp()
                .setIgnoreFirstLine(true)
                .setFilePath(path)
                .setSchemaStr(schema)
                .setFieldDelimiter("====");
        // Word segmentation of the description
        BatchOperator<?> segment = new SegmentBatchOp()
                .setSelectedCol("desc")
                .setOutputCol("segment")
                .linkFrom(csvSourceBatchOp);
        // Stop-word removal
        BatchOperator<?> stopWord = new StopWordsRemoverBatchOp()
                .setSelectedCol("segment")
                .setOutputCol("remove")
                .linkFrom(segment);
        // Keyword extraction (top 10 keywords per product)
        BatchOperator<?> keyWord = new KeywordsExtractionBatchOp()
                .setSelectedCol("remove")
                .setOutputCol("keywords")
                .setTopN(10)
                .linkFrom(stopWord);
        List<Row> list = keyWord.getDataSet().collect();
        for (Row rowOut : list) {
            // Values of the outer row do not change while scanning the inner rows
            Long productIdOut = (Long) rowOut.getField("productId");
            String keywordsOut = (String) rowOut.getField("keywords");
            List<Row> dataRow = new ArrayList<>();
            for (Row rowInner : list) {
                Long productIdInner = (Long) rowInner.getField("productId");
                // Compare boxed ids by value; "==" would compare object references
                if (java.util.Objects.equals(productIdOut, productIdInner)) {
                    continue;
                }
                String keywordsInner = (String) rowInner.getField("keywords");
                // Pair up the keywords of two different products
                dataRow.add(Row.of(productIdOut,productIdInner,
                        keywordsOut,keywordsInner));
            }
            // Nothing to compare against (e.g. only one product in the file)
            if (dataRow.isEmpty()) {
                continue;
            }
            // Text similarity between the keyword strings of each pair
            BatchOperator<?> inOp = new MemSourceBatchOp(dataRow,"productId long," +
                    "productIdSimilar long,keywords1 string,keywords2 string");
            BatchOperator<?> similar = new TextSimilarityPairwiseBatchOp()
                    .setSelectedCols("keywords1","keywords2")
                    .setMetric("COSINE")
                    .setOutputCol("similarValue")
                    .linkFrom(inOp);
            List<Row> dataRowSimilar = similar.getDataSet().collect();
            // Sort by similarity, highest first
            Collections.sort(dataRowSimilar,(o1,o2) -> {
                Double similarValue2 = (Double) o2.getField("similarValue");
                Double similarValue1 = (Double) o1.getField("similarValue");
                return similarValue2.compareTo(similarValue1);
            });
            // Keep only the 10 most similar products
            if (dataRowSimilar.size() > 10) {
                dataRowSimilar = dataRowSimilar.subList(0, 10);
            }
            for (Row row : dataRowSimilar) {
                String tablename = "product_similar";
                Map<String,String> dataMap = new HashMap<>();
                dataMap.put("productId",row.getField("productId").toString());
                dataMap.put("productIdSimilar",row.getField("productIdSimilar").toString());
                dataMap.put("similarValue",row.getField("similarValue").toString());
                Set<String> fields = new HashSet<>();
                fields.add("productId");
                fields.add("productIdSimilar");
                fields.add("similarValue");
                clickUntil.saveData(tablename,dataMap,fields);
            }
        }
    }
}

运行结果

select * from product_similar;

SELECT *
FROM product_similar

Query id: 8ef7bc91-d972-488e-a56f-0bb5e2dedca8

┌─productId─┬─productIdSimilar─┬─similarValue─┬─create_date─┐
│         1 │                2 │            0 │  1970-01-01 │
└───────────┴──────────────────┴──────────────┴─────────────┘
┌─productId─┬─productIdSimilar─┬─similarValue─┬─create_date─┐
│         1 │                4 │            0 │  1970-01-01 │
│         2 │                4 │            0 │  1970-01-01 │
│         2 │                3 │            0 │  1970-01-01 │
│         2 │                1 │            0 │  1970-01-01 │
│         3 │                4 │            0 │  1970-01-01 │
│         3 │                2 │            0 │  1970-01-01 │
│         3 │                1 │            0 │  1970-01-01 │
│         4 │                2 │            0 │  1970-01-01 │
│         4 │                3 │            0 │  1970-01-01 │
│         4 │                1 │            0 │  1970-01-01 │
└───────────┴──────────────────┴──────────────┴─────────────┘
┌─productId─┬─productIdSimilar─┬─similarValue─┬─create_date─┐
│         1 │                3 │            0 │  1970-01-01 │
└───────────┴──────────────────┴──────────────┴─────────────┘

12 rows in set. Elapsed: 0.056 sec. 

虽然这里的几个商品彼此之间的关键词的相似度都为0,不过这个不重要,只要构建好足够相似的商品,它们的相似度就会显现出来。

 离线商品评分推荐

首先我们需要将用户对商品的真实评分记录整理到这样一个文件中作为训练数据集,格式如下

user_id,product_id,score
1,1,5
1,5,4
2,1,3
2,5,4
3,1,2
3,2,5
4,3,3
4,2,5
5,3,4

然后我们还需要一份用户没有对商品做出过评分的测试数据文件

user_id,product_id
1,2
1,3
2,2
2,3
3,3
3,5
4,1
4,5
5,1

在ClickHouse中创建一个表

create table score_recommend(userId UInt32,productId UInt32,predictScore Float32,create_date date)ENGINE=MergeTree(create_date,(userId),8192); 

CREATE TABLE score_recommend
(
    `userId` UInt32,
    `productId` UInt32,
    `predictScore` Float32,
    `create_date` date
)
ENGINE = MergeTree(create_date, userId, 8192)

Query id: 64c7f961-f134-454a-9de6-f9108271a41c

Connecting to database test at localhost:9000 as user default.
Connected to ClickHouse server version 21.11.2 revision 54450.

Ok.

0 rows in set. Elapsed: 0.086 sec. 

对训练数据集进行训练,并对测试数据集进行评分预测

/**
 * Offline rating prediction: trains an ALS model on the rating CSV, predicts
 * a score for every (user, product) pair of the test CSV and stores the
 * predictions in the {@code score_recommend} table.
 */
public class ALSOutLineAnaly {
    private static ClickUntil clickUntil = ClickUntilFactory.createClickUntil();

    public static void main(String[] args) throws Exception {
        // Training data: user, product and the real score
        CsvSourceBatchOp trainSource = new CsvSourceBatchOp()
                .setIgnoreFirstLine(true)
                .setFilePath("/Users/admin/Documents/评分表.csv")
                .setSchemaStr("userId long,productId long,score double")
                .setFieldDelimiter(",");
        // Test data: user/product pairs without a score
        CsvSourceBatchOp testSource = new CsvSourceBatchOp()
                .setIgnoreFirstLine(true)
                .setFilePath("/Users/admin/Documents/评分测试表.csv")
                .setSchemaStr("userId long,productId long")
                .setFieldDelimiter(",");

        // Train the ALS model
        BatchOperator<?> alsModel = new AlsTrainBatchOp()
                .setUserCol("userId")
                .setItemCol("productId")
                .setRateCol("score")
                .setNumIter(10)
                .setRank(10)
                .setLambda(0.01)
                .linkFrom(trainSource);
        // Predict a score for each test pair
        BatchOperator<?> predictions = new AlsRateRecommBatchOp()
                .setUserCol("userId")
                .setItemCol("productId")
                .setRecommCol("predictScore")
                .linkFrom(alsModel,testSource);

        List<Row> predictedRows = predictions.getDataSet().collect();
        for (Row predicted : predictedRows) {
            Long userId = (Long) predicted.getField("userId");
            Long productId = (Long) predicted.getField("productId");
            Double predictScore = (Double) predicted.getField("predictScore");

            Map<String,String> values = new HashMap<>();
            values.put("userId",String.valueOf(userId));
            values.put("productId",String.valueOf(productId));
            values.put("predictScore",String.valueOf(predictScore));

            Set<String> fields = new HashSet<>();
            fields.add("userId");
            fields.add("productId");
            fields.add("predictScore");

            clickUntil.saveData("score_recommend",values,fields);
        }
    }
}

运行结果

select * from score_recommend;

SELECT *
FROM score_recommend

Query id: 3dd93421-23e3-404c-bee9-381446589471

┌─userId─┬─productId─┬─predictScore─┬─create_date─┐
│      1 │         3 │     5.605932 │  1970-01-01 │
│      1 │         2 │    2.0335705 │  1970-01-01 │
│      2 │         3 │    3.9875667 │  1970-01-01 │
│      2 │         2 │    1.5447733 │  1970-01-01 │
│      3 │         5 │    2.3882878 │  1970-01-01 │
│      3 │         3 │    2.6757712 │  1970-01-01 │
│      4 │         5 │     2.639401 │  1970-01-01 │
│      4 │         1 │     2.265847 │  1970-01-01 │
│      5 │         1 │    3.0576394 │  1970-01-01 │
└────────┴───────────┴──────────────┴─────────────┘

9 rows in set. Elapsed: 0.014 sec. 

商品协同过滤相似度实时推荐

现在我们需要将上面的离线预测的评分数据整理成一个新的文件

user_id,product_id,result
1,2,2.0335705
1,3,5.605932
2,2,1.5447733
2,3,3.9875667
3,3,2.6757712
3,5,2.3882878
4,1,2.265847
4,5,2.639401
5,1,3.0576394

ClickHouse创建一个表

create table product_recommend(productId UInt32,productIdsRecommend String,scores String,create_date date)ENGINE=MergeTree(create_date,(productId),8192);

CREATE TABLE product_recommend
(
    `productId` UInt32,
    `productIdsRecommend` String,
    `scores` String,
    `create_date` date
)
ENGINE = MergeTree(create_date, productId, 8192)

Query id: 6e1cf305-5e4f-4f0b-9e0d-a7d9be84a231

Connecting to database test at localhost:9000 as user default.
Connected to ClickHouse server version 21.11.2 revision 54450.

Ok.

0 rows in set. Elapsed: 0.076 sec. 

基于商品协同过滤相似度进行推荐

/**
 * Item-based collaborative-filtering recommendation: trains an ALS model on
 * the predicted-score CSV, asks it for the 10 most similar items of each
 * product and stores the result in the {@code product_recommend} table.
 */
public class ALSProductSimilarAnaly {
    private static ClickUntil clickUntil = ClickUntilFactory.createClickUntil();

    public static void main(String[] args) throws Exception {
        String pathResult = "/Users/admin/Documents/评分预测表.csv";
        String schema = "userId long,productId long,result double";
        CsvSourceBatchOp csvSourceBatchOp = new CsvSourceBatchOp()
                .setIgnoreFirstLine(true)
                .setFilePath(pathResult)
                .setSchemaStr(schema)
                .setFieldDelimiter(",");
        BatchOperator<?> model = new AlsTrainBatchOp()
                .setUserCol("userId")
                .setItemCol("productId")
                .setRateCol("result")
                .setNumIter(10)
                .setRank(10)
                .setLambda(0.01)
                .linkFrom(csvSourceBatchOp);
        // Recommend similar items for the productId of every input row
        BatchOperator<?> result = new AlsSimilarItemsRecommBatchOp()
                .setItemCol("productId")
                .setRecommCol("productRecommend")
                .setK(10)
                .setReservedCols("productId")
                .linkFrom(model,csvSourceBatchOp);
        List<Row> list = result.getDataSet().collect();
        // The input has one row per (user, product) rating, so the same product
        // appears several times; remember what was saved to avoid duplicate rows.
        Set<Long> savedProductIds = new HashSet<>();
        for (Row row : list) {
            Long productId = (Long) row.getField("productId");
            if (!savedProductIds.add(productId)) {
                continue;
            }
            JSONObject jsonObject = JSONObject.parseObject(row.getField("productRecommend").toString());
            JSONArray productIdsRecommend = jsonObject.getJSONArray("productId");
            JSONArray scores = jsonObject.getJSONArray("score");
            String tablename = "product_recommend";
            Map<String,String> dataMap = new HashMap<>();
            dataMap.put("productId",productId + "");
            dataMap.put("productIdsRecommend",productIdsRecommend.toJSONString());
            dataMap.put("scores",scores.toJSONString());
            Set<String> fields = new HashSet<>();
            fields.add("productId");
            clickUntil.saveData(tablename,dataMap,fields);
        }
    }
}

运行结果

select * from product_recommend;

SELECT *
FROM product_recommend

Query id: 9695ecdf-207d-4931-a7de-6ee76c603588

┌─productId─┬─productIdsRecommend─┬─scores──────────────────────────────────────────────────┬─create_date─┐
│         5 │ [3,1,2]             │ [4.11977117110399,3.152251384796194,1.4976801931900996] │  1970-01-01 │
└───────────┴─────────────────────┴─────────────────────────────────────────────────────────┴─────────────┘
┌─productId─┬─productIdsRecommend─┬─scores─────────────────────────────────────────────────────┬─create_date─┐
│         2 │ [3,5,1]             │ [1.7338918392755318,1.4976801931900996,1.2717521355519668] │  1970-01-01 │
└───────────┴─────────────────────┴────────────────────────────────────────────────────────────┴─────────────┘
┌─productId─┬─productIdsRecommend─┬─scores────────────────────────────────────────────────────┬─create_date─┐
│         1 │ [3,5,2]             │ [3.5011045336610236,3.152251384796194,1.2717521355519668] │  1970-01-01 │
│         1 │ [3,5,2]             │ [3.5011045336610236,3.152251384796194,1.2717521355519668] │  1970-01-01 │
│         3 │ [5,1,2]             │ [4.11977117110399,3.5011045336610236,1.7338918392755318]  │  1970-01-01 │
│         3 │ [5,1,2]             │ [4.11977117110399,3.5011045336610236,1.7338918392755318]  │  1970-01-01 │
│         3 │ [5,1,2]             │ [4.11977117110399,3.5011045336610236,1.7338918392755318]  │  1970-01-01 │
│         5 │ [3,1,2]             │ [4.11977117110399,3.152251384796194,1.4976801931900996]   │  1970-01-01 │
└───────────┴─────────────────────┴───────────────────────────────────────────────────────────┴─────────────┘
┌─productId─┬─productIdsRecommend─┬─scores─────────────────────────────────────────────────────┬─create_date─┐
│         2 │ [3,5,1]             │ [1.7338918392755318,1.4976801931900996,1.2717521355519668] │  1970-01-01 │
└───────────┴─────────────────────┴────────────────────────────────────────────────────────────┴─────────────┘

9 rows in set. Elapsed: 0.040 sec. 

用户最近评分商品实时推荐

添加一个Redis依赖

<dependency>
   <groupId>redis.clients</groupId>
   <artifactId>jedis</artifactId>
   <version>2.9.0</version>
</dependency>

增加一个Redis工具类

/**
 * Redis helper that keeps each user's evaluations in a sorted set named
 * {@code "evaluate" + userId}, scored by the evaluation time in milliseconds.
 */
public class RedisUntil {
    private static JedisPool jedisPool;

    static {
        JedisPoolConfig jedisConfig = new JedisPoolConfig();
        jedisConfig.setMaxIdle(8);
        jedisConfig.setMaxWaitMillis(-1);
        jedisPool = new JedisPool(jedisConfig,"127.0.0.1",6379,
                10000,"*****");
    }

    /**
     * Stores one evaluation (serialized as JSON) in the user's sorted set.
     * Redis failures are printed and otherwise ignored (best effort).
     *
     * @param evaluate evaluation to store; its user id and evaluate time are read
     */
    public static void saveEvaluate(Evaluate evaluate) {
        Long userId = evaluate.getUserId();
        Long time = evaluate.getEvaluateTime().getTime();
        String result = JSONObject.toJSONString(evaluate);
        // try-with-resources returns the connection to the pool
        try (Jedis jedis = jedisPool.getResource()) {
            jedis.zadd("evaluate" + userId,time,result);
        }catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Loads the user's evaluations whose score lies in {@code [0, numbers]}.
     *
     * @param userId  user whose evaluations are read
     * @param numbers upper bound of the score range passed to Redis
     * @return the parsed evaluations; an empty list when nothing is stored or
     *         the Redis call failed
     */
    public static List<Evaluate> getByUser(long userId,long numbers) {
        Set<Tuple> setByScore = null;
        try (Jedis jedis = jedisPool.getResource()) {
            setByScore = jedis.zrangeByScoreWithScores("evaluate" + userId,0,numbers);
        }catch (Exception e) {
            e.printStackTrace();
        }
        List<Evaluate> result = new ArrayList<>();
        // The lookup may have failed above; return the empty list instead of
        // running into a NullPointerException.
        if (setByScore == null) {
            return result;
        }
        for (Tuple tuple : setByScore) {
            result.add(JSONObject.parseObject(tuple.getElement(),Evaluate.class));
        }
        return result;
    }
}

修改ScoreMap,将每次获取的评分对象放入Redis

/**
 * Parses an evaluation message, stores it in Redis and emits a {@code Score}
 * counting record when the evaluation score is greater than 2.
 */
public class ScoreMap implements FlatMapFunction<String,Score> {
    @Override
    public void flatMap(String value, Collector<Score> out) throws Exception {
        Evaluate evaluate = JSONObject.parseObject(value,Evaluate.class);
        Integer rating = evaluate.getScore();
        // Every evaluation is kept in Redis, whatever its score
        RedisUntil.saveEvaluate(evaluate);
        if (rating <= 2) {
            return;
        }

        Long productId = evaluate.getProductId();
        Long productTypeId = evaluate.getProductTypeId();

        Score counted = new Score();
        counted.setProductId(productId);
        counted.setProductTypeId(productTypeId);
        counted.setNumbers(1L);
        counted.setGroupField("score==" + productId + "==" + productTypeId);
        out.collect(counted);
    }
}

创建一个MapUntil工具类

/**
 * Map helper methods.
 */
public class MapUntil {
    /**
     * Returns the entries of {@code data} with the largest values.
     *
     * <p>The returned map iterates from the highest value to the lowest, so
     * callers can rely on the ranking order of its keys.
     *
     * @param data    map to rank; it is not modified
     * @param numbers maximum number of entries to keep
     * @return a new map holding at most {@code numbers} entries, ordered by
     *         descending value
     */
    public static Map<Long,Double> top(Map<Long,Double> data,Integer numbers) {
        List<Map.Entry<Long,Double>> ranked = new ArrayList<>(data.entrySet());
        // Highest value first
        Collections.sort(ranked, (left,right) -> right.getValue().compareTo(left.getValue()));
        if (ranked.size() > numbers) {
            ranked = ranked.subList(0,numbers);
        }
        // An insertion-ordered map keeps the ranking; a plain HashMap would lose it
        Map<Long,Double> result = new java.util.LinkedHashMap<>();
        for (Map.Entry<Long,Double> entry : ranked) {
            result.put(entry.getKey(),entry.getValue());
        }
        return result;
    }
}

然后就是Flink流的实时推荐

/**
 * Real-time recommendation.
 *
 * <p>For every evaluation read from the Kafka topic "evaluate", the job looks
 * up the products recommended for the evaluated product, weighs them with the
 * user's stored evaluations and saves the best 5 candidates to the
 * {@code user_recommend} table.
 */
public class RealTimeRecommendationAnaly {
    private static ClickUntil clickUntil = ClickUntilFactory.createClickUntil();

    /**
     * Base-10 logarithm used for the boost/penalty terms. Counts below 1 are
     * treated as 1 so that an empty count contributes 0 instead of -Infinity
     * (which previously turned the final score into -Infinity or NaN).
     */
    private static Double log(Long number) {
        return Math.log10(Math.max(number, 1L));
    }

    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        Properties properties = new Properties();
        properties.setProperty("bootstrap.servers","127.0.0.1:9092");
        properties.setProperty("group.id","portrait");
        FlinkKafkaConsumer<String> myConsumer = new FlinkKafkaConsumer<>("evaluate",
                new SimpleStringSchema(),properties);
        myConsumer.setStartFromLatest();
        DataStreamSource<String> dataSource = env.addSource(myConsumer);
        env.enableCheckpointing(5000);

        dataSource.flatMap(new FlatMapFunction<String, Object>() {
            @Override
            public void flatMap(String value, Collector<Object> out) throws Exception {
                Evaluate evaluate = JSONObject.parseObject(value,Evaluate.class);
                Long userId = evaluate.getUserId();
                Long productId = evaluate.getProductId();

                List<Long> recommendProductIds;
                List<Double> recommendScores;
                // Load the products recommended for the evaluated product;
                // try-with-resources closes the ResultSet.
                try (ResultSet queryResult = clickUntil.getQueryResult("test", "select productIdsRecommend,scores " +
                        "from product_recommend where productId=" + productId)) {
                    if (!queryResult.next()) {
                        return;
                    }
                    // Ids of the recommended products and their recommendation scores
                    // (two parallel JSON arrays)
                    recommendProductIds = JSONObject.parseArray(
                            queryResult.getString("productIdsRecommend"),Long.class);
                    recommendScores = JSONObject.parseArray(
                            queryResult.getString("scores"),Double.class);
                }

                // All evaluations stored for this user
                List<Evaluate> currentEvaluates = RedisUntil.getByUser(userId, Long.MAX_VALUE);
                Map<Long,Double> finalMap = new HashMap<>();
                // Walk both arrays by index instead of indexOf(): linear, and
                // correct even if an id occurs more than once.
                for (int i = 0; i < recommendProductIds.size(); i++) {
                    Long prodId = recommendProductIds.get(i);
                    Double score = recommendScores.get(i);
                    Long numbers = 0L;
                    Double totalSimilar = 0.0;
                    Long addNumber = 0L;
                    Long descrNumber = 0L;
                    for (Evaluate eva : currentEvaluates) {
                        // Boxed ids must be compared by value; "==" compares references
                        // and fails for ids outside the small cached range.
                        if (prodId == null || !prodId.equals(eva.getProductId())) {
                            continue;
                        }
                        Integer evaluateScore = eva.getScore();
                        // Only sufficiently strong recommendations (>= 0.55) contribute
                        if (score >= 0.55) {
                            numbers++;
                            totalSimilar += evaluateScore * score;
                        }
                        // Evaluations above 3 boost the candidate
                        if (evaluateScore > 3) {
                            addNumber++;
                        }
                        // Evaluations below 2 weaken the candidate
                        if (evaluateScore < 2) {
                            descrNumber++;
                        }
                    }
                    if (numbers > 0L) {
                        Double productTotalSimilar = totalSimilar / numbers
                                + log(addNumber) - log(descrNumber);
                        finalMap.put(prodId,productTotalSimilar);
                    }
                }
                if (finalMap.isEmpty()) {
                    return;
                }

                // Save once per event, after all candidates were scored, instead of
                // writing a partial list for every candidate.
                finalMap = MapUntil.top(finalMap,5);
                Set<Long> productIdSimilarList = finalMap.keySet();

                String tablename = "user_recommend";
                Map<String,String> dataMap = new HashMap<>();
                dataMap.put("userId",userId + "");
                dataMap.put("productId",productId + "");
                dataMap.put("productIdSimilarList",JSONObject.toJSONString(productIdSimilarList));
                Set<String> fields = new HashSet<>();
                fields.add("userId");
                fields.add("productId");
                clickUntil.saveData(tablename,dataMap,fields);
            }
        });
        env.execute("realTime recommendation");
    }
}
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章