MapReduce例題-找出有共同好友的user及他們的共同好友

原創

2019-03-14 20:50

數據：

格式說明：user:friend1,friend2,...（冒號前是用戶，冒號後是以逗號分隔的該用戶好友列表）

A:B,C,D,F,E,O
B:A,C,E,K
C:F,A,D,I
D:A,E,F,L
E:B,C,D,M,L
F:A,B,C,D,E,O,M
G:A,C,D,E,F
H:A,C,D,E,O
I:A,O
J:B,O
K:A,C,D
L:D,E,F
M:E,F,G
O:A,H,I,J

代碼如下（看註釋）：


import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;
import java.net.URI;
import java.util.ArrayList;

/**
 * Finds every pair of users that have at least one friend in common, together with
 * the friends they share.
 * <p>
 * Each input line has the form {@code user:friend1,friend2,...}. The work is done by two
 * chained MapReduce jobs:
 * <ol>
 *   <li>Job 1 inverts the relation (friend -> users that list this friend) and emits one
 *       {@code userPair <TAB> friend} record for every pair of such users.</li>
 *   <li>Job 2 groups those records by user pair and joins the friends into one
 *       comma-separated line per pair.</li>
 * </ol>
 */
public class MR {

    /** Input file holding the {@code user:friend,...} lines. */
    private static final String INPUT_PATH = "hdfs://localhost:9000/mapreducer/0314/data/data.txt";
    /** Output directory of job 1; also the input of job 2. */
    private static final String PAIRS_OUTPUT_PATH = "hdfs://localhost:9000/mapreducer/0314/out/my/1";
    /** Output directory of job 2 (the final result). */
    private static final String RESULT_OUTPUT_PATH = "hdfs://localhost:9000/mapreducer/0314/out/my/2";

    /**
     * Runs the two jobs back to back; the second job is only started when the first succeeds.
     * The process terminates with exit status 1 when either job fails.
     *
     * @param args ignored; the input and output locations are fixed by the constants above
     * @throws IOException            if the file system or job submission fails
     * @throws ClassNotFoundException if a job class cannot be resolved
     * @throws InterruptedException   if waiting for a job is interrupted
     */
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        Path input = new Path(INPUT_PATH);
        Path pairsOutput = new Path(PAIRS_OUTPUT_PATH);
        Path resultOutput = new Path(RESULT_OUTPUT_PATH);

        Configuration conf = new Configuration();

        // Remove the output directories of a previous run before the jobs are configured.
        FileSystem fs = FileSystem.get(URI.create(PAIRS_OUTPUT_PATH), conf);
        if (fs.exists(pairsOutput)) {
            fs.delete(pairsOutput, true);
        }
        if (fs.exists(resultOutput)) {
            fs.delete(resultOutput, true);
        }

        Job job = Job.getInstance(conf);

        job.setJarByClass(MR.class);

        job.setMapperClass(MR.MyMapper.class);
        job.setReducerClass(MR.MyReducer.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        FileInputFormat.addInputPath(job, input);
        FileOutputFormat.setOutputPath(job, pairsOutput);

        boolean succeeded = job.waitForCompletion(true);
        if (succeeded) {
            Job job1 = Job.getInstance(conf);

            job1.setJarByClass(MR.class);

            job1.setMapperClass(MR.MyMapper1.class);
            job1.setReducerClass(MR.MyReducer1.class);

            job1.setOutputKeyClass(Text.class);
            job1.setOutputValueClass(Text.class);

            FileInputFormat.addInputPath(job1, pairsOutput);
            FileOutputFormat.setOutputPath(job1, resultOutput);

            succeeded = job1.waitForCompletion(true);
        }

        // Report a failed job through the exit status instead of returning as if all went well.
        if (!succeeded) {
            System.exit(1);
        }
    }

    /**
     * Job 1 mapper: turns {@code user:friend1,friend2,...} into one {@code (friend, user)}
     * record per friend, so that the reducer sees, for each friend, all users listing it.
     */
    private static class MyMapper extends Mapper<Object, Text, Text, Text> {

        Text outK = new Text();
        Text outV = new Text();

        /**
         * Emits {@code (friend, user)} for every friend on the line. Blank lines, lines
         * without a {@code ':'}, lines with an empty user and empty friend tokens are skipped.
         */
        @Override
        protected void map(Object key, Text value, Context context) throws IOException, InterruptedException {
            String[] userAndFriends = value.toString().split(":");
            if (userAndFriends.length < 2 || userAndFriends[0].isEmpty()) {
                // Nothing usable on this line; indexing [1] here would throw.
                return;
            }

            // The user is the same for every record of this line, so set it once.
            outV.set(userAndFriends[0]);
            for (String friend : userAndFriends[1].split(",")) {
                if (friend.isEmpty()) {
                    continue;
                }
                outK.set(friend);
                context.write(outK, outV);
            }
        }
    }

    /**
     * Job 1 reducer: for one friend (the key) and all users that list this friend
     * (the values), emits {@code (userPair, friend)} for every pair of those users.
     */
    private static class MyReducer extends Reducer<Text, Text, Text, Text> {
        Text outK = new Text();
        Text outV = new Text();

        @Override
        protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {

            // Pairing needs indexed access, so collect the users into a list first.
            ArrayList<String> users = new ArrayList<>();
            for (Text value : values) {
                users.add(value.toString());
            }

            // The shared friend is the same for every pair produced by this call.
            outV.set(key);

            /*
             * Build every unordered pair exactly once (j starts at i + 1). The two users are
             * always written in the same order -- the one that compares greater comes first --
             * so that the same pair produced for different friends ends up under one key.
             */
            for (int i = 0; i < users.size(); i++) {
                for (int j = i + 1; j < users.size(); j++) {
                    String first = users.get(i);
                    String second = users.get(j);
                    if (first.compareTo(second) > 0) {
                        outK.set(first + "," + second);
                    } else {
                        outK.set(second + "," + first);
                    }
                    context.write(outK, outV);
                }
            }
        }
    }

    /**
     * Job 2 mapper: re-reads the {@code userPair <TAB> friend} lines written by job 1 and
     * forwards them unchanged as {@code (userPair, friend)} so they get grouped by pair.
     */
    private static class MyMapper1 extends Mapper<Object, Text, Text, Text> {
        Text outK = new Text();
        Text outV = new Text();

        /** Emits {@code (userPair, friend)}; lines without a tab-separated value are skipped. */
        @Override
        protected void map(Object key, Text value, Context context) throws IOException, InterruptedException {
            String[] line = value.toString().split("\t");
            if (line.length < 2) {
                // No value column; indexing [1] here would throw.
                return;
            }

            outK.set(line[0]);
            outV.set(line[1]);
            context.write(outK, outV);
        }
    }

    /**
     * Job 2 reducer: joins all common friends of one user pair into a single
     * comma-separated value.
     */
    private static class MyReducer1 extends Reducer<Text, Text, Text, Text> {
        Text outV = new Text();

        @Override
        protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
            StringBuilder joined = new StringBuilder();

            // Put the separator in front of every element but the first, so no trailing
            // comma has to be stripped afterwards.
            boolean first = true;
            for (Text value : values) {
                if (!first) {
                    joined.append(",");
                }
                joined.append(value.toString());
                first = false;
            }
            outV.set(joined.toString());
            context.write(key, outV);
        }
    }
}

發表評論

所有評論

還沒有人評論，想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.

MapReducer例題-找出有共同好友的user及他們的共同好友

schematool --dbType mysql --initSchema 安裝hive+mysql 報錯

pandas在Series中的時間日期（datetime64）類型怎麼取出年月日並進行判斷。

c語言打印字母菱形————用絕對值

c語言十進制轉二進制兩種方法

序列求和

Mac下配置sublime實現LaTeX

https://yachay.unat.edu.pe/blog/index.php?comment_area=format_blog&comment_component=blog&comment_co

linux以太網驅動總結