數據:
格式說明:每行爲 user:friend1,friend2,...(冒號前是用戶,冒號後是該用戶以逗號分隔的好友列表)
A:B,C,D,F,E,O
B:A,C,E,K
C:F,A,D,I
D:A,E,F,L
E:B,C,D,M,L
F:A,B,C,D,E,O,M
G:A,C,D,E,F
H:A,C,D,E,O
I:A,O
J:B,O
K:A,C,D
L:D,E,F
M:E,F,G
O:A,H,I,J
代碼如下(看註釋):
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.IOException;
import java.net.URI;
import java.util.ArrayList;
/**
 * Finds users who have friends in common.
 * <p>
 * Per the problem statement, the result lists, for each pair of users, the friends they share.
 */
public class MR {
/**
 * Runs the two chained MapReduce jobs.
 *
 * <p>The first job (MyMapper / MyReducer) reads the input file and writes its result to the
 * intermediate directory. The second job (MyMapper1 / MyReducer1) reads that directory and
 * writes the final result. Both output directories are deleted up front if they already
 * exist. The second job is started only when the first one succeeds.
 *
 * @param args {@code args[0]} input path, {@code args[1]} intermediate output directory,
 *             {@code args[2]} final output directory; when fewer than three arguments are
 *             supplied the built-in sample HDFS locations are used instead
 * @throws IOException            if the file system or job submission fails
 * @throws ClassNotFoundException if a job class cannot be resolved
 * @throws InterruptedException   if waiting for a job is interrupted
 */
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
    // Honour paths passed on the command line; fall back to the sample locations otherwise.
    if (args == null || args.length < 3) {
        args = new String[] {
            "hdfs://localhost:9000/mapreducer/0314/data/data.txt",
            "hdfs://localhost:9000/mapreducer/0314/out/my/1",
            "hdfs://localhost:9000/mapreducer/0314/out/my/2"
        };
    }
    Path input = new Path(args[0]);
    Path intermediate = new Path(args[1]);
    Path output = new Path(args[2]);

    Configuration conf = new Configuration();

    // Remove stale output directories. Each path resolves its own file system so the two
    // directories are not required to live on the same one.
    for (Path dir : new Path[] {intermediate, output}) {
        FileSystem fs = dir.getFileSystem(conf);
        if (fs.exists(dir)) {
            fs.delete(dir, true);
        }
    }

    Job job = Job.getInstance(conf);
    job.setJarByClass(MR.class);
    job.setMapperClass(MR.MyMapper.class);
    job.setReducerClass(MR.MyReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    FileInputFormat.addInputPath(job, input);
    FileOutputFormat.setOutputPath(job, intermediate);
    if (!job.waitForCompletion(true)) {
        // Surface the failure to the caller instead of returning as if nothing happened.
        System.exit(1);
    }

    Job job1 = Job.getInstance(conf);
    job1.setJarByClass(MR.class);
    job1.setMapperClass(MR.MyMapper1.class);
    job1.setReducerClass(MR.MyReducer1.class);
    job1.setOutputKeyClass(Text.class);
    job1.setOutputValueClass(Text.class);
    FileInputFormat.addInputPath(job1, intermediate);
    FileOutputFormat.setOutputPath(job1, output);
    if (!job1.waitForCompletion(true)) {
        System.exit(1);
    }
}
/**
 * First-stage mapper: turns a {@code user:friend1,friend2,...} record into one
 * {@code (friend, user)} pair per listed friend, so that the reducer receives, for each
 * friend, every user who lists that friend.
 */
private static class MyMapper extends Mapper<Object, Text, Text, Text> {
    private final Text outK = new Text();
    private final Text outV = new Text();

    /**
     * Emits {@code (friend, user)} for every friend on the line.
     *
     * <p>Blank lines, lines without a {@code ':'} separator, and users with an empty friend
     * list are skipped rather than failing the task.
     *
     * @param key     input key supplied by the framework (unused)
     * @param value   one line of input in the form {@code user:friend1,friend2,...}
     * @param context sink for the emitted pairs
     */
    @Override
    protected void map(Object key, Text value, Context context) throws IOException, InterruptedException {
        String[] split = value.toString().split(":");
        // String.split drops trailing empty strings, so "A:" and "" both yield fewer than
        // two parts; indexing split[1] on such a line would throw.
        if (split.length < 2) {
            return;
        }
        String user = split[0].trim();
        if (user.isEmpty()) {
            return;
        }
        // The user is the same for every friend on this line, so set it once.
        outV.set(user);
        for (String s : split[1].split(",")) {
            String friend = s.trim();
            if (friend.isEmpty()) {
                continue;
            }
            outK.set(friend);
            context.write(outK, outV);
        }
    }
}
/**
 * First-stage reducer: the key is a friend and the values are the users who list that
 * friend. Every two-user combination of those users is written out as
 * {@code (userPair, friend)}.
 */
private static class MyReducer extends Reducer<Text, Text, Text, Text> {
    Text outK = new Text();
    Text outV = new Text();

    /**
     * Emits one {@code (userPair, friend)} record per unordered pair of users.
     *
     * @param key     the shared friend
     * @param values  the users who list {@code key} as a friend
     * @param context sink for the emitted records
     */
    @Override
    protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
        // Materialise the values as Strings so they can be visited pairwise by index.
        ArrayList<String> users = new ArrayList<>();
        for (Text user : values) {
            users.add(user.toString());
        }

        // The friend is identical for every pair produced by this call.
        outV.set(key);

        int count = users.size();
        for (int first = 0; first < count - 1; first++) {
            String a = users.get(first);
            for (int second = first + 1; second < count; second++) {
                String b = users.get(second);
                // Put the lexicographically greater name first so a given pair of users
                // always maps to the same key regardless of the order they arrived in.
                boolean aIsGreater = a.compareTo(b) > 0;
                outK.set(aIsGreater ? a + "," + b : b + "," + a);
                context.write(outK, outV);
            }
        }
    }
}
/**
 * Second-stage mapper: re-reads the records written by the first job and passes them on
 * unchanged, so that the shuffle groups all values sharing the same key.
 */
private static class MyMapper1 extends Mapper<Object, Text, Text, Text> {
    Text outK = new Text();
    Text outV = new Text();

    /**
     * Splits a tab-separated line and emits its first field as the key and its second
     * field as the value.
     *
     * @param key     input key supplied by the framework (unused)
     * @param value   one tab-separated line
     * @param context sink for the emitted pair
     */
    @Override
    protected void map(Object key, Text value, Context context) throws IOException, InterruptedException {
        String record = value.toString();
        String[] fields = record.split("\t");
        String pairKey = fields[0];
        String pairValue = fields[1];
        outK.set(pairKey);
        outV.set(pairValue);
        context.write(outK, outV);
    }
}
/**
 * Second-stage reducer: collapses all values received for a key into a single
 * comma-separated string.
 */
private static class MyReducer1 extends Reducer<Text, Text, Text, Text> {
    Text outV = new Text();

    /**
     * Writes {@code (key, "v1,v2,...")} where the values appear in the order they are
     * iterated.
     *
     * @param key     the grouping key, written through unchanged
     * @param values  the values to join
     * @param context sink for the joined record
     */
    @Override
    protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
        StringBuilder joined = new StringBuilder();
        boolean first = true;
        for (Text value : values) {
            // Separator goes before every element except the first, so nothing needs
            // trimming afterwards.
            if (!first) {
                joined.append(",");
            }
            joined.append(value.toString());
            first = false;
        }
        outV.set(joined.toString());
        context.write(key, outV);
    }
}
}