TopN 的思路全世界都一樣:先在 map 端取各自的 topN,再在 reduce 端從各個 map 的 topN 中取出最終的 topN。
在 map 中執行 top10cast.put 之後,所有的 value 都會變成最新的那一筆。這是物件引用的問題:Hadoop 會重複使用同一個 Text 物件來傳入每一筆記錄,之前從來沒有注意到。所以 put 之前必須先 toString(),再用該字串新建一個 Text 存入。
reduce 端也有同樣的問題,處理方式相同。
/**
 * Configures and runs the TopN MapReduce job.
 *
 * <p>The job keeps the {@code N} records with the largest leading numeric
 * field ({@code N} is published to the tasks through the configuration key
 * {@code "N"}). A single reducer is used so that the per-mapper candidates
 * are merged into one final list.
 *
 * @param args optional; {@code args[0]} overrides the input path and
 *             {@code args[1]} overrides the output path. When omitted the
 *             original hard-coded demo paths are used.
 * @throws Exception if the job cannot be configured or submitted
 */
public static void main(String[] args) throws Exception {
    // Fall back to the original demo locations when no paths are supplied.
    String inputPath = args != null && args.length > 0 ? args[0] : "C:\\demo\\03\\03.txt";
    String outputPath = args != null && args.length > 1 ? args[1] : "C:\\demo\\03\\out";

    Configuration conf1 = new Configuration();
    System.setProperty("hadoop.home.dir", "D:\\hadoop-2.5.2");
    conf1.setInt("N", 10);
    conf1.setBoolean("dfs.permissions", false);

    Job job = Job.getInstance(conf1, "TopN");
    // Lets Hadoop locate the jar containing the task classes when the job
    // is not run from the local classpath.
    job.setJarByClass(TopNMapper.class);
    job.setMapperClass(TopNMapper.class);
    job.setReducerClass(TopNReducer.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);
    // One reducer so every mapper's candidates meet in a single reduce call.
    job.setNumReduceTasks(1);
    FileInputFormat.setInputPaths(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    if (job.waitForCompletion(true)) {
        log.info("MR run successfully");
    } else {
        log.error("MR run failed");
        // Report the failure to the caller/shell instead of exiting with 0.
        System.exit(1);
    }
}
/**
 * Map side of the TopN job: keeps the {@code N} input records with the
 * largest leading numeric field seen by this mapper and emits them, in
 * ascending weight order, when the mapper finishes.
 *
 * <p>Each input line is expected to look like {@code weight,rest...}; the
 * text before the first comma is parsed as a {@code double}.
 */
public class TopNMapper extends Mapper<Object, Text, NullWritable, Text> {

    /**
     * Sort key made of the record weight plus an arrival sequence number.
     * The sequence number keeps records with equal weights distinct, so a
     * tie no longer overwrites an earlier record in the sorted map.
     * Sequence numbers are unique per mapper, so two different keys never
     * compare as equal.
     */
    private static final class RankKey implements Comparable<RankKey> {
        private final double weight;
        private final long sequence;

        RankKey(double weight, long sequence) {
            this.weight = weight;
            this.sequence = sequence;
        }

        @Override
        public int compareTo(RankKey other) {
            int byWeight = Double.compare(weight, other.weight);
            if (byWeight != 0) {
                return byWeight;
            }
            if (sequence < other.sequence) {
                return -1;
            }
            return sequence > other.sequence ? 1 : 0;
        }
    }

    /** Current candidates, smallest weight first. */
    private final SortedMap<RankKey, Text> topRecords = new TreeMap<RankKey, Text>();

    /** Number of records handed to the sorted map so far; used to break ties. */
    private long nextSequence = 0L;

    /** Maximum number of records to keep; read from the "N" configuration key. */
    private int limit = 10;

    /**
     * Reads the record limit from the job configuration (key {@code "N"},
     * default 10).
     */
    @Override
    protected void setup(
            Mapper<Object, Text, NullWritable, Text>.Context context)
            throws IOException, InterruptedException {
        this.limit = context.getConfiguration().getInt("N", 10);
    }

    /**
     * Considers one input line as a top-N candidate.
     *
     * @param key     input key (unused)
     * @param value   one line of the form {@code weight,rest...}
     * @param context task context (unused here; output happens in cleanup)
     * @throws NumberFormatException if a non-blank line does not start with
     *                               a parseable number
     */
    @Override
    public void map(Object key, Text value, Context context)
            throws IOException, InterruptedException {
        String line = value.toString();
        // Blank lines (e.g. a trailing newline in the input) carry no record.
        if (line.trim().isEmpty()) {
            return;
        }
        double weight = Double.parseDouble(line.split(",")[0]);

        // Store a fresh Text built from the string: the incoming Text
        // instance is reused for later records, so storing it directly
        // would make every retained value show the latest line.
        topRecords.put(new RankKey(weight, nextSequence++), new Text(line));
        if (topRecords.size() > limit) {
            // Drop the current smallest candidate to stay within the limit.
            topRecords.remove(topRecords.firstKey());
        }
    }

    /**
     * Emits the retained candidates, smallest weight first, under a
     * {@link NullWritable} key.
     */
    @Override
    protected void cleanup(Context context) throws IOException,
            InterruptedException {
        for (Text record : topRecords.values()) {
            context.write(NullWritable.get(), record);
        }
    }
}
/**
 * Reduce side of the TopN job: merges the candidate records emitted by the
 * mappers and writes the {@code N} records with the largest leading numeric
 * field, in ascending weight order.
 */
public class TopNReducer extends Reducer<NullWritable, Text, NullWritable, Text> {

    /**
     * Sort key made of the record weight plus an arrival sequence number.
     * The sequence number keeps records with equal weights distinct, so a
     * tie no longer overwrites an earlier record in the sorted map.
     * Sequence numbers are unique within one reduce call, so two different
     * keys never compare as equal.
     */
    private static final class RankKey implements Comparable<RankKey> {
        private final double weight;
        private final long sequence;

        RankKey(double weight, long sequence) {
            this.weight = weight;
            this.sequence = sequence;
        }

        @Override
        public int compareTo(RankKey other) {
            int byWeight = Double.compare(weight, other.weight);
            if (byWeight != 0) {
                return byWeight;
            }
            if (sequence < other.sequence) {
                return -1;
            }
            return sequence > other.sequence ? 1 : 0;
        }
    }

    /** Maximum number of records to emit; read from the "N" configuration key. */
    private int limit = 10;

    /**
     * Reads the record limit from the job configuration (key {@code "N"},
     * default 10).
     */
    @Override
    protected void setup(
            Reducer<NullWritable, Text, NullWritable, Text>.Context context)
            throws IOException, InterruptedException {
        this.limit = context.getConfiguration().getInt("N", 10);
    }

    /**
     * Selects the top records among all values received for the key and
     * writes them smallest weight first.
     *
     * @param key     the shared {@link NullWritable} key
     * @param values  candidate lines of the form {@code weight,rest...}
     * @param context task context used to write the final records
     * @throws NumberFormatException if a value does not start with a
     *                               parseable number
     */
    @Override
    protected void reduce(NullWritable key, Iterable<Text> values,
            Context context) throws IOException, InterruptedException {
        SortedMap<RankKey, Text> best = new TreeMap<RankKey, Text>();
        long sequence = 0L;

        for (Text candidate : values) {
            String line = candidate.toString();
            double weight = Double.parseDouble(line.split(",")[0]);

            // Store a fresh Text built from the string: the iterator hands
            // back a reused Text instance, so storing it directly would
            // make every retained value show the latest line.
            best.put(new RankKey(weight, sequence++), new Text(line));
            if (best.size() > limit) {
                // Drop the current smallest candidate to stay within the limit.
                best.remove(best.firstKey());
            }
        }

        for (Text record : best.values()) {
            context.write(NullWritable.get(), record);
        }
    }
}
輸入
12,cat1
13,cat2
14,cat3
15,cat4
10,cat5
100,cat100
200,cat200
300,cat300
1,cat001
67,cat67
22,cat22
23,cat23
1000,cat1000
2000,cat2000
輸出
14,cat3
15,cat4
22,cat22
23,cat23
67,cat67
100,cat100
200,cat200
300,cat300
1000,cat1000
2000,cat2000