昨天聽朋友說了一個題目,具體的題目忘了! 有數據是這樣的:
<1,0> <2,8> <1,9> <2,7> <1,0> <3,15> <5,20> <3,25> <4,20> <3,50>
要得到的結果是這樣的:
1 2 2 2 3 3 4 1 5 1
對左側數據的統計,對右側數據的去重; 當左側相同時,右側也相同,只記錄一次;當左側相同,右側不同,左側數據次數累加; 當左側不相同,右側也不相同時候,左側數據累加統計。
瞭解過大意以後發現這個就是對數據的去重統計的一個小測試! 思路就不寫了,跟着代碼隨意遐想,代碼僅限上述情況:
package com.amir.test;

import java.io.IOException;
import java.util.Iterator;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;

/**
 * Two chained MapReduce jobs that count, for every left-hand value of a set of
 * {@code <left,right>} pairs, how many distinct pairs share that left-hand value.
 *
 * <p>Job 1 ({@link #TaskMassRemoving()}) removes duplicate pairs; job 2
 * ({@link #TaskStatistics()}) counts the surviving pairs per left-hand value.
 * For the input
 * {@code <1,0> <2,8> <1,9> <2,7> <1,0> <3,15> <5,20> <3,25> <4,20> <3,50>}
 * the final result is {@code 1->2, 2->2, 3->3, 4->1, 5->1}.
 */
public class MapReducer_MulTask {

    /** Input of the de-duplication job. */
    private static final String DEDUP_INPUT = "/test/testw/ss";
    /** Output directory of the de-duplication job. */
    private static final String DEDUP_OUTPUT = "/test/testw/woutput";
    /** Input of the counting job: the single part file written by the de-duplication job. */
    private static final String STATS_INPUT = "/test/testw/woutput/part-00000";
    /** Output directory of the counting job. */
    private static final String STATS_OUTPUT = "/test/testw/woutput/wordcount";

    /**
     * Emits every whitespace-separated token of a line as a key with an empty value,
     * so that identical pairs meet in the same reduce call regardless of whether the
     * input holds one pair per line or several.
     */
    public static class MassRemovingMap extends MapReduceBase
            implements Mapper<Object, Text, Text, Text> {

        private static final Text EMPTY = new Text("");
        private final Text pair = new Text();

        @Override
        public void map(Object key, Text value, OutputCollector<Text, Text> output,
                Reporter reporter) throws IOException {
            StringTokenizer tokens = new StringTokenizer(value.toString());
            while (tokens.hasMoreTokens()) {
                pair.set(tokens.nextToken());
                output.collect(pair, EMPTY);
            }
        }
    }

    /**
     * Writes each distinct key exactly once and ignores the grouped values, which
     * collapses duplicates. Also usable as a combiner because input and output
     * types are identical.
     */
    public static class MassRemovingReduce extends MapReduceBase
            implements Reducer<Text, Text, Text, Text> {

        private static final Text EMPTY = new Text("");

        @Override
        public void reduce(Text key, Iterator<Text> value, OutputCollector<Text, Text> output,
                Reporter reporter) throws IOException {
            output.collect(key, EMPTY);
        }
    }

    /**
     * Extracts the left-hand value of every {@code <left,right>} token on a line and
     * emits it with a count of one.
     */
    public static class StatisticsMap extends MapReduceBase
            implements Mapper<Object, Text, Text, IntWritable> {

        private static final IntWritable ONE = new IntWritable(1);
        private final Text word = new Text();

        @Override
        public void map(Object key, Text value, OutputCollector<Text, IntWritable> output,
                Reporter reporter) throws IOException {
            StringTokenizer tokens = new StringTokenizer(value.toString());
            while (tokens.hasMoreTokens()) {
                String left = leftValue(tokens.nextToken());
                if (left.isEmpty()) {
                    // Malformed token such as "," or "<,5>": nothing to count.
                    continue;
                }
                word.set(left);
                output.collect(word, ONE);
            }
        }

        /**
         * Returns the part of {@code token} before the first comma with every
         * {@code '<'} removed; the whole token (minus {@code '<'}) when there is no comma.
         */
        private static String leftValue(String token) {
            int comma = token.indexOf(',');
            String left = (comma < 0) ? token : token.substring(0, comma);
            return left.replace("<", "");
        }
    }

    /** Sums the counts collected for one key; also used as the combiner. */
    public static class StatisticsReduce extends MapReduceBase
            implements Reducer<Text, IntWritable, Text, IntWritable> {

        private final IntWritable result = new IntWritable();

        @Override
        public void reduce(Text key, Iterator<IntWritable> value,
                OutputCollector<Text, IntWritable> output, Reporter reporter)
                throws IOException {
            int sum = 0;
            while (value.hasNext()) {
                sum += value.next().get();
            }
            result.set(sum);
            output.collect(key, result);
        }
    }

    /**
     * Runs the de-duplication job: reads {@code /test/testw/ss} and writes the
     * distinct pairs to {@code /test/testw/woutput}.
     *
     * @throws IOException if the job cannot be submitted or fails
     */
    public static void TaskMassRemoving() throws IOException {
        Configuration conf = new Configuration();
        JobConf jobconf = new JobConf(conf, MapReducer_MulTask.class);
        jobconf.setJobName("TaskMassRemoving");
        jobconf.setJarByClass(MapReducer_MulTask.class);

        jobconf.setMapperClass(MassRemovingMap.class);
        jobconf.setCombinerClass(MassRemovingReduce.class);
        jobconf.setReducerClass(MassRemovingReduce.class);
        // The follow-up job reads only part-00000, so force a single reducer to make
        // sure that file holds the complete de-duplicated output.
        jobconf.setNumReduceTasks(1);

        jobconf.setOutputKeyClass(Text.class);
        jobconf.setOutputValueClass(Text.class);

        FileInputFormat.addInputPath(jobconf, new Path(DEDUP_INPUT));
        FileOutputFormat.setOutputPath(jobconf, new Path(DEDUP_OUTPUT));

        JobClient.runJob(jobconf).waitForCompletion();
    }

    /**
     * Runs the counting job: reads the de-duplicated pairs from
     * {@code /test/testw/woutput/part-00000} and writes the per-key counts to
     * {@code /test/testw/woutput/wordcount}.
     *
     * @throws IOException if the job cannot be submitted or fails
     */
    public static void TaskStatistics() throws IOException {
        Configuration conf = new Configuration();
        JobConf jobconf = new JobConf(conf, MapReducer_MulTask.class);
        jobconf.setJobName("TaskStatistics");
        jobconf.setJarByClass(MapReducer_MulTask.class);

        jobconf.setMapperClass(StatisticsMap.class);
        jobconf.setCombinerClass(StatisticsReduce.class);
        jobconf.setReducerClass(StatisticsReduce.class);

        jobconf.setOutputKeyClass(Text.class);
        jobconf.setOutputValueClass(IntWritable.class);

        FileInputFormat.addInputPath(jobconf, new Path(STATS_INPUT));
        FileOutputFormat.setOutputPath(jobconf, new Path(STATS_OUTPUT));

        JobClient.runJob(jobconf).waitForCompletion();
    }

    /**
     * Runs the de-duplication job followed by the counting job. Prints {@code OK!}
     * when both succeed; on any failure prints the stack trace and exits with
     * status 1 so that calling scripts can detect the error.
     *
     * @param args unused
     * @throws IOException declared for backward compatibility; failures are handled here
     */
    public static void main(String[] args) throws IOException {
        try {
            MapReducer_MulTask.TaskMassRemoving(); // 01: remove duplicate pairs
            MapReducer_MulTask.TaskStatistics();   // 02: count pairs per left-hand value
            System.out.println("OK!");
        } catch (Exception e) {
            e.printStackTrace();
            System.exit(1);
        }
    }
}
主要是對 MapReduce 基本使用的測試!