【重拾】MapReducer[第一篇]

    昨天聽朋友說了一個題目,具體的題目忘了! 有數據是這樣的:

<1,0> 
<2,8>
<1,9>
<2,7>
<1,0>
<3,15>
<5,20>  
<3,25>
<4,20>
<3,50>

    要得到的結果是這樣的:

1    2
2    2
3    3
4    1
5    1

    對左側數據的統計,對右側數據的去重; 當左側相同時,右側也相同,只記錄一次;當左側相同,右側不同,左側數據次數累加; 當左側不相同,右側也不相同的時候,左側數據累加統計。

    瞭解過大意以後發現這個就是對數據的去重統計的一個小測試! 思路就不寫了,跟着代碼隨意遐想,代碼僅限上述情況:

package com.amir.test;

import java.io.IOException;
import java.util.Iterator;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;

public class MapReducer_MulTask {

    public static class Ma***emovingMap extends MapReduceBase implements
            Mapper<Object, Text, Text, Text> {

        private Text line = new Text();

        public void map(Object key, Text value,
                OutputCollector<Text, Text> output, Reporter reporter)
                throws IOException {
            line = value;
            output.collect(line, new Text(""));
        }
    }

    public static class Ma***emovingReduce extends MapReduceBase implements
            Reducer<Text, IntWritable, Text, Text> {

        public void reduce(Text key, Iterator<IntWritable> value,
                OutputCollector<Text, Text> output, Reporter reporter)
                throws IOException {
            output.collect(key, new Text(""));
        }
    }

    public static class StatisticsMap extends MapReduceBase implements
            Mapper<Object, Text, Text, IntWritable> {
        private final static IntWritable one = new IntWritable(1);
        private Text word = new Text();

        public void map(Object key, Text value,
                OutputCollector<Text, IntWritable> output, Reporter reporter)
                throws IOException {

            StringTokenizer itr = new StringTokenizer(value.toString());
            while (itr.hasMoreTokens()) {
                String[] temp = itr.nextToken().split(",");
                String akey = temp[0].replace("<", "");
                word.set(akey);
                output.collect(word, one);
            }
        }
    }

    public static class StatisticsReduce extends MapReduceBase implements
            Reducer<Text, IntWritable, Text, IntWritable> {

        private IntWritable result = new IntWritable();

        public void reduce(Text key, Iterator<IntWritable> value,
                OutputCollector<Text, IntWritable> output, Reporter reporter)
                throws IOException {
            int sum = 0;
            while (value.hasNext()) {
                IntWritable val = value.next();
                sum += val.get();
            }
            result.set(sum);
            output.collect(key, result);
        }

    }

    public static void TaskMa***emoving() throws IOException{
        String[] param = { "/test/testw/ss", "/test/testw/woutput" };
        Configuration conf = new Configuration();
        JobConf jobconf = new JobConf(conf, MapReducer_MulTask.class);
        jobconf.setJobName("TaskMa***emoving");
        
        jobconf.setJarByClass(MapReducer_MulTask.class);
        jobconf.setMapperClass(Ma***emovingMap.class);
        jobconf.setCombinerClass(Ma***emovingReduce.class);
        jobconf.setReducerClass(Ma***emovingReduce.class);
        jobconf.setOutputKeyClass(Text.class);
        jobconf.setOutputValueClass(Text.class);
        
        FileInputFormat.addInputPath(jobconf, new Path(param[0]));
        FileOutputFormat.setOutputPath(jobconf, new Path(param[1]));
        JobClient.runJob(jobconf).waitForCompletion();
    }
    
    public static void TaskStatistics() throws IOException{
        String[] param = {"/test/testw/woutput/part-00000","/test/testw/woutput/wordcount"};
        Configuration conf = new Configuration();
        JobConf jobconf = new JobConf(conf, MapReducer_MulTask.class);
        jobconf.setJobName("TaskStatistics");
        
        jobconf.setJarByClass(MapReducer_MulTask.class);
        jobconf.setMapperClass(StatisticsMap.class);
        jobconf.setCombinerClass(StatisticsReduce.class);
        jobconf.setReducerClass(StatisticsReduce.class);
        
        jobconf.setOutputKeyClass(Text.class);
        jobconf.setOutputValueClass(IntWritable.class);
        
        FileInputFormat.addInputPath(jobconf, new Path(param[0]));
        FileOutputFormat.setOutputPath(jobconf, new Path(param[1]));
        JobClient.runJob(jobconf).waitForCompletion();
        
    }
    
    public static void main(String[] args) throws IOException {
        try {
            MapReducer_MulTask.TaskMa***emoving(); // 01
            MapReducer_MulTask.TaskStatistics();  // 02
            System.out.println("OK!");
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}

    主要對MapReducer 基本使用的測試!!!!

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章