【重拾】MapReducer[第一篇]

    昨天聽朋友說了一個題目,具體的題目忘了! 有數據是這樣的:

<1,0> 
<2,8>
<1,9>
<2,7>
<1,0>
<3,15>
<5,20>  
<3,25>
<4,20>
<3,50>

    要得到的結果是這樣的:

1    2
2    2
3    3
4    1
5    1

    對左側數據的統計,對右側數據的去重; 當左側相同時,右側也相同,只記錄一次;當左側相同,右側不同,左側數據次數累加; 當左側不相同,右側也不相同的時候,左側數據累加統計。

    瞭解過大意以後發現這個就是對數據的去重統計的一個小測試! 思路就不寫了,跟着代碼隨意遐想,代碼僅限上述情況:

package com.amir.test;

import java.io.IOException;
import java.util.Iterator;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;

public class MapReducer_MulTask {

    public static class Ma***emovingMap extends MapReduceBase implements
            Mapper<Object, Text, Text, Text> {

        private Text line = new Text();

        public void map(Object key, Text value,
                OutputCollector<Text, Text> output, Reporter reporter)
                throws IOException {
            line = value;
            output.collect(line, new Text(""));
        }
    }

    public static class Ma***emovingReduce extends MapReduceBase implements
            Reducer<Text, IntWritable, Text, Text> {

        public void reduce(Text key, Iterator<IntWritable> value,
                OutputCollector<Text, Text> output, Reporter reporter)
                throws IOException {
            output.collect(key, new Text(""));
        }
    }

    public static class StatisticsMap extends MapReduceBase implements
            Mapper<Object, Text, Text, IntWritable> {
        private final static IntWritable one = new IntWritable(1);
        private Text word = new Text();

        public void map(Object key, Text value,
                OutputCollector<Text, IntWritable> output, Reporter reporter)
                throws IOException {

            StringTokenizer itr = new StringTokenizer(value.toString());
            while (itr.hasMoreTokens()) {
                String[] temp = itr.nextToken().split(",");
                String akey = temp[0].replace("<", "");
                word.set(akey);
                output.collect(word, one);
            }
        }
    }

    public static class StatisticsReduce extends MapReduceBase implements
            Reducer<Text, IntWritable, Text, IntWritable> {

        private IntWritable result = new IntWritable();

        public void reduce(Text key, Iterator<IntWritable> value,
                OutputCollector<Text, IntWritable> output, Reporter reporter)
                throws IOException {
            int sum = 0;
            while (value.hasNext()) {
                IntWritable val = value.next();
                sum += val.get();
            }
            result.set(sum);
            output.collect(key, result);
        }

    }

    public static void TaskMa***emoving() throws IOException{
        String[] param = { "/test/testw/ss", "/test/testw/woutput" };
        Configuration conf = new Configuration();
        JobConf jobconf = new JobConf(conf, MapReducer_MulTask.class);
        jobconf.setJobName("TaskMa***emoving");
        
        jobconf.setJarByClass(MapReducer_MulTask.class);
        jobconf.setMapperClass(Ma***emovingMap.class);
        jobconf.setCombinerClass(Ma***emovingReduce.class);
        jobconf.setReducerClass(Ma***emovingReduce.class);
        jobconf.setOutputKeyClass(Text.class);
        jobconf.setOutputValueClass(Text.class);
        
        FileInputFormat.addInputPath(jobconf, new Path(param[0]));
        FileOutputFormat.setOutputPath(jobconf, new Path(param[1]));
        JobClient.runJob(jobconf).waitForCompletion();
    }
    
    public static void TaskStatistics() throws IOException{
        String[] param = {"/test/testw/woutput/part-00000","/test/testw/woutput/wordcount"};
        Configuration conf = new Configuration();
        JobConf jobconf = new JobConf(conf, MapReducer_MulTask.class);
        jobconf.setJobName("TaskStatistics");
        
        jobconf.setJarByClass(MapReducer_MulTask.class);
        jobconf.setMapperClass(StatisticsMap.class);
        jobconf.setCombinerClass(StatisticsReduce.class);
        jobconf.setReducerClass(StatisticsReduce.class);
        
        jobconf.setOutputKeyClass(Text.class);
        jobconf.setOutputValueClass(IntWritable.class);
        
        FileInputFormat.addInputPath(jobconf, new Path(param[0]));
        FileOutputFormat.setOutputPath(jobconf, new Path(param[1]));
        JobClient.runJob(jobconf).waitForCompletion();
        
    }
    
    public static void main(String[] args) throws IOException {
        try {
            MapReducer_MulTask.TaskMa***emoving(); // 01
            MapReducer_MulTask.TaskStatistics();  // 02
            System.out.println("OK!");
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}

    主要對MapReducer 基本使用的測試!!!!

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章