【MapReduce Java】簡單的平均距離計算

要求

沒什麼太多要說的,以前的MapR in Python用熟練了,Java要不是作業感覺也不會太常用它了…… 就貼一下以備忘(防止以後突然要用java寫的時候可以來參考一下)

  • 輸入文件:文本文件
    • 每行格式
      • source _ destination _ time
      • 3個部分由空格隔開
      • 其中source和destination爲兩個字符串,內部沒有空格
      • time爲一個浮點數,代表時間(秒爲單位)
      • 涵義:可以表示一次電話通話,或表示一次網站訪問等
    • 輸入可能有噪音
      • 如果一行不符合上述格式,應該被丟棄,程序需要正確執行
  • MapReduce計算:統計每對source-destination的信息
  • 輸出
    • source _ destination _ count _ average time
    • 每一個source-destination組合輸出一行(注意:source和destination順序相反的,視爲不同的組合分別處理)
    • 每行輸出通話次數和通話平均時間(保留3位小數,例如2.300)

Source Code

/* 3, 201618013229031, ChenDian */
// Based on WordCount.java:
// Modified by Shimin Chen to demonstrate functionality for Homework 2
// April-May 2015

import java.io.IOException;
import java.util.Locale;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;

import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.Reducer.Context;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

/**
 * Hadoop MapReduce job that, for every (source, destination) pair in the
 * input, outputs the number of records and the average time.
 *
 * Input : text lines of the form "source destination time", whitespace
 *         separated; lines that do not match this format are discarded.
 * Output: one line per pair: "source destination<TAB>count average",
 *         with the average formatted to 3 decimal places (e.g. 2.300).
 */
public class Hw2Part1 {

    /**
     * Mapper: parses each input line and emits
     * key = "source destination", value = time (seconds).
     *
     * The input may contain noise (see requirements): any line that does
     * not have exactly 3 whitespace-separated fields, or whose third field
     * is not a valid double, is silently dropped instead of crashing the
     * task with ArrayIndexOutOfBoundsException / NumberFormatException.
     */
    public static class CDMapper
        extends Mapper<Object, Text, Text, DoubleWritable> {

        private Text pair = new Text();
        private DoubleWritable time = new DoubleWritable();

        public void map(Object key, Text value, Context context
                        ) throws IOException, InterruptedException {
            // trim() first so leading whitespace does not yield an empty
            // first token after the split.
            String[] tokens = value.toString().trim().split("\\s+");

            // Noise filtering: exactly 3 fields are required.
            if (tokens.length != 3) {
                return;
            }

            double t;
            try {
                t = Double.parseDouble(tokens[2]);
            } catch (NumberFormatException e) {
                // Third field is not a number -> malformed line, discard.
                return;
            }

            pair.set(tokens[0] + " " + tokens[1]); // key: "source destination"
            time.set(t);                           // value: time in seconds
            context.write(pair, time);
        }
    }

    /**
     * Reducer: for each "source destination" key, counts the values and
     * computes their average, emitting "count average" as a Text value.
     */
    public static class CDReducer
        extends Reducer<Text, DoubleWritable, Text, Text> {

        private Text retKey = new Text();
        private Text retValue = new Text();

        public void reduce(Text key, Iterable<DoubleWritable> values, Context context
                           ) throws IOException, InterruptedException {
            int cnt = 0;
            double sum = 0.0;
            for (DoubleWritable val : values) {
                sum += val.get();
                cnt += 1;
            }

            // cnt >= 1 here: reduce() is only invoked for keys that have
            // at least one value, so the division is safe.
            double avg = sum / cnt;

            // Locale.US forces '.' as the decimal separator; the default
            // locale could otherwise produce e.g. "2,300" and break the
            // required output format.
            String vStr = cnt + " " + String.format(Locale.US, "%.3f", avg);

            retKey.set(key);
            retValue.set(vStr);
            context.write(retKey, retValue);
        }
    }

    /**
     * Recursively deletes the given path on the default FileSystem if it
     * exists. Handy for clearing a previous run's output directory.
     *
     * @param path path to delete
     * @throws IOException if the FileSystem cannot be reached
     */
    public static void deletePath(String path) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        Path f = new Path(path);
        if (fs.exists(f)) fs.delete(f, true);
    }

    /**
     * Job driver: usage "Hw2Part1 <in> [<in>...] <out>". The last argument
     * is the output directory; all preceding ones are inputs.
     */
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
        if (otherArgs.length < 2) {
            System.err.println("Usage: Hw2Part1 <in> <out>");
            System.exit(2);
        }

        // deletePath(otherArgs[otherArgs.length - 1]);
        // Intentionally disabled: the grading script clears the output dir.

        Job job = Job.getInstance(conf, "Mapr_Average");

        job.setJarByClass(Hw2Part1.class);
        job.setMapperClass(CDMapper.class);
        job.setReducerClass(CDReducer.class);

        // Map output types differ from the final output types (value is
        // DoubleWritable in the shuffle, Text in the result), so both
        // must be declared explicitly.
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(DoubleWritable.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        FileInputFormat.setInputPaths(job, new Path(otherArgs[0]));
        FileOutputFormat.setOutputPath(job,
            new Path(otherArgs[otherArgs.length - 1]));

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
發佈了295 篇原創文章 · 獲贊 81 · 訪問量 44萬+
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章