Source Code of a Hadoop MapReduce Program for Analyzing Airline Flight Data

1: Data Source
Based on US civil aviation flight data from 1987, we develop a MapReduce application that computes flight statistics for each flight in that year.
Data format:
Year,Month,DayofMonth,DayOfWeek,DepTime,CRSDepTime,ArrTime,CRSArrTime,UniqueCarrier,FlightNum,TailNum,ActualElapsedTime,CRSElapsedTime,AirTime,ArrDelay,DepDelay,Origin,Dest,Distance,TaxiIn,TaxiOut,Cancelled,CancellationCode,Diverted,CarrierDelay,WeatherDelay,NASDelay,SecurityDelay,LateAircraftDelay
1987,10,14,3,741,730,912,849,PS,1451,NA,91,79,NA,23,11,SAN,SFO,447,NA,NA,0,NA,0,NA,NA,NA,NA,NA
1987,10,15,4,729,730,903,849,PS,1451,NA,94,79,NA,14,-1,SAN,SFO,447,NA,NA,0,NA,0,NA,NA,NA,NA,NA……
Note the following fields in the data (see the parsing sketch below):
Column 4 is the day of the week (DayOfWeek)
Column 9 is the carrier code (UniqueCarrier)
Column 10 is the flight number (FlightNum)
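To make the column positions concrete, here is a minimal standalone sketch (plain Java, independent of the MapReduce code below; the class name FieldDemo is used only for illustration) that splits the first sample record above and prints the four fields the two jobs rely on:

public class FieldDemo {
    public static void main(String[] args) {
        String record = "1987,10,14,3,741,730,912,849,PS,1451,NA,91,79,NA,23,11,"
                + "SAN,SFO,447,NA,NA,0,NA,0,NA,NA,NA,NA,NA";
        String[] fields = record.split(",");
        System.out.println(fields[3]);   // DayOfWeek     (column 4)  -> 3
        System.out.println(fields[8]);   // UniqueCarrier (column 9)  -> PS
        System.out.println(fields[9]);   // FlightNum     (column 10) -> 1451
        System.out.println(fields[18]);  // Distance      (column 19) -> 447
    }
}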

2: Implementation Goal
Write a MapReduce application that counts the number of flights on each day of the week and the total distance flown by each flight (carrier code + flight number), and save the results in two text files.
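For illustration, if the input contained only the two sample records above, the day-of-week job would output the lines "3 1" and "4 1", and the distance job would output "PS1451 894" (447 + 447), since both records are carrier PS, flight 1451, with a distance of 447 miles, flown on days 3 and 4 of the week. Both jobs use Hadoop's default TextOutputFormat, which writes each key/value pair as one line with the key and value separated by a tab.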

3: Implementation Code

package org.apache.flight;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

public class FlightWeekDist {

    // Mapper: counts flights per day of the week
    public static class FlightNumMapper extends Mapper<Object, Text, Text, IntWritable>{
        private final static IntWritable one = new IntWritable(1);
        private Text dateofWeek = new Text();

        @Override
        public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
            String[] fields = value.toString().split(",");
            try {
                Integer.parseInt(fields[0]);   // skip the header row (Year is not numeric there)
            } catch (NumberFormatException e) {
                return;
            }
            dateofWeek.set(fields[3]);         // day of week (column 4)
            context.write(dateofWeek, one);
        }
    }
    // Mapper: emits the flying distance of each record, keyed by flight (carrier code + flight number)
    public static class FlightMilesMapper extends Mapper<Object, Text, Text, IntWritable>{
        private IntWritable Miles = new IntWritable();
        private Text FlightNum = new Text();

        @Override
        public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
            String[] fields = value.toString().split(",");
            try {
                Integer.parseInt(fields[0]);   // skip the header row (Year is not numeric there)
            } catch (NumberFormatException e) {
                return;
            }
            String flight = fields[8] + fields[9];   // carrier code + flight number, e.g. PS1451
            FlightNum.set(flight);
            int miles = 0;
            try {
                miles = Integer.parseInt(fields[18]);   // distance (column 19); non-numeric values count as 0
            } catch (NumberFormatException e) { }
            Miles.set(miles);
            context.write(FlightNum, Miles);
        }
    }
    // Reducer: sums the values for each key; shared by both jobs and also used as the combiner
    public static class FlightSumReducer extends Reducer<Text,IntWritable,Text,IntWritable> {
        private IntWritable result = new IntWritable();

        @Override
        public void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable val : values) {
                sum += val.get();
            }
            result.set(sum);
            context.write(key, result);
        }
    }

    // Delete the output directories if they already exist
    private static void removeOutputPath(Configuration conf,
            String output1, String output2) throws IOException {
        FileSystem hdfs = FileSystem.get(conf);
        Path path = new Path(output1);
        hdfs.delete(path, true);

        path = new Path(output2);
        hdfs.delete(path, true);
    }
    // Create the job that counts flights per day of the week
    private static Job createFlightNumJob(Configuration conf,
            String input, String output) throws IOException {
        Job job = Job.getInstance(conf, "Flight Numbers");
        job.setJarByClass(FlightWeekDist.class);

        job.setMapperClass(FlightNumMapper.class);
        job.setCombinerClass(FlightSumReducer.class);
        job.setReducerClass(FlightSumReducer.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        FileInputFormat.addInputPath(job, new Path(input));
        FileOutputFormat.setOutputPath(job, new Path(output));

        return job;
    }

    // Create the job that sums the total flying distance of each flight
    private static Job createFlightMilesJob(Configuration conf,
            String input, String output) throws IOException {
        Job job = Job.getInstance(conf, "Flight Miles");
        job.setJarByClass(FlightWeekDist.class);

        job.setMapperClass(FlightMilesMapper.class);
        job.setCombinerClass(FlightSumReducer.class);
        job.setReducerClass(FlightSumReducer.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        FileInputFormat.addInputPath(job, new Path(input));
        FileOutputFormat.setOutputPath(job, new Path(output));

        return job;
    }
    // Main entry point
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        String[] otherArgs = new GenericOptionsParser(conf, args)
                .getRemainingArgs();
        if (otherArgs.length != 3) {
            System.err.println("Usage: ScoreAnalysis <in> <out1> <out2>");
            System.exit(2);
        }

        removeOutputPath(conf, otherArgs[1], otherArgs[2]);
        Job job = createFlightNumJob(conf, otherArgs[0], otherArgs[1]);
        job.waitForCompletion(true);

        job = createFlightMilesJob(conf, otherArgs[0], otherArgs[2]);
        job.waitForCompletion(true);
    }

}
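To run the program, package the class into a jar (the name flight.jar below is just an example; use whatever your build produces) and submit it with three arguments: hadoop jar flight.jar org.apache.flight.FlightWeekDist <input path> <weekday output path> <miles output path>. Each job writes its result as tab-separated key/value lines in part-r-00000 files under its own output directory.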