1: 數據來源:
基於美國民航航班1987年數據,開發MapReduce應用程序計算其中某一年各個航班的飛行數據。
數據式樣
Year,Month,DayofMonth,DayOfWeek,DepTime,CRSDepTime,ArrTime,CRSArrTime,UniqueCarrier,FlightNum,TailNum,ActualElapsedTime,CRSElapsedTime,AirTime,ArrDelay,DepDelay,Origin,Dest,Distance,TaxiIn,TaxiOut,Cancelled,CancellationCode,Diverted,CarrierDelay,WeatherDelay,NASDelay,SecurityDelay,LateAircraftDelay
1987,10,14,3,741,730,912,849,PS,1451,NA,91,79,NA,23,11,SAN,SFO,447,NA,NA,0,NA,0,NA,NA,NA,NA,NA
1987,10,15,4,729,730,903,849,PS,1451,NA,94,79,NA,14,-1,SAN,SFO,447,NA,NA,0,NA,0,NA,NA,NA,NA,NA……
注意數據的以下字段(列號從1開始計數):
第4列爲星期值(DayOfWeek)
第9列爲航空公司代號(UniqueCarrier)
第10列爲航班號(FlightNum)
第19列爲飛行距離(Distance),用於統計各航班的飛行總里程
2: 代碼實現目標
編寫MapReduce應用程序,統計一個星期中每天的航班飛行架次,以及各航班飛行總里程,將結果保存在兩個文本文件中。
3:實現代碼
package org.apache.flight;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
public class FlightWeekDist {
// 分析航班的每週星期的航班次數
public static class FlightNumMapper extends Mapper<Object, Text, Text, IntWritable>{
private final static IntWritable one = new IntWritable(1);
private Text dateofWeek = new Text();
public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
String[] fields = value.toString().split(",");
try {
int year = Integer.parseInt(fields[0]); //filter first raw
} catch (NumberFormatException e) { return;}
dateofWeek.set(fields[3]); // date of week
context.write(dateofWeek, one);
}
}
// 分析航班中每一個航班每週的航程裏數的map函數
public static class FlightMilesMapper extends Mapper<Object, Text, Text, IntWritable>{
private IntWritable Miles = new IntWritable();
private Text FlightNum = new Text();
public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
String[] fields = value.toString().split(",");
try {
int year = Integer.parseInt(fields[0]); //filter first raw
} catch (NumberFormatException e) { return;}
String flight = fields[8]+fields[9];
FlightNum.set(flight); // class name
int miles = 0;
try {
miles = Integer.parseInt(fields[18]); //filter first raw
} catch (NumberFormatException e) { }
Miles.set(miles);
context.write(FlightNum, Miles);
}
}
//Reduce函數 兩個map公用
public static class FlightSumReducer extends Reducer<Text,IntWritable,Text,IntWritable> {
private IntWritable result = new IntWritable();
public void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
int sum = 0;
for (IntWritable val : values) {
sum += val.get();
}
result.set(sum);
context.write(key, result);
}
}
/**
 * Recursively deletes the two job output directories so the jobs can be re-run.
 *
 * <p>The result of each {@code delete} call is ignored, so a directory that does not
 * exist is not reported as an error.
 *
 * @param conf    configuration used to resolve the file system of each path
 * @param output1 first output directory to delete
 * @param output2 second output directory to delete
 * @throws IOException if a file system cannot be obtained or a delete fails
 */
private static void removeOutputPath(Configuration conf,
        String output1, String output2) throws IOException {
    for (String output : new String[] {output1, output2}) {
        Path path = new Path(output);
        // Resolve the file system from the path itself rather than using the default
        // one, so a path with an explicit scheme is deleted on the file system it names.
        FileSystem fs = path.getFileSystem(conf);
        fs.delete(path, true);
    }
}
/**
 * Builds the job named "Flight Numbers", which uses {@link FlightNumMapper} as mapper
 * and {@link FlightSumReducer} as both combiner and reducer.
 *
 * @param conf   configuration the job is created from
 * @param input  input path added to the job
 * @param output output path set on the job
 * @return the configured, not yet submitted job
 * @throws IOException if the job cannot be created or the paths cannot be set
 */
private static Job createFlightNumJob(Configuration conf,
        String input, String output) throws IOException {
    // Job.getInstance replaces the deprecated Job(Configuration, String) constructor.
    Job job = Job.getInstance(conf, "Flight Numbers");
    job.setJarByClass(FlightWeekDist.class);
    job.setMapperClass(FlightNumMapper.class);
    job.setCombinerClass(FlightSumReducer.class);
    job.setReducerClass(FlightSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(input));
    FileOutputFormat.setOutputPath(job, new Path(output));
    return job;
}
/**
 * Builds the job named "Flight Miles", which uses {@link FlightMilesMapper} as mapper
 * and {@link FlightSumReducer} as both combiner and reducer.
 *
 * @param conf   configuration the job is created from
 * @param input  input path added to the job
 * @param output output path set on the job
 * @return the configured, not yet submitted job
 * @throws IOException if the job cannot be created or the paths cannot be set
 */
private static Job createFlightMilesJob(Configuration conf,
        String input, String output) throws IOException {
    // Job.getInstance replaces the deprecated Job(Configuration, String) constructor.
    // The job name previously contained a typo ("Flight Milse").
    Job job = Job.getInstance(conf, "Flight Miles");
    job.setJarByClass(FlightWeekDist.class);
    job.setMapperClass(FlightMilesMapper.class);
    job.setCombinerClass(FlightSumReducer.class);
    job.setReducerClass(FlightSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(input));
    FileOutputFormat.setOutputPath(job, new Path(output));
    return job;
}
/**
 * Entry point: runs the flights-per-weekday job and then the miles-per-flight job
 * over the same input.
 *
 * <p>Expects exactly three arguments after generic Hadoop options are removed:
 * {@code <in> <out1> <out2>}. Both output directories are deleted before the jobs
 * start. The process exits with 2 on a usage error, 1 if either job fails, and 0 when
 * both succeed; the second job is not started if the first one fails.
 *
 * @param args command-line arguments, possibly including generic Hadoop options
 * @throws Exception if argument parsing, output cleanup, or job execution fails
 */
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 3) {
        System.err.println("Usage: FlightWeekDist <in> <out1> <out2>");
        System.exit(2);
    }
    removeOutputPath(conf, otherArgs[1], otherArgs[2]);

    Job numJob = createFlightNumJob(conf, otherArgs[0], otherArgs[1]);
    // Stop early instead of running the second job after a failure.
    if (!numJob.waitForCompletion(true)) {
        System.exit(1);
    }

    Job milesJob = createFlightMilesJob(conf, otherArgs[0], otherArgs[2]);
    // Propagate the job result as the process exit status.
    System.exit(milesJob.waitForCompletion(true) ? 0 : 1);
}
}