內容:
假設一個年級有兩個班級,數據分別在class1.csv和class2.csv中,求該年級的數學成績平均值。數據第一列爲學號,第二列爲數學成績。要求:必須使用Combiner類,且最後只輸出一行數據,該行僅有一個平均值。
代碼實現:
1、Mapper
/**
 * Turns each CSV row ("studentId,mathScore") into a partial aggregate
 * encoded as the text "sum,count" (here always "score,1").
 *
 * <p>Emitting sum and count together, instead of the bare score, is what makes
 * a combiner safe for this job: partial sums and counts can be merged in any
 * order and any number of times, whereas averages cannot.
 *
 * <p>All records share the single {@link NullWritable} key so that they end up
 * in one reduce group and the job produces exactly one output line.
 */
public class myMapper extends Mapper<LongWritable, Text, NullWritable, Text> {

    /** Counter group / name used to report rows that could not be parsed. */
    private static final String COUNTER_GROUP = "myMapper";
    private static final String MALFORMED_ROWS = "MALFORMED_ROWS";

    /** Reused output value; Hadoop serializes it on every write, so reuse is safe. */
    private final Text partial = new Text();

    /**
     * Parses one input line and emits "score,1".
     *
     * @param ikey    byte offset of the line in its file (unused)
     * @param ivalue  the raw CSV line
     * @param context task context used to emit output and update counters
     */
    @Override
    public void map(LongWritable ikey, Text ivalue, Context context) throws IOException, InterruptedException {
        String line = ivalue.toString().trim();
        // A trailing blank line is common in CSV exports; it carries no data.
        if (line.isEmpty()) {
            return;
        }

        String[] fields = line.split(",");
        if (fields.length < 2) {
            // Row has no score column: count it instead of failing the whole job.
            context.getCounter(COUNTER_GROUP, MALFORMED_ROWS).increment(1);
            return;
        }

        double score;
        try {
            score = Double.parseDouble(fields[1].trim());
        } catch (NumberFormatException e) {
            // Typically a header row or a corrupted value; keep it visible via the counter.
            context.getCounter(COUNTER_GROUP, MALFORMED_ROWS).increment(1);
            return;
        }

        partial.set(score + ",1");
        context.write(NullWritable.get(), partial);
    }
}
2、Reduce
/**
 * Final reducer: merges all "sum,count" partials and writes the single
 * grade-wide average.
 *
 * <p>{@code Text} is written fully qualified in this class because the import
 * list of this file is not shown alongside the code.
 */
public class myReducer extends Reducer<NullWritable, org.apache.hadoop.io.Text, NullWritable, FloatWritable> {

    /**
     * Sums every partial and emits total / count as the only output record.
     *
     * @param n       the single shared key
     * @param values  "sum,count" partials coming from mappers and/or the combiner
     * @param context task context used to emit the average
     */
    @Override
    public void reduce(NullWritable n, Iterable<org.apache.hadoop.io.Text> values, Context context) throws IOException, InterruptedException {
        Totals totals = new Totals();
        for (org.apache.hadoop.io.Text val : values) {
            totals.add(val.toString());
        }
        // Guard against partials whose counts add up to zero, which would otherwise emit NaN.
        if (totals.count == 0) {
            return;
        }
        context.write(NullWritable.get(), new FloatWritable((float) (totals.sum / totals.count)));
    }

    /**
     * Combiner: merges the "sum,count" partials produced on one map task into a
     * single "sum,count" partial.
     *
     * <p>Its input and output types are both the map output types, and the
     * operation (adding sums, adding counts) is associative and commutative, so
     * the final result is the same whether Hadoop runs it zero, one or several
     * times. Using {@link myReducer} itself as the combiner would instead make
     * the reducer average per-split averages, which is wrong whenever splits
     * hold different numbers of records.
     */
    public static class PartialSumCombiner
            extends Reducer<NullWritable, org.apache.hadoop.io.Text, NullWritable, org.apache.hadoop.io.Text> {

        /** Reused output value. */
        private final org.apache.hadoop.io.Text merged = new org.apache.hadoop.io.Text();

        @Override
        public void reduce(NullWritable n, Iterable<org.apache.hadoop.io.Text> values, Context context) throws IOException, InterruptedException {
            Totals totals = new Totals();
            for (org.apache.hadoop.io.Text val : values) {
                totals.add(val.toString());
            }
            merged.set(totals.sum + "," + totals.count);
            context.write(NullWritable.get(), merged);
        }
    }

    /** Running sum and record count; double/long avoid float rounding drift on large inputs. */
    private static final class Totals {
        double sum;
        long count;

        /** Adds one "sum,count" partial to the running totals. */
        void add(String encoded) {
            int comma = encoded.indexOf(',');
            sum += Double.parseDouble(encoded.substring(0, comma));
            count += Long.parseLong(encoded.substring(comma + 1));
        }
    }
}
3、Driver
/**
 * Configures and submits the job that computes the grade-wide math average
 * from class1.csv and class2.csv.
 */
public class myDriver {

    /**
     * Builds the job, waits for it to finish and exits with 0 on success, 1 on failure.
     *
     * @param args unused; input and output locations are fixed below
     */
    public static void main(String[] args) throws Exception {
        // Load the cluster configuration and create the job instance.
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);
        job.setJarByClass(myDriver.class);

        // Map -> combine -> reduce pipeline. The combiner only merges partial
        // sums/counts; the division happens once, in the reducer.
        job.setMapperClass(myMapper.class);
        job.setCombinerClass(myReducer.PartialSumCombiner.class);
        job.setReducerClass(myReducer.class);

        // One reduce task so the result lands in a single output file with a single line.
        job.setNumReduceTasks(1);

        // Map (and combiner) output types: shared key, "sum,count" text value.
        job.setMapOutputKeyClass(NullWritable.class);
        job.setMapOutputValueClass(org.apache.hadoop.io.Text.class);

        // Final output types: no key, the average as a float.
        job.setOutputKeyClass(NullWritable.class);
        job.setOutputValueClass(FloatWritable.class);

        Path outpath = new Path("hdfs://192.168.8.129:9000/hdfstest/paixu/output1");
        String inpath = "hdfs://192.168.8.129:9000/hdfstest/paixu/";
        String[] puts = new String[] {inpath + "class1.csv", inpath + "class2.csv"};
        Path[] inpaths = new Path[puts.length];
        for (int i = 0; i < puts.length; i++) {
            inpaths[i] = new Path(puts[i]);
        }

        // Inputs are the two class files; the output directory must not exist yet.
        FileInputFormat.setInputPaths(job, inpaths);
        FileOutputFormat.setOutputPath(job, outpath);

        boolean result = job.waitForCompletion(true);
        System.exit(result ? 0 : 1);
    }
}
4、結果