Preface:
By default, MapReduce has each reducer produce one output file named name-r-nnnnn, where name defaults to part and nnnnn is the partition number, counting up from 00000, so that no two reducers ever write to the same file.
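For example, a job with two reducers leaves an output directory like this (a hypothetical listing, using the /huiqiang/output path from the samples below):
/huiqiang/output/_SUCCESS
/huiqiang/output/part-r-00000
/huiqiang/output/part-r-00001
The two approaches below replace either just the part prefix or the entire file name.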
I. Replacing only the file-name prefix part, so the output becomes score-r-00000
1. Use the org.apache.hadoop.mapreduce.lib.output.MultipleOutputs class.
2. MultipleOutputs must be initialized in the reducer's setup() method and is best closed in cleanup().
3. Empty part-r-00000 files are still produced alongside score-r-00000 at this point; add LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class); to suppress them.
Code sample:
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.LazyOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import java.io.IOException;
/**
* Created by HuiQ on 2019-10-16.
*/
public class WordCount {
public static class WordCountMapper extends Mapper<Object,Text,Text,IntWritable>{
private final static IntWritable one = new IntWritable(1);
private Text word = new Text();
@Override
public void map(Object key,Text value,Context context) throws IOException, InterruptedException {
String[] words = value.toString().split("\\s+"); // split on runs of whitespace so repeated spaces don't yield empty words
for (String str: words){
word.set(str);
context.write(word,one);
}
}
}
public static class WordCountReducer extends Reducer<Text,IntWritable,Text,IntWritable> {
private MultipleOutputs<Text, IntWritable> multipleOutputs;
@Override
protected void setup(Context context) throws IOException, InterruptedException {
multipleOutputs = new MultipleOutputs<Text, IntWritable>(context);
}
@Override
public void reduce(Text key,Iterable<IntWritable> values,Context context) throws IOException, InterruptedException {
int total=0;
for (IntWritable val : values){
total += val.get(); // sum the counts (still correct if a combiner is added later)
}
// Custom output file name: the base name "score" becomes score-r-00000
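// The base name may also contain '/' to route records into a subdirectory of the output path, e.g. "2019/score"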
multipleOutputs.write(key, new IntWritable(total), "score");
}
@Override
protected void cleanup(Context context) throws IOException, InterruptedException {
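// close() flushes and closes every writer opened by MultipleOutputs; skipping it can leave output incomplete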
multipleOutputs.close();
}
}
public static void main (String[] args) throws Exception{
Configuration conf = new Configuration();
Job job = Job.getInstance(conf, "word count"); // new Job(conf, name) is deprecated
job.setJarByClass(WordCount.class);
job.setMapperClass(WordCountMapper.class);
job.setReducerClass(WordCountReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);
// Suppress the empty part-r-00000 / part-m-00000 files that would otherwise appear in the output directory
LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class); // Note: with the fully custom file names of section II this line must NOT be present, otherwise the output is still named part-r-00000
FileInputFormat.setInputPaths(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job, new Path("/huiqiang/output"));
System.exit(job.waitForCompletion(true) ? 0 : 1);
}
}
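As an aside, when only the part prefix needs to change, Hadoop 2.x also exposes the base name through the mapreduce.output.basename configuration property, so the same score-r-00000 name can reportedly be obtained without MultipleOutputs (worth verifying on your version):
conf.set("mapreduce.output.basename", "score"); // the stock TextOutputFormat then writes score-r-00000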
II. To fully customize the file name, override RecordWriter
A fully custom output name is implemented by subclassing the FileOutputFormat and RecordWriter classes: override the write() method of RecordWriter, then have the FileOutputFormat subclass return that RecordWriter from its getRecordWriter() method.
Code sample:
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.*;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.IOException;
/**
* Created by HuiQ on 2019-10-16.
*/
public class WordCount {
public static class WordCountMapper extends Mapper<Object,Text,Text,IntWritable>{
private final static IntWritable one = new IntWritable(1);
private Text word = new Text();
@Override
public void map(Object key,Text value,Context context) throws IOException, InterruptedException {
String[] words = value.toString().split("\\s+"); // split on runs of whitespace so repeated spaces don't yield empty words
for (String str: words){
word.set(str);
context.write(word,one);
}
}
}
public static class WordCountReducer extends Reducer<Text,IntWritable,Text,IntWritable> {
@Override
public void reduce(Text key,Iterable<IntWritable> values,Context context) throws IOException, InterruptedException {
int total=0;
for (IntWritable val : values){
total += val.get(); // sum the counts (still correct if a combiner is added later)
}
context.write(key, new IntWritable(total));
}
}
// Note: 1. the static keyword is required, otherwise Hadoop cannot instantiate this inner class; 2. the type parameters of FileOutputFormat<Text,IntWritable> must match the reducer's <Text,IntWritable> output types
public static class MyFileOutputFormat extends FileOutputFormat<Text,IntWritable>{
@Override
public RecordWriter<Text, IntWritable> getRecordWriter(TaskAttemptContext job)throws IOException, InterruptedException {
FileSystem fileSystem=FileSystem.newInstance(job.getConfiguration());
// Custom output path (hard-coded)
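// Note: every reduce task opens this same fixed path, so the job must run with a single reducer (a task-unique variant is sketched after this code sample)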
final FSDataOutputStream title=fileSystem.create(new Path("/huiqiang/output/test.txt"));
RecordWriter<Text,IntWritable> recordWriter=new RecordWriter<Text, IntWritable>() {
@Override
public void close(TaskAttemptContext arg0) throws IOException,
InterruptedException {
if(title!=null){
title.close();
}
}
@Override
public void write(Text key, IntWritable value) throws IOException,
InterruptedException {
String fenGe=" ";
String charSet="UTF-8";
System.out.println("key="+key.toString());
// Write the key
title.write(key.toString().getBytes(charSet),0,key.toString().getBytes(charSet).length);
// Write the separator between key and value (fenGe)
title.write(fenGe.getBytes(charSet),0,fenGe.getBytes(charSet).length);
// Write the value, then a newline
title.write(value.toString().getBytes(charSet),0,value.toString().getBytes(charSet).length);
title.write("\n".getBytes(charSet),0,"\n".getBytes(charSet).length);
title.flush();
}
};
return recordWriter;
}
}
public static void main (String[] args) throws Exception{
Configuration conf = new Configuration();
Job job = Job.getInstance(conf, "word count"); // new Job(conf, name) is deprecated
job.setJarByClass(WordCount.class);
job.setMapperClass(WordCountMapper.class);
job.setReducerClass(WordCountReducer.class);
job.setOutputFormatClass(MyFileOutputFormat.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);
FileInputFormat.setInputPaths(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job, new Path("/huiqiang/output"));
System.exit(job.waitForCompletion(true) ? 0 : 1);
}
}
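Because the hard-coded /huiqiang/output/test.txt is shared by every reduce task, the sample above only behaves correctly with a single reducer. Below is a minimal sketch of a task-safe drop-in replacement for MyFileOutputFormat (same Text/IntWritable types and imports as the sample; the class name and the test-%05d.txt naming scheme are my own choices) that derives a unique file name from the task ID:
public static class TaskSafeFileOutputFormat extends FileOutputFormat<Text, IntWritable> {
    @Override
    public RecordWriter<Text, IntWritable> getRecordWriter(TaskAttemptContext job)
            throws IOException, InterruptedException {
        // One file per reduce task: test-00000.txt, test-00001.txt, ...
        int partition = job.getTaskAttemptID().getTaskID().getId();
        Path file = new Path(FileOutputFormat.getOutputPath(job),
                String.format("test-%05d.txt", partition));
        final FSDataOutputStream out =
                file.getFileSystem(job.getConfiguration()).create(file, true);
        return new RecordWriter<Text, IntWritable>() {
            @Override
            public void write(Text key, IntWritable value)
                    throws IOException, InterruptedException {
                // key, a space, the count, and a newline, encoded as UTF-8
                out.write((key.toString() + " " + value.toString() + "\n").getBytes("UTF-8"));
            }
            @Override
            public void close(TaskAttemptContext context)
                    throws IOException, InterruptedException {
                out.close();
            }
        };
    }
}
Writing straight into the final output directory also bypasses the OutputCommitter, so speculative execution should be disabled for such a job; the committer-friendly route is to build the path from getDefaultWorkFile(job, ".txt") instead.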
Reference: https://blog.csdn.net/smallpizza/article/details/78060638