原文地址:http://chenxiaoqiong.com/articles/mapreduce2/
看了 MapReduce實例(一),應該對mapreduce有了基本瞭解,試着自己去實現下面的例子,相信你會有收穫的。
實例需求
將輸入文件中的數字進行排序,要求輸出文件中輸出序號、數字。
輸入文件
1
999
24
12
45
輸出文件
1 1
2 12
3 24
4 45
5 999
設計思路
熟悉MapReduce過程的讀者會很快想到在MapReduce過程中就有排序,我們可以利用IntWritable排序規則,map按數字大小對key進行排序,reduce拿到key,循環value-list之後,將行號作爲序號,輸入的key作爲value輸出。
代碼實現
代碼已上傳至我的git:https://github.com/chenxiaoqiong/sortMapReduce
主要代碼:
/**
 * <h1>MapReduce Example (2)</h1>
 * SortMapReduce: sorts the numbers in the input files and writes
 * "rank&nbsp;number" pairs (one per input occurrence) to the output.
 * Created by chenxiaoqiong on 2017/3/27 0017, 2:14 PM.
 */
public class SortMapReduce extends Configured implements Tool {

    /**
     * Mapper: parses each input line as an integer and emits (number, 1).
     * The shuffle phase sorts map output keys by IntWritable's natural
     * ordering, so the framework does the actual sorting work.
     */
    public static class SortMapper
            extends Mapper<LongWritable, Text, IntWritable, IntWritable> {

        // Constant value "1"; reused for every record to avoid allocation.
        private static final IntWritable ONE = new IntWritable(1);
        // Reusable output key (Hadoop serializes on write, so reuse is safe).
        private final IntWritable number = new IntWritable();

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String line = value.toString().trim();
            if (line.isEmpty()) {
                return; // skip blank lines instead of failing on parseInt
            }
            number.set(Integer.parseInt(line));
            // Map output keys are sorted by the framework before reduce.
            context.write(number, ONE);
        }
    }

    /**
     * Reducer: receives the numbers in sorted key order and writes
     * (rank, number). Iterating the value list preserves duplicates:
     * each occurrence of a number gets its own rank.
     */
    public static class SortReducer
            extends Reducer<IntWritable, IntWritable, IntWritable, IntWritable> {

        // Running line number across all reduce() calls of this reducer.
        private final IntWritable linenum = new IntWritable(0);

        @Override
        protected void reduce(IntWritable key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            for (IntWritable ignored : values) {
                linenum.set(linenum.get() + 1); // reuse instead of new IntWritable per row
                context.write(linenum, key);
            }
        }
    }

    /**
     * Configures and runs the job: Input --> Map --> Reduce --> Output.
     *
     * @param args args[0] = input path, args[1] = output path
     * @return 0 on success, 1 on failure (conventional process exit code,
     *         since main() passes this value straight to System.exit())
     */
    public int run(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        Configuration conf = super.getConf();
        Job job = Job.getInstance(conf, this.getClass().getSimpleName());
        job.setJarByClass(SortMapReduce.class);

        // Input: FileInputFormat turns each line into a <byte offset, line> pair.
        FileInputFormat.addInputPath(job, new Path(args[0]));

        // Map: set the Mapper class and its output key/value types.
        job.setMapperClass(SortMapper.class);
        job.setMapOutputKeyClass(IntWritable.class);
        job.setMapOutputValueClass(IntWritable.class);

        // Reduce: the reducer emits IntWritable keys, so the final output key
        // type must be IntWritable (Text here would fail at runtime).
        job.setReducerClass(SortReducer.class);
        job.setOutputKeyClass(IntWritable.class);
        job.setOutputValueClass(IntWritable.class);

        // A global total order (and a single global rank counter) requires
        // exactly one reducer; with several, each produces its own ranks.
        job.setNumReduceTasks(1);

        // Output path (must not already exist).
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        boolean isSuccess = job.waitForCompletion(true);
        return isSuccess ? 0 : 1;
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        int status = ToolRunner.run(conf, new SortMapReduce(), args);
        System.exit(status);
    }
}
pom.xml
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>hadoop</groupId>
    <artifactId>countMapReduce</artifactId>
    <version>1.0-SNAPSHOT</version>

    <properties>
        <!-- Single source of truth for the Hadoop version. -->
        <hadoop.version>2.7.3</hadoop.version>
    </properties>

    <!-- NOTE(review): the previous <repositories> entry pointed at
         http://maven.apache.org, which is the Maven project website, not a
         Maven repository. All artifacts below resolve from Maven Central
         (the built-in default), so no extra repository is needed. -->

    <dependencies>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-common</artifactId>
            <version>${hadoop.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-hdfs</artifactId>
            <version>${hadoop.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-client</artifactId>
            <version>${hadoop.version}</version>
        </dependency>
        <!-- NOTE(review): removed hadoop-core 1.2.1 — it is a Hadoop 1.x
             artifact that ships older duplicates of the org.apache.hadoop
             mapreduce classes and conflicts with the 2.7.3 artifacts above;
             hadoop-client already provides everything this project needs. -->
    </dependencies>

    <build>
        <plugins>
            <plugin>
                <artifactId>maven-dependency-plugin</artifactId>
                <configuration>
                    <excludeTransitive>false</excludeTransitive>
                    <stripVersion>true</stripVersion>
                    <outputDirectory>./lib</outputDirectory>
                </configuration>
            </plugin>
        </plugins>
    </build>
</project>