MapReduce實例(二)

原文地址:http://chenxiaoqiong.com/articles/mapreduce2/
看了 MapReduce實例(一),應該對mapreduce有了基本瞭解,試着自己去實現下面的例子,相信你會有收穫的。

實例需求

將輸入文件中的數字進行排序,要求輸出文件中輸出序號、數字。

輸入文件

1
999 
24
12
45

輸出文件

1   1
2   12
3   24
4   45
5   999

設計思路

熟悉MapReduce過程的讀者會很快想到在MapReduce過程中就有排序,我們可以利用IntWritable排序規則,map按數字大小對key進行排序,reduce拿到key,循環value-list之後,將行號作爲序號,輸入的key作爲value輸出。注意:shuffle排序只保證每個Reducer分區內部有序,要得到全局有序的輸出,需要將Reduce任務數設爲1。

代碼實現

代碼已上傳至我的git:https://github.com/chenxiaoqiong/sortMapReduce
主要代碼:

/**
 * <h1> MapReduce Example (2) </h1>
 * SortMapReduce: reads one integer per input line, sorts the numbers in
 * ascending order (leveraging the shuffle-phase sort on {@link IntWritable}
 * keys), and writes "rank\tnumber" pairs.
 *
 * <p>Created by chenxiaoqiong on 2017/3/27.
 */
public class SortMapReduce extends Configured implements Tool {

    /**
     * Mapper: parses each input line as an integer and emits it as the key
     * so the framework's shuffle sort orders the numbers for us.
     * Output: (number, 1).
     */
    public static class SortMapper
            extends Mapper<LongWritable, Text, IntWritable, IntWritable> {

        private final static IntWritable ONE = new IntWritable(1);
        private final IntWritable keyword = new IntWritable();

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // Trim first: the sample input contains trailing whitespace
            // ("999 "), which would make Integer.parseInt throw.
            String line = value.toString().trim();
            if (line.isEmpty()) {
                return; // skip blank lines instead of crashing the task
            }

            keyword.set(Integer.parseInt(line));

            // Keys are sorted by the framework during the shuffle phase.
            context.write(keyword, ONE);
        }
    }

    /**
     * Reducer: receives the numbers in sorted key order and emits
     * (rank, number), where rank is a running line number starting at 1.
     * Duplicate numbers each get their own rank (one per value in the list).
     */
    public static class SortReducer
            extends Reducer<IntWritable, IntWritable, IntWritable, IntWritable> {

        private final IntWritable linenum = new IntWritable(1);

        @Override
        protected void reduce(IntWritable key, Iterable<IntWritable> value, Context context)
                throws IOException, InterruptedException {
            for (IntWritable ignored : value) {
                context.write(linenum, key);
                // Reuse the Writable instead of allocating per record;
                // context.write serializes immediately, so mutation is safe.
                linenum.set(linenum.get() + 1);
            }
        }
    }

    /**
     * Configures and submits the job.
     *
     * @param args args[0] = input path, args[1] = output path
     * @return 0 on success, 1 on failure (standard Tool/exit-code convention)
     */
    public int run(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        // Configuration injected by ToolRunner:
        Configuration conf = super.getConf();

        Job job = Job.getInstance(conf, this.getClass().getSimpleName());
        job.setJarByClass(SortMapReduce.class);

        // Input: FileInputFormat presents each line as <byte offset, line text>.
        Path inPath = new Path(args[0]);
        FileInputFormat.addInputPath(job, inPath);

        // Map phase:
        job.setMapperClass(SortMapper.class);
        job.setMapOutputKeyClass(IntWritable.class);
        job.setMapOutputValueClass(IntWritable.class);

        // Reduce phase. A single reducer is required for a globally sorted
        // result: the shuffle sort only orders keys within each partition.
        job.setNumReduceTasks(1);
        job.setReducerClass(SortReducer.class);
        // Must match SortReducer's actual output key type (was Text.class,
        // which is inconsistent with Reducer<..., IntWritable, IntWritable>).
        job.setOutputKeyClass(IntWritable.class);
        job.setOutputValueClass(IntWritable.class);

        // Output path (must not already exist):
        Path outPath = new Path(args[1]);
        FileOutputFormat.setOutputPath(job, outPath);

        boolean isSuccess = job.waitForCompletion(true);
        // Process-exit convention: 0 = success, non-zero = failure.
        // (The original returned 1 on success, making System.exit report failure.)
        return isSuccess ? 0 : 1;
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        int status = ToolRunner.run(conf, new SortMapReduce(), args);
        System.exit(status);
    }
}

pom.xml

<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>hadoop</groupId>
    <artifactId>countMapReduce</artifactId>
    <version>1.0-SNAPSHOT</version>

    <repositories>
        <repository>
            <id>apache</id>
            <!-- NOTE(review): http://maven.apache.org is the Maven project homepage,
                 not an artifact repository URL. Artifacts will resolve from Maven
                 Central anyway; this entry is likely dead weight — verify and
                 consider pointing it at a real repository or removing it. -->
            <url>http://maven.apache.org</url>
        </repository>
    </repositories>
    <dependencies>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-common</artifactId>
            <version>2.7.3</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-hdfs</artifactId>
            <version>2.7.3</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-client</artifactId>
            <version>2.7.3</version>
        </dependency>
        <!-- NOTE(review): hadoop-core 1.2.1 is the legacy Hadoop-1 artifact and
             overlaps/conflicts with the 2.7.3 artifacts above (duplicate classes,
             old mapred API). The source here uses the new mapreduce API, which
             hadoop-client 2.7.3 already provides — confirm and consider dropping
             this dependency. -->
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-core</artifactId>
            <version>1.2.1</version>
        </dependency>
    </dependencies>
    <build>
        <plugins>
            <plugin>
                <artifactId>maven-dependency-plugin</artifactId>
                <configuration>
                    <excludeTransitive>false</excludeTransitive>
                    <stripVersion>true</stripVersion>
                    <outputDirectory>./lib</outputDirectory>
                </configuration>

            </plugin>
        </plugins>
    </build>
</project>
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章