MapReduce分佈式離線計算框架學習摘要,時間溫度排序(二)

對溫度進行排序實例(實例來源於《從零開始學Hadoop大數據分析》)
現有如下溫度數據,請根據這些數據找出每年每月溫度最高的3條記錄,並按溫度降序輸出其時間及溫度:
2010-01-01 12:00:21    8
2010-01-02 12:00:21    12
2010-01-03 12:00:21    10
2010-01-04 12:00:21    8
2010-01-05 12:00:21    8
2010-01-06 12:00:21    8
2010-01-07 12:00:21    8
2010-02-01 12:00:21    8
2010-02-02 12:00:21    12
2010-02-03 12:00:21    10
2010-02-04 12:00:21    8
2010-02-05 12:00:21    8
2010-02-06 12:00:21    8
2010-02-07 12:00:21    8
2010-03-01 12:00:21    8
2010-03-02 12:00:21    12
2010-03-03 12:00:21    10
2010-03-04 12:00:21    8
2010-03-05 12:00:21    8
2010-03-06 12:00:21    8
2010-03-07 12:00:21    8
2011-01-01 12:00:21    8
2011-01-02 12:00:21    12
2011-01-03 12:00:21    10
2011-01-04 12:00:21    8
2011-01-05 12:00:21    8
2011-01-06 12:00:21    8
2011-01-07 12:00:21    8
2011-02-01 12:00:21    8
2011-02-02 12:00:21    12
2011-02-03 12:00:21    10
2011-02-04 12:00:21    8
2011-02-05 12:00:21    8
2011-02-06 12:00:21    8
2011-02-07 12:00:21    8
2011-03-01 12:00:21    8
2011-03-02 12:00:21    12
2011-03-03 12:00:21    10
2011-03-04 12:00:21    8
2011-03-05 12:00:21    8
2011-03-06 12:00:21    8
2011-03-07 12:00:21    8
2012-01-01 12:00:21    8
2012-01-02 12:00:21    12
2012-01-03 12:00:21    10
2012-01-04 12:00:21    8
2012-01-05 12:00:21    8
2012-01-06 12:00:21    8
2012-01-07 12:00:21    8
2012-02-01 12:00:21    8
2012-02-02 12:00:21    12
2012-02-03 12:00:21    10
2012-02-04 12:00:21    8
2012-02-05 12:00:21    8
2012-02-06 12:00:21    8
2012-02-07 12:00:21    8
2012-03-01 12:00:21    8
2012-03-02 12:00:21    12
2012-03-03 12:00:21    10
2012-03-04 12:00:21    8
2012-03-05 12:00:21    8
2012-03-06 12:00:21    8
2012-03-07 12:00:21    8
2013-01-01 12:00:21    8
2013-01-02 12:00:21    12
2013-01-03 12:00:21    10
2013-01-04 12:00:21    8
2013-01-05 12:00:21    8
2013-01-06 12:00:21    8
2013-01-07 12:00:21    8
2013-02-01 12:00:21    8
2013-02-02 12:00:21    12
2013-02-03 12:00:21    10
2013-02-04 12:00:21    8
2013-02-05 12:00:21    8
2013-02-06 12:00:21    8
2013-02-07 12:00:21    8
2013-03-01 12:00:21    8
2013-03-02 12:00:21    12
2013-03-03 12:00:21    10
2013-03-04 12:00:21    8
2013-03-05 12:00:21    8
2013-03-06 12:00:21    8
2013-03-07 12:00:21    8
 
注:每行的年月日時分秒之後是一個tab(製表符),其後緊跟溫度值。
對時間和溫度的封裝類
MyKey 
/**
* 封裝年月及溫度,實現序列化與反序列化
*/
public class MyKey implements WritableComparable {


    private int year;   //年
    private int month;  //月
    private double t;   //溫度


    //getter及setter方法
    public int getYear() {
        return year;
    }


    public void setYear(int year) {
        this.year = year;
    }


    public int getMonth() {
        return month;
    }


    public void setMonth(int month) {
        this.month = month;
    }


    public double getT() {
        return t;
    }


    public void setT(double t) {
        this.t = t;
    }




    @Override
    public int compareTo(Object o) {
        return this==o?0:-1;
    }


    @Override
    public void write(DataOutput dataOutput) throws IOException {
        //序列化過程
        dataOutput.writeInt(year);
        dataOutput.writeInt(month);
        dataOutput.writeDouble(t);
    }


    @Override
    public void readFields(DataInput dataInput) throws IOException {
        //反序列化
        year = dataInput.readInt();
        month = dataInput.readInt();
        t = dataInput.readDouble();
    }
}

 

 
Mapper任務MyMapper
/**
* 這個類把數據解析爲key-value的形式
* 這裏輸入的是key和value都是Text類型,把年、月進行切割後,輸出爲封裝後的MyKey,溫度是Text
*/
public class MyMapper extends Mapper<Text,Text,MyKey,Text> {
    @Override
    protected void map(Text key, Text value, Context context) throws IOException, InterruptedException {
        //年月日通過-分割
        String[] strArray = key.toString().split("-");
        //對MyKey進行封裝
        MyKey myKey = new MyKey();
        myKey.setYear(Integer.parseInt(strArray[0]));
        myKey.setMonth(Integer.parseInt(strArray[0]));
        myKey.setT(Double.parseDouble(value.toString()));
        context.write(myKey,new Text(key.toString() + "\t" + value));
    }
}

 

 
數據分組MyGroup
public class MyGroup extends WritableComparator {


    //繼承WritableComparator類來實現排序
    public MyGroup(){
        super(MyKey.class,true);
    }


    @Override
    public int compare(WritableComparable a, WritableComparable b) {


        MyKey myKey1 = (MyKey) a;
        MyKey myKey2 = (MyKey) b;
        //以年做對比,如果在同一年則返回所在月份,不在同一年則返回比較結果
        int r1 = Integer.compare(myKey1.getYear(),myKey2.getYear());
        if(r1 == 0){
            //同年
            return Integer.compare(myKey1.getMonth(),myKey2.getMonth());
        }
        //非同年
        return r1;
    }
}

 

 
排序類MySort
public class MySort extends WritableComparator {


    public MySort(){
        super(MyKey.class,true);
    }


    @Override
    public int compare(WritableComparable a, WritableComparable b) {


        //通過MyKey進行排序處理分組合並
        MyKey myKey1 = (MyKey) a;
        MyKey myKey2 = (MyKey) b;


        //以年作爲比較
        int r1 = Integer.compare(myKey1.getYear(),myKey2.getYear());
        if(r1 == 0){
            //同年,則比較月,年不同則返回年的比較結果
            int r2 = Integer.compare(myKey1.getMonth(),myKey2.getMonth());
            if(r2 == 0){
                //月相等則把溫度倒序排,月不同則返回月的比較結果
                return -Double.compare(myKey1.getT(),myKey2.getT());
            }
            return r2;
        }
        return r1;
    }
}

 

 
數據分區MyPartitioner
/**
* 分區,用來控制Reducer的數量
*/
public class MyPartitioner extends Partitioner<MyKey,Text> {


    @Override
    public int getPartition(MyKey myKey, Text text, int i) {
        //以年份作爲分區
        return myKey.getYear()%i;
    }
}

Reducer任務MyReducer
public class MyReducer extends Reducer<MyKey,Text,NullWritable,Text> {
    //取出前三個
    @Override
    protected void reduce(MyKey key, Iterable<Text> values, Context context) throws IOException, InterruptedException {


        int sum = 0;    //這是一個計數器
        for(Text t:values){
            sum++;
            //如果大於3則跳出來
            if(sum > 3){
                break;
            } else {
                context.write(NullWritable.get(),t);
            }
        }
    }
}

 

主函數RunJob
public class RunJob {


    public static void main(String[] args) {
        Configuration conf = new Configuration();
        //NameNode的入口
        conf.set("fs.defaultFS","hdfs://192.168.2.4:8020");
        FileSystem fs = null;
        try {
            fs = FileSystem.get(conf);
        } catch (IOException e) {
            e.printStackTrace();
        }


        Job job = null;
        try {
            //定義任務
            job = Job.getInstance(conf,"weather");
        } catch (IOException e) {
            e.printStackTrace();
        }
        //主方法
        job.setJarByClass(RunJob.class);
        //mapper方法
        job.setMapperClass(MyMapper.class);
        //InputFormat方法
        job.setInputFormatClass(KeyValueTextInputFormat.class);
        //Reducer方法
        job.setReducerClass(MyReducer.class);
        //Partitioner方法
        job.setPartitionerClass(MyPartitioner.class);
        //SortComparator方法
        job.setSortComparatorClass(MySort.class);
        //GroupingComparator方法
        job.setGroupingComparatorClass(MyGroup.class);


        //Reducer Text的數量
        job.setNumReduceTasks(3);
        //Map輸出key類型
        job.setOutputKeyClass(MyKey.class);
        //Map輸出value類型
        job.setOutputValueClass(Text.class);


        //讀取文件的位置
        File f = new File("ETLDemo2\\temp");
        //System.out.println(f.getAbsolutePath());
        Path inpuPath = new Path("/usr/input/data/weather");
        Path path = new Path(f.getAbsolutePath());
        try {
            //創建目錄(目錄不存在時創建)
            if(!fs.exists(inpuPath)){
                fs.mkdirs(inpuPath);
            }
            //上傳文件(文件不存在時上傳)
            Path filePath = new Path(inpuPath.toString() + "/temp");
            if(!fs.exists(filePath)) {
                fs.copyFromLocalFile(path, filePath);
            }
            FileInputFormat.addInputPath(job,inpuPath);
        } catch (IOException e) {
            e.printStackTrace();
        }


        try {
            //輸出文件位置
            Path outPath = new Path("/usr/output/data/weather");
            if(fs.exists(outPath)){
                fs.delete(outPath,true);
            }
            FileOutputFormat.setOutputPath(job,outPath);
        } catch (IOException e) {
            e.printStackTrace();
        }


        try {
            job.waitForCompletion(true);
        } catch (Exception e) {
            e.printStackTrace();
        }


    }
}

 

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章