MapReduce分布式离线计算框架学习摘要,时间温度排序(二)

对温度进行排序实例(实例来源于《从零开始学Hadoop大数据分析》)
现有如下温度数据,要求根据这些数据找出每年每月温度最高的3条记录(年月及温度),并按温度降序排列:
2010-01-01 12:00:21    8
2010-01-02 12:00:21    12
2010-01-03 12:00:21    10
2010-01-04 12:00:21    8
2010-01-05 12:00:21    8
2010-01-06 12:00:21    8
2010-01-07 12:00:21    8
2010-02-01 12:00:21    8
2010-02-02 12:00:21    12
2010-02-03 12:00:21    10
2010-02-04 12:00:21    8
2010-02-05 12:00:21    8
2010-02-06 12:00:21    8
2010-02-07 12:00:21    8
2010-03-01 12:00:21    8
2010-03-02 12:00:21    12
2010-03-03 12:00:21    10
2010-03-04 12:00:21    8
2010-03-05 12:00:21    8
2010-03-06 12:00:21    8
2010-03-07 12:00:21    8
2011-01-01 12:00:21    8
2011-01-02 12:00:21    12
2011-01-03 12:00:21    10
2011-01-04 12:00:21    8
2011-01-05 12:00:21    8
2011-01-06 12:00:21    8
2011-01-07 12:00:21    8
2011-02-01 12:00:21    8
2011-02-02 12:00:21    12
2011-02-03 12:00:21    10
2011-02-04 12:00:21    8
2011-02-05 12:00:21    8
2011-02-06 12:00:21    8
2011-02-07 12:00:21    8
2011-03-01 12:00:21    8
2011-03-02 12:00:21    12
2011-03-03 12:00:21    10
2011-03-04 12:00:21    8
2011-03-05 12:00:21    8
2011-03-06 12:00:21    8
2011-03-07 12:00:21    8
2012-01-01 12:00:21    8
2012-01-02 12:00:21    12
2012-01-03 12:00:21    10
2012-01-04 12:00:21    8
2012-01-05 12:00:21    8
2012-01-06 12:00:21    8
2012-01-07 12:00:21    8
2012-02-01 12:00:21    8
2012-02-02 12:00:21    12
2012-02-03 12:00:21    10
2012-02-04 12:00:21    8
2012-02-05 12:00:21    8
2012-02-06 12:00:21    8
2012-02-07 12:00:21    8
2012-03-01 12:00:21    8
2012-03-02 12:00:21    12
2012-03-03 12:00:21    10
2012-03-04 12:00:21    8
2012-03-05 12:00:21    8
2012-03-06 12:00:21    8
2012-03-07 12:00:21    8
2013-01-01 12:00:21    8
2013-01-02 12:00:21    12
2013-01-03 12:00:21    10
2013-01-04 12:00:21    8
2013-01-05 12:00:21    8
2013-01-06 12:00:21    8
2013-01-07 12:00:21    8
2013-02-01 12:00:21    8
2013-02-02 12:00:21    12
2013-02-03 12:00:21    10
2013-02-04 12:00:21    8
2013-02-05 12:00:21    8
2013-02-06 12:00:21    8
2013-02-07 12:00:21    8
2013-03-01 12:00:21    8
2013-03-02 12:00:21    12
2013-03-03 12:00:21    10
2013-03-04 12:00:21    8
2013-03-05 12:00:21    8
2013-03-06 12:00:21    8
2013-03-07 12:00:21    8
 
注:每行中“年月日 时分秒”之后是一个制表符(Tab),制表符之后是温度值。
对时间和温度的封装类
MyKey 
/**
* 封装年月及温度,实现序列化与反序列化
*/
public class MyKey implements WritableComparable {


    private int year;   //年
    private int month;  //月
    private double t;   //温度


    //getter及setter方法
    public int getYear() {
        return year;
    }


    public void setYear(int year) {
        this.year = year;
    }


    public int getMonth() {
        return month;
    }


    public void setMonth(int month) {
        this.month = month;
    }


    public double getT() {
        return t;
    }


    public void setT(double t) {
        this.t = t;
    }




    @Override
    public int compareTo(Object o) {
        return this==o?0:-1;
    }


    @Override
    public void write(DataOutput dataOutput) throws IOException {
        //序列化过程
        dataOutput.writeInt(year);
        dataOutput.writeInt(month);
        dataOutput.writeDouble(t);
    }


    @Override
    public void readFields(DataInput dataInput) throws IOException {
        //反序列化
        year = dataInput.readInt();
        month = dataInput.readInt();
        t = dataInput.readDouble();
    }
}

 

 
Mapper任务MyMapper
/**
* 这个类把数据解析为key-value的形式
* 这里输入的是key和value都是Text类型,把年、月进行切割后,输出为封装后的MyKey,温度是Text
*/
public class MyMapper extends Mapper<Text,Text,MyKey,Text> {
    @Override
    protected void map(Text key, Text value, Context context) throws IOException, InterruptedException {
        //年月日通过-分割
        String[] strArray = key.toString().split("-");
        //对MyKey进行封装
        MyKey myKey = new MyKey();
        myKey.setYear(Integer.parseInt(strArray[0]));
        myKey.setMonth(Integer.parseInt(strArray[0]));
        myKey.setT(Double.parseDouble(value.toString()));
        context.write(myKey,new Text(key.toString() + "\t" + value));
    }
}

 

 
数据分组MyGroup
public class MyGroup extends WritableComparator {


    //继承WritableComparator类来实现排序
    public MyGroup(){
        super(MyKey.class,true);
    }


    @Override
    public int compare(WritableComparable a, WritableComparable b) {


        MyKey myKey1 = (MyKey) a;
        MyKey myKey2 = (MyKey) b;
        //以年做对比,如果在同一年则返回所在月份,不在同一年则返回比较结果
        int r1 = Integer.compare(myKey1.getYear(),myKey2.getYear());
        if(r1 == 0){
            //同年
            return Integer.compare(myKey1.getMonth(),myKey2.getMonth());
        }
        //非同年
        return r1;
    }
}

 

 
排序类MySort
public class MySort extends WritableComparator {


    public MySort(){
        super(MyKey.class,true);
    }


    @Override
    public int compare(WritableComparable a, WritableComparable b) {


        //通过MyKey进行排序处理分组合并
        MyKey myKey1 = (MyKey) a;
        MyKey myKey2 = (MyKey) b;


        //以年作为比较
        int r1 = Integer.compare(myKey1.getYear(),myKey2.getYear());
        if(r1 == 0){
            //同年,则比较月,年不同则返回年的比较结果
            int r2 = Integer.compare(myKey1.getMonth(),myKey2.getMonth());
            if(r2 == 0){
                //月相等则把温度倒序排,月不同则返回月的比较结果
                return -Double.compare(myKey1.getT(),myKey2.getT());
            }
            return r2;
        }
        return r1;
    }
}

 

 
数据分区MyPartitioner
/**
* 分区,用来控制Reducer的数量
*/
public class MyPartitioner extends Partitioner<MyKey,Text> {


    @Override
    public int getPartition(MyKey myKey, Text text, int i) {
        //以年份作为分区
        return myKey.getYear()%i;
    }
}

Reducer任务MyReducer
public class MyReducer extends Reducer<MyKey,Text,NullWritable,Text> {
    //取出前三个
    @Override
    protected void reduce(MyKey key, Iterable<Text> values, Context context) throws IOException, InterruptedException {


        int sum = 0;    //这是一个计数器
        for(Text t:values){
            sum++;
            //如果大于3则跳出来
            if(sum > 3){
                break;
            } else {
                context.write(NullWritable.get(),t);
            }
        }
    }
}

 

主函数RunJob
public class RunJob {


    public static void main(String[] args) {
        Configuration conf = new Configuration();
        //NameNode的入口
        conf.set("fs.defaultFS","hdfs://192.168.2.4:8020");
        FileSystem fs = null;
        try {
            fs = FileSystem.get(conf);
        } catch (IOException e) {
            e.printStackTrace();
        }


        Job job = null;
        try {
            //定义任务
            job = Job.getInstance(conf,"weather");
        } catch (IOException e) {
            e.printStackTrace();
        }
        //主方法
        job.setJarByClass(RunJob.class);
        //mapper方法
        job.setMapperClass(MyMapper.class);
        //InputFormat方法
        job.setInputFormatClass(KeyValueTextInputFormat.class);
        //Reducer方法
        job.setReducerClass(MyReducer.class);
        //Partitioner方法
        job.setPartitionerClass(MyPartitioner.class);
        //SortComparator方法
        job.setSortComparatorClass(MySort.class);
        //GroupingComparator方法
        job.setGroupingComparatorClass(MyGroup.class);


        //Reducer Text的数量
        job.setNumReduceTasks(3);
        //Map输出key类型
        job.setOutputKeyClass(MyKey.class);
        //Map输出value类型
        job.setOutputValueClass(Text.class);


        //读取文件的位置
        File f = new File("ETLDemo2\\temp");
        //System.out.println(f.getAbsolutePath());
        Path inpuPath = new Path("/usr/input/data/weather");
        Path path = new Path(f.getAbsolutePath());
        try {
            //创建目录(目录不存在时创建)
            if(!fs.exists(inpuPath)){
                fs.mkdirs(inpuPath);
            }
            //上传文件(文件不存在时上传)
            Path filePath = new Path(inpuPath.toString() + "/temp");
            if(!fs.exists(filePath)) {
                fs.copyFromLocalFile(path, filePath);
            }
            FileInputFormat.addInputPath(job,inpuPath);
        } catch (IOException e) {
            e.printStackTrace();
        }


        try {
            //输出文件位置
            Path outPath = new Path("/usr/output/data/weather");
            if(fs.exists(outPath)){
                fs.delete(outPath,true);
            }
            FileOutputFormat.setOutputPath(job,outPath);
        } catch (IOException e) {
            e.printStackTrace();
        }


        try {
            job.waitForCompletion(true);
        } catch (Exception e) {
            e.printStackTrace();
        }


    }
}

 

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章