Shuffle分区及排序

原創

2019-07-30 02:59

所有的思路都在思维导图上，在这里直接实战进行自定义分区和排序（由于使用了 4 个分区，结果是每个分区内按总流量降序排列，即区内排序，而不是全局的全排序）

//编写Bean对象
package flow1;

import org.apache.hadoop.io.WritableComparable;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

/**
 * Map-output key holding a single total-flow value.
 *
 * <p>Instances order themselves by {@code sumFlow} in DESCENDING order (larger
 * values sort first). {@link #equals(Object)} and {@link #hashCode()} are defined
 * on the same field so that equality agrees with {@link #compareTo(FlowBean)}.
 *
 * <p>The serialized form is exactly one {@code int}: {@link #write(DataOutput)}
 * emits it and {@link #readFields(DataInput)} reads it back.
 */
public class FlowBean implements WritableComparable<FlowBean>{

    /** Total flow; the only state that is serialized, compared and printed. */
    private int sumFlow;

    /** Creates a bean whose total flow is 0 until set or deserialized. */
    public FlowBean(){}

    /**
     * Creates a bean with the given total flow.
     *
     * @param sumFlow the total flow value
     */
    public FlowBean(int sumFlow) {
        this.sumFlow = sumFlow;
    }

    /**
     * Compares by total flow, descending.
     *
     * @param a the bean to compare against
     * @return -1 if this bean's flow is larger than {@code a}'s, 1 if it is
     *         smaller, 0 if both are equal
     */
    public int compareTo(FlowBean a) {
        // Arguments are deliberately swapped to obtain descending order.
        return Integer.compare(a.getSumFlow(), sumFlow);
    }

    /**
     * Serializes this bean as a single int.
     *
     * @param dataOutput sink to write to
     * @throws IOException if the underlying write fails
     */
    public void write(DataOutput dataOutput) throws IOException {
        dataOutput.writeInt(sumFlow);
    }

    /**
     * Restores this bean from the single int produced by {@link #write(DataOutput)}.
     *
     * @param dataInput source to read from
     * @throws IOException if the underlying read fails
     */
    public void readFields(DataInput dataInput) throws IOException {
        sumFlow = dataInput.readInt();
    }

    /** @return the total flow */
    public int getSumFlow() {
        return sumFlow;
    }

    /** @param sumFlow the new total flow */
    public void setSumFlow(int sumFlow) {
        this.sumFlow = sumFlow;
    }

    /**
     * Two beans are equal when their total flow is equal, which is exactly when
     * {@link #compareTo(FlowBean)} returns 0.
     */
    @Override
    public boolean equals(Object other) {
        if (this == other) {
            return true;
        }
        if (!(other instanceof FlowBean)) {
            return false;
        }
        return sumFlow == ((FlowBean) other).sumFlow;
    }

    /** Hash derived from the same field used by {@link #equals(Object)}. */
    @Override
    public int hashCode() {
        return sumFlow;
    }

    /** @return the total flow rendered as a decimal string */
    @Override
    public String toString() {
        return ""+sumFlow;
    }
}

//编写Mapper
package flow1;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

/**
 * Mapper that turns each input line of the form {@code <sumFlow> <text>} into a
 * ({@link FlowBean}, {@link Text}) pair: the first field is parsed as an int and
 * becomes the key, the second field becomes the value.
 */
public class FlowMapper extends Mapper<LongWritable,Text,FlowBean,Text>{

    /** Output key buffer, reused for every record instead of allocating per call. */
    private final FlowBean outKey = new FlowBean();

    /** Output value buffer, reused for every record instead of allocating per call. */
    private final Text outValue = new Text();

    /**
     * Parses one input line and emits it keyed by its total flow.
     *
     * <p>Blank lines are skipped. Fields may be separated by any run of
     * whitespace; fields after the second are ignored.
     *
     * @param key     input key supplied by the framework; only used in error messages
     * @param value   the raw input line
     * @param context sink for the emitted pair
     * @throws IllegalArgumentException if a non-blank line has fewer than two
     *         fields, or (as {@link NumberFormatException}) if the first field
     *         is not a valid int
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        String line = value.toString().trim();
        if (line.isEmpty()) {
            // A blank line (e.g. a trailing newline in the input) carries no record.
            return;
        }

        // Split on whitespace runs so doubled spaces or tabs do not yield empty fields.
        String[] fields = line.split("\\s+");
        if (fields.length < 2) {
            throw new IllegalArgumentException(
                    "Malformed record at offset " + key.get()
                            + ": expected \"<sumFlow> <text>\" but got \"" + line + "\"");
        }

        outKey.setSumFlow(Integer.parseInt(fields[0]));
        outValue.set(fields[1]);
        context.write(outKey, outValue);
    }
}

//编写Reduce
package flow1;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

/**
 * Reducer that writes every value it receives back out, paired with the textual
 * form of its {@link FlowBean} key. It performs no aggregation.
 */
public class FlowReduce extends Reducer<FlowBean,Text,Text,Text> {

    /** Output key buffer, reused across records instead of allocating a Text per value. */
    private final Text outKey = new Text();

    /**
     * Emits one {@code (key.toString(), value)} pair per incoming value.
     *
     * @param key     the flow key for this group
     * @param values  the values grouped under {@code key}
     * @param context sink for the emitted pairs
     */
    @Override
    protected void reduce(FlowBean key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
        for (Text value : values) {
            // The key text is refreshed for every value rather than hoisted out of
            // the loop, so each pair is built from the key as it is at that moment.
            outKey.set(key.toString());
            context.write(outKey, value);
        }
    }
}

//编写分区相关类
package flow1;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Partitioner;

/**
 * Partitioner that routes records by the leading three characters of the
 * map-output VALUE (not the key):
 * <ul>
 *   <li>{@code "138"} -> partition 0</li>
 *   <li>{@code "135"} -> partition 1</li>
 *   <li>{@code "151"} -> partition 2</li>
 *   <li>anything else (including values shorter than three characters) -> partition 3</li>
 * </ul>
 */
public class FlowPartition extends Partitioner<FlowBean,Text> {

    /** Bucket used for every value that matches none of the known prefixes. */
    private static final int OTHER_BUCKET = 3;

    /**
     * Chooses the partition for one record.
     *
     * @param flowBean the map-output key; not consulted
     * @param text     the map-output value whose prefix selects the partition
     * @param i        the number of partitions available
     * @return the bucket listed in the class description, folded into
     *         {@code [0, i)} when fewer than four partitions are available
     */
    public int getPartition(FlowBean flowBean, Text text, int i) {
        String s = text.toString();

        // startsWith is safe for values shorter than the prefix, unlike substring(0,3).
        int bucket;
        if (s.startsWith("138")) {
            bucket = 0;
        } else if (s.startsWith("135")) {
            bucket = 1;
        } else if (s.startsWith("151")) {
            bucket = 2;
        } else {
            bucket = OTHER_BUCKET;
        }

        // Never return an index outside the available partitions; with four or
        // more partitions this leaves the bucket unchanged.
        return i > 0 ? bucket % i : bucket;
    }
}

//编写驱动类
package flow1;


import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

/**
 * Entry point that configures and runs the flow job: {@link FlowMapper} ->
 * {@link FlowPartition} -> {@link FlowReduce}, with four reduce tasks.
 */
public class FlowDriver {

    /** Input path used when no paths are given on the command line. */
    private static final String DEFAULT_INPUT = "F:/Test/num.txt";

    /** Output path used when no paths are given on the command line. */
    private static final String DEFAULT_OUTPUT = "F:/Test/output";

    /**
     * Builds the job, runs it to completion and exits with status 0 on success
     * or 1 on failure.
     *
     * @param args {@code args[0]} is the input path and {@code args[1]} the output
     *             path; when fewer than two arguments are supplied the built-in
     *             default paths are used instead
     * @throws IOException            if job setup or submission fails
     * @throws ClassNotFoundException if a job class cannot be resolved
     * @throws InterruptedException   if the wait for completion is interrupted
     */
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        // Honour command-line paths; fall back to the defaults only when they are missing.
        if (args == null || args.length < 2) {
            args = new String[]{DEFAULT_INPUT, DEFAULT_OUTPUT};
        }

        // Create the configuration and obtain the Job.
        Configuration con = new Configuration();
        Job job = Job.getInstance(con);

        // Identify the jar by a class it contains.
        job.setJarByClass(FlowDriver.class);

        // Mapper and reducer implementations.
        job.setMapperClass(FlowMapper.class);
        job.setReducerClass(FlowReduce.class);

        // Map output types.
        job.setMapOutputKeyClass(FlowBean.class);
        job.setMapOutputValueClass(Text.class);

        // Four reduce tasks, partitioned by FlowPartition.
        job.setNumReduceTasks(4);
        job.setPartitionerClass(FlowPartition.class);

        // Final output types.
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        // Input and output locations.
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        // Wait for the job instead of fire-and-forget submit(), so that the
        // outcome is reported and reflected in the process exit status.
        boolean succeeded = job.waitForCompletion(true);
        System.exit(succeeded ? 0 : 1);
    }
}

發表評論

所有評論

還沒有人評論，想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.

Shuffle分区及排序

SpringBoot開發中實現對前端返回數據的一致及錯誤異常統一處理

使用mybatis-generator自動生成Mybatis

SSM整合(Maven)筆記

com.alibaba.druid.pool.DruidDataSource : create connection error, url: jdbc:mysql:// java.sql.SQLE

SpringBoot引入durid時報錯

https://yachay.unat.edu.pe/blog/index.php?comment_area=format_blog&comment_component=blog&comment_co

linux以太網驅動總結