Hadoop MR mapreduce和序列化

mapreduce

分三部分

mapper
reducer
driver
仿寫 wordCount

/**
 * Mapper of the word-count job: splits each input line on single spaces and
 * emits a {@code (word, 1)} pair for every non-empty token.
 *
 * <p>The input key is ignored; only the line text is used.
 *
 * <p>Created 2020-03-31 20:48.
 *
 * @author zcz
 */
public class WordCountMapper extends Mapper<LongWritable, Text, Text, IntWritable> {

    /** Output key, reused for every emitted word. */
    private final Text text = new Text();

    /** Output value: each occurrence of a word counts as one. */
    private final IntWritable count = new IntWritable(1);

    /**
     * Emits {@code (word, 1)} for every non-empty, space-separated token of the line.
     *
     * @param key     input key of the record (unused)
     * @param value   one line of input text
     * @param context task context the output pairs are written to
     * @throws IOException          if writing an output pair fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        String line = value.toString();

        for (String word : line.split(" ")) {
            // split(" ") produces "" for blank lines and for leading or repeated
            // spaces; those are not words and must not be counted.
            if (word.isEmpty()) {
                continue;
            }
            text.set(word);
            context.write(text, count);
        }
    }
}

/**
 * Reducer of the word-count job: adds up all counts received for a word and
 * writes a single {@code (word, total)} pair.
 *
 * <p>Created 2020-03-31 21:19.
 *
 * @author zcz
 */
public class WordCountReducer extends Reducer<Text, IntWritable, Text, IntWritable> {

    /** Output value, reused across reduce calls. */
    private final IntWritable total = new IntWritable();

    /**
     * Sums the values of one key and writes the result.
     *
     * @param key     the word
     * @param values  the counts emitted for that word
     * @param context task context the output pair is written to
     * @throws IOException          if writing the output pair fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
        int accumulated = 0;
        for (IntWritable partial : values) {
            accumulated += partial.get();
        }

        total.set(accumulated);
        context.write(key, total);
    }
}

/**
 * Driver of the word-count job: configures the job, submits it and reports
 * its outcome through the process exit code.
 *
 * <p>Created 2020-03-31 21:33.
 *
 * @author zcz
 */
public class WordCountDriver {

    /**
     * Configures and runs the word-count job.
     *
     * <p>Exits with status 0 when the job succeeds, 1 when it fails and 2 when
     * the arguments are missing.
     *
     * @param args {@code args[0]} is the input path, {@code args[1]} the output path
     * @throws IOException            if the job cannot be created or submitted
     * @throws ClassNotFoundException if a class configured on the job cannot be found
     * @throws InterruptedException   if waiting for the job is interrupted
     */
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        if (args.length < 2) {
            System.err.println("Usage: WordCountDriver <input path> <output path>");
            System.exit(2);
        }

        // Create the job.
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);
        // Class used to locate the jar that contains the job.
        job.setJarByClass(WordCountDriver.class);
        // Mapper and reducer run by the job.
        job.setMapperClass(WordCountMapper.class);
        job.setReducerClass(WordCountReducer.class);
        // Output types of the mapper.
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        // Output types of the reducer.
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        // Input and output paths of the job.
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        // Run the job and turn its result into the exit code, so that callers
        // (shell scripts, schedulers) can detect a failed job.
        boolean result = job.waitForCompletion(true);
        System.exit(result ? 0 : 1);
    }
}

項目配置 maven 打包 jar

	<!-- Add the packaging plugin to the pom file -->
    <build>
        <pluginManagement>
            <plugins>
                <plugin>
                    <groupId>org.apache.maven.plugins</groupId>
                    <artifactId>maven-jar-plugin</artifactId>
                    <version>2.4</version>
                    <configuration>
                        <archive>
                            <manifest>
                                <addClasspath>true</addClasspath>
                                <classpathPrefix>lib/</classpathPrefix>
                                <!-- Fully qualified name of the main class -->
                                <mainClass>com.zcz.study.hadoop.mapreduce.WordCountDriver</mainClass>
                            </manifest>
                        </archive>
                    </configuration>
                </plugin>
            </plugins>
        </pluginManagement>
    </build>

mvn clean package

hadoop jar customerWorkCount.jar com.zcz.study.hadoop.mapreduce.WordCountDriver /user/zcz/input/ /user/zcz/output/

序列化

Hadoop 中 mapper 與 reducer 輸出的對象都必須是 Hadoop 可序列化的對象, 即實現 Writable 接口
mapper 與 reducer 輸出的 key 還必須是可比較的, 即實現 Comparable 接口
MapReduce 中的對象可以實現 WritableComparable 接口, 同時支持序列化和比較
mapper過程會對 key 進行排序, 所以 value 可以不需要比較

/**
 * Hadoop-serializable value holding an up-flow amount, a down-flow amount and
 * their sum.
 *
 * <p>Instances are ordered by {@code sumFlow} in ascending order. The fields
 * are stored as primitives, so an instance created with the no-argument
 * constructor holds zeros and can be serialized right away.
 *
 * <p>Created 2020-04-01 20:53.
 *
 * @author zcz
 */
public class Flow implements WritableComparable<Flow> {
    private long upFlow;

    private long downFlow;

    private long sumFlow;

    /** Creates an instance with all values set to zero. */
    public Flow() {
    }

    /**
     * Creates an instance from the two flow amounts; the sum is derived from them.
     *
     * @param upFlow   up-flow amount, must not be {@code null}
     * @param downFlow down-flow amount, must not be {@code null}
     */
    public Flow(Long upFlow, Long downFlow) {
        this.upFlow = upFlow;
        this.downFlow = downFlow;
        this.sumFlow = this.upFlow + this.downFlow;
    }

    /** @return the up-flow amount */
    public Long getUpFlow() {
        return upFlow;
    }

    /** @param upFlow new up-flow amount, must not be {@code null}; the sum is not recomputed */
    public void setUpFlow(Long upFlow) {
        this.upFlow = upFlow;
    }

    /** @return the down-flow amount */
    public Long getDownFlow() {
        return downFlow;
    }

    /** @param downFlow new down-flow amount, must not be {@code null}; the sum is not recomputed */
    public void setDownFlow(Long downFlow) {
        this.downFlow = downFlow;
    }

    /** @return the stored sum */
    public Long getSumFlow() {
        return sumFlow;
    }

    /** @param sumFlow new sum, must not be {@code null} */
    public void setSumFlow(Long sumFlow) {
        this.sumFlow = sumFlow;
    }

    /**
     * Serializes the three values as longs, in the order up, down, sum.
     *
     * @param out target of the serialized form
     * @throws IOException if writing fails
     */
    @Override
    public void write(DataOutput out) throws IOException {
        out.writeLong(upFlow);
        out.writeLong(downFlow);
        out.writeLong(sumFlow);
    }

    /**
     * Restores the three values; the read order must match {@link #write(DataOutput)}.
     *
     * @param in source of the serialized form
     * @throws IOException if reading fails
     */
    @Override
    public void readFields(DataInput in) throws IOException {
        this.upFlow = in.readLong();
        this.downFlow = in.readLong();
        this.sumFlow = in.readLong();
    }

    /** @return the up, down and sum values separated by tabs */
    @Override
    public String toString() {
        return upFlow + "\t" +
                downFlow + "\t" +
                sumFlow;
    }

    /**
     * Orders instances by their sum, ascending.
     *
     * <p>Returns 0 for equal sums, as the {@code Comparable} contract requires;
     * a comparison that never returns 0 is not symmetric for equal elements.
     *
     * @param o the instance to compare with
     * @return a negative value, zero or a positive value when this sum is less
     *         than, equal to or greater than the other sum
     */
    @Override
    public int compareTo(Flow o) {
        return Long.compare(this.sumFlow, o.sumFlow);
    }
}

/**
 * Mapper of the flow job: parses a tab-separated line and emits field 1 as the
 * key together with a {@link Flow} built from fields 3 and 4 (zero-based).
 *
 * <p>Lines with fewer than five fields, or with non-numeric flow fields, are
 * skipped and counted in the {@code FlowMapper/MALFORMED_RECORDS} counter
 * instead of failing the whole task.
 *
 * <p>Created 2020-04-01 20:59.
 *
 * @author zcz
 */
public class FlowMapper extends Mapper<LongWritable, Text, Text, Flow> {

    /** Number of fields a line needs so that indices 1, 3 and 4 exist. */
    private static final int MIN_FIELDS = 5;

    /** Output key, reused for every emitted record. */
    private final Text outKey = new Text();

    /**
     * Parses one line and emits its key and flow values.
     *
     * @param key     input key of the record (unused)
     * @param value   one tab-separated line of input
     * @param context task context the output pair is written to
     * @throws IOException          if writing the output pair fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        String[] strs = value.toString().split("\t");

        // Blank or truncated lines do not have the fields read below.
        if (strs.length < MIN_FIELDS) {
            context.getCounter("FlowMapper", "MALFORMED_RECORDS").increment(1);
            return;
        }

        long up;
        long down;
        try {
            up = Long.parseLong(strs[3]);
            down = Long.parseLong(strs[4]);
        } catch (NumberFormatException ignored) {
            // A non-numeric flow field makes the record unusable; count it and move on.
            context.getCounter("FlowMapper", "MALFORMED_RECORDS").increment(1);
            return;
        }

        outKey.set(strs[1]);
        context.write(outKey, new Flow(up, down));
    }
}

/**
 * Reducer of the flow job: adds up the up-flow and down-flow amounts of all
 * values of a key and writes one {@link Flow} holding the totals.
 *
 * <p>Created 2020-04-01 21:09.
 *
 * @author zcz
 */
public class FlowReducer extends Reducer<Text, Flow,Text, Flow> {

    /**
     * Sums the flow amounts of one key and writes the result.
     *
     * @param key     the key shared by all values
     * @param values  the flow records emitted for that key
     * @param context task context the output pair is written to
     * @throws IOException          if writing the output pair fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void reduce(Text key, Iterable<Flow> values, Context context) throws IOException, InterruptedException {
        // Primitive accumulators: boxed Longs would be re-boxed on every iteration.
        long up = 0L;
        long down = 0L;
        for (Flow flow : values) {
            up += flow.getUpFlow();
            down += flow.getDownFlow();
        }
        // The constructor derives the sum from the two totals, so it is not accumulated here.
        context.write(key, new Flow(up, down));
    }
}

/**
 * Driver of the flow job: configures the job with {@link FlowMapper} and
 * {@link FlowReducer}, submits it and reports its outcome through the process
 * exit code.
 *
 * <p>Created 2020-04-01 21:15.
 *
 * @author zcz
 */
public class SerializeDriver {

    /**
     * Configures and runs the flow job.
     *
     * <p>Exits with status 0 when the job succeeds, 1 when it fails and 2 when
     * the arguments are missing.
     *
     * @param args {@code args[0]} is the input path, {@code args[1]} the output path
     * @throws IOException            if the job cannot be created or submitted
     * @throws ClassNotFoundException if a class configured on the job cannot be found
     * @throws InterruptedException   if waiting for the job is interrupted
     */
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        if (args.length < 2) {
            System.err.println("Usage: SerializeDriver <input path> <output path>");
            System.exit(2);
        }

        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);

        // Class used to locate the jar that contains the job.
        job.setJarByClass(SerializeDriver.class);

        // Output types of the mapper.
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Flow.class);

        // Output types of the reducer.
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Flow.class);

        job.setMapperClass(FlowMapper.class);
        job.setReducerClass(FlowReducer.class);

        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        // Turn the job result into the exit code so that callers can detect a failed job.
        boolean result = job.waitForCompletion(true);

        System.exit(result ? 0 : 1);
    }

}

java 類型對應 mapred 序列化類型

Java類型	Hadoop Writable類型
boolean	BooleanWritable
byte	ByteWritable
int	IntWritable
float	FloatWritable
long	LongWritable
double	DoubleWritable
String	Text
map	MapWritable
array	ArrayWritable

Hadoop MR mapreduce和序列化

mapreduce

序列化

nginx+fastDFS+lua+graphicMagick部署

hadoop完全高可用配置

Hadoop MR Shuffle

springCloud-ribbon 源碼分析

hadoop hdfs NameNode 與 Secondary NameNode元數據操作

Mac下配置sublime實現LaTeX

https://yachay.unat.edu.pe/blog/index.php?comment_area=format_blog&comment_component=blog&comment_co

linux以太網驅動總結