MapReduce Study Notes (2)

Yesterday I retyped WordCount from scratch — typing the code out by hand really helps it stick. This morning I picked the review back up; MapReduce is quite pleasant to work with once the pattern clicks. I practiced three small programs: simple data deduplication, computing average scores, and sorting numbers while attaching a sequence number. The code is below. All three follow the same skeleton, and after typing it a few times the structure gradually sinks in.

package demos;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/* Sort the numbers and add a sequence number. Source data:
2
32
654
32
15
756
65223
5956
22
650
92
26
54
6*/
public class AddNums {

    public static void main(String[] args) throws Exception{
        if(args.length!=2){
            System.err.println("user inpath err !");
            System.exit(-1);
        }
        @SuppressWarnings("deprecation")
        Job job=new Job(new Configuration(),"Paixu");
        job.setJarByClass(AddNums.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job,new Path(args[1]));

        job.setMapperClass(saMaps.class);
        job.setReducerClass(saReduce.class);

        job.setMapOutputKeyClass(IntWritable.class);
        job.setMapOutputValueClass(NullWritable.class);

        job.setOutputKeyClass(IntWritable.class);
        job.setOutputValueClass(IntWritable.class);
        job.waitForCompletion(true);

    }
    public static class saMaps extends Mapper<LongWritable, Text, IntWritable, NullWritable>{
        @Override
        protected void map(LongWritable key, Text value,
                Mapper<LongWritable, Text, IntWritable, NullWritable>.Context context)
                throws IOException, InterruptedException {
            String nums = value.toString();
            int n = Integer.parseInt(nums);
            context.write(new IntWritable(n), NullWritable.get());
        }
    }
    //The shuffle phase sorts the map output keys on its own: numeric keys ascending (small to large), text keys in dictionary order.
    public static class saReduce extends Reducer<IntWritable, NullWritable, IntWritable, IntWritable>{
        int counter = 0;
        @Override
        protected void reduce(IntWritable key, Iterable<NullWritable> value,
                Reducer<IntWritable, NullWritable, IntWritable, IntWritable>.Context context)
                throws IOException, InterruptedException {
            counter++;
            context.write(new IntWritable(counter), key);
        }
    }

}
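
A side note from me, not part of the original exercise: because saReduce writes exactly one line per key, duplicate numbers in the input (32 appears twice in the sample data) collapse into a single output line, and the running counter only gives a correct global numbering when the job runs with a single reduce task. If duplicates should be kept, a minimal sketch would iterate the grouped values instead (the class name KeepDuplicatesReduce is just my placeholder):

    public static class KeepDuplicatesReduce
            extends Reducer<IntWritable, NullWritable, IntWritable, IntWritable> {
        private int counter = 0;
        @Override
        protected void reduce(IntWritable key, Iterable<NullWritable> values,
                Context context) throws IOException, InterruptedException {
            // One numbered output line per occurrence of the key, so duplicates
            // such as 32 in the sample data are preserved. Still assumes a single
            // reduce task, otherwise the numbering is not globally continuous.
            for (NullWritable ignored : values) {
                counter++;
                context.write(new IntWritable(counter), key);
            }
        }
    }

Swapping it in would just mean job.setReducerClass(KeepDuplicatesReduce.class) instead of saReduce.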

package demos;
/* Compute average scores. Source data:
 張三 98
李四 96
王五 95
張三 90
李四 92
王五 99
張三 80
李四 90
王五 94
張三 82
李四 92*/
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class Avgs {

    public static void main(String[] args) throws Exception {
        if (args.length!=2) {
            System.err.println("user infos err: <inpath>,<outpath>");
            System.exit(-1);
        }
        @SuppressWarnings("deprecation")
        Job job = new Job(new Configuration(), "savg");
        job.setJarByClass(Avgs.class);
        FileInputFormat.addInputPath(job, new Path(args[0])); // input path
        FileOutputFormat.setOutputPath(job, new Path(args[1])); // output path

        job.setMapperClass(SortMap.class);
        job.setReducerClass(scReduce.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(DoubleWritable.class);
        job.waitForCompletion(true);

    }
    public static class SortMap extends Mapper<LongWritable, Text, Text, IntWritable>{
        @Override
        protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, IntWritable>.Context context)
                throws IOException, InterruptedException {
            String[] lines = value.toString().split("\t");
            String name = lines[0].trim();
            String score = lines[1].trim();
            int sc = Integer.parseInt(score);
            context.write(new Text(name), new IntWritable(sc));

        }
    }
    public static class scReduce extends Reducer<Text, IntWritable, Text, DoubleWritable>{
        @Override
        protected void reduce(Text key, Iterable<IntWritable> value,
                Reducer<Text, IntWritable, Text, DoubleWritable>.Context context)
                throws IOException, InterruptedException {
            int sum = 0;
            int count = 0;
            for (IntWritable sc : value) {
                sum += sc.get();
                count++;
            }
            // cast before dividing, otherwise integer division drops the decimal part
            double avg = (double) sum / count;
            context.write(key, new DoubleWritable(avg));
        }
    }

}
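
For the sample scores above, my own quick arithmetic says the fixed reducer should print one tab-separated line per student:

張三	87.5
李四	92.5
王五	96.0

One more note from me: this reducer cannot simply be reused as a combiner, because an average of partial averages is generally not the overall average — for 張三, avg(avg(98), avg(90, 80, 82)) = (98 + 84) / 2 = 91, not 87.5. A combiner-friendly version would have to pass the sum and the count separately.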

package demos;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/*
  Simple data deduplication. Source data:
2012-3-1 a
2012-3-2 b
2012-3-3 c
2012-3-4 d
2012-3-5 a
2012-3-6 b
2012-3-7 c
2012-3-3 c
2012-3-1 b
2012-3-2 a
2012-3-3 b
2012-3-4 d
2012-3-5 a
2012-3-6 c
2012-3-7 d
2012-3-3 c
Final result:
2012-3-1 a
2012-3-1 b
2012-3-2 a
2012-3-2 b
2012-3-3 b
2012-3-3 c
2012-3-4 d
2012-3-5 a
2012-3-6 b
2012-3-6 c
2012-3-7 c
2012-3-7 d
 */
public class DatatoHeavy {

    public static void main(String[] args) throws Exception {
        if (args.length!=2) {
            System.err.println("path err");
            System.exit(-1);
        }
        @SuppressWarnings("deprecation")
        Job job=new Job(new Configuration(),"quchong");
        job.setJarByClass(DatatoHeavy.class);

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        job.setMapperClass(hmap.class);
        job.setReducerClass(hreduce.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(NullWritable.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(NullWritable.class);

        job.waitForCompletion(true);

    }
    //map
    public static class hmap extends Mapper<LongWritable, Text, Text,NullWritable>{
        @Override
        protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, NullWritable>.Context context)
                throws IOException, InterruptedException {
            context.write(value, NullWritable.get());
        }
    }
    //The shuffle phase already merges identical keys into one group, which is what does the deduplication for us.
    //reduce
    public static class hreduce extends Reducer<Text, NullWritable, Text, NullWritable>{
        @Override
        protected void reduce(Text key, Iterable<NullWritable> value,
                Reducer<Text, NullWritable, Text, NullWritable>.Context context) throws IOException, InterruptedException {
            context.write(key, NullWritable.get());

        }
    }

}
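
A small optional tweak of my own, not in the original code: since this reducer just re-emits each key once, the same class can also be registered as a combiner in the driver, so duplicates are already dropped on the map side and less data goes through the shuffle:

        // Optional map-side pre-deduplication; safe here because hreduce is
        // idempotent (it writes each key exactly once, no matter how many
        // NullWritable values arrive).
        job.setCombinerClass(hreduce.class);

All three jobs are run the same way, roughly hadoop jar <your-jar> demos.DatatoHeavy <input path> <output path>, with the jar name and paths filled in for your own cluster.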