MapReduce--13--學生成績(增強版)--需求2

題目描述

之前關於學生成績的練習題只是入門級別的需求,現在對這些需求進行增強。首先來看數據的變化:

computer,huangxiaoming,85,86,41,75,93,42,85
computer,xuzheng,54,52,86,91,42
computer,huangbo,85,42,96,38
english,zhaobenshan,54,52,86,91,42,85,75
english,liuyifei,85,41,75,21,85,96,14
algorithm,liuyifei,75,85,62,48,54,96,15
computer,huangjiaju,85,75,86,85,85
english,liuyifei,76,95,86,74,68,74,48
english,huangdatou,48,58,67,86,15,33,85
algorithm,huanglei,76,95,86,74,68,74,48
algorithm,huangjiaju,85,75,86,85,85,74,86
computer,huangdatou,48,58,67,86,15,33,85
english,zhouqi,85,86,41,75,93,42,85,75,55,47,22
english,huangbo,85,42,96,38,55,47,22
algorithm,liutao,85,75,85,99,66
computer,huangzitao,85,86,41,75,93,42,85
math,wangbaoqiang,85,86,41,75,93,42,85
computer,liujialing,85,41,75,21,85,96,14,74,86
computer,liuyifei,75,85,62,48,54,96,15
computer,liutao,85,75,85,99,66,88,75,91
computer,huanglei,76,95,86,74,68,74,48
english,liujialing,75,85,62,48,54,96,15
math,huanglei,76,95,86,74,68,74,48
math,huangjiaju,85,75,86,85,85,74,86
math,liutao,48,58,67,86,15,33,85
english,huanglei,85,75,85,99,66,88,75,91
math,xuzheng,54,52,86,91,42,85,75
math,huangxiaoming,85,75,85,99,66,88,75,91
math,liujialing,85,86,41,75,93,42,85,75
english,huangxiaoming,85,86,41,75,93,42,85
algorithm,huangdatou,48,58,67,86,15,33,85
algorithm,huangzitao,85,86,41,75,93,42,85,75

一、數據解釋

數據字段個數不固定:
第一個字段是課程名稱,總共四門課程:computer、math、english、algorithm;
第二個字段是學生姓名,其後的每個字段是該學生每次考試的分數。

 

二、統計需求:

1、統計每門課程的參考人數和課程平均分

2、統計每門課程參考學生的平均分,並且按課程存入不同的結果文件,要求一門課程一個結果文件,並且按平均分從高到低排序,分數保留一位小數

3、求出每門課程參考學生成績最高的學生的信息:課程,姓名和平均分

 

三、解題思路

mapper階段的輸出:

key: CourseScore

value: NullWritable

reducer階段的輸出:

key:CourseScore

value:NullWritable

 

四、代碼實現

package com.ghgj.mazh.mapreduce.exercise.coursescore3;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.metadata.Partition;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Partitioner;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

/**
 * MapReduce driver for requirement 2 of the course-score exercise: compute every
 * student's average score per course and write the results partitioned by course,
 * ordered by {@link CourseScore#compareTo} (course, then average score from high
 * to low).
 *
 * <p>Each input line has the form {@code course,name,score1,score2,...} with a
 * variable number of scores.
 */
public class CourseScoreMR_Pro_02 {

    /** Input directory used when no command-line argument is given. */
    private static final String DEFAULT_INPUT_PATH = "D:\\bigdata\\coursescore2\\input";

    /** Output directory used when fewer than two command-line arguments are given. */
    private static final String DEFAULT_OUTPUT_PATH = "D:\\bigdata\\coursescore2\\output2";

    /** One reduce task (and therefore one result file) per known course. */
    private static final int COURSE_PARTITIONS = 4;

    /**
     * Configures and submits the job, then exits with 0 on success and 1 on failure.
     *
     * @param args optional overrides: {@code args[0]} is the input path and
     *             {@code args[1]} is the output path; the built-in defaults are
     *             used for whichever is missing
     * @throws Exception if the job cannot be configured or submitted
     */
    public static void main(String[] args) throws Exception {
        // Fall back to the original hard-coded locations when no arguments are passed.
        String inputPath = args.length > 0 ? args[0] : DEFAULT_INPUT_PATH;
        String outputPath = args.length > 1 ? args[1] : DEFAULT_OUTPUT_PATH;

        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);

        // Lets Hadoop locate the jar that contains this job's classes.
        job.setJarByClass(CourseScoreMR_Pro_02.class);

        // The mapper does all the work; the base Reducer class is used as-is,
        // so the reduce side contains no custom logic.
        job.setMapperClass(Mapper_CS.class);
        job.setReducerClass(Reducer.class);

        // Map output types.
        job.setMapOutputKeyClass(CourseScore.class);
        job.setMapOutputValueClass(NullWritable.class);
        // Final (reduce) output types.
        job.setOutputKeyClass(CourseScore.class);
        job.setOutputValueClass(NullWritable.class);

        // One reduce task per course, with records routed by course name.
        job.setNumReduceTasks(COURSE_PARTITIONS);
        job.setPartitionerClass(MyPartitioner.class);

        Path input = new Path(inputPath);
        Path output = new Path(outputPath);
        // Resolve the filesystem from the output path itself so that a path with an
        // explicit scheme is cleaned up on the filesystem it actually lives on.
        FileSystem fs = output.getFileSystem(conf);
        // The job refuses to start if the output directory already exists.
        if (fs.exists(output)) {
            fs.delete(output, true);
        }
        FileInputFormat.setInputPaths(job, input);
        FileOutputFormat.setOutputPath(job, output);

        boolean succeeded = job.waitForCompletion(true);
        System.exit(succeeded ? 0 : 1);
    }

    /**
     * Mapper that turns one input line into one {@link CourseScore} key.
     *
     * <p>Input key: {@link LongWritable} (not used here).<br>
     * Input value: one text line, {@code course,name,score1,score2,...}.<br>
     * Output key: {@link CourseScore} holding course, name and the average score
     * rounded to one decimal place.<br>
     * Output value: {@link NullWritable}.
     */
    private static class Mapper_CS extends Mapper<LongWritable, Text, CourseScore, NullWritable> {

        /** Reused across calls to avoid allocating a new key per record. */
        CourseScore keyOut = new CourseScore();

        /**
         * Parses a line, averages its scores and emits the result.
         *
         * <p>Lines that do not contain a course, a name and at least one score
         * (for example blank lines) are skipped, because no average can be
         * computed for them. A score that is not a valid integer still raises
         * {@link NumberFormatException} so corrupt data is not silently ignored.
         *
         * @param key     input key, ignored
         * @param value   the raw comma-separated line
         * @param context used to emit the output record
         */
        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {

            String[] splits = value.toString().split(",");
            // Need course + name + at least one score; otherwise the average is undefined.
            if (splits.length < 3) {
                return;
            }

            String course = splits[0].trim();
            String name = splits[1].trim();

            int sum = 0;
            int num = splits.length - 2;
            for (int i = 2; i < splits.length; i++) {
                sum += Integer.parseInt(splits[i].trim());
            }
            // Scale by 10, round, and scale back to keep exactly one decimal place.
            double avgScore = Math.round(sum * 1D / num * 10) / 10D;

            keyOut.setCourse(course);
            keyOut.setName(name);
            keyOut.setScore(avgScore);

            context.write(keyOut, NullWritable.get());
        }
    }

    /**
     * Partitioner that routes records by course name so that each course ends up
     * in its own reduce task (and result file) when four reduce tasks are used.
     */
    public static class MyPartitioner extends Partitioner<CourseScore, NullWritable>{

        /**
         * Maps a course to a partition index.
         *
         * <p>{@code computer}, {@code english} and {@code algorithm} map to slots
         * 0, 1 and 2; every other course (including {@code math}) maps to slot 3.
         * The slot is reduced modulo {@code numPartitions} so the result is always
         * a valid partition even when the job runs with fewer than four reduce
         * tasks; in that case several courses share a partition.
         *
         * @param courseScore   the record key carrying the course name
         * @param nullWritable  the record value, unused
         * @param numPartitions the number of reduce tasks configured for the job
         * @return a partition index in {@code [0, numPartitions)}
         */
        @Override
        public int getPartition(CourseScore courseScore, NullWritable nullWritable, int numPartitions) {

            String course = courseScore.getCourse();
            int slot;
            if ("computer".equals(course)) {
                slot = 0;
            } else if ("english".equals(course)) {
                slot = 1;
            } else if ("algorithm".equals(course)) {
                slot = 2;
            } else {
                slot = 3;
            }
            return slot % numPartitions;
        }
    }

}

其中 CourseScore類的實現:

package com.ghgj.mazh.mapreduce.exercise.coursescore3;

import org.apache.hadoop.io.WritableComparable;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

/**
 * Composite MapReduce key holding a course name, a student name and that
 * student's average score for the course.
 *
 * <p>Ordering is by course (ascending, natural {@link String} order) and then by
 * score from high to low. The student name does not take part in the comparison.
 */
public class CourseScore implements WritableComparable<CourseScore> {

    private String course;
    private String name;
    private double score;

    /**
     * Creates a fully populated key.
     *
     * @param course the course name
     * @param name   the student name
     * @param score  the student's average score for the course
     */
    public CourseScore(String course, String name, double score) {
        super();
        this.course = course;
        this.name = name;
        this.score = score;
    }

    /** Creates an empty key; fields are filled in via setters or {@link #readFields}. */
    public CourseScore() {
    }

    /** @return the course name */
    public String getCourse() {
        return course;
    }

    /** @param course the course name */
    public void setCourse(String course) {
        this.course = course;
    }

    /** @return the student name */
    public String getName() {
        return name;
    }

    /** @param name the student name */
    public void setName(String name) {
        this.name = name;
    }

    /** @return the average score */
    public double getScore() {
        return score;
    }

    /** @param score the average score */
    public void setScore(double score) {
        this.score = score;
    }

    /**
     * Serializes the fields in the order course, name, score.
     *
     * @param out the sink to write to
     * @throws IOException if writing fails
     */
    @Override
    public void write(DataOutput out) throws IOException {
        out.writeUTF(course);
        out.writeUTF(name);
        out.writeDouble(score);
    }

    /**
     * Restores the fields in the same order that {@link #write} emits them.
     *
     * @param in the source to read from
     * @throws IOException if reading fails
     */
    @Override
    public void readFields(DataInput in) throws IOException {
        this.course = in.readUTF();
        this.name = in.readUTF();
        this.score = in.readDouble();
    }

    /**
     * Sort rule, which also serves as the grouping rule: keys are ordered by
     * course first, and within the same course by score from high to low.
     *
     * <p>Scores are compared with {@link Double#compare} rather than by the sign
     * of a subtraction, so a NaN score gets a consistent position instead of
     * comparing equal to every other score. As a consequence {@code 0.0} and
     * {@code -0.0} are treated as distinct values.
     *
     * @param cs the key to compare against
     * @return a negative value, zero, or a positive value if this key sorts
     *         before, together with, or after {@code cs}
     */
    @Override
    public int compareTo(CourseScore cs) {

        int byCourse = this.course.compareTo(cs.getCourse());
        if (byCourse != 0) {
            return byCourse;
        }
        // Arguments are swapped on purpose: a higher score must sort first.
        return Double.compare(cs.getScore(), this.score);
    }


    /** @return the fields joined by tabs, as written to the result files */
    @Override
    public String toString() {
        return course + "\t" + name + "\t" + score;
    }
}

 

五、執行結果

 

 

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章