Hadoop Serialization Example

1. Problem Description

       Given the supplied data, output the upload traffic, download traffic, and total traffic for each phone number.

2. Data Format

    Input data format (partial)

    1363157973098     15013685858    5C-0E-8B-C7-F7-90:CMCC    120.197.40.4    rank.ie.sogou.com    搜索引擎    28    27    3659    3538    200
    1363157986029     15989002119    E8-99-C4-4E-93-E0:CMCC-EASY    120.196.100.99    www.umeng.com    站點統計    3    3    1938    180    200
    1363157992093     13560439658    C4-17-FE-BA-DE-D9:CMCC    120.196.100.99            15    9    918    4938    200
    1363157986041     13480253104    5C-0E-8B-C7-FC-80:CMCC-EASY    120.197.40.4            3    3    180    180    200
    1363157984040     13602846565    5C-0E-8B-8B-B6-00:CMCC    120.197.40.4    2052.flash2-http.qq.com    綜合門戶    15    12    1938    2910    200
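
    Each record is tab-separated. The job only needs three fields, counted from zero: field 1 (phone number), field 8 (upload bytes), and field 9 (download bytes). The standalone sketch below illustrates that parsing step on the first sample record; the class name RecordParseDemo is made up for illustration and is not part of the job itself.

public class RecordParseDemo {
	public static void main(String[] args) {
		// First sample record from above, with its fields joined by tabs.
		String line = "1363157973098\t15013685858\t5C-0E-8B-C7-F7-90:CMCC\t120.197.40.4"
				+ "\trank.ie.sogou.com\t搜索引擎\t28\t27\t3659\t3538\t200";
		String[] fields = line.split("\t");
		String telNum = fields[1];                       // 15013685858
		double upLoad = Double.parseDouble(fields[8]);   // 3659
		double downLoad = Double.parseDouble(fields[9]); // 3538
		System.out.println(telNum + "\t" + upLoad + "\t" + downLoad + "\t" + (upLoad + downLoad));
	}
}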

    Output data format

    Phone number   Total upload traffic   Total download traffic   Total traffic

3. Code Implementation

       DataCount:

package edu.jianwei.hadoop.mr;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class DataCount {
	
	static class DCMapper extends Mapper<LongWritable, Text, Text, DataBean> {
		private Text k = new Text();
		private DataBean v = new DataBean();

		@Override
		protected void map(LongWritable key, Text value, Context context)
				throws IOException, InterruptedException {
			// Each input line is a tab-separated record; field 1 is the phone
			// number, fields 8 and 9 are the upload and download bytes.
			String line = value.toString();
			String[] words = line.split("\t");

			String telNum = words[1];
			double upLoad = Double.parseDouble(words[8]);
			double downLoad = Double.parseDouble(words[9]);

			k.set(telNum);
			v.set(telNum, upLoad, downLoad);
			context.write(k, v);
		}
	}
	
	static class DCReduce extends Reducer<Text, DataBean, Text, DataBean> {
		private DataBean v = new DataBean();

		@Override
		protected void reduce(Text key, Iterable<DataBean> v2s, Context context)
				throws IOException, InterruptedException {
			// Sum the upload and download traffic for this phone number.
			double upTotal = 0;
			double downTotal = 0;
			for (DataBean d : v2s) {
				upTotal += d.getUpLoad();
				downTotal += d.getDownload();
			}
			v.set("", upTotal, downTotal);
			context.write(key, v);
		}
	}

	public static void main(String[] args) throws Exception {
		Configuration conf = new Configuration();
		Job job = Job.getInstance(conf);

		job.setJarByClass(DataCount.class);

		// Map side: phone number -> DataBean with per-record traffic.
		job.setMapperClass(DCMapper.class);
		job.setMapOutputKeyClass(Text.class);
		job.setMapOutputValueClass(DataBean.class);
		FileInputFormat.setInputPaths(job, new Path(args[0]));

		// Reduce side: phone number -> DataBean with summed traffic.
		job.setReducerClass(DCReduce.class);
		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(DataBean.class);
		FileOutputFormat.setOutputPath(job, new Path(args[1]));

		System.exit(job.waitForCompletion(true) ? 0 : 1);
	}

}

     DataBean:

package edu.jianwei.hadoop.mr;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.hadoop.io.Writable;

// Custom Writable carrying the upload, download, and total traffic for one
// phone number. Hadoop instantiates it via reflection, so the implicit
// no-argument constructor must remain available.
public class DataBean implements Writable {
	private String telNum;
	private double upLoad;
	private double download;
	private double total;

	// Populate all fields in one call; total is derived from upload + download.
	public void set(String telNum, double upLoad, double download) {
		this.telNum = telNum;
		this.upLoad = upLoad;
		this.download = download;
		this.total = upLoad + download;
	}

	// Serialization: write the fields out in a fixed order.
	public void write(DataOutput out) throws IOException {
		out.writeUTF(telNum);
		out.writeDouble(upLoad);
		out.writeDouble(download);
		out.writeDouble(total);
	}

	// Deserialization: read the fields back in exactly the same order write() uses.
	public void readFields(DataInput in) throws IOException {
		this.telNum = in.readUTF();
		this.upLoad = in.readDouble();
		this.download = in.readDouble();
		this.total = in.readDouble();
	}

	public String getTelNum() {
		return telNum;
	}

	public void setTelNum(String telNum) {
		this.telNum = telNum;
	}

	public double getUpLoad() {
		return upLoad;
	}

	public void setUpLoad(double upLoad) {
		this.upLoad = upLoad;
	}

	public double getDownload() {
		return download;
	}

	public void setDownload(double download) {
		this.download = download;
	}

	public double getTotal() {
		return total;
	}

	public void setTotal(double total) {
		this.total = total;
	}

	// Controls how the reducer output value is rendered: upload \t download \t total.
	@Override
	public String toString() {
		return upLoad + "\t" + download + "\t" + total;
	}
   
	
}
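
     Because Hadoop serializes a DataBean by calling write() and rebuilds it by calling readFields(), both methods must handle the fields in exactly the same order. The round trip can be sanity-checked outside a cluster with plain Java data streams; the sketch below assumes it lives in the same package as DataBean, and the class name DataBeanRoundTrip is made up for illustration.

package edu.jianwei.hadoop.mr;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;

public class DataBeanRoundTrip {
	public static void main(String[] args) throws Exception {
		DataBean original = new DataBean();
		original.set("15013685858", 3659, 3538);

		// Serialize with write(), the same way the framework does between map and reduce.
		ByteArrayOutputStream buffer = new ByteArrayOutputStream();
		original.write(new DataOutputStream(buffer));

		// Rebuild a fresh bean from the bytes with readFields().
		DataBean copy = new DataBean();
		copy.readFields(new DataInputStream(new ByteArrayInputStream(buffer.toByteArray())));

		// Expected output (tab-separated): 15013685858 3659.0 3538.0 7197.0
		System.out.println(copy.getTelNum() + "\t" + copy);
	}
}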

4. Running the Code

      1. Run the job

         hadoop jar /root/dc.jar  /dc  /dc/res

       2. Job output (partial)

        13560436666     1116.0  954.0   2070.0
        13560439658     2034.0  5892.0  7926.0
        13602846565     1938.0  2910.0  4848.0
        13660577991     6960.0  690.0   7650.0
        13719199419     240.0   0.0     240.0
        13726230503     2481.0  24681.0 27162.0

 


        
