Weather

需求:

找出每個月氣溫最高的2天(同一天有多條記錄時,只取該天溫度最高的一條)。

 

數據集:

1949-10-01 14:21:02	34c
1949-10-01 19:21:02	38c
1949-10-02 14:01:02	36c
1950-01-01 11:21:02	32c
1950-10-01 12:21:02	37c
1951-12-01 12:21:02	23c
1950-10-02 12:21:02	41c
1950-10-03 12:21:02	27c
1951-07-01 12:21:02	45c
1951-07-02 12:21:02	46c
1951-07-03 12:21:03	47c

客戶端:

package com.ny.mapreduce.weather;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class MyTQ {
	
	public static void main(String[] args) throws Exception {
		//創建conf對象 讀取配置文件
		Configuration conf = new Configuration(true);
		
		//創建job對象
		Job job = Job.getInstance(conf);
	
		//Jar包
		job.setJarByClass(MyTQ.class);
		
		//----conf---------配置環節---
		//輸入格式化類的創建
//		job.setInputFormatClass(null); 
		/**-- MAP階段-->*/
		//map類處理輸入來的數據
		job.setMapperClass(TMapper.class);
		//Map類處理後產生得K,V    TQ天氣的類型定義在TQ類中
		/*Map輸出的類型*/
		job.setMapOutputKeyClass(TQ.class);
		job.setMapOutputValueClass(IntWritable.class);
		
		/*  (k,v,p)  分 區*/
		job.setPartitionerClass(TPartitioner.class);
		
		/*緩衝區去  排序*/
		job.setSortComparatorClass(TSortComparator.class);
		/* Combine*/
//		job.setCombinerClass(TCombiner.class);
		/**Map階段結束*/
		
		
		/**Reduce環節*/
		//分組比較器
		job.setGroupingComparatorClass(TGroupComparator.class);
		//Reduce類處理
		job.setReducerClass(Treduce.class);
		/**Reduce階段結束*/
		
		/** 輸入輸出路徑*/
		Path input = new Path("/data/weather/input");
		FileInputFormat.addInputPath(job, input);
		
		Path output = new Path("/data/weather/output");
		FileOutputFormat.setOutputPath(job, output );
		
		/**設置reducetask的數量*/
		job.setNumReduceTasks(2);	
		
		job.waitForCompletion(true);
		
	}
	
}

 Map類:

package com.ny.mapreduce.weather;
import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.Date;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.util.StringUtils;
// The default input format class is TextInputFormat.     <KEYIN, VALUEIN, KEYOUT, VALUEOUT>
/**
 * Parses one input line of the form {@code "1951-07-01 12:21:02<TAB>45c"}
 * and emits a {@link TQ} key (year, month, day, temperature) together with
 * the temperature as the value.
 *
 * <p>Lines that cannot be parsed are skipped and counted in the
 * {@code TMapper / MALFORMED_RECORDS} counter.
 */
public class TMapper extends Mapper<LongWritable, Text, TQ, IntWritable>{

	// Reused output objects; their contents are overwritten for every record.
	TQ mkey = new TQ();
	IntWritable mval = new IntWritable();

	// Created once per mapper instance instead of once per record.
	// NOTE(review): SimpleDateFormat is not thread-safe; this assumes map()
	// is invoked from a single thread per mapper instance -- confirm if a
	// multithreaded mapper runner is ever used.
	private final SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
	private final Calendar cal = Calendar.getInstance();

	/**
	 * Converts one text line into a (TQ, temperature) pair.
	 *
	 * @param key     input key supplied by the input format (not used)
	 * @param value   the input line: timestamp, a tab, then the temperature
	 *                followed by a one-character unit suffix (e.g. "45c")
	 * @param context used to emit the output pair and to update counters
	 * @throws IOException          if writing the output pair fails
	 * @throws InterruptedException if writing the output pair is interrupted
	 */
	@Override
	protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, TQ, IntWritable>.Context context)
			throws IOException, InterruptedException {

		// Split "1951-07-01 12:21:02<TAB>45c" into timestamp and temperature.
		String[] strs = StringUtils.split(value.toString(), '\t');
		if (strs.length < 2) {
			skipMalformed(context);
			return;
		}

		// Trim so that stray whitespace (e.g. a trailing '\r') does not break parsing.
		String temperature = strs[1].trim();
		if (temperature.isEmpty()) {
			skipMalformed(context);
			return;
		}

		int wd;
		try {
			// The pattern only covers the date; the time of day is left unparsed.
			Date date = sdf.parse(strs[0]);
			cal.setTime(date);
			// Drop the trailing unit character and parse the numeric part.
			wd = Integer.parseInt(temperature.substring(0, temperature.length() - 1));
		} catch (java.text.ParseException | NumberFormatException e) {
			// Only parse problems are treated as "bad record"; I/O failures
			// from context.write below are deliberately NOT caught here.
			skipMalformed(context);
			return;
		}

		// Fill the key. Calendar.MONTH is zero-based, hence the +1.
		mkey.setYear(cal.get(Calendar.YEAR));
		mkey.setMonth(cal.get(Calendar.MONTH) + 1);
		mkey.setDay(cal.get(Calendar.DAY_OF_MONTH));
		mkey.setWd(wd);

		// Fill the value.
		mval.set(wd);

		// Emit the pair.
		context.write(mkey, mval);
	}

	/** Records that an input line was skipped because it could not be parsed. */
	private void skipMalformed(Mapper<LongWritable, Text, TQ, IntWritable>.Context context) {
		context.getCounter("TMapper", "MALFORMED_RECORDS").increment(1);
	}

}

 設置Map輸出key的類型(自定義的TQ類):

package com.ny.mapreduce.weather;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import org.apache.hadoop.io.WritableComparable;

// Custom key type: implements the Hadoop key interface.
/**
 * Map output key holding one weather reading: year, month, day and the
 * temperature ({@code wd}).
 *
 * <p>The natural ordering ({@link #compareTo}) is ascending by year, then
 * month, then day; the temperature does not take part in it.
 */
public class TQ implements WritableComparable<TQ>{
	private int year;
	private int month;
	private int day;
	private int wd;

	public int getYear() {
		return year;
	}

	public void setYear(int year) {
		this.year = year;
	}

	public int getMonth() {
		return month;
	}

	public void setMonth(int month) {
		this.month = month;
	}

	public int getDay() {
		return day;
	}

	public void setDay(int day) {
		this.day = day;
	}

	public int getWd() {
		return wd;
	}

	public void setWd(int wd) {
		this.wd = wd;
	}

	/**
	 * Serializes the four fields as ints, in the order year, month, day, wd.
	 *
	 * @param out the stream to write to
	 * @throws IOException if writing fails
	 */
	@Override
	public void write(DataOutput out) throws IOException {
		out.writeInt(year);
		out.writeInt(month);
		out.writeInt(day);
		out.writeInt(wd);
	}

	/**
	 * Deserializes the four fields; the read order must match {@link #write}.
	 *
	 * @param in the stream to read from
	 * @throws IOException if reading fails
	 */
	@Override
	public void readFields(DataInput in) throws IOException {
		this.year = in.readInt();
		this.month = in.readInt();
		this.day = in.readInt();
		this.wd = in.readInt();
	}

	/**
	 * Orders keys chronologically: by year, then month, then day (ascending).
	 *
	 * @param that the key to compare with
	 * @return a negative value, zero or a positive value when this key's date
	 *         is before, equal to or after {@code that}'s date
	 */
	@Override
	public int compareTo(TQ that) {
		int byYear = Integer.compare(this.year, that.getYear());
		if (byYear != 0) {
			// Different years decide the order on their own.
			return byYear;
		}
		int byMonth = Integer.compare(this.month, that.getMonth());
		if (byMonth != 0) {
			// Same year, different month.
			return byMonth;
		}
		// Same year and month: the day decides.
		return Integer.compare(this.day, that.getDay());
	}

}

分區類: 

package com.ny.mapreduce.weather;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.mapreduce.Partitioner;

/**
 * Assigns map output to reduce tasks by year, so that every record of a
 * given year (and therefore of a given year-month group) reaches the same
 * reducer.
 */
public class TPartitioner extends Partitioner<TQ, IntWritable>{

	/**
	 * Computes the partition for a key from its year.
	 *
	 * @param key           the map output key
	 * @param value         the map output value (not used)
	 * @param numPartitions the number of partitions
	 * @return a partition index in the range {@code [0, numPartitions)}
	 */
	@Override
	public int getPartition(TQ key, IntWritable value, int numPartitions) {
		// Clear the sign bit before taking the remainder: a plain "%" would
		// yield a negative index for a negative year. For non-negative years
		// the result is the same as year % numPartitions.
		int nonNegativeYear = key.getYear() & Integer.MAX_VALUE;
		return nonNegativeYear % numPartitions;
	}

}

排序類:

package com.ny.mapreduce.weather;

import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;

// Sort comparator
/**
 * Sort order used for map output keys: year ascending, then month
 * ascending, then temperature descending, so the hottest reading of each
 * month comes first.
 */
public class TSortComparator extends WritableComparator{

	/** Registers {@link TQ} as the key class handled by this comparator. */
	public TSortComparator() {
		super(TQ.class,true);
	}

	/**
	 * Compares two {@link TQ} keys.
	 *
	 * @param a the first key
	 * @param b the second key
	 * @return negative, zero or positive according to the order described
	 *         on the class
	 */
	@Override
	public int compare(WritableComparable a, WritableComparable b) {
		TQ left = (TQ) a;
		TQ right = (TQ) b;

		int byYear = Integer.compare(left.getYear(), right.getYear());
		if (byYear != 0) {
			return byYear;
		}

		int byMonth = Integer.compare(left.getMonth(), right.getMonth());
		if (byMonth != 0) {
			return byMonth;
		}

		// Same year and month: operands are swapped to get descending temperature.
		return Integer.compare(right.getWd(), left.getWd());
	}
}

分組類:

package com.ny.mapreduce.weather;

import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;

/**
 * Grouping comparator: two keys compare as equal when they share the same
 * year and month, regardless of day or temperature.
 */
public class TGroupComparator extends WritableComparator{

	/** Registers {@link TQ} as the key class handled by this comparator. */
	public TGroupComparator() {
		super(TQ.class,true);
	}

	/**
	 * Compares two {@link TQ} keys by year, then by month.
	 *
	 * @param a the first key
	 * @param b the second key
	 * @return zero when year and month are both equal; otherwise the result
	 *         of comparing the first field that differs
	 */
	@Override
	public int compare(WritableComparable a, WritableComparable b) {
		TQ left = (TQ) a;
		TQ right = (TQ) b;

		int byYear = Integer.compare(left.getYear(), right.getYear());
		if (byYear != 0) {
			// Different years are never in the same group.
			return byYear;
		}
		// Same year: same month means same group, different month does not.
		return Integer.compare(left.getMonth(), right.getMonth());
	}

}

reduce類 :

package com.ny.mapreduce.weather;
import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
//<KEYIN, VALUEIN, KEYOUT, VALUEOUT>
/**
 * Emits at most two records per group: the first record of the group and
 * the first following record whose day differs from it.
 *
 * <p>Output key is {@code "year-month-day:temperature"}, output value is
 * the temperature.
 */
public class Treduce extends Reducer<TQ, IntWritable, Text, IntWritable>{
	// Reused output objects.
	Text rkey = new Text();
	IntWritable rval = new IntWritable();

	/**
	 * Walks the values of one group and writes up to two records.
	 *
	 * @param key     the group's key; its fields are read again on every
	 *                iteration (the loop relies on the framework updating
	 *                this object as the values iterator advances)
	 * @param valuese the values of the group; only used to drive the loop
	 * @param context used to emit the output records
	 * @throws IOException          if writing fails
	 * @throws InterruptedException if writing is interrupted
	 */
	@Override
	protected void reduce(TQ key, Iterable<IntWritable> valuese, Context context)
			throws IOException, InterruptedException {
		boolean firstWritten = false;
		int firstDay = 0;

		for (IntWritable ignored : valuese) {
			if (!firstWritten) {
				// First record of the group: always emitted.
				emit(key, context);
				firstWritten = true;
				firstDay = key.getDay();
				continue;
			}
			if (key.getDay() != firstDay) {
				// First later record that falls on another day: emit and stop.
				emit(key, context);
				break;
			}
		}
	}

	/** Writes the key's current contents as one "year-month-day:wd" record. */
	private void emit(TQ key, Context context) throws IOException, InterruptedException {
		StringBuilder label = new StringBuilder();
		label.append(key.getYear()).append("-")
				.append(key.getMonth()).append("-")
				.append(key.getDay()).append(":")
				.append(key.getWd());
		rkey.set(label.toString());
		rval.set(key.getWd());
		context.write(rkey, rval);
	}

}

 

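出現了數據傾斜:分區只按年份對reduce任務數取模,以上面的樣例數據和2個reduce任務為例,1949年和1951年(共7條記錄)都落到分區1,而1950年(4條記錄)落到分區0,兩個reduce任務的負載不均衡。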

 

 

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章