《Hadoop權威指南》(Hadoop: The Definitive Guide)中的輔助排序(secondary sort)範例

MaxTemperatureUsingSecodarySort.java(類名中的 "Secodary" 是 "Secondary" 的拼寫錯誤,此處保留以與代碼一致):

package com.hadoop.ncdcdata;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Partitioner;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

import com.hadoop.util.JobBuilder;

/**
 * Secondary-sort example: emits (year, temperature) composite keys so that,
 * within each year, records arrive at the reducer sorted by temperature
 * descending. The reducer then only has to emit the first key per group to
 * get the maximum temperature for that year.
 *
 * (Note: "Secodary" in the class name is a historical typo kept for
 * compatibility with existing callers.)
 */
public class MaxTemperatureUsingSecodarySort extends Configured implements Tool {

	/**
	 * Composite key holding (year, temperature). Natural order is ascending
	 * on both fields; the job overrides sorting/grouping with the comparators
	 * below.
	 */
	public static class IntPair implements WritableComparable<IntPair> {
		private int first;
		private int second;

		/** No-arg constructor required by Hadoop for deserialization. */
		public IntPair() {
		}

		public IntPair(int first, int second) {
			this.first = first;
			this.second = second;
		}

		@Override
		public void readFields(DataInput in) throws IOException {
			// Fields must be read in exactly the order write() emits them.
			first = in.readInt();
			second = in.readInt();
		}

		@Override
		public void write(DataOutput out) throws IOException {
			out.writeInt(first);
			out.writeInt(second);
		}

		@Override
		public int compareTo(IntPair o) {
			// Ascending by first, then ascending by second.
			int cmp = Integer.compare(first, o.first);
			if (cmp != 0) {
				return cmp;
			}
			return Integer.compare(second, o.second);
		}

		@Override
		public int hashCode() {
			final int prime = 31;
			int result = 1;
			result = prime * result + first;
			result = prime * result + second;
			return result;
		}

		@Override
		public boolean equals(Object obj) {
			if (obj instanceof IntPair) {
				IntPair other = (IntPair) obj;
				return first == other.getFirst() && second == other.getSecond();
			}
			return false;
		}

		public int getFirst() {
			return first;
		}

		public int getSecond() {
			return second;
		}

		@Override
		public String toString() {
			return first + "\t" + second;
		}
	}

	/**
	 * Emits an (year, temperature) composite key with a NullWritable value for
	 * every valid record; also counts records per quality code.
	 */
	static class MaxTemperatureMapper extends Mapper<LongWritable, Text, IntPair, NullWritable> {
		private NcdcRecordParser parser = new NcdcRecordParser();

		@Override
		protected void map(LongWritable key, Text value, Context context)
						throws IOException, InterruptedException {
			parser.parser(value);
			if (parser.isValidTemperature()) {
				context.write(new IntPair(Integer.parseInt(parser.getYear()), parser.getAirTemperature()), NullWritable.get());
			}
			// Counted for all records, valid or not, so quality totals match input size.
			context.getCounter("TemperatureQulity", parser.getQuality()).increment(1);
		}
	}

	/**
	 * Because keys arrive grouped by year and sorted temperature-descending
	 * (see KeyComparator/GroupComparator), the first key of each group is the
	 * year's maximum; emitting every group key once yields one max per year.
	 */
	static class MaxTemperatureReducer extends Reducer<IntPair, NullWritable, IntPair, NullWritable> {

		@Override
		protected void reduce(IntPair key, Iterable<NullWritable> values, Context context)
						throws IOException, InterruptedException {
			context.write(key, NullWritable.get());
		}
	}

	/**
	 * Partitions on the year only, so all temperatures for a year reach the
	 * same reducer.
	 */
	static class FirstPartitioner extends Partitioner<IntPair, NullWritable> {

		@Override
		public int getPartition(IntPair key, NullWritable value, int numPartitions) {
			// Mask with Integer.MAX_VALUE rather than Math.abs():
			// Math.abs(Integer.MIN_VALUE) is still negative, so an overflowing
			// product (first * 127) could produce a negative partition index.
			return (key.getFirst() * 127 & Integer.MAX_VALUE) % numPartitions;
		}
	}

	/** Sort comparator: year ascending, then temperature DESCENDING. */
	static class KeyComparator extends WritableComparator {
		protected KeyComparator() {
			super(IntPair.class, true);
		}

		@Override
		public int compare(WritableComparable a, WritableComparable b) {
			IntPair ip1 = (IntPair) a;
			IntPair ip2 = (IntPair) b;
			int cmp = Integer.compare(ip1.getFirst(), ip2.getFirst());
			if (cmp != 0) {
				return cmp;
			}
			// Negate for descending order on temperature.
			return -Integer.compare(ip1.getSecond(), ip2.getSecond());
		}
	}

	/** Grouping comparator: all keys with the same year form one reduce group. */
	static class GroupComparator extends WritableComparator {
		protected GroupComparator() {
			super(IntPair.class, true);
		}

		@Override
		public int compare(WritableComparable a, WritableComparable b) {
			IntPair ip1 = (IntPair) a;
			IntPair ip2 = (IntPair) b;
			return Integer.compare(ip1.getFirst(), ip2.getFirst());
		}
	}

	@Override
	public int run(String[] args) throws Exception {
		Configuration conf = getConf();
		// Local-mode settings for testing; remove these to run on a cluster.
		conf.set("mapreduce.framework.name", "local");
		conf.set("fs.defaultFS", "file:///");
		Job job = JobBuilder.parserInputAndOutput(this, conf, args);
		if (job == null) {
			// JobBuilder already printed the usage message; fail with a
			// nonzero exit code instead of throwing NullPointerException.
			return -1;
		}
		job.setMapperClass(MaxTemperatureMapper.class);
		job.setPartitionerClass(FirstPartitioner.class);
		job.setSortComparatorClass(KeyComparator.class);
		job.setGroupingComparatorClass(GroupComparator.class);
		job.setReducerClass(MaxTemperatureReducer.class);
		job.setOutputKeyClass(IntPair.class);
		job.setOutputValueClass(NullWritable.class);
		//job.setNumReduceTasks(3);
		return job.waitForCompletion(true) ? 0 : 1;
	}

	public static void main(String[] args) throws Exception {
		int exitCode = ToolRunner.run(new MaxTemperatureUsingSecodarySort(), args);
		System.exit(exitCode);
	}
}

NcdcRecordParser.java:用來解析 NCDC 氣象數據記錄(固定寬度字段)的輔助類

package com.hadoop.ncdcdata;

import java.util.regex.Pattern;

import org.apache.hadoop.io.Text;

/**
 * Parses a fixed-width NCDC weather record, extracting the year, station id,
 * air temperature, and quality code. A single instance is reusable: call
 * {@link #parser(String)} (or the Text overload) once per record, then read
 * the getters.
 */
public class NcdcRecordParser {
    /** Sentinel value NCDC records use for a missing temperature reading. */
    private static final int MISSING_TEMPERATURE = 9999;

    // Compiled once instead of calling String.matches() per record, which
    // would recompile the regex on every call in the hot mapper path.
    private static final Pattern GOOD_QUALITY = Pattern.compile("[01459]");

	private String year;
    private int airTemperature;
    private String quality;
    private String stationId;

    /**
     * Parses one raw record line. Field offsets follow the NCDC fixed-width
     * format; a leading '+' on the temperature is skipped because
     * Integer.parseInt rejects it in older JDKs.
     */
	public void parser(String record) {
    	year = record.substring(15, 19);
    	stationId = record.substring(4, 10);
    	String airTemperatureString;
    	if (record.charAt(87) == '+') {
    		airTemperatureString = record.substring(88, 92);
    	} else {
    		airTemperatureString = record.substring(87, 92);
    	}

    	airTemperature = Integer.parseInt(airTemperatureString);
    	quality = record.substring(92, 93);
    }

	/** Convenience overload for Hadoop {@link Text} values. */
	public void parser(Text text) {
		parser(text.toString());
	}

	/**
	 * Returns true when the temperature is present (not the 9999 sentinel)
	 * and the quality code indicates a trusted reading.
	 */
	public boolean isValidTemperature() {
		return airTemperature != MISSING_TEMPERATURE && GOOD_QUALITY.matcher(quality).matches();
	}

	public String getYear() {
		return year;
	}

	public int getAirTemperature() {
		return airTemperature;
	}

	public String getQuality() {
		return quality;
	}

	public String getStationId() {
		return stationId;
	}

	/** Ad-hoc smoke test against a sample 1901 record. */
	public static void main(String[] args)  {
		String firstLine = "0029029070999991901010106004+64333+023450FM-12+000599999V0202701N015919999999N0000001N9-00781+99999102001ADDGF108991999999999999999999";
		System.out.println(firstLine);
		NcdcRecordParser nrp = new NcdcRecordParser();
		nrp.parser(firstLine);
		System.out.println(nrp.getYear());
		System.out.println(nrp.getAirTemperature());
		System.out.println(nrp.getStationId());
	}
}

JobBuilder.java:解析輸入/輸出路徑並構建 Job 的輔助類

 

package com.hadoop.util;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;

/**
 * Helper for building a {@link Job} from command-line arguments: expects
 * exactly an input path and an output path.
 */
public class JobBuilder {

    /**
     * Creates a Job configured with the tool's jar and the given input/output
     * paths.
     *
     * NOTE(review): returns {@code null} (after printing usage to stderr)
     * when args does not contain exactly two entries — callers must
     * null-check the result or they will hit a NullPointerException.
     */
    public  static Job parserInputAndOutput (Tool tool, Configuration conf, String[] args) throws IOException{
        if (args.length == 2) {
            Job configured = Job.getInstance(conf);
            configured.setJarByClass(tool.getClass());
            FileInputFormat.addInputPath(configured, new Path(args[0]));
            FileOutputFormat.setOutputPath(configured, new Path(args[1]));
            return configured;
        }
        printUsage(tool, "<input> <output>");
        return null;
    }

    /** Prints a one-line usage message for the tool to stderr. */
    public static void printUsage(Tool tool, String extraArgsUsage){
        System.err.printf("Usage: %s [genericOptions] %s\n\n", tool.getClass().getSimpleName(), extraArgsUsage);
    }
}

 

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章