MaxTemperatureUsingSecodarySort.java — MapReduce job that finds the maximum temperature per year via secondary sort (note: "Secodary" is a typo for "Secondary", kept to match the public class name):
package com.hadoop.ncdcdata;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Partitioner;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import com.hadoop.util.JobBuilder;
public class MaxTemperatureUsingSecodarySort extends Configured implements Tool{
public static class IntPair implements WritableComparable<IntPair>{
private int first;
private int second;
public IntPair() {
//必須有否則會報錯
}
public IntPair(int first, int second) {
this.first = first;
this.second = second;
}
@Override
public void readFields(DataInput arg0) throws IOException {
// TODO Auto-generated method stub
first = arg0.readInt();
second = arg0.readInt();
}
@Override
public void write(DataOutput arg0) throws IOException {
// TODO Auto-generated method stub
arg0.writeInt(first);
arg0.writeInt(second);
}
@Override
public int compareTo(IntPair o) {
// TODO Auto-generated method stub
int tmp = Integer.compare(first, o.first);
if (tmp != 0) {
return tmp;
}
return Integer.compare(second, o.second);
}
@Override
public int hashCode() {
final int prime = 31;
int result = 1;
result = prime * result + first;
result = prime * result + second;
return result;
}
@Override
public boolean equals(Object obj) {
if (obj instanceof IntPair) {
IntPair ti = (IntPair) obj;
return first == ti.getFirst() && second == ti.getSecond();
}
return false;
}
public int getFirst() {
return first;
}
public int getSecond() {
return second;
}
@Override
public String toString() {
return first + "\t" + second ;
}
}
static class MaxTemperatureMapper extends Mapper<LongWritable, Text, IntPair, NullWritable> {
private NcdcRecordParser parser = new NcdcRecordParser();
@Override
protected void map(LongWritable key, Text value,Context context)
throws IOException, InterruptedException {
parser.parser(value);
if (parser.isValidTemperature()) {
context.write(new IntPair(Integer.parseInt(parser.getYear()), parser.getAirTemperature()), NullWritable.get());
}
context.getCounter("TemperatureQulity", parser.getQuality()).increment(1);
}
}
static class MaxTemperatureReducer extends Reducer<IntPair, NullWritable, IntPair, NullWritable> {
@Override
protected void reduce(IntPair key, Iterable<NullWritable> values,Context context)
throws IOException, InterruptedException {
context.write(key, NullWritable.get());
}
}
static class FirstPartitioner extends Partitioner<IntPair, NullWritable> {
@Override
public int getPartition(IntPair key, NullWritable value, int numPartitions) {
return Math.abs(key.getFirst() * 127) % numPartitions;
}
}
static class KeyComparator extends WritableComparator {
protected KeyComparator() {
super(IntPair.class, true);
}
@Override
public int compare(WritableComparable a, WritableComparable b) {
IntPair ip1 = (IntPair) a;
IntPair ip2 = (IntPair) b;
int cmp = Integer.compare(ip1.getFirst(), ip2.getFirst());
if (cmp != 0){
return cmp;
}
return -Integer.compare(ip1.getSecond(), ip2.getSecond());
}
}
static class GroupComparator extends WritableComparator {
protected GroupComparator() {
super(IntPair.class, true);
}
@Override
public int compare(WritableComparable a, WritableComparable b) {
IntPair ip1 = (IntPair) a;
IntPair ip2 = (IntPair) b;
return Integer.compare(ip1.getFirst(), ip2.getFirst());
}
}
@Override
public int run(String[] arg0) throws Exception {
Configuration conf = getConf();
//本地測試 所需參數
conf.set("mapreduce.framework.name","local");
conf.set("fs.defaultFS","file:///");
Job job = JobBuilder.parserInputAndOutput(this, conf, arg0);
job.setMapperClass(MaxTemperatureMapper.class);
job.setPartitionerClass(FirstPartitioner.class);
job.setSortComparatorClass(KeyComparator.class);
job.setGroupingComparatorClass(GroupComparator.class);
job.setReducerClass(MaxTemperatureReducer.class);
job.setOutputKeyClass(IntPair.class);
job.setOutputValueClass(NullWritable.class);
//job.setNumReduceTasks(3);
return job.waitForCompletion(true) ? 0 : 1;
}
public static void main(String[] args) throws Exception {
int exitCode = ToolRunner.run(new MaxTemperatureUsingSecodarySort(), args);
System.exit(exitCode);
}
}
NcdcRecordParser.java — parses NCDC weather records:
package com.hadoop.ncdcdata;
import java.util.regex.Pattern;

import org.apache.hadoop.io.Text;
/**
 * Parses fixed-width NCDC weather records, extracting the year, station id,
 * air temperature, and quality code from their fixed column positions.
 *
 * Not thread-safe: each call to {@code parser} overwrites the instance fields.
 */
public class NcdcRecordParser {
    private static final int MISSING_TEMPERATURE = 9999;
    // Compiled once: this parser runs once per record in a map task, and
    // String.matches() would recompile the regex on every call.
    private static final Pattern GOOD_QUALITY = Pattern.compile("[01459]");

    private String year;
    private int airTemperature;
    private String quality;
    private String stationId;

    /**
     * Parses one raw record line; the field getters are valid afterwards.
     * Assumes the record is at least 93 characters long — TODO confirm
     * upstream input is never truncated (substring would throw otherwise).
     */
    public void parser(String record) {
        year = record.substring(15, 19);
        stationId = record.substring(4, 10);
        String airTemperatureString;
        // Skip a leading '+' sign; a '-' is kept so negative temperatures
        // parse correctly.
        if (record.charAt(87) == '+') {
            airTemperatureString = record.substring(88, 92);
        } else {
            airTemperatureString = record.substring(87, 92);
        }
        airTemperature = Integer.parseInt(airTemperatureString);
        quality = record.substring(92, 93);
    }

    /** Convenience overload for Hadoop {@link Text} values. */
    public void parser(Text text) {
        parser(text.toString());
    }

    /** True when the temperature is present and the quality code is acceptable. */
    public boolean isValidTemperature() {
        return airTemperature != MISSING_TEMPERATURE && GOOD_QUALITY.matcher(quality).matches();
    }

    public String getYear() {
        return year;
    }

    public int getAirTemperature() {
        return airTemperature;
    }

    public String getQuality() {
        return quality;
    }

    public String getStationId() {
        return stationId;
    }

    /** Ad-hoc smoke test against a sample 1901 record. */
    public static void main(String[] args) {
        String firstLine = "0029029070999991901010106004+64333+023450FM-12+000599999V0202701N015919999999N0000001N9-00781+99999102001ADDGF108991999999999999999999";
        System.out.println(firstLine);
        NcdcRecordParser nrp = new NcdcRecordParser();
        nrp.parser(firstLine);
        System.out.println(nrp.getYear());
        System.out.println(nrp.getAirTemperature());
        System.out.println(nrp.getStationId());
    }
}
JobBuilder.java — helper class for common job setup:
package com.hadoop.util;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
/**
 * Utility for the common job-setup boilerplate: validates the argument count
 * and wires up the input and output paths.
 */
public class JobBuilder {

    /**
     * Builds a {@link Job} whose input/output paths come from {@code args}.
     *
     * @param tool the Tool being run; used for the jar class and usage message
     * @param conf the configuration the job is created from
     * @param args expected to be exactly {@code [input, output]}
     * @return the configured Job, or {@code null} if the argument count is
     *     wrong (a usage message is printed in that case) — callers MUST
     *     null-check the result
     * @throws IOException if the job cannot be created
     */
    public static Job parserInputAndOutput(Tool tool, Configuration conf, String[] args) throws IOException {
        if (args.length != 2) {
            printUsage(tool, "<input> <output>");
            return null;
        }
        Job job = Job.getInstance(conf);
        job.setJarByClass(tool.getClass());
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        return job;
    }

    /** Prints a usage message for {@code tool} to stderr. */
    public static void printUsage(Tool tool, String extraArgsUsage) {
        // %n emits the platform line separator; a hard-coded \n does not.
        System.err.printf("Usage: %s [genericOptions] %s%n%n",
                tool.getClass().getSimpleName(), extraArgsUsage);
    }
}