1、編寫自定義類
如果需要二次排序,則在該類的 compareTo 方法中加入第二個排序字段(time)的比較邏輯
package org.hdfs.urlMapReduce;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import org.apache.hadoop.io.WritableComparable;
/**
 * Composite key holding (uid, time) for secondary sort.
 *
 * Sort order: primarily by uid, then by time, so that within one uid the
 * records arrive at the reducer ordered by time. Because instances are used
 * as MapReduce keys, equals/hashCode are defined consistently with compareTo.
 */
public class UrlWritable implements WritableComparable<UrlWritable>{
private String uid;
private String time;
/** No-arg constructor required by Hadoop serialization. */
public UrlWritable() {}
public UrlWritable(String uid,String time) {
this.set(uid, time);
}
/** Sets both fields at once; convenient for object reuse in the mapper. */
public void set(String uid,String time) {
this.uid=uid;
this.time=time;
}
public String getUid() {
return uid;
}
public void setUid(String uid) {
this.uid = uid;
}
public String getTime() {
return time;
}
public void setTime(String time) {
this.time = time;
}
/** Serializes the two fields; order must match readFields. */
@Override
public void write(DataOutput out) throws IOException {
out.writeUTF(uid);
out.writeUTF(time);
}
/** Deserializes in the same field order used by write. */
@Override
public void readFields(DataInput in) throws IOException {
this.uid=in.readUTF();
this.time=in.readUTF();
}
/** Primary order by uid, secondary order by time (both lexicographic). */
@Override
public int compareTo(UrlWritable o) {
int cmp=this.uid.compareTo(o.uid);
if(cmp!=0) {
return cmp;
}
return this.time.compareTo(o.time);
}
// equals/hashCode are consistent with compareTo, as required for a
// well-behaved MapReduce key type.
@Override
public boolean equals(Object obj) {
if(this==obj) {
return true;
}
if(!(obj instanceof UrlWritable)) {
return false;
}
UrlWritable other=(UrlWritable) obj;
return (uid==null?other.uid==null:uid.equals(other.uid))
&&(time==null?other.time==null:time.equals(other.time));
}
@Override
public int hashCode() {
int result=(uid==null)?0:uid.hashCode();
return 31*result+((time==null)?0:time.hashCode());
}
@Override
public String toString() {
return "UrlWritable [uid=" + uid + ", time=" + time + "]";
}
}
2、編寫Map類,將數據進行業務邏輯的處理
package org.hdfs.urlMapReduce;
import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.log4j.Logger;
/**
 * Mapper: parses CSV lines of the form "uid,time,url" and emits
 * (UrlWritable(uid, time), url) so the shuffle can secondary-sort by time.
 *
 * Input key is the byte offset (unused); malformed lines with fewer than
 * three fields are logged and skipped instead of crashing the task.
 */
public class UrlMaper extends Mapper<LongWritable, Text, UrlWritable, Text>{
private static Logger logger=Logger.getLogger(UrlMaper.class);
// Hadoop serializes on write, so reusing these objects per record is safe
// and avoids per-line allocation.
private Text outputValue=new Text();
private UrlWritable uw=new UrlWritable();
@Override
protected void cleanup(Mapper<LongWritable, Text, UrlWritable, Text>.Context context)
throws IOException, InterruptedException {
logger.info("調用cleanup");
}
@Override
protected void map(LongWritable key, Text value,Context context)
throws IOException, InterruptedException {
logger.info("調用map方法");
String line=value.toString();
String[] u=line.split(",");
if(u.length<3) {
// Skip malformed records rather than failing the whole task with
// an ArrayIndexOutOfBoundsException.
logger.warn("skip malformed record: "+line);
return;
}
uw.set(u[0], u[1]);
outputValue.set(u[2]);
context.write(uw, outputValue);
}
@Override
protected void setup(Mapper<LongWritable, Text, UrlWritable, Text>.Context context)
throws IOException, InterruptedException {
logger.info("調用setup方法");
}
}
3、編寫自定義的分區類,將不同的ID放入不同的分區
package org.hdfs.urlMapReduce;
import java.util.HashMap;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Partitioner;
/**
 * Routes records to a reducer by uid: uid1/uid2/uid3 map to fixed
 * partitions 0/1/2; any other uid falls back to partition 0.
 *
 * Fixes two bugs in the original: it looked the uid up via
 * key.toString() (which returns "UrlWritable [uid=..., time=...]" and
 * therefore never matched the map), and it returned null for unknown
 * uids, which throws a NullPointerException when unboxed to int.
 */
public class UrlPartition extends Partitioner<UrlWritable, Text>{
static HashMap<String, Integer> u=new HashMap<>();
static {
u.put("uid1", 0);
u.put("uid2", 1);
u.put("uid3", 2);
}
@Override
public int getPartition(UrlWritable key, Text value, int numPartitions) {
// Look up the uid field itself, not the full toString() representation.
Integer a=u.get(key.getUid());
if(a==null) {
// Unknown uid: send to the default partition instead of NPE-ing.
return 0;
}
// Guard against running with fewer reducers than configured partitions.
return a%numPartitions;
}
}
4、編寫分組比較器(grouping comparator),決定哪些key被歸入同一次reduce調用——二次排序中只按uid分組
package org.hdfs.urlMapReduce;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;
/**
 * Grouping comparator for secondary sort: groups shuffle keys by uid ONLY,
 * so all records of one uid (already sorted by time via
 * UrlWritable.compareTo) arrive in a single reduce() call.
 *
 * The original delegated to compareTo (uid then time), which is identical
 * to the sort order — every (uid,time) pair became its own group and the
 * secondary sort had no effect.
 */
public class UrlGroup extends WritableComparator{
public UrlGroup() {
// true: instantiate keys so compare() can access their fields.
super(UrlWritable.class,true);
}
@Override
public int compare(WritableComparable a, WritableComparable b) {
UrlWritable v1=(UrlWritable) a;
UrlWritable v2=(UrlWritable) b;
// Group by uid only; time is deliberately ignored here.
return v1.getUid().compareTo(v2.getUid());
}
}
5、編寫Reduce類
package org.hdfs.urlMapReduce;
import java.io.IOException;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
/**
 * Reducer: concatenates all urls of one uid group (already time-ordered by
 * the secondary sort) and emits (uid, concatenated urls).
 */
public class UrlReduce extends Reducer<UrlWritable, Text, Text, Text>{
// Reused output objects; Hadoop serializes on write.
private Text outputKey=new Text();
private Text outputValue=new Text();
@Override
protected void reduce(UrlWritable key, Iterable<Text> values, Reducer<UrlWritable, Text, Text, Text>.Context context)
throws IOException, InterruptedException {
outputKey.set(key.getUid());
// StringBuilder instead of String += in a loop (O(n^2) copying).
StringBuilder res=new StringBuilder();
for(Text value:values) {
res.append(value.toString());
}
outputValue.set(res.toString());
context.write(outputKey, outputValue);
}
}
6、編寫驅動類
package org.hdfs.urlMapReduce;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
/**
 * Driver: wires the secondary-sort job together.
 *
 * Fixes relative to the original:
 * - map output key/value classes were swapped/wrong (the mapper emits
 *   UrlWritable keys and Text values, but the driver declared Text keys and
 *   called setOutputValueClass with UrlWritable);
 * - UrlPartition and UrlGroup were written but never registered on the job;
 * - reducer count is set to 3 to match the three fixed partitions;
 * - main now propagates the job's exit status via System.exit.
 */
public class UrlRunner implements Tool{
private Configuration con=null;
@Override
public int run(String[]args) throws IOException, ClassNotFoundException, InterruptedException {
Configuration conf=this.getConf();
Job job=Job.getInstance(conf, "urlMapReduce");//任務
job.setJarByClass(UrlRunner.class);
FileInputFormat.addInputPath(job, new Path("/test"));//輸入路徑
job.setMapperClass(UrlMaper.class);
// Map output types must match UrlMaper's <UrlWritable, Text> output.
job.setMapOutputKeyClass(UrlWritable.class);
job.setMapOutputValueClass(Text.class);
// Register the custom partitioner and grouping comparator — without
// these the secondary sort never takes effect.
job.setPartitionerClass(UrlPartition.class);
job.setGroupingComparatorClass(UrlGroup.class);
// Three reducers, one per partition defined in UrlPartition.
job.setNumReduceTasks(3);
job.setReducerClass(UrlReduce.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
Path outdir=new Path("/resultresultreare");//輸出路徑
FileOutputFormat.setOutputPath(job, outdir);
return job.waitForCompletion(true) ?0:-1;
}
@Override
public void setConf(Configuration conf) {
this.con=conf;
this.con.set("fs.defaultFS", "hdfs://hh:8020");
}
@Override
public Configuration getConf() {
return this.con;
}
public static void main(String[] args) throws Exception {
// Propagate the job result as the process exit code.
int code=ToolRunner.run(new UrlRunner(), args);
System.exit(code);
}
}