Processing Data with MapReduce

1. Write the custom key class

For a secondary sort, the key class implements WritableComparable and compares uid first, then time, so records are ordered by uid and, within each uid, by time:

package org.hdfs.urlMapReduce;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.hadoop.io.WritableComparable;

public class UrlWritable implements WritableComparable<UrlWritable>{

	private String uid;

	private String time;

	public UrlWritable() {}

	public UrlWritable(String uid, String time) {
		this.set(uid, time);
	}

	public void set(String uid, String time) {
		this.uid = uid;
		this.time = time;
	}
	
	public String getUid() {
		return uid;
	}

	public void setUid(String uid) {
		this.uid = uid;
	}

	public String getTime() { 
		return time;
	}

	public void setTime(String time) {
		this.time = time;
	}

	@Override
	public void write(DataOutput out) throws IOException {
		// Serialize the two fields in a fixed order.
		out.writeUTF(uid);
		out.writeUTF(time);
	}

	@Override
	public void readFields(DataInput in) throws IOException {
		// Deserialize in the same order the fields were written.
		this.uid = in.readUTF();
		this.time = in.readUTF();
	}

	@Override
	public int compareTo(UrlWritable o) {
		// Primary sort on uid; fall back to time for the secondary sort.
		int cmp = this.uid.compareTo(o.uid);
		if (cmp != 0) {
			return cmp;
		}
		return this.time.compareTo(o.time);
	}

	@Override
	public String toString() {
		return "UrlWritable [uid=" + uid + ", time=" + time + "]";
	}
	 
}
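
As a quick sanity check, write() and readFields() can be exercised with a round trip through an in-memory byte buffer. This is a minimal sketch outside the job itself; the class name and the uid1/20180619 values are made up for illustration:

package org.hdfs.urlMapReduce;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;

public class UrlWritableCheck {

	public static void main(String[] args) throws IOException {
		// Serialize a key into a byte buffer (hypothetical sample values).
		UrlWritable in = new UrlWritable("uid1", "20180619");
		ByteArrayOutputStream bos = new ByteArrayOutputStream();
		in.write(new DataOutputStream(bos));

		// Deserialize it back and confirm the fields survived the trip.
		UrlWritable out = new UrlWritable();
		out.readFields(new DataInputStream(new ByteArrayInputStream(bos.toByteArray())));
		System.out.println(out); // UrlWritable [uid=uid1, time=20180619]
	}
}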

2. Write the Map class, which applies the business logic to each input record

package org.hdfs.urlMapReduce;

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.log4j.Logger;

public class UrlMaper extends Mapper<LongWritable, Text, UrlWritable, Text>{

	
	private static Logger logger=Logger.getLogger(UrlMaper.class);
	
	private Text outputValue=new Text();
	
	private UrlWritable uw=new UrlWritable();
	
	@Override
	protected void cleanup(Mapper<LongWritable, Text, UrlWritable, Text>.Context context)
			throws IOException, InterruptedException {
		logger.info("cleanup called");
	}

	@Override
	protected void map(LongWritable key, Text value, Context context)
			throws IOException, InterruptedException {
		logger.info("map called");

		String line = value.toString();

		// Expect comma-separated records of the form uid,time,url.
		String[] u = line.split(",");

		uw.setUid(u[0]);
		uw.setTime(u[1]);
		outputValue.set(u[2]);

		context.write(uw, outputValue);
	}

	@Override
	protected void setup(Mapper<LongWritable, Text, UrlWritable, Text>.Context context)
			throws IOException, InterruptedException {
		logger.info("setup called");
	}

	
}
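
The map method assumes each input line is a comma-separated record of the form uid,time,url. A hypothetical input file might look like this (all values made up for illustration):

uid1,20180619,http://www.example.com/a
uid2,20180619,http://www.example.com/b
uid1,20180620,http://www.example.com/c
uid3,20180621,http://www.example.com/d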

3. Write the custom partitioner class, which routes records for different user IDs to different partitions

package org.hdfs.urlMapReduce;

import java.util.HashMap;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Partitioner;

public class UrlPartition extends Partitioner<UrlWritable, Text>{

	static HashMap<String, Integer> u = new HashMap<>();

	static {
		u.put("uid1", 0);
		u.put("uid2", 1);
		u.put("uid3", 2);
	}

	@Override
	public int getPartition(UrlWritable key, Text value, int numPartitions) {
		// Use getUid() rather than toString(): toString() also contains the
		// time, so it would never match the map. Unknown uids fall back to
		// partition 0; returning null here would throw a NullPointerException
		// when the Integer is unboxed to int.
		String uid = key.getUid();
		Integer a = u.get(uid);
		return a == null ? 0 : a;
	}

	
}
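
The partitioner can also be exercised outside of a job. A minimal sketch with hypothetical keys (the class name and values are made up; note the fallback to partition 0 for an unmapped uid):

package org.hdfs.urlMapReduce;

import org.apache.hadoop.io.Text;

public class UrlPartitionCheck {

	public static void main(String[] args) {
		UrlPartition p = new UrlPartition();
		// Mapped uid: uid2 goes to partition 1.
		System.out.println(p.getPartition(new UrlWritable("uid2", "20180619"), new Text("http://www.example.com/b"), 3));
		// Unmapped uid: falls back to partition 0.
		System.out.println(p.getPartition(new UrlWritable("uid9", "20180619"), new Text("x"), 3));
	}
}

Remember that the partitioner only takes effect once the driver registers it with job.setPartitionerClass and sets a matching number of reduce tasks (see step 6).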

4. Write the grouping comparator, which controls how the sorted keys are grouped into reduce calls

package org.hdfs.urlMapReduce;

import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;

public class UrlGroup extends WritableComparator{

	public UrlGroup() {
		// true: instantiate keys so compare() receives deserialized objects.
		super(UrlWritable.class, true);
	}

	@Override
	public int compare(WritableComparable a, WritableComparable b) {
		UrlWritable v1 = (UrlWritable) a;
		UrlWritable v2 = (UrlWritable) b;
		// Group on uid only; comparing the full key (uid and time) would put
		// every (uid, time) pair in its own group and defeat the secondary sort.
		return v1.getUid().compareTo(v2.getUid());
	}
	
}
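
To see how sorting and grouping interact, take the hypothetical input from step 2. After the shuffle the keys reach the reducer sorted by (uid, time) but grouped by uid alone, so each reduce call sees one user's urls in time order:

(uid1, 20180619) and (uid1, 20180620) -> one reduce call: key uid1, values [.../a, .../c]
(uid2, 20180619)                      -> one reduce call: key uid2, values [.../b]
(uid3, 20180621)                      -> one reduce call: key uid3, values [.../d]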

5. Write the Reduce class

package org.hdfs.urlMapReduce;

import java.io.IOException;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class UrlReduce extends Reducer<UrlWritable, Text, Text, Text>{

	private Text outputKey=new Text();
	
	private Text outputValue=new Text();

	@Override
	protected void reduce(UrlWritable key, Iterable<Text> values, Context context)
			throws IOException, InterruptedException {
		outputKey.set(key.getUid());

		// Concatenate this user's urls; they arrive sorted by time thanks to
		// the key's compareTo and the grouping comparator. A space separator
		// keeps the output readable.
		StringBuilder res = new StringBuilder();
		for (Text value : values) {
			if (res.length() > 0) {
				res.append(" ");
			}
			res.append(value.toString());
		}

		outputValue.set(res.toString());
		context.write(outputKey, outputValue);
	}
}
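
With the hypothetical input from step 2, the final output would contain one tab-separated line per uid (the default TextOutputFormat separates key and value with a tab), with that user's urls in time order:

uid1	http://www.example.com/a http://www.example.com/c
uid2	http://www.example.com/b
uid3	http://www.example.com/d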

6. Write the driver class

package org.hdfs.urlMapReduce;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class UrlRunner implements Tool{

	private Configuration con = null;
	
	
	@Override
	public int run(String[] args) throws IOException, ClassNotFoundException, InterruptedException {

		Configuration conf = this.getConf();

		Job job = Job.getInstance(conf, "urlAnalysis"); // the job
		job.setJarByClass(UrlRunner.class);

		FileInputFormat.addInputPath(job, new Path("/test")); // input path

		job.setMapperClass(UrlMaper.class);
		// The map output types must match the mapper's declaration:
		// UrlWritable key, Text value (the original had them reversed).
		job.setMapOutputKeyClass(UrlWritable.class);
		job.setMapOutputValueClass(Text.class);

		// Wire in the partitioner and grouping comparator from steps 3 and 4;
		// without these calls they are never used. Three reduce tasks match
		// the three partitions.
		job.setPartitionerClass(UrlPartition.class);
		job.setGroupingComparatorClass(UrlGroup.class);
		job.setNumReduceTasks(3);

		job.setReducerClass(UrlReduce.class);
		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(Text.class);

		Path outdir = new Path("/resultresultreare"); // output path
		FileOutputFormat.setOutputPath(job, outdir);

		return job.waitForCompletion(true) ? 0 : -1;
	}
	
	@Override
	public void setConf(Configuration conf) {
		this.con = conf;
		// Point the client at the HDFS namenode.
		this.con.set("fs.defaultFS", "hdfs://hh:8020");
	}

	@Override
	public Configuration getConf() {
		return this.con;
	}

	public static void main(String[] args) throws Exception {
		// Propagate the job's exit code to the shell.
		System.exit(ToolRunner.run(new UrlRunner(), args));
	}
}
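
The job can then be packaged into a jar and launched from the command line; the jar name here is just an assumption:

hadoop jar urlMapReduce.jar org.hdfs.urlMapReduce.UrlRunner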
