爲了解決大量小圖片在HDFS中存儲時存在的問題,將小圖片存儲到SequenceFile中,然後通過MapReduce函數對SequenceFile文件進行操作。
通過設置job的輸入文件格式,讀取SequenceFile中的數據,代碼如下:
package com.wang;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.SequenceFileOutputFormat;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Mapper.Context;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.ReflectionUtils;
public class Parral_Pyramid {
    // Record counter incremented by the mapper.
    // NOTE(review): a static counter is only meaningful when the job runs in a
    // single local JVM; in a distributed job each map task has its own copy of
    // this field — confirm this is intended for local debugging only.
    static int i = 0;

    /**
     * Driver for a map-only job that reads (Text, Text) records out of a
     * SequenceFile on HDFS and writes them back to a new SequenceFile.
     * Input and output paths are currently hard-coded to the master HDFS.
     *
     * @param args unused
     * @throws Exception if job configuration or submission fails
     */
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);
        // Register the jar containing this driver class.
        job.setJarByClass(Parral_Pyramid.class);
        // Input is the SequenceFile produced by a previous job.
        job.setInputFormatClass(SequenceFileInputFormat.class);
        // Mapper configuration; with zero reducers these are also the job's
        // final output key/value types.
        job.setMapperClass(Image_Mapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        // Fully qualified on purpose: the SequenceFileOutputFormat imported at
        // the top of the file is the old mapred API and would not compile here.
        job.setOutputFormatClass(org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat.class);
        // Map-only job: records go straight from the mapper to the output format.
        job.setNumReduceTasks(0);
        FileInputFormat.setInputPaths(job, new Path("hdfs://master:9000/wang/result1.seq/part-r-00000"));
        FileOutputFormat.setOutputPath(job, new Path("hdfs://master:9000/wang/result2.seq"));
        // Propagate job success/failure to the process exit code instead of
        // silently discarding the return value of waitForCompletion().
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }

    /**
     * Mapper that receives each (key, value) record stored in the input
     * SequenceFile. It currently only logs each record; nothing is emitted
     * via context.write(...), so the job's output SequenceFile stays empty
     * until a write is added.
     */
    static class Image_Mapper extends Mapper<Text, Text, Text, Text> {
        @Override
        protected void map(Text key, Text value, Context context)
                throws IOException, InterruptedException {
            i++;
            // key/value here are exactly the key/value pairs stored in the SequenceFile.
            System.out.println("now_key:" + key.toString() + "value=" + value);
        }
    }
}