mapred包升級爲mapreduce包後,一個NutchJob的主要修改

將引用的包從 mapred 改爲 mapreduce 之後,一個 NutchJob 需要做的相關代碼修改如下。

1. Job設置和運行

舊API

    // The mapred package has JobConf and JobClient; both were dropped in the mapreduce package.
    /**
     * Configures and runs the job with the old (mapred) API.
     *
     * @throws Exception if running the job throws, or if the finished job
     *                   reports that it was not successful
     */
    public void myTask() throws Exception {
        JobConf job = new NutchJob(getConf());
        job.setJobName("MyTool");
        job.setInputFormat(SequenceFileInputFormat.class);
        job.setMapperClass(MyMapper.class);
        job.setReducerClass(MyReducer.class);
        job.setOutputFormat(SequenceFileOutputFormat.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        // Custom settings are stored directly on the JobConf.
        job.set(key, value);

        // No try/catch wrapper: a catch block that only rethrows adds nothing,
        // exceptions propagate to the caller as declared by "throws Exception".
        RunningJob runningJob = JobClient.runJob(job);
        if (!runningJob.isSuccessful()) {
            throw new Exception("@@JOB FAILED");
        }
    }

新API

    // In the mapreduce package, settings go through the Configuration class
    // and the job is run through the Job class.
    /**
     * Configures and runs the job with the new (mapreduce) API.
     *
     * @throws Exception if running the job throws, or if
     *                   {@code waitForCompletion} returns {@code false}
     */
    public void myTask() throws Exception {
        Job job = NutchJob.getInstance(getConf());
        Configuration conf = job.getConfiguration();
        job.setJobName("MyTool");
        job.setInputFormatClass(SequenceFileInputFormat.class);
        job.setJarByClass(MyTool.class); // locates the job's jar through the given class
        job.setMapperClass(MyMapper.class);
        job.setReducerClass(MyReducer.class);
        job.setOutputFormatClass(SequenceFileOutputFormat.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        // Custom settings are written to the Configuration obtained from the job.
        conf.set(key, value);

        // No try/catch wrapper: a catch block that only rethrows adds nothing,
        // exceptions propagate to the caller as declared by "throws Exception".
        boolean success = job.waitForCompletion(true);
        if (!success) {
            throw new Exception("@@JOB FAILED");
        }
    }

2. Mapper

	// In the old API, Mapper and Reducer are interfaces.
    public static class MyMapper implements Mapper<Text, CrawlDatum, Text, Text> {
        /**
         * Part of the old Mapper contract (declared by JobConfigurable);
         * this mapper reads no job settings, so the body is empty.
         */
        @Override
        public void configure(JobConf job) {
        }

        /**
         * Emits one Text key/value pair per input record through the collector.
         *
         * @throws IOException if collecting the output pair fails
         */
        @Override
        public void map(Text key, CrawlDatum value, OutputCollector<Text, Text> output, Reporter reporter)
                throws IOException {
            String mapOutputKey;
            String mapOutputValue;
            // some operations that assign mapOutputKey and mapOutputValue
            output.collect(new Text(mapOutputKey), new Text(mapOutputValue));
        }

        /**
         * Part of the old Mapper contract as well; nothing to release here.
         */
        @Override
        public void close() throws IOException {
        }
    }
	// In the new API, Mapper and Reducer are base classes to extend, not interfaces.
    // The new API makes extensive use of Context objects, which let user code
    // communicate with the MapReduce system.
    public static class MyMapper extends Mapper<Text, CrawlDatum, Text, Text> {
        /**
         * Emits one Text key/value pair per input record through the context.
         */
        @Override
        public void map(Text key, CrawlDatum value, Context context)
                throws IOException, InterruptedException {
            String outKey;
            String outValue;
            // some operations
            Text emittedKey = new Text(outKey);
            Text emittedValue = new Text(outValue);
            context.write(emittedKey, emittedValue);
        }
    }

3. Reducer

	// In the old API, configuration is done in configure(), which comes from JobConfigurable.
    public static class MyReducer implements Reducer<Text, Text, Text, Text> {
        /**
         * Reads the job settings this reducer needs from the JobConf.
         */
        @Override
        public void configure(JobConf job) {
            // Kept in a local only for illustration; the value is not used further here.
            String someValue = job.get(SOME_KEY);
        }

        /**
         * Consumes all values of one key and emits a single Text key/value pair.
         *
         * @throws IOException if collecting the output pair fails
         */
        @Override
        public void reduce(Text key, Iterator<Text> values, OutputCollector<Text, Text> output, Reporter reporter)
                throws IOException {
            String reduceOutputKey;
            String reduceOutputValue;
            while (values.hasNext()) {
                // next() must be called on every pass, otherwise hasNext() stays true forever.
                Text value = values.next();
                // some operations that assign reduceOutputKey and reduceOutputValue
            }
            output.collect(new Text(reduceOutputKey), new Text(reduceOutputValue));
        }

        /**
         * Part of the old Reducer contract as well; nothing to release here.
         */
        @Override
        public void close() throws IOException {
        }
    }
    // In the new API the configuration hook is setup(), defined in Mapper and Reducer;
    // the job context is handed over through the Context argument.
    public static class MyReducer extends Reducer<Text, Text, Text, Text> {
        /**
         * Reads the job settings this reducer needs from the context's Configuration.
         */
        @Override
        public void setup(Context context) {
            Configuration conf = context.getConfiguration();
            // Kept in a local only for illustration; the value is not used further here.
            String someValue = conf.get(SOME_KEY);
        }

        /**
         * Consumes all values of one key and emits a single Text key/value pair.
         */
        @Override
        public void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            String reduceOutputKey;
            String reduceOutputValue;
            for (Text value : values) {
                // some operations that assign reduceOutputKey and reduceOutputValue
            }
            context.write(new Text(reduceOutputKey), new Text(reduceOutputValue));
        }
    }
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章