關於單機版的配置,eclipse環境搭建,以後再補充吧
首先是程序
project: wordcunt
- import java.io.IOException;
- import java.util.StringTokenizer;
- import org.apache.hadoop.io.IntWritable;
- import org.apache.hadoop.io.Text;
- import org.apache.hadoop.mapreduce.Mapper;
- public class TokenizerMapper extends
- Mapper<Object, Text, Text, IntWritable> {
- private final static IntWritable one = new IntWritable(1);
- private Text word = new Text();
- public void map(Object key, Text value, Context context)
- throws IOException, InterruptedException {
- StringTokenizer itr = new StringTokenizer(value.toString());
- while (itr.hasMoreTokens()) {
- word.set(itr.nextToken());
- context.write(word, one);
- }
- }
- }
- import java.io.IOException;
- import org.apache.hadoop.io.IntWritable;
- import org.apache.hadoop.io.Text;
- import org.apache.hadoop.mapreduce.Reducer;
- public class IntSumReducer extends
- Reducer<Text, IntWritable, Text, IntWritable> {
- private IntWritable result = new IntWritable();
- public void reduce(Text key, Iterable<IntWritable> values, Context context)
- throws IOException, InterruptedException {
- int sum = 0;
- for (IntWritable val : values) {
- sum += val.get();
- }
- result.set(sum);
- context.write(key, result);
- }
- }
- import org.apache.hadoop.conf.Configuration;
- import org.apache.hadoop.fs.Path;
- import org.apache.hadoop.io.IntWritable;
- import org.apache.hadoop.io.Text;
- import org.apache.hadoop.mapreduce.Job;
- import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
- import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
- import org.apache.hadoop.util.GenericOptionsParser;
- public class WordCount {
- public static void main(String[] args) throws Exception {
- Configuration conf = new Configuration();
- // String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
- // if (otherArgs.length != 2) {
- // System.err.println("Usage: wordcount <in> <out>");
- // System.exit(2);
- // }
- Job job = new Job(conf, "word count");
- job.setJarByClass(WordCount.class);
- job.setMapperClass(TokenizerMapper.class);
- job.setCombinerClass(IntSumReducer.class);
- job.setReducerClass(IntSumReducer.class);
- job.setOutputKeyClass(Text.class);
- job.setOutputValueClass(IntWritable.class);
- FileInputFormat.addInputPath(job, new Path("/tmp/input"));
- FileOutputFormat.setOutputPath(job, new Path("/tmp/output"));
- System.exit(job.waitForCompletion(true) ? 0 : 1);
- }
- }
將項目export成爲jar包,注意選擇運行類爲WordCount
在hadoop機器上:
Hello, i love china
are you ok?
[admin@host WordCount]$ vim input2.txt
hello, i love word
You are ok
在hadoop上新建目錄,和put程序運行所需要的輸入文件:
hadoop fs -mkdir /tmp/output
hadoop fs -put input1.txt /tmp/input/
hadoop fs -put input2.txt /tmp/input/
OK!