hadoop 單機下運行wordcount

關於單機版的配置,eclipse環境搭建,以後再補充吧

首先是程序

project: wordcount

  1. import java.io.IOException; 
  2. import java.util.StringTokenizer; 
  3.  
  4. import org.apache.hadoop.io.IntWritable; 
  5. import org.apache.hadoop.io.Text; 
  6. import org.apache.hadoop.mapreduce.Mapper; 
  7.  
  8. public class TokenizerMapper extends 
  9.         Mapper<Object, Text, Text, IntWritable> { 
  10.  
  11.     private final static IntWritable one = new IntWritable(1); 
  12.     private Text word = new Text(); 
  13.  
  14.     public void map(Object key, Text value, Context context) 
  15.             throws IOException, InterruptedException { 
  16.         StringTokenizer itr = new StringTokenizer(value.toString()); 
  17.         while (itr.hasMoreTokens()) { 
  18.             word.set(itr.nextToken()); 
  19.             context.write(word, one); 
  20.         } 
  21.     } 
  22.  
  23.  
  24. import java.io.IOException; 
  25.  
  26. import org.apache.hadoop.io.IntWritable; 
  27. import org.apache.hadoop.io.Text; 
  28. import org.apache.hadoop.mapreduce.Reducer; 
  29.  
  30. public class IntSumReducer extends 
  31.         Reducer<Text, IntWritable, Text, IntWritable> { 
  32.     private IntWritable result = new IntWritable(); 
  33.  
  34.     public void reduce(Text key, Iterable<IntWritable> values, Context context) 
  35.             throws IOException, InterruptedException { 
  36.         int sum = 0
  37.         for (IntWritable val : values) { 
  38.             sum += val.get(); 
  39.         } 
  40.         result.set(sum); 
  41.         context.write(key, result); 
  42.     } 
  43.  
  44.  
  45. import org.apache.hadoop.conf.Configuration; 
  46. import org.apache.hadoop.fs.Path; 
  47. import org.apache.hadoop.io.IntWritable; 
  48. import org.apache.hadoop.io.Text; 
  49. import org.apache.hadoop.mapreduce.Job; 
  50. import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; 
  51. import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; 
  52. import org.apache.hadoop.util.GenericOptionsParser; 
  53.  
  54. public class WordCount { 
  55.  
  56.       public static void main(String[] args) throws Exception { 
  57.             Configuration conf = new Configuration(); 
  58. //          String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); 
  59. //          if (otherArgs.length != 2) { 
  60. //            System.err.println("Usage: wordcount <in> <out>"); 
  61. //            System.exit(2); 
  62. //          } 
  63.             Job job = new Job(conf, "word count"); 
  64.             job.setJarByClass(WordCount.class); 
  65.             job.setMapperClass(TokenizerMapper.class); 
  66.             job.setCombinerClass(IntSumReducer.class); 
  67.             job.setReducerClass(IntSumReducer.class); 
  68.             job.setOutputKeyClass(Text.class); 
  69.             job.setOutputValueClass(IntWritable.class); 
  70.             FileInputFormat.addInputPath(job, new Path("/tmp/input")); 
  71.             FileOutputFormat.setOutputPath(job, new Path("/tmp/output")); 
  72.             System.exit(job.waitForCompletion(true) ? 0 : 1); 
  73.           } 
  74.  

將項目export成爲jar包,注意選擇運行類爲WordCount

在hadoop機器上:

[admin@host WordCount]$ vim input1.txt
Hello, i love china
are you ok
?
[admin@host WordCount]$ vim input2.txt
hello, i love word
You are ok

  在hadoop上新建目錄,和put程序運行所需要的輸入文件:

hadoop fs -mkdir /tmp/input
hadoop fs -put input1.txt /tmp/input/
hadoop fs -put input2.txt /tmp/input/

注意:不要預先建立 /tmp/output 目錄 — MapReduce 的 FileOutputFormat 要求輸出目錄在作業執行前不存在,否則作業會直接失敗。
 
執行:
hadoop jar wordcount.jar WordCount
 
查看效果:
hadoop fs -ls /tmp/output/
hadoop fs -cat /tmp/output/part-r-00000

OK!

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章