Hadoop MapReduce 學習筆記(二) 序言和準備2

然後是兩個測試子類,主要區別在於生成不同的測試數據.我想有一個由淺入深的過程,比如我們一開始接觸的MapReduce是WordCount,統計單個單詞的個數.這裏單詞只是一列,相對數據庫來說單詞表只有一個單詞字段.而實際中可能會有多列數據.如用戶表:ID INT,USER_NAME VARCHAR(32),AGE INT.所以我引入了兩個子類,從簡單到複雜.

     1.類似上面的單詞表測試類,只有一個字段.

 
Java代碼  收藏代碼

    package com.guoyun.hadoop.mapreduce.study;  
      
    import java.io.File;  
    import java.io.FileWriter;  
      
    import org.slf4j.Logger;  
    import org.slf4j.LoggerFactory;  
    /**
     * 單列數據的mapreduce測試,類似表
     * CREATE TABLE TABLE_NAME(
     *  ID INT;
     * )
     * 有不同的子類去實現不同的功能,如求最大最小值,排序等
     */  
    public class MyMapReduceSIngleColumnTest extends MyMapReduceTest{  
      public static final Logger log=LoggerFactory.getLogger(MyMapReduceSIngleColumnTest.class);  
        
      public MyMapReduceSIngleColumnTest(long dataLength, String inputPath,  
          String outputPath) throws Exception {  
        super(dataLength, inputPath, outputPath);  
        // TODO Auto-generated constructor stub  
      }  
      
      public MyMapReduceSIngleColumnTest(long dataLength) throws Exception {  
        super(dataLength);  
        // TODO Auto-generated constructor stub  
      }  
      
      public MyMapReduceSIngleColumnTest(String inputPath, String outputPath) {  
        super(inputPath, outputPath);  
        // TODO Auto-generated constructor stub  
      }  
      
      public MyMapReduceSIngleColumnTest(String outputPath) {  
        super(outputPath);  
        // TODO Auto-generated constructor stub  
      }  
      
      protected  void generateDatas(long length) throws Exception{  
        FileWriter fw=null;  
        File file=null;  
        long generateValue=0;  
          
        file=new File(inputPath);  
        if(!file.getParentFile().exists()){  
          if(!file.getParentFile().mkdirs()){  
            throw new Exception("generate datas error,can not create dir:"+file.getParentFile().getAbsolutePath());  
          }  
        }  
          
        try {  
          fw=new FileWriter(file);  
          for(int i=0;i<length;i++){  
            generateValue=(long)(Math.random()*length)+1;  
            if(generateValue>this.maxValue){  
              this.maxValue=generateValue;  
            }else if(generateValue<this.minValue){  
              this.minValue=generateValue;  
            }  
            fw.write(generateValue+NEW_LINE);  
          }  
        } catch (Exception e) {  
          // TODO Auto-generated catch block  
          e.printStackTrace();  
        }finally{  
          if(fw!=null){  
            fw.flush();  
            fw.close();  
          }  
        }  
      }  
    }  


 

   2.類似上面的用戶表,有多列數據,但我這裏生成的只是兩列,你可以下載自己做修改
Java代碼  收藏代碼

    package com.guoyun.hadoop.mapreduce.study;  
      
    import java.io.DataInput;  
    import java.io.DataOutput;  
    import java.io.File;  
    import java.io.FileWriter;  
    import java.io.IOException;  
    import java.util.ArrayList;  
    import java.util.List;  
      
    import org.apache.hadoop.io.LongWritable;  
    import org.apache.hadoop.io.Text;  
    import org.apache.hadoop.io.WritableComparable;  
    import org.apache.hadoop.mapreduce.Mapper;  
    import org.slf4j.Logger;  
    import org.slf4j.LoggerFactory;  
      
    /**
     * 針對一行有多列數據的MapReduce test
     * 類似Table:
     * CREATE TABLE TABLE_NAME(
     *  ID INT,
     *  NAME VARCHAR(32),
     *  ...
     * )
     * 由不同的子類實現不同的功能,如求ID的最大最小值,對ID排序等
     */  
    public class MyMapReduceMultiColumnTest extends MyMapReduceTest {  
      public static final Logger log=LoggerFactory.getLogger(MyMapReduceTest.class);  
        
      public static final String DEFAULT_INPUT_PATH="testDatas/mapreduce/MRInput_MultiColumn";  
      public static final String DEFAULT_OUTPUT_PATH="testDatas/mapreduce/MRInput_MultiColumn";  
      public static final String SPLIT_TAB="\t";  
      private static final List<String> frameworkNames=new ArrayList<String>();  
        
      static{  
        frameworkNames.add("Hadoop");  
        frameworkNames.add("Hbase");  
        frameworkNames.add("Pig");  
        frameworkNames.add("Zookeeper");  
        frameworkNames.add("Chuwka");  
        frameworkNames.add("Avro");  
        frameworkNames.add("Sqoop");  
        frameworkNames.add("Cassandra");  
        frameworkNames.add("Hive");  
        frameworkNames.add("Mahout");  
        frameworkNames.add("Nutch");  
        frameworkNames.add("Lucene");  
        frameworkNames.add("Solr");  
        frameworkNames.add("Heritrix");  
        frameworkNames.add("Netty");  
        frameworkNames.add("Tomcat");  
        frameworkNames.add("Thrift");  
        frameworkNames.add("Ant");  
        frameworkNames.add("Log4j");  
        frameworkNames.add("CouchDB");  
        frameworkNames.add("Maven");  
        frameworkNames.add("Mina");  
        frameworkNames.add("OpenJPA");  
        frameworkNames.add("POI");  
        frameworkNames.add("Struts");  
        frameworkNames.add("Spring");  
        frameworkNames.add("Subversion");  
        frameworkNames.add("Tika");  
      }  
      
      public MyMapReduceMultiColumnTest(long dataLength) throws Exception {  
        super(dataLength);  
        // TODO Auto-generated constructor stub  
      }  
      
      public MyMapReduceMultiColumnTest(String outputPath) throws Exception {  
        super(outputPath);  
        // TODO Auto-generated constructor stub  
      }  
        
      
      public MyMapReduceMultiColumnTest(String inputPath, String outputPath) {  
        super(inputPath, outputPath);  
      }  
        
      
      public MyMapReduceMultiColumnTest(long dataLength, String inputPath,  
          String outputPath) throws Exception {  
        super(dataLength, inputPath, outputPath);  
      }  
      
      @Override  
      protected void generateDatas(long length) throws Exception {  
        FileWriter fw=null;  
        File file=null;  
        long generateValue=0;  
          
        file=new File(inputPath);  
        if(!file.getParentFile().exists()){  
          if(!file.getParentFile().mkdirs()){  
            throw new Exception("generate datas error,can not create dir:"+file.getParentFile().getAbsolutePath());  
          }  
        }  
          
        try {  
          fw=new FileWriter(file);  
          for(int i=0;i<length;i++){  
            generateValue=(long)(Math.random()*length)+1;  
            if(generateValue>this.maxValue){  
              this.maxValue=generateValue;  
            }else if(generateValue<this.minValue){  
              this.minValue=generateValue;  
            }  
            fw.write(this.generateFrameWork()+SPLIT_TAB+generateValue+NEW_LINE);  
          }  
        } catch (Exception e) {  
          // TODO Auto-generated catch block  
          e.printStackTrace();  
        }finally{  
          if(fw!=null){  
            fw.flush();  
            fw.close();  
          }  
        }  
      
      }  
        
      private String generateFrameWork(){  
        int index=(int)(Math.random()*frameworkNames.size());  
        return frameworkNames.get(index);  
      }  
        
      public static class MultiColumnWritable implements  WritableComparable{  
        private String frameworkName="";  
        private long number=-1;  
          
        public String getFrameworkName() {  
          return frameworkName;  
        }  
      
        public void setFrameworkName(String frameworkName) {  
          this.frameworkName = frameworkName;  
        }  
      
        public long getNumber() {  
          return number;  
        }  
      
        public void setNumber(long number) {  
          this.number = number;  
        }  
      
        public MultiColumnWritable() {  
          super();  
        }  
      
        public MultiColumnWritable(String frameworkName, long number) {  
          super();  
          this.frameworkName = frameworkName;  
          this.number = number;  
        }  
      
        @Override  
        public int compareTo(Object obj) {  
          int result=-1;  
          if(obj instanceof MultiColumnWritable){  
            MultiColumnWritable mcw=(MultiColumnWritable)obj;  
            if(mcw.getNumber()<this.getNumber()){  
              result =1;  
            }else if(mcw.getNumber()==this.getNumber()){  
              result=0;  
            }  
          }  
          return result;  
        }  
      
        @Override  
        public void readFields(DataInput in) throws IOException {  
          frameworkName=in.readUTF();  
          number=in.readLong();  
        }  
      
        @Override  
        public void write(DataOutput out) throws IOException {  
          out.writeUTF(frameworkName);  
          out.writeLong(number);  
        }  
      
        @Override  
        public String toString() {  
          return frameworkName+"\t"+number;  
        }  
          
        public static MultiColumnWritable copy(MultiColumnWritable obj){  
          return new MultiColumnWritable(obj.getFrameworkName(),obj.getNumber());  
        }  
          
      }  
        
      /**
       * Map,to get the source datas
       */  
      protected static class MultiSupMapper extends Mapper<LongWritable,Text,Text,MultiColumnWritable>{  
        private final Text writeKey=new Text("K");  
        private MultiColumnWritable writeValue=new MultiColumnWritable();  
          
        @Override  
        protected void map(LongWritable key, Text value, Context context)  
            throws IOException, InterruptedException {  
          log.debug("begin to map");  
          String[] split=null;  
            
          try {  
            split=value.toString().split("\\t");  
            if(split!=null&&split.length==2){  
              writeValue.setFrameworkName(split[0].trim());  
              writeValue.setNumber(Long.parseLong(split[1].trim()));  
            }  
          } catch (NumberFormatException e) {  
            log.error("map error:"+e.getMessage());  
          }  
            
          context.write(writeKey, writeValue);  
        }  
      }  
        
      public static void main(String[] args) throws Exception{  
        MyMapReduceTest test=new MyMapReduceMultiColumnTest(1000);  
      }  
      
    } 
轉自:http://guoyunsky.iteye.com/blog/1233714
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章