HBase API in practice: an example of writing data from HDFS into an HBase table

When writing the code in an IDE on Windows, make sure the JDK version matches the JDK version on the Linux cluster; otherwise the program may fail to run in the Linux environment because of the mismatch. This example uses the MapReduce framework to process the data.

If you create a Maven project, import the relevant dependencies in pom.xml (I am not a big fan of Maven, so I simply added the jars from HBase's lib directory to the project instead, which also avoids the trouble caused by version mismatches).

Here is some sample data you can practice with: https://download.csdn.net/download/weixin_43562234/11022425
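
Judging from how the Mapper below parses each line, the input is assumed to be a plain-text file with four or five comma-separated fields per line (the fifth field is missing on some rows). A purely hypothetical line might look like: 10001,2019-03-01,A,27,remark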

Dependencies:

<dependency>     
	<groupId>org.apache.hbase</groupId>     
	<artifactId>hbase-server</artifactId>     
	<version>1.2.6</version> 
</dependency> 
 
<dependency>     
	<groupId>org.apache.hbase</groupId>     
	<artifactId>hbase-client</artifactId>     
	<version>1.2.6</version> 
</dependency> 
 
<dependency>  
	<groupId>jdk.tools</groupId>  
	<artifactId>jdk.tools</artifactId>  
	<version>1.8</version>  
	<scope>system</scope>  
	<systemPath>${JAVA_HOME}/lib/tools.jar</systemPath> 
</dependency>

<dependency>
    <groupId>log4j</groupId>
    <artifactId>log4j</artifactId>
    <version>1.2.17</version>
</dependency>

1.Mapper

package test1_HDFS2;

import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

public class ReadHDFSMapper extends Mapper<LongWritable, Text, ImmutableBytesWritable, Put> {

    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {

        // Read one line from the HDFS file
        String lineValue = value.toString();

        // Split the line on "," into a String[] array
        String[] values = lineValue.split(",");

        // Pick the fields out of the array by position
        String one = values[0];
        String two = values[1];
        String three = values[2];
        String four = values[3];

        // Some rows have no fifth column; to avoid losing those rows,
        // substitute a placeholder value where the field is missing
        String five;
        if (values.length < 5) {
            five = "null";
        } else {
            five = values[4];
        }

        // Build the row key
        // The same user produces data at different times, so using only the user id as
        // the key would overwrite rows; combine the user id with a timestamp instead
        long timestamp = System.currentTimeMillis();
        String rowKey = one + "_" + timestamp;
        ImmutableBytesWritable rowKeyWritable = new ImmutableBytesWritable(Bytes.toBytes(rowKey));

        // Build the Put object
        Put put = new Put(Bytes.toBytes(rowKey));

        // Arguments: column family, column qualifier, value
        // (addColumn replaces Put.add(family, qualifier, value), which is deprecated in HBase 1.x)
        put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("one"), Bytes.toBytes(one));
        put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("two"), Bytes.toBytes(two));
        put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("three"), Bytes.toBytes(three));
        put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("four"), Bytes.toBytes(four));
        put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("five"), Bytes.toBytes(five));

        context.write(rowKeyWritable, put);

    }
}

2.Reducer

package test1_HDFS2;

import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.io.NullWritable;

import java.io.IOException;

public class WriteReducer extends TableReducer<ImmutableBytesWritable, Put, NullWritable> {

    @Override
    protected void reduce(ImmutableBytesWritable key, Iterable<Put> values, Context context) throws IOException, InterruptedException {

        // Forward every Put emitted by the Mapper to the output table ("data", configured in the Driver)
        for (Put put : values) {
            context.write(NullWritable.get(), put);
        }
    }
}
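
Since this Reducer only passes the Put objects through unchanged, HBase also ships a ready-made pass-through reducer, org.apache.hadoop.hbase.mapreduce.IdentityTableReducer, which does the same job. This is just an optional alternative; if you prefer it, the Driver in the next step could register it instead of WriteReducer:

// Optional alternative in the Driver: let HBase's built-in pass-through reducer forward the Puts
TableMapReduceUtil.initTableReducerJob("data",
        org.apache.hadoop.hbase.mapreduce.IdentityTableReducer.class, job);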

3.Driver

package test1_HDFS2;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

import java.io.IOException;

public class Driver extends Configured implements Tool {

    public int run(String[] args) throws IOException, ClassNotFoundException, InterruptedException {


        // Build the configuration
        // HBase depends on ZooKeeper, so the client needs the ZooKeeper quorum and port
        Configuration configuration = HBaseConfiguration.create();
        configuration.set("hbase.zookeeper.quorum", "master,slave3,slave4");
        configuration.set("hbase.zookeeper.property.clientPort", "2181");
        configuration.set("hbase.master", "master:60000");

        // Create the job and set the input path on HDFS
        Job job = Job.getInstance(configuration, this.getClass().getSimpleName());
        job.setJarByClass(Driver.class);
        Path inPath = new Path("hdfs://master:9000/user/hadoop/test/data.csv");
        FileInputFormat.addInputPath(job, inPath);

        // Configure the Mapper
        job.setMapperClass(ReadHDFSMapper.class);
        job.setMapOutputKeyClass(ImmutableBytesWritable.class);
        job.setMapOutputValueClass(Put.class);

        // Configure the Reducer: write the Puts into the HBase table "data"
        TableMapReduceUtil.initTableReducerJob("data", WriteReducer.class, job);

        // Set the number of reduce tasks, at least 1
        job.setNumReduceTasks(1);

        boolean isSuccess = job.waitForCompletion(true);
        if (!isSuccess) {
            throw new IOException("Job running with error");
        }

        return isSuccess ? 0 : 1;
    }


    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        int status = ToolRunner.run(conf, new Driver(), args);
        System.exit(status);
    }
    
}
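
One thing to keep in mind: initTableReducerJob writes into an existing table, it does not create one. Before submitting the job, the target table must already exist with the column family used by the Mapper. You can create it in the HBase shell with create 'data','info', or with the HBase 1.2.x Java client; a minimal sketch is shown below (the class name CreateDataTable is just an illustrative choice, and the table name "data" and family "info" are taken from the code above):

package test1_HDFS2;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;

public class CreateDataTable {

    public static void main(String[] args) throws Exception {
        // Same ZooKeeper settings as in the Driver
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "master,slave3,slave4");
        conf.set("hbase.zookeeper.property.clientPort", "2181");

        try (Connection connection = ConnectionFactory.createConnection(conf);
             Admin admin = connection.getAdmin()) {
            TableName tableName = TableName.valueOf("data");
            if (!admin.tableExists(tableName)) {
                // Create table "data" with the single column family "info"
                HTableDescriptor descriptor = new HTableDescriptor(tableName);
                descriptor.addFamily(new HColumnDescriptor("info"));
                admin.createTable(descriptor);
            }
        }
    }
}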

If there are any mistakes, corrections and feedback are very welcome. Thanks!
