When writing the code on Windows, make sure the JDK version used by your IDE matches the JDK version on the Linux cluster, otherwise a version mismatch can keep the program from running in the Linux environment. The MapReduce framework is used here to process the data.
If you create a Maven project, add the required dependencies to pom.xml. (I am not a big fan of Maven, so I simply imported the jar files from HBase's lib directory into the project, which also avoids trouble caused by mismatched versions.)
Here is some data you can use for practice: https://download.csdn.net/download/weixin_43562234/11022425
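Before looking at the code: the Mapper below assumes every line of the input file is comma-separated, with four mandatory fields and an optional fifth one. The field names used here (one, two, ..., five) are just placeholders; what each column actually means depends on the dataset you use. A line is therefore expected to look roughly like

    field1,field2,field3,field4,field5

where the fifth value may be missing on some rows.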
Required dependencies:
<dependency>
    <groupId>org.apache.hbase</groupId>
    <artifactId>hbase-server</artifactId>
    <version>1.2.6</version>
</dependency>
<dependency>
    <groupId>org.apache.hbase</groupId>
    <artifactId>hbase-client</artifactId>
    <version>1.2.6</version>
</dependency>
<dependency>
    <groupId>jdk.tools</groupId>
    <artifactId>jdk.tools</artifactId>
    <version>1.8</version>
    <scope>system</scope>
    <systemPath>${JAVA_HOME}/lib/tools.jar</systemPath>
</dependency>
<dependency>
    <groupId>log4j</groupId>
    <artifactId>log4j</artifactId>
    <version>1.2.17</version>
</dependency>
1.Mapper
package test1_HDFS2;

import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

public class ReadHDFSMapper extends Mapper<LongWritable, Text, ImmutableBytesWritable, Put> {
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // Read one line from the HDFS file
        String lineValue = value.toString();
        // Split the line on "," into a String[] array
        String[] values = lineValue.split(",");
        // Pick the fields out of the array by position
        String one = values[0];
        String two = values[1];
        String three = values[2];
        String four = values[3];
        // Some rows are missing the fifth column; substitute a placeholder
        // instead of dropping the row
        String five;
        if (values.length < 5) {
            five = "null";
        } else {
            five = values[4];
        }
        // Build the rowkey.
        // The same user produces records at different times, so using only the
        // user id as the rowkey would overwrite data; combine the user id with
        // a timestamp instead.
        long timestamp = System.currentTimeMillis();
        String rowKey = one + "_" + timestamp;
        ImmutableBytesWritable rowKeyWritable = new ImmutableBytesWritable(Bytes.toBytes(rowKey));
        // Build the Put object
        Put put = new Put(Bytes.toBytes(rowKey));
        // Arguments: column family, column, value
        // (addColumn is the 1.x-client replacement for the older Put.add(family, qualifier, value))
        put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("one"), Bytes.toBytes(one));
        put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("two"), Bytes.toBytes(two));
        put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("three"), Bytes.toBytes(three));
        put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("four"), Bytes.toBytes(four));
        put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("five"), Bytes.toBytes(five));
        context.write(rowKeyWritable, put);
    }
}
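One caveat about the rowkey above: System.currentTimeMillis() can return the same value for several lines processed within the same millisecond, and it changes on every run of the job. A possible alternative (purely a sketch, not part of the original code) is to build the key from the user id plus the byte offset of the line, which the framework already passes to map() as the LongWritable key and which is unique per line and stable across reruns:

package test1_HDFS2;

import org.apache.hadoop.hbase.util.Bytes;

// Hypothetical helper, not in the original code: builds a rowkey from the
// user id and the byte offset of the input line (the LongWritable map key).
public class RowKeyUtil {
    public static byte[] build(String userId, long lineOffset) {
        return Bytes.toBytes(userId + "_" + lineOffset);
    }
}

In the Mapper this would be used as new Put(RowKeyUtil.build(one, key.get())).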
2.Reducer
package test1_HDFS2;

import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.io.NullWritable;

import java.io.IOException;

public class WriteReducer extends TableReducer<ImmutableBytesWritable, Put, NullWritable> {
    @Override
    protected void reduce(ImmutableBytesWritable key, Iterable<Put> values, Context context) throws IOException, InterruptedException {
        // Write every Put received from the Mapper into the "data" table
        for (Put put : values) {
            context.write(NullWritable.get(), put);
        }
    }
}
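Because this Reducer only forwards every Put unchanged, the reduce phase is actually optional: TableMapReduceUtil also supports a map-only job that writes straight to HBase through the output format it configures. As a sketch (assuming the same "data" table), the two reducer-related lines in the Driver below could be replaced with:

    TableMapReduceUtil.initTableReducerJob("data", null, job);
    job.setNumReduceTasks(0);

With zero reduce tasks, the Mapper's (ImmutableBytesWritable, Put) pairs go directly to the table output format.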
3.Driver
package test1_HDFS2;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

import java.io.IOException;

public class Driver extends Configured implements Tool {
    public int run(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        // Build the configuration.
        // HBase depends on ZooKeeper, so point the client at the ZooKeeper quorum.
        Configuration configuration = HBaseConfiguration.create();
        configuration.set("hbase.zookeeper.quorum", "master,slave3,slave4");
        configuration.set("hbase.zookeeper.property.clientPort", "2181");
        configuration.set("hbase.master", "master:60000");
        // Create the job
        Job job = Job.getInstance(configuration, this.getClass().getSimpleName());
        job.setJarByClass(Driver.class);
        Path inPath = new Path("hdfs://master:9000/user/hadoop/test/data.csv");
        FileInputFormat.addInputPath(job, inPath);
        // Set the Mapper
        job.setMapperClass(ReadHDFSMapper.class);
        job.setMapOutputKeyClass(ImmutableBytesWritable.class);
        job.setMapOutputValueClass(Put.class);
        // Set the Reducer: write into the "data" table
        TableMapReduceUtil.initTableReducerJob("data", WriteReducer.class, job);
        // Set the number of reduce tasks, at least 1
        job.setNumReduceTasks(1);
        boolean isSuccess = job.waitForCompletion(true);
        if (!isSuccess) {
            throw new IOException("Job running with error");
        }
        return isSuccess ? 0 : 1;
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        int status = ToolRunner.run(conf, new Driver(), args);
        System.exit(status);
    }
}
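The job assumes the target table already exists. Below is a minimal sketch of creating it with the 1.2.6 client API; the table name "data" and column family "info" are taken from the code above, and the helper class itself is hypothetical (you can just as well create the table in the HBase shell once before running the job):

package test1_HDFS2;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;

// Hypothetical helper, not part of the original post: creates the "data"
// table with the "info" column family if it does not exist yet.
public class CreateDataTable {
    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "master,slave3,slave4");
        conf.set("hbase.zookeeper.property.clientPort", "2181");
        try (Connection connection = ConnectionFactory.createConnection(conf);
             Admin admin = connection.getAdmin()) {
            TableName tableName = TableName.valueOf("data");
            if (!admin.tableExists(tableName)) {
                HTableDescriptor descriptor = new HTableDescriptor(tableName);
                descriptor.addFamily(new HColumnDescriptor("info"));
                admin.createTable(descriptor);
            }
        }
    }
}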
If there are any mistakes, corrections and feedback are very welcome. Thank you!