示例代碼:
- import java.io.ByteArrayOutputStream;
- import java.io.DataOutputStream;
- import java.io.IOException;
- import java.util.HashMap;
- import org.apache.hadoop.conf.Configuration;
- import org.apache.hadoop.hbase.HBaseConfiguration;
- import org.apache.hadoop.hbase.HColumnDescriptor;
- import org.apache.hadoop.hbase.HTableDescriptor;
- import org.apache.hadoop.hbase.client.HBaseAdmin;
- import org.apache.hadoop.hbase.client.HTable;
- import org.apache.hadoop.hbase.client.Put;
- import org.apache.hadoop.hbase.client.Result;
- import org.apache.hadoop.hbase.client.Scan;
- import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
- import org.apache.hadoop.hbase.mapreduce.MultiTableOutputFormat;
- import org.apache.hadoop.hbase.mapreduce.TableInputFormat;
- import org.apache.hadoop.hbase.util.Base64;
- import org.apache.hadoop.hbase.util.Bytes;
- import org.apache.hadoop.io.Writable;
- import org.apache.hadoop.mapreduce.Job;
- import org.apache.hadoop.mapreduce.Mapper;
- public class IndexBuilder {
- // 索引表唯一的一列爲 INDEX_ROW,其中 INDEX 爲列族
- private static final byte[] INDEX_COLUMN = Bytes.toBytes("INDEX");
- private static final byte[] INDEX_QUALIFIER = Bytes.toBytes("ROW");
- // 實現 Map 類
- public static class Map extends
- Mapper<ImmutableBytesWritable, Result, ImmutableBytesWritable, Writable> {
- // 存儲了“列名”到“表名——列名”的映射
- // 前者用於獲取某列的值,並作爲索引表的鍵值;後者用戶作爲索引表的表名
- private HashMap<byte[], ImmutableBytesWritable> indexes;
- private byte[] family;
- // 實現 map 函數
- public void map(ImmutableBytesWritable key, Result value,
- Context context) throws IOException, InterruptedException {
- // indexes是在setup方法中初始化的
- for (java.util.Map.Entry<byte[], ImmutableBytesWritable> index : indexes
- .entrySet()) {
- // 獲取列名
- byte[] qualifier = index.getKey();
- // 索引表的表名
- ImmutableBytesWritable tableName = index.getValue();
- // 根據“列族:列名”獲得元素值
- byte[] newValue = value.getValue(family, qualifier);
- if (newValue != null) {
- // 以列值作爲行健,在列“INDEX:ROW”中插入行健
- Put put = new Put(newValue);
- put.add(INDEX_COLUMN, INDEX_QUALIFIER, key.get());
- // 在 tableName 表上執行 put
- // 操作使用 MultipleOutputFormat 時,
- //第二個參數必須是 Put 和 Delete 類型
- context.write(tableName, put);
- }
- }
- }
- // setup爲Mapper中的方法,該方法只在任務初始化時執行一次
- protected void setup(Context context) throws IOException,
- InterruptedException {
- Configuration conf = context.getConfiguration();
- // 通過 Configuration.set()方法傳遞參數
- String tableName = conf.get("index.tablename");
- String[] fields = conf.getStrings("index.fields");
- // fields 內爲需要做索引的列名
- String familyName = conf.get("index.familyname");
- family = Bytes.toBytes(familyName);
- // 初始化 indexes 方法
- indexes = new HashMap<byte[], ImmutableBytesWritable>();
- for (String field : fields) {
- // 如果給 name 做索引,則索引表的名稱爲“heroes‐name”
- indexes.put(Bytes.toBytes(field),
- new ImmutableBytesWritable(
- Bytes.toBytes(tableName + "‐" + field)));
- }
- }
- }
- // 初始化示例數據表——“heroes”
- public static void initHBaseTable(Configuration conf, String tableName)
- throws IOException {
- // 創建表描述
- HTableDescriptor htd = new HTableDescriptor(tableName);
- // 創建列族描述
- HColumnDescriptor col = new HColumnDescriptor("info");
- htd.addFamily(col);
- HBaseAdmin hAdmin = new HBaseAdmin(conf);
- if (hAdmin.tableExists(tableName)) {
- System.out.println("該數據表已經存在,正在重新創建。");
- hAdmin.disableTable(tableName);
- hAdmin.deleteTable(tableName);
- }
- System.out.println("創建表:" + tableName);
- // 創建表
- hAdmin.createTable(htd);
- HTable table = new HTable(conf, tableName);
- System.out.println("向表中插入數據");
- // 添加數據
- addRow(table, "1", "info", "name", "peter");
- addRow(table, "1", "info", "email", "[email protected]");
- addRow(table, "1", "info", "power", "absorb abilities");
- addRow(table, "2", "info", "name", "hiro");
- addRow(table, "2", "info", "email", "[email protected]");
- addRow(table, "2", "info", "power", "bend time and space");
- addRow(table, "3", "info", "name", "sylar");
- addRow(table, "3", "info", "email", "[email protected]");
- addRow(table, "3", "info", "power", "hnow how things work");
- addRow(table, "4", "info", "name", "claire");
- addRow(table, "4", "info", "email", "[email protected]");
- addRow(table, "4", "info", "power", "heal");
- addRow(table, "5", "info", "name", "noah");
- addRow(table, "5", "info", "email", "[email protected]");
- addRow(table, "5", "info", "power", "cath the people with ablities");
- }
- // 添加一條數據
- private static void addRow(HTable table, String row,
- String columnFamily,String column, String value) throws IOException {
- Put put = new Put(Bytes.toBytes(row));
- // 參數出分別:列族、列、值
- put.add(Bytes.toBytes(columnFamily), Bytes.toBytes(column),
- Bytes.toBytes(value));
- table.put(put);
- }
- // 創建數據庫表
- public static void createIndexTable(Configuration conf,
- String tableName) throws Exception {
- // 新建一個數據庫管理員
- HBaseAdmin hAdmin = new HBaseAdmin(conf);
- if (hAdmin.tableExists(tableName)) {
- System.out.println("該數據表已經存在,正在重新創建。");
- hAdmin.disableTable(tableName);
- hAdmin.deleteTable(tableName);
- }
- // 新建一個表的描述
- HTableDescriptor tableDesc = new HTableDescriptor(tableName);
- // 在描述裏添加列族
- tableDesc.addFamily(new HColumnDescriptor(INDEX_COLUMN));
- // 根據配置好的描述建表
- hAdmin.createTable(tableDesc);
- System.out.println("創建" + tableName + "表成功");
- }
- public static Job configureJob(Configuration conf, String jobName)
- throws IOException {
- Job job = new Job(conf, jobName);
- job.setJarByClass(IndexBuilder.class);
- // 設置 Map 處理類
- job.setMapperClass(Map.class);
- // 設置 Reduce 個數
- job.setNumReduceTasks(0);
- // 設置輸入和輸出格式
- job.setInputFormatClass(TableInputFormat.class);
- job.setOutputFormatClass(MultiTableOutputFormat.class);
- return job;
- }
- private static String convertScanToString(Scan scan)
- throws IOException {
- ByteArrayOutputStream out = new ByteArrayOutputStream();
- DataOutputStream dos = new DataOutputStream(out);
- scan.write(dos);
- return Base64.encodeBytes(out.toByteArray());
- }
- public static void main(String[] args) throws Exception {
- Configuration conf = HBaseConfiguration.create();
- conf.set("hbase.zookeeper.quorum", "master");
- conf.set("hbase.zookeeper.property.clientPort", "2181");
- String tableName = "heroes";
- String columnFamily = "info";
- String[] fields = { "name", "power" };
- // 第一步:初始化數據庫表
- IndexBuilder.initHBaseTable(conf, tableName);
- // 第二步:創建索引表
- for (String field : fields) {
- IndexBuilder.createIndexTable(conf, tableName + "‐" + field);
- }
- // 第三步:進行 MapReduce 處理
- conf.set("mapred.job.tracker", "master:9001");
- conf.set(TableInputFormat.SCAN, convertScanToString(new Scan()));
- conf.set(TableInputFormat.INPUT_TABLE, tableName);
- // 設置傳遞屬性值
- conf.set("index.tablename", tableName);
- conf.set("index.familyname", columnFamily);
- conf.setStrings("index.fields", fields);
- Job job = IndexBuilder.configureJob(conf, "Index Builder");
- System.exit(job.waitForCompletion(true) ? 0 : 1);
- }
- }
編譯完成後,可在 hbase shell 下運行 list 命令查看所創建的表;其他操作表的命令在此不再贅述。
該示例只有map,沒有reduce。