hive安裝配置

Requirements:
    Java 1.6
    Hadoop 0.20.x

ref:https://cwiki.apache.org/confluence/display/Hive/GettingStarted#GettingStarted-InstallingHivefromaStableRelease

1)download hive
http://hive.apache.org/releases.html
2)Installing Hive
（在 /opt/hadoop 目錄下解壓，解壓後得到 /opt/hadoop/hive-0.7.0-bin）
tar zxvf hive-0.7.0-bin.tar.gz

3)create a symlink
root@hadoop1:/opt# ln -sf /opt/hadoop/hive-0.7.0-bin/ /opt/hadoop/hive

4)set environment variables
export HIVE_HOME=/opt/hadoop/hive
export PATH=/opt/hadoop/hive/bin:$PATH
5)running hive

$ $HADOOP_HOME/bin/hadoop fs -mkdir       /tmp
$ $HADOOP_HOME/bin/hadoop fs -mkdir       /user/hive/warehouse
$ $HADOOP_HOME/bin/hadoop fs -chmod g+w   /tmp
$ $HADOOP_HOME/bin/hadoop fs -chmod g+w   /user/hive/warehouse

root@hadoop1:/opt/hadoop/hive/bin# ./hive
Hive history file=/tmp/root/hive_job_log_root_201107121412_939983567.txt

DDL Operations
hive> CREATE TABLE pokes (foo INT, bar STRING);
OK
Time taken: 0.42 seconds
hive> CREATE TABLE invites (foo INT, bar STRING) PARTITIONED BY (ds STRING);
OK
Time taken: 0.099 seconds
hive> SHOW TABLES;
OK
invites
pokes
Time taken: 0.222 seconds
hive> SHOW TABLES '.*s';
OK
invites
pokes
Time taken: 0.134 seconds
hive> DESCRIBE invites;
OK
foo   int
bar   string
ds   string
Time taken: 0.174 seconds
hive> ALTER TABLE pokes ADD COLUMNS (new_col INT);
OK
Time taken: 0.147 seconds
hive> ALTER TABLE invites ADD COLUMNS (new_col2 INT COMMENT 'a comment');
OK
Time taken: 0.115 seconds
hive> DROP TABLE pokes;
OK
Time taken: 1.054 seconds
hive> show tables;
OK
invites
Time taken: 0.131 seconds

DML Operations
從本地加載文件
（注意：上面的 DDL 示例已執行 DROP TABLE pokes，加載前需先重新建表：CREATE TABLE pokes (foo INT, bar STRING);）
hive> LOAD DATA LOCAL INPATH '/opt/hadoop/hive/examples/files/kv1.txt' OVERWRITE INTO TABLE pokes;
Copying data from file:/opt/hadoop/hive/examples/files/kv1.txt
Copying file: file:/opt/hadoop/hive/examples/files/kv1.txt
Loading data to table default.pokes
Deleted hdfs://hadoop1:9000/user/hive/warehouse/pokes
OK
Time taken: 0.318 seconds

hive> select * from pokes limit 10;
OK
238   val_238
86   val_86
311   val_311
27   val_27
165   val_165
409   val_409
255   val_255
278   val_278
98   val_98
484   val_484
Time taken: 0.137 seconds

--分區表加載
hive> LOAD DATA LOCAL INPATH '/opt/hadoop/hive/examples/files/kv2.txt' OVERWRITE INTO TABLE invites PARTITION (ds='2008-08-15');
Copying data from file:/opt/hadoop/hive/examples/files/kv2.txt
Copying file: file:/opt/hadoop/hive/examples/files/kv2.txt
Loading data to table default.invites partition (ds=2008-08-15)
OK
Time taken: 0.394 seconds
hive> select * from invites limit 10;
OK
474   val_475   NULL   2008-08-15
281   val_282   NULL   2008-08-15
179   val_180   NULL   2008-08-15
291   val_292   NULL   2008-08-15
62   val_63   NULL   2008-08-15
271   val_272   NULL   2008-08-15
217   val_218   NULL   2008-08-15
135   val_136   NULL   2008-08-15
167   val_168   NULL   2008-08-15
468   val_469   NULL   2008-08-15
Time taken: 0.217 seconds

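hive> LOAD DATA LOCAL INPATH './examples/files/kv3.txt' OVERWRITE INTO TABLE invites PARTITION (ds='2008-08-08');
（LOCAL INPATH 的相對路徑以當前工作目錄為基準解析；這裡 hive 是從 /opt/hadoop/hive/bin 啟動的，該目錄下沒有 examples/files/kv3.txt，此命令應未成功，故退出後重新進入 hive 並改用絕對路徑）
root@hadoop1:/opt/hadoop/hive/bin#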
root@hadoop1:/opt/hadoop/hive/bin# ./hive
Hive history file=/tmp/root/hive_job_log_root_201107121431_842989549.txt
hive> LOAD DATA LOCAL INPATH '/opt/hadoop/hive/examples/files/kv3.txt' OVERWRITE INTO TABLE invites PARTITION (ds='2008-08-08');
Copying data from file:/opt/hadoop/hive/examples/files/kv3.txt
Copying file: file:/opt/hadoop/hive/examples/files/kv3.txt
Loading data to table default.invites partition (ds=2008-08-08)
OK
Time taken: 6.787 seconds
hive> select * from invites limit 10;
OK
238   val_238   NULL   2008-08-08
NULL       NULL   2008-08-08
311   val_311   NULL   2008-08-08
NULL   val_27   NULL   2008-08-08
NULL   val_165   NULL   2008-08-08
NULL   val_409   NULL   2008-08-08
255   val_255   NULL   2008-08-08
278   val_278   NULL   2008-08-08
98   val_98   NULL   2008-08-08
NULL   val_484   NULL   2008-08-08
Time taken: 0.589 seconds

SQL Operations
hive>
    >
    >
    > SELECT a.foo FROM invites a WHERE a.ds='2008-08-15' limit 10;
Total MapReduce jobs = 1
Launching Job 1 out of 1
Number of reduce tasks is set to 0 since there's no reduce operator
Execution log at: /tmp/root/root_20110712144040_e058253d-bb7f-45b9-97b8-f6c78c5483b1.log
Job running in-process (local Hadoop)
2011-07-12 14:40:52,786 null map = 100%, reduce = 0%
Ended Job = job_local_0001
OK
474
281
179
291
62
271
217
135
167
468
Time taken: 3.62 seconds

hive> INSERT OVERWRITE DIRECTORY '/tmp/hdfs_out' SELECT a.* FROM invites a WHERE a.ds='2008-08-15';
Total MapReduce jobs = 2
Launching Job 1 out of 2
Number of reduce tasks is set to 0 since there's no reduce operator
Execution log at: /tmp/root/root_20110712144141_7b6e4021-a419-42b5-a6eb-c45010872c0a.log
Job running in-process (local Hadoop)
2011-07-12 14:41:39,056 null map = 100%, reduce = 0%
Ended Job = job_local_0001
Ended Job = -1864542964, job is filtered out (removed at runtime).
Moving data to: hdfs://hadoop1:9000/tmp/hive-root/hive_2011-07-12_14-41-36_001_2590472032748705056/-ext-10000
Moving data to: /tmp/hdfs_out
OK
Time taken: 3.247 seconds

hive> INSERT OVERWRITE LOCAL DIRECTORY '/tmp/local_out' SELECT a.* FROM pokes a;
Total MapReduce jobs = 1
Launching Job 1 out of 1
Number of reduce tasks is set to 0 since there's no reduce operator
Execution log at: /tmp/root/root_20110712144242_920398b9-3c37-431b-b088-dcffe1c54aa2.log
Job running in-process (local Hadoop)
2011-07-12 14:42:19,666 null map = 100%, reduce = 0%
Ended Job = job_local_0001
Copying data to local directory /tmp/local_out
Copying data to local directory /tmp/local_out
OK
Time taken: 3.189 seconds

（Hive 寫出到目錄的文本文件默認以 Ctrl-A（\001）作為列分隔符，more 不會顯示該字符，所以下面兩列看起來連在一起）
lpxuan@hadoop1:/tmp/local_out$ more 000000_0
238val_238
86val_86
311val_311
27val_27
165val_165

--group by operation
hive>
    >
    > SELECT a.bar, count(*) FROM invites a WHERE a.foo > 0 GROUP BY a.bar;
Total MapReduce jobs = 1
Launching Job 1 out of 1
Number of reduce tasks not specified. Estimated from input data size: 1
In order to change the average load for a reducer (in bytes):
set hive.exec.reducers.bytes.per.reducer=<number>
In order to limit the maximum number of reducers:
set hive.exec.reducers.max=<number>
In order to set a constant number of reducers:
set mapred.reduce.tasks=<number>
Execution log at: /tmp/root/root_20110712144545_01fc3105-f98f-4d77-841f-61c5d65f80fc.log
Job running in-process (local Hadoop)
2011-07-12 14:45:45,313 null map = 0%, reduce = 0%
2011-07-12 14:45:53,745 null map = 100%, reduce = 0%
2011-07-12 14:45:55,748 null map = 100%, reduce = 100%
Ended Job = job_local_0001
OK
   3
val_100   1
val_101   2
val_79   1
val_81   1
val_83   2
val_86   1
val_87   1
val_88   2
val_9   1
val_90   3
val_92   1
val_94   3
val_95   1
val_98   3
..
Time taken: 18.354 seconds

--join
hive> SELECT t1.bar, t1.foo, t2.foo FROM pokes t1 JOIN invites t2 ON (t1.bar = t2.bar) limit 10;
Total MapReduce jobs = 1
Launching Job 1 out of 1
Number of reduce tasks not specified. Estimated from input data size: 1
In order to change the average load for a reducer (in bytes):
set hive.exec.reducers.bytes.per.reducer=<number>
In order to limit the maximum number of reducers:
set hive.exec.reducers.max=<number>
In order to set a constant number of reducers:
set mapred.reduce.tasks=<number>
Execution log at: /tmp/root/root_20110712144848_0aa68f57-4d70-4cdc-abb7-fa48a5e379dd.log
Job running in-process (local Hadoop)
2011-07-12 14:48:55,650 null map = 0%, reduce = 0%
2011-07-12 14:48:56,653 null map = 100%, reduce = 0%
2011-07-12 14:48:57,659 null map = 100%, reduce = 100%
Ended Job = job_local_0001
OK
val_100   100   99
val_100   100   99
val_103   103   102
val_103   103   102
val_105   105   104
val_105   105   104
val_105   105   104
val_11   11   10
val_111   111   110
val_118   118   117
Time taken: 8.686 seconds

lpxuan151009

發佈了100 篇原創文章 · 獲贊 21 · 訪問量 33萬+

私信關注

oracle模糊查詢:分區局部全文索引方式（四）

java servlet實例

hive sql執行計劃

eclipse運行hadoop wordcount example

hive sql order by 與sort by

Mac下配置sublime實現LaTeX

https://yachay.unat.edu.pe/blog/index.php?comment_area=format_blog&comment_component=blog&comment_co

linux以太網驅動總結