Hive 查詢語法大集合

SELECT … FROM Clauses

[sql]view
plaincopyprint?

hive> SELECT name, salary FROM employees;  

表別名

[sql]view
plaincopyprint?

hive> SELECT   name,   salary FROM employees;  

hive> SELECT e.name, e.salary FROM employees e;  

Specify Columns with Regular Expressions

[sql]view
plaincopyprint?

hive> SELECT symbol, `price.*` FROM stocks;  

AAPL    195.69  197.88  194.0   194.12  194.12  

AAPL    192.63  196.0   190.85  195.46  195.46  

AAPL    196.73  198.37  191.57  192.05  192.05  

AAPL    195.17  200.2   194.42  199.23  199.23  

AAPL    195.91  196.32  193.38  195.86  195.86

Computing with Column Values

[sql]view
plaincopyprint?

hive> SELECT upper(name), salary, deductions["Federal Taxes"],  

    > round(salary * (1 - deductions["Federal Taxes"])) FROM employees;  

JOHN DOE    100000.0  0.2   80000  

MARY SMITH   80000.0  0.2   64000  

TODD JONES   70000.0  0.15  59500  

BILL KING    60000.0  0.15  51000

Arithmetic Operators -- 算術運算

Mathematical functions

Aggregate functions -- 集合函數

[sql]view
plaincopyprint?

hive> SET hive.map.aggr=true;  

hive> SELECT count(*), avg(salary) FROM employees;  

Table generating functions

[sql]view
plaincopyprint?

hive> SELECT explode(subordinates) AS sub FROM employees;  

Mary Smith  

Todd Jones  

Bill King  

Other built-in functions

LIMIT Clause --限制行數

[sql]view
plaincopyprint?

hive> SELECT upper(name), salary, deductions["Federal Taxes"],  

    > round(salary * (1 - deductions["Federal Taxes"])) FROM employees  

    > LIMIT 2;  

JOHN DOE    100000.0  0.2   80000  

MARY SMITH   80000.0  0.2   64000

Column Aliases --列別名

[sql]view
plaincopyprint?

hive> SELECT upper(name), salary, deductions["Federal Taxes"] as fed_taxes,  

    > round(salary * (1 - deductions["Federal Taxes"])) as salary_minus_fed_taxes  

    > FROM employees LIMIT 2;  

JOHN DOE    100000.0  0.2   80000  

MARY SMITH   80000.0  0.2   64000

Nested SELECT Statements --子查詢

[sql]view
plaincopyprint?

hive> FROM (  

    >   SELECT upper(name) AS name, salary, deductions["Federal Taxes"] as fed_taxes,  

    >   round(salary * (1 - deductions["Federal Taxes"])) as salary_minus_fed_taxes  

    >   FROM employees  

    > ) e  

    > SELECT e.name, e.salary_minus_fed_taxes  

    > WHERE e.salary_minus_fed_taxes > 70000;  

JOHN DOE    80000

CASE … WHEN … THEN Statements --case 關鍵字

[sql]view
plaincopyprint?

hive> SELECT name, salary,  

    >   CASE  

    >     WHEN salary <  50000.0 THEN 'low'  

    >     WHEN salary >= 50000.0 AND salary <  70000.0 THEN 'middle'  

    >     WHEN salary >= 70000.0 AND salary < 100000.0 THEN 'high'  

    >     ELSE 'very high'  

    >   END AS bracket FROM employees;  

John Doe         100000.0   very high  

Mary Smith        80000.0   high  

Todd Jones        70000.0   high  

Bill King         60000.0   middle  

Boss Man         200000.0   very high  

Fred Finance     150000.0   very high  

Stacy Accountant  60000.0   middle

WHERE Clauses -- 篩選

[sql]view
plaincopyprint?

SELECT * FROM employees  

WHERE country = 'US' AND state = 'CA';  

[sql]view
plaincopyprint?

hive> SELECT name, salary, deductions["Federal Taxes"],  

    >   salary * (1 - deductions["Federal Taxes"])  

    > FROM employees  

    > WHERE round(salary * (1 - deductions["Federal Taxes"])) > 70000;  

John Doe    100000.0  0.2   80000.0

[sql]view
plaincopyprint?

hive>  SELECT name, salary, deductions["Federal Taxes"],  

    >    salary * (1 - deductions["Federal Taxes"]) as salary_minus_fed_taxes  

    >  FROM employees  

    >  WHERE round(salary_minus_fed_taxes) > 70000;  

FAILED: Error in semantic analysis: Line 4:13 Invalid table alias or  

column reference 'salary_minus_fed_taxes': (possible column names are:  

name, salary, subordinates, deductions, address)

[sql]view
plaincopyprint?

hive> SELECT e.* FROM  

    > (SELECT name, salary, deductions["Federal Taxes"] as ded,  

    >    salary * (1 - deductions["Federal Taxes"]) as salary_minus_fed_taxes  

    >  FROM employees) e  

    > WHERE round(e.salary_minus_fed_taxes) > 70000;  

John Doe        100000.0        0.2     80000.0  

Boss Man        200000.0        0.3     140000.0  

Fred Finance    150000.0        0.3     105000.0

Predicate Operators

LIKE and RLIKE

[sql]view
plaincopyprint?

hive> SELECT name, address.street FROM employees WHERE address.street LIKE '%Ave.';  

John Doe        1 Michigan Ave.  

Todd Jones      200 Chicago Ave.  

hive> SELECT name, address.city FROM employees WHERE address.city LIKE 'O%';  

Todd Jones      Oak Park  

Bill King       Obscuria  

hive> SELECT name, address.street FROM employees WHERE address.street LIKE '%Chi%';  

Todd Jones      200 Chicago Ave.

[sql]view
plaincopyprint?

hive> SELECT name, address.street  

    > FROM employees WHERE address.street RLIKE '.*(Chicago|Ontario).*';  

Mary Smith      100 Ontario St.  

Todd Jones      200 Chicago Ave.  

[sql]view
plaincopyprint?

SELECT name, address FROM employees  

WHERE address.street LIKE '%Chicago%' OR address.street LIKE '%Ontario%';  

GROUP BY Clauses

[sql]view
plaincopyprint?

hive> SELECT year(ymd), avg(price_close) FROM stocks  

    > WHERE exchange = 'NASDAQ' AND symbol = 'AAPL'  

    > GROUP BY year(ymd);  

1984    25.578625440597534  

1985    20.193676221040867  

1986    32.46102808021274  

1987    53.88968399108163  

1988    41.540079275138766  

1989    41.65976212516664  

1990    37.56268799823263  

1991    52.49553383386182  

1992    54.80338610251119  

1993    41.02671956450572  

1994    34.0813495847914

HAVING Clauses

[sql]view
plaincopyprint?

hive> SELECT year(ymd), avg(price_close) FROM stocks  

    > WHERE exchange = 'NASDAQ' AND symbol = 'AAPL'  

    > GROUP BY year(ymd)  

    > HAVING avg(price_close) > 50.0;  

1987    53.88968399108163  

1991    52.49553383386182  

1992    54.80338610251119  

1999    57.77071460844979  

2000    71.74892876261757  

2005    52.401745992993554

Inner JOIN

[sql]view
plaincopyprint?

hive> SELECT a.ymd, a.price_close, b.price_close  

    > FROM stocks a JOIN stocks b ON a.ymd = b.ymd  

    > WHERE a.symbol = 'AAPL' AND b.symbol = 'IBM';  

2010-01-04      214.01  132.45  

2010-01-05      214.38  130.85  

2010-01-06      210.97  130.0  

2010-01-07      210.58  129.55  

2010-01-08      211.98  130.85  

2010-01-11      210.11  129.48

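Example 6-1. Query that will not work in Hive (the ON clause uses a non-equality condition, a.ymd <= b.ymd; older Hive releases accept only equality conditions in JOIN ... ON)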

[sql]view
plaincopyprint?

SELECT a.ymd, a.price_close, b.price_close  

FROM stocks a JOIN stocks b  

ON a.ymd <= b.ymd  

WHERE a.symbol = 'AAPL' AND b.symbol = 'IBM';  

LEFT OUTER JOIN

[sql]view
plaincopyprint?

hive> SELECT s.ymd, s.symbol, s.price_close, d.dividend  

    > FROM stocks s LEFT OUTER JOIN dividends d ON s.ymd = d.ymd AND s.symbol = d.symbol  

    > WHERE s.symbol = 'AAPL';  

...  

1987-05-01      AAPL    80.0    NULL  

1987-05-04      AAPL    79.75   NULL  

1987-05-05      AAPL    80.25   NULL  

1987-05-06      AAPL    80.0    NULL  

1987-05-07      AAPL    80.25   NULL  

1987-05-08      AAPL    79.0    NULL  

1987-05-11      AAPL    77.0    0.015  

1987-05-12      AAPL    75.5    NULL  

1987-05-13      AAPL    78.5    NULL  

1987-05-14      AAPL    79.25   NULL  

1987-05-15      AAPL    78.25   NULL  

1987-05-18      AAPL    75.75   NULL  

1987-05-19      AAPL    73.25   NULL  

1987-05-20      AAPL    74.5    NULL  

...

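OUTER JOIN Gotcha -- the WHERE clause is evaluated after the join, so the condition d.exchange = 'NASDAQ' discards every row where the dividends side is NULL and the LEFT OUTER JOIN behaves like an inner join; filtering each table in a subquery before joining (second query below) keeps the NULL rows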

[sql]view
plaincopyprint?

hive> SELECT s.ymd, s.symbol, s.price_close, d.dividend  

    > FROM stocks s LEFT OUTER JOIN dividends d ON s.ymd = d.ymd AND s.symbol = d.symbol  

    > WHERE s.symbol = 'AAPL'  

    > AND s.exchange = 'NASDAQ' AND d.exchange = 'NASDAQ';  

1987-05-11      AAPL    77.0    0.015  

1987-08-10      AAPL    48.25   0.015  

1987-11-17      AAPL    35.0    0.02  

1988-02-12      AAPL    41.0    0.02  

1988-05-16      AAPL    41.25   0.02  

...

[sql]view
plaincopyprint?

hive> SELECT s.ymd, s.symbol, s.price_close, d.dividend FROM  

    > (SELECT * FROM stocks WHERE symbol = 'AAPL' AND exchange = 'NASDAQ') s  

    > LEFT OUTER JOIN  

    > (SELECT * FROM dividends WHERE symbol = 'AAPL' AND exchange = 'NASDAQ') d  

    > ON s.ymd = d.ymd;  

...  

1988-02-10      AAPL    41.0    NULL  

1988-02-11      AAPL    40.63   NULL  

1988-02-12      AAPL    41.0    0.02  

1988-02-16      AAPL    41.25   NULL  

1988-02-17      AAPL    41.88   NULL

RIGHT OUTER JOIN

[sql]view
plaincopyprint?

hive> SELECT s.ymd, s.symbol, s.price_close, d.dividend  

    > FROM dividends d RIGHT OUTER JOIN stocks s ON d.ymd = s.ymd AND d.symbol = s.symbol  

    > WHERE s.symbol = 'AAPL';  

...  

1987-05-07      AAPL    80.25   NULL  

1987-05-08      AAPL    79.0    NULL  

1987-05-11      AAPL    77.0    0.015  

1987-05-12      AAPL    75.5    NULL  

1987-05-13      AAPL    78.5    NULL

FULL OUTER JOIN

[sql]view
plaincopyprint?

hive> SELECT s.ymd, s.symbol, s.price_close, d.dividend  

    > FROM dividends d FULL OUTER JOIN stocks s ON d.ymd = s.ymd AND d.symbol = s.symbol  

    > WHERE s.symbol = 'AAPL';  

...  

1987-05-07      AAPL    80.25   NULL  

1987-05-08      AAPL    79.0    NULL  

1987-05-11      AAPL    77.0    0.015  

1987-05-12      AAPL    75.5    NULL  

1987-05-13      AAPL    78.5    NULL  

...

LEFT SEMI-JOIN

Example 6-2. Query that will not work in Hive

[sql]view
plaincopyprint?

SELECT s.ymd, s.symbol, s.price_close FROM stocks s  

WHERE s.ymd, s.symbol IN  

(SELECT d.ymd, d.symbol FROM dividends d);  

Instead, you use the following LEFT SEMI JOIN syntax:

[sql]view
plaincopyprint?

hive> SELECT s.ymd, s.symbol, s.price_close  

    > FROM stocks s LEFT SEMI JOIN dividends d ON s.ymd = d.ymd AND s.symbol = d.symbol;  

...  

1962-11-05      IBM     361.5  

1962-08-07      IBM     373.25  

1962-05-08      IBM     459.5  

1962-02-06      IBM     551.5

ORDER BY and SORT BY

Here is an example using ORDER BY:

[sql]view
plaincopyprint?

SELECT s.ymd, s.symbol, s.price_close  

FROM stocks s  

ORDER BY s.ymd ASC, s.symbol DESC;  

Here is the same example using SORT BY instead:

[sql]view
plaincopyprint?

SELECT s.ymd, s.symbol, s.price_close  

FROM stocks s  

SORT BY s.ymd ASC, s.symbol DESC;  

Casting

[sql]view
plaincopyprint?

SELECT name, salary FROM employees  

WHERE cast(salary AS FLOAT) < 100000.0;  

Queries that Sample Data -- 抽樣

[sql]view
plaincopyprint?

hive> SELECT * from numbers TABLESAMPLE(BUCKET 3 OUT OF 10 ON rand()) s;  

2  

4  

hive> SELECT * from numbers TABLESAMPLE(BUCKET 3 OUT OF 10 ON rand()) s;  

7  

10  

hive> SELECT * from numbers TABLESAMPLE(BUCKET 3 OUT OF 10 ON rand()) s;

[sql]view
plaincopyprint?

hive> SELECT * from numbers TABLESAMPLE(BUCKET 3 OUT OF 10 ON number) s;  

2  

hive> SELECT * from numbers TABLESAMPLE(BUCKET 5 OUT OF 10 ON number) s;  

4  

hive> SELECT * from numbers TABLESAMPLE(BUCKET 3 OUT OF 10 ON number) s;  

2

[sql]view
plaincopyprint?

hive> SELECT * from numbers TABLESAMPLE(BUCKET 1 OUT OF 2 ON number) s;  

2  

4  

6  

8  

10  

hive> SELECT * from numbers TABLESAMPLE(BUCKET 2 OUT OF 2 ON number) s;  

1  

3  

5  

7  

9

UNION ALL

[sql]view
plaincopyprint?

SELECT log.ymd, log.level, log.message  

  FROM (  

    SELECT l1.ymd, l1.level,  

      l1.message, 'Log1' AS source  

    FROM log1 l1  

  UNION ALL  

    SELECT l2.ymd, l2.level,  

      l2.message, 'Log2' AS source  

    FROM log2 l2  

  ) log  

SORT BY log.ymd ASC;

[sql]view
plaincopyprint?

FROM (  

  FROM src SELECT src.key, src.value WHERE src.key < 100  

  UNION ALL  

  FROM src SELECT src.* WHERE src.key > 110  

) unioninput  

INSERT OVERWRITE DIRECTORY '/tmp/union.out' SELECT unioninput.*

Hive 查詢語法大集合

工作中用到的腳本合集

通過f-string編寫簡潔高效的Python格式化輸出代碼

24-5-18 X

js獲取瀏覽器類型（包含IE11）

win7下安裝 python2 和python3

安裝xampp之後報錯XAMPP: Starting Apache...fail.修改端口

centos7.5安裝mysql-5.7.28 經典版

centos 7.6 安裝python2和python3

https://yachay.unat.edu.pe/blog/index.php?comment_area=format_blog&comment_component=blog&comment_co

linux以太網驅動總結