本文簡單記敘在Linux環境下通過TPC-H生成MySQL數據庫測試數據的步驟,作爲後續參考。
生成數據的量級如下,數據庫需要300G左右空間。
表名 | 數據行數 | 量級 |
customer | 15000000 | 1.5千萬 |
lineitem | 600037902 | 6億 |
nation | 25 | |
orders | 150000000 | 1.5億 |
part | 20000000 | 2千萬 |
partsupp | 80000000 | 8千萬 |
region | 5 | |
supplier | 1000000 | 1百萬 |
1. 下載TPC-H:https://download.csdn.net/download/kkdelta/12390317
這個源文件針對mysql做了定製修改
修改1:makefile (如果是生成其它數據庫類型的測試數據,請修改DATABASE= 對應數據庫)
CC = gcc
# Current values for DATABASE are: INFORMIX, DB2, TDAT (Teradata)
# SQLSERVER, SYBASE, ORACLE, VECTORWISE
# NOTE: MYSQL is not in the stock list above; it is usable here only because
# this customised source adds an `#ifdef MYSQL` section to the header file.
# Current values for MACHINE are: ATT, DOS, HP, IBM, ICL, MVS,
# SGI, SUN, U2200, VMS, LINUX, WIN32
# Current values for WORKLOAD are: TPCH
DATABASE= MYSQL
MACHINE = LINUX
WORKLOAD = TPCH
修改2:頭文件添加了如下信息,默認沒有MYSQL
/*
 * MySQL dialect strings, active only when the MYSQL macro is defined
 * (presumably driven by DATABASE= MYSQL in the makefile -- confirm how the
 * makefile passes that value to the compiler).
 */
#ifdef MYSQL
/* Left empty for MySQL. */
#define GEN_QUERY_PLAN ""
/* Statement text used to open a transaction. */
#define START_TRAN "START TRANSACTION"
/* Statement text used to commit a transaction. */
#define END_TRAN "COMMIT"
/* Left empty for MySQL. */
#define SET_OUTPUT ""
/* printf-style template taking one integer (presumably the row limit). */
#define SET_ROWCOUNT "limit %d;\n"
/* printf-style template taking one string (presumably the database name). */
#define SET_DBASE "use %s;\n"
#endif
2. 解壓編譯
#tar -xzvf <下載得到的TPC-H壓縮包文件名>
#進入tpch_2.18.0_rc2/dbgen 目錄執行 make命令
3. 生成數據:以下命令生成100G測試數據,更多dbgen 參數請參照 https://github.com/electrum/tpch-dbgen?spm=a2c4g.11186623.2.12.49503a21XMT2IL
# Run dbgen in the background, detached from the terminal (nohup ... &), with
# stdout and stderr both captured in out.txt.
# -s 100 is the setting used for the 100G data set described in this article.
# -f presumably forces overwriting of existing output files -- confirm with ./dbgen -h.
nohup ./dbgen -f -s 100 >out.txt 2>&1 &
4. 導入數據
4.1 創建數據庫表結構語句
-- customer: one row per customer, keyed by C_CUSTKEY
-- (15,000,000 rows in the data set described in this article).
-- C_NATIONKEY is joined to nation.N_NATIONKEY by the test queries; no FOREIGN KEY is declared.
CREATE TABLE `customer` (
    `C_CUSTKEY`    INT(11)       NOT NULL,
    `C_NAME`       VARCHAR(25)   NOT NULL,
    `C_ADDRESS`    VARCHAR(40)   NOT NULL,
    `C_NATIONKEY`  INT(11)       NOT NULL,
    `C_PHONE`      VARCHAR(15)   NOT NULL,
    `C_ACCTBAL`    DECIMAL(12,2) NOT NULL,
    `C_MKTSEGMENT` VARCHAR(10)   NOT NULL,
    `C_COMMENT`    VARCHAR(117)  NOT NULL,
    PRIMARY KEY (`C_CUSTKEY`)
)
ENGINE = InnoDB
DEFAULT CHARSET = utf8;
-- lineitem: one row per order line (600,037,902 rows in the data set described
-- in this article). The primary key is (L_ORDERKEY, L_LINENUMBER, L_SHIPDATE).
-- NOTE(review): L_SHIPDATE in the key is presumably there to allow date
-- partitioning -- confirm before relying on it.
CREATE TABLE `lineitem` (
    `L_ORDERKEY`      BIGINT(20)    NOT NULL,
    `L_PARTKEY`       INT(11)       NOT NULL,
    `L_SUPPKEY`       INT(11)       NOT NULL,
    `L_LINENUMBER`    BIGINT(20)    NOT NULL,
    `L_QUANTITY`      DECIMAL(12,2) NOT NULL,
    `L_EXTENDEDPRICE` DECIMAL(12,2) NOT NULL,
    `L_DISCOUNT`      DECIMAL(12,2) NOT NULL,
    `L_TAX`           DECIMAL(12,2) NOT NULL,
    `L_RETURNFLAG`    VARCHAR(1)    NOT NULL,
    `L_LINESTATUS`    VARCHAR(1)    NOT NULL,
    `L_SHIPDATE`      DATE          NOT NULL,
    `L_COMMITDATE`    DATE          NOT NULL,
    `L_RECEIPTDATE`   DATE          NOT NULL,
    `L_SHIPINSTRUCT`  VARCHAR(25)   NOT NULL,
    `L_SHIPMODE`      VARCHAR(10)   NOT NULL,
    `L_COMMENT`       VARCHAR(44)   NOT NULL,
    PRIMARY KEY (`L_ORDERKEY`, `L_LINENUMBER`, `L_SHIPDATE`)
)
ENGINE = InnoDB
DEFAULT CHARSET = utf8;
-- nation: 25-row lookup table keyed by N_NATIONKEY.
-- N_REGIONKEY is joined to region.R_REGIONKEY by the test queries.
-- N_COMMENT is the only nullable column.
CREATE TABLE `nation` (
    `N_NATIONKEY` INT(11)      NOT NULL,
    `N_NAME`      VARCHAR(25)  NOT NULL,
    `N_REGIONKEY` INT(11)      NOT NULL,
    `N_COMMENT`   VARCHAR(152) DEFAULT NULL,
    PRIMARY KEY (`N_NATIONKEY`)
)
ENGINE = InnoDB
DEFAULT CHARSET = utf8;
-- orders: one row per order (150,000,000 rows in the data set described in
-- this article). The primary key is (O_ORDERKEY, O_ORDERDATE).
-- O_CUSTKEY is joined to customer.C_CUSTKEY by the test queries.
CREATE TABLE `orders` (
    `O_ORDERKEY`      BIGINT(20)    NOT NULL,
    `O_CUSTKEY`       INT(11)       NOT NULL,
    `O_ORDERSTATUS`   VARCHAR(1)    NOT NULL,
    `O_TOTALPRICE`    DECIMAL(12,2) NOT NULL,
    `O_ORDERDATE`     DATE          NOT NULL,
    `O_ORDERPRIORITY` VARCHAR(15)   NOT NULL,
    `O_CLERK`         VARCHAR(15)   NOT NULL,
    `O_SHIPPRIORITY`  INT(11)       NOT NULL,
    `O_COMMENT`       VARCHAR(79)   NOT NULL,
    PRIMARY KEY (`O_ORDERKEY`, `O_ORDERDATE`)
)
ENGINE = InnoDB
DEFAULT CHARSET = utf8;
-- part: one row per part, keyed by P_PARTKEY
-- (20,000,000 rows in the data set described in this article).
CREATE TABLE `part` (
    `P_PARTKEY`     INT(11)       NOT NULL,
    `P_NAME`        VARCHAR(55)   NOT NULL,
    `P_MFGR`        VARCHAR(25)   NOT NULL,
    `P_BRAND`       VARCHAR(10)   NOT NULL,
    `P_TYPE`        VARCHAR(25)   NOT NULL,
    `P_SIZE`        INT(11)       NOT NULL,
    `P_CONTAINER`   VARCHAR(10)   NOT NULL,
    `P_RETAILPRICE` DECIMAL(12,2) NOT NULL,
    `P_COMMENT`     VARCHAR(23)   NOT NULL,
    PRIMARY KEY (`P_PARTKEY`)
)
ENGINE = InnoDB
DEFAULT CHARSET = utf8;
-- partsupp: one row per (part, supplier) pair
-- (80,000,000 rows in the data set described in this article).
-- The test queries join PS_PARTKEY to part.P_PARTKEY and PS_SUPPKEY to supplier.S_SUPPKEY.
CREATE TABLE `partsupp` (
    `PS_PARTKEY`    INT(11)       NOT NULL,
    `PS_SUPPKEY`    INT(11)       NOT NULL,
    `PS_AVAILQTY`   INT(11)       NOT NULL,
    `PS_SUPPLYCOST` DECIMAL(12,2) NOT NULL,
    `PS_COMMENT`    VARCHAR(199)  NOT NULL,
    PRIMARY KEY (`PS_PARTKEY`, `PS_SUPPKEY`)
)
ENGINE = InnoDB
DEFAULT CHARSET = utf8;
-- region: 5-row lookup table keyed by R_REGIONKEY.
-- R_COMMENT is the only nullable column.
CREATE TABLE `region` (
    `R_REGIONKEY` INT(11)      NOT NULL,
    `R_NAME`      VARCHAR(25)  NOT NULL,
    `R_COMMENT`   VARCHAR(152) DEFAULT NULL,
    PRIMARY KEY (`R_REGIONKEY`)
)
ENGINE = InnoDB
DEFAULT CHARSET = utf8;
-- supplier: one row per supplier, keyed by S_SUPPKEY
-- (1,000,000 rows in the data set described in this article).
-- S_NATIONKEY is joined to nation.N_NATIONKEY by the test queries.
CREATE TABLE `supplier` (
    `S_SUPPKEY`   INT(11)       NOT NULL,
    `S_NAME`      VARCHAR(25)   NOT NULL,
    `S_ADDRESS`   VARCHAR(40)   NOT NULL,
    `S_NATIONKEY` INT(11)       NOT NULL,
    `S_PHONE`     VARCHAR(15)   NOT NULL,
    `S_ACCTBAL`   DECIMAL(12,2) NOT NULL,
    `S_COMMENT`   VARCHAR(101)  NOT NULL,
    PRIMARY KEY (`S_SUPPKEY`)
)
ENGINE = InnoDB
DEFAULT CHARSET = utf8;
在100GB數據規模下,缺省MySQL表結構無法完成全部測試,可以通過增加索引提升數據查詢性能。
如果是在加載完數據後再創建index會比較耗費時間,注意單條執行
-- Secondary indexes used to speed up the test queries. Execute the statements
-- one at a time; building them after the data is loaded is slow.
--
-- Deliberately NOT indexed separately, because an existing index already
-- starts with the same column and a second copy only costs build time and disk:
--   * customer(c_custkey), part(p_partkey), supplier(s_suppkey),
--     nation(n_nationkey), region(r_regionkey): identical to the PRIMARY KEY.
--   * orders(o_orderkey), lineitem(l_orderkey), partsupp(ps_partkey):
--     leading column of the composite PRIMARY KEY.
--   * lineitem(l_returnflag): leading column of idx_li_rf_ls.

-- customer
CREATE INDEX idx_c_mk ON customer (c_mktsegment);
CREATE INDEX idx_c_nk ON customer (c_nationkey);

-- orders
CREATE INDEX idx_o_ck ON orders (o_custkey);
CREATE INDEX idx_o_od ON orders (o_orderdate);
CREATE INDEX idx_o_op ON orders (o_orderpriority);
CREATE INDEX idx_o_os ON orders (o_orderstatus);

-- lineitem
CREATE INDEX idx_li_sd ON lineitem (l_shipdate);
CREATE INDEX idx_li_sm ON lineitem (l_shipmode);
CREATE INDEX idx_li_cd ON lineitem (l_commitdate);
CREATE INDEX idx_li_rd ON lineitem (l_receiptdate);
CREATE INDEX idx_li_pk ON lineitem (l_partkey);
CREATE INDEX idx_li_sk ON lineitem (l_suppkey);
CREATE INDEX idx_li_dc ON lineitem (l_discount);
CREATE INDEX idx_li_q ON lineitem (l_quantity);
CREATE INDEX idx_li_rf_ls ON lineitem (l_returnflag, l_linestatus);

-- part
CREATE INDEX idx_p_s ON part (p_size);
CREATE INDEX idx_p_t ON part (p_type);
CREATE INDEX idx_p_b ON part (p_brand);
CREATE INDEX idx_p_c ON part (p_container);

-- partsupp
CREATE INDEX idx_ps_sc ON partsupp (ps_supplycost);
CREATE INDEX idx_ps_sk ON partsupp (ps_suppkey);

-- supplier
CREATE INDEX idx_s_nk ON supplier (s_nationkey);

-- nation
CREATE INDEX idx_n_rk ON nation (n_regionkey);
CREATE INDEX idx_n_n ON nation (n_name);

-- region
CREATE INDEX idx_r_n ON region (r_name);
4.2導入數據,通過mysql客戶端命令行工具連接mysql,執行導入語句。
-- Bulk-load the generated .tbl files. The file names are relative, so start
-- the mysql client from the directory that holds them.
-- Table names are written in lowercase to match the CREATE TABLE statements:
-- on Linux, MySQL table names are case-sensitive unless
-- lower_case_table_names is set, so `CUSTOMER` would not resolve to `customer`.
-- NOTE(review): stock dbgen output is believed to end every row with a
-- trailing '|'. If the load reports truncated-row warnings, inspect the files
-- and consider LINES TERMINATED BY '|\n' -- confirm against the generated data.
LOAD DATA LOCAL INFILE 'customer.tbl' INTO TABLE customer
    FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n';
LOAD DATA LOCAL INFILE 'orders.tbl' INTO TABLE orders
    FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n';
LOAD DATA LOCAL INFILE 'lineitem.tbl' INTO TABLE lineitem
    FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n';
LOAD DATA LOCAL INFILE 'nation.tbl' INTO TABLE nation
    FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n';
LOAD DATA LOCAL INFILE 'partsupp.tbl' INTO TABLE partsupp
    FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n';
LOAD DATA LOCAL INFILE 'part.tbl' INTO TABLE part
    FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n';
LOAD DATA LOCAL INFILE 'region.tbl' INTO TABLE region
    FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n';
LOAD DATA LOCAL INFILE 'supplier.tbl' INTO TABLE supplier
    FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n';
5. 執行SQL語句測試
5.1 一些單表查詢性能測試(百萬以上的記錄如果沒有索引的話性能就很差了)
Num | SQL | Seconds | 表行數 | mark | 機器配置 |
1 | select count(*)from supplier; | 0.95 | 1百萬 | 阿里雲RDS 8核16G | |
select * from supplier where S_NAME ='Supplier#000000187' | 3 | 1百萬 | 無索引 | ||
2 | select count(*) from CUSTOMER; | 20.36 | 1.5千萬 | ||
select * from CUSTOMER order by C_NAME limit 100; | 64.5 | ||||
select * from CUSTOMER where C_PHONE ='13-750-942-6364'; | 51 | 無索引1分鐘左右 | |||
select count(c_custkey) from CUSTOMER | 5 | 1.5千萬 | 有索引 | ||
select * from CUSTOMER where c_custkey =1124; | 0 | 按主鍵索引查單條 | |||
3 | select count(*) from ORDERS; | 93.8 | 1.5億 | ||
select * from orders where O_ORDERKEY =20001; | 0 | 按索引查單條 | |||
select * from orders where O_CUSTKEY =6296771; | 372.5 | 無索引6分鐘左右 | |||
4 | select count(*) from lineitem ; | 431.9 | 6億 | ||
SELECT * FROM lineitem where L_ORDERKEY = 6789; | 0 | 按主鍵索引查單條 | |||
SELECT * FROM lineitem where L_SHIPDATE = '1998-04-19'; | 0.078 | 索引查詢 | |||
SELECT * FROM lineitem where L_PARTKEY ='18205184'; | 1659.9 | 31/6億 | 非索引需要27.665分鐘 |
5.2多表關聯查詢
-- ------------------------- SQL-1
-- (MySQL only treats "--" as a comment when it is followed by whitespace.)
-- Summarises lineitem per (l_returnflag, l_linestatus) for rows shipped on or
-- before 1998-12-01 minus 120 days: totals, averages and a row count.
SELECT
    l_returnflag,
    l_linestatus,
    SUM(l_quantity) AS sum_qty,
    SUM(l_extendedprice) AS sum_base_price,
    SUM(l_extendedprice * (1 - l_discount)) AS sum_disc_price,
    SUM(l_extendedprice * (1 - l_discount) * (1 + l_tax)) AS sum_charge,
    AVG(l_quantity) AS avg_qty,
    AVG(l_extendedprice) AS avg_price,
    AVG(l_discount) AS avg_disc,
    COUNT(*) AS count_order
FROM
    lineitem
WHERE
    l_shipdate <= DATE '1998-12-01' - INTERVAL '120' DAY
GROUP BY
    l_returnflag,
    l_linestatus
ORDER BY
    l_returnflag,
    l_linestatus;
-- ------------------------- SQL-2
-- (MySQL only treats "--" as a comment when it is followed by whitespace.)
-- For parts of size 48 whose type ends in 'STEEL', lists the EUROPE suppliers
-- whose ps_supplycost equals the minimum supply cost among EUROPE suppliers of
-- that same part. Returns the first 100 rows by account balance (descending).
SELECT
    s_acctbal,
    s_name,
    n_name,
    p_partkey,
    p_mfgr,
    s_address,
    s_phone,
    s_comment
FROM
    part,
    supplier,
    partsupp,
    nation,
    region
WHERE
    p_partkey = ps_partkey
    AND s_suppkey = ps_suppkey
    AND p_size = 48
    AND p_type LIKE '%STEEL'
    AND s_nationkey = n_nationkey
    AND n_regionkey = r_regionkey
    AND r_name = 'EUROPE'
    -- Correlated on p_partkey from the outer query.
    AND ps_supplycost = (
        SELECT
            MIN(ps_supplycost)
        FROM
            partsupp,
            supplier,
            nation,
            region
        WHERE
            p_partkey = ps_partkey
            AND s_suppkey = ps_suppkey
            AND s_nationkey = n_nationkey
            AND n_regionkey = r_regionkey
            AND r_name = 'EUROPE'
    )
ORDER BY
    s_acctbal DESC,
    n_name,
    s_name,
    p_partkey
LIMIT 100;
-- ------------------------- SQL-3
-- (MySQL only treats "--" as a comment when it is followed by whitespace.)
-- Revenue per order for 'MACHINERY' customers, restricted to orders placed
-- before 1995-03-23 with line items shipped after that date.
-- Returns the 10 highest-revenue orders.
SELECT
    l_orderkey,
    SUM(l_extendedprice * (1 - l_discount)) AS revenue,
    o_orderdate,
    o_shippriority
FROM
    customer,
    orders,
    lineitem
WHERE
    c_mktsegment = 'MACHINERY'
    AND c_custkey = o_custkey
    AND l_orderkey = o_orderkey
    AND o_orderdate < DATE '1995-03-23'
    AND l_shipdate > DATE '1995-03-23'
GROUP BY
    l_orderkey,
    o_orderdate,
    o_shippriority
ORDER BY
    revenue DESC,
    o_orderdate
LIMIT 10;
-- Counts orders per o_orderpriority for orders placed in the three months
-- starting 1996-07-01 that have at least one line item whose commit date is
-- earlier than its receipt date.
SELECT
    o_orderpriority,
    COUNT(*) AS order_count
FROM
    orders
WHERE
    o_orderdate >= DATE '1996-07-01'
    AND o_orderdate < DATE '1996-07-01' + INTERVAL '3' MONTH
    AND EXISTS (
        SELECT
            *
        FROM
            lineitem
        WHERE
            l_orderkey = o_orderkey
            AND l_commitdate < l_receiptdate
    )
GROUP BY
    o_orderpriority
ORDER BY
    o_orderpriority;
-- ------------------------- SQL-4
-- (MySQL only treats "--" as a comment when it is followed by whitespace.)
-- Revenue per nation in region 'EUROPE' for orders placed during 1996,
-- counting only line items whose supplier is in the same nation as the
-- customer. Highest revenue first.
SELECT
    n_name,
    SUM(l_extendedprice * (1 - l_discount)) AS revenue
FROM
    customer,
    orders,
    lineitem,
    supplier,
    nation,
    region
WHERE
    c_custkey = o_custkey
    AND l_orderkey = o_orderkey
    AND l_suppkey = s_suppkey
    AND c_nationkey = s_nationkey
    AND s_nationkey = n_nationkey
    AND n_regionkey = r_regionkey
    AND r_name = 'EUROPE'
    AND o_orderdate >= DATE '1996-01-01'
    AND o_orderdate < DATE '1996-01-01' + INTERVAL '1' YEAR
GROUP BY
    n_name
ORDER BY
    revenue DESC;
-- Sums l_extendedprice * l_discount over line items shipped during 1996 whose
-- discount lies between 0.02 - 0.01 and 0.02 + 0.01 (inclusive) and whose
-- quantity is below 24.
SELECT
    SUM(l_extendedprice * l_discount) AS revenue
FROM
    lineitem
WHERE
    l_shipdate >= DATE '1996-01-01'
    AND l_shipdate < DATE '1996-01-01' + INTERVAL '1' YEAR
    AND l_discount BETWEEN 0.02 - 0.01 AND 0.02 + 0.01
    AND l_quantity < 24;
-- ------------------------- SQL-5
-- (MySQL only treats "--" as a comment when it is followed by whitespace.)
-- Shipped volume between CANADA and BRAZIL (either direction) for line items
-- shipped from 1995-01-01 through 1996-12-31, grouped by supplier nation,
-- customer nation and ship year.
SELECT
    supp_nation,
    cust_nation,
    l_year,
    SUM(volume) AS revenue
FROM
    (
        SELECT
            n1.n_name AS supp_nation,
            n2.n_name AS cust_nation,
            EXTRACT(YEAR FROM l_shipdate) AS l_year,
            l_extendedprice * (1 - l_discount) AS volume
        FROM
            supplier,
            lineitem,
            orders,
            customer,
            nation n1,
            nation n2
        WHERE
            s_suppkey = l_suppkey
            AND o_orderkey = l_orderkey
            AND c_custkey = o_custkey
            -- n1 is the supplier's nation, n2 the customer's nation.
            AND s_nationkey = n1.n_nationkey
            AND c_nationkey = n2.n_nationkey
            AND (
                (n1.n_name = 'CANADA' AND n2.n_name = 'BRAZIL')
                OR (n1.n_name = 'BRAZIL' AND n2.n_name = 'CANADA')
            )
            AND l_shipdate BETWEEN DATE '1995-01-01' AND DATE '1996-12-31'
    ) AS shipping
GROUP BY
    supp_nation,
    cust_nation,
    l_year
ORDER BY
    supp_nation,
    cust_nation,
    l_year;
-- ------------------------- SQL-6
-- (MySQL only treats "--" as a comment when it is followed by whitespace.)
-- Per order year (orders dated 1995-01-01 through 1996-12-31): the fraction of
-- volume supplied from BRAZIL, over parts of type 'LARGE ANODIZED COPPER'
-- bought by customers located in region 'AMERICA'.
SELECT
    o_year,
    SUM(CASE
        WHEN nation = 'BRAZIL' THEN volume
        ELSE 0
    END) / SUM(volume) AS mkt_share
FROM
    (
        SELECT
            EXTRACT(YEAR FROM o_orderdate) AS o_year,
            l_extendedprice * (1 - l_discount) AS volume,
            n2.n_name AS nation
        FROM
            part,
            supplier,
            lineitem,
            orders,
            customer,
            nation n1,
            nation n2,
            region
        WHERE
            p_partkey = l_partkey
            AND s_suppkey = l_suppkey
            AND l_orderkey = o_orderkey
            AND o_custkey = c_custkey
            -- n1 is the customer's nation (filtered by region);
            -- n2 is the supplier's nation (reported as "nation").
            AND c_nationkey = n1.n_nationkey
            AND n1.n_regionkey = r_regionkey
            AND r_name = 'AMERICA'
            AND s_nationkey = n2.n_nationkey
            AND o_orderdate BETWEEN DATE '1995-01-01' AND DATE '1996-12-31'
            AND p_type = 'LARGE ANODIZED COPPER'
    ) AS all_nations
GROUP BY
    o_year
ORDER BY
    o_year;
-- ------------------------- SQL-7
-- (MySQL only treats "--" as a comment when it is followed by whitespace.)
-- Profit per supplier nation and order year for parts whose name contains
-- 'maroon'. Profit is discounted extended price minus supply cost times
-- quantity. Sorted by nation, then most recent year first.
SELECT
    nation,
    o_year,
    SUM(amount) AS sum_profit
FROM
    (
        SELECT
            n_name AS nation,
            EXTRACT(YEAR FROM o_orderdate) AS o_year,
            l_extendedprice * (1 - l_discount) - ps_supplycost * l_quantity AS amount
        FROM
            part,
            supplier,
            lineitem,
            partsupp,
            orders,
            nation
        WHERE
            s_suppkey = l_suppkey
            AND ps_suppkey = l_suppkey
            AND ps_partkey = l_partkey
            AND p_partkey = l_partkey
            AND o_orderkey = l_orderkey
            AND s_nationkey = n_nationkey
            AND p_name LIKE '%maroon%'
    ) AS profit
GROUP BY
    nation,
    o_year
ORDER BY
    nation,
    o_year DESC;
測試結果
更多SQL測試可參照 https://help.aliyun.com/document_detail/156330.html?spm=a2c4g.11186623.6.775.10865130GYKkgh
更多TPC-H 信息可以參考 https://blog.csdn.net/leixingbang1989/article/details/8766047