使用TPC-H 進行MySQL數據庫性能測試

本文簡單記敘在Linux環境下通過TPC-H生成MySQL數據庫測試數據的步驟,作爲後續參考。

生成數據的量級如下,數據庫需要300G左右空間。

表名 數據行數 量級
customer 15000000 1.5千萬
lineitem 600037902 6億
nation 25  
orders 150000000 1.5億
part 20000000 2千萬
partsupp 80000000 8千萬
region 5  
supplier 1000000 1百萬

1下載TPC-H:https://download.csdn.net/download/kkdelta/12390317

這個源文件針對mysql做了定製修改

修改1:makefile (如果是生成其它數據庫類型的測試數據,請修改DATABASE= 對應數據庫)

# C compiler used by make.
CC      = gcc
# Current values for DATABASE are: INFORMIX, DB2, TDAT (Teradata)
#                                  SQLSERVER, SYBASE, ORACLE, VECTORWISE
# NOTE: MYSQL is not part of the stock list above. It is usable here only
# because this customized package adds an `#ifdef MYSQL` section to the header.
# Current values for MACHINE are:  ATT, DOS, HP, IBM, ICL, MVS, 
#                                  SGI, SUN, U2200, VMS, LINUX, WIN32 
# Current values for WORKLOAD are:  TPCH
DATABASE= MYSQL
MACHINE = LINUX
WORKLOAD = TPCH

修改2:頭文件添加了如下信息,默認沒有MYSQL

/*
 * SQL text fragments for the MYSQL target, active only when MYSQL is defined.
 * The stock header has no MYSQL section, so this one is added by hand.
 * NOTE(review): presumably consumed by the query generator when it emits
 * query text -- confirm against the other database sections of the header.
 */
#ifdef MYSQL
/* Left empty: no query-plan text is defined for MySQL. */
#define GEN_QUERY_PLAN  ""
/* Statement text that opens a transaction. */
#define START_TRAN      "START TRANSACTION"
/* Statement text that commits a transaction. */
#define END_TRAN        "COMMIT"
/* Left empty: no output-redirection text is defined for MySQL. */
#define SET_OUTPUT      ""
/* Row-limit clause; presumably %d receives the row count -- confirm at the call site. */
#define SET_ROWCOUNT    "limit %d;\n"
/* Database selection; presumably %s receives the database name -- confirm at the call site. */
#define SET_DBASE       "use %s;\n"
#endif

2解壓編譯

#tar -xzvf <下載得到的TPC-H壓縮包文件名>

#進入tpch_2.18.0_rc2/dbgen 目錄執行 make命令

3生成數據,生成100G測試數據,更多dbgen 參數請參照https://github.com/electrum/tpch-dbgen?spm=a2c4g.11186623.2.12.49503a21XMT2IL

# Run dbgen in the background with scale factor 100 (-s 100, i.e. the 100G data
# set this step describes). nohup keeps the job alive after the shell exits;
# stdout and stderr both go to out.txt.
# NOTE(review): -f presumably forces overwriting existing output files -- confirm in the dbgen README.
nohup ./dbgen -f -s 100 >out.txt 2>&1 &

4導入數據

4.1 創建數據庫表結構語句

-- customer: one row per customer key (15,000,000 rows at the scale used in this article).
-- No FOREIGN KEY constraints are declared; the queries in this article join
-- C_NATIONKEY to nation.N_NATIONKEY and orders.O_CUSTKEY to C_CUSTKEY.
CREATE TABLE `customer` (
  `C_CUSTKEY` int(11) NOT NULL,
  `C_NAME` varchar(25) NOT NULL,
  `C_ADDRESS` varchar(40) NOT NULL,
  `C_NATIONKEY` int(11) NOT NULL,
  `C_PHONE` varchar(15) NOT NULL,
  `C_ACCTBAL` decimal(12,2) NOT NULL,
  `C_MKTSEGMENT` varchar(10) NOT NULL,
  `C_COMMENT` varchar(117) NOT NULL,
  PRIMARY KEY (`C_CUSTKEY`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;



-- lineitem: the line items of each order; the largest table in this data set
-- (600,037,902 rows at the scale used in this article).
-- No FOREIGN KEY constraints are declared; the queries in this article join
-- L_ORDERKEY to orders, L_PARTKEY to part and L_SUPPKEY to supplier.
CREATE TABLE `lineitem` (
  `L_ORDERKEY` bigint(20) NOT NULL,
  `L_PARTKEY` int(11) NOT NULL,
  `L_SUPPKEY` int(11) NOT NULL,
  `L_LINENUMBER` bigint(20) NOT NULL,
  `L_QUANTITY` decimal(12,2) NOT NULL,
  `L_EXTENDEDPRICE` decimal(12,2) NOT NULL,
  `L_DISCOUNT` decimal(12,2) NOT NULL,
  `L_TAX` decimal(12,2) NOT NULL,
  `L_RETURNFLAG` varchar(1) NOT NULL,
  `L_LINESTATUS` varchar(1) NOT NULL,
  `L_SHIPDATE` date NOT NULL,
  `L_COMMITDATE` date NOT NULL,
  `L_RECEIPTDATE` date NOT NULL,
  `L_SHIPINSTRUCT` varchar(25) NOT NULL,
  `L_SHIPMODE` varchar(10) NOT NULL,
  `L_COMMENT` varchar(44) NOT NULL,
  -- The key carries L_SHIPDATE in addition to (order key, line number).
  -- NOTE(review): presumably so the table can be partitioned by ship date -- confirm.
  PRIMARY KEY (`L_ORDERKEY`,`L_LINENUMBER`,`L_SHIPDATE`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;

-- nation: small lookup table (25 rows), keyed by N_NATIONKEY.
-- N_REGIONKEY is joined to region.R_REGIONKEY by the queries in this article;
-- no FOREIGN KEY constraint is declared.
CREATE TABLE `nation` (
  `N_NATIONKEY` int(11) NOT NULL,
  `N_NAME` varchar(25) NOT NULL,
  `N_REGIONKEY` int(11) NOT NULL,
  -- Nullable, unlike the comment columns of the larger tables.
  `N_COMMENT` varchar(152) DEFAULT NULL,
  PRIMARY KEY (`N_NATIONKEY`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;

-- orders: order headers (150,000,000 rows at the scale used in this article).
-- O_CUSTKEY is joined to customer.C_CUSTKEY by the queries in this article;
-- no FOREIGN KEY constraint is declared.
CREATE TABLE `orders` (
  `O_ORDERKEY` bigint(20) NOT NULL,
  `O_CUSTKEY` int(11) NOT NULL,
  `O_ORDERSTATUS` varchar(1) NOT NULL,
  `O_TOTALPRICE` decimal(12,2) NOT NULL,
  `O_ORDERDATE` date NOT NULL,
  `O_ORDERPRIORITY` varchar(15) NOT NULL,
  `O_CLERK` varchar(15) NOT NULL,
  `O_SHIPPRIORITY` int(11) NOT NULL,
  `O_COMMENT` varchar(79) NOT NULL,
  -- The key carries O_ORDERDATE in addition to the order key.
  -- NOTE(review): presumably so the table can be partitioned by order date -- confirm.
  PRIMARY KEY (`O_ORDERKEY`,`O_ORDERDATE`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;

-- part: one row per part key (20,000,000 rows at the scale used in this article).
CREATE TABLE `part` (
  `P_PARTKEY` int(11) NOT NULL,
  `P_NAME` varchar(55) NOT NULL,
  `P_MFGR` varchar(25) NOT NULL,
  `P_BRAND` varchar(10) NOT NULL,
  `P_TYPE` varchar(25) NOT NULL,
  `P_SIZE` int(11) NOT NULL,
  `P_CONTAINER` varchar(10) NOT NULL,
  `P_RETAILPRICE` decimal(12,2) NOT NULL,
  `P_COMMENT` varchar(23) NOT NULL,
  PRIMARY KEY (`P_PARTKEY`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;

-- partsupp: one row per (part, supplier) pair with the available quantity and
-- supply cost (80,000,000 rows at the scale used in this article).
-- No FOREIGN KEY constraints are declared; the queries in this article join
-- PS_PARTKEY to part.P_PARTKEY and PS_SUPPKEY to supplier.S_SUPPKEY.
CREATE TABLE `partsupp` (
  `PS_PARTKEY` int(11) NOT NULL,
  `PS_SUPPKEY` int(11) NOT NULL,
  `PS_AVAILQTY` int(11) NOT NULL,
  `PS_SUPPLYCOST` decimal(12,2) NOT NULL,
  `PS_COMMENT` varchar(199) NOT NULL,
  PRIMARY KEY (`PS_PARTKEY`,`PS_SUPPKEY`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;

-- region: small lookup table (5 rows), keyed by R_REGIONKEY.
CREATE TABLE `region` (
  `R_REGIONKEY` int(11) NOT NULL,
  `R_NAME` varchar(25) NOT NULL,
  -- Nullable, unlike the comment columns of the larger tables.
  `R_COMMENT` varchar(152) DEFAULT NULL,
  PRIMARY KEY (`R_REGIONKEY`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;

-- supplier: one row per supplier key (1,000,000 rows at the scale used in this article).
-- S_NATIONKEY is joined to nation.N_NATIONKEY by the queries in this article;
-- no FOREIGN KEY constraint is declared.
CREATE TABLE `supplier` (
  `S_SUPPKEY` int(11) NOT NULL,
  `S_NAME` varchar(25) NOT NULL,
  `S_ADDRESS` varchar(40) NOT NULL,
  `S_NATIONKEY` int(11) NOT NULL,
  `S_PHONE` varchar(15) NOT NULL,
  `S_ACCTBAL` decimal(12,2) NOT NULL,
  `S_COMMENT` varchar(101) NOT NULL,
  PRIMARY KEY (`S_SUPPKEY`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;

在100GB數據規模下,缺省MySQL表結構無法完成全部測試,可以通過增加索引提升數據查詢性能。
如果是在加載完數據後再創建index會比較耗費時間,注意單條執行
-- Secondary indexes on the filter, join and grouping columns used by the test
-- queries. Each statement is independent and is meant to be run one at a time.
-- NOTE(review): several indexes below cover a column that is already the
-- PRIMARY KEY, or the leading column of it (marked "PK column"). Confirm they
-- are wanted before dropping any: they cost space and load time, but the
-- timings reported in this article were taken with them in place.

-- customer
create index idx_c_mk on customer(c_mktsegment);
create index idx_c_ck on customer(c_custkey);       -- PK column
create index idx_c_nk on customer(c_nationkey);

-- orders
create index idx_o_ck on orders(o_custkey);
create index idx_o_ok on orders(o_orderkey);        -- leading PK column
create index idx_o_od on orders(o_orderdate);
create index idx_o_op on orders(o_orderpriority);
create index idx_o_os on orders(o_orderstatus);

-- lineitem
create index idx_li_sd on lineitem(l_shipdate);
create index idx_li_rf on lineitem(l_returnflag);   -- also the leading column of idx_li_rf_ls
create index idx_li_sm on lineitem(l_shipmode);
create index idx_li_cd on lineitem(l_commitdate);
create index idx_li_rd on lineitem(l_receiptdate);
create index idx_li_pk on lineitem(l_partkey);
create index idx_li_sk on lineitem(l_suppkey);
create index idx_li_ok on lineitem(l_orderkey);     -- leading PK column
create index idx_li_dc on lineitem(l_discount);
create index idx_li_q on lineitem(l_quantity);
create index idx_li_rf_ls on lineitem(l_returnflag,l_linestatus);

-- part
create index idx_p_s on part(p_size);
create index idx_p_t on part(p_type);
create index idx_p_pk on part(p_partkey);           -- PK column
create index idx_p_b on part(p_brand);
create index idx_p_c on part(p_container);

-- partsupp
create index idx_ps_pk on partsupp(ps_partkey);     -- leading PK column
create index idx_ps_sc on partsupp(ps_supplycost);
create index idx_ps_sk on partsupp(ps_suppkey);

-- supplier
create index idx_s_sk on supplier(s_suppkey);       -- PK column
create index idx_s_nk on supplier(s_nationkey);

-- nation
create index idx_n_nk on nation(n_nationkey);       -- PK column
create index idx_n_rk on nation(n_regionkey);
create index idx_n_n on nation(n_name);

-- region
create index idx_r_rk on region(r_regionkey);       -- PK column
create index idx_r_n on region(r_name);

4.2導入數據,通過mysql客戶端命令行工具連接mysql,執行導入語句。

-- Bulk-load the dbgen output files (*.tbl, '|'-separated fields, one record
-- per line) into the eight tables.
--
-- Table names are lowercase to match the CREATE TABLE statements: on Linux,
-- MySQL table names are case-sensitive by default (lower_case_table_names=0),
-- so an uppercase name such as CUSTOMER fails with "table doesn't exist".
--
-- LOCAL reads the file on the client side; a relative path is resolved against
-- the directory the mysql client was started in, and local_infile must be
-- enabled on both server and client (e.g. `mysql --local-infile=1`).
--
-- NOTE(review): stock dbgen ends every record with a trailing '|'. If the
-- generated .tbl files have it, each row loads with a "row was truncated"
-- warning; LINES TERMINATED BY '|\n' avoids that. Check the files before changing it.
LOAD DATA LOCAL INFILE 'customer.tbl' INTO TABLE customer
FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n';
LOAD DATA LOCAL INFILE 'orders.tbl' INTO TABLE orders
FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n';
LOAD DATA LOCAL INFILE 'lineitem.tbl' INTO TABLE lineitem
FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n';
LOAD DATA LOCAL INFILE 'nation.tbl' INTO TABLE nation
FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n';
LOAD DATA LOCAL INFILE 'partsupp.tbl' INTO TABLE partsupp
FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n';
LOAD DATA LOCAL INFILE 'part.tbl' INTO TABLE part
FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n';
LOAD DATA LOCAL INFILE 'region.tbl' INTO TABLE region
FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n';
LOAD DATA LOCAL INFILE 'supplier.tbl' INTO TABLE supplier
FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n';

5執行SQL語句測試

5.1 一些單表查詢性能測試(百萬以上的記錄如果沒有索引的話性能就很差了)

Num SQL Seconds 表行數 mark 機器配置
1 select count(*)from  supplier; 0.95 1百萬    阿里雲RDS 8核16G
  select * from  supplier where S_NAME ='Supplier#000000187' 3 1百萬 無索引  
           
2 select count(*) from CUSTOMER; 20.36 1.5千萬    
  select * from CUSTOMER order by C_NAME limit 100; 64.5      
  select * from CUSTOMER  where C_PHONE ='13-750-942-6364'; 51   無索引1分鐘左右  
  select count(c_custkey) from CUSTOMER 5 1.5千萬 有索引  
  select * from CUSTOMER  where c_custkey =1124; 0   按主鍵索引查單條  
           
3 select count(*) from ORDERS; 93.8 1.5億    
  select * from orders  where O_ORDERKEY =20001; 0   按索引查單條  
  select * from orders  where O_CUSTKEY =6296771; 372.5   無索引6分鐘左右  
           
4 select count(*) from lineitem ; 431.9 6億    
  SELECT * FROM lineitem where L_ORDERKEY = 6789; 0   按主鍵索引查單條  
  SELECT * FROM lineitem where L_SHIPDATE = '1998-04-19'; 0.078   索引查詢  
  SELECT * FROM lineitem where L_PARTKEY ='18205184'; 1659.9 31/6億 非索引需要27.665分鐘  

5.2多表關聯查詢

-------------------------SQL-1

-- Pricing summary (corresponds to TPC-H Q1): for every
-- (l_returnflag, l_linestatus) pair, report the totals and averages of
-- quantity, price, discounted price and charged price (discount and tax
-- applied), plus the number of line items.
select
        l_returnflag,
        l_linestatus,
        sum(l_quantity) as sum_qty,
        sum(l_extendedprice) as sum_base_price,
        sum(l_extendedprice * (1 - l_discount)) as sum_disc_price,
        sum(l_extendedprice * (1 - l_discount) * (1 + l_tax)) as sum_charge,
        avg(l_quantity) as avg_qty,
        avg(l_extendedprice) as avg_price,
        avg(l_discount) as avg_disc,
        count(*) as count_order
from
        lineitem
where
        -- cutoff is 1998-12-01 minus 120 days, i.e. 1998-08-03 (inclusive)
        l_shipdate <= date '1998-12-01' - interval '120' day
group by
        l_returnflag,
        l_linestatus
order by
        l_returnflag,
        l_linestatus;

-------------------------SQL-2	

-- Minimum-cost supplier (corresponds to TPC-H Q2): for parts of size 48 whose
-- type ends in STEEL, list the EUROPE suppliers that offer the part at the
-- lowest supply cost found among EUROPE suppliers. Returns the first 100 rows
-- ordered by account balance (highest first), then nation, supplier, part key.
select
        s_acctbal,
        s_name,
        n_name,
        p_partkey,
        p_mfgr,
        s_address,
        s_phone,
        s_comment
from
        part,
        supplier,
        partsupp,
        nation,
        region
where
        p_partkey = ps_partkey
        and s_suppkey = ps_suppkey
        and p_size = 48
        -- leading-wildcard pattern: matches any type ending in STEEL
        and p_type like '%STEEL'
        and s_nationkey = n_nationkey
        and n_regionkey = r_regionkey
        and r_name = 'EUROPE'
        -- correlated subquery: p_partkey below refers to the outer part row,
        -- so the minimum is computed per part over EUROPE suppliers only
        and ps_supplycost = (
                select
                        min(ps_supplycost)
                from
                        partsupp,
                        supplier,
                        nation,
                        region
                where
                        p_partkey = ps_partkey
                        and s_suppkey = ps_suppkey
                        and s_nationkey = n_nationkey
                        and n_regionkey = r_regionkey
                        and r_name = 'EUROPE'
        )
order by
        s_acctbal desc,
        n_name,
        s_name,
        p_partkey
limit 100;

-------------------------SQL-3

-- Shipping priority (corresponds to TPC-H Q3): the 10 highest-revenue orders
-- placed by MACHINERY-segment customers before 1995-03-23, counting only the
-- line items shipped after that date.
-- revenue = sum of extended price after discount.
select
        l_orderkey,
        sum(l_extendedprice * (1 - l_discount)) as revenue,
        o_orderdate,
        o_shippriority
from
        customer,
        orders,
        lineitem
where
        c_mktsegment = 'MACHINERY'
        and c_custkey = o_custkey
        and l_orderkey = o_orderkey
        and o_orderdate < date '1995-03-23'
        and l_shipdate > date '1995-03-23'
group by
        l_orderkey,
        o_orderdate,
        o_shippriority
order by
        revenue desc,
        o_orderdate
limit 10;

-- Order priority check (corresponds to TPC-H Q4): per order priority, count
-- the orders placed from 1996-07-01 (inclusive) to 1996-10-01 (exclusive)
-- that have at least one line item received after its commit date.
select
        o_orderpriority,
        count(*) as order_count
from
        orders
where
        o_orderdate >= date '1996-07-01'
        and o_orderdate < date '1996-07-01' + interval '3' month
        -- an order qualifies as soon as one late line item exists
        and exists (
                select
                        *
                from
                        lineitem
                where
                        l_orderkey = o_orderkey
                        and l_commitdate < l_receiptdate
        )
group by
        o_orderpriority
order by
        o_orderpriority;

-------------------------SQL-4

-- Local supplier volume (corresponds to TPC-H Q5): discounted revenue per
-- nation in region EUROPE for orders placed in 1996, counting only line items
-- whose supplier is in the same nation as the ordering customer.
select
        n_name,
        sum(l_extendedprice * (1 - l_discount)) as revenue
from
        customer,
        orders,
        lineitem,
        supplier,
        nation,
        region
where
        c_custkey = o_custkey
        and l_orderkey = o_orderkey
        and l_suppkey = s_suppkey
        -- customer and supplier must belong to the same nation
        and c_nationkey = s_nationkey
        and s_nationkey = n_nationkey
        and n_regionkey = r_regionkey
        and r_name = 'EUROPE'
        -- half-open range: 1996-01-01 <= o_orderdate < 1997-01-01
        and o_orderdate >= date '1996-01-01'
        and o_orderdate < date '1996-01-01' + interval '1' year
group by
        n_name
order by
        revenue desc;

-- Revenue-change forecast (corresponds to TPC-H Q6): total of
-- l_extendedprice * l_discount over line items shipped in 1996 with a
-- discount between 0.01 and 0.03 (inclusive) and a quantity below 24.
-- Single-table scan of lineitem with no joins.
select
        sum(l_extendedprice * l_discount) as revenue
from
        lineitem
where
        -- half-open range: 1996-01-01 <= l_shipdate < 1997-01-01
        l_shipdate >= date '1996-01-01'
        and l_shipdate < date '1996-01-01' + interval '1' year
        and l_discount between 0.02 - 0.01 and 0.02 + 0.01
        and l_quantity < 24;
		
-------------------------SQL-5

-- Volume shipping (corresponds to TPC-H Q7): discounted revenue per
-- (supplier nation, customer nation, ship year) for line items shipped
-- between CANADA and BRAZIL, in either direction, from 1995-01-01 through
-- 1996-12-31.
select
        supp_nation,
        cust_nation,
        l_year,
        sum(volume) as revenue
from
        (
                -- one row per qualifying line item; nation is joined twice,
                -- n1 for the supplier side and n2 for the customer side
                select
                        n1.n_name as supp_nation,
                        n2.n_name as cust_nation,
                        extract(year from l_shipdate) as l_year,
                        l_extendedprice * (1 - l_discount) as volume
                from
                        supplier,
                        lineitem,
                        orders,
                        customer,
                        nation n1,
                        nation n2
                where
                        s_suppkey = l_suppkey
                        and o_orderkey = l_orderkey
                        and c_custkey = o_custkey
                        and s_nationkey = n1.n_nationkey
                        and c_nationkey = n2.n_nationkey
                        and (
                                (n1.n_name = 'CANADA' and n2.n_name = 'BRAZIL')
                                or (n1.n_name = 'BRAZIL' and n2.n_name = 'CANADA')
                        )
                        and l_shipdate between date '1995-01-01' and date '1996-12-31'
        ) as shipping
group by
        supp_nation,
        cust_nation,
        l_year
order by
        supp_nation,
        cust_nation,
        l_year;

-------------------------SQL-6
		
-- National market share (corresponds to TPC-H Q8): per order year, the
-- fraction of discounted revenue supplied by BRAZIL suppliers, over all
-- 'LARGE ANODIZED COPPER' parts ordered by customers in region AMERICA from
-- 1995-01-01 through 1996-12-31.
select
        o_year,
        -- numerator: revenue from BRAZIL suppliers; denominator: all revenue
        sum(case
                when nation = 'BRAZIL' then volume
                else 0
        end) / sum(volume) as mkt_share
from
        (
                -- n1 is the customer's nation (used for the region filter);
                -- n2 is the supplier's nation (reported as "nation")
                select
                        extract(year from o_orderdate) as o_year,
                        l_extendedprice * (1 - l_discount) as volume,
                        n2.n_name as nation
                from
                        part,
                        supplier,
                        lineitem,
                        orders,
                        customer,
                        nation n1,
                        nation n2,
                        region
                where
                        p_partkey = l_partkey
                        and s_suppkey = l_suppkey
                        and l_orderkey = o_orderkey
                        and o_custkey = c_custkey
                        and c_nationkey = n1.n_nationkey
                        and n1.n_regionkey = r_regionkey
                        and r_name = 'AMERICA'
                        and s_nationkey = n2.n_nationkey
                        and o_orderdate between date '1995-01-01' and date '1996-12-31'
                        and p_type = 'LARGE ANODIZED COPPER'
        ) as all_nations
group by
        o_year
order by
        o_year;

-------------------------SQL-7

-- Product-type profit (corresponds to TPC-H Q9): profit per supplier nation
-- and order year for all parts whose name contains "maroon".
-- profit per line item = discounted price - supply cost * quantity.
select
        nation,
        o_year,
        sum(amount) as sum_profit
from
        (
                select
                        n_name as nation,
                        extract(year from o_orderdate) as o_year,
                        l_extendedprice * (1 - l_discount) - ps_supplycost * l_quantity as amount
                from
                        part,
                        supplier,
                        lineitem,
                        partsupp,
                        orders,
                        nation
                where
                        s_suppkey = l_suppkey
                        -- partsupp is matched on both part and supplier of the line item
                        and ps_suppkey = l_suppkey
                        and ps_partkey = l_partkey
                        and p_partkey = l_partkey
                        and o_orderkey = l_orderkey
                        and s_nationkey = n_nationkey
                        -- substring match anywhere in the part name
                        and p_name like '%maroon%'
        ) as profit
group by
        nation,
        o_year
order by
        nation,
        o_year desc;

測試結果

 

更多SQL測試可參照 https://help.aliyun.com/document_detail/156330.html?spm=a2c4g.11186623.6.775.10865130GYKkgh

更多TPC-H 信息可以參考 https://blog.csdn.net/leixingbang1989/article/details/8766047

 

 

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章