TPC-H工具使用:生成數據及存儲至數據庫中

前言

在一次作業中需要用tpch(什麼是tpch請自行google或百度)工具生成數據集做相應測試,故將整個配置過程和自己踩的坑做如下記錄,望有助其他人節約時間。

環境

操作系統:Linux(Ubuntu16.04)
TPC-H工具:2.17.3
PostgreSQL版本:9.6.0
TPC-H工具下載網站:http://www.tpc.org/tpch/

一、生成dbgen和qgen

  1. 解壓TPCH-tools工具在dbgen目錄下找到並更改makefile.suite 生成dbgen
#makefile.suite 的更改參數如下

CC      = gcc
# Current values for DATABASE are: INFORMIX, DB2, TDAT (Teradata)
#                                  SQLSERVER, SYBASE, ORACLE, VECTORWISE
# Current values for MACHINE are:  ATT, DOS, HP, IBM, ICL, MVS, 
#                                  SGI, SUN, U2200, VMS, LINUX, WIN32 
# Current values for WORKLOAD are:  TPCH

# POSTGRESQL is not one of the stock DATABASE values listed above; it only
# works after a matching "#ifdef POSTGRESQL" section has been added to tpcd.h.
# This comment sits on its own line on purpose: make keeps the whitespace
# that precedes a trailing "#" comment as part of the variable's value.
DATABASE = POSTGRESQL
MACHINE = LINUX
WORKLOAD = TPCH

  1. 由於TPCH數據庫參數沒有PostgreSQL數據庫選項,需要自己增加PG數據的腳本,在dbgen目錄下更改tpcd.h文件
//修改tpcd.h

/*
 * PostgreSQL dialect strings, added by hand because the stock tpcd.h has no
 * PostgreSQL section. Selected by DATABASE = POSTGRESQL in makefile.suite.
 */
#ifdef POSTGRESQL
#define GEN_QUERY_PLAN  "EXPLAIN"
/* Terminated with ';' like END_TRAN so that whatever statement follows it
 * in the generated script is not parsed as part of the BEGIN statement. */
#define START_TRAN      "BEGIN TRANSACTION;"
#define END_TRAN        "COMMIT;"
#define SET_OUTPUT      ""
/* NOTE(review): %d receives the row count taken from the query template;
 * confirm the emitted LIMIT clause ends up before the query's terminating
 * ';' in the generated SQL, otherwise it must be moved by hand. */
#define SET_ROWCOUNT    "LIMIT %d\n"
#define SET_DBASE       ""
#endif /* POSTGRESQL */


  1. 保存修改在終端中cd到dbgen目錄下,執行下列命令
//保存更改,在dbgen目錄下執行

make -f makefile.suite

//執行成功後在dbgen目錄下生成dbgen和qgen文件



二、運行dbgen生成.tbl數據

# Run from inside the dbgen directory.
# -s 1: generate about 1 GB of data; -f: overwrite files left by a previous run.
./dbgen -s 1 -f

# On success, eight .tbl files are created in the dbgen directory.
# List them (still inside the dbgen directory) with:

ls *.tbl

# Eight .tbl files should be listed.

三、建立數據庫

在postgresql中建立tpch數據庫,並創建表,相關表的創建語句可以從dss.ddl中複製

-- NATION: nation key, name, region key, and a free-text comment.
-- n_comment is the only column declared without NOT NULL.
CREATE TABLE nation (
    n_nationkey INTEGER      NOT NULL,
    n_name      CHAR(25)     NOT NULL,
    n_regionkey INTEGER      NOT NULL,
    n_comment   VARCHAR(152)
);

-- REGION: region key, name, and a free-text comment.
-- r_comment is the only column declared without NOT NULL.
CREATE TABLE region (
    r_regionkey INTEGER      NOT NULL,
    r_name      CHAR(25)     NOT NULL,
    r_comment   VARCHAR(152)
);

-- PART: one row per part; every column is mandatory.
CREATE TABLE part (
    p_partkey     INTEGER       NOT NULL,
    p_name        VARCHAR(55)   NOT NULL,
    p_mfgr        CHAR(25)      NOT NULL,
    p_brand       CHAR(10)      NOT NULL,
    p_type        VARCHAR(25)   NOT NULL,
    p_size        INTEGER       NOT NULL,
    p_container   CHAR(10)      NOT NULL,
    p_retailprice DECIMAL(15,2) NOT NULL,
    p_comment     VARCHAR(23)   NOT NULL
);

-- SUPPLIER: one row per supplier; every column is mandatory.
CREATE TABLE supplier (
    s_suppkey   INTEGER       NOT NULL,
    s_name      CHAR(25)      NOT NULL,
    s_address   VARCHAR(40)   NOT NULL,
    s_nationkey INTEGER       NOT NULL,
    s_phone     CHAR(15)      NOT NULL,
    s_acctbal   DECIMAL(15,2) NOT NULL,
    s_comment   VARCHAR(101)  NOT NULL
);

-- PARTSUPP: one row per (part, supplier) pair; every column is mandatory.
CREATE TABLE partsupp (
    ps_partkey    INTEGER       NOT NULL,
    ps_suppkey    INTEGER       NOT NULL,
    ps_availqty   INTEGER       NOT NULL,
    ps_supplycost DECIMAL(15,2) NOT NULL,
    ps_comment    VARCHAR(199)  NOT NULL
);

-- CUSTOMER: one row per customer; every column is mandatory.
CREATE TABLE customer (
    c_custkey    INTEGER       NOT NULL,
    c_name       VARCHAR(25)   NOT NULL,
    c_address    VARCHAR(40)   NOT NULL,
    c_nationkey  INTEGER       NOT NULL,
    c_phone      CHAR(15)      NOT NULL,
    c_acctbal    DECIMAL(15,2) NOT NULL,
    c_mktsegment CHAR(10)      NOT NULL,
    c_comment    VARCHAR(117)  NOT NULL
);

-- ORDERS: one row per order; every column is mandatory.
CREATE TABLE orders (
    o_orderkey      INTEGER       NOT NULL,
    o_custkey       INTEGER       NOT NULL,
    o_orderstatus   CHAR(1)       NOT NULL,
    o_totalprice    DECIMAL(15,2) NOT NULL,
    o_orderdate     DATE          NOT NULL,
    o_orderpriority CHAR(15)      NOT NULL,
    o_clerk         CHAR(15)      NOT NULL,
    o_shippriority  INTEGER       NOT NULL,
    o_comment       VARCHAR(79)   NOT NULL
);

-- LINEITEM: one row per order line; every column is mandatory.
CREATE TABLE lineitem (
    l_orderkey      INTEGER       NOT NULL,
    l_partkey       INTEGER       NOT NULL,
    l_suppkey       INTEGER       NOT NULL,
    l_linenumber    INTEGER       NOT NULL,
    l_quantity      DECIMAL(15,2) NOT NULL,
    l_extendedprice DECIMAL(15,2) NOT NULL,
    l_discount      DECIMAL(15,2) NOT NULL,
    l_tax           DECIMAL(15,2) NOT NULL,
    l_returnflag    CHAR(1)       NOT NULL,
    l_linestatus    CHAR(1)       NOT NULL,
    l_shipdate      DATE          NOT NULL,
    l_commitdate    DATE          NOT NULL,
    l_receiptdate   DATE          NOT NULL,
    l_shipinstruct  CHAR(25)      NOT NULL,
    l_shipmode      CHAR(10)      NOT NULL,
    l_comment       VARCHAR(44)   NOT NULL
);

四、導入數據

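生成的tbl數據每一行的末尾會有一個“|”,導致PG數據庫讀取時報錯,需要將最後一個“|”去掉。在dbgen目錄下找到print.c,註釋掉第145行和第147行(即#ifdef EOL_HANDLING和對應的#endif),使if (sep)判斷始終生效。注意:修改print.c後必須重新執行make -f makefile.suite編譯dbgen,並重新運行./dbgen -s 1 -f生成.tbl文件,之前已生成的數據不會自動改變。修改後的代碼如下所示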

       }

//#ifdef EOL_HANDLING
        if (sep)
//#endif /* EOL_HANDLING */
        fprintf(target, "%c", SEPARATOR);

        return(0);
}

最後,將數據導入PostgreSQL數據庫中(COPY語句中的路徑需替換為.tbl文件實際所在的絕對路徑;dbgen默認把文件生成在dbgen目錄下,下面的示例假設文件已移到/2.17.3/dbgen/tbl/目錄)

# Switch to the postgres operating-system account.
su - postgres
# Start the psql client, where the SQL statements are executed.
psql
-- Inside psql: connect to the tpch database.
\c tpch

-- Load each .tbl file into its table; fields in the files are separated by '|'.
-- Constraints are added only after loading, so the load order is not significant.
COPY region   FROM '/2.17.3/dbgen/tbl/region.tbl'   WITH (FORMAT text, DELIMITER '|');
COPY nation   FROM '/2.17.3/dbgen/tbl/nation.tbl'   WITH (FORMAT text, DELIMITER '|');
COPY part     FROM '/2.17.3/dbgen/tbl/part.tbl'     WITH (FORMAT text, DELIMITER '|');
COPY supplier FROM '/2.17.3/dbgen/tbl/supplier.tbl' WITH (FORMAT text, DELIMITER '|');
COPY customer FROM '/2.17.3/dbgen/tbl/customer.tbl' WITH (FORMAT text, DELIMITER '|');
COPY lineitem FROM '/2.17.3/dbgen/tbl/lineitem.tbl' WITH (FORMAT text, DELIMITER '|');
COPY partsupp FROM '/2.17.3/dbgen/tbl/partsupp.tbl' WITH (FORMAT text, DELIMITER '|');
COPY orders   FROM '/2.17.3/dbgen/tbl/orders.tbl'   WITH (FORMAT text, DELIMITER '|');

五、給各表加約束條件

數據表的約束條件存放在dss.ri 文件中,複製並做相應更改在數據庫中執行生成相關約束。

-- Primary and foreign keys for the TPC-H tables, adapted from dss.ri.
--
-- All constraints are added inside one explicit transaction, so a failure
-- (for example, loaded data that violates a key) rolls everything back and
-- the script can simply be re-run. This replaces the per-table COMMIT WORK
-- statements, which had no open transaction to commit.
BEGIN;

-- Primary keys. These come first because each foreign key below must
-- reference an existing primary key.
ALTER TABLE region   ADD PRIMARY KEY (r_regionkey);
ALTER TABLE nation   ADD PRIMARY KEY (n_nationkey);
ALTER TABLE part     ADD PRIMARY KEY (p_partkey);
ALTER TABLE supplier ADD PRIMARY KEY (s_suppkey);
ALTER TABLE partsupp ADD PRIMARY KEY (ps_partkey, ps_suppkey);
ALTER TABLE customer ADD PRIMARY KEY (c_custkey);
ALTER TABLE lineitem ADD PRIMARY KEY (l_orderkey, l_linenumber);
ALTER TABLE orders   ADD PRIMARY KEY (o_orderkey);

-- Foreign keys. The referenced columns are spelled out; each list is the
-- primary key of the parent table, which is what a bare REFERENCES <table>
-- resolves to.
ALTER TABLE nation
    ADD FOREIGN KEY (n_regionkey) REFERENCES region (r_regionkey);

ALTER TABLE supplier
    ADD FOREIGN KEY (s_nationkey) REFERENCES nation (n_nationkey);

ALTER TABLE customer
    ADD FOREIGN KEY (c_nationkey) REFERENCES nation (n_nationkey);

ALTER TABLE partsupp
    ADD FOREIGN KEY (ps_suppkey) REFERENCES supplier (s_suppkey);

ALTER TABLE partsupp
    ADD FOREIGN KEY (ps_partkey) REFERENCES part (p_partkey);

ALTER TABLE orders
    ADD FOREIGN KEY (o_custkey) REFERENCES customer (c_custkey);

ALTER TABLE lineitem
    ADD FOREIGN KEY (l_orderkey) REFERENCES orders (o_orderkey);

ALTER TABLE lineitem
    ADD FOREIGN KEY (l_partkey, l_suppkey) REFERENCES partsupp (ps_partkey, ps_suppkey);

COMMIT;

六、生成查詢語句

複製qgen 和dists.dss 到queries ,cd到queries目錄下執行


# -d: use the default substitution parameters; 1: generate the SQL from query template 1.
./qgen -d 1 >d1.sql

參考博客

http://www.cnblogs.com/joyeecheung/p/3599698.html

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章