前言
在一次作業中需要用tpch(什麼是tpch請自行google或百度)工具生成數據集做相應測試,故將整個配置過程和自己踩的坑做如下記錄,望有助其他人節約時間。
環境
操作系統:Linux(Ubuntu16.04)
TPC-H工具:2.17.3
PostgreSQL版本:9.6.0
TPC-H工具下載網站:http://www.tpc.org/tpch/
一、生成dbgen和qgen
- 解壓TPC-H工具包,在dbgen目錄下找到makefile.suite並按下列參數修改,用於編譯生成dbgen和qgen
# Settings to change in makefile.suite
CC = gcc
# Current values for DATABASE are: INFORMIX, DB2, TDAT (Teradata)
# SQLSERVER, SYBASE, ORACLE, VECTORWISE
# Current values for MACHINE are: ATT, DOS, HP, IBM, ICL, MVS,
# SGI, SUN, U2200, VMS, LINUX, WIN32
# Current values for WORKLOAD are: TPCH
# POSTGRESQL is not one of the stock DATABASE values listed above; it only
# works once a matching "#ifdef POSTGRESQL" section has been added to tpcd.h.
# This note sits on its own line because make keeps the whitespace that
# precedes a trailing '#' comment as part of the variable's value.
DATABASE = POSTGRESQL
MACHINE = LINUX
WORKLOAD = TPCH
- 由於TPC-H工具的DATABASE參數沒有PostgreSQL選項,需要自己增加對應PostgreSQL的宏定義,在dbgen目錄下更改tpcd.h文件
/* Section to add to tpcd.h for the POSTGRESQL database option */
#ifdef POSTGRESQL
#define GEN_QUERY_PLAN "EXPLAIN"
/* NOTE(review): START_TRAN has no trailing ';' while END_TRAN does -- confirm this is intended */
#define START_TRAN "BEGIN TRANSACTION"
#define END_TRAN "COMMIT;"
#define SET_OUTPUT ""
#define SET_ROWCOUNT "LIMIT %d\n"
#define SET_DBASE ""
#endif /* POSTGRESQL */
- 保存修改後,在終端中cd到dbgen目錄下,執行下列命令
# After saving the changes, run this from the dbgen directory
make -f makefile.suite
# On success the dbgen and qgen files are generated in the dbgen directory
二、運行dbgen生成.tbl數據
# Run from the dbgen directory
./dbgen -s 1 -f # -s 1: generate 1 GB of data; -f: overwrite previously generated files
# On success eight .tbl files are generated in the dbgen directory; list them with the command below (run in dbgen)
ls *.tbl
# Eight .tbl files should be listed
三、建立數據庫
在postgresql中建立tpch數據庫,並創建表,相關表的創建語句可以從dss.ddl中複製
-- NATION: N_COMMENT is the only nullable column.
CREATE TABLE NATION (
    N_NATIONKEY  INTEGER       NOT NULL,
    N_NAME       CHAR(25)      NOT NULL,
    N_REGIONKEY  INTEGER       NOT NULL,
    N_COMMENT    VARCHAR(152)
);
-- REGION: R_COMMENT is the only nullable column.
CREATE TABLE REGION (
    R_REGIONKEY  INTEGER       NOT NULL,
    R_NAME       CHAR(25)      NOT NULL,
    R_COMMENT    VARCHAR(152)
);
-- PART: every column is NOT NULL.
CREATE TABLE PART (
    P_PARTKEY      INTEGER        NOT NULL,
    P_NAME         VARCHAR(55)    NOT NULL,
    P_MFGR         CHAR(25)       NOT NULL,
    P_BRAND        CHAR(10)       NOT NULL,
    P_TYPE         VARCHAR(25)    NOT NULL,
    P_SIZE         INTEGER        NOT NULL,
    P_CONTAINER    CHAR(10)       NOT NULL,
    P_RETAILPRICE  DECIMAL(15,2)  NOT NULL,
    P_COMMENT      VARCHAR(23)    NOT NULL
);
-- SUPPLIER: every column is NOT NULL.
CREATE TABLE SUPPLIER (
    S_SUPPKEY    INTEGER        NOT NULL,
    S_NAME       CHAR(25)       NOT NULL,
    S_ADDRESS    VARCHAR(40)    NOT NULL,
    S_NATIONKEY  INTEGER        NOT NULL,
    S_PHONE      CHAR(15)       NOT NULL,
    S_ACCTBAL    DECIMAL(15,2)  NOT NULL,
    S_COMMENT    VARCHAR(101)   NOT NULL
);
-- PARTSUPP: every column is NOT NULL.
CREATE TABLE PARTSUPP (
    PS_PARTKEY     INTEGER        NOT NULL,
    PS_SUPPKEY     INTEGER        NOT NULL,
    PS_AVAILQTY    INTEGER        NOT NULL,
    PS_SUPPLYCOST  DECIMAL(15,2)  NOT NULL,
    PS_COMMENT     VARCHAR(199)   NOT NULL
);
-- CUSTOMER: every column is NOT NULL.
CREATE TABLE CUSTOMER (
    C_CUSTKEY     INTEGER        NOT NULL,
    C_NAME        VARCHAR(25)    NOT NULL,
    C_ADDRESS     VARCHAR(40)    NOT NULL,
    C_NATIONKEY   INTEGER        NOT NULL,
    C_PHONE       CHAR(15)       NOT NULL,
    C_ACCTBAL     DECIMAL(15,2)  NOT NULL,
    C_MKTSEGMENT  CHAR(10)       NOT NULL,
    C_COMMENT     VARCHAR(117)   NOT NULL
);
-- ORDERS: every column is NOT NULL.
CREATE TABLE ORDERS (
    O_ORDERKEY       INTEGER        NOT NULL,
    O_CUSTKEY        INTEGER        NOT NULL,
    O_ORDERSTATUS    CHAR(1)        NOT NULL,
    O_TOTALPRICE     DECIMAL(15,2)  NOT NULL,
    O_ORDERDATE      DATE           NOT NULL,
    O_ORDERPRIORITY  CHAR(15)       NOT NULL,
    O_CLERK          CHAR(15)       NOT NULL,
    O_SHIPPRIORITY   INTEGER        NOT NULL,
    O_COMMENT        VARCHAR(79)    NOT NULL
);
-- LINEITEM: every column is NOT NULL.
CREATE TABLE LINEITEM (
    L_ORDERKEY       INTEGER        NOT NULL,
    L_PARTKEY        INTEGER        NOT NULL,
    L_SUPPKEY        INTEGER        NOT NULL,
    L_LINENUMBER     INTEGER        NOT NULL,
    L_QUANTITY       DECIMAL(15,2)  NOT NULL,
    L_EXTENDEDPRICE  DECIMAL(15,2)  NOT NULL,
    L_DISCOUNT       DECIMAL(15,2)  NOT NULL,
    L_TAX            DECIMAL(15,2)  NOT NULL,
    L_RETURNFLAG     CHAR(1)        NOT NULL,
    L_LINESTATUS     CHAR(1)        NOT NULL,
    L_SHIPDATE       DATE           NOT NULL,
    L_COMMITDATE     DATE           NOT NULL,
    L_RECEIPTDATE    DATE           NOT NULL,
    L_SHIPINSTRUCT   CHAR(25)       NOT NULL,
    L_SHIPMODE       CHAR(10)       NOT NULL,
    L_COMMENT        VARCHAR(44)    NOT NULL
);
四、導入數據
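生成的tbl數據每一行的末尾會有一個“|”,導致PG數據庫讀取時報錯,需要將最後一個“|”去掉。在dbgen目錄下找到print.c,註釋145和147行(即 #ifdef EOL_HANDLING 與對應的 #endif 兩行),如下所示。注意:修改print.c後需要重新執行 make -f makefile.suite 並重新運行dbgen生成數據,之前已經生成的.tbl文件不會因此改變。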
}
//#ifdef EOL_HANDLING
if (sep)
//#endif /* EOL_HANDLING */
fprintf(target, "%c", SEPARATOR);
return(0);
}
最後,將數據導入PostgreSQL數據庫中
# Switch to the postgres OS account
su - postgres
# Start the psql client to run SQL statements
psql
-- Inside psql: switch to the tpch database
\c tpch
-- Load each '|'-delimited .tbl file into its table.
-- These are server-side COPY commands: the paths are read by the PostgreSQL server.
COPY region   FROM '/2.17.3/dbgen/tbl/region.tbl'   WITH (DELIMITER '|');
COPY nation   FROM '/2.17.3/dbgen/tbl/nation.tbl'   WITH (DELIMITER '|');
COPY part     FROM '/2.17.3/dbgen/tbl/part.tbl'     WITH (DELIMITER '|');
COPY supplier FROM '/2.17.3/dbgen/tbl/supplier.tbl' WITH (DELIMITER '|');
COPY customer FROM '/2.17.3/dbgen/tbl/customer.tbl' WITH (DELIMITER '|');
COPY lineitem FROM '/2.17.3/dbgen/tbl/lineitem.tbl' WITH (DELIMITER '|');
COPY partsupp FROM '/2.17.3/dbgen/tbl/partsupp.tbl' WITH (DELIMITER '|');
COPY orders   FROM '/2.17.3/dbgen/tbl/orders.tbl'   WITH (DELIMITER '|');
五、給各表加約束條件
數據表的約束條件存放在dss.ri 文件中,複製並做相應更改在數據庫中執行生成相關約束。
-- Adds the primary and foreign keys for the eight TPC-H tables.
--
-- Everything runs inside one explicit transaction, so either all constraints
-- are created or none are. The original per-table COMMIT WORK statements had
-- no matching BEGIN; with psql's default autocommit they only raise
-- "there is no transaction in progress" warnings.
BEGIN;

-- Primary keys first: every foreign key below references one of them.
ALTER TABLE REGION   ADD PRIMARY KEY (R_REGIONKEY);
ALTER TABLE NATION   ADD PRIMARY KEY (N_NATIONKEY);
ALTER TABLE PART     ADD PRIMARY KEY (P_PARTKEY);
ALTER TABLE SUPPLIER ADD PRIMARY KEY (S_SUPPKEY);
ALTER TABLE PARTSUPP ADD PRIMARY KEY (PS_PARTKEY, PS_SUPPKEY);
ALTER TABLE CUSTOMER ADD PRIMARY KEY (C_CUSTKEY);
ALTER TABLE LINEITEM ADD PRIMARY KEY (L_ORDERKEY, L_LINENUMBER);
ALTER TABLE ORDERS   ADD PRIMARY KEY (O_ORDERKEY);

-- Foreign keys. REFERENCES without a column list targets the referenced
-- table's primary key.
ALTER TABLE NATION   ADD FOREIGN KEY (N_REGIONKEY) REFERENCES REGION;
ALTER TABLE SUPPLIER ADD FOREIGN KEY (S_NATIONKEY) REFERENCES NATION;
ALTER TABLE CUSTOMER ADD FOREIGN KEY (C_NATIONKEY) REFERENCES NATION;
ALTER TABLE PARTSUPP ADD FOREIGN KEY (PS_SUPPKEY) REFERENCES SUPPLIER;
ALTER TABLE PARTSUPP ADD FOREIGN KEY (PS_PARTKEY) REFERENCES PART;
ALTER TABLE ORDERS   ADD FOREIGN KEY (O_CUSTKEY) REFERENCES CUSTOMER;
ALTER TABLE LINEITEM ADD FOREIGN KEY (L_ORDERKEY) REFERENCES ORDERS;
-- Composite key: matches PARTSUPP's (PS_PARTKEY, PS_SUPPKEY) primary key.
ALTER TABLE LINEITEM ADD FOREIGN KEY (L_PARTKEY, L_SUPPKEY) REFERENCES PARTSUPP;

COMMIT;
六、生成查詢語句
複製qgen 和dists.dss 到queries ,cd到queries目錄下執行
# -d: use the default parameters; 1: generate the SQL statement from template 1
./qgen -d 1 >d1.sql
參考博客