場景描述
要形成Qlik自動化抽數。
需要Qlik根據一份配置表(excel表),自動去源數據庫獲取數據,並存儲成qvd文件。數據要支持增量方式(其實就是ETL工作:向源庫獲取同構表)
未來需要增加、減少同構表,只需要在配置表中添加刪除記錄、並標記抽取方式(增量、全量等)即可,大大減少了工作量。
程序開發
〇、主體代碼結構
- [Main]:系統常量設定,application自動生成,不做更改
- [Const]:程序常量聲明,如DB連接名,文件根目錄路徑名等
- [SubProgs]:子程序聲明,定義一些可複用的程序塊
- [LoadData]:抽數主程序,獲取源數據庫數據,並存儲到qvd中
- [CombineData]:其他個性化的數據處理,這裏是對兩個DB源,同名表進行union(concatenate)
一、常量聲明Const
// ---------- Global constants ----------
// Name of the default source-database connection.
Set DefaultDBConn = 'LIS_NodeA';
// Connection name of the root folder that holds load logs, config files, etc.
Set SenseRootPath = 'lib://QlikSense';

// ---------- Configuration / storage locations ----------
// Folder that contains the configuration files.
Set ConfigPath = '$(SenseRootPath)/Config/';
// File name of the load-log QVD.
Set LogFileName = 'ATSLoadLog.qvd';
// Folder where the QVD of every view/table is written.
Set DataPath = '$(SenseRootPath)/Data/';
// Kind of data table being loaded: interface tables ('Interface') or log tables ('logs').
Set ViewType = 'Interface';

// ---------- Front-end configuration (used by set-analysis expressions) ----------
Set DBEffDays = 10;
二、子程序聲明
定義可複用的子程序塊。這塊代碼可以考慮以文件形式(txt)放在服務器上,由application去調用其中的子程序;特別是日誌(log)相關的子程序,這樣就能開放給其他application使用。
// Create an empty load-log QVD (intended for the case where the log does not exist yet).
// Parameters:
//   p_ErpLogPath  - folder path of the log; it is concatenated directly with the
//                   file name, so it must already end with a path separator
//   p_LogFileName - file name of the log QVD
// The in-memory skeleton table is dropped after it has been stored so that the
// sub leaves no table behind in the data model.
Sub CreateLogFile(p_ErpLogPath,p_LogFileName)
  LogTable:
  Load * Inline [ViewType,SourceViewName,StatusFlag,ExeTime,Comments,CreationDate];
  Store LogTable Into [$(p_ErpLogPath)$(p_LogFileName)](qvd);
  // Release the header-only table; callers re-read the log from the QVD when needed.
  Drop Table LogTable;
End Sub;
// Append one record to the load log. Incremental loads read this log afterwards
// to find the time of the last successful run.
// Parameters:
//   p_ViewType     - value written to the ViewType column
//   p_DwViewName   - value written to the SourceViewName column
//   p_ErpLogPath   - folder path of the log QVD (concatenated directly with the file name)
//   p_LogFileName  - file name of the log QVD
//   p_StatusFlag   - value written to the StatusFlag column (callers in this file pass S or E)
//   p_ThisExeTime  - value written to the ExeTime column
Sub StoreLogs(p_ViewType, p_DwViewName, p_ErpLogPath, p_LogFileName, p_StatusFlag, p_ThisExeTime)
  // Timestamp of the log record itself.
  Let LogCreationTime = Text(Now());
  // Collapse the accumulated error list onto one line, then reset the list.
  Let LogComments = Replace(ScriptErrorList,chr(10),';');
  Let ScriptErrorList = '';

  // Existing log content ...
  LogTable:
  Load * From [$(p_ErpLogPath)$(p_LogFileName)](qvd);

  // ... plus the new record (inline rows are kept unindented on purpose).
  Concatenate (LogTable)
Load * Inline [ViewType ,SourceViewName ,StatusFlag ,ExeTime ,Comments ,CreationDate
'$(p_ViewType)','$(p_DwViewName)',$(p_StatusFlag),'$(p_ThisExeTime)','$(LogComments)','$(LogCreationTime)'];

  // Write the log back and free the table.
  Store LogTable Into [$(p_ErpLogPath)$(p_LogFileName)](qvd);
  Drop Table LogTable;
End Sub;
// Store an already-loaded table as a QVD and record the outcome in the load log.
// Required parameters:
//   p_ViewType     - module name, written to the log
//   p_DwViewName   - name of the in-memory table to store
//   p_ErpQvdPath   - folder path for the QVD (concatenated directly with the file name)
//   p_QvdName      - QVD file name without the .qvd extension
//   p_ErpLogPath   - folder path of the log QVD
//   p_LogFileName  - file name of the log QVD
//   p_ThisExeTime  - execution time written to the log
// Optional parameter:
//   p_PreviousErrorCount - ScriptErrorCount captured before this table was loaded
//                          (useful when looping over many tables). When it is blank
//                          or not numeric it is treated as 0, i.e. any script error
//                          counts as a failure.
Sub StoreQvd(p_ViewType, p_DwViewName, p_ErpQvdPath, p_QvdName, p_PreviousErrorCount, p_ErpLogPath, p_LogFileName, p_ThisExeTime)
  // Make sure the log file exists before anything is appended to it.
  Let LogNotExists = IsNull(FileTime('$(p_ErpLogPath)$(p_LogFileName)'));
  If $(LogNotExists) = -1 Then
    Call CreateLogFile('$(p_ErpLogPath)', '$(p_LogFileName)');
  End If;

  trace 'ScriptErrorCount=$(ScriptErrorCount)';
  trace 'p_PreviousErrorCount=$(p_PreviousErrorCount)';

  // No new script errors since the caller captured the counter => success.
  // Alt(Num#(...), 0) keeps the expression valid when the optional parameter is
  // blank (a bare $(p_PreviousErrorCount) would expand to nothing and break the
  // statement).
  If Alt(Num#('$(p_PreviousErrorCount)'), 0) - $(ScriptErrorCount) = 0 Then
    Store $(p_DwViewName) into [$(p_ErpQvdPath)$(p_QvdName).qvd](qvd);
    Call StoreLogs('$(p_ViewType)', '$(p_DwViewName)', '$(p_ErpLogPath)', '$(p_LogFileName)', 'S', '$(p_ThisExeTime)');
  Else
    // The load produced errors: keep the previous QVD untouched and log the failure.
    Call StoreLogs('$(p_ViewType)', '$(p_DwViewName)', '$(p_ErpLogPath)', '$(p_LogFileName)', 'E', '$(p_ThisExeTime)');
  End If;
End Sub;
三、抽取數據主代碼(關鍵)
抽取數據時,讀取excel表,裏面記錄了要抽取的表/視圖、以及相應的抽取方式,內容如下
- source_view_name:要抽取的表名/視圖名(按該名字去源庫執行select語句,所以名稱一定不能錯)
- qvd_name:存儲成qvd文件時的文件名
- primary_key:表主鍵,當增量查詢時必填
- etl_type:抽取方式
- IUD:原表記錄存在新增(Insert)、更新(Update)、刪除(Delete)操作時,用【IUD】方式抽取
- IU :原表記錄只有新增(Insert)、更新(Update)操作時,用【IU】方式抽取(表記錄不會被刪除)
- I :原表記錄只有新增(Insert)操作時,用 【I】方式抽取
- time_stamp_col:時間戳字段,增量更新時必填,通過時間戳確定數據變更時間,以確定增量記錄
- owner:表/視圖所屬的db用戶
- db_conn:在sense中定義的數據庫連接名稱
在sense editor中配置好連接名後,即可寫入以下代碼,對數據進行ETL
// Read the extraction configuration (one row per source table/view) from Excel.
SourceViews:
LOAD
    SOURCE_VIEW_NAME,
    QVD_NAME,
    PRIMARY_KEY,
    ETL_TYPE,
    TIME_STAMP_COL,
    OWNER,
    DB_CONN
FROM [$(ConfigPath)ATS_Tables.xlsx]
(ooxml, embedded labels, table is Intf);

Let SourceViewsCnt = NoOfRows('SourceViews');
Trace '==> DefaultDBConn = $(DefaultDBConn)';

// Load DB data
Let DBConn = '$(DefaultDBConn)';   // start on the default DB connection
LIB CONNECT TO [$(DBConn)];

// From here on a failing statement must not abort the reload: each table's outcome
// is detected through ScriptErrorCount and written to the log by StoreQvd.
// ErrorMode is restored to 1 after the loop.
Set ErrorMode = 0;

For i = 0 to $(SourceViewsCnt)-1
  // Error counter before this table is processed; StoreQvd compares against it.
  Let RecordErrorCount = ScriptErrorCount;

  //Begin: read the configuration of the current table
  Let DwViewName   = Peek('SOURCE_VIEW_NAME', i, 'SourceViews');
  Let PrimaryKey   = Peek('PRIMARY_KEY', i, 'SourceViews');
  Let QvdName      = Peek('QVD_NAME', i, 'SourceViews');
  Let TimeStampCol = Peek('TIME_STAMP_COL', i, 'SourceViews');

  // Schema prefix: only add the dot when an owner is configured.
  Let Owner = Trim(Peek('OWNER', i, 'SourceViews'));
  If Len('$(Owner)') > 0 Then
    Let Owner = '$(Owner).';
  End If;

  // Extraction mode comes from the configuration sheet; blank means full load.
  Let EtlType = Upper(Trim(Peek('ETL_TYPE', i, 'SourceViews')));
  If Len('$(EtlType)') = 0 Then
    Let EtlType = 'ALL';
  End If;

  // Connection for this table; blank means the default connection.
  Let ViewDbConn = Peek('DB_CONN', i, 'SourceViews');
  If Len(Trim('$(ViewDbConn)')) = 0 Then
    Let ViewDbConn = '$(DefaultDBConn)';
  End If;

  If '$(DBConn)' = '$(ViewDbConn)' Then
    Trace '==> DB Connection is $(DBConn)';
  Else
    // Switch to the connection required by this table.
    DisConnect;
    Let DBConn = '$(ViewDbConn)';
    LIB CONNECT TO [$(DBConn)];
  End If;

  Trace ' => DwViewName = $(DwViewName); EtlType = $(EtlType)';

  Let QvdNotExist = IsNull(QvdCreateTime('$(DataPath)'&'$(QvdName)'&'.qvd'));
  Let LogNotExist = IsNull(QvdCreateTime('$(DataPath)'&'$(LogFileName)'));
  If $(LogNotExist) = -1 Then
    Call CreateLogFile('$(DataPath)','$(LogFileName)');
  End If;
  //End: read the configuration of the current table

  Trace ' => QvdNotExist = $(QvdNotExist)';

  //Begin: determine the time window of the data to fetch
  // Latest successful execution time recorded in the log for this table.
  LastExeTime:
  Load Text(Date(Max(ExeTime),'YYYY/MM/DD hh:mm:ss')) AS ExeTime From [$(DataPath)$(LogFileName)](qvd)
  Where SourceViewName = '$(DwViewName)'
    And StatusFlag = 'S';

  // Peek reads the value from the LastExeTime table itself, so other tables that
  // happen to contain a field called ExeTime cannot influence the result.
  Let LastExeTime = If(IsNull(Peek('ExeTime', 0, 'LastExeTime')),'1990/01/01 00:00:00',Text(Peek('ExeTime', 0, 'LastExeTime')));
  // NOTE(review): Text(Now()) uses the app's default timestamp format, while the SQL
  // below parses it with 'YYYY/MM/DD HH24:MI:SS' - confirm the two are compatible.
  Let ThisExeTime = Text(Now());
  Drop Table LastExeTime;
  //End: determine the time window of the data to fetch

  Let NowTime = Text(Now());
  Trace '==> 處理表 $(DwViewName) 開始時間 $(NowTime) ';

  //Begin: choose incremental or full extraction according to the configuration
  // Incremental extraction needs an existing QVD, a known incremental mode and both
  // the primary key and the timestamp column; otherwise fall back to a full load.
  Let UseIncremental = 0;
  If $(QvdNotExist) = 0 And Match('$(EtlType)', 'IUD', 'IU', 'I') > 0 Then
    If Len(Trim('$(PrimaryKey)')) > 0 And Len(Trim('$(TimeStampCol)')) > 0 Then
      Let UseIncremental = 1;
    Else
      Trace '==> $(DwViewName): PRIMARY_KEY / TIME_STAMP_COL missing, falling back to full load';
    End If;
  End If;

  If $(UseIncremental) = 1 Then
    // Rows inserted or updated inside (LastExeTime, ThisExeTime].
    $(DwViewName):
    Select p.*
      From $(Owner)$(DwViewName) p
          ,(Select $(PrimaryKey)
              From $(Owner)$(DwViewName)
             Where $(TimeStampCol) >  TO_DATE('$(LastExeTime)','YYYY/MM/DD HH24:MI:SS')
               And $(TimeStampCol) <= TO_DATE('$(ThisExeTime)','YYYY/MM/DD HH24:MI:SS')) temp
     Where p.$(PrimaryKey) = temp.$(PrimaryKey);

    If '$(EtlType)' = 'I' Then
      // Insert-only source: the history in the QVD is appended as it is.
      Concatenate ($(DwViewName))
      Load *
      From [$(DataPath)$(QvdName).qvd](qvd);
    Else
      // Source rows may have been updated: keep only history rows whose key was
      // not fetched again.
      Concatenate ($(DwViewName))
      Load *
      From [$(DataPath)$(QvdName).qvd](qvd)
      Where Not Exists ($(PrimaryKey));
    End If;

    If '$(EtlType)' = 'IUD' Then
      // Source rows may have been deleted: keep only keys that still exist there.
      Inner Join ($(DwViewName))
      Select $(PrimaryKey)
        From $(Owner)$(DwViewName);
    End If;
  Else
    // Full load: first run, ETL_TYPE = ALL, unknown ETL_TYPE, or missing key columns.
    $(DwViewName):
    Select *
      From $(Owner)$(DwViewName);
  End If;
  //End: choose incremental or full extraction according to the configuration

  Let NowTime = Text(Now());
  Trace '==> 處理表 $(DwViewName) 結束時間 $(NowTime) ';

  // Store the data as QVD and write the log record.
  Call StoreQvd('$(ViewType)', '$(DwViewName)', '$(DataPath)', '$(QvdName)', '$(RecordErrorCount)', '$(DataPath)', '$(LogFileName)', '$(ThisExeTime)');

  // The table is missing when the load itself failed; dropping it then would only
  // add an unrelated error to the next table's log record.
  If Not IsNull(TableNumber('$(DwViewName)')) Then
    Drop Table $(DwViewName);
  End If;
Next i;

// Back to the default behaviour: stop the reload on script errors.
Set ErrorMode = 1;
Drop Table SourceViews;
//End: Load Data
DisConnect;
四、數據合併(個性化處理、可以忽略)
這裏代碼對應場景是 有N個數據源(DB),有一部分表的表結構在各個源庫中是一模一樣的,只是表名差別(業務上是同一類業務下的不同子分類,後臺表分了庫)。使用代碼進行自動union,形成一張大表。
// Union QVD files that share a name prefix into one table per prefix.
// For every prefix in filePrefixSet, each file matching <DataPath><prefix>_*.qvd is
// loaded, tagged with the part of its file name that follows "<prefix>_" (column
// SRC_SYS_IDF), concatenated, and stored as <DataPath><prefix>.qvd.
Set filePrefixSet = 'ATS_INTERFACE_BATCHES,ATS_INTERFACE_DETAILS,ATS_INTERFACE_REFUNDS';
Set ViewType = 'QlikInterface';

Let fileTypeCnt = SubStringCount('$(filePrefixSet)',',') + 1;

For fileTypeIdx = 1 to fileTypeCnt
  Let filePrefix = SubField('$(filePrefixSet)',',',fileTypeIdx);
  // Position of the first character after "<DataPath><prefix>_" in the full file path.
  Let prefixLen = Len('$(DataPath)')+Len('$(filePrefix)')+2;
  Set fileCnt = 0;

  For Each file in FileList('$(DataPath)'&'$(filePrefix)'&'_*.qvd')
    Let fileCnt = fileCnt + 1;
    // Source-system identifier = file name remainder without the .qvd extension.
    Let srcSysIdf = Replace(Mid('$(file)',prefixLen),'.qvd','');

    If fileCnt = 1 Then
      // The first file creates the target table ...
      $(filePrefix):
      Load '$(srcSysIdf)' AS SRC_SYS_IDF,*
      From [$(file)](qvd);
    Else
      // ... every further file is appended to it.
      Concatenate ($(filePrefix))
      Load '$(srcSysIdf)' AS SRC_SYS_IDF,*
      From [$(file)](qvd);
    End If;
  Next

  // Only store/drop when at least one file was found; otherwise the table was
  // never created and both statements would fail.
  If fileCnt > 0 Then
    Store $(filePrefix) Into [$(DataPath)$(filePrefix).qvd](qvd);
    Drop Table $(filePrefix);
  Else
    Trace '==> No QVD files found for prefix $(filePrefix), nothing to combine';
  End If;
Next