场景描述
要形成Qlik自动化抽数。
需要Qlik根据一份配置表(excel表),自动去源数据库获取数据,并存储成qvd文件。数据要支持增量方式(其实就是ETL工作:向源库获取同构表)
未来需要增加、减少同构表,只需要在配置表中添加删除记录、并标记抽取方式(增量、全量等)即可,大大减少了工作量。
程序开发
〇、主体代码结构
- [Main]:系统常量设定,application自动生成,不做更改
- [Const]:程序常量声明,如DB连接名,文件根目录路径名等
- [SubProgs]:子程序声明,定义一些可复用的程序块
- [LoadData]:抽数主程序,获取源数据库数据,并存储到qvd中
- [CombineData]:其他个性化的数据处理,这里是对两个DB源,同名表进行union(concatenate)
一、常量声明Const
// ---- Global constants ----
Let DefaultDBConn = 'LIS_NodeA';        // default source-DB connection name
Let SenseRootPath = 'lib://QlikSense';  // connection name of the root folder holding load logs, config files, etc.

// ---- Locations derived from the root folder ----
Let ConfigPath = '$(SenseRootPath)/Config/';  // configuration files
Let DataPath = '$(SenseRootPath)/Data/';      // QVD file of every view & table
Let LogFileName = 'ATSLoadLog.qvd';           // QVD name of the load log

// Table category: log tables ('logs') or interface tables ('Interface')
Let ViewType = 'Interface';

// Front-end setting, used when building set-analysis expressions
Let DBEffDays = 10;
二、子程序声明
定义可复用的子程序块。这块代码可以考虑以文件形式(txt)放在服务器上,由application调用子程序块;特别是日志(log)相关的子程序,这样就能开放给其他application使用。
// Create an empty load-log QVD (intended for when the log file does not exist yet).
//   p_ErpLogPath  : folder path of the log file, including the trailing slash
//   p_LogFileName : log file name, including the .qvd extension
Sub CreateLogFile(p_ErpLogPath,p_LogFileName)
    // Header-only inline table: defines the log columns without any rows
    LogTable:
    Load * Inline [ViewType,SourceViewName,StatusFlag,ExeTime,Comments,CreationDate];
    Store LogTable Into [$(p_ErpLogPath)$(p_LogFileName)](qvd);
    // Release the helper table so it does not linger in the data model after the call
    Drop Table LogTable;
End Sub;
// Append one table-load record to the log QVD.
// Incremental loads use the execution time recorded here to determine their starting point.
//   p_ViewType     : value written to the ViewType column
//   p_DwViewName   : value written to the SourceViewName column
//   p_ErpLogPath   : folder path of the log file
//   p_LogFileName  : log file name
//   p_StatusFlag   : value written to the StatusFlag column (callers in this file pass 'S' or 'E')
//   p_ThisExeTime  : value written to the ExeTime column
Sub StoreLogs(p_ViewType, p_DwViewName, p_ErpLogPath, p_LogFileName, p_StatusFlag, p_ThisExeTime)
// Put the accumulated script errors on a single line: line feeds become ';'
Let LogComments = Replace(ScriptErrorList,chr(10),';');
ScriptErrorList = ''; // clear the error list
Let LogCreationTime = Text(Now());
// Read the existing log; the file must already exist at this point
LogTable:
Load * From [$(p_ErpLogPath)$(p_LogFileName)](qvd);
// Append the new record as a one-row inline table
Concatenate (LogTable)
Load * Inline [ViewType ,SourceViewName ,StatusFlag ,ExeTime ,Comments ,CreationDate
'$(p_ViewType)','$(p_DwViewName)',$(p_StatusFlag),'$(p_ThisExeTime)','$(LogComments)','$(LogCreationTime)'];
// Write the extended log back and release the in-memory copy
Store LogTable Into [$(p_ErpLogPath)$(p_LogFileName)](qvd);
Drop Table LogTable;
End Sub;
// Store a loaded table as a QVD file and write the outcome to the load log.
// Required: p_ViewType (module name), p_DwViewName (name of the table to store), p_ErpQvdPath (QVD folder),
//           p_QvdName (QVD file name without extension), p_ErpLogPath (log folder), p_LogFileName (log file name),
//           p_ThisExeTime (execution time written to the log)
// Optional: p_PreviousErrorCount - ScriptErrorCount captured before this table was loaded (when looping over
//           several tables). Pass '' to treat any script error raised so far as a failure of this table.
Sub StoreQvd(p_ViewType, p_DwViewName, p_ErpQvdPath, p_QvdName, p_PreviousErrorCount, p_ErpLogPath, p_LogFileName, p_ThisExeTime)
    // Create an empty log file first when none exists yet
    Let LogNotExists = IsNull(FileTime('$(p_ErpLogPath)$(p_LogFileName)'));
    If $(LogNotExists) = -1 Then
        LogTable:
        Load * Inline [ViewType,SourceViewName,StatusFlag,ExeTime,Comments,CreationDate];
        Store LogTable Into [$(p_ErpLogPath)$(p_LogFileName)](qvd);
        Drop Table LogTable;
    End If;
    trace 'ScriptErrorCount=$(ScriptErrorCount)';
    trace 'p_PreviousErrorCount=$(p_PreviousErrorCount)';
    // Error-count baseline. An empty argument cannot be expanded inside IsNull(...) (it would become
    // "IsNull()", a syntax error), so test the expanded text instead.
    If Len(Trim('$(p_PreviousErrorCount)')) = 0 Then
        Let StoreQvdErrBase = 0;
    Else
        Let StoreQvdErrBase = $(p_PreviousErrorCount);
    End If;
    Let StoreQvdStatus = 'E';
    Let StoreQvdNewErrors = ScriptErrorCount - $(StoreQvdErrBase);
    If $(StoreQvdNewErrors) = 0 Then
        Store $(p_DwViewName) into [$(p_ErpQvdPath)$(p_QvdName).qvd](qvd);
        // Only report success when the Store itself raised no error; a false 'S' would move the
        // incremental starting point forward although no QVD was written.
        Let StoreQvdNewErrors = ScriptErrorCount - $(StoreQvdErrBase);
        If $(StoreQvdNewErrors) = 0 Then
            Let StoreQvdStatus = 'S';
        End If;
    End If;
    Call StoreLogs('$(p_ViewType)', '$(p_DwViewName)', '$(p_ErpLogPath)', '$(p_LogFileName)', '$(StoreQvdStatus)', '$(p_ThisExeTime)');
    // Remove the helper variables (assigning null deletes a script variable)
    Let StoreQvdErrBase = Null();
    Let StoreQvdNewErrors = Null();
    Let StoreQvdStatus = Null();
End Sub;
三、抽取数据主代码(关键)
抽取数据时,读取excel表,里面记录了要抽取的表/视图、以及相应的抽取方式,内容如下
- source_view_name:要抽取的表名/视图名(按该名字去源库执行select语句,所以名称一定不能错)
- qvd_name:存储成qvd文件时的文件名
- primary_key:表主键,当增量查询时必填
- etl_type:抽取方式
- IUD:原表记录存在新增(Insert)、更新(Update)、删除(Delete)操作时,用【IUD】方式抽取
- IU :原表记录只有新增(Insert)、更新(Update)操作时,用【IU】方式抽取(表记录不会被删除)
- I :原表记录只有新增(Insert)操作时,用 【I】方式抽取
- ALL:全量抽取,每次重新获取整张表(qvd文件尚不存在时,也会按全量方式抽取)
- time_stamp_col:时间戳字段,增量更新时必填,通过时间戳确定数据变更时间,以确定增量记录
- owner:表/视图所属的db用户
- db_conn:在sense中定义的数据库连接名称
在sense editor中配置好连接名后,即可写入以下代码,对数据进行ETL。
// Read the extraction configuration from the Excel sheet: one row per source table/view
SourceViews:
LOAD
    SOURCE_VIEW_NAME,
    QVD_NAME,
    PRIMARY_KEY,
    ETL_TYPE,
    TIME_STAMP_COL,
    OWNER,
    DB_CONN
FROM [$(ConfigPath)ATS_Tables.xlsx]
(ooxml, embedded labels, table is Intf);
Let SourceViewsCnt = NoOfRows('SourceViews');
Trace '==> DefaultDBConn = $(DefaultDBConn)';
// Load DB data
Let DBConn = '$(DefaultDBConn)'; // start on the default DB connection
LIB CONNECT TO [$(DBConn)];
// Keep the reload running when a single table fails, so the failure can be counted through
// ScriptErrorCount and logged with status 'E'. ErrorMode is restored to 1 after the loop.
Set ErrorMode = 0;
For i = 0 to $(SourceViewsCnt)-1
    // Error count before this table is processed; StoreQvd compares against it
    Let RecordErrorCount = ScriptErrorCount;

    //Begin: read the configuration of the current row
    Let DwViewName = peek('SOURCE_VIEW_NAME',i,'SourceViews');
    Let PrimaryKey = Trim(peek('PRIMARY_KEY',i,'SourceViews'));
    Let QvdName = peek('QVD_NAME',i,'SourceViews');
    Let TimeStampCol = Trim(peek('TIME_STAMP_COL',i,'SourceViews'));
    // Schema prefix: append the dot only when an owner is configured, otherwise the SQL would read ".TABLE"
    Let Owner = Trim(peek('OWNER',i,'SourceViews'));
    If Len('$(Owner)') > 0 Then
        Let Owner = '$(Owner).';
    End If;
    // Extraction mode comes from the configuration sheet; a blank cell means full load
    Let EtlType = Upper(Trim(peek('ETL_TYPE',i,'SourceViews')));
    If Len('$(EtlType)') = 0 Then
        Let EtlType = 'ALL';
    End If;
    // A blank DB_CONN falls back to the default connection
    Let ViewDbConn = Trim(peek('DB_CONN',i,'SourceViews'));
    If Len('$(ViewDbConn)') = 0 Then
        Let ViewDbConn = '$(DefaultDBConn)';
    End If;
    // Reconnect only when this row needs a different connection than the current one
    If '$(DBConn)' = '$(ViewDbConn)' Then
        Trace '==> DB Connection is $(DBConn)';
    Else
        DisConnect;
        Let DBConn = '$(ViewDbConn)';
        LIB CONNECT TO [$(DBConn)];
    End If;
    Trace ' => DwViewName = $(DwViewName); EtlType = $(EtlType)';
    Let QvdNotExist = IsNull(QvdCreateTime('$(DataPath)'&'$(QvdName)'&'.qvd'));
    Let LogNotExist = IsNull(QvdCreateTime('$(DataPath)'&'$(LogFileName)'));
    If $(LogNotExist) = -1 Then
        Call CreateLogFile('$(DataPath)','$(LogFileName)');
    End If;
    //End: read the configuration of the current row
    Trace ' => QvdNotExist = $(QvdNotExist)';

    //Begin: determine the time window of this run
    // Latest successful execution time of this view according to the log
    LastExeTime:
    Load Text(Date(Max(ExeTime),'YYYY/MM/DD hh:mm:ss')) AS ExeTime From [$(DataPath)$(LogFileName)](qvd)
    Where SourceViewName = '$(DwViewName)'
    And StatusFlag = 'S';
    // No successful run yet: start from a date far in the past
    Let LastExeTime = If(IsNull(Peek('ExeTime',0,'LastExeTime')),'1990/01/01 00:00:00',Text(Peek('ExeTime',0,'LastExeTime')));
    // NOTE(review): Text(Now()) follows the app's TimestampFormat, while the SQL below parses it with
    // 'YYYY/MM/DD HH24:MI:SS' - confirm that the two formats agree.
    Let ThisExeTime = Text(Now());
    Drop Table LastExeTime;
    //End: determine the time window of this run

    Let NowTime = Text(Now());
    Trace '==> 处理表 $(DwViewName) 开始时间 $(NowTime) ';

    //Begin: choose between incremental and full load
    // Incremental loading needs an existing QVD, a known mode, a primary key and a timestamp column;
    // in every other case the whole table is loaded.
    Let LoadMode = 'ALL';
    If $(QvdNotExist) = 0 Then
        If Match('$(EtlType)','IUD','IU','I') > 0 and Len('$(PrimaryKey)') > 0 and Len('$(TimeStampCol)') > 0 Then
            Let LoadMode = '$(EtlType)';
        End If;
    End If;

    If '$(LoadMode)' = 'ALL' Then
        $(DwViewName):
        Select *
        From $(Owner)$(DwViewName);
    Else
        // Rows whose timestamp falls in (LastExeTime, ThisExeTime]
        $(DwViewName):
        Select p.*
        From $(Owner)$(DwViewName) p
        ,(Select $(PrimaryKey)
        From $(Owner)$(DwViewName)
        Where $(TimeStampCol) > TO_DATE('$(LastExeTime)','YYYY/MM/DD HH24:MI:SS')
        And $(TimeStampCol) <= TO_DATE('$(ThisExeTime)','YYYY/MM/DD HH24:MI:SS')) temp
        Where p.$(PrimaryKey) = temp.$(PrimaryKey);

        If '$(LoadMode)' = 'I' Then
            // Insert-only source: keep every row already stored
            Concatenate ($(DwViewName))
            Load *
            From [$(DataPath)$(QvdName).qvd](qvd);
        Else
            // Rows may have been updated: keep only stored rows that were not re-read above
            Concatenate ($(DwViewName))
            Load *
            From [$(DataPath)$(QvdName).qvd](qvd)
            Where Not Exists ($(PrimaryKey));
        End If;

        If '$(LoadMode)' = 'IUD' Then
            // Rows may have been deleted: keep only keys that still exist in the source
            Inner Join ($(DwViewName))
            Select $(PrimaryKey)
            From $(Owner)$(DwViewName);
        End If;
    End If;
    //End: choose between incremental and full load

    Let NowTime = Text(Now());
    Trace '==> 处理表 $(DwViewName) 结束时间 $(NowTime) ';
    // Store the data as QVD & write the log
    Call StoreQvd('$(ViewType)', '$(DwViewName)', '$(DataPath)', '$(QvdName)', '$(RecordErrorCount)', '$(DataPath)', '$(LogFileName)', '$(ThisExeTime)');
    // The table does not exist when the SELECT failed; dropping it then would only raise another error
    If Not IsNull(TableNumber('$(DwViewName)')) Then
        Drop Table $(DwViewName);
    End If;
Next i;
Set ErrorMode = 1;
Drop Table SourceViews;
//End: Load Data
DisConnect;
四、数据合并(个性化处理、可以忽略)
这里代码对应的场景是:有N个数据源(DB),其中一部分表的表结构在各个源库中一模一样,只是表名有差别(业务上是同一类业务下的不同子分类,后台表分了库)。使用代码自动进行union,形成一张大表。
// Merge the per-source QVD files that share a file-name prefix into one QVD per prefix.
// Files are matched as <DataPath><prefix>_<source id>.qvd; the <source id> part of the file name is
// stored in the SRC_SYS_IDF column of every row loaded from that file.
Set filePrefixSet = 'ATS_INTERFACE_BATCHES,ATS_INTERFACE_DETAILS,ATS_INTERFACE_REFUNDS';
Set ViewType = 'QlikInterface';
Let fileTypeCnt = SubStringCount('$(filePrefixSet)',',') + 1;
For fileTypeIdx = 1 to fileTypeCnt
    Let filePrefix = SubField('$(filePrefixSet)',',',fileTypeIdx);
    // 1-based position of the first character after "<DataPath><prefix>_" in the full file path
    Let prefixLen = Len('$(DataPath)')+Len('$(filePrefix)')+2;
    Set fileCnt = 0;
    For Each file in FileList('$(DataPath)'&'$(filePrefix)'&'_*.qvd')
        Let fileCnt = fileCnt + 1;
        Let srcSysIdf = Replace(Mid('$(file)',prefixLen),'.qvd','');
        If fileCnt = 1 Then
            // First file creates the target table
            $(filePrefix):
            Load '$(srcSysIdf)' AS SRC_SYS_IDF,*
            From [$(file)](qvd);
        Else
            // Every further file is appended to it
            Concatenate ($(filePrefix))
            Load '$(srcSysIdf)' AS SRC_SYS_IDF,*
            From [$(file)](qvd);
        End If;
    Next file
    // When no file matched, the table was never created: Store / Drop Table would fail
    If fileCnt > 0 Then
        Store $(filePrefix) Into [$(DataPath)$(filePrefix).qvd](qvd);
        Drop Table $(filePrefix);
    Else
        Trace '==> No source qvd found for $(filePrefix), nothing to combine';
    End If;
Next fileTypeIdx