Qlik Sense - 根据配置的记录,实现自动抽取源DB表/视图

场景描述

要形成Qlik自动化抽数。

需要Qlik根据一份配置表(excel表),自动去源数据库获取数据,并存储成qvd文件。数据要支持增量方式(其实就是ETL工作:向源库获取同构表)

未来需要增加、减少同构表,只需要在配置表中添加删除记录、并标记抽取方式(增量、全量等)即可,大大减少了工作量。

程序开发

〇、主体代码结构

  1. [Main]:系统常量设定,application自动生成,不做更改
  2. [Const]:程序常量声明,如DB连接名,文件根目录路径名等
  3. [SubProgs]:子程序声明,定义一些可复用的程序块
  4. [LoadData]:抽数主程序,获取源数据库数据,并存储到qvd中
  5. [CombineData]:其他个性化的数据处理,这里是对两个DB源,同名表进行union(concatenate)

一、常量声明Const

// ---- Global constants ----
// Name of the default data connection to the source database.
Set DefaultDBConn = 'LIS_NodeA';
// Name of the folder connection that is the root for extract logs, config files, etc.
Set SenseRootPath = 'lib://QlikSense';

// ---- File locations ----
// Folder that holds the configuration workbook.
Set ConfigPath = '$(SenseRootPath)/Config/';
// Folder that holds one QVD per extracted view/table.
Set DataPath = '$(SenseRootPath)/Data/';
// File name of the load-log QVD.
Set LogFileName = 'ATSLoadLog.qvd';

// Table category: 'Interface' (interface tables) or 'logs' (log tables).
Set ViewType = 'Interface';

// Front-end setting, used when building set-analysis expressions.
Set DBEffDays = 10;

二、子程序声明

定义可复用的子程序块。这部分代码可以考虑以文件形式(txt)放在服务器上,由各个 application 去调用(例如通过 $(Must_Include=...) 引入)。特别是日志(log)相关的子程序,这样就能开放给其他 application 复用。

// Creates an empty (header-only) log QVD at p_ErpLogPath + p_LogFileName.
// Meant to be called when the log file does not exist yet.
//   p_ErpLogPath  - folder path of the log file; it is concatenated directly
//                   with the file name, so it must end with a path separator
//   p_LogFileName - file name of the log QVD
Sub CreateLogFile(p_ErpLogPath,p_LogFileName)
  LogTable:
    Load * Inline [ViewType,SourceViewName,StatusFlag,ExeTime,Comments,CreationDate];
    Store LogTable Into [$(p_ErpLogPath)$(p_LogFileName)](qvd);
    // Drop the scratch table so it does not linger in the data model
    // (same clean-up StoreQvd performs after creating the log).
    Drop Table LogTable;
End Sub;

// Appends one record to the load log QVD. Incremental loads later read the
// ExeTime of the latest successful record to decide where to resume.
//   p_ViewType    - value written to the ViewType column
//   p_DwViewName  - value written to the SourceViewName column
//   p_ErpLogPath  - folder path of the log QVD (concatenated directly with the file name)
//   p_LogFileName - file name of the log QVD
//   p_StatusFlag  - value written to the StatusFlag column (StoreQvd passes S or E)
//   p_ThisExeTime - value written to the ExeTime column
// Side effects: sets the variables LogComments and LogCreationTime and
// empties ScriptErrorList.
// NOTE(review): the values are spliced into an inline table as text; a value
// containing a single quote or ']' would presumably break the load - confirm.
Sub StoreLogs(p_ViewType, p_DwViewName, p_ErpLogPath, p_LogFileName, p_StatusFlag, p_ThisExeTime)
  // Flatten the accumulated script errors onto one line (line feed -> ';')
  Let LogComments = Replace(ScriptErrorList,chr(10),';');
  ScriptErrorList = ''; // clear the error list
  Let LogCreationTime = Text(Now());
  // Read the existing log ...
  LogTable:
    Load * From [$(p_ErpLogPath)$(p_LogFileName)](qvd);  
    
    // ... and append the new record to it
    Concatenate (LogTable)
    Load * Inline [ViewType ,SourceViewName ,StatusFlag ,ExeTime ,Comments ,CreationDate
    '$(p_ViewType)','$(p_DwViewName)',$(p_StatusFlag),'$(p_ThisExeTime)','$(LogComments)','$(LogCreationTime)'];
  // Write the extended log back and release the in-memory copy
  Store LogTable Into [$(p_ErpLogPath)$(p_LogFileName)](qvd);
  Drop Table LogTable;
End Sub;

// Stores an already loaded table as a QVD and writes the outcome to the load log.
// Required parameters:
//   p_ViewType     - module/category name written to the log
//   p_DwViewName   - name of the in-memory table to store
//   p_ErpQvdPath   - folder the QVD is written to
//   p_QvdName      - QVD file name without the .qvd extension
//   p_ErpLogPath   - folder of the log QVD
//   p_LogFileName  - file name of the log QVD
//   p_ThisExeTime  - execution time recorded in the log
// Optional parameter:
//   p_PreviousErrorCount - ScriptErrorCount captured before the table was loaded
//                          (used when looping over several tables). When empty,
//                          the absolute ScriptErrorCount is used instead.
// The table is stored and logged with status S only when no new script error
// occurred; otherwise nothing is stored and status E is logged.
Sub StoreQvd(p_ViewType, p_DwViewName, p_ErpQvdPath, p_QvdName, p_PreviousErrorCount, p_ErpLogPath, p_LogFileName, p_ThisExeTime)
  // Create an empty log file first if there is none yet
  Let LogNotExists = IsNull(FileTime('$(p_ErpLogPath)$(p_LogFileName)'));

  If $(LogNotExists) = -1 Then
    LogTable:
    Load * Inline [ViewType,SourceViewName,StatusFlag,ExeTime,Comments,CreationDate];
    Store LogTable Into [$(p_ErpLogPath)$(p_LogFileName)](qvd);
    Drop Table LogTable;
  End If;
  trace 'ScriptErrorCount=$(ScriptErrorCount)';
  trace 'p_PreviousErrorCount=$(p_PreviousErrorCount)';

  // Number of script errors raised since the caller's snapshot.
  // The emptiness test is done on the quoted expansion: an empty parameter would
  // otherwise expand to IsNull(), which is not a valid expression.
  If Len(Trim('$(p_PreviousErrorCount)')) = 0 Then
    Let NewErrorCount = $(ScriptErrorCount);
  Else
    Let NewErrorCount = $(ScriptErrorCount) - $(p_PreviousErrorCount);
  End If;

  If $(NewErrorCount) = 0 Then
      Store $(p_DwViewName) into [$(p_ErpQvdPath)$(p_QvdName).qvd](qvd);
      Call StoreLogs('$(p_ViewType)', '$(p_DwViewName)', '$(p_ErpLogPath)', '$(p_LogFileName)', 'S', '$(p_ThisExeTime)');
  Else
      Call StoreLogs('$(p_ViewType)', '$(p_DwViewName)', '$(p_ErpLogPath)', '$(p_LogFileName)', 'E', '$(p_ThisExeTime)');
  End If;

End Sub;

三、抽取数据主代码(关键)

抽取数据时,读取excel表,里面记录了要抽取的表/视图、以及相应的抽取方式,内容如下

  • source_view_name:要抽取的表名/视图名(按该名字去源库执行select语句,所以名称一定不能错)
  • qvd_name:存储成qvd文件时的文件名
  • primary_key:表主键,当增量查询时必填
  • etl_type:抽取方式
    • IUD:原表记录存在新增(Insert)、更新(Update)、删除(Delete)操作时,用【IUD】方式抽取
    • IU   :原表记录只有新增(Insert)、更新(Update)操作时,用【IU】方式抽取(表记录不会被删除)
    • I     :原表记录只有新增(Insert)操作时,用 【I】方式抽取
    • ALL :全量抽取,每次都重新获取整张表(不需要主键和时间戳字段)
  • time_stamp_col:时间戳字段,增量更新时必填,通过时间戳确定数据变更时间,以确定增量记录
  • owner:表/视图所属的db用户
  • db_conn:在sense中定义的数据库连接名称

在 Sense 的数据加载编辑器中配置好连接名后,即可写入以下代码,对数据进行 ETL

// Load the extraction configuration from the Excel workbook:
// one row per source table/view to extract.
SourceViews:
LOAD
    SOURCE_VIEW_NAME,
    QVD_NAME,
    PRIMARY_KEY,
    ETL_TYPE,
    TIME_STAMP_COL,
    OWNER,
    DB_CONN
FROM [$(ConfigPath)ATS_Tables.xlsx]
(ooxml, embedded labels, table is Intf);

Let SourceViewsCnt = NoOfRows('SourceViews');

Trace '==> DefaultDBConn = $(DefaultDBConn)';
// Start on the default DB connection; the loop reconnects only when a row
// asks for a different connection.
Let DBConn = '$(DefaultDBConn)';
LIB CONNECT TO [$(DBConn)];

// Make sure the load log exists before the loop reads it
// (loop-invariant, so it is checked once instead of once per table).
Let LogNotExist  = IsNull(QvdCreateTime('$(DataPath)'&'$(LogFileName)'));
If $(LogNotExist) = -1 Then
  Call CreateLogFile('$(DataPath)','$(LogFileName)');
End If;

// Keep running after a failed statement so that a failing table is logged with
// status E instead of aborting the whole reload. Without this the per-table
// error counting below can never take effect. Restored to 1 after the loop.
Set ErrorMode = 0;

For i = 0 to $(SourceViewsCnt)-1
  // Error count before this table; StoreQvd compares against it to detect new errors
  Let RecordErrorCount = ScriptErrorCount;

  //Begin: read the configuration row
  // Schema prefix "OWNER."; left empty when the config row has no owner
  Let Owner        = If(Len(Trim(Peek('OWNER',i,'SourceViews'))) > 0, Trim(Peek('OWNER',i,'SourceViews'))&'.', '');
  Let DwViewName   = peek('SOURCE_VIEW_NAME',i,'SourceViews');
  Let PrimaryKey   = peek('PRIMARY_KEY',i,'SourceViews');
  Let QvdName      = peek('QVD_NAME',i,'SourceViews');
  Let TimeStampCol = peek('TIME_STAMP_COL',i,'SourceViews');

  // Extraction mode comes from the configuration (IUD / IU / I / ALL);
  // a blank cell means full load.
  Let EtlType      = Upper(Trim(Peek('ETL_TYPE',i,'SourceViews')));
  If Len('$(EtlType)') = 0 Then
    Let EtlType = 'ALL';
  End If;

  // A blank DB_CONN falls back to the default connection
  Let ViewDbConn   = Trim(Peek('DB_CONN',i,'SourceViews'));
  If Len('$(ViewDbConn)') = 0 Then
    Let ViewDbConn = '$(DefaultDBConn)';
  End If;

  If '$(DBConn)' = '$(ViewDbConn)'  Then
    Trace '==> DB Connection is $(DBConn)';
  Else
    DisConnect;
    Let DBConn = '$(ViewDbConn)';
    LIB CONNECT TO [$(DBConn)];
  End If;

  Trace ' => DwViewName = $(DwViewName); EtlType    = $(EtlType)';

  Let QvdNotExist  = IsNull(QvdCreateTime('$(DataPath)'&'$(QvdName)'&'.qvd'));
  //End: read the configuration row

  Trace ' => QvdNotExist = $(QvdNotExist)';
  //Begin: determine the time window of this run
  // Lower bound = ExeTime of the latest successful log record for this view.
  // NOTE(review): the log is keyed by source view name only, so same-named
  // views extracted from different DB connections share one history - confirm
  // this is acceptable for incremental loads.
  LastExeTime:
  Load Text(Date(Max(ExeTime),'YYYY/MM/DD hh:mm:ss')) AS ExeTime From [$(DataPath)$(LogFileName)](qvd)
  Where SourceViewName = '$(DwViewName)'
    And StatusFlag = 'S';

  // No successful run yet -> start from a date far in the past
  Let LastExeTime  = If(IsNull(Peek('ExeTime',0,'LastExeTime')),'1990/01/01 00:00:00',Text(Peek('ExeTime',0,'LastExeTime')));
  Let ThisExeTime  = Text(Now());
  // Same instant rendered in the layout expected by the TO_DATE mask below,
  // independent of the app's TimestampFormat
  Let ThisExeTimeSql = Text(Timestamp(Timestamp#('$(ThisExeTime)'),'YYYY/MM/DD hh:mm:ss'));
  Drop Table LastExeTime;
  //End: determine the time window of this run
  Let NowTime  = Text(Now());
  Trace '==> 处理表 $(DwViewName) 开始时间  $(NowTime)  ';
  //Begin: incremental vs. full load, as configured
  // Incremental load needs an existing QVD, a known incremental mode, a primary
  // key and a timestamp column; every other case falls back to a full load so
  // that a table is always loaded before StoreQvd runs.
  If $(QvdNotExist) = 0 And Match('$(EtlType)','IUD','IU','I') > 0 And Len('$(PrimaryKey)') > 0 And Len('$(TimeStampCol)') > 0 Then
    // Rows whose timestamp falls in (LastExeTime, ThisExeTime]
    $(DwViewName):
    Select p.*
      From $(Owner)$(DwViewName) p
          ,(Select $(PrimaryKey)
              From $(Owner)$(DwViewName)
             Where $(TimeStampCol) > TO_DATE('$(LastExeTime)','YYYY/MM/DD HH24:MI:SS')
               And $(TimeStampCol) <= TO_DATE('$(ThisExeTimeSql)','YYYY/MM/DD HH24:MI:SS')) temp
     Where p.$(PrimaryKey) = temp.$(PrimaryKey);

    If '$(EtlType)' = 'I' Then
      // Insert-only source: old rows never change, append the whole old QVD
      Concatenate ($(DwViewName))
        Load *
        From [$(DataPath)$(QvdName).qvd](qvd);
    Else
      // Rows may have been updated: keep only old rows that were not re-read
      Concatenate ($(DwViewName))
        Load *
        From [$(DataPath)$(QvdName).qvd](qvd)
       Where Not Exists ($(PrimaryKey));
    End If;

    If '$(EtlType)' = 'IUD' Then
      // Rows may have been deleted: keep only keys still present in the source
      Inner Join ($(DwViewName))
        Select $(PrimaryKey)
          From $(Owner)$(DwViewName);
    End If;
  Else
    // Full load
    $(DwViewName):
    Select *
        From $(Owner)$(DwViewName);
  End If;
  //End: incremental vs. full load, as configured
  Let NowTime  = Text(Now());
  Trace '==> 处理表 $(DwViewName) 结束时间  $(NowTime)  ';
  // Store the data as QVD and write the log record
  Call StoreQvd('$(ViewType)', '$(DwViewName)', '$(DataPath)', '$(QvdName)', '$(RecordErrorCount)', '$(DataPath)', '$(LogFileName)', '$(ThisExeTime)');
  // The table does not exist when its load failed; TableNumber returns NULL then
  If Not IsNull(TableNumber('$(DwViewName)')) Then
    Drop Table $(DwViewName);
  End If;

Next i;

Set ErrorMode = 1;

Drop Table SourceViews;

//End: Load Data
DisConnect;

 

四、数据合并(个性化处理、可以忽略)

这里代码对应场景是 有N个数据源(DB),有一部分表的表结构在各个源库中是一模一样的,只是表名差别(业务上是同一类业务下的不同子分类,后台表分了库)。使用代码进行自动union,形成一张大表。

// For each prefix in filePrefixSet, concatenate every QVD named
// <DataPath><prefix>_<suffix>.qvd into one table, tag each row with the
// suffix in SRC_SYS_IDF, and store the result as <DataPath><prefix>.qvd.
Set filePrefixSet = 'ATS_INTERFACE_BATCHES,ATS_INTERFACE_DETAILS,ATS_INTERFACE_REFUNDS';
Set ViewType = 'QlikInterface';
Let fileTypeCnt = SubStringCount('$(filePrefixSet)',',') + 1;

For fileTypeIdx = 1 to fileTypeCnt
  Let filePrefix = SubField('$(filePrefixSet)',',',fileTypeIdx);
  // 1-based position of the first character after "<DataPath><prefix>_".
  // Assumes FileList returns paths that start with DataPath - confirm.
  Let prefixLen = Len('$(DataPath)')+Len('$(filePrefix)')+2;
  Set fileCnt = 0;
  For Each file in FileList('$(DataPath)'&'$(filePrefix)'&'_*.qvd')
    Let fileCnt = fileCnt + 1;
    // Source-system identifier = file-name part between the prefix and ".qvd"
    Let srcSysIdf = Replace(Mid('$(file)',prefixLen),'.qvd','');
    If fileCnt = 1 Then
      // First file creates the combined table
      $(filePrefix):
       Load '$(srcSysIdf)' AS SRC_SYS_IDF,*
       From [$(file)](qvd);
    Else
      // Further files are appended to it
      Concatenate ($(filePrefix))
       Load '$(srcSysIdf)' AS SRC_SYS_IDF,*
       From [$(file)](qvd);
    End If;
  Next
  // Without any matching file no table was created, so there is nothing to
  // store or drop; doing so anyway would raise a "table not found" error.
  If fileCnt > 0 Then
    Store $(filePrefix) Into [$(DataPath)$(filePrefix).qvd](qvd);
    Drop Table $(filePrefix);
  Else
    Trace '==> No QVD found for prefix $(filePrefix), nothing to combine';
  End If;
Next

 

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章