Qlik Sense - 根據配置的記錄,實現自動抽取源DB表/視圖

場景描述

要形成Qlik自動化抽數。

需要Qlik根據一份配置表(excel表),自動去源數據庫獲取數據,並存儲成qvd文件。數據要支持增量方式(其實就是ETL工作:向源庫獲取同構表)

未來需要增加、減少同構表,只需要在配置表中添加刪除記錄、並標記抽取方式(增量、全量等)即可,大大減少了工作量。

程序開發

〇、主體代碼結構

  1. [Main]:系統常量設定,application自動生成,不做更改
  2. [Const]:程序常量聲明,如DB連接名,文件根目錄路徑名等
  3. [SubProgs]:子程序聲明,定義一些可複用的程序塊
  4. [LoadData]:抽數主程序,獲取源數據庫數據,並存儲到qvd中
  5. [CombineData]:其他個性化的數據處理,這裏是對兩個DB源,同名表進行union(concatenate)

一、常量聲明Const

// ---- Global constants ----
// Name of the default source-database connection.
Let DefaultDBConn = 'LIS_NodeA';
// Folder connection used as the root for extraction logs, config files, etc.
Let SenseRootPath = 'lib://QlikSense';

// ---- File locations (all derived from SenseRootPath) ----
Let ConfigPath  = '$(SenseRootPath)/Config/';
// File name of the extraction-log QVD.
Let LogFileName = 'ATSLoadLog.qvd';
// Folder holding one QVD per extracted view/table.
Let DataPath    = '$(SenseRootPath)/Data/';

// Kind of source object being extracted: log tables ('logs') or interface tables ('Interface').
Let ViewType = 'Interface';

// Front-end setting, used when building set-analysis expressions.
Let DBEffDays = 10;

二、子程序聲明

定義可複用的子程序塊。這部分代碼可以考慮以文件形式(txt)放在服務器上,由application去調用子程序塊;特別是log相關的子程序,這樣就能開放給其他application使用。

// Create an empty log QVD (header only, zero rows). Intended to be called
// when the log file does not exist yet.
//   p_ErpLogPath  : folder path of the log, including the trailing separator
//   p_LogFileName : file name of the log QVD
Sub CreateLogFile(p_ErpLogPath,p_LogFileName)
  LogTable:
    Load * Inline [ViewType,SourceViewName,StatusFlag,ExeTime,Comments,CreationDate];
    Store LogTable Into [$(p_ErpLogPath)$(p_LogFileName)](qvd);
    // Drop the scratch table so it does not linger in the data model
    // (matches the equivalent inline code in StoreQvd).
    Drop Table LogTable;
End Sub;

// Append one record describing a table load to the log QVD.
// Incremental extraction uses the execution time stored in this log to
// decide where the next increment starts.
//   p_ViewType     : written to the ViewType column
//   p_DwViewName   : written to the SourceViewName column
//   p_ErpLogPath   : folder path of the log QVD (including trailing separator)
//   p_LogFileName  : file name of the log QVD
//   p_StatusFlag   : written to the StatusFlag column (callers in this script pass S or E)
//   p_ThisExeTime  : written to the ExeTime column
// NOTE(review): the new row is built as an Inline record, so a parameter or
// error text containing ']' would presumably end the inline block early - confirm.
Sub StoreLogs(p_ViewType, p_DwViewName, p_ErpLogPath, p_LogFileName, p_StatusFlag, p_ThisExeTime)
  // Flatten the accumulated error list onto one line (line feeds -> ';')
  Let LogComments = Replace(ScriptErrorList,chr(10),';');
  ScriptErrorList = ''; // clear the error list
  Let LogCreationTime = Text(Now());
  // Reload the existing log, append the new record, and write it back
  LogTable:
    Load * From [$(p_ErpLogPath)$(p_LogFileName)](qvd);  
    
    Concatenate (LogTable)
    Load * Inline [ViewType ,SourceViewName ,StatusFlag ,ExeTime ,Comments ,CreationDate
    '$(p_ViewType)','$(p_DwViewName)',$(p_StatusFlag),'$(p_ThisExeTime)','$(LogComments)','$(LogCreationTime)'];
  Store LogTable Into [$(p_ErpLogPath)$(p_LogFileName)](qvd);
  Drop Table LogTable;
End Sub;

// Store a resident table as a QVD and record the outcome in the log QVD.
// Required parameters:
//   p_ViewType     : module / view type written to the log
//   p_DwViewName   : name of the resident table to store
//   p_ErpQvdPath   : folder path for the data QVD (including trailing separator)
//   p_QvdName      : QVD file name without the .qvd extension
//   p_ErpLogPath   : folder path of the log QVD
//   p_LogFileName  : file name of the log QVD
//   p_ThisExeTime  : execution time written to the log
// Optional parameter:
//   p_PreviousErrorCount : value of ScriptErrorCount before this table was
//                          loaded (when looping over several tables). When it
//                          is empty, any non-zero ScriptErrorCount is treated
//                          as a failure.
// The table is stored only if no new script error occurred. Status 'S' is
// logged only if the Store also succeeded; otherwise 'E' is logged.
Sub StoreQvd(p_ViewType, p_DwViewName, p_ErpQvdPath, p_QvdName, p_PreviousErrorCount, p_ErpLogPath, p_LogFileName, p_ThisExeTime)
  // Create an empty log file on first use
  Let LogNotExists = IsNull(FileTime('$(p_ErpLogPath)$(p_LogFileName)')); 

  If $(LogNotExists) = -1 Then
    LogTable:
    Load * Inline [ViewType,SourceViewName,StatusFlag,ExeTime,Comments,CreationDate];
    Store LogTable Into [$(p_ErpLogPath)$(p_LogFileName)](qvd);
    Drop Table LogTable;
  End If;
  trace 'ScriptErrorCount=$(ScriptErrorCount)';
  trace 'p_PreviousErrorCount=$(p_PreviousErrorCount)';

  // Baseline error count. An omitted/empty optional parameter must not be
  // expanded into an expression (it would become "IsNull()"), so test it as text.
  If Len(Trim('$(p_PreviousErrorCount)')) = 0 Then
    Let StoreQvdBaseErrors = 0;
  Else
    Let StoreQvdBaseErrors = $(p_PreviousErrorCount);
  End If;

  Let StoreQvdStatus = 'E';
  If $(ScriptErrorCount) = $(StoreQvdBaseErrors) Then
    Store $(p_DwViewName) into [$(p_ErpQvdPath)$(p_QvdName).qvd](qvd);
    // Only report success if the Store itself raised no error; a false 'S'
    // would advance the incremental watermark without the data being saved.
    If $(ScriptErrorCount) = $(StoreQvdBaseErrors) Then
      Let StoreQvdStatus = 'S';
    End If;
  End If;

  Call StoreLogs('$(p_ViewType)', '$(p_DwViewName)', '$(p_ErpLogPath)', '$(p_LogFileName)', '$(StoreQvdStatus)', '$(p_ThisExeTime)');

End Sub;

三、抽取數據主代碼(關鍵)

抽取數據時,讀取excel表,裏面記錄了要抽取的表/視圖、以及相應的抽取方式,內容如下

  • source_view_name:要抽取的表名/視圖名(按該名字去源庫執行select語句,所以名稱一定不能錯)
  • qvd_name:存儲成qvd文件時的文件名
  • primary_key:表主鍵,當增量查詢時必填
  • etl_type:抽取方式
    • IUD:原表記錄存在新增(Insert)、更新(Update)、刪除(Delete)操作時,用【IUD】方式抽取
    • IU   :原表記錄只有新增(Insert)、更新(Update)操作時,用【IU】方式抽取(表記錄不會被刪除)
    • I     :原表記錄只有新增(Insert)操作時,用 【I】方式抽取
  • time_stamp_col:時間戳字段,增量更新時必填,通過時間戳確定數據變更時間,以確定增量記錄
  • owner:表/視圖所屬的db用戶
  • db_conn:在sense中定義的數據庫連接名稱

在sense editor中配置好連接名,即可寫入以下代碼,來對數據進行ETL

// Load the extraction configuration from the Excel workbook (sheet "Intf").
// One row per source table/view to extract.
SourceViews:
Load SOURCE_VIEW_NAME, QVD_NAME, PRIMARY_KEY, ETL_TYPE, TIME_STAMP_COL, OWNER, DB_CONN
From [$(ConfigPath)ATS_Tables.xlsx] (ooxml, embedded labels, table is Intf);

// Number of configured tables/views; drives the extraction loop.
Let SourceViewsCnt = NoOfRows('SourceViews');

Trace '==> DefaultDBConn = $(DefaultDBConn)';

// Start on the default database connection; DBConn tracks the connection
// that is currently open.
Let DBConn = '$(DefaultDBConn)';
LIB CONNECT TO [$(DBConn)];

// Extract every table/view listed in SourceViews into its own QVD, either
// incrementally or in full, and log the outcome per table.

// Keep running after a per-table failure so it can be logged with status 'E'
// instead of aborting the reload. ErrorMode is restored to 1 after the loop.
Set ErrorMode = 0;

For i = 0 to $(SourceViewsCnt)-1
  // Error count before this table; StoreQvd compares against it to detect new errors.
  Let RecordErrorCount = ScriptErrorCount;

  //Begin: read the settings of this configuration row
  // Emit the "owner." prefix only when OWNER is filled in (avoids "From .TABLE").
  Let Owner        = If(Len(Trim(Peek('OWNER',i,'SourceViews'))) > 0, Trim(Peek('OWNER',i,'SourceViews')) & '.', '');
  Let DwViewName   = Peek('SOURCE_VIEW_NAME',i,'SourceViews');
  Let PrimaryKey   = Peek('PRIMARY_KEY',i,'SourceViews');
  Let QvdName      = Peek('QVD_NAME',i,'SourceViews');
  // Extraction mode comes from the configuration; a blank value means a full load.
  Let EtlType      = If(Len(Trim(Peek('ETL_TYPE',i,'SourceViews'))) > 0, Upper(Trim(Peek('ETL_TYPE',i,'SourceViews'))), 'ALL');
  Let TimeStampCol = Peek('TIME_STAMP_COL',i,'SourceViews');
  // A blank DB_CONN falls back to the default connection.
  Let ViewDbConn   = If(Len(Trim(Peek('DB_CONN',i,'SourceViews'))) > 0, Trim(Peek('DB_CONN',i,'SourceViews')), '$(DefaultDBConn)');

  // Reconnect only when this row uses a different connection than the open one.
  If '$(DBConn)' = '$(ViewDbConn)'  Then
    Trace '==> DB Connection is $(DBConn)';
  Else
    DisConnect;
    Let DBConn = '$(ViewDbConn)';
    LIB CONNECT TO [$(DBConn)];
  End If;

  Trace ' => DwViewName = $(DwViewName); EtlType    = $(EtlType)';

  Let QvdNotExist  = IsNull(QvdCreateTime('$(DataPath)'&'$(QvdName)'&'.qvd'));
  Let LogNotExist  = IsNull(QvdCreateTime('$(DataPath)'&'$(LogFileName)'));
  If $(LogNotExist) = -1 Then
    Call CreateLogFile('$(DataPath)','$(LogFileName)');
  End If;
  //End: read the settings of this configuration row

  Trace ' => QvdNotExist = $(QvdNotExist)';

  //Begin: determine the time window of this run
  // Lower bound: time of the last successful run logged for this table.
  LastExeTime:
  Load Text(Date(Max(ExeTime),'YYYY/MM/DD hh:mm:ss')) AS ExeTime From [$(DataPath)$(LogFileName)](qvd)
  Where SourceViewName = '$(DwViewName)'
    And StatusFlag = 'S';

  // Read the value from the LastExeTime table itself; FieldValue() would look
  // at the ExeTime field across the whole data model.
  Let LastExeTime  = If(IsNull(Peek('ExeTime',0,'LastExeTime')),'1990/01/01 00:00:00',Text(Peek('ExeTime',0,'LastExeTime')));
  // Upper bound: now. ThisExeTime (default timestamp format) is what gets logged.
  Let ThisExeTime  = Text(Now());
  // Same instant, re-formatted to match the TO_DATE mask used in the SQL below,
  // so the query does not depend on the app's TimestampFormat setting.
  Let ThisExeTimeSql = Text(Timestamp(Timestamp#('$(ThisExeTime)'),'YYYY/MM/DD hh:mm:ss'));
  If Len('$(ThisExeTimeSql)') = 0 Then
    Let ThisExeTimeSql = '$(ThisExeTime)';
  End If;
  Drop Table LastExeTime;
  //End: determine the time window of this run

  Let NowTime  = Text(Now());
  Trace '==> 處理表 $(DwViewName) 開始時間  $(NowTime)  ';

  //Begin: choose incremental or full extraction
  // Incremental needs an existing QVD, a known incremental type, a primary key
  // and a timestamp column; anything else is extracted in full.
  Let IsIncremental = If($(QvdNotExist) = 0 And Match('$(EtlType)','IUD','IU','I') > 0 And Len('$(PrimaryKey)') > 0 And Len('$(TimeStampCol)') > 0, -1, 0);

  If $(IsIncremental) = -1 Then
    // Rows whose timestamp falls inside (LastExeTime, ThisExeTime].
    $(DwViewName):
    Select p.*
      From $(Owner)$(DwViewName) p
          ,(Select $(PrimaryKey)
              From $(Owner)$(DwViewName)
             Where $(TimeStampCol) > TO_DATE('$(LastExeTime)','YYYY/MM/DD HH24:MI:SS')
               And $(TimeStampCol) <= TO_DATE('$(ThisExeTimeSql)','YYYY/MM/DD HH24:MI:SS')) temp
     Where p.$(PrimaryKey) = temp.$(PrimaryKey);

    If '$(EtlType)' = 'I' Then
      // Insert-only source: every previously stored row is still valid.
      Concatenate ($(DwViewName))
        Load *
        From [$(DataPath)$(QvdName).qvd](qvd);
    Else
      // IU / IUD: keep stored rows only where no newer version was just fetched.
      Concatenate ($(DwViewName))
        Load *
        From [$(DataPath)$(QvdName).qvd](qvd)
       Where Not Exists ($(PrimaryKey));

      If '$(EtlType)' = 'IUD' Then
        // IUD: remove rows whose key no longer exists in the source (deletes).
        Inner Join ($(DwViewName))
          Select $(PrimaryKey)
            From $(Owner)$(DwViewName);
      End If;
    End If;
  Else
    // Full extraction (first run, type ALL, or unrecognised/incomplete config).
    $(DwViewName):
    Select *
      From $(Owner)$(DwViewName);
  End If;
  //End: choose incremental or full extraction

  Let NowTime  = Text(Now());
  Trace '==> 處理表 $(DwViewName) 結束時間  $(NowTime)  ';

  // Store the data as QVD and write the log record
  Call StoreQvd('$(ViewType)', '$(DwViewName)', '$(DataPath)', '$(QvdName)', '$(RecordErrorCount)', '$(DataPath)', '$(LogFileName)', '$(ThisExeTime)');

  // The table is absent when the extraction failed; skip the drop in that case.
  If Not IsNull(TableNumber('$(DwViewName)')) Then
    Drop Table $(DwViewName);
  End If;

Next i;

// Back to the default behaviour: stop the reload on script errors.
Set ErrorMode = 1;

Drop Table SourceViews;

//End: Load Data
DisConnect;

 

四、數據合併(個性化處理、可以忽略)

這裏代碼對應場景是 有N個數據源(DB),有一部分表的表結構在各個源庫中是一模一樣的,只是表名差別(業務上是同一類業務下的不同子分類,後臺表分了庫)。使用代碼進行自動union,形成一張大表。

// For each table prefix, union (concatenate) all QVDs named
// <prefix>_<source-system>.qvd in DataPath into a single <prefix>.qvd,
// tagging each row with its source-system identifier in SRC_SYS_IDF.
Set filePrefixSet = 'ATS_INTERFACE_BATCHES,ATS_INTERFACE_DETAILS,ATS_INTERFACE_REFUNDS';
Set ViewType = 'QlikInterface';
Let fileTypeCnt = SubStringCount('$(filePrefixSet)',',') + 1;

For fileTypeIdx = 1 to fileTypeCnt
  Let filePrefix = SubField('$(filePrefixSet)',',',fileTypeIdx);
  // Position of the first character after "<DataPath><prefix>_" in a file path.
  Let prefixLen = Len('$(DataPath)')+Len('$(filePrefix)')+2;
  Set fileCnt = 0;
  For Each file in FileList('$(DataPath)'&'$(filePrefix)'&'_*.qvd')
    Let fileCnt = fileCnt + 1;
    // Source-system identifier = file name part between "<prefix>_" and ".qvd".
    Let srcSysIdf = Replace(Mid('$(file)',prefixLen),'.qvd','');
    If fileCnt = 1 Then
      // First file creates the combined table ...
      $(filePrefix):
       Load '$(srcSysIdf)' AS SRC_SYS_IDF,*
       From [$(file)](qvd);
    Else
      // ... later files are appended to it.
      Concatenate ($(filePrefix))
       Load '$(srcSysIdf)' AS SRC_SYS_IDF,*
       From [$(file)](qvd);
    End If;
  Next
  // Nothing was loaded when no file matched the prefix; storing or dropping
  // the (non-existent) table would raise a script error.
  If fileCnt > 0 Then
    Store $(filePrefix) Into [$(DataPath)$(filePrefix).qvd](qvd);
    Drop Table $(filePrefix);
  Else
    Trace '==> No source QVD found for $(filePrefix), nothing combined';
  End If;
Next

 

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章