【Oracle】數據清洗案例

數據清洗

方案一:

每半小時爲一個基點:基點前的最後一筆數據必保留,基點後則保留最近的兩筆不重複數據。

如圖所示:示例數據將保留選中部分

1

CREATE OR REPLACE PROCEDURE CLEAR_CD_GUIDEWAY_DATARECORD(IN_DATE IN DATE) IS
  /*
   * Thins out one day of rows in TEST_CD_GUIDEWAY_DATARECORD (scheme 1).
   *
   * Rows whose PRODATE falls on the calendar day of IN_DATE are walked per
   * MACHINEIP in PRODATE order. Every half hour is a "base point". For each
   * machine the procedure keeps:
   *   - the last retained row before a base point, and
   *   - the first two rows after it whose QUANTITY differs from the
   *     previously retained row.
   * Every other row is flagged by setting UPDATER = 'delete'. This is a
   * demonstration: rows are marked, not physically deleted.
   *
   * Parameters:
   *   IN_DATE - any moment within the day to be cleaned (time part ignored).
   *
   * Transaction: commits on success; on any error rolls back and re-raises
   * the original exception so the caller can see the failure.
   *
   * NOTE(review): rows are selected and ordered by PRODATE, but the
   * base-point comparisons use CREATEDATE - confirm that mix is intended.
   */
  TIME_INTERVAL CONSTANT NUMBER := 30; -- base-point spacing, in minutes

  -- Only the columns the loop reads are fetched. The half-open range on
  -- PRODATE selects the same rows as TRUNC(PRODATE) = TRUNC(IN_DATE) while
  -- leaving the column bare, so an index on PRODATE remains usable.
  CURSOR DATARECORD IS
    SELECT T.SYSID, T.MACHINEIP, T.QUANTITY, T.CREATEDATE
      FROM TEST_CD_GUIDEWAY_DATARECORD T
     WHERE T.PRODATE >= TRUNC(IN_DATE)
       AND T.PRODATE < TRUNC(IN_DATE) + 1
     ORDER BY T.MACHINEIP, T.PRODATE;

  LAST_1_ITEM DATARECORD%ROWTYPE; -- most recent row that was retained
  REFER_TIME  DATE;               -- base point at or before the current row
  HALF_POINT  DATE;               -- hh:30 boundary of the current row's hour
  ICOUNT      NUMBER := 0;        -- rows retained since the current base row
BEGIN
  FOR ITEM IN DATARECORD LOOP
    -- Snap the reference time down to hh:00 or hh:30. The boundary is
    -- compared as a DATE rather than as a fractional-minute difference.
    REFER_TIME := TRUNC(ITEM.CREATEDATE, 'HH');
    HALF_POINT := REFER_TIME + TIME_INTERVAL / 24 / 60;
    IF ITEM.CREATEDATE >= HALF_POINT THEN
      REFER_TIME := HALF_POINT;
    END IF;

    IF LAST_1_ITEM.MACHINEIP = ITEM.MACHINEIP AND
       LAST_1_ITEM.QUANTITY = ITEM.QUANTITY THEN
      -- Same machine, same quantity as the last retained row: a duplicate.
      -- Demo only: UPDATER = 'delete' stands in for a physical delete.
      UPDATE TEST_CD_GUIDEWAY_DATARECORD T
         SET T.UPDATER = 'delete'
       WHERE T.SYSID = ITEM.SYSID;
    ELSE
      IF LAST_1_ITEM.MACHINEIP IS NULL OR
         LAST_1_ITEM.MACHINEIP <> ITEM.MACHINEIP OR
         LAST_1_ITEM.CREATEDATE < REFER_TIME THEN
        -- Base row: the very first row, the first row of a new machine, or
        -- the first distinct row after a base point. The machine-change
        -- test keeps one machine's counter and last row from leaking into
        -- the next machine's processing.
        DBMS_OUTPUT.PUT_LINE(ITEM.SYSID ||
                             TO_CHAR(ITEM.CREATEDATE,
                                     'yyyy/mm/dd hh24:mi:ss'));
        ICOUNT := 0;
      ELSIF ICOUNT > 2 THEN
        -- Two rows after the base row are already kept; the previously
        -- retained row is now superseded by this one, so flag it. The
        -- newest row always survives as "last before the next base point".
        UPDATE TEST_CD_GUIDEWAY_DATARECORD T
           SET T.UPDATER = 'delete'
         WHERE T.SYSID = LAST_1_ITEM.SYSID;
      END IF;
      ICOUNT := ICOUNT + 1;
      LAST_1_ITEM := ITEM;
    END IF;
  END LOOP;
  COMMIT;
EXCEPTION
  WHEN OTHERS THEN
    -- Undo partial work, then surface the error instead of hiding it.
    ROLLBACK;
    RAISE;
END CLEAR_CD_GUIDEWAY_DATARECORD;

運行結果:

exec CLEAR_CD_GUIDEWAY_DATARECORD(TO_DATE('20190916', 'yyyymmdd'));

r

方案二:

每半小時爲一個基點,基點數據必保留,然後向上、向下保留基點與前後最近的一筆不重複數據。最開始和最結束的數據保留。

如圖所示:示例數據將保留選中部分

在這裏插入圖片描述

CREATE OR REPLACE PROCEDURE CLEAR_CD_GUIDEWAY_DATARECORD(IN_DATE IN DATE) IS
  /*
   * Thins out one day of rows in CD_GUIDEWAY_DATARECORD (scheme 2).
   *
   * For every machine and every half-hour bucket, the first row of the
   * bucket (by CREATEDATE) is taken as the base row and BDATE is the
   * bucket's base point (hh:00 or hh:30). Around each base point, within
   * +/- 15 minutes, the procedure keeps:
   *   - the base row itself, and
   *   - on each side of the half-hour split, the row nearest in PRODATE to
   *     the base row whose QUANTITY differs from the base row's.
   * All other rows of the same machine in that window that belong to the
   * day of IN_DATE are flagged with UPDATER = 'delete'. Rows are marked
   * rather than physically deleted.
   *
   * Parameters:
   *   IN_DATE - any moment within the day to be cleaned (time part ignored).
   *
   * Transaction: commits on success; on any error rolls back and re-raises
   * the original exception so the caller can see the failure.
   */

  -- Base rows: the first row of each (machine, hour, half-of-hour) group.
  -- The range runs to 00:30 of the following day so that the next
  -- midnight's base point is produced as well; its window reaches back
  -- into the last 15 minutes of the day being cleaned.
  CURSOR BASE_DATE_RECORD IS
    SELECT P.SYSID, P.MACHINEIP, P.PRODATE, P.QUANTITY, P.BDATE
      FROM (SELECT T.SYSID,
                   T.MACHINEIP,
                   T.PRODATE,
                   T.QUANTITY,
                   ROW_NUMBER() OVER(PARTITION BY T.MACHINEIP, TRUNC(T.PRODATE, 'hh24'), SIGN(T.PRODATE - TRUNC(T.PRODATE, 'hh24') - 30 / 60 / 24) ORDER BY T.CREATEDATE) NO,
                   DECODE(SIGN(T.PRODATE - TRUNC(T.PRODATE, 'hh24') -
                               30 / 60 / 24),
                          -1,
                          TRUNC(T.PRODATE, 'hh24'),
                          TRUNC(T.PRODATE, 'hh24') + 30 / 60 / 24) BDATE
              FROM CD_GUIDEWAY_DATARECORD T
             WHERE T.PRODATE BETWEEN TRUNC(IN_DATE) AND
                   TRUNC(IN_DATE + 1) + 30 / 60 / 24) P
     WHERE P.NO = 1;
BEGIN

  FOR BASE_DATE IN BASE_DATE_RECORD LOOP
    -- Trace output: the base point being processed (year/month/day order).
    DBMS_OUTPUT.PUT_LINE(TO_CHAR(BASE_DATE.BDATE, 'yyyy/mm/dd hh24:mi:ss'));

    -- Flag every row of this machine in the +/- 15 minute window except the
    -- base row and the "nearest different-quantity" row on each side.
    -- A physical DELETE could replace this UPDATE once the marking has been
    -- verified.
    -- NOTE(review): NOT IN assumes SYSID is never NULL - confirm it is the
    -- table's key.
    UPDATE CD_GUIDEWAY_DATARECORD T
       SET T.UPDATER = 'delete'
     WHERE T.SYSID <> BASE_DATE.SYSID
       AND T.PRODATE >= TRUNC(IN_DATE)
       AND T.PRODATE < TRUNC(IN_DATE) + 1
       AND T.PRODATE BETWEEN BASE_DATE.BDATE - 15 / 60 / 24 AND
           BASE_DATE.BDATE + 15 / 60 / 24
       AND T.MACHINEIP = BASE_DATE.MACHINEIP
       AND T.SYSID NOT IN
           (SELECT K.SYSID
              FROM (SELECT C.SYSID,
                           -- The SIGN partition separates rows before and
                           -- after the half-hour mark; rank 1 in each
                           -- partition is the row closest to the base row.
                           ROW_NUMBER() OVER(PARTITION BY SIGN(C.PRODATE - TRUNC(C.PRODATE, 'hh24') - 30 / 60 / 24) ORDER BY ABS(C.PRODATE - BASE_DATE.PRODATE)) NO
                      FROM CD_GUIDEWAY_DATARECORD C
                     WHERE C.MACHINEIP = BASE_DATE.MACHINEIP
                       AND C.PRODATE BETWEEN BASE_DATE.BDATE - 15 / 60 / 24 AND
                           BASE_DATE.BDATE + 15 / 60 / 24
                       AND C.QUANTITY <> BASE_DATE.QUANTITY) K
             WHERE K.NO <= 1);

  END LOOP;
  COMMIT;
EXCEPTION
  WHEN OTHERS THEN
    -- Undo partial work, then surface the error instead of hiding it.
    ROLLBACK;
    RAISE;
END CLEAR_CD_GUIDEWAY_DATARECORD;

運行結果:

exec CLEAR_CD_GUIDEWAY_DATARECORD(TO_DATE('20190916', 'yyyymmdd'));

ss

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章