數據清洗
方案一:
每半小時爲一個基點,基點前最後一筆數據必保留,然後向下保留基點後最近兩筆不重複數據。
如圖所示:示例數據將保留選中部分
CREATE OR REPLACE PROCEDURE CLEAR_CD_GUIDEWAY_DATARECORD(IN_DATE IN DATE) IS
  /*
   * Scheme 1: thin out TEST_CD_GUIDEWAY_DATARECORD for one calendar day.
   *
   * Rows whose PRODATE falls on the day of IN_DATE are walked per machine
   * (MACHINEIP) in PRODATE order. Every TIME_INTERVAL minutes is a base point.
   * For each machine and each base point the procedure keeps:
   *   - the last non-duplicate row before the base point, and
   *   - the first two non-duplicate rows after it.
   * A row is a duplicate when it has the same MACHINEIP and QUANTITY as the
   * most recently kept row.
   *
   * Demo only: rows to be removed are not deleted; their UPDATER column is
   * set to 'delete' instead.
   *
   * Parameters:
   *   IN_DATE - any instant on the day to clean; the time portion is ignored.
   *
   * Errors: the transaction is rolled back and the original exception is
   * re-raised so the caller can see that the run failed.
   *
   * NOTE(review): rows are selected and ordered by PRODATE, but the half-hour
   * bucket is derived from CREATEDATE. Confirm that mixing the two columns
   * is intended.
   */
  TIME_INTERVAL NUMBER := 30; -- spacing of base points, in minutes

  -- Only the columns the loop reads; half-open range keeps PRODATE indexable.
  CURSOR DATARECORD IS
    SELECT T.SYSID, T.MACHINEIP, T.QUANTITY, T.CREATEDATE
      FROM TEST_CD_GUIDEWAY_DATARECORD T
     WHERE T.PRODATE >= TRUNC(IN_DATE)
       AND T.PRODATE < TRUNC(IN_DATE) + 1
     ORDER BY T.MACHINEIP, T.PRODATE;

  LAST_1_ITEM DATARECORD%ROWTYPE; -- most recently kept (non-duplicate) row
  REFER_TIME  DATE;               -- base point at or before the current row
  ICOUNT      NUMBER := 0;        -- non-duplicate rows seen since the base point
BEGIN
  FOR ITEM IN DATARECORD LOOP
    -- Snap the current row to its base point: the top of the hour, or the
    -- top of the hour plus TIME_INTERVAL minutes.
    REFER_TIME := TRUNC(ITEM.CREATEDATE, 'HH');
    IF (ITEM.CREATEDATE - REFER_TIME) * 24 * 60 >= TIME_INTERVAL THEN
      REFER_TIME := REFER_TIME + TIME_INTERVAL / 24 / 60;
    END IF;

    IF LAST_1_ITEM.MACHINEIP = ITEM.MACHINEIP AND
       LAST_1_ITEM.QUANTITY = ITEM.QUANTITY THEN
      -- Same machine, same quantity as the last kept row: a duplicate.
      UPDATE TEST_CD_GUIDEWAY_DATARECORD T
         SET T.UPDATER = 'delete'
       WHERE T.SYSID = ITEM.SYSID;
    ELSE
      IF LAST_1_ITEM.MACHINEIP IS NULL OR
         LAST_1_ITEM.MACHINEIP <> ITEM.MACHINEIP OR
         LAST_1_ITEM.CREATEDATE < REFER_TIME THEN
        -- First non-duplicate row after a base point. A change of machine
        -- also starts a fresh window; otherwise the counter would carry
        -- over and the previous machine's last kept row could be flagged.
        DBMS_OUTPUT.PUT_LINE(ITEM.SYSID ||
                             TO_CHAR(ITEM.CREATEDATE,
                                     'yyyy/mm/dd hh24:mi:ss'));
        ICOUNT := 0;
      ELSIF ICOUNT > 2 THEN
        -- Two rows after the base point are already kept. The previously
        -- kept row is no longer the last one in the window, so drop it.
        UPDATE TEST_CD_GUIDEWAY_DATARECORD T
           SET T.UPDATER = 'delete'
         WHERE T.SYSID = LAST_1_ITEM.SYSID;
      END IF;
      ICOUNT := ICOUNT + 1;
      LAST_1_ITEM := ITEM;
    END IF;
  END LOOP;
  COMMIT;
EXCEPTION
  WHEN OTHERS THEN
    ROLLBACK;
    RAISE; -- do not hide the failure from the caller
END CLEAR_CD_GUIDEWAY_DATARECORD;
運行結果:
exec CLEAR_CD_GUIDEWAY_DATARECORD(TO_DATE('20190916', 'yyyymmdd'));
方案二:
每半小時爲一個基點,基點數據必保留,然後向上、向下各保留一筆與基點最接近的不重複數據。最早和最後的數據也保留。
如圖所示:示例數據將保留選中部分
CREATE OR REPLACE PROCEDURE CLEAR_CD_GUIDEWAY_DATARECORD(IN_DATE IN DATE) IS
  /*
   * Scheme 2: thin out CD_GUIDEWAY_DATARECORD around half-hour base points.
   *
   * For every machine (MACHINEIP) and every half-hour bucket of PRODATE, the
   * earliest row by CREATEDATE is taken as the base row and BDATE is the
   * half-hour boundary at or before its PRODATE. Around each BDATE
   * (+/- 15 minutes, same machine, PRODATE on the day of IN_DATE) every row
   * is flagged except:
   *   - the base row itself, and
   *   - per side of the hour's 30-minute mark, the one row closest in
   *     PRODATE to the base row whose QUANTITY differs from the base row's.
   *
   * Flagging sets UPDATER to 'delete'. A real purge would issue a DELETE
   * with the same predicate instead.
   *
   * Parameters:
   *   IN_DATE - any instant on the day to clean; the time portion is ignored.
   *
   * Errors: the transaction is rolled back and the original exception is
   * re-raised so the caller can see that the run failed.
   */
  CURSOR BASE_DATE_RECORD IS
    SELECT P.SYSID, P.MACHINEIP, P.PRODATE, P.QUANTITY, P.BDATE
      FROM (SELECT T.SYSID,
                   T.MACHINEIP,
                   T.PRODATE,
                   T.QUANTITY,
                   -- Rank rows inside each machine / hour / half-hour side.
                   ROW_NUMBER() OVER(PARTITION BY T.MACHINEIP, TRUNC(T.PRODATE, 'hh24'), SIGN(T.PRODATE - TRUNC(T.PRODATE, 'hh24') - 30 / 60 / 24) ORDER BY T.CREATEDATE) RN,
                   -- Half-hour boundary at or before PRODATE.
                   CASE
                     WHEN SIGN(T.PRODATE - TRUNC(T.PRODATE, 'hh24') -
                               30 / 60 / 24) = -1 THEN
                      TRUNC(T.PRODATE, 'hh24')
                     ELSE
                      TRUNC(T.PRODATE, 'hh24') + 30 / 60 / 24
                   END BDATE
              FROM CD_GUIDEWAY_DATARECORD T
             -- Reaches 30 minutes into the next day as well.
             WHERE T.PRODATE BETWEEN TRUNC(IN_DATE) AND
                   TRUNC(IN_DATE + 1) + 30 / 60 / 24) P
     WHERE P.RN = 1;
BEGIN
  FOR BASE_DATE IN BASE_DATE_RECORD LOOP
    -- Trace the base point being processed.
    DBMS_OUTPUT.PUT_LINE(TO_CHAR(BASE_DATE.BDATE, 'yyyy/mm/dd hh24:mi:ss'));

    UPDATE CD_GUIDEWAY_DATARECORD T
       SET T.UPDATER = 'delete'
     WHERE T.SYSID <> BASE_DATE.SYSID
       AND T.PRODATE >= TRUNC(IN_DATE)
       AND T.PRODATE < TRUNC(IN_DATE) + 1
       AND T.PRODATE BETWEEN BASE_DATE.BDATE - 15 / 60 / 24 AND
           BASE_DATE.BDATE + 15 / 60 / 24
       AND T.MACHINEIP = BASE_DATE.MACHINEIP
       AND T.SYSID NOT IN
           -- Rows to keep: the nearest row with a different quantity on
           -- each side of the hour's 30-minute mark.
           (SELECT K.SYSID
              FROM (SELECT C.SYSID,
                           ROW_NUMBER() OVER(PARTITION BY SIGN(C.PRODATE - TRUNC(C.PRODATE, 'hh24') - 30 / 60 / 24) ORDER BY ABS(C.PRODATE - BASE_DATE.PRODATE)) RN
                      FROM CD_GUIDEWAY_DATARECORD C
                     WHERE C.MACHINEIP = BASE_DATE.MACHINEIP
                       AND C.PRODATE BETWEEN BASE_DATE.BDATE - 15 / 60 / 24 AND
                           BASE_DATE.BDATE + 15 / 60 / 24
                       AND C.QUANTITY <> BASE_DATE.QUANTITY) K
             WHERE K.RN <= 1);
  END LOOP;
  COMMIT;
EXCEPTION
  WHEN OTHERS THEN
    ROLLBACK;
    RAISE; -- do not hide the failure from the caller
END CLEAR_CD_GUIDEWAY_DATARECORD;
運行結果:
exec CLEAR_CD_GUIDEWAY_DATARECORD(TO_DATE('20190916', 'yyyymmdd'));