oracle模糊查詢:分區局部全文索引方式(四)

DATABASE:dwtest(210開發庫)

--按根級行業ID和大洲進行混合分區
DROP TABLE iquery.lpx_kw_tmp02_bak2;
CREATE TABLE iquery.lpx_kw_tmp02_bak2(
 KEYWORDS                                           VARCHAR2(256)
 ,REGIN_NAME                                         VARCHAR2(128)
 ,CATEGORY_ROOT_ID                                   NUMBER
 ,SRH_PV_THIS_5MON                                   NUMBER
)PARTITION BY RANGE(CATEGORY_ROOT_ID)
(    PARTITION p1 VALUES LESS THAN(1) TABLESPACE TBS_EN_DSS_2009,
    PARTITION p2 VALUES LESS THAN(2) TABLESPACE TBS_EN_DSS_2009,
    PARTITION p3 VALUES LESS THAN(3) TABLESPACE TBS_EN_DSS_2009,
    PARTITION p4 VALUES LESS THAN(4) TABLESPACE TBS_EN_DSS_2009,
    PARTITION p5 VALUES LESS THAN(5) TABLESPACE TBS_EN_DSS_2009,
    PARTITION p6 VALUES LESS THAN(6) TABLESPACE TBS_EN_DSS_2009,
    PARTITION p7 VALUES LESS THAN(7) TABLESPACE TBS_EN_DSS_2009,
    PARTITION p8 VALUES LESS THAN(8) TABLESPACE TBS_EN_DSS_2009,
    PARTITION p9 VALUES LESS THAN(9) TABLESPACE TBS_EN_DSS_2009,
    PARTITION p10 VALUES LESS THAN(10) TABLESPACE TBS_EN_DSS_2009,
    PARTITION p11 VALUES LESS THAN(11) TABLESPACE TBS_EN_DSS_2009,
    PARTITION p12 VALUES LESS THAN(12) TABLESPACE TBS_EN_DSS_2009,
    PARTITION p13 VALUES LESS THAN(13) TABLESPACE TBS_EN_DSS_2009,
    PARTITION p14 VALUES LESS THAN(14) TABLESPACE TBS_EN_DSS_2009,
    PARTITION p15 VALUES LESS THAN(15) TABLESPACE TBS_EN_DSS_2009,
    PARTITION p16 VALUES LESS THAN(16) TABLESPACE TBS_EN_DSS_2009,
    PARTITION p17 VALUES LESS THAN(17) TABLESPACE TBS_EN_DSS_2009,
    PARTITION p18 VALUES LESS THAN(18) TABLESPACE TBS_EN_DSS_2009,
    PARTITION p19 VALUES LESS THAN(19) TABLESPACE TBS_EN_DSS_2009,
    PARTITION p20 VALUES LESS THAN(20) TABLESPACE TBS_EN_DSS_2009,
    PARTITION p21 VALUES LESS THAN(21) TABLESPACE TBS_EN_DSS_2009,
    PARTITION p22 VALUES LESS THAN(23) TABLESPACE TBS_EN_DSS_2009,
    PARTITION p23 VALUES LESS THAN(26) TABLESPACE TBS_EN_DSS_2009,
    PARTITION p24 VALUES LESS THAN(28) TABLESPACE TBS_EN_DSS_2009,
    PARTITION p25 VALUES LESS THAN(30) TABLESPACE TBS_EN_DSS_2009,
    PARTITION p26 VALUES LESS THAN(32) TABLESPACE TBS_EN_DSS_2009,
    PARTITION p27 VALUES LESS THAN(34) TABLESPACE TBS_EN_DSS_2009,
    PARTITION p28 VALUES LESS THAN(36) TABLESPACE TBS_EN_DSS_2009,
    PARTITION p29 VALUES LESS THAN(39) TABLESPACE TBS_EN_DSS_2009,
    PARTITION p30 VALUES LESS THAN(41) TABLESPACE TBS_EN_DSS_2009,
    PARTITION p31 VALUES LESS THAN(42) TABLESPACE TBS_EN_DSS_2009,
    PARTITION p32 VALUES LESS THAN(43) TABLESPACE TBS_EN_DSS_2009,
    PARTITION p33 VALUES LESS THAN(44) TABLESPACE TBS_EN_DSS_2009,
    PARTITION p34 VALUES LESS THAN(66) TABLESPACE TBS_EN_DSS_2009,
    PARTITION p35 VALUES LESS THAN(80) TABLESPACE TBS_EN_DSS_2009,
    PARTITION p36 VALUES LESS THAN(322) TABLESPACE TBS_EN_DSS_2009,
    PARTITION p37 VALUES LESS THAN(339) TABLESPACE TBS_EN_DSS_2009,
    PARTITION p38 VALUES LESS THAN(502) TABLESPACE TBS_EN_DSS_2009,
    PARTITION p39 VALUES LESS THAN(509) TABLESPACE TBS_EN_DSS_2009,
    PARTITION p40 VALUES LESS THAN(1500) TABLESPACE TBS_EN_DSS_2009,
    PARTITION p41 VALUES LESS THAN(2000) TABLESPACE TBS_EN_DSS_2009,
    PARTITION p42 VALUES LESS THAN(3000) TABLESPACE TBS_EN_DSS_2009,
    PARTITION p43 VALUES LESS THAN(999999999) TABLESPACE TBS_EN_DSS_2009
);


INSERT INTO iquery.lpx_kw_tmp02_bak2
SELECT keywords,
       REGIN_NAME,
       CATEGORY_ROOT_ID,
       SUM(SRH_PV_THIS_5MON) AS SRH_PV_THIS_5MON
  FROM iquery.lpx_kw_tmp02
 WHERE CATEGORY_ROOT_ID IS NOT NULL  --去除類目爲空的記錄
 GROUP BY keywords,
       REGION_ID,
       REGIN_NAME,
       CATEGORY_ROOT_ID;
COMMIT;

7799710 rows created.

SQL>
Commit complete.

SQL> select count(1) from iquery.lpx_kw_tmp02_bak2;


  COUNT(1)
----------
   7799710



--造測試數據
DROP TABLE iquery.lpx_kw_test;
CREATE TABLE iquery.lpx_kw_test TABLESPACE TBS_EN_DSS_2009
AS
SELECT *
  FROM (
        SELECT SUBSTR(keywords, INSTR(keywords, ' ', 1)+1, INSTR(keywords, ' ', 1, 2) - INSTR(keywords, ' ', 1) - 1) AS kw,
               a.*,
               row_number() OVER(PARTITION BY category_root_id ORDER BY dbms_random.value DESC) AS rw
          FROM iquery.lpx_kw_tmp02_bak2  a
         WHERE length(keywords) - length(REPLACE(keywords, ' ', '')) > 2
       )
 WHERE rw < 100 + 1
   AND length(kw) > 5
 ;
 
 SQL> select count(1) from iquery.lpx_kw_test;


  COUNT(1)
----------
      1577

 

--根據行業和大洲進行匹配
DROP TABLE iquery.lpx_kw_tmp03;
CREATE TABLE iquery.lpx_kw_tmp03(
 KW                                                 VARCHAR2(256)
 ,REGIN_NAME                                         VARCHAR2(128)
 ,CATEGORY_ROOT_ID                                   NUMBER
 ,SRH_PV_THIS_5MON                                   NUMBER
);

set serveroutput ON;

DECLARE 
  cnt               NUMBER;
  start_date        DATE;
  end_date          DATE;
 
CURSOR c1
IS
 select kw, keywords, regin_name, category_root_id FROM iquery.lpx_kw_test;
 
BEGIN

 cnt := 0;
 start_date := SYSDATE;
 
 FOR ref_cursor IN c1
 LOOP 
 INSERT INTO iquery.lpx_kw_tmp03
 SELECT ref_cursor.kw
        ,ref_cursor.regin_name
        ,ref_cursor.category_root_id
        ,SUM(a.srh_pv_this_5mon) AS srh_pv_this_5mon
   FROM iquery.lpx_kw_tmp02_bak2 a
  WHERE a.keywords LIKE '%'||ref_cursor.kw||'%'
    AND a.category_root_id = ref_cursor.category_root_id   --根級行業
    AND a.regin_name = ref_cursor.regin_name               --買家所在大洲
  GROUP BY ref_cursor.kw
        ,ref_cursor.regin_name
        ,ref_cursor.category_root_id;
 COMMIT;
 cnt := cnt + 1;
 
 END LOOP;
 end_date := SYSDATE;
 
 DBMS_OUTPUT.PUT_LINE('total records:'||cnt);
 DBMS_OUTPUT.PUT_LINE('total time(ms):'||ROUND(TO_NUMBER(END_DATE - START_DATE) * 24 * 60 * 60 * 1000));
 DBMS_OUTPUT.PUT_LINE('avg time(ms):'||ROUND(TO_NUMBER(END_DATE - START_DATE) * 24 * 60 * 60 * 1000)/cnt);
 
END;
/



--無全文索引的性能
total records:1577
total time(ms):35000
avg time(ms):22.19403931515535827520608750792644261256

 

 

 

 

--創建全文索引
grant ctxapp to etl;
exec ctx_ddl.create_preference('lpxuan_kw','BASIC_LEXER');
--exec ctx_ddl.drop_preference('lpxuan_kw');
CREATE INDEX idx_kw_keyw on iquery.lpx_kw_tmp02_bak2(keywords) INDEXTYPE IS CTXSYS.CONTEXT parameters('lexer lpxuan_kw')
LOCAL (partition p1, partition p2, partition p3, partition p4, partition p5,partition p6, partition p7, partition p8, partition p9, partition p10,
       partition p11, partition p12, partition p13, partition p14, partition p15,partition p16, partition p17, partition p18, partition p19, partition p20,
       partition p21, partition p22, partition p23, partition p24, partition p25,partition p26, partition p27, partition p28, partition p29, partition p30,
       partition p31, partition p32, partition p33, partition p34, partition p35,partition p36, partition p37, partition p38, partition p39, partition p40,
       partition p41, partition p42, partition p43);

--eg:
/*
create index html2_idx on HTML2(newsdescription) indextype is ctxsys.context local
(partition indx parameters('lexer my_lexer'),partition indx01 tablespace
users01  parameters('lexer my_lexer'),partition indx02 tablespace users02 param
eters('lexer my_lexer'),partition indx03 tablespace users03  parameters('lexer m
y_lexer') );
*/


CREATE or replace procedure rel_kw_sync_index as
      begin
      ctx_ddl.sync_index('idx_kw_keyw');
      end;
      /

Procedure created.

Elapsed: 00:00:00.08

VARIABLE jobno number;
BEGIN
  DBMS_JOB.SUBMIT(:jobno,'rel_kw_sync_index();',
  SYSDATE, 'SYSDATE + (1/24/4)');
  commit;
  END;
  /


--根據行業和大洲進行匹配
DROP TABLE iquery.lpx_kw_tmp03;
CREATE TABLE iquery.lpx_kw_tmp03(
 KW                                                 VARCHAR2(256)
 ,REGIN_NAME                                         VARCHAR2(128)
 ,CATEGORY_ROOT_ID                                   NUMBER
 ,SRH_PV_THIS_5MON                                   NUMBER
);

set serveroutput ON;

DECLARE 
  cnt               NUMBER;
  start_date        DATE;
  end_date          DATE;
 
CURSOR c1
IS
 select REPLACE(REPLACE(kw, '-', '/-'), '&', '/&') AS kw, kw AS kw_bak, keywords, regin_name, category_root_id FROM iquery.lpx_kw_test;
 
BEGIN

 cnt := 0;
 start_date := SYSDATE;
 
 FOR ref_cursor IN c1
 LOOP 
 INSERT INTO iquery.lpx_kw_tmp03
 SELECT ref_cursor.kw_bak
        ,ref_cursor.regin_name
        ,ref_cursor.category_root_id
        ,SUM(a.srh_pv_this_5mon) AS srh_pv_this_5mon
   FROM iquery.lpx_kw_tmp02_bak2 a
  WHERE contains(a.keywords,ref_cursor.kw) > 0             --走全文索引
    AND a.category_root_id = ref_cursor.category_root_id   --根級行業
    AND a.regin_name = ref_cursor.regin_name               --買家所在大洲
  GROUP BY ref_cursor.kw_bak
        ,ref_cursor.regin_name
        ,ref_cursor.category_root_id;
 COMMIT;
 cnt := cnt + 1;
 
 END LOOP;
 end_date := SYSDATE;
 
 DBMS_OUTPUT.PUT_LINE('total records:'||cnt);
 DBMS_OUTPUT.PUT_LINE('total time(ms):'||ROUND(TO_NUMBER(END_DATE - START_DATE) * 24 * 60 * 60 * 1000));
 DBMS_OUTPUT.PUT_LINE('avg time(ms):'||ROUND(TO_NUMBER(END_DATE - START_DATE) * 24 * 60 * 60 * 1000)/cnt);
 
END;
/


--有全文索引的性能
total records:1577
total time(ms):9000
avg time(ms):5.70703868103994927076727964489537095751

PL/SQL procedure successfully completed.

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章