2013-08-07 星期三 下午
---------------研究聯合索引---------------------
SQL> conn hr/hr
Connected.
SQL> create table test1 as select * from all_objects;
Table created.
SQL> create index ind_id_typ on test1(object_id,object_type); --創建聯合索引
Index created.
SQL> exec dbms_stats.gather_table_stats(user,'test1',cascade=>true);
PL/SQL procedure successfully completed.
SQL> create index ind_id on test1(object_id); --創建單字段索引
Index created.
SQL> create index ind_type on test1(object_type); --創建單字段索引
Index created.
SQL> exec dbms_stats.gather_table_stats(user,'test1',cascade=>true);
PL/SQL procedure successfully completed.
SQL> select * from test1 where object_id=28;
no rows selected
Execution Plan
----------------------------------------------------------
Plan hash value: 1747753053
--------------------------------------------------------------------------------------
| Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time |
--------------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 1 | 95 | 2 (0)| 00:00:01 |
| 1 | TABLE ACCESS BY INDEX ROWID| TEST1 | 1 | 95 | 2 (0)| 00:00:01 |
|* 2 | INDEX RANGE SCAN | IND_ID | 1 | | 1 (0)| 00:00:01 |
--------------------------------------------------------------------------------------
Predicate Information (identified by operation id):
---------------------------------------------------
2 - access("OBJECT_ID"=28)
Statistics
----------------------------------------------------------
1 recursive calls
0 db block gets
2 consistent gets
0 physical reads
0 redo size
995 bytes sent via SQL*Net to client
389 bytes received via SQL*Net from client
1 SQL*Net roundtrips to/from client
0 sorts (memory)
0 sorts (disk)
0 rows processed
當聯合索引和單字段索引同時存在的話,優先選擇單一索引,因爲掃的葉子塊字節數會少。
SQL> drop index ind_id;
Index dropped.
SQL> exec dbms_stats.gather_table_stats(user,'test1',cascade=>true);
PL/SQL procedure successfully completed.
SQL> select * from test1 where object_id=28;
no rows selected
Execution Plan
----------------------------------------------------------
Plan hash value: 1725600915
------------------------------------------------------------------------------------------
| Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time |
------------------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 1 | 95 | 3 (0)| 00:00:01 |
| 1 | TABLE ACCESS BY INDEX ROWID| TEST1 | 1 | 95 | 3 (0)| 00:00:01 |
|* 2 | INDEX RANGE SCAN | IND_ID_TYP | 1 | | 2 (0)| 00:00:01 |
------------------------------------------------------------------------------------------
Predicate Information (identified by operation id):
---------------------------------------------------
2 - access("OBJECT_ID"=28)
Statistics
----------------------------------------------------------
1 recursive calls
0 db block gets
2 consistent gets
0 physical reads
0 redo size
995 bytes sent via SQL*Net to client
389 bytes received via SQL*Net from client
1 SQL*Net roundtrips to/from client
0 sorts (memory)
0 sorts (disk)
0 rows processed
查詢是走聯合索引的。
如果where條件落在聯合索引的第二列上,是否會走索引?
SQL> select * from test1 where object_type='JOB';
no rows selected
Execution Plan
----------------------------------------------------------
Plan hash value: 3495195170
----------------------------------------------------------------------------------------
| Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time |
----------------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 2924 | 271K| 101 (0)| 00:00:02 |
| 1 | TABLE ACCESS BY INDEX ROWID| TEST1 | 2924 | 271K| 101 (0)| 00:00:02 |
|* 2 | INDEX RANGE SCAN | IND_TYPE | 2924 | | 9 (0)| 00:00:01 |
----------------------------------------------------------------------------------------
Predicate Information (identified by operation id):
---------------------------------------------------
2 - access("OBJECT_TYPE"='JOB') --因爲有個單字段索引,所以優先選擇。
SQL> drop index ind_type;
Index dropped.
SQL> exec dbms_stats.gather_table_stats(user,'test1',cascade=>true);
PL/SQL procedure successfully completed.
SQL> select * from test1 where object_type='JOB';
no rows selected
Execution Plan
----------------------------------------------------------
Plan hash value: 4122059633
---------------------------------------------------------------------------
| Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time |
---------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 5 | 475 | 133 (2)| 00:00:02 |
|* 1 | TABLE ACCESS FULL| TEST1 | 5 | 475 | 133 (2)| 00:00:02 |
---------------------------------------------------------------------------
Predicate Information (identified by operation id):
---------------------------------------------------
1 - filter("OBJECT_TYPE"='JOB')
此時沒有用索引:
聯合索引:id + type +rowid,id,type都參與排序,但是type是在ID組內排序的,
大體上來看,type的值是沒有順序的,是散亂的,而且root階段和分支節點存儲的值範圍是按照第一個字段id來的,
索引當where條件中出現type的時候,是用不到聯合索引的。
兩外需要注意的是,聯合索引的第一列要求重複率比較低效果才很明顯。
---------------------------------------------
創建位圖索引應用的案例
create table test(gender not null,location not null,age_group not null,data)
as select decode(ceil(dbms_random.value(0,2)),'1','M','2','F') gender,
ceil(dbms_random.value(1,50)) location,
decode(ceil(dbms_random.value(0,5)),'1','18 and under','2','19-25','3','26-30','4','31-50','5','41 and over') age_group,
rpad('*',20,'*') data from t2;
SQL> select count(1) from test;
COUNT(1)
----------
1000000
需求1:
select count(1) from test where gender='M' and location in(1,10,30) and age_group='41 and over';
需求2:
select count(1) from test where (gender='M' and location in(1,10,30) or gender='F' and location=22) and age_group='18 and under'
需求3:
select count(1) from test where location in(1,10,30);
需求4:
select count(1) from test where age_group='41 and over' and gender='F'
分析怎樣規劃索引能最大限度的滿足上面四個需求的實現?
where條件中過濾字段重複率是很高的,如果用B樹索引做的話:
方案一(建立B樹索引):
1、gender、location、age_group三個字段上作聯合索引
SQL> create index ind_g_l_a on test(gender,location,age_group); 滿足需求1和需求2
Index created.
SQL> exec dbms_stats.gather_table_stats(user,'test',cascade=>true,estimate_percent=>100);
PL/SQL procedure successfully completed.
需求1:
SQL> select count(1) from test where gender='M' and location in(1,10,30) and age_group='41 and over';
這個索引用的比較好的原因是,gender只有兩組,只要過濾了其中的一組,location就是升序排列的,
就可以用索引的。
Execution Plan
----------------------------------------------------------
Plan hash value: 2282520444
--------------------------------------------------------------------------------
| Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time |
--------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 1 | 14 | 23 (0)| 00:00:01 |
| 1 | SORT AGGREGATE | | 1 | 14 | | |
| 2 | INLIST ITERATOR | | | | | |
|* 3 | INDEX RANGE SCAN| IND_G_L_A | 6080 | 85120 | 23 (0)| 00:00:01 |
--------------------------------------------------------------------------------
Predicate Information (identified by operation id):
---------------------------------------------------
3 - access("GENDER"='M' AND ("LOCATION"=1 OR "LOCATION"=10 OR
"LOCATION"=30) AND "AGE_GROUP"='41 and over')
需求2:
SQL> select count(1) from test where (gender='M' and location in(1,10,30) or gender='F' and location=22) and age_group='18 and under';
這個索引用的比較好的原因是,gender只有兩組,只要過濾了其中的一組,location就是升序排列的,
就可以用索引的。
Execution Plan
----------------------------------------------------------
Plan hash value: 1740203256
---------------------------------------------------------------------------------
| Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time |
---------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 1 | 14 | 33 (0)| 00:00:01 |
| 1 | SORT AGGREGATE | | 1 | 14 | | |
| 2 | CONCATENATION | | | | | |
|* 3 | INDEX RANGE SCAN | IND_G_L_A | 2041 | 28574 | 10 (0)| 00:00:01 |
| 4 | INLIST ITERATOR | | | | | |
|* 5 | INDEX RANGE SCAN| IND_G_L_A | 6018 | 84252 | 23 (0)| 00:00:01 |
---------------------------------------------------------------------------------
Predicate Information (identified by operation id):
---------------------------------------------------
3 - access("GENDER"='F' AND "LOCATION"=22 AND "AGE_GROUP"='18 and
under')
5 - access("GENDER"='M' AND ("LOCATION"=1 OR "LOCATION"=10 OR
"LOCATION"=30) AND "AGE_GROUP"='18 and under')
filter(LNNVL("LOCATION"=22) OR LNNVL("GENDER"='F'))
需求3:
SQL> select count(1) from test where location in(1,10,30); --雖然用了索引,但是不是用的很好,代價比較高。
原因是where條件沒有落在索引的第一個字段上,由於location排列是散亂的,所以全掃了索引。沒有分組。
Execution Plan
----------------------------------------------------------
Plan hash value: 1529956083
-----------------------------------------------------------------------------------
| Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time |
-----------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 1 | 3 | 793 (6)| 00:00:10 |
| 1 | SORT AGGREGATE | | 1 | 3 | | |
|* 2 | INDEX FAST FULL SCAN| IND_G_L_A | 60799 | 178K| 793 (6)| 00:00:10 |
-----------------------------------------------------------------------------------
Predicate Information (identified by operation id):
---------------------------------------------------
2 - filter("LOCATION"=1 OR "LOCATION"=10 OR "LOCATION"=30)
需求4:
SQL> select count(1) from test where age_group='41 and over' and gender='F'; --代價相對較高
雖然也分組了,如果where落在在location,肯定用索引的,但是,落在了age_group,此時第三列是散亂的。
Execution Plan
----------------------------------------------------------
Plan hash value: 311740366
------------------------------------------------------------------------------
| Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time |
------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 1 | 11 | 409 (4)| 00:00:05 |
| 1 | SORT AGGREGATE | | 1 | 11 | | |
|* 2 | INDEX SKIP SCAN| IND_G_L_A | 100K| 1074K| 409 (4)| 00:00:05 |
------------------------------------------------------------------------------
Predicate Information (identified by operation id):
---------------------------------------------------
2 - access("GENDER"='F' AND "AGE_GROUP"='41 and over')
filter("AGE_GROUP"='41 and over')
Statistics
----------------------------------------------------------
1 recursive calls
0 db block gets
450 consistent gets
0 physical reads
0 redo size
413 bytes sent via SQL*Net to client
400 bytes received via SQL*Net from client
2 SQL*Net roundtrips to/from client
0 sorts (memory)
0 sorts (disk)
1 rows processed
create index ind_g_l on test(gender,location);僅僅能滿足1,2
create index ind_a_l on test(age_group,location);滿足4
create index ind_l_g_a on test(location,gender,age_group); 滿足1,2,3
-------------------------------------------------------
--=方案二:創建位圖索引
SQL> drop index ind_g_l_a;
Index dropped.
SQL> create bitmap index gender_idx on test(gender);
Index created.
SQL> create bitmap index location_idx on test(location);
Index created.
SQL> create bitmap index age_group_idx on test(age_group);
Index created.
SQL> exec dbms_stats.gather_table_stats(user,'test',cascade=>true,estimate_percent=>100);
PL/SQL procedure successfully completed.
需求1:
SQL> select count(1) from test where gender='M' and location in(1,10,30) and age_group='41 and over';
Execution Plan
----------------------------------------------------------
Plan hash value: 320981916
-----------------------------------------------------------------------------------------------
| Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time |
-----------------------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 1 | 14 | 53 (0)| 00:00:01 |
| 1 | SORT AGGREGATE | | 1 | 14 | | |
| 2 | BITMAP CONVERSION COUNT | | 5036 | 70504 | 53 (0)| 00:00:01 |
| 3 | BITMAP AND | | | | | |
| 4 | BITMAP OR | | | | | |
|* 5 | BITMAP INDEX SINGLE VALUE| LOCATION_IDX | | | | |
|* 6 | BITMAP INDEX SINGLE VALUE| LOCATION_IDX | | | | |
|* 7 | BITMAP INDEX SINGLE VALUE| LOCATION_IDX | | | | |
|* 8 | BITMAP INDEX SINGLE VALUE | AGE_GROUP_IDX | | | | |
|* 9 | BITMAP INDEX SINGLE VALUE | GENDER_IDX | | | | |
-----------------------------------------------------------------------------------------------
Predicate Information (identified by operation id):
---------------------------------------------------
5 - access("LOCATION"=1)
6 - access("LOCATION"=10)
7 - access("LOCATION"=30)
8 - access("AGE_GROUP"='41 and over')
9 - access("GENDER"='M')
需求2:
SQL> select count(1) from test where (gender='M' and location in(1,10,30) or gender='F' and location=22) and age_group='18 and under';
Execution Plan
----------------------------------------------------------
Plan hash value: 809694946
-------------------------------------------------------------------------------------------------
| Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time |
-------------------------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 1 | 14 | 68 (0)| 00:00:01 |
| 1 | SORT AGGREGATE | | 1 | 14 | | |
| 2 | BITMAP CONVERSION COUNT | | 7078 | 99092 | 68 (0)| 00:00:01 |
| 3 | BITMAP AND | | | | | |
|* 4 | BITMAP INDEX SINGLE VALUE | AGE_GROUP_IDX | | | | |
| 5 | BITMAP OR | | | | | |
| 6 | BITMAP AND | | | | | |
| 7 | BITMAP OR | | | | | |
|* 8 | BITMAP INDEX SINGLE VALUE| LOCATION_IDX | | | | |
|* 9 | BITMAP INDEX SINGLE VALUE| LOCATION_IDX | | | | |
|* 10 | BITMAP INDEX SINGLE VALUE| LOCATION_IDX | | | | |
|* 11 | BITMAP INDEX SINGLE VALUE | GENDER_IDX | | | | |
| 12 | BITMAP AND | | | | | |
|* 13 | BITMAP INDEX SINGLE VALUE | LOCATION_IDX | | | | |
|* 14 | BITMAP INDEX SINGLE VALUE | GENDER_IDX | | | | |
-------------------------------------------------------------------------------------------------
Predicate Information (identified by operation id):
---------------------------------------------------
4 - access("AGE_GROUP"='18 and under')
8 - access("LOCATION"=1)
9 - access("LOCATION"=10)
10 - access("LOCATION"=30)
11 - access("GENDER"='M')
13 - access("LOCATION"=22)
14 - access("GENDER"='F')
需求3:
SQL> select count(1) from test where location in(1,10,30);
Execution Plan
----------------------------------------------------------
Plan hash value: 2259268895
---------------------------------------------------------------------------------------------
| Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time |
---------------------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 1 | 3 | 13 (0)| 00:00:01 |
| 1 | SORT AGGREGATE | | 1 | 3 | | |
| 2 | INLIST ITERATOR | | | | | |
| 3 | BITMAP CONVERSION COUNT | | 50540 | 148K| 13 (0)| 00:00:01 |
|* 4 | BITMAP INDEX SINGLE VALUE| LOCATION_IDX | | | | |
---------------------------------------------------------------------------------------------
Predicate Information (identified by operation id):
---------------------------------------------------
4 - access("LOCATION"=1 OR "LOCATION"=10 OR "LOCATION"=30)
需求4:
SQL> select count(1) from test where age_group='41 and over' and gender='F';
Execution Plan
----------------------------------------------------------
Plan hash value: 2381702022
----------------------------------------------------------------------------------------------
| Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time |
----------------------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 1 | 11 | 40 (0)| 00:00:01 |
| 1 | SORT AGGREGATE | | 1 | 11 | | |
| 2 | BITMAP CONVERSION COUNT | | 99957 | 1073K| 40 (0)| 00:00:01 |
| 3 | BITMAP AND | | | | | |
|* 4 | BITMAP INDEX SINGLE VALUE| AGE_GROUP_IDX | | | | |
|* 5 | BITMAP INDEX SINGLE VALUE| GENDER_IDX | | | | |
----------------------------------------------------------------------------------------------
Predicate Information (identified by operation id):
---------------------------------------------------
4 - access("AGE_GROUP"='41 and over')
5 - access("GENDER"='F')
**************************************************************
結論:
綜上實驗來看,從全局考慮,創建位圖索引要優於B樹索引。
創建索引的技術思路:
1、第一個方面:技術場景
2、第二個方面:業務需求(轉化爲實現的SQL)場景
3、綜合考慮方案的選擇,既要總體上滿足現有的需求,而且要兼顧需求的擴展,
綜合考慮整體的性能,要顧全大局。
-----------------------------------