MongoDB索引優化

1. 一圖看懂索引原理

[圖片缺失:原文此處為索引原理示意圖,外鏈圖片轉存失敗,未能顯示]

# 初始化數據
> db.comment.insertMany(
... [
... {'timestamp': 3, 'anonymous': true, 'rating': 1},
... {'timestamp': 2, 'anonymous': false, 'rating': 5},
... {'timestamp': 1, 'anonymous': false, 'rating': 3},
... {'timestamp': 4, 'anonymous': false, 'rating': 2}
... ])
> db.comment.createIndex({'anonymous':1, 'rating': 1})
{
	"createdCollectionAutomatically" : false,
	"numIndexesBefore" : 1,
	"numIndexesAfter" : 2,
	"ok" : 1
}
> db.comment.getIndexes()
[
	{
		"v" : 2,
		"key" : {
			"_id" : 1
		},
		"name" : "_id_",
		"ns" : "test.comment"
	},
	{
		"v" : 2,
		"key" : {
			"anonymous" : 1,
			"rating" : 1
		},
		"name" : "anonymous_1_rating_1",
		"ns" : "test.comment"
	}
]

2. 查看執行計劃

  • 使用explain 查看執行計劃,傳入參數可以獲取具體的執行過程,比如docsExamined、keysExamined

    • executionStats :捕獲獲勝計劃執行的相關信息。

    • allPlansExecution :捕獲選擇執行計劃期間執行的相關信息

  • 字段解釋

    • 見[MongoDB 執行計劃字段解釋]
> db.comment.find(  
... { timestamp: { $gte: 2, $lte: 4 }, anonymous: false }  
... ).sort( { rating: -1 }  
... ).explain()
{
	"queryPlanner" : {
		"plannerVersion" : 1,
		"namespace" : "test.comment",
		"indexFilterSet" : false,
		"parsedQuery" : {
			"$and" : [
				{
					"anonymous" : {
						"$eq" : false
					}
				},
				{
					"timestamp" : {
						"$lte" : 4
					}
				},
				{
					"timestamp" : {
						"$gte" : 2
					}
				}
			]
		},
		"winningPlan" : {
			"stage" : "FETCH",
			"filter" : {
				"$and" : [
					{
						"timestamp" : {
							"$lte" : 4
						}
					},
					{
						"timestamp" : {
							"$gte" : 2
						}
					}
				]
			},
			"inputStage" : {
				"stage" : "IXSCAN",
				"keyPattern" : {
					"anonymous" : 1,
					"rating" : 1
				},
				"indexName" : "anonymous_1_rating_1",
				"isMultiKey" : false,
				"multiKeyPaths" : {
					"anonymous" : [ ],
					"rating" : [ ]
				},
				"isUnique" : false,
				"isSparse" : false,
				"isPartial" : false,
				"indexVersion" : 2,
				"direction" : "backward",
				"indexBounds" : {
					"anonymous" : [
						"[false, false]"
					],
					"rating" : [
						"[MaxKey, MinKey]"
					]
				}
			}
		},
		"rejectedPlans" : [ ]
	},
	"serverInfo" : {
		"host" : "150501d113",
		"port" : 27017,
		"version" : "3.6.1",
		"gitVersion" : "025d4f4fe61efd1fb6f0005be20cb45a004093d1"
	},
	"ok" : 1
}

# 這個只能看到大概的執行計劃,比如使用的哪個索引; 如果想要看到具體的執行過程,可使用explain的參數
> db.comment.find( { timestamp: { $gte: 2, $lte: 4 }, anonymous: false }   ).sort( { rating: -1 }   ).explain("executionStats")  
{
	"queryPlanner" : {
		"plannerVersion" : 1,
		"namespace" : "test.comment",
		"indexFilterSet" : false,
		"parsedQuery" : {
			"$and" : [
				{
					"anonymous" : {
						"$eq" : false
					}
				},
				{
					"timestamp" : {
						"$lte" : 4
					}
				},
				{
					"timestamp" : {
						"$gte" : 2
					}
				}
			]
		},
		"winningPlan" : {
			"stage" : "FETCH",
			"filter" : {
				"$and" : [
					{
						"timestamp" : {
							"$lte" : 4
						}
					},
					{
						"timestamp" : {
							"$gte" : 2
						}
					}
				]
			},
			"inputStage" : {
				"stage" : "IXSCAN",
				"keyPattern" : {
					"anonymous" : 1,
					"rating" : 1
				},
				"indexName" : "anonymous_1_rating_1",
				"isMultiKey" : false,
				"multiKeyPaths" : {
					"anonymous" : [ ],
					"rating" : [ ]
				},
				"isUnique" : false,
				"isSparse" : false,
				"isPartial" : false,
				"indexVersion" : 2,
				"direction" : "backward",
				"indexBounds" : {
					"anonymous" : [
						"[false, false]"
					],
					"rating" : [
						"[MaxKey, MinKey]"
					]
				}
			}
		},
		"rejectedPlans" : [ ]
	},
	"executionStats" : {
		"executionSuccess" : true,
		"nReturned" : 2,
		"executionTimeMillis" : 1,
		"totalKeysExamined" : 3,
		"totalDocsExamined" : 3,
		"executionStages" : {
			"stage" : "FETCH",
			"filter" : {
				"$and" : [
					{
						"timestamp" : {
							"$lte" : 4
						}
					},
					{
						"timestamp" : {
							"$gte" : 2
						}
					}
				]
			},
			"nReturned" : 2,
			"executionTimeMillisEstimate" : 0,
			"works" : 4,
			"advanced" : 2,
			"needTime" : 1,
			"needYield" : 0,
			"saveState" : 0,
			"restoreState" : 0,
			"isEOF" : 1,
			"invalidates" : 0,
			"docsExamined" : 3,
			"alreadyHasObj" : 0,
			"inputStage" : {
				"stage" : "IXSCAN",
				"nReturned" : 3,
				"executionTimeMillisEstimate" : 0,
				"works" : 4,
				"advanced" : 3,
				"needTime" : 0,
				"needYield" : 0,
				"saveState" : 0,
				"restoreState" : 0,
				"isEOF" : 1,
				"invalidates" : 0,
				"keyPattern" : {
					"anonymous" : 1,
					"rating" : 1
				},
				"indexName" : "anonymous_1_rating_1",
				"isMultiKey" : false,
				"multiKeyPaths" : {
					"anonymous" : [ ],
					"rating" : [ ]
				},
				"isUnique" : false,
				"isSparse" : false,
				"isPartial" : false,
				"indexVersion" : 2,
				"direction" : "backward",
				"indexBounds" : {
					"anonymous" : [
						"[false, false]"
					],
					"rating" : [
						"[MaxKey, MinKey]"
					]
				},
				"keysExamined" : 3,
				"seeks" : 1,
				"dupsTested" : 0,
				"dupsDropped" : 0,
				"seenInvalidated" : 0
			}
		}
	},
	"serverInfo" : {
		"host" : "150501d113",
		"port" : 27017,
		"version" : "3.6.1",
		"gitVersion" : "025d4f4fe61efd1fb6f0005be20cb45a004093d1"
	},
	"ok" : 1
}

3. 如何建索引

  • 語法:
> db.comment.createIndex({'anonymous':1, 'rating': 1})
{
	"createdCollectionAutomatically" : false,
	"numIndexesBefore" : 1,
	"numIndexesAfter" : 2,
	"ok" : 1
}
  • 針對使用比較多的查詢建索引

    • 等值測試:在索引中加入所有需要做等值測試的字段,任意順序。
    • 排序字段(注意多個排序字段的升/降序問題): 按照查詢中排序字段的順序,依次向索引中添加字段。
    • 範圍過濾:按字段的基數(Collection中該字段不同值的數量)從低到高,依次向索引中添加範圍過濾字段。

4. 索引的優化

最佳索引的原則

  1. 包含查詢中所有可以做過濾及需要排序的字段
  2. 任何用於範圍掃描的字段以及用於排序的字段都必須在做等值查詢的字段之後

通過分析執行計劃,調整索引,從而讓查詢命中索引;

最小化docsExamined、keysExamined

5. 索引的選擇機制

  1. 選擇出最優索引
  2. 假設不存在最優索引,會做嘗試,然後選擇表現最好的索引;存在多條索引的情況下,MongoDB首選nscanned值最低的索引。
  3. 如果存在多個不同的最佳索引,mongo 將隨機選擇
  4. 最後優化器會記住類似查詢的選擇(直到大規模文件變動或者索引變動)

6. 優化實踐

country_themes 優化

# 常用的慢查詢
{"theme.themeId": theme_id}
{"theme.themeId": {"$in": theme_id_list}}


cond = {
"country": "GLOBAL", 
"theme.status": 2,
"theme.category": {"$regex": category}
}
rs = list(anthcraft.country_themes.find(cond, {"theme": 1}).sort("theme.packageTime", -1)
    
cond = {
    "country": 'GLOBAL',
    "theme.onGooglePlay": True,
    "theme.jumpToGooglePlay": True,
    "theme.packageName": {"$exists": True},
    "theme.status": 2,
    "theme.category": {"$regex": theme_category}
}
rs = list(anthcraft.country_themes.find(cond, {"theme": 1}).sort("theme.packageTime", -1)

# 優化
  db.country_themes.createIndex({ 'theme.themeId': 1}, { background: true } ) 
  # 查詢速度有很多提升,慢查詢log已經看不到theme.themeId相關的查詢了
  
  db.country_themes.createIndex({ 'country': 1, "theme.status":1, "theme.category":1}, { background: true }) 
  
  db.country_themes.createIndex({ 'theme.packageTime': -1}, { background: true } ) 
  # 在建theme.packageTime索引之前,走country相關的索引;但是建了該索引之後,直接用theme.packageTime索引了,並沒有用country索引; 所以將theme.packageTime 刪了
  # 但是查詢速度並沒有太大的提升,跟加索引之前差不多;但是執行計劃有變
  #COLLSCAN keysExamined:0 docsExamined:41535
  #IXSCAN { country: 1.0, theme.status: 1.0, theme.category: 1.0 } keysExamined:18914 docsExamined:16603
  
  db.country_themes.find({
      "country": 'GLOBAL',
      "theme.onGooglePlay": true,
      "theme.jumpToGooglePlay": true,
      "theme.packageName": {"$exists": true},
      "theme.status": 2,
      "theme.category": {"$regex": "5384154ae4b049f321413f11"}
  }).sort({"theme.packageTime": -1}).explain()
  
  > db.country_themes.count({
  ...     "country": 'GLOBAL',
  ...     "theme.status": 2
  ... })
  18913
  > db.country_themes.count()
  41670
  > db.country_themes.count({"country": 'GLOBAL'})
  33744
  
  # 懷疑$regex 並沒有走索引,資料顯示只有前綴型的正則表達式命中索引
  db.country_themes.find({
      "country": 'GLOBAL',
      "theme.onGooglePlay": true,
      "theme.jumpToGooglePlay": true,
      "theme.packageName": {"$exists": true},
      "theme.status": 2,
      "theme.category": {"$regex": "5384154ae4b049f321413f11"}
  })

wallpapers mongo 優化

# 常用的慢查詢
{ $query: { status: { $in: [ 2, 3 ] }, category: /.*5c07392be4b061d99da5ddad.*/i, wallpaperId: { $lt: 21550 } }, $orderby: { weight: 1, wallpaperId: -1 } }
{ $query: { status: { $in: [ 2, 3 ] } , category: /.*530ed1c0e4b0ce13ef9146e3.*/i }, $orderby: { weight: 1, wallpaperId: -1 } }
{ $query: { status: { $in: [ 2, 3 ] }, category: /^(?!.*(5a6989a8e4b0cf38e952d122|5b5fdb76e4b0cac798f75633)).*$/, wallpaperId: { $lt: 19055 } }, $orderby: { weight: 1, wallpaperId: -1 } }

# 根據常用查詢,創建索引
db.wallpapers.createIndex({ 'status': 1, 'wallpaperId': 1,'category': 1}) 

# status、wallpaperId使用索引,category未使用;只有前綴型的正則表達式命中索引
db.wallpapers.find({'status': { $in: [ 2, 3 ]}, 'wallpaperId': { $lt: 21550 }, 'category': {"$regex": ".*530ed1c0e4b0ce13ef9146e3.*"}}).sort({'weight': 1, 'wallpaperId': -1 }).explain()

# status、wallpaperId使用索引,category未使用;當前的正則不符合前綴型的正則表達式
db.wallpapers.find({'status': { $in: [ 2, 3 ]}, 'wallpaperId': { $lt: 21550 }, 'category': {"$regex": "^(?!.*(5a6989a8e4b0cf38e952d122|5b5fdb76e4b0cac798f75633)).*$"}}).sort({'weight': 1, 'wallpaperId': -1 }).explain()

# status、wallpaperId、category使用索引
db.wallpapers.find({'status': { $in: [ 2, 3 ]}, 'wallpaperId': { $lt: 21550 }, 'category': {"$regex": "^5a6989a8e4b0cf38e952d122"}}).sort({'weight': 1, 'wallpaperId': -1 }).explain()
# 未使用索引
db.wallpapers.find({'category':  "5a6989a8e4b0cf38e952d122"}).sort({'weight': 1, 'wallpaperId': -1 }).explain()

# status、category使用索引; 跳過wallpaperId仍然可以用索引,這點很棒,之前以爲不會用呢
db.wallpapers.find({'status': { $in: [ 2, 3 ]}, 'category': {"$regex": "^5a6989a8e4b0cf38e952d122"}}).sort({'weight': 1, 'wallpaperId': -1 }).explain()

# 但是常用查詢中的category都是非前綴型的正則,所以走不了索引; 對category 不建索引
db.wallpapers.dropIndex('status_1_wallpaperId_1_category_1')
db.wallpapers.createIndex({ 'status': 1, 'wallpaperId': 1}, { background: true }) 

# 然而,速度並沒有提升很多,基本沒有太多的變化
# 因爲原來就有索引 wallpaperId,然後status $in: [ 2, 3 ] 的記錄條數20171/23986=84%
# 所以新建的索引{ 'status': 1, 'wallpaperId': 1} 並不會有明顯的性能提升

# 總結: 下次建索引的時候,先分析一下目標列上的數據分佈情況;太集中分佈的,不推薦建索引

themes mongo 優化

# 慢查詢
{ $query: { themeId: { $lt: 194369 }, status: { $in: [ 2, 3 ] }, channels.10011: { $exists: false }, isShared: { $ne: 0 }, point: 0 }, $orderby: { themeId: -1 } }
{ status: { $in: [ 2, 3 ] }, isShared: { $ne: 0 }, tag: "taste", category: /^(?!.*(5384154ae4b049f321413f11)).*$/, $and: [ {}, { $or: [ { recommendTime: { $lt: new Date(1499988600000) } }, { recommendTime: new Date(1499988600000), themeId: { $lt: 3085871 } } ] } ] }, $orderby: { recommendTime: -1, themeId: -1 } }
{ themeId: { $ne: 438875 }, status: { $in: [ 2, 3 ] }, isShared: { $ne: 0 }, channels.10010: { $exists: false }, $and: [ { countrys.IN: { $exists: true } }, { $or: [ { category: /.*530ed96ee4b0ce13ef9146f6.*/i }, { category: /.*530ed94be4b0ce13ef9146f2.*/i } ] } ] }, $orderby: { globalRankScale: -1 }

# 看看各篩選條件記錄分佈
> db.themes.count({'status': { $in: [ 2, 3 ] }, 'isShared': { $ne: 0 }})
44336
> db.themes.count({'status': { $in: [ 2, 3 ] }})
44393
> db.themes.count({'status': { $in: [ 2, 3 ] }, 'themeId': { $lt: 365746 } })
6438
> db.themes.count({'themeId': { $lt: 365746 } })
336185

# 查詢條件的分佈
wpzhou@150501d113:~/test/mongo_log$ grep anthcraft.themes mongod.log0 | grep -v "IXSCAN { status" -c 
0
wpzhou@150501d113:~/test/mongo_log$ grep anthcraft.themes mongod.log0 -c
14292
wpzhou@150501d113:~/test/mongo_log$ grep anthcraft.themes mongod.log0 | grep status  -c
14292
wpzhou@150501d113:~/test/mongo_log$ grep anthcraft.themes mongod.log0 | grep tag  -c
14292
wpzhou@150501d113:~/test/mongo_log$ grep anthcraft.themes mongod.log0 | grep -w  tag  -c
3808
wpzhou@150501d113:~/test/mongo_log$ grep anthcraft.themes mongod.log0 | grep  countrys  -c
143
wpzhou@150501d113:~/test/mongo_log$ grep anthcraft.themes mongod.log0 | grep  category   -c
4127
wpzhou@150501d113:~/test/mongo_log$ grep anthcraft.themes mongod.log0 | grep  themeId   -c
14292
wpzhou@150501d113:~/test/mongo_log$ grep anthcraft.themes mongod.log0 | grep  recommendTime  -c
3806

# 基本每個查詢都涉及到status、themeId; 而且按記錄分佈來說,status、themeId建索引比較有用

# 但是遍歷log,發現基本所有的慢查詢用的都是status 的索引
# 有兩種,涉及到category的理論上應該走category_1_status_1_isShared_1, 但是走status; 不過按記錄條數分佈,走這兩個索引都差不多,遍歷的記錄條數都爲 db.themes.count({'status': { $in: [ 2, 3 ] }}) 
# 未涉及category的,可能走status 或者themeId;從記錄條數分佈看,status更優

# 綜上,建立新索引 {'status':1, 'themeId': -1} themeId 被用來排序比較多$orderby: { themeId: -1 }
db.themes.createIndex({'status':1, 'themeId': -1}, { background: true }) 
--noIndexBuildRetry

# 查看當前正在建索引的操作
db.currentOp(
                {
                  $or: [
                    { op: "command", "query.createIndexes": { $exists: true } },
                    { op: "insert", ns: /\.system\.indexes\b/ }
                  ]
                }
            )
發佈了27 篇原創文章 · 獲贊 3 · 訪問量 9萬+
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章