合併mysql 請求
這個需求源於把多張表合併成單表之後,請求量會非常大。合併前,壓力分散在各個表上;合併後,全部變成單個表的壓力,相當於各表 QPS 的累加。如果在應用側先緩存請求並進行合併,那麼 TiDB 端的壓力就會小很多。
設計
mergeProcess
- 爲了合併請求就需要緩存結果。緩存就很熟悉了。
"github.com/patrickmn/go-cache"
- 合併請求那肯定是要有方法去做。
// MergeHandler combines the value already cached for a key (oldData) with a
// newly submitted value (newData) and returns the value to keep.
type MergeHandler func(oldData, newData interface{}) interface{}
- 合併完請求最後處理的時候也是一個方法.
// ProcessHandler consumes one merged value together with its context and
// reports failure through the returned error.
type ProcessHandler func(context.Context, interface{}) error
-
合併完做處理的時候,你總不能停止服務吧?那麼我們需要兩個緩存:一個緩存在被處理的時候,另一個緩存負責接收寫入,兩者不停地來回切換。
-
切換的時間點,一般都是兩個維度,一個是時間上限,一個是數據量上限。
綜合以上的考慮,最終的結構體如下:
// MergeProcess buffers incoming data in two DataCache instances and switches
// between them so one can be drained while the other keeps accepting writes.
type MergeProcess struct {
	waitMergeMaxTime                int64 // maximum time to wait before merging; Run uses it as a number of seconds
	dataMaxSize                     int64 // entry count of the current cache at which a rotation is triggered
	firstDataCache, secondDataCache *DataCache // the two buffers that are alternated
	// handler ProcessHandler (removed: processing was moved to the worker)
	mergeHandler MergeHandler  // merges a new value into the one already cached for the same key
	currentCache **DataCache   // points at whichever cache currently receives inserts
	isRotating   int32         // 1 while a rotation is running; accessed through sync/atomic
	sync.Mutex                 // held for the whole duration of a rotation
}
這裏把 ProcessHandler 去掉是因爲做了第二次優化,ProcessHandler 不再綁定在指定的worker 上。這裏再次抽象出一組 worker:任務被 submit 之後,由這組 worker 不停地 process。
mergeProcess 主要的工作是:
- 提供dataCache的封裝,客戶端無需關注當前是在哪一個cache 執行操作。插入數據功能是需要提供的。
- rotation 功能,切換 兩個緩存,以及處理數據。
- 定時檢測等待時間和數據量,到達閾值之後執行 rotation 功能。
核心代碼如下:
// MergeHandler combines the value already cached for a key (oldData) with a
// newly submitted value (newData) and returns the value to keep.
type MergeHandler func(oldData, newData interface{}) interface{}
// CtxWithInterface pairs a payload with the context it was submitted under so
// that both travel together through the cache and the worker channel.
type CtxWithInterface struct {
	// Ctx is the context supplied by the caller that produced V.
	Ctx context.Context
	// V is the payload itself.
	V interface{}
}

// NewCtxWithInterface returns a pointer to a new CtxWithInterface holding ctx
// and v.
func NewCtxWithInterface(ctx context.Context, v interface{}) *CtxWithInterface {
	wrapped := new(CtxWithInterface)
	wrapped.Ctx = ctx
	wrapped.V = v
	return wrapped
}
// MergeProcess buffers incoming data in two DataCache instances and switches
// between them so one can be drained while the other keeps accepting writes.
type MergeProcess struct {
	// waitMergeMaxTime is the maximum time to wait before merging; Run uses
	// it as a number of seconds.
	waitMergeMaxTime int64
	// dataMaxSize is the entry count of the current cache at which a
	// rotation is triggered.
	dataMaxSize int64

	// The two buffers that are alternated.
	firstDataCache  *DataCache
	secondDataCache *DataCache

	// mergeHandler merges a new value into the one already cached for the
	// same key. (A ProcessHandler field used to live here; processing was
	// moved to the worker.)
	mergeHandler MergeHandler
	// currentCache points at whichever cache currently receives inserts.
	currentCache **DataCache
	// isRotating is 1 while a rotation is running; accessed via sync/atomic.
	isRotating int32

	// Mutex is held for the whole duration of a rotation.
	sync.Mutex
}
// String renders the configuration and both caches for debug output.
//
// The second cache is labelled SecondDataMap (it used to be printed under a
// duplicated FirstDataMap label), and isRotating is read atomically because
// rotation updates it through sync/atomic.
func (mp *MergeProcess) String() string {
	return fmt.Sprintf("WaitMergeMaxTime=%d DataMaxSize=%d FirstDataMap=%s SecondDataMap=%s isRotating=%d",
		mp.waitMergeMaxTime, mp.dataMaxSize, mp.firstDataCache, mp.secondDataCache,
		atomic.LoadInt32(&mp.isRotating))
}
// getCurrentDataCache returns the cache that currentCache points at, i.e. the
// one inserts are currently routed to.
func (mp *MergeProcess) getCurrentDataCache() *DataCache {
	active := mp.currentCache
	return *active
}
// Insert wraps newData together with ctx and stores it under key in the
// current cache, merging with any existing entry via the configured
// mergeHandler. It returns whatever DataCache.Insert returns.
func (mp *MergeProcess) Insert(ctx context.Context, key string, newData interface{}) error {
	target := mp.getCurrentDataCache()
	wrapped := NewCtxWithInterface(ctx, newData)
	return target.Insert(key, wrapped, mp.mergeHandler)
}
// rotation swaps the active cache and then drains the cache that was active
// before the swap.
//
// The cache being drained must be the retired one: after swapCurrentCache the
// "current" cache is the one receiving new inserts, so draining it would
// process the buffer that is still being written to and leave the filled
// buffer untouched until the next rotation.
//
// The whole rotation runs under the embedded mutex. isRotating is claimed
// with a compare-and-swap and only reset by the call that claimed it.
//
// NOTE(review): Insert reads currentCache without holding the mutex, so the
// pointer swap here is not synchronized with concurrent inserts — confirm
// whether callers can tolerate that.
func (mp *MergeProcess) rotation() {
	mp.Lock()
	defer mp.Unlock()

	startTime := utils.GetTimeStamp()

	// Claim the flag; if somebody else holds it, leave it alone and return.
	if !atomic.CompareAndSwapInt32(&mp.isRotating, 0, 1) {
		utils.DebugPrintf("still rotation return")
		return
	}
	defer atomic.StoreInt32(&mp.isRotating, 0)

	// Remember the filled cache before redirecting inserts to the other one.
	filled := mp.getCurrentDataCache()
	mp.swapCurrentCache()
	utils.DebugPrintf("oldcurrentCache=%+v currentCache=%+v same=%t", filled, mp.getCurrentDataCache(), filled == mp.getCurrentDataCache())

	if err := filled.HandleCacheV2(); err != nil {
		logger.LogErrorf("HandleCacheV2 err=%s", err)
	}
	utils.DebugPrintf("rotation finish !!!!!!!!!!!!!! useTime=%d mp=%s", utils.GetTimeStamp()-startTime, mp)
}
// swapCurrentCache redirects currentCache to the other buffer: to the second
// cache when the first is active, otherwise to the first.
func (mp *MergeProcess) swapCurrentCache() {
	next := &mp.firstDataCache
	if mp.getCurrentDataCache() == mp.firstDataCache {
		next = &mp.secondDataCache
	}
	mp.currentCache = next
}
// isNeedRotation reports whether the current cache holds at least dataMaxSize
// entries. The counter is loaded atomically because DataCache.Insert updates
// it with atomic.AddInt64 from other goroutines.
func (mp *MergeProcess) isNeedRotation() bool {
	return atomic.LoadInt64(&mp.getCurrentDataCache().count) >= mp.dataMaxSize
}
// Run is the rotation loop: it rotates every waitMergeMaxTime seconds and
// additionally checks once per second whether the current cache has reached
// its size limit. It never returns unless a panic escapes the loop, in which
// case the panic is logged and a replacement Run goroutine is started.
//
// Tickers are created with time.NewTicker and stopped on exit, so a restart
// after a panic does not leave the previous tickers running. A non-positive
// waitMergeMaxTime disables the time-based trigger (a nil channel never
// fires), which matches what time.Tick did for such durations.
func (mp *MergeProcess) Run() {
	defer func() {
		if err := recover(); err != nil {
			stack := utils.Stack(3)
			// The format previously had a fourth %s verb fed with the
			// boolean false; it has been dropped.
			logger.LogErrorf("[Recovery] %s %v panic recovered:\n%s\n", time.Now().Format("2006/01/02 - 15:04:05"), err, stack)
			go mp.Run()
		}
	}()

	// check is time merge or size match limit size
	var maxRunC <-chan time.Time
	if interval := time.Duration(mp.waitMergeMaxTime) * time.Second; interval > 0 {
		maxRunTicker := time.NewTicker(interval)
		defer maxRunTicker.Stop()
		maxRunC = maxRunTicker.C
	}
	checkTicker := time.NewTicker(time.Second)
	defer checkTicker.Stop()

	for {
		select {
		case <-maxRunC:
			mp.rotation()
		case <-checkTicker.C:
			if mp.isNeedRotation() {
				utils.DebugPrint("isNeedRotation true start rotation")
				mp.rotation()
			}
		}
	}
}
// NewMergeProcess builds a MergeProcess with two caches that share worker,
// makes the first cache the active one and starts the rotation loop in a
// background goroutine.
//
// waitMergeTime is the rotation interval in seconds, dataMaxSize the entry
// count that triggers an early rotation, and mergeHandler the function used
// to merge values inserted under the same key.
//
// currentCache is pointed at the struct's own firstDataCache field, the same
// way swapCurrentCache sets it, instead of at a constructor-local variable.
func NewMergeProcess(waitMergeTime, dataMaxSize int64, mergeHandler MergeHandler, worker *Worker) *MergeProcess {
	mp := &MergeProcess{
		waitMergeMaxTime: waitMergeTime,
		dataMaxSize:      dataMaxSize,
		firstDataCache:   NewDataCache("firstDataCache", worker),
		secondDataCache:  NewDataCache("secondDataCache", worker),
		mergeHandler:     mergeHandler,
	}
	mp.currentCache = &mp.firstDataCache
	go mp.Run()
	return mp
}
dataCache
dataCache 的功能主要是管理數據、執行合併操作,以及在 rotation 的時候暴露具體方法給上一層使用,用來處理請求。
- 插入,合併數據。
- 處理數據。
package merge_utils
import (
"errors"
"fmt"
"github.com/patrickmn/go-cache"
"sync/atomic"
)
// DataCache is one of the two buffers used by MergeProcess: it stores merged
// values by key and hands them to a Worker when it is drained.
type DataCache struct {
	cache     *cache.Cache          // key -> *CtxWithInterface entries written by Insert
	localLock *local_lock.LocalLock // per-key lock taken by Insert around its read-merge-write
	count     int64                 // number of distinct keys inserted since the last drain; updated via sync/atomic
	name      string                // label used in debug output
	worker    *Worker               // receives the cached items in HandleCacheV2
}
// String renders the cache name and its entry counter for debug output. The
// counter is loaded atomically because Insert and the drain functions update
// it through sync/atomic.
func (dc *DataCache) String() string {
	return fmt.Sprintf("name=%s count=%d", dc.name, atomic.LoadInt64(&dc.count))
}
// HandleCacheV2 submits every cached item to the worker, blocks until the
// worker has finished all of them, then empties the cache and resets the
// counters. It always returns nil.
//
// Completion is detected by comparing the worker's finishSize with its
// totalSize. A single receive from Notify is not enough: Submit raises
// totalSize one item at a time, so a worker can observe
// finishSize == totalSize and signal while later items are still being
// submitted. Each wake-up is therefore re-checked against the counters, and
// the wait continues until everything submitted has been counted as
// finished. Flushing and resetting earlier would corrupt the counters for
// every following drain.
//
// The snapshot returned by Items is also what decides whether there is work,
// so the function never waits for a notification when nothing was submitted.
func (dc *DataCache) HandleCacheV2() error {
	startTime := utils.GetTimeStamp()
	defer func() {
		utils.DebugPrintf("HandleCacheV2 use_time=%d", utils.GetTimeStamp()-startTime)
	}()

	allItem := dc.cache.Items()
	if len(allItem) == 0 {
		return nil
	}
	for _, item := range allItem {
		dc.worker.Submit(item.Object)
	}

	// Wait until every submitted item has been counted as finished.
	for {
		dc.worker.waitUntilFinish()
		if atomic.LoadInt64(&dc.worker.finishSize) >= atomic.LoadInt64(&dc.worker.totalSize) {
			break
		}
	}

	// clear all
	dc.cache.Flush()
	dc.worker.clear()
	atomic.StoreInt64(&dc.count, 0)
	return nil
}
// HandleCache drains the cache sequentially: every item is passed to
// processHandler and then deleted, whether or not the handler succeeded.
// Afterwards the entry counter is reset. All failures are concatenated into
// one error; nil is returned when every item was processed successfully.
//
// Entries are stored by Insert as *CtxWithInterface, so the assertion targets
// the pointer type (asserting the value type panicked on every item). An
// entry of any other type is reported in the returned error instead of
// panicking.
//
// TODO: inserts are not blocked while this runs.
func (dc *DataCache) HandleCache(processHandler ProcessHandler) error {
	errMsg := ""
	for key, item := range dc.cache.Items() {
		entry, ok := item.Object.(*CtxWithInterface)
		switch {
		case !ok || entry == nil:
			errMsg += fmt.Sprintf("err=unexpected cache entry type %T key=%s value=%+v", item.Object, key, item.Object)
		default:
			if err := processHandler(entry.Ctx, entry.V); err != nil {
				errMsg += fmt.Sprintf("err=%s key=%s value=%+v", err, key, item.Object)
			}
		}
		dc.cache.Delete(key)
	}
	atomic.StoreInt64(&dc.count, 0)
	if errMsg == "" {
		return nil
	}
	return errors.New(errMsg)
}
// Insert stores newData under key. If the key already has an entry, the old
// and new payloads are combined with mergeHandler and the result is stored
// together with the new entry's context; otherwise newData is stored as is
// and the entry counter is incremented. The read-merge-write sequence runs
// under the per-key local lock.
//
// newData must be a non-nil *CtxWithInterface. A value of any other type, a
// cached entry of an unexpected type, or a nil mergeHandler when a merge is
// required is reported as an error instead of panicking.
func (dc *DataCache) Insert(key string, newData interface{}, mergeHandler MergeHandler) error {
	incoming, ok := newData.(*CtxWithInterface)
	if !ok || incoming == nil {
		return fmt.Errorf("insert key=%s: newData must be a non-nil *CtxWithInterface, got %T", key, newData)
	}

	// lock this key
	dc.localLock.Acquire(key)
	defer dc.localLock.Release(key)
	utils.DebugPrintf("setName=%s key=%s v=%+v", dc.name, key, newData)

	cached, found := dc.cache.Get(key)
	if !found {
		atomic.AddInt64(&dc.count, 1)
		utils.DebugPrintf("Insert self=%s", dc)
		dc.cache.Set(key, incoming, cache.NoExpiration)
		return nil
	}

	existing, ok := cached.(*CtxWithInterface)
	if !ok || existing == nil {
		return fmt.Errorf("insert key=%s: cached entry has unexpected type %T", key, cached)
	}
	if mergeHandler == nil {
		return fmt.Errorf("insert key=%s: mergeHandler is nil", key)
	}
	merged := mergeHandler(existing.V, incoming.V)
	dc.cache.Set(key, NewCtxWithInterface(incoming.Ctx, merged), cache.NoExpiration)
	utils.DebugPrintf("merge Insert self=%s", dc)
	return nil
}
// NewDataCache returns an empty DataCache labelled name whose items will be
// handed to worker when the cache is drained. The backing go-cache is created
// with cache.NoExpiration for both the default expiration and the cleanup
// interval.
func NewDataCache(name string, worker *Worker) *DataCache {
	dc := new(DataCache)
	dc.name = name
	dc.worker = worker
	dc.cache = cache.NewFrom(cache.NoExpiration, cache.NoExpiration, make(map[string]cache.Item))
	dc.localLock = local_lock.NewDefaultLocalLock(name)
	return dc
}
worker
dataCache 的 HandleCache 流程是整個 dataCache 的核心。如果直接逐個遍歷處理,速度肯定太慢。這個時候就需要有一個 worker 的概念:由一組 worker 併發地去消費緩存裏面的數據。worker 的核心功能點也不多,包含以下幾點:
- processHandler 處理函數。
- Submit,開放提交任務。
- Notify chan 用來通知這一組任務執行完畢。
- totalSize, finishSize 來表示任務總大小。以及已完成的數據量大小。 相等時便觸發通知。
// Worker is a fixed group of goroutines that process submitted items with a
// shared handler and signal when a batch has been fully processed.
type Worker struct {
	workerNumber    int64                  // number of goroutines started by Run; they are not meant to exit
	processDataChan chan *CtxWithInterface // queue of submitted items
	handler         ProcessHandler         // called once for every item
	chanSize        int64                  // buffer capacity of processDataChan
	totalSize       int64                  // items submitted so far; updated via sync/atomic
	finishSize      int64                  // items finished so far; updated via sync/atomic
	Notify          chan struct{}          // receives a signal when finishSize reaches totalSize
}
相關實現:
package merge_utils
import (
"context"
"fmt"
"sync/atomic"
"time"
)
//var debug = true
// ProcessHandler consumes one merged value together with its context and
// reports failure through the returned error.
type ProcessHandler func(context.Context, interface{}) error
// Worker is a fixed group of goroutines that process submitted items with a
// shared handler and signal when a batch has been fully processed.
type Worker struct {
	workerNumber    int64                  // number of goroutines started by Run; they are not meant to exit
	processDataChan chan *CtxWithInterface // queue of submitted items
	handler         ProcessHandler         // called once for every item
	chanSize        int64                  // buffer capacity of processDataChan
	totalSize       int64                  // items submitted so far; updated via sync/atomic
	finishSize      int64                  // items finished so far; updated via sync/atomic
	Notify          chan struct{}          // receives a signal when finishSize reaches totalSize
}
// NewDataSetWorker creates a Worker with workerNumber goroutines, a queue
// buffered to chanSize items and an unbuffered Notify channel, starts the
// goroutines and returns the Worker.
func NewDataSetWorker(workerNumber, chanSize int64, handler ProcessHandler) *Worker {
	w := &Worker{
		workerNumber:    workerNumber,
		processDataChan: make(chan *CtxWithInterface, chanSize),
		handler:         handler,
		chanSize:        chanSize,
		Notify:          make(chan struct{}),
	}
	// The counters start at their zero value; the goroutines are started here.
	w.Run()
	return w
}
// NewDefaultDataSetWorker creates a Worker with 10 goroutines and a queue
// capacity of 100.
func NewDefaultDataSetWorker(handler ProcessHandler) *Worker {
	const (
		defaultWorkerNumber = 10
		defaultChanSize     = 100
	)
	return NewDataSetWorker(defaultWorkerNumber, defaultChanSize, handler)
}
// String renders the worker configuration and its progress counters for debug
// output. The counters are loaded atomically because Submit and count update
// them through sync/atomic from other goroutines.
func (w *Worker) String() string {
	total := atomic.LoadInt64(&w.totalSize)
	finished := atomic.LoadInt64(&w.finishSize)
	return fmt.Sprintf("workerNumber=%d chanSize=%d handler=%s totalSize=%d finishSize=%d", w.workerNumber, w.chanSize,
		utils.GetFuncName(w.handler), total, finished)
}
// count records one finished item and notifies when the number of finished
// items equals the number of submitted items.
//
// The comparison uses the value returned by AddInt64, so exactly one caller
// observes each finishSize value; with a separate load, two goroutines
// finishing at the same time could both see the final value and both send a
// notification. totalSize is loaded atomically because Submit updates it
// atomically.
//
// NOTE(review): Submit raises totalSize one item at a time, so equality can
// still be reached while a batch is only partly submitted.
func (w *Worker) count() {
	if atomic.AddInt64(&w.finishSize, 1) == atomic.LoadInt64(&w.totalSize) {
		w.notify()
	}
}
// notify sends one signal on Notify. The send blocks until a receiver takes
// it; NewDataSetWorker creates Notify without a buffer.
func (w *Worker) notify() {
	utils.DebugPrintf("r34r3 rotation")
	w.Notify <- struct{}{}
}
// waitUntilFinish blocks until one signal is received from Notify.
func (w *Worker) waitUntilFinish() {
	<-w.Notify
}
// clear resets both progress counters to zero.
func (w *Worker) clear() {
	atomic.StoreInt64(&w.totalSize, 0)
	atomic.StoreInt64(&w.finishSize, 0)
}
// run is the body of one worker goroutine: it takes items from
// processDataChan and processes them one by one. It only returns if the
// channel is closed.
func (w *Worker) run() {
	for data := range w.processDataChan {
		w.handleItem(data)
		utils.DebugPrintf("w=%s", w)
	}
}

// handleItem runs the handler for a single item and counts the item as
// finished afterwards, whether the handler returned or panicked. A panic is
// recovered and logged here, so the worker goroutine keeps running instead of
// having to be respawned.
func (w *Worker) handleItem(data *CtxWithInterface) {
	// Registered first so it runs last: after done count down.
	defer w.count()
	defer func() {
		if err := recover(); err != nil {
			stack := utils.Stack(3)
			// The format previously had a fourth %s verb fed with the
			// boolean false; it has been dropped, as has the duplicate
			// fmt.Printf of the same panic value.
			logger.LogErrorf("[Recovery] %s %v panic recovered:\n%s\n", time.Now().Format("2006/01/02 - 15:04:05"), err, stack)
		}
	}()

	utils.DebugPrintf("processDataChan w=%s", w)
	if err := w.handler(data.Ctx, data.V); err != nil {
		logger.LogErrorf("err=%s data=%+v", err, data)
	}
}
// Submit queues one item for processing. data must be a *CtxWithInterface;
// any other type panics, as before. The send blocks while the queue is full.
//
// The type assertion happens before totalSize is incremented, so a rejected
// item is never counted as submitted. Counting first left totalSize
// permanently ahead of finishSize after a failed assertion, and the
// completion check in count could then never match.
func (w *Worker) Submit(data interface{}) {
	item := data.(*CtxWithInterface)
	atomic.AddInt64(&w.totalSize, 1)
	w.processDataChan <- item
}
// Run starts workerNumber goroutines, each executing run.
func (w *Worker) Run() {
	for remaining := w.workerNumber; remaining > 0; remaining-- {
		go w.run()
	}
}