Merging MySQL requests

This need comes from merging many tables into a single logical table: requests that used to be spread across the individual tables all land on the merged table, so its QPS is effectively the sum of what each table carried before. If the application side caches requests and merges them before sending (for example, collapsing many increments to the same row into one UPDATE), the pressure on the TiDB side drops dramatically.

Design

mergeProcess

  • To merge requests we first need to cache them. Caching is a familiar problem; here we use:
	"github.com/patrickmn/go-cache"
  • Merging requests requires a merge function (a hypothetical example is sketched right after this list):
type MergeHandler func(oldData, newData interface{}) interface{}
  • Processing the merged requests at the end is a function as well:
type ProcessHandler func(context.Context, interface{}) error
  • While the merged data is being processed, the service cannot simply stop accepting requests. So we need two caches: while one cache is being processed, the other one takes the writes, and we keep switching between the two.

  • The switch is triggered along two dimensions: an upper bound on waiting time and an upper bound on data volume.
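As a concrete illustration of the MergeHandler signature above, a merge function that coalesces counter increments against the same row might look like the sketch below. The IncrReq payload and mergeIncr are hypothetical examples for this post, not part of the project's code.

// IncrReq is a hypothetical "increment a counter row" request payload.
type IncrReq struct {
	RowID string
	Delta int64
}

// mergeIncr folds a newly arrived increment into the one already cached for the same key,
// so many small updates collapse into a single pending write.
var mergeIncr MergeHandler = func(oldData, newData interface{}) interface{} {
	oldReq := oldData.(*IncrReq)
	newReq := newData.(*IncrReq)
	return &IncrReq{RowID: oldReq.RowID, Delta: oldReq.Delta + newReq.Delta}
}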

Taking all of the above into account, the final struct looks like this:

type MergeProcess struct {
	waitMergeMaxTime                int64 // maximum time to wait before flushing the merged data
	dataMaxSize                     int64
	firstDataCache, secondDataCache *DataCache
	//handler                     ProcessHandler
	mergeHandler MergeHandler
	currentCache **DataCache
	isRotating   int32
	sync.Mutex
}

The ProcessHandler field is commented out here because of a second round of optimization: the ProcessHandler is no longer bound to a specific worker. Instead, a separate group of workers is abstracted out, and after work is submitted those workers keep processing it.

The main responsibilities of mergeProcess are:

  1. Wrap the two dataCache instances so that callers never need to know which cache an operation currently runs against; an Insert method has to be exposed.
  2. Provide the rotation: swap the two caches and process the data in the one that was just swapped out.
  3. Periodically check elapsed time and data volume, and run the rotation once either threshold is reached.

The core code is as follows:

type MergeHandler func(oldData, newData interface{}) interface{}

type CtxWithInterface struct {
	Ctx context.Context
	V   interface{}
}

func NewCtxWithInterface(ctx context.Context, v interface{}) *CtxWithInterface {
	return &CtxWithInterface{
		Ctx: ctx,
		V:   v,
	}
}

type MergeProcess struct {
	waitMergeMaxTime                int64 // maximum time to wait before flushing the merged data
	dataMaxSize                     int64
	firstDataCache, secondDataCache *DataCache
	//handler                     ProcessHandler
	mergeHandler MergeHandler
	currentCache **DataCache
	isRotating   int32
	sync.Mutex
}
func (mp *MergeProcess) String() string {
	return fmt.Sprintf("WaitMergeMaxTime=%d DataMaxSize=%d FirstDataMap=%s SecondDataMap=%s isRotating=%d", mp.waitMergeMaxTime,
		mp.dataMaxSize, mp.firstDataCache, mp.secondDataCache, mp.isRotating)
}

func (mp *MergeProcess) getCurrentDataCache() *DataCache {
	return *mp.currentCache
}

func (mp *MergeProcess) Insert(ctx context.Context, key string, newData interface{}) error {
	return mp.getCurrentDataCache().Insert(key, NewCtxWithInterface(ctx, newData), mp.mergeHandler)
}

func (mp *MergeProcess) rotation() {
	mp.Lock()
	defer mp.Unlock()

	startTime := utils.GetTimeStamp()
	defer func() {
		atomic.CompareAndSwapInt32(&mp.isRotating, 1, 0)
	}()

	isRotating := atomic.LoadInt32(&mp.isRotating)
	if isRotating == 1 {
		utils.DebugPrintf("still rotation return")
		return
	}

	// start rotation
	atomic.StoreInt32(&mp.isRotating, 1)
	currentCache := mp.getCurrentDataCache()
	mp.swapCurrentCache()
	utils.DebugPrintf("oldcurrentCache=%+v currentCache=%+v same=%t", currentCache, mp.getCurrentDataCache(), currentCache == mp.getCurrentDataCache())

	if err := mp.getCurrentDataCache().HandleCacheV2(); err != nil {
		logger.LogErrorf("HandleCacheV2 err=%s", err)
	}
	utils.DebugPrintf("rotation finish !!!!!!!!!!!!!! useTime=%d mp=%s", utils.GetTimeStamp()-startTime, mp)

}

func (mp *MergeProcess) swapCurrentCache() {
	currentCache := mp.getCurrentDataCache()
	if currentCache == mp.firstDataCache {
		mp.currentCache = &mp.secondDataCache
	} else {
		mp.currentCache = &mp.firstDataCache
	}
}

func (mp *MergeProcess) isNeedRotation() bool {
	currentSet := mp.getCurrentDataCache()
	// count is updated with atomics elsewhere, so read it atomically here as well
	return atomic.LoadInt64(&currentSet.count) >= mp.dataMaxSize
}

func (mp *MergeProcess) Run() {
	// check whether the time limit or the data-size limit has been reached
	maxRunTicker := time.Tick(time.Duration(mp.waitMergeMaxTime) * time.Second)
	checkTicker := time.Tick(time.Second)

	defer func() {
		if err := recover(); err != nil {
			stack := utils.Stack(3)
			logger.LogErrorf("[Recovery] %s %s panic recovered:\n%s\n%s", time.Now().Format("2006/01/02 - 15:04:05"), err, stack, false)
			go mp.Run()
		}
	}()

	for {
		select {
		case <-maxRunTicker:
			mp.rotation()

		case <-checkTicker:
			if isNeed := mp.isNeedRotation(); isNeed {
				utils.DebugPrint("isNeedRotation true start rotation")
				mp.rotation()
			}
		}
	}
}

func NewMergeProcess(waitMergeTime, dataMaxSize int64, mergeHandler MergeHandler, worker *Worker) *MergeProcess {

	firstCache := NewDataCache("firstDataCache", worker)
	secondCache := NewDataCache("secondDataCache", worker)
	//secondCache := NewDataSet("secondDataCache", processHandler)
	mp := &MergeProcess{
		waitMergeMaxTime: waitMergeTime,
		dataMaxSize:      dataMaxSize,
		firstDataCache:   firstCache,
		secondDataCache:  secondCache,
		isRotating:       0,
		//handler:          processHandler,
		mergeHandler: mergeHandler,
	}
	// point currentCache at the struct's own field so swapCurrentCache can flip between the two fields
	mp.currentCache = &mp.firstDataCache

	go mp.Run()

	return mp
}
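A minimal usage sketch follows, assuming the hypothetical IncrReq/mergeIncr example from above and an already opened *sql.DB named db (plus the context and database/sql imports). The ProcessHandler is where each merged request finally reaches MySQL/TiDB as a single statement; the table name and SQL below are assumptions for illustration only.

func exampleUsage(db *sql.DB) {
	// processIncr flushes one merged increment with a single UPDATE (hypothetical table and SQL)
	processIncr := func(ctx context.Context, v interface{}) error {
		req := v.(*IncrReq)
		_, err := db.ExecContext(ctx, "UPDATE counters SET cnt = cnt + ? WHERE id = ?", req.Delta, req.RowID)
		return err
	}

	worker := NewDefaultDataSetWorker(processIncr)    // 10 goroutines, channel size 100
	mp := NewMergeProcess(5, 1000, mergeIncr, worker) // rotate every 5 seconds or once 1000 keys are cached

	// hot path: callers only Insert; flushing to the database happens on rotation in the background
	_ = mp.Insert(context.Background(), "row-42", &IncrReq{RowID: "row-42", Delta: 1})
}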

dataCache

The main job of dataCache is to manage the data and perform the merge, and to expose concrete methods to the layer above so that the cached requests can be processed during rotation.

  • Insert data, merging it with whatever is already cached under the same key.
  • Process the cached data.
package merge_utils

import (
	"errors"
	"fmt"
	"sync/atomic"

	"github.com/patrickmn/go-cache"
	// the project-internal packages local_lock, utils and logger are needed here as well;
	// their import paths were omitted in the original post
)

type DataCache struct {
	cache     *cache.Cache
	localLock *local_lock.LocalLock // per-key lock guarding merge-on-insert
	count     int64
	name      string
	worker    *Worker
}

func (dc *DataCache) String() string {
	return fmt.Sprintf("name=%s count=%d", dc.name, dc.count)
}

func (dc *DataCache) HandleCacheV2() error {
	/*
		Be careful with this func: it has to block until all jobs are finished.
		Currently the total submitted count and the finished count are compared to decide that.
	*/

	startTime := utils.GetTimeStamp()
	itemCount := dc.cache.ItemCount()
	if itemCount == 0 {
		utils.DebugPrintf("HandleCacheV2 use_time=%d", utils.GetTimeStamp()-startTime)
		return nil
	}
	allItem := dc.cache.Items()

	for _, item := range allItem {
		dc.worker.Submit(item.Object)
	}
	// wait finish
	dc.worker.waitUntilFinish()
	// clear all
	dc.cache.Flush()
	dc.worker.clear()
	atomic.StoreInt64(&dc.count, 0)
	utils.DebugPrintf("HandleCacheV2 use_time=%d", utils.GetTimeStamp()-startTime)
	return nil
}

func (dc *DataCache) HandleCache(processHandler ProcessHandler) error {
	// TODO: while this runs, no new data can be inserted into this cache. Can it block? How? Run concurrently?
	allItem := dc.cache.Items()
	errMsg := ""
	for key, item := range allItem {
		tmp := item.Object.(*CtxWithInterface) // items are stored as *CtxWithInterface by Insert
		err := processHandler(tmp.Ctx, tmp.V)
		if err != nil {
			errMsg += fmt.Sprintf("err=%s key=%s value=%v", err, key, item.Object)
		}
		dc.cache.Delete(key)
	}
	//utils.DebugPrintf("self=%s", dc)
	atomic.StoreInt64(&dc.count, 0)

	if errMsg == "" {
		return nil
	}

	return errors.New(errMsg)
}

func (ds *DataCache) Insert(key string, newData interface{}, mergeHandler MergeHandler) error {
	// lock this key
	ds.localLock.Acquire(key)
	defer ds.localLock.Release(key)

	utils.DebugPrintf("setName=%s key=%s v=%+v", ds.name, key, newData)

	oldData, ok := ds.cache.Get(key)
	if ok {
		oldTmp := oldData.(*CtxWithInterface)
		newTmp := newData.(*CtxWithInterface)
		mergeData := mergeHandler(oldTmp.V, newTmp.V)
		ds.cache.Set(key, NewCtxWithInterface(newTmp.Ctx, mergeData), cache.NoExpiration)
		utils.DebugPrintf("merge Insert self=%s", ds)

		return nil
	}
	atomic.AddInt64(&ds.count, 1)
	utils.DebugPrintf("Insert self=%s", ds)

	ds.cache.Set(key, newData, cache.NoExpiration)
	return nil
}

// use
func NewDataCache(name string, worker *Worker) *DataCache {
	m1 := make(map[string]cache.Item)
	firstMap := cache.NewFrom(cache.NoExpiration, cache.NoExpiration, m1)
	ll := local_lock.NewDefaultLocalLock(name)
	//worker := NewDefaultDataSetWorker(handler)
	return &DataCache{
		count:     0,
		cache:     firstMap,
		localLock: ll,
		name:      name,
		worker:    worker,
	}
}
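The local_lock package used by Insert is project-internal and its source is not shown in the post. Assuming it only needs Acquire(key)/Release(key) semantics, a minimal per-key lock could look roughly like the sketch below; this is an assumption about its behavior, not the author's implementation.

// LocalLock serializes operations on the same cache key (sketch; assumed interface only).
type LocalLock struct {
	mu    sync.Mutex
	locks map[string]*sync.Mutex
}

func NewDefaultLocalLock(name string) *LocalLock {
	return &LocalLock{locks: make(map[string]*sync.Mutex)}
}

func (l *LocalLock) Acquire(key string) {
	l.mu.Lock()
	m, ok := l.locks[key]
	if !ok {
		m = &sync.Mutex{}
		l.locks[key] = m
	}
	l.mu.Unlock()
	m.Lock() // block until no other goroutine holds this key
}

func (l *LocalLock) Release(key string) {
	l.mu.Lock()
	m := l.locks[key]
	l.mu.Unlock()
	if m != nil {
		m.Unlock()
	}
}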

worker

The HandleCache flow is the core of the whole dataCache. Simply iterating over the items one by one would be far too slow, so we introduce the concept of a worker: a group of workers consumes the cached data concurrently. The worker only needs a few core pieces:

  1. processHandler, the function that processes each item.
  2. Submit, the public entry point for handing in jobs.
  3. A Notify channel used to signal that the whole batch of jobs has finished.
  4. totalSize and finishSize track the total number of submitted jobs and the number already finished; when the two become equal the notification fires.
type Worker struct {
	workerNumber    int64                  // number of worker goroutines; they run forever and never quit
	processDataChan chan *CtxWithInterface // data chan
	handler         ProcessHandler
	chanSize        int64
	totalSize       int64
	finishSize      int64
	Notify          chan struct{}
}

The implementation:

package merge_utils

import (
	"context"
	"fmt"
	"sync/atomic"
	"time"
	// the project-internal utils and logger packages are needed here as well;
	// their import paths were omitted in the original post
)

//var debug = true
type ProcessHandler func(context.Context, interface{}) error

type Worker struct {
	workerNumber    int64                  // number of worker goroutines; they run forever and never quit
	processDataChan chan *CtxWithInterface // data chan
	handler         ProcessHandler
	chanSize        int64
	totalSize       int64
	finishSize      int64
	Notify          chan struct{}
}

func NewDataSetWorker(workerNumber, chanSize int64, handler ProcessHandler) *Worker {
	worker := &Worker{
		workerNumber: workerNumber,
		chanSize:     chanSize,
		handler:      handler,
		Notify:       make(chan struct{}),
	}
	worker.finishSize = 0
	worker.totalSize = 0
	worker.processDataChan = make(chan *CtxWithInterface, chanSize)
	// run here
	worker.Run()
	return worker
}

func NewDefaultDataSetWorker(handler ProcessHandler) *Worker {
	return NewDataSetWorker(10, 100, handler)
}

func (w *Worker) String() string {
	return fmt.Sprintf("workerNumber=%d chanSize=%d handler=%s totalSize=%d finishSize=%d", w.workerNumber, w.chanSize,
		utils.GetFuncName(w.handler), w.totalSize, w.finishSize)
}

func (w *Worker) count() {
	// use the value returned by AddInt64 so that exactly one goroutine observes the final count
	if atomic.AddInt64(&w.finishSize, 1) == atomic.LoadInt64(&w.totalSize) {
		w.notify()
	}
}
func (w *Worker) notify() {
	utils.DebugPrintf("all submitted jobs finished, notifying")

	w.Notify <- struct{}{}
}

func (w *Worker) waitUntilFinish() {
	<-w.Notify
}

func (w *Worker) clear() {
	atomic.StoreInt64(&w.totalSize, 0)
	atomic.StoreInt64(&w.finishSize, 0)
}

func (w *Worker) run() {
	// a worker goroutine must never exit: recover from panics and restart itself
	defer func() {
		if err := recover(); err != nil {
			fmt.Printf("err=%+v", err)
			stack := utils.Stack(3)
			logger.LogErrorf("[Recovery] %s %s panic recovered:\n%s\n%s", time.Now().Format("2006/01/02 - 15:04:05"), err, stack, false)
			w.count()
			go w.run()
		}
	}()

	for {
		select {
		case data := <-w.processDataChan:
			// the handler should not panic; if it does, the deferred recover above handles it
			utils.DebugPrintf("processDataChan w=%s", w)
			ctx, v := data.Ctx, data.V
			if err := w.handler(ctx, v); err != nil {
				logger.LogErrorf("err=%s data=%+v", err, data)
			}
			// after done count down
			w.count()
			utils.DebugPrintf("w=%s", w)

		}
	}
}

func (w *Worker) Submit(data interface{}) {

	atomic.AddInt64(&w.totalSize, 1)
	w.processDataChan <- data.(*CtxWithInterface)
}

func (w *Worker) Run() {

	for i := int64(0); i < w.workerNumber; i++ {
		go w.run()
	}
}
