很多時候,我們需要了解一個程序的運行狀態,這時就需要用到監控。這裏使用的監控系統是 Prometheus。那麼監控指標該怎麼寫呢?筆者找了些資料,寫了幾個簡單的 Demo。這些 Demo 一定存在不足,歡迎大家在評論中指出。
引包
import (
. "flag"
"github.com/gin-gonic/gin"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promhttp"
"log"
"math/rand"
"strconv"
"sync"
"time"
)
指標的定義
// totalCounterVec: counter vector for processed jobs, labelled by worker_id and type.
totalCounterVec = prometheus.NewCounterVec(
prometheus.CounterOpts{
Namespace: "worker",
Subsystem: "jobs",
Name: "processed_total",
Help: "Total number of jobs processed by the workers",
},
[]string{"worker_id", "type"},
)
// http_request_total: plain counter (no labels) for handled HTTP requests.
http_request_total = prometheus.NewCounter(
prometheus.CounterOpts{
Name: "http_request_total" ,
Namespace: "prometheus_front_server",
Help: "The total number of processed http requests",
})
// inflightCounterVec: despite the "Counter" in its name this is a gauge vector,
// labelled by type, so it can be both incremented and decremented.
inflightCounterVec = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Namespace: "worker",
Subsystem: "jobs",
Name: "inflight",
Help: "Number of jobs inflight",
},
[]string{"type"},
)
// processingTimeVec: histogram vector of job processing time, labelled by
// worker_id and type. No Buckets are given in the options.
processingTimeVec = prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Namespace: "worker",
Subsystem: "jobs",
Name: "process_time_seconds",
Help: "Amount of time spent processing jobs",
},
[]string{"worker_id", "type"},
)
先來看這三個 vec 的指標:
// track the total number of jobs processed by the worker
// (label values: the worker ID formatted as a decimal string, then the job type)
totalCounterVec.WithLabelValues(strconv.FormatInt(int64(workerID), 10), job.Type).Inc()
// decrement the inflight tracker
inflightCounterVec.WithLabelValues(job.Type).Dec()
// record the time elapsed since startTime, in seconds, under the same worker/type labels
processingTimeVec.WithLabelValues(strconv.FormatInt(int64(workerID), 10), job.Type).Observe(time.Now().Sub(startTime).Seconds())
再來看這個普通的 http_request_total:
// test is the gin handler for the /test route. It writes no response body of
// its own; its only effect is to add one to the http_request_total counter.
func test(ctx *gin.Context) {
	http_request_total.Inc()
}
現在,我們再來看這個主函數的寫法:
// create the gin engine that will host the demo routes
engine := gin.New()
// /test is handled by test, which increments the request counter
engine.GET("/test" , test)
// /metrics is handled by prometheusHttp, which exposes the collected metrics
engine.GET("/metrics" , prometheusHttp)
// serve on all interfaces, port 9010; NOTE(review): whatever Run returns is discarded here
engine.Run("0.0.0.0:9010")
prometheusHttp 裏面的代碼如下:
// 這裏我是不是很清楚的,因爲我覺得這個 gin 框架應該是可以不這麼做
func prometheusHttp(ctx *gin.Context){
handler := promhttp.Handler()
handler.ServeHTTP(ctx.Writer , ctx.Request)
}
整理
NewCounter
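我們利用 prometheus 包提供的 NewCounter 方法定義了一個 Counter 類型的監控指標,只需要填充名字以及幫助信息,該指標就創建完成了(如果改用 promauto 包的 NewCounter,指標會自動註冊;這裏用的是 prometheus.NewCounter,所以還需要通過 prometheus.MustRegister 手動註冊)。需要注意的是,Counter 類型數據的名字要儘量以 _total 作爲後綴,否則當 Prometheus 與其他系統集成時,可能會出現指標無法識別的問題。每當有請求訪問 /test 時,該指標就會調用 Inc() 方法加一,當然,我們也可以調用 Add() 方法累加任意的非負數。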
NewGauge
監控累積的請求處理顯然還是不夠的,通常我們還想知道當前正在處理的請求的數量。Prometheus中的Gauge類型數據,與Counter不同,它既能增大也能變小。將正在處理的請求數量定義爲Gauge類型是合適的。e.g:
// http_request_in_flight: plain gauge (no labels) for the number of HTTP
// requests currently being handled.
http_request_in_flight = prometheus.NewGauge(
prometheus.GaugeOpts{
Namespace: "prometheus_front_server" ,
Name: "http_request_in_flight",
Help: "Current number of http requests in flight",
},
)
// test2 is a gin handler that tracks the request as in flight for as long as
// the handler runs and counts it in http_request_total.
func test2(ctx *gin.Context) {
	http_request_in_flight.Inc()
	// Dec undoes the Inc above when the handler returns. (Desc would only
	// return the metric's descriptor and leave the gauge climbing forever.)
	defer http_request_in_flight.Dec()
	http_request_total.Inc()
}
Gauge和Counter類型的數據操作起來的差別並不大,唯一的區別是Gauge支持Dec()或者Sub()方法減小指標的值。
NewHistogram
對於一個網絡服務來說,能夠知道它的平均時延是重要的,不過很多時候我們更想知道響應時間的分佈狀況。Prometheus 中的 Histogram 類型就對此類需求提供了很好的支持。e.g:
// http_request_duration_seconds: plain histogram (no labels) of HTTP request
// latency. ConstLabels and Buckets are left at their zero values.
http_request_duration_seconds = prometheus.NewHistogram(
	prometheus.HistogramOpts{
		Namespace: "prometheus_front_server",
		Subsystem: "jobs",
		Name:      "http_request_duration_seconds",
		Help:      "Histogram of latencies for HTTP requests",
	})
// 測試
http_request_in_flight.Inc()
defer http_request_in_flight.Desc()
http_request_total.Inc()
time.Sleep(time.Duration(rand.Intn(1000)) * time.Millisecond)
http_request_duration_seconds.Observe(time.Since(time.Now()).Seconds())
Summary
Summary 的用法和 Histogram 基本一樣(同樣通過 Observe() 記錄觀測值),區別在於 Summary 在客戶端直接計算分位數,而 Histogram 只記錄各個桶的計數。
NewCounterVec
不過,有的時候,我們可能希望從更多的特徵維度去衡量一個指標。例如,對於接收到的HTTP請求的數目,我們可能希望知道具體到每個路徑接收到的請求數目。假設當前能夠訪問 / 和 /foo 目錄,顯然定義兩個不同的 Counter,比如 http_request_root_total和 http_request_foo_total,並不是一個很好的方法。一方面擴展性比較差:如果定義更多的訪問路徑就需要創建更多新的監控指標,同時,我們定義的特徵維度往往不止一個,可能我們想知道某個路徑且返回碼爲XXX的請求數目是多少,這種方法就無能爲力了;另一方面,PromQL也無法很好地對這些指標進行聚合分析。
Prometheus對於此類問題的方法是爲指標的每個特徵維度定義一個label,一個label本質上就是一組鍵值對。一個指標可以和多個label相關聯,而一個指標和一組具體的label可以唯一確定一條時間序列。對於上述分別統計每條路徑的請求數目的問題,標準的Prometheus的解決方法如下:e.g:
// totalCounterVec: counter vector whose label names are given by the final argument.
totalCounterVec = prometheus.NewCounterVec(
prometheus.CounterOpts{
Namespace: "worker",
Subsystem: "jobs",
Name: "processed_total",
Help: "Total number of jobs processed by the workers",
},
// for the per-path request count discussed above the labels would be []string{"path"};
// this demo labels by worker and job type instead
[]string{"worker_id", "type"},
)
這裏,我們是根據 worker_id 和當前的類型 type 這兩個維度來區分的:
// values are passed in the order the labels were declared: worker_id first, then type
// (here the type value is the literal string "type")
totalCounterVec.WithLabelValues(workerId, "type").Inc()
後面的幾個 Vec 也是和這個的用法一樣的。
這裏的介紹就這麼多,給一個我測試用的 Demo 的源碼:
package main
import (
. "flag"
"github.com/gin-gonic/gin"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promhttp"
"log"
"math/rand"
"strconv"
"sync"
"time"
)
// Package: awesomeProject
// Version: 1.0
//
// Created by SunYang on 2020/5/15 11:14
var (
	// types lists the job categories the fake producer picks from; the chosen
	// value is used as the "type" label on the worker metrics.
	types = []string{"email", "deactivation", "activation", "transaction", "customer_renew", "order_processed"}

	// workers is the number of worker goroutines to start. It is filled in
	// from the -workers command-line flag.
	workers = 0

	// totalCounterVec counts processed jobs per worker and job type.
	totalCounterVec = prometheus.NewCounterVec(
		prometheus.CounterOpts{
			Namespace: "worker",
			Subsystem: "jobs",
			Name:      "processed_total",
			Help:      "Total number of jobs processed by the workers",
		},
		[]string{"worker_id", "type"},
	)

	// http_request_total counts every HTTP request seen by the demo handlers.
	http_request_total = prometheus.NewCounter(
		prometheus.CounterOpts{
			Namespace: "prometheus_front_server",
			Name:      "http_request_total",
			Help:      "The total number of processed http requests",
		})

	// http_request_in_flight is the number of HTTP requests currently being handled.
	http_request_in_flight = prometheus.NewGauge(
		prometheus.GaugeOpts{
			Namespace: "prometheus_front_server",
			Name:      "http_request_in_flight",
			Help:      "Current number of http requests in flight",
		},
	)

	// http_request_duration_seconds is the latency distribution of HTTP
	// requests. Buckets and ConstLabels are left at their zero values.
	// NOTE(review): Subsystem "jobs" looks copied from the worker metrics; the
	// other HTTP metrics have no subsystem. It is kept as-is because changing
	// it would rename the exported metric.
	http_request_duration_seconds = prometheus.NewHistogram(
		prometheus.HistogramOpts{
			Namespace: "prometheus_front_server",
			Subsystem: "jobs",
			Name:      "http_request_duration_seconds",
			Help:      "Histogram of latencies for HTTP requests",
		})

	// inflightCounterVec tracks, per job type, how many jobs have been queued
	// but not yet finished. Despite its name it is a gauge, not a counter.
	inflightCounterVec = prometheus.NewGaugeVec(
		prometheus.GaugeOpts{
			Namespace: "worker",
			Subsystem: "jobs",
			Name:      "inflight",
			Help:      "Number of jobs inflight",
		},
		[]string{"type"},
	)

	// processingTimeVec is the distribution of job processing time per worker
	// and job type.
	processingTimeVec = prometheus.NewHistogramVec(
		prometheus.HistogramOpts{
			Namespace: "worker",
			Subsystem: "jobs",
			Name:      "process_time_seconds",
			Help:      "Amount of time spent processing jobs",
		},
		[]string{"worker_id", "type"},
	)
)
// init binds the -workers command-line flag to the workers variable before
// main runs. The flag defaults to 10.
func init() {
	const (
		flagName     = "workers"
		defaultCount = 10
		usage        = "Number of workers to use"
	)
	IntVar(&workers, flagName, defaultCount, usage)
}
// getType returns a job type picked uniformly at random from types.
// It panics if types is empty, because rand.Intn needs a positive bound.
func getType() string {
	return types[rand.Intn(len(types))]
}
// Job is one unit of fake work handed from the producer to the workers.
type Job struct {
	// Type is the job category; it is used as the "type" metric label.
	Type string
	// Sleep is how long a worker sleeps to simulate processing the job.
	Sleep time.Duration
}
// main parses the command-line flags, registers every collector with the
// default Prometheus registry, starts the job producer and the worker pool in
// the background, and then serves /test, /test2 and /metrics on port 9010.
func main() {
	Parse()

	// Register all collectors; MustRegister panics if any registration fails.
	prometheus.MustRegister(
		totalCounterVec,
		http_request_total,
		http_request_in_flight,
		http_request_duration_seconds,
		inflightCounterVec,
		processingTimeVec)

	// create a channel with a 10,000 Job buffer
	jobsChannel := make(chan *Job, 10000)
	// start the worker pool that drains the channel
	go startJobProcess(jobsChannel)
	// start the producer; without it the channel stays empty, the workers
	// idle forever and the worker job metrics never receive a sample
	go createJobs(jobsChannel)

	engine := gin.New()
	engine.GET("/test", test)
	engine.GET("/test2", test2)
	engine.GET("/metrics", prometheusHttp)
	// Run blocks while serving; report the reason if it ever returns
	// (for example when the port is already in use).
	if err := engine.Run("0.0.0.0:9010"); err != nil {
		log.Fatalf("[ERROR] http server stopped: %v", err)
	}
}
func prometheusHttp(ctx *gin.Context){
handler := promhttp.Handler()
handler.ServeHTTP(ctx.Writer , ctx.Request)
}
// test is the gin handler for GET /test. It writes no response body of its
// own; its only effect is to add one to the http_request_total counter.
func test(ctx *gin.Context) {
	http_request_total.Inc()
}
// test2 is the gin handler for GET /test2. It simulates a slow request and
// exercises all three HTTP metrics: the in-flight gauge for the lifetime of
// the handler, the request counter, and the latency histogram.
func test2(ctx *gin.Context) {
	// take the start time before doing any work so the histogram sees the
	// real duration rather than roughly zero
	start := time.Now()

	http_request_in_flight.Inc()
	// Dec undoes the Inc above when the handler returns. (Desc would only
	// return the metric's descriptor and leave the gauge climbing forever.)
	defer http_request_in_flight.Dec()

	http_request_total.Inc()

	// simulate up to one second of work
	time.Sleep(time.Duration(rand.Intn(1000)) * time.Millisecond)

	http_request_duration_seconds.Observe(time.Since(start).Seconds())
}
// startJobProcess launches `workers` goroutines, each running startWorker on
// the shared jobs channel with its own ID (0 .. workers-1), and blocks until
// every one of them has returned.
func startJobProcess(jobs <-chan *Job) {
	log.Printf("[INFO] starting %d workers\n", workers)

	var pool sync.WaitGroup
	// one Done is expected from each worker goroutine
	pool.Add(workers)
	for id := 0; id < workers; id++ {
		go func(workerID int) {
			defer pool.Done()
			startWorker(workerID, jobs)
		}(id)
	}
	pool.Wait()
}
// startWorker processes jobs from the channel until the channel is closed and
// drained, then returns. For each job it sleeps for job.Sleep to simulate the
// work, logs the elapsed time, and updates the three worker metrics:
// processed total (+1), in-flight gauge (-1) and the processing-time histogram.
//
// workerID is used both in the log line and, as a decimal string, as the
// "worker_id" label value.
func startWorker(workerID int, jobs <-chan *Job) {
	// the label value is the same for every job, so format it once
	id := strconv.Itoa(workerID)

	// range ends when jobs is closed; a bare receive would instead keep
	// yielding nil jobs and crash on the first field access
	for job := range jobs {
		startTime := time.Now()
		// fake processing the request
		time.Sleep(job.Sleep)
		// measure once so the log line and the histogram agree
		elapsed := time.Since(startTime).Seconds()
		log.Printf("[%d][%s] Processed job in %0.3f seconds", workerID, job.Type, elapsed)
		// track the total number of jobs processed by the worker
		totalCounterVec.WithLabelValues(id, job.Type).Inc()
		// decrement the inflight tracker
		inflightCounterVec.WithLabelValues(job.Type).Dec()
		processingTimeVec.WithLabelValues(id, job.Type).Observe(elapsed)
	}
}
// createJobs is the fake producer. It loops forever: build a random job, count
// it as in flight under its type label, queue it on jobs, then pause for 5ms.
// It never returns.
func createJobs(jobs chan<- *Job) {
	// gap between submissions so the queue does not pile up too quickly
	const pause = 5 * time.Millisecond

	for {
		next := makeJob()
		// count the job as in flight before handing it to the workers
		inflightCounterVec.WithLabelValues(next.Type).Inc()
		jobs <- next
		time.Sleep(pause)
	}
}
// makeJob builds a fake job with a random type (from getType) and a random
// simulated processing time of a whole number of milliseconds in [10, 110).
func makeJob() *Job {
	return &Job{
		Type:  getType(),
		Sleep: time.Duration(rand.Intn(100)+10) * time.Millisecond,
	}
}