一、監控選型
選擇使用 grafana/kubernetes-app( https://github.com/grafana/kubernetes-app )來監控 Kubernetes。
支持對多個 k8s 集羣進行監控,操作簡單。
Requirements:
- Currently only has support for Prometheus
- For automatic deployment of the exporters, then Kubernetes 1.6 or higher is required.
- Grafana 5.0.0+
1)kube-state-metrics插件
2)node-exporter插件
3)prometheus插件
二、佈署
1、namespace: kube-system
2、rbac:
serviceaccount: prometheus
clusterrole: prometheus
clusterrolebinding: prometheus
---
# ClusterRole "prometheus": cluster-wide read-only access for the monitoring
# stack. It is granted to the kube-system/prometheus ServiceAccount through the
# ClusterRoleBinding of the same name.
#
# rbac.authorization.k8s.io/v1 is used instead of v1beta1: the beta version was
# removed in Kubernetes 1.22, and v1 is served by every cluster that also
# serves the apps/v1 Deployments used elsewhere in this document.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: prometheus
rules:
  # Core API group. nodes/proxy is needed because the kubelet and cAdvisor
  # scrape jobs reach each node through the API server's node proxy path
  # (/api/v1/nodes/<node>/proxy/...).
  - apiGroups: [""]
    resources:
      - nodes
      - nodes/proxy
      - services
      - endpoints
      - pods
    verbs: ["get", "list", "watch"]
  - apiGroups: ["extensions", "apps"]
    resources: ["deployments"]
    verbs: ["get", "list", "watch"]
  - apiGroups: ["batch", "extensions"]
    resources: ["jobs"]
    verbs: ["get", "list", "watch"]
  - apiGroups:
      - extensions
    resources:
      - ingresses
    verbs: ["get", "list", "watch"]
  # Non-resource URL: allows GET on the /metrics endpoint.
  - nonResourceURLs: ["/metrics"]
    verbs: ["get"]
  # NOTE(review): kube-state-metrics also runs under this ServiceAccount; it
  # may need list/watch on further resources than are granted here - confirm
  # against the kube-state-metrics release in use.
---
# ServiceAccount shared by the Prometheus, kube-state-metrics and node-exporter
# pods in this document (each pod spec sets serviceAccountName: prometheus).
apiVersion: v1
kind: ServiceAccount
metadata:
  namespace: kube-system
  name: prometheus
---
# Binds the "prometheus" ClusterRole to the kube-system/prometheus
# ServiceAccount.
#
# rbac.authorization.k8s.io/v1 is used instead of v1beta1: the beta version was
# removed in Kubernetes 1.22, and v1 is served by every cluster that also
# serves the apps/v1 Deployments used elsewhere in this document.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: prometheus
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: prometheus
subjects:
  - kind: ServiceAccount
    name: prometheus
    namespace: kube-system
---
3、deployment: prometheus
# Prometheus server: one replica in kube-system, configured from the
# prometheus-server-conf ConfigMap mounted at /etc/prometheus.
apiVersion: apps/v1
kind: Deployment
metadata:
  namespace: kube-system
  name: prometheus-deployment
  # Disabled example: annotations for scraping an application's own metrics.
  # annotations:
  #   prometheus.io/scrape: 'true'    # scrape the metrics of the app in the pod
  #   prometheus.io/path: '/metrics'  # scrape path, default /metrics
  #   prometheus.io/port: <port>      # the relevant port
spec:
  replicas: 1
  selector:
    matchLabels:
      app: prometheus-server
  template:
    metadata:
      labels:
        app: prometheus-server
    spec:
      serviceAccountName: prometheus
      securityContext:
        # Container processes run as UID 0 (root).
        runAsUser: 0
      volumes:
        # Despite its name this volume is an emptyDir; the GlusterFS-backed
        # claim below is commented out.
        - name: gluster-volume
          emptyDir: {}
          # persistentVolumeClaim:
          #   claimName: gluster-prometheus
        - name: config-volume
          configMap:
            name: prometheus-server-conf
      containers:
        - name: prometheus
          # NOTE(review): no tag is given, so the registry's default tag is
          # pulled - consider pinning a specific version.
          image: prom/prometheus
          args:
            - '--config.file=/etc/prometheus/prometheus.yml'
            - '--storage.tsdb.path=/prometheus/'
          ports:
            - protocol: TCP
              containerPort: 9090
          volumeMounts:
            # TSDB data directory (matches --storage.tsdb.path).
            - mountPath: /prometheus
              name: gluster-volume
            # Directory holding prometheus.yml (matches --config.file).
            - mountPath: /etc/prometheus
              name: config-volume
---
# NodePort Service in front of the Prometheus pods (label
# app: prometheus-server), forwarding port 9090 to container port 9090.
# No nodePort value is specified in this manifest.
apiVersion: v1
kind: Service
metadata:
  namespace: kube-system
  name: prometheus
  labels:
    k8s-app: prometheus
spec:
  type: NodePort
  selector:
    app: prometheus-server
  ports:
    - name: web
      targetPort: 9090
      port: 9090
4、deployment: kube-state-metrics
# kube-state-metrics: one replica in kube-system. The grafanak8sapp: 'true'
# pod label is what the 'kubernetes-kube-state' Prometheus job keeps on.
#
# This manifest was exported from a live object. The server-populated fields
# (uid, resourceVersion, selfLink, generation, creationTimestamp and the
# deployment.kubernetes.io/revision annotation) are intentionally omitted:
# the API server assigns them itself and rejects a create request that
# already carries a resourceVersion.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: kube-state-metrics
  namespace: kube-system
  labels:
    grafanak8sapp: 'true'
    k8s-app: kube-state-metrics
spec:
  replicas: 1
  revisionHistoryLimit: 2
  progressDeadlineSeconds: 600
  selector:
    matchLabels:
      grafanak8sapp: 'true'
      k8s-app: kube-state-metrics
  strategy:
    type: RollingUpdate
    rollingUpdate:
      maxUnavailable: 25%
      maxSurge: 25%
  template:
    metadata:
      labels:
        # Quoted so the label value stays the string "true", not a boolean.
        grafanak8sapp: 'true'
        k8s-app: kube-state-metrics
    spec:
      serviceAccountName: prometheus
      restartPolicy: Always
      terminationGracePeriodSeconds: 30
      dnsPolicy: ClusterFirst
      securityContext: {}
      schedulerName: default-scheduler
      containers:
        - name: kube-state-metrics
          image: 'quay.io/coreos/kube-state-metrics:v1.1.0'
          imagePullPolicy: IfNotPresent
          ports:
            - name: http-metrics
              containerPort: 8080
              protocol: TCP
          resources: {}
          # The pod is marked ready once /healthz answers on port 8080.
          readinessProbe:
            httpGet:
              path: /healthz
              port: 8080
              scheme: HTTP
            initialDelaySeconds: 5
            timeoutSeconds: 5
            periodSeconds: 10
            successThreshold: 1
            failureThreshold: 3
          terminationMessagePath: /dev/termination-log
          terminationMessagePolicy: File
5、daemonset: node-exporter
# node-exporter DaemonSet in kube-system. The grafanak8sapp: "true" and
# daemon: node-exporter pod labels are used by the 'kubernetes-kube-state'
# Prometheus job to select these pods and to derive the nodename label.
#
# apps/v1 is used instead of extensions/v1beta1: DaemonSet was removed from
# extensions/v1beta1 in Kubernetes 1.16, and the Deployments in this document
# already use apps/v1. The deprecated spec.templateGeneration field does not
# exist in apps/v1 and is omitted, as are the server-populated metadata fields
# (uid, resourceVersion, selfLink, generation, creationTimestamp) left over
# from exporting the live object.
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: node-exporter
  namespace: kube-system
  labels:
    daemon: node-exporter
    grafanak8sapp: "true"
spec:
  revisionHistoryLimit: 10
  selector:
    matchLabels:
      daemon: node-exporter
      grafanak8sapp: "true"
  # Kept explicit: pods are only replaced when they are deleted manually.
  updateStrategy:
    type: OnDelete
  template:
    metadata:
      name: node-exporter
      labels:
        daemon: node-exporter
        grafanak8sapp: "true"
    spec:
      serviceAccountName: prometheus
      # The exporter shares the node's network and PID namespaces.
      hostNetwork: true
      hostPID: true
      dnsPolicy: ClusterFirst
      restartPolicy: Always
      schedulerName: default-scheduler
      securityContext: {}
      terminationGracePeriodSeconds: 30
      containers:
        - name: node-exporter
          image: quay.io/prometheus/node-exporter:v0.15.0
          imagePullPolicy: IfNotPresent
          # Point the exporter at the host's /proc and /sys as mounted below.
          args:
            - "--path.procfs=/proc_host"
            - "--path.sysfs=/host_sys"
          ports:
            - name: node-exporter
              containerPort: 9100
              hostPort: 9100
              protocol: TCP
          resources: {}
          terminationMessagePath: /dev/termination-log
          terminationMessagePolicy: File
          volumeMounts:
            - name: sys
              mountPath: /host_sys
              readOnly: true
            - name: proc
              mountPath: /proc_host
              readOnly: true
      volumes:
        - name: proc
          hostPath:
            path: /proc
            type: ""
        - name: sys
          hostPath:
            path: /sys
            type: ""
6、configmap:
# ConfigMap holding prometheus.yml; the Prometheus Deployment mounts it at
# /etc/prometheus. Everything below "prometheus.yml: |-" is the literal file
# content handed to Prometheus (including its own comments).
# NOTE(review): the in-file comments mention an API-server scrape job, but the
# jobs defined are kubernetes-kubelet, kubernetes-cadvisor and
# kubernetes-kube-state - confirm whether that comment is stale.
apiVersion: v1
kind: ConfigMap
metadata:
  name: prometheus-server-conf
  namespace: kube-system
  labels:
    name: prometheus-server-conf
data:
  prometheus.yml: |-
    global:
      scrape_interval: 30s
      evaluation_interval: 30s
    # A scrape configuration for running Prometheus on a Kubernetes cluster.
    # This uses separate scrape configs for cluster components (i.e. API server, node)
    # and services to allow each to use different authentication configs.
    #
    # Kubernetes labels will be added as Prometheus labels on metrics via the
    # `labelmap` relabeling action.
    #
    # If you are using Kubernetes 1.7.2 or earlier, please take note of the comments
    # for the kubernetes-cadvisor job; you will need to edit or remove this job.
    # Scrape config for API servers.
    #
    # Kubernetes exposes API servers as endpoints to the default/kubernetes
    # service so this uses `endpoints` role and uses relabelling to only keep
    # the endpoints associated with the default/kubernetes service using the
    # default named port `https`. This works for single API server deployments as
    # well as HA API server deployments.
    scrape_configs:
      - job_name: 'kubernetes-kubelet'
        scheme: https
        tls_config:
          ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
          insecure_skip_verify: true
        bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
        kubernetes_sd_configs:
          - role: node
        relabel_configs:
          - action: labelmap
            regex: __meta_kubernetes_node_label_(.+)
          - target_label: __address__
            replacement: kubernetes.default.svc:443
          - source_labels: [__meta_kubernetes_node_name]
            regex: (.+)
            target_label: __metrics_path__
            replacement: /api/v1/nodes/${1}/proxy/metrics
      - job_name: 'kubernetes-cadvisor'
        scheme: https
        tls_config:
          ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
          insecure_skip_verify: true
        bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
        kubernetes_sd_configs:
          - role: node
        relabel_configs:
          - action: labelmap
            regex: __meta_kubernetes_node_label_(.+)
          - target_label: __address__
            replacement: kubernetes.default.svc:443
          - source_labels: [__meta_kubernetes_node_name]
            regex: (.+)
            target_label: __metrics_path__
            replacement: /api/v1/nodes/${1}/proxy/metrics/cadvisor
      - job_name: 'kubernetes-kube-state'
        kubernetes_sd_configs:
          - role: pod
        relabel_configs:
          - action: labelmap
            regex: __meta_kubernetes_pod_label_(.+)
          - source_labels: [__meta_kubernetes_namespace]
            action: replace
            target_label: kubernetes_namespace
          - source_labels: [__meta_kubernetes_pod_name]
            action: replace
            target_label: kubernetes_pod_name
          - source_labels: [__meta_kubernetes_pod_label_grafanak8sapp]
            regex: .*true.*
            action: keep
          - source_labels: ['__meta_kubernetes_pod_label_daemon', '__meta_kubernetes_pod_node_name']
            regex: 'node-exporter;(.*)'
            action: replace
            target_label: nodename
三、配置
1、prometheus.yml:
# Prometheus scrape jobs; same three jobs as in the prometheus-server-conf
# ConfigMap.
# NOTE(review): insecure_skip_verify: true turns off verification of the
# server certificate even though ca_file is supplied - confirm this is
# intended.
scrape_configs:
  # Kubelet metrics of every node, fetched through the API server node proxy.
  - job_name: kubernetes-kubelet
    scheme: https
    bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
    tls_config:
      ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
      insecure_skip_verify: true
    kubernetes_sd_configs:
      - role: node
    relabel_configs:
      # Copy every Kubernetes node label onto the target.
      - action: labelmap
        regex: '__meta_kubernetes_node_label_(.+)'
      # Send the request to the API server instead of the node itself.
      - target_label: __address__
        replacement: 'kubernetes.default.svc:443'
      # Build the per-node proxy path from the node name.
      - source_labels:
          - __meta_kubernetes_node_name
        regex: '(.+)'
        target_label: __metrics_path__
        replacement: '/api/v1/nodes/${1}/proxy/metrics'

  # cAdvisor metrics of every node; identical to the kubelet job except for
  # the /metrics/cadvisor suffix of the proxy path.
  - job_name: kubernetes-cadvisor
    scheme: https
    bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
    tls_config:
      ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
      insecure_skip_verify: true
    kubernetes_sd_configs:
      - role: node
    relabel_configs:
      - action: labelmap
        regex: '__meta_kubernetes_node_label_(.+)'
      - target_label: __address__
        replacement: 'kubernetes.default.svc:443'
      - source_labels:
          - __meta_kubernetes_node_name
        regex: '(.+)'
        target_label: __metrics_path__
        replacement: '/api/v1/nodes/${1}/proxy/metrics/cadvisor'

  # Pods labelled grafanak8sapp=true (kube-state-metrics and node-exporter in
  # this document), scraped directly.
  - job_name: kubernetes-kube-state
    kubernetes_sd_configs:
      - role: pod
    relabel_configs:
      # Copy every Kubernetes pod label onto the target.
      - action: labelmap
        regex: '__meta_kubernetes_pod_label_(.+)'
      - action: replace
        source_labels:
          - __meta_kubernetes_namespace
        target_label: kubernetes_namespace
      - action: replace
        source_labels:
          - __meta_kubernetes_pod_name
        target_label: kubernetes_pod_name
      # Drop every pod whose grafanak8sapp label does not contain "true".
      - action: keep
        source_labels:
          - __meta_kubernetes_pod_label_grafanak8sapp
        regex: '.*true.*'
      # For pods labelled daemon=node-exporter, store the node name in the
      # nodename label (the two source labels are joined with ';').
      - action: replace
        source_labels:
          - __meta_kubernetes_pod_label_daemon
          - __meta_kubernetes_pod_node_name
        regex: 'node-exporter;(.*)'
        target_label: nodename
四、監控
1、導入模版;
2、添加數據源爲prometheus類型