前言
Kubernetes 支持HPA模塊進行容器伸縮,默認支持CPU和內存等指標。原生的HPA基於Heapster,不支持GPU指標的伸縮,但是支持通過CustomMetrics的方式進行HPA指標的擴展。我們可以通過部署一個基於Prometheus Adapter 的CustomMetricServer,它能將Prometheus指標註冊到APIServer接口,提供給HPA調用。通過配置,HPA將CustomMetric作爲擴縮容指標,即可實現GPU指標的彈性伸縮。
阿里雲容器Kubernetes監控-GPU監控
# kubectl get node
NAME STATUS ROLES AGE VERSION
master-11 Ready master 466d v1.18.20
master-12 Ready master 466d v1.18.20
master-13 Ready master 466d v1.18.20
slave-gpu-103 Ready <none> 159d v1.18.20
slave-gpu-105 Ready <none> 160d v1.18.20
slave-gpu-109 Ready <none> 160d v1.18.20
slave-rtx3080-gpu-111 Ready <none> 6d3h v1.18.20
kubectl label node slave-gpu-103 aliyun.accelerator/nvidia_name=yes
kubectl taint node slave-gpu-103 gpu_type=moviebook:NoSchedule
- 部署Prometheus 的GPU 採集器,網絡採用hostNetwork
# cat gpu-exporter.yaml
# DaemonSet: runs one GPU exporter pod on every node carrying the
# aliyun.accelerator/nvidia_name label (applied at L13 above).
apiVersion: apps/v1
kind: DaemonSet
metadata:
  namespace: monitoring
  name: ack-prometheus-gpu-exporter
spec:
  selector:
    matchLabels:
      k8s-app: ack-prometheus-gpu-exporter
  template:
    metadata:
      labels:
        k8s-app: ack-prometheus-gpu-exporter
    spec:
      affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
              - matchExpressions:
                  # Only schedule onto nodes labeled as GPU nodes.
                  - key: aliyun.accelerator/nvidia_name
                    operator: Exists
      # hostNetwork so the exporter is reachable directly on the node IP
      # (Prometheus scrapes <node-ip>:9445, see scrape config below).
      hostNetwork: true
      hostPID: true
      containers:
        - name: node-gpu-exporter
          image: registry.cn-hangzhou.aliyuncs.com/acs/gpu-prometheus-exporter:0.1-5cc5f27
          imagePullPolicy: Always
          ports:
            - name: http-metrics
              containerPort: 9445
          env:
            - name: MY_NODE_NAME
              valueFrom:
                fieldRef:
                  apiVersion: v1
                  fieldPath: spec.nodeName
          resources:
            requests:
              memory: 50Mi
              cpu: 200m
            limits:
              memory: 100Mi
              cpu: 300m
          volumeMounts:
            - mountPath: /var/run/docker.sock
              name: docker-sock
      volumes:
        - hostPath:
            path: /var/run/docker.sock
            type: File
          name: docker-sock
      tolerations:
        - effect: NoSchedule
          key: server_type
          operator: Exists
        # FIX: the taint applied earlier is gpu_type=moviebook:NoSchedule,
        # but only server_type was tolerated — without this entry the
        # exporter pods cannot schedule onto the tainted GPU nodes.
        - effect: NoSchedule
          key: gpu_type
          operator: Exists
---
# Service: cluster-internal endpoint for the GPU exporter pods.
apiVersion: v1
kind: Service
metadata:
  name: node-gpu-exporter
  namespace: monitoring
  labels:
    k8s-app: ack-prometheus-gpu-exporter
spec:
  type: ClusterIP
  ports:
    - name: http-metrics
      port: 9445
      protocol: TCP
  selector:
    k8s-app: ack-prometheus-gpu-exporter
---
# ServiceMonitor: lets a prometheus-operator-managed Prometheus discover
# the exporter Service above and scrape its http-metrics port every 30s.
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: ack-prometheus-gpu-exporter
  labels:
    release: ack-prometheus-operator
    app: ack-prometheus-gpu-exporter
  namespace: monitoring
spec:
  selector:
    matchLabels:
      k8s-app: ack-prometheus-gpu-exporter
  namespaceSelector:
    matchNames:
      - monitoring
  endpoints:
    - port: http-metrics
      interval: 30s
#創建GPU 採集器
kubectl apply -f gpu-exporter.yaml
- prometheus 增加監控GPU 服務器實例列表
# kubectl edit cm -n prometheus prometheus-conf
# Scrape job added to the Prometheus config: targets are the node IPs of
# the GPU nodes (hostNetwork exporter listens on :9445 on each node).
- job_name: 'GPU服務監控'
  static_configs:
    # Alternative: scrape via the in-cluster Service instead of node IPs.
    # - targets: ['node-gpu-exporter.monitoring:9445']
    - targets:
        - '10.147.100.103:9445'
        - '10.147.100.105:9445'
        - '10.147.100.111:9445'
        - '10.147.100.109:9445'
#重啓prometheus 使配置文件生效
#查看prometheus gpu信息相關指標 nvidia_gpu_duty_cycle
部署CustomMetricServer
# Prepare a self-signed CA and a serving certificate for the
# custom-metrics adapter, then store them in a Secret.
mkdir /opt/gpu/
cd /opt/gpu/
set -e
set -o pipefail
set -u
b64_opts='--wrap=0'
export PURPOSE=metrics
# Self-signed CA (metrics-ca.key / metrics-ca.crt) used below by cfssl.
openssl req -x509 -sha256 -new -nodes -days 365 -newkey rsa:2048 \
  -keyout "${PURPOSE}-ca.key" -out "${PURPOSE}-ca.crt" -subj "/CN=ca"
echo '{"signing":{"default":{"expiry":"43800h","usages":["signing","key encipherment","'${PURPOSE}'"]}}}' > "${PURPOSE}-ca-config.json"
export SERVICE_NAME=custom-metrics-apiserver
# NOTE(review): these SANs use the "monitoring" namespace, but the adapter
# Service below is created in kube-system; this only works because the
# APIService sets insecureSkipTLSVerify: true — confirm intended namespace.
export ALT_NAMES='"custom-metrics-apiserver.monitoring","custom-metrics-apiserver.monitoring.svc"'
echo "{\"CN\":\"${SERVICE_NAME}\", \"hosts\": [${ALT_NAMES}], \"key\": {\"algo\": \"rsa\",\"size\": 2048}}" | \
cfssl gencert -ca=metrics-ca.crt -ca-key=metrics-ca.key -config=metrics-ca-config.json - | cfssljson -bare apiserver
# Secret manifest embedding the base64-encoded serving cert/key.
cat <<-EOF > cm-adapter-serving-certs.yaml
apiVersion: v1
kind: Secret
metadata:
  name: cm-adapter-serving-certs
data:
  serving.crt: $(base64 ${b64_opts} < apiserver.pem)
  serving.key: $(base64 ${b64_opts} < apiserver-key.pem)
EOF
# Create the Secret (namespace supplied via -n kube-system)
kubectl -n kube-system apply -f cm-adapter-serving-certs.yaml
#查看證書
#kubectl get secrets -n kube-system |grep cm-adapter-serving-certs
cm-adapter-serving-certs Opaque 2 49s
- 部署PROMETHEUS CUSTOMMETRIC ADAPTER
# cat custom-metrics-apiserver.yaml
# Deployment: the Prometheus Adapter, serving the custom.metrics.k8s.io
# API backed by queries against Prometheus.
apiVersion: apps/v1
kind: Deployment
metadata:
  namespace: kube-system
  name: custom-metrics-apiserver
  labels:
    app: custom-metrics-apiserver
spec:
  replicas: 1
  selector:
    matchLabels:
      app: custom-metrics-apiserver
  template:
    metadata:
      labels:
        app: custom-metrics-apiserver
      name: custom-metrics-apiserver
    spec:
      serviceAccountName: custom-metrics-apiserver
      containers:
        - name: custom-metrics-apiserver
          # image: registry.cn-beijing.aliyuncs.com/test-hub/k8s-prometheus-adapter-amd64
          image: quay.io/coreos/k8s-prometheus-adapter-amd64:v0.5.0
          args:
            - --secure-port=6443
            - --tls-cert-file=/var/run/serving-cert/serving.crt
            - --tls-private-key-file=/var/run/serving-cert/serving.key
            - --logtostderr=true
            # Must point at the in-cluster Prometheus instance.
            - --prometheus-url=http://prometheus-service.prometheus.svc.cluster.local:9090/
            - --metrics-relist-interval=1m
            - --v=10
            - --config=/etc/adapter/config.yaml
          ports:
            - containerPort: 6443
          volumeMounts:
            - mountPath: /var/run/serving-cert
              name: volume-serving-cert
              readOnly: true
            - mountPath: /etc/adapter/
              name: config
              readOnly: true
            - mountPath: /tmp
              name: tmp-vol
      volumes:
        - name: volume-serving-cert
          secret:
            # Secret created by the cert-preparation script above.
            secretName: cm-adapter-serving-certs
        - name: config
          configMap:
            name: adapter-config
        - name: tmp-vol
          emptyDir: {}
---
# ServiceAccount used by the adapter Deployment.
kind: ServiceAccount
apiVersion: v1
metadata:
  name: custom-metrics-apiserver
  namespace: kube-system
---
# Service fronting the adapter; the APIService below routes
# custom.metrics.k8s.io traffic here.
apiVersion: v1
kind: Service
metadata:
  name: custom-metrics-apiserver
  namespace: kube-system
spec:
  ports:
    - port: 443
      targetPort: 6443
  selector:
    app: custom-metrics-apiserver
---
# ClusterRole granting full access to custom.metrics.k8s.io resources.
# Note: ClusterRoles are cluster-scoped — the original manifest carried a
# "namespace: kube-system" field which the API server ignores; removed.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: custom-metrics-server-resources
rules:
  - apiGroups:
      - custom.metrics.k8s.io
    resources: ["*"]
    verbs: ["*"]
---
# Adapter rule config: exposes every nvidia_gpu_* series (matched via the
# uuid label present on GPU exporter metrics) as two custom metrics:
#   <name>_over_time — 3-minute average, for smoothed HPA decisions
#   <name>_current   — instantaneous value
apiVersion: v1
kind: ConfigMap
metadata:
  name: adapter-config
  namespace: kube-system
data:
  config.yaml: |
    rules:
      - seriesQuery: '{uuid!=""}'
        resources:
          overrides:
            node_name: {resource: "node"}
            pod_name: {resource: "pod"}
            namespace_name: {resource: "namespace"}
        name:
          matches: ^nvidia_gpu_(.*)$
          as: "${1}_over_time"
        metricsQuery: ceil(avg_over_time(<<.Series>>{<<.LabelMatchers>>}[3m]))
      - seriesQuery: '{uuid!=""}'
        resources:
          overrides:
            node_name: {resource: "node"}
            pod_name: {resource: "pod"}
            namespace_name: {resource: "namespace"}
        name:
          matches: ^nvidia_gpu_(.*)$
          as: "${1}_current"
        metricsQuery: <<.Series>>{<<.LabelMatchers>>}
---
# ClusterRole: lets the adapter list/read the core resources it maps
# metrics onto (namespaces, pods, services).
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: custom-metrics-resource-reader
rules:
  - apiGroups:
      - ""
    resources:
      - namespaces
      - pods
      - services
    verbs:
      - get
      - list
---
# Grants the HPA controller's service account access to the custom
# metrics API so it can read the scaling metrics.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: hpa-controller-custom-metrics
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: custom-metrics-server-resources
subjects:
  - kind: ServiceAccount
    name: horizontal-pod-autoscaler
    namespace: kube-system
#創建配置文件
kubectl apply -f custom-metrics-apiserver.yaml
#查看pod 狀態
# kubectl get pod -n kube-system |grep custom-metrics-apiserver
custom-metrics-apiserver-56777c5757-b422b 1/1 Running 0 64s
# cat custom-metrics-apiserver-rbac.yaml
# APIService: registers custom.metrics.k8s.io/v1beta1 with the aggregation
# layer, routed to the adapter Service in kube-system.
# Upgraded apiregistration.k8s.io/v1beta1 -> v1 (available since k8s 1.10,
# v1beta1 removed in 1.22; cluster here is v1.18.20). APIService is
# cluster-scoped, so the spurious "namespace:" metadata field was removed.
apiVersion: apiregistration.k8s.io/v1
kind: APIService
metadata:
  name: v1beta1.custom.metrics.k8s.io
spec:
  service:
    name: custom-metrics-apiserver
    namespace: kube-system
  group: custom.metrics.k8s.io
  version: v1beta1
  # TLS verification skipped — the serving cert's SANs reference the
  # monitoring namespace, not kube-system (see cert script above).
  insecureSkipTLSVerify: true
  groupPriorityMinimum: 100
  versionPriority: 100
---
# Binds the resource-reader ClusterRole to the adapter's service account.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: custom-metrics-resource-reader
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: custom-metrics-resource-reader
subjects:
  - kind: ServiceAccount
    name: custom-metrics-apiserver
    namespace: kube-system
---
# Lets the adapter delegate authn/authz decisions to the kube-apiserver.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: custom-metrics:system:auth-delegator
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: system:auth-delegator
subjects:
  - kind: ServiceAccount
    name: custom-metrics-apiserver
    namespace: kube-system
---
# Lets the adapter read the extension-apiserver-authentication ConfigMap
# (client-CA info) in kube-system.
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  name: custom-metrics-auth-reader
  namespace: kube-system
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: Role
  name: extension-apiserver-authentication-reader
subjects:
  - kind: ServiceAccount
    name: custom-metrics-apiserver
    namespace: kube-system
#創建rbac
kubectl apply -f custom-metrics-apiserver-rbac.yaml
#部署完成後,可以通過customMetric的ApiServer調用,驗證Prometheus Adapter部署成功
# kubectl get --raw "/apis/custom.metrics.k8s.io/v1beta1/namespaces/default/pods/*/temperature_celsius_current"
{"kind":"MetricValueList","apiVersion":"custom.metrics.k8s.io/v1beta1","metadata":{"selfLink":"/apis/custom.metrics.k8s.io/v1beta1/namespaces/default/pods/%2A/temperature_celsius_current"},"items":[]}
伸縮指標
#伸縮指標信息
測試GPU 服務的彈性擴縮容
| 指標名稱 | 說明 | 單位 |
| --- | --- | --- |
| duty_cycle_current | GPU利用率 | 百分比 |
| memory_used_bytes_current | 顯存使用量 | 字節 |
部署HPA
# cat test-hap.yaml
apiVersion: autoscaling/v2beta1
kind: HorizontalPodAutoscaler
metadata:
name: gpu-hpa-bert-intent-detection
namespace: alot-stream
spec:
scaleTargetRef:
apiVersion: apps/v1
kind: Deployment
name: bert-intent-detection
minReplicas: 1
maxReplicas: 4
metrics:
- type: Pods
pods:
metricName: duty_cycle_current #Pod的GPU利用率。
targetAverageValue: 20 #當GPU利用率超過20%,觸發擴容。
#創建文件
kubectl apply -f test-hap.yaml
#查看hpa
# kubectl get hpa -n alot-stream
NAME REFERENCE TARGETS MINPODS MAXPODS REPLICAS AGE
gpu-hpa-bert-intent-detection Deployment/bert-intent-detection 0/20 1 4 1 21s
#