基於GPU 顯卡在k8s 集羣上實現hpa 功能

前言

Kubernetes 支持HPA模塊進行容器伸縮,默認支持CPU和內存等指標。原生的HPA基於Heapster,不支持GPU指標的伸縮,但是支持通過CustomMetrics的方式進行HPA指標的擴展。我們可以部署一個基於Prometheus Adapter 的CustomMetricServer,它能將Prometheus指標註冊到APIServer接口,提供給HPA調用。通過配置,HPA將CustomMetric作爲擴縮容指標,即可基於GPU指標進行彈性伸縮。

阿里雲容器Kubernetes監控-GPU監控

  • k8s集羣準備好gpu 服務器
# kubectl get node 
NAME                    STATUS   ROLES    AGE    VERSION
master-11               Ready    master   466d   v1.18.20
master-12               Ready    master   466d   v1.18.20
master-13               Ready    master   466d   v1.18.20
slave-gpu-103           Ready    <none>   159d   v1.18.20
slave-gpu-105           Ready    <none>   160d   v1.18.20
slave-gpu-109           Ready    <none>   160d   v1.18.20
slave-rtx3080-gpu-111   Ready    <none>   6d3h   v1.18.20
  • 給每個GPU 服務器打上標籤、並添加污點
kubectl label node slave-gpu-103 aliyun.accelerator/nvidia_name=yes
kubectl taint node slave-gpu-103 gpu_type=moviebook:NoSchedule
  • 部署Prometheus 的GPU 採集器,網絡採用hostNetwork
# cat gpu-exporter.yaml
# GPU-metrics exporter: one pod per GPU node, scraped by Prometheus on :9445.
apiVersion: apps/v1
kind: DaemonSet
metadata:
  namespace: monitoring
  name: ack-prometheus-gpu-exporter
spec:
  selector:
    matchLabels:
      k8s-app: ack-prometheus-gpu-exporter
  template:
    metadata:
      labels:
        k8s-app: ack-prometheus-gpu-exporter
    spec:
      affinity:
        nodeAffinity:
          # Only schedule onto nodes labelled as GPU nodes earlier in this
          # walkthrough (kubectl label node ... aliyun.accelerator/nvidia_name=...).
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
            - matchExpressions:
              - key: aliyun.accelerator/nvidia_name
                operator: Exists
      # hostNetwork: the exporter binds port 9445 directly on the node, so
      # Prometheus can scrape node IPs via static_configs.
      hostNetwork: true
      hostPID: true
      containers:
      - name: node-gpu-exporter
        image: registry.cn-hangzhou.aliyuncs.com/acs/gpu-prometheus-exporter:0.1-5cc5f27
        imagePullPolicy: Always
        ports:
        - name: http-metrics
          containerPort: 9445
        env:
          # Downward API: expose the scheduling node's name to the exporter.
          - name: MY_NODE_NAME
            valueFrom:
              fieldRef:
                apiVersion: v1
                fieldPath: spec.nodeName
        resources:
          requests:
            memory: 50Mi
            cpu: 200m
          limits:
            memory: 100Mi
            cpu: 300m
        volumeMounts:
        - mountPath: /var/run/docker.sock
          name: docker-sock
      volumes:
      # Docker socket is needed to map GPU processes to containers/pods.
      - hostPath:
          path: /var/run/docker.sock
          type: File
        name: docker-sock
      tolerations:
      # BUGFIX: the GPU nodes are tainted with key "gpu_type"
      # (kubectl taint node ... gpu_type=moviebook:NoSchedule), but the
      # original toleration used key "server_type", so the DaemonSet pods
      # would never be scheduled onto the tainted GPU nodes.
      - effect: NoSchedule
        key: gpu_type
        operator: Exists
---
# ClusterIP Service in front of the exporter DaemonSet; selected by the
# ServiceMonitor below via the k8s-app label.
apiVersion: v1
kind: Service
metadata:
  name: node-gpu-exporter
  namespace: monitoring
  labels:
    k8s-app: ack-prometheus-gpu-exporter
spec:
  type: ClusterIP
  ports:
  - name: http-metrics
    # targetPort defaults to "port"; exporter pods listen on 9445 (hostNetwork).
    port: 9445
    protocol: TCP
  selector:
    k8s-app: ack-prometheus-gpu-exporter

---
# Prometheus-operator ServiceMonitor: instructs the operator-managed
# Prometheus to scrape the node-gpu-exporter Service every 30s.
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: ack-prometheus-gpu-exporter
  labels:
    # NOTE(review): "release" must match the Prometheus CR's
    # serviceMonitorSelector for this monitor to be picked up — verify.
    release: ack-prometheus-operator
    app: ack-prometheus-gpu-exporter
  namespace: monitoring
spec:
  selector:
    matchLabels:
      k8s-app: ack-prometheus-gpu-exporter
  namespaceSelector:
    matchNames:
    - monitoring
  endpoints:
  - port: http-metrics
    interval: 30s

#創建GPU 採集器
kubectl apply  -f  gpu-exporter.yaml 
  • prometheus 增加監控GPU 服務器實例列表
# kubectl edit cm -n prometheus  prometheus-conf 

      - job_name: 'GPU服務監控'
        static_configs:
          #- targets: ['node-gpu-exporter.monitoring:9445']
          - targets:
            - 10.147.100.103:9445
            - 10.147.100.105:9445
            - 10.147.100.111:9445
            - 10.147.100.109:9445
#重啓prometheus 使配置文件生效

#查看prometheus gpu信息相關指標 nvidia_gpu_duty_cycle

部署CustomMetricServer

  • 準備PROMETHEUS ADAPTER的證書
#準備證書
mkdir /opt/gpu/
cd /opt/gpu/

# Fail fast: abort on errors, pipeline failures, and use of unset variables.
set -e
set -o pipefail
set -u
# GNU base64: keep the encoded output on one line (required for YAML scalars).
b64_opts='--wrap=0'

export PURPOSE=metrics
# Create a self-signed CA (metrics-ca.key / metrics-ca.crt, valid 365 days)
# used to sign the adapter's serving certificate below.
openssl req -x509 -sha256 -new -nodes -days 365 -newkey rsa:2048 -keyout ${PURPOSE}-ca.key -out ${PURPOSE}-ca.crt -subj "/CN=ca"
echo '{"signing":{"default":{"expiry":"43800h","usages":["signing","key encipherment","'${PURPOSE}'"]}}}' > "${PURPOSE}-ca-config.json"

export SERVICE_NAME=custom-metrics-apiserver
# BUGFIX: the adapter Service is deployed in kube-system (see
# custom-metrics-apiserver.yaml), not "monitoring", so the certificate SANs
# must use the kube-system DNS names. The original "monitoring" names only
# worked because the APIService sets insecureSkipTLSVerify: true.
export ALT_NAMES='"custom-metrics-apiserver.kube-system","custom-metrics-apiserver.kube-system.svc"'
# Issue the serving key pair (apiserver.pem / apiserver-key.pem) with cfssl,
# signed by the CA generated above.
echo "{\"CN\":\"${SERVICE_NAME}\", \"hosts\": [${ALT_NAMES}], \"key\": {\"algo\": \"rsa\",\"size\": 2048}}" | \
           cfssl gencert -ca=metrics-ca.crt -ca-key=metrics-ca.key -config=metrics-ca-config.json - | cfssljson -bare apiserver

# Render the Secret manifest consumed by the adapter Deployment; it is applied
# later with: kubectl -n kube-system apply -f cm-adapter-serving-certs.yaml
cat <<-EOF > cm-adapter-serving-certs.yaml
apiVersion: v1
kind: Secret
metadata:
  name: cm-adapter-serving-certs
data:
  serving.crt: $(base64 ${b64_opts} < apiserver.pem)
  serving.key: $(base64 ${b64_opts} < apiserver-key.pem)
EOF

#創建配置文件 
kubectl -n kube-system apply -f cm-adapter-serving-certs.yaml

#查看證書
#kubectl get secrets  -n kube-system  |grep cm-adapter-serving-certs 
cm-adapter-serving-certs                         Opaque                                2      49s

  • 部署PROMETHEUS CUSTOMMETRIC ADAPTER
# cat  custom-metrics-apiserver.yaml
# Prometheus adapter Deployment: serves the custom.metrics.k8s.io API backed
# by Prometheus queries, so the HPA controller can consume GPU metrics.
apiVersion: apps/v1
kind: Deployment
metadata:
  namespace: kube-system
  name: custom-metrics-apiserver
  labels:
    app: custom-metrics-apiserver
spec:
  replicas: 1
  selector:
    matchLabels:
      app: custom-metrics-apiserver
  template:
    metadata:
      labels:
        app: custom-metrics-apiserver
      name: custom-metrics-apiserver
    spec:
      serviceAccountName: custom-metrics-apiserver
      containers:
      - name: custom-metrics-apiserver
        #image: registry.cn-beijing.aliyuncs.com/test-hub/k8s-prometheus-adapter-amd64
        image: quay.io/coreos/k8s-prometheus-adapter-amd64:v0.5.0
        args:
        - --secure-port=6443
        # Serving cert/key generated by the cert-prep script and stored in
        # the cm-adapter-serving-certs Secret.
        - --tls-cert-file=/var/run/serving-cert/serving.crt
        - --tls-private-key-file=/var/run/serving-cert/serving.key
        - --logtostderr=true
        # Prometheus endpoint queried by the adapter (namespace "prometheus").
        - --prometheus-url=http://prometheus-service.prometheus.svc.cluster.local:9090/
        # How often the adapter re-discovers available metric series.
        - --metrics-relist-interval=1m
        # NOTE(review): -v=10 is maximum log verbosity — consider lowering once
        # the setup is validated.
        - --v=10
        # Discovery/naming rules come from the adapter-config ConfigMap.
        - --config=/etc/adapter/config.yaml
        ports:
        - containerPort: 6443
        volumeMounts:
        - mountPath: /var/run/serving-cert
          name: volume-serving-cert
          readOnly: true
        - mountPath: /etc/adapter/
          name: config
          readOnly: true
        - mountPath: /tmp
          name: tmp-vol
      volumes:
      - name: volume-serving-cert
        secret:
          secretName: cm-adapter-serving-certs
      - name: config
        configMap:
          name: adapter-config
      - name: tmp-vol
        emptyDir: {}
---
# ServiceAccount the adapter pods run as (bound to RBAC roles below).
kind: ServiceAccount
apiVersion: v1
metadata:
  name: custom-metrics-apiserver
  namespace: kube-system
---
# Service fronting the adapter; the APIService registration routes
# custom.metrics.k8s.io traffic here (port 443 -> container port 6443).
apiVersion: v1
kind: Service
metadata:
  name: custom-metrics-apiserver
  namespace: kube-system
spec:
  ports:
  - port: 443
    targetPort: 6443
  selector:
    app: custom-metrics-apiserver
---
# Full access to every resource served under the custom metrics API group,
# bound later to the HPA controller's service account.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  # NOTE: ClusterRole is cluster-scoped; the original manifest carried a
  # "namespace: kube-system" field here, which the API server silently
  # ignores — removed to avoid confusion.
  name: custom-metrics-server-resources
rules:
- apiGroups:
  - custom.metrics.k8s.io
  resources: ["*"]
  verbs: ["*"]
---
# Adapter discovery/naming rules. Both rules select every Prometheus series
# carrying a non-empty "uuid" label (the GPU-exporter series) and map the
# exporter labels node_name/pod_name/namespace_name onto Kubernetes resources.
apiVersion: v1
kind: ConfigMap
metadata:
  name: adapter-config
  namespace: kube-system
data:
  # Rule 1 exposes nvidia_gpu_<X> as "<X>_over_time" (3-minute average,
  # rounded up); rule 2 exposes the instantaneous value as "<X>_current".
  config.yaml: |
    rules:
    - seriesQuery: '{uuid!=""}'
      resources:
        overrides:
          node_name: {resource: "node"}
          pod_name: {resource: "pod"}
          namespace_name: {resource: "namespace"}
      name:
        matches: ^nvidia_gpu_(.*)$
        as: "${1}_over_time"
      metricsQuery: ceil(avg_over_time(<<.Series>>{<<.LabelMatchers>>}[3m]))
    - seriesQuery: '{uuid!=""}'
      resources:
        overrides:
          node_name: {resource: "node"}
          pod_name: {resource: "pod"}
          namespace_name: {resource: "namespace"}
      name:
        matches: ^nvidia_gpu_(.*)$
        as: "${1}_current"
      metricsQuery: <<.Series>>{<<.LabelMatchers>>}
---
# Read access to namespaces/pods/services — the adapter needs this to map
# metric labels onto Kubernetes objects.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: custom-metrics-resource-reader
rules:
- apiGroups:
  - ""
  resources:
  - namespaces
  - pods
  - services
  verbs:
  - get
  - list
---
# Grants the built-in HPA controller (service account
# "horizontal-pod-autoscaler") access to the custom metrics API via the
# custom-metrics-server-resources ClusterRole.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: hpa-controller-custom-metrics
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: custom-metrics-server-resources
subjects:
- kind: ServiceAccount
  name: horizontal-pod-autoscaler
  namespace: kube-system

#創建配置文件
kubectl apply -f custom-metrics-apiserver.yaml

#查看pod 狀態
# kubectl get pod -n kube-system  |grep custom-metrics-apiserver
custom-metrics-apiserver-56777c5757-b422b   1/1     Running   0          64s
  • 角色授權
# cat custom-metrics-apiserver-rbac.yaml
# Registers custom.metrics.k8s.io/v1beta1 with the aggregation layer and
# routes it to the Prometheus-adapter Service in kube-system.
# Upgraded apiregistration.k8s.io/v1beta1 -> v1: v1 has been available since
# Kubernetes 1.10 and v1beta1 is deprecated (removed in 1.22); the cluster
# here runs v1.18.20. Also removed the "namespace" field — APIService is
# cluster-scoped and the field was ignored.
apiVersion: apiregistration.k8s.io/v1
kind: APIService
metadata:
  name: v1beta1.custom.metrics.k8s.io
spec:
  service:
    name: custom-metrics-apiserver
    namespace: kube-system
  group: custom.metrics.k8s.io
  version: v1beta1
  # The adapter serves a self-signed certificate (cm-adapter-serving-certs),
  # so TLS verification by the aggregation layer is skipped.
  insecureSkipTLSVerify: true
  groupPriorityMinimum: 100
  versionPriority: 100
---
# Lets the adapter's service account read namespaces/pods/services
# (ClusterRole custom-metrics-resource-reader defined earlier).
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: custom-metrics-resource-reader
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: custom-metrics-resource-reader
subjects:
- kind: ServiceAccount
  name: custom-metrics-apiserver
  namespace: kube-system
---
# Standard aggregated-apiserver wiring: delegate authentication/authorization
# decisions to the core kube-apiserver.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: custom-metrics:system:auth-delegator
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: system:auth-delegator
subjects:
- kind: ServiceAccount
  name: custom-metrics-apiserver
  namespace: kube-system
---
# Allows the adapter to read the extension-apiserver-authentication ConfigMap
# (client-CA material) in kube-system.
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  name: custom-metrics-auth-reader
  namespace: kube-system
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: Role
  name: extension-apiserver-authentication-reader
subjects:
- kind: ServiceAccount
  name: custom-metrics-apiserver
  namespace: kube-system

#創建rbac
kubectl apply  -f custom-metrics-apiserver-rbac.yaml 
  • 驗證部署是否完成
#部署完成後,可以通過customMetric的ApiServer調用,驗證Prometheus Adapter部署成功
# kubectl get --raw "/apis/custom.metrics.k8s.io/v1beta1/namespaces/default/pods/*/temperature_celsius_current"
{"kind":"MetricValueList","apiVersion":"custom.metrics.k8s.io/v1beta1","metadata":{"selfLink":"/apis/custom.metrics.k8s.io/v1beta1/namespaces/default/pods/%2A/temperature_celsius_current"},"items":[]}

伸縮指標

#伸縮指標信息

測試GPU 服務的彈性擴縮容

| 指標名稱 | 說明 | 單位 |
| --- | --- | --- |
| duty_cycle_current | GPU利用率 | 百分比 |
| memory_used_bytes_current | 顯存使用量 | 字節 |

部署HPA

# cat test-hap.yaml 
# HPA driven by the custom GPU metric exposed through the Prometheus adapter.
apiVersion: autoscaling/v2beta1
kind: HorizontalPodAutoscaler
metadata:
  name: gpu-hpa-bert-intent-detection
  namespace: alot-stream
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: bert-intent-detection
  minReplicas: 1
  maxReplicas: 4
  metrics:
  - type: Pods
    pods:
      metricName: duty_cycle_current # per-Pod GPU utilization (duty cycle)
      targetAverageValue: 20 # scale out when average GPU utilization exceeds 20%

#創建文件
kubectl apply  -f test-hap.yaml

#查看hpa
# kubectl get hpa -n alot-stream 
NAME                            REFERENCE                          TARGETS   MINPODS   MAXPODS   REPLICAS   AGE
gpu-hpa-bert-intent-detection   Deployment/bert-intent-detection   0/20      1         4         1          21s

#
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章