preflight
Packages
- kubeadm
- kubelet
- kubectl
- kubernetes-cni
- socat
These have to be downloaded from outside the GFW (i.e. through a proxy); an install sketch follows.
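A minimal install sketch, assuming CentOS 7 and the upstream Kubernetes yum repository (point baseurl at a reachable mirror if the upstream repo is blocked, and pin versions as appropriate):

# configure the upstream Kubernetes repo (gpgcheck disabled here for brevity;
# enable it and add the key URLs in a real setup)
cat > /etc/yum.repos.d/kubernetes.repo << 'EOF'
[kubernetes]
name=Kubernetes
baseurl=https://packages.cloud.google.com/yum/repos/kubernetes-el7-x86_64
enabled=1
gpgcheck=0
EOF

# install the packages listed above, pinned to the 1.8.1 release
yum install -y kubeadm-1.8.1 kubelet-1.8.1 kubectl-1.8.1 kubernetes-cni socat
systemctl enable kubelet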
Images
At minimum, the following images are needed to bring up the cluster (1.8.1 as an example); a pull-and-retag sketch follows the list.
# basic
gcr.io/google_containers/pause-amd64:3.0
# kubernetes
gcr.io/google_containers/kube-apiserver-amd64:v1.8.1
gcr.io/google_containers/kube-controller-manager-amd64:v1.8.1
gcr.io/google_containers/kube-scheduler-amd64:v1.8.1
gcr.io/google_containers/kube-proxy-amd64:v1.8.1
# kube-dns
gcr.io/google_containers/k8s-dns-dnsmasq-nanny-amd64:1.14.5
gcr.io/google_containers/k8s-dns-sidecar-amd64:1.14.5
gcr.io/google_containers/k8s-dns-kube-dns-amd64:1.14.5
# cni network
quay.io/calico/node:v2.6.2
quay.io/calico/kube-controllers:v1.0.0
quay.io/calico/cni:v1.11.0
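Since gcr.io is not directly reachable in some environments, a common workaround is to pull the gcr.io images through a reachable mirror and retag them locally. A sketch, assuming a hypothetical mirror at registry.example.com that mirrors the google_containers namespace (the quay.io calico images can usually be pulled directly):

for img in \
  pause-amd64:3.0 \
  kube-apiserver-amd64:v1.8.1 \
  kube-controller-manager-amd64:v1.8.1 \
  kube-scheduler-amd64:v1.8.1 \
  kube-proxy-amd64:v1.8.1 \
  k8s-dns-dnsmasq-nanny-amd64:1.14.5 \
  k8s-dns-sidecar-amd64:1.14.5 \
  k8s-dns-kube-dns-amd64:1.14.5; do
  docker pull registry.example.com/google_containers/$img
  docker tag  registry.example.com/google_containers/$img gcr.io/google_containers/$img
  docker rmi  registry.example.com/google_containers/$img
done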
etcd cluster
etcd version: 3.2.7
walker-1:
[root@walker-1 dashboard]# cat /etc/etcd/etcd.conf | egrep -v "^($|#)"
ETCD_NAME=walker-1
ETCD_DATA_DIR="/var/lib/etcd/default.etcd"
ETCD_LISTEN_PEER_URLS="http://172.16.6.47:2380"
ETCD_LISTEN_CLIENT_URLS="http://172.16.6.47:2379,http://127.0.0.1:2379"
ETCD_INITIAL_ADVERTISE_PEER_URLS="http://172.16.6.47:2380"
ETCD_INITIAL_CLUSTER="walker-2=http://172.16.6.249:2380,walker-1=http://172.16.6.47:2380,walker-4=http://172.16.17.119:2380"
ETCD_INITIAL_CLUSTER_STATE="new"
ETCD_INITIAL_CLUSTER_TOKEN="etcd-cluster"
ETCD_ADVERTISE_CLIENT_URLS="http://172.16.6.47:2379"
walker-2:
[root@walker-2 kubernetes]# cat /etc/etcd/etcd.conf | egrep -v "^($|#)"
ETCD_NAME=walker-2
ETCD_DATA_DIR="/var/lib/etcd/default.etcd"
ETCD_LISTEN_PEER_URLS="http://172.16.6.249:2380"
ETCD_LISTEN_CLIENT_URLS="http://172.16.6.249:2379,http://127.0.0.1:2379"
ETCD_INITIAL_ADVERTISE_PEER_URLS="http://172.16.6.249:2380"
ETCD_INITIAL_CLUSTER="walker-1=http://172.16.6.47:2380,walker-2=http://172.16.6.249:2380,walker-4=http://172.16.17.119:2380"
ETCD_INITIAL_CLUSTER_STATE="new"
ETCD_INITIAL_CLUSTER_TOKEN="etcd-cluster"
ETCD_ADVERTISE_CLIENT_URLS="http://172.16.6.249:2379"
walker-4:
[root@walker-4 ~]# cat /etc/etcd/etcd.conf | egrep -v "^($|#)"
ETCD_NAME=walker-4
ETCD_DATA_DIR="/var/lib/etcd/default.etcd"
ETCD_LISTEN_PEER_URLS="http://172.16.17.119:2380"
ETCD_LISTEN_CLIENT_URLS="http://172.16.17.119:2379,http://127.0.0.1:2379"
ETCD_INITIAL_ADVERTISE_PEER_URLS="http://172.16.17.119:2380"
ETCD_INITIAL_CLUSTER="walker-2=http://172.16.6.249:2380,walker-1=http://172.16.6.47:2380,walker-4=http://172.16.17.119:2380"
ETCD_INITIAL_CLUSTER_STATE="new"
ETCD_INITIAL_CLUSTER_TOKEN="etcd-cluster"
ETCD_ADVERTISE_CLIENT_URLS="http://172.16.17.119:2379"
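Before initializing kubernetes it is worth confirming the etcd cluster is healthy. A quick check from any of the three nodes (etcd 3.2's etcdctl defaults to the v2 API, which matches the plain HTTP client URLs above):

etcdctl --endpoints=http://172.16.6.47:2379,http://172.16.6.249:2379,http://172.16.17.119:2379 cluster-health
etcdctl --endpoints=http://172.16.6.47:2379 member list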
Setting up the kubernetes cluster
Topology
graph TD
nodex --> lvs
lvs --> controller1
lvs --> controller2
server | ip | hostname |
---|---|---|
lvs | 172.16.6.56 | |
controller1 | 172.16.6.47 | walker-1 |
controller2 | 172.16.6.249 | walker-2 |
node1 | 172.16.17.119 | walker-4 |
controller1, controller2, and node1 together form the etcd cluster.
Initializing with kubeadm
Prepare an init configuration file, kubeadm-init.yaml:
apiVersion: kubeadm.k8s.io/v1alpha1
kind: MasterConfiguration
etcd:
  endpoints:
  - http://walker-1:2379
  - http://walker-2:2379
  - http://walker-4:2379
networking:
  dnsDomain: cluster.local
  serviceSubnet: 10.96.0.0/12
  podSubnet: 192.168.0.0/16
kubernetesVersion: v1.8.1
apiServerCertSANs:
- walker-1.novalocal
- walker-2
- 172.16.6.47
- 172.16.6.249
- 172.16.6.79 # vip
Run:
[root@walker-1 kubernetes]# kubeadm init --config ./kubeadm-init.yaml --skip-preflight-checks
[kubeadm] WARNING: kubeadm is in beta, please do not use it for production clusters.
[init] Using Kubernetes version: v1.8.1
[init] Using Authorization modes: [Node RBAC]
[preflight] Skipping pre-flight checks
[kubeadm] WARNING: starting in 1.8, tokens expire after 24 hours by default (if you require a non-expiring token use --token-ttl 0)
[certificates] Generated ca certificate and key.
[certificates] Generated apiserver certificate and key.
[certificates] apiserver serving cert is signed for DNS names [walker-1.novalocal kubernetes kubernetes.default kubernetes.default.svc kubernetes.default.svc.cluster.local walker-1.novalocal walker-2.novalocal] and IPs [10.96.0.1 172.16.6.47 172.16.6.47 172.16.6.249 172.16.6.79]
[certificates] Generated apiserver-kubelet-client certificate and key.
[certificates] Generated sa key and public key.
[certificates] Generated front-proxy-ca certificate and key.
[certificates] Generated front-proxy-client certificate and key.
[certificates] Valid certificates and keys now exist in "/etc/kubernetes/pki"
...
You can now join any number of machines by running the following on each node
as root:
kubeadm join --token 19f284.da47998c9abb01d3 172.16.6.47:6443 --discovery-token-ca-cert-hash sha256:0fd95a9bc67a7bf0ef42da968a0d55d92e52898ec37c971bd77ee501d845b538
After init, the kube-dns pod fails to come up, with events like:
Events:
Type Reason Age From Message
---- ------ ---- ---- -------
Normal Scheduled 15s default-scheduler Successfully assigned kube-dns-8bb5c479-brk2t to walker-1.novalocal
Normal SuccessfulMountVolume 15s kubelet, walker-1.novalocal MountVolume.SetUp succeeded for volume "kube-dns-config"
Normal SuccessfulMountVolume 15s kubelet, walker-1.novalocal MountVolume.SetUp succeeded for volume "kube-dns-token-4jcng"
Warning FailedCreatePodSandBox 5s (x2 over 11s) kubelet, walker-1.novalocal Failed create pod sandbox.
Warning FailedSync 5s (x2 over 11s) kubelet, walker-1.novalocal Error syncing pod
Normal SandboxChanged 4s (x2 over 10s) kubelet, walker-1.novalocal Pod sandbox changed, it will be killed and re-created.
This is almost always caused by a problem with the network plugin, so check the health of the CNI components:
[root@walker-1 ~]# kubectl get ds --namespace=kube-system
NAME DESIRED CURRENT READY UP-TO-DATE AVAILABLE NODE SELECTOR AGE
calico-node 0 0 0 0 0 <none> 21d
kube-proxy 1 1 1 1 1 <none> 21d
The desired count of calico-node is 0. This is caused by Kubernetes' taint and toleration mechanism: the master node is tainted, and the calico pods carry no matching toleration. Referring to the kube-proxy DaemonSet, add the following tolerations to the calico-related workloads (one way to apply them is sketched after the snippet):
tolerations:
- effect: NoSchedule
  key: node-role.kubernetes.io/master
- effect: NoSchedule
  key: node.cloudprovider.kubernetes.io/uninitialized
  value: "true"
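One way to apply this is to edit the calico workloads in place and add the tolerations under spec.template.spec. The resource names below are assumptions based on the calico manifest used here; check with kubectl get ds,deploy --namespace=kube-system first:

kubectl --namespace=kube-system edit ds calico-node
kubectl --namespace=kube-system edit deployment calico-kube-controllers
# verify that the pods get scheduled afterwards
kubectl --namespace=kube-system get po -o wide | grep calico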
The calico containers can then run on the master, and kube-dns starts successfully.
[root@walker-1 kubernetes]# kubectl get nodes
NAME STATUS ROLES AGE VERSION
walker-1.novalocal Ready master 11m v1.8.1
Adding a standby controller
For convenience, copy all the files under /etc/kubernetes on controller1 to controller2:
[root@walker-1 kubernetes]# scp -r /etc/kubernetes/* walker-2:/etc/kubernetes
First, a quick look at how a k8s cluster works. The apiserver acts as something like a gateway for the cluster: every request goes through it, including leader election among the multiple controller-managers and schedulers. Since it is the single entry point for communication within the cluster, the first thing to make redundant is the apiserver, so the first step is to get an apiserver running on controller2.
For security, k8s supports multiple authentication mechanisms, such as x509 certificates, service accounts, and static tokens. A cluster created by kubeadm uses certificates by default to prove the apiserver's identity, which is why /etc/kubernetes/pki/ contains the two server certificate files apiserver.crt and apiserver.key. kubeadm generates the apiserver certificate from the contents of the init configuration file. It can be inspected with:
[root@walker-1 kubernetes]# openssl x509 -noout -text -in /etc/kubernetes/pki/apiserver.crt
Certificate:
Data:
Version: 3 (0x2)
Serial Number: 8163852820871759178 (0x714bd4e2faa1c54a)
Signature Algorithm: sha256WithRSAEncryption
Issuer: CN=kubernetes
Validity
Not Before: Dec 19 07:17:25 2017 GMT
Not After : Dec 19 07:17:26 2018 GMT
Subject: CN=kube-apiserver
Subject Public Key Info:
Public Key Algorithm: rsaEncryption
Public-Key: (2048 bit)
Modulus:
...
Exponent: 65537 (0x10001)
X509v3 extensions:
X509v3 Key Usage: critical
Digital Signature, Key Encipherment
X509v3 Extended Key Usage:
TLS Web Server Authentication
X509v3 Subject Alternative Name:
DNS:walker-1.novalocal, DNS:kubernetes, DNS:kubernetes.default, DNS:kubernetes.default.svc, DNS:kubernetes.default.svc.cluster.local, DNS:walker-1.novalocal, DNS:walker-2, DNS:kubernetes, DNS:kubernetes.default, DNS:kubernetes.default.svc, DNS:kubernetes.default.svc.cluster, DNS:kubernetes.default.svc.cluster.local, IP Address:10.96.0.1, IP Address:172.16.6.47, IP Address:172.16.6.47, IP Address:172.16.6.249, IP Address:172.16.6.79, IP Address:127.0.0.1, IP Address:10.96.0.1
Signature Algorithm: sha256WithRSAEncryption
...
The SANs above already include controller1, controller2, and the VIP to be used, so the certificate can be used directly on controller2 without generating a new one. It is worth noting that every certificate under the pki directory can be reused (most are client certificates, and one copy is enough). The only thing that needs to change is the kubelet certificate for controller2.
Normally the flow of a request is:
client (kubectl) --> apiserver --> etcd
For example, when running kubectl get po, kubectl acts as the client and sends a request to the apiserver. Without credentials the request is treated as anonymous; the apiserver checks the user's permissions, rejects the request, and returns 401. That is why /etc/kubernetes/ contains admin.conf: kubectl reads the user information from that file and issues the request as that user. To prevent man-in-the-middle attacks, HTTPS is used with mutual authentication between client and server, so the client also needs a certificate issued by the cluster CA. (Because the certificate encodes the client identity rather than the node, it can be reused; the same applies to kube-controller-manager and kube-scheduler.)
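To see which identity a kubeconfig carries, the embedded client certificate can be decoded. A quick check, assuming the certificate is inlined as client-certificate-data rather than referenced by file path:

grep 'client-certificate-data' /etc/kubernetes/admin.conf \
  | awk '{print $2}' | base64 -d \
  | openssl x509 -noout -subject
# for a kubeadm admin.conf this typically shows O=system:masters, CN=kubernetes-admin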
In kubernetes 1.7.x and later, for security reasons the NodeRestriction admission plugin is enabled in the apiserver's --admission-control flag, and the authorization mode is set to --authorization-mode=Node,RBAC. Node-to-apiserver communication (i.e. each node's kubelet process) is authenticated with a client certificate whose identity is system:node:$(hostname), and all of these identities belong to the system:nodes group.
Because every node has a different hostname, each node needs its own certificate.
The kubelet client certificate can be generated with the following script (a usage sketch follows after it):
#!/bin/bash
set -e
cat > $HOSTNAME-csr.conf << EOF
[ v3_ext ]
# Extensions to add to a certificate request
keyUsage = critical, digitalSignature, keyEncipherment
extendedKeyUsage = clientAuth
EOF
# generate client private key file
echo "generate client private key file"
openssl genrsa -out $HOSTNAME.key 2048
# generate client CSR(certificate signing request)
echo "generate client CSR(certificate signing request)"
openssl req -new -sha256 -key $HOSTNAME.key -out $HOSTNAME.csr -subj "/O=system:nodes/CN=system:node:$HOSTNAME"
# generate client certification
echo "generate client certification"
openssl x509 -sha256 -req -in $HOSTNAME.csr -CA ca.crt -CAkey ca.key -CAcreateserial -out $HOSTNAME.crt -days 1000 -extensions v3_ext -extfile $HOSTNAME-csr.conf
# show created info
echo "show created info"
openssl x509 -noout -text -in $HOSTNAME.crt
# show crt with base64 encode
echo "show crt with base64 encode"
cat $HOSTNAME.crt | base64 -w 0 && echo
# show key with base64 encode
echo "show key with base64 encode"
cat $HOSTNAME.key | base64 -w 0 && echo
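Usage sketch (the script path and name are arbitrary): the script relies on the $HOSTNAME environment variable and expects ca.crt and ca.key in the current directory, so on walker-2 it can be run from the copied pki directory:

cd /etc/kubernetes/pki
bash /root/gen-kubelet-cert.sh   # hypothetical location of the script above
ls walker-2.crt walker-2.key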
Replace client-certificate-data and client-key-data in /etc/kubernetes/kubelet.conf with the base64-encoded certificate and key printed at the end. The result looks like this:
apiVersion: v1
clusters:
- cluster:
    certificate-authority-data: $(cat /etc/kubernetes/pki/ca.crt | base64 -w 0)
    server: https://172.16.6.249:6443
  name: kubernetes
contexts:
- context:
    cluster: kubernetes
    user: system:node:$HOSTNAME
  name: system:node:$HOSTNAME@kubernetes
current-context: system:node:$HOSTNAME@kubernetes
kind: Config
preferences: {}
users:
- name: system:node:$HOSTNAME
  user:
    client-certificate-data: $(cat /etc/kubernetes/pki/$HOSTNAME.crt | base64 -w 0)
    client-key-data: $(cat /etc/kubernetes/pki/$HOSTNAME.key | base64 -w 0)
Also edit admin.conf, controller-manager.conf, and scheduler.conf so that the apiserver address points to controller2, then restart the kubelet service.
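A sketch of that step on controller2, assuming the kubeconfigs copied from controller1 still point at https://172.16.6.47:6443:

sed -i 's#server: https://172.16.6.47:6443#server: https://172.16.6.249:6443#' \
  /etc/kubernetes/admin.conf \
  /etc/kubernetes/controller-manager.conf \
  /etc/kubernetes/scheduler.conf
systemctl restart kubelet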
[root@walker-2 kubernetes]# kubectl get nodes
NAME STATUS ROLES AGE VERSION
walker-1.novalocal NotReady master 4h v1.8.1
walker-2 Ready <none> 2h v1.8.1
[root@walker-1 k8s]# kubectl get po -o wide --namespace=kube-system
NAME READY STATUS RESTARTS AGE IP NODE
calico-kube-controllers-86f67c8fd7-ln78x 1/1 Running 0 1h 172.16.6.47 walker-1.novalocal
calico-node-7rqmc 2/2 Running 0 8m 172.16.6.249 walker-2
calico-node-zqc56 2/2 Running 0 1h 172.16.6.47 walker-1.novalocal
calico-policy-controller-566dc8d645-5dnv8 1/1 Running 0 1h 172.16.6.249 walker-2
kube-apiserver-walker-1.novalocal 1/1 Running 3 1h 172.16.6.47 walker-1.novalocal
kube-apiserver-walker-2 1/1 Running 0 7m 172.16.6.249 walker-2
kube-controller-manager-walker-1.novalocal 1/1 Running 0 1h 172.16.6.47 walker-1.novalocal
kube-controller-manager-walker-2 1/1 Running 0 8m 172.16.6.249 walker-2
kube-dns-8bb5c479-7rsjv 3/3 Running 0 1h 192.168.187.196 walker-1.novalocal
kube-proxy-66c6x 1/1 Running 0 8m 172.16.6.249 walker-2
kube-proxy-k5zwm 1/1 Running 0 1h 172.16.6.47 walker-1.novalocal
kube-scheduler-walker-1.novalocal 1/1 Running 0 1h 172.16.6.47 walker-1.novalocal
kube-scheduler-walker-2 1/1 Running 0 7m 172.16.6.249 walker-2
At this point the standby controller works and the bulk of the setup is done.
Note: it is best to specify the etcd endpoints by IP. With hostnames, the apiserver on controller2 kept timing out when connecting to etcd even though the records were in /etc/hosts; strangely, controller1 was not affected.
Setting up LVS
Topology:
lvs(172.16.6.56)
| ^
| | vip:172.16.6.79
v |
-----------------------
| ^ | ^
| | | |
v | v |
rs1(172.16.6.47) rs2(172.16.6.249)
lvs
- Add the VIP
[root@lvs ~]# ip addr add 172.16.6.79 dev eth1
[root@lvs ~]# ip addr
...
3: eth1: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UP qlen 1000
link/ether fa:16:3e:c2:e8:12 brd ff:ff:ff:ff:ff:ff
inet 172.16.6.56/24 brd 172.16.6.255 scope global dynamic eth1
valid_lft 67967sec preferred_lft 67967sec
inet 172.16.6.79/32 scope global eth1
valid_lft forever preferred_lft forever
inet6 fe80::f816:3eff:fec2:e812/64 scope link
valid_lft forever preferred_lft forever
- LVS configuration
[root@lvs ~]# ipvsadm -A -t 172.16.6.79:6443 -s rr
[root@lvs ~]# ipvsadm -a -t 172.16.6.79:6443 -r 172.16.6.47:6443
[root@lvs ~]# ipvsadm -a -t 172.16.6.79:6443 -r 172.16.6.249:6443
[root@lvs ~]# ipvsadm -Ln
IP Virtual Server version 1.2.1 (size=4096)
Prot LocalAddress:Port Scheduler Flags
-> RemoteAddress:Port Forward Weight ActiveConn InActConn
TCP 172.16.6.79:6443 rr
-> 172.16.6.47:6443 Route 1 0 0
-> 172.16.6.249:6443 Route 1 1 0
The scheduling algorithm is set to round-robin (rr).
Note: LVS itself does no health checking of the real servers, which means that if the service on rs2 fails, requests will still be scheduled to rs2. To evict failed real servers, keepalived is used alongside LVS.
The keepalived configuration is as follows:
[root@lvs ~]# cat /etc/keepalived/keepalived.conf
global_defs {
    router_id LVS_DEVEL
}

vrrp_instance lvsgroup {
    state MASTER              # this node is the MASTER
    interface eth1            # NIC that the VIP is bound to
    virtual_router_id 81
    priority 100              # must be higher than on the BACKUP node
    advert_int 1              # VRRP advertisement interval in seconds
    authentication {
        auth_type PASS
        auth_pass 1111
    }
    virtual_ipaddress {
        172.16.6.79           # the VIP; more can be added, one per line
    }
}

virtual_server 172.16.6.79 6443 {
    delay_loop 5              # check real server state every 5 seconds
    lb_algo rr                # real server scheduling algorithm
    lb_kind DR                # LVS forwarding mode
    # persistence_timeout 3   # connections from the same IP stick to one real server within the timeout
    protocol TCP              # use TCP checks for real server state
    real_server 172.16.6.47 6443 {
        weight 1
        TCP_CHECK {
            connect_timeout 10
        }
    }
    real_server 172.16.6.249 6443 {
        weight 1
        TCP_CHECK {
            connect_timeout 10
        }
    }
}
Once keepalived is configured, there is no need to touch LVS by hand any more: the keepalived process takes over and dynamically adds and removes the real servers behind LVS.
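A quick way to verify this on the lvs host: restart keepalived and watch the virtual server table; stopping the apiserver on one controller (for example by moving its static pod manifest out of /etc/kubernetes/manifests) should remove that real server within roughly delay_loop seconds, and it should reappear once the apiserver is back.

systemctl restart keepalived
watch -n 2 ipvsadm -Ln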
rs{1,2}
Create the real-server configuration script lvs_rs.sh with the following content:
#!/bin/bash
# Bind the VIP to the loopback interface and suppress ARP for it,
# as required on a real server in LVS DR mode.
SNS_VIP=172.16.6.79

case "$1" in
start)
    ifconfig lo:0 $SNS_VIP netmask 255.255.255.255 broadcast $SNS_VIP
    route add -host $SNS_VIP dev lo:0
    echo "1" >/proc/sys/net/ipv4/conf/lo/arp_ignore
    echo "2" >/proc/sys/net/ipv4/conf/lo/arp_announce
    echo "1" >/proc/sys/net/ipv4/conf/all/arp_ignore
    echo "2" >/proc/sys/net/ipv4/conf/all/arp_announce
    sysctl -p >/dev/null 2>&1
    echo "LVS RealServer Start OK"
    ;;
stop)
    ifconfig lo:0 down
    route del $SNS_VIP >/dev/null 2>&1
    echo "0" >/proc/sys/net/ipv4/conf/lo/arp_ignore
    echo "0" >/proc/sys/net/ipv4/conf/lo/arp_announce
    echo "0" >/proc/sys/net/ipv4/conf/all/arp_ignore
    echo "0" >/proc/sys/net/ipv4/conf/all/arp_announce
    echo "LVS RealServer Stopped"
    ;;
*)
    echo "Usage: $0 {start|stop}"
    exit 1
esac
To make this survive a reboot of the real server, add it to the boot sequence:
[root@rs1 ~]# echo "sh /root/lvs_rs.sh start" >> /etc/rc.d/rc.local
[root@rs1 ~]# chmod +x /etc/rc.d/rc.local
On CentOS 7, rc.local is no longer executable by default, so managing this with a systemd unit is recommended; a sketch follows.
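A minimal unit file sketch (the unit name and script path are assumptions, adjust to where lvs_rs.sh actually lives):

cat > /etc/systemd/system/lvs-rs.service << 'EOF'
[Unit]
Description=Configure loopback VIP and ARP settings for LVS DR real server
After=network.target

[Service]
Type=oneshot
RemainAfterExit=yes
ExecStart=/bin/bash /root/lvs_rs.sh start
ExecStop=/bin/bash /root/lvs_rs.sh stop

[Install]
WantedBy=multi-user.target
EOF
systemctl daemon-reload
systemctl enable lvs-rs.service
systemctl start lvs-rs.service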
Test
[root@controller ~]# telnet 172.16.6.79 6443
Trying 172.16.6.79...
Connected to 172.16.6.79.
Escape character is '^]'.
After LVS is up, update kubectl's kubeconfig, the kube-proxy configuration, and the cluster-info ConfigMap so that the apiserver address points to the VIP.
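A sketch of where those settings live. The ConfigMap names below are the kubeadm defaults for this version; verify with kubectl get cm --all-namespaces before editing:

# kubectl: point the admin kubeconfig (and any copies such as ~/.kube/config) at the VIP
sed -i 's#server: https://172.16.6.47:6443#server: https://172.16.6.79:6443#' /etc/kubernetes/admin.conf
# kube-proxy: edit the kubeconfig embedded in its ConfigMap, then recreate the kube-proxy pods
kubectl --namespace=kube-system edit cm kube-proxy
kubectl --namespace=kube-system delete po -l k8s-app=kube-proxy
# cluster-info: used by kubeadm join, lives in the kube-public namespace
kubectl --namespace=kube-public edit cm cluster-info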
The cluster setup is now complete.
References:
https://www.cnblogs.com/keithtt/p/7896948.html
https://kubernetes.io/docs/admin/authentication/
https://kubernetes.io/docs/admin/kubelet-authentication-authorization/
https://kubernetes.io/docs/concepts/cluster-administration/certificates/