本文基於k8s release 1.9分析kubelet sandbox創建與cni網絡配置流程。
首先談下docker的四種網絡模式,它們分別爲:
- bridge模式:使–net =bridge指定,默認設置;bridge模式是Docker默認的網絡設置,此模式會爲每一個容器分配Network Namespace、設置IP等,並將並將一個主機上的Docker容器連接到一個虛擬網橋上。
- host模式:使–net =host指定,如果啓動容器的時候使用host模式,那麼這個容器將不會獲得一個獨立的Network Namespace,而是和宿主機共用一個Network Namespace。容器將不會虛擬出自己的網卡,配置自己的IP等,而是使用宿主機的IP和端口。
- none模式:使–net =none指定,在none模式下,Docker容器擁有自己的Network Namespace,但是,並不爲Docker容器進行任何網絡配置。也就是說,這個Docker容器沒有網卡、IP、路由等信息。需要我們自己爲Docker容器添加網卡、配置IP等。
- container模式:使用–net =container指定,這個模式指定新創建的容器和已經存在的一個容器共享一個Network Namespace,而不是和宿主機共享。新創建的容器不會創建自己的網卡,配置自己的IP,而是和一個指定的容器共享IP、端口範圍等。
舉個栗子,在我的集羣中,dns的pod有3個業務容器,分別爲dns、dnsmasq和sidecar,pause爲網絡容器,查找出這些容器id,並通過容器id查看每個容器的NetworkMode,發現除了pause容器的NetworkMode爲node,其餘容器均爲container:7bae079c4926921a1ac8934362331f31eeb2fdb5c9fc0e6d06fcaabd9ef16c29,由docker容器的網絡模式,我們得知dns、dnsmasq和sidecar使用的是pause容器的網絡空間。而pause容器的網絡模式爲none是因爲在該k8s集羣中使用的cni網絡配置,具體的容器網卡、IP、路由等信息是由calico(集羣中使用的cni爲calico)配置。
我們通過kubelet SyncPod函數分析sandbox的創建流程:
// SyncPod syncs the running pod into the desired pod by executing following steps:
//
// 1. Compute sandbox and container changes.
// 2. Kill pod sandbox if necessary.
// 3. Kill any containers that should not be running.
// 4. Create sandbox if necessary.
// 5. Create init containers.
// 6. Create normal containers.
func (m *kubeGenericRuntimeManager) SyncPod(pod *v1.Pod, _ v1.PodStatus, podStatus *kubecontainer.PodStatus, pullSecrets []v1.Secret, backOff *flowcontrol.Backoff) (result kubecontainer.PodSyncResult) {
// Step 1: Compute sandbox and container changes.
podContainerChanges := m.computePodActions(pod, podStatus)
glog.V(3).Infof("computePodActions got %+v for pod %q", podContainerChanges, format.Pod(pod))
if podContainerChanges.CreateSandbox {
ref, err := ref.GetReference(legacyscheme.Scheme, pod)
if err != nil {
glog.Errorf("Couldn't make a ref to pod %q: '%v'", format.Pod(pod), err)
}
if podContainerChanges.SandboxID != "" {
m.recorder.Eventf(ref, v1.EventTypeNormal, events.SandboxChanged, "Pod sandbox changed, it will be killed and re-created.")
} else {
glog.V(4).Infof("SyncPod received new pod %q, will create a sandbox for it", format.Pod(pod))
}
}
// Step 2: Kill the pod if the sandbox has changed.
if podContainerChanges.KillPod {
if !podContainerChanges.CreateSandbox {
glog.V(4).Infof("Stopping PodSandbox for %q because all other containers are dead.", format.Pod(pod))
} else {
glog.V(4).Infof("Stopping PodSandbox for %q, will start new one", format.Pod(pod))
}
killResult := m.killPodWithSyncResult(pod, kubecontainer.ConvertPodStatusToRunningPod(m.runtimeName, podStatus), nil)
result.AddPodSyncResult(killResult)
if killResult.Error() != nil {
glog.Errorf("killPodWithSyncResult failed: %v", killResult.Error())
return
}
if podContainerChanges.CreateSandbox {
m.purgeInitContainers(pod, podStatus)
}
} else {
// Step 3: kill any running containers in this pod which are not to keep.
for containerID, containerInfo := range podContainerChanges.ContainersToKill {
glog.V(3).Infof("Killing unwanted container %q(id=%q) for pod %q", containerInfo.name, containerID, format.Pod(pod))
killContainerResult := kubecontainer.NewSyncResult(kubecontainer.KillContainer, containerInfo.name)
result.AddSyncResult(killContainerResult)
if err := m.killContainer(pod, containerID, containerInfo.name, containerInfo.message, nil); err != nil {
killContainerResult.Fail(kubecontainer.ErrKillContainer, err.Error())
glog.Errorf("killContainer %q(id=%q) for pod %q failed: %v", containerInfo.name, containerID, format.Pod(pod), err)
return
}
}
}
// Keep terminated init containers fairly aggressively controlled
// This is an optmization because container removals are typically handled
// by container garbage collector.
m.pruneInitContainersBeforeStart(pod, podStatus)
// We pass the value of the podIP down to generatePodSandboxConfig and
// generateContainerConfig, which in turn passes it to various other
// functions, in order to facilitate functionality that requires this
// value (hosts file and downward API) and avoid races determining
// the pod IP in cases where a container requires restart but the
// podIP isn't in the status manager yet.
//
// We default to the IP in the passed-in pod status, and overwrite it if the
// sandbox needs to be (re)started.
podIP := ""
if podStatus != nil {
podIP = podStatus.IP
}
// Step 4: Create a sandbox for the pod if necessary.
podSandboxID := podContainerChanges.SandboxID
if podContainerChanges.CreateSandbox {
var msg string
var err error
glog.V(4).Infof("Creating sandbox for pod %q", format.Pod(pod))
createSandboxResult := kubecontainer.NewSyncResult(kubecontainer.CreatePodSandbox, format.Pod(pod))
result.AddSyncResult(createSandboxResult)
podSandboxID, msg, err = m.createPodSandbox(pod, podContainerChanges.Attempt)
if err != nil {
createSandboxResult.Fail(kubecontainer.ErrCreatePodSandbox, msg)
glog.Errorf("createPodSandbox for pod %q failed: %v", format.Pod(pod), err)
ref, err := ref.GetReference(legacyscheme.Scheme, pod)
if err != nil {
glog.Errorf("Couldn't make a ref to pod %q: '%v'", format.Pod(pod), err)
}
m.recorder.Eventf(ref, v1.EventTypeWarning, events.FailedCreatePodSandBox, "Failed create pod sandbox.")
return
}
glog.V(4).Infof("Created PodSandbox %q for pod %q", podSandboxID, format.Pod(pod))
podSandboxStatus, err := m.runtimeService.PodSandboxStatus(podSandboxID)
if err != nil {
ref, err := ref.GetReference(legacyscheme.Scheme, pod)
if err != nil {
glog.Errorf("Couldn't make a ref to pod %q: '%v'", format.Pod(pod), err)
}
m.recorder.Eventf(ref, v1.EventTypeWarning, events.FailedStatusPodSandBox, "Unable to get pod sandbox status: %v", err)
glog.Errorf("Failed to get pod sandbox status: %v; Skipping pod %q", err, format.Pod(pod))
result.Fail(err)
return
}
// If we ever allow updating a pod from non-host-network to
// host-network, we may use a stale IP.
if !kubecontainer.IsHostNetworkPod(pod) {
// Overwrite the podIP passed in the pod status, since we just started the pod sandbox.
podIP = m.determinePodSandboxIP(pod.Namespace, pod.Name, podSandboxStatus)
glog.V(4).Infof("Determined the ip %q for pod %q after sandbox changed", podIP, format.Pod(pod))
}
}
// Get podSandboxConfig for containers to start.
configPodSandboxResult := kubecontainer.NewSyncResult(kubecontainer.ConfigPodSandbox, podSandboxID)
result.AddSyncResult(configPodSandboxResult)
podSandboxConfig, err := m.generatePodSandboxConfig(pod, podContainerChanges.Attempt)
if err != nil {
message := fmt.Sprintf("GeneratePodSandboxConfig for pod %q failed: %v", format.Pod(pod), err)
glog.Error(message)
configPodSandboxResult.Fail(kubecontainer.ErrConfigPodSandbox, message)
return
}
// Step 5: start the init container.
if container := podContainerChanges.NextInitContainerToStart; container != nil {
// Start the next init container.
startContainerResult := kubecontainer.NewSyncResult(kubecontainer.StartContainer, container.Name)
result.AddSyncResult(startContainerResult)
isInBackOff, msg, err := m.doBackOff(pod, container, podStatus, backOff)
if isInBackOff {
startContainerResult.Fail(err, msg)
glog.V(4).Infof("Backing Off restarting init container %+v in pod %v", container, format.Pod(pod))
return
}
glog.V(4).Infof("Creating init container %+v in pod %v", container, format.Pod(pod))
if msg, err := m.startContainer(podSandboxID, podSandboxConfig, container, pod, podStatus, pullSecrets, podIP); err != nil {
startContainerResult.Fail(err, msg)
utilruntime.HandleError(fmt.Errorf("init container start failed: %v: %s", err, msg))
return
}
// Successfully started the container; clear the entry in the failure
glog.V(4).Infof("Completed init container %q for pod %q", container.Name, format.Pod(pod))
}
// Step 6: start containers in podContainerChanges.ContainersToStart.
for _, idx := range podContainerChanges.ContainersToStart {
container := &pod.Spec.Containers[idx]
startContainerResult := kubecontainer.NewSyncResult(kubecontainer.StartContainer, container.Name)
result.AddSyncResult(startContainerResult)
isInBackOff, msg, err := m.doBackOff(pod, container, podStatus, backOff)
if isInBackOff {
startContainerResult.Fail(err, msg)
glog.V(4).Infof("Backing Off restarting container %+v in pod %v", container, format.Pod(pod))
continue
}
glog.V(4).Infof("Creating container %+v in pod %v", container, format.Pod(pod))
if msg, err := m.startContainer(podSandboxID, podSandboxConfig, container, pod, podStatus, pullSecrets, podIP); err != nil {
startContainerResult.Fail(err, msg)
// known errors that are logged in other places are logged at higher levels here to avoid
// repetitive log spam
switch {
case err == images.ErrImagePullBackOff:
glog.V(3).Infof("container start failed: %v: %s", err, msg)
default:
utilruntime.HandleError(fmt.Errorf("container start failed: %v: %s", err, msg))
}
continue
}
}
return
}
如果是新創建的pod或者是pod的spec參數已經發生改變,則需要重新生成sandbox,這裏主要看createPodSandbox函數
// createPodSandbox creates a pod sandbox and returns (podSandBoxID, message, error).
func (m *kubeGenericRuntimeManager) createPodSandbox(pod *v1.Pod, attempt uint32) (string, string, error) {
// config中包含的信息主要有,pod name、namespace、UID、labels、annotation、dns、
// container portMappings以及根據annotation中的security.alpha.kubernetes.io/sysctls和
// security.alpha.kubernetes.io/unsafe-sysctls配置linuxConfig等。
podSandboxConfig, err := m.generatePodSandboxConfig(pod, attempt)
if err != nil {
message := fmt.Sprintf("GeneratePodSandboxConfig for pod %q failed: %v", format.Pod(pod), err)
glog.Error(message)
return "", message, err
}
// Create pod logs directory
err = m.osInterface.MkdirAll(podSandboxConfig.LogDirectory, 0755)
if err != nil {
message := fmt.Sprintf("Create pod log directory for pod %q failed: %v", format.Pod(pod), err)
glog.Errorf(message)
return "", message, err
}
podSandBoxID, err := m.runtimeService.RunPodSandbox(podSandboxConfig)
if err != nil {
message := fmt.Sprintf("CreatePodSandbox for pod %q failed: %v", format.Pod(pod), err)
glog.Error(message)
return "", message, err
}
return podSandBoxID, "", nil
}
該函數首先調用generatePodSandboxConfig來生成sandbox的配置,然後調用MkdirAll方法來創建pod的日誌目錄,最後調用RunPodSandbox來完成pause創建工作,RunPodSandbox函數如下
// RunPodSandbox creates and starts a pod-level sandbox. Runtimes should ensure
// the sandbox is in ready state.
// For docker, PodSandbox is implemented by a container holding the network
// namespace for the pod.
// Note: docker doesn't use LogDirectory (yet).
func (ds *dockerService) RunPodSandbox(config *runtimeapi.PodSandboxConfig) (id string, err error) {
// Step 1: Pull the image for the sandbox.
image := defaultSandboxImage
podSandboxImage := ds.podSandboxImage
if len(podSandboxImage) != 0 {
image = podSandboxImage
}
// NOTE: To use a custom sandbox image in a private repository, users need to configure the nodes with credentials properly.
// see: http://kubernetes.io/docs/user-guide/images/#configuring-nodes-to-authenticate-to-a-private-repository
// Only pull sandbox image when it's not present - v1.PullIfNotPresent.
if err := ensureSandboxImageExists(ds.client, image); err != nil {
return "", err
}
// Step 2: Create the sandbox container.
createConfig, err := ds.makeSandboxDockerConfig(config, image)
if err != nil {
return "", fmt.Errorf("failed to make sandbox docker config for pod %q: %v", config.Metadata.Name, err)
}
createResp, err := ds.client.CreateContainer(*createConfig)
if err != nil {
createResp, err = recoverFromCreationConflictIfNeeded(ds.client, *createConfig, err)
}
if err != nil || createResp == nil {
return "", fmt.Errorf("failed to create a sandbox for pod %q: %v", config.Metadata.Name, err)
}
ds.setNetworkReady(createResp.ID, false)
defer func(e *error) {
// Set networking ready depending on the error return of
// the parent function
if *e == nil {
ds.setNetworkReady(createResp.ID, true)
}
}(&err)
// Step 3: Create Sandbox Checkpoint.
if err = ds.checkpointHandler.CreateCheckpoint(createResp.ID, constructPodSandboxCheckpoint(config)); err != nil {
return createResp.ID, err
}
// Step 4: Start the sandbox container.
// Assume kubelet's garbage collector would remove the sandbox later, if
// startContainer failed.
err = ds.client.StartContainer(createResp.ID)
if err != nil {
return createResp.ID, fmt.Errorf("failed to start sandbox container for pod %q: %v", config.Metadata.Name, err)
}
// Rewrite resolv.conf file generated by docker.
// NOTE: cluster dns settings aren't passed anymore to docker api in all cases,
// not only for pods with host network: the resolver conf will be overwritten
// after sandbox creation to override docker's behaviour. This resolv.conf
// file is shared by all containers of the same pod, and needs to be modified
// only once per pod.
if dnsConfig := config.GetDnsConfig(); dnsConfig != nil {
containerInfo, err := ds.client.InspectContainer(createResp.ID)
if err != nil {
return createResp.ID, fmt.Errorf("failed to inspect sandbox container for pod %q: %v", config.Metadata.Name, err)
}
if err := rewriteResolvFile(containerInfo.ResolvConfPath, dnsConfig.Servers, dnsConfig.Searches, dnsConfig.Options); err != nil {
return createResp.ID, fmt.Errorf("rewrite resolv.conf failed for pod %q: %v", config.Metadata.Name, err)
}
}
// Do not invoke network plugins if in hostNetwork mode.
if nsOptions := config.GetLinux().GetSecurityContext().GetNamespaceOptions(); nsOptions != nil && nsOptions.HostNetwork {
return createResp.ID, nil
}
// Step 5: Setup networking for the sandbox.
// All pod networking is setup by a CNI plugin discovered at startup time.
// This plugin assigns the pod ip, sets up routes inside the sandbox,
// creates interfaces etc. In theory, its jurisdiction ends with pod
// sandbox networking, but it might insert iptables rules or open ports
// on the host as well, to satisfy parts of the pod spec that aren't
// recognized by the CNI standard yet.
cID := kubecontainer.BuildContainerID(runtimeName, createResp.ID)
err = ds.network.SetUpPod(config.GetMetadata().Namespace, config.GetMetadata().Name, cID, config.Annotations)
if err != nil {
// TODO(random-liu): Do we need to teardown network here?
if err := ds.client.StopContainer(createResp.ID, defaultSandboxGracePeriod); err != nil {
glog.Warningf("Failed to stop sandbox container %q for pod %q: %v", createResp.ID, config.Metadata.Name, err)
}
}
return createResp.ID, err
}
RunPodSandbox函數的流程還是比較清晰的,還記得我們在kubelet啓動參數裏面配置的那個pause容器參數不,這裏ensureSandboxImageExists要確保pause鏡像存在節點上,若不存在則先去下載。makeSandboxDockerConfig將PodSandboxConfig轉換成docker的ContainerCreateConfig,然後再根據這個配置生成pause容器,最後在啓動pause容器。容器啓動後再配置dns,這裏dns的配置很有意思,通過docker inspecft查看容器信息,然後再重寫容器位於宿主機的resolv文件
[root@localhost ~]# docker inspect 7bae079c4926 -f={{.ResolvConfPath}}
/var/lib/docker/containers/7bae079c4926921a1ac8934362331f31eeb2fdb5c9fc0e6d06fcaabd9ef16c29/resolv.conf
再看看rewriteResolvFile函數
// rewriteResolvFile rewrites resolv.conf file generated by docker.
func rewriteResolvFile(resolvFilePath string, dns []string, dnsSearch []string, dnsOptions []string) error {
if len(resolvFilePath) == 0 {
glog.Errorf("ResolvConfPath is empty.")
return nil
}
if _, err := os.Stat(resolvFilePath); os.IsNotExist(err) {
return fmt.Errorf("ResolvConfPath %q does not exist", resolvFilePath)
}
var resolvFileContent []string
for _, srv := range dns {
resolvFileContent = append(resolvFileContent, "nameserver "+srv)
}
if len(dnsSearch) > 0 {
resolvFileContent = append(resolvFileContent, "search "+strings.Join(dnsSearch, " "))
}
if len(dnsOptions) > 0 {
resolvFileContent = append(resolvFileContent, "options "+strings.Join(dnsOptions, " "))
}
if len(resolvFileContent) > 0 {
resolvFileContentStr := strings.Join(resolvFileContent, "\n")
resolvFileContentStr += "\n"
glog.V(4).Infof("Will attempt to re-write config file %s with: \n%s", resolvFilePath, resolvFileContent)
if err := rewriteFile(resolvFilePath, resolvFileContentStr); err != nil {
glog.Errorf("resolv.conf could not be updated: %v", err)
return err
}
}
return nil
}
整個過程看出來了嗎,其實就是在重寫宿主機上的/var/lib/docker/containers/7bae079c4926921a1ac8934362331f31eeb2fdb5c9fc0e6d06fcaabd9ef16c29/resolv.conf文件,然後你登錄容器,cat resolve.conf,最後就是rewriteResolvFile函數的結果
nameserver 10.96.0.10
search default.svc.cluster.local svc.cluster.local cluster.local
options ndots:5
到這一步,如果你的pod的網絡模式是host,則不需要調用cni插件完成pod的網絡配置,否則將調用SetUpPod函數完成pod的puse容器網絡創建。
func (pm *PluginManager) SetUpPod(podNamespace, podName string, id kubecontainer.ContainerID, annotations map[string]string) error {
defer recordOperation("set_up_pod", time.Now())
fullPodName := kubecontainer.BuildPodFullName(podName, podNamespace)
pm.podLock(fullPodName).Lock()
defer pm.podUnlock(fullPodName)
glog.V(3).Infof("Calling network plugin %s to set up pod %q", pm.plugin.Name(), fullPodName)
if err := pm.plugin.SetUpPod(podNamespace, podName, id, annotations); err != nil {
return fmt.Errorf("NetworkPlugin %s failed to set up pod %q network: %v", pm.plugin.Name(), fullPodName, err)
}
return nil
}
調用plugin的SetUpPod方法,這裏plugin是一個interface, 具體使用哪個plugin是由kubelet的啓動參數–network-plugin決定的。
func (plugin *cniNetworkPlugin) SetUpPod(namespace string, name string, id kubecontainer.ContainerID, annotations map[string]string) error {
if err := plugin.checkInitialized(); err != nil {
return err
}
netnsPath, err := plugin.host.GetNetNS(id.ID)
if err != nil {
return fmt.Errorf("CNI failed to retrieve network namespace path: %v", err)
}
// Windows doesn't have loNetwork. It comes only with Linux
if plugin.loNetwork != nil {
if _, err = plugin.addToNetwork(plugin.loNetwork, name, namespace, id, netnsPath); err != nil {
glog.Errorf("Error while adding to cni lo network: %s", err)
return err
}
}
_, err = plugin.addToNetwork(plugin.getDefaultNetwork(), name, namespace, id, netnsPath)
if err != nil {
glog.Errorf("Error while adding to cni network: %s", err)
return err
}
return err
}
GetNetNS獲取容器的netnamespace所在路徑,在非windows系統上,還會調用ddToNetwork來配置loopback設備的網絡。最後調用addToNetwork來配置pause容器的eth0接口的網絡。查看plugin的getDefaultNetwork函數
func (plugin *cniNetworkPlugin) getDefaultNetwork() *cniNetwork {
plugin.RLock()
defer plugin.RUnlock()
return plugin.defaultNetwork
}
它實際調用的是getDefaultCNINetwork函數
func getDefaultCNINetwork(pluginDir, binDir, vendorCNIDirPrefix string) (*cniNetwork, error) {
if pluginDir == "" {
pluginDir = DefaultNetDir
}
files, err := libcni.ConfFiles(pluginDir, []string{".conf", ".conflist", ".json"})
switch {
case err != nil:
return nil, err
case len(files) == 0:
return nil, fmt.Errorf("No networks found in %s", pluginDir)
}
sort.Strings(files)
for _, confFile := range files {
var confList *libcni.NetworkConfigList
if strings.HasSuffix(confFile, ".conflist") {
confList, err = libcni.ConfListFromFile(confFile)
if err != nil {
glog.Warningf("Error loading CNI config list file %s: %v", confFile, err)
continue
}
} else {
conf, err := libcni.ConfFromFile(confFile)
if err != nil {
glog.Warningf("Error loading CNI config file %s: %v", confFile, err)
continue
}
// Ensure the config has a "type" so we know what plugin to run.
// Also catches the case where somebody put a conflist into a conf file.
if conf.Network.Type == "" {
glog.Warningf("Error loading CNI config file %s: no 'type'; perhaps this is a .conflist?", confFile)
continue
}
confList, err = libcni.ConfListFromConf(conf)
if err != nil {
glog.Warningf("Error converting CNI config file %s to list: %v", confFile, err)
continue
}
}
if len(confList.Plugins) == 0 {
glog.Warningf("CNI config list %s has no networks, skipping", confFile)
continue
}
confType := confList.Plugins[0].Network.Type
// Search for vendor-specific plugins as well as default plugins in the CNI codebase.
vendorDir := vendorCNIDir(vendorCNIDirPrefix, confType)
cninet := &libcni.CNIConfig{
Path: []string{vendorDir, binDir},
}
network := &cniNetwork{name: confList.Name, NetworkConfig: confList, CNIConfig: cninet}
return network, nil
}
return nil, fmt.Errorf("No valid networks found in %s", pluginDir)
}
getDefaultCNINetwork會根據kubelet的cni配置參數尋找cni的二進制文件和配置目錄,如果在kubelet沒配置這些參數則默認的netDir爲/etc/cni/net.d,默認的cniDir爲/opt/cni/bin。改函數查找NetDIr目錄下後綴爲conf、conflist和json的文件,根據配置文件最後返回cniNetwork對象,它包含了cni插件名稱和配置信息等。
繼續查看plugin的addToNetwork函數
func (plugin *cniNetworkPlugin) addToNetwork(network *cniNetwork, podName string, podNamespace string, podSandboxID kubecontainer.ContainerID, podNetnsPath string) (cnitypes.Result, error) {
rt, err := plugin.buildCNIRuntimeConf(podName, podNamespace, podSandboxID, podNetnsPath)
if err != nil {
glog.Errorf("Error adding network when building cni runtime conf: %v", err)
return nil, err
}
netConf, cniNet := network.NetworkConfig, network.CNIConfig
glog.V(4).Infof("About to add CNI network %v (type=%v)", netConf.Name, netConf.Plugins[0].Network.Type)
res, err := cniNet.AddNetworkList(netConf, rt)
if err != nil {
glog.Errorf("Error adding network: %v", err)
return nil, err
}
return res, nil
}
buildCNIRuntimeConf根據getDefaultCNINetwork函數返回的cniNetwork對象和pause容器id以及netnamespace和pod的名稱、命名空間生成cni的運行時配置。這裏容器的網卡名稱爲eth0。
buildCNIRuntimeConf函數如下
func (plugin *cniNetworkPlugin) buildCNIRuntimeConf(podName string, podNs string, podSandboxID kubecontainer.ContainerID, podNetnsPath string) (*libcni.RuntimeConf, error) {
glog.V(4).Infof("Got netns path %v", podNetnsPath)
glog.V(4).Infof("Using podns path %v", podNs)
rt := &libcni.RuntimeConf{
ContainerID: podSandboxID.ID,
NetNS: podNetnsPath,
IfName: network.DefaultInterfaceName,
Args: [][2]string{
{"IgnoreUnknown", "1"},
{"K8S_POD_NAMESPACE", podNs},
{"K8S_POD_NAME", podName},
{"K8S_POD_INFRA_CONTAINER_ID", podSandboxID.ID},
},
}
// port mappings are a cni capability-based args, rather than parameters
// to a specific plugin
portMappings, err := plugin.host.GetPodPortMappings(podSandboxID.ID)
if err != nil {
return nil, fmt.Errorf("could not retrieve port mappings: %v", err)
}
portMappingsParam := make([]cniPortMapping, 0, len(portMappings))
for _, p := range portMappings {
if p.HostPort <= 0 {
continue
}
portMappingsParam = append(portMappingsParam, cniPortMapping{
HostPort: p.HostPort,
ContainerPort: p.ContainerPort,
Protocol: strings.ToLower(string(p.Protocol)),
HostIP: p.HostIP,
})
}
rt.CapabilityArgs = map[string]interface{}{
"portMappings": portMappingsParam,
}
return rt, nil
}
接着執行AddNetworkList函數
// AddNetworkList executes a sequence of plugins with the ADD command
func (c *CNIConfig) AddNetworkList(list *NetworkConfigList, rt *RuntimeConf) (types.Result, error) {
var prevResult types.Result
for _, net := range list.Plugins {
pluginPath, err := invoke.FindInPath(net.Network.Type, c.Path)
if err != nil {
return nil, err
}
newConf, err := buildOneConfig(list, net, prevResult, rt)
if err != nil {
return nil, err
}
prevResult, err = invoke.ExecPluginWithResult(pluginPath, newConf.Bytes, c.args("ADD", rt))
if err != nil {
return nil, err
}
}
return prevResult, nil
}
該函數會遍歷plugin,根據cni的type在binDir中找到同名插件,返回該插件的全路徑。最後執行ExecPluginWithResult函數,它將調用cni的二進制文件並傳入newConf參數以及RuntimeConf和一個ADD參數,其中ADD代表給容器添加網絡。
分析到這,kubelet的網絡配置已經完成了,我們最終會看到kubelet在生成新pod的時候會先生成一個sandbox容器,kubelet會根據pod的yaml信息和kubelet的cni參數配置生成一個cni runtime配置,最後調用cni插件完成docker容器的網絡配置(關於cni插件調用流程在下一節分析)。