kubelet sandbox creation and CNI network configuration flow (Part 1)

This article analyzes the kubelet sandbox creation and CNI network configuration flow, based on Kubernetes release 1.9.
First, a quick review of Docker's four network modes:

  • bridge mode: selected with --net=bridge, the default. In this mode Docker allocates a Network Namespace for each container, assigns it an IP, and connects the container to a virtual bridge on the host.
  • host mode: selected with --net=host. A container started in this mode does not get its own Network Namespace but shares the host's. It has no virtual NIC or IP of its own and uses the host's IP and ports directly.
  • none mode: selected with --net=none. The container does get its own Network Namespace, but Docker performs no network configuration at all: no NIC, no IP, no routes. Any networking has to be added externally.
  • container mode: selected with --net=container:<id>. The new container shares the Network Namespace of an existing container instead of the host's. It creates no NIC and configures no IP of its own, sharing the IP, port range, etc. of the specified container.

As an example: in my cluster, the DNS pod has three business containers (dns, dnsmasq, and sidecar) plus pause as the network container. Looking up those container IDs and inspecting each container's NetworkMode shows that the pause container's NetworkMode is none, while every other container's is container:7bae079c4926921a1ac8934362331f31eeb2fdb5c9fc0e6d06fcaabd9ef16c29. Given the Docker network modes above, this means dns, dnsmasq, and sidecar all use the pause container's network namespace. The pause container's mode is none because this cluster uses a CNI network configuration: the container's NIC, IP, routes and so on are set up by the CNI plugin (calico, in this cluster).
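For example, using the same docker inspect convention as later in this article (the second container ID is a placeholder for one of the business containers):

[root@localhost ~]# docker inspect 7bae079c4926 -f '{{.HostConfig.NetworkMode}}'
none
[root@localhost ~]# docker inspect <dns-container-id> -f '{{.HostConfig.NetworkMode}}'
container:7bae079c4926921a1ac8934362331f31eeb2fdb5c9fc0e6d06fcaabd9ef16c29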

Let's analyze the sandbox creation flow starting from kubelet's SyncPod function:

// SyncPod syncs the running pod into the desired pod by executing following steps:
//
//  1. Compute sandbox and container changes.
//  2. Kill pod sandbox if necessary.
//  3. Kill any containers that should not be running.
//  4. Create sandbox if necessary.
//  5. Create init containers.
//  6. Create normal containers.
func (m *kubeGenericRuntimeManager) SyncPod(pod *v1.Pod, _ v1.PodStatus, podStatus *kubecontainer.PodStatus, pullSecrets []v1.Secret, backOff *flowcontrol.Backoff) (result kubecontainer.PodSyncResult) {
    // Step 1: Compute sandbox and container changes.
    podContainerChanges := m.computePodActions(pod, podStatus)
    glog.V(3).Infof("computePodActions got %+v for pod %q", podContainerChanges, format.Pod(pod))
    if podContainerChanges.CreateSandbox {
        ref, err := ref.GetReference(legacyscheme.Scheme, pod)
        if err != nil {
            glog.Errorf("Couldn't make a ref to pod %q: '%v'", format.Pod(pod), err)
        }
        if podContainerChanges.SandboxID != "" {
            m.recorder.Eventf(ref, v1.EventTypeNormal, events.SandboxChanged, "Pod sandbox changed, it will be killed and re-created.")
        } else {
            glog.V(4).Infof("SyncPod received new pod %q, will create a sandbox for it", format.Pod(pod))
        }
    }

    // Step 2: Kill the pod if the sandbox has changed.
    if podContainerChanges.KillPod {
        if !podContainerChanges.CreateSandbox {
            glog.V(4).Infof("Stopping PodSandbox for %q because all other containers are dead.", format.Pod(pod))
        } else {
            glog.V(4).Infof("Stopping PodSandbox for %q, will start new one", format.Pod(pod))
        }

        killResult := m.killPodWithSyncResult(pod, kubecontainer.ConvertPodStatusToRunningPod(m.runtimeName, podStatus), nil)
        result.AddPodSyncResult(killResult)
        if killResult.Error() != nil {
            glog.Errorf("killPodWithSyncResult failed: %v", killResult.Error())
            return
        }

        if podContainerChanges.CreateSandbox {
            m.purgeInitContainers(pod, podStatus)
        }
    } else {
        // Step 3: kill any running containers in this pod which are not to keep.
        for containerID, containerInfo := range podContainerChanges.ContainersToKill {
            glog.V(3).Infof("Killing unwanted container %q(id=%q) for pod %q", containerInfo.name, containerID, format.Pod(pod))
            killContainerResult := kubecontainer.NewSyncResult(kubecontainer.KillContainer, containerInfo.name)
            result.AddSyncResult(killContainerResult)
            if err := m.killContainer(pod, containerID, containerInfo.name, containerInfo.message, nil); err != nil {
                killContainerResult.Fail(kubecontainer.ErrKillContainer, err.Error())
                glog.Errorf("killContainer %q(id=%q) for pod %q failed: %v", containerInfo.name, containerID, format.Pod(pod), err)
                return
            }
        }
    }

    // Keep terminated init containers fairly aggressively controlled
    // This is an optimization because container removals are typically handled
    // by container garbage collector.
    m.pruneInitContainersBeforeStart(pod, podStatus)

    // We pass the value of the podIP down to generatePodSandboxConfig and
    // generateContainerConfig, which in turn passes it to various other
    // functions, in order to facilitate functionality that requires this
    // value (hosts file and downward API) and avoid races determining
    // the pod IP in cases where a container requires restart but the
    // podIP isn't in the status manager yet.
    //
    // We default to the IP in the passed-in pod status, and overwrite it if the
    // sandbox needs to be (re)started.
    podIP := ""
    if podStatus != nil {
        podIP = podStatus.IP
    }

    // Step 4: Create a sandbox for the pod if necessary.
    podSandboxID := podContainerChanges.SandboxID
    if podContainerChanges.CreateSandbox {
        var msg string
        var err error

        glog.V(4).Infof("Creating sandbox for pod %q", format.Pod(pod))
        createSandboxResult := kubecontainer.NewSyncResult(kubecontainer.CreatePodSandbox, format.Pod(pod))
        result.AddSyncResult(createSandboxResult)
        podSandboxID, msg, err = m.createPodSandbox(pod, podContainerChanges.Attempt)
        if err != nil {
            createSandboxResult.Fail(kubecontainer.ErrCreatePodSandbox, msg)
            glog.Errorf("createPodSandbox for pod %q failed: %v", format.Pod(pod), err)
            ref, err := ref.GetReference(legacyscheme.Scheme, pod)
            if err != nil {
                glog.Errorf("Couldn't make a ref to pod %q: '%v'", format.Pod(pod), err)
            }
            m.recorder.Eventf(ref, v1.EventTypeWarning, events.FailedCreatePodSandBox, "Failed create pod sandbox.")
            return
        }
        glog.V(4).Infof("Created PodSandbox %q for pod %q", podSandboxID, format.Pod(pod))

        podSandboxStatus, err := m.runtimeService.PodSandboxStatus(podSandboxID)
        if err != nil {
            ref, err := ref.GetReference(legacyscheme.Scheme, pod)
            if err != nil {
                glog.Errorf("Couldn't make a ref to pod %q: '%v'", format.Pod(pod), err)
            }
            m.recorder.Eventf(ref, v1.EventTypeWarning, events.FailedStatusPodSandBox, "Unable to get pod sandbox status: %v", err)
            glog.Errorf("Failed to get pod sandbox status: %v; Skipping pod %q", err, format.Pod(pod))
            result.Fail(err)
            return
        }

        // If we ever allow updating a pod from non-host-network to
        // host-network, we may use a stale IP.
        if !kubecontainer.IsHostNetworkPod(pod) {
            // Overwrite the podIP passed in the pod status, since we just started the pod sandbox.
            podIP = m.determinePodSandboxIP(pod.Namespace, pod.Name, podSandboxStatus)
            glog.V(4).Infof("Determined the ip %q for pod %q after sandbox changed", podIP, format.Pod(pod))
        }
    }

    // Get podSandboxConfig for containers to start.
    configPodSandboxResult := kubecontainer.NewSyncResult(kubecontainer.ConfigPodSandbox, podSandboxID)
    result.AddSyncResult(configPodSandboxResult)
    podSandboxConfig, err := m.generatePodSandboxConfig(pod, podContainerChanges.Attempt)
    if err != nil {
        message := fmt.Sprintf("GeneratePodSandboxConfig for pod %q failed: %v", format.Pod(pod), err)
        glog.Error(message)
        configPodSandboxResult.Fail(kubecontainer.ErrConfigPodSandbox, message)
        return
    }

    // Step 5: start the init container.
    if container := podContainerChanges.NextInitContainerToStart; container != nil {
        // Start the next init container.
        startContainerResult := kubecontainer.NewSyncResult(kubecontainer.StartContainer, container.Name)
        result.AddSyncResult(startContainerResult)
        isInBackOff, msg, err := m.doBackOff(pod, container, podStatus, backOff)
        if isInBackOff {
            startContainerResult.Fail(err, msg)
            glog.V(4).Infof("Backing Off restarting init container %+v in pod %v", container, format.Pod(pod))
            return
        }

        glog.V(4).Infof("Creating init container %+v in pod %v", container, format.Pod(pod))
        if msg, err := m.startContainer(podSandboxID, podSandboxConfig, container, pod, podStatus, pullSecrets, podIP); err != nil {
            startContainerResult.Fail(err, msg)
            utilruntime.HandleError(fmt.Errorf("init container start failed: %v: %s", err, msg))
            return
        }

        // Successfully started the container; clear the entry in the failure
        glog.V(4).Infof("Completed init container %q for pod %q", container.Name, format.Pod(pod))
    }

    // Step 6: start containers in podContainerChanges.ContainersToStart.
    for _, idx := range podContainerChanges.ContainersToStart {
        container := &pod.Spec.Containers[idx]
        startContainerResult := kubecontainer.NewSyncResult(kubecontainer.StartContainer, container.Name)
        result.AddSyncResult(startContainerResult)

        isInBackOff, msg, err := m.doBackOff(pod, container, podStatus, backOff)
        if isInBackOff {
            startContainerResult.Fail(err, msg)
            glog.V(4).Infof("Backing Off restarting container %+v in pod %v", container, format.Pod(pod))
            continue
        }

        glog.V(4).Infof("Creating container %+v in pod %v", container, format.Pod(pod))
        if msg, err := m.startContainer(podSandboxID, podSandboxConfig, container, pod, podStatus, pullSecrets, podIP); err != nil {
            startContainerResult.Fail(err, msg)
            // known errors that are logged in other places are logged at higher levels here to avoid
            // repetitive log spam
            switch {
            case err == images.ErrImagePullBackOff:
                glog.V(3).Infof("container start failed: %v: %s", err, msg)
            default:
                utilruntime.HandleError(fmt.Errorf("container start failed: %v: %s", err, msg))
            }
            continue
        }
    }

    return
}

If the pod is newly created, or its spec has changed, the sandbox must be (re)generated. The key function here is createPodSandbox:

// createPodSandbox creates a pod sandbox and returns (podSandBoxID, message, error).
func (m *kubeGenericRuntimeManager) createPodSandbox(pod *v1.Pod, attempt uint32) (string, string, error) {
    // The config mainly contains: pod name, namespace, UID, labels, annotations, DNS settings,
    // container portMappings, and a Linux config derived from the
    // security.alpha.kubernetes.io/sysctls and security.alpha.kubernetes.io/unsafe-sysctls annotations.
    podSandboxConfig, err := m.generatePodSandboxConfig(pod, attempt)
    if err != nil {
        message := fmt.Sprintf("GeneratePodSandboxConfig for pod %q failed: %v", format.Pod(pod), err)
        glog.Error(message)
        return "", message, err
    }

    // Create pod logs directory
    err = m.osInterface.MkdirAll(podSandboxConfig.LogDirectory, 0755)
    if err != nil {
        message := fmt.Sprintf("Create pod log directory for pod %q failed: %v", format.Pod(pod), err)
        glog.Errorf(message)
        return "", message, err
    }

    podSandBoxID, err := m.runtimeService.RunPodSandbox(podSandboxConfig)
    if err != nil {
        message := fmt.Sprintf("CreatePodSandbox for pod %q failed: %v", format.Pod(pod), err)
        glog.Error(message)
        return "", message, err
    }

    return podSandBoxID, "", nil
}

The function first calls generatePodSandboxConfig to build the sandbox configuration, then MkdirAll to create the pod's log directory, and finally RunPodSandbox to do the actual pause-container creation. RunPodSandbox looks like this:

// RunPodSandbox creates and starts a pod-level sandbox. Runtimes should ensure
// the sandbox is in ready state.
// For docker, PodSandbox is implemented by a container holding the network
// namespace for the pod.
// Note: docker doesn't use LogDirectory (yet).
func (ds *dockerService) RunPodSandbox(config *runtimeapi.PodSandboxConfig) (id string, err error) {
    // Step 1: Pull the image for the sandbox.
    image := defaultSandboxImage
    podSandboxImage := ds.podSandboxImage
    if len(podSandboxImage) != 0 {
        image = podSandboxImage
    }

    // NOTE: To use a custom sandbox image in a private repository, users need to configure the nodes with credentials properly.
    // see: http://kubernetes.io/docs/user-guide/images/#configuring-nodes-to-authenticate-to-a-private-repository
    // Only pull sandbox image when it's not present - v1.PullIfNotPresent.
    if err := ensureSandboxImageExists(ds.client, image); err != nil {
        return "", err
    }

    // Step 2: Create the sandbox container.
    createConfig, err := ds.makeSandboxDockerConfig(config, image)
    if err != nil {
        return "", fmt.Errorf("failed to make sandbox docker config for pod %q: %v", config.Metadata.Name, err)
    }
    createResp, err := ds.client.CreateContainer(*createConfig)
    if err != nil {
        createResp, err = recoverFromCreationConflictIfNeeded(ds.client, *createConfig, err)
    }

    if err != nil || createResp == nil {
        return "", fmt.Errorf("failed to create a sandbox for pod %q: %v", config.Metadata.Name, err)
    }

    ds.setNetworkReady(createResp.ID, false)
    defer func(e *error) {
        // Set networking ready depending on the error return of
        // the parent function
        if *e == nil {
            ds.setNetworkReady(createResp.ID, true)
        }
    }(&err)

    // Step 3: Create Sandbox Checkpoint.
    if err = ds.checkpointHandler.CreateCheckpoint(createResp.ID, constructPodSandboxCheckpoint(config)); err != nil {
        return createResp.ID, err
    }

    // Step 4: Start the sandbox container.
    // Assume kubelet's garbage collector would remove the sandbox later, if
    // startContainer failed.
    err = ds.client.StartContainer(createResp.ID)
    if err != nil {
        return createResp.ID, fmt.Errorf("failed to start sandbox container for pod %q: %v", config.Metadata.Name, err)
    }

    // Rewrite resolv.conf file generated by docker.
    // NOTE: cluster dns settings aren't passed anymore to docker api in all cases,
    // not only for pods with host network: the resolver conf will be overwritten
    // after sandbox creation to override docker's behaviour. This resolv.conf
    // file is shared by all containers of the same pod, and needs to be modified
    // only once per pod.
    if dnsConfig := config.GetDnsConfig(); dnsConfig != nil {
        containerInfo, err := ds.client.InspectContainer(createResp.ID)
        if err != nil {
            return createResp.ID, fmt.Errorf("failed to inspect sandbox container for pod %q: %v", config.Metadata.Name, err)
        }

        if err := rewriteResolvFile(containerInfo.ResolvConfPath, dnsConfig.Servers, dnsConfig.Searches, dnsConfig.Options); err != nil {
            return createResp.ID, fmt.Errorf("rewrite resolv.conf failed for pod %q: %v", config.Metadata.Name, err)
        }
    }

    // Do not invoke network plugins if in hostNetwork mode.
    if nsOptions := config.GetLinux().GetSecurityContext().GetNamespaceOptions(); nsOptions != nil && nsOptions.HostNetwork {
        return createResp.ID, nil
    }

    // Step 5: Setup networking for the sandbox.
    // All pod networking is setup by a CNI plugin discovered at startup time.
    // This plugin assigns the pod ip, sets up routes inside the sandbox,
    // creates interfaces etc. In theory, its jurisdiction ends with pod
    // sandbox networking, but it might insert iptables rules or open ports
    // on the host as well, to satisfy parts of the pod spec that aren't
    // recognized by the CNI standard yet.
    cID := kubecontainer.BuildContainerID(runtimeName, createResp.ID)
    err = ds.network.SetUpPod(config.GetMetadata().Namespace, config.GetMetadata().Name, cID, config.Annotations)
    if err != nil {
        // TODO(random-liu): Do we need to teardown network here?
        if err := ds.client.StopContainer(createResp.ID, defaultSandboxGracePeriod); err != nil {
            glog.Warningf("Failed to stop sandbox container %q for pod %q: %v", createResp.ID, config.Metadata.Name, err)
        }
    }
    return createResp.ID, err
}

The flow of RunPodSandbox is fairly clear. Remember the pause-container image we configured in the kubelet startup flags (--pod-infra-container-image)? ensureSandboxImageExists makes sure that image is present on the node, pulling it first if it is not. makeSandboxDockerConfig then converts the PodSandboxConfig into Docker's ContainerCreateConfig, the pause container is created from that config, and finally it is started. Once the container is running, DNS is configured, and the way this is done is interesting: the container is inspected with docker inspect, and its resolv.conf file, which lives on the host, is rewritten:

[root@localhost ~]# docker inspect 7bae079c4926 -f={{.ResolvConfPath}}
/var/lib/docker/containers/7bae079c4926921a1ac8934362331f31eeb2fdb5c9fc0e6d06fcaabd9ef16c29/resolv.conf

Now look at the rewriteResolvFile function:

// rewriteResolvFile rewrites resolv.conf file generated by docker.
func rewriteResolvFile(resolvFilePath string, dns []string, dnsSearch []string, dnsOptions []string) error {
    if len(resolvFilePath) == 0 {
        glog.Errorf("ResolvConfPath is empty.")
        return nil
    }

    if _, err := os.Stat(resolvFilePath); os.IsNotExist(err) {
        return fmt.Errorf("ResolvConfPath %q does not exist", resolvFilePath)
    }

    var resolvFileContent []string
    for _, srv := range dns {
        resolvFileContent = append(resolvFileContent, "nameserver "+srv)
    }

    if len(dnsSearch) > 0 {
        resolvFileContent = append(resolvFileContent, "search "+strings.Join(dnsSearch, " "))
    }

    if len(dnsOptions) > 0 {
        resolvFileContent = append(resolvFileContent, "options "+strings.Join(dnsOptions, " "))
    }

    if len(resolvFileContent) > 0 {
        resolvFileContentStr := strings.Join(resolvFileContent, "\n")
        resolvFileContentStr += "\n"

        glog.V(4).Infof("Will attempt to re-write config file %s with: \n%s", resolvFilePath, resolvFileContent)
        if err := rewriteFile(resolvFilePath, resolvFileContentStr); err != nil {
            glog.Errorf("resolv.conf could not be updated: %v", err)
            return err
        }
    }

    return nil
}

See what happened? The kubelet simply rewrites the host-side file /var/lib/docker/containers/7bae079c4926921a1ac8934362331f31eeb2fdb5c9fc0e6d06fcaabd9ef16c29/resolv.conf. If you then log into the container and cat resolv.conf, what you see is exactly the output of rewriteResolvFile:

nameserver 10.96.0.10
search default.svc.cluster.local svc.cluster.local cluster.local
options ndots:5

At this point, if the pod uses host networking, no CNI plugin needs to be invoked for the pod's network. Otherwise, SetUpPod is called to set up the network of the pod's pause container:

func (pm *PluginManager) SetUpPod(podNamespace, podName string, id kubecontainer.ContainerID, annotations map[string]string) error {
    defer recordOperation("set_up_pod", time.Now())
    fullPodName := kubecontainer.BuildPodFullName(podName, podNamespace)
    pm.podLock(fullPodName).Lock()
    defer pm.podUnlock(fullPodName)

    glog.V(3).Infof("Calling network plugin %s to set up pod %q", pm.plugin.Name(), fullPodName)
    if err := pm.plugin.SetUpPod(podNamespace, podName, id, annotations); err != nil {
        return fmt.Errorf("NetworkPlugin %s failed to set up pod %q network: %v", pm.plugin.Name(), fullPodName, err)
    }

    return nil
}

This calls the plugin's SetUpPod method. plugin here is an interface; which concrete plugin is used is determined by the kubelet startup flag --network-plugin.
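For reference, this is roughly what that interface looks like (abridged from pkg/kubelet/network/plugins.go in the 1.9 tree; reproduced from memory, so treat it as a sketch rather than a verbatim quote):

// Abridged sketch of the kubelet NetworkPlugin interface (k8s 1.9);
// cniNetworkPlugin and kubenet both implement it.
type NetworkPlugin interface {
    // Init is called exactly once, before any other method.
    Init(host Host, hairpinMode kubeletconfig.HairpinMode, nonMasqueradeCIDR string, mtu int) error
    // Name returns the plugin's name, matched against --network-plugin.
    Name() string
    // Capabilities advertises optional features (e.g. port mappings).
    Capabilities() utilsets.Int
    // SetUpPod/TearDownPod run after sandbox creation / before deletion.
    SetUpPod(namespace string, name string, podSandboxID kubecontainer.ContainerID, annotations map[string]string) error
    TearDownPod(namespace string, name string, podSandboxID kubecontainer.ContainerID) error
    // GetPodNetworkStatus reports the pod's IP.
    GetPodNetworkStatus(namespace string, name string, podSandboxID kubecontainer.ContainerID) (*PodNetworkStatus, error)
    // Status returns nil when the plugin is ready (e.g. a CNI config was found).
    Status() error
}

With --network-plugin=cni, the concrete implementation is cniNetworkPlugin, and its SetUpPod does the real work: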

func (plugin *cniNetworkPlugin) SetUpPod(namespace string, name string, id kubecontainer.ContainerID, annotations map[string]string) error {
    if err := plugin.checkInitialized(); err != nil {
        return err
    }
    netnsPath, err := plugin.host.GetNetNS(id.ID)
    if err != nil {
        return fmt.Errorf("CNI failed to retrieve network namespace path: %v", err)
    }

    // Windows doesn't have loNetwork. It comes only with Linux
    if plugin.loNetwork != nil {
        if _, err = plugin.addToNetwork(plugin.loNetwork, name, namespace, id, netnsPath); err != nil {
            glog.Errorf("Error while adding to cni lo network: %s", err)
            return err
        }
    }

    _, err = plugin.addToNetwork(plugin.getDefaultNetwork(), name, namespace, id, netnsPath)
    if err != nil {
        glog.Errorf("Error while adding to cni network: %s", err)
        return err
    }

    return err
}

GetNetNS retrieves the path of the container's network namespace. On non-Windows systems, addToNetwork is first called to set up the loopback network inside that namespace, and then called again to configure the network on the pause container's eth0 interface.
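A quick aside on that netns path: with the dockershim runtime, GetNetNS inspects the pause container to obtain its PID and formats the namespace path from it. A minimal sketch of the idea (the helper name here is hypothetical; the /proc path format is what dockershim actually produces):

// netnsPathFromPid builds the network-namespace path for a running
// container from its PID, e.g. /proc/12345/ns/net. This is the path
// later handed to the CNI plugin.
func netnsPathFromPid(pid int) string {
    return fmt.Sprintf("/proc/%d/ns/net", pid)
}

Next, the plugin's getDefaultNetwork function: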

func (plugin *cniNetworkPlugin) getDefaultNetwork() *cniNetwork {
    plugin.RLock()
    defer plugin.RUnlock()
    return plugin.defaultNetwork
}

getDefaultNetwork just returns the cached plugin.defaultNetwork under a read lock. That field is populated by getDefaultCNINetwork via syncNetworkConfig, which the plugin runs at init time and then periodically to pick up CNI config changes.
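A rough sketch of that glue code (simplified; not a verbatim quote of the 1.9 source):

// syncNetworkConfig re-reads the CNI config directory and caches the
// first valid network as the plugin's default.
func (plugin *cniNetworkPlugin) syncNetworkConfig() {
    network, err := getDefaultCNINetwork(plugin.pluginDir, plugin.binDir, plugin.vendorCNIDirPrefix)
    if err != nil {
        glog.Warningf("Unable to update cni config: %s", err)
        return
    }
    plugin.setDefaultNetwork(network)
}

getDefaultCNINetwork is where the actual discovery happens: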

func getDefaultCNINetwork(pluginDir, binDir, vendorCNIDirPrefix string) (*cniNetwork, error) {
    if pluginDir == "" {
        pluginDir = DefaultNetDir
    }
    files, err := libcni.ConfFiles(pluginDir, []string{".conf", ".conflist", ".json"})
    switch {
    case err != nil:
        return nil, err
    case len(files) == 0:
        return nil, fmt.Errorf("No networks found in %s", pluginDir)
    }

    sort.Strings(files)
    for _, confFile := range files {
        var confList *libcni.NetworkConfigList
        if strings.HasSuffix(confFile, ".conflist") {
            confList, err = libcni.ConfListFromFile(confFile)
            if err != nil {
                glog.Warningf("Error loading CNI config list file %s: %v", confFile, err)
                continue
            }
        } else {
            conf, err := libcni.ConfFromFile(confFile)
            if err != nil {
                glog.Warningf("Error loading CNI config file %s: %v", confFile, err)
                continue
            }
            // Ensure the config has a "type" so we know what plugin to run.
            // Also catches the case where somebody put a conflist into a conf file.
            if conf.Network.Type == "" {
                glog.Warningf("Error loading CNI config file %s: no 'type'; perhaps this is a .conflist?", confFile)
                continue
            }

            confList, err = libcni.ConfListFromConf(conf)
            if err != nil {
                glog.Warningf("Error converting CNI config file %s to list: %v", confFile, err)
                continue
            }
        }
        if len(confList.Plugins) == 0 {
            glog.Warningf("CNI config list %s has no networks, skipping", confFile)
            continue
        }
        confType := confList.Plugins[0].Network.Type

        // Search for vendor-specific plugins as well as default plugins in the CNI codebase.
        vendorDir := vendorCNIDir(vendorCNIDirPrefix, confType)
        cninet := &libcni.CNIConfig{
            Path: []string{vendorDir, binDir},
        }
        network := &cniNetwork{name: confList.Name, NetworkConfig: confList, CNIConfig: cninet}
        return network, nil
    }
    return nil, fmt.Errorf("No valid networks found in %s", pluginDir)
}

getDefaultCNINetwork locates the CNI binaries and config directory from the kubelet's CNI flags; if those flags are not set, the default netDir is /etc/cni/net.d and the default binDir is /opt/cni/bin. The function scans netDir for files with the .conf, .conflist, or .json suffix and, from the first valid config file, returns a cniNetwork object carrying the CNI plugin name and its configuration.
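For reference, here is a minimal, hypothetical /etc/cni/net.d/10-mynet.conf that getDefaultCNINetwork would accept. The required "type" field names the plugin binary that must exist in binDir (bridge and host-local are standard CNI plugins):

{
    "cniVersion": "0.3.1",
    "name": "mynet",
    "type": "bridge",
    "bridge": "cni0",
    "isGateway": true,
    "ipam": {
        "type": "host-local",
        "subnet": "10.244.0.0/16"
    }
}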
Next, the plugin's addToNetwork function:

func (plugin *cniNetworkPlugin) addToNetwork(network *cniNetwork, podName string, podNamespace string, podSandboxID kubecontainer.ContainerID, podNetnsPath string) (cnitypes.Result, error) {
    rt, err := plugin.buildCNIRuntimeConf(podName, podNamespace, podSandboxID, podNetnsPath)
    if err != nil {
        glog.Errorf("Error adding network when building cni runtime conf: %v", err)
        return nil, err
    }

    netConf, cniNet := network.NetworkConfig, network.CNIConfig
    glog.V(4).Infof("About to add CNI network %v (type=%v)", netConf.Name, netConf.Plugins[0].Network.Type)
    res, err := cniNet.AddNetworkList(netConf, rt)
    if err != nil {
        glog.Errorf("Error adding network: %v", err)
        return nil, err
    }

    return res, nil
}

buildCNIRuntimeConf builds the CNI runtime configuration from the pod's name and namespace, the pause container ID, and the network namespace path retrieved earlier; the cniNetwork object returned by getDefaultCNINetwork supplies the network config itself. Note that the container-side interface name is fixed to eth0 (network.DefaultInterfaceName).
The buildCNIRuntimeConf function:

func (plugin *cniNetworkPlugin) buildCNIRuntimeConf(podName string, podNs string, podSandboxID kubecontainer.ContainerID, podNetnsPath string) (*libcni.RuntimeConf, error) {
    glog.V(4).Infof("Got netns path %v", podNetnsPath)
    glog.V(4).Infof("Using podns path %v", podNs)

    rt := &libcni.RuntimeConf{
        ContainerID: podSandboxID.ID,
        NetNS:       podNetnsPath,
        IfName:      network.DefaultInterfaceName,
        Args: [][2]string{
            {"IgnoreUnknown", "1"},
            {"K8S_POD_NAMESPACE", podNs},
            {"K8S_POD_NAME", podName},
            {"K8S_POD_INFRA_CONTAINER_ID", podSandboxID.ID},
        },
    }

    // port mappings are a cni capability-based args, rather than parameters
    // to a specific plugin
    portMappings, err := plugin.host.GetPodPortMappings(podSandboxID.ID)
    if err != nil {
        return nil, fmt.Errorf("could not retrieve port mappings: %v", err)
    }
    portMappingsParam := make([]cniPortMapping, 0, len(portMappings))
    for _, p := range portMappings {
        if p.HostPort <= 0 {
            continue
        }
        portMappingsParam = append(portMappingsParam, cniPortMapping{
            HostPort:      p.HostPort,
            ContainerPort: p.ContainerPort,
            Protocol:      strings.ToLower(string(p.Protocol)),
            HostIP:        p.HostIP,
        })
    }
    rt.CapabilityArgs = map[string]interface{}{
        "portMappings": portMappingsParam,
    }

    return rt, nil
}

Next, AddNetworkList is executed:

// AddNetworkList executes a sequence of plugins with the ADD command
func (c *CNIConfig) AddNetworkList(list *NetworkConfigList, rt *RuntimeConf) (types.Result, error) {
    var prevResult types.Result
    for _, net := range list.Plugins {
        pluginPath, err := invoke.FindInPath(net.Network.Type, c.Path)
        if err != nil {
            return nil, err
        }

        newConf, err := buildOneConfig(list, net, prevResult, rt)
        if err != nil {
            return nil, err
        }

        prevResult, err = invoke.ExecPluginWithResult(pluginPath, newConf.Bytes, c.args("ADD", rt))
        if err != nil {
            return nil, err
        }
    }

    return prevResult, nil
}

This function iterates over the plugins in the config list; for each entry, FindInPath looks in binDir (c.Path) for a binary named after the CNI type and returns its full path. It then calls ExecPluginWithResult, which executes that CNI binary, passing it the network configuration (newConf) together with the RuntimeConf and the ADD command, where ADD means "add this container to the network".
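To make that invocation concrete: per the CNI spec, the plugin binary receives the network configuration JSON on stdin and the runtime parameters as environment variables. For the sandbox above, the environment would look roughly like this (the netns PID and pod name are illustrative):

CNI_COMMAND=ADD
CNI_CONTAINERID=7bae079c4926921a1ac8934362331f31eeb2fdb5c9fc0e6d06fcaabd9ef16c29
CNI_NETNS=/proc/12345/ns/net
CNI_IFNAME=eth0
CNI_ARGS=IgnoreUnknown=1;K8S_POD_NAMESPACE=kube-system;K8S_POD_NAME=kube-dns-xxx;K8S_POD_INFRA_CONTAINER_ID=7bae079c4926921a1ac8934362331f31eeb2fdb5c9fc0e6d06fcaabd9ef16c29
CNI_PATH=/opt/cni/bin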
And with that, kubelet's side of the network setup is done. When creating a new pod, kubelet first creates a sandbox container, then builds a CNI runtime config from the pod's YAML and the kubelet's CNI flags, and finally invokes the CNI plugin to configure the Docker container's network. (The CNI plugin invocation flow itself is analyzed in the next part.)
