criService 實現了接口 runtime.RuntimeServiceServer

1. RunPodSandbox 函數

路徑 pkg/server/sandbox_run.go，創建並啓動 sandbox，成功後需確保 sandbox 狀態爲 ready

// RunPodSandbox creates and starts a pod-level sandbox. Runtimes should ensure
// the sandbox is in ready state.
func (c *criService) RunPodSandbox(ctx context.Context, r *runtime.RunPodSandboxRequest) (_ *runtime.RunPodSandboxResponse, retErr error) {
	config := r.GetConfig()
	log.G(ctx).Debugf("Sandbox config %+v", config)

1.1 生成 ID，生成 name，註冊 name <--> id 映射關係，防止並行創建

// Generate unique id and name for the sandbox and reserve the name.
id := util.GenerateID()
metadata := config.GetMetadata()
if metadata == nil {
	return nil, errors.New("sandbox config must include metadata")
}
name := makeSandboxName(metadata)
log.G(ctx).Debugf("Generated id %q for sandbox %q", id, name)
// Reserve the sandbox name to avoid concurrent `RunPodSandbox` request starting the
// same sandbox.
if err := c.sandboxNameIndex.Reserve(name, id); err != nil {
	return nil, errors.Wrapf(err, "failed to reserve sandbox name %q", name)
}

1.2 實例化 Sandbox，初始狀態爲 unknown

// Create initial internal sandbox object.
sandbox := sandboxstore.NewSandbox(
	sandboxstore.Metadata{
		ID:             id,
		Name:           name,
		Config:         config,
		RuntimeHandler: r.GetRuntimeHandler(),
	},
	sandboxstore.Status{
		State: sandboxstore.StateUnknown,
	},
)

1.3 確保有鏡像，如果沒有鏡像則 pull 鏡像

// Ensure sandbox container image snapshot.
image, err := c.ensureImageExists(ctx, c.config.SandboxImage, config)
if err != nil {
	return nil, errors.Wrapf(err, "failed to get sandbox image %q", c.config.SandboxImage)
}
containerdImage, err := c.toContainerdImage(ctx, *image)
if err != nil {
	return nil, errors.Wrapf(err, "failed to get image from containerd %q", image.ID)
}

1.4 獲取 sandbox runtime

如果 pod 帶有註解 io.kubernetes.cri.untrusted-workload = true，則返回 untrusted runtime；否則使用請求中指定的 runtime handler，未指定時返回默認 runtime（如下配置中爲 runc，即 io.containerd.runc.v1）

[plugins."io.containerd.grpc.v1.cri".containerd]
snapshotter = "overlayfs"
default_runtime_name = "runc"
no_pivot = false
[plugins."io.containerd.grpc.v1.cri".containerd.default_runtime]
runtime_type = ""
runtime_engine = ""
runtime_root = ""
privileged_without_host_devices = false
[plugins."io.containerd.grpc.v1.cri".containerd.untrusted_workload_runtime]
runtime_type = ""
runtime_engine = ""
runtime_root = ""
privileged_without_host_devices = false
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes]
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc]
runtime_type = "io.containerd.runc.v1"
runtime_engine = ""
runtime_root = ""
privileged_without_host_devices = false

// getSandboxRuntime picks the runtime configuration to use for a sandbox.
//
// When the sandbox config is marked as an untrusted workload, the handler is
// forced to criconfig.RuntimeUntrusted; otherwise the requested runtimeHandler
// is used, falling back to the configured default runtime name when it is
// empty. An error is returned when an untrusted workload names a different
// handler, when an untrusted workload asks for host access, or when no runtime
// is configured under the resolved handler name.
func (c *criService) getSandboxRuntime(config *runtime.PodSandboxConfig, runtimeHandler string) (criconfig.Runtime, error) {
	// Zero value returned alongside every error.
	var none criconfig.Runtime

	if untrustedWorkload(config) {
		// With the untrusted annotation set, the only acceptable handlers are
		// the empty one and the untrusted one itself.
		handlerAllowed := runtimeHandler == "" || runtimeHandler == criconfig.RuntimeUntrusted
		if !handlerAllowed {
			return none, errors.New("untrusted workload with explicit runtime handler is not allowed")
		}

		// Host/node access is refused for untrusted workloads.
		//
		// Note: privileged is deliberately not rejected here, because the
		// runtime may support it. For example, a virtual-machine isolated
		// runtime can grant privileged access to the whole guest VM instead of
		// the host.
		// TODO(windows): Deprecate this so that we don't need to handle it for windows.
		if hostAccessingSandbox(config) {
			return none, errors.New("untrusted workload with host access is not allowed")
		}

		runtimeHandler = criconfig.RuntimeUntrusted
	}

	containerdCfg := c.config.ContainerdConfig

	// No handler requested: use the configured default runtime name.
	if runtimeHandler == "" {
		runtimeHandler = containerdCfg.DefaultRuntimeName
	}

	if rt, found := containerdCfg.Runtimes[runtimeHandler]; found {
		return rt, nil
	}
	return none, errors.Errorf("no runtime for %q is configured", runtimeHandler)
}

1.5 需要爲 pod 設置網絡

如果不是 host 網絡模式，需要創建 namespace

NewNetNS 創建網絡 namespace，在目錄 /var/run/netns/cni-%x-%x-%x-%x-%x

if podNetwork {
	// If it is not in host network namespace then create a namespace and set the sandbox
	// handle. NetNSPath in sandbox metadata and NetNS is non empty only for non host network
	// namespaces. If the pod is in host network namespace then both are empty and should not
	// be used.
	sandbox.NetNS, err = netns.NewNetNS()
	if err != nil {
		return nil, errors.Wrapf(err, "failed to create network namespace for sandbox %q", id)
	}
	sandbox.NetNSPath = sandbox.NetNS.GetPath()

2. setupPodNetwork 爲 sandbox 創建網絡

整理傳給 CNI 插件的配置，包括 sandbox ID、網絡 namespace 以及基本配置；如果配置了 bandwidth、dns，也會一併傳入

// setupPodNetwork setups up the network for a pod
func (c *criService) setupPodNetwork(ctx context.Context, sandbox *sandboxstore.Sandbox) error {
	var (
		id     = sandbox.ID
		config = sandbox.Config
		path   = sandbox.NetNSPath
	)
	if c.netPlugin == nil {
		return errors.New("cni config not initialized")
	}

	opts, err := cniNamespaceOpts(id, config)
	if err != nil {
		return errors.Wrap(err, "get cni namespace options")
	}

2.1 netPlugin.Setup 最終調用 AddNetworkList CNI 插件接口爲 sandbox 配置網絡

最終調用 plugin 二進制爲 sandbox 配置網絡

這裏輕描淡寫，知道個過程即可。假設配置網絡成功，接着看看做了哪些工作

result, err := c.netPlugin.Setup(ctx, id, path, opts...)
if err != nil {
	return err
}
logDebugCNIResult(ctx, id, result)
// Check if the default interface has IP config
if configs, ok := result.Interfaces[defaultIfName]; ok && len(configs.IPConfigs) > 0 {
	sandbox.IP, sandbox.AdditionalIPs = selectPodIPs(configs.IPConfigs)
	sandbox.CNIResult = result
	return nil
}

3. 生成 runtime spec 配置

可以使用 crictl pods，crictl inspectp $id 查看配置

func (c *criService) sandboxContainerSpec(id string, config *runtime.PodSandboxConfig,
	imageConfig *imagespec.ImageConfig, nsPath string, runtimePodAnnotations []string) (*runtimespec.Spec, error) {
	// Creates a spec Generator with the default spec.
	// TODO(random-liu): [P1] Compare the default settings with docker and containerd default.
	specOpts := []oci.SpecOpts{
		customopts.WithoutRunMount,
		customopts.WithoutDefaultSecuritySettings,
		customopts.WithRelativeRoot(relativeRootfsPath),
		oci.WithEnv(imageConfig.Env),
		oci.WithRootFSReadonly(),
		oci.WithHostname(config.GetHostname()),
	}
	if imageConfig.WorkingDir != "" {
		specOpts = append(specOpts, oci.WithProcessCwd(imageConfig.WorkingDir))
	}

3.1 label 的類型爲 sandbox

// Generate spec options that will be applied to the spec later.
specOpts, err := c.sandboxContainerSpecOpts(config, &image.ImageSpec.Config)
if err != nil {
	return nil, errors.Wrap(err, "failed to generate sanbdox container spec options")
}

sandboxLabels := buildLabels(config.Labels, containerKindSandbox)

4. 存儲 sandbox 信息，創建 root 工作目錄

	container, err := c.client.NewContainer(ctx, id, opts...)
	if err != nil {
		return nil, errors.Wrap(err, "failed to create containerd container")
	}


	// Create sandbox container root directories.
	sandboxRootDir := c.getSandboxRootDir(id)
	if err := c.os.MkdirAll(sandboxRootDir, 0755); err != nil {
		return nil, errors.Wrapf(err, "failed to create sandbox root directory %q",
			sandboxRootDir)
	}

4.1 setupSandboxFiles 主要創建 hostname resolv.conf hosts 等文件

// Setup files required for the sandbox.
if err = c.setupSandboxFiles(id, config); err != nil {
	return nil, errors.Wrapf(err, "failed to setup sandbox files")
}

5. 創建 sandbox 任務

這個其實最終是發送 task 請求，分別爲 CreateTaskRequest，StartRequest，創建以及啓動任務

taskOpts := c.taskOpts(ociRuntime.Type)
// We don't need stdio for sandbox container.
task, err := container.NewTask(ctx, containerdio.NullIO, taskOpts...)
if err != nil {
	return nil, errors.Wrap(err, "failed to create containerd task")
}


// wait is a long running background request, no timeout needed.
exitCh, err := task.Wait(ctrdutil.NamespacedContext())
if err != nil {
	return nil, errors.Wrap(err, "failed to wait for sandbox container task")
}

if err := task.Start(ctx); err != nil {
	return nil, errors.Wrapf(err, "failed to start sandbox container task %q", id)
}

5.1 比如使用默認 tasks-service io.containerd.service.v1

func (l *local) Create(ctx context.Context, r *api.CreateTaskRequest, _ ...grpc.CallOption) (*api.CreateTaskResponse, error) {
	container, err := l.getContainer(ctx, r.ContainerID)
	if err != nil {
		return nil, errdefs.ToGRPC(err)
	}
	checkpointPath, err := getRestorePath(container.Runtime.Name, r.Options)
	if err != nil {
		return nil, err
	}

5.2 比如 io.containerd.runc.v1

實現路徑爲 containerd/runtime/v1/runtime.go

// Create a new task
func (r *Runtime) Create(ctx context.Context, id string, opts runtime.CreateOpts) (_ runtime.Task, err error) {
	namespace, err := namespaces.NamespaceRequired(ctx)
	if err != nil {
		return nil, err
	}

	if err := identifiers.Validate(id); err != nil {
		return nil, errors.Wrapf(err, "invalid task id")
	}

	ropts, err := r.getRuncOptions(ctx, id)
	if err != nil {
		return nil, err
	}

啓動 shim 進程

/usr/bin/containerd-shim-runc-v1 -namespace k8s.io -id d84185af26fcc146b4787ed08543c49d327bb97171ed6b669618f9793a8545fc -address /run/containerd/containerd.sock

shimopt := ShimLocal(r.config, r.events)
if !r.config.NoShim {
	var cgroup string
	if opts.TaskOptions != nil {
		v, err := typeurl.UnmarshalAny(opts.TaskOptions)
		if err != nil {
			return nil, err
		}
		cgroup = v.(*runctypes.CreateOptions).ShimCgroup
	}
	exitHandler := func() {
		log.G(ctx).WithField("id", id).Info("shim reaped")

		if _, err := r.tasks.Get(ctx, id); err != nil {
			// Task was never started or was already successfully deleted
			return
		}

		if err = r.cleanupAfterDeadShim(context.Background(), bundle, namespace, id); err != nil {
			log.G(ctx).WithError(err).WithFields(logrus.Fields{
				"id":        id,
				"namespace": namespace,
			}).Warn("failed to clean up after killed shim")
		}
	}
	shimopt = ShimRemote(r.config, r.address, cgroup, exitHandler)
}

與 shim 建立GRPC 連接，發送 CreateTaskRequest

sopts := &shim.CreateTaskRequest{
	ID:         id,
	Bundle:     bundle.path,
	Runtime:    rt,
	Stdin:      opts.IO.Stdin,
	Stdout:     opts.IO.Stdout,
	Stderr:     opts.IO.Stderr,
	Terminal:   opts.IO.Terminal,
	Checkpoint: opts.Checkpoint,
	Options:    opts.TaskOptions,
}
for _, m := range opts.Rootfs {
	sopts.Rootfs = append(sopts.Rootfs, &types.Mount{
		Type:    m.Type,
		Source:  m.Source,
		Options: m.Options,
	})
}
cr, err := s.Create(ctx, sopts)
if err != nil {
	return nil, errdefs.FromGRPC(err)
}

startTaskRequest 一樣的流程

6. 更新 sandbox 狀態爲 ready

if err := sandbox.Status.Update(func(status sandboxstore.Status) (sandboxstore.Status, error) {
	// Set the pod sandbox as ready after successfully start sandbox container.
	status.Pid = task.Pid()
	status.State = sandboxstore.StateReady
	status.CreatedAt = info.CreatedAt
	return status, nil
}); err != nil {
	return nil, errors.Wrap(err, "failed to update sandbox status")
}

總結：

RunPodSandbox 獲取配置，生成 ID、name，註冊 name <--> id 映射關係，防止重複併發創建

確保 image 在本地節點存在，不存在則 pull image

獲取 runtime，根據 pod 註解，以及配置文件，如果 untrusted 則返回該 runtime，否則返回默認 runtime

爲 sandbox 創建網絡，與 docker-shim 不一樣的是這個先創建網絡

生成 spec 配置

發送 GRPC 創建以及啓動請求，成功將 sandbox 狀態改爲 ready

【containerd 源碼分析】containerd cri PodRunSandbox 源碼分析之二

1. RunPodSandbox 函數

1.2 實例化 Sandbox，初始狀態爲 unknown

1.3 確保有鏡像，如果沒有鏡像則 pull 鏡像

1.4 獲取 sandbox runtime

1.5 需要爲 pod 設置網絡

2. setupPodNetwork 爲 sandbox 創建網絡

2.1 netPlugin.Setup 最終調用 AddNetworkList CNI 插件接口爲 sandbox 配置網絡

3. 生成 runtime spec 配置

3.1 label 的類型爲 sandbox

4. 存儲 sandbox 信息，創建 root 工作目錄

4.1 setupSandboxFiles 主要創建 hostname resolv.conf hosts 等文件

5. 創建 sandbox 任務

5.1 比如使用默認 tasks-service io.containerd.service.v1

5.2 比如 io.containerd.runc.v1

與 shim 建立GRPC 連接，發送 CreateTaskRequest

6. 更新 sandbox 狀態爲 ready

總結：

Kafka存儲機制

aws語音呼叫調用，告警電話

【轉】[C#] WebAPI 防止併發調用二（冥等性）

HTTP URL 詳解

【kubernetes/k8s源碼分析】coredns 源碼分析之四 cache 插件

【kubernetes/k8s源碼分析】kata container create 創建源碼分析

【kubeedge概念】kubeedge架構與部署安裝

【kubernetes/k8s源碼分析】kata container agent create container 源碼分析

【containerd 源碼分析】containerd cri 啓動註冊流程源碼分析

https://yachay.unat.edu.pe/blog/index.php?comment_area=format_blog&comment_component=blog&comment_co

linux以太網驅動總結