【kubernetes/k8s源碼分析】kata container create 創建源碼分析

github: https://github.com/kata-containers/runtime

 

USAGE:

   kata-runtime create [command options] <container-id>

   <container-id> is your name for the instance of the container that you are starting. The name you provide for the container instance must be unique on your host.

DESCRIPTION:

   The create command creates an instance of a container for a bundle. The bundle is a directory with a specification file named "config.json" and a root filesystem.
   The specification file includes an args parameter. The args parameter is used to specify command(s) that get run when the container is started. To change the command(s) that get executed on start, edit the args parameter of the spec.

 

    參考: https://github.com/kata-containers/documentation/blob/master/design/architecture.md

 

 

1. create 函數

    路徑 runtime/cli/create.go

func create(ctx context.Context, containerID, bundlePath, console, pidFilePath string, detach, systemdCgroup bool,
	runtimeConfig oci.RuntimeConfig) error {
	var err error

	span, ctx := katautils.Trace(ctx, "create")
	defer span.Finish()

	kataLog = kataLog.WithField("container", containerID)
	setExternalLoggers(ctx, kataLog)
	span.SetTag("container", containerID)

    1.1 bundle 路徑包括啓動容器的配置

    /var/run/containers/storage/overlay-containers/8058e8535b46c8fee88bc49360149fb7fa0790da2ecafa277706ac5f3199768b/userdata 目錄下包含

    attach  config.json  ctl  hostname  pidfile  resolv.conf  shm    

if bundlePath == "" {
	cwd, err := os.Getwd()
	if err != nil {
		return err
	}

	kataLog.WithField("directory", cwd).Debug("Defaulting bundle path to current directory")

	bundlePath = cwd
}

    1.2 類型分爲 sandbox(vc.PodSandbox)和 container(vc.PodContainer)

var process vc.Process
switch containerType {
case vc.PodSandbox:
	_, process, err = katautils.CreateSandbox(ctx, vci, ociSpec, runtimeConfig, rootFs, containerID, bundlePath, console, disableOutput, systemdCgroup, false)
	if err != nil {
		return err
	}
case vc.PodContainer:
	process, err = katautils.CreateContainer(ctx, vci, nil, ociSpec, rootFs, containerID, bundlePath, console, disableOutput, false)
	if err != nil {
		return err
	}
}

    1.2.1 CreateSandbox 函數

    主要是創建 sandbox 容器,oci.SandboxConfig 函數主要是把 OCI compatible(CompatOCISpec)運行時配置文件轉爲 virtcontainers sandbox 的配置結構(SandboxConfig)

// CreateSandbox create a sandbox container
func CreateSandbox(ctx context.Context, vci vc.VC, ociSpec oci.CompatOCISpec, runtimeConfig oci.RuntimeConfig, rootFs vc.RootFs,
	containerID, bundlePath, console string, disableOutput, systemdCgroup, builtIn bool) (_ vc.VCSandbox, _ vc.Process, err error) {
	span, ctx := Trace(ctx, "createSandbox")
	defer span.Finish()

	sandboxConfig, err := oci.SandboxConfig(ociSpec, runtimeConfig, bundlePath, containerID, console, disableOutput, systemdCgroup)
	if err != nil {
		return nil, vc.Process{}, err
	}

	if builtIn {
		sandboxConfig.Stateful = true
	}

     1.2.1.1 SetupNetworkNamespace 函數

     創建網絡 namespace,如果不存在則創建,路徑爲 /var/run/netns/cni-xxxx-xxxx-xxxx-xxxx

// Important to create the network namespace before the sandbox is
// created, because it is not responsible for the creation of the
// netns if it does not exist.
if err := SetupNetworkNamespace(&sandboxConfig.NetworkConfig); err != nil {
	return nil, vc.Process{}, err
}

     1.2.1.2 進入 net namespace 執行

// Run pre-start OCI hooks.
err = EnterNetNS(sandboxConfig.NetworkConfig.NetNSPath, func() error {
	return PreStartHooks(ctx, ociSpec, containerID, bundlePath)
})
if err != nil {
	return nil, vc.Process{}, err
}

 

     開始進入 virtcontainers 目錄中

2. CreateSandbox 函數

      路徑 runtime/virtcontainers/api.go,看看 createSandboxFromConfig 究竟做了哪些工作

// CreateSandbox is the virtcontainers sandbox creation entry point.
// CreateSandbox creates a sandbox and its containers. It does not start them.
//
// On success the sandbox built by createSandboxFromConfig is returned after
// releaseStatelessSandbox has been called on it. On failure the returned
// VCSandbox is a true nil interface value, so callers can safely compare it
// against nil.
func CreateSandbox(ctx context.Context, sandboxConfig SandboxConfig, factory Factory) (VCSandbox, error) {
	span, ctx := trace(ctx, "CreateSandbox")
	defer span.Finish()

	s, err := createSandboxFromConfig(ctx, sandboxConfig, factory)
	if err != nil {
		// Do not return s here: it is a nil *Sandbox, and wrapping a typed
		// nil pointer in the VCSandbox interface yields a non-nil interface.
		return nil, err
	}

	s.releaseStatelessSandbox()

	return s, nil
}

    2.1 createSandboxFromConfig 函數

  • Create the sandbox
  • Create the sandbox network
  • Start the VM
  • Create Containers
  • completely created now, we can store
func createSandboxFromConfig(ctx context.Context, sandboxConfig SandboxConfig, factory Factory) (_ *Sandbox, err error) {
	span, ctx := trace(ctx, "createSandboxFromConfig")
	defer span.Finish()

	// Create the sandbox.
	s, err := createSandbox(ctx, sandboxConfig, factory)
	if err != nil {
		return nil, err
	}

 

3. Create the sandbox

      根據配置創建 sandbox,容器列表,hypervisor 以及 agent

// createSandbox creates a sandbox from a sandbox description, the containers list, the hypervisor
// and the agent passed through the Config structure.
// It will create and store the sandbox structure, and then ask the hypervisor
// to physically create that sandbox i.e. starts a VM for that sandbox to eventually
// be started.
func createSandbox(ctx context.Context, sandboxConfig SandboxConfig, factory Factory) (*Sandbox, error) {
	span, ctx := trace(ctx, "createSandbox")
	defer span.Finish()

	if err := createAssets(ctx, &sandboxConfig); err != nil {
		return nil, err
	}

	s, err := newSandbox(ctx, sandboxConfig, factory)
	if err != nil {
		return nil, err
	}

    3.1 newSandbox 函數

     實例化 agent,主要關注 shim(start) 與 proxy (start stop)接口

     實例化 Hypervisor,這個比較簡單

func newSandbox(ctx context.Context, sandboxConfig SandboxConfig, factory Factory) (*Sandbox, error) {
	span, ctx := trace(ctx, "newSandbox")
	defer span.Finish()

	if !sandboxConfig.valid() {
		return nil, fmt.Errorf("Invalid sandbox configuration")
	}

	agent := newAgent(sandboxConfig.AgentType)

	hypervisor, err := newHypervisor(sandboxConfig.HypervisorType)
	if err != nil {
		return nil, err
	}

     3.1.1 實例化 Sandbox

     可以看到包含 hypervisor、agent、volumes、network 等,具體使用時再具體分析

s := &Sandbox{
	id:              sandboxConfig.ID,
	factory:         factory,
	hypervisor:      hypervisor,
	agent:           agent,
	config:          &sandboxConfig,
	volumes:         sandboxConfig.Volumes,
	containers:      map[string]*Container{},
	runPath:         store.SandboxRuntimeRootPath(sandboxConfig.ID),
	configPath:      store.SandboxConfigurationRootPath(sandboxConfig.ID),
	state:           types.SandboxState{},
	annotationsLock: &sync.RWMutex{},
	wg:              &sync.WaitGroup{},
	shmSize:         sandboxConfig.ShmSize,
	sharePidNs:      sandboxConfig.SharePidNs,
	stateful:        sandboxConfig.Stateful,
	networkNS:       NetworkNamespace{NetNsPath: sandboxConfig.NetworkConfig.NetNSPath},
	ctx:             ctx,
}

    3.2 hypervisor.createSandbox 函數

     假設實現的爲 qemu,路徑爲 runtime/virtcontainers/qemu.go

     setup  爲 qemu 設置一些啓動參數,比如 imagePath,bridge UUID 等

// createSandbox is the Hypervisor sandbox creation implementation for govmmQemu.
func (q *qemu) createSandbox(ctx context.Context, id string, networkNS NetworkNamespace, hypervisorConfig *HypervisorConfig, store *store.VCStore) error {
	// Save the tracing context
	q.ctx = ctx

	span, _ := q.trace("createSandbox")
	defer span.Finish()

	if err := q.setup(id, hypervisorConfig, store); err != nil {
		return err
	}

    3.2.1 qemu 啓動的配置參數

qemuConfig := govmmQemu.Config{
	Name:        fmt.Sprintf("sandbox-%s", q.id),
	UUID:        q.state.UUID,
	Path:        qemuPath,
	Ctx:         q.qmpMonitorCh.ctx,
	Machine:     machine,
	SMP:         smp,
	Memory:      memory,
	Devices:     devices,
	CPUModel:    cpuModel,
	Kernel:      kernel,
	RTC:         rtc,
	QMPSockets:  qmpSockets,
	Knobs:       knobs,
	Incoming:    incoming,
	VGA:         "none",
	GlobalParam: "kvm-pit.lost_tick_policy=discard",
	Bios:        firmwarePath,
	PidFile:     pidFile,
}

    3.3. agent.createSandbox

     路徑 runtime/virtcontainers/kata_agent.go,主要是前期的一些檢查以及配置參數,比如 imagePath 等

// createSandbox configures the agent for the given sandbox. The call is
// traced, and the sandbox's hypervisor, ID and share path are handed to
// k.configure together with the agent's proxyBuiltIn setting.
func (k *kataAgent) createSandbox(sandbox *Sandbox) error {
	span, _ := k.trace("createSandbox")
	defer span.Finish()

	id := sandbox.id
	sharePath := k.getSharePath(id)

	return k.configure(sandbox.hypervisor, id, sharePath, k.proxyBuiltIn, nil)
}

 

4. Create the sandbox network

func (s *Sandbox) createNetwork() error {
	if s.config.NetworkConfig.DisableNewNetNs ||
		s.config.NetworkConfig.NetNSPath == "" {
		return nil
	}

	span, _ := s.trace("createNetwork")
	defer span.Finish()

	s.networkNS = NetworkNamespace{
		NetNsPath:    s.config.NetworkConfig.NetNSPath,
		NetNsCreated: s.config.NetworkConfig.NetNsCreated,
	}

    4.1 network.Add 函數

     在網絡 namespace 中添加所有需要的網絡接口

// Add adds all needed interfaces inside the network namespace.
//
// NOTE(review): this listing is abridged. The statements that build
// `endpoints` are not shown, so only the tracing span, the debug log and the
// final return are visible here.
func (n *Network) Add(ctx context.Context, config *NetworkConfig, hypervisor hypervisor, hotplug bool) ([]Endpoint, error) {
	// Open a tracing span for this call; it is finished when Add returns.
	span, _ := n.trace(ctx, "add")
	defer span.Finish()

	networkLogger().Debug("Network added")

	return endpoints, nil
}

    4.2 createEndpointsFromScan

      打開網絡 namespace 的文件描述符,netlinkHandle.LinkList 相當於命令 ip link show,列出所有網絡接口

func createEndpointsFromScan(networkNSPath string, config *NetworkConfig) ([]Endpoint, error) {
	var endpoints []Endpoint

	netnsHandle, err := netns.GetFromPath(networkNSPath)
	if err != nil {
		return []Endpoint{}, err
	}
	defer netnsHandle.Close()

	netlinkHandle, err := netlink.NewHandleAt(netnsHandle)
	if err != nil {
		return []Endpoint{}, err
	}
	defer netlinkHandle.Delete()

	linkList, err := netlinkHandle.LinkList()
	if err != nil {
		return []Endpoint{}, err
	}

     4.2.1 createEndpoint 函數 

        根據網絡信息獲得不同類型 endpoint結構,實現了 Endpoint 接口

func createEndpoint(netInfo NetworkInfo, idx int, model NetInterworkingModel) (Endpoint, error) {
	var endpoint Endpoint
	// TODO: This is the incoming interface
	// based on the incoming interface we should create
	// an appropriate EndPoint based on interface type
	// This should be a switch

	// Check if interface is a physical interface. Do not create
	// tap interface/bridge if it is.
	isPhysical, err := isPhysicalIface(netInfo.Iface.Name)
	if err != nil {
		return nil, err
	}

     4.2.1.1 根據不同類型獲得相應的 endpoint 實現了 Endpoint 接口

if socketPath != "" {
	networkLogger().WithField("interface", netInfo.Iface.Name).Info("VhostUser network interface found")
	endpoint, err = createVhostUserEndpoint(netInfo, socketPath)
} else if netInfo.Iface.Type == "macvlan" {
	networkLogger().Infof("macvlan interface found")
	endpoint, err = createBridgedMacvlanNetworkEndpoint(idx, netInfo.Iface.Name, model)
} else if netInfo.Iface.Type == "macvtap" {
	networkLogger().Infof("macvtap interface found")
	endpoint, err = createMacvtapNetworkEndpoint(netInfo)
} else if netInfo.Iface.Type == "tap" {
	networkLogger().Info("tap interface found")
	endpoint, err = createTapNetworkEndpoint(idx, netInfo.Iface.Name)
} else if netInfo.Iface.Type == "veth" {
	endpoint, err = createVethNetworkEndpoint(idx, netInfo.Iface.Name, model)
} else if netInfo.Iface.Type == "ipvlan" {
	endpoint, err = createIPVlanNetworkEndpoint(idx, netInfo.Iface.Name)
} else {
	return nil, fmt.Errorf("Unsupported network interface")
}

     4.2.1.2 比如使用 veth 創建

     VethEndpoint 實現了 Endpoint 接口,createNetworkInterfacePair 隨機生成 mac 地址,實例化 NetworkInterfacePair

// createVethNetworkEndpoint builds a VethEndpoint for the interface at index
// idx. A negative idx is rejected with an error. The network interface pair
// is obtained from createNetworkInterfacePair, and a non-empty ifName
// overrides the name of the pair's virtual interface.
func createVethNetworkEndpoint(idx int, ifName string, interworkingModel NetInterworkingModel) (*VethEndpoint, error) {
	if idx < 0 {
		return &VethEndpoint{}, fmt.Errorf("invalid network endpoint index: %d", idx)
	}

	pair, pairErr := createNetworkInterfacePair(idx, ifName, interworkingModel)
	if pairErr != nil {
		return nil, pairErr
	}

	// TODO This is too specific. We may need to create multiple
	// end point types here and then decide how to connect them
	// at the time of hypervisor attach and not here
	veth := &VethEndpoint{
		EndpointType: VethEndpointType,
		NetPair:      pair,
	}

	if ifName == "" {
		return veth, nil
	}

	veth.NetPair.VirtIface.Name = ifName

	return veth, nil
}

    4.3 VethEndpoint Attach 函數

      比如使用 veth 實現了 Attach 接口,路徑 virtcontainers/veth_endpoint.go,意思就是連接到 hypervisor 網絡接口

// Attach connects the veth endpoint to the VM network through
// xConnectVMNetwork and then registers the endpoint with the hypervisor as a
// netDev device. A connection failure is logged and returned without adding
// the device.
func (endpoint *VethEndpoint) Attach(h hypervisor) error {
	err := xConnectVMNetwork(endpoint, h)
	if err == nil {
		return h.addDevice(endpoint, netDev)
	}

	networkLogger().WithError(err).Error("Error bridging virtual endpoint")

	return err
}

     4.3.1 xConnectVMNetwork 函數

// xConnectVMNetwork connects the endpoint's network pair to the VM according
// to the pair's interworking model.
//
// The queue count passed to the model-specific helper is the hypervisor's
// NumVCPUs when the hypervisor reports multi-queue support, and 0 otherwise.
// A pair still set to NetXConnectDefaultModel is switched to
// DefaultNetInterworkingModel before dispatching. An error is returned for
// the enlightened model and for any unknown model.
//
// The endpoint type should dictate how the connection needs to happen.
func xConnectVMNetwork(endpoint Endpoint, h hypervisor) error {
	netPair := endpoint.NetworkPair()

	queues := 0
	caps := h.capabilities()
	if caps.IsMultiQueueSupported() {
		queues = int(h.hypervisorConfig().NumVCPUs)
	}

	disableVhostNet := h.hypervisorConfig().DisableVhostNet

	if netPair.NetInterworkingModel == NetXConnectDefaultModel {
		netPair.NetInterworkingModel = DefaultNetInterworkingModel
	}

	// Log through the network subsystem logger, like the rest of the
	// network code, rather than through the global logrus logger.
	networkLogger().WithFields(logrus.Fields{
		"endpoint-name": endpoint.Name(),
		"endpoint-type": endpoint.Type(),
	}).Debug("Connecting VM network")

	switch netPair.NetInterworkingModel {
	case NetXConnectBridgedModel:
		return bridgeNetworkPair(endpoint, queues, disableVhostNet)
	case NetXConnectMacVtapModel:
		return tapNetworkPair(endpoint, queues, disableVhostNet)
	case NetXConnectTCFilterModel:
		return setupTCFiltering(endpoint, queues, disableVhostNet)
	case NetXConnectEnlightenedModel:
		return fmt.Errorf("Unsupported networking model")
	default:
		return fmt.Errorf("Invalid internetworking model")
	}
}

    接着上面一個流程講解,比如 network mode 是 macvtap

    4.4 tapNetworkPair 函數

func tapNetworkPair(endpoint Endpoint, queues int, disableVhostNet bool) error {
	netHandle, err := netlink.NewHandle()
	if err != nil {
		return err
	}
	defer netHandle.Delete()

     4.4.1 創建 macvtap link,attach macvtap 接口到容器接口

// Attach the macvtap interface to the underlying container
// interface. Also picks relevant attributes from the parent
tapLink, err := createMacVtap(netHandle, netPair.TAPIface.Name,
	&netlink.Macvtap{
		Macvlan: netlink.Macvlan{
			LinkAttrs: netlink.LinkAttrs{
				TxQLen:      attrs.TxQLen,
				ParentIndex: attrs.Index,
			},
		},
	}, queues)

if err != nil {
	return fmt.Errorf("Could not create TAP interface: %s", err)
}

     調用 addDevice 添加到 qemu 啓動命令行中,待啓動 VM 下節講解

 

5. Start the VM

    啓動 VM,看看如何啓動的

// startVM starts the VM.
func (s *Sandbox) startVM() (err error) {
	span, ctx := s.trace("startVM")
	defer span.Finish()

	s.Logger().Info("Starting VM")

    5.1 network.Run 函數

     如果配置了 factory,則通過 factory.GetVM 取得 VM,並調用 assignSandbox 把 sandbox 分配給該 VM

     否則調用 hypervisor 的 startSandbox 方法啓動 VM

if err := s.network.Run(s.networkNS.NetNsPath, func() error {
	if s.factory != nil {
		vm, err := s.factory.GetVM(ctx, VMConfig{
			HypervisorType:   s.config.HypervisorType,
			HypervisorConfig: s.config.HypervisorConfig,
			AgentType:        s.config.AgentType,
			AgentConfig:      s.config.AgentConfig,
			ProxyType:        s.config.ProxyType,
			ProxyConfig:      s.config.ProxyConfig,
		})
		if err != nil {
			return err
		}

		return vm.assignSandbox(s)
	}

	return s.hypervisor.startSandbox(vmStartTimeout)
}); err != nil {
	return err
}

    5.2 qemu startSandbox 函數 start VM 

// startSandbox will start the Sandbox's VM.
func (q *qemu) startSandbox(timeout int) error {
	span, _ := q.trace("startSandbox")
	defer span.Finish()

     5.2.1 創建 vm 目錄,主要在 /run/vc/vm/${container-id}

vmPath := filepath.Join(store.RunVMStoragePath, q.id)
err := os.MkdirAll(vmPath, store.DirMode)
if err != nil {
	return err
}

     5.2.2 LaunchQemu 函數

     啓動 qemu 實例,函數會一直阻塞,直到所啓動的 qemu 進程退出

// LaunchQemu launches a qemu instance described by config.
//
// The qemu parameters are assembled by the config.append* helpers called
// below, after which the launch is delegated to LaunchCustomQemu using
// config.Path, config.qemuParams and config.fds. When config.Ctx is nil,
// context.Background() is used instead.
//
// The function will block until the launched qemu process exits.  "", nil
// will be returned if the launch succeeds.  Otherwise a string containing
// the contents of stderr + a Go error object will be returned.
func LaunchQemu(config Config, logger QMPLog) (string, error) {
	// Each helper contributes its part of the qemu configuration; the calls
	// are kept in this exact order.
	config.appendName()
	config.appendUUID()
	config.appendMachine()
	config.appendCPUModel()
	config.appendQMPSockets()
	config.appendMemory()
	config.appendDevices()
	config.appendRTC()
	config.appendGlobalParam()
	config.appendVGA()
	config.appendKnobs()
	config.appendKernel()
	config.appendBios()
	config.appendIOThreads()
	config.appendIncoming()
	config.appendPidFile()

	// appendCPUs is the only helper in this function whose error is checked;
	// a failure aborts the launch before any process is started.
	if err := config.appendCPUs(); err != nil {
		return "", err
	}

	// Fall back to a background context when the caller did not supply one.
	ctx := config.Ctx
	if ctx == nil {
		ctx = context.Background()
	}

	// nil is passed for the attr argument of LaunchCustomQemu.
	return LaunchCustomQemu(ctx, config.Path, config.qemuParams,
		config.fds, nil, logger)
}

     5.2.3 LaunchCustomQemu 真正啓動 qemu 進程的命令

/usr/bin/qemu-lite-system-x86_64     

   -name sandbox-aaaa -uuid 75ea5c5e-da5b-43c4-9409-b8b8074017ad

   -machine pc,accel=kvm,kernel_irqchip,nvdimm -cpu host

   -qmp unix:/run/vc/vm/aaaa/qmp.sock,server,nowait

    -m 2048M,slots=10,maxmem=12742M -device pci-bridge,bus=pci.0,id=pci-bridge-0,chassis_nr=1,shpc=on,addr=2,romfile=

    -device virtio-serial-pci,disable-modern=false,id=serial0,romfile= -device virtconsole,chardev=charconsole0,id=console0

    -chardev socket,id=charconsole0,path=/run/vc/vm/aaaa/console.sock,server,nowait

    -device nvdimm,id=nv0,memdev=mem0 -object memory-backend-file,id=mem0,mem-path=/usr/share/kata-containers/kata-containers-image_clearlinux_1.3.1_agent_c7fdd324cda.img,size=536870912

   -device virtio-scsi-pci,id=scsi0,disable-modern=false,romfile= -object rng-random,id=rng0,filename=/dev/urandom -device virtio-rng,rng=rng0,romfile=

   -device virtserialport,chardev=charch0,id=channel0,name=agent.channel.0

   -chardev socket,id=charch0,path=/run/vc/vm/aaaa/kata.sock,server,nowait

   -device virtio-9p-pci,disable-modern=false,fsdev=extra-9p-kataShared,mount_tag=kataShared,romfile= -fsdev local,id=extra-9p-kataShared,path=/run/kata-containers/shared/sandboxes/aaaa,security_model=none -global kvm-pit.lost_tick_policy=discard -vga none -no-user-config -nodefaults -nographic -daemonize

  -kernel /usr/share/kata-containers/vmlinuz-4.14.67.16-4.4.container -append tsc=reliable no_timer_check rcupdate.rcu_expedited=1 i8042.direct=1 i8042.dumbkbd=1 i8042.nopnp=1 i8042.noaux=1 noreplace-smp reboot=k console=hvc0 console=hvc1 iommu=off cryptomgr.notests net.ifnames=0 pci=lastbus=0 root=/dev/pmem0p1 rootflags=dax,data=ordered,errors=remount-ro ro rootfstype=ext4 quiet systemd.show_status=false panic=1 nr_cpus=4 agent.use_vsock=false systemd.unit=kata-containers.target systemd.mask=systemd-networkd.service systemd.mask=systemd-networkd.socket

   -pidfile /run/vc/vm/aaaa/pid -smp 1,cores=1,threads=1,sockets=4,maxcpus=4

func LaunchCustomQemu(ctx context.Context, path string, params []string, fds []*os.File,
	attr *syscall.SysProcAttr, logger QMPLog) (string, error) {
	if logger == nil {
		logger = qmpNullLogger{}
	}

	errStr := ""

	if path == "" {
		path = "qemu-system-x86_64"
	}

    5.3 agent.startSandbox

     功能是在 VM 裏面啓動 sandbox,startProxy 啓動 proxy 服務,proxy 作爲中間代理,負責轉發

     /usr/libexec/kata-containers/kata-proxy -listen-socket unix:///run/vc/sbs/411bf354ffa86c1cf9aa1cd05505d2b3cf8ba0d2bbb53f859084fc0c639a8675/proxy.sock -mux-socket /run/vc/vm/411bf354ffa86c1cf9aa1cd05505d2b3cf8ba0d2bbb53f859084fc0c639a8675/kata.sock -sandbox 411bf354ffa86c1cf9aa1cd05505d2b3cf8ba0d2bbb53f859084fc0c639a8675 -log debug -agent-logs-socket /run/vc/vm/411bf354ffa86c1cf9aa1cd05505d2b3cf8ba0d2bbb53f859084fc0c639a8675/console.sock

func (k *kataAgent) startSandbox(sandbox *Sandbox) error {
	span, _ := k.trace("startSandbox")
	defer span.Finish()

	err := k.startProxy(sandbox)
	if err != nil {
		return err
	}

	defer func() {
		if err != nil {
			k.proxy.stop(k.state.ProxyPid)
		}
	}()

     5.3.1 根據 sandbox 的網絡配置 interface 與 route,更新 interface 發送 GRPC UpdateInterfaceRequest 請求,更新 route 發送 GRPC UpdateRoutesRequest 請求

// Setup network interfaces and routes
//
interfaces, routes, err := generateInterfacesAndRoutes(sandbox.networkNS)
if err != nil {
	return err
}
if err = k.updateInterfaces(interfaces); err != nil {
	return err
}
if _, err = k.updateRoutes(routes); err != nil {
	return err
}

     5.3.2 發送 CreateSandboxRequest 請求創建

req := &grpc.CreateSandboxRequest{
	Hostname:      hostname,
	Storages:      storages,
	SandboxPidns:  sandbox.sharePidNs,
	SandboxId:     sandbox.id,
	GuestHookPath: sandbox.config.HypervisorConfig.GuestHookPath,
}

_, err = k.sendReq(req)
if err != nil {
	return err
}

 

6. create containers

    註冊所有的容器到 proxy,每一個容器啓動一個 shim

// createContainers registers all containers to the proxy, create the
// containers in the guest and starts one shim per container.
func (s *Sandbox) createContainers() error {
	span, _ := s.trace("createContainers")
	defer span.Finish()

	if err := s.updateResources(); err != nil {
		return err
	}

    6.1 newContainer 實例化 Container

     主要是根據 sandbox 和容器的配置(ContainerConfig)實例化 Container

// newContainer creates a Container structure from a sandbox and a container configuration.
func newContainer(sandbox *Sandbox, contConfig ContainerConfig) (*Container, error) {
	span, _ := sandbox.trace("newContainer")
	defer span.Finish()

	if !contConfig.valid() {
		return &Container{}, fmt.Errorf("Invalid container configuration")
	}

	c := &Container{
		id:            contConfig.ID,
		sandboxID:     sandbox.id,
		rootFs:        contConfig.RootFs,
		config:        &contConfig,
		sandbox:       sandbox,
		runPath:       store.ContainerRuntimeRootPath(sandbox.id, contConfig.ID),
		configPath:    store.ContainerConfigurationRootPath(sandbox.id, contConfig.ID),
		containerPath: filepath.Join(sandbox.id, contConfig.ID),
		rootfsSuffix:  "rootfs",
		state:         types.ContainerState{},
		process:       Process{},
		mounts:        contConfig.Mounts,
		ctx:           sandbox.ctx,
	}

     6.1.1 container 的 create 函數

      創建啓動 container,發送到 agent CreateContainerRequest GRPC 請求

// createContainer creates and start a container inside a Sandbox. It has to be
// called only when a new container, not known by the sandbox, has to be created.
func (c *Container) create() (err error) {
	// In case the container creation fails, the following takes care
	// of rolling back all the actions previously performed.
	defer func() {
		if err != nil {
			c.rollbackFailingContainerCreation()
		}
	}()

	if c.checkBlockDeviceSupport() {
		if err = c.hotplugDrive(); err != nil {
			return
		}
	}

     6.1.2 調用 prepareAndStartShim 爲每一個容器啓動一個 shim

     /usr/libexec/kata-containers/kata-shim

-agent unix:///run/vc/sbs/9c14119f6bf0d55c049b2ddceffe87beb45a731ac289e61cc5e069e08e081818/proxy.sock

-container 9c14119f6bf0d55c049b2ddceffe87beb45a731ac289e61cc5e069e08e081818

-exec-id 9c14119f6bf0d55c049b2ddceffe87beb45a731ac289e61cc5e069e08e081818 -terminal -log debug

     kata-shim 支持的命令行參數如下所示:

Usage of kata-shim:
  -agent string
        agent gRPC socket endpoint
  -agent-logs-socket string
        socket to listen on to retrieve agent logs
  -container string
        container id for the shim
  -debug
        enable debug mode
  -exec-id string
        process id for the shim
  -log string
        set shim log level: debug, info, warn, error, fatal or panic (default "warn")
  -proxy-exit-code
        proxy exit code of the process (default true)
  -terminal
        specify if a terminal is setup
  -trace
        enable opentracing support
  -version
        display program version and exit

// prepareAndStartShim starts a shim for container cid and returns the Process
// describing it. The start time is captured (in UTC) before the shim is
// started; the returned Process carries the token, that start time and the
// pid reported by shim.start. If the shim fails to start, a nil Process and
// the error are returned.
func prepareAndStartShim(sandbox *Sandbox, shim shim, cid, token, url, consoleURL string, cmd types.Cmd,
	createNSList []ns.NSType, enterNSList []ns.Namespace) (*Process, error) {
	startedAt := time.Now().UTC()

	pid, err := shim.start(sandbox, ShimParams{
		Container:  cid,
		Token:      token,
		URL:        url,
		Console:    cmd.Console,
		Terminal:   cmd.Interactive,
		Detach:     cmd.Detach,
		CreateNS:   createNSList,
		EnterNS:    enterNSList,
		ConsoleURL: consoleURL,
	})
	if err != nil {
		return nil, err
	}

	return &Process{
		Token:     token,
		StartTime: startedAt,
		Pid:       pid,
	}, nil
}

 

總結:

    create sandbox 主要是取得啓動配置參數,包括 hypervisor,校驗等

    create network 列出來所有 namespace 的網絡接口,根據 namespace 中有的網絡接口進行配置,比如 veth,創建 macvtap 接口將 VM 連接到容器接口

    start vm 啓動 vm,比如使用 qemu,則最後使用命令行啓動虛擬機,使用 QMP 檢查虛擬機運行情況

    create containers 就是發送 GRPC 給 kata-agent CreateContainerRequest 請求,實現使用了 runc 代碼,一樣一樣的

 

創建 kata-container 例子

創建個目錄:mkdir busybox

保存 rootfs docker export $(docker create busybox) > busybox.tar

mkdir rootfs  &&  tar -C rootfs -xf busybox.tar

生成配置文件 kata-runtime spec,修改配置文件比如啓動命令

sed -i 's;"sh";"touch aaa && tailf aaa";' config.json 

使用 kata-runtime run container1 創建

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章