【kubernetes/k8s源碼分析】kata container agent create container 源碼分析

 

 

 

1. CreateContainer 函數

     接收到 GRPC 消息爲 CreateContainerRequest 請求

func (a *agentGRPC) CreateContainer(ctx context.Context, req *pb.CreateContainerRequest) (resp *gpb.Empty, err error) {
	if err := a.createContainerChecks(req); err != nil {
		return emptyResp, err
	}

	// re-scan PCI bus
	// looking for hidden devices
	if err = rescanPciBus(); err != nil {
		agentLog.WithError(err).Warn("Could not rescan PCI bus")
	}

        中間略過各種配置轉換

 

2. finishCreateContainer 函數

func (a *agentGRPC) finishCreateContainer(ctr *container, req *pb.CreateContainerRequest, config *configs.Config) (resp *gpb.Empty, err error) {
	containerPath := filepath.Join(libcontainerPath, a.sandbox.id)
	factory, err := libcontainer.New(containerPath, libcontainer.Cgroupfs)
	if err != nil {
		return emptyResp, err
	}

    2.1 libcontainer.New 函數

      看起來是不是很熟悉?這正是 runc 的代碼,kata agent 確實是直接拿 runc 的 libcontainer 來用,路徑 github.com/opencontainers/runc/libcontainer/factory_linux.go,LinuxFactory 實現了 Factory 接口

// New creates a Linux container factory rooted at root and then applies each
// supplied option func to it.
//
// A non-empty root directory is created (mode 0700, parents included) before
// the factory is built. Cgroupfs is applied ahead of the caller's options.
// Nil options are skipped, and the first option that returns an error aborts
// construction with that error.
func New(root string, options ...func(*LinuxFactory) error) (Factory, error) {
	if len(root) > 0 {
		mkdirErr := os.MkdirAll(root, 0700)
		if mkdirErr != nil {
			return nil, newGenericError(mkdirErr, SystemError)
		}
	}

	factory := &LinuxFactory{
		Root:      root,
		InitPath:  "/proc/self/exe",
		InitArgs:  []string{os.Args[0], "init"},
		Validator: validate.New(),
		CriuPath:  "criu",
	}

	// Applied before any caller option; its result is not checked here.
	Cgroupfs(factory)

	for i := range options {
		apply := options[i]
		if apply != nil {
			if err := apply(factory); err != nil {
				return nil, err
			}
		}
	}
	return factory, nil
}

    2.2 factory.Create 函數

     路徑 github.com/opencontainers/runc/libcontainer/factory_linux.go,LinuxFactory 實現了 Create 方法

    做了一大堆驗證,目錄創建以及權限的設置

func (l *LinuxFactory) Create(id string, config *configs.Config) (Container, error) {
	if l.Root == "" {
		return nil, newGenericError(fmt.Errorf("invalid root"), ConfigInvalid)
	}
	if err := l.validateID(id); err != nil {
		return nil, err
	}

     2.2.1 linuxContainer 結構體實現了 Container 接口,目錄爲 github.com/opencontainers/runc/libcontainer/container_linux.go

// Assemble the container object from the caller-supplied id and config plus
// the factory's init, criu and uid/gid-map helper settings.
c := &linuxContainer{
	id:            id,
	root:          containerRoot,
	config:        config,
	initPath:      l.InitPath,
	initArgs:      l.InitArgs,
	criuPath:      l.CriuPath,
	newuidmapPath: l.NewuidmapPath,
	newgidmapPath: l.NewgidmapPath,
	cgroupManager: l.NewCgroupsManager(config.Cgroups, nil),
}
// An Intel RDT manager is attached only when CAT or MBA is reported as enabled.
if intelrdt.IsCatEnabled() || intelrdt.IsMbaEnabled() {
	c.intelRdtManager = l.NewIntelRdtManager(config, id, "")
}
// The container's state is initialised to stoppedState.
c.state = &stoppedState{c: c}

    2.3 execProcess 函數啓動進程

// Shared function between CreateContainer and ExecProcess, because those expect
// a process to be run.
func (a *agentGRPC) execProcess(ctr *container, proc *process, createContainer bool) (err error) {
	if ctr == nil {
		return grpcStatus.Error(codes.InvalidArgument, "Container cannot be nil")
	}

	if proc == nil {
		return grpcStatus.Error(codes.InvalidArgument, "Process cannot be nil")
	}

     2.3.1 執行 Start 或者 Run 方法

     分別講解

// createContainer selects the libcontainer entry point: Start when it is set,
// Run otherwise.
if createContainer {
	err = ctr.container.Start(&proc.process)
} else {
	err = ctr.container.Run(&(proc.process))
}
// A failure from either call is reported to the caller as a gRPC Internal error.
if err != nil {
	return grpcStatus.Errorf(codes.Internal, "Could not run process: %v", err)
}

 

                                                          引入,runc 代碼流程圖

3. container.Start 函數

     如果是 init 進程(process.Init 爲 true),會先創建 exec fifo 管道

func (c *linuxContainer) Start(process *Process) error {
	c.m.Lock()
	defer c.m.Unlock()
	if process.Init {
		if err := c.createExecFifo(); err != nil {
			return err
		}
	}

     3.1 start 函數

func (c *linuxContainer) start(process *Process) error {
	parent, err := c.newParentProcess(process)
	if err != nil {
		return newSystemErrorWithCause(err, "creating new parent process")
	}

     3.1.1 newParentProcess 函數

  • 創建一對pipe,parentPipe和childPipe,作爲 start 進程與容器內部 init 進程通信管道
  • 創建一個命令模版作爲 Parent 進程啓動的模板
  • newInitProcess 封裝 initProcess。主要工作爲添加初始化類型環境變量,將namespace、uid/gid 映射等信息使用 bootstrapData 封裝爲一個 io.Reader

       initProcess 實現了 parentProcess 接口

// newParentProcess prepares the parent-side process wrapper for p.
//
// It creates the "init" socket pair and a log pipe, builds the command
// template around the child ends of both, and then returns either a setns
// process (when p.Init is false) or an init process (when p.Init is true).
// Any failure along the way is returned and no process wrapper is produced.
func (c *linuxContainer) newParentProcess(p *Process) (parentProcess, error) {
	initParent, initChild, sockErr := utils.NewSockPair("init")
	if sockErr != nil {
		return nil, newSystemErrorWithCause(sockErr, "creating new init pipe")
	}

	logParent, logChild, pipeErr := os.Pipe()
	if pipeErr != nil {
		return nil, fmt.Errorf("Unable to create the log pipe:  %s", pipeErr)
	}

	cmd, cmdErr := c.commandTemplate(p, initChild, logChild)
	if cmdErr != nil {
		return nil, newSystemErrorWithCause(cmdErr, "creating new command template")
	}

	initSocks := filePair{initParent, initChild}
	logPipes := filePair{logParent, logChild}

	if p.Init {
		// The exec fifo is only wired into the command for the init path, not
		// for `runc exec`. Historically a dirfd was passed here that allowed a
		// container rootfs escape; that is no longer done, but exec has no
		// need for the fifo, so it stays init-only to be safe.
		if fifoErr := c.includeExecFifo(cmd); fifoErr != nil {
			return nil, newSystemErrorWithCause(fifoErr, "including execfifo in cmd.Exec setup")
		}
		return c.newInitProcess(p, cmd, initSocks, logPipes)
	}

	return c.newSetnsProcess(p, cmd, initSocks, logPipes)
}

     3.1.2 initProcess start 函數

     創建新的進程。此時新進程以 /proc/self/exe 爲執行入口,參數爲 init。由於 func init() 會在 main 函數之前執行,新進程會直接在 init() 中完成容器初始化,而不會去執行 main 函數

func (p *initProcess) start() error {
	defer p.messageSockPair.parent.Close()
	err := p.cmd.Start()
	p.process.ops = p
	// close the write-side of the pipes (controlled by child)
	p.messageSockPair.child.Close()
	p.logFilePair.child.Close()
	if err != nil {
		p.process.ops = nil
		return newSystemErrorWithCause(err, "starting init process command")
	}

         cmd 如最後命令所示,Path填充爲 /proc/self/exe(本身 agent)。參數字段 Args 爲 init,表示對容器進行初始化,調用的爲 agent init

         agent 最後直接複用 runc 代碼

     3.1.3 init 函數

// init intercepts the re-exec case where the binary is started with "init"
// as its first argument. In that case it pins execution to a single OS
// thread, builds a libcontainer factory and hands control to
// StartInitialization.
//
// The panic at the end is reached only if StartInitialization returns (or the
// factory could not be built), which the panic message treats as unexpected.
func init() {
	if len(os.Args) > 1 && os.Args[1] == "init" {
		runtime.GOMAXPROCS(1)
		runtime.LockOSThread()

		// Do not discard the factory error: calling StartInitialization on a
		// nil factory would hide the real cause behind a nil dereference.
		factory, err := libcontainer.New("")
		if err == nil {
			err = factory.StartInitialization()
		}
		if err != nil {
			agentLog.WithError(err).Error("init failed")
		}
		panic("--this line should have never been executed, congratulations--")
	}
}

 

4. StartInitialization

    從 pipe 讀取 parent 發送過來的配置,從環境變量 _LIBCONTAINER_INITPIPE 讀取管道文件描述符

// StartInitialization loads a container by opening the pipe fd from the parent to read the configuration and state
// This is a low level implementation detail of the reexec and should not be consumed externally
func (l *LinuxFactory) StartInitialization() (err error) {
   var (
      pipefd, fifofd int
      consoleSocket  *os.File
      envInitPipe    = os.Getenv("_LIBCONTAINER_INITPIPE")
      envFifoFd      = os.Getenv("_LIBCONTAINER_FIFOFD")
      envConsole     = os.Getenv("_LIBCONTAINER_CONSOLE")
   )

    4.1 newContainerInit 函數

       根據 init 類型(setns 或 standard)返回對應的結構體 linuxSetnsInit 或 linuxStandardInit,二者都實現了 initer 接口的 Init 方法

// newContainerInit decodes the JSON init configuration from pipe, applies its
// Env entries via populateProcessEnvironment, and returns the initer matching
// the requested init type t.
//
// initSetns yields a *linuxSetnsInit and initStandard yields a
// *linuxStandardInit (which additionally records the parent pid and fifoFd).
// Any other type results in an "unknown init type" error.
func newContainerInit(t initType, pipe *os.File, consoleSocket *os.File, fifoFd int) (initer, error) {
	var config *initConfig

	decoder := json.NewDecoder(pipe)
	if decodeErr := decoder.Decode(&config); decodeErr != nil {
		return nil, decodeErr
	}

	if envErr := populateProcessEnvironment(config.Env); envErr != nil {
		return nil, envErr
	}

	if t == initSetns {
		setns := &linuxSetnsInit{
			pipe:          pipe,
			consoleSocket: consoleSocket,
			config:        config,
		}
		return setns, nil
	}

	if t == initStandard {
		standard := &linuxStandardInit{
			pipe:          pipe,
			consoleSocket: consoleSocket,
			parentPid:     unix.Getppid(),
			config:        config,
			fifoFd:        fifoFd,
		}
		return standard, nil
	}

	return nil, fmt.Errorf("unknown init type %q", t)
}

 

5. linuxStandardInit Init 方法

    路徑 github.com/opencontainers/runc/libcontainer/standard_init_linux.go 

func (l *linuxStandardInit) Init() error {
	runtime.LockOSThread()
	defer runtime.UnlockOSThread()
  • setupNetwork: 配置容器的網絡,調用第三方 netlink.LinkSetup
  • setupRoute: 配置容器靜態路由信息,調用第三方 netlink.RouteAdd
  • label.Init:   檢查selinux是否被啓動並將結果存入全局變量。
  • finalizeNamespace: 根據config配置將需要的特權capabilities加入白名單,設置user namespace,關閉不需要的文件描述符。
  • unix.Open: 經由 /proc/self/fd/$fd 以只寫方式打開 fifo 管道並寫入0,會一直保持阻塞,直到管道的另一端以讀方式打開,並讀取內容
  • syscall.Exec 系統調用來執行用戶所指定的在容器中運行的程序

      配置 hostname、apparmor、processLabel、sysctl、readonlyPath、maskPath。create 雖然不會執行命令,但會檢查命令路徑,錯誤會在 create 期間返回

    3.1.1 setupNetwork 函數

       配置容器的網絡,調用第三方 netlink.LinkSetup,相當於命令 ip link set $link up

       如果不指定任何網絡,只有loopback

// setupNetwork walks every entry in config.Networks, looks up the strategy
// registered for the entry's Type and asks that strategy to initialize the
// entry. The first lookup or initialization error stops the walk and is
// returned; nil means every entry was initialized.
func setupNetwork(config *initConfig) error {
	for _, network := range config.Networks {
		strategy, lookupErr := getStrategy(network.Type)
		if lookupErr != nil {
			return lookupErr
		}

		initErr := strategy.initialize(network)
		if initErr != nil {
			return initErr
		}
	}
	return nil
}

     3.1.2 setupRoute

      配置容器靜態路由信息,調用第三方 netlink.RouteAdd,相當於命令 ip route add $route

// setupRoute installs every route listed in config.Routes through netlink.
//
// For each route the destination must parse as a CIDR, the source and the
// gateway must parse as IP addresses, and the named interface must resolve
// via netlink.LinkByName. The first failure is returned and the remaining
// routes are not processed.
func setupRoute(config *configs.Config) error {
	for _, rt := range config.Routes {
		_, dstNet, err := net.ParseCIDR(rt.Destination)
		if err != nil {
			return err
		}

		srcIP := net.ParseIP(rt.Source)
		if srcIP == nil {
			return fmt.Errorf("Invalid source for route: %s", rt.Source)
		}

		gwIP := net.ParseIP(rt.Gateway)
		if gwIP == nil {
			return fmt.Errorf("Invalid gateway for route: %s", rt.Gateway)
		}

		link, err := netlink.LinkByName(rt.InterfaceName)
		if err != nil {
			return err
		}

		var entry netlink.Route
		entry.Scope = netlink.SCOPE_UNIVERSE
		entry.Dst = dstNet
		entry.Src = srcIP
		entry.Gw = gwIP
		entry.LinkIndex = link.Attrs().Index

		if err := netlink.RouteAdd(&entry); err != nil {
			return err
		}
	}
	return nil
}

     3.1.3 syncParentReady 函數發送 ready 到 pipe,等待父進程下發 exec 命令

// syncParentReady performs the "ready" handshake with the parent over pipe:
// it first writes a procReady sync message, then reads until the parent
// answers with procRun. A failure in either step is returned.
func syncParentReady(pipe io.ReadWriter) error {
	// Announce readiness to the parent.
	notifyErr := writeSync(pipe, procReady)
	if notifyErr != nil {
		return notifyErr
	}

	// Block until the parent signals that exec may proceed.
	return readSync(pipe, procRun)
}

     3.1.4 只寫方式打開fifo管道並寫入0,會一直保持阻塞,直到管道的另一端以讀方式打開,並讀取內容

// Wait for the FIFO to be opened on the other side before exec-ing the
// user process. We open it through /proc/self/fd/$fd, because the fd that
// was given to us was an O_PATH fd to the fifo itself. Linux allows us to
// re-open an O_PATH fd through /proc.
fd, err := unix.Open(fmt.Sprintf("/proc/self/fd/%d", l.fifoFd), unix.O_WRONLY|unix.O_CLOEXEC, 0)
if err != nil {
	return newSystemErrorWithCause(err, "open exec fifo")
}

      3.1.5 syscall.Exec 系統調用來執行用戶所指定的在容器中運行的程序

// Exec the user-specified program with the configured Args and the current
// process environment. syscall.Exec replaces the process image on success,
// so the error branch only runs when the exec itself fails.
if err := syscall.Exec(name, l.config.Args[0:], os.Environ()); err != nil {
	return newSystemErrorWithCause(err, "exec user process")
}

 

    kata agent 運行在虛擬機裏面,那怎麼進行調試呢?

內核參數需要添加:agent.log=debug agent.debug_console,在哪裏添加呢

需要修改文件 /etc/kata-containers/configuration.toml,開啓debug模式 enable_debug = true,內核參數添加 agent.debug_console

Connect to the virtual machine using the debug console    

$ id=$(sudo docker ps -q --no-trunc)
$ console="/var/run/vc/vm/${id}/console.sock"
$ sudo socat "stdin,raw,echo=0,escape=0x11" "unix-connect:${console}"
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章