1. CreateContainer 函數
接收到 GRPC 消息爲 CreateContainerRequest 請求
func (a *agentGRPC) CreateContainer(ctx context.Context, req *pb.CreateContainerRequest) (resp *gpb.Empty, err error) {
if err := a.createContainerChecks(req); err != nil {
return emptyResp, err
}
// re-scan PCI bus
// looking for hidden devices
if err = rescanPciBus(); err != nil {
agentLog.WithError(err).Warn("Could not rescan PCI bus")
}
中間略過各種配置轉換
2. finishCreateContainer 函數
func (a *agentGRPC) finishCreateContainer(ctr *container, req *pb.CreateContainerRequest, config *configs.Config) (resp *gpb.Empty, err error) {
containerPath := filepath.Join(libcontainerPath, a.sandbox.id)
factory, err := libcontainer.New(containerPath, libcontainer.Cgroupfs)
if err != nil {
return emptyResp, err
}
2.1 libcontainer.New 函數
看起來是不是很熟悉?這裏確實是直接複用了 runc 的代碼,路徑 github.com/opencontainers/runc/libcontainer/factory_linux.go,LinuxFactory 實現了 Factory 接口
// New creates a Linux container factory whose state lives under root and
// applies each of the supplied option funcs to it in order.
//
// A non-empty root is created with mode 0700 if needed; a failure there is
// reported as a SystemError. Cgroupfs is applied as the baseline before any
// caller-supplied option runs, nil options are skipped, and the first option
// that returns an error aborts construction.
func New(root string, options ...func(*LinuxFactory) error) (Factory, error) {
	if len(root) > 0 {
		mkErr := os.MkdirAll(root, 0700)
		if mkErr != nil {
			return nil, newGenericError(mkErr, SystemError)
		}
	}

	factory := &LinuxFactory{
		Root:      root,
		InitPath:  "/proc/self/exe",
		InitArgs:  []string{os.Args[0], "init"},
		Validator: validate.New(),
		CriuPath:  "criu",
	}
	// Baseline cgroup manager; caller options below may override it.
	Cgroupfs(factory)

	for i := range options {
		apply := options[i]
		if apply != nil {
			if optErr := apply(factory); optErr != nil {
				return nil, optErr
			}
		}
	}
	return factory, nil
}
2.2 factory.Create 函數
路徑 github.com/opencontainers/runc/libcontainer/factory_linux.go,LinuxFactory 實現了 Create 方法
做了一大堆驗證,目錄創建以及權限的設置
func (l *LinuxFactory) Create(id string, config *configs.Config) (Container, error) {
if l.Root == "" {
return nil, newGenericError(fmt.Errorf("invalid root"), ConfigInvalid)
}
if err := l.validateID(id); err != nil {
return nil, err
}
2.2.1 linuxContainer 結構體實現了 Container 接口,目錄爲 github.com/opencontainers/runc/libcontainer/container_linux.go
c := &linuxContainer{
id: id,
root: containerRoot,
config: config,
initPath: l.InitPath,
initArgs: l.InitArgs,
criuPath: l.CriuPath,
newuidmapPath: l.NewuidmapPath,
newgidmapPath: l.NewgidmapPath,
cgroupManager: l.NewCgroupsManager(config.Cgroups, nil),
}
if intelrdt.IsCatEnabled() || intelrdt.IsMbaEnabled() {
c.intelRdtManager = l.NewIntelRdtManager(config, id, "")
}
c.state = &stoppedState{c: c}
2.3 execProcess 函數啓動進程
// Shared function between CreateContainer and ExecProcess, because those expect
// a process to be run.
func (a *agentGRPC) execProcess(ctr *container, proc *process, createContainer bool) (err error) {
if ctr == nil {
return grpcStatus.Error(codes.InvalidArgument, "Container cannot be nil")
}
if proc == nil {
return grpcStatus.Error(codes.InvalidArgument, "Process cannot be nil")
}
2.3.1 執行 Start 或者 Run 方法
分別講解
if createContainer {
err = ctr.container.Start(&proc.process)
} else {
err = ctr.container.Run(&(proc.process))
}
if err != nil {
return grpcStatus.Errorf(codes.Internal, "Could not run process: %v", err)
}
引入,runc 代碼流程圖
3. container.Start 函數
如果是容器的 init 進程(process.Init 爲 true),會先創建 exec fifo 管道
func (c *linuxContainer) Start(process *Process) error {
c.m.Lock()
defer c.m.Unlock()
if process.Init {
if err := c.createExecFifo(); err != nil {
return err
}
}
3.1 start 函數
func (c *linuxContainer) start(process *Process) error {
parent, err := c.newParentProcess(process)
if err != nil {
return newSystemErrorWithCause(err, "creating new parent process")
}
3.1.1 newParentProcess 函數
- 創建一對pipe,parentPipe和childPipe,作爲 start 進程與容器內部 init 進程通信管道
- 創建一個命令模版作爲 Parent 進程啓動的模板
- newInitProcess 封裝 initProcess。主要工作爲添加初始化類型環境變量,將namespace、uid/gid 映射等信息使用 bootstrapData 封裝爲一個 io.Reader
initProcess 實現了 parentProcess 接口
// newParentProcess prepares everything needed to launch the process p:
// an "init" socket pair used to talk to the child, a pipe for the child's
// log output, and the command built by commandTemplate.
//
// When p is not the container's init process the result comes from
// newSetnsProcess; otherwise the exec fifo is attached to the command and
// the result comes from newInitProcess.
//
// Every early error return closes the descriptors created so far, so a
// failed call does not leak the socket pair or the log pipe.
func (c *linuxContainer) newParentProcess(p *Process) (parentProcess, error) {
	parentInitPipe, childInitPipe, err := utils.NewSockPair("init")
	if err != nil {
		return nil, newSystemErrorWithCause(err, "creating new init pipe")
	}
	messageSockPair := filePair{parentInitPipe, childInitPipe}

	parentLogPipe, childLogPipe, err := os.Pipe()
	if err != nil {
		// The socket pair already exists; release it before bailing out.
		parentInitPipe.Close()
		childInitPipe.Close()
		return nil, fmt.Errorf("Unable to create the log pipe: %s", err)
	}
	logFilePair := filePair{parentLogPipe, childLogPipe}

	// closeAll is best-effort cleanup for the error paths below; Close
	// errors are ignored because the original failure is what gets reported.
	closeAll := func() {
		parentInitPipe.Close()
		childInitPipe.Close()
		parentLogPipe.Close()
		childLogPipe.Close()
	}

	cmd, err := c.commandTemplate(p, childInitPipe, childLogPipe)
	if err != nil {
		closeAll()
		return nil, newSystemErrorWithCause(err, "creating new command template")
	}
	if !p.Init {
		return c.newSetnsProcess(p, cmd, messageSockPair, logFilePair)
	}

	// We only set up fifoFd if we're not doing a `runc exec`. The historic
	// reason for this is that previously we would pass a dirfd that allowed
	// for container rootfs escape (and not doing it in `runc exec` avoided
	// that problem), but we no longer do that. However, there's no need to do
	// this for `runc exec` so we just keep it this way to be safe.
	if err := c.includeExecFifo(cmd); err != nil {
		closeAll()
		return nil, newSystemErrorWithCause(err, "including execfifo in cmd.Exec setup")
	}
	return c.newInitProcess(p, cmd, messageSockPair, logFilePair)
}
3.1.2 initProcess start 函數
創建新的進程。新進程以 /proc/self/exe 爲執行入口,參數爲 init。由於 Go 的 func init() 會在 main 函數之前執行,新進程會先進入 init() 並在其中完成容器初始化,而不會去執行 main 函數
func (p *initProcess) start() error {
defer p.messageSockPair.parent.Close()
err := p.cmd.Start()
p.process.ops = p
// close the write-side of the pipes (controlled by child)
p.messageSockPair.child.Close()
p.logFilePair.child.Close()
if err != nil {
p.process.ops = nil
return newSystemErrorWithCause(err, "starting init process command")
}
cmd 如最後命令所示,Path填充爲 /proc/self/exe(本身 agent)。參數字段 Args 爲 init,表示對容器進行初始化,調用的爲 agent init
agent 最後直接複用 runc 代碼
3.1.3 init 函數
// init intercepts start-up when the binary is re-executed with "init" as its
// first argument. In that mode it pins execution to a single OS thread,
// builds a libcontainer factory with an empty root and hands control to
// StartInitialization.
//
// The final panic is reached only if StartInitialization returns, which the
// panic message treats as something that should never happen. For any other
// invocation this function does nothing.
func init() {
	if len(os.Args) > 1 && os.Args[1] == "init" {
		runtime.GOMAXPROCS(1)
		runtime.LockOSThread()

		factory, err := libcontainer.New("")
		if err != nil {
			// Without this check a failed New would surface as a
			// nil-interface method call below, with no log line.
			agentLog.WithError(err).Error("could not create libcontainer factory")
			panic(err)
		}
		if err := factory.StartInitialization(); err != nil {
			agentLog.WithError(err).Error("init failed")
		}
		panic("--this line should have never been executed, congratulations--")
	}
}
4. StartInitialization
從 pipe 讀取 parent 發送過來的配置,從環境變量 _LIBCONTAINER_INITPIPE 讀取管道文件描述符
// StartInitialization loads a container by opening the pipe fd from the parent to read the configuration and state
// This is a low level implementation detail of the reexec and should not be consumed externally
func (l *LinuxFactory) StartInitialization() (err error) {
var (
pipefd, fifofd int
consoleSocket *os.File
envInitPipe = os.Getenv("_LIBCONTAINER_INITPIPE")
envFifoFd = os.Getenv("_LIBCONTAINER_FIFOFD")
envConsole = os.Getenv("_LIBCONTAINER_CONSOLE")
)
4.1 newContainerInit 函數
根據類型 setns 或 standard 返回對應的結構體(linuxSetnsInit 或 linuxStandardInit),二者都作爲 initer 接口返回,後續調用其 Init 方法
// newContainerInit reads the JSON-encoded init configuration from pipe,
// applies its environment via populateProcessEnvironment, and returns the
// initer that matches t.
//
// initSetns yields a linuxSetnsInit; initStandard yields a linuxStandardInit
// that additionally records the parent pid and fifoFd. Any other type, a
// decode failure, or an environment failure results in an error.
func newContainerInit(t initType, pipe *os.File, consoleSocket *os.File, fifoFd int) (initer, error) {
	var cfg *initConfig
	decodeErr := json.NewDecoder(pipe).Decode(&cfg)
	if decodeErr != nil {
		return nil, decodeErr
	}

	envErr := populateProcessEnvironment(cfg.Env)
	if envErr != nil {
		return nil, envErr
	}

	if t == initSetns {
		setnsInit := &linuxSetnsInit{
			pipe:          pipe,
			consoleSocket: consoleSocket,
			config:        cfg,
		}
		return setnsInit, nil
	}
	if t == initStandard {
		standardInit := &linuxStandardInit{
			pipe:          pipe,
			consoleSocket: consoleSocket,
			parentPid:     unix.Getppid(),
			config:        cfg,
			fifoFd:        fifoFd,
		}
		return standardInit, nil
	}
	return nil, fmt.Errorf("unknown init type %q", t)
}
5. linuxStandardInit Init 方法
路徑 github.com/opencontainers/runc/libcontainer/standard_init_linux.go
func (l *linuxStandardInit) Init() error {
runtime.LockOSThread()
defer runtime.UnlockOSThread()
- setupNetwork: 配置容器的網絡,調用第三方 netlink.LinkSetUp
- setupRoute: 配置容器靜態路由信息,調用第三方 netlink.RouteAdd
- label.Init: 檢查selinux是否被啓動並將結果存入全局變量。
- finalizeNamespace: 根據config配置將需要的特權capabilities加入白名單,設置user namespace,關閉不需要的文件描述符。
- unix.Open: 以只寫方式打開 fifo 管道並寫入 0;打開操作會一直阻塞,直到管道的另一端以讀方式打開並讀取內容
- syscall.Exec 系統調用來執行用戶所指定的在容器中運行的程序
配置 hostname、apparmor、processLabel、sysctl、readonlyPath、maskPath。create 雖然不會執行命令,但會檢查命令路徑,錯誤會在 create 期間返回
3.1.1 setupNetwork 函數
配置容器的網絡,調用第三方 netlink.LinkSetUp,相當於命令 ip link set $link up
如果不指定任何網絡,只有loopback
// setupNetwork walks config.Networks and, for each entry, looks up the
// strategy registered for its Type and asks that strategy to initialize the
// network. It stops at, and returns, the first error encountered.
func setupNetwork(config *initConfig) error {
	for _, nw := range config.Networks {
		strategy, err := getStrategy(nw.Type)
		if err != nil {
			return err
		}
		err = strategy.initialize(nw)
		if err != nil {
			return err
		}
	}
	return nil
}
3.1.2 setupRoute
配置容器靜態路由信息,調用第三方 netlink.RouteAdd,相當於命令 ip route add $route
// setupRoute installs every route listed in config.Routes through
// netlink.RouteAdd.
//
// For each route the destination must parse as a CIDR, the source and the
// gateway must parse as IP addresses, and the named interface must exist.
// The first failure is returned and the remaining routes are not processed.
func setupRoute(config *configs.Config) error {
	for _, rt := range config.Routes {
		_, dstNet, err := net.ParseCIDR(rt.Destination)
		if err != nil {
			return err
		}

		srcIP := net.ParseIP(rt.Source)
		if srcIP == nil {
			return fmt.Errorf("Invalid source for route: %s", rt.Source)
		}

		gwIP := net.ParseIP(rt.Gateway)
		if gwIP == nil {
			return fmt.Errorf("Invalid gateway for route: %s", rt.Gateway)
		}

		link, err := netlink.LinkByName(rt.InterfaceName)
		if err != nil {
			return err
		}

		err = netlink.RouteAdd(&netlink.Route{
			Scope:     netlink.SCOPE_UNIVERSE,
			Dst:       dstNet,
			Src:       srcIP,
			Gw:        gwIP,
			LinkIndex: link.Attrs().Index,
		})
		if err != nil {
			return err
		}
	}
	return nil
}
3.1.3 syncParentReady 函數發送 ready 到 pipe,等待父進程下發 exec 命令
// syncParentReady performs the "ready" handshake with the parent over pipe:
// it first writes procReady to announce that init is prepared to exec the
// child process, then blocks reading until the parent answers with procRun.
// A failed write is returned immediately without attempting the read.
func syncParentReady(pipe io.ReadWriter) error {
	// Announce readiness to the parent.
	err := writeSync(pipe, procReady)
	if err == nil {
		// Block until the parent gives the go-ahead.
		err = readSync(pipe, procRun)
	}
	return err
}
3.1.4 只寫方式打開fifo管道並寫入0,會一直保持阻塞,直到管道的另一端以讀方式打開,並讀取內容
// Wait for the FIFO to be opened on the other side before exec-ing the
// user process. We open it through /proc/self/fd/$fd, because the fd that
// was given to us was an O_PATH fd to the fifo itself. Linux allows us to
// re-open an O_PATH fd through /proc.
fd, err := unix.Open(fmt.Sprintf("/proc/self/fd/%d", l.fifoFd), unix.O_WRONLY|unix.O_CLOEXEC, 0)
if err != nil {
return newSystemErrorWithCause(err, "open exec fifo")
}
3.1.5 系統調用來執行用戶所指定的在容器中運行的程序
if err := syscall.Exec(name, l.config.Args[0:], os.Environ()); err != nil {
return newSystemErrorWithCause(err, "exec user process")
}
kata agent 運行在虛擬機裏面,那怎麼進行調試呢?
內核參數需要添加:agent.log=debug agent.debug_console,在哪裏添加呢
需要修改文件 /etc/kata-containers/configuration.toml,開啓 debug 模式 enable_debug = true,內核參數添加 agent.debug_console
Connect to the virtual machine using the debug console
$ id=$(sudo docker ps -q --no-trunc)
$ console="/var/run/vc/vm/${id}/console.sock"
$ sudo socat "stdin,raw,echo=0,escape=0x11" "unix-connect:${console}"