cgroup資源隔離內存OOM事件監聽(oom notifier)

CGROUP OOM控制

CGROUP是目前比較流行也比較常用的資源隔離技術,docker、hadoop等都是使用cgroup做的資源隔離。對內存做資源隔離時,進程觸發OOM後,可以選擇直接kill進程,也可以不kill;默認行為是OOM之後直接kill。可以通過以下方式關閉OOM killer:

echo 1 > memory.oom_control

OOM事件捕捉

但是進程因OOM被kill掉之後,很難捕捉到OOM日誌。針對這種情況,cgroup提供了一種基於eventfd監聽OOM事件的方式,官方文檔也給出了C語言的實現示例:

#include <sys/types.h>
#include <sys/stat.h>
#include <sys/eventfd.h>
#include <errno.h>
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

/*
 * Report a fatal error on stderr and terminate the process.
 *
 * The line printed is "error: <msg>: <strerror(errno)>(<errno>)", so this is
 * meant to be called right after a failing call that set errno.
 * Never returns; the process exits with EXIT_FAILURE.
 */
static inline void die(const char *msg)
{
	const int code = errno;

	fprintf(stderr, "error: %s: %s(%d)\n", msg, strerror(code), code);
	exit(EXIT_FAILURE);
}

/*
 * Print the command-line synopsis on stderr and terminate the process
 * with EXIT_FAILURE.  Never returns.
 */
static inline void usage(void)
{
	static const char synopsis[] =
		"usage: oom_eventfd_test <cgroup.event_control> <memory.oom_control>\n";

	fputs(synopsis, stderr);
	exit(EXIT_FAILURE);
}

/* Size of the scratch buffer holding the "<eventfd> <oom_control fd>" string. */
#define BUFSIZE 256

/*
 * Register an eventfd for the OOM notifications of a memory cgroup and print
 * one line every time the eventfd fires.
 *
 * argv[1]: path of the cgroup's cgroup.event_control file
 * argv[2]: path of the cgroup's memory.oom_control file
 *
 * The registration string written to cgroup.event_control has the form
 * "<eventfd> <fd of memory.oom_control>".  After registering, the program
 * loops forever on a blocking read() of the eventfd; every failure ends the
 * process through die().
 */
int main(int argc, char *argv[])
{
	char buf[BUFSIZE];
	int efd, cfd, ofd, wb;
	ssize_t rb;
	uint64_t u;

	if (argc != 3)
		usage();

	if ((efd = eventfd(0, 0)) == -1)
		die("eventfd");

	if ((cfd = open(argv[1], O_WRONLY)) == -1)
		die("cgroup.event_control");

	if ((ofd = open(argv[2], O_RDONLY)) == -1)
		die("memory.oom_control");

	/* snprintf() returns a negative value on error; never hand that to write() as a length. */
	wb = snprintf(buf, BUFSIZE, "%d %d", efd, ofd);
	if (wb < 0 || wb >= BUFSIZE)
		die("buffer too small");

	if (write(cfd, buf, (size_t)wb) == -1)
		die("write cgroup.event_control");

	if (close(cfd) == -1)
		die("close cgroup.event_control");

	for (;;) {
		rb = read(efd, &u, sizeof(u));
		/* A signal interrupting the blocking read is not a failure: wait again. */
		if (rb == -1 && errno == EINTR)
			continue;
		if (rb != (ssize_t)sizeof(u))
			die("read eventfd");

		printf("mem_cgroup oom event received\n");
		/*
		 * The loop never ends, so a fully buffered stdout (pipe or file)
		 * would otherwise hold the message back indefinitely.
		 */
		fflush(stdout);
	}

	/* not reached */
	return 0;
}

具體可參照: https://access.redhat.com/documentation/en-US/Red_Hat_Enterprise_Linux/6/html/Resource_Management_Guide/sec-memory.html#ex-OOM-control-notifications

JAVA中捕捉OOM KILL事件

在JAVA中想捕捉oom kill事件,可以採用java調用c的方式來實現;通過JNI方式有很多插件可以方便地調用c程序。我的工程是maven工程,使用的是下面這個工具:
        <!-- HawtJNI runtime: supplies the org.fusesource.hawtjni.runtime annotations and the Library loader -->
        <dependency>
            <groupId>org.fusesource.hawtjni</groupId>
            <artifactId>hawtjni-runtime</artifactId>
            <version>1.9</version>
        </dependency>
寫自己的native方法來對應c的方法就可以了
package ji;

import org.fusesource.hawtjni.runtime.JniArg;
import org.fusesource.hawtjni.runtime.JniClass;
import org.fusesource.hawtjni.runtime.JniMethod;
import org.fusesource.hawtjni.runtime.Library;

/**
 * HawtJNI binding for the native {@code oom_event_listener} C function.
 * Loading this class loads the "native-oom-notifier" library.
 *
 * Created by ji on 17-5-18.
 */
@JniClass
public class OomNotifierNative {

    private static final Library LIBRARY = new Library("native-oom-notifier", OomNotifierNative.class);

    static {
        LIBRARY.load();
    }


    /**
     * Blocks in native code until the cgroup's eventfd is notified.
     *
     * The native function returns a plain {@code int}, not a pointer, so the
     * return value carries no pointer cast.
     *
     * @param ptr  path of the cgroup's {@code cgroup.event_control} file
     * @param ptr2 path of the cgroup's {@code memory.oom_control} file
     * @return 1 when an OOM event was received, 2 when the
     *         {@code cgroup.event_control} path no longer exists after the notification
     */
    @JniMethod
    public static final native long oom_event_listener(@JniArg(cast = "char *") String ptr, @JniArg(cast = "char *") String ptr2);

}
對應的C方法如下:
#include "notifier.h"
#include <stdio.h>

#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <sys/eventfd.h>
#include <errno.h>
#include <string.h>


/*
 * Print "error: <msg>: <strerror(errno)>(<errno>)" on stderr, then end the
 * process with EXIT_FAILURE.  Intended to be called directly after a failing
 * call that set errno.  Never returns.
 */
static inline void die(const char *msg)
{
	const int err = errno;
	const char *reason = strerror(err);

	fprintf(stderr, "error: %s: %s(%d)\n", msg, reason, err);
	exit(EXIT_FAILURE);
}

/*
 * Write the command-line synopsis to stderr and end the process with
 * EXIT_FAILURE.  Never returns.
 */
static inline void usage(void)
{
	fputs("usage: oom_eventfd_test <cgroup.event_control> <memory.oom_control>\n", stderr);
	exit(EXIT_FAILURE);
}

/* Size of the scratch buffer holding the "<eventfd> <oom_control fd>" string. */
#define BUFSIZE 256

/*
 * Register an eventfd for the OOM notifications of a memory cgroup and block
 * until the first notification arrives.
 *
 * event_ctrl: path of the cgroup's cgroup.event_control file
 * oom_ctrl:   path of the cgroup's memory.oom_control file
 *
 * Returns 1 when a notification was read and event_ctrl still exists, and
 * 2 when event_ctrl can no longer be accessed after the notification.
 * Every descriptor opened here is closed again before returning, so the
 * function can be called repeatedly without leaking descriptors.
 *
 * Any failure during setup or while reading the eventfd goes through die(),
 * which calls exit() and therefore ends the whole calling process.
 */
int oom_event_listener(char *event_ctrl,char *oom_ctrl){
        char buf[BUFSIZE];
        int efd, cfd, ofd, wb, rc;
        ssize_t rb;
        uint64_t u;

        if ((efd = eventfd(0, 0)) == -1)
            die("eventfd");

        if ((cfd = open(event_ctrl, O_WRONLY)) == -1)
            die("cgroup.event_control");

        if ((ofd = open(oom_ctrl, O_RDONLY)) == -1)
            die("memory.oom_control");

        /* snprintf() returns a negative value on error; never hand that to write() as a length. */
        wb = snprintf(buf, BUFSIZE, "%d %d", efd, ofd);
        if (wb < 0 || wb >= BUFSIZE)
            die("buffer too small");

        if (write(cfd, buf, (size_t)wb) == -1)
            die("write cgroup.event_control");

        if (close(cfd) == -1)
            die("close cgroup.event_control");

        /* A signal interrupting the blocking read is not a failure: wait again. */
        do {
            rb = read(efd, &u, sizeof(u));
        } while (rb == -1 && errno == EINTR);

        if (rb != (ssize_t)sizeof(u))
            die("read eventfd");

        /* Mode 0 only tests for existence of the path. */
        if (access(event_ctrl,0)==-1){
            printf("group not exists\n");
            rc = 2;
        } else {
            printf("mem_cgroup oom event received\n");
            rc = 1;
        }
        fflush(stdout);

        /* Best-effort cleanup; the result is already decided at this point. */
        close(efd);
        close(ofd);

        return rc;
}
使用maven打包的時候用了下面這個插件
<!-- HawtJNI Maven plugin used when packaging the project; same version (1.9) as the hawtjni-runtime dependency -->
<plugin>
        <groupId>org.fusesource.hawtjni</groupId>
        <artifactId>maven-hawtjni-plugin</artifactId>
        <version>1.9</version>
</plugin>

具體的代碼在這裏:https://github.com/jgteng/cgroup_oom_notifier


發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章