perf_event_open學習 —— mmap方式讀取

示例程序2

在上一篇《Linux perf子系統的使用(一)——計數》已經講解了如何使用perf_event_open、read和ioctl對perf子系統進行編程。但有時我們並不需要計數,而是要採樣。比如這麼一個需求:統計一個程序中哪些函數最耗時間。嗯,這個功能確實可以通過perf record命令來做,但是perf record內部又是如何做到的呢?自己實現又是怎樣的呢?perf record是基於統計學原理的。假設以1000Hz的頻率對某個進程採樣,每次採樣記錄下該進程的IP寄存器的值(也就是下一條指令的地址)。通過分析該進程的可執行文件,是可以得知每次採樣的IP值處於哪個函數內部。OK,那麼我們相當於以1000Hz的頻率獲知進程當前所執行的函數。如果某個函數f()佔用了30%的時間,那麼所有采樣中,該函數出現的頻率也應該將近30%,只要採樣數量足夠多。這正是perf record的原理。所以,perf的採樣模式很有用~

但是,採樣比較複雜,主要表現在三點:1、採樣需要設置觸發源,也就是告訴kernel何時進行一次採樣;2、採樣需要設置信號,也就是告訴kernel,採樣完成後通知誰;3、採樣值的讀取需要使用mmap,因爲採樣有異步性,需要一個環形隊列,另外也是出於性能的考慮。

直接上代碼吧,對照着官方手冊看,學習效率最高:

採集單個值

perf.c

//如果不加,則F_SETSIG未定義
#define _GNU_SOURCE 1

#include <stdio.h>
#include <string.h>
#include <stdint.h>
#include <signal.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>

// Size of the ring-buffer data area: 16 pages, i.e. 16 * 4 kB
#define RING_BUFFER_PAGES 16

/*
 * Thin wrapper around the raw perf_event_open syscall.  glibc still ships no
 * libc-level binding for it, so the syscall number is invoked directly.
 */
int perf_event_open(struct perf_event_attr *attr, pid_t pid, int cpu,
                    int group_fd, unsigned long flags)
{
    long ret = syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
    return (int)ret;
}

// Start address of the mmap'd shared memory (metadata page + ring buffer)
void* rbuf;

// Layout of one record in the ring buffer for sample_type == PERF_SAMPLE_IP:
// the common header followed by the sampled instruction pointer.
struct perf_my_sample
{
    struct perf_event_header header;
    uint64_t ip;
};

// Offset of the next sample record, relative to the start of the ring-buffer
// data area (i.e. excluding the leading metadata page)
uint64_t next_offset=0;

// Signal handler invoked (via SIGIO) each time the kernel completes a sample.
// NOTE(review): printf() is not async-signal-safe; acceptable for a demo, not
// for production code.
void sample_handler(int sig_num,siginfo_t *sig_info,void *context)
{
    // Absolute offset of the newest sample: skip the 4096-byte metadata page,
    // then add the ring-buffer-relative offset saved by the previous signal.
    uint64_t offset=4096+next_offset;
    // Point at the newest sample record
    struct perf_my_sample* sample=(void*)((uint8_t*)rbuf+offset);
    // Only PERF_RECORD_SAMPLE records carry the fields we want
    if(sample->header.type==PERF_RECORD_SAMPLE)
    {
        // Print the sampled instruction pointer
        printf("%lx\n",sample->ip);
    }
    // The first shared-memory page is a struct perf_event_mmap_page that
    // describes the ring buffer
    struct perf_event_mmap_page* rinfo=rbuf;
    // data_head increases monotonically and never wraps on its own, so wrap
    // it by hand to obtain the offset of the NEXT record (consumed next time).
    // NOTE(review): if several samples land between two signals, the records
    // in between are skipped — confirm this is acceptable for the use case.
    next_offset=rinfo->data_head%(RING_BUFFER_PAGES*4096);
}

// Synthetic CPU-bound busy loop that serves as the profiling target; the
// accumulator is deliberately never read.
void workload()
{
    int acc = 0;
    int k;
    for (k = 0; k < 100000000; k++)
    {
        acc += k * k;
        acc -= k * 100;
        acc += k * k * k / 100;
    }
}

int main()
{
    struct perf_event_attr attr;
    memset(&attr,0,sizeof(struct perf_event_attr));
    attr.size=sizeof(struct perf_event_attr);
    // Trigger source: the software CPU-clock event
    attr.type=PERF_TYPE_SOFTWARE;
    attr.config=PERF_COUNT_SW_CPU_CLOCK;
    // Take one sample every 100000 CPU-clock ticks
    attr.sample_period=100000;
    // Each sample records only the instruction pointer
    attr.sample_type=PERF_SAMPLE_IP;
    // Start disabled; enabled explicitly via ioctl below
    attr.disabled=1;
    int fd=perf_event_open(&attr,0,-1,-1,0);
    if(fd<0)
    {
        perror("Cannot open perf fd!");
        return 1;
    }
    // Map 1 metadata page + RING_BUFFER_PAGES data pages, read-only
    rbuf=mmap(0,(1+RING_BUFFER_PAGES)*4096,PROT_READ,MAP_SHARED,fd,0);
    // BUG FIX: mmap() reports failure by returning MAP_FAILED ((void*)-1),
    // never a negative pointer, so the old `rbuf<0` test could not fire.
    if(rbuf==MAP_FAILED)
    {
        perror("Cannot mmap!");
        return 1;
    }
    // Switch the fd to async-notification mode so sample wakeups raise a
    // signal; F_SETSIG selects SIGIO and F_SETOWN routes it to this process.
    fcntl(fd,F_SETFL,O_RDWR|O_NONBLOCK|O_ASYNC);
    fcntl(fd,F_SETSIG,SIGIO);
    fcntl(fd,F_SETOWN,getpid());
    // Install the handler that consumes each completed sample
    struct sigaction sig;
    memset(&sig,0,sizeof(struct sigaction));
    sig.sa_sigaction=sample_handler;
    // SA_SIGINFO: the kernel passes a siginfo_t (including the fd) along
    sig.sa_flags=SA_SIGINFO;
    if(sigaction(SIGIO,&sig,0)<0)
    {
        perror("Cannot sigaction");
        return 1;
    }
    // Start monitoring
    ioctl(fd,PERF_EVENT_IOC_RESET,0);
    ioctl(fd,PERF_EVENT_IOC_ENABLE,0);
    workload();
    // Stop monitoring and release resources
    ioctl(fd,PERF_EVENT_IOC_DISABLE,0);
    munmap(rbuf,(1+RING_BUFFER_PAGES)*4096);
    close(fd);
    return 0;
}

可以看到一下子比計數模式複雜多了。採樣模式是要基於計數模式的——選擇一個“參考計數器”,並設置一個閾值,每當這個“參考計數器”達到閾值時,觸發一次採樣。每次採樣,kernel會把值放入隊列的末尾。如何得知kernel完成了一次最新的採樣了呢?一種方法就是定時輪詢,另一種就是響應信號。

如何讀取mmap共享內存中的值呢?首先,共享內存開頭是一個struct perf_event_mmap_page,提供環形緩衝區的信息,對我們最重要的字段就是data_head,官方手冊的介紹是這樣的:

image

注意,data_head一直遞增,不回滾!!所以需要手動處理wrap。另外一個需要注意的地方是,每次事件響應中,得到的data_head是下一次採樣的隊列頭部,所以需要自己保存一個副本next_offset,以供下次使用。

這個struct perf_event_mmap_page獨佔共享內存的第一頁。後面必須跟2^n頁,n自己決定。這2^n頁用來存放採樣記錄。每一條記錄的結構體如下:
image

因爲我只選擇了採樣IP,即PERF_SAMPLE_IP,所以這個結構體就退化爲了:

struct perf_my_sample
{
    struct perf_event_header header;
    uint64_t ip;
};

另外一個需要注意的地方是mmap中的第三個參數,是PROT_READ,表示應用程序只讀。如果設置爲了PROT_READ|PROT_WRITE,那麼讀取的過程就不一樣了:

image

這樣相當於和kernel做一個同步操作,效率勢必下降。而且由於SIGIO這個信號是不可靠信號,所以如果某次採樣完成的通知沒有被截獲,那麼就可能產生死鎖。

gcc perf.c -o perf
sudo ./perf

運行上面的代碼,產生如下輸出:
image

爲了驗證採集到的IP值是否正確,可以反彙編一下:

objdump -d ./perf

image

可以看到採集到的IP值全部落在workload這個函數的地址範圍內。

採集多個值

要採多個值的話,也很方便:

//如果不加,則F_SETSIG未定義
#define _GNU_SOURCE 1

#include <stdio.h>
#include <fcntl.h>
#include <stdint.h>
#include <unistd.h>
#include <string.h>
#include <signal.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>

// Size of the ring-buffer data area: 2 pages, i.e. 2 * 4 kB.
// NOTE(review): the original comment claimed 16 pages, but the macro value
// here is actually 2.
#define RING_BUFFER_PAGES 2

// glibc exposes no wrapper for perf_event_open(), so invoke the raw syscall.
int perf_event_open(struct perf_event_attr *attr, pid_t pid, int cpu,
                    int group_fd, unsigned long flags)
{
    return (int)syscall(__NR_perf_event_open, attr, pid, cpu,
                        group_fd, flags);
}

// Start address of the mmap'd shared memory (metadata page + ring buffer)
void* rbuf;

// Layout of one record in the ring buffer for
// sample_type == PERF_SAMPLE_IP | PERF_SAMPLE_CALLCHAIN.
struct perf_my_sample
{
    struct perf_event_header header;
    uint64_t ip;
    uint64_t nr;
    // C99 flexible array member (previously the GNU zero-length array
    // `ips[0]`); the <nr> call-chain entries follow inline in the record.
    uint64_t ips[];
};

// Offset of the next sample record, relative to the ring-buffer data area
uint64_t next_offset=0;

// Signal handler invoked (via SIGIO) each time the kernel completes a sample.
// NOTE(review): printf() is not async-signal-safe; acceptable for a demo only.
void sample_handler(int sig_num,siginfo_t *sig_info,void *context)
{
    // Absolute offset of the newest sample: skip the 4096-byte metadata page,
    // then add the ring-buffer-relative offset saved by the previous signal.
    uint64_t offset=4096+next_offset;
    // Point at the newest sample record
    struct perf_my_sample* sample=(void*)((uint8_t*)rbuf+offset);
    // Only PERF_RECORD_SAMPLE records carry the fields we want
    if(sample->header.type==PERF_RECORD_SAMPLE)
    {
        // The sampled instruction pointer
        printf("IP: %lx\n",sample->ip);
        // Sanity bound on the chain length before walking it
        if(sample->nr<1024)
        {
            // Depth (number of entries) of the call chain
            printf("Call Depth: %lu\n",sample->nr);
            // Walk the call chain entries that follow inline in the record.
            // NOTE(review): `i` is int while nr is uint64_t — a signed/
            // unsigned comparison, harmless here because nr < 1024.
            int i;
            for(i=0;i<sample->nr;i++)
                printf("  %lx\n",sample->ips[i]);
    	}
    }
    // The first shared-memory page is a struct perf_event_mmap_page that
    // describes the ring buffer
    struct perf_event_mmap_page* rinfo=rbuf;
    // data_head increases monotonically; wrap it by hand to get the offset
    // of the NEXT record (consumed on the next signal)
    next_offset=rinfo->data_head%(RING_BUFFER_PAGES*4096);
}

// Synthetic CPU-bound busy loop used as the profiling target; the accumulator
// is intentionally discarded.
void workload()
{
    int acc = 0;
    int k;
    for (k = 0; k < 1000000000; k++)
    {
        acc += k * k;
        acc -= k * 100;
        acc += k * k * k / 100;
    }
}

int main()
{
    struct perf_event_attr attr;
    memset(&attr,0,sizeof(struct perf_event_attr));
    attr.size=sizeof(struct perf_event_attr);
    // Trigger source: the software CPU-clock event
    attr.type=PERF_TYPE_SOFTWARE;
    attr.config=PERF_COUNT_SW_CPU_CLOCK;
    // Take one sample every 100000 CPU-clock ticks
    attr.sample_period=100000;
    // Each sample records the IP plus the whole call chain
    attr.sample_type=PERF_SAMPLE_IP|PERF_SAMPLE_CALLCHAIN;
    // Start disabled; enabled explicitly via ioctl below
    attr.disabled=1;
    int fd=perf_event_open(&attr,0,-1,-1,0);
    if(fd<0)
    {
        perror("Cannot open perf fd!");
        return 1;
    }
    // Map 1 metadata page + RING_BUFFER_PAGES data pages, read-only
    rbuf=mmap(0,(1+RING_BUFFER_PAGES)*4096,PROT_READ,MAP_SHARED,fd,0);
    // BUG FIX: mmap() reports failure by returning MAP_FAILED ((void*)-1),
    // never a negative pointer, so the old `rbuf<0` test could not fire.
    if(rbuf==MAP_FAILED)
    {
        perror("Cannot mmap!");
        return 1;
    }
    // Switch the fd to async-notification mode so sample wakeups raise a
    // signal; F_SETSIG selects SIGIO and F_SETOWN routes it to this process.
    fcntl(fd,F_SETFL,O_RDWR|O_NONBLOCK|O_ASYNC);
    fcntl(fd,F_SETSIG,SIGIO);
    fcntl(fd,F_SETOWN,getpid());
    // Install the handler that consumes each completed sample
    struct sigaction sig;
    memset(&sig,0,sizeof(struct sigaction));
    sig.sa_sigaction=sample_handler;
    // SA_SIGINFO: the kernel passes a siginfo_t (including the fd) along
    sig.sa_flags=SA_SIGINFO;
    if(sigaction(SIGIO,&sig,0)<0)
    {
        perror("Cannot sigaction");
        return 1;
    }
    // Start monitoring
    ioctl(fd,PERF_EVENT_IOC_RESET,0);
    ioctl(fd,PERF_EVENT_IOC_ENABLE,0);
    workload();
    // Stop monitoring and release resources
    ioctl(fd,PERF_EVENT_IOC_DISABLE,0);
    munmap(rbuf,(1+RING_BUFFER_PAGES)*4096);
    close(fd);
    return 0;
}

示例程序2

在上一篇《Linux perf子系統的使用(二)——採樣(signal方式)》中,我使用了信號來接收採樣完成通知,並在回調函數中讀取最新的採樣值。雖說回調方式有很多優點,但是並不是太通用。更加糟糕的是,信號會打斷幾乎所有的系統調用,使得本來的程序邏輯被破壞。另一個很糟糕的點是,如果一個進程中需要開多個採樣器,那麼就要共享同一個事件回調函數,破壞了封裝性。

因此,最好有一個阻塞式輪詢的辦法。嗯,這就是今天要講的東西——通過poll()函數等待採樣完成。

其實poll()輪詢的實現比信號的方式簡單,只要把perf_event_open()返回的文件描述符當做普通的文件描述符傳入poll()就可以了。在創建perf文件描述符時,唯一需要注意的就是需要手動設置wakeup_events的值。wakeup_events決定了多少次採樣以後進行一次通知(poll模式下就是讓poll返回),一般設置爲1。

直接上代碼吧,和《Linux perf子系統的使用(二)——採樣(signal方式)》中的代碼比較一下就一目瞭然了。

perf_poll.cpp

#include <poll.h>
#include <errno.h>
#include <stdio.h>
#include <stdint.h>
#include <assert.h>
#include <signal.h>
#include <string.h>
#include <unistd.h>
#include <signal.h>
#include <sys/mman.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>

// the number of pages to hold ring buffer
#define RING_BUFFER_PAGES 8

// perf_event_open() has no glibc binding; issue the raw syscall ourselves.
static int perf_event_open(struct perf_event_attr *attr, pid_t pid,
                           int cpu, int group_fd, unsigned long flags)
{
    return (int)syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
}

// Loop flag cleared from the SIGINT handler.
// BUG FIX: an object written from a signal handler and read elsewhere must be
// volatile sig_atomic_t (per the C/C++ standards); a plain bool gives the
// compiler license to cache the value and never observe the store.
static volatile sig_atomic_t sg_running=1;

// SIGINT handler: request that the sampling loop stop and the program exit.
static void on_closing(int signum)
{
    sg_running=0;
}

int main()
{
    // NOTE(review): the target pid is hard-coded for this demo
    pid_t pid=6268;
    // create a perf fd
    struct perf_event_attr attr;
    memset(&attr,0,sizeof(struct perf_event_attr));
    attr.size=sizeof(struct perf_event_attr);
    // disable at init time
    attr.disabled=1;
    // set what is the event
    attr.type=PERF_TYPE_SOFTWARE;
    attr.config=PERF_COUNT_SW_CPU_CLOCK;
    // how many clocks to trigger sampling
    attr.sample_period=1000000;
    // what to sample is IP
    attr.sample_type=PERF_SAMPLE_IP;
    // wake up the poller after every single overflow
    attr.wakeup_events=1;
    // open perf fd
    int perf_fd=perf_event_open(&attr,pid,-1,-1,0);
    if(perf_fd<0)
    {
        perror("perf_event_open() failed!");
        return errno;
    }
    // create a shared memory to read samples from kernel
    void* shared_mem=mmap(0,(1+RING_BUFFER_PAGES)*4096,PROT_READ,MAP_SHARED,perf_fd,0);
    // BUG FIX: mmap() signals failure by returning MAP_FAILED ((void*)-1),
    // never NULL, so the old `shared_mem==0` check could not detect an error.
    if(shared_mem==MAP_FAILED)
    {
        perror("mmap() failed!");
        return errno;
    }
    // reset and enable
    ioctl(perf_fd,PERF_EVENT_IOC_RESET,0);
    ioctl(perf_fd,PERF_EVENT_IOC_ENABLE,0);
    // the offset from the head of ring-buffer where the next sample is
    uint64_t next_offset=0;
    // poll perf_fd
    struct pollfd perf_poll;
    perf_poll.fd=perf_fd;
    perf_poll.events=POLLIN;
    signal(SIGINT,on_closing);
    while(sg_running)
    {
        if(poll(&perf_poll,1,-1)<0)
        {
            perror("poll() failed!");
            break;
        }
        // the pointer to the completed sample
        struct sample
        {
            struct perf_event_header header;
            uint64_t ip;
        }*
        sample=(struct sample*)((uint8_t*)shared_mem+4096+next_offset);
        // the pointer to the info structure of ring-buffer
        struct perf_event_mmap_page* info=(struct perf_event_mmap_page*)shared_mem;
        // update the offset, wrap the offset
        next_offset=info->data_head%(RING_BUFFER_PAGES*4096);
        // allow only the PERF_RECORD_SAMPLE
        if(sample->header.type!=PERF_RECORD_SAMPLE)
            continue;
        printf("%lx\n",sample->ip);
    }
    printf("clean up\n");
    // disable
    ioctl(perf_fd,PERF_EVENT_IOC_DISABLE,0);
    // unmap shared memory
    munmap(shared_mem,(1+RING_BUFFER_PAGES)*4096);
    // close perf fd
    close(perf_fd);
    return 0;
}

可以看到除了獲取通知的部分由signal改爲poll()以外,幾乎沒有改動。

g++ perf_poll.cpp -o perf_poll
sudo ./perf_poll

==2017年7月28日補充

首先,爲了方便以後的使用,我把perf採樣callchain的功能封裝成了一個C++的類,它能夠針對一個特定的pid進行採樣,支持帶有超時的輪詢。接口聲明如下:

CallChainSampler.h

#ifndef CALLCHAINSAMPLER_H
#define CALLCHAINSAMPLER_H

#include <stdint.h>
#include <unistd.h>

// A class that samples the call chain of a process via perf_event_open.
// NOTE(review): per the author's addendum, the pid passed to perf_event_open
// really identifies a single thread (tid) — one instance samples one thread.
class CallChainSampler
{

public:

    // One sampled call chain, as returned by sample().
    struct callchain
    {
        // the timestamp when the sample was taken
        uint64_t time;
        // the pid and tid of the sampled thread
        uint32_t pid,tid;
        // the depth (number of entries) of the call chain
        uint64_t depth;
        // <depth>-element array; each item is an IP register value.
        // Points into the kernel-shared ring buffer — see ATTENTION below.
        const uint64_t* ips;
    };

    // constructor
    //  pid: the process (thread) id to monitor
    //  period: how many CPU clocks between two samples
    //  pages: how many pages (4K) allocated for the ring-buffer to hold samples
    CallChainSampler(pid_t pid,uint64_t period,uint32_t pages);

    // destructor: stops sampling and releases the fd and the mapping
    ~CallChainSampler();

    // start sampling (enables the perf event)
    void start();

    // stop sampling (disables the perf event)
    void stop();

    // wait for and fetch the next sample
    //  timeout: the max milliseconds to block (< 0 means wait forever)
    //  max_depth: the max depth of the call chain to report
    //  callchain: output parameter receiving the sampled call chain
    //  return: 0 if a sample arrived before the timeout,
    //          -1 on timeout,
    //          errno if an error occurs
    //  ATTENTION: the field [ips] in callchain points into the shared ring
    //             buffer and may be overwritten by later samples — consume it
    //             immediately, don't hold it for long.
    int sample(int32_t timeout,uint64_t max_depth,struct callchain* callchain);

private:

    // the perf file descriptor
    int fd;
    // the mmap'd area (1 metadata page + <pages> ring-buffer pages)
    void* mem;
    // how many pages hold the ring-buffer data area
    uint32_t pages;
    // the offset in the ring-buffer where the next sample is
    uint64_t offset;

};

#endif
實現基本就是把上面的C代碼封裝一下:

CallChainSampler.cpp

#include "CallChainSampler.h"

#include <poll.h>
#include <errno.h>
#include <assert.h>
#include <string.h>
#include <stdexcept>
#include <sys/time.h>
#include <sys/mman.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>

// Raw-syscall wrapper: glibc does not provide perf_event_open().
static int perf_event_open(struct perf_event_attr *attr, pid_t pid,
                           int cpu, int group_fd, unsigned long flags)
{
    long rc = syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
    return (int)rc;
}

// Current wall-clock time in milliseconds since the Unix epoch.
static uint64_t get_milliseconds()
{
    struct timeval now;
    // BUG FIX: the gettimeofday() call used to sit inside assert(), so the
    // whole call was compiled out under -DNDEBUG and `now` stayed
    // uninitialized.  Perform the call unconditionally, then check it.
    int rc = gettimeofday(&now, 0);
    assert(rc == 0);
    (void)rc; // silence "unused" warning when NDEBUG strips the assert
    return (uint64_t)now.tv_sec * 1000 + now.tv_usec / 1000;
}

// min() as a macro.  NOTE(review): each argument may be evaluated twice, so
// never pass expressions with side effects.
#define min(a,b) ((a)<(b)?(a):(b))

// Open the perf event for <pid>, sampling TID+TIME+CALLCHAIN every <period>
// CPU clocks, and map 1 metadata page + <pages> ring-buffer pages.
CallChainSampler::CallChainSampler(pid_t pid,uint64_t period,uint32_t pages)
{
    // create a perf fd
    struct perf_event_attr attr;
    memset(&attr,0,sizeof(struct perf_event_attr));
    attr.size=sizeof(struct perf_event_attr);
    // disable at init time; start() enables it
    attr.disabled=1;
    // set what is the event
    attr.type=PERF_TYPE_SOFTWARE;
    attr.config=PERF_COUNT_SW_CPU_CLOCK;
    // how many clocks to trigger sampling
    attr.sample_period=period;
    // sample the timestamp, pid/tid and the whole call chain
    attr.sample_type=PERF_SAMPLE_TIME|PERF_SAMPLE_TID|PERF_SAMPLE_CALLCHAIN;
    // wake the poller after every single overflow
    attr.wakeup_events=1;
    // open perf fd
    fd=perf_event_open(&attr,pid,-1,-1,0);
    if(fd<0)
        throw std::runtime_error("perf_event_open() failed!");
    // create a shared memory to read samples from kernel
    mem=mmap(0,(1+pages)*4096,PROT_READ,MAP_SHARED,fd,0);
    // BUG FIX: mmap() reports failure with MAP_FAILED ((void*)-1), never
    // NULL, so the old `mem==0` check could not fire.  Also close the perf
    // fd before throwing so a failed construction does not leak it.
    if(mem==MAP_FAILED)
    {
        close(fd);
        throw std::runtime_error("mmap() failed!");
    }
    this->pages=pages;
    // the offset of next sample
    offset=0;
}

// Tear down in reverse order of construction: disable the event first, then
// release the mapping, then the descriptor.
CallChainSampler::~CallChainSampler()
{
    stop();
    // release the 1 metadata page + <pages> ring-buffer pages
    munmap(mem,(1+pages)*4096);
    // finally drop the perf file descriptor
    close(fd);
}

// Begin sampling by enabling the perf event.
void CallChainSampler::start()
{
    ioctl(fd,PERF_EVENT_IOC_ENABLE,0);
}

// Suspend sampling by disabling the perf event.
void CallChainSampler::stop()
{
    ioctl(fd,PERF_EVENT_IOC_DISABLE,0);
}

// Block (up to <timeout> ms) until the next PERF_RECORD_SAMPLE arrives, then
// copy its fields into *callchain.  Returns 0 on success, -1 on timeout,
// errno on error.  See the header for the lifetime caveat on callchain->ips.
int CallChainSampler::sample(int32_t timeout,uint64_t max_depth,struct callchain* callchain)
{
    if(callchain==0)
        throw std::runtime_error("arg <callchain> is NULL!");
    // the poll struct
    struct pollfd pfd;
    pfd.fd=fd;
    pfd.events=POLLIN;
    // when the call started, for computing the remaining timeout
    uint64_t start=get_milliseconds();
    while(1)
    {
        // the current time
        uint64_t now=get_milliseconds();
        // the milliseconds still left to wait
        int32_t to_wait;
        if(timeout<0)
            to_wait=-1; // negative timeout: block indefinitely
        else
        {
            to_wait=timeout-(int32_t)(now-start);
            if(to_wait<0)
                return -1; // budget exhausted before a sample arrived
        }
        // wait for the next sample
        int ret=poll(&pfd,1,to_wait);
        if(ret==0)
            return -1; // poll() timed out
        else if(ret==-1)
            return errno;
        // Record layout matching PERF_SAMPLE_TID|TIME|CALLCHAIN: header,
        // pid/tid, timestamp, chain length, then <nr> inline IP values.
        // NOTE(review): field order must match the perf ABI — confirm
        // against perf_event_open(2) if sample_type ever changes.
        struct sample
        {
            struct perf_event_header header;
            uint32_t pid,tid;
            uint64_t time;
            uint64_t nr;
            uint64_t ips[0];
        }*
        sample=(struct sample*)((uint8_t*)mem+4096+offset);
        // the perf_event_mmap_page in the first page describes the ring buffer
        struct perf_event_mmap_page* info=(struct perf_event_mmap_page*)mem;
        // data_head never wraps on its own; wrap it by hand to get the offset
        // of the NEXT record.  NOTE(review): if several records arrived since
        // the last call, the intermediate ones are skipped.
        offset=info->data_head%(pages*4096);
        // only PERF_RECORD_SAMPLE records carry the fields we want
        if(sample->header.type!=PERF_RECORD_SAMPLE)
            continue;
        // copy the result out (ips still points into the ring buffer)
        callchain->time=sample->time;
        callchain->pid=sample->pid;
        callchain->tid=sample->tid;
        callchain->depth=min(max_depth,sample->nr);
        callchain->ips=sample->ips;
        return 0;
    }
}

最後要補充一個我最新的發現!perf_event_open()裏面傳入的pid,本質上是一個線程id,也就是tid。它只能監控一個線程,而無法監控一個進程中的所有線程。所以要用到實際項目中,肯定得配合使用epoll來監控所有的線程。

測試代碼如下:

#include <stdio.h>
#include <signal.h>
#include <stdlib.h>

#include "CallChainSampler.h"

// Global so the SIGINT handler below can reach the sampler for cleanup.
CallChainSampler* sampler;

// SIGINT handler: destroy the sampler and terminate.
// NOTE(review): operator delete and exit() are not async-signal-safe; fine
// for a demo, but formally undefined behavior in a signal handler.
static void on_closing(int signum)
{
    delete sampler;
    exit(0);
}

int main()
{
    // create a sampler: pid=5281, one sample per 10000 clocks,
    // and 128 pages (512 kB) allocated for the ring buffer
    sampler=new CallChainSampler(5281,10000,128);
    signal(SIGINT,on_closing);
    sampler->start();
    for(int i=0;i<10000;i++)
    {
        CallChainSampler::callchain callchain;
        // wait forever (-1) for the next sample; cap the chain depth at 256
        int ret=sampler->sample(-1,256,&callchain);
        printf("%d\n",ret);
        if(ret==0)
        {
            // successful sample, print it out
            printf("time=%lu\n",callchain.time);
            // BUG FIX: pid/tid are uint32_t — print with %u, not %d
            printf("pid,tid=%u,%u\n",callchain.pid,callchain.tid);
            printf("stack:\n");
            // BUG FIX: depth is uint64_t; use a matching index type to avoid
            // a signed/unsigned comparison
            for(uint64_t j=0;j<callchain.depth;j++)
                printf("[%d]   %lx\n",(int)j,callchain.ips[j]);
        }
    }
    return 0;
}

示例程序3

#define _GNU_SOURCE
#include <stdlib.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/perf_event.h>
#include <sys/mman.h>
#include <linux/hw_breakpoint.h>
#include <asm/unistd.h>
#include <errno.h>
#include <stdint.h>
#include <inttypes.h>

#ifndef MAP_FAILED
#define MAP_FAILED ((void *)-1)
#endif

// All fields a PERF_RECORD_SAMPLE *may* carry, gated by attr.sample_type.
// WARNING(review): in the actual ring-buffer record the variable-length
// fields (callchain ips, raw data, branch stack, user regs/stack) are stored
// INLINE, not as pointers.  This struct therefore only matches the serialized
// layout up to the first variable-length field present; members after that
// point are NOT at these offsets.  Confirm against perf_event_open(2) before
// reading anything past `period` for a given sample_type.
struct perf_sample_event {
  struct perf_event_header hdr;
  uint64_t    sample_id;                // if PERF_SAMPLE_IDENTIFIER
  uint64_t    ip;                       // if PERF_SAMPLE_IP
  uint32_t    pid, tid;                 // if PERF_SAMPLE_TID
  uint64_t    time;                     // if PERF_SAMPLE_TIME
  uint64_t    addr;                     // if PERF_SAMPLE_ADDR
  uint64_t    id;                       // if PERF_SAMPLE_ID
  uint64_t    stream_id;                // if PERF_SAMPLE_STREAM_ID
  uint32_t    cpu, res;                 // if PERF_SAMPLE_CPU
  uint64_t    period;                   // if PERF_SAMPLE_PERIOD
  struct      read_format *v;           // if PERF_SAMPLE_READ
  uint64_t    nr;                       // if PERF_SAMPLE_CALLCHAIN
  uint64_t    *ips;                     // if PERF_SAMPLE_CALLCHAIN
  uint32_t    size_raw;                 // if PERF_SAMPLE_RAW
  char        *data_raw;                // if PERF_SAMPLE_RAW
  uint64_t    bnr;                      // if PERF_SAMPLE_BRANCH_STACK
  struct      perf_branch_entry *lbr;   // if PERF_SAMPLE_BRANCH_STACK
  uint64_t    abi;                      // if PERF_SAMPLE_REGS_USER
  uint64_t    *regs;                    // if PERF_SAMPLE_REGS_USER
  uint64_t    size_stack;               // if PERF_SAMPLE_STACK_USER
  char        *data_stack;              // if PERF_SAMPLE_STACK_USER
  uint64_t    dyn_size_stack;           // if PERF_SAMPLE_STACK_USER
  uint64_t    weight;                   // if PERF_SAMPLE_WEIGHT
  uint64_t    data_src;                 // if PERF_SAMPLE_DATA_SRC
  uint64_t    transaction;              // if PERF_SAMPLE_TRANSACTION
  uint64_t    abi_intr;                 // if PERF_SAMPLE_REGS_INTR
  uint64_t    *regs_intr;               // if PERF_SAMPLE_REGS_INTR
};

// n-th Fibonacci number: fib(0)=0, fib(1)=fib(2)=1.
// BUG FIX: the naive double recursion was O(2^n) and recursed without bound
// for negative n; this iterative form is O(n) and returns 0 for n <= 0.
int fib(int n) {
  if (n <= 0) {
    return 0;
  }
  int prev = 0, cur = 1;
  for (int k = 2; k <= n; k++) {
    int next = prev + cur;
    prev = cur;
    cur = next;
  }
  return cur;
}

// Touch 100 MiB of freshly-allocated heap byte-by-byte so that every page is
// faulted in — a simple page-fault/CPU workload for the profiler.
void do_something() {
  size_t total = 100*1024*1024;
  char *ptr = malloc(total);
  // BUG FIX: malloc() can fail; writing through a NULL pointer is UB.
  if (ptr == NULL) {
    return;
  }
  for (size_t i = 0; i < total; i++) {
    ptr[i] = (char)(i & 0xff); // first touch of each page triggers a fault
  }
  free(ptr);
}

// Classic in-place insertion sort over the first n elements of nums.
void insertion_sort(int *nums, size_t n) {
  for (size_t i = 1; i < n; i++) {
    // sink nums[i] leftwards until the prefix [0, i] is ordered
    for (size_t j = i; j > 0 && nums[j-1] > nums[j]; j--) {
      int tmp = nums[j];
      nums[j] = nums[j-1];
      nums[j-1] = tmp;
    }
  }
}

// Walk every record currently in the ring buffer, from data_tail to
// data_head, printing the IP (and user-stack info) of each PERF_RECORD_SAMPLE.
// NOTE(review): struct perf_sample_event stores variable-length fields as
// pointers, which does not match the inline serialized record layout, so
// size_stack/data_stack are only valid if the configured sample_type keeps
// the struct prefix in sync — confirm against perf_event_open(2).
static void process_ring_buffer_events(struct perf_event_mmap_page *data, int page_size) {
  // BUG FIX: the old code assigned uintptr_t arithmetic straight to pointers
  // (implicit int-to-pointer conversion) and printed uint64_t and pointer
  // values with %lld — both undefined behavior.  Cast explicitly and use
  // PRIu64 / %p.
  struct perf_event_header *header =
      (struct perf_event_header *)((uintptr_t)data + page_size + data->data_tail);
  void *end = (void *)((uintptr_t)data + page_size + data->data_head);

  while ((void *)header != end) {
    if (header->type == PERF_RECORD_SAMPLE) {
      struct perf_sample_event *event = (struct perf_sample_event *)header;
      uint64_t ip = event->ip;
      printf("PERF_RECORD_SAMPLE found with ip: %" PRIu64 "\n", ip);
      uint64_t size_stack = event->size_stack;
      char *data_stack = event->data_stack;
      if (data_stack != NULL) {
        printf("PERF_RECORD_SAMPLE has size stack: %" PRIu64 " at location: %p\n",
               size_stack, (void *)data_stack);
      }
    } else {
      printf("other type %d found!", header->type);
    }
    // header->size covers the whole record, advancing us to the next one
    header = (struct perf_event_header *)((uintptr_t)header + header->size);
  }
}

int main(int argc, char* argv[]) {
  struct perf_event_attr pea;
  // ring buffer must be 1 metadata page + 2^n data pages; here n = 4
  const int NUM_MMAP_PAGES = (1U << 4) + 1;

  int some_nums[1000];
  for (int i = 0; i < 1000; i++) {
    some_nums[i] = 1000 - i;
  }

  memset(&pea, 0, sizeof(struct perf_event_attr));
  pea.type = PERF_TYPE_SOFTWARE;
  pea.size = sizeof(struct perf_event_attr);
  pea.config = PERF_COUNT_SW_CPU_CLOCK;
  pea.disabled = 1;
  pea.exclude_kernel = 1;
  pea.exclude_hv = 0;
  pea.sample_period = 1;          // sample on every counter overflow
  pea.precise_ip = 3;
  pea.sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_STACK_USER;
  pea.sample_stack_user = 10000;  // bytes of user stack captured per sample
  int fd1 = syscall(__NR_perf_event_open, &pea, 0, -1, -1, 0);
  // BUG FIX: the descriptor was never checked; a failed perf_event_open()
  // (e.g. perf_event_paranoid too strict) made the mmap below fail obscurely.
  if (fd1 < 0) {
    perror("perf_event_open failed");
    return 1;
  }

  // BUG FIX: sizeof yields size_t — print with %zu, not %d.
  printf("size of perf_event_mmap_page struct is %zu\n", sizeof(struct perf_event_mmap_page));
  int page_size = (int) sysconf(_SC_PAGESIZE);
  printf("page size in general is: %d\n", page_size);

  // Map the ring buffer into memory
  struct perf_event_mmap_page *pages = mmap(NULL, page_size * NUM_MMAP_PAGES,
      PROT_READ | PROT_WRITE, MAP_SHARED, fd1, 0);
  if (pages == MAP_FAILED) {
      perror("Error mapping ring buffer");
      return 1;
  }

  ioctl(fd1, PERF_EVENT_IOC_RESET, PERF_IOC_FLAG_GROUP);
  ioctl(fd1, PERF_EVENT_IOC_ENABLE, PERF_IOC_FLAG_GROUP);
  // alternative workloads: do_something(); fib(40);
  size_t n = sizeof(some_nums)/sizeof(some_nums[0]);
  // BUG FIX: &some_nums has type int(*)[1000]; insertion_sort takes int*.
  insertion_sort(some_nums, n);
  ioctl(fd1, PERF_EVENT_IOC_DISABLE, PERF_IOC_FLAG_GROUP);

  // BUG FIX: data_head is a 64-bit field — %d truncated it; also terminate
  // the line so the next printf doesn't run into it.
  printf("head of perf ring buffer is at: %" PRIu64 "\n", (uint64_t)pages->data_head);
  process_ring_buffer_events(pages, page_size);
  munmap(pages, page_size * NUM_MMAP_PAGES);

  return 0;
}
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章