Fixing virtio-gpu framebuffer support and VT-switch hangs

0) Preface

This started out as an investigation into dual-head output with virtio-gpu, but along the way it turned out that the driver's fbdev support is broken and that VT switching hangs. This post records the analysis of those problems.

1) Initial investigation

In xorg.conf the following was configured: Option "Xinerama" "1"

spice brings up two windows, so presumably two connectors are enabled.

But xrandr only shows Virtual-0, which means the second screen is not connected.

As a last resort the two displays could be driven by writing to fb0 and fb1 directly, but fbdev support appears to be broken:

# strace a.out
ioctl(3, FBIOGET_FSCREENINFO, 0x7ffc65153d90) = 0
ioctl(3, FBIOGET_VSCREENINFO, 0x7ffc65153de0) = 0
write(1, "1024x768, 32bpp\n", 16)       = 16
mmap(NULL, 3145728, PROT_READ|PROT_WRITE, MAP_SHARED, 3, 0) = -1 EINVAL (Invalid argument)
dup(2)                                  = 4
fcntl(4, F_GETFL)                       = 0x8002 (flags O_RDWR|O_LARGEFILE)
fstat(4, {st_mode=S_IFCHR|0600, st_rdev=makedev(136, 0), ...}) = 0
write(4, "Error: failed to map framebuffer device to memory: Invalid argument"..., 68) = 68

Walking through the trace, the ioctl queries all succeed; it is the mmap that fails with EINVAL.
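For reference, here is a minimal fbdev test along the lines of what a.out appears to do, reconstructed from the strace above (a sketch, not the original program):

#include <stdio.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/ioctl.h>
#include <linux/fb.h>

int main(void)
{
    struct fb_fix_screeninfo fix;
    struct fb_var_screeninfo var;
    size_t len;
    char *fbp;

    int fd = open("/dev/fb0", O_RDWR);
    if (fd < 0) { perror("open /dev/fb0"); return 1; }
    printf("The framebuffer device was opened successfully.\n");

    ioctl(fd, FBIOGET_FSCREENINFO, &fix);
    ioctl(fd, FBIOGET_VSCREENINFO, &var);
    printf("%dx%d, %dbpp\n", var.xres, var.yres, var.bits_per_pixel);

    /* 1024 x 768 x 32bpp -> 768 * 4096 = 3145728 bytes, matching the mmap above */
    len = var.yres_virtual * fix.line_length;
    fbp = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
    if (fbp == MAP_FAILED) {
        perror("Error: failed to map framebuffer device to memory");
        return 1;
    }
    printf("The framebuffer device was mapped to memory successfully.\n");

    memset(fbp, 0xff, len);   /* paint the whole screen white */
    munmap(fbp, len);
    close(fd);
    return 0;
}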

2) Root-causing the mmap failure

The DRM mmap call stack:

Sep 16 12:14:56 Linx kernel: [ 2570.217975]  [<ffffffffbd52b8d5>] ? dump_stack+0x5c/0x77
Sep 16 12:14:56 Linx kernel: [ 2570.218018]  [<ffffffffc06dbfe2>] ? virtio_gpu_mmap+0x12/0x60 [virtio_gpu]
Sep 16 12:14:56 Linx kernel: [ 2570.218026]  [<ffffffffbd3bdbb1>] ? mmap_region+0x341/0x590
Sep 16 12:14:56 Linx kernel: [ 2570.218030]  [<ffffffffbd3be256>] ? do_mmap+0x456/0x560
Sep 16 12:14:56 Linx kernel: [ 2570.218035]  [<ffffffffbd4a4986>] ? security_mmap_file+0x66/0xe0
Sep 16 12:14:56 Linx kernel: [ 2570.218041]  [<ffffffffbd3a02ae>] ? vm_mmap_pgoff+0xbe/0x100
Sep 16 12:14:56 Linx kernel: [ 2570.218046]  [<ffffffffbd3bc0d1>] ? SyS_mmap_pgoff+0x1b1/0x270
Sep 16 12:14:56 Linx kernel: [ 2570.218051]  [<ffffffffbd203b5c>] ? do_syscall_64+0x5c/0x170
Sep 16 12:14:56 Linx kernel: [ 2570.218058]  [<ffffffffbd7fc1ef>] ? entry_SYSCALL64_slow_path+0x25/0x25

The call stack at fbdev initialization:

dump_stack+0x5c/0x77
virtio_gpufb_create+0x49/0x320 [virtio_gpu]
drm_setup_crtcs+0x372/0x990 [drm_kms_helper]
drm_fb_helper_initial_config+0x20f/0x3da [drm_kms_helper]
virtio_gpu_fbdev_init+0xde/0x100 [virtio_gpu]
virtio_gpu_driver_load+0x44c/0x650 [virtio_gpu]
wake_up_atomic_t+0x30/0x30
drm_dev_register+0x9c/0xc0 [drm]
drm_virtio_init+0x60/0x1a0 [virtio_gpu]
vp_finalize_features+0x6a/0x90 [virtio_pci]
virtio_dev_probe+0x141/0x1e0 [virtio]
driver_probe_device+0x21e/0x430
__driver_attach+0xd6/0xe0
driver_probe_device+0x430/0x430
bus_for_each_dev+0x67/0xb0
bus_add_driver+0x40/0x260
driver_register+0x57/0xd0
do_one_initcall+0x4c/0x180
preempt_schedule_common+0x14/0x20
_cond_resched+0x19/0x20
do_init_module+0x5a/0x1f1
load_module+0x24e3/0x28f0
__symbol_put+0x60/0x60
vfs_read+0x114/0x130
security_capable+0x41/0x60
SYSC_finit_module+0x8e/0xe0
entry_SYSCALL_64_fastpath+0x1e/0xad

Nothing obvious there. Next, the fbdev mmap call stack:

fb_mmap+0x45/0x140
mmap_region+0x341/0x590
do_mmap+0x456/0x560
security_mmap_file+0x66/0xe0
vm_mmap_pgoff+0xbe/0x100
SyS_mmap_pgoff+0x1b1/0x270
do_syscall_64+0x5c/0x170
entry_SYSCALL64_slow_path+0x25/0x25

Looking at fb_mmap's code:

static int
fb_mmap(struct file *file, struct vm_area_struct * vma)
{
    struct fb_info *info = file_fb_info(file);
    struct fb_ops *fb;
    unsigned long mmio_pgoff;
    unsigned long start;
    u32 len;

    dump_stack();   /* added locally to capture the call stack shown above */

    if (!info)
        return -ENODEV;
    fb = info->fbops;
    if (!fb)
        return -ENODEV;
    mutex_lock(&info->mm_lock);
    if (fb->fb_mmap) {
        /* driver hook: if present, the driver handles the mapping itself */
        int res;

        res = fb->fb_mmap(info, vma);
        mutex_unlock(&info->mm_lock);
        return res;
    }

    /* otherwise, map the device's framebuffer/MMIO memory directly */
    start = info->fix.smem_start;
    len = info->fix.smem_len;
    mmio_pgoff = PAGE_ALIGN((start & ~PAGE_MASK) + len) >> PAGE_SHIFT;
    if (vma->vm_pgoff >= mmio_pgoff) {
        if (info->var.accel_flags) {
            mutex_unlock(&info->mm_lock);
            return -EINVAL;
        }

        vma->vm_pgoff -= mmio_pgoff;
        start = info->fix.mmio_start;
        len = info->fix.mmio_len;
    }
    mutex_unlock(&info->mm_lock);

    vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
    fb_pgprotect(file, vma, start);

    return vm_iomap_memory(vma, start, len);
}

Combining this with the code in virtio_gpufb_create: it never sets fix.smem_start or fix.smem_len, so both are left at their default of 0:

static int virtio_gpufb_create(struct drm_fb_helper *helper,
             struct drm_fb_helper_surface_size *sizes)
{
...
    fb = &vfbdev->vgfb.base;

    vfbdev->helper.fb = fb;
    strcpy(info->fix.id, "virtiodrmfb");
    info->flags = FBINFO_DEFAULT;
    info->fbops = &virtio_gpufb_ops;
    info->pixmap.flags = FB_PIXMAP_SYSTEM;

    info->screen_base = obj->vmap;
    info->screen_size = obj->gem_base.size;
    drm_fb_helper_fill_fix(info, fb->pitches[0], fb->depth);
    drm_fb_helper_fill_var(info, &vfbdev->helper,
             sizes->fb_width, sizes->fb_height);

    info->fix.mmio_start = 0;
    info->fix.mmio_len = 0;
...
}

Meanwhile the following call sets accel_flags, and FB_ACCELF_TEXT is 1:

drm_fb_helper_fill_var
->info->var.accel_flags = FB_ACCELF_TEXT;

Putting it together: with smem_start and smem_len both 0, fb_mmap computes mmio_pgoff = 0, so the vma->vm_pgoff >= mmio_pgoff test is always true, and since accel_flags is set the function returns -EINVAL right away. Resetting info->var.accel_flags to 0 would not help either: execution would fall through to vm_iomap_memory with start = mmio_start = 0 and len = mmio_len = 0, so io_remap_pfn_range is bound to fail sooner or later anyway. This is the difference between virtio-gpu and qxl: qxl backs its framebuffer with a "device address", though for a virtual device that is not worth much either.

So the fbmem code only really caters to framebuffers backed by physical device memory. However, it exposes a hook right at the top: as long as the driver implements the fb_mmap op, the mapping is delegated to the driver, which can then map its own (virtual) memory:

static int
fb_mmap(struct file *file, struct vm_area_struct * vma)
{
...
    mutex_lock(&info->mm_lock);
    if (fb->fb_mmap) {
        int res;

        res = fb->fb_mmap(info, vma);
        mutex_unlock(&info->mm_lock);
        return res;
    }
...
}
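A minimal sketch of such a hook for virtio-gpu, modeled on how other TTM-backed DRM drivers wire up their fbdev mmap; the structure and field names (info->par, vgfb.obj, tbo) are assumptions about this kernel's virtio-gpu and may need adjusting:

/* Sketch: delegate the fbdev mmap to TTM so the GEM object's pages back
 * the user mapping.  Field names here are assumptions, not verified API. */
static int virtio_gpu_fb_mmap(struct fb_info *info, struct vm_area_struct *vma)
{
    struct virtio_gpu_fbdev *vfbdev = info->par;
    struct virtio_gpu_object *obj =
        gem_to_virtio_gpu_obj(vfbdev->vgfb.obj);

    return ttm_fbdev_mmap(vma, &obj->tbo);
}

It would then be wired into virtio_gpufb_ops as .fb_mmap = virtio_gpu_fb_mmap, so that fb_mmap takes the driver-hook branch above.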

After that, mmap succeeds, but writing to the mapping dies with SIGBUS:

# ./a.out
The framebuffer device was opened successfully.
1024x768, 32bpp
The framebuffer device was mapped to memory successfully.
Bus error

The mmap'ed range is populated lazily: pages are only actually mapped when the first write triggers a page fault. The fault path is:

ttm_bo_vm_fault+0x34/0x540
tty_insert_flip_string_fixed_flag+0x85/0xe0
list_del+0x9/0x20
remove_wait_queue+0x20/0x30
n_tty_write+0x2d7/0x470
__wake_up+0x34/0x50
__do_fault+0x84/0x190
handle_mm_fault+0x79d/0x1710
__do_page_fault+0x253/0x510
async_page_fault+0x28/0x30

Tracing into ttm_bo_vm_fault, the failure comes from the block below: page_offset is 18446744073708503140 while num_pages is 768, so page_offset is obviously bogus.

page_offset = ((address - vma->vm_start) >> PAGE_SHIFT) +
    vma->vm_pgoff - drm_vma_node_start(&bo->vma_node);
page_last = vma_pages(vma) + vma->vm_pgoff -
    drm_vma_node_start(&bo->vma_node);

if (unlikely(page_offset >= bo->num_pages)) {
    retval = VM_FAULT_SIGBUS;
    goto out_io_unlock;
}

Digging further, the inputs to the page_offset computation are (already converted to pages): address: 34243474630; vma->vm_start: 34243474530; vma->vm_pgoff: 0; vma node start: 1048576. That makes page_offset = 100 + 0 - 1048576, which underflows the unsigned arithmetic and wraps around to the huge value above.
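A few lines of userspace C reproduce the wraparound with those numbers (assuming a 64-bit unsigned long):

#include <stdio.h>

int main(void)
{
    /* 100 pages into the vma, vm_pgoff 0, vma node start 1048576 */
    unsigned long page_offset = 100UL + 0UL - 1048576UL;

    printf("%lu\n", page_offset);   /* prints 18446744073708503140 */
    return 0;
}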

For comparison, qxl gives:

address: 34332914588; vma->vm_start: 34332914268; vma->vm_pgoff: 1060031; vma node start: 1060031

In qxl, vm_pgoff equals the vma node start; perhaps nothing sets up pgoff in the virtio-gpu case? The X server computes the fb offset from the fix info:

        fPtr->fboff = (unsigned long) fPtr->fix.smem_start & ~PAGE_MASK;
        fPtr->fbmem_len = (fPtr->fboff+fPtr->fix.smem_len+~PAGE_MASK) &
                          PAGE_MASK;

So the driver's fix.smem_len needs to be filled in; for example:

@@ -337,6 +403,9 @@ static int virtio_gpufb_create(struct drm_fb_helper *helper,
        info->fbops = &virtio_gpufb_ops;
        info->pixmap.flags = FB_PIXMAP_SYSTEM;
+       info->fix.smem_len = obj->gem_base.size;
        info->screen_base = obj->vmap;

With that, the test runs through, but the desktop still fails to come up, and the log carries a warning:

Sep 29 18:31:55 Linx kernel: [   22.227166] Call Trace:
Sep 29 18:31:55 Linx kernel: [   22.227169]  [<ffffffff8212b955>] ? dump_stack+0x5c/0x77
Sep 29 18:31:55 Linx kernel: [   22.227172]  [<ffffffff81e77794>] ? __warn+0xc4/0xe0
Sep 29 18:31:55 Linx kernel: [   22.227174]  [<ffffffffc050b1df>] ? ttm_bo_vm_open+0x6f/0x80 [ttm]
Sep 29 18:31:55 Linx kernel: [   22.227176]  [<ffffffff81e7598a>] ? copy_process.part.33+0xd4a/0x1c50
Sep 29 18:31:55 Linx kernel: [   22.227177]  [<ffffffff81e76a64>] ? _do_fork+0xd4/0x3b0
Sep 29 18:31:55 Linx kernel: [   22.227179]  [<ffffffff81e76a64>] ? _do_fork+0xd4/0x3b0
Sep 29 18:31:55 Linx kernel: [   22.227181]  [<ffffffff81e03b5c>] ? do_syscall_64+0x5c/0x170
Sep 29 18:31:55 Linx kernel: [   22.227183]  [<ffffffff823fc1ef>] ? entry_SYSCALL64_slow_path+0x25/0x25
Sep 29 18:31:55 Linx kernel: [   22.227184] ---[ end trace 6e9f62b113d5170a ]---

The check that fires this warning is shown below; let's set it aside for now:

static void ttm_bo_vm_open(struct vm_area_struct *vma)
{
    struct ttm_buffer_object *bo =
        (struct ttm_buffer_object *)vma->vm_private_data;

    WARN_ON(bo->bdev->dev_mapping != vma->vm_file->f_mapping);

    (void)ttm_bo_reference(bo);
}

At this point the symptom looks like the screen simply never refreshes on its own:

Figure 1 (the top-left text region has not refreshed, so the rectangle does not show): [image]

Figure 2 (once the top-left region refreshes, the rectangle appears): [image]

There is a way to verify this:

1) While startx is hung, log in over ssh and grab the fb contents:

$ cat /dev/fb0 > screenap

2) Shut down, switch the VM to a VGA adapter, boot, and replay the capture:

$ cat screenap > /dev/fb0

3) Only a partial screenshot is shown here, but the desktop is visible in it, which proves that the hung startx had in fact written its output into the fb: [image]

So virtio-gpu apparently lacks any periodic refresh. A timer can be added to force a periodic dirty update, for example:

static struct timer_list refresh_timer;   /* set up with setup_timer() at init */

static void my_timer_func(unsigned long data)
{
    /* svgfb: the driver's virtio_gpu_framebuffer; width/height: the mode size */
    virtio_gpu_dirty_update(svgfb, true, 0, 0, width, height);
    mod_timer(&refresh_timer, jiffies + HZ / 30);   /* re-arm, ~30 Hz */
}

With the timer added, the auto-refresh problem is solved and the rectangle displays correctly, but the screen still stays blank after startx, even though the data read back from /dev/fb0 is correct. That suggests the display memory mapping might change once the X server starts. So, back to the earlier ttm_bo_vm_open warning.


Its WARN_ON check (quoted above) does not appear to actually break anything, though, and the fb memory should not change after probe, so the problem has to be attacked from a different angle.


After a lot of printk'ing, delaying, and guessing, the culprit turned out to be KDSETMODE: during startx the VT is switched into graphics mode, mainly to suppress the cursor and console output:

1574  open("/dev/vc/1", O_RDWR|O_NONBLOCK) = -1 ENOENT (No such file or directory)
1574  open("/dev/tty1", O_RDWR|O_NONBLOCK) = 8
1574  ioctl(8, VT_GETSTATE, 0x7ffd0fee9b40) = 0
1574  ioctl(8, VT_ACTIVATE, 0x1)        = 0
1574  ioctl(8, VT_WAITACTIVE, 0x1)      = 0
1574  ioctl(8, VIDIOC_RESERVED or VT_GETMODE, 0x7ffd0fee9b50) = 0
1574  rt_sigaction(SIGUSR1, {0x5594241ab5f0, [USR1], SA_RESTORER|SA_RESTART, 0x7fe3d4396040}, {SIG_IGN, [], SA_RESTORER, 0x7fe3d4396040}, 8) = 0
1574  ioctl(8, VIDIOC_ENUM_FMT or VT_SETMODE, 0x7ffd0fee9b50) = 0
1574  ioctl(8, KDSETMODE, 0x1)          = 0

A small program reproduces the problem:

#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/kd.h>

int main(void)
{
    int vconsole_fd = open("/dev/tty1", O_RDWR);

    ioctl(vconsole_fd, KDSETMODE, KD_GRAPHICS);
    sleep(3);
    ioctl(vconsole_fd, KDSETMODE, KD_TEXT);
    close(vconsole_fd);
    return 0;
}

During that sleep(3), nothing written to /dev/fb0 shows up on screen.
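One way to observe this: while the repro sleeps, replay the earlier capture from an ssh session with cat screenap > /dev/fb0 — nothing appears until KD_TEXT is restored.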

The reason turned out to be that the periodic virtio_gpu_dirty_update job never got as far as calling virtio_gpu_cmd_resource_flush, because of this check:

    if (in_atomic() || store)
        store_for_later = true;

The in_atomic() test guarantees the flush is never issued from atomic (e.g. spinlocked) context: further down the path there is a wait_event, wait_event can sleep, and sleeping under a spinlock is forbidden. An example of why:

[Process A]  disables preemption
[Process A]  takes the lock
[Process A]  sleeps and schedules away   ...... even with preemption disabled, A can still yield the CPU voluntarily via schedule(), schedule_timeout(), etc.
[Process B]  disables preemption         ...... preemption is already off, so this is effectively a nop
[Process B]  fails to take the lock      ...... A still holds it and has not released it
[Process B]  keeps retrying the lock     ...... with preemption off, nothing can ever interrupt the retry loop, so we deadlock

And a timer set up with init_timer runs its callback in softirq context, where in_atomic() is always true, so the timer-driven update can never flush the virtio command. The fix is simple: hand the periodic virtio_gpu_dirty_update off to a delayed work item via schedule_delayed_work, which runs in preemptible process context.
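A sketch of one way to realize this, driving the refresh from a delayed work item instead of the timer; the names (refresh_work, svgfb, width, height) are illustrative, not the driver's actual ones:

static struct delayed_work refresh_work;

static void refresh_work_func(struct work_struct *work)
{
    /* runs in process context, so in_atomic() is false and the
     * dirty update can go all the way to the resource flush */
    virtio_gpu_dirty_update(svgfb, true, 0, 0, width, height);
    schedule_delayed_work(&refresh_work, HZ / 30);   /* re-arm, ~30 Hz */
}

/* at init time:
 *     INIT_DELAYED_WORK(&refresh_work, refresh_work_func);
 *     schedule_delayed_work(&refresh_work, HZ / 30);
 */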

Result:
With the delayed work in place, subsequent testing shows the display comes up correctly after startx.
