1、基本原理
如之前分析,kvm虛擬機實際運行於qemu-kvm的進程上下文中,因此,需要建立虛擬機的物理內存空間(GPA)與qemu-kvm進程的虛擬地址空間(HVA)的映射關係。
虛擬機的物理地址空間實際也是不連續的,分成不同的內存區域(slot),因爲物理地址空間中通常還包括BIOS、MMIO、顯存、ISA保留等部分。
qemu-kvm通過ioctl vm指令KVM_SET_USER_MEMORY_REGION來爲虛擬機設置內存。主要建立guest物理地址空間中的內存區域與qemu-kvm虛擬地址空間中的內存區域的映射,從而建立其從GPA到HVA的對應關係,該對應關係主要通過kvm_memory_slot結構體保存,所以實質爲設置kvm_memory_slot結構體。
本文簡介ioctl vm指令KVM_SET_USER_MEMORY_REGION在內核中的執行流程,qemu-kvm用戶態部分暫不包括。
2、基本流程
ioctl vm指令KVM_SET_USER_MEMORY_REGION在內核主要執行流程如下:
kvm_vm_ioctl()
kvm_vm_ioctl_set_memory_region()
kvm_set_memory_region()
__kvm_set_memory_region()
kvm_iommu_unmap_pages() // 原來的slot需要刪除,所以需要unmap掉相應的內存區域
install_new_memslots() //將new分配的memslot寫入kvm->memslots[]數組中
kvm_free_physmem_slot() // 釋放舊slot中不再與新slot共用的資源(髒頁位圖及架構相關數據),並非釋放guest內存對應的物理頁本身
3、代碼分析
kvm_memory_slot結構:
點擊(此處)摺疊或打開
-
/*
 * A guest physical address (GPA) cannot be handed to the physical MMU
 * directly, so it first has to be translated to a host virtual address
 * (HVA). KVM uses struct kvm_memory_slot to record, for one range of
 * guest physical addresses, the GPA <-> HVA mapping of that range.
 */
-
struct kvm_memory_slot {
-
// Guest frame number (GPA >> PAGE_SHIFT) at which this slot starts
-
gfn_t base_gfn;
-
// Number of pages contained in this slot
-
unsigned long npages;
-
// Dirty-page bitmap (NULL when dirty logging is not enabled for the slot)
-
unsigned long *dirty_bitmap;
-
// Architecture-specific part of the slot
-
struct kvm_arch_memory_slot arch;
-
/*
 * Host virtual address (HVA) backing the slot's GPA range. The guest runs
 * inside the address space of qemu, which is a user-mode program, so this
 * is normally an address in the user address space of the host (root mode).
 */
-
unsigned long userspace_addr;
-
// Slot flags; set from kvm_userspace_memory_region->flags
-
u32 flags;
-
// Slot identifier; set from kvm_userspace_memory_region->slot
-
short id;
- };
kvm_vm_ioctl():
點擊(此處)摺疊或打開
-
/*
-
* kvm ioctl vm指令的入口,傳入的fd爲KVM_CREATE_VM中返回的fd。
-
* 主要用於針對VM虛擬機進行控制,如:內存設置、創建VCPU等。
-
*/
-
static long kvm_vm_ioctl(struct file *filp,
-
unsigned int ioctl, unsigned long arg)
-
{
-
struct kvm *kvm = filp->private_data;
-
void __user *argp = (void __user *)arg;
-
int r;
-
-
if (kvm->mm != current->mm)
-
return -EIO;
-
switch (ioctl) {
-
// 創建VCPU
-
case KVM_CREATE_VCPU:
-
r = kvm_vm_ioctl_create_vcpu(kvm, arg);
-
break;
-
// 建立guest物理地址空間中的內存區域與qemu-kvm虛擬地址空間中的內存區域的映射
-
case KVM_SET_USER_MEMORY_REGION: {
-
// 存放內存區域信息的結構體,該內存區域從qemu-kvm進程的用戶地址空間中分配
-
struct kvm_userspace_memory_region kvm_userspace_mem;
-
-
r = -EFAULT;
-
// 從用戶態拷貝相應數據到內核態,入參argp指向用戶態地址
-
if (copy_from_user(&kvm_userspace_mem, argp,
-
sizeof kvm_userspace_mem))
-
goto out;
-
// 進入實際處理流程
-
r = kvm_vm_ioctl_set_memory_region(kvm, &kvm_userspace_mem);
-
break;
-
}
- ...
kvm_vm_ioctl()-->kvm_vm_ioctl_set_memory_region()-->kvm_set_memory_region()-->__kvm_set_memory_region()
點擊(此處)摺疊或打開
-
/*
 * Create, modify, move or delete one memory slot of a VM, i.e. one mapping
 * between a region of guest physical address space and a region of the
 * qemu-kvm process's virtual address space.
 *
 * The request is described by the kvm_userspace_memory_region passed in
 * @mem, which originates from userspace (qemu-kvm). Each call configures a
 * single region. Regions need not be contiguous with each other (real
 * physical memory is often not contiguous either, because of reserved
 * ranges).
 *
 * @kvm: the VM whose kvm->memslots is updated.
 * @mem: requested slot id, flags, guest_phys_addr, memory_size and
 *       userspace_addr. mem->flags may be modified (dirty logging is
 *       cleared when memory_size is 0).
 *
 * Returns 0 on success, including the case where the request matches the
 * existing slot and nothing has to change. On failure returns a negative
 * value: the result of check_memory_region_flags(), -EINVAL for a request
 * that fails the sanity checks or is not an allowed modification, -EEXIST
 * when the new range overlaps another user slot, -ENOMEM when an allocation
 * fails, or the error from kvm_arch_prepare_memory_region() /
 * kvm_iommu_map_pages().
 */
-
int __kvm_set_memory_region(struct kvm *kvm,
-
struct kvm_userspace_memory_region *mem)
-
{
-
int r;
-
gfn_t base_gfn;
-
unsigned long npages;
-
struct kvm_memory_slot *slot;
-
struct kvm_memory_slot old, new;
-
struct kvm_memslots *slots = NULL, *old_memslots;
-
enum kvm_mr_change change;
-
-
// Validate the flags supplied in mem
-
r = check_memory_region_flags(mem);
-
if (r)
-
goto out;
-
-
r = -EINVAL;
-
/* General sanity checks */
-
// The arguments come from userspace, so reject anything malformed:
// size and guest physical address must both be page aligned.
-
if (mem->memory_size & (PAGE_SIZE - 1))
-
goto out;
-
if (mem->guest_phys_addr & (PAGE_SIZE - 1))
-
goto out;
-
/* We can read the guest memory with __xxx_user() later on. */
// For user slots the HVA must be page aligned and pass access_ok().
-
if ((mem->slot < KVM_USER_MEM_SLOTS) &&
-
((mem->userspace_addr & (PAGE_SIZE - 1)) ||
-
!access_ok(VERIFY_WRITE,
-
(void __user *)(unsigned
long)mem->userspace_addr,
-
mem->memory_size)))
-
goto out;
-
// Slot id must be within the slot table
-
if (mem->slot >= KVM_MEM_SLOTS_NUM)
-
goto out;
-
// Reject a range whose end wraps around
-
if (mem->guest_phys_addr + mem->memory_size < mem->guest_phys_addr)
-
goto out;
-
// Look up the kvm_memory_slot for mem->slot in kvm->memslots
-
slot = id_to_memslot(kvm->memslots, mem->slot);
-
// Guest frame number at which the region starts in guest physical space
-
base_gfn = mem->guest_phys_addr >> PAGE_SHIFT;
-
// Region size expressed in pages
-
npages = mem->memory_size >> PAGE_SHIFT;
-
-
r = -EINVAL;
-
if (npages > KVM_MEM_MAX_NR_PAGES)
-
goto out;
-
-
// A zero-sized request never keeps dirty logging enabled
-
if (!npages)
-
mem->flags &= ~KVM_MEM_LOG_DIRTY_PAGES;
-
-
// Both old and new start as copies of the current slot; new is then
// overwritten with the requested id, base, size and flags.
-
new = old = *slot;
-
-
new.id = mem->slot;
-
new.base_gfn = base_gfn;
-
new.npages = npages;
-
new.flags = mem->flags;
-
-
r = -EINVAL;
-
if (npages) {
-
// The slot was empty before: a new region is being created
-
if (!old.npages)
-
change = KVM_MR_CREATE;
-
// Otherwise an existing region is being modified
-
else { /* Modify an existing slot. */
-
// Changing the HVA, the size, or the KVM_MEM_READONLY flag of an
-
// existing slot is not allowed: bail out with -EINVAL.
-
if ((mem->userspace_addr != old.userspace_addr) ||
-
(npages != old.npages) ||
-
((new.flags ^ old.flags) & KVM_MEM_READONLY))
-
goto out;
-
/*
 * Reaching here means HVA and size are unchanged. If the starting GFN
 * differs from the old one, the region is being moved within guest
 * physical address space: mark the change as KVM_MR_MOVE.
 */
-
if (base_gfn != old.base_gfn)
-
change = KVM_MR_MOVE;
-
// Only the flags differ: mark the change as KVM_MR_FLAGS_ONLY
-
else if (new.flags != old.flags)
-
change = KVM_MR_FLAGS_ONLY;
-
// Otherwise there is nothing to do; report success
-
else { /* Nothing to change. */
-
r = 0;
-
goto out;
-
}
-
}
-
} else if (old.npages) {/* New size is 0 but the old slot was populated: delete the existing region. */
-
change = KVM_MR_DELETE;
-
} else /* Modify a non-existent slot: disallowed. */
-
goto out;
-
-
if ((change == KVM_MR_CREATE) || (change == KVM_MR_MOVE)) {
-
/* Check for overlaps */
-
r = -EEXIST;
-
// Compare against every other user slot (ids >= KVM_USER_MEM_SLOTS and
// the slot being changed are skipped); any GFN-range intersection fails.
-
kvm_for_each_memslot(slot, kvm->memslots) {
-
if ((slot->id >= KVM_USER_MEM_SLOTS) ||
-
(slot->id == mem->slot))
-
continue;
-
if (!((base_gfn + npages <= slot->base_gfn) ||
-
(base_gfn >= slot->base_gfn + slot->npages)))
-
goto out;
-
}
-
}
-
-
/* Free page dirty bitmap if unneeded */
-
if (!(new.flags & KVM_MEM_LOG_DIRTY_PAGES))
-
new.dirty_bitmap = NULL;
-
-
r = -ENOMEM;
-
// A new region is being created
-
if (change == KVM_MR_CREATE) {
-
new.userspace_addr = mem->userspace_addr;
-
// Set up the architecture-specific part of the new slot
-
if (kvm_arch_create_memslot(&new, npages))
-
goto out_free;
-
}
-
-
/* Allocate page dirty bitmap if needed */
-
if ((new.flags & KVM_MEM_LOG_DIRTY_PAGES) && !new.dirty_bitmap) {
-
if (kvm_create_dirty_bitmap(&new) < 0)
-
goto out_free;
-
}
-
// Deleting or moving a region: first publish a copy of the slot table in
// which this slot is flagged invalid.
-
if ((change == KVM_MR_DELETE) || (change == KVM_MR_MOVE)) {
-
r = -ENOMEM;
-
// Duplicate kvm->memslots
-
slots = kmemdup(kvm->memslots, sizeof(struct
kvm_memslots),
-
GFP_KERNEL);
-
if (!slots)
-
goto out_free;
-
slot = id_to_memslot(slots, mem->slot);
-
slot->flags |= KVM_MEMSLOT_INVALID;
-
// Install the copy; the previously installed table is returned
-
old_memslots = install_new_memslots(kvm, slots, NULL);
-
-
/* slot was deleted or moved, clear iommu mapping */
-
// The old slot is going away, so unmap its range from the IOMMU
-
kvm_iommu_unmap_pages(kvm, &old);
-
/* From this point no new shadow pages pointing to a deleted,
 * or moved, memslot will be created.
 *
 * validation of sp->gfn happens in:
 * - gfn_to_hva (kvm_read_guest, gfn_to_pfn)
 * - kvm_is_visible_gfn (mmu_check_roots)
 */
-
// Flush shadow page table entries that refer to this slot
-
kvm_arch_flush_shadow_memslot(kvm, slot);
-
// Keep the previous table so it can be reused for the final install below
-
slots = old_memslots;
-
}
-
// Architecture hook run before the change is committed. Per the original
// author's note it handles private memory slots by assigning them a
// userspace address (HVA); the helper is not shown here -- confirm.
-
r = kvm_arch_prepare_memory_region(kvm, &new, mem, change);
-
if (r)
-
goto out_slots;
-
-
r = -ENOMEM;
-
/*
 * We can re-use the old_memslots from above, the only difference
 * from the currently installed memslots is the invalid flag. This
 * will get overwritten by update_memslots anyway.
 */
-
if (!slots) {
-
slots = kmemdup(kvm->memslots, sizeof(struct
kvm_memslots),
-
GFP_KERNEL);
-
if (!slots)
-
goto out_free;
-
}
-
-
/*
 * IOMMU mapping: New slots need to be mapped. Old slots need to be
 * un-mapped and re-mapped if their base changes. Since base change
 * unmapping is handled above with slot deletion, mapping alone is
 * needed here. Anything else the iommu might care about for existing
 * slots (size changes, userspace addr changes and read-only flag
 * changes) is disallowed above, so any other attribute changes getting
 * here can be skipped.
 */
-
if ((change == KVM_MR_CREATE) || (change == KVM_MR_MOVE)) {
-
r = kvm_iommu_map_pages(kvm, &new);
-
if (r)
-
goto out_slots;
-
}
-
-
/* actual memory is freed via old in kvm_free_physmem_slot below */
-
if (change == KVM_MR_DELETE) {
-
new.dirty_bitmap = NULL;
-
memset(&new.arch, 0, sizeof(new.arch));
-
}
-
// Install the slot table with the contents of new written into it
-
old_memslots = install_new_memslots(kvm, slots, &new);
-
-
kvm_arch_commit_memory_region(kvm, mem, &old, change);
-
// Release what the old slot held, passing new as the slot to compare
// against. NOTE(review): helper not shown here; presumably it skips
// anything still shared with new -- confirm.
-
kvm_free_physmem_slot(&old, &new);
-
kfree(old_memslots);
-
-
return 0;
-
-
// Error unwinding: drop the duplicated slot table, then whatever new
// acquired, then return the error code in r.
-
out_slots:
-
kfree(slots);
-
out_free:
-
kvm_free_physmem_slot(&new, &old);
-
out:
-
return r;
- }