在前面四篇文章介紹過Xen的事件通道機制後,設計一個簡單的通訊樣例,並進行原理說明
1、建立dom時完成事件通道的建立與初始化(vcpu綁定)
1.1 事件通道的初始化
domain_create中調用 evtchn_init完成事件通道初始化,evtchn_init中關鍵函數爲get_free_port。在第一次初始化過程中,get_free_port()通過memset()將分配的結構體數組全部清零,其中包括結構體evtchn成員state的值。成員state值爲0,意味着初始化後事件通道都處於未分配狀態(ECS_FREE),並且notify_vcpu_id也爲0,即所有的事件通道默認都和dom中vcpu 0進行綁定。
1.2 對xen的修改
爲了實現事件通道的預分配,在創建domain時會對domain的事件通道初始化,函數爲evtchn_init,對該函數進行修改如下:
int evtchn_init(struct domain *d)
{
//struct evtchn *lchn,*rchn;
//int i,j;
spin_lock_init(&d->event_lock);
if( get_free_port(d) != 0 )
return -EINVAL;
evtchn_from_port(d, 0)->state = ECS_RESERVED;
if(d->domain_id==0)
{
evtchn_from_port(d, 121)->state = ECS_UNBOUND;
evtchn_from_port(d, 121)->u.unbound.remote_domid =1;
evtchn_from_port(d, 122)->state = ECS_UNBOUND;
evtchn_from_port(d, 122)->u.unbound.remote_domid =2;
evtchn_from_port(d, 123)->state = ECS_UNBOUND;
evtchn_from_port(d, 123)->u.unbound.remote_domid =3;
evtchn_from_port(d, 124)->state = ECS_UNBOUND;
evtchn_from_port(d, 124)->u.unbound.remote_domid =4;
evtchn_from_port(d, 125)->state = ECS_UNBOUND;
evtchn_from_port(d, 125)->u.unbound.remote_domid =5;
evtchn_from_port(d, 126)->state = ECS_UNBOUND;
evtchn_from_port(d, 126)->u.unbound.remote_domid =6;
}
else if(d->domain_id == 1)
{
evtchn_from_port(d, 127)->state = ECS_INTERDOMAIN;
evtchn_from_port(d, 127)->u.interdomain.remote_dom = rcu_lock_domain_by_id(0);
evtchn_from_port(d, 127)->u.interdomain.remote_port = 121;
evtchn_from_port(rcu_lock_domain_by_id(0),121)->state = ECS_INTERDOMAIN;
evtchn_from_port(rcu_lock_domain_by_id(0),121)->u.interdomain.remote_dom = d;
evtchn_from_port(rcu_lock_domain_by_id(0),121)->u.interdomain.remote_port = 127;
}
else if(d->domain_id == 2)
{
evtchn_from_port(d, 127)->state = ECS_INTERDOMAIN;
evtchn_from_port(d, 127)->u.interdomain.remote_dom = rcu_lock_domain_by_id(0);
evtchn_from_port(d, 127)->u.interdomain.remote_port = 122;
evtchn_from_port(rcu_lock_domain_by_id(0),122)->state = ECS_INTERDOMAIN;
evtchn_from_port(rcu_lock_domain_by_id(0),122)->u.interdomain.remote_dom = d;
evtchn_from_port(rcu_lock_domain_by_id(0),122)->u.interdomain.remote_port = 127;
}
else if(d->domain_id == 3)
{
evtchn_from_port(d, 127)->state = ECS_INTERDOMAIN;
evtchn_from_port(d, 127)->u.interdomain.remote_dom = rcu_lock_domain_by_id(0);
evtchn_from_port(d, 127)->u.interdomain.remote_port = 123;
evtchn_from_port(rcu_lock_domain_by_id(0),123)->state = ECS_INTERDOMAIN;
evtchn_from_port(rcu_lock_domain_by_id(0),123)->u.interdomain.remote_dom = d;
evtchn_from_port(rcu_lock_domain_by_id(0),123)->u.interdomain.remote_port = 127;
}
else if(d->domain_id == 4)
{
evtchn_from_port(d, 127)->state = ECS_INTERDOMAIN;
evtchn_from_port(d, 127)->u.interdomain.remote_dom = rcu_lock_domain_by_id(0);
evtchn_from_port(d, 127)->u.interdomain.remote_port = 124;
evtchn_from_port(rcu_lock_domain_by_id(0),124)->state = ECS_INTERDOMAIN;
evtchn_from_port(rcu_lock_domain_by_id(0),124)->u.interdomain.remote_dom = d;
evtchn_from_port(rcu_lock_domain_by_id(0),124)->u.interdomain.remote_port = 127;
}
else if(d->domain_id == 5)
{
evtchn_from_port(d, 127)->state = ECS_INTERDOMAIN;
evtchn_from_port(d, 127)->u.interdomain.remote_dom = rcu_lock_domain_by_id(0);
evtchn_from_port(d, 127)->u.interdomain.remote_port = 125;
evtchn_from_port(rcu_lock_domain_by_id(0),125)->state = ECS_INTERDOMAIN;
evtchn_from_port(rcu_lock_domain_by_id(0),125)->u.interdomain.remote_dom = d;
evtchn_from_port(rcu_lock_domain_by_id(0),125)->u.interdomain.remote_port = 127;
}
else if(d->domain_id == 6)
{
evtchn_from_port(d, 127)->state = ECS_INTERDOMAIN;
evtchn_from_port(d, 127)->u.interdomain.remote_dom = rcu_lock_domain_by_id(0);
evtchn_from_port(d, 127)->u.interdomain.remote_port = 126;
evtchn_from_port(rcu_lock_domain_by_id(0),126)->state = ECS_INTERDOMAIN;
evtchn_from_port(rcu_lock_domain_by_id(0),126)->u.interdomain.remote_dom = d;
evtchn_from_port(rcu_lock_domain_by_id(0),126)->u.interdomain.remote_port = 127;
}
#if MAX_VIRT_CPUS > BITS_PER_LONG
d->poll_mask = xmalloc_array(unsigned long, BITS_TO_LONGS(MAX_VIRT_CPUS));
if ( !d->poll_mask )
return -ENOMEM;
bitmap_zero(d->poll_mask, MAX_VIRT_CPUS);
#endif
return 0;
}
在domain 0被創建時預留下事件通道用於域間綁定,在domain U被創建時進行域間綁定操作。
2、建立域間通訊
2.1 dom 0與 dom U事件通道分配
如1.2所示,在xen中已經實現了事件通道的分配,dom 0分配121~126號通道分別與dom 1~dom 6建立域間通訊,當dom 1~dom 6啓動時執行域間綁定過程
2.2爲分配的事件通道綁定處理函數
每一個事件通道處理都對應一個中斷,因此一旦事件到達,會根據事件通道的中斷號調用對應的中斷處理函數,所以在使用之前也要綁定處理函數。其中evtchn爲alloc_unbound.port。處理函數handler根據自己想要實現的功能設計。
/*
 * Map an event channel onto a Linux IRQ and install @handler on it.
 *
 * On success the (positive) IRQ number is returned; on failure the
 * negative error from either the channel->IRQ mapping or request_irq()
 * is returned, and any half-done mapping is undone.
 */
int bind_evtchn_to_irqhandler(unsigned int evtchn,
			      irq_handler_t handler,
			      unsigned long irqflags,
			      const char *devname, void *dev_id)
{
	int rc;
	int irq = bind_evtchn_to_irq(evtchn);

	if (irq < 0)
		return irq;

	rc = request_irq(irq, handler, irqflags, devname, dev_id);
	if (rc) {
		/* Roll back the channel->IRQ mapping on failure. */
		unbind_from_irq(irq);
		return rc;
	}

	return irq;
}
3、利用事件通道來發送通知
發送事件通知的函數爲notify_remote_via_evtchn,這是linux內核中的函數,實際也是通過超級調用請求xen進行的通知。
/*
 * Send an event notification on @port by asking the hypervisor via the
 * EVTCHNOP_send event-channel hypercall.  The hypercall's return value
 * is deliberately ignored (best effort).
 */
static inline void notify_remote_via_evtchn(int port)
{
	struct evtchn_send send;

	send.port = port;
	(void)HYPERVISOR_event_channel_op(EVTCHNOP_send, &send);
}
4、收到事件通知的處理
每一個事件通道綁定完vcpu後,一旦有事件通知就會觸發該vcpu的調度,執行下面的彙編代碼
/*
 * Hypervisor upcall entry point (32-bit x86).  Saves a full pt_regs
 * frame, then hands off to the C dispatcher xen_evtchn_do_upcall.
 */
ENTRY(xen_hypervisor_callback)
CFI_STARTPROC
pushl_cfi $-1 /* orig_ax = -1 => not a system call */
SAVE_ALL
TRACE_IRQS_OFF
/* Check to see if we got the event in the critical
region in xen_iret_direct, after we've reenabled
events and checked for pending events. This simulates
iret instruction's behaviour where it delivers a
pending interrupt when enabling interrupts. */
movl PT_EIP(%esp),%eax
cmpl $xen_iret_start_crit,%eax
jb 1f
cmpl $xen_iret_end_crit,%eax
jae 1f
/* EIP is inside the iret critical region: repair the frame first. */
jmp xen_iret_crit_fixup
ENTRY(xen_do_upcall)
1: mov %esp, %eax /* pass pt_regs pointer as first argument */
call xen_evtchn_do_upcall
jmp ret_from_intr
CFI_ENDPROC
/* Fixed: original transcription was missing the closing parenthesis,
 * which would fail to assemble. */
ENDPROC(xen_hypervisor_callback)
在上面的代碼中選擇處理函數時都會進入xen_evtchn_do_upcall,該函數會根據事件通道的中斷號,調用對應的中斷處理函數。其實現如下
/*
 * C-level entry for the Xen hypervisor callback: dispatch all pending
 * event channels for this CPU, bracketed by the usual IRQ-context
 * bookkeeping (irq_enter/irq_exit) so handlers see a normal interrupt
 * context.  Statement order matters and must not be rearranged.
 */
void xen_evtchn_do_upcall(struct pt_regs *regs)
{
/* Swap in the interrupt register frame; restored on exit. */
struct pt_regs *old_regs = set_irq_regs(regs);
irq_enter();
exit_idle();
/* The actual bitmap scan / IRQ dispatch lives in the helper below. */
__xen_evtchn_do_upcall();
irq_exit();
set_irq_regs(old_regs);
}
/*
 * Scan this CPU's pending-event bitmaps in the shared_info page and
 * invoke the Linux IRQ handler bound to each pending port.
 *
 * Two-level bitmap: evtchn_pending_sel selects which words of the
 * pending array may have bits set; each word covers BITS_PER_LONG ports.
 * The scan resumes from (current_word_idx, current_bit_idx), the position
 * after the last port serviced, so one busy port cannot starve the rest.
 *
 * xed_nesting_count collapses nested upcalls: a re-entered upcall only
 * bumps the counter and leaves; the outermost invocation keeps looping
 * (the do/while below) until no nesting occurred and nothing is pending.
 */
static void __xen_evtchn_do_upcall(void)
{
int start_word_idx, start_bit_idx;
int word_idx, bit_idx;
int i;
int cpu = get_cpu();
struct shared_info *s = HYPERVISOR_shared_info;
struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu);
unsigned count;
do {
unsigned long pending_words;
/* Clear the master pending flag before sampling the selector. */
vcpu_info->evtchn_upcall_pending = 0;
/* Nested upcall: record it and bail; the outer loop re-scans. */
if (__this_cpu_inc_return(xed_nesting_count) - 1)
goto out;
#ifndef CONFIG_X86 /* No need for a barrier -- XCHG is a barrier on x86. */
/* Clear master flag /before/ clearing selector flag. */
wmb();
#endif
/* Atomically grab-and-clear the level-1 selector word. */
pending_words = xchg(&vcpu_info->evtchn_pending_sel, 0);
/* Resume where the previous scan left off (fairness). */
start_word_idx = __this_cpu_read(current_word_idx);
start_bit_idx = __this_cpu_read(current_bit_idx);
word_idx = start_word_idx;
for (i = 0; pending_words != 0; i++) {
unsigned long pending_bits;
unsigned long words;
/* Mask off words below the resume point. */
words = MASK_LSBS(pending_words, word_idx);
/*
 * If we masked out all events, wrap to beginning.
 */
if (words == 0) {
word_idx = 0;
bit_idx = 0;
continue;
}
word_idx = __ffs(words);
pending_bits = active_evtchns(cpu, s, word_idx);
bit_idx = 0; /* usually scan entire word from start */
if (word_idx == start_word_idx) {
/* We scan the starting word in two parts */
if (i == 0)
/* 1st time: start in the middle */
bit_idx = start_bit_idx;
else
/* 2nd time: mask bits done already */
bit_idx &= (1UL << start_bit_idx) - 1;
}
/* Inner scan: service each set bit in this word. */
do {
unsigned long bits;
int port, irq;
struct irq_desc *desc;
bits = MASK_LSBS(pending_bits, bit_idx);
/* If we masked out all events, move on. */
if (bits == 0)
break;
bit_idx = __ffs(bits);
/* Process port. */
port = (word_idx * BITS_PER_LONG) + bit_idx;
irq = evtchn_to_irq[port];
if (irq != -1) {
desc = irq_to_desc(irq);
if (desc)
generic_handle_irq_desc(irq, desc);
}
bit_idx = (bit_idx + 1) % BITS_PER_LONG;
/* Next caller starts at last processed + 1 */
__this_cpu_write(current_word_idx,
bit_idx ? word_idx :
(word_idx+1) % BITS_PER_LONG);
__this_cpu_write(current_bit_idx, bit_idx);
} while (bit_idx != 0);
/* Scan start_l1i twice; all others once. */
if ((word_idx != start_word_idx) || (i != 0))
pending_words &= ~(1UL << word_idx);
word_idx = (word_idx + 1) % BITS_PER_LONG;
}
BUG_ON(!irqs_disabled());
/* If the count moved past 1, a nested upcall arrived: loop again. */
count = __this_cpu_read(xed_nesting_count);
__this_cpu_write(xed_nesting_count, 0);
} while (count != 1 || vcpu_info->evtchn_upcall_pending);
out:
put_cpu();
}