OpenStack Liberty Source Code Analysis: The Instance Boot Process, Part 3

Following on from OpenStack Liberty Source Code Analysis: The Instance Boot Process, Part 2, a quick recap: after nova-conductor receives the host list returned by nova-scheduler, it sends an asynchronous rpc request to the nova-compute service on each target host. Let's continue with how nova-compute handles the request:

nova-compute

According to the route mapping, the method in nova-compute that handles the instance boot request is
nova/compute/manager.py/ComputeManager.build_and_run_instance. It does no real work itself: it just creates a worker thread through eventlet for the subsequent boot work, decoupling it from the rpc worker thread; a rough sketch of that dispatch is shown below. The worker thread then calls _do_build_and_run_instance to carry on:
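A minimal sketch of build_and_run_instance, reconstructed from the Liberty source (treat details such as the semaphore attribute as approximate):

def build_and_run_instance(self, context, instance, image, request_spec,
                           filter_properties, admin_password=None,
                           injected_files=None, requested_networks=None,
                           security_groups=None, block_device_mapping=None,
                           node=None, limits=None):

    # Serialize build requests for the same instance
    @utils.synchronized(instance.uuid)
    def _locked_do_build_and_run_instance(*args, **kwargs):
        # A semaphore caps the number of concurrent builds on this host
        with self._build_semaphore:
            self._do_build_and_run_instance(*args, **kwargs)

    # Return to the rpc worker immediately; the build continues in a
    # separate greenthread
    utils.spawn_n(_locked_do_build_and_run_instance,
                  context, instance, image, request_spec,
                  filter_properties, admin_password, injected_files,
                  requested_networks, security_groups,
                  block_device_mapping, node, limits)

With the request handed off to the worker thread, _do_build_and_run_instance takes over: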

# Decorator definitions omitted here
def _do_build_and_run_instance(self, context, instance, image,
        request_spec, filter_properties, admin_password,
        injected_files, requested_networks, security_groups,
        block_device_mapping, node=None, limits=None):

    # Exception handling omitted here
    LOG.info(_LI('Starting instance...'), context=context,
             instance=instance)
    # instance is an InstanceV2 object. Update the instance state
    # here, sending a synchronous request through the conductor rpc
    # api so that conductor performs the state update
    instance.vm_state = vm_states.BUILDING
    instance.task_state = None
    instance.save(expected_task_state=
                  (task_states.SCHEDULING, None))

    # b64 decode the files to inject:
    decoded_files = self._decode_files(injected_files)

    # limits holds the node's resource limits, covering memory and disk
    if limits is None:
        limits = {}

    if node is None:
        node = self.driver.get_available_nodes(refresh=True)[0]
        LOG.debug('No node specified, defaulting to %s', node,
                  instance=instance)

    # Exception handling omitted; the request is forwarded to
    # _build_and_run_instance for the remaining work
    self._build_and_run_instance(context, instance, image,
                                 decoded_files, admin_password,
                                 requested_networks, security_groups,
                                 block_device_mapping, node, limits,
                                 filter_properties)
    return build_results.ACTIVE

Now for the implementation of _build_and_run_instance:

def _build_and_run_instance(self, context, instance, image,
        injected_files, admin_password, requested_networks,
        security_groups, block_device_mapping, node, limits,
        filter_properties):
    """image is a dict holding the image information; 'name' is the
    image name. The image in this example looks like this:
    {
        u'status': u'active', u'deleted': False,
        u'container_format': u'bare', u'min_ram': 0,
        u'updated_at': u'2016-03-24T06:58:33.000000',
        u'min_disk': 0,
        u'owner': u'25520b29dce346d38bc4b055c5ffbfcb',
        u'is_public': True, u'deleted_at': None,
        u'properties': {}, u'size': 1401421824,
        u'name': u'ceph-centos-65-x64-20g.qcow2',
        u'checksum': u'a97deac197e76e1f5a427484b1e5df4c',
        u'created_at': u'2016-03-24T06:57:28.000000',
        u'disk_format': u'qcow2',
        u'id': u'226bc6e5-60d7-4a2c-bf0d-a568a1e26e00'
    }
    """
    image_name = image.get('name')

    '''Exception handling omitted'''

    # Get/create the ResourceTracker instance in preparation for the
    # resource claim below
    rt = self._get_resource_tracker(node)
    # limits holds the node's memory, disk and other resource quotas.
    # Verify that the node can satisfy this boot request; if resources
    # are insufficient an exception is raised, and the log file shows
    # an INFO entry like "Attempting claim: memory 2048 MB, disk 20 GB"
    with rt.instance_claim(context, instance, limits):
        # NOTE(russellb) It's important that this validation be done
        # *after* the resource tracker instance claim, as that is
        # where the host is set on the instance.
        self._validate_instance_group_policy(context, instance,
                                             filter_properties)
        # Allocate network resources for the instance, validate and
        # map block devices, and update the instance state
        with self._build_resources(context, instance,
                requested_networks, security_groups, image,
                block_device_mapping) as resources:
            instance.vm_state = vm_states.BUILDING
            instance.task_state = task_states.SPAWNING
            # NOTE(JoshNang) This also saves the changes to the
            # instance from _allocate_network_async, as they aren't
            # saved in that function to prevent races.
            instance.save(expected_task_state=
                          task_states.BLOCK_DEVICE_MAPPING)
            block_device_info = resources['block_device_info']
            network_info = resources['network_info']
            # Call the hypervisor's spawn method to boot the instance.
            # I am using libvirt, so this goes to nova/virt/libvirt/
            # driver.py/LibvirtDriver.spawn, analyzed below
            self.driver.spawn(context, instance, image,
                              injected_files, admin_password,
                              network_info=network_info,
                              block_device_info=block_device_info)
----------------------------------------------------------

#nova/virt/libvirt/driver.py/LibvirtDriver.spawn
def spawn(self, context, instance, image_meta, injected_files,
          admin_password, network_info=None,
          block_device_info=None):
    """This does three main things:
    1. Download the image from glance (if it is not already in the
       local _base directory), then upload it to the storage backend
    2. Generate the libvirt xml file
    3. Call libvirt to start the instance
    """
    # Build a nova/objects/image_meta.py/ImageMeta object from the
    # image dict
    image_meta = objects.ImageMeta.from_dict(image_meta)
    # Based on the hypervisor type, pick the bus types for block
    # devices and the cdrom. The default here is kvm, so block
    # devices default to virtio and the cdrom to ide. Device mappings
    # are then set up from block_device_info, and a dict containing
    # {disk_bus, cdrom_bus, mapping} is returned
    disk_info = blockinfo.get_disk_info(CONF.libvirt.virt_type,
                                        instance,
                                        image_meta,
                                        block_device_info)
    # Download the image from glance (if not in the local _base
    # directory) and upload it to the storage backend; analyzed below
    self._create_image(context, instance,
                       disk_info['mapping'],
                       network_info=network_info,
                       block_device_info=block_device_info,
                       files=injected_files,
                       admin_pass=admin_password)
    # Generate the libvirt xml file; analyzed below
    xml = self._get_guest_xml(context, instance, network_info,
                              disk_info, image_meta,
                              block_device_info=block_device_info,
                              write_to_disk=True)
    # Call libvirt to start the instance; analyzed below
    self._create_domain_and_network(context, xml, instance,
                                    network_info, disk_info,
                                    block_device_info=block_device_info)
    LOG.debug("Instance is running", instance=instance)

    def _wait_for_boot():
        """Called at an interval until the VM is running."""
        state = self.get_info(instance).state

        if state == power_state.RUNNING:
            LOG.info(_LI("Instance spawned successfully."),
                     instance=instance)
            raise loopingcall.LoopingCallDone()

    # Wait for the boot to complete (polling the instance state
    # through libvirt)
    timer = loopingcall.FixedIntervalLoopingCall(_wait_for_boot)
    timer.start(interval=0.5).wait()

If spawn returns normally, the instance has been created successfully: it shows as 'Running' on the Dashboard, and the domain is also visible on the host with the virsh list command. Next we will look at how _create_image creates the disks.
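Before moving on, a small aside: the same check virsh list performs can be done from Python through the libvirt bindings. A minimal sketch, assuming the libvirt-python package is installed:

import libvirt

# Equivalent of `virsh list --all`
conn = libvirt.open('qemu:///system')
try:
    for dom in conn.listAllDomains():
        # state() returns [state, reason]
        state, _reason = dom.state()
        print(dom.name(),
              'running' if state == libvirt.VIR_DOMAIN_RUNNING else state)
finally:
    conn.close()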

Creating the system disk

_create_image is a long method. Let's start with how the image-backed disk is created; the analysis below quotes only the key parts of the code, so please refer to the source file for the full details:
nova/virt/libvirt/driver.py/LibvirtDriver._create_image

def _create_image(self, context, instance,
                  disk_mapping, suffix='',
                  disk_images=None, network_info=None,
                  block_device_info=None, files=None,
                  admin_pass=None, inject_files=True,
                  fallback_from_host=None):
    # We are booting from an image, so booted_from_volume=False
    booted_from_volume = self._is_booted_from_volume(
                                    instance, disk_mapping)

    ......

    """Input parameter disk_images is None, so it is populated from
    the instance information:
    {
    'kernel_id': u'',
    'image_id': u'226bc6e5-60d7-4a2c-bf0d-a568a1e26e00',
    'ramdisk_id': u''
    }
    """
    if not disk_images:
        disk_images = {'image_id': instance.image_ref,
                       'kernel_id': instance.kernel_id,
                       'ramdisk_id': instance.ramdisk_id}

    ......

    # booted_from_volume=False
    if not booted_from_volume:
        # Generate the system disk name from the hash of image_id
        root_fname = imagecache.get_cache_fname(disk_images,
                                                'image_id')
        # root_gb is the system disk size; 20 (GB) in this example
        size = instance.root_gb * units.Gi

        # Input parameter suffix=''
        if size == 0 or suffix == '.rescue':
            size = None

        # Our storage backend is ceph, so backend=Rbd here.
        # The 'disk' argument becomes the suffix of the generated
        # device name: 'uuid_disk'
        backend = image('disk')
        # For a normal boot, task_state=spawning at this point
        if instance.task_state == task_states.RESIZE_FINISH:
            backend.create_snap(
                        libvirt_utils.RESIZE_SNAPSHOT_NAME)
        # Rbd supports the clone operation
        if backend.SUPPORTS_CLONE:
            def clone_fallback_to_fetch(*args, **kwargs):
                try:
                    backend.clone(context,
                                  disk_images['image_id'])
                except exception.ImageUnacceptable:
                    # If clone raises, fall back to fetch_image to
                    # download the image
                    libvirt_utils.fetch_image(*args, **kwargs)
            fetch_func = clone_fallback_to_fetch
        else:
            # With an lvm backend, for example, this branch is taken
            fetch_func = libvirt_utils.fetch_image
        # _try_fetch_image_cache directly calls backend.cache
        # (Rbd.cache) to download the image from glance, create the
        # system disk and upload it to the storage backend; on an
        # ImageNotFound exception it falls back to fetching the
        # image from fallback_from_host
        self._try_fetch_image_cache(backend, fetch_func,
                                    context, root_fname,
                                    disk_images['image_id'],
                                    instance, size,
                                    fallback_from_host)

    ......

    ......

Now let's look at the implementation of nova/virt/libvirt/imagebackend.py/Rbd.cache:

def cache(self, fetch_func, filename, size=None, *args, **kwargs):

    @utils.synchronized(filename, external=True,
                        lock_path=self.lock_path)
    def fetch_func_sync(target, *args, **kwargs):
        # The image may have been fetched while a subsequent
        # call was waiting to obtain the lock.
        if not os.path.exists(target):
            fetch_func(target=target, *args, **kwargs)

    # Build the local image cache path. instances_path and
    # image_cache_subdirectory_name in nova.conf control where the
    # cache lives; in my environment it is:
    # /opt/stack/data/nova/instances/_base
    # You may have realized that putting this directory on a fast
    # device could improve performance!
    base_dir = os.path.join(CONF.instances_path,
                            CONF.image_cache_subdirectory_name)
    if not os.path.exists(base_dir):
        fileutils.ensure_tree(base_dir)
    # Join the image path:
    # /opt/stack/data/nova/instances/_base/filename, which in this
    # example is /opt/stack/data/nova/instances/_base/
    # cb241933d7daa40a536db47d41376dd03a83b517
    base = os.path.join(base_dir, filename)

    # If the image does not exist (usually it does not), download it
    # from glance (RBDDriver.exists makes this check)
    if not self.check_image_exists() or not os.path.exists(base):
        # fetch_func_sync is the mutually exclusive version of
        # fetch_func; the analysis continues in create_image below
        self.create_image(fetch_func_sync, base, size,
                          *args, **kwargs)

    # Rbd does not support fallocate
    if (size and self.preallocate and self._can_fallocate() and
            os.access(self.path, os.W_OK)):
        utils.execute('fallocate', '-n', '-l', size, self.path)
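The pattern worth taking away here is download-once-under-a-lock: check, take an external (cross-process) file lock, then re-check before actually fetching. A self-contained sketch of the same pattern using oslo.concurrency directly; cached_fetch, download_image and the lock path are illustrative names of mine, not nova's:

import os

from oslo_concurrency import lockutils

def cached_fetch(filename, download_image, cache_dir='/var/cache/images'):
    path = os.path.join(cache_dir, filename)

    @lockutils.synchronized(filename, external=True,
                            lock_path='/tmp/locks')
    def _fetch_locked():
        # Another process may have fetched the file while we were
        # waiting for the lock, hence the re-check.
        if not os.path.exists(path):
            download_image(path)

    if not os.path.exists(path):
        _fetch_locked()
    return path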

----------------------------------------------------------

#nova/virt/libvirt/imagebackend.py/Rbd.create_image
def create_image(self, prepare_template, base, size, *args,
                                                **kwargs):
    """If there is no local image cache, download the image from
    glance first; otherwise import directly from the local cache.
    The input parameter prepare_template points to fetch_func_sync,
    and the call chain (ignoring decorators) is:
    prepare_template(fetch_func_sync)
        -> fetch_func(clone_fallback_to_fetch)
            -> Rbd.clone
                -> libvirt_utils.fetch_image
                   (reached when Rbd.clone raises because the image
                   is in qcow2 format)
    See the analysis of clone below
    """
    if not self.check_image_exists():
        prepare_template(target=base, max_size=size, *args, **kwargs)

    # prepare_template() may have cloned the image into a new rbd
    # image already instead of downloading it locally
    # After prepare_template above has downloaded the image into the
    # local cache (if it was not already there), RBDDriver runs
    # `rbd import` to upload the image to nova's storage backend.
    # If the image is in raw format it is not cached locally at all:
    # rbd clones it directly between rbd pools. So you can see why,
    # with rbd as the glance and nova backend, images should be raw!
    if not self.check_image_exists():
        self.driver.import_image(base, self.rbd_name)
    self.verify_base_size(base, size)

    if size and size > self.get_disk_size(self.rbd_name):
        self.driver.resize(self.rbd_name, size)
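For a non-raw image, the driver-level work on a cache miss roughly boils down to an rbd import of the converted cache file followed by a resize to the flavor's root disk size. A hedged sketch in terms of the rbd CLI (pool and image names are examples; nova drives this through its RBDDriver wrapper rather than shelling out like this):

import subprocess

base = ('/opt/stack/data/nova/instances/_base/'
        'cb241933d7daa40a536db47d41376dd03a83b517')   # local cache file
rbd_name = 'instance_uuid_disk'                       # target rbd image

# Upload the cached raw file into the nova pool...
subprocess.check_call(['rbd', 'import', '--pool', 'vms', base, rbd_name])
# ...then grow it to the flavor root disk size (rbd sizes are in MB)
subprocess.check_call(['rbd', 'resize', '--pool', 'vms',
                       '--image', rbd_name, '--size', str(20 * 1024)])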

---------------------------------------------------------

# If check_image_exists finds no image in the backend, the following
# clone call is triggered
def clone(self, context, image_id_or_uri):
    # Fetch the image metadata through glanceclient
    image_meta = IMAGE_API.get(context, image_id_or_uri,
                               include_locations=True)
    """
    [
    {'url': u'rbd://1ee20ded-caae-419d-9fe3-5919f129cf55/images/226bc6e5-60d7-4a2c-bf0d-a568a1e26e00/snap', 'metadata': {}}
    ]
    """
    locations = image_meta['locations']

    LOG.debug('Image locations are: %(locs)s' % {'locs': locations})

    # My image is in qcow2 format, so an exception is raised here.
    # If Rbd is nova's storage backend you should upload raw images:
    # as shown later, non-raw images get converted to raw, which
    # costs performance.
    # Remember that LibvirtDriver._create_image falls back to
    # libvirt_utils.fetch_image when clone raises? This is where
    # that happens.
    if image_meta.get('disk_format') not in ['raw', 'iso']:
        reason = _('Image is not raw format')
        raise exception.ImageUnacceptable(image_id=image_id_or_uri,
                                          reason=reason)
    # For a raw image, execution continues here
    for location in locations:
        # Check whether the location supports clone (raw images do);
        # if so, call the driver's clone method directly, which
        # copies the image from the source pool to the target pool
        if self.driver.is_cloneable(location, image_meta):
            return self.driver.clone(location, self.rbd_name)

    # Otherwise raise an exception
    reason = _('No image locations are accessible')
    raise exception.ImageUnacceptable(image_id=image_id_or_uri,
                                      reason=reason)
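For reference, is_cloneable essentially checks that the location is an rbd:// URL for the same ceph cluster (matching fsid) and that the image is usable. The URL format shown above can be unpacked like this; the parser is my own illustration, not nova's code:

def parse_rbd_url(url):
    """Split rbd://<fsid>/<pool>/<image_id>/<snapshot> into its parts."""
    prefix = 'rbd://'
    if not url.startswith(prefix):
        raise ValueError('not an rbd URL: %s' % url)
    pieces = url[len(prefix):].split('/')
    if len(pieces) != 4:
        raise ValueError('rbd URL must be fsid/pool/image/snapshot')
    return pieces  # [fsid, pool, image_id, snapshot]

print(parse_rbd_url(
    'rbd://1ee20ded-caae-419d-9fe3-5919f129cf55/images/'
    '226bc6e5-60d7-4a2c-bf0d-a568a1e26e00/snap'))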

-----------------------------------------------------------

"""正如上面說的:如果不是raw格式,`Rbd.clone`方法會拋異常,再次調用
`libvirt_utils.fetch_image`下載鏡像,而該方法直接調用
`nova/virt/images.py/fetch_to_raw`方法,一起來看看:
"""
def fetch_to_raw(context, image_href, path, user_id,
                 project_id, max_size=0):
    """
    1. Download the image from glance into a local
       'hash(image_id).part' file
    2. If necessary, convert the image to raw format, saving it as
       'hash(image_id).converted'
    3. Delete 'hash(image_id).part' and rename
       'hash(image_id).converted' to 'hash(image_id)'
    """
    # path is the base from Rbd.cache earlier; in this example:
    # /opt/stack/data/nova/instances/_base/
    # cb241933d7daa40a536db47d41376dd03a83b517
    path_tmp = "%s.part" % path
    # Call glanceclient to download the image from glance and store
    # it at path_tmp
    fetch(context, image_href, path_tmp, user_id, project_id,
          max_size=max_size)

    with fileutils.remove_path_on_error(path_tmp):
        # Run `qemu-img info` to inspect the image file just
        # downloaded
        data = qemu_img_info(path_tmp)

        # Image format
        fmt = data.file_format
        if fmt is None:
            raise exception.ImageUnacceptable(
                reason=_("'qemu-img info' parsing failed."),
                image_id=image_href)

        # Backing files are not supported
        backing_file = data.backing_file
        if backing_file is not None:
            raise exception.ImageUnacceptable(image_id=image_href,
                reason=(_("fmt=%(fmt)s backed by: %(backing_file)s") %
                        {'fmt': fmt, 'backing_file': backing_file}))

        # We can't generally shrink incoming images, so disallow
        # images > size of the flavor we're booting.  Checking here
        # avoids an immediate DoS where we convert large qcow images
        # to raw (which may compress well but not be sparse).
        # TODO(p-draigbrady): loop through all flavor sizes, so that
        # we might continue here and not discard the download.
        # If we did that we'd have to do the higher level size checks
        # irrespective of whether the base image was prepared or not.
        disk_size = data.virtual_size
        if max_size and max_size < disk_size:
            LOG.error(_LE('%(base)s virtual size %(disk_size)s '
                          'larger than flavor root disk size %(size)s'),
                      {'base': path,
                       'disk_size': disk_size,
                       'size': max_size})
            raise exception.FlavorDiskSmallerThanImage(
                flavor_size=max_size, image_size=disk_size)

        # If the image is not raw, force-convert it to raw
        if fmt != "raw" and CONF.force_raw_images:
            staged = "%s.converted" % path
            LOG.debug("%s was %s, converting to raw" %
                      (image_href, fmt))
            with fileutils.remove_path_on_error(staged):
                try:
                    # Run `qemu-img convert` to convert the non-raw
                    # image at path_tmp to raw, stored in the staged
                    # file
                    convert_image(path_tmp, staged, fmt, 'raw')
                except exception.ImageUnacceptable as exp:
                    # re-raise to include image_href
                    raise exception.ImageUnacceptable(
                        image_id=image_href,
                        reason=_("Unable to convert image to "
                                 "raw: %(exp)s") % {'exp': exp})

                # Delete the non-raw image downloaded at the start
                os.unlink(path_tmp)

                # Run `qemu-img info` on the converted image and
                # raise if it is still not raw
                data = qemu_img_info(staged)
                if data.file_format != "raw":
                    raise exception.ImageUnacceptable(
                        image_id=image_href,
                        reason=_("Converted to raw, but format is "
                                 "now %s") % data.file_format)

                # Rename the converted image file
                os.rename(staged, path)
        else:
            os.rename(path_tmp, path)

To sum up: that was how the system disk is created with ceph rbd as the storage backend. The key points:

  • When ceph rbd is the storage backend, it is best to use raw images (see the conversion sketch below)
  • If for some reason you have to use qcow2 or another image format, put the _base cache directory on a fast device to speed up instance boot
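A hedged sketch of doing that conversion up front, so that Rbd.clone can do a copy-on-write clone instead of nova downloading and converting the image on every cache miss; file names here are illustrative:

import subprocess

src = 'ceph-centos-65-x64-20g.qcow2'
dst = 'ceph-centos-65-x64-20g.raw'

# qemu-img convert -f qcow2 -O raw <src> <dst>
subprocess.check_call(['qemu-img', 'convert', '-f', 'qcow2',
                       '-O', 'raw', src, dst])

# Afterwards, upload with something like:
#   glance image-create --name ceph-centos-65-x64-20g \
#       --disk-format raw --container-format bare --file <dst>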

Creating/configuring non-system disks

Back to nova/virt/libvirt/driver.py/LibvirtDriver._create_image:

def _create_image(self, context, instance,
                  disk_mapping, suffix='',
                  disk_images=None, network_info=None,
                  block_device_info=None, files=None,
                  admin_pass=None, inject_files=True,
                  fallback_from_host=None):
    """First look at the disk_mapping parameter, which defines three
    devices:
    disk_mapping: {
    'disk.config': {'bus': 'ide', 'type': 'cdrom', 'dev': 'hdd'},
    'disk': {'bus': 'virtio', 'boot_index': '1',
                            'type': 'disk', 'dev': u'vda'},
    'root': {'bus': 'virtio', 'boot_index': '1',
                            'type': 'disk', 'dev': u'vda'}}
    """

    # disk_mapping contains none of the disks below in this example,
    # and their handling resembles the system disk creation above,
    # so the disk.local, disk.swap and ephemerals code is skipped
    .......

    # Config drive (used by default)
    if configdrive.required_by(instance):
        LOG.info(_LI('Using config drive'), instance=instance)
        extra_md = {}
        # Admin password
        if admin_pass:
            extra_md['admin_pass'] = admin_pass

        # Input parameter files=[]; network_info holds the detailed
        # network configuration; instance is an InstanceV2 object
        # with the instance details. Collect the instance's
        # configuration and build an InstanceMetadata object
        inst_md = instance_metadata.InstanceMetadata(instance,
                            content=files, extra_md=extra_md,
                            network_info=network_info)
        with configdrive.ConfigDriveBuilder(
                instance_md=inst_md) as cdb:
            # Join the config drive path:
            # CONF.instances_path/instance.uuid/disk.config
            # CONF.instances_path can be set in nova.conf
            configdrive_path = \
                self._get_disk_config_path(instance, suffix)
            LOG.info(_LI('Creating config drive at %(path)s'),
                     {'path': configdrive_path},
                     instance=instance)

            try:
                # ConfigDriveBuilder.make_drive creates the config
                # drive; internally it runs the CONF.mkisofs_cmd tool
                # (genisoimage by default) to build the
                # configdrive_path file containing the instance's
                # configuration
                cdb.make_drive(configdrive_path)
            except processutils.ProcessExecutionError as e:
                with excutils.save_and_reraise_exception():
                    LOG.error(_LE('Creating config drive '
                                  'failed with error: %s'),
                              e, instance=instance)

        try:
            # Tell the storage backend about the config drive
            # Create the config disk backend according to the
            # configured type; rbd in this example, so an Rbd
            # instance is created (otherwise a Raw instance)
            config_drive_image = self.image_backend.image(
                        instance, 'disk.config' + suffix,
                        self._get_disk_config_image_type())

            # This runs `rbd import` to import the configdrive_path
            # file into ceph (the disk is named
            # instance_uuid_disk.config)
            config_drive_image.import_file(
                    instance, configdrive_path,
                    'disk.config' + suffix)
        finally:
            # NOTE(mikal): if the config drive was imported into
            # RBD, then we no longer need the local copy
            # Delete the local config drive file
            if CONF.libvirt.images_type == 'rbd':
                os.unlink(configdrive_path)

    # File injection only if needed
    # By default inject_partition=-2, i.e. injecting configuration
    # directly into the disk is disallowed. Also, injection is not
    # supported for instances booted from a volume
    elif inject_files and CONF.libvirt.inject_partition != -2:
        if booted_from_volume:
            LOG.warn(_LW('File injection into a boot from '
                         'volume instance is not supported'),
                     instance=instance)
        # Inject into the system disk; see the code under
        # nova/virt/disk/vfs, implemented mainly with guestfs
        self._inject_data(
                instance, network_info, admin_pass, files,
                suffix)

    # virt_type is usually kvm; with uml the disk needs root
    # ownership
    if CONF.libvirt.virt_type == 'uml':
        libvirt_utils.chown(image('disk').path, 'root')
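To make the config drive step concrete: cdb.make_drive packs the generated metadata tree into an ISO9660 volume labelled 'config-2' using the CONF.mkisofs_cmd tool. A sketch of roughly the command it runs, with illustrative paths; the exact flag set may differ between releases:

import subprocess

# /tmp/cd-contents holds the generated metadata tree
# (e.g. openstack/latest/meta_data.json); disk.config is the ISO output
subprocess.check_call([
    'genisoimage', '-o', '/tmp/disk.config',
    '-ldots', '-allow-lowercase', '-allow-multidot',
    '-l', '-publisher', 'OpenStack nova',
    '-quiet', '-J', '-r', '-V', 'config-2',
    '/tmp/cd-contents'])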

Generating the libvirt xml configuration

After the _create_image call above, all the disk devices are configured. Now for the generation of the libvirt xml:

#nova/virt/libvirt/driver.py/LibvirtDriver._get_guest_xml
def _get_guest_xml(self, context, instance, network_info,
                   disk_info, image_meta, rescue=None,
                   block_device_info=None,
                   write_to_disk=False):
    # NOTE(danms): Stringifying a NetworkInfo will take a lock. Do
    # this ahead of time so that we don't acquire it while also
    # holding the logging lock.
    """The logic is clear:
    1. Build the instance configuration from the inputs
    2. Convert the configuration to xml
    3. Save the xml to the local disk
    """
    network_info_str = str(network_info)
    msg = ('Start _get_guest_xml '
           'network_info=%(network_info)s '
           'disk_info=%(disk_info)s '
           'image_meta=%(image_meta)s rescue=%(rescue)s '
           'block_device_info=%(block_device_info)s' %
           {'network_info': network_info_str,
            'disk_info': disk_info,
            'image_meta': image_meta, 'rescue': rescue,
            'block_device_info': block_device_info})
    # NOTE(mriedem): block_device_info can contain auth_password so
    # we need to sanitize the password in the message.
    LOG.debug(strutils.mask_password(msg), instance=instance)
    conf = self._get_guest_config(instance, network_info,
                                  image_meta, disk_info, rescue,
                                  block_device_info, context)
    # Convert the instance configuration to xml
    xml = conf.to_xml()

    # Write it to the local disk
    if write_to_disk:
        instance_dir = libvirt_utils.get_instance_path(instance)
        # Save the xml to
        # CONF.instances_path/instance_uuid/libvirt.xml
        xml_path = os.path.join(instance_dir, 'libvirt.xml')
        libvirt_utils.write_to_file(xml_path, xml)

    LOG.debug('End _get_guest_xml xml=%(xml)s',
              {'xml': xml}, instance=instance)
    return xml

_get_guest_xml itself is fairly straightforward, so it is not analyzed further here; readers with questions are welcome to contact me to discuss.
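One detail worth a second look before moving on is the mask_password call: block_device_info can carry volume credentials, so the debug message is sanitized before logging. A quick illustration of the oslo.utils helper; the masked output shown is my expectation of its behavior, not a captured log:

from oslo_utils import strutils

msg = "block_device_info: {'auth_password': 's3cr3t', 'auth_username': 'cinder'}"
print(strutils.mask_password(msg))
# expected (assumption):
# block_device_info: {'auth_password': '***', 'auth_username': 'cinder'}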

Starting the instance

The function is implemented as follows:

def _create_domain_and_network(self, context, xml, instance,
                               network_info, disk_info,
                               block_device_info=None,
                               power_on=True, reboot=False,
                               vifs_already_plugged=False):
    """The key input parameters look like this:

    disk_info:
    {
    'disk_bus': 'virtio', 'cdrom_bus': 'ide',
    'mapping': {
        'disk.config': {'bus': 'ide', 'type': 'cdrom',
                                                'dev': 'hdd'},
        'disk': {'bus': 'virtio', 'boot_index': '1',
                                'type': 'disk', 'dev': u'vda'},
        'root': {'bus': 'virtio',
            'boot_index': '1', 'type': 'disk', 'dev': u'vda'}
        }
    }

    block_device_info:
    {
        'swap': None, 'root_device_name': u'/dev/vda',
        'ephemerals': [], 'block_device_mapping': []
    }

    network_info: the instance's network configuration
    [VIF({'profile': {}, 'ovs_interfaceid': None,
    'preserve_on_delete': False,
    'network': Network({'bridge': u'brq20f5ec1b-4f', 'subnets':
    [Subnet({'ips': [FixedIP({'meta': {}, 'version': 4, 'type':
    'fixed', 'floating_ips': [], 'address':
    u'xx.xxx.xxx.xxx'})], 'version': 4, 'meta': {'dhcp_server':
    u'xx.xxx.xxx.xxx'}, 'dns': [], 'routes': [], 'cidr':
    u'xx.xxx.xxx.0/xxx', 'gateway': IP({'meta': {}, 'version':
    4, 'type': 'gateway', 'address': u'10.240.227.1'})})],
    'meta': {'injected': False, 'tenant_id':
    u'25520b29dce346d38bc4b055c5ffbfcb',
    'should_create_bridge': True}, 'id': u'20f5ec1b-4f96-41d8-
    97f3-6776db0d00a7', 'label': u'10.240.227.x'}), 'devname':
    u'tapefe77b47-fe', 'vnic_type': u'normal', 'qbh_params':
    None, 'meta': {}, 'details': {u'port_filter': True},
    'address': u'fa:16:3e:5e:64:80', 'active': False, 'type':
    u'bridge', 'id': u'efe77b47-fef8-48ff-93ee-8da753a6d2bb',
    'qbg_params': None})]
    """
    # Get the block device mapping; from the input above we know
    # block_device_mapping=[]
    block_device_mapping = driver.block_device_info_get_mapping(
            block_device_info)
    # Get the image metadata
    image_meta = objects.ImageMeta.from_instance(instance)

    # If disk encryption is enabled, encrypt the disks with the
    # configured algorithm; block_device_mapping=[] here, so the
    # related code is skipped
    for vol in block_device_mapping:
        .......

    # vif_plugging_timeout=300 (5 minutes by default)
    # Check for neutron network events: if a vif is in a non-active
    # state, its plug event has to be handled. In this example
    # events is: [('network-vif-plugged',
    # u'efe77b47-fef8-48ff-93ee-8da753a6d2bb')]
    timeout = CONF.vif_plugging_timeout
    if (self._conn_supports_start_paused and
            utils.is_neutron() and not
            vifs_already_plugged and power_on and timeout):
        events = self._get_neutron_events(network_info)
    else:
        events = []

    # pause = True
    pause = bool(events)
    guest = None

    # try/except handling omitted

    # The virtual NICs must be ready before the instance starts.
    # ComputeVirtAPI.wait_for_instance_event handles the neutron
    # network events, here network-vif-plugged: it starts an
    # eventlet thread to process the events and waits for it to
    # finish; on failure self._neutron_failed_callback is called.
    with self.virtapi.wait_for_instance_event(
            instance, events, deadline=timeout,
            error_callback=self._neutron_failed_callback):
        # Plug the virtual NICs (I am using a bridge, so this ends
        # up in LibvirtGenericVIFDriver.plug_bridge).
        """A brief walk-through: plug_vifs dispatches on the vif
        type (bridge in this example), so the concrete call chain
        is:
        self.plug_vifs ->
        nova/virt/libvirt/vif.py/LibvirtGenericVIFDriver.plug
        -> LibvirtGenericVIFDriver.plug_bridge ->
        nova/network/linux_net.py/
        LinuxBridgeInterfaceDriver.ensure_bridge, which finally
        creates the bridge with the brctl tool; readers can explore
        the implementation themselves
        """
        self.plug_vifs(instance, network_info)
        # Set up the basic iptables rules
        self.firewall_driver.setup_basic_filtering(instance,
                                                   network_info)
        # Set up the network filtering and firewall policy for the
        # instance
        self.firewall_driver.prepare_instance_filter(instance,
                                                     network_info)
        with self._lxc_disk_handler(instance, image_meta,
                                    block_device_info,
                                    disk_info):
            # Call the libvirt library to start the instance.
            # xml is the instance's xml configuration,
            # pause=True, power_on=True. I am using qemu-kvm, so
            # this first connects to the hypervisor via
            # qemu:///system, then defines the domain, and finally
            # starts the instance
            guest = self._create_domain(
                    xml, pause=pause, power_on=power_on)
        # no-op
        self.firewall_driver.apply_instance_filter(instance,
                                                   network_info)

    # Resume only if domain has been paused
    if pause:
        guest.resume()
    return guest
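The wait_for_instance_event context manager is a pattern worth noting: register the events you expect, do the provisioning work in the body, then block until the external notifications arrive or a deadline passes. Below is a self-contained, simplified sketch of that pattern using threading instead of eventlet; all names are illustrative and this is not nova's implementation:

import contextlib
import threading

class EventWaiter(object):
    """Register expected events, then wait for them with a deadline."""

    def __init__(self):
        self._events = {}            # event name -> threading.Event
        self._lock = threading.Lock()

    def _get(self, name):
        with self._lock:
            return self._events.setdefault(name, threading.Event())

    def notify(self, name):
        # Called by whatever receives the external notification (for
        # nova: the network-vif-plugged event forwarded by neutron)
        self._get(name).set()

    @contextlib.contextmanager
    def wait_for(self, names, deadline, error_callback):
        for name in names:
            self._get(name)          # register before doing the work
        yield                        # caller plugs VIFs, defines domain...
        for name in names:
            # Simplification: each event gets the full deadline rather
            # than sharing one overall budget
            if not self._get(name).wait(timeout=deadline):
                error_callback(name)

# Usage sketch:
# waiter = EventWaiter()
# with waiter.wait_for(['network-vif-plugged-PORT'], deadline=300,
#                      error_callback=handle_timeout):
#     plug_vifs(); create_domain(paused=True)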

If all goes well, control returns to LibvirtDriver.spawn, which waits for the boot to complete. Once the instance is up, execution returns to _build_and_run_instance, which at the end updates the instance state, writes it to the database, and sends a notification so the scheduler can update its resource usage.

All in all, the interactions between the various modules during instance boot are fairly involved, with plenty of details to consider, and they reward careful study.
