Fabain4 IT路上的潜行者

Nova 虚拟机启动代码分析


介绍

Nova 虚拟机启动流程包括:novaclient 封装 url 请求 → nova-api 接受请求 → nova-conductor → nova-scheduler 根据调度选择 host 节点 → nova-compute 根据虚拟机信息开始创建 → libvirt 具体创建虚拟机 → 记录创建结果到数据库

Nova boot代码分析

Api 入口nova/api/openstack/compute/servers.py下面的Controller类下的create方法

   def create(self, req, body):
        # req and body arrive from novaclient; body carries the server name,
        # image info, network ids etc. in roughly the shape
        # {u'server': {..., u'block_device_mapping_v2': [], networks: []}}
        """Creates a new server for a given user."""
        if not self.is_valid_body(body, 'server'):
            raise exc.HTTPUnprocessableEntity()

        context = req.environ['nova.context']
        server_dict = body['server']

        password = self._get_server_admin_password(server_dict)

        if 'name' not in server_dict:
            msg = _("Server name is not defined")
            raise exc.HTTPBadRequest(explanation=msg)

        name = server_dict['name']
        self._validate_server_name(name)
        name = name.strip()

        # for boot-from-volume requests there is no image, so image_uuid
        # comes back empty here
        image_uuid = self._image_from_req_data(body)

        personality = server_dict.get('personality')
        config_drive = None
        if self.ext_mgr.is_loaded('os-config-drive'):
            config_drive = server_dict.get('config_drive')

        # NOTE: excerpt truncated by the article author -- the omitted middle
        # section parses userdata, keyname and the requested instance count
        # from req (flavor_id, scheduler_hints, legacy_bdm and
        # check_server_group_quota below are set in that omitted code)

        try:
            _get_inst_type = flavors.get_flavor_by_flavor_id
            inst_type = _get_inst_type(flavor_id, ctxt=context,
                                       read_deleted="no")

            # hand the assembled parameters over to the create() method of
            # the compute API module (nova/compute/api.py)
            (instances, resv_id) = self.compute_api.create(context,
                        inst_type,
                        image_uuid,
                        display_name=name,
                        scheduler_hints=scheduler_hints,
                        legacy_bdm=legacy_bdm,
                        check_server_group_quota=check_server_group_quota)

上面整个过程,就是解析了传进来的参数,作为 self.compute_api.create 的参数传递,其中主要就是配额信息的 inst_type、block_device_mapping 以及 requested_networks

转到nova/compute/api.py API类的create方法

def create(self, context, instance_type,
               image_href, kernel_id=None, ramdisk_id=None,
               min_count=None, max_count=None,
               display_name=None, display_description=None,
               key_name=None, key_data=None, security_group=None,
               availability_zone=None, user_data=None, metadata=None,
               injected_files=None, admin_password=None,
               block_device_mapping=None, access_ip_v4=None,
               access_ip_v6=None, requested_networks=None, config_drive=None,
               auto_disk_config=None, scheduler_hints=None, legacy_bdm=True,
               shutdown_terminate=False, check_server_group_quota=False):
        """Provision instances, sending instance information to the
        scheduler.  The scheduler will determine where the instance(s)
        go and will handle creating the DB entries.

        Returns a tuple of (instances, reservation_id)
        """

        # policy checks: _check_create_policies enforces the tenant,
        # network and block-device policies via nova.policy.enforce
        self._check_create_policies(context, availability_zone, 
                requested_networks, block_device_mapping)

        if requested_networks and max_count > 1:
            self._check_multiple_instances_and_specified_ip(requested_networks)
            if utils.is_neutron():
                self._check_multiple_instances_neutron_ports(
                    requested_networks)

        # delegate all the real work to _create_instance
        return self._create_instance(               
                       context, instance_type,
                       image_href, kernel_id, ramdisk_id,
                       min_count, max_count,
                       display_name, display_description,
                       key_name, key_data, security_group,
                       availability_zone, user_data, metadata,
                       injected_files, admin_password,
                       access_ip_v4, access_ip_v6,
                       requested_networks, config_drive,
                       block_device_mapping, auto_disk_config,
                       scheduler_hints=scheduler_hints,
                       legacy_bdm=legacy_bdm,
                       shutdown_terminate=shutdown_terminate,
                       check_server_group_quota=check_server_group_quota)

下面转到同一个文件 nova/compute/api.py 中 API 类下的 _create_instance 方法

   def _create_instance(self, context, instance_type,
               image_href, kernel_id, ramdisk_id,
               min_count, max_count,
               display_name, display_description,
               key_name, key_data, security_groups,
               availability_zone, user_data, metadata,
               injected_files, admin_password,
               access_ip_v4, access_ip_v6,
               requested_networks, config_drive,
               block_device_mapping, auto_disk_config,
               reservation_id=None, scheduler_hints=None,
               legacy_bdm=True, shutdown_terminate=False,
               check_server_group_quota=False):
        """Verify all the input parameters regardless of the provisioning
        strategy being performed and schedule the instance(s) for
        creation.
        """

        # Normalize and setup some parameters
        if reservation_id is None:
            reservation_id = utils.generate_uid('r')
        # fall back to the 'default' security group when none was supplied
        security_groups = security_groups or ['default'] 
        # normalize the requested number of instances to create
        min_count = min_count or 1
        max_count = max_count or min_count                 
        block_device_mapping = block_device_mapping or []
        if not instance_type:                              

            # instance_type is the flavor; only fall back to the default
            # flavor when the caller did not supply one
            instance_type = flavors.get_default_flavor()  

        # resolve the glance image id; for boot-from-volume there is no
        # image_href, so the image metadata (boot_meta) is pulled out of the
        # volume's block device mapping instead
        if image_href:
            image_id, boot_meta = self._get_image(context, image_href)  
        else:
            image_id = None
            # boot-from-volume: fetch the image metadata from the BDM
            boot_meta = self._get_bdm_image_metadata(    
                context, block_device_mapping, legacy_bdm)

        self._check_auto_disk_config(image=boot_meta,
                                     auto_disk_config=auto_disk_config)

        handle_az = self._handle_availability_zone
        availability_zone, forced_host, forced_node = handle_az(context,
                                                            availability_zone)

        # build the base option dict from everything collected so far; most
        # per-instance fields are generated inside
        # _validate_and_build_base_options
        base_options, max_net_count = self._validate_and_build_base_options(
                context,
                instance_type, boot_meta, image_href, image_id, kernel_id,
                ramdisk_id, display_name, display_description,
                key_name, key_data, security_groups, availability_zone,
                forced_host, user_data, metadata, injected_files, access_ip_v4,
                access_ip_v6, requested_networks, config_drive,
                block_device_mapping, auto_disk_config, reservation_id,
                max_count)

        # max_net_count is the maximum number of instances requested by the
        # user adjusted for any network quota constraints, including
        # consideration of connections to each requested network
        if max_net_count == 0:
            raise exception.PortLimitExceeded()
        elif max_net_count < max_count:
            LOG.debug("max count reduced from %(max_count)d to "
                      "%(max_net_count)d due to network port quota",
                      {'max_count': max_count,
                       'max_net_count': max_net_count})
            max_count = max_net_count

        # validate and transform the block device mapping information
        block_device_mapping = self._check_and_transform_bdm( 
            base_options, instance_type, boot_meta, min_count, max_count,
            block_device_mapping, legacy_bdm)

        instance_group = self._get_requested_instance_group(context,
                                   scheduler_hints, check_server_group_quota)

        # key step: reserve quota and create the DB records for the
        # new instances (analysed in detail below)
        instances = self._provision_instances(context, instance_type,  
                min_count, max_count, base_options, boot_meta, security_groups,
                block_device_mapping, shutdown_terminate,
                instance_group, check_server_group_quota)

        # filter properties consumed by the scheduler (analysed separately)
        filter_properties = self._build_filter_properties(context,  
                scheduler_hints, forced_host,
                forced_node, instance_type,
                base_options.get('pci_request_info'))

        for instance in instances:
            self._record_action_start(context, instance,
                                      instance_actions.CREATE)

        # kick off the actual build and its task tracking; from here the
        # flow leaves nova-api and goes through nova-conductor,
        # nova-scheduler and nova-compute
        self.compute_task_api.build_instances(context,                     
                instances=instances, image=boot_meta,
                filter_properties=filter_properties,
                admin_password=admin_password,
                injected_files=injected_files,
                requested_networks=requested_networks,
                security_groups=security_groups,
                block_device_mapping=block_device_mapping,
                legacy_bdm=False)

        return (instances, reservation_id)

先看下同一个类下的 _provision_instances 方法(由 _create_instance 调用)

  def _provision_instances(self, context, instance_type, min_count,
            max_count, base_options, boot_meta, security_groups,
            block_device_mapping, shutdown_terminate,
            instance_group, check_server_group_quota):
        # Reserve quotas
        # _check_num_instances_quota derives the cpu/memory demand from the
        # flavor and reserves the corresponding quota
        # TODO(review): revisit _check_num_instances_quota in detail
        num_instances, quotas = self._check_num_instances_quota(  
                context, instance_type, min_count, max_count)
        LOG.debug("Going to run %s instances..." % num_instances)
        instances = []
        try:
            for i in xrange(num_instances):
                instance = objects.Instance()                    
                # base_options carries the pre-computed fields (name, quota,
                # state, ...); fold them into the new Instance object
                instance.update(base_options)                    
                # key step: persist the instance record in the DB
                instance = self.create_db_entry_for_new_instance(    
                        context, instance_type, boot_meta, instance,
                        security_groups, block_device_mapping,
                        num_instances, i, shutdown_terminate)
                pci_requests = base_options['pci_request_info']
                pci_requests.instance_uuid = instance.uuid
                pci_requests.save(context)
                instances.append(instance)

                if instance_group:
                    if check_server_group_quota:
                        count = QUOTAS.count(context,
                                             'server_group_members',
                                             instance_group,
                                             context.user_id)
                        try:
                            QUOTAS.limit_check(context,
                                               server_group_members=count + 1)
                        except exception.OverQuota:
                            msg = _("Quota exceeded, too many servers in "
                                    "group")
                            raise exception.QuotaError(msg)

                    objects.InstanceGroup.add_members(context,
                                                      instance_group.uuid,
                                                      [instance.uuid])

                # send a state update notification for the initial create to
                # show it going from non-existent to BUILDING
                notifications.send_update_with_states(context, instance, None,
                        vm_states.BUILDING, None, None, service="api")

        # In the case of any exceptions, attempt DB cleanup and rollback the
        # quota reservations.
        except Exception:
            with excutils.save_and_reraise_exception():
                try:
                    for instance in instances:
                        try:
                            instance.destroy()
                        except exception.ObjectActionError:
                            pass
                finally:
                    quotas.rollback()

        # Commit the reservations
        # commit the quota reserved for the newly created instances
        quotas.commit()
        return instances

下面看同一个类下的create_db_entry_for_new_instance方法

   def create_db_entry_for_new_instance(self, context, instance_type, image,
            instance, security_group, block_device_mapping, num_instances,
            index, shutdown_terminate=False):
        """Create an entry in the DB for this new instance,
        including any related table updates (such as security group,
        etc).

        This is called by the scheduler after a location for the
        instance has been determined.
        """
        # _populate_instance_for_create generates the instance uuid and
        # sets up the instance's initial state
        self._populate_instance_for_create(context, instance, image, index,     
                                           security_group, instance_type)

        # derive the instance's display_name and hostname
        self._populate_instance_names(instance, num_instances)   

        instance.shutdown_terminate = shutdown_terminate                   

        self.security_group_api.ensure_default(context)
        instance.create(context)

        if num_instances > 1:
            # NOTE(russellb) We wait until this spot to handle
            # multi_instance_display_name_template, because we need
            # the UUID from the instance.
            instance = self._apply_instance_name_template(context, instance,
                                                          index)

        # NOTE (ndipanov): This can now raise exceptions but the instance
        #                  has been created, so delete it and re-raise so
        #                  that other cleanup can happen.
        try:
            # validate the instance's disk setup (extra disks, swap, ...)
            self._validate_bdm(                                
                context, instance, instance_type, block_device_mapping)
        except (exception.CinderConnectionFailed, exception.InvalidBDM,
                exception.InvalidVolume):
            with excutils.save_and_reraise_exception():
                instance.destroy(context)

        # persist the instance's block device mappings
        self._update_block_device_mapping(
            context, instance_type, instance['uuid'], block_device_mapping) 

        return instance                                               
        # (the whole method is really about assembling the instance record)

下面看同一个类由(_create_instance调用的)_build_filter_properties方法

```

def _build_filter_properties(self, context, scheduler_hints, forced_host,
        forced_node, instance_type, pci_request_info):
    filter_properties = dict(scheduler_hints=scheduler_hints)
    filter_properties['instance_type'] = instance_type
    # 这里应该是指定的强制创建在某个主机
    if forced_host:
        filter_properties['force_hosts'] = [forced_host]
    # 这里应该是指定的强制创建在某个节点
    if forced_node:
        filter_properties['force_nodes'] = [forced_node]
    if pci_request_info and pci_request_info.requests:
        filter_properties['pci_requests'] = pci_request_info
    return filter_properties                           
    # 这里返回了一个filter_properties的字典,其中包括instance_type, force_hosts, force_nodes,pci信息

```

下面顺着上面的 _create_instance,转到同一个类下的 compute_task_api 属性,代码如下:

  def compute_task_api(self):
        # Lazily instantiate the conductor task API on first access.
        # NOTE(review): in the Nova source this is wrapped in @property --
        # the decorator appears to be omitted from this excerpt.
        if self._compute_task_api is None:
            # TODO(alaski): Remove calls into here from conductor manager so
            # that this isn't necessary. #1180540
            from nova import conductor
            self._compute_task_api = conductor.ComputeTaskAPI()  # delegates to the conductor package's ComputeTaskAPI() factory
        return self._compute_task_api

转到 nova/conductor/__init__.py 中

def ComputeTaskAPI(*args, **kwargs):
    # Factory: choose the local (in-process) or RPC conductor task API
    # based on the use_local kwarg or the [conductor] use_local config flag.
    use_local = kwargs.pop('use_local', False)
    if oslo.config.cfg.CONF.conductor.use_local or use_local:
        api = conductor_api.LocalComputeTaskAPI                    # LocalComputeTaskAPI is used when use_local is set
    else:
        api = conductor_api.ComputeTaskAPI
    return api(*args, **kwargs)

转到 nova/conductor/api.py 下类 LocalComputeTaskAPI 下的 build_instances 方法

  def build_instances(self, context, instances, image,
            filter_properties, admin_password, injected_files,
            requested_networks, security_groups, block_device_mapping,
            legacy_bdm=True):

        # hand off to self._manager.build_instances in the background;
        # utils.spawn_n is implemented with eventlet.spawn_n(func, *args,
        # **kwargs), so this call returns immediately
        utils.spawn_n(self._manager.build_instances, context,              
                instances=instances, image=image,
                filter_properties=filter_properties,
                admin_password=admin_password, injected_files=injected_files,
                requested_networks=requested_networks,
                security_groups=security_groups,
                block_device_mapping=block_device_mapping,
                legacy_bdm=legacy_bdm)

下面看下 self._manager.build_instances,通过 nova/conductor/rpcapi.py 中的 build_instances 跳转到 nova/conductor/manager.py 中的 build_instances

 def build_instances(self, context, instances, image, filter_properties,
            admin_password, injected_files, requested_networks,
            security_groups, block_device_mapping=None, legacy_bdm=True):
        # TODO(ndipanov): Remove block_device_mapping and legacy_bdm in version
        #                 2.0 of the RPC API.

        # build_request_spec (nova/scheduler/utils.py) packs the image and
        # instance properties into the request_spec sent to the scheduler
        request_spec = scheduler_utils.build_request_spec(context, image,  
                                                          instances)
        # TODO(danms): Remove this in version 2.0 of the RPC API
        if (requested_networks and
                not isinstance(requested_networks,
                               objects.NetworkRequestList)):
            requested_networks = objects.NetworkRequestList(
                objects=[objects.NetworkRequest.from_tuple(t)
                         for t in requested_networks])

        try:
            # check retry policy. Rather ugly use of instances[0]...
            # but if we've exceeded max retries... then we really only
            # have a single instance.
            scheduler_utils.populate_retry(filter_properties,        
                instances[0].uuid)

            # let nova-scheduler pick hosts using the filter properties
            hosts = self.scheduler_client.select_destinations(context, 
                    request_spec, filter_properties)
        except Exception as exc:
            for instance in instances:
                scheduler_driver.handle_schedule_error(context, exc,
                        instance.uuid, request_spec)
            return

        for (instance, host) in itertools.izip(instances, hosts):
            try:
                instance.refresh()
            except (exception.InstanceNotFound,
                    exception.InstanceInfoCacheNotFound):
                LOG.debug('Instance deleted during build', instance=instance)
                continue
            local_filter_props = copy.deepcopy(filter_properties)

            # record the chosen compute node in the per-instance
            # filter properties
            scheduler_utils.populate_filter_properties(local_filter_props,
                host)
            # The block_device_mapping passed from the api doesn't contain
            # instance specific information
            bdms = objects.BlockDeviceMappingList.get_by_instance_uuid(
                    context, instance.uuid)

            # compute_rpcapi routes this to build_and_run_instance in
            # nova/compute/manager.py on the selected host
            self.compute_rpcapi.build_and_run_instance(context,                 
                    instance=instance, host=host['host'], image=image,
                    request_spec=request_spec,
                    filter_properties=local_filter_props,
                    admin_password=admin_password,
                    injected_files=injected_files,
                    requested_networks=requested_networks,
                    security_groups=security_groups,
                    block_device_mapping=bdms, node=host['nodename'],
                    limits=host['limits'])

转到nova/compute/manager.py中类ComputeManager中的build_and_run_instance方法

  def build_and_run_instance(self, context, instance, image, request_spec,
                     filter_properties, admin_password=None,
                     injected_files=None, requested_networks=None,
                     security_groups=None, block_device_mapping=None,
                     node=None, limits=None):

        # NOTE(danms): Remove this in v4.0 of the RPC API
        if (requested_networks and
                not isinstance(requested_networks,
                               objects.NetworkRequestList)):
            requested_networks = objects.NetworkRequestList(
                objects=[objects.NetworkRequest.from_tuple(t)
                         for t in requested_networks])

        @utils.synchronized(instance.uuid)
        def _locked_do_build_and_run_instance(*args, **kwargs):

            # the real work happens in _do_build_and_run_instance
            self._do_build_and_run_instance(*args, **kwargs)

        # NOTE(danms): We spawn here to return the RPC worker thread back to
        # the pool. Since what follows could take a really long time, we don't
        # want to tie up RPC workers.
        utils.spawn_n(_locked_do_build_and_run_instance,
                      context, instance, image, request_spec,
                      filter_properties, admin_password, injected_files,
                      requested_networks, security_groups,
                      block_device_mapping, node, limits)

这里转到_do_build_and_run_instance

   def _do_build_and_run_instance(self, context, instance, image,
            request_spec, filter_properties, admin_password, injected_files,
            requested_networks, security_groups, block_device_mapping,
            node=None, limits=None):

        .......

        # b64 decode the files to inject:
        decoded_files = self._decode_files(injected_files)

        if limits is None:
            limits = {}

        if node is None:
            node = self.driver.get_available_nodes(refresh=True)[0]
            LOG.debug('No node specified, defaulting to %s', node,
                      instance=instance)

        try:

            # key call: _build_and_run_instance does the actual build
            self._build_and_run_instance(context, instance, image,
                    decoded_files, admin_password, requested_networks,
                    security_groups, block_device_mapping, node, limits,
                    filter_properties)
        except exception.RescheduledException as e:

            ...............

展开同一个类下的self._build_and_run_instance方法

 def _build_and_run_instance(self, context, instance, image, injected_files,
            admin_password, requested_networks, security_groups,
            block_device_mapping, node, limits, filter_properties):

        image_name = image.get('name')
        self._notify_about_instance_usage(context, instance, 'create.start',
                extra_usage_info={'image_name': image_name})
        try:
            # resource tracker for the chosen node
            rt = self._get_resource_tracker(node)                        
            with rt.instance_claim(context, instance, limits):           
                # NOTE(russellb) It's important that this validation be done
                # *after* the resource tracker instance claim, as that is where
                # the host is set on the instance.
                self._validate_instance_group_policy(context, instance,
                        filter_properties)

                # allocate resources (network and storage) via the
                # _build_resources context manager of this same class --
                # this is the interesting part
                with self._build_resources(context, instance,  
                        requested_networks, security_groups, image,
                        block_device_mapping) as resources:

                    # record vm_state BUILDING / task_state SPAWNING
                    instance.vm_state = vm_states.BUILDING               
                    instance.task_state = task_states.SPAWNING
                    instance.save(expected_task_state=
                            task_states.BLOCK_DEVICE_MAPPING)
                    block_device_info = resources['block_device_info']
                    network_info = resources['network_info']

                    # call down into the virt driver (libvirt here) to
                    # actually create the virtual machine
                    self.driver.spawn(context, instance, image,     
                                      injected_files, admin_password,
                                      network_info=network_info,
                                      block_device_info=block_device_info)
        except Exception as e:
            ........

        # NOTE(alaski): This is only useful during reschedules, remove it now.
        instance.system_metadata.pop('network_allocated', None)

        # read back the instance's power state
        instance.power_state = self._get_power_state(context, instance) 
        # flip vm_state to ACTIVE
        instance.vm_state = vm_states.ACTIVE                           
        # clear task_state
        instance.task_state = None                                    
        instance.launched_at = timeutils.utcnow()

        try:
            instance.save(expected_task_state=task_states.SPAWNING)
        except (exception.InstanceNotFound,
                exception.UnexpectedDeletingTaskStateError) as e:
            with excutils.save_and_reraise_exception():
                self._notify_about_instance_usage(context, instance,
                    'create.end', fault=e)

        self._notify_about_instance_usage(context, instance, 'create.end',       # notify that the instance build finished
                extra_usage_info={'message': _('Success')},
                network_info=network_info)

下面转到资源分配,这里面调用了neutron 的api

    def _build_resources(self, context, instance, requested_networks,
            security_groups, image, block_device_mapping):
        resources = {}
        network_info = None
        try:
            # important: allocate the instance's network information here
            network_info = self._build_networks_for_instance(context, instance,
                    requested_networks, security_groups)
            resources['network_info'] = network_info
        except (exception.InstanceNotFound,
            ..............

        try:
            # Verify that all the BDMs have a device_name set and assign a
            # default to the ones missing it with the help of the driver.
            self._default_block_device_names(context, instance, image,
                    block_device_mapping)

            instance.vm_state = vm_states.BUILDING
            instance.task_state = task_states.BLOCK_DEVICE_MAPPING
            instance.save()

            # prepare the block device information
            block_device_info = self._prep_block_device(context, instance,
                    block_device_mapping)
            resources['block_device_info'] = block_device_info
        except Exception:
            ............

        try:
            yield resources
        except Exception as exc:
            with excutils.save_and_reraise_exception() as ctxt:
                if not isinstance(exc, (exception.InstanceNotFound,
                    exception.UnexpectedDeletingTaskStateError)):
                        LOG.exception(_LE('Instance failed to spawn'),
                                instance=instance)
                # Make sure the async call finishes
                if network_info is not None:
                    network_info.wait(do_raise=False)
                try:
                    self._shutdown_instance(context, instance,
                            block_device_mapping, requested_networks,
                            try_deallocate_networks=False)
                except Exception:
                    ctxt.reraise = False
                    msg = _('Could not clean up failed build,'
                            ' not rescheduling')
                    raise exception.BuildAbortException(
                            instance_uuid=instance.uuid, reason=msg)

下面看 self._build_networks_for_instance 方法

def _build_networks_for_instance(self, context, instance,
            requested_networks, security_groups):

        # If we're here from a reschedule the network may already be allocated.
        if strutils.bool_from_string(
                instance.system_metadata.get('network_allocated', 'False')):
            return self._get_instance_nw_info(context, instance)

        if not self.is_neutron_security_groups:
            security_groups = []

        # macs_for_instance is not implemented by this driver, returns None
        macs = self.driver.macs_for_instance(instance)

        # dhcp_options_for_instance is likewise unimplemented, returns None
        dhcp_options = self.driver.dhcp_options_for_instance(instance)

        # _allocate_network produces the network_info (asynchronously)
        network_info = self._allocate_network(context, instance,
                requested_networks, macs, security_groups, dhcp_options)

        if not instance.access_ip_v4 and not instance.access_ip_v6:
            # If CONF.default_access_ip_network_name is set, grab the
            # corresponding network and set the access ip values accordingly.
            # Note that when there are multiple ips to choose from, an
            # arbitrary one will be chosen.
            network_name = CONF.default_access_ip_network_name
            if not network_name:
                return network_info

            for vif in network_info:
                if vif['network']['label'] == network_name:
                    for ip in vif.fixed_ips():
                        if ip['version'] == 4:
                            instance.access_ip_v4 = ip['address']
                        if ip['version'] == 6:
                            instance.access_ip_v6 = ip['address']
                    instance.save()
                    break

        return network_info

转到同一个类下的self._allocate_network方法

def _allocate_network(self, context, instance, requested_networks, macs,
                          security_groups, dhcp_options):
        """Kick off asynchronous network allocation for *instance*.

        Returns a NetworkInfoAsyncWrapper wrapping
        self._allocate_network_async; the wrapper performs the real
        allocation in the background and yields the allocated networks
        once the operation finishes.
        """
        # NOTE(comstud): Since we're allocating networks asynchronously,
        # this task state has little meaning, as we won't be in this
        # state for very long.
        instance.vm_state = vm_states.BUILDING
        instance.task_state = task_states.NETWORKING
        instance.save(expected_task_state=[None])
        self._update_resource_tracker(context, instance)

        vpn_image = pipelib.is_vpn_image(instance.image_ref)

        # NetworkInfoAsyncWrapper is only the model; the actual work is
        # done by self._allocate_network_async.
        async_wrapper = network_model.NetworkInfoAsyncWrapper(
            self._allocate_network_async, context, instance,
            requested_networks, macs, security_groups, vpn_image,
            dhcp_options)
        return async_wrapper

转到self._allocate_network_async方法

  def _allocate_network_async(self, context, instance, requested_networks,
                                macs, security_groups, is_vpn, dhcp_options):
        """Method used to allocate networks in the background.

        Broken out for testing.
        """
        LOG.debug("Allocating IP information in the background.",
                  instance=instance)
        retries = CONF.network_allocate_retries
        if retries < 0:
            LOG.warn(_("Treating negative config value (%(retries)s) for "
                       "'network_allocate_retries' as 0."),
                     {'retries': retries})
        attempts = retries > 1 and retries + 1 or 1
        retry_time = 1
        for attempt in range(1, attempts + 1):
            try:
                
                # self.network_api = network.API()
                # 调用neutron的api nova/network/neutronv2/api.py下的allocate_for_instance
                nwinfo = self.network_api.allocate_for_instance(
                        context, instance, vpn=is_vpn,
                        requested_networks=requested_networks,
                        macs=macs,
                        security_groups=security_groups,
                        dhcp_options=dhcp_options)
                LOG.debug('Instance network_info: |%s|', nwinfo,
                          instance=instance)
                sys_meta = instance.system_metadata
                sys_meta['network_allocated'] = 'True'
                self._instance_update(context, instance.uuid,
                        system_metadata=sys_meta)
                return nwinfo
            except Exception:
                exc_info = sys.exc_info()
                log_info = {'attempt': attempt,
                            'attempts': attempts}
                if attempt == attempts:
                    LOG.exception(_LE('Instance failed network setup '
                                      'after %(attempts)d attempt(s)'),
                                  log_info)
                    raise exc_info[0], exc_info[1], exc_info[2]
                LOG.warn(_('Instance failed network setup '
                           '(attempt %(attempt)d of %(attempts)d)'),
                         log_info, instance=instance)
                time.sleep(retry_time)
                retry_time *= 2
                if retry_time > 30:
                    retry_time = 30

下面转到由 nwinfo = self.network_api.allocate_for_instance 所调用的 nova/network/neutronv2/api.py 中的 allocate_for_instance

  def allocate_for_instance(self, context, instance, **kwargs):
        """Allocate network resources for the instance.

        Reads requested_networks, macs, security_groups and dhcp_options
        from kwargs; validates/reuses pre-created ports, creates missing
        ports via neutron, and returns the resulting NetworkInfo.

        NOTE(review): the closing triple-quote of this docstring was
        missing from the pasted excerpt (which would swallow the whole
        body into the string); restored here.
        """
        hypervisor_macs = kwargs.get('macs', None)
        available_macs = None
        if hypervisor_macs is not None:
            # Make a copy we can mutate: records macs that have not been used
            # to create a port on a network. If we find a mac with a
            # pre-allocated port we also remove it from this set.
            available_macs = set(hypervisor_macs)

        # Obtain the neutronv2 client (a neutronclient.v2_0.client.Client).
        neutron = neutronv2.get_client(context)
        LOG.debug('allocate_for_instance()', instance=instance)
        if not instance.project_id:
            msg = _('empty project id for instance %s')
            raise exception.InvalidInput(
                reason=msg % instance.uuid)
        requested_networks = kwargs.get('requested_networks')
        dhcp_opts = kwargs.get('dhcp_options', None)
        ports = {}
        net_ids = []
        ordered_networks = []
        if requested_networks:
            for request in requested_networks:
                if request.port_id:
                    # A pre-created port was supplied: it must be free and
                    # (when the driver constrains macs) use an allowed mac.
                    port = neutron.show_port(request.port_id)['port']
                    if port.get('device_id'):
                        raise exception.PortInUse(port_id=request.port_id)
                    if hypervisor_macs is not None:
                        if port['mac_address'] not in hypervisor_macs:
                            raise exception.PortNotUsable(
                                port_id=request.port_id,
                                instance=instance.uuid)
                        else:
                            # Don't try to use this MAC if we need to create a
                            # port on the fly later. Identical MACs may be
                            # configured by users into multiple ports so we
                            # discard rather than popping.
                            available_macs.discard(port['mac_address'])
                    request.network_id = port['network_id']
                    ports[request.port_id] = port
                if request.network_id:
                    net_ids.append(request.network_id)
                    ordered_networks.append(request)

        # When the boot request only specified network ids, the loop above
        # does little (the port/mac handling only matters when a port or
        # mac was given).  _get_available_networks returns the network ids
        # usable by the tenant; network construction can only continue
        # when some nets exist.
        nets = self._get_available_networks(context, instance.project_id,
                                            net_ids)
        if not nets:
            LOG.warn(_LW("No network configured!"), instance=instance)
            return network_model.NetworkInfo([])

        if (not requested_networks
            or requested_networks.is_single_unspecified):
            # bug/1267723 - if no network is requested and more
            # than one is available then raise NetworkAmbiguous Exception
            if len(nets) > 1:
                msg = _("Multiple possible networks found, use a Network "
                         "ID to be more specific.")
                raise exception.NetworkAmbiguous(msg)
            ordered_networks.append(
                objects.NetworkRequest(network_id=nets[0]['id']))

        self._check_external_network_attach(context, nets)

        security_groups = kwargs.get('security_groups', [])
        security_group_ids = []

        # Resolve requested security-group names/ids against the tenant's
        # groups, rejecting ambiguous or unknown entries.
        if len(security_groups):
            search_opts = {'tenant_id': instance.project_id}
            user_security_groups = neutron.list_security_groups(
                **search_opts).get('security_groups')

        for security_group in security_groups:
            name_match = None
            uuid_match = None
            for user_security_group in user_security_groups:
                if user_security_group['name'] == security_group:
                    if name_match:
                        raise exception.NoUniqueMatch(
                            _("Multiple security groups found matching"
                              " '%s'. Use an ID to be more specific.") %
                               security_group)

                    name_match = user_security_group['id']
                if user_security_group['id'] == security_group:
                    uuid_match = user_security_group['id']

            # If a user names the security group the same as
            # another's security groups uuid, the name takes priority.
            if not name_match and not uuid_match:
                raise exception.SecurityGroupNotFound(
                    security_group_id=security_group)
            elif name_match:
                security_group_ids.append(name_match)
            elif uuid_match:
                security_group_ids.append(uuid_match)

        touched_port_ids = []
        created_port_ids = []
        ports_in_requested_order = []
        nets_in_requested_order = []
        for request in ordered_networks:
            # Network lookup for available network_id
            network = None
            for net in nets:
                if net['id'] == request.network_id:
                    network = net
                    break
            # if network_id did not pass validate_networks() and not available
            # here then skip it safely not continuing with a None Network
            # (for/else: runs only when the inner loop found no match).
            else:
                continue

            nets_in_requested_order.append(network)

            if (security_groups and not (
                    network['subnets']
                    and network.get('port_security_enabled', True))):

                raise exception.SecurityGroupCannotBeApplied()
            request.network_id = network['id']
            zone = 'compute:%s' % instance.availability_zone
            port_req_body = {'port': {'device_id': instance.uuid,
                                      'device_owner': zone}}
            try:
                self._populate_neutron_extension_values(context,
                                                        instance,
                                                        request.pci_request_id,
                                                        port_req_body)
                # Requires admin creds to set port bindings
                port_client = (neutron if not
                               self._has_port_binding_extension(context) else
                               neutronv2.get_client(context, admin=True))
                if request.port_id:
                    port = ports[request.port_id]
                    port_client.update_port(port['id'], port_req_body)
                    touched_port_ids.append(port['id'])
                    ports_in_requested_order.append(port['id'])
                else:

                    # Ask neutron to create the port.
                    created_port = self._create_port(
                            port_client, instance, request.network_id,
                            port_req_body, request.address,
                            security_group_ids, available_macs, dhcp_opts)
                    created_port_ids.append(created_port)
                    ports_in_requested_order.append(created_port)
            except Exception:
                # Roll back: detach any pre-existing ports we touched and
                # delete any ports we created, then re-raise.
                with excutils.save_and_reraise_exception():
                    for port_id in touched_port_ids:
                        try:
                            port_req_body = {'port': {'device_id': ''}}
                            # Requires admin creds to set port bindings
                            if self._has_port_binding_extension(context):
                                port_req_body['port']['binding:host_id'] = None
                                port_client = neutronv2.get_client(
                                    context, admin=True)
                            else:
                                port_client = neutron
                            port_client.update_port(port_id, port_req_body)
                        except Exception:
                            msg = _LE("Failed to update port %s")
                            LOG.exception(msg, port_id)

                    self._delete_ports(neutron, instance, created_port_ids)

        nw_info = self.get_instance_nw_info(context, instance,
                                            networks=nets_in_requested_order,
                                            port_ids=ports_in_requested_order)

        # Only return VIFs for the ports this call created or touched.
        return network_model.NetworkInfo([vif for vif in nw_info
                                          if vif['id'] in created_port_ids +
                                                           touched_port_ids])

转到同一个类下的_get_available_networks方法

 def _get_available_networks(self, context, project_id,
                                net_ids=None, neutron=None):
        """Return the networks usable by *project_id*.

        When net_ids is given, look up exactly those networks (the id
        filter also matches shared networks).  Otherwise combine the
        tenant's own non-shared networks with every shared (public)
        network.  The result is reordered to match net_ids when provided.
        """
        client = neutron or neutronv2.get_client(context)

        if net_ids:
            # If user has specified to attach instance only to specific
            # networks then only add these to the search. This search will
            # also include 'shared' networks.
            nets = client.list_networks(id=net_ids).get('networks', [])
        else:
            # (1) Non-public networks owned by the tenant ...
            owned = client.list_networks(
                tenant_id=project_id, shared=False).get('networks', [])
            # (2) ... plus every public (shared) network.
            public = client.list_networks(shared=True).get('networks', [])
            nets = owned + public

        _ensure_requested_network_ordering(
            lambda net: net['id'], nets, net_ids)

        return nets

转到 _create_port 方法

   def _create_port(self, port_client, instance, network_id, port_req_body,
                     fixed_ip=None, security_group_ids=None,
                     available_macs=None, dhcp_opts=None):
        """Create a neutron port for *instance* on *network_id*.

        Fills port_req_body in place (fixed ip, network, tenant, security
        groups, mac, dhcp options) and calls create_port on port_client.

        :returns: the id of the newly created port
        :raises: PortNotFree when available_macs is an (empty) set with no
            macs left to assign
        """
        try:
            if fixed_ip:
                port_req_body['port']['fixed_ips'] = [
                    {'ip_address': str(fixed_ip)}]
            port_req_body['port']['network_id'] = network_id
            port_req_body['port']['admin_state_up'] = True
            port_req_body['port']['tenant_id'] = instance['project_id']
            if security_group_ids:
                port_req_body['port']['security_groups'] = security_group_ids
            if available_macs is not None:
                # The driver constrains usable macs; an empty set means
                # every allowed mac is already taken.
                if not available_macs:
                    raise exception.PortNotFree(
                        instance=instance['uuid'])
                mac_address = available_macs.pop()
                port_req_body['port']['mac_address'] = mac_address
            if dhcp_opts is not None:
                port_req_body['port']['extra_dhcp_opts'] = dhcp_opts
            port_id = port_client.create_port(port_req_body)['port']['id']
            LOG.debug('Successfully created port: %s', port_id,
                      instance=instance)
            return port_id
        except neutron_client_exc.NeutronClientException:
            # NOTE(review): the handler body was elided in this excerpt
            # ("..........." is not valid Python) -- see the original
            # nova/network/neutronv2/api.py for the cleanup logic.
            ...........

这里的port_client就是调用的neutron client
 port_client = (neutron if not
                               self._has_port_binding_extension(context) else
                               neutronv2.get_client(context, admin=True))


下一篇 libvirt boot

Comments