介绍
Nova 虚拟机启动流程包括:novaclient 封装 URL 请求 -> nova-api 接受请求 -> nova-conductor -> nova-scheduler 根据调度选择 host 节点 -> nova-compute 根据虚拟机信息开始创建 -> libvirt 具体创建虚拟机 -> 记录创建结果到数据库
Nova boot代码分析
Api 入口nova/api/openstack/compute/servers.py下面的Controller类下的create方法
def create(self, req, body):
# req and body arrive here from the novaclient request
# body carries the server name, image info, network ids etc., shaped like
# {u'server': {.... u'block_device_mapping_v2': [], networks: []}}
"""Creates a new server for a given user."""
if not self.is_valid_body(body, 'server'):
raise exc.HTTPUnprocessableEntity()
context = req.environ['nova.context']
server_dict = body['server']
password = self._get_server_admin_password(server_dict)
if 'name' not in server_dict:
msg = _("Server name is not defined")
raise exc.HTTPBadRequest(explanation=msg)
name = server_dict['name']
self._validate_server_name(name)
name = name.strip()
# image_uuid is empty when booting from a volume
image_uuid = self._image_from_req_data(body)
personality = server_dict.get('personality')
config_drive = None
if self.ext_mgr.is_loaded('os-config-drive'):
config_drive = server_dict.get('config_drive')
# NOTE: the omitted middle section parses req, user_data, key_name and the
# number of instances to create; only the main flow is shown below
try:
_get_inst_type = flavors.get_flavor_by_flavor_id
inst_type = _get_inst_type(flavor_id, ctxt=context,
read_deleted="no")
# hand the parsed parameters to the compute API's create method
(instances, resv_id) = self.compute_api.create(context,
inst_type,
image_uuid,
display_name=name,
scheduler_hints=scheduler_hints,
legacy_bdm=legacy_bdm,
check_server_group_quota=check_server_group_quota)
上面整个过程,就是解析了传进来的参数,作为self.compute_api.create的参数传递,其中主要就是配额信息的inst_type, block_device_mapping,以及requested_networks
转到nova/compute/api.py API类的create方法
def create(self, context, instance_type,
image_href, kernel_id=None, ramdisk_id=None,
min_count=None, max_count=None,
display_name=None, display_description=None,
key_name=None, key_data=None, security_group=None,
availability_zone=None, user_data=None, metadata=None,
injected_files=None, admin_password=None,
block_device_mapping=None, access_ip_v4=None,
access_ip_v6=None, requested_networks=None, config_drive=None,
auto_disk_config=None, scheduler_hints=None, legacy_bdm=True,
shutdown_terminate=False, check_server_group_quota=False):
"""Provision instances, sending instance information to the
scheduler. The scheduler will determine where the instance(s)
go and will handle creating the DB entries.
Returns a tuple of (instances, reservation_id)
"""
# Check creation policies: _check_create_policies validates tenant,
# network and block-device policies via nova.policy.enforce
self._check_create_policies(context, availability_zone,
requested_networks, block_device_mapping)
if requested_networks and max_count > 1:
self._check_multiple_instances_and_specified_ip(requested_networks)
if utils.is_neutron():
self._check_multiple_instances_neutron_ports(
requested_networks)
# delegate the real work to _create_instance
return self._create_instance(
context, instance_type,
image_href, kernel_id, ramdisk_id,
min_count, max_count,
display_name, display_description,
key_name, key_data, security_group,
availability_zone, user_data, metadata,
injected_files, admin_password,
access_ip_v4, access_ip_v6,
requested_networks, config_drive,
block_device_mapping, auto_disk_config,
scheduler_hints=scheduler_hints,
legacy_bdm=legacy_bdm,
shutdown_terminate=shutdown_terminate,
check_server_group_quota=check_server_group_quota)
下面转到同一个文件nova/compute/api.py中API类下的 _create_instance 方法
def _create_instance(self, context, instance_type,
image_href, kernel_id, ramdisk_id,
min_count, max_count,
display_name, display_description,
key_name, key_data, security_groups,
availability_zone, user_data, metadata,
injected_files, admin_password,
access_ip_v4, access_ip_v6,
requested_networks, config_drive,
block_device_mapping, auto_disk_config,
reservation_id=None, scheduler_hints=None,
legacy_bdm=True, shutdown_terminate=False,
check_server_group_quota=False):
"""Verify all the input parameters regardless of the provisioning
strategy being performed and schedule the instance(s) for
creation.
"""
# Normalize and setup some parameters
if reservation_id is None:
reservation_id = utils.generate_uid('r')
# fall back to the 'default' security group when none was given
security_groups = security_groups or ['default']
# number of instances to create
min_count = min_count or 1
max_count = max_count or min_count
block_device_mapping = block_device_mapping or []
if not instance_type:
# instance_type is the flavor; the default flavor is only used when
# none was passed in
instance_type = flavors.get_default_flavor()
# resolve the glance image id; for boot-from-volume there is no image_href
# and boot_meta is taken from the volume's image metadata instead
if image_href:
image_id, boot_meta = self._get_image(context, image_href)
else:
image_id = None
# boot-from-volume: pull the image metadata out of the block device mapping
boot_meta = self._get_bdm_image_metadata(
context, block_device_mapping, legacy_bdm)
self._check_auto_disk_config(image=boot_meta,
auto_disk_config=auto_disk_config)
handle_az = self._handle_availability_zone
availability_zone, forced_host, forced_node = handle_az(context,
availability_zone)
# build the base instance options from what we have so far; see
# _validate_and_build_base_options - much of the instance data originates here
base_options, max_net_count = self._validate_and_build_base_options(
context,
instance_type, boot_meta, image_href, image_id, kernel_id,
ramdisk_id, display_name, display_description,
key_name, key_data, security_groups, availability_zone,
forced_host, user_data, metadata, injected_files, access_ip_v4,
access_ip_v6, requested_networks, config_drive,
block_device_mapping, auto_disk_config, reservation_id,
max_count)
# max_net_count is the maximum number of instances requested by the
# user adjusted for any network quota constraints, including
# consideration of connections to each requested network
if max_net_count == 0:
raise exception.PortLimitExceeded()
elif max_net_count < max_count:
LOG.debug("max count reduced from %(max_count)d to "
"%(max_net_count)d due to network port quota",
{'max_count': max_count,
'max_net_count': max_net_count})
max_count = max_net_count
# normalize/transform the block device mapping
block_device_mapping = self._check_and_transform_bdm(
base_options, instance_type, boot_meta, min_count, max_count,
block_device_mapping, legacy_bdm)
instance_group = self._get_requested_instance_group(context,
scheduler_hints, check_server_group_quota)
# important: this creates the instance DB records (see _provision_instances)
instances = self._provision_instances(context, instance_type,
min_count, max_count, base_options, boot_meta, security_groups,
block_device_mapping, shutdown_terminate,
instance_group, check_server_group_quota)
# filter properties used when calling the scheduler, analyzed separately below
filter_properties = self._build_filter_properties(context,
scheduler_hints, forced_host,
forced_node, instance_type,
base_options.get('pci_request_info'))
for instance in instances:
self._record_action_start(context, instance,
instance_actions.CREATE)
# kick off the actual build; from this point nova-api hands off to
# nova-conductor, nova-scheduler and nova-compute
self.compute_task_api.build_instances(context,
instances=instances, image=boot_meta,
filter_properties=filter_properties,
admin_password=admin_password,
injected_files=injected_files,
requested_networks=requested_networks,
security_groups=security_groups,
block_device_mapping=block_device_mapping,
legacy_bdm=False)
return (instances, reservation_id)
先看下同一个类下的_provision_instances方法(由_create_instance调用)
def _provision_instances(self, context, instance_type, min_count,
max_count, base_options, boot_meta, security_groups,
block_device_mapping, shutdown_terminate,
instance_group, check_server_group_quota):
# Reserve quotas
# _check_num_instances_quota derives the cpu/memory needs from the flavor
# and reserves the quota for the requested number of instances.
# NOTE(review): the quota reservation details deserve a closer look
num_instances, quotas = self._check_num_instances_quota(
context, instance_type, min_count, max_count)
LOG.debug("Going to run %s instances..." % num_instances)
instances = []
try:
for i in xrange(num_instances):
instance = objects.Instance()
# base_options holds the basic data (name, flavor, states, ...);
# copy it into the new Instance object
instance.update(base_options)
# important: persists the new instance to the database
instance = self.create_db_entry_for_new_instance(
context, instance_type, boot_meta, instance,
security_groups, block_device_mapping,
num_instances, i, shutdown_terminate)
pci_requests = base_options['pci_request_info']
pci_requests.instance_uuid = instance.uuid
pci_requests.save(context)
instances.append(instance)
if instance_group:
if check_server_group_quota:
count = QUOTAS.count(context,
'server_group_members',
instance_group,
context.user_id)
try:
QUOTAS.limit_check(context,
server_group_members=count + 1)
except exception.OverQuota:
msg = _("Quota exceeded, too many servers in "
"group")
raise exception.QuotaError(msg)
objects.InstanceGroup.add_members(context,
instance_group.uuid,
[instance.uuid])
# send a state update notification for the initial create to
# show it going from non-existent to BUILDING
notifications.send_update_with_states(context, instance, None,
vm_states.BUILDING, None, None, service="api")
# In the case of any exceptions, attempt DB cleanup and rollback the
# quota reservations.
except Exception:
with excutils.save_and_reraise_exception():
try:
for instance in instances:
try:
instance.destroy()
except exception.ObjectActionError:
pass
finally:
quotas.rollback()
# Commit the reservations
# commit the quota reserved for the newly created instances
quotas.commit()
return instances
下面看同一个类下的create_db_entry_for_new_instance方法
def create_db_entry_for_new_instance(self, context, instance_type, image,
instance, security_group, block_device_mapping, num_instances,
index, shutdown_terminate=False):
"""Create an entry in the DB for this new instance,
including any related table updates (such as security group,
etc).
This is called by the scheduler after a location for the
instance has been determined.
"""
# _populate_instance_for_create generates the instance uuid and
# initializes the instance state
self._populate_instance_for_create(context, instance, image, index,
security_group, instance_type)
# derive the instance display_name and hostname
self._populate_instance_names(instance, num_instances)
instance.shutdown_terminate = shutdown_terminate
self.security_group_api.ensure_default(context)
instance.create(context)
if num_instances > 1:
# NOTE(russellb) We wait until this spot to handle
# multi_instance_display_name_template, because we need
# the UUID from the instance.
instance = self._apply_instance_name_template(context, instance,
index)
# NOTE (ndipanov): This can now raise exceptions but the instance
# has been created, so delete it and re-raise so
# that other cleanup can happen.
try:
# validate the block device mapping (extra disks, swap disk, ...)
self._validate_bdm(
context, instance, instance_type, block_device_mapping)
except (exception.CinderConnectionFailed, exception.InvalidBDM,
exception.InvalidVolume):
with excutils.save_and_reraise_exception():
instance.destroy(context)
# persist the instance's block device mappings
self._update_block_device_mapping(
context, instance_type, instance['uuid'], block_device_mapping)
return instance
# the whole flow above is really just building the instance's creation parameters
下面看同一个类由(_create_instance调用的)_build_filter_properties方法
```
def _build_filter_properties(self, context, scheduler_hints, forced_host,
forced_node, instance_type, pci_request_info):
filter_properties = dict(scheduler_hints=scheduler_hints)
filter_properties['instance_type'] = instance_type
# pin the instance to a specific host when one was forced
if forced_host:
filter_properties['force_hosts'] = [forced_host]
# pin the instance to a specific node when one was forced
if forced_node:
filter_properties['force_nodes'] = [forced_node]
if pci_request_info and pci_request_info.requests:
filter_properties['pci_requests'] = pci_request_info
return filter_properties
# returns a filter_properties dict carrying instance_type, force_hosts,
# force_nodes and the pci request info
```
下面通过上面的_create_instance转到同一个类下面的compute_task_api属性,代码如下:
def compute_task_api(self):
# Lazily construct and cache the conductor compute-task API handle.
if self._compute_task_api is None:
# TODO(alaski): Remove calls into here from conductor manager so
# that this isn't necessary. #1180540
from nova import conductor
self._compute_task_api = conductor.ComputeTaskAPI() # delegates to conductor.ComputeTaskAPI()
return self._compute_task_api
转到nova/conductor/__init__.py中
def ComputeTaskAPI(*args, **kwargs):
# Factory: pick the local (in-process) or RPC compute-task API depending
# on the conductor.use_local config flag or an explicit use_local kwarg.
use_local = kwargs.pop('use_local', False)
if oslo.config.cfg.CONF.conductor.use_local or use_local:
api = conductor_api.LocalComputeTaskAPI # local API when conductor.use_local is set
else:
api = conductor_api.ComputeTaskAPI
return api(*args, **kwargs)
转到nova/conductor/api.py下类LocalComputeTaskAPI下的build_instances方法
def build_instances(self, context, instances, image,
filter_properties, admin_password, injected_files,
requested_networks, security_groups, block_device_mapping,
legacy_bdm=True):
# runs self._manager.build_instances in the background;
# utils.spawn_n wraps eventlet.spawn_n(func, *args, **kwargs)
utils.spawn_n(self._manager.build_instances, context,
instances=instances, image=image,
filter_properties=filter_properties,
admin_password=admin_password, injected_files=injected_files,
requested_networks=requested_networks,
security_groups=security_groups,
block_device_mapping=block_device_mapping,
legacy_bdm=legacy_bdm)
下面看下self._manager.build_instances通过nova/conductor/rpcapi.py中的build_instances 跳转到nova/conductor/manager.py中的build_instances
def build_instances(self, context, instances, image, filter_properties,
admin_password, injected_files, requested_networks,
security_groups, block_device_mapping=None, legacy_bdm=True):
# TODO(ndipanov): Remove block_device_mapping and legacy_bdm in version
# 2.0 of the RPC API.
# build_request_spec (nova/scheduler/utils.py) bundles the image and
# instance properties into the request spec handed to the scheduler
request_spec = scheduler_utils.build_request_spec(context, image,
instances)
# TODO(danms): Remove this in version 2.0 of the RPC API
if (requested_networks and
not isinstance(requested_networks,
objects.NetworkRequestList)):
requested_networks = objects.NetworkRequestList(
objects=[objects.NetworkRequest.from_tuple(t)
for t in requested_networks])
try:
# check retry policy. Rather ugly use of instances[0]...
# but if we've exceeded max retries... then we really only
# have a single instance.
scheduler_utils.populate_retry(filter_properties,
instances[0].uuid)
# have nova-scheduler pick destination hosts using the filter properties
hosts = self.scheduler_client.select_destinations(context,
request_spec, filter_properties)
except Exception as exc:
for instance in instances:
scheduler_driver.handle_schedule_error(context, exc,
instance.uuid, request_spec)
return
for (instance, host) in itertools.izip(instances, hosts):
try:
instance.refresh()
except (exception.InstanceNotFound,
exception.InstanceInfoCacheNotFound):
LOG.debug('Instance deleted during build', instance=instance)
continue
local_filter_props = copy.deepcopy(filter_properties)
# record the selected host into this instance's filter properties
scheduler_utils.populate_filter_properties(local_filter_props,
host)
# The block_device_mapping passed from the api doesn't contain
# instance specific information
bdms = objects.BlockDeviceMappingList.get_by_instance_uuid(
context, instance.uuid)
# compute_rpcapi routes to build_and_run_instance in nova/compute/manager.py
self.compute_rpcapi.build_and_run_instance(context,
instance=instance, host=host['host'], image=image,
request_spec=request_spec,
filter_properties=local_filter_props,
admin_password=admin_password,
injected_files=injected_files,
requested_networks=requested_networks,
security_groups=security_groups,
block_device_mapping=bdms, node=host['nodename'],
limits=host['limits'])
转到nova/compute/manager.py中类ComputeManager中的build_and_run_instance方法
def build_and_run_instance(self, context, instance, image, request_spec,
filter_properties, admin_password=None,
injected_files=None, requested_networks=None,
security_groups=None, block_device_mapping=None,
node=None, limits=None):
# NOTE(danms): Remove this in v4.0 of the RPC API
if (requested_networks and
not isinstance(requested_networks,
objects.NetworkRequestList)):
requested_networks = objects.NetworkRequestList(
objects=[objects.NetworkRequest.from_tuple(t)
for t in requested_networks])
@utils.synchronized(instance.uuid)
def _locked_do_build_and_run_instance(*args, **kwargs):
# the core of the build path, serialized per instance uuid
self._do_build_and_run_instance(*args, **kwargs)
# NOTE(danms): We spawn here to return the RPC worker thread back to
# the pool. Since what follows could take a really long time, we don't
# want to tie up RPC workers.
utils.spawn_n(_locked_do_build_and_run_instance,
context, instance, image, request_spec,
filter_properties, admin_password, injected_files,
requested_networks, security_groups,
block_device_mapping, node, limits)
这里转到_do_build_and_run_instance
def _do_build_and_run_instance(self, context, instance, image,
request_spec, filter_properties, admin_password, injected_files,
requested_networks, security_groups, block_device_mapping,
node=None, limits=None):
.......
# b64 decode the files to inject:
decoded_files = self._decode_files(injected_files)
if limits is None:
limits = {}
if node is None:
node = self.driver.get_available_nodes(refresh=True)[0]
LOG.debug('No node specified, defaulting to %s', node,
instance=instance)
try:
# the real work happens here
self._build_and_run_instance(context, instance, image,
decoded_files, admin_password, requested_networks,
security_groups, block_device_mapping, node, limits,
filter_properties)
except exception.RescheduledException as e:
...............
展开同一个类下的self._build_and_run_instance方法
def _build_and_run_instance(self, context, instance, image, injected_files,
admin_password, requested_networks, security_groups,
block_device_mapping, node, limits, filter_properties):
image_name = image.get('name')
self._notify_about_instance_usage(context, instance, 'create.start',
extra_usage_info={'image_name': image_name})
try:
# resource tracker for the chosen node
rt = self._get_resource_tracker(node)
with rt.instance_claim(context, instance, limits):
# NOTE(russellb) It's important that this validation be done
# *after* the resource tracker instance claim, as that is where
# the host is set on the instance.
self._validate_instance_group_policy(context, instance,
filter_properties)
# allocate resources (network and storage); this is the key part,
# handled by _build_resources in this same class
with self._build_resources(context, instance,
requested_networks, security_groups, image,
block_device_mapping) as resources:
# move vm_state to BUILDING and task_state to SPAWNING
instance.vm_state = vm_states.BUILDING
instance.task_state = task_states.SPAWNING
instance.save(expected_task_state=
task_states.BLOCK_DEVICE_MAPPING)
block_device_info = resources['block_device_info']
network_info = resources['network_info']
# have the virt driver (libvirt here) actually spawn the guest
self.driver.spawn(context, instance, image,
injected_files, admin_password,
network_info=network_info,
block_device_info=block_device_info)
except Exception as e:
........
# NOTE(alaski): This is only useful during reschedules, remove it now.
instance.system_metadata.pop('network_allocated', None)
# read back the guest power state
instance.power_state = self._get_power_state(context, instance)
# vm_state becomes ACTIVE
instance.vm_state = vm_states.ACTIVE
# clear task_state
instance.task_state = None
instance.launched_at = timeutils.utcnow()
try:
instance.save(expected_task_state=task_states.SPAWNING)
except (exception.InstanceNotFound,
exception.UnexpectedDeletingTaskStateError) as e:
with excutils.save_and_reraise_exception():
self._notify_about_instance_usage(context, instance,
'create.end', fault=e)
self._notify_about_instance_usage(context, instance, 'create.end', # notify that the instance build finished
extra_usage_info={'message': _('Success')},
network_info=network_info)
下面转到资源分配,这里面调用了neutron 的api
def _build_resources(self, context, instance, requested_networks,
security_groups, image, block_device_mapping):
resources = {}
network_info = None
try:
# important: allocates the network resources for the instance
network_info = self._build_networks_for_instance(context, instance,
requested_networks, security_groups)
resources['network_info'] = network_info
except (exception.InstanceNotFound,
..............
try:
# Verify that all the BDMs have a device_name set and assign a
# default to the ones missing it with the help of the driver.
self._default_block_device_names(context, instance, image,
block_device_mapping)
instance.vm_state = vm_states.BUILDING
instance.task_state = task_states.BLOCK_DEVICE_MAPPING
instance.save()
# block device information
block_device_info = self._prep_block_device(context, instance,
block_device_mapping)
resources['block_device_info'] = block_device_info
except Exception:
............
try:
yield resources
except Exception as exc:
with excutils.save_and_reraise_exception() as ctxt:
if not isinstance(exc, (exception.InstanceNotFound,
exception.UnexpectedDeletingTaskStateError)):
LOG.exception(_LE('Instance failed to spawn'),
instance=instance)
# Make sure the async call finishes
if network_info is not None:
network_info.wait(do_raise=False)
try:
self._shutdown_instance(context, instance,
block_device_mapping, requested_networks,
try_deallocate_networks=False)
except Exception:
ctxt.reraise = False
msg = _('Could not clean up failed build,'
' not rescheduling')
raise exception.BuildAbortException(
instance_uuid=instance.uuid, reason=msg)
下面看self._build_networks_for_instance方法
def _build_networks_for_instance(self, context, instance,
requested_networks, security_groups):
# If we're here from a reschedule the network may already be allocated.
if strutils.bool_from_string(
instance.system_metadata.get('network_allocated', 'False')):
return self._get_instance_nw_info(context, instance)
if not self.is_neutron_security_groups:
security_groups = []
# macs_for_instance is not implemented by this driver and returns None
macs = self.driver.macs_for_instance(instance)
# dhcp_options_for_instance is not implemented by this driver and returns None
dhcp_options = self.driver.dhcp_options_for_instance(instance)
# _allocate_network builds the actual network_info
network_info = self._allocate_network(context, instance,
requested_networks, macs, security_groups, dhcp_options)
if not instance.access_ip_v4 and not instance.access_ip_v6:
# If CONF.default_access_ip_network_name is set, grab the
# corresponding network and set the access ip values accordingly.
# Note that when there are multiple ips to choose from, an
# arbitrary one will be chosen.
network_name = CONF.default_access_ip_network_name
if not network_name:
return network_info
for vif in network_info:
if vif['network']['label'] == network_name:
for ip in vif.fixed_ips():
if ip['version'] == 4:
instance.access_ip_v4 = ip['address']
if ip['version'] == 6:
instance.access_ip_v6 = ip['address']
instance.save()
break
return network_info
转到同一个类下的self._allocate_network方法
def _allocate_network(self, context, instance, requested_networks, macs,
security_groups, dhcp_options):
"""Start network allocation asynchronously. Return an instance
of NetworkInfoAsyncWrapper that can be used to retrieve the
allocated networks when the operation has finished.
"""
# NOTE(comstud): Since we're allocating networks asynchronously,
# this task state has little meaning, as we won't be in this
# state for very long.
instance.vm_state = vm_states.BUILDING
instance.task_state = task_states.NETWORKING
instance.save(expected_task_state=[None])
self._update_resource_tracker(context, instance)
is_vpn = pipelib.is_vpn_image(instance.image_ref)
# NetworkInfoAsyncWrapper is just the async wrapper model;
# _allocate_network_async does the actual allocation
return network_model.NetworkInfoAsyncWrapper(
self._allocate_network_async, context, instance,
requested_networks, macs, security_groups, is_vpn,
dhcp_options)
转到self._allocate_network_async方法
def _allocate_network_async(self, context, instance, requested_networks,
macs, security_groups, is_vpn, dhcp_options):
"""Method used to allocate networks in the background.
Broken out for testing.
"""
LOG.debug("Allocating IP information in the background.",
instance=instance)
retries = CONF.network_allocate_retries
if retries < 0:
LOG.warn(_("Treating negative config value (%(retries)s) for "
"'network_allocate_retries' as 0."),
{'retries': retries})
attempts = retries > 1 and retries + 1 or 1
retry_time = 1
for attempt in range(1, attempts + 1):
try:
# self.network_api = network.API()
# calls allocate_for_instance in nova/network/neutronv2/api.py
nwinfo = self.network_api.allocate_for_instance(
context, instance, vpn=is_vpn,
requested_networks=requested_networks,
macs=macs,
security_groups=security_groups,
dhcp_options=dhcp_options)
LOG.debug('Instance network_info: |%s|', nwinfo,
instance=instance)
sys_meta = instance.system_metadata
sys_meta['network_allocated'] = 'True'
self._instance_update(context, instance.uuid,
system_metadata=sys_meta)
return nwinfo
except Exception:
exc_info = sys.exc_info()
log_info = {'attempt': attempt,
'attempts': attempts}
if attempt == attempts:
LOG.exception(_LE('Instance failed network setup '
'after %(attempts)d attempt(s)'),
log_info)
raise exc_info[0], exc_info[1], exc_info[2]
LOG.warn(_('Instance failed network setup '
'(attempt %(attempt)d of %(attempts)d)'),
log_info, instance=instance)
# exponential backoff between retries, capped at 30 seconds
time.sleep(retry_time)
retry_time *= 2
if retry_time > 30:
retry_time = 30
下面转到由nwinfo= self.network_api.allocate_for_instance调用的nova/network/neutronv2/api.py中allocate_for_instance
def allocate_for_instance(self, context, instance, **kwargs):
"""Allocate network resources for the instance."""
hypervisor_macs = kwargs.get('macs', None)
available_macs = None
if hypervisor_macs is not None:
# Make a copy we can mutate: records macs that have not been used
# to create a port on a network. If we find a mac with a
# pre-allocated port we also remove it from this set.
available_macs = set(hypervisor_macs)
# grab the neutronv2 client (a neutronclient.v2_0.client.Client)
neutron = neutronv2.get_client(context)
LOG.debug('allocate_for_instance()', instance=instance)
if not instance.project_id:
msg = _('empty project id for instance %s')
raise exception.InvalidInput(
reason=msg % instance.uuid)
requested_networks = kwargs.get('requested_networks')
dhcp_opts = kwargs.get('dhcp_options', None)
ports = {}
net_ids = []
ordered_networks = []
if requested_networks:
for request in requested_networks:
if request.port_id:
port = neutron.show_port(request.port_id)['port']
if port.get('device_id'):
raise exception.PortInUse(port_id=request.port_id)
if hypervisor_macs is not None:
if port['mac_address'] not in hypervisor_macs:
raise exception.PortNotUsable(
port_id=request.port_id,
instance=instance.uuid)
else:
# Don't try to use this MAC if we need to create a
# port on the fly later. Identical MACs may be
# configured by users into multiple ports so we
# discard rather than popping.
available_macs.discard(port['mac_address'])
request.network_id = port['network_id']
ports[request.port_id] = port
if request.network_id:
net_ids.append(request.network_id)
ordered_networks.append(request)
# when only a net_id was requested the loop above mostly no-ops unless a
# mac address or port was specified; _get_available_networks then fetches
# the network ids usable by this tenant - without any nets the network
# build cannot continue
nets = self._get_available_networks(context, instance.project_id,
net_ids)
if not nets:
LOG.warn(_LW("No network configured!"), instance=instance)
return network_model.NetworkInfo([])
if (not requested_networks
or requested_networks.is_single_unspecified):
# bug/1267723 - if no network is requested and more
# than one is available then raise NetworkAmbiguous Exception
if len(nets) > 1:
msg = _("Multiple possible networks found, use a Network "
"ID to be more specific.")
raise exception.NetworkAmbiguous(msg)
ordered_networks.append(
objects.NetworkRequest(network_id=nets[0]['id']))
self._check_external_network_attach(context, nets)
security_groups = kwargs.get('security_groups', [])
security_group_ids = []
if len(security_groups):
search_opts = {'tenant_id': instance.project_id}
user_security_groups = neutron.list_security_groups(
**search_opts).get('security_groups')
for security_group in security_groups:
name_match = None
uuid_match = None
for user_security_group in user_security_groups:
if user_security_group['name'] == security_group:
if name_match:
raise exception.NoUniqueMatch(
_("Multiple security groups found matching"
" '%s'. Use an ID to be more specific.") %
security_group)
name_match = user_security_group['id']
if user_security_group['id'] == security_group:
uuid_match = user_security_group['id']
# If a user names the security group the same as
# another's security groups uuid, the name takes priority.
if not name_match and not uuid_match:
raise exception.SecurityGroupNotFound(
security_group_id=security_group)
elif name_match:
security_group_ids.append(name_match)
elif uuid_match:
security_group_ids.append(uuid_match)
touched_port_ids = []
created_port_ids = []
ports_in_requested_order = []
nets_in_requested_order = []
for request in ordered_networks:
# Network lookup for available network_id
network = None
for net in nets:
if net['id'] == request.network_id:
network = net
break
# if network_id did not pass validate_networks() and not available
# here then skip it safely not continuing with a None Network
else:
continue
nets_in_requested_order.append(network)
if (security_groups and not (
network['subnets']
and network.get('port_security_enabled', True))):
raise exception.SecurityGroupCannotBeApplied()
request.network_id = network['id']
zone = 'compute:%s' % instance.availability_zone
port_req_body = {'port': {'device_id': instance.uuid,
'device_owner': zone}}
try:
self._populate_neutron_extension_values(context,
instance,
request.pci_request_id,
port_req_body)
# Requires admin creds to set port bindings
port_client = (neutron if not
self._has_port_binding_extension(context) else
neutronv2.get_client(context, admin=True))
if request.port_id:
port = ports[request.port_id]
port_client.update_port(port['id'], port_req_body)
touched_port_ids.append(port['id'])
ports_in_requested_order.append(port['id'])
else:
# create the port through neutron
created_port = self._create_port(
port_client, instance, request.network_id,
port_req_body, request.address,
security_group_ids, available_macs, dhcp_opts)
created_port_ids.append(created_port)
ports_in_requested_order.append(created_port)
except Exception:
with excutils.save_and_reraise_exception():
for port_id in touched_port_ids:
try:
port_req_body = {'port': {'device_id': ''}}
# Requires admin creds to set port bindings
if self._has_port_binding_extension(context):
port_req_body['port']['binding:host_id'] = None
port_client = neutronv2.get_client(
context, admin=True)
else:
port_client = neutron
port_client.update_port(port_id, port_req_body)
except Exception:
msg = _LE("Failed to update port %s")
LOG.exception(msg, port_id)
self._delete_ports(neutron, instance, created_port_ids)
nw_info = self.get_instance_nw_info(context, instance,
networks=nets_in_requested_order,
port_ids=ports_in_requested_order)
return network_model.NetworkInfo([vif for vif in nw_info
if vif['id'] in created_port_ids +
touched_port_ids])
转到同一个类下的_get_available_networks方法
def _get_available_networks(self, context, project_id,
net_ids=None, neutron=None):
"""Return a network list available for the tenant.
The list contains networks owned by the tenant and public networks.
If net_ids specified, it searches networks with requested IDs only.
"""
if not neutron:
neutron = neutronv2.get_client(context)
if net_ids:
# If user has specified to attach instance only to specific
# networks then only add these to **search_opts. This search will
# also include 'shared' networks.
search_opts = {'id': net_ids}
nets = neutron.list_networks(**search_opts).get('networks', [])
else:
# (1) Retrieve non-public network list owned by the tenant.
search_opts = {'tenant_id': project_id, 'shared': False}
nets = neutron.list_networks(**search_opts).get('networks', [])
# (2) Retrieve public network list.
search_opts = {'shared': True}
nets += neutron.list_networks(**search_opts).get('networks', [])
# keep the result in the same order as the requested net_ids
_ensure_requested_network_ordering(
lambda x: x['id'],
nets,
net_ids)
return nets
转到 _create_port 方法
def _create_port(self, port_client, instance, network_id, port_req_body,
fixed_ip=None, security_group_ids=None,
available_macs=None, dhcp_opts=None):
try:
# pin the requested fixed IP on the new port when one was given
if fixed_ip:
port_req_body['port']['fixed_ips'] = [
{'ip_address': str(fixed_ip)}]
port_req_body['port']['network_id'] = network_id
port_req_body['port']['admin_state_up'] = True
port_req_body['port']['tenant_id'] = instance['project_id']
if security_group_ids:
port_req_body['port']['security_groups'] = security_group_ids
# consume one of the hypervisor-supplied MACs, failing when none are left
if available_macs is not None:
if not available_macs:
raise exception.PortNotFree(
instance=instance['uuid'])
mac_address = available_macs.pop()
port_req_body['port']['mac_address'] = mac_address
if dhcp_opts is not None:
port_req_body['port']['extra_dhcp_opts'] = dhcp_opts
# ask neutron to create the port and return its id
port_id = port_client.create_port(port_req_body)['port']['id']
LOG.debug('Successfully created port: %s', port_id,
instance=instance)
return port_id
except neutron_client_exc.NeutronClientException:
...........
这里的port_client就是调用的neutron client
port_client = (neutron if not
self._has_port_binding_extension(context) else
neutronv2.get_client(context, admin=True))