# vim: tabstop=4 shiftwidth=4 softtabstop=4

# Copyright 2010 United States Government as represented by the
# Administrator of the National Aeronautics and Space Administration.
# Copyright 2011 Justin Santa Barbara
# All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
"""Handles all processes relating to instances (guest vms).
22
The :py:class:`ComputeManager` class is a :py:class:`nova.manager.Manager` that
23
handles RPC calls relating to creating instances. It is responsible for
24
building a disk image, launching it via the underlying virtualization driver,
25
responding to calls to check its state, attaching persistent storage, and
30
:instances_path: Where instances are kept on disk
31
:compute_driver: Name of class that is used to handle virtualization, loaded
32
by :func:`nova.utils.import_object`
45
from eventlet import greenthread
47
from nova import block_device
49
from nova.compute import aggregate_states
50
from nova.compute import instance_types
51
from nova.compute import power_state
52
from nova.compute import task_states
53
from nova.compute import utils as compute_utils
54
from nova.compute import vm_states
55
from nova import exception
56
from nova import flags
58
from nova import log as logging
59
from nova import manager
60
from nova import network
61
from nova.network import model as network_model
62
from nova.notifier import api as notifier
63
from nova.openstack.common import cfg
65
from nova import utils
66
from nova.virt import driver
68
from nova import volume
72
cfg.StrOpt('instances_path',
73
default='$state_path/instances',
74
help='where instances are stored on disk'),
75
cfg.StrOpt('compute_driver',
76
default='nova.virt.connection.get_connection',
77
help='Driver to use for controlling virtualization'),
78
cfg.StrOpt('console_host',
79
default=socket.gethostname(),
80
help='Console proxy host to use to connect '
81
'to instances on this host.'),
82
cfg.IntOpt('live_migration_retry_count',
84
help="Number of 1 second retries needed in live_migration"),
85
cfg.IntOpt("reboot_timeout",
87
help="Automatically hard reboot an instance if it has been "
88
"stuck in a rebooting state longer than N seconds. "
89
"Set to 0 to disable."),
90
cfg.IntOpt("rescue_timeout",
92
help="Automatically unrescue an instance after N seconds. "
93
"Set to 0 to disable."),
94
cfg.IntOpt("resize_confirm_window",
96
help="Automatically confirm resizes after N seconds. "
97
"Set to 0 to disable."),
98
cfg.IntOpt('host_state_interval',
100
help='Interval in seconds for querying the host status'),
101
cfg.IntOpt("running_deleted_instance_timeout",
103
help="Number of seconds after being deleted when a running "
104
"instance should be considered eligible for cleanup."),
105
cfg.IntOpt("running_deleted_instance_poll_interval",
107
help="Number of periodic scheduler ticks to wait between "
108
"runs of the cleanup task."),
109
cfg.StrOpt("running_deleted_instance_action",
111
help="Action to take if a running deleted instance is detected."
112
"Valid options are 'noop', 'log' and 'reap'. "
113
"Set to 'noop' to disable."),
114
cfg.IntOpt("image_cache_manager_interval",
116
help="Number of periodic scheduler ticks to wait between "
117
"runs of the image cache manager."),
118
cfg.IntOpt("heal_instance_info_cache_interval",
120
help="Number of seconds between instance info_cache self "
125
FLAGS.register_opts(compute_opts)
127
LOG = logging.getLogger(__name__)
130
def publisher_id(host=None):
    """Build the notifier publisher id for the compute service.

    :param host: optional host name; ``None`` lets the notifier pick
                 its default host.
    """
    service = "compute"
    return notifier.publisher_id(service, host)
134
def checks_instance_lock(function):
135
"""Decorator to prevent action against locked instances for non-admins."""
136
@functools.wraps(function)
137
def decorated_function(self, context, instance_uuid, *args, **kwargs):
138
LOG.info(_("check_instance_lock: decorating: |%s|"), function,
140
LOG.info(_("check_instance_lock: arguments: |%(self)s| |%(context)s|"
141
" |%(instance_uuid)s|") % locals(), context=context)
142
locked = self.get_lock(context, instance_uuid)
143
admin = context.is_admin
144
LOG.info(_("check_instance_lock: locked: |%s|"), locked,
146
LOG.info(_("check_instance_lock: admin: |%s|"), admin,
149
# if admin or unlocked call function otherwise log error
150
if admin or not locked:
151
LOG.info(_("check_instance_lock: executing: |%s|"), function,
153
function(self, context, instance_uuid, *args, **kwargs)
155
LOG.error(_("check_instance_lock: not executing |%s|"),
156
function, context=context)
159
return decorated_function
162
def wrap_instance_fault(function):
163
"""Wraps a method to catch exceptions related to instances.
165
This decorator wraps a method to catch any exceptions having to do with
166
an instance that may get thrown. It then logs an instance fault in the db.
168
@functools.wraps(function)
169
def decorated_function(self, context, instance_uuid, *args, **kwargs):
171
return function(self, context, instance_uuid, *args, **kwargs)
172
except exception.InstanceNotFound:
175
with utils.save_and_reraise_exception():
176
self.add_instance_fault_from_exc(context, instance_uuid, e,
179
return decorated_function
182
def _get_image_meta(context, image_ref):
    """Look up and return the image metadata for *image_ref*.

    Resolves the image service responsible for the reference, then asks
    it for the image's metadata dict.
    """
    service, img_id = nova.image.get_image_service(context, image_ref)
    meta = service.show(context, img_id)
    return meta
187
class ComputeManager(manager.SchedulerDependentManager):
188
"""Manages the running instances from creation to destruction."""
190
def __init__(self, compute_driver=None, *args, **kwargs):
191
"""Load configuration options and connect to the hypervisor."""
192
# TODO(vish): sync driver creation logic with the rest of the system
193
# and re-document the module docstring
194
if not compute_driver:
195
compute_driver = FLAGS.compute_driver
197
self.driver = utils.check_isinstance(
198
utils.import_object(compute_driver),
199
driver.ComputeDriver)
200
except ImportError as e:
201
LOG.error(_("Unable to load the virtualization driver: %s") % (e))
204
self.network_api = network.API()
205
self.volume_api = volume.API()
206
self.network_manager = utils.import_object(FLAGS.network_manager)
207
self._last_host_check = 0
208
self._last_bw_usage_poll = 0
209
self._last_info_cache_heal = 0
211
super(ComputeManager, self).__init__(service_name="compute",
214
def _instance_update(self, context, instance_id, **kwargs):
    """Update an instance record in the database.

    The keyword arguments are collected into a dict of column values
    and handed to the DB layer; the updated instance ref is returned.
    """
    values = kwargs
    return self.db.instance_update(context, instance_id, values)
218
def _set_instance_error_state(self, context, instance_uuid):
220
self._instance_update(context,
221
instance_uuid, vm_state=vm_states.ERROR)
222
except exception.InstanceNotFound:
223
LOG.debug(_("Instance %(instance_uuid)s has been destroyed "
224
"from under us while trying to set it to ERROR") %
228
"""Initialization for a standalone compute service."""
229
self.driver.init_host(host=self.host)
230
context = nova.context.get_admin_context()
231
instances = self.db.instance_get_all_by_host(context, self.host)
232
for instance in instances:
233
instance_uuid = instance['uuid']
234
db_state = instance['power_state']
235
drv_state = self._get_power_state(context, instance)
237
expect_running = (db_state == power_state.RUNNING and
238
drv_state != db_state)
240
LOG.debug(_('Current state is %(drv_state)s, state in DB is '
241
'%(db_state)s.'), locals(), instance=instance)
243
if ((expect_running and FLAGS.resume_guests_state_on_host_boot) or
244
FLAGS.start_guests_on_host_boot):
245
LOG.info(_('Rebooting instance after nova-compute restart.'),
246
locals(), instance=instance)
247
self.reboot_instance(context, instance['uuid'])
248
elif drv_state == power_state.RUNNING:
249
# Hyper-V and VMWareAPI drivers will raise an exception
251
net_info = self._get_instance_nw_info(context, instance)
252
self.driver.ensure_filtering_rules_for_instance(instance,
253
self._legacy_nw_info(net_info))
254
except NotImplementedError:
255
LOG.warning(_('Hypervisor driver does not support '
258
def _get_power_state(self, context, instance):
259
"""Retrieve the power state for the given instance."""
260
LOG.debug(_('Checking state'), instance=instance)
262
return self.driver.get_info(instance)["state"]
263
except exception.NotFound:
264
return power_state.FAILED
266
def get_console_topic(self, context, **kwargs):
267
"""Retrieves the console host for a project on this host.
269
Currently this is just set in the flags for each compute host.
272
#TODO(mdragon): perhaps make this variable by console_type?
273
return self.db.queue_get_for(context,
277
def get_console_pool_info(self, context, console_type):
    """Return console pool info, delegating to the virt driver."""
    pool_info = self.driver.get_console_pool_info(console_type)
    return pool_info
280
@exception.wrap_exception(notifier=notifier, publisher_id=publisher_id())
281
def refresh_security_group_rules(self, context, security_group_id,
283
"""Tell the virtualization driver to refresh security group rules.
285
Passes straight through to the virtualization driver.
288
return self.driver.refresh_security_group_rules(security_group_id)
290
@exception.wrap_exception(notifier=notifier, publisher_id=publisher_id())
291
def refresh_security_group_members(self, context,
292
security_group_id, **kwargs):
293
"""Tell the virtualization driver to refresh security group members.
295
Passes straight through to the virtualization driver.
298
return self.driver.refresh_security_group_members(security_group_id)
300
@exception.wrap_exception(notifier=notifier, publisher_id=publisher_id())
def refresh_provider_fw_rules(self, context, **kwargs):
    """Refresh provider firewall rules.

    Passes straight through to the virtualization driver.
    """
    virt_driver = self.driver
    return virt_driver.refresh_provider_fw_rules(**kwargs)
305
def _get_instance_nw_info(self, context, instance):
306
"""Get a list of dictionaries of network data of an instance.
307
Returns an empty list if stub_network flag is set."""
308
if FLAGS.stub_network:
309
return network_model.NetworkInfo()
311
# get the network info from network
312
network_info = self.network_api.get_instance_nw_info(context,
316
def _legacy_nw_info(self, network_info):
317
"""Converts the model nw_info object to legacy style"""
318
if self.driver.legacy_nwinfo():
319
network_info = compute_utils.legacy_network_info(network_info)
322
def _setup_block_device_mapping(self, context, instance):
323
"""setup volumes for block device mapping"""
324
block_device_mapping = []
327
for bdm in self.db.block_device_mapping_get_all_by_instance(
328
context, instance['id']):
329
LOG.debug(_('Setting up bdm %s'), bdm, instance=instance)
333
if bdm['virtual_name']:
334
virtual_name = bdm['virtual_name']
335
device_name = bdm['device_name']
336
assert block_device.is_swap_or_ephemeral(virtual_name)
337
if virtual_name == 'swap':
338
swap = {'device_name': device_name,
339
'swap_size': bdm['volume_size']}
340
elif block_device.is_ephemeral(virtual_name):
341
eph = {'num': block_device.ephemeral_num(virtual_name),
342
'virtual_name': virtual_name,
343
'device_name': device_name,
344
'size': bdm['volume_size']}
345
ephemerals.append(eph)
348
if ((bdm['snapshot_id'] is not None) and
349
(bdm['volume_id'] is None)):
350
# TODO(yamahata): default name and description
351
snapshot = self.volume_api.get_snapshot(context,
353
vol = self.volume_api.create(context, bdm['volume_size'],
355
# TODO(yamahata): creating volume simultaneously
356
# reduces creation time?
357
self.volume_api.wait_creation(context, vol)
358
self.db.block_device_mapping_update(
359
context, bdm['id'], {'volume_id': vol['id']})
360
bdm['volume_id'] = vol['id']
362
if bdm['volume_id'] is not None:
363
volume = self.volume_api.get(context, bdm['volume_id'])
364
self.volume_api.check_attach(context, volume)
365
cinfo = self._attach_volume_boot(context,
369
self.db.block_device_mapping_update(
371
{'connection_info': utils.dumps(cinfo)})
372
block_device_mapping.append({'connection_info': cinfo,
377
'root_device_name': instance['root_device_name'],
379
'ephemerals': ephemerals,
380
'block_device_mapping': block_device_mapping
383
def _is_instance_terminated(self, instance_uuid):
384
"""Instance in DELETING task state or not found in DB"""
385
context = nova.context.get_admin_context()
387
instance = self.db.instance_get_by_uuid(context, instance_uuid)
388
if instance['task_state'] == task_states.DELETING:
394
def _shutdown_instance_even_if_deleted(self, context, instance_uuid):
395
"""Call terminate_instance even for already deleted instances"""
398
self.terminate_instance(context, instance_uuid)
399
except exception.InstanceNotFound:
400
LOG.info(_("Instance %s already deleted from database. "
401
"Attempting forceful vm deletion")
403
ctxt = nova.context.get_admin_context(read_deleted='yes')
404
self.terminate_instance(ctxt, instance_uuid)
405
except Exception as ex:
406
LOG.exception(_("Exception encountered while terminating the "
407
"instance %s") % instance_uuid)
409
def _run_instance(self, context, instance_uuid,
410
requested_networks=None,
415
"""Launch a new instance with specified options."""
416
context = context.elevated()
418
instance = self.db.instance_get_by_uuid(context, instance_uuid)
419
self._check_instance_not_already_created(context, instance)
420
image_meta = self._check_image_size(context, instance)
421
self._start_building(context, instance)
422
self._notify_about_instance_usage(instance, "create.start")
423
network_info = self._allocate_network(context, instance,
426
block_device_info = self._prep_block_device(context, instance)
427
instance = self._spawn(context, instance, image_meta,
428
network_info, block_device_info,
429
injected_files, admin_password)
431
with utils.save_and_reraise_exception():
432
self._deallocate_network(context, instance)
434
if (is_first_time and not instance['access_ip_v4']
435
and not instance['access_ip_v6']):
436
self._update_access_ip(context, instance, network_info)
438
self._notify_about_instance_usage(instance, "create.end",
439
network_info=network_info)
441
if self._is_instance_terminated(instance_uuid):
442
raise exception.InstanceNotFound
443
except exception.InstanceNotFound:
444
LOG.exception(_("Instance %s not found.") % instance_uuid)
445
# assuming the instance was already deleted, run "delete" again
447
self._shutdown_instance_even_if_deleted(context, instance_uuid)
449
except Exception as e:
450
with utils.save_and_reraise_exception():
451
self._set_instance_error_state(context, instance_uuid)
453
def _update_access_ip(self, context, instance, nw_info):
454
"""Update the access ip values for a given instance.
456
If FLAGS.default_access_ip_network_name is set, this method will
457
grab the corresponding network and set the access ip values
458
accordingly. Note that when there are multiple ips to choose from,
459
an arbitrary one will be chosen.
462
network_name = FLAGS.default_access_ip_network_name
468
if vif['network']['label'] == network_name:
469
for ip in vif.fixed_ips():
470
if ip['version'] == 4:
471
update_info['access_ip_v4'] = ip['address']
472
if ip['version'] == 6:
473
update_info['access_ip_v6'] = ip['address']
475
self.db.instance_update(context, instance.uuid, update_info)
477
def _check_instance_not_already_created(self, context, instance):
478
"""Ensure an instance with the same name is not already present."""
479
if self.driver.instance_exists(instance['name']):
480
_msg = _("Instance has already been created")
481
raise exception.Invalid(_msg)
483
def _check_image_size(self, context, instance):
484
"""Ensure image is smaller than the maximum size allowed by the
487
The image stored in Glance is potentially compressed, so we use two
488
checks to ensure that the size isn't exceeded:
490
1) This one - checks compressed size, this a quick check to
491
eliminate any images which are obviously too large
493
2) Check uncompressed size in nova.virt.xenapi.vm_utils. This
494
is a slower check since it requires uncompressing the entire
495
image, but is accurate because it reflects the image's
498
image_meta = _get_image_meta(context, instance['image_ref'])
501
size_bytes = image_meta['size']
503
# Size is not a required field in the image service (yet), so
504
# we are unable to rely on it being there even though it's in
507
# TODO(jk0): Should size be required in the image service?
510
instance_type_id = instance['instance_type_id']
511
instance_type = instance_types.get_instance_type(instance_type_id)
512
allowed_size_gb = instance_type['root_gb']
514
# NOTE(johannes): root_gb is allowed to be 0 for legacy reasons
515
# since libvirt interpreted the value differently than other
516
# drivers. A value of 0 means don't check size.
517
if not allowed_size_gb:
520
allowed_size_bytes = allowed_size_gb * 1024 * 1024 * 1024
522
image_id = image_meta['id']
523
LOG.debug(_("image_id=%(image_id)s, image_size_bytes="
524
"%(size_bytes)d, allowed_size_bytes="
525
"%(allowed_size_bytes)d") % locals())
527
if size_bytes > allowed_size_bytes:
528
LOG.info(_("Image '%(image_id)s' size %(size_bytes)d exceeded"
529
" instance_type allowed size "
530
"%(allowed_size_bytes)d")
532
raise exception.ImageTooLarge()
536
def _start_building(self, context, instance):
537
"""Save the host and launched_on fields and log appropriately."""
538
LOG.audit(_('Starting instance...'), context=context,
540
self._instance_update(context, instance['uuid'],
541
host=self.host, launched_on=self.host,
542
vm_state=vm_states.BUILDING,
545
def _allocate_network(self, context, instance, requested_networks):
546
"""Allocate networks for an instance and return the network info"""
547
if FLAGS.stub_network:
548
LOG.debug(_('Skipping network allocation for instance'),
550
return network_model.NetworkInfo()
551
self._instance_update(context, instance['uuid'],
552
vm_state=vm_states.BUILDING,
553
task_state=task_states.NETWORKING)
554
is_vpn = instance['image_ref'] == str(FLAGS.vpn_image_id)
556
# allocate and get network info
557
network_info = self.network_api.allocate_for_instance(
558
context, instance, vpn=is_vpn,
559
requested_networks=requested_networks)
561
LOG.exception(_('Instance failed network setup'),
565
LOG.debug(_('Instance network_info: |%s|'), network_info,
570
def _prep_block_device(self, context, instance):
571
"""Set up the block device for an instance with error logging"""
572
self._instance_update(context, instance['uuid'],
573
vm_state=vm_states.BUILDING,
574
task_state=task_states.BLOCK_DEVICE_MAPPING)
576
return self._setup_block_device_mapping(context, instance)
578
LOG.exception(_('Instance failed block device setup'),
582
def _spawn(self, context, instance, image_meta, network_info,
583
block_device_info, injected_files, admin_pass):
584
"""Spawn an instance with error logging and update its power state"""
585
self._instance_update(context, instance['uuid'],
586
vm_state=vm_states.BUILDING,
587
task_state=task_states.SPAWNING)
588
instance['injected_files'] = injected_files
589
instance['admin_pass'] = admin_pass
591
self.driver.spawn(context, instance, image_meta,
592
self._legacy_nw_info(network_info), block_device_info)
594
LOG.exception(_('Instance failed to spawn'), instance=instance)
597
current_power_state = self._get_power_state(context, instance)
598
return self._instance_update(context, instance['uuid'],
599
power_state=current_power_state,
600
vm_state=vm_states.ACTIVE,
602
launched_at=utils.utcnow())
604
def _notify_about_instance_usage(self, instance, event_suffix,
605
usage_info=None, network_info=None):
607
usage_info = utils.usage_from_instance(instance,
608
network_info=network_info)
609
notifier.notify('compute.%s' % self.host,
610
'compute.instance.%s' % event_suffix,
611
notifier.INFO, usage_info)
613
def _deallocate_network(self, context, instance):
614
if not FLAGS.stub_network:
615
LOG.debug(_('Deallocating network for instance'),
617
self.network_api.deallocate_for_instance(context, instance)
619
def _get_instance_volume_bdms(self, context, instance_id):
620
bdms = self.db.block_device_mapping_get_all_by_instance(context,
622
return [bdm for bdm in bdms if bdm['volume_id']]
624
def _get_instance_volume_bdm(self, context, instance_id, volume_id):
625
bdms = self._get_instance_volume_bdms(context, instance_id)
627
# NOTE(vish): Comparing as strings because the os_api doesn't
628
# convert to integer and we may wish to support uuids
630
if str(bdm['volume_id']) == str(volume_id):
633
def _get_instance_volume_block_device_info(self, context, instance_id):
634
bdms = self._get_instance_volume_bdms(context, instance_id)
635
block_device_mapping = []
637
cinfo = utils.loads(bdm['connection_info'])
638
block_device_mapping.append({'connection_info': cinfo,
641
# NOTE(vish): The mapping is passed in so the driver can disconnect
642
# from remote volumes if necessary
643
return {'block_device_mapping': block_device_mapping}
645
@exception.wrap_exception(notifier=notifier, publisher_id=publisher_id())
647
def run_instance(self, context, instance_uuid, **kwargs):
648
@utils.synchronized(instance_uuid)
649
def do_run_instance():
650
self._run_instance(context, instance_uuid, **kwargs)
653
@exception.wrap_exception(notifier=notifier, publisher_id=publisher_id())
654
@checks_instance_lock
656
def start_instance(self, context, instance_uuid):
657
@utils.synchronized(instance_uuid)
658
def do_start_instance():
659
"""Starting an instance on this host."""
660
# TODO(yamahata): injected_files isn't supported.
661
# Anyway OSAPI doesn't support stop/start yet
662
# FIXME(vish): I've kept the files during stop instance, but
663
# I think start will fail due to the files still
664
self._run_instance(context, instance_uuid)
667
def _shutdown_instance(self, context, instance, action_str):
668
"""Shutdown an instance on this host."""
669
context = context.elevated()
670
instance_id = instance['id']
671
instance_uuid = instance['uuid']
672
LOG.audit(_('%(action_str)s instance') % {'action_str': action_str},
673
context=context, instance=instance)
675
self._notify_about_instance_usage(instance, "shutdown.start")
677
# get network info before tearing down
678
network_info = self._get_instance_nw_info(context, instance)
679
# tear down allocated network structure
680
self._deallocate_network(context, instance)
682
# NOTE(vish) get bdms before destroying the instance
683
bdms = self._get_instance_volume_bdms(context, instance_id)
684
block_device_info = self._get_instance_volume_block_device_info(
685
context, instance_id)
686
self.driver.destroy(instance, self._legacy_nw_info(network_info),
690
# NOTE(vish): actual driver detach done in driver.destroy, so
691
# just tell nova-volume that we are done with it.
692
volume = self.volume_api.get(context, bdm['volume_id'])
693
connector = self.driver.get_volume_connector(instance)
694
self.volume_api.terminate_connection(context,
697
self.volume_api.detach(context, volume)
698
except exception.DiskNotFound as exc:
699
LOG.warn(_('Ignoring DiskNotFound: %s') % exc,
702
self._notify_about_instance_usage(instance, "shutdown.end")
704
def _cleanup_volumes(self, context, instance_id):
705
bdms = self.db.block_device_mapping_get_all_by_instance(context,
708
LOG.debug(_("terminating bdm %s") % bdm)
709
if bdm['volume_id'] and bdm['delete_on_termination']:
710
volume = self.volume_api.get(context, bdm['volume_id'])
711
self.volume_api.delete(context, volume)
712
# NOTE(vish): bdms will be deleted on instance destroy
714
def _delete_instance(self, context, instance):
715
"""Delete an instance on this host."""
716
instance_id = instance['id']
717
self._notify_about_instance_usage(instance, "delete.start")
718
self._shutdown_instance(context, instance, 'Terminating')
719
self._cleanup_volumes(context, instance_id)
720
self._instance_update(context,
722
vm_state=vm_states.DELETED,
724
terminated_at=utils.utcnow())
726
self.db.instance_destroy(context, instance_id)
727
self._notify_about_instance_usage(instance, "delete.end")
729
@exception.wrap_exception(notifier=notifier, publisher_id=publisher_id())
730
@checks_instance_lock
732
def terminate_instance(self, context, instance_uuid):
733
"""Terminate an instance on this host."""
734
@utils.synchronized(instance_uuid)
735
def do_terminate_instance():
736
elevated = context.elevated()
737
instance = self.db.instance_get_by_uuid(elevated, instance_uuid)
738
compute_utils.notify_usage_exists(instance, current_period=True)
740
self._delete_instance(context, instance)
741
except exception.InstanceTerminationFailure as error:
742
msg = _('%s. Setting instance vm_state to ERROR')
743
LOG.error(msg % error)
744
self._set_instance_error_state(context, instance_uuid)
745
except exception.InstanceNotFound as e:
747
do_terminate_instance()
749
@exception.wrap_exception(notifier=notifier, publisher_id=publisher_id())
750
@checks_instance_lock
752
def stop_instance(self, context, instance_uuid):
753
"""Stopping an instance on this host."""
754
@utils.synchronized(instance_uuid)
755
def do_stop_instance():
756
instance = self.db.instance_get_by_uuid(context, instance_uuid)
757
self._shutdown_instance(context, instance, 'Stopping')
758
self._instance_update(context,
760
vm_state=vm_states.STOPPED,
764
@exception.wrap_exception(notifier=notifier, publisher_id=publisher_id())
765
@checks_instance_lock
767
def power_off_instance(self, context, instance_uuid):
768
"""Power off an instance on this host."""
769
instance = self.db.instance_get_by_uuid(context, instance_uuid)
770
self._notify_about_instance_usage(instance, "power_off.start")
771
self.driver.power_off(instance)
772
current_power_state = self._get_power_state(context, instance)
773
self._instance_update(context,
775
power_state=current_power_state,
777
self._notify_about_instance_usage(instance, "power_off.end")
779
@exception.wrap_exception(notifier=notifier, publisher_id=publisher_id())
780
@checks_instance_lock
782
def power_on_instance(self, context, instance_uuid):
783
"""Power on an instance on this host."""
784
instance = self.db.instance_get_by_uuid(context, instance_uuid)
785
self._notify_about_instance_usage(instance, "power_on.start")
786
self.driver.power_on(instance)
787
current_power_state = self._get_power_state(context, instance)
788
self._instance_update(context,
790
power_state=current_power_state,
792
self._notify_about_instance_usage(instance, "power_on.end")
794
@exception.wrap_exception(notifier=notifier, publisher_id=publisher_id())
795
@checks_instance_lock
797
def rebuild_instance(self, context, instance_uuid, **kwargs):
798
"""Destroy and re-make this instance.
800
A 'rebuild' effectively purges all existing data from the system and
801
remakes the VM with given 'metadata' and 'personalities'.
803
:param context: `nova.RequestContext` object
804
:param instance_uuid: Instance Identifier (UUID)
805
:param injected_files: Files to inject
806
:param new_pass: password to set on rebuilt instance
809
self._rebuild_instance(context, instance_uuid, kwargs)
810
except exception.ImageNotFound:
811
msg = _("Cannot rebuild instance [%(instance_uuid)s]"
812
", because the given image does not exist.")
813
LOG.error(msg % instance_uuid, context=context)
814
self._set_instance_error_state(context, instance_uuid)
815
except Exception as exc:
816
msg = _("Cannot rebuild instance [%(instance_uuid)s]: %(exc)s")
817
LOG.error(msg % locals(), context=context)
818
self._set_instance_error_state(context, instance_uuid)
820
def _rebuild_instance(self, context, instance_uuid, kwargs):
821
context = context.elevated()
823
LOG.audit(_("Rebuilding instance %s"), instance_uuid, context=context)
825
instance = self.db.instance_get_by_uuid(context, instance_uuid)
826
self._notify_about_instance_usage(instance, "rebuild.start")
827
current_power_state = self._get_power_state(context, instance)
828
self._instance_update(context,
830
power_state=current_power_state,
831
vm_state=vm_states.REBUILDING,
834
network_info = self._get_instance_nw_info(context, instance)
835
self.driver.destroy(instance, self._legacy_nw_info(network_info))
837
self._instance_update(context,
839
vm_state=vm_states.REBUILDING,
840
task_state=task_states.BLOCK_DEVICE_MAPPING)
842
instance.injected_files = kwargs.get('injected_files', [])
843
network_info = self.network_api.get_instance_nw_info(context,
845
device_info = self._setup_block_device_mapping(context, instance)
847
self._instance_update(context,
849
vm_state=vm_states.REBUILDING,
850
task_state=task_states.SPAWNING)
851
# pull in new password here since the original password isn't in the db
852
instance.admin_pass = kwargs.get('new_pass',
853
utils.generate_password(FLAGS.password_length))
855
image_meta = _get_image_meta(context, instance['image_ref'])
857
self.driver.spawn(context, instance, image_meta,
858
self._legacy_nw_info(network_info), device_info)
860
current_power_state = self._get_power_state(context, instance)
861
self._instance_update(context,
863
power_state=current_power_state,
864
vm_state=vm_states.ACTIVE,
866
launched_at=utils.utcnow())
868
self._notify_about_instance_usage(instance, "rebuild.end",
869
network_info=network_info)
871
@exception.wrap_exception(notifier=notifier, publisher_id=publisher_id())
872
@checks_instance_lock
874
def reboot_instance(self, context, instance_uuid, reboot_type="SOFT"):
875
"""Reboot an instance on this host."""
876
LOG.audit(_("Rebooting instance %s"), instance_uuid, context=context)
877
context = context.elevated()
878
instance = self.db.instance_get_by_uuid(context, instance_uuid)
880
self._notify_about_instance_usage(instance, "reboot.start")
882
current_power_state = self._get_power_state(context, instance)
883
self._instance_update(context,
885
power_state=current_power_state,
886
vm_state=vm_states.ACTIVE)
888
if instance['power_state'] != power_state.RUNNING:
889
state = instance['power_state']
890
running = power_state.RUNNING
891
LOG.warn(_('trying to reboot a non-running '
892
'instance: %(instance_uuid)s (state: %(state)s '
893
'expected: %(running)s)') % locals(),
896
network_info = self._get_instance_nw_info(context, instance)
897
self.driver.reboot(instance, self._legacy_nw_info(network_info),
900
current_power_state = self._get_power_state(context, instance)
901
self._instance_update(context,
903
power_state=current_power_state,
904
vm_state=vm_states.ACTIVE,
907
self._notify_about_instance_usage(instance, "reboot.end")
909
@exception.wrap_exception(notifier=notifier, publisher_id=publisher_id())
911
def snapshot_instance(self, context, instance_uuid, image_id,
912
image_type='snapshot', backup_type=None,
914
"""Snapshot an instance on this host.
916
:param context: security context
917
:param instance_uuid: nova.db.sqlalchemy.models.Instance.Uuid
918
:param image_id: glance.db.sqlalchemy.models.Image.Id
919
:param image_type: snapshot | backup
920
:param backup_type: daily | weekly
921
:param rotation: int representing how many backups to keep around;
922
None if rotation shouldn't be used (as in the case of snapshots)
924
context = context.elevated()
925
instance_ref = self.db.instance_get_by_uuid(context, instance_uuid)
927
current_power_state = self._get_power_state(context, instance_ref)
928
self._instance_update(context,
930
power_state=current_power_state,
931
vm_state=vm_states.ACTIVE)
933
LOG.audit(_('instance %s: snapshotting'), instance_uuid,
936
if instance_ref['power_state'] != power_state.RUNNING:
937
state = instance_ref['power_state']
938
running = power_state.RUNNING
939
LOG.warn(_('trying to snapshot a non-running '
940
'instance: %(instance_uuid)s (state: %(state)s '
941
'expected: %(running)s)') % locals())
943
self._notify_about_instance_usage(instance_ref, "snapshot.start")
946
self.driver.snapshot(context, instance_ref, image_id)
948
self._instance_update(context, instance_ref['id'], task_state=None)
950
if image_type == 'snapshot' and rotation:
951
raise exception.ImageRotationNotAllowed()
953
elif image_type == 'backup' and rotation:
954
self.rotate_backups(context, instance_uuid, backup_type, rotation)
956
elif image_type == 'backup':
957
raise exception.RotationRequiredForBackup()
959
self._notify_about_instance_usage(instance_ref, "snapshot.end")
962
def rotate_backups(self, context, instance_uuid, backup_type, rotation):
    """Delete excess backups associated to an instance.

    Instances are allowed a fixed number of backups (the rotation number);
    this method deletes the oldest backups that exceed the rotation
    threshold.

    :param context: security context
    :param instance_uuid: string representing uuid of instance
    :param backup_type: daily | weekly
    :param rotation: int representing how many backups to keep around;
        None if rotation shouldn't be used (as in the case of snapshots)
    """
    # NOTE(jk0): Eventually extract this out to the ImageService?
    def fetch_images():
        # Page through the image service, newest first, until exhausted.
        images = []
        marker = None
        while True:
            batch = image_service.detail(context, filters=filters,
                    marker=marker, sort_key='created_at', sort_dir='desc')
            if not batch:
                break
            images += batch
            marker = batch[-1]['id']
        return images

    image_service = nova.image.get_default_image_service()
    filters = {'property-image_type': 'backup',
               'property-backup_type': backup_type,
               'property-instance_uuid': instance_uuid}

    images = fetch_images()
    num_images = len(images)
    LOG.debug(_("Found %(num_images)d images (rotation: %(rotation)d)")
              % locals())
    if num_images > rotation:
        # NOTE(sirp): this deletes all backups that exceed the rotation
        # limit; images are sorted newest-first so pop() yields the oldest.
        excess = len(images) - rotation
        LOG.debug(_("Rotating out %d backups") % excess)
        for i in xrange(excess):
            image = images.pop()
            image_id = image['id']
            LOG.debug(_("Deleting image %s") % image_id)
            image_service.delete(context, image_id)
1008
@exception.wrap_exception(notifier=notifier, publisher_id=publisher_id())
1009
@checks_instance_lock
1010
@wrap_instance_fault
1011
def set_admin_password(self, context, instance_uuid, new_pass=None):
1012
"""Set the root/admin password for an instance on this host.
1014
This is generally only called by API password resets after an
1015
image has been built.
1018
context = context.elevated()
1020
if new_pass is None:
1021
# Generate a random password
1022
new_pass = utils.generate_password(FLAGS.password_length)
1026
for i in xrange(max_tries):
1027
instance_ref = self.db.instance_get_by_uuid(context, instance_uuid)
1028
instance_id = instance_ref["id"]
1030
current_power_state = self._get_power_state(context, instance_ref)
1031
expected_state = power_state.RUNNING
1033
if current_power_state != expected_state:
1034
self._instance_update(context, instance_id, task_state=None)
1035
_msg = _('Failed to set admin password. Instance %s is not'
1036
' running') % instance_ref["uuid"]
1037
raise exception.Invalid(_msg)
1040
self.driver.set_admin_password(instance_ref, new_pass)
1041
LOG.audit(_("Instance %s: Root password set"),
1042
instance_ref["uuid"])
1043
self._instance_update(context,
1047
except NotImplementedError:
1048
# NOTE(dprince): if the driver doesn't implement
1049
# set_admin_password we break to avoid a loop
1050
LOG.warn(_('set_admin_password is not implemented '
1052
self._instance_update(context,
1056
except Exception, e:
1057
# Catch all here because this could be anything.
1059
if i == max_tries - 1:
1060
self._set_instance_error_state(context, instance_id)
1061
# We create a new exception here so that we won't
1062
# potentially reveal password information to the
1063
# API caller. The real exception is logged above
1064
_msg = _('Error setting admin password')
1065
raise exception.NovaException(_msg)
1069
@exception.wrap_exception(notifier=notifier, publisher_id=publisher_id())
@checks_instance_lock
@wrap_instance_fault
def inject_file(self, context, instance_uuid, path, file_contents):
    """Write a file to the specified path in an instance on this host."""
    context = context.elevated()
    instance_ref = self.db.instance_get_by_uuid(context, instance_uuid)
    current_power_state = self._get_power_state(context, instance_ref)
    expected_state = power_state.RUNNING
    if current_power_state != expected_state:
        # Warn but continue; the driver decides whether it can inject.
        LOG.warn(_('trying to inject a file into a non-running '
                   'instance: %(instance_uuid)s (state: '
                   '%(current_power_state)s '
                   'expected: %(expected_state)s)') % locals())
    instance_uuid = instance_ref['uuid']
    msg = _('instance %(instance_uuid)s: injecting file to %(path)s')
    LOG.audit(msg % locals())
    self.driver.inject_file(instance_ref, path, file_contents)
1088
@exception.wrap_exception(notifier=notifier, publisher_id=publisher_id())
@checks_instance_lock
@wrap_instance_fault
def agent_update(self, context, instance_uuid, url, md5hash):
    """Update agent running on an instance on this host."""
    context = context.elevated()
    instance_ref = self.db.instance_get_by_uuid(context, instance_uuid)
    current_power_state = self._get_power_state(context, instance_ref)
    expected_state = power_state.RUNNING
    if current_power_state != expected_state:
        # Warn but continue; the driver decides whether it can update.
        LOG.warn(_('trying to update agent on a non-running '
                   'instance: %(instance_uuid)s (state: '
                   '%(current_power_state)s '
                   'expected: %(expected_state)s)') % locals())
    instance_uuid = instance_ref['uuid']
    msg = _('instance %(instance_uuid)s: updating agent to %(url)s')
    LOG.audit(msg % locals())
    self.driver.agent_update(instance_ref, url, md5hash)
1107
@exception.wrap_exception(notifier=notifier, publisher_id=publisher_id())
@checks_instance_lock
@wrap_instance_fault
def rescue_instance(self, context, instance_uuid, **kwargs):
    """Rescue an instance on this host.

    :param rescue_password: password to set on rescue instance
    """
    LOG.audit(_('instance %s: rescuing'), instance_uuid, context=context)
    context = context.elevated()

    instance_ref = self.db.instance_get_by_uuid(context, instance_uuid)
    # Use the caller-supplied rescue password, or generate one.
    instance_ref.admin_pass = kwargs.get('rescue_password',
            utils.generate_password(FLAGS.password_length))
    network_info = self._get_instance_nw_info(context, instance_ref)
    image_meta = _get_image_meta(context, instance_ref['image_ref'])

    with self.error_out_instance_on_exception(context, instance_uuid):
        self.driver.rescue(context, instance_ref,
                           self._legacy_nw_info(network_info), image_meta)

    current_power_state = self._get_power_state(context, instance_ref)
    self._instance_update(context,
                          instance_uuid,
                          vm_state=vm_states.RESCUED,
                          task_state=None,
                          power_state=current_power_state)
1136
@exception.wrap_exception(notifier=notifier, publisher_id=publisher_id())
@checks_instance_lock
@wrap_instance_fault
def unrescue_instance(self, context, instance_uuid):
    """Unrescue an instance on this host."""
    LOG.audit(_('instance %s: unrescuing'), instance_uuid, context=context)
    context = context.elevated()

    instance_ref = self.db.instance_get_by_uuid(context, instance_uuid)
    network_info = self._get_instance_nw_info(context, instance_ref)

    with self.error_out_instance_on_exception(context, instance_uuid):
        self.driver.unrescue(instance_ref,
                             self._legacy_nw_info(network_info))

    current_power_state = self._get_power_state(context, instance_ref)
    self._instance_update(context,
                          instance_uuid,
                          vm_state=vm_states.ACTIVE,
                          task_state=None,
                          power_state=current_power_state)
1158
@exception.wrap_exception(notifier=notifier, publisher_id=publisher_id())
@checks_instance_lock
@wrap_instance_fault
def confirm_resize(self, context, instance_uuid, migration_id):
    """Destroys the source instance."""
    migration_ref = self.db.migration_get(context, migration_id)
    instance_ref = self.db.instance_get_by_uuid(context,
            migration_ref.instance_uuid)

    self._notify_about_instance_usage(instance_ref,
                                      "resize.confirm.start")

    # NOTE(tr3buchet): tear down networks on source host
    self.network_api.setup_networks_on_host(context, instance_ref,
            migration_ref['source_compute'], teardown=True)

    network_info = self._get_instance_nw_info(context, instance_ref)
    self.driver.confirm_migration(migration_ref, instance_ref,
                                  self._legacy_nw_info(network_info))

    self._notify_about_instance_usage(instance_ref, "resize.confirm.end",
                                      network_info=network_info)
1181
@exception.wrap_exception(notifier=notifier, publisher_id=publisher_id())
@checks_instance_lock
@wrap_instance_fault
def revert_resize(self, context, instance_uuid, migration_id):
    """Destroys the new instance on the destination machine.

    Reverts the model changes, and powers on the old instance on the
    source machine.

    """
    migration_ref = self.db.migration_get(context, migration_id)
    instance_ref = self.db.instance_get_by_uuid(context,
            migration_ref.instance_uuid)

    # NOTE(tr3buchet): tear down networks on destination host
    self.network_api.setup_networks_on_host(context, instance_ref,
                                            teardown=True)

    network_info = self._get_instance_nw_info(context, instance_ref)
    self.driver.destroy(instance_ref, self._legacy_nw_info(network_info))
    # Hand the second half of the revert off to the source compute host.
    topic = self.db.queue_get_for(context, FLAGS.compute_topic,
            migration_ref['source_compute'])
    rpc.cast(context, topic,
            {'method': 'finish_revert_resize',
             'args': {'instance_uuid': instance_ref['uuid'],
                      'migration_id': migration_ref['id']},
            })
1209
@exception.wrap_exception(notifier=notifier, publisher_id=publisher_id())
@checks_instance_lock
@wrap_instance_fault
def finish_revert_resize(self, context, instance_uuid, migration_id):
    """Finishes the second half of reverting a resize.

    Power back on the source instance and revert the resized attributes
    in the database.

    """
    migration_ref = self.db.migration_get(context, migration_id)
    instance_ref = self.db.instance_get_by_uuid(context,
            migration_ref.instance_uuid)
    network_info = self._get_instance_nw_info(context, instance_ref)

    self._notify_about_instance_usage(instance_ref, "resize.revert.start")

    old_instance_type = migration_ref['old_instance_type_id']
    instance_type = instance_types.get_instance_type(old_instance_type)

    self.driver.finish_revert_migration(instance_ref,
                                        self._legacy_nw_info(network_info))

    # Just roll back the record. There's no need to resize down since
    # the 'old' VM already has the preferred attributes
    self._instance_update(context,
                          instance_ref['uuid'],
                          memory_mb=instance_type['memory_mb'],
                          host=migration_ref['source_compute'],
                          vcpus=instance_type['vcpus'],
                          root_gb=instance_type['root_gb'],
                          ephemeral_gb=instance_type['ephemeral_gb'],
                          instance_type_id=instance_type['id'],
                          vm_state=vm_states.ACTIVE,
                          task_state=None)

    self.db.migration_update(context, migration_id,
            {'status': 'reverted'})

    self._notify_about_instance_usage(instance_ref, "resize.revert.end")
1250
@exception.wrap_exception(notifier=notifier, publisher_id=publisher_id())
@checks_instance_lock
@wrap_instance_fault
def prep_resize(self, context, instance_uuid, instance_type_id, image,
                **kwargs):
    """Initiates the process of moving a running instance to another host.

    Possibly changes the RAM and disk size in the process.

    """
    context = context.elevated()

    instance_ref = self.db.instance_get_by_uuid(context, instance_uuid)

    compute_utils.notify_usage_exists(instance_ref, current_period=True)
    self._notify_about_instance_usage(instance_ref, "resize.prep.start")

    same_host = instance_ref['host'] == FLAGS.host
    if same_host and not FLAGS.allow_resize_to_same_host:
        self._set_instance_error_state(context, instance_uuid)
        msg = _('destination same as source!')
        raise exception.MigrationError(msg)

    old_instance_type_id = instance_ref['instance_type_id']
    old_instance_type = instance_types.get_instance_type(
            old_instance_type_id)
    new_instance_type = instance_types.get_instance_type(instance_type_id)

    migration_ref = self.db.migration_create(context,
            {'instance_uuid': instance_ref['uuid'],
             'source_compute': instance_ref['host'],
             'dest_compute': FLAGS.host,
             'dest_host': self.driver.get_host_ip_addr(),
             'old_instance_type_id': old_instance_type['id'],
             'new_instance_type_id': instance_type_id,
             'status': 'pre-migrating'})

    LOG.audit(_('instance %s: migrating'), instance_ref['uuid'],
              context=context)
    # The source host performs the actual migration.
    topic = self.db.queue_get_for(context, FLAGS.compute_topic,
            instance_ref['host'])
    rpc.cast(context, topic,
            {'method': 'resize_instance',
             'args': {'instance_uuid': instance_ref['uuid'],
                      'migration_id': migration_ref['id'],
                      'image': image}})

    usage_info = utils.usage_from_instance(instance_ref,
                          new_instance_type=new_instance_type['name'],
                          new_instance_type_id=new_instance_type['id'])
    self._notify_about_instance_usage(instance_ref, "resize.prep.end",
                                      usage_info=usage_info)
1303
@exception.wrap_exception(notifier=notifier, publisher_id=publisher_id())
1304
@checks_instance_lock
1305
@wrap_instance_fault
1306
def resize_instance(self, context, instance_uuid, migration_id, image):
1307
"""Starts the migration of a running instance to another host."""
1308
migration_ref = self.db.migration_get(context, migration_id)
1309
instance_ref = self.db.instance_get_by_uuid(context,
1310
migration_ref.instance_uuid)
1311
instance_type_ref = self.db.instance_type_get(context,
1312
migration_ref.new_instance_type_id)
1314
network_info = self._get_instance_nw_info(context, instance_ref)
1315
self.db.migration_update(context,
1317
{'status': 'migrating'})
1319
self._notify_about_instance_usage(instance_ref, "resize.start",
1320
network_info=network_info)
1323
disk_info = self.driver.migrate_disk_and_power_off(
1324
context, instance_ref, migration_ref['dest_host'],
1325
instance_type_ref, self._legacy_nw_info(network_info))
1326
except Exception, error:
1327
with utils.save_and_reraise_exception():
1328
msg = _('%s. Setting instance vm_state to ERROR')
1329
LOG.error(msg % error)
1330
self._set_instance_error_state(context, instance_uuid)
1332
self.db.migration_update(context,
1334
{'status': 'post-migrating'})
1336
service = self.db.service_get_by_host_and_topic(
1337
context, migration_ref['dest_compute'], FLAGS.compute_topic)
1338
topic = self.db.queue_get_for(context,
1339
FLAGS.compute_topic,
1340
migration_ref['dest_compute'])
1341
params = {'migration_id': migration_id,
1342
'disk_info': disk_info,
1343
'instance_uuid': instance_ref['uuid'],
1345
rpc.cast(context, topic, {'method': 'finish_resize',
1348
self._notify_about_instance_usage(instance_ref, "resize.end",
1349
network_info=network_info)
1351
def _finish_resize(self, context, instance_ref, migration_ref, disk_info,
                   image):
    """Finish a resize on the destination host.

    Updates the instance record if the flavor changed, sets up
    networking, and lets the driver complete the migration.
    """
    resize_instance = False
    old_instance_type_id = migration_ref['old_instance_type_id']
    new_instance_type_id = migration_ref['new_instance_type_id']
    if old_instance_type_id != new_instance_type_id:
        instance_type = instance_types.get_instance_type(
                new_instance_type_id)
        instance_ref = self._instance_update(
                context,
                instance_ref['uuid'],
                instance_type_id=instance_type['id'],
                memory_mb=instance_type['memory_mb'],
                vcpus=instance_type['vcpus'],
                root_gb=instance_type['root_gb'],
                ephemeral_gb=instance_type['ephemeral_gb'])
        resize_instance = True

    # NOTE(tr3buchet): setup networks on destination host
    self.network_api.setup_networks_on_host(context, instance_ref,
                                            migration_ref['dest_compute'])

    network_info = self._get_instance_nw_info(context, instance_ref)

    self._notify_about_instance_usage(instance_ref, "finish_resize.start",
                                      network_info=network_info)

    self.driver.finish_migration(context, migration_ref, instance_ref,
                                 disk_info,
                                 self._legacy_nw_info(network_info),
                                 image, resize_instance)

    # Leave the instance in RESIZE_VERIFY so the user can confirm/revert.
    self._instance_update(context,
                          instance_ref['uuid'],
                          vm_state=vm_states.ACTIVE,
                          host=migration_ref['dest_compute'],
                          task_state=task_states.RESIZE_VERIFY)

    self.db.migration_update(context, migration_ref.id,
                             {'status': 'finished'})

    self._notify_about_instance_usage(instance_ref, "finish_resize.end",
                                      network_info=network_info)
1395
@exception.wrap_exception(notifier=notifier, publisher_id=publisher_id())
1396
@checks_instance_lock
1397
@wrap_instance_fault
1398
def finish_resize(self, context, instance_uuid, migration_id, disk_info,
1400
"""Completes the migration process.
1402
Sets up the newly transferred disk and turns on the instance at its
1406
migration_ref = self.db.migration_get(context, migration_id)
1408
instance_ref = self.db.instance_get_by_uuid(context,
1409
migration_ref.instance_uuid)
1412
self._finish_resize(context, instance_ref, migration_ref,
1414
except Exception, error:
1415
with utils.save_and_reraise_exception():
1416
msg = _('%s. Setting instance vm_state to ERROR')
1417
LOG.error(msg % error)
1418
self._set_instance_error_state(context, instance_ref.uuid)
1420
@exception.wrap_exception(notifier=notifier, publisher_id=publisher_id())
@checks_instance_lock
@wrap_instance_fault
def add_fixed_ip_to_instance(self, context, instance_uuid, network_id):
    """Calls network_api to add new fixed_ip to instance
    then injects the new network info and resets instance networking.

    """
    instance_ref = self.db.instance_get_by_uuid(context, instance_uuid)
    self._notify_about_instance_usage(instance_ref, "create_ip.start")

    instance_id = instance_ref['id']
    self.network_api.add_fixed_ip_to_instance(context,
                                              instance_ref,
                                              network_id)

    # Push the updated network info into the guest and bounce networking.
    network_info = self.inject_network_info(context,
                                            instance_ref['uuid'])
    self.reset_network(context, instance_ref['uuid'])

    self._notify_about_instance_usage(instance_ref, "create_ip.end",
                                      network_info=network_info)
1443
@exception.wrap_exception(notifier=notifier, publisher_id=publisher_id())
@checks_instance_lock
@wrap_instance_fault
def remove_fixed_ip_from_instance(self, context, instance_uuid, address):
    """Calls network_api to remove existing fixed_ip from instance
    by injecting the altered network info and resetting
    instance networking.
    """
    instance_ref = self.db.instance_get_by_uuid(context, instance_uuid)
    self._notify_about_instance_usage(instance_ref, "delete_ip.start")

    instance_id = instance_ref['id']
    self.network_api.remove_fixed_ip_from_instance(context,
                                                   instance_ref,
                                                   address)

    # Push the updated network info into the guest and bounce networking.
    network_info = self.inject_network_info(context,
                                            instance_ref['uuid'])
    self.reset_network(context, instance_ref['uuid'])

    self._notify_about_instance_usage(instance_ref, "delete_ip.end",
                                      network_info=network_info)
1466
@exception.wrap_exception(notifier=notifier, publisher_id=publisher_id())
@checks_instance_lock
@wrap_instance_fault
def pause_instance(self, context, instance_uuid):
    """Pause an instance on this host."""
    LOG.audit(_('instance %s: pausing'), instance_uuid, context=context)
    context = context.elevated()

    instance_ref = self.db.instance_get_by_uuid(context, instance_uuid)
    self.driver.pause(instance_ref)

    current_power_state = self._get_power_state(context, instance_ref)
    self._instance_update(context,
                          instance_ref['id'],
                          power_state=current_power_state,
                          vm_state=vm_states.PAUSED,
                          task_state=None)
1484
@exception.wrap_exception(notifier=notifier, publisher_id=publisher_id())
@checks_instance_lock
@wrap_instance_fault
def unpause_instance(self, context, instance_uuid):
    """Unpause a paused instance on this host."""
    LOG.audit(_('instance %s: unpausing'), instance_uuid, context=context)
    context = context.elevated()

    instance_ref = self.db.instance_get_by_uuid(context, instance_uuid)
    self.driver.unpause(instance_ref)

    current_power_state = self._get_power_state(context, instance_ref)
    self._instance_update(context,
                          instance_ref['id'],
                          power_state=current_power_state,
                          vm_state=vm_states.ACTIVE,
                          task_state=None)
1502
@exception.wrap_exception(notifier=notifier, publisher_id=publisher_id())
def host_power_action(self, context, host=None, action=None):
    """Reboots, shuts down or powers up the host."""
    return self.driver.host_power_action(host, action)
1507
@exception.wrap_exception(notifier=notifier, publisher_id=publisher_id())
def host_maintenance_mode(self, context, host, mode):
    """Start/Stop host maintenance window. On start, it triggers
    guest VMs evacuation."""
    return self.driver.host_maintenance_mode(host, mode)
1513
@exception.wrap_exception(notifier=notifier, publisher_id=publisher_id())
def set_host_enabled(self, context, host=None, enabled=None):
    """Sets the specified host's ability to accept new instances."""
    return self.driver.set_host_enabled(host, enabled)
1518
@exception.wrap_exception(notifier=notifier, publisher_id=publisher_id())
@wrap_instance_fault
def get_diagnostics(self, context, instance_uuid):
    """Retrieve diagnostics for an instance on this host."""
    instance_ref = self.db.instance_get_by_uuid(context, instance_uuid)
    current_power_state = self._get_power_state(context, instance_ref)
    # Diagnostics are only available while the instance is running;
    # otherwise the method implicitly returns None.
    if current_power_state == power_state.RUNNING:
        LOG.audit(_("instance %s: retrieving diagnostics"), instance_uuid,
                  context=context)
        return self.driver.get_diagnostics(instance_ref)
1529
@exception.wrap_exception(notifier=notifier, publisher_id=publisher_id())
@checks_instance_lock
@wrap_instance_fault
def suspend_instance(self, context, instance_uuid):
    """Suspend the given instance."""
    LOG.audit(_('instance %s: suspending'), instance_uuid, context=context)
    context = context.elevated()

    instance_ref = self.db.instance_get_by_uuid(context, instance_uuid)
    self.driver.suspend(instance_ref)

    current_power_state = self._get_power_state(context, instance_ref)
    self._instance_update(context,
                          instance_ref['id'],
                          power_state=current_power_state,
                          vm_state=vm_states.SUSPENDED,
                          task_state=None)

    usage_info = utils.usage_from_instance(instance_ref)
    notifier.notify('compute.%s' % self.host, 'compute.instance.suspend',
            notifier.INFO, usage_info)
1551
@exception.wrap_exception(notifier=notifier, publisher_id=publisher_id())
@checks_instance_lock
@wrap_instance_fault
def resume_instance(self, context, instance_uuid):
    """Resume the given suspended instance."""
    LOG.audit(_('instance %s: resuming'), instance_uuid, context=context)
    context = context.elevated()

    instance_ref = self.db.instance_get_by_uuid(context, instance_uuid)
    self.driver.resume(instance_ref)

    current_power_state = self._get_power_state(context, instance_ref)
    self._instance_update(context,
                          instance_ref['id'],
                          power_state=current_power_state,
                          vm_state=vm_states.ACTIVE,
                          task_state=None)

    usage_info = utils.usage_from_instance(instance_ref)
    notifier.notify('compute.%s' % self.host, 'compute.instance.resume',
            notifier.INFO, usage_info)
1573
@exception.wrap_exception(notifier=notifier, publisher_id=publisher_id())
@wrap_instance_fault
def lock_instance(self, context, instance_uuid):
    """Lock the given instance."""
    context = context.elevated()
    LOG.debug(_('instance %s: locking'), instance_uuid, context=context)
    self._instance_update(context, instance_uuid, locked=True)
1582
@exception.wrap_exception(notifier=notifier, publisher_id=publisher_id())
@wrap_instance_fault
def unlock_instance(self, context, instance_uuid):
    """Unlock the given instance."""
    context = context.elevated()
    LOG.debug(_('instance %s: unlocking'), instance_uuid, context=context)
    self._instance_update(context, instance_uuid, locked=False)
1591
@exception.wrap_exception(notifier=notifier, publisher_id=publisher_id())
@wrap_instance_fault
def get_lock(self, context, instance_uuid):
    """Return the boolean state of the given instance's lock."""
    context = context.elevated()
    LOG.debug(_('instance %s: getting locked state'), instance_uuid,
              context=context)
    instance_ref = self.db.instance_get_by_uuid(context, instance_uuid)
    return instance_ref['locked']
1601
@checks_instance_lock
@wrap_instance_fault
def reset_network(self, context, instance_uuid):
    """Reset networking on the given instance."""
    instance = self.db.instance_get_by_uuid(context, instance_uuid)
    LOG.debug(_('instance %s: reset network'), instance_uuid,
              context=context)
    self.driver.reset_network(instance)
1610
@checks_instance_lock
@wrap_instance_fault
def inject_network_info(self, context, instance_uuid):
    """Inject network info for the given instance."""
    LOG.debug(_('instance %s: inject network info'), instance_uuid,
              context=context)
    instance = self.db.instance_get_by_uuid(context, instance_uuid)
    network_info = self._get_instance_nw_info(context, instance)
    LOG.debug(_("network_info to inject: |%s|"), network_info)

    self.driver.inject_network_info(instance,
                                    self._legacy_nw_info(network_info))
    # Returned so add/remove_fixed_ip callers can forward it in
    # their usage notifications.
    return network_info
1624
@exception.wrap_exception(notifier=notifier, publisher_id=publisher_id())
@wrap_instance_fault
def get_console_output(self, context, instance_uuid, tail_length=None):
    """Send the console output for the given instance."""
    context = context.elevated()
    instance_ref = self.db.instance_get_by_uuid(context, instance_uuid)
    LOG.audit(_("Get console output for instance %s"), instance_uuid,
              context=context)
    output = self.driver.get_console_output(instance_ref)

    if tail_length is not None:
        output = self._tail_log(output, tail_length)

    # Console output may contain arbitrary bytes; normalize to ascii
    # so the result is safe to serialize over RPC.
    return output.decode('utf-8', 'replace').encode('ascii', 'replace')
1639
def _tail_log(self, log, length):
1641
length = int(length)
1648
return '\n'.join(log.split('\n')[-int(length):])
1650
@exception.wrap_exception(notifier=notifier, publisher_id=publisher_id())
@wrap_instance_fault
def get_vnc_console(self, context, instance_uuid, console_type):
    """Return connection information for a vnc console."""
    context = context.elevated()
    LOG.debug(_("instance %s: getting vnc console"), instance_uuid)
    instance_ref = self.db.instance_get_by_uuid(context, instance_uuid)

    token = str(utils.gen_uuid())

    if console_type == 'novnc':
        # For essex, novncproxy_base_url must include the full path
        # including the html file (like http://myhost/vnc_auto.html)
        access_url = '%s?token=%s' % (FLAGS.novncproxy_base_url, token)
    elif console_type == 'xvpvnc':
        access_url = '%s?token=%s' % (FLAGS.xvpvncproxy_base_url, token)
    else:
        raise exception.ConsoleTypeInvalid(console_type=console_type)

    # Retrieve connect info from driver, and then decorate with our
    # access info token
    connect_info = self.driver.get_vnc_console(instance_ref)
    connect_info['token'] = token
    connect_info['access_url'] = access_url

    return connect_info
1677
def _attach_volume_boot(self, context, instance, volume, mountpoint):
    """Attach a volume to an instance at boot time. So actual attach
    is done by instance creation"""

    instance_id = instance['id']
    instance_uuid = instance['uuid']
    volume_id = volume['id']
    context = context.elevated()
    LOG.audit(_('Booting with volume %(volume_id)s at %(mountpoint)s'),
              locals(), context=context, instance=instance)
    connector = self.driver.get_volume_connector(instance)
    connection_info = self.volume_api.initialize_connection(context,
                                                            volume,
                                                            connector)
    self.volume_api.attach(context, volume, instance_id, mountpoint)
    return connection_info
1694
@exception.wrap_exception(notifier=notifier, publisher_id=publisher_id())
@checks_instance_lock
@wrap_instance_fault
def attach_volume(self, context, instance_uuid, volume_id, mountpoint):
    """Attach a volume to an instance."""
    volume = self.volume_api.get(context, volume_id)
    context = context.elevated()
    instance_ref = self.db.instance_get_by_uuid(context, instance_uuid)
    instance_id = instance_ref['id']
    msg = _("instance %(instance_uuid)s: attaching volume %(volume_id)s"
            " to %(mountpoint)s")
    LOG.audit(_('Attaching volume %(volume_id)s to %(mountpoint)s'),
              locals(), context=context, instance=instance_ref)
    try:
        connector = self.driver.get_volume_connector(instance_ref)
        connection_info = self.volume_api.initialize_connection(context,
                                                                volume,
                                                                connector)
    except Exception:  # pylint: disable=W0702
        # Release the volume reservation before re-raising.
        with utils.save_and_reraise_exception():
            msg = _("instance %(instance_uuid)s: attach failed"
                    " %(mountpoint)s, removing")
            LOG.exception(msg % locals(), context=context)
            self.volume_api.unreserve_volume(context, volume)

    try:
        self.driver.attach_volume(connection_info,
                                  instance_ref['name'],
                                  mountpoint)
    except Exception:  # pylint: disable=W0702
        # Tear down the half-made connection before re-raising.
        with utils.save_and_reraise_exception():
            LOG.exception(_('Attach failed %(mountpoint)s, removing'),
                          locals(), context=context,
                          instance=instance_ref)
            self.volume_api.terminate_connection(context,
                                                 volume,
                                                 connector)

    self.volume_api.attach(context, volume, instance_id, mountpoint)
    # Record the attachment as a block device mapping.
    values = {
        'instance_id': instance_id,
        'connection_info': utils.dumps(connection_info),
        'device_name': mountpoint,
        'delete_on_termination': False,
        'virtual_name': None,
        'snapshot_id': None,
        'volume_id': volume_id,
        'volume_size': None}
    self.db.block_device_mapping_create(context, values)
1745
def _detach_volume(self, context, instance, bdm):
    """Do the actual driver detach using block device mapping."""
    instance_name = instance['name']
    instance_uuid = instance['uuid']
    mp = bdm['device_name']
    volume_id = bdm['volume_id']

    LOG.audit(_('Detach volume %(volume_id)s from mountpoint %(mp)s'),
              locals(), context=context, instance=instance)

    if instance_name not in self.driver.list_instances():
        # Warn but still attempt the detach so the mapping gets cleared.
        LOG.warn(_('Detaching volume from unknown instance %s'),
                 instance_uuid, context=context)
    self.driver.detach_volume(utils.loads(bdm['connection_info']),
                              instance_name,
                              mp)
1762
@exception.wrap_exception(notifier=notifier, publisher_id=publisher_id())
@checks_instance_lock
@wrap_instance_fault
def detach_volume(self, context, instance_uuid, volume_id):
    """Detach a volume from an instance."""
    instance_ref = self.db.instance_get_by_uuid(context, instance_uuid)
    instance_id = instance_ref['id']
    bdm = self._get_instance_volume_bdm(context, instance_id, volume_id)
    self._detach_volume(context, instance_ref, bdm)
    volume = self.volume_api.get(context, volume_id)
    connector = self.driver.get_volume_connector(instance_ref)
    self.volume_api.terminate_connection(context, volume, connector)
    self.volume_api.detach(context.elevated(), volume)
    self.db.block_device_mapping_destroy_by_instance_and_volume(
        context, instance_id, volume_id)
1779
@exception.wrap_exception(notifier=notifier, publisher_id=publisher_id())
def remove_volume_connection(self, context, instance_id, volume_id):
    """Remove a volume connection using the volume api"""
    # NOTE(vish): We don't want to actually mark the volume
    #             detached, or delete the bdm, just remove the
    #             connection from this host.
    try:
        instance_ref = self.db.instance_get(context, instance_id)
        bdm = self._get_instance_volume_bdm(context,
                                            instance_id,
                                            volume_id)
        self._detach_volume(context, instance_ref, bdm)
        volume = self.volume_api.get(context, volume_id)
        connector = self.driver.get_volume_connector(instance_ref)
        self.volume_api.terminate_connection(context, volume, connector)
    except exception.NotFound:
        # Nothing to clean up if the instance/bdm/volume is already gone.
        pass
1797
@exception.wrap_exception(notifier=notifier, publisher_id=publisher_id())
def compare_cpu(self, context, cpu_info):
    """Checks that the host cpu is compatible with a cpu given by xml.

    :param context: security context
    :param cpu_info: json string obtained from virConnect.getCapabilities
    :returns: See driver.compare_cpu
    """
    return self.driver.compare_cpu(cpu_info)
1808
@exception.wrap_exception(notifier=notifier, publisher_id=publisher_id())
def create_shared_storage_test_file(self, context):
    """Makes tmpfile under FLAGS.instances_path.

    This method enables compute nodes to recognize that they mount the
    same shared storage. (create|check|cleanup)_shared_storage_test_file()
    is a pair.

    :param context: security context
    :returns: tmpfile name(basename)
    """
    dirpath = FLAGS.instances_path
    fd, tmp_file = tempfile.mkstemp(dir=dirpath)
    LOG.debug(_("Creating tmpfile %s to notify to other "
                "compute nodes that they should mount "
                "the same storage.") % tmp_file)
    # mkstemp returns an open fd; close it so we don't leak a file
    # descriptor — only the file's existence matters to the peers.
    os.close(fd)
    return os.path.basename(tmp_file)
1828
@exception.wrap_exception(notifier=notifier, publisher_id=publisher_id())
def check_shared_storage_test_file(self, context, filename):
    """Confirms existence of the tmpfile under FLAGS.instances_path.

    :param context: security context
    :param filename: confirm existence of FLAGS.instances_path/thisfile
    :returns: True if the file exists (hosts share storage), else False

    """
    tmp_file = os.path.join(FLAGS.instances_path, filename)
    # os.path.exists already yields the boolean we want to report.
    return os.path.exists(tmp_file)
1843
@exception.wrap_exception(notifier=notifier, publisher_id=publisher_id())
def cleanup_shared_storage_test_file(self, context, filename):
    """Removes existence of the tmpfile under FLAGS.instances_path.

    :param context: security context
    :param filename: remove existence of FLAGS.instances_path/thisfile

    """
    tmp_file = os.path.join(FLAGS.instances_path, filename)
    os.remove(tmp_file)
1854
def get_instance_disk_info(self, context, instance_name):
    """Getting information of instance's current disk.

    Implementation: nova.virt.libvirt.connection.

    :param context: security context
    :param instance_name: instance name
    :returns: driver-specific disk info for the named instance

    """
    return self.driver.get_instance_disk_info(instance_name)
1865
def pre_live_migration(self, context, instance_id, time=None,
                       block_migration=False, disk=None):
    """Preparations for live migration at dest host.

    :param context: security context
    :param instance_id: nova.db.sqlalchemy.models.Instance.Id
    :param time: sleep provider, injectable for testing (defaults to
                 greenthread so retries cooperatively yield)
    :param block_migration: if true, prepare for block migration
    :param disk: disk info passed from the source host (block migration)

    """
    if not time:
        time = greenthread

    # Getting instance info
    instance_ref = self.db.instance_get(context, instance_id)

    # If any volume is mounted, prepare here.
    block_device_info = self._get_instance_volume_block_device_info(
                        context, instance_id)
    if not block_device_info['block_device_mapping']:
        LOG.info(_('Instance has no volume.'), instance=instance_ref)

    self.driver.pre_live_migration(block_device_info)

    # NOTE(tr3buchet): setup networks on destination host
    self.network_api.setup_networks_on_host(context, instance_ref,
                                            self.host)

    # Bridge settings.
    # Call this method prior to ensure_filtering_rules_for_instance,
    # since bridge is not set up, ensure_filtering_rules_for instance
    # fails.
    #
    # Retry operation is necessary because continuously request comes,
    # concurrent request occurs to iptables, then it complains.
    network_info = self._get_instance_nw_info(context, instance_ref)

    # TODO(tr3buchet): figure out how on the earth this is necessary
    fixed_ips = network_info.fixed_ips()
    if not fixed_ips:
        raise exception.FixedIpNotFoundForInstance(instance_id=instance_id)

    max_retry = FLAGS.live_migration_retry_count
    for cnt in range(max_retry):
        try:
            self.driver.plug_vifs(instance_ref,
                                  self._legacy_nw_info(network_info))
            break
        except exception.ProcessExecutionError:
            if cnt == max_retry - 1:
                raise
            else:
                LOG.warn(_("plug_vifs() failed %(cnt)d."
                           "Retry up to %(max_retry)d for %(hostname)s.")
                           % locals())
                time.sleep(1)

    # Creating filters to hypervisors and firewalls.
    # An example is that nova-instance-instance-xxx,
    # which is written to libvirt.xml(Check "virsh nwfilter-list")
    # This nwfilter is necessary on the destination host.
    # In addition, this method is creating filtering rule
    # onto destination host.
    self.driver.ensure_filtering_rules_for_instance(instance_ref,
        self._legacy_nw_info(network_info))

    # Preparation for block migration
    if block_migration:
        self.driver.pre_block_migration(context,
                                        instance_ref,
                                        disk)
1936
def live_migration(self, context, instance_id,
                   dest, block_migration=False):
    """Executing live migration.

    :param context: security context
    :param instance_id: nova.db.sqlalchemy.models.Instance.Id
    :param dest: destination host
    :param block_migration: if true, prepare for block migration

    """
    # Get instance for error handling.
    instance_ref = self.db.instance_get(context, instance_id)

    try:
        # Checking volume node is working correctly when any volumes
        # are attached to instances.
        if self._get_instance_volume_bdms(context, instance_id):
            rpc.call(context,
                     FLAGS.volume_topic,
                     {'method': 'check_for_export',
                      'args': {'instance_id': instance_id}})

        if block_migration:
            disk = self.driver.get_instance_disk_info(instance_ref.name)
        else:
            disk = None

        rpc.call(context,
                 self.db.queue_get_for(context, FLAGS.compute_topic, dest),
                 {'method': 'pre_live_migration',
                  'args': {'instance_id': instance_id,
                           'block_migration': block_migration,
                           'disk': disk}})

    except Exception:
        # Roll the instance back to its source host before re-raising so
        # it is not left stranded in a half-migrated state.
        with utils.save_and_reraise_exception():
            instance_uuid = instance_ref['uuid']
            LOG.exception(_('Pre live migration failed at %(dest)s'),
                          locals(), instance=instance_ref)
            self.rollback_live_migration(context, instance_ref, dest,
                                         block_migration)

    # Executing live migration
    # live_migration might raises exceptions, but
    # nothing must be recovered in this version.
    self.driver.live_migration(context, instance_ref, dest,
                               self.post_live_migration,
                               self.rollback_live_migration,
                               block_migration)
1986
def post_live_migration(self, ctxt, instance_ref,
                        dest, block_migration=False):
    """Post operations for live migration.

    This method is called from live_migration
    and mainly updating database record.

    :param ctxt: security context
    :param instance_ref: instance DB record being migrated
    :param dest: destination host
    :param block_migration: if true, prepare for block migration

    """
    LOG.info(_('post_live_migration() is started..'))
    instance_id = instance_ref['id']
    instance_uuid = instance_ref['uuid']

    # Detaching volumes.
    for bdm in self._get_instance_volume_bdms(ctxt, instance_id):
        # NOTE(vish): We don't want to actually mark the volume
        #             detached, or delete the bdm, just remove the
        #             connection from this host.
        self.remove_volume_connection(ctxt, instance_id,
                                      bdm['volume_id'])

    # Releasing vlan.
    # (not necessary in current implementation?)

    network_info = self._get_instance_nw_info(ctxt, instance_ref)
    # Releasing security group ingress rule.
    self.driver.unfilter_instance(instance_ref,
                                  self._legacy_nw_info(network_info))

    # Database updating.
    # NOTE(jkoelker) This needs to be converted to network api calls
    #                if nova wants to support floating_ips in
    #                quantum/melange
    try:
        # Not return if floating_ip is not found, otherwise,
        # instance never be accessible..
        floating_ip = self.db.instance_get_floating_address(ctxt,
                                                            instance_id)
        if not floating_ip:
            LOG.info(_('No floating_ip found'), instance=instance_ref)
        else:
            floating_ip_ref = self.db.floating_ip_get_by_address(ctxt,
                                                            floating_ip)
            self.db.floating_ip_update(ctxt,
                                       floating_ip_ref['address'],
                                       {'host': dest})
    except exception.NotFound:
        LOG.info(_('No floating_ip found.'), instance=instance_ref)
    except Exception as e:
        LOG.error(_('Live migration: Unexpected error: cannot inherit '
                    'floating ip.\n%(e)s'), locals(),
                  instance=instance_ref)

    # Define domain at destination host, without doing it,
    # pause/suspend/terminate do not work.
    rpc.call(ctxt,
             self.db.queue_get_for(ctxt, FLAGS.compute_topic, dest),
             {"method": "post_live_migration_at_destination",
              "args": {'instance_id': instance_ref['id'],
                       'block_migration': block_migration}})

    # Restore volume state
    for volume_ref in instance_ref['volumes']:
        self.volume_api.update(ctxt, volume_ref, {'status': 'in-use'})

    # No instance booting at source host, but instance dir
    # must be deleted for preparing next block migration
    if block_migration:
        self.driver.destroy(instance_ref,
                            self._legacy_nw_info(network_info))
    else:
        # self.driver.destroy() usually performs vif unplugging
        # but we must do it explicitly here when block_migration
        # is false, as the network devices at the source must be
        # torn down
        self.driver.unplug_vifs(instance_ref,
                                self._legacy_nw_info(network_info))

    # NOTE(tr3buchet): tear down networks on source host
    self.network_api.setup_networks_on_host(ctxt, instance_ref,
                                            self.host, teardown=True)

    LOG.info(_('Migrating instance to %(dest)s finished successfully.'),
             locals(), instance=instance_ref)
    LOG.info(_("You may see the error \"libvirt: QEMU error: "
               "Domain not found: no domain with matching name.\" "
               "This error can be safely ignored."),
             instance=instance_ref)
2080
def post_live_migration_at_destination(self, context,
                                       instance_id, block_migration=False):
    """Post operations for live migration.

    :param context: security context
    :param instance_id: nova.db.sqlalchemy.models.Instance.Id
    :param block_migration: if true, prepare for block migration

    """
    instance_ref = self.db.instance_get(context, instance_id)
    LOG.info(_('Post operation of migraton started'),
             instance=instance_ref)

    # NOTE(tr3buchet): setup networks on destination host
    #                  this is called a second time because
    #                  multi_host does not create the bridge in
    #                  plug_vifs
    self.network_api.setup_networks_on_host(context, instance_ref,
                                            self.host)

    network_info = self._get_instance_nw_info(context, instance_ref)
    self.driver.post_live_migration_at_destination(context, instance_ref,
                                        self._legacy_nw_info(network_info),
                                        block_migration)
    # Restore instance state
    current_power_state = self._get_power_state(context, instance_ref)
    self._instance_update(context,
                          instance_ref['id'],
                          host=self.host,
                          power_state=current_power_state,
                          vm_state=vm_states.ACTIVE,
                          task_state=None)

    # NOTE(vish): this is necessary to update dhcp
    self.network_api.setup_networks_on_host(context,
                                            instance_ref,
                                            self.host)
2118
def rollback_live_migration(self, context, instance_ref,
                            dest, block_migration):
    """Recovers Instance/volume state from migrating -> running.

    :param context: security context
    :param instance_ref: instance DB record being rolled back
    :param dest:
        This method is called from live migration src host.
        This param specifies destination host.
    :param block_migration: if true, prepare for block migration

    """
    host = instance_ref['host']
    self._instance_update(context,
                          instance_ref['id'],
                          host=host,
                          vm_state=vm_states.ACTIVE,
                          task_state=None)

    # NOTE(tr3buchet): setup networks on source host (really it's re-setup)
    self.network_api.setup_networks_on_host(context, instance_ref,
                                            self.host)

    for bdm in self._get_instance_volume_bdms(context, instance_ref['id']):
        volume_id = bdm['volume_id']
        volume = self.volume_api.get(context, volume_id)
        self.volume_api.update(context, volume, {'status': 'in-use'})
        # Tell the destination to drop its half-established connection.
        self.volume_api.remove_from_compute(context,
                                            instance_ref['id'],
                                            volume_id,
                                            dest)

    # Block migration needs empty image at destination host
    # before migration starts, so if any failure occurs,
    # any empty images has to be deleted.
    if block_migration:
        rpc.call(context,
                 self.db.queue_get_for(context, FLAGS.compute_topic, dest),
                 {"method": "rollback_live_migration_at_destination",
                  "args": {'instance_id': instance_ref['id']}})
2159
def rollback_live_migration_at_destination(self, context, instance_id):
    """Cleaning up image directory that is created pre_live_migration.

    :param context: security context
    :param instance_id: nova.db.sqlalchemy.models.Instance.Id
    """
    instance_ref = self.db.instance_get(context, instance_id)
    network_info = self._get_instance_nw_info(context, instance_ref)

    # NOTE(tr3buchet): tear down networks on destination host
    self.network_api.setup_networks_on_host(context, instance_ref,
                                            self.host, teardown=True)

    # NOTE(vish): The mapping is passed in so the driver can disconnect
    #             from remote volumes if necessary
    block_device_info = self._get_instance_volume_block_device_info(
                        context, instance_id)
    self.driver.destroy(instance_ref, self._legacy_nw_info(network_info),
                        block_device_info)
2179
@manager.periodic_task
def _heal_instance_info_cache(self, context):
    """Called periodically.  On every call, try to update the
    info_cache's network information for another instance by
    calling to the network manager.

    This is implemented by keeping a cache of uuids of instances
    that live on this host.  On each call, we pop one off of a
    list, pull the DB record, and try the call to the network API.
    If anything errors, we don't care.  It's possible the instance
    has been deleted, etc.
    """
    heal_interval = FLAGS.heal_instance_info_cache_interval
    if not heal_interval:
        return
    curr_time = time.time()
    if self._last_info_cache_heal + heal_interval > curr_time:
        return
    self._last_info_cache_heal = curr_time

    instance_uuids = getattr(self, '_instance_uuids_to_heal', None)
    instance = None

    while not instance or instance['host'] != self.host:
        if instance_uuids:
            try:
                instance = self.db.instance_get_by_uuid(context,
                        instance_uuids.pop(0))
            except exception.InstanceNotFound:
                # Instance is gone.  Try to grab another.
                continue
        else:
            # No more in our copy of uuids.  Pull from the DB.
            db_instances = self.db.instance_get_all_by_host(
                    context, self.host)
            if not db_instances:
                # None.. just return.
                return
            instance = db_instances.pop(0)
            instance_uuids = [inst['uuid'] for inst in db_instances]
            self._instance_uuids_to_heal = instance_uuids

    # We have an instance now and it's ours
    try:
        # Call to network API to get instance info.. this will
        # force an update to the instance's info_cache
        self.network_api.get_instance_nw_info(context, instance)
        LOG.debug(_("Updated the info_cache for instance %s") %
                  instance['uuid'])
    except Exception:
        # We don't care about any failures
        pass
2232
@manager.periodic_task
def _poll_rebooting_instances(self, context):
    """Reap instances stuck rebooting longer than FLAGS.reboot_timeout.

    A timeout of 0 disables the check entirely.
    """
    if FLAGS.reboot_timeout > 0:
        self.driver.poll_rebooting_instances(FLAGS.reboot_timeout)
2237
@manager.periodic_task
def _poll_rescued_instances(self, context):
    """Unrescue instances rescued longer than FLAGS.rescue_timeout.

    A timeout of 0 disables the check entirely.
    """
    if FLAGS.rescue_timeout > 0:
        self.driver.poll_rescued_instances(FLAGS.rescue_timeout)
2242
@manager.periodic_task
def _poll_unconfirmed_resizes(self, context):
    """Handle resizes unconfirmed past FLAGS.resize_confirm_window.

    A window of 0 disables the check entirely.
    """
    if FLAGS.resize_confirm_window > 0:
        self.driver.poll_unconfirmed_resizes(FLAGS.resize_confirm_window)
2247
@manager.periodic_task
def _poll_bandwidth_usage(self, context, start_time=None, stop_time=None):
    """Periodically refresh the per-VIF bandwidth usage cache."""
    if not start_time:
        start_time = utils.current_audit_period()[1]

    curr_time = time.time()
    # NOTE: flag name is historically misspelled ("bandwith"); keep it.
    if curr_time - self._last_bw_usage_poll > FLAGS.bandwith_poll_interval:
        self._last_bw_usage_poll = curr_time
        LOG.info(_("Updating bandwidth usage cache"))

        try:
            bw_usage = self.driver.get_all_bw_usage(start_time, stop_time)
        except NotImplementedError:
            # NOTE(mdragon): Not all hypervisors have bandwidth polling
            # implemented yet.  If they don't it doesn't break anything,
            # they just don't get the info in the usage events.
            return

        for usage in bw_usage:
            mac = usage['mac_address']
            self.db.bw_usage_update(context,
                                    mac,
                                    start_time,
                                    usage['bw_in'], usage['bw_out'])
2272
@manager.periodic_task
def _report_driver_status(self, context):
    """Periodically publish host capability data to the schedulers."""
    curr_time = time.time()
    if curr_time - self._last_host_check > FLAGS.host_state_interval:
        self._last_host_check = curr_time
        LOG.info(_("Updating host status"))
        # This will grab info about the host and queue it
        # to be sent to the Schedulers.
        self.update_service_capabilities(
            self.driver.get_host_stats(refresh=True))
2283
@manager.periodic_task(ticks_between_runs=10)
def _sync_power_states(self, context):
    """Align power states between the database and the hypervisor.

    The hypervisor is authoritative for the power_state data, but we don't
    want to do an expensive call to the virt driver's list_instances_detail
    method. Instead, we do a less-expensive call to get the number of
    virtual machines known by the hypervisor and if the number matches the
    number of virtual machines known by the database, we proceed in a lazy
    loop, one database record at a time, checking if the hypervisor has the
    same power state as is in the database. We call eventlet.sleep(0) after
    each loop to allow the periodic task eventlet to do other work.

    If the instance is not found on the hypervisor, but is in the database,
    then it will be set to power_state.NOSTATE.
    """
    db_instances = self.db.instance_get_all_by_host(context, self.host)

    num_vm_instances = self.driver.get_num_instances()
    num_db_instances = len(db_instances)

    if num_vm_instances != num_db_instances:
        LOG.warn(_("Found %(num_db_instances)s in the database and "
                   "%(num_vm_instances)s on the hypervisor.") % locals())

    for db_instance in db_instances:
        # Allow other periodic tasks to do some work...
        greenthread.sleep(0)
        db_power_state = db_instance['power_state']
        try:
            vm_instance = self.driver.get_info(db_instance)
            vm_power_state = vm_instance['state']
        except exception.InstanceNotFound:
            # This exception might have been caused by a race condition
            # between _sync_power_states and live migrations. Two cases
            # are possible as documented below. To this aim, refresh the
            # DB instance state.
            try:
                u = self.db.instance_get_by_uuid(context,
                                                 db_instance['uuid'])
                if self.host != u['host']:
                    # on the sending end of nova-compute _sync_power_state
                    # may have yielded to the greenthread performing a live
                    # migration; this in turn has changed the resident-host
                    # for the VM; However, the instance is still active, it
                    # is just in the process of migrating to another host.
                    # This implies that the compute source must relinquish
                    # control to the compute destination.
                    LOG.info(_("During the sync_power process the "
                               "instance %(uuid)s has moved from "
                               "host %(src)s to host %(dst)s") %
                               {'uuid': db_instance['uuid'],
                                'src': self.host,
                                'dst': u['host']})
                    # BUGFIX: skip the state comparison below;
                    # vm_power_state was never assigned in this branch and
                    # falling through would raise UnboundLocalError.
                    continue
                elif (u['host'] == self.host and
                      u['vm_state'] == vm_states.MIGRATING):
                    # on the receiving end of nova-compute, it could happen
                    # that the DB instance already report the new resident
                    # but the actual VM has not showed up on the hypervisor
                    # yet. In this case, let's allow the loop to continue
                    # and run the state sync in a later round
                    LOG.info(_("Instance %s is in the process of "
                               "migrating to this host. Wait next "
                               "sync_power cycle before setting "
                               "power state to NOSTATE")
                               % db_instance['uuid'])
                    # BUGFIX: same UnboundLocalError hazard as above.
                    continue
                else:
                    LOG.warn(_("Instance found in database but not "
                               "known by hypervisor. Setting power "
                               "state to NOSTATE"), locals(),
                             instance=db_instance)
                    vm_power_state = power_state.NOSTATE
            except exception.InstanceNotFound:
                # no need to update vm_state for deleted instances
                continue

        if vm_power_state == db_power_state:
            continue

        if (vm_power_state in (power_state.NOSTATE,
                               power_state.SHUTOFF,
                               power_state.SHUTDOWN,
                               power_state.CRASHED)
                and db_instance['vm_state'] == vm_states.ACTIVE):
            self._instance_update(context,
                                  db_instance['id'],
                                  power_state=vm_power_state,
                                  vm_state=vm_states.SHUTOFF)
        else:
            self._instance_update(context,
                                  db_instance['id'],
                                  power_state=vm_power_state)
2376
@manager.periodic_task
def _reclaim_queued_deletes(self, context):
    """Reclaim instances that are queued for deletion."""
    if FLAGS.reclaim_instance_interval <= 0:
        LOG.debug(_("FLAGS.reclaim_instance_interval <= 0, skipping..."))
        return

    instances = self.db.instance_get_all_by_host(context, self.host)
    for instance in instances:
        # An instance is reclaimable once its soft-delete timestamp is
        # older than the configured interval (or missing entirely).
        old_enough = (not instance.deleted_at or utils.is_older_than(
                instance.deleted_at,
                FLAGS.reclaim_instance_interval))
        soft_deleted = instance.vm_state == vm_states.SOFT_DELETE

        if soft_deleted and old_enough:
            instance_uuid = instance['uuid']
            LOG.info(_('Reclaiming deleted instance'), instance=instance)
            self._delete_instance(context, instance)
2395
@manager.periodic_task
def update_available_resource(self, context):
    """See driver.update_available_resource()

    :param context: security context
    :returns: See driver.update_available_resource()

    """
    self.driver.update_available_resource(context, self.host)
2405
def add_instance_fault_from_exc(self, context, instance_uuid, fault,
                                exc_info=None):
    """Adds the specified fault to the database.

    :param context: security context
    :param instance_uuid: uuid of the instance the fault belongs to
    :param fault: the exception that occurred
    :param exc_info: optional sys.exc_info() triple; its traceback is
                     appended to the details for server-side (500) faults
    """
    code = 500
    if hasattr(fault, "kwargs"):
        code = fault.kwargs.get('code', 500)

    details = unicode(fault)
    if exc_info and code == 500:
        tb = exc_info[2]
        details += '\n' + ''.join(traceback.format_tb(tb))

    values = {
        'instance_uuid': instance_uuid,
        'code': code,
        'message': fault.__class__.__name__,
        'details': unicode(details),
    }
    self.db.instance_fault_create(context, values)
2426
@manager.periodic_task(
    ticks_between_runs=FLAGS.running_deleted_instance_poll_interval)
def _cleanup_running_deleted_instances(self, context):
    """Cleanup any instances which are erroneously still running after
    having been deleted.

    Valid actions to take are:

    1. noop - do nothing
    2. log - log which instances are erroneously running
    3. reap - shutdown and cleanup any erroneously running instances

    The use-case for this cleanup task is: for various reasons, it may be
    possible for the database to show an instance as deleted but for that
    instance to still be running on a host machine (see bug
    https://bugs.launchpad.net/nova/+bug/911366).

    This cleanup task is a cross-hypervisor utility for finding these
    zombied instances and either logging the discrepancy (likely what you
    should do in production), or automatically reaping the instances (more
    appropriate for dev environments).
    """
    action = FLAGS.running_deleted_instance_action

    if action == "noop":
        return

    # NOTE(sirp): admin contexts don't ordinarily return deleted records
    with utils.temporary_mutation(context, read_deleted="yes"):
        for instance in self._running_deleted_instances(context):
            if action == "log":
                name = instance['name']
                LOG.warning(_("Detected instance with name label "
                              "'%(name)s' which is marked as "
                              "DELETED but still present on host."),
                            locals(), instance=instance)

            elif action == 'reap':
                name = instance['name']
                LOG.info(_("Destroying instance with name label "
                           "'%(name)s' which is marked as "
                           "DELETED but still present on host."),
                         locals(), instance=instance)
                self._shutdown_instance(context, instance, 'Terminating')
                self._cleanup_volumes(context, instance['id'])
            else:
                raise Exception(_("Unrecognized value '%(action)s'"
                                  " for FLAGS.running_deleted_"
                                  "instance_action"), locals(),
                                instance=instance)
2477
def _running_deleted_instances(self, context):
    """Returns a list of instances nova thinks is deleted,
    but the hypervisor thinks is still running. This method
    should be pushed down to the virt layer for efficiency.
    """
    def deleted_instance(instance):
        # Present on the hypervisor...
        present = instance.name in present_name_labels
        # ...yet marked deleted in the database.
        erroneously_running = instance.deleted and present
        # Grace period before a deleted-but-running instance is reported.
        old_enough = (not instance.deleted_at or utils.is_older_than(
                instance.deleted_at,
                FLAGS.running_deleted_instance_timeout))
        return bool(erroneously_running and old_enough)

    present_name_labels = set(self.driver.list_instances())
    instances = self.db.instance_get_all_by_host(context, self.host)
    return [i for i in instances if deleted_instance(i)]
2495
@contextlib.contextmanager
def error_out_instance_on_exception(self, context, instance_uuid):
    """Context manager: on any exception in the wrapped block, set the
    instance's vm_state to ERROR and re-raise the original exception.
    """
    try:
        yield
    except Exception as error:
        with utils.save_and_reraise_exception():
            msg = _('%s. Setting instance vm_state to ERROR')
            LOG.error(msg % error)
            self._set_instance_error_state(context, instance_uuid)
2505
@exception.wrap_exception(notifier=notifier, publisher_id=publisher_id())
2506
def add_aggregate_host(self, context, aggregate_id, host, **kwargs):
2507
"""Adds a host to a physical hypervisor pool."""
2508
aggregate = self.db.aggregate_get(context, aggregate_id)
2510
self.driver.add_to_aggregate(context, aggregate, host, **kwargs)
2511
except exception.AggregateError:
2512
error = sys.exc_info()
2513
self._undo_aggregate_operation(context,
2514
self.db.aggregate_host_delete,
2516
raise error[0], error[1], error[2]
2518
@exception.wrap_exception(notifier=notifier, publisher_id=publisher_id())
2519
def remove_aggregate_host(self, context, aggregate_id, host, **kwargs):
2520
"""Removes a host from a physical hypervisor pool."""
2521
aggregate = self.db.aggregate_get(context, aggregate_id)
2523
self.driver.remove_from_aggregate(context,
2524
aggregate, host, **kwargs)
2525
except (exception.AggregateError,
2526
exception.InvalidAggregateAction) as e:
2527
error = sys.exc_info()
2528
self._undo_aggregate_operation(
2529
context, self.db.aggregate_host_add,
2531
isinstance(e, exception.AggregateError))
2532
raise error[0], error[1], error[2]
2534
def _undo_aggregate_operation(self, context, op, aggregate_id,
                              host, set_error=True):
    """Undo the database side of a failed aggregate operation.

    :param op: db api call to invert the failed operation
    :param set_error: when True, mark the aggregate's operational_state
                      as ERROR before undoing
    """
    try:
        if set_error:
            status = {'operational_state': aggregate_states.ERROR}
            self.db.aggregate_update(context, aggregate_id, status)
        op(context, aggregate_id, host)
    except Exception:
        # Best-effort cleanup: log and swallow so the caller can re-raise
        # the original failure rather than this secondary one.
        LOG.exception(_('Aggregate %(aggregate_id)s: unrecoverable state '
                        'during operation on %(host)s') % locals())
2545
@manager.periodic_task(
2546
ticks_between_runs=FLAGS.image_cache_manager_interval)
2547
def _run_image_cache_manager_pass(self, context):
2548
"""Run a single pass of the image cache manager."""
2550
if FLAGS.image_cache_manager_interval == 0:
2554
self.driver.manage_image_cache(context)
2555
except NotImplementedError: