86
85
" Set to 0 to disable.")
87
86
# How often the host status is polled.
flags.DEFINE_integer(
    'host_state_interval',
    120,
    'Interval in seconds for querying the host status')

# Settings for the periodic task that looks for instances which are marked
# as deleted in the database but are still present on the hypervisor.
flags.DEFINE_integer(
    "running_deleted_instance_timeout",
    0,
    "Number of seconds after being deleted when a"
    " still-running instance should be considered"
    " eligible for cleanup.")
flags.DEFINE_integer(
    "running_deleted_instance_poll_interval",
    30,
    "Number of periodic scheduler ticks to wait between"
    " runs of the cleanup task.")
flags.DEFINE_string(
    "running_deleted_instance_action",
    "noop",
    "Action to take if a running deleted instance is"
    " detected. Valid options are 'noop', 'log', and"
    " 'reap'. Set to 'noop' to disable.")
90
100
LOG = logging.getLogger('nova.compute.manager')
122
132
return decorated_function
135
def wrap_instance_fault(function):
136
"""Wraps a method to catch exceptions related to instances.
138
This decorator wraps a method to catch any exceptions having to do with
139
an instance that may get thrown. It then logs an instance fault in the db.
141
@functools.wraps(function)
142
def decorated_function(self, context, instance_uuid, *args, **kwargs):
144
return function(self, context, instance_uuid, *args, **kwargs)
145
except exception.InstanceNotFound:
148
with utils.save_and_reraise_exception():
149
self.add_instance_fault_from_exc(context, instance_uuid, e)
151
return decorated_function
125
154
def _get_image_meta(context, image_ref):
    """Return what the image service reports for ``image_ref``.

    The reference is first resolved with ``nova.image.get_image_service``,
    which yields the service to talk to and the image id to ask it about.

    :param context: request context, passed through to the image service
    :param image_ref: image reference to resolve
    :returns: the result of ``image_service.show(context, image_id)``
    """
    image_service, image_id = nova.image.get_image_service(context, image_ref)
    return image_service.show(context, image_id)
162
191
context = nova.context.get_admin_context()
163
192
instances = self.db.instance_get_all_by_host(context, self.host)
164
193
for instance in instances:
165
inst_name = instance['name']
194
instance_uuid = instance['uuid']
166
195
db_state = instance['power_state']
167
196
drv_state = self._get_power_state(context, instance)
169
198
expect_running = db_state == power_state.RUNNING \
170
199
and drv_state != db_state
172
LOG.debug(_('Current state of %(inst_name)s is %(drv_state)s, '
201
LOG.debug(_('Current state of %(instance_uuid)s is %(drv_state)s, '
173
202
'state in DB is %(db_state)s.'), locals())
175
204
if (expect_running and FLAGS.resume_guests_state_on_host_boot)\
176
205
or FLAGS.start_guests_on_host_boot:
177
LOG.info(_('Rebooting instance %(inst_name)s after '
206
LOG.info(_('Rebooting instance %(instance_uuid)s after '
178
207
'nova-compute restart.'), locals())
179
self.reboot_instance(context, instance['id'])
208
self.reboot_instance(context, instance['uuid'])
180
209
elif drv_state == power_state.RUNNING:
181
210
# Hyper-V and VMWareAPI drivers will raise an exception
273
301
if ((bdm['snapshot_id'] is not None) and
274
302
(bdm['volume_id'] is None)):
275
303
# TODO(yamahata): default name and description
276
vol = volume_api.create(context, bdm['volume_size'],
277
bdm['snapshot_id'], '', '')
304
vol = self.volume_api.create(context, bdm['volume_size'],
305
bdm['snapshot_id'], '', '')
278
306
# TODO(yamahata): creating volume simultaneously
279
307
# reduces creation time?
280
volume_api.wait_creation(context, vol['id'])
308
self.volume_api.wait_creation(context, vol['id'])
281
309
self.db.block_device_mapping_update(
282
310
context, bdm['id'], {'volume_id': vol['id']})
283
311
bdm['volume_id'] = vol['id']
285
313
if bdm['volume_id'] is not None:
286
volume_api.check_attach(context,
287
volume_id=bdm['volume_id'])
314
self.volume_api.check_attach(context,
315
volume_id=bdm['volume_id'])
288
316
cinfo = self._attach_volume_boot(context, instance,
289
317
bdm['volume_id'],
290
318
bdm['device_name'])
515
551
block_device_mapping.append({'connection_info': cinfo,
517
553
bdm['device_name']})
518
## NOTE(vish): The mapping is passed in so the driver can disconnect
519
## from remote volumes if necessary
554
# NOTE(vish): The mapping is passed in so the driver can disconnect
555
# from remote volumes if necessary
520
556
return {'block_device_mapping': block_device_mapping}
522
558
@exception.wrap_exception(notifier=notifier, publisher_id=publisher_id())
523
560
def run_instance(self, context, instance_uuid, **kwargs):
524
561
self._run_instance(context, instance_uuid, **kwargs)
526
563
@exception.wrap_exception(notifier=notifier, publisher_id=publisher_id())
527
564
@checks_instance_lock
528
566
def start_instance(self, context, instance_uuid):
529
567
"""Starting an instance on this host."""
530
568
# TODO(yamahata): injected_files isn't supported.
546
584
if not FLAGS.stub_network:
547
585
self.network_api.deallocate_for_instance(context, instance)
549
for bdm in self._get_instance_volume_bdms(context, instance_id):
550
volume_id = bdm['volume_id']
552
self._detach_volume(context, instance_uuid, volume_id)
553
except exception.DiskNotFound as exc:
554
LOG.warn(_("Ignoring DiskNotFound: %s") % exc)
556
587
if instance['power_state'] == power_state.SHUTOFF:
557
588
self.db.instance_destroy(context, instance_id)
558
589
raise exception.Error(_('trying to destroy already destroyed'
559
590
' instance: %s') % instance_uuid)
591
# NOTE(vish) get bdms before destroying the instance
592
bdms = self._get_instance_volume_bdms(context, instance_id)
560
593
block_device_info = self._get_instance_volume_block_device_info(
561
594
context, instance_id)
562
self.driver.destroy(instance, network_info, block_device_info, cleanup)
595
self.driver.destroy(instance, network_info, block_device_info)
598
# NOTE(vish): actual driver detach done in driver.destroy, so
599
# just tell nova-volume that we are done with it.
600
self.volume_api.terminate_connection(context,
603
self.volume_api.detach(context, bdm['volume_id'])
604
except exception.DiskNotFound as exc:
605
LOG.warn(_("Ignoring DiskNotFound: %s") % exc)
564
607
def _cleanup_volumes(self, context, instance_id):
565
volume_api = volume.API()
566
608
bdms = self.db.block_device_mapping_get_all_by_instance(context,
569
611
LOG.debug(_("terminating bdm %s") % bdm)
570
612
if bdm['volume_id'] and bdm['delete_on_termination']:
571
volume_api.delete(context, bdm['volume_id'])
613
self.volume_api.delete(context, bdm['volume_id'])
572
614
# NOTE(vish): bdms will be deleted on instance destroy
574
616
def _delete_instance(self, context, instance):
575
617
"""Delete an instance on this host."""
576
618
instance_id = instance['id']
577
self._shutdown_instance(context, instance, 'Terminating', True)
619
self._shutdown_instance(context, instance, 'Terminating')
578
620
self._cleanup_volumes(context, instance_id)
579
621
self._instance_update(context,
601
644
@exception.wrap_exception(notifier=notifier, publisher_id=publisher_id())
602
645
@checks_instance_lock
603
647
def stop_instance(self, context, instance_uuid):
604
648
"""Stopping an instance on this host."""
605
# FIXME(vish): I've kept the files during stop instance, but
606
# I think start will fail due to the files still
607
# existing. I don't really know what the purpose of
608
# stop and start are when compared to pause and unpause
609
649
instance = self.db.instance_get_by_uuid(context, instance_uuid)
610
self._shutdown_instance(context, instance, 'Stopping', False)
650
self._shutdown_instance(context, instance, 'Stopping')
611
651
self._instance_update(context,
613
653
vm_state=vm_states.STOPPED,
883
930
# Catch all here because this could be anything.
885
932
if i == max_tries - 1:
886
# At some point this exception may make it back
887
# to the API caller, and we don't want to reveal
888
# too much. The real exception is logged above
889
933
self._instance_update(context,
891
936
vm_state=vm_states.ERROR)
892
raise exception.Error(_('Internal error'))
937
# We create a new exception here so that we won't
938
# potentially reveal password information to the
939
# API caller. The real exception is logged above
940
_msg = _('Error setting admin password')
941
raise exception.Error(_msg)
896
945
@exception.wrap_exception(notifier=notifier, publisher_id=publisher_id())
897
946
@checks_instance_lock
898
948
def inject_file(self, context, instance_uuid, path, file_contents):
899
949
"""Write a file to the specified path in an instance on this host."""
900
950
context = context.elevated()
905
955
LOG.warn(_('trying to inject a file into a non-running '
906
956
'instance: %(instance_uuid)s (state: %(instance_state)s '
907
957
'expected: %(expected_state)s)') % locals())
908
nm = instance_ref['name']
909
msg = _('instance %(nm)s: injecting file to %(path)s') % locals()
958
instance_uuid = instance_ref['uuid']
959
msg = _('instance %(instance_uuid)s: injecting file to %(path)s')
960
LOG.audit(msg % locals())
911
961
self.driver.inject_file(instance_ref, path, file_contents)
913
963
@exception.wrap_exception(notifier=notifier, publisher_id=publisher_id())
914
964
@checks_instance_lock
915
966
def agent_update(self, context, instance_uuid, url, md5hash):
916
967
"""Update agent running on an instance on this host."""
917
968
context = context.elevated()
922
973
LOG.warn(_('trying to update agent on a non-running '
923
974
'instance: %(instance_uuid)s (state: %(instance_state)s '
924
975
'expected: %(expected_state)s)') % locals())
925
nm = instance_ref['name']
926
msg = _('instance %(nm)s: updating agent to %(url)s') % locals()
976
instance_uuid = instance_ref['uuid']
977
msg = _('instance %(instance_uuid)s: updating agent to %(url)s')
978
LOG.audit(msg % locals())
928
979
self.driver.agent_update(instance_ref, url, md5hash)
930
981
@exception.wrap_exception(notifier=notifier, publisher_id=publisher_id())
931
982
@checks_instance_lock
932
984
def rescue_instance(self, context, instance_uuid, **kwargs):
934
986
Rescue an instance on this host.
1365
1444
self.driver.inject_network_info(instance, network_info)
1367
1446
@exception.wrap_exception(notifier=notifier, publisher_id=publisher_id())
1368
def get_console_output(self, context, instance_uuid):
1447
@wrap_instance_fault
1448
def get_console_output(self, context, instance_uuid, tail_length=None):
1369
1449
"""Send the console output for the given instance."""
1370
1450
context = context.elevated()
1371
1451
instance_ref = self.db.instance_get_by_uuid(context, instance_uuid)
1372
1452
LOG.audit(_("Get console output for instance %s"), instance_uuid,
1373
1453
context=context)
1374
1454
output = self.driver.get_console_output(instance_ref)
1456
if tail_length is not None:
1457
output = self._tail_log(output, tail_length)
1375
1459
return output.decode('utf-8', 'replace').encode('ascii', 'replace')
1461
def _tail_log(self, log, length):
1463
length = int(length)
1470
return '\n'.join(log.split('\n')[-int(length):])
1377
1472
@exception.wrap_exception(notifier=notifier, publisher_id=publisher_id())
1473
@wrap_instance_fault
1378
1474
def get_ajax_console(self, context, instance_uuid):
1379
1475
"""Return connection information for an ajax console."""
1380
1476
context = context.elevated()
1395
1492
is done by instance creation"""
1397
1494
instance_id = instance['id']
1495
instance_uuid = instance['uuid']
1398
1496
context = context.elevated()
1399
LOG.audit(_("instance %(instance_id)s: booting with "
1497
LOG.audit(_("instance %(instance_uuid)s: booting with "
1400
1498
"volume %(volume_id)s at %(mountpoint)s") %
1401
1499
locals(), context=context)
1402
1500
address = FLAGS.my_ip
1403
volume_api = volume.API()
1404
connection_info = volume_api.initialize_connection(context,
1407
volume_api.attach(context, volume_id, instance_id, mountpoint)
1501
connection_info = self.volume_api.initialize_connection(context,
1504
self.volume_api.attach(context, volume_id, instance_id, mountpoint)
1408
1505
return connection_info
1507
@exception.wrap_exception(notifier=notifier, publisher_id=publisher_id())
1410
1508
@checks_instance_lock
1509
@wrap_instance_fault
1411
1510
def attach_volume(self, context, instance_uuid, volume_id, mountpoint):
1412
1511
"""Attach a volume to an instance."""
1413
1512
context = context.elevated()
1417
1516
_("instance %(instance_uuid)s: attaching volume %(volume_id)s"
1418
1517
" to %(mountpoint)s") % locals(), context=context)
1419
volume_api = volume.API()
1420
1518
address = FLAGS.my_ip
1421
connection_info = volume_api.initialize_connection(context,
1519
connection_info = self.volume_api.initialize_connection(context,
1425
1523
self.driver.attach_volume(connection_info,
1426
1524
instance_ref['name'],
1428
1526
except Exception: # pylint: disable=W0702
1429
exc = sys.exc_info()
1430
# NOTE(vish): The inline callback eats the exception info so we
1431
# log the traceback here and reraise the same
1433
LOG.exception(_("instance %(instance_uuid)s: attach failed"
1434
" %(mountpoint)s, removing") % locals(), context=context)
1435
volume_api.terminate_connection(context, volume_id, address)
1527
with utils.save_and_reraise_exception():
1528
LOG.exception(_("instance %(instance_uuid)s: attach failed"
1529
" %(mountpoint)s, removing") % locals(),
1531
self.volume_api.terminate_connection(context, volume_id,
1438
volume_api.attach(context, volume_id, instance_id, mountpoint)
1534
self.volume_api.attach(context, volume_id, instance_id, mountpoint)
1440
1536
'instance_id': instance_id,
1441
1537
'connection_info': utils.dumps(connection_info),
1449
1545
self.db.block_device_mapping_create(context, values)
1452
@exception.wrap_exception(notifier=notifier, publisher_id=publisher_id())
1453
@checks_instance_lock
1454
def _detach_volume(self, context, instance_uuid, volume_id,
1455
destroy_bdm=False, mark_detached=True,
1456
force_detach=False):
1457
"""Detach a volume from an instance."""
1458
context = context.elevated()
1459
instance_ref = self.db.instance_get_by_uuid(context, instance_uuid)
1460
instance_id = instance_ref['id']
1461
bdms = self.db.block_device_mapping_get_all_by_instance(
1462
context, instance_id)
1464
# NOTE(vish): Comparing as strings because the os_api doesn't
1465
# convert to integer and we may wish to support uuids
1467
if str(item['volume_id']) == str(volume_id):
1548
def _detach_volume(self, context, instance, bdm):
1549
"""Do the actual driver detach using block device mapping."""
1550
instance_name = instance['name']
1551
instance_uuid = instance['uuid']
1470
1552
mp = bdm['device_name']
1553
volume_id = bdm['volume_id']
1472
1555
LOG.audit(_("Detach volume %(volume_id)s from mountpoint %(mp)s"
1473
" on instance %(instance_id)s") % locals(), context=context)
1474
volume_api = volume.API()
1475
if (instance_ref['name'] not in self.driver.list_instances() and
1556
" on instance %(instance_uuid)s") % locals(), context=context)
1558
if instance_name not in self.driver.list_instances():
1477
1559
LOG.warn(_("Detaching volume from unknown instance %s"),
1478
instance_id, context=context)
1480
self.driver.detach_volume(utils.loads(bdm['connection_info']),
1481
instance_ref['name'],
1483
address = FLAGS.my_ip
1484
volume_api.terminate_connection(context, volume_id, address)
1486
volume_api.detach(context, volume_id)
1488
self.db.block_device_mapping_destroy_by_instance_and_volume(
1489
context, instance_id, volume_id)
1560
instance_uuid, context=context)
1561
self.driver.detach_volume(utils.loads(bdm['connection_info']),
1565
@exception.wrap_exception(notifier=notifier, publisher_id=publisher_id())
1566
@checks_instance_lock
1567
@wrap_instance_fault
1492
1568
def detach_volume(self, context, instance_uuid, volume_id):
1493
1569
"""Detach a volume from an instance."""
1494
return self._detach_volume(context, instance_uuid, volume_id, True)
1570
instance_ref = self.db.instance_get_by_uuid(context, instance_uuid)
1571
instance_id = instance_ref['id']
1572
bdm = self._get_instance_volume_bdm(context, instance_id, volume_id)
1573
self._detach_volume(context, instance_ref, bdm)
1574
self.volume_api.terminate_connection(context, volume_id, FLAGS.my_ip)
1575
self.volume_api.detach(context.elevated(), volume_id)
1576
self.db.block_device_mapping_destroy_by_instance_and_volume(
1577
context, instance_id, volume_id)
1496
1580
@exception.wrap_exception(notifier=notifier, publisher_id=publisher_id())
1497
1581
def remove_volume_connection(self, context, instance_id, volume_id):
1498
"""Detach a volume from an instance.,"""
1582
"""Remove a volume connection using the volume api"""
1499
1583
# NOTE(vish): We don't want to actually mark the volume
1500
1584
# detached, or delete the bdm, just remove the
1501
1585
# connection from this host.
1503
1587
instance_ref = self.db.instance_get(context, instance_id)
1504
self._detach_volume(context, instance_ref['uuid'], volume_id,
1588
bdm = self._get_instance_volume_bdm(context,
1591
self._detach_volume(context, instance_ref,
1592
bdm['volume_id'], bdm['device_name'])
1593
self.volume_api.terminate_connection(context,
1506
1596
except exception.NotFound:
1574
1664
return self.driver.update_available_resource(context, self.host)
1666
def get_instance_disk_info(self, context, instance_name):
    """Get information about an instance's current disk.

    Thin pass-through to the compute driver's ``get_instance_disk_info``.
    Implementation nova.virt.libvirt.connection.

    :param context: security context (not used by this method itself)
    :param instance_name: instance name
    :returns: whatever the driver's ``get_instance_disk_info`` returns

    """
    return self.driver.get_instance_disk_info(instance_name)
1576
1677
def pre_live_migration(self, context, instance_id, time=None,
1577
1678
block_migration=False, disk=None):
1578
1679
"""Preparations for live migration at dest host.
1674
1774
'disk': disk}})
1676
1776
except Exception:
1677
exc = sys.exc_info()
1678
i_name = instance_ref.name
1679
msg = _("Pre live migration for %(i_name)s failed at %(dest)s")
1680
LOG.exception(msg % locals())
1681
self.rollback_live_migration(context, instance_ref,
1682
dest, block_migration)
1777
with utils.save_and_reraise_exception():
1778
instance_uuid = instance_ref['uuid']
1779
msg = _("Pre live migration for %(instance_uuid)s failed at"
1781
LOG.exception(msg % locals())
1782
self.rollback_live_migration(context, instance_ref, dest,
1685
1785
# Executing live migration
1686
1786
# live_migration might raise exceptions, but
1976
2085
self.db.instance_fault_create(context, values)
1978
def add_instance_fault(self, context, instance_uuid, code=500,
1979
message='', details=''):
1980
"""Adds a fault to the database using the specified values."""
1982
'instance_uuid': instance_uuid,
1987
self.db.instance_fault_create(context, values)
2087
@manager.periodic_task(
    ticks_between_runs=FLAGS.running_deleted_instance_poll_interval)
def _cleanup_running_deleted_instances(self, context):
    """Cleanup any instances which are erroneously still running after
    having been deleted.

    Valid actions to take are:

        1. noop - do nothing
        2. log - log which instances are erroneously running
        3. reap - shutdown and cleanup any erroneously running instances

    The use-case for this cleanup task is: for various reasons, it may be
    possible for the database to show an instance as deleted but for that
    instance to still be running on a host machine (see bug
    https://bugs.launchpad.net/nova/+bug/911366).

    This cleanup task is a cross-hypervisor utility for finding these
    zombied instances and either logging the discrepancy (likely what you
    should do in production), or automatically reaping the instances (more
    appropriate for dev environments).

    :param context: request context; it is temporarily switched to
                    read_deleted="yes" while this host's instances are
                    fetched
    :raises Exception: if an erroneously running instance is found and
                       FLAGS.running_deleted_instance_action is not one
                       of the valid actions listed above
    """
    action = FLAGS.running_deleted_instance_action

    if action == "noop":
        return

    # Names of everything the hypervisor driver currently reports.
    present_name_labels = set(self.driver.list_instances())

    # NOTE(sirp): admin contexts don't ordinarily return deleted records
    with utils.temporary_mutation(context, read_deleted="yes"):
        instances = self.db.instance_get_all_by_host(context, self.host)
        for instance in instances:
            present = instance.name in present_name_labels
            erroneously_running = instance.deleted and present
            # An instance without a deleted_at timestamp is treated as
            # old enough; otherwise the configured timeout must have
            # elapsed since deletion.
            old_enough = (not instance.deleted_at or utils.is_older_than(
                          instance.deleted_at,
                          FLAGS.running_deleted_instance_timeout))

            if erroneously_running and old_enough:
                # These locals are consumed by the locals()-based message
                # formatting below.
                instance_id = instance['id']
                instance_uuid = instance['uuid']
                name_label = instance['name']

                if action == "log":
                    LOG.warning(_("Detected instance %(instance_uuid)s"
                                  " with name label '%(name_label)s' which"
                                  " is marked as DELETED but still present"
                                  " on host."), locals())

                elif action == 'reap':
                    LOG.info(_("Destroying instance %(instance_uuid)s with"
                               " name label '%(name_label)s' which is"
                               " marked as DELETED but still present on"
                               " host."), locals())
                    # NOTE(review): other callers of _shutdown_instance
                    # visible in this file appear in both three- and
                    # four-argument forms; confirm that the trailing
                    # True is still accepted by its signature.
                    self._shutdown_instance(
                            context, instance, 'Terminating', True)
                    self._cleanup_volumes(context, instance_id)
                else:
                    # Interpolate the message explicitly: unlike the LOG
                    # calls, Exception() does not format its arguments,
                    # so passing locals() as a second argument left the
                    # %(action)s placeholder unfilled.
                    raise Exception(_("Unrecognized value '%(action)s'"
                                      " for FLAGS.running_deleted_"
                                      "instance_action") % locals())