259
271
"""Get last modification time of directory."""
260
272
return sentry_unit.directory_stat(directory)['mtime']
262
def _get_proc_start_time(self, sentry_unit, service, pgrep_full=False):
263
"""Get process' start time.
265
Determine start time of the process based on the last modification
266
time of the /proc/pid directory. If pgrep_full is True, the process
267
name is matched against the full command line.
270
cmd = 'pgrep -o -f {}'.format(service)
272
cmd = 'pgrep -o {}'.format(service)
273
cmd = cmd + ' | grep -v pgrep || exit 0'
274
cmd_out = sentry_unit.run(cmd)
275
self.log.debug('CMDout: ' + str(cmd_out))
277
self.log.debug('Pid for %s %s' % (service, str(cmd_out[0])))
278
proc_dir = '/proc/{}'.format(cmd_out[0].strip())
279
return self._get_dir_mtime(sentry_unit, proc_dir)
274
def _get_proc_start_time(self, sentry_unit, service, pgrep_full=None):
275
"""Get start time of a process based on the last modification time
276
of the /proc/pid directory.
278
:sentry_unit: The sentry unit to check for the service on
279
:service: service name to look for in process table
280
:pgrep_full: [Deprecated] Use full command line search mode with pgrep
281
:returns: epoch time of service process start
282
:param commands: list of bash commands
283
:param sentry_units: list of sentry unit pointers
284
:returns: None if successful; Failure message otherwise
286
if pgrep_full is not None:
287
# /!\ DEPRECATION WARNING (beisner):
288
# No longer implemented, as pidof is now used instead of pgrep.
289
# https://bugs.launchpad.net/charm-helpers/+bug/1474030
290
self.log.warn('DEPRECATION WARNING: pgrep_full bool is no '
291
'longer implemented re: lp 1474030.')
293
pid_list = self.get_process_id_list(sentry_unit, service)
295
proc_dir = '/proc/{}'.format(pid)
296
self.log.debug('Pid for {} on {}: {}'.format(
297
service, sentry_unit.info['unit_name'], pid))
299
return self._get_dir_mtime(sentry_unit, proc_dir)
281
301
def service_restarted(self, sentry_unit, service, filename,
282
pgrep_full=False, sleep_time=20):
302
pgrep_full=None, sleep_time=20):
283
303
"""Check if service was restarted.
285
305
Compare a service's start time vs a file's last modification time
286
306
(such as a config file for that service) to determine if the service
287
307
has been restarted.
309
# /!\ DEPRECATION WARNING (beisner):
310
# This method is prone to races in that no before-time is known.
311
# Use validate_service_config_changed instead.
313
# NOTE(beisner) pgrep_full is no longer implemented, as pidof is now
314
# used instead of pgrep. pgrep_full is still passed through to ensure
315
# deprecation WARNS. lp1474030
316
self.log.warn('DEPRECATION WARNING: use '
317
'validate_service_config_changed instead of '
318
'service_restarted due to known races.')
289
320
time.sleep(sleep_time)
290
321
if (self._get_proc_start_time(sentry_unit, service, pgrep_full) >=
291
322
self._get_file_mtime(sentry_unit, filename)):
296
327
def service_restarted_since(self, sentry_unit, mtime, service,
297
pgrep_full=False, sleep_time=20,
328
pgrep_full=None, sleep_time=20,
329
retry_count=30, retry_sleep_time=10):
299
330
"""Check if service was been started after a given time.
302
333
sentry_unit (sentry): The sentry unit to check for the service on
303
334
mtime (float): The epoch time to check against
304
335
service (string): service name to look for in process table
305
pgrep_full (boolean): Use full command line search mode with pgrep
306
sleep_time (int): Seconds to sleep before looking for process
307
retry_count (int): If service is not found, how many times to retry
336
pgrep_full: [Deprecated] Use full command line search mode with pgrep
337
sleep_time (int): Initial sleep time (s) before looking for file
338
retry_sleep_time (int): Time (s) to sleep between retries
339
retry_count (int): If file is not found, how many times to retry
310
342
bool: True if service found and its start time is newer than mtime,
311
343
False if service is older than mtime or if service was
314
self.log.debug('Checking %s restarted since %s' % (service, mtime))
346
# NOTE(beisner) pgrep_full is no longer implemented, as pidof is now
347
# used instead of pgrep. pgrep_full is still passed through to ensure
348
# deprecation WARNS. lp1474030
350
unit_name = sentry_unit.info['unit_name']
351
self.log.debug('Checking that %s service restarted since %s on '
352
'%s' % (service, mtime, unit_name))
315
353
time.sleep(sleep_time)
316
proc_start_time = self._get_proc_start_time(sentry_unit, service,
318
while retry_count > 0 and not proc_start_time:
319
self.log.debug('No pid file found for service %s, will retry %i '
320
'more times' % (service, retry_count))
322
proc_start_time = self._get_proc_start_time(sentry_unit, service,
324
retry_count = retry_count - 1
354
proc_start_time = None
356
while tries <= retry_count and not proc_start_time:
358
proc_start_time = self._get_proc_start_time(sentry_unit,
361
self.log.debug('Attempt {} to get {} proc start time on {} '
362
'OK'.format(tries, service, unit_name))
364
# NOTE(beisner) - race avoidance, proc may not exist yet.
365
# https://bugs.launchpad.net/charm-helpers/+bug/1474030
366
self.log.debug('Attempt {} to get {} proc start time on {} '
367
'failed\n{}'.format(tries, service,
369
time.sleep(retry_sleep_time)
326
372
if not proc_start_time:
327
373
self.log.warn('No proc start time found, assuming service did '
330
376
if proc_start_time >= mtime:
331
self.log.debug('proc start time is newer than provided mtime'
332
'(%s >= %s)' % (proc_start_time, mtime))
377
self.log.debug('Proc start time is newer than provided mtime'
378
'(%s >= %s) on %s (OK)' % (proc_start_time,
335
self.log.warn('proc start time (%s) is older than provided mtime '
336
'(%s), service did not restart' % (proc_start_time,
382
self.log.warn('Proc start time (%s) is older than provided mtime '
383
'(%s) on %s, service did not '
384
'restart' % (proc_start_time, mtime, unit_name))
340
387
def config_updated_since(self, sentry_unit, filename, mtime,
388
sleep_time=20, retry_count=30,
389
retry_sleep_time=10):
342
390
"""Check if file was modified after a given time.
345
393
sentry_unit (sentry): The sentry unit to check the file mtime on
346
394
filename (string): The file to check mtime of
347
395
mtime (float): The epoch time to check against
348
sleep_time (int): Seconds to sleep before looking for process
396
sleep_time (int): Initial sleep time (s) before looking for file
397
retry_sleep_time (int): Time (s) to sleep between retries
398
retry_count (int): If file is not found, how many times to retry
351
401
bool: True if file was modified more recently than mtime, False if
352
file was modified before mtime,
402
file was modified before mtime, or if file not found.
354
self.log.debug('Checking %s updated since %s' % (filename, mtime))
404
unit_name = sentry_unit.info['unit_name']
405
self.log.debug('Checking that %s updated since %s on '
406
'%s' % (filename, mtime, unit_name))
355
407
time.sleep(sleep_time)
356
file_mtime = self._get_file_mtime(sentry_unit, filename)
410
while tries <= retry_count and not file_mtime:
412
file_mtime = self._get_file_mtime(sentry_unit, filename)
413
self.log.debug('Attempt {} to get {} file mtime on {} '
414
'OK'.format(tries, filename, unit_name))
416
# NOTE(beisner) - race avoidance, file may not exist yet.
417
# https://bugs.launchpad.net/charm-helpers/+bug/1474030
418
self.log.debug('Attempt {} to get {} file mtime on {} '
419
'failed\n{}'.format(tries, filename,
421
time.sleep(retry_sleep_time)
425
self.log.warn('Could not determine file mtime, assuming '
426
'file does not exist')
357
429
if file_mtime >= mtime:
358
430
self.log.debug('File mtime is newer than provided mtime '
359
'(%s >= %s)' % (file_mtime, mtime))
431
'(%s >= %s) on %s (OK)' % (file_mtime,
362
self.log.warn('File mtime %s is older than provided mtime %s'
363
% (file_mtime, mtime))
435
self.log.warn('File mtime is older than provided mtime'
436
'(%s < on %s) on %s' % (file_mtime,
366
440
def validate_service_config_changed(self, sentry_unit, mtime, service,
367
filename, pgrep_full=False,
368
sleep_time=20, retry_count=2):
441
filename, pgrep_full=None,
442
sleep_time=20, retry_count=30,
443
retry_sleep_time=10):
369
444
"""Check service and file were updated after mtime
507
608
'{}'.format(e_proc_name, a_proc_name))
509
610
a_pids_length = len(a_pids)
510
if e_pids_length != a_pids_length:
511
return ('PID count mismatch. {} ({}) expected, actual: '
611
fail_msg = ('PID count mismatch. {} ({}) expected, actual: '
512
612
'{}, {} ({})'.format(e_sentry_name, e_proc_name,
513
613
e_pids_length, a_pids_length,
616
# If expected is not bool, ensure PID quantities match
617
if not isinstance(e_pids_length, bool) and \
618
a_pids_length != e_pids_length:
620
# If expected is bool True, ensure 1 or more PIDs exist
621
elif isinstance(e_pids_length, bool) and \
622
e_pids_length is True and a_pids_length < 1:
624
# If expected is bool False, ensure 0 PIDs exist
625
elif isinstance(e_pids_length, bool) and \
626
e_pids_length is False and a_pids_length != 0:
516
629
self.log.debug('PID check OK: {} {} {}: '
517
630
'{}'.format(e_sentry_name, e_proc_name,
531
644
return 'Dicts within list are not identical'
648
def validate_sectionless_conf(self, file_contents, expected):
649
"""A crude conf parser. Useful to inspect configuration files which
650
do not have section headers (as would be necessary in order to use
651
the configparser). Such as openstack-dashboard or rabbitmq confs."""
652
for line in file_contents.split('\n'):
654
args = line.split('=')
657
key = args[0].strip()
658
value = args[1].strip()
659
if key in expected.keys():
660
if expected[key] != value:
661
msg = ('Config mismatch. Expected, actual: {}, '
662
'{}'.format(expected[key], value))
663
amulet.raise_status(amulet.FAIL, msg=msg)
665
def get_unit_hostnames(self, units):
666
"""Return a dict of juju unit names to hostnames."""
669
host_names[unit.info['unit_name']] = \
670
str(unit.file_contents('/etc/hostname').strip())
671
self.log.debug('Unit host names: {}'.format(host_names))
674
def run_cmd_unit(self, sentry_unit, cmd):
675
"""Run a command on a unit, return the output and exit code."""
676
output, code = sentry_unit.run(cmd)
678
self.log.debug('{} `{}` command returned {} '
679
'(OK)'.format(sentry_unit.info['unit_name'],
682
msg = ('{} `{}` command returned {} '
683
'{}'.format(sentry_unit.info['unit_name'],
685
amulet.raise_status(amulet.FAIL, msg=msg)
686
return str(output), code
688
def file_exists_on_unit(self, sentry_unit, file_name):
689
"""Check if a file exists on a unit."""
691
sentry_unit.file_stat(file_name)
695
except Exception as e:
696
msg = 'Error checking file {}: {}'.format(file_name, e)
697
amulet.raise_status(amulet.FAIL, msg=msg)
699
def file_contents_safe(self, sentry_unit, file_name,
700
max_wait=60, fatal=False):
701
"""Get file contents from a sentry unit. Wrap amulet file_contents
702
with retry logic to address races where a file checks as existing,
703
but no longer exists by the time file_contents is called.
704
Return None if file not found. Optionally raise if fatal is True."""
705
unit_name = sentry_unit.info['unit_name']
706
file_contents = False
708
while not file_contents and tries < (max_wait / 4):
710
file_contents = sentry_unit.file_contents(file_name)
712
self.log.debug('Attempt {} to open file {} from {} '
713
'failed'.format(tries, file_name,
723
msg = 'Failed to get file contents from unit.'
724
amulet.raise_status(amulet.FAIL, msg)
726
def port_knock_tcp(self, host="localhost", port=22, timeout=15):
727
"""Open a TCP socket to check for a listening sevice on a host.
729
:param host: host name or IP address, default to localhost
730
:param port: TCP port number, default to 22
731
:param timeout: Connect timeout, default to 15 seconds
732
:returns: True if successful, False if connect failed
735
# Resolve host name if possible
737
connect_host = socket.gethostbyname(host)
738
host_human = "{} ({})".format(connect_host, host)
739
except socket.error as e:
740
self.log.warn('Unable to resolve address: '
741
'{} ({}) Trying anyway!'.format(host, e))
743
host_human = connect_host
745
# Attempt socket connection
747
knock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
748
knock.settimeout(timeout)
749
knock.connect((connect_host, port))
751
self.log.debug('Socket connect OK for host '
752
'{} on port {}.'.format(host_human, port))
754
except socket.error as e:
755
self.log.debug('Socket connect FAIL for'
756
' {} port {} ({})'.format(host_human, port, e))
759
def port_knock_units(self, sentry_units, port=22,
                     timeout=15, expect_success=True):
    """Open a TCP socket to check for a listening service on each unit.

    Every unit's ``info['public-address']`` is probed through
    ``self.port_knock_tcp(host, port, timeout)``. Checking stops at the
    first unit whose result disagrees with ``expect_success``.

    :param sentry_units: list of sentry unit pointers
    :param port: TCP port number, default to 22
    :param timeout: Connect timeout, default to 15 seconds
    :param expect_success: True by default, set False to invert logic
    :returns: None if successful, Failure message otherwise
    """
    for unit in sentry_units:
        host = unit.info['public-address']
        connected = self.port_knock_tcp(host, port, timeout)
        if not connected and expect_success:
            # A listener was expected but the connect attempt failed.
            return 'Socket connect failed.'
        elif connected and not expect_success:
            # Nothing should be listening, yet the connect succeeded.
            return 'Socket connected unexpectedly.'
    # Every unit matched the expectation (also the case for an empty list).
    return None
778
def get_uuid_epoch_stamp(self):
    """Return a stamp string based on uuid4 and epoch time.

    Useful in generating test messages which need to be unique-ish.
    The result has the form ``[<uuid4>-<epoch seconds>]``.
    """
    return '[{}-{}]'.format(uuid.uuid4(), time.time())
783
# amulet juju action helpers:
784
def run_action(self, unit_sentry, action,
785
_check_output=subprocess.check_output):
786
"""Run the named action on a given unit sentry.
788
_check_output parameter is used for dependency injection.
792
unit_id = unit_sentry.info["unit_name"]
793
command = ["juju", "action", "do", "--format=json", unit_id, action]
794
self.log.info("Running command: %s\n" % " ".join(command))
795
output = _check_output(command, universal_newlines=True)
796
data = json.loads(output)
797
action_id = data[u'Action queued with id']
800
def wait_on_action(self, action_id, _check_output=subprocess.check_output):
801
"""Wait for a given action, returning if it completed or not.
803
_check_output parameter is used for dependency injection.
805
command = ["juju", "action", "fetch", "--format=json", "--wait=0",
807
output = _check_output(command, universal_newlines=True)
808
data = json.loads(output)
809
return data.get(u"status") == "completed"
811
def status_get(self, unit):
812
"""Return the current service status of this unit."""
813
raw_status, return_code = unit.run(
814
"status-get --format=json --include-data")
816
return ("unknown", "")
817
status = json.loads(raw_status)
818
return (status["status"], status["message"])