77
78
return super(PerfTestDataJsonSerialisation, self).default(obj)
81
class SetupPathsConfig:
82
"""Paths for transferring data to a target or running on that target."""
84
self.installer_script_path = _get_static_script_path(
86
self.collectd_config_path = _get_static_script_path(
88
self.installer_script_dest_path = '/tmp/installer.sh'
89
self.runner_script_dest_path = '/tmp/runner.sh'
90
self.collectd_config_dest_file = '/tmp/collectd.config'
93
def _get_static_script_path(script_path):
94
full_path = os.path.abspath(__file__)
95
current_dir = os.path.dirname(full_path)
96
return os.path.join(current_dir, script_path)
80
99
SETUP_SCRIPT_PATH = 'perf_static/setup-perf-monitoring.sh'
81
100
COLLECTD_CONFIG_PATH = 'perf_static/collectd.conf'
103
SetupPaths = SetupPathsConfig()
84
106
log = logging.getLogger("run_perfscale_test")
109
def add_basic_perfscale_arguments(parser):
110
"""Add the basic required args needed for a perfscale test."""
111
add_basic_testing_arguments(parser)
114
help='Enable HA before running perfscale test.',
87
118
def run_perfscale_test(target_test, bs_manager, args):
88
119
"""Run a perfscale test collect the data and generate a report.
116
147
apply_any_workarounds(client)
117
148
bootstrap_timing = TimingData(bs_start, bs_end)
119
setup_system_monitoring(admin_client)
150
maybe_enable_ha(admin_client, args)
152
machine_ids = setup_system_monitoring(admin_client)
121
154
deploy_details = target_test(client, args)
123
156
results_dir = dump_performance_metrics_logs(
124
bs_manager.log_dir, admin_client)
157
bs_manager.log_dir, admin_client, machine_ids)
125
158
cleanup_start = datetime.utcnow()
126
159
# Cleanup happens when we move out of context
127
160
cleanup_end = datetime.utcnow()
132
165
cleanup=cleanup_timing,
135
controller_log_file = os.path.join(
141
generate_reports(controller_log_file, results_dir, deployments)
144
def dump_performance_metrics_logs(log_dir, admin_client):
145
results_dir = os.path.join(
168
generate_reports(bs_manager.log_dir, results_dir, deployments, machine_ids)
171
def dump_performance_metrics_logs(log_dir, admin_client, machine_ids):
172
"""Pull metric logs and data off every controller machine in action.
174
Store the retrieved data in a machine-id named directory underneath the
175
genereated (and returned) base directory.
177
:return: Path string indicating the base path of data retrieved from the
180
base_results_dir = os.path.join(
146
181
os.path.abspath(log_dir), 'performance_results/')
147
os.makedirs(results_dir)
150
('--', '-r', '0:/var/lib/collectd/rrd/localhost/*',
183
for machine_id in machine_ids:
184
results_dir = os.path.join(
185
base_results_dir, 'machine-{}'.format(machine_id))
186
os.makedirs(results_dir)
154
188
admin_client.juju(
155
'scp', ('0:/tmp/mongodb-stats.log', results_dir)
192
'{}:/var/lib/collectd/rrd/localhost/*'.format(machine_id),
157
except subprocess.CalledProcessError as e:
158
log.error('Failed to copy mongodb stats: {}'.format(e))
197
'scp', ('{}:/tmp/mongodb-stats.log'.format(machine_id),
200
except subprocess.CalledProcessError as e:
201
log.error('Failed to copy mongodb stats for machine {}: {}'.format(
203
return base_results_dir
162
206
def apply_any_workarounds(client):
173
217
subprocess.check_output(constraint_cmd)
176
def generate_reports(controller_log, results_dir, deployments):
177
"""Generate reports and graphs from run results."""
178
cpu_image = generate_cpu_graph_image(results_dir)
179
memory_image = generate_memory_graph_image(results_dir)
180
network_image = generate_network_graph_image(results_dir)
220
def maybe_enable_ha(admin_client, args):
    """Enable HA on the controller when requested via the CLI arguments.

    :param admin_client: Admin juju client used to enable and wait for HA.
    :param args: Parsed argument namespace; `enable_ha` gates the action.
    """
    # NOTE(review): the guard was reconstructed from the CLI help text
    # ('Enable HA before running perfscale test.') — confirm the attribute
    # name matches the flag added in add_basic_perfscale_arguments.
    if args.enable_ha:
        log.info('Enabling HA.')
        admin_client.enable_ha()
        admin_client.wait_for_ha()
227
def generate_reports(log_dir, results_dir, deployments, machine_ids):
228
"""Generate graph image from run results for each controller in action."""
230
for m_id in machine_ids:
231
machine_results_dir = os.path.join(
232
results_dir, 'machine-{}'.format(m_id))
233
_create_graph_images_for_machine(m_id, machine_results_dir)
235
# This will take care of making sure machine-0 is named log_message_chunks.
236
log_chunks = _get_controller_log_message_chunks(
237
log_dir, machine_ids, deployments)
240
deployments=deployments,
244
json_dump_path = os.path.join(results_dir, 'report-data.json')
245
with open(json_dump_path, 'wt') as f:
246
json.dump(details, f, cls=PerfTestDataJsonSerialisation)
249
def _get_controller_log_message_chunks(log_dir, machine_ids, deployments):
250
"""Produce 'chunked' logs for the provided controller ids.
252
:return: dict containing chunked logs for a controller machine. The key
253
indicates which controller it's from.
254
i.e. log_message_chunks_2 is from machine-2. (Note. naming is due to
255
backwards compatibility for existing data collection.)
258
for m_id in machine_ids:
259
machine_log_file = os.path.join(
262
'machine-{}'.format(m_id),
263
'machine-{}.log.gz'.format(m_id))
265
log_name = 'log_message_chunks_{}'.format(m_id)
266
log_chunks[log_name] = breakdown_log_by_events_timeframe(
268
deployments['bootstrap'],
269
deployments['cleanup'],
270
deployments['deploys'])
271
# Keep backwards compatible data naming (for before collecting HA results).
272
log_chunks['log_message_chunks'] = log_chunks.pop('log_message_chunks_0')
276
def _create_graph_images_for_machine(machine_id, results_dir):
277
"""Create graph images from the data from `machine_id`s details."""
278
generate_cpu_graph_image(results_dir)
279
generate_memory_graph_image(results_dir)
280
generate_network_graph_image(results_dir)
182
282
destination_dir = os.path.join(results_dir, 'mongodb')
183
283
os.mkdir(destination_dir)
185
285
perf_graphing.create_mongodb_rrd_files(results_dir, destination_dir)
186
except perf_graphing.SourceFileNotFound:
188
'Failed to create the MongoDB RRD file. Source file not found.'
286
except (perf_graphing.SourceFileNotFound, perf_graphing.NoDataPresent):
191
287
# Sometimes mongostats fails to startup and start logging. Unsure yet
192
288
# why this is. For now generate the report without the mongodb details,
193
289
# the rest of the report is still useful.
194
mongo_query_image = None
195
mongo_memory_image = None
291
'Failed to create the MongoDB RRD file. '
292
'Source file empty or not found.'
197
mongo_query_image = generate_mongo_query_graph_image(results_dir)
198
mongo_memory_image = generate_mongo_memory_graph_image(results_dir)
200
log_message_chunks = breakdown_log_by_events_timeframe(
202
deployments['bootstrap'],
203
deployments['cleanup'],
204
deployments['deploys'])
208
memory_graph=memory_image,
209
network_graph=network_image,
210
mongo_graph=mongo_query_image,
211
mongo_memory_graph=mongo_memory_image,
212
deployments=deployments,
213
log_message_chunks=log_message_chunks
216
json_dump_path = os.path.join(results_dir, 'report-data.json')
217
with open(json_dump_path, 'wt') as f:
218
json.dump(details, f, cls=PerfTestDataJsonSerialisation)
220
create_html_report(results_dir, details)
295
generate_mongo_query_graph_image(results_dir)
296
generate_mongo_memory_graph_image(results_dir)
223
299
def breakdown_log_by_events_timeframe(log, bootstrap, cleanup, deployments):
311
387
def generate_graph_image(base_dir, results_dir, name, generator):
388
"""Generate graph image files.
390
The images will have the machine id encoded within the names.
391
i.e. machine-0-cpu.png
312
393
metric_files_dir = os.path.join(os.path.abspath(base_dir), results_dir)
313
output_file = os.path.join(
314
os.path.abspath(base_dir), '{}.png'.format(name))
315
return create_report_graph(metric_files_dir, output_file, generator)
394
output_file = _image_name(base_dir, name)
396
return create_report_graph(metric_files_dir, output_file, generator)
397
except perf_graphing.SourceFileNotFound:
398
# It's possible that a HA controller isn't around long enough to
399
# actually gather some data from resulting in a lack of rrd file for
401
log.warning('Failed to generate {}.'.format(output_file))
404
def _image_name(base_dir, name):
405
# Encode the machine id into the image name. The machine id is part of the
406
# directory structure.
407
basename = os.path.basename(os.path.normpath(base_dir))
408
return os.path.join(base_dir, '{}-{}.png'.format(basename, name))
318
411
def create_report_graph(rrd_dir, output_file, generator):
475
def get_controller_machines(admin_client):
    """Returns list of machine ids for all active controller machines."""
    return [
        member.machine_id
        for member in admin_client.get_controller_members()]
382
481
def setup_system_monitoring(admin_client):
    """Setup metrics collections for all controller machines in action.

    :param admin_client: Admin juju client used to query controller members
        and to scp/ssh the monitoring scripts onto each machine.
    :return: List of controller machine ids that were set up.
    """
    # For all controller machines we need to get what machines they are and
    # install the monitoring software on each of them.
    controller_machine_ids = get_controller_machines(admin_client)
    for machine_id in controller_machine_ids:
        _setup_system_monitoring(admin_client, machine_id)
    # Start logging separate to setup so things start almost at the same time
    # (not waiting around for other machines to setup.)
    for machine_id in controller_machine_ids:
        _enable_monitoring(admin_client, machine_id)
    return controller_machine_ids
499
def _setup_system_monitoring(admin_client, machine_id):
    """Install required metrics monitoring software on supplied machine id.

    Using ssh & scp get into the controller machines install the required
    packages and start up logging of systems collections and mongodb details.
    """
    # NOTE(review): the juju('scp', ...) call openers were reconstructed from
    # the surviving argument tuples — confirm against version control.
    admin_client.juju(
        'scp',
        (SetupPaths.collectd_config_path, '{}:{}'.format(
            machine_id, SetupPaths.collectd_config_dest_file)))
    admin_client.juju(
        'scp',
        (SetupPaths.installer_script_path, '{}:{}'.format(
            machine_id, SetupPaths.installer_script_dest_path)))
    admin_client.juju('ssh', (machine_id, 'chmod +x {}'.format(
        SetupPaths.installer_script_dest_path)))
518
def _enable_monitoring(admin_client, machine_id):
    """Start metrics collection on the supplied machine id.

    Runs the previously-installed installer script and keeps the produced
    runner script alive under `daemon --respawn`.
    """
    # Start collection
    # Respawn incase the initial execution fails for whatever reason.
    # NOTE(review): the opening juju('ssh', ...) line of the installer call
    # was reconstructed from the surviving argument lines — confirm against
    # version control.
    admin_client.juju(
        'ssh',
        (machine_id, '{installer} {config_file} {output_file}'.format(
            installer=SetupPaths.installer_script_dest_path,
            config_file=SetupPaths.collectd_config_dest_file,
            output_file=SetupPaths.runner_script_dest_path)))
    admin_client.juju('ssh', (machine_id, '--', 'daemon --respawn {}'.format(
        SetupPaths.runner_script_dest_path)))