~andrewjbeach/juju-ci-tools/make-local-patcher

« back to all changes in this revision

Viewing changes to generate_perfscale_results.py

Committer: Christopher Lee
Date: 2016-10-26 04:29:27 UTC
mto: This revision was merged to the branch mainline in revision 1698.
Revision ID: chris.lee@canonical.com-20161026042927-j3wf3tt7pjpr1ok6

Add handling of HA perfscale reporting.

files modified:
generate_perfscale_results.py

perf_graphing.py

perfscale_deployment.py

tests/test_generate_perfscale_results.py

Show diffs side-by-side

added added

removed removed

generate_perfscale_results.py

breakdown_log_by_timeframes,

)

import perf_graphing

from utility import add_basic_testing_arguments

__metaclass__ = type

return super(PerfTestDataJsonSerialisation, self).default(obj)

class SetupPathsConfig:
    """Bundle of file paths used when setting up monitoring on a machine.

    The ``*_path`` attributes are the local static files, resolved relative
    to this module. The ``*_dest_*`` attributes are fixed locations under
    /tmp used on the machine being set up.
    """

    def __init__(self):
        # Fixed destinations on the target machine.
        self.collectd_config_dest_file = '/tmp/collectd.config'
        self.runner_script_dest_path = '/tmp/runner.sh'
        self.installer_script_dest_path = '/tmp/installer.sh'

        # Local source files shipped alongside this module.
        resolve = _get_static_script_path
        self.collectd_config_path = resolve(COLLECTD_CONFIG_PATH)
        self.installer_script_path = resolve(SETUP_SCRIPT_PATH)

def _get_static_script_path(script_path):
    """Return `script_path` joined onto the directory holding this module.

    :param script_path: Path of a static file, relative to this file's
        directory.
    :return: Path string for the static file.
    """
    module_dir = os.path.dirname(os.path.abspath(__file__))
    return os.path.join(module_dir, script_path)

SETUP_SCRIPT_PATH = 'perf_static/setup-perf-monitoring.sh'

100

COLLECTD_CONFIG_PATH = 'perf_static/collectd.conf'

101

102

103

SetupPaths = SetupPathsConfig()

104

105

106

log = logging.getLogger("run_perfscale_test")

107

108

109

def add_basic_perfscale_arguments(parser):
    """Register the arguments required by a perfscale test on `parser`.

    Adds the basic testing arguments and the ``--enable-ha`` boolean flag.
    """
    add_basic_testing_arguments(parser)

    ha_option = dict(
        action='store_true',
        help='Enable HA before running perfscale test.',
    )
    parser.add_argument('--enable-ha', **ha_option)

116

117

118

def run_perfscale_test(target_test, bs_manager, args):

119

"""Run a perfscale test collect the data and generate a report.

120

116

147

apply_any_workarounds(client)

117

148

bootstrap_timing = TimingData(bs_start, bs_end)

118

149

119

setup_system_monitoring(admin_client)

150

maybe_enable_ha(admin_client, args)

151

152

machine_ids = setup_system_monitoring(admin_client)

120

153

121

154

deploy_details = target_test(client, args)

122

155

finally:

123

156

results_dir = dump_performance_metrics_logs(

124

bs_manager.log_dir, admin_client)

157

bs_manager.log_dir, admin_client, machine_ids)

125

158

cleanup_start = datetime.utcnow()

126

159

# Cleanup happens when we move out of context

127

160

cleanup_end = datetime.utcnow()

132

165

cleanup=cleanup_timing,

133

166

)

134

167

135

controller_log_file = os.path.join(

136

bs_manager.log_dir,

137

'controller',

138

'machine-0',

139

'machine-0.log.gz')

140

141

generate_reports(controller_log_file, results_dir, deployments)

142

143

144

def dump_performance_metrics_logs(log_dir, admin_client):

145

results_dir = os.path.join(

168

generate_reports(bs_manager.log_dir, results_dir, deployments, machine_ids)

169

170

171

def dump_performance_metrics_logs(log_dir, admin_client, machine_ids):

172

"""Pull metric logs and data off every controller machine in action.

173

174

Store the retrieved data in a machine-id named directory underneath the

175

generated (and returned) base directory.

176

177

:return: Path string indicating the base path of data retrieved from the

178

controllers.

179

"""

180

base_results_dir = os.path.join(

146

181

os.path.abspath(log_dir), 'performance_results/')

147

os.makedirs(results_dir)

148

admin_client.juju(

149

'scp',

150

('--', '-r', '0:/var/lib/collectd/rrd/localhost/*',

151

results_dir)

152

)

153

try:

182

183

for machine_id in machine_ids:

184

results_dir = os.path.join(

185

base_results_dir, 'machine-{}'.format(machine_id))

186

os.makedirs(results_dir)

187

154

188

admin_client.juju(

155

'scp', ('0:/tmp/mongodb-stats.log', results_dir)

189

'scp',

190

('--',

191

'-r',

192

'{}:/var/lib/collectd/rrd/localhost/*'.format(machine_id),

193

results_dir)

156

194

)

157

except subprocess.CalledProcessError as e:

158

log.error('Failed to copy mongodb stats: {}'.format(e))

159

return results_dir

195

try:

196

admin_client.juju(

197

'scp', ('{}:/tmp/mongodb-stats.log'.format(machine_id),

198

results_dir)

199

)

200

except subprocess.CalledProcessError as e:

201

log.error('Failed to copy mongodb stats for machine {}: {}'.format(

202

machine_id, e))

203

return base_results_dir

160

204

161

205

162

206

def apply_any_workarounds(client):

173

217

subprocess.check_output(constraint_cmd)

174

218

175

219

176

def generate_reports(controller_log, results_dir, deployments):

177

"""Generate reports and graphs from run results."""

178

cpu_image = generate_cpu_graph_image(results_dir)

179

memory_image = generate_memory_graph_image(results_dir)

180

network_image = generate_network_graph_image(results_dir)

220

def maybe_enable_ha(admin_client, args):
    """Enable HA through `admin_client` when `args.enable_ha` is set.

    Does nothing when the flag is not set. Otherwise HA is enabled and
    `admin_client.wait_for_ha()` is called before returning.
    """
    if not args.enable_ha:
        return

    log.info('Enabling HA.')
    admin_client.enable_ha()
    admin_client.wait_for_ha()

225

226

227

def generate_reports(log_dir, results_dir, deployments, machine_ids):
    """Create per-controller graph images and write the report data file.

    :param log_dir: Base log directory the controller logs are read from.
    :param results_dir: Base directory holding one ``machine-<id>``
        sub-directory per controller; ``report-data.json`` is written here.
    :param deployments: Deployment timing details stored in the report and
        used to chunk the controller logs.
    :param machine_ids: Ids of the controller machines to report on.
    """
    for controller_id in machine_ids:
        per_machine_dir = os.path.join(
            results_dir, 'machine-{}'.format(controller_id))
        _create_graph_images_for_machine(controller_id, per_machine_dir)

    # The helper stores machine-0's chunks under the legacy
    # 'log_message_chunks' key.
    chunked_logs = _get_controller_log_message_chunks(
        log_dir, machine_ids, deployments)
    report_data = dict(deployments=deployments, **chunked_logs)

    report_file = os.path.join(results_dir, 'report-data.json')
    with open(report_file, 'wt') as report:
        json.dump(report_data, report, cls=PerfTestDataJsonSerialisation)

247

248

249

def _get_controller_log_message_chunks(log_dir, machine_ids, deployments):
    """Produce 'chunked' logs for the provided controller ids.

    :param log_dir: Base log directory. Each controller log is read from
        ``<log_dir>/controller/machine-<id>/machine-<id>.log.gz``.
    :param machine_ids: Iterable of controller machine ids.
    :param deployments: Mapping with the 'bootstrap', 'cleanup' and 'deploys'
        entries that are passed to the log breakdown.
    :return: dict containing chunked logs for each controller machine. The
        key indicates which controller it's from, i.e. log_message_chunks_2
        is from machine-2. Machine 0's chunks are stored under the
        un-suffixed key 'log_message_chunks' (naming is due to backwards
        compatibility for existing data collection). When machine 0 is not
        among `machine_ids`, that key is simply absent.
    """
    log_chunks = dict()
    for m_id in machine_ids:
        machine_name = 'machine-{}'.format(m_id)
        machine_log_file = os.path.join(
            log_dir,
            'controller',
            machine_name,
            '{}.log.gz'.format(machine_name))

        log_name = 'log_message_chunks_{}'.format(m_id)
        log_chunks[log_name] = breakdown_log_by_events_timeframe(
            machine_log_file,
            deployments['bootstrap'],
            deployments['cleanup'],
            deployments['deploys'])

    # Keep backwards compatible data naming (for before collecting HA
    # results). Only rename when machine 0 was actually processed, so an
    # empty or machine-0-less id list does not raise KeyError.
    legacy_source_key = 'log_message_chunks_0'
    if legacy_source_key in log_chunks:
        log_chunks['log_message_chunks'] = log_chunks.pop(legacy_source_key)
    return log_chunks

274

275

276

def _create_graph_images_for_machine(machine_id, results_dir):

277

"""Create graph images from the data from `machine_id`s details."""

278

generate_cpu_graph_image(results_dir)

279

generate_memory_graph_image(results_dir)

280

generate_network_graph_image(results_dir)

181

281

182

282

destination_dir = os.path.join(results_dir, 'mongodb')

183

283

os.mkdir(destination_dir)

184

284

try:

185

285

perf_graphing.create_mongodb_rrd_files(results_dir, destination_dir)

186

except perf_graphing.SourceFileNotFound:

187

log.error(

188

'Failed to create the MongoDB RRD file. Source file not found.'

189

)

190

286

except (perf_graphing.SourceFileNotFound, perf_graphing.NoDataPresent):

191

287

# Sometimes mongostats fails to startup and start logging. Unsure yet

192

288

# why this is. For now generate the report without the mongodb details,

193

289

# the rest of the report is still useful.

194

mongo_query_image = None

195

mongo_memory_image = None

290

log.error(

291

'Failed to create the MongoDB RRD file. '

292

'Source file empty or not found.'

293

)

196

294

else:

197

mongo_query_image = generate_mongo_query_graph_image(results_dir)

198

mongo_memory_image = generate_mongo_memory_graph_image(results_dir)

199

200

log_message_chunks = breakdown_log_by_events_timeframe(

201

controller_log,

202

deployments['bootstrap'],

203

deployments['cleanup'],

204

deployments['deploys'])

205

206

details = dict(

207

cpu_graph=cpu_image,

208

memory_graph=memory_image,

209

network_graph=network_image,

210

mongo_graph=mongo_query_image,

211

mongo_memory_graph=mongo_memory_image,

212

deployments=deployments,

213

log_message_chunks=log_message_chunks

214

)

215

216

json_dump_path = os.path.join(results_dir, 'report-data.json')

217

with open(json_dump_path, 'wt') as f:

218

json.dump(details, f, cls=PerfTestDataJsonSerialisation)

219

220

create_html_report(results_dir, details)

295

generate_mongo_query_graph_image(results_dir)

296

generate_mongo_memory_graph_image(results_dir)

221

297

222

298

223

299

def breakdown_log_by_events_timeframe(log, bootstrap, cleanup, deployments):

309

385

310

386

311

387

def generate_graph_image(base_dir, results_dir, name, generator):

388

"""Generate graph image files.

389

390

The images will have the machine id encoded within the names.

391

i.e. machine-0-cpu.png

392

"""

312

393

metric_files_dir = os.path.join(os.path.abspath(base_dir), results_dir)

313

output_file = os.path.join(

314

os.path.abspath(base_dir), '{}.png'.format(name))

315

return create_report_graph(metric_files_dir, output_file, generator)

394

output_file = _image_name(base_dir, name)

395

try:

396

return create_report_graph(metric_files_dir, output_file, generator)

397

except perf_graphing.SourceFileNotFound:

398

# It's possible that a HA controller isn't around long enough to

399

# actually gather some data from, resulting in a lack of rrd file for

400

# that metric.

401

log.warning('Failed to generate {}.'.format(output_file))

402

403

404

def _image_name(base_dir, name):
    """Return the image path for graph `name` inside `base_dir`.

    The final component of `base_dir` (the machine directory, e.g.
    ``machine-0``) is used as a prefix of the file name, giving for example
    ``<base_dir>/machine-0-cpu.png``.
    """
    # normpath drops any trailing separator so basename yields the directory
    # name rather than an empty string.
    machine_dir_name = os.path.basename(os.path.normpath(base_dir))
    file_name = '{}-{}.png'.format(machine_dir_name, name)
    return os.path.join(base_dir, file_name)

316

409

317

410

318

411

def create_report_graph(rrd_dir, output_file, generator):

379

472

pass

380

473

381

474

475

def get_controller_machines(admin_client):
    """Return a list of machine ids, one per controller member.

    The members are those returned by `admin_client.get_controller_members()`
    and the ids keep that order.
    """
    machine_ids = []
    for member in admin_client.get_controller_members():
        machine_ids.append(member.machine_id)
    return machine_ids

479

480

382

481

def setup_system_monitoring(admin_client):

383

# Using ssh get into the machine-0 (or all api/state servers)

384

# Install the required packages and start up logging of systems collections

385

# and mongodb details.

386

387

installer_script_path = _get_static_script_path(SETUP_SCRIPT_PATH)

388

collectd_config_path = _get_static_script_path(COLLECTD_CONFIG_PATH)

389

installer_script_dest_path = '/tmp/installer.sh'

390

runner_script_dest_path = '/tmp/runner.sh'

391

collectd_config_dest_file = '/tmp/collectd.config'

392

393

admin_client.juju(

394

'scp',

395

(collectd_config_path, '0:{}'.format(collectd_config_dest_file)))

396

397

admin_client.juju(

398

'scp',

399

(installer_script_path, '0:{}'.format(installer_script_dest_path)))

400

admin_client.juju('ssh', ('0', 'chmod +x {}'.format(

401

installer_script_dest_path)))

402

admin_client.juju(

403

'ssh',

404

('0', '{installer} {config_file} {output_file}'.format(

405

installer=installer_script_dest_path,

406

config_file=collectd_config_dest_file,

407

output_file=runner_script_dest_path)))

408

482

"""Setup metrics collections for all controller machines in action."""

483

# For all controller machines we need to get what machines they are and

484

# install on them.

485

486

controller_machine_ids = get_controller_machines(admin_client)

487

488

for machine_id in controller_machine_ids:

489

_setup_system_monitoring(admin_client, machine_id)

490

491

# Start logging separate to setup so things start almost at the same time

492

# (not waiting around for other machines to setup.)

493

for machine_id in controller_machine_ids:

494

_enable_monitoring(admin_client, machine_id)

495

496

return controller_machine_ids

497

498

499

def _setup_system_monitoring(admin_client, machine_id):
    """Copy the monitoring setup files onto the supplied machine.

    Uses `juju scp` to upload the collectd config and the installer script
    to their destination paths on `machine_id`, then uses `juju ssh` to make
    the installer script executable.
    """
    uploads = (
        (SetupPaths.collectd_config_path,
         SetupPaths.collectd_config_dest_file),
        (SetupPaths.installer_script_path,
         SetupPaths.installer_script_dest_path),
    )
    for local_path, remote_path in uploads:
        remote_target = '{}:{}'.format(machine_id, remote_path)
        admin_client.juju('scp', (local_path, remote_target))

    make_executable = 'chmod +x {}'.format(
        SetupPaths.installer_script_dest_path)
    admin_client.juju('ssh', (machine_id, make_executable))

516

517

518

def _enable_monitoring(admin_client, machine_id):

409

519

# Start collection

410

520

# Respawn in case the initial execution fails for whatever reason.

411

admin_client.juju('ssh', ('0', '--', 'daemon --respawn {}'.format(

412

runner_script_dest_path)))

413

414

415

def _get_static_script_path(script_path):

416

full_path = os.path.abspath(__file__)

417

current_dir = os.path.dirname(full_path)

418

return os.path.join(current_dir, script_path)

521

admin_client.juju(

522

'ssh',

523

(machine_id, '{installer} {config_file} {output_file}'.format(

524

installer=SetupPaths.installer_script_dest_path,

525

config_file=SetupPaths.collectd_config_dest_file,

526

output_file=SetupPaths.runner_script_dest_path)))

527

528

admin_client.juju('ssh', (machine_id, '--', 'daemon --respawn {}'.format(

529

SetupPaths.runner_script_dest_path)))

Older »