# Copyright 2014-2015 Canonical Limited.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
15
"""Compatibility with the nrpe-external-master charm"""
16
# Copyright 2012 Canonical Ltd.
19
# Matthew Wedgwood <matthew.wedgwood@canonical.com>
31
from charmhelpers.core.hookenv import (
40
from charmhelpers.core.host import service
41
from charmhelpers.core import host
43
# This module adds compatibility with the nrpe-external-master and plain nrpe
44
# subordinate charms. To use it in your charm:
46
# 1. Update metadata.yaml
50
# nrpe-external-master:
51
# interface: nrpe-external-master
59
# interface: local-monitors
63
# 2. Add the following to config.yaml
69
# Used by the nrpe subordinate charms.
70
# A string that will be prepended to instance name to set the host name
71
# in nagios. So for instance the hostname would be something like:
73
# If you're running multiple environments with the same services in them
74
# this allows you to differentiate between them.
75
# nagios_servicegroups:
79
# A comma-separated list of nagios servicegroups.
80
# If left empty, the nagios_context will be used as the servicegroup
82
# 3. Add custom checks (Nagios plugins) to files/nrpe-external-master
84
# 4. Update your hooks.py with something like this:
86
# from charmsupport.nrpe import NRPE
88
# def update_nrpe_config():
89
# nrpe_compat = NRPE()
90
# nrpe_compat.add_check(
91
# shortname = "myservice",
92
# description = "Check MyService",
93
# check_cmd = "check_http -w 2 -c 10 http://localhost"
95
# nrpe_compat.add_check(
97
# "Check for widget failures",
98
# check_cmd = "/srv/myapp/scripts/widget_check"
100
# nrpe_compat.write()
102
# def config_changed():
104
# update_nrpe_config()
106
# def nrpe_external_master_relation_changed():
107
# update_nrpe_config()
109
# def local_monitors_relation_changed():
110
# update_nrpe_config()
112
# 4.a If your charm is a subordinate charm set primary=False
114
# from charmsupport.nrpe import NRPE
116
# def update_nrpe_config():
117
# nrpe_compat = NRPE(primary=False)
119
# 5. ln -s hooks.py nrpe-external-master-relation-changed
120
# ln -s hooks.py local-monitors-relation-changed
123
class CheckException(Exception):
    """Raised when a check cannot be defined, e.g. its shortname is invalid."""
128
shortname_re = '[A-Za-z0-9-_]+$'
129
service_template = ("""
130
#---------------------------------------------------
131
# This file is Juju managed
132
#---------------------------------------------------
135
host_name {nagios_hostname}
136
service_description {nagios_hostname}[{shortname}] """
138
check_command check_nrpe!{command}
139
servicegroups {nagios_servicegroup}
143
def __init__(self, shortname, description, check_cmd):
    """Validate the shortname and derive the check's command settings.

    :param str shortname: Identifier of the check; must match
        ``shortname_re``. The NRPE command name is ``check_<shortname>``.
    :param str description: Description stored on the check.
    :param str check_cmd: Plugin command line; stored after being
        resolved through ``_locate_cmd``.
    :raises CheckException: If ``shortname`` does not match
        ``shortname_re``.
    """
    super(Check, self).__init__()
    # XXX: could be better to calculate this from the service name
    if not re.match(self.shortname_re, shortname):
        raise CheckException("shortname must match {}".format(
            self.shortname_re))
    self.shortname = shortname
    self.command = "check_{}".format(shortname)
    # Note: a set of invalid characters is defined by the
    # Nagios server config
    # The default is: illegal_object_name_chars=`~!$%^&*"|'<>?,()=
    self.description = description
    self.check_cmd = self._locate_cmd(check_cmd)
157
def _get_check_filename(self):
    """Return the path of this check's NRPE command config file.

    The file is named ``<command>.cfg`` and is located in
    ``NRPE.nrpe_confdir``.
    """
    filename = '{}.cfg'.format(self.command)
    return os.path.join(NRPE.nrpe_confdir, filename)
160
def _get_service_filename(self, hostname):
    """Return the path of the exported service definition for this check.

    :param str hostname: Host name embedded in the file name.
    :returns: ``service__<hostname>_<command>.cfg`` inside
        ``NRPE.nagios_exportdir``.
    """
    filename = 'service__{}_{}.cfg'.format(hostname, self.command)
    return os.path.join(NRPE.nagios_exportdir, filename)
164
def _locate_cmd(self, check_cmd):
166
'/usr/lib/nagios/plugins',
167
'/usr/local/lib/nagios/plugins',
169
parts = shlex.split(check_cmd)
170
for path in search_path:
171
if os.path.exists(os.path.join(path, parts[0])):
172
command = os.path.join(path, parts[0])
174
command += " " + " ".join(parts[1:])
176
log('Check command not found: {}'.format(parts[0]))
179
def _remove_service_files(self):
180
if not os.path.exists(NRPE.nagios_exportdir):
182
for f in os.listdir(NRPE.nagios_exportdir):
183
if f.endswith('_{}.cfg'.format(self.command)):
184
os.remove(os.path.join(NRPE.nagios_exportdir, f))
186
def remove(self, hostname):
    """Delete this check's NRPE config file and its exported service files.

    The NRPE config file is only removed when it exists; the service
    file cleanup is always delegated to ``_remove_service_files``.

    :param str hostname: Not used by this method.
    """
    nrpe_check_file = self._get_check_filename()
    if os.path.exists(nrpe_check_file):
        os.remove(nrpe_check_file)
    self._remove_service_files()
192
def write(self, nagios_context, hostname, nagios_servicegroups):
193
nrpe_check_file = self._get_check_filename()
194
with open(nrpe_check_file, 'w') as nrpe_check_config:
195
nrpe_check_config.write("# check {}\n".format(self.shortname))
196
nrpe_check_config.write("command[{}]={}\n".format(
197
self.command, self.check_cmd))
199
if not os.path.exists(NRPE.nagios_exportdir):
200
log('Not writing service config as {} is not accessible'.format(
201
NRPE.nagios_exportdir))
203
self.write_service_config(nagios_context, hostname,
204
nagios_servicegroups)
206
def write_service_config(self, nagios_context, hostname,
207
nagios_servicegroups):
208
self._remove_service_files()
211
'nagios_hostname': hostname,
212
'nagios_servicegroup': nagios_servicegroups,
213
'description': self.description,
214
'shortname': self.shortname,
215
'command': self.command,
217
nrpe_service_text = Check.service_template.format(**templ_vars)
218
nrpe_service_file = self._get_service_filename(hostname)
219
with open(nrpe_service_file, 'w') as nrpe_service_config:
220
nrpe_service_config.write(str(nrpe_service_text))
223
subprocess.call(self.check_cmd)
227
nagios_logdir = '/var/log/nagios'
228
nagios_exportdir = '/var/lib/nagios/export'
229
nrpe_confdir = '/etc/nagios/nrpe.d'
230
homedir = '/var/lib/nagios' # home dir provided by nagios-nrpe-server
232
def __init__(self, hostname=None, primary=True):
233
super(NRPE, self).__init__()
234
self.config = config()
235
self.primary = primary
236
self.nagios_context = self.config['nagios_context']
237
if 'nagios_servicegroups' in self.config and self.config['nagios_servicegroups']:
238
self.nagios_servicegroups = self.config['nagios_servicegroups']
240
self.nagios_servicegroups = self.nagios_context
241
self.unit_name = local_unit().replace('/', '-')
243
self.hostname = hostname
245
nagios_hostname = get_nagios_hostname()
247
self.hostname = nagios_hostname
249
self.hostname = "{}-{}".format(self.nagios_context, self.unit_name)
251
# Iff in an nrpe-external-master relation hook, set primary status
252
relation = relation_ids('nrpe-external-master')
254
log("Setting charm primary status {}".format(primary))
255
for rid in relation_ids('nrpe-external-master'):
256
relation_set(relation_id=rid, relation_settings={'primary': self.primary})
258
def add_check(self, *args, **kwargs):
    """Build a ``Check`` from the given arguments and queue it.

    All positional and keyword arguments are forwarded unchanged to
    ``Check``; the resulting object is appended to ``self.checks``.
    """
    self.checks.append(Check(*args, **kwargs))
261
def remove_check(self, *args, **kwargs):
    """Remove the files belonging to a check.

    Arguments are forwarded to ``Check``. ``shortname`` is only
    recognised when passed as a keyword argument.

    :param shortname: Keyword argument, required.
    :param check_cmd: Keyword argument; ``'check_disk'`` is used when it
        is missing or ``None``.
    :param description: Keyword argument; ``''`` is used when it is
        missing or ``None``.
    :raises ValueError: If the ``shortname`` keyword is missing or
        ``None``.
    """
    if kwargs.get('shortname') is None:
        raise ValueError('shortname of check must be specified')

    # Use sensible defaults if they're not specified - these are not
    # actually used during removal, but they're required for constructing
    # the Check object; check_disk is chosen because it's part of the
    # nagios-plugins-basic package.
    if kwargs.get('check_cmd') is None:
        kwargs['check_cmd'] = 'check_disk'
    if kwargs.get('description') is None:
        kwargs['description'] = ''

    check = Check(*args, **kwargs)
    check.remove(self.hostname)
279
nagios_uid = pwd.getpwnam('nagios').pw_uid
280
nagios_gid = grp.getgrnam('nagios').gr_gid
282
log("Nagios user not set up, nrpe checks not updated")
285
if not os.path.exists(NRPE.nagios_logdir):
286
os.mkdir(NRPE.nagios_logdir)
287
os.chown(NRPE.nagios_logdir, nagios_uid, nagios_gid)
290
monitors = {"monitors": {"remote": {"nrpe": nrpe_monitors}}}
291
for nrpecheck in self.checks:
292
nrpecheck.write(self.nagios_context, self.hostname,
293
self.nagios_servicegroups)
294
nrpe_monitors[nrpecheck.shortname] = {
295
"command": nrpecheck.command,
298
service('restart', 'nagios-nrpe-server')
300
monitor_ids = relation_ids("local-monitors") + \
301
relation_ids("nrpe-external-master")
302
for rid in monitor_ids:
303
relation_set(relation_id=rid, monitors=yaml.dump(monitors))
306
def get_nagios_hostcontext(relation_name='nrpe-external-master'):
    """
    Query relation with nrpe subordinate, return the nagios_host_context

    :param str relation_name: Name of relation nrpe sub joined to
    :returns: The first ``nagios_host_context`` value found in the
        relation data, or ``None`` when no entry provides one.
    """
    for rel in relations_of_type(relation_name):
        if 'nagios_host_context' in rel:
            return rel['nagios_host_context']
    return None
317
def get_nagios_hostname(relation_name='nrpe-external-master'):
    """
    Query relation with nrpe subordinate, return the nagios_hostname

    :param str relation_name: Name of relation nrpe sub joined to
    :returns: The first ``nagios_hostname`` value found in the relation
        data, or ``None`` when no entry provides one.
    """
    for rel in relations_of_type(relation_name):
        if 'nagios_hostname' in rel:
            return rel['nagios_hostname']
    return None
328
def get_nagios_unit_name(relation_name='nrpe-external-master'):
330
Return the nagios unit name prepended with host_context if needed
332
:param str relation_name: Name of relation nrpe sub joined to
334
host_context = get_nagios_hostcontext(relation_name)
336
unit = "%s:%s" % (host_context, local_unit())
342
def add_init_service_checks(nrpe, services, unit_name, immediate_check=True):
344
Add checks for each service in list
346
:param NRPE nrpe: NRPE object to add check to
347
:param list services: List of services to check
348
:param str unit_name: Unit name to use in check description
349
:param bool immediate_check: For sysv init, run the service check immediately
352
# Don't add a check for these services from neutron-gateway
353
if svc in ['ext-port', 'os-charm-phy-nic-mtu']:
356
upstart_init = '/etc/init/%s.conf' % svc
357
sysv_init = '/etc/init.d/%s' % svc
359
if host.init_is_systemd():
362
description='process check {%s}' % unit_name,
363
check_cmd='check_systemd.py %s' % svc
365
elif os.path.exists(upstart_init):
368
description='process check {%s}' % unit_name,
369
check_cmd='check_upstart_job %s' % svc
371
elif os.path.exists(sysv_init):
372
cronpath = '/etc/cron.d/nagios-service-check-%s' % svc
373
checkpath = '%s/service-check-%s.txt' % (nrpe.homedir, svc)
375
'/usr/local/lib/nagios/plugins/check_exit_status.pl '
376
'-s /etc/init.d/%s status' % svc
378
cron_file = '*/5 * * * * root %s > %s\n' % (croncmd, checkpath)
379
f = open(cronpath, 'w')
384
description='service check {%s}' % unit_name,
385
check_cmd='check_status_file.py -f %s' % checkpath,
387
# if /var/lib/nagios doesn't exist open(checkpath, 'w') will fail
389
if immediate_check and os.path.isdir(nrpe.homedir):
390
f = open(checkpath, 'w')
394
stderr=subprocess.STDOUT
397
os.chmod(checkpath, 0o644)
400
def copy_nrpe_checks():
402
Copy the nrpe checks into place
405
NAGIOS_PLUGINS = '/usr/local/lib/nagios/plugins'
406
nrpe_files_dir = os.path.join(os.getenv('CHARM_DIR'), 'hooks',
407
'charmhelpers', 'contrib', 'openstack',
410
if not os.path.exists(NAGIOS_PLUGINS):
411
os.makedirs(NAGIOS_PLUGINS)
412
for fname in glob.glob(os.path.join(nrpe_files_dir, "check_*")):
413
if os.path.isfile(fname):
415
os.path.join(NAGIOS_PLUGINS, os.path.basename(fname)))
418
def add_haproxy_checks(nrpe, unit_name):
    """
    Add the HAProxy server and queue-depth checks

    Registers two checks through ``nrpe.add_check``: ``haproxy_servers``
    (``check_haproxy.sh``) and ``haproxy_queue``
    (``check_haproxy_queue_depth.sh``).

    :param NRPE nrpe: NRPE object to add check to
    :param str unit_name: Unit name to use in check description
    """
    nrpe.add_check(
        shortname='haproxy_servers',
        description='Check HAProxy {%s}' % unit_name,
        check_cmd='check_haproxy.sh')
    nrpe.add_check(
        shortname='haproxy_queue',
        description='Check HAProxy queue depth {%s}' % unit_name,
        check_cmd='check_haproxy_queue_depth.sh')