1
# Copyright 2014-2015 Canonical Limited.
3
# This file is part of charm-helpers.
5
# charm-helpers is free software: you can redistribute it and/or modify
6
# it under the terms of the GNU Lesser General Public License version 3 as
7
# published by the Free Software Foundation.
9
# charm-helpers is distributed in the hope that it will be useful,
10
# but WITHOUT ANY WARRANTY; without even the implied warranty of
11
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12
# GNU Lesser General Public License for more details.
14
# You should have received a copy of the GNU Lesser General Public License
15
# along with charm-helpers. If not, see <http://www.gnu.org/licenses/>.
17
"""Compatibility with the nrpe-external-master charm"""
18
# Copyright 2012 Canonical Ltd.
21
# Matthew Wedgwood <matthew.wedgwood@canonical.com>
33
from charmhelpers.core.hookenv import (
42
from charmhelpers.core.host import service
44
# This module adds compatibility with the nrpe-external-master and plain nrpe
45
# subordinate charms. To use it in your charm:
47
# 1. Update metadata.yaml
51
# nrpe-external-master:
52
# interface: nrpe-external-master
60
# interface: local-monitors
64
# 2. Add the following to config.yaml
70
# Used by the nrpe subordinate charms.
71
# A string that will be prepended to instance name to set the host name
72
# in nagios. So for instance the hostname would be something like:
74
# If you're running multiple environments with the same services in them
75
# this allows you to differentiate between them.
76
# nagios_servicegroups:
80
# A comma-separated list of nagios servicegroups.
81
# If left empty, the nagios_context will be used as the servicegroup
83
# 3. Add custom checks (Nagios plugins) to files/nrpe-external-master
85
# 4. Update your hooks.py with something like this:
87
# from charmsupport.nrpe import NRPE
89
# def update_nrpe_config():
90
# nrpe_compat = NRPE()
91
# nrpe_compat.add_check(
92
# shortname = "myservice",
93
# description = "Check MyService",
94
# check_cmd = "check_http -w 2 -c 10 http://localhost"
96
# nrpe_compat.add_check(
98
# "Check for widget failures",
99
# check_cmd = "/srv/myapp/scripts/widget_check"
101
# nrpe_compat.write()
103
# def config_changed():
105
# update_nrpe_config()
107
# def nrpe_external_master_relation_changed():
108
# update_nrpe_config()
110
# def local_monitors_relation_changed():
111
# update_nrpe_config()
113
# 5. ln -s hooks.py nrpe-external-master-relation-changed
114
# ln -s hooks.py local-monitors-relation-changed
117
class CheckException(Exception):
122
shortname_re = '[A-Za-z0-9-_]+$'
123
service_template = ("""
124
#---------------------------------------------------
125
# This file is Juju managed
126
#---------------------------------------------------
129
host_name {nagios_hostname}
130
service_description {nagios_hostname}[{shortname}] """
132
check_command check_nrpe!{command}
133
servicegroups {nagios_servicegroup}
137
def __init__(self, shortname, description, check_cmd):
138
super(Check, self).__init__()
139
# XXX: could be better to calculate this from the service name
140
if not re.match(self.shortname_re, shortname):
141
raise CheckException("shortname must match {}".format(
143
self.shortname = shortname
144
self.command = "check_{}".format(shortname)
145
# Note: a set of invalid characters is defined by the
146
# Nagios server config
147
# The default is: illegal_object_name_chars=`~!$%^&*"|'<>?,()=
148
self.description = description
149
self.check_cmd = self._locate_cmd(check_cmd)
151
def _get_check_filename(self):
    """
    Return the path of the NRPE command definition file for this check

    The file is named after ``self.command`` with a ``.cfg`` suffix and
    lives in ``NRPE.nrpe_confdir``.

    :returns: Path of the check's configuration file
    """
    cfg_name = '{}.cfg'.format(self.command)
    return os.path.join(NRPE.nrpe_confdir, cfg_name)
154
def _get_service_filename(self, hostname):
    """
    Return the path of the exported nagios service file for this check

    The file name combines the given hostname with ``self.command`` and
    is placed in ``NRPE.nagios_exportdir``.

    :param hostname: Host name embedded in the file name
    :returns: Path of the service definition file
    """
    cfg_name = 'service__{}_{}.cfg'.format(hostname, self.command)
    return os.path.join(NRPE.nagios_exportdir, cfg_name)
158
def _locate_cmd(self, check_cmd):
160
'/usr/lib/nagios/plugins',
161
'/usr/local/lib/nagios/plugins',
163
parts = shlex.split(check_cmd)
164
for path in search_path:
165
if os.path.exists(os.path.join(path, parts[0])):
166
command = os.path.join(path, parts[0])
168
command += " " + " ".join(parts[1:])
170
log('Check command not found: {}'.format(parts[0]))
173
def _remove_service_files(self):
174
if not os.path.exists(NRPE.nagios_exportdir):
176
for f in os.listdir(NRPE.nagios_exportdir):
177
if f.endswith('_{}.cfg'.format(self.command)):
178
os.remove(os.path.join(NRPE.nagios_exportdir, f))
180
def remove(self, hostname):
    """
    Delete the configuration files written for this check

    Removes the NRPE command definition file if it exists, then removes
    the exported service files belonging to this check.

    :param hostname: Accepted for interface compatibility; the code in
                     this method does not use it
    """
    check_cfg = self._get_check_filename()
    if os.path.exists(check_cfg):
        os.remove(check_cfg)
    self._remove_service_files()
186
def write(self, nagios_context, hostname, nagios_servicegroups):
187
nrpe_check_file = self._get_check_filename()
188
with open(nrpe_check_file, 'w') as nrpe_check_config:
189
nrpe_check_config.write("# check {}\n".format(self.shortname))
190
nrpe_check_config.write("command[{}]={}\n".format(
191
self.command, self.check_cmd))
193
if not os.path.exists(NRPE.nagios_exportdir):
194
log('Not writing service config as {} is not accessible'.format(
195
NRPE.nagios_exportdir))
197
self.write_service_config(nagios_context, hostname,
198
nagios_servicegroups)
200
def write_service_config(self, nagios_context, hostname,
201
nagios_servicegroups):
202
self._remove_service_files()
205
'nagios_hostname': hostname,
206
'nagios_servicegroup': nagios_servicegroups,
207
'description': self.description,
208
'shortname': self.shortname,
209
'command': self.command,
211
nrpe_service_text = Check.service_template.format(**templ_vars)
212
nrpe_service_file = self._get_service_filename(hostname)
213
with open(nrpe_service_file, 'w') as nrpe_service_config:
214
nrpe_service_config.write(str(nrpe_service_text))
217
subprocess.call(self.check_cmd)
221
nagios_logdir = '/var/log/nagios'
222
nagios_exportdir = '/var/lib/nagios/export'
223
nrpe_confdir = '/etc/nagios/nrpe.d'
225
def __init__(self, hostname=None):
226
super(NRPE, self).__init__()
227
self.config = config()
228
self.nagios_context = self.config['nagios_context']
229
if 'nagios_servicegroups' in self.config and self.config['nagios_servicegroups']:
230
self.nagios_servicegroups = self.config['nagios_servicegroups']
232
self.nagios_servicegroups = self.nagios_context
233
self.unit_name = local_unit().replace('/', '-')
235
self.hostname = hostname
237
nagios_hostname = get_nagios_hostname()
239
self.hostname = nagios_hostname
241
self.hostname = "{}-{}".format(self.nagios_context, self.unit_name)
244
def add_check(self, *args, **kwargs):
    """
    Build a Check from the given arguments and queue it on this object

    All positional and keyword arguments are handed to ``Check``
    unchanged; the resulting object is appended to ``self.checks``.
    """
    new_check = Check(*args, **kwargs)
    self.checks.append(new_check)
247
def remove_check(self, *args, **kwargs):
    """
    Remove the configuration files of a previously added check

    Arguments are the ones accepted by ``Check``. Only ``shortname`` is
    required; it may be given positionally or by keyword.

    :param args: Positional Check arguments, in the order
                 (shortname, description, check_cmd)
    :param kwargs: Keyword Check arguments
    :raises ValueError: If no shortname is given
    :raises TypeError: If too many positional arguments are given, or an
                       argument is given both positionally and by keyword
    """
    # Fold positional arguments into kwargs so that a positional shortname
    # is recognised and the defaults filled in below cannot collide with
    # a value that was passed positionally.
    check_params = ('shortname', 'description', 'check_cmd')
    if len(args) > len(check_params):
        raise TypeError(
            'remove_check() takes at most {} positional arguments '
            '({} given)'.format(len(check_params), len(args)))
    for name, value in zip(check_params, args):
        if name in kwargs:
            raise TypeError(
                "remove_check() got multiple values for argument "
                "'{}'".format(name))
        kwargs[name] = value

    if kwargs.get('shortname') is None:
        raise ValueError('shortname of check must be specified')

    # Use sensible defaults if they're not specified - these are not
    # actually used during removal, but they're required for constructing
    # the Check object; check_disk is chosen because it's part of the
    # nagios-plugins-basic package.
    if kwargs.get('check_cmd') is None:
        kwargs['check_cmd'] = 'check_disk'
    if kwargs.get('description') is None:
        kwargs['description'] = ''

    check = Check(**kwargs)
    check.remove(self.hostname)
265
nagios_uid = pwd.getpwnam('nagios').pw_uid
266
nagios_gid = grp.getgrnam('nagios').gr_gid
268
log("Nagios user not set up, nrpe checks not updated")
271
if not os.path.exists(NRPE.nagios_logdir):
272
os.mkdir(NRPE.nagios_logdir)
273
os.chown(NRPE.nagios_logdir, nagios_uid, nagios_gid)
276
monitors = {"monitors": {"remote": {"nrpe": nrpe_monitors}}}
277
for nrpecheck in self.checks:
278
nrpecheck.write(self.nagios_context, self.hostname,
279
self.nagios_servicegroups)
280
nrpe_monitors[nrpecheck.shortname] = {
281
"command": nrpecheck.command,
284
service('restart', 'nagios-nrpe-server')
286
monitor_ids = relation_ids("local-monitors") + \
287
relation_ids("nrpe-external-master")
288
for rid in monitor_ids:
289
relation_set(relation_id=rid, monitors=yaml.dump(monitors))
292
def get_nagios_hostcontext(relation_name='nrpe-external-master'):
    """
    Look up the nagios_host_context published on the nrpe relation

    The entries returned by ``relations_of_type`` are scanned in order
    and the value from the first one carrying a ``nagios_host_context``
    key is returned. ``None`` is returned when no entry has the key.

    :param str relation_name: Name of relation nrpe sub joined to
    """
    matches = (
        rel_data['nagios_host_context']
        for rel_data in relations_of_type(relation_name)
        if 'nagios_host_context' in rel_data
    )
    return next(matches, None)
303
def get_nagios_hostname(relation_name='nrpe-external-master'):
    """
    Look up the nagios_hostname published on the nrpe relation

    The entries returned by ``relations_of_type`` are scanned in order
    and the value from the first one carrying a ``nagios_hostname`` key
    is returned. ``None`` is returned when no entry has the key.

    :param str relation_name: Name of relation nrpe sub joined to
    """
    matches = (
        rel_data['nagios_hostname']
        for rel_data in relations_of_type(relation_name)
        if 'nagios_hostname' in rel_data
    )
    return next(matches, None)
314
def get_nagios_unit_name(relation_name='nrpe-external-master'):
316
Return the nagios unit name prepended with host_context if needed
318
:param str relation_name: Name of relation nrpe sub joined to
320
host_context = get_nagios_hostcontext(relation_name)
322
unit = "%s:%s" % (host_context, local_unit())
328
def add_init_service_checks(nrpe, services, unit_name):
330
Add checks for each service in list
332
:param NRPE nrpe: NRPE object to add check to
333
:param list services: List of services to check
334
:param str unit_name: Unit name to use in check description
337
upstart_init = '/etc/init/%s.conf' % svc
338
sysv_init = '/etc/init.d/%s' % svc
339
if os.path.exists(upstart_init):
340
# Don't add a check for these services from neutron-gateway
341
if svc not in ['ext-port', 'os-charm-phy-nic-mtu']:
344
description='process check {%s}' % unit_name,
345
check_cmd='check_upstart_job %s' % svc
347
elif os.path.exists(sysv_init):
348
cronpath = '/etc/cron.d/nagios-service-check-%s' % svc
349
cron_file = ('*/5 * * * * root '
350
'/usr/local/lib/nagios/plugins/check_exit_status.pl '
351
'-s /etc/init.d/%s status > '
352
'/var/lib/nagios/service-check-%s.txt\n' % (svc,
355
f = open(cronpath, 'w')
360
description='process check {%s}' % unit_name,
361
check_cmd='check_status_file.py -f '
362
'/var/lib/nagios/service-check-%s.txt' % svc,
366
def copy_nrpe_checks():
368
Copy the nrpe checks into place
371
NAGIOS_PLUGINS = '/usr/local/lib/nagios/plugins'
372
nrpe_files_dir = os.path.join(os.getenv('CHARM_DIR'), 'hooks',
373
'charmhelpers', 'contrib', 'openstack',
376
if not os.path.exists(NAGIOS_PLUGINS):
377
os.makedirs(NAGIOS_PLUGINS)
378
for fname in glob.glob(os.path.join(nrpe_files_dir, "check_*")):
379
if os.path.isfile(fname):
381
os.path.join(NAGIOS_PLUGINS, os.path.basename(fname)))
384
def add_haproxy_checks(nrpe, unit_name):
386
Add checks for each service in list
388
:param NRPE nrpe: NRPE object to add check to
389
:param str unit_name: Unit name to use in check description
392
shortname='haproxy_servers',
393
description='Check HAProxy {%s}' % unit_name,
394
check_cmd='check_haproxy.sh')
396
shortname='haproxy_queue',
397
description='Check HAProxy queue depth {%s}' % unit_name,
398
check_cmd='check_haproxy_queue_depth.sh')