1
# Copyright 2014-2015 Canonical Limited.
3
# This file is part of charm-helpers.
5
# charm-helpers is free software: you can redistribute it and/or modify
6
# it under the terms of the GNU Lesser General Public License version 3 as
7
# published by the Free Software Foundation.
9
# charm-helpers is distributed in the hope that it will be useful,
10
# but WITHOUT ANY WARRANTY; without even the implied warranty of
11
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12
# GNU Lesser General Public License for more details.
14
# You should have received a copy of the GNU Lesser General Public License
15
# along with charm-helpers. If not, see <http://www.gnu.org/licenses/>.
17
"""Compatibility with the nrpe-external-master charm"""
18
# Copyright 2012 Canonical Ltd.
21
# Matthew Wedgwood <matthew.wedgwood@canonical.com>
31
from charmhelpers.core.hookenv import (
40
from charmhelpers.core.host import service
42
# This module adds compatibility with the nrpe-external-master and plain nrpe
43
# subordinate charms. To use it in your charm:
45
# 1. Update metadata.yaml
49
# nrpe-external-master:
50
# interface: nrpe-external-master
58
# interface: local-monitors
62
# 2. Add the following to config.yaml
68
# Used by the nrpe subordinate charms.
69
# A string that will be prepended to instance name to set the host name
70
# in nagios. So for instance the hostname would be something like:
72
# If you're running multiple environments with the same services in them
73
# this allows you to differentiate between them.
74
# nagios_servicegroups:
78
# A comma-separated list of nagios servicegroups.
79
# If left empty, the nagios_context will be used as the servicegroup
81
# 3. Add custom checks (Nagios plugins) to files/nrpe-external-master
83
# 4. Update your hooks.py with something like this:
85
# from charmsupport.nrpe import NRPE
87
# def update_nrpe_config():
88
# nrpe_compat = NRPE()
89
# nrpe_compat.add_check(
90
# shortname = "myservice",
91
# description = "Check MyService",
92
# check_cmd = "check_http -w 2 -c 10 http://localhost"
94
# nrpe_compat.add_check(
96
# "Check for widget failures",
97
# check_cmd = "/srv/myapp/scripts/widget_check"
101
# def config_changed():
103
# update_nrpe_config()
105
# def nrpe_external_master_relation_changed():
106
# update_nrpe_config()
108
# def local_monitors_relation_changed():
109
# update_nrpe_config()
111
# 5. ln -s hooks.py nrpe-external-master-relation-changed
112
# ln -s hooks.py local-monitors-relation-changed
115
class CheckException(Exception):
120
shortname_re = '[A-Za-z0-9-_]+$'
121
service_template = ("""
122
#---------------------------------------------------
123
# This file is Juju managed
124
#---------------------------------------------------
127
host_name {nagios_hostname}
128
service_description {nagios_hostname}[{shortname}] """
130
check_command check_nrpe!{command}
131
servicegroups {nagios_servicegroup}
135
def __init__(self, shortname, description, check_cmd):
136
super(Check, self).__init__()
137
# XXX: could be better to calculate this from the service name
138
if not re.match(self.shortname_re, shortname):
139
raise CheckException("shortname must match {}".format(
141
self.shortname = shortname
142
self.command = "check_{}".format(shortname)
143
# Note: a set of invalid characters is defined by the
144
# Nagios server config
145
# The default is: illegal_object_name_chars=`~!$%^&*"|'<>?,()=
146
self.description = description
147
self.check_cmd = self._locate_cmd(check_cmd)
149
def _locate_cmd(self, check_cmd):
151
'/usr/lib/nagios/plugins',
152
'/usr/local/lib/nagios/plugins',
154
parts = shlex.split(check_cmd)
155
for path in search_path:
156
if os.path.exists(os.path.join(path, parts[0])):
157
command = os.path.join(path, parts[0])
159
command += " " + " ".join(parts[1:])
161
log('Check command not found: {}'.format(parts[0]))
164
def write(self, nagios_context, hostname, nagios_servicegroups=None):
165
nrpe_check_file = '/etc/nagios/nrpe.d/{}.cfg'.format(
167
with open(nrpe_check_file, 'w') as nrpe_check_config:
168
nrpe_check_config.write("# check {}\n".format(self.shortname))
169
nrpe_check_config.write("command[{}]={}\n".format(
170
self.command, self.check_cmd))
172
if not os.path.exists(NRPE.nagios_exportdir):
173
log('Not writing service config as {} is not accessible'.format(
174
NRPE.nagios_exportdir))
176
self.write_service_config(nagios_context, hostname,
177
nagios_servicegroups)
179
def write_service_config(self, nagios_context, hostname,
180
nagios_servicegroups=None):
181
for f in os.listdir(NRPE.nagios_exportdir):
182
if re.search('.*{}.cfg'.format(self.command), f):
183
os.remove(os.path.join(NRPE.nagios_exportdir, f))
185
if not nagios_servicegroups:
186
nagios_servicegroups = nagios_context
189
'nagios_hostname': hostname,
190
'nagios_servicegroup': nagios_servicegroups,
191
'description': self.description,
192
'shortname': self.shortname,
193
'command': self.command,
195
nrpe_service_text = Check.service_template.format(**templ_vars)
196
nrpe_service_file = '{}/service__{}_{}.cfg'.format(
197
NRPE.nagios_exportdir, hostname, self.command)
198
with open(nrpe_service_file, 'w') as nrpe_service_config:
199
nrpe_service_config.write(str(nrpe_service_text))
202
subprocess.call(self.check_cmd)
206
nagios_logdir = '/var/log/nagios'
207
nagios_exportdir = '/var/lib/nagios/export'
208
nrpe_confdir = '/etc/nagios/nrpe.d'
210
def __init__(self, hostname=None):
211
super(NRPE, self).__init__()
212
self.config = config()
213
self.nagios_context = self.config['nagios_context']
214
if 'nagios_servicegroups' in self.config:
215
self.nagios_servicegroups = self.config['nagios_servicegroups']
217
self.nagios_servicegroups = 'juju'
218
self.unit_name = local_unit().replace('/', '-')
220
self.hostname = hostname
222
self.hostname = "{}-{}".format(self.nagios_context, self.unit_name)
225
def add_check(self, *args, **kwargs):
    """
    Register a new check on this NRPE object

    All positional and keyword arguments are passed through unchanged to
    the Check constructor; the resulting Check is appended to self.checks.
    """
    registered = self.checks
    new_check = Check(*args, **kwargs)
    registered.append(new_check)
230
nagios_uid = pwd.getpwnam('nagios').pw_uid
231
nagios_gid = grp.getgrnam('nagios').gr_gid
233
log("Nagios user not set up, nrpe checks not updated")
236
if not os.path.exists(NRPE.nagios_logdir):
237
os.mkdir(NRPE.nagios_logdir)
238
os.chown(NRPE.nagios_logdir, nagios_uid, nagios_gid)
241
monitors = {"monitors": {"remote": {"nrpe": nrpe_monitors}}}
242
for nrpecheck in self.checks:
243
nrpecheck.write(self.nagios_context, self.hostname,
244
self.nagios_servicegroups)
245
nrpe_monitors[nrpecheck.shortname] = {
246
"command": nrpecheck.command,
249
service('restart', 'nagios-nrpe-server')
251
for rid in relation_ids("local-monitors"):
252
relation_set(relation_id=rid, monitors=yaml.dump(monitors))
255
def get_nagios_hostcontext(relation_name='nrpe-external-master'):
    """
    Query relation with nrpe subordinate, return the nagios_host_context

    :param str relation_name: Name of relation nrpe sub joined to
    :returns: The 'nagios_host_context' value of the first relation entry
        that carries one, or None when no entry does.
    """
    for rel in relations_of_type(relation_name):
        # Guard on the key that is actually read below. Testing for
        # 'nagios_hostname' here raised KeyError when an entry published a
        # hostname without a host context, and skipped entries that
        # published only the host context.
        if 'nagios_host_context' in rel:
            return rel['nagios_host_context']
    return None
266
def get_nagios_hostname(relation_name='nrpe-external-master'):
    """
    Look up the nagios_hostname published over the nrpe subordinate relation

    :param str relation_name: Name of relation nrpe sub joined to
    :returns: The 'nagios_hostname' value of the first relation entry that
        carries one, or None when no entry does.
    """
    hostname_key = 'nagios_hostname'
    # Lazy generator: iteration stops at the first entry holding the key.
    published = (
        entry[hostname_key]
        for entry in relations_of_type(relation_name)
        if hostname_key in entry
    )
    return next(published, None)
277
def get_nagios_unit_name(relation_name='nrpe-external-master'):
279
Return the nagios unit name prepended with host_context if needed
281
:param str relation_name: Name of relation nrpe sub joined to
283
host_context = get_nagios_hostcontext(relation_name)
285
unit = "%s:%s" % (host_context, local_unit())
291
def add_init_service_checks(nrpe, services, unit_name):
293
Add checks for each service in list
295
:param NRPE nrpe: NRPE object to add check to
296
:param list services: List of services to check
297
:param str unit_name: Unit name to use in check description
300
upstart_init = '/etc/init/%s.conf' % svc
301
sysv_init = '/etc/init.d/%s' % svc
302
if os.path.exists(upstart_init):
305
description='process check {%s}' % unit_name,
306
check_cmd='check_upstart_job %s' % svc
308
elif os.path.exists(sysv_init):
309
cronpath = '/etc/cron.d/nagios-service-check-%s' % svc
310
cron_file = ('*/5 * * * * root '
311
'/usr/local/lib/nagios/plugins/check_exit_status.pl '
312
'-s /etc/init.d/%s status > '
313
'/var/lib/nagios/service-check-%s.txt\n' % (svc,
316
f = open(cronpath, 'w')
321
description='process check {%s}' % unit_name,
322
check_cmd='check_status_file.py -f '
323
'/var/lib/nagios/service-check-%s.txt' % svc,