135
by James Page
Fix liberty/mitaka typo from previous test definition update batch. |
1 |
# Copyright 2014-2015 Canonical Limited.
|
2 |
#
|
|
3 |
# This file is part of charm-helpers.
|
|
4 |
#
|
|
5 |
# charm-helpers is free software: you can redistribute it and/or modify
|
|
6 |
# it under the terms of the GNU Lesser General Public License version 3 as
|
|
7 |
# published by the Free Software Foundation.
|
|
8 |
#
|
|
9 |
# charm-helpers is distributed in the hope that it will be useful,
|
|
10 |
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
11 |
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
12 |
# GNU Lesser General Public License for more details.
|
|
13 |
#
|
|
14 |
# You should have received a copy of the GNU Lesser General Public License
|
|
15 |
# along with charm-helpers. If not, see <http://www.gnu.org/licenses/>.
|
|
16 |
||
17 |
"""Compatibility with the nrpe-external-master charm"""
|
|
18 |
# Copyright 2012 Canonical Ltd.
|
|
19 |
#
|
|
20 |
# Authors:
|
|
21 |
# Matthew Wedgwood <matthew.wedgwood@canonical.com>
|
|
22 |
||
23 |
import subprocess |
|
24 |
import pwd |
|
25 |
import grp |
|
26 |
import os |
|
27 |
import glob |
|
28 |
import shutil |
|
29 |
import re |
|
30 |
import shlex |
|
31 |
import yaml |
|
32 |
||
33 |
from charmhelpers.core.hookenv import ( |
|
34 |
config, |
|
35 |
local_unit, |
|
36 |
log, |
|
37 |
relation_ids, |
|
38 |
relation_set, |
|
39 |
relations_of_type, |
|
40 |
)
|
|
41 |
||
42 |
from charmhelpers.core.host import service |
|
43 |
||
44 |
# This module adds compatibility with the nrpe-external-master and plain nrpe
# subordinate charms. To use it in your charm:
#
# 1. Update metadata.yaml
#
#    provides:
#      (...)
#      nrpe-external-master:
#        interface: nrpe-external-master
#        scope: container
#
#    and/or
#
#    provides:
#      (...)
#      local-monitors:
#        interface: local-monitors
#        scope: container
#
#
# 2. Add the following to config.yaml
#
#    nagios_context:
#      default: "juju"
#      type: string
#      description: |
#        Used by the nrpe subordinate charms.
#        A string that will be prepended to the instance name to set the
#        host name in nagios. So for instance the hostname would be
#        something like:
#            juju-myservice-0
#        If you're running multiple environments with the same services in
#        them this allows you to differentiate between them.
#    nagios_servicegroups:
#      default: ""
#      type: string
#      description: |
#        A comma-separated list of nagios servicegroups.
#        If left empty, the nagios_context will be used as the servicegroup
#
# 3. Add custom checks (Nagios plugins) to files/nrpe-external-master
#
# 4. Update your hooks.py with something like this:
#
#    from charmsupport.nrpe import NRPE
#    (...)
#    def update_nrpe_config():
#        nrpe_compat = NRPE()
#        nrpe_compat.add_check(
#            shortname = "myservice",
#            description = "Check MyService",
#            check_cmd = "check_http -w 2 -c 10 http://localhost"
#            )
#        nrpe_compat.add_check(
#            "myservice_other",
#            "Check for widget failures",
#            check_cmd = "/srv/myapp/scripts/widget_check"
#            )
#        nrpe_compat.write()
#
#    def config_changed():
#        (...)
#        update_nrpe_config()
#
#    def nrpe_external_master_relation_changed():
#        update_nrpe_config()
#
#    def local_monitors_relation_changed():
#        update_nrpe_config()
#
# 5. ln -s hooks.py nrpe-external-master-relation-changed
#    ln -s hooks.py local-monitors-relation-changed
|
|
115 |
||
116 |
||
117 |
class CheckException(Exception):
    """Error raised when a check definition is rejected.

    Within this module it is raised by ``Check.__init__`` when the
    supplied shortname does not match ``Check.shortname_re``.
    """
|
|
119 |
||
120 |
||
121 |
class Check(object):
    """One NRPE check and the Nagios service definition exported for it.

    A check is identified by its ``shortname``; the NRPE command name is
    derived from it as ``check_<shortname>``.  ``write`` emits the NRPE
    command file under ``NRPE.nrpe_confdir`` and, when the export
    directory exists, a service definition under
    ``NRPE.nagios_exportdir``.
    """

    # Pattern a shortname has to satisfy (applied with re.match, so it is
    # anchored at the start; the trailing '$' anchors the end).
    shortname_re = '[A-Za-z0-9-_]+$'
    # str.format template for the exported service definition; literal
    # braces are doubled.  The two adjacent literals are concatenated so
    # that the description follows "[shortname] " on the same line.
    service_template = ("""
#---------------------------------------------------
# This file is Juju managed
#---------------------------------------------------
define service {{
    use                             active-service
    host_name                       {nagios_hostname}
    service_description             {nagios_hostname}[{shortname}] """
                        """{description}
    check_command                   check_nrpe!{command}
    servicegroups                   {nagios_servicegroup}
}}
""")

    def __init__(self, shortname, description, check_cmd):
        """Validate the shortname and resolve the check command.

        :param str shortname: Identifier of the check; must match
            ``shortname_re``.
        :param str description: Text appended to the service description.
        :param str check_cmd: Plugin name (optionally with arguments); it
            is looked up in the Nagios plugin directories.
        :raises CheckException: if ``shortname`` does not match
            ``shortname_re``.
        """
        super(Check, self).__init__()
        # XXX: could be better to calculate this from the service name
        if not re.match(self.shortname_re, shortname):
            raise CheckException("shortname must match {}".format(
                Check.shortname_re))
        self.shortname = shortname
        self.command = "check_{}".format(shortname)
        # Note: a set of invalid characters is defined by the
        # Nagios server config
        # The default is: illegal_object_name_chars=`~!$%^&*"|'<>?,()=
        self.description = description
        self.check_cmd = self._locate_cmd(check_cmd)

    def _get_check_filename(self):
        """Return the path of the NRPE command file for this check."""
        return os.path.join(NRPE.nrpe_confdir, '{}.cfg'.format(self.command))

    def _get_service_filename(self, hostname):
        """Return the path of the exported service file for ``hostname``."""
        return os.path.join(NRPE.nagios_exportdir,
                            'service__{}_{}.cfg'.format(hostname, self.command))

    def _locate_cmd(self, check_cmd):
        """Resolve the plugin named in ``check_cmd`` to a full path.

        The first token of ``check_cmd`` is searched for in the known
        plugin directories; the remaining tokens are re-appended,
        separated by single spaces.

        :param str check_cmd: Plugin name followed by optional arguments.
        :returns: The command line with the plugin path expanded, or ``''``
            (after logging) when the plugin cannot be found or no command
            was given.
        """
        search_path = (
            '/usr/lib/nagios/plugins',
            '/usr/local/lib/nagios/plugins',
        )
        # An empty command has no first token to look up; treat it like a
        # plugin that could not be found instead of raising IndexError.
        parts = shlex.split(check_cmd) if check_cmd else []
        if not parts:
            log('Check command not found: {}'.format(check_cmd))
            return ''
        for path in search_path:
            candidate = os.path.join(path, parts[0])
            if os.path.exists(candidate):
                return " ".join([candidate] + parts[1:])
        log('Check command not found: {}'.format(parts[0]))
        return ''

    def _remove_service_files(self):
        """Delete exported service files whose name ends with this command."""
        if not os.path.exists(NRPE.nagios_exportdir):
            return
        suffix = '_{}.cfg'.format(self.command)
        for f in os.listdir(NRPE.nagios_exportdir):
            if f.endswith(suffix):
                os.remove(os.path.join(NRPE.nagios_exportdir, f))

    def remove(self, hostname):
        """Remove the NRPE command file and the exported service files.

        :param str hostname: Accepted for interface compatibility; the
            service files are matched by command name, not by hostname.
        """
        nrpe_check_file = self._get_check_filename()
        if os.path.exists(nrpe_check_file):
            os.remove(nrpe_check_file)
        self._remove_service_files()

    def write(self, nagios_context, hostname, nagios_servicegroups):
        """Write the NRPE command file and, if possible, the service file.

        :param str nagios_context: Passed through to
            ``write_service_config``.
        :param str hostname: Nagios host name used in the service file.
        :param str nagios_servicegroups: Servicegroups for the service.
        """
        nrpe_check_file = self._get_check_filename()
        with open(nrpe_check_file, 'w') as nrpe_check_config:
            nrpe_check_config.write("# check {}\n".format(self.shortname))
            nrpe_check_config.write("command[{}]={}\n".format(
                self.command, self.check_cmd))

        if not os.path.exists(NRPE.nagios_exportdir):
            log('Not writing service config as {} is not accessible'.format(
                NRPE.nagios_exportdir))
        else:
            self.write_service_config(nagios_context, hostname,
                                      nagios_servicegroups)

    def write_service_config(self, nagios_context, hostname,
                             nagios_servicegroups):
        """Render ``service_template`` into the export directory.

        Previously exported service files for this command are removed
        first so that only one definition remains.

        :param str nagios_context: Unused here; kept for the call signature.
        :param str hostname: Nagios host name used in the definition and in
            the file name.
        :param str nagios_servicegroups: Value of the servicegroups field.
        """
        self._remove_service_files()

        templ_vars = {
            'nagios_hostname': hostname,
            'nagios_servicegroup': nagios_servicegroups,
            'description': self.description,
            'shortname': self.shortname,
            'command': self.command,
        }
        nrpe_service_text = Check.service_template.format(**templ_vars)
        nrpe_service_file = self._get_service_filename(hostname)
        with open(nrpe_service_file, 'w') as nrpe_service_config:
            nrpe_service_config.write(str(nrpe_service_text))

    def run(self):
        """Execute the resolved check command and return its exit status.

        ``check_cmd`` is a single string that may carry arguments.  Without
        a shell, subprocess treats a string as the program name, so the
        command is tokenised with shlex first.

        :returns: The exit status of the command, or ``None`` when no
            command was resolved for this check.
        """
        if not self.check_cmd:
            log('No check command available for {}'.format(self.shortname))
            return None
        return subprocess.call(shlex.split(self.check_cmd))
|
218 |
||
219 |
||
220 |
class NRPE(object):
    """Collects checks for a unit and publishes them to NRPE and Nagios.

    Checks are registered with ``add_check`` and written out, together
    with the relation data describing them, by ``write``.
    """

    nagios_logdir = '/var/log/nagios'
    nagios_exportdir = '/var/lib/nagios/export'
    nrpe_confdir = '/etc/nagios/nrpe.d'

    def __init__(self, hostname=None):
        """Read the charm config and work out the Nagios host name.

        :param str hostname: Explicit Nagios host name.  When omitted, the
            name advertised on the nrpe relation is used, falling back to
            ``<nagios_context>-<unit name>``.
        """
        super(NRPE, self).__init__()
        self.config = config()
        self.nagios_context = self.config['nagios_context']
        # An unset or empty servicegroups option falls back to the context.
        if ('nagios_servicegroups' in self.config and
                self.config['nagios_servicegroups']):
            self.nagios_servicegroups = self.config['nagios_servicegroups']
        else:
            self.nagios_servicegroups = self.nagios_context
        self.unit_name = local_unit().replace('/', '-')
        if hostname:
            self.hostname = hostname
        else:
            nagios_hostname = get_nagios_hostname()
            if nagios_hostname:
                self.hostname = nagios_hostname
            else:
                self.hostname = "{}-{}".format(self.nagios_context,
                                               self.unit_name)
        self.checks = []

    def add_check(self, *args, **kwargs):
        """Register a new ``Check``; arguments are passed to its constructor."""
        self.checks.append(Check(*args, **kwargs))

    def remove_check(self, *args, **kwargs):
        """Remove the files belonging to the check named by ``shortname``.

        :raises ValueError: if no ``shortname`` keyword argument is given.
        """
        if kwargs.get('shortname') is None:
            raise ValueError('shortname of check must be specified')

        # Use sensible defaults if they're not specified - these are not
        # actually used during removal, but they're required for constructing
        # the Check object; check_disk is chosen because it's part of the
        # nagios-plugins-basic package.
        if kwargs.get('check_cmd') is None:
            kwargs['check_cmd'] = 'check_disk'
        if kwargs.get('description') is None:
            kwargs['description'] = ''

        check = Check(*args, **kwargs)
        check.remove(self.hostname)

    def write(self):
        """Write all registered checks and advertise them on the relations.

        Does nothing (beyond logging) when the ``nagios`` user or group
        does not exist.  Otherwise the log directory is created if missing,
        every check is written, ``nagios-nrpe-server`` is restarted and the
        monitors definition is set on every ``local-monitors`` and
        ``nrpe-external-master`` relation.
        """
        try:
            nagios_uid = pwd.getpwnam('nagios').pw_uid
            nagios_gid = grp.getgrnam('nagios').gr_gid
        except KeyError:
            # pwd/grp raise KeyError for an unknown user or group; anything
            # else (e.g. KeyboardInterrupt) must not be swallowed here.
            log("Nagios user not set up, nrpe checks not updated")
            return

        if not os.path.exists(NRPE.nagios_logdir):
            os.mkdir(NRPE.nagios_logdir)
            os.chown(NRPE.nagios_logdir, nagios_uid, nagios_gid)

        nrpe_monitors = {}
        monitors = {"monitors": {"remote": {"nrpe": nrpe_monitors}}}
        for nrpecheck in self.checks:
            nrpecheck.write(self.nagios_context, self.hostname,
                            self.nagios_servicegroups)
            nrpe_monitors[nrpecheck.shortname] = {
                "command": nrpecheck.command,
            }

        service('restart', 'nagios-nrpe-server')

        monitor_ids = relation_ids("local-monitors") + \
            relation_ids("nrpe-external-master")
        # The payload is identical for every relation, so serialise it once.
        serialized_monitors = yaml.dump(monitors)
        for rid in monitor_ids:
            relation_set(relation_id=rid, monitors=serialized_monitors)
|
290 |
||
291 |
||
292 |
def get_nagios_hostcontext(relation_name='nrpe-external-master'):
    """
    Look up the nagios_host_context published on the nrpe relation.

    :param str relation_name: Name of relation nrpe sub joined to
    :returns: The value from the first relation entry that carries a
        ``nagios_host_context`` key, or None when no entry has one
    """
    key = 'nagios_host_context'
    matches = (entry[key]
               for entry in relations_of_type(relation_name)
               if key in entry)
    return next(matches, None)
|
301 |
||
302 |
||
303 |
def get_nagios_hostname(relation_name='nrpe-external-master'):
    """
    Look up the nagios_hostname published on the nrpe relation.

    :param str relation_name: Name of relation nrpe sub joined to
    :returns: The value from the first relation entry that carries a
        ``nagios_hostname`` key, or None when no entry has one
    """
    key = 'nagios_hostname'
    matches = (entry[key]
               for entry in relations_of_type(relation_name)
               if key in entry)
    return next(matches, None)
|
312 |
||
313 |
||
314 |
def get_nagios_unit_name(relation_name='nrpe-external-master'):
    """
    Build the unit name used for nagios, prefixed by the host context.

    :param str relation_name: Name of relation nrpe sub joined to
    :returns: ``<host_context>:<local unit>`` when the relation provides a
        host context, otherwise just the local unit name
    """
    context = get_nagios_hostcontext(relation_name)
    unit = local_unit()
    if not context:
        return unit
    return "%s:%s" % (context, unit)
|
326 |
||
327 |
||
328 |
def add_init_service_checks(nrpe, services, unit_name):
    """
    Add a process check for each service that has an init script.

    Services with an upstart job (``/etc/init/<svc>.conf``) get a
    ``check_upstart_job`` check.  Services with only a SysV script
    (``/etc/init.d/<svc>``) get a cron entry that records the script's
    status every five minutes, plus a check that reads the recorded status.
    Services with neither are skipped.

    :param NRPE nrpe: NRPE object to add check to
    :param list services: List of services to check
    :param str unit_name: Unit name to use in check description
    """
    for svc in services:
        upstart_init = '/etc/init/%s.conf' % svc
        sysv_init = '/etc/init.d/%s' % svc
        if os.path.exists(upstart_init):
            # Don't add a check for these services from neutron-gateway
            if svc not in ['ext-port', 'os-charm-phy-nic-mtu']:
                nrpe.add_check(
                    shortname=svc,
                    description='process check {%s}' % unit_name,
                    check_cmd='check_upstart_job %s' % svc
                )
        elif os.path.exists(sysv_init):
            cronpath = '/etc/cron.d/nagios-service-check-%s' % svc
            cron_file = ('*/5 * * * * root '
                         '/usr/local/lib/nagios/plugins/check_exit_status.pl '
                         '-s /etc/init.d/%s status > '
                         '/var/lib/nagios/service-check-%s.txt\n' % (svc,
                                                                     svc)
                         )
            # Context manager guarantees the cron file is closed (and
            # flushed) even if the write fails.
            with open(cronpath, 'w') as f:
                f.write(cron_file)
            nrpe.add_check(
                shortname=svc,
                description='process check {%s}' % unit_name,
                check_cmd='check_status_file.py -f '
                          '/var/lib/nagios/service-check-%s.txt' % svc,
            )
|
|
364 |
||
365 |
||
366 |
def copy_nrpe_checks():
    """
    Install the bundled check_* plugins into the local nagios plugin dir.

    Files matching ``check_*`` are taken from
    ``$CHARM_DIR/hooks/charmhelpers/contrib/openstack/files`` and copied
    (with metadata) to ``/usr/local/lib/nagios/plugins``, which is created
    if it does not exist yet.
    """
    plugins_dir = '/usr/local/lib/nagios/plugins'
    source_dir = os.path.join(os.getenv('CHARM_DIR'), 'hooks',
                              'charmhelpers', 'contrib', 'openstack',
                              'files')

    if not os.path.exists(plugins_dir):
        os.makedirs(plugins_dir)

    pattern = os.path.join(source_dir, "check_*")
    for candidate in glob.glob(pattern):
        # Only regular files are plugins; skip directories and the like.
        if not os.path.isfile(candidate):
            continue
        destination = os.path.join(plugins_dir, os.path.basename(candidate))
        shutil.copy2(candidate, destination)
|
382 |
||
383 |
||
384 |
def add_haproxy_checks(nrpe, unit_name):
    """
    Register the two HAProxy checks (server state and queue depth).

    :param NRPE nrpe: NRPE object to add check to
    :param str unit_name: Unit name to use in check description
    """
    # (shortname, description template, plugin script), in registration order
    haproxy_checks = (
        ('haproxy_servers', 'Check HAProxy {%s}', 'check_haproxy.sh'),
        ('haproxy_queue', 'Check HAProxy queue depth {%s}',
         'check_haproxy_queue_depth.sh'),
    )
    for name, description_fmt, script in haproxy_checks:
        nrpe.add_check(
            shortname=name,
            description=description_fmt % unit_name,
            check_cmd=script)