~james-page/charms/trusty/swift-proxy/trunk

135 by James Page
Fix liberty/mitaka typo from previous test definition update batch.
1
# Copyright 2014-2015 Canonical Limited.
2
#
3
# This file is part of charm-helpers.
4
#
5
# charm-helpers is free software: you can redistribute it and/or modify
6
# it under the terms of the GNU Lesser General Public License version 3 as
7
# published by the Free Software Foundation.
8
#
9
# charm-helpers is distributed in the hope that it will be useful,
10
# but WITHOUT ANY WARRANTY; without even the implied warranty of
11
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12
# GNU Lesser General Public License for more details.
13
#
14
# You should have received a copy of the GNU Lesser General Public License
15
# along with charm-helpers.  If not, see <http://www.gnu.org/licenses/>.
16
17
"""Compatibility with the nrpe-external-master charm"""
18
# Copyright 2012 Canonical Ltd.
19
#
20
# Authors:
21
#  Matthew Wedgwood <matthew.wedgwood@canonical.com>
22
23
import subprocess
24
import pwd
25
import grp
26
import os
27
import glob
28
import shutil
29
import re
30
import shlex
31
import yaml
32
33
from charmhelpers.core.hookenv import (
34
    config,
35
    local_unit,
36
    log,
37
    relation_ids,
38
    relation_set,
39
    relations_of_type,
40
)
41
42
from charmhelpers.core.host import service
43
44
# This module adds compatibility with the nrpe-external-master and plain nrpe
45
# subordinate charms. To use it in your charm:
46
#
47
# 1. Update metadata.yaml
48
#
49
#   provides:
50
#     (...)
51
#     nrpe-external-master:
52
#       interface: nrpe-external-master
53
#       scope: container
54
#
55
#   and/or
56
#
57
#   provides:
58
#     (...)
59
#     local-monitors:
60
#       interface: local-monitors
61
#       scope: container
62
63
#
64
# 2. Add the following to config.yaml
65
#
66
#    nagios_context:
67
#      default: "juju"
68
#      type: string
69
#      description: |
70
#        Used by the nrpe subordinate charms.
71
#        A string that will be prepended to instance name to set the host name
72
#        in nagios. So for instance the hostname would be something like:
73
#            juju-myservice-0
74
#        If you're running multiple environments with the same services in them
75
#        this allows you to differentiate between them.
76
#    nagios_servicegroups:
77
#      default: ""
78
#      type: string
79
#      description: |
80
#        A comma-separated list of nagios servicegroups.
81
#        If left empty, the nagios_context will be used as the servicegroup
82
#
83
# 3. Add custom checks (Nagios plugins) to files/nrpe-external-master
84
#
85
# 4. Update your hooks.py with something like this:
86
#
87
#    from charmsupport.nrpe import NRPE
88
#    (...)
89
#    def update_nrpe_config():
90
#        nrpe_compat = NRPE()
91
#        nrpe_compat.add_check(
92
#            shortname = "myservice",
93
#            description = "Check MyService",
94
#            check_cmd = "check_http -w 2 -c 10 http://localhost"
95
#            )
96
#        nrpe_compat.add_check(
97
#            "myservice_other",
98
#            "Check for widget failures",
99
#            check_cmd = "/srv/myapp/scripts/widget_check"
100
#            )
101
#        nrpe_compat.write()
102
#
103
#    def config_changed():
104
#        (...)
105
#        update_nrpe_config()
106
#
107
#    def nrpe_external_master_relation_changed():
108
#        update_nrpe_config()
109
#
110
#    def local_monitors_relation_changed():
111
#        update_nrpe_config()
112
#
113
# 5. ln -s hooks.py nrpe-external-master-relation-changed
114
#    ln -s hooks.py local-monitors-relation-changed
115
116
117
class CheckException(Exception):
    """Error raised when a check definition is rejected.

    Within this module it is raised by ``Check.__init__`` when the supplied
    shortname does not match ``Check.shortname_re``.
    """
119
120
121
class Check(object):
    """A single NRPE check plus the Nagios service definition exported for it.

    A check is identified by ``shortname``; the NRPE command name is derived
    from it as ``check_<shortname>``.  ``write()`` emits the NRPE command
    file under ``NRPE.nrpe_confdir`` and, when ``NRPE.nagios_exportdir``
    exists, a Nagios service definition under that directory.
    """
    # Pattern a shortname must match (applied with re.match, so anchored at
    # the start by match() and at the end by the trailing '$').
    shortname_re = '[A-Za-z0-9-_]+$'
    # str.format template for the exported service definition; literal braces
    # are doubled.
    service_template = ("""
#---------------------------------------------------
# This file is Juju managed
#---------------------------------------------------
define service {{
    use                             active-service
    host_name                       {nagios_hostname}
    service_description             {nagios_hostname}[{shortname}] """
                        """{description}
    check_command                   check_nrpe!{command}
    servicegroups                   {nagios_servicegroup}
}}
""")

    def __init__(self, shortname, description, check_cmd):
        """Validate the shortname and resolve the check command.

        :param str shortname: Identifier of the check; must match
                              ``shortname_re``
        :param str description: Free text appended to the service description
        :param str check_cmd: Plugin name (optionally followed by arguments)
                              looked up in the Nagios plugin directories
        :raises CheckException: if ``shortname`` does not match
                                ``shortname_re``
        """
        super(Check, self).__init__()
        # XXX: could be better to calculate this from the service name
        if not re.match(self.shortname_re, shortname):
            raise CheckException("shortname must match {}".format(
                Check.shortname_re))
        self.shortname = shortname
        self.command = "check_{}".format(shortname)
        # Note: a set of invalid characters is defined by the
        # Nagios server config
        # The default is: illegal_object_name_chars=`~!$%^&*"|'<>?,()=
        self.description = description
        self.check_cmd = self._locate_cmd(check_cmd)

    def _get_check_filename(self):
        """Return the path of the NRPE command file for this check."""
        return os.path.join(NRPE.nrpe_confdir, '{}.cfg'.format(self.command))

    def _get_service_filename(self, hostname):
        """Return the path of the exported service file for ``hostname``."""
        return os.path.join(NRPE.nagios_exportdir,
                            'service__{}_{}.cfg'.format(hostname, self.command))

    def _locate_cmd(self, check_cmd):
        """Resolve ``check_cmd`` to a full plugin path plus its arguments.

        The first token of ``check_cmd`` is looked up in the known plugin
        directories, in order.

        :param str check_cmd: Plugin name optionally followed by arguments
        :returns: The full command line, or ``''`` (after logging) when the
                  plugin is not found or ``check_cmd`` contains no tokens
        """
        search_path = (
            '/usr/lib/nagios/plugins',
            '/usr/local/lib/nagios/plugins',
        )
        parts = shlex.split(check_cmd)
        if not parts:
            # Empty/whitespace-only command: treat it like a missing plugin
            # instead of failing with IndexError on parts[0].
            log('Check command not found: {}'.format(check_cmd))
            return ''
        for path in search_path:
            candidate = os.path.join(path, parts[0])
            if os.path.exists(candidate):
                command = candidate
                if len(parts) > 1:
                    # NOTE(review): shlex.split() strips quoting and the
                    # arguments are re-joined with plain spaces, so quoted
                    # arguments containing whitespace are not preserved.
                    command += " " + " ".join(parts[1:])
                return command
        log('Check command not found: {}'.format(parts[0]))
        return ''

    def _remove_service_files(self):
        """Delete exported service files belonging to this check.

        Every file in ``NRPE.nagios_exportdir`` whose name ends with
        ``_<command>.cfg`` is removed; nothing happens if the directory does
        not exist.
        """
        if not os.path.exists(NRPE.nagios_exportdir):
            return
        for f in os.listdir(NRPE.nagios_exportdir):
            if f.endswith('_{}.cfg'.format(self.command)):
                os.remove(os.path.join(NRPE.nagios_exportdir, f))

    def remove(self, hostname):
        """Remove the NRPE command file and exported service files.

        :param str hostname: Accepted for interface symmetry with ``write``;
                             not used by the removal itself
        """
        nrpe_check_file = self._get_check_filename()
        if os.path.exists(nrpe_check_file):
            os.remove(nrpe_check_file)
        self._remove_service_files()

    def write(self, nagios_context, hostname, nagios_servicegroups):
        """Write the NRPE command file and, if possible, the service export.

        :param str nagios_context: Passed through to ``write_service_config``
        :param str hostname: Nagios host name used in the service definition
        :param str nagios_servicegroups: Value for the ``servicegroups`` field
        """
        nrpe_check_file = self._get_check_filename()
        with open(nrpe_check_file, 'w') as nrpe_check_config:
            nrpe_check_config.write("# check {}\n".format(self.shortname))
            nrpe_check_config.write("command[{}]={}\n".format(
                self.command, self.check_cmd))

        if not os.path.exists(NRPE.nagios_exportdir):
            log('Not writing service config as {} is not accessible'.format(
                NRPE.nagios_exportdir))
        else:
            self.write_service_config(nagios_context, hostname,
                                      nagios_servicegroups)

    def write_service_config(self, nagios_context, hostname,
                             nagios_servicegroups):
        """Render ``service_template`` and write it to the export directory.

        Existing exported files for this check are removed first.

        :param str nagios_context: Not used when rendering the template
        :param str hostname: Nagios host name used in the definition and in
                             the output file name
        :param str nagios_servicegroups: Value for the ``servicegroups`` field
        """
        self._remove_service_files()

        templ_vars = {
            'nagios_hostname': hostname,
            'nagios_servicegroup': nagios_servicegroups,
            'description': self.description,
            'shortname': self.shortname,
            'command': self.command,
        }
        nrpe_service_text = Check.service_template.format(**templ_vars)
        nrpe_service_file = self._get_service_filename(hostname)
        with open(nrpe_service_file, 'w') as nrpe_service_config:
            nrpe_service_config.write(str(nrpe_service_text))

    def run(self):
        """Execute the resolved check command.

        ``check_cmd`` is a single string that may carry arguments, so it is
        tokenised before being handed to ``subprocess.call``; passing the raw
        string (without a shell) would treat the whole line as the program
        name and fail for any check that has arguments.

        :returns: The exit status of the check command
        """
        # If no command was located (check_cmd == ''), pass the value through
        # unchanged and let subprocess report the failure itself.
        argv = shlex.split(self.check_cmd) or self.check_cmd
        return subprocess.call(argv)
218
219
220
class NRPE(object):
    """Collects checks for a unit and publishes them to the nrpe subordinate.

    Checks are accumulated with ``add_check`` and materialised by ``write``,
    which writes each check's files, restarts ``nagios-nrpe-server`` and
    advertises the checks on the ``local-monitors`` and
    ``nrpe-external-master`` relations.
    """
    # Directory created (and chowned to nagios) by write() if missing.
    nagios_logdir = '/var/log/nagios'
    # Directory where Nagios service definitions are exported.
    nagios_exportdir = '/var/lib/nagios/export'
    # Directory holding the per-check NRPE command files.
    nrpe_confdir = '/etc/nagios/nrpe.d'

    def __init__(self, hostname=None):
        """Read charm config and work out the Nagios host name.

        :param str hostname: Explicit Nagios host name; when not given, the
                             name provided over the relation is used, falling
                             back to ``<nagios_context>-<unit-name>``
        """
        super(NRPE, self).__init__()
        self.config = config()
        self.nagios_context = self.config['nagios_context']
        # An unset or empty servicegroups option falls back to the context.
        if 'nagios_servicegroups' in self.config and self.config['nagios_servicegroups']:
            self.nagios_servicegroups = self.config['nagios_servicegroups']
        else:
            self.nagios_servicegroups = self.nagios_context
        self.unit_name = local_unit().replace('/', '-')
        if hostname:
            self.hostname = hostname
        else:
            nagios_hostname = get_nagios_hostname()
            if nagios_hostname:
                self.hostname = nagios_hostname
            else:
                self.hostname = "{}-{}".format(self.nagios_context, self.unit_name)
        self.checks = []

    def add_check(self, *args, **kwargs):
        """Queue a new ``Check``; arguments are passed to its constructor."""
        self.checks.append(Check(*args, **kwargs))

    def remove_check(self, *args, **kwargs):
        """Remove the on-disk files of the check named by ``shortname``.

        :raises ValueError: if no ``shortname`` keyword argument is given
        """
        if kwargs.get('shortname') is None:
            raise ValueError('shortname of check must be specified')

        # Use sensible defaults if they're not specified - these are not
        # actually used during removal, but they're required for constructing
        # the Check object; check_disk is chosen because it's part of the
        # nagios-plugins-basic package.
        if kwargs.get('check_cmd') is None:
            kwargs['check_cmd'] = 'check_disk'
        if kwargs.get('description') is None:
            kwargs['description'] = ''

        check = Check(*args, **kwargs)
        check.remove(self.hostname)

    def write(self):
        """Write all queued checks and publish them on the monitor relations.

        Does nothing (beyond logging) when the ``nagios`` user or group does
        not exist on the system.
        """
        try:
            nagios_uid = pwd.getpwnam('nagios').pw_uid
            nagios_gid = grp.getgrnam('nagios').gr_gid
        except KeyError:
            # getpwnam/getgrnam raise KeyError for a missing entry; catching
            # only that avoids swallowing unrelated errors and interrupts.
            log("Nagios user not set up, nrpe checks not updated")
            return

        if not os.path.exists(NRPE.nagios_logdir):
            os.mkdir(NRPE.nagios_logdir)
            os.chown(NRPE.nagios_logdir, nagios_uid, nagios_gid)

        nrpe_monitors = {}
        monitors = {"monitors": {"remote": {"nrpe": nrpe_monitors}}}
        for nrpecheck in self.checks:
            nrpecheck.write(self.nagios_context, self.hostname,
                            self.nagios_servicegroups)
            nrpe_monitors[nrpecheck.shortname] = {
                "command": nrpecheck.command,
            }

        service('restart', 'nagios-nrpe-server')

        monitor_ids = relation_ids("local-monitors") + \
            relation_ids("nrpe-external-master")
        # The payload is identical for every relation, so serialise it once.
        monitors_yaml = yaml.dump(monitors)
        for rid in monitor_ids:
            relation_set(relation_id=rid, monitors=monitors_yaml)
290
291
292
def get_nagios_hostcontext(relation_name='nrpe-external-master'):
    """
    Look up the nagios_host_context published by the nrpe subordinate

    The first relation entry carrying a 'nagios_host_context' key wins;
    None is returned when no entry has one.

    :param str relation_name: Name of relation nrpe sub joined to
    """
    key = 'nagios_host_context'
    matches = (entry[key]
               for entry in relations_of_type(relation_name)
               if key in entry)
    return next(matches, None)
301
302
303
def get_nagios_hostname(relation_name='nrpe-external-master'):
    """
    Look up the nagios_hostname published by the nrpe subordinate

    The first relation entry carrying a 'nagios_hostname' key wins;
    None is returned when no entry has one.

    :param str relation_name: Name of relation nrpe sub joined to
    """
    key = 'nagios_hostname'
    matches = (entry[key]
               for entry in relations_of_type(relation_name)
               if key in entry)
    return next(matches, None)
312
313
314
def get_nagios_unit_name(relation_name='nrpe-external-master'):
    """
    Build the unit name used for nagios, prefixed with host_context if set

    The result is '<host_context>:<unit>' when the relation supplies a host
    context, otherwise just the local unit name.

    :param str relation_name: Name of relation nrpe sub joined to
    """
    context = get_nagios_hostcontext(relation_name)
    this_unit = local_unit()
    if not context:
        return this_unit
    return "%s:%s" % (context, this_unit)
326
327
328
def add_init_service_checks(nrpe, services, unit_name):
    """
    Add a process check for each service in list

    Services with an upstart job (/etc/init/<svc>.conf) get a
    check_upstart_job check.  Services with only a sysv init script
    (/etc/init.d/<svc>) get a cron entry that records the output of the
    init script's status every five minutes, plus a check_status_file.py
    check reading that record.  Services with neither are skipped.

    :param NRPE nrpe: NRPE object to add check to
    :param list services: List of services to check
    :param str unit_name: Unit name to use in check description
    """
    # Don't add a check for these services from neutron-gateway
    skipped_upstart_jobs = ('ext-port', 'os-charm-phy-nic-mtu')
    for svc in services:
        upstart_init = '/etc/init/%s.conf' % svc
        sysv_init = '/etc/init.d/%s' % svc
        if os.path.exists(upstart_init):
            if svc not in skipped_upstart_jobs:
                nrpe.add_check(
                    shortname=svc,
                    description='process check {%s}' % unit_name,
                    check_cmd='check_upstart_job %s' % svc
                )
        elif os.path.exists(sysv_init):
            cronpath = '/etc/cron.d/nagios-service-check-%s' % svc
            cron_file = ('*/5 * * * * root '
                         '/usr/local/lib/nagios/plugins/check_exit_status.pl '
                         '-s /etc/init.d/%s status > '
                         '/var/lib/nagios/service-check-%s.txt\n' % (svc,
                                                                     svc)
                         )
            # Context manager guarantees the handle is closed even if the
            # write fails.
            with open(cronpath, 'w') as cron_fh:
                cron_fh.write(cron_file)
            nrpe.add_check(
                shortname=svc,
                description='process check {%s}' % unit_name,
                check_cmd='check_status_file.py -f '
                          '/var/lib/nagios/service-check-%s.txt' % svc,
            )
364
365
366
def copy_nrpe_checks():
    """
    Install the bundled nrpe check scripts into the local plugin directory

    Every regular file matching 'check_*' under
    $CHARM_DIR/hooks/charmhelpers/contrib/openstack/files is copied to
    /usr/local/lib/nagios/plugins, which is created first if missing.
    """
    plugin_dir = '/usr/local/lib/nagios/plugins'
    source_dir = os.path.join(os.getenv('CHARM_DIR'), 'hooks',
                              'charmhelpers', 'contrib', 'openstack',
                              'files')

    if not os.path.exists(plugin_dir):
        os.makedirs(plugin_dir)

    pattern = os.path.join(source_dir, "check_*")
    for candidate in glob.glob(pattern):
        # Only plain files are copied; anything else matching is ignored.
        if not os.path.isfile(candidate):
            continue
        destination = os.path.join(plugin_dir, os.path.basename(candidate))
        shutil.copy2(candidate, destination)
382
383
384
def add_haproxy_checks(nrpe, unit_name):
    """
    Register the HAProxy servers check and the HAProxy queue depth check

    :param NRPE nrpe: NRPE object to add check to
    :param str unit_name: Unit name to use in check description
    """
    # (shortname, description template, plugin script) for each check.
    haproxy_checks = (
        ('haproxy_servers', 'Check HAProxy {%s}', 'check_haproxy.sh'),
        ('haproxy_queue', 'Check HAProxy queue depth {%s}',
         'check_haproxy_queue_depth.sh'),
    )
    for name, description_template, plugin in haproxy_checks:
        nrpe.add_check(
            shortname=name,
            description=description_template % unit_name,
            check_cmd=plugin)