~bloodearnest/charms/trusty/rabbitmq-server/add-nagios-service-groups

« back to all changes in this revision

Viewing changes to lib/charmhelpers/contrib/charmsupport/nrpe.py

  • Committer: David Ames
  • Date: 2013-11-15 19:15:16 UTC
  • mto: This revision was merged to the branch mainline in revision 45.
  • Revision ID: david.ames@canonical.com-20131115191516-rrvszp2cgdi3hrqt
Enable nrpe-external-master-relation. Use charmhelpers (embedded for now)

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
"""Compatibility with the nrpe-external-master charm"""
 
2
# Copyright 2012 Canonical Ltd.
 
3
#
 
4
# Authors:
 
5
#  Matthew Wedgwood <matthew.wedgwood@canonical.com>
 
6
 
 
7
import subprocess
 
8
import pwd
 
9
import grp
 
10
import os
 
11
import re
 
12
import shlex
 
13
import yaml
 
14
 
 
15
from charmhelpers.core.hookenv import (
 
16
    config,
 
17
    local_unit,
 
18
    log,
 
19
    relation_ids,
 
20
    relation_set,
 
21
)
 
22
 
 
23
from charmhelpers.core.host import service
 
24
 
 
25
# This module adds compatibility with the nrpe-external-master and plain nrpe
 
26
# subordinate charms. To use it in your charm:
 
27
#
 
28
# 1. Update metadata.yaml
 
29
#
 
30
#   provides:
 
31
#     (...)
 
32
#     nrpe-external-master:
 
33
#       interface: nrpe-external-master
 
34
#       scope: container
 
35
#
 
36
#   and/or
 
37
#
 
38
#   provides:
 
39
#     (...)
 
40
#     local-monitors:
 
41
#       interface: local-monitors
 
42
#       scope: container
 
43
 
 
44
#
 
45
# 2. Add the following to config.yaml
 
46
#
 
47
#    nagios_context:
 
48
#      default: "juju"
 
49
#      type: string
 
50
#      description: |
 
51
#        Used by the nrpe subordinate charms.
 
52
#        A string that will be prepended to the instance name to set the host name
 
53
#        in nagios. So for instance the hostname would be something like:
 
54
#            juju-myservice-0
 
55
#        If you're running multiple environments with the same services in them
 
56
#        this allows you to differentiate between them.
 
57
#
 
58
# 3. Add custom checks (Nagios plugins) to files/nrpe-external-master
 
59
#
 
60
# 4. Update your hooks.py with something like this:
 
61
#
 
62
#    from charmsupport.nrpe import NRPE
 
63
#    (...)
 
64
#    def update_nrpe_config():
 
65
#        nrpe_compat = NRPE()
 
66
#        nrpe_compat.add_check(
 
67
#            shortname = "myservice",
 
68
#            description = "Check MyService",
 
69
#            check_cmd = "check_http -w 2 -c 10 http://localhost"
 
70
#            )
 
71
#        nrpe_compat.add_check(
 
72
#            "myservice_other",
 
73
#            "Check for widget failures",
 
74
#            check_cmd = "/srv/myapp/scripts/widget_check"
 
75
#            )
 
76
#        nrpe_compat.write()
 
77
#
 
78
#    def config_changed():
 
79
#        (...)
 
80
#        update_nrpe_config()
 
81
#
 
82
#    def nrpe_external_master_relation_changed():
 
83
#        update_nrpe_config()
 
84
#
 
85
#    def local_monitors_relation_changed():
 
86
#        update_nrpe_config()
 
87
#
 
88
# 5. ln -s hooks.py nrpe-external-master-relation-changed
 
89
#    ln -s hooks.py local-monitors-relation-changed
 
90
 
 
91
 
 
92
class CheckException(Exception):
    """Raised when an NRPE check definition is invalid."""
 
94
 
 
95
 
 
96
class Check(object):
    """A single NRPE check plus the Nagios service definition exported for it.

    Attributes set by ``__init__``:
        shortname:   identifier supplied by the caller (validated against
                     ``shortname_re``).
        command:     NRPE command name, always ``check_<shortname>``.
        description: free text placed in the Nagios service description.
        check_cmd:   resolved command line, or ``''`` when the executable
                     could not be found in the search path.
    """
    shortname_re = '[A-Za-z0-9-_]+$'
    # Doubled braces are literal braces once passed through str.format().
    service_template = ("""
#---------------------------------------------------
# This file is Juju managed
#---------------------------------------------------
define service {{
    use                             active-service
    host_name                       {nagios_hostname}
    service_description             {nagios_hostname}[{shortname}] """
                        """{description}
    check_command                   check_nrpe!{command}
    servicegroups                   {nagios_servicegroup}
}}
""")

    def __init__(self, shortname, description, check_cmd):
        """Validate ``shortname`` and resolve ``check_cmd`` to a full path.

        Raises:
            CheckException: if ``shortname`` does not match ``shortname_re``.
        """
        super(Check, self).__init__()
        # XXX: could be better to calculate this from the service name
        if not re.match(self.shortname_re, shortname):
            raise CheckException("shortname must match {}".format(
                Check.shortname_re))
        self.shortname = shortname
        self.command = "check_{}".format(shortname)
        # Note: a set of invalid characters is defined by the
        # Nagios server config
        # The default is: illegal_object_name_chars=`~!$%^&*"|'<>?,()=
        self.description = description
        self.check_cmd = self._locate_cmd(check_cmd)

    def _locate_cmd(self, check_cmd):
        """Return ``check_cmd`` with its executable resolved to a full path.

        The executable (first word of ``check_cmd``) is looked up, in order,
        under ``/`` (which makes absolute paths work as-is), the charm's
        ``files/nrpe-external-master`` directory and
        ``/usr/lib/nagios/plugins``.  Arguments are re-quoted so that values
        containing whitespace or shell metacharacters survive the
        split/re-join round trip.

        Returns:
            The resolved command line, or ``''`` (after logging) when the
            command is empty or the executable is not found.
        """
        # shlex.quote only exists on Python 3.3+; pipes.quote is the
        # equivalent on Python 2.
        try:
            from shlex import quote
        except ImportError:
            from pipes import quote

        search_path = (
            '/',
            os.path.join(os.environ['CHARM_DIR'],
                         'files/nrpe-external-master'),
            '/usr/lib/nagios/plugins',
        )
        parts = shlex.split(check_cmd)
        if not parts:
            # Nothing to look up; avoid an IndexError on parts[0].
            log('Check command not found: {}'.format(check_cmd))
            return ''
        for path in search_path:
            candidate = os.path.join(path, parts[0])
            if os.path.exists(candidate):
                return " ".join(
                    [candidate] + [quote(arg) for arg in parts[1:]])
        log('Check command not found: {}'.format(parts[0]))
        return ''

    def write(self, nagios_context, hostname):
        """Write the NRPE command file and, if possible, the service file.

        The command definition goes to ``<NRPE.nrpe_confdir>/<command>.cfg``.
        The Nagios service definition is only written when
        ``NRPE.nagios_exportdir`` exists; otherwise a message is logged.
        """
        nrpe_check_file = os.path.join(
            NRPE.nrpe_confdir, '{}.cfg'.format(self.command))
        with open(nrpe_check_file, 'w') as nrpe_check_config:
            nrpe_check_config.write("# check {}\n".format(self.shortname))
            nrpe_check_config.write("command[{}]={}\n".format(
                self.command, self.check_cmd))

        if not os.path.exists(NRPE.nagios_exportdir):
            log('Not writing service config as {} is not accessible'.format(
                NRPE.nagios_exportdir))
        else:
            self.write_service_config(nagios_context, hostname)

    def write_service_config(self, nagios_context, hostname):
        """Replace the exported Nagios service definition for this check.

        Existing export files for this command (under any hostname) are
        removed first, then ``service__<hostname>_<command>.cfg`` is written
        from ``service_template`` with ``nagios_context`` as the service
        group.
        """
        # Match on the exact "_<command>.cfg" suffix: a regex built from the
        # command name would treat "." as a wildcard and match anywhere in
        # the file name, risking removal of unrelated files.
        stale_suffix = '_{}.cfg'.format(self.command)
        for f in os.listdir(NRPE.nagios_exportdir):
            if f.endswith(stale_suffix):
                os.remove(os.path.join(NRPE.nagios_exportdir, f))

        templ_vars = {
            'nagios_hostname': hostname,
            'nagios_servicegroup': nagios_context,
            'description': self.description,
            'shortname': self.shortname,
            'command': self.command,
        }
        nrpe_service_text = Check.service_template.format(**templ_vars)
        nrpe_service_file = '{}/service__{}_{}.cfg'.format(
            NRPE.nagios_exportdir, hostname, self.command)
        with open(nrpe_service_file, 'w') as nrpe_service_config:
            nrpe_service_config.write(str(nrpe_service_text))

    def run(self):
        """Execute the resolved check command and return its exit status.

        Raises:
            CheckException: if no command was located for this check.
        """
        # subprocess.call() without a shell needs an argv list; handing it
        # the whole command line as one string would be treated as the
        # program name.
        argv = shlex.split(self.check_cmd)
        if not argv:
            raise CheckException(
                "no command located for check {}".format(self.shortname))
        return subprocess.call(argv)
 
177
 
 
178
 
 
179
class NRPE(object):
    """Collects checks for this unit and writes them out for NRPE/Nagios.

    The Nagios host name is built as ``<nagios_context>-<unit name>`` where
    the unit name has ``/`` replaced by ``-`` and ``nagios_context`` comes
    from the charm config.
    """
    nagios_logdir = '/var/log/nagios'
    nagios_exportdir = '/var/lib/nagios/export'
    nrpe_confdir = '/etc/nagios/nrpe.d'

    def __init__(self):
        """Read the charm config and derive the Nagios host name."""
        super(NRPE, self).__init__()
        self.config = config()
        self.nagios_context = self.config['nagios_context']
        self.unit_name = local_unit().replace('/', '-')
        self.hostname = "{}-{}".format(self.nagios_context, self.unit_name)
        self.checks = []

    def add_check(self, *args, **kwargs):
        """Register a check; arguments are passed straight to ``Check``."""
        self.checks.append(Check(*args, **kwargs))

    def write(self):
        """Write every registered check and publish them to local-monitors.

        Does nothing (beyond logging) when the ``nagios`` user or group does
        not exist.  Otherwise it ensures the Nagios log directory exists,
        writes each check, requests a restart of ``nagios-nrpe-server`` and
        sets the ``monitors`` key on every ``local-monitors`` relation.
        """
        try:
            nagios_uid = pwd.getpwnam('nagios').pw_uid
            nagios_gid = grp.getgrnam('nagios').gr_gid
        except KeyError:
            # getpwnam/getgrnam raise KeyError for a missing entry; anything
            # else is unexpected and should propagate.
            log("Nagios user not set up, nrpe checks not updated")
            return

        if not os.path.exists(NRPE.nagios_logdir):
            os.mkdir(NRPE.nagios_logdir)
            os.chown(NRPE.nagios_logdir, nagios_uid, nagios_gid)

        nrpe_monitors = {}
        monitors = {"monitors": {"remote": {"nrpe": nrpe_monitors}}}
        for nrpecheck in self.checks:
            nrpecheck.write(self.nagios_context, self.hostname)
            nrpe_monitors[nrpecheck.shortname] = {
                "command": nrpecheck.command,
            }

        service('restart', 'nagios-nrpe-server')

        # The payload is identical for every relation, so serialise it once.
        monitors_yaml = yaml.dump(monitors)
        for rid in relation_ids("local-monitors"):
            relation_set(relation_id=rid, monitors=monitors_yaml)