4
# Copyright: (c) 2015 Gear Consulting Pty Ltd <http://libertysys.com.au/>
5
# License: GPLv3 <http://www.gnu.org/licenses/gpl.html>
6
# Description: NTP metrics as a Nagios check.
8
# This program is free software: you can redistribute it and/or modify it under
9
# the terms of the GNU General Public License as published by the Free Software
10
# Foundation, either version 3 of the License, or (at your option) any later
13
# This program is distributed in the hope that it will be useful, but WITHOUT
14
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
15
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
18
# You should have received a copy of the GNU General Public License along with
19
# this program. If not, see <http://www.gnu.org/licenses/>.
30
def ishostnamey(name):
    """Return True if the passed name is roughly hostnamey.

    NTP is rather casual about how it reports hostnames and IP addresses, so
    this test is deliberately loose: every character in the string must be a
    letter, digit, dash, or period.

    The empty string passes, because there is no character to reject; callers
    that need a non-empty name must check the length themselves.
    """
    # \w would also admit underscore, which is not allowed in a hostname.
    if '_' in name:
        return False
    # Anchor with \Z rather than $: $ also matches just before a trailing
    # newline, which would let "host\n" through.
    return re.search(r'^[\w.-]*\Z', name) is not None
37
def isipaddressy(name):
    """Return True if the passed name is roughly IP addressy.

    NTP is rather casual about how it reports hostnames and IP addresses, so
    this test is deliberately loose: every character in the string must be a
    hexadecimal digit (either case), period, or colon.

    The empty string passes, because there is no character to reject; callers
    that need a non-empty name must check the length themselves.
    """
    # Accept A-F as well as a-f so upper-case IPv6 notation is recognised, and
    # anchor with \Z rather than $ so a trailing newline is not tolerated.
    return re.search(r'^[0-9a-fA-F.:]*\Z', name) is not None
44
class CheckNTPMon(object):
54
self.warnpeers = warnpeers
55
self.okpeers = okpeers
56
self.warnoffset = warnoffset
57
self.critoffset = critoffset
58
self.warnreach = warnreach
59
self.critreach = critreach
62
"""Return 0 if the number of peers is OK
63
Return 1 if the number of peers is WARNING
64
Return 2 if the number of peers is CRITICAL"""
66
print "OK: %d usable peers" % n
68
elif n < self.warnpeers:
69
print "CRITICAL: Too few peers (%d) - must be at least %d" % (n, self.warnpeers)
72
print "WARNING: Too few peers (%d) - should be at least %d" % (n, self.okpeers)
75
def offset(self, offset):
76
"""Return 0 if the offset is OK
77
Return 1 if the offset is WARNING
78
Return 2 if the offset is CRITICAL"""
79
if abs(offset) > self.critoffset:
80
print "CRITICAL: Offset too high (%g) - must be less than %g" % \
81
(offset, self.critoffset)
83
if abs(offset) > self.warnoffset:
84
print "WARNING: Offset too high (%g) - should be less than %g" % \
85
(offset, self.warnoffset)
88
print "OK: Offset normal (%g)" % (offset)
91
def reachability(self, percent):
92
"""Return 0 if the reachability percentage is OK
93
Return 1 if the reachability percentage is warning
94
Return 2 if the reachability percentage is critical
95
Raise a ValueError if reachability is not a percentage"""
96
if percent < 0 or percent > 100:
97
raise ValueError('Value must be a percentage')
98
if percent <= self.critreach:
99
print "CRITICAL: Reachability too low (%g%%) - must be more than %g%%" % \
100
(percent, self.critreach)
102
elif percent <= self.warnreach:
103
print "WARNING: Reachability too low (%g%%) - should be more than %g%%" % \
104
(percent, self.warnreach)
107
print "OK: Reachability normal (%g%%)" % (percent)
110
def sync(self, synchost):
111
"""Return 0 if the synchost is non-zero in length and is a roughly valid host identifier, return 2 otherwise."""
112
synced = len(synchost) > 0 and (ishostnamey(synchost) or isipaddressy(synchost))
114
print "OK: time is in sync with %s" % (synchost)
116
print "CRITICAL: no sync host selected"
117
return 0 if synced else 2
120
class NTPPeers(object):
121
"""Turn the peer lines returned by 'ntpq -pn' into a data structure usable for checks."""
124
r'remote\s+refid\s+st\s+t\s+when\s+poll\s+reach\s+',
126
r'No association ID.s returned',
128
ignorepeers = [".LOCL.", ".INIT.", ".XFAC."]
130
def isnoiseline(self, line):
131
for regex in self.noiselines:
132
if re.search(regex, line) is not None:
136
def shouldignore(self, fields, l):
137
if len(fields) != 10:
138
warnings.warn('Invalid ntpq peer line - there are %d fields: %s' % (len(fields), l))
140
if fields[1] in self.ignorepeers:
142
if int(fields[2]) > 15:
146
def parsetally(self, tally, peerdata, offset):
147
"""Parse the tally code and add the appropriate items to the peer data based on that code.
148
See the explanation of tally codes in the ntpq documentation for how these work:
149
- http://www.eecis.udel.edu/~mills/ntp/html/decode.html#peer
150
- http://www.eecis.udel.edu/~mills/ntp/html/ntpq.html
151
- http://psp2.ntp.org/bin/view/Support/TroubleshootingNTP
153
if tally in ['*', 'o'] and 'syncpeer' not in self.ntpdata:
154
# this is our sync peer
155
self.ntpdata['syncpeer'] = peerdata['peer']
156
self.ntpdata['offsetsyncpeer'] = offset
157
self.ntpdata['survivors'] += 1
158
self.ntpdata['offsetsurvivors'] += offset
159
elif tally in ['+', '#']:
161
self.ntpdata['survivors'] += 1
162
self.ntpdata['offsetsurvivors'] += offset
163
elif tally in [' ', 'x', '.', '-']:
165
self.ntpdata['discards'] += 1
166
self.ntpdata['offsetdiscards'] += offset
168
self.ntpdata['unknown'] += 1
172
def __init__(self, peerlines, check=None):
175
'offsetsurvivors': 0,
186
if self.isnoiseline(l):
189
# first column is the tally field, the rest are whitespace-separated fields
191
fields = l[1:-1].split()
193
if self.shouldignore(fields, l):
196
fieldnames = ['peer', 'refid', 'stratum', 'type', 'lastpoll', 'interval', 'reach',
197
'delay', 'offset', 'jitter']
198
peerdata = dict(zip(fieldnames, fields))
200
offset = abs(float(peerdata['offset']))
201
if not self.parsetally(tally, peerdata, offset):
202
warnings.warn('Unknown tally code detected - please report a bug: %s' % (l))
205
self.ntpdata['peers'] += 1
206
self.ntpdata['offsetall'] += offset
208
# reachability - this counts the number of bits set in the reachability field
209
# (which is displayed in octal in the ntpq output)
210
# http://stackoverflow.com/questions/9829578/fast-way-of-counting-bits-in-python
211
self.ntpdata['totalreach'] += bin(int(peerdata['reach'], 8)).count("1")
213
# reachability as a percentage of the last 8 polls, across all peers
214
self.ntpdata['reachability'] = float(self.ntpdata['totalreach']) * 100 / self.ntpdata['peers'] / 8
217
if self.ntpdata['survivors'] > 0:
218
self.ntpdata['averageoffsetsurvivors'] = \
219
self.ntpdata['offsetsurvivors'] / self.ntpdata['survivors']
220
if self.ntpdata['discards'] > 0:
221
self.ntpdata['averageoffsetdiscards'] = \
222
self.ntpdata['offsetdiscards'] / self.ntpdata['discards']
223
self.ntpdata['averageoffset'] = self.ntpdata['offsetall'] / self.ntpdata['peers']
226
if self.ntpdata.get('syncpeer'):
227
print "Synced to: %s, offset %g ms" % \
228
(self.ntpdata['syncpeer'], self.ntpdata['offsetsyncpeer'])
230
print "No remote sync peer"
231
print "%d total peers, average offset %g ms" % \
232
(self.ntpdata['peers'], self.ntpdata['averageoffset'])
233
if self.ntpdata['survivors'] > 0:
234
print "%d good peers, average offset %g ms" % \
235
(self.ntpdata['survivors'], self.ntpdata['averageoffsetsurvivors'])
236
if self.ntpdata['discards'] > 0:
237
print "%d discarded peers, average offset %g ms" % \
238
(self.ntpdata['discards'], self.ntpdata['averageoffsetdiscards'])
239
print "Average reachability of all peers: %d%%" % (self.ntpdata['reachability'])
241
def check_peers(self, check=None):
242
"""Check the number of usable peers"""
244
check = self.check if self.check else CheckNTPMon()
245
return check.peers(self.ntpdata['peers'])
247
def check_offset(self, check=None):
248
"""Check the offset from the sync peer, returning critical, warning,
249
or OK based on the CheckNTPMon results.
250
If there is no sync peer, use the average offset of survivors instead.
251
If there are no survivors, use the average offset of discards instead, and return warning as a minimum.
252
If there are no discards, return critical.
255
check = self.check if self.check else CheckNTPMon()
256
if 'offsetsyncpeer' in self.ntpdata:
257
return check.offset(self.ntpdata['offsetsyncpeer'])
258
if 'averageoffsetsurvivors' in self.ntpdata:
259
return check.offset(self.ntpdata['averageoffsetsurvivors'])
260
if 'averageoffsetdiscards' in self.ntpdata:
261
result = check.offset(self.ntpdata['averageoffsetdiscards'])
262
return 1 if result < 1 else result
264
print "CRITICAL: No peers for which to check offset"
267
def check_reachability(self, check=None):
268
"""Check reachability of all peers"""
270
check = self.check if self.check else CheckNTPMon()
271
return check.reachability(self.ntpdata['reachability'])
273
def check_sync(self, check=None):
274
"""Check whether host is in sync with a peer"""
276
check = self.check if self.check else CheckNTPMon()
277
if self.ntpdata.get('syncpeer') is None:
278
print "CRITICAL: No sync peer"
280
return check.sync(self.ntpdata['syncpeer'])
282
def checks(self, methods=None, check=None):
285
methods = [self.check_offset, self.check_peers, self.check_reachability, self.check_sync]
286
for method in methods:
296
output = subprocess.check_output(["ntpq", "-pn"])
297
lines = output.split("\n")
299
traceback.print_exc(file=sys.stdout)
304
methodnames = ['offset', 'peers', 'reachability', 'sync']
306
'warnpeers': [ 2, int, 'Minimum number of peers to be considered non-critical'],
307
'okpeers': [ 4, int, 'Minimum number of peers to be considered OK'],
308
'warnoffset': [ 10, float, 'Minimum offset to be considered warning'],
309
'critoffset': [ 50, float, 'Minimum offset to be considered critical'],
310
'warnreach': [ 75, float, 'Minimum peer reachability percentage to be considered OK'],
311
'critreach': [ 50, float, 'Minimum peer reachability percentage to be considered non-crtical'],
314
# Create check ranges; will be used by parse_args to store options
315
checkntpmon = CheckNTPMon()
318
parser = argparse.ArgumentParser(description='Nagios NTP check incorporating the logic of NTPmon')
319
parser.add_argument('--check', choices=methodnames,
320
help='Select check to run; if omitted, run all checks and return the worst result.')
321
parser.add_argument('--debug', action='store_true',
322
help='Include "ntpq -pn" output and internal state dump along with check results.')
323
for o in options.keys():
324
helptext = options[o][2] + ' (default: %d)' % (options[o][0])
325
parser.add_argument('--' + o, default=options[o][0], help=helptext, type=options[o][1])
326
args = parser.parse_args(namespace=checkntpmon)
329
lines = NTPPeers.query()
332
print "Cannot get peers from ntpq."
333
print "Please check that an NTP server is installed and functional."
336
# initialise our object with the results of ntpq and our preferred check thresholds
337
ntp = NTPPeers(lines, checkntpmon)
340
print "\n".join(lines)
343
# work out which method to run
344
# (methods must be in the same order as methodnames above)
345
methods = [ntp.check_offset, ntp.check_peers, ntp.check_reachability, ntp.check_sync]
346
checkmethods = dict(zip(methodnames, methods))
348
# if check argument is specified, run just that check
350
if checkmethods.get(args.check):
351
method = checkmethods[args.check]
353
# else check all the methods
355
ret = ntp.checks(methods)
359
if __name__ == "__main__":