3
Script to automate disk SMART testing
5
Copyright (C) 2010 Canonical Ltd.
8
Jeff Lane <jeffrey.lane@canonical.com>
9
Brendan Donegan <brendan.donegan@canonical.com>
11
This program is free software: you can redistribute it and/or modify
12
it under the terms of the GNU General Public License version 2,
13
as published by the Free Software Foundation.
15
This program is distributed in the hope that it will be useful,
16
but WITHOUT ANY WARRANTY; without even the implied warranty of
17
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18
GNU General Public License for more details.
20
You should have received a copy of the GNU General Public License
21
along with this program. If not, see <http://www.gnu.org/licenses/>.
23
The purpose of this script is to simply interact with an onboard hard disk and
24
check for SMART capability and then do a little bit of interaction to make sure
25
we can at least do some limited interaction with the hard disk's SMART
28
In this case, we probe to see if SMART is available and enabled, then we run
29
the short self test. Return 0 if it's all good, return 1 if it fails.
31
NOTE: This may not work correctly on systems where the onboard storage is
32
controlled by a hardware RAID controller, on external RAID systems, SAN, and
33
USB/eSATA/eSAS attached storage devices.
37
v1.1: Put delay before first attempt to access log, rather than after
38
v1.0: added debugger class and code to allow for verbose debug output if needed
40
v0.4: corrected some minor things
41
added option parsing to allow for many disks, or disks other than
44
V0.3: Removed the arbitrary wait time and implemented a polling method
45
to shorten the test time.
46
Added in Pass/Fail criteria for the final outcome.
47
Added in documentation.
49
V0.2: added minor debug routine
51
V0.1: Fixed some minor bugs and added the SmartEnabled() function
62
from subprocess import Popen, PIPE
63
from argparse import ArgumentParser
66
class ListHandler(logging.StreamHandler):
    """Stream handler that writes list/tuple messages one record per item.

    Any other message is passed straight through to
    ``logging.StreamHandler.emit``.
    """

    def emit(self, record):
        """Emit *record*, fanning out when its message is a list or tuple.

        Args:
            record: The ``logging.LogRecord`` to write. If ``record.msg`` is
                a list or tuple, a new record is built for every element
                (copying the name, level, location, args and exception info
                of *record*) and each is emitted separately.
        """
        if not isinstance(record.msg, (list, tuple)):
            super().emit(record)
            return

        logger = logging.getLogger(record.name)
        for msg in record.msg:
            # Command output may arrive as raw bytes; log it as text.
            # NOTE(review): the statement under this condition was not
            # visible in the reviewed source; decoding is assumed - confirm.
            if isinstance(msg, bytes):
                msg = msg.decode()
            new_record = logger.makeRecord(
                record.name, record.levelno, record.pathname, record.lineno,
                msg, record.args, record.exc_info, record.funcName)
            super().emit(new_record)
83
def is_smart_enabled(disk):
    """Report whether SMART is both available and enabled on *disk*.

    Runs ``smartctl -i <disk>`` and inspects the tail of its output.

    Args:
        disk: Path of the block device to query, e.g. ``/dev/sda``.

    Returns:
        True when the output has more than two lines, the second-to-last
        line contains ``'Enabled'`` and the third-to-last contains
        ``'Available'``; False otherwise, including when smartctl cannot be
        launched at all.
    """
    # Pass an argument list instead of a shell string so that a crafted
    # device name cannot inject shell commands (this script runs as root).
    try:
        proc = Popen(['smartctl', '-i', disk], stdout=PIPE)
    except OSError as exc:
        # Previously the shell swallowed a missing binary and produced no
        # output; keep that "no information" outcome instead of crashing.
        logging.debug('Unable to run smartctl: %s', exc)
        return False

    diskinfo_bytes = proc.communicate()[0]
    diskinfo = diskinfo_bytes.decode(errors='replace').splitlines()

    logging.debug('SMART Info for disk %s', disk)
    logging.debug(diskinfo)

    # The length check guards the negative indexes used below.
    return (len(diskinfo) > 2
            and 'Enabled' in diskinfo[-2]
            and 'Available' in diskinfo[-3])
98
def run_smart_test(disk, type='short'):
    """Start a SMART self-test on *disk* and return smartctl's exit status.

    Args:
        disk: Path of the block device to test, e.g. ``/dev/sda``.
        type: Self-test kind passed to ``smartctl -t`` (default ``'short'``).
            The name shadows the builtin but is kept so that existing
            keyword callers continue to work.

    Returns:
        The return code of the smartctl process, or 127 when smartctl could
        not be launched (the code a shell reports for a missing command).
    """
    # An argument list (no shell) keeps a crafted device name from being
    # interpreted as shell syntax while this script runs as root.
    ctl_command = ['smartctl', '-t', type, disk]
    logging.debug('Beginning test with %s', ' '.join(ctl_command))

    try:
        smart_proc = Popen(ctl_command, stderr=PIPE, stdout=PIPE,
                           universal_newlines=True)
    except OSError as exc:
        logging.debug('Unable to run smartctl: %s', exc)
        return 127

    ctl_output, ctl_error = smart_proc.communicate()
    logging.debug(ctl_error + ctl_output)

    return smart_proc.returncode
111
def _parse_smart_log(lines):
    """Turn the text lines of a ``smartctl -l`` log into a list of dicts."""
    lines = iter(lines)

    # Skip the intro: the log proper follows the first line that starts
    # with 'SMART'. Running out of input first means the output is unusable.
    for line in lines:
        if line.startswith('SMART'):
            break
    else:
        raise Exception('Failed to parse SMART log entries')

    entries = []

    # The table header gives the starting offset of every column.
    header = next(lines, '')
    if not header.startswith('Num'):
        # NOTE(review): the statement under this condition was not visible
        # in the reviewed source; an empty result is assumed - confirm.
        return entries

    columns = ['number', 'description', 'status',
               'remaining', 'lifetime', 'lba']
    offsets = [header.index(title) for title in header.split()]
    # Start the 'remaining' column later than its title does; presumably the
    # percentage is right-aligned under 'Remaining' - verify against real
    # smartctl output.
    offsets[columns.index('remaining')] += len('Remaining') - len('100%')
    offsets.append(len(header))

    for line in lines:
        if not line.startswith('#'):
            continue
        entry = {}
        for i, column in enumerate(columns):
            entry[column] = line[offsets[i]:offsets[i + 1]].strip()

        # Convert some columns to integers ('number' looks like '# 1').
        entry['number'] = int(entry['number'][1:])
        entry['lifetime'] = int(entry['lifetime'])
        entries.append(entry)

    return entries


def get_smart_entries(disk, type='selftest'):
    """Read a SMART log from *disk* and return its rows as dictionaries.

    Runs ``smartctl -l <type> <disk>`` and parses the fixed-width table that
    follows the first line starting with ``'SMART'``.

    Args:
        disk: Path of the block device to query, e.g. ``/dev/sda``.
        type: Log name passed to ``smartctl -l`` (default ``'selftest'``).
            The name shadows the builtin but is kept for keyword callers.

    Returns:
        A list with one dict per line beginning with ``'#'``, in output
        order. Keys are ``'number'`` and ``'lifetime'`` (ints) and
        ``'description'``, ``'status'``, ``'remaining'`` and ``'lba'``
        (stripped strings). The list is empty when no table header
        (a line starting with ``'Num'``) follows the ``'SMART'`` line.

    Raises:
        Exception: If the output ends before a line starting with
            ``'SMART'`` is seen (also the case when smartctl cannot run).
    """
    # No shell: a crafted device name must not be interpreted as shell
    # syntax while this script runs as root.
    try:
        proc = Popen(['smartctl', '-l', type, disk], stdout=PIPE)
    except OSError as exc:
        logging.debug('Unable to run smartctl: %s', exc)
        output = b''
    else:
        # communicate() drains and closes the pipe and reaps the child, so
        # repeated polling does not leak file descriptors or zombies.
        output = proc.communicate()[0]

    # Keep line endings so the header length matches what readline() gave.
    text = output.decode(errors='replace')
    return _parse_smart_log(text.splitlines(True))
152
description = 'Tests that SMART capabilities on disks that support SMART function.'
153
parser = ArgumentParser(description=description)
154
parser.add_argument('-b', '--block-dev',
157
help=('the DISK to run this test against '
158
'[default: %(default)s]'))
159
parser.add_argument('-d', '--debug',
162
help='prints some debug info')
163
parser.add_argument('-s', '--sleep',
166
help=('number of seconds to sleep between checks '
167
'[default: %(default)s].'))
168
parser.add_argument('-t', '--timeout',
170
help='number of seconds to timeout from sleeping.')
171
args = parser.parse_args()
174
format = '%(levelname)-8s %(message)s'
175
handler = ListHandler()
176
handler.setFormatter(logging.Formatter(format))
177
logger = logging.getLogger()
178
logger.addHandler(handler)
181
logger.setLevel(logging.DEBUG)
183
logger.setLevel(logging.INFO)
185
# Make sure we're root, because smartctl doesn't work otherwise.
186
if not os.geteuid()==0:
187
parser.error("You must be root to run this program")
189
# If SMART is available and enabled, we proceed. Otherwise, we exit as the
190
# test is pointless in this case.
191
disk = args.block_dev
192
if not is_smart_enabled(disk):
193
logging.warning('SMART not available on %s' % disk)
196
# Initiate a self test and start polling until the test is done
197
previous_entries = get_smart_entries(disk)
198
logging.info("Starting SMART self-test on %s" % disk)
199
if run_smart_test(disk) != 0:
200
logging.error("Error reported during smartctl test")
203
if len(previous_entries) > 20:
204
# Abort the previous instance
205
# so that polling can identify the difference
207
previous_entries = get_smart_entries(disk)
209
# Priming read... this is here in case our test is finished or fails
210
# immediately after it begins.
211
logging.debug('Polling selftest.log for status')
214
# Poll every sleep seconds until test is complete
215
time.sleep(args.sleep)
217
current_entries = get_smart_entries(disk)
218
logging.debug('%s %s %s %s' % (current_entries[0]['number'],
219
current_entries[0]['description'],
220
current_entries[0]['status'],
221
current_entries[0]['remaining']))
222
if current_entries != previous_entries \
223
and current_entries[0]["status"] != 'Self-test routine in progress':
226
if args.timeout is not None:
227
if args.timeout <= 0:
228
logging.debug('Polling timed out')
231
args.timeout -= args.sleep
233
status = current_entries[0]['status']
235
if status != 'Completed without error':
236
log = get_smart_entries(disk)
237
logging.error("FAIL: SMART Self-Test appears to have failed for some reason. "
238
"Run 'sudo smartctl -l selftest %s' to see the SMART log" % disk)
239
logging.debug("Last self-test run status: %s" % status)
242
logging.info("PASS: SMART Self-Test completed without error")
246
if __name__ == '__main__':