1
"""Strptime-related classes and functions.
4
LocaleTime -- Discovers and stores locale-specific time information
5
TimeRE -- Creates regexes for pattern matching a string of text containing
9
_getlang -- Figure out what language is being used for the locale
10
strptime -- Calculates the time struct represented by the passed-in string
16
from re import compile as re_compile
17
from re import IGNORECASE
18
from re import escape as re_escape
19
from datetime import date as datetime_date
21
from thread import allocate_lock as _thread_allocate_lock
23
from dummy_thread import allocate_lock as _thread_allocate_lock
28
# Figure out what the current language is set to.
29
return locale.getlocale(locale.LC_TIME)
31
class LocaleTime(object):
32
"""Stores and handles locale-specific information related to time.
35
f_weekday -- full weekday names (7-item list)
36
a_weekday -- abbreviated weekday names (7-item list)
37
f_month -- full month names (13-item list; dummy value in [0], which
39
a_month -- abbreviated month names (13-item list, dummy value in
40
[0], which is added by code)
41
am_pm -- AM/PM representation (2-item list)
42
LC_date_time -- format string for date/time representation (string)
43
LC_date -- format string for date representation (string)
44
LC_time -- format string for time representation (string)
45
timezone -- daylight- and non-daylight-savings timezone representation
47
lang -- Language used by instance (2-item tuple)
51
"""Set all attributes.
53
Order of methods called matters for dependency reasons.
55
The locale language is set at the outset and then checked again before
56
exiting. This is to make sure that the attributes were not set with a
57
mix of information from more than one locale. This would most likely
58
happen when using threads where one thread calls a locale-dependent
59
function while another thread changes the locale while the function in
60
the other thread is still running. Proper coding would call for
61
locks to prevent changing the locale while locale-dependent code is
62
running. The check here is done in case someone does not think about
65
Only other possible issue is if someone changed the timezone and did
66
not call time.tzset(). That is an issue for the programmer, though,
67
since changing the timezone is worthless without that call.
70
self.lang = _getlang()
74
self.__calc_timezone()
75
self.__calc_date_time()
76
if _getlang() != self.lang:
77
raise ValueError("locale changed during initialization")
79
def __pad(self, seq, front):
80
# Add '' to seq at the front (if front is True), else at the back.
88
def __calc_weekday(self):
    """Record the weekday names on this instance.

    Sets two attributes, each a 7-item list of lower-cased strings:

    a_weekday -- names read from calendar.day_abbr[0] .. [6]
    f_weekday -- names read from calendar.day_name[0] .. [6]
    """
    # Build both lists before assigning so the attributes are only
    # touched once every lookup has succeeded.
    day_indexes = range(7)
    abbreviated = [calendar.day_abbr[i].lower() for i in day_indexes]
    full = [calendar.day_name[i].lower() for i in day_indexes]
    self.a_weekday = abbreviated
    self.f_weekday = full
96
def __calc_month(self):
    """Record the month names on this instance.

    Sets two attributes, each a 13-item list of lower-cased strings:

    a_month -- names read from calendar.month_abbr[0] .. [12]
    f_month -- names read from calendar.month_name[0] .. [12]

    Index 0 is kept so that a month number can be used directly as an
    index; the value stored there is whatever the calendar module
    supplies for that slot.
    """
    # Build both lists before assigning so the attributes are only
    # touched once every lookup has succeeded.
    month_indexes = range(13)
    abbreviated = [calendar.month_abbr[i].lower() for i in month_indexes]
    full = [calendar.month_name[i].lower() for i in month_indexes]
    self.a_month = abbreviated
    self.f_month = full
103
def __calc_am_pm(self):
104
# Set self.am_pm by using time.strftime().
106
# The magic date (1999,3,17,hour,44,55,2,76,0) is not really that
107
# magical; just happened to have used it everywhere else where a
108
# static date was needed.
111
time_tuple = time.struct_time((1999,3,17,hour,44,55,2,76,0))
112
am_pm.append(time.strftime("%p", time_tuple).lower())
115
def __calc_date_time(self):
116
# Set self.LC_date_time, self.LC_date, & self.LC_time by using
119
# Use (1999,3,17,22,44,55,2,76,0) for magic date because the amount of
120
# overloaded numbers is minimized. The order in which searches for
121
# values within the format string is very important; it eliminates
122
# possible ambiguity for what something represents.
123
time_tuple = time.struct_time((1999,3,17,22,44,55,2,76,0))
124
date_time = [None, None, None]
125
date_time[0] = time.strftime("%c", time_tuple).lower()
126
date_time[1] = time.strftime("%x", time_tuple).lower()
127
date_time[2] = time.strftime("%X", time_tuple).lower()
128
replacement_pairs = [('%', '%%'), (self.f_weekday[2], '%A'),
129
(self.f_month[3], '%B'), (self.a_weekday[2], '%a'),
130
(self.a_month[3], '%b'), (self.am_pm[1], '%p'),
131
('1999', '%Y'), ('99', '%y'), ('22', '%H'),
132
('44', '%M'), ('55', '%S'), ('76', '%j'),
133
('17', '%d'), ('03', '%m'), ('3', '%m'),
134
# '3' needed for when no leading zero.
135
('2', '%w'), ('10', '%I')]
136
replacement_pairs.extend([(tz, "%Z") for tz_values in self.timezone
137
for tz in tz_values])
138
for offset,directive in ((0,'%c'), (1,'%x'), (2,'%X')):
139
current_format = date_time[offset]
140
for old, new in replacement_pairs:
141
# Must deal with possible lack of locale info
142
# manifesting itself as the empty string (e.g., Swedish's
143
# lack of AM/PM info) or a platform returning a tuple of empty
144
# strings (e.g., MacOS 9 having timezone as ('','')).
146
current_format = current_format.replace(old, new)
147
# If %W is used, then Sunday, 1999-01-03 will fall on week 0 since
148
# 1999-01-03 occurs before the first Monday of the year. Otherwise
150
time_tuple = time.struct_time((1999,1,3,1,1,1,6,3,0))
151
if '00' in time.strftime(directive, time_tuple):
155
date_time[offset] = current_format.replace('11', U_W)
156
self.LC_date_time = date_time[0]
157
self.LC_date = date_time[1]
158
self.LC_time = date_time[2]
160
def __calc_timezone(self):
161
# Set self.timezone by using time.tzname.
162
# Do not worry about possibility of time.tzname[0] == time.tzname[1]
163
# and time.daylight; handle that in _strptime().
166
except AttributeError:
168
no_saving = frozenset(["utc", "gmt", time.tzname[0].lower()])
170
has_saving = frozenset([time.tzname[1].lower()])
172
has_saving = frozenset()
173
self.timezone = (no_saving, has_saving)
177
"""Handle conversion from format directives to regexes."""
179
def __init__(self, locale_time=None):
180
"""Create keys/values.
182
Order of execution is important for dependency reasons.
186
self.locale_time = locale_time
188
self.locale_time = LocaleTime()
189
base = super(TimeRE, self)
191
# The " \d" part of the regex is to make %c from ANSI C work
192
'd': r"(?P<d>3[0-1]|[1-2]\d|0[1-9]|[1-9]| [1-9])",
193
'f': r"(?P<f>[0-9]{1,6})",
194
'H': r"(?P<H>2[0-3]|[0-1]\d|\d)",
195
'I': r"(?P<I>1[0-2]|0[1-9]|[1-9])",
196
'j': r"(?P<j>36[0-6]|3[0-5]\d|[1-2]\d\d|0[1-9]\d|00[1-9]|[1-9]\d|0[1-9]|[1-9])",
197
'm': r"(?P<m>1[0-2]|0[1-9]|[1-9])",
198
'M': r"(?P<M>[0-5]\d|\d)",
199
'S': r"(?P<S>6[0-1]|[0-5]\d|\d)",
200
'U': r"(?P<U>5[0-3]|[0-4]\d|\d)",
201
'w': r"(?P<w>[0-6])",
202
# W is set below by using 'U'
204
#XXX: Does 'Y' need to worry about having less or more than
206
'Y': r"(?P<Y>\d\d\d\d)",
207
'A': self.__seqToRE(self.locale_time.f_weekday, 'A'),
208
'a': self.__seqToRE(self.locale_time.a_weekday, 'a'),
209
'B': self.__seqToRE(self.locale_time.f_month[1:], 'B'),
210
'b': self.__seqToRE(self.locale_time.a_month[1:], 'b'),
211
'p': self.__seqToRE(self.locale_time.am_pm, 'p'),
212
'Z': self.__seqToRE((tz for tz_names in self.locale_time.timezone
216
base.__setitem__('W', base.__getitem__('U').replace('U', 'W'))
217
base.__setitem__('c', self.pattern(self.locale_time.LC_date_time))
218
base.__setitem__('x', self.pattern(self.locale_time.LC_date))
219
base.__setitem__('X', self.pattern(self.locale_time.LC_time))
221
def __seqToRE(self, to_convert, directive):
222
"""Convert a list to a regex string for matching a directive.
224
Want possible matching values to be from longest to shortest. This
225
prevents the possibility of a match occurring for a value that is also
226
a substring of a larger value that should have matched (e.g., 'abc'
227
matching when 'abcdef' should have been the match).
230
to_convert = sorted(to_convert, key=len, reverse=True)
231
for value in to_convert:
236
regex = '|'.join(re_escape(stuff) for stuff in to_convert)
237
regex = '(?P<%s>%s' % (directive, regex)
240
def pattern(self, format):
241
"""Return regex pattern for the format string.
243
Need to make sure that any characters that might be interpreted as
244
regex syntax are escaped.
247
processed_format = ''
248
# The sub() call escapes all characters that might be misconstrued
249
# as regex syntax. Cannot use re.escape since we have to deal with
250
# format directives (%m, etc.).
251
regex_chars = re_compile(r"([\\.^$*+?\(\){}\[\]|])")
252
format = regex_chars.sub(r"\\\1", format)
253
whitespace_replacement = re_compile('\s+')
254
format = whitespace_replacement.sub('\s+', format)
256
directive_index = format.index('%')+1
257
processed_format = "%s%s%s" % (processed_format,
258
format[:directive_index-1],
259
self[format[directive_index]])
260
format = format[directive_index+1:]
261
return "%s%s" % (processed_format, format)
263
def compile(self, format):
    """Return a compiled re object for the format string.

    The format string is first turned into regex source by
    self.pattern(); the result is compiled with the IGNORECASE flag, so
    matching does not depend on letter case.
    """
    regex_source = self.pattern(format)
    return re_compile(regex_source, IGNORECASE)
267
_cache_lock = _thread_allocate_lock()
268
# DO NOT modify _TimeRE_cache or _regex_cache without acquiring the cache lock
270
_TimeRE_cache = TimeRE()
271
_CACHE_MAX_SIZE = 5 # Max number of regexes stored in _regex_cache
274
def _calc_julian_from_U_or_W(year, week_of_year, day_of_week, week_starts_Mon):
    """Calculate the Julian day (day of the year, with January 1 == 1).

    year -- calendar year, handed to datetime.date
    week_of_year -- week number; 0 means the partial week that precedes
        the first full week of the year
    day_of_week -- weekday using the numbering of datetime.date.weekday()
    week_starts_Mon -- true if weeks are counted from Monday, false if
        they are counted from Sunday

    For week 0 the result is zero or negative when the requested weekday
    falls before January 1; no clamping is done here.
    """
    first_weekday = datetime_date(year, 1, 1).weekday()
    # For Sunday-based weeks it is easier to shift the view so that Sunday
    # becomes day 0; both weekday values are rotated the same way.
    if not week_starts_Mon:
        first_weekday = (first_weekday + 1) % 7
        day_of_week = (day_of_week + 1) % 7
    # Week 0 exists when the year does not begin on the first day of the
    # week; days in it are counted relative to January 1 directly.
    if week_of_year == 0:
        return 1 + day_of_week - first_weekday
    # Number of days in the year that fall before week 1 starts.
    week_0_length = (7 - first_weekday) % 7
    days_to_week = week_0_length + 7 * (week_of_year - 1)
    return 1 + days_to_week + day_of_week
295
def _strptime(data_string, format="%a %b %d %H:%M:%S %Y"):
296
"""Return a time struct based on the input string and the format string."""
297
global _TimeRE_cache, _regex_cache
299
if _getlang() != _TimeRE_cache.locale_time.lang:
300
_TimeRE_cache = TimeRE()
302
if len(_regex_cache) > _CACHE_MAX_SIZE:
304
locale_time = _TimeRE_cache.locale_time
305
format_regex = _regex_cache.get(format)
308
format_regex = _TimeRE_cache.compile(format)
309
# KeyError raised when a bad format is found; can be specified as
310
# \\, in which case it was a stray % but with a space after it
311
except KeyError, err:
312
bad_directive = err.args[0]
313
if bad_directive == "\\":
316
raise ValueError("'%s' is a bad directive in format '%s'" %
317
(bad_directive, format))
318
# IndexError only occurs when the format string is "%"
320
raise ValueError("stray %% in format '%s'" % format)
321
_regex_cache[format] = format_regex
322
found = format_regex.match(data_string)
324
raise ValueError("time data %r does not match format %r" %
325
(data_string, format))
326
if len(data_string) != found.end():
327
raise ValueError("unconverted data remains: %s" %
328
data_string[found.end():])
331
hour = minute = second = fraction = 0
333
# Default to -1 to signify that values not known; not critical to have,
336
week_of_year_start = -1
337
# weekday and julian defaulted to -1 so as to signal need to calculate
339
weekday = julian = -1
340
found_dict = found.groupdict()
341
for group_key in found_dict.iterkeys():
342
# Directives not explicitly handled below:
344
# handled by making out of other directives
346
# worthless without day of the week
348
year = int(found_dict['y'])
349
# Open Group specification for strptime() states that a %y
350
#value in the range of [00, 68] is in the century 2000, while
351
#[69,99] is in the century 1900
356
elif group_key == 'Y':
357
year = int(found_dict['Y'])
358
elif group_key == 'm':
359
month = int(found_dict['m'])
360
elif group_key == 'B':
361
month = locale_time.f_month.index(found_dict['B'].lower())
362
elif group_key == 'b':
363
month = locale_time.a_month.index(found_dict['b'].lower())
364
elif group_key == 'd':
365
day = int(found_dict['d'])
366
elif group_key == 'H':
367
hour = int(found_dict['H'])
368
elif group_key == 'I':
369
hour = int(found_dict['I'])
370
ampm = found_dict.get('p', '').lower()
371
# If there was no AM/PM indicator, we'll treat this like AM
372
if ampm in ('', locale_time.am_pm[0]):
373
# We're in AM so the hour is correct unless we're
374
# looking at 12 midnight.
375
# 12 midnight == 12 AM == hour 0
378
elif ampm == locale_time.am_pm[1]:
379
# We're in PM so we need to add 12 to the hour unless
380
# we're looking at 12 noon.
381
# 12 noon == 12 PM == hour 12
384
elif group_key == 'M':
385
minute = int(found_dict['M'])
386
elif group_key == 'S':
387
second = int(found_dict['S'])
388
elif group_key == 'f':
390
# Pad to always return microseconds.
391
s += "0" * (6 - len(s))
393
elif group_key == 'A':
394
weekday = locale_time.f_weekday.index(found_dict['A'].lower())
395
elif group_key == 'a':
396
weekday = locale_time.a_weekday.index(found_dict['a'].lower())
397
elif group_key == 'w':
398
weekday = int(found_dict['w'])
403
elif group_key == 'j':
404
julian = int(found_dict['j'])
405
elif group_key in ('U', 'W'):
406
week_of_year = int(found_dict[group_key])
408
# U starts week on Sunday.
409
week_of_year_start = 6
411
# W starts week on Monday.
412
week_of_year_start = 0
413
elif group_key == 'Z':
414
# Since -1 is default value only need to worry about setting tz if
415
# it can be something other than -1.
416
found_zone = found_dict['Z'].lower()
417
for value, tz_values in enumerate(locale_time.timezone):
418
if found_zone in tz_values:
419
# Deal with bad locale setup where timezone names are the
420
# same and yet time.daylight is true; too ambiguous to
421
# be able to tell what timezone has daylight savings
422
if (time.tzname[0] == time.tzname[1] and
423
time.daylight and found_zone not in ("utc", "gmt")):
428
# If we know the week of the year and what day of that week, we can figure
429
# out the Julian day of the year.
430
if julian == -1 and week_of_year != -1 and weekday != -1:
431
week_starts_Mon = True if week_of_year_start == 0 else False
432
julian = _calc_julian_from_U_or_W(year, week_of_year, weekday,
434
# Cannot pre-calculate datetime_date() since can change in Julian
435
# calculation and thus could have different value for the day of the week
438
# Need to add 1 to result since first day of the year is 1, not 0.
439
julian = datetime_date(year, month, day).toordinal() - \
440
datetime_date(year, 1, 1).toordinal() + 1
441
else: # Assume that if they bothered to include Julian day it will
443
datetime_result = datetime_date.fromordinal((julian - 1) + datetime_date(year, 1, 1).toordinal())
444
year = datetime_result.year
445
month = datetime_result.month
446
day = datetime_result.day
448
weekday = datetime_date(year, month, day).weekday()
449
return (time.struct_time((year, month, day,
450
hour, minute, second,
451
weekday, julian, tz)), fraction)
453
def _strptime_time(data_string, format="%a %b %d %H:%M:%S %Y"):
    """Return the first item of the _strptime() result for these arguments.

    _strptime() returns a pair whose first item is a time.struct_time and
    whose second item is the fraction; only the first item is passed on.
    """
    return _strptime(data_string, format)[0]