1
1
# Author: David Goodger
2
2
# Contact: goodger@users.sourceforge.net
3
# Revision: $Revision: 1.36 $
4
# Date: $Date: 2004/09/19 00:34:03 $
3
# Revision: $Revision: 3138 $
4
# Date: $Date: 2005-03-27 17:05:34 +0200 (Sun, 27 Mar 2005) $
5
5
# Copyright: This module has been placed in the public domain.
70
70
if (self.encoding and self.encoding.lower() == 'unicode'
71
71
or isinstance(data, UnicodeType)):
73
encodings = [self.encoding, 'utf-8']
75
encodings.append(locale.nl_langinfo(locale.CODESET))
79
encodings.append(locale.getlocale()[1])
83
encodings.append(locale.getdefaultlocale()[1])
86
encodings.append('latin-1')
73
encodings = [self.encoding]
75
# Apply heuristics only if no encoding is explicitly given.
76
encodings.append('utf-8')
78
encodings.append(locale.nl_langinfo(locale.CODESET))
82
encodings.append(locale.getlocale()[1])
86
encodings.append(locale.getdefaultlocale()[1])
89
encodings.append('latin-1')
87
92
for enc in encodings:
91
96
decoded = unicode(data, enc, self.error_handler)
92
97
self.successful_encoding = enc
94
except (UnicodeError, LookupError):
98
# Return decoded, removing BOMs.
99
return decoded.replace(u'\ufeff', u'')
100
except (UnicodeError, LookupError), error:
102
if error is not None:
103
error_details = '\n(%s: %s)' % (error.__class__.__name__, error)
96
104
raise UnicodeError(
97
'Unable to decode input data. Tried the following encodings: %s.'
98
% ', '.join([repr(enc) for enc in encodings if enc]))
105
'Unable to decode input data. Tried the following encodings: '
107
% (', '.join([repr(enc) for enc in encodings if enc]),
101
111
class Output(TransformSpec):