~ubuntu-branches/ubuntu/dapper/python-docutils/dapper

« back to all changes in this revision

Viewing changes to docutils/io.py

  • Committer: Bazaar Package Importer
  • Author(s): Matthias Klose
  • Date: 2005-12-13 13:47:41 UTC
  • mfrom: (1.1.2 upstream)
  • Revision ID: james.westby@ubuntu.com-20051213134741-fhuh5tq3n346q1oj
Tags: 0.3.9-0.1ubuntu1
Synchronize with Debian unstable.

Show diffs side-by-side

Legend: added lines

Legend: removed lines

Lines of Context:
1
1
# Author: David Goodger
2
2
# Contact: goodger@users.sourceforge.net
3
 
# Revision: $Revision: 1.36 $
4
 
# Date: $Date: 2004/09/19 00:34:03 $
 
3
# Revision: $Revision: 3138 $
 
4
# Date: $Date: 2005-03-27 17:05:34 +0200 (Sun, 27 Mar 2005) $
5
5
# Copyright: This module has been placed in the public domain.
6
6
 
7
7
"""
70
70
        if (self.encoding and self.encoding.lower() == 'unicode'
71
71
            or isinstance(data, UnicodeType)):
72
72
            return data
73
 
        encodings = [self.encoding, 'utf-8']
74
 
        try:
75
 
            encodings.append(locale.nl_langinfo(locale.CODESET))
76
 
        except:
77
 
            pass
78
 
        try:
79
 
            encodings.append(locale.getlocale()[1])
80
 
        except:
81
 
            pass
82
 
        try:
83
 
            encodings.append(locale.getdefaultlocale()[1])
84
 
        except:
85
 
            pass
86
 
        encodings.append('latin-1')
 
73
        encodings = [self.encoding]
 
74
        if not self.encoding:
 
75
            # Apply heuristics only if no encoding is explicitly given.
 
76
            encodings.append('utf-8')
 
77
            try:
 
78
                encodings.append(locale.nl_langinfo(locale.CODESET))
 
79
            except:
 
80
                pass
 
81
            try:
 
82
                encodings.append(locale.getlocale()[1])
 
83
            except:
 
84
                pass
 
85
            try:
 
86
                encodings.append(locale.getdefaultlocale()[1])
 
87
            except:
 
88
                pass
 
89
            encodings.append('latin-1')
 
90
        error = None
 
91
        error_details = ''
87
92
        for enc in encodings:
88
93
            if not enc:
89
94
                continue
90
95
            try:
91
96
                decoded = unicode(data, enc, self.error_handler)
92
97
                self.successful_encoding = enc
93
 
                return decoded
94
 
            except (UnicodeError, LookupError):
 
98
                # Return decoded, removing BOMs.
 
99
                return decoded.replace(u'\ufeff', u'')
 
100
            except (UnicodeError, LookupError), error:
95
101
                pass
 
102
        if error is not None:
 
103
            error_details = '\n(%s: %s)' % (error.__class__.__name__, error)
96
104
        raise UnicodeError(
97
 
            'Unable to decode input data.  Tried the following encodings: %s.'
98
 
            % ', '.join([repr(enc) for enc in encodings if enc]))
 
105
            'Unable to decode input data.  Tried the following encodings: '
 
106
            '%s.%s'
 
107
            % (', '.join([repr(enc) for enc in encodings if enc]),
 
108
               error_details))
99
109
 
100
110
 
101
111
class Output(TransformSpec):