~ubuntu-branches/ubuntu/natty/python3.1/natty-security

Viewing changes to Lib/test/test_unicode.py

Committer: Bazaar Package Importer
Author(s): Matthias Klose
Date: 2010-07-06 16:52:42 UTC
mfrom: (1.2.1 upstream) (2.1.11 sid)
Revision ID: james.westby@ubuntu.com-20100706165242-2xv4i019r3et6c0j

Tags: 3.1.2+20100706-1ubuntu1

* Merge with Debian; remaining changes:
- Regenerate the control file.
- Add debian/patches/overwrite-semaphore-check for Lucid buildds.

files added:
Lib/distutils/tests/test_log.py

Lib/lib2to3/fixes/fix_exitfunc.py

Lib/test/sha256.pem

Lib/test/test_sunau.py

Misc/NEWS~

Modules/_scproxy.c

debian/PVER-dbg.prerm.in

debian/patches/bdist-wininst-notfound.diff

debian/patches/bsddb-version.diff

debian/patches/cthreads.diff

debian/patches/deb-locations.diff

debian/patches/deb-setup.diff

debian/patches/debug-build.diff

debian/patches/disable-utimes.diff

debian/patches/distutils-install-layout.diff

debian/patches/distutils-link.diff

debian/patches/distutils-sysconfig.diff

debian/patches/doc-build.diff

debian/patches/doc-nodownload.diff

debian/patches/enable-fpectl.diff

debian/patches/hurd-broken-poll.diff

debian/patches/hurd-disable-nonworking-constants.diff

debian/patches/hurd-path_max.diff

debian/patches/issue8032.diff

debian/patches/issue8140.diff

debian/patches/issue8233.diff

debian/patches/langpack-gettext.diff

debian/patches/linecache.diff

debian/patches/link-opt.diff

debian/patches/locale-module.diff

debian/patches/makesetup-bashism.diff

debian/patches/no-large-file-support.diff

debian/patches/no-zip-on-sys.path.diff

debian/patches/overwrite-semaphore-check.diff

debian/patches/plat-linux2_alpha.diff

debian/patches/plat-linux2_hppa.diff

debian/patches/plat-linux2_mips.diff

debian/patches/plat-linux2_sparc.diff

debian/patches/platform-lsbrelease.diff

debian/patches/profile-doc.diff

debian/patches/profiled-build.diff

debian/patches/series.in

debian/patches/setup-modules.diff

debian/patches/site-locations.diff

debian/patches/statvfs-f_flag-constants.diff

debian/patches/svn-updates.diff

debian/patches/test-sundry.diff

debian/patches/tkinter-import.diff

debian/patches/webbrowser.diff

debian/source

debian/source/format

files removed:
debian/PVER-dbg.symbols.i386.in

debian/control.doc

debian/libPVER.symbols.i386.in

debian/libPVER.symbols.lpia.in

debian/patches/arm-float.dpatch

debian/patches/bdist-wininst-notfound.dpatch

debian/patches/bsddb-version.dpatch

debian/patches/cthreads.dpatch

debian/patches/deb-locations.dpatch

debian/patches/deb-setup.dpatch

debian/patches/debug-build.dpatch

debian/patches/disable-utimes.dpatch

debian/patches/distutils-install-layout.dpatch

debian/patches/distutils-link.dpatch

debian/patches/distutils-sysconfig.dpatch

debian/patches/doc-build.dpatch

debian/patches/doc-nodownload.dpatch

debian/patches/enable-fpectl.dpatch

debian/patches/hurd-broken-poll.dpatch

debian/patches/hurd-disable-nonworking-constants.dpatch

debian/patches/hurd-path_max.dpatch

debian/patches/issue8032.dpatch

debian/patches/issue8140.dpatch

debian/patches/issue8233.dpatch

debian/patches/langpack-gettext.dpatch

debian/patches/linecache.dpatch

debian/patches/link-opt.dpatch

debian/patches/locale-module.dpatch

debian/patches/makesetup-bashism.dpatch

debian/patches/no-large-file-support.dpatch

debian/patches/no-zip-on-sys.path.dpatch

debian/patches/overwrite-semaphore-check.dpatch

debian/patches/patchlevel.dpatch

debian/patches/platform-lsbrelease.dpatch

debian/patches/profile-doc.dpatch

debian/patches/profiled-build.dpatch

debian/patches/setup-modules.dpatch

debian/patches/site-locations.dpatch

debian/patches/svn-updates.dpatch

debian/patches/template.dpatch

debian/patches/test-sundry.dpatch

debian/patches/tkinter-import.dpatch

debian/patches/webbrowser.dpatch

files modified:
Demo/embed/demo.c

Demo/embed/loop.c

Demo/md5test/md5driver.py

Demo/pysvr/pysvr.c

Doc/about.rst

Doc/bugs.rst

Doc/c-api/arg.rst

Doc/c-api/buffer.rst

Doc/c-api/datetime.rst

Doc/c-api/file.rst

Doc/c-api/init.rst

Doc/c-api/list.rst

Doc/c-api/unicode.rst

Doc/distutils/builtdist.rst

Doc/distutils/sourcedist.rst

Doc/documenting/index.rst

Doc/documenting/style.rst

Doc/extending/newtypes.rst

Doc/faq/extending.rst

Doc/faq/programming.rst

Doc/glossary.rst

Doc/howto/doanddont.rst

Doc/howto/functional.rst

Doc/howto/webservers.rst

Doc/library/2to3.rst

Doc/library/asynchat.rst

Doc/library/binascii.rst

Doc/library/collections.rst

Doc/library/decimal.rst

Doc/library/dis.rst

Doc/library/doctest.rst

Doc/library/exceptions.rst

Doc/library/fcntl.rst

Doc/library/ftplib.rst

Doc/library/functions.rst

Doc/library/hashlib.rst

Doc/library/http.client.rst

Doc/library/http.cookiejar.rst

Doc/library/http.server.rst

Doc/library/io.rst

Doc/library/itertools.rst

Doc/library/json.rst

Doc/library/math.rst

Doc/library/multiprocessing.rst

Doc/library/numbers.rst

Doc/library/optparse.rst

Doc/library/os.path.rst

Doc/library/os.rst

Doc/library/re.rst

Doc/library/reprlib.rst

Doc/library/socket.rst

Doc/library/ssl.rst

Doc/library/stdtypes.rst

Doc/library/string.rst

Doc/library/struct.rst

Doc/library/subprocess.rst

Doc/library/symtable.rst

Doc/library/sys.rst

Doc/library/telnetlib.rst

Doc/library/tempfile.rst

Doc/library/test.rst

Doc/library/unittest.rst

Doc/library/urllib.parse.rst

Doc/library/urllib.request.rst

Doc/library/weakref.rst

Doc/library/winreg.rst

Doc/library/zipfile.rst

Doc/library/zlib.rst

Doc/reference/compound_stmts.rst

Doc/reference/datamodel.rst

Doc/reference/expressions.rst

Doc/reference/lexical_analysis.rst

Doc/reference/simple_stmts.rst

Doc/tools/sphinxext/indexsidebar.html

Doc/tools/sphinxext/layout.html

Doc/tools/sphinxext/pyspecific.py

Doc/tutorial/datastructures.rst

Doc/using/mac.rst

Doc/whatsnew/2.1.rst

Doc/whatsnew/2.6.rst

Doc/whatsnew/3.0.rst

Include/abstract.h

Include/ceval.h

Include/datetime.h

Include/descrobject.h

Include/dictobject.h

Include/object.h

Include/objimpl.h

Include/pyctype.h

Include/pyerrors.h

Include/pymacconfig.h

Include/pyport.h

Include/pythonrun.h

Include/setobject.h

Include/structseq.h

Include/symtable.h

Include/sysmodule.h

Include/traceback.h

Include/unicodeobject.h

LICENSE

Lib/_abcoll.py

Lib/_pyio.py

Lib/asyncore.py

Lib/base64.py

Lib/codecs.py

Lib/compileall.py

Lib/decimal.py

Lib/distutils/command/sdist.py

Lib/distutils/log.py

Lib/distutils/msvc9compiler.py

Lib/distutils/tests/test_sdist.py

Lib/distutils/unixccompiler.py

Lib/email/charset.py

Lib/email/encoders.py

Lib/email/feedparser.py

Lib/email/message.py

Lib/email/test/test_email.py

Lib/encodings/utf_16.py

Lib/encodings/utf_32.py

Lib/ftplib.py

Lib/gzip.py

Lib/html/parser.py

Lib/http/client.py

Lib/idlelib/PyShell.py

Lib/idlelib/textView.py

Lib/inspect.py

Lib/lib2to3/fixes/fix_import.py

Lib/lib2to3/fixes/fix_itertools_imports.py

Lib/lib2to3/fixes/fix_operator.py

Lib/lib2to3/fixes/fix_reduce.py

Lib/lib2to3/fixes/fix_tuple_params.py

Lib/lib2to3/fixes/fix_xrange.py

Lib/lib2to3/main.py

Lib/lib2to3/pgen2/tokenize.py

Lib/lib2to3/pytree.py

Lib/lib2to3/refactor.py

Lib/lib2to3/tests/test_fixers.py

Lib/lib2to3/tests/test_parser.py

Lib/lib2to3/tests/test_pytree.py

Lib/lib2to3/tests/test_refactor.py

Lib/linecache.py

Lib/macpath.py

Lib/mailcap.py

Lib/multiprocessing/forking.py

Lib/multiprocessing/pool.py

Lib/multiprocessing/process.py

Lib/ntpath.py

Lib/os.py

Lib/pickle.py

Lib/pickletools.py

Lib/pipes.py

Lib/platform.py

Lib/pydoc.py

Lib/shutil.py

Lib/site.py

Lib/smtpd.py

Lib/socketserver.py

Lib/sqlite3/test/regression.py

Lib/ssl.py

Lib/struct.py

Lib/subprocess.py

Lib/sunau.py

Lib/tabnanny.py

Lib/tarfile.py

Lib/test/list_tests.py

Lib/test/pickletester.py

Lib/test/regrtest.py

Lib/test/support.py

Lib/test/test_ascii_formatd.py

Lib/test/test_asyncore.py

Lib/test/test_audioop.py

Lib/test/test_base64.py

Lib/test/test_builtin.py

Lib/test/test_bytes.py

Lib/test/test_cmd_line.py

Lib/test/test_codeccallbacks.py

Lib/test/test_codecs.py

Lib/test/test_collections.py

Lib/test/test_compileall.py

Lib/test/test_complex.py

Lib/test/test_curses.py

Lib/test/test_datetime.py

Lib/test/test_decimal.py

Lib/test/test_enumerate.py

Lib/test/test_htmlparser.py

Lib/test/test_httplib.py

Lib/test/test_httpservers.py

Lib/test/test_inspect.py

Lib/test/test_io.py

Lib/test/test_linecache.py

Lib/test/test_locale.py

Lib/test/test_long.py

Lib/test/test_math.py

Lib/test/test_multiprocessing.py

Lib/test/test_optparse.py

Lib/test/test_os.py

Lib/test/test_ossaudiodev.py

Lib/test/test_parser.py

Lib/test/test_pipes.py

Lib/test/test_poplib.py

Lib/test/test_pydoc.py

Lib/test/test_robotparser.py

Lib/test/test_select.py

Lib/test/test_set.py

Lib/test/test_shutil.py

Lib/test/test_signal.py

Lib/test/test_smtplib.py

Lib/test/test_socketserver.py

Lib/test/test_ssl.py

Lib/test/test_struct.py

Lib/test/test_structmembers.py

Lib/test/test_subprocess.py

Lib/test/test_sundry.py

Lib/test/test_sys.py

Lib/test/test_tarfile.py

Lib/test/test_tcl.py

Lib/test/test_unicode.py

Lib/test/test_urllib.py

Lib/test/test_urllib2.py

Lib/test/test_urllib2net.py

Lib/test/test_urllibnet.py

Lib/test/test_urlparse.py

Lib/test/test_uuid.py

Lib/test/test_warnings.py

Lib/test/test_winreg.py

Lib/test/test_winsound.py

Lib/test/test_zlib.py

Lib/threading.py

Lib/tkinter/_fix.py

Lib/turtle.py

Lib/urllib/parse.py

Lib/urllib/request.py

Lib/uuid.py

Lib/xmlrpc/server.py

Mac/BuildScript/build-installer.py

Mac/BuildScript/scripts/postflight.framework

Mac/Makefile.in

Mac/PythonLauncher/Makefile.in

Mac/README

Makefile.pre.in

Misc/ACKS

Misc/NEWS

Misc/maintainers.rst

Misc/python.man

Misc/setuid-prog.c

Modules/_bisectmodule.c

Modules/_codecsmodule.c

Modules/_collectionsmodule.c

Modules/_csv.c

Modules/_ctypes/_ctypes.c

Modules/_ctypes/_ctypes_test.c

Modules/_ctypes/callbacks.c

Modules/_ctypes/callproc.c

Modules/_ctypes/cfield.c

Modules/_ctypes/ctypes.h

Modules/_ctypes/darwin/dlfcn_simple.c

Modules/_ctypes/malloc_closure.c

Modules/_ctypes/stgdict.c

Modules/_curses_panel.c

Modules/_cursesmodule.c

Modules/_dbmmodule.c

Modules/_functoolsmodule.c

Modules/_gdbmmodule.c

Modules/_gestalt.c

Modules/_hashopenssl.c

Modules/_heapqmodule.c

Modules/_io/bufferedio.c

Modules/_io/bytesio.c

Modules/_io/fileio.c

Modules/_io/iobase.c

Modules/_io/textio.c

Modules/_json.c

Modules/_localemodule.c

Modules/_lsprof.c

Modules/_multiprocessing/connection.h

Modules/_multiprocessing/multiprocessing.c

Modules/_multiprocessing/multiprocessing.h

Modules/_multiprocessing/pipe_connection.c

Modules/_multiprocessing/semaphore.c

Modules/_multiprocessing/socket_connection.c

Modules/_multiprocessing/win32_functions.c

Modules/_pickle.c

Modules/_randommodule.c

Modules/_sqlite/connection.c

Modules/_sqlite/module.c

Modules/_ssl.c

Modules/_struct.c

Modules/_testcapimodule.c

Modules/_threadmodule.c

Modules/_tkinter.c

Modules/addrinfo.h

Modules/arraymodule.c

Modules/audioop.c

Modules/binascii.c

Modules/bz2module.c

Modules/cjkcodecs/_codecs_cn.c

Modules/cjkcodecs/_codecs_hk.c

Modules/cjkcodecs/_codecs_iso2022.c

Modules/cjkcodecs/_codecs_jp.c

Modules/cjkcodecs/_codecs_kr.c

Modules/cjkcodecs/_codecs_tw.c

Modules/cjkcodecs/alg_jisx0201.h

Modules/cjkcodecs/cjkcodecs.h

Modules/cjkcodecs/emu_jisx0213_2000.h

Modules/cjkcodecs/multibytecodec.c

Modules/cjkcodecs/multibytecodec.h

Modules/cmathmodule.c

Modules/config.c.in

Modules/cryptmodule.c

Modules/datetimemodule.c

Modules/errnomodule.c

Modules/fcntlmodule.c

Modules/fpectlmodule.c

Modules/fpetestmodule.c

Modules/gcmodule.c

Modules/getaddrinfo.c

Modules/getbuildinfo.c

Modules/getnameinfo.c

Modules/getpath.c

Modules/grpmodule.c

Modules/itertoolsmodule.c

Modules/main.c

Modules/mathmodule.c

Modules/md5module.c

Modules/mmapmodule.c

Modules/nismodule.c

Modules/operator.c

Modules/ossaudiodev.c

Modules/parsermodule.c

Modules/posixmodule.c

Modules/pwdmodule.c

Modules/pyexpat.c

Modules/python.c

Modules/readline.c

Modules/resource.c

Modules/rotatingtree.c

Modules/selectmodule.c

Modules/sha1module.c

Modules/sha256module.c

Modules/sha512module.c

Modules/signalmodule.c

Modules/socketmodule.c

Modules/socketmodule.h

Modules/spwdmodule.c

Modules/symtablemodule.c

Modules/syslogmodule.c

Modules/termios.c

Modules/testcapi_long.h

Modules/timemodule.c

Modules/tkappinit.c

Modules/unicodedata.c

Modules/xxmodule.c

Modules/xxsubtype.c

Modules/zipimport.c

Modules/zlibmodule.c

Objects/abstract.c

Objects/boolobject.c

Objects/bytearrayobject.c

Objects/bytes_methods.c

Objects/bytesobject.c

Objects/cellobject.c

Objects/classobject.c

Objects/codeobject.c

Objects/complexobject.c

Objects/descrobject.c

Objects/dictobject.c

Objects/enumobject.c

Objects/exceptions.c

Objects/fileobject.c

Objects/floatobject.c

Objects/frameobject.c

Objects/funcobject.c

Objects/genobject.c

Objects/iterobject.c

Objects/listobject.c

Objects/longobject.c

Objects/methodobject.c

Objects/moduleobject.c

Objects/object.c

Objects/obmalloc.c

Objects/rangeobject.c

Objects/setobject.c

Objects/sliceobject.c

Objects/stringlib/eq.h

Objects/stringlib/partition.h

Objects/stringlib/string_format.h

Objects/stringlib/transmogrify.h

Objects/structseq.c

Objects/tupleobject.c

Objects/typeobject.c

Objects/unicodectype.c

Objects/unicodeobject.c

Objects/weakrefobject.c

PC/VS7.1/make_buildinfo.c

PC/VS8.0/make_buildinfo.c

PC/_msi.c

PC/_subprocess.c

PC/bdist_wininst/archive.h

PC/bdist_wininst/extract.c

PC/bdist_wininst/install.c

PC/config.c

PC/dl_nt.c

PC/errmap.h

PC/example_nt/example.c

PC/frozen_dllmain.c

PC/generrmap.c

PC/getpathp.c

PC/import_nt.c

PC/make_versioninfo.c

PC/msvcrtmodule.c

PC/os2emx/config.c

PC/os2emx/dlfcn.c

PC/os2emx/dllentry.c

PC/os2emx/getpathp.c

PC/os2emx/pythonpm.c

PC/os2vacpp/getpathp.c

PC/winreg.c

PC/winsound.c

PCbuild/make_buildinfo.c

Parser/acceler.c

Parser/asdl.py

Parser/bitset.c

Parser/firstsets.c

Parser/grammar.c

Parser/grammar1.c

Parser/intrcheck.c

Parser/listnode.c

Parser/metagrammar.c

Parser/myreadline.c

Parser/node.c

Parser/parser.c

Parser/parsetok.c

Parser/pgen.c

Parser/pgenmain.c

Parser/printgrammar.c

Parser/tokenizer.c

Parser/tokenizer.h

Python/_warnings.c

Python/asdl.c

Python/ast.c

Python/bltinmodule.c

Python/ceval.c

Python/codecs.c

Python/compile.c

Python/dynload_aix.c

Python/dynload_atheos.c

Python/dynload_hpux.c

Python/dynload_next.c

Python/dynload_os2.c

Python/dynload_shlib.c

Python/dynload_win.c

Python/errors.c

Python/frozen.c

Python/frozenmain.c

Python/future.c

Python/getargs.c

Python/getcwd.c

Python/getopt.c

Python/graminit.c

Python/import.c

Python/importdl.c

Python/importdl.h

Python/makeopcodetargets.py

Python/marshal.c

Python/modsupport.c

Python/mysnprintf.c

Python/mystrtoul.c

Python/opcode_targets.h

Python/peephole.c

Python/pyarena.c

Python/pymath.c

Python/pystate.c

Python/pystrcmp.c

Python/pystrtod.c

Python/pythonrun.c

Python/structmember.c

Python/symtable.c

Python/sysmodule.c

Python/thread.c

Python/thread_atheos.h

Python/thread_cthread.h

Python/thread_foobar.h

Python/thread_lwp.h

Python/thread_nt.h

Python/thread_os2.h

Python/thread_pth.h

Python/thread_pthread.h

Python/thread_sgi.h

Python/thread_solaris.h

Python/thread_wince.h

Python/traceback.c

Tools/msi/msi.py

Tools/msi/msisupport.c

configure

configure.in

debian/PVER-minimal.README.Debian.in

debian/changelog

debian/control

debian/control.in

debian/libpython.symbols.in

debian/pyhtml2devhelp.py

debian/pymindeps.py

debian/rules

pyconfig.h.in

setup.py

Show diffs side-by-side

added added

removed removed

Lib/test/test_unicode.py

687

self.assertRaises(IndexError, "{:}".format)

688

self.assertRaises(IndexError, "{:s}".format)

689

self.assertRaises(IndexError, "{}".format)

690

big = "23098475029384702983476098230754973209482573"

691

self.assertRaises(ValueError, ("{" + big + "}").format)

692

self.assertRaises(ValueError, ("{[" + big + "]}").format, [0])

690

693

691

694

# issue 6089

692

695

self.assertRaises(ValueError, "{0[0]x}".format, [None])

945

948

# * strict decoding testing for all of the

946

949

# UTF8_ERROR cases in PyUnicode_DecodeUTF8

947

950

951

def test_utf8_decode_valid_sequences(self):

952

sequences = [

953

# single byte

954

(b'\x00', '\x00'), (b'a', 'a'), (b'\x7f', '\x7f'),

955

# 2 bytes

956

(b'\xc2\x80', '\x80'), (b'\xdf\xbf', '\u07ff'),

957

# 3 bytes

958

(b'\xe0\xa0\x80', '\u0800'), (b'\xed\x9f\xbf', '\ud7ff'),

959

(b'\xee\x80\x80', '\uE000'), (b'\xef\xbf\xbf', '\uffff'),

960

# 4 bytes

961

(b'\xF0\x90\x80\x80', '\U00010000'),

962

(b'\xf4\x8f\xbf\xbf', '\U0010FFFF')

963

]

964

for seq, res in sequences:

965

self.assertEqual(seq.decode('utf-8'), res)

966

967

968

def test_utf8_decode_invalid_sequences(self):

969

# continuation bytes in a sequence of 2, 3, or 4 bytes

970

continuation_bytes = [bytes([x]) for x in range(0x80, 0xC0)]

971

# start bytes of a 2-byte sequence equivalent to codepoints <= 0x7F

972

invalid_2B_seq_start_bytes = [bytes([x]) for x in range(0xC0, 0xC2)]

973

# start bytes of a 4-byte sequence equivalent to codepoints > 0x10FFFF

974

invalid_4B_seq_start_bytes = [bytes([x]) for x in range(0xF5, 0xF8)]

975

invalid_start_bytes = (

976

continuation_bytes + invalid_2B_seq_start_bytes +

977

invalid_4B_seq_start_bytes + [bytes([x]) for x in range(0xF7, 0x100)]

978

)

979

980

for byte in invalid_start_bytes:

981

self.assertRaises(UnicodeDecodeError, byte.decode, 'utf-8')

982

983

for sb in invalid_2B_seq_start_bytes:

984

for cb in continuation_bytes:

985

self.assertRaises(UnicodeDecodeError, (sb+cb).decode, 'utf-8')

986

987

for sb in invalid_4B_seq_start_bytes:

988

for cb1 in continuation_bytes[:3]:

989

for cb3 in continuation_bytes[:3]:

990

self.assertRaises(UnicodeDecodeError,

991

(sb+cb1+b'\x80'+cb3).decode, 'utf-8')

992

993

for cb in [bytes([x]) for x in range(0x80, 0xA0)]:

994

self.assertRaises(UnicodeDecodeError,

995

(b'\xE0'+cb+b'\x80').decode, 'utf-8')

996

self.assertRaises(UnicodeDecodeError,

997

(b'\xE0'+cb+b'\xBF').decode, 'utf-8')

998

# surrogates

999

for cb in [bytes([x]) for x in range(0xA0, 0xC0)]:

1000

self.assertRaises(UnicodeDecodeError,

1001

(b'\xED'+cb+b'\x80').decode, 'utf-8')

1002

self.assertRaises(UnicodeDecodeError,

1003

(b'\xED'+cb+b'\xBF').decode, 'utf-8')

1004

for cb in [bytes([x]) for x in range(0x80, 0x90)]:

1005

self.assertRaises(UnicodeDecodeError,

1006

(b'\xF0'+cb+b'\x80\x80').decode, 'utf-8')

1007

self.assertRaises(UnicodeDecodeError,

1008

(b'\xF0'+cb+b'\xBF\xBF').decode, 'utf-8')

1009

for cb in [bytes([x]) for x in range(0x90, 0xC0)]:

1010

self.assertRaises(UnicodeDecodeError,

1011

(b'\xF4'+cb+b'\x80\x80').decode, 'utf-8')

1012

self.assertRaises(UnicodeDecodeError,

1013

(b'\xF4'+cb+b'\xBF\xBF').decode, 'utf-8')

1014

1015

def test_issue8271(self):

1016

# Issue #8271: during the decoding of an invalid UTF-8 byte sequence,

1017

# only the start byte and the continuation byte(s) are now considered

1018

# invalid, instead of the number of bytes specified by the start byte.

1019

# See http://www.unicode.org/versions/Unicode5.2.0/ch03.pdf (page 95,

1020

# table 3-8, Row 2) for more information about the algorithm used.

1021

FFFD = '\ufffd'

1022

sequences = [

1023

# invalid start bytes

1024

(b'\x80', FFFD), # continuation byte

1025

(b'\x80\x80', FFFD*2), # 2 continuation bytes

1026

(b'\xc0', FFFD),

1027

(b'\xc0\xc0', FFFD*2),

1028

(b'\xc1', FFFD),

1029

(b'\xc1\xc0', FFFD*2),

1030

(b'\xc0\xc1', FFFD*2),

1031

# with start byte of a 2-byte sequence

1032

(b'\xc2', FFFD), # only the start byte

1033

(b'\xc2\xc2', FFFD*2), # 2 start bytes

1034

(b'\xc2\xc2\xc2', FFFD*3), # 3 start bytes

1035

(b'\xc2\x41', FFFD+'A'), # invalid continuation byte

1036

# with start byte of a 3-byte sequence

1037

(b'\xe1', FFFD), # only the start byte

1038

(b'\xe1\xe1', FFFD*2), # 2 start bytes

1039

(b'\xe1\xe1\xe1', FFFD*3), # 3 start bytes

1040

(b'\xe1\xe1\xe1\xe1', FFFD*4), # 4 start bytes

1041

(b'\xe1\x80', FFFD), # only 1 continuation byte

1042

(b'\xe1\x41', FFFD+'A'), # invalid continuation byte

1043

(b'\xe1\x41\x80', FFFD+'A'+FFFD), # invalid cb followed by valid cb

1044

(b'\xe1\x41\x41', FFFD+'AA'), # 2 invalid continuation bytes

1045

(b'\xe1\x80\x41', FFFD+'A'), # only 1 valid continuation byte

1046

(b'\xe1\x80\xe1\x41', FFFD*2+'A'), # 1 valid and the other invalid

1047

(b'\xe1\x41\xe1\x80', FFFD+'A'+FFFD), # 1 invalid and the other valid

1048

# with start byte of a 4-byte sequence

1049

(b'\xf1', FFFD), # only the start byte

1050

(b'\xf1\xf1', FFFD*2), # 2 start bytes

1051

(b'\xf1\xf1\xf1', FFFD*3), # 3 start bytes

1052

(b'\xf1\xf1\xf1\xf1', FFFD*4), # 4 start bytes

1053

(b'\xf1\xf1\xf1\xf1\xf1', FFFD*5), # 5 start bytes

1054

(b'\xf1\x80', FFFD), # only 1 continuation byte

1055

(b'\xf1\x80\x80', FFFD), # only 2 continuation bytes

1056

(b'\xf1\x80\x41', FFFD+'A'), # 1 valid cb and 1 invalid

1057

(b'\xf1\x80\x41\x41', FFFD+'AA'), # 1 valid cb and 2 invalid

1058

(b'\xf1\x80\x80\x41', FFFD+'A'), # 2 valid cb and 1 invalid

1059

(b'\xf1\x41\x80', FFFD+'A'+FFFD), # 1 invalid cb and 1 valid

1060

(b'\xf1\x41\x80\x80', FFFD+'A'+FFFD*2), # 1 invalid cb and 2 valid

1061

(b'\xf1\x41\x80\x41', FFFD+'A'+FFFD+'A'), # 2 invalid cb and 1 valid

1062

(b'\xf1\x41\x41\x80', FFFD+'AA'+FFFD), # 2 invalid cb and 1 valid

1063

(b'\xf1\x41\xf1\x80', FFFD+'A'+FFFD),

1064

(b'\xf1\x41\x80\xf1', FFFD+'A'+FFFD*2),

1065

(b'\xf1\xf1\x80\x41', FFFD*2+'A'),

1066

(b'\xf1\x41\xf1\xf1', FFFD+'A'+FFFD*2),

1067

# with invalid start byte of a 4-byte sequence (rfc2279)

1068

(b'\xf5', FFFD), # only the start byte

1069

(b'\xf5\xf5', FFFD*2), # 2 start bytes

1070

(b'\xf5\x80', FFFD*2), # only 1 continuation byte

1071

(b'\xf5\x80\x80', FFFD*3), # only 2 continuation bytes

1072

(b'\xf5\x80\x80\x80', FFFD*4), # 3 continuation bytes

1073

(b'\xf5\x80\x41', FFFD*2+'A'), # 1 valid cb and 1 invalid

1074

(b'\xf5\x80\x41\xf5', FFFD*2+'A'+FFFD),

1075

(b'\xf5\x41\x80\x80\x41', FFFD+'A'+FFFD*2+'A'),

1076

# with invalid start byte of a 5-byte sequence (rfc2279)

1077

(b'\xf8', FFFD), # only the start byte

1078

(b'\xf8\xf8', FFFD*2), # 2 start bytes

1079

(b'\xf8\x80', FFFD*2), # only one continuation byte

1080

(b'\xf8\x80\x41', FFFD*2 + 'A'), # 1 valid cb and 1 invalid

1081

(b'\xf8\x80\x80\x80\x80', FFFD*5), # invalid 5 bytes seq with 5 bytes

1082

# with invalid start byte of a 6-byte sequence (rfc2279)

1083

(b'\xfc', FFFD), # only the start byte

1084

(b'\xfc\xfc', FFFD*2), # 2 start bytes

1085

(b'\xfc\x80\x80', FFFD*3), # only 2 continuation bytes

1086

(b'\xfc\x80\x80\x80\x80\x80', FFFD*6), # start byte + 5 continuation bytes

1087

# invalid start byte

1088

(b'\xfe', FFFD),

1089

(b'\xfe\x80\x80', FFFD*3),

1090

# other sequences

1091

(b'\xf1\x80\x41\x42\x43', '\ufffd\x41\x42\x43'),

1092

(b'\xf1\x80\xff\x42\x43', '\ufffd\ufffd\x42\x43'),

1093

(b'\xf1\x80\xc2\x81\x43', '\ufffd\x81\x43'),

1094

(b'\x61\xF1\x80\x80\xE1\x80\xC2\x62\x80\x63\x80\xBF\x64',

1095

'\x61\uFFFD\uFFFD\uFFFD\x62\uFFFD\x63\uFFFD\uFFFD\x64'),

1096

]

1097

for n, (seq, res) in enumerate(sequences):

1098

self.assertRaises(UnicodeDecodeError, seq.decode, 'utf-8', 'strict')

1099

self.assertEqual(seq.decode('utf-8', 'replace'), res)

1100

self.assertEqual((seq+b'b').decode('utf-8', 'replace'), res+'b')

1101

self.assertEqual(seq.decode('utf-8', 'ignore'),

1102

res.replace('\uFFFD', ''))

1103

948

1104

def test_codecs_idna(self):

949

1105

# Test whether trailing dot is preserved

950

1106

self.assertEqual("www.python.org.".encode("idna"), b"www.python.org.")

1223

1379

self.assertRaises(MemoryError, alloc)

1224

1380

self.assertRaises(MemoryError, alloc)

1225

1381

1382

def test_format_subclass(self):

1383

class S(str):

1384

def __str__(self):

1385

return '__str__ overridden'

1386

s = S('xxx')

1387

self.assertEquals("%s" % s, '__str__ overridden')

1388

self.assertEquals("{}".format(s), '__str__ overridden')

1389

1226

1390

1227

1391

def test_main():

1228

1392

support.run_unittest(__name__)

Older »