~pythonregexp2.7/python/issue2636-01+09-02

« back to all changes in this revision

Viewing changes to Objects/stringobject.c

Committer: Jeffrey C. "The TimeHorse" Jacobs
Date: 2008-09-22 00:16:16 UTC
mfrom: (39022.1.34 Regexp-2.7)
Revision ID: darklord@timehorse.com-20080922001616-p1wdip9lfp0zl5cu

Merged in changes from the Atomic Grouping / Possessive Qualifiers branch.

files added:
Doc/howto/webservers.rst

Doc/includes/mp_benchmarks.py

Doc/includes/mp_distributing.py

Doc/includes/mp_newtype.py

Doc/includes/mp_pool.py

Doc/includes/mp_synchronize.py

Doc/includes/mp_webserver.py

Doc/includes/mp_workers.py

Doc/library/2to3.rst

Doc/library/ast.rst

Doc/library/multiprocessing.rst

Doc/library/symtable.rst

Doc/reference/grammar.rst

Lib/ast.py

Lib/lib2to3/fixer_base.py

Lib/lib2to3/fixer_util.py

Lib/lib2to3/fixes/fix_imports2.py

Lib/lib2to3/fixes/fix_metaclass.py

Lib/lib2to3/fixes/fix_paren.py

Lib/lib2to3/fixes/fix_sys_exc.py

Lib/lib2to3/fixes/fix_urllib.py

Lib/lib2to3/main.py

Lib/multiprocessing

Lib/multiprocessing/__init__.py

Lib/multiprocessing/connection.py

Lib/multiprocessing/dummy

Lib/multiprocessing/dummy/__init__.py

Lib/multiprocessing/dummy/connection.py

Lib/multiprocessing/forking.py

Lib/multiprocessing/heap.py

Lib/multiprocessing/managers.py

Lib/multiprocessing/pool.py

Lib/multiprocessing/process.py

Lib/multiprocessing/queues.py

Lib/multiprocessing/reduction.py

Lib/multiprocessing/sharedctypes.py

Lib/multiprocessing/synchronize.py

Lib/multiprocessing/util.py

Lib/test/crashers/iter.py

Lib/test/test_fileio.py

Lib/test/test_multiprocessing.py

Lib/test/wrongcert.pem

Modules/_multiprocessing

Modules/_multiprocessing/connection.h

Modules/_multiprocessing/multiprocessing.c

Modules/_multiprocessing/multiprocessing.h

Modules/_multiprocessing/pipe_connection.c

Modules/_multiprocessing/semaphore.c

Modules/_multiprocessing/socket_connection.c

Modules/_multiprocessing/win32_functions.c

Objects/stringobject.c

PC/VC6/_msi.dsp

PC/VC6/_multiprocessing.dsp

PC/VS8.0/_multiprocessing.vcproj

PC/VS8.0/kill_python.c

PCbuild/_multiprocessing.vcproj

Tools/msi/crtlicense.txt

files removed:
Doc/library/_ast.rst

Lib/lib2to3/fixes/basefix.py

Lib/lib2to3/fixes/util.py

Lib/test/test_socket_ssl.py

Objects/bytesobject.c

PCbuild/_bsddb44.vcproj

files modified:
.bzrignore

Demo/classes/Dates.py

Demo/classes/bitvec.py

Demo/curses/life.py

Demo/curses/ncurses.py

Demo/embed/Makefile

Demo/md5test/md5driver.py

Demo/parser/unparse.py

Demo/pdist/cmptree.py

Demo/rpc/nfsclient.py

Demo/rpc/rpc.py

Demo/rpc/xdr.py

Demo/scripts/fact.py

Demo/scripts/ftpstats.py

Demo/scripts/lpwatch.py

Demo/scripts/markov.py

Demo/scripts/newslist.py

Demo/scripts/pi.py

Demo/scripts/unbirthday.py

Demo/sockets/ftp.py

Demo/threads/Coroutine.py

Demo/threads/Generator.py

Demo/tkinter/guido/hanoi.py

Demo/tkinter/guido/rmt.py

Demo/tkinter/guido/solitaire.py

Demo/tkinter/guido/sortvisu.py

Doc/ACKS.txt

Doc/Makefile

Doc/README.txt

Doc/bugs.rst

Doc/c-api/arg.rst

Doc/c-api/conversion.rst

Doc/c-api/float.rst

Doc/c-api/int.rst

Doc/c-api/memory.rst

Doc/c-api/object.rst

Doc/c-api/sequence.rst

Doc/c-api/typeobj.rst

Doc/conf.py

Doc/distutils/apiref.rst

Doc/distutils/builtdist.rst

Doc/distutils/commandref.rst

Doc/distutils/setupscript.rst

Doc/extending/embedding.rst

Doc/extending/windows.rst

Doc/glossary.rst

Doc/howto/index.rst

Doc/howto/sockets.rst

Doc/howto/unicode.rst

Doc/library/__future__.rst

Doc/library/abc.rst

Doc/library/asynchat.rst

Doc/library/asyncore.rst

Doc/library/basehttpserver.rst

Doc/library/binhex.rst

Doc/library/bsddb.rst

Doc/library/carbon.rst

Doc/library/cgi.rst

Doc/library/cgihttpserver.rst

Doc/library/cmath.rst

Doc/library/codecs.rst

Doc/library/collections.rst

Doc/library/commands.rst

Doc/library/compileall.rst

Doc/library/cookie.rst

Doc/library/csv.rst

Doc/library/ctypes.rst

Doc/library/dbhash.rst

Doc/library/decimal.rst

Doc/library/development.rst

Doc/library/email.message.rst

Doc/library/fractions.rst

Doc/library/framework.rst

Doc/library/ftplib.rst

Doc/library/functions.rst

Doc/library/future_builtins.rst

Doc/library/gc.rst

Doc/library/getopt.rst

Doc/library/hotshot.rst

Doc/library/idle.rst

Doc/library/imp.rst

Doc/library/imputil.rst

Doc/library/index.rst

Doc/library/itertools.rst

Doc/library/language.rst

Doc/library/logging.rst

Doc/library/mac.rst

Doc/library/macos.rst

Doc/library/macosa.rst

Doc/library/macpath.rst

Doc/library/mailbox.rst

Doc/library/math.rst

Doc/library/mimetools.rst

Doc/library/msilib.rst

Doc/library/numbers.rst

Doc/library/operator.rst

Doc/library/optparse.rst

Doc/library/os.path.rst

Doc/library/os.rst

Doc/library/parser.rst

Doc/library/pickle.rst

Doc/library/plistlib.rst

Doc/library/popen2.rst

Doc/library/pprint.rst

Doc/library/py_compile.rst

Doc/library/random.rst

Doc/library/re.rst

Doc/library/rfc822.rst

Doc/library/rlcompleter.rst

Doc/library/robotparser.rst

Doc/library/select.rst

Doc/library/shutil.rst

Doc/library/signal.rst

Doc/library/site.rst

Doc/library/smtplib.rst

Doc/library/socketserver.rst

Doc/library/someos.rst

Doc/library/sqlite3.rst

Doc/library/ssl.rst

Doc/library/stdtypes.rst

Doc/library/string.rst

Doc/library/subprocess.rst

Doc/library/sys.rst

Doc/library/tarfile.rst

Doc/library/tempfile.rst

Doc/library/test.rst

Doc/library/thread.rst

Doc/library/threading.rst

Doc/library/time.rst

Doc/library/tix.rst

Doc/library/tkinter.rst

Doc/library/turtle.rst

Doc/library/types.rst

Doc/library/undoc.rst

Doc/library/unicodedata.rst

Doc/library/unittest.rst

Doc/library/urllib.rst

Doc/library/urllib2.rst

Doc/library/urlparse.rst

Doc/library/warnings.rst

Doc/library/webbrowser.rst

Doc/library/wsgiref.rst

Doc/library/xmlrpclib.rst

Doc/library/zipfile.rst

Doc/library/zlib.rst

Doc/reference/compound_stmts.rst

Doc/reference/datamodel.rst

Doc/reference/expressions.rst

Doc/reference/index.rst

Doc/reference/lexical_analysis.rst

Doc/reference/simple_stmts.rst

Doc/tools/sphinxext/opensearch.xml

Doc/tools/sphinxext/pyspecific.py

Doc/tutorial/appetite.rst

Doc/tutorial/classes.rst

Doc/tutorial/controlflow.rst

Doc/tutorial/errors.rst

Doc/tutorial/index.rst

Doc/tutorial/inputoutput.rst

Doc/tutorial/interpreter.rst

Doc/tutorial/introduction.rst

Doc/tutorial/stdlib2.rst

Doc/using/cmdline.rst

Doc/using/mac.rst

Doc/using/windows.rst

Doc/whatsnew/2.6.rst

Grammar/Grammar

Include/Python.h

Include/abstract.h

Include/bytesobject.h

Include/cobject.h

Include/floatobject.h

Include/intobject.h

Include/object.h

Include/patchlevel.h

Include/pymath.h

Include/pymem.h

Include/pyport.h

Include/pythonrun.h

Include/pythread.h

Include/stringobject.h

Include/symtable.h

Include/traceback.h

Lib/BaseHTTPServer.py

Lib/CGIHTTPServer.py

Lib/ConfigParser.py

Lib/Cookie.py

Lib/DocXMLRPCServer.py

Lib/Queue.py

Lib/SimpleHTTPServer.py

Lib/SimpleXMLRPCServer.py

Lib/UserList.py

Lib/UserString.py

Lib/_abcoll.py

Lib/_threading_local.py

Lib/abc.py

Lib/aifc.py

Lib/asynchat.py

Lib/asyncore.py

Lib/bdb.py

Lib/bisect.py

Lib/bsddb/__init__.py

Lib/bsddb/db.py

Lib/bsddb/dbobj.py

Lib/bsddb/dbshelve.py

Lib/bsddb/dbtables.py

Lib/bsddb/dbutils.py

Lib/bsddb/test/test_all.py

Lib/bsddb/test/test_associate.py

Lib/bsddb/test/test_basics.py

Lib/bsddb/test/test_compare.py

Lib/bsddb/test/test_compat.py

Lib/bsddb/test/test_cursor_pget_bug.py

Lib/bsddb/test/test_dbobj.py

Lib/bsddb/test/test_dbshelve.py

Lib/bsddb/test/test_dbtables.py

Lib/bsddb/test/test_distributed_transactions.py

Lib/bsddb/test/test_early_close.py

Lib/bsddb/test/test_get_none.py

Lib/bsddb/test/test_join.py

Lib/bsddb/test/test_lock.py

Lib/bsddb/test/test_misc.py

Lib/bsddb/test/test_pickle.py

Lib/bsddb/test/test_queue.py

Lib/bsddb/test/test_recno.py

Lib/bsddb/test/test_replication.py

Lib/bsddb/test/test_sequence.py

Lib/bsddb/test/test_thread.py

Lib/cgi.py

Lib/code.py

Lib/compiler/transformer.py

Lib/cookielib.py

Lib/csv.py

Lib/ctypes/__init__.py

Lib/ctypes/test/test_errno.py

Lib/ctypes/test/test_memfunctions.py

Lib/ctypes/test/test_pep3118.py

Lib/ctypes/test/test_pointers.py

Lib/ctypes/test/test_simplesubclasses.py

Lib/ctypes/wintypes.py

Lib/curses/textpad.py

Lib/dbhash.py

Lib/decimal.py

Lib/difflib.py

Lib/distutils/__init__.py

Lib/distutils/command/build_ext.py

Lib/distutils/command/build_py.py

Lib/distutils/core.py

Lib/distutils/cygwinccompiler.py

Lib/distutils/dist.py

Lib/distutils/msvc9compiler.py

Lib/distutils/tests/test_dist.py

Lib/distutils/unixccompiler.py

Lib/dummy_thread.py

Lib/email/_parseaddr.py

Lib/email/base64mime.py

Lib/email/charset.py

Lib/email/generator.py

Lib/email/header.py

Lib/email/message.py

Lib/email/quoprimime.py

Lib/filecmp.py

Lib/fileinput.py

Lib/fractions.py

Lib/functools.py

Lib/hashlib.py

Lib/hmac.py

Lib/httplib.py

Lib/idlelib/idlever.py

Lib/imghdr.py

Lib/inspect.py

Lib/io.py

Lib/lib-tk/Tkinter.py

Lib/lib2to3/Grammar.txt

Lib/lib2to3/fixes/fix_apply.py

Lib/lib2to3/fixes/fix_basestring.py

Lib/lib2to3/fixes/fix_buffer.py

Lib/lib2to3/fixes/fix_callable.py

Lib/lib2to3/fixes/fix_dict.py

Lib/lib2to3/fixes/fix_except.py

Lib/lib2to3/fixes/fix_exec.py

Lib/lib2to3/fixes/fix_execfile.py

Lib/lib2to3/fixes/fix_filter.py

Lib/lib2to3/fixes/fix_funcattrs.py

Lib/lib2to3/fixes/fix_future.py

Lib/lib2to3/fixes/fix_has_key.py

Lib/lib2to3/fixes/fix_idioms.py

Lib/lib2to3/fixes/fix_import.py

Lib/lib2to3/fixes/fix_imports.py

Lib/lib2to3/fixes/fix_input.py

Lib/lib2to3/fixes/fix_intern.py

Lib/lib2to3/fixes/fix_itertools.py

Lib/lib2to3/fixes/fix_itertools_imports.py

Lib/lib2to3/fixes/fix_long.py

Lib/lib2to3/fixes/fix_map.py

Lib/lib2to3/fixes/fix_methodattrs.py

Lib/lib2to3/fixes/fix_ne.py

Lib/lib2to3/fixes/fix_next.py

Lib/lib2to3/fixes/fix_nonzero.py

Lib/lib2to3/fixes/fix_numliterals.py

Lib/lib2to3/fixes/fix_print.py

Lib/lib2to3/fixes/fix_raise.py

Lib/lib2to3/fixes/fix_raw_input.py

Lib/lib2to3/fixes/fix_renames.py

Lib/lib2to3/fixes/fix_repr.py

Lib/lib2to3/fixes/fix_standarderror.py

Lib/lib2to3/fixes/fix_throw.py

Lib/lib2to3/fixes/fix_tuple_params.py

Lib/lib2to3/fixes/fix_types.py

Lib/lib2to3/fixes/fix_unicode.py

Lib/lib2to3/fixes/fix_ws_comma.py

Lib/lib2to3/fixes/fix_xrange.py

Lib/lib2to3/fixes/fix_xreadlines.py

Lib/lib2to3/fixes/fix_zip.py

Lib/lib2to3/pytree.py

Lib/lib2to3/refactor.py

Lib/lib2to3/tests/data/py2_test_grammar.py

Lib/lib2to3/tests/data/py3_test_grammar.py

Lib/lib2to3/tests/support.py

Lib/lib2to3/tests/test_all_fixers.py

Lib/lib2to3/tests/test_fixers.py

Lib/lib2to3/tests/test_util.py

Lib/locale.py

Lib/logging/__init__.py

Lib/logging/config.py

Lib/mailbox.py

Lib/mimetools.py

Lib/modulefinder.py

Lib/msilib/__init__.py

Lib/numbers.py

Lib/optparse.py

Lib/os.py

Lib/pdb.py

Lib/pickle.py

Lib/plat-mac/Audio_mac.py

Lib/plat-mac/Carbon/__init__.py

Lib/plat-mac/EasyDialogs.py

Lib/plat-mac/FrameWork.py

Lib/plat-mac/MiniAEFrame.py

Lib/plat-mac/PixMapWrapper.py

Lib/plat-mac/aepack.py

Lib/plat-mac/aetools.py

Lib/plat-mac/aetypes.py

Lib/plat-mac/applesingle.py

Lib/plat-mac/appletrawmain.py

Lib/plat-mac/appletrunner.py

Lib/plat-mac/argvemulator.py

Lib/plat-mac/bgenlocations.py

Lib/plat-mac/buildtools.py

Lib/plat-mac/bundlebuilder.py

Lib/plat-mac/cfmfile.py

Lib/plat-mac/findertools.py

Lib/plat-mac/gensuitemodule.py

Lib/plat-mac/ic.py

Lib/plat-mac/icopen.py

Lib/plat-mac/lib-scriptpackages/CodeWarrior/__init__.py

Lib/plat-mac/lib-scriptpackages/Explorer/__init__.py

Lib/plat-mac/lib-scriptpackages/Finder/__init__.py

Lib/plat-mac/lib-scriptpackages/Netscape/__init__.py

Lib/plat-mac/lib-scriptpackages/StdSuites/__init__.py

Lib/plat-mac/lib-scriptpackages/SystemEvents/__init__.py

Lib/plat-mac/lib-scriptpackages/Terminal/__init__.py

Lib/plat-mac/lib-scriptpackages/_builtinSuites/__init__.py

Lib/plat-mac/macerrors.py

Lib/plat-mac/macostools.py

Lib/plat-mac/macresource.py

Lib/plat-mac/pimp.py

Lib/plat-mac/terminalcommand.py

Lib/plat-mac/videoreader.py

Lib/platform.py

Lib/pprint.py

Lib/profile.py

Lib/pstats.py

Lib/pydoc.py

Lib/random.py

Lib/re.py

Lib/rfc822.py

Lib/rlcompleter.py

Lib/robotparser.py

Lib/sets.py

Lib/shelve.py

Lib/shutil.py

Lib/socket.py

Lib/sqlite3/dbapi2.py

Lib/sqlite3/test/regression.py

Lib/sre_compile.py

Lib/ssl.py

Lib/string.py

Lib/stringold.py

Lib/subprocess.py

Lib/sunaudio.py

Lib/symtable.py

Lib/tarfile.py

Lib/test/crashers/loosing_mro_ref.py

Lib/test/decimaltestdata/abs.decTest

Lib/test/decimaltestdata/add.decTest

Lib/test/decimaltestdata/and.decTest

Lib/test/decimaltestdata/base.decTest

Lib/test/decimaltestdata/clamp.decTest

Lib/test/decimaltestdata/class.decTest

Lib/test/decimaltestdata/compare.decTest

Lib/test/decimaltestdata/comparetotal.decTest

Lib/test/decimaltestdata/comparetotmag.decTest

Lib/test/decimaltestdata/copy.decTest

Lib/test/decimaltestdata/copyabs.decTest

Lib/test/decimaltestdata/copynegate.decTest

Lib/test/decimaltestdata/copysign.decTest

Lib/test/decimaltestdata/ddAbs.decTest

Lib/test/decimaltestdata/ddAdd.decTest

Lib/test/decimaltestdata/ddAnd.decTest

Lib/test/decimaltestdata/ddBase.decTest

Lib/test/decimaltestdata/ddCanonical.decTest

Lib/test/decimaltestdata/ddClass.decTest

Lib/test/decimaltestdata/ddCompare.decTest

Lib/test/decimaltestdata/ddCompareSig.decTest

Lib/test/decimaltestdata/ddCompareTotal.decTest

Lib/test/decimaltestdata/ddCompareTotalMag.decTest

Lib/test/decimaltestdata/ddCopy.decTest

Lib/test/decimaltestdata/ddCopyAbs.decTest

Lib/test/decimaltestdata/ddCopyNegate.decTest

Lib/test/decimaltestdata/ddCopySign.decTest

Lib/test/decimaltestdata/ddDivide.decTest

Lib/test/decimaltestdata/ddDivideInt.decTest

Lib/test/decimaltestdata/ddEncode.decTest

Lib/test/decimaltestdata/ddFMA.decTest

Lib/test/decimaltestdata/ddInvert.decTest

Lib/test/decimaltestdata/ddLogB.decTest

Lib/test/decimaltestdata/ddMax.decTest

Lib/test/decimaltestdata/ddMaxMag.decTest

Lib/test/decimaltestdata/ddMin.decTest

Lib/test/decimaltestdata/ddMinMag.decTest

Lib/test/decimaltestdata/ddMinus.decTest

Lib/test/decimaltestdata/ddMultiply.decTest

Lib/test/decimaltestdata/ddNextMinus.decTest

Lib/test/decimaltestdata/ddNextPlus.decTest

Lib/test/decimaltestdata/ddNextToward.decTest

Lib/test/decimaltestdata/ddOr.decTest

Lib/test/decimaltestdata/ddPlus.decTest

Lib/test/decimaltestdata/ddQuantize.decTest

Lib/test/decimaltestdata/ddReduce.decTest

Lib/test/decimaltestdata/ddRemainder.decTest

Lib/test/decimaltestdata/ddRemainderNear.decTest

Lib/test/decimaltestdata/ddRotate.decTest

Lib/test/decimaltestdata/ddSameQuantum.decTest

Lib/test/decimaltestdata/ddScaleB.decTest

Lib/test/decimaltestdata/ddShift.decTest

Lib/test/decimaltestdata/ddSubtract.decTest

Lib/test/decimaltestdata/ddToIntegral.decTest

Lib/test/decimaltestdata/ddXor.decTest

Lib/test/decimaltestdata/decDouble.decTest

Lib/test/decimaltestdata/decQuad.decTest

Lib/test/decimaltestdata/decSingle.decTest

Lib/test/decimaltestdata/divide.decTest

Lib/test/decimaltestdata/divideint.decTest

Lib/test/decimaltestdata/dqAbs.decTest

Lib/test/decimaltestdata/dqAdd.decTest

Lib/test/decimaltestdata/dqAnd.decTest

Lib/test/decimaltestdata/dqBase.decTest

Lib/test/decimaltestdata/dqCanonical.decTest

Lib/test/decimaltestdata/dqClass.decTest

Lib/test/decimaltestdata/dqCompare.decTest

Lib/test/decimaltestdata/dqCompareSig.decTest

Lib/test/decimaltestdata/dqCompareTotal.decTest

Lib/test/decimaltestdata/dqCompareTotalMag.decTest

Lib/test/decimaltestdata/dqCopy.decTest

Lib/test/decimaltestdata/dqCopyAbs.decTest

Lib/test/decimaltestdata/dqCopyNegate.decTest

Lib/test/decimaltestdata/dqCopySign.decTest

Lib/test/decimaltestdata/dqDivide.decTest

Lib/test/decimaltestdata/dqDivideInt.decTest

Lib/test/decimaltestdata/dqEncode.decTest

Lib/test/decimaltestdata/dqFMA.decTest

Lib/test/decimaltestdata/dqInvert.decTest

Lib/test/decimaltestdata/dqLogB.decTest

Lib/test/decimaltestdata/dqMax.decTest

Lib/test/decimaltestdata/dqMaxMag.decTest

Lib/test/decimaltestdata/dqMin.decTest

Lib/test/decimaltestdata/dqMinMag.decTest

Lib/test/decimaltestdata/dqMinus.decTest

Lib/test/decimaltestdata/dqMultiply.decTest

Lib/test/decimaltestdata/dqNextMinus.decTest

Lib/test/decimaltestdata/dqNextPlus.decTest

Lib/test/decimaltestdata/dqNextToward.decTest

Lib/test/decimaltestdata/dqOr.decTest

Lib/test/decimaltestdata/dqPlus.decTest

Lib/test/decimaltestdata/dqQuantize.decTest

Lib/test/decimaltestdata/dqReduce.decTest

Lib/test/decimaltestdata/dqRemainder.decTest

Lib/test/decimaltestdata/dqRemainderNear.decTest

Lib/test/decimaltestdata/dqRotate.decTest

Lib/test/decimaltestdata/dqSameQuantum.decTest

Lib/test/decimaltestdata/dqScaleB.decTest

Lib/test/decimaltestdata/dqShift.decTest

Lib/test/decimaltestdata/dqSubtract.decTest

Lib/test/decimaltestdata/dqToIntegral.decTest

Lib/test/decimaltestdata/dqXor.decTest

Lib/test/decimaltestdata/dsBase.decTest

Lib/test/decimaltestdata/dsEncode.decTest

Lib/test/decimaltestdata/exp.decTest

Lib/test/decimaltestdata/fma.decTest

Lib/test/decimaltestdata/inexact.decTest

Lib/test/decimaltestdata/invert.decTest

Lib/test/decimaltestdata/ln.decTest

Lib/test/decimaltestdata/log10.decTest

Lib/test/decimaltestdata/logb.decTest

Lib/test/decimaltestdata/max.decTest

Lib/test/decimaltestdata/maxmag.decTest

Lib/test/decimaltestdata/min.decTest

Lib/test/decimaltestdata/minmag.decTest

Lib/test/decimaltestdata/minus.decTest

Lib/test/decimaltestdata/multiply.decTest

Lib/test/decimaltestdata/nextminus.decTest

Lib/test/decimaltestdata/nextplus.decTest

Lib/test/decimaltestdata/nexttoward.decTest

Lib/test/decimaltestdata/or.decTest

Lib/test/decimaltestdata/plus.decTest

Lib/test/decimaltestdata/power.decTest

Lib/test/decimaltestdata/powersqrt.decTest

Lib/test/decimaltestdata/quantize.decTest

Lib/test/decimaltestdata/randomBound32.decTest

Lib/test/decimaltestdata/randoms.decTest

Lib/test/decimaltestdata/reduce.decTest

Lib/test/decimaltestdata/remainder.decTest

Lib/test/decimaltestdata/remainderNear.decTest

Lib/test/decimaltestdata/rescale.decTest

Lib/test/decimaltestdata/rotate.decTest

Lib/test/decimaltestdata/rounding.decTest

Lib/test/decimaltestdata/samequantum.decTest

Lib/test/decimaltestdata/scaleb.decTest

Lib/test/decimaltestdata/shift.decTest

Lib/test/decimaltestdata/squareroot.decTest

Lib/test/decimaltestdata/subtract.decTest

Lib/test/decimaltestdata/testall.decTest

Lib/test/decimaltestdata/tointegral.decTest

Lib/test/decimaltestdata/tointegralx.decTest

Lib/test/decimaltestdata/xor.decTest

Lib/test/exception_hierarchy.txt

Lib/test/pystone.py

Lib/test/regrtest.py

Lib/test/seq_tests.py

Lib/test/string_tests.py

Lib/test/test___all__.py

Lib/test/test_abc.py

Lib/test/test_array.py

Lib/test/test_ast.py

Lib/test/test_asyncore.py

Lib/test/test_audioop.py

Lib/test/test_bigmem.py

Lib/test/test_bisect.py

Lib/test/test_builtin.py

Lib/test/test_bytes.py

Lib/test/test_bz2.py

Lib/test/test_cgi.py

Lib/test/test_coercion.py

Lib/test/test_collections.py

Lib/test/test_compile.py

Lib/test/test_compiler.py

Lib/test/test_cookielib.py

Lib/test/test_copy.py

Lib/test/test_cpickle.py

Lib/test/test_csv.py

Lib/test/test_datetime.py

Lib/test/test_decimal.py

Lib/test/test_descr.py

Lib/test/test_dict.py

Lib/test/test_dummy_thread.py

Lib/test/test_dummy_threading.py

Lib/test/test_exceptions.py

Lib/test/test_file.py

Lib/test/test_float.py

Lib/test/test_fractions.py

Lib/test/test_functools.py

Lib/test/test_grammar.py

Lib/test/test_grp.py

Lib/test/test_hash.py

Lib/test/test_hashlib.py

Lib/test/test_heapq.py

Lib/test/test_hmac.py

Lib/test/test_imp.py

Lib/test/test_import.py

Lib/test/test_importhooks.py

Lib/test/test_io.py

Lib/test/test_itertools.py

Lib/test/test_list.py

Lib/test/test_locale.py

Lib/test/test_logging.py

Lib/test/test_long.py

Lib/test/test_macostools.py

Lib/test/test_mailbox.py

Lib/test/test_math.py

Lib/test/test_mimetools.py

Lib/test/test_multibytecodec.py

Lib/test/test_normalization.py

Lib/test/test_operator.py

Lib/test/test_os.py

Lib/test/test_ossaudiodev.py

Lib/test/test_parser.py

Lib/test/test_pep352.py

Lib/test/test_platform.py

Lib/test/test_posix.py

Lib/test/test_pprint.py

Lib/test/test_pwd.py

Lib/test/test_py3kwarn.py

Lib/test/test_pydoc.py

Lib/test/test_queue.py

Lib/test/test_random.py

Lib/test/test_re.py

Lib/test/test_rfc822.py

Lib/test/test_richcmp.py

Lib/test/test_robotparser.py

Lib/test/test_scope.py

Lib/test/test_set.py

Lib/test/test_shelve.py

Lib/test/test_shutil.py

Lib/test/test_site.py

Lib/test/test_slice.py

Lib/test/test_smtplib.py

Lib/test/test_socket.py

Lib/test/test_socketserver.py

Lib/test/test_sort.py

Lib/test/test_ssl.py

Lib/test/test_str.py

Lib/test/test_string.py

Lib/test/test_strop.py

Lib/test/test_struct.py

Lib/test/test_structmembers.py

Lib/test/test_sundry.py

Lib/test/test_support.py

Lib/test/test_symtable.py

Lib/test/test_syntax.py

Lib/test/test_sys.py

Lib/test/test_threading.py

Lib/test/test_threading_local.py

Lib/test/test_traceback.py

Lib/test/test_typechecks.py

Lib/test/test_types.py

Lib/test/test_unicode.py

Lib/test/test_unicodedata.py

Lib/test/test_urllib.py

Lib/test/test_urllib2.py

Lib/test/test_urllibnet.py

Lib/test/test_urlparse.py

Lib/test/test_userstring.py

Lib/test/test_warnings.py

Lib/test/test_weakref.py

Lib/test/test_xrange.py

Lib/test/test_zipfile.py

Lib/test/test_zipfile64.py

Lib/test/threaded_import_hangers.py

Lib/textwrap.py

Lib/threading.py

Lib/tokenize.py

Lib/traceback.py

Lib/unittest.py

Lib/urllib.py

Lib/urllib2.py

Lib/urlparse.py

Lib/warnings.py

Lib/weakref.py

Lib/wsgiref/handlers.py

Lib/wsgiref/headers.py

Lib/wsgiref/simple_server.py

Lib/wsgiref/util.py

Lib/xml/dom/minidom.py

Lib/xml/sax/__init__.py

Lib/xml/sax/xmlreader.py

Lib/xmlrpclib.py

Lib/zipfile.py

Mac/IDLE/Makefile.in

Mac/Makefile.in

Mac/Modules/MacOS.c

Mac/Modules/file/_Filemodule.c

Makefile.pre.in

Misc/ACKS

Misc/NEWS

Misc/RPM/python-2.6.spec

Misc/build.sh

Misc/developers.txt

Misc/find_recursionlimit.py

Misc/valgrind-python.supp

Modules/Setup.dist

Modules/_bisectmodule.c

Modules/_bsddb.c

Modules/_codecsmodule.c

Modules/_collectionsmodule.c

Modules/_csv.c

Modules/_ctypes/_ctypes.c

Modules/_ctypes/callproc.c

Modules/_ctypes/ctypes.h

Modules/_ctypes/stgdict.c

Modules/_cursesmodule.c

Modules/_fileio.c

Modules/_functoolsmodule.c

Modules/_hashopenssl.c

Modules/_heapqmodule.c

Modules/_json.c

Modules/_localemodule.c

Modules/_sqlite/connection.c

Modules/_sqlite/cursor.c

Modules/_sqlite/microprotocols.c

Modules/_sqlite/microprotocols.h

Modules/_sqlite/module.c

Modules/_sqlite/row.c

Modules/_sqlite/statement.c

Modules/_sqlite/util.c

Modules/_sqlite/util.h

Modules/_sre.c

Modules/_ssl.c

Modules/_struct.c

Modules/_testcapimodule.c

Modules/_tkinter.c

Modules/almodule.c

Modules/arraymodule.c

Modules/audioop.c

Modules/binascii.c

Modules/bsddb.h

Modules/bz2module.c

Modules/cPickle.c

Modules/cStringIO.c

Modules/cjkcodecs/multibytecodec.c

Modules/cmathmodule.c

Modules/config.c.in

Modules/datetimemodule.c

Modules/dlmodule.c

Modules/errnomodule.c

Modules/fcntlmodule.c

Modules/future_builtins.c

Modules/gcmodule.c

Modules/imageop.c

Modules/itertoolsmodule.c

Modules/mathmodule.c

Modules/md5.c

Modules/mmapmodule.c

Modules/nismodule.c

Modules/parsermodule.c

Modules/posixmodule.c

Modules/selectmodule.c

Modules/signalmodule.c

Modules/socketmodule.c

Modules/socketmodule.h

Modules/stropmodule.c

Modules/symtablemodule.c

Modules/threadmodule.c

Modules/timemodule.c

Modules/unicodedata.c

Modules/unicodedata_db.h

Modules/unicodename_db.h

Objects/abstract.c

Objects/bufferobject.c

Objects/bytearrayobject.c

Objects/classobject.c

Objects/cobject.c

Objects/dictobject.c

Objects/enumobject.c

Objects/exceptions.c

Objects/fileobject.c

Objects/floatobject.c

Objects/frameobject.c

Objects/intobject.c

Objects/listobject.c

Objects/longobject.c

Objects/object.c

Objects/obmalloc.c

Objects/rangeobject.c

Objects/setobject.c

Objects/sliceobject.c

Objects/stringlib/find.h

Objects/stringlib/formatter.h

Objects/stringlib/localeutil.h

Objects/stringlib/string_format.h

Objects/structseq.c

Objects/tupleobject.c

Objects/typeobject.c

Objects/unicodectype.c

Objects/unicodeobject.c

Objects/unicodetype_db.h

Objects/weakrefobject.c

PC/VC6/_bsddb.dsp

PC/VC6/_socket.dsp

PC/VC6/_sqlite3.dsp

PC/VC6/build_ssl.py

PC/VC6/pcbuild.dsw

PC/VC6/python.dsp

PC/VC6/pythoncore.dsp

PC/VC6/readme.txt

PC/VS7.1/pythoncore.vcproj

PC/VS7.1/readme.txt

PC/VS8.0/_bsddb.vcproj

PC/VS8.0/_bsddb44.vcproj

PC/VS8.0/_ctypes.vcproj

PC/VS8.0/_ctypes_test.vcproj

PC/VS8.0/_elementtree.vcproj

PC/VS8.0/_hashlib.vcproj

PC/VS8.0/_msi.vcproj

PC/VS8.0/_socket.vcproj

PC/VS8.0/_sqlite3.vcproj

PC/VS8.0/_ssl.vcproj

PC/VS8.0/_testcapi.vcproj

PC/VS8.0/_tkinter.vcproj

PC/VS8.0/bdist_wininst.vcproj

PC/VS8.0/bz2.vcproj

PC/VS8.0/kill_python.vcproj

PC/VS8.0/make_buildinfo.vcproj

PC/VS8.0/make_versioninfo.vcproj

PC/VS8.0/pcbuild.sln

PC/VS8.0/pyexpat.vcproj

PC/VS8.0/pyproject.vsprops

PC/VS8.0/python.vcproj

PC/VS8.0/pythoncore.vcproj

PC/VS8.0/pythonw.vcproj

PC/VS8.0/select.vcproj

PC/VS8.0/sqlite3.vcproj

PC/VS8.0/unicodedata.vcproj

PC/VS8.0/w9xpopen.vcproj

PC/VS8.0/winsound.vcproj

PC/_subprocess.c

PC/msvcrtmodule.c

PC/pyconfig.h

PCbuild/_bsddb.vcproj

PCbuild/_ctypes.vcproj

PCbuild/_elementtree.vcproj

PCbuild/bdist_wininst.vcproj

PCbuild/build_tkinter.py

PCbuild/kill_python.c

PCbuild/kill_python.vcproj

PCbuild/make_versioninfo.vcproj

PCbuild/pcbuild.sln

PCbuild/pyproject.vsprops

PCbuild/python.vcproj

PCbuild/pythoncore.vcproj

PCbuild/readme.txt

PCbuild/sqlite3.vcproj

PCbuild/vs9to8.py

Parser/asdl_c.py

Parser/intrcheck.c

Parser/node.c

Python/Python-ast.c

Python/_warnings.c

Python/asdl.c

Python/ast.c

Python/bltinmodule.c

Python/ceval.c

Python/compile.c

Python/errors.c

Python/getargs.c

Python/graminit.c

Python/import.c

Python/marshal.c

Python/pystate.c

Python/pystrtod.c

Python/pythonrun.c

Python/symtable.c

Python/sysmodule.c

Python/thread.c

Python/traceback.c

README

Tools/buildbot/build-amd64.bat

Tools/buildbot/build.bat

Tools/buildbot/buildmsi.bat

Tools/buildbot/clean-amd64.bat

Tools/buildbot/clean.bat

Tools/buildbot/external-amd64.bat

Tools/buildbot/external-common.bat

Tools/buildbot/external.bat

Tools/buildbot/test-amd64.bat

Tools/buildbot/test.bat

Tools/faqwiz/move-faqwiz.sh

Tools/msi/merge.py

Tools/msi/msi.py

Tools/msi/msilib.py

Tools/msi/uuids.py

Tools/pybench/Calls.py

Tools/scripts/2to3

Tools/scripts/idle *

Tools/scripts/svneol.py

Tools/unicode/makeunicodedata.py

configure

configure.in

pyconfig.h.in

setup.py

Show diffs side-by-side

added added

removed removed

Objects/stringobject.c

/* String (str/bytes) object implementation */

#define PY_SSIZE_T_CLEAN

#include "Python.h"

#include <ctype.h>

#ifdef COUNT_ALLOCS

/* Statistics, compiled in only when COUNT_ALLOCS is defined: bumped each
   time the cached empty string (null_strings) or a cached one-character
   string (one_strings) is handed out instead of allocating a new object. */
int null_strings, one_strings;

#endif

/* Cache of one-character strings, indexed by byte value
   (*str & UCHAR_MAX).  A slot is NULL until that string is first created. */
static PyStringObject *characters[UCHAR_MAX + 1];

/* The shared empty string; NULL until the first empty string is created. */
static PyStringObject *nullstring;

/* This dictionary holds all interned strings. Note that references to

strings in this dictionary are *not* counted in the string's ob_refcnt.

When the interned string reaches a refcnt of 0 the string deallocation

function will delete the reference from this dictionary.

Another way to look at this is to say that the actual reference

count of a string is: s->ob_refcnt + (s->ob_sstate?2:0)

*/

static PyObject *interned;

/*
   For both PyString_FromString() and PyString_FromStringAndSize(), the
   parameter `size' denotes number of characters to allocate, not counting any
   null terminating character.

   For PyString_FromString(), the parameter `str' points to a null-terminated
   string containing exactly `size' bytes.

   For PyString_FromStringAndSize(), the parameter `str' is
   either NULL or else points to a string containing at least `size' bytes.
   For PyString_FromStringAndSize(), the string in the `str' parameter does
   not have to be null-terminated. (Therefore it is safe to construct a
   substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
   If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
   bytes (setting the last byte to the null terminating character) and you can
   fill in the data yourself. If `str' is non-NULL then the resulting
   PyString object must be treated as immutable and you must not fill in nor
   alter the data yourself, since the strings may be shared.

   The PyObject member `op->ob_size', which denotes the number of "extra
   items" in a variable-size object, will contain the number of bytes
   allocated for string data, not counting the null terminating character. It
   is therefore equal to the `size' parameter (for
   PyString_FromStringAndSize()) or the length of the string in the `str'
   parameter (for PyString_FromString()).
*/

/* Create a string object holding `size' bytes.
 *
 * str  - source bytes (need not be null-terminated), or NULL to get a
 *        buffer of `size' bytes whose contents the caller fills in.
 * size - number of bytes, not counting the terminating null; must be >= 0.
 *
 * Returns a new reference, or NULL with an exception set: SystemError for a
 * negative size, OverflowError when the object would be too large, or the
 * error raised by PyErr_NoMemory() when the allocation fails.
 */
PyObject *
PyString_FromStringAndSize(const char *str, Py_ssize_t size)
{
    PyStringObject *op;

    if (size < 0) {
        PyErr_SetString(PyExc_SystemError,
            "Negative size passed to PyString_FromStringAndSize");
        return NULL;
    }

    /* Fast path: hand out the cached empty string if it already exists. */
    if (size == 0 && (op = nullstring) != NULL) {
#ifdef COUNT_ALLOCS
        null_strings++;
#endif
        Py_INCREF(op);
        return (PyObject *)op;
    }

    /* Fast path: hand out the cached one-character string, if any.  Only
       possible when the content is known, i.e. str is not NULL. */
    if (size == 1 && str != NULL &&
        (op = characters[*str & UCHAR_MAX]) != NULL)
    {
#ifdef COUNT_ALLOCS
        one_strings++;
#endif
        Py_INCREF(op);
        return (PyObject *)op;
    }

    /* Refuse sizes for which sizeof(PyStringObject) + size would exceed
       PY_SSIZE_T_MAX. */
    if (size > PY_SSIZE_T_MAX - sizeof(PyStringObject)) {
        PyErr_SetString(PyExc_OverflowError, "string is too large");
        return NULL;
    }

    /* Inline PyObject_NewVar */
    op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
    if (op == NULL)
        return PyErr_NoMemory();
    PyObject_INIT_VAR(op, &PyString_Type, size);
    op->ob_shash = -1;                      /* hash not computed yet */
    op->ob_sstate = SSTATE_NOT_INTERNED;
    if (str != NULL)
        Py_MEMCPY(op->ob_sval, str, size);
    op->ob_sval[size] = '\0';               /* always null-terminate */

    /* share short strings */
    if (size == 0) {
        /* PyString_InternInPlace() may change t, so op is reloaded from
           it.  The extra INCREF is the reference held by the cache. */
        PyObject *t = (PyObject *)op;
        PyString_InternInPlace(&t);
        op = (PyStringObject *)t;
        nullstring = op;
        Py_INCREF(op);
    } else if (size == 1 && str != NULL) {
        /* Same as above for the one-character cache; a caller-filled
           buffer (str == NULL) is never cached. */
        PyObject *t = (PyObject *)op;
        PyString_InternInPlace(&t);
        op = (PyStringObject *)t;
        characters[*str & UCHAR_MAX] = op;
        Py_INCREF(op);
    }
    return (PyObject *) op;
}

108

109

PyObject *

110

PyString_FromString(const char *str)

111

{

112

113

114

115

assert(str != NULL);

116

size = strlen(str);

117

if (size > PY_SSIZE_T_MAX - sizeof(PyStringObject)) {

118

PyErr_SetString(PyExc_OverflowError,

119

"string is too long for a Python string");

120

return NULL;

121

}

122

if (size == 0 && (op = nullstring) != NULL) {

123

#ifdef COUNT_ALLOCS

124

null_strings++;

125

#endif

126

Py_INCREF(op);

127

return (PyObject *)op;

128

}

129

if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {

130

#ifdef COUNT_ALLOCS

131

one_strings++;

132

#endif

133

Py_INCREF(op);

134

return (PyObject *)op;

135

}

136

137

/* Inline PyObject_NewVar */

138

op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);

139

if (op == NULL)

140

return PyErr_NoMemory();

141

PyObject_INIT_VAR(op, &PyString_Type, size);

142

op->ob_shash = -1;

143

op->ob_sstate = SSTATE_NOT_INTERNED;

144

Py_MEMCPY(op->ob_sval, str, size+1);

145

/* share short strings */

146

if (size == 0) {

147

PyObject *t = (PyObject *)op;

148

PyString_InternInPlace(&t);

149

op = (PyStringObject *)t;

150

nullstring = op;

151

Py_INCREF(op);

152

} else if (size == 1) {

153

PyObject *t = (PyObject *)op;

154

PyString_InternInPlace(&t);

155

op = (PyStringObject *)t;

156

characters[*str & UCHAR_MAX] = op;

157

Py_INCREF(op);

158

}

159

return (PyObject *) op;

160

}

161

162

PyObject *

163

PyString_FromFormatV(const char *format, va_list vargs)

164

{

165

va_list count;

166

Py_ssize_t n = 0;

167

const char* f;

168

char *s;

169

PyObject* string;

170

171

#ifdef VA_LIST_IS_ARRAY

172

Py_MEMCPY(count, vargs, sizeof(va_list));

173

#else

174

#ifdef __va_copy

175

__va_copy(count, vargs);

176

#else

177

count = vargs;

178

#endif

179

#endif

180

/* step 1: figure out how large a buffer we need */

181

for (f = format; *f; f++) {

182

if (*f == '%') {

183

const char* p = f;

184

while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))

185

;

186

187

/* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since

188

* they don't affect the amount of space we reserve.

189

190

if ((*f == 'l' || *f == 'z') &&

191

(f[1] == 'd' || f[1] == 'u'))

192

++f;

193

194

switch (*f) {

195

case 'c':

196

(void)va_arg(count, int);

197

/* fall through... */

198

case '%':

199

n++;

200

break;

201

case 'd': case 'u': case 'i': case 'x':

202

(void) va_arg(count, int);

203

/* 20 bytes is enough to hold a 64-bit

204

integer. Decimal takes the most space.

205

This isn't enough for octal. */

206

n += 20;

207

break;

208

case 's':

209

s = va_arg(count, char*);

210

n += strlen(s);

211

break;

212

case 'p':

213

(void) va_arg(count, int);

214

/* maximum 64-bit pointer representation:

215

* 0xffffffffffffffff

216

* so 19 characters is enough.

217

* XXX I count 18 -- what's the extra for?

218

219

n += 19;

220

break;

221

default:

222

/* if we stumble upon an unknown

223

formatting code, copy the rest of

224

the format string to the output

225

string. (we cannot just skip the

226

code, since there's no way to know

227

what's in the argument list) */

228

n += strlen(p);

229

goto expand;

230

}

231

} else

232

n++;

233

}

234

expand:

235

/* step 2: fill the buffer */

236

/* Since we've analyzed how much space we need for the worst case,

237

use sprintf directly instead of the slower PyOS_snprintf. */

238

string = PyString_FromStringAndSize(NULL, n);

239

if (!string)

240

return NULL;

241

242

s = PyString_AsString(string);

243

244

for (f = format; *f; f++) {

245

if (*f == '%') {

246

const char* p = f++;

247

Py_ssize_t i;

248

int longflag = 0;

249

int size_tflag = 0;

250

/* parse the width.precision part (we're only

251

interested in the precision value, if any) */

252

n = 0;

253

while (isdigit(Py_CHARMASK(*f)))

254

n = (n*10) + *f++ - '0';

255

if (*f == '.') {

256

f++;

257

n = 0;

258

while (isdigit(Py_CHARMASK(*f)))

259

n = (n*10) + *f++ - '0';

260

}

261

while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))

262

f++;

263

/* handle the long flag, but only for %ld and %lu.

264

others can be added when necessary. */

265

if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {

266

longflag = 1;

267

++f;

268

}

269

/* handle the size_t flag. */

270

if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {

271

size_tflag = 1;

272

++f;

273

}

274

275

switch (*f) {

276

case 'c':

277

*s++ = va_arg(vargs, int);

278

break;

279

case 'd':

280

if (longflag)

281

sprintf(s, "%ld", va_arg(vargs, long));

282

else if (size_tflag)

283

sprintf(s, "%" PY_FORMAT_SIZE_T "d",

284

va_arg(vargs, Py_ssize_t));

285

else

286

sprintf(s, "%d", va_arg(vargs, int));

287

s += strlen(s);

288

break;

289

case 'u':

290

if (longflag)

291

sprintf(s, "%lu",

292

va_arg(vargs, unsigned long));

293

else if (size_tflag)

294

sprintf(s, "%" PY_FORMAT_SIZE_T "u",

295

va_arg(vargs, size_t));

296

else

297

sprintf(s, "%u",

298

va_arg(vargs, unsigned int));

299

s += strlen(s);

300

break;

301

case 'i':

302

sprintf(s, "%i", va_arg(vargs, int));

303

s += strlen(s);

304

break;

305

case 'x':

306

sprintf(s, "%x", va_arg(vargs, int));

307

s += strlen(s);

308

break;

309

case 's':

310

p = va_arg(vargs, char*);

311

i = strlen(p);

312

if (n > 0 && i > n)

313

i = n;

314

Py_MEMCPY(s, p, i);

315

s += i;

316

break;

317

case 'p':

318

sprintf(s, "%p", va_arg(vargs, void*));

319

/* %p is ill-defined: ensure leading 0x. */

320

if (s[1] == 'X')

321

s[1] = 'x';

322

else if (s[1] != 'x') {

323

memmove(s+2, s, strlen(s)+1);

324

s[0] = '0';

325

s[1] = 'x';

326

}

327

s += strlen(s);

328

break;

329

case '%':

330

*s++ = '%';

331

break;

332

default:

333

strcpy(s, p);

334

s += strlen(s);

335

goto end;

336

}

337

} else

338

*s++ = *f;

339

}

340

341

end:

342

_PyString_Resize(&string, s - PyString_AS_STRING(string));

343

return string;

344

}

345

346

PyObject *

347

PyString_FromFormat(const char *format, ...)

348

{

349

PyObject* ret;

350

va_list vargs;

351

352

#ifdef HAVE_STDARG_PROTOTYPES

353

va_start(vargs, format);

354

#else

355

va_start(vargs);

356

#endif

357

ret = PyString_FromFormatV(format, vargs);

358

va_end(vargs);

359

return ret;

360

}

361

362

363

PyObject *PyString_Decode(const char *s,

364

Py_ssize_t size,

365

const char *encoding,

366

const char *errors)

367

{

368

PyObject *v, *str;

369

370

str = PyString_FromStringAndSize(s, size);

371

if (str == NULL)

372

return NULL;

373

v = PyString_AsDecodedString(str, encoding, errors);

374

Py_DECREF(str);

375

return v;

376

}

377

378

/* Decode the string object `str` through the codec registry.
 *
 * `str` must be a string object, otherwise PyErr_BadArgument() is raised.
 * A NULL `encoding` selects the default encoding when Unicode support is
 * compiled in, and is a ValueError otherwise.  The decoder's result is
 * returned unchanged (NULL on failure).
 */
PyObject *PyString_AsDecodedObject(PyObject *str,
                                   const char *encoding,
                                   const char *errors)
{
    if (!PyString_Check(str)) {
        PyErr_BadArgument();
        return NULL;
    }

    if (encoding == NULL) {
#ifdef Py_USING_UNICODE
        encoding = PyUnicode_GetDefaultEncoding();
#else
        PyErr_SetString(PyExc_ValueError, "no encoding specified");
        return NULL;
#endif
    }

    /* Decode via the codec registry; NULL propagates to the caller. */
    return PyCodec_Decode(str, encoding, errors);
}

408

409

/* Like PyString_AsDecodedObject(), but the result must be a string.
 *
 * A Unicode result is converted to a string with the default encoding
 * (when Unicode support is compiled in); any other non-string result is
 * released and reported as a TypeError.
 */
PyObject *PyString_AsDecodedString(PyObject *str,
                                   const char *encoding,
                                   const char *errors)
{
    PyObject *result = PyString_AsDecodedObject(str, encoding, errors);

    if (result == NULL)
        return NULL;

#ifdef Py_USING_UNICODE
    /* Convert Unicode to a string using the default encoding */
    if (PyUnicode_Check(result)) {
        PyObject *as_unicode = result;
        result = PyUnicode_AsEncodedString(as_unicode, NULL, NULL);
        Py_DECREF(as_unicode);
        if (result == NULL)
            return NULL;
    }
#endif
    if (PyString_Check(result))
        return result;

    PyErr_Format(PyExc_TypeError,
                 "decoder did not return a string object (type=%.400s)",
                 Py_TYPE(result)->tp_name);
    Py_DECREF(result);
    return NULL;
}

442

443

PyObject *PyString_Encode(const char *s,

444

Py_ssize_t size,

445

const char *encoding,

446

const char *errors)

447

{

448

PyObject *v, *str;

449

450

str = PyString_FromStringAndSize(s, size);

451

if (str == NULL)

452

return NULL;

453

v = PyString_AsEncodedString(str, encoding, errors);

454

Py_DECREF(str);

455

return v;

456

}

457

458

/* Encode the string object `str` through the codec registry.
 *
 * `str` must be a string object, otherwise PyErr_BadArgument() is raised.
 * A NULL `encoding` selects the default encoding when Unicode support is
 * compiled in, and is a ValueError otherwise.  The encoder's result is
 * returned unchanged (NULL on failure).
 */
PyObject *PyString_AsEncodedObject(PyObject *str,
                                   const char *encoding,
                                   const char *errors)
{
    if (!PyString_Check(str)) {
        PyErr_BadArgument();
        return NULL;
    }

    if (encoding == NULL) {
#ifdef Py_USING_UNICODE
        encoding = PyUnicode_GetDefaultEncoding();
#else
        PyErr_SetString(PyExc_ValueError, "no encoding specified");
        return NULL;
#endif
    }

    /* Encode via the codec registry; NULL propagates to the caller. */
    return PyCodec_Encode(str, encoding, errors);
}

488

489

/* Like PyString_AsEncodedObject(), but the result must be a string.
 *
 * A Unicode result is converted to a string with the default encoding
 * (when Unicode support is compiled in); any other non-string result is
 * released and reported as a TypeError.
 */
PyObject *PyString_AsEncodedString(PyObject *str,
                                   const char *encoding,
                                   const char *errors)
{
    PyObject *result = PyString_AsEncodedObject(str, encoding, errors);

    if (result == NULL)
        return NULL;

#ifdef Py_USING_UNICODE
    /* Convert Unicode to a string using the default encoding */
    if (PyUnicode_Check(result)) {
        PyObject *as_unicode = result;
        result = PyUnicode_AsEncodedString(as_unicode, NULL, NULL);
        Py_DECREF(as_unicode);
        if (result == NULL)
            return NULL;
    }
#endif
    if (PyString_Check(result))
        return result;

    PyErr_Format(PyExc_TypeError,
                 "encoder did not return a string object (type=%.400s)",
                 Py_TYPE(result)->tp_name);
    Py_DECREF(result);
    return NULL;
}

522

523

static void

524

string_dealloc(PyObject *op)

525

{

526

switch (PyString_CHECK_INTERNED(op)) {

527

case SSTATE_NOT_INTERNED:

528

break;

529

530

case SSTATE_INTERNED_MORTAL:

531

/* revive dead object temporarily for DelItem */

532

Py_REFCNT(op) = 3;

533

if (PyDict_DelItem(interned, op) != 0)

534

Py_FatalError(

535

"deletion of interned string failed");

536

break;

537

538

case SSTATE_INTERNED_IMMORTAL:

539

Py_FatalError("Immortal interned string died.");

540

541

default:

542

Py_FatalError("Inconsistent interned string state.");

543

}

544

Py_TYPE(op)->tp_free(op);

545

}

546

547

/* Unescape a backslash-escaped string. If unicode is non-zero,

548

the string is a u-literal. If recode_encoding is non-zero,

549

the string is UTF-8 encoded and should be re-encoded in the

550

specified encoding. */

551

552

PyObject *PyString_DecodeEscape(const char *s,

553

Py_ssize_t len,

554

const char *errors,

555

Py_ssize_t unicode,

556

const char *recode_encoding)

557

{

558

int c;

559

char *p, *buf;

560

const char *end;

561

PyObject *v;

562

Py_ssize_t newlen = recode_encoding ? 4*len:len;

563

v = PyString_FromStringAndSize((char *)NULL, newlen);

564

if (v == NULL)

565

return NULL;

566

p = buf = PyString_AsString(v);

567

end = s + len;

568

while (s < end) {

569

if (*s != '\\') {

570

non_esc:

571

#ifdef Py_USING_UNICODE

572

if (recode_encoding && (*s & 0x80)) {

573

PyObject *u, *w;

574

char *r;

575

const char* t;

576

Py_ssize_t rn;

577

t = s;

578

/* Decode non-ASCII bytes as UTF-8. */

579

while (t < end && (*t & 0x80)) t++;

580

u = PyUnicode_DecodeUTF8(s, t - s, errors);

581

if(!u) goto failed;

582

583

/* Recode them in target encoding. */

584

w = PyUnicode_AsEncodedString(

585

u, recode_encoding, errors);

586

Py_DECREF(u);

587

if (!w) goto failed;

588

589

/* Append bytes to output buffer. */

590

assert(PyString_Check(w));

591

r = PyString_AS_STRING(w);

592

rn = PyString_GET_SIZE(w);

593

Py_MEMCPY(p, r, rn);

594

p += rn;

595

Py_DECREF(w);

596

s = t;

597

} else {

598

*p++ = *s++;

599

}

600

#else

601

*p++ = *s++;

602

#endif

603

continue;

604

}

605

s++;

606

if (s==end) {

607

PyErr_SetString(PyExc_ValueError,

608

"Trailing \\ in string");

609

goto failed;

610

}

611

switch (*s++) {

612

/* XXX This assumes ASCII! */

613

case '\n': break;

614

case '\\': *p++ = '\\'; break;

615

case '\'': *p++ = '\''; break;

616

case '\"': *p++ = '\"'; break;

617

case 'b': *p++ = '\b'; break;

618

case 'f': *p++ = '\014'; break; /* FF */

619

case 't': *p++ = '\t'; break;

620

case 'n': *p++ = '\n'; break;

621

case 'r': *p++ = '\r'; break;

622

case 'v': *p++ = '\013'; break; /* VT */

623

case 'a': *p++ = '\007'; break; /* BEL, not classic C */

624

case '0': case '1': case '2': case '3':

625

case '4': case '5': case '6': case '7':

626

c = s[-1] - '0';

627

if (s < end && '0' <= *s && *s <= '7') {

628

c = (c<<3) + *s++ - '0';

629

if (s < end && '0' <= *s && *s <= '7')

630

c = (c<<3) + *s++ - '0';

631

}

632

*p++ = c;

633

break;

634

case 'x':

635

if (s+1 < end &&

636

isxdigit(Py_CHARMASK(s[0])) &&

637

isxdigit(Py_CHARMASK(s[1])))

638

{

639

unsigned int x = 0;

640

c = Py_CHARMASK(*s);

641

s++;

642

if (isdigit(c))

643

x = c - '0';

644

else if (islower(c))

645

x = 10 + c - 'a';

646

else

647

x = 10 + c - 'A';

648

x = x << 4;

649

c = Py_CHARMASK(*s);

650

s++;

651

if (isdigit(c))

652

x += c - '0';

653

else if (islower(c))

654

x += 10 + c - 'a';

655

else

656

x += 10 + c - 'A';

657

*p++ = x;

658

break;

659

}

660

if (!errors || strcmp(errors, "strict") == 0) {

661

PyErr_SetString(PyExc_ValueError,

662

"invalid \\x escape");

663

goto failed;

664

}

665

if (strcmp(errors, "replace") == 0) {

666

*p++ = '?';

667

} else if (strcmp(errors, "ignore") == 0)

668

/* do nothing */;

669

else {

670

PyErr_Format(PyExc_ValueError,

671

"decoding error; "

672

"unknown error handling code: %.400s",

673

errors);

674

goto failed;

675

}

676

#ifndef Py_USING_UNICODE

677

case 'u':

678

case 'U':

679

case 'N':

680

if (unicode) {

681

PyErr_SetString(PyExc_ValueError,

682

"Unicode escapes not legal "

683

"when Unicode disabled");

684

goto failed;

685

}

686

#endif

687

default:

688

*p++ = '\\';

689

s--;

690

goto non_esc; /* an arbitry number of unescaped

691

UTF-8 bytes may follow. */

692

}

693

}

694

if (p-buf < newlen)

695

_PyString_Resize(&v, p - buf);

696

return v;

697

failed:

698

Py_DECREF(v);

699

return NULL;

700

}

701

702

/* -------------------------------------------------------------------- */

703

/* object api */

704

705

static Py_ssize_t

706

string_getsize(register PyObject *op)

707

{

708

char *s;

709

Py_ssize_t len;

710

if (PyString_AsStringAndSize(op, &s, &len))

711

return -1;

712

return len;

713

}

714

715

static /*const*/ char *

716

string_getbuffer(register PyObject *op)

717

{

718

char *s;

719

Py_ssize_t len;

720

if (PyString_AsStringAndSize(op, &s, &len))

721

return NULL;

722

return s;

723

}

724

725

Py_ssize_t

726

PyString_Size(register PyObject *op)

727

{

728

if (!PyString_Check(op))

729

return string_getsize(op);

730

return Py_SIZE(op);

731

}

732

733

/*const*/ char *

734

PyString_AsString(register PyObject *op)

735

{

736

if (!PyString_Check(op))

737

return string_getbuffer(op);

738

return ((PyStringObject *)op) -> ob_sval;

739

}

740

741

int

742

PyString_AsStringAndSize(register PyObject *obj,

743

744

745

{

746

if (s == NULL) {

747

PyErr_BadInternalCall();

748

return -1;

749

}

750

751

if (!PyString_Check(obj)) {

752

#ifdef Py_USING_UNICODE

753

if (PyUnicode_Check(obj)) {

754

obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);

755

if (obj == NULL)

756

return -1;

757

}

758

else

759

#endif

760

{

761

PyErr_Format(PyExc_TypeError,

762

"expected string or Unicode object, "

763

"%.200s found", Py_TYPE(obj)->tp_name);

764

return -1;

765

}

766

}

767

768

*s = PyString_AS_STRING(obj);

769

if (len != NULL)

770

*len = PyString_GET_SIZE(obj);

771

else if (strlen(*s) != (size_t)PyString_GET_SIZE(obj)) {

772

PyErr_SetString(PyExc_TypeError,

773

"expected string without null bytes");

774

return -1;

775

}

776

return 0;

777

}

778

779

/* -------------------------------------------------------------------- */

780

/* Methods */

781

782

#include "stringlib/stringdefs.h"

783

#include "stringlib/fastsearch.h"

784

785

#include "stringlib/count.h"

786

#include "stringlib/find.h"

787

#include "stringlib/partition.h"

788

789

#define _Py_InsertThousandsGrouping _PyString_InsertThousandsGrouping

790

#include "stringlib/localeutil.h"

791

792

793

794

static int

795

string_print(PyStringObject *op, FILE *fp, int flags)

796

{

797

Py_ssize_t i, str_len;

798

char c;

799

int quote;

800

801

/* XXX Ought to check for interrupts when writing long strings */

802

if (! PyString_CheckExact(op)) {

803

int ret;

804

/* A str subclass may have its own __str__ method. */

805

op = (PyStringObject *) PyObject_Str((PyObject *)op);

806

if (op == NULL)

807

return -1;

808

ret = string_print(op, fp, flags);

809

Py_DECREF(op);

810

return ret;

811

}

812

if (flags & Py_PRINT_RAW) {

813

char *data = op->ob_sval;

814

Py_ssize_t size = Py_SIZE(op);

815

Py_BEGIN_ALLOW_THREADS

816

while (size > INT_MAX) {

817

/* Very long strings cannot be written atomically.

818

* But don't write exactly INT_MAX bytes at a time

819

* to avoid memory aligment issues.

820

821

const int chunk_size = INT_MAX & ~0x3FFF;

822

fwrite(data, 1, chunk_size, fp);

823

data += chunk_size;

824

size -= chunk_size;

825

}

826

#ifdef __VMS

827

if (size) fwrite(data, (int)size, 1, fp);

828

#else

829

fwrite(data, 1, (int)size, fp);

830

#endif

831

Py_END_ALLOW_THREADS

832

return 0;

833

}

834

835

/* figure out which quote to use; single is preferred */

836

quote = '\'';

837

if (memchr(op->ob_sval, '\'', Py_SIZE(op)) &&

838

!memchr(op->ob_sval, '"', Py_SIZE(op)))

839

quote = '"';

840

841

str_len = Py_SIZE(op);

842

Py_BEGIN_ALLOW_THREADS

843

fputc(quote, fp);

844

for (i = 0; i < str_len; i++) {

845

/* Since strings are immutable and the caller should have a

846

reference, accessing the interal buffer should not be an issue

847

with the GIL released. */

848

c = op->ob_sval[i];

849

if (c == quote || c == '\\')

850

fprintf(fp, "\\%c", c);

851

else if (c == '\t')

852

fprintf(fp, "\\t");

853

else if (c == '\n')

854

fprintf(fp, "\\n");

855

else if (c == '\r')

856

fprintf(fp, "\\r");

857

else if (c < ' ' || c >= 0x7f)

858

fprintf(fp, "\\x%02x", c & 0xff);

859

else

860

fputc(c, fp);

861

}

862

fputc(quote, fp);

863

Py_END_ALLOW_THREADS

864

return 0;

865

}

866

867

/* Return the quoted, escaped representation of string `obj`.
 *
 * Single quotes are used unless `smartquotes` is true and the string
 * contains a single quote but no double quote.  The quote character,
 * backslash, tab, newline, carriage return and non-printable bytes are
 * written as backslash escapes.
 *
 * Returns a new reference, or NULL with an exception set.
 */
PyObject *
PyString_Repr(PyObject *obj, int smartquotes)
{
    register PyStringObject* op = (PyStringObject*) obj;
    /* Worst case: every byte becomes a 4-byte \xNN escape, plus quotes. */
    size_t newsize = 2 + 4 * Py_SIZE(op);
    PyObject *v;
    if (newsize > PY_SSIZE_T_MAX || newsize / 4 != Py_SIZE(op)) {
        PyErr_SetString(PyExc_OverflowError,
            "string is too large to make repr");
        return NULL;
    }
    v = PyString_FromStringAndSize((char *)NULL, newsize);
    if (v == NULL) {
        return NULL;
    }
    else {
        register Py_ssize_t i;
        register char c;
        register char *p;
        int quote;

        /* figure out which quote to use; single is preferred */
        quote = '\'';
        if (smartquotes &&
            memchr(op->ob_sval, '\'', Py_SIZE(op)) &&
            !memchr(op->ob_sval, '"', Py_SIZE(op)))
            quote = '"';

        p = PyString_AS_STRING(v);
        *p++ = quote;
        for (i = 0; i < Py_SIZE(op); i++) {
            /* There's at least enough room for a hex escape
               and a closing quote. */
            assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
            c = op->ob_sval[i];
            if (c == quote || c == '\\')
                *p++ = '\\', *p++ = c;
            else if (c == '\t')
                *p++ = '\\', *p++ = 't';
            else if (c == '\n')
                *p++ = '\\', *p++ = 'n';
            else if (c == '\r')
                *p++ = '\\', *p++ = 'r';
            else if (c < ' ' || c >= 0x7f) {
                /* For performance, we don't want to call
                   PyOS_snprintf here (extra layers of
                   function call). */
                sprintf(p, "\\x%02x", c & 0xff);
                p += 4;
            }
            else
                *p++ = c;
        }
        assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
        *p++ = quote;
        *p = '\0';
        /* Trim the worst-case buffer to what was written. */
        _PyString_Resize(
            &v, (p - PyString_AS_STRING(v)));
        return v;
    }
}

928

929

/* repr() implementation: PyString_Repr() with smart quoting enabled. */
static PyObject *
string_repr(PyObject *op)
{
    const int smartquotes = 1;

    return PyString_Repr(op, smartquotes);
}

934

935

/* str() implementation: an exact string is returned as-is (with a new
 * reference); a subtype instance is copied into a genuine string object.
 */
static PyObject *
string_str(PyObject *s)
{
    PyStringObject *base;

    assert(PyString_Check(s));
    if (!PyString_CheckExact(s)) {
        /* Subtype -- return genuine string with the same value. */
        base = (PyStringObject *) s;
        return PyString_FromStringAndSize(base->ob_sval, Py_SIZE(base));
    }
    Py_INCREF(s);
    return s;
}

949

950

static Py_ssize_t

951

string_length(PyStringObject *a)

952

{

953

return Py_SIZE(a);

954

}

955

956

static PyObject *

957

string_concat(register PyStringObject *a, register PyObject *bb)

958

{

959

960

961

if (!PyString_Check(bb)) {

962

#ifdef Py_USING_UNICODE

963

if (PyUnicode_Check(bb))

964

return PyUnicode_Concat((PyObject *)a, bb);

965

#endif

966

if (PyByteArray_Check(bb))

967

return PyByteArray_Concat((PyObject *)a, bb);

968

PyErr_Format(PyExc_TypeError,

969

"cannot concatenate 'str' and '%.200s' objects",

970

Py_TYPE(bb)->tp_name);

971

return NULL;

972

}

973

#define b ((PyStringObject *)bb)

974

/* Optimize cases with empty left or right operand */

975

if ((Py_SIZE(a) == 0 || Py_SIZE(b) == 0) &&

976

PyString_CheckExact(a) && PyString_CheckExact(b)) {

977

if (Py_SIZE(a) == 0) {

978

Py_INCREF(bb);

979

return bb;

980

}

981

Py_INCREF(a);

982

return (PyObject *)a;

983

}

984

size = Py_SIZE(a) + Py_SIZE(b);

985

/* Check that string sizes are not negative, to prevent an

986

overflow in cases where we are passed incorrectly-created

987

strings with negative lengths (due to a bug in other code).

988

989

if (Py_SIZE(a) < 0 || Py_SIZE(b) < 0 ||

990

Py_SIZE(a) > PY_SSIZE_T_MAX - Py_SIZE(b)) {

991

PyErr_SetString(PyExc_OverflowError,

992

"strings are too large to concat");

993

return NULL;

994

}

995

996

/* Inline PyObject_NewVar */

997

if (size > PY_SSIZE_T_MAX - sizeof(PyStringObject)) {

998

PyErr_SetString(PyExc_OverflowError,

999

"strings are too large to concat");

1000

return NULL;

1001

}

1002

op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);

1003

if (op == NULL)

1004

return PyErr_NoMemory();

1005

PyObject_INIT_VAR(op, &PyString_Type, size);

1006

op->ob_shash = -1;

1007

op->ob_sstate = SSTATE_NOT_INTERNED;

1008

Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));

1009

Py_MEMCPY(op->ob_sval + Py_SIZE(a), b->ob_sval, Py_SIZE(b));

1010

op->ob_sval[size] = '\0';

1011

return (PyObject *) op;

1012

#undef b

1013

}

1014

1015

static PyObject *

1016

string_repeat(register PyStringObject *a, register Py_ssize_t n)

1017

{

1018

1019

1020

1021

1022

size_t nbytes;

1023

if (n < 0)

1024

n = 0;

1025

/* watch out for overflows: the size can overflow int,

1026

* and the # of bytes needed can overflow size_t

1027

1028

size = Py_SIZE(a) * n;

1029

if (n && size / n != Py_SIZE(a)) {

1030

PyErr_SetString(PyExc_OverflowError,

1031

"repeated string is too long");

1032

return NULL;

1033

}

1034

if (size == Py_SIZE(a) && PyString_CheckExact(a)) {

1035

Py_INCREF(a);

1036

return (PyObject *)a;

1037

}

1038

nbytes = (size_t)size;

1039

if (nbytes + sizeof(PyStringObject) <= nbytes) {

1040

PyErr_SetString(PyExc_OverflowError,

1041

"repeated string is too long");

1042

return NULL;

1043

}

1044

op = (PyStringObject *)

1045

PyObject_MALLOC(sizeof(PyStringObject) + nbytes);

1046

if (op == NULL)

1047

return PyErr_NoMemory();

1048

PyObject_INIT_VAR(op, &PyString_Type, size);

1049

op->ob_shash = -1;

1050

op->ob_sstate = SSTATE_NOT_INTERNED;

1051

op->ob_sval[size] = '\0';

1052

if (Py_SIZE(a) == 1 && n > 0) {

1053

memset(op->ob_sval, a->ob_sval[0] , n);

1054

return (PyObject *) op;

1055

}

1056

i = 0;

1057

if (i < size) {

1058

Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));

1059

i = Py_SIZE(a);

1060

}

1061

while (i < size) {

1062

j = (i <= size-i) ? i : size-i;

1063

Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);

1064

i += j;

1065

}

1066

return (PyObject *) op;

1067

}

1068

1069

/* String slice a[i:j] consists of characters a[i] ... a[j-1] */

1070

1071

static PyObject *

1072

string_slice(register PyStringObject *a, register Py_ssize_t i,

1073

1074

/* j -- may be negative! */

1075

{

1076

if (i < 0)

1077

i = 0;

1078

if (j < 0)

1079

j = 0; /* Avoid signed/unsigned bug in next line */

1080

if (j > Py_SIZE(a))

1081

j = Py_SIZE(a);

1082

if (i == 0 && j == Py_SIZE(a) && PyString_CheckExact(a)) {

1083

/* It's the same as a */

1084

Py_INCREF(a);

1085

return (PyObject *)a;

1086

}

1087

if (j < i)

1088

j = i;

1089

return PyString_FromStringAndSize(a->ob_sval + i, j-i);

1090

}

1091

1092

static int

1093

string_contains(PyObject *str_obj, PyObject *sub_obj)

1094

{

1095

if (!PyString_CheckExact(sub_obj)) {

1096

#ifdef Py_USING_UNICODE

1097

if (PyUnicode_Check(sub_obj))

1098

return PyUnicode_Contains(str_obj, sub_obj);

1099

#endif

1100

if (!PyString_Check(sub_obj)) {

1101

PyErr_Format(PyExc_TypeError,

1102

"'in <string>' requires string as left operand, "

1103

"not %.200s", Py_TYPE(sub_obj)->tp_name);

1104

return -1;

1105

}

1106

}

1107

1108

return stringlib_contains_obj(str_obj, sub_obj);

1109

}

1110

1111

static PyObject *

1112

string_item(PyStringObject *a, register Py_ssize_t i)

1113

{

1114

char pchar;

1115

PyObject *v;

1116

if (i < 0 || i >= Py_SIZE(a)) {

1117

PyErr_SetString(PyExc_IndexError, "string index out of range");

1118

return NULL;

1119

}

1120

pchar = a->ob_sval[i];

1121

v = (PyObject *)characters[pchar & UCHAR_MAX];

1122

if (v == NULL)

1123

v = PyString_FromStringAndSize(&pchar, 1);

1124

else {

1125

#ifdef COUNT_ALLOCS

1126

one_strings++;

1127

#endif

1128

Py_INCREF(v);

1129

}

1130

return v;

1131

}

1132

1133

static PyObject*

1134

string_richcompare(PyStringObject *a, PyStringObject *b, int op)

1135

{

1136

int c;

1137

Py_ssize_t len_a, len_b;

1138

Py_ssize_t min_len;

1139

PyObject *result;

1140

1141

/* Make sure both arguments are strings. */

1142

if (!(PyString_Check(a) && PyString_Check(b))) {

1143

result = Py_NotImplemented;

1144

goto out;

1145

}

1146

if (a == b) {

1147

switch (op) {

1148

case Py_EQ:case Py_LE:case Py_GE:

1149

result = Py_True;

1150

goto out;

1151

case Py_NE:case Py_LT:case Py_GT:

1152

result = Py_False;

1153

goto out;

1154

}

1155

}

1156

if (op == Py_EQ) {

1157

/* Supporting Py_NE here as well does not save

1158

much time, since Py_NE is rarely used. */

1159

if (Py_SIZE(a) == Py_SIZE(b)

1160

&& (a->ob_sval[0] == b->ob_sval[0]

1161

&& memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0)) {

1162

result = Py_True;

1163

} else {

1164

result = Py_False;

1165

}

1166

goto out;

1167

}

1168

len_a = Py_SIZE(a); len_b = Py_SIZE(b);

1169

min_len = (len_a < len_b) ? len_a : len_b;

1170

if (min_len > 0) {

1171

c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);

1172

if (c==0)

1173

c = memcmp(a->ob_sval, b->ob_sval, min_len);

1174

} else

1175

c = 0;

1176

if (c == 0)

1177

c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;

1178

switch (op) {

1179

case Py_LT: c = c < 0; break;

1180

case Py_LE: c = c <= 0; break;

1181

case Py_EQ: assert(0); break; /* unreachable */

1182

case Py_NE: c = c != 0; break;

1183

case Py_GT: c = c > 0; break;

1184

case Py_GE: c = c >= 0; break;

1185

default:

1186

result = Py_NotImplemented;

1187

goto out;

1188

}

1189

result = c ? Py_True : Py_False;

1190

out:

1191

Py_INCREF(result);

1192

return result;

1193

}

1194

1195

int

1196

_PyString_Eq(PyObject *o1, PyObject *o2)

1197

{

1198

PyStringObject *a = (PyStringObject*) o1;

1199

PyStringObject *b = (PyStringObject*) o2;

1200

return Py_SIZE(a) == Py_SIZE(b)

1201

&& *a->ob_sval == *b->ob_sval

1202

&& memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0;

1203

}

1204

1205

static long

1206

string_hash(PyStringObject *a)

1207

{

1208

1209

1210

1211

1212

if (a->ob_shash != -1)

1213

return a->ob_shash;

1214

len = Py_SIZE(a);

1215

p = (unsigned char *) a->ob_sval;

1216

x = *p << 7;

1217

while (--len >= 0)

1218

x = (1000003*x) ^ *p++;

1219

x ^= Py_SIZE(a);

1220

if (x == -1)

1221

x = -2;

1222

a->ob_shash = x;

1223

return x;

1224

}

1225

1226

static PyObject*

1227

string_subscript(PyStringObject* self, PyObject* item)

1228

{

1229

if (PyIndex_Check(item)) {

1230

Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);

1231

if (i == -1 && PyErr_Occurred())

1232

return NULL;

1233

if (i < 0)

1234

i += PyString_GET_SIZE(self);

1235

return string_item(self, i);

1236

}

1237

else if (PySlice_Check(item)) {

1238

Py_ssize_t start, stop, step, slicelength, cur, i;

1239

char* source_buf;

1240

char* result_buf;

1241

PyObject* result;

1242

1243

if (PySlice_GetIndicesEx((PySliceObject*)item,

1244

PyString_GET_SIZE(self),

1245

&start, &stop, &step, &slicelength) < 0) {

1246

return NULL;

1247

}

1248

1249

if (slicelength <= 0) {

1250

return PyString_FromStringAndSize("", 0);

1251

}

1252

else if (start == 0 && step == 1 &&

1253

slicelength == PyString_GET_SIZE(self) &&

1254

PyString_CheckExact(self)) {

1255

Py_INCREF(self);

1256

return (PyObject *)self;

1257

}

1258

else if (step == 1) {

1259

return PyString_FromStringAndSize(

1260

PyString_AS_STRING(self) + start,

1261

slicelength);

1262

}

1263

else {

1264

source_buf = PyString_AsString((PyObject*)self);

1265

result_buf = (char *)PyMem_Malloc(slicelength);

1266

if (result_buf == NULL)

1267

return PyErr_NoMemory();

1268

1269

for (cur = start, i = 0; i < slicelength;

1270

cur += step, i++) {

1271

result_buf[i] = source_buf[cur];

1272

}

1273

1274

result = PyString_FromStringAndSize(result_buf,

1275

slicelength);

1276

PyMem_Free(result_buf);

1277

return result;

1278

}

1279

}

1280

else {

1281

PyErr_Format(PyExc_TypeError,

1282

"string indices must be integers, not %.200s",

1283

Py_TYPE(item)->tp_name);

1284

return NULL;

1285

}

1286

}

1287

1288

static Py_ssize_t

1289

string_buffer_getreadbuf(PyStringObject *self, Py_ssize_t index, const void **ptr)

1290

{

1291

if ( index != 0 ) {

1292

PyErr_SetString(PyExc_SystemError,

1293

"accessing non-existent string segment");

1294

return -1;

1295

}

1296

*ptr = (void *)self->ob_sval;

1297

return Py_SIZE(self);

1298

}

1299

1300

static Py_ssize_t

1301

string_buffer_getwritebuf(PyStringObject *self, Py_ssize_t index, const void **ptr)

1302

{

1303

PyErr_SetString(PyExc_TypeError,

1304

"Cannot use string as modifiable buffer");

1305

return -1;

1306

}

1307

1308

static Py_ssize_t

1309

string_buffer_getsegcount(PyStringObject *self, Py_ssize_t *lenp)

1310

{

1311

if ( lenp )

1312

*lenp = Py_SIZE(self);

1313

return 1;

1314

}

1315

1316

static Py_ssize_t

1317

string_buffer_getcharbuf(PyStringObject *self, Py_ssize_t index, const char **ptr)

1318

{

1319

if ( index != 0 ) {

1320

PyErr_SetString(PyExc_SystemError,

1321

"accessing non-existent string segment");

1322

return -1;

1323

}

1324

*ptr = self->ob_sval;

1325

return Py_SIZE(self);

1326

}

1327

1328

static int

1329

string_buffer_getbuffer(PyStringObject *self, Py_buffer *view, int flags)

1330

{

1331

return PyBuffer_FillInfo(view, (PyObject*)self,

1332

(void *)self->ob_sval, Py_SIZE(self),

1333

1, flags);

1334

}

1335

1336

/* Sequence slot table for string objects; the item- and slice-assignment
 * slots are left empty.
 */
static PySequenceMethods string_as_sequence = {
    (lenfunc)string_length,             /* sq_length */
    (binaryfunc)string_concat,          /* sq_concat */
    (ssizeargfunc)string_repeat,        /* sq_repeat */
    (ssizeargfunc)string_item,          /* sq_item */
    (ssizessizeargfunc)string_slice,    /* sq_slice */
    0,                                  /* sq_ass_item */
    0,                                  /* sq_ass_slice */
    (objobjproc)string_contains         /* sq_contains */
};

1346

1347

/* Mapping slot table for string objects: length and subscript lookup.
 * The third slot is explicitly empty (presumably mp_ass_subscript --
 * confirm against the PyMappingMethods declaration).
 */
static PyMappingMethods string_as_mapping = {
    (lenfunc)string_length,
    (binaryfunc)string_subscript,
    0,
};

1352

1353

/* Buffer slot table for string objects.  The final slot is left empty
 * (marked XXX in the original).
 */
static PyBufferProcs string_as_buffer = {
    (readbufferproc)string_buffer_getreadbuf,     /* read buffer */
    (writebufferproc)string_buffer_getwritebuf,   /* write buffer */
    (segcountproc)string_buffer_getsegcount,      /* segment count */
    (charbufferproc)string_buffer_getcharbuf,     /* char buffer */
    (getbufferproc)string_buffer_getbuffer,       /* get buffer */
    0, /* XXX */
};

1361

1362

1363

1364

/* Strip modes; each value is also an index into stripformat[]. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Arrays indexed by above */
static const char *stripformat[] = {
    "|O:lstrip",
    "|O:rstrip",
    "|O:strip"
};

/* Method name for a strip mode: the format string minus its 3-character
   "|O:" prefix. */
#define STRIPNAME(i) (stripformat[i]+3)

1372

1373

1374

/* True when the `length` bytes of `target` starting at `offset` equal the
   first `length` bytes of `pattern`.  The first and last bytes are compared
   directly and memcmp covers the bytes in between.
   Don't call if length < 2 (the memcmp length would be negative).
   Every argument is parenthesized so that expression arguments expand
   correctly; arguments are evaluated more than once. */
#define Py_STRING_MATCH(target, offset, pattern, length) \
    ((target)[(offset)] == (pattern)[0] && \
     (target)[(offset)+(length)-1] == (pattern)[(length)-1] && \
     !memcmp((target)+(offset)+1, (pattern)+1, (length)-2) )

1379

1380

1381

/* The result list of a split is over-allocated up front so that small
   splits need fewer reallocs.  Eg, "A A A A A A A A A A".split()
   (10 elements) would otherwise resize three times, to sizes 4, 8, then 16.
   Most observed string splits are for human text (roughly 11 words per
   line) and field delimited data (usually 1-10 fields).  For large strings
   the split algorithms are bandwidth limited, so a larger preallocation
   likely will not improve things. */

#define MAX_PREALLOC 12

/* Initial list size for a given split limit; 5 splits gives 6 elements. */
#define PREALLOC_SIZE(maxsplit) \
    ((maxsplit) >= MAX_PREALLOC ? MAX_PREALLOC : (maxsplit)+1)

/* Append data[left:right] to `list` as a new string.  Relies on the
   enclosing function providing locals `str` and `list` and an `onError`
   label. */
#define SPLIT_APPEND(data, left, right)                         \
    str = PyString_FromStringAndSize((data) + (left),           \
                                     (right) - (left));         \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (PyList_Append(list, str)) {                             \
        Py_DECREF(str);                                         \
        goto onError;                                           \
    }                                                           \
    else                                                        \
        Py_DECREF(str);

/* Store data[left:right] as element number `count` of `list`: directly
   into a preallocated slot while count < MAX_PREALLOC, by appending
   afterwards.  Relies on locals `str`, `list`, `count` and an `onError`
   label; increments `count`. */
#define SPLIT_ADD(data, left, right) {                          \
    str = PyString_FromStringAndSize((data) + (left),           \
                                     (right) - (left));         \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (count < MAX_PREALLOC) {                                 \
        PyList_SET_ITEM(list, count, str);                      \
    } else {                                                    \
        if (PyList_Append(list, str)) {                         \
            Py_DECREF(str);                                     \
            goto onError;                                       \
        }                                                       \
        else                                                    \
            Py_DECREF(str);                                     \
    }                                                           \
    count++; }

/* Always force the list to the expected size. */
#define FIX_PREALLOC_SIZE(list) Py_SIZE(list) = count

1425

1426

/* Cursor helpers for whitespace splitting.  The forward variants advance
   index `i` while it is below `len`; the R* variants move `i` backwards
   while it is non-negative.  `i` must be a modifiable lvalue. */
#define SKIP_SPACE(s, i, len)    { while (i<len && isspace(Py_CHARMASK((s)[i]))) i++; }
#define SKIP_NONSPACE(s, i, len) { while (i<len && !isspace(Py_CHARMASK((s)[i]))) i++; }
#define RSKIP_SPACE(s, i)        { while (i>=0 && isspace(Py_CHARMASK((s)[i]))) i--; }
#define RSKIP_NONSPACE(s, i)     { while (i>=0 && !isspace(Py_CHARMASK((s)[i]))) i--; }

1430

1431

/* Split `self` (of length `len`) on runs of whitespace, performing at most
   `maxsplit` splits.  Returns a new list, or NULL on failure.
   The locals `str`, `list` and `count` and the `onError` label are used
   by the SPLIT_ADD / FIX_PREALLOC_SIZE macros. */
Py_LOCAL_INLINE(PyObject *)
split_whitespace(PyStringObject *self, Py_ssize_t len, Py_ssize_t maxsplit)
{
    const char *s = PyString_AS_STRING(self);
    Py_ssize_t pos = 0;         /* scan cursor */
    Py_ssize_t word = 0;        /* start of the current word */
    Py_ssize_t count = 0;
    PyObject *str;
    PyObject *list;

    list = PyList_New(PREALLOC_SIZE(maxsplit));
    if (list == NULL)
        return NULL;

    for (; maxsplit > 0; maxsplit--) {
        SKIP_SPACE(s, pos, len);
        if (pos == len)
            break;
        word = pos;
        pos++;
        SKIP_NONSPACE(s, pos, len);
        if (word == 0 && pos == len && PyString_CheckExact(self)) {
            /* The whole string is a single word: reuse it as list[0]
               instead of copying. */
            Py_INCREF(self);
            PyList_SET_ITEM(list, 0, (PyObject *)self);
            count++;
            break;
        }
        SPLIT_ADD(s, word, pos);
    }

    if (pos < len) {
        /* Only reached when the split limit ran out: drop leading
           whitespace and keep whatever is left as the last element. */
        SKIP_SPACE(s, pos, len);
        if (pos != len) {
            SPLIT_ADD(s, pos, len);
        }
    }
    FIX_PREALLOC_SIZE(list);
    return list;

  onError:
    Py_DECREF(list);
    return NULL;
}

1472

1473

/* Split `self` (of length `len`) on the single byte `ch`, performing at
   most `maxcount` splits.  Returns a new list, or NULL on failure.
   The locals `str`, `list` and `count` and the `onError` label are used
   by the SPLIT_ADD / FIX_PREALLOC_SIZE macros. */
Py_LOCAL_INLINE(PyObject *)
split_char(PyStringObject *self, Py_ssize_t len, char ch, Py_ssize_t maxcount)
{
    const char *s = PyString_AS_STRING(self);
    /* i: start of the pending piece, j: scan cursor, count: pieces stored.
       These were used below without any declaration. */
    Py_ssize_t i, j, count = 0;
    PyObject *str;
    PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));

    if (list == NULL)
        return NULL;

    i = j = 0;
    while ((j < len) && (maxcount-- > 0)) {
        for (; j < len; j++) {
            /* I found that using memchr makes no difference */
            if (s[j] == ch) {
                SPLIT_ADD(s, i, j);
                i = j = j + 1;
                break;
            }
        }
    }
    if (i == 0 && count == 0 && PyString_CheckExact(self)) {
        /* ch not in self, so just use self as list[0] */
        Py_INCREF(self);
        PyList_SET_ITEM(list, 0, (PyObject *)self);
        count++;
    }
    else if (i <= len) {
        /* Remainder after the last separator (possibly empty). */
        SPLIT_ADD(s, i, len);
    }
    FIX_PREALLOC_SIZE(list);
    return list;

  onError:
    Py_DECREF(list);
    return NULL;
}

1511

1512

PyDoc_STRVAR(split__doc__,

1513

"S.split([sep [,maxsplit]]) -> list of strings\n\

1514

\n\

1515

Return a list of the words in the string S, using sep as the\n\

1516

delimiter string. If maxsplit is given, at most maxsplit\n\

1517

splits are done. If sep is not specified or is None, any\n\

1518

whitespace string is a separator and empty strings are removed\n\

1519

from the result.");

1520

1521

static PyObject *

1522

string_split(PyStringObject *self, PyObject *args)

1523

{

1524

Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;

1525

Py_ssize_t maxsplit = -1, count=0;

1526

const char *s = PyString_AS_STRING(self), *sub;

1527

PyObject *list, *str, *subobj = Py_None;

1528

#ifdef USE_FAST

1529

Py_ssize_t pos;

1530

#endif

1531

1532

if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))

1533

return NULL;

1534

if (maxsplit < 0)

1535

maxsplit = PY_SSIZE_T_MAX;

1536

if (subobj == Py_None)

1537

return split_whitespace(self, len, maxsplit);

1538

if (PyString_Check(subobj)) {

1539

sub = PyString_AS_STRING(subobj);

1540

n = PyString_GET_SIZE(subobj);

1541

}

1542

#ifdef Py_USING_UNICODE

1543

else if (PyUnicode_Check(subobj))

1544

return PyUnicode_Split((PyObject *)self, subobj, maxsplit);

1545

#endif

1546

else if (PyObject_AsCharBuffer(subobj, &sub, &n))

1547

return NULL;

1548

1549

if (n == 0) {

1550

PyErr_SetString(PyExc_ValueError, "empty separator");

1551

return NULL;

1552

}

1553

else if (n == 1)

1554

return split_char(self, len, sub[0], maxsplit);

1555

1556

list = PyList_New(PREALLOC_SIZE(maxsplit));

1557

if (list == NULL)

1558

return NULL;

1559

1560

#ifdef USE_FAST

1561

i = j = 0;

1562

while (maxsplit-- > 0) {

1563

pos = fastsearch(s+i, len-i, sub, n, FAST_SEARCH);

1564

if (pos < 0)

1565

break;

1566

j = i+pos;

1567

SPLIT_ADD(s, i, j);

1568

i = j + n;

1569

}

1570

#else

1571

i = j = 0;

1572

while ((j+n <= len) && (maxsplit-- > 0)) {

1573

for (; j+n <= len; j++) {

1574

if (Py_STRING_MATCH(s, j, sub, n)) {

1575

SPLIT_ADD(s, i, j);

1576

i = j = j + n;

1577

break;

1578

}

1579

}

1580

}

1581

#endif

1582

SPLIT_ADD(s, i, len);

1583

FIX_PREALLOC_SIZE(list);

1584

return list;

1585

1586

onError:

1587

Py_DECREF(list);

1588

return NULL;

1589

}

1590

1591

PyDoc_STRVAR(partition__doc__,

1592

"S.partition(sep) -> (head, sep, tail)\n\

1593

\n\

1594

Searches for the separator sep in S, and returns the part before it,\n\

1595

the separator itself, and the part after it. If the separator is not\n\

1596

found, returns S and two empty strings.");

1597

1598

static PyObject *

1599

string_partition(PyStringObject *self, PyObject *sep_obj)

1600

{

1601

const char *sep;

1602

Py_ssize_t sep_len;

1603

1604

if (PyString_Check(sep_obj)) {

1605

sep = PyString_AS_STRING(sep_obj);

1606

sep_len = PyString_GET_SIZE(sep_obj);

1607

}

1608

#ifdef Py_USING_UNICODE

1609

else if (PyUnicode_Check(sep_obj))

1610

return PyUnicode_Partition((PyObject *) self, sep_obj);

1611

#endif

1612

else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))

1613

return NULL;

1614

1615

return stringlib_partition(

1616

(PyObject*) self,

1617

PyString_AS_STRING(self), PyString_GET_SIZE(self),

1618

sep_obj, sep, sep_len

1619

);

1620

}

1621

1622

PyDoc_STRVAR(rpartition__doc__,

1623

"S.rpartition(sep) -> (tail, sep, head)\n\

1624

\n\

1625

Searches for the separator sep in S, starting at the end of S, and returns\n\

1626

the part before it, the separator itself, and the part after it. If the\n\

1627

separator is not found, returns two empty strings and S.");

1628

1629

static PyObject *

1630

string_rpartition(PyStringObject *self, PyObject *sep_obj)

1631

{

1632

const char *sep;

1633

Py_ssize_t sep_len;

1634

1635

if (PyString_Check(sep_obj)) {

1636

sep = PyString_AS_STRING(sep_obj);

1637

sep_len = PyString_GET_SIZE(sep_obj);

1638

}

1639

#ifdef Py_USING_UNICODE

1640

else if (PyUnicode_Check(sep_obj))

1641

return PyUnicode_RPartition((PyObject *) self, sep_obj);

1642

#endif

1643

else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))

1644

return NULL;

1645

1646

return stringlib_rpartition(

1647

(PyObject*) self,

1648

PyString_AS_STRING(self), PyString_GET_SIZE(self),

1649

sep_obj, sep, sep_len

1650

);

1651

}

1652

1653

/* Split `self` (of length `len`) on runs of whitespace, scanning from the
   right and performing at most `maxsplit` splits.  Pieces are collected
   right-to-left and the list is reversed before returning.  Returns a new
   list, or NULL on failure.  The locals `str`, `list` and `count` and the
   `onError` label are used by the SPLIT_ADD / FIX_PREALLOC_SIZE macros. */
Py_LOCAL_INLINE(PyObject *)
rsplit_whitespace(PyStringObject *self, Py_ssize_t len, Py_ssize_t maxsplit)
{
    const char *s = PyString_AS_STRING(self);
    Py_ssize_t pos = len - 1;       /* scan cursor, moving left */
    Py_ssize_t last = len - 1;      /* last character of the current word */
    Py_ssize_t count = 0;
    PyObject *str;
    PyObject *list;

    list = PyList_New(PREALLOC_SIZE(maxsplit));
    if (list == NULL)
        return NULL;

    for (; maxsplit > 0; maxsplit--) {
        RSKIP_SPACE(s, pos);
        if (pos < 0)
            break;
        last = pos;
        pos--;
        RSKIP_NONSPACE(s, pos);
        if (last == len-1 && pos < 0 && PyString_CheckExact(self)) {
            /* The whole string is a single word: reuse it as list[0]
               instead of copying. */
            Py_INCREF(self);
            PyList_SET_ITEM(list, 0, (PyObject *)self);
            count++;
            break;
        }
        SPLIT_ADD(s, pos + 1, last + 1);
    }

    if (pos >= 0) {
        /* Only reached when the split limit ran out: drop trailing
           whitespace and keep the remaining prefix as one element. */
        RSKIP_SPACE(s, pos);
        if (pos >= 0) {
            SPLIT_ADD(s, 0, pos + 1);
        }
    }
    FIX_PREALLOC_SIZE(list);
    if (PyList_Reverse(list) < 0)
        goto onError;
    return list;

  onError:
    Py_DECREF(list);
    return NULL;
}

1696

1697

/* Split `self` (of length `len`) on the single byte `ch`, scanning from
   the right and performing at most `maxcount` splits.  Pieces are
   collected right-to-left and the list is reversed before returning.
   Returns a new list, or NULL on failure.  The locals `str`, `list` and
   `count` and the `onError` label are used by the SPLIT_ADD /
   FIX_PREALLOC_SIZE macros. */
Py_LOCAL_INLINE(PyObject *)
rsplit_char(PyStringObject *self, Py_ssize_t len, char ch, Py_ssize_t maxcount)
{
    const char *s = PyString_AS_STRING(self);
    /* i: scan cursor, j: last character of the pending piece,
       count: pieces stored.  These were used below without any
       declaration. */
    Py_ssize_t i, j, count = 0;
    PyObject *str;
    PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));

    if (list == NULL)
        return NULL;

    i = j = len - 1;
    while ((i >= 0) && (maxcount-- > 0)) {
        for (; i >= 0; i--) {
            if (s[i] == ch) {
                SPLIT_ADD(s, i + 1, j + 1);
                j = i = i - 1;
                break;
            }
        }
    }
    if (i < 0 && count == 0 && PyString_CheckExact(self)) {
        /* ch not in self, so just use self as list[0] */
        Py_INCREF(self);
        PyList_SET_ITEM(list, 0, (PyObject *)self);
        count++;
    }
    else if (j >= -1) {
        /* Remaining prefix before the left-most split (possibly empty). */
        SPLIT_ADD(s, 0, j + 1);
    }
    FIX_PREALLOC_SIZE(list);
    if (PyList_Reverse(list) < 0)
        goto onError;
    return list;

  onError:
    Py_DECREF(list);
    return NULL;
}

1736

1737

PyDoc_STRVAR(rsplit__doc__,

1738

"S.rsplit([sep [,maxsplit]]) -> list of strings\n\

1739

\n\

1740

Return a list of the words in the string S, using sep as the\n\

1741

delimiter string, starting at the end of the string and working\n\

1742

to the front. If maxsplit is given, at most maxsplit splits are\n\

1743

done. If sep is not specified or is None, any whitespace string\n\

1744

is a separator.");

1745

1746

static PyObject *

1747

string_rsplit(PyStringObject *self, PyObject *args)

1748

{

1749

Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;

1750

Py_ssize_t maxsplit = -1, count=0;

1751

const char *s, *sub;

1752

PyObject *list, *str, *subobj = Py_None;

1753

1754

if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))

1755

return NULL;

1756

if (maxsplit < 0)

1757

maxsplit = PY_SSIZE_T_MAX;

1758

if (subobj == Py_None)

1759

return rsplit_whitespace(self, len, maxsplit);

1760

if (PyString_Check(subobj)) {

1761

sub = PyString_AS_STRING(subobj);

1762

n = PyString_GET_SIZE(subobj);

1763

}

1764

#ifdef Py_USING_UNICODE

1765

else if (PyUnicode_Check(subobj))

1766

return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);

1767

#endif

1768

else if (PyObject_AsCharBuffer(subobj, &sub, &n))

1769

return NULL;

1770

1771

if (n == 0) {

1772

PyErr_SetString(PyExc_ValueError, "empty separator");

1773

return NULL;

1774

}

1775

else if (n == 1)

1776

return rsplit_char(self, len, sub[0], maxsplit);

1777

1778

list = PyList_New(PREALLOC_SIZE(maxsplit));

1779

if (list == NULL)

1780

return NULL;

1781

1782

j = len;

1783

i = j - n;

1784

1785

s = PyString_AS_STRING(self);

1786

while ( (i >= 0) && (maxsplit-- > 0) ) {

1787

for (; i>=0; i--) {

1788

if (Py_STRING_MATCH(s, i, sub, n)) {

1789

SPLIT_ADD(s, i + n, j);

1790

j = i;

1791

i -= n;

1792

break;

1793

}

1794

}

1795

}

1796

SPLIT_ADD(s, 0, j);

1797

FIX_PREALLOC_SIZE(list);

1798

if (PyList_Reverse(list) < 0)

1799

goto onError;

1800

return list;

1801

1802

onError:

1803

Py_DECREF(list);

1804

return NULL;

1805

}

1806

1807

1808

PyDoc_STRVAR(join__doc__,

1809

"S.join(sequence) -> string\n\

1810

\n\

1811

Return a string which is the concatenation of the strings in the\n\

1812

sequence. The separator between elements is S.");

1813

1814

static PyObject *

1815

string_join(PyStringObject *self, PyObject *orig)

1816

{

1817

char *sep = PyString_AS_STRING(self);

1818

const Py_ssize_t seplen = PyString_GET_SIZE(self);

1819

PyObject *res = NULL;

1820

char *p;

1821

Py_ssize_t seqlen = 0;

1822

size_t sz = 0;

1823

Py_ssize_t i;

1824

PyObject *seq, *item;

1825

1826

seq = PySequence_Fast(orig, "");

1827

if (seq == NULL) {

1828

return NULL;

1829

}

1830

1831

seqlen = PySequence_Size(seq);

1832

if (seqlen == 0) {

1833

Py_DECREF(seq);

1834

return PyString_FromString("");

1835

}

1836

if (seqlen == 1) {

1837

item = PySequence_Fast_GET_ITEM(seq, 0);

1838

if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) {

1839

Py_INCREF(item);

1840

Py_DECREF(seq);

1841

return item;

1842

}

1843

}

1844

1845

/* There are at least two things to join, or else we have a subclass

1846

* of the builtin types in the sequence.

1847

* Do a pre-pass to figure out the total amount of space we'll

1848

* need (sz), see whether any argument is absurd, and defer to

1849

* the Unicode join if appropriate.

1850

1851

for (i = 0; i < seqlen; i++) {

1852

const size_t old_sz = sz;

1853

item = PySequence_Fast_GET_ITEM(seq, i);

1854

if (!PyString_Check(item)){

1855

#ifdef Py_USING_UNICODE

1856

if (PyUnicode_Check(item)) {

1857

/* Defer to Unicode join.

1858

* CAUTION: There's no gurantee that the

1859

* original sequence can be iterated over

1860

* again, so we must pass seq here.

1861

1862

PyObject *result;

1863

result = PyUnicode_Join((PyObject *)self, seq);

1864

Py_DECREF(seq);

1865

return result;

1866

}

1867

#endif

1868

PyErr_Format(PyExc_TypeError,

1869

"sequence item %zd: expected string,"

1870

" %.80s found",

1871

i, Py_TYPE(item)->tp_name);

1872

Py_DECREF(seq);

1873

return NULL;

1874

}

1875

sz += PyString_GET_SIZE(item);

1876

if (i != 0)

1877

sz += seplen;

1878

if (sz < old_sz || sz > PY_SSIZE_T_MAX) {

1879

PyErr_SetString(PyExc_OverflowError,

1880

"join() result is too long for a Python string");

1881

Py_DECREF(seq);

1882

return NULL;

1883

}

1884

}

1885

1886

/* Allocate result space. */

1887

res = PyString_FromStringAndSize((char*)NULL, sz);

1888

if (res == NULL) {

1889

Py_DECREF(seq);

1890

return NULL;

1891

}

1892

1893

/* Catenate everything. */

1894

p = PyString_AS_STRING(res);

1895

for (i = 0; i < seqlen; ++i) {

1896

size_t n;

1897

item = PySequence_Fast_GET_ITEM(seq, i);

1898

n = PyString_GET_SIZE(item);

1899

Py_MEMCPY(p, PyString_AS_STRING(item), n);

1900

p += n;

1901

if (i < seqlen - 1) {

1902

Py_MEMCPY(p, sep, seplen);

1903

p += seplen;

1904

}

1905

}

1906

1907

Py_DECREF(seq);

1908

return res;

1909

}

1910

1911

/* Non-static entry point to string_join for callers holding `sep` as a
   plain PyObject*.  `sep` must be a non-NULL string object and `x` must be
   non-NULL; both are only checked via assert. */
PyObject *
_PyString_Join(PyObject *sep, PyObject *x)
{
    PyStringObject *separator;

    assert(sep != NULL && PyString_Check(sep));
    assert(x != NULL);

    separator = (PyStringObject *)sep;
    return string_join(separator, x);
}

1918

1919

Py_LOCAL_INLINE(void)

1920

string_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)

1921

{

1922

if (*end > len)

1923

*end = len;

1924

else if (*end < 0)

1925

*end += len;

1926

if (*end < 0)

1927

*end = 0;

1928

if (*start < 0)

1929

*start += len;

1930

if (*start < 0)

1931

*start = 0;

1932

}

1933

1934

Py_LOCAL_INLINE(Py_ssize_t)

1935

string_find_internal(PyStringObject *self, PyObject *args, int dir)

1936

{

1937

PyObject *subobj;

1938

const char *sub;

1939

Py_ssize_t sub_len;

1940

Py_ssize_t start=0, end=PY_SSIZE_T_MAX;

1941

PyObject *obj_start=Py_None, *obj_end=Py_None;

1942

1943

if (!PyArg_ParseTuple(args, "O|OO:find/rfind/index/rindex", &subobj,

1944

&obj_start, &obj_end))

1945

return -2;

1946

/* To support None in "start" and "end" arguments, meaning

1947

the same as if they were not passed.

1948

1949

if (obj_start != Py_None)

1950

if (!_PyEval_SliceIndex(obj_start, &start))

1951

return -2;

1952

if (obj_end != Py_None)

1953

if (!_PyEval_SliceIndex(obj_end, &end))

1954

return -2;

1955

1956

if (PyString_Check(subobj)) {

1957

sub = PyString_AS_STRING(subobj);

1958

sub_len = PyString_GET_SIZE(subobj);

1959

}

1960

#ifdef Py_USING_UNICODE

1961

else if (PyUnicode_Check(subobj))

1962

return PyUnicode_Find(

1963

(PyObject *)self, subobj, start, end, dir);

1964

#endif

1965

else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))

1966

/* XXX - the "expected a character buffer object" is pretty

1967

confusing for a non-expert. remap to something else ? */

1968

return -2;

1969

1970

if (dir > 0)

1971

return stringlib_find_slice(

1972

PyString_AS_STRING(self), PyString_GET_SIZE(self),

1973

sub, sub_len, start, end);

1974

else

1975

return stringlib_rfind_slice(

1976

PyString_AS_STRING(self), PyString_GET_SIZE(self),

1977

sub, sub_len, start, end);

1978

}

1979

1980

1981

PyDoc_STRVAR(find__doc__,

1982

"S.find(sub [,start [,end]]) -> int\n\

1983

\n\

1984

Return the lowest index in S where substring sub is found,\n\

1985

such that sub is contained within s[start:end]. Optional\n\

1986

arguments start and end are interpreted as in slice notation.\n\

1987

\n\

1988

Return -1 on failure.");

1989

1990

static PyObject *

1991

string_find(PyStringObject *self, PyObject *args)

1992

{

1993

Py_ssize_t result = string_find_internal(self, args, +1);

1994

if (result == -2)

1995

return NULL;

1996

return PyInt_FromSsize_t(result);

1997

}

1998

1999

2000

PyDoc_STRVAR(index__doc__,

2001

"S.index(sub [,start [,end]]) -> int\n\

2002

\n\

2003

Like S.find() but raise ValueError when the substring is not found.");

2004

2005

static PyObject *

2006

string_index(PyStringObject *self, PyObject *args)

2007

{

2008

Py_ssize_t result = string_find_internal(self, args, +1);

2009

if (result == -2)

2010

return NULL;

2011

if (result == -1) {

2012

PyErr_SetString(PyExc_ValueError,

2013

"substring not found");

2014

return NULL;

2015

}

2016

return PyInt_FromSsize_t(result);

2017

}

2018

2019

2020

PyDoc_STRVAR(rfind__doc__,

2021

"S.rfind(sub [,start [,end]]) -> int\n\

2022

\n\

2023

Return the highest index in S where substring sub is found,\n\

2024

such that sub is contained within s[start:end]. Optional\n\

2025

arguments start and end are interpreted as in slice notation.\n\

2026

\n\

2027

Return -1 on failure.");

2028

2029

static PyObject *

2030

string_rfind(PyStringObject *self, PyObject *args)

2031

{

2032

Py_ssize_t result = string_find_internal(self, args, -1);

2033

if (result == -2)

2034

return NULL;

2035

return PyInt_FromSsize_t(result);

2036

}

2037

2038

2039

PyDoc_STRVAR(rindex__doc__,

2040

"S.rindex(sub [,start [,end]]) -> int\n\

2041

\n\

2042

Like S.rfind() but raise ValueError when the substring is not found.");

2043

2044

static PyObject *

2045

string_rindex(PyStringObject *self, PyObject *args)

2046

{

2047

Py_ssize_t result = string_find_internal(self, args, -1);

2048

if (result == -2)

2049

return NULL;

2050

if (result == -1) {

2051

PyErr_SetString(PyExc_ValueError,

2052

"substring not found");

2053

return NULL;

2054

}

2055

return PyInt_FromSsize_t(result);

2056

}

2057

2058

2059

/* Strip from `self` every leading and/or trailing byte (per `striptype`)
   that occurs in the string object `sepobj`.  Returns `self` itself when
   nothing was removed and it is an exact str; otherwise a new string. */
Py_LOCAL_INLINE(PyObject *)
do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
{
    char *s = PyString_AS_STRING(self);
    Py_ssize_t len = PyString_GET_SIZE(self);
    char *sep = PyString_AS_STRING(sepobj);
    Py_ssize_t seplen = PyString_GET_SIZE(sepobj);
    Py_ssize_t left = 0;        /* first byte kept */
    Py_ssize_t right = len;     /* one past the last byte kept */

    if (striptype != RIGHTSTRIP) {
        while (left < len &&
               memchr(sep, Py_CHARMASK(s[left]), seplen) != NULL) {
            left++;
        }
    }

    if (striptype != LEFTSTRIP) {
        while (right > left &&
               memchr(sep, Py_CHARMASK(s[right-1]), seplen) != NULL) {
            right--;
        }
    }

    if (left == 0 && right == len && PyString_CheckExact(self)) {
        Py_INCREF(self);
        return (PyObject*)self;
    }
    return PyString_FromStringAndSize(s+left, right-left);
}

2090

2091

2092

/* Strip leading and/or trailing whitespace (per `striptype`) from `self`.
   Returns `self` itself when nothing was removed and it is an exact str;
   otherwise a new string. */
Py_LOCAL_INLINE(PyObject *)
do_strip(PyStringObject *self, int striptype)
{
    char *s = PyString_AS_STRING(self);
    Py_ssize_t len = PyString_GET_SIZE(self);
    Py_ssize_t left = 0;        /* first byte kept */
    Py_ssize_t right = len;     /* one past the last byte kept */

    if (striptype != RIGHTSTRIP) {
        while (left < len && isspace(Py_CHARMASK(s[left]))) {
            left++;
        }
    }

    if (striptype != LEFTSTRIP) {
        while (right > left && isspace(Py_CHARMASK(s[right-1]))) {
            right--;
        }
    }

    if (left == 0 && right == len && PyString_CheckExact(self)) {
        Py_INCREF(self);
        return (PyObject*)self;
    }
    return PyString_FromStringAndSize(s+left, right-left);
}

2120

2121

2122

/* Strip variant taking an argument tuple with an optional `chars` object.
   Missing/None -> whitespace strip; str -> do_xstrip; unicode -> convert
   self to unicode and strip there; anything else -> TypeError. */
Py_LOCAL_INLINE(PyObject *)
do_argstrip(PyStringObject *self, int striptype, PyObject *args)
{
    PyObject *sep = NULL;

    if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
        return NULL;

    if (sep == NULL || sep == Py_None)
        return do_strip(self, striptype);

    if (PyString_Check(sep))
        return do_xstrip(self, striptype, sep);

#ifdef Py_USING_UNICODE
    if (PyUnicode_Check(sep)) {
        PyObject *res;
        PyObject *uniself = PyUnicode_FromObject((PyObject *)self);

        if (uniself == NULL)
            return NULL;
        res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
                                striptype, sep);
        Py_DECREF(uniself);
        return res;
    }
#endif

    PyErr_Format(PyExc_TypeError,
#ifdef Py_USING_UNICODE
                 "%s arg must be None, str or unicode",
#else
                 "%s arg must be None or str",
#endif
                 STRIPNAME(striptype));
    return NULL;
}

2157

2158

2159

PyDoc_STRVAR(strip__doc__,

2160

"S.strip([chars]) -> string or unicode\n\

2161

\n\

2162

Return a copy of the string S with leading and trailing\n\

2163

whitespace removed.\n\

2164

If chars is given and not None, remove characters in chars instead.\n\

2165

If chars is unicode, S will be converted to unicode before stripping");

2166

2167

static PyObject *

2168

string_strip(PyStringObject *self, PyObject *args)

2169

{

2170

if (PyTuple_GET_SIZE(args) == 0)

2171

return do_strip(self, BOTHSTRIP); /* Common case */

2172

else

2173

return do_argstrip(self, BOTHSTRIP, args);

2174

}

2175

2176

2177

PyDoc_STRVAR(lstrip__doc__,

2178

"S.lstrip([chars]) -> string or unicode\n\

2179

\n\

2180

Return a copy of the string S with leading whitespace removed.\n\

2181

If chars is given and not None, remove characters in chars instead.\n\

2182

If chars is unicode, S will be converted to unicode before stripping");

2183

2184

static PyObject *

2185

string_lstrip(PyStringObject *self, PyObject *args)

2186

{

2187

if (PyTuple_GET_SIZE(args) == 0)

2188

return do_strip(self, LEFTSTRIP); /* Common case */

2189

else

2190

return do_argstrip(self, LEFTSTRIP, args);

2191

}

2192

2193

2194

PyDoc_STRVAR(rstrip__doc__,

2195

"S.rstrip([chars]) -> string or unicode\n\

2196

\n\

2197

Return a copy of the string S with trailing whitespace removed.\n\

2198

If chars is given and not None, remove characters in chars instead.\n\

2199

If chars is unicode, S will be converted to unicode before stripping");

2200

2201

static PyObject *

2202

string_rstrip(PyStringObject *self, PyObject *args)

2203

{

2204

if (PyTuple_GET_SIZE(args) == 0)

2205

return do_strip(self, RIGHTSTRIP); /* Common case */

2206

else

2207

return do_argstrip(self, RIGHTSTRIP, args);

2208

}

2209

2210

2211

PyDoc_STRVAR(lower__doc__,

2212

"S.lower() -> string\n\

2213

\n\

2214

Return a copy of the string S converted to lowercase.");

2215

2216

/* _tolower and _toupper are defined by SUSv2, but they're not ISO C */

2217

#ifndef _tolower

2218

#define _tolower tolower

2219

#endif

2220

2221

static PyObject *

2222

string_lower(PyStringObject *self)

2223

{

2224

char *s;

2225

Py_ssize_t i, n = PyString_GET_SIZE(self);

2226

PyObject *newobj;

2227

2228

newobj = PyString_FromStringAndSize(NULL, n);

2229

if (!newobj)

2230

return NULL;

2231

2232

s = PyString_AS_STRING(newobj);

2233

2234

Py_MEMCPY(s, PyString_AS_STRING(self), n);

2235

2236

for (i = 0; i < n; i++) {

2237

int c = Py_CHARMASK(s[i]);

2238

if (isupper(c))

2239

s[i] = _tolower(c);

2240

}

2241

2242

return newobj;

2243

}

2244

2245

PyDoc_STRVAR(upper__doc__,

2246

"S.upper() -> string\n\

2247

\n\

2248

Return a copy of the string S converted to uppercase.");

2249

2250

#ifndef _toupper

2251

#define _toupper toupper

2252

#endif

2253

2254

static PyObject *

2255

string_upper(PyStringObject *self)

2256

{

2257

char *s;

2258

Py_ssize_t i, n = PyString_GET_SIZE(self);

2259

PyObject *newobj;

2260

2261

newobj = PyString_FromStringAndSize(NULL, n);

2262

if (!newobj)

2263

return NULL;

2264

2265

s = PyString_AS_STRING(newobj);

2266

2267

Py_MEMCPY(s, PyString_AS_STRING(self), n);

2268

2269

for (i = 0; i < n; i++) {

2270

int c = Py_CHARMASK(s[i]);

2271

if (islower(c))

2272

s[i] = _toupper(c);

2273

}

2274

2275

return newobj;

2276

}

2277

2278

PyDoc_STRVAR(title__doc__,

2279

"S.title() -> string\n\

2280

\n\

2281

Return a titlecased version of S, i.e. words start with uppercase\n\

2282

characters, all remaining cased characters have lowercase.");

2283

2284

static PyObject*

2285

string_title(PyStringObject *self)

2286

{

2287

char *s = PyString_AS_STRING(self), *s_new;

2288

Py_ssize_t i, n = PyString_GET_SIZE(self);

2289

int previous_is_cased = 0;

2290

PyObject *newobj;

2291

2292

newobj = PyString_FromStringAndSize(NULL, n);

2293

if (newobj == NULL)

2294

return NULL;

2295

s_new = PyString_AsString(newobj);

2296

for (i = 0; i < n; i++) {

2297

int c = Py_CHARMASK(*s++);

2298

if (islower(c)) {

2299

if (!previous_is_cased)

2300

c = toupper(c);

2301

previous_is_cased = 1;

2302

} else if (isupper(c)) {

2303

if (previous_is_cased)

2304

c = tolower(c);

2305

previous_is_cased = 1;

2306

} else

2307

previous_is_cased = 0;

2308

*s_new++ = c;

2309

}

2310

return newobj;

2311

}

2312

2313

PyDoc_STRVAR(capitalize__doc__,

2314

"S.capitalize() -> string\n\

2315

\n\

2316

Return a copy of the string S with only its first character\n\

2317

capitalized.");

2318

2319

static PyObject *

2320

string_capitalize(PyStringObject *self)

2321

{

2322

char *s = PyString_AS_STRING(self), *s_new;

2323

Py_ssize_t i, n = PyString_GET_SIZE(self);

2324

PyObject *newobj;

2325

2326

newobj = PyString_FromStringAndSize(NULL, n);

2327

if (newobj == NULL)

2328

return NULL;

2329

s_new = PyString_AsString(newobj);

2330

if (0 < n) {

2331

int c = Py_CHARMASK(*s++);

2332

if (islower(c))

2333

*s_new = toupper(c);

2334

else

2335

*s_new = c;

2336

s_new++;

2337

}

2338

for (i = 1; i < n; i++) {

2339

int c = Py_CHARMASK(*s++);

2340

if (isupper(c))

2341

*s_new = tolower(c);

2342

else

2343

*s_new = c;

2344

s_new++;

2345

}

2346

return newobj;

2347

}

2348

2349

2350

PyDoc_STRVAR(count__doc__,

2351

"S.count(sub[, start[, end]]) -> int\n\

2352

\n\

2353

Return the number of non-overlapping occurrences of substring sub in\n\

2354

string S[start:end]. Optional arguments start and end are interpreted\n\

2355

as in slice notation.");

2356

2357

static PyObject *

2358

string_count(PyStringObject *self, PyObject *args)

2359

{

2360

PyObject *sub_obj;

2361

const char *str = PyString_AS_STRING(self), *sub;

2362

Py_ssize_t sub_len;

2363

Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;

2364

2365

if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,

2366

_PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))

2367

return NULL;

2368

2369

if (PyString_Check(sub_obj)) {

2370

sub = PyString_AS_STRING(sub_obj);

2371

sub_len = PyString_GET_SIZE(sub_obj);

2372

}

2373

#ifdef Py_USING_UNICODE

2374

else if (PyUnicode_Check(sub_obj)) {

2375

Py_ssize_t count;

2376

count = PyUnicode_Count((PyObject *)self, sub_obj, start, end);

2377

if (count == -1)

2378

return NULL;

2379

else

2380

return PyInt_FromSsize_t(count);

2381

}

2382

#endif

2383

else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))

2384

return NULL;

2385

2386

string_adjust_indices(&start, &end, PyString_GET_SIZE(self));

2387

2388

return PyInt_FromSsize_t(

2389

stringlib_count(str + start, end - start, sub, sub_len)

2390

);

2391

}

2392

2393

PyDoc_STRVAR(swapcase__doc__,

2394

"S.swapcase() -> string\n\

2395

\n\

2396

Return a copy of the string S with uppercase characters\n\

2397

converted to lowercase and vice versa.");

2398

2399

static PyObject *

2400

string_swapcase(PyStringObject *self)

2401

{

2402

char *s = PyString_AS_STRING(self), *s_new;

2403

Py_ssize_t i, n = PyString_GET_SIZE(self);

2404

PyObject *newobj;

2405

2406

newobj = PyString_FromStringAndSize(NULL, n);

2407

if (newobj == NULL)

2408

return NULL;

2409

s_new = PyString_AsString(newobj);

2410

for (i = 0; i < n; i++) {

2411

int c = Py_CHARMASK(*s++);

2412

if (islower(c)) {

2413

*s_new = toupper(c);

2414

}

2415

else if (isupper(c)) {

2416

*s_new = tolower(c);

2417

}

2418

else

2419

*s_new = c;

2420

s_new++;

2421

}

2422

return newobj;

2423

}

2424

2425

2426

PyDoc_STRVAR(translate__doc__,

2427

"S.translate(table [,deletechars]) -> string\n\

2428

\n\

2429

Return a copy of the string S, where all characters occurring\n\

2430

in the optional argument deletechars are removed, and the\n\

2431

remaining characters have been mapped through the given\n\

2432

translation table, which must be a string of length 256.");

2433

2434

static PyObject *

2435

string_translate(PyStringObject *self, PyObject *args)

2436

{

2437

2438

const char *table;

2439

2440

PyObject *input_obj = (PyObject*)self;

2441

const char *output_start, *del_table=NULL;

2442

Py_ssize_t inlen, tablen, dellen = 0;

2443

PyObject *result;

2444

int trans_table[256];

2445

PyObject *tableobj, *delobj = NULL;

2446

2447

if (!PyArg_UnpackTuple(args, "translate", 1, 2,

2448

&tableobj, &delobj))

2449

return NULL;

2450

2451

if (PyString_Check(tableobj)) {

2452

table = PyString_AS_STRING(tableobj);

2453

tablen = PyString_GET_SIZE(tableobj);

2454

}

2455

else if (tableobj == Py_None) {

2456

table = NULL;

2457

tablen = 256;

2458

}

2459

#ifdef Py_USING_UNICODE

2460

else if (PyUnicode_Check(tableobj)) {

2461

/* Unicode .translate() does not support the deletechars

2462

parameter; instead a mapping to None will cause characters

2463

to be deleted. */

2464

if (delobj != NULL) {

2465

PyErr_SetString(PyExc_TypeError,

2466

"deletions are implemented differently for unicode");

2467

return NULL;

2468

}

2469

return PyUnicode_Translate((PyObject *)self, tableobj, NULL);

2470

}

2471

#endif

2472

else if (PyObject_AsCharBuffer(tableobj, &table, &tablen))

2473

return NULL;

2474

2475

if (tablen != 256) {

2476

PyErr_SetString(PyExc_ValueError,

2477

"translation table must be 256 characters long");

2478

return NULL;

2479

}

2480

2481

if (delobj != NULL) {

2482

if (PyString_Check(delobj)) {

2483

del_table = PyString_AS_STRING(delobj);

2484

dellen = PyString_GET_SIZE(delobj);

2485

}

2486

#ifdef Py_USING_UNICODE

2487

else if (PyUnicode_Check(delobj)) {

2488

PyErr_SetString(PyExc_TypeError,

2489

"deletions are implemented differently for unicode");

2490

return NULL;

2491

}

2492

#endif

2493

else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))

2494

return NULL;

2495

}

2496

else {

2497

del_table = NULL;

2498

dellen = 0;

2499

}

2500

2501

inlen = PyString_GET_SIZE(input_obj);

2502

result = PyString_FromStringAndSize((char *)NULL, inlen);

2503

if (result == NULL)

2504

return NULL;

2505

output_start = output = PyString_AsString(result);

2506

input = PyString_AS_STRING(input_obj);

2507

2508

if (dellen == 0 && table != NULL) {

2509

/* If no deletions are required, use faster code */

2510

for (i = inlen; --i >= 0; ) {

2511

c = Py_CHARMASK(*input++);

2512

if (Py_CHARMASK((*output++ = table[c])) != c)

2513

changed = 1;

2514

}

2515

if (changed || !PyString_CheckExact(input_obj))

2516

return result;

2517

Py_DECREF(result);

2518

Py_INCREF(input_obj);

2519

return input_obj;

2520

}

2521

2522

if (table == NULL) {

2523

for (i = 0; i < 256; i++)

2524

trans_table[i] = Py_CHARMASK(i);

2525

} else {

2526

for (i = 0; i < 256; i++)

2527

trans_table[i] = Py_CHARMASK(table[i]);

2528

}

2529

2530

for (i = 0; i < dellen; i++)

2531

trans_table[(int) Py_CHARMASK(del_table[i])] = -1;

2532

2533

for (i = inlen; --i >= 0; ) {

2534

c = Py_CHARMASK(*input++);

2535

if (trans_table[c] != -1)

2536

if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)

2537

continue;

2538

changed = 1;

2539

}

2540

if (!changed && PyString_CheckExact(input_obj)) {

2541

Py_DECREF(result);

2542

Py_INCREF(input_obj);

2543

return input_obj;

2544

}

2545

/* Fix the size of the resulting string */

2546

if (inlen > 0)

2547

_PyString_Resize(&result, output - output_start);

2548

return result;

2549

}

2550

2551

2552

/* Search directions passed as the `direction` argument of findstring()
   and countstring(); both only test the sign of the value. */
#define FORWARD 1
#define REVERSE (-1)

/* find and count characters and substrings */

/* Pointer to the first byte equal to c among the first target_len bytes
   of target, or NULL when there is none (thin wrapper around memchr). */
#define findchar(target, target_len, c)                         \
    ((char *)memchr((const void *)(target), (c), (target_len)))

2559

2560

/* String ops must return a string. */

2561

/* If the object is subclass of string, create a copy */

2562

Py_LOCAL(PyStringObject *)

2563

return_self(PyStringObject *self)

2564

{

2565

if (PyString_CheckExact(self)) {

2566

Py_INCREF(self);

2567

return self;

2568

}

2569

return (PyStringObject *)PyString_FromStringAndSize(

2570

PyString_AS_STRING(self),

2571

PyString_GET_SIZE(self));

2572

}

2573

2574

Py_LOCAL_INLINE(Py_ssize_t)

2575

countchar(const char *target, int target_len, char c, Py_ssize_t maxcount)

2576

{

2577

Py_ssize_t count=0;

2578

const char *start=target;

2579

const char *end=target+target_len;

2580

2581

while ( (start=findchar(start, end-start, c)) != NULL ) {

2582

count++;

2583

if (count >= maxcount)

2584

break;

2585

start += 1;

2586

}

2587

return count;

2588

}

2589

2590

Py_LOCAL(Py_ssize_t)

2591

findstring(const char *target, Py_ssize_t target_len,

2592

const char *pattern, Py_ssize_t pattern_len,

2593

Py_ssize_t start,

2594

Py_ssize_t end,

2595

int direction)

2596

{

2597

if (start < 0) {

2598

start += target_len;

2599

if (start < 0)

2600

start = 0;

2601

}

2602

if (end > target_len) {

2603

end = target_len;

2604

} else if (end < 0) {

2605

end += target_len;

2606

if (end < 0)

2607

end = 0;

2608

}

2609

2610

/* zero-length substrings always match at the first attempt */

2611

if (pattern_len == 0)

2612

return (direction > 0) ? start : end;

2613

2614

end -= pattern_len;

2615

2616

if (direction < 0) {

2617

for (; end >= start; end--)

2618

if (Py_STRING_MATCH(target, end, pattern, pattern_len))

2619

return end;

2620

} else {

2621

for (; start <= end; start++)

2622

if (Py_STRING_MATCH(target, start, pattern, pattern_len))

2623

return start;

2624

}

2625

return -1;

2626

}

2627

2628

Py_LOCAL_INLINE(Py_ssize_t)

2629

countstring(const char *target, Py_ssize_t target_len,

2630

const char *pattern, Py_ssize_t pattern_len,

2631

Py_ssize_t start,

2632

Py_ssize_t end,

2633

int direction, Py_ssize_t maxcount)

2634

{

2635

Py_ssize_t count=0;

2636

2637

if (start < 0) {

2638

start += target_len;

2639

if (start < 0)

2640

start = 0;

2641

}

2642

if (end > target_len) {

2643

end = target_len;

2644

} else if (end < 0) {

2645

end += target_len;

2646

if (end < 0)

2647

end = 0;

2648

}

2649

2650

/* zero-length substrings match everywhere */

2651

if (pattern_len == 0 || maxcount == 0) {

2652

if (target_len+1 < maxcount)

2653

return target_len+1;

2654

return maxcount;

2655

}

2656

2657

end -= pattern_len;

2658

if (direction < 0) {

2659

for (; (end >= start); end--)

2660

if (Py_STRING_MATCH(target, end, pattern, pattern_len)) {

2661

count++;

2662

if (--maxcount <= 0) break;

2663

end -= pattern_len-1;

2664

}

2665

} else {

2666

for (; (start <= end); start++)

2667

if (Py_STRING_MATCH(target, start, pattern, pattern_len)) {

2668

count++;

2669

if (--maxcount <= 0)

2670

break;

2671

start += pattern_len-1;

2672

}

2673

}

2674

return count;

2675

}

2676

2677

2678

/* Algorithms for different cases of string replacement */

2679

2680

/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */

2681

Py_LOCAL(PyStringObject *)

2682

replace_interleave(PyStringObject *self,

2683

const char *to_s, Py_ssize_t to_len,

2684

Py_ssize_t maxcount)

2685

{

2686

char *self_s, *result_s;

2687

Py_ssize_t self_len, result_len;

2688

Py_ssize_t count, i, product;

2689

PyStringObject *result;

2690

2691

self_len = PyString_GET_SIZE(self);

2692

2693

/* 1 at the end plus 1 after every character */

2694

count = self_len+1;

2695

if (maxcount < count)

2696

count = maxcount;

2697

2698

/* Check for overflow */

2699

/* result_len = count * to_len + self_len; */

2700

product = count * to_len;

2701

if (product / to_len != count) {

2702

PyErr_SetString(PyExc_OverflowError,

2703

"replace string is too long");

2704

return NULL;

2705

}

2706

result_len = product + self_len;

2707

if (result_len < 0) {

2708

PyErr_SetString(PyExc_OverflowError,

2709

"replace string is too long");

2710

return NULL;

2711

}

2712

2713

if (! (result = (PyStringObject *)

2714

PyString_FromStringAndSize(NULL, result_len)) )

2715

return NULL;

2716

2717

self_s = PyString_AS_STRING(self);

2718

result_s = PyString_AS_STRING(result);

2719

2720

/* TODO: special case single character, which doesn't need memcpy */

2721

2722

/* Lay the first one down (guaranteed this will occur) */

2723

Py_MEMCPY(result_s, to_s, to_len);

2724

result_s += to_len;

2725

count -= 1;

2726

2727

for (i=0; i<count; i++) {

2728

*result_s++ = *self_s++;

2729

Py_MEMCPY(result_s, to_s, to_len);

2730

result_s += to_len;

2731

}

2732

2733

/* Copy the rest of the original string */

2734

Py_MEMCPY(result_s, self_s, self_len-i);

2735

2736

return result;

2737

}

2738

2739

/* Special case for deleting a single character */

2740

/* len(self)>=1, len(from)==1, to="", maxcount>=1 */

2741

Py_LOCAL(PyStringObject *)

2742

replace_delete_single_character(PyStringObject *self,

2743

char from_c, Py_ssize_t maxcount)

2744

{

2745

char *self_s, *result_s;

2746

char *start, *next, *end;

2747

Py_ssize_t self_len, result_len;

2748

Py_ssize_t count;

2749

PyStringObject *result;

2750

2751

self_len = PyString_GET_SIZE(self);

2752

self_s = PyString_AS_STRING(self);

2753

2754

count = countchar(self_s, self_len, from_c, maxcount);

2755

if (count == 0) {

2756

return return_self(self);

2757

}

2758

2759

result_len = self_len - count; /* from_len == 1 */

2760

assert(result_len>=0);

2761

2762

if ( (result = (PyStringObject *)

2763

PyString_FromStringAndSize(NULL, result_len)) == NULL)

2764

return NULL;

2765

result_s = PyString_AS_STRING(result);

2766

2767

start = self_s;

2768

end = self_s + self_len;

2769

while (count-- > 0) {

2770

next = findchar(start, end-start, from_c);

2771

if (next == NULL)

2772

break;

2773

Py_MEMCPY(result_s, start, next-start);

2774

result_s += (next-start);

2775

start = next+1;

2776

}

2777

Py_MEMCPY(result_s, start, end-start);

2778

2779

return result;

2780

}

2781

2782

/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */

2783

2784

Py_LOCAL(PyStringObject *)

2785

replace_delete_substring(PyStringObject *self,

2786

const char *from_s, Py_ssize_t from_len,

2787

Py_ssize_t maxcount) {

2788

char *self_s, *result_s;

2789

char *start, *next, *end;

2790

Py_ssize_t self_len, result_len;

2791

Py_ssize_t count, offset;

2792

PyStringObject *result;

2793

2794

self_len = PyString_GET_SIZE(self);

2795

self_s = PyString_AS_STRING(self);

2796

2797

count = countstring(self_s, self_len,

2798

from_s, from_len,

2799

0, self_len, 1,

2800

maxcount);

2801

2802

if (count == 0) {

2803

/* no matches */

2804

return return_self(self);

2805

}

2806

2807

result_len = self_len - (count * from_len);

2808

assert (result_len>=0);

2809

2810

if ( (result = (PyStringObject *)

2811

PyString_FromStringAndSize(NULL, result_len)) == NULL )

2812

return NULL;

2813

2814

result_s = PyString_AS_STRING(result);

2815

2816

start = self_s;

2817

end = self_s + self_len;

2818

while (count-- > 0) {

2819

offset = findstring(start, end-start,

2820

from_s, from_len,

2821

0, end-start, FORWARD);

2822

if (offset == -1)

2823

break;

2824

next = start + offset;

2825

2826

Py_MEMCPY(result_s, start, next-start);

2827

2828

result_s += (next-start);

2829

start = next+from_len;

2830

}

2831

Py_MEMCPY(result_s, start, end-start);

2832

return result;

2833

}

2834

2835

/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */

2836

Py_LOCAL(PyStringObject *)

2837

replace_single_character_in_place(PyStringObject *self,

2838

char from_c, char to_c,

2839

Py_ssize_t maxcount)

2840

{

2841

char *self_s, *result_s, *start, *end, *next;

2842

Py_ssize_t self_len;

2843

PyStringObject *result;

2844

2845

/* The result string will be the same size */

2846

self_s = PyString_AS_STRING(self);

2847

self_len = PyString_GET_SIZE(self);

2848

2849

next = findchar(self_s, self_len, from_c);

2850

2851

if (next == NULL) {

2852

/* No matches; return the original string */

2853

return return_self(self);

2854

}

2855

2856

/* Need to make a new string */

2857

result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);

2858

if (result == NULL)

2859

return NULL;

2860

result_s = PyString_AS_STRING(result);

2861

Py_MEMCPY(result_s, self_s, self_len);

2862

2863

/* change everything in-place, starting with this one */

2864

start = result_s + (next-self_s);

2865

*start = to_c;

2866

start++;

2867

end = result_s + self_len;

2868

2869

while (--maxcount > 0) {

2870

next = findchar(start, end-start, from_c);

2871

if (next == NULL)

2872

break;

2873

*next = to_c;

2874

start = next+1;

2875

}

2876

2877

return result;

2878

}

2879

2880

/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */

2881

Py_LOCAL(PyStringObject *)

2882

replace_substring_in_place(PyStringObject *self,

2883

const char *from_s, Py_ssize_t from_len,

2884

const char *to_s, Py_ssize_t to_len,

2885

Py_ssize_t maxcount)

2886

{

2887

char *result_s, *start, *end;

2888

char *self_s;

2889

Py_ssize_t self_len, offset;

2890

PyStringObject *result;

2891

2892

/* The result string will be the same size */

2893

2894

self_s = PyString_AS_STRING(self);

2895

self_len = PyString_GET_SIZE(self);

2896

2897

offset = findstring(self_s, self_len,

2898

from_s, from_len,

2899

0, self_len, FORWARD);

2900

if (offset == -1) {

2901

/* No matches; return the original string */

2902

return return_self(self);

2903

}

2904

2905

/* Need to make a new string */

2906

result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);

2907

if (result == NULL)

2908

return NULL;

2909

result_s = PyString_AS_STRING(result);

2910

Py_MEMCPY(result_s, self_s, self_len);

2911

2912

/* change everything in-place, starting with this one */

2913

start = result_s + offset;

2914

Py_MEMCPY(start, to_s, from_len);

2915

start += from_len;

2916

end = result_s + self_len;

2917

2918

while ( --maxcount > 0) {

2919

offset = findstring(start, end-start,

2920

from_s, from_len,

2921

0, end-start, FORWARD);

2922

if (offset==-1)

2923

break;

2924

Py_MEMCPY(start+offset, to_s, from_len);

2925

start += offset+from_len;

2926

}

2927

2928

return result;

2929

}

2930

2931

/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */

2932

Py_LOCAL(PyStringObject *)

2933

replace_single_character(PyStringObject *self,

2934

char from_c,

2935

const char *to_s, Py_ssize_t to_len,

2936

Py_ssize_t maxcount)

2937

{

2938

char *self_s, *result_s;

2939

char *start, *next, *end;

2940

Py_ssize_t self_len, result_len;

2941

Py_ssize_t count, product;

2942

PyStringObject *result;

2943

2944

self_s = PyString_AS_STRING(self);

2945

self_len = PyString_GET_SIZE(self);

2946

2947

count = countchar(self_s, self_len, from_c, maxcount);

2948

if (count == 0) {

2949

/* no matches, return unchanged */

2950

return return_self(self);

2951

}

2952

2953

/* use the difference between current and new, hence the "-1" */

2954

/* result_len = self_len + count * (to_len-1) */

2955

product = count * (to_len-1);

2956

if (product / (to_len-1) != count) {

2957

PyErr_SetString(PyExc_OverflowError, "replace string is too long");

2958

return NULL;

2959

}

2960

result_len = self_len + product;

2961

if (result_len < 0) {

2962

PyErr_SetString(PyExc_OverflowError, "replace string is too long");

2963

return NULL;

2964

}

2965

2966

if ( (result = (PyStringObject *)

2967

PyString_FromStringAndSize(NULL, result_len)) == NULL)

2968

return NULL;

2969

result_s = PyString_AS_STRING(result);

2970

2971

start = self_s;

2972

end = self_s + self_len;

2973

while (count-- > 0) {

2974

next = findchar(start, end-start, from_c);

2975

if (next == NULL)

2976

break;

2977

2978

if (next == start) {

2979

/* replace with the 'to' */

2980

Py_MEMCPY(result_s, to_s, to_len);

2981

result_s += to_len;

2982

start += 1;

2983

} else {

2984

/* copy the unchanged old then the 'to' */

2985

Py_MEMCPY(result_s, start, next-start);

2986

result_s += (next-start);

2987

Py_MEMCPY(result_s, to_s, to_len);

2988

result_s += to_len;

2989

start = next+1;

2990

}

2991

}

2992

/* Copy the remainder of the remaining string */

2993

Py_MEMCPY(result_s, start, end-start);

2994

2995

return result;

2996

}

2997

2998

/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */

2999

Py_LOCAL(PyStringObject *)

3000

replace_substring(PyStringObject *self,

3001

const char *from_s, Py_ssize_t from_len,

3002

const char *to_s, Py_ssize_t to_len,

3003

Py_ssize_t maxcount) {

3004

char *self_s, *result_s;

3005

char *start, *next, *end;

3006

Py_ssize_t self_len, result_len;

3007

Py_ssize_t count, offset, product;

3008

PyStringObject *result;

3009

3010

self_s = PyString_AS_STRING(self);

3011

self_len = PyString_GET_SIZE(self);

3012

3013

count = countstring(self_s, self_len,

3014

from_s, from_len,

3015

0, self_len, FORWARD, maxcount);

3016

if (count == 0) {

3017

/* no matches, return unchanged */

3018

return return_self(self);

3019

}

3020

3021

/* Check for overflow */

3022

/* result_len = self_len + count * (to_len-from_len) */

3023

product = count * (to_len-from_len);

3024

if (product / (to_len-from_len) != count) {

3025

PyErr_SetString(PyExc_OverflowError, "replace string is too long");

3026

return NULL;

3027

}

3028

result_len = self_len + product;

3029

if (result_len < 0) {

3030

PyErr_SetString(PyExc_OverflowError, "replace string is too long");

3031

return NULL;

3032

}

3033

3034

if ( (result = (PyStringObject *)

3035

PyString_FromStringAndSize(NULL, result_len)) == NULL)

3036

return NULL;

3037

result_s = PyString_AS_STRING(result);

3038

3039

start = self_s;

3040

end = self_s + self_len;

3041

while (count-- > 0) {

3042

offset = findstring(start, end-start,

3043

from_s, from_len,

3044

0, end-start, FORWARD);

3045

if (offset == -1)

3046

break;

3047

next = start+offset;

3048

if (next == start) {

3049

/* replace with the 'to' */

3050

Py_MEMCPY(result_s, to_s, to_len);

3051

result_s += to_len;

3052

start += from_len;

3053

} else {

3054

/* copy the unchanged old then the 'to' */

3055

Py_MEMCPY(result_s, start, next-start);

3056

result_s += (next-start);

3057

Py_MEMCPY(result_s, to_s, to_len);

3058

result_s += to_len;

3059

start = next+from_len;

3060

}

3061

}

3062

/* Copy the remainder of the remaining string */

3063

Py_MEMCPY(result_s, start, end-start);

3064

3065

return result;

3066

}

3067

3068

3069

/* Dispatch a replace request to the specialised helper for its case.
 *
 * self      - the string being searched
 * from_s/from_len - the bytes to look for
 * to_s/to_len     - the bytes to substitute
 * maxcount  - maximum number of replacements; negative means "no limit"
 *
 * Returns a new reference (possibly self itself via return_self()), or
 * NULL with an exception set by the helper that failed.
 *
 * NOTE: with an explicit non-negative maxcount, an empty self is returned
 * unchanged before the from=="" case is looked at; only a negative
 * maxcount lets an empty self reach replace_interleave().
 */
Py_LOCAL(PyStringObject *)
replace(PyStringObject *self,
        const char *from_s, Py_ssize_t from_len,
        const char *to_s, Py_ssize_t to_len,
        Py_ssize_t maxcount)
{
    if (maxcount < 0) {
        maxcount = PY_SSIZE_T_MAX;
    } else if (maxcount == 0 || PyString_GET_SIZE(self) == 0) {
        /* nothing to do; return the original string */
        return return_self(self);
    }

    /* From here on maxcount >= 1, so it needs no further zero test. */
    if (from_len == 0 && to_len == 0) {
        /* nothing to do; return the original string */
        return return_self(self);
    }

    /* Handle zero-length special cases */

    if (from_len == 0) {
        /* insert the 'to' string everywhere.   */
        /*    >>> "Python".replace("", ".")     */
        /*    '.P.y.t.h.o.n.'                   */
        return replace_interleave(self, to_s, to_len, maxcount);
    }

    /* Except for "".replace("", "A") == "A" there is no way beyond this */
    /* point for an empty self string to generate a non-empty string */
    /* Special case so the remaining code always gets a non-empty string */
    if (PyString_GET_SIZE(self) == 0) {
        return return_self(self);
    }

    if (to_len == 0) {
        /* delete all occurrences of 'from' string */
        if (from_len == 1) {
            return replace_delete_single_character(
                self, from_s[0], maxcount);
        } else {
            return replace_delete_substring(self, from_s, from_len, maxcount);
        }
    }

    /* Handle special case where both strings have the same length */

    if (from_len == to_len) {
        if (from_len == 1) {
            return replace_single_character_in_place(
                self,
                from_s[0],
                to_s[0],
                maxcount);
        } else {
            return replace_substring_in_place(
                self, from_s, from_len, to_s, to_len, maxcount);
        }
    }

    /* Otherwise use the more generic algorithms */
    if (from_len == 1) {
        return replace_single_character(self, from_s[0],
                                        to_s, to_len, maxcount);
    } else {
        /* len('from')>=2, len('to')>=1 */
        return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);
    }
}

3138

3139

PyDoc_STRVAR(replace__doc__,

3140

"S.replace (old, new[, count]) -> string\n\

3141

\n\

3142

Return a copy of string S with all occurrences of substring\n\

3143

old replaced by new. If the optional argument count is\n\

3144

given, only the first count occurrences are replaced.");

3145

3146

static PyObject *

3147

string_replace(PyStringObject *self, PyObject *args)

3148

{

3149

Py_ssize_t count = -1;

3150

PyObject *from, *to;

3151

const char *from_s, *to_s;

3152

Py_ssize_t from_len, to_len;

3153

3154

if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))

3155

return NULL;

3156

3157

if (PyString_Check(from)) {

3158

from_s = PyString_AS_STRING(from);

3159

from_len = PyString_GET_SIZE(from);

3160

}

3161

#ifdef Py_USING_UNICODE

3162

if (PyUnicode_Check(from))

3163

return PyUnicode_Replace((PyObject *)self,

3164

from, to, count);

3165

#endif

3166

else if (PyObject_AsCharBuffer(from, &from_s, &from_len))

3167

return NULL;

3168

3169

if (PyString_Check(to)) {

3170

to_s = PyString_AS_STRING(to);

3171

to_len = PyString_GET_SIZE(to);

3172

}

3173

#ifdef Py_USING_UNICODE

3174

else if (PyUnicode_Check(to))

3175

return PyUnicode_Replace((PyObject *)self,

3176

from, to, count);

3177

#endif

3178

else if (PyObject_AsCharBuffer(to, &to_s, &to_len))

3179

return NULL;

3180

3181

return (PyObject *)replace((PyStringObject *) self,

3182

from_s, from_len,

3183

to_s, to_len, count);

3184

}

3185

3186

/** End DALKE **/

3187

3188

/* Matches the end (direction >= 0) or start (direction < 0) of self

3189

* against substr, using the start and end arguments. Returns

3190

* -1 on error, 0 if not found and 1 if found.

3191

3192

Py_LOCAL(int)

3193

_string_tailmatch(PyStringObject *self, PyObject *substr, Py_ssize_t start,

3194

Py_ssize_t end, int direction)

3195

{

3196

Py_ssize_t len = PyString_GET_SIZE(self);

3197

Py_ssize_t slen;

3198

const char* sub;

3199

const char* str;

3200

3201

if (PyString_Check(substr)) {

3202

sub = PyString_AS_STRING(substr);

3203

slen = PyString_GET_SIZE(substr);

3204

}

3205

#ifdef Py_USING_UNICODE

3206

else if (PyUnicode_Check(substr))

3207

return PyUnicode_Tailmatch((PyObject *)self,

3208

substr, start, end, direction);

3209

#endif

3210

else if (PyObject_AsCharBuffer(substr, &sub, &slen))

3211

return -1;

3212

str = PyString_AS_STRING(self);

3213

3214

string_adjust_indices(&start, &end, len);

3215

3216

if (direction < 0) {

3217

/* startswith */

3218

if (start+slen > len)

3219

return 0;

3220

} else {

3221

/* endswith */

3222

if (end-start < slen || start > len)

3223

return 0;

3224

3225

if (end-slen > start)

3226

start = end - slen;

3227

}

3228

if (end-start >= slen)

3229

return ! memcmp(str+start, sub, slen);

3230

return 0;

3231

}

3232

3233

3234

PyDoc_STRVAR(startswith__doc__,

3235

"S.startswith(prefix[, start[, end]]) -> bool\n\

3236

\n\

3237

Return True if S starts with the specified prefix, False otherwise.\n\

3238

With optional start, test S beginning at that position.\n\

3239

With optional end, stop comparing S at that position.\n\

3240

prefix can also be a tuple of strings to try.");

3241

3242

static PyObject *

3243

string_startswith(PyStringObject *self, PyObject *args)

3244

{

3245

Py_ssize_t start = 0;

3246

Py_ssize_t end = PY_SSIZE_T_MAX;

3247

PyObject *subobj;

3248

int result;

3249

3250

if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,

3251

_PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))

3252

return NULL;

3253

if (PyTuple_Check(subobj)) {

3254

Py_ssize_t i;

3255

for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {

3256

result = _string_tailmatch(self,

3257

PyTuple_GET_ITEM(subobj, i),

3258

start, end, -1);

3259

if (result == -1)

3260

return NULL;

3261

else if (result) {

3262

Py_RETURN_TRUE;

3263

}

3264

}

3265

Py_RETURN_FALSE;

3266

}

3267

result = _string_tailmatch(self, subobj, start, end, -1);

3268

if (result == -1)

3269

return NULL;

3270

else

3271

return PyBool_FromLong(result);

3272

}

3273

3274

3275

PyDoc_STRVAR(endswith__doc__,

3276

"S.endswith(suffix[, start[, end]]) -> bool\n\

3277

\n\

3278

Return True if S ends with the specified suffix, False otherwise.\n\

3279

With optional start, test S beginning at that position.\n\

3280

With optional end, stop comparing S at that position.\n\

3281

suffix can also be a tuple of strings to try.");

3282

3283

static PyObject *

3284

string_endswith(PyStringObject *self, PyObject *args)

3285

{

3286

Py_ssize_t start = 0;

3287

Py_ssize_t end = PY_SSIZE_T_MAX;

3288

PyObject *subobj;

3289

int result;

3290

3291

if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,

3292

_PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))

3293

return NULL;

3294

if (PyTuple_Check(subobj)) {

3295

Py_ssize_t i;

3296

for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {

3297

result = _string_tailmatch(self,

3298

PyTuple_GET_ITEM(subobj, i),

3299

start, end, +1);

3300

if (result == -1)

3301

return NULL;

3302

else if (result) {

3303

Py_RETURN_TRUE;

3304

}

3305

}

3306

Py_RETURN_FALSE;

3307

}

3308

result = _string_tailmatch(self, subobj, start, end, +1);

3309

if (result == -1)

3310

return NULL;

3311

else

3312

return PyBool_FromLong(result);

3313

}

3314

3315

3316

PyDoc_STRVAR(encode__doc__,

3317

"S.encode([encoding[,errors]]) -> object\n\

3318

\n\

3319

Encodes S using the codec registered for encoding. encoding defaults\n\

3320

to the default encoding. errors may be given to set a different error\n\

3321

handling scheme. Default is 'strict' meaning that encoding errors raise\n\

3322

a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\

3323

'xmlcharrefreplace' as well as any other name registered with\n\

3324

codecs.register_error that is able to handle UnicodeEncodeErrors.");

3325

3326

static PyObject *

3327

string_encode(PyStringObject *self, PyObject *args)

3328

{

3329

char *encoding = NULL;

3330

char *errors = NULL;

3331

PyObject *v;

3332

3333

if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))

3334

return NULL;

3335

v = PyString_AsEncodedObject((PyObject *)self, encoding, errors);

3336

if (v == NULL)

3337

goto onError;

3338

if (!PyString_Check(v) && !PyUnicode_Check(v)) {

3339

PyErr_Format(PyExc_TypeError,

3340

"encoder did not return a string/unicode object "

3341

"(type=%.400s)",

3342

Py_TYPE(v)->tp_name);

3343

Py_DECREF(v);

3344

return NULL;

3345

}

3346

return v;

3347

3348

onError:

3349

return NULL;

3350

}

3351

3352

3353

PyDoc_STRVAR(decode__doc__,

3354

"S.decode([encoding[,errors]]) -> object\n\

3355

\n\

3356

Decodes S using the codec registered for encoding. encoding defaults\n\

3357

to the default encoding. errors may be given to set a different error\n\

3358

handling scheme. Default is 'strict' meaning that encoding errors raise\n\

3359

a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\

3360

as well as any other name registerd with codecs.register_error that is\n\

3361

able to handle UnicodeDecodeErrors.");

3362

3363

static PyObject *

3364

string_decode(PyStringObject *self, PyObject *args)

3365

{

3366

char *encoding = NULL;

3367

char *errors = NULL;

3368

PyObject *v;

3369

3370

if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))

3371

return NULL;

3372

v = PyString_AsDecodedObject((PyObject *)self, encoding, errors);

3373

if (v == NULL)

3374

goto onError;

3375

if (!PyString_Check(v) && !PyUnicode_Check(v)) {

3376

PyErr_Format(PyExc_TypeError,

3377

"decoder did not return a string/unicode object "

3378

"(type=%.400s)",

3379

Py_TYPE(v)->tp_name);

3380

Py_DECREF(v);

3381

return NULL;

3382

}

3383

return v;

3384

3385

onError:

3386

return NULL;

3387

}

3388

3389

3390

PyDoc_STRVAR(expandtabs__doc__,

3391

"S.expandtabs([tabsize]) -> string\n\

3392

\n\

3393

Return a copy of S where all tab characters are expanded using spaces.\n\

3394

If tabsize is not given, a tab size of 8 characters is assumed.");

3395

3396

static PyObject*

3397

string_expandtabs(PyStringObject *self, PyObject *args)

3398

{

3399

const char *e, *p, *qe;

3400

char *q;

3401

Py_ssize_t i, j, incr;

3402

PyObject *u;

3403

int tabsize = 8;

3404

3405

if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))

3406

return NULL;

3407

3408

/* First pass: determine size of output string */

3409

i = 0; /* chars up to and including most recent \n or \r */

3410

j = 0; /* chars since most recent \n or \r (use in tab calculations) */

3411

e = PyString_AS_STRING(self) + PyString_GET_SIZE(self); /* end of input */

3412

for (p = PyString_AS_STRING(self); p < e; p++)

3413

if (*p == '\t') {

3414

if (tabsize > 0) {

3415

incr = tabsize - (j % tabsize);

3416

if (j > PY_SSIZE_T_MAX - incr)

3417

goto overflow1;

3418

j += incr;

3419

}

3420

}

3421

else {

3422

if (j > PY_SSIZE_T_MAX - 1)

3423

goto overflow1;

3424

j++;

3425

if (*p == '\n' || *p == '\r') {

3426

if (i > PY_SSIZE_T_MAX - j)

3427

goto overflow1;

3428

i += j;

3429

j = 0;

3430

}

3431

}

3432

3433

if (i > PY_SSIZE_T_MAX - j)

3434

goto overflow1;

3435

3436

/* Second pass: create output string and fill it */

3437

u = PyString_FromStringAndSize(NULL, i + j);

3438

if (!u)

3439

return NULL;

3440

3441

j = 0; /* same as in first pass */

3442

q = PyString_AS_STRING(u); /* next output char */

3443

qe = PyString_AS_STRING(u) + PyString_GET_SIZE(u); /* end of output */

3444

3445

for (p = PyString_AS_STRING(self); p < e; p++)

3446

if (*p == '\t') {

3447

if (tabsize > 0) {

3448

i = tabsize - (j % tabsize);

3449

j += i;

3450

while (i--) {

3451

if (q >= qe)

3452

goto overflow2;

3453

*q++ = ' ';

3454

}

3455

}

3456

}

3457

else {

3458

if (q >= qe)

3459

goto overflow2;

3460

*q++ = *p;

3461

j++;

3462

if (*p == '\n' || *p == '\r')

3463

j = 0;

3464

}

3465

3466

return u;

3467

3468

overflow2:

3469

Py_DECREF(u);

3470

overflow1:

3471

PyErr_SetString(PyExc_OverflowError, "new string is too long");

3472

return NULL;

3473

}

3474

3475

/* Build a copy of self with `left` fill bytes in front and `right` fill
 * bytes behind.  Negative pad counts are treated as 0.  If no padding is
 * needed and self is an exact str, self is returned with a new reference.
 * Returns NULL if the new string cannot be allocated.
 */
Py_LOCAL_INLINE(PyObject *)
pad(PyStringObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
{
    PyObject *padded;
    char *buf;

    left = (left < 0) ? 0 : left;
    right = (right < 0) ? 0 : right;

    if (left == 0 && right == 0 && PyString_CheckExact(self)) {
        Py_INCREF(self);
        return (PyObject *)self;
    }

    padded = PyString_FromStringAndSize(NULL,
                                        left + PyString_GET_SIZE(self) + right);
    if (padded == NULL)
        return padded;

    /* Layout: [left fill][bytes of self][right fill] */
    buf = PyString_AS_STRING(padded);
    if (left)
        memset(buf, fill, left);
    Py_MEMCPY(buf + left,
              PyString_AS_STRING(self),
              PyString_GET_SIZE(self));
    if (right)
        memset(buf + left + PyString_GET_SIZE(self),
               fill, right);

    return padded;
}

3505

3506

PyDoc_STRVAR(ljust__doc__,

3507

"S.ljust(width[, fillchar]) -> string\n"

3508

"\n"

3509

"Return S left justified in a string of length width. Padding is\n"

3510

"done using the specified fill character (default is a space).");

3511

3512

static PyObject *

3513

string_ljust(PyStringObject *self, PyObject *args)

3514

{

3515

Py_ssize_t width;

3516

char fillchar = ' ';

3517

3518

if (!PyArg_ParseTuple(args, "n|c:ljust", &width, &fillchar))

3519

return NULL;

3520

3521

if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {

3522

Py_INCREF(self);

3523

return (PyObject*) self;

3524

}

3525

3526

return pad(self, 0, width - PyString_GET_SIZE(self), fillchar);

3527

}

3528

3529

3530

PyDoc_STRVAR(rjust__doc__,

3531

"S.rjust(width[, fillchar]) -> string\n"

3532

"\n"

3533

"Return S right justified in a string of length width. Padding is\n"

3534

"done using the specified fill character (default is a space)");

3535

3536

static PyObject *

3537

string_rjust(PyStringObject *self, PyObject *args)

3538

{

3539

Py_ssize_t width;

3540

char fillchar = ' ';

3541

3542

if (!PyArg_ParseTuple(args, "n|c:rjust", &width, &fillchar))

3543

return NULL;

3544

3545

if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {

3546

Py_INCREF(self);

3547

return (PyObject*) self;

3548

}

3549

3550

return pad(self, width - PyString_GET_SIZE(self), 0, fillchar);

3551

}

3552

3553

3554

PyDoc_STRVAR(center__doc__,

3555

"S.center(width[, fillchar]) -> string\n"

3556

"\n"

3557

"Return S centered in a string of length width. Padding is\n"

3558

"done using the specified fill character (default is a space)");

3559

3560

static PyObject *

3561

string_center(PyStringObject *self, PyObject *args)

3562

{

3563

Py_ssize_t marg, left;

3564

Py_ssize_t width;

3565

char fillchar = ' ';

3566

3567

if (!PyArg_ParseTuple(args, "n|c:center", &width, &fillchar))

3568

return NULL;

3569

3570

if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {

3571

Py_INCREF(self);

3572

return (PyObject*) self;

3573

}

3574

3575

marg = width - PyString_GET_SIZE(self);

3576

left = marg / 2 + (marg & width & 1);

3577

3578

return pad(self, left, marg - left, fillchar);

3579

}

3580

3581

PyDoc_STRVAR(zfill__doc__,

3582

"S.zfill(width) -> string\n"

3583

"\n"

3584

"Pad a numeric string S with zeros on the left, to fill a field\n"

3585

"of the specified width. The string S is never truncated.");

3586

3587

static PyObject *

3588

string_zfill(PyStringObject *self, PyObject *args)

3589

{

3590

Py_ssize_t fill;

3591

PyObject *s;

3592

char *p;

3593

Py_ssize_t width;

3594

3595

if (!PyArg_ParseTuple(args, "n:zfill", &width))

3596

return NULL;

3597

3598

if (PyString_GET_SIZE(self) >= width) {

3599

if (PyString_CheckExact(self)) {

3600

Py_INCREF(self);

3601

return (PyObject*) self;

3602

}

3603

else

3604

return PyString_FromStringAndSize(

3605

PyString_AS_STRING(self),

3606

PyString_GET_SIZE(self)

3607

);

3608

}

3609

3610

fill = width - PyString_GET_SIZE(self);

3611

3612

s = pad(self, fill, 0, '0');

3613

3614

if (s == NULL)

3615

return NULL;

3616

3617

p = PyString_AS_STRING(s);

3618

if (p[fill] == '+' || p[fill] == '-') {

3619

/* move sign to beginning of string */

3620

p[0] = p[fill];

3621

p[fill] = '0';

3622

}

3623

3624

return (PyObject*) s;

3625

}

3626

3627

PyDoc_STRVAR(isspace__doc__,

3628

"S.isspace() -> bool\n\

3629

\n\

3630

Return True if all characters in S are whitespace\n\

3631

and there is at least one character in S, False otherwise.");

3632

3633

static PyObject*

3634

string_isspace(PyStringObject *self)

3635

{

3636

3637

= (unsigned char *) PyString_AS_STRING(self);

3638

3639

3640

/* Shortcut for single character strings */

3641

if (PyString_GET_SIZE(self) == 1 &&

3642

isspace(*p))

3643

return PyBool_FromLong(1);

3644

3645

/* Special case for empty strings */

3646

if (PyString_GET_SIZE(self) == 0)

3647

return PyBool_FromLong(0);

3648

3649

e = p + PyString_GET_SIZE(self);

3650

for (; p < e; p++) {

3651

if (!isspace(*p))

3652

return PyBool_FromLong(0);

3653

}

3654

return PyBool_FromLong(1);

3655

}

3656

3657

3658

PyDoc_STRVAR(isalpha__doc__,

3659

"S.isalpha() -> bool\n\

3660

\n\

3661

Return True if all characters in S are alphabetic\n\

3662

and there is at least one character in S, False otherwise.");

3663

3664

static PyObject*

3665

string_isalpha(PyStringObject *self)

3666

{

3667

3668

= (unsigned char *) PyString_AS_STRING(self);

3669

3670

3671

/* Shortcut for single character strings */

3672

if (PyString_GET_SIZE(self) == 1 &&

3673

isalpha(*p))

3674

return PyBool_FromLong(1);

3675

3676

/* Special case for empty strings */

3677

if (PyString_GET_SIZE(self) == 0)

3678

return PyBool_FromLong(0);

3679

3680

e = p + PyString_GET_SIZE(self);

3681

for (; p < e; p++) {

3682

if (!isalpha(*p))

3683

return PyBool_FromLong(0);

3684

}

3685

return PyBool_FromLong(1);

3686

}

3687

3688

3689

PyDoc_STRVAR(isalnum__doc__,

3690

"S.isalnum() -> bool\n\

3691

\n\

3692

Return True if all characters in S are alphanumeric\n\

3693

and there is at least one character in S, False otherwise.");

3694

3695

static PyObject*

3696

string_isalnum(PyStringObject *self)

3697

{

3698

3699

= (unsigned char *) PyString_AS_STRING(self);

3700

3701

3702

/* Shortcut for single character strings */

3703

if (PyString_GET_SIZE(self) == 1 &&

3704

isalnum(*p))

3705

return PyBool_FromLong(1);

3706

3707

/* Special case for empty strings */

3708

if (PyString_GET_SIZE(self) == 0)

3709

return PyBool_FromLong(0);

3710

3711

e = p + PyString_GET_SIZE(self);

3712

for (; p < e; p++) {

3713

if (!isalnum(*p))

3714

return PyBool_FromLong(0);

3715

}

3716

return PyBool_FromLong(1);

3717

}

3718

3719

3720

PyDoc_STRVAR(isdigit__doc__,

3721

"S.isdigit() -> bool\n\

3722

\n\

3723

Return True if all characters in S are digits\n\

3724

and there is at least one character in S, False otherwise.");

3725

3726

static PyObject*

3727

string_isdigit(PyStringObject *self)

3728

{

3729

3730

= (unsigned char *) PyString_AS_STRING(self);

3731

3732

3733

/* Shortcut for single character strings */

3734

if (PyString_GET_SIZE(self) == 1 &&

3735

isdigit(*p))

3736

return PyBool_FromLong(1);

3737

3738

/* Special case for empty strings */

3739

if (PyString_GET_SIZE(self) == 0)

3740

return PyBool_FromLong(0);

3741

3742

e = p + PyString_GET_SIZE(self);

3743

for (; p < e; p++) {

3744

if (!isdigit(*p))

3745

return PyBool_FromLong(0);

3746

}

3747

return PyBool_FromLong(1);

3748

}

3749

3750

3751

PyDoc_STRVAR(islower__doc__,

3752

"S.islower() -> bool\n\

3753

\n\

3754

Return True if all cased characters in S are lowercase and there is\n\

3755

at least one cased character in S, False otherwise.");

3756

3757

static PyObject*

3758

string_islower(PyStringObject *self)

3759

{

3760

3761

= (unsigned char *) PyString_AS_STRING(self);

3762

3763

int cased;

3764

3765

/* Shortcut for single character strings */

3766

if (PyString_GET_SIZE(self) == 1)

3767

return PyBool_FromLong(islower(*p) != 0);

3768

3769

/* Special case for empty strings */

3770

if (PyString_GET_SIZE(self) == 0)

3771

return PyBool_FromLong(0);

3772

3773

e = p + PyString_GET_SIZE(self);

3774

cased = 0;

3775

for (; p < e; p++) {

3776

if (isupper(*p))

3777

return PyBool_FromLong(0);

3778

else if (!cased && islower(*p))

3779

cased = 1;

3780

}

3781

return PyBool_FromLong(cased);

3782

}

3783

3784

3785

PyDoc_STRVAR(isupper__doc__,

3786

"S.isupper() -> bool\n\

3787

\n\

3788

Return True if all cased characters in S are uppercase and there is\n\

3789

at least one cased character in S, False otherwise.");

3790

3791

static PyObject*

3792

string_isupper(PyStringObject *self)

3793

{

3794

3795

= (unsigned char *) PyString_AS_STRING(self);

3796

3797

int cased;

3798

3799

/* Shortcut for single character strings */

3800

if (PyString_GET_SIZE(self) == 1)

3801

return PyBool_FromLong(isupper(*p) != 0);

3802

3803

/* Special case for empty strings */

3804

if (PyString_GET_SIZE(self) == 0)

3805

return PyBool_FromLong(0);

3806

3807

e = p + PyString_GET_SIZE(self);

3808

cased = 0;

3809

for (; p < e; p++) {

3810

if (islower(*p))

3811

return PyBool_FromLong(0);

3812

else if (!cased && isupper(*p))

3813

cased = 1;

3814

}

3815

return PyBool_FromLong(cased);

3816

}

3817

3818

3819

PyDoc_STRVAR(istitle__doc__,

3820

"S.istitle() -> bool\n\

3821

\n\

3822

Return True if S is a titlecased string and there is at least one\n\

3823

character in S, i.e. uppercase characters may only follow uncased\n\

3824

characters and lowercase characters only cased ones. Return False\n\

3825

otherwise.");

3826

3827

static PyObject*

3828

string_istitle(PyStringObject *self, PyObject *uncased)

3829

{

3830

3831

= (unsigned char *) PyString_AS_STRING(self);

3832

3833

int cased, previous_is_cased;

3834

3835

/* Shortcut for single character strings */

3836

if (PyString_GET_SIZE(self) == 1)

3837

return PyBool_FromLong(isupper(*p) != 0);

3838

3839

/* Special case for empty strings */

3840

if (PyString_GET_SIZE(self) == 0)

3841

return PyBool_FromLong(0);

3842

3843

e = p + PyString_GET_SIZE(self);

3844

cased = 0;

3845

previous_is_cased = 0;

3846

for (; p < e; p++) {

3847

3848

3849

if (isupper(ch)) {

3850

if (previous_is_cased)

3851

return PyBool_FromLong(0);

3852

previous_is_cased = 1;

3853

cased = 1;

3854

}

3855

else if (islower(ch)) {

3856

if (!previous_is_cased)

3857

return PyBool_FromLong(0);

3858

previous_is_cased = 1;

3859

cased = 1;

3860

}

3861

else

3862

previous_is_cased = 0;

3863

}

3864

return PyBool_FromLong(cased);

3865

}

3866

3867

3868

PyDoc_STRVAR(splitlines__doc__,

3869

"S.splitlines([keepends]) -> list of strings\n\

3870

\n\

3871

Return a list of the lines in S, breaking at line boundaries.\n\

3872

Line breaks are not included in the resulting list unless keepends\n\

3873

is given and true.");

3874

3875

static PyObject*

3876

string_splitlines(PyStringObject *self, PyObject *args)

3877

{

3878

3879

3880

Py_ssize_t len;

3881

int keepends = 0;

3882

PyObject *list;

3883

PyObject *str;

3884

char *data;

3885

3886

if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))

3887

return NULL;

3888

3889

data = PyString_AS_STRING(self);

3890

len = PyString_GET_SIZE(self);

3891

3892

/* This does not use the preallocated list because splitlines is

3893

usually run with hundreds of newlines. The overhead of

3894

switching between PyList_SET_ITEM and append causes about a

3895

2-3% slowdown for that common case. A smarter implementation

3896

could move the if check out, so the SET_ITEMs are done first

3897

and the appends only done when the prealloc buffer is full.

3898

That's too much work for little gain.*/

3899

3900

list = PyList_New(0);

3901

if (!list)

3902

goto onError;

3903

3904

for (i = j = 0; i < len; ) {

3905

Py_ssize_t eol;

3906

3907

/* Find a line and append it */

3908

while (i < len && data[i] != '\n' && data[i] != '\r')

3909

i++;

3910

3911

/* Skip the line break reading CRLF as one line break */

3912

eol = i;

3913

if (i < len) {

3914

if (data[i] == '\r' && i + 1 < len &&

3915

data[i+1] == '\n')

3916

i += 2;

3917

else

3918

i++;

3919

if (keepends)

3920

eol = i;

3921

}

3922

SPLIT_APPEND(data, j, eol);

3923

j = i;

3924

}

3925

if (j < len) {

3926

SPLIT_APPEND(data, j, len);

3927

}

3928

3929

return list;

3930

3931

onError:

3932

Py_XDECREF(list);

3933

return NULL;

3934

}

3935

3936

PyDoc_STRVAR(sizeof__doc__,

3937

"S.__sizeof__() -> size of S in memory, in bytes");

3938

3939

static PyObject *

3940

string_sizeof(PyStringObject *v)

3941

{

3942

Py_ssize_t res;

3943

res = sizeof(PyStringObject) + v->ob_size * v->ob_type->tp_itemsize;

3944

return PyInt_FromSsize_t(res);

3945

}

3946

3947

/* The split helper macros are not needed past this point. */
#undef SPLIT_APPEND
#undef SPLIT_ADD
#undef MAX_PREALLOC
#undef PREALLOC_SIZE

3951

3952

static PyObject *

3953

string_getnewargs(PyStringObject *v)

3954

{

3955

return Py_BuildValue("(s#)", v->ob_sval, Py_SIZE(v));

3956

}

3957

3958

3959

#include "stringlib/string_format.h"

3960

3961

PyDoc_STRVAR(format__doc__,

3962

"S.format(*args, **kwargs) -> unicode\n\

3963

\n\

3964

");

3965

3966

static PyObject *

3967

string__format__(PyObject* self, PyObject* args)

3968

{

3969

PyObject *format_spec;

3970

PyObject *result = NULL;

3971

PyObject *tmp = NULL;

3972

3973

/* If 2.x, convert format_spec to the same type as value */

3974

/* This is to allow things like u''.format('') */

3975

if (!PyArg_ParseTuple(args, "O:__format__", &format_spec))

3976

goto done;

3977

if (!(PyString_Check(format_spec) || PyUnicode_Check(format_spec))) {

3978

PyErr_Format(PyExc_TypeError, "__format__ arg must be str "

3979

"or unicode, not %s", Py_TYPE(format_spec)->tp_name);

3980

goto done;

3981

}

3982

tmp = PyObject_Str(format_spec);

3983

if (tmp == NULL)

3984

goto done;

3985

format_spec = tmp;

3986

3987

result = _PyBytes_FormatAdvanced(self,

3988

PyString_AS_STRING(format_spec),

3989

PyString_GET_SIZE(format_spec));

3990

done:

3991

Py_XDECREF(tmp);

3992

return result;

3993

}

3994

3995

PyDoc_STRVAR(p_format__doc__,

3996

"S.__format__(format_spec) -> unicode\n\

3997

\n\

3998

");

3999

4000

4001

static PyMethodDef

4002

string_methods[] = {

4003

/* Counterparts of the obsolete stropmodule functions; except

4004

string.maketrans(). */

4005

{"join", (PyCFunction)string_join, METH_O, join__doc__},

4006

{"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},

4007

{"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},

4008

{"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},

4009

{"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},

4010

{"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},

4011

{"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},

4012

{"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},

4013

{"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},

4014

{"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},

4015

{"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},

4016

{"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},

4017

{"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,

4018

capitalize__doc__},

4019

{"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},

4020

{"endswith", (PyCFunction)string_endswith, METH_VARARGS,

4021

endswith__doc__},

4022

{"partition", (PyCFunction)string_partition, METH_O, partition__doc__},

4023

{"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},

4024

{"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},

4025

{"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},

4026

{"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},

4027

{"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},

4028

{"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},

4029

{"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},

4030

{"rpartition", (PyCFunction)string_rpartition, METH_O,

4031

rpartition__doc__},

4032

{"startswith", (PyCFunction)string_startswith, METH_VARARGS,

4033

startswith__doc__},

4034

{"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},

4035

{"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,

4036

swapcase__doc__},

4037

{"translate", (PyCFunction)string_translate, METH_VARARGS,

4038

translate__doc__},

4039

{"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},

4040

{"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},

4041

{"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},

4042

{"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},

4043

{"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},

4044

{"format", (PyCFunction) do_string_format, METH_VARARGS | METH_KEYWORDS, format__doc__},

4045

{"__format__", (PyCFunction) string__format__, METH_VARARGS, p_format__doc__},

4046

{"_formatter_field_name_split", (PyCFunction) formatter_field_name_split, METH_NOARGS},

4047

{"_formatter_parser", (PyCFunction) formatter_parser, METH_NOARGS},

4048

{"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__},

4049

{"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},

4050

{"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,

4051

expandtabs__doc__},

4052

{"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,

4053

splitlines__doc__},

4054

{"__sizeof__", (PyCFunction)string_sizeof, METH_NOARGS,

4055

sizeof__doc__},

4056

{"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS},

4057

{NULL, NULL} /* sentinel */

4058

};

4059

4060

static PyObject *

4061

str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);

4062

4063

static PyObject *

4064

string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)

4065

{

4066

PyObject *x = NULL;

4067

static char *kwlist[] = {"object", 0};

4068

4069

if (type != &PyString_Type)

4070

return str_subtype_new(type, args, kwds);

4071

if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))

4072

return NULL;

4073

if (x == NULL)

4074

return PyString_FromString("");

4075

return PyObject_Str(x);

4076

}

4077

4078

static PyObject *

4079

str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)

4080

{

4081

PyObject *tmp, *pnew;

4082

Py_ssize_t n;

4083

4084

assert(PyType_IsSubtype(type, &PyString_Type));

4085

tmp = string_new(&PyString_Type, args, kwds);

4086

if (tmp == NULL)

4087

return NULL;

4088

assert(PyString_CheckExact(tmp));

4089

n = PyString_GET_SIZE(tmp);

4090

pnew = type->tp_alloc(type, n);

4091

if (pnew != NULL) {

4092

Py_MEMCPY(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);

4093

((PyStringObject *)pnew)->ob_shash =

4094

((PyStringObject *)tmp)->ob_shash;

4095

((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;

4096

}

4097

Py_DECREF(tmp);

4098

return pnew;

4099

}

4100

4101

static PyObject *

4102

basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)

4103

{

4104

PyErr_SetString(PyExc_TypeError,

4105

"The basestring type cannot be instantiated");

4106

return NULL;

4107

}

4108

4109

/* nb_remainder slot (the % operator): delegate to PyString_Format()
   when the left operand is a str, otherwise return NotImplemented. */
static PyObject *
string_mod(PyObject *v, PyObject *w)
{
    if (PyString_Check(v))
        return PyString_Format(v, w);

    Py_INCREF(Py_NotImplemented);
    return Py_NotImplemented;
}

4118

4119

PyDoc_STRVAR(basestring_doc,

4120

"Type basestring cannot be instantiated; it is the base for str and unicode.");

4121

4122

static PyNumberMethods string_as_number = {

4123

0, /*nb_add*/

4124

0, /*nb_subtract*/

4125

0, /*nb_multiply*/

4126

0, /*nb_divide*/

4127

string_mod, /*nb_remainder*/

4128

};

4129

4130

4131

PyTypeObject PyBaseString_Type = {

4132

PyVarObject_HEAD_INIT(&PyType_Type, 0)

4133

"basestring",

4134

4135

4136

0, /* tp_dealloc */

4137

0, /* tp_print */

4138

0, /* tp_getattr */

4139

0, /* tp_setattr */

4140

0, /* tp_compare */

4141

0, /* tp_repr */

4142

0, /* tp_as_number */

4143

0, /* tp_as_sequence */

4144

0, /* tp_as_mapping */

4145

0, /* tp_hash */

4146

0, /* tp_call */

4147

0, /* tp_str */

4148

0, /* tp_getattro */

4149

0, /* tp_setattro */

4150

0, /* tp_as_buffer */

4151

Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */

4152

basestring_doc, /* tp_doc */

4153

0, /* tp_traverse */

4154

0, /* tp_clear */

4155

0, /* tp_richcompare */

4156

0, /* tp_weaklistoffset */

4157

0, /* tp_iter */

4158

0, /* tp_iternext */

4159

0, /* tp_methods */

4160

0, /* tp_members */

4161

0, /* tp_getset */

4162

&PyBaseObject_Type, /* tp_base */

4163

0, /* tp_dict */

4164

0, /* tp_descr_get */

4165

0, /* tp_descr_set */

4166

0, /* tp_dictoffset */

4167

0, /* tp_init */

4168

0, /* tp_alloc */

4169

basestring_new, /* tp_new */

4170

0, /* tp_free */

4171

};

4172

4173

PyDoc_STRVAR(string_doc,

4174

"str(object) -> string\n\

4175

\n\

4176

Return a nice string representation of the object.\n\

4177

If the argument is a string, the return value is the same object.");

4178

4179

PyTypeObject PyString_Type = {

4180

PyVarObject_HEAD_INIT(&PyType_Type, 0)

4181

"str",

4182

sizeof(PyStringObject),

4183

sizeof(char),

4184

string_dealloc, /* tp_dealloc */

4185

(printfunc)string_print, /* tp_print */

4186

0, /* tp_getattr */

4187

0, /* tp_setattr */

4188

0, /* tp_compare */

4189

string_repr, /* tp_repr */

4190

&string_as_number, /* tp_as_number */

4191

&string_as_sequence, /* tp_as_sequence */

4192

&string_as_mapping, /* tp_as_mapping */

4193

(hashfunc)string_hash, /* tp_hash */

4194

0, /* tp_call */

4195

string_str, /* tp_str */

4196

PyObject_GenericGetAttr, /* tp_getattro */

4197

0, /* tp_setattro */

4198

&string_as_buffer, /* tp_as_buffer */

4199

Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES |

4200

Py_TPFLAGS_BASETYPE | Py_TPFLAGS_STRING_SUBCLASS |

4201

Py_TPFLAGS_HAVE_NEWBUFFER, /* tp_flags */

4202

string_doc, /* tp_doc */

4203

0, /* tp_traverse */

4204

0, /* tp_clear */

4205

(richcmpfunc)string_richcompare, /* tp_richcompare */

4206

0, /* tp_weaklistoffset */

4207

0, /* tp_iter */

4208

0, /* tp_iternext */

4209

string_methods, /* tp_methods */

4210

0, /* tp_members */

4211

0, /* tp_getset */

4212

&PyBaseString_Type, /* tp_base */

4213

0, /* tp_dict */

4214

0, /* tp_descr_get */

4215

0, /* tp_descr_set */

4216

0, /* tp_dictoffset */

4217

0, /* tp_init */

4218

0, /* tp_alloc */

4219

string_new, /* tp_new */

4220

PyObject_Del, /* tp_free */

4221

};

4222

4223

void

4224

PyString_Concat(register PyObject **pv, register PyObject *w)

4225

{

4226

4227

if (*pv == NULL)

4228

return;

4229

if (w == NULL || !PyString_Check(*pv)) {

4230

Py_DECREF(*pv);

4231

*pv = NULL;

4232

return;

4233

}

4234

v = string_concat((PyStringObject *) *pv, w);

4235

Py_DECREF(*pv);

4236

*pv = v;

4237

}

4238

4239

void

4240

PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)

4241

{

4242

PyString_Concat(pv, w);

4243

Py_XDECREF(w);

4244

}

4245

4246

4247

/* The following function breaks the notion that strings are immutable:

4248

it changes the size of a string. We get away with this only if there

4249

is only one module referencing the object. You can also think of it

4250

as creating a new string object and destroying the old one, only

4251

more efficiently. In any case, don't use this if the string may

4252

already be known to some other part of the code...

4253

Note that if there's not enough memory to resize the string, the original

4254

string object at *pv is deallocated, *pv is set to NULL, an "out of

4255

memory" exception is set, and -1 is returned. Else (on success) 0 is

4256

returned, and the value in *pv may or may not be the same as on input.

4257

As always, an extra byte is allocated for a trailing \0 byte (newsize

4258

does *not* include that), and a trailing \0 byte is stored.

4259

4260

4261

int

4262

_PyString_Resize(PyObject **pv, Py_ssize_t newsize)

4263

{

4264

4265

4266

v = *pv;

4267

if (!PyString_Check(v) || Py_REFCNT(v) != 1 || newsize < 0 ||

4268

PyString_CHECK_INTERNED(v)) {

4269

*pv = 0;

4270

Py_DECREF(v);

4271

PyErr_BadInternalCall();

4272

return -1;

4273

}

4274

/* XXX UNREF/NEWREF interface should be more symmetrical */

4275

_Py_DEC_REFTOTAL;

4276

_Py_ForgetReference(v);

4277

*pv = (PyObject *)

4278

PyObject_REALLOC((char *)v, sizeof(PyStringObject) + newsize);

4279

if (*pv == NULL) {

4280

PyObject_Del(v);

4281

PyErr_NoMemory();

4282

return -1;

4283

}

4284

_Py_NewReference(*pv);

4285

sv = (PyStringObject *) *pv;

4286

Py_SIZE(sv) = newsize;

4287

sv->ob_sval[newsize] = '\0';

4288

sv->ob_shash = -1; /* invalidate cached hash value */

4289

return 0;

4290

}

4291

4292

/* Helpers for formatstring */

4293

4294

/* Fetch the next argument for a format operation and advance *p_argidx.
   A negative arglen means `args` is itself the single argument and is
   returned directly; otherwise the item at the current index of the
   tuple `args` is returned via PyTuple_GetItem().  When the index has
   reached arglen, TypeError is set and NULL is returned. */
Py_LOCAL_INLINE(PyObject *)
getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
{
    const Py_ssize_t current = *p_argidx;

    if (current >= arglen) {
        PyErr_SetString(PyExc_TypeError,
                        "not enough arguments for format string");
        return NULL;
    }

    *p_argidx = current + 1;
    return (arglen < 0) ? args : PyTuple_GetItem(args, current);
}

4309

4310

/* Format codes
 * F_LJUST      '-'
 * F_SIGN       '+'
 * F_BLANK      ' '
 * F_ALT        '#'
 * F_ZERO       '0'
 */
#define F_LJUST (1<<0)
#define F_SIGN  (1<<1)
#define F_BLANK (1<<2)
#define F_ALT   (1<<3)
#define F_ZERO  (1<<4)

4322

4323

Py_LOCAL_INLINE(int)

4324

formatfloat(char *buf, size_t buflen, int flags,

4325

int prec, int type, PyObject *v)

4326

{

4327

/* fmt = '%#.' + `prec` + `type`

4328

worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/

4329

char fmt[20];

4330

double x;

4331

x = PyFloat_AsDouble(v);

4332

if (x == -1.0 && PyErr_Occurred()) {

4333

PyErr_Format(PyExc_TypeError, "float argument required, "

4334

"not %.200s", Py_TYPE(v)->tp_name);

4335

return -1;

4336

}

4337

if (prec < 0)

4338

prec = 6;

4339

if (type == 'f' && fabs(x)/1e25 >= 1e25)

4340

type = 'g';

4341

/* Worst case length calc to ensure no buffer overrun:

4342

4343

'g' formats:

4344

fmt = %#.<prec>g

4345

buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp

4346

for any double rep.)

4347

len = 1 + prec + 1 + 2 + 5 = 9 + prec

4348

4349

'f' formats:

4350

buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50)

4351

len = 1 + 50 + 1 + prec = 52 + prec

4352

4353

If prec=0 the effective precision is 1 (the leading digit is

4354

always given), therefore increase the length by one.

4355

4356

4357

if (((type == 'g' || type == 'G') &&

4358

buflen <= (size_t)10 + (size_t)prec) ||

4359

(type == 'f' && buflen <= (size_t)53 + (size_t)prec)) {

4360

PyErr_SetString(PyExc_OverflowError,

4361

"formatted float is too long (precision too large?)");

4362

return -1;

4363

}

4364

PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",

4365

(flags&F_ALT) ? "#" : "",

4366

prec, type);

4367

PyOS_ascii_formatd(buf, buflen, fmt, x);

4368

return (int)strlen(buf);

4369

}

4370

4371

/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and

4372

* the F_ALT flag, for Python's long (unbounded) ints. It's not used for

4373

* Python's regular ints.

4374

* Return value: a new PyString*, or NULL if error.

4375

* . *pbuf is set to point into it,

4376

* *plen set to the # of chars following that.

4377

* Caller must decref it when done using pbuf.

4378

* The string starting at *pbuf is of the form

4379

* "-"? ("0x" | "0X")? digit+

4380

* "0x"/"0X" are present only for x and X conversions, with F_ALT

4381

* set in flags. The case of hex digits will be correct,

4382

* There will be at least prec digits, zero-filled on the left if

4383

* necessary to get that many.

4384

* val object to be converted

4385

* flags bitmask of format flags; only F_ALT is looked at

4386

* prec minimum number of digits; 0-fill on left if needed

4387

* type a character in [duoxX]; u acts the same as d

4388

4389

* CAUTION: o, x and X conversions on regular ints can never

4390

* produce a '-' sign, but can for Python's unbounded ints.

4391

4392

PyObject*

4393

_PyString_FormatLong(PyObject *val, int flags, int prec, int type,

4394

char **pbuf, int *plen)

4395

{

4396

PyObject *result = NULL;

4397

char *buf;

4398

Py_ssize_t i;

4399

int sign; /* 1 if '-', else 0 */

4400

int len; /* number of characters */

4401

Py_ssize_t llen;

4402

int numdigits; /* len == numnondigits + numdigits */

4403

int numnondigits = 0;

4404

4405

switch (type) {

4406

case 'd':

4407

case 'u':

4408

result = Py_TYPE(val)->tp_str(val);

4409

break;

4410

case 'o':

4411

result = Py_TYPE(val)->tp_as_number->nb_oct(val);

4412

break;

4413

case 'x':

4414

case 'X':

4415

numnondigits = 2;

4416

result = Py_TYPE(val)->tp_as_number->nb_hex(val);

4417

break;

4418

default:

4419

assert(!"'type' not in [duoxX]");

4420

}

4421

if (!result)

4422

return NULL;

4423

4424

buf = PyString_AsString(result);

4425

if (!buf) {

4426

Py_DECREF(result);

4427

return NULL;

4428

}

4429

4430

/* To modify the string in-place, there can only be one reference. */

4431

if (Py_REFCNT(result) != 1) {

4432

PyErr_BadInternalCall();

4433

return NULL;

4434

}

4435

llen = PyString_Size(result);

4436

if (llen > INT_MAX) {

4437

PyErr_SetString(PyExc_ValueError, "string too large in _PyString_FormatLong");

4438

return NULL;

4439

}

4440

len = (int)llen;

4441

if (buf[len-1] == 'L') {

4442

--len;

4443

buf[len] = '\0';

4444

}

4445

sign = buf[0] == '-';

4446

numnondigits += sign;

4447

numdigits = len - numnondigits;

4448

assert(numdigits > 0);

4449

4450

/* Get rid of base marker unless F_ALT */

4451

if ((flags & F_ALT) == 0) {

4452

/* Need to skip 0x, 0X or 0. */

4453

int skipped = 0;

4454

switch (type) {

4455

case 'o':

4456

assert(buf[sign] == '0');

4457

/* If 0 is only digit, leave it alone. */

4458

if (numdigits > 1) {

4459

skipped = 1;

4460

--numdigits;

4461

}

4462

break;

4463

case 'x':

4464

case 'X':

4465

assert(buf[sign] == '0');

4466

assert(buf[sign + 1] == 'x');

4467

skipped = 2;

4468

numnondigits -= 2;

4469

break;

4470

}

4471

if (skipped) {

4472

buf += skipped;

4473

len -= skipped;

4474

if (sign)

4475

buf[0] = '-';

4476

}

4477

assert(len == numnondigits + numdigits);

4478

assert(numdigits > 0);

4479

}

4480

4481

/* Fill with leading zeroes to meet minimum width. */

4482

if (prec > numdigits) {

4483

PyObject *r1 = PyString_FromStringAndSize(NULL,

4484

numnondigits + prec);

4485

char *b1;

4486

if (!r1) {

4487

Py_DECREF(result);

4488

return NULL;

4489

}

4490

b1 = PyString_AS_STRING(r1);

4491

for (i = 0; i < numnondigits; ++i)

4492

*b1++ = *buf++;

4493

for (i = 0; i < prec - numdigits; i++)

4494

*b1++ = '0';

4495

for (i = 0; i < numdigits; i++)

4496

*b1++ = *buf++;

4497

*b1 = '\0';

4498

Py_DECREF(result);

4499

result = r1;

4500

buf = PyString_AS_STRING(result);

4501

len = numnondigits + prec;

4502

}

4503

4504

/* Fix up case for hex conversions. */

4505

if (type == 'X') {

4506

/* Need to convert all lower case letters to upper case.

4507

and need to convert 0x to 0X (and -0x to -0X). */

4508

for (i = 0; i < len; i++)

4509

if (buf[i] >= 'a' && buf[i] <= 'x')

4510

buf[i] -= 'a'-'A';

4511

}

4512

*pbuf = buf;

4513

*plen = len;

4514

return result;

4515

}

4516

4517

Py_LOCAL_INLINE(int)

4518

formatint(char *buf, size_t buflen, int flags,

4519

int prec, int type, PyObject *v)

4520

{

4521

/* fmt = '%#.' + `prec` + 'l' + `type`

4522

worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)

4523

+ 1 + 1 = 24 */

4524

char fmt[64]; /* plenty big enough! */

4525

char *sign;

4526

long x;

4527

4528

x = PyInt_AsLong(v);

4529

if (x == -1 && PyErr_Occurred()) {

4530

PyErr_Format(PyExc_TypeError, "int argument required, not %.200s",

4531

Py_TYPE(v)->tp_name);

4532

return -1;

4533

}

4534

if (x < 0 && type == 'u') {

4535

type = 'd';

4536

}

4537

if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))

4538

sign = "-";

4539

else

4540

sign = "";

4541

if (prec < 0)

4542

prec = 1;

4543

4544

if ((flags & F_ALT) &&

4545

(type == 'x' || type == 'X')) {

4546

/* When converting under %#x or %#X, there are a number

4547

* of issues that cause pain:

4548

* - when 0 is being converted, the C standard leaves off

4549

* the '0x' or '0X', which is inconsistent with other

4550

* %#x/%#X conversions and inconsistent with Python's

4551

* hex() function

4552

* - there are platforms that violate the standard and

4553

* convert 0 with the '0x' or '0X'

4554

* (Metrowerks, Compaq Tru64)

4555

* - there are platforms that give '0x' when converting

4556

* under %#X, but convert 0 in accordance with the

4557

* standard (OS/2 EMX)

4558

4559

* We can achieve the desired consistency by inserting our

4560

* own '0x' or '0X' prefix, and substituting %x/%X in place

4561

* of %#x/%#X.

4562

4563

* Note that this is the same approach as used in

4564

* formatint() in unicodeobject.c

4565

4566

PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",

4567

sign, type, prec, type);

4568

}

4569

else {

4570

PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",

4571

sign, (flags&F_ALT) ? "#" : "",

4572

prec, type);

4573

}

4574

4575

/* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))

4576

* worst case buf = '-0x' + [0-9]*prec, where prec >= 11

4577

4578

if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {

4579

PyErr_SetString(PyExc_OverflowError,

4580

"formatted integer is too long (precision too large?)");

4581

return -1;

4582

}

4583

if (sign[0])

4584

PyOS_snprintf(buf, buflen, fmt, -x);

4585

else

4586

PyOS_snprintf(buf, buflen, fmt, x);

4587

return (int)strlen(buf);

4588

}

4589

4590

Py_LOCAL_INLINE(int)

4591

formatchar(char *buf, size_t buflen, PyObject *v)

4592

{

4593

/* presume that the buffer is at least 2 characters long */

4594

if (PyString_Check(v)) {

4595

if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))

4596

return -1;

4597

}

4598

else {

4599

if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))

4600

return -1;

4601

}

4602

buf[1] = '\0';

4603

return 1;

4604

}

4605

4606

/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the length of the buffer in which the floats, ints, &
   chars are formatted.  XXX This is a magic number.  Each formatting
   routine does bounds checking to ensure no overflow, but a better
   solution may be to malloc a buffer of appropriate size for each
   format.  For now, the current solution is sufficient.
*/
/* Parenthesized so the cast binds as one unit wherever the macro expands. */
#define FORMATBUFLEN ((size_t)120)

4615

4616

/* Implement `format % args` for byte strings.
 *
 *   format  must be a string object (PyString_Check); otherwise a
 *           bad-internal-call error is raised
 *   args    a tuple of positional values, a mapping (used with %(key)
 *           specifiers), or a single value
 *
 * Returns a new reference to the formatted result, or NULL with an
 * exception set.  When Py_USING_UNICODE is defined and a %s or %c argument
 * is unicode (or %s yields unicode from str()), the text produced so far
 * is kept and the rest of the format is handed to PyUnicode_Format(); the
 * result is then whatever PyUnicode_Concat() returns.
 *
 * Bookkeeping: `reslen` is the allocated size of `result`, `rescnt` the
 * number of bytes still free in it, `res` the write cursor, and `fmtcnt`
 * the number of format bytes not yet consumed.
 */
PyObject *
PyString_Format(PyObject *format, PyObject *args)
{
    char *fmt, *res;
    Py_ssize_t arglen, argidx;
    Py_ssize_t reslen, rescnt, fmtcnt;
    int args_owned = 0;     /* true while `args` holds a reference we own */
    PyObject *result, *orig_args;
#ifdef Py_USING_UNICODE
    PyObject *v, *w;
#endif
    PyObject *dict = NULL;
    if (format == NULL || !PyString_Check(format) || args == NULL) {
        PyErr_BadInternalCall();
        return NULL;
    }
    orig_args = args;
    fmt = PyString_AS_STRING(format);
    fmtcnt = PyString_GET_SIZE(format);
    reslen = rescnt = fmtcnt + 100;
    result = PyString_FromStringAndSize((char *)NULL, reslen);
    if (result == NULL)
        return NULL;
    res = PyString_AsString(result);
    if (PyTuple_Check(args)) {
        arglen = PyTuple_GET_SIZE(args);
        argidx = 0;
    }
    else {
        arglen = -1;
        argidx = -2;
    }
    if (Py_TYPE(args)->tp_as_mapping && !PyTuple_Check(args) &&
        !PyObject_TypeCheck(args, &PyBaseString_Type))
        dict = args;
    while (--fmtcnt >= 0) {
        if (*fmt != '%') {
            if (--rescnt < 0) {
                rescnt = fmtcnt + 100;
                reslen += rescnt;
                /* On failure `result` has been set to NULL; go through
                   the common exit so an owned `args` is released. */
                if (_PyString_Resize(&result, reslen) < 0)
                    goto error;
                res = PyString_AS_STRING(result)
                    + reslen - rescnt;
                --rescnt;
            }
            *res++ = *fmt++;
        }
        else {
            /* Got a format specifier */
            int flags = 0;
            Py_ssize_t width = -1;
            int prec = -1;
            int c = '\0';
            int fill;
            int isnumok;
            PyObject *v = NULL;
            PyObject *temp = NULL;
            char *pbuf;
            int sign;
            Py_ssize_t len;
            char formatbuf[FORMATBUFLEN];
                 /* For format{float,int,char}() */
#ifdef Py_USING_UNICODE
            /* Remembered so the unicode fallback can restart at this '%'. */
            char *fmt_start = fmt;
            Py_ssize_t argidx_start = argidx;
#endif

            fmt++;
            if (*fmt == '(') {
                char *keystart;
                Py_ssize_t keylen;
                PyObject *key;
                int pcount = 1;

                if (dict == NULL) {
                    PyErr_SetString(PyExc_TypeError,
                                    "format requires a mapping");
                    goto error;
                }
                ++fmt;
                --fmtcnt;
                keystart = fmt;
                /* Skip over balanced parentheses */
                while (pcount > 0 && --fmtcnt >= 0) {
                    if (*fmt == ')')
                        --pcount;
                    else if (*fmt == '(')
                        ++pcount;
                    fmt++;
                }
                keylen = fmt - keystart - 1;
                if (fmtcnt < 0 || pcount > 0) {
                    PyErr_SetString(PyExc_ValueError,
                                    "incomplete format key");
                    goto error;
                }
                key = PyString_FromStringAndSize(keystart,
                                                 keylen);
                if (key == NULL)
                    goto error;
                if (args_owned) {
                    Py_DECREF(args);
                    args_owned = 0;
                }
                args = PyObject_GetItem(dict, key);
                Py_DECREF(key);
                if (args == NULL) {
                    goto error;
                }
                args_owned = 1;
                arglen = -1;
                argidx = -2;
            }
            /* Conversion flags */
            while (--fmtcnt >= 0) {
                switch (c = *fmt++) {
                case '-': flags |= F_LJUST; continue;
                case '+': flags |= F_SIGN; continue;
                case ' ': flags |= F_BLANK; continue;
                case '#': flags |= F_ALT; continue;
                case '0': flags |= F_ZERO; continue;
                }
                break;
            }
            /* Field width */
            if (c == '*') {
                v = getnextarg(args, arglen, &argidx);
                if (v == NULL)
                    goto error;
                if (!PyInt_Check(v)) {
                    PyErr_SetString(PyExc_TypeError,
                                    "* wants int");
                    goto error;
                }
                width = PyInt_AsLong(v);
                if (width < 0) {
                    flags |= F_LJUST;
                    /* Negating the most negative value would overflow. */
                    if (width < -PY_SSIZE_T_MAX) {
                        PyErr_SetString(PyExc_ValueError,
                                        "width too big");
                        goto error;
                    }
                    width = -width;
                }
                if (--fmtcnt >= 0)
                    c = *fmt++;
            }
            else if (c >= 0 && isdigit(c)) {
                width = c - '0';
                while (--fmtcnt >= 0) {
                    c = Py_CHARMASK(*fmt++);
                    if (!isdigit(c))
                        break;
                    /* Test before multiplying so the check itself cannot
                       overflow. */
                    if (width > (PY_SSIZE_T_MAX - (c - '0')) / 10) {
                        PyErr_SetString(
                            PyExc_ValueError,
                            "width too big");
                        goto error;
                    }
                    width = width*10 + (c - '0');
                }
            }
            /* Precision */
            if (c == '.') {
                prec = 0;
                if (--fmtcnt >= 0)
                    c = *fmt++;
                if (c == '*') {
                    long star_prec;

                    v = getnextarg(args, arglen, &argidx);
                    if (v == NULL)
                        goto error;
                    if (!PyInt_Check(v)) {
                        PyErr_SetString(
                            PyExc_TypeError,
                            "* wants int");
                        goto error;
                    }
                    star_prec = PyInt_AsLong(v);
                    if (star_prec < 0)
                        star_prec = 0;
                    /* prec is an int: reject values that would be
                       silently truncated where long is wider. */
                    if (star_prec > INT_MAX) {
                        PyErr_SetString(
                            PyExc_ValueError,
                            "prec too big");
                        goto error;
                    }
                    prec = (int)star_prec;
                    if (--fmtcnt >= 0)
                        c = *fmt++;
                }
                else if (c >= 0 && isdigit(c)) {
                    prec = c - '0';
                    while (--fmtcnt >= 0) {
                        c = Py_CHARMASK(*fmt++);
                        if (!isdigit(c))
                            break;
                        if (prec > (INT_MAX - (c - '0')) / 10) {
                            PyErr_SetString(
                                PyExc_ValueError,
                                "prec too big");
                            goto error;
                        }
                        prec = prec*10 + (c - '0');
                    }
                }
            } /* prec */
            /* Length modifiers are accepted and ignored. */
            if (fmtcnt >= 0) {
                if (c == 'h' || c == 'l' || c == 'L') {
                    if (--fmtcnt >= 0)
                        c = *fmt++;
                }
            }
            if (fmtcnt < 0) {
                PyErr_SetString(PyExc_ValueError,
                                "incomplete format");
                goto error;
            }
            if (c != '%') {
                v = getnextarg(args, arglen, &argidx);
                if (v == NULL)
                    goto error;
            }
            sign = 0;
            fill = ' ';
            switch (c) {
            case '%':
                pbuf = "%";
                len = 1;
                break;
            case 's':
#ifdef Py_USING_UNICODE
                if (PyUnicode_Check(v)) {
                    fmt = fmt_start;
                    argidx = argidx_start;
                    goto unicode;
                }
#endif
                temp = _PyObject_Str(v);
#ifdef Py_USING_UNICODE
                if (temp != NULL && PyUnicode_Check(temp)) {
                    Py_DECREF(temp);
                    fmt = fmt_start;
                    argidx = argidx_start;
                    goto unicode;
                }
#endif
                /* Fall through */
            case 'r':
                if (c == 'r')
                    temp = PyObject_Repr(v);
                if (temp == NULL)
                    goto error;
                if (!PyString_Check(temp)) {
                    PyErr_SetString(PyExc_TypeError,
                                    "%s argument has non-string str()");
                    Py_DECREF(temp);
                    goto error;
                }
                pbuf = PyString_AS_STRING(temp);
                len = PyString_GET_SIZE(temp);
                if (prec >= 0 && len > prec)
                    len = prec;
                break;
            case 'i':
            case 'd':
            case 'u':
            case 'o':
            case 'x':
            case 'X':
                if (c == 'i')
                    c = 'd';
                isnumok = 0;
                if (PyNumber_Check(v)) {
                    PyObject *iobj=NULL;

                    if (PyInt_Check(v) || (PyLong_Check(v))) {
                        iobj = v;
                        Py_INCREF(iobj);
                    }
                    else {
                        iobj = PyNumber_Int(v);
                        if (iobj==NULL) {
                            /* Drop the failed attempt's exception so it
                               is not left pending if the long conversion
                               succeeds. */
                            PyErr_Clear();
                            iobj = PyNumber_Long(v);
                        }
                    }
                    if (iobj!=NULL) {
                        if (PyInt_Check(iobj)) {
                            isnumok = 1;
                            pbuf = formatbuf;
                            len = formatint(pbuf,
                                            sizeof(formatbuf),
                                            flags, prec, c, iobj);
                            Py_DECREF(iobj);
                            if (len < 0)
                                goto error;
                            sign = 1;
                        }
                        else if (PyLong_Check(iobj)) {
                            int ilen;

                            isnumok = 1;
                            temp = _PyString_FormatLong(iobj, flags,
                                prec, c, &pbuf, &ilen);
                            Py_DECREF(iobj);
                            /* Check first: ilen is only read after the
                               call is known to have succeeded. */
                            if (!temp)
                                goto error;
                            len = ilen;
                            sign = 1;
                        }
                        else {
                            Py_DECREF(iobj);
                        }
                    }
                }
                if (!isnumok) {
                    PyErr_Format(PyExc_TypeError,
                        "%%%c format: a number is required, "
                        "not %.200s", c, Py_TYPE(v)->tp_name);
                    goto error;
                }
                if (flags & F_ZERO)
                    fill = '0';
                break;
            case 'e':
            case 'E':
            case 'f':
            case 'F':
            case 'g':
            case 'G':
                if (c == 'F')
                    c = 'f';
                pbuf = formatbuf;
                len = formatfloat(pbuf, sizeof(formatbuf),
                                  flags, prec, c, v);
                if (len < 0)
                    goto error;
                sign = 1;
                if (flags & F_ZERO)
                    fill = '0';
                break;
            case 'c':
#ifdef Py_USING_UNICODE
                if (PyUnicode_Check(v)) {
                    fmt = fmt_start;
                    argidx = argidx_start;
                    goto unicode;
                }
#endif
                pbuf = formatbuf;
                len = formatchar(pbuf, sizeof(formatbuf), v);
                if (len < 0)
                    goto error;
                break;
            default:
                PyErr_Format(PyExc_ValueError,
                    "unsupported format character '%c' (0x%x) "
                    "at index %zd",
                    c, c,
                    (Py_ssize_t)(fmt - 1 -
                                 PyString_AsString(format)));
                goto error;
            }
            /* From here on `sign` is the sign character to emit, or 0. */
            if (sign) {
                if (*pbuf == '-' || *pbuf == '+') {
                    sign = *pbuf++;
                    len--;
                }
                else if (flags & F_SIGN)
                    sign = '+';
                else if (flags & F_BLANK)
                    sign = ' ';
                else
                    sign = 0;
            }
            if (width < len)
                width = len;
            if (rescnt - (sign != 0) < width) {
                reslen -= rescnt;   /* now the number of bytes written */
                /* Refuse sizes that would overflow Py_ssize_t instead of
                   detecting wraparound after the fact. */
                if (width > PY_SSIZE_T_MAX - 100 - fmtcnt - reslen) {
                    Py_XDECREF(temp);
                    PyErr_NoMemory();
                    goto error;
                }
                rescnt = width + fmtcnt + 100;
                reslen += rescnt;
                if (_PyString_Resize(&result, reslen) < 0) {
                    Py_XDECREF(temp);
                    goto error;
                }
                res = PyString_AS_STRING(result)
                    + reslen - rescnt;
            }
            if (sign) {
                /* With zero fill the sign goes before the padding. */
                if (fill != ' ')
                    *res++ = sign;
                rescnt--;
                if (width > len)
                    width--;
            }
            if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
                assert(pbuf[0] == '0');
                assert(pbuf[1] == c);
                if (fill != ' ') {
                    *res++ = *pbuf++;
                    *res++ = *pbuf++;
                }
                rescnt -= 2;
                width -= 2;
                if (width < 0)
                    width = 0;
                len -= 2;
            }
            if (width > len && !(flags & F_LJUST)) {
                do {
                    --rescnt;
                    *res++ = fill;
                } while (--width > len);
            }
            if (fill == ' ') {
                /* With space fill the sign and 0x prefix follow the
                   padding. */
                if (sign)
                    *res++ = sign;
                if ((flags & F_ALT) &&
                    (c == 'x' || c == 'X')) {
                    assert(pbuf[0] == '0');
                    assert(pbuf[1] == c);
                    *res++ = *pbuf++;
                    *res++ = *pbuf++;
                }
            }
            Py_MEMCPY(res, pbuf, len);
            res += len;
            rescnt -= len;
            /* Trailing padding for left-justified fields. */
            while (--width >= len) {
                --rescnt;
                *res++ = ' ';
            }
            if (dict && (argidx < arglen) && c != '%') {
                PyErr_SetString(PyExc_TypeError,
                    "not all arguments converted during string formatting");
                Py_XDECREF(temp);
                goto error;
            }
            Py_XDECREF(temp);
        } /* '%' */
    } /* until end */
    if (argidx < arglen && !dict) {
        PyErr_SetString(PyExc_TypeError,
            "not all arguments converted during string formatting");
        goto error;
    }
    if (args_owned) {
        Py_DECREF(args);
    }
    /* Trim to the bytes actually written.  If this fails, `result` is
       NULL with an exception set, so returning it reports the error. */
    _PyString_Resize(&result, reslen - rescnt);
    return result;

#ifdef Py_USING_UNICODE
 unicode:
    if (args_owned) {
        Py_DECREF(args);
        args_owned = 0;
    }
    /* Fiddle args right (remove the first argidx arguments) */
    if (PyTuple_Check(orig_args) && argidx > 0) {
        PyObject *v;
        Py_ssize_t n = PyTuple_GET_SIZE(orig_args) - argidx;
        v = PyTuple_New(n);
        if (v == NULL)
            goto error;
        while (--n >= 0) {
            PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
            Py_INCREF(w);
            PyTuple_SET_ITEM(v, n, w);
        }
        args = v;
    } else {
        Py_INCREF(orig_args);
        args = orig_args;
    }
    args_owned = 1;
    /* Take what we have of the result and let the Unicode formatting
       function format the rest of the input. */
    rescnt = res - PyString_AS_STRING(result);
    if (_PyString_Resize(&result, rescnt))
        goto error;
    fmtcnt = PyString_GET_SIZE(format) - \
             (fmt - PyString_AS_STRING(format));
    format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
    if (format == NULL)
        goto error;
    v = PyUnicode_Format(format, args);
    Py_DECREF(format);
    if (v == NULL)
        goto error;
    /* Paste what we have (result) to what the Unicode formatting
       function returned (v) and return the result (or error) */
    w = PyUnicode_Concat(result, v);
    Py_DECREF(result);
    Py_DECREF(v);
    Py_DECREF(args);
    return w;
#endif /* Py_USING_UNICODE */

 error:
    /* `result` is NULL here when a _PyString_Resize() call failed. */
    Py_XDECREF(result);
    if (args_owned) {
        Py_DECREF(args);
    }
    return NULL;
}

5110

5111

void

5112

PyString_InternInPlace(PyObject **p)

5113

{

5114

5115

PyObject *t;

5116

if (s == NULL || !PyString_Check(s))

5117

Py_FatalError("PyString_InternInPlace: strings only please!");

5118

/* If it's a string subclass, we don't really know what putting

5119

it in the interned dict might do. */

5120

if (!PyString_CheckExact(s))

5121

return;

5122

if (PyString_CHECK_INTERNED(s))

5123

return;

5124

if (interned == NULL) {

5125

interned = PyDict_New();

5126

if (interned == NULL) {

5127

PyErr_Clear(); /* Don't leave an exception */

5128

return;

5129

}

5130

}

5131

t = PyDict_GetItem(interned, (PyObject *)s);

5132

if (t) {

5133

Py_INCREF(t);

5134

Py_DECREF(*p);

5135

*p = t;

5136

return;

5137

}

5138

5139

if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {

5140

PyErr_Clear();

5141

return;

5142

}

5143

/* The two references in interned are not counted by refcnt.

5144

The string deallocator will take care of this */

5145

Py_REFCNT(s) -= 2;

5146

PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;

5147

}

5148

5149

void

5150

PyString_InternImmortal(PyObject **p)

5151

{

5152

PyString_InternInPlace(p);

5153

if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {

5154

PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;

5155

Py_INCREF(*p);

5156

}

5157

}

5158

5159

5160

/* Build a string object from the C string `cp` and intern it in place.
 * Returns the resulting object, or NULL if PyString_FromString() failed.
 */
PyObject *
PyString_InternFromString(const char *cp)
{
    PyObject *str = PyString_FromString(cp);

    if (str != NULL)
        PyString_InternInPlace(&str);
    return str;
}

5169

5170

void

5171

PyString_Fini(void)

5172

{

5173

int i;

5174

for (i = 0; i < UCHAR_MAX + 1; i++) {

5175

Py_XDECREF(characters[i]);

5176

characters[i] = NULL;

5177

}

5178

Py_XDECREF(nullstring);

5179

nullstring = NULL;

5180

}

5181

5182

void _Py_ReleaseInternedStrings(void)

5183

{

5184

PyObject *keys;

5185

PyStringObject *s;

5186

Py_ssize_t i, n;

5187

Py_ssize_t immortal_size = 0, mortal_size = 0;

5188

5189

if (interned == NULL || !PyDict_Check(interned))

5190

return;

5191

keys = PyDict_Keys(interned);

5192

if (keys == NULL || !PyList_Check(keys)) {

5193

PyErr_Clear();

5194

return;

5195

}

5196

5197

/* Since _Py_ReleaseInternedStrings() is intended to help a leak

5198

detector, interned strings are not forcibly deallocated; rather, we

5199

give them their stolen references back, and then clear and DECREF

5200

the interned dict. */

5201

5202

n = PyList_GET_SIZE(keys);

5203

fprintf(stderr, "releasing %" PY_FORMAT_SIZE_T "d interned strings\n",

5204

n);

5205

for (i = 0; i < n; i++) {

5206

s = (PyStringObject *) PyList_GET_ITEM(keys, i);

5207

switch (s->ob_sstate) {

5208

case SSTATE_NOT_INTERNED:

5209

/* XXX Shouldn't happen */

5210

break;

5211

case SSTATE_INTERNED_IMMORTAL:

5212

Py_REFCNT(s) += 1;

5213

immortal_size += Py_SIZE(s);

5214

break;

5215

case SSTATE_INTERNED_MORTAL:

5216

Py_REFCNT(s) += 2;

5217

mortal_size += Py_SIZE(s);

5218

break;

5219

default:

5220

Py_FatalError("Inconsistent interned string state.");

5221

}

5222

s->ob_sstate = SSTATE_NOT_INTERNED;

5223

}

5224

fprintf(stderr, "total size of all interned strings: "

5225

"%" PY_FORMAT_SIZE_T "d/%" PY_FORMAT_SIZE_T "d "

5226

"mortal/immortal\n", mortal_size, immortal_size);

5227

Py_DECREF(keys);

5228

PyDict_Clear(interned);

5229

Py_DECREF(interned);

5230

interned = NULL;

5231

}

Older »