~ubuntu-branches/ubuntu/utopic/glib2.0/utopic

« back to all changes in this revision

Viewing changes to glib/gutf8.c

Committer: Bazaar Package Importer
Author(s): Sebastien Bacher
Date: 2007-05-04 10:55:02 UTC
mto: (3.2.1 intrepid) (1.6.1 upstream) (157.1.1 precise-proposed) (162.1.1 quantal) (172.1.1 experimental) (185.1.1 raring-proposed)
mto: This revision was merged to the branch mainline in revision 4.
Revision ID: james.westby@ubuntu.com-20070504105502-izybnh9z2nprjlk6

Tags: upstream-2.12.12

Import upstream version 2.12.12

files added:
AUTHORS

COPYING

ChangeLog

ChangeLog.pre-1-2

ChangeLog.pre-2-0

ChangeLog.pre-2-10

ChangeLog.pre-2-12

ChangeLog.pre-2-2

ChangeLog.pre-2-4

ChangeLog.pre-2-6

ChangeLog.pre-2-8

HACKING

INSTALL

INSTALL.in

MAINTAINERS

Makefile.am

Makefile.in

NEWS

NEWS.pre-1-3

README

README.in

README.win32

acglib.m4

acinclude.m4

aclocal.m4

autogen.sh

build

build/ChangeLog

build/Makefile.am

build/Makefile.in

build/README

build/win32

build/win32/Makefile.am

build/win32/Makefile.in

build/win32/dirent

build/win32/dirent/Makefile.am

build/win32/dirent/Makefile.in

build/win32/dirent/README

build/win32/dirent/dirent-zip

build/win32/dirent/dirent.c

build/win32/dirent/dirent.h

build/win32/dirent/makefile.msc

build/win32/dirent/wdirent.c

build/win32/make.msc

build/win32/module.defs

build/win32/vs8

build/win32/vs8/Makefile.am

build/win32/vs8/Makefile.in

build/win32/vs8/glib-genmarshal.vcproj

build/win32/vs8/glib.sln

build/win32/vs8/glib.vcproj

build/win32/vs8/gmodule.vcproj

build/win32/vs8/gobject.vcproj

build/win32/vs8/gthread.vcproj

compile

config.guess

config.h.in

config.h.win32

config.h.win32.in

config.sub

configure

configure.in

depcomp

docs

docs/Makefile.am

docs/Makefile.in

docs/debugging.txt

docs/macros.txt

docs/reference

docs/reference/AUTHORS

docs/reference/COPYING

docs/reference/ChangeLog

docs/reference/Makefile.am

docs/reference/Makefile.in

docs/reference/NEWS

docs/reference/README

docs/reference/glib

docs/reference/glib/Makefile.am

docs/reference/glib/Makefile.in

docs/reference/glib/building.sgml

docs/reference/glib/changes.sgml

docs/reference/glib/compiling.sgml

docs/reference/glib/cross.sgml

docs/reference/glib/file-name-encodings.png

docs/reference/glib/file-name-encodings.sxd

docs/reference/glib/glib-docs.sgml

docs/reference/glib/glib-gettextize.1

docs/reference/glib/glib-gettextize.xml

docs/reference/glib/glib-overrides.txt

docs/reference/glib/glib-sections.txt

docs/reference/glib/glib.types

docs/reference/glib/html

docs/reference/glib/html/file-name-encodings.png

docs/reference/glib/html/glib-Arrays.html

docs/reference/glib/html/glib-Asynchronous-Queues.html

docs/reference/glib/html/glib-Atomic-Operations.html

docs/reference/glib/html/glib-Automatic-String-Completion.html

docs/reference/glib/html/glib-Balanced-Binary-Trees.html

docs/reference/glib/html/glib-Base64-Encoding.html

docs/reference/glib/html/glib-Basic-Types.html

docs/reference/glib/html/glib-Bookmark-file-parser.html

docs/reference/glib/html/glib-Byte-Arrays.html

docs/reference/glib/html/glib-Byte-Order-Macros.html

docs/reference/glib/html/glib-Caches.html

docs/reference/glib/html/glib-Character-Set-Conversion.html

docs/reference/glib/html/glib-Commandline-option-parser.html

docs/reference/glib/html/glib-Datasets.html

docs/reference/glib/html/glib-Date-and-Time-Functions.html

docs/reference/glib/html/glib-Double-ended-Queues.html

docs/reference/glib/html/glib-Doubly-Linked-Lists.html

docs/reference/glib/html/glib-Dynamic-Loading-of-Modules.html

docs/reference/glib/html/glib-Error-Reporting.html

docs/reference/glib/html/glib-File-Utilities.html

docs/reference/glib/html/glib-Glob-style-pattern-matching.html

docs/reference/glib/html/glib-Hash-Tables.html

docs/reference/glib/html/glib-Hook-Functions.html

docs/reference/glib/html/glib-I18N.html

docs/reference/glib/html/glib-IO-Channels.html

docs/reference/glib/html/glib-Key-value-file-parser.html

docs/reference/glib/html/glib-Keyed-Data-Lists.html

docs/reference/glib/html/glib-Lexical-Scanner.html

docs/reference/glib/html/glib-Limits-of-Basic-Types.html

docs/reference/glib/html/glib-Memory-Allocation.html

docs/reference/glib/html/glib-Memory-Allocators.html

docs/reference/glib/html/glib-Memory-Chunks.html

docs/reference/glib/html/glib-Memory-Slices.html

docs/reference/glib/html/glib-Message-Logging.html

docs/reference/glib/html/glib-Miscellaneous-Macros.html

docs/reference/glib/html/glib-Miscellaneous-Utility-Functions.html

docs/reference/glib/html/glib-N-ary-Trees.html

docs/reference/glib/html/glib-Numerical-Definitions.html

docs/reference/glib/html/glib-Pointer-Arrays.html

docs/reference/glib/html/glib-Quarks.html

docs/reference/glib/html/glib-Random-Numbers.html

docs/reference/glib/html/glib-Relations-and-Tuples.html

docs/reference/glib/html/glib-Shell-related-Utilities.html

docs/reference/glib/html/glib-Simple-XML-Subset-Parser.html

docs/reference/glib/html/glib-Singly-Linked-Lists.html

docs/reference/glib/html/glib-Spawning-Processes.html

docs/reference/glib/html/glib-Standard-Macros.html

docs/reference/glib/html/glib-String-Chunks.html

docs/reference/glib/html/glib-String-Utility-Functions.html

docs/reference/glib/html/glib-Strings.html

docs/reference/glib/html/glib-The-Main-Event-Loop.html

docs/reference/glib/html/glib-Thread-Pools.html

docs/reference/glib/html/glib-Threads.html

docs/reference/glib/html/glib-Timers.html

docs/reference/glib/html/glib-Trash-Stacks.html

docs/reference/glib/html/glib-Type-Conversion-Macros.html

docs/reference/glib/html/glib-Unicode-Manipulation.html

docs/reference/glib/html/glib-Version-Information.html

docs/reference/glib/html/glib-Warnings-and-Assertions.html

docs/reference/glib/html/glib-Windows-Compatibility-Functions.html

docs/reference/glib/html/glib-building.html

docs/reference/glib/html/glib-changes.html

docs/reference/glib/html/glib-compiling.html

docs/reference/glib/html/glib-core.html

docs/reference/glib/html/glib-cross-compiling.html

docs/reference/glib/html/glib-data-types.html

docs/reference/glib/html/glib-fundamentals.html

docs/reference/glib/html/glib-gettextize.html

docs/reference/glib/html/glib-resources.html

docs/reference/glib/html/glib-running.html

docs/reference/glib/html/glib-utilities.html

docs/reference/glib/html/glib.devhelp

docs/reference/glib/html/glib.devhelp2

docs/reference/glib/html/glib.html

docs/reference/glib/html/home.png

docs/reference/glib/html/index.html

docs/reference/glib/html/index.sgml

docs/reference/glib/html/ix01.html

docs/reference/glib/html/ix02.html

docs/reference/glib/html/ix03.html

docs/reference/glib/html/ix04.html

docs/reference/glib/html/ix05.html

docs/reference/glib/html/ix06.html

docs/reference/glib/html/ix07.html

docs/reference/glib/html/ix08.html

docs/reference/glib/html/left.png

docs/reference/glib/html/mainloop-states.gif

docs/reference/glib/html/right.png

docs/reference/glib/html/style.css

docs/reference/glib/html/tools.html

docs/reference/glib/html/up.png

docs/reference/glib/mainloop-states.eps

docs/reference/glib/mainloop-states.fig

docs/reference/glib/mainloop-states.gif

docs/reference/glib/mainloop-states.png

docs/reference/glib/resources.sgml

docs/reference/glib/running.sgml

docs/reference/glib/tmpl

docs/reference/glib/tmpl/allocators.sgml

docs/reference/glib/tmpl/arrays.sgml

docs/reference/glib/tmpl/arrays_byte.sgml

docs/reference/glib/tmpl/arrays_pointer.sgml

docs/reference/glib/tmpl/async_queues.sgml

docs/reference/glib/tmpl/atomic_operations.sgml

docs/reference/glib/tmpl/base64.sgml

docs/reference/glib/tmpl/bookmarkfile.sgml

docs/reference/glib/tmpl/byte_order.sgml

docs/reference/glib/tmpl/caches.sgml

docs/reference/glib/tmpl/completion.sgml

docs/reference/glib/tmpl/conversions.sgml

docs/reference/glib/tmpl/datalist.sgml

docs/reference/glib/tmpl/datasets.sgml

docs/reference/glib/tmpl/date.sgml

docs/reference/glib/tmpl/error_reporting.sgml

docs/reference/glib/tmpl/fileutils.sgml

docs/reference/glib/tmpl/glib-unused.sgml

docs/reference/glib/tmpl/hash_tables.sgml

docs/reference/glib/tmpl/hooks.sgml

docs/reference/glib/tmpl/i18n.sgml

docs/reference/glib/tmpl/iochannels.sgml

docs/reference/glib/tmpl/keyfile.sgml

docs/reference/glib/tmpl/limits.sgml

docs/reference/glib/tmpl/linked_lists_double.sgml

docs/reference/glib/tmpl/linked_lists_single.sgml

docs/reference/glib/tmpl/macros.sgml

docs/reference/glib/tmpl/macros_misc.sgml

docs/reference/glib/tmpl/main.sgml

docs/reference/glib/tmpl/markup.sgml

docs/reference/glib/tmpl/memory.sgml

docs/reference/glib/tmpl/memory_chunks.sgml

docs/reference/glib/tmpl/memory_slices.sgml

docs/reference/glib/tmpl/messages.sgml

docs/reference/glib/tmpl/misc_utils.sgml

docs/reference/glib/tmpl/modules.sgml

docs/reference/glib/tmpl/numerical.sgml

docs/reference/glib/tmpl/option.sgml

docs/reference/glib/tmpl/patterns.sgml

docs/reference/glib/tmpl/quarks.sgml

docs/reference/glib/tmpl/queue.sgml

docs/reference/glib/tmpl/random_numbers.sgml

docs/reference/glib/tmpl/relations.sgml

docs/reference/glib/tmpl/scanner.sgml

docs/reference/glib/tmpl/shell.sgml

docs/reference/glib/tmpl/spawn.sgml

docs/reference/glib/tmpl/string_chunks.sgml

docs/reference/glib/tmpl/string_utils.sgml

docs/reference/glib/tmpl/strings.sgml

docs/reference/glib/tmpl/thread_pools.sgml

docs/reference/glib/tmpl/threads.sgml

docs/reference/glib/tmpl/timers.sgml

docs/reference/glib/tmpl/trash_stack.sgml

docs/reference/glib/tmpl/trees-binary.sgml

docs/reference/glib/tmpl/trees-nary.sgml

docs/reference/glib/tmpl/type_conversion.sgml

docs/reference/glib/tmpl/types.sgml

docs/reference/glib/tmpl/unicode.sgml

docs/reference/glib/tmpl/version.sgml

docs/reference/glib/tmpl/warnings.sgml

docs/reference/glib/tmpl/windows.sgml

docs/reference/glib/version.xml

docs/reference/glib/version.xml.in

docs/reference/glib/xml

docs/reference/glib/xml/allocators.xml

docs/reference/glib/xml/arrays.xml

docs/reference/glib/xml/arrays_byte.xml

docs/reference/glib/xml/arrays_pointer.xml

docs/reference/glib/xml/async_queues.xml

docs/reference/glib/xml/atomic_operations.xml

docs/reference/glib/xml/base64.xml

docs/reference/glib/xml/bookmarkfile.xml

docs/reference/glib/xml/byte_order.xml

docs/reference/glib/xml/caches.xml

docs/reference/glib/xml/completion.xml

docs/reference/glib/xml/conversions.xml

docs/reference/glib/xml/datalist.xml

docs/reference/glib/xml/datasets.xml

docs/reference/glib/xml/date.xml

docs/reference/glib/xml/error_reporting.xml

docs/reference/glib/xml/fileutils.xml

docs/reference/glib/xml/hash_tables.xml

docs/reference/glib/xml/hooks.xml

docs/reference/glib/xml/i18n.xml

docs/reference/glib/xml/iochannels.xml

docs/reference/glib/xml/keyfile.xml

docs/reference/glib/xml/limits.xml

docs/reference/glib/xml/linked_lists_double.xml

docs/reference/glib/xml/linked_lists_single.xml

docs/reference/glib/xml/macros.xml

docs/reference/glib/xml/macros_misc.xml

docs/reference/glib/xml/main.xml

docs/reference/glib/xml/markup.xml

docs/reference/glib/xml/memory.xml

docs/reference/glib/xml/memory_chunks.xml

docs/reference/glib/xml/memory_slices.xml

docs/reference/glib/xml/messages.xml

docs/reference/glib/xml/misc_utils.xml

docs/reference/glib/xml/modules.xml

docs/reference/glib/xml/numerical.xml

docs/reference/glib/xml/option.xml

docs/reference/glib/xml/patterns.xml

docs/reference/glib/xml/quarks.xml

docs/reference/glib/xml/queue.xml

docs/reference/glib/xml/random_numbers.xml

docs/reference/glib/xml/relations.xml

docs/reference/glib/xml/scanner.xml

docs/reference/glib/xml/shell.xml

docs/reference/glib/xml/spawn.xml

docs/reference/glib/xml/string_chunks.xml

docs/reference/glib/xml/string_utils.xml

docs/reference/glib/xml/strings.xml

docs/reference/glib/xml/thread_pools.xml

docs/reference/glib/xml/threads.xml

docs/reference/glib/xml/timers.xml

docs/reference/glib/xml/trash_stack.xml

docs/reference/glib/xml/trees-binary.xml

docs/reference/glib/xml/trees-nary.xml

docs/reference/glib/xml/type_conversion.xml

docs/reference/glib/xml/types.xml

docs/reference/glib/xml/unicode.xml

docs/reference/glib/xml/version.xml

docs/reference/glib/xml/warnings.xml

docs/reference/glib/xml/windows.xml

docs/reference/gobject

docs/reference/gobject/Makefile.am

docs/reference/gobject/Makefile.in

docs/reference/gobject/glib-genmarshal.1

docs/reference/gobject/glib-genmarshal.xml

docs/reference/gobject/glib-mkenums.1

docs/reference/gobject/glib-mkenums.xml

docs/reference/gobject/gobject-docs.sgml

docs/reference/gobject/gobject-overrides.txt

docs/reference/gobject/gobject-query.1

docs/reference/gobject/gobject-query.xml

docs/reference/gobject/gobject-sections.txt

docs/reference/gobject/gobject.cI

docs/reference/gobject/gobject.types

docs/reference/gobject/html

docs/reference/gobject/html/GTypeModule.html

docs/reference/gobject/html/GTypePlugin.html

docs/reference/gobject/html/ch01.html

docs/reference/gobject/html/ch01s02.html

docs/reference/gobject/html/ch02.html

docs/reference/gobject/html/ch06s03.html

docs/reference/gobject/html/ch07s02.html

docs/reference/gobject/html/ch07s03.html

docs/reference/gobject/html/chapter-gobject.html

docs/reference/gobject/html/chapter-signal.html

docs/reference/gobject/html/glib-genmarshal.html

docs/reference/gobject/html/glib-mkenums.html

docs/reference/gobject/html/glue.png

docs/reference/gobject/html/gobject-Boxed-Types.html

docs/reference/gobject/html/gobject-Closures.html

docs/reference/gobject/html/gobject-Enumeration-and-Flag-Types.html

docs/reference/gobject/html/gobject-GParamSpec.html

docs/reference/gobject/html/gobject-Generic-values.html

docs/reference/gobject/html/gobject-Signals.html

docs/reference/gobject/html/gobject-Standard-Parameter-and-Value-Types.html

docs/reference/gobject/html/gobject-The-Base-Object-Type.html

docs/reference/gobject/html/gobject-Type-Information.html

docs/reference/gobject/html/gobject-Value-arrays.html

docs/reference/gobject/html/gobject-Varargs-Value-Collection.html

docs/reference/gobject/html/gobject-memory.html

docs/reference/gobject/html/gobject-properties.html

docs/reference/gobject/html/gobject-query.html

docs/reference/gobject/html/gobject.devhelp

docs/reference/gobject/html/gobject.devhelp2

docs/reference/gobject/html/gtype-conventions.html

docs/reference/gobject/html/gtype-instantiable-classed.html

docs/reference/gobject/html/gtype-non-instantiable-classed.html

docs/reference/gobject/html/gtype-non-instantiable.html

docs/reference/gobject/html/home.png

docs/reference/gobject/html/howto-gobject-chainup.html

docs/reference/gobject/html/howto-gobject-code.html

docs/reference/gobject/html/howto-gobject-construction.html

docs/reference/gobject/html/howto-gobject-destruction.html

docs/reference/gobject/html/howto-gobject-methods.html

docs/reference/gobject/html/howto-gobject.html

docs/reference/gobject/html/howto-interface-implement.html

docs/reference/gobject/html/howto-interface-properties.html

docs/reference/gobject/html/howto-interface.html

docs/reference/gobject/html/howto-signals.html

docs/reference/gobject/html/index.html

docs/reference/gobject/html/index.sgml

docs/reference/gobject/html/ix01.html

docs/reference/gobject/html/ix02.html

docs/reference/gobject/html/ix03.html

docs/reference/gobject/html/ix04.html

docs/reference/gobject/html/ix05.html

docs/reference/gobject/html/ix06.html

docs/reference/gobject/html/ix07.html

docs/reference/gobject/html/ix08.html

docs/reference/gobject/html/left.png

docs/reference/gobject/html/pr01.html

docs/reference/gobject/html/pt01.html

docs/reference/gobject/html/pt02.html

docs/reference/gobject/html/pt03.html

docs/reference/gobject/html/right.png

docs/reference/gobject/html/rn01.html

docs/reference/gobject/html/rn02.html

docs/reference/gobject/html/signal.html

docs/reference/gobject/html/style.css

docs/reference/gobject/html/tools-ginspector.html

docs/reference/gobject/html/tools-gob.html

docs/reference/gobject/html/tools-gtkdoc.html

docs/reference/gobject/html/tools-refdb.html

docs/reference/gobject/html/up.png

docs/reference/gobject/images

docs/reference/gobject/images/glue.png

docs/reference/gobject/tmpl

docs/reference/gobject/tmpl/enumerations_flags.sgml

docs/reference/gobject/tmpl/gboxed.sgml

docs/reference/gobject/tmpl/gclosure.sgml

docs/reference/gobject/tmpl/generic_values.sgml

docs/reference/gobject/tmpl/gobject-unused.sgml

docs/reference/gobject/tmpl/gparamspec.sgml

docs/reference/gobject/tmpl/gtype.sgml

docs/reference/gobject/tmpl/gtypemodule.sgml

docs/reference/gobject/tmpl/gtypeplugin.sgml

docs/reference/gobject/tmpl/objects.sgml

docs/reference/gobject/tmpl/param_value_types.sgml

docs/reference/gobject/tmpl/signals.sgml

docs/reference/gobject/tmpl/value_arrays.sgml

docs/reference/gobject/tmpl/value_collection.sgml

docs/reference/gobject/tut_gobject.xml

docs/reference/gobject/tut_gsignal.xml

docs/reference/gobject/tut_gtype.xml

docs/reference/gobject/tut_howto.xml

docs/reference/gobject/tut_intro.xml

docs/reference/gobject/tut_tools.xml

docs/reference/gobject/version.xml

docs/reference/gobject/version.xml.in

docs/reference/gobject/xml

docs/reference/gobject/xml/enumerations_flags.xml

docs/reference/gobject/xml/gboxed.xml

docs/reference/gobject/xml/gclosure.xml

docs/reference/gobject/xml/generic_values.xml

docs/reference/gobject/xml/gparamspec.xml

docs/reference/gobject/xml/gtype.xml

docs/reference/gobject/xml/gtypemodule.xml

docs/reference/gobject/xml/gtypeplugin.xml

docs/reference/gobject/xml/objects.xml

docs/reference/gobject/xml/param_value_types.xml

docs/reference/gobject/xml/signals.xml

docs/reference/gobject/xml/value_arrays.xml

docs/reference/gobject/xml/value_collection.xml

glib

glib-2.0-uninstalled.pc.in

glib-2.0.pc.in

glib-gettextize.in

glib-zip.in

glib/Makefile.am

glib/Makefile.in

glib/abicheck.sh

glib/galias.h

glib/galiasdef.c

glib/galloca.h

glib/garray.c

glib/garray.h

glib/gasyncqueue.c

glib/gasyncqueue.h

glib/gatomic.c

glib/gatomic.h

glib/gbacktrace.c

glib/gbacktrace.h

glib/gbase64.c

glib/gbase64.h

glib/gbookmarkfile.c

glib/gbookmarkfile.h

glib/gbsearcharray.h

glib/gcache.c

glib/gcache.h

glib/gcompletion.c

glib/gcompletion.h

glib/gconvert.c

glib/gconvert.h

glib/gdataset.c

glib/gdataset.h

glib/gdatasetprivate.h

glib/gdate.c

glib/gdate.h

glib/gdebug.h

glib/gdir.c

glib/gdir.h

glib/gen-unicode-tables.pl

glib/gerror.c

glib/gerror.h

glib/gfileutils.c

glib/gfileutils.h

glib/ghash.c

glib/ghash.h

glib/ghook.c

glib/ghook.h

glib/gi18n-lib.h

glib/gi18n.h

glib/giochannel.c

glib/giochannel.h

glib/giounix.c

glib/giowin32.c

glib/gkeyfile.c

glib/gkeyfile.h

glib/glib-mirroring-tab

glib/glib-mirroring-tab/Makefile

glib/glib-mirroring-tab/gen-mirroring-tab.c

glib/glib-mirroring-tab/packtab.c

glib/glib-mirroring-tab/packtab.h

glib/glib-object.h

glib/glib.h

glib/glib.rc

glib/glib.rc.in

glib/glib.symbols

glib/glibintl.h

glib/glist.c

glib/glist.h

glib/gmacros.h

glib/gmain.c

glib/gmain.h

glib/gmappedfile.c

glib/gmappedfile.h

glib/gmarkup.c

glib/gmarkup.h

glib/gmem.c

glib/gmem.h

glib/gmessages.c

glib/gmessages.h

glib/gmirroringtable.h

glib/gnode.c

glib/gnode.h

glib/gnulib

glib/gnulib/Makefile.am

glib/gnulib/Makefile.in

glib/gnulib/README

glib/gnulib/asnprintf.c

glib/gnulib/g-gnulib.h

glib/gnulib/makefile.msc

glib/gnulib/printf-args.c

glib/gnulib/printf-args.h

glib/gnulib/printf-parse.c

glib/gnulib/printf-parse.h

glib/gnulib/printf.c

glib/gnulib/printf.h

glib/gnulib/vasnprintf.c

glib/gnulib/vasnprintf.h

glib/goption.c

glib/goption.h

glib/gpattern.c

glib/gpattern.h

glib/gprimes.c

glib/gprimes.h

glib/gprintf.c

glib/gprintf.h

glib/gprintfint.h

glib/gqsort.c

glib/gqsort.h

glib/gquark.h

glib/gqueue.c

glib/gqueue.h

glib/grand.c

glib/grand.h

glib/grel.c

glib/grel.h

glib/gscanner.c

glib/gscanner.h

glib/gshell.c

glib/gshell.h

glib/gslice.c

glib/gslice.h

glib/gslist.c

glib/gslist.h

glib/gspawn-win32-helper-console.c

glib/gspawn-win32-helper.c

glib/gspawn-win32.c

glib/gspawn.c

glib/gspawn.h

glib/gstdio.c

glib/gstdio.h

glib/gstrfuncs.c

glib/gstrfuncs.h

glib/gstring.c

glib/gstring.h

glib/gthread.c

glib/gthread.h

glib/gthreadpool.c

glib/gthreadpool.h

glib/gthreadprivate.h

glib/gtimer.c

glib/gtimer.h

glib/gtree.c

glib/gtree.h

glib/gtypes.h

glib/gunibreak.c

glib/gunibreak.h

glib/gunichartables.h

glib/gunicode.h

glib/gunicodeprivate.h

glib/gunicollate.c

glib/gunicomp.h

glib/gunidecomp.c

glib/gunidecomp.h

glib/guniprop.c

glib/gutf8.c

glib/gutils.c

glib/gutils.h

glib/gwin32.c

glib/gwin32.h

glib/libcharset

glib/libcharset/Makefile.am

glib/libcharset/Makefile.in

glib/libcharset/README

glib/libcharset/codeset.m4

glib/libcharset/config.charset

glib/libcharset/glibc21.m4

glib/libcharset/libcharset-glib.patch

glib/libcharset/libcharset.h

glib/libcharset/localcharset.c

glib/libcharset/make-patch.sh

glib/libcharset/ref-add.sin

glib/libcharset/ref-del.sin

glib/libcharset/update.sh

glib/makefile.msc

glib/makefile.msc.in

glib/makegalias.pl

glib/pltcheck.sh

glibconfig.h.win32

glibconfig.h.win32.in

gmodule

gmodule-2.0-uninstalled.pc.in

gmodule-2.0.pc.in

gmodule-export-2.0.pc.in

gmodule-no-export-2.0-uninstalled.pc.in

gmodule-no-export-2.0.pc.in

gmodule/AUTHORS

gmodule/COPYING

gmodule/ChangeLog

gmodule/Makefile.am

gmodule/Makefile.in

gmodule/gmodule-ar.c

gmodule/gmodule-beos.c

gmodule/gmodule-dl.c

gmodule/gmodule-dld.c

gmodule/gmodule-dyld.c

gmodule/gmodule-os2.c

gmodule/gmodule-win32.c

gmodule/gmodule.c

gmodule/gmodule.def

gmodule/gmodule.h

gmodule/gmodule.rc

gmodule/gmodule.rc.in

gmodule/gmoduleconf.h.in

gmodule/gmoduleconf.h.win32

gmodule/makefile.msc

gmodule/makefile.msc.in

gobject

gobject-2.0-uninstalled.pc.in

gobject-2.0.pc.in

gobject/ChangeLog

gobject/Makefile.am

gobject/Makefile.in

gobject/abicheck.sh

gobject/gboxed.c

gobject/gboxed.h

gobject/gclosure.c

gobject/gclosure.h

gobject/genums.c

gobject/genums.h

gobject/glib-genmarshal.1

gobject/glib-genmarshal.c

gobject/glib-mkenums.1

gobject/glib-mkenums.in

gobject/gmarshal.c

gobject/gmarshal.h

gobject/gmarshal.list

gobject/gmarshal.strings

gobject/gobject-query.c

gobject/gobject.c

gobject/gobject.h

gobject/gobject.rc

gobject/gobject.rc.in

gobject/gobject.symbols

gobject/gobjectalias.h

gobject/gobjectaliasdef.c

gobject/gobjectnotifyqueue.c

gobject/gparam.c

gobject/gparam.h

gobject/gparamspecs.c

gobject/gparamspecs.h

gobject/gsignal.c

gobject/gsignal.h

gobject/gsourceclosure.c

gobject/gsourceclosure.h

gobject/gtype.c

gobject/gtype.h

gobject/gtypemodule.c

gobject/gtypemodule.h

gobject/gtypeplugin.c

gobject/gtypeplugin.h

gobject/gvalue.c

gobject/gvalue.h

gobject/gvaluearray.c

gobject/gvaluearray.h

gobject/gvaluecollector.h

gobject/gvaluetransform.c

gobject/gvaluetypes.c

gobject/gvaluetypes.h

gobject/makefile.msc

gobject/makefile.msc.in

gobject/makegobjectalias.pl

gobject/marshal-genstrings.pl

gobject/pltcheck.sh

gobject/stamp-gmarshal.h

gobject/testgobject.c

gthread

gthread-2.0-uninstalled.pc.in

gthread-2.0.pc.in

gthread/ChangeLog

gthread/Makefile.am

gthread/Makefile.in

gthread/gthread-impl.c

gthread/gthread-none.c

gthread/gthread-posix.c

gthread/gthread-win32.c

gthread/gthread.def

gthread/gthread.rc

gthread/gthread.rc.in

gthread/makefile.msc

gthread/makefile.msc.in

gtk-doc.make

install-sh

ltmain.sh

m4macros

m4macros/Makefile.am

m4macros/Makefile.in

m4macros/glib-2.0.m4

m4macros/glib-gettext.m4

makefile.msc

missing

mkinstalldirs

msvc_recommended_pragmas.h

po/ChangeLog

po/Makefile.in.in

po/POTFILES.in

po/am.gmo

po/am.po

po/ar.gmo

po/ar.po

po/az.gmo

po/az.po

po/be.gmo

po/be.po

po/be@latin.gmo

po/be@latin.po

po/bg.gmo

po/bg.po

po/bn.gmo

po/bn.po

po/bn_IN.gmo

po/bn_IN.po

po/bs.gmo

po/bs.po

po/ca.gmo

po/ca.po

po/cs.gmo

po/cs.po

po/cy.gmo

po/cy.po

po/da.gmo

po/da.po

po/de.gmo

po/de.po

po/dz.gmo

po/dz.po

po/el.gmo

po/el.po

po/en_CA.gmo

po/en_CA.po

po/en_GB.gmo

po/en_GB.po

po/eo.gmo

po/eo.po

po/es.gmo

po/es.po

po/et.gmo

po/et.po

po/eu.gmo

po/eu.po

po/fa.gmo

po/fa.po

po/fi.gmo

po/fi.po

po/fr.gmo

po/fr.po

po/ga.gmo

po/ga.po

po/gl.gmo

po/gl.po

po/glib20.pot

po/gu.gmo

po/gu.po

po/he.gmo

po/he.po

po/hi.gmo

po/hi.po

po/hr.gmo

po/hr.po

po/hu.gmo

po/hu.po

po/hy.gmo

po/hy.po

po/id.gmo

po/id.po

po/is.gmo

po/is.po

po/it.gmo

po/it.po

po/ja.gmo

po/ja.po

po/ka.gmo

po/ka.po

po/ko.gmo

po/ko.po

po/ku.gmo

po/ku.po

po/lt.gmo

po/lt.po

po/lv.gmo

po/lv.po

po/mk.gmo

po/mk.po

po/ml.gmo

po/ml.po

po/mn.gmo

po/mn.po

po/ms.gmo

po/ms.po

po/nb.gmo

po/nb.po

po/ne.gmo

po/ne.po

po/nl.gmo

po/nl.po

po/nn.gmo

po/nn.po

po/or.gmo

po/or.po

po/pa.gmo

po/pa.po

po/pl.gmo

po/pl.po

po/po2tbl.sed.in

po/pt.gmo

po/pt.po

po/pt_BR.gmo

po/pt_BR.po

po/ro.gmo

po/ro.po

po/ru.gmo

po/ru.po

po/rw.gmo

po/rw.po

po/sk.gmo

po/sk.po

po/sl.gmo

po/sl.po

po/sq.gmo

po/sq.po

po/sr.gmo

po/sr.po

po/sr@Latn.gmo

po/sr@Latn.po

po/sr@ije.gmo

po/sr@ije.po

po/sv.gmo

po/sv.po

po/ta.gmo

po/ta.po

po/te.gmo

po/te.po

po/th.gmo

po/th.po

po/tl.gmo

po/tl.po

po/tr.gmo

po/tr.po

po/tt.gmo

po/tt.po

po/uk.gmo

po/uk.po

po/vi.gmo

po/vi.po

po/wa.gmo

po/wa.po

po/xh.gmo

po/xh.po

po/yi.gmo

po/yi.po

po/zh_CN.gmo

po/zh_CN.po

po/zh_HK.gmo

po/zh_HK.po

po/zh_TW.gmo

po/zh_TW.po

sanity_check

tests

tests/Makefile.am

tests/Makefile.in

tests/array-test.c

tests/asyncqueue-test.c

tests/atomic-test.c

tests/base64-test.c

tests/bit-test.c

tests/bookmarkfile-test.c

tests/bookmarks

tests/bookmarks/fail-01.xbel

tests/bookmarks/fail-02.xbel

tests/bookmarks/fail-03.xbel

tests/bookmarks/fail-04.xbel

tests/bookmarks/fail-05.xbel

tests/bookmarks/fail-06.xbel

tests/bookmarks/fail-07.xbel

tests/bookmarks/fail-08.xbel

tests/bookmarks/fail-09.xbel

tests/bookmarks/fail-10.xbel

tests/bookmarks/fail-11.xbel

tests/bookmarks/fail-12.xbel

tests/bookmarks/fail-13.xbel

tests/bookmarks/fail-14.xbel

tests/bookmarks/fail-15.xbel

tests/bookmarks/fail-16.xbel

tests/bookmarks/fail-17.xbel

tests/bookmarks/valid-01.xbel

tests/bookmarks/valid-02.xbel

tests/casefold.txt

tests/casemap.txt

tests/child-test.c

tests/collate

tests/collate/collate-1.file

tests/collate/collate-1.in

tests/collate/collate-1.unicode

tests/collate/collate-2.file

tests/collate/collate-2.in

tests/collate/collate-2.unicode

tests/completion-test.c

tests/convert-test.c

tests/cxx-test.C

tests/date-test.c

tests/dirname-test.c

tests/env-test.c

tests/errorcheck-mutex-test.c

tests/file-test.c

tests/gen-casefold-txt.pl

tests/gen-casemap-txt.pl

tests/gio-test.c

tests/gobject

tests/gobject/Makefile.am

tests/gobject/Makefile.in

tests/gobject/accumulator.c

tests/gobject/defaultiface.c

tests/gobject/deftype.c

tests/gobject/gvalue-test.c

tests/gobject/ifacecheck.c

tests/gobject/ifaceinherit.c

tests/gobject/ifaceinit.c

tests/gobject/ifaceproperties.c

tests/gobject/override.c

tests/gobject/paramspec-test.c

tests/gobject/references.c

tests/gobject/singleton.c

tests/gobject/testcommon.h

tests/gobject/testmarshal.c

tests/gobject/testmarshal.h

tests/gobject/testmarshal.list

tests/gobject/testmodule.c

tests/gobject/testmodule.h

tests/hash-test.c

tests/iochannel-test-infile

tests/iochannel-test.c

tests/keyfile-test.c

tests/libmoduletestplugin_a.c

tests/libmoduletestplugin_b.c

tests/list-test.c

tests/mainloop-test.c

tests/makefile.msc

tests/makefile.msc.in

tests/mapping-test.c

tests/markup-escape-test.c

tests/markup-test.c

tests/markups

tests/markups/expected-1

tests/markups/expected-10

tests/markups/expected-11

tests/markups/expected-2

tests/markups/expected-3

tests/markups/expected-4

tests/markups/expected-5

tests/markups/expected-6

tests/markups/expected-7

tests/markups/expected-8

tests/markups/expected-9

tests/markups/fail-1.gmarkup

tests/markups/fail-10.gmarkup

tests/markups/fail-11.gmarkup

tests/markups/fail-12.gmarkup

tests/markups/fail-13.gmarkup

tests/markups/fail-14.gmarkup

tests/markups/fail-15.gmarkup

tests/markups/fail-16.gmarkup

tests/markups/fail-17.gmarkup

tests/markups/fail-18.gmarkup

tests/markups/fail-19.gmarkup

tests/markups/fail-2.gmarkup

tests/markups/fail-20.gmarkup

tests/markups/fail-21.gmarkup

tests/markups/fail-22.gmarkup

tests/markups/fail-23.gmarkup

tests/markups/fail-24.gmarkup

tests/markups/fail-25.gmarkup

tests/markups/fail-26.gmarkup

tests/markups/fail-27.gmarkup

tests/markups/fail-28.gmarkup

tests/markups/fail-29.gmarkup

tests/markups/fail-3.gmarkup

tests/markups/fail-30.gmarkup

tests/markups/fail-31.gmarkup

tests/markups/fail-32.gmarkup

tests/markups/fail-33.gmarkup

tests/markups/fail-34.gmarkup

tests/markups/fail-35.gmarkup

tests/markups/fail-36.gmarkup

tests/markups/fail-37.gmarkup

tests/markups/fail-38.gmarkup

tests/markups/fail-39.gmarkup

tests/markups/fail-4.gmarkup

tests/markups/fail-40.gmarkup

tests/markups/fail-5.gmarkup

tests/markups/fail-6.gmarkup

tests/markups/fail-7.gmarkup

tests/markups/fail-8.gmarkup

tests/markups/fail-9.gmarkup

tests/markups/valid-1.gmarkup

tests/markups/valid-10.gmarkup

tests/markups/valid-11.gmarkup

tests/markups/valid-2.gmarkup

tests/markups/valid-3.gmarkup

tests/markups/valid-4.gmarkup

tests/markups/valid-5.gmarkup

tests/markups/valid-6.gmarkup

tests/markups/valid-7.gmarkup

tests/markups/valid-8.gmarkup

tests/markups/valid-9.gmarkup

tests/memchunks.c

tests/module-test.c

tests/node-test.c

tests/option-test.c

tests/patterntest.c

tests/printf-test.c

tests/qsort-test.c

tests/queue-test.c

tests/rand-test.c

tests/refcount

tests/refcount/Makefile.am

tests/refcount/Makefile.in

tests/refcount/closures.c

tests/refcount/objects.c

tests/refcount/objects2.c

tests/refcount/properties.c

tests/refcount/properties2.c

tests/refcount/signals.c

tests/relation-test.c

tests/run-bookmark-test.sh

tests/run-collate-tests.sh

tests/run-markup-tests.sh

tests/shell-test.c

tests/slice-color.c

tests/slice-test.c

tests/slist-test.c

tests/spawn-test-win32-gui.c

tests/spawn-test.c

tests/strfunc-test.c

tests/string-test.c

tests/strtod-test.c

tests/strtoll-test.c

tests/testgdate.c

tests/testgdateparser.c

tests/testglib.c

tests/thread-test.c

tests/threadpool-test.c

tests/timeloop-basic.c

tests/timeloop-closure.c

tests/timeloop.c

tests/tree-test.c

tests/type-test.c

tests/unicode-caseconv.c

tests/unicode-collate.c

tests/unicode-encoding.c

tests/unicode-normalize.c

tests/uri-test.c

tests/utf8-pointer.c

tests/utf8-validate.c

tests/utf8.txt

files removed:
upstream

upstream/tarballs

upstream/tarballs/glib-2.12.11.tar.gz

Show diffs side-by-side

added added

removed removed

glib/gutf8.c

/* gutf8.c - Operations on UTF-8 strings.

* This library is free software; you can redistribute it and/or

* modify it under the terms of the GNU Lesser General Public

* License as published by the Free Software Foundation; either

* version 2 of the License, or (at your option) any later version.

* This library is distributed in the hope that it will be useful,

* but WITHOUT ANY WARRANTY; without even the implied warranty of

* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU

* Lesser General Public License for more details.

* You should have received a copy of the GNU Lesser General Public

* License along with this library; if not, write to the

* Free Software Foundation, Inc., 59 Temple Place - Suite 330,

* Boston, MA 02111-1307, USA.
*/

#include "config.h"

#include <stdlib.h>

#ifdef HAVE_CODESET

#include <langinfo.h>

#endif

#include <string.h>

#include "glib.h"

#ifdef G_PLATFORM_WIN32

#include <stdio.h>

#define STRICT

#include <windows.h>

#undef STRICT

#endif

#include "libcharset/libcharset.h"

#include "glibintl.h"

#include "galias.h"

/* Classify the lead byte Char of a UTF-8 sequence.
 *
 * On return Len holds the length in bytes (1 to 6) of the sequence that
 * Char introduces, and Mask holds the bit mask that extracts the payload
 * bits from Char.  If Char cannot start a sequence, Len is set to -1 and
 * Mask is left unchanged.
 *
 * Char is evaluated several times, so it must be free of side effects.
 * The body is wrapped in do { } while (0) so the macro behaves as a single
 * statement (safe inside an unbraced if/else); terminate it with ';'.
 */
#define UTF8_COMPUTE(Char, Mask, Len)                                         \
  do                                                                          \
    {                                                                         \
      if ((Char) < 128)                                                       \
        {                                                                     \
          (Len) = 1;                                                          \
          (Mask) = 0x7f;                                                      \
        }                                                                     \
      else if (((Char) & 0xe0) == 0xc0)                                       \
        {                                                                     \
          (Len) = 2;                                                          \
          (Mask) = 0x1f;                                                      \
        }                                                                     \
      else if (((Char) & 0xf0) == 0xe0)                                       \
        {                                                                     \
          (Len) = 3;                                                          \
          (Mask) = 0x0f;                                                      \
        }                                                                     \
      else if (((Char) & 0xf8) == 0xf0)                                       \
        {                                                                     \
          (Len) = 4;                                                          \
          (Mask) = 0x07;                                                      \
        }                                                                     \
      else if (((Char) & 0xfc) == 0xf8)                                       \
        {                                                                     \
          (Len) = 5;                                                          \
          (Mask) = 0x03;                                                      \
        }                                                                     \
      else if (((Char) & 0xfe) == 0xfc)                                       \
        {                                                                     \
          (Len) = 6;                                                          \
          (Mask) = 0x01;                                                      \
        }                                                                     \
      else                                                                    \
        (Len) = -1;                                                           \
    }                                                                         \
  while (0)

/* Number of bytes (1 to 6) needed to encode the code point Char in UTF-8.
 * Char is evaluated several times, so it must be free of side effects.
 */
#define UTF8_LENGTH(Char)              \
  ((Char) < 0x80 ? 1 :                 \
   ((Char) < 0x800 ? 2 :               \
    ((Char) < 0x10000 ? 3 :            \
     ((Char) < 0x200000 ? 4 :          \
      ((Char) < 0x4000000 ? 5 : 6)))))

/* Decode the Len-byte UTF-8 sequence starting at Chars into Result.
 *
 * Mask selects the payload bits of the lead byte (as computed by
 * UTF8_COMPUTE); Count is a scratch integer lvalue used as the byte index.
 * If any trailing byte is not of the form 10xxxxxx, Result is set to -1
 * and decoding stops at that byte.
 *
 * Wrapped in do { } while (0) so the macro behaves as a single statement;
 * terminate it with ';'.  Arguments are evaluated several times.
 */
#define UTF8_GET(Result, Chars, Count, Mask, Len)                             \
  do                                                                          \
    {                                                                         \
      (Result) = (Chars)[0] & (Mask);                                         \
      for ((Count) = 1; (Count) < (Len); ++(Count))                           \
        {                                                                     \
          if (((Chars)[(Count)] & 0xc0) != 0x80)                              \
            {                                                                 \
              (Result) = -1;                                                  \
              break;                                                          \
            }                                                                 \
          (Result) <<= 6;                                                     \
          (Result) |= ((Chars)[(Count)] & 0x3f);                              \
        }                                                                     \
    }                                                                         \
  while (0)

/* Non-zero if Char is accepted as a Unicode character: it must lie below
 * 0x110000, must not be a surrogate (0xD800-0xDFFF), must not be in the
 * range 0xFDD0-0xFDEF, and its low 16 bits must not be 0xFFFE or 0xFFFF.
 * Char is evaluated several times, so it must be free of side effects.
 */
#define UNICODE_VALID(Char)                   \
    ((Char) < 0x110000 &&                     \
     (((Char) & 0xFFFFF800) != 0xD800) &&     \
     ((Char) < 0xFDD0 || (Char) > 0xFDEF) &&  \
     ((Char) & 0xFFFE) != 0xFFFE)

104

105

106

/* Length in bytes of the UTF-8 sequence introduced by each possible lead
 * byte, indexed by the byte value.  Bytes 0x80-0xBF, 0xFE and 0xFF are
 * given a length of 1.
 */
static const gchar utf8_skip_data[256] = {
  /* 0x00 - 0x1F */
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  /* 0x20 - 0x3F */
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  /* 0x40 - 0x5F */
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  /* 0x60 - 0x7F */
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  /* 0x80 - 0x9F */
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  /* 0xA0 - 0xBF */
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  /* 0xC0 - 0xDF: two-byte lead bytes */
  2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
  /* 0xE0 - 0xEF: three bytes; 0xF0 - 0xF7: four; 0xF8 - 0xFB: five;
   * 0xFC - 0xFD: six; 0xFE, 0xFF: one */
  3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,5,5,5,5,6,6,1,1
};

/* Public, read-only view of the table above */
const gchar * const g_utf8_skip = utf8_skip_data;

118

119

/**

120

* g_utf8_find_prev_char:

121

* @str: pointer to the beginning of a UTF-8 encoded string

122

* @p: pointer to some position within @str

123

124

* Given a position @p with a UTF-8 encoded string @str, find the start

125

* of the previous UTF-8 character starting before @p. Returns %NULL if no

126

* UTF-8 characters are present in @str before @p.

127

128

* @p does not have to be at the beginning of a UTF-8 character. No check

129

* is made to see if the character found is actually valid other than

130

* it starts with an appropriate byte.

131

132

* Return value: a pointer to the found character or %NULL.

133

**/

134

gchar *

135

g_utf8_find_prev_char (const char *str,

136

const char *p)

137

{

138

for (--p; p >= str; --p)

139

{

140

if ((*p & 0xc0) != 0x80)

141

return (gchar *)p;

142

}

143

return NULL;

144

}

145

146

/**

147

* g_utf8_find_next_char:

148

* @p: a pointer to a position within a UTF-8 encoded string

149

* @end: a pointer to the end of the string, or %NULL to indicate

150

* that the string is nul-terminated, in which case

151

* the returned value will be

152

153

* Finds the start of the next UTF-8 character in the string after @p.

154

155

* @p does not have to be at the beginning of a UTF-8 character. No check

156

* is made to see if the character found is actually valid other than

157

* it starts with an appropriate byte.

158

159

* Return value: a pointer to the found character or %NULL

160

**/

161

gchar *

162

g_utf8_find_next_char (const gchar *p,

163

const gchar *end)

164

{

165

if (*p)

166

{

167

if (end)

168

for (++p; p < end && (*p & 0xc0) == 0x80; ++p)

169

;

170

else

171

for (++p; (*p & 0xc0) == 0x80; ++p)

172

;

173

}

174

return (p == end) ? NULL : (gchar *)p;

175

}

176

177

/**

178

* g_utf8_prev_char:

179

* @p: a pointer to a position within a UTF-8 encoded string

180

181

* Finds the previous UTF-8 character in the string before @p.

182

183

* @p does not have to be at the beginning of a UTF-8 character. No check

184

* is made to see if the character found is actually valid other than

185

* it starts with an appropriate byte. If @p might be the first

186

* character of the string, you must use g_utf8_find_prev_char() instead.

187

188

* Return value: a pointer to the found character.

189

**/

190

gchar *

191

g_utf8_prev_char (const gchar *p)

192

{

193

while (TRUE)

194

{

195

p--;

196

if ((*p & 0xc0) != 0x80)

197

return (gchar *)p;

198

}

199

}

200

201

/**

202

* g_utf8_strlen:

203

* @p: pointer to the start of a UTF-8 encoded string.

204

* @max: the maximum number of bytes to examine. If @max

205

* is less than 0, then the string is assumed to be

206

* nul-terminated. If @max is 0, @p will not be examined and

207

* may be %NULL.

208

209

* Returns the length of the string in characters.

210

211

* Return value: the length of the string in characters

212

**/

213

glong

214

g_utf8_strlen (const gchar *p,

215

gssize max)

216

{

217

glong len = 0;

218

const gchar *start = p;

219

g_return_val_if_fail (p != NULL || max == 0, 0);

220

221

if (max < 0)

222

{

223

while (*p)

224

{

225

p = g_utf8_next_char (p);

226

++len;

227

}

228

}

229

else

230

{

231

if (max == 0 || !*p)

232

return 0;

233

234

p = g_utf8_next_char (p);

235

236

while (p - start < max && *p)

237

{

238

++len;

239

p = g_utf8_next_char (p);

240

}

241

242

/* only do the last len increment if we got a complete

243

* char (don't count partial chars)

244

245

if (p - start <= max)

246

++len;

247

}

248

249

return len;

250

}

251

252

/**

253

* g_utf8_get_char:

254

* @p: a pointer to Unicode character encoded as UTF-8

255

256

* Converts a sequence of bytes encoded as UTF-8 to a Unicode character.

257

* If @p does not point to a valid UTF-8 encoded character, results are

258

* undefined. If you are not sure that the bytes are complete

259

* valid Unicode characters, you should use g_utf8_get_char_validated()

260

* instead.

261

262

* Return value: the resulting character

263

**/

264

gunichar

265

g_utf8_get_char (const gchar *p)

266

{

267

int i, mask = 0, len;

268

gunichar result;

269

unsigned char c = (unsigned char) *p;

270

271

UTF8_COMPUTE (c, mask, len);

272

if (len == -1)

273

return (gunichar)-1;

274

UTF8_GET (result, p, i, mask, len);

275

276

return result;

277

}

278

279

/**

280

* g_utf8_offset_to_pointer:

281

* @str: a UTF-8 encoded string

282

* @offset: a character offset within @str

283

284

* Converts from an integer character offset to a pointer to a position

285

* within the string.

286

287

* Since 2.10, this function allows to pass a negative @offset to

288

* step backwards. It is usually worth stepping backwards from the end

289

* instead of forwards if @offset is in the last fourth of the string,

290

* since moving forward is about 3 times faster than moving backward.

291

292

* Return value: the resulting pointer

293

**/

294

gchar *

295

g_utf8_offset_to_pointer (const gchar *str,

296

glong offset)

297

{

298

const gchar *s = str;

299

300

if (offset > 0)

301

while (offset--)

302

s = g_utf8_next_char (s);

303

else

304

{

305

const char *s1;

306

307

/* This nice technique for fast backwards stepping

308

* through a UTF-8 string was dubbed "stutter stepping"

309

* by its inventor, Larry Ewing.

310

311

while (offset)

312

{

313

s1 = s;

314

s += offset;

315

while ((*s & 0xc0) == 0x80)

316

s--;

317

318

offset += g_utf8_pointer_to_offset (s, s1);

319

}

320

}

321

322

return (gchar *)s;

323

}

324

325

/**

326

* g_utf8_pointer_to_offset:

327

* @str: a UTF-8 encoded string

328

* @pos: a pointer to a position within @str

329

330

* Converts from a pointer to position within a string to a integer

331

* character offset.

332

333

* Since 2.10, this function allows @pos to be before @str, and returns

334

* a negative offset in this case.

335

336

* Return value: the resulting character offset

337

**/

338

glong

339

g_utf8_pointer_to_offset (const gchar *str,

340

const gchar *pos)

341

{

342

const gchar *s = str;

343

glong offset = 0;

344

345

if (pos < str)

346

offset = - g_utf8_pointer_to_offset (pos, str);

347

else

348

while (s < pos)

349

{

350

s = g_utf8_next_char (s);

351

offset++;

352

}

353

354

return offset;

355

}

356

357

358

/**

359

* g_utf8_strncpy:

360

* @dest: buffer to fill with characters from @src

361

* @src: UTF-8 encoded string

362

* @n: character count

363

364

* Like the standard C strncpy() function, but

365

* copies a given number of characters instead of a given number of

366

* bytes. The @src string must be valid UTF-8 encoded text.

367

* (Use g_utf8_validate() on all text before trying to use UTF-8

368

* utility functions with it.)

369

370

* Return value: @dest

371

**/

372

gchar *

373

g_utf8_strncpy (gchar *dest,

374

const gchar *src,

375

gsize n)

376

{

377

const gchar *s = src;

378

while (n && *s)

379

{

380

s = g_utf8_next_char(s);

381

n--;

382

}

383

strncpy(dest, src, s - src);

384

dest[s - src] = 0;

385

return dest;

386

}

387

388

G_LOCK_DEFINE_STATIC (aliases);

389

390

static GHashTable *

391

get_alias_hash (void)

392

{

393

static GHashTable *alias_hash = NULL;

394

const char *aliases;

395

396

G_LOCK (aliases);

397

398

if (!alias_hash)

399

{

400

alias_hash = g_hash_table_new (g_str_hash, g_str_equal);

401

402

aliases = _g_locale_get_charset_aliases ();

403

while (*aliases != '\0')

404

{

405

const char *canonical;

406

const char *alias;

407

const char **alias_array;

408

int count = 0;

409

410

alias = aliases;

411

aliases += strlen (aliases) + 1;

412

canonical = aliases;

413

aliases += strlen (aliases) + 1;

414

415

alias_array = g_hash_table_lookup (alias_hash, canonical);

416

if (alias_array)

417

{

418

while (alias_array[count])

419

count++;

420

}

421

422

alias_array = g_renew (const char *, alias_array, count + 2);

423

alias_array[count] = alias;

424

alias_array[count + 1] = NULL;

425

426

g_hash_table_insert (alias_hash, (char *)canonical, alias_array);

427

}

428

}

429

430

G_UNLOCK (aliases);

431

432

return alias_hash;

433

}

434

435

/* As an abuse of the alias table, the following routines gets

436

* the charsets that are aliases for the canonical name.

437

438

G_GNUC_INTERNAL const char **

439

_g_charset_get_aliases (const char *canonical_name)

440

{

441

GHashTable *alias_hash = get_alias_hash ();

442

443

return g_hash_table_lookup (alias_hash, canonical_name);

444

}

445

446

static gboolean

447

g_utf8_get_charset_internal (const char *raw_data,

448

const char **a)

449

{

450

const char *charset = getenv("CHARSET");

451

452

if (charset && *charset)

453

{

454

*a = charset;

455

456

if (charset && strstr (charset, "UTF-8"))

457

return TRUE;

458

else

459

return FALSE;

460

}

461

462

/* The libcharset code tries to be thread-safe without

463

* a lock, but has a memory leak and a missing memory

464

* barrier, so we lock for it

465

466

G_LOCK (aliases);

467

charset = _g_locale_charset_unalias (raw_data);

468

G_UNLOCK (aliases);

469

470

if (charset && *charset)

471

{

472

*a = charset;

473

474

if (charset && strstr (charset, "UTF-8"))

475

return TRUE;

476

else

477

return FALSE;

478

}

479

480

/* Assume this for compatibility at present. */

481

*a = "US-ASCII";

482

483

return FALSE;

484

}

485

486

typedef struct _GCharsetCache GCharsetCache;

487

488

struct _GCharsetCache {

489

gboolean is_utf8;

490

gchar *raw;

491

gchar *charset;

492

};

493

494

static void

495

charset_cache_free (gpointer data)

496

{

497

GCharsetCache *cache = data;

498

g_free (cache->raw);

499

g_free (cache->charset);

500

g_free (cache);

501

}

502

503

/**

504

* g_get_charset:

505

* @charset: return location for character set name

506

507

* Obtains the character set for the current locale; you might use

508

* this character set as an argument to g_convert(), to convert from

509

* the current locale's encoding to some other encoding. (Frequently

510

* g_locale_to_utf8() and g_locale_from_utf8() are nice shortcuts,

511

* though.)

512

513

* The return value is %TRUE if the locale's encoding is UTF-8, in that

514

* case you can perhaps avoid calling g_convert().

515

516

* The string returned in @charset is not allocated, and should not be

517

* freed.

518

519

* Return value: %TRUE if the returned charset is UTF-8

520

**/

521

gboolean

522

g_get_charset (G_CONST_RETURN char **charset)

523

{

524

static GStaticPrivate cache_private = G_STATIC_PRIVATE_INIT;

525

GCharsetCache *cache = g_static_private_get (&cache_private);

526

const gchar *raw;

527

528

if (!cache)

529

{

530

cache = g_new0 (GCharsetCache, 1);

531

g_static_private_set (&cache_private, cache, charset_cache_free);

532

}

533

534

raw = _g_locale_charset_raw ();

535

536

if (!(cache->raw && strcmp (cache->raw, raw) == 0))

537

{

538

const gchar *new_charset;

539

540

g_free (cache->raw);

541

g_free (cache->charset);

542

cache->raw = g_strdup (raw);

543

cache->is_utf8 = g_utf8_get_charset_internal (raw, &new_charset);

544

cache->charset = g_strdup (new_charset);

545

}

546

547

if (charset)

548

*charset = cache->charset;

549

550

return cache->is_utf8;

551

}

552

553

/* unicode_strchr */

554

555

/**

556

* g_unichar_to_utf8:

557

* @c: a Unicode character code

558

* @outbuf: output buffer, must have at least 6 bytes of space.

559

* If %NULL, the length will be computed and returned

560

* and nothing will be written to @outbuf.

561

562

* Converts a single character to UTF-8.

563

564

* Return value: number of bytes written

565

**/

566

int

567

g_unichar_to_utf8 (gunichar c,

568

gchar *outbuf)

569

{

570

/* If this gets modified, also update the copy in g_string_insert_unichar() */

571

guint len = 0;

572

int first;

573

int i;

574

575

if (c < 0x80)

576

{

577

first = 0;

578

len = 1;

579

}

580

else if (c < 0x800)

581

{

582

first = 0xc0;

583

len = 2;

584

}

585

else if (c < 0x10000)

586

{

587

first = 0xe0;

588

len = 3;

589

}

590

else if (c < 0x200000)

591

{

592

first = 0xf0;

593

len = 4;

594

}

595

else if (c < 0x4000000)

596

{

597

first = 0xf8;

598

len = 5;

599

}

600

else

601

{

602

first = 0xfc;

603

len = 6;

604

}

605

606

if (outbuf)

607

{

608

for (i = len - 1; i > 0; --i)

609

{

610

outbuf[i] = (c & 0x3f) | 0x80;

611

c >>= 6;

612

}

613

outbuf[0] = c | first;

614

}

615

616

return len;

617

}

618

619

/**

620

* g_utf8_strchr:

621

* @p: a nul-terminated UTF-8 encoded string

622

* @len: the maximum length of @p

623

* @c: a Unicode character

624

625

* Finds the leftmost occurrence of the given Unicode character

626

* in a UTF-8 encoded string, while limiting the search to @len bytes.

627

* If @len is -1, allow unbounded search.

628

629

* Return value: %NULL if the string does not contain the character,

630

* otherwise, a pointer to the start of the leftmost occurrence of

631

* the character in the string.

632

**/

633

gchar *

634

g_utf8_strchr (const char *p,

635

gssize len,

636

gunichar c)

637

{

638

gchar ch[10];

639

640

gint charlen = g_unichar_to_utf8 (c, ch);

641

ch[charlen] = '\0';

642

643

return g_strstr_len (p, len, ch);

644

}

645

646

647

/**

648

* g_utf8_strrchr:

649

* @p: a nul-terminated UTF-8 encoded string

650

* @len: the maximum length of @p

651

* @c: a Unicode character

652

653

* Find the rightmost occurrence of the given Unicode character

654

* in a UTF-8 encoded string, while limiting the search to @len bytes.

655

* If @len is -1, allow unbounded search.

656

657

* Return value: %NULL if the string does not contain the character,

658

* otherwise, a pointer to the start of the rightmost occurrence of the

659

* character in the string.

660

**/

661

gchar *

662

g_utf8_strrchr (const char *p,

663

gssize len,

664

gunichar c)

665

{

666

gchar ch[10];

667

668

gint charlen = g_unichar_to_utf8 (c, ch);

669

ch[charlen] = '\0';

670

671

return g_strrstr_len (p, len, ch);

672

}

673

674

675

/* Like g_utf8_get_char, but take a maximum length

676

* and return (gunichar)-2 on incomplete trailing character

677

678

static inline gunichar

679

g_utf8_get_char_extended (const gchar *p,

680

gssize max_len)

681

{

682

guint i, len;

683

gunichar wc = (guchar) *p;

684

685

if (wc < 0x80)

686

{

687

return wc;

688

}

689

else if (wc < 0xc0)

690

{

691

return (gunichar)-1;

692

}

693

else if (wc < 0xe0)

694

{

695

len = 2;

696

wc &= 0x1f;

697

}

698

else if (wc < 0xf0)

699

{

700

len = 3;

701

wc &= 0x0f;

702

}

703

else if (wc < 0xf8)

704

{

705

len = 4;

706

wc &= 0x07;

707

}

708

else if (wc < 0xfc)

709

{

710

len = 5;

711

wc &= 0x03;

712

}

713

else if (wc < 0xfe)

714

{

715

len = 6;

716

wc &= 0x01;

717

}

718

else

719

{

720

return (gunichar)-1;

721

}

722

723

if (max_len >= 0 && len > max_len)

724

{

725

for (i = 1; i < max_len; i++)

726

{

727

if ((((guchar *)p)[i] & 0xc0) != 0x80)

728

return (gunichar)-1;

729

}

730

return (gunichar)-2;

731

}

732

733

for (i = 1; i < len; ++i)

734

{

735

gunichar ch = ((guchar *)p)[i];

736

737

if ((ch & 0xc0) != 0x80)

738

{

739

if (ch)

740

return (gunichar)-1;

741

else

742

return (gunichar)-2;

743

}

744

745

wc <<= 6;

746

wc |= (ch & 0x3f);

747

}

748

749

if (UTF8_LENGTH(wc) != len)

750

return (gunichar)-1;

751

752

return wc;

753

}

754

755

/**

756

* g_utf8_get_char_validated:

757

* @p: a pointer to Unicode character encoded as UTF-8

758

* @max_len: the maximum number of bytes to read, or -1, for no maximum.

759

760

* Convert a sequence of bytes encoded as UTF-8 to a Unicode character.

761

* This function checks for incomplete characters, for invalid characters

762

* such as characters that are out of the range of Unicode, and for

763

* overlong encodings of valid characters.

764

765

* Return value: the resulting character. If @p points to a partial

766

* sequence at the end of a string that could begin a valid

767

* character (or if @max_len is zero), returns (gunichar)-2;

768

* otherwise, if @p does not point to a valid UTF-8 encoded

769

* Unicode character, returns (gunichar)-1.

770

**/

771

gunichar

772

g_utf8_get_char_validated (const gchar *p,

773

gssize max_len)

774

{

775

gunichar result;

776

777

if (max_len == 0)

778

return (gunichar)-2;

779

780

result = g_utf8_get_char_extended (p, max_len);

781

782

if (result & 0x80000000)

783

return result;

784

else if (!UNICODE_VALID (result))

785

return (gunichar)-1;

786

else

787

return result;

788

}

789

790

/**

791

* g_utf8_to_ucs4_fast:

792

* @str: a UTF-8 encoded string

793

* @len: the maximum length of @str to use. If @len < 0, then

794

* the string is nul-terminated.

795

* @items_written: location to store the number of characters in the

796

* result, or %NULL.

797

798

* Convert a string from UTF-8 to a 32-bit fixed width

799

* representation as UCS-4, assuming valid UTF-8 input.

800

* This function is roughly twice as fast as g_utf8_to_ucs4()

801

* but does no error checking on the input.

802

803

* Return value: a pointer to a newly allocated UCS-4 string.

804

* This value must be freed with g_free().

805

**/

806

gunichar *

807

g_utf8_to_ucs4_fast (const gchar *str,

808

glong len,

809

glong *items_written)

810

{

811

gint j, charlen;

812

gunichar *result;

813

gint n_chars, i;

814

const gchar *p;

815

816

g_return_val_if_fail (str != NULL, NULL);

817

818

p = str;

819

n_chars = 0;

820

if (len < 0)

821

{

822

while (*p)

823

{

824

p = g_utf8_next_char (p);

825

++n_chars;

826

}

827

}

828

else

829

{

830

while (p < str + len && *p)

831

{

832

p = g_utf8_next_char (p);

833

++n_chars;

834

}

835

}

836

837

result = g_new (gunichar, n_chars + 1);

838

839

p = str;

840

for (i=0; i < n_chars; i++)

841

{

842

gunichar wc = ((unsigned char *)p)[0];

843

844

if (wc < 0x80)

845

{

846

result[i] = wc;

847

p++;

848

}

849

else

850

{

851

if (wc < 0xe0)

852

{

853

charlen = 2;

854

wc &= 0x1f;

855

}

856

else if (wc < 0xf0)

857

{

858

charlen = 3;

859

wc &= 0x0f;

860

}

861

else if (wc < 0xf8)

862

{

863

charlen = 4;

864

wc &= 0x07;

865

}

866

else if (wc < 0xfc)

867

{

868

charlen = 5;

869

wc &= 0x03;

870

}

871

else

872

{

873

charlen = 6;

874

wc &= 0x01;

875

}

876

877

for (j = 1; j < charlen; j++)

878

{

879

wc <<= 6;

880

wc |= ((unsigned char *)p)[j] & 0x3f;

881

}

882

883

result[i] = wc;

884

p += charlen;

885

}

886

}

887

result[i] = 0;

888

889

if (items_written)

890

*items_written = i;

891

892

return result;

893

}

894

895

/**

896

* g_utf8_to_ucs4:

897

* @str: a UTF-8 encoded string

898

* @len: the maximum length of @str to use. If @len < 0, then

899

* the string is nul-terminated.

900

* @items_read: location to store number of bytes read, or %NULL.

901

* If %NULL, then %G_CONVERT_ERROR_PARTIAL_INPUT will be

902

* returned in case @str contains a trailing partial

903

* character. If an error occurs then the index of the

904

* invalid input is stored here.

905

* @items_written: location to store number of characters written or %NULL.

906

* The value here stored does not include the trailing 0

907

* character.

908

* @error: location to store the error occuring, or %NULL to ignore

909

* errors. Any of the errors in #GConvertError other than

910

* %G_CONVERT_ERROR_NO_CONVERSION may occur.

911

912

* Convert a string from UTF-8 to a 32-bit fixed width

913

* representation as UCS-4. A trailing 0 will be added to the

914

* string after the converted text.

915

916

* Return value: a pointer to a newly allocated UCS-4 string.

917

* This value must be freed with g_free(). If an

918

* error occurs, %NULL will be returned and

919

* @error set.

920

**/

921

gunichar *

922

g_utf8_to_ucs4 (const gchar *str,

923

glong len,

924

glong *items_read,

925

glong *items_written,

926

GError **error)

927

{

928

gunichar *result = NULL;

929

gint n_chars, i;

930

const gchar *in;

931

932

in = str;

933

n_chars = 0;

934

while ((len < 0 || str + len - in > 0) && *in)

935

{

936

gunichar wc = g_utf8_get_char_extended (in, len < 0 ? 6 : str + len - in);

937

if (wc & 0x80000000)

938

{

939

if (wc == (gunichar)-2)

940

{

941

if (items_read)

942

break;

943

else

944

g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_PARTIAL_INPUT,

945

_("Partial character sequence at end of input"));

946

}

947

else

948

g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE,

949

_("Invalid byte sequence in conversion input"));

950

951

goto err_out;

952

}

953

954

n_chars++;

955

956

in = g_utf8_next_char (in);

957

}

958

959

result = g_new (gunichar, n_chars + 1);

960

961

in = str;

962

for (i=0; i < n_chars; i++)

963

{

964

result[i] = g_utf8_get_char (in);

965

in = g_utf8_next_char (in);

966

}

967

result[i] = 0;

968

969

if (items_written)

970

*items_written = n_chars;

971

972

err_out:

973

if (items_read)

974

*items_read = in - str;

975

976

return result;

977

}

978

979

/**

980

* g_ucs4_to_utf8:

981

* @str: a UCS-4 encoded string

982

* @len: the maximum length (number of characters) of @str to use.

983

* If @len < 0, then the string is terminated with a 0 character.

984

* @items_read: location to store number of characters read, or %NULL.

985

* @items_written: location to store number of bytes written or %NULL.

986

* The value here stored does not include the trailing 0

987

* byte.

988

* @error: location to store the error occuring, or %NULL to ignore

989

* errors. Any of the errors in #GConvertError other than

990

* %G_CONVERT_ERROR_NO_CONVERSION may occur.

991

992

* Convert a string from a 32-bit fixed width representation as UCS-4.

993

* to UTF-8. The result will be terminated with a 0 byte.

994

995

* Return value: a pointer to a newly allocated UTF-8 string.

996

* This value must be freed with g_free(). If an

997

* error occurs, %NULL will be returned and

998

* @error set. In that case, @items_read will be

999

* set to the position of the first invalid input

1000

* character.

1001

**/

1002

gchar *

1003

g_ucs4_to_utf8 (const gunichar *str,

1004

glong len,

1005

glong *items_read,

1006

glong *items_written,

1007

GError **error)

1008

{

1009

gint result_length;

1010

gchar *result = NULL;

1011

gchar *p;

1012

gint i;

1013

1014

result_length = 0;

1015

for (i = 0; len < 0 || i < len ; i++)

1016

{

1017

if (!str[i])

1018

break;

1019

1020

if (str[i] >= 0x80000000)

1021

{

1022

g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE,

1023

_("Character out of range for UTF-8"));

1024

goto err_out;

1025

}

1026

1027

result_length += UTF8_LENGTH (str[i]);

1028

}

1029

1030

result = g_malloc (result_length + 1);

1031

p = result;

1032

1033

i = 0;

1034

while (p < result + result_length)

1035

p += g_unichar_to_utf8 (str[i++], p);

1036

1037

*p = '\0';

1038

1039

if (items_written)

1040

*items_written = p - result;

1041

1042

err_out:

1043

if (items_read)

1044

*items_read = i;

1045

1046

return result;

1047

}

1048

1049

/* Code point encoded by the UTF-16 surrogate pair (h, l): h is the high
 * surrogate (0xd800 based), l the low surrogate (0xdc00 based). */
#define SURROGATE_VALUE(h,l) (0x10000 + ((h) - 0xd800) * 0x400 + (l) - 0xdc00)

1050

1051

/**

1052

* g_utf16_to_utf8:

1053

* @str: a UTF-16 encoded string

1054

* @len: the maximum length (number of <type>gunichar2</type>) of @str to use.

1055

* If @len < 0, then the string is terminated with a 0 character.

1056

* @items_read: location to store number of words read, or %NULL.

1057

* If %NULL, then %G_CONVERT_ERROR_PARTIAL_INPUT will be

1058

* returned in case @str contains a trailing partial

1059

* character. If an error occurs then the index of the

1060

* invalid input is stored here.

1061

* @items_written: location to store number of bytes written, or %NULL.

1062

* The value stored here does not include the trailing

1063

* 0 byte.

1064

* @error: location to store the error occuring, or %NULL to ignore

1065

* errors. Any of the errors in #GConvertError other than

1066

* %G_CONVERT_ERROR_NO_CONVERSION may occur.

1067

1068

* Convert a string from UTF-16 to UTF-8. The result will be

1069

* terminated with a 0 byte.

1070

1071

* Note that the input is expected to be already in native endianness,

1072

* an initial byte-order-mark character is not handled specially.

1073

* g_convert() can be used to convert a byte buffer of UTF-16 data of

1074

* ambiguous endianess.

1075

1076

* Return value: a pointer to a newly allocated UTF-8 string.

1077

* This value must be freed with g_free(). If an

1078

* error occurs, %NULL will be returned and

1079

* @error set.

1080

**/

1081

gchar *

1082

g_utf16_to_utf8 (const gunichar2 *str,

1083

glong len,

1084

glong *items_read,

1085

glong *items_written,

1086

GError **error)

1087

{

1088

/* This function and g_utf16_to_ucs4 are almost exactly identical - The lines that differ

1089

* are marked.

1090

1091

const gunichar2 *in;

1092

gchar *out;

1093

gchar *result = NULL;

1094

gint n_bytes;

1095

gunichar high_surrogate;

1096

1097

g_return_val_if_fail (str != 0, NULL);

1098

1099

n_bytes = 0;

1100

in = str;

1101

high_surrogate = 0;

1102

while ((len < 0 || in - str < len) && *in)

1103

{

1104

gunichar2 c = *in;

1105

gunichar wc;

1106

1107

if (c >= 0xdc00 && c < 0xe000) /* low surrogate */

1108

{

1109

if (high_surrogate)

1110

{

1111

wc = SURROGATE_VALUE (high_surrogate, c);

1112

high_surrogate = 0;

1113

}

1114

else

1115

{

1116

g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE,

1117

_("Invalid sequence in conversion input"));

1118

goto err_out;

1119

}

1120

}

1121

else

1122

{

1123

if (high_surrogate)

1124

{

1125

g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE,

1126

_("Invalid sequence in conversion input"));

1127

goto err_out;

1128

}

1129

1130

if (c >= 0xd800 && c < 0xdc00) /* high surrogate */

1131

{

1132

high_surrogate = c;

1133

goto next1;

1134

}

1135

else

1136

wc = c;

1137

}

1138

1139

/********** DIFFERENT for UTF8/UCS4 **********/

1140

n_bytes += UTF8_LENGTH (wc);

1141

1142

next1:

1143

in++;

1144

}

1145

1146

if (high_surrogate && !items_read)

1147

{

1148

g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_PARTIAL_INPUT,

1149

_("Partial character sequence at end of input"));

1150

goto err_out;

1151

}

1152

1153

/* At this point, everything is valid, and we just need to convert

1154

1155

/********** DIFFERENT for UTF8/UCS4 **********/

1156

result = g_malloc (n_bytes + 1);

1157

1158

high_surrogate = 0;

1159

out = result;

1160

in = str;

1161

while (out < result + n_bytes)

1162

{

1163

gunichar2 c = *in;

1164

gunichar wc;

1165

1166

if (c >= 0xdc00 && c < 0xe000) /* low surrogate */

1167

{

1168

wc = SURROGATE_VALUE (high_surrogate, c);

1169

high_surrogate = 0;

1170

}

1171

else if (c >= 0xd800 && c < 0xdc00) /* high surrogate */

1172

{

1173

high_surrogate = c;

1174

goto next2;

1175

}

1176

else

1177

wc = c;

1178

1179

/********** DIFFERENT for UTF8/UCS4 **********/

1180

out += g_unichar_to_utf8 (wc, out);

1181

1182

next2:

1183

in++;

1184

}

1185

1186

/********** DIFFERENT for UTF8/UCS4 **********/

1187

*out = '\0';

1188

1189

if (items_written)

1190

/********** DIFFERENT for UTF8/UCS4 **********/

1191

*items_written = out - result;

1192

1193

err_out:

1194

if (items_read)

1195

*items_read = in - str;

1196

1197

return result;

1198

}

1199

1200

/**

1201

* g_utf16_to_ucs4:

1202

* @str: a UTF-16 encoded string

1203

* @len: the maximum length (number of <type>gunichar2</type>) of @str to use.

1204

* If @len < 0, then the string is terminated with a 0 character.

1205

* @items_read: location to store number of words read, or %NULL.

1206

* If %NULL, then %G_CONVERT_ERROR_PARTIAL_INPUT will be

1207

* returned in case @str contains a trailing partial

1208

* character. If an error occurs then the index of the

1209

* invalid input is stored here.

1210

* @items_written: location to store number of characters written, or %NULL.

1211

* The value stored here does not include the trailing

1212

* 0 character.

1213

* @error: location to store the error occuring, or %NULL to ignore

1214

* errors. Any of the errors in #GConvertError other than

1215

* %G_CONVERT_ERROR_NO_CONVERSION may occur.

1216

1217

* Convert a string from UTF-16 to UCS-4. The result will be

1218

* terminated with a 0 character.

1219

1220

* Return value: a pointer to a newly allocated UCS-4 string.

1221

* This value must be freed with g_free(). If an

1222

* error occurs, %NULL will be returned and

1223

* @error set.

1224

**/

1225

gunichar *

1226

g_utf16_to_ucs4 (const gunichar2 *str,

1227

glong len,

1228

glong *items_read,

1229

glong *items_written,

1230

GError **error)

1231

{

1232

const gunichar2 *in;

1233

gchar *out;

1234

gchar *result = NULL;

1235

gint n_bytes;

1236

gunichar high_surrogate;

1237

1238

g_return_val_if_fail (str != 0, NULL);

1239

1240

n_bytes = 0;

1241

in = str;

1242

high_surrogate = 0;

1243

while ((len < 0 || in - str < len) && *in)

1244

{

1245

gunichar2 c = *in;

1246

gunichar wc;

1247

1248

if (c >= 0xdc00 && c < 0xe000) /* low surrogate */

1249

{

1250

if (high_surrogate)

1251

{

1252

wc = SURROGATE_VALUE (high_surrogate, c);

1253

high_surrogate = 0;

1254

}

1255

else

1256

{

1257

g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE,

1258

_("Invalid sequence in conversion input"));

1259

goto err_out;

1260

}

1261

}

1262

else

1263

{

1264

if (high_surrogate)

1265

{

1266

g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE,

1267

_("Invalid sequence in conversion input"));

1268

goto err_out;

1269

}

1270

1271

if (c >= 0xd800 && c < 0xdc00) /* high surrogate */

1272

{

1273

high_surrogate = c;

1274

goto next1;

1275

}

1276

else

1277

wc = c;

1278

}

1279

1280

/********** DIFFERENT for UTF8/UCS4 **********/

1281

n_bytes += sizeof (gunichar);

1282

1283

next1:

1284

in++;

1285

}

1286

1287

if (high_surrogate && !items_read)

1288

{

1289

g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_PARTIAL_INPUT,

1290

_("Partial character sequence at end of input"));

1291

goto err_out;

1292

}

1293

1294

/* At this point, everything is valid, and we just need to convert

1295

1296

/********** DIFFERENT for UTF8/UCS4 **********/

1297

result = g_malloc (n_bytes + 4);

1298

1299

high_surrogate = 0;

1300

out = result;

1301

in = str;

1302

while (out < result + n_bytes)

1303

{

1304

gunichar2 c = *in;

1305

gunichar wc;

1306

1307

if (c >= 0xdc00 && c < 0xe000) /* low surrogate */

1308

{

1309

wc = SURROGATE_VALUE (high_surrogate, c);

1310

high_surrogate = 0;

1311

}

1312

else if (c >= 0xd800 && c < 0xdc00) /* high surrogate */

1313

{

1314

high_surrogate = c;

1315

goto next2;

1316

}

1317

else

1318

wc = c;

1319

1320

/********** DIFFERENT for UTF8/UCS4 **********/

1321

*(gunichar *)out = wc;

1322

out += sizeof (gunichar);

1323

1324

next2:

1325

in++;

1326

}

1327

1328

/********** DIFFERENT for UTF8/UCS4 **********/

1329

*(gunichar *)out = 0;

1330

1331

if (items_written)

1332

/********** DIFFERENT for UTF8/UCS4 **********/

1333

*items_written = (out - result) / sizeof (gunichar);

1334

1335

err_out:

1336

if (items_read)

1337

*items_read = in - str;

1338

1339

return (gunichar *)result;

1340

}

1341

1342

/**

1343

* g_utf8_to_utf16:

1344

* @str: a UTF-8 encoded string

1345

* @len: the maximum length (number of characters) of @str to use.

1346

* If @len < 0, then the string is nul-terminated.

1347

* @items_read: location to store number of bytes read, or %NULL.

1348

* If %NULL, then %G_CONVERT_ERROR_PARTIAL_INPUT will be

1349

* returned in case @str contains a trailing partial

1350

* character. If an error occurs then the index of the

1351

* invalid input is stored here.

1352

* @items_written: location to store number of <type>gunichar2</type> written,

1353

* or %NULL.

1354

* The value stored here does not include the trailing 0.

1355

* @error: location to store the error occuring, or %NULL to ignore

1356

* errors. Any of the errors in #GConvertError other than

1357

* %G_CONVERT_ERROR_NO_CONVERSION may occur.

1358

1359

* Convert a string from UTF-8 to UTF-16. A 0 character will be

1360

* added to the result after the converted text.

1361

1362

* Return value: a pointer to a newly allocated UTF-16 string.

1363

* This value must be freed with g_free(). If an

1364

* error occurs, %NULL will be returned and

1365

* @error set.

1366

**/

1367

gunichar2 *

1368

g_utf8_to_utf16 (const gchar *str,

1369

glong len,

1370

glong *items_read,

1371

glong *items_written,

1372

GError **error)

1373

{

1374

gunichar2 *result = NULL;

1375

gint n16;

1376

const gchar *in;

1377

gint i;

1378

1379

g_return_val_if_fail (str != NULL, NULL);

1380

1381

in = str;

1382

n16 = 0;

1383

while ((len < 0 || str + len - in > 0) && *in)

1384

{

1385

gunichar wc = g_utf8_get_char_extended (in, len < 0 ? 6 : str + len - in);

1386

if (wc & 0x80000000)

1387

{

1388

if (wc == (gunichar)-2)

1389

{

1390

if (items_read)

1391

break;

1392

else

1393

g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_PARTIAL_INPUT,

1394

_("Partial character sequence at end of input"));

1395

}

1396

else

1397

g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE,

1398

_("Invalid byte sequence in conversion input"));

1399

1400

goto err_out;

1401

}

1402

1403

if (wc < 0xd800)

1404

n16 += 1;

1405

else if (wc < 0xe000)

1406

{

1407

g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE,

1408

_("Invalid sequence in conversion input"));

1409

1410

goto err_out;

1411

}

1412

else if (wc < 0x10000)

1413

n16 += 1;

1414

else if (wc < 0x110000)

1415

n16 += 2;

1416

else

1417

{

1418

g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE,

1419

_("Character out of range for UTF-16"));

1420

1421

goto err_out;

1422

}

1423

1424

in = g_utf8_next_char (in);

1425

}

1426

1427

result = g_new (gunichar2, n16 + 1);

1428

1429

in = str;

1430

for (i = 0; i < n16;)

1431

{

1432

gunichar wc = g_utf8_get_char (in);

1433

1434

if (wc < 0x10000)

1435

{

1436

result[i++] = wc;

1437

}

1438

else

1439

{

1440

result[i++] = (wc - 0x10000) / 0x400 + 0xd800;

1441

result[i++] = (wc - 0x10000) % 0x400 + 0xdc00;

1442

}

1443

1444

in = g_utf8_next_char (in);

1445

}

1446

1447

result[i] = 0;

1448

1449

if (items_written)

1450

*items_written = n16;

1451

1452

err_out:

1453

if (items_read)

1454

*items_read = in - str;

1455

1456

return result;

1457

}

1458

1459

/**

1460

* g_ucs4_to_utf16:

1461

* @str: a UCS-4 encoded string

1462

* @len: the maximum length (number of characters) of @str to use.

1463

* If @len < 0, then the string is terminated with a 0 character.

1464

* @items_read: location to store number of bytes read, or %NULL.

1465

* If an error occurs then the index of the invalid input

1466

* is stored here.

1467

* @items_written: location to store number of <type>gunichar2</type>

1468

* written, or %NULL. The value stored here does not

1469

* include the trailing 0.

1470

* @error: location to store the error occuring, or %NULL to ignore

1471

* errors. Any of the errors in #GConvertError other than

1472

* %G_CONVERT_ERROR_NO_CONVERSION may occur.

1473

1474

* Convert a string from UCS-4 to UTF-16. A 0 character will be

1475

* added to the result after the converted text.

1476

1477

* Return value: a pointer to a newly allocated UTF-16 string.

1478

* This value must be freed with g_free(). If an

1479

* error occurs, %NULL will be returned and

1480

* @error set.

1481

**/

1482

gunichar2 *

1483

g_ucs4_to_utf16 (const gunichar *str,

1484

glong len,

1485

glong *items_read,

1486

glong *items_written,

1487

GError **error)

1488

{

1489

gunichar2 *result = NULL;

1490

gint n16;

1491

gint i, j;

1492

1493

n16 = 0;

1494

i = 0;

1495

while ((len < 0 || i < len) && str[i])

1496

{

1497

gunichar wc = str[i];

1498

1499

if (wc < 0xd800)

1500

n16 += 1;

1501

else if (wc < 0xe000)

1502

{

1503

g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE,

1504

_("Invalid sequence in conversion input"));

1505

1506

goto err_out;

1507

}

1508

else if (wc < 0x10000)

1509

n16 += 1;

1510

else if (wc < 0x110000)

1511

n16 += 2;

1512

else

1513

{

1514

g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE,

1515

_("Character out of range for UTF-16"));

1516

1517

goto err_out;

1518

}

1519

1520

i++;

1521

}

1522

1523

result = g_new (gunichar2, n16 + 1);

1524

1525

for (i = 0, j = 0; j < n16; i++)

1526

{

1527

gunichar wc = str[i];

1528

1529

if (wc < 0x10000)

1530

{

1531

result[j++] = wc;

1532

}

1533

else

1534

{

1535

result[j++] = (wc - 0x10000) / 0x400 + 0xd800;

1536

result[j++] = (wc - 0x10000) % 0x400 + 0xdc00;

1537

}

1538

}

1539

result[j] = 0;

1540

1541

if (items_written)

1542

*items_written = n16;

1543

1544

err_out:

1545

if (items_read)

1546

*items_read = i;

1547

1548

return result;

1549

}

1550

1551

/* Consume the byte at `p` as a UTF-8 continuation byte (10xxxxxx):
 * jump to the caller's `error` label if it is not one, otherwise shift
 * its low six bits into `val`. Relies on `p`, `val` and an `error`
 * label existing at the point of use.
 */
#define CONTINUATION_CHAR                                 \
 G_STMT_START {                                           \
  if ((*(guchar *)p & 0xc0) != 0x80) /* 10xxxxxx */       \
    goto error;                                           \
  val = (val << 6) | ((*(guchar *)p) & 0x3f);             \
 } G_STMT_END

1558

1559

static const gchar *

1560

fast_validate (const char *str)

1561

1562

{

1563

gunichar val = 0;

1564

gunichar min = 0;

1565

const gchar *p;

1566

1567

for (p = str; *p; p++)

1568

{

1569

if (*(guchar *)p < 128)

1570

/* done */;

1571

else

1572

{

1573

const gchar *last;

1574

1575

last = p;

1576

if ((*(guchar *)p & 0xe0) == 0xc0) /* 110xxxxx */

1577

{

1578

if (G_UNLIKELY ((*(guchar *)p & 0x1e) == 0))

1579

goto error;

1580

p++;

1581

if (G_UNLIKELY ((*(guchar *)p & 0xc0) != 0x80)) /* 10xxxxxx */

1582

goto error;

1583

}

1584

else

1585

{

1586

if ((*(guchar *)p & 0xf0) == 0xe0) /* 1110xxxx */

1587

{

1588

min = (1 << 11);

1589

val = *(guchar *)p & 0x0f;

1590

goto TWO_REMAINING;

1591

}

1592

else if ((*(guchar *)p & 0xf8) == 0xf0) /* 11110xxx */

1593

{

1594

min = (1 << 16);

1595

val = *(guchar *)p & 0x07;

1596

}

1597

else

1598

goto error;

1599

1600

p++;

1601

CONTINUATION_CHAR;

1602

TWO_REMAINING:

1603

p++;

1604

CONTINUATION_CHAR;

1605

p++;

1606

CONTINUATION_CHAR;

1607

1608

if (G_UNLIKELY (val < min))

1609

goto error;

1610

1611

if (G_UNLIKELY (!UNICODE_VALID(val)))

1612

goto error;

1613

}

1614

1615

continue;

1616

1617

error:

1618

return last;

1619

}

1620

}

1621

1622

return p;

1623

}

1624

1625

static const gchar *

1626

fast_validate_len (const char *str,

1627

gssize max_len)

1628

1629

{

1630

gunichar val = 0;

1631

gunichar min = 0;

1632

const gchar *p;

1633

1634

for (p = str; (max_len < 0 || (p - str) < max_len) && *p; p++)

1635

{

1636

if (*(guchar *)p < 128)

1637

/* done */;

1638

else

1639

{

1640

const gchar *last;

1641

1642

last = p;

1643

if ((*(guchar *)p & 0xe0) == 0xc0) /* 110xxxxx */

1644

{

1645

if (G_UNLIKELY (max_len >= 0 && max_len - (p - str) < 2))

1646

goto error;

1647

1648

if (G_UNLIKELY ((*(guchar *)p & 0x1e) == 0))

1649

goto error;

1650

p++;

1651

if (G_UNLIKELY ((*(guchar *)p & 0xc0) != 0x80)) /* 10xxxxxx */

1652

goto error;

1653

}

1654

else

1655

{

1656

if ((*(guchar *)p & 0xf0) == 0xe0) /* 1110xxxx */

1657

{

1658

if (G_UNLIKELY (max_len >= 0 && max_len - (p - str) < 3))

1659

goto error;

1660

1661

min = (1 << 11);

1662

val = *(guchar *)p & 0x0f;

1663

goto TWO_REMAINING;

1664

}

1665

else if ((*(guchar *)p & 0xf8) == 0xf0) /* 11110xxx */

1666

{

1667

if (G_UNLIKELY (max_len >= 0 && max_len - (p - str) < 4))

1668

goto error;

1669

1670

min = (1 << 16);

1671

val = *(guchar *)p & 0x07;

1672

}

1673

else

1674

goto error;

1675

1676

p++;

1677

CONTINUATION_CHAR;

1678

TWO_REMAINING:

1679

p++;

1680

CONTINUATION_CHAR;

1681

p++;

1682

CONTINUATION_CHAR;

1683

1684

if (G_UNLIKELY (val < min))

1685

goto error;

1686

if (G_UNLIKELY (!UNICODE_VALID(val)))

1687

goto error;

1688

}

1689

1690

continue;

1691

1692

error:

1693

return last;

1694

}

1695

}

1696

1697

return p;

1698

}

1699

1700

/**

1701

* g_utf8_validate:

1702

* @str: a pointer to character data

1703

* @max_len: max bytes to validate, or -1 to go until NUL

1704

* @end: return location for end of valid data

1705

1706

* Validates UTF-8 encoded text. @str is the text to validate;

1707

* if @str is nul-terminated, then @max_len can be -1, otherwise

1708

* @max_len should be the number of bytes to validate.

1709

* If @end is non-%NULL, then the end of the valid range

1710

* will be stored there (i.e. the start of the first invalid

1711

* character if some bytes were invalid, or the end of the text

1712

* being validated otherwise).

1713

1714

* Note that g_utf8_validate() returns %FALSE if @max_len is

1715

* positive and NUL is met before @max_len bytes have been read.

1716

1717

* Returns %TRUE if all of @str was valid. Many GLib and GTK+

1718

* routines <emphasis>require</emphasis> valid UTF-8 as input;

1719

* so data read from a file or the network should be checked

1720

* with g_utf8_validate() before doing anything else with it.

1721

1722

* Return value: %TRUE if the text was valid UTF-8

1723

**/

1724

gboolean

1725

g_utf8_validate (const char *str,

1726

gssize max_len,

1727

const gchar **end)

1728

1729

{

1730

const gchar *p;

1731

1732

if (max_len < 0)

1733

p = fast_validate (str);

1734

else

1735

p = fast_validate_len (str, max_len);

1736

1737

if (end)

1738

*end = p;

1739

1740

if ((max_len >= 0 && p != str + max_len) ||

1741

(max_len < 0 && *p != '\0'))

1742

return FALSE;

1743

else

1744

return TRUE;

1745

}

1746

1747

1748

/**

1749

* g_unichar_validate:

1750

* @ch: a Unicode character

1751

1752

* Checks whether @ch is a valid Unicode character. Some possible

1753

* integer values of @ch will not be valid. 0 is considered a valid

1754

* character, though it's normally a string terminator.

1755

1756

* Return value: %TRUE if @ch is a valid Unicode character

1757

**/

1758

gboolean

1759

g_unichar_validate (gunichar ch)

1760

{

1761

return UNICODE_VALID (ch);

1762

}

1763

1764

/**

1765

* g_utf8_strreverse:

1766

* @str: a UTF-8 encoded string

1767

* @len: the maximum length of @str to use. If @len < 0, then

1768

* the string is nul-terminated.

1769

1770

* Reverses a UTF-8 string. @str must be valid UTF-8 encoded text.

1771

* (Use g_utf8_validate() on all text before trying to use UTF-8

1772

* utility functions with it.)

1773

1774

* Note that unlike g_strreverse(), this function returns

1775

* newly-allocated memory, which should be freed with g_free() when

1776

* no longer needed.

1777

1778

* Returns: a newly-allocated string which is the reverse of @str.

1779

1780

* Since: 2.2

1781

1782

gchar *

1783

g_utf8_strreverse (const gchar *str,

1784

gssize len)

1785

{

1786

gchar *result;

1787

const gchar *p;

1788

gchar *m, *r, skip;

1789

1790

if (len < 0)

1791

len = strlen (str);

1792

1793

result = g_new (gchar, len + 1);

1794

r = result + len;

1795

p = str;

1796

while (*p)

1797

{

1798

skip = g_utf8_skip[*(guchar*)p];

1799

r -= skip;

1800

for (m = r; skip; skip--)

1801

*m++ = *p++;

1802

}

1803

result[len] = 0;

1804

1805

return result;

1806

}

1807

1808

1809

gchar *

1810

_g_utf8_make_valid (const gchar *name)

1811

{

1812

GString *string;

1813

const gchar *remainder, *invalid;

1814

gint remaining_bytes, valid_bytes;

1815

1816

string = NULL;

1817

remainder = name;

1818

remaining_bytes = strlen (name);

1819

1820

while (remaining_bytes != 0)

1821

{

1822

if (g_utf8_validate (remainder, remaining_bytes, &invalid))

1823

break;

1824

valid_bytes = invalid - remainder;

1825

1826

if (string == NULL)

1827

string = g_string_sized_new (remaining_bytes);

1828

1829

g_string_append_len (string, remainder, valid_bytes);

1830

/* append U+FFFD REPLACEMENT CHARACTER */

1831

g_string_append (string, "\357\277\275");

1832

1833

remaining_bytes -= valid_bytes + 1;

1834

remainder = invalid + 1;

1835

}

1836

1837

if (string == NULL)

1838

return g_strdup (name);

1839

1840

g_string_append (string, remainder);

1841

1842

g_assert (g_utf8_validate (string->str, -1, NULL));

1843

1844

return g_string_free (string, FALSE);

1845

}

1846

1847

1848

#define __G_UTF8_C__

1849

#include "galiasdef.c"

Older »