~ubuntu-branches/ubuntu/trusty/glib2.0/trusty-proposed

« back to all changes in this revision

Viewing changes to docs/reference/glib/xml/unicode.xml

  • Committer: Bazaar Package Importer
  • Author(s): Sebastien Bacher
  • Date: 2010-03-09 11:28:22 UTC
  • mfrom: (3.4.8 experimental)
  • Revision ID: james.westby@ubuntu.com-20100309112822-j4n0v3xbtsup8s97
Tags: 2.23.5-1ubuntu1
* Resync on Debian
* debian/patches/01_gettext-desktopfiles.patch:
  - updated to use gettext for X-GNOME-Fullname too
* debian/patches/71_gio_launch_handler.patch:
  - new gio default launch handle feature required for wncksync
* debian/control.in, 
  debian/patches/80-gtester-subunit.patch:
  - gtester-report subunit support
* debian/libglib2.0-0.symbols:
  - updated the symbols list for the gio launcher handler
* debian/rules:
  - don't break build on test suite errors, debian recently activated this but
    the build breaks even when there is no error in the testsuite

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
<?xml version="1.0"?>
 
2
<!DOCTYPE refentry PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN" 
 
3
               "http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" [
 
4
<!ENTITY % local.common.attrib "xmlns:xi  CDATA  #FIXED 'http://www.w3.org/2003/XInclude'">
 
5
<!ENTITY version SYSTEM "version.xml">
 
6
]>
 
7
<refentry id="glib-Unicode-Manipulation">
 
8
<refmeta>
 
9
<refentrytitle role="top_of_page" id="glib-Unicode-Manipulation.top_of_page">Unicode Manipulation</refentrytitle>
 
10
<manvolnum>3</manvolnum>
 
11
<refmiscinfo>GLIB Library</refmiscinfo>
 
12
</refmeta>
 
13
 
 
14
<refnamediv>
 
15
<refname>Unicode Manipulation</refname>
 
16
<refpurpose>functions operating on Unicode characters and UTF-8 strings</refpurpose>
 
17
</refnamediv>
 
18
 
 
19
<refsynopsisdiv id="glib-Unicode-Manipulation.synopsis" role="synopsis">
 
20
<title role="synopsis.title">Synopsis</title>
 
21
 
 
22
<synopsis>
 
23
 
 
24
#include &lt;glib.h&gt;
 
25
 
 
26
typedef             <link linkend="gunichar">gunichar</link>;
 
27
typedef             <link linkend="gunichar2">gunichar2</link>;
 
28
 
 
29
<link linkend="gboolean">gboolean</link>            <link linkend="g-unichar-validate">g_unichar_validate</link>                  (<link linkend="gunichar">gunichar</link> ch);
 
30
<link linkend="gboolean">gboolean</link>            <link linkend="g-unichar-isalnum">g_unichar_isalnum</link>                   (<link linkend="gunichar">gunichar</link> c);
 
31
<link linkend="gboolean">gboolean</link>            <link linkend="g-unichar-isalpha">g_unichar_isalpha</link>                   (<link linkend="gunichar">gunichar</link> c);
 
32
<link linkend="gboolean">gboolean</link>            <link linkend="g-unichar-iscntrl">g_unichar_iscntrl</link>                   (<link linkend="gunichar">gunichar</link> c);
 
33
<link linkend="gboolean">gboolean</link>            <link linkend="g-unichar-isdefined">g_unichar_isdefined</link>                 (<link linkend="gunichar">gunichar</link> c);
 
34
<link linkend="gboolean">gboolean</link>            <link linkend="g-unichar-isdigit">g_unichar_isdigit</link>                   (<link linkend="gunichar">gunichar</link> c);
 
35
<link linkend="gboolean">gboolean</link>            <link linkend="g-unichar-isgraph">g_unichar_isgraph</link>                   (<link linkend="gunichar">gunichar</link> c);
 
36
<link linkend="gboolean">gboolean</link>            <link linkend="g-unichar-islower">g_unichar_islower</link>                   (<link linkend="gunichar">gunichar</link> c);
 
37
<link linkend="gboolean">gboolean</link>            <link linkend="g-unichar-ismark">g_unichar_ismark</link>                    (<link linkend="gunichar">gunichar</link> c);
 
38
<link linkend="gboolean">gboolean</link>            <link linkend="g-unichar-isprint">g_unichar_isprint</link>                   (<link linkend="gunichar">gunichar</link> c);
 
39
<link linkend="gboolean">gboolean</link>            <link linkend="g-unichar-ispunct">g_unichar_ispunct</link>                   (<link linkend="gunichar">gunichar</link> c);
 
40
<link linkend="gboolean">gboolean</link>            <link linkend="g-unichar-isspace">g_unichar_isspace</link>                   (<link linkend="gunichar">gunichar</link> c);
 
41
<link linkend="gboolean">gboolean</link>            <link linkend="g-unichar-istitle">g_unichar_istitle</link>                   (<link linkend="gunichar">gunichar</link> c);
 
42
<link linkend="gboolean">gboolean</link>            <link linkend="g-unichar-isupper">g_unichar_isupper</link>                   (<link linkend="gunichar">gunichar</link> c);
 
43
<link linkend="gboolean">gboolean</link>            <link linkend="g-unichar-isxdigit">g_unichar_isxdigit</link>                  (<link linkend="gunichar">gunichar</link> c);
 
44
<link linkend="gboolean">gboolean</link>            <link linkend="g-unichar-iswide">g_unichar_iswide</link>                    (<link linkend="gunichar">gunichar</link> c);
 
45
<link linkend="gboolean">gboolean</link>            <link linkend="g-unichar-iswide-cjk">g_unichar_iswide_cjk</link>                (<link linkend="gunichar">gunichar</link> c);
 
46
<link linkend="gboolean">gboolean</link>            <link linkend="g-unichar-iszerowidth">g_unichar_iszerowidth</link>               (<link linkend="gunichar">gunichar</link> c);
 
47
<link linkend="gunichar">gunichar</link>            <link linkend="g-unichar-toupper">g_unichar_toupper</link>                   (<link linkend="gunichar">gunichar</link> c);
 
48
<link linkend="gunichar">gunichar</link>            <link linkend="g-unichar-tolower">g_unichar_tolower</link>                   (<link linkend="gunichar">gunichar</link> c);
 
49
<link linkend="gunichar">gunichar</link>            <link linkend="g-unichar-totitle">g_unichar_totitle</link>                   (<link linkend="gunichar">gunichar</link> c);
 
50
<link linkend="gint">gint</link>                <link linkend="g-unichar-digit-value">g_unichar_digit_value</link>               (<link linkend="gunichar">gunichar</link> c);
 
51
<link linkend="gint">gint</link>                <link linkend="g-unichar-xdigit-value">g_unichar_xdigit_value</link>              (<link linkend="gunichar">gunichar</link> c);
 
52
enum                <link linkend="GUnicodeType">GUnicodeType</link>;
 
53
<link linkend="GUnicodeType">GUnicodeType</link>        <link linkend="g-unichar-type">g_unichar_type</link>                      (<link linkend="gunichar">gunichar</link> c);
 
54
enum                <link linkend="GUnicodeBreakType">GUnicodeBreakType</link>;
 
55
<link linkend="GUnicodeBreakType">GUnicodeBreakType</link>   <link linkend="g-unichar-break-type">g_unichar_break_type</link>                (<link linkend="gunichar">gunichar</link> c);
 
56
<link linkend="gint">gint</link>                <link linkend="g-unichar-combining-class">g_unichar_combining_class</link>           (<link linkend="gunichar">gunichar</link> uc);
 
57
<link linkend="void">void</link>                <link linkend="g-unicode-canonical-ordering">g_unicode_canonical_ordering</link>        (<link linkend="gunichar">gunichar</link> *string,
 
58
                                                         <link linkend="gsize">gsize</link> len);
 
59
<link linkend="gunichar">gunichar</link> *          <link linkend="g-unicode-canonical-decomposition">g_unicode_canonical_decomposition</link>   (<link linkend="gunichar">gunichar</link> ch,
 
60
                                                         <link linkend="gsize">gsize</link> *result_len);
 
61
<link linkend="gboolean">gboolean</link>            <link linkend="g-unichar-get-mirror-char">g_unichar_get_mirror_char</link>           (<link linkend="gunichar">gunichar</link> ch,
 
62
                                                         <link linkend="gunichar">gunichar</link> *mirrored_ch);
 
63
enum                <link linkend="GUnicodeScript">GUnicodeScript</link>;
 
64
<link linkend="GUnicodeScript">GUnicodeScript</link>      <link linkend="g-unichar-get-script">g_unichar_get_script</link>                (<link linkend="gunichar">gunichar</link> ch);
 
65
 
 
66
#define             <link linkend="g-utf8-next-char">g_utf8_next_char</link>                    (p)
 
67
<link linkend="gunichar">gunichar</link>            <link linkend="g-utf8-get-char">g_utf8_get_char</link>                     (const <link linkend="gchar">gchar</link> *p);
 
68
<link linkend="gunichar">gunichar</link>            <link linkend="g-utf8-get-char-validated">g_utf8_get_char_validated</link>           (const <link linkend="gchar">gchar</link> *p,
 
69
                                                         <link linkend="gssize">gssize</link> max_len);
 
70
<link linkend="gchar">gchar</link>*              <link linkend="g-utf8-offset-to-pointer">g_utf8_offset_to_pointer</link>            (const <link linkend="gchar">gchar</link> *str,
 
71
                                                         <link linkend="glong">glong</link> offset);
 
72
<link linkend="glong">glong</link>               <link linkend="g-utf8-pointer-to-offset">g_utf8_pointer_to_offset</link>            (const <link linkend="gchar">gchar</link> *str,
 
73
                                                         const <link linkend="gchar">gchar</link> *pos);
 
74
<link linkend="gchar">gchar</link>*              <link linkend="g-utf8-prev-char">g_utf8_prev_char</link>                    (const <link linkend="gchar">gchar</link> *p);
 
75
<link linkend="gchar">gchar</link>*              <link linkend="g-utf8-find-next-char">g_utf8_find_next_char</link>               (const <link linkend="gchar">gchar</link> *p,
 
76
                                                         const <link linkend="gchar">gchar</link> *end);
 
77
<link linkend="gchar">gchar</link>*              <link linkend="g-utf8-find-prev-char">g_utf8_find_prev_char</link>               (const <link linkend="gchar">gchar</link> *str,
 
78
                                                         const <link linkend="gchar">gchar</link> *p);
 
79
<link linkend="glong">glong</link>               <link linkend="g-utf8-strlen">g_utf8_strlen</link>                       (const <link linkend="gchar">gchar</link> *p,
 
80
                                                         <link linkend="gssize">gssize</link> max);
 
81
<link linkend="gchar">gchar</link>*              <link linkend="g-utf8-strncpy">g_utf8_strncpy</link>                      (<link linkend="gchar">gchar</link> *dest,
 
82
                                                         const <link linkend="gchar">gchar</link> *src,
 
83
                                                         <link linkend="gsize">gsize</link> n);
 
84
<link linkend="gchar">gchar</link>*              <link linkend="g-utf8-strchr">g_utf8_strchr</link>                       (const <link linkend="gchar">gchar</link> *p,
 
85
                                                         <link linkend="gssize">gssize</link> len,
 
86
                                                         <link linkend="gunichar">gunichar</link> c);
 
87
<link linkend="gchar">gchar</link>*              <link linkend="g-utf8-strrchr">g_utf8_strrchr</link>                      (const <link linkend="gchar">gchar</link> *p,
 
88
                                                         <link linkend="gssize">gssize</link> len,
 
89
                                                         <link linkend="gunichar">gunichar</link> c);
 
90
<link linkend="gchar">gchar</link>*              <link linkend="g-utf8-strreverse">g_utf8_strreverse</link>                   (const <link linkend="gchar">gchar</link> *str,
 
91
                                                         <link linkend="gssize">gssize</link> len);
 
92
<link linkend="gboolean">gboolean</link>            <link linkend="g-utf8-validate">g_utf8_validate</link>                     (const <link linkend="gchar">gchar</link> *str,
 
93
                                                         <link linkend="gssize">gssize</link> max_len,
 
94
                                                         const <link linkend="gchar">gchar</link> **end);
 
95
 
 
96
<link linkend="gchar">gchar</link> *             <link linkend="g-utf8-strup">g_utf8_strup</link>                        (const <link linkend="gchar">gchar</link> *str,
 
97
                                                         <link linkend="gssize">gssize</link> len);
 
98
<link linkend="gchar">gchar</link> *             <link linkend="g-utf8-strdown">g_utf8_strdown</link>                      (const <link linkend="gchar">gchar</link> *str,
 
99
                                                         <link linkend="gssize">gssize</link> len);
 
100
<link linkend="gchar">gchar</link> *             <link linkend="g-utf8-casefold">g_utf8_casefold</link>                     (const <link linkend="gchar">gchar</link> *str,
 
101
                                                         <link linkend="gssize">gssize</link> len);
 
102
<link linkend="gchar">gchar</link> *             <link linkend="g-utf8-normalize">g_utf8_normalize</link>                    (const <link linkend="gchar">gchar</link> *str,
 
103
                                                         <link linkend="gssize">gssize</link> len,
 
104
                                                         <link linkend="GNormalizeMode">GNormalizeMode</link> mode);
 
105
enum                <link linkend="GNormalizeMode">GNormalizeMode</link>;
 
106
<link linkend="gint">gint</link>                <link linkend="g-utf8-collate">g_utf8_collate</link>                      (const <link linkend="gchar">gchar</link> *str1,
 
107
                                                         const <link linkend="gchar">gchar</link> *str2);
 
108
<link linkend="gchar">gchar</link> *             <link linkend="g-utf8-collate-key">g_utf8_collate_key</link>                  (const <link linkend="gchar">gchar</link> *str,
 
109
                                                         <link linkend="gssize">gssize</link> len);
 
110
<link linkend="gchar">gchar</link> *             <link linkend="g-utf8-collate-key-for-filename">g_utf8_collate_key_for_filename</link>     (const <link linkend="gchar">gchar</link> *str,
 
111
                                                         <link linkend="gssize">gssize</link> len);
 
112
 
 
113
<link linkend="gunichar2">gunichar2</link> *         <link linkend="g-utf8-to-utf16">g_utf8_to_utf16</link>                     (const <link linkend="gchar">gchar</link> *str,
 
114
                                                         <link linkend="glong">glong</link> len,
 
115
                                                         <link linkend="glong">glong</link> *items_read,
 
116
                                                         <link linkend="glong">glong</link> *items_written,
 
117
                                                         <link linkend="GError">GError</link> **error);
 
118
<link linkend="gunichar">gunichar</link> *          <link linkend="g-utf8-to-ucs4">g_utf8_to_ucs4</link>                      (const <link linkend="gchar">gchar</link> *str,
 
119
                                                         <link linkend="glong">glong</link> len,
 
120
                                                         <link linkend="glong">glong</link> *items_read,
 
121
                                                         <link linkend="glong">glong</link> *items_written,
 
122
                                                         <link linkend="GError">GError</link> **error);
 
123
<link linkend="gunichar">gunichar</link> *          <link linkend="g-utf8-to-ucs4-fast">g_utf8_to_ucs4_fast</link>                 (const <link linkend="gchar">gchar</link> *str,
 
124
                                                         <link linkend="glong">glong</link> len,
 
125
                                                         <link linkend="glong">glong</link> *items_written);
 
126
<link linkend="gunichar">gunichar</link> *          <link linkend="g-utf16-to-ucs4">g_utf16_to_ucs4</link>                     (const <link linkend="gunichar2">gunichar2</link> *str,
 
127
                                                         <link linkend="glong">glong</link> len,
 
128
                                                         <link linkend="glong">glong</link> *items_read,
 
129
                                                         <link linkend="glong">glong</link> *items_written,
 
130
                                                         <link linkend="GError">GError</link> **error);
 
131
<link linkend="gchar">gchar</link>*              <link linkend="g-utf16-to-utf8">g_utf16_to_utf8</link>                     (const <link linkend="gunichar2">gunichar2</link> *str,
 
132
                                                         <link linkend="glong">glong</link> len,
 
133
                                                         <link linkend="glong">glong</link> *items_read,
 
134
                                                         <link linkend="glong">glong</link> *items_written,
 
135
                                                         <link linkend="GError">GError</link> **error);
 
136
<link linkend="gunichar2">gunichar2</link> *         <link linkend="g-ucs4-to-utf16">g_ucs4_to_utf16</link>                     (const <link linkend="gunichar">gunichar</link> *str,
 
137
                                                         <link linkend="glong">glong</link> len,
 
138
                                                         <link linkend="glong">glong</link> *items_read,
 
139
                                                         <link linkend="glong">glong</link> *items_written,
 
140
                                                         <link linkend="GError">GError</link> **error);
 
141
<link linkend="gchar">gchar</link>*              <link linkend="g-ucs4-to-utf8">g_ucs4_to_utf8</link>                      (const <link linkend="gunichar">gunichar</link> *str,
 
142
                                                         <link linkend="glong">glong</link> len,
 
143
                                                         <link linkend="glong">glong</link> *items_read,
 
144
                                                         <link linkend="glong">glong</link> *items_written,
 
145
                                                         <link linkend="GError">GError</link> **error);
 
146
<link linkend="gint">gint</link>                <link linkend="g-unichar-to-utf8">g_unichar_to_utf8</link>                   (<link linkend="gunichar">gunichar</link> c,
 
147
                                                         <link linkend="gchar">gchar</link> *outbuf);
 
148
</synopsis>
 
149
</refsynopsisdiv>
 
150
 
 
151
 
 
152
 
 
153
 
 
154
 
 
155
 
 
156
 
 
157
 
 
158
 
 
159
<refsect1 id="glib-Unicode-Manipulation.description" role="desc">
 
160
<title role="desc.title">Description</title>
 
161
<para>
 
162
This section describes a number of functions for dealing with
 
163
Unicode characters and strings.  There are analogues of the
 
164
traditional <filename>ctype.h</filename> character classification
 
165
and case conversion functions, UTF-8 analogues of some string utility
 
166
functions, functions to perform normalization, case conversion and
 
167
collation on UTF-8 strings and finally functions to convert between
 
168
the UTF-8, UTF-16 and UCS-4 encodings of Unicode.
 
169
</para>
 
170
<para>
 
171
The implementations of the Unicode functions in GLib are based
 
172
on the Unicode Character Data tables, which are available from
 
173
<ulink url="http://www.unicode.org/">www.unicode.org</ulink>.
 
174
GLib 2.8 supports Unicode 4.0, GLib 2.10 supports Unicode 4.1,
 
175
GLib 2.12 supports Unicode 5.0, and GLib 2.16.3 supports Unicode 5.1.
 
176
</para>
 
177
</refsect1>
 
178
 
 
179
<refsect1 id="glib-Unicode-Manipulation.details" role="details">
 
180
<title role="details.title">Details</title>
 
181
<refsect2 id="gunichar" role="typedef">
 
182
<title>gunichar</title>
 
183
<indexterm zone="gunichar"><primary sortas="unichar">gunichar</primary></indexterm><programlisting>typedef guint32 gunichar;
 
184
</programlisting>
 
185
<para>
 
186
A type which can hold any UTF-32 or UCS-4 character code, also known
 
187
as a Unicode code point.
 
188
</para>
 
189
<para>
 
190
If you want to produce the UTF-8 representation of a <link linkend="gunichar"><type>gunichar</type></link>,
 
191
use <link linkend="g-ucs4-to-utf8"><function>g_ucs4_to_utf8()</function></link>. See also <link linkend="g-utf8-to-ucs4"><function>g_utf8_to_ucs4()</function></link> for the reverse process.
 
192
</para>
 
193
<para>
 
194
To print/scan values of this type as integers, use
 
195
<link linkend="G-GINT32-MODIFIER--CAPS"><literal>G_GINT32_MODIFIER</literal></link> and/or <link linkend="G-GUINT32-FORMAT--CAPS"><literal>G_GUINT32_FORMAT</literal></link>.
 
196
</para>
 
197
<para>
 
198
The notation to express a Unicode code point in running text is as a
 
199
hexadecimal number with four to six digits and uppercase letters, prefixed
 
200
by the string "U+".  Leading zeros are omitted, unless the code point would
 
201
have fewer than four hexadecimal digits.
 
202
For example, "U+0041 LATIN CAPITAL LETTER A".
 
203
To print a code point in the U+-notation, use the format string
 
204
"U+%04"G_GINT32_MODIFIER"X".
 
205
To scan, use the format string "U+%06"G_GINT32_MODIFIER"X".
 
206
<informalexample>
 
207
<programlisting>
 
208
gunichar c;
 
209
sscanf ("U+0041", "U+%06"G_GINT32_MODIFIER"X", &amp;c);
 
210
g_print ("Read U+%04"G_GINT32_MODIFIER"X", c);
 
211
</programlisting>
 
212
</informalexample>
 
213
</para></refsect2>
 
214
<refsect2 id="gunichar2" role="typedef">
 
215
<title>gunichar2</title>
 
216
<indexterm zone="gunichar2"><primary sortas="unichar2">gunichar2</primary></indexterm><programlisting>typedef guint16 gunichar2;
 
217
</programlisting>
 
218
<para>
 
219
A type which can hold any UTF-16 code
 
220
point<footnote id="utf16_surrogate_pairs">UTF-16 also has so-called
 
221
<firstterm>surrogate pairs</firstterm> to encode characters beyond the
 
222
BMP as pairs of 16-bit numbers. Surrogate pairs cannot be stored in a
 
223
single gunichar2 field, but all GLib functions accepting gunichar2 arrays
 
224
will correctly interpret surrogate pairs.</footnote>.
 
225
</para>
 
226
<para>
 
227
To print/scan values of this type to/from text you need to convert
 
228
to/from UTF-8, using <link linkend="g-utf16-to-utf8"><function>g_utf16_to_utf8()</function></link>/<link linkend="g-utf8-to-utf16"><function>g_utf8_to_utf16()</function></link>.
 
229
</para>
 
230
<para>
 
231
To print/scan values of this type as integers, use
 
232
<link linkend="G-GINT16-MODIFIER--CAPS"><literal>G_GINT16_MODIFIER</literal></link> and/or <link linkend="G-GUINT16-FORMAT--CAPS"><literal>G_GUINT16_FORMAT</literal></link>.
 
233
</para></refsect2>
 
234
<refsect2 id="g-unichar-validate" role="function">
 
235
<title>g_unichar_validate ()</title>
 
236
<indexterm zone="g-unichar-validate"><primary sortas="unichar_validate">g_unichar_validate</primary></indexterm><programlisting><link linkend="gboolean">gboolean</link>            g_unichar_validate                  (<link linkend="gunichar">gunichar</link> ch);</programlisting>
 
237
<para>
 
238
Checks whether <parameter>ch</parameter> is a valid Unicode character. Some possible
 
239
integer values of <parameter>ch</parameter> will not be valid. 0 is considered a valid
 
240
character, though it's normally a string terminator.</para>
 
241
<para>
 
242
</para><variablelist role="params">
 
243
<varlistentry><term><parameter>ch</parameter>&#160;:</term>
 
244
<listitem><simpara> a Unicode character
 
245
</simpara></listitem></varlistentry>
 
246
<varlistentry><term><emphasis>Returns</emphasis>&#160;:</term><listitem><simpara> <link linkend="TRUE--CAPS"><literal>TRUE</literal></link> if <parameter>ch</parameter> is a valid Unicode character
 
247
</simpara></listitem></varlistentry>
 
248
</variablelist></refsect2>
 
249
<refsect2 id="g-unichar-isalnum" role="function">
 
250
<title>g_unichar_isalnum ()</title>
 
251
<indexterm zone="g-unichar-isalnum"><primary sortas="unichar_isalnum">g_unichar_isalnum</primary></indexterm><programlisting><link linkend="gboolean">gboolean</link>            g_unichar_isalnum                   (<link linkend="gunichar">gunichar</link> c);</programlisting>
 
252
<para>
 
253
Determines whether a character is alphanumeric.
 
254
Given some UTF-8 text, obtain a character value
 
255
with <link linkend="g-utf8-get-char"><function>g_utf8_get_char()</function></link>.</para>
 
256
<para>
 
257
</para><variablelist role="params">
 
258
<varlistentry><term><parameter>c</parameter>&#160;:</term>
 
259
<listitem><simpara> a Unicode character
 
260
</simpara></listitem></varlistentry>
 
261
<varlistentry><term><emphasis>Returns</emphasis>&#160;:</term><listitem><simpara> <link linkend="TRUE--CAPS"><literal>TRUE</literal></link> if <parameter>c</parameter> is an alphanumeric character
 
262
</simpara></listitem></varlistentry>
 
263
</variablelist></refsect2>
 
264
<refsect2 id="g-unichar-isalpha" role="function">
 
265
<title>g_unichar_isalpha ()</title>
 
266
<indexterm zone="g-unichar-isalpha"><primary sortas="unichar_isalpha">g_unichar_isalpha</primary></indexterm><programlisting><link linkend="gboolean">gboolean</link>            g_unichar_isalpha                   (<link linkend="gunichar">gunichar</link> c);</programlisting>
 
267
<para>
 
268
Determines whether a character is alphabetic (i.e. a letter).
 
269
Given some UTF-8 text, obtain a character value with
 
270
<link linkend="g-utf8-get-char"><function>g_utf8_get_char()</function></link>.</para>
 
271
<para>
 
272
</para><variablelist role="params">
 
273
<varlistentry><term><parameter>c</parameter>&#160;:</term>
 
274
<listitem><simpara> a Unicode character
 
275
</simpara></listitem></varlistentry>
 
276
<varlistentry><term><emphasis>Returns</emphasis>&#160;:</term><listitem><simpara> <link linkend="TRUE--CAPS"><literal>TRUE</literal></link> if <parameter>c</parameter> is an alphabetic character
 
277
</simpara></listitem></varlistentry>
 
278
</variablelist></refsect2>
 
279
<refsect2 id="g-unichar-iscntrl" role="function">
 
280
<title>g_unichar_iscntrl ()</title>
 
281
<indexterm zone="g-unichar-iscntrl"><primary sortas="unichar_iscntrl">g_unichar_iscntrl</primary></indexterm><programlisting><link linkend="gboolean">gboolean</link>            g_unichar_iscntrl                   (<link linkend="gunichar">gunichar</link> c);</programlisting>
 
282
<para>
 
283
Determines whether a character is a control character.
 
284
Given some UTF-8 text, obtain a character value with
 
285
<link linkend="g-utf8-get-char"><function>g_utf8_get_char()</function></link>.</para>
 
286
<para>
 
287
</para><variablelist role="params">
 
288
<varlistentry><term><parameter>c</parameter>&#160;:</term>
 
289
<listitem><simpara> a Unicode character
 
290
</simpara></listitem></varlistentry>
 
291
<varlistentry><term><emphasis>Returns</emphasis>&#160;:</term><listitem><simpara> <link linkend="TRUE--CAPS"><literal>TRUE</literal></link> if <parameter>c</parameter> is a control character
 
292
</simpara></listitem></varlistentry>
 
293
</variablelist></refsect2>
 
294
<refsect2 id="g-unichar-isdefined" role="function">
 
295
<title>g_unichar_isdefined ()</title>
 
296
<indexterm zone="g-unichar-isdefined"><primary sortas="unichar_isdefined">g_unichar_isdefined</primary></indexterm><programlisting><link linkend="gboolean">gboolean</link>            g_unichar_isdefined                 (<link linkend="gunichar">gunichar</link> c);</programlisting>
 
297
<para>
 
298
Determines if a given character is assigned in the Unicode
 
299
standard.</para>
 
300
<para>
 
301
</para><variablelist role="params">
 
302
<varlistentry><term><parameter>c</parameter>&#160;:</term>
 
303
<listitem><simpara> a Unicode character
 
304
</simpara></listitem></varlistentry>
 
305
<varlistentry><term><emphasis>Returns</emphasis>&#160;:</term><listitem><simpara> <link linkend="TRUE--CAPS"><literal>TRUE</literal></link> if the character has an assigned value
 
306
</simpara></listitem></varlistentry>
 
307
</variablelist></refsect2>
 
308
<refsect2 id="g-unichar-isdigit" role="function">
 
309
<title>g_unichar_isdigit ()</title>
 
310
<indexterm zone="g-unichar-isdigit"><primary sortas="unichar_isdigit">g_unichar_isdigit</primary></indexterm><programlisting><link linkend="gboolean">gboolean</link>            g_unichar_isdigit                   (<link linkend="gunichar">gunichar</link> c);</programlisting>
 
311
<para>
 
312
Determines whether a character is numeric (i.e. a digit).  This
 
313
covers ASCII 0-9 and also digits in other languages/scripts.  Given
 
314
some UTF-8 text, obtain a character value with <link linkend="g-utf8-get-char"><function>g_utf8_get_char()</function></link>.</para>
 
315
<para>
 
316
</para><variablelist role="params">
 
317
<varlistentry><term><parameter>c</parameter>&#160;:</term>
 
318
<listitem><simpara> a Unicode character
 
319
</simpara></listitem></varlistentry>
 
320
<varlistentry><term><emphasis>Returns</emphasis>&#160;:</term><listitem><simpara> <link linkend="TRUE--CAPS"><literal>TRUE</literal></link> if <parameter>c</parameter> is a digit
 
321
</simpara></listitem></varlistentry>
 
322
</variablelist></refsect2>
 
323
<refsect2 id="g-unichar-isgraph" role="function">
 
324
<title>g_unichar_isgraph ()</title>
 
325
<indexterm zone="g-unichar-isgraph"><primary sortas="unichar_isgraph">g_unichar_isgraph</primary></indexterm><programlisting><link linkend="gboolean">gboolean</link>            g_unichar_isgraph                   (<link linkend="gunichar">gunichar</link> c);</programlisting>
 
326
<para>
 
327
Determines whether a character is printable and not a space
 
328
(returns <link linkend="FALSE--CAPS"><literal>FALSE</literal></link> for control characters, format characters, and
 
329
spaces). <link linkend="g-unichar-isprint"><function>g_unichar_isprint()</function></link> is similar, but returns <link linkend="TRUE--CAPS"><literal>TRUE</literal></link> for
 
330
spaces. Given some UTF-8 text, obtain a character value with
 
331
<link linkend="g-utf8-get-char"><function>g_utf8_get_char()</function></link>.</para>
 
332
<para>
 
333
</para><variablelist role="params">
 
334
<varlistentry><term><parameter>c</parameter>&#160;:</term>
 
335
<listitem><simpara> a Unicode character
 
336
</simpara></listitem></varlistentry>
 
337
<varlistentry><term><emphasis>Returns</emphasis>&#160;:</term><listitem><simpara> <link linkend="TRUE--CAPS"><literal>TRUE</literal></link> if <parameter>c</parameter> is printable unless it's a space
 
338
</simpara></listitem></varlistentry>
 
339
</variablelist></refsect2>
 
340
<refsect2 id="g-unichar-islower" role="function">
 
341
<title>g_unichar_islower ()</title>
 
342
<indexterm zone="g-unichar-islower"><primary sortas="unichar_islower">g_unichar_islower</primary></indexterm><programlisting><link linkend="gboolean">gboolean</link>            g_unichar_islower                   (<link linkend="gunichar">gunichar</link> c);</programlisting>
 
343
<para>
 
344
Determines whether a character is a lowercase letter.
 
345
Given some UTF-8 text, obtain a character value with
 
346
<link linkend="g-utf8-get-char"><function>g_utf8_get_char()</function></link>.</para>
 
347
<para>
 
348
</para><variablelist role="params">
 
349
<varlistentry><term><parameter>c</parameter>&#160;:</term>
 
350
<listitem><simpara> a Unicode character
 
351
</simpara></listitem></varlistentry>
 
352
<varlistentry><term><emphasis>Returns</emphasis>&#160;:</term><listitem><simpara> <link linkend="TRUE--CAPS"><literal>TRUE</literal></link> if <parameter>c</parameter> is a lowercase letter
 
353
</simpara></listitem></varlistentry>
 
354
</variablelist></refsect2>
 
355
<refsect2 id="g-unichar-ismark" role="function" condition="since:2.14">
 
356
<title>g_unichar_ismark ()</title>
 
357
<indexterm zone="g-unichar-ismark" role="2.14"><primary sortas="unichar_ismark">g_unichar_ismark</primary></indexterm><programlisting><link linkend="gboolean">gboolean</link>            g_unichar_ismark                    (<link linkend="gunichar">gunichar</link> c);</programlisting>
 
358
<para>
 
359
Determines whether a character is a mark (non-spacing mark,
 
360
combining mark, or enclosing mark in Unicode speak).
 
361
Given some UTF-8 text, obtain a character value
 
362
with <link linkend="g-utf8-get-char"><function>g_utf8_get_char()</function></link>.
 
363
</para>
 
364
<para>
 
365
Note: in most cases where isalpha characters are allowed,
 
366
ismark characters should be allowed too as they are essential
 
367
for writing most European languages as well as many non-Latin
 
368
scripts.</para>
 
369
<para>
 
370
</para><variablelist role="params">
 
371
<varlistentry><term><parameter>c</parameter>&#160;:</term>
 
372
<listitem><simpara> a Unicode character
 
373
</simpara></listitem></varlistentry>
 
374
<varlistentry><term><emphasis>Returns</emphasis>&#160;:</term><listitem><simpara> <link linkend="TRUE--CAPS"><literal>TRUE</literal></link> if <parameter>c</parameter> is a mark character
 
375
 
 
376
</simpara></listitem></varlistentry>
 
377
</variablelist><para role="since">Since 2.14</para></refsect2>
 
378
<refsect2 id="g-unichar-isprint" role="function">
 
379
<title>g_unichar_isprint ()</title>
 
380
<indexterm zone="g-unichar-isprint"><primary sortas="unichar_isprint">g_unichar_isprint</primary></indexterm><programlisting><link linkend="gboolean">gboolean</link>            g_unichar_isprint                   (<link linkend="gunichar">gunichar</link> c);</programlisting>
 
381
<para>
 
382
Determines whether a character is printable.
 
383
Unlike <link linkend="g-unichar-isgraph"><function>g_unichar_isgraph()</function></link>, returns <link linkend="TRUE--CAPS"><literal>TRUE</literal></link> for spaces.
 
384
Given some UTF-8 text, obtain a character value with
 
385
<link linkend="g-utf8-get-char"><function>g_utf8_get_char()</function></link>.</para>
 
386
<para>
 
387
</para><variablelist role="params">
 
388
<varlistentry><term><parameter>c</parameter>&#160;:</term>
 
389
<listitem><simpara> a Unicode character
 
390
</simpara></listitem></varlistentry>
 
391
<varlistentry><term><emphasis>Returns</emphasis>&#160;:</term><listitem><simpara> <link linkend="TRUE--CAPS"><literal>TRUE</literal></link> if <parameter>c</parameter> is printable
 
392
</simpara></listitem></varlistentry>
 
393
</variablelist></refsect2>
 
394
<refsect2 id="g-unichar-ispunct" role="function">
 
395
<title>g_unichar_ispunct ()</title>
 
396
<indexterm zone="g-unichar-ispunct"><primary sortas="unichar_ispunct">g_unichar_ispunct</primary></indexterm><programlisting><link linkend="gboolean">gboolean</link>            g_unichar_ispunct                   (<link linkend="gunichar">gunichar</link> c);</programlisting>
 
397
<para>
 
398
Determines whether a character is punctuation or a symbol.
 
399
Given some UTF-8 text, obtain a character value with
 
400
<link linkend="g-utf8-get-char"><function>g_utf8_get_char()</function></link>.</para>
 
401
<para>
 
402
</para><variablelist role="params">
 
403
<varlistentry><term><parameter>c</parameter>&#160;:</term>
 
404
<listitem><simpara> a Unicode character
 
405
</simpara></listitem></varlistentry>
 
406
<varlistentry><term><emphasis>Returns</emphasis>&#160;:</term><listitem><simpara> <link linkend="TRUE--CAPS"><literal>TRUE</literal></link> if <parameter>c</parameter> is a punctuation or symbol character
 
407
</simpara></listitem></varlistentry>
 
408
</variablelist></refsect2>
 
409
<refsect2 id="g-unichar-isspace" role="function">
 
410
<title>g_unichar_isspace ()</title>
 
411
<indexterm zone="g-unichar-isspace"><primary sortas="unichar_isspace">g_unichar_isspace</primary></indexterm><programlisting><link linkend="gboolean">gboolean</link>            g_unichar_isspace                   (<link linkend="gunichar">gunichar</link> c);</programlisting>
 
412
<para>
 
413
Determines whether a character is a space, tab, or line separator
 
414
(newline, carriage return, etc.).  Given some UTF-8 text, obtain a
 
415
character value with <link linkend="g-utf8-get-char"><function>g_utf8_get_char()</function></link>.
 
416
</para>
 
417
<para>
 
418
(Note: don't use this to do word breaking; you have to use
 
419
Pango or equivalent to get word breaking right, the algorithm
 
420
is fairly complex.)</para>
 
421
<para>
 
422
</para><variablelist role="params">
 
423
<varlistentry><term><parameter>c</parameter>&#160;:</term>
 
424
<listitem><simpara> a Unicode character
 
425
</simpara></listitem></varlistentry>
 
426
<varlistentry><term><emphasis>Returns</emphasis>&#160;:</term><listitem><simpara> <link linkend="TRUE--CAPS"><literal>TRUE</literal></link> if <parameter>c</parameter> is a space character
 
427
</simpara></listitem></varlistentry>
 
428
</variablelist></refsect2>
 
429
<refsect2 id="g-unichar-istitle" role="function">
 
430
<title>g_unichar_istitle ()</title>
 
431
<indexterm zone="g-unichar-istitle"><primary sortas="unichar_istitle">g_unichar_istitle</primary></indexterm><programlisting><link linkend="gboolean">gboolean</link>            g_unichar_istitle                   (<link linkend="gunichar">gunichar</link> c);</programlisting>
 
432
<para>
 
433
Determines if a character is titlecase. Some characters in
 
434
Unicode which are composites, such as the DZ digraph
 
435
have three case variants instead of just two. The titlecase
 
436
form is used at the beginning of a word where only the
 
437
first letter is capitalized. The titlecase form of the DZ
 
438
digraph is U+01F2 LATIN CAPITAL LETTER D WITH SMALL LETTER Z.</para>
 
439
<para>
 
440
</para><variablelist role="params">
 
441
<varlistentry><term><parameter>c</parameter>&#160;:</term>
 
442
<listitem><simpara> a Unicode character
 
443
</simpara></listitem></varlistentry>
 
444
<varlistentry><term><emphasis>Returns</emphasis>&#160;:</term><listitem><simpara> <link linkend="TRUE--CAPS"><literal>TRUE</literal></link> if the character is titlecase
 
445
</simpara></listitem></varlistentry>
 
446
</variablelist></refsect2>
 
447
<refsect2 id="g-unichar-isupper" role="function">
 
448
<title>g_unichar_isupper ()</title>
 
449
<indexterm zone="g-unichar-isupper"><primary sortas="unichar_isupper">g_unichar_isupper</primary></indexterm><programlisting><link linkend="gboolean">gboolean</link>            g_unichar_isupper                   (<link linkend="gunichar">gunichar</link> c);</programlisting>
 
450
<para>
 
451
Determines if a character is uppercase.</para>
 
452
<para>
 
453
</para><variablelist role="params">
 
454
<varlistentry><term><parameter>c</parameter>&#160;:</term>
 
455
<listitem><simpara> a Unicode character
 
456
</simpara></listitem></varlistentry>
 
457
<varlistentry><term><emphasis>Returns</emphasis>&#160;:</term><listitem><simpara> <link linkend="TRUE--CAPS"><literal>TRUE</literal></link> if <parameter>c</parameter> is an uppercase character
 
458
</simpara></listitem></varlistentry>
 
459
</variablelist></refsect2>
 
460
<refsect2 id="g-unichar-isxdigit" role="function">
 
461
<title>g_unichar_isxdigit ()</title>
 
462
<indexterm zone="g-unichar-isxdigit"><primary sortas="unichar_isxdigit">g_unichar_isxdigit</primary></indexterm><programlisting><link linkend="gboolean">gboolean</link>            g_unichar_isxdigit                  (<link linkend="gunichar">gunichar</link> c);</programlisting>
 
463
<para>
 
464
Determines if a character is a hexadecimal digit.</para>
 
465
<para>
 
466
</para><variablelist role="params">
 
467
<varlistentry><term><parameter>c</parameter>&#160;:</term>
 
468
<listitem><simpara> a Unicode character.
 
469
</simpara></listitem></varlistentry>
 
470
<varlistentry><term><emphasis>Returns</emphasis>&#160;:</term><listitem><simpara> <link linkend="TRUE--CAPS"><literal>TRUE</literal></link> if the character is a hexadecimal digit
 
471
</simpara></listitem></varlistentry>
 
472
</variablelist></refsect2>
 
473
<refsect2 id="g-unichar-iswide" role="function">
 
474
<title>g_unichar_iswide ()</title>
 
475
<indexterm zone="g-unichar-iswide"><primary sortas="unichar_iswide">g_unichar_iswide</primary></indexterm><programlisting><link linkend="gboolean">gboolean</link>            g_unichar_iswide                    (<link linkend="gunichar">gunichar</link> c);</programlisting>
 
476
<para>
 
477
Determines if a character is typically rendered in a double-width
 
478
cell.</para>
 
479
<para>
 
480
</para><variablelist role="params">
 
481
<varlistentry><term><parameter>c</parameter>&#160;:</term>
 
482
<listitem><simpara> a Unicode character
 
483
</simpara></listitem></varlistentry>
 
484
<varlistentry><term><emphasis>Returns</emphasis>&#160;:</term><listitem><simpara> <link linkend="TRUE--CAPS"><literal>TRUE</literal></link> if the character is wide
 
485
</simpara></listitem></varlistentry>
 
486
</variablelist></refsect2>
 
487
<refsect2 id="g-unichar-iswide-cjk" role="function" condition="since:2.12">
 
488
<title>g_unichar_iswide_cjk ()</title>
 
489
<indexterm zone="g-unichar-iswide-cjk" role="2.12"><primary sortas="unichar_iswide_cjk">g_unichar_iswide_cjk</primary></indexterm><programlisting><link linkend="gboolean">gboolean</link>            g_unichar_iswide_cjk                (<link linkend="gunichar">gunichar</link> c);</programlisting>
 
490
<para>
 
491
Determines if a character is typically rendered in a double-width
 
492
cell under legacy East Asian locales.  If a character is wide according to
 
493
<link linkend="g-unichar-iswide"><function>g_unichar_iswide()</function></link>, then it is also reported wide with this function, but
 
494
the converse is not necessarily true.  See the
 
495
<ulink url="http://www.unicode.org/reports/tr11/">Unicode Standard
 
496
Annex #11</ulink> for details.
 
497
</para>
 
498
<para>
 
499
If a character passes the <link linkend="g-unichar-iswide"><function>g_unichar_iswide()</function></link> test then it will also pass
 
500
this test, but not the other way around.  Note that some characters may
 
501
pass both this test and <link linkend="g-unichar-iszerowidth"><function>g_unichar_iszerowidth()</function></link>.</para>
 
502
<para>
 
503
</para><variablelist role="params">
 
504
<varlistentry><term><parameter>c</parameter>&#160;:</term>
 
505
<listitem><simpara> a Unicode character
 
506
</simpara></listitem></varlistentry>
 
507
<varlistentry><term><emphasis>Returns</emphasis>&#160;:</term><listitem><simpara> <link linkend="TRUE--CAPS"><literal>TRUE</literal></link> if the character is wide in legacy East Asian locales
 
508
 
 
509
</simpara></listitem></varlistentry>
 
510
</variablelist><para role="since">Since 2.12</para></refsect2>
 
511
<refsect2 id="g-unichar-iszerowidth" role="function" condition="since:2.14">
 
512
<title>g_unichar_iszerowidth ()</title>
 
513
<indexterm zone="g-unichar-iszerowidth" role="2.14"><primary sortas="unichar_iszerowidth">g_unichar_iszerowidth</primary></indexterm><programlisting><link linkend="gboolean">gboolean</link>            g_unichar_iszerowidth               (<link linkend="gunichar">gunichar</link> c);</programlisting>
 
514
<para>
 
515
Determines if a given character typically takes zero width when rendered.
 
516
The return value is <link linkend="TRUE--CAPS"><literal>TRUE</literal></link> for all non-spacing and enclosing marks
 
517
(e.g., combining accents), format characters, zero-width
 
518
space, but not U+00AD SOFT HYPHEN.
 
519
</para>
 
520
<para>
 
521
A typical use of this function is with one of <link linkend="g-unichar-iswide"><function>g_unichar_iswide()</function></link> or
 
522
<link linkend="g-unichar-iswide-cjk"><function>g_unichar_iswide_cjk()</function></link> to determine the number of cells a string occupies
 
523
when displayed on a grid display (terminals).  However, note that not all
 
524
terminals support zero-width rendering of zero-width marks.</para>
 
525
<para>
 
526
</para><variablelist role="params">
 
527
<varlistentry><term><parameter>c</parameter>&#160;:</term>
 
528
<listitem><simpara> a Unicode character
 
529
</simpara></listitem></varlistentry>
 
530
<varlistentry><term><emphasis>Returns</emphasis>&#160;:</term><listitem><simpara> <link linkend="TRUE--CAPS"><literal>TRUE</literal></link> if the character has zero width
 
531
 
 
532
</simpara></listitem></varlistentry>
 
533
</variablelist><para role="since">Since 2.14</para></refsect2>
 
534
<refsect2 id="g-unichar-toupper" role="function">
 
535
<title>g_unichar_toupper ()</title>
 
536
<indexterm zone="g-unichar-toupper"><primary sortas="unichar_toupper">g_unichar_toupper</primary></indexterm><programlisting><link linkend="gunichar">gunichar</link>            g_unichar_toupper                   (<link linkend="gunichar">gunichar</link> c);</programlisting>
 
537
<para>
 
538
Converts a character to uppercase.</para>
 
539
<para>
 
540
</para><variablelist role="params">
 
541
<varlistentry><term><parameter>c</parameter>&#160;:</term>
 
542
<listitem><simpara> a Unicode character
 
543
</simpara></listitem></varlistentry>
 
544
<varlistentry><term><emphasis>Returns</emphasis>&#160;:</term><listitem><simpara> the result of converting <parameter>c</parameter> to uppercase.
 
545
              If <parameter>c</parameter> is not a lowercase or titlecase character,
 
546
              or has no upper case equivalent, <parameter>c</parameter> is returned unchanged.
 
547
</simpara></listitem></varlistentry>
 
548
</variablelist></refsect2>
 
549
<refsect2 id="g-unichar-tolower" role="function">
 
550
<title>g_unichar_tolower ()</title>
 
551
<indexterm zone="g-unichar-tolower"><primary sortas="unichar_tolower">g_unichar_tolower</primary></indexterm><programlisting><link linkend="gunichar">gunichar</link>            g_unichar_tolower                   (<link linkend="gunichar">gunichar</link> c);</programlisting>
 
552
<para>
 
553
Converts a character to lower case.</para>
 
554
<para>
 
555
</para><variablelist role="params">
 
556
<varlistentry><term><parameter>c</parameter>&#160;:</term>
 
557
<listitem><simpara> a Unicode character.
 
558
</simpara></listitem></varlistentry>
 
559
<varlistentry><term><emphasis>Returns</emphasis>&#160;:</term><listitem><simpara> the result of converting <parameter>c</parameter> to lower case.
 
560
              If <parameter>c</parameter> is not an uppercase or titlecase character,
 
561
              or has no lowercase equivalent, <parameter>c</parameter> is returned unchanged.
 
562
</simpara></listitem></varlistentry>
 
563
</variablelist></refsect2>
 
564
<refsect2 id="g-unichar-totitle" role="function">
 
565
<title>g_unichar_totitle ()</title>
 
566
<indexterm zone="g-unichar-totitle"><primary sortas="unichar_totitle">g_unichar_totitle</primary></indexterm><programlisting><link linkend="gunichar">gunichar</link>            g_unichar_totitle                   (<link linkend="gunichar">gunichar</link> c);</programlisting>
 
567
<para>
 
568
Converts a character to titlecase.</para>
 
569
<para>
 
570
</para><variablelist role="params">
 
571
<varlistentry><term><parameter>c</parameter>&#160;:</term>
 
572
<listitem><simpara> a Unicode character
 
573
</simpara></listitem></varlistentry>
 
574
<varlistentry><term><emphasis>Returns</emphasis>&#160;:</term><listitem><simpara> the result of converting <parameter>c</parameter> to titlecase.
 
575
              If <parameter>c</parameter> is not an uppercase or lowercase character,
 
576
              <parameter>c</parameter> is returned unchanged.
 
577
</simpara></listitem></varlistentry>
 
578
</variablelist></refsect2>
 
579
<refsect2 id="g-unichar-digit-value" role="function">
 
580
<title>g_unichar_digit_value ()</title>
 
581
<indexterm zone="g-unichar-digit-value"><primary sortas="unichar_digit_value">g_unichar_digit_value</primary></indexterm><programlisting><link linkend="gint">gint</link>                g_unichar_digit_value               (<link linkend="gunichar">gunichar</link> c);</programlisting>
 
582
<para>
 
583
Determines the numeric value of a character as a decimal
 
584
digit.</para>
 
585
<para>
 
586
</para><variablelist role="params">
 
587
<varlistentry><term><parameter>c</parameter>&#160;:</term>
 
588
<listitem><simpara> a Unicode character
 
589
</simpara></listitem></varlistentry>
 
590
<varlistentry><term><emphasis>Returns</emphasis>&#160;:</term><listitem><simpara> If <parameter>c</parameter> is a decimal digit (according to
 
591
<link linkend="g-unichar-isdigit"><function>g_unichar_isdigit()</function></link>), its numeric value. Otherwise, -1.
 
592
</simpara></listitem></varlistentry>
 
593
</variablelist></refsect2>
 
594
<refsect2 id="g-unichar-xdigit-value" role="function">
 
595
<title>g_unichar_xdigit_value ()</title>
 
596
<indexterm zone="g-unichar-xdigit-value"><primary sortas="unichar_xdigit_value">g_unichar_xdigit_value</primary></indexterm><programlisting><link linkend="gint">gint</link>                g_unichar_xdigit_value              (<link linkend="gunichar">gunichar</link> c);</programlisting>
 
597
<para>
 
598
Determines the numeric value of a character as a hexadecimal
 
599
digit.</para>
 
600
<para>
 
601
</para><variablelist role="params">
 
602
<varlistentry><term><parameter>c</parameter>&#160;:</term>
 
603
<listitem><simpara> a Unicode character
 
604
</simpara></listitem></varlistentry>
 
605
<varlistentry><term><emphasis>Returns</emphasis>&#160;:</term><listitem><simpara> If <parameter>c</parameter> is a hex digit (according to
 
606
<link linkend="g-unichar-isxdigit"><function>g_unichar_isxdigit()</function></link>), its numeric value. Otherwise, -1.
 
607
</simpara></listitem></varlistentry>
 
608
</variablelist></refsect2>
 
609
<refsect2 id="GUnicodeType" role="enum">
 
610
<title>enum GUnicodeType</title>
 
611
<indexterm zone="GUnicodeType"><primary sortas="UnicodeType">GUnicodeType</primary></indexterm><programlisting>typedef enum
 
612
{
 
613
  G_UNICODE_CONTROL,
 
614
  G_UNICODE_FORMAT,
 
615
  G_UNICODE_UNASSIGNED,
 
616
  G_UNICODE_PRIVATE_USE,
 
617
  G_UNICODE_SURROGATE,
 
618
  G_UNICODE_LOWERCASE_LETTER,
 
619
  G_UNICODE_MODIFIER_LETTER,
 
620
  G_UNICODE_OTHER_LETTER,
 
621
  G_UNICODE_TITLECASE_LETTER,
 
622
  G_UNICODE_UPPERCASE_LETTER,
 
623
  G_UNICODE_COMBINING_MARK,
 
624
  G_UNICODE_ENCLOSING_MARK,
 
625
  G_UNICODE_NON_SPACING_MARK,
 
626
  G_UNICODE_DECIMAL_NUMBER,
 
627
  G_UNICODE_LETTER_NUMBER,
 
628
  G_UNICODE_OTHER_NUMBER,
 
629
  G_UNICODE_CONNECT_PUNCTUATION,
 
630
  G_UNICODE_DASH_PUNCTUATION,
 
631
  G_UNICODE_CLOSE_PUNCTUATION,
 
632
  G_UNICODE_FINAL_PUNCTUATION,
 
633
  G_UNICODE_INITIAL_PUNCTUATION,
 
634
  G_UNICODE_OTHER_PUNCTUATION,
 
635
  G_UNICODE_OPEN_PUNCTUATION,
 
636
  G_UNICODE_CURRENCY_SYMBOL,
 
637
  G_UNICODE_MODIFIER_SYMBOL,
 
638
  G_UNICODE_MATH_SYMBOL,
 
639
  G_UNICODE_OTHER_SYMBOL,
 
640
  G_UNICODE_LINE_SEPARATOR,
 
641
  G_UNICODE_PARAGRAPH_SEPARATOR,
 
642
  G_UNICODE_SPACE_SEPARATOR
 
643
} GUnicodeType;
 
644
</programlisting>
 
645
<para>
 
646
These are the possible character classifications from the
 
647
Unicode specification.
 
648
See <ulink url="http://www.unicode.org/Public/UNIDATA/UnicodeData.html"
 
649
>http://www.unicode.org/Public/UNIDATA/UnicodeData.html</ulink>.
 
650
</para><variablelist role="enum">
 
651
<varlistentry id="G-UNICODE-CONTROL--CAPS" role="constant">
 
652
<term><literal>G_UNICODE_CONTROL</literal></term>
 
653
<listitem><simpara>General category "Other, Control" (Cc)
 
654
</simpara></listitem>
 
655
</varlistentry>
 
656
<varlistentry id="G-UNICODE-FORMAT--CAPS" role="constant">
 
657
<term><literal>G_UNICODE_FORMAT</literal></term>
 
658
<listitem><simpara>General category "Other, Format" (Cf)
 
659
</simpara></listitem>
 
660
</varlistentry>
 
661
<varlistentry id="G-UNICODE-UNASSIGNED--CAPS" role="constant">
 
662
<term><literal>G_UNICODE_UNASSIGNED</literal></term>
 
663
<listitem><simpara>General category "Other, Not Assigned" (Cn)
 
664
</simpara></listitem>
 
665
</varlistentry>
 
666
<varlistentry id="G-UNICODE-PRIVATE-USE--CAPS" role="constant">
 
667
<term><literal>G_UNICODE_PRIVATE_USE</literal></term>
 
668
<listitem><simpara>General category "Other, Private Use" (Co)
 
669
</simpara></listitem>
 
670
</varlistentry>
 
671
<varlistentry id="G-UNICODE-SURROGATE--CAPS" role="constant">
 
672
<term><literal>G_UNICODE_SURROGATE</literal></term>
 
673
<listitem><simpara>General category "Other, Surrogate" (Cs)
 
674
</simpara></listitem>
 
675
</varlistentry>
 
676
<varlistentry id="G-UNICODE-LOWERCASE-LETTER--CAPS" role="constant">
 
677
<term><literal>G_UNICODE_LOWERCASE_LETTER</literal></term>
 
678
<listitem><simpara>General category "Letter, Lowercase" (Ll)
 
679
</simpara></listitem>
 
680
</varlistentry>
 
681
<varlistentry id="G-UNICODE-MODIFIER-LETTER--CAPS" role="constant">
 
682
<term><literal>G_UNICODE_MODIFIER_LETTER</literal></term>
 
683
<listitem><simpara>General category "Letter, Modifier" (Lm)
 
684
</simpara></listitem>
 
685
</varlistentry>
 
686
<varlistentry id="G-UNICODE-OTHER-LETTER--CAPS" role="constant">
 
687
<term><literal>G_UNICODE_OTHER_LETTER</literal></term>
 
688
<listitem><simpara>General category "Letter, Other" (Lo)
 
689
</simpara></listitem>
 
690
</varlistentry>
 
691
<varlistentry id="G-UNICODE-TITLECASE-LETTER--CAPS" role="constant">
 
692
<term><literal>G_UNICODE_TITLECASE_LETTER</literal></term>
 
693
<listitem><simpara>General category "Letter, Titlecase" (Lt)
 
694
</simpara></listitem>
 
695
</varlistentry>
 
696
<varlistentry id="G-UNICODE-UPPERCASE-LETTER--CAPS" role="constant">
 
697
<term><literal>G_UNICODE_UPPERCASE_LETTER</literal></term>
 
698
<listitem><simpara>General category "Letter, Uppercase" (Lu)
 
699
</simpara></listitem>
 
700
</varlistentry>
 
701
<varlistentry id="G-UNICODE-COMBINING-MARK--CAPS" role="constant">
 
702
<term><literal>G_UNICODE_COMBINING_MARK</literal></term>
 
703
<listitem><simpara>General category "Mark, Spacing Combining" (Mc)
 
704
</simpara></listitem>
 
705
</varlistentry>
 
706
<varlistentry id="G-UNICODE-ENCLOSING-MARK--CAPS" role="constant">
 
707
<term><literal>G_UNICODE_ENCLOSING_MARK</literal></term>
 
708
<listitem><simpara>General category "Mark, Enclosing" (Me)
 
709
</simpara></listitem>
 
710
</varlistentry>
 
711
<varlistentry id="G-UNICODE-NON-SPACING-MARK--CAPS" role="constant">
 
712
<term><literal>G_UNICODE_NON_SPACING_MARK</literal></term>
 
713
<listitem><simpara>General category "Mark, Nonspacing" (Mn)
 
714
</simpara></listitem>
 
715
</varlistentry>
 
716
<varlistentry id="G-UNICODE-DECIMAL-NUMBER--CAPS" role="constant">
 
717
<term><literal>G_UNICODE_DECIMAL_NUMBER</literal></term>
 
718
<listitem><simpara>General category "Number, Decimal Digit" (Nd)
 
719
</simpara></listitem>
 
720
</varlistentry>
 
721
<varlistentry id="G-UNICODE-LETTER-NUMBER--CAPS" role="constant">
 
722
<term><literal>G_UNICODE_LETTER_NUMBER</literal></term>
 
723
<listitem><simpara>General category "Number, Letter" (Nl)
 
724
</simpara></listitem>
 
725
</varlistentry>
 
726
<varlistentry id="G-UNICODE-OTHER-NUMBER--CAPS" role="constant">
 
727
<term><literal>G_UNICODE_OTHER_NUMBER</literal></term>
 
728
<listitem><simpara>General category "Number, Other" (No)
 
729
</simpara></listitem>
 
730
</varlistentry>
 
731
<varlistentry id="G-UNICODE-CONNECT-PUNCTUATION--CAPS" role="constant">
 
732
<term><literal>G_UNICODE_CONNECT_PUNCTUATION</literal></term>
 
733
<listitem><simpara>General category "Punctuation, Connector" (Pc)
 
734
</simpara></listitem>
 
735
</varlistentry>
 
736
<varlistentry id="G-UNICODE-DASH-PUNCTUATION--CAPS" role="constant">
 
737
<term><literal>G_UNICODE_DASH_PUNCTUATION</literal></term>
 
738
<listitem><simpara>General category "Punctuation, Dash" (Pd)
 
739
</simpara></listitem>
 
740
</varlistentry>
 
741
<varlistentry id="G-UNICODE-CLOSE-PUNCTUATION--CAPS" role="constant">
 
742
<term><literal>G_UNICODE_CLOSE_PUNCTUATION</literal></term>
 
743
<listitem><simpara>General category "Punctuation, Close" (Pe)
 
744
</simpara></listitem>
 
745
</varlistentry>
 
746
<varlistentry id="G-UNICODE-FINAL-PUNCTUATION--CAPS" role="constant">
 
747
<term><literal>G_UNICODE_FINAL_PUNCTUATION</literal></term>
 
748
<listitem><simpara>General category "Punctuation, Final quote" (Pf)
 
749
</simpara></listitem>
 
750
</varlistentry>
 
751
<varlistentry id="G-UNICODE-INITIAL-PUNCTUATION--CAPS" role="constant">
 
752
<term><literal>G_UNICODE_INITIAL_PUNCTUATION</literal></term>
 
753
<listitem><simpara>General category "Punctuation, Initial quote" (Pi)
 
754
</simpara></listitem>
 
755
</varlistentry>
 
756
<varlistentry id="G-UNICODE-OTHER-PUNCTUATION--CAPS" role="constant">
 
757
<term><literal>G_UNICODE_OTHER_PUNCTUATION</literal></term>
 
758
<listitem><simpara>General category "Punctuation, Other" (Po)
 
759
</simpara></listitem>
 
760
</varlistentry>
 
761
<varlistentry id="G-UNICODE-OPEN-PUNCTUATION--CAPS" role="constant">
 
762
<term><literal>G_UNICODE_OPEN_PUNCTUATION</literal></term>
 
763
<listitem><simpara>General category "Punctuation, Open" (Ps)
 
764
</simpara></listitem>
 
765
</varlistentry>
 
766
<varlistentry id="G-UNICODE-CURRENCY-SYMBOL--CAPS" role="constant">
 
767
<term><literal>G_UNICODE_CURRENCY_SYMBOL</literal></term>
 
768
<listitem><simpara>General category "Symbol, Currency" (Sc)
 
769
</simpara></listitem>
 
770
</varlistentry>
 
771
<varlistentry id="G-UNICODE-MODIFIER-SYMBOL--CAPS" role="constant">
 
772
<term><literal>G_UNICODE_MODIFIER_SYMBOL</literal></term>
 
773
<listitem><simpara>General category "Symbol, Modifier" (Sk)
 
774
</simpara></listitem>
 
775
</varlistentry>
 
776
<varlistentry id="G-UNICODE-MATH-SYMBOL--CAPS" role="constant">
 
777
<term><literal>G_UNICODE_MATH_SYMBOL</literal></term>
 
778
<listitem><simpara>General category "Symbol, Math" (Sm)
 
779
</simpara></listitem>
 
780
</varlistentry>
 
781
<varlistentry id="G-UNICODE-OTHER-SYMBOL--CAPS" role="constant">
 
782
<term><literal>G_UNICODE_OTHER_SYMBOL</literal></term>
 
783
<listitem><simpara>General category "Symbol, Other" (So)
 
784
</simpara></listitem>
 
785
</varlistentry>
 
786
<varlistentry id="G-UNICODE-LINE-SEPARATOR--CAPS" role="constant">
 
787
<term><literal>G_UNICODE_LINE_SEPARATOR</literal></term>
 
788
<listitem><simpara>General category "Separator, Line" (Zl)
 
789
</simpara></listitem>
 
790
</varlistentry>
 
791
<varlistentry id="G-UNICODE-PARAGRAPH-SEPARATOR--CAPS" role="constant">
 
792
<term><literal>G_UNICODE_PARAGRAPH_SEPARATOR</literal></term>
 
793
<listitem><simpara>General category "Separator, Paragraph" (Zp)
 
794
</simpara></listitem>
 
795
</varlistentry>
 
796
<varlistentry id="G-UNICODE-SPACE-SEPARATOR--CAPS" role="constant">
 
797
<term><literal>G_UNICODE_SPACE_SEPARATOR</literal></term>
 
798
<listitem><simpara>General category "Separator, Space" (Zs)
 
799
</simpara></listitem>
 
800
</varlistentry>
 
801
</variablelist></refsect2>
 
802
<refsect2 id="g-unichar-type" role="function">
 
803
<title>g_unichar_type ()</title>
 
804
<indexterm zone="g-unichar-type"><primary sortas="unichar_type">g_unichar_type</primary></indexterm><programlisting><link linkend="GUnicodeType">GUnicodeType</link>        g_unichar_type                      (<link linkend="gunichar">gunichar</link> c);</programlisting>
 
805
<para>
 
806
Classifies a Unicode character by type.</para>
 
807
<para>
 
808
</para><variablelist role="params">
 
809
<varlistentry><term><parameter>c</parameter>&#160;:</term>
 
810
<listitem><simpara> a Unicode character
 
811
</simpara></listitem></varlistentry>
 
812
<varlistentry><term><emphasis>Returns</emphasis>&#160;:</term><listitem><simpara> the type of the character.
 
813
</simpara></listitem></varlistentry>
 
814
</variablelist></refsect2>
 
815
<refsect2 id="GUnicodeBreakType" role="enum">
 
816
<title>enum GUnicodeBreakType</title>
 
817
<indexterm zone="GUnicodeBreakType"><primary sortas="UnicodeBreakType">GUnicodeBreakType</primary></indexterm><programlisting>typedef enum
 
818
{
 
819
  G_UNICODE_BREAK_MANDATORY,
 
820
  G_UNICODE_BREAK_CARRIAGE_RETURN,
 
821
  G_UNICODE_BREAK_LINE_FEED,
 
822
  G_UNICODE_BREAK_COMBINING_MARK,
 
823
  G_UNICODE_BREAK_SURROGATE,
 
824
  G_UNICODE_BREAK_ZERO_WIDTH_SPACE,
 
825
  G_UNICODE_BREAK_INSEPARABLE,
 
826
  G_UNICODE_BREAK_NON_BREAKING_GLUE,
 
827
  G_UNICODE_BREAK_CONTINGENT,
 
828
  G_UNICODE_BREAK_SPACE,
 
829
  G_UNICODE_BREAK_AFTER,
 
830
  G_UNICODE_BREAK_BEFORE,
 
831
  G_UNICODE_BREAK_BEFORE_AND_AFTER,
 
832
  G_UNICODE_BREAK_HYPHEN,
 
833
  G_UNICODE_BREAK_NON_STARTER,
 
834
  G_UNICODE_BREAK_OPEN_PUNCTUATION,
 
835
  G_UNICODE_BREAK_CLOSE_PUNCTUATION,
 
836
  G_UNICODE_BREAK_QUOTATION,
 
837
  G_UNICODE_BREAK_EXCLAMATION,
 
838
  G_UNICODE_BREAK_IDEOGRAPHIC,
 
839
  G_UNICODE_BREAK_NUMERIC,
 
840
  G_UNICODE_BREAK_INFIX_SEPARATOR,
 
841
  G_UNICODE_BREAK_SYMBOL,
 
842
  G_UNICODE_BREAK_ALPHABETIC,
 
843
  G_UNICODE_BREAK_PREFIX,
 
844
  G_UNICODE_BREAK_POSTFIX,
 
845
  G_UNICODE_BREAK_COMPLEX_CONTEXT,
 
846
  G_UNICODE_BREAK_AMBIGUOUS,
 
847
  G_UNICODE_BREAK_UNKNOWN,
 
848
  G_UNICODE_BREAK_NEXT_LINE,
 
849
  G_UNICODE_BREAK_WORD_JOINER,
 
850
  G_UNICODE_BREAK_HANGUL_L_JAMO,
 
851
  G_UNICODE_BREAK_HANGUL_V_JAMO,
 
852
  G_UNICODE_BREAK_HANGUL_T_JAMO,
 
853
  G_UNICODE_BREAK_HANGUL_LV_SYLLABLE,
 
854
  G_UNICODE_BREAK_HANGUL_LVT_SYLLABLE
 
855
} GUnicodeBreakType;
 
856
</programlisting>
 
857
<para>
 
858
These are the possible line break classifications.
 
859
The five Hangul types were added in Unicode 4.1, and so were
 
860
introduced in GLib 2.10.  Note that new types may be added in the future.
 
861
Applications should be ready to handle unknown values.
 
862
They may be regarded as <link linkend="G-UNICODE-BREAK-UNKNOWN--CAPS"><literal>G_UNICODE_BREAK_UNKNOWN</literal></link>.
 
863
See <ulink url="http://www.unicode.org/unicode/reports/tr14/"
 
864
>http://www.unicode.org/unicode/reports/tr14/</ulink>.
 
865
</para><variablelist role="enum">
 
866
<varlistentry id="G-UNICODE-BREAK-MANDATORY--CAPS" role="constant">
 
867
<term><literal>G_UNICODE_BREAK_MANDATORY</literal></term>
 
868
<listitem><simpara>Mandatory Break (BK)
 
869
</simpara></listitem>
 
870
</varlistentry>
 
871
<varlistentry id="G-UNICODE-BREAK-CARRIAGE-RETURN--CAPS" role="constant">
 
872
<term><literal>G_UNICODE_BREAK_CARRIAGE_RETURN</literal></term>
 
873
<listitem><simpara>Carriage Return (CR)
 
874
</simpara></listitem>
 
875
</varlistentry>
 
876
<varlistentry id="G-UNICODE-BREAK-LINE-FEED--CAPS" role="constant">
 
877
<term><literal>G_UNICODE_BREAK_LINE_FEED</literal></term>
 
878
<listitem><simpara>Line Feed (LF)
 
879
</simpara></listitem>
 
880
</varlistentry>
 
881
<varlistentry id="G-UNICODE-BREAK-COMBINING-MARK--CAPS" role="constant">
 
882
<term><literal>G_UNICODE_BREAK_COMBINING_MARK</literal></term>
 
883
<listitem><simpara>Attached Characters and Combining Marks (CM)
 
884
</simpara></listitem>
 
885
</varlistentry>
 
886
<varlistentry id="G-UNICODE-BREAK-SURROGATE--CAPS" role="constant">
 
887
<term><literal>G_UNICODE_BREAK_SURROGATE</literal></term>
 
888
<listitem><simpara>Surrogates (SG)
 
889
</simpara></listitem>
 
890
</varlistentry>
 
891
<varlistentry id="G-UNICODE-BREAK-ZERO-WIDTH-SPACE--CAPS" role="constant">
 
892
<term><literal>G_UNICODE_BREAK_ZERO_WIDTH_SPACE</literal></term>
 
893
<listitem><simpara>Zero Width Space (ZW)
 
894
</simpara></listitem>
 
895
</varlistentry>
 
896
<varlistentry id="G-UNICODE-BREAK-INSEPARABLE--CAPS" role="constant">
 
897
<term><literal>G_UNICODE_BREAK_INSEPARABLE</literal></term>
 
898
<listitem><simpara>Inseparable (IN)
 
899
</simpara></listitem>
 
900
</varlistentry>
 
901
<varlistentry id="G-UNICODE-BREAK-NON-BREAKING-GLUE--CAPS" role="constant">
 
902
<term><literal>G_UNICODE_BREAK_NON_BREAKING_GLUE</literal></term>
 
903
<listitem><simpara>Non-breaking ("Glue") (GL)
 
904
</simpara></listitem>
 
905
</varlistentry>
 
906
<varlistentry id="G-UNICODE-BREAK-CONTINGENT--CAPS" role="constant">
 
907
<term><literal>G_UNICODE_BREAK_CONTINGENT</literal></term>
 
908
<listitem><simpara>Contingent Break Opportunity (CB)
 
909
</simpara></listitem>
 
910
</varlistentry>
 
911
<varlistentry id="G-UNICODE-BREAK-SPACE--CAPS" role="constant">
 
912
<term><literal>G_UNICODE_BREAK_SPACE</literal></term>
 
913
<listitem><simpara>Space (SP)
 
914
</simpara></listitem>
 
915
</varlistentry>
 
916
<varlistentry id="G-UNICODE-BREAK-AFTER--CAPS" role="constant">
 
917
<term><literal>G_UNICODE_BREAK_AFTER</literal></term>
 
918
<listitem><simpara>Break Opportunity After (BA)
 
919
</simpara></listitem>
 
920
</varlistentry>
 
921
<varlistentry id="G-UNICODE-BREAK-BEFORE--CAPS" role="constant">
 
922
<term><literal>G_UNICODE_BREAK_BEFORE</literal></term>
 
923
<listitem><simpara>Break Opportunity Before (BB)
 
924
</simpara></listitem>
 
925
</varlistentry>
 
926
<varlistentry id="G-UNICODE-BREAK-BEFORE-AND-AFTER--CAPS" role="constant">
 
927
<term><literal>G_UNICODE_BREAK_BEFORE_AND_AFTER</literal></term>
 
928
<listitem><simpara>Break Opportunity Before and After (B2)
 
929
</simpara></listitem>
 
930
</varlistentry>
 
931
<varlistentry id="G-UNICODE-BREAK-HYPHEN--CAPS" role="constant">
 
932
<term><literal>G_UNICODE_BREAK_HYPHEN</literal></term>
 
933
<listitem><simpara>Hyphen (HY)
 
934
</simpara></listitem>
 
935
</varlistentry>
 
936
<varlistentry id="G-UNICODE-BREAK-NON-STARTER--CAPS" role="constant">
 
937
<term><literal>G_UNICODE_BREAK_NON_STARTER</literal></term>
 
938
<listitem><simpara>Nonstarter (NS)
 
939
</simpara></listitem>
 
940
</varlistentry>
 
941
<varlistentry id="G-UNICODE-BREAK-OPEN-PUNCTUATION--CAPS" role="constant">
 
942
<term><literal>G_UNICODE_BREAK_OPEN_PUNCTUATION</literal></term>
 
943
<listitem><simpara>Opening Punctuation (OP)
 
944
</simpara></listitem>
 
945
</varlistentry>
 
946
<varlistentry id="G-UNICODE-BREAK-CLOSE-PUNCTUATION--CAPS" role="constant">
 
947
<term><literal>G_UNICODE_BREAK_CLOSE_PUNCTUATION</literal></term>
 
948
<listitem><simpara>Closing Punctuation (CL)
 
949
</simpara></listitem>
 
950
</varlistentry>
 
951
<varlistentry id="G-UNICODE-BREAK-QUOTATION--CAPS" role="constant">
 
952
<term><literal>G_UNICODE_BREAK_QUOTATION</literal></term>
 
953
<listitem><simpara>Ambiguous Quotation (QU)
 
954
</simpara></listitem>
 
955
</varlistentry>
 
956
<varlistentry id="G-UNICODE-BREAK-EXCLAMATION--CAPS" role="constant">
 
957
<term><literal>G_UNICODE_BREAK_EXCLAMATION</literal></term>
 
958
<listitem><simpara>Exclamation/Interrogation (EX)
 
959
</simpara></listitem>
 
960
</varlistentry>
 
961
<varlistentry id="G-UNICODE-BREAK-IDEOGRAPHIC--CAPS" role="constant">
 
962
<term><literal>G_UNICODE_BREAK_IDEOGRAPHIC</literal></term>
 
963
<listitem><simpara>Ideographic (ID)
 
964
</simpara></listitem>
 
965
</varlistentry>
 
966
<varlistentry id="G-UNICODE-BREAK-NUMERIC--CAPS" role="constant">
 
967
<term><literal>G_UNICODE_BREAK_NUMERIC</literal></term>
 
968
<listitem><simpara>Numeric (NU)
 
969
</simpara></listitem>
 
970
</varlistentry>
 
971
<varlistentry id="G-UNICODE-BREAK-INFIX-SEPARATOR--CAPS" role="constant">
 
972
<term><literal>G_UNICODE_BREAK_INFIX_SEPARATOR</literal></term>
 
973
<listitem><simpara>Infix Separator (Numeric) (IS)
 
974
</simpara></listitem>
 
975
</varlistentry>
 
976
<varlistentry id="G-UNICODE-BREAK-SYMBOL--CAPS" role="constant">
 
977
<term><literal>G_UNICODE_BREAK_SYMBOL</literal></term>
 
978
<listitem><simpara>Symbols Allowing Break After (SY)
 
979
</simpara></listitem>
 
980
</varlistentry>
 
981
<varlistentry id="G-UNICODE-BREAK-ALPHABETIC--CAPS" role="constant">
 
982
<term><literal>G_UNICODE_BREAK_ALPHABETIC</literal></term>
 
983
<listitem><simpara>Ordinary Alphabetic and Symbol Characters (AL)
 
984
</simpara></listitem>
 
985
</varlistentry>
 
986
<varlistentry id="G-UNICODE-BREAK-PREFIX--CAPS" role="constant">
 
987
<term><literal>G_UNICODE_BREAK_PREFIX</literal></term>
 
988
<listitem><simpara>Prefix (Numeric) (PR)
 
989
</simpara></listitem>
 
990
</varlistentry>
 
991
<varlistentry id="G-UNICODE-BREAK-POSTFIX--CAPS" role="constant">
 
992
<term><literal>G_UNICODE_BREAK_POSTFIX</literal></term>
 
993
<listitem><simpara>Postfix (Numeric) (PO)
 
994
</simpara></listitem>
 
995
</varlistentry>
 
996
<varlistentry id="G-UNICODE-BREAK-COMPLEX-CONTEXT--CAPS" role="constant">
 
997
<term><literal>G_UNICODE_BREAK_COMPLEX_CONTEXT</literal></term>
 
998
<listitem><simpara>Complex Context Dependent (South East Asian) (SA)
 
999
</simpara></listitem>
 
1000
</varlistentry>
 
1001
<varlistentry id="G-UNICODE-BREAK-AMBIGUOUS--CAPS" role="constant">
 
1002
<term><literal>G_UNICODE_BREAK_AMBIGUOUS</literal></term>
 
1003
<listitem><simpara>Ambiguous (Alphabetic or Ideographic) (AI)
 
1004
</simpara></listitem>
 
1005
</varlistentry>
 
1006
<varlistentry id="G-UNICODE-BREAK-UNKNOWN--CAPS" role="constant">
 
1007
<term><literal>G_UNICODE_BREAK_UNKNOWN</literal></term>
 
1008
<listitem><simpara>Unknown (XX)
 
1009
</simpara></listitem>
 
1010
</varlistentry>
 
1011
<varlistentry id="G-UNICODE-BREAK-NEXT-LINE--CAPS" role="constant">
 
1012
<term><literal>G_UNICODE_BREAK_NEXT_LINE</literal></term>
 
1013
<listitem><simpara>Next Line (NL)
 
1014
</simpara></listitem>
 
1015
</varlistentry>
 
1016
<varlistentry id="G-UNICODE-BREAK-WORD-JOINER--CAPS" role="constant">
 
1017
<term><literal>G_UNICODE_BREAK_WORD_JOINER</literal></term>
 
1018
<listitem><simpara>Word Joiner (WJ)
 
1019
</simpara></listitem>
 
1020
</varlistentry>
 
1021
<varlistentry id="G-UNICODE-BREAK-HANGUL-L-JAMO--CAPS" role="constant">
 
1022
<term><literal>G_UNICODE_BREAK_HANGUL_L_JAMO</literal></term>
 
1023
<listitem><simpara>Hangul L Jamo (JL)
 
1024
</simpara></listitem>
 
1025
</varlistentry>
 
1026
<varlistentry id="G-UNICODE-BREAK-HANGUL-V-JAMO--CAPS" role="constant">
 
1027
<term><literal>G_UNICODE_BREAK_HANGUL_V_JAMO</literal></term>
 
1028
<listitem><simpara>Hangul V Jamo (JV)
 
1029
</simpara></listitem>
 
1030
</varlistentry>
 
1031
<varlistentry id="G-UNICODE-BREAK-HANGUL-T-JAMO--CAPS" role="constant">
 
1032
<term><literal>G_UNICODE_BREAK_HANGUL_T_JAMO</literal></term>
 
1033
<listitem><simpara>Hangul T Jamo (JT)
 
1034
</simpara></listitem>
 
1035
</varlistentry>
 
1036
<varlistentry id="G-UNICODE-BREAK-HANGUL-LV-SYLLABLE--CAPS" role="constant">
 
1037
<term><literal>G_UNICODE_BREAK_HANGUL_LV_SYLLABLE</literal></term>
 
1038
<listitem><simpara>Hangul LV Syllable (H2)
 
1039
</simpara></listitem>
 
1040
</varlistentry>
 
1041
<varlistentry id="G-UNICODE-BREAK-HANGUL-LVT-SYLLABLE--CAPS" role="constant">
 
1042
<term><literal>G_UNICODE_BREAK_HANGUL_LVT_SYLLABLE</literal></term>
 
1043
<listitem><simpara>Hangul LVT Syllable (H3)
 
1044
</simpara></listitem>
 
1045
</varlistentry>
 
1046
</variablelist></refsect2>
 
1047
<refsect2 id="g-unichar-break-type" role="function">
 
1048
<title>g_unichar_break_type ()</title>
 
1049
<indexterm zone="g-unichar-break-type"><primary sortas="unichar_break_type">g_unichar_break_type</primary></indexterm><programlisting><link linkend="GUnicodeBreakType">GUnicodeBreakType</link>   g_unichar_break_type                (<link linkend="gunichar">gunichar</link> c);</programlisting>
 
1050
<para>
 
1051
Determines the break type of <parameter>c</parameter>. <parameter>c</parameter> should be a Unicode character
 
1052
(to derive a character from UTF-8 encoded text, use
 
1053
<link linkend="g-utf8-get-char"><function>g_utf8_get_char()</function></link>). The break type is used to find word and line
 
1054
breaks ("text boundaries"). Pango implements the Unicode boundary
 
1055
resolution algorithms and normally you would use a function such
 
1056
as <link linkend="pango-break"><function>pango_break()</function></link> instead of caring about break types yourself.</para>
 
1057
<para>
 
1058
</para><variablelist role="params">
 
1059
<varlistentry><term><parameter>c</parameter>&#160;:</term>
 
1060
<listitem><simpara> a Unicode character
 
1061
</simpara></listitem></varlistentry>
 
1062
<varlistentry><term><emphasis>Returns</emphasis>&#160;:</term><listitem><simpara> the break type of <parameter>c</parameter>
 
1063
</simpara></listitem></varlistentry>
 
1064
</variablelist></refsect2>
 
1065
<refsect2 id="g-unichar-combining-class" role="function" condition="since:2.14">
 
1066
<title>g_unichar_combining_class ()</title>
 
1067
<indexterm zone="g-unichar-combining-class" role="2.14"><primary sortas="unichar_combining_class">g_unichar_combining_class</primary></indexterm><programlisting><link linkend="gint">gint</link>                g_unichar_combining_class           (<link linkend="gunichar">gunichar</link> uc);</programlisting>
 
1068
<para>
 
1069
Determines the canonical combining class of a Unicode character.</para>
 
1070
<para>
 
1071
</para><variablelist role="params">
 
1072
<varlistentry><term><parameter>uc</parameter>&#160;:</term>
 
1073
<listitem><simpara> a Unicode character
 
1074
</simpara></listitem></varlistentry>
 
1075
<varlistentry><term><emphasis>Returns</emphasis>&#160;:</term><listitem><simpara> the combining class of the character
 
1076
 
 
1077
</simpara></listitem></varlistentry>
 
1078
</variablelist><para role="since">Since 2.14</para></refsect2>
 
1079
<refsect2 id="g-unicode-canonical-ordering" role="function">
 
1080
<title>g_unicode_canonical_ordering ()</title>
 
1081
<indexterm zone="g-unicode-canonical-ordering"><primary sortas="unicode_canonical_ordering">g_unicode_canonical_ordering</primary></indexterm><programlisting><link linkend="void">void</link>                g_unicode_canonical_ordering        (<link linkend="gunichar">gunichar</link> *string,
 
1082
                                                         <link linkend="gsize">gsize</link> len);</programlisting>
 
1083
<para>
 
1084
Computes the canonical ordering of a string in-place.  
 
1085
This rearranges decomposed characters in the string 
 
1086
according to their combining classes.  See the Unicode 
 
1087
manual for more information.</para>
 
1088
<para>
 
1089
</para><variablelist role="params">
 
1090
<varlistentry><term><parameter>string</parameter>&#160;:</term>
 
1091
<listitem><simpara> a UCS-4 encoded string.
 
1092
</simpara></listitem></varlistentry>
 
1093
<varlistentry><term><parameter>len</parameter>&#160;:</term>
 
1094
<listitem><simpara> the maximum length of <parameter>string</parameter> to use.
 
1095
</simpara></listitem></varlistentry>
 
1096
</variablelist></refsect2>
 
1097
<refsect2 id="g-unicode-canonical-decomposition" role="function">
 
1098
<title>g_unicode_canonical_decomposition ()</title>
 
1099
<indexterm zone="g-unicode-canonical-decomposition"><primary sortas="unicode_canonical_decomposition">g_unicode_canonical_decomposition</primary></indexterm><programlisting><link linkend="gunichar">gunichar</link> *          g_unicode_canonical_decomposition   (<link linkend="gunichar">gunichar</link> ch,
 
1100
                                                         <link linkend="gsize">gsize</link> *result_len);</programlisting>
 
1101
<para>
 
1102
Computes the canonical decomposition of a Unicode character.</para>
 
1103
<para>
 
1104
</para><variablelist role="params">
 
1105
<varlistentry><term><parameter>ch</parameter>&#160;:</term>
 
1106
<listitem><simpara> a Unicode character.
 
1107
</simpara></listitem></varlistentry>
 
1108
<varlistentry><term><parameter>result_len</parameter>&#160;:</term>
 
1109
<listitem><simpara> location to store the length of the return value.
 
1110
</simpara></listitem></varlistentry>
 
1111
<varlistentry><term><emphasis>Returns</emphasis>&#160;:</term><listitem><simpara> a newly allocated string of Unicode characters.
 
1112
  <parameter>result_len</parameter> is set to the resulting length of the string.
 
1113
</simpara></listitem></varlistentry>
 
1114
</variablelist></refsect2>
 
1115
<refsect2 id="g-unichar-get-mirror-char" role="function" condition="since:2.4">
 
1116
<title>g_unichar_get_mirror_char ()</title>
 
1117
<indexterm zone="g-unichar-get-mirror-char" role="2.4"><primary sortas="unichar_get_mirror_char">g_unichar_get_mirror_char</primary></indexterm><programlisting><link linkend="gboolean">gboolean</link>            g_unichar_get_mirror_char           (<link linkend="gunichar">gunichar</link> ch,
 
1118
                                                         <link linkend="gunichar">gunichar</link> *mirrored_ch);</programlisting>
 
1119
<para>
 
1120
In Unicode, some characters are <firstterm>mirrored</firstterm>. This
 
1121
means that their images are mirrored horizontally in text that is laid
 
1122
out from right to left. For instance, "(" would become its mirror image,
 
1123
")", in right-to-left text.
 
1124
</para>
 
1125
<para>
 
1126
If <parameter>ch</parameter> has the Unicode mirrored property and there is another Unicode
 
1127
character that typically has a glyph that is the mirror image of <parameter>ch</parameter>'s
 
1128
glyph and <parameter>mirrored_ch</parameter> is set, it puts that character in the address
 
1129
pointed to by <parameter>mirrored_ch</parameter>.  Otherwise the original character is stored there.</para>
 
1130
<para>
 
1131
</para><variablelist role="params">
 
1132
<varlistentry><term><parameter>ch</parameter>&#160;:</term>
 
1133
<listitem><simpara> a Unicode character
 
1134
</simpara></listitem></varlistentry>
 
1135
<varlistentry><term><parameter>mirrored_ch</parameter>&#160;:</term>
 
1136
<listitem><simpara> location to store the mirrored character
 
1137
</simpara></listitem></varlistentry>
 
1138
<varlistentry><term><emphasis>Returns</emphasis>&#160;:</term><listitem><simpara> <link linkend="TRUE--CAPS"><literal>TRUE</literal></link> if <parameter>ch</parameter> has a mirrored character, <link linkend="FALSE--CAPS"><literal>FALSE</literal></link> otherwise
 
1139
 
 
1140
</simpara></listitem></varlistentry>
 
1141
</variablelist><para role="since">Since 2.4</para></refsect2>
 
1142
<refsect2 id="GUnicodeScript" role="enum">
 
1143
<title>enum GUnicodeScript</title>
 
1144
<indexterm zone="GUnicodeScript"><primary sortas="UnicodeScript">GUnicodeScript</primary></indexterm><programlisting>typedef enum 
 
1145
{                         /* ISO 15924 code */
 
1146
  G_UNICODE_SCRIPT_INVALID_CODE = -1,
 
1147
  G_UNICODE_SCRIPT_COMMON       = 0,   /* Zyyy */
 
1148
  G_UNICODE_SCRIPT_INHERITED,          /* Qaai */
 
1149
  G_UNICODE_SCRIPT_ARABIC,             /* Arab */
 
1150
  G_UNICODE_SCRIPT_ARMENIAN,           /* Armn */
 
1151
  G_UNICODE_SCRIPT_BENGALI,            /* Beng */
 
1152
  G_UNICODE_SCRIPT_BOPOMOFO,           /* Bopo */
 
1153
  G_UNICODE_SCRIPT_CHEROKEE,           /* Cher */
 
1154
  G_UNICODE_SCRIPT_COPTIC,             /* Qaac */
 
1155
  G_UNICODE_SCRIPT_CYRILLIC,           /* Cyrl (Cyrs) */
 
1156
  G_UNICODE_SCRIPT_DESERET,            /* Dsrt */
 
1157
  G_UNICODE_SCRIPT_DEVANAGARI,         /* Deva */
 
1158
  G_UNICODE_SCRIPT_ETHIOPIC,           /* Ethi */
 
1159
  G_UNICODE_SCRIPT_GEORGIAN,           /* Geor (Geon, Geoa) */
 
1160
  G_UNICODE_SCRIPT_GOTHIC,             /* Goth */
 
1161
  G_UNICODE_SCRIPT_GREEK,              /* Grek */
 
1162
  G_UNICODE_SCRIPT_GUJARATI,           /* Gujr */
 
1163
  G_UNICODE_SCRIPT_GURMUKHI,           /* Guru */
 
1164
  G_UNICODE_SCRIPT_HAN,                /* Hani */
 
1165
  G_UNICODE_SCRIPT_HANGUL,             /* Hang */
 
1166
  G_UNICODE_SCRIPT_HEBREW,             /* Hebr */
 
1167
  G_UNICODE_SCRIPT_HIRAGANA,           /* Hira */
 
1168
  G_UNICODE_SCRIPT_KANNADA,            /* Knda */
 
1169
  G_UNICODE_SCRIPT_KATAKANA,           /* Kana */
 
1170
  G_UNICODE_SCRIPT_KHMER,              /* Khmr */
 
1171
  G_UNICODE_SCRIPT_LAO,                /* Laoo */
 
1172
  G_UNICODE_SCRIPT_LATIN,              /* Latn (Latf, Latg) */
 
1173
  G_UNICODE_SCRIPT_MALAYALAM,          /* Mlym */
 
1174
  G_UNICODE_SCRIPT_MONGOLIAN,          /* Mong */
 
1175
  G_UNICODE_SCRIPT_MYANMAR,            /* Mymr */
 
1176
  G_UNICODE_SCRIPT_OGHAM,              /* Ogam */
 
1177
  G_UNICODE_SCRIPT_OLD_ITALIC,         /* Ital */
 
1178
  G_UNICODE_SCRIPT_ORIYA,              /* Orya */
 
1179
  G_UNICODE_SCRIPT_RUNIC,              /* Runr */
 
1180
  G_UNICODE_SCRIPT_SINHALA,            /* Sinh */
 
1181
  G_UNICODE_SCRIPT_SYRIAC,             /* Syrc (Syrj, Syrn, Syre) */
 
1182
  G_UNICODE_SCRIPT_TAMIL,              /* Taml */
 
1183
  G_UNICODE_SCRIPT_TELUGU,             /* Telu */
 
1184
  G_UNICODE_SCRIPT_THAANA,             /* Thaa */
 
1185
  G_UNICODE_SCRIPT_THAI,               /* Thai */
 
1186
  G_UNICODE_SCRIPT_TIBETAN,            /* Tibt */
 
1187
  G_UNICODE_SCRIPT_CANADIAN_ABORIGINAL, /* Cans */
 
1188
  G_UNICODE_SCRIPT_YI,                 /* Yiii */
 
1189
  G_UNICODE_SCRIPT_TAGALOG,            /* Tglg */
 
1190
  G_UNICODE_SCRIPT_HANUNOO,            /* Hano */
 
1191
  G_UNICODE_SCRIPT_BUHID,              /* Buhd */
 
1192
  G_UNICODE_SCRIPT_TAGBANWA,           /* Tagb */
 
1193
 
 
1194
  /* Unicode-4.0 additions */
 
1195
  G_UNICODE_SCRIPT_BRAILLE,            /* Brai */
 
1196
  G_UNICODE_SCRIPT_CYPRIOT,            /* Cprt */
 
1197
  G_UNICODE_SCRIPT_LIMBU,              /* Limb */
 
1198
  G_UNICODE_SCRIPT_OSMANYA,            /* Osma */
 
1199
  G_UNICODE_SCRIPT_SHAVIAN,            /* Shaw */
 
1200
  G_UNICODE_SCRIPT_LINEAR_B,           /* Linb */
 
1201
  G_UNICODE_SCRIPT_TAI_LE,             /* Tale */
 
1202
  G_UNICODE_SCRIPT_UGARITIC,           /* Ugar */
 
1203
      
 
1204
  /* Unicode-4.1 additions */
 
1205
  G_UNICODE_SCRIPT_NEW_TAI_LUE,        /* Talu */
 
1206
  G_UNICODE_SCRIPT_BUGINESE,           /* Bugi */
 
1207
  G_UNICODE_SCRIPT_GLAGOLITIC,         /* Glag */
 
1208
  G_UNICODE_SCRIPT_TIFINAGH,           /* Tfng */
 
1209
  G_UNICODE_SCRIPT_SYLOTI_NAGRI,       /* Sylo */
 
1210
  G_UNICODE_SCRIPT_OLD_PERSIAN,        /* Xpeo */
 
1211
  G_UNICODE_SCRIPT_KHAROSHTHI,         /* Khar */
 
1212
 
 
1213
  /* Unicode-5.0 additions */
 
1214
  G_UNICODE_SCRIPT_UNKNOWN,            /* Zzzz */
 
1215
  G_UNICODE_SCRIPT_BALINESE,           /* Bali */
 
1216
  G_UNICODE_SCRIPT_CUNEIFORM,          /* Xsux */
 
1217
  G_UNICODE_SCRIPT_PHOENICIAN,         /* Phnx */
 
1218
  G_UNICODE_SCRIPT_PHAGS_PA,           /* Phag */
 
1219
  G_UNICODE_SCRIPT_NKO,                /* Nkoo */
 
1220
 
 
1221
  /* Unicode-5.1 additions */
 
1222
  G_UNICODE_SCRIPT_KAYAH_LI,           /* Kali */
 
1223
  G_UNICODE_SCRIPT_LEPCHA,             /* Lepc */
 
1224
  G_UNICODE_SCRIPT_REJANG,             /* Rjng */
 
1225
  G_UNICODE_SCRIPT_SUNDANESE,          /* Sund */
 
1226
  G_UNICODE_SCRIPT_SAURASHTRA,         /* Saur */
 
1227
  G_UNICODE_SCRIPT_CHAM,               /* Cham */
 
1228
  G_UNICODE_SCRIPT_OL_CHIKI,           /* Olck */
 
1229
  G_UNICODE_SCRIPT_VAI,                /* Vaii */
 
1230
  G_UNICODE_SCRIPT_CARIAN,             /* Cari */
 
1231
  G_UNICODE_SCRIPT_LYCIAN,             /* Lyci */
 
1232
  G_UNICODE_SCRIPT_LYDIAN              /* Lydi */
 
1233
} GUnicodeScript;
 
1234
</programlisting>
 
1235
<para>
 
1236
The <link linkend="GUnicodeScript"><type>GUnicodeScript</type></link> enumeration identifies different writing
 
1237
systems. The values correspond to the names as defined in the
 
1238
Unicode standard. The enumeration was added in GLib 2.14,
 
1239
and is interchangeable with <link linkend="PangoScript"><type>PangoScript</type></link>.
 
1240
Note that new types may be added in the future. Applications
 
1241
should be ready to handle unknown values.
 
1242
See <ulink
 
1243
url="http://www.unicode.org/reports/tr24/">Unicode Standard Annex
 
1244
#24: Script names</ulink>.
 
1245
</para><variablelist role="enum">
 
1246
<varlistentry id="G-UNICODE-SCRIPT-INVALID-CODE--CAPS" role="constant">
 
1247
<term><literal>G_UNICODE_SCRIPT_INVALID_CODE</literal></term>
 
1248
<listitem><simpara>a value never returned from <link linkend="g-unichar-get-script"><function>g_unichar_get_script()</function></link>
 
1249
</simpara></listitem>
 
1250
</varlistentry>
 
1251
<varlistentry id="G-UNICODE-SCRIPT-COMMON--CAPS" role="constant">
 
1252
<term><literal>G_UNICODE_SCRIPT_COMMON</literal></term>
 
1253
<listitem><simpara>    a character used by multiple different scripts
 
1254
</simpara></listitem>
 
1255
</varlistentry>
 
1256
<varlistentry id="G-UNICODE-SCRIPT-INHERITED--CAPS" role="constant">
 
1257
<term><literal>G_UNICODE_SCRIPT_INHERITED</literal></term>
 
1258
<listitem><simpara> a mark glyph that takes its script from the
 
1259
                              base glyph to which it is attached
 
1260
</simpara></listitem>
 
1261
</varlistentry>
 
1262
<varlistentry id="G-UNICODE-SCRIPT-ARABIC--CAPS" role="constant">
 
1263
<term><literal>G_UNICODE_SCRIPT_ARABIC</literal></term>
 
1264
<listitem><simpara>    Arabic
 
1265
</simpara></listitem>
 
1266
</varlistentry>
 
1267
<varlistentry id="G-UNICODE-SCRIPT-ARMENIAN--CAPS" role="constant">
 
1268
<term><literal>G_UNICODE_SCRIPT_ARMENIAN</literal></term>
 
1269
<listitem><simpara>  Armenian
 
1270
</simpara></listitem>
 
1271
</varlistentry>
 
1272
<varlistentry id="G-UNICODE-SCRIPT-BENGALI--CAPS" role="constant">
 
1273
<term><literal>G_UNICODE_SCRIPT_BENGALI</literal></term>
 
1274
<listitem><simpara>   Bengali
 
1275
</simpara></listitem>
 
1276
</varlistentry>
 
1277
<varlistentry id="G-UNICODE-SCRIPT-BOPOMOFO--CAPS" role="constant">
 
1278
<term><literal>G_UNICODE_SCRIPT_BOPOMOFO</literal></term>
 
1279
<listitem><simpara>  Bopomofo
 
1280
</simpara></listitem>
 
1281
</varlistentry>
 
1282
<varlistentry id="G-UNICODE-SCRIPT-CHEROKEE--CAPS" role="constant">
 
1283
<term><literal>G_UNICODE_SCRIPT_CHEROKEE</literal></term>
 
1284
<listitem><simpara>  Cherokee
 
1285
</simpara></listitem>
 
1286
</varlistentry>
 
1287
<varlistentry id="G-UNICODE-SCRIPT-COPTIC--CAPS" role="constant">
 
1288
<term><literal>G_UNICODE_SCRIPT_COPTIC</literal></term>
 
1289
<listitem><simpara>    Coptic
 
1290
</simpara></listitem>
 
1291
</varlistentry>
 
1292
<varlistentry id="G-UNICODE-SCRIPT-CYRILLIC--CAPS" role="constant">
 
1293
<term><literal>G_UNICODE_SCRIPT_CYRILLIC</literal></term>
 
1294
<listitem><simpara>  Cyrillic
 
1295
</simpara></listitem>
 
1296
</varlistentry>
 
1297
<varlistentry id="G-UNICODE-SCRIPT-DESERET--CAPS" role="constant">
 
1298
<term><literal>G_UNICODE_SCRIPT_DESERET</literal></term>
 
1299
<listitem><simpara>   Deseret
 
1300
</simpara></listitem>
 
1301
</varlistentry>
 
1302
<varlistentry id="G-UNICODE-SCRIPT-DEVANAGARI--CAPS" role="constant">
 
1303
<term><literal>G_UNICODE_SCRIPT_DEVANAGARI</literal></term>
 
1304
<listitem><simpara>Devanagari
 
1305
</simpara></listitem>
 
1306
</varlistentry>
 
1307
<varlistentry id="G-UNICODE-SCRIPT-ETHIOPIC--CAPS" role="constant">
 
1308
<term><literal>G_UNICODE_SCRIPT_ETHIOPIC</literal></term>
 
1309
<listitem><simpara>  Ethiopic
 
1310
</simpara></listitem>
 
1311
</varlistentry>
 
1312
<varlistentry id="G-UNICODE-SCRIPT-GEORGIAN--CAPS" role="constant">
 
1313
<term><literal>G_UNICODE_SCRIPT_GEORGIAN</literal></term>
 
1314
<listitem><simpara>  Georgian
 
1315
</simpara></listitem>
 
1316
</varlistentry>
 
1317
<varlistentry id="G-UNICODE-SCRIPT-GOTHIC--CAPS" role="constant">
 
1318
<term><literal>G_UNICODE_SCRIPT_GOTHIC</literal></term>
 
1319
<listitem><simpara>    Gothic
 
1320
</simpara></listitem>
 
1321
</varlistentry>
 
1322
<varlistentry id="G-UNICODE-SCRIPT-GREEK--CAPS" role="constant">
 
1323
<term><literal>G_UNICODE_SCRIPT_GREEK</literal></term>
 
1324
<listitem><simpara>     Greek
 
1325
</simpara></listitem>
 
1326
</varlistentry>
 
1327
<varlistentry id="G-UNICODE-SCRIPT-GUJARATI--CAPS" role="constant">
 
1328
<term><literal>G_UNICODE_SCRIPT_GUJARATI</literal></term>
 
1329
<listitem><simpara>  Gujarati
 
1330
</simpara></listitem>
 
1331
</varlistentry>
 
1332
<varlistentry id="G-UNICODE-SCRIPT-GURMUKHI--CAPS" role="constant">
 
1333
<term><literal>G_UNICODE_SCRIPT_GURMUKHI</literal></term>
 
1334
<listitem><simpara>  Gurmukhi
 
1335
</simpara></listitem>
 
1336
</varlistentry>
 
1337
<varlistentry id="G-UNICODE-SCRIPT-HAN--CAPS" role="constant">
 
1338
<term><literal>G_UNICODE_SCRIPT_HAN</literal></term>
 
1339
<listitem><simpara>       Han
 
1340
</simpara></listitem>
 
1341
</varlistentry>
 
1342
<varlistentry id="G-UNICODE-SCRIPT-HANGUL--CAPS" role="constant">
 
1343
<term><literal>G_UNICODE_SCRIPT_HANGUL</literal></term>
 
1344
<listitem><simpara>    Hangul
 
1345
</simpara></listitem>
 
1346
</varlistentry>
 
1347
<varlistentry id="G-UNICODE-SCRIPT-HEBREW--CAPS" role="constant">
 
1348
<term><literal>G_UNICODE_SCRIPT_HEBREW</literal></term>
 
1349
<listitem><simpara>    Hebrew
 
1350
</simpara></listitem>
 
1351
</varlistentry>
 
1352
<varlistentry id="G-UNICODE-SCRIPT-HIRAGANA--CAPS" role="constant">
 
1353
<term><literal>G_UNICODE_SCRIPT_HIRAGANA</literal></term>
 
1354
<listitem><simpara>  Hiragana
 
1355
</simpara></listitem>
 
1356
</varlistentry>
 
1357
<varlistentry id="G-UNICODE-SCRIPT-KANNADA--CAPS" role="constant">
 
1358
<term><literal>G_UNICODE_SCRIPT_KANNADA</literal></term>
 
1359
<listitem><simpara>   Kannada
 
1360
</simpara></listitem>
 
1361
</varlistentry>
 
1362
<varlistentry id="G-UNICODE-SCRIPT-KATAKANA--CAPS" role="constant">
 
1363
<term><literal>G_UNICODE_SCRIPT_KATAKANA</literal></term>
 
1364
<listitem><simpara>  Katakana
 
1365
</simpara></listitem>
 
1366
</varlistentry>
 
1367
<varlistentry id="G-UNICODE-SCRIPT-KHMER--CAPS" role="constant">
 
1368
<term><literal>G_UNICODE_SCRIPT_KHMER</literal></term>
 
1369
<listitem><simpara>     Khmer
 
1370
</simpara></listitem>
 
1371
</varlistentry>
 
1372
<varlistentry id="G-UNICODE-SCRIPT-LAO--CAPS" role="constant">
 
1373
<term><literal>G_UNICODE_SCRIPT_LAO</literal></term>
 
1374
<listitem><simpara>       Lao
 
1375
</simpara></listitem>
 
1376
</varlistentry>
 
1377
<varlistentry id="G-UNICODE-SCRIPT-LATIN--CAPS" role="constant">
 
1378
<term><literal>G_UNICODE_SCRIPT_LATIN</literal></term>
 
1379
<listitem><simpara>     Latin
 
1380
</simpara></listitem>
 
1381
</varlistentry>
 
1382
<varlistentry id="G-UNICODE-SCRIPT-MALAYALAM--CAPS" role="constant">
 
1383
<term><literal>G_UNICODE_SCRIPT_MALAYALAM</literal></term>
 
1384
<listitem><simpara> Malayalam
 
1385
</simpara></listitem>
 
1386
</varlistentry>
 
1387
<varlistentry id="G-UNICODE-SCRIPT-MONGOLIAN--CAPS" role="constant">
 
1388
<term><literal>G_UNICODE_SCRIPT_MONGOLIAN</literal></term>
 
1389
<listitem><simpara> Mongolian
 
1390
</simpara></listitem>
 
1391
</varlistentry>
 
1392
<varlistentry id="G-UNICODE-SCRIPT-MYANMAR--CAPS" role="constant">
 
1393
<term><literal>G_UNICODE_SCRIPT_MYANMAR</literal></term>
 
1394
<listitem><simpara>   Myanmar
 
1395
</simpara></listitem>
 
1396
</varlistentry>
 
1397
<varlistentry id="G-UNICODE-SCRIPT-OGHAM--CAPS" role="constant">
 
1398
<term><literal>G_UNICODE_SCRIPT_OGHAM</literal></term>
 
1399
<listitem><simpara>     Ogham
 
1400
</simpara></listitem>
 
1401
</varlistentry>
 
1402
<varlistentry id="G-UNICODE-SCRIPT-OLD-ITALIC--CAPS" role="constant">
 
1403
<term><literal>G_UNICODE_SCRIPT_OLD_ITALIC</literal></term>
 
1404
<listitem><simpara>Old Italic
 
1405
</simpara></listitem>
 
1406
</varlistentry>
 
1407
<varlistentry id="G-UNICODE-SCRIPT-ORIYA--CAPS" role="constant">
 
1408
<term><literal>G_UNICODE_SCRIPT_ORIYA</literal></term>
 
1409
<listitem><simpara>     Oriya
 
1410
</simpara></listitem>
 
1411
</varlistentry>
 
1412
<varlistentry id="G-UNICODE-SCRIPT-RUNIC--CAPS" role="constant">
 
1413
<term><literal>G_UNICODE_SCRIPT_RUNIC</literal></term>
 
1414
<listitem><simpara>     Runic
 
1415
</simpara></listitem>
 
1416
</varlistentry>
 
1417
<varlistentry id="G-UNICODE-SCRIPT-SINHALA--CAPS" role="constant">
 
1418
<term><literal>G_UNICODE_SCRIPT_SINHALA</literal></term>
 
1419
<listitem><simpara>   Sinhala
 
1420
</simpara></listitem>
 
1421
</varlistentry>
 
1422
<varlistentry id="G-UNICODE-SCRIPT-SYRIAC--CAPS" role="constant">
 
1423
<term><literal>G_UNICODE_SCRIPT_SYRIAC</literal></term>
 
1424
<listitem><simpara>    Syriac
 
1425
</simpara></listitem>
 
1426
</varlistentry>
 
1427
<varlistentry id="G-UNICODE-SCRIPT-TAMIL--CAPS" role="constant">
 
1428
<term><literal>G_UNICODE_SCRIPT_TAMIL</literal></term>
 
1429
<listitem><simpara>     Tamil
 
1430
</simpara></listitem>
 
1431
</varlistentry>
 
1432
<varlistentry id="G-UNICODE-SCRIPT-TELUGU--CAPS" role="constant">
 
1433
<term><literal>G_UNICODE_SCRIPT_TELUGU</literal></term>
 
1434
<listitem><simpara>    Telugu
 
1435
</simpara></listitem>
 
1436
</varlistentry>
 
1437
<varlistentry id="G-UNICODE-SCRIPT-THAANA--CAPS" role="constant">
 
1438
<term><literal>G_UNICODE_SCRIPT_THAANA</literal></term>
 
1439
<listitem><simpara>    Thaana
 
1440
</simpara></listitem>
 
1441
</varlistentry>
 
1442
<varlistentry id="G-UNICODE-SCRIPT-THAI--CAPS" role="constant">
 
1443
<term><literal>G_UNICODE_SCRIPT_THAI</literal></term>
 
1444
<listitem><simpara>      Thai
 
1445
</simpara></listitem>
 
1446
</varlistentry>
 
1447
<varlistentry id="G-UNICODE-SCRIPT-TIBETAN--CAPS" role="constant">
 
1448
<term><literal>G_UNICODE_SCRIPT_TIBETAN</literal></term>
 
1449
<listitem><simpara>   Tibetan
 
1450
</simpara></listitem>
 
1451
</varlistentry>
 
1452
<varlistentry id="G-UNICODE-SCRIPT-CANADIAN-ABORIGINAL--CAPS" role="constant">
 
1453
<term><literal>G_UNICODE_SCRIPT_CANADIAN_ABORIGINAL</literal></term>
 
1454
<listitem><simpara>
 
1455
                              Canadian Aboriginal
 
1456
</simpara></listitem>
 
1457
</varlistentry>
 
1458
<varlistentry id="G-UNICODE-SCRIPT-YI--CAPS" role="constant">
 
1459
<term><literal>G_UNICODE_SCRIPT_YI</literal></term>
 
1460
<listitem><simpara>        Yi
 
1461
</simpara></listitem>
 
1462
</varlistentry>
 
1463
<varlistentry id="G-UNICODE-SCRIPT-TAGALOG--CAPS" role="constant">
 
1464
<term><literal>G_UNICODE_SCRIPT_TAGALOG</literal></term>
 
1465
<listitem><simpara>   Tagalog
 
1466
</simpara></listitem>
 
1467
</varlistentry>
 
1468
<varlistentry id="G-UNICODE-SCRIPT-HANUNOO--CAPS" role="constant">
 
1469
<term><literal>G_UNICODE_SCRIPT_HANUNOO</literal></term>
 
1470
<listitem><simpara>   Hanunoo
 
1471
</simpara></listitem>
 
1472
</varlistentry>
 
1473
<varlistentry id="G-UNICODE-SCRIPT-BUHID--CAPS" role="constant">
 
1474
<term><literal>G_UNICODE_SCRIPT_BUHID</literal></term>
 
1475
<listitem><simpara>     Buhid
 
1476
</simpara></listitem>
 
1477
</varlistentry>
 
1478
<varlistentry id="G-UNICODE-SCRIPT-TAGBANWA--CAPS" role="constant">
 
1479
<term><literal>G_UNICODE_SCRIPT_TAGBANWA</literal></term>
 
1480
<listitem><simpara>  Tagbanwa
 
1481
</simpara></listitem>
 
1482
</varlistentry>
 
1483
<varlistentry id="G-UNICODE-SCRIPT-BRAILLE--CAPS" role="constant">
 
1484
<term><literal>G_UNICODE_SCRIPT_BRAILLE</literal></term>
 
1485
<listitem><simpara>   Braille
 
1486
</simpara></listitem>
 
1487
</varlistentry>
 
1488
<varlistentry id="G-UNICODE-SCRIPT-CYPRIOT--CAPS" role="constant">
 
1489
<term><literal>G_UNICODE_SCRIPT_CYPRIOT</literal></term>
 
1490
<listitem><simpara>   Cypriot
 
1491
</simpara></listitem>
 
1492
</varlistentry>
 
1493
<varlistentry id="G-UNICODE-SCRIPT-LIMBU--CAPS" role="constant">
 
1494
<term><literal>G_UNICODE_SCRIPT_LIMBU</literal></term>
 
1495
<listitem><simpara>     Limbu
 
1496
</simpara></listitem>
 
1497
</varlistentry>
 
1498
<varlistentry id="G-UNICODE-SCRIPT-OSMANYA--CAPS" role="constant">
 
1499
<term><literal>G_UNICODE_SCRIPT_OSMANYA</literal></term>
 
1500
<listitem><simpara>   Osmanya
 
1501
</simpara></listitem>
 
1502
</varlistentry>
 
1503
<varlistentry id="G-UNICODE-SCRIPT-SHAVIAN--CAPS" role="constant">
 
1504
<term><literal>G_UNICODE_SCRIPT_SHAVIAN</literal></term>
 
1505
<listitem><simpara>   Shavian
 
1506
</simpara></listitem>
 
1507
</varlistentry>
 
1508
<varlistentry id="G-UNICODE-SCRIPT-LINEAR-B--CAPS" role="constant">
 
1509
<term><literal>G_UNICODE_SCRIPT_LINEAR_B</literal></term>
 
1510
<listitem><simpara>  Linear B
 
1511
</simpara></listitem>
 
1512
</varlistentry>
 
1513
<varlistentry id="G-UNICODE-SCRIPT-TAI-LE--CAPS" role="constant">
 
1514
<term><literal>G_UNICODE_SCRIPT_TAI_LE</literal></term>
 
1515
<listitem><simpara>    Tai Le
 
1516
</simpara></listitem>
 
1517
</varlistentry>
 
1518
<varlistentry id="G-UNICODE-SCRIPT-UGARITIC--CAPS" role="constant">
 
1519
<term><literal>G_UNICODE_SCRIPT_UGARITIC</literal></term>
 
1520
<listitem><simpara>  Ugaritic
 
1521
</simpara></listitem>
 
1522
</varlistentry>
 
1523
<varlistentry id="G-UNICODE-SCRIPT-NEW-TAI-LUE--CAPS" role="constant">
 
1524
<term><literal>G_UNICODE_SCRIPT_NEW_TAI_LUE</literal></term>
 
1525
<listitem><simpara>New Tai Lue
 
1526
</simpara></listitem>
 
1527
</varlistentry>
 
1528
<varlistentry id="G-UNICODE-SCRIPT-BUGINESE--CAPS" role="constant">
 
1529
<term><literal>G_UNICODE_SCRIPT_BUGINESE</literal></term>
 
1530
<listitem><simpara>  Buginese
 
1531
</simpara></listitem>
 
1532
</varlistentry>
 
1533
<varlistentry id="G-UNICODE-SCRIPT-GLAGOLITIC--CAPS" role="constant">
 
1534
<term><literal>G_UNICODE_SCRIPT_GLAGOLITIC</literal></term>
 
1535
<listitem><simpara>Glagolitic
 
1536
</simpara></listitem>
 
1537
</varlistentry>
 
1538
<varlistentry id="G-UNICODE-SCRIPT-TIFINAGH--CAPS" role="constant">
 
1539
<term><literal>G_UNICODE_SCRIPT_TIFINAGH</literal></term>
 
1540
<listitem><simpara>  Tifinagh
 
1541
</simpara></listitem>
 
1542
</varlistentry>
 
1543
<varlistentry id="G-UNICODE-SCRIPT-SYLOTI-NAGRI--CAPS" role="constant">
 
1544
<term><literal>G_UNICODE_SCRIPT_SYLOTI_NAGRI</literal></term>
 
1545
<listitem><simpara>Syloti Nagri
 
1546
</simpara></listitem>
 
1547
</varlistentry>
 
1548
<varlistentry id="G-UNICODE-SCRIPT-OLD-PERSIAN--CAPS" role="constant">
 
1549
<term><literal>G_UNICODE_SCRIPT_OLD_PERSIAN</literal></term>
 
1550
<listitem><simpara>Old Persian
 
1551
</simpara></listitem>
 
1552
</varlistentry>
 
1553
<varlistentry id="G-UNICODE-SCRIPT-KHAROSHTHI--CAPS" role="constant">
 
1554
<term><literal>G_UNICODE_SCRIPT_KHAROSHTHI</literal></term>
 
1555
<listitem><simpara>Kharoshthi
 
1556
</simpara></listitem>
 
1557
</varlistentry>
 
1558
<varlistentry id="G-UNICODE-SCRIPT-UNKNOWN--CAPS" role="constant">
 
1559
<term><literal>G_UNICODE_SCRIPT_UNKNOWN</literal></term>
 
1560
<listitem><simpara>   an unassigned code point
 
1561
</simpara></listitem>
 
1562
</varlistentry>
 
1563
<varlistentry id="G-UNICODE-SCRIPT-BALINESE--CAPS" role="constant">
 
1564
<term><literal>G_UNICODE_SCRIPT_BALINESE</literal></term>
 
1565
<listitem><simpara>  Balinese
 
1566
</simpara></listitem>
 
1567
</varlistentry>
 
1568
<varlistentry id="G-UNICODE-SCRIPT-CUNEIFORM--CAPS" role="constant">
 
1569
<term><literal>G_UNICODE_SCRIPT_CUNEIFORM</literal></term>
 
1570
<listitem><simpara> Cuneiform
 
1571
</simpara></listitem>
 
1572
</varlistentry>
 
1573
<varlistentry id="G-UNICODE-SCRIPT-PHOENICIAN--CAPS" role="constant">
 
1574
<term><literal>G_UNICODE_SCRIPT_PHOENICIAN</literal></term>
 
1575
<listitem><simpara>Phoenician
 
1576
</simpara></listitem>
 
1577
</varlistentry>
 
1578
<varlistentry id="G-UNICODE-SCRIPT-PHAGS-PA--CAPS" role="constant">
 
1579
<term><literal>G_UNICODE_SCRIPT_PHAGS_PA</literal></term>
 
1580
<listitem><simpara>  Phags-pa
 
1581
</simpara></listitem>
 
1582
</varlistentry>
 
1583
<varlistentry id="G-UNICODE-SCRIPT-NKO--CAPS" role="constant">
 
1584
<term><literal>G_UNICODE_SCRIPT_NKO</literal></term>
 
1585
<listitem><simpara>       N'Ko
 
1586
</simpara></listitem>
 
1587
</varlistentry>
 
1588
<varlistentry id="G-UNICODE-SCRIPT-KAYAH-LI--CAPS" role="constant">
 
1589
<term><literal>G_UNICODE_SCRIPT_KAYAH_LI</literal></term>
 
1590
<listitem><simpara>  Kayah Li. Since 2.16.3
 
1591
</simpara></listitem>
 
1592
</varlistentry>
 
1593
<varlistentry id="G-UNICODE-SCRIPT-LEPCHA--CAPS" role="constant">
 
1594
<term><literal>G_UNICODE_SCRIPT_LEPCHA</literal></term>
 
1595
<listitem><simpara>    Lepcha. Since 2.16.3
 
1596
</simpara></listitem>
 
1597
</varlistentry>
 
1598
<varlistentry id="G-UNICODE-SCRIPT-REJANG--CAPS" role="constant">
 
1599
<term><literal>G_UNICODE_SCRIPT_REJANG</literal></term>
 
1600
<listitem><simpara>    Rejang. Since 2.16.3
 
1601
</simpara></listitem>
 
1602
</varlistentry>
 
1603
<varlistentry id="G-UNICODE-SCRIPT-SUNDANESE--CAPS" role="constant">
 
1604
<term><literal>G_UNICODE_SCRIPT_SUNDANESE</literal></term>
 
1605
<listitem><simpara> Sundanese. Since 2.16.3
 
1606
</simpara></listitem>
 
1607
</varlistentry>
 
1608
<varlistentry id="G-UNICODE-SCRIPT-SAURASHTRA--CAPS" role="constant">
 
1609
<term><literal>G_UNICODE_SCRIPT_SAURASHTRA</literal></term>
 
1610
<listitem><simpara>Saurashtra. Since 2.16.3
 
1611
</simpara></listitem>
 
1612
</varlistentry>
 
1613
<varlistentry id="G-UNICODE-SCRIPT-CHAM--CAPS" role="constant">
 
1614
<term><literal>G_UNICODE_SCRIPT_CHAM</literal></term>
 
1615
<listitem><simpara>      Cham. Since 2.16.3
 
1616
</simpara></listitem>
 
1617
</varlistentry>
 
1618
<varlistentry id="G-UNICODE-SCRIPT-OL-CHIKI--CAPS" role="constant">
 
1619
<term><literal>G_UNICODE_SCRIPT_OL_CHIKI</literal></term>
 
1620
<listitem><simpara>  Ol Chiki. Since 2.16.3
 
1621
</simpara></listitem>
 
1622
</varlistentry>
 
1623
<varlistentry id="G-UNICODE-SCRIPT-VAI--CAPS" role="constant">
 
1624
<term><literal>G_UNICODE_SCRIPT_VAI</literal></term>
 
1625
<listitem><simpara>       Vai. Since 2.16.3
 
1626
</simpara></listitem>
 
1627
</varlistentry>
 
1628
<varlistentry id="G-UNICODE-SCRIPT-CARIAN--CAPS" role="constant">
 
1629
<term><literal>G_UNICODE_SCRIPT_CARIAN</literal></term>
 
1630
<listitem><simpara>    Carian. Since 2.16.3
 
1631
</simpara></listitem>
 
1632
</varlistentry>
 
1633
<varlistentry id="G-UNICODE-SCRIPT-LYCIAN--CAPS" role="constant">
 
1634
<term><literal>G_UNICODE_SCRIPT_LYCIAN</literal></term>
 
1635
<listitem><simpara>    Lycian. Since 2.16.3
 
1636
</simpara></listitem>
 
1637
</varlistentry>
 
1638
<varlistentry id="G-UNICODE-SCRIPT-LYDIAN--CAPS" role="constant">
 
1639
<term><literal>G_UNICODE_SCRIPT_LYDIAN</literal></term>
 
1640
<listitem><simpara>    Lydian. Since 2.16.3
 
1641
</simpara></listitem>
 
1642
</varlistentry>
 
1643
</variablelist></refsect2>
 
1644
<refsect2 id="g-unichar-get-script" role="function" condition="since:2.14">
 
1645
<title>g_unichar_get_script ()</title>
 
1646
<indexterm zone="g-unichar-get-script" role="2.14"><primary sortas="unichar_get_script">g_unichar_get_script</primary></indexterm><programlisting><link linkend="GUnicodeScript">GUnicodeScript</link>      g_unichar_get_script                (<link linkend="gunichar">gunichar</link> ch);</programlisting>
 
1647
<para>
 
1648
Looks up the <link linkend="GUnicodeScript"><type>GUnicodeScript</type></link> for a particular character (as defined 
 
1649
by Unicode Standard Annex #24). No check is made for <parameter>ch</parameter> being a
 
1650
valid Unicode character; if you pass in an invalid character, the
 
1651
result is undefined.
 
1652
</para>
 
1653
<para>
 
1654
This function is equivalent to <link linkend="pango-script-for-unichar"><function>pango_script_for_unichar()</function></link> and the
 
1655
two are interchangeable.</para>
 
1656
<para>
 
1657
</para><variablelist role="params">
 
1658
<varlistentry><term><parameter>ch</parameter>&#160;:</term>
 
1659
<listitem><simpara> a Unicode character
 
1660
</simpara></listitem></varlistentry>
 
1661
<varlistentry><term><emphasis>Returns</emphasis>&#160;:</term><listitem><simpara> the <link linkend="GUnicodeScript"><type>GUnicodeScript</type></link> for the character.
 
1662
 
 
1663
</simpara></listitem></varlistentry>
 
1664
</variablelist><para role="since">Since 2.14</para></refsect2>
 
1665
<refsect2 id="g-utf8-next-char" role="macro">
 
1666
<title>g_utf8_next_char()</title>
 
1667
<indexterm zone="g-utf8-next-char"><primary sortas="utf8_next_char">g_utf8_next_char</primary></indexterm><programlisting>#define             g_utf8_next_char(p)</programlisting>
 
1668
<para>
 
1669
Skips to the next character in a UTF-8 string. The string must be
 
1670
valid; this macro is as fast as possible, and has no error-checking.
 
1671
You would use this macro to iterate over a string character by
 
1672
character. The macro returns the start of the next UTF-8 character.
 
1673
Before using this macro, use <link linkend="g-utf8-validate"><function>g_utf8_validate()</function></link> to validate strings
 
1674
that may contain invalid UTF-8.
 
1675
</para><variablelist role="params">
 
1676
<varlistentry><term><parameter>p</parameter>&#160;:</term>
 
1677
<listitem><simpara>Pointer to the start of a valid UTF-8 character.
 
1678
</simpara></listitem></varlistentry>
 
1679
</variablelist></refsect2>
 
1680
<refsect2 id="g-utf8-get-char" role="function">
 
1681
<title>g_utf8_get_char ()</title>
 
1682
<indexterm zone="g-utf8-get-char"><primary sortas="utf8_get_char">g_utf8_get_char</primary></indexterm><programlisting><link linkend="gunichar">gunichar</link>            g_utf8_get_char                     (const <link linkend="gchar">gchar</link> *p);</programlisting>
 
1683
<para>
 
1684
Converts a sequence of bytes encoded as UTF-8 to a Unicode character.
 
1685
If <parameter>p</parameter> does not point to a valid UTF-8 encoded character, results are
 
1686
undefined. If you are not sure that the bytes are complete
 
1687
valid Unicode characters, you should use <link linkend="g-utf8-get-char-validated"><function>g_utf8_get_char_validated()</function></link>
 
1688
instead.</para>
 
1689
<para>
 
1690
</para><variablelist role="params">
 
1691
<varlistentry><term><parameter>p</parameter>&#160;:</term>
 
1692
<listitem><simpara> a pointer to a Unicode character encoded as UTF-8
 
1693
</simpara></listitem></varlistentry>
 
1694
<varlistentry><term><emphasis>Returns</emphasis>&#160;:</term><listitem><simpara> the resulting character
 
1695
</simpara></listitem></varlistentry>
 
1696
</variablelist></refsect2>
 
1697
<refsect2 id="g-utf8-get-char-validated" role="function">
 
1698
<title>g_utf8_get_char_validated ()</title>
 
1699
<indexterm zone="g-utf8-get-char-validated"><primary sortas="utf8_get_char_validated">g_utf8_get_char_validated</primary></indexterm><programlisting><link linkend="gunichar">gunichar</link>            g_utf8_get_char_validated           (const <link linkend="gchar">gchar</link> *p,
 
1700
                                                         <link linkend="gssize">gssize</link> max_len);</programlisting>
 
1701
<para>
 
1702
Converts a sequence of bytes encoded as UTF-8 to a Unicode character.
 
1703
This function checks for incomplete characters, for invalid characters
 
1704
such as characters that are out of the range of Unicode, and for
 
1705
overlong encodings of valid characters.</para>
 
1706
<para>
 
1707
</para><variablelist role="params">
 
1708
<varlistentry><term><parameter>p</parameter>&#160;:</term>
 
1709
<listitem><simpara> a pointer to a Unicode character encoded as UTF-8
 
1710
</simpara></listitem></varlistentry>
 
1711
<varlistentry><term><parameter>max_len</parameter>&#160;:</term>
 
1712
<listitem><simpara> the maximum number of bytes to read, or -1, for no maximum or
 
1713
          if <parameter>p</parameter> is nul-terminated
 
1714
</simpara></listitem></varlistentry>
 
1715
<varlistentry><term><emphasis>Returns</emphasis>&#160;:</term><listitem><simpara> the resulting character. If <parameter>p</parameter> points to a partial
 
1716
   sequence at the end of a string that could begin a valid 
 
1717
   character (or if <parameter>max_len</parameter> is zero), returns (gunichar)-2; 
 
1718
   otherwise, if <parameter>p</parameter> does not point to a valid UTF-8 encoded 
 
1719
   Unicode character, returns (gunichar)-1.
 
1720
</simpara></listitem></varlistentry>
 
1721
</variablelist></refsect2>
 
1722
<refsect2 id="g-utf8-offset-to-pointer" role="function">
 
1723
<title>g_utf8_offset_to_pointer ()</title>
 
1724
<indexterm zone="g-utf8-offset-to-pointer"><primary sortas="utf8_offset_to_pointer">g_utf8_offset_to_pointer</primary></indexterm><programlisting><link linkend="gchar">gchar</link>*              g_utf8_offset_to_pointer            (const <link linkend="gchar">gchar</link> *str,
 
1725
                                                         <link linkend="glong">glong</link> offset);</programlisting>
 
1726
<para>
 
1727
Converts from an integer character offset to a pointer to a position
 
1728
within the string.
 
1729
</para>
 
1730
<para>
 
1731
Since 2.10, this function allows passing a negative <parameter>offset</parameter> to
 
1732
step backwards. It is usually worth stepping backwards from the end
 
1733
instead of forwards if <parameter>offset</parameter> is in the last fourth of the string,
 
1734
since moving forward is about 3 times faster than moving backward.
 
1735
</para>
 
1736
<para>
 
1737
<note><para>
 
1738
This function doesn't abort when reaching the end of <parameter>str</parameter>. Therefore
 
1739
you should be sure that <parameter>offset</parameter> is within string boundaries before
 
1740
calling this function. Call <link linkend="g-utf8-strlen"><function>g_utf8_strlen()</function></link> when unsure.
 
1741
</para>
 
1742
<para>
 
1743
This limitation exists as this function is called frequently during
 
1744
text rendering and therefore has to be as fast as possible.
 
1745
</para></note></para>
 
1746
<para>
 
1747
</para><variablelist role="params">
 
1748
<varlistentry><term><parameter>str</parameter>&#160;:</term>
 
1749
<listitem><simpara> a UTF-8 encoded string
 
1750
</simpara></listitem></varlistentry>
 
1751
<varlistentry><term><parameter>offset</parameter>&#160;:</term>
 
1752
<listitem><simpara> a character offset within <parameter>str</parameter>
 
1753
</simpara></listitem></varlistentry>
 
1754
<varlistentry><term><emphasis>Returns</emphasis>&#160;:</term><listitem><simpara> the resulting pointer
 
1755
</simpara></listitem></varlistentry>
 
1756
</variablelist></refsect2>
 
1757
<refsect2 id="g-utf8-pointer-to-offset" role="function">
 
1758
<title>g_utf8_pointer_to_offset ()</title>
 
1759
<indexterm zone="g-utf8-pointer-to-offset"><primary sortas="utf8_pointer_to_offset">g_utf8_pointer_to_offset</primary></indexterm><programlisting><link linkend="glong">glong</link>               g_utf8_pointer_to_offset            (const <link linkend="gchar">gchar</link> *str,
 
1760
                                                         const <link linkend="gchar">gchar</link> *pos);</programlisting>
 
1761
<para>
 
1762
Converts from a pointer to a position within a string to an integer
 
1763
character offset.
 
1764
</para>
 
1765
<para>
 
1766
Since 2.10, this function allows <parameter>pos</parameter> to be before <parameter>str</parameter>, and returns
 
1767
a negative offset in this case.</para>
 
1768
<para>
 
1769
</para><variablelist role="params">
 
1770
<varlistentry><term><parameter>str</parameter>&#160;:</term>
 
1771
<listitem><simpara> a UTF-8 encoded string
 
1772
</simpara></listitem></varlistentry>
 
1773
<varlistentry><term><parameter>pos</parameter>&#160;:</term>
 
1774
<listitem><simpara> a pointer to a position within <parameter>str</parameter>
 
1775
</simpara></listitem></varlistentry>
 
1776
<varlistentry><term><emphasis>Returns</emphasis>&#160;:</term><listitem><simpara> the resulting character offset
 
1777
</simpara></listitem></varlistentry>
 
1778
</variablelist></refsect2>
 
1779
<refsect2 id="g-utf8-prev-char" role="function">
 
1780
<title>g_utf8_prev_char ()</title>
 
1781
<indexterm zone="g-utf8-prev-char"><primary sortas="utf8_prev_char">g_utf8_prev_char</primary></indexterm><programlisting><link linkend="gchar">gchar</link>*              g_utf8_prev_char                    (const <link linkend="gchar">gchar</link> *p);</programlisting>
 
1782
<para>
 
1783
Finds the previous UTF-8 character in the string before <parameter>p</parameter>.
 
1784
</para>
 
1785
<para>
 
1786
<parameter>p</parameter> does not have to be at the beginning of a UTF-8 character. No check
 
1787
is made to see if the character found is actually valid other than
 
1788
it starts with an appropriate byte. If <parameter>p</parameter> might be the first
 
1789
character of the string, you must use <link linkend="g-utf8-find-prev-char"><function>g_utf8_find_prev_char()</function></link> instead.</para>
 
1790
<para>
 
1791
</para><variablelist role="params">
 
1792
<varlistentry><term><parameter>p</parameter>&#160;:</term>
 
1793
<listitem><simpara> a pointer to a position within a UTF-8 encoded string
 
1794
</simpara></listitem></varlistentry>
 
1795
<varlistentry><term><emphasis>Returns</emphasis>&#160;:</term><listitem><simpara> a pointer to the found character.
 
1796
</simpara></listitem></varlistentry>
 
1797
</variablelist></refsect2>
 
1798
<refsect2 id="g-utf8-find-next-char" role="function">
 
1799
<title>g_utf8_find_next_char ()</title>
 
1800
<indexterm zone="g-utf8-find-next-char"><primary sortas="utf8_find_next_char">g_utf8_find_next_char</primary></indexterm><programlisting><link linkend="gchar">gchar</link>*              g_utf8_find_next_char               (const <link linkend="gchar">gchar</link> *p,
 
1801
                                                         const <link linkend="gchar">gchar</link> *end);</programlisting>
 
1802
<para>
 
1803
Finds the start of the next UTF-8 character in the string after <parameter>p</parameter>.
 
1804
</para>
 
1805
<para>
 
1806
<parameter>p</parameter> does not have to be at the beginning of a UTF-8 character. No check
 
1807
is made to see if the character found is actually valid other than
 
1808
it starts with an appropriate byte.</para>
 
1809
<para>
 
1810
</para><variablelist role="params">
 
1811
<varlistentry><term><parameter>p</parameter>&#160;:</term>
 
1812
<listitem><simpara> a pointer to a position within a UTF-8 encoded string
 
1813
</simpara></listitem></varlistentry>
 
1814
<varlistentry><term><parameter>end</parameter>&#160;:</term>
 
1815
<listitem><simpara> a pointer to the byte following the end of the string,
 
1816
or <link linkend="NULL--CAPS"><literal>NULL</literal></link> to indicate that the string is nul-terminated.
 
1817
</simpara></listitem></varlistentry>
 
1818
<varlistentry><term><emphasis>Returns</emphasis>&#160;:</term><listitem><simpara> a pointer to the found character or <link linkend="NULL--CAPS"><literal>NULL</literal></link>
 
1819
</simpara></listitem></varlistentry>
 
1820
</variablelist></refsect2>
 
1821
<refsect2 id="g-utf8-find-prev-char" role="function">
 
1822
<title>g_utf8_find_prev_char ()</title>
 
1823
<indexterm zone="g-utf8-find-prev-char"><primary sortas="utf8_find_prev_char">g_utf8_find_prev_char</primary></indexterm><programlisting><link linkend="gchar">gchar</link>*              g_utf8_find_prev_char               (const <link linkend="gchar">gchar</link> *str,
 
1824
                                                         const <link linkend="gchar">gchar</link> *p);</programlisting>
 
1825
<para>
 
1826
Given a position <parameter>p</parameter> within a UTF-8 encoded string <parameter>str</parameter>, find the start
 
1827
of the previous UTF-8 character starting before <parameter>p</parameter>. Returns <link linkend="NULL--CAPS"><literal>NULL</literal></link> if no
 
1828
UTF-8 characters are present in <parameter>str</parameter> before <parameter>p</parameter>.
 
1829
</para>
 
1830
<para>
 
1831
<parameter>p</parameter> does not have to be at the beginning of a UTF-8 character. No check
 
1832
is made to see if the character found is actually valid other than
 
1833
it starts with an appropriate byte.</para>
 
1834
<para>
 
1835
</para><variablelist role="params">
 
1836
<varlistentry><term><parameter>str</parameter>&#160;:</term>
 
1837
<listitem><simpara> pointer to the beginning of a UTF-8 encoded string
 
1838
</simpara></listitem></varlistentry>
 
1839
<varlistentry><term><parameter>p</parameter>&#160;:</term>
 
1840
<listitem><simpara> pointer to some position within <parameter>str</parameter>
 
1841
</simpara></listitem></varlistentry>
 
1842
<varlistentry><term><emphasis>Returns</emphasis>&#160;:</term><listitem><simpara> a pointer to the found character or <link linkend="NULL--CAPS"><literal>NULL</literal></link>.
 
1843
</simpara></listitem></varlistentry>
 
1844
</variablelist></refsect2>
 
1845
<refsect2 id="g-utf8-strlen" role="function">
 
1846
<title>g_utf8_strlen ()</title>
 
1847
<indexterm zone="g-utf8-strlen"><primary sortas="utf8_strlen">g_utf8_strlen</primary></indexterm><programlisting><link linkend="glong">glong</link>               g_utf8_strlen                       (const <link linkend="gchar">gchar</link> *p,
 
1848
                                                         <link linkend="gssize">gssize</link> max);</programlisting>
 
1849
<para>
 
1850
Computes the length of the string in characters, not including
 
1851
the terminating nul character.</para>
 
1852
<para>
 
1853
</para><variablelist role="params">
 
1854
<varlistentry><term><parameter>p</parameter>&#160;:</term>
 
1855
<listitem><simpara> pointer to the start of a UTF-8 encoded string
 
1856
</simpara></listitem></varlistentry>
 
1857
<varlistentry><term><parameter>max</parameter>&#160;:</term>
 
1858
<listitem><simpara> the maximum number of bytes to examine. If <parameter>max</parameter>
 
1859
      is less than 0, then the string is assumed to be
 
1860
      nul-terminated. If <parameter>max</parameter> is 0, <parameter>p</parameter> will not be examined and
 
1861
      may be <link linkend="NULL--CAPS"><literal>NULL</literal></link>.
 
1862
</simpara></listitem></varlistentry>
 
1863
<varlistentry><term><emphasis>Returns</emphasis>&#160;:</term><listitem><simpara> the length of the string in characters
 
1864
</simpara></listitem></varlistentry>
 
1865
</variablelist></refsect2>
 
1866
<refsect2 id="g-utf8-strncpy" role="function">
 
1867
<title>g_utf8_strncpy ()</title>
 
1868
<indexterm zone="g-utf8-strncpy"><primary sortas="utf8_strncpy">g_utf8_strncpy</primary></indexterm><programlisting><link linkend="gchar">gchar</link>*              g_utf8_strncpy                      (<link linkend="gchar">gchar</link> *dest,
 
1869
                                                         const <link linkend="gchar">gchar</link> *src,
 
1870
                                                         <link linkend="gsize">gsize</link> n);</programlisting>
 
1871
<para>
 
1872
Like the standard C <link linkend="strncpy"><function>strncpy()</function></link> function, but 
 
1873
copies a given number of characters instead of a given number of 
 
1874
bytes. The <parameter>src</parameter> string must be valid UTF-8 encoded text. 
 
1875
(Use <link linkend="g-utf8-validate"><function>g_utf8_validate()</function></link> on all text before trying to use UTF-8 
 
1876
utility functions with it.)</para>
 
1877
<para>
 
1878
</para><variablelist role="params">
 
1879
<varlistentry><term><parameter>dest</parameter>&#160;:</term>
 
1880
<listitem><simpara> buffer to fill with characters from <parameter>src</parameter>
 
1881
</simpara></listitem></varlistentry>
 
1882
<varlistentry><term><parameter>src</parameter>&#160;:</term>
 
1883
<listitem><simpara> UTF-8 encoded string
 
1884
</simpara></listitem></varlistentry>
 
1885
<varlistentry><term><parameter>n</parameter>&#160;:</term>
 
1886
<listitem><simpara> character count
 
1887
</simpara></listitem></varlistentry>
 
1888
<varlistentry><term><emphasis>Returns</emphasis>&#160;:</term><listitem><simpara> <parameter>dest</parameter>
 
1889
</simpara></listitem></varlistentry>
 
1890
</variablelist></refsect2>
 
1891
<refsect2 id="g-utf8-strchr" role="function">
 
1892
<title>g_utf8_strchr ()</title>
 
1893
<indexterm zone="g-utf8-strchr"><primary sortas="utf8_strchr">g_utf8_strchr</primary></indexterm><programlisting><link linkend="gchar">gchar</link>*              g_utf8_strchr                       (const <link linkend="gchar">gchar</link> *p,
 
1894
                                                         <link linkend="gssize">gssize</link> len,
 
1895
                                                         <link linkend="gunichar">gunichar</link> c);</programlisting>
 
1896
<para>
 
1897
Finds the leftmost occurrence of the given Unicode character
 
1898
in a UTF-8 encoded string, while limiting the search to <parameter>len</parameter> bytes.
 
1899
If <parameter>len</parameter> is -1, allow unbounded search.</para>
 
1900
<para>
 
1901
</para><variablelist role="params">
 
1902
<varlistentry><term><parameter>p</parameter>&#160;:</term>
 
1903
<listitem><simpara> a nul-terminated UTF-8 encoded string
 
1904
</simpara></listitem></varlistentry>
 
1905
<varlistentry><term><parameter>len</parameter>&#160;:</term>
 
1906
<listitem><simpara> the maximum length of <parameter>p</parameter>
 
1907
</simpara></listitem></varlistentry>
 
1908
<varlistentry><term><parameter>c</parameter>&#160;:</term>
 
1909
<listitem><simpara> a Unicode character
 
1910
</simpara></listitem></varlistentry>
 
1911
<varlistentry><term><emphasis>Returns</emphasis>&#160;:</term><listitem><simpara> <link linkend="NULL--CAPS"><literal>NULL</literal></link> if the string does not contain the character, 
 
1912
  otherwise, a pointer to the start of the leftmost occurrence of 
 
1913
  the character in the string.
 
1914
</simpara></listitem></varlistentry>
 
1915
</variablelist></refsect2>
 
1916
<refsect2 id="g-utf8-strrchr" role="function">
 
1917
<title>g_utf8_strrchr ()</title>
 
1918
<indexterm zone="g-utf8-strrchr"><primary sortas="utf8_strrchr">g_utf8_strrchr</primary></indexterm><programlisting><link linkend="gchar">gchar</link>*              g_utf8_strrchr                      (const <link linkend="gchar">gchar</link> *p,
 
1919
                                                         <link linkend="gssize">gssize</link> len,
 
1920
                                                         <link linkend="gunichar">gunichar</link> c);</programlisting>
 
1921
<para>
 
1922
Finds the rightmost occurrence of the given Unicode character
 
1923
in a UTF-8 encoded string, while limiting the search to <parameter>len</parameter> bytes.
 
1924
If <parameter>len</parameter> is -1, allow unbounded search.</para>
 
1925
<para>
 
1926
</para><variablelist role="params">
 
1927
<varlistentry><term><parameter>p</parameter>&#160;:</term>
 
1928
<listitem><simpara> a nul-terminated UTF-8 encoded string
 
1929
</simpara></listitem></varlistentry>
 
1930
<varlistentry><term><parameter>len</parameter>&#160;:</term>
 
1931
<listitem><simpara> the maximum length of <parameter>p</parameter>
 
1932
</simpara></listitem></varlistentry>
 
1933
<varlistentry><term><parameter>c</parameter>&#160;:</term>
 
1934
<listitem><simpara> a Unicode character
 
1935
</simpara></listitem></varlistentry>
 
1936
<varlistentry><term><emphasis>Returns</emphasis>&#160;:</term><listitem><simpara> <link linkend="NULL--CAPS"><literal>NULL</literal></link> if the string does not contain the character, 
 
1937
  otherwise, a pointer to the start of the rightmost occurrence of the 
 
1938
  character in the string.
 
1939
</simpara></listitem></varlistentry>
 
1940
</variablelist></refsect2>
 
1941
<refsect2 id="g-utf8-strreverse" role="function" condition="since:2.2">
 
1942
<title>g_utf8_strreverse ()</title>
 
1943
<indexterm zone="g-utf8-strreverse" role="2.2"><primary sortas="utf8_strreverse">g_utf8_strreverse</primary></indexterm><programlisting><link linkend="gchar">gchar</link>*              g_utf8_strreverse                   (const <link linkend="gchar">gchar</link> *str,
 
1944
                                                         <link linkend="gssize">gssize</link> len);</programlisting>
 
1945
<para>
 
1946
Reverses a UTF-8 string. <parameter>str</parameter> must be valid UTF-8 encoded text. 
 
1947
(Use <link linkend="g-utf8-validate"><function>g_utf8_validate()</function></link> on all text before trying to use UTF-8 
 
1948
utility functions with it.)
 
1949
</para>
 
1950
<para>
 
1951
This function is intended for programmatic uses of reversed strings.
 
1952
It pays no attention to decomposed characters, combining marks, byte 
 
1953
order marks, directional indicators (LRM, LRO, etc.) and similar 
 
1954
characters which might need special handling when reversing a string 
 
1955
for display purposes.
 
1956
</para>
 
1957
<para>
 
1958
Note that unlike <link linkend="g-strreverse"><function>g_strreverse()</function></link>, this function returns
 
1959
newly-allocated memory, which should be freed with <link linkend="g-free"><function>g_free()</function></link> when
 
1960
no longer needed.</para>
 
1961
<para>
 
1962
</para><variablelist role="params">
 
1963
<varlistentry><term><parameter>str</parameter>&#160;:</term>
 
1964
<listitem><simpara> a UTF-8 encoded string
 
1965
</simpara></listitem></varlistentry>
 
1966
<varlistentry><term><parameter>len</parameter>&#160;:</term>
 
1967
<listitem><simpara> the maximum length of <parameter>str</parameter> to use, in bytes. If <parameter>len</parameter> &lt; 0,
 
1968
      then the string is nul-terminated.
 
1969
</simpara></listitem></varlistentry>
 
1970
<varlistentry><term><emphasis>Returns</emphasis>&#160;:</term><listitem><simpara> a newly-allocated string which is the reverse of <parameter>str</parameter>.
 
1971
 
 
1972
</simpara></listitem></varlistentry>
 
1973
</variablelist><para role="since">Since 2.2</para></refsect2>
 
1974
<refsect2 id="g-utf8-validate" role="function">
 
1975
<title>g_utf8_validate ()</title>
 
1976
<indexterm zone="g-utf8-validate"><primary sortas="utf8_validate">g_utf8_validate</primary></indexterm><programlisting><link linkend="gboolean">gboolean</link>            g_utf8_validate                     (const <link linkend="gchar">gchar</link> *str,
 
1977
                                                         <link linkend="gssize">gssize</link> max_len,
 
1978
                                                         const <link linkend="gchar">gchar</link> **end);</programlisting>
 
1979
<para>
 
1980
Validates UTF-8 encoded text. <parameter>str</parameter> is the text to validate;
 
1981
if <parameter>str</parameter> is nul-terminated, then <parameter>max_len</parameter> can be -1, otherwise
 
1982
<parameter>max_len</parameter> should be the number of bytes to validate.
 
1983
If <parameter>end</parameter> is non-<link linkend="NULL--CAPS"><literal>NULL</literal></link>, then the end of the valid range
 
1984
will be stored there (i.e. the start of the first invalid 
 
1985
character if some bytes were invalid, or the end of the text 
 
1986
being validated otherwise).
 
1987
</para>
 
1988
<para>
 
1989
Note that <link linkend="g-utf8-validate"><function>g_utf8_validate()</function></link> returns <link linkend="FALSE--CAPS"><literal>FALSE</literal></link> if <parameter>max_len</parameter> is 
 
1990
positive and NUL is met before <parameter>max_len</parameter> bytes have been read.
 
1991
</para>
 
1992
<para>
 
1993
Returns <link linkend="TRUE--CAPS"><literal>TRUE</literal></link> if all of <parameter>str</parameter> was valid. Many GLib and GTK+
 
1994
routines <emphasis>require</emphasis> valid UTF-8 as input;
 
1995
so data read from a file or the network should be checked
 
1996
with <link linkend="g-utf8-validate"><function>g_utf8_validate()</function></link> before doing anything else with it.</para>
 
1997
<para>
 
1998
</para><variablelist role="params">
 
1999
<varlistentry><term><parameter>str</parameter>&#160;:</term>
 
2000
<listitem><simpara> a pointer to character data
 
2001
</simpara></listitem></varlistentry>
 
2002
<varlistentry><term><parameter>max_len</parameter>&#160;:</term>
 
2003
<listitem><simpara> max bytes to validate, or -1 to go until NUL
 
2004
</simpara></listitem></varlistentry>
 
2005
<varlistentry><term><parameter>end</parameter>&#160;:</term>
 
2006
<listitem><simpara> return location for end of valid data
 
2007
</simpara></listitem></varlistentry>
 
2008
<varlistentry><term><emphasis>Returns</emphasis>&#160;:</term><listitem><simpara> <link linkend="TRUE--CAPS"><literal>TRUE</literal></link> if the text was valid UTF-8
 
2009
</simpara></listitem></varlistentry>
 
2010
</variablelist></refsect2>
 
2011
<refsect2 id="g-utf8-strup" role="function">
 
2012
<title>g_utf8_strup ()</title>
 
2013
<indexterm zone="g-utf8-strup"><primary sortas="utf8_strup">g_utf8_strup</primary></indexterm><programlisting><link linkend="gchar">gchar</link> *             g_utf8_strup                        (const <link linkend="gchar">gchar</link> *str,
 
2014
                                                         <link linkend="gssize">gssize</link> len);</programlisting>
 
2015
<para>
 
2016
Converts all Unicode characters in the string that have a case
 
2017
to uppercase. The exact manner in which this is done depends
 
2018
on the current locale, and may result in the number of
 
2019
characters in the string increasing. (For instance, the
 
2020
German ess-zet will be changed to SS.)</para>
 
2021
<para>
 
2022
</para><variablelist role="params">
 
2023
<varlistentry><term><parameter>str</parameter>&#160;:</term>
 
2024
<listitem><simpara> a UTF-8 encoded string
 
2025
</simpara></listitem></varlistentry>
 
2026
<varlistentry><term><parameter>len</parameter>&#160;:</term>
 
2027
<listitem><simpara> length of <parameter>str</parameter>, in bytes, or -1 if <parameter>str</parameter> is nul-terminated.
 
2028
</simpara></listitem></varlistentry>
 
2029
<varlistentry><term><emphasis>Returns</emphasis>&#160;:</term><listitem><simpara> a newly allocated string, with all characters
 
2030
   converted to uppercase.  
 
2031
</simpara></listitem></varlistentry>
 
2032
</variablelist></refsect2>
 
2033
<refsect2 id="g-utf8-strdown" role="function">
 
2034
<title>g_utf8_strdown ()</title>
 
2035
<indexterm zone="g-utf8-strdown"><primary sortas="utf8_strdown">g_utf8_strdown</primary></indexterm><programlisting><link linkend="gchar">gchar</link> *             g_utf8_strdown                      (const <link linkend="gchar">gchar</link> *str,
 
2036
                                                         <link linkend="gssize">gssize</link> len);</programlisting>
 
2037
<para>
 
2038
Converts all Unicode characters in the string that have a case
 
2039
to lowercase. The exact manner in which this is done depends
 
2040
on the current locale, and may result in the number of
 
2041
characters in the string changing.</para>
 
2042
<para>
 
2043
</para><variablelist role="params">
 
2044
<varlistentry><term><parameter>str</parameter>&#160;:</term>
 
2045
<listitem><simpara> a UTF-8 encoded string
 
2046
</simpara></listitem></varlistentry>
 
2047
<varlistentry><term><parameter>len</parameter>&#160;:</term>
 
2048
<listitem><simpara> length of <parameter>str</parameter>, in bytes, or -1 if <parameter>str</parameter> is nul-terminated.
 
2049
</simpara></listitem></varlistentry>
 
2050
<varlistentry><term><emphasis>Returns</emphasis>&#160;:</term><listitem><simpara> a newly allocated string, with all characters
 
2051
   converted to lowercase.  
 
2052
</simpara></listitem></varlistentry>
 
2053
</variablelist></refsect2>
 
2054
<refsect2 id="g-utf8-casefold" role="function">
 
2055
<title>g_utf8_casefold ()</title>
 
2056
<indexterm zone="g-utf8-casefold"><primary sortas="utf8_casefold">g_utf8_casefold</primary></indexterm><programlisting><link linkend="gchar">gchar</link> *             g_utf8_casefold                     (const <link linkend="gchar">gchar</link> *str,
 
2057
                                                         <link linkend="gssize">gssize</link> len);</programlisting>
 
2058
<para>
 
2059
Converts a string into a form that is independent of case. The
 
2060
result will not correspond to any particular case, but can be
 
2061
compared for equality or ordered with the results of calling
 
2062
<link linkend="g-utf8-casefold"><function>g_utf8_casefold()</function></link> on other strings.
 
2063
</para>
 
2064
<para>
 
2065
Note that calling <link linkend="g-utf8-casefold"><function>g_utf8_casefold()</function></link> followed by <link linkend="g-utf8-collate"><function>g_utf8_collate()</function></link> is
 
2066
only an approximation to the correct linguistic case insensitive
 
2067
ordering, though it is a fairly good one. Getting this exactly
 
2068
right would require a more sophisticated collation function that
 
2069
takes case sensitivity into account. GLib does not currently
 
2070
provide such a function.</para>
 
2071
<para>
 
2072
</para><variablelist role="params">
 
2073
<varlistentry><term><parameter>str</parameter>&#160;:</term>
 
2074
<listitem><simpara> a UTF-8 encoded string
 
2075
</simpara></listitem></varlistentry>
 
2076
<varlistentry><term><parameter>len</parameter>&#160;:</term>
 
2077
<listitem><simpara> length of <parameter>str</parameter>, in bytes, or -1 if <parameter>str</parameter> is nul-terminated.
 
2078
</simpara></listitem></varlistentry>
 
2079
<varlistentry><term><emphasis>Returns</emphasis>&#160;:</term><listitem><simpara> a newly allocated string, that is a
 
2080
  case independent form of <parameter>str</parameter>.
 
2081
</simpara></listitem></varlistentry>
 
2082
</variablelist></refsect2>
 
2083
<refsect2 id="g-utf8-normalize" role="function">
 
2084
<title>g_utf8_normalize ()</title>
 
2085
<indexterm zone="g-utf8-normalize"><primary sortas="utf8_normalize">g_utf8_normalize</primary></indexterm><programlisting><link linkend="gchar">gchar</link> *             g_utf8_normalize                    (const <link linkend="gchar">gchar</link> *str,
 
2086
                                                         <link linkend="gssize">gssize</link> len,
 
2087
                                                         <link linkend="GNormalizeMode">GNormalizeMode</link> mode);</programlisting>
 
2088
<para>
 
2089
Converts a string into canonical form, standardizing
 
2090
such issues as whether a character with an accent
 
2091
is represented as a base character and combining
 
2092
accent or as a single precomposed character. The
 
2093
string has to be valid UTF-8, otherwise <link linkend="NULL--CAPS"><literal>NULL</literal></link> is
 
2094
returned. You should generally call <link linkend="g-utf8-normalize"><function>g_utf8_normalize()</function></link>
 
2095
before comparing two Unicode strings.
 
2096
</para>
 
2097
<para>
 
2098
The normalization mode <link linkend="G-NORMALIZE-DEFAULT--CAPS"><literal>G_NORMALIZE_DEFAULT</literal></link> only
 
2099
standardizes differences that do not affect the
 
2100
text content, such as the above-mentioned accent
 
2101
representation. <link linkend="G-NORMALIZE-ALL--CAPS"><literal>G_NORMALIZE_ALL</literal></link> also standardizes
 
2102
the "compatibility" characters in Unicode, such
 
2103
as SUPERSCRIPT THREE to the standard forms
 
2104
(in this case DIGIT THREE). Formatting information
 
2105
may be lost but for most text operations such
 
2106
characters should be considered the same.
 
2107
</para>
 
2108
<para>
 
2109
<link linkend="G-NORMALIZE-DEFAULT-COMPOSE--CAPS"><literal>G_NORMALIZE_DEFAULT_COMPOSE</literal></link> and <link linkend="G-NORMALIZE-ALL-COMPOSE--CAPS"><literal>G_NORMALIZE_ALL_COMPOSE</literal></link>
 
2110
are like <link linkend="G-NORMALIZE-DEFAULT--CAPS"><literal>G_NORMALIZE_DEFAULT</literal></link> and <link linkend="G-NORMALIZE-ALL--CAPS"><literal>G_NORMALIZE_ALL</literal></link>,
 
2111
but return a result with composed forms rather
 
2112
than a maximally decomposed form. This is often
 
2113
useful if you intend to convert the string to
 
2114
a legacy encoding or pass it to a system with
 
2115
less capable Unicode handling.</para>
 
2116
<para>
 
2117
</para><variablelist role="params">
 
2118
<varlistentry><term><parameter>str</parameter>&#160;:</term>
 
2119
<listitem><simpara> a UTF-8 encoded string.
 
2120
</simpara></listitem></varlistentry>
 
2121
<varlistentry><term><parameter>len</parameter>&#160;:</term>
 
2122
<listitem><simpara> length of <parameter>str</parameter>, in bytes, or -1 if <parameter>str</parameter> is nul-terminated.
 
2123
</simpara></listitem></varlistentry>
 
2124
<varlistentry><term><parameter>mode</parameter>&#160;:</term>
 
2125
<listitem><simpara> the type of normalization to perform.
 
2126
</simpara></listitem></varlistentry>
 
2127
<varlistentry><term><emphasis>Returns</emphasis>&#160;:</term><listitem><simpara> a newly allocated string, that is the
 
2128
  normalized form of <parameter>str</parameter>, or <link linkend="NULL--CAPS"><literal>NULL</literal></link> if <parameter>str</parameter> is not
 
2129
  valid UTF-8.
 
2130
</simpara></listitem></varlistentry>
 
2131
</variablelist></refsect2>
 
2132
<refsect2 id="GNormalizeMode" role="enum">
 
2133
<title>enum GNormalizeMode</title>
 
2134
<indexterm zone="GNormalizeMode"><primary sortas="NormalizeMode">GNormalizeMode</primary></indexterm><programlisting>typedef enum {
 
2135
  G_NORMALIZE_DEFAULT,
 
2136
  G_NORMALIZE_NFD = G_NORMALIZE_DEFAULT,
 
2137
  G_NORMALIZE_DEFAULT_COMPOSE,
 
2138
  G_NORMALIZE_NFC = G_NORMALIZE_DEFAULT_COMPOSE,
 
2139
  G_NORMALIZE_ALL,
 
2140
  G_NORMALIZE_NFKD = G_NORMALIZE_ALL,
 
2141
  G_NORMALIZE_ALL_COMPOSE,
 
2142
  G_NORMALIZE_NFKC = G_NORMALIZE_ALL_COMPOSE
 
2143
} GNormalizeMode;
 
2144
</programlisting>
 
2145
<para>
 
2146
Defines how a Unicode string is transformed into a canonical
 
2147
form, standardizing such issues as whether a character with an accent is
 
2148
represented as a base character and combining accent or as a single precomposed
 
2149
character. Unicode strings should generally be normalized before comparing them.
 
2150
</para><variablelist role="enum">
 
2151
<varlistentry id="G-NORMALIZE-DEFAULT--CAPS" role="constant">
 
2152
<term><literal>G_NORMALIZE_DEFAULT</literal></term>
 
2153
<listitem><simpara>standardize differences that do not affect the
 
2154
  text content, such as the above-mentioned accent representation.
 
2155
</simpara></listitem>
 
2156
</varlistentry>
 
2157
<varlistentry id="G-NORMALIZE-NFD--CAPS" role="constant">
 
2158
<term><literal>G_NORMALIZE_NFD</literal></term>
 
2159
<listitem><simpara>another name for <link linkend="G-NORMALIZE-DEFAULT--CAPS"><literal>G_NORMALIZE_DEFAULT</literal></link>.
 
2160
</simpara></listitem>
 
2161
</varlistentry>
 
2162
<varlistentry id="G-NORMALIZE-DEFAULT-COMPOSE--CAPS" role="constant">
 
2163
<term><literal>G_NORMALIZE_DEFAULT_COMPOSE</literal></term>
 
2164
<listitem><simpara>like <link linkend="G-NORMALIZE-DEFAULT--CAPS"><literal>G_NORMALIZE_DEFAULT</literal></link>, but with composed
 
2165
  forms rather than a maximally decomposed form.
 
2166
</simpara></listitem>
 
2167
</varlistentry>
 
2168
<varlistentry id="G-NORMALIZE-NFC--CAPS" role="constant">
 
2169
<term><literal>G_NORMALIZE_NFC</literal></term>
 
2170
<listitem><simpara>another name for <link linkend="G-NORMALIZE-DEFAULT-COMPOSE--CAPS"><literal>G_NORMALIZE_DEFAULT_COMPOSE</literal></link>.
 
2171
</simpara></listitem>
 
2172
</varlistentry>
 
2173
<varlistentry id="G-NORMALIZE-ALL--CAPS" role="constant">
 
2174
<term><literal>G_NORMALIZE_ALL</literal></term>
 
2175
<listitem><simpara>beyond <link linkend="G-NORMALIZE-DEFAULT--CAPS"><literal>G_NORMALIZE_DEFAULT</literal></link> also standardize the 
 
2176
  "compatibility" characters in Unicode, such as SUPERSCRIPT THREE to the
 
2177
  standard forms (in this case DIGIT THREE). Formatting information may be
 
2178
  lost but for most text operations such characters should be considered the
 
2179
  same.
 
2180
</simpara></listitem>
 
2181
</varlistentry>
 
2182
<varlistentry id="G-NORMALIZE-NFKD--CAPS" role="constant">
 
2183
<term><literal>G_NORMALIZE_NFKD</literal></term>
 
2184
<listitem><simpara>another name for <link linkend="G-NORMALIZE-ALL--CAPS"><literal>G_NORMALIZE_ALL</literal></link>.
 
2185
</simpara></listitem>
 
2186
</varlistentry>
 
2187
<varlistentry id="G-NORMALIZE-ALL-COMPOSE--CAPS" role="constant">
 
2188
<term><literal>G_NORMALIZE_ALL_COMPOSE</literal></term>
 
2189
<listitem><simpara>like <link linkend="G-NORMALIZE-ALL--CAPS"><literal>G_NORMALIZE_ALL</literal></link>, but with composed
 
2190
  forms rather than a maximally decomposed form.
 
2191
</simpara></listitem>
 
2192
</varlistentry>
 
2193
<varlistentry id="G-NORMALIZE-NFKC--CAPS" role="constant">
 
2194
<term><literal>G_NORMALIZE_NFKC</literal></term>
 
2195
<listitem><simpara>another name for <link linkend="G-NORMALIZE-ALL-COMPOSE--CAPS"><literal>G_NORMALIZE_ALL_COMPOSE</literal></link>.
 
2196
</simpara></listitem>
 
2197
</varlistentry>
 
2198
</variablelist></refsect2>
 
2199
<refsect2 id="g-utf8-collate" role="function">
 
2200
<title>g_utf8_collate ()</title>
 
2201
<indexterm zone="g-utf8-collate"><primary sortas="utf8_collate">g_utf8_collate</primary></indexterm><programlisting><link linkend="gint">gint</link>                g_utf8_collate                      (const <link linkend="gchar">gchar</link> *str1,
 
2202
                                                         const <link linkend="gchar">gchar</link> *str2);</programlisting>
 
2203
<para>
 
2204
Compares two strings for ordering using the linguistically
 
2205
correct rules for the <link linkend="setlocale">current locale</link>. 
 
2206
When sorting a large number of strings, it will be significantly 
 
2207
faster to obtain collation keys with <link linkend="g-utf8-collate-key"><function>g_utf8_collate_key()</function></link> and 
 
2208
compare the keys with <link linkend="strcmp"><function>strcmp()</function></link> when sorting instead of sorting 
 
2209
the original strings.</para>
 
2210
<para>
 
2211
</para><variablelist role="params">
 
2212
<varlistentry><term><parameter>str1</parameter>&#160;:</term>
 
2213
<listitem><simpara> a UTF-8 encoded string
 
2214
</simpara></listitem></varlistentry>
 
2215
<varlistentry><term><parameter>str2</parameter>&#160;:</term>
 
2216
<listitem><simpara> a UTF-8 encoded string
 
2217
</simpara></listitem></varlistentry>
 
2218
<varlistentry><term><emphasis>Returns</emphasis>&#160;:</term><listitem><simpara> &lt; 0 if <parameter>str1</parameter> compares before <parameter>str2</parameter>, 
 
2219
  0 if they compare equal, &gt; 0 if <parameter>str1</parameter> compares after <parameter>str2</parameter>.
 
2220
</simpara></listitem></varlistentry>
 
2221
</variablelist></refsect2>
 
2222
<refsect2 id="g-utf8-collate-key" role="function">
 
2223
<title>g_utf8_collate_key ()</title>
 
2224
<indexterm zone="g-utf8-collate-key"><primary sortas="utf8_collate_key">g_utf8_collate_key</primary></indexterm><programlisting><link linkend="gchar">gchar</link> *             g_utf8_collate_key                  (const <link linkend="gchar">gchar</link> *str,
 
2225
                                                         <link linkend="gssize">gssize</link> len);</programlisting>
 
2226
<para>
 
2227
Converts a string into a collation key that can be compared
 
2228
with other collation keys produced by the same function using 
 
2229
<link linkend="strcmp"><function>strcmp()</function></link>. 
 
2230
</para>
 
2231
<para>
 
2232
The results of comparing the collation keys of two strings 
 
2233
with <link linkend="strcmp"><function>strcmp()</function></link> will always be the same as comparing the two 
 
2234
original strings with <link linkend="g-utf8-collate"><function>g_utf8_collate()</function></link>.
 
2235
</para>
 
2236
<para>
 
2237
Note that this function depends on the 
 
2238
<link linkend="setlocale">current locale</link>.</para>
 
2239
<para>
 
2240
</para><variablelist role="params">
 
2241
<varlistentry><term><parameter>str</parameter>&#160;:</term>
 
2242
<listitem><simpara> a UTF-8 encoded string.
 
2243
</simpara></listitem></varlistentry>
 
2244
<varlistentry><term><parameter>len</parameter>&#160;:</term>
 
2245
<listitem><simpara> length of <parameter>str</parameter>, in bytes, or -1 if <parameter>str</parameter> is nul-terminated.
 
2246
</simpara></listitem></varlistentry>
 
2247
<varlistentry><term><emphasis>Returns</emphasis>&#160;:</term><listitem><simpara> a newly allocated string. This string should
 
2248
  be freed with <link linkend="g-free"><function>g_free()</function></link> when you are done with it.
 
2249
</simpara></listitem></varlistentry>
 
2250
</variablelist></refsect2>
 
2251
<refsect2 id="g-utf8-collate-key-for-filename" role="function" condition="since:2.8">
 
2252
<title>g_utf8_collate_key_for_filename ()</title>
 
2253
<indexterm zone="g-utf8-collate-key-for-filename" role="2.8"><primary sortas="utf8_collate_key_for_filename">g_utf8_collate_key_for_filename</primary></indexterm><programlisting><link linkend="gchar">gchar</link> *             g_utf8_collate_key_for_filename     (const <link linkend="gchar">gchar</link> *str,
 
2254
                                                         <link linkend="gssize">gssize</link> len);</programlisting>
 
2255
<para>
 
2256
Converts a string into a collation key that can be compared
 
2257
with other collation keys produced by the same function using <link linkend="strcmp"><function>strcmp()</function></link>. 
 
2258
</para>
 
2259
<para>
 
2260
In order to sort filenames correctly, this function treats the dot '.' 
 
2261
as a special case. Most dictionary orderings seem to consider it
 
2262
insignificant, thus producing the ordering "event.c" "eventgenerator.c"
 
2263
"event.h" instead of "event.c" "event.h" "eventgenerator.c". Also, we
 
2264
would like to treat numbers intelligently so that "file1" "file10" "file5"
 
2265
is sorted as "file1" "file5" "file10".
 
2266
</para>
 
2267
<para>
 
2268
Note that this function depends on the 
 
2269
<link linkend="setlocale">current locale</link>.</para>
 
2270
<para>
 
2271
</para><variablelist role="params">
 
2272
<varlistentry><term><parameter>str</parameter>&#160;:</term>
 
2273
<listitem><simpara> a UTF-8 encoded string.
 
2274
</simpara></listitem></varlistentry>
 
2275
<varlistentry><term><parameter>len</parameter>&#160;:</term>
 
2276
<listitem><simpara> length of <parameter>str</parameter>, in bytes, or -1 if <parameter>str</parameter> is nul-terminated.
 
2277
</simpara></listitem></varlistentry>
 
2278
<varlistentry><term><emphasis>Returns</emphasis>&#160;:</term><listitem><simpara> a newly allocated string. This string should
 
2279
  be freed with <link linkend="g-free"><function>g_free()</function></link> when you are done with it.
 
2280
 
 
2281
</simpara></listitem></varlistentry>
 
2282
</variablelist><para role="since">Since 2.8</para></refsect2>
 
2283
<refsect2 id="g-utf8-to-utf16" role="function">
 
2284
<title>g_utf8_to_utf16 ()</title>
 
2285
<indexterm zone="g-utf8-to-utf16"><primary sortas="utf8_to_utf16">g_utf8_to_utf16</primary></indexterm><programlisting><link linkend="gunichar2">gunichar2</link> *         g_utf8_to_utf16                     (const <link linkend="gchar">gchar</link> *str,
 
2286
                                                         <link linkend="glong">glong</link> len,
 
2287
                                                         <link linkend="glong">glong</link> *items_read,
 
2288
                                                         <link linkend="glong">glong</link> *items_written,
 
2289
                                                         <link linkend="GError">GError</link> **error);</programlisting>
 
2290
<para>
 
2291
Convert a string from UTF-8 to UTF-16. A 0 character will be
 
2292
added to the result after the converted text.</para>
 
2293
<para>
 
2294
</para><variablelist role="params">
 
2295
<varlistentry><term><parameter>str</parameter>&#160;:</term>
 
2296
<listitem><simpara> a UTF-8 encoded string
 
2297
</simpara></listitem></varlistentry>
 
2298
<varlistentry><term><parameter>len</parameter>&#160;:</term>
 
2299
<listitem><simpara> the maximum length (number of bytes) of <parameter>str</parameter> to use.
 
2300
      If <parameter>len</parameter> &lt; 0, then the string is nul-terminated.
 
2301
</simpara></listitem></varlistentry>
 
2302
<varlistentry><term><parameter>items_read</parameter>&#160;:</term>
 
2303
<listitem><simpara> location to store number of bytes read, or <link linkend="NULL--CAPS"><literal>NULL</literal></link>.
 
2304
             If <link linkend="NULL--CAPS"><literal>NULL</literal></link>, then <link linkend="G-CONVERT-ERROR-PARTIAL-INPUT--CAPS"><literal>G_CONVERT_ERROR_PARTIAL_INPUT</literal></link> will be
 
2305
             returned in case <parameter>str</parameter> contains a trailing partial
 
2306
             character. If an error occurs then the index of the
 
2307
             invalid input is stored here.
 
2308
</simpara></listitem></varlistentry>
 
2309
<varlistentry><term><parameter>items_written</parameter>&#160;:</term>
 
2310
<listitem><simpara> location to store number of <type>gunichar2</type> written,
 
2311
                or <link linkend="NULL--CAPS"><literal>NULL</literal></link>.
 
2312
                The value stored here does not include the trailing 0.
 
2313
</simpara></listitem></varlistentry>
 
2314
<varlistentry><term><parameter>error</parameter>&#160;:</term>
 
2315
<listitem><simpara> location to store the error occurring, or <link linkend="NULL--CAPS"><literal>NULL</literal></link> to ignore
 
2316
        errors. Any of the errors in <link linkend="GConvertError"><type>GConvertError</type></link> other than
 
2317
        <link linkend="G-CONVERT-ERROR-NO-CONVERSION--CAPS"><literal>G_CONVERT_ERROR_NO_CONVERSION</literal></link> may occur.
 
2318
</simpara></listitem></varlistentry>
 
2319
<varlistentry><term><emphasis>Returns</emphasis>&#160;:</term><listitem><simpara> a pointer to a newly allocated UTF-16 string.
 
2320
              This value must be freed with <link linkend="g-free"><function>g_free()</function></link>. If an
 
2321
              error occurs, <link linkend="NULL--CAPS"><literal>NULL</literal></link> will be returned and
 
2322
              <parameter>error</parameter> set.
 
2323
</simpara></listitem></varlistentry>
 
2324
</variablelist></refsect2>
 
2325
<refsect2 id="g-utf8-to-ucs4" role="function">
 
2326
<title>g_utf8_to_ucs4 ()</title>
 
2327
<indexterm zone="g-utf8-to-ucs4"><primary sortas="utf8_to_ucs4">g_utf8_to_ucs4</primary></indexterm><programlisting><link linkend="gunichar">gunichar</link> *          g_utf8_to_ucs4                      (const <link linkend="gchar">gchar</link> *str,
 
2328
                                                         <link linkend="glong">glong</link> len,
 
2329
                                                         <link linkend="glong">glong</link> *items_read,
 
2330
                                                         <link linkend="glong">glong</link> *items_written,
 
2331
                                                         <link linkend="GError">GError</link> **error);</programlisting>
 
2332
<para>
 
2333
Convert a string from UTF-8 to a 32-bit fixed width
 
2334
representation as UCS-4. A trailing 0 will be added to the
 
2335
string after the converted text.</para>
 
2336
<para>
 
2337
</para><variablelist role="params">
 
2338
<varlistentry><term><parameter>str</parameter>&#160;:</term>
 
2339
<listitem><simpara> a UTF-8 encoded string
 
2340
</simpara></listitem></varlistentry>
 
2341
<varlistentry><term><parameter>len</parameter>&#160;:</term>
 
2342
<listitem><simpara> the maximum length of <parameter>str</parameter> to use, in bytes. If <parameter>len</parameter> &lt; 0,
 
2343
      then the string is nul-terminated.
 
2344
</simpara></listitem></varlistentry>
 
2345
<varlistentry><term><parameter>items_read</parameter>&#160;:</term>
 
2346
<listitem><simpara> location to store number of bytes read, or <link linkend="NULL--CAPS"><literal>NULL</literal></link>.
 
2347
             If <link linkend="NULL--CAPS"><literal>NULL</literal></link>, then <link linkend="G-CONVERT-ERROR-PARTIAL-INPUT--CAPS"><literal>G_CONVERT_ERROR_PARTIAL_INPUT</literal></link> will be
 
2348
             returned in case <parameter>str</parameter> contains a trailing partial
 
2349
             character. If an error occurs then the index of the
 
2350
             invalid input is stored here.
 
2351
</simpara></listitem></varlistentry>
 
2352
<varlistentry><term><parameter>items_written</parameter>&#160;:</term>
 
2353
<listitem><simpara> location to store number of characters written or <link linkend="NULL--CAPS"><literal>NULL</literal></link>.
 
2354
                The value stored here does not include the trailing 0
 
2355
                character. 
 
2356
</simpara></listitem></varlistentry>
 
2357
<varlistentry><term><parameter>error</parameter>&#160;:</term>
 
2358
<listitem><simpara> location to store the error occurring, or <link linkend="NULL--CAPS"><literal>NULL</literal></link> to ignore
 
2359
        errors. Any of the errors in <link linkend="GConvertError"><type>GConvertError</type></link> other than
 
2360
        <link linkend="G-CONVERT-ERROR-NO-CONVERSION--CAPS"><literal>G_CONVERT_ERROR_NO_CONVERSION</literal></link> may occur.
 
2361
</simpara></listitem></varlistentry>
 
2362
<varlistentry><term><emphasis>Returns</emphasis>&#160;:</term><listitem><simpara> a pointer to a newly allocated UCS-4 string.
 
2363
              This value must be freed with <link linkend="g-free"><function>g_free()</function></link>. If an
 
2364
              error occurs, <link linkend="NULL--CAPS"><literal>NULL</literal></link> will be returned and
 
2365
              <parameter>error</parameter> set.
 
2366
</simpara></listitem></varlistentry>
 
2367
</variablelist></refsect2>
 
2368
<refsect2 id="g-utf8-to-ucs4-fast" role="function">
 
2369
<title>g_utf8_to_ucs4_fast ()</title>
 
2370
<indexterm zone="g-utf8-to-ucs4-fast"><primary sortas="utf8_to_ucs4_fast">g_utf8_to_ucs4_fast</primary></indexterm><programlisting><link linkend="gunichar">gunichar</link> *          g_utf8_to_ucs4_fast                 (const <link linkend="gchar">gchar</link> *str,
 
2371
                                                         <link linkend="glong">glong</link> len,
 
2372
                                                         <link linkend="glong">glong</link> *items_written);</programlisting>
 
2373
<para>
 
2374
Convert a string from UTF-8 to a 32-bit fixed width
 
2375
representation as UCS-4, assuming valid UTF-8 input.
 
2376
This function is roughly twice as fast as <link linkend="g-utf8-to-ucs4"><function>g_utf8_to_ucs4()</function></link>
 
2377
but does no error checking on the input.</para>
 
2378
<para>
 
2379
</para><variablelist role="params">
 
2380
<varlistentry><term><parameter>str</parameter>&#160;:</term>
 
2381
<listitem><simpara> a UTF-8 encoded string
 
2382
</simpara></listitem></varlistentry>
 
2383
<varlistentry><term><parameter>len</parameter>&#160;:</term>
 
2384
<listitem><simpara> the maximum length of <parameter>str</parameter> to use, in bytes. If <parameter>len</parameter> &lt; 0,
 
2385
      then the string is nul-terminated.
 
2386
</simpara></listitem></varlistentry>
 
2387
<varlistentry><term><parameter>items_written</parameter>&#160;:</term>
 
2388
<listitem><simpara> location to store the number of characters in the
 
2389
                result, or <link linkend="NULL--CAPS"><literal>NULL</literal></link>.
 
2390
</simpara></listitem></varlistentry>
 
2391
<varlistentry><term><emphasis>Returns</emphasis>&#160;:</term><listitem><simpara> a pointer to a newly allocated UCS-4 string.
 
2392
              This value must be freed with <link linkend="g-free"><function>g_free()</function></link>.
 
2393
</simpara></listitem></varlistentry>
 
2394
</variablelist></refsect2>
 
2395
<refsect2 id="g-utf16-to-ucs4" role="function">
 
2396
<title>g_utf16_to_ucs4 ()</title>
 
2397
<indexterm zone="g-utf16-to-ucs4"><primary sortas="utf16_to_ucs4">g_utf16_to_ucs4</primary></indexterm><programlisting><link linkend="gunichar">gunichar</link> *          g_utf16_to_ucs4                     (const <link linkend="gunichar2">gunichar2</link> *str,
 
2398
                                                         <link linkend="glong">glong</link> len,
 
2399
                                                         <link linkend="glong">glong</link> *items_read,
 
2400
                                                         <link linkend="glong">glong</link> *items_written,
 
2401
                                                         <link linkend="GError">GError</link> **error);</programlisting>
 
2402
<para>
 
2403
Convert a string from UTF-16 to UCS-4. The result will be
 
2404
nul-terminated.</para>
 
2405
<para>
 
2406
</para><variablelist role="params">
 
2407
<varlistentry><term><parameter>str</parameter>&#160;:</term>
 
2408
<listitem><simpara> a UTF-16 encoded string
 
2409
</simpara></listitem></varlistentry>
 
2410
<varlistentry><term><parameter>len</parameter>&#160;:</term>
 
2411
<listitem><simpara> the maximum length (number of <type>gunichar2</type>) of <parameter>str</parameter> to use. 
 
2412
      If <parameter>len</parameter> &lt; 0, then the string is nul-terminated.
 
2413
</simpara></listitem></varlistentry>
 
2414
<varlistentry><term><parameter>items_read</parameter>&#160;:</term>
 
2415
<listitem><simpara> location to store number of words read, or <link linkend="NULL--CAPS"><literal>NULL</literal></link>.
 
2416
             If <link linkend="NULL--CAPS"><literal>NULL</literal></link>, then <link linkend="G-CONVERT-ERROR-PARTIAL-INPUT--CAPS"><literal>G_CONVERT_ERROR_PARTIAL_INPUT</literal></link> will be
 
2417
             returned in case <parameter>str</parameter> contains a trailing partial
 
2418
             character. If an error occurs then the index of the
 
2419
             invalid input is stored here.
 
2420
</simpara></listitem></varlistentry>
 
2421
<varlistentry><term><parameter>items_written</parameter>&#160;:</term>
 
2422
<listitem><simpara> location to store number of characters written, or <link linkend="NULL--CAPS"><literal>NULL</literal></link>.
 
2423
                The value stored here does not include the trailing
 
2424
                0 character.
 
2425
</simpara></listitem></varlistentry>
 
2426
<varlistentry><term><parameter>error</parameter>&#160;:</term>
 
2427
<listitem><simpara> location to store the error occurring, or <link linkend="NULL--CAPS"><literal>NULL</literal></link> to ignore
 
2428
        errors. Any of the errors in <link linkend="GConvertError"><type>GConvertError</type></link> other than
 
2429
        <link linkend="G-CONVERT-ERROR-NO-CONVERSION--CAPS"><literal>G_CONVERT_ERROR_NO_CONVERSION</literal></link> may occur.
 
2430
</simpara></listitem></varlistentry>
 
2431
<varlistentry><term><emphasis>Returns</emphasis>&#160;:</term><listitem><simpara> a pointer to a newly allocated UCS-4 string.
 
2432
              This value must be freed with <link linkend="g-free"><function>g_free()</function></link>. If an
 
2433
              error occurs, <link linkend="NULL--CAPS"><literal>NULL</literal></link> will be returned and
 
2434
              <parameter>error</parameter> set.
 
2435
</simpara></listitem></varlistentry>
 
2436
</variablelist></refsect2>
 
2437
<refsect2 id="g-utf16-to-utf8" role="function">
 
2438
<title>g_utf16_to_utf8 ()</title>
 
2439
<indexterm zone="g-utf16-to-utf8"><primary sortas="utf16_to_utf8">g_utf16_to_utf8</primary></indexterm><programlisting><link linkend="gchar">gchar</link>*              g_utf16_to_utf8                     (const <link linkend="gunichar2">gunichar2</link> *str,
 
2440
                                                         <link linkend="glong">glong</link> len,
 
2441
                                                         <link linkend="glong">glong</link> *items_read,
 
2442
                                                         <link linkend="glong">glong</link> *items_written,
 
2443
                                                         <link linkend="GError">GError</link> **error);</programlisting>
 
2444
<para>
 
2445
Convert a string from UTF-16 to UTF-8. The result will be
 
2446
terminated with a 0 byte.
 
2447
</para>
 
2448
<para>
 
2449
Note that the input is expected to be already in native endianness,
 
2450
an initial byte-order-mark character is not handled specially.
 
2451
<link linkend="g-convert"><function>g_convert()</function></link> can be used to convert a byte buffer of UTF-16 data of
 
2452
ambiguous endianness.
 
2453
</para>
 
2454
<para>
 
2455
Further note that this function does not validate the result
 
2456
string; it may e.g. include embedded NUL characters. The only
 
2457
validation done by this function is to ensure that the input can
 
2458
be correctly interpreted as UTF-16, i.e. it doesn't contain
 
2459
things like unpaired surrogates.</para>
 
2460
<para>
 
2461
</para><variablelist role="params">
 
2462
<varlistentry><term><parameter>str</parameter>&#160;:</term>
 
2463
<listitem><simpara> a UTF-16 encoded string
 
2464
</simpara></listitem></varlistentry>
 
2465
<varlistentry><term><parameter>len</parameter>&#160;:</term>
 
2466
<listitem><simpara> the maximum length (number of <type>gunichar2</type>) of <parameter>str</parameter> to use. 
 
2467
      If <parameter>len</parameter> &lt; 0, then the string is nul-terminated.
 
2468
</simpara></listitem></varlistentry>
 
2469
<varlistentry><term><parameter>items_read</parameter>&#160;:</term>
 
2470
<listitem><simpara> location to store number of words read, or <link linkend="NULL--CAPS"><literal>NULL</literal></link>.
 
2471
             If <link linkend="NULL--CAPS"><literal>NULL</literal></link>, then <link linkend="G-CONVERT-ERROR-PARTIAL-INPUT--CAPS"><literal>G_CONVERT_ERROR_PARTIAL_INPUT</literal></link> will be
 
2472
             returned in case <parameter>str</parameter> contains a trailing partial
 
2473
             character. If an error occurs then the index of the
 
2474
             invalid input is stored here.
 
2475
</simpara></listitem></varlistentry>
 
2476
<varlistentry><term><parameter>items_written</parameter>&#160;:</term>
 
2477
<listitem><simpara> location to store number of bytes written, or <link linkend="NULL--CAPS"><literal>NULL</literal></link>.
 
2478
                The value stored here does not include the trailing
 
2479
                0 byte.
 
2480
</simpara></listitem></varlistentry>
 
2481
<varlistentry><term><parameter>error</parameter>&#160;:</term>
 
2482
<listitem><simpara> location to store the error occurring, or <link linkend="NULL--CAPS"><literal>NULL</literal></link> to ignore
 
2483
        errors. Any of the errors in <link linkend="GConvertError"><type>GConvertError</type></link> other than
 
2484
        <link linkend="G-CONVERT-ERROR-NO-CONVERSION--CAPS"><literal>G_CONVERT_ERROR_NO_CONVERSION</literal></link> may occur.
 
2485
</simpara></listitem></varlistentry>
 
2486
<varlistentry><term><emphasis>Returns</emphasis>&#160;:</term><listitem><simpara> a pointer to a newly allocated UTF-8 string.
 
2487
              This value must be freed with <link linkend="g-free"><function>g_free()</function></link>. If an
 
2488
              error occurs, <link linkend="NULL--CAPS"><literal>NULL</literal></link> will be returned and
 
2489
              <parameter>error</parameter> set.
 
2490
</simpara></listitem></varlistentry>
 
2491
</variablelist></refsect2>
 
2492
<refsect2 id="g-ucs4-to-utf16" role="function">
 
2493
<title>g_ucs4_to_utf16 ()</title>
 
2494
<indexterm zone="g-ucs4-to-utf16"><primary sortas="ucs4_to_utf16">g_ucs4_to_utf16</primary></indexterm><programlisting><link linkend="gunichar2">gunichar2</link> *         g_ucs4_to_utf16                     (const <link linkend="gunichar">gunichar</link> *str,
 
2495
                                                         <link linkend="glong">glong</link> len,
 
2496
                                                         <link linkend="glong">glong</link> *items_read,
 
2497
                                                         <link linkend="glong">glong</link> *items_written,
 
2498
                                                         <link linkend="GError">GError</link> **error);</programlisting>
 
2499
<para>
 
2500
Convert a string from UCS-4 to UTF-16. A 0 character will be
 
2501
added to the result after the converted text.</para>
 
2502
<para>
 
2503
</para><variablelist role="params">
 
2504
<varlistentry><term><parameter>str</parameter>&#160;:</term>
 
2505
<listitem><simpara> a UCS-4 encoded string
 
2506
</simpara></listitem></varlistentry>
 
2507
<varlistentry><term><parameter>len</parameter>&#160;:</term>
 
2508
<listitem><simpara> the maximum length (number of characters) of <parameter>str</parameter> to use. 
 
2509
      If <parameter>len</parameter> &lt; 0, then the string is nul-terminated.
 
2510
</simpara></listitem></varlistentry>
 
2511
<varlistentry><term><parameter>items_read</parameter>&#160;:</term>
 
2512
<listitem><simpara> location to store number of characters read, or <link linkend="NULL--CAPS"><literal>NULL</literal></link>.
 
2513
             If an error occurs then the index of the invalid input
 
2514
             is stored here.
 
2515
</simpara></listitem></varlistentry>
 
2516
<varlistentry><term><parameter>items_written</parameter>&#160;:</term>
 
2517
<listitem><simpara> location to store number of <type>gunichar2</type> 
 
2518
                written, or <link linkend="NULL--CAPS"><literal>NULL</literal></link>. The value stored here does not 
 
2519
                include the trailing 0.
 
2520
</simpara></listitem></varlistentry>
 
2521
<varlistentry><term><parameter>error</parameter>&#160;:</term>
 
2522
<listitem><simpara> location to store the error occurring, or <link linkend="NULL--CAPS"><literal>NULL</literal></link> to ignore
 
2523
        errors. Any of the errors in <link linkend="GConvertError"><type>GConvertError</type></link> other than
 
2524
        <link linkend="G-CONVERT-ERROR-NO-CONVERSION--CAPS"><literal>G_CONVERT_ERROR_NO_CONVERSION</literal></link> may occur.
 
2525
</simpara></listitem></varlistentry>
 
2526
<varlistentry><term><emphasis>Returns</emphasis>&#160;:</term><listitem><simpara> a pointer to a newly allocated UTF-16 string.
 
2527
              This value must be freed with <link linkend="g-free"><function>g_free()</function></link>. If an
 
2528
              error occurs, <link linkend="NULL--CAPS"><literal>NULL</literal></link> will be returned and
 
2529
              <parameter>error</parameter> set.
 
2530
</simpara></listitem></varlistentry>
 
2531
</variablelist></refsect2>
 
2532
<refsect2 id="g-ucs4-to-utf8" role="function">
 
2533
<title>g_ucs4_to_utf8 ()</title>
 
2534
<indexterm zone="g-ucs4-to-utf8"><primary sortas="ucs4_to_utf8">g_ucs4_to_utf8</primary></indexterm><programlisting><link linkend="gchar">gchar</link>*              g_ucs4_to_utf8                      (const <link linkend="gunichar">gunichar</link> *str,
 
2535
                                                         <link linkend="glong">glong</link> len,
 
2536
                                                         <link linkend="glong">glong</link> *items_read,
 
2537
                                                         <link linkend="glong">glong</link> *items_written,
 
2538
                                                         <link linkend="GError">GError</link> **error);</programlisting>
 
2539
<para>
 
2540
Convert a string from a 32-bit fixed width representation as UCS-4
 
2541
to UTF-8. The result will be terminated with a 0 byte.</para>
 
2542
<para>
 
2543
</para><variablelist role="params">
 
2544
<varlistentry><term><parameter>str</parameter>&#160;:</term>
 
2545
<listitem><simpara> a UCS-4 encoded string
 
2546
</simpara></listitem></varlistentry>
 
2547
<varlistentry><term><parameter>len</parameter>&#160;:</term>
 
2548
<listitem><simpara> the maximum length (number of characters) of <parameter>str</parameter> to use. 
 
2549
      If <parameter>len</parameter> &lt; 0, then the string is nul-terminated.
 
2550
</simpara></listitem></varlistentry>
 
2551
<varlistentry><term><parameter>items_read</parameter>&#160;:</term>
 
2552
<listitem><simpara> location to store number of characters read, or <link linkend="NULL--CAPS"><literal>NULL</literal></link>.
 
2553
</simpara></listitem></varlistentry>
 
2554
<varlistentry><term><parameter>items_written</parameter>&#160;:</term>
 
2555
<listitem><simpara> location to store number of bytes written, or <link linkend="NULL--CAPS"><literal>NULL</literal></link>.
 
2556
                The value stored here does not include the trailing 0
 
2557
                byte. 
 
2558
</simpara></listitem></varlistentry>
 
2559
<varlistentry><term><parameter>error</parameter>&#160;:</term>
 
2560
<listitem><simpara> location to store the error occurring, or <link linkend="NULL--CAPS"><literal>NULL</literal></link> to ignore
 
2561
        errors. Any of the errors in <link linkend="GConvertError"><type>GConvertError</type></link> other than
 
2562
        <link linkend="G-CONVERT-ERROR-NO-CONVERSION--CAPS"><literal>G_CONVERT_ERROR_NO_CONVERSION</literal></link> may occur.
 
2563
</simpara></listitem></varlistentry>
 
2564
<varlistentry><term><emphasis>Returns</emphasis>&#160;:</term><listitem><simpara> a pointer to a newly allocated UTF-8 string.
 
2565
              This value must be freed with <link linkend="g-free"><function>g_free()</function></link>. If an
 
2566
              error occurs, <link linkend="NULL--CAPS"><literal>NULL</literal></link> will be returned and
 
2567
              <parameter>error</parameter> set. In that case, <parameter>items_read</parameter> will be
 
2568
              set to the position of the first invalid input 
 
2569
              character.
 
2570
</simpara></listitem></varlistentry>
 
2571
</variablelist></refsect2>
 
2572
<refsect2 id="g-unichar-to-utf8" role="function">
 
2573
<title>g_unichar_to_utf8 ()</title>
 
2574
<indexterm zone="g-unichar-to-utf8"><primary sortas="unichar_to_utf8">g_unichar_to_utf8</primary></indexterm><programlisting><link linkend="gint">gint</link>                g_unichar_to_utf8                   (<link linkend="gunichar">gunichar</link> c,
 
2575
                                                         <link linkend="gchar">gchar</link> *outbuf);</programlisting>
 
2576
<para>
 
2577
Converts a single character to UTF-8.</para>
 
2578
<para>
 
2579
</para><variablelist role="params">
 
2580
<varlistentry><term><parameter>c</parameter>&#160;:</term>
 
2581
<listitem><simpara> a Unicode character code
 
2582
</simpara></listitem></varlistentry>
 
2583
<varlistentry><term><parameter>outbuf</parameter>&#160;:</term>
 
2584
<listitem><simpara> output buffer, must have at least 6 bytes of space.
 
2585
      If <link linkend="NULL--CAPS"><literal>NULL</literal></link>, the length will be computed and returned
 
2586
      and nothing will be written to <parameter>outbuf</parameter>.
 
2587
</simpara></listitem></varlistentry>
 
2588
<varlistentry><term><emphasis>Returns</emphasis>&#160;:</term><listitem><simpara> number of bytes written
 
2589
</simpara></listitem></varlistentry>
 
2590
</variablelist></refsect2>
 
2591
 
 
2592
</refsect1>
 
2593
 
 
2594
 
 
2595
 
 
2596
<refsect1 id="glib-Unicode-Manipulation.see-also">
 
2597
<title>See Also</title>
 
2598
<para>
 
2599
<variablelist>
 
2600
<varlistentry>
 
2601
<term><link linkend="g-locale-to-utf8"><function>g_locale_to_utf8()</function></link>, <link linkend="g-locale-from-utf8"><function>g_locale_from_utf8()</function></link></term>
 
2602
<listitem><para>
 
2603
Convenience functions for converting between UTF-8 and the locale encoding.
 
2604
</para></listitem>
 
2605
</varlistentry>
 
2606
</variablelist>
 
2607
</para>
 
2608
</refsect1>
 
2609
 
 
2610
</refentry>