2
<!DOCTYPE refentry PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN"
3
"http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" [
4
<!ENTITY % local.common.attrib "xmlns:xi CDATA #FIXED 'http://www.w3.org/2003/XInclude'">
5
<!ENTITY version SYSTEM "version.xml">
7
<refentry id="glib-Unicode-Manipulation">
9
<refentrytitle role="top_of_page" id="glib-Unicode-Manipulation.top_of_page">Unicode Manipulation</refentrytitle>
10
<manvolnum>3</manvolnum>
11
<refmiscinfo>GLIB Library</refmiscinfo>
15
<refname>Unicode Manipulation</refname>
16
<refpurpose>functions operating on Unicode characters and UTF-8 strings</refpurpose>
19
<refsynopsisdiv id="glib-Unicode-Manipulation.synopsis" role="synopsis">
20
<title role="synopsis.title">Synopsis</title>
24
#include &lt;glib.h&gt;
26
typedef <link linkend="gunichar">gunichar</link>;
27
typedef <link linkend="gunichar2">gunichar2</link>;
29
<link linkend="gboolean">gboolean</link> <link linkend="g-unichar-validate">g_unichar_validate</link> (<link linkend="gunichar">gunichar</link> ch);
30
<link linkend="gboolean">gboolean</link> <link linkend="g-unichar-isalnum">g_unichar_isalnum</link> (<link linkend="gunichar">gunichar</link> c);
31
<link linkend="gboolean">gboolean</link> <link linkend="g-unichar-isalpha">g_unichar_isalpha</link> (<link linkend="gunichar">gunichar</link> c);
32
<link linkend="gboolean">gboolean</link> <link linkend="g-unichar-iscntrl">g_unichar_iscntrl</link> (<link linkend="gunichar">gunichar</link> c);
33
<link linkend="gboolean">gboolean</link> <link linkend="g-unichar-isdefined">g_unichar_isdefined</link> (<link linkend="gunichar">gunichar</link> c);
34
<link linkend="gboolean">gboolean</link> <link linkend="g-unichar-isdigit">g_unichar_isdigit</link> (<link linkend="gunichar">gunichar</link> c);
35
<link linkend="gboolean">gboolean</link> <link linkend="g-unichar-isgraph">g_unichar_isgraph</link> (<link linkend="gunichar">gunichar</link> c);
36
<link linkend="gboolean">gboolean</link> <link linkend="g-unichar-islower">g_unichar_islower</link> (<link linkend="gunichar">gunichar</link> c);
37
<link linkend="gboolean">gboolean</link> <link linkend="g-unichar-ismark">g_unichar_ismark</link> (<link linkend="gunichar">gunichar</link> c);
38
<link linkend="gboolean">gboolean</link> <link linkend="g-unichar-isprint">g_unichar_isprint</link> (<link linkend="gunichar">gunichar</link> c);
39
<link linkend="gboolean">gboolean</link> <link linkend="g-unichar-ispunct">g_unichar_ispunct</link> (<link linkend="gunichar">gunichar</link> c);
40
<link linkend="gboolean">gboolean</link> <link linkend="g-unichar-isspace">g_unichar_isspace</link> (<link linkend="gunichar">gunichar</link> c);
41
<link linkend="gboolean">gboolean</link> <link linkend="g-unichar-istitle">g_unichar_istitle</link> (<link linkend="gunichar">gunichar</link> c);
42
<link linkend="gboolean">gboolean</link> <link linkend="g-unichar-isupper">g_unichar_isupper</link> (<link linkend="gunichar">gunichar</link> c);
43
<link linkend="gboolean">gboolean</link> <link linkend="g-unichar-isxdigit">g_unichar_isxdigit</link> (<link linkend="gunichar">gunichar</link> c);
44
<link linkend="gboolean">gboolean</link> <link linkend="g-unichar-iswide">g_unichar_iswide</link> (<link linkend="gunichar">gunichar</link> c);
45
<link linkend="gboolean">gboolean</link> <link linkend="g-unichar-iswide-cjk">g_unichar_iswide_cjk</link> (<link linkend="gunichar">gunichar</link> c);
46
<link linkend="gboolean">gboolean</link> <link linkend="g-unichar-iszerowidth">g_unichar_iszerowidth</link> (<link linkend="gunichar">gunichar</link> c);
47
<link linkend="gunichar">gunichar</link> <link linkend="g-unichar-toupper">g_unichar_toupper</link> (<link linkend="gunichar">gunichar</link> c);
48
<link linkend="gunichar">gunichar</link> <link linkend="g-unichar-tolower">g_unichar_tolower</link> (<link linkend="gunichar">gunichar</link> c);
49
<link linkend="gunichar">gunichar</link> <link linkend="g-unichar-totitle">g_unichar_totitle</link> (<link linkend="gunichar">gunichar</link> c);
50
<link linkend="gint">gint</link> <link linkend="g-unichar-digit-value">g_unichar_digit_value</link> (<link linkend="gunichar">gunichar</link> c);
51
<link linkend="gint">gint</link> <link linkend="g-unichar-xdigit-value">g_unichar_xdigit_value</link> (<link linkend="gunichar">gunichar</link> c);
52
enum <link linkend="GUnicodeType">GUnicodeType</link>;
53
<link linkend="GUnicodeType">GUnicodeType</link> <link linkend="g-unichar-type">g_unichar_type</link> (<link linkend="gunichar">gunichar</link> c);
54
enum <link linkend="GUnicodeBreakType">GUnicodeBreakType</link>;
55
<link linkend="GUnicodeBreakType">GUnicodeBreakType</link> <link linkend="g-unichar-break-type">g_unichar_break_type</link> (<link linkend="gunichar">gunichar</link> c);
56
<link linkend="gint">gint</link> <link linkend="g-unichar-combining-class">g_unichar_combining_class</link> (<link linkend="gunichar">gunichar</link> uc);
57
<link linkend="void">void</link> <link linkend="g-unicode-canonical-ordering">g_unicode_canonical_ordering</link> (<link linkend="gunichar">gunichar</link> *string,
58
<link linkend="gsize">gsize</link> len);
59
<link linkend="gunichar">gunichar</link> * <link linkend="g-unicode-canonical-decomposition">g_unicode_canonical_decomposition</link> (<link linkend="gunichar">gunichar</link> ch,
60
<link linkend="gsize">gsize</link> *result_len);
61
<link linkend="gboolean">gboolean</link> <link linkend="g-unichar-get-mirror-char">g_unichar_get_mirror_char</link> (<link linkend="gunichar">gunichar</link> ch,
62
<link linkend="gunichar">gunichar</link> *mirrored_ch);
63
enum <link linkend="GUnicodeScript">GUnicodeScript</link>;
64
<link linkend="GUnicodeScript">GUnicodeScript</link> <link linkend="g-unichar-get-script">g_unichar_get_script</link> (<link linkend="gunichar">gunichar</link> ch);
66
#define <link linkend="g-utf8-next-char">g_utf8_next_char</link> (p)
67
<link linkend="gunichar">gunichar</link> <link linkend="g-utf8-get-char">g_utf8_get_char</link> (const <link linkend="gchar">gchar</link> *p);
68
<link linkend="gunichar">gunichar</link> <link linkend="g-utf8-get-char-validated">g_utf8_get_char_validated</link> (const <link linkend="gchar">gchar</link> *p,
69
<link linkend="gssize">gssize</link> max_len);
70
<link linkend="gchar">gchar</link>* <link linkend="g-utf8-offset-to-pointer">g_utf8_offset_to_pointer</link> (const <link linkend="gchar">gchar</link> *str,
71
<link linkend="glong">glong</link> offset);
72
<link linkend="glong">glong</link> <link linkend="g-utf8-pointer-to-offset">g_utf8_pointer_to_offset</link> (const <link linkend="gchar">gchar</link> *str,
73
const <link linkend="gchar">gchar</link> *pos);
74
<link linkend="gchar">gchar</link>* <link linkend="g-utf8-prev-char">g_utf8_prev_char</link> (const <link linkend="gchar">gchar</link> *p);
75
<link linkend="gchar">gchar</link>* <link linkend="g-utf8-find-next-char">g_utf8_find_next_char</link> (const <link linkend="gchar">gchar</link> *p,
76
const <link linkend="gchar">gchar</link> *end);
77
<link linkend="gchar">gchar</link>* <link linkend="g-utf8-find-prev-char">g_utf8_find_prev_char</link> (const <link linkend="gchar">gchar</link> *str,
78
const <link linkend="gchar">gchar</link> *p);
79
<link linkend="glong">glong</link> <link linkend="g-utf8-strlen">g_utf8_strlen</link> (const <link linkend="gchar">gchar</link> *p,
80
<link linkend="gssize">gssize</link> max);
81
<link linkend="gchar">gchar</link>* <link linkend="g-utf8-strncpy">g_utf8_strncpy</link> (<link linkend="gchar">gchar</link> *dest,
82
const <link linkend="gchar">gchar</link> *src,
83
<link linkend="gsize">gsize</link> n);
84
<link linkend="gchar">gchar</link>* <link linkend="g-utf8-strchr">g_utf8_strchr</link> (const <link linkend="gchar">gchar</link> *p,
85
<link linkend="gssize">gssize</link> len,
86
<link linkend="gunichar">gunichar</link> c);
87
<link linkend="gchar">gchar</link>* <link linkend="g-utf8-strrchr">g_utf8_strrchr</link> (const <link linkend="gchar">gchar</link> *p,
88
<link linkend="gssize">gssize</link> len,
89
<link linkend="gunichar">gunichar</link> c);
90
<link linkend="gchar">gchar</link>* <link linkend="g-utf8-strreverse">g_utf8_strreverse</link> (const <link linkend="gchar">gchar</link> *str,
91
<link linkend="gssize">gssize</link> len);
92
<link linkend="gboolean">gboolean</link> <link linkend="g-utf8-validate">g_utf8_validate</link> (const <link linkend="gchar">gchar</link> *str,
93
<link linkend="gssize">gssize</link> max_len,
94
const <link linkend="gchar">gchar</link> **end);
96
<link linkend="gchar">gchar</link> * <link linkend="g-utf8-strup">g_utf8_strup</link> (const <link linkend="gchar">gchar</link> *str,
97
<link linkend="gssize">gssize</link> len);
98
<link linkend="gchar">gchar</link> * <link linkend="g-utf8-strdown">g_utf8_strdown</link> (const <link linkend="gchar">gchar</link> *str,
99
<link linkend="gssize">gssize</link> len);
100
<link linkend="gchar">gchar</link> * <link linkend="g-utf8-casefold">g_utf8_casefold</link> (const <link linkend="gchar">gchar</link> *str,
101
<link linkend="gssize">gssize</link> len);
102
<link linkend="gchar">gchar</link> * <link linkend="g-utf8-normalize">g_utf8_normalize</link> (const <link linkend="gchar">gchar</link> *str,
103
<link linkend="gssize">gssize</link> len,
104
<link linkend="GNormalizeMode">GNormalizeMode</link> mode);
105
enum <link linkend="GNormalizeMode">GNormalizeMode</link>;
106
<link linkend="gint">gint</link> <link linkend="g-utf8-collate">g_utf8_collate</link> (const <link linkend="gchar">gchar</link> *str1,
107
const <link linkend="gchar">gchar</link> *str2);
108
<link linkend="gchar">gchar</link> * <link linkend="g-utf8-collate-key">g_utf8_collate_key</link> (const <link linkend="gchar">gchar</link> *str,
109
<link linkend="gssize">gssize</link> len);
110
<link linkend="gchar">gchar</link> * <link linkend="g-utf8-collate-key-for-filename">g_utf8_collate_key_for_filename</link> (const <link linkend="gchar">gchar</link> *str,
111
<link linkend="gssize">gssize</link> len);
113
<link linkend="gunichar2">gunichar2</link> * <link linkend="g-utf8-to-utf16">g_utf8_to_utf16</link> (const <link linkend="gchar">gchar</link> *str,
114
<link linkend="glong">glong</link> len,
115
<link linkend="glong">glong</link> *items_read,
116
<link linkend="glong">glong</link> *items_written,
117
<link linkend="GError">GError</link> **error);
118
<link linkend="gunichar">gunichar</link> * <link linkend="g-utf8-to-ucs4">g_utf8_to_ucs4</link> (const <link linkend="gchar">gchar</link> *str,
119
<link linkend="glong">glong</link> len,
120
<link linkend="glong">glong</link> *items_read,
121
<link linkend="glong">glong</link> *items_written,
122
<link linkend="GError">GError</link> **error);
123
<link linkend="gunichar">gunichar</link> * <link linkend="g-utf8-to-ucs4-fast">g_utf8_to_ucs4_fast</link> (const <link linkend="gchar">gchar</link> *str,
124
<link linkend="glong">glong</link> len,
125
<link linkend="glong">glong</link> *items_written);
126
<link linkend="gunichar">gunichar</link> * <link linkend="g-utf16-to-ucs4">g_utf16_to_ucs4</link> (const <link linkend="gunichar2">gunichar2</link> *str,
127
<link linkend="glong">glong</link> len,
128
<link linkend="glong">glong</link> *items_read,
129
<link linkend="glong">glong</link> *items_written,
130
<link linkend="GError">GError</link> **error);
131
<link linkend="gchar">gchar</link>* <link linkend="g-utf16-to-utf8">g_utf16_to_utf8</link> (const <link linkend="gunichar2">gunichar2</link> *str,
132
<link linkend="glong">glong</link> len,
133
<link linkend="glong">glong</link> *items_read,
134
<link linkend="glong">glong</link> *items_written,
135
<link linkend="GError">GError</link> **error);
136
<link linkend="gunichar2">gunichar2</link> * <link linkend="g-ucs4-to-utf16">g_ucs4_to_utf16</link> (const <link linkend="gunichar">gunichar</link> *str,
137
<link linkend="glong">glong</link> len,
138
<link linkend="glong">glong</link> *items_read,
139
<link linkend="glong">glong</link> *items_written,
140
<link linkend="GError">GError</link> **error);
141
<link linkend="gchar">gchar</link>* <link linkend="g-ucs4-to-utf8">g_ucs4_to_utf8</link> (const <link linkend="gunichar">gunichar</link> *str,
142
<link linkend="glong">glong</link> len,
143
<link linkend="glong">glong</link> *items_read,
144
<link linkend="glong">glong</link> *items_written,
145
<link linkend="GError">GError</link> **error);
146
<link linkend="gint">gint</link> <link linkend="g-unichar-to-utf8">g_unichar_to_utf8</link> (<link linkend="gunichar">gunichar</link> c,
147
<link linkend="gchar">gchar</link> *outbuf);
159
<refsect1 id="glib-Unicode-Manipulation.description" role="desc">
160
<title role="desc.title">Description</title>
162
This section describes a number of functions for dealing with
163
Unicode characters and strings. There are analogues of the
164
traditional <filename>ctype.h</filename> character classification
165
and case conversion functions, UTF-8 analogues of some string utility
166
functions, functions to perform normalization, case conversion and
167
collation on UTF-8 strings and finally functions to convert between
168
the UTF-8, UTF-16 and UCS-4 encodings of Unicode.
171
The implementations of the Unicode functions in GLib are based
172
on the Unicode Character Data tables, which are available from
173
<ulink url="http://www.unicode.org/">www.unicode.org</ulink>.
174
GLib 2.8 supports Unicode 4.0, GLib 2.10 supports Unicode 4.1,
175
GLib 2.12 supports Unicode 5.0, GLib 2.16.3 supports Unicode 5.1.
179
<refsect1 id="glib-Unicode-Manipulation.details" role="details">
180
<title role="details.title">Details</title>
181
<refsect2 id="gunichar" role="typedef">
182
<title>gunichar</title>
183
<indexterm zone="gunichar"><primary sortas="unichar">gunichar</primary></indexterm><programlisting>typedef guint32 gunichar;
186
A type which can hold any UTF-32 or UCS-4 character code, also known
187
as a Unicode code point.
190
If you want to produce the UTF-8 representation of a <link linkend="gunichar"><type>gunichar</type></link>,
191
use <link linkend="g-ucs4-to-utf8"><function>g_ucs4_to_utf8()</function></link>. See also <link linkend="g-utf8-to-ucs4"><function>g_utf8_to_ucs4()</function></link> for the reverse process.
194
To print/scan values of this type as integer, use
195
<link linkend="G-GINT32-MODIFIER--CAPS"><literal>G_GINT32_MODIFIER</literal></link> and/or <link linkend="G-GUINT32-FORMAT--CAPS"><literal>G_GUINT32_FORMAT</literal></link>.
198
The notation to express a Unicode code point in running text is as a
199
hexadecimal number with four to six digits and uppercase letters, prefixed
200
by the string "U+". Leading zeros are omitted, unless the code point would
201
have fewer than four hexadecimal digits.
202
For example, "U+0041 LATIN CAPITAL LETTER A".
203
To print a code point in the U+-notation, use the format string
203
"U+%04"G_GINT32_FORMAT"X".
205
To scan, use the format string "U+%06"G_GINT32_FORMAT"X".
209
sscanf ("U+0041", "U+%06"G_GINT32_FORMAT"X", &amp;c);
210
g_print ("Read U+%04"G_GINT32_FORMAT"X", c);
214
<refsect2 id="gunichar2" role="typedef">
215
<title>gunichar2</title>
216
<indexterm zone="gunichar2"><primary sortas="unichar2">gunichar2</primary></indexterm><programlisting>typedef guint16 gunichar2;
219
A type which can hold any UTF-16 code
220
point<footnote id="utf16_surrogate_pairs">UTF-16 also has so-called
221
<firstterm>surrogate pairs</firstterm> to encode characters beyond the
222
BMP as pairs of 16-bit numbers. Surrogate pairs cannot be stored in a
223
single gunichar2 field, but all GLib functions accepting gunichar2 arrays
224
will correctly interpret surrogate pairs.</footnote>.
227
To print/scan values of this type to/from text you need to convert
228
to/from UTF-8, using <link linkend="g-utf16-to-utf8"><function>g_utf16_to_utf8()</function></link>/<link linkend="g-utf8-to-utf16"><function>g_utf8_to_utf16()</function></link>.
231
To print/scan values of this type as integer, use
232
<link linkend="G-GINT16-MODIFIER--CAPS"><literal>G_GINT16_MODIFIER</literal></link> and/or <link linkend="G-GUINT16-FORMAT--CAPS"><literal>G_GUINT16_FORMAT</literal></link>.
234
<refsect2 id="g-unichar-validate" role="function">
235
<title>g_unichar_validate ()</title>
236
<indexterm zone="g-unichar-validate"><primary sortas="unichar_validate">g_unichar_validate</primary></indexterm><programlisting><link linkend="gboolean">gboolean</link> g_unichar_validate (<link linkend="gunichar">gunichar</link> ch);</programlisting>
238
Checks whether <parameter>ch</parameter> is a valid Unicode character. Some possible
239
integer values of <parameter>ch</parameter> will not be valid. 0 is considered a valid
240
character, though it's normally a string terminator.</para>
242
</para><variablelist role="params">
243
<varlistentry><term><parameter>ch</parameter> :</term>
244
<listitem><simpara> a Unicode character
245
</simpara></listitem></varlistentry>
246
<varlistentry><term><emphasis>Returns</emphasis> :</term><listitem><simpara> <link linkend="TRUE--CAPS"><literal>TRUE</literal></link> if <parameter>ch</parameter> is a valid Unicode character
247
</simpara></listitem></varlistentry>
248
</variablelist></refsect2>
249
<refsect2 id="g-unichar-isalnum" role="function">
250
<title>g_unichar_isalnum ()</title>
251
<indexterm zone="g-unichar-isalnum"><primary sortas="unichar_isalnum">g_unichar_isalnum</primary></indexterm><programlisting><link linkend="gboolean">gboolean</link> g_unichar_isalnum (<link linkend="gunichar">gunichar</link> c);</programlisting>
253
Determines whether a character is alphanumeric.
254
Given some UTF-8 text, obtain a character value
255
with <link linkend="g-utf8-get-char"><function>g_utf8_get_char()</function></link>.</para>
257
</para><variablelist role="params">
258
<varlistentry><term><parameter>c</parameter> :</term>
259
<listitem><simpara> a Unicode character
260
</simpara></listitem></varlistentry>
261
<varlistentry><term><emphasis>Returns</emphasis> :</term><listitem><simpara> <link linkend="TRUE--CAPS"><literal>TRUE</literal></link> if <parameter>c</parameter> is an alphanumeric character
262
</simpara></listitem></varlistentry>
263
</variablelist></refsect2>
264
<refsect2 id="g-unichar-isalpha" role="function">
265
<title>g_unichar_isalpha ()</title>
266
<indexterm zone="g-unichar-isalpha"><primary sortas="unichar_isalpha">g_unichar_isalpha</primary></indexterm><programlisting><link linkend="gboolean">gboolean</link> g_unichar_isalpha (<link linkend="gunichar">gunichar</link> c);</programlisting>
268
Determines whether a character is alphabetic (i.e. a letter).
269
Given some UTF-8 text, obtain a character value with
270
<link linkend="g-utf8-get-char"><function>g_utf8_get_char()</function></link>.</para>
272
</para><variablelist role="params">
273
<varlistentry><term><parameter>c</parameter> :</term>
274
<listitem><simpara> a Unicode character
275
</simpara></listitem></varlistentry>
276
<varlistentry><term><emphasis>Returns</emphasis> :</term><listitem><simpara> <link linkend="TRUE--CAPS"><literal>TRUE</literal></link> if <parameter>c</parameter> is an alphabetic character
277
</simpara></listitem></varlistentry>
278
</variablelist></refsect2>
279
<refsect2 id="g-unichar-iscntrl" role="function">
280
<title>g_unichar_iscntrl ()</title>
281
<indexterm zone="g-unichar-iscntrl"><primary sortas="unichar_iscntrl">g_unichar_iscntrl</primary></indexterm><programlisting><link linkend="gboolean">gboolean</link> g_unichar_iscntrl (<link linkend="gunichar">gunichar</link> c);</programlisting>
283
Determines whether a character is a control character.
284
Given some UTF-8 text, obtain a character value with
285
<link linkend="g-utf8-get-char"><function>g_utf8_get_char()</function></link>.</para>
287
</para><variablelist role="params">
288
<varlistentry><term><parameter>c</parameter> :</term>
289
<listitem><simpara> a Unicode character
290
</simpara></listitem></varlistentry>
291
<varlistentry><term><emphasis>Returns</emphasis> :</term><listitem><simpara> <link linkend="TRUE--CAPS"><literal>TRUE</literal></link> if <parameter>c</parameter> is a control character
292
</simpara></listitem></varlistentry>
293
</variablelist></refsect2>
294
<refsect2 id="g-unichar-isdefined" role="function">
295
<title>g_unichar_isdefined ()</title>
296
<indexterm zone="g-unichar-isdefined"><primary sortas="unichar_isdefined">g_unichar_isdefined</primary></indexterm><programlisting><link linkend="gboolean">gboolean</link> g_unichar_isdefined (<link linkend="gunichar">gunichar</link> c);</programlisting>
298
Determines if a given character is assigned in the Unicode
standard.</para>
301
</para><variablelist role="params">
302
<varlistentry><term><parameter>c</parameter> :</term>
303
<listitem><simpara> a Unicode character
304
</simpara></listitem></varlistentry>
305
<varlistentry><term><emphasis>Returns</emphasis> :</term><listitem><simpara> <link linkend="TRUE--CAPS"><literal>TRUE</literal></link> if the character has an assigned value
306
</simpara></listitem></varlistentry>
307
</variablelist></refsect2>
308
<refsect2 id="g-unichar-isdigit" role="function">
309
<title>g_unichar_isdigit ()</title>
310
<indexterm zone="g-unichar-isdigit"><primary sortas="unichar_isdigit">g_unichar_isdigit</primary></indexterm><programlisting><link linkend="gboolean">gboolean</link> g_unichar_isdigit (<link linkend="gunichar">gunichar</link> c);</programlisting>
312
Determines whether a character is numeric (i.e. a digit). This
313
covers ASCII 0-9 and also digits in other languages/scripts. Given
314
some UTF-8 text, obtain a character value with <link linkend="g-utf8-get-char"><function>g_utf8_get_char()</function></link>.</para>
316
</para><variablelist role="params">
317
<varlistentry><term><parameter>c</parameter> :</term>
318
<listitem><simpara> a Unicode character
319
</simpara></listitem></varlistentry>
320
<varlistentry><term><emphasis>Returns</emphasis> :</term><listitem><simpara> <link linkend="TRUE--CAPS"><literal>TRUE</literal></link> if <parameter>c</parameter> is a digit
321
</simpara></listitem></varlistentry>
322
</variablelist></refsect2>
323
<refsect2 id="g-unichar-isgraph" role="function">
324
<title>g_unichar_isgraph ()</title>
325
<indexterm zone="g-unichar-isgraph"><primary sortas="unichar_isgraph">g_unichar_isgraph</primary></indexterm><programlisting><link linkend="gboolean">gboolean</link> g_unichar_isgraph (<link linkend="gunichar">gunichar</link> c);</programlisting>
327
Determines whether a character is printable and not a space
328
(returns <link linkend="FALSE--CAPS"><literal>FALSE</literal></link> for control characters, format characters, and
329
spaces). <link linkend="g-unichar-isprint"><function>g_unichar_isprint()</function></link> is similar, but returns <link linkend="TRUE--CAPS"><literal>TRUE</literal></link> for
330
spaces. Given some UTF-8 text, obtain a character value with
331
<link linkend="g-utf8-get-char"><function>g_utf8_get_char()</function></link>.</para>
333
</para><variablelist role="params">
334
<varlistentry><term><parameter>c</parameter> :</term>
335
<listitem><simpara> a Unicode character
336
</simpara></listitem></varlistentry>
337
<varlistentry><term><emphasis>Returns</emphasis> :</term><listitem><simpara> <link linkend="TRUE--CAPS"><literal>TRUE</literal></link> if <parameter>c</parameter> is printable unless it's a space
338
</simpara></listitem></varlistentry>
339
</variablelist></refsect2>
340
<refsect2 id="g-unichar-islower" role="function">
341
<title>g_unichar_islower ()</title>
342
<indexterm zone="g-unichar-islower"><primary sortas="unichar_islower">g_unichar_islower</primary></indexterm><programlisting><link linkend="gboolean">gboolean</link> g_unichar_islower (<link linkend="gunichar">gunichar</link> c);</programlisting>
344
Determines whether a character is a lowercase letter.
345
Given some UTF-8 text, obtain a character value with
346
<link linkend="g-utf8-get-char"><function>g_utf8_get_char()</function></link>.</para>
348
</para><variablelist role="params">
349
<varlistentry><term><parameter>c</parameter> :</term>
350
<listitem><simpara> a Unicode character
351
</simpara></listitem></varlistentry>
352
<varlistentry><term><emphasis>Returns</emphasis> :</term><listitem><simpara> <link linkend="TRUE--CAPS"><literal>TRUE</literal></link> if <parameter>c</parameter> is a lowercase letter
353
</simpara></listitem></varlistentry>
354
</variablelist></refsect2>
355
<refsect2 id="g-unichar-ismark" role="function" condition="since:2.14">
356
<title>g_unichar_ismark ()</title>
357
<indexterm zone="g-unichar-ismark" role="2.14"><primary sortas="unichar_ismark">g_unichar_ismark</primary></indexterm><programlisting><link linkend="gboolean">gboolean</link> g_unichar_ismark (<link linkend="gunichar">gunichar</link> c);</programlisting>
359
Determines whether a character is a mark (non-spacing mark,
360
combining mark, or enclosing mark in Unicode speak).
361
Given some UTF-8 text, obtain a character value
362
with <link linkend="g-utf8-get-char"><function>g_utf8_get_char()</function></link>.
365
Note: in most cases where isalpha characters are allowed,
366
ismark characters should be allowed too, as they are essential
367
for writing most European languages as well as many non-Latin
scripts.</para>
370
</para><variablelist role="params">
371
<varlistentry><term><parameter>c</parameter> :</term>
372
<listitem><simpara> a Unicode character
373
</simpara></listitem></varlistentry>
374
<varlistentry><term><emphasis>Returns</emphasis> :</term><listitem><simpara> <link linkend="TRUE--CAPS"><literal>TRUE</literal></link> if <parameter>c</parameter> is a mark character
376
</simpara></listitem></varlistentry>
377
</variablelist><para role="since">Since 2.14</para></refsect2>
378
<refsect2 id="g-unichar-isprint" role="function">
379
<title>g_unichar_isprint ()</title>
380
<indexterm zone="g-unichar-isprint"><primary sortas="unichar_isprint">g_unichar_isprint</primary></indexterm><programlisting><link linkend="gboolean">gboolean</link> g_unichar_isprint (<link linkend="gunichar">gunichar</link> c);</programlisting>
382
Determines whether a character is printable.
383
Unlike <link linkend="g-unichar-isgraph"><function>g_unichar_isgraph()</function></link>, returns <link linkend="TRUE--CAPS"><literal>TRUE</literal></link> for spaces.
384
Given some UTF-8 text, obtain a character value with
385
<link linkend="g-utf8-get-char"><function>g_utf8_get_char()</function></link>.</para>
387
</para><variablelist role="params">
388
<varlistentry><term><parameter>c</parameter> :</term>
389
<listitem><simpara> a Unicode character
390
</simpara></listitem></varlistentry>
391
<varlistentry><term><emphasis>Returns</emphasis> :</term><listitem><simpara> <link linkend="TRUE--CAPS"><literal>TRUE</literal></link> if <parameter>c</parameter> is printable
392
</simpara></listitem></varlistentry>
393
</variablelist></refsect2>
394
<refsect2 id="g-unichar-ispunct" role="function">
395
<title>g_unichar_ispunct ()</title>
396
<indexterm zone="g-unichar-ispunct"><primary sortas="unichar_ispunct">g_unichar_ispunct</primary></indexterm><programlisting><link linkend="gboolean">gboolean</link> g_unichar_ispunct (<link linkend="gunichar">gunichar</link> c);</programlisting>
398
Determines whether a character is punctuation or a symbol.
399
Given some UTF-8 text, obtain a character value with
400
<link linkend="g-utf8-get-char"><function>g_utf8_get_char()</function></link>.</para>
402
</para><variablelist role="params">
403
<varlistentry><term><parameter>c</parameter> :</term>
404
<listitem><simpara> a Unicode character
405
</simpara></listitem></varlistentry>
406
<varlistentry><term><emphasis>Returns</emphasis> :</term><listitem><simpara> <link linkend="TRUE--CAPS"><literal>TRUE</literal></link> if <parameter>c</parameter> is a punctuation or symbol character
407
</simpara></listitem></varlistentry>
408
</variablelist></refsect2>
409
<refsect2 id="g-unichar-isspace" role="function">
410
<title>g_unichar_isspace ()</title>
411
<indexterm zone="g-unichar-isspace"><primary sortas="unichar_isspace">g_unichar_isspace</primary></indexterm><programlisting><link linkend="gboolean">gboolean</link> g_unichar_isspace (<link linkend="gunichar">gunichar</link> c);</programlisting>
413
Determines whether a character is a space, tab, or line separator
414
(newline, carriage return, etc.). Given some UTF-8 text, obtain a
415
character value with <link linkend="g-utf8-get-char"><function>g_utf8_get_char()</function></link>.
418
(Note: don't use this to do word breaking; you have to use
419
Pango or equivalent to get word breaking right, the algorithm
420
is fairly complex.)</para>
422
</para><variablelist role="params">
423
<varlistentry><term><parameter>c</parameter> :</term>
424
<listitem><simpara> a Unicode character
425
</simpara></listitem></varlistentry>
426
<varlistentry><term><emphasis>Returns</emphasis> :</term><listitem><simpara> <link linkend="TRUE--CAPS"><literal>TRUE</literal></link> if <parameter>c</parameter> is a space character
427
</simpara></listitem></varlistentry>
428
</variablelist></refsect2>
429
<refsect2 id="g-unichar-istitle" role="function">
430
<title>g_unichar_istitle ()</title>
431
<indexterm zone="g-unichar-istitle"><primary sortas="unichar_istitle">g_unichar_istitle</primary></indexterm><programlisting><link linkend="gboolean">gboolean</link> g_unichar_istitle (<link linkend="gunichar">gunichar</link> c);</programlisting>
433
Determines if a character is titlecase. Some characters in
434
Unicode which are composites, such as the DZ digraph,
435
have three case variants instead of just two. The titlecase
436
form is used at the beginning of a word where only the
437
first letter is capitalized. The titlecase form of the DZ
438
digraph is U+01F2 LATIN CAPITAL LETTER D WITH SMALL LETTER Z.</para>
440
</para><variablelist role="params">
441
<varlistentry><term><parameter>c</parameter> :</term>
442
<listitem><simpara> a Unicode character
443
</simpara></listitem></varlistentry>
444
<varlistentry><term><emphasis>Returns</emphasis> :</term><listitem><simpara> <link linkend="TRUE--CAPS"><literal>TRUE</literal></link> if the character is titlecase
445
</simpara></listitem></varlistentry>
446
</variablelist></refsect2>
447
<refsect2 id="g-unichar-isupper" role="function">
448
<title>g_unichar_isupper ()</title>
449
<indexterm zone="g-unichar-isupper"><primary sortas="unichar_isupper">g_unichar_isupper</primary></indexterm><programlisting><link linkend="gboolean">gboolean</link> g_unichar_isupper (<link linkend="gunichar">gunichar</link> c);</programlisting>
451
Determines if a character is uppercase.</para>
453
</para><variablelist role="params">
454
<varlistentry><term><parameter>c</parameter> :</term>
455
<listitem><simpara> a Unicode character
456
</simpara></listitem></varlistentry>
457
<varlistentry><term><emphasis>Returns</emphasis> :</term><listitem><simpara> <link linkend="TRUE--CAPS"><literal>TRUE</literal></link> if <parameter>c</parameter> is an uppercase character
458
</simpara></listitem></varlistentry>
459
</variablelist></refsect2>
460
<refsect2 id="g-unichar-isxdigit" role="function">
461
<title>g_unichar_isxdigit ()</title>
462
<indexterm zone="g-unichar-isxdigit"><primary sortas="unichar_isxdigit">g_unichar_isxdigit</primary></indexterm><programlisting><link linkend="gboolean">gboolean</link> g_unichar_isxdigit (<link linkend="gunichar">gunichar</link> c);</programlisting>
464
Determines if a character is a hexadecimal digit.</para>
466
</para><variablelist role="params">
467
<varlistentry><term><parameter>c</parameter> :</term>
468
<listitem><simpara> a Unicode character.
469
</simpara></listitem></varlistentry>
470
<varlistentry><term><emphasis>Returns</emphasis> :</term><listitem><simpara> <link linkend="TRUE--CAPS"><literal>TRUE</literal></link> if the character is a hexadecimal digit
471
</simpara></listitem></varlistentry>
472
</variablelist></refsect2>
473
<refsect2 id="g-unichar-iswide" role="function">
474
<title>g_unichar_iswide ()</title>
475
<indexterm zone="g-unichar-iswide"><primary sortas="unichar_iswide">g_unichar_iswide</primary></indexterm><programlisting><link linkend="gboolean">gboolean</link> g_unichar_iswide (<link linkend="gunichar">gunichar</link> c);</programlisting>
477
Determines if a character is typically rendered in a double-width cell.
480
</para><variablelist role="params">
481
<varlistentry><term><parameter>c</parameter> :</term>
482
<listitem><simpara> a Unicode character
483
</simpara></listitem></varlistentry>
484
<varlistentry><term><emphasis>Returns</emphasis> :</term><listitem><simpara> <link linkend="TRUE--CAPS"><literal>TRUE</literal></link> if the character is wide
485
</simpara></listitem></varlistentry>
486
</variablelist></refsect2>
487
<refsect2 id="g-unichar-iswide-cjk" role="function" condition="since:2.12">
488
<title>g_unichar_iswide_cjk ()</title>
489
<indexterm zone="g-unichar-iswide-cjk" role="2.12"><primary sortas="unichar_iswide_cjk">g_unichar_iswide_cjk</primary></indexterm><programlisting><link linkend="gboolean">gboolean</link> g_unichar_iswide_cjk (<link linkend="gunichar">gunichar</link> c);</programlisting>
491
Determines if a character is typically rendered in a double-width
492
cell under legacy East Asian locales. If a character is wide according to
493
<link linkend="g-unichar-iswide"><function>g_unichar_iswide()</function></link>, then it is also reported wide with this function, but
494
the converse is not necessarily true. See the
495
<ulink url="http://www.unicode.org/reports/tr11/">Unicode Standard
496
Annex #11</ulink> for details.
499
If a character passes the <link linkend="g-unichar-iswide"><function>g_unichar_iswide()</function></link> test then it will also pass
500
this test, but not the other way around. Note that some characters may
501
pass both this test and <link linkend="g-unichar-iszerowidth"><function>g_unichar_iszerowidth()</function></link>.</para>
503
</para><variablelist role="params">
504
<varlistentry><term><parameter>c</parameter> :</term>
505
<listitem><simpara> a Unicode character
506
</simpara></listitem></varlistentry>
507
<varlistentry><term><emphasis>Returns</emphasis> :</term><listitem><simpara> <link linkend="TRUE--CAPS"><literal>TRUE</literal></link> if the character is wide in legacy East Asian locales
509
</simpara></listitem></varlistentry>
510
</variablelist><para role="since">Since 2.12</para></refsect2>
511
<refsect2 id="g-unichar-iszerowidth" role="function" condition="since:2.14">
512
<title>g_unichar_iszerowidth ()</title>
513
<indexterm zone="g-unichar-iszerowidth" role="2.14"><primary sortas="unichar_iszerowidth">g_unichar_iszerowidth</primary></indexterm><programlisting><link linkend="gboolean">gboolean</link> g_unichar_iszerowidth (<link linkend="gunichar">gunichar</link> c);</programlisting>
515
Determines if a given character typically takes zero width when rendered.
516
The return value is <link linkend="TRUE--CAPS"><literal>TRUE</literal></link> for all non-spacing and enclosing marks
517
(e.g., combining accents), format characters, and zero-width
518
space, but not U+00AD SOFT HYPHEN.
521
A typical use of this function is with one of <link linkend="g-unichar-iswide"><function>g_unichar_iswide()</function></link> or
522
<link linkend="g-unichar-iswide-cjk"><function>g_unichar_iswide_cjk()</function></link> to determine the number of cells a string occupies
523
when displayed on a grid display (terminals). However, note that not all
524
terminals support zero-width rendering of zero-width marks.</para>
526
</para><variablelist role="params">
527
<varlistentry><term><parameter>c</parameter> :</term>
528
<listitem><simpara> a Unicode character
529
</simpara></listitem></varlistentry>
530
<varlistentry><term><emphasis>Returns</emphasis> :</term><listitem><simpara> <link linkend="TRUE--CAPS"><literal>TRUE</literal></link> if the character has zero width
532
</simpara></listitem></varlistentry>
533
</variablelist><para role="since">Since 2.14</para></refsect2>
534
<refsect2 id="g-unichar-toupper" role="function">
535
<title>g_unichar_toupper ()</title>
536
<indexterm zone="g-unichar-toupper"><primary sortas="unichar_toupper">g_unichar_toupper</primary></indexterm><programlisting><link linkend="gunichar">gunichar</link> g_unichar_toupper (<link linkend="gunichar">gunichar</link> c);</programlisting>
538
Converts a character to uppercase.</para>
540
</para><variablelist role="params">
541
<varlistentry><term><parameter>c</parameter> :</term>
542
<listitem><simpara> a Unicode character
543
</simpara></listitem></varlistentry>
544
<varlistentry><term><emphasis>Returns</emphasis> :</term><listitem><simpara> the result of converting <parameter>c</parameter> to uppercase.
545
If <parameter>c</parameter> is not a lowercase or titlecase character,
546
or has no uppercase equivalent, <parameter>c</parameter> is returned unchanged.
547
</simpara></listitem></varlistentry>
548
</variablelist></refsect2>
549
<refsect2 id="g-unichar-tolower" role="function">
550
<title>g_unichar_tolower ()</title>
551
<indexterm zone="g-unichar-tolower"><primary sortas="unichar_tolower">g_unichar_tolower</primary></indexterm><programlisting><link linkend="gunichar">gunichar</link> g_unichar_tolower (<link linkend="gunichar">gunichar</link> c);</programlisting>
553
Converts a character to lower case.</para>
555
</para><variablelist role="params">
556
<varlistentry><term><parameter>c</parameter> :</term>
557
<listitem><simpara> a Unicode character.
558
</simpara></listitem></varlistentry>
559
<varlistentry><term><emphasis>Returns</emphasis> :</term><listitem><simpara> the result of converting <parameter>c</parameter> to lower case.
560
If <parameter>c</parameter> is not an uppercase or titlecase character,
561
or has no lowercase equivalent, <parameter>c</parameter> is returned unchanged.
562
</simpara></listitem></varlistentry>
563
</variablelist></refsect2>
564
<refsect2 id="g-unichar-totitle" role="function">
565
<title>g_unichar_totitle ()</title>
566
<indexterm zone="g-unichar-totitle"><primary sortas="unichar_totitle">g_unichar_totitle</primary></indexterm><programlisting><link linkend="gunichar">gunichar</link> g_unichar_totitle (<link linkend="gunichar">gunichar</link> c);</programlisting>
568
Converts a character to titlecase.</para>
570
</para><variablelist role="params">
571
<varlistentry><term><parameter>c</parameter> :</term>
572
<listitem><simpara> a Unicode character
573
</simpara></listitem></varlistentry>
574
<varlistentry><term><emphasis>Returns</emphasis> :</term><listitem><simpara> the result of converting <parameter>c</parameter> to titlecase.
575
If <parameter>c</parameter> is not an uppercase or lowercase character,
576
<parameter>c</parameter> is returned unchanged.
577
</simpara></listitem></varlistentry>
578
</variablelist></refsect2>
579
<refsect2 id="g-unichar-digit-value" role="function">
580
<title>g_unichar_digit_value ()</title>
581
<indexterm zone="g-unichar-digit-value"><primary sortas="unichar_digit_value">g_unichar_digit_value</primary></indexterm><programlisting><link linkend="gint">gint</link> g_unichar_digit_value (<link linkend="gunichar">gunichar</link> c);</programlisting>
583
Determines the numeric value of a character as a decimal digit.
586
</para><variablelist role="params">
587
<varlistentry><term><parameter>c</parameter> :</term>
588
<listitem><simpara> a Unicode character
589
</simpara></listitem></varlistentry>
590
<varlistentry><term><emphasis>Returns</emphasis> :</term><listitem><simpara> If <parameter>c</parameter> is a decimal digit (according to
591
<link linkend="g-unichar-isdigit"><function>g_unichar_isdigit()</function></link>), its numeric value. Otherwise, -1.
592
</simpara></listitem></varlistentry>
593
</variablelist></refsect2>
594
<refsect2 id="g-unichar-xdigit-value" role="function">
595
<title>g_unichar_xdigit_value ()</title>
596
<indexterm zone="g-unichar-xdigit-value"><primary sortas="unichar_xdigit_value">g_unichar_xdigit_value</primary></indexterm><programlisting><link linkend="gint">gint</link> g_unichar_xdigit_value (<link linkend="gunichar">gunichar</link> c);</programlisting>
598
Determines the numeric value of a character as a hexadecimal digit.
601
</para><variablelist role="params">
602
<varlistentry><term><parameter>c</parameter> :</term>
603
<listitem><simpara> a Unicode character
604
</simpara></listitem></varlistentry>
605
<varlistentry><term><emphasis>Returns</emphasis> :</term><listitem><simpara> If <parameter>c</parameter> is a hex digit (according to
606
<link linkend="g-unichar-isxdigit"><function>g_unichar_isxdigit()</function></link>), its numeric value. Otherwise, -1.
607
</simpara></listitem></varlistentry>
608
</variablelist></refsect2>
609
<refsect2 id="GUnicodeType" role="enum">
610
<title>enum GUnicodeType</title>
611
<indexterm zone="GUnicodeType"><primary sortas="UnicodeType">GUnicodeType</primary></indexterm><programlisting>typedef enum
615
G_UNICODE_UNASSIGNED,
616
G_UNICODE_PRIVATE_USE,
618
G_UNICODE_LOWERCASE_LETTER,
619
G_UNICODE_MODIFIER_LETTER,
620
G_UNICODE_OTHER_LETTER,
621
G_UNICODE_TITLECASE_LETTER,
622
G_UNICODE_UPPERCASE_LETTER,
623
G_UNICODE_COMBINING_MARK,
624
G_UNICODE_ENCLOSING_MARK,
625
G_UNICODE_NON_SPACING_MARK,
626
G_UNICODE_DECIMAL_NUMBER,
627
G_UNICODE_LETTER_NUMBER,
628
G_UNICODE_OTHER_NUMBER,
629
G_UNICODE_CONNECT_PUNCTUATION,
630
G_UNICODE_DASH_PUNCTUATION,
631
G_UNICODE_CLOSE_PUNCTUATION,
632
G_UNICODE_FINAL_PUNCTUATION,
633
G_UNICODE_INITIAL_PUNCTUATION,
634
G_UNICODE_OTHER_PUNCTUATION,
635
G_UNICODE_OPEN_PUNCTUATION,
636
G_UNICODE_CURRENCY_SYMBOL,
637
G_UNICODE_MODIFIER_SYMBOL,
638
G_UNICODE_MATH_SYMBOL,
639
G_UNICODE_OTHER_SYMBOL,
640
G_UNICODE_LINE_SEPARATOR,
641
G_UNICODE_PARAGRAPH_SEPARATOR,
642
G_UNICODE_SPACE_SEPARATOR
646
These are the possible character classifications from the
647
Unicode specification.
648
See <ulink url="http://www.unicode.org/Public/UNIDATA/UnicodeData.html"
649
>http://www.unicode.org/Public/UNIDATA/UnicodeData.html</ulink>.
650
</para><variablelist role="enum">
651
<varlistentry id="G-UNICODE-CONTROL--CAPS" role="constant">
652
<term><literal>G_UNICODE_CONTROL</literal></term>
653
<listitem><simpara>General category "Other, Control" (Cc)
654
</simpara></listitem>
656
<varlistentry id="G-UNICODE-FORMAT--CAPS" role="constant">
657
<term><literal>G_UNICODE_FORMAT</literal></term>
658
<listitem><simpara>General category "Other, Format" (Cf)
659
</simpara></listitem>
661
<varlistentry id="G-UNICODE-UNASSIGNED--CAPS" role="constant">
662
<term><literal>G_UNICODE_UNASSIGNED</literal></term>
663
<listitem><simpara>General category "Other, Not Assigned" (Cn)
664
</simpara></listitem>
666
<varlistentry id="G-UNICODE-PRIVATE-USE--CAPS" role="constant">
667
<term><literal>G_UNICODE_PRIVATE_USE</literal></term>
668
<listitem><simpara>General category "Other, Private Use" (Co)
669
</simpara></listitem>
671
<varlistentry id="G-UNICODE-SURROGATE--CAPS" role="constant">
672
<term><literal>G_UNICODE_SURROGATE</literal></term>
673
<listitem><simpara>General category "Other, Surrogate" (Cs)
674
</simpara></listitem>
676
<varlistentry id="G-UNICODE-LOWERCASE-LETTER--CAPS" role="constant">
677
<term><literal>G_UNICODE_LOWERCASE_LETTER</literal></term>
678
<listitem><simpara>General category "Letter, Lowercase" (Ll)
679
</simpara></listitem>
681
<varlistentry id="G-UNICODE-MODIFIER-LETTER--CAPS" role="constant">
682
<term><literal>G_UNICODE_MODIFIER_LETTER</literal></term>
683
<listitem><simpara>General category "Letter, Modifier" (Lm)
684
</simpara></listitem>
686
<varlistentry id="G-UNICODE-OTHER-LETTER--CAPS" role="constant">
687
<term><literal>G_UNICODE_OTHER_LETTER</literal></term>
688
<listitem><simpara>General category "Letter, Other" (Lo)
689
</simpara></listitem>
691
<varlistentry id="G-UNICODE-TITLECASE-LETTER--CAPS" role="constant">
692
<term><literal>G_UNICODE_TITLECASE_LETTER</literal></term>
693
<listitem><simpara>General category "Letter, Titlecase" (Lt)
694
</simpara></listitem>
696
<varlistentry id="G-UNICODE-UPPERCASE-LETTER--CAPS" role="constant">
697
<term><literal>G_UNICODE_UPPERCASE_LETTER</literal></term>
698
<listitem><simpara>General category "Letter, Uppercase" (Lu)
699
</simpara></listitem>
701
<varlistentry id="G-UNICODE-COMBINING-MARK--CAPS" role="constant">
702
<term><literal>G_UNICODE_COMBINING_MARK</literal></term>
703
<listitem><simpara>General category "Mark, Spacing Combining" (Mc)
704
</simpara></listitem>
706
<varlistentry id="G-UNICODE-ENCLOSING-MARK--CAPS" role="constant">
707
<term><literal>G_UNICODE_ENCLOSING_MARK</literal></term>
708
<listitem><simpara>General category "Mark, Enclosing" (Me)
709
</simpara></listitem>
711
<varlistentry id="G-UNICODE-NON-SPACING-MARK--CAPS" role="constant">
712
<term><literal>G_UNICODE_NON_SPACING_MARK</literal></term>
713
<listitem><simpara>General category "Mark, Nonspacing" (Mn)
714
</simpara></listitem>
716
<varlistentry id="G-UNICODE-DECIMAL-NUMBER--CAPS" role="constant">
717
<term><literal>G_UNICODE_DECIMAL_NUMBER</literal></term>
718
<listitem><simpara>General category "Number, Decimal Digit" (Nd)
719
</simpara></listitem>
721
<varlistentry id="G-UNICODE-LETTER-NUMBER--CAPS" role="constant">
722
<term><literal>G_UNICODE_LETTER_NUMBER</literal></term>
723
<listitem><simpara>General category "Number, Letter" (Nl)
724
</simpara></listitem>
726
<varlistentry id="G-UNICODE-OTHER-NUMBER--CAPS" role="constant">
727
<term><literal>G_UNICODE_OTHER_NUMBER</literal></term>
728
<listitem><simpara>General category "Number, Other" (No)
729
</simpara></listitem>
731
<varlistentry id="G-UNICODE-CONNECT-PUNCTUATION--CAPS" role="constant">
732
<term><literal>G_UNICODE_CONNECT_PUNCTUATION</literal></term>
733
<listitem><simpara>General category "Punctuation, Connector" (Pc)
734
</simpara></listitem>
736
<varlistentry id="G-UNICODE-DASH-PUNCTUATION--CAPS" role="constant">
737
<term><literal>G_UNICODE_DASH_PUNCTUATION</literal></term>
738
<listitem><simpara>General category "Punctuation, Dash" (Pd)
739
</simpara></listitem>
741
<varlistentry id="G-UNICODE-CLOSE-PUNCTUATION--CAPS" role="constant">
742
<term><literal>G_UNICODE_CLOSE_PUNCTUATION</literal></term>
743
<listitem><simpara>General category "Punctuation, Close" (Pe)
744
</simpara></listitem>
746
<varlistentry id="G-UNICODE-FINAL-PUNCTUATION--CAPS" role="constant">
747
<term><literal>G_UNICODE_FINAL_PUNCTUATION</literal></term>
748
<listitem><simpara>General category "Punctuation, Final quote" (Pf)
749
</simpara></listitem>
751
<varlistentry id="G-UNICODE-INITIAL-PUNCTUATION--CAPS" role="constant">
752
<term><literal>G_UNICODE_INITIAL_PUNCTUATION</literal></term>
753
<listitem><simpara>General category "Punctuation, Initial quote" (Pi)
754
</simpara></listitem>
756
<varlistentry id="G-UNICODE-OTHER-PUNCTUATION--CAPS" role="constant">
757
<term><literal>G_UNICODE_OTHER_PUNCTUATION</literal></term>
758
<listitem><simpara>General category "Punctuation, Other" (Po)
759
</simpara></listitem>
761
<varlistentry id="G-UNICODE-OPEN-PUNCTUATION--CAPS" role="constant">
762
<term><literal>G_UNICODE_OPEN_PUNCTUATION</literal></term>
763
<listitem><simpara>General category "Punctuation, Open" (Ps)
764
</simpara></listitem>
766
<varlistentry id="G-UNICODE-CURRENCY-SYMBOL--CAPS" role="constant">
767
<term><literal>G_UNICODE_CURRENCY_SYMBOL</literal></term>
768
<listitem><simpara>General category "Symbol, Currency" (Sc)
769
</simpara></listitem>
771
<varlistentry id="G-UNICODE-MODIFIER-SYMBOL--CAPS" role="constant">
772
<term><literal>G_UNICODE_MODIFIER_SYMBOL</literal></term>
773
<listitem><simpara>General category "Symbol, Modifier" (Sk)
774
</simpara></listitem>
776
<varlistentry id="G-UNICODE-MATH-SYMBOL--CAPS" role="constant">
777
<term><literal>G_UNICODE_MATH_SYMBOL</literal></term>
778
<listitem><simpara>General category "Symbol, Math" (Sm)
779
</simpara></listitem>
781
<varlistentry id="G-UNICODE-OTHER-SYMBOL--CAPS" role="constant">
782
<term><literal>G_UNICODE_OTHER_SYMBOL</literal></term>
783
<listitem><simpara>General category "Symbol, Other" (So)
784
</simpara></listitem>
786
<varlistentry id="G-UNICODE-LINE-SEPARATOR--CAPS" role="constant">
787
<term><literal>G_UNICODE_LINE_SEPARATOR</literal></term>
788
<listitem><simpara>General category "Separator, Line" (Zl)
789
</simpara></listitem>
791
<varlistentry id="G-UNICODE-PARAGRAPH-SEPARATOR--CAPS" role="constant">
792
<term><literal>G_UNICODE_PARAGRAPH_SEPARATOR</literal></term>
793
<listitem><simpara>General category "Separator, Paragraph" (Zp)
794
</simpara></listitem>
796
<varlistentry id="G-UNICODE-SPACE-SEPARATOR--CAPS" role="constant">
797
<term><literal>G_UNICODE_SPACE_SEPARATOR</literal></term>
798
<listitem><simpara>General category "Separator, Space" (Zs)
799
</simpara></listitem>
801
</variablelist></refsect2>
802
<refsect2 id="g-unichar-type" role="function">
803
<title>g_unichar_type ()</title>
804
<indexterm zone="g-unichar-type"><primary sortas="unichar_type">g_unichar_type</primary></indexterm><programlisting><link linkend="GUnicodeType">GUnicodeType</link> g_unichar_type (<link linkend="gunichar">gunichar</link> c);</programlisting>
806
Classifies a Unicode character by type.</para>
808
</para><variablelist role="params">
809
<varlistentry><term><parameter>c</parameter> :</term>
810
<listitem><simpara> a Unicode character
811
</simpara></listitem></varlistentry>
812
<varlistentry><term><emphasis>Returns</emphasis> :</term><listitem><simpara> the type of the character.
813
</simpara></listitem></varlistentry>
814
</variablelist></refsect2>
815
<refsect2 id="GUnicodeBreakType" role="enum">
816
<title>enum GUnicodeBreakType</title>
817
<indexterm zone="GUnicodeBreakType"><primary sortas="UnicodeBreakType">GUnicodeBreakType</primary></indexterm><programlisting>typedef enum
819
G_UNICODE_BREAK_MANDATORY,
820
G_UNICODE_BREAK_CARRIAGE_RETURN,
821
G_UNICODE_BREAK_LINE_FEED,
822
G_UNICODE_BREAK_COMBINING_MARK,
823
G_UNICODE_BREAK_SURROGATE,
824
G_UNICODE_BREAK_ZERO_WIDTH_SPACE,
825
G_UNICODE_BREAK_INSEPARABLE,
826
G_UNICODE_BREAK_NON_BREAKING_GLUE,
827
G_UNICODE_BREAK_CONTINGENT,
828
G_UNICODE_BREAK_SPACE,
829
G_UNICODE_BREAK_AFTER,
830
G_UNICODE_BREAK_BEFORE,
831
G_UNICODE_BREAK_BEFORE_AND_AFTER,
832
G_UNICODE_BREAK_HYPHEN,
833
G_UNICODE_BREAK_NON_STARTER,
834
G_UNICODE_BREAK_OPEN_PUNCTUATION,
835
G_UNICODE_BREAK_CLOSE_PUNCTUATION,
836
G_UNICODE_BREAK_QUOTATION,
837
G_UNICODE_BREAK_EXCLAMATION,
838
G_UNICODE_BREAK_IDEOGRAPHIC,
839
G_UNICODE_BREAK_NUMERIC,
840
G_UNICODE_BREAK_INFIX_SEPARATOR,
841
G_UNICODE_BREAK_SYMBOL,
842
G_UNICODE_BREAK_ALPHABETIC,
843
G_UNICODE_BREAK_PREFIX,
844
G_UNICODE_BREAK_POSTFIX,
845
G_UNICODE_BREAK_COMPLEX_CONTEXT,
846
G_UNICODE_BREAK_AMBIGUOUS,
847
G_UNICODE_BREAK_UNKNOWN,
848
G_UNICODE_BREAK_NEXT_LINE,
849
G_UNICODE_BREAK_WORD_JOINER,
850
G_UNICODE_BREAK_HANGUL_L_JAMO,
851
G_UNICODE_BREAK_HANGUL_V_JAMO,
852
G_UNICODE_BREAK_HANGUL_T_JAMO,
853
G_UNICODE_BREAK_HANGUL_LV_SYLLABLE,
854
G_UNICODE_BREAK_HANGUL_LVT_SYLLABLE
858
These are the possible line break classifications.
859
The five Hangul types were added in Unicode 4.1, so they were
860
introduced in GLib 2.10. Note that new types may be added in the future.
861
Applications should be ready to handle unknown values.
862
They may be regarded as <link linkend="G-UNICODE-BREAK-UNKNOWN--CAPS"><literal>G_UNICODE_BREAK_UNKNOWN</literal></link>.
863
See <ulink url="http://www.unicode.org/unicode/reports/tr14/"
864
>http://www.unicode.org/unicode/reports/tr14/</ulink>.
865
</para><variablelist role="enum">
866
<varlistentry id="G-UNICODE-BREAK-MANDATORY--CAPS" role="constant">
867
<term><literal>G_UNICODE_BREAK_MANDATORY</literal></term>
868
<listitem><simpara>Mandatory Break (BK)
869
</simpara></listitem>
871
<varlistentry id="G-UNICODE-BREAK-CARRIAGE-RETURN--CAPS" role="constant">
872
<term><literal>G_UNICODE_BREAK_CARRIAGE_RETURN</literal></term>
873
<listitem><simpara>Carriage Return (CR)
874
</simpara></listitem>
876
<varlistentry id="G-UNICODE-BREAK-LINE-FEED--CAPS" role="constant">
877
<term><literal>G_UNICODE_BREAK_LINE_FEED</literal></term>
878
<listitem><simpara>Line Feed (LF)
879
</simpara></listitem>
881
<varlistentry id="G-UNICODE-BREAK-COMBINING-MARK--CAPS" role="constant">
882
<term><literal>G_UNICODE_BREAK_COMBINING_MARK</literal></term>
883
<listitem><simpara>Attached Characters and Combining Marks (CM)
884
</simpara></listitem>
886
<varlistentry id="G-UNICODE-BREAK-SURROGATE--CAPS" role="constant">
887
<term><literal>G_UNICODE_BREAK_SURROGATE</literal></term>
888
<listitem><simpara>Surrogates (SG)
889
</simpara></listitem>
891
<varlistentry id="G-UNICODE-BREAK-ZERO-WIDTH-SPACE--CAPS" role="constant">
892
<term><literal>G_UNICODE_BREAK_ZERO_WIDTH_SPACE</literal></term>
893
<listitem><simpara>Zero Width Space (ZW)
894
</simpara></listitem>
896
<varlistentry id="G-UNICODE-BREAK-INSEPARABLE--CAPS" role="constant">
897
<term><literal>G_UNICODE_BREAK_INSEPARABLE</literal></term>
898
<listitem><simpara>Inseparable (IN)
899
</simpara></listitem>
901
<varlistentry id="G-UNICODE-BREAK-NON-BREAKING-GLUE--CAPS" role="constant">
902
<term><literal>G_UNICODE_BREAK_NON_BREAKING_GLUE</literal></term>
903
<listitem><simpara>Non-breaking ("Glue") (GL)
904
</simpara></listitem>
906
<varlistentry id="G-UNICODE-BREAK-CONTINGENT--CAPS" role="constant">
907
<term><literal>G_UNICODE_BREAK_CONTINGENT</literal></term>
908
<listitem><simpara>Contingent Break Opportunity (CB)
909
</simpara></listitem>
911
<varlistentry id="G-UNICODE-BREAK-SPACE--CAPS" role="constant">
912
<term><literal>G_UNICODE_BREAK_SPACE</literal></term>
913
<listitem><simpara>Space (SP)
914
</simpara></listitem>
916
<varlistentry id="G-UNICODE-BREAK-AFTER--CAPS" role="constant">
917
<term><literal>G_UNICODE_BREAK_AFTER</literal></term>
918
<listitem><simpara>Break Opportunity After (BA)
919
</simpara></listitem>
921
<varlistentry id="G-UNICODE-BREAK-BEFORE--CAPS" role="constant">
922
<term><literal>G_UNICODE_BREAK_BEFORE</literal></term>
923
<listitem><simpara>Break Opportunity Before (BB)
924
</simpara></listitem>
926
<varlistentry id="G-UNICODE-BREAK-BEFORE-AND-AFTER--CAPS" role="constant">
927
<term><literal>G_UNICODE_BREAK_BEFORE_AND_AFTER</literal></term>
928
<listitem><simpara>Break Opportunity Before and After (B2)
929
</simpara></listitem>
931
<varlistentry id="G-UNICODE-BREAK-HYPHEN--CAPS" role="constant">
932
<term><literal>G_UNICODE_BREAK_HYPHEN</literal></term>
933
<listitem><simpara>Hyphen (HY)
934
</simpara></listitem>
936
<varlistentry id="G-UNICODE-BREAK-NON-STARTER--CAPS" role="constant">
937
<term><literal>G_UNICODE_BREAK_NON_STARTER</literal></term>
938
<listitem><simpara>Nonstarter (NS)
939
</simpara></listitem>
941
<varlistentry id="G-UNICODE-BREAK-OPEN-PUNCTUATION--CAPS" role="constant">
942
<term><literal>G_UNICODE_BREAK_OPEN_PUNCTUATION</literal></term>
943
<listitem><simpara>Opening Punctuation (OP)
944
</simpara></listitem>
946
<varlistentry id="G-UNICODE-BREAK-CLOSE-PUNCTUATION--CAPS" role="constant">
947
<term><literal>G_UNICODE_BREAK_CLOSE_PUNCTUATION</literal></term>
948
<listitem><simpara>Closing Punctuation (CL)
949
</simpara></listitem>
951
<varlistentry id="G-UNICODE-BREAK-QUOTATION--CAPS" role="constant">
952
<term><literal>G_UNICODE_BREAK_QUOTATION</literal></term>
953
<listitem><simpara>Ambiguous Quotation (QU)
954
</simpara></listitem>
956
<varlistentry id="G-UNICODE-BREAK-EXCLAMATION--CAPS" role="constant">
957
<term><literal>G_UNICODE_BREAK_EXCLAMATION</literal></term>
958
<listitem><simpara>Exclamation/Interrogation (EX)
959
</simpara></listitem>
961
<varlistentry id="G-UNICODE-BREAK-IDEOGRAPHIC--CAPS" role="constant">
962
<term><literal>G_UNICODE_BREAK_IDEOGRAPHIC</literal></term>
963
<listitem><simpara>Ideographic (ID)
964
</simpara></listitem>
966
<varlistentry id="G-UNICODE-BREAK-NUMERIC--CAPS" role="constant">
967
<term><literal>G_UNICODE_BREAK_NUMERIC</literal></term>
968
<listitem><simpara>Numeric (NU)
969
</simpara></listitem>
971
<varlistentry id="G-UNICODE-BREAK-INFIX-SEPARATOR--CAPS" role="constant">
972
<term><literal>G_UNICODE_BREAK_INFIX_SEPARATOR</literal></term>
973
<listitem><simpara>Infix Separator (Numeric) (IS)
974
</simpara></listitem>
976
<varlistentry id="G-UNICODE-BREAK-SYMBOL--CAPS" role="constant">
977
<term><literal>G_UNICODE_BREAK_SYMBOL</literal></term>
978
<listitem><simpara>Symbols Allowing Break After (SY)
979
</simpara></listitem>
981
<varlistentry id="G-UNICODE-BREAK-ALPHABETIC--CAPS" role="constant">
982
<term><literal>G_UNICODE_BREAK_ALPHABETIC</literal></term>
983
<listitem><simpara>Ordinary Alphabetic and Symbol Characters (AL)
984
</simpara></listitem>
986
<varlistentry id="G-UNICODE-BREAK-PREFIX--CAPS" role="constant">
987
<term><literal>G_UNICODE_BREAK_PREFIX</literal></term>
988
<listitem><simpara>Prefix (Numeric) (PR)
989
</simpara></listitem>
991
<varlistentry id="G-UNICODE-BREAK-POSTFIX--CAPS" role="constant">
992
<term><literal>G_UNICODE_BREAK_POSTFIX</literal></term>
993
<listitem><simpara>Postfix (Numeric) (PO)
994
</simpara></listitem>
996
<varlistentry id="G-UNICODE-BREAK-COMPLEX-CONTEXT--CAPS" role="constant">
997
<term><literal>G_UNICODE_BREAK_COMPLEX_CONTEXT</literal></term>
998
<listitem><simpara>Complex Context Dependent (South East Asian) (SA)
999
</simpara></listitem>
1001
<varlistentry id="G-UNICODE-BREAK-AMBIGUOUS--CAPS" role="constant">
1002
<term><literal>G_UNICODE_BREAK_AMBIGUOUS</literal></term>
1003
<listitem><simpara>Ambiguous (Alphabetic or Ideographic) (AI)
1004
</simpara></listitem>
1006
<varlistentry id="G-UNICODE-BREAK-UNKNOWN--CAPS" role="constant">
1007
<term><literal>G_UNICODE_BREAK_UNKNOWN</literal></term>
1008
<listitem><simpara>Unknown (XX)
1009
</simpara></listitem>
1011
<varlistentry id="G-UNICODE-BREAK-NEXT-LINE--CAPS" role="constant">
1012
<term><literal>G_UNICODE_BREAK_NEXT_LINE</literal></term>
1013
<listitem><simpara>Next Line (NL)
1014
</simpara></listitem>
1016
<varlistentry id="G-UNICODE-BREAK-WORD-JOINER--CAPS" role="constant">
1017
<term><literal>G_UNICODE_BREAK_WORD_JOINER</literal></term>
1018
<listitem><simpara>Word Joiner (WJ)
1019
</simpara></listitem>
1021
<varlistentry id="G-UNICODE-BREAK-HANGUL-L-JAMO--CAPS" role="constant">
1022
<term><literal>G_UNICODE_BREAK_HANGUL_L_JAMO</literal></term>
1023
<listitem><simpara>Hangul L Jamo (JL)
1024
</simpara></listitem>
1026
<varlistentry id="G-UNICODE-BREAK-HANGUL-V-JAMO--CAPS" role="constant">
1027
<term><literal>G_UNICODE_BREAK_HANGUL_V_JAMO</literal></term>
1028
<listitem><simpara>Hangul V Jamo (JV)
1029
</simpara></listitem>
1031
<varlistentry id="G-UNICODE-BREAK-HANGUL-T-JAMO--CAPS" role="constant">
1032
<term><literal>G_UNICODE_BREAK_HANGUL_T_JAMO</literal></term>
1033
<listitem><simpara>Hangul T Jamo (JT)
1034
</simpara></listitem>
1036
<varlistentry id="G-UNICODE-BREAK-HANGUL-LV-SYLLABLE--CAPS" role="constant">
1037
<term><literal>G_UNICODE_BREAK_HANGUL_LV_SYLLABLE</literal></term>
1038
<listitem><simpara>Hangul LV Syllable (H2)
1039
</simpara></listitem>
1041
<varlistentry id="G-UNICODE-BREAK-HANGUL-LVT-SYLLABLE--CAPS" role="constant">
1042
<term><literal>G_UNICODE_BREAK_HANGUL_LVT_SYLLABLE</literal></term>
1043
<listitem><simpara>Hangul LVT Syllable (H3)
1044
</simpara></listitem>
1046
</variablelist></refsect2>
1047
<refsect2 id="g-unichar-break-type" role="function">
1048
<title>g_unichar_break_type ()</title>
1049
<indexterm zone="g-unichar-break-type"><primary sortas="unichar_break_type">g_unichar_break_type</primary></indexterm><programlisting><link linkend="GUnicodeBreakType">GUnicodeBreakType</link> g_unichar_break_type (<link linkend="gunichar">gunichar</link> c);</programlisting>
1051
Determines the break type of <parameter>c</parameter>. <parameter>c</parameter> should be a Unicode character
1052
(to derive a character from UTF-8 encoded text, use
1053
<link linkend="g-utf8-get-char"><function>g_utf8_get_char()</function></link>). The break type is used to find word and line
1054
breaks ("text boundaries"). Pango implements the Unicode boundary
1055
resolution algorithms and normally you would use a function such
1056
as <link linkend="pango-break"><function>pango_break()</function></link> instead of caring about break types yourself.</para>
1058
</para><variablelist role="params">
1059
<varlistentry><term><parameter>c</parameter> :</term>
1060
<listitem><simpara> a Unicode character
1061
</simpara></listitem></varlistentry>
1062
<varlistentry><term><emphasis>Returns</emphasis> :</term><listitem><simpara> the break type of <parameter>c</parameter>
1063
</simpara></listitem></varlistentry>
1064
</variablelist></refsect2>
1065
<refsect2 id="g-unichar-combining-class" role="function" condition="since:2.14">
1066
<title>g_unichar_combining_class ()</title>
1067
<indexterm zone="g-unichar-combining-class" role="2.14"><primary sortas="unichar_combining_class">g_unichar_combining_class</primary></indexterm><programlisting><link linkend="gint">gint</link> g_unichar_combining_class (<link linkend="gunichar">gunichar</link> uc);</programlisting>
1069
Determines the canonical combining class of a Unicode character.</para>
1071
</para><variablelist role="params">
1072
<varlistentry><term><parameter>uc</parameter> :</term>
1073
<listitem><simpara> a Unicode character
1074
</simpara></listitem></varlistentry>
1075
<varlistentry><term><emphasis>Returns</emphasis> :</term><listitem><simpara> the combining class of the character
1077
</simpara></listitem></varlistentry>
1078
</variablelist><para role="since">Since 2.14</para></refsect2>
1079
<refsect2 id="g-unicode-canonical-ordering" role="function">
1080
<title>g_unicode_canonical_ordering ()</title>
1081
<indexterm zone="g-unicode-canonical-ordering"><primary sortas="unicode_canonical_ordering">g_unicode_canonical_ordering</primary></indexterm><programlisting><link linkend="void">void</link> g_unicode_canonical_ordering (<link linkend="gunichar">gunichar</link> *string,
1082
<link linkend="gsize">gsize</link> len);</programlisting>
1084
Computes the canonical ordering of a string in-place.
1085
This rearranges decomposed characters in the string
1086
according to their combining classes. See the Unicode
1087
manual for more information.</para>
1089
</para><variablelist role="params">
1090
<varlistentry><term><parameter>string</parameter> :</term>
1091
<listitem><simpara> a UCS-4 encoded string.
1092
</simpara></listitem></varlistentry>
1093
<varlistentry><term><parameter>len</parameter> :</term>
1094
<listitem><simpara> the maximum length of <parameter>string</parameter> to use.
1095
</simpara></listitem></varlistentry>
1096
</variablelist></refsect2>
1097
<refsect2 id="g-unicode-canonical-decomposition" role="function">
1098
<title>g_unicode_canonical_decomposition ()</title>
1099
<indexterm zone="g-unicode-canonical-decomposition"><primary sortas="unicode_canonical_decomposition">g_unicode_canonical_decomposition</primary></indexterm><programlisting><link linkend="gunichar">gunichar</link> * g_unicode_canonical_decomposition (<link linkend="gunichar">gunichar</link> ch,
1100
<link linkend="gsize">gsize</link> *result_len);</programlisting>
1102
Computes the canonical decomposition of a Unicode character.</para>
1104
</para><variablelist role="params">
1105
<varlistentry><term><parameter>ch</parameter> :</term>
1106
<listitem><simpara> a Unicode character.
1107
</simpara></listitem></varlistentry>
1108
<varlistentry><term><parameter>result_len</parameter> :</term>
1109
<listitem><simpara> location to store the length of the return value.
1110
</simpara></listitem></varlistentry>
1111
<varlistentry><term><emphasis>Returns</emphasis> :</term><listitem><simpara> a newly allocated string of Unicode characters.
1112
<parameter>result_len</parameter> is set to the resulting length of the string.
1113
</simpara></listitem></varlistentry>
1114
</variablelist></refsect2>
1115
<refsect2 id="g-unichar-get-mirror-char" role="function" condition="since:2.4">
1116
<title>g_unichar_get_mirror_char ()</title>
1117
<indexterm zone="g-unichar-get-mirror-char" role="2.4"><primary sortas="unichar_get_mirror_char">g_unichar_get_mirror_char</primary></indexterm><programlisting><link linkend="gboolean">gboolean</link> g_unichar_get_mirror_char (<link linkend="gunichar">gunichar</link> ch,
1118
<link linkend="gunichar">gunichar</link> *mirrored_ch);</programlisting>
1120
In Unicode, some characters are <firstterm>mirrored</firstterm>. This
1121
means that their images are mirrored horizontally in text that is laid
1122
out from right to left. For instance, "(" would become its mirror image,
1123
")", in right-to-left text.
1126
If <parameter>ch</parameter> has the Unicode mirrored property and there is another Unicode
1127
character that typically has a glyph that is the mirror image of <parameter>ch</parameter>'s
1128
glyph and <parameter>mirrored_ch</parameter> is set, it puts that character in the address
1129
pointed to by <parameter>mirrored_ch</parameter>. Otherwise the original character is stored there.</para>
1131
</para><variablelist role="params">
1132
<varlistentry><term><parameter>ch</parameter> :</term>
1133
<listitem><simpara> a Unicode character
1134
</simpara></listitem></varlistentry>
1135
<varlistentry><term><parameter>mirrored_ch</parameter> :</term>
1136
<listitem><simpara> location to store the mirrored character
1137
</simpara></listitem></varlistentry>
1138
<varlistentry><term><emphasis>Returns</emphasis> :</term><listitem><simpara> <link linkend="TRUE--CAPS"><literal>TRUE</literal></link> if <parameter>ch</parameter> has a mirrored character, <link linkend="FALSE--CAPS"><literal>FALSE</literal></link> otherwise
1140
</simpara></listitem></varlistentry>
1141
</variablelist><para role="since">Since 2.4</para></refsect2>
1142
<refsect2 id="GUnicodeScript" role="enum">
1143
<title>enum GUnicodeScript</title>
1144
<indexterm zone="GUnicodeScript"><primary sortas="UnicodeScript">GUnicodeScript</primary></indexterm><programlisting>typedef enum
1145
{ /* ISO 15924 code */
1146
G_UNICODE_SCRIPT_INVALID_CODE = -1,
1147
G_UNICODE_SCRIPT_COMMON = 0, /* Zyyy */
1148
G_UNICODE_SCRIPT_INHERITED, /* Qaai */
1149
G_UNICODE_SCRIPT_ARABIC, /* Arab */
1150
G_UNICODE_SCRIPT_ARMENIAN, /* Armn */
1151
G_UNICODE_SCRIPT_BENGALI, /* Beng */
1152
G_UNICODE_SCRIPT_BOPOMOFO, /* Bopo */
1153
G_UNICODE_SCRIPT_CHEROKEE, /* Cher */
1154
G_UNICODE_SCRIPT_COPTIC, /* Qaac */
1155
G_UNICODE_SCRIPT_CYRILLIC, /* Cyrl (Cyrs) */
1156
G_UNICODE_SCRIPT_DESERET, /* Dsrt */
1157
G_UNICODE_SCRIPT_DEVANAGARI, /* Deva */
1158
G_UNICODE_SCRIPT_ETHIOPIC, /* Ethi */
1159
G_UNICODE_SCRIPT_GEORGIAN, /* Geor (Geon, Geoa) */
1160
G_UNICODE_SCRIPT_GOTHIC, /* Goth */
1161
G_UNICODE_SCRIPT_GREEK, /* Grek */
1162
G_UNICODE_SCRIPT_GUJARATI, /* Gujr */
1163
G_UNICODE_SCRIPT_GURMUKHI, /* Guru */
1164
G_UNICODE_SCRIPT_HAN, /* Hani */
1165
G_UNICODE_SCRIPT_HANGUL, /* Hang */
1166
G_UNICODE_SCRIPT_HEBREW, /* Hebr */
1167
G_UNICODE_SCRIPT_HIRAGANA, /* Hira */
1168
G_UNICODE_SCRIPT_KANNADA, /* Knda */
1169
G_UNICODE_SCRIPT_KATAKANA, /* Kana */
1170
G_UNICODE_SCRIPT_KHMER, /* Khmr */
1171
G_UNICODE_SCRIPT_LAO, /* Laoo */
1172
G_UNICODE_SCRIPT_LATIN, /* Latn (Latf, Latg) */
1173
G_UNICODE_SCRIPT_MALAYALAM, /* Mlym */
1174
G_UNICODE_SCRIPT_MONGOLIAN, /* Mong */
1175
G_UNICODE_SCRIPT_MYANMAR, /* Mymr */
1176
G_UNICODE_SCRIPT_OGHAM, /* Ogam */
1177
G_UNICODE_SCRIPT_OLD_ITALIC, /* Ital */
1178
G_UNICODE_SCRIPT_ORIYA, /* Orya */
1179
G_UNICODE_SCRIPT_RUNIC, /* Runr */
1180
G_UNICODE_SCRIPT_SINHALA, /* Sinh */
1181
G_UNICODE_SCRIPT_SYRIAC, /* Syrc (Syrj, Syrn, Syre) */
1182
G_UNICODE_SCRIPT_TAMIL, /* Taml */
1183
G_UNICODE_SCRIPT_TELUGU, /* Telu */
1184
G_UNICODE_SCRIPT_THAANA, /* Thaa */
1185
G_UNICODE_SCRIPT_THAI, /* Thai */
1186
G_UNICODE_SCRIPT_TIBETAN, /* Tibt */
1187
G_UNICODE_SCRIPT_CANADIAN_ABORIGINAL, /* Cans */
1188
G_UNICODE_SCRIPT_YI, /* Yiii */
1189
G_UNICODE_SCRIPT_TAGALOG, /* Tglg */
1190
G_UNICODE_SCRIPT_HANUNOO, /* Hano */
1191
G_UNICODE_SCRIPT_BUHID, /* Buhd */
1192
G_UNICODE_SCRIPT_TAGBANWA, /* Tagb */
1194
/* Unicode-4.0 additions */
1195
G_UNICODE_SCRIPT_BRAILLE, /* Brai */
1196
G_UNICODE_SCRIPT_CYPRIOT, /* Cprt */
1197
G_UNICODE_SCRIPT_LIMBU, /* Limb */
1198
G_UNICODE_SCRIPT_OSMANYA, /* Osma */
1199
G_UNICODE_SCRIPT_SHAVIAN, /* Shaw */
1200
G_UNICODE_SCRIPT_LINEAR_B, /* Linb */
1201
G_UNICODE_SCRIPT_TAI_LE, /* Tale */
1202
G_UNICODE_SCRIPT_UGARITIC, /* Ugar */
1204
/* Unicode-4.1 additions */
1205
G_UNICODE_SCRIPT_NEW_TAI_LUE, /* Talu */
1206
G_UNICODE_SCRIPT_BUGINESE, /* Bugi */
1207
G_UNICODE_SCRIPT_GLAGOLITIC, /* Glag */
1208
G_UNICODE_SCRIPT_TIFINAGH, /* Tfng */
1209
G_UNICODE_SCRIPT_SYLOTI_NAGRI, /* Sylo */
1210
G_UNICODE_SCRIPT_OLD_PERSIAN, /* Xpeo */
1211
G_UNICODE_SCRIPT_KHAROSHTHI, /* Khar */
1213
/* Unicode-5.0 additions */
1214
G_UNICODE_SCRIPT_UNKNOWN, /* Zzzz */
1215
G_UNICODE_SCRIPT_BALINESE, /* Bali */
1216
G_UNICODE_SCRIPT_CUNEIFORM, /* Xsux */
1217
G_UNICODE_SCRIPT_PHOENICIAN, /* Phnx */
1218
G_UNICODE_SCRIPT_PHAGS_PA, /* Phag */
1219
G_UNICODE_SCRIPT_NKO, /* Nkoo */
1221
/* Unicode-5.1 additions */
1222
G_UNICODE_SCRIPT_KAYAH_LI, /* Kali */
1223
G_UNICODE_SCRIPT_LEPCHA, /* Lepc */
1224
G_UNICODE_SCRIPT_REJANG, /* Rjng */
1225
G_UNICODE_SCRIPT_SUNDANESE, /* Sund */
1226
G_UNICODE_SCRIPT_SAURASHTRA, /* Saur */
1227
G_UNICODE_SCRIPT_CHAM, /* Cham */
1228
G_UNICODE_SCRIPT_OL_CHIKI, /* Olck */
1229
G_UNICODE_SCRIPT_VAI, /* Vaii */
1230
G_UNICODE_SCRIPT_CARIAN, /* Cari */
1231
G_UNICODE_SCRIPT_LYCIAN, /* Lyci */
1232
G_UNICODE_SCRIPT_LYDIAN /* Lydi */
} GUnicodeScript;
</programlisting>
1236
The <link linkend="GUnicodeScript"><type>GUnicodeScript</type></link> enumeration identifies different writing
1237
systems. The values correspond to the names as defined in the
1238
Unicode standard. The enumeration has been added in GLib 2.14,
1239
and is interchangeable with <link linkend="PangoScript"><type>PangoScript</type></link>.
1240
Note that new types may be added in the future. Applications
1241
should be ready to handle unknown values.
1243
See <ulink url="http://www.unicode.org/reports/tr24/">Unicode Standard Annex
1244
#24: Script names</ulink>.
1245
</para><variablelist role="enum">
1246
<varlistentry id="G-UNICODE-SCRIPT-INVALID-CODE--CAPS" role="constant">
1247
<term><literal>G_UNICODE_SCRIPT_INVALID_CODE</literal></term>
1248
<listitem><simpara>a value never returned from <link linkend="g-unichar-get-script"><function>g_unichar_get_script()</function></link>
1249
</simpara></listitem>
1251
<varlistentry id="G-UNICODE-SCRIPT-COMMON--CAPS" role="constant">
1252
<term><literal>G_UNICODE_SCRIPT_COMMON</literal></term>
1253
<listitem><simpara> a character used by multiple different scripts
1254
</simpara></listitem>
1256
<varlistentry id="G-UNICODE-SCRIPT-INHERITED--CAPS" role="constant">
1257
<term><literal>G_UNICODE_SCRIPT_INHERITED</literal></term>
1258
<listitem><simpara> a mark glyph that takes its script from the
1259
base glyph to which it is attached
1260
</simpara></listitem>
1262
<varlistentry id="G-UNICODE-SCRIPT-ARABIC--CAPS" role="constant">
1263
<term><literal>G_UNICODE_SCRIPT_ARABIC</literal></term>
1264
<listitem><simpara> Arabic
1265
</simpara></listitem>
1267
<varlistentry id="G-UNICODE-SCRIPT-ARMENIAN--CAPS" role="constant">
1268
<term><literal>G_UNICODE_SCRIPT_ARMENIAN</literal></term>
1269
<listitem><simpara> Armenian
1270
</simpara></listitem>
1272
<varlistentry id="G-UNICODE-SCRIPT-BENGALI--CAPS" role="constant">
1273
<term><literal>G_UNICODE_SCRIPT_BENGALI</literal></term>
1274
<listitem><simpara> Bengali
1275
</simpara></listitem>
1277
<varlistentry id="G-UNICODE-SCRIPT-BOPOMOFO--CAPS" role="constant">
1278
<term><literal>G_UNICODE_SCRIPT_BOPOMOFO</literal></term>
1279
<listitem><simpara> Bopomofo
1280
</simpara></listitem>
1282
<varlistentry id="G-UNICODE-SCRIPT-CHEROKEE--CAPS" role="constant">
1283
<term><literal>G_UNICODE_SCRIPT_CHEROKEE</literal></term>
1284
<listitem><simpara> Cherokee
1285
</simpara></listitem>
1287
<varlistentry id="G-UNICODE-SCRIPT-COPTIC--CAPS" role="constant">
1288
<term><literal>G_UNICODE_SCRIPT_COPTIC</literal></term>
1289
<listitem><simpara> Coptic
1290
</simpara></listitem>
1292
<varlistentry id="G-UNICODE-SCRIPT-CYRILLIC--CAPS" role="constant">
1293
<term><literal>G_UNICODE_SCRIPT_CYRILLIC</literal></term>
1294
<listitem><simpara> Cyrillic
1295
</simpara></listitem>
1297
<varlistentry id="G-UNICODE-SCRIPT-DESERET--CAPS" role="constant">
1298
<term><literal>G_UNICODE_SCRIPT_DESERET</literal></term>
1299
<listitem><simpara> Deseret
1300
</simpara></listitem>
1302
<varlistentry id="G-UNICODE-SCRIPT-DEVANAGARI--CAPS" role="constant">
1303
<term><literal>G_UNICODE_SCRIPT_DEVANAGARI</literal></term>
1304
<listitem><simpara>Devanagari
1305
</simpara></listitem>
1307
<varlistentry id="G-UNICODE-SCRIPT-ETHIOPIC--CAPS" role="constant">
1308
<term><literal>G_UNICODE_SCRIPT_ETHIOPIC</literal></term>
1309
<listitem><simpara> Ethiopic
1310
</simpara></listitem>
1312
<varlistentry id="G-UNICODE-SCRIPT-GEORGIAN--CAPS" role="constant">
1313
<term><literal>G_UNICODE_SCRIPT_GEORGIAN</literal></term>
1314
<listitem><simpara> Georgian
1315
</simpara></listitem>
1317
<varlistentry id="G-UNICODE-SCRIPT-GOTHIC--CAPS" role="constant">
1318
<term><literal>G_UNICODE_SCRIPT_GOTHIC</literal></term>
1319
<listitem><simpara> Gothic
1320
</simpara></listitem>
1322
<varlistentry id="G-UNICODE-SCRIPT-GREEK--CAPS" role="constant">
1323
<term><literal>G_UNICODE_SCRIPT_GREEK</literal></term>
1324
<listitem><simpara> Greek
1325
</simpara></listitem>
1327
<varlistentry id="G-UNICODE-SCRIPT-GUJARATI--CAPS" role="constant">
1328
<term><literal>G_UNICODE_SCRIPT_GUJARATI</literal></term>
1329
<listitem><simpara> Gujarati
1330
</simpara></listitem>
1332
<varlistentry id="G-UNICODE-SCRIPT-GURMUKHI--CAPS" role="constant">
1333
<term><literal>G_UNICODE_SCRIPT_GURMUKHI</literal></term>
1334
<listitem><simpara> Gurmukhi
1335
</simpara></listitem>
1337
<varlistentry id="G-UNICODE-SCRIPT-HAN--CAPS" role="constant">
1338
<term><literal>G_UNICODE_SCRIPT_HAN</literal></term>
1339
<listitem><simpara> Han
1340
</simpara></listitem>
1342
<varlistentry id="G-UNICODE-SCRIPT-HANGUL--CAPS" role="constant">
1343
<term><literal>G_UNICODE_SCRIPT_HANGUL</literal></term>
1344
<listitem><simpara> Hangul
1345
</simpara></listitem>
1347
<varlistentry id="G-UNICODE-SCRIPT-HEBREW--CAPS" role="constant">
1348
<term><literal>G_UNICODE_SCRIPT_HEBREW</literal></term>
1349
<listitem><simpara> Hebrew
1350
</simpara></listitem>
1352
<varlistentry id="G-UNICODE-SCRIPT-HIRAGANA--CAPS" role="constant">
1353
<term><literal>G_UNICODE_SCRIPT_HIRAGANA</literal></term>
1354
<listitem><simpara> Hiragana
1355
</simpara></listitem>
1357
<varlistentry id="G-UNICODE-SCRIPT-KANNADA--CAPS" role="constant">
1358
<term><literal>G_UNICODE_SCRIPT_KANNADA</literal></term>
1359
<listitem><simpara> Kannada
1360
</simpara></listitem>
1362
<varlistentry id="G-UNICODE-SCRIPT-KATAKANA--CAPS" role="constant">
1363
<term><literal>G_UNICODE_SCRIPT_KATAKANA</literal></term>
1364
<listitem><simpara> Katakana
1365
</simpara></listitem>
1367
<varlistentry id="G-UNICODE-SCRIPT-KHMER--CAPS" role="constant">
1368
<term><literal>G_UNICODE_SCRIPT_KHMER</literal></term>
1369
<listitem><simpara> Khmer
1370
</simpara></listitem>
1372
<varlistentry id="G-UNICODE-SCRIPT-LAO--CAPS" role="constant">
1373
<term><literal>G_UNICODE_SCRIPT_LAO</literal></term>
1374
<listitem><simpara> Lao
1375
</simpara></listitem>
1377
<varlistentry id="G-UNICODE-SCRIPT-LATIN--CAPS" role="constant">
1378
<term><literal>G_UNICODE_SCRIPT_LATIN</literal></term>
1379
<listitem><simpara> Latin
1380
</simpara></listitem>
1382
<varlistentry id="G-UNICODE-SCRIPT-MALAYALAM--CAPS" role="constant">
1383
<term><literal>G_UNICODE_SCRIPT_MALAYALAM</literal></term>
1384
<listitem><simpara> Malayalam
1385
</simpara></listitem>
1387
<varlistentry id="G-UNICODE-SCRIPT-MONGOLIAN--CAPS" role="constant">
1388
<term><literal>G_UNICODE_SCRIPT_MONGOLIAN</literal></term>
1389
<listitem><simpara> Mongolian
1390
</simpara></listitem>
1392
<varlistentry id="G-UNICODE-SCRIPT-MYANMAR--CAPS" role="constant">
1393
<term><literal>G_UNICODE_SCRIPT_MYANMAR</literal></term>
1394
<listitem><simpara> Myanmar
1395
</simpara></listitem>
1397
<varlistentry id="G-UNICODE-SCRIPT-OGHAM--CAPS" role="constant">
1398
<term><literal>G_UNICODE_SCRIPT_OGHAM</literal></term>
1399
<listitem><simpara> Ogham
1400
</simpara></listitem>
1402
<varlistentry id="G-UNICODE-SCRIPT-OLD-ITALIC--CAPS" role="constant">
1403
<term><literal>G_UNICODE_SCRIPT_OLD_ITALIC</literal></term>
1404
<listitem><simpara>Old Italic
1405
</simpara></listitem>
1407
<varlistentry id="G-UNICODE-SCRIPT-ORIYA--CAPS" role="constant">
1408
<term><literal>G_UNICODE_SCRIPT_ORIYA</literal></term>
1409
<listitem><simpara> Oriya
1410
</simpara></listitem>
1412
<varlistentry id="G-UNICODE-SCRIPT-RUNIC--CAPS" role="constant">
1413
<term><literal>G_UNICODE_SCRIPT_RUNIC</literal></term>
1414
<listitem><simpara> Runic
1415
</simpara></listitem>
1417
<varlistentry id="G-UNICODE-SCRIPT-SINHALA--CAPS" role="constant">
1418
<term><literal>G_UNICODE_SCRIPT_SINHALA</literal></term>
1419
<listitem><simpara> Sinhala
1420
</simpara></listitem>
1422
<varlistentry id="G-UNICODE-SCRIPT-SYRIAC--CAPS" role="constant">
1423
<term><literal>G_UNICODE_SCRIPT_SYRIAC</literal></term>
1424
<listitem><simpara> Syriac
1425
</simpara></listitem>
1427
<varlistentry id="G-UNICODE-SCRIPT-TAMIL--CAPS" role="constant">
1428
<term><literal>G_UNICODE_SCRIPT_TAMIL</literal></term>
1429
<listitem><simpara> Tamil
1430
</simpara></listitem>
1432
<varlistentry id="G-UNICODE-SCRIPT-TELUGU--CAPS" role="constant">
1433
<term><literal>G_UNICODE_SCRIPT_TELUGU</literal></term>
1434
<listitem><simpara> Telugu
1435
</simpara></listitem>
1437
<varlistentry id="G-UNICODE-SCRIPT-THAANA--CAPS" role="constant">
1438
<term><literal>G_UNICODE_SCRIPT_THAANA</literal></term>
1439
<listitem><simpara> Thaana
1440
</simpara></listitem>
1442
<varlistentry id="G-UNICODE-SCRIPT-THAI--CAPS" role="constant">
1443
<term><literal>G_UNICODE_SCRIPT_THAI</literal></term>
1444
<listitem><simpara> Thai
1445
</simpara></listitem>
1447
<varlistentry id="G-UNICODE-SCRIPT-TIBETAN--CAPS" role="constant">
1448
<term><literal>G_UNICODE_SCRIPT_TIBETAN</literal></term>
1449
<listitem><simpara> Tibetan
1450
</simpara></listitem>
1452
<varlistentry id="G-UNICODE-SCRIPT-CANADIAN-ABORIGINAL--CAPS" role="constant">
1453
<term><literal>G_UNICODE_SCRIPT_CANADIAN_ABORIGINAL</literal></term>
<listitem><simpara> Canadian Aboriginal
1456
</simpara></listitem>
1458
<varlistentry id="G-UNICODE-SCRIPT-YI--CAPS" role="constant">
1459
<term><literal>G_UNICODE_SCRIPT_YI</literal></term>
1460
<listitem><simpara> Yi
1461
</simpara></listitem>
1463
<varlistentry id="G-UNICODE-SCRIPT-TAGALOG--CAPS" role="constant">
1464
<term><literal>G_UNICODE_SCRIPT_TAGALOG</literal></term>
1465
<listitem><simpara> Tagalog
1466
</simpara></listitem>
1468
<varlistentry id="G-UNICODE-SCRIPT-HANUNOO--CAPS" role="constant">
1469
<term><literal>G_UNICODE_SCRIPT_HANUNOO</literal></term>
1470
<listitem><simpara> Hanunoo
1471
</simpara></listitem>
1473
<varlistentry id="G-UNICODE-SCRIPT-BUHID--CAPS" role="constant">
1474
<term><literal>G_UNICODE_SCRIPT_BUHID</literal></term>
1475
<listitem><simpara> Buhid
1476
</simpara></listitem>
1478
<varlistentry id="G-UNICODE-SCRIPT-TAGBANWA--CAPS" role="constant">
1479
<term><literal>G_UNICODE_SCRIPT_TAGBANWA</literal></term>
1480
<listitem><simpara> Tagbanwa
1481
</simpara></listitem>
1483
<varlistentry id="G-UNICODE-SCRIPT-BRAILLE--CAPS" role="constant">
1484
<term><literal>G_UNICODE_SCRIPT_BRAILLE</literal></term>
1485
<listitem><simpara> Braille
1486
</simpara></listitem>
1488
<varlistentry id="G-UNICODE-SCRIPT-CYPRIOT--CAPS" role="constant">
1489
<term><literal>G_UNICODE_SCRIPT_CYPRIOT</literal></term>
1490
<listitem><simpara> Cypriot
1491
</simpara></listitem>
1493
<varlistentry id="G-UNICODE-SCRIPT-LIMBU--CAPS" role="constant">
1494
<term><literal>G_UNICODE_SCRIPT_LIMBU</literal></term>
1495
<listitem><simpara> Limbu
1496
</simpara></listitem>
1498
<varlistentry id="G-UNICODE-SCRIPT-OSMANYA--CAPS" role="constant">
1499
<term><literal>G_UNICODE_SCRIPT_OSMANYA</literal></term>
1500
<listitem><simpara> Osmanya
1501
</simpara></listitem>
1503
<varlistentry id="G-UNICODE-SCRIPT-SHAVIAN--CAPS" role="constant">
1504
<term><literal>G_UNICODE_SCRIPT_SHAVIAN</literal></term>
1505
<listitem><simpara> Shavian
1506
</simpara></listitem>
1508
<varlistentry id="G-UNICODE-SCRIPT-LINEAR-B--CAPS" role="constant">
1509
<term><literal>G_UNICODE_SCRIPT_LINEAR_B</literal></term>
1510
<listitem><simpara> Linear B
1511
</simpara></listitem>
1513
<varlistentry id="G-UNICODE-SCRIPT-TAI-LE--CAPS" role="constant">
1514
<term><literal>G_UNICODE_SCRIPT_TAI_LE</literal></term>
1515
<listitem><simpara> Tai Le
1516
</simpara></listitem>
1518
<varlistentry id="G-UNICODE-SCRIPT-UGARITIC--CAPS" role="constant">
1519
<term><literal>G_UNICODE_SCRIPT_UGARITIC</literal></term>
1520
<listitem><simpara> Ugaritic
1521
</simpara></listitem>
1523
<varlistentry id="G-UNICODE-SCRIPT-NEW-TAI-LUE--CAPS" role="constant">
1524
<term><literal>G_UNICODE_SCRIPT_NEW_TAI_LUE</literal></term>
1525
<listitem><simpara>New Tai Lue
1526
</simpara></listitem>
1528
<varlistentry id="G-UNICODE-SCRIPT-BUGINESE--CAPS" role="constant">
1529
<term><literal>G_UNICODE_SCRIPT_BUGINESE</literal></term>
1530
<listitem><simpara> Buginese
1531
</simpara></listitem>
1533
<varlistentry id="G-UNICODE-SCRIPT-GLAGOLITIC--CAPS" role="constant">
1534
<term><literal>G_UNICODE_SCRIPT_GLAGOLITIC</literal></term>
1535
<listitem><simpara>Glagolitic
1536
</simpara></listitem>
1538
<varlistentry id="G-UNICODE-SCRIPT-TIFINAGH--CAPS" role="constant">
1539
<term><literal>G_UNICODE_SCRIPT_TIFINAGH</literal></term>
1540
<listitem><simpara> Tifinagh
1541
</simpara></listitem>
1543
<varlistentry id="G-UNICODE-SCRIPT-SYLOTI-NAGRI--CAPS" role="constant">
1544
<term><literal>G_UNICODE_SCRIPT_SYLOTI_NAGRI</literal></term>
1545
<listitem><simpara>Syloti Nagri
1546
</simpara></listitem>
1548
<varlistentry id="G-UNICODE-SCRIPT-OLD-PERSIAN--CAPS" role="constant">
1549
<term><literal>G_UNICODE_SCRIPT_OLD_PERSIAN</literal></term>
1550
<listitem><simpara>Old Persian
1551
</simpara></listitem>
1553
<varlistentry id="G-UNICODE-SCRIPT-KHAROSHTHI--CAPS" role="constant">
1554
<term><literal>G_UNICODE_SCRIPT_KHAROSHTHI</literal></term>
1555
<listitem><simpara>Kharoshthi
1556
</simpara></listitem>
1558
<varlistentry id="G-UNICODE-SCRIPT-UNKNOWN--CAPS" role="constant">
1559
<term><literal>G_UNICODE_SCRIPT_UNKNOWN</literal></term>
1560
<listitem><simpara> an unassigned code point
1561
</simpara></listitem>
1563
<varlistentry id="G-UNICODE-SCRIPT-BALINESE--CAPS" role="constant">
1564
<term><literal>G_UNICODE_SCRIPT_BALINESE</literal></term>
1565
<listitem><simpara> Balinese
1566
</simpara></listitem>
1568
<varlistentry id="G-UNICODE-SCRIPT-CUNEIFORM--CAPS" role="constant">
1569
<term><literal>G_UNICODE_SCRIPT_CUNEIFORM</literal></term>
1570
<listitem><simpara> Cuneiform
1571
</simpara></listitem>
1573
<varlistentry id="G-UNICODE-SCRIPT-PHOENICIAN--CAPS" role="constant">
1574
<term><literal>G_UNICODE_SCRIPT_PHOENICIAN</literal></term>
1575
<listitem><simpara>Phoenician
1576
</simpara></listitem>
1578
<varlistentry id="G-UNICODE-SCRIPT-PHAGS-PA--CAPS" role="constant">
1579
<term><literal>G_UNICODE_SCRIPT_PHAGS_PA</literal></term>
1580
<listitem><simpara> Phags-pa
1581
</simpara></listitem>
1583
<varlistentry id="G-UNICODE-SCRIPT-NKO--CAPS" role="constant">
1584
<term><literal>G_UNICODE_SCRIPT_NKO</literal></term>
1585
<listitem><simpara> N'Ko
1586
</simpara></listitem>
1588
<varlistentry id="G-UNICODE-SCRIPT-KAYAH-LI--CAPS" role="constant">
1589
<term><literal>G_UNICODE_SCRIPT_KAYAH_LI</literal></term>
1590
<listitem><simpara> Kayah Li. Since 2.16.3
1591
</simpara></listitem>
1593
<varlistentry id="G-UNICODE-SCRIPT-LEPCHA--CAPS" role="constant">
1594
<term><literal>G_UNICODE_SCRIPT_LEPCHA</literal></term>
1595
<listitem><simpara> Lepcha. Since 2.16.3
1596
</simpara></listitem>
1598
<varlistentry id="G-UNICODE-SCRIPT-REJANG--CAPS" role="constant">
1599
<term><literal>G_UNICODE_SCRIPT_REJANG</literal></term>
1600
<listitem><simpara> Rejang. Since 2.16.3
1601
</simpara></listitem>
1603
<varlistentry id="G-UNICODE-SCRIPT-SUNDANESE--CAPS" role="constant">
1604
<term><literal>G_UNICODE_SCRIPT_SUNDANESE</literal></term>
1605
<listitem><simpara> Sundanese. Since 2.16.3
1606
</simpara></listitem>
1608
<varlistentry id="G-UNICODE-SCRIPT-SAURASHTRA--CAPS" role="constant">
1609
<term><literal>G_UNICODE_SCRIPT_SAURASHTRA</literal></term>
1610
<listitem><simpara>Saurashtra. Since 2.16.3
1611
</simpara></listitem>
1613
<varlistentry id="G-UNICODE-SCRIPT-CHAM--CAPS" role="constant">
1614
<term><literal>G_UNICODE_SCRIPT_CHAM</literal></term>
1615
<listitem><simpara> Cham. Since 2.16.3
1616
</simpara></listitem>
1618
<varlistentry id="G-UNICODE-SCRIPT-OL-CHIKI--CAPS" role="constant">
1619
<term><literal>G_UNICODE_SCRIPT_OL_CHIKI</literal></term>
1620
<listitem><simpara> Ol Chiki. Since 2.16.3
1621
</simpara></listitem>
1623
<varlistentry id="G-UNICODE-SCRIPT-VAI--CAPS" role="constant">
1624
<term><literal>G_UNICODE_SCRIPT_VAI</literal></term>
1625
<listitem><simpara> Vai. Since 2.16.3
1626
</simpara></listitem>
1628
<varlistentry id="G-UNICODE-SCRIPT-CARIAN--CAPS" role="constant">
1629
<term><literal>G_UNICODE_SCRIPT_CARIAN</literal></term>
1630
<listitem><simpara> Carian. Since 2.16.3
1631
</simpara></listitem>
1633
<varlistentry id="G-UNICODE-SCRIPT-LYCIAN--CAPS" role="constant">
1634
<term><literal>G_UNICODE_SCRIPT_LYCIAN</literal></term>
1635
<listitem><simpara> Lycian. Since 2.16.3
1636
</simpara></listitem>
1638
<varlistentry id="G-UNICODE-SCRIPT-LYDIAN--CAPS" role="constant">
1639
<term><literal>G_UNICODE_SCRIPT_LYDIAN</literal></term>
1640
<listitem><simpara> Lydian. Since 2.16.3
1641
</simpara></listitem>
1643
</variablelist></refsect2>
1644
<refsect2 id="g-unichar-get-script" role="function" condition="since:2.14">
1645
<title>g_unichar_get_script ()</title>
1646
<indexterm zone="g-unichar-get-script" role="2.14"><primary sortas="unichar_get_script">g_unichar_get_script</primary></indexterm><programlisting><link linkend="GUnicodeScript">GUnicodeScript</link> g_unichar_get_script (<link linkend="gunichar">gunichar</link> ch);</programlisting>
1648
Looks up the <link linkend="GUnicodeScript"><type>GUnicodeScript</type></link> for a particular character (as defined
1649
by Unicode Standard Annex #24). No check is made for <parameter>ch</parameter> being a
1650
valid Unicode character; if you pass in an invalid character, the
1651
result is undefined.
1654
This function is equivalent to <link linkend="pango-script-for-unichar"><function>pango_script_for_unichar()</function></link> and the
1655
two are interchangeable.</para>
1657
</para><variablelist role="params">
1658
<varlistentry><term><parameter>ch</parameter> :</term>
1659
<listitem><simpara> a Unicode character
1660
</simpara></listitem></varlistentry>
1661
<varlistentry><term><emphasis>Returns</emphasis> :</term><listitem><simpara> the <link linkend="GUnicodeScript"><type>GUnicodeScript</type></link> for the character.
1663
</simpara></listitem></varlistentry>
1664
</variablelist><para role="since">Since 2.14</para></refsect2>
1665
<refsect2 id="g-utf8-next-char" role="macro">
1666
<title>g_utf8_next_char()</title>
1667
<indexterm zone="g-utf8-next-char"><primary sortas="utf8_next_char">g_utf8_next_char</primary></indexterm><programlisting>#define g_utf8_next_char(p)</programlisting>
1669
Skips to the next character in a UTF-8 string. The string must be
1670
valid; this macro is as fast as possible, and has no error-checking.
1671
You would use this macro to iterate over a string character by
1672
character. The macro returns the start of the next UTF-8 character.
1673
Before using this macro, use <link linkend="g-utf8-validate"><function>g_utf8_validate()</function></link> to validate strings
1674
that may contain invalid UTF-8.
1675
</para><variablelist role="params">
1676
<varlistentry><term><parameter>p</parameter> :</term>
1677
<listitem><simpara>Pointer to the start of a valid UTF-8 character.
1678
</simpara></listitem></varlistentry>
1679
</variablelist></refsect2>
1680
<refsect2 id="g-utf8-get-char" role="function">
1681
<title>g_utf8_get_char ()</title>
1682
<indexterm zone="g-utf8-get-char"><primary sortas="utf8_get_char">g_utf8_get_char</primary></indexterm><programlisting><link linkend="gunichar">gunichar</link> g_utf8_get_char (const <link linkend="gchar">gchar</link> *p);</programlisting>
1684
Converts a sequence of bytes encoded as UTF-8 to a Unicode character.
1685
If <parameter>p</parameter> does not point to a valid UTF-8 encoded character, results are
1686
undefined. If you are not sure that the bytes are complete
1687
valid Unicode characters, you should use <link linkend="g-utf8-get-char-validated"><function>g_utf8_get_char_validated()</function></link> instead.
1690
</para><variablelist role="params">
1691
<varlistentry><term><parameter>p</parameter> :</term>
1692
<listitem><simpara> a pointer to Unicode character encoded as UTF-8
1693
</simpara></listitem></varlistentry>
1694
<varlistentry><term><emphasis>Returns</emphasis> :</term><listitem><simpara> the resulting character
1695
</simpara></listitem></varlistentry>
1696
</variablelist></refsect2>
1697
<refsect2 id="g-utf8-get-char-validated" role="function">
1698
<title>g_utf8_get_char_validated ()</title>
1699
<indexterm zone="g-utf8-get-char-validated"><primary sortas="utf8_get_char_validated">g_utf8_get_char_validated</primary></indexterm><programlisting><link linkend="gunichar">gunichar</link> g_utf8_get_char_validated (const <link linkend="gchar">gchar</link> *p,
1700
<link linkend="gssize">gssize</link> max_len);</programlisting>
1702
Converts a sequence of bytes encoded as UTF-8 to a Unicode character.
1703
This function checks for incomplete characters, for invalid characters
1704
such as characters that are out of the range of Unicode, and for
1705
overlong encodings of valid characters.</para>
1707
</para><variablelist role="params">
1708
<varlistentry><term><parameter>p</parameter> :</term>
1709
<listitem><simpara> a pointer to Unicode character encoded as UTF-8
1710
</simpara></listitem></varlistentry>
1711
<varlistentry><term><parameter>max_len</parameter> :</term>
1712
<listitem><simpara> the maximum number of bytes to read, or -1, for no maximum or
1713
if <parameter>p</parameter> is nul-terminated
1714
</simpara></listitem></varlistentry>
1715
<varlistentry><term><emphasis>Returns</emphasis> :</term><listitem><simpara> the resulting character. If <parameter>p</parameter> points to a partial
1716
sequence at the end of a string that could begin a valid
1717
character (or if <parameter>max_len</parameter> is zero), returns (gunichar)-2;
1718
otherwise, if <parameter>p</parameter> does not point to a valid UTF-8 encoded
1719
Unicode character, returns (gunichar)-1.
1720
</simpara></listitem></varlistentry>
1721
</variablelist></refsect2>
1722
<refsect2 id="g-utf8-offset-to-pointer" role="function">
1723
<title>g_utf8_offset_to_pointer ()</title>
1724
<indexterm zone="g-utf8-offset-to-pointer"><primary sortas="utf8_offset_to_pointer">g_utf8_offset_to_pointer</primary></indexterm><programlisting><link linkend="gchar">gchar</link>* g_utf8_offset_to_pointer (const <link linkend="gchar">gchar</link> *str,
1725
<link linkend="glong">glong</link> offset);</programlisting>
1727
Converts from an integer character offset to a pointer to a position within the string.
1731
Since 2.10, this function allows passing a negative <parameter>offset</parameter> to
1732
step backwards. It is usually worth stepping backwards from the end
1733
instead of forwards if <parameter>offset</parameter> is in the last fourth of the string,
1734
since moving forward is about 3 times faster than moving backward.
1738
This function doesn't abort when reaching the end of <parameter>str</parameter>. Therefore
1739
you should be sure that <parameter>offset</parameter> is within string boundaries before
1740
calling this function. Call <link linkend="g-utf8-strlen"><function>g_utf8_strlen()</function></link> when unsure.
1743
This limitation exists as this function is called frequently during
1744
text rendering and therefore has to be as fast as possible.
1745
</para></note></para>
1747
</para><variablelist role="params">
1748
<varlistentry><term><parameter>str</parameter> :</term>
1749
<listitem><simpara> a UTF-8 encoded string
1750
</simpara></listitem></varlistentry>
1751
<varlistentry><term><parameter>offset</parameter> :</term>
1752
<listitem><simpara> a character offset within <parameter>str</parameter>
1753
</simpara></listitem></varlistentry>
1754
<varlistentry><term><emphasis>Returns</emphasis> :</term><listitem><simpara> the resulting pointer
1755
</simpara></listitem></varlistentry>
1756
</variablelist></refsect2>
1757
<refsect2 id="g-utf8-pointer-to-offset" role="function">
1758
<title>g_utf8_pointer_to_offset ()</title>
1759
<indexterm zone="g-utf8-pointer-to-offset"><primary sortas="utf8_pointer_to_offset">g_utf8_pointer_to_offset</primary></indexterm><programlisting><link linkend="glong">glong</link> g_utf8_pointer_to_offset (const <link linkend="gchar">gchar</link> *str,
1760
const <link linkend="gchar">gchar</link> *pos);</programlisting>
1762
Converts from a pointer to a position within a string to an integer character offset.
1766
Since 2.10, this function allows <parameter>pos</parameter> to be before <parameter>str</parameter>, and returns
1767
a negative offset in this case.</para>
1769
</para><variablelist role="params">
1770
<varlistentry><term><parameter>str</parameter> :</term>
1771
<listitem><simpara> a UTF-8 encoded string
1772
</simpara></listitem></varlistentry>
1773
<varlistentry><term><parameter>pos</parameter> :</term>
1774
<listitem><simpara> a pointer to a position within <parameter>str</parameter>
1775
</simpara></listitem></varlistentry>
1776
<varlistentry><term><emphasis>Returns</emphasis> :</term><listitem><simpara> the resulting character offset
1777
</simpara></listitem></varlistentry>
1778
</variablelist></refsect2>
1779
<refsect2 id="g-utf8-prev-char" role="function">
1780
<title>g_utf8_prev_char ()</title>
1781
<indexterm zone="g-utf8-prev-char"><primary sortas="utf8_prev_char">g_utf8_prev_char</primary></indexterm><programlisting><link linkend="gchar">gchar</link>* g_utf8_prev_char (const <link linkend="gchar">gchar</link> *p);</programlisting>
1783
Finds the previous UTF-8 character in the string before <parameter>p</parameter>.
1786
<parameter>p</parameter> does not have to be at the beginning of a UTF-8 character. No check
1787
is made to see if the character found is actually valid other than
1788
it starts with an appropriate byte. If <parameter>p</parameter> might be the first
1789
character of the string, you must use <link linkend="g-utf8-find-prev-char"><function>g_utf8_find_prev_char()</function></link> instead.</para>
1791
</para><variablelist role="params">
1792
<varlistentry><term><parameter>p</parameter> :</term>
1793
<listitem><simpara> a pointer to a position within a UTF-8 encoded string
1794
</simpara></listitem></varlistentry>
1795
<varlistentry><term><emphasis>Returns</emphasis> :</term><listitem><simpara> a pointer to the found character.
1796
</simpara></listitem></varlistentry>
1797
</variablelist></refsect2>
1798
<refsect2 id="g-utf8-find-next-char" role="function">
1799
<title>g_utf8_find_next_char ()</title>
1800
<indexterm zone="g-utf8-find-next-char"><primary sortas="utf8_find_next_char">g_utf8_find_next_char</primary></indexterm><programlisting><link linkend="gchar">gchar</link>* g_utf8_find_next_char (const <link linkend="gchar">gchar</link> *p,
1801
const <link linkend="gchar">gchar</link> *end);</programlisting>
1803
Finds the start of the next UTF-8 character in the string after <parameter>p</parameter>.
1806
<parameter>p</parameter> does not have to be at the beginning of a UTF-8 character. No check
1807
is made to see if the character found is actually valid other than
1808
it starts with an appropriate byte.</para>
1810
</para><variablelist role="params">
1811
<varlistentry><term><parameter>p</parameter> :</term>
1812
<listitem><simpara> a pointer to a position within a UTF-8 encoded string
1813
</simpara></listitem></varlistentry>
1814
<varlistentry><term><parameter>end</parameter> :</term>
1815
<listitem><simpara> a pointer to the byte following the end of the string,
1816
or <link linkend="NULL--CAPS"><literal>NULL</literal></link> to indicate that the string is nul-terminated.
1817
</simpara></listitem></varlistentry>
1818
<varlistentry><term><emphasis>Returns</emphasis> :</term><listitem><simpara> a pointer to the found character or <link linkend="NULL--CAPS"><literal>NULL</literal></link>
1819
</simpara></listitem></varlistentry>
1820
</variablelist></refsect2>
1821
<refsect2 id="g-utf8-find-prev-char" role="function">
1822
<title>g_utf8_find_prev_char ()</title>
1823
<indexterm zone="g-utf8-find-prev-char"><primary sortas="utf8_find_prev_char">g_utf8_find_prev_char</primary></indexterm><programlisting><link linkend="gchar">gchar</link>* g_utf8_find_prev_char (const <link linkend="gchar">gchar</link> *str,
1824
const <link linkend="gchar">gchar</link> *p);</programlisting>
1826
Given a position <parameter>p</parameter> within a UTF-8 encoded string <parameter>str</parameter>, find the start
1827
of the previous UTF-8 character starting before <parameter>p</parameter>. Returns <link linkend="NULL--CAPS"><literal>NULL</literal></link> if no
1828
UTF-8 characters are present in <parameter>str</parameter> before <parameter>p</parameter>.
1831
<parameter>p</parameter> does not have to be at the beginning of a UTF-8 character. No check
1832
is made to see if the character found is actually valid other than
1833
it starts with an appropriate byte.</para>
1835
</para><variablelist role="params">
1836
<varlistentry><term><parameter>str</parameter> :</term>
1837
<listitem><simpara> pointer to the beginning of a UTF-8 encoded string
1838
</simpara></listitem></varlistentry>
1839
<varlistentry><term><parameter>p</parameter> :</term>
1840
<listitem><simpara> pointer to some position within <parameter>str</parameter>
1841
</simpara></listitem></varlistentry>
1842
<varlistentry><term><emphasis>Returns</emphasis> :</term><listitem><simpara> a pointer to the found character or <link linkend="NULL--CAPS"><literal>NULL</literal></link>.
1843
</simpara></listitem></varlistentry>
1844
</variablelist></refsect2>
1845
<refsect2 id="g-utf8-strlen" role="function">
1846
<title>g_utf8_strlen ()</title>
1847
<indexterm zone="g-utf8-strlen"><primary sortas="utf8_strlen">g_utf8_strlen</primary></indexterm><programlisting><link linkend="glong">glong</link> g_utf8_strlen (const <link linkend="gchar">gchar</link> *p,
1848
<link linkend="gssize">gssize</link> max);</programlisting>
1850
Computes the length of the string in characters, not including
1851
the terminating nul character.</para>
1853
</para><variablelist role="params">
1854
<varlistentry><term><parameter>p</parameter> :</term>
1855
<listitem><simpara> pointer to the start of a UTF-8 encoded string
1856
</simpara></listitem></varlistentry>
1857
<varlistentry><term><parameter>max</parameter> :</term>
1858
<listitem><simpara> the maximum number of bytes to examine. If <parameter>max</parameter>
1859
is less than 0, then the string is assumed to be
1860
nul-terminated. If <parameter>max</parameter> is 0, <parameter>p</parameter> will not be examined and
1861
may be <link linkend="NULL--CAPS"><literal>NULL</literal></link>.
1862
</simpara></listitem></varlistentry>
1863
<varlistentry><term><emphasis>Returns</emphasis> :</term><listitem><simpara> the length of the string in characters
1864
</simpara></listitem></varlistentry>
1865
</variablelist></refsect2>
1866
<refsect2 id="g-utf8-strncpy" role="function">
1867
<title>g_utf8_strncpy ()</title>
1868
<indexterm zone="g-utf8-strncpy"><primary sortas="utf8_strncpy">g_utf8_strncpy</primary></indexterm><programlisting><link linkend="gchar">gchar</link>* g_utf8_strncpy (<link linkend="gchar">gchar</link> *dest,
1869
const <link linkend="gchar">gchar</link> *src,
1870
<link linkend="gsize">gsize</link> n);</programlisting>
1872
Like the standard C <link linkend="strncpy"><function>strncpy()</function></link> function, but
1873
copies a given number of characters instead of a given number of
1874
bytes. The <parameter>src</parameter> string must be valid UTF-8 encoded text.
1875
(Use <link linkend="g-utf8-validate"><function>g_utf8_validate()</function></link> on all text before trying to use UTF-8
1876
utility functions with it.)</para>
1878
</para><variablelist role="params">
1879
<varlistentry><term><parameter>dest</parameter> :</term>
1880
<listitem><simpara> buffer to fill with characters from <parameter>src</parameter>
1881
</simpara></listitem></varlistentry>
1882
<varlistentry><term><parameter>src</parameter> :</term>
1883
<listitem><simpara> UTF-8 encoded string
1884
</simpara></listitem></varlistentry>
1885
<varlistentry><term><parameter>n</parameter> :</term>
1886
<listitem><simpara> character count
1887
</simpara></listitem></varlistentry>
1888
<varlistentry><term><emphasis>Returns</emphasis> :</term><listitem><simpara> <parameter>dest</parameter>
1889
</simpara></listitem></varlistentry>
1890
</variablelist></refsect2>
1891
<refsect2 id="g-utf8-strchr" role="function">
1892
<title>g_utf8_strchr ()</title>
1893
<indexterm zone="g-utf8-strchr"><primary sortas="utf8_strchr">g_utf8_strchr</primary></indexterm><programlisting><link linkend="gchar">gchar</link>* g_utf8_strchr (const <link linkend="gchar">gchar</link> *p,
1894
<link linkend="gssize">gssize</link> len,
1895
<link linkend="gunichar">gunichar</link> c);</programlisting>
1897
Finds the leftmost occurrence of the given Unicode character
1898
in a UTF-8 encoded string, while limiting the search to <parameter>len</parameter> bytes.
1899
If <parameter>len</parameter> is -1, allow unbounded search.</para>
1901
</para><variablelist role="params">
1902
<varlistentry><term><parameter>p</parameter> :</term>
1903
<listitem><simpara> a nul-terminated UTF-8 encoded string
1904
</simpara></listitem></varlistentry>
1905
<varlistentry><term><parameter>len</parameter> :</term>
1906
<listitem><simpara> the maximum length of <parameter>p</parameter>
1907
</simpara></listitem></varlistentry>
1908
<varlistentry><term><parameter>c</parameter> :</term>
1909
<listitem><simpara> a Unicode character
1910
</simpara></listitem></varlistentry>
1911
<varlistentry><term><emphasis>Returns</emphasis> :</term><listitem><simpara> <link linkend="NULL--CAPS"><literal>NULL</literal></link> if the string does not contain the character,
1912
otherwise, a pointer to the start of the leftmost occurrence of
1913
the character in the string.
1914
</simpara></listitem></varlistentry>
1915
</variablelist></refsect2>
1916
<refsect2 id="g-utf8-strrchr" role="function">
1917
<title>g_utf8_strrchr ()</title>
1918
<indexterm zone="g-utf8-strrchr"><primary sortas="utf8_strrchr">g_utf8_strrchr</primary></indexterm><programlisting><link linkend="gchar">gchar</link>* g_utf8_strrchr (const <link linkend="gchar">gchar</link> *p,
1919
<link linkend="gssize">gssize</link> len,
1920
<link linkend="gunichar">gunichar</link> c);</programlisting>
1922
Finds the rightmost occurrence of the given Unicode character
1923
in a UTF-8 encoded string, while limiting the search to <parameter>len</parameter> bytes.
1924
If <parameter>len</parameter> is -1, allow unbounded search.</para>
1926
</para><variablelist role="params">
1927
<varlistentry><term><parameter>p</parameter> :</term>
1928
<listitem><simpara> a nul-terminated UTF-8 encoded string
1929
</simpara></listitem></varlistentry>
1930
<varlistentry><term><parameter>len</parameter> :</term>
1931
<listitem><simpara> the maximum length of <parameter>p</parameter>
1932
</simpara></listitem></varlistentry>
1933
<varlistentry><term><parameter>c</parameter> :</term>
1934
<listitem><simpara> a Unicode character
1935
</simpara></listitem></varlistentry>
1936
<varlistentry><term><emphasis>Returns</emphasis> :</term><listitem><simpara> <link linkend="NULL--CAPS"><literal>NULL</literal></link> if the string does not contain the character,
1937
otherwise, a pointer to the start of the rightmost occurrence of the
1938
character in the string.
1939
</simpara></listitem></varlistentry>
1940
</variablelist></refsect2>
1941
<refsect2 id="g-utf8-strreverse" role="function" condition="since:2.2">
1942
<title>g_utf8_strreverse ()</title>
1943
<indexterm zone="g-utf8-strreverse" role="2.2"><primary sortas="utf8_strreverse">g_utf8_strreverse</primary></indexterm><programlisting><link linkend="gchar">gchar</link>* g_utf8_strreverse (const <link linkend="gchar">gchar</link> *str,
1944
<link linkend="gssize">gssize</link> len);</programlisting>
1946
Reverses a UTF-8 string. <parameter>str</parameter> must be valid UTF-8 encoded text.
1947
(Use <link linkend="g-utf8-validate"><function>g_utf8_validate()</function></link> on all text before trying to use UTF-8
1948
utility functions with it.)
1951
This function is intended for programmatic uses of reversed strings.
1952
It pays no attention to decomposed characters, combining marks, byte
1953
order marks, directional indicators (LRM, LRO, etc.) and similar
1954
characters which might need special handling when reversing a string
1955
for display purposes.
1958
Note that unlike <link linkend="g-strreverse"><function>g_strreverse()</function></link>, this function returns
1959
newly-allocated memory, which should be freed with <link linkend="g-free"><function>g_free()</function></link> when
1960
no longer needed.</para>
1962
</para><variablelist role="params">
1963
<varlistentry><term><parameter>str</parameter> :</term>
1964
<listitem><simpara> a UTF-8 encoded string
1965
</simpara></listitem></varlistentry>
1966
<varlistentry><term><parameter>len</parameter> :</term>
1967
<listitem><simpara> the maximum length of <parameter>str</parameter> to use, in bytes. If <parameter>len</parameter> &lt; 0,
1968
then the string is nul-terminated.
1969
</simpara></listitem></varlistentry>
1970
<varlistentry><term><emphasis>Returns</emphasis> :</term><listitem><simpara> a newly-allocated string which is the reverse of <parameter>str</parameter>.
1972
</simpara></listitem></varlistentry>
1973
</variablelist><para role="since">Since 2.2</para></refsect2>
1974
<refsect2 id="g-utf8-validate" role="function">
1975
<title>g_utf8_validate ()</title>
1976
<indexterm zone="g-utf8-validate"><primary sortas="utf8_validate">g_utf8_validate</primary></indexterm><programlisting><link linkend="gboolean">gboolean</link> g_utf8_validate (const <link linkend="gchar">gchar</link> *str,
1977
<link linkend="gssize">gssize</link> max_len,
1978
const <link linkend="gchar">gchar</link> **end);</programlisting>
1980
Validates UTF-8 encoded text. <parameter>str</parameter> is the text to validate;
1981
if <parameter>str</parameter> is nul-terminated, then <parameter>max_len</parameter> can be -1, otherwise
1982
<parameter>max_len</parameter> should be the number of bytes to validate.
1983
If <parameter>end</parameter> is non-<link linkend="NULL--CAPS"><literal>NULL</literal></link>, then the end of the valid range
1984
will be stored there (i.e. the start of the first invalid
1985
character if some bytes were invalid, or the end of the text
1986
being validated otherwise).
1989
Note that <link linkend="g-utf8-validate"><function>g_utf8_validate()</function></link> returns <link linkend="FALSE--CAPS"><literal>FALSE</literal></link> if <parameter>max_len</parameter> is
1990
positive and NUL is met before <parameter>max_len</parameter> bytes have been read.
1993
Returns <link linkend="TRUE--CAPS"><literal>TRUE</literal></link> if all of <parameter>str</parameter> was valid. Many GLib and GTK+
1994
routines <emphasis>require</emphasis> valid UTF-8 as input;
1995
so data read from a file or the network should be checked
1996
with <link linkend="g-utf8-validate"><function>g_utf8_validate()</function></link> before doing anything else with it.</para>
1998
</para><variablelist role="params">
1999
<varlistentry><term><parameter>str</parameter> :</term>
2000
<listitem><simpara> a pointer to character data
2001
</simpara></listitem></varlistentry>
2002
<varlistentry><term><parameter>max_len</parameter> :</term>
2003
<listitem><simpara> max bytes to validate, or -1 to go until NUL
2004
</simpara></listitem></varlistentry>
2005
<varlistentry><term><parameter>end</parameter> :</term>
2006
<listitem><simpara> return location for end of valid data
2007
</simpara></listitem></varlistentry>
2008
<varlistentry><term><emphasis>Returns</emphasis> :</term><listitem><simpara> <link linkend="TRUE--CAPS"><literal>TRUE</literal></link> if the text was valid UTF-8
2009
</simpara></listitem></varlistentry>
2010
</variablelist></refsect2>
2011
<refsect2 id="g-utf8-strup" role="function">
2012
<title>g_utf8_strup ()</title>
2013
<indexterm zone="g-utf8-strup"><primary sortas="utf8_strup">g_utf8_strup</primary></indexterm><programlisting><link linkend="gchar">gchar</link> * g_utf8_strup (const <link linkend="gchar">gchar</link> *str,
2014
<link linkend="gssize">gssize</link> len);</programlisting>
2016
Converts all Unicode characters in the string that have a case
2017
to uppercase. The exact manner in which this is done depends
2018
on the current locale, and may result in the number of
2019
characters in the string increasing. (For instance, the
2020
German ess-zet will be changed to SS.)</para>
2022
</para><variablelist role="params">
2023
<varlistentry><term><parameter>str</parameter> :</term>
2024
<listitem><simpara> a UTF-8 encoded string
2025
</simpara></listitem></varlistentry>
2026
<varlistentry><term><parameter>len</parameter> :</term>
2027
<listitem><simpara> length of <parameter>str</parameter>, in bytes, or -1 if <parameter>str</parameter> is nul-terminated.
2028
</simpara></listitem></varlistentry>
2029
<varlistentry><term><emphasis>Returns</emphasis> :</term><listitem><simpara> a newly allocated string, with all characters
2030
converted to uppercase.
2031
</simpara></listitem></varlistentry>
2032
</variablelist></refsect2>
2033
<refsect2 id="g-utf8-strdown" role="function">
2034
<title>g_utf8_strdown ()</title>
2035
<indexterm zone="g-utf8-strdown"><primary sortas="utf8_strdown">g_utf8_strdown</primary></indexterm><programlisting><link linkend="gchar">gchar</link> * g_utf8_strdown (const <link linkend="gchar">gchar</link> *str,
2036
<link linkend="gssize">gssize</link> len);</programlisting>
2038
Converts all Unicode characters in the string that have a case
2039
to lowercase. The exact manner in which this is done depends
2040
on the current locale, and may result in the number of
2041
characters in the string changing.</para>
2043
</para><variablelist role="params">
2044
<varlistentry><term><parameter>str</parameter> :</term>
2045
<listitem><simpara> a UTF-8 encoded string
2046
</simpara></listitem></varlistentry>
2047
<varlistentry><term><parameter>len</parameter> :</term>
2048
<listitem><simpara> length of <parameter>str</parameter>, in bytes, or -1 if <parameter>str</parameter> is nul-terminated.
2049
</simpara></listitem></varlistentry>
2050
<varlistentry><term><emphasis>Returns</emphasis> :</term><listitem><simpara> a newly allocated string, with all characters
2051
converted to lowercase.
2052
</simpara></listitem></varlistentry>
2053
</variablelist></refsect2>
2054
<refsect2 id="g-utf8-casefold" role="function">
2055
<title>g_utf8_casefold ()</title>
2056
<indexterm zone="g-utf8-casefold"><primary sortas="utf8_casefold">g_utf8_casefold</primary></indexterm><programlisting><link linkend="gchar">gchar</link> * g_utf8_casefold (const <link linkend="gchar">gchar</link> *str,
2057
<link linkend="gssize">gssize</link> len);</programlisting>
2059
Converts a string into a form that is independent of case. The
2060
result will not correspond to any particular case, but can be
2061
compared for equality or ordered with the results of calling
2062
<link linkend="g-utf8-casefold"><function>g_utf8_casefold()</function></link> on other strings.
2065
Note that calling <link linkend="g-utf8-casefold"><function>g_utf8_casefold()</function></link> followed by <link linkend="g-utf8-collate"><function>g_utf8_collate()</function></link> is
2066
only an approximation to the correct linguistic case insensitive
2067
ordering, though it is a fairly good one. Getting this exactly
2068
right would require a more sophisticated collation function that
2069
takes case sensitivity into account. GLib does not currently
2070
provide such a function.</para>
2072
</para><variablelist role="params">
2073
<varlistentry><term><parameter>str</parameter> :</term>
2074
<listitem><simpara> a UTF-8 encoded string
2075
</simpara></listitem></varlistentry>
2076
<varlistentry><term><parameter>len</parameter> :</term>
2077
<listitem><simpara> length of <parameter>str</parameter>, in bytes, or -1 if <parameter>str</parameter> is nul-terminated.
2078
</simpara></listitem></varlistentry>
2079
<varlistentry><term><emphasis>Returns</emphasis> :</term><listitem><simpara> a newly allocated string, that is a
2080
case independent form of <parameter>str</parameter>.
2081
</simpara></listitem></varlistentry>
2082
</variablelist></refsect2>
2083
<refsect2 id="g-utf8-normalize" role="function">
2084
<title>g_utf8_normalize ()</title>
2085
<indexterm zone="g-utf8-normalize"><primary sortas="utf8_normalize">g_utf8_normalize</primary></indexterm><programlisting><link linkend="gchar">gchar</link> * g_utf8_normalize (const <link linkend="gchar">gchar</link> *str,
2086
<link linkend="gssize">gssize</link> len,
2087
<link linkend="GNormalizeMode">GNormalizeMode</link> mode);</programlisting>
2089
Converts a string into canonical form, standardizing
2090
such issues as whether a character with an accent
2091
is represented as a base character and combining
2092
accent or as a single precomposed character. The
2093
string has to be valid UTF-8, otherwise <link linkend="NULL--CAPS"><literal>NULL</literal></link> is
2094
returned. You should generally call <link linkend="g-utf8-normalize"><function>g_utf8_normalize()</function></link>
2095
before comparing two Unicode strings.
2098
The normalization mode <link linkend="G-NORMALIZE-DEFAULT--CAPS"><literal>G_NORMALIZE_DEFAULT</literal></link> only
2099
standardizes differences that do not affect the
2100
text content, such as the above-mentioned accent
2101
representation. <link linkend="G-NORMALIZE-ALL--CAPS"><literal>G_NORMALIZE_ALL</literal></link> also standardizes
2102
the "compatibility" characters in Unicode, such
2103
as SUPERSCRIPT THREE to the standard forms
2104
(in this case DIGIT THREE). Formatting information
2105
may be lost but for most text operations such
2106
characters should be considered the same.
2109
<link linkend="G-NORMALIZE-DEFAULT-COMPOSE--CAPS"><literal>G_NORMALIZE_DEFAULT_COMPOSE</literal></link> and <link linkend="G-NORMALIZE-ALL-COMPOSE--CAPS"><literal>G_NORMALIZE_ALL_COMPOSE</literal></link>
2110
are like <link linkend="G-NORMALIZE-DEFAULT--CAPS"><literal>G_NORMALIZE_DEFAULT</literal></link> and <link linkend="G-NORMALIZE-ALL--CAPS"><literal>G_NORMALIZE_ALL</literal></link>,
2111
but return a result with composed forms rather
2112
than a maximally decomposed form. This is often
2113
useful if you intend to convert the string to
2114
a legacy encoding or pass it to a system with
2115
less capable Unicode handling.</para>
2117
</para><variablelist role="params">
2118
<varlistentry><term><parameter>str</parameter> :</term>
2119
<listitem><simpara> a UTF-8 encoded string.
2120
</simpara></listitem></varlistentry>
2121
<varlistentry><term><parameter>len</parameter> :</term>
2122
<listitem><simpara> length of <parameter>str</parameter>, in bytes, or -1 if <parameter>str</parameter> is nul-terminated.
2123
</simpara></listitem></varlistentry>
2124
<varlistentry><term><parameter>mode</parameter> :</term>
2125
<listitem><simpara> the type of normalization to perform.
2126
</simpara></listitem></varlistentry>
2127
<varlistentry><term><emphasis>Returns</emphasis> :</term><listitem><simpara> a newly allocated string, that is the
2128
normalized form of <parameter>str</parameter>, or <link linkend="NULL--CAPS"><literal>NULL</literal></link> if <parameter>str</parameter> is not
valid UTF-8.
2130
</simpara></listitem></varlistentry>
2131
</variablelist></refsect2>
2132
<refsect2 id="GNormalizeMode" role="enum">
2133
<title>enum GNormalizeMode</title>
2134
<indexterm zone="GNormalizeMode"><primary sortas="NormalizeMode">GNormalizeMode</primary></indexterm><programlisting>typedef enum {
2135
G_NORMALIZE_DEFAULT,
2136
G_NORMALIZE_NFD = G_NORMALIZE_DEFAULT,
2137
G_NORMALIZE_DEFAULT_COMPOSE,
2138
G_NORMALIZE_NFC = G_NORMALIZE_DEFAULT_COMPOSE,
G_NORMALIZE_ALL,
2140
G_NORMALIZE_NFKD = G_NORMALIZE_ALL,
2141
G_NORMALIZE_ALL_COMPOSE,
2142
G_NORMALIZE_NFKC = G_NORMALIZE_ALL_COMPOSE
} GNormalizeMode;
</programlisting>
<para>
2146
Defines how a Unicode string is transformed into a canonical
2147
form, standardizing such issues as whether a character with an accent is
2148
represented as a base character and combining accent or as a single precomposed
2149
character. Unicode strings should generally be normalized before comparing them.
2150
</para><variablelist role="enum">
2151
<varlistentry id="G-NORMALIZE-DEFAULT--CAPS" role="constant">
2152
<term><literal>G_NORMALIZE_DEFAULT</literal></term>
2153
<listitem><simpara>standardize differences that do not affect the
2154
text content, such as the above-mentioned accent representation.
2155
</simpara></listitem>
2157
<varlistentry id="G-NORMALIZE-NFD--CAPS" role="constant">
2158
<term><literal>G_NORMALIZE_NFD</literal></term>
2159
<listitem><simpara>another name for <link linkend="G-NORMALIZE-DEFAULT--CAPS"><literal>G_NORMALIZE_DEFAULT</literal></link>.
2160
</simpara></listitem>
2162
<varlistentry id="G-NORMALIZE-DEFAULT-COMPOSE--CAPS" role="constant">
2163
<term><literal>G_NORMALIZE_DEFAULT_COMPOSE</literal></term>
2164
<listitem><simpara>like <link linkend="G-NORMALIZE-DEFAULT--CAPS"><literal>G_NORMALIZE_DEFAULT</literal></link>, but with composed
2165
forms rather than a maximally decomposed form.
2166
</simpara></listitem>
2168
<varlistentry id="G-NORMALIZE-NFC--CAPS" role="constant">
2169
<term><literal>G_NORMALIZE_NFC</literal></term>
2170
<listitem><simpara>another name for <link linkend="G-NORMALIZE-DEFAULT-COMPOSE--CAPS"><literal>G_NORMALIZE_DEFAULT_COMPOSE</literal></link>.
2171
</simpara></listitem>
2173
<varlistentry id="G-NORMALIZE-ALL--CAPS" role="constant">
2174
<term><literal>G_NORMALIZE_ALL</literal></term>
2175
<listitem><simpara>beyond <link linkend="G-NORMALIZE-DEFAULT--CAPS"><literal>G_NORMALIZE_DEFAULT</literal></link> also standardize the
2176
"compatibility" characters in Unicode, such as SUPERSCRIPT THREE to the
2177
standard forms (in this case DIGIT THREE). Formatting information may be
2178
lost but for most text operations such characters should be considered the
same.
2180
</simpara></listitem>
2182
<varlistentry id="G-NORMALIZE-NFKD--CAPS" role="constant">
2183
<term><literal>G_NORMALIZE_NFKD</literal></term>
2184
<listitem><simpara>another name for <link linkend="G-NORMALIZE-ALL--CAPS"><literal>G_NORMALIZE_ALL</literal></link>.
2185
</simpara></listitem>
2187
<varlistentry id="G-NORMALIZE-ALL-COMPOSE--CAPS" role="constant">
2188
<term><literal>G_NORMALIZE_ALL_COMPOSE</literal></term>
2189
<listitem><simpara>like <link linkend="G-NORMALIZE-ALL--CAPS"><literal>G_NORMALIZE_ALL</literal></link>, but with composed
2190
forms rather than a maximally decomposed form.
2191
</simpara></listitem>
2193
<varlistentry id="G-NORMALIZE-NFKC--CAPS" role="constant">
2194
<term><literal>G_NORMALIZE_NFKC</literal></term>
2195
<listitem><simpara>another name for <link linkend="G-NORMALIZE-ALL-COMPOSE--CAPS"><literal>G_NORMALIZE_ALL_COMPOSE</literal></link>.
2196
</simpara></listitem>
2198
</variablelist></refsect2>
2199
<refsect2 id="g-utf8-collate" role="function">
2200
<title>g_utf8_collate ()</title>
2201
<indexterm zone="g-utf8-collate"><primary sortas="utf8_collate">g_utf8_collate</primary></indexterm><programlisting><link linkend="gint">gint</link> g_utf8_collate (const <link linkend="gchar">gchar</link> *str1,
2202
const <link linkend="gchar">gchar</link> *str2);</programlisting>
2204
Compares two strings for ordering using the linguistically
2205
correct rules for the <link linkend="setlocale">current locale</link>.
2206
When sorting a large number of strings, it will be significantly
2207
faster to obtain collation keys with <link linkend="g-utf8-collate-key"><function>g_utf8_collate_key()</function></link> and
2208
compare the keys with <link linkend="strcmp"><function>strcmp()</function></link> when sorting instead of sorting
2209
the original strings.</para>
2211
</para><variablelist role="params">
2212
<varlistentry><term><parameter>str1</parameter> :</term>
2213
<listitem><simpara> a UTF-8 encoded string
2214
</simpara></listitem></varlistentry>
2215
<varlistentry><term><parameter>str2</parameter> :</term>
2216
<listitem><simpara> a UTF-8 encoded string
2217
</simpara></listitem></varlistentry>
2218
<varlistentry><term><emphasis>Returns</emphasis> :</term><listitem><simpara> &lt; 0 if <parameter>str1</parameter> compares before <parameter>str2</parameter>,
2219
0 if they compare equal, > 0 if <parameter>str1</parameter> compares after <parameter>str2</parameter>.
2220
</simpara></listitem></varlistentry>
2221
</variablelist></refsect2>
2222
<refsect2 id="g-utf8-collate-key" role="function">
2223
<title>g_utf8_collate_key ()</title>
2224
<indexterm zone="g-utf8-collate-key"><primary sortas="utf8_collate_key">g_utf8_collate_key</primary></indexterm><programlisting><link linkend="gchar">gchar</link> * g_utf8_collate_key (const <link linkend="gchar">gchar</link> *str,
2225
<link linkend="gssize">gssize</link> len);</programlisting>
2227
Converts a string into a collation key that can be compared
2228
with other collation keys produced by the same function using
2229
<link linkend="strcmp"><function>strcmp()</function></link>.
2232
The results of comparing the collation keys of two strings
2233
with <link linkend="strcmp"><function>strcmp()</function></link> will always be the same as comparing the two
2234
original strings with <link linkend="g-utf8-collate"><function>g_utf8_collate()</function></link>.
2237
Note that this function depends on the
2238
<link linkend="setlocale">current locale</link>.</para>
2240
</para><variablelist role="params">
2241
<varlistentry><term><parameter>str</parameter> :</term>
2242
<listitem><simpara> a UTF-8 encoded string.
2243
</simpara></listitem></varlistentry>
2244
<varlistentry><term><parameter>len</parameter> :</term>
2245
<listitem><simpara> length of <parameter>str</parameter>, in bytes, or -1 if <parameter>str</parameter> is nul-terminated.
2246
</simpara></listitem></varlistentry>
2247
<varlistentry><term><emphasis>Returns</emphasis> :</term><listitem><simpara> a newly allocated string. This string should
2248
be freed with <link linkend="g-free"><function>g_free()</function></link> when you are done with it.
2249
</simpara></listitem></varlistentry>
2250
</variablelist></refsect2>
2251
<refsect2 id="g-utf8-collate-key-for-filename" role="function" condition="since:2.8">
2252
<title>g_utf8_collate_key_for_filename ()</title>
2253
<indexterm zone="g-utf8-collate-key-for-filename" role="2.8"><primary sortas="utf8_collate_key_for_filename">g_utf8_collate_key_for_filename</primary></indexterm><programlisting><link linkend="gchar">gchar</link> * g_utf8_collate_key_for_filename (const <link linkend="gchar">gchar</link> *str,
2254
<link linkend="gssize">gssize</link> len);</programlisting>
2256
Converts a string into a collation key that can be compared
2257
with other collation keys produced by the same function using <link linkend="strcmp"><function>strcmp()</function></link>.
2260
In order to sort filenames correctly, this function treats the dot '.'
2261
as a special case. Most dictionary orderings seem to consider it
2262
insignificant, thus producing the ordering "event.c" "eventgenerator.c"
2263
"event.h" instead of "event.c" "event.h" "eventgenerator.c". Also, we
2264
would like to treat numbers intelligently so that "file1" "file10" "file5"
2265
is sorted as "file1" "file5" "file10".
2268
Note that this function depends on the
2269
<link linkend="setlocale">current locale</link>.</para>
2271
</para><variablelist role="params">
2272
<varlistentry><term><parameter>str</parameter> :</term>
2273
<listitem><simpara> a UTF-8 encoded string.
2274
</simpara></listitem></varlistentry>
2275
<varlistentry><term><parameter>len</parameter> :</term>
2276
<listitem><simpara> length of <parameter>str</parameter>, in bytes, or -1 if <parameter>str</parameter> is nul-terminated.
2277
</simpara></listitem></varlistentry>
2278
<varlistentry><term><emphasis>Returns</emphasis> :</term><listitem><simpara> a newly allocated string. This string should
2279
be freed with <link linkend="g-free"><function>g_free()</function></link> when you are done with it.
2281
</simpara></listitem></varlistentry>
2282
</variablelist><para role="since">Since 2.8</para></refsect2>
2283
<refsect2 id="g-utf8-to-utf16" role="function">
2284
<title>g_utf8_to_utf16 ()</title>
2285
<indexterm zone="g-utf8-to-utf16"><primary sortas="utf8_to_utf16">g_utf8_to_utf16</primary></indexterm><programlisting><link linkend="gunichar2">gunichar2</link> * g_utf8_to_utf16 (const <link linkend="gchar">gchar</link> *str,
2286
<link linkend="glong">glong</link> len,
2287
<link linkend="glong">glong</link> *items_read,
2288
<link linkend="glong">glong</link> *items_written,
2289
<link linkend="GError">GError</link> **error);</programlisting>
2291
Convert a string from UTF-8 to UTF-16. A 0 character will be
2292
added to the result after the converted text.</para>
2294
</para><variablelist role="params">
2295
<varlistentry><term><parameter>str</parameter> :</term>
2296
<listitem><simpara> a UTF-8 encoded string
2297
</simpara></listitem></varlistentry>
2298
<varlistentry><term><parameter>len</parameter> :</term>
2299
<listitem><simpara> the maximum length (number of bytes) of <parameter>str</parameter> to use.
2300
If <parameter>len</parameter> &lt; 0, then the string is nul-terminated.
2301
</simpara></listitem></varlistentry>
2302
<varlistentry><term><parameter>items_read</parameter> :</term>
2303
<listitem><simpara> location to store number of bytes read, or <link linkend="NULL--CAPS"><literal>NULL</literal></link>.
2304
If <link linkend="NULL--CAPS"><literal>NULL</literal></link>, then <link linkend="G-CONVERT-ERROR-PARTIAL-INPUT--CAPS"><literal>G_CONVERT_ERROR_PARTIAL_INPUT</literal></link> will be
2305
returned in case <parameter>str</parameter> contains a trailing partial
2306
character. If an error occurs then the index of the
2307
invalid input is stored here.
2308
</simpara></listitem></varlistentry>
2309
<varlistentry><term><parameter>items_written</parameter> :</term>
2310
<listitem><simpara> location to store number of <type>gunichar2</type> written,
2311
or <link linkend="NULL--CAPS"><literal>NULL</literal></link>.
2312
The value stored here does not include the trailing 0.
2313
</simpara></listitem></varlistentry>
2314
<varlistentry><term><parameter>error</parameter> :</term>
2315
<listitem><simpara> location to store the error occurring, or <link linkend="NULL--CAPS"><literal>NULL</literal></link> to ignore
2316
errors. Any of the errors in <link linkend="GConvertError"><type>GConvertError</type></link> other than
2317
<link linkend="G-CONVERT-ERROR-NO-CONVERSION--CAPS"><literal>G_CONVERT_ERROR_NO_CONVERSION</literal></link> may occur.
2318
</simpara></listitem></varlistentry>
2319
<varlistentry><term><emphasis>Returns</emphasis> :</term><listitem><simpara> a pointer to a newly allocated UTF-16 string.
2320
This value must be freed with <link linkend="g-free"><function>g_free()</function></link>. If an
2321
error occurs, <link linkend="NULL--CAPS"><literal>NULL</literal></link> will be returned and
2322
<parameter>error</parameter> set.
2323
</simpara></listitem></varlistentry>
2324
</variablelist></refsect2>
2325
<refsect2 id="g-utf8-to-ucs4" role="function">
2326
<title>g_utf8_to_ucs4 ()</title>
2327
<indexterm zone="g-utf8-to-ucs4"><primary sortas="utf8_to_ucs4">g_utf8_to_ucs4</primary></indexterm><programlisting><link linkend="gunichar">gunichar</link> * g_utf8_to_ucs4 (const <link linkend="gchar">gchar</link> *str,
2328
<link linkend="glong">glong</link> len,
2329
<link linkend="glong">glong</link> *items_read,
2330
<link linkend="glong">glong</link> *items_written,
2331
<link linkend="GError">GError</link> **error);</programlisting>
2333
Convert a string from UTF-8 to a 32-bit fixed width
2334
representation as UCS-4. A trailing 0 will be added to the
2335
string after the converted text.</para>
2337
</para><variablelist role="params">
2338
<varlistentry><term><parameter>str</parameter> :</term>
2339
<listitem><simpara> a UTF-8 encoded string
2340
</simpara></listitem></varlistentry>
2341
<varlistentry><term><parameter>len</parameter> :</term>
2342
<listitem><simpara> the maximum length of <parameter>str</parameter> to use, in bytes. If <parameter>len</parameter> &lt; 0,
2343
then the string is nul-terminated.
2344
</simpara></listitem></varlistentry>
2345
<varlistentry><term><parameter>items_read</parameter> :</term>
2346
<listitem><simpara> location to store number of bytes read, or <link linkend="NULL--CAPS"><literal>NULL</literal></link>.
2347
If <link linkend="NULL--CAPS"><literal>NULL</literal></link>, then <link linkend="G-CONVERT-ERROR-PARTIAL-INPUT--CAPS"><literal>G_CONVERT_ERROR_PARTIAL_INPUT</literal></link> will be
2348
returned in case <parameter>str</parameter> contains a trailing partial
2349
character. If an error occurs then the index of the
2350
invalid input is stored here.
2351
</simpara></listitem></varlistentry>
2352
<varlistentry><term><parameter>items_written</parameter> :</term>
2353
<listitem><simpara> location to store number of characters written or <link linkend="NULL--CAPS"><literal>NULL</literal></link>.
2354
The value stored here does not include the trailing 0 character.
2356
</simpara></listitem></varlistentry>
2357
<varlistentry><term><parameter>error</parameter> :</term>
2358
<listitem><simpara> location to store the error occurring, or <link linkend="NULL--CAPS"><literal>NULL</literal></link> to ignore
2359
errors. Any of the errors in <link linkend="GConvertError"><type>GConvertError</type></link> other than
2360
<link linkend="G-CONVERT-ERROR-NO-CONVERSION--CAPS"><literal>G_CONVERT_ERROR_NO_CONVERSION</literal></link> may occur.
2361
</simpara></listitem></varlistentry>
2362
<varlistentry><term><emphasis>Returns</emphasis> :</term><listitem><simpara> a pointer to a newly allocated UCS-4 string.
2363
This value must be freed with <link linkend="g-free"><function>g_free()</function></link>. If an
2364
error occurs, <link linkend="NULL--CAPS"><literal>NULL</literal></link> will be returned and
2365
<parameter>error</parameter> set.
2366
</simpara></listitem></varlistentry>
2367
</variablelist></refsect2>
2368
<refsect2 id="g-utf8-to-ucs4-fast" role="function">
2369
<title>g_utf8_to_ucs4_fast ()</title>
2370
<indexterm zone="g-utf8-to-ucs4-fast"><primary sortas="utf8_to_ucs4_fast">g_utf8_to_ucs4_fast</primary></indexterm><programlisting><link linkend="gunichar">gunichar</link> * g_utf8_to_ucs4_fast (const <link linkend="gchar">gchar</link> *str,
2371
<link linkend="glong">glong</link> len,
2372
<link linkend="glong">glong</link> *items_written);</programlisting>
2374
Convert a string from UTF-8 to a 32-bit fixed width
2375
representation as UCS-4, assuming valid UTF-8 input.
2376
This function is roughly twice as fast as <link linkend="g-utf8-to-ucs4"><function>g_utf8_to_ucs4()</function></link>
2377
but does no error checking on the input.</para>
2379
</para><variablelist role="params">
2380
<varlistentry><term><parameter>str</parameter> :</term>
2381
<listitem><simpara> a UTF-8 encoded string
2382
</simpara></listitem></varlistentry>
2383
<varlistentry><term><parameter>len</parameter> :</term>
2384
<listitem><simpara> the maximum length of <parameter>str</parameter> to use, in bytes. If <parameter>len</parameter> &lt; 0,
2385
then the string is nul-terminated.
2386
</simpara></listitem></varlistentry>
2387
<varlistentry><term><parameter>items_written</parameter> :</term>
2388
<listitem><simpara> location to store the number of characters in the
2389
result, or <link linkend="NULL--CAPS"><literal>NULL</literal></link>.
2390
</simpara></listitem></varlistentry>
2391
<varlistentry><term><emphasis>Returns</emphasis> :</term><listitem><simpara> a pointer to a newly allocated UCS-4 string.
2392
This value must be freed with <link linkend="g-free"><function>g_free()</function></link>.
2393
</simpara></listitem></varlistentry>
2394
</variablelist></refsect2>
2395
<refsect2 id="g-utf16-to-ucs4" role="function">
2396
<title>g_utf16_to_ucs4 ()</title>
2397
<indexterm zone="g-utf16-to-ucs4"><primary sortas="utf16_to_ucs4">g_utf16_to_ucs4</primary></indexterm><programlisting><link linkend="gunichar">gunichar</link> * g_utf16_to_ucs4 (const <link linkend="gunichar2">gunichar2</link> *str,
2398
<link linkend="glong">glong</link> len,
2399
<link linkend="glong">glong</link> *items_read,
2400
<link linkend="glong">glong</link> *items_written,
2401
<link linkend="GError">GError</link> **error);</programlisting>
2403
Convert a string from UTF-16 to UCS-4. The result will be
2404
nul-terminated.</para>
2406
</para><variablelist role="params">
2407
<varlistentry><term><parameter>str</parameter> :</term>
2408
<listitem><simpara> a UTF-16 encoded string
2409
</simpara></listitem></varlistentry>
2410
<varlistentry><term><parameter>len</parameter> :</term>
2411
<listitem><simpara> the maximum length (number of <type>gunichar2</type>) of <parameter>str</parameter> to use.
2412
If <parameter>len</parameter> &lt; 0, then the string is nul-terminated.
2413
</simpara></listitem></varlistentry>
2414
<varlistentry><term><parameter>items_read</parameter> :</term>
2415
<listitem><simpara> location to store number of words read, or <link linkend="NULL--CAPS"><literal>NULL</literal></link>.
2416
If <link linkend="NULL--CAPS"><literal>NULL</literal></link>, then <link linkend="G-CONVERT-ERROR-PARTIAL-INPUT--CAPS"><literal>G_CONVERT_ERROR_PARTIAL_INPUT</literal></link> will be
2417
returned in case <parameter>str</parameter> contains a trailing partial
2418
character. If an error occurs then the index of the
2419
invalid input is stored here.
2420
</simpara></listitem></varlistentry>
2421
<varlistentry><term><parameter>items_written</parameter> :</term>
2422
<listitem><simpara> location to store number of characters written, or <link linkend="NULL--CAPS"><literal>NULL</literal></link>.
2423
The value stored here does not include the trailing 0 character.
2425
</simpara></listitem></varlistentry>
2426
<varlistentry><term><parameter>error</parameter> :</term>
2427
<listitem><simpara> location to store the error occurring, or <link linkend="NULL--CAPS"><literal>NULL</literal></link> to ignore
2428
errors. Any of the errors in <link linkend="GConvertError"><type>GConvertError</type></link> other than
2429
<link linkend="G-CONVERT-ERROR-NO-CONVERSION--CAPS"><literal>G_CONVERT_ERROR_NO_CONVERSION</literal></link> may occur.
2430
</simpara></listitem></varlistentry>
2431
<varlistentry><term><emphasis>Returns</emphasis> :</term><listitem><simpara> a pointer to a newly allocated UCS-4 string.
2432
This value must be freed with <link linkend="g-free"><function>g_free()</function></link>. If an
2433
error occurs, <link linkend="NULL--CAPS"><literal>NULL</literal></link> will be returned and
2434
<parameter>error</parameter> set.
2435
</simpara></listitem></varlistentry>
2436
</variablelist></refsect2>
2437
<refsect2 id="g-utf16-to-utf8" role="function">
2438
<title>g_utf16_to_utf8 ()</title>
2439
<indexterm zone="g-utf16-to-utf8"><primary sortas="utf16_to_utf8">g_utf16_to_utf8</primary></indexterm><programlisting><link linkend="gchar">gchar</link>* g_utf16_to_utf8 (const <link linkend="gunichar2">gunichar2</link> *str,
2440
<link linkend="glong">glong</link> len,
2441
<link linkend="glong">glong</link> *items_read,
2442
<link linkend="glong">glong</link> *items_written,
2443
<link linkend="GError">GError</link> **error);</programlisting>
2445
Convert a string from UTF-16 to UTF-8. The result will be
2446
terminated with a 0 byte.
2449
Note that the input is expected to be already in native endianness,
2450
an initial byte-order-mark character is not handled specially.
2451
<link linkend="g-convert"><function>g_convert()</function></link> can be used to convert a byte buffer of UTF-16 data of
2452
ambiguous endianness.
2455
Further note that this function does not validate the result
2456
string; it may e.g. include embedded NUL characters. The only
2457
validation done by this function is to ensure that the input can
2458
be correctly interpreted as UTF-16, i.e. it doesn't contain
2459
things like unpaired surrogates.</para>
2461
</para><variablelist role="params">
2462
<varlistentry><term><parameter>str</parameter> :</term>
2463
<listitem><simpara> a UTF-16 encoded string
2464
</simpara></listitem></varlistentry>
2465
<varlistentry><term><parameter>len</parameter> :</term>
2466
<listitem><simpara> the maximum length (number of <type>gunichar2</type>) of <parameter>str</parameter> to use.
2467
If <parameter>len</parameter> &lt; 0, then the string is nul-terminated.
2468
</simpara></listitem></varlistentry>
2469
<varlistentry><term><parameter>items_read</parameter> :</term>
2470
<listitem><simpara> location to store number of words read, or <link linkend="NULL--CAPS"><literal>NULL</literal></link>.
2471
If <link linkend="NULL--CAPS"><literal>NULL</literal></link>, then <link linkend="G-CONVERT-ERROR-PARTIAL-INPUT--CAPS"><literal>G_CONVERT_ERROR_PARTIAL_INPUT</literal></link> will be
2472
returned in case <parameter>str</parameter> contains a trailing partial
2473
character. If an error occurs then the index of the
2474
invalid input is stored here.
2475
</simpara></listitem></varlistentry>
2476
<varlistentry><term><parameter>items_written</parameter> :</term>
2477
<listitem><simpara> location to store number of bytes written, or <link linkend="NULL--CAPS"><literal>NULL</literal></link>.
2478
The value stored here does not include the trailing 0 byte.
2480
</simpara></listitem></varlistentry>
2481
<varlistentry><term><parameter>error</parameter> :</term>
2482
<listitem><simpara> location to store the error occurring, or <link linkend="NULL--CAPS"><literal>NULL</literal></link> to ignore
2483
errors. Any of the errors in <link linkend="GConvertError"><type>GConvertError</type></link> other than
2484
<link linkend="G-CONVERT-ERROR-NO-CONVERSION--CAPS"><literal>G_CONVERT_ERROR_NO_CONVERSION</literal></link> may occur.
2485
</simpara></listitem></varlistentry>
2486
<varlistentry><term><emphasis>Returns</emphasis> :</term><listitem><simpara> a pointer to a newly allocated UTF-8 string.
2487
This value must be freed with <link linkend="g-free"><function>g_free()</function></link>. If an
2488
error occurs, <link linkend="NULL--CAPS"><literal>NULL</literal></link> will be returned and
2489
<parameter>error</parameter> set.
2490
</simpara></listitem></varlistentry>
2491
</variablelist></refsect2>
2492
<refsect2 id="g-ucs4-to-utf16" role="function">
2493
<title>g_ucs4_to_utf16 ()</title>
2494
<indexterm zone="g-ucs4-to-utf16"><primary sortas="ucs4_to_utf16">g_ucs4_to_utf16</primary></indexterm><programlisting><link linkend="gunichar2">gunichar2</link> * g_ucs4_to_utf16 (const <link linkend="gunichar">gunichar</link> *str,
2495
<link linkend="glong">glong</link> len,
2496
<link linkend="glong">glong</link> *items_read,
2497
<link linkend="glong">glong</link> *items_written,
2498
<link linkend="GError">GError</link> **error);</programlisting>
2500
Convert a string from UCS-4 to UTF-16. A 0 character will be
2501
added to the result after the converted text.</para>
2503
</para><variablelist role="params">
2504
<varlistentry><term><parameter>str</parameter> :</term>
2505
<listitem><simpara> a UCS-4 encoded string
2506
</simpara></listitem></varlistentry>
2507
<varlistentry><term><parameter>len</parameter> :</term>
2508
<listitem><simpara> the maximum length (number of characters) of <parameter>str</parameter> to use.
2509
If <parameter>len</parameter> &lt; 0, then the string is nul-terminated.
2510
</simpara></listitem></varlistentry>
2511
<varlistentry><term><parameter>items_read</parameter> :</term>
2512
<listitem><simpara> location to store number of characters read, or <link linkend="NULL--CAPS"><literal>NULL</literal></link>.
2513
If an error occurs then the index of the invalid input is stored here.
2515
</simpara></listitem></varlistentry>
2516
<varlistentry><term><parameter>items_written</parameter> :</term>
2517
<listitem><simpara> location to store number of <type>gunichar2</type>
2518
written, or <link linkend="NULL--CAPS"><literal>NULL</literal></link>. The value stored here does not
2519
include the trailing 0.
2520
</simpara></listitem></varlistentry>
2521
<varlistentry><term><parameter>error</parameter> :</term>
2522
<listitem><simpara> location to store the error occurring, or <link linkend="NULL--CAPS"><literal>NULL</literal></link> to ignore
2523
errors. Any of the errors in <link linkend="GConvertError"><type>GConvertError</type></link> other than
2524
<link linkend="G-CONVERT-ERROR-NO-CONVERSION--CAPS"><literal>G_CONVERT_ERROR_NO_CONVERSION</literal></link> may occur.
2525
</simpara></listitem></varlistentry>
2526
<varlistentry><term><emphasis>Returns</emphasis> :</term><listitem><simpara> a pointer to a newly allocated UTF-16 string.
2527
This value must be freed with <link linkend="g-free"><function>g_free()</function></link>. If an
2528
error occurs, <link linkend="NULL--CAPS"><literal>NULL</literal></link> will be returned and
2529
<parameter>error</parameter> set.
2530
</simpara></listitem></varlistentry>
2531
</variablelist></refsect2>
2532
<refsect2 id="g-ucs4-to-utf8" role="function">
2533
<title>g_ucs4_to_utf8 ()</title>
2534
<indexterm zone="g-ucs4-to-utf8"><primary sortas="ucs4_to_utf8">g_ucs4_to_utf8</primary></indexterm><programlisting><link linkend="gchar">gchar</link>* g_ucs4_to_utf8 (const <link linkend="gunichar">gunichar</link> *str,
2535
<link linkend="glong">glong</link> len,
2536
<link linkend="glong">glong</link> *items_read,
2537
<link linkend="glong">glong</link> *items_written,
2538
<link linkend="GError">GError</link> **error);</programlisting>
2540
Convert a string from a 32-bit fixed width representation as UCS-4
2541
to UTF-8. The result will be terminated with a 0 byte.</para>
2543
</para><variablelist role="params">
2544
<varlistentry><term><parameter>str</parameter> :</term>
2545
<listitem><simpara> a UCS-4 encoded string
2546
</simpara></listitem></varlistentry>
2547
<varlistentry><term><parameter>len</parameter> :</term>
2548
<listitem><simpara> the maximum length (number of characters) of <parameter>str</parameter> to use.
2549
If <parameter>len</parameter> &lt; 0, then the string is nul-terminated.
2550
</simpara></listitem></varlistentry>
2551
<varlistentry><term><parameter>items_read</parameter> :</term>
2552
<listitem><simpara> location to store number of characters read, or <link linkend="NULL--CAPS"><literal>NULL</literal></link>.
2553
</simpara></listitem></varlistentry>
2554
<varlistentry><term><parameter>items_written</parameter> :</term>
2555
<listitem><simpara> location to store number of bytes written or <link linkend="NULL--CAPS"><literal>NULL</literal></link>.
2556
The value stored here does not include the trailing 0 byte.
2558
</simpara></listitem></varlistentry>
2559
<varlistentry><term><parameter>error</parameter> :</term>
2560
<listitem><simpara> location to store the error occurring, or <link linkend="NULL--CAPS"><literal>NULL</literal></link> to ignore
2561
errors. Any of the errors in <link linkend="GConvertError"><type>GConvertError</type></link> other than
2562
<link linkend="G-CONVERT-ERROR-NO-CONVERSION--CAPS"><literal>G_CONVERT_ERROR_NO_CONVERSION</literal></link> may occur.
2563
</simpara></listitem></varlistentry>
2564
<varlistentry><term><emphasis>Returns</emphasis> :</term><listitem><simpara> a pointer to a newly allocated UTF-8 string.
2565
This value must be freed with <link linkend="g-free"><function>g_free()</function></link>. If an
2566
error occurs, <link linkend="NULL--CAPS"><literal>NULL</literal></link> will be returned and
2567
<parameter>error</parameter> set. In that case, <parameter>items_read</parameter> will be
2568
set to the position of the first invalid input character.
2570
</simpara></listitem></varlistentry>
2571
</variablelist></refsect2>
2572
<refsect2 id="g-unichar-to-utf8" role="function">
2573
<title>g_unichar_to_utf8 ()</title>
2574
<indexterm zone="g-unichar-to-utf8"><primary sortas="unichar_to_utf8">g_unichar_to_utf8</primary></indexterm><programlisting><link linkend="gint">gint</link> g_unichar_to_utf8 (<link linkend="gunichar">gunichar</link> c,
2575
<link linkend="gchar">gchar</link> *outbuf);</programlisting>
2577
Converts a single character to UTF-8.</para>
2579
</para><variablelist role="params">
2580
<varlistentry><term><parameter>c</parameter> :</term>
2581
<listitem><simpara> a Unicode character code
2582
</simpara></listitem></varlistentry>
2583
<varlistentry><term><parameter>outbuf</parameter> :</term>
2584
<listitem><simpara> output buffer, must have at least 6 bytes of space.
2585
If <link linkend="NULL--CAPS"><literal>NULL</literal></link>, the length will be computed and returned
2586
and nothing will be written to <parameter>outbuf</parameter>.
2587
</simpara></listitem></varlistentry>
2588
<varlistentry><term><emphasis>Returns</emphasis> :</term><listitem><simpara> number of bytes written
2589
</simpara></listitem></varlistentry>
2590
</variablelist></refsect2>
2596
<refsect1 id="glib-Unicode-Manipulation.see-also">
2597
<title>See Also</title>
2601
<term><link linkend="g-locale-to-utf8"><function>g_locale_to_utf8()</function></link>, <link linkend="g-locale-from-utf8"><function>g_locale_from_utf8()</function></link></term>
2603
Convenience functions for converting between UTF-8 and the locale encoding.