2
*******************************************************************************
3
* Copyright (C) 2010, International Business Machines
4
* Corporation and others. All Rights Reserved.
5
*******************************************************************************
8
* tab size: 8 (not used)
11
* created on: 2010mar05
12
* created by: Markus W. Scherer
20
* \brief C++ API: Internationalizing Domain Names in Applications (IDNA)
23
#include "unicode/utypes.h"
27
#include "unicode/bytestream.h"
28
#include "unicode/stringpiece.h"
29
#include "unicode/uidna.h"
30
#include "unicode/unistr.h"
34
// Forward declaration: IDNAInfo appears in the IDNA method signatures below,
// ahead of its own class definition later in this header.
class U_COMMON_API IDNAInfo;
37
* Abstract base class for IDNA processing.
38
* See http://www.unicode.org/reports/tr46/
39
* and http://www.ietf.org/rfc/rfc3490.txt
41
* The IDNA class is not intended for public subclassing.
43
* This C++ API currently only implements UTS #46.
44
* The uidna.h C API implements both UTS #46 (functions using a UIDNA service object)
45
* and IDNA2003 (functions that do not use a service object).
48
class U_COMMON_API IDNA : public UObject {
51
* Returns an IDNA instance which implements UTS #46.
52
* Returns an unmodifiable instance, owned by the caller.
53
* Cache it for multiple operations, and delete it when done.
54
* The instance is thread-safe, that is, it can be used concurrently.
56
* UTS #46 defines Unicode IDNA Compatibility Processing,
57
* updated to the latest version of Unicode and compatible with both
58
* IDNA2003 and IDNA2008.
60
* The worker functions use transitional processing, including deviation mappings,
61
* unless UIDNA_NONTRANSITIONAL_TO_ASCII or UIDNA_NONTRANSITIONAL_TO_UNICODE
62
* is used, in which case the deviation characters are passed through without change.
64
* Disallowed characters are mapped to U+FFFD.
66
* For available options see the uidna.h header.
67
* Operations with the UTS #46 instance do not support the
68
* UIDNA_ALLOW_UNASSIGNED option.
70
* By default, the UTS #46 implementation allows all ASCII characters (as valid or mapped).
71
* When the UIDNA_USE_STD3_RULES option is used, ASCII characters other than
72
* letters, digits, hyphen (LDH) and dot/full stop are disallowed and mapped to U+FFFD.
74
* @param options Bit set to modify the processing and error checking.
75
* See option bit set values in uidna.h.
76
* @param errorCode Standard ICU error code. Its input value must
77
* pass the U_SUCCESS() test, or else the function returns
78
* immediately. Check for U_FAILURE() on output or use with
79
* function chaining. (See User Guide for details.)
80
* @return the UTS #46 IDNA instance, if successful
84
createUTS46Instance(uint32_t options, UErrorCode &errorCode);
87
* Converts a single domain name label into its ASCII form for DNS lookup.
88
* If any processing step fails, then info.hasErrors() will be TRUE and
89
* the result might not be an ASCII string.
90
* The label might be modified according to the types of errors.
91
* Labels with severe errors will be left in (or turned into) their Unicode form.
93
* The UErrorCode indicates an error only in exceptional cases,
94
* such as a U_MEMORY_ALLOCATION_ERROR.
96
* @param label Input domain name label
97
* @param dest Destination string object
98
* @param info Output container of IDNA processing details.
99
* @param errorCode Standard ICU error code. Its input value must
100
* pass the U_SUCCESS() test, or else the function returns
101
* immediately. Check for U_FAILURE() on output or use with
102
* function chaining. (See User Guide for details.)
106
// Pure virtual (= 0) and const: only the label-to-ASCII operation is declared here.
// NOTE(review): the returned UnicodeString reference is presumably dest;
// no @return text is visible in this excerpt -- confirm.
virtual UnicodeString &
107
labelToASCII(const UnicodeString &label, UnicodeString &dest,
108
IDNAInfo &info, UErrorCode &errorCode) const = 0;
111
* Converts a single domain name label into its Unicode form for human-readable display.
112
* If any processing step fails, then info.hasErrors() will be TRUE.
113
* The label might be modified according to the types of errors.
115
* The UErrorCode indicates an error only in exceptional cases,
116
* such as a U_MEMORY_ALLOCATION_ERROR.
118
* @param label Input domain name label
119
* @param dest Destination string object
120
* @param info Output container of IDNA processing details.
121
* @param errorCode Standard ICU error code. Its input value must
122
* pass the U_SUCCESS() test, or else the function returns
123
* immediately. Check for U_FAILURE() on output or use with
124
* function chaining. (See User Guide for details.)
128
// Pure virtual (= 0) and const: only the label-to-Unicode operation is declared here.
// NOTE(review): the returned UnicodeString reference is presumably dest;
// no @return text is visible in this excerpt -- confirm.
virtual UnicodeString &
129
labelToUnicode(const UnicodeString &label, UnicodeString &dest,
130
IDNAInfo &info, UErrorCode &errorCode) const = 0;
133
* Converts a whole domain name into its ASCII form for DNS lookup.
134
* If any processing step fails, then info.hasErrors() will be TRUE and
135
* the result might not be an ASCII string.
136
* The domain name might be modified according to the types of errors.
137
* Labels with severe errors will be left in (or turned into) their Unicode form.
139
* The UErrorCode indicates an error only in exceptional cases,
140
* such as a U_MEMORY_ALLOCATION_ERROR.
142
* @param name Input domain name
143
* @param dest Destination string object
144
* @param info Output container of IDNA processing details.
145
* @param errorCode Standard ICU error code. Its input value must
146
* pass the U_SUCCESS() test, or else the function returns
147
* immediately. Check for U_FAILURE() on output or use with
148
* function chaining. (See User Guide for details.)
152
// Pure virtual (= 0) and const: only the whole-name-to-ASCII operation is declared here.
// NOTE(review): the returned UnicodeString reference is presumably dest;
// no @return text is visible in this excerpt -- confirm.
virtual UnicodeString &
153
nameToASCII(const UnicodeString &name, UnicodeString &dest,
154
IDNAInfo &info, UErrorCode &errorCode) const = 0;
157
* Converts a whole domain name into its Unicode form for human-readable display.
158
* If any processing step fails, then info.hasErrors() will be TRUE.
159
* The domain name might be modified according to the types of errors.
161
* The UErrorCode indicates an error only in exceptional cases,
162
* such as a U_MEMORY_ALLOCATION_ERROR.
164
* @param name Input domain name
165
* @param dest Destination string object
166
* @param info Output container of IDNA processing details.
167
* @param errorCode Standard ICU error code. Its input value must
168
* pass the U_SUCCESS() test, or else the function returns
169
* immediately. Check for U_FAILURE() on output or use with
170
* function chaining. (See User Guide for details.)
174
// Pure virtual (= 0) and const: only the whole-name-to-Unicode operation is declared here.
// NOTE(review): the returned UnicodeString reference is presumably dest;
// no @return text is visible in this excerpt -- confirm.
virtual UnicodeString &
175
nameToUnicode(const UnicodeString &name, UnicodeString &dest,
176
IDNAInfo &info, UErrorCode &errorCode) const = 0;
178
// UTF-8 versions of the processing methods ---------------------------- ***
181
* Converts a single domain name label into its ASCII form for DNS lookup.
182
* UTF-8 version of labelToASCII(), same behavior.
184
* @param label Input domain name label
185
* @param dest Destination byte sink; Flush()ed if successful
186
* @param info Output container of IDNA processing details.
187
* @param errorCode Standard ICU error code. Its input value must
188
* pass the U_SUCCESS() test, or else the function returns
189
* immediately. Check for U_FAILURE() on output or use with
190
* function chaining. (See User Guide for details.)
195
labelToASCII_UTF8(const StringPiece &label, ByteSink &dest,
196
IDNAInfo &info, UErrorCode &errorCode) const;
199
* Converts a single domain name label into its Unicode form for human-readable display.
200
* UTF-8 version of labelToUnicode(), same behavior.
202
* @param label Input domain name label
203
* @param dest Destination byte sink; Flush()ed if successful
204
* @param info Output container of IDNA processing details.
205
* @param errorCode Standard ICU error code. Its input value must
206
* pass the U_SUCCESS() test, or else the function returns
207
* immediately. Check for U_FAILURE() on output or use with
208
* function chaining. (See User Guide for details.)
213
labelToUnicodeUTF8(const StringPiece &label, ByteSink &dest,
214
IDNAInfo &info, UErrorCode &errorCode) const;
217
* Converts a whole domain name into its ASCII form for DNS lookup.
218
* UTF-8 version of nameToASCII(), same behavior.
220
* @param name Input domain name
221
* @param dest Destination byte sink; Flush()ed if successful
222
* @param info Output container of IDNA processing details.
223
* @param errorCode Standard ICU error code. Its input value must
224
* pass the U_SUCCESS() test, or else the function returns
225
* immediately. Check for U_FAILURE() on output or use with
226
* function chaining. (See User Guide for details.)
231
nameToASCII_UTF8(const StringPiece &name, ByteSink &dest,
232
IDNAInfo &info, UErrorCode &errorCode) const;
235
* Converts a whole domain name into its Unicode form for human-readable display.
236
* UTF-8 version of nameToUnicode(), same behavior.
238
* @param name Input domain name
239
* @param dest Destination byte sink; Flush()ed if successful
240
* @param info Output container of IDNA processing details.
241
* @param errorCode Standard ICU error code. Its input value must
242
* pass the U_SUCCESS() test, or else the function returns
243
* immediately. Check for U_FAILURE() on output or use with
244
* function chaining. (See User Guide for details.)
249
nameToUnicodeUTF8(const StringPiece &name, ByteSink &dest,
250
IDNAInfo &info, UErrorCode &errorCode) const;
253
// No ICU "poor man's RTTI" for this class nor its subclasses.
// Declaration only; no inline body is given at this point in the header.
// NOTE(review): presumably overrides a virtual inherited from UObject
// (IDNA derives from UObject) -- confirm.
254
virtual UClassID getDynamicClassID() const;
260
* Output container for IDNA processing errors.
261
* The IDNAInfo class is not suitable for subclassing.
264
class U_COMMON_API IDNAInfo : public UMemory {
267
* Constructor for stack allocation.
270
// Default constructor, usable for stack allocation.
// Starts with both error bit sets cleared, no transitional difference
// recorded, isBiDi cleared and isOkBiDi set.
IDNAInfo()
        : errors(0),
          labelErrors(0),
          isTransDiff(FALSE),
          isBiDi(FALSE),
          isOkBiDi(TRUE) {
}
272
* Were there IDNA processing errors?
273
* @return TRUE if there were processing errors
276
// TRUE exactly when at least one bit is set in the errors bit set.
UBool hasErrors() const {
    return errors!=0;
}
278
* Returns a bit set indicating IDNA processing errors.
279
* See UIDNA_ERROR_... constants in uidna.h.
280
* @return bit set of processing errors
283
// Hands back the stored errors bit set unchanged.
uint32_t getErrors() const {
    return errors;
}
285
* Returns TRUE if transitional and nontransitional processing produce different results.
286
* This is the case when the input label or domain name contains
287
* one or more deviation characters outside a Punycode label (see UTS #46).
289
* <li>With nontransitional processing, such characters are
290
* copied to the destination string.
291
* <li>With transitional processing, such characters are
292
* mapped (sharp s/sigma) or removed (joiner/nonjoiner).
294
* @return TRUE if transitional and nontransitional processing produce different results
297
// Reports the stored isTransDiff flag.
UBool isTransitionalDifferent() const {
    return isTransDiff;
}
302
// Copying is not supported: the copy constructor is declared here without a body.
IDNAInfo(const IDNAInfo &other); // no copying
303
// Likewise, copy assignment is declared here without a body.
IDNAInfo &operator=(const IDNAInfo &other); // no copying
306
errors=labelErrors=0;
312
// errors: bit set tested by hasErrors() and returned by getErrors().
// labelErrors: zeroed together with errors.
// NOTE(review): presumably holds the error bits of the label currently being processed -- confirm.
uint32_t errors, labelErrors;
320
#endif // UCONFIG_NO_IDNA