24
#include "tu_config.h" // For DSOEXPORT
24
#include "dsodefs.h" // For DSOEXPORT
26
#include <boost/cstdint.hpp> // for boost::?int??_t
26
#include <boost/cstdint.hpp> // for C99 int types
28
28
/// Utilities to convert between std::string and std::wstring.
35
35
/// allowing many thousands of unique codes. Multibyte characters are
36
36
/// difficult to handle, as their length - used for many string
37
37
/// operations - is not certain without parsing the string.
38
/// Converting the string to a wstring (generally a uint32_t - how
39
/// many codes the reference player can deal with is unknown)
40
/// facilitates string operations, as the length of the string
41
/// is equal to the number of valid characters.
38
/// Converting the string to a wstring (generally a uint32_t - the
39
/// pp seems only to handle characters up to 65535 - two bytes is
40
/// the minimum size of a wchar) facilitates string operations, as
41
/// the length of the string is equal to the number of valid characters.
43
43
/// SWF5 and earlier, however, used the ISO-8859 specification,
44
44
/// allowing the standard 128 ASCII characters plus 128 extra
53
53
/// gnash::edit_text_character, ord() and chr().
56
/// Sentinel constant: -1 converts to the largest boost::uint32_t value
/// (0xFFFFFFFF). NOTE(review): presumably marks an invalid or
/// undecodable character code - confirm against its users.
static const boost::uint32_t invalid = -1;
56
58
/// Converts a std::string with multibyte characters into a std::wstring.
58
60
/// @return a version-dependent wstring.
82
84
/// as output. Advances string iterator past the character
83
85
/// returned, unless the returned character is '\0', in which
84
86
/// case the iterator does not advance.
85
/// @param it  Iterator into the string being decoded; taken by non-const
///            reference so the advanced position is visible to the caller.
/// @return    The decoded character code.
/// NOTE(review): this overload takes no end iterator, so the signature
/// gives no bound on how far 'it' may advance - confirm that callers
/// guarantee a terminating '\0'.
boost::uint32_t decodeNextUnicodeCharacter(std::string::const_iterator& it);
87
/// @param it  Iterator into the string being decoded; taken by non-const
///            reference so the advanced position is visible to the caller.
/// @param e   NOTE(review): presumably the end of the input range, so that
///            decoding can stop before reading past it - confirm in the
///            definition.
/// @return    The decoded character code.
DSOEXPORT boost::uint32_t decodeNextUnicodeCharacter(std::string::const_iterator& it,
88
const std::string::const_iterator& e);
87
90
/// \brief Encodes the given wide character into a canonical
88
91
/// string, theoretically up to 6 chars in length.
89
/// @param ucs_character  The character code to encode.
/// @return               The encoded character, as a std::string.
std::string encodeUnicodeCharacter(boost::uint32_t ucs_character);
92
/// @param ucs_character  The character code to encode.
/// @return               The encoded character, as a std::string.
DSOEXPORT std::string encodeUnicodeCharacter(boost::uint32_t ucs_character);
91
94
/// Encodes the given wide character into an at least 8-bit character.
93
96
/// Allows storage of Latin1 (ISO-8859-1) characters. This
94
97
/// is the format of SWF5 and below.
95
98
/// @param ucsCharacter  The character code to encode.
/// @return              The encoded character, as a std::string.
std::string encodeLatin1Character(boost::uint32_t ucsCharacter);
113
/// Interpret (and skip) Byte Order Mark in input stream
115
/// This function takes a pointer to a buffer and returns
116
/// the start of the actual data, after the BOM if one is present.
117
/// No conversion is performed and no bytes are copied; the BOM is
118
/// simply skipped, and its interpretation is returned through the
119
/// 'encoding' output parameter.
121
/// See http://en.wikipedia.org/wiki/Byte-order_mark
124
/// The input buffer.
127
/// Size of the input buffer, will be decremented by the
128
/// size of the BOM, if any.
131
/// Output parameter, will always be set.
132
/// encUNSPECIFIED if no BOM is found.
135
/// A pointer either equal to 'in' or some bytes inside it.
137
/// NOTE: the returned pointer refers into the same buffer as 'in' (nothing
/// is copied), so it is only usable for as long as that buffer is.
DSOEXPORT char* stripBOM(char* in, size_t& size, TextEncoding& encoding);
139
/// Return name of a text encoding
140
/// @param enc  The encoding to name.
/// @return     A C string naming @a enc. NOTE(review): presumably points to
///             static storage that the caller must not free - confirm.
DSOEXPORT const char* textEncodingName(TextEncoding enc);