2
/////////////////////////////
4
// (C)2002 Ryo Chijiiwa <Ryo@IlohaMail.org>
7
// UTF-8 handling functions
9
// This file is part of IlohaMail. IlohaMail is free software released
10
// under the GPL license. See enclosed file COPYING for details, or
11
// see http://www.fsf.org/copyleft/gpl.html
12
////////////////////////////
15
* takes a string of utf-8 encoded characters and converts it to a string of unicode entities
16
* each unicode entity has the form &#nnnnn; n={0..9} and can be displayed by utf-8 supporting
18
* @param $source string encoded using utf-8 [STRING]
19
* @return string of unicode entities [STRING]
23
* Author: ronen at greyzone dot com
24
* Taken from php.net comment:
25
* http://www.php.net/manual/en/function.utf8-decode.php
27
function utf8ToUnicodeEntities ($source) {
// Purpose: walk the bytes of $source, group them into UTF-8 sequences based on
// the value of each leading byte, fold every sequence into one decimal code and
// append it to the result as a "&#<digits>;" entity.
// NOTE(review): only part of this function is visible here -- the set-up of
// $decrement, $shift, $pos, $thisPos, $decimalCode and $encodedString, the outer
// loop header, the final else header and the closing braces are not shown.
// Confirm the notes below against the complete file.
28
// array used to figure what number to decrement from character order value
29
// according to number of characters used to map unicode to ascii by utf-8
35
// the number of bits to shift each charNum by
48
// length of the input in bytes (strlen counts bytes, not characters)
$len = strlen ($source);
51
// numeric value (0-255) of the byte at the current position; it selects the
// sequence length in the range checks below
$asciiPos = ord (substr ($source, $pos, 1));
52
// NOTE(review): this range also accepts 248-255, which are not valid UTF-8
// lead bytes; they are handled as 4-byte leads here.
if (($asciiPos >= 240) && ($asciiPos <= 255)) {
53
// 4 chars representing one unicode character
54
// substr returns fewer than 4 bytes when the input ends early, so the
// strlen-derived $thisLen below may be smaller than the lead byte implies
$thisLetter = substr ($source, $pos, 4);
57
else if (($asciiPos >= 224) && ($asciiPos <= 239)) {
58
// 3 chars representing one unicode character
59
$thisLetter = substr ($source, $pos, 3);
62
else if (($asciiPos >= 192) && ($asciiPos <= 223)) {
63
// 2 chars representing one unicode character
64
$thisLetter = substr ($source, $pos, 2);
68
// 1 char (lower ascii)
// NOTE(review): presumably the fall-through branch, in which case bytes 128-191
// (continuation bytes with no lead byte) would also land here -- confirm.
69
$thisLetter = substr ($source, $pos, 1);
73
// process the string representing the letter to a unicode entity
74
// number of bytes actually extracted; used as the index into $decrement and $shift
$thisLen = strlen ($thisLetter);
77
// visit each byte of the extracted sequence and accumulate into $decimalCode
while ($thisPos < $thisLen) {
78
$thisCharOrd = ord (substr ($thisLetter, $thisPos, 1));
80
// subtract the per-length value from $decrement, then shift the remainder into
// place using the $shift table
// NOTE(review): presumably applies to the first byte of the sequence only; the
// guarding condition is not visible here.
$charNum = intval ($thisCharOrd - $decrement[$thisLen]);
81
$decimalCode += ($charNum << $shift[$thisLen][$thisPos]);
84
// subtract 128 (clears the top bit of a 10xxxxxx byte) and shift into place
// NOTE(review): presumably the branch for the non-leading bytes -- confirm.
$charNum = intval ($thisCharOrd - 128);
85
$decimalCode += ($charNum << $shift[$thisLen][$thisPos]);
92
// two output forms: the decimal code left-padded with zeros to at least 3 digits...
// NOTE(review): the condition choosing between the two forms is not visible here.
$encodedLetter = "&#". str_pad($decimalCode, 3, "0", STR_PAD_LEFT) . ';';
94
// ...or left-padded with zeros to at least 5 digits
$encodedLetter = "&#". str_pad($decimalCode, 5, "0", STR_PAD_LEFT) . ';';
96
// append this character's entity to the accumulated output
$encodedString .= $encodedLetter;
99
// hand back the accumulated entity string
return $encodedString;
b'\\ No newline at end of file'