~ubuntu-branches/ubuntu/trusty/cajun/trusty

« back to all changes in this revision

Viewing changes to debian/patches/0001-Fix-for-embedded-unicode.patch

  • Committer: Package Import Robot
  • Author(s): Daniel Pocock
  • Date: 2013-09-26 14:03:24 UTC
  • mfrom: (1.1.1)
  • Revision ID: package-import@ubuntu.com-20130926140324-m8cerngq0ct7rfoj
Tags: 2.0.3-1
* New upstream release
* Upstream now includes UTF-8 fix, local patch removed

Show diffs side-by-side

added

removed

Lines of Context:
1
 
diff --git a/json/reader.inl b/json/reader.inl
2
 
index fc20833..60c1b93 100644
3
 
--- a/json/reader.inl
4
 
+++ b/json/reader.inl
5
 
@@ -35,7 +35,6 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
6
 
 
7
 
 TODO:
8
 
 * better documentation
9
 
-* unicode character decoding
10
 
 
11
 
 */
12
 
 
13
 
@@ -308,7 +307,7 @@ inline std::string Reader::MatchString(InputStream& inputStream)
14
 
 
15
 
       // escape?
16
 
       if (c == '\\' &&
17
 
-          inputStream.EOS() == false) // shouldn't have reached the end yet
18
 
+         inputStream.EOS() == false) // shouldn't have reached the end yet
19
 
       {
20
 
          c = inputStream.Get();
21
 
          switch (c) {
22
 
@@ -320,7 +319,37 @@ inline std::string Reader::MatchString(InputStream& inputStream)
23
 
             case 'n':      string.push_back('\n');    break;
24
 
             case 'r':      string.push_back('\r');    break;
25
 
             case 't':      string.push_back('\t');    break;
26
 
-            case 'u':      string.push_back('\u');    break; // TODO: what do we do with this?
27
 
+            case 'u': { // convert unicode to UTF-8
28
 
+               int x = 0, i;
29
 
+
30
 
+               // next four characters should be hex
31
 
+               for (i = 0; i < 4; ++i) {
32
 
+                  c = inputStream.Get();
33
 
+                  if (c >= '0' && c <= '9') {
34
 
+                      x = (x << 4) | (c - '0');
35
 
+                  } else if (c >= 'a' && c <= 'f') {
36
 
+                      x = (x << 4) | (c - 'a' + 10);
37
 
+                  } else if (c >= 'A' && c <= 'F') {
38
 
+                      x = (x << 4) | (c - 'A' + 10);
39
 
+                  } else {
40
 
+                     std::string sMessage = std::string("Unrecognized hexadecimal character found in string: ") + c;
41
 
+                     throw ScanException(sMessage, inputStream.GetLocation());
42
 
+                  }
43
 
+               }
44
 
+
45
 
+               // encode as UTF-8
46
 
+               if (x < 0x80) {
47
 
+                   string.push_back(x);
48
 
+               } else if (x < 0x800) {
49
 
+                   string.push_back(0xc0 | (x >> 6));
50
 
+                   string.push_back(0x80 | (x & 0x3f));
51
 
+               } else {
52
 
+                   string.push_back(0xe0 | (x >> 12));
53
 
+                   string.push_back(0x80 | ((x >> 6) & 0x3f));
54
 
+                   string.push_back(0x80 | (x & 0x3f));
55
 
+               }
56
 
+               break;
57
 
+            }
58
 
             default: {
59
 
                std::string sMessage = std::string("Unrecognized escape sequence found in string: \\") + c;
60
 
                throw ScanException(sMessage, inputStream.GetLocation());
61
 
diff --git a/json/writer.inl b/json/writer.inl
62
 
index b16401b..27226b6 100644
63
 
--- a/json/writer.inl
64
 
+++ b/json/writer.inl
65
 
@@ -35,7 +35,6 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
66
 
 
67
 
 TODO:
68
 
 * better documentation
69
 
-* unicode character encoding
70
 
 
71
 
 */
72
 
 
73
 
@@ -122,7 +121,7 @@ inline void Writer::Write_i(const Object& object)
74
 
 
75
 
 inline void Writer::Write_i(const Number& numberElement)
76
 
 {
77
 
-   m_ostr << std::setprecision(20) << numberElement.Value();
78
 
+   m_ostr << std::dec << std::setprecision(20) << numberElement.Value();
79
 
 }
80
 
 
81
 
 inline void Writer::Write_i(const Boolean& booleanElement)
82
 
@@ -139,6 +138,48 @@ inline void Writer::Write_i(const String& stringElement)
83
 
                                itEnd(s.end());
84
 
    for (; it != itEnd; ++it)
85
 
    {
86
 
+      // check for UTF-8 unicode encoding
87
 
+      unsigned char u = static_cast<unsigned char>(*it);
88
 
+      if (u & 0xc0) {
89
 
+         if ((u & 0xe0) == 0xc0) {
90
 
+            // two-character sequence
91
 
+            int x = (*it & 0x1f) << 6;
92
 
+            if ((it + 1) == itEnd) {
93
 
+               m_ostr << *it; continue;
94
 
+            }
95
 
+            u = static_cast<unsigned char>(*(it + 1));
96
 
+            if ((u & 0xc0) == 0x80) {
97
 
+               x |= u & 0x3f;
98
 
+               m_ostr << "\\u" << std::hex << std::setfill('0')
99
 
+                  << std::setw(4) << x;
100
 
+               ++it;
101
 
+               continue;
102
 
+            }
103
 
+
104
 
+         } else if ((u & 0xf0) == 0xe0) {
105
 
+            // three-character sequence
106
 
+            int x = (u & 0x0f) << 12;
107
 
+            if ((it + 1) == itEnd) {
108
 
+               m_ostr << *it; continue;
109
 
+            }
110
 
+            u = static_cast<unsigned char>(*(it + 1));
111
 
+            if ((u & 0xc0) == 0x80) {
112
 
+               x |= (u & 0x3f) << 6;
113
 
+               if ((it + 2) == itEnd) {
114
 
+                  m_ostr << *it; continue;
115
 
+               }
116
 
+               u = static_cast<unsigned char>(*(it + 2));
117
 
+               if ((u & 0xc0) == 0x80) {
118
 
+                  x |= u & 0x3f;
119
 
+                  m_ostr << "\\u" << std::hex << std::setfill('0')
120
 
+                     << std::setw(4) << x;
121
 
+                  it = it + 2;
122
 
+                  continue;
123
 
+               }
124
 
+            }
125
 
+         }
126
 
+      }
127
 
+
128
 
       switch (*it)
129
 
       {
130
 
          case '"':         m_ostr << "\\\"";   break;
131
 
@@ -148,7 +189,6 @@ inline void Writer::Write_i(const String& stringElement)
132
 
          case '\n':        m_ostr << "\\n";    break;
133
 
          case '\r':        m_ostr << "\\r";    break;
134
 
          case '\t':        m_ostr << "\\t";    break;
135
 
-         case '\u':        m_ostr << "\\u";    break; // uh...
136
 
          default:          m_ostr << *it;      break;
137
 
       }
138
 
    }