1
/******************************************************************************
3
Copyright (c) 2009-2010, Terry Caton
6
Redistribution and use in source and binary forms, with or without
7
modification, are permitted provided that the following conditions are met:
8
* Redistributions of source code must retain the above copyright
9
notice, this list of conditions and the following disclaimer.
10
* Redistributions in binary form must reproduce the above copyright
11
notice, this list of conditions and the following disclaimer in the
12
documentation and/or other materials provided with the distribution.
13
* Neither the name of the project nor the names of its contributors
14
may be used to endorse or promote products derived from this software
15
without specific prior written permission.
17
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
18
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
20
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
21
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
22
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
23
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
24
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
26
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
******************************************************************************/
37
* better documentation
38
* unicode character decoding
45
inline std::istream& operator >> (std::istream& istr, UnknownElement& elementRoot) {
46
Reader::Read(elementRoot, istr);
50
// Default constructor for Reader::Location; its member initializers follow the colon.
inline Reader::Location::Location() :
57
//////////////////////
58
// Reader::InputStream
60
class Reader::InputStream // would be cool if we could inherit from std::istream & override "get"
63
InputStream(std::istream& iStr) :
66
// protect access to the input stream, so we can keeep track of document/line offsets
67
char Get(); // big, define outside
69
assert(m_iStr.eof() == false); // enforce reading of only valid stream data
74
m_iStr.peek(); // apparently eof flag isn't set until a character read is attempted. whatever.
78
const Location& GetLocation() const { return m_Location; }
86
inline char Reader::InputStream::Get()
88
assert(m_iStr.eof() == false); // enforce reading of only valid stream data
89
char c = m_iStr.get();
91
++m_Location.m_nDocOffset;
94
m_Location.m_nLineOffset = 0;
97
++m_Location.m_nLineOffset;
105
//////////////////////
106
// Reader::TokenStream
108
class Reader::TokenStream
111
TokenStream(const Tokens& tokens);
119
const Tokens& m_Tokens;
120
Tokens::const_iterator m_itCurrent;
124
// Binds the stream to an existing token list and places the cursor on its first token.
inline Reader::TokenStream::TokenStream(const Tokens& tokens) :
   m_Tokens(tokens),
   m_itCurrent(tokens.begin())
{
}
129
// Returns the token under the cursor without advancing.
//
// Throws ParseException ("Unexpected end of token stream") when the cursor is
// past the last token. The reported location is that of the last token, or a
// default-constructed Location when the token list is empty (e.g. the input
// was empty or held only white space), since there is then no token to read
// a location from.
inline const Reader::Token& Reader::TokenStream::Peek()
{
   if (EOS())
   {
      std::string sMessage = "Unexpected end of token stream";
      if (m_Tokens.empty())
      {
         // dereferencing rbegin() of an empty container would be undefined behaviour
         throw ParseException(sMessage, Location(), Location());
      }

      const Token& lastToken = *m_Tokens.rbegin();
      throw ParseException(sMessage, lastToken.locBegin, lastToken.locEnd); // nowhere to point to
   }

   return *(m_itCurrent);
}
139
// Returns the token under the cursor and steps the cursor forward by one.
// Peek() performs the end-of-stream check, so this throws exactly when Peek() does.
inline const Reader::Token& Reader::TokenStream::Get()
{
   const Token& current = Peek();
   ++m_itCurrent;
   return current;
}
145
inline bool Reader::TokenStream::EOS() const {
146
return m_itCurrent == m_Tokens.end();
153
inline void Reader::Read(Object& object, std::istream& istr) { Read_i(object, istr); }
154
inline void Reader::Read(Array& array, std::istream& istr) { Read_i(array, istr); }
155
inline void Reader::Read(String& string, std::istream& istr) { Read_i(string, istr); }
156
inline void Reader::Read(Number& number, std::istream& istr) { Read_i(number, istr); }
157
inline void Reader::Read(Boolean& boolean, std::istream& istr) { Read_i(boolean, istr); }
158
inline void Reader::Read(Null& null, std::istream& istr) { Read_i(null, istr); }
159
inline void Reader::Read(UnknownElement& unknown, std::istream& istr) { Read_i(unknown, istr); }
162
template <typename ElementTypeT>
163
void Reader::Read_i(ElementTypeT& element, std::istream& istr)
168
InputStream inputStream(istr);
169
reader.Scan(tokens, inputStream);
171
TokenStream tokenStream(tokens);
172
reader.Parse(element, tokenStream);
174
if (tokenStream.EOS() == false)
176
const Token& token = tokenStream.Peek();
177
std::string sMessage = std::string("Expected End of token stream; found ") + token.sValue;
178
throw ParseException(sMessage, token.locBegin, token.locEnd);
183
// Tokenizer: splits the input into tokens and appends them to `tokens`.
//
// The first non-white-space character decides which kind of token to match.
// Each token records where it begins and ends in the input. A character that
// cannot start any token raises ScanException.
inline void Reader::Scan(Tokens& tokens, InputStream& inputStream)
{
   for (;;)
   {
      EatWhiteSpace(inputStream);   // ignore any leading white space...
      if (inputStream.EOS())        // ...before checking for EOS
         break;

      // if all goes well, we'll create a token each pass
      Token token;
      token.locBegin = inputStream.GetLocation();

      // the next character selects the token type
      const char cNext = inputStream.Peek();
      switch (cNext)
      {
         case '{':
            token.sValue = MatchExpectedString(inputStream, "{");
            token.nType = Token::TOKEN_OBJECT_BEGIN;
            break;

         case '}':
            token.sValue = MatchExpectedString(inputStream, "}");
            token.nType = Token::TOKEN_OBJECT_END;
            break;

         case '[':
            token.sValue = MatchExpectedString(inputStream, "[");
            token.nType = Token::TOKEN_ARRAY_BEGIN;
            break;

         case ']':
            token.sValue = MatchExpectedString(inputStream, "]");
            token.nType = Token::TOKEN_ARRAY_END;
            break;

         case ',':
            token.sValue = MatchExpectedString(inputStream, ",");
            token.nType = Token::TOKEN_NEXT_ELEMENT;
            break;

         case ':':
            token.sValue = MatchExpectedString(inputStream, ":");
            token.nType = Token::TOKEN_MEMBER_ASSIGN;
            break;

         case '"':
            token.sValue = MatchString(inputStream);
            token.nType = Token::TOKEN_STRING;
            break;

         case '-':
         case '0': case '1': case '2': case '3': case '4':
         case '5': case '6': case '7': case '8': case '9':
            token.sValue = MatchNumber(inputStream);
            token.nType = Token::TOKEN_NUMBER;
            break;

         case 't':
            token.sValue = MatchExpectedString(inputStream, "true");
            token.nType = Token::TOKEN_BOOLEAN;
            break;

         case 'f':
            token.sValue = MatchExpectedString(inputStream, "false");
            token.nType = Token::TOKEN_BOOLEAN;
            break;

         case 'n':
            token.sValue = MatchExpectedString(inputStream, "null");
            token.nType = Token::TOKEN_NULL;
            break;

         default:
         {
            std::string sErrorMessage = std::string("Unexpected character in stream: ") + cNext;
            throw ScanException(sErrorMessage, inputStream.GetLocation());
         }
      }

      token.locEnd = inputStream.GetLocation();
      tokens.push_back(token);
   }
}
274
// Consumes characters from inputStream until the next one is not white space
// (as classified by ::isspace) or the end of the stream is reached.
inline void Reader::EatWhiteSpace(InputStream& inputStream)
{
   // ::isspace requires a value representable as unsigned char (or EOF); passing a
   // negative plain char (any byte >= 0x80 where char is signed) is undefined behaviour.
   while (inputStream.EOS() == false &&
          ::isspace(static_cast<unsigned char>(inputStream.Peek())))
   {
      inputStream.Get();
   }
}
281
inline std::string Reader::MatchExpectedString(InputStream& inputStream, const std::string& sExpected)
283
std::string::const_iterator it(sExpected.begin()),
284
itEnd(sExpected.end());
285
for ( ; it != itEnd; ++it) {
286
if (inputStream.EOS() || // did we reach the end before finding what we're looking for...
287
inputStream.Get() != *it) // ...or did we find something different?
289
std::string sMessage = std::string("Expected string: ") + sExpected;
290
throw ScanException(sMessage, inputStream.GetLocation());
294
// all's well if we made it here
299
inline std::string Reader::MatchString(InputStream& inputStream)
301
MatchExpectedString(inputStream, "\"");
304
while (inputStream.EOS() == false &&
305
inputStream.Peek() != '"')
307
char c = inputStream.Get();
311
inputStream.EOS() == false) // shouldn't have reached the end yet
313
c = inputStream.Get();
315
case '/': string.push_back('/'); break;
316
case '"': string.push_back('"'); break;
317
case '\\': string.push_back('\\'); break;
318
case 'b': string.push_back('\b'); break;
319
case 'f': string.push_back('\f'); break;
320
case 'n': string.push_back('\n'); break;
321
case 'r': string.push_back('\r'); break;
322
case 't': string.push_back('\t'); break;
323
case 'u': string.push_back('\u'); break; // TODO: what do we do with this?
325
std::string sMessage = std::string("Unrecognized escape sequence found in string: \\") + c;
326
throw ScanException(sMessage, inputStream.GetLocation());
335
// eat the last '"' that we just peeked
336
MatchExpectedString(inputStream, "\"");
338
// all's well if we made it here
343
inline std::string Reader::MatchNumber(InputStream& inputStream)
345
const char sNumericChars[] = "0123456789.eE-+";
346
std::set<char> numericChars;
347
numericChars.insert(sNumericChars, sNumericChars + sizeof(sNumericChars));
350
while (inputStream.EOS() == false &&
351
numericChars.find(inputStream.Peek()) != numericChars.end())
353
sNumber.push_back(inputStream.Get());
360
// Parses whatever element comes next. The upcoming token's type selects the
// concrete element type; `element` is converted to that type and the matching
// Parse overload fills it in. A token that cannot start an element raises
// ParseException.
inline void Reader::Parse(UnknownElement& element, Reader::TokenStream& tokenStream)
{
   const Token& next = tokenStream.Peek();
   switch (next.nType)
   {
      case Token::TOKEN_OBJECT_BEGIN:
      {
         // binding to a non-const reference makes the element become this type (if it isn't already)
         Object& asObject = element;
         Parse(asObject, tokenStream);
         break;
      }

      case Token::TOKEN_ARRAY_BEGIN:
      {
         Array& asArray = element;
         Parse(asArray, tokenStream);
         break;
      }

      case Token::TOKEN_STRING:
      {
         String& asString = element;
         Parse(asString, tokenStream);
         break;
      }

      case Token::TOKEN_NUMBER:
      {
         Number& asNumber = element;
         Parse(asNumber, tokenStream);
         break;
      }

      case Token::TOKEN_BOOLEAN:
      {
         Boolean& asBoolean = element;
         Parse(asBoolean, tokenStream);
         break;
      }

      case Token::TOKEN_NULL:
      {
         Null& asNull = element;
         Parse(asNull, tokenStream);
         break;
      }

      default:
      {
         std::string sMessage = std::string("Unexpected token: ") + next.sValue;
         throw ParseException(sMessage, next.locBegin, next.locEnd);
      }
   }
}
416
// Parses an object: '{', zero or more `"name" : value` members separated by
// ',', then '}'. Each member is inserted into `object`; a rejected insert is
// reported as a duplicate-name ParseException located at the member's name.
inline void Reader::Parse(Object& object, Reader::TokenStream& tokenStream)
{
   MatchExpectedToken(Token::TOKEN_OBJECT_BEGIN, tokenStream);

   bool bMoreMembers = !tokenStream.EOS() &&
                       tokenStream.Peek().nType != Token::TOKEN_OBJECT_END;
   while (bMoreMembers)
   {
      Object::Member member;

      // member name first; remember its token so an error can point at it
      const Token& tokenName = tokenStream.Peek();
      member.name = MatchExpectedToken(Token::TOKEN_STRING, tokenStream);

      // the ':' between name and value
      MatchExpectedToken(Token::TOKEN_MEMBER_ASSIGN, tokenStream);

      // the value, which may be any element type
      Parse(member.element, tokenStream);

      // try adding it to the object (this could throw)
      try
      {
         object.Insert(member);
      }
      catch (Exception&) // NOTE(review): caught type assumed to be the library's base Exception - confirm
      {
         // must be a duplicate name
         std::string sMessage = std::string("Duplicate object member token: ") + member.name;
         throw ParseException(sMessage, tokenName.locBegin, tokenName.locEnd);
      }

      // a ',' means another member follows
      bMoreMembers = !tokenStream.EOS() &&
                     tokenStream.Peek().nType == Token::TOKEN_NEXT_ELEMENT;
      if (bMoreMembers)
         MatchExpectedToken(Token::TOKEN_NEXT_ELEMENT, tokenStream);
   }

   MatchExpectedToken(Token::TOKEN_OBJECT_END, tokenStream);
}
458
// Parses an array: '[', zero or more elements separated by ',', then ']'.
// Each element is first inserted into `array` as an empty UnknownElement and
// then parsed in place.
inline void Reader::Parse(Array& array, Reader::TokenStream& tokenStream)
{
   MatchExpectedToken(Token::TOKEN_ARRAY_BEGIN, tokenStream);

   bool bMoreElements = !tokenStream.EOS() &&
                        tokenStream.Peek().nType != Token::TOKEN_ARRAY_END;
   while (bMoreElements)
   {
      // ...what's next? could be anything
      UnknownElement& element = *array.Insert(UnknownElement());
      Parse(element, tokenStream);

      // a ',' means another element follows
      bMoreElements = !tokenStream.EOS() &&
                      tokenStream.Peek().nType == Token::TOKEN_NEXT_ELEMENT;
      if (bMoreElements)
         MatchExpectedToken(Token::TOKEN_NEXT_ELEMENT, tokenStream);
   }

   MatchExpectedToken(Token::TOKEN_ARRAY_END, tokenStream);
}
481
// Parses a string element: consumes one STRING token and assigns its text to `string`.
inline void Reader::Parse(String& string, Reader::TokenStream& tokenStream)
{
   const std::string& sText = MatchExpectedToken(Token::TOKEN_STRING, tokenStream);
   string = sText;
}
487
// Parses a number element: consumes one NUMBER token, converts its text to a
// double and assigns it to `number`.
//
// Throws ParseException if the text cannot be converted at all (e.g. "-" or
// "e") or if characters remain after the converted prefix (e.g. "1.2.3").
inline void Reader::Parse(Number& number, Reader::TokenStream& tokenStream)
{
   const Token& currentToken = tokenStream.Peek(); // might need this later for throwing exception
   const std::string& sValue = MatchExpectedToken(Token::TOKEN_NUMBER, tokenStream);

   std::istringstream iStr(sValue);
   double dValue = 0.0;
   iStr >> dValue;

   // did the conversion itself fail? eof() alone cannot tell: reading "-" hits the
   // end of the text (eof set) without ever producing a value.
   if (iStr.fail())
   {
      std::string sMessage = std::string("Malformed NUMBER token: ") + sValue;
      throw ParseException(sMessage, currentToken.locBegin, currentToken.locEnd);
   }

   // did we consume all characters in the token?
   if (iStr.eof() == false)
   {
      char c = iStr.peek();
      std::string sMessage = std::string("Unexpected character in NUMBER token: ") + c;
      throw ParseException(sMessage, currentToken.locBegin, currentToken.locEnd);
   }

   number = dValue;
}
508
// Parses a boolean element: consumes one BOOLEAN token; the result is true
// only when the token text is exactly "true".
inline void Reader::Parse(Boolean& boolean, Reader::TokenStream& tokenStream)
{
   const std::string& sText = MatchExpectedToken(Token::TOKEN_BOOLEAN, tokenStream);
   const bool bIsTrue = (sText == "true");
   boolean = bIsTrue;
}
515
// Parses a null element: there is nothing to store, so this only consumes the NULL token.
inline void Reader::Parse(Null& /*unused*/, Reader::TokenStream& tokenStream)
{
   MatchExpectedToken(Token::TOKEN_NULL, tokenStream);
}
521
inline const std::string& Reader::MatchExpectedToken(Token::Type nExpected, Reader::TokenStream& tokenStream)
523
const Token& token = tokenStream.Get();
524
if (token.nType != nExpected)
526
std::string sMessage = std::string("Unexpected token: ") + token.sValue;
527
throw ParseException(sMessage, token.locBegin, token.locEnd);