// Protocol Buffers - Google's data interchange format
// Copyright 2008 Google Inc.
// http://code.google.com/p/protobuf/
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Author: kenton@google.com (Kenton Varda)
//  Based on original Protocol Buffers design by
//  Sanjay Ghemawat, Jeff Dean, and others.

#include <limits.h>
#include <math.h>

#include <vector>

#include <google/protobuf/io/tokenizer.h>
#include <google/protobuf/io/zero_copy_stream_impl.h>

#include <google/protobuf/stubs/common.h>
#include <google/protobuf/stubs/strutil.h>
#include <google/protobuf/stubs/substitute.h>
#include <google/protobuf/testing/googletest.h>
#include <gtest/gtest.h>

namespace google {
namespace protobuf {
namespace io {
namespace {

// ===================================================================
// Data-Driven Test Infrastructure

// TODO(kenton):  This is copied from coded_stream_unittest.  This is
//   temporary until these features are integrated into gTest itself.

// TEST_1D and TEST_2D are macros I'd eventually like to see added to
// gTest.  These macros can be used to declare tests which should be
// run multiple times, once for each item in some input array.  TEST_1D
// tests all cases in a single input array.  TEST_2D tests all
// combinations of cases from two arrays.  The arrays must be statically
// defined such that the GOOGLE_ARRAYSIZE() macro works on them.  Example:
//
// int kCases[] = {1, 2, 3, 4}
// TEST_1D(MyFixture, MyTest, kCases) {
//   EXPECT_GT(kCases_case, 0);
// }
//
// This test iterates through the numbers 1, 2, 3, and 4 and tests that
// they are all greater than zero.  In case of failure, the exact case
// which failed will be printed.  The case type must be printable using
// ostream::operator<<.
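//
// TEST_2D works the same way with two case arrays; the test body receives
// one parameter per array.  A sketch (not from the original comment, but
// consistent with the macro definition below):
//
// int kCases[] = {1, 2, 3, 4};
// int kFactors[] = {10, 100};
// TEST_2D(MyFixture, MyTest, kCases, kFactors) {
//   EXPECT_GT(kCases_case * kFactors_case, 0);
// }
//
// This body runs once for each of the 4 * 2 = 8 combinations.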

#define TEST_1D(FIXTURE, NAME, CASES)                                      \
  class FIXTURE##_##NAME##_DD : public FIXTURE {                           \
   protected:                                                              \
    template <typename CaseType>                                           \
    void DoSingleCase(const CaseType& CASES##_case);                       \
  };                                                                       \
                                                                           \
  TEST_F(FIXTURE##_##NAME##_DD, NAME) {                                    \
    for (int i = 0; i < GOOGLE_ARRAYSIZE(CASES); i++) {                    \
      SCOPED_TRACE(testing::Message()                                      \
        << #CASES " case #" << i << ": " << CASES[i]);                     \
      DoSingleCase(CASES[i]);                                              \
    }                                                                      \
  }                                                                        \
                                                                           \
  template <typename CaseType>                                             \
  void FIXTURE##_##NAME##_DD::DoSingleCase(const CaseType& CASES##_case)

#define TEST_2D(FIXTURE, NAME, CASES1, CASES2)                             \
  class FIXTURE##_##NAME##_DD : public FIXTURE {                           \
   protected:                                                              \
    template <typename CaseType1, typename CaseType2>                      \
    void DoSingleCase(const CaseType1& CASES1##_case,                      \
                      const CaseType2& CASES2##_case);                     \
  };                                                                       \
                                                                           \
  TEST_F(FIXTURE##_##NAME##_DD, NAME) {                                    \
    for (int i = 0; i < GOOGLE_ARRAYSIZE(CASES1); i++) {                   \
      for (int j = 0; j < GOOGLE_ARRAYSIZE(CASES2); j++) {                 \
        SCOPED_TRACE(testing::Message()                                    \
          << #CASES1 " case #" << i << ": " << CASES1[i] << ", "           \
          << #CASES2 " case #" << j << ": " << CASES2[j]);                 \
        DoSingleCase(CASES1[i], CASES2[j]);                                \
      }                                                                    \
    }                                                                      \
  }                                                                        \
                                                                           \
  template <typename CaseType1, typename CaseType2>                        \
  void FIXTURE##_##NAME##_DD::DoSingleCase(const CaseType1& CASES1##_case, \
                                           const CaseType2& CASES2##_case)

// -------------------------------------------------------------------

// An input stream that is basically like an ArrayInputStream, but sometimes
// returns empty buffers, just to throw us off.
class TestInputStream : public ZeroCopyInputStream {
 public:
  TestInputStream(const void* data, int size, int block_size)
    : array_stream_(data, size, block_size), counter_(0) {}
  ~TestInputStream() {}

  // implements ZeroCopyInputStream ----------------------------------
  bool Next(const void** data, int* size) {
    // We'll return empty buffers starting with the first buffer, and every
    // 3 and 5 buffers after that.
    if (counter_ % 3 == 0 || counter_ % 5 == 0) {
      *data = NULL;
      *size = 0;
      ++counter_;
      return true;
    } else {
      ++counter_;
      return array_stream_.Next(data, size);
    }
  }

  void BackUp(int count)  { return array_stream_.BackUp(count); }
  bool Skip(int count)    { return array_stream_.Skip(count); }
  int64 ByteCount() const { return array_stream_.ByteCount(); }

 private:
  ArrayInputStream array_stream_;
  int counter_;
};
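
// Note: the ZeroCopyInputStream contract allows Next() to return a
// zero-size buffer as long as repeated calls eventually yield data, so the
// empty buffers injected above are a legal way to stress the tokenizer.
// A minimal read loop over this stream (a sketch, not part of the tests):
//
//   const void* data;
//   int size;
//   TestInputStream stream("foo bar", 7, 3);
//   while (stream.Next(&data, &size)) {
//     // size is 0 on every buffer where counter_ % 3 == 0 or % 5 == 0.
//   }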

// -------------------------------------------------------------------

// An error collector which simply concatenates all its errors into a big
// block of text which can be checked.
class TestErrorCollector : public ErrorCollector {
 public:
  TestErrorCollector() {}
  ~TestErrorCollector() {}

  string text_;

  // implements ErrorCollector ---------------------------------------
  void AddError(int line, int column, const string& message) {
    strings::SubstituteAndAppend(&text_, "$0:$1: $2\n",
                                 line, column, message);
  }
};
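
// With the "$0:$1: $2\n" format, an error at line 0, column 2 with message
// "Invalid escape sequence in string literal." accumulates in text_ as
// "0:2: Invalid escape sequence in string literal.\n", which is exactly the
// shape of the expected strings in kErrorCases below.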

// -------------------------------------------------------------------

// We test each operation over a variety of block sizes to ensure that
// we test cases where reads cross buffer boundaries as well as cases
// where they don't.  This is sort of a brute-force approach to this,
// but it's easy to write and easy to understand.
const int kBlockSizes[] = {1, 2, 3, 5, 7, 13, 32, 1024};

class TokenizerTest : public testing::Test {
 protected:
  // For easy testing.
  uint64 ParseInteger(const string& text) {
    uint64 result;
    EXPECT_TRUE(Tokenizer::ParseInteger(text, kuint64max, &result));
    return result;
  }
};

// ===================================================================

// These tests cause gcc 3.3.5 (and earlier?) to give the cryptic error:
//   "sorry, unimplemented: `method_call_expr' not supported by dump_expr"
#if !defined(__GNUC__) || __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ > 3)

// In each test case, the entire input text should parse as a single token
// of the given type.
struct SimpleTokenCase {
  string input;
  Tokenizer::TokenType type;
};

inline ostream& operator<<(ostream& out,
                           const SimpleTokenCase& test_case) {
  return out << CEscape(test_case.input);
}

SimpleTokenCase kSimpleTokenCases[] = {
  // Test identifiers.
  { "hello",       Tokenizer::TYPE_IDENTIFIER },

  // Test integers.
  { "123",         Tokenizer::TYPE_INTEGER },
  { "0xab6",       Tokenizer::TYPE_INTEGER },
  { "0XAB6",       Tokenizer::TYPE_INTEGER },
  { "0X1234567",   Tokenizer::TYPE_INTEGER },
  { "0x89abcdef",  Tokenizer::TYPE_INTEGER },
  { "0x89ABCDEF",  Tokenizer::TYPE_INTEGER },
  { "01234567",    Tokenizer::TYPE_INTEGER },

  // Test floats.
  { "123.45",      Tokenizer::TYPE_FLOAT },
  { "1.",          Tokenizer::TYPE_FLOAT },
  { "1e3",         Tokenizer::TYPE_FLOAT },
  { "1E3",         Tokenizer::TYPE_FLOAT },
  { "1e-3",        Tokenizer::TYPE_FLOAT },
  { "1e+3",        Tokenizer::TYPE_FLOAT },
  { "1.e3",        Tokenizer::TYPE_FLOAT },
  { "1.2e3",       Tokenizer::TYPE_FLOAT },
  { ".1",          Tokenizer::TYPE_FLOAT },
  { ".1e3",        Tokenizer::TYPE_FLOAT },
  { ".1e-3",       Tokenizer::TYPE_FLOAT },
  { ".1e+3",       Tokenizer::TYPE_FLOAT },

  // Test strings.
  { "'hello'",     Tokenizer::TYPE_STRING },
  { "\"foo\"",     Tokenizer::TYPE_STRING },
  { "'a\"b'",      Tokenizer::TYPE_STRING },
  { "\"a'b\"",     Tokenizer::TYPE_STRING },
  { "'a\\'b'",     Tokenizer::TYPE_STRING },
  { "\"a\\\"b\"",  Tokenizer::TYPE_STRING },
  { "'\\xf'",      Tokenizer::TYPE_STRING },
  { "'\\0'",       Tokenizer::TYPE_STRING },

  // Test symbols.
  { "+",           Tokenizer::TYPE_SYMBOL },
  { ".",           Tokenizer::TYPE_SYMBOL },
};

TEST_2D(TokenizerTest, SimpleTokens, kSimpleTokenCases, kBlockSizes) {
  // Set up the tokenizer.
  TestInputStream input(kSimpleTokenCases_case.input.data(),
                        kSimpleTokenCases_case.input.size(),
                        kBlockSizes_case);
  TestErrorCollector error_collector;
  Tokenizer tokenizer(&input, &error_collector);

  // Before Next() is called, the initial token should always be TYPE_START.
  EXPECT_EQ(Tokenizer::TYPE_START, tokenizer.current().type);
  EXPECT_EQ("", tokenizer.current().text);
  EXPECT_EQ(0, tokenizer.current().line);
  EXPECT_EQ(0, tokenizer.current().column);

  // Parse the token.
  ASSERT_TRUE(tokenizer.Next());

  // Check that it has the right type.
  EXPECT_EQ(kSimpleTokenCases_case.type, tokenizer.current().type);
  // Check that it contains the complete input text.
  EXPECT_EQ(kSimpleTokenCases_case.input, tokenizer.current().text);
  // Check that it is located at the beginning of the input.
  EXPECT_EQ(0, tokenizer.current().line);
  EXPECT_EQ(0, tokenizer.current().column);

  // There should be no more input.
  EXPECT_FALSE(tokenizer.Next());

  // After Next() returns false, the token should have type TYPE_END.
  EXPECT_EQ(Tokenizer::TYPE_END, tokenizer.current().type);
  EXPECT_EQ("", tokenizer.current().text);
  EXPECT_EQ(0, tokenizer.current().line);
  EXPECT_EQ(kSimpleTokenCases_case.input.size(), tokenizer.current().column);

  // There should be no errors.
  EXPECT_TRUE(error_collector.text_.empty());
}

TEST_1D(TokenizerTest, FloatSuffix, kBlockSizes) {
  // Test the "allow_f_after_float" option.

  // Set up the tokenizer.
  const char* text = "1f 2.5f 6e3f 7F";
  TestInputStream input(text, strlen(text), kBlockSizes_case);
  TestErrorCollector error_collector;
  Tokenizer tokenizer(&input, &error_collector);
  tokenizer.set_allow_f_after_float(true);

  // Advance through tokens and check that they are parsed as expected.
  ASSERT_TRUE(tokenizer.Next());
  EXPECT_EQ(tokenizer.current().text, "1f");
  EXPECT_EQ(tokenizer.current().type, Tokenizer::TYPE_FLOAT);
  ASSERT_TRUE(tokenizer.Next());
  EXPECT_EQ(tokenizer.current().text, "2.5f");
  EXPECT_EQ(tokenizer.current().type, Tokenizer::TYPE_FLOAT);
  ASSERT_TRUE(tokenizer.Next());
  EXPECT_EQ(tokenizer.current().text, "6e3f");
  EXPECT_EQ(tokenizer.current().type, Tokenizer::TYPE_FLOAT);
  ASSERT_TRUE(tokenizer.Next());
  EXPECT_EQ(tokenizer.current().text, "7F");
  EXPECT_EQ(tokenizer.current().type, Tokenizer::TYPE_FLOAT);

  // There should be no more input.
  EXPECT_FALSE(tokenizer.Next());
  // There should be no errors.
  EXPECT_TRUE(error_collector.text_.empty());
}

#endif  // !defined(__GNUC__) || __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ > 3)
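
// Note: without set_allow_f_after_float(true), a trailing 'f' or 'F' is an
// error; kErrorCases below includes "1.0f foo" and expects
// "0:3: Need space between number and identifier.\n".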

// -------------------------------------------------------------------

// In each case, the input is parsed to produce a list of tokens.  The
// last token in "output" must have type TYPE_END.
struct MultiTokenCase {
  string input;
  Tokenizer::Token output[10];  // The compiler wants a constant array
                                // size for initialization to work.  There
                                // is no reason this can't be increased if
                                // needed.
};

inline ostream& operator<<(ostream& out,
                           const MultiTokenCase& test_case) {
  return out << CEscape(test_case.input);
}

MultiTokenCase kMultiTokenCases[] = {
  // Test empty input.
  { "", {
    { Tokenizer::TYPE_END       , ""     , 0,  0 },
  }},

  // Test all token types at the same time.
  { "foo 1 1.2 + 'bar'", {
    { Tokenizer::TYPE_IDENTIFIER, "foo"  , 0,  0 },
    { Tokenizer::TYPE_INTEGER   , "1"    , 0,  4 },
    { Tokenizer::TYPE_FLOAT     , "1.2"  , 0,  6 },
    { Tokenizer::TYPE_SYMBOL    , "+"    , 0, 10 },
    { Tokenizer::TYPE_STRING    , "'bar'", 0, 12 },
    { Tokenizer::TYPE_END       , ""     , 0, 17 },
  }},

  // Test that consecutive symbols are parsed as separate tokens.
  { "!@+%", {
    { Tokenizer::TYPE_SYMBOL    , "!"    , 0, 0 },
    { Tokenizer::TYPE_SYMBOL    , "@"    , 0, 1 },
    { Tokenizer::TYPE_SYMBOL    , "+"    , 0, 2 },
    { Tokenizer::TYPE_SYMBOL    , "%"    , 0, 3 },
    { Tokenizer::TYPE_END       , ""     , 0, 4 },
  }},

  // Test that newlines affect line numbers correctly.
  { "foo bar\nrab oof", {
    { Tokenizer::TYPE_IDENTIFIER, "foo", 0,  0 },
    { Tokenizer::TYPE_IDENTIFIER, "bar", 0,  4 },
    { Tokenizer::TYPE_IDENTIFIER, "rab", 1,  0 },
    { Tokenizer::TYPE_IDENTIFIER, "oof", 1,  4 },
    { Tokenizer::TYPE_END       , ""   , 1,  7 },
  }},

  // Test that tabs affect column numbers correctly.
  { "foo\tbar \tbaz", {
    { Tokenizer::TYPE_IDENTIFIER, "foo", 0,  0 },
    { Tokenizer::TYPE_IDENTIFIER, "bar", 0,  8 },
    { Tokenizer::TYPE_IDENTIFIER, "baz", 0, 16 },
    { Tokenizer::TYPE_END       , ""   , 0, 19 },
  }},

  // Test that line comments are ignored.
  { "foo // This is a comment\n"
    "bar // This is another comment", {
    { Tokenizer::TYPE_IDENTIFIER, "foo", 0,  0 },
    { Tokenizer::TYPE_IDENTIFIER, "bar", 1,  0 },
    { Tokenizer::TYPE_END       , ""   , 1, 30 },
  }},

  // Test that block comments are ignored.
  { "foo /* This is a block comment */ bar", {
    { Tokenizer::TYPE_IDENTIFIER, "foo", 0,  0 },
    { Tokenizer::TYPE_IDENTIFIER, "bar", 0, 34 },
    { Tokenizer::TYPE_END       , ""   , 0, 37 },
  }},

  // Test that sh-style comments are not ignored by default.
  { "foo # bar\n"
    "baz", {
    { Tokenizer::TYPE_IDENTIFIER, "foo", 0, 0 },
    { Tokenizer::TYPE_SYMBOL    , "#"  , 0, 4 },
    { Tokenizer::TYPE_IDENTIFIER, "bar", 0, 6 },
    { Tokenizer::TYPE_IDENTIFIER, "baz", 1, 0 },
    { Tokenizer::TYPE_END       , ""   , 1, 3 },
  }},
};
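
// Note the tab case above: columns are computed with a tab width of 8, so a
// '\t' advances the column to the next multiple of 8.  That is why "bar"
// lands at column 8 and "baz" at column 16 even though their raw character
// offsets are 4 and 9.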

TEST_2D(TokenizerTest, MultipleTokens, kMultiTokenCases, kBlockSizes) {
  // Set up the tokenizer.
  TestInputStream input(kMultiTokenCases_case.input.data(),
                        kMultiTokenCases_case.input.size(),
                        kBlockSizes_case);
  TestErrorCollector error_collector;
  Tokenizer tokenizer(&input, &error_collector);

  // Before Next() is called, the initial token should always be TYPE_START.
  EXPECT_EQ(Tokenizer::TYPE_START, tokenizer.current().type);
  EXPECT_EQ("", tokenizer.current().text);
  EXPECT_EQ(0, tokenizer.current().line);
  EXPECT_EQ(0, tokenizer.current().column);

  // Loop through all expected tokens.
  int i = 0;
  Tokenizer::Token token;
  do {
    token = kMultiTokenCases_case.output[i++];

    SCOPED_TRACE(testing::Message() << "Token #" << i << ": " << token.text);

    // Next() should only return false when it hits the end token.
    if (token.type != Tokenizer::TYPE_END) {
      ASSERT_TRUE(tokenizer.Next());
    } else {
      ASSERT_FALSE(tokenizer.Next());
    }

    // Check that the token matches the expected one.
    EXPECT_EQ(token.type, tokenizer.current().type);
    EXPECT_EQ(token.text, tokenizer.current().text);
    EXPECT_EQ(token.line, tokenizer.current().line);
    EXPECT_EQ(token.column, tokenizer.current().column);

  } while (token.type != Tokenizer::TYPE_END);

  // There should be no errors.
  EXPECT_TRUE(error_collector.text_.empty());
}

// This test causes gcc 3.3.5 (and earlier?) to give the cryptic error:
//   "sorry, unimplemented: `method_call_expr' not supported by dump_expr"
#if !defined(__GNUC__) || __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ > 3)

TEST_1D(TokenizerTest, ShCommentStyle, kBlockSizes) {
  // Test the "comment_style" option.

  const char* text = "foo # bar\n"
                     "baz // qux\n"
                     "corge /* grault */\n"
                     "garply";
  const char* const kTokens[] = {"foo",  // "# bar" is ignored
                                 "baz", "/", "/", "qux",
                                 "corge", "/", "*", "grault", "*", "/",
                                 "garply"};

  // Set up the tokenizer.
  TestInputStream input(text, strlen(text), kBlockSizes_case);
  TestErrorCollector error_collector;
  Tokenizer tokenizer(&input, &error_collector);
  tokenizer.set_comment_style(Tokenizer::SH_COMMENT_STYLE);

  // Advance through tokens and check that they are parsed as expected.
  for (int i = 0; i < GOOGLE_ARRAYSIZE(kTokens); i++) {
    EXPECT_TRUE(tokenizer.Next());
    EXPECT_EQ(tokenizer.current().text, kTokens[i]);
  }

  // There should be no more input.
  EXPECT_FALSE(tokenizer.Next());
  // There should be no errors.
  EXPECT_TRUE(error_collector.text_.empty());
}

#endif  // !defined(__GNUC__) || __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ > 3)
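
// Note: in SH_COMMENT_STYLE only "#" introduces a comment, so "//" and "/*"
// lose their special meaning; that is why they come back out of the
// tokenizer above as individual "/" and "*" symbol tokens.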

// -------------------------------------------------------------------

// Test parse helpers.  It's not really worth setting up a full data-driven
// test harness for them.
TEST_F(TokenizerTest, ParseInteger) {
  EXPECT_EQ(0, ParseInteger("0"));
  EXPECT_EQ(123, ParseInteger("123"));
  EXPECT_EQ(0xabcdef12u, ParseInteger("0xabcdef12"));
  EXPECT_EQ(0xabcdef12u, ParseInteger("0xABCDEF12"));
  EXPECT_EQ(kuint64max, ParseInteger("0xFFFFFFFFFFFFFFFF"));
  EXPECT_EQ(01234567, ParseInteger("01234567"));

  // Test invalid integers that may still be tokenized as integers.
  EXPECT_EQ(0, ParseInteger("0x"));

#ifdef GTEST_HAS_DEATH_TEST  // death tests do not work on Windows yet
  // Test invalid integers that will never be tokenized as integers.
  EXPECT_DEBUG_DEATH(ParseInteger("zxy"),
    "passed text that could not have been tokenized as an integer");
  EXPECT_DEBUG_DEATH(ParseInteger("1.2"),
    "passed text that could not have been tokenized as an integer");
  EXPECT_DEBUG_DEATH(ParseInteger("08"),
    "passed text that could not have been tokenized as an integer");
  EXPECT_DEBUG_DEATH(ParseInteger("0xg"),
    "passed text that could not have been tokenized as an integer");
  EXPECT_DEBUG_DEATH(ParseInteger("-1"),
    "passed text that could not have been tokenized as an integer");
#endif  // GTEST_HAS_DEATH_TEST

  // Test overflows.
  uint64 i;
  EXPECT_TRUE (Tokenizer::ParseInteger("0", 0, &i));
  EXPECT_FALSE(Tokenizer::ParseInteger("1", 0, &i));
  EXPECT_TRUE (Tokenizer::ParseInteger("1", 1, &i));
  EXPECT_TRUE (Tokenizer::ParseInteger("12345", 12345, &i));
  EXPECT_FALSE(Tokenizer::ParseInteger("12346", 12345, &i));
  EXPECT_TRUE (Tokenizer::ParseInteger("0xFFFFFFFFFFFFFFFF" , kuint64max, &i));
  EXPECT_FALSE(Tokenizer::ParseInteger("0x10000000000000000", kuint64max, &i));
}
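
// Note: the second argument to Tokenizer::ParseInteger() is the maximum
// value allowed for the result; as the cases above show, it returns false
// whenever the parsed value would exceed that bound.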

TEST_F(TokenizerTest, ParseFloat) {
  EXPECT_DOUBLE_EQ(1    , Tokenizer::ParseFloat("1."));
  EXPECT_DOUBLE_EQ(1e3  , Tokenizer::ParseFloat("1e3"));
  EXPECT_DOUBLE_EQ(1e3  , Tokenizer::ParseFloat("1E3"));
  EXPECT_DOUBLE_EQ(1.5e3, Tokenizer::ParseFloat("1.5e3"));
  EXPECT_DOUBLE_EQ(.1   , Tokenizer::ParseFloat(".1"));
  EXPECT_DOUBLE_EQ(.25  , Tokenizer::ParseFloat(".25"));
  EXPECT_DOUBLE_EQ(.1e3 , Tokenizer::ParseFloat(".1e3"));
  EXPECT_DOUBLE_EQ(.25e3, Tokenizer::ParseFloat(".25e3"));
  EXPECT_DOUBLE_EQ(.1e+3, Tokenizer::ParseFloat(".1e+3"));
  EXPECT_DOUBLE_EQ(.1e-3, Tokenizer::ParseFloat(".1e-3"));
  EXPECT_DOUBLE_EQ(5    , Tokenizer::ParseFloat("5"));
  EXPECT_DOUBLE_EQ(6e-12, Tokenizer::ParseFloat("6e-12"));
  EXPECT_DOUBLE_EQ(1.2  , Tokenizer::ParseFloat("1.2"));
  EXPECT_DOUBLE_EQ(1.e2 , Tokenizer::ParseFloat("1.e2"));

  // Test invalid floats that may still be tokenized as floats.
  EXPECT_DOUBLE_EQ(1, Tokenizer::ParseFloat("1e"));
  EXPECT_DOUBLE_EQ(1, Tokenizer::ParseFloat("1e-"));
  EXPECT_DOUBLE_EQ(1, Tokenizer::ParseFloat("1.e"));

  // Test 'f' suffix.
  EXPECT_DOUBLE_EQ(1, Tokenizer::ParseFloat("1f"));
  EXPECT_DOUBLE_EQ(1, Tokenizer::ParseFloat("1.0f"));
  EXPECT_DOUBLE_EQ(1, Tokenizer::ParseFloat("1F"));

  // These should parse successfully even though they are out of range.
  // Overflows become infinity and underflows become zero.
  EXPECT_EQ(     0.0, Tokenizer::ParseFloat("1e-9999999999999999999999999999"));
  EXPECT_EQ(HUGE_VAL, Tokenizer::ParseFloat("1e+9999999999999999999999999999"));

#ifdef GTEST_HAS_DEATH_TEST  // death tests do not work on Windows yet
  // Test invalid floats that will never be tokenized as floats.
  EXPECT_DEBUG_DEATH(Tokenizer::ParseFloat("zxy"),
    "passed text that could not have been tokenized as a float");
  EXPECT_DEBUG_DEATH(Tokenizer::ParseFloat("1-e0"),
    "passed text that could not have been tokenized as a float");
  EXPECT_DEBUG_DEATH(Tokenizer::ParseFloat("-1.0"),
    "passed text that could not have been tokenized as a float");
#endif  // GTEST_HAS_DEATH_TEST
}

TEST_F(TokenizerTest, ParseString) {
  string output;
  Tokenizer::ParseString("'hello'", &output);
  EXPECT_EQ("hello", output);
  Tokenizer::ParseString("\"blah\\nblah2\"", &output);
  EXPECT_EQ("blah\nblah2", output);
  Tokenizer::ParseString("'\\1x\\1\\123\\739\\52\\334n\\3'", &output);
  EXPECT_EQ("\1x\1\123\739\52\334n\3", output);
  Tokenizer::ParseString("'\\x20\\x4'", &output);
  EXPECT_EQ("\x20\x4", output);

  // Test invalid strings that may still be tokenized as strings.
  Tokenizer::ParseString("\"\\a\\l\\v\\t", &output);  // \l is invalid
  EXPECT_EQ("\a?\v\t", output);
  Tokenizer::ParseString("'", &output);
  EXPECT_EQ("", output);
  Tokenizer::ParseString("'\\", &output);
  EXPECT_EQ("\\", output);

  // Test invalid strings that will never be tokenized as strings.
#ifdef GTEST_HAS_DEATH_TEST  // death tests do not work on Windows yet
  EXPECT_DEBUG_DEATH(Tokenizer::ParseString("", &output),
    "passed text that could not have been tokenized as a string");
#endif  // GTEST_HAS_DEATH_TEST
}

// -------------------------------------------------------------------

// Each case parses some input text, ignoring the tokens produced, and
// checks that the error output matches what is expected.
struct ErrorCase {
  string input;
  bool recoverable;  // True if the tokenizer should be able to recover and
                     // parse more tokens after seeing this error.  Cases
                     // for which this is true must end with "foo" as
                     // the last token, which the test will check for.
  string errors;
};

inline ostream& operator<<(ostream& out,
                           const ErrorCase& test_case) {
  return out << CEscape(test_case.input);
}

ErrorCase kErrorCases[] = {
  // String errors.
  { "'\\l' foo", true,
    "0:2: Invalid escape sequence in string literal.\n" },
  { "'\\x' foo", true,
    "0:3: Expected hex digits for escape sequence.\n" },
  { "'foo\nbar' foo", true,
    "0:4: String literals cannot cross line boundaries.\n" },
  { "'bar\nfoo", true,
    "0:4: String literals cannot cross line boundaries.\n" },

  // Integer errors.
  { "123foo", true,
    "0:3: Need space between number and identifier.\n" },

  // Hex/octal errors.
  { "0x foo", true,
    "0:2: \"0x\" must be followed by hex digits.\n" },
  { "0541823 foo", true,
    "0:4: Numbers starting with leading zero must be in octal.\n" },
  { "0x123z foo", true,
    "0:5: Need space between number and identifier.\n" },
  { "0x123.4 foo", true,
    "0:5: Hex and octal numbers must be integers.\n" },
  { "0123.4 foo", true,
    "0:4: Hex and octal numbers must be integers.\n" },

  // Float errors.
  { "1e foo", true,
    "0:2: \"e\" must be followed by exponent.\n" },
  { "1e- foo", true,
    "0:3: \"e\" must be followed by exponent.\n" },
  { "1.2.3 foo", true,
    "0:3: Already saw decimal point or exponent; can't have another one.\n" },
  { "1e2.3 foo", true,
    "0:3: Already saw decimal point or exponent; can't have another one.\n" },
  { "a.1 foo", true,
    "0:1: Need space between identifier and decimal point.\n" },
  // allow_f_after_float not enabled, so this should be an error.
  { "1.0f foo", true,
    "0:3: Need space between number and identifier.\n" },

  // Block comment errors.
  { "/*", false,
    "0:2: End-of-file inside block comment.\n"
    "0:0: Comment started here.\n"},
  { "/*/*/ foo", true,
    "0:3: \"/*\" inside block comment. Block comments cannot be nested.\n"},

  // Control characters.  Multiple consecutive control characters should only
  // produce one error.
  { "\b foo", true,
    "0:0: Invalid control characters encountered in text.\n" },
  { "\b\b foo", true,
    "0:0: Invalid control characters encountered in text.\n" },

  // Check that control characters at end of input don't result in an
  // infinite loop.
  { "\v", false,
    "0:0: Invalid control characters encountered in text.\n" },

  // Check recovery from '\0'.  We have to explicitly specify the length of
  // these strings because otherwise the string constructor will just call
  // strlen() which will see the first '\0' and think that is the end of the
  // string.
  { string("\0foo", 4), true,
    "0:0: Invalid control characters encountered in text.\n" },
  { string("\0\0foo", 5), true,
    "0:0: Invalid control characters encountered in text.\n" },
};

TEST_2D(TokenizerTest, Errors, kErrorCases, kBlockSizes) {
  // Set up the tokenizer.
  TestInputStream input(kErrorCases_case.input.data(),
                        kErrorCases_case.input.size(),
                        kBlockSizes_case);
  TestErrorCollector error_collector;
  Tokenizer tokenizer(&input, &error_collector);

  // Ignore all input, except remember if the last token was "foo".
  bool last_was_foo = false;
  while (tokenizer.Next()) {
    last_was_foo = tokenizer.current().text == "foo";
  }

  // Check that the errors match what was expected.
  EXPECT_EQ(error_collector.text_, kErrorCases_case.errors);

  // If the error was recoverable, make sure we saw "foo" after it.
  if (kErrorCases_case.recoverable) {
    EXPECT_TRUE(last_was_foo);
  }
}

// -------------------------------------------------------------------

TEST_1D(TokenizerTest, BackUpOnDestruction, kBlockSizes) {
  string text = "foo bar";
  TestInputStream input(text.data(), text.size(), kBlockSizes_case);

  // Create a tokenizer, read one token, then destroy it.
  {
    TestErrorCollector error_collector;
    Tokenizer tokenizer(&input, &error_collector);

    tokenizer.Next();
  }

  // Only "foo" should have been read.
  EXPECT_EQ(strlen("foo"), input.ByteCount());
}

}  // namespace
}  // namespace io
}  // namespace protobuf
}  // namespace google