2
* Copyright 2010 Google Inc.
4
* Licensed under the Apache License, Version 2.0 (the "License");
5
* you may not use this file except in compliance with the License.
6
* You may obtain a copy of the License at
8
* http://www.apache.org/licenses/LICENSE-2.0
10
* Unless required by applicable law or agreed to in writing, software
11
* distributed under the License is distributed on an "AS IS" BASIS,
12
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
* See the License for the specific language governing permissions and
14
* limitations under the License.
17
// Author: jmarantz@google.com (Joshua Marantz)
19
// Infrastructure for testing html parsing and rewriting.
21
#ifndef PAGESPEED_KERNEL_HTML_HTML_PARSE_TEST_BASE_H_
22
#define PAGESPEED_KERNEL_HTML_HTML_PARSE_TEST_BASE_H_
24
#include "pagespeed/kernel/base/basictypes.h"
25
#include "pagespeed/kernel/base/gtest.h"
26
#include "pagespeed/kernel/base/mock_message_handler.h"
27
#include "pagespeed/kernel/base/null_mutex.h"
28
#include "pagespeed/kernel/base/scoped_ptr.h"
29
#include "pagespeed/kernel/base/string.h"
30
#include "pagespeed/kernel/base/string_util.h"
31
#include "pagespeed/kernel/base/string_writer.h"
32
#include "pagespeed/kernel/html/html_parse.h"
33
#include "pagespeed/kernel/html/html_writer_filter.h"
35
namespace net_instaweb {
39
// Shared infrastructure for unit-testing the HTML parser. This gtest
// fixture base declares the parse/validate helpers but leaves the choice of
// HtmlParse instance to subclasses via the pure-virtual html_parse().
40
class HtmlParseTestBaseNoAlloc : public testing::Test {
42
// Prefix that Parse() concatenates with the case id and ".html" to build a
// dummy URL. Only declared here.
static const char kTestDomain[];
43
static const char kXhtmlDtd[]; // DOCTYPE string for claiming XHTML
45
// Constructs the fixture: message_handler_ is handed a newly allocated
// NullMutex, write_to_string_ is pointed at output_buffer_, and
// added_filter_ starts out false.
// NOTE(review): write_to_string_ is declared before output_buffer_, so it
// receives the address of a member that has not been constructed yet;
// presumably StringWriter only stores the pointer here -- confirm.
HtmlParseTestBaseNoAlloc()
46
: message_handler_(new NullMutex),
47
write_to_string_(&output_buffer_),
48
added_filter_(false) {
50
// Virtual destructor, since the fixture is used as a polymorphic base
// (it declares pure-virtual methods). Only declared here.
virtual ~HtmlParseTestBaseNoAlloc();
52
// To make the tests more concise, we generally omit the <html>...</html>
53
// tags bracketing the input. The libxml parser will add those in
54
// if we don't have them. To avoid having that make the test data more
55
// verbose, we automatically add them in the test infrastructure, both
56
// for stimulus and expected response.
58
// This flag controls whether we also add <body>...</body> tags. In
59
// the case of html_parse_test, we go ahead and add them in. In the
60
// case of the rewriter tests, we want to explicitly control/observe
61
// the head and the body so we don't add the body tags in
62
// automatically. So classes that derive from HtmlParseTestBaseNoAlloc
63
// must override this method to indicate which they prefer.
64
virtual bool AddBody() const = 0;
66
// If true, prepends "<html>\n" and appends "\n</html>" to input text
67
// prior to parsing it. This was originally done for consistency with
68
// libxml2 but that's long since been made irrelevant and we should probably
69
// just stop doing it. Adding the virtual function here should help us
70
// incrementally update tests & their gold results.
// The default implementation returns true; subclasses may override it.
71
virtual bool AddHtmlTags() const { return true; }
73
// Set a doctype string (e.g. "<!doctype html>") to be inserted before the
74
// rest of the document (for the current test only). If none is set, it
75
// defaults to the empty string.
// The directive is copied into doctype_string_, so the caller's buffer does
// not need to outlive this call.
76
void SetDoctype(StringPiece directive) {
77
directive.CopyToString(&doctype_string_);
80
// Returns the text to parse or compare for `html`. In the wrapping path the
// text is surrounded by "<html>\n" ... "\n</html>", or by
// "<html><body>\n" ... "\n</body></html>\n" when AddBody() is true; in the
// other path `html` is copied through unchanged.
// NOTE(review): the condition choosing between the two paths is not visible
// in this view of the file -- presumably AddHtmlTags(); confirm.
virtual GoogleString AddHtmlBody(StringPiece html) {
83
// Opening tags: include <body> only when the subclass asks for it.
ret = AddBody() ? "<html><body>\n" : "<html>\n";
84
// Append the payload followed by the matching closing tags.
StrAppend(&ret, html, (AddBody() ? "\n</body></html>\n" : "\n</html>"));
86
// No wrapping: pass the input through as-is.
html.CopyToString(&ret);
91
// Check that the output HTML is serialized to string-compare
92
// precisely with the input.
// Implemented by calling ValidateExpected() with html_input as both the
// input and the expected output; that call's bool result is discarded.
93
void ValidateNoChanges(StringPiece case_id, StringPiece html_input) {
94
ValidateExpected(case_id, html_input, html_input);
97
// Fail to ValidateNoChanges.
// Forwards to ValidateExpectedFail() with html_input as both the input and
// the expected output.
98
void ValidateNoChangesFail(StringPiece case_id, StringPiece html_input) {
99
ValidateExpectedFail(case_id, html_input, html_input);
103
// Body of a wrapper whose signature is not visible in this view of the file:
// sets up the writer using the fixture's own html_writer_filter_ member.
SetupWriter(&html_writer_filter_);
106
// Prepares output capture for a parse: clears output_buffer_ and, if
// *html_writer_filter is still null, creates an HtmlWriterFilter for
// html_parse(), points it at write_to_string_, and registers it with the
// parser. A filter that already exists is left as is.
void SetupWriter(scoped_ptr<HtmlWriterFilter>* html_writer_filter) {
107
// Discard any output left over from a previous parse.
output_buffer_.clear();
108
if (html_writer_filter->get() == NULL) {
109
html_writer_filter->reset(new HtmlWriterFilter(html_parse()));
110
(*html_writer_filter)->set_writer(&write_to_string_);
111
// The parser receives a raw pointer to the filter held by the scoped_ptr.
html_parse()->AddFilter(html_writer_filter->get());
115
// Parse html_input, the result is stored in output_buffer_.
// case_id is only used to build the URL handed to ParseUrl().
116
void Parse(StringPiece case_id, StringPiece html_input) {
117
// HtmlParser needs a valid HTTP URL to evaluate relative paths,
118
// so we create a dummy URL of the form kTestDomain + case_id + ".html".
119
GoogleString dummy_url = StrCat(kTestDomain, case_id, ".html");
120
ParseUrl(dummy_url, html_input);
123
// Parse given an explicit URL rather than an id to build URL around.
// Virtual, so subclasses can replace how parsing is driven. Only declared
// here.
124
virtual void ParseUrl(StringPiece url, StringPiece html_input);
126
// Validate that the output HTML serializes as specified in
127
// 'expected', which might not be identical to the input.
128
// Also, returns true if result came out as expected.
// Only declared here.
// NOTE(review): presumably case_id is turned into a dummy URL the same way
// Parse() does it -- confirm against the implementation.
129
bool ValidateExpected(StringPiece case_id,
130
StringPiece html_input,
131
StringPiece expected);
133
// Same as ValidateExpected, but with an explicit URL rather than an id.
// Returns a bool like ValidateExpected. Only declared here.
134
bool ValidateExpectedUrl(StringPiece url,
135
StringPiece html_input,
136
StringPiece expected);
138
// Fail to ValidateExpected.
// NOTE(review): presumably this asserts that the serialized output does NOT
// match 'expected' -- confirm against the implementation. Only declared
// here; unlike ValidateExpected it returns nothing.
139
void ValidateExpectedFail(StringPiece case_id,
140
StringPiece html_input,
141
StringPiece expected);
143
// Returns the parser under test. Pure virtual: each concrete fixture decides
// which HtmlParse instance the helpers in this class operate on.
virtual HtmlParse* html_parse() = 0;
145
// Convenience accessor that forwards to html_parse()->google_url().
const GoogleUrl& html_gurl() { return html_parse()->google_url(); }
147
// Message handler for the fixture; constructed with a NullMutex.
MockMessageHandler message_handler_;
148
// Writer constructed with the address of output_buffer_; SetupWriter()
// installs it on the HtmlWriterFilter.
StringWriter write_to_string_;
149
// Holds the result of a parse; cleared at the start of SetupWriter().
GoogleString output_buffer_;
151
// Created on demand by SetupWriter() the first time it finds it null.
scoped_ptr<HtmlWriterFilter> html_writer_filter_;
152
// Doctype directive stored by SetDoctype().
GoogleString doctype_string_;
155
DISALLOW_COPY_AND_ASSIGN(HtmlParseTestBaseNoAlloc);
158
// Concrete-parser variant of the fixture: holds an HtmlParse by value and
// exposes it through html_parse(). AddBody() is not overridden in the lines
// visible here.
class HtmlParseTestBase : public HtmlParseTestBaseNoAlloc {
160
// The parser is constructed with the base class's message_handler_.
HtmlParseTestBase() : html_parse_(&message_handler_) {
163
// Implements the base-class accessor by returning the owned parser.
virtual HtmlParse* html_parse() { return &html_parse_; }
165
// Parser instance owned by this fixture.
HtmlParse html_parse_;
168
DISALLOW_COPY_AND_ASSIGN(HtmlParseTestBase);
171
} // namespace net_instaweb
173
#endif // PAGESPEED_KERNEL_HTML_HTML_PARSE_TEST_BASE_H_