~marcusbritanicus/newbreeze/master

« back to all changes in this revision

Viewing changes to Plugins/MarkDown/markdown.cpp

  • Committer: Marcus Britanicus
  • Date: 2016-02-24 22:14:39 UTC
  • Revision ID: git-v1:1725d245965cb9fb0ab4095b69b569bbc0b30224
NewBreeze v3.0.0. MarkDownPreview plugin now uses libsoldout. Thu Feb 25 03:44:39 IST 2016

Show diffs side-by-side

added added

removed removed

Lines of Context:
1
 
/*
2
 
        Copyright (c) 2009 by Chad Nelson
3
 
        Released under the MIT License.
4
 
        See the provided LICENSE.TXT file for details.
5
 
*/
6
 
 
7
 
#include "markdown.hpp"
8
 
#include "markdown-tokens.hpp"
9
 
 
10
 
#include <sstream>
11
 
#include <cassert>
12
 
 
13
 
#include <boost/regex.hpp>
14
 
#include <boost/lexical_cast.hpp>
15
 
#include <boost/algorithm/string/case_conv.hpp>
16
 
 
17
 
using std::cerr;
18
 
using std::endl;
19
 
 
20
 
using boost::optional;
21
 
using boost::none;
22
 
using markdown::TokenPtr;
23
 
using markdown::CTokenGroupIter;
24
 
 
25
 
namespace {
26
 
 
27
 
// Result of recognising a single HTML tag with parseHtmlTag().
struct HtmlTagInfo {
    // Name of the tag (capture group 3 of the tag expression).
    std::string tagName;

    // Text of the last attribute matched by capture group 4, if any;
    // left empty when that group did not participate in the match.
    std::string extra;

    // True when the tag started with a '/' (e.g. "</div>").
    bool isClosingTag;

    // Length of the whole matched tag in the original string.
    size_t lengthOfToken;
};
32
 
 
33
 
const std::string cHtmlTokenSource("<((/?)([a-zA-Z0-9]+)(?:( +[a-zA-Z0-9]+?(?: ?= ?(\"|').*?\\5))*? */? *))>");
34
 
const boost::regex cHtmlTokenExpression(cHtmlTokenSource),
35
 
        cStartHtmlTokenExpression("^"+cHtmlTokenSource),
36
 
        cOneHtmlTokenExpression("^"+cHtmlTokenSource+"$");
37
 
 
38
 
// Selects how strictly parseHtmlTag() anchors the tag expression.
enum ParseHtmlTagFlags {
    cAlone,  // the text must consist of exactly one tag
    cStarts  // the text only has to begin with a tag
};
39
 
 
40
 
optional<HtmlTagInfo> parseHtmlTag(std::string::const_iterator begin,
41
 
        std::string::const_iterator end, ParseHtmlTagFlags flags)
42
 
{
43
 
        boost::smatch m;
44
 
        if (boost::regex_search(begin, end, m, (flags==cAlone ?
45
 
                cOneHtmlTokenExpression : cStartHtmlTokenExpression)))
46
 
        {
47
 
                HtmlTagInfo r;
48
 
                r.tagName=m[3];
49
 
                if (m[4].matched) r.extra=m[4];
50
 
                r.isClosingTag=(m[2].length()>0);
51
 
                r.lengthOfToken=m[0].length();
52
 
                return r;
53
 
        }
54
 
        return none;
55
 
}
56
 
 
57
 
markdown::TokenGroup parseInlineHtmlText(const std::string& src) {
58
 
        markdown::TokenGroup r;
59
 
        std::string::const_iterator prev=src.begin(), end=src.end();
60
 
        while (1) {
61
 
                boost::smatch m;
62
 
                if (boost::regex_search(prev, end, m, cHtmlTokenExpression)) {
63
 
                        if (prev!=m[0].first) {
64
 
                                //cerr << "  Non-tag (" << std::distance(prev, m[0].first) << "): " << std::string(prev, m[0].first) << endl;
65
 
                                r.push_back(TokenPtr(new markdown::token::InlineHtmlContents(std::string(prev, m[0].first))));
66
 
                        }
67
 
                        //cerr << "  Tag: " << m[1] << endl;
68
 
                        r.push_back(TokenPtr(new markdown::token::HtmlTag(m[1])));
69
 
                        prev=m[0].second;
70
 
                } else {
71
 
                        std::string eol;
72
 
                        if (prev!=end) {
73
 
                                eol=std::string(prev, end);
74
 
                                //cerr << "  Non-tag: " << eol << endl;
75
 
                        }
76
 
                        eol+='\n';
77
 
                        r.push_back(TokenPtr(new markdown::token::InlineHtmlContents(eol)));
78
 
                        break;
79
 
                }
80
 
        }
81
 
        return r;
82
 
}
83
 
 
84
 
bool isHtmlCommentStart(std::string::const_iterator begin,
85
 
        std::string::const_iterator end)
86
 
{
87
 
        // It can't be a single-line comment, those will already have been parsed
88
 
        // by isBlankLine.
89
 
        static const boost::regex cExpression("^<!--");
90
 
        return boost::regex_search(begin, end, cExpression);
91
 
}
92
 
 
93
 
bool isHtmlCommentEnd(std::string::const_iterator begin,
94
 
        std::string::const_iterator end)
95
 
{
96
 
        static const boost::regex cExpression(".*-- *>$");
97
 
        return boost::regex_match(begin, end, cExpression);
98
 
}
99
 
 
100
 
bool isBlankLine(const std::string& line) {
101
 
        static const boost::regex cExpression(" {0,3}(<--(.*)-- *> *)* *");
102
 
        return boost::regex_match(line, cExpression);
103
 
}
104
 
 
105
 
optional<TokenPtr> parseInlineHtml(CTokenGroupIter& i, CTokenGroupIter end) {
106
 
        // Preconditions: Previous line was blank, or this is the first line.
107
 
        if ((*i)->text()) {
108
 
                const std::string& line(*(*i)->text());
109
 
 
110
 
                bool tag=false, comment=false;
111
 
                optional<HtmlTagInfo> tagInfo=parseHtmlTag(line.begin(), line.end(), cStarts);
112
 
                if (tagInfo && markdown::token::isValidTag(tagInfo->tagName)>1) {
113
 
                        tag=true;
114
 
                } else if (isHtmlCommentStart(line.begin(), line.end())) {
115
 
                        comment=true;
116
 
                }
117
 
 
118
 
                if (tag) {
119
 
                        // Block continues until an HTML tag (alone) on a line followed by a
120
 
                        // blank line.
121
 
                        markdown::TokenGroup contents;
122
 
                        CTokenGroupIter firstLine=i, prevLine=i;
123
 
                        size_t lines=0;
124
 
 
125
 
                        bool done=false;
126
 
                        do {
127
 
                                // We encode HTML tags so that their contents gets properly
128
 
                                // handled -- i.e. "<div style=">"/>" becomes <div style="&gt;"/>
129
 
                                if ((*i)->text()) {
130
 
                                        markdown::TokenGroup t=parseInlineHtmlText(*(*i)->text());
131
 
                                        contents.splice(contents.end(), t);
132
 
                                } else contents.push_back(*i);
133
 
 
134
 
                                prevLine=i;
135
 
                                ++i;
136
 
                                ++lines;
137
 
 
138
 
                                if (i!=end && (*i)->isBlankLine() && (*prevLine)->text()) {
139
 
                                        if (prevLine==firstLine) {
140
 
                                                done=true;
141
 
                                        } else {
142
 
                                                const std::string& text(*(*prevLine)->text());
143
 
                                                if (parseHtmlTag(text.begin(), text.end(), cAlone)) done=true;
144
 
                                        }
145
 
                                }
146
 
                        } while (i!=end && !done);
147
 
 
148
 
                        if (lines>1 || markdown::token::isValidTag(tagInfo->tagName, true)>1) {
149
 
                                i=prevLine;
150
 
                                return TokenPtr(new markdown::token::InlineHtmlBlock(contents));
151
 
                        } else {
152
 
                                // Single-line HTML "blocks" whose initial tags are span-tags
153
 
                                // don't qualify as inline HTML.
154
 
                                i=firstLine;
155
 
                                return none;
156
 
                        }
157
 
                } else if (comment) {
158
 
                        // Comment continues until a closing tag is found; at present, it
159
 
                        // also has to be the last thing on the line, and has to be
160
 
                        // immediately followed by a blank line too.
161
 
                        markdown::TokenGroup contents;
162
 
                        CTokenGroupIter firstLine=i, prevLine=i;
163
 
 
164
 
                        bool done=false;
165
 
                        do {
166
 
                                if ((*i)->text()) contents.push_back(TokenPtr(new markdown::token::InlineHtmlComment(*(*i)->text()+'\n')));
167
 
                                else contents.push_back(*i);
168
 
 
169
 
                                prevLine=i;
170
 
                                ++i;
171
 
 
172
 
                                if (i!=end && (*i)->isBlankLine() && (*prevLine)->text()) {
173
 
                                        if (prevLine==firstLine) {
174
 
                                                done=true;
175
 
                                        } else {
176
 
                                                const std::string& text(*(*prevLine)->text());
177
 
                                                if (isHtmlCommentEnd(text.begin(), text.end())) done=true;
178
 
                                        }
179
 
                                }
180
 
                        } while (i!=end && !done);
181
 
                        i=prevLine;
182
 
                        return TokenPtr(new markdown::token::InlineHtmlBlock(contents));
183
 
                }
184
 
        }
185
 
 
186
 
        return none;
187
 
}
188
 
 
189
 
optional<std::string> isCodeBlockLine(CTokenGroupIter& i, CTokenGroupIter end) {
190
 
        if ((*i)->isBlankLine()) {
191
 
                // If we get here, we're already in a code block.
192
 
                ++i;
193
 
                if (i!=end) {
194
 
                        optional<std::string> r=isCodeBlockLine(i, end);
195
 
                        if (r) return std::string("\n"+*r);
196
 
                }
197
 
                --i;
198
 
        } else if ((*i)->text() && (*i)->canContainMarkup()) {
199
 
                const std::string& line(*(*i)->text());
200
 
                if (line.length()>=4) {
201
 
                        std::string::const_iterator si=line.begin(), sie=si+4;
202
 
                        while (si!=sie && *si==' ') ++si;
203
 
                        if (si==sie) {
204
 
                                ++i;
205
 
                                return std::string(si, line.end());
206
 
                        }
207
 
                }
208
 
        }
209
 
        return none;
210
 
}
211
 
 
212
 
optional<TokenPtr> parseCodeBlock(CTokenGroupIter& i, CTokenGroupIter end) {
213
 
        if (!(*i)->isBlankLine()) {
214
 
                optional<std::string> contents=isCodeBlockLine(i, end);
215
 
                if (contents) {
216
 
                        std::ostringstream out;
217
 
                        out << *contents << '\n';
218
 
                        while (i!=end) {
219
 
                                contents=isCodeBlockLine(i, end);
220
 
                                if (contents) out << *contents << '\n';
221
 
                                else break;
222
 
                        }
223
 
                        return TokenPtr(new markdown::token::CodeBlock(out.str()));
224
 
                }
225
 
        }
226
 
        return none;
227
 
}
228
 
 
229
 
 
230
 
 
231
 
// Returns the number of '>' characters in prefixString, i.e. the nesting
// depth of a block-quote prefix.
size_t countQuoteLevel(const std::string& prefixString) {
    size_t level = 0;
    for (std::string::size_type pos = 0; pos < prefixString.size(); ++pos) {
        if (prefixString[pos] == '>') level += 1;
    }
    return level;
}
238
 
 
239
 
optional<TokenPtr> parseBlockQuote(CTokenGroupIter& i, CTokenGroupIter end) {
240
 
        static const boost::regex cBlockQuoteExpression("^((?: {0,3}>)+) (.*)$");
241
 
        // Useful captures: 1=prefix, 2=content
242
 
 
243
 
        if (!(*i)->isBlankLine() && (*i)->text() && (*i)->canContainMarkup()) {
244
 
                const std::string& line(*(*i)->text());
245
 
                boost::smatch m;
246
 
                if (boost::regex_match(line, m, cBlockQuoteExpression)) {
247
 
                        size_t quoteLevel=countQuoteLevel(m[1]);
248
 
                        boost::regex continuationExpression=boost::regex("^((?: {0,3}>){"+boost::lexical_cast<std::string>(quoteLevel)+"}) ?(.*)$");
249
 
 
250
 
                        markdown::TokenGroup subTokens;
251
 
                        subTokens.push_back(TokenPtr(new markdown::token::RawText(m[2])));
252
 
 
253
 
                        // The next line can be a continuation of this quote (with or
254
 
                        // without the prefix string) or a blank line. Blank lines are
255
 
                        // treated as part of this quote if the following line is a
256
 
                        // properly-prefixed quote line too, otherwise they terminate the
257
 
                        // quote.
258
 
                        ++i;
259
 
                        while (i!=end) {
260
 
                                if ((*i)->isBlankLine()) {
261
 
                                        CTokenGroupIter ii=i;
262
 
                                        ++ii;
263
 
                                        if (ii==end) {
264
 
                                                i=ii;
265
 
                                                break;
266
 
                                        } else {
267
 
                                                const std::string& line(*(*ii)->text());
268
 
                                                if (boost::regex_match(line, m, continuationExpression)) {
269
 
                                                        if (m[1].matched && m[1].length()>0) {
270
 
                                                                i=++ii;
271
 
                                                                subTokens.push_back(TokenPtr(new markdown::token::BlankLine));
272
 
                                                                subTokens.push_back(TokenPtr(new markdown::token::RawText(m[2])));
273
 
                                                        } else break;
274
 
                                                } else break;
275
 
                                        }
276
 
                                } else {
277
 
                                        const std::string& line(*(*i)->text());
278
 
                                        if (boost::regex_match(line, m, continuationExpression)) {
279
 
                                                assert(m[2].matched);
280
 
                                                if (!isBlankLine(m[2])) subTokens.push_back(TokenPtr(new markdown::token::RawText(m[2])));
281
 
                                                else subTokens.push_back(TokenPtr(new markdown::token::BlankLine(m[2])));
282
 
                                                ++i;
283
 
                                        } else break;
284
 
                                }
285
 
                        }
286
 
 
287
 
                        return TokenPtr(new markdown::token::BlockQuote(subTokens));
288
 
                }
289
 
        }
290
 
        return none;
291
 
}
292
 
 
293
 
optional<TokenPtr> parseListBlock(CTokenGroupIter& i, CTokenGroupIter end, bool sub=false) {
294
 
        static const boost::regex cUnorderedListExpression("^( *)([*+-]) +([^*-].*)$");
295
 
        static const boost::regex cOrderedListExpression("^( *)([0-9]+)\\. +(.*)$");
296
 
 
297
 
        enum ListType { cNone, cUnordered, cOrdered };
298
 
        ListType type=cNone;
299
 
        if (!(*i)->isBlankLine() && (*i)->text() && (*i)->canContainMarkup()) {
300
 
                boost::regex nextItemExpression, startSublistExpression;
301
 
                size_t indent=0;
302
 
 
303
 
                const std::string& line(*(*i)->text());
304
 
 
305
 
                //cerr << "IsList? " << line << endl;
306
 
 
307
 
                markdown::TokenGroup subTokens, subItemTokens;
308
 
 
309
 
                boost::smatch m;
310
 
                if (boost::regex_match(line, m, cUnorderedListExpression)) {
311
 
                        indent=m[1].length();
312
 
                        if (sub || indent<4) {
313
 
                                type=cUnordered;
314
 
                                char startChar=*m[2].first;
315
 
                                subItemTokens.push_back(TokenPtr(new markdown::token::RawText(m[3])));
316
 
 
317
 
                                std::ostringstream next;
318
 
                                next << "^" << std::string(indent, ' ') << "\\" << startChar << " +([^*-].*)$";
319
 
                                nextItemExpression=next.str();
320
 
                        }
321
 
                } else if (boost::regex_match(line, m, cOrderedListExpression)) {
322
 
                        indent=m[1].length();
323
 
                        if (sub || indent<4) {
324
 
                                type=cOrdered;
325
 
                                subItemTokens.push_back(TokenPtr(new markdown::token::RawText(m[3])));
326
 
 
327
 
                                std::ostringstream next;
328
 
                                next << "^" << std::string(indent, ' ') << "[0-9]+\\. +(.*)$";
329
 
                                nextItemExpression=next.str();
330
 
                        }
331
 
                }
332
 
 
333
 
                if (type!=cNone) {
334
 
                        CTokenGroupIter originalI=i;
335
 
                        size_t itemCount=1;
336
 
                        std::ostringstream sub;
337
 
                        sub << "^" << std::string(indent, ' ') << " +(([*+-])|([0-9]+\\.)) +.*$";
338
 
                        startSublistExpression=sub.str();
339
 
 
340
 
                        // There are several options for the next line. It's another item in
341
 
                        // this list (in which case this one is done); it's a continuation
342
 
                        // of this line (collect it and keep going); it's the first item in
343
 
                        // a sub-list (call this function recursively to collect it), it's
344
 
                        // the next item in the parent list (this one is ended); or it's
345
 
                        // blank.
346
 
                        //
347
 
                        // A blank line requires looking ahead. If the next line is an item
348
 
                        // for this list, then switch this list into paragraph-items mode
349
 
                        // and continue processing. If it's indented by four or more spaces
350
 
                        // (more than the list itself), then it's another continuation of
351
 
                        // the current item. Otherwise it's either a new paragraph (and this
352
 
                        // list is ended) or the beginning of a sub-list.
353
 
                        static const boost::regex cContinuedItemExpression("^ *([^ ].*)$");
354
 
 
355
 
                        boost::regex continuedAfterBlankLineExpression("^ {"+
356
 
                                boost::lexical_cast<std::string>(indent+4)+"}([^ ].*)$");
357
 
                        boost::regex codeBlockAfterBlankLineExpression("^ {"+
358
 
                                boost::lexical_cast<std::string>(indent+8)+"}(.*)$");
359
 
 
360
 
                        enum NextItemType { cUnknown, cEndOfList, cAnotherItem };
361
 
                        NextItemType nextItem=cUnknown;
362
 
                        bool setParagraphMode=false;
363
 
 
364
 
                        ++i;
365
 
                        while (i!=end) {
366
 
                                if ((*i)->isBlankLine()) {
367
 
                                        CTokenGroupIter ii=i;
368
 
                                        ++ii;
369
 
                                        if (ii==end) {
370
 
                                                i=ii;
371
 
                                                nextItem=cEndOfList;
372
 
                                        } else if ((*ii)->text()) {
373
 
                                                const std::string& line(*(*ii)->text());
374
 
                                                if (boost::regex_match(line, startSublistExpression)) {
375
 
                                                        setParagraphMode=true;
376
 
                                                        ++itemCount;
377
 
                                                        i=ii;
378
 
                                                        optional<TokenPtr> p=parseListBlock(i, end, true);
379
 
                                                        assert(p);
380
 
                                                        subItemTokens.push_back(*p);
381
 
                                                        continue;
382
 
                                                } else if (boost::regex_match(line, m, nextItemExpression)) {
383
 
                                                        setParagraphMode=true;
384
 
                                                        i=ii;
385
 
                                                        nextItem=cAnotherItem;
386
 
                                                } else if (boost::regex_match(line, m, continuedAfterBlankLineExpression)) {
387
 
                                                        assert(m[1].matched);
388
 
                                                        subItemTokens.push_back(TokenPtr(new markdown::token::BlankLine()));
389
 
                                                        subItemTokens.push_back(TokenPtr(new markdown::token::RawText(m[1])));
390
 
                                                        i=++ii;
391
 
                                                        continue;
392
 
                                                } else if (boost::regex_match(line, m, codeBlockAfterBlankLineExpression)) {
393
 
                                                        setParagraphMode=true;
394
 
                                                        ++itemCount;
395
 
                                                        assert(m[1].matched);
396
 
                                                        subItemTokens.push_back(TokenPtr(new markdown::token::BlankLine()));
397
 
 
398
 
                                                        std::string codeBlock=m[1]+'\n';
399
 
                                                        ++ii;
400
 
                                                        while (ii!=end) {
401
 
                                                                if ((*ii)->isBlankLine()) {
402
 
                                                                        CTokenGroupIter iii=ii;
403
 
                                                                        ++iii;
404
 
                                                                        const std::string& nextLine(*(*iii)->text());
405
 
                                                                        if (boost::regex_match(nextLine, m, codeBlockAfterBlankLineExpression)) {
406
 
                                                                                codeBlock+='\n'+m[1]+'\n';
407
 
                                                                                ii=iii;
408
 
                                                                        } else break;
409
 
                                                                } else if ((*ii)->text()) {
410
 
                                                                        const std::string& line(*(*ii)->text());
411
 
                                                                        if (boost::regex_match(line, m, codeBlockAfterBlankLineExpression)) {
412
 
                                                                                codeBlock+=m[1]+'\n';
413
 
                                                                        } else break;
414
 
                                                                } else break;
415
 
                                                                ++ii;
416
 
                                                        }
417
 
 
418
 
                                                        subItemTokens.push_back(TokenPtr(new markdown::token::CodeBlock(codeBlock)));
419
 
                                                        i=ii;
420
 
                                                        continue;
421
 
                                                } else {
422
 
                                                        nextItem=cEndOfList;
423
 
                                                }
424
 
                                        } else break;
425
 
                                } else if ((*i)->text()) {
426
 
                                        const std::string& line(*(*i)->text());
427
 
                                        if (boost::regex_match(line, startSublistExpression)) {
428
 
                                                ++itemCount;
429
 
                                                optional<TokenPtr> p=parseListBlock(i, end, true);
430
 
                                                assert(p);
431
 
                                                subItemTokens.push_back(*p);
432
 
                                                continue;
433
 
                                        } else if (boost::regex_match(line, m, nextItemExpression)) {
434
 
                                                nextItem=cAnotherItem;
435
 
                                        } else {
436
 
                                                if (boost::regex_match(line, m, cUnorderedListExpression)
437
 
                                                        || boost::regex_match(line, m, cOrderedListExpression))
438
 
                                                {
439
 
                                                        // Belongs to the parent list
440
 
                                                        nextItem=cEndOfList;
441
 
                                                } else {
442
 
                                                        boost::regex_match(line, m, cContinuedItemExpression);
443
 
                                                        assert(m[1].matched);
444
 
                                                        subItemTokens.push_back(TokenPtr(new markdown::token::RawText(m[1])));
445
 
                                                        ++i;
446
 
                                                        continue;
447
 
                                                }
448
 
                                        }
449
 
                                } else nextItem=cEndOfList;
450
 
 
451
 
                                if (!subItemTokens.empty()) {
452
 
                                        subTokens.push_back(TokenPtr(new markdown::token::ListItem(subItemTokens)));
453
 
                                        subItemTokens.clear();
454
 
                                }
455
 
 
456
 
                                assert(nextItem!=cUnknown);
457
 
                                if (nextItem==cAnotherItem) {
458
 
                                        subItemTokens.push_back(TokenPtr(new markdown::token::RawText(m[1])));
459
 
                                        ++itemCount;
460
 
                                        ++i;
461
 
                                } else { // nextItem==cEndOfList
462
 
                                        break;
463
 
                                }
464
 
                        }
465
 
 
466
 
                        // In case we hit the end with an unterminated item...
467
 
                        if (!subItemTokens.empty()) {
468
 
                                subTokens.push_back(TokenPtr(new markdown::token::ListItem(subItemTokens)));
469
 
                                subItemTokens.clear();
470
 
                        }
471
 
 
472
 
                        if (itemCount>1 || indent!=0) {
473
 
                                if (type==cUnordered) {
474
 
                                        return TokenPtr(new markdown::token::UnorderedList(subTokens, setParagraphMode));
475
 
                                } else {
476
 
                                        return TokenPtr(new markdown::token::OrderedList(subTokens, setParagraphMode));
477
 
                                }
478
 
                        } else {
479
 
                                // It looked like a list, but turned out to be a false alarm.
480
 
                                i=originalI;
481
 
                                return none;
482
 
                        }
483
 
                }
484
 
        }
485
 
        return none;
486
 
}
487
 
 
488
 
bool parseReference(CTokenGroupIter& i, CTokenGroupIter end, markdown::LinkIds &idTable) {
489
 
        if ((*i)->text()) {
490
 
                static const boost::regex cReference("^ {0,3}\\[(.+)\\]: +<?([^ >]+)>?(?: *(?:('|\")(.*)\\3)|(?:\\((.*)\\)))?$");
491
 
                // Useful captures: 1=id, 2=url, 4/5=title
492
 
 
493
 
                const std::string& line1(*(*i)->text());
494
 
                boost::smatch m;
495
 
                if (boost::regex_match(line1, m, cReference)) {
496
 
                        std::string id(m[1]), url(m[2]), title;
497
 
                        if (m[4].matched) title=m[4];
498
 
                        else if (m[5].matched) title=m[5];
499
 
                        else {
500
 
                                CTokenGroupIter ii=i;
501
 
                                ++ii;
502
 
                                if (ii!=end && (*ii)->text()) {
503
 
                                        // It could be on the next line
504
 
                                        static const boost::regex cSeparateTitle("^ *(?:(?:('|\")(.*)\\1)|(?:\\((.*)\\))) *$");
505
 
                                        // Useful Captures: 2/3=title
506
 
 
507
 
                                        const std::string& line2(*(*ii)->text());
508
 
                                        if (boost::regex_match(line2, m, cSeparateTitle)) {
509
 
                                                ++i;
510
 
                                                title=(m[2].matched ? m[2] : m[3]);
511
 
                                        }
512
 
                                }
513
 
                        }
514
 
 
515
 
                        idTable.add(id, url, title);
516
 
                        return true;
517
 
                }
518
 
        }
519
 
        return false;
520
 
}
521
 
 
522
 
void flushParagraph(std::string& paragraphText, markdown::TokenGroup&
523
 
        paragraphTokens, markdown::TokenGroup& finalTokens, bool noParagraphs)
524
 
{
525
 
        if (!paragraphText.empty()) {
526
 
                paragraphTokens.push_back(TokenPtr(new markdown::token::RawText(paragraphText)));
527
 
                paragraphText.clear();
528
 
        }
529
 
 
530
 
        if (!paragraphTokens.empty()) {
531
 
                if (noParagraphs) {
532
 
                        if (paragraphTokens.size()>1) {
533
 
                                finalTokens.push_back(TokenPtr(new markdown::token::Container(paragraphTokens)));
534
 
                        } else finalTokens.push_back(*paragraphTokens.begin());
535
 
                } else finalTokens.push_back(TokenPtr(new markdown::token::Paragraph(paragraphTokens)));
536
 
                paragraphTokens.clear();
537
 
        }
538
 
}
539
 
 
540
 
optional<TokenPtr> parseHeader(CTokenGroupIter& i, CTokenGroupIter end) {
541
 
        if (!(*i)->isBlankLine() && (*i)->text() && (*i)->canContainMarkup()) {
542
 
                // Hash-mark type
543
 
                static const boost::regex cHashHeaders("^(#{1,6}) +(.*?) *#*$");
544
 
                const std::string& line=*(*i)->text();
545
 
                boost::smatch m;
546
 
                if (boost::regex_match(line, m, cHashHeaders))
547
 
                        return TokenPtr(new markdown::token::Header(m[1].length(), m[2]));
548
 
 
549
 
                // Underlined type
550
 
                CTokenGroupIter ii=i;
551
 
                ++ii;
552
 
                if (ii!=end && !(*ii)->isBlankLine() && (*ii)->text() && (*ii)->canContainMarkup()) {
553
 
                        static const boost::regex cUnderlinedHeaders("^([-=])\\1*$");
554
 
                        const std::string& line=*(*ii)->text();
555
 
                        if (boost::regex_match(line, m, cUnderlinedHeaders)) {
556
 
                                char typeChar=std::string(m[1])[0];
557
 
                                TokenPtr p=TokenPtr(new markdown::token::Header((typeChar=='='
558
 
                                        ? 1 : 2), *(*i)->text()));
559
 
                                i=ii;
560
 
                                return p;
561
 
                        }
562
 
                }
563
 
        }
564
 
        return none;
565
 
}
566
 
 
567
 
optional<TokenPtr> parseHorizontalRule(CTokenGroupIter& i, CTokenGroupIter end) {
568
 
        if (!(*i)->isBlankLine() && (*i)->text() && (*i)->canContainMarkup()) {
569
 
                static const boost::regex cHorizontalRules("^ {0,3}((?:-|\\*|_) *){3,}$");
570
 
                const std::string& line=*(*i)->text();
571
 
                if (boost::regex_match(line, cHorizontalRules)) {
572
 
                        return TokenPtr(new markdown::token::HtmlTag("hr/"));
573
 
                }
574
 
        }
575
 
        return none;
576
 
}
577
 
 
578
 
} // namespace
579
 
 
580
 
 
581
 
 
582
 
namespace markdown {
583
 
 
584
 
// Looks up a link reference by id. The id is passed through _scrubKey()
// first, so the lookup ignores letter case. Returns the stored Target, or
// none when the id is unknown.
optional<LinkIds::Target> LinkIds::find(const std::string& id) const {
        const Table::const_iterator entry=mTable.find(_scrubKey(id));
        if (entry==mTable.end()) return none;
        return entry->second;
}
589
 
 
590
 
void LinkIds::add(const std::string& id, const std::string& url, const
591
 
        std::string& title)
592
 
{
593
 
        mTable.insert(std::make_pair(_scrubKey(id), Target(url, title)));
594
 
}
595
 
 
596
 
std::string LinkIds::_scrubKey(std::string str) {
597
 
        boost::algorithm::to_lower(str);
598
 
        return str;
599
 
}
600
 
 
601
 
 
602
 
 
603
 
// Tab width Document::_getline() uses while it is still inside a line's
// leading whitespace.
const size_t Document::cSpacesPerInitialTab=4; // Required by Markdown format
604
 
// Default tab width, defined to equal the width used for leading tabs.
// NOTE(review): presumably the default value of the constructors'
// spacesPerTab argument -- confirm against the declaration in markdown.hpp.
const size_t Document::cDefaultSpacesPerTab=cSpacesPerInitialTab;
605
 
 
606
 
// Creates an empty, unprocessed document. spacesPerTab is the width used
// when expanding tabs that occur after a line's leading whitespace.
Document::Document(size_t spacesPerTab)
        : cSpacesPerTab(spacesPerTab)
        , mTokenContainer(new token::Container)
        , mIdTable(new LinkIds)
        , mProcessed(false)
{
        // No input yet; content is supplied later through read().
}
612
 
 
613
 
// Creates a document and immediately reads its content from in.
// spacesPerTab is the width used when expanding tabs that occur after a
// line's leading whitespace.
//
// Any exception thrown while reading is propagated to the caller.
Document::Document(std::istream& in, size_t spacesPerTab)
        : cSpacesPerTab(spacesPerTab)
        , mTokenContainer(new token::Container)
        , mIdTable(new LinkIds)
        , mProcessed(false)
{
        // When a constructor body throws, ~Document() is never run, so the
        // raw mIdTable allocation has to be released here or it would leak.
        try {
                read(in);
        } catch (...) {
                delete mIdTable;
                throw;
        }
}
619
 
 
620
 
// Releases the link-id table that the constructors allocate with new.
Document::~Document()
{
        delete mIdTable;
}
623
 
 
624
 
bool Document::read(const std::string& src) {
625
 
        std::istringstream in(src);
626
 
        return read(in);
627
 
}
628
 
 
629
 
bool Document::_getline(std::istream& in, std::string& line) {
630
 
        // Handles \n, \r, and \r\n (and even \n\r) on any system. Also does tab-
631
 
        // expansion, since this is the most efficient place for it.
632
 
        line.clear();
633
 
 
634
 
        bool initialWhitespace=true;
635
 
        char c;
636
 
        while (in.get(c)) {
637
 
                if (c=='\r') {
638
 
                        if ((in.get(c)) && c!='\n') in.unget();
639
 
                        return true;
640
 
                } else if (c=='\n') {
641
 
                        if ((in.get(c)) && c!='\r') in.unget();
642
 
                        return true;
643
 
                } else if (c=='\t') {
644
 
                        size_t convert=(initialWhitespace ? cSpacesPerInitialTab :
645
 
                                cSpacesPerTab);
646
 
                        line+=std::string(convert-(line.length()%convert), ' ');
647
 
                } else {
648
 
                        line.push_back(c);
649
 
                        if (c!=' ') initialWhitespace=false;
650
 
                }
651
 
        }
652
 
        return !line.empty();
653
 
}
654
 
 
655
 
bool Document::read(std::istream& in) {
656
 
        if (mProcessed) return false;
657
 
 
658
 
        token::Container *tokens=dynamic_cast<token::Container*>(mTokenContainer.get());
659
 
        assert(tokens!=0);
660
 
 
661
 
        std::string line;
662
 
        TokenGroup tgt;
663
 
        while (_getline(in, line)) {
664
 
                if (isBlankLine(line)) {
665
 
                        tgt.push_back(TokenPtr(new token::BlankLine(line)));
666
 
                } else {
667
 
                        tgt.push_back(TokenPtr(new token::RawText(line)));
668
 
                }
669
 
        }
670
 
        tokens->appendSubtokens(tgt);
671
 
 
672
 
        return true;
673
 
}
674
 
 
675
 
void Document::write(std::ostream& out) {
676
 
        _process();
677
 
        mTokenContainer->writeAsHtml(out);
678
 
}
679
 
 
680
 
void Document::writeTokens(std::ostream& out) {
681
 
        _process();
682
 
        mTokenContainer->writeToken(0, out);
683
 
}
684
 
 
685
 
std::string Document::asHtml() {
686
 
 
687
 
        _process();
688
 
 
689
 
        std::stringstream ss( std::ios_base::in | std::ios_base::out );
690
 
 
691
 
        mTokenContainer->writeAsHtml(ss);
692
 
        int size = ss.tellp();
693
 
        ss.seekp( 0 );
694
 
 
695
 
        char *buffer = new char[ size + 1 ];
696
 
        ss.read( buffer, size );
697
 
 
698
 
        return std::string( buffer );
699
 
}
700
 
 
701
 
void Document::_process() {
702
 
        if (!mProcessed) {
703
 
                _mergeMultilineHtmlTags();
704
 
                _processInlineHtmlAndReferences();
705
 
                _processBlocksItems(mTokenContainer);
706
 
                _processParagraphLines(mTokenContainer);
707
 
                mTokenContainer->processSpanElements(*mIdTable);
708
 
                mProcessed=true;
709
 
        }
710
 
}
711
 
 
712
 
void Document::_mergeMultilineHtmlTags() {
713
 
        static const boost::regex cHtmlTokenStart("<((/?)([a-zA-Z0-9]+)(?:( +[a-zA-Z0-9]+?(?: ?= ?(\"|').*?\\5))*? */? *))$");
714
 
        static const boost::regex cHtmlTokenEnd("^ *((?:( +[a-zA-Z0-9]+?(?: ?= ?(\"|').*?\\3))*? */? *))>");
715
 
 
716
 
        TokenGroup processed;
717
 
 
718
 
        token::Container *tokens=dynamic_cast<token::Container*>(mTokenContainer.get());
719
 
        assert(tokens!=0);
720
 
 
721
 
        for (TokenGroup::const_iterator i=tokens->subTokens().begin(),
722
 
                ie=tokens->subTokens().end(); i!=ie; ++i)
723
 
        {
724
 
                if ((*i)->text() && boost::regex_match(*(*i)->text(), cHtmlTokenStart)) {
725
 
                        TokenGroup::const_iterator i2=i;
726
 
                        ++i2;
727
 
                        if (i2!=tokens->subTokens().end() && (*i2)->text() &&
728
 
                                boost::regex_match(*(*i2)->text(), cHtmlTokenEnd))
729
 
                        {
730
 
                                processed.push_back(TokenPtr(new markdown::token::RawText(*(*i)->text()+' '+*(*i2)->text())));
731
 
                                ++i;
732
 
                                continue;
733
 
                        }
734
 
                }
735
 
                processed.push_back(*i);
736
 
        }
737
 
        tokens->swapSubtokens(processed);
738
 
}
739
 
 
740
 
void Document::_processInlineHtmlAndReferences() {
741
 
        TokenGroup processed;
742
 
 
743
 
        token::Container *tokens=dynamic_cast<token::Container*>(mTokenContainer.get());
744
 
        assert(tokens!=0);
745
 
 
746
 
        for (TokenGroup::const_iterator ii=tokens->subTokens().begin(),
747
 
                iie=tokens->subTokens().end(); ii!=iie; ++ii)
748
 
        {
749
 
                if ((*ii)->text()) {
750
 
                        if (processed.empty() || processed.back()->isBlankLine()) {
751
 
                                optional<TokenPtr> inlineHtml=parseInlineHtml(ii, iie);
752
 
                                if (inlineHtml) {
753
 
                                        processed.push_back(*inlineHtml);
754
 
                                        if (ii==iie) break;
755
 
                                        continue;
756
 
                                }
757
 
                        }
758
 
 
759
 
                        if (parseReference(ii, iie, *mIdTable)) {
760
 
                                if (ii==iie) break;
761
 
                                continue;
762
 
                        }
763
 
 
764
 
                        // If it gets down here, just store it in its current (raw text)
765
 
                        // form. We'll group the raw text lines into paragraphs in a
766
 
                        // later pass, since we can't easily tell where paragraphs
767
 
                        // end until then.
768
 
                }
769
 
                processed.push_back(*ii);
770
 
        }
771
 
        tokens->swapSubtokens(processed);
772
 
}
773
 
 
774
 
void Document::_processBlocksItems(TokenPtr inTokenContainer) {
775
 
        if (!inTokenContainer->isContainer()) return;
776
 
 
777
 
        token::Container *tokens=dynamic_cast<token::Container*>(inTokenContainer.get());
778
 
        assert(tokens!=0);
779
 
 
780
 
        TokenGroup processed;
781
 
 
782
 
        for (TokenGroup::const_iterator ii=tokens->subTokens().begin(),
783
 
                iie=tokens->subTokens().end(); ii!=iie; ++ii)
784
 
        {
785
 
                if ((*ii)->text()) {
786
 
                        optional<TokenPtr> subitem;
787
 
                        if (!subitem) subitem=parseHeader(ii, iie);
788
 
                        if (!subitem) subitem=parseHorizontalRule(ii, iie);
789
 
                        if (!subitem) subitem=parseListBlock(ii, iie);
790
 
                        if (!subitem) subitem=parseBlockQuote(ii, iie);
791
 
                        if (!subitem) subitem=parseCodeBlock(ii, iie);
792
 
 
793
 
                        if (subitem) {
794
 
                                _processBlocksItems(*subitem);
795
 
                                processed.push_back(*subitem);
796
 
                                if (ii==iie) break;
797
 
                                continue;
798
 
                        } else processed.push_back(*ii);
799
 
                } else if ((*ii)->isContainer()) {
800
 
                        _processBlocksItems(*ii);
801
 
                        processed.push_back(*ii);
802
 
                }
803
 
        }
804
 
        tokens->swapSubtokens(processed);
805
 
}
806
 
 
807
 
void Document::_processParagraphLines(TokenPtr inTokenContainer) {
808
 
        token::Container *tokens=dynamic_cast<token::Container*>(inTokenContainer.get());
809
 
        assert(tokens!=0);
810
 
 
811
 
        bool noPara=tokens->inhibitParagraphs();
812
 
        for (TokenGroup::const_iterator ii=tokens->subTokens().begin(),
813
 
                iie=tokens->subTokens().end(); ii!=iie; ++ii)
814
 
                        if ((*ii)->isContainer()) _processParagraphLines(*ii);
815
 
 
816
 
        TokenGroup processed;
817
 
        std::string paragraphText;
818
 
        TokenGroup paragraphTokens;
819
 
        for (TokenGroup::const_iterator ii=tokens->subTokens().begin(),
820
 
                iie=tokens->subTokens().end(); ii!=iie; ++ii)
821
 
        {
822
 
                if ((*ii)->text() && (*ii)->canContainMarkup() && !(*ii)->inhibitParagraphs()) {
823
 
                        static const boost::regex cExpression("^(.*)  $");
824
 
                        if (!paragraphText.empty()) paragraphText+=" ";
825
 
 
826
 
                        boost::smatch m;
827
 
                        if (boost::regex_match(*(*ii)->text(), m, cExpression)) {
828
 
                                paragraphText += m[1];
829
 
                                flushParagraph(paragraphText, paragraphTokens, processed, noPara);
830
 
                                processed.push_back(TokenPtr(new markdown::token::HtmlTag("br/")));
831
 
                        } else paragraphText += *(*ii)->text();
832
 
                } else {
833
 
                        flushParagraph(paragraphText, paragraphTokens, processed, noPara);
834
 
                        processed.push_back(*ii);
835
 
                }
836
 
        }
837
 
 
838
 
        // Make sure the last paragraph is properly flushed too.
839
 
        flushParagraph(paragraphText, paragraphTokens, processed, noPara);
840
 
 
841
 
        tokens->swapSubtokens(processed);
842
 
}
843
 
 
844
 
} // namespace markdown