1
/******************************************************************
4
* A simple Markdown lexer for scintilla.
6
* Includes highlighting for some extra features from the
7
* Pandoc implementation: strikeout, using '#.' as a default
8
* ordered list item marker, and delimited code blocks.
12
* Standard indented code blocks are not highlighted at all,
13
* as it would conflict with other indentation schemes. Use
14
* delimited code blocks for blanket highlighting of an
15
* entire code block. Embedded HTML is not highlighted either.
16
* Blanket HTML highlighting has issues, because some Markdown
17
* implementations allow Markdown markup inside of the HTML. Also,
18
* there is a following blank line issue that can't be ignored,
19
* explained in the next paragraph. Embedded HTML and code
20
* blocks would be better supported with language specific highlighting.
23
* The highlighting aims to accurately reflect correct syntax,
24
* but a few restrictions are relaxed. Delimited code blocks are
25
* highlighted, even if the line following the code block is not blank.
26
* Requiring a blank line after a block breaks the highlighting
27
* in certain cases, because of the way Scintilla ends up calling the lexer.
30
* Written by Jon Strait - jstrait@moonloop.net
32
* The License.txt file describes the conditions under which this
33
* software may be distributed.
35
*****************************************************************/
44
#include "Scintilla.h"
48
#include "LexAccessor.h"
50
#include "StyleContext.h"
51
#include "CharacterSet.h"
52
#include "LexerModule.h"
55
using namespace Scintilla;
58
// Returns true when ch is a line terminator character: line feed ('\n')
// or carriage return ('\r'). Any other value, including 0, yields false.
static inline bool IsNewline(const int ch) {
	return (ch == '\n' || ch == '\r');
}
62
// Checks whether the text ahead of the current position is a run of ch that
// continues to the end of the line, optionally followed by spaces or tabs.
//   ch     - the character that must repeat
//   state  - the state passed to sc.ChangeState() when the check succeeds
//   endPos - end of the range being lexed; reaching it counts like a newline
//   sc     - the style context that is inspected and, on success, updated
// NOTE(review): the declaration of `i`, the loop bodies and the return
// statements are not visible in this excerpt - confirm them against the full file.
63
static bool FollowToLineEnd(const int ch, const int state, const unsigned int endPos, StyleContext &sc) {
65
// Advance i past every consecutive occurrence of ch
while (sc.GetRelative(++i) == ch)
67
// Skip over whitespace, without running past endPos
68
while (IsASpaceOrTab(sc.GetRelative(i)) && sc.currentPos + i < endPos)
70
// Success: only a newline (or the end of the lexed range) follows
if (IsNewline(sc.GetRelative(i)) || sc.currentPos + i == endPos) {
72
sc.ChangeState(state);
73
sc.SetState(SCE_MARKDOWN_LINE_BEGIN);
79
// Set the state on text section from current to length characters,
80
// then set the rest until the newline to default, except for any characters matching token.
//   state  - style for the leading marker (the callers pass the header styles)
//   length - number of marker characters
//   token  - character that is treated specially in the remainder of the line
//   sc     - the style context being advanced
// NOTE(review): the lines that apply `state`/`length`, the declaration of
// `started` and the bodies of the branches below are not visible in this excerpt.
81
static void SetStateAndZoom(const int state, const int length, const int token, StyleContext &sc) {
84
sc.SetState(SCE_MARKDOWN_DEFAULT);
87
// Walk the remainder of the current line
while (sc.More() && !IsNewline(sc.ch)) {
88
// First token character seen since `started` was last false
if (sc.ch == token && !started) {
92
// Any non-token character goes back to the default style
else if (sc.ch != token) {
93
sc.SetState(SCE_MARKDOWN_DEFAULT);
98
// The line is finished; the next character begins a new line
sc.SetState(SCE_MARKDOWN_LINE_BEGIN);
101
// Does the previous line have more than spaces and tabs?
// The scan works backwards from sc.currentPos using relative offsets and
// never moves before document position 0.
// NOTE(review): the declaration of `i` and the return statements are not
// visible in this excerpt - confirm the returned values against the full file.
102
static bool HasPrevLineContent(StyleContext &sc) {
104
// Go back to the previous newline
105
while ((--i + (int)sc.currentPos) >= 0 && !IsNewline(sc.GetRelative(i)))
107
// Continue backwards through the previous line itself
while ((--i + (int)sc.currentPos) >= 0) {
108
// Reached the newline that ends the line before the previous one
if (IsNewline(sc.GetRelative(i)))
110
// Found a character that is neither a space nor a tab
if (!IsASpaceOrTab(sc.GetRelative(i)))
116
// Decides whether the current line is a horizontal rule and styles it if so.
//   endPos - end of the range being lexed; reaching it counts like a newline
//   sc     - the style context that is inspected and updated
// NOTE(review): the loop header, the declarations of `i`, `c` and `count`,
// the counting branch and the return statements are not visible in this excerpt.
static bool IsValidHrule(const unsigned int endPos, StyleContext &sc) {
120
c = sc.GetRelative(i);
123
// hit a terminating character
124
else if (!IsASpaceOrTab(c) || sc.currentPos + i == endPos) {
125
// Are we a valid HRULE
// (line ends here, at least three rule characters, no content on the previous line)
126
if ((IsNewline(c) || sc.currentPos + i == endPos) &&
127
count >= 3 && !HasPrevLineContent(sc)) {
128
sc.SetState(SCE_MARKDOWN_HRULE);
130
sc.SetState(SCE_MARKDOWN_LINE_BEGIN);
134
sc.SetState(SCE_MARKDOWN_DEFAULT);
142
// Lexer entry point: styles the text from startPos up to startPos + length,
// driving a StyleContext built on styler that starts in initStyle.
// The WordList argument is unnamed and is not used by the visible code.
// NOTE(review): the main loop header, a number of statements and most closing
// braces are not visible in this excerpt; the comments below describe only the
// lines that are shown.
static void ColorizeMarkdownDoc(unsigned int startPos, int length, int initStyle,
143
WordList **, Accessor &styler) {
144
unsigned int endPos = startPos + length;
145
// Compared against 5, 4 and 2 in the PRECHAR handling below; presumably the
// number of leading characters seen on the line - its updates are not visible here.
int precharCount = 0;
146
// Don't advance on a new loop iteration and retry at the same position.
147
// Useful in the corner case of having to start at the beginning file position
148
// in the default state.
149
bool freezeCursor = false;
151
StyleContext sc(startPos, length, initStyle, styler);
154
// Skip past escaped characters
160
// A blockquote resets the line semantics
161
if (sc.state == SCE_MARKDOWN_BLOCKQUOTE)
162
sc.SetState(SCE_MARKDOWN_LINE_BEGIN);
164
// Conditional state-based actions
165
// Double-backtick code span: closed by "``" unless a space sits two characters back
if (sc.state == SCE_MARKDOWN_CODE2) {
166
if (sc.Match("``") && sc.GetRelative(-2) != ' ') {
168
sc.SetState(SCE_MARKDOWN_DEFAULT);
171
// Single-backtick code span: closed by '`' not preceded by a space
else if (sc.state == SCE_MARKDOWN_CODE) {
172
if (sc.ch == '`' && sc.chPrev != ' ')
173
sc.ForwardSetState(SCE_MARKDOWN_DEFAULT);
175
// NOTE(review): the terminator of the block comment opened below is not
// visible in this excerpt.
/* De-activated because it gets in the way of other valid indentation
176
* schemes, for example multiple paragraphs inside a list item.
178
else if (sc.state == SCE_MARKDOWN_CODEBK) {
180
if (IsNewline(sc.ch)) {
181
if (sc.chNext != '\t') {
182
for (int c = 1; c < 5; ++c) {
183
if (sc.GetRelative(c) != ' ')
188
else if (sc.atLineStart) {
189
if (sc.ch != '\t' ) {
190
for (int i = 0; i < 4; ++i) {
191
if (sc.GetRelative(i) != ' ')
197
sc.SetState(SCE_MARKDOWN_LINE_BEGIN);
201
// Strong emphasis with asterisks: closed by "**" not preceded by a space
else if (sc.state == SCE_MARKDOWN_STRONG1) {
202
if (sc.Match("**") && sc.chPrev != ' ') {
204
sc.SetState(SCE_MARKDOWN_DEFAULT);
207
// Strong emphasis with underscores: closed by "__" not preceded by a space
else if (sc.state == SCE_MARKDOWN_STRONG2) {
208
if (sc.Match("__") && sc.chPrev != ' ') {
210
sc.SetState(SCE_MARKDOWN_DEFAULT);
214
// Emphasis with a single asterisk
else if (sc.state == SCE_MARKDOWN_EM1) {
215
if (sc.ch == '*' && sc.chPrev != ' ')
216
sc.ForwardSetState(SCE_MARKDOWN_DEFAULT);
218
// Emphasis with a single underscore
else if (sc.state == SCE_MARKDOWN_EM2) {
219
if (sc.ch == '_' && sc.chPrev != ' ')
220
sc.ForwardSetState(SCE_MARKDOWN_DEFAULT);
222
// Delimited code block: a line starting with "~~~" ends it
else if (sc.state == SCE_MARKDOWN_CODEBK) {
223
if (sc.atLineStart && sc.Match("~~~")) {
225
// Find the end of the closing delimiter line, bounded by endPos
while (!IsNewline(sc.GetRelative(i)) && sc.currentPos + i < endPos)
228
sc.SetState(SCE_MARKDOWN_DEFAULT);
231
// Strikeout: closed by "~~" not preceded by a space
else if (sc.state == SCE_MARKDOWN_STRIKEOUT) {
232
if (sc.Match("~~") && sc.chPrev != ' ') {
234
sc.SetState(SCE_MARKDOWN_DEFAULT);
237
// Start of a line: look for constructs that are only valid in the first column
else if (sc.state == SCE_MARKDOWN_LINE_BEGIN) {
239
// Headers introduced by '#': the longest run is tested first so that
// "######" is not mistaken for a shorter marker
if (sc.Match("######"))
240
SetStateAndZoom(SCE_MARKDOWN_HEADER6, 6, '#', sc);
241
else if (sc.Match("#####"))
242
SetStateAndZoom(SCE_MARKDOWN_HEADER5, 5, '#', sc);
243
else if (sc.Match("####"))
244
SetStateAndZoom(SCE_MARKDOWN_HEADER4, 4, '#', sc);
245
else if (sc.Match("###"))
246
SetStateAndZoom(SCE_MARKDOWN_HEADER3, 3, '#', sc);
247
else if (sc.Match("##"))
248
SetStateAndZoom(SCE_MARKDOWN_HEADER2, 2, '#', sc);
249
else if (sc.Match("#")) {
250
// Catch the special case of an ordered list item written as "#."
251
if (sc.chNext == '.' && IsASpaceOrTab(sc.GetRelative(2))) {
253
sc.SetState(SCE_MARKDOWN_PRECHAR);
256
SetStateAndZoom(SCE_MARKDOWN_HEADER1, 1, '#', sc);
259
// Opening delimiter of a code block, accepted only when the previous line is blank
else if (sc.Match("~~~")) {
260
if (!HasPrevLineContent(sc))
261
sc.SetState(SCE_MARKDOWN_CODEBK);
263
sc.SetState(SCE_MARKDOWN_DEFAULT);
265
// A line of '=' under a non-blank line marks that line as a level 1 header
else if (sc.ch == '=') {
266
if (HasPrevLineContent(sc) && FollowToLineEnd('=', SCE_MARKDOWN_HEADER1, endPos, sc))
269
sc.SetState(SCE_MARKDOWN_DEFAULT);
271
// A line of '-' under a non-blank line marks that line as a level 2 header
else if (sc.ch == '-') {
272
if (HasPrevLineContent(sc) && FollowToLineEnd('-', SCE_MARKDOWN_HEADER2, endPos, sc))
276
sc.SetState(SCE_MARKDOWN_PRECHAR);
279
else if (IsNewline(sc.ch))
280
sc.SetState(SCE_MARKDOWN_LINE_BEGIN);
283
sc.SetState(SCE_MARKDOWN_PRECHAR);
287
// The header lasts until the newline
288
else if (sc.state == SCE_MARKDOWN_HEADER1 || sc.state == SCE_MARKDOWN_HEADER2 ||
289
sc.state == SCE_MARKDOWN_HEADER3 || sc.state == SCE_MARKDOWN_HEADER4 ||
290
sc.state == SCE_MARKDOWN_HEADER5 || sc.state == SCE_MARKDOWN_HEADER6) {
291
if (IsNewline(sc.ch))
292
sc.SetState(SCE_MARKDOWN_LINE_BEGIN);
295
// New state only within the initial whitespace
296
if (sc.state == SCE_MARKDOWN_PRECHAR) {
298
// Blockquote marker, allowed while precharCount is below 5
if (sc.ch == '>' && precharCount < 5)
299
sc.SetState(SCE_MARKDOWN_BLOCKQUOTE);
301
// Begin of code block
302
else if (!HasPrevLineContent(sc) && (sc.chPrev == '\t' || precharCount >= 4))
303
sc.SetState(SCE_MARKDOWN_CODEBK);
305
// HRule - Total of three or more hyphens, asterisks, or underscores
306
// on a line by themselves
307
else if ((sc.ch == '-' || sc.ch == '*' || sc.ch == '_') && IsValidHrule(endPos, sc))
310
// Unordered list item: '-', '*' or '+' followed by a space or tab
else if ((sc.ch == '-' || sc.ch == '*' || sc.ch == '+') && IsASpaceOrTab(sc.chNext)) {
311
sc.SetState(SCE_MARKDOWN_ULIST_ITEM);
312
sc.ForwardSetState(SCE_MARKDOWN_DEFAULT);
315
// Ordered list item: digits, then '.', then a space or tab
else if (IsADigit(sc.ch)) {
317
while (IsADigit(sc.GetRelative(++digitCount)))
319
if (sc.GetRelative(digitCount) == '.' &&
320
IsASpaceOrTab(sc.GetRelative(digitCount + 1))) {
321
sc.SetState(SCE_MARKDOWN_OLIST_ITEM);
322
sc.Forward(digitCount + 1);
323
sc.SetState(SCE_MARKDOWN_DEFAULT);
326
// Alternate Ordered list
327
else if (sc.ch == '#' && sc.chNext == '.' && IsASpaceOrTab(sc.GetRelative(2))) {
328
sc.SetState(SCE_MARKDOWN_OLIST_ITEM);
330
sc.SetState(SCE_MARKDOWN_DEFAULT);
332
// Anything that is not a space, or a space once precharCount exceeds 2,
// ends the initial-whitespace handling
else if (sc.ch != ' ' || precharCount > 2)
333
sc.SetState(SCE_MARKDOWN_DEFAULT);
338
// New state anywhere in doc
339
if (sc.state == SCE_MARKDOWN_DEFAULT) {
340
// A '#' in the first column is handed back to the line-begin handling
if (sc.atLineStart && sc.ch == '#') {
341
sc.SetState(SCE_MARKDOWN_LINE_BEGIN);
345
// Links and images: "[text]" or "![text]", optionally followed by "(...)" or "[...]"
if (sc.Match("![") || sc.ch == '[') {
346
int i = 0, j = 0, k = 0;
347
int len = endPos - sc.currentPos;
348
// Find the ']' that closes the link text, ignoring backslash-escaped ones
while (i < len && (sc.GetRelative(++i) != ']' || sc.GetRelative(i - 1) == '\\'))
350
if (sc.GetRelative(i) == ']') {
352
// Inline form: parenthesised part directly after the link text
if (sc.GetRelative(++i) == '(') {
353
while (i < len && (sc.GetRelative(++i) != ')' || sc.GetRelative(i - 1) == '\\'))
355
if (sc.GetRelative(i) == ')')
358
// Reference form: a second bracketed part, optionally one character further on
else if (sc.GetRelative(i) == '[' || sc.GetRelative(++i) == '[') {
359
while (i < len && (sc.GetRelative(++i) != ']' || sc.GetRelative(i - 1) == '\\'))
361
if (sc.GetRelative(i) == ']')
365
// At least a link text
367
sc.SetState(SCE_MARKDOWN_LINK);
369
// Also has a URL or reference portion
372
sc.ForwardSetState(SCE_MARKDOWN_DEFAULT);
375
// Code - also a special case for alternate inside spacing
376
if (sc.Match("``") && sc.GetRelative(3) != ' ') {
377
sc.SetState(SCE_MARKDOWN_CODE2);
380
else if (sc.ch == '`' && sc.chNext != ' ') {
381
sc.SetState(SCE_MARKDOWN_CODE);
384
// Strong emphasis openers: must not be followed by a space
else if (sc.Match("**") && sc.GetRelative(2) != ' ') {
385
sc.SetState(SCE_MARKDOWN_STRONG1);
388
else if (sc.Match("__") && sc.GetRelative(2) != ' ') {
389
sc.SetState(SCE_MARKDOWN_STRONG2);
393
// Emphasis openers: must not be followed by a space
else if (sc.ch == '*' && sc.chNext != ' ')
394
sc.SetState(SCE_MARKDOWN_EM1);
395
else if (sc.ch == '_' && sc.chNext != ' ')
396
sc.SetState(SCE_MARKDOWN_EM2);
398
// Strikeout opener: "~~" not followed by a space
else if (sc.Match("~~") && sc.GetRelative(2) != ' ') {
399
sc.SetState(SCE_MARKDOWN_STRIKEOUT);
403
// End of line: the next character starts a new line
else if (IsNewline(sc.ch))
404
sc.SetState(SCE_MARKDOWN_LINE_BEGIN);
406
// Advance if not holding back the cursor for this iteration.
409
freezeCursor = false;
414
// Lexer module object that ties the SCLEX_MARKDOWN identifier and the name
// "markdown" to the ColorizeMarkdownDoc styling function.
LexerModule lmMarkdown(SCLEX_MARKDOWN, ColorizeMarkdownDoc, "markdown");