2
// MarkdownTextFormat.cs
5
// Jérémie Laval <jeremie.laval@xamarin.com>
6
// Alex Corrado <corrado@xamarin.com>
8
// Copyright (c) 2012 Xamarin Inc.
10
// Permission is hereby granted, free of charge, to any person obtaining a copy
11
// of this software and associated documentation files (the "Software"), to deal
12
// in the Software without restriction, including without limitation the rights
13
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14
// copies of the Software, and to permit persons to whom the Software is
15
// furnished to do so, subject to the following conditions:
17
// The above copyright notice and this permission notice shall be included in
18
// all copies or substantial portions of the Software.
20
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
32
using System.Text.RegularExpressions;
33
using System.Collections.Generic;
39
public class MarkdownTextFormat : TextFormat
42
// Reads the entire input stream as text and passes it to ParseMarkdown, which emits the
// parsed document into buffer.
//   input:  stream containing the Markdown text.
//   buffer: receives the events produced by ParseMarkdown.
// The StreamReader is created in a using statement, so it is disposed when parsing finishes.
// NOTE(review): disposing the reader presumably also closes input — confirm callers expect that.
public override void Parse (Stream input, IRichTextBuffer buffer)
44
using (var reader = new StreamReader (input))
45
ParseMarkdown (reader.ReadToEnd (), buffer);
48
/* The subset we support:
49
* - Headers in Atx-style, i.e. prefixed with one or more '#' characters, and in Setext-style, i.e. underlined with '=' or '-'
50
* - Paragraphs are separated by a blank line
51
* - Line break inserted by a double space (" ") at the end of the line
52
* - A link has the syntax: "[This link](http://example.net/)" only
53
* - Code blocks are normal paragraphs with 4 spaces or 1 tab prepended; blocks fenced by lines starting with "```" are also accepted
54
* - A list is a number of text lines with no blank lines in between, each prefixed by one of '+', '-' or '*' immediately followed by whitespace. No nesting
55
* - Italic is by putting a portion of text between '*' or '_'
56
* - Bold is by putting a portion of text between '**' or '__'
57
* - Inline code is wrapped between the '`' character
58
* - Horizontal ruler: a line consisting of at least 3 hyphens
60
* Notable things we don't support (yet):
62
* - Blockquotes syntax (lines starting in '>')
63
* - Reference link syntax: [Google] [1] ... [1]: http://google.com
67
// Converts a Markdown document into block-level events on buffer.
// The text is split into lines (with "\r\n" normalised to "\n") and each line is classified as a
// blank line, Atx header, Setext header, horizontal ruler, code block, list or paragraph text.
// Inline content (links and emphasis) is delegated to ParseInline.
//   markdown: the full Markdown text to parse.
//   buffer:   receives the Emit* calls describing the document.
static void ParseMarkdown (string markdown, IRichTextBuffer buffer)
69
var lines = markdown.Replace ("\r\n", "\n").Split (new[] { '\n' });
70
// NOTE(review): presumably true while a paragraph is open — confirm where it is set to true.
var wasParagraph = false;
72
for (int i = 0; i < lines.Length; i++) {
74
// NOTE(review): line is presumably lines[i] — confirm.
var trimmed = line.TrimStart ();
76
// Blank or whitespace-only line: an end-of-paragraph is emitted.
if (string.IsNullOrWhiteSpace (line)) {
78
buffer.EmitEndParagraph ();
84
// Title (Atx-style): the header level is the number of leading '#' characters; spaces and '#'
// are trimmed from both ends of the line before the text is parsed for inline content.
else if (line.StartsWith ("#")) {
85
var level = line.TakeWhile (c => c == '#').Count ();
86
buffer.EmitStartHeader (level);
87
ParseInline (buffer, line.Trim (' ', '#'));
88
buffer.EmitEndHeader ();
91
// Title (Setext-style)
// Applies when the next line is non-empty and made only of '=' and/or '-' characters.
92
else if (i < lines.Length - 1 && !string.IsNullOrEmpty (lines[i + 1]) && lines[i + 1].All (c => c == '=' || c == '-')) {
93
// Only the first character of the underline decides the level: '=' gives 1, anything else gives 2.
var level = lines[i + 1][0] == '=' ? 1 : 2;
99
// In the above Markdown snippet we generate a paragraph and then want to insert a header, so we
100
// must close the paragraph containing 'FooBarBaz' first. Or we should disallow this construct
102
wasParagraph = false;
103
buffer.EmitEndParagraph ();
105
buffer.EmitStartHeader (level);
106
ParseInline (buffer, line);
107
buffer.EmitEndHeader ();
112
// Horizontal ruler: a line consisting only of '-' characters, at least 3 of them.
else if (line.All (c => c == '-') && line.Length >= 3) {
113
buffer.EmitHorizontalRuler ();
117
// Code block: either indented (line starts with a tab or with the space literal below) or
// fenced (line starts with "```").
// NOTE(review): the literal tested here is a single space while Substring (4) below strips four
// characters; Substring would throw on a line shorter than 4 characters — confirm the literal
// is meant to be four spaces.
else if (line.StartsWith ("\t") || line.StartsWith (" ") || line.StartsWith ("```")) {
118
bool isFencedCodeBlock = line.StartsWith ("```");
120
if (isFencedCodeBlock)
123
// Collects the lines of the code block; i keeps advancing through lines while collecting.
var codeblock = new StringBuilder ();
124
for (; i < lines.Length; i++) {
126
// An indented (non-fenced) block stops at the first line lacking the indentation prefix.
if (!line.StartsWith ("\t") && !line.StartsWith (" ") && !isFencedCodeBlock)
128
// A fenced block stops at the closing "```" line.
if (isFencedCodeBlock && line.StartsWith ("```")) {
133
// Lines inside a fence are appended verbatim.
if (isFencedCodeBlock && !line.StartsWith ("```"))
134
codeblock.AppendLine (line);
136
// Presumably the else branch (indented block): the leading tab (1 char) or 4 characters are removed.
codeblock.AppendLine (line.StartsWith ("\t") ? line.Substring (1) : line.Substring (4));
140
buffer.EmitEndParagraph ();
141
wasParagraph = false;
143
buffer.EmitCodeBlock (codeblock.ToString ());
147
// List: the trimmed line starts with '+', '-' or '*' followed by a space or a tab.
// NOTE(review): trimmed [1] is read without a length check; a line whose trimmed text is a single
// '+', '-' or '*' would index past the end of the string — verify.
else if ((trimmed [0] == '+' || trimmed [0] == '-' || trimmed [0] == '*') && (trimmed [1] == ' ' || trimmed [1] == '\t')) {
148
buffer.EmitOpenList ();
// NOTE(review): the bullet is taken from line[0], not trimmed[0]; for a line with leading
// whitespace this is the whitespace character rather than the bullet — confirm intent.
149
var bullet = line[0].ToString ();
150
for (; i < lines.Length; i++) {
152
// Presumably ends the list at the first line that does not start with the same bullet.
if (!line.StartsWith (bullet))
154
buffer.EmitOpenBullet ();
// Leading bullet characters, spaces and tabs are stripped before the item text is parsed.
155
ParseInline (buffer, line.TrimStart ('+', '-', '*', ' ', '\t'));
156
buffer.EmitCloseBullet ();
159
buffer.EmitCloseList ();
165
// Paragraph text: trailing whitespace is dropped; a line ending with the literal tested below
// gets Environment.NewLine appended, otherwise a single space so the text joins the next line.
buffer.EmitStartParagraph (0);
166
ParseInline (buffer, line.TrimEnd () + (line.EndsWith (" ")? Environment.NewLine : " "));
171
// If we don't end in a newline we need to end the open paragraph
173
buffer.EmitEndParagraph ();
176
// Emits the inline content of one piece of text.
// Each match of the inline regex (a [name](url "title") link or a bare URL) is emitted as
// EmitStartLink / ParseText / EmitEndLink; the text between matches and after the last match
// is passed to ParseText so that emphasis and inline code are handled.
//   buffer: receives the emitted links and text.
//   line:   the text to scan.
static void ParseInline (IRichTextBuffer buffer, string line)
178
var match = inline.Match (line);
179
// Index in line just past the last consumed match.
int currentIndex = 0;
180
while (match.Success) {
181
// True when the match is immediately preceded by a backslash.
// NOTE(review): presumably escaped matches are skipped and left as plain text — confirm.
var escaped = match.Index != 0 && line [match.Index - 1] == '\\';
184
// Plain text between the previous match and this one.
var text = line.Substring (currentIndex, match.Index - currentIndex);
185
if (!string.IsNullOrEmpty (text))
186
ParseText (buffer, text);
190
var url = match.Groups["url"].Value;
191
// A bare URL has no "name" group, so the URL itself is used as the link label.
var name = match.Groups["name"].Success? match.Groups["name"].Value : url;
192
// Empty string when the link has no quoted title.
var title = match.Groups["title"].Value;
193
buffer.EmitStartLink (url, title);
194
ParseText (buffer, name);
195
buffer.EmitEndLink ();
198
currentIndex = match.Index + match.Length;
201
match = match.NextMatch ();
203
// Add remaining text
204
ParseText (buffer, line.Substring (currentIndex));
207
// Emits text with emphasis and inline code resolved.
// Each match of the styles regex is handled by kind: bold and italic captures are parsed
// recursively with the corresponding flag OR-ed into style, so nested emphasis accumulates;
// code captures are emitted directly as Monospace without further parsing.
//   buffer: receives the emitted text runs.
//   line:   the text to scan.
//   style:  the style already in effect for this text (Normal by default).
static void ParseText (IRichTextBuffer buffer, string line, RichTextInlineStyle style = RichTextInlineStyle.Normal)
209
var match = styles.Match (line);
210
// Index in line just past the last consumed match.
int currentIndex = 0;
211
while (match.Success) {
212
// True when the match is immediately preceded by a backslash.
// NOTE(review): presumably escaped matches are skipped and left as plain text — confirm.
var escaped = match.Index != 0 && line [match.Index - 1] == '\\';
215
// Unstyled text between the previous match and this one keeps the current style.
var text = line.Substring (currentIndex, match.Index - currentIndex);
216
if (!string.IsNullOrEmpty (text))
217
EmitText (buffer, text, style);
219
if (match.Groups["bold"].Success)
220
ParseText (buffer, match.Groups["bold"].Value, style | RichTextInlineStyle.Bold);
221
else if (match.Groups["italic"].Success)
222
ParseText (buffer, match.Groups["italic"].Value, style | RichTextInlineStyle.Italic);
224
// Presumably the final else branch: the match is inline code.
EmitText (buffer, match.Groups["code"].Value, style | RichTextInlineStyle.Monospace);
226
currentIndex = match.Index + match.Length;
229
match = match.NextMatch ();
231
// Add remaining text
232
EmitText (buffer, line.Substring (currentIndex), style);
235
// Removes backslash escapes from text and forwards the result to buffer.EmitText with the given style.
// Every backslash followed by a character is replaced by that character alone (see the escape regex).
//   buffer: receives the unescaped text.
//   text:   the raw text, possibly containing backslash escapes.
//   style:  the inline style passed through to the buffer.
static void EmitText (IRichTextBuffer buffer, string text, RichTextInlineStyle style)
237
text = escape.Replace (text, m => m.Groups["next"].Value);
238
buffer.EmitText (text, style);
241
// Matches a backslash followed by any single character, captured as "next".
// Singleline makes '.' match a newline as well.
static readonly Regex escape = new Regex (@"\\(?<next>.)", RegexOptions.Singleline | RegexOptions.Compiled);
242
// Matches inline links. First alternative: [name](url) with an optional double-quoted title after
// the URL. Second alternative: a bare URL, matched case-insensitively from the (?i) onwards.
// Both alternatives capture into the "url" group; only the first one sets "name" and "title".
static readonly Regex inline = new Regex (@"\[(?<name>[^\]]+)\]\((?<url>[^\s""\)]+)(?:[ \t]*""(?<title>.*)"")?\)" + //link
243
// See http://daringfireball.net/2010/07/improved_regex_for_matching_urls
244
@"|(?i)\b(?<url>(?:[a-z][\w-]+:(?:/{1,3}|[a-z0-9%])|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}/)(?:[^\s()<>]+|\(([^\s()<>]|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:'"".,<>?«»“”‘’]))"
245
//FIXME: image, etc...
246
, RegexOptions.Singleline | RegexOptions.Compiled);
248
// Matches emphasis and inline code. The closing delimiter must equal the opening one
// (backreferences "double" / "single") and must not be preceded by whitespace.
// NOTE(review): inside the character class [^\*{2}_{2}] the "{2}" is literal, not a quantifier, so
// the class excludes '*', '{', '2', '}' and '_'; bold text containing one of those characters after
// its first run of non-whitespace will not match — likely unintended, verify.
static readonly Regex styles = new Regex (@"(?<double>\*{2}|_{2})(?<bold>[^\s]+[^\*{2}_{2}]*)(?<!\s)\k<double>" + // emphasis: double ** or __ for bold
249
@"|(?<single>\*|_)(?<italic>[^\s]+[^\*_]*)(?<!\s)\k<single>" + // emphasis: single * or _ for italic
250
@"|`(?<code>[^`]+)`" // inline code
251
, RegexOptions.Compiled);