~ubuntu-branches/ubuntu/trusty/monodevelop/trusty-proposed

« back to all changes in this revision

Viewing changes to external/xwt/Xwt/Xwt.Formats/MarkdownTextFormat.cs

  • Committer: Package Import Robot
  • Author(s): Jo Shields
  • Date: 2013-05-12 09:46:03 UTC
  • mto: This revision was merged to the branch mainline in revision 29.
  • Revision ID: package-import@ubuntu.com-20130512094603-mad323bzcxvmcam0
Tags: upstream-4.0.5+dfsg
Import upstream version 4.0.5+dfsg

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
//
 
2
// MarkdownTextFormat.cs
 
3
//
 
4
// Author:
 
5
//       Jérémie Laval <jeremie.laval@xamarin.com>
 
6
//       Alex Corrado <corrado@xamarin.com>
 
7
//
 
8
// Copyright (c) 2012 Xamarin Inc.
 
9
//
 
10
// Permission is hereby granted, free of charge, to any person obtaining a copy
 
11
// of this software and associated documentation files (the "Software"), to deal
 
12
// in the Software without restriction, including without limitation the rights
 
13
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 
14
// copies of the Software, and to permit persons to whom the Software is
 
15
// furnished to do so, subject to the following conditions:
 
16
//
 
17
// The above copyright notice and this permission notice shall be included in
 
18
// all copies or substantial portions of the Software.
 
19
//
 
20
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 
21
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 
22
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 
23
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 
24
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 
25
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 
26
// THE SOFTWARE.
 
27
 
 
28
using System;
 
29
using System.Linq;
 
30
using System.IO;
 
31
using System.Text;
 
32
using System.Text.RegularExpressions;
 
33
using System.Collections.Generic;
 
34
 
 
35
using Xwt.Backends;
 
36
 
 
37
namespace Xwt.Formats
 
38
{
 
39
        public class MarkdownTextFormat : TextFormat
 
40
        {
 
41
 
 
42
                public override void Parse (Stream input, IRichTextBuffer buffer)
 
43
                {
 
44
                        using (var reader = new StreamReader (input))
 
45
                                ParseMarkdown (reader.ReadToEnd (), buffer);
 
46
                }
 
47
 
 
48
                /* The subset we support:
 
49
                 *   - Headers in Atx-style i.e. prefixed with one or more '#' characters and in Setex-style i.e. underlined '=' or '-'
 
50
                 *   - Paragraph are separated by a blank line
 
51
                 *   - Line break inserted by a double space ("  ") at the end of the line
 
52
                 *   - A link has the syntax: "[This link](http://example.net/)" only
 
53
                 *   - Code blocks are normal paragraph with a 4-spaces or 1-tab space prepended
 
54
                 *   - A list is a number of text line with no newlines in between and prefixed by one of '+', '-' or '*' with whitespace immediately following. no nesting
 
55
                 *   - Italic is by putting a portion of text between '*' or '_'
 
56
                 *   - Bold is by putting a portion of text between '**' or '__'
 
57
                 *   - Inline code is wrapped between the '`' character
 
58
                 *   - horizontal ruler, a line with at least 3 hyphens
 
59
                 *
 
60
                 * Notable things we don't support (yet):
 
61
                 *
 
62
                 *   - Blockquotes syntax (lines starting in '>')
 
63
                 *   - Reference link syntax: [Google] [1]  ... [1]: http://google.com
 
64
                 *   - Images
 
65
                 *   - Inline HTML
 
66
                 */
 
67
                static void ParseMarkdown (string markdown, IRichTextBuffer buffer)
 
68
                {
 
69
                        var lines = markdown.Replace ("\r\n", "\n").Split (new[] { '\n' });
 
70
                        var wasParagraph = false;
 
71
 
 
72
                        for (int i = 0; i < lines.Length; i++) {
 
73
                                var line = lines[i];
 
74
                                var trimmed = line.TrimStart ();
 
75
                                // New paragraph
 
76
                                if (string.IsNullOrWhiteSpace (line)) {
 
77
                                        if (wasParagraph) {
 
78
                                                buffer.EmitEndParagraph ();
 
79
                                                wasParagraph = false;
 
80
                                        }
 
81
                                }
 
82
 
 
83
                                // Title
 
84
                                else if (line.StartsWith ("#")) {
 
85
                                        var level = line.TakeWhile (c => c == '#').Count ();
 
86
                                        buffer.EmitStartHeader (level);
 
87
                                        ParseInline (buffer, line.Trim (' ', '#'));
 
88
                                        buffer.EmitEndHeader ();
 
89
                                }
 
90
 
 
91
                                // Title (setex-style)
 
92
                                else if (i < lines.Length - 1 && !string.IsNullOrEmpty (lines[i + 1]) && lines[i + 1].All (c => c == '=' || c == '-')) {
 
93
                                        var level = lines[i + 1][0] == '=' ? 1 : 2;
 
94
                                        //
 
95
                                        //      FooBarBaz
 
96
                                        //      SomeHeader
 
97
                                        //      =========
 
98
 
 
99
                                        // In the above Markdown snippet we generate a paragraph and then want to insert a header, so we
 
100
                                        // must close the paragraph containing 'FooBarBaz' first. Or we should disallow this construct
 
101
                                        if (wasParagraph) {
 
102
                                                wasParagraph = false;
 
103
                                                buffer.EmitEndParagraph ();
 
104
                                        }
 
105
                                        buffer.EmitStartHeader (level);
 
106
                                        ParseInline (buffer, line);
 
107
                                        buffer.EmitEndHeader ();
 
108
                                        i++;
 
109
                                }
 
110
 
 
111
                                // Ruler
 
112
                                else if (line.All (c => c == '-') && line.Length >= 3) {
 
113
                                        buffer.EmitHorizontalRuler ();
 
114
                                }
 
115
 
 
116
                                // Code blocks
 
117
                                else if (line.StartsWith ("\t") || line.StartsWith ("    ") || line.StartsWith ("```")) {
 
118
                                        bool isFencedCodeBlock = line.StartsWith ("```");
 
119
 
 
120
                                        if (isFencedCodeBlock)
 
121
                                                i++;
 
122
 
 
123
                                        var codeblock = new StringBuilder ();
 
124
                                        for (; i < lines.Length; i++) {
 
125
                                                line = lines[i];
 
126
                                                if (!line.StartsWith ("\t") && !line.StartsWith ("    ") && !isFencedCodeBlock)
 
127
                                                        break;
 
128
                                                if (isFencedCodeBlock && line.StartsWith ("```")) {
 
129
                                                        i++;
 
130
                                                        break;
 
131
                                                }
 
132
 
 
133
                                                if (isFencedCodeBlock && !line.StartsWith ("```"))
 
134
                                                        codeblock.AppendLine (line);
 
135
                                                else
 
136
                                                        codeblock.AppendLine (line.StartsWith ("\t") ? line.Substring (1) : line.Substring (4));
 
137
                                        }
 
138
                                        i--;
 
139
                                        if (wasParagraph) {
 
140
                                                buffer.EmitEndParagraph ();
 
141
                                                wasParagraph = false;
 
142
                                        }
 
143
                                        buffer.EmitCodeBlock (codeblock.ToString ());
 
144
                                }
 
145
 
 
146
                                // List
 
147
                                else if ((trimmed [0] == '+' || trimmed [0] == '-' || trimmed [0] == '*') && (trimmed [1] == ' ' || trimmed [1] == '\t')) {
 
148
                                        buffer.EmitOpenList ();
 
149
                                        var bullet = line[0].ToString ();
 
150
                                        for (; i < lines.Length; i++) {
 
151
                                                line = lines[i];
 
152
                                                if (!line.StartsWith (bullet))
 
153
                                                        break;
 
154
                                                buffer.EmitOpenBullet ();
 
155
                                                ParseInline (buffer, line.TrimStart ('+', '-', '*', ' ', '\t'));
 
156
                                                buffer.EmitCloseBullet ();
 
157
                                        }
 
158
                                        i--;
 
159
                                        buffer.EmitCloseList ();
 
160
                                }
 
161
 
 
162
                                // Normal paragraph
 
163
                                else {
 
164
                                        if (!wasParagraph)
 
165
                                                buffer.EmitStartParagraph (0);
 
166
                                        ParseInline (buffer, line.TrimEnd () + (line.EndsWith ("  ")? Environment.NewLine : " "));
 
167
                                        wasParagraph = true;
 
168
                                }
 
169
                        }
 
170
 
 
171
                        // If we don't end in a newline we need to end the open paragrah
 
172
                        if (wasParagraph)
 
173
                                buffer.EmitEndParagraph ();
 
174
                }
 
175
 
 
176
                static void ParseInline (IRichTextBuffer buffer, string line)
 
177
                {
 
178
                        var match = inline.Match (line);
 
179
                        int currentIndex = 0;
 
180
                        while (match.Success) {
 
181
                                var escaped = match.Index != 0 && line [match.Index - 1] == '\\';
 
182
                                if (!escaped) {
 
183
 
 
184
                                        var text = line.Substring (currentIndex, match.Index - currentIndex);
 
185
                                        if (!string.IsNullOrEmpty (text))
 
186
                                                ParseText (buffer, text);
 
187
 
 
188
                                        // Link
 
189
                                        {
 
190
                                                var url = match.Groups["url"].Value;
 
191
                                                var name = match.Groups["name"].Success? match.Groups["name"].Value : url;
 
192
                                                var title = match.Groups["title"].Value;
 
193
                                                buffer.EmitStartLink (url, title);
 
194
                                                ParseText (buffer, name);
 
195
                                                buffer.EmitEndLink ();
 
196
                                        }
 
197
 
 
198
                                        currentIndex = match.Index + match.Length;
 
199
 
 
200
                                }
 
201
                                match = match.NextMatch ();
 
202
                        }
 
203
                        // Add remaining text
 
204
                        ParseText (buffer, line.Substring (currentIndex));
 
205
                }
 
206
 
 
207
                static void ParseText (IRichTextBuffer buffer, string line, RichTextInlineStyle style = RichTextInlineStyle.Normal)
 
208
                {
 
209
                        var match = styles.Match (line);
 
210
                        int currentIndex = 0;
 
211
                        while (match.Success) {
 
212
                                var escaped = match.Index != 0 && line [match.Index - 1] == '\\';
 
213
                                if (!escaped) {
 
214
 
 
215
                                        var text = line.Substring (currentIndex, match.Index - currentIndex);
 
216
                                        if (!string.IsNullOrEmpty (text))
 
217
                                                EmitText (buffer, text, style);
 
218
 
 
219
                                        if (match.Groups["bold"].Success)
 
220
                                                ParseText (buffer, match.Groups["bold"].Value, style | RichTextInlineStyle.Bold);
 
221
                                        else if (match.Groups["italic"].Success)
 
222
                                                ParseText (buffer, match.Groups["italic"].Value, style | RichTextInlineStyle.Italic);
 
223
                                        else
 
224
                                                EmitText (buffer, match.Groups["code"].Value, style | RichTextInlineStyle.Monospace);
 
225
 
 
226
                                        currentIndex = match.Index + match.Length;
 
227
 
 
228
                                }
 
229
                                match = match.NextMatch ();
 
230
                        }
 
231
                        // Add remaining text
 
232
                        EmitText (buffer, line.Substring (currentIndex), style);
 
233
                }
 
234
 
 
235
                static void EmitText (IRichTextBuffer buffer, string text, RichTextInlineStyle style)
 
236
                {
 
237
                        text = escape.Replace (text, m => m.Groups["next"].Value);
 
238
                        buffer.EmitText (text, style);
 
239
                }
 
240
 
 
241
                static readonly Regex escape = new Regex (@"\\(?<next>.)", RegexOptions.Singleline | RegexOptions.Compiled);
 
242
                static readonly Regex inline = new Regex (@"\[(?<name>[^\]]+)\]\((?<url>[^\s""\)]+)(?:[ \t]*""(?<title>.*)"")?\)" + //link
 
243
                                                          // See http://daringfireball.net/2010/07/improved_regex_for_matching_urls
 
244
                                                          @"|(?i)\b(?<url>(?:[a-z][\w-]+:(?:/{1,3}|[a-z0-9%])|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}/)(?:[^\s()<>]+|\(([^\s()<>]|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:'"".,<>?«»“”‘’]))"
 
245
                                                               //FIXME: image, etc...
 
246
                                                           , RegexOptions.Singleline | RegexOptions.Compiled);
 
247
 
 
248
                static readonly Regex styles = new Regex (@"(?<double>\*{2}|_{2})(?<bold>[^\s]+[^\*{2}_{2}]*)(?<!\s)\k<double>" + // emphasis: double ** or __ for bold
 
249
                                                          @"|(?<single>\*|_)(?<italic>[^\s]+[^\*_]*)(?<!\s)\k<single>" + // emphasis: single * or _ for italic
 
250
                                                          @"|`(?<code>[^`]+)`" // inline code
 
251
                                                          , RegexOptions.Compiled);
 
252
        }
 
253
}
 
254