318
323
my ($self, $text) = @_;
319
324
my $less_than_tab = $self->{tab_width} - 1;
321
# Hashify HTML blocks:
322
# We only want to do this for block-level HTML tags, such as headers,
323
# lists, and tables. That's because we still want to wrap <p>s around
324
# "paragraphs" that are wrapped in non-block-level tags, such as anchors,
325
# phrase emphasis, and spans. The list of tags we're looking for is
329
p | div | h[1-6] | blockquote | pre | table |
330
dl | ol | ul | script | noscript | form |
331
fieldset | iframe | math | ins | del
336
(?: # Match one attr name/value pair
337
\s+ # There needs to be at least some whitespace
338
# before each attribute name.
339
[\w.:_-]+ # Attribute name
342
".+?" # "Attribute value"
344
'.+?' # 'Attribute value'
349
my $empty_tag = qr{< \w+ $tag_attrs \s* />}oxms;
350
my $open_tag = qr{< $block_tags $tag_attrs \s* >}oxms;
351
my $close_tag = undef; # let Text::Balanced handle this
353
use Text::Balanced qw(gen_extract_tagged);
354
my $extract_block = gen_extract_tagged($open_tag, $close_tag, undef, { ignore => [$empty_tag] });
357
while ($text =~ s{^(([ ]{0,$less_than_tab}<)?.*\n)}{}m) {
360
# current line could be start of code block
362
my ($tag, $remainder) = $extract_block->($cur_line . $text);
364
my $key = _md5_utf8($tag);
365
$self->{_html_blocks}{$key} = $tag;
366
push @chunks, "\n\n" . $key . "\n\n";
370
# No tag match, so toss $cur_line into @chunks
371
push @chunks, $cur_line;
375
# current line could NOT be start of code block
376
push @chunks, $cur_line;
380
push @chunks, $text; # Whatever is left.
382
$text = join '', @chunks;
384
# Special case just for <hr />. It was easier to make a special case than
385
# to make the other regex more complicated.
386
$text = $self->_HashHR($text);
326
# Hashify HTML blocks:
327
# We only want to do this for block-level HTML tags, such as headers,
328
# lists, and tables. That's because we still want to wrap <p>s around
329
# "paragraphs" that are wrapped in non-block-level tags, such as anchors,
330
# phrase emphasis, and spans. The list of tags we're looking for is
334
p | div | h[1-6] | blockquote | pre | table |
335
dl | ol | ul | script | noscript | form |
336
fieldset | iframe | math | ins | del
341
(?: # Match one attr name/value pair
342
\s+ # There needs to be at least some whitespace
343
# before each attribute name.
344
[\w.:_-]+ # Attribute name
347
".+?" # "Attribute value"
349
'.+?' # 'Attribute value'
354
my $empty_tag = qr{< \w+ $tag_attrs \s* />}oxms;
355
my $open_tag = qr{< $block_tags $tag_attrs \s* >}oxms;
356
my $close_tag = undef; # let Text::Balanced handle this
358
use Text::Balanced qw(gen_extract_tagged);
359
my $extract_block = gen_extract_tagged($open_tag, $close_tag, undef, { ignore => [$empty_tag] });
363
while ($text =~ s{^(([ ]{0,$less_than_tab}<)?.*\n)}{}m) {
366
# current line could be start of code block
368
my ($tag, $remainder) = $extract_block->($cur_line . $text);
370
my $key = _md5_utf8($tag);
371
$self->{_html_blocks}{$key} = $tag;
372
push @chunks, "\n\n" . $key . "\n\n";
376
# No tag match, so toss $cur_line into @chunks
377
push @chunks, $cur_line;
381
# current line could NOT be start of code block
382
push @chunks, $cur_line;
386
push @chunks, $text; # Whatever is left.
388
$text = join '', @chunks;
390
# Special case just for <hr />. It was easier to make a special case than
391
# to make the other regex more complicated.
392
$text = $self->_HashHR($text);
388
394
$text = $self->_HashHTMLComments($text);
390
396
$text = $self->_HashPHPASPBlocks($text);
396
402
my ($self, $text) = @_;
397
403
my $less_than_tab = $self->{tab_width} - 1;
401
(?<=\n\n) # Starting after a blank line
403
\A\n? # the beginning of the doc
406
[ ]{0,$less_than_tab}
407
<(hr) # start tag = $2
410
/?> # the matching end tag
412
(?=\n{2,}|\Z) # followed by a blank line or end of document
415
my $key = _md5_utf8($1);
416
$self->{_html_blocks}{$key} = $1;
417
"\n\n" . $key . "\n\n";
407
(?<=\n\n) # Starting after a blank line
409
\A\n? # the beginning of the doc
412
[ ]{0,$less_than_tab}
413
<(hr) # start tag = $2
416
/?> # the matching end tag
418
(?=\n{2,}|\Z) # followed by a blank line or end of document
421
my $key = _md5_utf8($1);
422
$self->{_html_blocks}{$key} = $1;
423
"\n\n" . $key . "\n\n";
423
429
sub _HashHTMLComments {
424
430
my ($self, $text) = @_;
425
431
my $less_than_tab = $self->{tab_width} - 1;
427
433
# Special case for standalone HTML comments:
430
(?<=\n\n) # Starting after a blank line
432
\A\n? # the beginning of the doc
435
[ ]{0,$less_than_tab}
442
(?=\n{2,}|\Z) # followed by a blank line or end of document
445
my $key = _md5_utf8($1);
446
$self->{_html_blocks}{$key} = $1;
447
"\n\n" . $key . "\n\n";
436
(?<=\n\n) # Starting after a blank line
438
\A\n? # the beginning of the doc
441
[ ]{0,$less_than_tab}
448
(?=\n{2,}|\Z) # followed by a blank line or end of document
451
my $key = _md5_utf8($1);
452
$self->{_html_blocks}{$key} = $1;
453
"\n\n" . $key . "\n\n";
453
459
sub _HashPHPASPBlocks {
454
460
my ($self, $text) = @_;
455
461
my $less_than_tab = $self->{tab_width} - 1;
457
463
# PHP and ASP-style processor instructions (<?…?> and <%…%>)
460
(?<=\n\n) # Starting after a blank line
462
\A\n? # the beginning of the doc
465
[ ]{0,$less_than_tab}
472
(?=\n{2,}|\Z) # followed by a blank line or end of document
475
my $key = _md5_utf8($1);
476
$self->{_html_blocks}{$key} = $1;
477
"\n\n" . $key . "\n\n";
466
(?<=\n\n) # Starting after a blank line
468
\A\n? # the beginning of the doc
471
[ ]{0,$less_than_tab}
478
(?=\n{2,}|\Z) # followed by a blank line or end of document
481
my $key = _md5_utf8($1);
482
$self->{_html_blocks}{$key} = $1;
483
"\n\n" . $key . "\n\n";
482
488
sub _RunBlockGamut {
583
586
# value; this is likely overkill, but it should prevent us from colliding
584
587
# with the escape values by accident.
586
my ($self, $text) = @_;
587
my $tokens ||= $self->_TokenizeHTML($text);
588
$text = ''; # rebuild $text from the tokens
589
my ($self, $text) = @_;
590
my $tokens ||= $self->_TokenizeHTML($text);
591
$text = ''; # rebuild $text from the tokens
590
foreach my $cur_token (@$tokens) {
591
if ($cur_token->[0] eq "tag") {
592
$cur_token->[1] =~ s! \\ !$g_escape_table{'\\'}!gox;
593
$cur_token->[1] =~ s{ (?<=.)</?code>(?=.) }{$g_escape_table{'`'}}gox;
594
$cur_token->[1] =~ s! \* !$g_escape_table{'*'}!gox;
595
$cur_token->[1] =~ s! _ !$g_escape_table{'_'}!gox;
597
$text .= $cur_token->[1];
593
foreach my $cur_token (@$tokens) {
594
if ($cur_token->[0] eq "tag") {
595
$cur_token->[1] =~ s! \\ !$g_escape_table{'\\'}!gox;
596
$cur_token->[1] =~ s{ (?<=.)</?code>(?=.) }{$g_escape_table{'`'}}gox;
597
$cur_token->[1] =~ s! \* !$g_escape_table{'*'}!gox;
598
$cur_token->[1] =~ s! _ !$g_escape_table{'_'}!gox;
600
$text .= $cur_token->[1];
691
694
sub _GenerateAnchor {
692
695
# FIXME - Fugly, change to named params?
693
696
my ($self, $whole_match, $link_text, $link_id, $url, $title, $attributes) = @_;
697
700
$attributes = '' unless defined $attributes;
699
702
if ( !defined $url && defined $self->{_urls}{$link_id}) {
700
703
$url = $self->{_urls}{$link_id};
703
706
if (!defined $url) {
704
707
return $whole_match;
707
710
$url =~ s! \* !$g_escape_table{'*'}!gox; # We've got to encode these to avoid
708
711
$url =~ s! _ !$g_escape_table{'_'}!gox; # conflicting with italics/bold.
709
$url =~ s{^<(.*)>$}{$1}; # Remove <>'s surrounding URL, if present
712
$url =~ s{^<(.*)>$}{$1}; # Remove <>'s surrounding URL, if present
711
714
$result = qq{<a href="$url"};
713
716
if ( !defined $title && defined $link_id && defined $self->{_titles}{$link_id} ) {
714
717
$title = $self->{_titles}{$link_id};
717
720
if ( defined $title ) {
718
721
$title =~ s/"/"/g;
719
722
$title =~ s! \* !$g_escape_table{'*'}!gox;
720
723
$title =~ s! _ !$g_escape_table{'_'}!gox;
721
724
$result .= qq{ title="$title"};
724
727
$result .= "$attributes>$link_text</a>";
802
805
sub _GenerateImage {
803
806
# FIXME - Fugly, change to named params?
804
807
my ($self, $whole_match, $alt_text, $link_id, $url, $title, $attributes) = @_;
808
811
$attributes = '' unless defined $attributes;
810
813
$alt_text ||= '';
811
814
$alt_text =~ s/"/"/g;
812
815
# FIXME - how about >
814
817
if ( !defined $url && defined $self->{_urls}{$link_id}) {
815
818
$url = $self->{_urls}{$link_id};
818
821
# If there's no such link ID, leave intact:
819
return $whole_match unless defined $url;
822
return $whole_match unless defined $url;
821
824
$url =~ s! \* !$g_escape_table{'*'}!ogx; # We've got to encode these to avoid
822
825
$url =~ s! _ !$g_escape_table{'_'}!ogx; # conflicting with italics/bold.
823
$url =~ s{^<(.*)>$}{$1}; # Remove <>'s surrounding URL, if present
826
$url =~ s{^<(.*)>$}{$1}; # Remove <>'s surrounding URL, if present
825
828
if (!defined $title && length $link_id && defined $self->{_titles}{$link_id} && length $self->{_titles}{$link_id}) {
826
829
$title = $self->{_titles}{$link_id};
829
832
$result = qq{<img src="$url" alt="$alt_text"};
830
833
if (defined $title && length $title) {
1124
1127
sub _DoCodeBlocks {
1126
1129
# Process Markdown `<pre><code>` blocks.
1129
1132
my ($self, $text) = @_;
1133
( # $1 = the code block -- one or more lines, starting with a space/tab
1135
(?:[ ]{$self->{tab_width}} | \t) # Lines must start with a tab or a tab-width of spaces
1139
((?=^[ ]{0,$self->{tab_width}}\S)|\Z) # Lookahead for non-space at line-start, or end of doc
1142
my $result; # return value
1144
$codeblock = $self->_EncodeCode($self->_Outdent($codeblock));
1145
$codeblock = $self->_Detab($codeblock);
1146
$codeblock =~ s/\A\n+//; # trim leading newlines
1147
$codeblock =~ s/\n+\z//; # trim trailing newlines
1149
$result = "\n\n<pre><code>" . $codeblock . "\n</code></pre>\n\n";
1136
( # $1 = the code block -- one or more lines, starting with a space/tab
1138
(?:[ ]{$self->{tab_width}} | \t) # Lines must start with a tab or a tab-width of spaces
1142
((?=^[ ]{0,$self->{tab_width}}\S)|\Z) # Lookahead for non-space at line-start, or end of doc
1145
my $result; # return value
1147
$codeblock = $self->_EncodeCode($self->_Outdent($codeblock));
1148
$codeblock = $self->_Detab($codeblock);
1149
$codeblock =~ s/\A\n+//; # trim leading newlines
1150
$codeblock =~ s/\n+\z//; # trim trailing newlines
1152
$result = "\n\n<pre><code>" . $codeblock . "\n</code></pre>\n\n";
1157
1160
sub _DoCodeSpans {
1159
1162
# * Backtick quotes are used for <code></code> spans.
1161
1164
# * You can use multiple backticks as the delimiters if you want to
1162
1165
# include literal backticks in the code span. So, this input:
1164
1167
# Just type ``foo `bar` baz`` at the prompt.
1166
1169
# Will translate to:
1168
1171
# <p>Just type <code>foo `bar` baz</code> at the prompt.</p>
1170
1173
# There's no arbitrary limit to the number of backticks you
1171
1174
# can use as delimters. If you need three consecutive backticks
1172
1175
# in your code, use four for delimiters, etc.
1174
1177
# * You can use spaces to get literal backticks at the edges:
1176
1179
# ... type `` `bar` `` ...
1180
1183
# ... type <code>`bar`</code> ...
1183
1186
my ($self, $text) = @_;
1186
(?<!\\) # Character before opening ` can't be a backslash
1187
(`+) # $1 = Opening run of `
1188
(.+?) # $2 = The code block
1190
\1 # Matching closer
1194
$c =~ s/^[ \t]*//g; # leading whitespace
1195
$c =~ s/[ \t]*$//g; # trailing whitespace
1196
$c = $self->_EncodeCode($c);
1189
(?<!\\) # Character before opening ` can't be a backslash
1190
(`+) # $1 = Opening run of `
1191
(.+?) # $2 = The code block
1193
\1 # Matching closer
1197
$c =~ s/^[ \t]*//g; # leading whitespace
1198
$c =~ s/[ \t]*$//g; # trailing whitespace
1199
$c = $self->_EncodeCode($c);
1630
1633
CPAN Module Text::MultiMarkdown (based on Text::Markdown by Sebastian
1631
1634
Riedel) originally by Darren Kulp (http://kulp.ch/)
1633
1636
This module is maintained by: Tomas Doran http://www.bobtfish.net/
1635
1638
=head1 THIS DISTRIBUTION
1637
Please note that this distribution is a fork of John Gruber's original Markdown project,
1640
Please note that this distribution is a fork of John Gruber's original Markdown project,
1638
1641
and it *is not* in any way blessed by him.
1640
Whilst this code aims to be compatible with the original Markdown.pl (and incorporates
1641
and passes the Markdown test suite) whilst fixing a number of bugs in the original -
1643
Whilst this code aims to be compatible with the original Markdown.pl (and incorporates
1644
and passes the Markdown test suite) whilst fixing a number of bugs in the original -
1642
1645
there may be differences between the behaviour of this module and Markdown.pl. If you find
1643
any differences where you believe Text::Markdown behaves contrary to the Markdown spec,
1646
any differences where you believe Text::Markdown behaves contrary to the Markdown spec,
1644
1647
please report them as bugs.
1646
1649
Text::Markdown *does not* extend the markdown dialect in any way from that which is documented at
1647
1650
daringfireball. If you want additional features, you should look at L<Text::MultiMarkdown>.
1654
You can find the source code repository for L<Text::Markdown> and L<Text::MultiMarkdown>
1655
on GitHub at <http://github.com/bobtfish/text-markdown>.
1649
1657
=head1 COPYRIGHT AND LICENSE
1651
Original Code Copyright (c) 2003-2004 John Gruber
1652
<http://daringfireball.net/>
1653
All rights reserved.
1655
MultiMarkdown changes Copyright (c) 2005-2006 Fletcher T. Penney
1656
<http://fletcher.freeshell.org/>
1657
All rights reserved.
1659
Text::MultiMarkdown changes Copyright (c) 2006-2008 Darren Kulp
1659
Original Code Copyright (c) 2003-2004 John Gruber
1660
<http://daringfireball.net/>
1661
All rights reserved.
1663
MultiMarkdown changes Copyright (c) 2005-2006 Fletcher T. Penney
1664
<http://fletcher.freeshell.org/>
1665
All rights reserved.
1667
Text::MultiMarkdown changes Copyright (c) 2006-2009 Darren Kulp
1660
1668
<http://kulp.ch> and Tomas Doran <http://www.bobtfish.net>
1662
1670
Redistribution and use in source and binary forms, with or without