1
# Copyright (C) 2005 ILOG http://www.ilog.fr
2
# and Foswiki Contributors. All Rights Reserved. Foswiki Contributors
3
# are listed in the AUTHORS file in the root of this distribution.
4
# NOTE: Please extend that file, not this notice.
6
# This program is free software; you can redistribute it and/or
7
# modify it under the terms of the GNU General Public License
8
# as published by the Free Software Foundation; either version 2
9
# of the License, or (at your option) any later version. For
10
# more details read LICENSE in the root of this distribution.
12
# This program is distributed in the hope that it will be useful,
13
# but WITHOUT ANY WARRANTY; without even the implied warranty of
14
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
16
# As per the GPL, removal of this notice is prohibited.
20
---+ package Foswiki::Plugins::WysiwygPlugin::TML2HTML
22
Convertor class for translating TML (Topic Markup Language) into HTML.
25
The convertor does _not_ use the Foswiki rendering, as that is a
26
lossy conversion, and would make symmetric translation back to TML
29
The design goal was to support round-trip conversion from well-formed
30
TML to XHTML1.0 and back to identical TML. Note that some deprecated
31
TML syntax is not supported.
35
package Foswiki::Plugins::WysiwygPlugin::TML2HTML;
40
use Foswiki::Plugins::WysiwygPlugin::Constants;
48
# HTML elements that are palatable to editors. Other HTML tags will be
49
# rendered in 'protected' regions to prevent the WYSIWYG editor mussing
50
# them up. Note that A is specifically excluded from this list because it
51
# is common for href attributes to contain macros. Users should
52
# be encouraged to use square bracket formulations for links instead.
53
my @PALATABLE_TAGS = qw(
54
ABBR ACRONYM ADDRESS B BDO BIG BLOCKQUOTE BR CAPTION CENTER CITE CODE COL
55
COLGROUP DD DEL DFN DIR DIV DL DT EM FONT H1 H2 H3 H4 H5 H6 HR HTML I IMG INS
56
ISINDEX KBD LABEL LEGEND LI OL P PRE Q S SAMP SMALL SPAN STRONG SUB SUP TABLE
57
TBODY TD TFOOT TH THEAD TITLE TR TT U UL STICKY
60
my $PALATABLE_HTML = '(' . join( '|', @PALATABLE_TAGS ) . ')';
64
---++ ClassMethod new()
66
Construct a new TML to HTML convertor.
73
return bless( $this, $class );
78
---++ ObjectMethod convert( $tml, \%options ) -> $html
80
Convert a block of TML text into HTML.
82
* getViewUrl is a reference to a method:<br>
83
getViewUrl($web,$topic) -> $url (where $topic may include an anchor)
84
* markVars is true if we are to expand macros to spans.
85
It should be false otherwise (macros will be left as text).
90
my ( $this, $content, $options ) = @_;
92
$this->{opts} = $options;
94
return '' unless $content;
96
$content =~ s/[$TT0$TT1$TT2]/?/go;
98
# Render TML constructs to tagged HTML
99
$content = $this->_getRenderedVersion($content);
101
# Substitute back in protected elements
102
$content = $this->_dropBack($content);
105
#print STDERR "TML2HTML = '$content'\n";
107
# This should really use a template, but what the heck...
112
my ( $this, $text, $type, $encoding ) = @_;
113
$text = $this->_unLift($text);
114
my $n = scalar( @{ $this->{refs} } );
119
encoding => $encoding || 'span',
123
return $TT1 . $n . $TT2;
127
my ( $this, $text ) = @_;
129
# Restore everything that was lifted out
130
while ( $text =~ s#$TT1([0-9]+)$TT2#$this->{refs}->[$1]->{text}#g ) {
136
my ( $this, $text ) = @_;
138
# Restore everything that was lifted out
139
while ( $text =~ s#$TT1([0-9]+)$TT2#$this->_dropIn($1)#ge ) {
145
my ( $this, $n ) = @_;
146
my $thing = $this->{refs}->[$n];
147
return $thing->{text} if $thing->{encoding} eq 'NONE';
148
my $method = 'CGI::' . $thing->{encoding};
149
my $text = $thing->{text};
150
$text = _protectVerbatimChars($text)
151
if $thing->{type} =~ /^(PROTECTED|STICKY|VERBATIM)$/;
153
return &$method( { class => 'WYSIWYG_' . $thing->{type} }, $text );
157
# Parse and convert macros. If we are not using span markers
158
# for macros, we have to change the percent signs into entities
159
# to prevent internal tags being expanded by Foswiki during rendering.
160
# It's assumed that the editor will have the common sense to convert
161
# them back to characters when editing.
163
my ( $this, $text ) = @_;
165
return '' unless defined($text);
167
my @queue = split( /(\n?%)/s, $text );
171
while ( scalar(@queue) ) {
172
my $token = shift(@queue);
173
if ( $token =~ /^\n?%$/s ) {
174
if ( $token eq '%' && $stackTop =~ /}$/ ) {
175
while ( scalar(@stack)
176
&& $stackTop !~ /^\n?%([A-Z0-9_:]+){.*}$/os )
178
$stackTop = pop(@stack) . $stackTop;
182
&& $stackTop =~ m/^(\n?)%([A-Z0-9_:]+)({.*})?$/os )
185
my $tag = $2 . ( $3 || '' );
188
# The commented out lines disable PROTECTED for %SIMPLE% vars. See
189
# Bugs: Item4828 for the sort of problem this would help to avert.
190
# if ($tag =~ /^\n?%\w+{.*}%/) {
192
pop(@stack) . $nl . $this->_liftOut( $tag, 'PROTECTED' );
195
# $stackTop = pop( @stack ).$tag;
199
push( @stack, $stackTop );
200
$stackTop = $token; # push a new context
208
# Run out of input. Gather up everything in the stack.
209
while ( scalar(@stack) ) {
210
$stackTop = pop(@stack) . $stackTop;
217
my ( $this, $url ) = @_;
219
return $url unless ( $this->{opts}->{expandVarsInURL} );
220
return $this->{opts}->{expandVarsInURL}->( $url, $this->{opts} );
223
# Lifted straight out of DevelopBranch Render.pm
224
sub _getRenderedVersion {
225
my ( $this, $text, $refs ) = @_;
227
return '' unless $text; # nothing to do
229
@{ $this->{LIST} } = ();
237
$this->{removed} = {}; # Map of placeholders to tag parameters and text
239
# Do sticky first; it can't be ignored
240
$text = $this->_takeOutBlocks( $text, 'sticky' );
242
$text = $this->_takeOutBlocks( $text, 'verbatim' );
244
$text = $this->_takeOutBlocks( $text, 'literal' );
246
$text = $this->_takeOutSets($text);
251
# Remove PRE to prevent TML interpretation of text inside it
252
$text = $this->_takeOutBlocks( $text, 'pre' );
255
$text =~ s/(<!--.*?-->)/$this->_liftOut($1, 'PROTECTED')/ges;
257
# Handle inline IMG tags specially
258
$text =~ s/(<img [^>]*>)/$this->_takeOutIMGTag($1)/gei;
259
$text =~ s/<\/img>//gi;
261
# Handle colour tags specially (hack, hack, hackity-HACK!)
262
my $colourMatch = join( '|', grep( /^[A-Z]/, keys %WC::KNOWN_COLOUR ) );
264
s#%($colourMatch)%(.*?)%ENDCOLOR%#<font color="\L$1\E">$2</font>#og )
268
# Convert Foswiki tags to spans outside protected text
269
$text = $this->_processTags($text);
271
# protect some HTML tags.
272
$text =~ s/(<\/?(?!(?i:$PALATABLE_HTML)\b)[A-Z]+(\s[^>]*)?>)/
273
$this->_liftOut($1, 'PROTECTED')/gei;
275
$text =~ s/\\\n//gs; # Join lines ending in '\'
277
# Blockquoted email (indented with '> ')
278
# Could be used to provide different colours for different numbers of '>'
280
s/^>(.*?)$/'>'.CGI::cite( { class => 'TMLcite' }, $1 ).CGI::br()/gem;
282
# locate isolated < and > and translate to entities
283
# Protect isolated <!-- and -->
284
$text =~ s/<!--/{$TT0!--/g;
285
$text =~ s/-->/--}$TT0/g;
287
# SMELL: this next fragment is a frightful hack, to handle the
288
# case where simple HTML tags (i.e. without values) are embedded
289
# in the values provided to other tags. The only way to do this
290
# correctly (i.e. handle HTML tags with values as well) is to
291
# parse the HTML (bleagh!)
292
$text =~ s/<(\/[A-Za-z]+)>/{$TT0$1}$TT0/g;
293
$text =~ s/<([A-Za-z]+(\s+\/)?)>/{$TT0$1}$TT0/g;
294
$text =~ s/<(\S.*?)>/{$TT0$1}$TT0/g;
296
# entitify lone < and >, praying that we haven't screwed up :-(
297
$text =~ s/</<\;/g;
298
$text =~ s/>/>\;/g;
299
$text =~ s/{$TT0/</go;
300
$text =~ s/}$TT0/>/go;
304
s/((^|(?<=[-*\s(]))$Foswiki::regex{linkProtocolPattern}:[^\s<>"]+[^\s*.,!?;:)<])/$this->_liftOut($1, 'LINK')/geo;
307
$text =~ s/&([$Foswiki::regex{mixedAlphaNum}]+;)/$TT0$1/g; # "&abc;"
308
$text =~ s/&(#[0-9]+;)/$TT0$1/g; # "&#123;"
309
#$text =~ s/&/&amp;/g; # escape standalone "&"
310
$text =~ s/$TT0(#[0-9]+;)/&$1/go;
311
$text =~ s/$TT0([$Foswiki::regex{mixedAlphaNum}]+;)/&$1/go;
314
my $hr = CGI::hr( { class => 'TMLhr' } );
315
$text =~ s/^---+$/$hr/gm;
317
# Now we really _do_ need a line loop, to process TML
318
# line-oriented stuff.
319
my $inList = 0; # True when within a list type
320
my $inTable = 0; # True when within a table type
321
my $inParagraph = 1; # True when within a P
322
my @result = ('<p>');
324
foreach my $line ( split( /\n/, $text ) ) {
326
# Table: | cell | cell |
327
# allow trailing white space after the last |
328
if ( $line =~ m/^(\s*\|.*\|\s*)$/ ) {
329
push( @result, '</p>' ) if $inParagraph;
331
$this->_addListItem( \@result, '', '', '' ) if $inList;
337
{ border => 1, cellpadding => 0, cellspacing => 1 }
341
push( @result, _emitTR($1) );
347
push( @result, CGI::end_table() );
351
if ( $line =~ /$Foswiki::regex{headerPatternDa}/o ) {
354
$this->_addListItem( \@result, '', '', '' ) if $inList;
356
push( @result, '</p>' ) if $inParagraph;
358
my ( $indicator, $heading ) = ( $1, $2 );
360
if ( $heading =~ s/$Foswiki::regex{headerPatternNoTOC}//o ) {
363
if ( $indicator =~ /#/ ) {
364
$class .= ' numbered';
366
my $attrs = { class => $class };
367
my $fn = 'CGI::h' . length($indicator);
369
$line = &$fn( $attrs, " $heading " );
373
elsif ( $line =~ /^\s*$/ ) {
376
push( @result, '</p>' ) if $inParagraph;
379
$this->_addListItem( \@result, '', '', '' ) if $inList;
385
s/^((\t| )+)\$\s(([^:]+|:[^\s]+)+?):\s/<dt> $3 <\/dt><dd> /o )
389
push( @result, '</p>' ) if $inParagraph;
391
$this->_addListItem( \@result, 'dl', 'dd', $1, '' );
395
elsif ( $line =~ s/^((\t| )+)(\S+?):\s/<dt> $3<\/dt><dd> /o ) {
398
push( @result, '</p>' ) if $inParagraph;
400
$this->_addListItem( \@result, 'dl', 'dd', $1, '' );
404
elsif ( $line =~ s/^((\t| )+)\*(\s|$)/<li> /o ) {
407
push( @result, '</p>' ) if $inParagraph;
409
$this->_addListItem( \@result, 'ul', 'li', $1, '' );
413
elsif ( $line =~ m/^((\t| )+)([1AaIi]\.|\d+\.?) ?/ ) {
416
push( @result, '</p>' ) if $inParagraph;
420
if ( $ot !~ /^\d$/ ) {
421
$ot = ' type="' . $ot . '"';
426
$line =~ s/^((\t| )+)([1AaIi]\.|\d+\.?) ?/<li$ot> /;
427
$this->_addListItem( \@result, 'ol', 'li', $1, $ot );
431
elsif ( $inList && $line =~ /^[ \t]/ ) {
433
# Extend text of previous list item by dropping through
439
$this->_addListItem( \@result, '', '', '' ) if $inList;
443
push( @result, $line );
447
push( @result, '</table>' );
450
$this->_addListItem( \@result, '', '', '' );
452
elsif ($inParagraph) {
453
push( @result, '</p>' );
456
$text = join( "\n", @result );
458
# Trim any extra Ps from the top and bottom.
459
$text =~ s#^(\s*<p>\s*</p>)+##s;
460
$text =~ s#(<p>\s*</p>\s*)+$##s;
462
$text =~ s(${WC::STARTWW}==([^\s]+?|[^\s].*?[^\s])==$WC::ENDWW)
463
(CGI::b(CGI::span({class => 'WYSIWYG_TT'}, $1)))gem;
464
$text =~ s(${WC::STARTWW}__([^\s]+?|[^\s].*?[^\s])__$WC::ENDWW)
465
(CGI::b(CGI::i($1)))gem;
466
$text =~ s(${WC::STARTWW}\*([^\s]+?|[^\s].*?[^\s])\*$WC::ENDWW)
469
$text =~ s(${WC::STARTWW}\_([^\s]+?|[^\s].*?[^\s])\_$WC::ENDWW)
471
$text =~ s(${WC::STARTWW}\=([^\s]+?|[^\s].*?[^\s])\=$WC::ENDWW)
472
(CGI::span({class => 'WYSIWYG_TT'}, $1))gem;
474
# Handle [[][]] and [[]] links
476
# We do _not_ support [[http://link text]] syntax
479
$text =~ s/(\[\[[^\]]*\](\[[^\]]*\])?\])/$this->_liftOut($1, 'LINK')/ge;
482
s/$WC::STARTWW(($Foswiki::regex{webNameRegex}\.)?$Foswiki::regex{wikiWordRegex}($Foswiki::regex{anchorRegex})?)/$this->_liftOut($1, 'LINK')/geom;
484
while ( my ( $placeholder, $val ) = each %{ $this->{removed} } ) {
485
if ( $placeholder =~ /^verbatim/i ) {
486
_addClass( $val->{params}->{class}, 'TMLverbatim' );
488
elsif ( $placeholder =~ /^literal/i ) {
489
_addClass( $val->{params}->{class}, 'WYSIWYG_LITERAL' );
491
elsif ( $placeholder =~ /^sticky/i ) {
492
_addClass( $val->{params}->{class}, 'WYSIWYG_STICKY' );
496
$this->_putBackBlocks( $text, 'pre' );
498
$this->_putBackBlocks( $text, 'literal', 'div' );
500
# replace verbatim with pre in the final output, with encoded entities
501
$this->_putBackBlocks( $text, 'verbatim', 'pre', \&_protectVerbatimChars );
503
$this->_putBackBlocks( $text, 'sticky', 'div', \&_protectVerbatimChars );
505
$text =~ s/(<nop>)/$this->_liftOut($1, 'PROTECTED')/ge;
512
$_[0] = join( ' ', ( split( /\s+/, $_[0] ), $_[1] ) );
519
# Encode special chars in verbatim as entities to prevent misinterpretation
520
sub _protectVerbatimChars {
522
$text =~ s/([\000-\011\013-\037<&>'"])/'&#'.ord($1).';'/ges;
523
$text =~ s/ / /g;
524
$text =~ s/\n/<br \/>/gs;
529
my ( $this, $text ) = @_;
531
# Expand selected macros in IMG tags so that images appear in the
534
s/(<img [^>]*\bsrc=)(["'])(.*?)\2/$1.$2.$this->_expandURL($3).$2/gie;
536
# Take out mce_src - it just causes problems.
537
$text =~ s/(<img [^>]*)\bmce_src=(["'])(.*?)\2/$1/gie;
538
$text =~ s:([^/])>$:$1 />:; # close the tag XHTML style
540
return $this->_liftOut( $text, '', 'NONE' );
543
# Pull out Foswiki Set statements, to prevent unwanted munging
547
qr/^((?:\t| )+\*\s+(?:Set|Local)\s+(?:$Foswiki::regex{tagNameRegex})\s*=)(.*)$/o;
552
foreach ( split( /\r?\n/, $_[1] ) ) {
554
if ( defined $lead ) {
556
$lead . $this->_liftOut( $value, 'PROTECTED' ) );
559
$value = defined($2) ? $2 : '';
563
if ( defined $lead ) {
564
if ( /^( |\t)+ *[^\s]/ && !/$Foswiki::regex{bulletRegex}/o ) {
566
# follow up line, extending value
570
push( @outtext, $lead . $this->_liftOut( $value, 'PROTECTED' ) );
573
push( @outtext, $_ );
575
if ( defined $lead ) {
576
push( @outtext, $lead . $this->_liftOut( $value, 'PROTECTED' ) );
578
return join( "\n", @outtext );
582
my ( $this, $intext, $tag ) = @_;
584
return '' unless $intext;
585
return $intext unless ( $intext =~ m/<$tag\b/ );
587
my $open = qr/<$tag\b[^>]*>/i;
588
my $close = qr/<\/$tag>/i;
595
foreach my $chunk ( split /($open|$close)/, $intext ) {
596
next unless defined($chunk);
597
if ( $chunk =~ m/<$tag\b([^>]*)>/ ) {
598
unless ( $depth++ ) {
604
elsif ( $depth && $chunk =~ m/$close/ ) {
605
unless ( --$depth ) {
606
my $placeholder = $tag . $n;
607
$this->{removed}->{$placeholder} = {
608
params => _parseParams($tagParams),
611
$chunk = $TT0 . $placeholder . $TT0;
625
# This would generate matching close tags
626
# while ( $depth-- ) {
627
# $scoop .= "</$tag>\n";
629
my $placeholder = $tag . $n;
630
$this->{removed}->{$placeholder} = {
631
params => _parseParams($tagParams),
634
$out .= $TT0 . $placeholder . $TT0;
637
# Filter spurious tags without matching open/close
638
$out =~ s/$open/<$tag$1>/g;
639
$out =~ s/$close/<\/$tag>/g;
640
$out =~ s/<($tag\s+\/)>/<$1>/g;
646
my ( $this, $text, $tag, $newtag, $callback ) = @_;
649
while ( my ( $placeholder, $val ) = each %{ $this->{removed} } ) {
650
if ( $placeholder =~ /^$tag\d+$/ ) {
651
my $params = $val->{params};
652
my $val = $val->{text};
653
$val = &$callback($val) if ( defined($callback) );
655
# Use div instead of span if the block contains block HTML
656
if ( $newtag eq 'span' && $val =~ m#</?($WC::ALWAYS_BLOCK_S)\b#io )
661
$fn = 'CGI::' . $newtag;
664
$_[1] =~ s/$TT0$placeholder$TT0/&$fn($params, $val)/e;
666
delete( $this->{removed}->{$placeholder} );
674
while ( $p =~ s/^\s*([$Foswiki::regex{mixedAlphaNum}]+)=(".*?"|'.*?')// ) {
677
$val =~ s/['"](.*)['"]/$1/;
678
$params->{$name} = $val;
683
# Lifted straight out of DevelopBranch Render.pm
685
my ( $this, $result, $theType, $theElement, $theIndent, $theOlType ) = @_;
687
$theIndent =~ s/ /\t/g;
688
my $depth = length($theIndent);
690
my $size = scalar( @{ $this->{LIST} } );
691
if ( $size < $depth ) {
693
while ( $size < $depth ) {
696
{ type => $theType, element => $theElement }
698
push( @$result, "<$theElement>" ) unless ($firstTime);
699
push( @$result, "<$theType>" );
705
while ( $size > $depth ) {
706
my $tags = pop( @{ $this->{LIST} } );
707
push( @$result, "</$tags->{element}>" );
708
push( @$result, "</$tags->{type}>" );
712
push( @$result, "</$this->{LIST}->[$size-1]->{element}>" );
717
my $oldt = $this->{LIST}->[ $size - 1 ];
718
if ( $oldt->{type} ne $theType ) {
719
push( @$result, "</$oldt->{type}>\n<$theType>" );
720
pop( @{ $this->{LIST} } );
723
{ type => $theType, element => $theElement }
732
$row =~ s/\t/ /g; # change tabs to space
733
$row =~ s/^(\s*)\|//; # Remove leading junk
737
while ( $row =~ s/^(.*?)\|// ) {
741
# make sure there's something there in empty cells. Otherwise
742
# the editor may compress it to (visual) nothing.
743
$cell =~ s/^\s+$/ /g;
745
my ( $left, $right ) = ( 0, 0 );
746
if ( $cell =~ /^(\s*)(.*?)(\s*)$/ ) {
752
if ( $left == 1 && $right < 2 ) {
754
# Treat left=1 and right=0 like 1 and 1 - Item5220
756
elsif ( $left > $right ) {
757
$attr->{class} = 'align-right';
758
$attr->{style} = 'text-align: right';
760
elsif ( $left < $right ) {
761
$attr->{class} = 'align-left';
762
$attr->{style} = 'text-align: left';
764
elsif ( $left > 1 ) {
765
$attr->{class} = 'align-center';
766
$attr->{style} = 'text-align: center';
770
if ( $cell =~ s/^\*(.+)\*$/$1/ ) {
774
$cell = ' '.$cell if $cell =~ /^(?:\*|==?|__?)[^\s]/;
775
$cell = $cell.' ' if $cell =~ /[^\s](?:\*|==?|__?)$/;
777
push( @tr, { fn => $fn, attr => $attr, text => $cell } );
783
for ( my $i = $#tr ; $i >= 0 ; $i-- ) {
784
if ( $i && length( $tr[$i]->{text} ) == 0 ) {
789
$tr[$i]->{attr}->{colspan} = $colspan + 1;
792
unshift( @row, $tr[$i] );
797
join( '', map { &{ $_->{fn} }( $_->{attr}, $_->{text} ) } @row ) );