2
package Pod::Simple::BlackBox;
4
# "What's in the box?" "Pain."
6
###########################################################################
8
# This is where all the scary things happen: parsing lines into
9
# paragraphs; and then into directives, verbatims, and then also
10
# turning formatting sequences into treelets.
12
# Are you really sure you want to read this code?
14
#-----------------------------------------------------------------------------
16
# The basic work of this module Pod::Simple::BlackBox is doing the dirty work
17
# of parsing Pod into treelets (generally one per non-verbatim paragraph), and
18
# to call the proper callbacks on the treelets.
20
# Every node in a treelet is a ['name', {attrhash}, ...children...]
27
*DEBUG = \&Pod::Simple::DEBUG unless defined &DEBUG
30
#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
32
sub parse_line { shift->parse_lines(@_) } # alias
34
# - - - Turn back now! Run away! - - -
36
sub parse_lines { # Usage: $parser->parse_lines(@lines)
37
# an undef means end-of-stream
40
my $code_handler = $self->{'code_handler'};
41
my $cut_handler = $self->{'cut_handler'};
42
$self->{'line_count'} ||= 0;
47
print "# Parsing starting at line ", $self->{'line_count'}, ".\n";
50
print "# About to parse lines: ",
51
join(' ', map defined($_) ? "[$_]" : "EOF", @_), "\n";
53
my $paras = ($self->{'paras'} ||= []);
54
# paragraph buffer. Because we need to defer processing of =over
55
# directives and verbatim paragraphs. We call _ponder_paragraph_buffer
58
$self->{'pod_para_count'} ||= 0;
61
foreach my $source_line (@_) {
62
if( $self->{'source_dead'} ) {
63
DEBUG > 4 and print "# Source is dead.\n";
67
unless( defined $source_line ) {
68
DEBUG > 4 and print "# Undef-line seen.\n";
70
push @$paras, ['~end', {'start_line' => $self->{'line_count'}}];
71
push @$paras, $paras->[-1], $paras->[-1];
72
# So that it definitely fills the buffer.
73
$self->{'source_dead'} = 1;
74
$self->_ponder_paragraph_buffer;
79
if( $self->{'line_count'}++ ) {
80
($line = $source_line) =~ tr/\n\r//d;
81
# If we don't have two vars, we'll end up with that there
82
# tr/// modding the (potentially read-only) original source line!
85
DEBUG > 2 and print "First line: [$source_line]\n";
87
if( ($line = $source_line) =~ s/^\xEF\xBB\xBF//s ) {
88
DEBUG and print "UTF-8 BOM seen. Faking a '=encode utf8'.\n";
89
$self->_handle_encoding_line( "=encode utf8" );
92
} elsif( $line =~ s/^\xFE\xFF//s ) {
93
DEBUG and print "Big-endian UTF-16 BOM seen. Aborting parsing.\n";
95
$self->{'line_count'},
96
"UTF16-BE Byte Encoding Mark found; but Pod::Simple v$Pod::Simple::VERSION doesn't implement UTF16 yet."
102
# TODO: implement somehow?
104
} elsif( $line =~ s/^\xFF\xFE//s ) {
105
DEBUG and print "Little-endian UTF-16 BOM seen. Aborting parsing.\n";
107
$self->{'line_count'},
108
"UTF16-LE Byte Encoding Mark found; but Pod::Simple v$Pod::Simple::VERSION doesn't implement UTF16 yet."
114
# TODO: implement somehow?
117
DEBUG > 2 and print "First line is BOM-less.\n";
118
($line = $source_line) =~ tr/\n\r//d;
123
DEBUG > 5 and print "# Parsing line: [$line]\n";
125
if(!$self->{'in_pod'}) {
126
if($line =~ m/^=([a-zA-Z]+)/s) {
129
$self->{'line_count'},
130
"=cut found outside a pod block. Skipping to next block."
133
## Before there were errata sections in the world, it was
134
## least-pessimal to abort processing the file. But now we can
135
## just barrel on thru (but still not start a pod block).
141
$self->{'in_pod'} = $self->{'start_of_pod_block'}
142
= $self->{'last_was_blank'} = 1;
143
# And fall thru to the pod-mode block further down
146
DEBUG > 5 and print "# It's a code-line.\n";
147
$code_handler->(map $_, $line, $self->{'line_count'}, $self)
149
# Note: this may cause code to be processed out of order relative
150
# to pods, but in order relative to cuts.
152
# Note also that we haven't yet applied the transcoding to $line
153
# by time we call $code_handler!
155
if( $line =~ m/^#\s*line\s+(\d+)\s*(?:\s"([^"]+)")?\s*$/ ) {
156
# That RE is from perlsyn, section "Plain Old Comments (Not!)",
157
#$fname = $2 if defined $2;
158
#DEBUG > 1 and defined $2 and print "# Setting fname to \"$fname\"\n";
159
DEBUG > 1 and print "# Setting nextline to $1\n";
160
$self->{'line_count'} = $1 - 1;
167
# . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
168
# Else we're in pod mode:
170
# Apply any necessary transcoding:
171
$self->{'_transcoder'} && $self->{'_transcoder'}->($line);
173
# HERE WE CATCH =encoding EARLY!
174
if( $line =~ m/^=encoding\s+\S+\s*$/s ) {
175
$line = $self->_handle_encoding_line( $line );
178
if($line =~ m/^=cut/s) {
179
# here ends the pod block, and therefore the previous pod para
180
DEBUG > 1 and print "Noting =cut at line ${$self}{'line_count'}\n";
181
$self->{'in_pod'} = 0;
182
# ++$self->{'pod_para_count'};
183
$self->_ponder_paragraph_buffer();
184
# by now it's safe to consider the previous paragraph as done.
185
$cut_handler->(map $_, $line, $self->{'line_count'}, $self)
188
# TODO: add to docs: Note: this may cause cuts to be processed out
189
# of order relative to pods, but in order relative to code.
191
} elsif($line =~ m/^\s*$/s) { # it's a blank line
192
if(!$self->{'start_of_pod_block'} and @$paras and $paras->[-1][0] eq '~Verbatim') {
193
DEBUG > 1 and print "Saving blank line at line ${$self}{'line_count'}\n";
194
push @{$paras->[-1]}, $line;
195
} # otherwise it's not interesting
197
if(!$self->{'start_of_pod_block'} and !$self->{'last_was_blank'}) {
198
DEBUG > 1 and print "Noting para ends with blank line at ${$self}{'line_count'}\n";
201
$self->{'last_was_blank'} = 1;
203
} elsif($self->{'last_was_blank'}) { # A non-blank line starting a new para...
205
if($line =~ m/^(=[a-zA-Z][a-zA-Z0-9]*)(?:\s+|$)(.*)/s) {
206
# THIS IS THE ONE PLACE WHERE WE CONSTRUCT NEW DIRECTIVE OBJECTS
207
my $new = [$1, {'start_line' => $self->{'line_count'}}, $2];
208
# Note that in "=head1 foo", the WS is lost.
209
# Example: ['=head1', {'start_line' => 123}, ' foo']
211
++$self->{'pod_para_count'};
213
$self->_ponder_paragraph_buffer();
214
# by now it's safe to consider the previous paragraph as done.
216
push @$paras, $new; # the new incipient paragraph
217
DEBUG > 1 and print "Starting new ${$paras}[-1][0] para at line ${$self}{'line_count'}\n";
219
} elsif($line =~ m/^\s/s) {
221
if(!$self->{'start_of_pod_block'} and @$paras and $paras->[-1][0] eq '~Verbatim') {
222
DEBUG > 1 and print "Resuming verbatim para at line ${$self}{'line_count'}\n";
223
push @{$paras->[-1]}, $line;
225
++$self->{'pod_para_count'};
226
$self->_ponder_paragraph_buffer();
227
# by now it's safe to consider the previous paragraph as done.
228
DEBUG > 1 and print "Starting verbatim para at line ${$self}{'line_count'}\n";
229
push @$paras, ['~Verbatim', {'start_line' => $self->{'line_count'}}, $line];
232
++$self->{'pod_para_count'};
233
$self->_ponder_paragraph_buffer();
234
# by now it's safe to consider the previous paragraph as done.
235
push @$paras, ['~Para', {'start_line' => $self->{'line_count'}}, $line];
236
DEBUG > 1 and print "Starting plain para at line ${$self}{'line_count'}\n";
238
$self->{'last_was_blank'} = $self->{'start_of_pod_block'} = 0;
241
# It's a non-blank line /continuing/ the current para
243
DEBUG > 2 and print "Line ${$self}{'line_count'} continues current paragraph\n";
244
push @{$paras->[-1]}, $line;
247
die "Continuing a paragraph but \@\$paras is empty?";
249
$self->{'last_was_blank'} = $self->{'start_of_pod_block'} = 0;
252
} # ends the big while loop
254
DEBUG > 1 and print(pretty(@$paras), "\n");
258
#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
260
sub _handle_encoding_line {
261
my($self, $line) = @_;
263
# The point of this routine is to set $self->{'_transcoder'} as indicated.
265
return $line unless $line =~ m/^=encoding\s+(\S+)\s*$/s;
266
DEBUG > 1 and print "Found an encoding line \"=encoding $1\"\n";
270
push @{ $self->{'encoding_command_reqs'} }, "=encoding $orig";
274
# Cf. perldoc Encode and perldoc Encode::Supported
276
require Pod::Simple::Transcode;
278
if( $self->{'encoding'} ) {
279
my $norm_current = $self->{'encoding'};
281
foreach my $that ($norm_current, $norm_e) {
285
if($norm_current eq $norm_e) {
286
DEBUG > 1 and print "The '=encoding $orig' line is ",
287
"redundant. ($norm_current eq $norm_e). Ignoring.\n";
289
# But that doesn't necessarily mean that the earlier one went okay
291
$enc_error = "Encoding is already set to " . $self->{'encoding'};
292
DEBUG > 1 and print $enc_error;
295
# OK, let's turn on the encoding
297
DEBUG > 1 and print " Setting encoding to $e\n";
298
$self->{'encoding'} = $e;
303
DEBUG and print " Putting in HACKRAW (no-op) encoding mode.\n";
305
} elsif( Pod::Simple::Transcode::->encoding_is_available($e) ) {
307
die($enc_error = "WHAT? _transcoder is already set?!")
308
if $self->{'_transcoder'}; # should never happen
309
require Pod::Simple::Transcode;
310
$self->{'_transcoder'} = Pod::Simple::Transcode::->make_transcoder($e);
312
my @x = ('', "abc", "123");
313
$self->{'_transcoder'}->(@x);
315
$@ && die( $enc_error =
316
"Really unexpected error setting up encoding $e: $@\nAborting"
320
my @supported = Pod::Simple::Transcode::->all_encodings;
322
# Note unsupported, and complain
323
DEBUG and print " Encoding [$e] is unsupported.",
324
"\nSupporteds: @supported\n";
327
# Look for a near match:
331
foreach my $enc (@supported) {
334
next unless $n eq $norm;
335
$suggestion = " (Maybe \"$e\" should be \"$enc\"?)";
338
my $encmodver = Pod::Simple::Transcode::->encmodver;
339
$enc_error = join '' =>
340
"This document probably does not appear as it should, because its ",
341
"\"=encoding $e\" line calls for an unsupported encoding.",
342
$suggestion, " [$encmodver\'s supported encodings are: @supported]"
345
$self->scream( $self->{'line_count'}, $enc_error );
347
push @{ $self->{'encoding_command_statuses'} }, $enc_error;
349
return '=encoding ALREADYDONE';
352
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
354
sub _handle_encoding_second_level {
355
# By time this is called, the encoding (if well formed) will already
356
# have been acted one.
357
my($self, $para) = @_;
359
my $content = join ' ', splice @x, 2;
360
$content =~ s/^\s+//s;
361
$content =~ s/\s+$//s;
363
DEBUG > 2 and print "Ogling encoding directive: =encoding $content\n";
365
if($content eq 'ALREADYDONE') {
366
# It's already been handled. Check for errors.
367
if(! $self->{'encoding_command_statuses'} ) {
368
DEBUG > 2 and print " CRAZY ERROR: It wasn't really handled?!\n";
369
} elsif( $self->{'encoding_command_statuses'}[-1] ) {
370
$self->whine( $para->[1]{'start_line'},
371
sprintf "Couldn't do %s: %s",
372
$self->{'encoding_command_reqs' }[-1],
373
$self->{'encoding_command_statuses'}[-1],
376
DEBUG > 2 and print " (Yup, it was successfully handled already.)\n";
380
# Otherwise it's a syntax error
381
$self->whine( $para->[1]{'start_line'},
382
"Invalid =encoding syntax: $content"
389
#~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`
392
my $m = -321; # magic line number
396
# Return 0 or more fake-o paragraphs explaining the accumulated
397
# errors on this document.
399
return() unless $self->{'errata'} and keys %{$self->{'errata'}};
403
foreach my $line (sort {$a <=> $b} keys %{$self->{'errata'}}) {
405
['=item', {'start_line' => $m}, "Around line $line:"],
406
map( ['~Para', {'start_line' => $m, '~cooked' => 1},
407
#['~Top', {'start_line' => $m},
411
@{$self->{'errata'}{$line}}
416
# TODO: report of unknown entities? unrenderable characters?
419
['=head1', {'start_line' => $m, 'errata' => 1}, 'POD ERRORS'],
420
['~Para', {'start_line' => $m, '~cooked' => 1, 'errata' => 1},
423
'The above document had some coding errors, which are explained below:'
426
['=over', {'start_line' => $m, 'errata' => 1}, ''],
430
['=back', {'start_line' => $m, 'errata' => 1}, ''],
433
DEBUG and print "\n<<\n", pretty(\@out), "\n>>\n\n";
440
#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
442
##############################################################################
444
## stop reading now stop reading now stop reading now stop reading now stop
446
## HERE IT BECOMES REALLY SCARY
448
## stop reading now stop reading now stop reading now stop reading now stop
450
##############################################################################
452
sub _ponder_paragraph_buffer {
454
# Para-token types as found in the buffer.
455
# ~Verbatim, ~Para, ~end, =head1..4, =for, =begin, =end,
456
# =over, =back, =item
457
# and the null =pod (to be complained about if over one line)
459
# "~data" paragraphs are something we generate at this level, depending on
460
# a currently open =over region
462
# Events fired: Begin and end for:
463
# directivename (like head1 .. head4), item, extend,
464
# for (from =begin...=end, =for),
465
# over-bullet, over-number, over-text, over-block,
466
# item-bullet, item-number, item-text,
468
# Data, Para, Verbatim
469
# B, C, longdirname (TODO -- wha?), etc. for all directives
474
return unless @{$paras = $self->{'paras'}};
475
my $curr_open = ($self->{'curr_open'} ||= []);
479
DEBUG > 10 and print "# Paragraph buffer: <<", pretty($paras), ">>\n";
481
# We have something in our buffer. So apparently the document has started.
482
unless($self->{'doc_has_started'}) {
483
$self->{'doc_has_started'} = 1;
485
my $starting_contentless;
486
$starting_contentless =
489
and @$paras and ! grep $_->[0] ne '~end', @$paras
490
# i.e., if the paras is all ~ends
493
DEBUG and print "# Starting ",
494
$starting_contentless ? 'contentless' : 'contentful',
498
$self->_handle_element_start(
499
($scratch = 'Document'),
501
'start_line' => $paras->[0][1]{'start_line'},
502
$starting_contentless ? ( 'contentless' => 1 ) : (),
507
my($para, $para_type);
509
last if @$paras == 1 and
510
( $paras->[0][0] eq '=over' or $paras->[0][0] eq '~Verbatim'
511
or $paras->[0][0] eq '=item' )
513
# Those're the three kinds of paragraphs that require lookahead.
514
# Actually, an "=item Foo" inside an <over type=text> region
515
# and any =item inside an <over type=block> region (rare)
516
# don't require any lookahead, but all others (bullets
519
# TODO: winge about many kinds of directives in non-resolving =for regions?
520
# TODO: many? like what? =head1 etc?
522
$para = shift @$paras;
523
$para_type = $para->[0];
525
DEBUG > 1 and print "Pondering a $para_type paragraph, given the stack: (",
526
$self->_dump_curr_open(), ")\n";
528
if($para_type eq '=for') {
529
next if $self->_ponder_for($para,$curr_open,$paras);
531
} elsif($para_type eq '=begin') {
532
next if $self->_ponder_begin($para,$curr_open,$paras);
534
} elsif($para_type eq '=end') {
535
next if $self->_ponder_end($para,$curr_open,$paras);
537
} elsif($para_type eq '~end') { # The virtual end-document signal
538
next if $self->_ponder_doc_end($para,$curr_open,$paras);
542
# ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~
543
#~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~
544
if(grep $_->[1]{'~ignore'}, @$curr_open) {
546
print "Skipping $para_type paragraph because in ignore mode.\n";
549
#~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~
550
# ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~
552
if($para_type eq '=pod') {
553
$self->_ponder_pod($para,$curr_open,$paras);
555
} elsif($para_type eq '=over') {
556
next if $self->_ponder_over($para,$curr_open,$paras);
558
} elsif($para_type eq '=back') {
559
next if $self->_ponder_back($para,$curr_open,$paras);
563
# All non-magical codes!!!
565
# Here we start using $para_type for our own twisted purposes, to
566
# mean how it should get treated, not as what the element name
569
DEBUG > 1 and print "Pondering non-magical $para_type\n";
573
# Enforce some =headN discipline
574
if($para_type =~ m/^=head\d$/s
575
and ! $self->{'accept_heads_anywhere'}
577
and $curr_open->[-1][0] eq '=over'
579
DEBUG > 2 and print "'=$para_type' inside an '=over'!\n";
581
$para->[1]{'start_line'},
582
"You forgot a '=back' before '$para_type'"
584
unshift @$paras, ['=back', {}, ''], $para; # close the =over
589
if($para_type eq '=item') {
592
unless(@$curr_open and ($over = $curr_open->[-1])->[0] eq '=over') {
594
$para->[1]{'start_line'},
595
"'=item' outside of any '=over'"
598
['=over', {'start_line' => $para->[1]{'start_line'}}, ''],
605
my $over_type = $over->[1]{'~type'};
609
die "Typeless over in stack, starting at line "
610
. $over->[1]{'start_line'};
612
} elsif($over_type eq 'block') {
613
unless($curr_open->[-1][1]{'~bitched_about'}) {
614
$curr_open->[-1][1]{'~bitched_about'} = 1;
616
$curr_open->[-1][1]{'start_line'},
617
"You can't have =items (as at line "
618
. $para->[1]{'start_line'}
619
. ") unless the first thing after the =over is an =item"
622
# Just turn it into a paragraph and reconsider it
623
$para->[0] = '~Para';
624
unshift @$paras, $para;
627
} elsif($over_type eq 'text') {
628
my $item_type = $self->_get_item_type($para);
629
# That kills the content of the item if it's a number or bullet.
630
DEBUG and print " Item is of type ", $para->[0], " under $over_type\n";
632
if($item_type eq 'text') {
633
# Nothing special needs doing for 'text'
634
} elsif($item_type eq 'number' or $item_type eq 'bullet') {
635
die "Unknown item type $item_type"
636
unless $item_type eq 'number' or $item_type eq 'bullet';
637
# Undo our clobbering:
638
push @$para, $para->[1]{'~orig_content'};
639
delete $para->[1]{'number'};
640
# Only a PROPER item-number element is allowed
641
# to have a number attribute.
643
die "Unhandled item type $item_type"; # should never happen
646
# =item-text thingies don't need any assimilation, it seems.
648
} elsif($over_type eq 'number') {
649
my $item_type = $self->_get_item_type($para);
650
# That kills the content of the item if it's a number or bullet.
651
DEBUG and print " Item is of type ", $para->[0], " under $over_type\n";
653
my $expected_value = ++ $curr_open->[-1][1]{'~counter'};
655
if($item_type eq 'bullet') {
656
# Hm, it's not numeric. Correct for this.
657
$para->[1]{'number'} = $expected_value;
659
$para->[1]{'start_line'},
660
"Expected '=item $expected_value'"
662
push @$para, $para->[1]{'~orig_content'};
663
# restore the bullet, blocking the assimilation of next para
665
} elsif($item_type eq 'text') {
666
# Hm, it's not numeric. Correct for this.
667
$para->[1]{'number'} = $expected_value;
669
$para->[1]{'start_line'},
670
"Expected '=item $expected_value'"
672
# Text content will still be there and will block next ~Para
674
} elsif($item_type ne 'number') {
675
die "Unknown item type $item_type"; # should never happen
677
} elsif($expected_value == $para->[1]{'number'}) {
678
DEBUG > 1 and print " Numeric item has the expected value of $expected_value\n";
681
DEBUG > 1 and print " Numeric item has ", $para->[1]{'number'},
682
" instead of the expected value of $expected_value\n";
684
$para->[1]{'start_line'},
685
"You have '=item " . $para->[1]{'number'} .
686
"' instead of the expected '=item $expected_value'"
688
$para->[1]{'number'} = $expected_value; # correcting!!
692
# For the cases where we /didn't/ push to @$para
693
if($paras->[0][0] eq '~Para') {
694
DEBUG and print "Assimilating following ~Para content into $over_type item\n";
695
push @$para, splice @{shift @$paras},2;
697
DEBUG and print "Can't assimilate following ", $paras->[0][0], "\n";
698
push @$para, ''; # Just so it's not contentless
703
} elsif($over_type eq 'bullet') {
704
my $item_type = $self->_get_item_type($para);
705
# That kills the content of the item if it's a number or bullet.
706
DEBUG and print " Item is of type ", $para->[0], " under $over_type\n";
708
if($item_type eq 'bullet') {
711
if( $para->[1]{'~_freaky_para_hack'} ) {
712
DEBUG and print "Accomodating '=item * Foo' tolerance hack.\n";
713
push @$para, delete $para->[1]{'~_freaky_para_hack'};
716
} elsif($item_type eq 'number') {
718
$para->[1]{'start_line'},
721
push @$para, $para->[1]{'~orig_content'};
722
# and block assimilation of the next paragraph
723
delete $para->[1]{'number'};
724
# Only a PROPER item-number element is allowed
725
# to have a number attribute.
726
} elsif($item_type eq 'text') {
728
$para->[1]{'start_line'},
731
# But doesn't need processing. But it'll block assimilation
734
die "Unhandled item type $item_type"; # should never happen
738
# For the cases where we /didn't/ push to @$para
739
if($paras->[0][0] eq '~Para') {
740
DEBUG and print "Assimilating following ~Para content into $over_type item\n";
741
push @$para, splice @{shift @$paras},2;
743
DEBUG and print "Can't assimilate following ", $paras->[0][0], "\n";
744
push @$para, ''; # Just so it's not contentless
749
die "Unhandled =over type \"$over_type\"?";
753
$para_type = 'Plain';
754
$para->[0] .= '-' . $over_type;
755
# Whew. Now fall thru and process it.
758
} elsif($para_type eq '=extend') {
759
# Well, might as well implement it here.
760
$self->_ponder_extend($para);
762
} elsif($para_type eq '=encoding') {
763
# Not actually acted on here, but we catch errors here.
764
$self->_handle_encoding_second_level($para);
767
} elsif($para_type eq '~Verbatim') {
768
$para->[0] = 'Verbatim';
769
$para_type = '?Verbatim';
770
} elsif($para_type eq '~Para') {
772
$para_type = '?Plain';
773
} elsif($para_type eq 'Data') {
775
$para_type = '?Data';
776
} elsif( $para_type =~ s/^=//s
777
and defined( $para_type = $self->{'accept_directives'}{$para_type} )
779
DEBUG > 1 and print " Pondering known directive ${$para}[0] as $para_type\n";
781
# An unknown directive!
782
DEBUG > 1 and printf "Unhandled directive %s (Handled: %s)\n",
783
$para->[0], join(' ', sort keys %{$self->{'accept_directives'}} )
786
$para->[1]{'start_line'},
787
"Unknown directive: $para->[0]"
790
# And maybe treat it as text instead of just letting it go?
794
if($para_type =~ s/^\?//s) {
795
if(! @$curr_open) { # usual case
796
DEBUG and print "Treating $para_type paragraph as such because stack is empty.\n";
798
my @fors = grep $_->[0] eq '=for', @$curr_open;
799
DEBUG > 1 and print "Containing fors: ",
800
join(',', map $_->[1]{'target'}, @fors), "\n";
803
DEBUG and print "Treating $para_type paragraph as such because stack has no =for's\n";
805
#} elsif(grep $_->[1]{'~resolve'}, @fors) {
806
#} elsif(not grep !$_->[1]{'~resolve'}, @fors) {
807
} elsif( $fors[-1][1]{'~resolve'} ) {
808
# Look to the immediately containing for
810
if($para_type eq 'Data') {
811
DEBUG and print "Treating Data paragraph as Plain/Verbatim because the containing =for ($fors[-1][1]{'target'}) is a resolver\n";
813
$para_type = 'Plain';
815
DEBUG and print "Treating $para_type paragraph as such because the containing =for ($fors[-1][1]{'target'}) is a resolver\n";
818
DEBUG and print "Treating $para_type paragraph as Data because the containing =for ($fors[-1][1]{'target'}) is a non-resolver\n";
819
$para->[0] = $para_type = 'Data';
824
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
825
if($para_type eq 'Plain') {
826
$self->_ponder_Plain($para);
827
} elsif($para_type eq 'Verbatim') {
828
$self->_ponder_Verbatim($para);
829
} elsif($para_type eq 'Data') {
830
$self->_ponder_Data($para);
832
die "\$para type is $para_type -- how did that happen?";
836
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
837
$para->[0] =~ s/^[~=]//s;
839
DEBUG and print "\n", pretty($para), "\n";
841
# traverse the treelet (which might well be just one string scalar)
842
$self->{'content_seen'} ||= 1;
843
$self->_traverse_treelet_bit(@$para);
850
###########################################################################
851
# The sub-ponderers...
856
my ($self,$para,$curr_open,$paras) = @_;
858
# Fake it out as a begin/end
861
if(grep $_->[1]{'~ignore'}, @$curr_open) {
862
DEBUG > 1 and print "Ignoring ignorable =for\n";
866
for(my $i = 2; $i < @$para; ++$i) {
867
if($para->[$i] =~ s/^\s*(\S+)\s*//s) {
872
unless(defined $target) {
874
$para->[1]{'start_line'},
875
"=for without a target?"
880
print "Faking out a =for $target as a =begin $target / =end $target\n";
886
{'start_line' => $para->[1]{'start_line'}, '~really' => '=for'},
891
{'start_line' => $para->[1]{'start_line'}, '~really' => '=for'},
900
my ($self,$para,$curr_open,$paras) = @_;
901
my $content = join ' ', splice @$para, 2;
902
$content =~ s/^\s+//s;
903
$content =~ s/\s+$//s;
904
unless(length($content)) {
906
$para->[1]{'start_line'},
907
"=begin without a target?"
909
DEBUG and print "Ignoring targetless =begin\n";
913
unless($content =~ m/^\S+$/s) { # i.e., unless it's one word
915
$para->[1]{'start_line'},
916
"'=begin' only takes one parameter, not several as in '=begin $content'"
918
DEBUG and print "Ignoring unintelligible =begin $content\n";
923
$para->[1]{'target'} = $content; # without any ':'
925
$content =~ s/^:!/!:/s;
926
my $neg; # whether this is a negation-match
927
$neg = 1 if $content =~ s/^!//s;
928
my $to_resolve; # whether to process formatting codes
929
$to_resolve = 1 if $content =~ s/^://s;
931
my $dont_ignore; # whether this target matches us
933
foreach my $target_name (
934
split(',', $content, -1),
938
print " Considering whether =begin $content matches $target_name\n";
939
next unless $self->{'accept_targets'}{$target_name};
942
print " It DOES match the acceptable target $target_name!\n";
944
if $self->{'accept_targets'}{$target_name} eq 'force_resolve';
946
$para->[1]{'target_matching'} = $target_name;
947
last; # stop looking at other target names
953
delete $para->[1]{'target_matching'};
954
DEBUG > 2 and print " But the leading ! means that this is a NON-match!\n";
957
$para->[1]{'target_matching'} = '!';
958
DEBUG > 2 and print " But the leading ! means that this IS a match!\n";
962
$para->[0] = '=for'; # Just what we happen to call these, internally
963
$para->[1]{'~really'} ||= '=begin';
964
$para->[1]{'~ignore'} = (! $dont_ignore) || 0;
965
$para->[1]{'~resolve'} = $to_resolve || 0;
967
DEBUG > 1 and print " Making note to ", $dont_ignore ? 'not ' : '',
968
"ignore contents of this region\n";
969
DEBUG > 1 and $dont_ignore and print " Making note to treat contents as ",
970
($to_resolve ? 'verbatim/plain' : 'data'), " paragraphs\n";
971
DEBUG > 1 and print " (Stack now: ", $self->_dump_curr_open(), ")\n";
973
push @$curr_open, $para;
974
if(!$dont_ignore or scalar grep $_->[1]{'~ignore'}, @$curr_open) {
975
DEBUG > 1 and print "Ignoring ignorable =begin\n";
977
$self->{'content_seen'} ||= 1;
978
$self->_handle_element_start((my $scratch='for'), $para->[1]);
985
my ($self,$para,$curr_open,$paras) = @_;
986
my $content = join ' ', splice @$para, 2;
987
$content =~ s/^\s+//s;
988
$content =~ s/\s+$//s;
989
DEBUG and print "Ogling '=end $content' directive\n";
991
unless(length($content)) {
993
$para->[1]{'start_line'},
994
"'=end' without a target?" . (
995
( @$curr_open and $curr_open->[-1][0] eq '=for' )
996
? ( " (Should be \"=end " . $curr_open->[-1][1]{'target'} . '")' )
1000
DEBUG and print "Ignoring targetless =end\n";
1004
unless($content =~ m/^\S+$/) { # i.e., unless it's one word
1006
$para->[1]{'start_line'},
1007
"'=end $content' is invalid. (Stack: "
1008
. $self->_dump_curr_open() . ')'
1010
DEBUG and print "Ignoring mistargetted =end $content\n";
1014
unless(@$curr_open and $curr_open->[-1][0] eq '=for') {
1016
$para->[1]{'start_line'},
1017
"=end $content without matching =begin. (Stack: "
1018
. $self->_dump_curr_open() . ')'
1020
DEBUG and print "Ignoring mistargetted =end $content\n";
1024
unless($content eq $curr_open->[-1][1]{'target'}) {
1026
$para->[1]{'start_line'},
1027
"=end $content doesn't match =begin "
1028
. $curr_open->[-1][1]{'target'}
1030
. $self->_dump_curr_open() . ')'
1032
DEBUG and print "Ignoring mistargetted =end $content at line $para->[1]{'start_line'}\n";
1036
# Else it's okay to close...
1037
if(grep $_->[1]{'~ignore'}, @$curr_open) {
1038
DEBUG > 1 and print "Not firing any event for this =end $content because in an ignored region\n";
1039
# And that may be because of this to-be-closed =for region, or some
1040
# other one, but it doesn't matter.
1042
$curr_open->[-1][1]{'start_line'} = $para->[1]{'start_line'};
1045
$self->{'content_seen'} ||= 1;
1046
$self->_handle_element_end( my $scratch = 'for' );
1048
DEBUG > 1 and print "Popping $curr_open->[-1][0] $curr_open->[-1][1]{'target'} because of =end $content\n";
1054
sub _ponder_doc_end {
1055
my ($self,$para,$curr_open,$paras) = @_;
1056
if(@$curr_open) { # Deal with things left open
1057
DEBUG and print "Stack is nonempty at end-document: (",
1058
$self->_dump_curr_open(), ")\n";
1060
DEBUG > 9 and print "Stack: ", pretty($curr_open), "\n";
1061
unshift @$paras, $self->_closers_for_all_curr_open;
1062
# Make sure there is exactly one ~end in the parastack, at the end:
1063
@$paras = grep $_->[0] ne '~end', @$paras;
1064
push @$paras, $para, $para;
1065
# We need two -- once for the next cycle where we
1066
# generate errata, and then another to be at the end
1067
# when that loop back around to process the errata.
1071
DEBUG and print "Okay, stack is empty now.\n";
1074
# Try generating errata section, if applicable
1075
unless($self->{'~tried_gen_errata'}) {
1076
$self->{'~tried_gen_errata'} = 1;
1077
my @extras = $self->_gen_errata();
1079
unshift @$paras, @extras;
1080
DEBUG and print "Generated errata... relooping...\n";
1081
return 1; # I.e., loop around again to process these fake-o paragraphs
1085
splice @$paras; # Well, that's that for this paragraph buffer.
1086
DEBUG and print "Throwing end-document event.\n";
1088
$self->_handle_element_end( my $scratch = 'Document' );
1089
return 1; # Hasta la byebye
1093
my ($self,$para,$curr_open,$paras) = @_;
1095
$para->[1]{'start_line'},
1096
"=pod directives shouldn't be over one line long! Ignoring all "
1097
. (@$para - 2) . " lines of content"
1099
# Content is always ignored.
1104
my ($self,$para,$curr_open,$paras) = @_;
1105
return 1 unless @$paras;
1108
if($paras->[0][0] eq '=item') { # most common case
1109
$list_type = $self->_get_initial_item_type($paras->[0]);
1111
} elsif($paras->[0][0] eq '=back') {
1112
# Ignore empty lists. TODO: make this an option?
1116
} elsif($paras->[0][0] eq '~end') {
1118
$para->[1]{'start_line'},
1119
"=over is the last thing in the document?!"
1121
return 1; # But feh, ignore it.
1123
$list_type = 'block';
1125
$para->[1]{'~type'} = $list_type;
1126
push @$curr_open, $para;
1127
# yes, we reuse the paragraph as a stack item
1129
my $content = join ' ', splice @$para, 2;
1131
if($content =~ m/^\s*$/s) {
1132
$para->[1]{'indent'} = 4;
1133
} elsif($content =~ m/^\s*((?:\d*\.)?\d+)\s*$/s) {
1135
$para->[1]{'indent'} = $1;
1138
$para->[1]{'start_line'},
1139
"Can't have a 0 in =over $content"
1141
$para->[1]{'indent'} = 4;
1145
$para->[1]{'start_line'},
1146
"=over should be: '=over' or '=over positive_number'"
1148
$para->[1]{'indent'} = 4;
1150
DEBUG > 1 and print "=over found of type $list_type\n";
1152
$self->{'content_seen'} ||= 1;
1153
$self->_handle_element_start((my $scratch = 'over-' . $list_type), $para->[1]);
1159
my ($self,$para,$curr_open,$paras) = @_;
1160
# TODO: fire off </item-number> or </item-bullet> or </item-text> ??
1162
my $content = join ' ', splice @$para, 2;
1163
if($content =~ m/\S/) {
1165
$para->[1]{'start_line'},
1166
"=back doesn't take any parameters, but you said =back $content"
1170
if(@$curr_open and $curr_open->[-1][0] eq '=over') {
1171
DEBUG > 1 and print "=back happily closes matching =over\n";
1172
# Expected case: we're closing the most recently opened thing
1173
#my $over = pop @$curr_open;
1174
$self->{'content_seen'} ||= 1;
1175
$self->_handle_element_end( my $scratch =
1176
'over-' . ( (pop @$curr_open)->[1]{'~type'} )
1179
DEBUG > 1 and print "=back found without a matching =over. Stack: (",
1180
join(', ', map $_->[0], @$curr_open), ").\n";
1182
$para->[1]{'start_line'},
1183
'=back without =over'
1185
return 1; # and ignore it
1190
my ($self,$para,$curr_open,$paras) = @_;
1192
unless(@$curr_open and ($over = $curr_open->[-1])->[0] eq '=over') {
1194
$para->[1]{'start_line'},
1195
"'=item' outside of any '=over'"
1198
['=over', {'start_line' => $para->[1]{'start_line'}}, ''],
1205
my $over_type = $over->[1]{'~type'};
1209
die "Typeless over in stack, starting at line "
1210
. $over->[1]{'start_line'};
1212
} elsif($over_type eq 'block') {
1213
unless($curr_open->[-1][1]{'~bitched_about'}) {
1214
$curr_open->[-1][1]{'~bitched_about'} = 1;
1216
$curr_open->[-1][1]{'start_line'},
1217
"You can't have =items (as at line "
1218
. $para->[1]{'start_line'}
1219
. ") unless the first thing after the =over is an =item"
1222
# Just turn it into a paragraph and reconsider it
1223
$para->[0] = '~Para';
1224
unshift @$paras, $para;
1227
} elsif($over_type eq 'text') {
1228
my $item_type = $self->_get_item_type($para);
1229
# That kills the content of the item if it's a number or bullet.
1230
DEBUG and print " Item is of type ", $para->[0], " under $over_type\n";
1232
if($item_type eq 'text') {
1233
# Nothing special needs doing for 'text'
1234
} elsif($item_type eq 'number' or $item_type eq 'bullet') {
1235
die "Unknown item type $item_type"
1236
unless $item_type eq 'number' or $item_type eq 'bullet';
1237
# Undo our clobbering:
1238
push @$para, $para->[1]{'~orig_content'};
1239
delete $para->[1]{'number'};
1240
# Only a PROPER item-number element is allowed
1241
# to have a number attribute.
1243
die "Unhandled item type $item_type"; # should never happen
1246
# =item-text thingies don't need any assimilation, it seems.
1248
} elsif($over_type eq 'number') {
1249
my $item_type = $self->_get_item_type($para);
1250
# That kills the content of the item if it's a number or bullet.
1251
DEBUG and print " Item is of type ", $para->[0], " under $over_type\n";
1253
my $expected_value = ++ $curr_open->[-1][1]{'~counter'};
1255
if($item_type eq 'bullet') {
1256
# Hm, it's not numeric. Correct for this.
1257
$para->[1]{'number'} = $expected_value;
1259
$para->[1]{'start_line'},
1260
"Expected '=item $expected_value'"
1262
push @$para, $para->[1]{'~orig_content'};
1263
# restore the bullet, blocking the assimilation of next para
1265
} elsif($item_type eq 'text') {
1266
# Hm, it's not numeric. Correct for this.
1267
$para->[1]{'number'} = $expected_value;
1269
$para->[1]{'start_line'},
1270
"Expected '=item $expected_value'"
1272
# Text content will still be there and will block next ~Para
1274
} elsif($item_type ne 'number') {
1275
die "Unknown item type $item_type"; # should never happen
1277
} elsif($expected_value == $para->[1]{'number'}) {
1278
DEBUG > 1 and print " Numeric item has the expected value of $expected_value\n";
1281
DEBUG > 1 and print " Numeric item has ", $para->[1]{'number'},
1282
" instead of the expected value of $expected_value\n";
1284
$para->[1]{'start_line'},
1285
"You have '=item " . $para->[1]{'number'} .
1286
"' instead of the expected '=item $expected_value'"
1288
$para->[1]{'number'} = $expected_value; # correcting!!
1292
# For the cases where we /didn't/ push to @$para
1293
if($paras->[0][0] eq '~Para') {
1294
DEBUG and print "Assimilating following ~Para content into $over_type item\n";
1295
push @$para, splice @{shift @$paras},2;
1297
DEBUG and print "Can't assimilate following ", $paras->[0][0], "\n";
1298
push @$para, ''; # Just so it's not contentless
1303
} elsif($over_type eq 'bullet') {
1304
my $item_type = $self->_get_item_type($para);
1305
# That kills the content of the item if it's a number or bullet.
1306
DEBUG and print " Item is of type ", $para->[0], " under $over_type\n";
1308
if($item_type eq 'bullet') {
1311
if( $para->[1]{'~_freaky_para_hack'} ) {
1312
DEBUG and print "Accomodating '=item * Foo' tolerance hack.\n";
1313
push @$para, delete $para->[1]{'~_freaky_para_hack'};
1316
} elsif($item_type eq 'number') {
1318
$para->[1]{'start_line'},
1319
"Expected '=item *'"
1321
push @$para, $para->[1]{'~orig_content'};
1322
# and block assimilation of the next paragraph
1323
delete $para->[1]{'number'};
1324
# Only a PROPER item-number element is allowed
1325
# to have a number attribute.
1326
} elsif($item_type eq 'text') {
1328
$para->[1]{'start_line'},
1329
"Expected '=item *'"
1331
# But doesn't need processing. But it'll block assimilation
1334
die "Unhandled item type $item_type"; # should never happen
1338
# For the cases where we /didn't/ push to @$para
1339
if($paras->[0][0] eq '~Para') {
1340
DEBUG and print "Assimilating following ~Para content into $over_type item\n";
1341
push @$para, splice @{shift @$paras},2;
1343
DEBUG and print "Can't assimilate following ", $paras->[0][0], "\n";
1344
push @$para, ''; # Just so it's not contentless
1349
die "Unhandled =over type \"$over_type\"?";
1352
$para->[0] .= '-' . $over_type;
1358
my ($self,$para) = @_;
1359
DEBUG and print " giving plain treatment...\n";
1360
unless( @$para == 2 or ( @$para == 3 and $para->[2] eq '' )
1361
or $para->[1]{'~cooked'}
1364
@{$self->_make_treelet(
1365
join("\n", splice(@$para, 2)),
1366
$para->[1]{'start_line'}
1369
# Empty paragraphs don't need a treelet for any reason I can see.
1370
# And precooked paragraphs already have a treelet.
1374
sub _ponder_Verbatim {
1375
my ($self,$para) = @_;
1376
DEBUG and print " giving verbatim treatment...\n";
1378
$para->[1]{'xml:space'} = 'preserve';
1379
for(my $i = 2; $i < @$para; $i++) {
1380
foreach my $line ($para->[$i]) { # just for aliasing
1382
# Sort of adapted from Text::Tabs -- yes, it's hardwired in that
1383
# tabs are at every EIGHTH column. For portability, it has to be
1384
# one setting everywhere, and 8th wins.
1385
s/^([^\t]*)(\t+)/$1.(" " x ((length($2)<<3)-(length($1)&7)))/e
1388
# TODO: whinge about (or otherwise treat) unindented or overlong lines
1393
# Now the VerbatimFormatted hoodoo...
1394
if( $self->{'accept_codes'} and
1395
$self->{'accept_codes'}{'VerbatimFormatted'}
1397
while(@$para > 3 and $para->[-1] !~ m/\S/) { pop @$para }
1398
# Kill any number of terminal newlines
1399
$self->_verbatim_format($para);
1400
} elsif ($self->{'codes_in_verbatim'}) {
1402
@{$self->_make_treelet(
1403
join("\n", splice(@$para, 2)),
1404
$para->[1]{'start_line'}, $para->[1]{'xml:space'}
1406
$para->[-1] =~ s/\n+$//s; # Kill any number of terminal newlines
1408
push @$para, join "\n", splice(@$para, 2) if @$para > 3;
1409
$para->[-1] =~ s/\n+$//s; # Kill any number of terminal newlines
1415
my ($self,$para) = @_;
1416
DEBUG and print " giving data treatment...\n";
1417
$para->[1]{'xml:space'} = 'preserve';
1418
push @$para, join "\n", splice(@$para, 2) if @$para > 3;
1425
###########################################################################
1427
sub _traverse_treelet_bit { # for use only by the routine above
1428
my($self, $name) = splice @_,0,2;
1431
$self->_handle_element_start(($scratch=$name), shift @_);
1433
foreach my $x (@_) {
1435
&_traverse_treelet_bit($self, @$x);
1437
$self->_handle_text($x);
1441
$self->_handle_element_end($scratch=$name);
1445
#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
1447
sub _closers_for_all_curr_open {
1450
foreach my $still_open (@{ $self->{'curr_open'} || return }) {
1451
my @copy = @$still_open;
1452
$copy[1] = {%{ $copy[1] }};
1453
#$copy[1]{'start_line'} = -1;
1454
if($copy[0] eq '=for') {
1456
} elsif($copy[0] eq '=over') {
1459
die "I don't know how to auto-close an open $copy[0] region";
1462
unless( @copy > 2 ) {
1463
push @copy, $copy[1]{'target'};
1464
$copy[-1] = '' unless defined $copy[-1];
1465
# since =over's don't have targets
1468
DEBUG and print "Queuing up fake-o event: ", pretty(\@copy), "\n";
1469
unshift @closers, \@copy;
1474
#--------------------------------------------------------------------------
1476
sub _verbatim_format {
1481
for(my $i = 2; $i < @$p; $i++) { # work backwards over the lines
1482
DEBUG and print "_verbatim_format appends a newline to $i: $p->[$i]\n";
1484
# Unlike with simple Verbatim blocks, we don't end up just doing
1485
# a join("\n", ...) on the contents, so we have to append a
1486
# newline to ever line, and then nix the last one later.
1491
for(my $i = $#$p; $i >= 2; $i--) { # work backwards over the lines
1492
print "_verbatim_format $i: $p->[$i]";
1497
for(my $i = $#$p; $i > 2; $i--) {
1498
# work backwards over the lines, except the first (#2)
1500
#next unless $p->[$i] =~ m{^#:([ \^\/\%]*)\n?$}s
1501
# and $p->[$i-1] !~ m{^#:[ \^\/\%]*\n?$}s;
1502
# look at a formatty line preceding a nonformatty one
1503
DEBUG > 5 and print "Scrutinizing line $i: $$p[$i]\n";
1504
if($p->[$i] =~ m{^#:([ \^\/\%]*)\n?$}s) {
1505
DEBUG > 5 and print " It's a formatty line. ",
1506
"Peeking at previous line ", $i-1, ": $$p[$i-1]: \n";
1508
if( $p->[$i-1] =~ m{^#:[ \^\/\%]*\n?$}s ) {
1509
DEBUG > 5 and print " Previous line is formatty! Skipping this one.\n";
1512
DEBUG > 5 and print " Previous line is non-formatty! Yay!\n";
1515
DEBUG > 5 and print " It's not a formatty line. Ignoring\n";
1519
# A formatty line has to have #: in the first two columns, and uses
1520
# "^" to mean bold, "/" to mean underline, and "%" to mean bold italic.
1522
# What do you want? i like pie. [or whatever]
1523
# #:^^^^^^^^^^^^^^^^^ /////////////
1526
DEBUG > 4 and print "_verbatim_format considers:\n<$p->[$i-1]>\n<$p->[$i]>\n";
1528
$formatting = ' ' . $1;
1529
$formatting =~ s/\s+$//s; # nix trailing whitespace
1530
unless(length $formatting and $p->[$i-1] =~ m/\S/) { # no-op
1531
splice @$p,$i,1; # remove this line
1532
$i--; # don't consider next line
1536
if( length($formatting) >= length($p->[$i-1]) ) {
1537
$formatting = substr($formatting, 0, length($p->[$i-1]) - 1) . ' ';
1539
$formatting .= ' ' x (length($p->[$i-1]) - length($formatting));
1541
# Make $formatting and the previous line be exactly the same length,
1542
# with $formatting having a " " as the last character.
1544
DEBUG > 4 and print "Formatting <$formatting> on <", $p->[$i-1], ">\n";
1548
while( $formatting =~ m{\G(( +)|(\^+)|(\/+)|(\%+))}g ) {
1549
#print "Format matches $1\n";
1552
#print "SKIPPING <$2>\n";
1554
substr($p->[$i-1], pos($formatting)-length($1), length($1));
1556
#print "SNARING $+\n";
1561
$5 ? 'VerbatimBI' : die("Should never get called")
1563
substr($p->[$i-1], pos($formatting)-length($1), length($1))
1565
#print "Formatting <$new_line[-1][-1]> as $new_line[-1][0]\n";
1569
splice @$p, $i-1, 2, @new_line; # replace myself and the next line
1570
DEBUG > 10 and print "Nixed count: ", scalar(@nixed), "\n";
1572
DEBUG > 6 and print "New version of the above line is these tokens (",
1573
scalar(@new_line), "):",
1574
map( ref($_)?"<@$_> ":"<$_>", @new_line ), "\n";
1575
$i--; # So the next line we scrutinize is the line before the one
1576
# that we just went and formatted
1579
$p->[0] = 'VerbatimFormatted';
1581
# Collapse adjacent text nodes, just for kicks.
1582
for( my $i = 2; $i > $#$p; $i++ ) { # work forwards over the tokens except for the last
1583
if( !ref($p->[$i]) and !ref($p->[$i + 1]) ) {
1584
DEBUG > 5 and print "_verbatim_format merges {$p->[$i]} and {$p->[$i+1]}\n";
1585
$p->[$i] .= splice @$p, $i+1, 1; # merge
1590
# Now look for the last text token, and remove the terminal newline
1591
for( my $i = $#$p; $i >= 2; $i-- ) {
1592
# work backwards over the tokens, even the first
1593
if( !ref($p->[$i]) ) {
1594
if($p->[$i] =~ s/\n$//s) {
1595
DEBUG > 5 and print "_verbatim_format killed the terminal newline on #$i: {$p->[$i]}, after {$p->[$i-1]}\n";
1598
"No terminal newline on #$i: {$p->[$i]}, after {$p->[$i-1]} !?\n";
1600
last; # we only want the next one
1608
#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
1611
sub _treelet_from_formatting_codes {
1612
# Given a paragraph, returns a treelet. Full of scary tokenizing code.
1613
# Like [ '~Top', {'start_line' => $start_line},
1615
# [ 'B', {}, "pie" ],
1619
my($self, $para, $start_line, $preserve_space) = @_;
1621
my $treelet = ['~Top', {'start_line' => $start_line},];
1623
unless ($preserve_space || $self->{'preserve_whitespace'}) {
1624
$para =~ s/\. /\.\xA0 /g if $self->{'fullstop_space_harden'};
1626
$para =~ s/\s+/ /g; # collapse and trim all whitespace first.
1631
# Only apparent problem the above code is that N<< >> turns into
1632
# N<< >>. But then, word wrapping does that too! So don't do that!
1635
my @lineage = ($treelet);
1637
DEBUG > 4 and print "Paragraph:\n$para\n\n";
1639
# Here begins our frightening tokenizer RE. The following regex matches
1640
# text in four main parts:
1642
# * Start-codes. The first alternative matches C< or C<<, the latter
1643
# followed by some whitespace. $1 will hold the entire start code
1644
# (including any space following a multiple-angle-bracket delimiter),
1645
# and $2 will hold only the additional brackets past the first in a
1646
# multiple-bracket delimiter. length($2) + 1 will be the number of
1647
# closing brackets we have to find.
1649
# * Closing brackets. Match some amount of whitespace followed by
1650
# multiple close brackets. The logic to see if this closes anything
1651
# is down below. Note that in order to parse C<< >> correctly, we
1652
# have to use look-behind (?<=\s\s), since the match of the starting
1653
# code will have consumed the whitespace.
1655
# * A single closing bracket, to close a simple code like C<>.
1657
# * Something that isn't a start or end code. We have to be careful
1658
# about accepting whitespace, since perlpodspec says that any whitespace
1659
# before a multiple-bracket closing delimiter should be ignored.
1664
# Match starting codes, including the whitespace following a
1665
# multiple-delimiter start code. $1 gets the whole start code and
1666
# $2 gets all but one of the <s in the multiple-bracket case.
1667
([A-Z]<(?:(<+)\s+)?)
1669
# Match multiple-bracket end codes. $3 gets the whitespace that
1670
# should be discarded before an end bracket but kept in other cases
1671
# and $4 gets the end brackets themselves.
1672
(\s+|(?<=\s\s))(>{2,})
1674
(\s?>) # $5: simple end-codes
1676
( # $6: stuff containing no start-codes or end-codes
1692
DEBUG > 4 and print "\nParagraphic tokenstack = (@stack)\n";
1695
DEBUG > 3 and print "Found complex start-text code \"$1\"\n";
1696
push @stack, length($2) + 1;
1697
# length of the necessary complex end-code string
1699
DEBUG > 3 and print "Found simple start-text code \"$1\"\n";
1700
push @stack, 0; # signal that we're looking for simple
1702
push @lineage, [ substr($1,0,1), {}, ]; # new node object
1703
push @{ $lineage[-2] }, $lineage[-1];
1705
} elsif(defined $4) {
1706
DEBUG > 3 and print "Found apparent complex end-text code \"$3$4\"\n";
1707
# This is where it gets messy...
1709
# We saw " >>>>" but needed nothing. This is ALL just stuff then.
1710
DEBUG > 4 and print " But it's really just stuff.\n";
1711
push @{ $lineage[-1] }, $3, $4;
1713
} elsif(!$stack[-1]) {
1714
# We saw " >>>>" but needed only ">". Back pos up.
1715
DEBUG > 4 and print " And that's more than we needed to close simple.\n";
1716
push @{ $lineage[-1] }, $3; # That was a for-real space, too.
1717
pos($para) = pos($para) - length($4) + 1;
1718
} elsif($stack[-1] == length($4)) {
1719
# We found " >>>>", and it was exactly what we needed. Commonest case.
1720
DEBUG > 4 and print " And that's exactly what we needed to close complex.\n";
1721
} elsif($stack[-1] < length($4)) {
1722
# We saw " >>>>" but needed only " >>". Back pos up.
1723
DEBUG > 4 and print " And that's more than we needed to close complex.\n";
1724
pos($para) = pos($para) - length($4) + $stack[-1];
1726
# We saw " >>>>" but needed " >>>>>>". So this is all just stuff!
1727
DEBUG > 4 and print " But it's really just stuff, because we needed more.\n";
1728
push @{ $lineage[-1] }, $3, $4;
1731
#print "\nHOOBOY ", scalar(@{$lineage[-1]}), "!!!\n";
1733
push @{ $lineage[-1] }, '' if 2 == @{ $lineage[-1] };
1734
# Keep the element from being childless
1739
} elsif(defined $5) {
1740
DEBUG > 3 and print "Found apparent simple end-text code \"$4\"\n";
1742
if(@stack and ! $stack[-1]) {
1743
# We're indeed expecting a simple end-code
1744
DEBUG > 4 and print " It's indeed an end-code.\n";
1746
if(length($5) == 2) { # There was a space there: " >"
1747
push @{ $lineage[-1] }, ' ';
1748
} elsif( 2 == @{ $lineage[-1] } ) { # Closing a childless element
1749
push @{ $lineage[-1] }, ''; # keep it from being really childless
1755
DEBUG > 4 and print " It's just stuff.\n";
1756
push @{ $lineage[-1] }, $5;
1759
} elsif(defined $6) {
1760
DEBUG > 3 and print "Found stuff \"$6\"\n";
1761
push @{ $lineage[-1] }, $6;
1764
# should never ever ever ever happen
1765
DEBUG and print "AYYAYAAAAA at line ", __LINE__, "\n";
1766
die "SPORK 512512!";
1770
if(@stack) { # Uhoh, some sequences weren't closed.
1773
push @{ $lineage[-1] }, '' if 2 == @{ $lineage[-1] };
1776
my $code = (pop @lineage)->[0];
1777
my $ender_length = pop @stack;
1780
$x = $code . ("<" x $ender_length) . " $x " . (">" x $ender_length);
1782
$x = $code . "<$x>";
1785
DEBUG > 1 and print "Unterminated $x sequence\n";
1786
$self->whine($start_line,
1787
"Unterminated $x sequence",
1794
#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
1796
sub text_content_of_treelet { # method: $parser->text_content_of_treelet($lol)
1797
return stringify_lol($_[1]);
1800
sub stringify_lol { # function: stringify_lol($lol)
1801
my $string_form = '';
1802
_stringify_lol( $_[0] => \$string_form );
1803
return $string_form;
1806
sub _stringify_lol { # the real recursor
1809
for(my $i = 2; $i < @$lol; ++$i) {
1810
if( ref($lol->[$i] || '') and UNIVERSAL::isa($lol->[$i], 'ARRAY') ) {
1811
_stringify_lol( $lol->[$i], $to); # recurse!
1819
#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
1821
sub _dump_curr_open { # return a string representation of the stack
1822
my $curr_open = $_[0]{'curr_open'};
1824
return '[empty]' unless @$curr_open;
1828
? ( ($_->[1]{'~really'} || '=over')
1829
. ' ' . $_->[1]{'target'})
1836
###########################################################################
1838
"\a" => '\a', # ding!
1845
"\n" => '\n', # probably overrides one of either \cm or \cj
1854
sub pretty { # adopted from Class::Classless
1855
# Not the most brilliant routine, but passable.
1856
# Don't give it a cyclic data structure!
1857
my @stuff = @_; # copy
1865
} elsif(ref($_) eq 'ARRAY' or ref($_) eq 'Pod::Simple::LinkSection') {
1866
$x = "[ " . pretty(@$_) . " ]" ;
1868
} elsif(ref($_) eq 'SCALAR') {
1869
$x = "\\" . pretty($$_) ;
1871
} elsif(ref($_) eq 'HASH') {
1873
$x = "{" . join(", ",
1874
map(pretty($_) . '=>' . pretty($hr->{$_}),
1875
sort keys %$hr ) ) . "}" ;
1877
} elsif(!length($_)) { q{''} # empty string
1879
$_ eq '0' # very common case
1881
m/^-?(?:[123456789]\d*|0)(?:\.\d+)?$/s
1882
and $_ ne '-0' # the strange case that that RE lets thru
1886
if( chr(65) eq 'A' ) {
1887
s<([^\x20\x21\x23\x27-\x3F\x41-\x5B\x5D-\x7E])>
1888
#<$pretty_form{$1} || '\\x'.(unpack("H2",$1))>eg;
1889
<$pretty_form{$1} || '\\x{'.sprintf("%x", ord($1)).'}'>eg;
1891
# We're in some crazy non-ASCII world!
1892
s<([^abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])>
1893
#<$pretty_form{$1} || '\\x'.(unpack("H2",$1))>eg;
1894
<$pretty_form{$1} || '\\x{'.sprintf("%x", ord($1)).'}'>eg;
1899
# $out =~ s/\n */ /g if length($out) < 75;
1903
#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@