1
############################################################
3
# perltidy - a perl script indenter and formatter
5
# Copyright (c) 2000-2007 by Steve Hancock
6
# Distributed under the GPL license agreement; see file COPYING
8
# This program is free software; you can redistribute it and/or modify
9
# it under the terms of the GNU General Public License as published by
10
# the Free Software Foundation; either version 2 of the License, or
11
# (at your option) any later version.
13
# This program is distributed in the hope that it will be useful,
14
# but WITHOUT ANY WARRANTY; without even the implied warranty of
15
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16
# GNU General Public License for more details.
18
# You should have received a copy of the GNU General Public License
19
# along with this program; if not, write to the Free Software
20
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22
# For brief instructions, try 'perltidy -h'.
23
# For more complete documentation, try 'man perltidy'
24
# or visit http://perltidy.sourceforge.net
26
# This script is an example of the default style. It was formatted with:
31
# Michael Cartmell supplied code for adaptation to VMS and helped with
33
# Hugh S. Myers supplied sub streamhandle and the supporting code to
34
# create a Perl::Tidy module which can operate on strings, arrays, etc.
35
# Yves Orton supplied coding to help detect Windows versions.
36
# Axel Rose supplied a patch for MacPerl.
37
# Sebastien Aperghis-Tramoni supplied a patch for the defined or operator.
38
# Dan Tyrell contributed a patch for binary I/O.
39
# Ueli Hugenschmidt contributed a patch for -fpsc
40
# Many others have supplied key ideas, suggestions, and bug reports;
41
# see the CHANGES file.
43
############################################################
46
use 5.004; # need IO::File from 5.004 or later
47
BEGIN { $^W = 1; } # turn on warnings
61
@ISA = qw( Exporter );
62
@EXPORT = qw( &perltidy );
68
( $VERSION = q($Id: Tidy.pm,v 1.73 2007/12/05 17:51:17 perltidy Exp $) ) =~ s/^.*\s+(\d+)\/(\d+)\/(\d+).*$/$1$2$3/; # all one line for MakeMaker
73
# given filename and mode (r or w), create an object which:
74
# has a 'getline' method if mode='r', and
75
# has a 'print' method if mode='w'.
76
# The objects also need a 'close' method.
78
# How the object is made:
80
# if $filename is: Make object using:
81
# ---------------- -----------------
82
# '-' (STDIN if mode = 'r', STDOUT if mode='w')
84
# ARRAY ref Perl::Tidy::IOScalarArray (formerly IO::ScalarArray)
85
# STRING ref Perl::Tidy::IOScalar (formerly IO::Scalar)
87
# (check for 'print' method for 'w' mode)
88
# (check for 'getline' method for 'r' mode)
89
my $ref = ref( my $filename = shift );
96
if ( $ref eq 'ARRAY' ) {
97
$New = sub { Perl::Tidy::IOScalarArray->new(@_) };
99
elsif ( $ref eq 'SCALAR' ) {
100
$New = sub { Perl::Tidy::IOScalar->new(@_) };
104
# Accept an object with a getline method for reading. Note:
105
# IO::File is built-in and does not respond to the defined
106
# operator. If this causes trouble, the check can be
107
# skipped and we can just let it crash if there is no
109
if ( $mode =~ /[rR]/ ) {
110
if ( $ref eq 'IO::File' || defined &{ $ref . "::getline" } ) {
111
$New = sub { $filename };
114
$New = sub { undef };
116
------------------------------------------------------------------------
117
No 'getline' method is defined for object of class $ref
118
Please check your call to Perl::Tidy::perltidy. Trace follows.
119
------------------------------------------------------------------------
124
# Accept an object with a print method for writing.
125
# See note above about IO::File
126
if ( $mode =~ /[wW]/ ) {
127
if ( $ref eq 'IO::File' || defined &{ $ref . "::print" } ) {
128
$New = sub { $filename };
131
$New = sub { undef };
133
------------------------------------------------------------------------
134
No 'print' method is defined for object of class $ref
135
Please check your call to Perl::Tidy::perltidy. Trace follows.
136
------------------------------------------------------------------------
145
if ( $filename eq '-' ) {
146
$New = sub { $mode eq 'w' ? *STDOUT : *STDIN }
149
$New = sub { IO::File->new(@_) };
152
$fh = $New->( $filename, $mode )
153
or warn "Couldn't open file:$filename in mode:$mode : $!\n";
154
return $fh, ( $ref or $filename );
157
sub find_input_line_ending {
159
# Peek at a file and return first line ending character.
160
# Quietly return undef in case of any trouble.
161
my ($input_file) = @_;
164
# silently ignore input from object or stdin
165
if ( ref($input_file) || $input_file eq '-' ) {
168
open( INFILE, $input_file ) || return $ending;
172
read( INFILE, $buf, 1024 );
174
if ( $buf && $buf =~ /([\012\015]+)/ ) {
178
if ( $test =~ /^(\015\012)+$/ ) { $ending = "\015\012" }
181
elsif ( $test =~ /^\015+$/ ) { $ending = "\015" }
184
elsif ( $test =~ /^\012+$/ ) { $ending = "\012" }
198
# concatenate a path and file basename
199
# returns undef in case of error
201
BEGIN { eval "require File::Spec"; $missing_file_spec = $@; }
203
# use File::Spec if we can
204
unless ($missing_file_spec) {
205
return File::Spec->catfile(@_);
208
# Perl 5.004 systems may not have File::Spec so we'll make
209
# a simple try. We assume File::Basename is available.
210
# return undef if not successful.
212
my $path = join '/', @_;
213
my $test_file = $path . $name;
214
my ( $test_name, $test_path ) = fileparse($test_file);
215
return $test_file if ( $test_name eq $name );
216
return undef if ( $^O eq 'VMS' );
218
# this should work at least for Windows and Unix:
219
$test_file = $path . '/' . $name;
220
( $test_name, $test_path ) = fileparse($test_file);
221
return $test_file if ( $test_name eq $name );
225
sub make_temporary_filename {
227
# Make a temporary filename.
229
# The POSIX tmpnam() function tends to be unreliable for non-unix
230
# systems (at least for the win32 systems that I've tested), so use
231
# a pre-defined name. A slight disadvantage of this is that two
232
# perltidy runs in the same working directory may conflict.
233
# However, the chance of that is small and manageable by the user.
234
# An alternative would be to check for the file's existence and use,
235
# say .TMP0, .TMP1, etc, but that scheme has its own problems. So,
237
my $name = "perltidy.TMP";
238
if ( $^O =~ /win32|dos/i || $^O eq 'VMS' || $^O eq 'MacOs' ) {
241
eval "use POSIX qw(tmpnam)";
242
if ($@) { return $name }
245
# just make a couple of tries before giving up and using the default
247
my $tmpname = tmpnam();
248
my $fh = IO::File->new( $tmpname, O_RDWR | O_CREAT | O_EXCL );
258
# Here is a map of the flow of data from the input source to the output
261
# LineSource-->Tokenizer-->Formatter-->VerticalAligner-->FileWriter-->
262
# input groups output
263
# lines tokens lines of lines lines
266
# The names correspond to the package names responsible for the unit processes.
268
# The overall process is controlled by the "main" package.
270
# LineSource is the stream of input lines
272
# Tokenizer analyzes a line and breaks it into tokens, peeking ahead
273
# if necessary. A token is any section of the input line which should be
274
# manipulated as a single entity during formatting. For example, a single
275
# ',' character is a token, and so is an entire side comment. It handles
276
# the complexities of Perl syntax, such as distinguishing between '<<' as
277
# a shift operator and as a here-document, or distinguishing between '/'
278
# as a divide symbol and as a pattern delimiter.
280
# Formatter inserts and deletes whitespace between tokens, and breaks
281
# sequences of tokens at appropriate points as output lines. It bases its
282
# decisions on the default rules as modified by any command-line options.
284
# VerticalAligner collects groups of lines together and tries to line up
285
# certain tokens, such as '=>', '#', and '=' by adding whitespace.
287
# FileWriter simply writes lines to the output stream.
289
# The Logger package, not shown, records significant events and warning
290
# messages. It writes a .LOG file, which may be saved with a
291
# '-log' or a '-g' flag.
295
# variables needed by interrupt handler:
299
# this routine may be called to give a status report if interrupted. If a
300
# parameter is given, it will call exit with that parameter. This is no
301
# longer used because it works under Unix but not under Windows.
302
sub interrupt_handler {
304
my $exit_flag = shift;
305
print STDERR "perltidy interrupted";
307
my $input_line_number =
308
Perl::Tidy::Tokenizer::get_input_line_number();
309
print STDERR " at line $input_line_number";
313
if ( ref $input_file ) { print STDERR " of reference to:" }
314
else { print STDERR " of file:" }
315
print STDERR " $input_file";
318
exit $exit_flag if defined($exit_flag);
325
destination => undef,
332
dump_options => undef,
333
dump_options_type => undef,
334
dump_getopt_flags => undef,
335
dump_options_category => undef,
336
dump_options_range => undef,
337
dump_abbreviations => undef,
340
# don't overwrite caller's ARGV
345
if ( my @bad_keys = grep { !exists $defaults{$_} } keys %input_hash ) {
347
my @good_keys = sort keys %defaults;
348
@bad_keys = sort @bad_keys;
350
------------------------------------------------------------------------
351
Unknown perltidy parameter : (@bad_keys)
352
perltidy only understands : (@good_keys)
353
------------------------------------------------------------------------
358
my $get_hash_ref = sub {
360
my $hash_ref = $input_hash{$key};
361
if ( defined($hash_ref) ) {
362
unless ( ref($hash_ref) eq 'HASH' ) {
363
my $what = ref($hash_ref);
365
$what ? "but is ref to $what" : "but is not a reference";
367
------------------------------------------------------------------------
368
error in call to perltidy:
369
-$key must be reference to HASH $but_is
370
------------------------------------------------------------------------
377
%input_hash = ( %defaults, %input_hash );
378
my $argv = $input_hash{'argv'};
379
my $destination_stream = $input_hash{'destination'};
380
my $errorfile_stream = $input_hash{'errorfile'};
381
my $logfile_stream = $input_hash{'logfile'};
382
my $perltidyrc_stream = $input_hash{'perltidyrc'};
383
my $source_stream = $input_hash{'source'};
384
my $stderr_stream = $input_hash{'stderr'};
385
my $user_formatter = $input_hash{'formatter'};
387
# various dump parameters
388
my $dump_options_type = $input_hash{'dump_options_type'};
389
my $dump_options = $get_hash_ref->('dump_options');
390
my $dump_getopt_flags = $get_hash_ref->('dump_getopt_flags');
391
my $dump_options_category = $get_hash_ref->('dump_options_category');
392
my $dump_abbreviations = $get_hash_ref->('dump_abbreviations');
393
my $dump_options_range = $get_hash_ref->('dump_options_range');
395
# validate dump_options_type
396
if ( defined($dump_options) ) {
397
unless ( defined($dump_options_type) ) {
398
$dump_options_type = 'perltidyrc';
400
unless ( $dump_options_type =~ /^(perltidyrc|full)$/ ) {
402
------------------------------------------------------------------------
403
Please check value of -dump_options_type in call to perltidy;
404
saw: '$dump_options_type'
405
expecting: 'perltidyrc' or 'full'
406
------------------------------------------------------------------------
412
$dump_options_type = "";
415
if ($user_formatter) {
417
# if the user defines a formatter, there is no output stream,
418
# but we need a null stream to keep coding simple
419
$destination_stream = Perl::Tidy::DevNull->new();
422
# see if ARGV is overridden
423
if ( defined($argv) ) {
425
my $rargv = ref $argv;
426
if ( $rargv eq 'SCALAR' ) { $argv = $$argv; $rargv = undef }
430
if ( $rargv eq 'ARRAY' ) {
435
------------------------------------------------------------------------
436
Please check value of -argv in call to perltidy;
437
it must be a string or ref to ARRAY but is: $rargv
438
------------------------------------------------------------------------
445
my ( $rargv, $msg ) = parse_args($argv);
448
Error parsing this string passed to to perltidy with 'argv':
456
# redirect STDERR if requested
457
if ($stderr_stream) {
458
my ( $fh_stderr, $stderr_file ) =
459
Perl::Tidy::streamhandle( $stderr_stream, 'w' );
460
if ($fh_stderr) { *STDERR = $fh_stderr }
463
------------------------------------------------------------------------
464
Unable to redirect STDERR to $stderr_stream
465
Please check value of -stderr in call to perltidy
466
------------------------------------------------------------------------
471
my $rpending_complaint;
472
$$rpending_complaint = "";
473
my $rpending_logfile_message;
474
$$rpending_logfile_message = "";
476
my ( $is_Windows, $Windows_type ) =
477
look_for_Windows($rpending_complaint);
479
# VMS file names are restricted to a 40.40 format, so we append _tdy
480
# instead of .tdy, etc. (but see also sub check_vms_filename)
483
if ( $^O eq 'VMS' ) {
489
$dot_pattern = '\.'; # must escape for use in regex
492
# handle command line options
493
my ( $rOpts, $config_file, $rraw_options, $saw_extrude, $roption_string,
494
$rexpansion, $roption_category, $roption_range )
495
= process_command_line(
496
$perltidyrc_stream, $is_Windows, $Windows_type,
497
$rpending_complaint, $dump_options_type,
500
# return or exit immediately after all dumps
503
# Getopt parameters and their flags
504
if ( defined($dump_getopt_flags) ) {
506
foreach my $op ( @{$roption_string} ) {
515
if ( $opt =~ /(.*)(!|=.*|:.*)$/ ) {
519
$dump_getopt_flags->{$opt} = $flag;
523
if ( defined($dump_options_category) ) {
525
%{$dump_options_category} = %{$roption_category};
528
if ( defined($dump_options_range) ) {
530
%{$dump_options_range} = %{$roption_range};
533
if ( defined($dump_abbreviations) ) {
535
%{$dump_abbreviations} = %{$rexpansion};
538
if ( defined($dump_options) ) {
540
%{$dump_options} = %{$rOpts};
543
return if ($quit_now);
545
# make printable string of options for this run as possible diagnostic
546
my $readable_options = readable_options( $rOpts, $roption_string );
548
# dump from command line
549
if ( $rOpts->{'dump-options'} ) {
550
print STDOUT $readable_options;
554
check_options( $rOpts, $is_Windows, $Windows_type,
555
$rpending_complaint );
557
if ($user_formatter) {
558
$rOpts->{'format'} = 'user';
561
# there must be one entry here for every possible format
562
my %default_file_extension = (
568
# be sure we have a valid output format
569
unless ( exists $default_file_extension{ $rOpts->{'format'} } ) {
570
my $formats = join ' ',
571
sort map { "'" . $_ . "'" } keys %default_file_extension;
572
my $fmt = $rOpts->{'format'};
573
die "-format='$fmt' but must be one of: $formats\n";
576
my $output_extension =
577
make_extension( $rOpts->{'output-file-extension'},
578
$default_file_extension{ $rOpts->{'format'} }, $dot );
580
my $backup_extension =
581
make_extension( $rOpts->{'backup-file-extension'}, 'bak', $dot );
583
my $html_toc_extension =
584
make_extension( $rOpts->{'html-toc-extension'}, 'toc', $dot );
586
my $html_src_extension =
587
make_extension( $rOpts->{'html-src-extension'}, 'src', $dot );
589
# check for -b option;
590
my $in_place_modify = $rOpts->{'backup-and-modify-in-place'}
591
&& $rOpts->{'format'} eq 'tidy' # silently ignore unless beautify mode
592
&& @ARGV > 0; # silently ignore if standard input;
593
# this allows -b to be in a .perltidyrc file
594
# without error messages when running from an editor
596
# turn off -b with warnings in case of conflicts with other options
597
if ($in_place_modify) {
598
if ( $rOpts->{'standard-output'} ) {
599
warn "Ignoring -b; you may not use -b and -st together\n";
600
$in_place_modify = 0;
602
if ($destination_stream) {
604
"Ignoring -b; you may not specify a destination array and -b together\n";
605
$in_place_modify = 0;
607
if ($source_stream) {
609
"Ignoring -b; you may not specify a source array and -b together\n";
610
$in_place_modify = 0;
612
if ( $rOpts->{'outfile'} ) {
613
warn "Ignoring -b; you may not use -b and -o together\n";
614
$in_place_modify = 0;
616
if ( defined( $rOpts->{'output-path'} ) ) {
617
warn "Ignoring -b; you may not use -b and -opath together\n";
618
$in_place_modify = 0;
622
Perl::Tidy::Formatter::check_options($rOpts);
623
if ( $rOpts->{'format'} eq 'html' ) {
624
Perl::Tidy::HtmlWriter->check_options($rOpts);
627
# make the pattern of file extensions that we shouldn't touch
628
my $forbidden_file_extensions = "(($dot_pattern)(LOG|DEBUG|ERR|TEE)";
629
if ($output_extension) {
630
my $ext = quotemeta($output_extension);
631
$forbidden_file_extensions .= "|$ext";
633
if ( $in_place_modify && $backup_extension ) {
634
my $ext = quotemeta($backup_extension);
635
$forbidden_file_extensions .= "|$ext";
637
$forbidden_file_extensions .= ')$';
639
# Create a diagnostics object if requested;
640
# This is only useful for code development
641
my $diagnostics_object = undef;
642
if ( $rOpts->{'DIAGNOSTICS'} ) {
643
$diagnostics_object = Perl::Tidy::Diagnostics->new();
646
# no filenames should be given if input is from an array
647
if ($source_stream) {
650
"You may not specify any filenames when a source array is given\n";
653
# we'll stuff the source array into ARGV
654
unshift( @ARGV, $source_stream );
656
# No special treatment for source stream which is a filename.
657
# This will enable checks for binary files and other bad stuff.
658
$source_stream = undef unless ref($source_stream);
661
# use stdin by default if no source array and no args
663
unshift( @ARGV, '-' ) unless @ARGV;
666
# loop to process all files in argument list
667
my $number_of_files = @ARGV;
668
my $formatter = undef;
670
while ( $input_file = shift @ARGV ) {
672
my $input_file_permissions;
674
#---------------------------------------------------------------
675
# determine the input file name
676
#---------------------------------------------------------------
677
if ($source_stream) {
678
$fileroot = "perltidy";
680
elsif ( $input_file eq '-' ) { # '-' indicates input from STDIN
681
$fileroot = "perltidy"; # root name to use for .ERR, .LOG, etc
682
$in_place_modify = 0;
685
$fileroot = $input_file;
686
unless ( -e $input_file ) {
688
# file doesn't exist - check for a file glob
689
if ( $input_file =~ /([\?\*\[\{])/ ) {
691
# Windows shell may not remove quotes, so do it
692
my $input_file = $input_file;
693
if ( $input_file =~ /^\'(.+)\'$/ ) { $input_file = $1 }
694
if ( $input_file =~ /^\"(.+)\"$/ ) { $input_file = $1 }
695
my $pattern = fileglob_to_re($input_file);
697
if ( !$@ && opendir( DIR, './' ) ) {
699
grep { /$pattern/ && !-d $_ } readdir(DIR);
702
unshift @ARGV, @files;
707
print "skipping file: '$input_file': no matches found\n";
711
unless ( -f $input_file ) {
712
print "skipping file: $input_file: not a regular file\n";
716
unless ( ( -T $input_file ) || $rOpts->{'force-read-binary'} ) {
718
"skipping file: $input_file: Non-text (override with -f)\n";
722
# we should have a valid filename now
723
$fileroot = $input_file;
724
$input_file_permissions = ( stat $input_file )[2] & 07777;
726
if ( $^O eq 'VMS' ) {
727
( $fileroot, $dot ) = check_vms_filename($fileroot);
730
# add option to change path here
731
if ( defined( $rOpts->{'output-path'} ) ) {
733
my ( $base, $old_path ) = fileparse($fileroot);
734
my $new_path = $rOpts->{'output-path'};
735
unless ( -d $new_path ) {
736
unless ( mkdir $new_path, 0777 ) {
737
die "unable to create directory $new_path: $!\n";
740
my $path = $new_path;
741
$fileroot = catfile( $path, $base );
744
------------------------------------------------------------------------
745
Problem combining $new_path and $base to make a filename; check -opath
746
------------------------------------------------------------------------
752
# Skip files with same extension as the output files because
753
# this can lead to a messy situation with files like
754
# script.tdy.tdy.tdy ... or worse problems ... when you
755
# rerun perltidy over and over with wildcard input.
758
&& ( $input_file =~ /$forbidden_file_extensions/o
759
|| $input_file eq 'DIAGNOSTICS' )
762
print "skipping file: $input_file: wrong extension\n";
766
# the 'source_object' supplies a method to read the input file
768
Perl::Tidy::LineSource->new( $input_file, $rOpts,
769
$rpending_logfile_message );
770
next unless ($source_object);
772
# register this file name with the Diagnostics package
773
$diagnostics_object->set_input_file($input_file)
774
if $diagnostics_object;
776
#---------------------------------------------------------------
777
# determine the output file name
778
#---------------------------------------------------------------
779
my $output_file = undef;
780
my $actual_output_extension;
782
if ( $rOpts->{'outfile'} ) {
784
if ( $number_of_files <= 1 ) {
786
if ( $rOpts->{'standard-output'} ) {
787
die "You may not use -o and -st together\n";
789
elsif ($destination_stream) {
791
"You may not specify a destination array and -o together\n";
793
elsif ( defined( $rOpts->{'output-path'} ) ) {
794
die "You may not specify -o and -opath together\n";
796
elsif ( defined( $rOpts->{'output-file-extension'} ) ) {
797
die "You may not specify -o and -oext together\n";
799
$output_file = $rOpts->{outfile};
801
# make sure user gives a file name after -o
802
if ( $output_file =~ /^-/ ) {
803
die "You must specify a valid filename after -o\n";
806
# do not overwrite input file with -o
807
if ( defined($input_file_permissions)
808
&& ( $output_file eq $input_file ) )
811
"Use 'perltidy -b $input_file' to modify in-place\n";
815
die "You may not use -o with more than one input file\n";
818
elsif ( $rOpts->{'standard-output'} ) {
819
if ($destination_stream) {
821
"You may not specify a destination array and -st together\n";
825
if ( $number_of_files <= 1 ) {
828
die "You may not use -st with more than one input file\n";
831
elsif ($destination_stream) {
832
$output_file = $destination_stream;
834
elsif ($source_stream) { # source but no destination goes to stdout
837
elsif ( $input_file eq '-' ) {
841
if ($in_place_modify) {
842
$output_file = IO::File->new_tmpfile()
843
or die "cannot open temp file for -b option: $!\n";
846
$actual_output_extension = $output_extension;
847
$output_file = $fileroot . $output_extension;
851
# the 'sink_object' knows how to write the output file
852
my $tee_file = $fileroot . $dot . "TEE";
854
my $line_separator = $rOpts->{'output-line-ending'};
855
if ( $rOpts->{'preserve-line-endings'} ) {
856
$line_separator = find_input_line_ending($input_file);
859
# Eventually all I/O may be done with binmode, but for now it is
860
# only done when a user requests a particular line separator
861
# through the -ple or -ole flags
863
if ( defined($line_separator) ) { $binmode = 1 }
864
else { $line_separator = "\n" }
867
Perl::Tidy::LineSink->new( $output_file, $tee_file,
868
$line_separator, $rOpts, $rpending_logfile_message, $binmode );
870
#---------------------------------------------------------------
871
# initialize the error logger
872
#---------------------------------------------------------------
873
my $warning_file = $fileroot . $dot . "ERR";
874
if ($errorfile_stream) { $warning_file = $errorfile_stream }
875
my $log_file = $fileroot . $dot . "LOG";
876
if ($logfile_stream) { $log_file = $logfile_stream }
879
Perl::Tidy::Logger->new( $rOpts, $log_file, $warning_file,
881
write_logfile_header(
882
$rOpts, $logger_object, $config_file,
883
$rraw_options, $Windows_type, $readable_options,
885
if ($$rpending_logfile_message) {
886
$logger_object->write_logfile_entry($$rpending_logfile_message);
888
if ($$rpending_complaint) {
889
$logger_object->complain($$rpending_complaint);
892
#---------------------------------------------------------------
893
# initialize the debug object, if any
894
#---------------------------------------------------------------
895
my $debugger_object = undef;
896
if ( $rOpts->{DEBUG} ) {
898
Perl::Tidy::Debugger->new( $fileroot . $dot . "DEBUG" );
901
#---------------------------------------------------------------
902
# create a formatter for this file : html writer or pretty printer
903
#---------------------------------------------------------------
905
# we have to delete any old formatter because, for safety,
906
# the formatter will check to see that there is only one.
909
if ($user_formatter) {
910
$formatter = $user_formatter;
912
elsif ( $rOpts->{'format'} eq 'html' ) {
914
Perl::Tidy::HtmlWriter->new( $fileroot, $output_file,
915
$actual_output_extension, $html_toc_extension,
916
$html_src_extension );
918
elsif ( $rOpts->{'format'} eq 'tidy' ) {
919
$formatter = Perl::Tidy::Formatter->new(
920
logger_object => $logger_object,
921
diagnostics_object => $diagnostics_object,
922
sink_object => $sink_object,
926
die "I don't know how to do -format=$rOpts->{'format'}\n";
929
unless ($formatter) {
930
die "Unable to continue with $rOpts->{'format'} formatting\n";
933
#---------------------------------------------------------------
934
# create the tokenizer for this file
935
#---------------------------------------------------------------
936
$tokenizer = undef; # must destroy old tokenizer
937
$tokenizer = Perl::Tidy::Tokenizer->new(
938
source_object => $source_object,
939
logger_object => $logger_object,
940
debugger_object => $debugger_object,
941
diagnostics_object => $diagnostics_object,
942
starting_level => $rOpts->{'starting-indentation-level'},
943
tabs => $rOpts->{'tabs'},
944
indent_columns => $rOpts->{'indent-columns'},
945
look_for_hash_bang => $rOpts->{'look-for-hash-bang'},
946
look_for_autoloader => $rOpts->{'look-for-autoloader'},
947
look_for_selfloader => $rOpts->{'look-for-selfloader'},
948
trim_qw => $rOpts->{'trim-qw'},
951
#---------------------------------------------------------------
953
#---------------------------------------------------------------
954
process_this_file( $tokenizer, $formatter );
956
#---------------------------------------------------------------
957
# close the input source and report errors
958
#---------------------------------------------------------------
959
$source_object->close_input_file();
961
# get file names to use for syntax check
962
my $ifname = $source_object->get_input_file_copy_name();
963
my $ofname = $sink_object->get_output_file_copy();
965
#---------------------------------------------------------------
966
# handle the -b option (backup and modify in-place)
967
#---------------------------------------------------------------
968
if ($in_place_modify) {
969
unless ( -f $input_file ) {
971
# oh, oh, no real file to backup ..
972
# shouldn't happen because of numerous preliminary checks
974
"problem with -b backing up input file '$input_file': not a file\n";
976
my $backup_name = $input_file . $backup_extension;
977
if ( -f $backup_name ) {
980
"unable to remove previous '$backup_name' for -b option; check permissions: $!\n";
982
rename( $input_file, $backup_name )
984
"problem renaming $input_file to $backup_name for -b option: $!\n";
985
$ifname = $backup_name;
987
seek( $output_file, 0, 0 )
988
or die "unable to rewind tmp file for -b option: $!\n";
990
my $fout = IO::File->new("> $input_file")
992
"problem opening $input_file for write for -b option; check directory permissions: $!\n";
995
while ( $line = $output_file->getline() ) {
999
$output_file = $input_file;
1000
$ofname = $input_file;
1003
#---------------------------------------------------------------
1004
# clean up and report errors
1005
#---------------------------------------------------------------
1006
$sink_object->close_output_file() if $sink_object;
1007
$debugger_object->close_debug_file() if $debugger_object;
1009
my $infile_syntax_ok = 0; # -1 no 0=don't know 1 yes
1012
if ($input_file_permissions) {
1014
# give output script same permissions as input script, but
1015
# make it user-writable or else we can't run perltidy again.
1016
# Thus we retain whatever executable flags were set.
1017
if ( $rOpts->{'format'} eq 'tidy' ) {
1018
chmod( $input_file_permissions | 0600, $output_file );
1021
# else use default permissions for html and any other format
1024
if ( $logger_object && $rOpts->{'check-syntax'} ) {
1026
check_syntax( $ifname, $ofname, $logger_object, $rOpts );
1030
$logger_object->finish( $infile_syntax_ok, $formatter )
1032
} # end of loop to process all files
1033
} # end of main program
1036
sub fileglob_to_re {
1038
# modified (corrected) from version in find2perl
1040
$x =~ s#([./^\$()])#\\$1#g; # escape special characters
1041
$x =~ s#\*#.*#g; # '*' -> '.*'
1042
$x =~ s#\?#.#g; # '?' -> '.'
1043
"^$x\\z"; # match whole word
1046
sub make_extension {
1048
# Make a file extension, including any leading '.' if necessary
1049
# The '.' may actually be an '_' under VMS
1050
my ( $extension, $default, $dot ) = @_;
1052
# Use the default if none specified
1053
$extension = $default unless ($extension);
1055
# Only extensions with these leading characters get a '.'
1056
# This rule gives the user some freedom
1057
if ( $extension =~ /^[a-zA-Z0-9]/ ) {
1058
$extension = $dot . $extension;
1063
sub write_logfile_header {
1065
$rOpts, $logger_object, $config_file,
1066
$rraw_options, $Windows_type, $readable_options
1068
$logger_object->write_logfile_entry(
1069
"perltidy version $VERSION log file on a $^O system, OLD_PERL_VERSION=$]\n"
1071
if ($Windows_type) {
1072
$logger_object->write_logfile_entry("Windows type is $Windows_type\n");
1074
my $options_string = join( ' ', @$rraw_options );
1077
$logger_object->write_logfile_entry(
1078
"Found Configuration File >>> $config_file \n");
1080
$logger_object->write_logfile_entry(
1081
"Configuration and command line parameters for this run:\n");
1082
$logger_object->write_logfile_entry("$options_string\n");
1084
if ( $rOpts->{'DEBUG'} || $rOpts->{'show-options'} ) {
1085
$rOpts->{'logfile'} = 1; # force logfile to be saved
1086
$logger_object->write_logfile_entry(
1087
"Final parameter set for this run\n");
1088
$logger_object->write_logfile_entry(
1089
"------------------------------------\n");
1091
$logger_object->write_logfile_entry($readable_options);
1093
$logger_object->write_logfile_entry(
1094
"------------------------------------\n");
1096
$logger_object->write_logfile_entry(
1097
"To find error messages search for 'WARNING' with your editor\n");
1100
sub generate_options {
1102
######################################################################
1103
# Generate and return references to:
1104
# @option_string - the list of options to be passed to Getopt::Long
1105
# @defaults - the list of default options
1106
# %expansion - a hash showing how all abbreviations are expanded
1107
# %category - a hash giving the general category of each option
1108
# %option_range - a hash giving the valid ranges of certain options
1110
# Note: a few options are not documented in the man page and usage
1111
# message. This is because these are experimental or debug options and
1112
# may or may not be retained in future versions.
1114
# Here are the undocumented flags as far as I know. Any of them
1115
# may disappear at any time. They are mainly for fine-tuning
1118
# fll --> fuzzy-line-length # a trivial parameter which gets
1119
# turned off for the extrude option
1120
# which is mainly for debugging
1121
# chk --> check-multiline-quotes # check for old bug; to be deleted
1122
# scl --> short-concatenation-item-length # helps break at '.'
1123
# recombine # for debugging line breaks
1124
# valign # for debugging vertical alignment
1125
# I --> DIAGNOSTICS # for debugging
1126
######################################################################
1128
# here is a summary of the Getopt codes:
1129
# <none> does not take an argument
1130
# =s takes a mandatory string
1131
# :s takes an optional string (DO NOT USE - filenames will get eaten up)
1132
# =i takes a mandatory integer
1133
# :i takes an optional integer (NOT RECOMMENDED - can cause trouble)
1134
# ! does not take an argument and may be negated
1135
# i.e., -foo and -nofoo are allowed
1136
# a double dash signals the end of the options list
1138
#---------------------------------------------------------------
1139
# Define the option string passed to GetOptions.
1140
#---------------------------------------------------------------
1142
my @option_string = ();
1144
my %option_category = ();
1145
my %option_range = ();
1146
my $rexpansion = \%expansion;
1148
# names of categories in manual
1149
# leading integers will allow sorting
1150
my @category_name = (
1152
'1. Basic formatting options',
1153
'2. Code indentation control',
1154
'3. Whitespace control',
1155
'4. Comment controls',
1156
'5. Linebreak controls',
1157
'6. Controlling list formatting',
1158
'7. Retaining or ignoring existing line breaks',
1159
'8. Blank line control',
1160
'9. Other controls',
1162
'11. pod2html options',
1163
'12. Controlling HTML properties',
1167
# These options are parsed directly by perltidy:
1170
# However, they are included in the option set so that they will
1171
# be seen in the options dump.
1173
# These long option names have no abbreviations or are treated specially
1174
@option_string = qw(
1183
my $category = 13; # Debugging
1184
foreach (@option_string) {
1185
my $opt = $_; # must avoid changing the actual flag
1187
$option_category{$opt} = $category_name[$category];
1190
$category = 11; # HTML
1191
$option_category{html} = $category_name[$category];
1193
# routine to install and check options
1194
my $add_option = sub {
1195
my ( $long_name, $short_name, $flag ) = @_;
1196
push @option_string, $long_name . $flag;
1197
$option_category{$long_name} = $category_name[$category];
1199
if ( $expansion{$short_name} ) {
1200
my $existing_name = $expansion{$short_name}[0];
1202
"redefining abbreviation $short_name for $long_name; already used for $existing_name\n";
1204
$expansion{$short_name} = [$long_name];
1205
if ( $flag eq '!' ) {
1206
my $nshort_name = 'n' . $short_name;
1207
my $nolong_name = 'no' . $long_name;
1208
if ( $expansion{$nshort_name} ) {
1209
my $existing_name = $expansion{$nshort_name}[0];
1211
"attempting to redefine abbreviation $nshort_name for $nolong_name; already used for $existing_name\n";
1213
$expansion{$nshort_name} = [$nolong_name];
1218
# Install long option names which have a simple abbreviation.
1219
# Options with code '!' get standard negation ('no' for long names,
1220
# 'n' for abbreviations). Categories follow the manual.
1222
###########################
1223
$category = 0; # I/O_Control
1224
###########################
1225
$add_option->( 'backup-and-modify-in-place', 'b', '!' );
1226
$add_option->( 'backup-file-extension', 'bext', '=s' );
1227
$add_option->( 'force-read-binary', 'f', '!' );
1228
$add_option->( 'format', 'fmt', '=s' );
1229
$add_option->( 'logfile', 'log', '!' );
1230
$add_option->( 'logfile-gap', 'g', ':i' );
1231
$add_option->( 'outfile', 'o', '=s' );
1232
$add_option->( 'output-file-extension', 'oext', '=s' );
1233
$add_option->( 'output-path', 'opath', '=s' );
1234
$add_option->( 'profile', 'pro', '=s' );
1235
$add_option->( 'quiet', 'q', '!' );
1236
$add_option->( 'standard-error-output', 'se', '!' );
1237
$add_option->( 'standard-output', 'st', '!' );
1238
$add_option->( 'warning-output', 'w', '!' );
1240
# options which are both toggle switches and values moved here
1241
# to hide from tidyview (which does not show category 0 flags):
1242
# -ole moved here from category 1
1243
# -sil moved here from category 2
1244
$add_option->( 'output-line-ending', 'ole', '=s' );
1245
$add_option->( 'starting-indentation-level', 'sil', '=i' );
1247
########################################
1248
$category = 1; # Basic formatting options
1249
########################################
1250
$add_option->( 'check-syntax', 'syn', '!' );
1251
$add_option->( 'entab-leading-whitespace', 'et', '=i' );
1252
$add_option->( 'indent-columns', 'i', '=i' );
1253
$add_option->( 'maximum-line-length', 'l', '=i' );
1254
$add_option->( 'perl-syntax-check-flags', 'pscf', '=s' );
1255
$add_option->( 'preserve-line-endings', 'ple', '!' );
1256
$add_option->( 'tabs', 't', '!' );
1258
########################################
1259
$category = 2; # Code indentation control
1260
########################################
1261
$add_option->( 'continuation-indentation', 'ci', '=i' );
1262
$add_option->( 'line-up-parentheses', 'lp', '!' );
1263
$add_option->( 'outdent-keyword-list', 'okwl', '=s' );
1264
$add_option->( 'outdent-keywords', 'okw', '!' );
1265
$add_option->( 'outdent-labels', 'ola', '!' );
1266
$add_option->( 'outdent-long-quotes', 'olq', '!' );
1267
$add_option->( 'indent-closing-brace', 'icb', '!' );
1268
$add_option->( 'closing-token-indentation', 'cti', '=i' );
1269
$add_option->( 'closing-paren-indentation', 'cpi', '=i' );
1270
$add_option->( 'closing-brace-indentation', 'cbi', '=i' );
1271
$add_option->( 'closing-square-bracket-indentation', 'csbi', '=i' );
1272
$add_option->( 'brace-left-and-indent', 'bli', '!' );
1273
$add_option->( 'brace-left-and-indent-list', 'blil', '=s' );
1275
########################################
1276
$category = 3; # Whitespace control
1277
########################################
1278
$add_option->( 'add-semicolons', 'asc', '!' );
1279
$add_option->( 'add-whitespace', 'aws', '!' );
1280
$add_option->( 'block-brace-tightness', 'bbt', '=i' );
1281
$add_option->( 'brace-tightness', 'bt', '=i' );
1282
$add_option->( 'delete-old-whitespace', 'dws', '!' );
1283
$add_option->( 'delete-semicolons', 'dsm', '!' );
1284
$add_option->( 'nospace-after-keyword', 'nsak', '=s' );
1285
$add_option->( 'nowant-left-space', 'nwls', '=s' );
1286
$add_option->( 'nowant-right-space', 'nwrs', '=s' );
1287
$add_option->( 'paren-tightness', 'pt', '=i' );
1288
$add_option->( 'space-after-keyword', 'sak', '=s' );
1289
$add_option->( 'space-for-semicolon', 'sfs', '!' );
1290
$add_option->( 'space-function-paren', 'sfp', '!' );
1291
$add_option->( 'space-keyword-paren', 'skp', '!' );
1292
$add_option->( 'space-terminal-semicolon', 'sts', '!' );
1293
$add_option->( 'square-bracket-tightness', 'sbt', '=i' );
1294
$add_option->( 'square-bracket-vertical-tightness', 'sbvt', '=i' );
1295
$add_option->( 'square-bracket-vertical-tightness-closing', 'sbvtc', '=i' );
1296
$add_option->( 'trim-qw', 'tqw', '!' );
1297
$add_option->( 'want-left-space', 'wls', '=s' );
1298
$add_option->( 'want-right-space', 'wrs', '=s' );
1300
########################################
1301
$category = 4; # Comment controls
1302
########################################
1303
$add_option->( 'closing-side-comment-else-flag', 'csce', '=i' );
1304
$add_option->( 'closing-side-comment-interval', 'csci', '=i' );
1305
$add_option->( 'closing-side-comment-list', 'cscl', '=s' );
1306
$add_option->( 'closing-side-comment-maximum-text', 'csct', '=i' );
1307
$add_option->( 'closing-side-comment-prefix', 'cscp', '=s' );
1308
$add_option->( 'closing-side-comment-warnings', 'cscw', '!' );
1309
$add_option->( 'closing-side-comments', 'csc', '!' );
1310
$add_option->( 'format-skipping', 'fs', '!' );
1311
$add_option->( 'format-skipping-begin', 'fsb', '=s' );
1312
$add_option->( 'format-skipping-end', 'fse', '=s' );
1313
$add_option->( 'hanging-side-comments', 'hsc', '!' );
1314
$add_option->( 'indent-block-comments', 'ibc', '!' );
1315
$add_option->( 'indent-spaced-block-comments', 'isbc', '!' );
1316
$add_option->( 'fixed-position-side-comment', 'fpsc', '=i' );
1317
$add_option->( 'minimum-space-to-comment', 'msc', '=i' );
1318
$add_option->( 'outdent-long-comments', 'olc', '!' );
1319
$add_option->( 'outdent-static-block-comments', 'osbc', '!' );
1320
$add_option->( 'static-block-comment-prefix', 'sbcp', '=s' );
1321
$add_option->( 'static-block-comments', 'sbc', '!' );
1322
$add_option->( 'static-side-comment-prefix', 'sscp', '=s' );
1323
$add_option->( 'static-side-comments', 'ssc', '!' );
1325
########################################
1326
$category = 5; # Linebreak controls
1327
########################################
1328
$add_option->( 'add-newlines', 'anl', '!' );
1329
$add_option->( 'block-brace-vertical-tightness', 'bbvt', '=i' );
1330
$add_option->( 'block-brace-vertical-tightness-list', 'bbvtl', '=s' );
1331
$add_option->( 'brace-vertical-tightness', 'bvt', '=i' );
1332
$add_option->( 'brace-vertical-tightness-closing', 'bvtc', '=i' );
1333
$add_option->( 'cuddled-else', 'ce', '!' );
1334
$add_option->( 'delete-old-newlines', 'dnl', '!' );
1335
$add_option->( 'opening-brace-always-on-right', 'bar', '!' );
1336
$add_option->( 'opening-brace-on-new-line', 'bl', '!' );
1337
$add_option->( 'opening-hash-brace-right', 'ohbr', '!' );
1338
$add_option->( 'opening-paren-right', 'opr', '!' );
1339
$add_option->( 'opening-square-bracket-right', 'osbr', '!' );
1340
$add_option->( 'opening-sub-brace-on-new-line', 'sbl', '!' );
1341
$add_option->( 'paren-vertical-tightness', 'pvt', '=i' );
1342
$add_option->( 'paren-vertical-tightness-closing', 'pvtc', '=i' );
1343
$add_option->( 'stack-closing-hash-brace', 'schb', '!' );
1344
$add_option->( 'stack-closing-paren', 'scp', '!' );
1345
$add_option->( 'stack-closing-square-bracket', 'scsb', '!' );
1346
$add_option->( 'stack-opening-hash-brace', 'sohb', '!' );
1347
$add_option->( 'stack-opening-paren', 'sop', '!' );
1348
$add_option->( 'stack-opening-square-bracket', 'sosb', '!' );
1349
$add_option->( 'vertical-tightness', 'vt', '=i' );
1350
$add_option->( 'vertical-tightness-closing', 'vtc', '=i' );
1351
$add_option->( 'want-break-after', 'wba', '=s' );
1352
$add_option->( 'want-break-before', 'wbb', '=s' );
1353
$add_option->( 'break-after-all-operators', 'baao', '!' );
1354
$add_option->( 'break-before-all-operators', 'bbao', '!' );
1355
$add_option->( 'keep-interior-semicolons', 'kis', '!' );
1357
########################################
1358
$category = 6; # Controlling list formatting
1359
########################################
1360
$add_option->( 'break-at-old-comma-breakpoints', 'boc', '!' );
1361
$add_option->( 'comma-arrow-breakpoints', 'cab', '=i' );
1362
$add_option->( 'maximum-fields-per-table', 'mft', '=i' );
1364
########################################
1365
$category = 7; # Retaining or ignoring existing line breaks
1366
########################################
1367
$add_option->( 'break-at-old-keyword-breakpoints', 'bok', '!' );
1368
$add_option->( 'break-at-old-logical-breakpoints', 'bol', '!' );
1369
$add_option->( 'break-at-old-ternary-breakpoints', 'bot', '!' );
1370
$add_option->( 'ignore-old-breakpoints', 'iob', '!' );
1372
########################################
1373
$category = 8; # Blank line control
1374
########################################
1375
$add_option->( 'blanks-before-blocks', 'bbb', '!' );
1376
$add_option->( 'blanks-before-comments', 'bbc', '!' );
1377
$add_option->( 'blanks-before-subs', 'bbs', '!' );
1378
$add_option->( 'long-block-line-count', 'lbl', '=i' );
1379
$add_option->( 'maximum-consecutive-blank-lines', 'mbl', '=i' );
1380
$add_option->( 'swallow-optional-blank-lines', 'sob', '!' );
1382
########################################
1383
$category = 9; # Other controls
1384
########################################
1385
$add_option->( 'delete-block-comments', 'dbc', '!' );
1386
$add_option->( 'delete-closing-side-comments', 'dcsc', '!' );
1387
$add_option->( 'delete-pod', 'dp', '!' );
1388
$add_option->( 'delete-side-comments', 'dsc', '!' );
1389
$add_option->( 'tee-block-comments', 'tbc', '!' );
1390
$add_option->( 'tee-pod', 'tp', '!' );
1391
$add_option->( 'tee-side-comments', 'tsc', '!' );
1392
$add_option->( 'look-for-autoloader', 'lal', '!' );
1393
$add_option->( 'look-for-hash-bang', 'x', '!' );
1394
$add_option->( 'look-for-selfloader', 'lsl', '!' );
1395
$add_option->( 'pass-version-line', 'pvl', '!' );
1397
########################################
1398
$category = 13; # Debugging
1399
########################################
1400
$add_option->( 'DEBUG', 'D', '!' );
1401
$add_option->( 'DIAGNOSTICS', 'I', '!' );
1402
$add_option->( 'check-multiline-quotes', 'chk', '!' );
1403
$add_option->( 'dump-defaults', 'ddf', '!' );
1404
$add_option->( 'dump-long-names', 'dln', '!' );
1405
$add_option->( 'dump-options', 'dop', '!' );
1406
$add_option->( 'dump-profile', 'dpro', '!' );
1407
$add_option->( 'dump-short-names', 'dsn', '!' );
1408
$add_option->( 'dump-token-types', 'dtt', '!' );
1409
$add_option->( 'dump-want-left-space', 'dwls', '!' );
1410
$add_option->( 'dump-want-right-space', 'dwrs', '!' );
1411
$add_option->( 'fuzzy-line-length', 'fll', '!' );
1412
$add_option->( 'help', 'h', '' );
1413
$add_option->( 'short-concatenation-item-length', 'scl', '=i' );
1414
$add_option->( 'show-options', 'opt', '!' );
1415
$add_option->( 'version', 'v', '' );
1417
#---------------------------------------------------------------------
1419
# The Perl::Tidy::HtmlWriter will add its own options to the string
1420
Perl::Tidy::HtmlWriter->make_getopt_long_names( \@option_string );
1422
########################################
1423
# Set categories 10, 11, 12
1424
########################################
1425
# Based on their known order
1426
$category = 12; # HTML properties
1427
foreach my $opt (@option_string) {
1428
my $long_name = $opt;
1429
$long_name =~ s/(!|=.*|:.*)$//;
1430
unless ( defined( $option_category{$long_name} ) ) {
1431
if ( $long_name =~ /^html-linked/ ) {
1432
$category = 10; # HTML options
1434
elsif ( $long_name =~ /^pod2html/ ) {
1435
$category = 11; # Pod2html
1437
$option_category{$long_name} = $category_name[$category];
1441
#---------------------------------------------------------------
1442
# Assign valid ranges to certain options
1443
#---------------------------------------------------------------
1444
# In the future, these may be used to make preliminary checks
1445
# hash keys are long names
1446
# If key or value is undefined:
1447
# strings may have any value
1448
# integer ranges are >=0
1449
# If value is defined:
1450
# value is [qw(any valid words)] for strings
1451
# value is [min, max] for integers
1452
# if min is undefined, there is no lower limit
1453
# if max is undefined, there is no upper limit
1454
# Parameters not listed here have defaults
1456
'format' => [ 'tidy', 'html', 'user' ],
1457
'output-line-ending' => [ 'dos', 'win', 'mac', 'unix' ],
1459
'block-brace-tightness' => [ 0, 2 ],
1460
'brace-tightness' => [ 0, 2 ],
1461
'paren-tightness' => [ 0, 2 ],
1462
'square-bracket-tightness' => [ 0, 2 ],
1464
'block-brace-vertical-tightness' => [ 0, 2 ],
1465
'brace-vertical-tightness' => [ 0, 2 ],
1466
'brace-vertical-tightness-closing' => [ 0, 2 ],
1467
'paren-vertical-tightness' => [ 0, 2 ],
1468
'paren-vertical-tightness-closing' => [ 0, 2 ],
1469
'square-bracket-vertical-tightness' => [ 0, 2 ],
1470
'square-bracket-vertical-tightness-closing' => [ 0, 2 ],
1471
'vertical-tightness' => [ 0, 2 ],
1472
'vertical-tightness-closing' => [ 0, 2 ],
1474
'closing-brace-indentation' => [ 0, 3 ],
1475
'closing-paren-indentation' => [ 0, 3 ],
1476
'closing-square-bracket-indentation' => [ 0, 3 ],
1477
'closing-token-indentation' => [ 0, 3 ],
1479
'closing-side-comment-else-flag' => [ 0, 2 ],
1480
'comma-arrow-breakpoints' => [ 0, 3 ],
1483
# Note: we could actually allow negative ci if someone really wants it:
1484
# $option_range{'continuation-indentation'} = [ undef, undef ];
1486
#---------------------------------------------------------------
1487
# Assign default values to the above options here, except
1488
# for 'outfile' and 'help'.
1489
# These settings should approximate the perlstyle(1) suggestions.
1490
#---------------------------------------------------------------
1495
blanks-before-blocks
1496
blanks-before-comments
1498
block-brace-tightness=0
1499
block-brace-vertical-tightness=0
1501
brace-vertical-tightness-closing=0
1502
brace-vertical-tightness=0
1503
break-at-old-logical-breakpoints
1504
break-at-old-ternary-breakpoints
1505
break-at-old-keyword-breakpoints
1506
comma-arrow-breakpoints=1
1508
closing-side-comment-interval=6
1509
closing-side-comment-maximum-text=20
1510
closing-side-comment-else-flag=0
1511
closing-paren-indentation=0
1512
closing-brace-indentation=0
1513
closing-square-bracket-indentation=0
1514
continuation-indentation=2
1518
hanging-side-comments
1519
indent-block-comments
1521
long-block-line-count=8
1524
maximum-consecutive-blank-lines=1
1525
maximum-fields-per-table=0
1526
maximum-line-length=80
1527
minimum-space-to-comment=4
1528
nobrace-left-and-indent
1530
nodelete-old-whitespace
1535
nostatic-side-comments
1536
noswallow-optional-blank-lines
1541
outdent-long-comments
1543
paren-vertical-tightness-closing=0
1544
paren-vertical-tightness=0
1548
short-concatenation-item-length=8
1550
square-bracket-tightness=1
1551
square-bracket-vertical-tightness-closing=0
1552
square-bracket-vertical-tightness=0
1553
static-block-comments
1556
backup-file-extension=bak
1560
html-table-of-contents
1564
push @defaults, "perl-syntax-check-flags=-c -T";
1566
#---------------------------------------------------------------
1567
# Define abbreviations which will be expanded into the above primitives.
1568
# These may be defined recursively.
1569
#---------------------------------------------------------------
1572
'freeze-newlines' => [qw(noadd-newlines nodelete-old-newlines)],
1573
'fnl' => [qw(freeze-newlines)],
1574
'freeze-whitespace' => [qw(noadd-whitespace nodelete-old-whitespace)],
1575
'fws' => [qw(freeze-whitespace)],
1576
'indent-only' => [qw(freeze-newlines freeze-whitespace)],
1577
'outdent-long-lines' => [qw(outdent-long-quotes outdent-long-comments)],
1578
'nooutdent-long-lines' =>
1579
[qw(nooutdent-long-quotes nooutdent-long-comments)],
1580
'noll' => [qw(nooutdent-long-lines)],
1581
'io' => [qw(indent-only)],
1582
'delete-all-comments' =>
1583
[qw(delete-block-comments delete-side-comments delete-pod)],
1584
'nodelete-all-comments' =>
1585
[qw(nodelete-block-comments nodelete-side-comments nodelete-pod)],
1586
'dac' => [qw(delete-all-comments)],
1587
'ndac' => [qw(nodelete-all-comments)],
1588
'gnu' => [qw(gnu-style)],
1589
'pbp' => [qw(perl-best-practices)],
1590
'tee-all-comments' =>
1591
[qw(tee-block-comments tee-side-comments tee-pod)],
1592
'notee-all-comments' =>
1593
[qw(notee-block-comments notee-side-comments notee-pod)],
1594
'tac' => [qw(tee-all-comments)],
1595
'ntac' => [qw(notee-all-comments)],
1596
'html' => [qw(format=html)],
1597
'nhtml' => [qw(format=tidy)],
1598
'tidy' => [qw(format=tidy)],
1600
'break-after-comma-arrows' => [qw(cab=0)],
1601
'nobreak-after-comma-arrows' => [qw(cab=1)],
1602
'baa' => [qw(cab=0)],
1603
'nbaa' => [qw(cab=1)],
1605
'break-at-old-trinary-breakpoints' => [qw(bot)],
1607
'cti=0' => [qw(cpi=0 cbi=0 csbi=0)],
1608
'cti=1' => [qw(cpi=1 cbi=1 csbi=1)],
1609
'cti=2' => [qw(cpi=2 cbi=2 csbi=2)],
1610
'icp' => [qw(cpi=2 cbi=2 csbi=2)],
1611
'nicp' => [qw(cpi=0 cbi=0 csbi=0)],
1613
'closing-token-indentation=0' => [qw(cpi=0 cbi=0 csbi=0)],
1614
'closing-token-indentation=1' => [qw(cpi=1 cbi=1 csbi=1)],
1615
'closing-token-indentation=2' => [qw(cpi=2 cbi=2 csbi=2)],
1616
'indent-closing-paren' => [qw(cpi=2 cbi=2 csbi=2)],
1617
'noindent-closing-paren' => [qw(cpi=0 cbi=0 csbi=0)],
1619
'vt=0' => [qw(pvt=0 bvt=0 sbvt=0)],
1620
'vt=1' => [qw(pvt=1 bvt=1 sbvt=1)],
1621
'vt=2' => [qw(pvt=2 bvt=2 sbvt=2)],
1623
'vertical-tightness=0' => [qw(pvt=0 bvt=0 sbvt=0)],
1624
'vertical-tightness=1' => [qw(pvt=1 bvt=1 sbvt=1)],
1625
'vertical-tightness=2' => [qw(pvt=2 bvt=2 sbvt=2)],
1627
'vtc=0' => [qw(pvtc=0 bvtc=0 sbvtc=0)],
1628
'vtc=1' => [qw(pvtc=1 bvtc=1 sbvtc=1)],
1629
'vtc=2' => [qw(pvtc=2 bvtc=2 sbvtc=2)],
1631
'vertical-tightness-closing=0' => [qw(pvtc=0 bvtc=0 sbvtc=0)],
1632
'vertical-tightness-closing=1' => [qw(pvtc=1 bvtc=1 sbvtc=1)],
1633
'vertical-tightness-closing=2' => [qw(pvtc=2 bvtc=2 sbvtc=2)],
1635
'otr' => [qw(opr ohbr osbr)],
1636
'opening-token-right' => [qw(opr ohbr osbr)],
1637
'notr' => [qw(nopr nohbr nosbr)],
1638
'noopening-token-right' => [qw(nopr nohbr nosbr)],
1640
'sot' => [qw(sop sohb sosb)],
1641
'nsot' => [qw(nsop nsohb nsosb)],
1642
'stack-opening-tokens' => [qw(sop sohb sosb)],
1643
'nostack-opening-tokens' => [qw(nsop nsohb nsosb)],
1645
# -sct / -nsct turn stacking on or off for all three kinds of closing
# tokens at once (paren, hash brace, square bracket).  The long spellings
# are 'stack-closing-tokens' and 'nostack-closing-tokens'; the negated
# long name must NOT reuse 'nostack-opening-tokens', which would silently
# overwrite the opening-token entry in this hash.
'sct' => [qw(scp schb scsb)],
'stack-closing-tokens' => [qw(scp schb scsb)],
'nsct' => [qw(nscp nschb nscsb)],
'nostack-closing-tokens' => [qw(nscp nschb nscsb)],
1650
# 'mangle' originally deleted pod and comments, but to keep it
1651
# reversible, it no longer does. But if you really want to
1652
# delete them, just use:
1655
# An interesting use for 'mangle' is to do this:
1656
# perltidy -mangle myfile.pl -st | perltidy -o myfile.pl.new
1657
# which will form as many one-line blocks as possible
1663
delete-old-whitespace
1666
maximum-consecutive-blank-lines=0
1667
maximum-line-length=100000
1671
noblanks-before-blocks
1672
noblanks-before-subs
1677
# 'extrude' originally deleted pod and comments, but to keep it
1678
# reversible, it no longer does. But if you really want to
1679
# delete them, just use
1682
# An interesting use for 'extrude' is to do this:
1683
# perltidy -extrude myfile.pl -st | perltidy -o myfile.pl.new
1684
# which will break up all one-line blocks.
1691
delete-old-whitespace
1694
maximum-consecutive-blank-lines=0
1695
maximum-line-length=1
1698
noblanks-before-blocks
1699
noblanks-before-subs
1706
# this style tries to follow the GNU Coding Standards (which do
1707
# not really apply to perl but which are followed by some perl
1711
lp bl noll pt=2 bt=2 sbt=2 cpi=1 csbi=1 cbi=1
1715
# Style suggested in Damian Conway's Perl Best Practices
1716
'perl-best-practices' => [
1717
qw(l=78 i=4 ci=4 st se vt=2 cti=0 pt=1 bt=1 sbt=1 bbt=1 nsfs nolq),
1718
q(wbb=% + - * / x != == >= <= =~ !~ < > | & = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=)
1721
# Additional styles can be added here
1724
Perl::Tidy::HtmlWriter->make_abbreviated_names( \%expansion );
1726
# Uncomment next line to dump all expansions for debugging:
1727
# dump_short_names(\%expansion);
1729
\@option_string, \@defaults, \%expansion,
1730
\%option_category, \%option_range
1733
} # end of generate_options
1735
sub process_command_line {
1738
$perltidyrc_stream, $is_Windows, $Windows_type,
1739
$rpending_complaint, $dump_options_type
1745
$roption_string, $rdefaults, $rexpansion,
1746
$roption_category, $roption_range
1747
) = generate_options();
1749
#---------------------------------------------------------------
1750
# set the defaults by passing the above list through GetOptions
1751
#---------------------------------------------------------------
1757
# do not load the defaults if we are just dumping perltidyrc
1758
unless ( $dump_options_type eq 'perltidyrc' ) {
1759
for $i (@$rdefaults) { push @ARGV, "--" . $i }
1762
# Patch to save the user's Getopt::Long configuration
1763
# and set to Getopt::Long defaults. Use eval to avoid
1764
# breaking old versions of Perl without these routines.
1766
eval { $glc = Getopt::Long::Configure() };
1768
eval { Getopt::Long::ConfigDefaults() };
1770
else { $glc = undef }
1772
if ( !GetOptions( \%Opts, @$roption_string ) ) {
1773
die "Programming Bug: error in setting default options";
1776
# Patch to put the previous Getopt::Long configuration back
1777
eval { Getopt::Long::Configure($glc) } if defined $glc;
1781
my @raw_options = ();
1782
my $config_file = "";
1783
my $saw_ignore_profile = 0;
1784
my $saw_extrude = 0;
1785
my $saw_dump_profile = 0;
1788
#---------------------------------------------------------------
1789
# Take a first look at the command-line parameters. Do as many
1790
# immediate dumps as possible, which can avoid confusion if the
1791
# perltidyrc file has an error.
1792
#---------------------------------------------------------------
1793
foreach $i (@ARGV) {
1796
if ( $i =~ /^-(npro|noprofile|no-profile)$/ ) {
1797
$saw_ignore_profile = 1;
1800
# note: this must come before -pro and -profile, below:
1801
elsif ( $i =~ /^-(dump-profile|dpro)$/ ) {
1802
$saw_dump_profile = 1;
1804
elsif ( $i =~ /^-(pro|profile)=(.+)/ ) {
1807
"Only one -pro=filename allowed, using '$2' instead of '$config_file'\n";
1810
unless ( -e $config_file ) {
1811
warn "cannot find file given with -pro=$config_file: $!\n";
1815
elsif ( $i =~ /^-(pro|profile)=?$/ ) {
1816
die "usage: -pro=filename or --profile=filename, no spaces\n";
1818
elsif ( $i =~ /^-extrude$/ ) {
1821
elsif ( $i =~ /^-(help|h|HELP|H)$/ ) {
1825
elsif ( $i =~ /^-(version|v)$/ ) {
1829
elsif ( $i =~ /^-(dump-defaults|ddf)$/ ) {
1830
dump_defaults(@$rdefaults);
1833
elsif ( $i =~ /^-(dump-long-names|dln)$/ ) {
1834
dump_long_names(@$roption_string);
1837
elsif ( $i =~ /^-(dump-short-names|dsn)$/ ) {
1838
dump_short_names($rexpansion);
1841
elsif ( $i =~ /^-(dump-token-types|dtt)$/ ) {
1842
Perl::Tidy::Tokenizer->dump_token_types(*STDOUT);
1847
if ( $saw_dump_profile && $saw_ignore_profile ) {
1848
warn "No profile to dump because of -npro\n";
1852
#---------------------------------------------------------------
1853
# read any .perltidyrc configuration file
1854
#---------------------------------------------------------------
1855
unless ($saw_ignore_profile) {
1857
# resolve possible conflict between $perltidyrc_stream passed
1858
# as call parameter to perltidy and -pro=filename on command
1860
if ($perltidyrc_stream) {
1863
Conflict: a perltidyrc configuration file was specified both as this
1864
perltidy call parameter: $perltidyrc_stream
1865
and with this -profile=$config_file.
1866
Using -profile=$config_file.
1870
$config_file = $perltidyrc_stream;
1874
# look for a config file if we don't have one yet
1875
my $rconfig_file_chatter;
1876
$$rconfig_file_chatter = "";
1878
find_config_file( $is_Windows, $Windows_type, $rconfig_file_chatter,
1879
$rpending_complaint )
1880
unless $config_file;
1882
# open any config file
1885
( $fh_config, $config_file ) =
1886
Perl::Tidy::streamhandle( $config_file, 'r' );
1887
unless ($fh_config) {
1888
$$rconfig_file_chatter .=
1889
"# $config_file exists but cannot be opened\n";
1893
if ($saw_dump_profile) {
1894
if ($saw_dump_profile) {
1895
dump_config_file( $fh_config, $config_file,
1896
$rconfig_file_chatter );
1903
my ( $rconfig_list, $death_message ) =
1904
read_config_file( $fh_config, $config_file, $rexpansion );
1905
die $death_message if ($death_message);
1907
# process any .perltidyrc parameters right now so we can
1909
if (@$rconfig_list) {
1910
local @ARGV = @$rconfig_list;
1912
expand_command_abbreviations( $rexpansion, \@raw_options,
1915
if ( !GetOptions( \%Opts, @$roption_string ) ) {
1917
"Error in this config file: $config_file \nUse -npro to ignore this file, -h for help'\n";
1920
# Anything left in this local @ARGV is an error and must be
1921
# invalid bare words from the configuration file. We cannot
1922
# check this earlier because bare words may have been valid
1923
# values for parameters. We had to wait for GetOptions to have
1927
my $str = "\'" . pop(@ARGV) . "\'";
1928
# Test definedness rather than truth: a leftover bare word of '0' (or an
# empty string) is false in Perl and would otherwise end the listing early.
while ( defined( my $param = pop(@ARGV) ) ) {
1929
if ( length($str) < 70 ) {
1930
$str .= ", '$param'";
1938
There are $count unrecognized values in the configuration file '$config_file':
1940
Use leading dashes for parameters. Use -npro to ignore this file.
1944
# Undo any options which cause premature exit. They are not
1945
# appropriate for a config file, and it could be hard to
1946
# diagnose the cause of the premature exit.
1955
dump-want-left-space
1956
dump-want-right-space
1964
if ( defined( $Opts{$_} ) ) {
1966
warn "ignoring --$_ in config file: $config_file\n";
1973
#---------------------------------------------------------------
1974
# now process the command line parameters
1975
#---------------------------------------------------------------
1976
expand_command_abbreviations( $rexpansion, \@raw_options, $config_file );
1978
if ( !GetOptions( \%Opts, @$roption_string ) ) {
1979
die "Error on command line; for help try 'perltidy -h'\n";
1982
return ( \%Opts, $config_file, \@raw_options, $saw_extrude, $roption_string,
1983
$rexpansion, $roption_category, $roption_range );
1984
} # end of process_command_line
1988
my ( $rOpts, $is_Windows, $Windows_type, $rpending_complaint ) = @_;
1990
#---------------------------------------------------------------
1991
# check and handle any interactions among the basic options..
1992
#---------------------------------------------------------------
1994
# -vt, -vtc, and -cti are abbreviations, but under
1995
# msdos, an unquoted input parameter like vtc=1 will be
1996
# seen as 2 parameters, vtc and 1, so the abbreviations
1997
# won't be seen. Therefore, we will catch them here if
2000
if ( defined $rOpts->{'vertical-tightness'} ) {
2001
my $vt = $rOpts->{'vertical-tightness'};
2002
$rOpts->{'paren-vertical-tightness'} = $vt;
2003
$rOpts->{'square-bracket-vertical-tightness'} = $vt;
2004
$rOpts->{'brace-vertical-tightness'} = $vt;
2007
if ( defined $rOpts->{'vertical-tightness-closing'} ) {
2008
my $vtc = $rOpts->{'vertical-tightness-closing'};
2009
$rOpts->{'paren-vertical-tightness-closing'} = $vtc;
2010
$rOpts->{'square-bracket-vertical-tightness-closing'} = $vtc;
2011
$rOpts->{'brace-vertical-tightness-closing'} = $vtc;
2014
if ( defined $rOpts->{'closing-token-indentation'} ) {
2015
my $cti = $rOpts->{'closing-token-indentation'};
2016
$rOpts->{'closing-square-bracket-indentation'} = $cti;
2017
$rOpts->{'closing-brace-indentation'} = $cti;
2018
$rOpts->{'closing-paren-indentation'} = $cti;
2021
# In quiet mode, there is no log file and hence no way to report
2022
# results of syntax check, so don't do it.
2023
if ( $rOpts->{'quiet'} ) {
2024
$rOpts->{'check-syntax'} = 0;
2027
# can't check syntax if no output
2028
if ( $rOpts->{'format'} ne 'tidy' ) {
2029
$rOpts->{'check-syntax'} = 0;
2032
# Never let Windows 9x/Me systems run syntax check -- this will prevent a
2033
# wide variety of nasty problems on these systems, because they cannot
2034
# reliably run backticks. Don't even think about changing this!
2035
if ( $rOpts->{'check-syntax'}
2037
&& ( !$Windows_type || $Windows_type =~ /^(9|Me)/ ) )
2039
$rOpts->{'check-syntax'} = 0;
2042
# It's really a bad idea to check syntax as root unless you wrote
2043
# the script yourself. FIXME: not sure if this works with VMS
2044
unless ($is_Windows) {
2046
if ( $< == 0 && $rOpts->{'check-syntax'} ) {
2047
$rOpts->{'check-syntax'} = 0;
2048
$$rpending_complaint .=
2049
"Syntax check deactivated for safety; you shouldn't run this as root\n";
2053
# see if user set a non-negative logfile-gap
2054
if ( defined( $rOpts->{'logfile-gap'} ) && $rOpts->{'logfile-gap'} >= 0 ) {
2056
# a zero gap will be taken as a 1
2057
if ( $rOpts->{'logfile-gap'} == 0 ) {
2058
$rOpts->{'logfile-gap'} = 1;
2061
# setting a non-negative logfile gap causes logfile to be saved
2062
$rOpts->{'logfile'} = 1;
2065
# not setting logfile gap, or setting it negative, causes default of 50
2067
$rOpts->{'logfile-gap'} = 50;
2070
# set short-cut flag when only indentation is to be done.
2071
# Note that the user may or may not have already set the
2073
if ( !$rOpts->{'add-whitespace'}
2074
&& !$rOpts->{'delete-old-whitespace'}
2075
&& !$rOpts->{'add-newlines'}
2076
&& !$rOpts->{'delete-old-newlines'} )
2078
$rOpts->{'indent-only'} = 1;
2081
# -isbc implies -ibc
2082
if ( $rOpts->{'indent-spaced-block-comments'} ) {
2083
$rOpts->{'indent-block-comments'} = 1;
2086
# -bli flag implies -bl
2087
if ( $rOpts->{'brace-left-and-indent'} ) {
2088
$rOpts->{'opening-brace-on-new-line'} = 1;
2091
if ( $rOpts->{'opening-brace-always-on-right'}
2092
&& $rOpts->{'opening-brace-on-new-line'} )
2095
Conflict: you specified both 'opening-brace-always-on-right' (-bar) and
2096
'opening-brace-on-new-line' (-bl). Ignoring -bl.
2098
$rOpts->{'opening-brace-on-new-line'} = 0;
2101
# it simplifies things if -bl is 0 rather than undefined
2102
if ( !defined( $rOpts->{'opening-brace-on-new-line'} ) ) {
2103
$rOpts->{'opening-brace-on-new-line'} = 0;
2106
# -sbl defaults to -bl if not defined
2107
if ( !defined( $rOpts->{'opening-sub-brace-on-new-line'} ) ) {
2108
$rOpts->{'opening-sub-brace-on-new-line'} =
2109
$rOpts->{'opening-brace-on-new-line'};
2112
# set shortcut flag if no blanks to be written
2113
unless ( $rOpts->{'maximum-consecutive-blank-lines'} ) {
2114
$rOpts->{'swallow-optional-blank-lines'} = 1;
2117
if ( $rOpts->{'entab-leading-whitespace'} ) {
2118
if ( $rOpts->{'entab-leading-whitespace'} < 0 ) {
2119
warn "-et=n must use a positive integer; ignoring -et\n";
2120
$rOpts->{'entab-leading-whitespace'} = undef;
2123
# entab leading whitespace has priority over the older 'tabs' option
2124
if ( $rOpts->{'tabs'} ) { $rOpts->{'tabs'} = 0; }
2128
sub expand_command_abbreviations {
2130
# go through @ARGV and expand any abbreviations
2132
my ( $rexpansion, $rraw_options, $config_file ) = @_;
2135
# set a pass limit to prevent an infinite loop;
2136
# 10 should be plenty, but it may be increased to allow deeply
2137
# nested expansions.
2138
my $max_passes = 10;
2141
# keep looping until all expansions have been converted into actual
2143
for ( my $pass_count = 0 ; $pass_count <= $max_passes ; $pass_count++ ) {
2145
my $abbrev_count = 0;
2147
# loop over each item in @ARGV..
2148
foreach $word (@ARGV) {
2150
# convert any leading 'no-' to just 'no'
2151
if ( $word =~ /^(-[-]?no)-(.*)/ ) { $word = $1 . $2 }
2153
# if it is a dash flag (instead of a file name)..
2154
if ( $word =~ /^-[-]?([\w\-]+)(.*)/ ) {
2159
# save the raw input for debug output in case of circular refs
2160
if ( $pass_count == 0 ) {
2161
push( @$rraw_options, $word );
2164
# recombine abbreviation and flag, if necessary,
2165
# to allow abbreviations with arguments such as '-vt=1'
2166
if ( $rexpansion->{ $abr . $flags } ) {
2167
$abr = $abr . $flags;
2171
# if we see this dash item in the expansion hash..
2172
if ( $rexpansion->{$abr} ) {
2175
# stuff all of the words that it expands to into the
2176
# new arg list for the next pass
2177
foreach my $abbrev ( @{ $rexpansion->{$abr} } ) {
2178
next unless $abbrev; # for safety; shouldn't happen
2179
push( @new_argv, '--' . $abbrev . $flags );
2183
# not in expansion hash, must be actual long name
2185
push( @new_argv, $word );
2189
# not a dash item, so just save it for the next pass
2191
push( @new_argv, $word );
2193
} # end of this pass
2195
# update parameter list @ARGV to the new one
2197
last unless ( $abbrev_count > 0 );
2199
# make sure we are not in an infinite loop
2200
if ( $pass_count == $max_passes ) {
2202
"I'm tired. We seem to be in an infinite loop trying to expand aliases.\n";
2203
print STDERR "Here are the raw options\n";
2205
print STDERR "(@$rraw_options)\n";
2206
my $num = @new_argv;
2209
print STDERR "After $max_passes passes here is ARGV\n";
2210
print STDERR "(@new_argv)\n";
2213
print STDERR "After $max_passes passes ARGV has $num entries\n";
2218
Please check your configuration file $config_file for circular-references.
2219
To deactivate it, use -npro.
2224
Program bug - circular-references in the %expansion hash, probably due to
2225
a recent program change.
2228
} # end of check for circular references
2229
} # end of loop over all passes
2232
# Debug routine -- this will dump the expansion hash
2233
sub dump_short_names {

    # Debug aid: write a short banner to STDOUT followed by one line per
    # key of the expansion hash, in sorted key order, showing the list
    # that the key expands to.
    #
    #   $rexpansion - reference to a hash whose values are references to
    #                 arrays of names
    my ($rexpansion) = @_;

    print STDOUT <<'END_OF_BANNER';
List of short names. This list shows how all abbreviations are
translated into other abbreviations and, eventually, into long names.
New abbreviations may be defined in a .perltidyrc file.
For a list of all long names, use perltidy --dump-long-names (-dln).
--------------------------------------------------------------------------
END_OF_BANNER

    # one "name --> expansion list" line per abbreviation
    for my $short_name ( sort keys %{$rexpansion} ) {
        my $rtargets = $rexpansion->{$short_name};
        print STDOUT "$short_name --> @{$rtargets}\n";
    }
}
2248
sub check_vms_filename {
2250
# given a valid filename (the perltidy input file)
2251
# create a modified filename and separator character
2254
# Contributed by Michael Cartmell
2256
my ( $base, $path ) = fileparse( $_[0] );
2258
# remove explicit ; version
2259
$base =~ s/;-?\d*$//
2261
# remove explicit . version ie two dots in filename NB ^ escapes a dot
2262
or $base =~ s/( # begin capture $1
2263
(?:^|[^^])\. # match a dot not preceded by a caret
2264
(?: # followed by nothing
2266
.*[^^] # anything ending in a non caret
2269
\.-?\d*$ # match . version number
2272
# normalise filename, if there are no unescaped dots then append one
2273
$base .= '.' unless $base =~ /(?:^|[^^])\./;
2275
# if we don't already have an extension then we just append the extension
2276
my $separator = ( $base =~ /\.$/ ) ? "" : "_";
2277
return ( $path . $base, $separator );
2282
# TODO: are these more standard names?
2283
# Win32s Win95 Win98 WinMe WinNT3.51 WinNT4 Win2000 WinXP/.Net Win2003
2285
# Returns a string that determines what MS OS we are on.
2286
# Returns win32s,95,98,Me,NT3.51,NT4,2000,XP/.Net,Win2003
2287
# Returns blank string if not an MS system.
2288
# Original code contributed by: Yves Orton
2289
# We need to know this to decide where to look for config files
2291
my $rpending_complaint = shift;
2293
return $os unless $^O =~ /win32|dos/i; # is it a MS box?
2295
# Systems built from Perl source may not have Win32.pm
2296
# But probably have Win32::GetOSVersion() anyway so the
2297
# following line is not 'required':
2298
# return $os unless eval('require Win32');
2300
# Use the standard API call to determine the version
2301
my ( $undef, $major, $minor, $build, $id );
2302
eval { ( $undef, $major, $minor, $build, $id ) = Win32::GetOSVersion() };
2305
# NAME ID MAJOR MINOR
2306
# Windows NT 4 2 4 0
2307
# Windows 2000 2 5 0
2309
# Windows Server 2003 2 5 2
2311
return "win32s" unless $id; # If id==0 then its a win32s box.
2312
$os = { # Magic numbers from MSDN
2313
# documentation of GetOSVersion
2320
0 => "2000", # or NT 4, see below
2327
# If $os is undefined, the above code is out of date. Suggested updates
2329
unless ( defined $os ) {
2331
$$rpending_complaint .= <<EOS;
2332
Error trying to discover Win_OS_Type: $id:$major:$minor Has no name of record!
2333
We won't be able to look for a system-wide config file.
2337
# Unfortunately the logic used for the various versions isn't so clever..
2338
# so we have to handle an outside case.
2339
return ( $os eq "2000" && $major != 5 ) ? "NT4" : $os;
2344
( $^O !~ /win32|dos/i )
2347
&& ( $^O ne 'MacOS' );
2350
sub look_for_Windows {

    # Determine whether we are running under a Microsoft OS and, if so,
    # which Windows sub-type it is.
    #
    # Parameter:
    #   $rpending_complaint - scalar reference which is handed on to
    #                         Win_OS_Type
    # Returns the list ( $is_Windows, $Windows_type ):
    #   $is_Windows   - true if $^O matches /win32|dos/i
    #   $Windows_type - the value returned by Win_OS_Type, or undef when
    #                   $is_Windows is false
    my $rpending_complaint = shift;
    my $is_Windows         = ( $^O =~ /win32|dos/i );

    # Declare first and assign conditionally.  The one-line form
    # 'my $x = ... if $condition' has undefined behavior in Perl (see
    # perlsyn, "Statement Modifiers") and must be avoided.
    my $Windows_type;
    if ($is_Windows) {
        $Windows_type = Win_OS_Type($rpending_complaint);
    }
    return ( $is_Windows, $Windows_type );
}
2360
sub find_config_file {
2362
# look for a .perltidyrc configuration file
2363
my ( $is_Windows, $Windows_type, $rconfig_file_chatter,
2364
$rpending_complaint ) = @_;
2366
$$rconfig_file_chatter .= "# Config file search...system reported as:";
2368
$$rconfig_file_chatter .= "Windows $Windows_type\n";
2371
$$rconfig_file_chatter .= " $^O\n";
2374
# sub to check file existence and record all tests
2375
my $exists_config_file = sub {
2376
my $config_file = shift;
2377
return 0 unless $config_file;
2378
$$rconfig_file_chatter .= "# Testing: $config_file\n";
2379
return -f $config_file;
2384
# look in current directory first
2385
$config_file = ".perltidyrc";
2386
return $config_file if $exists_config_file->($config_file);
2388
# Default environment vars.
2389
my @envs = qw(PERLTIDY HOME);
2391
# Check the NT/2k/XP locations, first a local machine def, then a
2393
push @envs, qw(USERPROFILE HOMESHARE) if $^O =~ /win32/i;
2395
# Now go through the environment ...
2396
foreach my $var (@envs) {
2397
$$rconfig_file_chatter .= "# Examining: \$ENV{$var}";
2398
if ( defined( $ENV{$var} ) ) {
2399
$$rconfig_file_chatter .= " = $ENV{$var}\n";
2401
# test ENV{ PERLTIDY } as file:
2402
if ( $var eq 'PERLTIDY' ) {
2403
$config_file = "$ENV{$var}";
2404
return $config_file if $exists_config_file->($config_file);
2407
# test ENV as directory:
2408
$config_file = catfile( $ENV{$var}, ".perltidyrc" );
2409
return $config_file if $exists_config_file->($config_file);
2412
$$rconfig_file_chatter .= "\n";
2416
# then look for a system-wide definition
2417
# where to look varies with OS
2420
if ($Windows_type) {
2421
my ( $os, $system, $allusers ) =
2422
Win_Config_Locs( $rpending_complaint, $Windows_type );
2424
# Check All Users directory, if there is one.
2426
$config_file = catfile( $allusers, ".perltidyrc" );
2427
return $config_file if $exists_config_file->($config_file);
2430
# Check system directory.
2431
$config_file = catfile( $system, ".perltidyrc" );
2432
return $config_file if $exists_config_file->($config_file);
2436
# Place to add customization code for other systems
2437
elsif ( $^O eq 'OS2' ) {
2439
elsif ( $^O eq 'MacOS' ) {
2441
elsif ( $^O eq 'VMS' ) {
2444
# Assume some kind of Unix
2447
$config_file = "/usr/local/etc/perltidyrc";
2448
return $config_file if $exists_config_file->($config_file);
2450
$config_file = "/etc/perltidyrc";
2451
return $config_file if $exists_config_file->($config_file);
2454
# Couldn't find a config file
2458
sub Win_Config_Locs {
2460
# In scalar context returns the OS name (95 98 ME NT3.51 NT4 2000 XP),
2461
# or undef if its not a win32 OS. In list context returns OS, System
2462
# Directory, and All Users Directory. All Users will be empty on a
2463
# 9x/Me box. Contributed by: Yves Orton.
2465
my $rpending_complaint = shift;
2466
my $os = (@_) ? shift : Win_OS_Type();
2472
if ( $os =~ /9[58]|Me/ ) {
2473
$system = "C:/Windows";
2475
elsif ( $os =~ /NT|XP|200?/ ) {
2476
$system = ( $os =~ /XP/ ) ? "C:/Windows/" : "C:/WinNT/";
2479
? "C:/WinNT/profiles/All Users/"
2480
: "C:/Documents and Settings/All Users/";
2484
# This currently would only happen on a win32s computer. I dont have
2485
# one to test, so I am unsure how to proceed. Suggestions welcome!
2486
$$rpending_complaint .=
2487
"I dont know a sensible place to look for config files on an $os system.\n";
2490
return wantarray ? ( $os, $system, $allusers ) : $os;
2493
sub dump_config_file {
2495
my $config_file = shift;
2496
my $rconfig_file_chatter = shift;
2497
print STDOUT "$$rconfig_file_chatter";
2499
print STDOUT "# Dump of file: '$config_file'\n";
2500
while ( my $line = $fh->getline() ) { print STDOUT $line }
2501
eval { $fh->close() };
2504
print STDOUT "# ...no config file found\n";
2508
sub read_config_file {
2510
my ( $fh, $config_file, $rexpansion ) = @_;
2511
my @config_list = ();
2513
# file is bad if non-empty $death_message is returned
2514
my $death_message = "";
2518
while ( my $line = $fh->getline() ) {
2521
next if $line =~ /^\s*#/; # skip full-line comment
2522
( $line, $death_message ) =
2523
strip_comment( $line, $config_file, $line_no );
2524
last if ($death_message);
2525
$line =~ s/^\s*(.*?)\s*$/$1/; # trim both ends
2528
# look for something of the general form
2533
if ( $line =~ /^((\w+)\s*\{)?([^}]*)(\})?$/ ) {
2534
my ( $newname, $body, $curly ) = ( $2, $3, $4 );
2536
# handle a new alias definition
2540
"No '}' seen after $name and before $newname in config file $config_file line $.\n";
2545
if ( ${$rexpansion}{$name} ) {
2547
my @names = sort keys %$rexpansion;
2549
"Here is a list of all installed aliases\n(@names)\n"
2550
. "Attempting to redefine alias ($name) in config file $config_file line $.\n";
2553
${$rexpansion}{$name} = [];
2559
my ( $rbody_parts, $msg ) = parse_args($body);
2561
$death_message = <<EOM;
2562
Error reading file '$config_file' at line number $line_no.
2564
Please fix this line or use -npro to avoid reading this file
2571
# remove leading dashes if this is an alias
2572
foreach (@$rbody_parts) { s/^\-+//; }
2573
push @{ ${$rexpansion}{$name} }, @$rbody_parts;
2576
push( @config_list, @$rbody_parts );
2583
"Unexpected '}' seen in config file $config_file line $.\n";
2590
eval { $fh->close() };
2591
return ( \@config_list, $death_message );
2596
my ( $instr, $config_file, $line_no ) = @_;
2599
# nothing to do if no comments
2600
if ( $instr !~ /#/ ) {
2601
return ( $instr, $msg );
2604
# use simple method of no quotes
2605
elsif ( $instr !~ /['"]/ ) {
2606
$instr =~ s/\s*\#.*$//; # simple trim
2607
return ( $instr, $msg );
2610
# handle comments and quotes
2612
my $quote_char = "";
2615
# looking for ending quote character
2617
if ( $instr =~ /\G($quote_char)/gc ) {
2621
elsif ( $instr =~ /\G(.)/gc ) {
2625
# error..we reached the end without seeing the ending quote char
2628
Error reading file $config_file at line number $line_no.
2629
Did not see ending quote character <$quote_char> in this text:
2631
Please fix this line or use -npro to avoid reading this file
2637
# accumulating characters and looking for start of a quoted string
2639
if ( $instr =~ /\G([\"\'])/gc ) {
2643
elsif ( $instr =~ /\G#/gc ) {
2646
elsif ( $instr =~ /\G(.)/gc ) {
2654
return ( $outstr, $msg );
2659
# Parse a command string containing multiple strings, with possible
2660
# quotes, into individual commands. It might look like this, for example:
2662
# -wba=" + - " -some-thing -wbb='. && ||'
2664
# There is no need, at present, to handle escaped quote characters.
2665
# (They are not perltidy tokens, so needn't be in strings).
2668
my @body_parts = ();
2669
my $quote_char = "";
2674
# looking for ending quote character
2676
if ( $body =~ /\G($quote_char)/gc ) {
2679
elsif ( $body =~ /\G(.)/gc ) {
2683
# error..we reached the end without seeing the ending quote char
2685
if ( length($part) ) { push @body_parts, $part; }
2687
Did not see ending quote character <$quote_char> in this text:
2694
# accumulating characters and looking for start of a quoted string
2696
if ( $body =~ /\G([\"\'])/gc ) {
2699
elsif ( $body =~ /\G(\s+)/gc ) {
2700
if ( length($part) ) { push @body_parts, $part; }
2703
elsif ( $body =~ /\G(.)/gc ) {
2707
if ( length($part) ) { push @body_parts, $part; }
2712
return ( \@body_parts, $msg );
2715
sub dump_long_names {
2717
my @names = sort @_;
2719
# Command line long names (passed to GetOptions)
2720
#---------------------------------------------------------------
2721
# here is a summary of the Getopt codes:
2722
# <none> does not take an argument
2723
# =s takes a mandatory string
2724
# :s takes an optional string
2725
# =i takes a mandatory integer
2726
# :i takes an optional integer
2727
# ! does not take an argument and may be negated
2728
# i.e., -foo and -nofoo are allowed
2729
# a double dash signals the end of the options list
2731
#---------------------------------------------------------------
2734
foreach (@names) { print STDOUT "$_\n" }
2738
sub dump_defaults {

    # Print the default command line options to STDOUT, one per line and
    # in sorted order, below a one-line heading.
    #
    # Parameters: the list of default option strings
    my @defaults = sort @_;
    print STDOUT "Default command line options:\n";

    # Iterate over the sorted copy.  Looping over @_ here would make the
    # sort above pointless and print the options in the caller's order.
    foreach my $default (@defaults) { print STDOUT "$default\n" }
}
2743
sub readable_options {
2745
# return options for this run as a string which could be
2746
# put in a perltidyrc file
2747
my ( $rOpts, $roption_string ) = @_;
2749
my $rGetopt_flags = \%Getopt_flags;
2750
my $readable_options = "# Final parameter set for this run.\n";
2751
$readable_options .=
2752
"# See utility 'perltidyrc_dump.pl' for nicer formatting.\n";
2753
foreach my $opt ( @{$roption_string} ) {
2755
if ( $opt =~ /(.*)(!|=.*)$/ ) {
2759
if ( defined( $rOpts->{$opt} ) ) {
2760
$rGetopt_flags->{$opt} = $flag;
2763
foreach my $key ( sort keys %{$rOpts} ) {
2764
my $flag = $rGetopt_flags->{$key};
2765
my $value = $rOpts->{$key};
2769
if ( $flag =~ /^=/ ) {
2770
if ( $value !~ /^\d+$/ ) { $value = '"' . $value . '"' }
2771
$suffix = "=" . $value;
2773
elsif ( $flag =~ /^!/ ) {
2774
$prefix .= "no" unless ($value);
2779
$readable_options .=
2780
"# ERROR in dump_options: unrecognized flag $flag for $key\n";
2783
$readable_options .= $prefix . $key . $suffix . "\n";
2785
return $readable_options;
2790
This is perltidy, v$VERSION
2792
Copyright 2000-2007, Steve Hancock
2794
Perltidy is free software and may be copied under the terms of the GNU
2795
General Public License, which is included in the distribution files.
2797
Complete documentation for perltidy can be found using 'man perltidy'
2798
or on the internet at http://perltidy.sourceforge.net.
2805
This is perltidy version $VERSION, a perl script indenter. Usage:
2807
perltidy [ options ] file1 file2 file3 ...
2808
(output goes to file1.tdy, file2.tdy, file3.tdy, ...)
2809
perltidy [ options ] file1 -o outfile
2810
perltidy [ options ] file1 -st >outfile
2811
perltidy [ options ] <infile >outfile
2813
Options have short and long forms. Short forms are shown; see
2814
man pages for long forms. Note: '=s' indicates a required string,
2815
and '=n' indicates a required integer.
2819
-o=file name of the output file (only if single input file)
2820
-oext=s change output extension from 'tdy' to s
2821
-opath=path change path to be 'path' for output files
2822
-b backup original to .bak and modify file in-place
2823
-bext=s change default backup extension from 'bak' to s
2824
-q deactivate error messages (for running under editor)
2825
-w include non-critical warning messages in the .ERR error output
2826
-syn run perl -c to check syntax (default under unix systems)
2827
-log save .LOG file, which has useful diagnostics
2828
-f force perltidy to read a binary file
2829
-g like -log but writes more detailed .LOG file, for debugging scripts
2830
-opt write the set of options actually used to a .LOG file
2831
-npro ignore .perltidyrc configuration command file
2832
-pro=file read configuration commands from file instead of .perltidyrc
2833
-st send output to standard output, STDOUT
2834
-se send error output to standard error output, STDERR
2835
-v display version number to standard output and quit
2838
-i=n use n columns per indentation level (default n=4)
2839
-t tabs: use one tab character per indentation level, not recommended
2840
-nt no tabs: use n spaces per indentation level (default)
2841
-et=n entab leading whitespace n spaces per tab; not recommended
2842
-io "indent only": just do indentation, no other formatting.
2843
-sil=n set starting indentation level to n; use if auto detection fails
2844
-ole=s specify output line ending (s=dos or win, mac, unix)
2845
-ple keep output line endings same as input (input must be filename)
2848
-fws freeze whitespace; this disables all whitespace changes
2849
and disables the following switches:
2850
-bt=n sets brace tightness, n= (0 = loose, 1=default, 2 = tight)
2851
-bbt same as -bt but for code block braces; same as -bt if not given
2852
-bbvt block braces vertically tight; use with -bl or -bli
2853
-bbvtl=s make -bbvt to apply to selected list of block types
2854
-pt=n paren tightness (n=0, 1 or 2)
2855
-sbt=n square bracket tightness (n=0, 1, or 2)
2856
-bvt=n brace vertical tightness,
2857
n=(0=open, 1=close unless multiple steps on a line, 2=always close)
2858
-pvt=n paren vertical tightness (see -bvt for n)
2859
-sbvt=n square bracket vertical tightness (see -bvt for n)
2860
-bvtc=n closing brace vertical tightness:
2861
n=(0=open, 1=sometimes close, 2=always close)
2862
-pvtc=n closing paren vertical tightness, see -bvtc for n.
2863
-sbvtc=n closing square bracket vertical tightness, see -bvtc for n.
2864
-ci=n sets continuation indentation=n, default is n=2 spaces
2865
-lp line up parentheses, brackets, and non-BLOCK braces
2866
-sfs add space before semicolon in for( ; ; )
2867
-aws allow perltidy to add whitespace (default)
2868
-dws delete all old non-essential whitespace
2869
-icb indent closing brace of a code block
2870
-cti=n closing indentation of paren, square bracket, or non-block brace:
2871
n=0 none, =1 align with opening, =2 one full indentation level
2872
-icp equivalent to -cti=2
2873
-wls=s want space left of tokens in string; i.e. -nwls='+ - * /'
2874
-wrs=s want space right of tokens in string;
2875
-sts put space before terminal semicolon of a statement
2876
-sak=s put space between keywords given in s and '(';
2877
-nsak=s no space between keywords in s and '('; i.e. -nsak='my our local'
2880
-fnl freeze newlines; this disables all line break changes
2881
and disables the following switches:
2882
-anl add newlines; ok to introduce new line breaks
2883
-bbs add blank line before subs and packages
2884
-bbc add blank line before block comments
2885
-bbb add blank line between major blocks
2886
-sob swallow optional blank lines
2887
-ce cuddled else; use this style: '} else {'
2888
-dnl delete old newlines (default)
2889
-mbl=n maximum consecutive blank lines (default=1)
2890
-l=n maximum line length; default n=80
2891
-bl opening brace on new line
2892
-sbl opening sub brace on new line. value of -bl is used if not given.
2893
-bli opening brace on new line and indented
2894
-bar opening brace always on right, even for long clauses
2895
-vt=n vertical tightness (requires -lp); n controls break after opening
2896
token: 0=never 1=no break if next line balanced 2=no break
2897
-vtc=n vertical tightness of closing container; n controls if closing
2898
token starts new line: 0=always 1=not unless list 2=never
2899
-wba=s want break after tokens in string; i.e. wba=': .'
2900
-wbb=s want break before tokens in string
2902
Following Old Breakpoints
2903
-kis keep interior semicolons. Allows multiple statements per line.
2904
-boc break at old comma breaks: turns off all automatic list formatting
2905
-bol break at old logical breakpoints: or, and, ||, && (default)
2906
-bok break at old list keyword breakpoints such as map, sort (default)
2907
-bot break at old conditional (ternary ?:) operator breakpoints (default)
2908
-cab=n break at commas after a comma-arrow (=>):
2909
n=0 break at all commas after =>
2910
n=1 stable: break unless this breaks an existing one-line container
2911
n=2 break only if a one-line container cannot be formed
2912
n=3 do not treat commas after => specially at all
2915
-ibc indent block comments (default)
2916
-isbc indent spaced block comments; may indent unless no leading space
2917
-msc=n minimum desired spaces to side comment, default 4
2918
-fpsc=n fix position for side comments; default 0;
2919
-csc add or update closing side comments after closing BLOCK brace
2920
-dcsc delete closing side comments created by a -csc command
2921
-cscp=s change closing side comment prefix to be other than '## end'
2922
-cscl=s change closing side comment to apply to selected list of blocks
2923
-csci=n minimum number of lines needed to apply a -csc tag, default n=6
2924
-csct=n maximum number of columns of appended text, default n=20
2925
-cscw causes warning if old side comment is overwritten with -csc
2927
-sbc use 'static block comments' identified by leading '##' (default)
2928
-sbcp=s change static block comment identifier to be other than '##'
2929
-osbc outdent static block comments
2931
-ssc use 'static side comments' identified by leading '##' (default)
2932
-sscp=s change static side comment identifier to be other than '##'
2934
Delete selected text
2935
-dac delete all comments AND pod
2936
-dbc delete block comments
2937
-dsc delete side comments
2940
Send selected text to a '.TEE' file
2941
-tac tee all comments AND pod
2942
-tbc tee block comments
2943
-tsc tee side comments
2947
-olq outdent long quoted strings (default)
2948
-olc outdent a long block comment line
2949
-ola outdent statement labels
2950
-okw outdent control keywords (redo, next, last, goto, return)
2951
-okwl=s specify alternative keywords for -okw command
2954
-mft=n maximum fields per table; default n=40
2955
-x do not format lines before hash-bang line (i.e., for VMS)
2956
-asc allows perltidy to add a ';' when missing (default)
2957
-dsm allows perltidy to delete an unnecessary ';' (default)
2959
Combinations of other parameters
2960
-gnu attempt to follow GNU Coding Standards as applied to perl
2961
-mangle remove as many newlines as possible (but keep comments and pods)
2962
-extrude insert as many newlines as possible
2964
Dump and die, debugging
2965
-dop dump options used in this run to standard output and quit
2966
-ddf dump default options to standard output and quit
2967
-dsn dump all option short names to standard output and quit
2968
-dln dump option long names to standard output and quit
2969
-dpro dump whatever configuration file is in effect to standard output
2970
-dtt dump all token types to standard output and quit
2973
-html write an html file (see 'man perl2web' for many options)
2974
Note: when -html is used, no indentation or formatting are done.
2975
Hint: try perltidy -html -css=mystyle.css filename.pl
2976
and edit mystyle.css to change the appearance of filename.html.
2977
-nnn gives line numbers
2978
-pre only writes out <pre>..</pre> code section
2979
-toc places a table of contents to subs at the top (default)
2980
-pod passes pod text through pod2html (default)
2981
-frm write html as a frame (3 files)
2982
-text=s extra extension for table of contents if -frm, default='toc'
2983
-sext=s extra extension for file content if -frm, default='src'
2985
A prefix of "n" negates short form toggle switches, and a prefix of "no"
2986
negates the long forms. For example, -nasc means don't add missing
2989
If you are unable to see this entire text, try "perltidy -h | more"
2990
For more detailed information, and additional options, try "man perltidy",
2991
or go to the perltidy home page at http://perltidy.sourceforge.net
2996
sub process_this_file {

    # Pass every line obtained from the line source to the formatter,
    # then let both objects wrap up.
    #
    #   $truth  - object providing get_line() and
    #             report_tokenization_errors()
    #   $beauty - object providing write_line() and finish_formatting()
    #
    # The value of the final report_tokenization_errors() call is what
    # this sub evaluates to.
    my ( $truth, $beauty ) = @_;

    # keep pumping lines through until get_line() returns a false value
    while (1) {
        my $line_of_tokens = $truth->get_line();
        last unless $line_of_tokens;
        $beauty->write_line($line_of_tokens);
    }

    # an exception raised while finishing is trapped by the eval and is
    # not rethrown
    eval { $beauty->finish_formatting() };
    $truth->report_tokenization_errors();
}
3012
# Use 'perl -c' to make sure that we did not create bad syntax
3013
# This is a very good independent check for programming errors
3015
# Given names of the input and output files, ($ifname, $ofname),
3016
# we do the following:
3017
# - check syntax of the input file
3018
# - if bad, all done (could be an incomplete code snippet)
3019
# - if infile syntax ok, then check syntax of the output file;
3020
# - if outfile syntax bad, issue warning; this implies a code bug!
3021
# - set and return flag "infile_syntax_ok" : =-1 bad 0 unknown 1 good
3023
my ( $ifname, $ofname, $logger_object, $rOpts ) = @_;
3024
my $infile_syntax_ok = 0;
3025
my $line_of_dashes = '-' x 42 . "\n";
3027
my $flags = $rOpts->{'perl-syntax-check-flags'};
3029
# be sure we invoke perl with -c
3030
# note: perl will accept repeated flags like '-c -c'. It is safest
3031
# to append another -c than try to find an interior bundled c, as
3032
# in -Tc, because such a 'c' might be in a quoted string, for example.
3033
if ( $flags !~ /(^-c|\s+-c)/ ) { $flags .= " -c" }
3035
# be sure we invoke perl with -x if requested
3036
# same comments about repeated parameters applies
3037
if ( $rOpts->{'look-for-hash-bang'} ) {
3038
if ( $flags !~ /(^-x|\s+-x)/ ) { $flags .= " -x" }
3041
# this shouldn't happen unless a temporary file couldn't be made
3042
if ( $ifname eq '-' ) {
3043
$logger_object->write_logfile_entry(
3044
"Cannot run perl -c on STDIN and STDOUT\n");
3045
return $infile_syntax_ok;
3048
$logger_object->write_logfile_entry(
3049
"checking input file syntax with perl $flags\n");
3050
$logger_object->write_logfile_entry($line_of_dashes);
3052
# Not all operating systems/shells support redirection of the standard
3054
my $error_redirection = ( $^O eq 'VMS' ) ? "" : '2>&1';
3056
my $perl_output = do_syntax_check( $ifname, $flags, $error_redirection );
3057
$logger_object->write_logfile_entry("$perl_output\n");
3059
if ( $perl_output =~ /syntax\s*OK/ ) {
3060
$infile_syntax_ok = 1;
3061
$logger_object->write_logfile_entry($line_of_dashes);
3062
$logger_object->write_logfile_entry(
3063
"checking output file syntax with perl $flags ...\n");
3064
$logger_object->write_logfile_entry($line_of_dashes);
3067
do_syntax_check( $ofname, $flags, $error_redirection );
3068
$logger_object->write_logfile_entry("$perl_output\n");
3070
unless ( $perl_output =~ /syntax\s*OK/ ) {
3071
$logger_object->write_logfile_entry($line_of_dashes);
3072
$logger_object->warning(
3073
"The output file has a syntax error when tested with perl $flags $ofname !\n"
3075
$logger_object->warning(
3076
"This implies an error in perltidy; the file $ofname is bad\n");
3077
$logger_object->report_definite_bug();
3079
# the perl version number will be helpful for diagnosing the problem
3080
$logger_object->write_logfile_entry(
3081
qx/perl -v $error_redirection/ . "\n" );
3086
# Only warn of perl -c syntax errors. Other messages,
3087
# such as missing modules, are too common. They can be
3088
# seen by running with perltidy -w
3089
$logger_object->complain("A syntax check using perl $flags gives: \n");
3090
$logger_object->complain($line_of_dashes);
3091
$logger_object->complain("$perl_output\n");
3092
$logger_object->complain($line_of_dashes);
3093
$infile_syntax_ok = -1;
3094
$logger_object->write_logfile_entry($line_of_dashes);
3095
$logger_object->write_logfile_entry(
3096
"The output file will not be checked because of input file problems\n"
3099
return $infile_syntax_ok;
3102
sub do_syntax_check {

    # Run the 'perl' command on one file and return the text the command
    # produced.
    #
    #   $fname             - name of the file to hand to perl
    #   $flags             - flag string for perl, passed as one quoted unit
    #   $error_redirection - text appended to the command line (for
    #                        example a shell redirection), may be empty
    my ( $fname, $flags, $error_redirection ) = @_;

    # The filename is wrapped in double quotes in case it has unusual
    # characters or spaces.  Example: the filename #CM11.pm# gives trouble
    # otherwise.
    my $quoted_fname = qq("$fname");

    # Under VMS something like -T would become -t (and an error), so the
    # flags are quoted too.  Double quotes seem to work on Unix, Windows
    # and VMS, but this may not work on all systems (single quotes do not
    # work under Windows).  It could become necessary to quote each flag
    # separately, such as: -"c" -"T", or to add system-dependent coding.
    my $quoted_flags = qq("$flags");

    # now wish for luck...
    my $command = join ' ', 'perl', $quoted_flags, $quoted_fname,
      $error_redirection;
    return `$command`;
}
3121
#####################################################################
3123
# This is a stripped down version of IO::Scalar
3124
# Given a reference to a scalar, it supplies either:
3125
# a getline method which reads lines (mode='r'), or
3126
# a print method which writes lines (mode='w')
3128
#####################################################################
3129
package Perl::Tidy::IOScalar;
3133
my ( $package, $rscalar, $mode ) = @_;
3134
my $ref = ref $rscalar;
3135
if ( $ref ne 'SCALAR' ) {
3137
------------------------------------------------------------------------
3138
expecting ref to SCALAR but got ref to ($ref); trace follows:
3139
------------------------------------------------------------------------
3143
if ( $mode eq 'w' ) {
3145
return bless [ $rscalar, $mode ], $package;
3147
elsif ( $mode eq 'r' ) {
3149
# Convert a scalar to an array.
3150
# This avoids looking for "\n" on each call to getline
3151
my @array = map { $_ .= "\n" } split /\n/, ${$rscalar};
3153
return bless [ \@array, $mode, $i_next ], $package;
3157
------------------------------------------------------------------------
3158
expecting mode = 'r' or 'w' but got mode ($mode); trace follows:
3159
------------------------------------------------------------------------
3166
my $mode = $self->[1];
3167
if ( $mode ne 'r' ) {
3169
------------------------------------------------------------------------
3170
getline call requires mode = 'r' but mode = ($mode); trace follows:
3171
------------------------------------------------------------------------
3174
my $i = $self->[2]++;
3175
##my $line = $self->[0]->[$i];
3176
return $self->[0]->[$i];
3181
my $mode = $self->[1];
3182
if ( $mode ne 'w' ) {
3184
------------------------------------------------------------------------
3185
print call requires mode = 'w' but mode = ($mode); trace follows:
3186
------------------------------------------------------------------------
3189
${ $self->[0] } .= $_[0];
3191
# No-op close method: does nothing and returns nothing (undef in scalar
# context, the empty list in list context).
sub close {
    return;
}
3193
#####################################################################
3195
# This is a stripped down version of IO::ScalarArray
3196
# Given a reference to an array, it supplies either:
3197
# a getline method which reads lines (mode='r'), or
3198
# a print method which writes lines (mode='w')
3200
# NOTE: this routine assumes that there aren't any embedded
3201
# newlines within any of the array elements. There are no checks
3204
#####################################################################
3205
package Perl::Tidy::IOScalarArray;
3209
my ( $package, $rarray, $mode ) = @_;
3210
my $ref = ref $rarray;
3211
if ( $ref ne 'ARRAY' ) {
3213
------------------------------------------------------------------------
3214
expecting ref to ARRAY but got ref to ($ref); trace follows:
3215
------------------------------------------------------------------------
3219
if ( $mode eq 'w' ) {
3221
return bless [ $rarray, $mode ], $package;
3223
elsif ( $mode eq 'r' ) {
3225
return bless [ $rarray, $mode, $i_next ], $package;
3229
------------------------------------------------------------------------
3230
expecting mode = 'r' or 'w' but got mode ($mode); trace follows:
3231
------------------------------------------------------------------------
3238
my $mode = $self->[1];
3239
if ( $mode ne 'r' ) {
3241
------------------------------------------------------------------------
3242
getline requires mode = 'r' but mode = ($mode); trace follows:
3243
------------------------------------------------------------------------
3246
my $i = $self->[2]++;
3247
return $self->[0]->[$i];
3252
my $mode = $self->[1];
3253
if ( $mode ne 'w' ) {
3255
------------------------------------------------------------------------
3256
print requires mode = 'w' but mode = ($mode); trace follows:
3257
------------------------------------------------------------------------
3260
push @{ $self->[0] }, $_[0];
3262
# No-op close method for the array-backed stream: nothing needs to be
# released, so it does nothing and returns an empty list (undef in
# scalar context).
sub close {
    return;
}
3264
#####################################################################
3266
# the Perl::Tidy::LineSource class supplies an object with a 'get_line()' method
3267
# which returns the next line to be parsed
3269
#####################################################################
3271
package Perl::Tidy::LineSource;
3275
my ( $class, $input_file, $rOpts, $rpending_logfile_message ) = @_;
3276
my $input_file_copy = undef;
3279
my $input_line_ending;
3280
if ( $rOpts->{'preserve-line-endings'} ) {
3281
$input_line_ending = Perl::Tidy::find_input_line_ending($input_file);
3284
( my $fh, $input_file ) = Perl::Tidy::streamhandle( $input_file, 'r' );
3285
return undef unless $fh;
3287
# in order to check output syntax when standard output is used,
3288
# or when it is an object, we have to make a copy of the file
3289
if ( ( $input_file eq '-' || ref $input_file ) && $rOpts->{'check-syntax'} )
3292
# Turning off syntax check when standard input is used.
3293
# The reason is that temporary files cause problems on
3295
$rOpts->{'check-syntax'} = 0;
3296
$input_file_copy = '-';
3298
$$rpending_logfile_message .= <<EOM;
3299
Note: --syntax check will be skipped because standard input is used
3306
_fh_copy => $fh_copy,
3307
_filename => $input_file,
3308
_input_file_copy => $input_file_copy,
3309
_input_line_ending => $input_line_ending,
3310
_rinput_buffer => [],
3315
sub get_input_file_copy_name {
3317
my $ifname = $self->{_input_file_copy};
3319
$ifname = $self->{_filename};
3324
sub close_input_file {
3326
eval { $self->{_fh}->close() };
3327
eval { $self->{_fh_copy}->close() } if $self->{_fh_copy};
3333
my $fh = $self->{_fh};
3334
my $fh_copy = $self->{_fh_copy};
3335
my $rinput_buffer = $self->{_rinput_buffer};
3337
if ( scalar(@$rinput_buffer) ) {
3338
$line = shift @$rinput_buffer;
3341
$line = $fh->getline();
3343
# patch to read raw mac files under unix, dos
3344
# see if the first line has embedded \r's
3345
if ( $line && !$self->{_started} ) {
3346
if ( $line =~ /[\015][^\015\012]/ ) {
3348
# found one -- break the line up and store in a buffer
3349
@$rinput_buffer = map { $_ . "\n" } split /\015/, $line;
3350
my $count = @$rinput_buffer;
3351
$line = shift @$rinput_buffer;
3353
$self->{_started}++;
3356
if ( $line && $fh_copy ) { $fh_copy->print($line); }
3360
#####################################################################
3362
# the Perl::Tidy::LineSink class supplies a write_line method for
3363
# actual file writing
3365
#####################################################################
3367
package Perl::Tidy::LineSink;
3371
my ( $class, $output_file, $tee_file, $line_separator, $rOpts,
3372
$rpending_logfile_message, $binmode )
3375
my $fh_copy = undef;
3377
my $output_file_copy = "";
3378
my $output_file_open = 0;
3380
if ( $rOpts->{'format'} eq 'tidy' ) {
3381
( $fh, $output_file ) = Perl::Tidy::streamhandle( $output_file, 'w' );
3382
unless ($fh) { die "Cannot write to output stream\n"; }
3383
$output_file_open = 1;
3385
if ( ref($fh) eq 'IO::File' ) {
3388
if ( $output_file eq '-' ) { binmode STDOUT }
3392
# in order to check output syntax when standard output is used,
3393
# or when it is an object, we have to make a copy of the file
3394
if ( $output_file eq '-' || ref $output_file ) {
3395
if ( $rOpts->{'check-syntax'} ) {
3397
# Turning off syntax check when standard output is used.
3398
# The reason is that temporary files cause problems on
3400
$rOpts->{'check-syntax'} = 0;
3401
$output_file_copy = '-';
3402
$$rpending_logfile_message .= <<EOM;
3403
Note: --syntax check will be skipped because standard output is used
3411
_fh_copy => $fh_copy,
3413
_output_file => $output_file,
3414
_output_file_open => $output_file_open,
3415
_output_file_copy => $output_file_copy,
3417
_tee_file => $tee_file,
3418
_tee_file_opened => 0,
3419
_line_separator => $line_separator,
3420
_binmode => $binmode,
3427
my $fh = $self->{_fh};
3428
my $fh_copy = $self->{_fh_copy};
3430
my $output_file_open = $self->{_output_file_open};
3432
$_[0] .= $self->{_line_separator};
3434
$fh->print( $_[0] ) if ( $self->{_output_file_open} );
3435
print $fh_copy $_[0] if ( $fh_copy && $self->{_output_file_copy} );
3437
if ( $self->{_tee_flag} ) {
3438
unless ( $self->{_tee_file_opened} ) { $self->really_open_tee_file() }
3439
my $fh_tee = $self->{_fh_tee};
3440
print $fh_tee $_[0];
3444
sub get_output_file_copy {
3446
my $ofname = $self->{_output_file_copy};
3448
$ofname = $self->{_output_file};
3455
$self->{_tee_flag} = 1;
3460
$self->{_tee_flag} = 0;
3463
sub really_open_tee_file {
3465
my $tee_file = $self->{_tee_file};
3467
$fh_tee = IO::File->new(">$tee_file")
3468
or die("couldn't open TEE file $tee_file: $!\n");
3469
binmode $fh_tee if $self->{_binmode};
3470
$self->{_tee_file_opened} = 1;
3471
$self->{_fh_tee} = $fh_tee;
3474
sub close_output_file {
3476
eval { $self->{_fh}->close() } if $self->{_output_file_open};
3477
eval { $self->{_fh_copy}->close() } if ( $self->{_output_file_copy} );
3478
$self->close_tee_file();
3481
sub close_tee_file {
3484
if ( $self->{_tee_file_opened} ) {
3485
eval { $self->{_fh_tee}->close() };
3486
$self->{_tee_file_opened} = 0;
3490
#####################################################################
3492
# The Perl::Tidy::Diagnostics class writes the DIAGNOSTICS file, which is
3493
# useful for program development.
3495
# Only one such file is created regardless of the number of input
3496
# files processed. This allows the results of processing many files
3497
# to be summarized in a single file.
3499
#####################################################################
3501
package Perl::Tidy::Diagnostics;
3507
_write_diagnostics_count => 0,
3508
_last_diagnostic_file => "",
3514
sub set_input_file {
3516
$self->{_input_file} = $_[0];
3519
# This is a diagnostic routine which is useful for program development.
3520
# Output from debug messages goes to a file named DIAGNOSTICS, where
3521
# they are labeled by file and line. This allows many files to be
3522
# scanned at once for some particular condition of interest.
3523
sub write_diagnostics {
3526
unless ( $self->{_write_diagnostics_count} ) {
3527
open DIAGNOSTICS, ">DIAGNOSTICS"
3528
or death("couldn't open DIAGNOSTICS: $!\n");
3531
my $last_diagnostic_file = $self->{_last_diagnostic_file};
3532
my $input_file = $self->{_input_file};
3533
if ( $last_diagnostic_file ne $input_file ) {
3534
print DIAGNOSTICS "\nFILE:$input_file\n";
3536
$self->{_last_diagnostic_file} = $input_file;
3537
my $input_line_number = Perl::Tidy::Tokenizer::get_input_line_number();
3538
print DIAGNOSTICS "$input_line_number:\t@_";
3539
$self->{_write_diagnostics_count}++;
3542
#####################################################################
3544
# The Perl::Tidy::Logger class writes the .LOG and .ERR files
3546
#####################################################################
3548
package Perl::Tidy::Logger;
3553
my ( $rOpts, $log_file, $warning_file, $saw_extrude ) = @_;
3555
# remove any old error output file
3556
unless ( ref($warning_file) ) {
3557
if ( -e $warning_file ) { unlink($warning_file) }
3561
_log_file => $log_file,
3562
_fh_warnings => undef,
3564
_fh_warnings => undef,
3565
_last_input_line_written => 0,
3566
_at_end_of_file => 0,
3568
_block_log_output => 0,
3569
_line_of_tokens => undef,
3570
_output_line_number => undef,
3571
_wrote_line_information_string => 0,
3572
_wrote_column_headings => 0,
3573
_warning_file => $warning_file,
3574
_warning_count => 0,
3575
_complaint_count => 0,
3576
_saw_code_bug => -1, # -1=no 0=maybe 1=for sure
3577
_saw_brace_error => 0,
3578
_saw_extrude => $saw_extrude,
3579
_output_array => [],
3583
sub close_log_file {
3586
if ( $self->{_fh_warnings} ) {
3587
eval { $self->{_fh_warnings}->close() };
3588
$self->{_fh_warnings} = undef;
3592
sub get_warning_count {
3594
return $self->{_warning_count};
3597
sub get_use_prefix {
3599
return $self->{_use_prefix};
3602
sub block_log_output {
3604
$self->{_block_log_output} = 1;
3607
sub unblock_log_output {
3609
$self->{_block_log_output} = 0;
3612
sub interrupt_logfile {
3614
$self->{_use_prefix} = 0;
3615
$self->warning("\n");
3616
$self->write_logfile_entry( '#' x 24 . " WARNING " . '#' x 25 . "\n" );
3619
sub resume_logfile {
3621
$self->write_logfile_entry( '#' x 60 . "\n" );
3622
$self->{_use_prefix} = 1;
3625
sub we_are_at_the_last_line {
3627
unless ( $self->{_wrote_line_information_string} ) {
3628
$self->write_logfile_entry("Last line\n\n");
3630
$self->{_at_end_of_file} = 1;
3633
# record some stuff in case we go down in flames
3636
my ( $line_of_tokens, $output_line_number ) = @_;
3637
my $input_line = $line_of_tokens->{_line_text};
3638
my $input_line_number = $line_of_tokens->{_line_number};
3640
# save line information in case we have to write a logfile message
3641
$self->{_line_of_tokens} = $line_of_tokens;
3642
$self->{_output_line_number} = $output_line_number;
3643
$self->{_wrote_line_information_string} = 0;
3645
my $last_input_line_written = $self->{_last_input_line_written};
3646
my $rOpts = $self->{_rOpts};
3649
( $input_line_number - $last_input_line_written ) >=
3650
$rOpts->{'logfile-gap'}
3652
|| ( $input_line =~ /^\s*(sub|package)\s+(\w+)/ )
3655
my $rlevels = $line_of_tokens->{_rlevels};
3656
my $structural_indentation_level = $$rlevels[0];
3657
$self->{_last_input_line_written} = $input_line_number;
3658
( my $out_str = $input_line ) =~ s/^\s*//;
3661
$out_str = ( '.' x $structural_indentation_level ) . $out_str;
3663
if ( length($out_str) > 35 ) {
3664
$out_str = substr( $out_str, 0, 35 ) . " ....";
3666
$self->logfile_output( "", "$out_str\n" );
3670
sub write_logfile_entry {
3673
# add leading >>> to avoid confusing error messages and code
3674
$self->logfile_output( ">>>", "@_" );
3677
sub write_column_headings {
3680
$self->{_wrote_column_headings} = 1;
3681
my $routput_array = $self->{_output_array};
3682
push @{$routput_array}, <<EOM;
3683
The nesting depths in the table below are at the start of the lines.
3684
The indicated output line numbers are not always exact.
3685
ci = levels of continuation indentation; bk = 1 if in BLOCK, 0 if not.
3687
in:out indent c b nesting code + messages; (messages begin with >>>)
3688
lines levels i k (code begins with one '.' per indent level)
3689
------ ----- - - -------- -------------------------------------------
3693
sub make_line_information_string {
3695
# make columns of information when a logfile message needs to go out
3697
my $line_of_tokens = $self->{_line_of_tokens};
3698
my $input_line_number = $line_of_tokens->{_line_number};
3699
my $line_information_string = "";
3700
if ($input_line_number) {
3702
my $output_line_number = $self->{_output_line_number};
3703
my $brace_depth = $line_of_tokens->{_curly_brace_depth};
3704
my $paren_depth = $line_of_tokens->{_paren_depth};
3705
my $square_bracket_depth = $line_of_tokens->{_square_bracket_depth};
3706
my $python_indentation_level =
3707
$line_of_tokens->{_python_indentation_level};
3708
my $rlevels = $line_of_tokens->{_rlevels};
3709
my $rnesting_tokens = $line_of_tokens->{_rnesting_tokens};
3710
my $rci_levels = $line_of_tokens->{_rci_levels};
3711
my $rnesting_blocks = $line_of_tokens->{_rnesting_blocks};
3713
my $structural_indentation_level = $$rlevels[0];
3715
$self->write_column_headings() unless $self->{_wrote_column_headings};
3717
# keep logfile columns aligned for scripts up to 999 lines;
3718
# for longer scripts it doesn't really matter
3719
my $extra_space = "";
3721
( $input_line_number < 10 ) ? " "
3722
: ( $input_line_number < 100 ) ? " "
3725
( $output_line_number < 10 ) ? " "
3726
: ( $output_line_number < 100 ) ? " "
3729
# there are 2 possible nesting strings:
3730
# the original which looks like this: (0 [1 {2
3731
# the new one, which looks like this: {{[
3732
# the new one is easier to read, and shows the order, but
3733
# could be arbitrarily long, so we use it unless it is too long
3734
my $nesting_string =
3735
"($paren_depth [$square_bracket_depth {$brace_depth";
3736
my $nesting_string_new = $$rnesting_tokens[0];
3738
my $ci_level = $$rci_levels[0];
3739
if ( $ci_level > 9 ) { $ci_level = '*' }
3740
my $bk = ( $$rnesting_blocks[0] =~ /1$/ ) ? '1' : '0';
3742
if ( length($nesting_string_new) <= 8 ) {
3744
$nesting_string_new . " " x ( 8 - length($nesting_string_new) );
3746
if ( $python_indentation_level < 0 ) { $python_indentation_level = 0 }
3747
$line_information_string =
3748
"L$input_line_number:$output_line_number$extra_space i$python_indentation_level:$structural_indentation_level $ci_level $bk $nesting_string";
3750
return $line_information_string;
3753
sub logfile_output {
3755
my ( $prompt, $msg ) = @_;
3756
return if ( $self->{_block_log_output} );
3758
my $routput_array = $self->{_output_array};
3759
if ( $self->{_at_end_of_file} || !$self->{_use_prefix} ) {
3760
push @{$routput_array}, "$msg";
3763
my $line_information_string = $self->make_line_information_string();
3764
$self->{_wrote_line_information_string} = 1;
3766
if ($line_information_string) {
3767
push @{$routput_array}, "$line_information_string $prompt$msg";
3770
push @{$routput_array}, "$msg";
3775
sub get_saw_brace_error {
3777
return $self->{_saw_brace_error};
3780
sub increment_brace_error {
3782
$self->{_saw_brace_error}++;
3787
use constant BRACE_WARNING_LIMIT => 10;
3788
my $saw_brace_error = $self->{_saw_brace_error};
3790
if ( $saw_brace_error < BRACE_WARNING_LIMIT ) {
3794
$self->{_saw_brace_error} = $saw_brace_error;
3796
if ( $saw_brace_error == BRACE_WARNING_LIMIT ) {
3797
$self->warning("No further warnings of this type will be given\n");
3803
# handle non-critical warning messages based on input flag
3805
my $rOpts = $self->{_rOpts};
3807
# these appear in .ERR output only if -w flag is used
3808
if ( $rOpts->{'warning-output'} ) {
3812
# otherwise, they go to the .LOG file
3814
$self->{_complaint_count}++;
3815
$self->write_logfile_entry(@_);
3821
# report errors to .ERR file (or stdout)
3823
use constant WARNING_LIMIT => 50;
3825
my $rOpts = $self->{_rOpts};
3826
unless ( $rOpts->{'quiet'} ) {
3828
my $warning_count = $self->{_warning_count};
3829
unless ($warning_count) {
3830
my $warning_file = $self->{_warning_file};
3832
if ( $rOpts->{'standard-error-output'} ) {
3833
$fh_warnings = *STDERR;
3836
( $fh_warnings, my $filename ) =
3837
Perl::Tidy::streamhandle( $warning_file, 'w' );
3838
$fh_warnings or die("couldn't open $filename $!\n");
3839
warn "## Please see file $filename\n";
3841
$self->{_fh_warnings} = $fh_warnings;
3844
my $fh_warnings = $self->{_fh_warnings};
3845
if ( $warning_count < WARNING_LIMIT ) {
3846
if ( $self->get_use_prefix() > 0 ) {
3847
my $input_line_number =
3848
Perl::Tidy::Tokenizer::get_input_line_number();
3849
$fh_warnings->print("$input_line_number:\t@_");
3850
$self->write_logfile_entry("WARNING: @_");
3853
$fh_warnings->print(@_);
3854
$self->write_logfile_entry(@_);
3858
$self->{_warning_count} = $warning_count;
3860
if ( $warning_count == WARNING_LIMIT ) {
3861
$fh_warnings->print("No further warnings will be given\n");
3866
# programming bug codes:
3868
# 0 = maybe, not sure.
3870
sub report_possible_bug {
3872
my $saw_code_bug = $self->{_saw_code_bug};
3873
$self->{_saw_code_bug} = ( $saw_code_bug < 0 ) ? 0 : $saw_code_bug;
3876
sub report_definite_bug {
3878
$self->{_saw_code_bug} = 1;
3881
sub ask_user_for_bug_report {
3884
my ( $infile_syntax_ok, $formatter ) = @_;
3885
my $saw_code_bug = $self->{_saw_code_bug};
3886
if ( ( $saw_code_bug == 0 ) && ( $infile_syntax_ok == 1 ) ) {
3887
$self->warning(<<EOM);
3889
You may have encountered a code bug in perltidy. If you think so, and
3890
the problem is not listed in the BUGS file at
3891
http://perltidy.sourceforge.net, please report it so that it can be
3892
corrected. Include the smallest possible script which has the problem,
3893
along with the .LOG file. See the manual pages for contact information.
3898
elsif ( $saw_code_bug == 1 ) {
3899
if ( $self->{_saw_extrude} ) {
3900
$self->warning(<<EOM);
3902
You may have encountered a bug in perltidy. However, since you are using the
3903
-extrude option, the problem may be with perl or one of its modules, which have
3904
occasional problems with this type of file. If you believe that the
3905
problem is with perltidy, and the problem is not listed in the BUGS file at
3906
http://perltidy.sourceforge.net, please report it so that it can be corrected.
3907
Include the smallest possible script which has the problem, along with the .LOG
3908
file. See the manual pages for contact information.
3913
$self->warning(<<EOM);
3915
Oops, you seem to have encountered a bug in perltidy. Please check the
3916
BUGS file at http://perltidy.sourceforge.net. If the problem is not
3917
listed there, please report it so that it can be corrected. Include the
3918
smallest possible script which produces this message, along with the
3919
.LOG file if appropriate. See the manual pages for contact information.
3920
Your efforts are appreciated.
3923
my $added_semicolon_count = 0;
3925
$added_semicolon_count =
3926
$formatter->get_added_semicolon_count();
3928
if ( $added_semicolon_count > 0 ) {
3929
$self->warning(<<EOM);
3931
The log file shows that perltidy added $added_semicolon_count semicolons.
3932
Please rerun with -nasc to see if that is the cause of the syntax error. Even
3933
if that is the problem, please report it so that it can be fixed.
3943
# called after all formatting to summarize errors
3945
my ( $infile_syntax_ok, $formatter ) = @_;
3947
my $rOpts = $self->{_rOpts};
3948
my $warning_count = $self->{_warning_count};
3949
my $saw_code_bug = $self->{_saw_code_bug};
3952
( $saw_code_bug == 0 && $infile_syntax_ok == 1 )
3953
|| $saw_code_bug == 1
3954
|| $rOpts->{'logfile'};
3955
my $log_file = $self->{_log_file};
3956
if ($warning_count) {
3957
if ($save_logfile) {
3958
$self->block_log_output(); # avoid echoing this to the logfile
3960
"The logfile $log_file may contain useful information\n");
3961
$self->unblock_log_output();
3964
if ( $self->{_complaint_count} > 0 ) {
3966
"To see $self->{_complaint_count} non-critical warnings rerun with -w\n"
3970
if ( $self->{_saw_brace_error}
3971
&& ( $rOpts->{'logfile-gap'} > 1 || !$save_logfile ) )
3973
$self->warning("To save a full .LOG file rerun with -g\n");
3976
$self->ask_user_for_bug_report( $infile_syntax_ok, $formatter );
3978
if ($save_logfile) {
3979
my $log_file = $self->{_log_file};
3980
my ( $fh, $filename ) = Perl::Tidy::streamhandle( $log_file, 'w' );
3982
my $routput_array = $self->{_output_array};
3983
foreach ( @{$routput_array} ) { $fh->print($_) }
3984
eval { $fh->close() };
3989
#####################################################################
3991
# The Perl::Tidy::DevNull class supplies a dummy print method
3993
#####################################################################
3995
package Perl::Tidy::DevNull;
3996
# Constructor: returns a new, empty hash-based object blessed into the
# class given as the first argument.
sub new {
    my ($class) = @_;
    return bless {}, $class;
}
3997
# Dummy print method: silently discards whatever it is given and returns
# an empty list (undef in scalar context).
sub print {
    return;
}
3998
# Dummy close method: does nothing and returns an empty list (undef in
# scalar context).
sub close {
    return;
}
4000
#####################################################################
4002
# The Perl::Tidy::HtmlWriter class writes a copy of the input stream in html
4004
#####################################################################
4006
package Perl::Tidy::HtmlWriter;
4016
%short_to_long_names
4020
$missing_html_entities
4023
# replace unsafe characters with HTML entity representation if HTML::Entities
4025
{ eval "use HTML::Entities"; $missing_html_entities = $@; }
4029
my ( $class, $input_file, $html_file, $extension, $html_toc_extension,
4030
$html_src_extension )
4033
my $html_file_opened = 0;
4035
( $html_fh, my $html_filename ) =
4036
Perl::Tidy::streamhandle( $html_file, 'w' );
4038
warn("can't open $html_file: $!\n");
4041
$html_file_opened = 1;
4043
if ( !$input_file || $input_file eq '-' || ref($input_file) ) {
4044
$input_file = "NONAME";
4047
# write the table of contents to a string
4049
my $html_toc_fh = Perl::Tidy::IOScalar->new( \$toc_string, 'w' );
4052
my @pre_string_stack;
4053
if ( $rOpts->{'html-pre-only'} ) {
4055
# pre section goes directly to the output stream
4056
$html_pre_fh = $html_fh;
4057
$html_pre_fh->print( <<"PRE_END");
4063
# pre section goes out to a temporary string
4065
$html_pre_fh = Perl::Tidy::IOScalar->new( \$pre_string, 'w' );
4066
push @pre_string_stack, \$pre_string;
4069
# pod text gets diverted if the 'pod2html' option is used
4072
if ( $rOpts->{'pod2html'} ) {
4073
if ( $rOpts->{'html-pre-only'} ) {
4074
undef $rOpts->{'pod2html'};
4077
eval "use Pod::Html";
4080
"unable to find Pod::Html; cannot use pod2html\n-npod disables this message\n";
4081
undef $rOpts->{'pod2html'};
4084
$html_pod_fh = Perl::Tidy::IOScalar->new( \$pod_string, 'w' );
4091
if ( $rOpts->{'frames'} ) {
4092
unless ($extension) {
4094
"cannot use frames without a specified output extension; ignoring -frm\n";
4095
undef $rOpts->{'frames'};
4098
$toc_filename = $input_file . $html_toc_extension . $extension;
4099
$src_filename = $input_file . $html_src_extension . $extension;
4103
# ----------------------------------------------------------
4104
# Output is now directed as follows:
4105
# html_toc_fh <-- table of contents items
4106
# html_pre_fh <-- the <pre> section of formatted code, except:
4107
# html_pod_fh <-- pod goes here with the pod2html option
4108
# ----------------------------------------------------------
4110
my $title = $rOpts->{'title'};
4112
( $title, my $path ) = fileparse($input_file);
4114
my $toc_item_count = 0;
4115
my $in_toc_package = "";
4118
_input_file => $input_file, # name of input file
4119
_title => $title, # title, unescaped
4120
_html_file => $html_file, # name of .html output file
4121
_toc_filename => $toc_filename, # for frames option
4122
_src_filename => $src_filename, # for frames option
4123
_html_file_opened => $html_file_opened, # a flag
4124
_html_fh => $html_fh, # the output stream
4125
_html_pre_fh => $html_pre_fh, # pre section goes here
4126
_rpre_string_stack => \@pre_string_stack, # stack of pre sections
4127
_html_pod_fh => $html_pod_fh, # pod goes here if pod2html
4128
_rpod_string => \$pod_string, # string holding pod
4129
_pod_cut_count => 0, # how many =cut's?
4130
_html_toc_fh => $html_toc_fh, # fh for table of contents
4131
_rtoc_string => \$toc_string, # string holding toc
4132
_rtoc_item_count => \$toc_item_count, # how many toc items
4133
_rin_toc_package => \$in_toc_package, # package name
4134
_rtoc_name_count => {}, # hash to track unique names
4135
_rpackage_stack => [], # stack to check for package
4137
_rlast_level => \$last_level, # brace indentation level
4143
# Add an item to the html table of contents.
4144
# This is called even if no table of contents is written,
4145
# because we still want to put the anchors in the <pre> text.
4146
# We are given an anchor name and its type; types are:
4147
# 'package', 'sub', '__END__', '__DATA__', 'EOF'
4148
# There must be an 'EOF' call at the end to wrap things up.
4150
my ( $name, $type ) = @_;
4151
my $html_toc_fh = $self->{_html_toc_fh};
4152
my $html_pre_fh = $self->{_html_pre_fh};
4153
my $rtoc_name_count = $self->{_rtoc_name_count};
4154
my $rtoc_item_count = $self->{_rtoc_item_count};
4155
my $rlast_level = $self->{_rlast_level};
4156
my $rin_toc_package = $self->{_rin_toc_package};
4157
my $rpackage_stack = $self->{_rpackage_stack};
4159
# packages contain sublists of subs, so to avoid errors all package
4160
# items are written and finished with the following routines
4161
my $end_package_list = sub {
4162
if ($$rin_toc_package) {
4163
$html_toc_fh->print("</ul>\n</li>\n");
4164
$$rin_toc_package = "";
4168
my $start_package_list = sub {
4169
my ( $unique_name, $package ) = @_;
4170
if ($$rin_toc_package) { $end_package_list->() }
4171
$html_toc_fh->print(<<EOM);
4172
<li><a href=\"#$unique_name\">package $package</a>
4175
$$rin_toc_package = $package;
4178
# start the table of contents on the first item
4179
unless ($$rtoc_item_count) {
4181
# but just quit if we hit EOF without any other entries
4182
# in this case, there will be no toc
4183
return if ( $type eq 'EOF' );
4184
$html_toc_fh->print( <<"TOC_END");
4185
<!-- BEGIN CODE INDEX --><a name="code-index"></a>
4189
$$rtoc_item_count++;
4191
# make a unique anchor name for this location:
4192
# - packages get a 'package-' prefix
4193
# - subs use their names
4194
my $unique_name = $name;
4195
if ( $type eq 'package' ) { $unique_name = "package-$name" }
4197
# append '-1', '-2', etc if necessary to make unique; this will
4198
# be unique because subs and packages cannot have a '-'
4199
if ( my $count = $rtoc_name_count->{ lc $unique_name }++ ) {
4200
$unique_name .= "-$count";
4203
# - all names get terminal '-' if pod2html is used, to avoid
4204
# conflicts with anchor names created by pod2html
4205
if ( $rOpts->{'pod2html'} ) { $unique_name .= '-' }
4207
# start/stop lists of subs
4208
if ( $type eq 'sub' ) {
4209
my $package = $rpackage_stack->[$$rlast_level];
4210
unless ($package) { $package = 'main' }
4212
# if we're already in a package/sub list, be sure it's the right
4213
# package or else close it
4214
if ( $$rin_toc_package && $$rin_toc_package ne $package ) {
4215
$end_package_list->();
4218
# start a package/sub list if necessary
4219
unless ($$rin_toc_package) {
4220
$start_package_list->( $unique_name, $package );
4224
# now write an entry in the toc for this item
4225
if ( $type eq 'package' ) {
4226
$start_package_list->( $unique_name, $name );
4228
elsif ( $type eq 'sub' ) {
4229
$html_toc_fh->print("<li><a href=\"#$unique_name\">$name</a></li>\n");
4232
$end_package_list->();
4233
$html_toc_fh->print("<li><a href=\"#$unique_name\">$name</a></li>\n");
4236
# write the anchor in the <pre> section
4237
$html_pre_fh->print("<a name=\"$unique_name\"></a>");
4239
# end the table of contents, if any, on the end of file
4240
if ( $type eq 'EOF' ) {
4241
$html_toc_fh->print( <<"TOC_END");
4243
<!-- END CODE INDEX -->
4250
# This is the official list of tokens which may be identified by the
4251
# user. Long names are used as getopt keys. Short names are
4252
# convenient short abbreviations for specifying input. Short names
4253
# somewhat resemble token type characters, but are often different
4254
# because they may only be alphanumeric, to allow command line
4255
# input. Also, note that because of case insensitivity of html,
4256
# this table must be in a single case only (I've chosen to use all
4258
# When adding NEW_TOKENS: update this hash table
4259
# short names => long names
4260
%short_to_long_names = (
4270
'pu' => 'punctuation',
4271
'i' => 'identifier',
4273
'h' => 'here-doc-target',
4274
'hh' => 'here-doc-text',
4276
'sc' => 'semicolon',
4277
'm' => 'subroutine',
4281
# Now we have to map actual token types into one of the above short
4282
# names; any token types not mapped will get 'punctuation'
4285
# The values of this hash table correspond to the keys of the
4286
# previous hash table.
4287
# The keys of this hash table are token types and can be seen
4288
# by running with --dump-token-types (-dtt).
4290
# When adding NEW_TOKENS: update this hash table
4291
# $type => $short_name
4292
%token_short_names = (
4317
# These token types will all be called identifiers for now
4318
# FIXME: need to separate user defined modules as separate type
4319
my @identifier = qw" i t U C Y Z G :: ";
4320
@token_short_names{@identifier} = ('i') x scalar(@identifier);
4322
# These token types will be called 'structure'
4323
my @structure = qw" { } ";
4324
@token_short_names{@structure} = ('s') x scalar(@structure);
4326
# OLD NOTES: save for reference
4327
# Any of these could be added later if it would be useful.
4328
# For now, they will by default become punctuation
4329
# my @list = qw" L R [ ] ";
4330
# @token_long_names{@list} = ('non-structure') x scalar(@list);
4333
# / /= * *= ** **= + += - -= % %= = ++ -- << <<= >> >>= pp p m mm
4335
# @token_long_names{@list} = ('math') x scalar(@list);
4337
# my @list = qw" & &= ~ ~= ^ ^= | |= ";
4338
# @token_long_names{@list} = ('bit') x scalar(@list);
4340
# my @list = qw" == != < > <= <=> ";
4341
# @token_long_names{@list} = ('numerical-comparison') x scalar(@list);
4343
# my @list = qw" && || ! &&= ||= //= ";
4344
# @token_long_names{@list} = ('logical') x scalar(@list);
4346
# my @list = qw" . .= =~ !~ x x= ";
4347
# @token_long_names{@list} = ('string-operators') x scalar(@list);
4350
# my @list = qw" .. -> <> ... \ ? ";
4351
# @token_long_names{@list} = ('misc-operators') x scalar(@list);
4355
sub make_getopt_long_names {
4357
my ($rgetopt_names) = @_;
4358
while ( my ( $short_name, $name ) = each %short_to_long_names ) {
4359
push @$rgetopt_names, "html-color-$name=s";
4360
push @$rgetopt_names, "html-italic-$name!";
4361
push @$rgetopt_names, "html-bold-$name!";
4363
push @$rgetopt_names, "html-color-background=s";
4364
push @$rgetopt_names, "html-linked-style-sheet=s";
4365
push @$rgetopt_names, "nohtml-style-sheets";
4366
push @$rgetopt_names, "html-pre-only";
4367
push @$rgetopt_names, "html-line-numbers";
4368
push @$rgetopt_names, "html-entities!";
4369
push @$rgetopt_names, "stylesheet";
4370
push @$rgetopt_names, "html-table-of-contents!";
4371
push @$rgetopt_names, "pod2html!";
4372
push @$rgetopt_names, "frames!";
4373
push @$rgetopt_names, "html-toc-extension=s";
4374
push @$rgetopt_names, "html-src-extension=s";
4376
# Pod::Html parameters:
4377
push @$rgetopt_names, "backlink=s";
4378
push @$rgetopt_names, "cachedir=s";
4379
push @$rgetopt_names, "htmlroot=s";
4380
push @$rgetopt_names, "libpods=s";
4381
push @$rgetopt_names, "podpath=s";
4382
push @$rgetopt_names, "podroot=s";
4383
push @$rgetopt_names, "title=s";
4385
# Pod::Html parameters with leading 'pod' which will be removed
4386
# before the call to Pod::Html
4387
push @$rgetopt_names, "podquiet!";
4388
push @$rgetopt_names, "podverbose!";
4389
push @$rgetopt_names, "podrecurse!";
4390
push @$rgetopt_names, "podflush";
4391
push @$rgetopt_names, "podheader!";
4392
push @$rgetopt_names, "podindex!";
4395
sub make_abbreviated_names {
4397
# We're appending things like this to the expansion list:
4398
# 'hcc' => [qw(html-color-comment)],
4399
# 'hck' => [qw(html-color-keyword)],
4402
my ($rexpansion) = @_;
4404
# abbreviations for color/bold/italic properties
4405
while ( my ( $short_name, $long_name ) = each %short_to_long_names ) {
4406
${$rexpansion}{"hc$short_name"} = ["html-color-$long_name"];
4407
${$rexpansion}{"hb$short_name"} = ["html-bold-$long_name"];
4408
${$rexpansion}{"hi$short_name"} = ["html-italic-$long_name"];
4409
${$rexpansion}{"nhb$short_name"} = ["nohtml-bold-$long_name"];
4410
${$rexpansion}{"nhi$short_name"} = ["nohtml-italic-$long_name"];
4413
# abbreviations for all other html options
4414
${$rexpansion}{"hcbg"} = ["html-color-background"];
4415
${$rexpansion}{"pre"} = ["html-pre-only"];
4416
${$rexpansion}{"toc"} = ["html-table-of-contents"];
4417
${$rexpansion}{"ntoc"} = ["nohtml-table-of-contents"];
4418
${$rexpansion}{"nnn"} = ["html-line-numbers"];
4419
${$rexpansion}{"hent"} = ["html-entities"];
4420
${$rexpansion}{"nhent"} = ["nohtml-entities"];
4421
${$rexpansion}{"css"} = ["html-linked-style-sheet"];
4422
${$rexpansion}{"nss"} = ["nohtml-style-sheets"];
4423
${$rexpansion}{"ss"} = ["stylesheet"];
4424
${$rexpansion}{"pod"} = ["pod2html"];
4425
${$rexpansion}{"npod"} = ["nopod2html"];
4426
${$rexpansion}{"frm"} = ["frames"];
4427
${$rexpansion}{"nfrm"} = ["noframes"];
4428
${$rexpansion}{"text"} = ["html-toc-extension"];
4429
${$rexpansion}{"sext"} = ["html-src-extension"];
4434
# This will be called once after options have been parsed
4438
# X11 color names for default settings that seemed to look ok
4439
# (these color names are only used for programming clarity; the hex
4440
# numbers are actually written)
4441
use constant ForestGreen => "#228B22";
4442
use constant SaddleBrown => "#8B4513";
4443
use constant magenta4 => "#8B008B";
4444
use constant IndianRed3 => "#CD5555";
4445
use constant DeepSkyBlue4 => "#00688B";
4446
use constant MediumOrchid3 => "#B452CD";
4447
use constant black => "#000000";
4448
use constant white => "#FFFFFF";
4449
use constant red => "#FF0000";
4451
# set default color, bold, italic properties
4452
# anything not listed here will be given the default (punctuation) color --
4453
# these types currently not listed and get default: ws pu s sc cm co p
4454
# When adding NEW_TOKENS: add an entry here if you don't want defaults
4456
# set_default_properties( $short_name, default_color, bold?, italic? );
4457
set_default_properties( 'c', ForestGreen, 0, 0 );
4458
set_default_properties( 'pd', ForestGreen, 0, 1 );
4459
set_default_properties( 'k', magenta4, 1, 0 ); # was SaddleBrown
4460
set_default_properties( 'q', IndianRed3, 0, 0 );
4461
set_default_properties( 'hh', IndianRed3, 0, 1 );
4462
set_default_properties( 'h', IndianRed3, 1, 0 );
4463
set_default_properties( 'i', DeepSkyBlue4, 0, 0 );
4464
set_default_properties( 'w', black, 0, 0 );
4465
set_default_properties( 'n', MediumOrchid3, 0, 0 );
4466
set_default_properties( 'v', MediumOrchid3, 0, 0 );
4467
set_default_properties( 'j', IndianRed3, 1, 0 );
4468
set_default_properties( 'm', red, 1, 0 );
4470
set_default_color( 'html-color-background', white );
4471
set_default_color( 'html-color-punctuation', black );
4473
# setup property lookup tables for tokens based on their short names
4474
# every token type has a short name, and will use these tables
4475
# to do the html markup
4476
while ( my ( $short_name, $long_name ) = each %short_to_long_names ) {
4477
$html_color{$short_name} = $rOpts->{"html-color-$long_name"};
4478
$html_bold{$short_name} = $rOpts->{"html-bold-$long_name"};
4479
$html_italic{$short_name} = $rOpts->{"html-italic-$long_name"};
4482
# write style sheet to STDOUT and die if requested
4483
if ( defined( $rOpts->{'stylesheet'} ) ) {
4484
write_style_sheet_file('-');
4488
# make sure user gives a file name after -css
4489
if ( defined( $rOpts->{'html-linked-style-sheet'} ) ) {
4490
$css_linkname = $rOpts->{'html-linked-style-sheet'};
4491
if ( $css_linkname =~ /^-/ ) {
4492
die "You must specify a valid filename after -css\n";
4496
# check for conflict
4497
if ( $css_linkname && $rOpts->{'nohtml-style-sheets'} ) {
4498
$rOpts->{'nohtml-style-sheets'} = 0;
4499
warning("You can't specify both -css and -nss; -nss ignored\n");
4502
# write a style sheet file if necessary
4503
if ($css_linkname) {
4505
# if the selected filename exists, don't write, because user may
4506
# have done some work by hand to create it; use backup name instead
4507
# Also, this will avoid a potential disaster in which the user
4508
# forgets to specify the style sheet, like this:
4509
# perltidy -html -css myfile1.pl myfile2.pl
4510
# This would cause myfile1.pl to be parsed as the style sheet by GetOpts
4511
my $css_filename = $css_linkname;
4512
unless ( -e $css_filename ) {
4513
write_style_sheet_file($css_filename);
4516
$missing_html_entities = 1 unless $rOpts->{'html-entities'};
4519
sub write_style_sheet_file {
4521
# Write the default style sheet to the named file.
#
# $css_filename: name of the file to create.  The name is embedded in a
# "> name" mode string handed to IO::File->new, so this is a two-argument
# style open; the option-checking code calls this with '-' and documents
# that as writing the style sheet to STDOUT.
#
# Dies if the file cannot be opened.
my $css_filename = shift;
4523
unless ( $fh = IO::File->new("> $css_filename") ) {
4524
die "can't open $css_filename: $!\n";
4526
# the style sheet text itself is produced by write_style_sheet_data
write_style_sheet_data($fh);
4527
# close inside eval; any error raised by close is deliberately ignored
eval { $fh->close };
4530
sub write_style_sheet_data {
4532
# write the style sheet data to an open file handle
#
# The output is a fixed preamble (body and pre rules) followed by one
# '.short_name { ... }' rule per entry in %short_to_long_names, sorted by
# short name.  Colors and bold/italic flags are read from the
# %html_color, %html_bold and %html_italic tables and from $rOpts.
# NOTE(review): the handle is used below as $fh; the statement that takes
# it from the argument list is not visible here -- confirm.
4535
my $bg_color = $rOpts->{'html-color-background'};
4536
my $text_color = $rOpts->{'html-color-punctuation'};
4538
# pre-bgcolor is new, and may not be defined
# (fall back to the page background color when it is unset or false)
4539
my $pre_bg_color = $rOpts->{'html-pre-color-background'};
4540
$pre_bg_color = $bg_color unless $pre_bg_color;
4542
$fh->print(<<"EOM");
4543
/* default style sheet generated by perltidy */
4544
body {background: $bg_color; color: $text_color}
4545
pre { color: $text_color;
4546
background: $pre_bg_color;
4547
font-family: courier;
4552
foreach my $short_name ( sort keys %short_to_long_names ) {
4553
my $long_name = $short_to_long_names{$short_name};
4555
my $abbrev = '.' . $short_name;
4556
if ( length($short_name) == 1 ) { $abbrev .= ' ' } # for alignment
4557
my $color = $html_color{$short_name};
4558
if ( !defined($color) ) { $color = $text_color }
4559
$fh->print("$abbrev \{ color: $color;");
4561
if ( $html_bold{$short_name} ) {
4562
$fh->print(" font-weight:bold;");
4565
if ( $html_italic{$short_name} ) {
4566
$fh->print(" font-style:italic;");
4568
$fh->print("} /* $long_name */\n");
4572
sub set_default_color {
4574
# make sure that options hash $rOpts->{$key} contains a valid color
#
# $key:   name of the color option in $rOpts
# $color: default color, used only when the option has no true value
#
# Whichever color is chosen is passed through check_RGB and stored back
# into $rOpts->{$key}, so a user-supplied value is normalized as well.
4575
my ( $key, $color ) = @_;
4576
if ( $rOpts->{$key} ) { $color = $rOpts->{$key} }
4577
$rOpts->{$key} = check_RGB($color);
4582
# if color is a 6 digit hex RGB value, prepend a #, otherwise
4583
# assume that it is a valid ascii color name
4585
if ( $color =~ /^[0-9a-fA-F]{6,6}$/ ) { $color = "#$color" }
4589
sub set_default_properties {
4590
# Install the default color, bold and italic settings for one token type.
#
# $short_name: short token-type name, a key of %short_to_long_names
# $color:      default color (see set_default_color)
# $bold:       default for the "html-bold-<long name>" option
# $italic:     default for the "html-italic-<long name>" option
#
# Values already present in $rOpts take precedence over these defaults.
my ( $short_name, $color, $bold, $italic ) = @_;
4592
set_default_color( "html-color-$short_to_long_names{$short_name}", $color );
4594
# bold and italic are tested with 'defined' rather than truth so that an
# explicit 0 already stored in $rOpts is kept instead of being replaced
$key = "html-bold-$short_to_long_names{$short_name}";
4595
$rOpts->{$key} = ( defined $rOpts->{$key} ) ? $rOpts->{$key} : $bold;
4596
$key = "html-italic-$short_to_long_names{$short_name}";
4597
$rOpts->{$key} = ( defined $rOpts->{$key} ) ? $rOpts->{$key} : $italic;
4602
# Use Pod::Html to process the pod and make the page
4603
# then merge the perltidy code sections into it.
4604
# return 1 if success, 0 otherwise
4606
my ( $pod_string, $css_string, $toc_string, $rpre_string_stack ) = @_;
4607
my $input_file = $self->{_input_file};
4608
my $title = $self->{_title};
4609
my $success_flag = 0;
4611
# don't try to use pod2html if no pod
4612
unless ($pod_string) {
4613
return $success_flag;
4616
# Pod::Html requires a real temporary filename
4617
# If we are making a frame, we have a name available
4618
# Otherwise, we have to find one
4620
if ( $rOpts->{'frames'} ) {
4621
$tmpfile = $self->{_toc_filename};
4624
$tmpfile = Perl::Tidy::make_temporary_filename();
4626
my $fh_tmp = IO::File->new( $tmpfile, 'w' );
4628
warn "unable to open temporary file $tmpfile; cannot use pod2html\n";
4629
return $success_flag;
4632
#------------------------------------------------------------------
4633
# Warning: a temporary file is open; we have to clean up if
4634
# things go bad. From here on all returns should be by going to
4635
# RETURN so that the temporary file gets unlinked.
4636
#------------------------------------------------------------------
4638
# write the pod text to the temporary file
4639
$fh_tmp->print($pod_string);
4642
# Hand off the pod to pod2html.
4643
# Note that we can use the same temporary filename for input and output
4644
# because of the way pod2html works.
4648
push @args, "--infile=$tmpfile", "--outfile=$tmpfile", "--title=$title";
4651
# Flags with string args:
4652
# "backlink=s", "cachedir=s", "htmlroot=s", "libpods=s",
4653
# "podpath=s", "podroot=s"
4654
# Note: -css=s is handled by perltidy itself
4655
foreach $kw (qw(backlink cachedir htmlroot libpods podpath podroot)) {
4656
if ( $rOpts->{$kw} ) { push @args, "--$kw=$rOpts->{$kw}" }
4659
# Toggle switches; these have extra leading 'pod'
4660
# "header!", "index!", "recurse!", "quiet!", "verbose!"
4661
foreach $kw (qw(podheader podindex podrecurse podquiet podverbose)) {
4662
my $kwd = $kw; # allows us to strip 'pod'
4663
if ( $rOpts->{$kw} ) { $kwd =~ s/^pod//; push @args, "--$kwd" }
4664
elsif ( defined( $rOpts->{$kw} ) ) {
4666
push @args, "--no$kwd";
4672
if ( $rOpts->{$kw} ) { $kw =~ s/^pod//; push @args, "--$kw" }
4674
# Must clean up if pod2html dies (it can);
4675
# Be careful not to overwrite callers __DIE__ routine
4676
local $SIG{__DIE__} = sub {
4678
unlink $tmpfile if -e $tmpfile;
4684
$fh_tmp = IO::File->new( $tmpfile, 'r' );
4687
# this error shouldn't happen ... we just used this filename
4688
warn "unable to open temporary file $tmpfile; cannot use pod2html\n";
4692
my $html_fh = $self->{_html_fh};
4697
# This routine will write the html selectively and store the toc
4698
my $html_print = sub {
4700
$html_fh->print($_) unless ($no_print);
4701
if ($in_toc) { push @toc, $_ }
4705
# loop over lines of html output from pod2html and merge in
4706
# the necessary perltidy html sections
4707
my ( $saw_body, $saw_index, $saw_body_end );
4708
while ( my $line = $fh_tmp->getline() ) {
4710
if ( $line =~ /^\s*<html>\s*$/i ) {
4711
my $date = localtime;
4712
$html_print->("<!-- Generated by perltidy on $date -->\n");
4713
$html_print->($line);
4716
# Copy the perltidy css, if any, after <body> tag
4717
elsif ( $line =~ /^\s*<body.*>\s*$/i ) {
4719
$html_print->($css_string) if $css_string;
4720
$html_print->($line);
4722
# add a top anchor and heading
4723
$html_print->("<a name=\"-top-\"></a>\n");
4724
$title = escape_html($title);
4725
$html_print->("<h1>$title</h1>\n");
4727
elsif ( $line =~ /^\s*<!-- INDEX BEGIN -->\s*$/i ) {
4730
# when frames are used, an extra table of contents in the
4731
# contents panel is confusing, so don't print it
4732
$no_print = $rOpts->{'frames'}
4733
|| !$rOpts->{'html-table-of-contents'};
4734
$html_print->("<h2>Doc Index:</h2>\n") if $rOpts->{'frames'};
4735
$html_print->($line);
4738
# Copy the perltidy toc, if any, after the Pod::Html toc
4739
elsif ( $line =~ /^\s*<!-- INDEX END -->\s*$/i ) {
4741
$html_print->($line);
4743
$html_print->("<hr />\n") if $rOpts->{'frames'};
4744
$html_print->("<h2>Code Index:</h2>\n");
4745
my @toc = map { $_ .= "\n" } split /\n/, $toc_string;
4746
$html_print->(@toc);
4752
# Copy one perltidy section after each marker
4753
elsif ( $line =~ /^(.*)<!-- pERLTIDY sECTION -->(.*)$/ ) {
4755
$html_print->($1) if $1;
4757
# Intermingle code and pod sections if we saw multiple =cut's.
4758
if ( $self->{_pod_cut_count} > 1 ) {
4759
my $rpre_string = shift(@$rpre_string_stack);
4760
if ($$rpre_string) {
4761
$html_print->('<pre>');
4762
$html_print->($$rpre_string);
4763
$html_print->('</pre>');
4767
# shouldn't happen: we stored a string before writing
4770
"Problem merging html stream with pod2html; order may be wrong\n";
4772
$html_print->($line);
4775
# If didn't see multiple =cut lines, we'll put the pod out first
4776
# and then the code, because it's less confusing.
4779
# since we are not intermixing code and pod, we don't need
4780
# or want any <hr> lines which separated pod and code
4781
$html_print->($line) unless ( $line =~ /^\s*<hr>\s*$/i );
4785
# Copy any remaining code section before the </body> tag
4786
elsif ( $line =~ /^\s*<\/body>\s*$/i ) {
4788
if (@$rpre_string_stack) {
4789
unless ( $self->{_pod_cut_count} > 1 ) {
4790
$html_print->('<hr />');
4792
while ( my $rpre_string = shift(@$rpre_string_stack) ) {
4793
$html_print->('<pre>');
4794
$html_print->($$rpre_string);
4795
$html_print->('</pre>');
4798
$html_print->($line);
4801
$html_print->($line);
4806
unless ($saw_body) {
4807
warn "Did not see <body> in pod2html output\n";
4810
unless ($saw_body_end) {
4811
warn "Did not see </body> in pod2html output\n";
4814
unless ($saw_index) {
4815
warn "Did not find INDEX END in pod2html output\n";
4820
eval { $html_fh->close() };
4822
# note that we have to unlink tmpfile before making frames
4823
# because the tmpfile may be one of the names used for frames
4824
unlink $tmpfile if -e $tmpfile;
4825
if ( $success_flag && $rOpts->{'frames'} ) {
4826
$self->make_frame( \@toc );
4828
return $success_flag;
4833
# Make a frame with table of contents in the left panel
4834
# and the text in the right panel.
4836
# $html_filename contains the no-frames html output
4837
# $rtoc is a reference to an array with the table of contents
4840
my $input_file = $self->{_input_file};
4841
my $html_filename = $self->{_html_file};
4842
my $toc_filename = $self->{_toc_filename};
4843
my $src_filename = $self->{_src_filename};
4844
my $title = $self->{_title};
4845
$title = escape_html($title);
4847
# FUTURE input parameter:
4848
my $top_basename = "";
4850
# We need to produce 3 html files:
4851
# 1. - the table of contents
4852
# 2. - the contents (source code) itself
4853
# 3. - the frame which contains them
4855
# get basenames for relative links
4856
my ( $toc_basename, $toc_path ) = fileparse($toc_filename);
4857
my ( $src_basename, $src_path ) = fileparse($src_filename);
4859
# 1. Make the table of contents panel, with appropriate changes
4860
# to the anchor names
4861
my $src_frame_name = 'SRC';
4863
write_toc_html( $title, $toc_filename, $src_basename, $rtoc,
4866
# 2. The current .html filename is renamed to be the contents panel
4867
rename( $html_filename, $src_filename )
4868
or die "Cannot rename $html_filename to $src_filename:$!\n";
4870
# 3. Then use the original html filename for the frame
4872
$title, $html_filename, $top_basename,
4873
$toc_basename, $src_basename, $src_frame_name
4877
sub write_toc_html {
4879
# write a separate html table of contents file for frames
#
# $title:          page title, interpolated into the <title> and heading
# $toc_filename:   name of the table of contents file to create
# $src_basename:   file name that the contents links point to
# $rtoc:           reference to an array of table of contents lines
# $src_frame_name: frame name used as the link target
#
# Dies if $toc_filename cannot be opened for writing.  The lines in @$rtoc
# are passed to change_anchor_names before being printed.
4880
my ( $title, $toc_filename, $src_basename, $rtoc, $src_frame_name ) = @_;
4881
my $fh = IO::File->new( $toc_filename, 'w' )
4882
or die "Cannot open $toc_filename:$!\n";
4886
<title>$title</title>
4889
<h1><a href=\"$src_basename#-top-" target="$src_frame_name">$title</a></h1>
4893
change_anchor_names( $rtoc, $src_basename, "$src_frame_name" );
4894
$fh->print( join "", @$rtoc );
4903
sub write_frame_html {
4905
# write an html file to be the table of contents frame
4907
$title, $frame_filename, $top_basename,
4908
$toc_basename, $src_basename, $src_frame_name
4911
# Open the frame file for writing.  On failure, report the file we
# actually tried to open ($frame_filename); the message used to name
# $toc_basename, which pointed the user at the wrong file.
my $fh = IO::File->new( $frame_filename, 'w' )
  or die "Cannot open $frame_filename:$!\n";
4915
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Frameset//EN"
4916
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd">
4917
<?xml version="1.0" encoding="iso-8859-1" ?>
4918
<html xmlns="http://www.w3.org/1999/xhtml">
4920
<title>$title</title>
4924
# two left panels, one right, if master index file
4925
if ($top_basename) {
4927
<frameset cols="20%,80%">
4928
<frameset rows="30%,70%">
4929
<frame src = "$top_basename" />
4930
<frame src = "$toc_basename" />
4935
# one left panel, one right, if no master index file
4938
<frameset cols="20%,*">
4939
<frame src = "$toc_basename" />
4943
<frame src = "$src_basename" name = "$src_frame_name" />
4946
<p>If you see this message, you are using a non-frame-capable web client.</p>
4947
<p>This document contains:</p>
4949
<li><a href="$toc_basename">A table of contents</a></li>
4950
<li><a href="$src_basename">The source code</a></li>
4959
sub change_anchor_names {
4961
# add a filename and target to anchors
4962
# also return the first anchor
#
# $rlines:   reference to an array of html lines
# $filename: file name to put in front of each '#name' anchor reference
# $target:   value for the target attribute added to each link
#
# Returns the first rewritten href ("$filename#name").
4963
my ( $rlines, $filename, $target ) = @_;
4965
# $line aliases the array element, so assigning to it below rewrites the
# caller's array in place
foreach my $line (@$rlines) {
4967
# We're looking for lines like this:
4968
# <LI><A HREF="#synopsis">SYNOPSIS</A></LI>
4969
# ---- - -------- -----------------
4971
# captures: 1 = text before '<a', 2 = text between '<a' and 'href',
# 3 = anything in the href before '#', 4 = anchor name, 5 = rest of line
# NOTE(review): $pre, $name and $post are presumably taken from these
# captures; the assignments are not visible here -- confirm.
if ( $line =~ /^(.*)<a(.*)href\s*=\s*"([^#]*)#([^"]+)"[^>]*>(.*)$/i ) {
4975
my $href = "$filename#$name";
4976
$line = "$pre<a href=\"$href\" target=\"$target\">$post\n";
4977
unless ($first_anchor) { $first_anchor = $href }
4980
return $first_anchor;
4983
sub close_html_file {
4985
# Finish writing the html output for the current file.
# Does nothing unless $self->{_html_file_opened} is set.
return unless $self->{_html_file_opened};
4987
# $html_fh is the handle that receives the finished html;
# $rtoc_string is dereferenced later as the table of contents text
my $html_fh = $self->{_html_fh};
4988
my $rtoc_string = $self->{_rtoc_string};
4990
# There are 3 basic paths to html output...
4992
# ---------------------------------
4993
# Path 1: finish up if in -pre mode
4994
# ---------------------------------
4995
if ( $rOpts->{'html-pre-only'} ) {
4996
$html_fh->print( <<"PRE_END");
4999
eval { $html_fh->close() };
5004
$self->add_toc_item( 'EOF', 'EOF' );
5006
my $rpre_string_stack = $self->{_rpre_string_stack};
5008
# Patch to darken the <pre> background color in case of pod2html and
5009
# interleaved code/documentation. Otherwise, the distinction
5010
# between code and documentation is blurred.
5011
if ( $rOpts->{pod2html}
5012
&& $self->{_pod_cut_count} >= 1
5013
&& $rOpts->{'html-color-background'} eq '#FFFFFF' )
5015
$rOpts->{'html-pre-color-background'} = '#F0F0F0';
5018
# put the css or its link into a string, if used
5020
my $fh_css = Perl::Tidy::IOScalar->new( \$css_string, 'w' );
5022
# use css linked to another file
5023
if ( $rOpts->{'html-linked-style-sheet'} ) {
5025
qq(<link rel="stylesheet" href="$css_linkname" type="text/css" />)
5029
# use css embedded in this file
5030
elsif ( !$rOpts->{'nohtml-style-sheets'} ) {
5031
$fh_css->print( <<'ENDCSS');
5032
<style type="text/css">
5035
write_style_sheet_data($fh_css);
5036
$fh_css->print( <<"ENDCSS");
5042
# -----------------------------------------------------------
5043
# path 2: use pod2html if requested
5044
# If we fail for some reason, continue on to path 3
5045
# -----------------------------------------------------------
5046
if ( $rOpts->{'pod2html'} ) {
5047
my $rpod_string = $self->{_rpod_string};
5048
$self->pod_to_html( $$rpod_string, $css_string, $$rtoc_string,
5049
$rpre_string_stack )
5053
# --------------------------------------------------
5054
# path 3: write code in html, with pod only in italics
5055
# --------------------------------------------------
5056
my $input_file = $self->{_input_file};
5057
my $title = escape_html($input_file);
5058
my $date = localtime;
5059
$html_fh->print( <<"HTML_START");
5060
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
5061
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
5062
<!-- Generated by perltidy on $date -->
5063
<html xmlns="http://www.w3.org/1999/xhtml">
5065
<title>$title</title>
5068
# output the css, if used
5070
$html_fh->print($css_string);
5071
$html_fh->print( <<"ENDCSS");
5078
$html_fh->print( <<"HTML_START");
5080
<body bgcolor=\"$rOpts->{'html-color-background'}\" text=\"$rOpts->{'html-color-punctuation'}\">
5084
$html_fh->print("<a name=\"-top-\"></a>\n");
5085
$html_fh->print( <<"EOM");
5089
# copy the table of contents
5091
&& !$rOpts->{'frames'}
5092
&& $rOpts->{'html-table-of-contents'} )
5094
$html_fh->print($$rtoc_string);
5097
# copy the pre section(s)
5098
my $fname_comment = $input_file;
5099
$fname_comment =~ s/--+/-/g; # protect HTML comment tags
5100
$html_fh->print( <<"END_PRE");
5102
<!-- contents of filename: $fname_comment -->
5106
foreach my $rpre_string (@$rpre_string_stack) {
5107
$html_fh->print($$rpre_string);
5110
# and finish the html page
5111
$html_fh->print( <<"HTML_END");
5116
eval { $html_fh->close() }; # could be object without close method
5118
if ( $rOpts->{'frames'} ) {
5119
my @toc = map { $_ .= "\n" } split /\n/, $$rtoc_string;
5120
$self->make_frame( \@toc );
5126
my ( $rtokens, $rtoken_type, $rlevels ) = @_;
5127
my ( @colored_tokens, $j, $string, $type, $token, $level );
5128
my $rlast_level = $self->{_rlast_level};
5129
my $rpackage_stack = $self->{_rpackage_stack};
5131
for ( $j = 0 ; $j < @$rtoken_type ; $j++ ) {
5132
$type = $$rtoken_type[$j];
5133
$token = $$rtokens[$j];
5134
$level = $$rlevels[$j];
5135
$level = 0 if ( $level < 0 );
5137
#-------------------------------------------------------
5138
# Update the package stack. The package stack is needed to keep
5139
# the toc correct because some packages may be declared within
5140
# blocks and go out of scope when we leave the block.
5141
#-------------------------------------------------------
5142
if ( $level > $$rlast_level ) {
5143
unless ( $rpackage_stack->[ $level - 1 ] ) {
5144
$rpackage_stack->[ $level - 1 ] = 'main';
5146
$rpackage_stack->[$level] = $rpackage_stack->[ $level - 1 ];
5148
elsif ( $level < $$rlast_level ) {
5149
my $package = $rpackage_stack->[$level];
5150
unless ($package) { $package = 'main' }
5152
# if we change packages due to a nesting change, we
5153
# have to make an entry in the toc
5154
if ( $package ne $rpackage_stack->[ $level + 1 ] ) {
5155
$self->add_toc_item( $package, 'package' );
5158
$$rlast_level = $level;
5160
#-------------------------------------------------------
5161
# Intercept a sub name here; split it
5162
# into keyword 'sub' and sub name; and add an entry in the toc
5164
#-------------------------------------------------------
5165
if ( $type eq 'i' && $token =~ /^(sub\s+)(\w.*)$/ ) {
5166
$token = $self->markup_html_element( $1, 'k' );
5167
push @colored_tokens, $token;
5171
# but don't include sub declarations in the toc;
5172
# these will have leading token types 'i;'
5173
my $signature = join "", @$rtoken_type;
5174
unless ( $signature =~ /^i;/ ) {
5175
my $subname = $token;
5176
$subname =~ s/[\s\(].*$//; # remove any attributes and prototype
5177
$self->add_toc_item( $subname, 'sub' );
5181
#-------------------------------------------------------
5182
# Intercept a package name here; split it
5183
# into keyword 'package' and name; add to the toc,
5184
# and update the package stack
5185
#-------------------------------------------------------
5186
if ( $type eq 'i' && $token =~ /^(package\s+)(\w.*)$/ ) {
5187
$token = $self->markup_html_element( $1, 'k' );
5188
push @colored_tokens, $token;
5191
$self->add_toc_item( "$token", 'package' );
5192
$rpackage_stack->[$level] = $token;
5195
$token = $self->markup_html_element( $token, $type );
5196
push @colored_tokens, $token;
5198
return ( \@colored_tokens );
5201
sub markup_html_element {
5203
# Return the html text for one token.
#
# $token: the token text
# $type:  the token type, looked up in %token_short_names
#
# Blank tokens are returned unchanged.  Anything else is html-escaped and
# then wrapped either in a <span class="..."> (style sheets in use) or in
# <font>/<i>/<b> tags (no style sheets).
# NOTE(review): this is invoked as a method elsewhere in the file; the
# statement that removes $self from @_ is not visible here -- confirm.
my ( $token, $type ) = @_;
5205
return $token if ( $type eq 'b' ); # skip a blank token
5206
return $token if ( $token =~ /^\s*$/ ); # skip a blank line
5207
# escape before adding markup so that only the token text is escaped
$token = escape_html($token);
5209
# get the short abbreviation for this token type
5210
my $short_name = $token_short_names{$type};
5211
if ( !defined($short_name) ) {
5212
$short_name = "pu"; # punctuation is default
5215
# handle style sheets..
# (punctuation is left unwrapped; it gets no class of its own)
5216
if ( !$rOpts->{'nohtml-style-sheets'} ) {
5217
if ( $short_name ne 'pu' ) {
5218
$token = qq(<span class="$short_name">) . $token . "</span>";
5222
# handle no style sheets..
5224
my $color = $html_color{$short_name};
5226
# a <font> tag is only added when the color differs from the
# punctuation color
if ( $color && ( $color ne $rOpts->{'html-color-punctuation'} ) ) {
5227
$token = qq(<font color="$color">) . $token . "</font>";
5229
if ( $html_italic{$short_name} ) { $token = "<i>$token</i>" }
5230
if ( $html_bold{$short_name} ) { $token = "<b>$token</b>" }
5238
if ($missing_html_entities) {
5239
# Fallback used when $missing_html_entities is set (HTML::Entities is not
# being used): escape the four characters that are significant in html.
# The replacement text had been reduced to the bare characters, which
# made these substitutions no-ops.  '&' must be handled first so that the
# ampersands introduced by the later substitutions are not escaped again.
$token =~ s/\&/&amp;/g;
$token =~ s/\</&lt;/g;
$token =~ s/\>/&gt;/g;
$token =~ s/\"/&quot;/g;
5245
HTML::Entities::encode_entities($token);
5250
sub finish_formatting {
5252
# called after last line
# Completes the html output by delegating to close_html_file.
5254
$self->close_html_file();
5261
return unless $self->{_html_file_opened};
5262
my $html_pre_fh = $self->{_html_pre_fh};
5263
my ($line_of_tokens) = @_;
5264
my $line_type = $line_of_tokens->{_line_type};
5265
my $input_line = $line_of_tokens->{_line_text};
5266
my $line_number = $line_of_tokens->{_line_number};
5269
# markup line of code..
5271
if ( $line_type eq 'CODE' ) {
5272
my $rtoken_type = $line_of_tokens->{_rtoken_type};
5273
my $rtokens = $line_of_tokens->{_rtokens};
5274
my $rlevels = $line_of_tokens->{_rlevels};
5276
if ( $input_line =~ /(^\s*)/ ) {
5282
my ($rcolored_tokens) =
5283
$self->markup_tokens( $rtokens, $rtoken_type, $rlevels );
5284
$html_line .= join '', @$rcolored_tokens;
5287
# markup line of non-code..
5290
if ( $line_type eq 'HERE' ) { $line_character = 'H' }
5291
elsif ( $line_type eq 'HERE_END' ) { $line_character = 'h' }
5292
elsif ( $line_type eq 'FORMAT' ) { $line_character = 'H' }
5293
elsif ( $line_type eq 'FORMAT_END' ) { $line_character = 'h' }
5294
elsif ( $line_type eq 'SYSTEM' ) { $line_character = 'c' }
5295
elsif ( $line_type eq 'END_START' ) {
5296
$line_character = 'k';
5297
$self->add_toc_item( '__END__', '__END__' );
5299
elsif ( $line_type eq 'DATA_START' ) {
5300
$line_character = 'k';
5301
$self->add_toc_item( '__DATA__', '__DATA__' );
5303
elsif ( $line_type =~ /^POD/ ) {
5304
$line_character = 'P';
5305
if ( $rOpts->{'pod2html'} ) {
5306
my $html_pod_fh = $self->{_html_pod_fh};
5307
if ( $line_type eq 'POD_START' ) {
5309
my $rpre_string_stack = $self->{_rpre_string_stack};
5310
my $rpre_string = $rpre_string_stack->[-1];
5312
# if we have written any non-blank lines to the
5313
# current pre section, start writing to a new output string
5315
if ( $$rpre_string =~ /\S/ ) {
5318
Perl::Tidy::IOScalar->new( \$pre_string, 'w' );
5319
$self->{_html_pre_fh} = $html_pre_fh;
5320
push @$rpre_string_stack, \$pre_string;
5322
# leave a marker in the pod stream so we know
5323
# where to put the pre section we just finished
5325
my $for_html = '=for html'; # don't confuse pod utils
5326
$html_pod_fh->print(<<EOM);
5329
<!-- pERLTIDY sECTION -->
5334
# otherwise, just clear the current string and start over
5338
$html_pod_fh->print("\n");
5341
$html_pod_fh->print( $input_line . "\n" );
5342
if ( $line_type eq 'POD_END' ) {
5343
$self->{_pod_cut_count}++;
5344
$html_pod_fh->print("\n");
5349
else { $line_character = 'Q' }
5350
$html_line = $self->markup_html_element( $input_line, $line_character );
5353
# add the line number if requested
5354
if ( $rOpts->{'html-line-numbers'} ) {
5356
( $line_number < 10 ) ? " "
5357
: ( $line_number < 100 ) ? " "
5358
: ( $line_number < 1000 ) ? " "
5360
$html_line = $extra_space . $line_number . " " . $html_line;
5364
$html_pre_fh->print("$html_line\n");
5367
#####################################################################
5369
# The Perl::Tidy::Formatter package adds indentation, whitespace, and
5370
# line breaks to the token stream
5372
# WARNING: This is not a real class for speed reasons. Only one
5373
# Formatter may be used.
5375
#####################################################################
5377
package Perl::Tidy::Formatter;
5381
# Caution: these debug flags produce a lot of output
5382
# They should all be 0 except when debugging small scripts
5383
use constant FORMATTER_DEBUG_FLAG_BOND => 0;
5384
use constant FORMATTER_DEBUG_FLAG_BREAK => 0;
5385
use constant FORMATTER_DEBUG_FLAG_CI => 0;
5386
use constant FORMATTER_DEBUG_FLAG_FLUSH => 0;
5387
use constant FORMATTER_DEBUG_FLAG_FORCE => 0;
5388
use constant FORMATTER_DEBUG_FLAG_LIST => 0;
5389
use constant FORMATTER_DEBUG_FLAG_NOBREAK => 0;
5390
use constant FORMATTER_DEBUG_FLAG_OUTPUT => 0;
5391
use constant FORMATTER_DEBUG_FLAG_SPARSE => 0;
5392
use constant FORMATTER_DEBUG_FLAG_STORE => 0;
5393
use constant FORMATTER_DEBUG_FLAG_UNDOBP => 0;
5394
use constant FORMATTER_DEBUG_FLAG_WHITE => 0;
5396
my $debug_warning = sub {
5397
print "FORMATTER_DEBUGGING with key $_[0]\n";
5400
FORMATTER_DEBUG_FLAG_BOND && $debug_warning->('BOND');
5401
FORMATTER_DEBUG_FLAG_BREAK && $debug_warning->('BREAK');
5402
FORMATTER_DEBUG_FLAG_CI && $debug_warning->('CI');
5403
FORMATTER_DEBUG_FLAG_FLUSH && $debug_warning->('FLUSH');
5404
FORMATTER_DEBUG_FLAG_FORCE && $debug_warning->('FORCE');
5405
FORMATTER_DEBUG_FLAG_LIST && $debug_warning->('LIST');
5406
FORMATTER_DEBUG_FLAG_NOBREAK && $debug_warning->('NOBREAK');
5407
FORMATTER_DEBUG_FLAG_OUTPUT && $debug_warning->('OUTPUT');
5408
FORMATTER_DEBUG_FLAG_SPARSE && $debug_warning->('SPARSE');
5409
FORMATTER_DEBUG_FLAG_STORE && $debug_warning->('STORE');
5410
FORMATTER_DEBUG_FLAG_UNDOBP && $debug_warning->('UNDOBP');
5411
FORMATTER_DEBUG_FLAG_WHITE && $debug_warning->('WHITE');
5418
$max_gnu_stack_index
5419
$gnu_position_predictor
5420
$line_start_index_to_go
5421
$last_indentation_written
5422
$last_unadjusted_indentation
5425
$saw_VERSION_in_this_file
5430
$gnu_sequence_number
5431
$last_output_indentation
5437
@type_sequence_to_go
5438
@container_environment_to_go
5439
@bond_strength_to_go
5440
@forced_breakpoint_to_go
5443
@leading_spaces_to_go
5444
@reduced_spaces_to_go
5445
@matching_token_to_go
5447
@nesting_blocks_to_go
5449
@nesting_depth_to_go
5451
@old_breakpoint_to_go
5455
%saved_opening_indentation
5458
$comma_count_in_batch
5459
$old_line_count_in_batch
5460
$last_nonblank_index_to_go
5461
$last_nonblank_type_to_go
5462
$last_nonblank_token_to_go
5463
$last_last_nonblank_index_to_go
5464
$last_last_nonblank_type_to_go
5465
$last_last_nonblank_token_to_go
5466
@nonblank_lines_at_depth
5470
$in_format_skipping_section
5471
$format_skipping_pattern_begin
5472
$format_skipping_pattern_end
5474
$forced_breakpoint_count
5475
$forced_breakpoint_undo_count
5476
@forced_breakpoint_undo_stack
5477
%postponed_breakpoint
5481
$first_embedded_tab_at
5482
$last_embedded_tab_at
5483
$deleted_semicolon_count
5484
$first_deleted_semicolon_at
5485
$last_deleted_semicolon_at
5486
$added_semicolon_count
5487
$first_added_semicolon_at
5488
$last_added_semicolon_at
5489
$first_tabbing_disagreement
5490
$last_tabbing_disagreement
5491
$in_tabbing_disagreement
5492
$tabbing_disagreement_count
5496
$last_line_leading_type
5497
$last_line_leading_level
5498
$last_last_line_leading_level
5501
%block_opening_line_number
5502
$csc_new_statement_ok
5503
$accumulating_text_for_block
5505
$rleading_block_if_elsif_text
5506
$leading_block_text_level
5507
$leading_block_text_length_exceeded
5508
$leading_block_text_line_length
5509
$leading_block_text_line_number
5510
$closing_side_comment_prefix_pattern
5511
$closing_side_comment_list_pattern
5513
$last_nonblank_token
5515
$last_last_nonblank_token
5516
$last_last_nonblank_type
5517
$last_nonblank_block_type
5520
%is_if_brace_follower
5521
%space_after_keyword
5524
%is_last_next_redo_return
5525
%is_other_brace_follower
5526
%is_else_brace_follower
5527
%is_anon_sub_brace_follower
5528
%is_anon_sub_1_brace_follower
5530
%is_sort_map_grep_eval
5531
%is_sort_map_grep_eval_do
5532
%is_block_without_semicolon
5537
%is_if_unless_and_or_last_next_redo_return
5538
%is_until_while_for_if_elsif_else
5544
$is_static_block_comment
5545
$index_start_one_line_block
5546
$semicolons_before_block_self_destruct
5547
$index_max_forced_break
5550
$vertical_aligner_object
5555
$last_line_had_side_comment
5558
$static_block_comment_pattern
5559
$static_side_comment_pattern
5560
%opening_vertical_tightness
5561
%closing_vertical_tightness
5562
%closing_token_indentation
5564
%opening_token_right
5565
%stack_opening_token
5566
%stack_closing_token
5568
$block_brace_vertical_tightness_pattern
5571
$rOpts_add_whitespace
5572
$rOpts_block_brace_tightness
5573
$rOpts_block_brace_vertical_tightness
5574
$rOpts_brace_left_and_indent
5575
$rOpts_comma_arrow_breakpoints
5576
$rOpts_break_at_old_keyword_breakpoints
5577
$rOpts_break_at_old_comma_breakpoints
5578
$rOpts_break_at_old_logical_breakpoints
5579
$rOpts_break_at_old_ternary_breakpoints
5580
$rOpts_closing_side_comment_else_flag
5581
$rOpts_closing_side_comment_maximum_text
5582
$rOpts_continuation_indentation
5584
$rOpts_delete_old_whitespace
5585
$rOpts_fuzzy_line_length
5586
$rOpts_indent_columns
5587
$rOpts_line_up_parentheses
5588
$rOpts_maximum_fields_per_table
5589
$rOpts_maximum_line_length
5590
$rOpts_short_concatenation_item_length
5591
$rOpts_swallow_optional_blank_lines
5592
$rOpts_ignore_old_breakpoints
5593
$rOpts_format_skipping
5594
$rOpts_space_function_paren
5595
$rOpts_space_keyword_paren
5596
$rOpts_keep_interior_semicolons
5598
$half_maximum_line_length
5602
%is_keyword_returning_list
5606
%right_bond_strength
5623
# default list of block types for which -bli would apply
5624
$bli_list_string = 'if else elsif unless while for foreach do : sub';
5627
.. :: << >> ** && .. || // -> => += -= .= %= &= |= ^= *= <>
5628
<= >= == =~ !~ != ++ -- /= x=
5630
@is_digraph{@_} = (1) x scalar(@_);
5632
@_ = qw( ... **= <<= >>= &&= ||= //= <=> );
5633
@is_trigraph{@_} = (1) x scalar(@_);
5636
= **= += *= &= <<= &&=
5637
-= /= |= >>= ||= //=
5641
@is_assignment{@_} = (1) x scalar(@_);
5651
@is_keyword_returning_list{@_} = (1) x scalar(@_);
5653
@_ = qw(is if unless and or err last next redo return);
5654
@is_if_unless_and_or_last_next_redo_return{@_} = (1) x scalar(@_);
5656
# always break after a closing curly of these block types:
5657
@_ = qw(until while for if elsif else);
5658
@is_until_while_for_if_elsif_else{@_} = (1) x scalar(@_);
5660
@_ = qw(last next redo return);
5661
@is_last_next_redo_return{@_} = (1) x scalar(@_);
5663
@_ = qw(sort map grep);
5664
@is_sort_map_grep{@_} = (1) x scalar(@_);
5666
@_ = qw(sort map grep eval);
5667
@is_sort_map_grep_eval{@_} = (1) x scalar(@_);
5669
@_ = qw(sort map grep eval do);
5670
@is_sort_map_grep_eval_do{@_} = (1) x scalar(@_);
5673
@is_if_unless{@_} = (1) x scalar(@_);
5675
@_ = qw(and or err);
5676
@is_and_or{@_} = (1) x scalar(@_);
5678
# Identify certain operators which often occur in chains.
5679
# Note: the minus (-) causes a side effect of padding of the first line in
5680
# something like this (by sub set_logical_padding):
5681
# Checkbutton => 'Transmission checked',
5682
# -variable => \$TRANS
5683
# This usually improves appearance so it seems ok.
5684
@_ = qw(&& || and or : ? . + - * /);
5685
@is_chain_operator{@_} = (1) x scalar(@_);
5687
# We can remove semicolons after blocks preceded by these keywords
5689
qw(BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue if elsif else
5690
unless while until for foreach);
5691
@is_block_without_semicolon{@_} = (1) x scalar(@_);
5693
# 'L' is token for opening { at hash key
5695
@is_opening_type{@_} = (1) x scalar(@_);
5697
# 'R' is token for closing } at hash key
5699
@is_closing_type{@_} = (1) x scalar(@_);
5702
@is_opening_token{@_} = (1) x scalar(@_);
5705
@is_closing_token{@_} = (1) x scalar(@_);
5709
# Three-valued whitespace flags: WS_YES (1), WS_OPTIONAL (0), WS_NO (-1).
# NOTE(review): presumably the answer to "should a space go here?" used by
# the whitespace-setting code elsewhere in this file -- confirm at the callers.
use constant WS_YES => 1;
5710
use constant WS_OPTIONAL => 0;
5711
use constant WS_NO => -1;
5713
# Token bond strengths.
# The named levels are ordered, from largest to smallest value:
# NO_BREAK > VERY_STRONG > STRONG > NOMINAL > WEAK > VERY_WEAK.
# NOTE(review): presumably a larger value makes a line break at that
# position less likely -- confirm where the bond strengths are assigned.
5714
use constant NO_BREAK => 10000;
5715
use constant VERY_STRONG => 100;
5716
use constant STRONG => 2.1;
5717
use constant NOMINAL => 1.1;
5718
use constant WEAK => 0.8;
5719
use constant VERY_WEAK => 0.55;
5721
# values for testing indexes in output array
# (prepare_for_new_input_lines and destroy_one_line_block reset their
# index variables to this sentinel)
5722
use constant UNDEFINED_INDEX => -1;
5724
# Maximum number of little messages; probably need not be changed.
5725
use constant MAX_NAG_MESSAGES => 6;
5727
# increment between sequence numbers for each type
# For example, ?: pairs might have numbers 7,11,15,...
# (consecutive numbers of one type differ by this constant, here 4)
5728
5729
use constant TYPE_SEQUENCE_INCREMENT => 4;
5733
# methods to count instances
5735
# Accessor: hand back the current value of the instance counter $_count.
sub get_count {
    return $_count;
}
5736
# Bump the instance counter $_count by one and return the new value.
sub _increment_count {
    return ++$_count;
}
5737
# Lower the instance counter $_count by one and return the new value.
sub _decrement_count {
    return --$_count;
}
5742
# trim leading and trailing whitespace from a string
5750
# given a string containing words separated by whitespace,
5751
# return the list of words
5756
return split( /\s+/, $str );
5759
# interface to Perl::Tidy::Logger routines
5761
if ($logger_object) {
5762
$logger_object->warning(@_);
5767
if ($logger_object) {
5768
$logger_object->complain(@_);
5772
sub write_logfile_entry {
5773
if ($logger_object) {
5774
$logger_object->write_logfile_entry(@_);
5779
if ($logger_object) {
5780
$logger_object->black_box(@_);
5784
sub report_definite_bug {
5785
if ($logger_object) {
5786
$logger_object->report_definite_bug();
5790
sub get_saw_brace_error {
5791
if ($logger_object) {
5792
$logger_object->get_saw_brace_error();
5796
sub we_are_at_the_last_line {
5797
if ($logger_object) {
5798
$logger_object->we_are_at_the_last_line();
5802
# interface to Perl::Tidy::Diagnostics routine
5803
sub write_diagnostics {
5805
if ($diagnostics_object) {
5806
$diagnostics_object->write_diagnostics(@_);
5810
sub get_added_semicolon_count {
5812
return $added_semicolon_count;
5816
$_[0]->_decrement_count();
5823
# we are given an object with a write_line() method to take lines
5825
sink_object => undef,
5826
diagnostics_object => undef,
5827
logger_object => undef,
5829
my %args = ( %defaults, @_ );
5831
$logger_object = $args{logger_object};
5832
$diagnostics_object = $args{diagnostics_object};
5834
# we create another object with a get_line() and peek_ahead() method
5835
my $sink_object = $args{sink_object};
5836
$file_writer_object =
5837
Perl::Tidy::FileWriter->new( $sink_object, $rOpts, $logger_object );
5839
# initialize the leading whitespace stack to negative levels
5840
# so that we can never run off the end of the stack
5841
$gnu_position_predictor = 0; # where the current token is predicted to be
5842
$max_gnu_stack_index = 0;
5843
$max_gnu_item_index = -1;
5844
$gnu_stack[0] = new_lp_indentation_item( 0, -1, -1, 0, 0 );
5845
@gnu_item_list = ();
5846
$last_output_indentation = 0;
5847
$last_indentation_written = 0;
5848
$last_unadjusted_indentation = 0;
5849
$last_leading_token = "";
5851
$saw_VERSION_in_this_file = !$rOpts->{'pass-version-line'};
5852
$saw_END_or_DATA_ = 0;
5854
@block_type_to_go = ();
5855
@type_sequence_to_go = ();
5856
@container_environment_to_go = ();
5857
@bond_strength_to_go = ();
5858
@forced_breakpoint_to_go = ();
5859
@lengths_to_go = (); # line length to start of ith token
5861
@matching_token_to_go = ();
5862
@mate_index_to_go = ();
5863
@nesting_blocks_to_go = ();
5864
@ci_levels_to_go = ();
5865
@nesting_depth_to_go = (0);
5866
@nobreak_to_go = ();
5867
@old_breakpoint_to_go = ();
5870
@leading_spaces_to_go = ();
5871
@reduced_spaces_to_go = ();
5874
@has_broken_sublist = ();
5875
@want_comma_break = ();
5878
$first_tabbing_disagreement = 0;
5879
$last_tabbing_disagreement = 0;
5880
$tabbing_disagreement_count = 0;
5881
$in_tabbing_disagreement = 0;
5882
$input_line_tabbing = undef;
5884
$last_line_type = "";
5885
$last_last_line_leading_level = 0;
5886
$last_line_leading_level = 0;
5887
$last_line_leading_type = '#';
5889
$last_nonblank_token = ';';
5890
$last_nonblank_type = ';';
5891
$last_last_nonblank_token = ';';
5892
$last_last_nonblank_type = ';';
5893
$last_nonblank_block_type = "";
5894
$last_output_level = 0;
5895
$looking_for_else = 0;
5896
$embedded_tab_count = 0;
5897
$first_embedded_tab_at = 0;
5898
$last_embedded_tab_at = 0;
5899
$deleted_semicolon_count = 0;
5900
$first_deleted_semicolon_at = 0;
5901
$last_deleted_semicolon_at = 0;
5902
$added_semicolon_count = 0;
5903
$first_added_semicolon_at = 0;
5904
$last_added_semicolon_at = 0;
5905
$last_line_had_side_comment = 0;
5906
$is_static_block_comment = 0;
5907
%postponed_breakpoint = ();
5909
# variables for adding side comments
5910
%block_leading_text = ();
5911
%block_opening_line_number = ();
5912
$csc_new_statement_ok = 1;
5914
%saved_opening_indentation = ();
5915
$in_format_skipping_section = 0;
5917
reset_block_text_accumulator();
5919
prepare_for_new_input_lines();
5921
$vertical_aligner_object =
5922
Perl::Tidy::VerticalAligner->initialize( $rOpts, $file_writer_object,
5923
$logger_object, $diagnostics_object );
5925
if ( $rOpts->{'entab-leading-whitespace'} ) {
5926
write_logfile_entry(
5927
"Leading whitespace will be entabbed with $rOpts->{'entab-leading-whitespace'} spaces per tab\n"
5930
elsif ( $rOpts->{'tabs'} ) {
5931
write_logfile_entry("Indentation will be with a tab character\n");
5934
write_logfile_entry(
5935
"Indentation will be with $rOpts->{'indent-columns'} spaces\n");
5938
# This was the start of a formatter referent, but object-oriented
5939
# coding has turned out to be too slow here.
5940
$formatter_self = {};
5942
bless $formatter_self, $class;
5944
# Safety check..this is not a class yet
5945
if ( _increment_count() > 1 ) {
5947
"Attempt to create more than 1 object in $class, which is not a true class yet\n";
5949
return $formatter_self;
5952
sub prepare_for_new_input_lines {
5954
$gnu_sequence_number++; # increment output batch counter
5955
%last_gnu_equals = ();
5956
%gnu_comma_count = ();
5957
%gnu_arrow_count = ();
5958
$line_start_index_to_go = 0;
5959
$max_gnu_item_index = UNDEFINED_INDEX;
5960
$index_max_forced_break = UNDEFINED_INDEX;
5961
$max_index_to_go = UNDEFINED_INDEX;
5962
$last_nonblank_index_to_go = UNDEFINED_INDEX;
5963
$last_nonblank_type_to_go = '';
5964
$last_nonblank_token_to_go = '';
5965
$last_last_nonblank_index_to_go = UNDEFINED_INDEX;
5966
$last_last_nonblank_type_to_go = '';
5967
$last_last_nonblank_token_to_go = '';
5968
$forced_breakpoint_count = 0;
5969
$forced_breakpoint_undo_count = 0;
5970
$rbrace_follower = undef;
5971
$lengths_to_go[0] = 0;
5972
$old_line_count_in_batch = 1;
5973
$comma_count_in_batch = 0;
5974
$starting_in_quote = 0;
5976
destroy_one_line_block();
5982
my ($line_of_tokens) = @_;
5984
my $line_type = $line_of_tokens->{_line_type};
5985
my $input_line = $line_of_tokens->{_line_text};
5987
# _line_type codes are:
5988
# SYSTEM - system-specific code before hash-bang line
5989
# CODE - line of perl code (including comments)
5990
# POD_START - line starting pod, such as '=head'
5991
# POD - pod documentation text
5992
# POD_END - last line of pod section, '=cut'
5993
# HERE - text of here-document
5994
# HERE_END - last line of here-doc (target word)
5995
# FORMAT - format section
5996
# FORMAT_END - last line of format section, '.'
5997
# DATA_START - __DATA__ line
5998
# DATA - unidentified text following __DATA__
5999
# END_START - __END__ line
6000
# END - unidentified text following __END__
6001
# ERROR - we are in big trouble, probably not a perl script
6003
# put a blank line after an =cut which comes before __END__ and __DATA__
6004
# (required by podchecker)
6005
if ( $last_line_type eq 'POD_END' && !$saw_END_or_DATA_ ) {
6006
$file_writer_object->reset_consecutive_blank_lines();
6007
if ( $input_line !~ /^\s*$/ ) { want_blank_line() }
6010
# handle line of code..
6011
if ( $line_type eq 'CODE' ) {
6013
# let logger see all non-blank lines of code
6014
if ( $input_line !~ /^\s*$/ ) {
6015
my $output_line_number =
6016
$vertical_aligner_object->get_output_line_number();
6017
black_box( $line_of_tokens, $output_line_number );
6019
print_line_of_tokens($line_of_tokens);
6022
# handle line of non-code..
6028
if ( $line_type =~ /^POD/ ) {
6030
# Pod docs should have a preceding blank line. But be
6031
# very careful in __END__ and __DATA__ sections, because:
6032
# 1. the user may be using this section for any purpose whatsoever
6033
# 2. the blank counters are not active there
6034
# It should be safe to request a blank line between an
6035
# __END__ or __DATA__ and an immediately following '=head'
6036
# type line, (types END_START and DATA_START), but not for
6037
# any other lines of type END or DATA.
6038
if ( $rOpts->{'delete-pod'} ) { $skip_line = 1; }
6039
if ( $rOpts->{'tee-pod'} ) { $tee_line = 1; }
6041
&& $line_type eq 'POD_START'
6042
&& $last_line_type !~ /^(END|DATA)$/ )
6048
# leave the blank counters in a predictable state
6049
# after __END__ or __DATA__
6050
elsif ( $line_type =~ /^(END_START|DATA_START)$/ ) {
6051
$file_writer_object->reset_consecutive_blank_lines();
6052
$saw_END_or_DATA_ = 1;
6055
# write unindented non-code line
6056
if ( !$skip_line ) {
6057
if ($tee_line) { $file_writer_object->tee_on() }
6058
write_unindented_line($input_line);
6059
if ($tee_line) { $file_writer_object->tee_off() }
6062
$last_line_type = $line_type;
6065
sub create_one_line_block {
6066
$index_start_one_line_block = $_[0];
6067
$semicolons_before_block_self_destruct = $_[1];
6070
sub destroy_one_line_block {
6071
$index_start_one_line_block = UNDEFINED_INDEX;
6072
$semicolons_before_block_self_destruct = 0;
6075
sub leading_spaces_to_go {
6077
# return the number of indentation spaces for a token in the output stream;
6078
# these were previously stored by 'set_leading_whitespace'.
6080
return get_SPACES( $leading_spaces_to_go[ $_[0] ] );
6086
# return the number of leading spaces associated with an indentation
6087
# variable. $indentation is either a constant number of spaces or an object
6088
# with a get_SPACES method.
6089
my $indentation = shift;
6090
return ref($indentation) ? $indentation->get_SPACES() : $indentation;
6093
sub get_RECOVERABLE_SPACES {
6095
# return the number of spaces (+ means shift right, - means shift left)
6096
# that we would like to shift a group of lines with the same indentation
6097
# to get them to line up with their opening parens
6098
my $indentation = shift;
6099
return ref($indentation) ? $indentation->get_RECOVERABLE_SPACES() : 0;
6102
sub get_AVAILABLE_SPACES_to_go {
6104
my $item = $leading_spaces_to_go[ $_[0] ];
6106
# return the number of available leading spaces associated with an
6107
# indentation variable. $indentation is either a constant number of
6108
# spaces or an object with a get_AVAILABLE_SPACES method.
6109
return ref($item) ? $item->get_AVAILABLE_SPACES() : 0;
6112
sub new_lp_indentation_item {
6114
# this is an interface to the IndentationItem class
6115
my ( $spaces, $level, $ci_level, $available_spaces, $align_paren ) = @_;
6117
# A negative level implies not to store the item in the item_list
6119
if ( $level >= 0 ) { $index = ++$max_gnu_item_index; }
6121
my $item = Perl::Tidy::IndentationItem->new(
6123
$ci_level, $available_spaces,
6124
$index, $gnu_sequence_number,
6125
$align_paren, $max_gnu_stack_index,
6126
$line_start_index_to_go,
6129
if ( $level >= 0 ) {
6130
$gnu_item_list[$max_gnu_item_index] = $item;
6136
sub set_leading_whitespace {
6138
# This routine defines leading whitespace
6139
# given: the level and continuation_level of a token,
6140
# define: space count of leading string which would apply if it
6141
# were the first token of a new line.
6143
my ( $level, $ci_level, $in_continued_quote ) = @_;
6145
# modify for -bli, which adds one continuation indentation for
6147
if ( $rOpts_brace_left_and_indent
6148
&& $max_index_to_go == 0
6149
&& $block_type_to_go[$max_index_to_go] =~ /$bli_pattern/o )
6154
# patch to avoid trouble when input file has negative indentation.
6155
# other logic should catch this error.
6156
if ( $level < 0 ) { $level = 0 }
6158
#-------------------------------------------
6159
# handle the standard indentation scheme
6160
#-------------------------------------------
6161
unless ($rOpts_line_up_parentheses) {
6163
$ci_level * $rOpts_continuation_indentation +
6164
$level * $rOpts_indent_columns;
6166
( $ci_level == 0 ) ? 0 : $rOpts_continuation_indentation;
6168
if ($in_continued_quote) {
6172
$leading_spaces_to_go[$max_index_to_go] = $space_count;
6173
$reduced_spaces_to_go[$max_index_to_go] = $space_count - $ci_spaces;
6177
#-------------------------------------------------------------
6178
# handle case of -lp indentation..
6179
#-------------------------------------------------------------
6181
# The continued_quote flag means that this is the first token of a
6182
# line, and it is the continuation of some kind of multi-line quote
6183
# or pattern. It requires special treatment because it must have no
6184
# added leading whitespace. So we create a special indentation item
6185
# which is not in the stack.
6186
if ($in_continued_quote) {
6187
my $space_count = 0;
6188
my $available_space = 0;
6189
$level = -1; # flag to prevent storing in item_list
6190
$leading_spaces_to_go[$max_index_to_go] =
6191
$reduced_spaces_to_go[$max_index_to_go] =
6192
new_lp_indentation_item( $space_count, $level, $ci_level,
6193
$available_space, 0 );
6197
# get the top state from the stack
6198
my $space_count = $gnu_stack[$max_gnu_stack_index]->get_SPACES();
6199
my $current_level = $gnu_stack[$max_gnu_stack_index]->get_LEVEL();
6200
my $current_ci_level = $gnu_stack[$max_gnu_stack_index]->get_CI_LEVEL();
6202
my $type = $types_to_go[$max_index_to_go];
6203
my $token = $tokens_to_go[$max_index_to_go];
6204
my $total_depth = $nesting_depth_to_go[$max_index_to_go];
6206
if ( $type eq '{' || $type eq '(' ) {
6208
$gnu_comma_count{ $total_depth + 1 } = 0;
6209
$gnu_arrow_count{ $total_depth + 1 } = 0;
6211
# If we come to an opening token after an '=' token of some type,
6212
# see if it would be helpful to 'break' after the '=' to save space
6213
my $last_equals = $last_gnu_equals{$total_depth};
6214
if ( $last_equals && $last_equals > $line_start_index_to_go ) {
6216
# find the position if we break at the '='
6217
my $i_test = $last_equals;
6218
if ( $types_to_go[ $i_test + 1 ] eq 'b' ) { $i_test++ }
6221
##my $too_close = ($i_test==$max_index_to_go-1);
6223
my $test_position = total_line_length( $i_test, $max_index_to_go );
6227
# the equals is not just before an open paren (testing)
6230
# if we are beyond the midpoint
6231
$gnu_position_predictor > $half_maximum_line_length
6233
# or we are beyond the 1/4 point and there was an old
6234
# break at the equals
6236
$gnu_position_predictor > $half_maximum_line_length / 2
6238
$old_breakpoint_to_go[$last_equals]
6239
|| ( $last_equals > 0
6240
&& $old_breakpoint_to_go[ $last_equals - 1 ] )
6241
|| ( $last_equals > 1
6242
&& $types_to_go[ $last_equals - 1 ] eq 'b'
6243
&& $old_breakpoint_to_go[ $last_equals - 2 ] )
6249
# then make the switch -- note that we do not set a real
6250
# breakpoint here because we may not really need one; sub
6251
# scan_list will do that if necessary
6252
$line_start_index_to_go = $i_test + 1;
6253
$gnu_position_predictor = $test_position;
6258
# Check for decreasing depth ..
6259
# Note that one token may have both decreasing and then increasing
6260
# depth. For example, (level, ci) can go from (1,1) to (2,0). So,
6261
# in this example we would first go back to (1,0) then up to (2,0)
6263
if ( $level < $current_level || $ci_level < $current_ci_level ) {
6265
# loop to find the first entry at or completely below this level
6266
my ( $lev, $ci_lev );
6268
if ($max_gnu_stack_index) {
6270
# save index of token which closes this level
6271
$gnu_stack[$max_gnu_stack_index]->set_CLOSED($max_index_to_go);
6273
# Undo any extra indentation if we saw no commas
6274
my $available_spaces =
6275
$gnu_stack[$max_gnu_stack_index]->get_AVAILABLE_SPACES();
6277
my $comma_count = 0;
6278
my $arrow_count = 0;
6279
if ( $type eq '}' || $type eq ')' ) {
6280
$comma_count = $gnu_comma_count{$total_depth};
6281
$arrow_count = $gnu_arrow_count{$total_depth};
6282
$comma_count = 0 unless $comma_count;
6283
$arrow_count = 0 unless $arrow_count;
6285
$gnu_stack[$max_gnu_stack_index]->set_COMMA_COUNT($comma_count);
6286
$gnu_stack[$max_gnu_stack_index]->set_ARROW_COUNT($arrow_count);
6288
if ( $available_spaces > 0 ) {
6290
if ( $comma_count <= 0 || $arrow_count > 0 ) {
6292
my $i = $gnu_stack[$max_gnu_stack_index]->get_INDEX();
6294
$gnu_stack[$max_gnu_stack_index]
6295
->get_SEQUENCE_NUMBER();
6297
# Be sure this item was created in this batch. This
6298
# should be true because we delete any available
6299
# space from open items at the end of each batch.
6300
if ( $gnu_sequence_number != $seqno
6301
|| $i > $max_gnu_item_index )
6304
"Program bug with -lp. seqno=$seqno should be $gnu_sequence_number and i=$i should be less than max=$max_gnu_item_index\n"
6306
report_definite_bug();
6310
if ( $arrow_count == 0 ) {
6312
->permanently_decrease_AVAILABLE_SPACES(
6317
->tentatively_decrease_AVAILABLE_SPACES(
6324
$j <= $max_gnu_item_index ;
6329
->decrease_SPACES($available_spaces);
6336
--$max_gnu_stack_index;
6337
$lev = $gnu_stack[$max_gnu_stack_index]->get_LEVEL();
6338
$ci_lev = $gnu_stack[$max_gnu_stack_index]->get_CI_LEVEL();
6340
# stop when we reach a level at or below the current level
6341
if ( $lev <= $level && $ci_lev <= $ci_level ) {
6343
$gnu_stack[$max_gnu_stack_index]->get_SPACES();
6344
$current_level = $lev;
6345
$current_ci_level = $ci_lev;
6350
# reached bottom of stack .. should never happen because
6351
# only negative levels can get here, and $level was forced
6352
# to be positive above.
6355
"program bug with -lp: stack_error. level=$level; lev=$lev; ci_level=$ci_level; ci_lev=$ci_lev; rerun with -nlp\n"
6357
report_definite_bug();
6363
# handle increasing depth
6364
if ( $level > $current_level || $ci_level > $current_ci_level ) {
6366
# Compute the standard incremental whitespace. This will be
6367
# the minimum incremental whitespace that will be used. This
6368
# choice results in a smooth transition between the gnu-style
6369
# and the standard style.
6370
my $standard_increment =
6371
( $level - $current_level ) * $rOpts_indent_columns +
6372
( $ci_level - $current_ci_level ) * $rOpts_continuation_indentation;
6374
# Now we have to define how much extra incremental space
6375
# ("$available_space") we want. This extra space will be
6376
# reduced as necessary when long lines are encountered or when
6377
# it becomes clear that we do not have a good list.
6378
my $available_space = 0;
6379
my $align_paren = 0;
6382
# initialization on empty stack..
6383
if ( $max_gnu_stack_index == 0 ) {
6384
$space_count = $level * $rOpts_indent_columns;
6387
# if this is a BLOCK, add the standard increment
6388
elsif ($last_nonblank_block_type) {
6389
$space_count += $standard_increment;
6392
# if last nonblank token was not structural indentation,
6393
# just use standard increment
6394
elsif ( $last_nonblank_type ne '{' ) {
6395
$space_count += $standard_increment;
6398
# otherwise use the space to the first non-blank level change token
6401
$space_count = $gnu_position_predictor;
6403
my $min_gnu_indentation =
6404
$gnu_stack[$max_gnu_stack_index]->get_SPACES();
6406
$available_space = $space_count - $min_gnu_indentation;
6407
if ( $available_space >= $standard_increment ) {
6408
$min_gnu_indentation += $standard_increment;
6410
elsif ( $available_space > 1 ) {
6411
$min_gnu_indentation += $available_space + 1;
6413
elsif ( $last_nonblank_token =~ /^[\{\[\(]$/ ) {
6414
if ( ( $tightness{$last_nonblank_token} < 2 ) ) {
6415
$min_gnu_indentation += 2;
6418
$min_gnu_indentation += 1;
6422
$min_gnu_indentation += $standard_increment;
6424
$available_space = $space_count - $min_gnu_indentation;
6426
if ( $available_space < 0 ) {
6427
$space_count = $min_gnu_indentation;
6428
$available_space = 0;
6433
# update state, but not on a blank token
6434
if ( $types_to_go[$max_index_to_go] ne 'b' ) {
6436
$gnu_stack[$max_gnu_stack_index]->set_HAVE_CHILD(1);
6438
++$max_gnu_stack_index;
6439
$gnu_stack[$max_gnu_stack_index] =
6440
new_lp_indentation_item( $space_count, $level, $ci_level,
6441
$available_space, $align_paren );
6443
# If the opening paren is beyond the half-line length, then
6444
# we will use the minimum (standard) indentation. This will
6445
# help avoid problems associated with running out of space
6446
# near the end of a line. As a result, in deeply nested
6447
# lists, there will be some indentations which are limited
6448
# to this minimum standard indentation. But the most deeply
6449
# nested container will still probably be able to shift its
6450
# parameters to the right for proper alignment, so in most
6451
# cases this will not be noticeable.
6452
if ( $available_space > 0
6453
&& $space_count > $half_maximum_line_length )
6455
$gnu_stack[$max_gnu_stack_index]
6456
->tentatively_decrease_AVAILABLE_SPACES($available_space);
6461
# Count commas and look for non-list characters. Once we see a
6462
# non-list character, we give up and don't look for any more commas.
6463
if ( $type eq '=>' ) {
6464
$gnu_arrow_count{$total_depth}++;
6466
# tentatively treating '=>' like '=' for estimating breaks
6467
# TODO: this could use some experimentation
6468
$last_gnu_equals{$total_depth} = $max_index_to_go;
6471
elsif ( $type eq ',' ) {
6472
$gnu_comma_count{$total_depth}++;
6475
elsif ( $is_assignment{$type} ) {
6476
$last_gnu_equals{$total_depth} = $max_index_to_go;
6479
# this token might start a new line
6480
# if this is a non-blank..
6481
if ( $type ne 'b' ) {
6486
# this is the first nonblank token of the line
6487
$max_index_to_go == 1 && $types_to_go[0] eq 'b'
6489
# or previous character was one of these:
6490
|| $last_nonblank_type_to_go =~ /^([\:\?\,f])$/
6492
# or previous character was opening and this does not close it
6493
|| ( $last_nonblank_type_to_go eq '{' && $type ne '}' )
6494
|| ( $last_nonblank_type_to_go eq '(' and $type ne ')' )
6496
# or this token is one of these:
6497
|| $type =~ /^([\.]|\|\||\&\&)$/
6499
# or this is a closing structure
6500
|| ( $last_nonblank_type_to_go eq '}'
6501
&& $last_nonblank_token_to_go eq $last_nonblank_type_to_go )
6503
# or previous token was keyword 'return'
6504
|| ( $last_nonblank_type_to_go eq 'k'
6505
&& ( $last_nonblank_token_to_go eq 'return' && $type ne '{' ) )
6507
# or starting a new line at certain keywords is fine
6509
&& $is_if_unless_and_or_last_next_redo_return{$token} )
6511
# or this is after an assignment after a closing structure
6513
$is_assignment{$last_nonblank_type_to_go}
6515
$last_last_nonblank_type_to_go =~ /^[\}\)\]]$/
6517
# and it is significantly to the right
6518
|| $gnu_position_predictor > $half_maximum_line_length
6523
check_for_long_gnu_style_lines();
6524
$line_start_index_to_go = $max_index_to_go;
6526
# back up 1 token if we want to break before that type
6527
# otherwise, we may strand tokens like '?' or ':' on a line
6528
if ( $line_start_index_to_go > 0 ) {
6529
if ( $last_nonblank_type_to_go eq 'k' ) {
6531
if ( $want_break_before{$last_nonblank_token_to_go} ) {
6532
$line_start_index_to_go--;
6535
elsif ( $want_break_before{$last_nonblank_type_to_go} ) {
6536
$line_start_index_to_go--;
6542
# remember the predicted position of this token on the output line
6543
if ( $max_index_to_go > $line_start_index_to_go ) {
6544
$gnu_position_predictor =
6545
total_line_length( $line_start_index_to_go, $max_index_to_go );
6548
$gnu_position_predictor = $space_count +
6549
token_sequence_length( $max_index_to_go, $max_index_to_go );
6552
# store the indentation object for this token
6553
# this allows us to manipulate the leading whitespace
6554
# (in case we have to reduce indentation to fit a line) without
6555
# having to change any token values
6556
$leading_spaces_to_go[$max_index_to_go] = $gnu_stack[$max_gnu_stack_index];
6557
$reduced_spaces_to_go[$max_index_to_go] =
6558
( $max_gnu_stack_index > 0 && $ci_level )
6559
? $gnu_stack[ $max_gnu_stack_index - 1 ]
6560
: $gnu_stack[$max_gnu_stack_index];
sub check_for_long_gnu_style_lines {

    # Look at the current estimated maximum line length, and
    # remove some leading whitespace if it exceeds the desired maximum.
    #
    # Reads:    $gnu_position_predictor, $rOpts_maximum_line_length,
    #           @gnu_item_list, $max_gnu_item_index
    # Modifies: the whitespace items in @gnu_item_list and
    #           $gnu_position_predictor
    # Returns:  nothing useful

    # this is only for the '-lp' style
    return unless ($rOpts_line_up_parentheses);

    # nothing can be done if no stack items defined for this line
    return if ( $max_gnu_item_index == UNDEFINED_INDEX );

    # see if we have exceeded the maximum desired line length
    # keep 2 extra free because they are needed in some cases
    # (result of trial-and-error testing)
    my $spaces_needed =
      $gnu_position_predictor - $rOpts_maximum_line_length + 2;

    return if ( $spaces_needed < 0 );

    # We are over the limit, so try to remove a requested number of
    # spaces from leading whitespace.  We are only allowed to remove
    # from whitespace items created on this batch, since others have
    # already been used and cannot be undone.
    my @candidates = ();

    # loop over all whitespace items created for the current batch
    for my $index ( 0 .. $max_gnu_item_index ) {
        my $item = $gnu_item_list[$index];

        # item must still be open to be a candidate (otherwise it
        # cannot influence the current token)
        next if ( $item->get_CLOSED() >= 0 );

        my $available_spaces = $item->get_AVAILABLE_SPACES();

        if ( $available_spaces > 0 ) {
            push( @candidates, [ $index, $available_spaces ] );
        }
    }

    return unless (@candidates);

    # sort by available whitespace so that we can remove whitespace
    # from the maximum available first
    @candidates = sort { $b->[1] <=> $a->[1] } @candidates;

    # keep removing whitespace until we are done or have no more
    foreach my $candidate (@candidates) {
        my ( $first_index, $available_spaces ) = @{$candidate};
        my $deleted_spaces =
          ( $available_spaces > $spaces_needed )
          ? $spaces_needed
          : $available_spaces;

        # remove the incremental space from this item
        $gnu_item_list[$first_index]
          ->decrease_AVAILABLE_SPACES($deleted_spaces);

        # update the leading whitespace of this item and all items
        # that came after it
        for my $index ( $first_index .. $max_gnu_item_index ) {

            my $old_spaces = $gnu_item_list[$index]->get_SPACES();
            if ( $old_spaces > $deleted_spaces ) {
                $gnu_item_list[$index]->decrease_SPACES($deleted_spaces);
            }

            # shouldn't happen except for code bug:
            else {
                my $level     = $gnu_item_list[$first_index]->get_LEVEL();
                my $ci_level  = $gnu_item_list[$first_index]->get_CI_LEVEL();
                my $old_level = $gnu_item_list[$index]->get_LEVEL();
                my $old_ci_level = $gnu_item_list[$index]->get_CI_LEVEL();

                # the second 'ci=' reports the item being updated, so it
                # must use $old_ci_level rather than repeating $ci_level
                warning(
"program bug with -lp: want to delete $deleted_spaces from item $index, but old=$old_spaces deleted: lev=$level ci=$ci_level deleted: level=$old_level ci=$old_ci_level\n"
                );
                report_definite_bug();
            }
        }
        $gnu_position_predictor -= $deleted_spaces;
        $spaces_needed          -= $deleted_spaces;
        last unless ( $spaces_needed > 0 );
    }
}
sub finish_lp_batch {

    # This routine is called once after each output stream batch is
    # finished to undo indentation for all incomplete -lp
    # indentation levels.  It is too risky to leave a level open,
    # because then we can't backtrack in case of a long line to follow.
    # This means that comments and blank lines will disrupt this
    # indentation style.  But the vertical aligner may be able to
    # get the space back if there are side comments.
    #
    # Reads and modifies the whitespace items in @gnu_item_list
    # (indexes 0 .. $max_gnu_item_index).  Returns nothing.

    # this is only for the 'lp' style
    return unless ($rOpts_line_up_parentheses);

    # nothing can be done if no stack items defined for this line
    return if ( $max_gnu_item_index == UNDEFINED_INDEX );

    # loop over all whitespace items created for the current batch
    for my $index ( 0 .. $max_gnu_item_index ) {
        my $item = $gnu_item_list[$index];

        # only look for open items
        next if ( $item->get_CLOSED() >= 0 );

        # Tentatively remove all of the available space
        # (The vertical aligner will try to get it back later)
        my $available_spaces = $item->get_AVAILABLE_SPACES();
        if ( $available_spaces > 0 ) {

            # delete incremental space for this item
            $gnu_item_list[$index]
              ->tentatively_decrease_AVAILABLE_SPACES($available_spaces);

            # Reduce the total indentation space of any nodes that follow
            # Note that any such nodes must necessarily be dependents
            # of this node.
            foreach my $follower ( $index + 1 .. $max_gnu_item_index ) {
                $gnu_item_list[$follower]
                  ->decrease_SPACES($available_spaces);
            }
        }
    }
    return;
}
sub reduce_lp_indentation {

    # reduce the leading whitespace at token $i if possible by $spaces_wanted
    # (a large value of $spaces_wanted will remove all excess space)
    # NOTE: to be called from scan_list only for a sequence of tokens
    # contained between opening and closing parens/braces/brackets
    #
    # Returns whatever tentatively_decrease_AVAILABLE_SPACES returns
    # (presumably the number of spaces actually removed -- confirm against
    # that method), or 0 if nothing was removed.

    my ( $i, $spaces_wanted ) = @_;
    my $deleted_spaces = 0;

    my $item             = $leading_spaces_to_go[$i];
    my $available_spaces = $item->get_AVAILABLE_SPACES();

    # Only act if there is something to remove, and either the request
    # fits within the available space or this item has no child.
    if (
        $available_spaces > 0
        && ( ( $spaces_wanted <= $available_spaces )
            || !$item->get_HAVE_CHILD() )
      )
    {

        # we'll remove these spaces, but mark them as recoverable
        $deleted_spaces =
          $item->tentatively_decrease_AVAILABLE_SPACES($spaces_wanted);
    }

    return $deleted_spaces;
}
sub token_sequence_length {

    # return length of tokens ($ifirst .. $ilast) including first & last
    # returns 0 if $ifirst > $ilast
    #
    # The length is taken as a difference of entries in @lengths_to_go,
    # which is therefore read as a running total indexed by token.
    my ( $ifirst, $ilast ) = @_;
    return 0 if ( $ilast < 0 || $ifirst > $ilast );

    # a negative starting index means "from the very beginning"
    return $lengths_to_go[ $ilast + 1 ] if ( $ifirst < 0 );
    return $lengths_to_go[ $ilast + 1 ] - $lengths_to_go[$ifirst];
}
sub total_line_length {

    # return length of a line of tokens ($ifirst .. $ilast):
    # the leading spaces of the first token plus the token lengths
    my ( $ifirst, $ilast ) = @_;

    # a negative first index is treated as the first token
    if ( $ifirst < 0 ) { $ifirst = 0 }

    return leading_spaces_to_go($ifirst) +
      token_sequence_length( $ifirst, $ilast );
}
sub excess_line_length {

    # return number of characters by which a line of tokens ($ifirst..$ilast)
    # exceeds the allowable line length.
    # (the result is zero or negative when the line fits)
    my ( $ifirst, $ilast ) = @_;

    # a negative first index is treated as the first token
    if ( $ifirst < 0 ) { $ifirst = 0 }
    return leading_spaces_to_go($ifirst) +
      token_sequence_length( $ifirst, $ilast ) - $rOpts_maximum_line_length;
}
sub finish_formatting {

    # flush buffer and write any informative messages
    #
    # Writes summary entries to the logfile for added semicolons, deleted
    # semicolons, embedded tabs and indentation disagreements, then asks
    # the vertical aligner and the file writer to report anything unusual.
    my $self = shift;

    flush();
    $file_writer_object->decrement_output_line_number()
      ;    # fix up line number since it was incremented
    we_are_at_the_last_line();

    # semicolons which were added
    if ( $added_semicolon_count > 0 ) {
        my $first = ( $added_semicolon_count > 1 ) ? "First" : "";
        my $what =
          ( $added_semicolon_count > 1 ) ? "semicolons were" : "semicolon was";
        write_logfile_entry("$added_semicolon_count $what added:\n");
        write_logfile_entry(
            " $first at input line $first_added_semicolon_at\n");

        if ( $added_semicolon_count > 1 ) {
            write_logfile_entry(
                " Last at input line $last_added_semicolon_at\n");
        }
        write_logfile_entry(" (Use -nasc to prevent semicolon addition)\n");
        write_logfile_entry("\n");
    }

    # semicolons which were deleted
    if ( $deleted_semicolon_count > 0 ) {
        my $first = ( $deleted_semicolon_count > 1 ) ? "First" : "";
        my $what =
          ( $deleted_semicolon_count > 1 )
          ? "semicolons were"
          : "semicolon was";
        write_logfile_entry(
            "$deleted_semicolon_count unnecessary $what deleted:\n");
        write_logfile_entry(
            " $first at input line $first_deleted_semicolon_at\n");

        if ( $deleted_semicolon_count > 1 ) {
            write_logfile_entry(
                " Last at input line $last_deleted_semicolon_at\n");
        }
        write_logfile_entry(" (Use -ndsc to prevent semicolon deletion)\n");
        write_logfile_entry("\n");
    }

    # quotes or patterns which contained tab characters
    if ( $embedded_tab_count > 0 ) {
        my $first = ( $embedded_tab_count > 1 ) ? "First" : "";
        my $what =
          ( $embedded_tab_count > 1 )
          ? "quotes or patterns"
          : "quote or pattern";
        write_logfile_entry("$embedded_tab_count $what had embedded tabs:\n");
        write_logfile_entry(
"This means the display of this script could vary with device or software\n"
        );
        write_logfile_entry(" $first at input line $first_embedded_tab_at\n");

        if ( $embedded_tab_count > 1 ) {
            write_logfile_entry(
                " Last at input line $last_embedded_tab_at\n");
        }
        write_logfile_entry("\n");
    }

    # indentation disagreements
    if ($first_tabbing_disagreement) {
        write_logfile_entry(
"First indentation disagreement seen at input line $first_tabbing_disagreement\n"
        );
    }

    if ($in_tabbing_disagreement) {
        write_logfile_entry(
"Ending with indentation disagreement which started at input line $in_tabbing_disagreement\n"
        );
    }
    else {

        if ($last_tabbing_disagreement) {

            write_logfile_entry(
"Last indentation disagreement seen at input line $last_tabbing_disagreement\n"
            );
        }
        else {
            write_logfile_entry("No indentation disagreement seen\n");
        }
    }
    write_logfile_entry("\n");

    $vertical_aligner_object->report_anything_unusual();

    $file_writer_object->report_line_length_errors();
}
sub check_options {

    # This routine is called to check the Opts hash after it is defined
    #
    # It takes a reference to the options hash, stores it in the
    # file-level $rOpts, builds the comment/block patterns, resolves
    # conflicting options (warning about each), and copies option
    # values into the file-level control hashes and $rOpts_* scalars
    # used by the formatter.

    ($rOpts) = @_;
    my ( $tabbing_string, $tab_msg );

    make_static_block_comment_pattern();
    make_static_side_comment_pattern();
    make_closing_side_comment_prefix();
    make_closing_side_comment_list_pattern();
    $format_skipping_pattern_begin =
      make_format_skipping_pattern( 'format-skipping-begin', '#<<<' );
    $format_skipping_pattern_end =
      make_format_skipping_pattern( 'format-skipping-end', '#>>>' );

    # If closing side comments ARE selected, then we can safely
    # delete old closing side comments unless closing side comment
    # warnings are requested.  This is a good idea because it will
    # eliminate any old csc's which fall below the line count threshold.
    # We cannot do this if warnings are turned on, though, because we
    # might delete some text which has been added.  So that must
    # be handled when comments are created.
    if ( $rOpts->{'closing-side-comments'} ) {
        if ( !$rOpts->{'closing-side-comment-warnings'} ) {
            $rOpts->{'delete-closing-side-comments'} = 1;
        }
    }

    # If closing side comments ARE NOT selected, but warnings ARE
    # selected and we ARE DELETING csc's, then we will pretend to be
    # adding with a huge interval.  This will force the comments to be
    # generated for comparison with the old comments, but not added.
    elsif ( $rOpts->{'closing-side-comment-warnings'} ) {
        if ( $rOpts->{'delete-closing-side-comments'} ) {
            $rOpts->{'delete-closing-side-comments'}  = 0;
            $rOpts->{'closing-side-comments'}         = 1;
            $rOpts->{'closing-side-comment-interval'} = 100000000;
        }
    }

    make_bli_pattern();
    make_block_brace_vertical_tightness_pattern();

    if ( $rOpts->{'line-up-parentheses'} ) {

        if (   $rOpts->{'indent-only'}
            || !$rOpts->{'add-newlines'}
            || !$rOpts->{'delete-old-newlines'} )
        {
            warn <<EOM;
-----------------------------------------------------------------------
Conflict: -lp conflicts with -io, -fnl, -nanl, or -ndnl; ignoring -lp

The -lp indentation logic requires that perltidy be able to coordinate
arbitrarily large numbers of line breakpoints. This isn't possible
with these flags. Sometimes an acceptable workaround is to use -wocb=3
-----------------------------------------------------------------------
EOM
            $rOpts->{'line-up-parentheses'} = 0;
        }
    }

    # At present, tabs are not compatible with the line-up-parentheses style
    # (it would be possible to entab the total leading whitespace
    # just prior to writing the line, if desired).
    if ( $rOpts->{'line-up-parentheses'} && $rOpts->{'tabs'} ) {
        warn <<EOM;
Conflict: -t (tabs) cannot be used with the -lp option; ignoring -t; see -et.
EOM
        $rOpts->{'tabs'} = 0;
    }

    # Likewise, tabs are not compatible with outdenting..
    if ( $rOpts->{'outdent-keywords'} && $rOpts->{'tabs'} ) {
        warn <<EOM;
Conflict: -t (tabs) cannot be used with the -okw options; ignoring -t; see -et.
EOM
        $rOpts->{'tabs'} = 0;
    }

    if ( $rOpts->{'outdent-labels'} && $rOpts->{'tabs'} ) {
        warn <<EOM;
Conflict: -t (tabs) cannot be used with the -ola option; ignoring -t; see -et.
EOM
        $rOpts->{'tabs'} = 0;
    }

    if ( !$rOpts->{'space-for-semicolon'} ) {
        $want_left_space{'f'} = -1;
    }

    if ( $rOpts->{'space-terminal-semicolon'} ) {
        $want_left_space{';'} = 1;
    }

    # implement outdenting preferences for keywords
    # (from here on @_ is reused as a scratch list)
    %outdent_keyword = ();
    unless ( @_ = split_words( $rOpts->{'outdent-keyword-okl'} ) ) {
        @_ = qw(next last redo goto return);    # defaults
    }

    # FUTURE: if not a keyword, assume that it is an identifier
    foreach (@_) {
        if ( $Perl::Tidy::Tokenizer::is_keyword{$_} ) {
            $outdent_keyword{$_} = 1;
        }
        else {
            warn "ignoring '$_' in -okwl list; not a perl keyword";
        }
    }

    # implement user whitespace preferences
    if ( @_ = split_words( $rOpts->{'want-left-space'} ) ) {
        @want_left_space{@_} = (1) x scalar(@_);
    }

    if ( @_ = split_words( $rOpts->{'want-right-space'} ) ) {
        @want_right_space{@_} = (1) x scalar(@_);
    }

    if ( @_ = split_words( $rOpts->{'nowant-left-space'} ) ) {
        @want_left_space{@_} = (-1) x scalar(@_);
    }

    if ( @_ = split_words( $rOpts->{'nowant-right-space'} ) ) {
        @want_right_space{@_} = (-1) x scalar(@_);
    }
    if ( $rOpts->{'dump-want-left-space'} ) {
        dump_want_left_space(*STDOUT);
        exit 1;
    }

    if ( $rOpts->{'dump-want-right-space'} ) {
        dump_want_right_space(*STDOUT);
        exit 1;
    }

    # default keywords for which space is introduced before an opening paren
    # (at present, including them messes up vertical alignment)
    @_ = qw(my local our and or err eq ne if else elsif until
      unless while for foreach return switch case given when);
    @space_after_keyword{@_} = (1) x scalar(@_);

    # allow user to modify these defaults
    if ( @_ = split_words( $rOpts->{'space-after-keyword'} ) ) {
        @space_after_keyword{@_} = (1) x scalar(@_);
    }

    if ( @_ = split_words( $rOpts->{'nospace-after-keyword'} ) ) {
        @space_after_keyword{@_} = (0) x scalar(@_);
    }

    # implement user break preferences
    my @all_operators = qw(% + - * / x != == >= <= =~ !~ < > | &
      = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=
      . : ? && || and or err xor
    );

    # Swap bond strengths, where needed, so that the left bond strength
    # of each listed token is not less than its right bond strength.
    # NOTE: $tok aliases the caller's list element, so the '?' patch
    # below also rewrites that element in the list which was passed in.
    my $break_after = sub {
        foreach my $tok (@_) {
            if ( $tok eq '?' ) { $tok = ':' }    # patch to coordinate ?/:
            my $lbs = $left_bond_strength{$tok};
            my $rbs = $right_bond_strength{$tok};
            if ( defined($lbs) && defined($rbs) && $lbs < $rbs ) {
                ( $right_bond_strength{$tok}, $left_bond_strength{$tok} ) =
                  ( $lbs, $rbs );
            }
        }
    };

    # Swap bond strengths, where needed, so that the right bond strength
    # of each listed token is not less than its left bond strength.
    my $break_before = sub {
        foreach my $tok (@_) {
            my $lbs = $left_bond_strength{$tok};
            my $rbs = $right_bond_strength{$tok};
            if ( defined($lbs) && defined($rbs) && $rbs < $lbs ) {
                ( $right_bond_strength{$tok}, $left_bond_strength{$tok} ) =
                  ( $lbs, $rbs );
            }
        }
    };

    $break_after->(@all_operators) if ( $rOpts->{'break-after-all-operators'} );
    $break_before->(@all_operators)
      if ( $rOpts->{'break-before-all-operators'} );

    $break_after->( split_words( $rOpts->{'want-break-after'} ) );
    $break_before->( split_words( $rOpts->{'want-break-before'} ) );

    # make note if breaks are before certain key types
    %want_break_before = ();
    foreach my $tok ( @all_operators, ',' ) {
        $want_break_before{$tok} =
          $left_bond_strength{$tok} < $right_bond_strength{$tok};
    }

    # Coordinate ?/: breaks, which must be similar
    if ( !$want_break_before{':'} ) {
        $want_break_before{'?'}   = $want_break_before{':'};
        $right_bond_strength{'?'} = $right_bond_strength{':'} + 0.01;
        $left_bond_strength{'?'}  = NO_BREAK;
    }

    # Define here tokens which may follow the closing brace of a do statement
    # on the same line, as in:
    #   } while ( $something);
    @_ = qw(until while unless if ; : );
    push @_, ',';
    @is_do_follower{@_} = (1) x scalar(@_);

    # These tokens may follow the closing brace of an if or elsif block.
    # In other words, for cuddled else we want code to look like:
    #   } elsif ( $something) {
    #   } else {
    if ( $rOpts->{'cuddled-else'} ) {
        @_ = qw(else elsif);
        @is_if_brace_follower{@_} = (1) x scalar(@_);
    }
    else {
        %is_if_brace_follower = ();
    }

    # nothing can follow the closing curly of an else { } block:
    %is_else_brace_follower = ();

    # what can follow a multi-line anonymous sub definition closing curly:
    @_ = qw# ; : => or and  && || ~~ !~~ ) #;
    push @_, ',';
    @is_anon_sub_brace_follower{@_} = (1) x scalar(@_);

    # what can follow a one-line anonymous sub closing curly:
    # one-line anonymous subs also have ']' here...
    # see tk3.t and PP.pm
    @_ = qw#  ; : => or and  && || ) ] ~~ !~~ #;
    push @_, ',';
    @is_anon_sub_1_brace_follower{@_} = (1) x scalar(@_);

    # What can follow a closing curly of a block
    # which is not an if/elsif/else/do/sort/map/grep/eval/sub
    # Testfiles: 'Toolbar.pm', 'Menubar.pm', bless.t, '3rules.pl'
    @_ = qw#  ; : => or and  && || ) #;
    push @_, ',';

    # allow cuddled continue if cuddled else is specified
    if ( $rOpts->{'cuddled-else'} ) { push @_, 'continue'; }

    @is_other_brace_follower{@_} = (1) x scalar(@_);

    $right_bond_strength{'{'} = WEAK;
    $left_bond_strength{'{'}  = VERY_STRONG;

    # make -l=0  equal to -l=infinite
    if ( !$rOpts->{'maximum-line-length'} ) {
        $rOpts->{'maximum-line-length'} = 1000000;
    }

    # make -lbl=0  equal to -lbl=infinite
    if ( !$rOpts->{'long-block-line-count'} ) {
        $rOpts->{'long-block-line-count'} = 1000000;
    }

    # translate a symbolic output line ending into the actual characters
    my $ole = $rOpts->{'output-line-ending'};
    if ($ole) {
        my %endings = (
            dos  => "\015\012",
            win  => "\015\012",
            mac  => "\015",
            unix => "\012",
        );
        $ole = lc $ole;
        unless ( $rOpts->{'output-line-ending'} = $endings{$ole} ) {
            my $str = join " ", keys %endings;
            die <<EOM;
Unrecognized line ending '$ole'; expecting one of: $str
EOM
        }
        if ( $rOpts->{'preserve-line-endings'} ) {
            warn "Ignoring -ple; conflicts with -ole\n";
            $rOpts->{'preserve-line-endings'} = undef;
        }
    }

    # hashes used to simplify setting whitespace
    %tightness = (
        '{' => $rOpts->{'brace-tightness'},
        '}' => $rOpts->{'brace-tightness'},
        '(' => $rOpts->{'paren-tightness'},
        ')' => $rOpts->{'paren-tightness'},
        '[' => $rOpts->{'square-bracket-tightness'},
        ']' => $rOpts->{'square-bracket-tightness'},
    );
    %matching_token = (
        '{' => '}',
        '(' => ')',
        '[' => ']',
        '?' => ':',
    );

    # frequently used parameters
    $rOpts_add_newlines          = $rOpts->{'add-newlines'};
    $rOpts_add_whitespace        = $rOpts->{'add-whitespace'};
    $rOpts_block_brace_tightness = $rOpts->{'block-brace-tightness'};
    $rOpts_block_brace_vertical_tightness =
      $rOpts->{'block-brace-vertical-tightness'};
    $rOpts_brace_left_and_indent   = $rOpts->{'brace-left-and-indent'};
    $rOpts_comma_arrow_breakpoints = $rOpts->{'comma-arrow-breakpoints'};
    $rOpts_break_at_old_ternary_breakpoints =
      $rOpts->{'break-at-old-ternary-breakpoints'};
    $rOpts_break_at_old_comma_breakpoints =
      $rOpts->{'break-at-old-comma-breakpoints'};
    $rOpts_break_at_old_keyword_breakpoints =
      $rOpts->{'break-at-old-keyword-breakpoints'};
    $rOpts_break_at_old_logical_breakpoints =
      $rOpts->{'break-at-old-logical-breakpoints'};
    $rOpts_closing_side_comment_else_flag =
      $rOpts->{'closing-side-comment-else-flag'};
    $rOpts_closing_side_comment_maximum_text =
      $rOpts->{'closing-side-comment-maximum-text'};
    $rOpts_continuation_indentation = $rOpts->{'continuation-indentation'};
    $rOpts_cuddled_else             = $rOpts->{'cuddled-else'};
    $rOpts_delete_old_whitespace    = $rOpts->{'delete-old-whitespace'};
    $rOpts_fuzzy_line_length        = $rOpts->{'fuzzy-line-length'};
    $rOpts_indent_columns           = $rOpts->{'indent-columns'};
    $rOpts_line_up_parentheses      = $rOpts->{'line-up-parentheses'};
    $rOpts_maximum_fields_per_table = $rOpts->{'maximum-fields-per-table'};
    $rOpts_maximum_line_length      = $rOpts->{'maximum-line-length'};
    $rOpts_short_concatenation_item_length =
      $rOpts->{'short-concatenation-item-length'};
    $rOpts_swallow_optional_blank_lines =
      $rOpts->{'swallow-optional-blank-lines'};
    $rOpts_ignore_old_breakpoints   = $rOpts->{'ignore-old-breakpoints'};
    $rOpts_format_skipping          = $rOpts->{'format-skipping'};
    $rOpts_space_function_paren     = $rOpts->{'space-function-paren'};
    $rOpts_space_keyword_paren      = $rOpts->{'space-keyword-paren'};
    $rOpts_keep_interior_semicolons = $rOpts->{'keep-interior-semicolons'};
    $half_maximum_line_length       = $rOpts_maximum_line_length / 2;

    # Note that both opening and closing tokens can access the opening
    # and closing flags of their container types.
    %opening_vertical_tightness = (
        '(' => $rOpts->{'paren-vertical-tightness'},
        '{' => $rOpts->{'brace-vertical-tightness'},
        '[' => $rOpts->{'square-bracket-vertical-tightness'},
        ')' => $rOpts->{'paren-vertical-tightness'},
        '}' => $rOpts->{'brace-vertical-tightness'},
        ']' => $rOpts->{'square-bracket-vertical-tightness'},
    );

    %closing_vertical_tightness = (
        '(' => $rOpts->{'paren-vertical-tightness-closing'},
        '{' => $rOpts->{'brace-vertical-tightness-closing'},
        '[' => $rOpts->{'square-bracket-vertical-tightness-closing'},
        ')' => $rOpts->{'paren-vertical-tightness-closing'},
        '}' => $rOpts->{'brace-vertical-tightness-closing'},
        ']' => $rOpts->{'square-bracket-vertical-tightness-closing'},
    );

    # assume flag for '>' same as ')' for closing qw quotes
    %closing_token_indentation = (
        ')' => $rOpts->{'closing-paren-indentation'},
        '}' => $rOpts->{'closing-brace-indentation'},
        ']' => $rOpts->{'closing-square-bracket-indentation'},
        '>' => $rOpts->{'closing-paren-indentation'},
    );

    %opening_token_right = (
        '(' => $rOpts->{'opening-paren-right'},
        '{' => $rOpts->{'opening-hash-brace-right'},
        '[' => $rOpts->{'opening-square-bracket-right'},
    );

    %stack_opening_token = (
        '(' => $rOpts->{'stack-opening-paren'},
        '{' => $rOpts->{'stack-opening-hash-brace'},
        '[' => $rOpts->{'stack-opening-square-bracket'},
    );

    %stack_closing_token = (
        ')' => $rOpts->{'stack-closing-paren'},
        '}' => $rOpts->{'stack-closing-hash-brace'},
        ']' => $rOpts->{'stack-closing-square-bracket'},
    );
}
sub make_static_block_comment_pattern {

    # create the pattern used to identify static block comments
    # Sets the file-level $static_block_comment_pattern; dies if the
    # user-supplied prefix is unusable.
    $static_block_comment_pattern = '^\s*##';

    # allow the user to change it
    if ( $rOpts->{'static-block-comment-prefix'} ) {
        my $prefix = $rOpts->{'static-block-comment-prefix'};
        $prefix =~ s/^\s*//;
        my $pattern = $prefix;

        # user may give leading caret to force matching left comments only
        if ( $prefix !~ /^\^#/ ) {
            if ( $prefix !~ /^#/ ) {
                die
"ERROR: the -sbcp prefix is '$prefix' but must begin with '#' or '^#'\n";
            }
            $pattern = '^\s*' . $prefix;
        }

        # Trial-compile the pattern to be sure it is a valid regex.
        # NOTE(review): this is a string eval of user-supplied text;
        # consider replacing it with a block eval around a regex match.
        eval "'##'=~/$pattern/";
        if ($@) {
            die
"ERROR: the -sbcp prefix '$prefix' causes the invalid regex '$pattern'\n";
        }
        $static_block_comment_pattern = $pattern;
    }
}
sub make_format_skipping_pattern {

    # Build the regex which recognizes a format-skipping marker comment.
    #   $opt_name - name of the option holding the user's marker text
    #   $default  - marker text to use if that option is not set
    # Returns the pattern string; dies if the marker does not begin
    # with '#' or does not form a valid regex.
    my ( $opt_name, $default ) = @_;
    my $param = $rOpts->{$opt_name};
    unless ($param) { $param = $default }
    $param =~ s/^\s*//;
    if ( $param !~ /^#/ ) {
        die "ERROR: the $opt_name parameter '$param' must begin with '#'\n";
    }

    # the marker must be at the start of the line and followed by a space
    my $pattern = '^' . $param . '\s';

    # Trial-compile the pattern to be sure it is a valid regex.
    # NOTE(review): this is a string eval of user-supplied text;
    # consider replacing it with a block eval around a regex match.
    eval "'#'=~/$pattern/";
    if ($@) {
        die
"ERROR: the $opt_name parameter '$param' causes the invalid regex '$pattern'\n";
    }
    return $pattern;
}
sub make_closing_side_comment_list_pattern {

    # turn any input list into a regex for recognizing selected block types
    # Sets the file-level $closing_side_comment_list_pattern; with no
    # user list the default pattern '^\w+' is kept.
    $closing_side_comment_list_pattern = '^\w+';
    if ( defined( $rOpts->{'closing-side-comment-list'} )
        && $rOpts->{'closing-side-comment-list'} )
    {
        $closing_side_comment_list_pattern =
          make_block_pattern( '-cscl', $rOpts->{'closing-side-comment-list'} );
    }
}
sub make_bli_pattern {

    # Set the file-level $bli_pattern from the block types listed for
    # the brace-left-and-indent option.  If the user gave a list it
    # replaces the current $bli_list_string; otherwise the existing
    # value of $bli_list_string (set elsewhere in the file) is used.
    if ( defined( $rOpts->{'brace-left-and-indent-list'} )
        && $rOpts->{'brace-left-and-indent-list'} )
    {
        $bli_list_string = $rOpts->{'brace-left-and-indent-list'};
    }

    $bli_pattern = make_block_pattern( '-blil', $bli_list_string );
}
sub make_block_brace_vertical_tightness_pattern {

    # turn any input list into a regex for recognizing selected block types
    # Sets the file-level $block_brace_vertical_tightness_pattern; with
    # no user list the default pattern below is kept.
    $block_brace_vertical_tightness_pattern =
      '^((if|else|elsif|unless|while|for|foreach|do|\w+:)$|sub)';

    if ( defined( $rOpts->{'block-brace-vertical-tightness-list'} )
        && $rOpts->{'block-brace-vertical-tightness-list'} )
    {
        $block_brace_vertical_tightness_pattern =
          make_block_pattern( '-bbvtl',
            $rOpts->{'block-brace-vertical-tightness-list'} );
    }
}
sub make_block_pattern {

    #  given a string of block-type keywords, return a regex to match them
    #  The only tricky part is that labels are indicated with a single ':'
    #  and the 'sub' token text may have additional text after it (name of
    #  sub).
    #
    #  Example:
    #
    #   input string: "if else elsif unless while for foreach do : sub";
    #   pattern:  '^((if|else|elsif|unless|while|for|foreach|do|\w+:)$|sub)';
    #
    #  $abbrev is the option abbreviation, used only in the warning
    #  about an unrecognized block type.

    my ( $abbrev, $string ) = @_;
    my @list  = split_words($string);
    my @words = ();
    my %seen;
    for my $i (@list) {

        # use each word only once
        next if $seen{$i};
        $seen{$i} = 1;
        if ( $i eq 'sub' ) {

            # 'sub' is not anchored at the end; it is added below
        }
        elsif ( $i eq ':' ) {
            push @words, '\w+:';
        }
        elsif ( $i =~ /^\w/ ) {
            push @words, $i;
        }
        else {
            warn "unrecognized block type $i after $abbrev, ignoring\n";
        }
    }
    my $pattern = '(' . join( '|', @words ) . ')$';
    if ( $seen{'sub'} ) {
        $pattern = '(' . $pattern . '|sub)';
    }
    $pattern = '^' . $pattern;
    return $pattern;
}
sub make_static_side_comment_pattern {

    # create the pattern used to identify static side comments
    # Sets the file-level $static_side_comment_pattern; dies if the
    # user-supplied prefix does not form a valid regex.
    $static_side_comment_pattern = '^##';

    # allow the user to change it
    if ( $rOpts->{'static-side-comment-prefix'} ) {
        my $prefix = $rOpts->{'static-side-comment-prefix'};
        $prefix =~ s/^\s*//;
        my $pattern = '^' . $prefix;

        # Trial-compile the pattern to be sure it is a valid regex.
        # NOTE(review): this is a string eval of user-supplied text;
        # consider replacing it with a block eval around a regex match.
        eval "'##'=~/$pattern/";
        if ($@) {
            die
"ERROR: the -sscp prefix '$prefix' causes the invalid regex '$pattern'\n";
        }
        $static_side_comment_pattern = $pattern;
    }
}
sub make_closing_side_comment_prefix {

    # Be sure we have a valid closing side comment prefix
    #
    # Stores the prefix actually used back into
    # $rOpts->{'closing-side-comment-prefix'} and the regex which
    # recognizes it into $closing_side_comment_prefix_pattern.
    my $default_prefix         = '## end';
    my $default_prefix_pattern = '^##\s+end';

    my $csc_prefix = $rOpts->{'closing-side-comment-prefix'};
    my $csc_prefix_pattern;
    if ( !defined($csc_prefix) ) {
        $csc_prefix         = $default_prefix;
        $csc_prefix_pattern = $default_prefix_pattern;
    }
    else {
        my $test_csc_prefix = $csc_prefix;
        if ( $test_csc_prefix !~ /^#/ ) {
            $test_csc_prefix = '#' . $test_csc_prefix;
        }

        # make a regex to recognize the prefix
        my $test_csc_prefix_pattern = $test_csc_prefix;

        # escape any special characters
        $test_csc_prefix_pattern =~ s/([^#\s\w])/\\$1/g;

        $test_csc_prefix_pattern = '^' . $test_csc_prefix_pattern;

        # allow exact number of intermediate spaces to vary
        $test_csc_prefix_pattern =~ s/\s+/\\s\+/g;

        # make sure we have a good pattern
        # if we fail this we probably have an error in escaping
        # characters.
        eval "'##'=~/$test_csc_prefix_pattern/";
        if ($@) {

            # shouldn't happen..must have screwed up escaping, above
            report_definite_bug();

            # report the pattern which was actually tested
            warn
"Program Error: the -cscp prefix '$csc_prefix' caused the invalid regex '$test_csc_prefix_pattern'\n";

            # just warn and keep going with defaults
            warn "Please consider using a simpler -cscp prefix\n";
            warn "Using default -cscp instead; please check output\n";

            # really install the defaults, so that the global pattern
            # is not left undefined
            $csc_prefix         = $default_prefix;
            $csc_prefix_pattern = $default_prefix_pattern;
        }
        else {
            $csc_prefix         = $test_csc_prefix;
            $csc_prefix_pattern = $test_csc_prefix_pattern;
        }
    }
    $rOpts->{'closing-side-comment-prefix'} = $csc_prefix;
    $closing_side_comment_prefix_pattern = $csc_prefix_pattern;
}
sub dump_want_left_space {

    # Print an explanatory header followed by the contents of
    # %want_left_space, one "token-type<TAB>value" line per key in
    # sorted order, to the filehandle given as the only argument.
    my $fh = shift;
    print $fh <<EOM;
These values are the main control of whitespace to the left of a token type;
They may be altered with the -wls parameter.
For a list of token types, use perltidy --dump-token-types (-dtt)
1 means the token wants a space to its left
-1 means the token does not want a space to its left
------------------------------------------------------------------------
EOM
    foreach my $type ( sort keys %want_left_space ) {
        print $fh "$type\t$want_left_space{$type}\n";
    }
}
sub dump_want_right_space {

    # Print an explanatory header followed by the contents of
    # %want_right_space, one "token-type<TAB>value" line per key in
    # sorted order, to the filehandle given as the only argument.
    my $fh = shift;
    print $fh <<EOM;
These values are the main control of whitespace to the right of a token type;
They may be altered with the -wrs parameter.
For a list of token types, use perltidy --dump-token-types (-dtt)
1 means the token wants a space to its right
-1 means the token does not want a space to its right
------------------------------------------------------------------------
EOM
    foreach my $type ( sort keys %want_right_space ) {
        print $fh "$type\t$want_right_space{$type}\n";
    }
}
{    # begin is_essential_whitespace

    # lookup tables private to is_essential_whitespace
    my %is_sort_grep_map;
    my %is_for_foreach;

    BEGIN {

        @_ = qw(sort grep map);
        @is_sort_grep_map{@_} = (1) x scalar(@_);

        @_ = qw(for foreach);
        @is_for_foreach{@_} = (1) x scalar(@_);

    }

    sub is_essential_whitespace {

        # Essential whitespace means whitespace which cannot be safely deleted
        # without risking the introduction of a syntax error.
        # We are given three tokens and their types:
        # ($tokenl,  $typel)  is the token to the left of the space in question
        # ($tokenr,  $typer)  is the token to the right of the space in question
        # ($tokenll, $typell) is previous nonblank token to the left of $tokenl
        #
        # Returns a true value if the space must be kept.
        #
        # This is a slow routine but is not needed too often except when -mangle
        # is used.
        #
        # Note: This routine should almost never need to be changed.  It is
        # for avoiding syntax problems rather than for formatting.
        my ( $tokenll, $typell, $tokenl, $typel, $tokenr, $typer ) = @_;

        my $result =

          # never combine two bare words or numbers
          # examples:  and ::ok(1)
          #            return ::spw(...)
          #            for bla::bla:: abc
          # example is "%overload:: and" in files Dumpvalue.pm or colonbug.pl
          #            $input eq"quit" to make $inputeq"quit"
          #            my $size=-s::SINK if $file;  <==OK but we won't do it
          ( ( $tokenl =~ /([\'\w]|\:\:)$/ ) && ( $tokenr =~ /^([\'\w]|\:\:)/ ) )

          # do not combine a number with a concatenation dot
          # example: pom.caputo:
          # $vt100_compatible ? "\e[0;0H" : ('-' x 78 . "\n");
          || ( ( $typel eq 'n' ) && ( $tokenr eq '.' ) )
          || ( ( $typer eq 'n' ) && ( $tokenl eq '.' ) )

          # do not join a minus with a bare word, because you might form
          # a file test operator.  Example from Complex.pm:
          # if (CORE::abs($z - i) < $eps); "z-i" would be taken as a file test.
          || ( ( $tokenl eq '-' ) && ( $tokenr =~ /^[_A-Za-z]$/ ) )

          # and something like this could become ambiguous without space
          # after the '-':
          #   use constant III=>1;
          #   $a = $b - III;
          # and even this:
          #   $a = - III;
          || ( ( $tokenl eq '-' )
            && ( $typer =~ /^[wC]$/ && $tokenr =~ /^[_A-Za-z]/ ) )

          # '= -' should not become =- or you will get a warning
          # about reversed -=
          # || ($tokenr eq '-')

          # keep a space between a quote and a bareword to prevent the
          # bareword from becoming a quote modifier.
          || ( ( $typel eq 'Q' ) && ( $tokenr =~ /^[a-zA-Z_]/ ) )

          # keep a space between a token ending in '$' and any word;
          # this caused trouble:  "die @$ if $@"
          || ( ( $typel eq 'i' && $tokenl =~ /\$$/ )
            && ( $tokenr =~ /^[a-zA-Z_]/ ) )

          # perl is very fussy about spaces before <<
          || ( $tokenr =~ /^\<\</ )

          # avoid combining tokens to create new meanings. Example:
          #     $a+ +$b must not become $a++$b
          || ( $is_digraph{ $tokenl . $tokenr } )
          || ( $is_trigraph{ $tokenl . $tokenr } )

          # another example: do not combine these two &'s:
          #     allow_options & &OPT_EXECCGI
          || ( $is_digraph{ $tokenl . substr( $tokenr, 0, 1 ) } )

          # don't combine $$ or $# with any alphanumeric
          # (testfile mangle.t with --mangle)
          || ( ( $tokenl =~ /^\$[\$\#]$/ ) && ( $tokenr =~ /^\w/ ) )

          # retain any space after possible filehandle
          # (testfiles prnterr1.t with --extrude and mangle.t with --mangle)
          || ( $typel eq 'Z' )

          # Perl is sensitive to whitespace after the + here:
          #  $b = xvals $a + 0.1 * yvals $a;
          || ( $typell eq 'Z' && $typel =~ /^[\/\?\+\-\*]$/ )

          # keep paren separate in 'use Foo::Bar ()'
          || ( $tokenr eq '('
            && $typel   eq 'w'
            && $typell  eq 'k'
            && $tokenll eq 'use' )

          # keep any space between filehandle and paren:
          # file mangle.t with --mangle:
          || ( $typel eq 'Y' && $tokenr eq '(' )

          # retain any space after here doc operator ( hereerr.t)
          || ( $typel eq 'h' )

          # be careful with a space around ++ and --, to avoid ambiguity as to
          # which token it applies
          || ( ( $typer =~ /^(pp|mm)$/ )     && ( $tokenl !~ /^[\;\{\(\[]/ ) )
          || ( ( $typel =~ /^(\+\+|\-\-)$/ ) && ( $tokenr !~ /^[\;\}\)\]]/ ) )

          # need space after foreach my; for example, this will fail in
          # older versions of Perl:
          # foreach my$ft(@filetypes)...
          || (
            $tokenl eq 'my'

            #  /^(for|foreach)$/
            && $is_for_foreach{$tokenll}
            && $tokenr =~ /^\$/
          )

          # must have space between grep and left paren; "grep(" will fail
          || ( $tokenr eq '(' && $is_sort_grep_map{$tokenl} )

          # don't stick numbers next to left parens, as in:
          #use Mail::Internet 1.28 (); (see Entity.pm, Head.pm, Test.pm)
          || ( ( $typel eq 'n' ) && ( $tokenr eq '(' ) )

          # We must be sure that a space between a ? and a quoted string
          # remains if the space before the ? remains.  [Loca.pm, lockarea]
          # ie,
          #    $b=join $comma ? ',' : ':', @_;  # ok
          #    $b=join $comma?',' : ':', @_;    # ok!
          #    $b=join $comma ?',' : ':', @_;   # error!
          # Not really required:
          ## || ( ( $typel eq '?' ) && ( $typer eq 'Q' ) )

          # do not remove space between an '&' and a bare word because
          # it may turn into a function evaluation, like here
          # between '&' and 'O_ACCMODE', producing a syntax error [File.pm]
          #    $opts{rdonly} = (($opts{mode} & O_ACCMODE) == O_RDONLY);
          || ( ( $typel eq '&' ) && ( $tokenr =~ /^[a-zA-Z_]/ ) )

          ;    # the value of this long logic sequence is the result we want
        return $result;
    }
}
7614
sub set_white_space_flag {
7616
# This routine examines each pair of nonblank tokens and
7617
# sets values for array @white_space_flag.
7619
# $white_space_flag[$j] is a flag indicating whether a white space
7620
# BEFORE token $j is needed, with the following values:
7622
# -1 do not want a space before token $j
7623
# 0 optional space or $j is a whitespace
7624
# 1 want a space before token $j
7627
# The values for the first token will be defined based
7628
# upon the contents of the "to_go" output array.
7630
# Note: retain debug print statements because they are usually
7631
# required after adding new token types.
7635
# initialize these global hashes, which control the use of
7636
# whitespace around tokens:
7641
# %space_after_keyword
7643
# Many token types are identical to the tokens themselves.
7644
# See the tokenizer for a complete list. Here are some special types:
7646
# f = semicolon in for statement
7649
# Note that :: is excluded since it should be contained in an identifier
7650
# Note that '->' is excluded because it never gets space
7651
# parentheses and brackets are excluded since they are handled specially
7652
# curly braces are included but may be overridden by logic, such as
7655
# NEW_TOKENS: create a whitespace rule here. This can be as
7656
# simple as adding your new letter to @spaces_both_sides, for
7660
@is_opening_type{@_} = (1) x scalar(@_);
7663
@is_closing_type{@_} = (1) x scalar(@_);
7665
my @spaces_both_sides = qw"
7666
+ - * / % ? = . : x < > | & ^ .. << >> ** && .. || // => += -=
7667
.= %= x= &= |= ^= *= <> <= >= == =~ !~ /= != ... <<= >>= ~~ !~~
7668
&&= ||= //= <=> A k f w F n C Y U G v
7671
my @spaces_left_side = qw"
7672
t ! ~ m p { \ h pp mm Z j
7674
push( @spaces_left_side, '#' ); # avoids warning message
7676
my @spaces_right_side = qw"
7677
; } ) ] R J ++ -- **=
7679
push( @spaces_right_side, ',' ); # avoids warning message
7680
@want_left_space{@spaces_both_sides} = (1) x scalar(@spaces_both_sides);
7681
@want_right_space{@spaces_both_sides} =
7682
(1) x scalar(@spaces_both_sides);
7683
@want_left_space{@spaces_left_side} = (1) x scalar(@spaces_left_side);
7684
@want_right_space{@spaces_left_side} = (-1) x scalar(@spaces_left_side);
7685
@want_left_space{@spaces_right_side} =
7686
(-1) x scalar(@spaces_right_side);
7687
@want_right_space{@spaces_right_side} =
7688
(1) x scalar(@spaces_right_side);
7689
$want_left_space{'L'} = WS_NO;
7690
$want_left_space{'->'} = WS_NO;
7691
$want_right_space{'->'} = WS_NO;
7692
$want_left_space{'**'} = WS_NO;
7693
$want_right_space{'**'} = WS_NO;
7695
# hash type information must stay tightly bound
7697
$binary_ws_rules{'i'}{'L'} = WS_NO;
7698
$binary_ws_rules{'i'}{'{'} = WS_YES;
7699
$binary_ws_rules{'k'}{'{'} = WS_YES;
7700
$binary_ws_rules{'U'}{'{'} = WS_YES;
7701
$binary_ws_rules{'i'}{'['} = WS_NO;
7702
$binary_ws_rules{'R'}{'L'} = WS_NO;
7703
$binary_ws_rules{'R'}{'{'} = WS_NO;
7704
$binary_ws_rules{'t'}{'L'} = WS_NO;
7705
$binary_ws_rules{'t'}{'{'} = WS_NO;
7706
$binary_ws_rules{'}'}{'L'} = WS_NO;
7707
$binary_ws_rules{'}'}{'{'} = WS_NO;
7708
$binary_ws_rules{'$'}{'L'} = WS_NO;
7709
$binary_ws_rules{'$'}{'{'} = WS_NO;
7710
$binary_ws_rules{'@'}{'L'} = WS_NO;
7711
$binary_ws_rules{'@'}{'{'} = WS_NO;
7712
$binary_ws_rules{'='}{'L'} = WS_YES;
7714
# the following includes ') {'
7715
# as in : if ( xxx ) { yyy }
7716
$binary_ws_rules{']'}{'L'} = WS_NO;
7717
$binary_ws_rules{']'}{'{'} = WS_NO;
7718
$binary_ws_rules{')'}{'{'} = WS_YES;
7719
$binary_ws_rules{')'}{'['} = WS_NO;
7720
$binary_ws_rules{']'}{'['} = WS_NO;
7721
$binary_ws_rules{']'}{'{'} = WS_NO;
7722
$binary_ws_rules{'}'}{'['} = WS_NO;
7723
$binary_ws_rules{'R'}{'['} = WS_NO;
7725
$binary_ws_rules{']'}{'++'} = WS_NO;
7726
$binary_ws_rules{']'}{'--'} = WS_NO;
7727
$binary_ws_rules{')'}{'++'} = WS_NO;
7728
$binary_ws_rules{')'}{'--'} = WS_NO;
7730
$binary_ws_rules{'R'}{'++'} = WS_NO;
7731
$binary_ws_rules{'R'}{'--'} = WS_NO;
7733
########################################################
7734
# should no longer be necessary (see niek.pl)
7735
##$binary_ws_rules{'k'}{':'} = WS_NO; # keep colon with label
7736
##$binary_ws_rules{'w'}{':'} = WS_NO;
7737
########################################################
7738
$binary_ws_rules{'i'}{'Q'} = WS_YES;
7739
$binary_ws_rules{'n'}{'('} = WS_YES; # occurs in 'use package n ()'
7741
# FIXME: we need to split 'i' into variables and functions
7742
# and have no space for functions but space for variables. For now,
7743
# I have a special patch in the special rules below
7744
$binary_ws_rules{'i'}{'('} = WS_NO;
7746
$binary_ws_rules{'w'}{'('} = WS_NO;
7747
$binary_ws_rules{'w'}{'{'} = WS_YES;
7749
my ( $jmax, $rtokens, $rtoken_type, $rblock_type ) = @_;
7750
my ( $last_token, $last_type, $last_block_type, $token, $type,
7752
my (@white_space_flag);
7753
my $j_tight_closing_paren = -1;
7755
if ( $max_index_to_go >= 0 ) {
7756
$token = $tokens_to_go[$max_index_to_go];
7757
$type = $types_to_go[$max_index_to_go];
7758
$block_type = $block_type_to_go[$max_index_to_go];
7766
# loop over all tokens
7769
for ( $j = 0 ; $j <= $jmax ; $j++ ) {
7771
if ( $$rtoken_type[$j] eq 'b' ) {
7772
$white_space_flag[$j] = WS_OPTIONAL;
7776
# set a default value, to be changed as needed
7778
$last_token = $token;
7780
$last_block_type = $block_type;
7781
$token = $$rtokens[$j];
7782
$type = $$rtoken_type[$j];
7783
$block_type = $$rblock_type[$j];
7785
#---------------------------------------------------------------
7787
# handle space on the inside of opening braces
7788
#---------------------------------------------------------------
7791
if ( $is_opening_type{$last_type} ) {
7793
$j_tight_closing_paren = -1;
7795
# let's keep empty matched braces together: () {} []
7797
if ( $token eq $matching_token{$last_token} ) {
7807
# we're considering the right of an opening brace
7808
# tightness = 0 means always pad inside with space
7809
# tightness = 1 means pad inside if "complex"
7810
# tightness = 2 means never pad inside with space
7813
if ( $last_type eq '{'
7814
&& $last_token eq '{'
7815
&& $last_block_type )
7817
$tightness = $rOpts_block_brace_tightness;
7819
else { $tightness = $tightness{$last_token} }
7821
if ( $tightness <= 0 ) {
7824
elsif ( $tightness > 1 ) {
7829
# Patch to count '-foo' as single token so that
7830
# each of $a{-foo} and $a{foo} and $a{'foo'} do
7831
# not get spaces with default formatting.
7835
&& $last_token eq '{'
7836
&& $$rtoken_type[ $j + 1 ] eq 'w' );
7838
# $j_next is where a closing token should be if
7839
# the container has a single token
7841
( $$rtoken_type[ $j_here + 1 ] eq 'b' )
7844
my $tok_next = $$rtokens[$j_next];
7845
my $type_next = $$rtoken_type[$j_next];
7847
# for tightness = 1, if there is just one token
7848
# within the matching pair, we will keep it tight
7850
$tok_next eq $matching_token{$last_token}
7852
# but watch out for this: [ [ ] (misc.t)
7853
&& $last_token ne $token
7857
# remember where to put the space for the closing paren
7858
$j_tight_closing_paren = $j_next;
7866
} # done with opening braces and brackets
7868
if FORMATTER_DEBUG_FLAG_WHITE;
7870
#---------------------------------------------------------------
7872
# handle space on inside of closing brace pairs
7873
#---------------------------------------------------------------
7876
if ( $is_closing_type{$type} ) {
7878
if ( $j == $j_tight_closing_paren ) {
7880
$j_tight_closing_paren = -1;
7885
if ( !defined($ws) ) {
7888
if ( $type eq '}' && $token eq '}' && $block_type ) {
7889
$tightness = $rOpts_block_brace_tightness;
7891
else { $tightness = $tightness{$token} }
7893
$ws = ( $tightness > 1 ) ? WS_NO : WS_YES;
7899
if FORMATTER_DEBUG_FLAG_WHITE;
7901
#---------------------------------------------------------------
7903
# use the binary table
7904
#---------------------------------------------------------------
7905
if ( !defined($ws) ) {
7906
$ws = $binary_ws_rules{$last_type}{$type};
7909
if FORMATTER_DEBUG_FLAG_WHITE;
7911
#---------------------------------------------------------------
7913
# some special cases
7914
#---------------------------------------------------------------
7915
if ( $token eq '(' ) {
7917
# This will have to be tweaked as tokenization changes.
7918
# We usually want a space at '} (', for example:
7919
# map { 1 * $_; } ( $y, $M, $w, $d, $h, $m, $s );
7922
# &{ $_->[1] }( delete $_[$#_]{ $_->[0] } );
7923
# At present, the above & block is marked as type L/R so this case
7924
# won't go through here.
7925
if ( $last_type eq '}' ) { $ws = WS_YES }
7927
# NOTE: some older versions of Perl had occasional problems if
7928
# spaces are introduced between keywords or functions and opening
7929
# parens. So the default is not to do this except in certain
7930
# cases. The current Perl seems to tolerate spaces.
7932
# Space between keyword and '('
7933
elsif ( $last_type eq 'k' ) {
7935
unless ( $rOpts_space_keyword_paren
7936
|| $space_after_keyword{$last_token} );
7939
# Space between function and '('
7940
# -----------------------------------------------------
7941
# 'w' and 'i' checks for something like:
7942
# myfun( &myfun( ->myfun(
7943
# -----------------------------------------------------
7944
elsif (( $last_type =~ /^[wU]$/ )
7945
|| ( $last_type =~ /^[wi]$/ && $last_token =~ /^(\&|->)/ ) )
7947
$ws = WS_NO unless ($rOpts_space_function_paren);
7950
# space between something like $i and ( in
7951
# for $i ( 0 .. 20 ) {
7952
# FIXME: eventually, type 'i' needs to be split into multiple
7953
# token types so this can be a hardwired rule.
7954
elsif ( $last_type eq 'i' && $last_token =~ /^[\$\%\@]/ ) {
7958
# allow constant function followed by '()' to retain no space
7959
elsif ( $last_type eq 'C' && $$rtokens[ $j + 1 ] eq ')' ) {
7964
# patch for SWITCH/CASE: make space at ']{' optional
7965
# since the '{' might begin a case or when block
7966
elsif ( ( $token eq '{' && $type ne 'L' ) && $last_token eq ']' ) {
7970
# keep space between 'sub' and '{' for anonymous sub definition
7971
if ( $type eq '{' ) {
7972
if ( $last_token eq 'sub' ) {
7976
# this is needed to avoid no space in '){'
7977
if ( $last_token eq ')' && $token eq '{' ) { $ws = WS_YES }
7979
# avoid any space before the brace or bracket in something like
7980
# @opts{'a','b',...}
7981
if ( $last_type eq 'i' && $last_token =~ /^\@/ ) {
7986
elsif ( $type eq 'i' ) {
7988
# never a space before ->
7989
if ( $token =~ /^\-\>/ ) {
7994
# retain any space between '-' and bare word
7995
elsif ( $type eq 'w' || $type eq 'C' ) {
7996
$ws = WS_OPTIONAL if $last_type eq '-';
7998
# never a space before ->
7999
if ( $token =~ /^\-\>/ ) {
8004
# retain any space between a bare word and a following '-'
8005
# example: avoid space between 'USER' and '-' here:
8006
# $myhash{USER-NAME}='steve';
8007
elsif ( $type eq 'm' || $type eq '-' ) {
8008
$ws = WS_OPTIONAL if ( $last_type eq 'w' );
8011
# always space before side comment
8012
elsif ( $type eq '#' ) { $ws = WS_YES if $j > 0 }
8014
# always preserve whatever space was used after a possible
8015
# filehandle (except _) or here doc operator
8018
&& ( ( $last_type eq 'Z' && $last_token ne '_' )
8019
|| $last_type eq 'h' )
8026
if FORMATTER_DEBUG_FLAG_WHITE;
8028
#---------------------------------------------------------------
8030
# default rules not covered above
8031
#---------------------------------------------------------------
8032
# if we fall through to here,
8033
# look at the pre-defined hash tables for the two tokens, and
8034
# if (they are equal) use the common value
8035
# if (either is zero or undef) use the other
8036
# if (either is -1) use it
8050
if ( !defined($ws) ) {
8051
my $wl = $want_left_space{$type};
8052
my $wr = $want_right_space{$last_type};
8053
if ( !defined($wl) ) { $wl = 0 }
8054
if ( !defined($wr) ) { $wr = 0 }
8055
$ws = ( ( $wl == $wr ) || ( $wl == -1 ) || !$wr ) ? $wl : $wr;
8058
if ( !defined($ws) ) {
8061
"WS flag is undefined for tokens $last_token $token\n");
8064
# Treat newline as a whitespace. Otherwise, we might combine
8065
# 'Send' and '-recipients' here according to the above rules:
8066
# my $msg = new Fax::Send
8067
# -recipients => $to,
8069
if ( $ws == 0 && $j == 0 ) { $ws = 1 }
8074
&& ( $last_type !~ /^[Zh]$/ ) )
8077
# If this happens, we have a non-fatal but undesirable
8078
# hole in the above rules which should be patched.
8080
"WS flag is zero for tokens $last_token $token\n");
8082
$white_space_flag[$j] = $ws;
8084
FORMATTER_DEBUG_FLAG_WHITE && do {
8085
my $str = substr( $last_token, 0, 15 );
8086
$str .= ' ' x ( 16 - length($str) );
8087
if ( !defined($ws_1) ) { $ws_1 = "*" }
8088
if ( !defined($ws_2) ) { $ws_2 = "*" }
8089
if ( !defined($ws_3) ) { $ws_3 = "*" }
8090
if ( !defined($ws_4) ) { $ws_4 = "*" }
8092
"WHITE: i=$j $str $last_type $type $ws_1 : $ws_2 : $ws_3 : $ws_4 : $ws \n";
8095
return \@white_space_flag;
8098
{ # begin print_line_of_tokens
8105
my $rcontainer_type;
8106
my $rcontainer_environment;
8109
my $rnesting_tokens;
8111
my $rnesting_blocks;
8114
my $python_indentation_level;
8116
# These local token variables are stored by store_token_to_go:
8119
my $container_environment;
8121
my $in_continued_quote;
8124
my $no_internal_newlines;
8130
# routine to pull the jth token from the line of tokens
8133
$token = $$rtokens[$j];
8134
$type = $$rtoken_type[$j];
8135
$block_type = $$rblock_type[$j];
8136
$container_type = $$rcontainer_type[$j];
8137
$container_environment = $$rcontainer_environment[$j];
8138
$type_sequence = $$rtype_sequence[$j];
8139
$level = $$rlevels[$j];
8140
$slevel = $$rslevels[$j];
8141
$nesting_blocks = $$rnesting_blocks[$j];
8142
$ci_level = $$rci_levels[$j];
8148
sub save_current_token {
8151
$block_type, $ci_level,
8152
$container_environment, $container_type,
8153
$in_continued_quote, $level,
8154
$nesting_blocks, $no_internal_newlines,
8156
$type, $type_sequence,
8160
sub restore_current_token {
8162
$block_type, $ci_level,
8163
$container_environment, $container_type,
8164
$in_continued_quote, $level,
8165
$nesting_blocks, $no_internal_newlines,
8167
$type, $type_sequence,
8172
# Routine to place the current token into the output stream.
8173
# Called once per output token.
8174
sub store_token_to_go {
8176
my $flag = $no_internal_newlines;
8177
if ( $_[0] ) { $flag = 1 }
8179
$tokens_to_go[ ++$max_index_to_go ] = $token;
8180
$types_to_go[$max_index_to_go] = $type;
8181
$nobreak_to_go[$max_index_to_go] = $flag;
8182
$old_breakpoint_to_go[$max_index_to_go] = 0;
8183
$forced_breakpoint_to_go[$max_index_to_go] = 0;
8184
$block_type_to_go[$max_index_to_go] = $block_type;
8185
$type_sequence_to_go[$max_index_to_go] = $type_sequence;
8186
$container_environment_to_go[$max_index_to_go] = $container_environment;
8187
$nesting_blocks_to_go[$max_index_to_go] = $nesting_blocks;
8188
$ci_levels_to_go[$max_index_to_go] = $ci_level;
8189
$mate_index_to_go[$max_index_to_go] = -1;
8190
$matching_token_to_go[$max_index_to_go] = '';
8191
$bond_strength_to_go[$max_index_to_go] = 0;
8193
# Note: negative levels are currently retained as a diagnostic so that
8194
# the 'final indentation level' is correctly reported for bad scripts.
8195
# But this means that every use of $level as an index must be checked.
8196
# If this becomes too much of a problem, we might give up and just clip
8198
## $levels_to_go[$max_index_to_go] = ( $level > 0 ) ? $level : 0;
8199
$levels_to_go[$max_index_to_go] = $level;
8200
$nesting_depth_to_go[$max_index_to_go] = ( $slevel >= 0 ) ? $slevel : 0;
8201
$lengths_to_go[ $max_index_to_go + 1 ] =
8202
$lengths_to_go[$max_index_to_go] + length($token);
8204
# Define the indentation that this token would have if it started
8205
# a new line. We have to do this now because we need to know this
8206
# when considering one-line blocks.
8207
set_leading_whitespace( $level, $ci_level, $in_continued_quote );
8209
if ( $type ne 'b' ) {
8210
$last_last_nonblank_index_to_go = $last_nonblank_index_to_go;
8211
$last_last_nonblank_type_to_go = $last_nonblank_type_to_go;
8212
$last_last_nonblank_token_to_go = $last_nonblank_token_to_go;
8213
$last_nonblank_index_to_go = $max_index_to_go;
8214
$last_nonblank_type_to_go = $type;
8215
$last_nonblank_token_to_go = $token;
8216
if ( $type eq ',' ) {
8217
$comma_count_in_batch++;
8221
FORMATTER_DEBUG_FLAG_STORE && do {
8222
my ( $a, $b, $c ) = caller();
8224
"STORE: from $a $c: storing token $token type $type lev=$level slev=$slevel at $max_index_to_go\n";
8228
sub insert_new_token_to_go {
8230
# insert a new token into the output stream. use same level as
8231
# previous token; assumes a character at max_index_to_go.
8232
save_current_token();
8233
( $token, $type, $slevel, $no_internal_newlines ) = @_;
8235
if ( $max_index_to_go == UNDEFINED_INDEX ) {
8236
warning("code bug: bad call to insert_new_token_to_go\n");
8238
$level = $levels_to_go[$max_index_to_go];
8240
# FIXME: it seems to be necessary to use the next, rather than
8241
# previous, value of this variable when creating a new blank (align.t)
8242
#my $slevel = $nesting_depth_to_go[$max_index_to_go];
8243
$nesting_blocks = $nesting_blocks_to_go[$max_index_to_go];
8244
$ci_level = $ci_levels_to_go[$max_index_to_go];
8245
$container_environment = $container_environment_to_go[$max_index_to_go];
8246
$in_continued_quote = 0;
8248
$type_sequence = "";
8249
store_token_to_go();
8250
restore_current_token();
8254
sub print_line_of_tokens {
8256
my $line_of_tokens = shift;
8258
# This routine is called once per input line to process all of
8259
# the tokens on that line. This is the first stage of
8262
# Full-line comments and blank lines may be processed immediately.
8264
# For normal lines of code, the tokens are stored one-by-one,
8265
# via calls to 'sub store_token_to_go', until a known line break
8266
# point is reached. Then, the batch of collected tokens is
8267
# passed along to 'sub output_line_to_go' for further
8268
# processing. This routine decides if there should be
8269
# whitespace between each pair of non-white tokens, so later
8270
# routines only need to decide on any additional line breaks.
8271
# Any whitespace is initially a single space character. Later,
8272
# the vertical aligner may expand that to be multiple space
8273
# characters if necessary for alignment.
8275
# extract input line number for error messages
8276
$input_line_number = $line_of_tokens->{_line_number};
8278
$rtoken_type = $line_of_tokens->{_rtoken_type};
8279
$rtokens = $line_of_tokens->{_rtokens};
8280
$rlevels = $line_of_tokens->{_rlevels};
8281
$rslevels = $line_of_tokens->{_rslevels};
8282
$rblock_type = $line_of_tokens->{_rblock_type};
8283
$rcontainer_type = $line_of_tokens->{_rcontainer_type};
8284
$rcontainer_environment = $line_of_tokens->{_rcontainer_environment};
8285
$rtype_sequence = $line_of_tokens->{_rtype_sequence};
8286
$input_line = $line_of_tokens->{_line_text};
8287
$rnesting_tokens = $line_of_tokens->{_rnesting_tokens};
8288
$rci_levels = $line_of_tokens->{_rci_levels};
8289
$rnesting_blocks = $line_of_tokens->{_rnesting_blocks};
8291
$in_continued_quote = $starting_in_quote =
8292
$line_of_tokens->{_starting_in_quote};
8293
$in_quote = $line_of_tokens->{_ending_in_quote};
8294
$ending_in_quote = $in_quote;
8295
$python_indentation_level =
8296
$line_of_tokens->{_python_indentation_level};
8301
my $next_nonblank_token;
8302
my $next_nonblank_token_type;
8303
my $rwhite_space_flag;
8305
$jmax = @$rtokens - 1;
8307
$container_type = "";
8308
$container_environment = "";
8309
$type_sequence = "";
8310
$no_internal_newlines = 1 - $rOpts_add_newlines;
8311
$is_static_block_comment = 0;
8313
# Handle a continued quote..
8314
if ($in_continued_quote) {
8316
# A line which is entirely a quote or pattern must go out
8317
# verbatim. Note: the \n is contained in $input_line.
8319
if ( ( $input_line =~ "\t" ) ) {
8320
note_embedded_tab();
8322
write_unindented_line("$input_line");
8323
$last_line_had_side_comment = 0;
8327
# prior to version 20010406, perltidy had a bug which placed
8328
# continuation indentation before the last line of some multiline
8329
# quotes and patterns -- exactly the lines passing this way.
8330
# To help find affected lines in scripts run with these
8331
# versions, run with '-chk', and it will warn of any quotes or
8332
# patterns which might have been modified by these early
8334
if ( $rOpts->{'check-multiline-quotes'} && $input_line =~ /^ / ) {
8336
"-chk: please check this line for extra leading whitespace\n"
8341
# Write line verbatim if we are in a formatting skip section
8342
if ($in_format_skipping_section) {
8343
write_unindented_line("$input_line");
8344
$last_line_had_side_comment = 0;
8346
# Note: extra space appended to comment simplifies pattern matching
8348
&& $$rtoken_type[0] eq '#'
8349
&& ( $$rtokens[0] . " " ) =~ /$format_skipping_pattern_end/o )
8351
$in_format_skipping_section = 0;
8352
write_logfile_entry("Exiting formatting skip section\n");
8357
# See if we are entering a formatting skip section
8358
if ( $rOpts_format_skipping
8360
&& $$rtoken_type[0] eq '#'
8361
&& ( $$rtokens[0] . " " ) =~ /$format_skipping_pattern_begin/o )
8364
$in_format_skipping_section = 1;
8365
write_logfile_entry("Entering formatting skip section\n");
8366
write_unindented_line("$input_line");
8367
$last_line_had_side_comment = 0;
8371
# delete trailing blank tokens
8372
if ( $jmax > 0 && $$rtoken_type[$jmax] eq 'b' ) { $jmax-- }
8374
# Handle a blank line..
8377
# For the 'swallow-optional-blank-lines' option, we delete all
8378
# old blank lines and let the blank line rules generate any
8380
if ( !$rOpts_swallow_optional_blank_lines ) {
8382
$file_writer_object->write_blank_code_line();
8383
$last_line_leading_type = 'b';
8385
$last_line_had_side_comment = 0;
8389
# see if this is a static block comment (starts with ## by default)
8390
my $is_static_block_comment_without_leading_space = 0;
8392
&& $$rtoken_type[0] eq '#'
8393
&& $rOpts->{'static-block-comments'}
8394
&& $input_line =~ /$static_block_comment_pattern/o )
8396
$is_static_block_comment = 1;
8397
$is_static_block_comment_without_leading_space =
8398
substr( $input_line, 0, 1 ) eq '#';
8401
# Check for comments which are line directives
8402
# Treat exactly as static block comments without leading space
8403
# reference: perlsyn, near end, section Plain Old Comments (Not!)
8404
# example: '# line 42 "new_filename.plx"'
8407
&& $$rtoken_type[0] eq '#'
8408
&& $input_line =~ /^\# \s*
8410
(?:\s("?)([^"]+)\2)? \s*
8414
$is_static_block_comment = 1;
8415
$is_static_block_comment_without_leading_space = 1;
8418
# create a hanging side comment if appropriate
8421
&& $$rtoken_type[0] eq '#' # only token is a comment
8422
&& $last_line_had_side_comment # last line had side comment
8423
&& $input_line =~ /^\s/ # there is some leading space
8424
&& !$is_static_block_comment # do not make static comment hanging
8425
&& $rOpts->{'hanging-side-comments'} # user is allowing this
8429
# We will insert an empty qw string at the start of the token list
8430
# to force this comment to be a side comment. The vertical aligner
8431
# should then line it up with the previous side comment.
8432
unshift @$rtoken_type, 'q';
8433
unshift @$rtokens, '';
8434
unshift @$rlevels, $$rlevels[0];
8435
unshift @$rslevels, $$rslevels[0];
8436
unshift @$rblock_type, '';
8437
unshift @$rcontainer_type, '';
8438
unshift @$rcontainer_environment, '';
8439
unshift @$rtype_sequence, '';
8440
unshift @$rnesting_tokens, $$rnesting_tokens[0];
8441
unshift @$rci_levels, $$rci_levels[0];
8442
unshift @$rnesting_blocks, $$rnesting_blocks[0];
8446
# remember if this line has a side comment
8447
$last_line_had_side_comment =
8448
( $jmax > 0 && $$rtoken_type[$jmax] eq '#' );
8450
# Handle a block (full-line) comment..
8451
if ( ( $jmax == 0 ) && ( $$rtoken_type[0] eq '#' ) ) {
8453
if ( $rOpts->{'delete-block-comments'} ) { return }
8455
if ( $rOpts->{'tee-block-comments'} ) {
8456
$file_writer_object->tee_on();
8459
destroy_one_line_block();
8460
output_line_to_go();
8462
# output a blank line before block comments
8464
$last_line_leading_type !~ /^[#b]$/
8465
&& $rOpts->{'blanks-before-comments'} # only if allowed
8467
$is_static_block_comment # never before static block comments
8470
flush(); # switching to new output stream
8471
$file_writer_object->write_blank_code_line();
8472
$last_line_leading_type = 'b';
8475
# TRIM COMMENTS -- This could be turned off as an option
8476
$$rtokens[0] =~ s/\s*$//; # trim right end
8479
$rOpts->{'indent-block-comments'}
8480
&& ( !$rOpts->{'indent-spaced-block-comments'}
8481
|| $input_line =~ /^\s+/ )
8482
&& !$is_static_block_comment_without_leading_space
8486
store_token_to_go();
8487
output_line_to_go();
8490
flush(); # switching to new output stream
8491
$file_writer_object->write_code_line( $$rtokens[0] . "\n" );
8492
$last_line_leading_type = '#';
8494
if ( $rOpts->{'tee-block-comments'} ) {
8495
$file_writer_object->tee_off();
8500
# compare input/output indentation except for continuation lines
8501
# (because they have an unknown amount of initial blank space)
8502
# and lines which are quotes (because they may have been outdented)
8503
# Note: this test is placed here because we know the continuation flag
8504
# at this point, which allows us to avoid non-meaningful checks.
8505
my $structural_indentation_level = $$rlevels[0];
8506
compare_indentation_levels( $python_indentation_level,
8507
$structural_indentation_level )
8508
unless ( $python_indentation_level < 0
8509
|| ( $$rci_levels[0] > 0 )
8510
|| ( ( $python_indentation_level == 0 ) && $$rtoken_type[0] eq 'Q' )
8513
# Patch needed for MakeMaker. Do not break a statement
8514
# in which $VERSION may be calculated. See MakeMaker.pm;
8515
# this is based on the coding in it.
8516
# The first line of a file that matches this will be eval'd:
8517
# /([\$*])(([\w\:\']*)\bVERSION)\b.*\=/
8519
# *VERSION = \'1.01';
8520
# ( $VERSION ) = '$Revision: 1.73 $ ' =~ /\$Revision:\s+([^\s]+)/;
8521
# We will pass such a line straight through without breaking
8522
# it unless -npvl is used
8524
my $is_VERSION_statement = 0;
8527
!$saw_VERSION_in_this_file
8528
&& $input_line =~ /VERSION/ # quick check to reject most lines
8529
&& $input_line =~ /([\$*])(([\w\:\']*)\bVERSION)\b.*\=/
8532
$saw_VERSION_in_this_file = 1;
8533
$is_VERSION_statement = 1;
8534
write_logfile_entry("passing VERSION line; -npvl deactivates\n");
8535
$no_internal_newlines = 1;
8538
# take care of indentation-only
8539
# NOTE: In previous versions we sent all qw lines out immediately here.
8540
# No longer doing this: also write a line which is entirely a 'qw' list
8541
# to allow stacking of opening and closing tokens. Note that interior
8542
# qw lines will still go out at the end of this routine.
8543
if ( $rOpts->{'indent-only'} ) {
8548
$token = $input_line;
8551
$container_type = "";
8552
$container_environment = "";
8553
$type_sequence = "";
8554
store_token_to_go();
8555
output_line_to_go();
8559
push( @$rtokens, ' ', ' ' ); # making $j+2 valid simplifies coding
8560
push( @$rtoken_type, 'b', 'b' );
8561
($rwhite_space_flag) =
8562
set_white_space_flag( $jmax, $rtokens, $rtoken_type, $rblock_type );
8564
# find input tabbing to allow checks for tabbing disagreement
8566
##$input_line_tabbing = "";
8567
##if ( $input_line =~ /^(\s*)/ ) { $input_line_tabbing = $1; }
8569
# if the buffer hasn't been flushed, add a leading space if
8570
# necessary to keep essential whitespace. This is really only
8571
# necessary if we are squeezing out all ws.
8572
if ( $max_index_to_go >= 0 ) {
8574
$old_line_count_in_batch++;
8577
is_essential_whitespace(
8578
$last_last_nonblank_token,
8579
$last_last_nonblank_type,
8580
$tokens_to_go[$max_index_to_go],
8581
$types_to_go[$max_index_to_go],
8587
my $slevel = $$rslevels[0];
8588
insert_new_token_to_go( ' ', 'b', $slevel,
8589
$no_internal_newlines );
8593
# If we just saw the end of an elsif block, write nag message
8594
# if we do not see another elsif or an else.
8595
if ($looking_for_else) {
8597
unless ( $$rtokens[0] =~ /^(elsif|else)$/ ) {
8598
write_logfile_entry("(No else block)\n");
8600
$looking_for_else = 0;
8603
# This is a good place to kill incomplete one-line blocks
8604
if ( ( $semicolons_before_block_self_destruct == 0 )
8605
&& ( $max_index_to_go >= 0 )
8606
&& ( $types_to_go[$max_index_to_go] eq ';' )
8607
&& ( $$rtokens[0] ne '}' ) )
8609
destroy_one_line_block();
8610
output_line_to_go();
8613
# loop to process the tokens one-by-one
8617
foreach $j ( 0 .. $jmax ) {
8619
# pull out the local values for this token
8622
if ( $type eq '#' ) {
8624
# trim trailing whitespace
8625
# (there is no option at present to prevent this)
8629
$rOpts->{'delete-side-comments'}
8631
# delete closing side comments if necessary
8632
|| ( $rOpts->{'delete-closing-side-comments'}
8633
&& $token =~ /$closing_side_comment_prefix_pattern/o
8634
&& $last_nonblank_block_type =~
8635
/$closing_side_comment_list_pattern/o )
8638
if ( $types_to_go[$max_index_to_go] eq 'b' ) {
8639
unstore_token_to_go();
8645
# If we are continuing after seeing a right curly brace, flush
8646
# buffer unless we see what we are looking for, as in
8648
if ( $rbrace_follower && $type ne 'b' ) {
8650
unless ( $rbrace_follower->{$token} ) {
8651
output_line_to_go();
8653
$rbrace_follower = undef;
8656
$j_next = ( $$rtoken_type[ $j + 1 ] eq 'b' ) ? $j + 2 : $j + 1;
8657
$next_nonblank_token = $$rtokens[$j_next];
8658
$next_nonblank_token_type = $$rtoken_type[$j_next];
8660
#--------------------------------------------------------
8661
# Start of section to patch token text
8662
#--------------------------------------------------------
8664
# Modify certain tokens here for whitespace
8665
# The following is not yet done, but could be:
8667
if ( $type =~ /^[wit]$/ ) {
8670
# change '$ var' to '$var' etc
8671
# '-> new' to '->new'
8672
if ( $token =~ /^([\$\&\%\*\@]|\-\>)\s/ ) {
8676
if ( $token =~ /^sub/ ) { $token =~ s/\s+/ /g }
8679
# change 'LABEL :' to 'LABEL:'
8680
elsif ( $type eq 'J' ) { $token =~ s/\s+//g }
8682
# patch to add space to something like "x10"
8683
# This avoids having to split this token in the pre-tokenizer
8684
elsif ( $type eq 'n' ) {
8685
if ( $token =~ /^x\d+/ ) { $token =~ s/x/x / }
8688
elsif ( $type eq 'Q' ) {
8689
note_embedded_tab() if ( $token =~ "\t" );
8691
# make note of something like '$var = s/xxx/yyy/;'
8692
# in case it should have been '$var =~ s/xxx/yyy/;'
8694
$token =~ /^(s|tr|y|m|\/)/
8695
&& $last_nonblank_token =~ /^(=|==|!=)$/
8697
# preceded by a simple scalar
8698
&& $last_last_nonblank_type eq 'i'
8699
&& $last_last_nonblank_token =~ /^\$/
8701
# followed by some kind of termination
8702
# (but give complaint if we can't see far enough ahead)
8703
&& $next_nonblank_token =~ /^[; \)\}]$/
8705
# scalar is not declared
8707
$types_to_go[0] eq 'k'
8708
&& $tokens_to_go[0] =~ /^(my|our|local)$/
8712
my $guess = substr( $last_nonblank_token, 0, 1 ) . '~';
8714
"Note: be sure you want '$last_nonblank_token' instead of '$guess' here\n"
8719
# trim blanks from right of qw quotes
8720
# (To avoid trimming qw quotes use -ntqw; the tokenizer handles this)
8721
elsif ( $type eq 'q' ) {
8723
note_embedded_tab() if ( $token =~ "\t" );
8726
#--------------------------------------------------------
8727
# End of section to patch token text
8728
#--------------------------------------------------------
8730
# insert any needed whitespace
8731
if ( ( $type ne 'b' )
8732
&& ( $max_index_to_go >= 0 )
8733
&& ( $types_to_go[$max_index_to_go] ne 'b' )
8734
&& $rOpts_add_whitespace )
8736
my $ws = $$rwhite_space_flag[$j];
8739
insert_new_token_to_go( ' ', 'b', $slevel,
8740
$no_internal_newlines );
8744
# Do not allow breaks which would promote a side comment to a
8745
# block comment. In order to allow a break before an opening
8746
# or closing BLOCK, followed by a side comment, those sections
8747
# of code will handle this flag separately.
8748
my $side_comment_follows = ( $next_nonblank_token_type eq '#' );
8749
my $is_opening_BLOCK =
8753
&& $block_type ne 't' );
8754
my $is_closing_BLOCK =
8758
&& $block_type ne 't' );
8760
if ( $side_comment_follows
8761
&& !$is_opening_BLOCK
8762
&& !$is_closing_BLOCK )
8764
$no_internal_newlines = 1;
8767
# We're only going to handle breaking for code BLOCKS at this
8768
# (top) level. Other indentation breaks will be handled by
8769
# sub scan_list, which is better suited to dealing with them.
8770
if ($is_opening_BLOCK) {
8772
# Tentatively output this token. This is required before
8773
# calling starting_one_line_block. We may have to unstore
8774
# it, though, if we have to break before it.
8775
store_token_to_go($side_comment_follows);
8777
# Look ahead to see if we might form a one-line block
8779
starting_one_line_block( $j, $jmax, $level, $slevel,
8780
$ci_level, $rtokens, $rtoken_type, $rblock_type );
8781
clear_breakpoint_undo_stack();
8783
# to simplify the logic below, set a flag to indicate if
8784
# this opening brace is far from the keyword which introduces it
8785
my $keyword_on_same_line = 1;
8786
if ( ( $max_index_to_go >= 0 )
8787
&& ( $last_nonblank_type eq ')' ) )
8789
if ( $block_type =~ /^(if|else|elsif)$/
8790
&& ( $tokens_to_go[0] eq '}' )
8791
&& $rOpts_cuddled_else )
8793
$keyword_on_same_line = 1;
8795
elsif ( ( $slevel < $nesting_depth_to_go[0] ) || $too_long )
8797
$keyword_on_same_line = 0;
8801
# decide if user requested break before '{'
8804
# use -bl flag if not a sub block of any type
8805
$block_type !~ /^sub/
8806
? $rOpts->{'opening-brace-on-new-line'}
8808
# use -sbl flag unless this is an anonymous sub block
8809
: $block_type !~ /^sub\W*$/
8810
? $rOpts->{'opening-sub-brace-on-new-line'}
8812
# do not break for anonymous subs
8815
# Break before an opening '{' ...
8821
# and we were unable to start looking for a block,
8822
&& $index_start_one_line_block == UNDEFINED_INDEX
8824
# or if it will not be on same line as its keyword, so that
8825
# it will be outdented (eval.t, overload.t), and the user
8826
# has not insisted on keeping it on the right
8827
|| ( !$keyword_on_same_line
8828
&& !$rOpts->{'opening-brace-always-on-right'} )
8833
# but only if allowed
8834
unless ($no_internal_newlines) {
8836
# since we already stored this token, we must unstore it
8837
unstore_token_to_go();
8839
# then output the line
8840
output_line_to_go();
8842
# and now store this token at the start of a new line
8843
store_token_to_go($side_comment_follows);
8847
# Now update for side comment
8848
if ($side_comment_follows) { $no_internal_newlines = 1 }
8850
# now output this line
8851
unless ($no_internal_newlines) {
8852
output_line_to_go();
8856
elsif ($is_closing_BLOCK) {
8858
# If there is a pending one-line block ..
8859
if ( $index_start_one_line_block != UNDEFINED_INDEX ) {
8861
# we have to terminate it if..
8864
# it is too long (final length may be different from
8865
# initial estimate). note: must allow 1 space for this token
8866
excess_line_length( $index_start_one_line_block,
8867
$max_index_to_go ) >= 0
8869
# or if it has too many semicolons
8870
|| ( $semicolons_before_block_self_destruct == 0
8871
&& $last_nonblank_type ne ';' )
8874
destroy_one_line_block();
8878
# put a break before this closing curly brace if appropriate
8879
unless ( $no_internal_newlines
8880
|| $index_start_one_line_block != UNDEFINED_INDEX )
8883
# add missing semicolon if ...
8884
# there are some tokens
8886
( $max_index_to_go > 0 )
8888
# and we don't have one
8889
&& ( $last_nonblank_type ne ';' )
8891
# patch until some block type issues are fixed:
8892
# Do not add semi-colon for block types '{',
8893
# '}', and ';' because we cannot be sure yet
8894
# that this is a block and not an anonomyous
8895
# hash (blktype.t, blktype1.t)
8896
&& ( $block_type !~ /^[\{\};]$/ )
8898
# it seems best not to add semicolons in these
8899
# special block types: sort|map|grep
8900
&& ( !$is_sort_map_grep{$block_type} )
8902
# and we are allowed to do so.
8903
&& $rOpts->{'add-semicolons'}
8907
save_current_token();
8910
$level = $levels_to_go[$max_index_to_go];
8911
$slevel = $nesting_depth_to_go[$max_index_to_go];
8913
$nesting_blocks_to_go[$max_index_to_go];
8914
$ci_level = $ci_levels_to_go[$max_index_to_go];
8916
$container_type = "";
8917
$container_environment = "";
8918
$type_sequence = "";
8920
# Note - we remove any blank AFTER extracting its
8921
# parameters such as level, etc, above
8922
if ( $types_to_go[$max_index_to_go] eq 'b' ) {
8923
unstore_token_to_go();
8925
store_token_to_go();
8927
note_added_semicolon();
8928
restore_current_token();
8931
# then write out everything before this closing curly brace
8932
output_line_to_go();
8936
# Now update for side comment
8937
if ($side_comment_follows) { $no_internal_newlines = 1 }
8939
# store the closing curly brace
8940
store_token_to_go();
8942
# ok, we just stored a closing curly brace. Often, but
8943
# not always, we want to end the line immediately.
8944
# So now we have to check for special cases.
8946
# if this '}' successfully ends a one-line block..
8947
my $is_one_line_block = 0;
8949
if ( $index_start_one_line_block != UNDEFINED_INDEX ) {
8951
# Remember the type of token just before the
8952
# opening brace. It would be more general to use
8953
# a stack, but this will work for one-line blocks.
8954
$is_one_line_block =
8955
$types_to_go[$index_start_one_line_block];
8957
# we have to actually make it by removing tentative
8958
# breaks that were set within it
8959
undo_forced_breakpoint_stack(0);
8960
set_nobreaks( $index_start_one_line_block,
8961
$max_index_to_go - 1 );
8963
# then re-initialize for the next one-line block
8964
destroy_one_line_block();
8966
# then decide if we want to break after the '}' ..
8967
# We will keep going to allow certain brace followers as in:
8968
# do { $ifclosed = 1; last } unless $losing;
8970
# But make a line break if the curly ends a
8971
# significant block:
8973
$is_block_without_semicolon{$block_type}
8975
# if needless semicolon follows we handle it later
8976
&& $next_nonblank_token ne ';'
8979
output_line_to_go() unless ($no_internal_newlines);
8983
# set string indicating what we need to look for brace follower
8985
if ( $block_type eq 'do' ) {
8986
$rbrace_follower = \%is_do_follower;
8988
elsif ( $block_type =~ /^(if|elsif|unless)$/ ) {
8989
$rbrace_follower = \%is_if_brace_follower;
8991
elsif ( $block_type eq 'else' ) {
8992
$rbrace_follower = \%is_else_brace_follower;
8995
# added eval for borris.t
8996
elsif ($is_sort_map_grep_eval{$block_type}
8997
|| $is_one_line_block eq 'G' )
8999
$rbrace_follower = undef;
9004
elsif ( $block_type =~ /^sub\W*$/ ) {
9006
if ($is_one_line_block) {
9007
$rbrace_follower = \%is_anon_sub_1_brace_follower;
9010
$rbrace_follower = \%is_anon_sub_brace_follower;
9014
# None of the above: specify what can follow a closing
9015
# brace of a block which is not an
9016
# if/elsif/else/do/sort/map/grep/eval
9018
# 'Toolbar.pm', 'Menubar.pm', bless.t, '3rules.pl', 'break1.t
9020
$rbrace_follower = \%is_other_brace_follower;
9023
# See if an elsif block is followed by another elsif or else;
9025
if ( $block_type eq 'elsif' ) {
9027
if ( $next_nonblank_token_type eq 'b' ) { # end of line?
9028
$looking_for_else = 1; # ok, check on next line
9032
unless ( $next_nonblank_token =~ /^(elsif|else)$/ ) {
9033
write_logfile_entry("No else block :(\n");
9038
# keep going after certain block types (map,sort,grep,eval)
9039
# added eval for borris.t
9045
# if no more tokens, postpone decision until re-entring
9046
elsif ( ( $next_nonblank_token_type eq 'b' )
9047
&& $rOpts_add_newlines )
9049
unless ($rbrace_follower) {
9050
output_line_to_go() unless ($no_internal_newlines);
9054
elsif ($rbrace_follower) {
9056
unless ( $rbrace_follower->{$next_nonblank_token} ) {
9057
output_line_to_go() unless ($no_internal_newlines);
9059
$rbrace_follower = undef;
9063
output_line_to_go() unless ($no_internal_newlines);
9066
} # end treatment of closing block token
9069
elsif ( $type eq ';' ) {
9071
# kill one-line blocks with too many semicolons
9072
$semicolons_before_block_self_destruct--;
9074
( $semicolons_before_block_self_destruct < 0 )
9075
|| ( $semicolons_before_block_self_destruct == 0
9076
&& $next_nonblank_token_type !~ /^[b\}]$/ )
9079
destroy_one_line_block();
9082
# Remove unnecessary semicolons, but not after bare
9083
# blocks, where it could be unsafe if the brace is
9087
$last_nonblank_token eq '}'
9089
$is_block_without_semicolon{
9090
$last_nonblank_block_type}
9091
|| $last_nonblank_block_type =~ /^sub\s+\w/
9092
|| $last_nonblank_block_type =~ /^\w+:$/ )
9094
|| $last_nonblank_type eq ';'
9099
$rOpts->{'delete-semicolons'}
9101
# don't delete ; before a # because it would promote it
9102
# to a block comment
9103
&& ( $next_nonblank_token_type ne '#' )
9106
note_deleted_semicolon();
9108
unless ( $no_internal_newlines
9109
|| $index_start_one_line_block != UNDEFINED_INDEX );
9113
write_logfile_entry("Extra ';'\n");
9116
store_token_to_go();
9119
unless ( $no_internal_newlines
9120
|| ( $rOpts_keep_interior_semicolons && $j < $jmax )
9121
|| ( $next_nonblank_token eq '}' ) );
9125
# handle here_doc target string
9126
elsif ( $type eq 'h' ) {
9127
$no_internal_newlines =
9128
1; # no newlines after seeing here-target
9129
destroy_one_line_block();
9130
store_token_to_go();
9133
# handle all other token types
9136
# if this is a blank...
9137
if ( $type eq 'b' ) {
9139
# make it just one character
9140
$token = ' ' if $rOpts_add_whitespace;
9142
# delete it if unwanted by whitespace rules
9143
# or we are deleting all whitespace
9144
my $ws = $$rwhite_space_flag[ $j + 1 ];
9145
if ( ( defined($ws) && $ws == -1 )
9146
|| $rOpts_delete_old_whitespace )
9149
# unless it might make a syntax error
9151
unless is_essential_whitespace(
9152
$last_last_nonblank_token,
9153
$last_last_nonblank_type,
9154
$tokens_to_go[$max_index_to_go],
9155
$types_to_go[$max_index_to_go],
9156
$$rtokens[ $j + 1 ],
9157
$$rtoken_type[ $j + 1 ]
9161
store_token_to_go();
9164
# remember two previous nonblank OUTPUT tokens
9165
if ( $type ne '#' && $type ne 'b' ) {
9166
$last_last_nonblank_token = $last_nonblank_token;
9167
$last_last_nonblank_type = $last_nonblank_type;
9168
$last_nonblank_token = $token;
9169
$last_nonblank_type = $type;
9170
$last_nonblank_block_type = $block_type;
9173
# unset the continued-quote flag since it only applies to the
9174
# first token, and we want to resume normal formatting if
9175
# there are additional tokens on the line
9176
$in_continued_quote = 0;
9178
} # end of loop over all tokens in this 'line_of_tokens'
9180
# we have to flush ..
9183
# if there is a side comment
9184
( ( $type eq '#' ) && !$rOpts->{'delete-side-comments'} )
9186
# if this line ends in a quote
9187
# NOTE: This is critically important for insuring that quoted lines
9188
# do not get processed by things like -sot and -sct
9191
# if this is a VERSION statement
9192
|| $is_VERSION_statement
9194
# to keep a label on one line if that is how it is now
9195
|| ( ( $type eq 'J' ) && ( $max_index_to_go == 0 ) )
9197
# if we are instructed to keep all old line breaks
9198
|| !$rOpts->{'delete-old-newlines'}
9201
destroy_one_line_block();
9202
output_line_to_go();
9205
# mark old line breakpoints in current output stream
9206
if ( $max_index_to_go >= 0 && !$rOpts_ignore_old_breakpoints ) {
9207
$old_breakpoint_to_go[$max_index_to_go] = 1;
9209
} # end sub print_line_of_tokens
9210
} # end print_line_of_tokens
9212
# sub output_line_to_go sends one logical line of tokens on down the
9213
# pipeline to the VerticalAligner package, breaking the line into continuation
9214
# lines as necessary. The line of tokens is ready to go in the "to_go" arrays.
9216
sub output_line_to_go {
# Takes no arguments; works on the shared "to_go" arrays and
# $max_index_to_go.  Steps visible in this listing, in order:
#   - optional debug diagnostics (FORMATTER_DEBUG_FLAG_OUTPUT)
#   - a tentative forced breakpoint when a one-line block is pending
#   - an optional closing side comment ('closing-side-comments')
#   - forced breaks at earlier closing braces of a chain of code blocks
#   - trimming of blank tokens at both ends of the batch
#   - the decision whether a blank line is wanted before subs,
#     packages, BEGIN/END blocks and certain long blocks
#   - the choice between one output line and continuation breaks
#   - hand-off to send_lines_to_vertical_aligner, followed by
#     prepare_for_new_input_lines
#   - output of any -cscw block comment
# NOTE(review): this listing has lost lines (closing braces and parts
# of several conditions), so the exact nesting of the steps above
# should be confirmed against the upstream file.
9218
# debug stuff; this routine can be called from many points
9219
FORMATTER_DEBUG_FLAG_OUTPUT && do {
9220
# caller returns (package, filename, line); only the package ($a) and
# the line number ($c) appear in the message below
my ( $a, $b, $c ) = caller;
9222
"OUTPUT: output_line_to_go called: $a $c $last_nonblank_type $last_nonblank_token, one_line=$index_start_one_line_block, tokens to write=$max_index_to_go\n"
9224
my $output_str = join "", @tokens_to_go[ 0 .. $max_index_to_go ];
9225
write_diagnostics("$output_str\n");
9228
# just set a tentative breakpoint if we might be in a one-line block
9229
if ( $index_start_one_line_block != UNDEFINED_INDEX ) {
9230
set_forced_breakpoint($max_index_to_go);
9234
# stays undefined unless add_closing_side_comment returns something;
# it is written out at the very end of this routine
my $cscw_block_comment;
9235
$cscw_block_comment = add_closing_side_comment()
9236
if ( $rOpts->{'closing-side-comments'} && $max_index_to_go >= 0 );
9238
match_opening_and_closing_tokens();
9240
# tell the -lp option we are outputting a batch so it can close
9241
# any unfinished items in its stack
9244
# If this line ends in a code block brace, set breaks at any
9245
# previous closing code block braces to breakup a chain of code
9246
# blocks on one line. This is very rare but can happen for
9247
# user-defined subs. For example we might be looking at this:
9248
# BOOL { $server_data{uptime} > 0; } NUM { $server_data{load}; } STR {
9249
my $saw_good_break = 0; # flag to force breaks even if short line
9252
# looking for opening or closing block brace
9253
$block_type_to_go[$max_index_to_go]
9255
# but not one of these which are never duplicated on a line:
9256
# until|while|for|if|elsif|else
9257
&& !$is_block_without_semicolon{ $block_type_to_go[$max_index_to_go] }
9260
my $lev = $nesting_depth_to_go[$max_index_to_go];
9262
# Walk backwards from the end and
9263
# set break at any closing block braces at the same level.
9264
# But quit if we are not in a chain of blocks.
9265
for ( my $i = $max_index_to_go - 1 ; $i >= 0 ; $i-- ) {
9266
last if ( $levels_to_go[$i] < $lev ); # stop at a lower level
9267
next if ( $levels_to_go[$i] > $lev ); # skip past higher level
9269
if ( $block_type_to_go[$i] ) {
9270
if ( $tokens_to_go[$i] eq '}' ) {
9271
set_forced_breakpoint($i);
9272
$saw_good_break = 1;
9276
# quit if we see anything besides words, function, blanks
9278
elsif ( $types_to_go[$i] !~ /^[\(\)Gwib]$/ ) { last }
9283
my $imax = $max_index_to_go;
9285
# trim any blank tokens
9286
if ( $max_index_to_go >= 0 ) {
9287
if ( $types_to_go[$imin] eq 'b' ) { $imin++ }
9288
if ( $types_to_go[$imax] eq 'b' ) { $imax-- }
9291
# anything left to write?
9292
if ( $imin <= $imax ) {
9294
# add a blank line before certain key types
9295
if ( $last_line_leading_type !~ /^[#b]/ ) {
9297
my $leading_token = $tokens_to_go[$imin];
9298
my $leading_type = $types_to_go[$imin];
9300
# blank lines before subs except declarations and one-liners
9301
# MCONVERSION LOCATION - for sub tokenization change
9302
if ( $leading_token =~ /^(sub\s)/ && $leading_type eq 'i' ) {
9303
$want_blank = ( $rOpts->{'blanks-before-subs'} )
9305
terminal_type( \@types_to_go, \@block_type_to_go, $imin,
9306
$imax ) !~ /^[\;\}]$/
9310
# break before all package declarations
9311
# MCONVERSION LOCATION - for tokenization change
9312
elsif ($leading_token =~ /^(package\s)/
9313
&& $leading_type eq 'i' )
9315
$want_blank = ( $rOpts->{'blanks-before-subs'} );
9318
# break before certain key blocks except one-liners
9319
if ( $leading_token =~ /^(BEGIN|END)$/ && $leading_type eq 'k' ) {
9320
$want_blank = ( $rOpts->{'blanks-before-subs'} )
9322
terminal_type( \@types_to_go, \@block_type_to_go, $imin,
9327
# Break before certain block types if we haven't had a
9328
# break at this level for a while. This is the
9329
# difficult decision..
9330
elsif ($leading_token =~ /^(unless|if|while|until|for|foreach)$/
9331
&& $leading_type eq 'k' )
9333
# number of nonblank lines counted so far at the previous line's
# level; an undefined counter is treated as zero
my $lc = $nonblank_lines_at_depth[$last_line_leading_level];
9334
if ( !defined($lc) ) { $lc = 0 }
9337
$rOpts->{'blanks-before-blocks'}
9338
&& $lc >= $rOpts->{'long-block-line-count'}
9339
&& $file_writer_object->get_consecutive_nonblank_lines() >=
9340
$rOpts->{'long-block-line-count'}
9342
terminal_type( \@types_to_go, \@block_type_to_go, $imin,
9349
# future: send blank line down normal path to VerticalAligner
9350
Perl::Tidy::VerticalAligner::flush();
9351
$file_writer_object->write_blank_code_line();
9355
# update blank line variables and count number of consecutive
9356
# non-blank, non-comment lines at this level
9357
$last_last_line_leading_level = $last_line_leading_level;
9358
$last_line_leading_level = $levels_to_go[$imin];
9359
if ( $last_line_leading_level < 0 ) { $last_line_leading_level = 0 }
9360
$last_line_leading_type = $types_to_go[$imin];
9361
if ( $last_line_leading_level == $last_last_line_leading_level
9362
&& $last_line_leading_type ne 'b'
9363
&& $last_line_leading_type ne '#'
9364
&& defined( $nonblank_lines_at_depth[$last_line_leading_level] ) )
9366
$nonblank_lines_at_depth[$last_line_leading_level]++;
9369
$nonblank_lines_at_depth[$last_line_leading_level] = 1;
9372
FORMATTER_DEBUG_FLAG_FLUSH && do {
9373
my ( $package, $file, $line ) = caller;
9375
"FLUSH: flushing from $package $file $line, types= $types_to_go[$imin] to $types_to_go[$imax]\n";
9378
# add a couple of extra terminal blank tokens
9381
# set all forced breakpoints for good list formatting
9382
my $is_long_line = excess_line_length( $imin, $max_index_to_go ) > 0;
9385
$max_index_to_go > 0
9388
|| $old_line_count_in_batch > 1
9389
|| is_unbalanced_batch()
9391
$comma_count_in_batch
9392
&& ( $rOpts_maximum_fields_per_table > 0
9393
|| $rOpts_comma_arrow_breakpoints == 0 )
9398
# a good break found earlier is kept; otherwise the flag takes
# whatever scan_list returns
$saw_good_break ||= scan_list();
9401
# let $ri_first and $ri_last be references to lists of
9402
# first and last tokens of line fragments to output..
9403
my ( $ri_first, $ri_last );
9405
# write a single line if..
9408
# we aren't allowed to add any newlines
9409
!$rOpts_add_newlines
9411
# or, we don't already have an interior breakpoint
9412
# and we didn't see a good breakpoint
9414
!$forced_breakpoint_count
9417
# and this line is 'short'
9422
@$ri_first = ($imin);
9423
@$ri_last = ($imax);
9426
# otherwise use multiple lines
9429
# the third return value, $colon_count, decides below whether
# insert_final_breaks is called
( $ri_first, $ri_last, my $colon_count ) =
9430
set_continuation_breaks($saw_good_break);
9432
break_all_chain_tokens( $ri_first, $ri_last );
9434
break_equals( $ri_first, $ri_last );
9436
# now we do a correction step to clean this up a bit
9437
# (The only time we would not do this is for debugging)
9438
if ( $rOpts->{'recombine'} ) {
9439
( $ri_first, $ri_last ) =
9440
recombine_breakpoints( $ri_first, $ri_last );
9443
insert_final_breaks( $ri_first, $ri_last ) if $colon_count;
9446
# do corrector step if -lp option is used
9448
if ($rOpts_line_up_parentheses) {
9449
$do_not_pad = correct_lp_indentation( $ri_first, $ri_last );
9451
send_lines_to_vertical_aligner( $ri_first, $ri_last, $do_not_pad );
9453
prepare_for_new_input_lines();
9455
# output any new -cscw block comment
9456
if ($cscw_block_comment) {
9458
$file_writer_object->write_code_line( $cscw_block_comment . "\n" );
9462
# Record that a semicolon was added at the current input line.
#
# Takes no arguments.  Updates $last_added_semicolon_at on every call,
# $first_added_semicolon_at on the first call only (while the counter is
# still zero), increments $added_semicolon_count, and writes an entry to
# the logfile.
sub note_added_semicolon {
    $last_added_semicolon_at = $input_line_number;

    # the very first addition also fixes the "first seen at" line number
    if ( $added_semicolon_count == 0 ) {
        $first_added_semicolon_at = $last_added_semicolon_at;
    }
    $added_semicolon_count++;
    write_logfile_entry("Added ';' here\n");
}
9471
# Record that a semicolon was deleted at the current input line.
#
# Takes no arguments.  Updates $last_deleted_semicolon_at on every call,
# $first_deleted_semicolon_at on the first call only (while the counter
# is still zero), increments $deleted_semicolon_count, and writes an
# entry to the logfile.
sub note_deleted_semicolon {
    $last_deleted_semicolon_at = $input_line_number;

    # the very first deletion also fixes the "first seen at" line number
    if ( $deleted_semicolon_count == 0 ) {
        $first_deleted_semicolon_at = $last_deleted_semicolon_at;
    }
    $deleted_semicolon_count++;
    write_logfile_entry("Deleted unnecessary ';'\n");    # i hope ;)
}
9480
# Record that an embedded tab was seen at the current input line.
#
# Takes no arguments.  Increments $embedded_tab_count, updates
# $last_embedded_tab_at, and sets $first_embedded_tab_at if it is not
# yet set.  A logfile entry is written only while the running count is
# at most MAX_NAG_MESSAGES, so the log is not flooded.
sub note_embedded_tab {
    $embedded_tab_count++;
    $last_embedded_tab_at = $input_line_number;

    # remember where the first one was seen
    if ( !$first_embedded_tab_at ) {
        $first_embedded_tab_at = $last_embedded_tab_at;
    }

    # stop nagging once the limit has been reached
    if ( $embedded_tab_count <= MAX_NAG_MESSAGES ) {
        write_logfile_entry("Embedded tabs in quote or pattern\n");
    }
}
9492
sub starting_one_line_block {
9494
# after seeing an opening curly brace, look for the closing brace
9495
# and see if the entire block will fit on a line. This routine is
9496
# not always right because it uses the old whitespace, so a check
9497
# is made later (at the closing brace) to make sure we really
9498
# have a one-line block. We have to do this preliminary check,
9499
# though, because otherwise we would always break at a semicolon
9500
# within a one-line block if the block contains multiple statements.
9502
# Arguments, in order: $j, $jmax, $level, $slevel, $ci_level,
# $rtokens, $rtoken_type; the end of the list is cut off in this
# listing, but $rblock_type is dereferenced below.
# $$rblock_type[$j] is read as the type of the block being opened, and
# tokens $j + 1 .. $jmax are scanned for the matching closing brace.
# NOTE(review): several lines of this routine (closing braces, return
# statements, parts of conditions) are missing from the listing; the
# notes added here describe only what the surviving lines show.
my ( $j, $jmax, $level, $slevel, $ci_level, $rtokens, $rtoken_type,
9506
# kill any current block - we can only go 1 deep
9507
destroy_one_line_block();
9510
# 1=distance from start of block to opening brace exceeds line length
9515
# shouldn't happen: there must have been a prior call to
9516
# store_token_to_go to put the opening brace in the output stream
9517
if ( $max_index_to_go < 0 ) {
9518
warning("program bug: store_token_to_go called incorrectly\n");
9519
report_definite_bug();
9523
# cannot use one-line blocks with cuddled else else/elsif lines
9524
if ( ( $tokens_to_go[0] eq '}' ) && $rOpts_cuddled_else ) {
9529
my $block_type = $$rblock_type[$j];
9531
# find the starting keyword for this block (such as 'if', 'else', ...)
9533
# a block type that is a single '{', '}', ';' or ':' character: the
# block starts at the token just stored
if ( $block_type =~ /^[\{\}\;\:]$/ ) {
9534
$i_start = $max_index_to_go;
9537
elsif ( $last_last_nonblank_token_to_go eq ')' ) {
9539
# For something like "if (xxx) {", the keyword "if" will be
9540
# just after the most recent break. This will be 0 unless
9541
# we have just killed a one-line block and are starting another.
9543
$i_start = $index_max_forced_break + 1;
9544
if ( $types_to_go[$i_start] eq 'b' ) {
9548
unless ( $tokens_to_go[$i_start] eq $block_type ) {
9553
# the previous nonblank token should start these block types
9555
( $last_last_nonblank_token_to_go eq $block_type )
9556
|| ( $block_type =~ /^sub/
9557
&& $last_last_nonblank_token_to_go =~ /^sub/ )
9560
$i_start = $last_last_nonblank_index_to_go;
9563
# patch for SWITCH/CASE to retain one-line case/when blocks
9564
elsif ( $block_type eq 'case' || $block_type eq 'when' ) {
9565
$i_start = $index_max_forced_break + 1;
9566
if ( $types_to_go[$i_start] eq 'b' ) {
9569
unless ( $tokens_to_go[$i_start] eq $block_type ) {
9578
# $pos is the running length estimate for the candidate one-line
# block; it starts from the tokens already stored
my $pos = total_line_length( $i_start, $max_index_to_go ) - 1;
9582
# see if length is too long to even start
9583
if ( $pos > $rOpts_maximum_line_length ) {
9587
for ( $i = $j + 1 ; $i <= $jmax ; $i++ ) {
9589
# old whitespace could be arbitrarily large, so don't use it
9590
if ( $$rtoken_type[$i] eq 'b' ) { $pos += 1 }
9591
else { $pos += length( $$rtokens[$i] ) }
9593
# Return false result if we exceed the maximum line length,
9594
if ( $pos > $rOpts_maximum_line_length ) {
9598
# or encounter another opening brace before finding the closing brace.
9599
elsif ($$rtokens[$i] eq '{'
9600
&& $$rtoken_type[$i] eq '{'
9601
&& $$rblock_type[$i] )
9606
# if we find our closing brace..
9607
elsif ($$rtokens[$i] eq '}'
9608
&& $$rtoken_type[$i] eq '}'
9609
&& $$rblock_type[$i] )
9612
# be sure any trailing comment also fits on the line
9614
( $$rtoken_type[ $i + 1 ] eq 'b' ) ? $i + 2 : $i + 1;
9616
if ( $$rtoken_type[$i_nonblank] eq '#' ) {
9617
$pos += length( $$rtokens[$i_nonblank] );
9619
# also count the blank between the brace and the comment, if any
if ( $i_nonblank > $i + 1 ) {
9620
$pos += length( $$rtokens[ $i + 1 ] );
9623
if ( $pos > $rOpts_maximum_line_length ) {
9628
# ok, it's a one-line block
9629
# NOTE(review): the meaning of the second argument (20 here, 1 further
# down) is not visible in this listing; presumably it is the number of
# semicolons allowed in the block -- confirm against
# create_one_line_block.
create_one_line_block( $i_start, 20 );
9633
# just keep going for other characters
9638
# Allow certain types of new one-line blocks to form by joining
9639
# input lines. These can be safely done, but for other block types,
9640
# we keep old one-line blocks but do not form new ones. It is not
9641
# always a good idea to make as many one-line blocks as possible,
9642
# so other types are not done. The user can always use -mangle.
9643
if ( $is_sort_map_grep_eval{$block_type} ) {
9644
create_one_line_block( $i_start, 1 );
9650
# Remove the most recently stored token from the output batch.
#
# Takes no arguments and changes only $max_index_to_go: the index is
# stepped back by one, or reset to UNDEFINED_INDEX when no earlier token
# remains.
# NOTE(review): the decrement branch and the 'else' were reconstructed
# from a damaged listing -- confirm against the upstream file.
sub unstore_token_to_go {

    # remove most recent token from output stream
    if ( $max_index_to_go > 0 ) {
        $max_index_to_go--;
    }
    else {
        $max_index_to_go = UNDEFINED_INDEX;
    }
}
9662
# Pass a blank-line request to the file writer by calling
# $file_writer_object->want_blank_line().  Takes no arguments.
# NOTE(review): one line between the sub header and the call below is
# missing from the damaged listing this was restored from; confirm
# against the upstream file that no statement was lost there.
sub want_blank_line {
    $file_writer_object->want_blank_line();
}
9667
# Hand the first argument to $file_writer_object->write_line().
#
# $_[0] - the line to write; it is passed through as-is (not copied).
# NOTE(review): one line between the sub header and the call below is
# missing from the damaged listing this was restored from; confirm
# against the upstream file that no statement was lost there.
sub write_unindented_line {
    $file_writer_object->write_line( $_[0] );
}
9674
# If there is a single, long parameter within parens, like this:
9676
# $self->command( "/msg "
9678
# . " You said $1, but did you know that it's square was "
9679
# . $1 * $1 . " ?" );
9681
# we can remove the continuation indentation of the 2nd and higher lines
9682
# to achieve this effect, which is more pleasing:
9684
# $self->command("/msg "
9686
# . " You said $1, but did you know that it's square was "
9687
# . $1 * $1 . " ?");
9689
my ( $line_open, $i_start, $closing_index, $ri_first, $ri_last ) = @_;
9690
my $max_line = @$ri_first - 1;
9692
# must be multiple lines
9693
return unless $max_line > $line_open;
9695
my $lev_start = $levels_to_go[$i_start];
9696
my $ci_start_plus = 1 + $ci_levels_to_go[$i_start];
9698
# see if all additional lines in this container have continuation
9701
my $line_1 = 1 + $line_open;
9702
for ( $n = $line_1 ; $n <= $max_line ; ++$n ) {
9703
my $ibeg = $$ri_first[$n];
9704
my $iend = $$ri_last[$n];
9705
if ( $ibeg eq $closing_index ) { $n--; last }
9706
return if ( $lev_start != $levels_to_go[$ibeg] );
9707
return if ( $ci_start_plus != $ci_levels_to_go[$ibeg] );
9708
last if ( $closing_index <= $iend );
9711
# we can reduce the indentation of all continuation lines
9712
my $continuation_line_count = $n - $line_open;
9713
@ci_levels_to_go[ @$ri_first[ $line_1 .. $n ] ] =
9714
(0) x ($continuation_line_count);
9715
@leading_spaces_to_go[ @$ri_first[ $line_1 .. $n ] ] =
9716
@reduced_spaces_to_go[ @$ri_first[ $line_1 .. $n ] ];
9719
sub set_logical_padding {
9721
# Look at a batch of lines and see if extra padding can improve the
9722
# alignment when there are certain leading operators. Here is an
9723
# example, in which some extra space is introduced before
9724
# '( $year' to make it line up with the subsequent lines:
9726
# if ( ( $Year < 1601 )
9727
# || ( $Year > 2899 )
9728
# || ( $EndYear < 1601 )
9729
# || ( $EndYear > 2899 ) )
9731
# &Error_OutOfRange;
9734
my ( $ri_first, $ri_last ) = @_;
9735
my $max_line = @$ri_first - 1;
9737
my ( $ibeg, $ibeg_next, $ibegm, $iend, $iendm, $ipad, $line, $pad_spaces,
9738
$tok_next, $type_next, $has_leading_op_next, $has_leading_op );
9740
# looking at each line of this batch..
9741
foreach $line ( 0 .. $max_line - 1 ) {
9743
# see if the next line begins with a logical operator
9744
$ibeg = $$ri_first[$line];
9745
$iend = $$ri_last[$line];
9746
$ibeg_next = $$ri_first[ $line + 1 ];
9747
$tok_next = $tokens_to_go[$ibeg_next];
9748
$type_next = $types_to_go[$ibeg_next];
9750
$has_leading_op_next = ( $tok_next =~ /^\w/ )
9751
? $is_chain_operator{$tok_next} # + - * / : ? && ||
9752
: $is_chain_operator{$type_next}; # and, or
9754
next unless ($has_leading_op_next);
9756
# next line must not be at lesser depth
9758
if ( $nesting_depth_to_go[$ibeg] > $nesting_depth_to_go[$ibeg_next] );
9760
# identify the token in this line to be padded on the left
9763
# handle lines at same depth...
9764
if ( $nesting_depth_to_go[$ibeg] == $nesting_depth_to_go[$ibeg_next] ) {
9766
# if this is not first line of the batch ...
9769
# and we have leading operator..
9770
next if $has_leading_op;
9772
# Introduce padding if..
9773
# 1. the previous line is at lesser depth, or
9774
# 2. the previous line ends in an assignment
9775
# 3. the previous line ends in a 'return'
9776
# 4. the previous line ends in a comma
9777
# Example 1: previous line at lesser depth
9778
# if ( ( $Year < 1601 ) # <- we are here but
9779
# || ( $Year > 2899 ) # list has not yet
9780
# || ( $EndYear < 1601 ) # collapsed vertically
9781
# || ( $EndYear > 2899 ) )
9784
# Example 2: previous line ending in assignment:
9786
# $year % 4 ? 0 # <- We are here
9791
# Example 3: previous line ending in comma:
9798
# be sure levels agree (do not indent after an indented 'if')
9799
next if ( $levels_to_go[$ibeg] ne $levels_to_go[$ibeg_next] );
9801
# allow padding on first line after a comma but only if:
9802
# (1) this is line 2 and
9803
# (2) there are at more than three lines and
9804
# (3) lines 3 and 4 have the same leading operator
9805
# These rules try to prevent padding within a long
9806
# comma-separated list.
9808
if ( $types_to_go[$iendm] eq ','
9812
my $ibeg_next_next = $$ri_first[ $line + 2 ];
9813
my $tok_next_next = $tokens_to_go[$ibeg_next_next];
9814
$ok_comma = $tok_next_next eq $tok_next;
9819
$is_assignment{ $types_to_go[$iendm] }
9821
|| ( $nesting_depth_to_go[$ibegm] <
9822
$nesting_depth_to_go[$ibeg] )
9823
|| ( $types_to_go[$iendm] eq 'k'
9824
&& $tokens_to_go[$iendm] eq 'return' )
9827
# we will add padding before the first token
9831
# for first line of the batch..
9834
# WARNING: Never indent if first line is starting in a
9835
# continued quote, which would change the quote.
9836
next if $starting_in_quote;
9838
# if this is text after closing '}'
9839
# then look for an interior token to pad
9840
if ( $types_to_go[$ibeg] eq '}' ) {
9844
# otherwise, we might pad if it looks really good
9847
# we might pad token $ibeg, so be sure that it
9848
# is at the same depth as the next line.
9850
if ( $nesting_depth_to_go[$ibeg] !=
9851
$nesting_depth_to_go[$ibeg_next] );
9853
# We can pad on line 1 of a statement if at least 3
9854
# lines will be aligned. Otherwise, it
9855
# can look very confusing.
9857
# We have to be careful not to pad if there are too few
9858
# lines. The current rule is:
9859
# (1) in general we require at least 3 consecutive lines
9860
# with the same leading chain operator token,
9861
# (2) but an exception is that we only require two lines
9862
# with leading colons if there are no more lines. For example,
9863
# the first $i in the following snippet would get padding
9864
# by the second rule:
9866
# $i == 1 ? ( "First", "Color" )
9867
# : $i == 2 ? ( "Then", "Rarity" )
9868
# : ( "Then", "Name" );
9870
if ( $max_line > 1 ) {
9871
my $leading_token = $tokens_to_go[$ibeg_next];
9874
# never indent line 1 of a '.' series because
9875
# previous line is most likely at same level.
9876
# TODO: we should also look at the leasing_spaces
9877
# of the last output line and skip if it is same
9879
next if ( $leading_token eq '.' );
9882
foreach my $l ( 2 .. 3 ) {
9883
last if ( $line + $l > $max_line );
9884
my $ibeg_next_next = $$ri_first[ $line + $l ];
9885
if ( $tokens_to_go[$ibeg_next_next] ne
9893
next if ($tokens_differ);
9894
next if ( $count < 3 && $leading_token ne ':' );
9904
# find interior token to pad if necessary
9905
if ( !defined($ipad) ) {
9907
for ( my $i = $ibeg ; ( $i < $iend ) && !$ipad ; $i++ ) {
9909
# find any unclosed container
9911
unless ( $type_sequence_to_go[$i]
9912
&& $mate_index_to_go[$i] > $iend );
9914
# find next nonblank token to pad
9916
if ( $types_to_go[$ipad] eq 'b' ) {
9918
last if ( $ipad > $iend );
9924
# next line must not be at greater depth
9925
my $iend_next = $$ri_last[ $line + 1 ];
9927
if ( $nesting_depth_to_go[ $iend_next + 1 ] >
9928
$nesting_depth_to_go[$ipad] );
9930
# lines must be somewhat similar to be padded..
9931
my $inext_next = $ibeg_next + 1;
9932
if ( $types_to_go[$inext_next] eq 'b' ) {
9935
my $type = $types_to_go[$ipad];
9936
my $type_next = $types_to_go[ $ipad + 1 ];
9938
# see if there are multiple continuation lines
9939
my $logical_continuation_lines = 1;
9940
if ( $line + 2 <= $max_line ) {
9941
my $leading_token = $tokens_to_go[$ibeg_next];
9942
my $ibeg_next_next = $$ri_first[ $line + 2 ];
9943
if ( $tokens_to_go[$ibeg_next_next] eq $leading_token
9944
&& $nesting_depth_to_go[$ibeg_next] eq
9945
$nesting_depth_to_go[$ibeg_next_next] )
9947
$logical_continuation_lines++;
9951
# see if leading types match
9952
my $types_match = $types_to_go[$inext_next] eq $type;
9953
my $matches_without_bang;
9955
# if first line has leading ! then compare the following token
9956
if ( !$types_match && $type eq '!' ) {
9957
$types_match = $matches_without_bang =
9958
$types_to_go[$inext_next] eq $types_to_go[ $ipad + 1 ];
9963
# either we have multiple continuation lines to follow
9964
# and we are not padding the first token
9965
( $logical_continuation_lines > 1 && $ipad > 0 )
9973
# and keywords must match if keyword
9976
&& $tokens_to_go[$ipad] ne $tokens_to_go[$inext_next]
9982
#----------------------begin special checks--------------
9985
# A check is needed before we can make the pad.
9986
# If we are in a list with some long items, we want each
9987
# item to stand out. So in the following example, the
9988
# first line begining with '$casefold->' would look good
9989
# padded to align with the next line, but then it
9990
# would be indented more than the last line, so we
9994
# $casefold->{code} eq '0041'
9995
# && $casefold->{status} eq 'C'
9996
# && $casefold->{mapping} eq '0061',
10001
# It would be faster, and almost as good, to use a comma
10002
# count, and not pad if comma_count > 1 and the previous
10003
# line did not end with a comma.
10007
my $ibg = $$ri_first[ $line + 1 ];
10008
my $depth = $nesting_depth_to_go[ $ibg + 1 ];
10010
# just use simplified formula for leading spaces to avoid
10011
# needless sub calls
10012
my $lsp = $levels_to_go[$ibg] + $ci_levels_to_go[$ibg];
10014
# look at each line beyond the next ..
10016
foreach $l ( $line + 2 .. $max_line ) {
10017
my $ibg = $$ri_first[$l];
10019
# quit looking at the end of this container
10021
if ( $nesting_depth_to_go[ $ibg + 1 ] < $depth )
10022
|| ( $nesting_depth_to_go[$ibg] < $depth );
10024
# cannot do the pad if a later line would be
10026
if ( $levels_to_go[$ibg] + $ci_levels_to_go[$ibg] < $lsp ) {
10032
# don't pad if we end in a broken list
10033
if ( $l == $max_line ) {
10034
my $i2 = $$ri_last[$l];
10035
if ( $types_to_go[$i2] eq '#' ) {
10036
my $i1 = $$ri_first[$l];
10039
terminal_type( \@types_to_go, \@block_type_to_go, $i1,
10046
# a minus may introduce a quoted variable, and we will
10047
# add the pad only if this line begins with a bare word,
10048
# such as for the word 'Button' here:
10050
# Button => "Print letter \"~$_\"",
10051
# -command => [ sub { print "$_[0]\n" }, $_ ],
10052
# -accelerator => "Meta+$_"
10055
# On the other hand, if 'Button' is quoted, it looks best
10058
# 'Button' => "Print letter \"~$_\"",
10059
# -command => [ sub { print "$_[0]\n" }, $_ ],
10060
# -accelerator => "Meta+$_"
10062
if ( $types_to_go[$ibeg_next] eq 'm' ) {
10063
$ok_to_pad = 0 if $types_to_go[$ibeg] eq 'Q';
10066
next unless $ok_to_pad;
10068
#----------------------end special check---------------
10070
my $length_1 = total_line_length( $ibeg, $ipad - 1 );
10071
my $length_2 = total_line_length( $ibeg_next, $inext_next - 1 );
10072
$pad_spaces = $length_2 - $length_1;
10074
# If the first line has a leading ! and the second does
10075
# not, then remove one space to try to align the next
10076
# leading characters, which are often the same. For example:
10078
# || $ts == $self->Holder
10079
# || $self->Holder->Type eq "Arena" )
10081
# This usually helps readability, but if there are subsequent
10082
# ! operators things will still get messed up. For example:
10084
# if ( !exists $Net::DNS::typesbyname{$qtype}
10085
# && exists $Net::DNS::classesbyname{$qtype}
10086
# && !exists $Net::DNS::classesbyname{$qclass}
10087
# && exists $Net::DNS::typesbyname{$qclass} )
10088
# We can't fix that.
10089
if ($matches_without_bang) { $pad_spaces-- }
10091
# make sure this won't change if -lp is used
10092
my $indentation_1 = $leading_spaces_to_go[$ibeg];
10093
if ( ref($indentation_1) ) {
10094
if ( $indentation_1->get_RECOVERABLE_SPACES() == 0 ) {
10095
my $indentation_2 = $leading_spaces_to_go[$ibeg_next];
10096
unless ( $indentation_2->get_RECOVERABLE_SPACES() == 0 ) {
10102
# we might be able to handle a pad of -1 by removing a blank
10104
if ( $pad_spaces < 0 ) {
10106
if ( $pad_spaces == -1 ) {
10107
if ( $ipad > $ibeg && $types_to_go[ $ipad - 1 ] eq 'b' ) {
10108
$tokens_to_go[ $ipad - 1 ] = '';
10114
# now apply any padding for alignment
10115
if ( $ipad >= 0 && $pad_spaces ) {
10117
my $length_t = total_line_length( $ibeg, $iend );
10118
if ( $pad_spaces + $length_t <= $rOpts_maximum_line_length ) {
10119
$tokens_to_go[$ipad] =
10120
' ' x $pad_spaces . $tokens_to_go[$ipad];
10128
$has_leading_op = $has_leading_op_next;
10129
} # end of loop over lines
10133
# correct_lp_indentation( $ri_first, $ri_last )
#
# Corrector pass for -lp indentation over the output lines of one batch.
#   $ri_first - array ref: index of the first token of each output line
#   $ri_last  - array ref: index of the last token of each output line
# Walks every token of every line, compares the predicted position stored
# in each indentation object (get_SPACES) with the position actually
# reached, and either adjusts the indentation objects directly or records
# the wanted shift with set_RECOVERABLE_SPACES.
# Returns the flag $do_not_pad (see the note on that flag below).
sub correct_lp_indentation {
10135
# When the -lp option is used, we need to make a last pass through
10136
# each line to correct the indentation positions in case they differ
10137
# from the predictions. This is necessary because perltidy uses a
10138
# predictor/corrector method for aligning with opening parens. The
10139
# predictor is usually good, but sometimes stumbles. The corrector
10140
# tries to patch things up once the actual opening paren locations
10142
my ( $ri_first, $ri_last ) = @_;
10143
my $do_not_pad = 0;
10145
# Note on flag '$do_not_pad':
10146
# We want to avoid a situation like this, where the aligner inserts
10147
# whitespace before the '=' to align it with a previous '=', because
10148
# otherwise the parens might become mis-aligned in a situation like
10149
# this, where the '=' has become aligned with the previous line,
10150
# pushing the opening '(' forward beyond where we want it.
10152
# $mkFloor::currentRoom = '';
10153
# $mkFloor::c_entry = $c->Entry(
10155
# -relief => 'sunken',
10159
# We leave it to the aligner to decide how to do this.
10161
# first remove continuation indentation if appropriate
10162
# index of the last output line in this batch
my $max_line = @$ri_first - 1;
10164
# looking at each line of this batch..
10165
my ( $ibeg, $iend );
10167
foreach $line ( 0 .. $max_line ) {
10168
$ibeg = $$ri_first[$line];
10169
$iend = $$ri_last[$line];
10171
# looking at each token in this output line..
10173
foreach $i ( $ibeg .. $iend ) {
10175
# How many space characters to place before this token
10176
# for special alignment. Actual padding is done in the
10179
# looking for next unvisited indentation item
10180
my $indentation = $leading_spaces_to_go[$i];
10181
if ( !$indentation->get_MARKED() ) {
10182
# mark the item so that it is processed only once
$indentation->set_MARKED(1);
10184
# looking for indentation item for which we are aligning
10185
# with parens, braces, and brackets
10186
next unless ( $indentation->get_ALIGN_PAREN() );
10188
# skip closed container on this line
10189
if ( $i > $ibeg ) {
10191
if ( $types_to_go[$im] eq 'b' && $im > $ibeg ) { $im-- }
10192
if ( $type_sequence_to_go[$im]
10193
&& $mate_index_to_go[$im] <= $iend )
10199
if ( $line == 1 && $i == $ibeg ) {
10203
# Ok, let's see what the error is and try to fix it
10205
my $predicted_pos = $indentation->get_SPACES();
10206
if ( $i > $ibeg ) {
10208
# token is mid-line - use length to previous token
10209
$actual_pos = total_line_length( $ibeg, $i - 1 );
10211
# for mid-line token, we must check to see if all
10212
# additional lines have continuation indentation,
10213
# and remove it if so. Otherwise, we do not get
10215
my $closing_index = $indentation->get_CLOSED();
10216
if ( $closing_index > $iend ) {
10217
my $ibeg_next = $$ri_first[ $line + 1 ];
10218
if ( $ci_levels_to_go[$ibeg_next] > 0 ) {
10219
undo_lp_ci( $line, $i, $closing_index, $ri_first,
10224
elsif ( $line > 0 ) {
10226
# handle case where token starts a new line;
10227
# use length of previous line
10228
my $ibegm = $$ri_first[ $line - 1 ];
10229
my $iendm = $$ri_last[ $line - 1 ];
10230
$actual_pos = total_line_length( $ibegm, $iendm );
10234
if ( $types_to_go[ $iendm + 1 ] eq 'b' );
10238
# token is first character of first line of batch
10239
$actual_pos = $predicted_pos;
10242
# signed correction: positive means the token actually sits to the
# right of the predicted position, negative means to the left
my $move_right = $actual_pos - $predicted_pos;
10244
# done if no error to correct (gnu2.t)
10245
if ( $move_right == 0 ) {
10246
$indentation->set_RECOVERABLE_SPACES($move_right);
10250
# if we have not seen closure for this indentation in
10251
# this batch, we can only pass on a request to the
10253
my $closing_index = $indentation->get_CLOSED();
10255
if ( $closing_index < 0 ) {
10256
$indentation->set_RECOVERABLE_SPACES($move_right);
10260
# If necessary, look ahead to see if there is really any
10261
# leading whitespace dependent on this whitespace, and
10262
# also find the longest line using this whitespace.
10263
# Since it is always safe to move left if there are no
10264
# dependents, we only need to do this if we may have
10265
# dependent nodes or need to move right.
10267
my $right_margin = 0;
10268
my $have_child = $indentation->get_HAVE_CHILD();
10270
# indentation objects seen in this group, keyed by their
# stringified reference so that each one is stored once
my %saw_indentation;
10271
my $line_count = 1;
10272
$saw_indentation{$indentation} = $indentation;
10274
if ( $have_child || $move_right > 0 ) {
10276
my $max_length = 0;
10277
if ( $i == $ibeg ) {
10278
$max_length = total_line_length( $ibeg, $iend );
10281
# look ahead at the rest of the lines of this batch..
10283
foreach $line_t ( $line + 1 .. $max_line ) {
10284
my $ibeg_t = $$ri_first[$line_t];
10285
my $iend_t = $$ri_last[$line_t];
10286
# stop once we reach the line holding the closing token
last if ( $closing_index <= $ibeg_t );
10288
# remember all different indentation objects
10289
my $indentation_t = $leading_spaces_to_go[$ibeg_t];
10290
$saw_indentation{$indentation_t} = $indentation_t;
10293
# remember longest line in the group
10294
my $length_t = total_line_length( $ibeg_t, $iend_t );
10295
if ( $length_t > $max_length ) {
10296
$max_length = $length_t;
10299
# room left before the line length limit, never negative
$right_margin = $rOpts_maximum_line_length - $max_length;
10300
if ( $right_margin < 0 ) { $right_margin = 0 }
10303
# grep in scalar assignment gives the number of comma-type
# tokens on this first line
my $first_line_comma_count =
10304
grep { $_ eq ',' } @types_to_go[ $ibeg .. $iend ];
10305
my $comma_count = $indentation->get_COMMA_COUNT();
10306
my $arrow_count = $indentation->get_ARROW_COUNT();
10308
# This is a simple approximate test for vertical alignment:
10309
# if we broke just after an opening paren, brace, bracket,
10310
# and there are 2 or more commas in the first line,
10311
# and there are no '=>'s,
10312
# then we are probably vertically aligned. We could set
10313
# an exact flag in sub scan_list, but this is good
10315
# keys in scalar assignment: number of distinct indentation objects
my $indentation_count = keys %saw_indentation;
10316
my $is_vertically_aligned =
10318
&& $first_line_comma_count > 1
10319
&& $indentation_count == 1
10320
&& ( $arrow_count == 0 || $arrow_count == $line_count ) );
10322
# Make the move if possible ..
10325
# we can always move left
10328
# but we should only move right if we are sure it will
10329
# not spoil vertical alignment
10330
|| ( $comma_count == 0 )
10331
|| ( $comma_count > 0 && !$is_vertically_aligned )
10335
( $move_right <= $right_margin )
10339
# apply the same shift to every indentation object of the group
foreach ( keys %saw_indentation ) {
10340
$saw_indentation{$_}
10341
->permanently_decrease_AVAILABLE_SPACES( -$move );
10345
# Otherwise, record what we want and the vertical aligner
10346
# will try to recover it.
10348
$indentation->set_RECOVERABLE_SPACES($move_right);
10353
return $do_not_pad;
10356
# flush is called to output any tokens in the pipeline, so that
10357
# an alternate source of lines can be written in the correct order
10360
destroy_one_line_block();
10361
output_line_to_go();
10362
Perl::Tidy::VerticalAligner::flush();
10365
# reset_block_text_accumulator()
#
# Stops accumulating leading block text for closing side comments and
# clears all of the related state variables.  If text was being
# accumulated for an 'if' or 'elsif' block, the text collected so far is
# first pushed onto @$rleading_block_if_elsif_text.
# Takes no arguments; works entirely on file-scoped variables.
sub reset_block_text_accumulator {
10367
# save text after 'if' and 'elsif' to append after 'else'
10368
if ($accumulating_text_for_block) {
10370
if ( $accumulating_text_for_block =~ /^(if|elsif)$/ ) {
10371
push @{$rleading_block_if_elsif_text}, $leading_block_text;
10374
# an empty keyword string means "not accumulating"
# (callers test this variable for truth)
$accumulating_text_for_block = "";
10375
$leading_block_text = "";
10376
$leading_block_text_level = 0;
10377
$leading_block_text_length_exceeded = 0;
10378
$leading_block_text_line_number = 0;
10379
$leading_block_text_line_length = 0;
10382
# set_block_text_accumulator($i)
#
# Begins accumulating leading block text for the keyword token at index
# $i: records the keyword, its level and the current output line number,
# and initializes the running line-length estimate.
# NOTE(review): the statement that unpacks $i from @_ is not visible in
# this copy; $i is presumably the token index passed by the caller
# (accumulate_csc_text calls this sub with $i) - confirm.
sub set_block_text_accumulator {
10384
$accumulating_text_for_block = $tokens_to_go[$i];
10385
# any keyword not starting with 'els' opens a new chain, so the saved
# if/elsif texts are discarded
if ( $accumulating_text_for_block !~ /^els/ ) {
10386
$rleading_block_if_elsif_text = [];
10388
$leading_block_text = "";
10389
$leading_block_text_level = $levels_to_go[$i];
10390
$leading_block_text_line_number =
10391
$vertical_aligner_object->get_output_line_number();
10392
$leading_block_text_length_exceeded = 0;
10394
# this will contain the column number of the last character
10395
# of the closing side comment
10396
# initial estimate: keyword length + prefix length + indentation
# columns for this level + 3
$leading_block_text_line_length =
10397
length($accumulating_text_for_block) +
10398
length( $rOpts->{'closing-side-comment-prefix'} ) +
10399
$leading_block_text_level * $rOpts_indent_columns + 3;
10402
# accumulate_block_text($i)
#
# Appends the text of token $i to $leading_block_text while accumulation
# is active, the length limit has not yet been exceeded and the token is
# not a side comment.  When the token cannot be added and it is not a
# blank, the text is marked as truncated by appending '...'.
# NOTE(review): the statement that unpacks $i from @_ is not visible in
# this copy; $i is presumably the token index passed by the caller
# (accumulate_csc_text calls this sub with $i) - confirm.
sub accumulate_block_text {
10405
# accumulate leading text for -csc, ignoring any side comments
10406
if ( $accumulating_text_for_block
10407
&& !$leading_block_text_length_exceeded
10408
&& $types_to_go[$i] ne '#' )
10411
my $added_length = length( $tokens_to_go[$i] );
10412
# token index 0 gets one extra column, matching the space that is
# prepended to the text for that index further down
$added_length += 1 if $i == 0;
10413
my $new_line_length = $leading_block_text_line_length + $added_length;
10415
# we can add this text if we don't exceed some limits..
10418
# we must not have already exceeded the text length limit
10419
length($leading_block_text) <
10420
$rOpts_closing_side_comment_maximum_text
10423
# the new total line length must be below the line length limit
10424
# or the new length must be below the text length limit
10425
# (ie, we may allow one token to exceed the text length limit)
10426
&& ( $new_line_length < $rOpts_maximum_line_length
10427
|| length($leading_block_text) + $added_length <
10428
$rOpts_closing_side_comment_maximum_text )
10430
# UNLESS: we are adding a closing paren before the brace we seek.
10431
# This is an attempt to avoid situations where the ... to be
10432
# added are longer than the omitted right paren, as in:
10434
# foreach my $item (@a_rather_long_variable_name_here) {
10436
# } ## end foreach my $item (@a_rather_long_variable_name_here...
10439
$tokens_to_go[$i] eq ')'
10442
# the block brace being sought is one or two tokens ahead
$i + 1 <= $max_index_to_go
10443
&& $block_type_to_go[ $i + 1 ] eq
10444
$accumulating_text_for_block
10446
|| ( $i + 2 <= $max_index_to_go
10447
&& $block_type_to_go[ $i + 2 ] eq
10448
$accumulating_text_for_block )
10454
# add an extra space at each newline
10455
if ( $i == 0 ) { $leading_block_text .= ' ' }
10457
# add the token text
10458
$leading_block_text .= $tokens_to_go[$i];
10459
$leading_block_text_line_length = $new_line_length;
10462
# show that text was truncated if necessary
10463
elsif ( $types_to_go[$i] ne 'b' ) {
10464
$leading_block_text_length_exceeded = 1;
10465
$leading_block_text .= '...';
10471
# Lookup set of block keywords: every word listed below is stored as a
# key with the value 1 (filled through a hash slice).
my %is_if_elsif_else_unless_while_until_for_foreach;
10475
# These block types may have text between the keyword and opening
10476
# curly. Note: 'else' does not, but must be included to allow trailing
10477
# if/elsif text to be appended.
10478
# patch for SWITCH/CASE: added 'case' and 'when'
10479
@_ = qw(if elsif else unless while until for foreach case when);
10480
# (1) x scalar(@_) builds a list of 1's, one per keyword
@is_if_elsif_else_unless_while_until_for_foreach{@_} = (1) x scalar(@_);
10483
# accumulate_csc_text()
#
# Scans all tokens of the current output buffer (indexes 0 through
# $max_index_to_go) and maintains the state needed to build closing side
# comments: leading block text saved at each opening block brace is
# restored at the matching closing brace, and the number of output lines
# spanned by the block is computed from the saved opening line number.
#
# Returns the list
#   ( $terminal_type, $i_terminal, $i_block_leading_text,
#     $block_leading_text, $block_line_count )
sub accumulate_csc_text {
10485
# called once per output buffer when -csc is used. Accumulates
10486
# the text placed after certain closing block braces.
10487
# Defines and returns the following for this buffer:
10489
my $block_leading_text = ""; # the leading text of the last '}'
10490
my $rblock_leading_if_elsif_text;
10491
my $i_block_leading_text =
10492
-1; # index of token owning block_leading_text
10493
my $block_line_count = 100; # how many lines the block spans
10494
my $terminal_type = 'b'; # type of last nonblank token
10495
my $i_terminal = 0; # index of last nonblank token
10496
my $terminal_block_type = "";
10498
for my $i ( 0 .. $max_index_to_go ) {
10499
my $type = $types_to_go[$i];
10500
my $block_type = $block_type_to_go[$i];
10501
my $token = $tokens_to_go[$i];
10503
# remember last nonblank token type
10504
if ( $type ne '#' && $type ne 'b' ) {
10505
$terminal_type = $type;
10506
$terminal_block_type = $block_type;
10510
my $type_sequence = $type_sequence_to_go[$i];
10511
# only tokens with both a block type and a sequence number are
# examined here
if ( $block_type && $type_sequence ) {
10513
if ( $token eq '}' ) {
10515
# restore any leading text saved when we entered this block
10516
if ( defined( $block_leading_text{$type_sequence} ) ) {
10517
( $block_leading_text, $rblock_leading_if_elsif_text ) =
10518
@{ $block_leading_text{$type_sequence} };
10519
$i_block_leading_text = $i;
10520
# the saved entry is used once and then removed
delete $block_leading_text{$type_sequence};
10521
$rleading_block_if_elsif_text =
10522
$rblock_leading_if_elsif_text;
10525
# if we run into a '}' then we probably started accumulating
10526
# at something like a trailing 'if' clause..no harm done.
10527
if ( $accumulating_text_for_block
10528
&& $levels_to_go[$i] <= $leading_block_text_level )
10530
my $lev = $levels_to_go[$i];
10531
reset_block_text_accumulator();
10534
if ( defined( $block_opening_line_number{$type_sequence} ) )
10536
my $output_line_number =
10537
$vertical_aligner_object->get_output_line_number();
10538
# inclusive count of output lines from opening to closing brace
$block_line_count =
10539
$output_line_number -
10540
$block_opening_line_number{$type_sequence} + 1;
10541
delete $block_opening_line_number{$type_sequence};
10545
# Error: block opening line undefined for this line..
10546
# This shouldn't be possible, but it is not a
10547
# significant problem.
10551
elsif ( $token eq '{' ) {
10554
$vertical_aligner_object->get_output_line_number();
10555
$block_opening_line_number{$type_sequence} = $line_number;
10557
if ( $accumulating_text_for_block
10558
&& $levels_to_go[$i] == $leading_block_text_level )
10561
if ( $accumulating_text_for_block eq $block_type ) {
10563
# save any leading text before we enter this block
10564
$block_leading_text{$type_sequence} = [
10565
$leading_block_text,
10566
$rleading_block_if_elsif_text
10568
# use the line where accumulation started as the opening line
$block_opening_line_number{$type_sequence} =
10569
$leading_block_text_line_number;
10570
reset_block_text_accumulator();
10574
# shouldn't happen, but not a serious error.
10575
# We were accumulating -csc text for block type
10576
# $accumulating_text_for_block and unexpectedly
10577
# encountered a '{' for block type $block_type.
10584
&& $csc_new_statement_ok
10585
&& $is_if_elsif_else_unless_while_until_for_foreach{$token}
10586
&& $token =~ /$closing_side_comment_list_pattern/o )
10588
set_block_text_accumulator($i);
10592
# note: ignoring type 'q' because of tricks being played
10593
# with 'q' for hanging side comments
10594
if ( $type ne 'b' && $type ne '#' && $type ne 'q' ) {
10595
# a new statement may begin after a block brace, a type 'J' token,
# or a semicolon
$csc_new_statement_ok =
10596
( $block_type || $type eq 'J' || $type eq ';' );
10599
&& $accumulating_text_for_block
10600
&& $levels_to_go[$i] == $leading_block_text_level )
10602
reset_block_text_accumulator();
10605
accumulate_block_text($i);
10610
# Treat an 'else' block specially by adding preceding 'if' and
10611
# 'elsif' text. Otherwise, the 'end else' is not helpful,
10612
# especially for cuddled-else formatting.
10613
if ( $terminal_block_type =~ /^els/ && $rblock_leading_if_elsif_text ) {
10614
$block_leading_text =
10615
make_else_csc_text( $i_terminal, $terminal_block_type,
10616
$block_leading_text, $rblock_leading_if_elsif_text );
10619
return ( $terminal_type, $i_terminal, $i_block_leading_text,
10620
$block_leading_text, $block_line_count );
10624
# make_else_csc_text( $i_terminal, $block_type, $block_leading_text,
#                     $rif_elsif_text )
#
# Builds the closing side comment text for an 'else' or 'elsif' block by
# appending text saved from the preceding 'if' / 'elsif' blocks.
#   $i_terminal         - index of the terminal token; its level is used
#                         in the line length estimate
#   $block_type         - compared against 'else' and 'elsif'
#   $block_leading_text - starting text for the comment
#   $rif_elsif_text     - array ref of previously saved if/elsif texts
# Returns the starting text unchanged when $rif_elsif_text is empty.
# NOTE(review): the remaining return statements are not visible in this
# copy; presumably $csc_text is returned in every case - confirm.
sub make_else_csc_text {
10626
# create additional -csc text for an 'else' and optionally 'elsif',
10627
# depending on the value of switch
10628
# $rOpts_closing_side_comment_else_flag:
10630
# = 0 add 'if' text to trailing else
10631
# = 1 same as 0 plus:
10632
# add 'if' to 'elsif's if can fit in line length
10633
# add last 'elsif' to trailing else if can fit in one line
10634
# = 2 same as 1 but do not check if exceed line length
10636
# $rif_elsif_text = a reference to a list of all previous closing
10637
# side comments created for this if block
10639
my ( $i_terminal, $block_type, $block_leading_text, $rif_elsif_text ) = @_;
10640
my $csc_text = $block_leading_text;
10642
if ( $block_type eq 'elsif' && $rOpts_closing_side_comment_else_flag == 0 )
10647
# number of saved if/elsif texts
my $count = @{$rif_elsif_text};
10648
return $csc_text unless ($count);
10650
# the first saved entry is the text of the leading 'if'
my $if_text = '[ if' . $rif_elsif_text->[0];
10652
# always show the leading 'if' text on 'else'
10653
if ( $block_type eq 'else' ) {
10654
$csc_text .= $if_text;
10657
# see if that's all
10658
if ( $rOpts_closing_side_comment_else_flag == 0 ) {
10662
my $last_elsif_text = "";
10663
if ( $count > 1 ) {
10664
$last_elsif_text = ' [elsif' . $rif_elsif_text->[ $count - 1 ];
10665
# more than two saved entries: mark the skipped ones with '[...'
if ( $count > 2 ) { $last_elsif_text = ' [...' . $last_elsif_text; }
10668
# tentatively append one more item
10669
my $saved_text = $csc_text;
10670
if ( $block_type eq 'else' ) {
10671
$csc_text .= $last_elsif_text;
10674
$csc_text .= ' ' . $if_text;
10677
# all done if no length checks requested
10678
if ( $rOpts_closing_side_comment_else_flag == 2 ) {
10682
# undo it if line length exceeded
10684
length($csc_text) +
10685
length($block_type) +
10686
length( $rOpts->{'closing-side-comment-prefix'} ) +
10687
$levels_to_go[$i_terminal] * $rOpts_indent_columns + 3;
10688
if ( $length > $rOpts_maximum_line_length ) {
10689
$csc_text = $saved_text;
10694
# add_closing_side_comment()
#
# Adds or updates the closing side comment on a line that ends in a
# closing block brace, using the text gathered by accumulate_csc_text().
# When an existing side comment is present and the
# 'closing-side-comment-warnings' option is set, the old and new
# comments are compared.
# Returns $cscw_block_comment, which is only assigned in the branch that
# saves a replaced side comment; otherwise it is undefined.
sub add_closing_side_comment {
10696
# add closing side comments after closing block braces if -csc used
10697
my $cscw_block_comment;
10699
#---------------------------------------------------------------
10700
# Step 1: loop through all tokens of this line to accumulate
10701
# the text needed to create the closing side comments. Also see
10702
# how the line ends.
10703
#---------------------------------------------------------------
10705
my ( $terminal_type, $i_terminal, $i_block_leading_text,
10706
$block_leading_text, $block_line_count )
10707
= accumulate_csc_text();
10709
#---------------------------------------------------------------
10710
# Step 2: make the closing side comment if this ends a block
10711
#---------------------------------------------------------------
10712
# the line has a side comment if the last nonblank, non-comment
# token is not the last token of the buffer
my $have_side_comment = $i_terminal != $max_index_to_go;
10714
# if this line might end in a block closure..
10716
$terminal_type eq '}'
10721
# the block is long enough
10722
( $block_line_count >= $rOpts->{'closing-side-comment-interval'} )
10724
# or there is an existing comment to check
10725
|| ( $have_side_comment
10726
&& $rOpts->{'closing-side-comment-warnings'} )
10729
# .. and if this is one of the types of interest
10730
&& $block_type_to_go[$i_terminal] =~
10731
/$closing_side_comment_list_pattern/o
10733
# .. but not an anonymous sub
10734
# These are not normally of interest, and their closing braces are
10735
# often followed by commas or semicolons anyway. This also avoids
10736
# possible erratic output due to line numbering inconsistencies
10737
# in the cases where their closing braces terminate a line.
10738
&& $block_type_to_go[$i_terminal] ne 'sub'
10740
# ..and the corresponding opening brace must not be in this batch
10741
# (because we do not need to tag one-line blocks, although this
10742
# should also be caught with a positive -csci value)
10743
&& $mate_index_to_go[$i_terminal] < 0
10748
# this is the last token (line doesn't have a side comment)
10749
!$have_side_comment
10751
# or the old side comment is a closing side comment
10752
|| $tokens_to_go[$max_index_to_go] =~
10753
/$closing_side_comment_prefix_pattern/o
10758
# then make the closing side comment text
10760
"$rOpts->{'closing-side-comment-prefix'} $block_type_to_go[$i_terminal]";
10762
# append any extra descriptive text collected above
10763
if ( $i_block_leading_text == $i_terminal ) {
10764
$token .= $block_leading_text;
10766
$token =~ s/\s*$//; # trim any trailing whitespace
10768
# handle case of existing closing side comment
10769
if ($have_side_comment) {
10771
# warn if requested and tokens differ significantly
10772
if ( $rOpts->{'closing-side-comment-warnings'} ) {
10773
my $old_csc = $tokens_to_go[$max_index_to_go];
10774
my $new_csc = $token;
10775
$new_csc =~ s/(\.\.\.)\s*$//; # trim trailing '...'
10776
my $new_trailing_dots = $1;
10777
$old_csc =~ s/\.\.\.\s*$//;
10778
$new_csc =~ s/\s+//g; # trim all whitespace
10779
$old_csc =~ s/\s+//g;
10781
# Patch to handle multiple closing side comments at
10782
# else and elsif's. These have become too complicated
10783
# to check, so if we see an indication of
10784
# '[ if' or '[ # elsif', then assume they were made
10786
if ( $block_type_to_go[$i_terminal] eq 'else' ) {
10787
if ( $old_csc =~ /\[\s*elsif/ ) { $old_csc = $new_csc }
10789
elsif ( $block_type_to_go[$i_terminal] eq 'elsif' ) {
10790
if ( $old_csc =~ /\[\s*if/ ) { $old_csc = $new_csc }
10793
# if old comment is contained in new comment,
10794
# only compare the common part.
10795
if ( length($new_csc) > length($old_csc) ) {
10796
$new_csc = substr( $new_csc, 0, length($old_csc) );
10799
# if the new comment is shorter and has been limited,
10800
# only compare the common part.
10801
if ( length($new_csc) < length($old_csc) && $new_trailing_dots )
10803
$old_csc = substr( $old_csc, 0, length($new_csc) );
10806
# any remaining difference?
10807
if ( $new_csc ne $old_csc ) {
10809
# just leave the old comment if we are below the threshold
10810
# for creating side comments
10811
if ( $block_line_count <
10812
$rOpts->{'closing-side-comment-interval'} )
10817
# otherwise we'll make a note of it
10821
"perltidy -cscw replaced: $tokens_to_go[$max_index_to_go]\n"
10824
# save the old side comment in a new trailing block comment
10825
# NOTE(review): localtime returns a 0-based month and a year counted
# from 1900; confirm both are adjusted before they are formatted below
# (the adjusting statements are not visible in this copy).
my ( $day, $month, $year ) = (localtime)[ 3, 4, 5 ];
10828
$cscw_block_comment =
10829
"## perltidy -cscw $year-$month-$day: $tokens_to_go[$max_index_to_go]";
10834
# No differences.. we can safely delete old comment if we
10835
# are below the threshold
10836
if ( $block_line_count <
10837
$rOpts->{'closing-side-comment-interval'} )
10840
# remove the trailing side comment, then the blank before it
unstore_token_to_go()
10841
if ( $types_to_go[$max_index_to_go] eq '#' );
10842
unstore_token_to_go()
10843
if ( $types_to_go[$max_index_to_go] eq 'b' );
10848
# switch to the new csc (unless we deleted it!)
10849
$tokens_to_go[$max_index_to_go] = $token if $token;
10852
# handle case of NO existing closing side comment
10855
# insert the new side comment into the output token stream
10857
my $block_type = '';
10858
my $type_sequence = '';
10859
# the remaining attributes are copied from the current last token
my $container_environment =
10860
$container_environment_to_go[$max_index_to_go];
10861
my $level = $levels_to_go[$max_index_to_go];
10862
my $slevel = $nesting_depth_to_go[$max_index_to_go];
10863
my $no_internal_newlines = 0;
10865
my $nesting_blocks = $nesting_blocks_to_go[$max_index_to_go];
10866
my $ci_level = $ci_levels_to_go[$max_index_to_go];
10867
my $in_continued_quote = 0;
10869
# first insert a blank token
10870
insert_new_token_to_go( ' ', 'b', $slevel, $no_internal_newlines );
10872
# then the side comment
10873
insert_new_token_to_go( $token, $type, $slevel,
10874
$no_internal_newlines );
10877
return $cscw_block_comment;
10880
# previous_nonblank_token(...)
#
# Looks backward from index $im in the token arrays, skipping one blank
# token (type 'b'), and puts the text of the token found into $name.
# Returns "" when the search runs off the start of the arrays.
# NOTE(review): the statements that unpack the argument and initialize
# $im and $name, and the final return, are not visible in this copy;
# presumably $im starts just before the index passed by the caller and
# $name is returned - confirm.
sub previous_nonblank_token {
10884
return "" if ( $im < 0 );
10885
# step over a single blank token
if ( $types_to_go[$im] eq 'b' ) { $im--; }
10886
return "" if ( $im < 0 );
10887
$name = $tokens_to_go[$im];
10889
# prepend any sub name to an isolated -> to avoid unwanted alignments
10890
# [test case is test8/penco.pl]
10891
if ( $name eq '->' ) {
10893
if ( $im >= 0 && $types_to_go[$im] ne 'b' ) {
10894
$name = $tokens_to_go[$im] . $name;
10900
# send_lines_to_vertical_aligner( $ri_first, $ri_last, $do_not_pad )
#
# Prepares each output line of the current batch and passes it to
# Perl::Tidy::VerticalAligner::append_line.
#   $ri_first   - array ref: index of the first token of each output line
#   $ri_last    - array ref: index of the last token of each output line
#   $do_not_pad - flag received from the caller; its use is not visible
#                 in this copy of the sub
# Before the per-line loop it sets the vertical alignment markers and the
# logical padding; after the loop it saves the opening indentations.
sub send_lines_to_vertical_aligner {
10902
my ( $ri_first, $ri_last, $do_not_pad ) = @_;
10904
my $rindentation_list = [0]; # ref to indentations for each line
10906
# define the array @matching_token_to_go for the output tokens
10907
# which will be non-blank for each special token (such as =>)
10908
# for which alignment is required.
10909
set_vertical_alignment_markers( $ri_first, $ri_last );
10911
# flush if necessary to avoid unwanted alignment
10912
my $must_flush = 0;
10913
# only batches with more than one output line are checked
if ( @$ri_first > 1 ) {
10915
# flush before a long if statement
10916
if ( $types_to_go[0] eq 'k' && $tokens_to_go[0] =~ /^(if|unless)$/ ) {
10921
Perl::Tidy::VerticalAligner::flush();
10924
set_logical_padding( $ri_first, $ri_last );
10926
# loop to prepare each line for shipment
10927
my $n_last_line = @$ri_first - 1;
10929
for my $n ( 0 .. $n_last_line ) {
10930
my $ibeg = $$ri_first[$n];
10931
my $iend = $$ri_last[$n];
10933
my ( $rtokens, $rfields, $rpatterns ) =
10934
make_alignment_patterns( $ibeg, $iend );
10936
my ( $indentation, $lev, $level_end, $terminal_type,
10937
$is_semicolon_terminated, $is_outdented_line )
10938
= set_adjusted_indentation( $ibeg, $iend, $rfields, $rpatterns,
10939
$ri_first, $ri_last, $rindentation_list );
10941
# we will allow outdenting of long lines..
10942
my $outdent_long_lines = (
10944
# which are long quotes, if allowed
10945
( $types_to_go[$ibeg] eq 'Q' && $rOpts->{'outdent-long-quotes'} )
10947
# which are long block comments, if allowed
10949
$types_to_go[$ibeg] eq '#'
10950
&& $rOpts->{'outdent-long-comments'}
10952
# but not if this is a static block comment
10953
&& !$is_static_block_comment
10958
$nesting_depth_to_go[ $iend + 1 ] - $nesting_depth_to_go[$ibeg];
10960
my $rvertical_tightness_flags =
10961
set_vertical_tightness_flags( $n, $n_last_line, $ibeg, $iend,
10962
$ri_first, $ri_last );
10964
# flush an outdented line to avoid any unwanted vertical alignment
10965
Perl::Tidy::VerticalAligner::flush() if ($is_outdented_line);
10967
# candidate for a terminal ternary: this line starts with ':' or the
# previous line ended with ':'
my $is_terminal_ternary = 0;
10968
if ( $tokens_to_go[$ibeg] eq ':'
10969
|| $n > 0 && $tokens_to_go[ $$ri_last[ $n - 1 ] ] eq ':' )
10971
if ( ( $terminal_type eq ';' && $level_end <= $lev )
10972
|| ( $level_end < $lev ) )
10974
$is_terminal_ternary = 1;
10978
# send this new line down the pipe
10979
my $forced_breakpoint = $forced_breakpoint_to_go[$iend];
10980
Perl::Tidy::VerticalAligner::append_line(
10987
$forced_breakpoint_to_go[$iend] || $in_comma_list,
10988
$outdent_long_lines,
10989
$is_terminal_ternary,
10990
$is_semicolon_terminated,
10992
$rvertical_tightness_flags,
10996
$tokens_to_go[$iend] eq ',' && $forced_breakpoint_to_go[$iend];
10998
# flush an outdented line to avoid any unwanted vertical alignment
10999
Perl::Tidy::VerticalAligner::flush() if ($is_outdented_line);
11003
} # end of loop to output each line
11005
# remember indentation of lines containing opening containers for
11006
# later use by sub set_adjusted_indentation
11007
save_opening_indentation( $ri_first, $ri_last, $rindentation_list );
11010
{ # begin make_alignment_patterns
11012
my %block_type_map;
11017
# map related block names into a common name to
11019
%block_type_map = (
11030
# map certain keywords to the same 'if' class to align
11031
# long if/elsif sequences. [elsif.pl]
11037
'default' => 'given',
11038
'case' => 'switch',
11040
# treat an 'undef' similar to numbers and quotes
11045
sub make_alignment_patterns {
11047
# Here we do some important preliminary work for the
11048
# vertical aligner. We create three arrays for one
11049
# output line. These arrays contain strings that can
11050
# be tested by the vertical aligner to see if
11051
# consecutive lines can be aligned vertically.
11053
# The three arrays are indexed on the vertical
11054
# alignment fields and are:
11055
# @tokens - a list of any vertical alignment tokens for this line.
11056
# These are tokens, such as '=' '&&' '#' etc which
11057
# we might want to align vertically. These are
11058
# decorated with various information such as
11059
# nesting depth to prevent unwanted vertical
11060
# alignment matches.
11061
# @fields - the actual text of the line between the vertical alignment
11063
# @patterns - a modified list of token types, one for each alignment
11064
# field. These should normally each match before alignment is
11065
# allowed, even when the alignment tokens match.
11066
my ( $ibeg, $iend ) = @_;
11070
my $i_start = $ibeg;
11074
my @container_name = ("");
11075
my @multiple_comma_arrows = (undef);
11077
my $j = 0; # field index
11080
for $i ( $ibeg .. $iend ) {
11082
# Keep track of containers balanced on this line only.
11083
# These are used below to prevent unwanted cross-line alignments.
11084
# Unbalanced containers already avoid aligning across
11085
# container boundaries.
11086
if ( $tokens_to_go[$i] eq '(' ) {
11088
# if container is balanced on this line...
11089
my $i_mate = $mate_index_to_go[$i];
11090
if ( $i_mate > $i && $i_mate <= $iend ) {
11092
my $seqno = $type_sequence_to_go[$i];
11093
my $count = comma_arrow_count($seqno);
11094
$multiple_comma_arrows[$depth] = $count && $count > 1;
11096
# Append the previous token name to make the container name
11097
# more unique. This name will also be given to any commas
11098
# within this container, and it helps avoid undesirable
11099
# alignments of different types of containers.
11100
my $name = previous_nonblank_token($i);
11102
$container_name[$depth] = "+" . $name;
11104
# Make the container name even more unique if necessary.
11105
# If we are not vertically aligning this opening paren,
11106
# append a character count to avoid bad alignment because
11107
# it usually looks bad to align commas within containers
11108
# for which the opening parens do not align. Here
11109
# is an example of very BAD alignment of commas (because
11110
# the atan2 functions are not all aligned):
11112
# $X * $RTYSQP1 * atan2( $X, $RTYSQP1 ) +
11113
# $Y * $RTXSQP1 * atan2( $Y, $RTXSQP1 ) -
11114
# $X * atan2( $X, 1 ) -
11115
# $Y * atan2( $Y, 1 );
11117
# On the other hand, it is usually okay to align commas if
11118
# opening parens align, such as:
11119
# glVertex3d( $cx + $s * $xs, $cy, $z );
11120
# glVertex3d( $cx, $cy + $s * $ys, $z );
11121
# glVertex3d( $cx - $s * $xs, $cy, $z );
11122
# glVertex3d( $cx, $cy - $s * $ys, $z );
11124
# To distinguish between these situations, we will
11125
# append the length of the line from the previous matching
11126
# token, or beginning of line, to the function name. This
11127
# will allow the vertical aligner to reject undesirable
11130
# if we are not aligning on this paren...
11131
if ( $matching_token_to_go[$i] eq '' ) {
11133
# Sum length from previous alignment, or start of line.
11134
# Note that we have to sum token lengths here because
11135
# padding has been done and so array $lengths_to_go
11139
join( '', @tokens_to_go[ $i_start .. $i - 1 ] ) );
11140
$len += leading_spaces_to_go($i_start)
11141
if ( $i_start == $ibeg );
11143
# tack length onto the container name to make unique
11144
$container_name[$depth] .= "-" . $len;
11148
elsif ( $tokens_to_go[$i] eq ')' ) {
11149
$depth-- if $depth > 0;
11152
# if we find a new synchronization token, we are done with
11154
if ( $i > $i_start && $matching_token_to_go[$i] ne '' ) {
11156
my $tok = my $raw_tok = $matching_token_to_go[$i];
11158
# make separators in different nesting depths unique
11159
# by appending the nesting depth digit.
11160
if ( $raw_tok ne '#' ) {
11161
$tok .= "$nesting_depth_to_go[$i]";
11164
# also decorate commas with any container name to avoid
11165
# unwanted cross-line alignments.
11166
if ( $raw_tok eq ',' || $raw_tok eq '=>' ) {
11167
if ( $container_name[$depth] ) {
11168
$tok .= $container_name[$depth];
11172
# Patch to avoid aligning leading and trailing if, unless.
11173
# Mark trailing if, unless statements with container names.
11174
# This makes them different from leading if, unless which
11175
# are not so marked at present. If we ever need to name
11176
# them too, we could use ci to distinguish them.
11177
# Example problem to avoid:
11178
# return ( 2, "DBERROR" )
11179
# if ( $retval == 2 );
11180
# if ( scalar @_ ) {
11181
# my ( $a, $b, $c, $d, $e, $f ) = @_;
11183
if ( $raw_tok eq '(' ) {
11184
my $ci = $ci_levels_to_go[$ibeg];
11185
if ( $container_name[$depth] =~ /^\+(if|unless)/
11188
$tok .= $container_name[$depth];
11192
# Decorate block braces with block types to avoid
11193
# unwanted alignments such as the following:
11194
# foreach ( @{$routput_array} ) { $fh->print($_) }
11195
# eval { $fh->close() };
11196
if ( $raw_tok eq '{' && $block_type_to_go[$i] ) {
11197
my $block_type = $block_type_to_go[$i];
11199
# map certain related block types to allow
11200
# else blocks to align
11201
$block_type = $block_type_map{$block_type}
11202
if ( defined( $block_type_map{$block_type} ) );
11204
# remove sub names to allow one-line sub braces to align
11205
# regardless of name
11206
if ( $block_type =~ /^sub / ) { $block_type = 'sub' }
11208
# allow all control-type blocks to align
11209
if ( $block_type =~ /^[A-Z]+$/ ) { $block_type = 'BEGIN' }
11211
$tok .= $block_type;
11214
# concatenate the text of the consecutive tokens to form
11217
join( '', @tokens_to_go[ $i_start .. $i - 1 ] ) );
11219
# store the alignment token for this field
11220
push( @tokens, $tok );
11222
# get ready for the next batch
11225
$patterns[$j] = "";
11228
# continue accumulating tokens
11229
# handle non-keywords..
11230
if ( $types_to_go[$i] ne 'k' ) {
11231
my $type = $types_to_go[$i];
11233
# Mark most things before arrows as a quote to
11234
# get them to line up. Testfile: mixed.pl.
11235
if ( ( $i < $iend - 1 ) && ( $type =~ /^[wnC]$/ ) ) {
11236
my $next_type = $types_to_go[ $i + 1 ];
11237
my $i_next_nonblank =
11238
( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 );
11240
if ( $types_to_go[$i_next_nonblank] eq '=>' ) {
11243
# Patch to ignore leading minus before words,
11244
# by changing pattern 'mQ' into just 'Q',
11245
# so that we can align things like this:
11246
# Button => "Print letter \"~$_\"",
11247
# -command => [ sub { print "$_[0]\n" }, $_ ],
11248
if ( $patterns[$j] eq 'm' ) { $patterns[$j] = "" }
11252
# patch to make numbers and quotes align
11253
if ( $type eq 'n' ) { $type = 'Q' }
11255
# patch to ignore any ! in patterns
11256
if ( $type eq '!' ) { $type = '' }
11258
$patterns[$j] .= $type;
11261
# for keywords we have to use the actual text
11264
my $tok = $tokens_to_go[$i];
11266
# but map certain keywords to a common string to allow
11268
$tok = $keyword_map{$tok}
11269
if ( defined( $keyword_map{$tok} ) );
11270
$patterns[$j] .= $tok;
11274
# done with this line .. join text of tokens to make the last field
11275
push( @fields, join( '', @tokens_to_go[ $i_start .. $iend ] ) );
11276
return ( \@tokens, \@fields, \@patterns );
11279
} # end make_alignment_patterns
11281
{ # begin unmatched_indexes
11283
# Closure to keep track of unbalanced containers in the current batch.
11284
# Variables shared by the routines in this block:
#   @unmatched_opening_indexes_in_this_batch - indexes of opening tokens
#       whose closing mate was not found in this batch
#   @unmatched_closing_indexes_in_this_batch - indexes of closing tokens
#       whose opening mate was not found in this batch
#   %comma_arrow_count - number of '=>' tokens seen, keyed by the type
#       sequence number of the innermost container still open at that point
11285
my @unmatched_opening_indexes_in_this_batch;
11286
my @unmatched_closing_indexes_in_this_batch;
11287
my %comma_arrow_count;
11289
# Returns the total number of unmatched opening plus unmatched closing
# tokens recorded for this batch (zero, i.e. false, if the batch is balanced).
sub is_unbalanced_batch {
11290
@unmatched_opening_indexes_in_this_batch +
11291
@unmatched_closing_indexes_in_this_batch;
11294
# Returns the '=>' count stored for container sequence number $seqno
# (undef if none was counted).
# NOTE(review): the declaration of $seqno is not visible in this listing;
# presumably it is taken from @_ -- confirm.
sub comma_arrow_count {
11296
return $comma_arrow_count{$seqno};
11299
sub match_opening_and_closing_tokens {
11301
# Match up indexes of opening and closing braces, etc, in this batch.
11302
# This has to be done after all tokens are stored because unstoring
11303
# of tokens would otherwise cause trouble.
#
# Side effects: resets the three shared variables of this closure, fills
# @mate_index_to_go for every matched pair, and counts '=>' tokens per
# container in %comma_arrow_count.
11305
@unmatched_opening_indexes_in_this_batch = ();
11306
@unmatched_closing_indexes_in_this_batch = ();
11307
%comma_arrow_count = ();
11309
my ( $i, $i_mate, $token );
11310
foreach $i ( 0 .. $max_index_to_go ) {
11311
# only tokens which carry a type sequence number are container tokens
if ( $type_sequence_to_go[$i] ) {
11312
$token = $tokens_to_go[$i];
11313
# opening token (including '?'): remember it until its mate is seen
if ( $token =~ /^[\(\[\{\?]$/ ) {
11314
push @unmatched_opening_indexes_in_this_batch, $i;
11316
# closing token (including ':'): try to pair it with the most recent
# unmatched opening token
elsif ( $token =~ /^[\)\]\}\:]$/ ) {
11318
$i_mate = pop @unmatched_opening_indexes_in_this_batch;
11319
if ( defined($i_mate) && $i_mate >= 0 ) {
11320
# a true pair must share the same type sequence number
if ( $type_sequence_to_go[$i_mate] ==
11321
$type_sequence_to_go[$i] )
11323
$mate_index_to_go[$i] = $i_mate;
11324
$mate_index_to_go[$i_mate] = $i;
11327
push @unmatched_opening_indexes_in_this_batch,
11329
push @unmatched_closing_indexes_in_this_batch, $i;
11333
push @unmatched_closing_indexes_in_this_batch, $i;
11337
# a '=>' is counted against the innermost container which is still
# open at this point in the batch
elsif ( $tokens_to_go[$i] eq '=>' ) {
11338
if (@unmatched_opening_indexes_in_this_batch) {
11339
my $j = $unmatched_opening_indexes_in_this_batch[-1];
11340
my $seqno = $type_sequence_to_go[$j];
11341
$comma_arrow_count{$seqno}++;
11347
sub save_opening_indentation {
11349
# This should be called after each batch of tokens is output. It
11350
# saves indentations of lines of all unmatched opening tokens.
11351
# These will be used by sub get_opening_indentation.
#
# Parameters ( $ri_first, $ri_last, $rindentation_list ) are passed
# straight through to lookup_opening_indentation.
11353
my ( $ri_first, $ri_last, $rindentation_list ) = @_;
11355
# we no longer need the saved indentations for tokens which
11356
# are unmatched closing tokens in this batch, because we will
11357
# never encounter them again. So we can delete them to keep
11358
# the hash size down.
11359
foreach (@unmatched_closing_indexes_in_this_batch) {
11360
my $seqno = $type_sequence_to_go[$_];
11361
delete $saved_opening_indentation{$seqno};
11364
# we need to save indentations of any unmatched opening tokens
11365
# in this batch because we may need them in a subsequent batch.
# Each entry is an array ref holding the list returned by
# lookup_opening_indentation, keyed by the token's sequence number.
11366
foreach (@unmatched_opening_indexes_in_this_batch) {
11367
my $seqno = $type_sequence_to_go[$_];
11368
$saved_opening_indentation{$seqno} = [
11369
lookup_opening_indentation(
11370
$_, $ri_first, $ri_last, $rindentation_list
11375
} # end unmatched_indexes
11377
sub get_opening_indentation {
11379
# get the indentation of the line which output the opening token
11380
# corresponding to a given closing token in the current output batch.
#
# Parameters:
11383
# $i_closing - index in this line of a closing token ')' '}' or ']'
11385
# $ri_first - reference to list of the first index $i for each output
11386
# line in this batch
11387
# $ri_last - reference to list of the last index $i for each output line
11389
# $rindentation_list - reference to a list containing the indentation
11390
# used for each line.
#
# Returns the list ( $indent, $offset, $is_leading, $exists ):
11393
# -the indentation of the line which contained the opening token
11394
# which matches the closing token at index $i_closing
11395
# -and its offset (number of columns) from the start of the line
# -$is_leading, as reported by lookup_opening_indentation or as saved
#  by an earlier batch in %saved_opening_indentation
# -$exists: NOTE(review): the assignments to $exists are not visible in
#  this listing; presumably it flags whether an opening token was found
#  -- confirm.
11397
my ( $i_closing, $ri_first, $ri_last, $rindentation_list ) = @_;
11399
# first, see if the opening token is in the current batch
11400
my $i_opening = $mate_index_to_go[$i_closing];
11401
my ( $indent, $offset, $is_leading, $exists );
11403
if ( $i_opening >= 0 ) {
11405
# it is..look up the indentation
11406
( $indent, $offset, $is_leading ) =
11407
lookup_opening_indentation( $i_opening, $ri_first, $ri_last,
11408
$rindentation_list );
11411
# if not, it should have been stored in the hash by a previous batch
11413
my $seqno = $type_sequence_to_go[$i_closing];
11415
if ( $saved_opening_indentation{$seqno} ) {
11416
( $indent, $offset, $is_leading ) =
11417
@{ $saved_opening_indentation{$seqno} };
11420
# some kind of serious error
11421
# (example is badfile.t)
11430
# if no sequence number it must be an unbalanced container
11438
return ( $indent, $offset, $is_leading, $exists );
11441
sub lookup_opening_indentation {
11443
# get the indentation of the line in the current output batch
11444
# which output a selected opening token
#
# Parameters:
11447
# $i_opening - index of an opening token in the current output batch
11448
# whose line indentation we need
11449
# $ri_first - reference to list of the first index $i for each output
11450
# line in this batch (named $ri_start in the code below)
11451
# $ri_last - reference to list of the last index $i for each output line
11453
# $rindentation_list - reference to a list containing the indentation
11454
# used for each line. (NOTE: the first slot in
11455
# this list is the last returned line number, and this is
11456
# followed by the list of indentations).
#
# Returns the list ( indentation, $offset, $is_leading ):
11459
# -the indentation of the line which contained token $i_opening
11460
# -and its offset (number of columns) from the start of the line
# -$is_leading: true if $i_opening is the first token of its line
#
# Side effect: slot 0 of @$rindentation_list is updated with the line
# number found, so that the next call can start searching from there.
11462
my ( $i_opening, $ri_start, $ri_last, $rindentation_list ) = @_;
11464
my $nline = $rindentation_list->[0]; # line number of previous lookup
11466
# reset line location if necessary
# (the cached line starts after the token we are looking for)
11467
$nline = 0 if ( $i_opening < $ri_start->[$nline] );
11469
# find the correct line
# by stepping forward until the line's last index reaches $i_opening
11470
unless ( $i_opening > $ri_last->[-1] ) {
11471
while ( $i_opening > $ri_last->[$nline] ) { $nline++; }
11474
# error - token index is out of bounds - shouldn't happen
# (falls back to the last line of the batch after reporting)
11477
"non-fatal program bug in lookup_opening_indentation - index out of range\n"
11479
report_definite_bug();
11480
$nline = $#{$ri_last};
11483
$rindentation_list->[0] =
11484
$nline; # save line number to start looking next call
11485
my $ibeg = $ri_start->[$nline];
# offset is measured from the first token of the line to $i_opening
11486
my $offset = token_sequence_length( $ibeg, $i_opening ) - 1;
11487
my $is_leading = ( $ibeg == $i_opening );
# indentations start at slot 1 because slot 0 holds the cached line number
11488
return ( $rindentation_list->[ $nline + 1 ], $offset, $is_leading );
11492
# Lookup hash (keyword => 1) of block types; consulted by
# set_adjusted_indentation when deciding whether a line which begins with
# a closing block brace should be treated like an isolated block brace.
my %is_if_elsif_else_unless_while_until_for_foreach;
11496
# These block types may have text between the keyword and opening
11497
# curly. Note: 'else' does not, but must be included to allow trailing
11498
# if/elsif text to be appended.
11499
# patch for SWITCH/CASE: added 'case' and 'when'
11500
@_ = qw(if elsif else unless while until for foreach case when);
# hash-slice assignment: every keyword listed above maps to 1
11501
@is_if_elsif_else_unless_while_until_for_foreach{@_} = (1) x scalar(@_);
11504
sub set_adjusted_indentation {
11506
# This routine has the final say regarding the actual indentation of
11507
# a line. It starts with the basic indentation which has been
11508
# defined for the leading token, and then takes into account any
11509
# options that the user has set regarding special indenting and
# outdenting.
#
# Parameters:
#   $ibeg, $iend - indexes of the first and last tokens of this line
#   $rfields, $rpatterns - references to the alignment fields and
#       patterns of this line (only element 0 of each is examined here)
#   $ri_first, $ri_last, $rindentation_list - passed through to
#       get_opening_indentation; the indentation chosen here is also
#       pushed onto @$rindentation_list
#
# Returns the list ( $indentation, $lev, $level_end, $terminal_type,
#   $is_semicolon_terminated, $is_outdented_line )
11512
my ( $ibeg, $iend, $rfields, $rpatterns, $ri_first, $ri_last,
11513
$rindentation_list )
11516
# we need to know the last token of this line
11517
my ( $terminal_type, $i_terminal ) =
11518
terminal_type( \@types_to_go, \@block_type_to_go, $ibeg, $iend );
11520
my $is_outdented_line = 0;
11522
# true if the line ends in ';' at a shallower nesting depth than it began
my $is_semicolon_terminated = $terminal_type eq ';'
11523
&& $nesting_depth_to_go[$iend] < $nesting_depth_to_go[$ibeg];
11525
##########################################################
11526
# Section 1: set a flag and a default indentation
11528
# Most lines are indented according to the initial token.
11529
# But it is common to outdent to the level just after the
11530
# terminal token in certain cases...
11531
# adjust_indentation flag:
11532
# 0 - do not adjust
# 1 - use the reduced indentation of the terminal token
11534
# 2 - vertically align with opening token
# 3 - full indentation of the closing token (see Section 2)
11536
##########################################################
11537
my $adjust_indentation = 0;
11538
my $default_adjust_indentation = $adjust_indentation;
11541
$opening_indentation, $opening_offset,
11542
$is_leading, $opening_exists
11545
# if we are at a closing token of some type..
11546
if ( $types_to_go[$ibeg] =~ /^[\)\}\]]$/ ) {
11548
# get the indentation of the line containing the corresponding
11551
$opening_indentation, $opening_offset,
11552
$is_leading, $opening_exists
11554
= get_opening_indentation( $ibeg, $ri_first, $ri_last,
11555
$rindentation_list );
11557
# First set the default behavior:
11558
# default behavior is to outdent closing lines
11559
# of the form: "); }; ]; )->xxx;"
11561
$is_semicolon_terminated
11563
# and 'cuddled parens' of the form: ")->pack("
11565
$terminal_type eq '('
11566
&& $types_to_go[$ibeg] eq ')'
11567
&& ( $nesting_depth_to_go[$iend] + 1 ==
11568
$nesting_depth_to_go[$ibeg] )
11572
$adjust_indentation = 1;
11575
# TESTING: outdent something like '),'
11577
$terminal_type eq ','
11579
# allow just one character before the comma
11580
&& $i_terminal == $ibeg + 1
11582
# require LIST environment; otherwise, we may outdent too much --
11583
# this can happen in calls without parentheses (overload.t);
11584
&& $container_environment_to_go[$i_terminal] eq 'LIST'
11587
$adjust_indentation = 1;
11590
# undo continuation indentation of a terminal closing token if
11591
# it is the last token before a level decrease. This will allow
11592
# a closing token to line up with its opening counterpart, and
11593
# avoids an indentation jump larger than 1 level.
11594
if ( $types_to_go[$i_terminal] =~ /^[\}\]\)R]$/
11595
&& $i_terminal == $ibeg )
11597
my $ci = $ci_levels_to_go[$ibeg];
11598
my $lev = $levels_to_go[$ibeg];
11599
my $next_type = $types_to_go[ $ibeg + 1 ];
11600
my $i_next_nonblank =
11601
( ( $next_type eq 'b' ) ? $ibeg + 2 : $ibeg + 1 );
11602
if ( $i_next_nonblank <= $max_index_to_go
11603
&& $levels_to_go[$i_next_nonblank] < $lev )
11605
$adjust_indentation = 1;
11609
# remember the default so that Section 2 can fall back to it
$default_adjust_indentation = $adjust_indentation;
11611
# Now modify default behavior according to user request:
11612
# handle option to indent non-blocks of the form ); }; ];
11613
# But don't do special indentation to something like ')->pack('
11614
if ( !$block_type_to_go[$ibeg] ) {
11615
my $cti = $closing_token_indentation{ $tokens_to_go[$ibeg] };
11617
if ( $i_terminal <= $ibeg + 1
11618
|| $is_semicolon_terminated )
11620
$adjust_indentation = 2;
11623
$adjust_indentation = 0;
11626
elsif ( $cti == 2 ) {
11627
if ($is_semicolon_terminated) {
11628
$adjust_indentation = 3;
11631
$adjust_indentation = 0;
11634
elsif ( $cti == 3 ) {
11635
$adjust_indentation = 3;
11639
# handle option to indent blocks
11642
$rOpts->{'indent-closing-brace'}
11644
$i_terminal == $ibeg # isolated terminal '}'
11645
|| $is_semicolon_terminated
11649
$adjust_indentation = 3;
11654
# if at ');', '};', '>;', and '];' of a terminal qw quote
11655
elsif ($$rpatterns[0] =~ /^qb*;$/
11656
&& $$rfields[0] =~ /^([\)\}\]\>]);$/ )
11658
if ( $closing_token_indentation{$1} == 0 ) {
11659
$adjust_indentation = 1;
11662
$adjust_indentation = 3;
11666
# if line begins with a ':', align it with any
11667
# previous line leading with corresponding ?
11668
elsif ( $types_to_go[$ibeg] eq ':' ) {
11670
$opening_indentation, $opening_offset,
11671
$is_leading, $opening_exists
11673
= get_opening_indentation( $ibeg, $ri_first, $ri_last,
11674
$rindentation_list );
11675
if ($is_leading) { $adjust_indentation = 2; }
11678
##########################################################
11679
# Section 2: set indentation according to flag set above
11681
# Select the indentation object to define leading
11682
# whitespace. If we are outdenting something like '} } );'
11683
# then we want to use one level below the last token
11684
# ($i_terminal) in order to get it to fully outdent through
11686
##########################################################
11689
my $level_end = $levels_to_go[$iend];
11691
if ( $adjust_indentation == 0 ) {
11692
$indentation = $leading_spaces_to_go[$ibeg];
11693
$lev = $levels_to_go[$ibeg];
11695
elsif ( $adjust_indentation == 1 ) {
11696
$indentation = $reduced_spaces_to_go[$i_terminal];
11697
$lev = $levels_to_go[$i_terminal];
11700
# handle indented closing token which aligns with opening token
11701
elsif ( $adjust_indentation == 2 ) {
11703
# handle option to align closing token with opening token
11704
$lev = $levels_to_go[$ibeg];
11706
# calculate spaces needed to align with opening token
11708
get_SPACES($opening_indentation) + $opening_offset;
11710
# Indent less than the previous line.
11712
# Problem: For -lp we don't exactly know what it was if there
11713
# were recoverable spaces sent to the aligner. A good solution
11714
# would be to force a flush of the vertical alignment buffer, so
11715
# that we would know. For now, this rule is used for -lp:
11717
# When the last line did not start with a closing token we will
11718
# be optimistic that the aligner will recover everything wanted.
11720
# This rule will prevent us from breaking a hierarchy of closing
11721
# tokens, and in a worst case will leave a closing paren too far
11722
# indented, but this is better than frequently leaving it not
11724
my $last_spaces = get_SPACES($last_indentation_written);
11725
if ( $last_leading_token !~ /^[\}\]\)]$/ ) {
11727
get_RECOVERABLE_SPACES($last_indentation_written);
11730
# reset the indentation to the new space count if it works
11731
# only options are all or none: nothing in-between looks good
11732
$lev = $levels_to_go[$ibeg];
11733
if ( $space_count < $last_spaces ) {
11734
if ($rOpts_line_up_parentheses) {
11735
my $lev = $levels_to_go[$ibeg];
11737
new_lp_indentation_item( $space_count, $lev, 0, 0, 0 );
11740
$indentation = $space_count;
11744
# revert to default if it doesn't work
11746
$space_count = leading_spaces_to_go($ibeg);
11747
if ( $default_adjust_indentation == 0 ) {
11748
$indentation = $leading_spaces_to_go[$ibeg];
11750
elsif ( $default_adjust_indentation == 1 ) {
11751
$indentation = $reduced_spaces_to_go[$i_terminal];
11752
$lev = $levels_to_go[$i_terminal];
11757
# Full indentation of closing tokens (-icb and -icp or -cti=2)
11760
# handle -icb (indented closing code block braces)
11761
# Updated method for indented block braces: indent one full level if
11762
# there is no continuation indentation. This will occur for major
11763
# structures such as sub, if, else, but not for things like map
11766
# Note: only code blocks without continuation indentation are
11767
# handled here (if, else, unless, ..). In the following snippet,
11768
# the terminal brace of the sort block will have continuation
11769
# indentation as shown so it will not be handled by the coding
11770
# here. We would have to undo the continuation indentation to do
11771
# this, but it probably looks ok as is. This is a possible future
11772
# update for semicolon terminated lines.
11774
# if ($sortby eq 'date' or $sortby eq 'size') {
11776
# $file_data{$a}{$sortby} <=> $file_data{$b}{$sortby}
11781
if ( $block_type_to_go[$ibeg]
11782
&& $ci_levels_to_go[$i_terminal] == 0 )
11784
my $spaces = get_SPACES( $leading_spaces_to_go[$i_terminal] );
11785
$indentation = $spaces + $rOpts_indent_columns;
11787
# NOTE: for -lp we could create a new indentation object, but
11788
# there is probably no need to do it
11791
# handle -icp and any -icb block braces which fall through above
11792
# test such as the 'sort' block mentioned above.
11795
# There are currently two ways to handle -icp...
11796
# One way is to use the indentation of the previous line:
11797
# $indentation = $last_indentation_written;
11799
# The other way is to use the indentation that the previous line
11800
# would have had if it hadn't been adjusted:
11801
$indentation = $last_unadjusted_indentation;
11803
# Current method: use the minimum of the two. This avoids
11804
# inconsistent indentation.
11805
if ( get_SPACES($last_indentation_written) <
11806
get_SPACES($indentation) )
11808
$indentation = $last_indentation_written;
11812
# use previous indentation but use own level
11813
# to cause list to be flushed properly
11814
$lev = $levels_to_go[$ibeg];
11817
# remember indentation except for multi-line quotes, which get
11819
unless ( $ibeg == 0 && $starting_in_quote ) {
11820
$last_indentation_written = $indentation;
11821
$last_unadjusted_indentation = $leading_spaces_to_go[$ibeg];
11822
$last_leading_token = $tokens_to_go[$ibeg];
11825
# be sure lines with leading closing tokens are not outdented more
11826
# than the line which contained the corresponding opening token.
11828
#############################################################
11829
# updated per bug report in alex_bug.pl: we must not
11830
# mess with the indentation of closing logical braces so
11831
# we must treat something like '} else {' as if it were
11832
# an isolated brace my $is_isolated_block_brace = (
11833
# $iend == $ibeg ) && $block_type_to_go[$ibeg];
11834
#############################################################
11835
my $is_isolated_block_brace = $block_type_to_go[$ibeg]
11836
&& ( $iend == $ibeg
11837
|| $is_if_elsif_else_unless_while_until_for_foreach{
11838
$block_type_to_go[$ibeg] } );
11840
# only do this for a ':' which is aligned with its leading '?'
11841
my $is_unaligned_colon = $types_to_go[$ibeg] eq ':' && !$is_leading;
11842
if ( defined($opening_indentation)
11843
&& !$is_isolated_block_brace
11844
&& !$is_unaligned_colon )
11846
if ( get_SPACES($opening_indentation) > get_SPACES($indentation) ) {
11847
$indentation = $opening_indentation;
11851
# remember the indentation of each line of this batch
11852
push @{$rindentation_list}, $indentation;
11854
# outdent lines with certain leading tokens...
11857
# must be first word of this batch
11863
# certain leading keywords if requested
11865
$rOpts->{'outdent-keywords'}
11866
&& $types_to_go[$ibeg] eq 'k'
11867
&& $outdent_keyword{ $tokens_to_go[$ibeg] }
11870
# or labels if requested
11871
|| ( $rOpts->{'outdent-labels'} && $types_to_go[$ibeg] eq 'J' )
11873
# or static block comments if requested
11874
|| ( $types_to_go[$ibeg] eq '#'
11875
&& $rOpts->{'outdent-static-block-comments'}
11876
&& $is_static_block_comment )
11881
my $space_count = leading_spaces_to_go($ibeg);
11882
if ( $space_count > 0 ) {
11883
# outdent by one continuation indentation, but never below column 0
$space_count -= $rOpts_continuation_indentation;
11884
$is_outdented_line = 1;
11885
if ( $space_count < 0 ) { $space_count = 0 }
11887
# do not promote a spaced static block comment to non-spaced;
11888
# this is not normally necessary but could be for some
11889
# unusual user inputs (such as -ci = -i)
11890
if ( $types_to_go[$ibeg] eq '#' && $space_count == 0 ) {
11894
if ($rOpts_line_up_parentheses) {
11896
new_lp_indentation_item( $space_count, $lev, 0, 0, 0 );
11899
$indentation = $space_count;
11904
return ( $indentation, $lev, $level_end, $terminal_type,
11905
$is_semicolon_terminated, $is_outdented_line );
11909
sub set_vertical_tightness_flags {
11911
my ( $n, $n_last_line, $ibeg, $iend, $ri_first, $ri_last ) = @_;
11913
# Define vertical tightness controls for the nth line of a batch.
11914
# We create an array of parameters which tell the vertical aligner
11915
# if we should combine this line with the next line to achieve the
11916
# desired vertical tightness. The array of parameters contains:
11918
# [0] type: 1=is opening tok 2=is closing tok 3=is opening block brace
11919
# [1] flag: if opening: 1=no multiple steps, 2=multiple steps ok
11920
# if closing: spaces of padding to use
11921
# [2] sequence number of container
11922
# [3] valid flag: do not append if this flag is false. Will be
11923
# true if appropriate -vt flag is set. Otherwise, will be
11924
# made true only for 2 line container in parens with -lp
# [4] value of get_seqno for the first token of this line ($ibeg)
# [5] value of get_seqno for the last token of this line ($iend)
11926
# These flags are used by sub set_leading_whitespace in
11927
# the vertical aligner
#
# Parameters:
#   $n           - number of this line within the batch
#   $n_last_line - number of the last line of the batch
#   $ibeg, $iend - indexes of the first and last tokens of line $n
#   $ri_first, $ri_last - references to the lists of first and last
#                  token indexes of each line of the batch
#
# Returns a reference to the six-element array described above; all
# slots default to 0.
11929
my $rvertical_tightness_flags = [ 0, 0, 0, 0, 0, 0 ];
11931
# For non-BLOCK tokens, we will need to examine the next line
11932
# too, so we won't consider the last line.
11933
if ( $n < $n_last_line ) {
11935
# see if last token is an opening token...not a BLOCK...
11936
my $ibeg_next = $$ri_first[ $n + 1 ];
11937
my $token_end = $tokens_to_go[$iend];
11938
my $iend_next = $$ri_last[ $n + 1 ];
11940
$type_sequence_to_go[$iend]
11941
&& !$block_type_to_go[$iend]
11942
&& $is_opening_token{$token_end}
11944
$opening_vertical_tightness{$token_end} > 0
11946
# allow 2-line method call to be closed up
11947
|| ( $rOpts_line_up_parentheses
11948
&& $token_end eq '('
11950
&& $types_to_go[ $iend - 1 ] ne 'b' )
11955
# avoid multiple jumps in nesting depth in one line if
11957
my $ovt = $opening_vertical_tightness{$token_end};
11958
my $iend_next = $$ri_last[ $n + 1 ];
11961
&& ( $nesting_depth_to_go[ $iend_next + 1 ] !=
11962
$nesting_depth_to_go[$ibeg_next] )
11966
# If -vt flag has not been set, mark this as invalid
11967
# and aligner will validate it if it sees the closing paren
11969
my $valid_flag = $ovt;
11970
@{$rvertical_tightness_flags} =
11971
( 1, $ovt, $type_sequence_to_go[$iend], $valid_flag );
11975
# see if first token of next line is a closing token...
11976
# ..and be sure this line does not have a side comment
11977
my $token_next = $tokens_to_go[$ibeg_next];
11978
if ( $type_sequence_to_go[$ibeg_next]
11979
&& !$block_type_to_go[$ibeg_next]
11980
&& $is_closing_token{$token_next}
11981
&& $types_to_go[$iend] !~ '#' ) # for safety, shouldn't happen!
11983
my $ovt = $opening_vertical_tightness{$token_next};
11984
my $cvt = $closing_vertical_tightness{$token_next};
11987
# never append a trailing line like )->pack(
11988
# because it will throw off later alignment
11990
$nesting_depth_to_go[$ibeg_next] ==
11991
$nesting_depth_to_go[ $iend_next + 1 ] + 1
11996
$container_environment_to_go[$ibeg_next] ne 'LIST'
12000
# allow closing up 2-line method calls
12001
|| ( $rOpts_line_up_parentheses
12002
&& $token_next eq ')' )
12009
# decide which trailing closing tokens to append..
12011
if ( $cvt == 2 || $iend_next == $ibeg_next ) { $ok = 1 }
12013
# string of the types of the two tokens following the closing token
my $str = join( '',
12014
@types_to_go[ $ibeg_next + 1 .. $ibeg_next + 2 ] );
12016
# append closing token if followed by comment or ';'
12017
if ( $str =~ /^b?[#;]/ ) { $ok = 1 }
12021
my $valid_flag = $cvt;
12022
@{$rvertical_tightness_flags} = (
12024
$tightness{$token_next} == 2 ? 0 : 1,
12025
$type_sequence_to_go[$ibeg_next], $valid_flag,
12031
# Opening Token Right
12032
# If requested, move an isolated trailing opening token to the end of
12033
# the previous line which ended in a comma. We could do this
12034
# in sub recombine_breakpoints but that would cause problems
12035
# with -lp formatting. The problem is that indentation will
12036
# quickly move far to the right in nested expressions. By
12037
# doing it after indentation has been set, we avoid changes
12038
# to the indentation. Actual movement of the token takes place
12039
# in sub write_leader_and_string.
12041
$opening_token_right{ $tokens_to_go[$ibeg_next] }
12043
# previous line is not opening
12044
# (use -sot to combine with it)
12045
&& !$is_opening_token{$token_end}
12047
# previous line ended in one of these
12048
# (add other cases if necessary; '=>' and '.' are not necessary
12049
##&& ($is_opening_token{$token_end} || $token_end eq ',')
12050
&& !$block_type_to_go[$ibeg_next]
12052
# this is a line with just an opening token
12053
&& ( $iend_next == $ibeg_next
12054
|| $iend_next == $ibeg_next + 2
12055
&& $types_to_go[$iend_next] eq '#' )
12057
# looks bad if we align vertically with the wrong container
12058
&& $tokens_to_go[$ibeg] ne $tokens_to_go[$ibeg_next]
12061
my $valid_flag = 1;
# one space of padding if the opening token was preceded by a blank
12062
my $spaces = ( $types_to_go[ $ibeg_next - 1 ] eq 'b' ) ? 1 : 0;
12063
@{$rvertical_tightness_flags} =
12064
( 2, $spaces, $type_sequence_to_go[$ibeg_next], $valid_flag, );
12067
# Stacking of opening and closing tokens
12069
my $token_beg_next = $tokens_to_go[$ibeg_next];
12071
# patch to make something like 'qw(' behave like an opening paren
12073
if ( $types_to_go[$ibeg_next] eq 'q' ) {
12074
if ( $token_beg_next =~ /^qw\s*([\[\(\{])$/ ) {
12075
$token_beg_next = $1;
12079
if ( $is_closing_token{$token_end}
12080
&& $is_closing_token{$token_beg_next} )
12082
$stackable = $stack_closing_token{$token_beg_next}
12083
unless ( $block_type_to_go[$ibeg_next] )
12084
; # shouldn't happen; just checking
12086
elsif ($is_opening_token{$token_end}
12087
&& $is_opening_token{$token_beg_next} )
12089
$stackable = $stack_opening_token{$token_beg_next}
12090
unless ( $block_type_to_go[$ibeg_next] )
12091
; # shouldn't happen; just checking
12096
my $is_semicolon_terminated;
# only evaluated when the next line is the last line of the batch
12097
if ( $n + 1 == $n_last_line ) {
12098
my ( $terminal_type, $i_terminal ) = terminal_type(
12099
\@types_to_go, \@block_type_to_go,
12100
$ibeg_next, $iend_next
12102
$is_semicolon_terminated = $terminal_type eq ';'
12103
&& $nesting_depth_to_go[$iend_next] <
12104
$nesting_depth_to_go[$ibeg_next];
12107
# this must be a line with just an opening token
12108
# or end in a semicolon
12110
$is_semicolon_terminated
12111
|| ( $iend_next == $ibeg_next
12112
|| $iend_next == $ibeg_next + 2
12113
&& $types_to_go[$iend_next] eq '#' )
12116
my $valid_flag = 1;
12117
my $spaces = ( $types_to_go[ $ibeg_next - 1 ] eq 'b' ) ? 1 : 0;
12118
@{$rvertical_tightness_flags} =
12119
( 2, $spaces, $type_sequence_to_go[$ibeg_next], $valid_flag,
12125
# Check for a last line with isolated opening BLOCK curly
12126
elsif ($rOpts_block_brace_vertical_tightness
12128
&& $types_to_go[$iend] eq '{'
12129
&& $block_type_to_go[$iend] =~
12130
/$block_brace_vertical_tightness_pattern/o )
12132
@{$rvertical_tightness_flags} =
12133
( 3, $rOpts_block_brace_vertical_tightness, 0, 1 );
12136
# pack in the sequence numbers of the ends of this line
12137
$rvertical_tightness_flags->[4] = get_seqno($ibeg);
12138
$rvertical_tightness_flags->[5] = get_seqno($iend);
12139
return $rvertical_tightness_flags;
12144
# get opening and closing sequence numbers of a token for the vertical
12145
# aligner. Assign qw quotes a value to allow qw opening and closing tokens
12146
# to be treated somewhat like opening and closing tokens for stacking
12147
# tokens by the vertical aligner.
12149
my $seqno = $type_sequence_to_go[$ii];
12150
if ( $types_to_go[$ii] eq 'q' ) {
12153
$seqno = $SEQ_QW if ( $tokens_to_go[$ii] =~ /^qw\s*[\(\{\[]/ );
12156
if ( !$ending_in_quote ) {
12157
$seqno = $SEQ_QW if ( $tokens_to_go[$ii] =~ /[\)\}\]]$/ );
12165
# Lookup hashes (key => 1) filled in by the hash-slice assignments below.
# NOTE(review): the statement that opens the first token list is not
# visible in this listing -- confirm against the full file.
my %is_vertical_alignment_type;
12166
my %is_vertical_alignment_keyword;
12171
= **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=
12172
{ ? : => =~ && || // ~~ !~~
12174
@is_vertical_alignment_type{@_} = (1) x scalar(@_);
12176
# keywords stored in %is_vertical_alignment_keyword
@_ = qw(if unless and or err eq ne for foreach while until);
12177
@is_vertical_alignment_keyword{@_} = (1) x scalar(@_);
12180
sub set_vertical_alignment_markers {
12182
# This routine takes the first step toward vertical alignment of the
12183
# lines of output text. It looks for certain tokens which can serve as
12184
# vertical alignment markers (such as an '=').
12186
# Method: We look at each token $i in this output batch and set
12187
# $matching_token_to_go[$i] equal to those tokens at which we would
12188
# accept vertical alignment.
12190
# nothing to do if we aren't allowed to change whitespace
12191
if ( !$rOpts_add_whitespace ) {
12192
for my $i ( 0 .. $max_index_to_go ) {
12193
$matching_token_to_go[$i] = '';
12198
my ( $ri_first, $ri_last ) = @_;
12200
# remember the index of last nonblank token before any sidecomment
12201
my $i_terminal = $max_index_to_go;
12202
if ( $types_to_go[$i_terminal] eq '#' ) {
12203
if ( $i_terminal > 0 && $types_to_go[ --$i_terminal ] eq 'b' ) {
12204
if ( $i_terminal > 0 ) { --$i_terminal }
12208
# look at each line of this batch..
12209
my $last_vertical_alignment_before_index;
12210
my $vert_last_nonblank_type;
12211
my $vert_last_nonblank_token;
12212
my $vert_last_nonblank_block_type;
12213
my $max_line = @$ri_first - 1;
12214
my ( $i, $type, $token, $block_type, $alignment_type );
12215
my ( $ibeg, $iend, $line );
12217
foreach $line ( 0 .. $max_line ) {
12218
$ibeg = $$ri_first[$line];
12219
$iend = $$ri_last[$line];
12220
$last_vertical_alignment_before_index = -1;
12221
$vert_last_nonblank_type = '';
12222
$vert_last_nonblank_token = '';
12223
$vert_last_nonblank_block_type = '';
12225
# look at each token in this output line..
12226
foreach $i ( $ibeg .. $iend ) {
12227
$alignment_type = '';
12228
$type = $types_to_go[$i];
12229
$block_type = $block_type_to_go[$i];
12230
$token = $tokens_to_go[$i];
12232
# check for flag indicating that we should not align
12234
if ( $matching_token_to_go[$i] ) {
12235
$matching_token_to_go[$i] = '';
12239
#--------------------------------------------------------
12240
# First see if we want to align BEFORE this token
12241
#--------------------------------------------------------
12243
# The first possible token that we can align before
12244
# is index 2 because: 1) it doesn't normally make sense to
12245
# align before the first token and 2) the second
12246
# token must be a blank if we are to align before
12248
if ( $i < $ibeg + 2 ) { }
12250
# must follow a blank token
12251
elsif ( $types_to_go[ $i - 1 ] ne 'b' ) { }
12253
# align a side comment --
12254
elsif ( $type eq '#' ) {
12258
# it is a static side comment
12260
$rOpts->{'static-side-comments'}
12261
&& $token =~ /$static_side_comment_pattern/o
12264
# or a closing side comment
12265
|| ( $vert_last_nonblank_block_type
12267
/$closing_side_comment_prefix_pattern/o )
12270
$alignment_type = $type;
12271
} ## Example of a static side comment
12274
# otherwise, do not align two in a row to create a
12276
elsif ( $last_vertical_alignment_before_index == $i - 2 ) { }
12278
# align before one of these keywords
12279
# (within a line, since $i>1)
12280
elsif ( $type eq 'k' ) {
12282
# i.e. one of: if unless and or err eq ne for foreach while until
12283
if ( $is_vertical_alignment_keyword{$token} ) {
12284
$alignment_type = $token;
12288
# align before one of these types..
12289
# Note: add '.' after new vertical aligner is operational
12290
elsif ( $is_vertical_alignment_type{$type} ) {
12291
$alignment_type = $token;
12293
# Do not align a terminal token. Although it might
12294
# occasionally look ok to do this, it has been found to be
12295
# a good general rule. The main problems are:
12296
# (1) that the terminal token (such as an = or :) might get
12297
# moved far to the right where it is hard to see because
12298
# nothing follows it, and
12299
# (2) doing so may prevent other good alignments.
12300
if ( $i == $iend || $i >= $i_terminal ) {
12301
$alignment_type = "";
12304
# Do not align leading ': (' or '. ('. This would prevent
12305
# alignment in something like the following:
12307
# ( $input_line_number < 10 ) ? " "
12308
# : ( $input_line_number < 100 ) ? " "
12312
# ( $case_matters ? $accessor : " lc($accessor) " )
12313
# . ( $yesno ? " eq " : " ne " )
12314
if ( $i == $ibeg + 2
12315
&& $types_to_go[$ibeg] =~ /^[\.\:]$/
12316
&& $types_to_go[ $i - 1 ] eq 'b' )
12318
$alignment_type = "";
12321
# For a paren after keyword, only align something like this:
12323
# elsif ( $b ) { &b }
12324
if ( $token eq '(' && $vert_last_nonblank_type eq 'k' ) {
12325
$alignment_type = ""
12326
unless $vert_last_nonblank_token =~
12327
/^(if|unless|elsif)$/;
12330
# be sure the alignment tokens are unique
12331
# This didn't work well: reason not determined
12332
# if ($token ne $type) {$alignment_type .= $type}
12335
# NOTE: This is deactivated because it causes the previous
12336
# if/elsif alignment to fail
12337
#elsif ( $type eq '}' && $token eq '}' && $block_type_to_go[$i])
12338
#{ $alignment_type = $type; }
12340
if ($alignment_type) {
12341
$last_vertical_alignment_before_index = $i;
12344
#--------------------------------------------------------
12345
# Next see if we want to align AFTER the previous nonblank
12346
#--------------------------------------------------------
12348
# We want to line up ',' and interior ';' tokens, with the added
12349
# space AFTER these tokens. (Note: interior ';' is included
12350
# because it may occur in short blocks).
12353
# we haven't already set it
12356
# and it's not the first token of the line
12359
# and it follows a blank
12360
&& $types_to_go[ $i - 1 ] eq 'b'
12362
# and previous token IS one of these:
12363
&& ( $vert_last_nonblank_type =~ /^[\,\;]$/ )
12365
# and it's NOT one of these
12366
&& ( $type !~ /^[b\#\)\]\}]$/ )
12368
# then go ahead and align
12372
$alignment_type = $vert_last_nonblank_type;
12375
#--------------------------------------------------------
12376
# then store the value
12377
#--------------------------------------------------------
12378
$matching_token_to_go[$i] = $alignment_type;
12379
if ( $type ne 'b' ) {
12380
$vert_last_nonblank_type = $type;
12381
$vert_last_nonblank_token = $token;
12382
$vert_last_nonblank_block_type = $block_type;
12389
sub terminal_type {
12391
# Find the terminal (last significant) token type of one output line.
#
# Arguments:
#   $rtype       - reference to the array of token types
#   $rblock_type - reference to the array of block types
#   $ibeg, $iend - first and last token index of the line
#
# In list context the index of the reported token is returned as a
# second value; in scalar context only the type is returned.
#
# returns type of last token on this line (terminal token), as follows:
12392
# returns # for a full-line comment
12393
# returns ' ' for a blank line
12394
# otherwise returns final token type
12396
my ( $rtype, $rblock_type, $ibeg, $iend ) = @_;
12398
# check for full-line comment..
12399
if ( $$rtype[$ibeg] eq '#' ) {
12400
return wantarray ? ( $$rtype[$ibeg], $ibeg ) : $$rtype[$ibeg];
12404
# start at end and walk backwards..
12405
for ( my $i = $iend ; $i >= $ibeg ; $i-- ) {
12407
# skip past any side comment and blanks
12408
next if ( $$rtype[$i] eq 'b' );
12409
next if ( $$rtype[$i] eq '#' );
12411
# found it..make sure it is a BLOCK termination,
12412
# but hide a terminal } after sort/grep/map because it is not
12413
# necessarily the end of the line. (terminal.t)
# A '}' with no block type, or one whose block type is a key of
# %is_sort_map_grep_eval_do, is reported as type 'b' instead.
# NOTE(review): the hash name suggests eval and do blocks are included
# as well -- confirm against its definition.
12414
my $terminal_type = $$rtype[$i];
12416
$terminal_type eq '}'
12417
&& ( !$$rblock_type[$i]
12418
|| ( $is_sort_map_grep_eval_do{ $$rblock_type[$i] } ) )
12421
$terminal_type = 'b';
12423
return wantarray ? ( $terminal_type, $i ) : $terminal_type;
12427
# only blanks and comments were seen: report a blank line at $ibeg
return wantarray ? ( ' ', $ibeg ) : ' ';
12432
my %is_good_keyword_breakpoint;
12433
my %is_lt_gt_le_ge;
12435
sub set_bond_strengths {
12439
@_ = qw(if unless while until for foreach);
12440
@is_good_keyword_breakpoint{@_} = (1) x scalar(@_);
12442
@_ = qw(lt gt le ge);
12443
@is_lt_gt_le_ge{@_} = (1) x scalar(@_);
12445
###############################################################
12446
# NOTE: NO_BREAK's set here are HINTS which may not be honored;
12447
# essential NO_BREAKS's must be enforced in section 2, below.
12448
###############################################################
12450
# adding NEW_TOKENS: add a left and right bond strength by
12451
# mimicking what is done for an existing token type. You
12452
# can skip this step at first and take the default, then
12453
# tweak later to get desired results.
12455
# The bond strengths should roughly follow precedence order where
12456
# possible. If you make changes, please check the results very
12457
# carefully on a variety of scripts.
12459
# no break around possible filehandle
12460
$left_bond_strength{'Z'} = NO_BREAK;
12461
$right_bond_strength{'Z'} = NO_BREAK;
12463
# never put a bare word on a new line:
12464
# example print (STDERR, "bla"); will fail with break after (
12465
$left_bond_strength{'w'} = NO_BREAK;
12467
# blanks always have infinite strength to force breaks after real tokens
12468
$right_bond_strength{'b'} = NO_BREAK;
12470
# try not to break on exponentiation, range, or <=> operators
12471
@_ = qw" ** .. ... <=> ";
12472
@left_bond_strength{@_} = (STRONG) x scalar(@_);
12473
@right_bond_strength{@_} = (STRONG) x scalar(@_);
12475
# The comma-arrow has very low precedence but not a good break point
12476
$left_bond_strength{'=>'} = NO_BREAK;
12477
$right_bond_strength{'=>'} = NOMINAL;
12479
# ok to break after label
12480
$left_bond_strength{'J'} = NO_BREAK;
12481
$right_bond_strength{'J'} = NOMINAL;
12482
$left_bond_strength{'j'} = STRONG;
12483
$right_bond_strength{'j'} = STRONG;
12484
$left_bond_strength{'A'} = STRONG;
12485
$right_bond_strength{'A'} = STRONG;
12487
$left_bond_strength{'->'} = STRONG;
12488
$right_bond_strength{'->'} = VERY_STRONG;
12490
# breaking AFTER modulus operator is ok:
12492
@left_bond_strength{@_} = (STRONG) x scalar(@_);
12493
@right_bond_strength{@_} =
12494
( 0.1 * NOMINAL + 0.9 * STRONG ) x scalar(@_);
12496
# Break AFTER math operators * and /
12498
@left_bond_strength{@_} = (STRONG) x scalar(@_);
12499
@right_bond_strength{@_} = (NOMINAL) x scalar(@_);
12501
# Break AFTER weakest math operators + and -
12502
# Make them weaker than * but a bit stronger than '.'
12504
@left_bond_strength{@_} = (STRONG) x scalar(@_);
12505
@right_bond_strength{@_} =
12506
( 0.91 * NOMINAL + 0.09 * WEAK ) x scalar(@_);
12508
# breaking BEFORE these is just ok:
12510
@right_bond_strength{@_} = (STRONG) x scalar(@_);
12511
@left_bond_strength{@_} = (NOMINAL) x scalar(@_);
12513
# breaking before the string concatenation operator seems best
12514
# because it can be hard to see at the end of a line
12515
$right_bond_strength{'.'} = STRONG;
12516
$left_bond_strength{'.'} = 0.9 * NOMINAL + 0.1 * WEAK;
12519
@left_bond_strength{@_} = (STRONG) x scalar(@_);
12520
@right_bond_strength{@_} = (NOMINAL) x scalar(@_);
12522
# make these a little weaker than nominal so that they get
12523
# favored for end-of-line characters
12524
@_ = qw"!= == =~ !~ ~~ !~~";
12525
@left_bond_strength{@_} = (STRONG) x scalar(@_);
12526
@right_bond_strength{@_} =
12527
( 0.9 * NOMINAL + 0.1 * WEAK ) x scalar(@_);
12529
# break AFTER these
12530
@_ = qw" < > | & >= <=";
12531
@left_bond_strength{@_} = (VERY_STRONG) x scalar(@_);
12532
@right_bond_strength{@_} =
12533
( 0.8 * NOMINAL + 0.2 * WEAK ) x scalar(@_);
12535
# breaking either before or after a quote is ok
12536
# but bias for breaking before a quote
12537
$left_bond_strength{'Q'} = NOMINAL;
12538
$right_bond_strength{'Q'} = NOMINAL + 0.02;
12539
$left_bond_strength{'q'} = NOMINAL;
12540
$right_bond_strength{'q'} = NOMINAL;
12542
# starting a line with a keyword is usually ok
12543
$left_bond_strength{'k'} = NOMINAL;
12545
# we usually want to bond a keyword strongly to what immediately
12546
# follows, rather than leaving it stranded at the end of a line
12547
$right_bond_strength{'k'} = STRONG;
12549
$left_bond_strength{'G'} = NOMINAL;
12550
$right_bond_strength{'G'} = STRONG;
12552
# it is good to break AFTER various assignment operators
12554
= **= += *= &= <<= &&=
12555
-= /= |= >>= ||= //=
12559
@left_bond_strength{@_} = (STRONG) x scalar(@_);
12560
@right_bond_strength{@_} =
12561
( 0.4 * WEAK + 0.6 * VERY_WEAK ) x scalar(@_);
12563
# break BEFORE '&&' and '||' and '//'
12564
# set strength of '||' to same as '=' so that chains like
12565
# $a = $b || $c || $d will break before the first '||'
12566
$right_bond_strength{'||'} = NOMINAL;
12567
$left_bond_strength{'||'} = $right_bond_strength{'='};
12569
# same thing for '//'
12570
$right_bond_strength{'//'} = NOMINAL;
12571
$left_bond_strength{'//'} = $right_bond_strength{'='};
12573
# set strength of && a little higher than ||
12574
$right_bond_strength{'&&'} = NOMINAL;
12575
$left_bond_strength{'&&'} = $left_bond_strength{'||'} + 0.1;
12577
$left_bond_strength{';'} = VERY_STRONG;
12578
$right_bond_strength{';'} = VERY_WEAK;
12579
$left_bond_strength{'f'} = VERY_STRONG;
12581
# make right strength of for ';' a little less than '='
12582
# to make for contents break after the ';' to avoid this:
12583
# for ( $j = $number_of_fields - 1 ; $j < $item_count ; $j +=
12584
# $number_of_fields )
12585
# and make it weaker than ',' and 'and' too
12586
$right_bond_strength{'f'} = VERY_WEAK - 0.03;
12588
# The strengths of ?/: should be somewhere between
12589
# an '=' and a quote (NOMINAL),
12590
# make strength of ':' slightly less than '?' to help
12591
# break long chains of ? : after the colons
12592
$left_bond_strength{':'} = 0.4 * WEAK + 0.6 * NOMINAL;
12593
$right_bond_strength{':'} = NO_BREAK;
12594
$left_bond_strength{'?'} = $left_bond_strength{':'} + 0.01;
12595
$right_bond_strength{'?'} = NO_BREAK;
12597
$left_bond_strength{','} = VERY_STRONG;
12598
$right_bond_strength{','} = VERY_WEAK;
12600
# Set bond strengths of certain keywords
12601
# make 'or', 'err', 'and' slightly weaker than a ','
12602
$left_bond_strength{'and'} = VERY_WEAK - 0.01;
12603
$left_bond_strength{'or'} = VERY_WEAK - 0.02;
12604
$left_bond_strength{'err'} = VERY_WEAK - 0.02;
12605
$left_bond_strength{'xor'} = NOMINAL;
12606
$right_bond_strength{'and'} = NOMINAL;
12607
$right_bond_strength{'or'} = NOMINAL;
12608
$right_bond_strength{'err'} = NOMINAL;
12609
$right_bond_strength{'xor'} = STRONG;
12612
# patch: it's always ok to break at end of line
12613
$nobreak_to_go[$max_index_to_go] = 0;
12615
# adding a small 'bias' to strengths is a simple way to make a line
12616
# break at the first of a sequence of identical terms. For example,
12617
# to force long string of conditional operators to break with
12618
# each line ending in a ':', we can add a small number to the bond
12619
# strength of each ':'
12620
my $colon_bias = 0;
12627
my $code_bias = -.01;
12631
my $last_nonblank_type = $type;
12632
my $last_nonblank_token = $token;
12633
my $delta_bias = 0.0001;
12634
my $list_str = $left_bond_strength{'?'};
12636
my ( $block_type, $i_next, $i_next_nonblank, $next_nonblank_token,
12637
$next_nonblank_type, $next_token, $next_type, $total_nesting_depth,
12640
# preliminary loop to compute bond strengths
12641
for ( my $i = 0 ; $i <= $max_index_to_go ; $i++ ) {
12642
$last_type = $type;
12643
if ( $type ne 'b' ) {
12644
$last_nonblank_type = $type;
12645
$last_nonblank_token = $token;
12647
$type = $types_to_go[$i];
12649
# strength on both sides of a blank is the same
12650
if ( $type eq 'b' && $last_type ne 'b' ) {
12651
$bond_strength_to_go[$i] = $bond_strength_to_go[ $i - 1 ];
12655
$token = $tokens_to_go[$i];
12656
$block_type = $block_type_to_go[$i];
12658
$next_type = $types_to_go[$i_next];
12659
$next_token = $tokens_to_go[$i_next];
12660
$total_nesting_depth = $nesting_depth_to_go[$i_next];
12661
$i_next_nonblank = ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 );
12662
$next_nonblank_type = $types_to_go[$i_next_nonblank];
12663
$next_nonblank_token = $tokens_to_go[$i_next_nonblank];
12665
# Some token chemistry... The decision about where to break a
12666
# line depends upon a "bond strength" between tokens. The LOWER
12667
# the bond strength, the MORE likely a break. The strength
12668
# values are based on trial-and-error, and need to be tweaked
12669
# occasionally to get desired results. Things to keep in mind
12671
# 1. relative strengths are important. small differences
12672
# in strengths can make big formatting differences.
12673
# 2. each indentation level adds one unit of bond strength
12674
# 3. a value of NO_BREAK makes an unbreakable bond
12675
# 4. a value of VERY_WEAK is the strength of a ','
12676
# 5. values below NOMINAL are considered ok break points
12677
# 6. values above NOMINAL are considered poor break points
12678
# We are computing the strength of the bond between the current
12679
# token and the NEXT token.
12680
my $bond_str = VERY_STRONG; # a default, high strength
12682
#---------------------------------------------------------------
12684
# use minimum of left and right bond strengths if defined;
12685
# digraphs and trigraphs like to break on their left
12686
#---------------------------------------------------------------
12687
my $bsr = $right_bond_strength{$type};
12689
if ( !defined($bsr) ) {
12691
if ( $is_digraph{$type} || $is_trigraph{$type} ) {
12695
$bsr = VERY_STRONG;
12699
# define right bond strengths of certain keywords
12700
if ( $type eq 'k' && defined( $right_bond_strength{$token} ) ) {
12701
$bsr = $right_bond_strength{$token};
12703
elsif ( $token eq 'ne' or $token eq 'eq' ) {
12706
my $bsl = $left_bond_strength{$next_nonblank_type};
12708
# set terminal bond strength to the nominal value
12709
# this will cause good preceding breaks to be retained
12710
if ( $i_next_nonblank > $max_index_to_go ) {
12714
if ( !defined($bsl) ) {
12716
if ( $is_digraph{$next_nonblank_type}
12717
|| $is_trigraph{$next_nonblank_type} )
12722
$bsl = VERY_STRONG;
12726
# define left bond strengths of certain keywords
12727
if ( $next_nonblank_type eq 'k'
12728
&& defined( $left_bond_strength{$next_nonblank_token} ) )
12730
$bsl = $left_bond_strength{$next_nonblank_token};
12732
elsif ($next_nonblank_token eq 'ne'
12733
or $next_nonblank_token eq 'eq' )
12737
elsif ( $is_lt_gt_le_ge{$next_nonblank_token} ) {
12738
$bsl = 0.9 * NOMINAL + 0.1 * STRONG;
12741
# Note: it might seem that we would want to keep a NO_BREAK if
12742
# either token has this value. This didn't work, because in an
12743
# arrow list, it prevents the comma from separating from the
12744
# following bare word (which is probably quoted by its arrow).
12745
# So necessary NO_BREAK's have to be handled as special cases
12746
# in the final section.
12747
$bond_str = ( $bsr < $bsl ) ? $bsr : $bsl;
12748
my $bond_str_1 = $bond_str;
12750
#---------------------------------------------------------------
12753
#---------------------------------------------------------------
12755
# allow long lines before final { in an if statement, as in:
12760
# Otherwise, the line before the { tends to be too short.
12761
if ( $type eq ')' ) {
12762
if ( $next_nonblank_type eq '{' ) {
12763
$bond_str = VERY_WEAK + 0.03;
12767
elsif ( $type eq '(' ) {
12768
if ( $next_nonblank_type eq '{' ) {
12769
$bond_str = NOMINAL;
12773
# break on something like '} (', but keep this stronger than a ','
12774
# example is in 'howe.pl'
12775
elsif ( $type eq 'R' or $type eq '}' ) {
12776
if ( $next_nonblank_type eq '(' ) {
12777
$bond_str = 0.8 * VERY_WEAK + 0.2 * WEAK;
12781
#-----------------------------------------------------------------
12782
# adjust bond strength bias
12783
#-----------------------------------------------------------------
12785
# TESTING: add any bias set by sub scan_list at old comma
12787
elsif ( $type eq ',' ) {
12788
$bond_str += $bond_strength_to_go[$i];
12791
elsif ( $type eq 'f' ) {
12792
$bond_str += $f_bias;
12793
$f_bias += $delta_bias;
12796
# in long ?: conditionals, bias toward just one set per line (colon.t)
12797
elsif ( $type eq ':' ) {
12798
if ( !$want_break_before{$type} ) {
12799
$bond_str += $colon_bias;
12800
$colon_bias += $delta_bias;
12804
if ( $next_nonblank_type eq ':'
12805
&& $want_break_before{$next_nonblank_type} )
12807
$bond_str += $colon_bias;
12808
$colon_bias += $delta_bias;
12811
# if leading '.' is used, align all but 'short' quotes;
12812
# the idea is to not place something like "\n" on a single line.
12813
elsif ( $next_nonblank_type eq '.' ) {
12814
if ( $want_break_before{'.'} ) {
12816
$last_nonblank_type eq '.'
12819
$rOpts_short_concatenation_item_length )
12820
&& ( $token !~ /^[\)\]\}]$/ )
12823
$dot_bias += $delta_bias;
12825
$bond_str += $dot_bias;
12828
elsif ($next_nonblank_type eq '&&'
12829
&& $want_break_before{$next_nonblank_type} )
12831
$bond_str += $amp_bias;
12832
$amp_bias += $delta_bias;
12834
elsif ($next_nonblank_type eq '||'
12835
&& $want_break_before{$next_nonblank_type} )
12837
$bond_str += $bar_bias;
12838
$bar_bias += $delta_bias;
12840
elsif ( $next_nonblank_type eq 'k' ) {
12842
if ( $next_nonblank_token eq 'and'
12843
&& $want_break_before{$next_nonblank_token} )
12845
$bond_str += $and_bias;
12846
$and_bias += $delta_bias;
12848
elsif ($next_nonblank_token =~ /^(or|err)$/
12849
&& $want_break_before{$next_nonblank_token} )
12851
$bond_str += $or_bias;
12852
$or_bias += $delta_bias;
12855
# FIXME: needs more testing
12856
elsif ( $is_keyword_returning_list{$next_nonblank_token} ) {
12857
$bond_str = $list_str if ( $bond_str > $list_str );
12859
elsif ( $token eq 'err'
12860
&& !$want_break_before{$token} )
12862
$bond_str += $or_bias;
12863
$or_bias += $delta_bias;
12868
&& !$want_break_before{$type} )
12870
$bond_str += $colon_bias;
12871
$colon_bias += $delta_bias;
12873
elsif ( $type eq '&&'
12874
&& !$want_break_before{$type} )
12876
$bond_str += $amp_bias;
12877
$amp_bias += $delta_bias;
12879
elsif ( $type eq '||'
12880
&& !$want_break_before{$type} )
12882
$bond_str += $bar_bias;
12883
$bar_bias += $delta_bias;
12885
elsif ( $type eq 'k' ) {
12887
if ( $token eq 'and'
12888
&& !$want_break_before{$token} )
12890
$bond_str += $and_bias;
12891
$and_bias += $delta_bias;
12893
elsif ( $token eq 'or'
12894
&& !$want_break_before{$token} )
12896
$bond_str += $or_bias;
12897
$or_bias += $delta_bias;
12901
# keep matrix and hash indices together
12902
# but make them a little below STRONG to allow breaking open
12903
# something like {'some-word'}{'some-very-long-word'} at the }{
12905
if ( ( $type eq ']' or $type eq 'R' )
12906
&& ( $next_nonblank_type eq '[' or $next_nonblank_type eq 'L' )
12909
$bond_str = 0.9 * STRONG + 0.1 * NOMINAL;
12912
if ( $next_nonblank_token =~ /^->/ ) {
12914
# increase strength to the point where a break in the following
12915
# will be after the opening paren rather than at the arrow:
12917
if ( $type eq 'i' ) {
12918
$bond_str = 1.45 * STRONG;
12921
elsif ( $type =~ /^[\)\]\}R]$/ ) {
12922
$bond_str = 0.1 * STRONG + 0.9 * NOMINAL;
12925
# otherwise make strength before an '->' a little over a '+'
12927
if ( $bond_str <= NOMINAL ) {
12928
$bond_str = NOMINAL + 0.01;
12933
if ( $token eq ')' && $next_nonblank_token eq '[' ) {
12934
$bond_str = 0.2 * STRONG + 0.8 * NOMINAL;
12937
# map1.t -- correct for a quirk in perl
12939
&& $next_nonblank_type eq 'i'
12940
&& $last_nonblank_type eq 'k'
12941
&& $is_sort_map_grep{$last_nonblank_token} )
12943
# /^(sort|map|grep)$/ )
12945
$bond_str = NO_BREAK;
12948
# extrude.t: do not break before paren at:
12950
if ( $last_nonblank_type eq 'F' && $next_nonblank_token eq '(' ) {
12951
$bond_str = NO_BREAK;
12954
# good to break after end of code blocks
12955
if ( $type eq '}' && $block_type ) {
12957
$bond_str = 0.5 * WEAK + 0.5 * VERY_WEAK + $code_bias;
12958
$code_bias += $delta_bias;
12961
if ( $type eq 'k' ) {
12963
# allow certain control keywords to stand out
12964
if ( $next_nonblank_type eq 'k'
12965
&& $is_last_next_redo_return{$token} )
12967
$bond_str = 0.45 * WEAK + 0.55 * VERY_WEAK;
12970
# Don't break after keyword my. This is a quick fix for a
12971
# rare problem with perl. An example is this line from file
12973
# foreach my $question( Debian::DebConf::ConfigDb::gettree( $this->{'question'} ) )
12975
if ( $token eq 'my' ) {
12976
$bond_str = NO_BREAK;
12981
# good to break before 'if', 'unless', etc
12982
if ( $is_if_brace_follower{$next_nonblank_token} ) {
12983
$bond_str = VERY_WEAK;
12986
if ( $next_nonblank_type eq 'k' ) {
12988
# keywords like 'unless', 'if', etc, within statements
12990
if ( $is_good_keyword_breakpoint{$next_nonblank_token} ) {
12991
$bond_str = VERY_WEAK / 1.05;
12995
# try not to break before a comma-arrow
12996
elsif ( $next_nonblank_type eq '=>' ) {
12997
if ( $bond_str < STRONG ) { $bond_str = STRONG }
13000
#----------------------------------------------------------------------
13001
# only set NO_BREAK's from here on
13002
#----------------------------------------------------------------------
13003
if ( $type eq 'C' or $type eq 'U' ) {
13005
# use strict requires that bare word and => not be separated
13006
if ( $next_nonblank_type eq '=>' ) {
13007
$bond_str = NO_BREAK;
13010
# Never break between a bareword and a following paren because
13011
# perl may give an error. For example, if a break is placed
13012
# between 'to_filehandle' and its '(' the following line will
13013
# give a syntax error [Carp.pm]: my( $no) =fileno(
13014
# to_filehandle( $in)) ;
13015
if ( $next_nonblank_token eq '(' ) {
13016
$bond_str = NO_BREAK;
13020
# use strict requires that bare word within braces not start new line
13021
elsif ( $type eq 'L' ) {
13023
if ( $next_nonblank_type eq 'w' ) {
13024
$bond_str = NO_BREAK;
13028
# in older versions of perl, use strict can cause problems with
13029
# breaks before bare words following opening parens. For example,
13030
# this will fail under older versions if a break is made between
13033
# open( MAIL, "a long filename or command");
13035
elsif ( $type eq '{' ) {
13037
if ( $token eq '(' && $next_nonblank_type eq 'w' ) {
13039
# but it's fine to break if the word is followed by a '=>'
13040
# or if it is obviously a sub call
13041
my $i_next_next_nonblank = $i_next_nonblank + 1;
13042
my $next_next_type = $types_to_go[$i_next_next_nonblank];
13043
if ( $next_next_type eq 'b'
13044
&& $i_next_nonblank < $max_index_to_go )
13046
$i_next_next_nonblank++;
13047
$next_next_type = $types_to_go[$i_next_next_nonblank];
13050
##if ( $next_next_type ne '=>' ) {
13051
# these are ok: '->xxx', '=>', '('
13053
# We'll check for an old breakpoint and keep a leading
13054
# bareword if it was that way in the input file.
13055
# Presumably it was ok that way. For example, the
13056
# following would remain unchanged:
13059
# January, February, March, April,
13060
# May, June, July, August,
13061
# September, October, November, December,
13064
# This should be sufficient:
13065
if ( !$old_breakpoint_to_go[$i]
13066
&& ( $next_next_type eq ',' || $next_next_type eq '}' )
13069
$bond_str = NO_BREAK;
13074
elsif ( $type eq 'w' ) {
13076
if ( $next_nonblank_type eq 'R' ) {
13077
$bond_str = NO_BREAK;
13080
# use strict requires that bare word and => not be separated
13081
if ( $next_nonblank_type eq '=>' ) {
13082
$bond_str = NO_BREAK;
13086
# in fact, use strict hates bare words on any new line. For
13087
# example, a break before the underscore here provokes the
13088
# wrath of use strict:
13089
# if ( -r $fn && ( -s _ || $AllowZeroFilesize)) {
13090
elsif ( $type eq 'F' ) {
13091
$bond_str = NO_BREAK;
13094
# use strict does not allow separating type info from trailing { }
13095
# testfile is readmail.pl
13096
elsif ( $type eq 't' or $type eq 'i' ) {
13098
if ( $next_nonblank_type eq 'L' ) {
13099
$bond_str = NO_BREAK;
13103
# Do not break between a possible filehandle and a ? or / and do
13104
# not introduce a break after it if there is no blank
13106
elsif ( $type eq 'Z' ) {
13111
# if there is no blank and we do not want one. Examples:
13112
# print $x++ # do not break after $x
13113
# print HTML"HELLO" # break ok after HTML
13116
&& defined( $want_left_space{$next_type} )
13117
&& $want_left_space{$next_type} == WS_NO
13120
# or we might be followed by the start of a quote
13121
|| $next_nonblank_type =~ /^[\/\?]$/
13124
$bond_str = NO_BREAK;
13128
# Do not break before a possible file handle
13129
if ( $next_nonblank_type eq 'Z' ) {
13130
$bond_str = NO_BREAK;
13133
# As a defensive measure, do not break between a '(' and a
13134
# filehandle. In some cases, this can cause an error. For
13135
# example, the following program works:
13142
# But this program fails:
13150
# This is normally only a problem with the 'extrude' option
13151
if ( $next_nonblank_type eq 'Y' && $token eq '(' ) {
13152
$bond_str = NO_BREAK;
13155
# Breaking before a ++ can cause perl to guess wrong. For
13156
# example the following line will cause a syntax error
13157
# with -extrude if we break between '$i' and '++' [fixstyle2]
13158
# print( ( $i++ & 1 ) ? $_ : ( $change{$_} || $_ ) );
13159
elsif ( $next_nonblank_type eq '++' ) {
13160
$bond_str = NO_BREAK;
13163
# Breaking before a ? before a quote can cause trouble if
13164
# they are not separated by a blank.
13165
# Example: a syntax error occurs if you break before the ? here
13166
# my$logic=join$all?' && ':' || ',@regexps;
13167
# From: Professional_Perl_Programming_Code/multifind.pl
13168
elsif ( $next_nonblank_type eq '?' ) {
13169
$bond_str = NO_BREAK
13170
if ( $types_to_go[ $i_next_nonblank + 1 ] eq 'Q' );
13173
# Breaking before a . followed by a number
13174
# can cause trouble if there is no intervening space
13175
# Example: a syntax error occurs if you break before the .2 here
13176
# $str .= pack($endian.2, ensurrogate($ord));
13177
# From: perl58/Unicode.pm
13178
elsif ( $next_nonblank_type eq '.' ) {
13179
$bond_str = NO_BREAK
13180
if ( $types_to_go[ $i_next_nonblank + 1 ] eq 'n' );
13183
# patch to put cuddled elses back together when on multiple
13184
# lines, as in: } \n else \n { \n
13185
if ($rOpts_cuddled_else) {
13187
if ( ( $token eq 'else' ) && ( $next_nonblank_type eq '{' )
13188
|| ( $type eq '}' ) && ( $next_nonblank_token eq 'else' ) )
13190
$bond_str = NO_BREAK;
13194
# keep '}' together with ';'
13195
if ( ( $token eq '}' ) && ( $next_nonblank_type eq ';' ) ) {
13196
$bond_str = NO_BREAK;
13199
# never break between sub name and opening paren
13200
if ( ( $type eq 'w' ) && ( $next_nonblank_token eq '(' ) ) {
13201
$bond_str = NO_BREAK;
13204
#---------------------------------------------------------------
13206
# now take nesting depth into account
13207
#---------------------------------------------------------------
13208
# final strength incorporates the bond strength and nesting depth
13211
if ( defined($bond_str) && !$nobreak_to_go[$i] ) {
13212
if ( $total_nesting_depth > 0 ) {
13213
$strength = $bond_str + $total_nesting_depth;
13216
$strength = $bond_str;
13220
$strength = NO_BREAK;
13223
# always break after side comment
13224
if ( $type eq '#' ) { $strength = 0 }
13226
$bond_strength_to_go[$i] = $strength;
13228
FORMATTER_DEBUG_FLAG_BOND && do {
13229
my $str = substr( $token, 0, 15 );
13230
$str .= ' ' x ( 16 - length($str) );
13232
"BOND: i=$i $str $type $next_nonblank_type depth=$total_nesting_depth strength=$bond_str_1 -> $bond_str -> $strength \n";
13239
sub pad_array_to_go {
13241
# Append two placeholder entries after index $max_index_to_go in the
# token and type arrays, and one extra entry in the nesting depth array.
# Takes no arguments; it works on the file-scoped *_to_go arrays.
#
# to simplify coding in scan_list and set_bond_strengths, it helps
13242
# to create some extra blank tokens at the end of the arrays
13243
$tokens_to_go[ $max_index_to_go + 1 ] = '';
13244
$tokens_to_go[ $max_index_to_go + 2 ] = '';
13245
$types_to_go[ $max_index_to_go + 1 ] = 'b';
13246
$types_to_go[ $max_index_to_go + 2 ] = 'b';
13247
# the first padded slot starts with the depth of the last real token ...
$nesting_depth_to_go[ $max_index_to_go + 1 ] =
13248
$nesting_depth_to_go[$max_index_to_go];
13251
# ... and is then adjusted if that last token closes or opens a container
if ( $is_closing_type{ $types_to_go[$max_index_to_go] } ) {
13252
# a closing token at depth <= 0 indicates a nesting error
if ( $nesting_depth_to_go[$max_index_to_go] <= 0 ) {
13254
# shouldn't happen:
# (reported only if a brace error has not already been seen)
13255
unless ( get_saw_brace_error() ) {
13257
"Program bug in scan_list: hit nesting error which should have been caught\n"
13259
report_definite_bug();
13263
# closing token: the padded slot is one level shallower
$nesting_depth_to_go[ $max_index_to_go + 1 ] -= 1;
13268
elsif ( $is_opening_type{ $types_to_go[$max_index_to_go] } ) {
13269
# opening token: the padded slot is one level deeper
$nesting_depth_to_go[ $max_index_to_go + 1 ] += 1;
13273
{ # begin scan_list
13276
$block_type, $current_depth,
13278
$i_last_nonblank_token, $last_colon_sequence_number,
13279
$last_nonblank_token, $last_nonblank_type,
13280
$last_old_breakpoint_count, $minimum_depth,
13281
$next_nonblank_block_type, $next_nonblank_token,
13282
$next_nonblank_type, $old_breakpoint_count,
13283
$starting_breakpoint_count, $starting_depth,
13289
@breakpoint_stack, @breakpoint_undo_stack,
13290
@comma_index, @container_type,
13291
@identifier_count_stack, @index_before_arrow,
13292
@interrupted_list, @item_count_stack,
13293
@last_comma_index, @last_dot_index,
13294
@last_nonblank_type, @old_breakpoint_count_stack,
13295
@opening_structure_index_stack, @rfor_semicolon_list,
13296
@has_old_logical_breakpoints, @rand_or_list,
13300
# routine to define essential variables when we go 'up' to
13302
sub check_for_new_minimum_depth {

    # Prepare the per-depth bookkeeping arrays for a nesting depth that
    # is shallower than any depth handled so far.
    #
    # Given:  $depth = the nesting depth that has just been reached
    # Nothing is done unless $depth is less than $minimum_depth.
    my $depth = shift;
    if ( $depth < $minimum_depth ) {

        $minimum_depth = $depth;

        # these arrays need not retain values between calls
        $breakpoint_stack[$depth]              = $starting_breakpoint_count;
        $container_type[$depth]                = "";
        $identifier_count_stack[$depth]        = 0;
        $index_before_arrow[$depth]            = -1;
        $interrupted_list[$depth]              = 1;
        $item_count_stack[$depth]              = 0;
        $last_nonblank_type[$depth]            = "";
        $opening_structure_index_stack[$depth] = -1;

        $breakpoint_undo_stack[$depth]       = undef;
        $comma_index[$depth]                 = undef;
        $last_comma_index[$depth]            = undef;
        $last_dot_index[$depth]              = undef;
        $old_breakpoint_count_stack[$depth]  = undef;
        $has_old_logical_breakpoints[$depth] = 0;
        $rand_or_list[$depth]                = [];
        $rfor_semicolon_list[$depth]         = [];
        $i_equals[$depth]                    = -1;

        # these arrays must retain values between calls, so they are
        # only initialized the first time this depth is seen
        if ( !defined( $has_broken_sublist[$depth] ) ) {
            $dont_align[$depth]         = 0;
            $has_broken_sublist[$depth] = 0;
            $want_comma_break[$depth]   = 0;
        }
    }
}
13337
# routine to decide which commas to break at within a container;
13339
# $bp_count = number of comma breakpoints set
13340
# $do_not_break_apart = a flag indicating if container need not
13342
sub set_comma_breakpoints {
13346
my $do_not_break_apart = 0;
13349
if ( $item_count_stack[$dd] ) {
13351
# handle commas not in containers...
13352
if ( $dont_align[$dd] ) {
13353
do_uncontained_comma_breaks($dd);
13356
# handle commas within containers...
13358
my $fbc = $forced_breakpoint_count;
13360
# always open comma lists not preceded by keywords,
13361
# barewords, identifiers (that is, anything that doesn't
13362
# look like a function call)
13363
my $must_break_open = $last_nonblank_type[$dd] !~ /^[kwiU]$/;
13365
set_comma_breakpoints_do(
13367
$opening_structure_index_stack[$dd],
13369
$item_count_stack[$dd],
13370
$identifier_count_stack[$dd],
13372
$next_nonblank_type,
13373
$container_type[$dd],
13374
$interrupted_list[$dd],
13375
\$do_not_break_apart,
13378
$bp_count = $forced_breakpoint_count - $fbc;
13379
$do_not_break_apart = 0 if $must_break_open;
13382
return ( $bp_count, $do_not_break_apart );
13385
sub do_uncontained_comma_breaks {
13387
# Handle commas not in containers...
13388
# This is a catch-all routine for commas that we
13389
# don't know what to do with because they don't fall
13390
# within containers. We will bias the bond strength
13391
# to break at commas which ended lines in the input
13392
# file. This usually works better than just trying
13393
# to put as many items on a line as possible. A
13394
# downside is that if the input file is garbage it
13395
# won't work very well. However, the user can always
13396
# prevent following the old breakpoints with the
13400
foreach my $ii ( @{ $comma_index[$dd] } ) {
13401
if ( $old_breakpoint_to_go[$ii] ) {
13402
$bond_strength_to_go[$ii] = $bias;
13404
# reduce bias magnitude to force breaks in order
13409
# Also put a break before the first comma if
13410
# (1) there was a break there in the input, and
13411
# (2) there was exactly one previous break in the input
13413
# For example, we will follow the user and break after
13414
# 'print' in this snippet:
13416
# "conformability (Not the same dimension)\n",
13417
# "\t", $have, " is ", text_unit($hu), "\n",
13418
# "\t", $want, " is ", text_unit($wu), "\n",
13420
my $i_first_comma = $comma_index[$dd]->[0];
13421
if ( $old_breakpoint_to_go[$i_first_comma] ) {
13422
my $level_comma = $levels_to_go[$i_first_comma];
13425
for ( my $ii = $i_first_comma - 1 ; $ii >= 0 ; $ii -= 1 ) {
13426
if ( $old_breakpoint_to_go[$ii] ) {
13428
last if ( $obp_count > 1 );
13430
if ( $levels_to_go[$ii] == $level_comma );
13433
if ( $ibreak >= 0 && $obp_count == 1 ) {
13434
set_forced_breakpoint($ibreak);
13439
my %is_logical_container;
13442
@_ = qw# if elsif unless while and or err not && | || ? : ! #;
13443
@is_logical_container{@_} = (1) x scalar(@_);
13446
sub set_for_semicolon_breakpoints {

    # Set a forced breakpoint at every index recorded in
    # $rfor_semicolon_list for the given depth.
    #
    # Given:  $dd = the nesting depth of the container
    my $dd = shift;
    foreach my $i_semicolon ( @{ $rfor_semicolon_list[$dd] } ) {
        set_forced_breakpoint($i_semicolon);
    }
}
13453
sub set_logical_breakpoints {
13456
$item_count_stack[$dd] == 0
13457
&& $is_logical_container{ $container_type[$dd] }
13460
|| $has_old_logical_breakpoints[$dd]
13464
# Look for breaks in this order:
13467
foreach my $i ( 0 .. 3 ) {
13468
if ( $rand_or_list[$dd][$i] ) {
13469
foreach ( @{ $rand_or_list[$dd][$i] } ) {
13470
set_forced_breakpoint($_);
13473
# break at any 'if' and 'unless' too
13474
foreach ( @{ $rand_or_list[$dd][4] } ) {
13475
set_forced_breakpoint($_);
13477
$rand_or_list[$dd] = [];
13484
sub is_unbreakable_container {

    # never break a container of one of these types
    # because bad things can happen (map1.t)
    #
    # Given:   $dd = the nesting depth of the container
    # Returns: the %is_sort_map_grep entry for the container type at
    #          that depth (true if it must not be broken open)
    my $dd = shift;
    return $is_sort_map_grep{ $container_type[$dd] };
}
13494
# This routine is responsible for setting line breaks for all lists,
13495
# so that hierarchical structure can be displayed and so that list
13496
# items can be vertically aligned. The output of this routine is
13497
# stored in the array @forced_breakpoint_to_go, which is used to set
13498
# final breakpoints.
13500
$starting_depth = $nesting_depth_to_go[0];
13503
$current_depth = $starting_depth;
13505
$last_colon_sequence_number = -1;
13506
$last_nonblank_token = ';';
13507
$last_nonblank_type = ';';
13508
$last_nonblank_block_type = ' ';
13509
$last_old_breakpoint_count = 0;
13510
$minimum_depth = $current_depth + 1; # forces update in check below
13511
$old_breakpoint_count = 0;
13512
$starting_breakpoint_count = $forced_breakpoint_count;
13515
$type_sequence = '';
13517
check_for_new_minimum_depth($current_depth);
13519
my $is_long_line = excess_line_length( 0, $max_index_to_go ) > 0;
13520
my $want_previous_breakpoint = -1;
13522
my $saw_good_breakpoint;
13523
my $i_line_end = -1;
13524
my $i_line_start = -1;
13526
# loop over all tokens in this batch
13527
while ( ++$i <= $max_index_to_go ) {
13528
if ( $type ne 'b' ) {
13529
$i_last_nonblank_token = $i - 1;
13530
$last_nonblank_type = $type;
13531
$last_nonblank_token = $token;
13532
$last_nonblank_block_type = $block_type;
13534
$type = $types_to_go[$i];
13535
$block_type = $block_type_to_go[$i];
13536
$token = $tokens_to_go[$i];
13537
$type_sequence = $type_sequence_to_go[$i];
13538
my $next_type = $types_to_go[ $i + 1 ];
13539
my $next_token = $tokens_to_go[ $i + 1 ];
13540
my $i_next_nonblank = ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 );
13541
$next_nonblank_type = $types_to_go[$i_next_nonblank];
13542
$next_nonblank_token = $tokens_to_go[$i_next_nonblank];
13543
$next_nonblank_block_type = $block_type_to_go[$i_next_nonblank];
13545
# set break if flag was set
13546
if ( $want_previous_breakpoint >= 0 ) {
13547
set_forced_breakpoint($want_previous_breakpoint);
13548
$want_previous_breakpoint = -1;
13551
$last_old_breakpoint_count = $old_breakpoint_count;
13552
if ( $old_breakpoint_to_go[$i] ) {
13554
$i_line_start = $i_next_nonblank;
13556
$old_breakpoint_count++;
13558
# Break before certain keywords if user broke there and
13559
# this is a 'safe' break point. The idea is to retain
13560
# any preferred breaks for sequential list operations,
13561
# like a schwartzian transform.
13562
if ($rOpts_break_at_old_keyword_breakpoints) {
13564
$next_nonblank_type eq 'k'
13565
&& $is_keyword_returning_list{$next_nonblank_token}
13566
&& ( $type =~ /^[=\)\]\}Riw]$/
13568
&& $is_keyword_returning_list{$token} )
13572
# we actually have to set this break next time through
13573
# the loop because if we are at a closing token (such
13574
# as '}') which forms a one-line block, this break might
13576
$want_previous_breakpoint = $i;
13580
next if ( $type eq 'b' );
13581
$depth = $nesting_depth_to_go[ $i + 1 ];
13583
# safety check - be sure we always break after a comment
13584
# Shouldn't happen .. an error here probably means that the
13585
# nobreak flag did not get turned off correctly during
13587
if ( $type eq '#' ) {
13588
if ( $i != $max_index_to_go ) {
13590
"Non-fatal program bug: backup logic needed to break after a comment\n"
13592
report_definite_bug();
13593
$nobreak_to_go[$i] = 0;
13594
set_forced_breakpoint($i);
13598
# Force breakpoints at certain tokens in long lines.
13599
# Note that such breakpoints will be undone later if these tokens
13600
# are fully contained within parens on a line.
13603
# break before a keyword within a line
13607
# if one of these keywords:
13608
&& $token =~ /^(if|unless|while|until|for)$/
13610
# but do not break at something like '1 while'
13611
&& ( $last_nonblank_type ne 'n' || $i > 2 )
13613
# and let keywords follow a closing 'do' brace
13614
&& $last_nonblank_block_type ne 'do'
13619
# or container is broken (by side-comment, etc)
13620
|| ( $next_nonblank_token eq '('
13621
&& $mate_index_to_go[$i_next_nonblank] < $i )
13625
set_forced_breakpoint( $i - 1 );
13628
# remember locations of '||' and '&&' for possible breaks if we
13629
# decide this is a long logical expression.
13630
if ( $type eq '||' ) {
13631
push @{ $rand_or_list[$depth][2] }, $i;
13632
++$has_old_logical_breakpoints[$depth]
13633
if ( ( $i == $i_line_start || $i == $i_line_end )
13634
&& $rOpts_break_at_old_logical_breakpoints );
13636
elsif ( $type eq '&&' ) {
13637
push @{ $rand_or_list[$depth][3] }, $i;
13638
++$has_old_logical_breakpoints[$depth]
13639
if ( ( $i == $i_line_start || $i == $i_line_end )
13640
&& $rOpts_break_at_old_logical_breakpoints );
13642
elsif ( $type eq 'f' ) {
13643
push @{ $rfor_semicolon_list[$depth] }, $i;
13645
elsif ( $type eq 'k' ) {
13646
if ( $token eq 'and' ) {
13647
push @{ $rand_or_list[$depth][1] }, $i;
13648
++$has_old_logical_breakpoints[$depth]
13649
if ( ( $i == $i_line_start || $i == $i_line_end )
13650
&& $rOpts_break_at_old_logical_breakpoints );
13653
# break immediately at 'or's which are probably not in a logical
13654
# block -- but we will break in logical breaks below so that
13655
# they do not add to the forced_breakpoint_count
13656
elsif ( $token eq 'or' ) {
13657
push @{ $rand_or_list[$depth][0] }, $i;
13658
++$has_old_logical_breakpoints[$depth]
13659
if ( ( $i == $i_line_start || $i == $i_line_end )
13660
&& $rOpts_break_at_old_logical_breakpoints );
13661
if ( $is_logical_container{ $container_type[$depth] } ) {
13664
if ($is_long_line) { set_forced_breakpoint($i) }
13665
elsif ( ( $i == $i_line_start || $i == $i_line_end )
13666
&& $rOpts_break_at_old_logical_breakpoints )
13668
$saw_good_breakpoint = 1;
13672
elsif ( $token eq 'if' || $token eq 'unless' ) {
13673
push @{ $rand_or_list[$depth][4] }, $i;
13674
if ( ( $i == $i_line_start || $i == $i_line_end )
13675
&& $rOpts_break_at_old_logical_breakpoints )
13677
set_forced_breakpoint($i);
13681
elsif ( $is_assignment{$type} ) {
13682
$i_equals[$depth] = $i;
13685
if ($type_sequence) {
13687
# handle any postponed closing breakpoints
13688
if ( $token =~ /^[\)\]\}\:]$/ ) {
13689
if ( $type eq ':' ) {
13690
$last_colon_sequence_number = $type_sequence;
13692
# TESTING: retain break at a ':' line break
13693
if ( ( $i == $i_line_start || $i == $i_line_end )
13694
&& $rOpts_break_at_old_ternary_breakpoints )
13698
set_forced_breakpoint($i);
13700
# break at previous '='
13701
if ( $i_equals[$depth] > 0 ) {
13702
set_forced_breakpoint( $i_equals[$depth] );
13703
$i_equals[$depth] = -1;
13707
if ( defined( $postponed_breakpoint{$type_sequence} ) ) {
13708
my $inc = ( $type eq ':' ) ? 0 : 1;
13709
set_forced_breakpoint( $i - $inc );
13710
delete $postponed_breakpoint{$type_sequence};
13714
# set breaks at ?/: if they will get separated (and are
13715
# not a ?/: chain), or if the '?' is at the end of the
13717
elsif ( $token eq '?' ) {
13718
my $i_colon = $mate_index_to_go[$i];
13720
$i_colon <= 0 # the ':' is not in this batch
13721
|| $i == 0 # this '?' is the first token of the line
13723
$max_index_to_go # or this '?' is the last token
13727
# don't break at a '?' if preceded by ':' on
13728
# this line of previous ?/: pair on this line.
13729
# This is an attempt to preserve a chain of ?/:
13730
# expressions (elsif2.t). And don't break if
13731
# this has a side comment.
13732
set_forced_breakpoint($i)
13734
$type_sequence == (
13735
$last_colon_sequence_number +
13736
TYPE_SEQUENCE_INCREMENT
13738
|| $tokens_to_go[$max_index_to_go] eq '#'
13740
set_closing_breakpoint($i);
13745
#print "LISTX sees: i=$i type=$type tok=$token block=$block_type depth=$depth\n";
13747
#------------------------------------------------------------
13748
# Handle Increasing Depth..
13750
# prepare for a new list when depth increases
13751
# token $i is a '(','{', or '['
13752
#------------------------------------------------------------
13753
if ( $depth > $current_depth ) {
13755
$breakpoint_stack[$depth] = $forced_breakpoint_count;
13756
$breakpoint_undo_stack[$depth] = $forced_breakpoint_undo_count;
13757
$has_broken_sublist[$depth] = 0;
13758
$identifier_count_stack[$depth] = 0;
13759
$index_before_arrow[$depth] = -1;
13760
$interrupted_list[$depth] = 0;
13761
$item_count_stack[$depth] = 0;
13762
$last_comma_index[$depth] = undef;
13763
$last_dot_index[$depth] = undef;
13764
$last_nonblank_type[$depth] = $last_nonblank_type;
13765
$old_breakpoint_count_stack[$depth] = $old_breakpoint_count;
13766
$opening_structure_index_stack[$depth] = $i;
13767
$rand_or_list[$depth] = [];
13768
$rfor_semicolon_list[$depth] = [];
13769
$i_equals[$depth] = -1;
13770
$want_comma_break[$depth] = 0;
13771
$container_type[$depth] =
13772
( $last_nonblank_type =~ /^(k|=>|&&|\|\||\?|\:|\.)$/ )
13773
? $last_nonblank_token
13775
$has_old_logical_breakpoints[$depth] = 0;
13777
# if line ends here then signal closing token to break
13778
if ( $next_nonblank_type eq 'b' || $next_nonblank_type eq '#' )
13780
set_closing_breakpoint($i);
13783
# Not all lists of values should be vertically aligned..
13784
$dont_align[$depth] =
13786
# code BLOCKS are handled at a higher level
13787
( $block_type ne "" )
13789
# certain paren lists
13790
|| ( $type eq '(' ) && (
13792
# it does not usually look good to align a list of
13793
# identifiers in a parameter list, as in:
13794
# my($var1, $var2, ...)
13795
# (This test should probably be refined, for now I'm just
13796
# testing for any keyword)
13797
( $last_nonblank_type eq 'k' )
13799
# a trailing '(' usually indicates a non-list
13800
|| ( $next_nonblank_type eq '(' )
13803
# patch to outdent opening brace of long if/for/..
13804
# statements (like this one). See similar coding in
13805
# set_continuation_breaks. We also have to catch it here for
13806
# short line fragments which otherwise will not go through
13807
# set_continuation_breaks.
13811
# if we have the ')' but not its '(' in this batch..
13812
&& ( $last_nonblank_token eq ')' )
13813
&& $mate_index_to_go[$i_last_nonblank_token] < 0
13815
# and user wants brace to left
13816
&& !$rOpts->{'opening-brace-always-on-right'}
13818
&& ( $type eq '{' ) # should be true
13819
&& ( $token eq '{' ) # should be true
13822
set_forced_breakpoint( $i - 1 );
13826
#------------------------------------------------------------
13827
# Handle Decreasing Depth..
13829
# finish off any old list when depth decreases
13830
# token $i is a ')','}', or ']'
13831
#------------------------------------------------------------
13832
elsif ( $depth < $current_depth ) {
13834
check_for_new_minimum_depth($depth);
13836
# force all outer logical containers to break after we see on
13838
$has_old_logical_breakpoints[$depth] ||=
13839
$has_old_logical_breakpoints[$current_depth];
13841
# Patch to break between ') {' if the paren list is broken.
13842
# There is similar logic in set_continuation_breaks for
13843
# non-broken lists.
13845
&& $next_nonblank_block_type
13846
&& $interrupted_list[$current_depth]
13847
&& $next_nonblank_type eq '{'
13848
&& !$rOpts->{'opening-brace-always-on-right'} )
13850
set_forced_breakpoint($i);
13853
#print "LISTY sees: i=$i type=$type tok=$token block=$block_type depth=$depth next=$next_nonblank_type next_block=$next_nonblank_block_type inter=$interrupted_list[$current_depth]\n";
13855
# set breaks at commas if necessary
13856
my ( $bp_count, $do_not_break_apart ) =
13857
set_comma_breakpoints($current_depth);
13859
my $i_opening = $opening_structure_index_stack[$current_depth];
13860
my $saw_opening_structure = ( $i_opening >= 0 );
13862
# this term is long if we had to break at interior commas..
13863
my $is_long_term = $bp_count > 0;
13865
# ..or if the length between opening and closing parens exceeds
13866
# allowed line length
13867
if ( !$is_long_term && $saw_opening_structure ) {
13868
my $i_opening_minus = find_token_starting_list($i_opening);
13870
# Note: we have to allow for one extra space after a
13871
# closing token so that we do not strand a comma or
13872
# semicolon, hence the '>=' here (oneline.t)
13874
excess_line_length( $i_opening_minus, $i ) >= 0;
13877
# We've set breaks after all comma-arrows. Now we have to
13878
# undo them if this can be a one-line block
13879
# (the only breakpoints set will be due to comma-arrows)
13882
# user doesn't require breaking after all comma-arrows
13883
( $rOpts_comma_arrow_breakpoints != 0 )
13885
# and if the opening structure is in this batch
13886
&& $saw_opening_structure
13888
# and either on the same old line
13890
$old_breakpoint_count_stack[$current_depth] ==
13891
$last_old_breakpoint_count
13893
# or user wants to form long blocks with arrows
13894
|| $rOpts_comma_arrow_breakpoints == 2
13897
# and we made some breakpoints between the opening and closing
13898
&& ( $breakpoint_undo_stack[$current_depth] <
13899
$forced_breakpoint_undo_count )
13901
# and this block is short enough to fit on one line
13902
# Note: use < because need 1 more space for possible comma
13907
undo_forced_breakpoint_stack(
13908
$breakpoint_undo_stack[$current_depth] );
13911
# now see if we have any comma breakpoints left
13912
my $has_comma_breakpoints =
13913
( $breakpoint_stack[$current_depth] !=
13914
$forced_breakpoint_count );
13916
# update broken-sublist flag of the outer container
13917
$has_broken_sublist[$depth] =
13918
$has_broken_sublist[$depth]
13919
|| $has_broken_sublist[$current_depth]
13921
|| $has_comma_breakpoints;
13923
# Having come to the closing ')', '}', or ']', now we have to decide if we
13924
# should 'open up' the structure by placing breaks at the opening and
13925
# closing containers. This is a tricky decision. Here are some of the
13926
# basic considerations:
13928
# -If this is a BLOCK container, then any breakpoints will have already
13929
# been set (and according to user preferences), so we need do nothing here.
13931
# -If we have a comma-separated list for which we can align the list items,
13932
# then we need to do so because otherwise the vertical aligner cannot
13933
# currently do the alignment.
13935
# -If this container does itself contain a container which has been broken
13936
# open, then it should be broken open to properly show the structure.
13938
# -If there is nothing to align, and no other reason to break apart,
13939
# then do not do it.
13941
# We will not break open the parens of a long but 'simple' logical expression.
13944
# This is an example of a simple logical expression and its formatting:
13946
# if ( $bigwasteofspace1 && $bigwasteofspace2
13947
# || $bigwasteofspace3 && $bigwasteofspace4 )
13949
# Most people would prefer this to the 'spacey' version:
13952
# $bigwasteofspace1 && $bigwasteofspace2
13953
# || $bigwasteofspace3 && $bigwasteofspace4
13956
# To illustrate the rules for breaking logical expressions, consider:
13960
# and ( exists $ids_excl_uc{$id_uc}
13961
# or grep $id_uc =~ /$_/, @ids_excl_uc ))
13963
# This is on the verge of being difficult to read. The current default is to
13964
# open it up like this:
13969
# and ( exists $ids_excl_uc{$id_uc}
13970
# or grep $id_uc =~ /$_/, @ids_excl_uc )
13973
# This is a compromise which tries to avoid being too dense and too spacey.
13974
# A more spaced version would be:
13980
# exists $ids_excl_uc{$id_uc}
13981
# or grep $id_uc =~ /$_/, @ids_excl_uc
13985
# Some people might prefer the spacey version -- an option could be added. The
13986
# innermost expression contains a long block '( exists $ids_... ')'.
13988
# Here is how the logic goes: We will force a break at the 'or' that the
13989
# innermost expression contains, but we will not break apart its opening and
13990
# closing containers because (1) it contains no multi-line sub-containers itself,
13991
# and (2) there is no alignment to be gained by breaking it open like this
13994
# exists $ids_excl_uc{$id_uc}
13995
# or grep $id_uc =~ /$_/, @ids_excl_uc
13998
# (although this looks perfectly ok and might be good for long expressions). The
13999
# outer 'if' container, though, contains a broken sub-container, so it will be
14000
# broken open to avoid too much density. Also, since it contains no 'or's, there
14001
# will be a forced break at its 'and'.
14003
# set some flags telling something about this container..
14004
my $is_simple_logical_expression = 0;
14005
if ( $item_count_stack[$current_depth] == 0
14006
&& $saw_opening_structure
14007
&& $tokens_to_go[$i_opening] eq '('
14008
&& $is_logical_container{ $container_type[$current_depth] }
14012
# This seems to be a simple logical expression with
14013
# no existing breakpoints. Set a flag to prevent
14015
if ( !$has_comma_breakpoints ) {
14016
$is_simple_logical_expression = 1;
14019
# This seems to be a simple logical expression with
14020
# breakpoints (broken sublists, for example). Break
14021
# at all 'or's and '||'s.
14023
set_logical_breakpoints($current_depth);
14028
&& @{ $rfor_semicolon_list[$current_depth] } )
14030
set_for_semicolon_breakpoints($current_depth);
14032
# open up a long 'for' or 'foreach' container to allow
14033
# leading term alignment unless -lp is used.
14034
$has_comma_breakpoints = 1
14035
unless $rOpts_line_up_parentheses;
14040
# breaks for code BLOCKS are handled at a higher level
14043
# we do not need to break at the top level of an 'if'
14045
&& !$is_simple_logical_expression
14047
## modification to keep ': (' containers vertically tight;
14048
## but probably better to let user set -vt=1 to avoid
14049
## inconsistency with other paren types
14050
## && ($container_type[$current_depth] ne ':')
14052
# otherwise, we require one of these reasons for breaking:
14055
# - this term has forced line breaks
14056
$has_comma_breakpoints
14058
# - the opening container is separated from this batch
14059
# for some reason (comment, blank line, code block)
14060
# - this is a non-paren container spanning multiple lines
14061
|| !$saw_opening_structure
14063
# - this is a long block contained in another breakable
14066
&& $container_environment_to_go[$i_opening] ne
14072
# For -lp option, we must put a breakpoint before
14073
# the token which has been identified as starting
14074
# this indentation level. This is necessary for
14075
# proper alignment.
14076
if ( $rOpts_line_up_parentheses && $saw_opening_structure )
14078
my $item = $leading_spaces_to_go[ $i_opening + 1 ];
14079
if ( $i_opening + 1 < $max_index_to_go
14080
&& $types_to_go[ $i_opening + 1 ] eq 'b' )
14082
$item = $leading_spaces_to_go[ $i_opening + 2 ];
14084
if ( defined($item) ) {
14085
my $i_start_2 = $item->get_STARTING_INDEX();
14087
defined($i_start_2)
14089
# we are breaking after an opening brace, paren,
14090
# so don't break before it too
14091
&& $i_start_2 ne $i_opening
14095
# Only break for breakpoints at the same
14096
# indentation level as the opening paren
14097
my $test1 = $nesting_depth_to_go[$i_opening];
14098
my $test2 = $nesting_depth_to_go[$i_start_2];
14099
if ( $test2 == $test1 ) {
14100
set_forced_breakpoint( $i_start_2 - 1 );
14106
# break after opening structure.
14107
# note: break before closing structure will be automatic
14108
if ( $minimum_depth <= $current_depth ) {
14110
set_forced_breakpoint($i_opening)
14111
unless ( $do_not_break_apart
14112
|| is_unbreakable_container($current_depth) );
14114
# break at '.' of lower depth level before opening token
14115
if ( $last_dot_index[$depth] ) {
14116
set_forced_breakpoint( $last_dot_index[$depth] );
14119
# break before opening structure if preceded by another
14120
# closing structure and a comma. This is normally
14121
# done by the previous closing brace, but not
14122
# if it was a one-line block.
14123
if ( $i_opening > 2 ) {
14125
( $types_to_go[ $i_opening - 1 ] eq 'b' )
14129
if ( $types_to_go[$i_prev] eq ','
14130
&& $types_to_go[ $i_prev - 1 ] =~ /^[\)\}]$/ )
14132
set_forced_breakpoint($i_prev);
14135
# also break before something like ':(' or '?('
14138
$types_to_go[$i_prev] =~ /^([k\:\?]|&&|\|\|)$/ )
14140
my $token_prev = $tokens_to_go[$i_prev];
14141
if ( $want_break_before{$token_prev} ) {
14142
set_forced_breakpoint($i_prev);
14148
# break after comma following closing structure
14149
if ( $next_type eq ',' ) {
14150
set_forced_breakpoint( $i + 1 );
14153
# break before an '=' following closing structure
14155
$is_assignment{$next_nonblank_type}
14156
&& ( $breakpoint_stack[$current_depth] !=
14157
$forced_breakpoint_count )
14160
set_forced_breakpoint($i);
14163
# break at any comma before the opening structure Added
14164
# for -lp, but seems to be good in general. It isn't
14165
# obvious how far back to look; the '5' below seems to
14166
# work well and will catch the comma in something like
14167
# push @list, myfunc( $param, $param, ..
14169
my $icomma = $last_comma_index[$depth];
14170
if ( defined($icomma) && ( $i_opening - $icomma ) < 5 ) {
14171
unless ( $forced_breakpoint_to_go[$icomma] ) {
14172
set_forced_breakpoint($icomma);
14175
} # end logic to open up a container
14177
# Break open a logical container if it was already open
14178
elsif ($is_simple_logical_expression
14179
&& $has_old_logical_breakpoints[$current_depth] )
14181
set_logical_breakpoints($current_depth);
14184
# Handle long container which does not get opened up
14185
elsif ($is_long_term) {
14187
# must set fake breakpoint to alert outer containers that
14189
set_fake_breakpoint();
14193
#------------------------------------------------------------
14194
# Handle this token
14195
#------------------------------------------------------------
14197
$current_depth = $depth;
14199
# handle comma-arrow
14200
if ( $type eq '=>' ) {
14201
next if ( $last_nonblank_type eq '=>' );
14202
next if $rOpts_break_at_old_comma_breakpoints;
14203
next if $rOpts_comma_arrow_breakpoints == 3;
14204
$want_comma_break[$depth] = 1;
14205
$index_before_arrow[$depth] = $i_last_nonblank_token;
14209
elsif ( $type eq '.' ) {
14210
$last_dot_index[$depth] = $i;
14213
# Turn off alignment if we are sure that this is not a list
14214
# environment. To be safe, we will do this if we see certain
14215
# non-list tokens, such as ';', and also the environment is
14216
# not a list. Note that '=' could be in any of the = operators
14217
# (lextest.t). We can't just use the reported environment
14218
# because it can be incorrect in some cases.
14219
elsif ( ( $type =~ /^[\;\<\>\~]$/ || $is_assignment{$type} )
14220
&& $container_environment_to_go[$i] ne 'LIST' )
14222
$dont_align[$depth] = 1;
14223
$want_comma_break[$depth] = 0;
14224
$index_before_arrow[$depth] = -1;
14227
# now just handle any commas
14228
next unless ( $type eq ',' );
14230
$last_dot_index[$depth] = undef;
14231
$last_comma_index[$depth] = $i;
14233
# break here if this comma follows a '=>'
14234
# but not if there is a side comment after the comma
14235
if ( $want_comma_break[$depth] ) {
14237
if ( $next_nonblank_type =~ /^[\)\}\]R]$/ ) {
14238
$want_comma_break[$depth] = 0;
14239
$index_before_arrow[$depth] = -1;
14243
set_forced_breakpoint($i) unless ( $next_nonblank_type eq '#' );
14245
# break before the previous token if it looks safe
14246
# Example of something that we will not try to break before:
14247
# DBI::SQL_SMALLINT() => $ado_consts->{adSmallInt},
14248
# Also we don't want to break at a binary operator (like +):
14252
# $y - $R, -fill => 'black',
14254
my $ibreak = $index_before_arrow[$depth] - 1;
14256
&& $tokens_to_go[ $ibreak + 1 ] !~ /^[\)\}\]]$/ )
14258
if ( $tokens_to_go[$ibreak] eq '-' ) { $ibreak-- }
14259
if ( $types_to_go[$ibreak] eq 'b' ) { $ibreak-- }
14260
if ( $types_to_go[$ibreak] =~ /^[,wiZCUG\(\{\[]$/ ) {
14262
# don't break pointer calls, such as the following:
14263
# File::Spec->curdir => 1,
14264
# (This is tokenized as adjacent 'w' tokens)
14265
if ( $tokens_to_go[ $ibreak + 1 ] !~ /^->/ ) {
14266
set_forced_breakpoint($ibreak);
14271
$want_comma_break[$depth] = 0;
14272
$index_before_arrow[$depth] = -1;
14274
# handle list which mixes '=>'s and ','s:
14275
# treat any list items so far as an interrupted list
14276
$interrupted_list[$depth] = 1;
14280
# break after all commas above starting depth
14281
if ( $depth < $starting_depth && !$dont_align[$depth] ) {
14282
set_forced_breakpoint($i) unless ( $next_nonblank_type eq '#' );
14286
# add this comma to the list..
14287
my $item_count = $item_count_stack[$depth];
14288
if ( $item_count == 0 ) {
14290
# but do not form a list with no opening structure
14293
# open INFILE_COPY, ">$input_file_copy"
14294
# or die ("very long message");
14296
if ( ( $opening_structure_index_stack[$depth] < 0 )
14297
&& $container_environment_to_go[$i] eq 'BLOCK' )
14299
$dont_align[$depth] = 1;
14303
$comma_index[$depth][$item_count] = $i;
14304
++$item_count_stack[$depth];
14305
if ( $last_nonblank_type =~ /^[iR\]]$/ ) {
14306
$identifier_count_stack[$depth]++;
14310
#-------------------------------------------
14311
# end of loop over all tokens in this batch
14312
#-------------------------------------------
14314
# set breaks for any unfinished lists ..
14315
for ( my $dd = $current_depth ; $dd >= $minimum_depth ; $dd-- ) {
14317
$interrupted_list[$dd] = 1;
14318
$has_broken_sublist[$dd] = 1 if ( $dd < $current_depth );
14319
set_comma_breakpoints($dd);
14320
set_logical_breakpoints($dd)
14321
if ( $has_old_logical_breakpoints[$dd] );
14322
set_for_semicolon_breakpoints($dd);
14324
# break open container...
14325
my $i_opening = $opening_structure_index_stack[$dd];
14326
set_forced_breakpoint($i_opening)
14328
is_unbreakable_container($dd)
14330
# Avoid a break which would place an isolated ' or "
14333
&& $i_opening >= $max_index_to_go - 2
14334
&& $token =~ /^['"]$/ )
14338
# Return a flag indicating if the input file had some good breakpoints.
14339
# This flag will be used to force a break in a line shorter than the
14340
# allowed line length.
14341
if ( $has_old_logical_breakpoints[$current_depth] ) {
14342
$saw_good_breakpoint = 1;
14344
return $saw_good_breakpoint;
14348
sub find_token_starting_list {

    # When testing to see if a block will fit on one line, some
    # previous token(s) may also need to be on the line; particularly
    # if this is a sub call. So we will look back at least one
    # token. NOTE: This isn't perfect, but not critical, because
    # if we mis-identify a block, it will be wrapped and therefore
    # fixed the next time it is formatted.
    #
    # Given:   $i_opening_paren = index of the opening token of a list
    # Returns: the index (never greater than $i_opening_paren) from
    #          which the length of the list should be measured
    my $i_opening_paren = shift;
    my $i_opening_minus = $i_opening_paren;
    my $im1             = $i_opening_paren - 1;
    my $im2             = $i_opening_paren - 2;

    # Positions before the start of the batch are treated as blanks.
    # Without these guards a negative subscript would wrap around and
    # read a type from the END of @types_to_go, which could make this
    # routine return -1 when the opening token is the first token.
    my $typem1 = $im1 >= 0 ? $types_to_go[$im1] : 'b';
    my $typem2 = $im2 >= 0 ? $types_to_go[$im2] : 'b';

    # a preceding comma: the list starts at the opening token itself
    if ( $typem1 eq ',' || ( $typem1 eq 'b' && $typem2 eq ',' ) ) {
        $i_opening_minus = $i_opening_paren;
    }
    elsif ( $tokens_to_go[$i_opening_paren] eq '(' ) {
        $i_opening_minus = $im1 if $im1 >= 0;

        # walk back to improve length estimate
        for ( my $j = $im1 ; $j >= 0 ; $j-- ) {
            last if ( $types_to_go[$j] =~ /^[\(\[\{L\}\]\)Rb,]$/ );
            $i_opening_minus = $j;
        }

        # do not start the estimate on a blank
        if ( $types_to_go[$i_opening_minus] eq 'b' ) { $i_opening_minus++ }
    }

    # include a keyword which immediately precedes the opening token
    elsif ( $typem1 eq 'k' ) { $i_opening_minus = $im1 }
    elsif ( $typem1 eq 'b' && $im2 >= 0 && $types_to_go[$im2] eq 'k' ) {
        $i_opening_minus = $im2;
    }
    return $i_opening_minus;
}
# ---------------------------------------------------------------------
# set_comma_breakpoints_do, together with the lookup table it uses.
#
# NOTE(review): this span appears to have been damaged by extraction.
# A bare number precedes each statement (it looks like the original
# source line number), all indentation is gone, and a number of lines
# are not present at all: closing braces, the opening of the parameter
# list, several assignment targets, some 'if (' / 'else' lines and some
# declarations.  The code below is left exactly as found; it should be
# restored from a pristine copy before it is relied upon.
#
# What the visible statements do, in order:
#   - return at once if $item_count < 1 (no commas seen);
#   - ignore the last comma if its index is >= $max_index_to_go;
#   - measure every comma-terminated term with token_sequence_length(),
#     adding 2 for the comma and the space, keeping the lengths in
#     @item_lengths and the running maxima in @max_length, which is
#     indexed by the alternating flag $is_odd;
#   - Compound List Rule 1: when $has_broken_sublist[$depth] is set,
#     force breaks at the commas, except that runs of single-token terms
#     no longer than $small_length may share a line;
#   - Interrupted List Rule: copy the old breakpoints when
#     $rOpts_break_at_old_comma_breakpoints is set or
#     $i_opening_paren < 0;
#   - test with excess_line_length() whether the list fits on one line,
#     measured from find_token_starting_list($i_opening_paren);
#   - call set_fake_breakpoint(), then choose a field count using
#     maximum_number_of_fields(), study_list_complexity(),
#     get_maximum_fields_wanted() and compactify_table();
#   - finally either set ragged breakpoints / copy old breakpoints
#     (list judged too sparse for a table) or force a break after every
#     $number_of_fields-th comma.
# ---------------------------------------------------------------------
14383
{ # begin set_comma_breakpoints_do
14385
my %is_keyword_with_special_leading_term;
14389
# These keywords have prototypes which allow a special leading item
14390
# followed by a list
14392
qw(formline grep kill map printf sprintf push chmod join pack unshift);
14393
@is_keyword_with_special_leading_term{@_} = (1) x scalar(@_);
14396
sub set_comma_breakpoints_do {
14398
# Given a list with some commas, set breakpoints at some of the
14399
# commas, if necessary, to make it easy to read. This list is
14402
$depth, $i_opening_paren, $i_closing_paren,
14403
$item_count, $identifier_count, $rcomma_index,
14404
$next_nonblank_type, $list_type, $interrupted,
14405
$rdo_not_break_apart, $must_break_open,
14408
# nothing to do if no commas seen
14409
return if ( $item_count < 1 );
14410
my $i_first_comma = $$rcomma_index[0];
14411
my $i_true_last_comma = $$rcomma_index[ $item_count - 1 ];
14412
my $i_last_comma = $i_true_last_comma;
14413
if ( $i_last_comma >= $max_index_to_go ) {
14414
$i_last_comma = $$rcomma_index[ --$item_count - 1 ];
14415
return if ( $item_count < 1 );
14418
#---------------------------------------------------------------
14419
# find lengths of all items in the list to calculate page layout
14420
#---------------------------------------------------------------
14421
my $comma_count = $item_count;
14427
my @max_length = ( 0, 0 );
14428
my $first_term_length;
14429
my $i = $i_opening_paren;
14432
for ( my $j = 0 ; $j < $comma_count ; $j++ ) {
14433
$is_odd = 1 - $is_odd;
14434
$i_prev_plus = $i + 1;
14435
$i = $$rcomma_index[$j];
14438
( $types_to_go[ $i - 1 ] eq 'b' ) ? $i - 2 : $i - 1;
14440
( $types_to_go[$i_prev_plus] eq 'b' )
14443
push @i_term_begin, $i_term_begin;
14444
push @i_term_end, $i_term_end;
14445
push @i_term_comma, $i;
14447
# note: currently adding 2 to all lengths (for comma and space)
14449
2 + token_sequence_length( $i_term_begin, $i_term_end );
14450
push @item_lengths, $length;
14453
$first_term_length = $length;
14457
if ( $length > $max_length[$is_odd] ) {
14458
$max_length[$is_odd] = $length;
14463
# now we have to make a distinction between the comma count and item
14464
# count, because the item count will be one greater than the comma
14465
# count if the last item is not terminated with a comma
14467
( $types_to_go[ $i_last_comma + 1 ] eq 'b' )
14468
? $i_last_comma + 1
14471
( $types_to_go[ $i_closing_paren - 1 ] eq 'b' )
14472
? $i_closing_paren - 2
14473
: $i_closing_paren - 1;
14474
my $i_effective_last_comma = $i_last_comma;
14476
my $last_item_length = token_sequence_length( $i_b + 1, $i_e );
14478
if ( $last_item_length > 0 ) {
14480
# add 2 to length because other lengths include a comma and a blank
14481
$last_item_length += 2;
14482
push @item_lengths, $last_item_length;
14483
push @i_term_begin, $i_b + 1;
14484
push @i_term_end, $i_e;
14485
push @i_term_comma, undef;
14487
my $i_odd = $item_count % 2;
14489
if ( $last_item_length > $max_length[$i_odd] ) {
14490
$max_length[$i_odd] = $last_item_length;
14494
$i_effective_last_comma = $i_e + 1;
14496
if ( $types_to_go[ $i_b + 1 ] =~ /^[iR\]]$/ ) {
14497
$identifier_count++;
14501
#---------------------------------------------------------------
14502
# End of length calculations
14503
#---------------------------------------------------------------
14505
#---------------------------------------------------------------
14506
# Compound List Rule 1:
14507
# Break at (almost) every comma for a list containing a broken
14508
# sublist. This has higher priority than the Interrupted List
14510
#---------------------------------------------------------------
14511
if ( $has_broken_sublist[$depth] ) {
14513
# Break at every comma except for a comma between two
14514
# simple, small terms. This prevents long vertical
14515
# columns of, say, just 0's.
14516
my $small_length = 10; # 2 + actual maximum length wanted
14518
# We'll insert a break in long runs of small terms to
14519
# allow alignment in uniform tables.
14520
my $skipped_count = 0;
14521
my $columns = table_columns_available($i_first_comma);
14522
my $fields = int( $columns / $small_length );
14523
if ( $rOpts_maximum_fields_per_table
14524
&& $fields > $rOpts_maximum_fields_per_table )
14526
$fields = $rOpts_maximum_fields_per_table;
14528
my $max_skipped_count = $fields - 1;
14530
my $is_simple_last_term = 0;
14531
my $is_simple_next_term = 0;
14532
foreach my $j ( 0 .. $item_count ) {
14533
$is_simple_last_term = $is_simple_next_term;
14534
$is_simple_next_term = 0;
14535
if ( $j < $item_count
14536
&& $i_term_end[$j] == $i_term_begin[$j]
14537
&& $item_lengths[$j] <= $small_length )
14539
$is_simple_next_term = 1;
14542
if ( $is_simple_last_term
14543
&& $is_simple_next_term
14544
&& $skipped_count < $max_skipped_count )
14549
$skipped_count = 0;
14550
my $i = $i_term_comma[ $j - 1 ];
14551
last unless defined $i;
14552
set_forced_breakpoint($i);
14556
# always break at the last comma if this list is
14557
# interrupted; we wouldn't want to leave a terminal '{', for
14559
if ($interrupted) { set_forced_breakpoint($i_true_last_comma) }
14563
#my ( $a, $b, $c ) = caller();
14564
#print "LISTX: in set_list $a $c interupt=$interrupted count=$item_count
14565
#i_first = $i_first_comma i_last=$i_last_comma max=$max_index_to_go\n";
14566
#print "depth=$depth has_broken=$has_broken_sublist[$depth] is_multi=$is_multiline opening_paren=($i_opening_paren) \n";
14568
#---------------------------------------------------------------
14569
# Interrupted List Rule:
14570
# A list is forced to use old breakpoints if it was interrupted
14571
# by side comments or blank lines, or requested by user.
14572
#---------------------------------------------------------------
14573
if ( $rOpts_break_at_old_comma_breakpoints
14575
|| $i_opening_paren < 0 )
14577
copy_old_breakpoints( $i_first_comma, $i_true_last_comma );
14581
#---------------------------------------------------------------
14582
# Looks like a list of items. We have to look at it and size it up.
14583
#---------------------------------------------------------------
14585
my $opening_token = $tokens_to_go[$i_opening_paren];
14586
my $opening_environment =
14587
$container_environment_to_go[$i_opening_paren];
14589
#-------------------------------------------------------------------
14590
# Return if this will fit on one line
14591
#-------------------------------------------------------------------
14593
my $i_opening_minus = find_token_starting_list($i_opening_paren);
14595
unless excess_line_length( $i_opening_minus, $i_closing_paren ) > 0;
14597
#-------------------------------------------------------------------
14598
# Now we know that this block spans multiple lines; we have to set
14599
# at least one breakpoint -- real or fake -- as a signal to break
14600
# open any outer containers.
14601
#-------------------------------------------------------------------
14602
set_fake_breakpoint();
14604
# be sure we do not extend beyond the current list length
14605
if ( $i_effective_last_comma >= $max_index_to_go ) {
14606
$i_effective_last_comma = $max_index_to_go - 1;
14609
# Set a flag indicating if we need to break open to keep -lp
14610
# items aligned. This is necessary if any of the list terms
14611
# exceeds the available space after the '('.
14612
my $need_lp_break_open = $must_break_open;
14613
if ( $rOpts_line_up_parentheses && !$must_break_open ) {
14614
my $columns_if_unbroken = $rOpts_maximum_line_length -
14615
total_line_length( $i_opening_minus, $i_opening_paren );
14616
$need_lp_break_open =
14617
( $max_length[0] > $columns_if_unbroken )
14618
|| ( $max_length[1] > $columns_if_unbroken )
14619
|| ( $first_term_length > $columns_if_unbroken );
14622
# Specify if the list must have an even number of fields or not.
14623
# It is generally safest to assume an even number, because the
14624
# list items might be a hash list. But if we can be sure that
14625
# it is not a hash, then we can allow an odd number for more
14627
my $odd_or_even = 2; # 1 = odd field count ok, 2 = want even count
14629
if ( $identifier_count >= $item_count - 1
14630
|| $is_assignment{$next_nonblank_type}
14631
|| ( $list_type && $list_type ne '=>' && $list_type !~ /^[\:\?]$/ )
14637
# do we have a long first term which should be
14638
# left on a line by itself?
14639
my $use_separate_first_term = (
14640
$odd_or_even == 1 # only if we can use 1 field/line
14641
&& $item_count > 3 # need several items
14642
&& $first_term_length >
14643
2 * $max_length[0] - 2 # need long first term
14644
&& $first_term_length >
14645
2 * $max_length[1] - 2 # need long first term
14648
# or do we know from the type of list that the first term should
14650
if ( !$use_separate_first_term ) {
14651
if ( $is_keyword_with_special_leading_term{$list_type} ) {
14652
$use_separate_first_term = 1;
14654
# should the container be broken open?
14655
if ( $item_count < 3 ) {
14656
if ( $i_first_comma - $i_opening_paren < 4 ) {
14657
$$rdo_not_break_apart = 1;
14660
elsif ($first_term_length < 20
14661
&& $i_first_comma - $i_opening_paren < 4 )
14663
my $columns = table_columns_available($i_first_comma);
14664
if ( $first_term_length < $columns ) {
14665
$$rdo_not_break_apart = 1;
14672
if ($use_separate_first_term) {
14674
# ..set a break and update starting values
14675
$use_separate_first_term = 1;
14676
set_forced_breakpoint($i_first_comma);
14677
$i_opening_paren = $i_first_comma;
14678
$i_first_comma = $$rcomma_index[1];
14680
return if $comma_count == 1;
14681
shift @item_lengths;
14682
shift @i_term_begin;
14684
shift @i_term_comma;
14687
# if not, update the metrics to include the first term
14689
if ( $first_term_length > $max_length[0] ) {
14690
$max_length[0] = $first_term_length;
14694
# Field width parameters
14695
my $pair_width = ( $max_length[0] + $max_length[1] );
14697
( $max_length[0] > $max_length[1] ) ? $max_length[0] : $max_length[1];
14699
# Number of free columns across the page width for laying out tables
14700
my $columns = table_columns_available($i_first_comma);
14702
# Estimated maximum number of fields which fit this space
14703
# This will be our first guess
14704
my $number_of_fields_max =
14705
maximum_number_of_fields( $columns, $odd_or_even, $max_width,
14707
my $number_of_fields = $number_of_fields_max;
14709
# Find the best-looking number of fields
14710
# and make this our second guess if possible
14711
my ( $number_of_fields_best, $ri_ragged_break_list,
14712
$new_identifier_count )
14713
= study_list_complexity( \@i_term_begin, \@i_term_end, \@item_lengths,
14716
if ( $number_of_fields_best != 0
14717
&& $number_of_fields_best < $number_of_fields_max )
14719
$number_of_fields = $number_of_fields_best;
14722
# ----------------------------------------------------------------------
14723
# If we are crowded and the -lp option is being used, try to
14724
# undo some indentation
14725
# ----------------------------------------------------------------------
14727
$rOpts_line_up_parentheses
14729
$number_of_fields == 0
14730
|| ( $number_of_fields == 1
14731
&& $number_of_fields != $number_of_fields_best )
14735
my $available_spaces = get_AVAILABLE_SPACES_to_go($i_first_comma);
14736
if ( $available_spaces > 0 ) {
14738
my $spaces_wanted = $max_width - $columns; # for 1 field
14740
if ( $number_of_fields_best == 0 ) {
14741
$number_of_fields_best =
14742
get_maximum_fields_wanted( \@item_lengths );
14745
if ( $number_of_fields_best != 1 ) {
14746
my $spaces_wanted_2 =
14747
1 + $pair_width - $columns; # for 2 fields
14748
if ( $available_spaces > $spaces_wanted_2 ) {
14749
$spaces_wanted = $spaces_wanted_2;
14753
if ( $spaces_wanted > 0 ) {
14754
my $deleted_spaces =
14755
reduce_lp_indentation( $i_first_comma, $spaces_wanted );
14758
if ( $deleted_spaces > 0 ) {
14759
$columns = table_columns_available($i_first_comma);
14760
$number_of_fields_max =
14761
maximum_number_of_fields( $columns, $odd_or_even,
14762
$max_width, $pair_width );
14763
$number_of_fields = $number_of_fields_max;
14765
if ( $number_of_fields_best == 1
14766
&& $number_of_fields >= 1 )
14768
$number_of_fields = $number_of_fields_best;
14775
# try for one column if two won't work
14776
if ( $number_of_fields <= 0 ) {
14777
$number_of_fields = int( $columns / $max_width );
14780
# The user can place an upper bound on the number of fields,
14781
# which can be useful for doing maintenance on tables
14782
if ( $rOpts_maximum_fields_per_table
14783
&& $number_of_fields > $rOpts_maximum_fields_per_table )
14785
$number_of_fields = $rOpts_maximum_fields_per_table;
14788
# How many columns (characters) and lines would this container take
14789
# if no additional whitespace were added?
14790
my $packed_columns = token_sequence_length( $i_opening_paren + 1,
14791
$i_effective_last_comma + 1 );
14792
if ( $columns <= 0 ) { $columns = 1 } # avoid divide by zero
14793
my $packed_lines = 1 + int( $packed_columns / $columns );
14795
# are we an item contained in an outer list?
14796
my $in_hierarchical_list = $next_nonblank_type =~ /^[\}\,]$/;
14798
if ( $number_of_fields <= 0 ) {
14800
# #---------------------------------------------------------------
14801
# # We're in trouble. We can't find a single field width that works.
14802
# # There is no simple answer here; we may have a single long list
14804
# #---------------------------------------------------------------
14806
# In many cases, it may be best to not force a break if there is just one
14807
# comma, because the standard continuation break logic will do a better
14810
# In the common case that all but one of the terms can fit
14811
# on a single line, it may look better not to break open the
14812
# containing parens. Consider, for example
14816
# sort { $color_value{$::a} <=> $color_value{$::b}; }
14819
# which will look like this with the container broken:
14823
# sort { $color_value{$::a} <=> $color_value{$::b}; } keys %colors
14826
# Here is an example of this rule for a long last term:
14828
# log_message( 0, 256, 128,
14829
# "Number of routes in adj-RIB-in to be considered: $peercount" );
14831
# And here is an example with a long first term:
14834
# "%2d wallclock secs (%$f usr %$f sys + %$f cusr %$f csys = %$f CPU)",
14835
# $r, $pu, $ps, $cu, $cs, $tt
14837
# if $style eq 'all';
14839
my $i_last_comma = $$rcomma_index[ $comma_count - 1 ];
14840
my $long_last_term = excess_line_length( 0, $i_last_comma ) <= 0;
14841
my $long_first_term =
14842
excess_line_length( $i_first_comma + 1, $max_index_to_go ) <= 0;
14844
# break at every comma ...
14847
# if requested by user or is best looking
14848
$number_of_fields_best == 1
14850
# or if this is a sublist of a larger list
14851
|| $in_hierarchical_list
14853
# or if multiple commas and we dont have a long first or last
14855
|| ( $comma_count > 1
14856
&& !( $long_last_term || $long_first_term ) )
14859
foreach ( 0 .. $comma_count - 1 ) {
14860
set_forced_breakpoint( $$rcomma_index[$_] );
14863
elsif ($long_last_term) {
14865
set_forced_breakpoint($i_last_comma);
14866
$$rdo_not_break_apart = 1 unless $must_break_open;
14868
elsif ($long_first_term) {
14870
set_forced_breakpoint($i_first_comma);
14874
# let breaks be defined by default bond strength logic
14879
# --------------------------------------------------------
14880
# We have a tentative field count that seems to work.
14881
# How many lines will this require?
14882
# --------------------------------------------------------
14883
my $formatted_lines = $item_count / ($number_of_fields);
14884
if ( $formatted_lines != int $formatted_lines ) {
14885
$formatted_lines = 1 + int $formatted_lines;
14888
# So far we've been trying to fill out to the right margin. But
14889
# compact tables are easier to read, so let's see if we can use fewer
14890
# fields without increasing the number of lines.
14891
$number_of_fields =
14892
compactify_table( $item_count, $number_of_fields, $formatted_lines,
14895
# How many spaces across the page will we fill?
14896
my $columns_per_line =
14897
( int $number_of_fields / 2 ) * $pair_width +
14898
( $number_of_fields % 2 ) * $max_width;
14900
my $formatted_columns;
14902
if ( $number_of_fields > 1 ) {
14903
$formatted_columns =
14904
( $pair_width * ( int( $item_count / 2 ) ) +
14905
( $item_count % 2 ) * $max_width );
14908
$formatted_columns = $max_width * $item_count;
14910
if ( $formatted_columns < $packed_columns ) {
14911
$formatted_columns = $packed_columns;
14914
my $unused_columns = $formatted_columns - $packed_columns;
14916
# set some empirical parameters to help decide if we should try to
14917
# align; high sparsity does not look good, especially with few lines
14918
my $sparsity = ($unused_columns) / ($formatted_columns);
14919
my $max_allowed_sparsity =
14920
( $item_count < 3 ) ? 0.1
14921
: ( $packed_lines == 1 ) ? 0.15
14922
: ( $packed_lines == 2 ) ? 0.4
14925
# Begin check for shortcut methods, which avoid treating a list
14926
# as a table for relatively small parenthesized lists. These
14927
# are usually easier to read if not formatted as tables.
14929
$packed_lines <= 2 # probably can fit in 2 lines
14930
&& $item_count < 9 # doesn't have too many items
14931
&& $opening_environment eq 'BLOCK' # not a sub-container
14932
&& $opening_token eq '(' # is paren list
14936
# Shortcut method 1: for -lp and just one comma:
14937
# This is a no-brainer, just break at the comma.
14939
$rOpts_line_up_parentheses # -lp
14940
&& $item_count == 2 # two items, one comma
14941
&& !$must_break_open
14944
my $i_break = $$rcomma_index[0];
14945
set_forced_breakpoint($i_break);
14946
$$rdo_not_break_apart = 1;
14947
set_non_alignment_flags( $comma_count, $rcomma_index );
14952
# method 2 is for most small ragged lists which might look
14953
# best if not displayed as a table.
14955
( $number_of_fields == 2 && $item_count == 3 )
14957
$new_identifier_count > 0 # isn't all quotes
14958
&& $sparsity > 0.15
14959
) # would be fairly spaced gaps if aligned
14963
my $break_count = set_ragged_breakpoints( \@i_term_comma,
14964
$ri_ragged_break_list );
14965
++$break_count if ($use_separate_first_term);
14967
# NOTE: we should really use the true break count here,
14968
# which can be greater if there are large terms and
14969
# little space, but usually this will work well enough.
14970
unless ($must_break_open) {
14972
if ( $break_count <= 1 ) {
14973
$$rdo_not_break_apart = 1;
14975
elsif ( $rOpts_line_up_parentheses && !$need_lp_break_open )
14977
$$rdo_not_break_apart = 1;
14980
set_non_alignment_flags( $comma_count, $rcomma_index );
14984
} # end shortcut methods
14988
FORMATTER_DEBUG_FLAG_SPARSE && do {
14990
"SPARSE:cols=$columns commas=$comma_count items:$item_count ids=$identifier_count pairwidth=$pair_width fields=$number_of_fields lines packed: $packed_lines packed_cols=$packed_columns fmtd:$formatted_lines cols /line:$columns_per_line unused:$unused_columns fmtd:$formatted_columns sparsity=$sparsity allow=$max_allowed_sparsity\n";
14994
#---------------------------------------------------------------
14995
# Compound List Rule 2:
14996
# If this list is too long for one line, and it is an item of a
14997
# larger list, then we must format it, regardless of sparsity
14998
# (ian.t). One reason that we have to do this is to trigger
14999
# Compound List Rule 1, above, which causes breaks at all commas of
15000
# all outer lists. In this way, the structure will be properly
15002
#---------------------------------------------------------------
15004
# Decide if this list is too long for one line unless broken
15005
my $total_columns = table_columns_available($i_opening_paren);
15006
my $too_long = $packed_columns > $total_columns;
15008
# For a paren list, include the length of the token just before the
15009
# '(' because this is likely a sub call, and we would have to
15010
# include the sub name on the same line as the list. This is still
15011
# imprecise, but not too bad. (steve.t)
15012
if ( !$too_long && $i_opening_paren > 0 && $opening_token eq '(' ) {
15014
$too_long = excess_line_length( $i_opening_minus,
15015
$i_effective_last_comma + 1 ) > 0;
15018
# FIXME: For an item after a '=>', try to include the length of the
15019
# thing before the '=>'. This is crude and should be improved by
15020
# actually looking back token by token.
15021
if ( !$too_long && $i_opening_paren > 0 && $list_type eq '=>' ) {
15022
my $i_opening_minus = $i_opening_paren - 4;
15023
if ( $i_opening_minus >= 0 ) {
15024
$too_long = excess_line_length( $i_opening_minus,
15025
$i_effective_last_comma + 1 ) > 0;
15029
# Always break lists contained in '[' and '{' if too long for 1 line,
15030
# and always break lists which are too long and part of a more complex
15032
my $must_break_open_container = $must_break_open
15034
&& ( $in_hierarchical_list || $opening_token ne '(' ) );
15036
#print "LISTX: next=$next_nonblank_type avail cols=$columns packed=$packed_columns must format = $must_break_open_container too-long=$too_long opening=$opening_token list_type=$list_type formatted_lines=$formatted_lines packed=$packed_lines max_sparsity= $max_allowed_sparsity sparsity=$sparsity \n";
15038
#---------------------------------------------------------------
15039
# The main decision:
15040
# Now decide if we will align the data into aligned columns. Do not
15041
# attempt to align columns if this is a tiny table or it would be
15042
# too spaced. It seems that the more packed lines we have, the
15043
# sparser the list that can be allowed and still look ok.
15044
#---------------------------------------------------------------
15046
if ( ( $formatted_lines < 3 && $packed_lines < $formatted_lines )
15047
|| ( $formatted_lines < 2 )
15048
|| ( $unused_columns > $max_allowed_sparsity * $formatted_columns )
15052
#---------------------------------------------------------------
15053
# too sparse: would look ugly if aligned in a table;
15054
#---------------------------------------------------------------
15056
# use old breakpoints if this is a 'big' list
15057
# FIXME: goal is to improve set_ragged_breakpoints so that
15058
# this is not necessary.
15059
if ( $packed_lines > 2 && $item_count > 10 ) {
15060
write_logfile_entry("List sparse: using old breakpoints\n");
15061
copy_old_breakpoints( $i_first_comma, $i_last_comma );
15064
# let the continuation logic handle it if 2 lines
15067
my $break_count = set_ragged_breakpoints( \@i_term_comma,
15068
$ri_ragged_break_list );
15069
++$break_count if ($use_separate_first_term);
15071
unless ($must_break_open_container) {
15072
if ( $break_count <= 1 ) {
15073
$$rdo_not_break_apart = 1;
15075
elsif ( $rOpts_line_up_parentheses && !$need_lp_break_open )
15077
$$rdo_not_break_apart = 1;
15080
set_non_alignment_flags( $comma_count, $rcomma_index );
15085
#---------------------------------------------------------------
15086
# go ahead and format as a table
15087
#---------------------------------------------------------------
15088
write_logfile_entry(
15089
"List: auto formatting with $number_of_fields fields/row\n");
15091
my $j_first_break =
15092
$use_separate_first_term ? $number_of_fields : $number_of_fields - 1;
15095
my $j = $j_first_break ;
15096
$j < $comma_count ;
15097
$j += $number_of_fields
15100
my $i = $$rcomma_index[$j];
15101
set_forced_breakpoint($i);
sub set_non_alignment_flags {

    # Set the flag which indicates that these commas should not be
    # aligned: $matching_token_to_go[] is set to 1 at the token index of
    # each of the first $comma_count commas listed in @$rcomma_index.
    #
    # Given:
    #   $comma_count  = number of commas to flag
    #   $rcomma_index = ref to array of the token indexes of the commas
    my ( $comma_count, $rcomma_index ) = @_;
    foreach my $j ( 0 .. $comma_count - 1 ) {
        $matching_token_to_go[ $rcomma_index->[$j] ] = 1;
    }
    return;
}
sub study_list_complexity {

    # Look for complex tables which should be formatted with one term per line.
    # Returns the following, in this order:
    #
    #  $number_of_fields_best = suggested number of fields based on
    #                           complexity; it starts out as
    #                           $rOpts_maximum_fields_per_table and is
    #                           forced to 1 for a sufficiently complex list
    #  \@i_ragged_break_list  = list of good breakpoints (term numbers) to
    #                           avoid lines which are hard to read
    #  $identifier_count      = number of terms which did not start with a
    #                           quote, bareword or '-'
    #
    # Given:
    #  $ri_term_begin, $ri_term_end = refs to arrays of the first and last
    #                                 token index of each term
    #  $ritem_lengths = ref to array of term lengths; each length includes
    #                   2 extra columns, which are subtracted again below
    #  $max_width     = width of the widest field
    my ( $ri_term_begin, $ri_term_end, $ritem_lengths, $max_width ) = @_;
    my $item_count            = @{$ri_term_begin};
    my $complex_item_count    = 0;
    my $number_of_fields_best = $rOpts_maximum_fields_per_table;
    my $i_max                 = @{$ritem_lengths} - 1;
    ##my @item_complexity;

    my $i_last_last_break = -3;
    my $i_last_break      = -2;
    my @i_ragged_break_list;

    my $definitely_complex = 30;
    my $definitely_simple  = 12;
    my $quote_count        = 0;

    for my $i ( 0 .. $i_max ) {
        my $ib = $ri_term_begin->[$i];
        my $ie = $ri_term_end->[$i];

        # define complexity: start with the actual term length
        my $weighted_length = ( $ritem_lengths->[$i] - 2 );

        ##TBD: join types here and check for variations
        ##my $str=join "", @tokens_to_go[$ib..$ie];

        # NOTE(review): the bodies of the next two branches were restored
        # from the way $is_quote and $quote_count are used further down;
        # please confirm against a pristine copy.
        my $is_quote = 0;
        if ( $types_to_go[$ib] =~ /^[qQ]$/ ) {
            $is_quote = 1;
            $quote_count++;
        }
        elsif ( $types_to_go[$ib] =~ /^[w\-]$/ ) {
            $quote_count++;
        }

        if ( $ib == $ie ) {

            # a single quoted token which contains whitespace is complex
            if ( $is_quote && $tokens_to_go[$ib] =~ /\s/ ) {
                $complex_item_count++;
                $weighted_length *= 2;
            }
        }
        else {

            # a multi-token term which contains a blank is complex
            if ( grep { $_ eq 'b' } @types_to_go[ $ib .. $ie ] ) {
                $complex_item_count++;
                $weighted_length *= 2;
            }

            # a range operator adds some weight
            if ( grep { $_ eq '..' } @types_to_go[ $ib .. $ie ] ) {
                $weighted_length += 4;
            }
        }

        # add weight for extra tokens.
        $weighted_length += 2 * ( $ie - $ib );

        ## my $BUB = join '', @tokens_to_go[$ib..$ie];
        ## print "# COMPLEXITY:$weighted_length $BUB\n";

        ##push @item_complexity, $weighted_length;

        # now mark a ragged break after this item if it is 'long and
        # complex':
        if ( $weighted_length >= $definitely_complex ) {

            # if we broke after the previous term
            # then break before it too; the test on $i keeps $i - 2
            # from becoming a negative term number
            if (   $i_last_break == $i - 1
                && $i > 1
                && $i_last_last_break != $i - 2 )
            {

                ## FIXME: don't strand a small term
                pop @i_ragged_break_list;
                push @i_ragged_break_list, $i - 2;
                push @i_ragged_break_list, $i - 1;
            }

            push @i_ragged_break_list, $i;
            $i_last_last_break = $i_last_break;
            $i_last_break      = $i;
        }

        # don't break before a small last term -- it will
        # not look good on a line by itself.
        elsif ($i == $i_max
            && $i_last_break == $i - 1
            && $weighted_length <= $definitely_simple )
        {
            pop @i_ragged_break_list;
        }
    }

    my $identifier_count = $i_max + 1 - $quote_count;

    # Need more tuning here..
    if (   $max_width > 12
        && $complex_item_count > $item_count / 2
        && $number_of_fields_best != 2 )
    {
        $number_of_fields_best = 1;
    }

    return ( $number_of_fields_best, \@i_ragged_break_list, $identifier_count );
}
sub get_maximum_fields_wanted {

    # Not all tables look good with more than one field of items.
    # This routine looks at a table and decides if it should be
    # formatted with just one field or not.
    # This coding is still under development.
    #
    # Given:   $ritem_lengths = ref to array of the item lengths
    # Returns: 1 if a single field is recommended,
    #          0 if there is no preference
    my ($ritem_lengths) = @_;

    # For just a few items, we tentatively assume just 1 field.
    my $item_count = @{$ritem_lengths};
    return 1 if ( $item_count <= 5 );

    # For larger tables, look at it both ways and see what looks best:
    #   variation 1 = sum of |length change| between consecutive items,
    #                 i.e. down a single column
    #   variation 2 = the same sum taken separately over the even-numbered
    #                 and the odd-numbered items, i.e. down two columns
    my $total_variation_1    = 0;
    my @variation_2_by_field = ( 0, 0 );
    my $last_length;
    my @last_length_by_field = ( undef, undef );

    for my $j ( 0 .. $item_count - 1 ) {
        my $is_odd = $j % 2;
        my $length = $ritem_lengths->[$j];

        if ( defined($last_length) ) {
            $total_variation_1 += abs( $length - $last_length );
        }
        $last_length = $length;

        my $ll = $last_length_by_field[$is_odd];
        if ( defined($ll) ) {
            $variation_2_by_field[$is_odd] += abs( $length - $ll );
        }
        $last_length_by_field[$is_odd] = $length;
    }
    my $total_variation_2 =
      $variation_2_by_field[0] + $variation_2_by_field[1];

    # Two fields are acceptable only if they are clearly smoother than one;
    # short tables (6 to 10 items) have to clear a stricter threshold.
    my $factor = ( $item_count > 10 ) ? 1 : 0.75;
    return ( $total_variation_2 < $factor * $total_variation_1 ) ? 0 : 1;
}
sub table_columns_available {

    # Return the number of columns available for laying out a table:
    # the maximum line length less the leading spaces at token index
    # $i_first_comma, less 1.
    my $i_first_comma = shift;
    my $columns =
      $rOpts_maximum_line_length - leading_spaces_to_go($i_first_comma);

    # Patch: the vertical formatter does not line up lines whose lengths
    # exactly equal the available line length because of allowances
    # that must be made for side comments.  Therefore, the number of
    # available columns is reduced by 1 character.
    return $columns - 1;
}
sub maximum_number_of_fields {

    # how many fields will fit in the available space?
    #
    # Given:
    #   $columns     = number of columns available
    #   $odd_or_even = 1 if an odd field count is ok, 2 if the count must
    #                  be even
    #   $max_width   = width of the widest single field
    #   $pair_width  = combined width of a pair of adjacent fields
    # Returns: the largest usable number of fields; 0 if not even one
    #          pair (or, when odd counts are ok, one field) fits
    my ( $columns, $odd_or_even, $max_width, $pair_width ) = @_;

    # a pair without any width cannot be laid out; also avoids a
    # division by zero below
    return 0 if ( $pair_width <= 0 );

    my $max_pairs        = int( $columns / $pair_width );
    my $number_of_fields = $max_pairs * 2;

    # when an odd count is allowed, see if one more single field fits
    # after the last complete pair
    if (   $odd_or_even == 1
        && $max_pairs * $pair_width + $max_width <= $columns )
    {
        $number_of_fields++;
    }
    return $number_of_fields;
}
sub compactify_table {

    # given a table with a certain number of fields and a certain number
    # of lines, see if reducing the number of fields will make it look
    # better.
    #
    # Given:
    #   $item_count       = number of items in the table
    #   $number_of_fields = tentative number of fields per line
    #   $formatted_lines  = number of lines needed with that field count
    #   $odd_or_even      = step by which the field count may be reduced
    #                       (1 = any count is ok, 2 = keep it even)
    # Returns: the smallest field count, reached in steps of $odd_or_even,
    #          which still holds all items in $formatted_lines lines;
    #          $number_of_fields is returned unchanged if it cannot be
    #          reduced
    my ( $item_count, $number_of_fields, $formatted_lines, $odd_or_even ) = @_;

    # the step must be positive or the search below would never end
    if (   $odd_or_even > 0
        && $number_of_fields >= $odd_or_even * 2
        && $formatted_lines > 0 )
    {
        my $min_fields = $number_of_fields;
        while ($min_fields >= $odd_or_even
            && $min_fields * $formatted_lines >= $item_count )
        {
            $number_of_fields = $min_fields;
            $min_fields -= $odd_or_even;
        }
    }
    return $number_of_fields;
}
sub set_ragged_breakpoints {

    # Set breakpoints in a list that cannot be formatted nicely as a
    # table.
    #
    # Given:
    #   $ri_term_comma        = ref to array giving, for each term, the
    #                           token index of the comma which ends it
    #   $ri_ragged_break_list = ref to array of the term numbers after
    #                           which a break is wanted
    # Returns: the number of breakpoints requested
    my ( $ri_term_comma, $ri_ragged_break_list ) = @_;

    my $break_count = 0;
    foreach my $term_number ( @{$ri_ragged_break_list} ) {
        my $j = $ri_term_comma->[$term_number];

        # NOTE(review): this guard and the counter increment were restored
        # from context (a term without a comma cannot take a break, and
        # callers compare the returned count); please confirm against a
        # pristine copy.
        if ($j) {
            set_forced_breakpoint($j);
            $break_count++;
        }
    }
    return $break_count;
}
sub copy_old_breakpoints {

    # Force a break at every token index from $i_first_comma through
    # $i_last_comma at which $old_breakpoint_to_go[] is set.
    my ( $i_first_comma, $i_last_comma ) = @_;
    for my $i ( $i_first_comma .. $i_last_comma ) {
        if ( $old_breakpoint_to_go[$i] ) {
            set_forced_breakpoint($i);
        }
    }
    return;
}
# NOTE(review): the 'sub NAME {' line which opens this routine is not
# present in this copy, nor are its closing braces, the 'else' line or
# the calls which emit the two debug strings; presumably this is the
# routine which sets the no-break flags -- confirm against a pristine
# copy.  The code is left exactly as found.
#
# The visible statements take a pair of token indexes ( $i, $j ).  When
# 0 <= $i <= $j <= $max_index_to_go, every element of
# @nobreak_to_go[ $i .. $j ] is set to 1.  The remaining statements only
# build debug messages, guarded by FORMATTER_DEBUG_FLAG_NOBREAK.
15369
my ( $i, $j ) = @_;
15370
if ( $i >= 0 && $i <= $j && $j <= $max_index_to_go ) {
15372
FORMATTER_DEBUG_FLAG_NOBREAK && do {
15373
my ( $a, $b, $c ) = caller();
15375
"NOBREAK: forced_breakpoint $forced_breakpoint_count from $a $c with i=$i max=$max_index_to_go type=$types_to_go[$i]\n"
15379
@nobreak_to_go[ $i .. $j ] = (1) x ( $j - $i + 1 );
15382
# shouldn't happen; non-critical error
15384
FORMATTER_DEBUG_FLAG_NOBREAK && do {
15385
my ( $a, $b, $c ) = caller();
15387
"NOBREAK ERROR: from $a $c with i=$i j=$j max=$max_index_to_go\n"
sub set_fake_breakpoint {

    # Just bump up the breakpoint count as a signal that there are breaks.
    # This is useful if we have breaks but may want to postpone deciding
    # where to make them.  No entry of @forced_breakpoint_to_go is changed.
    $forced_breakpoint_count++;
    return;
}
sub set_forced_breakpoint {

    # Force a line break at token index $i, unless breaks are forbidden
    # there by @nobreak_to_go.  Each break which is set is counted in
    # $forced_breakpoint_count and recorded on
    # @forced_breakpoint_undo_stack so that it can be undone later.
    #
    # Given: $i = index of the token at which to break; the call is
    #        ignored if $i is undefined or negative
    my $i = shift;

    return unless defined $i && $i >= 0;

    # when called with certain tokens, use bond strengths to decide
    # if we break before or after it
    my $token = $tokens_to_go[$i];

    if ( $token =~ /^([\=\.\,\:\?]|and|or|xor|&&|\|\|)$/ ) {
        if ( $want_break_before{$token} && $i >= 0 ) { $i-- }
    }

    # breaks are forced before 'if' and 'unless'
    elsif ( $is_if_unless{$token} ) { $i-- }

    if ( $i >= 0 && $i <= $max_index_to_go ) {

        # the break is stored on the last nonblank token of the line
        my $i_nonblank = ( $types_to_go[$i] ne 'b' ) ? $i : $i - 1;

        FORMATTER_DEBUG_FLAG_FORCE && do {
            my ( $a, $b, $c ) = caller();
            print
"FORCE forced_breakpoint $forced_breakpoint_count from $a $c with i=$i_nonblank max=$max_index_to_go tok=$tokens_to_go[$i_nonblank] type=$types_to_go[$i_nonblank] nobr=$nobreak_to_go[$i_nonblank]\n";
        };

        if ( $i_nonblank >= 0 && $nobreak_to_go[$i_nonblank] == 0 ) {
            $forced_breakpoint_to_go[$i_nonblank] = 1;

            if ( $i_nonblank > $index_max_forced_break ) {
                $index_max_forced_break = $i_nonblank;
            }
            $forced_breakpoint_count++;

            # remember the index so that the break can be undone
            $forced_breakpoint_undo_stack[ $forced_breakpoint_undo_count++ ] =
              $i_nonblank;

            # if we break at an opening container..break at the closing
            if ( $tokens_to_go[$i_nonblank] =~ /^[\{\[\(\?]$/ ) {
                set_closing_breakpoint($i_nonblank);
            }
        }
    }
    return;
}
sub clear_breakpoint_undo_stack {

    # Forget all recorded forced breakpoints by resetting the undo stack
    # depth to zero; the breakpoints themselves remain set.
    $forced_breakpoint_undo_count = 0;
    return;
}
# undo_forced_breakpoint_stack( $i_start )
#
# Undo recorded forced breakpoints until the undo stack depth
# ($forced_breakpoint_undo_count) has come back down to $i_start.  For
# each index taken off @forced_breakpoint_undo_stack which lies within
# 0 .. $max_index_to_go, $forced_breakpoint_to_go[] is cleared and
# $forced_breakpoint_count is decremented.
#
# NOTE(review): this copy appears to be damaged: a bare number precedes
# each statement and some lines are not present (closing braces, the
# 'my $i =' target of the stack read, the 'else' line, the calls which
# emit the message strings and possibly a statement which handles a
# negative $i_start).  The code is left exactly as found; confirm against
# a pristine copy.
15448
sub undo_forced_breakpoint_stack {
15450
my $i_start = shift;
15451
if ( $i_start < 0 ) {
15453
my ( $a, $b, $c ) = caller();
15455
"Program Bug: undo_forced_breakpoint_stack from $a $c has i=$i_start "
15459
while ( $forced_breakpoint_undo_count > $i_start ) {
15461
$forced_breakpoint_undo_stack[ --$forced_breakpoint_undo_count ];
15462
if ( $i >= 0 && $i <= $max_index_to_go ) {
15463
$forced_breakpoint_to_go[$i] = 0;
15464
$forced_breakpoint_count--;
15466
FORMATTER_DEBUG_FLAG_UNDOBP && do {
15467
my ( $a, $b, $c ) = caller();
15469
"UNDOBP: undo forced_breakpoint i=$i $forced_breakpoint_undo_count from $a $c max=$max_index_to_go\n"
15474
# shouldn't happen, but not a critical error
15476
FORMATTER_DEBUG_FLAG_UNDOBP && do {
15477
my ( $a, $b, $c ) = caller();
15479
"Program Bug: undo_forced_breakpoint from $a $c has i=$i but max=$max_index_to_go"
15486
{ # begin recombine_breakpoints
15495
@is_amp_amp{@_} = (1) x scalar(@_);
15498
@is_ternary{@_} = (1) x scalar(@_);
15500
@_ = qw( + - * / );
15501
@is_math_op{@_} = (1) x scalar(@_);
15504
sub recombine_breakpoints {
15506
# sub set_continuation_breaks is very liberal in setting line breaks
15507
# for long lines, always setting breaks at good breakpoints, even
15508
# when that creates small lines. Occasionally small line fragments
15509
# are produced which would look better if they were combined.
15510
# That's the task of this routine, recombine_breakpoints.
15512
# $ri_beg = ref to array of BEGinning indexes of each line
15513
# $ri_end = ref to array of ENDing indexes of each line
15514
my ( $ri_beg, $ri_end ) = @_;
15516
my $more_to_do = 1;
15518
# We keep looping over all of the lines of this batch
15519
# until there are no more possible recombinations
15520
my $nmax_last = @$ri_end;
15521
while ($more_to_do) {
15525
my $nmax = @$ri_end - 1;
15527
# safety check for infinite loop
15528
unless ( $nmax < $nmax_last ) {
15530
# shouldn't happen because splice below decreases nmax on each pass:
15531
# but i get paranoid sometimes
15532
die "Program bug-infinite loop in recombine breakpoints\n";
15534
$nmax_last = $nmax;
15536
my $previous_outdentable_closing_paren;
15537
my $leading_amp_count = 0;
15538
my $this_line_is_semicolon_terminated;
15540
# loop over all remaining lines in this batch
15541
for $n ( 1 .. $nmax ) {
15543
#----------------------------------------------------------
15544
# If we join the current pair of lines,
15545
# line $n-1 will become the left part of the joined line
15546
# line $n will become the right part of the joined line
15548
# Here are Indexes of the endpoint tokens of the two lines:
15550
# -----line $n-1--- | -----line $n-----
15551
# $ibeg_1 $iend_1 | $ibeg_2 $iend_2
15554
# We want to decide if we should remove the line break
15555
# between the tokens at $iend_1 and $ibeg_2
15557
# We will apply a number of ad-hoc tests to see if joining
15558
# here will look ok. The code will just issue a 'next'
15559
# command if the join doesn't look good. If we get through
15560
# the gauntlet of tests, the lines will be recombined.
15561
#----------------------------------------------------------
15563
# beginning and ending tokens of the lines we are working on
15564
my $ibeg_1 = $$ri_beg[ $n - 1 ];
15565
my $iend_1 = $$ri_end[ $n - 1 ];
15566
my $iend_2 = $$ri_end[$n];
15567
my $ibeg_2 = $$ri_beg[$n];
15569
my $ibeg_nmax = $$ri_beg[$nmax];
15571
# some beginning indexes of other lines, which may not exist
15572
my $ibeg_0 = $n > 1 ? $$ri_beg[ $n - 2 ] : -1;
15573
my $ibeg_3 = $n < $nmax ? $$ri_beg[ $n + 1 ] : -1;
15574
my $ibeg_4 = $n + 2 <= $nmax ? $$ri_beg[ $n + 2 ] : -1;
15578
#my $depth_increase=( $nesting_depth_to_go[$ibeg_2] -
15579
# $nesting_depth_to_go[$ibeg_1] );
15581
##print "RECOMBINE: n=$n imid=$iend_1 if=$ibeg_1 type=$types_to_go[$ibeg_1] =$tokens_to_go[$ibeg_1] next_type=$types_to_go[$ibeg_2] next_tok=$tokens_to_go[$ibeg_2]\n";
15583
# If line $n is the last line, we set some flags and
15584
# do any special checks for it
15585
if ( $n == $nmax ) {
15587
# a terminal '{' should stay where it is
15588
next if $types_to_go[$ibeg_2] eq '{';
15590
# set flag if statement $n ends in ';'
15591
$this_line_is_semicolon_terminated =
15592
$types_to_go[$iend_2] eq ';'
15594
# with possible side comment
15595
|| ( $types_to_go[$iend_2] eq '#'
15596
&& $iend_2 - $ibeg_2 >= 2
15597
&& $types_to_go[ $iend_2 - 2 ] eq ';'
15598
&& $types_to_go[ $iend_2 - 1 ] eq 'b' );
15601
#----------------------------------------------------------
15602
# Section 1: examine token at $iend_1 (right end of first line of pair)
15604
#----------------------------------------------------------
15606
# an isolated '}' may join with a ';' terminated segment
15607
if ( $types_to_go[$iend_1] eq '}' ) {
15609
# Check for cases where combining a semicolon terminated
15610
# statement with a previous isolated closing paren will
15611
# allow the combined line to be outdented. This is
15612
# generally a good move. For example, we can join up
15613
# the last two lines here:
15615
# $dev, $ino, $mode, $nlink, $uid, $gid, $rdev,
15616
# $size, $atime, $mtime, $ctime, $blksize, $blocks
15622
# $dev, $ino, $mode, $nlink, $uid, $gid, $rdev,
15623
# $size, $atime, $mtime, $ctime, $blksize, $blocks
15626
# which makes the parens line up.
15628
# Another example, from Joe Matarazzo, probably looks best
15629
# with the 'or' clause appended to the trailing paren:
15630
# $self->some_method(
15633
# ) or die "Some_method didn't work";
15635
$previous_outdentable_closing_paren =
15636
$this_line_is_semicolon_terminated # ends in ';'
15637
&& $ibeg_1 == $iend_1 # only one token on last line
15638
&& $tokens_to_go[$iend_1] eq
15639
')' # must be structural paren
15641
# only &&, ||, and : if no others seen
15642
# (but note: our count made below could be wrong
15643
# due to intervening comments)
15644
&& ( $leading_amp_count == 0
15645
|| $types_to_go[$ibeg_2] !~ /^(:|\&\&|\|\|)$/ )
15647
# but leading colons probably line up with a
15648
# previous colon or question (count could be wrong).
15649
&& $types_to_go[$ibeg_2] ne ':'
15651
# only one step in depth allowed. this line must not
15652
# begin with a ')' itself.
15653
&& ( $nesting_depth_to_go[$iend_1] ==
15654
$nesting_depth_to_go[$iend_2] + 1 );
15658
$previous_outdentable_closing_paren
15660
# handle '.' and '?' specially below
15661
|| ( $types_to_go[$ibeg_2] =~ /^[\.\?]$/ )
15665
# do not recombine lines with ending &&, ||,
15666
elsif ( $is_amp_amp{ $types_to_go[$iend_1] } ) {
15667
next unless $want_break_before{ $types_to_go[$iend_1] };
15670
# keep a terminal colon
15671
elsif ( $types_to_go[$iend_1] eq ':' ) {
15672
next unless $want_break_before{ $types_to_go[$iend_1] };
15675
# Identify and recombine a broken ?/: chain
15676
elsif ( $types_to_go[$iend_1] eq '?' ) {
15678
# Do not recombine different levels
15680
if ( $levels_to_go[$ibeg_1] ne $levels_to_go[$ibeg_2] );
15682
# do not recombine unless next line ends in :
15683
next unless $types_to_go[$iend_2] eq ':';
15686
# for lines ending in a comma...
15687
elsif ( $types_to_go[$iend_1] eq ',' ) {
15689
# Do not recombine at comma which is following the
15691
# TODO: might be best to make a special flag
15692
next if ( $old_breakpoint_to_go[$iend_1] );
15694
# an isolated '},' may join with an identifier + ';'
15695
# this is useful for the class of a 'bless' statement (bless.t)
15696
if ( $types_to_go[$ibeg_1] eq '}'
15697
&& $types_to_go[$ibeg_2] eq 'i' )
15700
unless ( ( $ibeg_1 == ( $iend_1 - 1 ) )
15701
&& ( $iend_2 == ( $ibeg_2 + 1 ) )
15702
&& $this_line_is_semicolon_terminated );
15704
# override breakpoint
15705
$forced_breakpoint_to_go[$iend_1] = 0;
15711
# do not recombine after a comma unless this will leave at most one more line
15713
next unless ( $n + 1 >= $nmax );
15715
# do not recombine if there is a change in indentation depth
15718
$levels_to_go[$iend_1] != $levels_to_go[$iend_2] );
15720
# do not recombine a "complex expression" after a
15721
# comma. "complex" means no parens.
15723
foreach my $ii ( $ibeg_2 .. $iend_2 ) {
15724
if ( $tokens_to_go[$ii] eq '(' ) {
15729
next if $saw_paren;
15734
elsif ( $types_to_go[$iend_1] eq '(' ) {
15736
# No longer doing this
15739
elsif ( $types_to_go[$iend_1] eq ')' ) {
15741
# No longer doing this
15744
# keep a terminal for-semicolon
15745
elsif ( $types_to_go[$iend_1] eq 'f' ) {
15749
# if '=' at end of line ...
15750
elsif ( $is_assignment{ $types_to_go[$iend_1] } ) {
15752
my $is_short_quote =
15753
( $types_to_go[$ibeg_2] eq 'Q'
15754
&& $ibeg_2 == $iend_2
15755
&& length( $tokens_to_go[$ibeg_2] ) <
15756
$rOpts_short_concatenation_item_length );
15758
( $types_to_go[$ibeg_1] eq '?'
15759
&& ( $ibeg_3 >= 0 && $types_to_go[$ibeg_3] eq ':' ) );
15761
# always join an isolated '=', a short quote, or if this
15762
# will put ?/: at start of adjacent lines
15763
if ( $ibeg_1 != $iend_1
15764
&& !$is_short_quote
15771
# unless we can reduce this to two lines
15774
# or three lines, the last with a leading semicolon
15775
|| ( $nmax == $n + 2
15776
&& $types_to_go[$ibeg_nmax] eq ';' )
15778
# or the next line ends with a here doc
15779
|| $types_to_go[$iend_2] eq 'h'
15781
# or the next line ends in an open paren or brace
15782
# and the break hasn't been forced [dima.t]
15783
|| ( !$forced_breakpoint_to_go[$iend_1]
15784
&& $types_to_go[$iend_2] eq '{' )
15787
# do not recombine if the two lines might align well
15788
# this is a very approximate test for this
15790
&& $types_to_go[$ibeg_2] ne
15791
$types_to_go[$ibeg_3] )
15794
# -lp users often prefer this:
15795
# my $title = function($env, $env, $sysarea,
15796
# "bubba Borrower Entry");
15797
# so we will recombine if -lp is used and the line ends in a comma
15799
if ( !$rOpts_line_up_parentheses
15800
|| $types_to_go[$iend_2] ne ',' )
15803
# otherwise, scan the rhs line up to last token for
15804
# complexity. Note that we are not counting the last
15805
# token in case it is an opening paren.
15807
my $depth = $nesting_depth_to_go[$ibeg_2];
15808
for ( my $i = $ibeg_2 + 1 ; $i < $iend_2 ; $i++ ) {
15809
if ( $nesting_depth_to_go[$i] != $depth ) {
15811
last if ( $tv > 1 );
15813
$depth = $nesting_depth_to_go[$i];
15816
# ok to recombine if no level changes before last token
15819
# otherwise, do not recombine if more than two
15821
next if ( $tv > 1 );
15823
# check total complexity of the two adjacent lines
15824
# that will occur if we do this join
15826
( $n < $nmax ) ? $$ri_end[ $n + 1 ] : $iend_2;
15827
for ( my $i = $iend_2 ; $i <= $istop ; $i++ ) {
15828
if ( $nesting_depth_to_go[$i] != $depth ) {
15830
last if ( $tv > 2 );
15832
$depth = $nesting_depth_to_go[$i];
15835
# do not recombine if total is more than 2 level changes
15836
next if ( $tv > 2 );
15841
unless ( $tokens_to_go[$ibeg_2] =~ /^[\{\(\[]$/ ) {
15842
$forced_breakpoint_to_go[$iend_1] = 0;
15847
elsif ( $types_to_go[$iend_1] eq 'k' ) {
15849
# make major control keywords stand out
15854
#/^(last|next|redo|return)$/
15855
$is_last_next_redo_return{ $tokens_to_go[$iend_1] }
15857
# but only if followed by multiple lines
15861
if ( $is_and_or{ $tokens_to_go[$iend_1] } ) {
15863
unless $want_break_before{ $tokens_to_go[$iend_1] };
15867
# handle trailing + - * /
15868
elsif ( $is_math_op{ $types_to_go[$iend_1] } ) {
15870
# combine lines if next line has single number
15871
# or a short term followed by same operator
15872
my $i_next_nonblank = $ibeg_2;
15873
my $i_next_next = $i_next_nonblank + 1;
15874
$i_next_next++ if ( $types_to_go[$i_next_next] eq 'b' );
15875
my $number_follows = $types_to_go[$i_next_nonblank] eq 'n'
15877
$i_next_nonblank == $iend_2
15878
|| ( $i_next_next == $iend_2
15879
&& $is_math_op{ $types_to_go[$i_next_next] } )
15880
|| $types_to_go[$i_next_next] eq ';'
15883
# find token before last operator of previous line
15884
my $iend_1_minus = $iend_1;
15886
if ( $iend_1_minus > $ibeg_1 );
15888
if ( $types_to_go[$iend_1_minus] eq 'b'
15889
&& $iend_1_minus > $ibeg_1 );
15891
my $short_term_follows =
15892
( $types_to_go[$iend_2] eq $types_to_go[$iend_1]
15893
&& $types_to_go[$iend_1_minus] =~ /^[in]$/
15894
&& $iend_2 <= $ibeg_2 + 2
15895
&& length( $tokens_to_go[$ibeg_2] ) <
15896
$rOpts_short_concatenation_item_length );
15899
unless ( $number_follows || $short_term_follows );
15902
#----------------------------------------------------------
15903
# Section 2: Now examine token at $ibeg_2 (left end of second line of pair)
15905
#----------------------------------------------------------
15907
# join lines identified above as capable of
15908
# causing an outdented line with leading closing paren
15909
if ($previous_outdentable_closing_paren) {
15910
$forced_breakpoint_to_go[$iend_1] = 0;
15913
# do not recombine lines with leading :
15914
elsif ( $types_to_go[$ibeg_2] eq ':' ) {
15915
$leading_amp_count++;
15916
next if $want_break_before{ $types_to_go[$ibeg_2] };
15919
# handle lines with leading &&, ||
15920
elsif ( $is_amp_amp{ $types_to_go[$ibeg_2] } ) {
15922
$leading_amp_count++;
15924
# ok to recombine if it follows a ? or :
15925
# and is followed by an open paren..
15927
( $is_ternary{ $types_to_go[$ibeg_1] }
15928
&& $tokens_to_go[$iend_2] eq '(' )
15930
# or is followed by a ? or : at same depth
15932
# We are looking for something like this. We can
15933
# recombine the && line with the line above to make the
15934
# structure more clear:
15936
# exists $G->{Attr}->{V}
15937
# && exists $G->{Attr}->{V}->{$u}
15938
# ? %{ $G->{Attr}->{V}->{$u} }
15941
# We should probably leave something like this alone:
15943
# exists $G->{Attr}->{E}
15944
# && exists $G->{Attr}->{E}->{$u}
15945
# && exists $G->{Attr}->{E}->{$u}->{$v}
15946
# ? %{ $G->{Attr}->{E}->{$u}->{$v} }
15948
# so that we either have all of the &&'s (or ||'s)
15949
# on one line, as in the first example, or break at
15950
# each one as in the second example. However, it
15951
# sometimes makes things worse to check for this because
15952
# it prevents multiple recombinations. So this is not done.
15954
&& $is_ternary{ $types_to_go[$ibeg_3] }
15955
&& $nesting_depth_to_go[$ibeg_3] ==
15956
$nesting_depth_to_go[$ibeg_2] );
15958
next if !$ok && $want_break_before{ $types_to_go[$ibeg_2] };
15959
$forced_breakpoint_to_go[$iend_1] = 0;
15961
# tweak the bond strength to give this joint priority
15966
# Identify and recombine a broken ?/: chain
15967
elsif ( $types_to_go[$ibeg_2] eq '?' ) {
15969
# Do not recombine different levels
15970
my $lev = $levels_to_go[$ibeg_2];
15971
next if ( $lev ne $levels_to_go[$ibeg_1] );
15973
# Do not recombine a '?' if either next line or
15974
# previous line does not start with a ':'. The reasons
15975
# are that (1) no alignment of the ? will be possible
15976
# and (2) the expression is somewhat complex, so the
15977
# '?' is harder to see in the interior of the line.
15978
my $follows_colon =
15979
$ibeg_1 >= 0 && $types_to_go[$ibeg_1] eq ':';
15980
my $precedes_colon =
15981
$ibeg_3 >= 0 && $types_to_go[$ibeg_3] eq ':';
15982
next unless ( $follows_colon || $precedes_colon );
15984
# we will always combine a ? line following a : line
15985
if ( !$follows_colon ) {
15987
# ...otherwise recombine only if it looks like a chain.
15988
# we will just look at a few nearby lines to see if
15989
# this looks like a chain.
15990
my $local_count = 0;
15991
foreach my $ii ( $ibeg_0, $ibeg_1, $ibeg_3, $ibeg_4 ) {
15994
&& $types_to_go[$ii] eq ':'
15995
&& $levels_to_go[$ii] == $lev;
15997
next unless ( $local_count > 1 );
15999
$forced_breakpoint_to_go[$iend_1] = 0;
16002
# do not recombine lines with leading '.'
16003
elsif ( $types_to_go[$ibeg_2] =~ /^(\.)$/ ) {
16004
my $i_next_nonblank = $ibeg_2 + 1;
16005
if ( $types_to_go[$i_next_nonblank] eq 'b' ) {
16006
$i_next_nonblank++;
16012
# ... unless there is just one and we can reduce
16013
# this to two lines if we do. For example, this
16017
# '($dummy, $pat) = &get_next_tex_cmd;' . '$args .= $pat;'
16019
# looks better than this:
16020
# $bodyA .= '($dummy, $pat) = &get_next_tex_cmd;'
16021
# . '$args .= $pat;'
16026
&& $types_to_go[$ibeg_1] ne $types_to_go[$ibeg_2]
16029
# ... or this would strand a short quote, like this
16030
# . "some long quote"
16032
|| ( $types_to_go[$i_next_nonblank] eq 'Q'
16033
&& $i_next_nonblank >= $iend_2 - 1
16034
&& length( $tokens_to_go[$i_next_nonblank] ) <
16035
$rOpts_short_concatenation_item_length )
16039
# handle leading keyword..
16040
elsif ( $types_to_go[$ibeg_2] eq 'k' ) {
16042
# handle leading "or"
16043
if ( $tokens_to_go[$ibeg_2] eq 'or' ) {
16046
$this_line_is_semicolon_terminated
16049
# following 'if' or 'unless' or 'or'
16050
$types_to_go[$ibeg_1] eq 'k'
16051
&& $is_if_unless{ $tokens_to_go[$ibeg_1] }
16053
# important: only combine a very simple or
16054
# statement because the step below may have
16055
# combined a trailing 'and' with this or,
16056
# and we do not want to then combine
16057
# everything together
16058
&& ( $iend_2 - $ibeg_2 <= 7 )
16063
# handle leading 'and'
16064
elsif ( $tokens_to_go[$ibeg_2] eq 'and' ) {
16066
# Decide if we will combine a single terminal 'and'
16067
# after an 'if' or 'unless'.
16069
# This looks best with the 'and' on the same
16070
# line as the 'if':
16073
# if $seconds and $nu < 2;
16075
# But this looks better as shown:
16078
# if !$this->{Parents}{$_}
16079
# or $this->{Parents}{$_} eq $_;
16083
$this_line_is_semicolon_terminated
16086
# following 'if' or 'unless' or 'or'
16087
$types_to_go[$ibeg_1] eq 'k'
16088
&& ( $is_if_unless{ $tokens_to_go[$ibeg_1] }
16089
|| $tokens_to_go[$ibeg_1] eq 'or' )
16094
# handle leading "if" and "unless"
16095
elsif ( $is_if_unless{ $tokens_to_go[$ibeg_2] } ) {
16097
# FIXME: This is still experimental..may not be too useful
16100
$this_line_is_semicolon_terminated
16102
# previous line begins with 'and' or 'or'
16103
&& $types_to_go[$ibeg_1] eq 'k'
16104
&& $is_and_or{ $tokens_to_go[$ibeg_1] }
16109
# handle all other leading keywords
16112
# keywords look best at start of lines,
16113
# but combine things like "1 while"
16114
unless ( $is_assignment{ $types_to_go[$iend_1] } ) {
16116
if ( ( $types_to_go[$iend_1] ne 'k' )
16117
&& ( $tokens_to_go[$ibeg_2] ne 'while' ) );
16122
# similar treatment of && and || as above for 'and' and 'or':
16123
# NOTE: This block of code is currently bypassed because
16124
# of a previous block but is retained for possible future use.
16125
elsif ( $is_amp_amp{ $types_to_go[$ibeg_2] } ) {
16127
# maybe looking at something like:
16128
# unless $TEXTONLY || $item =~ m%</?(hr>|p>|a|img)%i;
16132
$this_line_is_semicolon_terminated
16134
# previous line begins with an 'if' or 'unless' keyword
16135
&& $types_to_go[$ibeg_1] eq 'k'
16136
&& $is_if_unless{ $tokens_to_go[$ibeg_1] }
16141
# handle leading + - * /
16142
elsif ( $is_math_op{ $types_to_go[$ibeg_2] } ) {
16143
my $i_next_nonblank = $ibeg_2 + 1;
16144
if ( $types_to_go[$i_next_nonblank] eq 'b' ) {
16145
$i_next_nonblank++;
16148
my $i_next_next = $i_next_nonblank + 1;
16149
$i_next_next++ if ( $types_to_go[$i_next_next] eq 'b' );
16152
$types_to_go[$i_next_nonblank] eq 'n'
16153
&& ( $i_next_nonblank >= $iend_2 - 1
16154
|| $types_to_go[$i_next_next] eq ';' )
16157
my $iend_1_nonblank =
16158
$types_to_go[$iend_1] eq 'b' ? $iend_1 - 1 : $iend_1;
16159
my $iend_2_nonblank =
16160
$types_to_go[$iend_2] eq 'b' ? $iend_2 - 1 : $iend_2;
16162
my $is_short_term =
16163
( $types_to_go[$ibeg_2] eq $types_to_go[$ibeg_1]
16164
&& $types_to_go[$iend_2_nonblank] =~ /^[in]$/
16165
&& $types_to_go[$iend_1_nonblank] =~ /^[in]$/
16166
&& $iend_2_nonblank <= $ibeg_2 + 2
16167
&& length( $tokens_to_go[$iend_2_nonblank] ) <
16168
$rOpts_short_concatenation_item_length );
16170
# Combine these lines if this line is a single
16171
# number, or if it is a short term with same
16172
# operator as the previous line. For example, in
16173
# the following code we will combine all of the
16174
# short terms $A, $B, $C, $D, $E, $F, together
16175
# instead of leaving them one per line:
16177
# $A * $B * $C * $D * $E * $F *
16178
# ( 2. * $eps * $sigma * $area ) *
16179
# ( 1. / $tcold**3 - 1. / $thot**3 );
16180
# This can be important in math-intensive code.
16186
# or if we can reduce this to two lines if we do.
16189
&& $types_to_go[$ibeg_1] ne $types_to_go[$ibeg_2] )
16193
# handle line with leading = or similar
16194
elsif ( $is_assignment{ $types_to_go[$ibeg_2] } ) {
16195
next unless $n == 1;
16199
# unless we can reduce this to two lines
16202
# or three lines, the last with a leading semicolon
16203
|| ( $nmax == 3 && $types_to_go[$ibeg_nmax] eq ';' )
16205
# or the next line ends with a here doc
16206
|| $types_to_go[$iend_2] eq 'h'
16210
#----------------------------------------------------------
16212
# Combine the lines if we arrive here and it is possible
16213
#----------------------------------------------------------
16215
# honor hard breakpoints
16216
next if ( $forced_breakpoint_to_go[$iend_1] > 0 );
16218
my $bs = $bond_strength_to_go[$iend_1] + $bs_tweak;
16220
# combined line cannot be too long
16222
if excess_line_length( $ibeg_1, $iend_2 ) > 0;
16224
# do not recombine if we would skip in indentation levels
16225
if ( $n < $nmax ) {
16226
my $if_next = $$ri_beg[ $n + 1 ];
16229
$levels_to_go[$ibeg_1] < $levels_to_go[$ibeg_2]
16230
&& $levels_to_go[$ibeg_2] < $levels_to_go[$if_next]
16232
# but an isolated 'if (' is undesirable
16235
&& $iend_1 - $ibeg_1 <= 2
16236
&& $types_to_go[$ibeg_1] eq 'k'
16237
&& $tokens_to_go[$ibeg_1] eq 'if'
16238
&& $tokens_to_go[$iend_1] ne '('
16244
next if ( $bs == NO_BREAK );
16246
# remember the pair with the greatest bond strength
16253
if ( $bs > $bs_best ) {
16260
# recombine the pair with the greatest bond strength
16262
splice @$ri_beg, $n_best, 1;
16263
splice @$ri_end, $n_best - 1, 1;
16265
# keep going if we are still making progress
16269
return ( $ri_beg, $ri_end );
16271
} # end recombine_breakpoints
16273
sub break_all_chain_tokens {
16275
# scan the current breakpoints looking for breaks at certain "chain
16276
# operators" (. : && || + etc) which often occur repeatedly in a long
16277
# statement. If we see a break at any one, break at all similar tokens
16278
# within the same container.
16280
my ( $ri_left, $ri_right ) = @_;
16282
my %saw_chain_type;
16283
my %left_chain_type;
16284
my %right_chain_type;
16285
my %interior_chain_type;
16286
my $nmax = @$ri_right - 1;
16288
# scan the left and right end tokens of all lines
16290
for my $n ( 0 .. $nmax ) {
16291
my $il = $$ri_left[$n];
16292
my $ir = $$ri_right[$n];
16293
my $typel = $types_to_go[$il];
16294
my $typer = $types_to_go[$ir];
16295
$typel = '+' if ( $typel eq '-' ); # treat + and - the same
16296
$typer = '+' if ( $typer eq '-' );
16297
$typel = '*' if ( $typel eq '/' ); # treat * and / the same
16298
$typer = '*' if ( $typer eq '/' );
16299
my $tokenl = $tokens_to_go[$il];
16300
my $tokenr = $tokens_to_go[$ir];
16302
if ( $is_chain_operator{$tokenl} && $want_break_before{$typel} ) {
16303
next if ( $typel eq '?' );
16304
push @{ $left_chain_type{$typel} }, $il;
16305
$saw_chain_type{$typel} = 1;
16308
if ( $is_chain_operator{$tokenr} && !$want_break_before{$typer} ) {
16309
next if ( $typer eq '?' );
16310
push @{ $right_chain_type{$typer} }, $ir;
16311
$saw_chain_type{$typer} = 1;
16315
return unless $count;
16317
# now look for any interior tokens of the same types
16319
for my $n ( 0 .. $nmax ) {
16320
my $il = $$ri_left[$n];
16321
my $ir = $$ri_right[$n];
16322
for ( my $i = $il + 1 ; $i < $ir ; $i++ ) {
16323
my $type = $types_to_go[$i];
16324
$type = '+' if ( $type eq '-' );
16325
$type = '*' if ( $type eq '/' );
16326
if ( $saw_chain_type{$type} ) {
16327
push @{ $interior_chain_type{$type} }, $i;
16332
return unless $count;
16334
# now make a list of all new break points
16337
# loop over all chain types
16338
foreach my $type ( keys %saw_chain_type ) {
16340
# quit if just ONE continuation line with leading . For example--
16341
# print LATEXFILE '\framebox{\parbox[c][' . $h . '][t]{' . $w . '}{'
16343
last if ( $nmax == 1 && $type =~ /^[\.\+]$/ );
16345
# loop over all interior chain tokens
16346
foreach my $itest ( @{ $interior_chain_type{$type} } ) {
16348
# loop over all left end tokens of same type
16349
if ( $left_chain_type{$type} ) {
16350
next if $nobreak_to_go[ $itest - 1 ];
16351
foreach my $i ( @{ $left_chain_type{$type} } ) {
16352
next unless in_same_container( $i, $itest );
16353
push @insert_list, $itest - 1;
16355
# Break at matching ? if this : is at a different level.
16356
# For example, the ? before $THRf_DEAD in the following
16357
# should get a break if its : gets a break.
16360
# ( $_ & 1 ) ? ( $_ & 4 ) ? $THRf_DEAD : $THRf_ZOMBIE
16361
# : ( $_ & 4 ) ? $THRf_R_DETACHED
16362
# : $THRf_R_JOINABLE;
16364
&& $levels_to_go[$i] != $levels_to_go[$itest] )
16366
my $i_question = $mate_index_to_go[$itest];
16367
if ( $i_question > 0 ) {
16368
push @insert_list, $i_question - 1;
16375
# loop over all right end tokens of same type
16376
if ( $right_chain_type{$type} ) {
16377
next if $nobreak_to_go[$itest];
16378
foreach my $i ( @{ $right_chain_type{$type} } ) {
16379
next unless in_same_container( $i, $itest );
16380
push @insert_list, $itest;
16382
# break at matching ? if this : is at a different level
16384
&& $levels_to_go[$i] != $levels_to_go[$itest] )
16386
my $i_question = $mate_index_to_go[$itest];
16387
if ( $i_question >= 0 ) {
16388
push @insert_list, $i_question;
16397
# insert any new break points
16398
if (@insert_list) {
16399
insert_additional_breaks( \@insert_list, $ri_left, $ri_right );
16405
# Look for assignment operators that could use a breakpoint.
16406
# For example, in the following snippet
16408
# $HOME = $ENV{HOME}
16411
# || die "no home directory for user $<";
16413
# we could break at the = to get this, which is a little nicer:
16418
# || die "no home directory for user $<";
16420
# The logic here follows the logic in set_logical_padding, which
16421
# will add the padding in the second line to improve alignment.
16423
my ( $ri_left, $ri_right ) = @_;
16424
my $nmax = @$ri_right - 1;
16425
return unless ( $nmax >= 2 );
16427
# scan the left ends of first two lines
16430
for my $n ( 1 .. 2 ) {
16431
my $il = $$ri_left[$n];
16432
my $typel = $types_to_go[$il];
16433
my $tokenl = $tokens_to_go[$il];
16435
my $has_leading_op = ( $tokenl =~ /^\w/ )
16436
? $is_chain_operator{$tokenl} # + - * / : ? && ||
16437
: $is_chain_operator{$typel}; # and, or
16438
return unless ($has_leading_op);
16441
unless ( $tokenl eq $tokbeg
16442
&& $nesting_depth_to_go[$il] eq $depth_beg );
16445
$depth_beg = $nesting_depth_to_go[$il];
16448
# now look for any interior tokens of the same types
16449
my $il = $$ri_left[0];
16450
my $ir = $$ri_right[0];
16452
# now make a list of all new break points
16454
for ( my $i = $ir - 1 ; $i > $il ; $i-- ) {
16455
my $type = $types_to_go[$i];
16456
if ( $is_assignment{$type}
16457
&& $nesting_depth_to_go[$i] eq $depth_beg )
16459
if ( $want_break_before{$type} ) {
16460
push @insert_list, $i - 1;
16463
push @insert_list, $i;
16468
# Break after a 'return' followed by a chain of operators
16469
# return ( $^O !~ /win32|dos/i )
16470
# && ( $^O ne 'VMS' )
16471
# && ( $^O ne 'OS2' )
16472
# && ( $^O ne 'MacOS' );
16475
# ( $^O !~ /win32|dos/i )
16476
# && ( $^O ne 'VMS' )
16477
# && ( $^O ne 'OS2' )
16478
# && ( $^O ne 'MacOS' );
16480
if ( $types_to_go[$i] eq 'k'
16481
&& $tokens_to_go[$i] eq 'return'
16483
&& $nesting_depth_to_go[$i] eq $depth_beg )
16485
push @insert_list, $i;
16488
return unless (@insert_list);
16490
# One final check...
16491
# scan second and third lines and be sure there are no assignments
16492
# we want to avoid breaking at an = to make something like this:
16494
# $html_icons{"$type-$state"}
16495
# or $icon = $html_icons{$type}
16496
# or $icon = $html_icons{$state} )
16497
for my $n ( 1 .. 2 ) {
16498
my $il = $$ri_left[$n];
16499
my $ir = $$ri_right[$n];
16500
for ( my $i = $il + 1 ; $i <= $ir ; $i++ ) {
16501
my $type = $types_to_go[$i];
16503
if ( $is_assignment{$type}
16504
&& $nesting_depth_to_go[$i] eq $depth_beg );
16508
# ok, insert any new break point
16509
if (@insert_list) {
16510
insert_additional_breaks( \@insert_list, $ri_left, $ri_right );
16514
sub insert_final_breaks {
16516
my ( $ri_left, $ri_right ) = @_;
16518
my $nmax = @$ri_right - 1;
16520
# scan the left and right end tokens of all lines
16522
my $i_first_colon = -1;
16523
for my $n ( 0 .. $nmax ) {
16524
my $il = $$ri_left[$n];
16525
my $ir = $$ri_right[$n];
16526
my $typel = $types_to_go[$il];
16527
my $typer = $types_to_go[$ir];
16528
return if ( $typel eq '?' );
16529
return if ( $typer eq '?' );
16530
if ( $typel eq ':' ) { $i_first_colon = $il; last; }
16531
elsif ( $typer eq ':' ) { $i_first_colon = $ir; last; }
16534
# For long ternary chains,
16535
# if the first : we see has its ? in the interior
16536
# of a preceding line, then see if there are any good
16537
# breakpoints before the ?.
16538
if ( $i_first_colon > 0 ) {
16539
my $i_question = $mate_index_to_go[$i_first_colon];
16540
if ( $i_question > 0 ) {
16542
for ( my $ii = $i_question - 1 ; $ii >= 0 ; $ii -= 1 ) {
16543
my $token = $tokens_to_go[$ii];
16544
my $type = $types_to_go[$ii];
16546
# For now, a good break is either a comma or a 'return'.
16547
if ( ( $type eq ',' || $type eq 'k' && $token eq 'return' )
16548
&& in_same_container( $ii, $i_question ) )
16550
push @insert_list, $ii;
16555
# insert any new break points
16556
if (@insert_list) {
16557
insert_additional_breaks( \@insert_list, $ri_left, $ri_right );
16563
sub in_same_container {
16565
# check to see if tokens at i1 and i2 are in the
16566
# same container, and not separated by a comma, ? or :
16567
my ( $i1, $i2 ) = @_;
16568
my $type = $types_to_go[$i1];
16569
my $depth = $nesting_depth_to_go[$i1];
16570
return unless ( $nesting_depth_to_go[$i2] == $depth );
16571
if ( $i2 < $i1 ) { ( $i1, $i2 ) = ( $i2, $i1 ) }
16573
###########################################################
16574
# This is potentially a very slow routine and not critical.
16575
# For safety just give up for large differences.
16576
# See test file 'infinite_loop.txt'
16577
# TODO: replace this loop with a data structure
16578
###########################################################
16579
return if ( $i2-$i1 > 200 );
16581
for ( my $i = $i1 + 1 ; $i < $i2 ; $i++ ) {
16582
next if ( $nesting_depth_to_go[$i] > $depth );
16583
return if ( $nesting_depth_to_go[$i] < $depth );
16585
my $tok = $tokens_to_go[$i];
16586
$tok = ',' if $tok eq '=>'; # treat => same as ,
16588
# Example: we would not want to break at any of these .'s
16589
# : "<A HREF=\"#item_" . htmlify( 0, $s2 ) . "\">$str</A>"
16590
if ( $type ne ':' ) {
16591
return if ( $tok =~ /^[\,\:\?]$/ ) || $tok eq '||' || $tok eq 'or';
16594
return if ( $tok =~ /^[\,]$/ );
16600
sub set_continuation_breaks {
16602
# Define an array of indexes for inserting newline characters to
16603
# keep the line lengths below the maximum desired length. There is
16604
# an implied break after the last token, so it need not be included.
16607
# This routine is part of series of routines which adjust line
16608
# lengths. It is only called if a statement is longer than the
16609
# maximum line length, or if a preliminary scanning located
16610
# desirable break points. Sub scan_list has already looked at
16611
# these tokens and set breakpoints (in array
16612
# $forced_breakpoint_to_go[$i]) where it wants breaks (for example
16613
# after commas, after opening parens, and before closing parens).
16614
# This routine will honor these breakpoints and also add additional
16615
# breakpoints as necessary to keep the line length below the maximum
16616
# requested. It bases its decision on where the 'bond strength' is lowest.
16619
# Output: returns references to the arrays:
16622
# which contain the indexes $i of the first and last tokens on each line.
16625
# In addition, the array:
16626
# $forced_breakpoint_to_go[$i]
16627
# may be updated to be =1 for any index $i after which there must be
16628
# a break. This signals later routines not to undo the breakpoint.
16630
my $saw_good_break = shift;
16631
my @i_first = (); # the first index to output
16632
my @i_last = (); # the last index to output
16633
my @i_colon_breaks = (); # needed to decide if we have to break at ?'s
16634
if ( $types_to_go[0] eq ':' ) { push @i_colon_breaks, 0 }
16636
set_bond_strengths();
16639
my $imax = $max_index_to_go;
16640
if ( $types_to_go[$imin] eq 'b' ) { $imin++ }
16641
if ( $types_to_go[$imax] eq 'b' ) { $imax-- }
16642
my $i_begin = $imin; # index for starting next iteration
16644
my $leading_spaces = leading_spaces_to_go($imin);
16645
my $line_count = 0;
16646
my $last_break_strength = NO_BREAK;
16647
my $i_last_break = -1;
16648
my $max_bias = 0.001;
16649
my $tiny_bias = 0.0001;
16650
my $leading_alignment_token = "";
16651
my $leading_alignment_type = "";
16653
# see if any ?/:'s are in order
16654
my $colons_in_order = 1;
16656
my @colon_list = grep /^[\?\:]$/, @tokens_to_go[ 0 .. $max_index_to_go ];
16657
my $colon_count = @colon_list;
16658
foreach (@colon_list) {
16659
if ( $_ eq $last_tok ) { $colons_in_order = 0; last }
16663
# This is a sufficient but not necessary condition for colon chain
16664
my $is_colon_chain = ( $colons_in_order && @colon_list > 2 );
16666
#-------------------------------------------------------
16667
# BEGINNING of main loop to set continuation breakpoints
16668
# Keep iterating until we reach the end
16669
#-------------------------------------------------------
16670
while ( $i_begin <= $imax ) {
16671
my $lowest_strength = NO_BREAK;
16672
my $starting_sum = $lengths_to_go[$i_begin];
16675
my $lowest_next_token = '';
16676
my $lowest_next_type = 'b';
16677
my $i_lowest_next_nonblank = -1;
16679
#-------------------------------------------------------
16680
# BEGINNING of inner loop to find the best next breakpoint
16681
#-------------------------------------------------------
16682
for ( $i_test = $i_begin ; $i_test <= $imax ; $i_test++ ) {
16683
my $type = $types_to_go[$i_test];
16684
my $token = $tokens_to_go[$i_test];
16685
my $next_type = $types_to_go[ $i_test + 1 ];
16686
my $next_token = $tokens_to_go[ $i_test + 1 ];
16687
my $i_next_nonblank =
16688
( ( $next_type eq 'b' ) ? $i_test + 2 : $i_test + 1 );
16689
my $next_nonblank_type = $types_to_go[$i_next_nonblank];
16690
my $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
16691
my $next_nonblank_block_type = $block_type_to_go[$i_next_nonblank];
16692
my $strength = $bond_strength_to_go[$i_test];
16693
my $must_break = 0;
16695
# FIXME: TESTING: Might want to be able to break after these
16696
# force an immediate break at certain operators
16697
# with lower level than the start of the line
16700
$next_nonblank_type =~ /^(\.|\&\&|\|\|)$/
16701
|| ( $next_nonblank_type eq 'k'
16702
&& $next_nonblank_token =~ /^(and|or)$/ )
16704
&& ( $nesting_depth_to_go[$i_begin] >
16705
$nesting_depth_to_go[$i_next_nonblank] )
16708
set_forced_breakpoint($i_next_nonblank);
16713
# Try to put a break where requested by scan_list
16714
$forced_breakpoint_to_go[$i_test]
16716
# break between ) { in a continued line so that the '{' can
16718
# See similar logic in scan_list which catches instances
16719
# where a line is just something like ') {'
16721
&& ( $token eq ')' )
16722
&& ( $next_nonblank_type eq '{' )
16723
&& ($next_nonblank_block_type)
16724
&& !$rOpts->{'opening-brace-always-on-right'} )
16726
# There is an implied forced break at a terminal opening brace
16727
|| ( ( $type eq '{' ) && ( $i_test == $imax ) )
16731
# Forced breakpoints must sometimes be overridden, for example
16732
# because of a side comment causing a NO_BREAK. It is easier
16733
# to catch this here than when they are set.
16734
if ( $strength < NO_BREAK ) {
16735
$strength = $lowest_strength - $tiny_bias;
16740
# quit if a break here would put a good terminal token on
16741
# the next line and we already have a possible break
16744
&& ( $next_nonblank_type =~ /^[\;\,]$/ )
16748
$lengths_to_go[ $i_next_nonblank + 1 ] -
16750
) > $rOpts_maximum_line_length
16754
last if ( $i_lowest >= 0 );
16757
# Avoid a break which would strand a single punctuation
16758
# token. For example, we do not want to strand a leading
16759
# '.' which is followed by a long quoted string.
16762
&& ( $i_test == $i_begin )
16763
&& ( $i_test < $imax )
16764
&& ( $token eq $type )
16768
$lengths_to_go[ $i_test + 1 ] -
16770
) <= $rOpts_maximum_line_length
16776
if ( ( $i_test < $imax ) && ( $next_type eq 'b' ) ) {
16782
if ( ( $strength <= $lowest_strength ) && ( $strength < NO_BREAK ) )
16785
# break at previous best break if it would have produced
16786
# a leading alignment of certain common tokens, and it
16787
# is different from the latest candidate break
16789
if ($leading_alignment_type);
16791
# Force at least one breakpoint if old code had good
16792
# break. It is only called if a breakpoint is required or
16793
# desired. This will probably need some adjustments
16794
# over time. A goal is to try to be sure that, if a new
16795
# side comment is introduced into formatted text, then
16796
# the same breakpoints will occur. scbreak.t
16799
$i_test == $imax # we are at the end
16800
&& !$forced_breakpoint_count #
16801
&& $saw_good_break # old line had good break
16802
&& $type =~ /^[#;\{]$/ # and this line ends in
16803
# ';' or side comment
16804
&& $i_last_break < 0 # and we haven't made a break
16805
&& $i_lowest > 0 # and we saw a possible break
16806
&& $i_lowest < $imax - 1 # (but not just before this ;)
16807
&& $strength - $lowest_strength < 0.5 * WEAK # and it's good
16810
$lowest_strength = $strength;
16811
$i_lowest = $i_test;
16812
$lowest_next_token = $next_nonblank_token;
16813
$lowest_next_type = $next_nonblank_type;
16814
$i_lowest_next_nonblank = $i_next_nonblank;
16815
last if $must_break;
16817
# set flags to remember if a break here will produce a
16818
# leading alignment of certain common tokens
16819
if ( $line_count > 0
16821
&& ( $lowest_strength - $last_break_strength <= $max_bias )
16824
my $i_last_end = $i_begin - 1;
16825
if ( $types_to_go[$i_last_end] eq 'b' ) { $i_last_end -= 1 }
16826
my $tok_beg = $tokens_to_go[$i_begin];
16827
my $type_beg = $types_to_go[$i_begin];
16830
# check for leading alignment of certain tokens
16832
$tok_beg eq $next_nonblank_token
16833
&& $is_chain_operator{$tok_beg}
16834
&& ( $type_beg eq 'k'
16835
|| $type_beg eq $tok_beg )
16836
&& $nesting_depth_to_go[$i_begin] >=
16837
$nesting_depth_to_go[$i_next_nonblank]
16840
|| ( $tokens_to_go[$i_last_end] eq $token
16841
&& $is_chain_operator{$token}
16842
&& ( $type eq 'k' || $type eq $token )
16843
&& $nesting_depth_to_go[$i_last_end] >=
16844
$nesting_depth_to_go[$i_test] )
16847
$leading_alignment_token = $next_nonblank_token;
16848
$leading_alignment_type = $next_nonblank_type;
16854
( $i_test >= $imax )
16859
$lengths_to_go[ $i_test + 2 ] -
16861
) > $rOpts_maximum_line_length
16864
FORMATTER_DEBUG_FLAG_BREAK
16866
"BREAK: testing i = $i_test imax=$imax $types_to_go[$i_test] $next_nonblank_type leading sp=($leading_spaces) next length = $lengths_to_go[$i_test+2] too_long=$too_long str=$strength\n";
16868
# allow one extra terminal token after exceeding line length
16869
# if it would strand this token.
16870
if ( $rOpts_fuzzy_line_length
16872
&& ( $i_lowest == $i_test )
16873
&& ( length($token) > 1 )
16874
&& ( $next_nonblank_type =~ /^[\;\,]$/ ) )
16881
( $i_test == $imax ) # we're done if no more tokens,
16883
( $i_lowest >= 0 ) # or no more space and we have a break
16889
#-------------------------------------------------------
16890
# END of inner loop to find the best next breakpoint
16891
# Now decide exactly where to put the breakpoint
16892
#-------------------------------------------------------
16894
# it's always ok to break at imax if no other break was found
16895
if ( $i_lowest < 0 ) { $i_lowest = $imax }
16897
# semi-final index calculation
16898
my $i_next_nonblank = (
16899
( $types_to_go[ $i_lowest + 1 ] eq 'b' )
16903
my $next_nonblank_type = $types_to_go[$i_next_nonblank];
16904
my $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
16906
#-------------------------------------------------------
16907
# ?/: rule 1 : if a break here will separate a '?' on this
16908
# line from its closing ':', then break at the '?' instead.
16909
#-------------------------------------------------------
16911
foreach $i ( $i_begin + 1 .. $i_lowest - 1 ) {
16912
next unless ( $tokens_to_go[$i] eq '?' );
16914
# do not break if probable sequence of ?/: statements
16915
next if ($is_colon_chain);
16917
# do not break if statement is broken by side comment
16920
$tokens_to_go[$max_index_to_go] eq '#'
16921
&& terminal_type( \@types_to_go, \@block_type_to_go, 0,
16922
$max_index_to_go ) !~ /^[\;\}]$/
16925
# no break needed if matching : is also on the line
16927
if ( $mate_index_to_go[$i] >= 0
16928
&& $mate_index_to_go[$i] <= $i_next_nonblank );
16931
if ( $want_break_before{'?'} ) { $i_lowest-- }
16935
#-------------------------------------------------------
16936
# END of inner loop to find the best next breakpoint:
16937
# Break the line after the token with index i=$i_lowest
16938
#-------------------------------------------------------
16940
# final index calculation
16941
$i_next_nonblank = (
16942
( $types_to_go[ $i_lowest + 1 ] eq 'b' )
16946
$next_nonblank_type = $types_to_go[$i_next_nonblank];
16947
$next_nonblank_token = $tokens_to_go[$i_next_nonblank];
16949
FORMATTER_DEBUG_FLAG_BREAK
16950
&& print "BREAK: best is i = $i_lowest strength = $lowest_strength\n";
16952
#-------------------------------------------------------
16953
# ?/: rule 2 : if we break at a '?', then break at its ':'
16955
# Note: this rule is also in sub scan_list to handle a break
16956
# at the start and end of a line (in case breaks are dictated
16957
# by side comments).
16958
#-------------------------------------------------------
16959
if ( $next_nonblank_type eq '?' ) {
16960
set_closing_breakpoint($i_next_nonblank);
16962
elsif ( $types_to_go[$i_lowest] eq '?' ) {
16963
set_closing_breakpoint($i_lowest);
16966
#-------------------------------------------------------
16967
# ?/: rule 3 : if we break at a ':' then we save
16968
# its location for further work below. We may need to go
16969
# back and break at its '?'.
16970
#-------------------------------------------------------
16971
if ( $next_nonblank_type eq ':' ) {
16972
push @i_colon_breaks, $i_next_nonblank;
16974
elsif ( $types_to_go[$i_lowest] eq ':' ) {
16975
push @i_colon_breaks, $i_lowest;
16978
# here we should set breaks for all '?'/':' pairs which are
16979
# separated by this line
16983
# save this line segment, after trimming blanks at the ends
16985
( $types_to_go[$i_begin] eq 'b' ) ? $i_begin + 1 : $i_begin );
16987
( $types_to_go[$i_lowest] eq 'b' ) ? $i_lowest - 1 : $i_lowest );
16989
# set a forced breakpoint at a container opening, if necessary, to
16990
# signal a break at a closing container. Excepting '(' for now.
16991
if ( $tokens_to_go[$i_lowest] =~ /^[\{\[]$/
16992
&& !$forced_breakpoint_to_go[$i_lowest] )
16994
set_closing_breakpoint($i_lowest);
16997
# get ready to go again
16998
$i_begin = $i_lowest + 1;
16999
$last_break_strength = $lowest_strength;
17000
$i_last_break = $i_lowest;
17001
$leading_alignment_token = "";
17002
$leading_alignment_type = "";
17003
$lowest_next_token = '';
17004
$lowest_next_type = 'b';
17006
if ( ( $i_begin <= $imax ) && ( $types_to_go[$i_begin] eq 'b' ) ) {
17010
# update indentation size
17011
if ( $i_begin <= $imax ) {
17012
$leading_spaces = leading_spaces_to_go($i_begin);
17016
#-------------------------------------------------------
17017
# END of main loop to set continuation breakpoints
17018
# Now go back and make any necessary corrections
17019
#-------------------------------------------------------
17021
#-------------------------------------------------------
17022
# ?/: rule 4 -- if we broke at a ':', then break at
17023
# corresponding '?' unless this is a chain of ?: expressions
17024
#-------------------------------------------------------
17025
if (@i_colon_breaks) {
17027
# using a simple method for deciding if we are in a ?/: chain --
17028
# this is a chain if it has multiple ?/: pairs all in order;
17030
# Note that if line starts in a ':' we count that above as a break
17031
my $is_chain = ( $colons_in_order && @i_colon_breaks > 1 );
17033
unless ($is_chain) {
17034
my @insert_list = ();
17035
foreach (@i_colon_breaks) {
17036
my $i_question = $mate_index_to_go[$_];
17037
if ( $i_question >= 0 ) {
17038
if ( $want_break_before{'?'} ) {
17040
if ( $i_question > 0
17041
&& $types_to_go[$i_question] eq 'b' )
17047
if ( $i_question >= 0 ) {
17048
push @insert_list, $i_question;
17051
insert_additional_breaks( \@insert_list, \@i_first, \@i_last );
17055
return ( \@i_first, \@i_last, $colon_count );
17058
sub insert_additional_breaks {
17060
# this routine will add line breaks at requested locations after
17061
# sub set_continuation_breaks has made preliminary breaks.
17063
my ( $ri_break_list, $ri_first, $ri_last ) = @_;
17066
my $line_number = 0;
17068
foreach $i_break_left ( sort { $a <=> $b } @$ri_break_list ) {
17070
$i_f = $$ri_first[$line_number];
17071
$i_l = $$ri_last[$line_number];
17072
while ( $i_break_left >= $i_l ) {
17075
# shouldn't happen unless caller passes bad indexes
17076
if ( $line_number >= @$ri_last ) {
17078
"Non-fatal program bug: couldn't set break at $i_break_left\n"
17080
report_definite_bug();
17083
$i_f = $$ri_first[$line_number];
17084
$i_l = $$ri_last[$line_number];
17087
my $i_break_right = $i_break_left + 1;
17088
if ( $types_to_go[$i_break_right] eq 'b' ) { $i_break_right++ }
17090
if ( $i_break_left >= $i_f
17091
&& $i_break_left < $i_l
17092
&& $i_break_right > $i_f
17093
&& $i_break_right <= $i_l )
17095
splice( @$ri_first, $line_number, 1, ( $i_f, $i_break_right ) );
17096
splice( @$ri_last, $line_number, 1, ( $i_break_left, $i_l ) );
17101
sub set_closing_breakpoint {
17103
# set a breakpoint at a matching closing token
17104
# used to break at the ':' matching a '?' and at the closer matching a '{' or '['
17105
my $i_break = shift;
17107
if ( $mate_index_to_go[$i_break] >= 0 ) {
17109
# CAUTION: infinite recursion possible here:
17110
# set_closing_breakpoint calls set_forced_breakpoint, and
17111
# set_forced_breakpoint calls set_closing_breakpoint
17112
# ( test files attrib.t, BasicLyx.pm.html).
17113
# Don't reduce the '2' in the statement below
17114
if ( $mate_index_to_go[$i_break] > $i_break + 2 ) {
17116
# break before } ] and ), but sub set_forced_breakpoint will decide
17117
# to break before or after a ? and :
17118
my $inc = ( $tokens_to_go[$i_break] eq '?' ) ? 0 : 1;
17119
set_forced_breakpoint( $mate_index_to_go[$i_break] - $inc );
17123
my $type_sequence = $type_sequence_to_go[$i_break];
17124
if ($type_sequence) {
17125
my $closing_token = $matching_token{ $tokens_to_go[$i_break] };
17126
$postponed_breakpoint{$type_sequence} = 1;
17131
# check to see if output line tabbing agrees with input line
17132
# this can be very useful for debugging a script which has an extra
17134
sub compare_indentation_levels {
17136
my ( $python_indentation_level, $structural_indentation_level ) = @_;
17137
if ( ( $python_indentation_level ne $structural_indentation_level ) ) {
17138
$last_tabbing_disagreement = $input_line_number;
17140
if ($in_tabbing_disagreement) {
17143
$tabbing_disagreement_count++;
17145
if ( $tabbing_disagreement_count <= MAX_NAG_MESSAGES ) {
17146
write_logfile_entry(
17147
"Start indentation disagreement: input=$python_indentation_level; output=$structural_indentation_level\n"
17150
$in_tabbing_disagreement = $input_line_number;
17151
$first_tabbing_disagreement = $in_tabbing_disagreement
17152
unless ($first_tabbing_disagreement);
17157
if ($in_tabbing_disagreement) {
17159
if ( $tabbing_disagreement_count <= MAX_NAG_MESSAGES ) {
17160
write_logfile_entry(
17161
"End indentation disagreement from input line $in_tabbing_disagreement\n"
17164
if ( $tabbing_disagreement_count == MAX_NAG_MESSAGES ) {
17165
write_logfile_entry(
17166
"No further tabbing disagreements will be noted\n");
17169
$in_tabbing_disagreement = 0;
17174
#####################################################################
17176
# the Perl::Tidy::IndentationItem class supplies items which contain
17177
# how much whitespace should be used at the start of a line
17179
#####################################################################
17181
package Perl::Tidy::IndentationItem;
17183
# Indexes for indentation items
17184
use constant SPACES => 0; # total leading white spaces
17185
use constant LEVEL => 1; # the indentation 'level'
17186
use constant CI_LEVEL => 2; # the 'continuation level'
17187
use constant AVAILABLE_SPACES => 3; # how many left spaces available
17189
use constant CLOSED => 4; # index where we saw closing '}'
17190
use constant COMMA_COUNT => 5; # how many commas at this level?
17191
use constant SEQUENCE_NUMBER => 6; # output batch number
17192
use constant INDEX => 7; # index in output batch list
17193
use constant HAVE_CHILD => 8; # any dependents?
17194
use constant RECOVERABLE_SPACES => 9; # how many spaces to the right
17195
# we would like to move to get
17196
# alignment (negative if left)
17197
use constant ALIGN_PAREN => 10; # do we want to try to align
17198
# with an opening structure?
17199
use constant MARKED => 11; # if visited by corrector logic
17200
use constant STACK_DEPTH => 12; # indentation nesting depth
17201
use constant STARTING_INDEX => 13; # first token index of this level
17202
use constant ARROW_COUNT => 14; # how many =>'s
17206
# Create an 'indentation_item' which describes one level of leading
17207
# whitespace when the '-lp' indentation is used. We return
17208
# a reference to an anonymous array of associated variables.
17209
# See above constants for storage scheme.
17211
$class, $spaces, $level,
17212
$ci_level, $available_spaces, $index,
17213
$gnu_sequence_number, $align_paren, $stack_depth,
17217
my $arrow_count = 0;
17218
my $comma_count = 0;
17219
my $have_child = 0;
17220
my $want_right_spaces = 0;
17223
$spaces, $level, $ci_level,
17224
$available_spaces, $closed, $comma_count,
17225
$gnu_sequence_number, $index, $have_child,
17226
$want_right_spaces, $align_paren, $marked,
17227
$stack_depth, $starting_index, $arrow_count,
17231
sub permanently_decrease_AVAILABLE_SPACES {
17233
# make a permanent reduction in the available indentation spaces
17234
# at one indentation item. NOTE: if there are child nodes, their
17235
# total SPACES must be reduced by the caller.
17237
my ( $item, $spaces_needed ) = @_;
17238
my $available_spaces = $item->get_AVAILABLE_SPACES();
17239
my $deleted_spaces =
17240
( $available_spaces > $spaces_needed )
17242
: $available_spaces;
17243
$item->decrease_AVAILABLE_SPACES($deleted_spaces);
17244
$item->decrease_SPACES($deleted_spaces);
17245
$item->set_RECOVERABLE_SPACES(0);
17247
return $deleted_spaces;
17250
sub tentatively_decrease_AVAILABLE_SPACES {
17252
# We are asked to tentatively delete $spaces_needed of indentation
17253
# for an indentation item. We may want to undo this later. NOTE: if
17254
# there are child nodes, their total SPACES must be reduced by the
17256
my ( $item, $spaces_needed ) = @_;
17257
my $available_spaces = $item->get_AVAILABLE_SPACES();
17258
my $deleted_spaces =
17259
( $available_spaces > $spaces_needed )
17261
: $available_spaces;
17262
$item->decrease_AVAILABLE_SPACES($deleted_spaces);
17263
$item->decrease_SPACES($deleted_spaces);
17264
$item->increase_RECOVERABLE_SPACES($deleted_spaces);
17265
return $deleted_spaces;
17268
sub get_STACK_DEPTH {
17270
return $self->[STACK_DEPTH];
17275
return $self->[SPACES];
17280
return $self->[MARKED];
17284
my ( $self, $value ) = @_;
17285
if ( defined($value) ) {
17286
$self->[MARKED] = $value;
17288
return $self->[MARKED];
17291
sub get_AVAILABLE_SPACES {
17293
return $self->[AVAILABLE_SPACES];
17296
sub decrease_SPACES {
17297
my ( $self, $value ) = @_;
17298
if ( defined($value) ) {
17299
$self->[SPACES] -= $value;
17301
return $self->[SPACES];
17304
sub decrease_AVAILABLE_SPACES {
17305
my ( $self, $value ) = @_;
17306
if ( defined($value) ) {
17307
$self->[AVAILABLE_SPACES] -= $value;
17309
return $self->[AVAILABLE_SPACES];
17312
sub get_ALIGN_PAREN {
17314
return $self->[ALIGN_PAREN];
17317
sub get_RECOVERABLE_SPACES {
17319
return $self->[RECOVERABLE_SPACES];
17322
sub set_RECOVERABLE_SPACES {
17323
my ( $self, $value ) = @_;
17324
if ( defined($value) ) {
17325
$self->[RECOVERABLE_SPACES] = $value;
17327
return $self->[RECOVERABLE_SPACES];
17330
sub increase_RECOVERABLE_SPACES {
17331
my ( $self, $value ) = @_;
17332
if ( defined($value) ) {
17333
$self->[RECOVERABLE_SPACES] += $value;
17335
return $self->[RECOVERABLE_SPACES];
17340
return $self->[CI_LEVEL];
17345
return $self->[LEVEL];
17348
sub get_SEQUENCE_NUMBER {
17350
return $self->[SEQUENCE_NUMBER];
17355
return $self->[INDEX];
17358
sub get_STARTING_INDEX {
17360
return $self->[STARTING_INDEX];
17363
sub set_HAVE_CHILD {
17364
my ( $self, $value ) = @_;
17365
if ( defined($value) ) {
17366
$self->[HAVE_CHILD] = $value;
17368
return $self->[HAVE_CHILD];
17371
sub get_HAVE_CHILD {
17373
return $self->[HAVE_CHILD];
17376
sub set_ARROW_COUNT {
17377
my ( $self, $value ) = @_;
17378
if ( defined($value) ) {
17379
$self->[ARROW_COUNT] = $value;
17381
return $self->[ARROW_COUNT];
17384
sub get_ARROW_COUNT {
17386
return $self->[ARROW_COUNT];
17389
sub set_COMMA_COUNT {
17390
my ( $self, $value ) = @_;
17391
if ( defined($value) ) {
17392
$self->[COMMA_COUNT] = $value;
17394
return $self->[COMMA_COUNT];
17397
sub get_COMMA_COUNT {
17399
return $self->[COMMA_COUNT];
17403
my ( $self, $value ) = @_;
17404
if ( defined($value) ) {
17405
$self->[CLOSED] = $value;
17407
return $self->[CLOSED];
17412
return $self->[CLOSED];
17415
#####################################################################
17417
# the Perl::Tidy::VerticalAligner::Line class supplies an object to
17418
# contain a single output line
17420
#####################################################################
17422
package Perl::Tidy::VerticalAligner::Line;
17429
use constant JMAX => 0;
17430
use constant JMAX_ORIGINAL_LINE => 1;
17431
use constant RTOKENS => 2;
17432
use constant RFIELDS => 3;
17433
use constant RPATTERNS => 4;
17434
use constant INDENTATION => 5;
17435
use constant LEADING_SPACE_COUNT => 6;
17436
use constant OUTDENT_LONG_LINES => 7;
17437
use constant LIST_TYPE => 8;
17438
use constant IS_HANGING_SIDE_COMMENT => 9;
17439
use constant RALIGNMENTS => 10;
17440
use constant MAXIMUM_LINE_LENGTH => 11;
17441
use constant RVERTICAL_TIGHTNESS_FLAGS => 12;
17444
$_index_map{jmax} = JMAX;
17445
$_index_map{jmax_original_line} = JMAX_ORIGINAL_LINE;
17446
$_index_map{rtokens} = RTOKENS;
17447
$_index_map{rfields} = RFIELDS;
17448
$_index_map{rpatterns} = RPATTERNS;
17449
$_index_map{indentation} = INDENTATION;
17450
$_index_map{leading_space_count} = LEADING_SPACE_COUNT;
17451
$_index_map{outdent_long_lines} = OUTDENT_LONG_LINES;
17452
$_index_map{list_type} = LIST_TYPE;
17453
$_index_map{is_hanging_side_comment} = IS_HANGING_SIDE_COMMENT;
17454
$_index_map{ralignments} = RALIGNMENTS;
17455
$_index_map{maximum_line_length} = MAXIMUM_LINE_LENGTH;
17456
$_index_map{rvertical_tightness_flags} = RVERTICAL_TIGHTNESS_FLAGS;
17458
my @_default_data = ();
17459
$_default_data[JMAX] = undef;
17460
$_default_data[JMAX_ORIGINAL_LINE] = undef;
17461
$_default_data[RTOKENS] = undef;
17462
$_default_data[RFIELDS] = undef;
17463
$_default_data[RPATTERNS] = undef;
17464
$_default_data[INDENTATION] = undef;
17465
$_default_data[LEADING_SPACE_COUNT] = undef;
17466
$_default_data[OUTDENT_LONG_LINES] = undef;
17467
$_default_data[LIST_TYPE] = undef;
17468
$_default_data[IS_HANGING_SIDE_COMMENT] = undef;
17469
$_default_data[RALIGNMENTS] = [];
17470
$_default_data[MAXIMUM_LINE_LENGTH] = undef;
17471
$_default_data[RVERTICAL_TIGHTNESS_FLAGS] = undef;
17475
# methods to count object population
17477
# Report the current value of the class population counter.
sub get_count {
    return $_count;
}
17478
# Bump the class population counter and return its new value.
sub _increment_count {
    return ++$_count;
}
17479
# Lower the class population counter and return its new value.
sub _decrement_count {
    return --$_count;
}
17482
# Constructor may be called as a class method
17484
my ( $caller, %arg ) = @_;
17485
my $caller_is_obj = ref($caller);
17486
my $class = $caller_is_obj || $caller;
17488
my $self = bless [], $class;
17490
$self->[RALIGNMENTS] = [];
17493
foreach ( keys %_index_map ) {
17494
$index = $_index_map{$_};
17495
if ( exists $arg{$_} ) { $self->[$index] = $arg{$_} }
17496
elsif ($caller_is_obj) { $self->[$index] = $caller->[$index] }
17497
else { $self->[$index] = $_default_data[$index] }
17500
$self->_increment_count();
17505
$_[0]->_decrement_count();
17508
# Accessor: return the value held in the JMAX slot of this line object.
sub get_jmax {
    my ($self) = @_;
    return $self->[JMAX];
}
17509
# Accessor: return the value held in the JMAX_ORIGINAL_LINE slot.
sub get_jmax_original_line {
    my ($self) = @_;
    return $self->[JMAX_ORIGINAL_LINE];
}
17510
# Accessor: return the value held in the RTOKENS slot.
sub get_rtokens {
    my ($self) = @_;
    return $self->[RTOKENS];
}
17511
# Accessor: return the value held in the RFIELDS slot.
sub get_rfields {
    my ($self) = @_;
    return $self->[RFIELDS];
}
17512
# Accessor: return the value held in the RPATTERNS slot.
sub get_rpatterns {
    my ($self) = @_;
    return $self->[RPATTERNS];
}
17513
# Accessor: return the value held in the INDENTATION slot.
sub get_indentation {
    my ($self) = @_;
    return $self->[INDENTATION];
}
17514
# Accessor: return the value held in the LEADING_SPACE_COUNT slot.
sub get_leading_space_count {
    my ($self) = @_;
    return $self->[LEADING_SPACE_COUNT];
}
17515
# Accessor: return the value held in the OUTDENT_LONG_LINES slot.
sub get_outdent_long_lines {
    my ($self) = @_;
    return $self->[OUTDENT_LONG_LINES];
}
17516
# Accessor: return the value held in the LIST_TYPE slot.
sub get_list_type {
    my ($self) = @_;
    return $self->[LIST_TYPE];
}
17517
# Accessor: return the value held in the IS_HANGING_SIDE_COMMENT slot.
sub get_is_hanging_side_comment {
    my ($self) = @_;
    return $self->[IS_HANGING_SIDE_COMMENT];
}
17518
# Accessor: return the value held in the RVERTICAL_TIGHTNESS_FLAGS slot.
sub get_rvertical_tightness_flags {
    my ($self) = @_;
    return $self->[RVERTICAL_TIGHTNESS_FLAGS];
}
17520
# Forward a set_column call to the alignment object stored at index $j
# of this line's RALIGNMENTS list, passing along the new column value.
sub set_column {
    my ( $self, $j, $column ) = @_;
    my $alignment = $self->[RALIGNMENTS]->[$j];
    return $alignment->set_column($column);
}
17521
# Return the entry stored at index $j of this line's RALIGNMENTS list.
sub get_alignment {
    my ( $self, $j ) = @_;
    return $self->[RALIGNMENTS]->[$j];
}
17522
# Return the contents of this line's RALIGNMENTS list (dereferenced, so
# the result follows the caller's list/scalar context).
sub get_alignments {
    my ($self) = @_;
    return @{ $self->[RALIGNMENTS] };
}
17523
# Ask the alignment object stored at index $j of this line's RALIGNMENTS
# list for its column, and return whatever it reports.
sub get_column {
    my ( $self, $j ) = @_;
    my $alignment = $self->[RALIGNMENTS]->[$j];
    return $alignment->get_column();
}
17525
sub get_starting_column {
17526
$_[0]->[RALIGNMENTS]->[ $_[1] ]->get_starting_column();
17529
sub increment_column {
17530
$_[0]->[RALIGNMENTS]->[ $_[1] ]->increment_column( $_[2] );
17532
# Replace the contents of this line's RALIGNMENTS list with the
# arguments that follow the invocant.
sub set_alignments {
    my ( $self, @alignments ) = @_;
    return @{ $self->[RALIGNMENTS] } = @alignments;
}
17534
sub current_field_width {
17538
return $self->get_column($j);
17541
return $self->get_column($j) - $self->get_column( $j - 1 );
17545
sub field_width_growth {
17548
return $self->get_column($j) - $self->get_starting_column($j);
17551
sub starting_field_width {
17555
return $self->get_starting_column($j);
17558
return $self->get_starting_column($j) -
17559
$self->get_starting_column( $j - 1 );
17563
sub increase_field_width {
17566
my ( $j, $pad ) = @_;
17567
my $jmax = $self->get_jmax();
17568
for my $k ( $j .. $jmax ) {
17569
$self->increment_column( $k, $pad );
17573
sub get_available_space_on_right {
17575
my $jmax = $self->get_jmax();
17576
return $self->[MAXIMUM_LINE_LENGTH] - $self->get_column($jmax);
17579
# Mutator: store the supplied value in the JMAX slot.
sub set_jmax {
    my ( $self, $value ) = @_;
    return $self->[JMAX] = $value;
}
17580
# Mutator: store the supplied value in the JMAX_ORIGINAL_LINE slot.
sub set_jmax_original_line {
    my ( $self, $value ) = @_;
    return $self->[JMAX_ORIGINAL_LINE] = $value;
}
17581
# Mutator: store the supplied value in the RTOKENS slot.
sub set_rtokens {
    my ( $self, $value ) = @_;
    return $self->[RTOKENS] = $value;
}
17582
# Mutator: store the supplied value in the RFIELDS slot.
sub set_rfields {
    my ( $self, $value ) = @_;
    return $self->[RFIELDS] = $value;
}
17583
# Mutator: store the supplied value in the RPATTERNS slot.
sub set_rpatterns {
    my ( $self, $value ) = @_;
    return $self->[RPATTERNS] = $value;
}
17584
# Mutator: store the supplied value in the INDENTATION slot.
sub set_indentation {
    my ( $self, $value ) = @_;
    return $self->[INDENTATION] = $value;
}
17585
# Mutator: store the supplied value in the LEADING_SPACE_COUNT slot.
sub set_leading_space_count {
    my ( $self, $value ) = @_;
    return $self->[LEADING_SPACE_COUNT] = $value;
}
17586
# Mutator: store the supplied value in the OUTDENT_LONG_LINES slot.
sub set_outdent_long_lines {
    my ( $self, $value ) = @_;
    return $self->[OUTDENT_LONG_LINES] = $value;
}
17587
# Mutator: store the supplied value in the LIST_TYPE slot.
sub set_list_type {
    my ( $self, $value ) = @_;
    return $self->[LIST_TYPE] = $value;
}
17588
# Mutator: store the supplied value in the IS_HANGING_SIDE_COMMENT slot.
sub set_is_hanging_side_comment {
    my ( $self, $value ) = @_;
    return $self->[IS_HANGING_SIDE_COMMENT] = $value;
}
17589
# Store $alignment at index $j of this line's RALIGNMENTS list.
sub set_alignment {
    my ( $self, $j, $alignment ) = @_;
    return $self->[RALIGNMENTS]->[$j] = $alignment;
}
17593
#####################################################################
17595
# the Perl::Tidy::VerticalAligner::Alignment class holds information
17596
# on a single column being aligned
17598
#####################################################################
17599
package Perl::Tidy::VerticalAligner::Alignment;
17607
# Symbolic array indexes
17608
use constant COLUMN => 0; # the current column number
17609
use constant STARTING_COLUMN => 1; # column number when created
17610
use constant MATCHING_TOKEN => 2; # what token we are matching
17611
use constant STARTING_LINE => 3; # the line index of creation
17612
use constant ENDING_LINE => 4; # the most recent line to use it
17613
use constant SAVED_COLUMN => 5; # copy of COLUMN kept by save_column for restore_column
17614
use constant SERIAL_NUMBER => 6; # unique number for this alignment
17615
# (just its index in an array)
17617
# Correspondence between variables and array indexes
17619
$_index_map{column} = COLUMN;
17620
$_index_map{starting_column} = STARTING_COLUMN;
17621
$_index_map{matching_token} = MATCHING_TOKEN;
17622
$_index_map{starting_line} = STARTING_LINE;
17623
$_index_map{ending_line} = ENDING_LINE;
17624
$_index_map{saved_column} = SAVED_COLUMN;
17625
$_index_map{serial_number} = SERIAL_NUMBER;
17627
my @_default_data = ();
17628
$_default_data[COLUMN] = undef;
17629
$_default_data[STARTING_COLUMN] = undef;
17630
$_default_data[MATCHING_TOKEN] = undef;
17631
$_default_data[STARTING_LINE] = undef;
17632
$_default_data[ENDING_LINE] = undef;
17633
$_default_data[SAVED_COLUMN] = undef;
17634
$_default_data[SERIAL_NUMBER] = undef;
17636
# class population count
17639
# Report the current value of the class population counter.
sub get_count {
    return $_count;
}
17640
# Bump the class population counter and return its new value.
sub _increment_count {
    return ++$_count;
}
17641
# Lower the class population counter and return its new value.
sub _decrement_count {
    return --$_count;
}
17646
my ( $caller, %arg ) = @_;
17647
my $caller_is_obj = ref($caller);
17648
my $class = $caller_is_obj || $caller;
17650
my $self = bless [], $class;
17652
foreach ( keys %_index_map ) {
17653
my $index = $_index_map{$_};
17654
if ( exists $arg{$_} ) { $self->[$index] = $arg{$_} }
17655
elsif ($caller_is_obj) { $self->[$index] = $caller->[$index] }
17656
else { $self->[$index] = $_default_data[$index] }
17658
$self->_increment_count();
17663
$_[0]->_decrement_count();
17666
# Accessor: return the value held in the COLUMN slot of this alignment.
sub get_column {
    my $self = shift;
    return $self->[COLUMN];
}
17667
# Accessor: return the value held in the STARTING_COLUMN slot.
sub get_starting_column {
    my $self = shift;
    return $self->[STARTING_COLUMN];
}
17668
# Accessor: return the value held in the MATCHING_TOKEN slot.
sub get_matching_token {
    my $self = shift;
    return $self->[MATCHING_TOKEN];
}
17669
# Accessor: return the value held in the STARTING_LINE slot.
sub get_starting_line {
    my $self = shift;
    return $self->[STARTING_LINE];
}
17670
# Accessor: return the value held in the ENDING_LINE slot.
sub get_ending_line {
    my $self = shift;
    return $self->[ENDING_LINE];
}
17671
# Accessor: return the value held in the SERIAL_NUMBER slot.
sub get_serial_number {
    my $self = shift;
    return $self->[SERIAL_NUMBER];
}
17673
# Mutator: store the supplied value in the COLUMN slot.
sub set_column {
    my ( $self, $value ) = @_;
    return $self->[COLUMN] = $value;
}
17674
# Mutator: store the supplied value in the STARTING_COLUMN slot.
sub set_starting_column {
    my ( $self, $value ) = @_;
    return $self->[STARTING_COLUMN] = $value;
}
17675
# Mutator: store the supplied value in the MATCHING_TOKEN slot.
sub set_matching_token {
    my ( $self, $value ) = @_;
    return $self->[MATCHING_TOKEN] = $value;
}
17676
# Mutator: store the supplied value in the STARTING_LINE slot.
sub set_starting_line {
    my ( $self, $value ) = @_;
    return $self->[STARTING_LINE] = $value;
}
17677
# Mutator: store the supplied value in the ENDING_LINE slot.
sub set_ending_line {
    my ( $self, $value ) = @_;
    return $self->[ENDING_LINE] = $value;
}
17678
# Add $pad to the value in the COLUMN slot and return the updated value.
sub increment_column {
    my ( $self, $pad ) = @_;
    return $self->[COLUMN] += $pad;
}
17680
# Copy the current COLUMN value into the SAVED_COLUMN slot.
sub save_column {
    my $self = shift;
    return $self->[SAVED_COLUMN] = $self->[COLUMN];
}
17681
# Copy the SAVED_COLUMN value back into the COLUMN slot.
sub restore_column {
    my $self = shift;
    return $self->[COLUMN] = $self->[SAVED_COLUMN];
}
17685
package Perl::Tidy::VerticalAligner;
17687
# The Perl::Tidy::VerticalAligner package collects output lines and
17688
# attempts to line up certain common tokens, such as => and #, which are
17689
# identified by the calling routine.
17691
# There are two main routines: append_line and flush. Append acts as a
17692
# storage buffer, collecting lines into a group which can be vertically
17693
# aligned. When alignment is no longer possible or desirable, it dumps
17694
# the group to flush.
17696
# append_line -----> flush
17704
# Caution: these debug flags produce a lot of output
17705
# They should all be 0 except when debugging small scripts
17707
use constant VALIGN_DEBUG_FLAG_APPEND => 0;
17708
use constant VALIGN_DEBUG_FLAG_APPEND0 => 0;
17709
use constant VALIGN_DEBUG_FLAG_TERNARY => 0;
17711
my $debug_warning = sub {
17712
print "VALIGN_DEBUGGING with key $_[0]\n";
17715
VALIGN_DEBUG_FLAG_APPEND && $debug_warning->('APPEND');
17716
VALIGN_DEBUG_FLAG_APPEND0 && $debug_warning->('APPEND0');
17721
$vertical_aligner_self
17723
$maximum_alignment_index
17727
$previous_minimum_jmax_seen
17728
$previous_maximum_jmax_seen
17729
$maximum_line_index
17734
$last_group_level_written
17735
$last_leading_space_count
17739
$last_comment_column
17740
$last_side_comment_line_number
17741
$last_side_comment_length
17742
$last_side_comment_level
17743
$outdented_line_count
17744
$first_outdented_line_at
17745
$last_outdented_line_at
17746
$diagnostics_object
17748
$file_writer_object
17749
@side_comment_history
17750
$comment_leading_space_count
17751
$is_matching_terminal_line
17758
$cached_line_leading_space_count
17759
$cached_seqno_string
17762
$last_nonblank_seqno_string
17766
$rOpts_maximum_line_length
17767
$rOpts_continuation_indentation
17768
$rOpts_indent_columns
17770
$rOpts_entab_leading_whitespace
17773
$rOpts_fixed_position_side_comment
17774
$rOpts_minimum_space_to_comment
17782
( $class, $rOpts, $file_writer_object, $logger_object, $diagnostics_object )
17785
# variables describing the entire space group:
17786
$ralignment_list = [];
17788
$last_group_level_written = -1;
17789
$extra_indent_ok = 0; # can we move all lines to the right?
17790
$last_side_comment_length = 0;
17791
$maximum_jmax_seen = 0;
17792
$minimum_jmax_seen = 0;
17793
$previous_minimum_jmax_seen = 0;
17794
$previous_maximum_jmax_seen = 0;
17796
# variables describing each line of the group
17797
@group_lines = (); # list of all lines in group
17799
$outdented_line_count = 0;
17800
$first_outdented_line_at = 0;
17801
$last_outdented_line_at = 0;
17802
$last_side_comment_line_number = 0;
17803
$last_side_comment_level = -1;
17804
$is_matching_terminal_line = 0;
17806
# most recent 3 side comments; [ line number, column ]
17807
$side_comment_history[0] = [ -300, 0 ];
17808
$side_comment_history[1] = [ -200, 0 ];
17809
$side_comment_history[2] = [ -100, 0 ];
17811
# write_leader_and_string cache:
17812
$cached_line_text = "";
17813
$cached_line_type = 0;
17814
$cached_line_flag = 0;
17816
$cached_line_valid = 0;
17817
$cached_line_leading_space_count = 0;
17818
$cached_seqno_string = "";
17820
# string of sequence numbers joined together
17821
$seqno_string = "";
17822
$last_nonblank_seqno_string = "";
17824
# frequently used parameters
17825
$rOpts_indent_columns = $rOpts->{'indent-columns'};
17826
$rOpts_tabs = $rOpts->{'tabs'};
17827
$rOpts_entab_leading_whitespace = $rOpts->{'entab-leading-whitespace'};
17828
$rOpts_fixed_position_side_comment =
17829
$rOpts->{'fixed-position-side-comment'};
17830
$rOpts_minimum_space_to_comment = $rOpts->{'minimum-space-to-comment'};
17831
$rOpts_maximum_line_length = $rOpts->{'maximum-line-length'};
17832
$rOpts_valign = $rOpts->{'valign'};
17834
forget_side_comment();
17836
initialize_for_new_group();
17838
$vertical_aligner_self = {};
17839
bless $vertical_aligner_self, $class;
17840
return $vertical_aligner_self;
17843
# Reset all per-group state so that the next line appended starts a fresh
# vertical alignment group.  Takes no arguments; works only on the package
# variables shared by the aligner routines.
sub initialize_for_new_group {
    $maximum_line_index      = -1;       # lines in the current group
    $maximum_alignment_index = -1;       # alignments in current group
    $zero_count              = 0;        # count consecutive lines without tokens
    $current_line            = undef;    # line being matched for alignment
    $group_maximum_gap       = 0;        # largest gap introduced

    # The group type is compared against 'COMMENT' when lines are appended,
    # so it must be cleared here; otherwise a finished block-comment group
    # would leave its type behind for the next group (and the comparison
    # would see undef before the first group).
    $group_type = "";

    $marginal_match              = 0;
    $comment_leading_space_count = 0;
    $last_leading_space_count    = 0;
}
17855
# interface to Perl::Tidy::Diagnostics routines
17856
# Forward all arguments to the diagnostics object's write_diagnostics
# method.  Does nothing when no diagnostics object has been installed.
sub write_diagnostics {
    if ($diagnostics_object) {
        $diagnostics_object->write_diagnostics(@_);
    }
}
17862
# interface to Perl::Tidy::Logger routines
17864
# Forward all arguments to the logger object's warning method.  Does
# nothing when no logger object has been installed.
# NOTE(review): the sub header was missing here; the name 'warning' is
# restored by analogy with the sibling wrappers, which are each named
# after the method they forward to -- confirm against callers.
sub warning {
    if ($logger_object) {
        $logger_object->warning(@_);
    }
}
17869
# Forward all arguments to the logger object's write_logfile_entry method.
# Does nothing when no logger object has been installed.
sub write_logfile_entry {
    if ($logger_object) {
        $logger_object->write_logfile_entry(@_);
    }
}
17875
# Tell the logger object that a definite program bug was detected.  No
# arguments are passed on.  Does nothing when no logger object has been
# installed.
sub report_definite_bug {
    if ($logger_object) {
        $logger_object->report_definite_bug();
    }
}
17883
sub get_SPACES {

    # return the number of leading spaces associated with an indentation
    # variable $indentation is either a constant number of spaces or an
    # object with a get_SPACES method.
    my $indentation = shift;
    return ref($indentation) ? $indentation->get_SPACES() : $indentation;
}
17890
sub get_RECOVERABLE_SPACES {

    # return the number of spaces (+ means shift right, - means shift left)
    # that we would like to shift a group of lines with the same indentation
    # to get them to line up with their opening parens.
    # A plain numeric indentation has nothing to recover, so it yields 0.
    my $indentation = shift;
    return ref($indentation) ? $indentation->get_RECOVERABLE_SPACES() : 0;
}
17899
# Return the stack depth reported by an indentation object, or 0 when
# $indentation is a plain number of spaces rather than an object.
sub get_STACK_DEPTH {

    my $indentation = shift;
    return ref($indentation) ? $indentation->get_STACK_DEPTH() : 0;
}
17905
# Create one new alignment object for the current line of the group,
# record it in the group's alignment list, and return it.
#   $col   = column at which the alignment is created
#   $token = the token which this alignment lines up
sub make_alignment {
    my ( $col, $token ) = @_;

    # make one new alignment at column $col which aligns token $token
    ++$maximum_alignment_index;

    # Direct method-call syntax is used instead of 'new Class(...)' so the
    # call parses the same way whether or not the class is loaded yet.
    # NOTE(review): 'column' is passed because the alignment is created at
    # $col and dump_alignments reads get_column() as well as
    # get_starting_column() -- confirm against the Alignment constructor.
    my $alignment = Perl::Tidy::VerticalAligner::Alignment->new(
        column          => $col,
        starting_column => $col,
        matching_token  => $token,
        starting_line   => $maximum_line_index,
        ending_line     => $maximum_line_index,
        serial_number   => $maximum_alignment_index,
    );
    $ralignment_list->[$maximum_alignment_index] = $alignment;
    return $alignment;
}
17922
# Debugging aid: print a tab-separated table of the alignments in the
# current group (one header line, then one row per alignment) to the
# currently selected output handle.
sub dump_alignments {
    print
"Current Alignments:\ni\ttoken\tstarting_column\tcolumn\tstarting_line\tending_line\n";
    for my $i ( 0 .. $maximum_alignment_index ) {
        my $column          = $ralignment_list->[$i]->get_column();
        my $starting_column = $ralignment_list->[$i]->get_starting_column();
        my $matching_token  = $ralignment_list->[$i]->get_matching_token();
        my $starting_line   = $ralignment_list->[$i]->get_starting_line();
        my $ending_line     = $ralignment_list->[$i]->get_ending_line();
        print
"$i\t$matching_token\t$starting_column\t$column\t$starting_line\t$ending_line\n";
    }
}
17936
# Ask every alignment in the current group to save its column, so that
# restore_alignment_columns can put it back later.
sub save_alignment_columns {
    for my $i ( 0 .. $maximum_alignment_index ) {
        $ralignment_list->[$i]->save_column();
    }
}
17942
# Ask every alignment in the current group to restore the column that was
# previously recorded by save_alignment_columns.
sub restore_alignment_columns {
    for my $i ( 0 .. $maximum_alignment_index ) {
        $ralignment_list->[$i]->restore_column();
    }
}
17948
# Discard the remembered side comment column by resetting it to 0.
sub forget_side_comment {
    $last_comment_column = 0;
}
17954
# sub append is called to place one line in the current vertical group.
17956
# The input parameters are:
17957
# $level = indentation level of this line
17958
# $rfields = reference to array of fields
17959
# $rpatterns = reference to array of patterns, one per field
17960
# $rtokens = reference to array of tokens starting fields 1,2,..
17962
# Here is an example of what this package does. In this example,
17963
# we are trying to line up both the '=>' and the '#'.
17965
# '18' => 'grave', # \`
17966
# '19' => 'acute', # `'
17967
# '20' => 'caron', # \v
17968
# <-tabs-><f1-><--field 2 ---><-f3->
17971
# col1 col2 col3 col4
17973
# The calling routine has already broken the entire line into 3 fields as
17974
# indicated. (So the work of identifying promising common tokens has
17975
# already been done).
17977
# In this example, there will be 2 tokens being matched: '=>' and '#'.
17978
# They are the leading parts of fields 2 and 3, but we do need to know
17979
# what they are so that we can dump a group of lines when these tokens
17982
# The fields contain the actual characters of each field. The patterns
17983
# are like the fields, but they contain mainly token types instead
17984
# of tokens, so they have fewer characters. They are used to be
17985
# sure we are matching fields of similar type.
17987
# In this example, there will be 4 column indexes being adjusted. The
17988
# first one is always at zero. The interior columns are at the start of
17989
# the matching tokens, and the last one tracks the maximum line length.
17991
# Basically, each time a new line comes in, it joins the current vertical
17992
# group if possible. Otherwise it causes the current group to be dumped
17993
# and a new group is started.
17995
# For each new group member, the column locations are increased, as
17996
# necessary, to make room for the new fields. When the group is finally
17997
# output, these column numbers are used to compute the amount of spaces of
17998
# padding needed for each field.
18000
# Programming note: the fields are assumed not to have any tab characters.
18001
# Tabs have been previously removed except for tabs in quoted strings and
18002
# side comments. Tabs in these fields can mess up the column counting.
18003
# The log file warns the user if there are any such tabs.
18006
$level, $level_end,
18007
$indentation, $rfields,
18008
$rtokens, $rpatterns,
18009
$is_forced_break, $outdent_long_lines,
18010
$is_terminal_ternary, $is_terminal_statement,
18011
$do_not_pad, $rvertical_tightness_flags,
18015
# number of fields is $jmax
18016
# number of tokens between fields is $jmax-1
18017
my $jmax = $#{$rfields};
18019
my $leading_space_count = get_SPACES($indentation);
18021
# set outdented flag to be sure we either align within statements or
18022
# across statement boundaries, but not both.
18023
my $is_outdented = $last_leading_space_count > $leading_space_count;
18024
$last_leading_space_count = $leading_space_count;
18026
# Patch: undo for hanging side comment
18027
my $is_hanging_side_comment =
18028
( $jmax == 1 && $rtokens->[0] eq '#' && $rfields->[0] =~ /^\s*$/ );
18029
$is_outdented = 0 if $is_hanging_side_comment;
18031
VALIGN_DEBUG_FLAG_APPEND0 && do {
18033
"APPEND0: entering lines=$maximum_line_index new #fields= $jmax, leading_count=$leading_space_count last_cmt=$last_comment_column force=$is_forced_break\n";
18036
# Validate cached line if necessary: If we can produce a container
18037
# with just 2 lines total by combining an existing cached opening
18038
# token with the closing token to follow, then we will mark both
18039
# cached flags as valid.
18040
if ($rvertical_tightness_flags) {
18041
if ( $maximum_line_index <= 0
18042
&& $cached_line_type
18044
&& $rvertical_tightness_flags->[2]
18045
&& $rvertical_tightness_flags->[2] == $cached_seqno )
18047
$rvertical_tightness_flags->[3] ||= 1;
18048
$cached_line_valid ||= 1;
18052
# do not join an opening block brace with an unbalanced line
18053
# unless requested with a flag value of 2
18054
if ( $cached_line_type == 3
18055
&& $maximum_line_index < 0
18056
&& $cached_line_flag < 2
18057
&& $level_jump != 0 )
18059
$cached_line_valid = 0;
18062
# patch until new aligner is finished
18063
if ($do_not_pad) { my_flush() }
18065
# shouldn't happen:
18066
if ( $level < 0 ) { $level = 0 }
18068
# do not align code across indentation level changes
18069
# or if vertical alignment is turned off for debugging
18070
if ( $level != $group_level || $is_outdented || !$rOpts_valign ) {
18072
# we are allowed to shift a group of lines to the right if its
18073
# level is greater than the previous and next group
18075
( $level < $group_level && $last_group_level_written < $group_level );
18079
# If we know that this line will get flushed out by itself because
18080
# of level changes, we can leave the extra_indent_ok flag set.
18081
# That way, if we get an external flush call, we will still be
18082
# able to do some -lp alignment if necessary.
18083
$extra_indent_ok = ( $is_terminal_statement && $level > $group_level );
18085
$group_level = $level;
18087
# wait until after the above flush to get the leading space
18088
# count because it may have been changed if the -icp flag is in
18090
$leading_space_count = get_SPACES($indentation);
18094
# --------------------------------------------------------------------
18095
# Patch to collect outdentable block COMMENTS
18096
# --------------------------------------------------------------------
18097
my $is_blank_line = "";
18098
my $is_block_comment = ( $jmax == 0 && $rfields->[0] =~ /^#/ );
18099
if ( $group_type eq 'COMMENT' ) {
18103
&& $outdent_long_lines
18104
&& $leading_space_count == $comment_leading_space_count
18109
$group_lines[ ++$maximum_line_index ] = $rfields->[0];
18117
# --------------------------------------------------------------------
18118
# add dummy fields for terminal ternary
18119
# --------------------------------------------------------------------
18120
my $j_terminal_match;
18121
if ( $is_terminal_ternary && $current_line ) {
18122
$j_terminal_match =
18123
fix_terminal_ternary( $rfields, $rtokens, $rpatterns );
18124
$jmax = @{$rfields} - 1;
18127
# --------------------------------------------------------------------
18128
# add dummy fields for else statement
18129
# --------------------------------------------------------------------
18130
if ( $rfields->[0] =~ /^else\s*$/
18132
&& $level_jump == 0 )
18134
$j_terminal_match = fix_terminal_else( $rfields, $rtokens, $rpatterns );
18135
$jmax = @{$rfields} - 1;
18138
# --------------------------------------------------------------------
18139
# Step 1. Handle simple line of code with no fields to match.
18140
# --------------------------------------------------------------------
18141
if ( $jmax <= 0 ) {
18144
if ( $maximum_line_index >= 0
18145
&& !get_RECOVERABLE_SPACES( $group_lines[0]->get_indentation() ) )
18148
# flush the current group if it has some aligned columns..
18149
if ( $group_lines[0]->get_jmax() > 1 ) { my_flush() }
18151
# flush current group if we are just collecting side comments..
18154
# ...and we haven't seen a comment lately
18155
( $zero_count > 3 )
18157
# ..or if this new line doesn't fit to the left of the comments
18158
|| ( ( $leading_space_count + length( $$rfields[0] ) ) >
18159
$group_lines[0]->get_column(0) )
18166
# patch to start new COMMENT group if this comment may be outdented
18167
if ( $is_block_comment
18168
&& $outdent_long_lines
18169
&& $maximum_line_index < 0 )
18171
$group_type = 'COMMENT';
18172
$comment_leading_space_count = $leading_space_count;
18173
$group_lines[ ++$maximum_line_index ] = $rfields->[0];
18177
# just write this line directly if no current group, no side comment,
18178
# and no space recovery is needed.
18179
if ( $maximum_line_index < 0 && !get_RECOVERABLE_SPACES($indentation) )
18181
write_leader_and_string( $leading_space_count, $$rfields[0], 0,
18182
$outdent_long_lines, $rvertical_tightness_flags );
18190
# programming check: (shouldn't happen)
18191
# an error here implies an incorrect call was made
18192
if ( $jmax > 0 && ( $#{$rtokens} != ( $jmax - 1 ) ) ) {
18194
"Program bug in Perl::Tidy::VerticalAligner - number of tokens = $#{$rtokens} should be one less than number of fields: $#{$rfields})\n"
18196
report_definite_bug();
18199
# --------------------------------------------------------------------
18200
# create an object to hold this line
18201
# --------------------------------------------------------------------
18202
my $new_line = new Perl::Tidy::VerticalAligner::Line(
18204
jmax_original_line => $jmax,
18205
rtokens => $rtokens,
18206
rfields => $rfields,
18207
rpatterns => $rpatterns,
18208
indentation => $indentation,
18209
leading_space_count => $leading_space_count,
18210
outdent_long_lines => $outdent_long_lines,
18212
is_hanging_side_comment => $is_hanging_side_comment,
18213
maximum_line_length => $rOpts->{'maximum-line-length'},
18214
rvertical_tightness_flags => $rvertical_tightness_flags,
18217
# Initialize a global flag saying if the last line of the group should
18218
# match end of group and also terminate the group. There should be no
18219
# returns between here and where the flag is handled at the bottom.
18220
my $col_matching_terminal = 0;
18221
if ( defined($j_terminal_match) ) {
18223
# remember the column of the terminal ? or { to match with
18224
$col_matching_terminal = $current_line->get_column($j_terminal_match);
18226
# set global flag for sub decide_if_aligned
18227
$is_matching_terminal_line = 1;
18230
# --------------------------------------------------------------------
18231
# It simplifies things to create a zero length side comment
18233
# --------------------------------------------------------------------
18234
make_side_comment( $new_line, $level_end );
18236
# --------------------------------------------------------------------
18237
# Decide if this is a simple list of items.
18238
# There are 3 list types: none, comma, comma-arrow.
18239
# We use this below to be less restrictive in deciding what to align.
18240
# --------------------------------------------------------------------
18241
if ($is_forced_break) {
18242
decide_if_list($new_line);
18245
if ($current_line) {
18247
# --------------------------------------------------------------------
18248
# Allow hanging side comment to join current group, if any
18249
# This will help keep side comments aligned, because otherwise we
18250
# will have to start a new group, making alignment less likely.
18251
# --------------------------------------------------------------------
18252
join_hanging_comment( $new_line, $current_line )
18253
if $is_hanging_side_comment;
18255
# --------------------------------------------------------------------
18256
# If there is just one previous line, and it has more fields
18257
# than the new line, try to join fields together to get a match with
18258
# the new line. At the present time, only a single leading '=' is
18259
# allowed to be compressed out. This is useful in rare cases where
18260
# a table is forced to use old breakpoints because of side comments,
18261
# and the table starts out something like this:
18262
# my %MonthChars = ('0', 'Jan', # side comment
18265
# Eliminating the '=' field will allow the remaining fields to line up.
18266
# This situation does not occur if there are no side comments
18267
# because scan_list would put a break after the opening '('.
18268
# --------------------------------------------------------------------
18269
eliminate_old_fields( $new_line, $current_line );
18271
# --------------------------------------------------------------------
18272
# If the new line has more fields than the current group,
18273
# see if we can match the first fields and combine the remaining
18274
# fields of the new line.
18275
# --------------------------------------------------------------------
18276
eliminate_new_fields( $new_line, $current_line );
18278
# --------------------------------------------------------------------
18279
# Flush previous group unless all common tokens and patterns match..
18280
# --------------------------------------------------------------------
18281
check_match( $new_line, $current_line );
18283
# --------------------------------------------------------------------
18284
# See if there is space for this line in the current group (if any)
18285
# --------------------------------------------------------------------
18286
if ($current_line) {
18287
check_fit( $new_line, $current_line );
18291
# --------------------------------------------------------------------
18292
# Append this line to the current group (or start new group)
18293
# --------------------------------------------------------------------
18294
accept_line($new_line);
18296
# Future update to allow this to vary:
18297
$current_line = $new_line if ( $maximum_line_index == 0 );
18299
# output this group if it ends in a terminal else or ternary line
18300
if ( defined($j_terminal_match) ) {
18302
# if there is only one line in the group (maybe due to failure to match
18303
# perfectly with previous lines), then align the ? or { of this
18304
# terminal line with the previous one unless that would make the line
18306
if ( $maximum_line_index == 0 ) {
18307
my $col_now = $current_line->get_column($j_terminal_match);
18308
my $pad = $col_matching_terminal - $col_now;
18309
my $padding_available =
18310
$current_line->get_available_space_on_right();
18311
if ( $pad > 0 && $pad <= $padding_available ) {
18312
$current_line->increase_field_width( $j_terminal_match, $pad );
18316
$is_matching_terminal_line = 0;
18319
# --------------------------------------------------------------------
18320
# Step 8. Some old debugging stuff
18321
# --------------------------------------------------------------------
18322
VALIGN_DEBUG_FLAG_APPEND && do {
18323
print "APPEND fields:";
18324
dump_array(@$rfields);
18325
print "APPEND tokens:";
18326
dump_array(@$rtokens);
18327
print "APPEND patterns:";
18328
dump_array(@$rpatterns);
18335
# Allow a hanging side comment (a line whose only content is a side
# comment) to join the current group by padding it out with blank fields
# so that its comment lands in the same field as the group's side comment.
#   $line     = the new line object (modified in place on success)
#   $old_line = the line currently being matched against
# Returns 1 if the line was converted, 0 if it does not qualify.
sub join_hanging_comment {

    my ( $line, $old_line ) = @_;
    my $jmax = $line->get_jmax();
    return 0 unless $jmax == 1;    # must be 2 fields
    my $rtokens = $line->get_rtokens();
    return 0 unless $$rtokens[0] eq '#';    # the second field is a comment..
    my $rfields = $line->get_rfields();
    return 0 unless $$rfields[0] =~ /^\s*$/;    # the first field is empty...
    my $maximum_field_index = $old_line->get_jmax();
    return 0
      unless $maximum_field_index > $jmax;    # the current line has more fields
    my $rpatterns = $line->get_rpatterns();

    $line->set_is_hanging_side_comment(1);
    $jmax = $maximum_field_index;
    $line->set_jmax($jmax);

    # move the comment to the last field, with its token and pattern
    $$rfields[$jmax]         = $$rfields[1];
    $$rtokens[ $jmax - 1 ]   = $$rtokens[0];
    $$rpatterns[ $jmax - 1 ] = $$rpatterns[0];

    # blank out everything between the first field and the comment
    for ( my $j = 1 ; $j < $jmax ; $j++ ) {
        $$rfields[$j] = " ";    # NOTE: caused glitch unless 1 blank, why?
        $$rtokens[ $j - 1 ]   = "";
        $$rpatterns[ $j - 1 ] = "";
    }
    return 1;
}
18364
# If the group consists of just one previous line and that line has more
# fields than the new line, try to merge fields of the previous line so
# that its alignment tokens match those of the new line.
#   first argument  = the new line object
#   second argument = the single line currently in the group (modified in
#                     place on success)
# Also updates the running minimum and maximum field counts seen.
sub eliminate_old_fields {

    my $new_line = shift;
    my $jmax     = $new_line->get_jmax();
    if ( $jmax > $maximum_jmax_seen ) { $maximum_jmax_seen = $jmax }
    if ( $jmax < $minimum_jmax_seen ) { $minimum_jmax_seen = $jmax }

    # there must be one previous line
    return unless ( $maximum_line_index == 0 );

    my $old_line            = shift;
    my $maximum_field_index = $old_line->get_jmax();

    ###############################################
    # this line must have fewer fields
    return unless $maximum_field_index > $jmax;
    ###############################################

    # Identify specific cases where field elimination is allowed:
    # case=1: both lines have comma-separated lists, and the first
    #         line has an equals
    # case=2: both lines have leading equals

    # case 1 is the default
    my $case = 1;

    # See if case 2: both lines have leading '='
    # We'll require similar leading patterns in this case
    my $old_rtokens   = $old_line->get_rtokens();
    my $rtokens       = $new_line->get_rtokens();
    my $rpatterns     = $new_line->get_rpatterns();
    my $old_rpatterns = $old_line->get_rpatterns();
    if (   $rtokens->[0] =~ /^=\d*$/
        && $old_rtokens->[0] eq $rtokens->[0]
        && $old_rpatterns->[0] eq $rpatterns->[0] )
    {
        $case = 2;
    }

    # not too many fewer fields in new line for case 1
    return unless ( $case != 1 || $maximum_field_index - 2 <= $jmax );

    # case 1 must have side comment
    my $old_rfields = $old_line->get_rfields();
    return
      if ( $case == 1
        && length( $$old_rfields[$maximum_field_index] ) == 0 );

    my $rfields = $new_line->get_rfields();

    my $hid_equals = 0;    # set when an old '=' token is merged away

    my @new_alignments        = ();
    my @new_fields            = ();
    my @new_matching_patterns = ();
    my @new_matching_tokens   = ();

    my $j = 0;             # index of the next new-line token to match
    my $k;                 # index into the old line's tokens and fields
    my $current_field   = '';
    my $current_pattern = '';

    # loop over all old tokens
    my $in_match = 0;      # true once the first token has been matched
    for ( $k = 0 ; $k < $maximum_field_index ; $k++ ) {
        $current_field   .= $$old_rfields[$k];
        $current_pattern .= $$old_rpatterns[$k];
        last if ( $j > $jmax - 1 );

        if ( $$old_rtokens[$k] eq $$rtokens[$j] ) {

            # tokens agree: close off the accumulated field here
            $in_match                  = 1;
            $new_fields[$j]            = $current_field;
            $new_matching_patterns[$j] = $current_pattern;
            $current_field             = '';
            $current_pattern           = '';
            $new_matching_tokens[$j]   = $$old_rtokens[$k];
            $new_alignments[$j]        = $old_line->get_alignment($k);
            $j++;
        }
        else {

            # tokens differ: this old token gets absorbed into the field
            if ( $$old_rtokens[$k] =~ /^\=\d*$/ ) {
                last if ( $case == 2 );    # avoid problems with stuff
                                           # like:   $a=$b=$c=$d;
                $hid_equals = 1;
            }
            last
              if ( $in_match && $case == 1 )
              ;    # disallow gaps in matching field types in case 1
        }
    }

    # Modify the current state if we are successful.
    # We must exactly reach the ends of both lists for success.
    if (   ( $j == $jmax )
        && ( $current_field eq '' )
        && ( $case != 1 || $hid_equals ) )
    {
        $k = $maximum_field_index;
        $current_field   .= $$old_rfields[$k];
        $current_pattern .= $$old_rpatterns[$k];
        $new_fields[$j]            = $current_field;
        $new_matching_patterns[$j] = $current_pattern;

        $new_alignments[$j]  = $old_line->get_alignment($k);
        $maximum_field_index = $j;

        $old_line->set_alignments(@new_alignments);
        $old_line->set_jmax($jmax);
        $old_line->set_rtokens( \@new_matching_tokens );
        $old_line->set_rfields( \@new_fields );

        # note: the old line is given the new line's pattern array
        $old_line->set_rpatterns( \@$rpatterns );
    }
}
18479
# create an empty side comment if none exists
18480
# Make sure the new line ends in a side comment field, appending an empty
# one if necessary, and keep the remembered side comment position fresh.
#   $new_line  = the line object being added (modified in place)
#   $level_end = level value recorded as the last side comment level
sub make_side_comment {
    my $new_line  = shift;
    my $level_end = shift;
    my $jmax      = $new_line->get_jmax();
    my $rtokens   = $new_line->get_rtokens();

    # if line does not have a side comment...
    if ( ( $jmax == 0 ) || ( $$rtokens[ $jmax - 1 ] ne '#' ) ) {
        my $rfields   = $new_line->get_rfields();
        my $rpatterns = $new_line->get_rpatterns();
        $$rtokens[$jmax]     = '#';
        $$rfields[ ++$jmax ] = '';
        $$rpatterns[$jmax]   = '#';
        $new_line->set_jmax($jmax);
        $new_line->set_jmax_original_line($jmax);
    }

    # line has a side comment..
    else {

        # don't remember old side comment location for very long
        my $line_number = $vertical_aligner_self->get_output_line_number();
        my $rfields     = $new_line->get_rfields();
        if (
            $line_number - $last_side_comment_line_number > 12

            # and don't remember comment location across block level changes
            || ( $level_end < $last_side_comment_level && $$rfields[0] =~ /^}/ )
          )
        {
            forget_side_comment();
        }
        $last_side_comment_line_number = $line_number;
        $last_side_comment_level       = $level_end;
    }
}
18517
# Record on the line object whether it looks like one row of a list.
#   $line = the line object to examine; its list type is set to the first
#           separator token when every separator qualifies, or to "" when
#           some later separator does not.  Lines whose first token is not
#           a comma or comma-arrow are left untouched.
sub decide_if_list {

    my $line = shift;

    # A list will be taken to be a line with a forced break in which all
    # of the field separators are commas or comma-arrows (except for the
    # final token, which the loop below does not examine).

    # List separator tokens are things like ',3' or '=>2',
    # where the trailing digit is the nesting depth.  Allow braces
    # to allow nested list items.
    my $rtokens    = $line->get_rtokens();
    my $test_token = $$rtokens[0];
    if ( $test_token =~ /^(\,|=>)/ ) {
        my $list_type = $test_token;
        my $jmax      = $line->get_jmax();

        foreach ( 1 .. $jmax - 2 ) {
            if ( $$rtokens[$_] !~ /^(\,|=>|\{)/ ) {

                # found a separator which cannot belong to a list
                $list_type = "";
                last;
            }
        }
        $line->set_list_type($list_type);
    }
}
18544
# If the new line has more fields than the current group, and its leading
# tokens and patterns agree with the group's, merge the surplus fields of
# the new line into one so that it can join the group.
#   $new_line = the line object being added (modified in place)
#   $old_line = the line currently being matched against
sub eliminate_new_fields {

    return unless ( $maximum_line_index >= 0 );
    my ( $new_line, $old_line ) = @_;
    my $jmax = $new_line->get_jmax();

    my $old_rtokens = $old_line->get_rtokens();
    my $rtokens     = $new_line->get_rtokens();
    my $is_assignment =
      ( $rtokens->[0] =~ /^=\d*$/ && ( $old_rtokens->[0] eq $rtokens->[0] ) );

    # must be monotonic variation
    return unless ( $is_assignment || $previous_maximum_jmax_seen <= $jmax );

    # must be more fields in the new line
    my $maximum_field_index = $old_line->get_jmax();
    return unless ( $maximum_field_index < $jmax );

    unless ($is_assignment) {
        return
          unless ( $old_line->get_jmax_original_line() == $minimum_jmax_seen )
          ;    # only if monotonic

        # never combine fields of a comma list
        return
          unless ( $maximum_field_index > 1 )
          && ( $new_line->get_list_type() !~ /^,/ );
    }

    my $rfields       = $new_line->get_rfields();
    my $rpatterns     = $new_line->get_rpatterns();
    my $old_rpatterns = $old_line->get_rpatterns();

    # loop over all OLD tokens except comment and check match
    my $match = 1;
    my $k;
    for ( $k = 0 ; $k < $maximum_field_index - 1 ; $k++ ) {
        if (   ( $$old_rtokens[$k] ne $$rtokens[$k] )
            || ( $$old_rpatterns[$k] ne $$rpatterns[$k] ) )
        {
            $match = 0;
            last;
        }
    }

    # first tokens agree, so combine extra new tokens
    if ($match) {
        for $k ( $maximum_field_index .. $jmax - 1 ) {

            $$rfields[ $maximum_field_index - 1 ] .= $$rfields[$k];
            $$rfields[$k] = "";
            $$rpatterns[ $maximum_field_index - 1 ] .= $$rpatterns[$k];
            $$rpatterns[$k] = "";
        }

        # the side comment becomes the field right after the merged one
        $$rtokens[ $maximum_field_index - 1 ] = '#';
        $$rfields[$maximum_field_index]   = $$rfields[$jmax];
        $$rpatterns[$maximum_field_index] = $$rpatterns[$jmax];
        $jmax                             = $maximum_field_index;
    }
    $new_line->set_jmax($jmax);
}
18607
sub fix_terminal_ternary {

    # Add empty fields as necessary to align a ternary term
    # like this:
    #
    #  my $leapyear =
    #      $year % 4   ? 0
    #    : $year % 100 ? 1
    #    : $year % 400 ? 0
    #    :               1;
    #
    # Input: references to the fields, tokens and patterns of the new
    # (terminal) line.  They are rewritten in place, but only if every
    # check below succeeds.
    #
    # Returns the index of the matching '?' token in the last line of the
    # current group; the caller uses it as a column index.  Returns nothing
    # (undef) if no usable '?' is found, leaving the arrays unchanged.

    my ( $rfields, $rtokens, $rpatterns ) = @_;

    my $jmax        = @{$rfields} - 1;
    my $old_line    = $group_lines[$maximum_line_index];
    my $rfields_old = $old_line->get_rfields();

    my $rpatterns_old       = $old_line->get_rpatterns();
    my $rtokens_old         = $old_line->get_rtokens();
    my $maximum_field_index = $old_line->get_jmax();

    # look for the question mark after the :
    my $jquestion;
    my $depth_question;
    my $pad = "";
    for ( my $j = 0 ; $j < $maximum_field_index ; $j++ ) {
        my $tok = $rtokens_old->[$j];
        if ( $tok =~ /^\?(\d+)$/ ) {
            $depth_question = $1;

            # depth must be correct
            next unless ( $depth_question eq $group_level );

            $jquestion = $j;

            # the padding is as wide as the '?' plus its trailing blanks
            if ( $rfields_old->[ $j + 1 ] =~ /^(\?\s*)/ ) {
                $pad = " " x length($1);
            }
            else {
                return;    # shouldn't happen
            }
            last;
        }
    }
    return unless ( defined($jquestion) );    # shouldn't happen

    # Now splice the tokens and patterns of the previous line
    # into the new line to ensure a match.  Add empty fields
    # as necessary.
    my $jadd = $jquestion;

    # Work on copies of the actual arrays in case we have
    # to return due to an error
    my @fields   = @{$rfields};
    my @patterns = @{$rpatterns};
    my @tokens   = @{$rtokens};

    VALIGN_DEBUG_FLAG_TERNARY && do {
        print "CURRENT FIELDS=<@{$rfields_old}>\n";
        print "CURRENT TOKENS=<@{$rtokens_old}>\n";
        print "CURRENT PATTERNS=<@{$rpatterns_old}>\n";
        print "UNMODIFIED FIELDS=<@{$rfields}>\n";
        print "UNMODIFIED TOKENS=<@{$rtokens}>\n";
        print "UNMODIFIED PATTERNS=<@{$rpatterns}>\n";
    };

    # handle cases of leading colon on this line
    if ( $fields[0] =~ /^(:\s*)(.*)$/ ) {

        my ( $colon, $therest ) = ( $1, $2 );

        # Handle sub-case of first field with leading colon plus additional code
        # This is the usual situation as at the '1' below:
        #    : $year % 400 ? 0
        #    :               1;
        # (length is tested so that code consisting of a bare '0' counts)
        if ( length($therest) ) {

            # Split the first field after the leading colon and insert padding.
            # Note that this padding will remain even if the terminal value goes
            # out on a separate line.  This does not seem to look too bad, so no
            # mechanism has been included to undo it.
            my $field1 = shift @fields;
            unshift @fields, ( $colon, $pad . $therest );

            # change the leading pattern from : to ?
            return unless ( $patterns[0] =~ s/^\:/?/ );

            # install leading tokens and patterns of existing line
            unshift( @tokens,   @{$rtokens_old}[ 0 .. $jquestion ] );
            unshift( @patterns, @{$rpatterns_old}[ 0 .. $jquestion ] );

            # insert appropriate number of empty fields
            splice( @fields, 1, 0, ('') x $jadd ) if $jadd;
        }

        # handle sub-case of first field just equal to leading colon.
        # This can happen for example in the example below where
        # the leading '(' would create a new alignment token
        # : ( $name =~ /[]}]$/ ) ? ( $mname = $name )
        # :                        ( $mname = $name . '->' );
        else {

            return unless ( $jmax > 0 && $tokens[0] ne '#' ); # shouldn't happen

            # prepend a leading ? onto the second pattern
            $patterns[1] = "?b" . $patterns[1];

            # pad the second field
            $fields[1] = $pad . $fields[1];

            # install leading tokens and patterns of existing line, replacing
            # leading token and inserting appropriate number of empty fields
            splice( @tokens,   0, 1, @{$rtokens_old}[ 0 .. $jquestion ] );
            splice( @patterns, 1, 0, @{$rpatterns_old}[ 1 .. $jquestion ] );
            splice( @fields, 1, 0, ('') x $jadd ) if $jadd;
        }
    }

    # Handle case of no leading colon on this line.  This will
    # be the case when -wba=':' is used.  For example,
    #  $year % 400 ? 0 :
    #                1;
    else {

        # install leading tokens and patterns of existing line
        $patterns[0] = '?' . 'b' . $patterns[0];
        unshift( @tokens,   @{$rtokens_old}[ 0 .. $jquestion ] );
        unshift( @patterns, @{$rpatterns_old}[ 0 .. $jquestion ] );

        # insert appropriate number of empty fields
        $jadd = $jquestion + 1;
        $fields[0] = $pad . $fields[0];
        splice( @fields, 0, 0, ('') x $jadd ) if $jadd;
    }

    VALIGN_DEBUG_FLAG_TERNARY && do {
        print "MODIFIED TOKENS=<@tokens>\n";
        print "MODIFIED PATTERNS=<@patterns>\n";
        print "MODIFIED FIELDS=<@fields>\n";
    };

    # all ok .. update the arrays
    @{$rfields}   = @fields;
    @{$rtokens}   = @tokens;
    @{$rpatterns} = @patterns;

    # force a flush after this line
    return $jquestion;
}
18761
sub fix_terminal_else {
18763
# Add empty fields as necessary to align a balanced terminal
18764
# else block to a previous if/elsif/unless block,
18767
# if ( 1 || $x ) { print "ok 13\n"; }
18768
# else { print "not ok 13\n"; }
18770
# returns 1 if the else block should be indented
#
# Arguments: references to the fields, tokens and patterns arrays of
# the current (else) line.  All three arrays are modified in place by
# the splices near the end of this sub.
# The line being matched against is read from $current_line, which is
# not a parameter of this sub.
# NOTE(review): $depth_brace, $jparen and $jbrace are used below but
# no declaration of them is visible in this listing -- confirm against
# the complete source.
18772
my ( $rfields, $rtokens, $rpatterns ) = @_;
18773
# index of the last field of the else line
my $jmax = @{$rfields} - 1;
18774
# nothing to do for a line with only one field
return unless ( $jmax > 0 );
18776
# check for balanced else block following if/elsif/unless
18777
my $rfields_old = $current_line->get_rfields();
18779
# TBD: add handling for 'case'
18780
return unless ( $rfields_old->[0] =~ /^(if|elsif|unless)\s*$/ );
18782
# look for the opening brace after the else, and extract the depth
18783
my $tok_brace = $rtokens->[0];
18785
# the digits which follow the '{' in the token are taken as the depth
if ( $tok_brace =~ /^\{(\d+)/ ) { $depth_brace = $1; }
18787
# probably: "else # side_comment"
18790
my $rpatterns_old = $current_line->get_rpatterns();
18791
my $rtokens_old = $current_line->get_rtokens();
18792
my $maximum_field_index = $current_line->get_jmax();
18794
# be sure the previous if/elsif is followed by an opening paren
18796
# the expected paren token is '(' followed by the brace depth
my $tok_paren = '(' . $depth_brace;
18797
my $tok_test = $rtokens_old->[$jparen];
18798
return unless ( $tok_test eq $tok_paren ); # shouldn't happen
18800
# Now find the opening block brace
18802
for ( my $j = 1 ; $j < $maximum_field_index ; $j++ ) {
18803
my $tok = $rtokens_old->[$j];
18804
if ( $tok eq $tok_brace ) {
18809
return unless ( defined($jbrace) ); # shouldn't happen
18811
# Now splice the tokens and patterns of the previous line
18812
# into the else line to ensure a match. Add empty fields
18814
# $jadd equals the number of tokens copied by the first splice below
# ( $jparen .. $jbrace - 1 ); the same number of empty fields is added
my $jadd = $jbrace - $jparen;
18815
splice( @{$rtokens}, 0, 0, @{$rtokens_old}[ $jparen .. $jbrace - 1 ] );
18816
splice( @{$rpatterns}, 1, 0, @{$rpatterns_old}[ $jparen + 1 .. $jbrace ] );
18817
splice( @{$rfields}, 1, 0, ('') x $jadd );
18819
# force a flush after this line if it does not follow a case
18821
unless ( $rfields_old->[0] =~ /^case\s*$/ );
18824
{ # sub check_match
18825
my %is_good_alignment;
18829
# Vertically aligning on certain "good" tokens is usually okay
18830
# so we can be less restrictive in marginal cases.
18831
@_ = qw( { ? => = );
18833
@is_good_alignment{@_} = (1) x scalar(@_);
18838
# See if the current line matches the current vertical alignment group.
18839
# If not, flush the current group.
18840
my $new_line = shift;
18841
my $old_line = shift;
18843
# uses global variables:
18844
# $previous_minimum_jmax_seen
18845
# $maximum_jmax_seen
18846
# $maximum_line_index
18848
my $jmax = $new_line->get_jmax();
18849
my $maximum_field_index = $old_line->get_jmax();
18851
# flush if this line has too many fields
18852
if ( $jmax > $maximum_field_index ) { goto NO_MATCH }
18854
# flush if adding this line would make a non-monotonic field count
18856
( $maximum_field_index > $jmax ) # this has too few fields
18858
( $previous_minimum_jmax_seen <
18859
$jmax ) # and wouldn't be monotonic
18860
|| ( $old_line->get_jmax_original_line() != $maximum_jmax_seen )
18867
# otherwise see if this line matches the current group
18868
my $jmax_original_line = $new_line->get_jmax_original_line();
18869
my $is_hanging_side_comment = $new_line->get_is_hanging_side_comment();
18870
my $rtokens = $new_line->get_rtokens();
18871
my $rfields = $new_line->get_rfields();
18872
my $rpatterns = $new_line->get_rpatterns();
18873
my $list_type = $new_line->get_list_type();
18875
my $group_list_type = $old_line->get_list_type();
18876
my $old_rpatterns = $old_line->get_rpatterns();
18877
my $old_rtokens = $old_line->get_rtokens();
18879
my $jlimit = $jmax - 1;
18880
if ( $maximum_field_index > $jmax ) {
18881
$jlimit = $jmax_original_line;
18882
--$jlimit unless ( length( $new_line->get_rfields()->[$jmax] ) );
18885
# handle comma-separated lists ..
18886
if ( $group_list_type && ( $list_type eq $group_list_type ) ) {
18887
for my $j ( 0 .. $jlimit ) {
18888
my $old_tok = $$old_rtokens[$j];
18889
next unless $old_tok;
18890
my $new_tok = $$rtokens[$j];
18891
next unless $new_tok;
18893
# lists always match ...
18894
# unless they would align any '=>'s with ','s
18896
if ( $old_tok =~ /^=>/ && $new_tok =~ /^,/
18897
|| $new_tok =~ /^=>/ && $old_tok =~ /^,/ );
18901
# do detailed check for everything else except hanging side comments
18902
elsif ( !$is_hanging_side_comment ) {
18904
my $leading_space_count = $new_line->get_leading_space_count();
18908
my $saw_good_alignment;
18910
for my $j ( 0 .. $jlimit ) {
18912
my $old_tok = $$old_rtokens[$j];
18913
my $new_tok = $$rtokens[$j];
18915
# Note on encoding used for alignment tokens:
18916
# -------------------------------------------
18917
# Tokens are "decorated" with information which can help
18918
# prevent unwanted alignments. Consider for example the
18919
# following two lines:
18920
# local ( $xn, $xd ) = split( '/', &'rnorm(@_) );
18921
# local ( $i, $f ) = &'bdiv( $xn, $xd );
18922
# There are three alignment tokens in each line, a comma,
18923
# an =, and a comma. In the first line these three tokens
18925
# ,4+local-18 =3 ,4+split-7
18926
# and in the second line they are encoded as
18927
# ,4+local-18 =3 ,4+&'bdiv-8
18928
# Tokens always at least have token name and nesting
18929
# depth. So in this example the ='s are at depth 3 and
18930
# the ,'s are at depth 4. This prevents aligning tokens
18931
# of different depths. Commas contain additional
18932
# information, as follows:
18933
# , {depth} + {container name} - {spaces to opening paren}
18934
# This allows us to reject matching the rightmost commas
18935
# in the above two lines, since they are for different
18936
# function calls. This encoding is done in
18937
# 'sub send_lines_to_vertical_aligner'.
18939
# Pick off actual token.
18940
# Everything up to the first digit is the actual token.
18941
my $alignment_token = $new_tok;
18942
if ( $alignment_token =~ /^([^\d]+)/ ) { $alignment_token = $1 }
18944
# see if the decorated tokens match
18945
my $tokens_match = $new_tok eq $old_tok
18947
# Exception for matching terminal : of ternary statement..
18948
# consider containers prefixed by ? and : a match
18949
|| ( $new_tok =~ /^,\d*\+\:/ && $old_tok =~ /^,\d*\+\?/ );
18951
# No match if the alignment tokens differ...
18952
if ( !$tokens_match ) {
18954
# ...Unless this is a side comment
18958
# and there is either at least one alignment token
18959
# or this is a single item following a list. This
18960
# latter rule is required for 'December' to join
18961
# the following list:
18963
# '', 'January', 'February', 'March',
18964
# 'April', 'May', 'June', 'July',
18965
# 'August', 'September', 'October', 'November',
18968
# If it doesn't then the -lp formatting will fail.
18969
&& ( $j > 0 || $old_tok =~ /^,/ )
18972
$marginal_match = 1
18973
if ( $marginal_match == 0
18974
&& $maximum_line_index == 0 );
18981
# Calculate amount of padding required to fit this in.
18982
# $pad is the number of spaces by which we must increase
18983
# the current field to squeeze in this field.
18985
length( $$rfields[$j] ) - $old_line->current_field_width($j);
18986
if ( $j == 0 ) { $pad += $leading_space_count; }
18988
# remember max pads to limit marginal cases
18989
if ( $alignment_token ne '#' ) {
18990
if ( $pad > $max_pad ) { $max_pad = $pad }
18991
if ( $pad < $min_pad ) { $min_pad = $pad }
18993
if ( $is_good_alignment{$alignment_token} ) {
18994
$saw_good_alignment = 1;
18997
# If patterns don't match, we have to be careful...
18998
if ( $$old_rpatterns[$j] ne $$rpatterns[$j] ) {
19000
# flag this as a marginal match since patterns differ
19001
$marginal_match = 1
19002
if ( $marginal_match == 0 && $maximum_line_index == 0 );
19004
# We have to be very careful about aligning commas
19005
# when the patterns don't match, because it can be
19006
# worse to create an alignment where none is needed
19007
# than to omit one. Here's an example where the ','s
19008
# are not in named containers. The first line below
19009
# should not match the next two:
19010
# ( $a, $b ) = ( $b, $r );
19011
# ( $x1, $x2 ) = ( $x2 - $q * $x1, $x1 );
19012
# ( $y1, $y2 ) = ( $y2 - $q * $y1, $y1 );
19013
if ( $alignment_token eq ',' ) {
19015
# do not align commas unless they are in named containers
19016
goto NO_MATCH unless ( $new_tok =~ /[A-Za-z]/ );
19019
# do not align parens unless patterns match;
19020
# large ugly spaces can occur in math expressions.
19021
elsif ( $alignment_token eq '(' ) {
19023
# But we can allow a match if the parens don't
19024
# require any padding.
19025
if ( $pad != 0 ) { goto NO_MATCH }
19028
# Handle an '=' alignment with different patterns to
19030
elsif ( $alignment_token eq '=' ) {
19032
# It is best to be a little restrictive when
19033
# aligning '=' tokens. Here is an example of
19034
# two lines that we will not align:
19037
# The problem is that one is a 'my' declaration,
19038
# and the other isn't, so they're not very similar.
19039
# We will filter these out by comparing the first
19040
# letter of the pattern. This is crude, but works
19043
substr( $$old_rpatterns[$j], 0, 1 ) ne
19044
substr( $$rpatterns[$j], 0, 1 ) )
19049
# If we pass that test, we'll call it a marginal match.
19050
# Here is an example of a marginal match:
19052
# $op = compile_bblock($op);
19053
# The left tokens are both identifiers, but
19054
# one accesses a hash and the other doesn't.
19055
# We'll let this be a tentative match and undo
19056
# it later if we don't find more than 2 lines
19058
elsif ( $maximum_line_index == 0 ) {
19060
2; # =2 prevents being undone below
19065
# Don't let line with fewer fields increase column widths
19067
if ( $maximum_field_index > $jmax ) {
19069
# Exception: suspend this rule to allow last lines to join
19070
if ( $pad > 0 ) { goto NO_MATCH; }
19072
} ## end for my $j ( 0 .. $jlimit)
19074
# Turn off the "marginal match" flag in some cases...
19075
# A "marginal match" occurs when the alignment tokens agree
19076
# but there are differences in the other tokens (patterns).
19077
# If we leave the marginal match flag set, then the rule is that we
19078
# will align only if there are more than two lines in the group.
19079
# We will turn off the flag if we almost have a match
19080
# and either we have seen a good alignment token or we
19081
# just need a small pad (2 spaces) to fit. These rules are
19082
# the result of experimentation. Tokens which are misaligned by just
19083
# one or two characters are annoying. On the other hand,
19084
# large gaps to less important alignment tokens are also annoying.
19085
if ( $marginal_match == 1
19086
&& $jmax == $maximum_field_index
19087
&& ( $saw_good_alignment || ( $max_pad < 3 && $min_pad > -3 ) )
19090
$marginal_match = 0;
19092
##print "marginal=$marginal_match saw=$saw_good_alignment jmax=$jmax max=$maximum_field_index maxpad=$max_pad minpad=$min_pad\n";
19095
# We have a match (even if marginal).
19096
# If the current line has fewer fields than the current group
19097
# but otherwise matches, copy the remaining group fields to
19098
# make it a perfect match.
19099
if ( $maximum_field_index > $jmax ) {
19100
my $comment = $$rfields[$jmax];
19101
for $jmax ( $jlimit .. $maximum_field_index ) {
19102
$$rtokens[$jmax] = $$old_rtokens[$jmax];
19103
$$rfields[ ++$jmax ] = '';
19104
$$rpatterns[$jmax] = $$old_rpatterns[$jmax];
19106
$$rfields[$jmax] = $comment;
19107
$new_line->set_jmax($jmax);
19112
##print "BUBBA: no match jmax=$jmax max=$maximum_field_index $group_list_type lines=$maximum_line_index token=$$old_rtokens[0]\n";
19120
return unless ( $maximum_line_index >= 0 );
19121
my $new_line = shift;
19122
my $old_line = shift;
19124
my $jmax = $new_line->get_jmax();
19125
my $leading_space_count = $new_line->get_leading_space_count();
19126
my $is_hanging_side_comment = $new_line->get_is_hanging_side_comment();
19127
my $rtokens = $new_line->get_rtokens();
19128
my $rfields = $new_line->get_rfields();
19129
my $rpatterns = $new_line->get_rpatterns();
19131
my $group_list_type = $group_lines[0]->get_list_type();
19133
my $padding_so_far = 0;
19134
my $padding_available = $old_line->get_available_space_on_right();
19136
# save current columns in case this doesn't work
19137
save_alignment_columns();
19139
my ( $j, $pad, $eight );
19140
my $maximum_field_index = $old_line->get_jmax();
19141
for $j ( 0 .. $jmax ) {
19143
$pad = length( $$rfields[$j] ) - $old_line->current_field_width($j);
19146
$pad += $leading_space_count;
19149
# remember largest gap of the group, excluding gap to side comment
19151
&& $group_maximum_gap < -$pad
19153
&& $j < $jmax - 1 )
19155
$group_maximum_gap = -$pad;
19160
## This patch helps sometimes, but it doesn't check to see if
19161
## the line is too long even without the side comment. It needs
19163
##don't let a long token with no trailing side comment push
19164
##side comments out, or end a group. (sidecmt1.t)
19165
##next if ($j==$jmax-1 && length($$rfields[$jmax])==0);
19167
# This line will need space; lets see if we want to accept it..
19170
# not if this won't fit
19171
( $pad > $padding_available )
19173
# previously, there were upper bounds placed on padding here
19174
# (maximum_whitespace_columns), but they were not really helpful
19179
# revert to starting state then flush; things didn't work out
19180
restore_alignment_columns();
19185
# patch to avoid excessive gaps in previous lines,
19186
# due to a line of fewer fields.
19187
# return join( ".",
19188
# $self->{"dfi"}, $self->{"aa"}, $self->rsvd, $self->{"rd"},
19189
# $self->{"area"}, $self->{"id"}, $self->{"sel"} );
19190
next if ( $jmax < $maximum_field_index && $j == $jmax - 1 );
19192
# looks ok, squeeze this field in
19193
$old_line->increase_field_width( $j, $pad );
19194
$padding_available -= $pad;
19196
# remember largest gap of the group, excluding gap to side comment
19197
if ( $pad > $group_maximum_gap && $j > 0 && $j < $jmax - 1 ) {
19198
$group_maximum_gap = $pad;
19205
# The current line either starts a new alignment group or is
19206
# accepted into the current alignment group.
19207
my $new_line = shift;
19208
$group_lines[ ++$maximum_line_index ] = $new_line;
19210
# initialize field lengths if starting new group
19211
if ( $maximum_line_index == 0 ) {
19213
my $jmax = $new_line->get_jmax();
19214
my $rfields = $new_line->get_rfields();
19215
my $rtokens = $new_line->get_rtokens();
19217
my $col = $new_line->get_leading_space_count();
19219
for $j ( 0 .. $jmax ) {
19220
$col += length( $$rfields[$j] );
19222
# create initial alignments for the new group
19224
if ( $j < $jmax ) { $token = $$rtokens[$j] }
19225
my $alignment = make_alignment( $col, $token );
19226
$new_line->set_alignment( $j, $alignment );
19229
$maximum_jmax_seen = $jmax;
19230
$minimum_jmax_seen = $jmax;
19233
# use previous alignments otherwise
19235
my @new_alignments =
19236
$group_lines[ $maximum_line_index - 1 ]->get_alignments();
19237
$new_line->set_alignments(@new_alignments);
19240
# remember group jmax extremes for next call to append_line
19241
$previous_minimum_jmax_seen = $minimum_jmax_seen;
19242
$previous_maximum_jmax_seen = $maximum_jmax_seen;
19247
# debug routine to dump array contents
19252
# flush() sends the current Perl::Tidy::VerticalAligner group down the
19253
# pipeline to Perl::Tidy::FileWriter.
19255
# This is the external flush, which also empties the cache
19258
if ( $maximum_line_index < 0 ) {
19259
if ($cached_line_type) {
19260
$seqno_string = $cached_seqno_string;
19261
entab_and_output( $cached_line_text,
19262
$cached_line_leading_space_count,
19263
$last_group_level_written );
19264
$cached_line_type = 0;
19265
$cached_line_text = "";
19266
$cached_seqno_string = "";
19274
# This is the internal flush, which leaves the cache intact
19277
return if ( $maximum_line_index < 0 );
19279
# handle a group of comment lines
19280
if ( $group_type eq 'COMMENT' ) {
19282
VALIGN_DEBUG_FLAG_APPEND0 && do {
19283
my ( $a, $b, $c ) = caller();
19285
"APPEND0: Flush called from $a $b $c for COMMENT group: lines=$maximum_line_index \n";
19288
my $leading_space_count = $comment_leading_space_count;
19289
my $leading_string = get_leading_string($leading_space_count);
19291
# zero leading space count if any lines are too long
19292
my $max_excess = 0;
19293
for my $i ( 0 .. $maximum_line_index ) {
19294
my $str = $group_lines[$i];
19296
length($str) + $leading_space_count - $rOpts_maximum_line_length;
19297
if ( $excess > $max_excess ) {
19298
$max_excess = $excess;
19302
if ( $max_excess > 0 ) {
19303
$leading_space_count -= $max_excess;
19304
if ( $leading_space_count < 0 ) { $leading_space_count = 0 }
19305
$last_outdented_line_at =
19306
$file_writer_object->get_output_line_number();
19307
unless ($outdented_line_count) {
19308
$first_outdented_line_at = $last_outdented_line_at;
19310
$outdented_line_count += ( $maximum_line_index + 1 );
19313
# write the group of lines
19314
my $outdent_long_lines = 0;
19315
for my $i ( 0 .. $maximum_line_index ) {
19316
write_leader_and_string( $leading_space_count, $group_lines[$i], 0,
19317
$outdent_long_lines, "" );
19321
# handle a group of code lines
19324
VALIGN_DEBUG_FLAG_APPEND0 && do {
19325
my $group_list_type = $group_lines[0]->get_list_type();
19326
my ( $a, $b, $c ) = caller();
19327
my $maximum_field_index = $group_lines[0]->get_jmax();
19329
"APPEND0: Flush called from $a $b $c fields=$maximum_field_index list=$group_list_type lines=$maximum_line_index extra=$extra_indent_ok\n";
19333
# some small groups are best left unaligned
19334
my $do_not_align = decide_if_aligned();
19336
# optimize side comment location
19337
$do_not_align = adjust_side_comment($do_not_align);
19339
# recover spaces for -lp option if possible
19340
my $extra_leading_spaces = get_extra_leading_spaces();
19342
# all lines of this group have the same basic leading spacing
19343
my $group_leader_length = $group_lines[0]->get_leading_space_count();
19345
# add extra leading spaces if helpful
19346
my $min_ci_gap = improve_continuation_indentation( $do_not_align,
19347
$group_leader_length );
19349
# loop to output all lines
19350
for my $i ( 0 .. $maximum_line_index ) {
19351
my $line = $group_lines[$i];
19352
write_vertically_aligned_line( $line, $min_ci_gap, $do_not_align,
19353
$group_leader_length, $extra_leading_spaces );
19356
initialize_for_new_group();
19359
sub decide_if_aligned {
19361
# Decide whether the lines of the current group should be left
# unaligned.  The decision is returned in $do_not_align.
# Only groups of exactly two lines ( $maximum_line_index == 1 ) are
# examined; for any other group size, or when
# $is_matching_terminal_line is set, the sub returns early with no
# value.
#
# Do not try to align two lines which are not really similar
19362
return unless $maximum_line_index == 1;
19363
return if ($is_matching_terminal_line);
19365
my $group_list_type = $group_lines[0]->get_list_type();
19367
my $do_not_align = (
19369
# always align lists
19374
# don't align if it was just a marginal match
19377
# don't align two lines with big gap
19378
|| $group_maximum_gap > 12
19380
# or lines with differing number of alignment tokens
19381
# TODO: this could be improved. It occasionally rejects
19383
|| $previous_maximum_jmax_seen != $previous_minimum_jmax_seen
19387
# But try to convert them into a simple comment group if the first line
19388
# has a side comment
19389
my $rfields = $group_lines[0]->get_rfields();
19390
my $maximum_field_index = $group_lines[0]->get_jmax();
19392
&& ( $maximum_line_index > 0 )
# a non-empty last field on the first line means it has a side comment
19393
&& ( length( $$rfields[$maximum_field_index] ) > 0 ) )
19398
return $do_not_align;
19401
sub adjust_side_comment {
19403
# Adjust the width of the field preceding the side comment of the
# current group so that the side comment lines up with the column
# remembered in $last_comment_column where possible.
# Takes the $do_not_align flag as its only argument and returns
# $do_not_align.
my $do_not_align = shift;
19405
# let's see if we can move the side comment field out a little
19406
# to improve readability (the last field is always a side comment field)
19407
my $have_side_comment = 0;
19408
my $first_side_comment_line = -1;
19409
my $maximum_field_index = $group_lines[0]->get_jmax();
# scan the group for a line whose last field (the side comment) is
# not empty
19410
for my $i ( 0 .. $maximum_line_index ) {
19411
my $line = $group_lines[$i];
19413
if ( length( $line->get_rfields()->[$maximum_field_index] ) ) {
19414
$have_side_comment = 1;
19415
$first_side_comment_line = $i;
19420
# number of fields ( last field index + 1 )
my $kmax = $maximum_field_index + 1;
19422
if ($have_side_comment) {
19424
my $line = $group_lines[0];
19426
# the maximum space without exceeding the line length:
19427
my $avail = $line->get_available_space_on_right();
19429
# try to use the previous comment column
19430
my $side_comment_column = $line->get_column( $kmax - 2 );
# $move is the distance from this group's side comment column to the
# remembered column; it is negative if the remembered column is to the
# left
19431
my $move = $last_comment_column - $side_comment_column;
19433
## my $sc_line0 = $side_comment_history[0]->[0];
19434
## my $sc_col0 = $side_comment_history[0]->[1];
19435
## my $sc_line1 = $side_comment_history[1]->[0];
19436
## my $sc_col1 = $side_comment_history[1]->[1];
19437
## my $sc_line2 = $side_comment_history[2]->[0];
19438
## my $sc_col2 = $side_comment_history[2]->[1];
19440
## # FUTURE UPDATES:
19441
## # Be sure to ignore 'do not align' and '} # end comments'
19442
## # Find first $move > 0 and $move <= $avail as follows:
19443
## # 1. try sc_col1 if sc_col1 == sc_col0 && (line-sc_line0) < 12
19444
## # 2. try sc_col2 if (line-sc_line2) < 12
19445
## # 3. try min possible space, plus up to 8,
19446
## # 4. try min possible space
19448
if ( $kmax > 0 && !$do_not_align ) {
19450
# but if this doesn't work, give up and use the minimum space
19451
if ( $move > $avail ) {
19452
$move = $rOpts_minimum_space_to_comment - 1;
19455
# but we want some minimum space to the comment
19456
my $min_move = $rOpts_minimum_space_to_comment - 1;
19458
&& $last_side_comment_length > 0
19459
&& ( $first_side_comment_line == 0 )
19460
&& $group_level == $last_group_level_written )
19465
if ( $move < $min_move ) {
19469
# previously, an upper bound was placed on $move here,
19470
# (maximum_space_to_comment), but it was not helpful
19472
# don't exceed the available space
19473
if ( $move > $avail ) { $move = $avail }
19475
# we can only increase space, never decrease
19477
$line->increase_field_width( $maximum_field_index - 1, $move );
19480
# remember this column for the next group
19481
$last_comment_column = $line->get_column( $kmax - 2 );
19485
# try to at least line up the existing side comment location
19486
if ( $kmax > 0 && $move > 0 && $move < $avail ) {
19487
$line->increase_field_width( $maximum_field_index - 1, $move );
19491
# reset side comment column if we can't align
19493
forget_side_comment();
19497
return $do_not_align;
19500
sub improve_continuation_indentation {
19501
# Arguments:
#   $do_not_align        - flag; when true the lines of the group are
#                          not scanned
#   $group_leader_length - leading space count compared against that
#                          of each line of the group
# Returns $min_ci_gap.
my ( $do_not_align, $group_leader_length ) = @_;
19503
# See if we can increase the continuation indentation
19504
# to move all continuation lines closer to the next field
19505
# (unless it is a comment).
19507
# '$min_ci_gap' is the extra indentation that we may need to introduce.
19508
# We will only introduce this to fields which already have some ci.
19509
# Without this variable, we would occasionally get something like this
19512
# use overload '+' => \&plus,
19514
# '*' => \&multiply,
19517
# 'atan2' => \&atan2,
19519
# Whereas with this variable, we can shift variables over to get this:
19521
# use overload '+' => \&plus,
19523
# '*' => \&multiply,
19526
# 'atan2' => \&atan2,
19528
## BUB: Deactivated####################
19529
# The trouble with this patch is that it may, for example,
19530
# move in some 'or's or ':'s, and leave some out, so that the
19531
# left edge alignment suffers.
19533
###########################################
19535
my $maximum_field_index = $group_lines[0]->get_jmax();
19537
# start from the maximum line length so that any gap found in the
# loop below is smaller
my $min_ci_gap = $rOpts_maximum_line_length;
19538
if ( $maximum_field_index > 1 && !$do_not_align ) {
19540
for my $i ( 0 .. $maximum_line_index ) {
19541
my $line = $group_lines[$i];
19542
my $leading_space_count = $line->get_leading_space_count();
19543
my $rfields = $line->get_rfields();
19546
$line->get_column(0) -
19547
$leading_space_count -
19548
length( $$rfields[0] );
19550
# only lines with more leading space than the group leader are
# considered; keep the smallest gap seen among them
if ( $leading_space_count > $group_leader_length ) {
19551
if ( $gap < $min_ci_gap ) { $min_ci_gap = $gap }
19555
# true when no smaller gap was recorded above
if ( $min_ci_gap >= $rOpts_maximum_line_length ) {
19562
return $min_ci_gap;
19565
sub write_vertically_aligned_line {
19567
# Build the output text of one line of the current group by joining
# its fields with the padding needed to reach each alignment column,
# then hand the result to write_leader_and_string.
#
# Arguments:
#   $line                 - the line object to be written
#   $min_ci_gap           - spaces added to the leading space count of
#                           lines indented more than the group leader
#   $do_not_align         - flag; when true the computed padding is
#                           replaced (see below)
#   $group_leader_length  - leading space count used for comparison
#   $extra_leading_spaces - added to the leading space count when the
#                           line is written
my ( $line, $min_ci_gap, $do_not_align, $group_leader_length,
19568
$extra_leading_spaces )
19570
my $rfields = $line->get_rfields();
19571
my $leading_space_count = $line->get_leading_space_count();
19572
my $outdent_long_lines = $line->get_outdent_long_lines();
19573
my $maximum_field_index = $line->get_jmax();
19574
my $rvertical_tightness_flags = $line->get_rvertical_tightness_flags();
19576
# add any extra spaces
19577
if ( $leading_space_count > $group_leader_length ) {
19578
$leading_space_count += $min_ci_gap;
19581
# the output string starts with the first field
my $str = $$rfields[0];
19583
# loop to concatenate all fields of this line and needed padding
19584
my $total_pad_count = 0;
19586
for $j ( 1 .. $maximum_field_index ) {
19588
# skip zero-length side comments
19590
if ( ( $j == $maximum_field_index )
19591
&& ( !defined( $$rfields[$j] ) || ( length( $$rfields[$j] ) == 0 ) )
19594
# compute spaces of padding before this field
19595
my $col = $line->get_column( $j - 1 );
19596
$pad = $col - ( length($str) + $leading_space_count );
19598
if ($do_not_align) {
19600
( $j < $maximum_field_index )
19602
: $rOpts_minimum_space_to_comment - 1;
19605
# if the -fpsc flag is set, move the side comment to the selected
19606
# column if and only if it is possible, ignoring constraints on
19607
# line length and minimum space to comment
19608
if ( $rOpts_fixed_position_side_comment && $j == $maximum_field_index )
19610
my $newpad = $pad + $rOpts_fixed_position_side_comment - $col - 1;
19611
# use the new padding only if it is not negative
if ( $newpad >= 0 ) { $pad = $newpad; }
19614
# accumulate the padding
19615
if ( $pad > 0 ) { $total_pad_count += $pad; }
19618
if ( !defined $$rfields[$j] ) {
19619
write_diagnostics("UNDEFined field at j=$j\n");
19622
# only add padding when we have a finite field;
19623
# this avoids extra terminal spaces if we have empty fields
19624
if ( length( $$rfields[$j] ) > 0 ) {
19625
$str .= ' ' x $total_pad_count;
19626
$total_pad_count = 0;
19627
$str .= $$rfields[$j];
19630
$total_pad_count = 0;
19633
# update side comment history buffer
19634
if ( $j == $maximum_field_index ) {
19635
my $lineno = $file_writer_object->get_output_line_number();
# drop the oldest entry and append the new [ line number, column ]
# pair, so the length of the buffer is unchanged
19636
shift @side_comment_history;
19637
push @side_comment_history, [ $lineno, $col ];
19641
my $side_comment_length = ( length( $$rfields[$maximum_field_index] ) );
19643
# ship this line off
19644
write_leader_and_string( $leading_space_count + $extra_leading_spaces,
19645
$str, $side_comment_length, $outdent_long_lines,
19646
$rvertical_tightness_flags );
19649
sub get_extra_leading_spaces {
19651
#----------------------------------------------------------
19652
# Define any extra indentation space (for the -lp option).
19654
# If a list has side comments, sub scan_list must dump the
19655
# list before it sees everything. When this happens, it sets
19656
# the indentation to the standard scheme, but notes how
19657
# many spaces it would have liked to use. We may be able
19658
# to recover that space here in the event that all of the
19659
# lines of a list are back together again.
#
# Takes no arguments; works on the current group in @group_lines.
# Returns $extra_leading_spaces, which stays 0 unless
# $extra_indent_ok is set and the checks below succeed.
19660
#----------------------------------------------------------
19662
my $extra_leading_spaces = 0;
19663
if ($extra_indent_ok) {
19664
my $object = $group_lines[0]->get_indentation();
# proceed only if the indentation of the first line is a reference
19665
if ( ref($object) ) {
19666
my $extra_indentation_spaces_wanted =
19667
get_RECOVERABLE_SPACES($object);
19669
# all indentation objects must be the same
19671
for $i ( 1 .. $maximum_line_index ) {
# a line with a different indentation object cancels the request
19672
if ( $object != $group_lines[$i]->get_indentation() ) {
19673
$extra_indentation_spaces_wanted = 0;
19678
if ($extra_indentation_spaces_wanted) {
19680
# the maximum space without exceeding the line length:
19681
my $avail = $group_lines[0]->get_available_space_on_right();
19682
$extra_leading_spaces =
19683
( $avail > $extra_indentation_spaces_wanted )
19684
? $extra_indentation_spaces_wanted
19687
# update the indentation object because with -icp the terminal
19688
# ');' will use the same adjustment.
19689
$object->permanently_decrease_AVAILABLE_SPACES(
19690
-$extra_leading_spaces );
19694
return $extra_leading_spaces;
19697
sub combine_fields {
19699
# combine all fields except for the comment field ( sidecmt.t )
19700
# Uses global variables:
19702
# $maximum_line_index
#
# Takes no arguments.  The fields of every line in @group_lines are
# modified in place: each line ends up with field 0 (combined text)
# and field 1 (the former last field).
19704
my $maximum_field_index = $group_lines[0]->get_jmax();
19705
for ( $j = 0 ; $j <= $maximum_line_index ; $j++ ) {
19706
my $line = $group_lines[$j];
19707
my $rfields = $line->get_rfields();
# append fields 1 .. $maximum_field_index - 1 onto field 0
19708
foreach ( 1 .. $maximum_field_index - 1 ) {
19709
$$rfields[0] .= $$rfields[$_];
19711
# the last field moves to position 1
$$rfields[1] = $$rfields[$maximum_field_index];
19713
# the line now has last field index 1; both columns are reset to 0
$line->set_jmax(1);
19714
$line->set_column( 0, 0 );
19715
$line->set_column( 1, 0 );
19718
$maximum_field_index = 1;
19720
# second pass: widen any field whose text is longer than the current
# field width
for $j ( 0 .. $maximum_line_index ) {
19721
my $line = $group_lines[$j];
19722
my $rfields = $line->get_rfields();
19723
for $k ( 0 .. $maximum_field_index ) {
19724
my $pad = length( $$rfields[$k] ) - $line->current_field_width($k);
19726
$pad += $group_lines[$j]->get_leading_space_count();
19729
if ( $pad > 0 ) { $line->increase_field_width( $k, $pad ) }
19735
sub get_output_line_number {
19737
# the output line number reported to a caller is the number of items
19738
# written plus the number of items in the buffer
#
# $maximum_line_index is the index of the last line held in
# @group_lines, so 1 + $maximum_line_index is the number of buffered
# lines.  There is no explicit return statement here; the value of
# the expression below is presumably the sub's return value (the
# closing brace is not visible in this listing).
19740
1 + $maximum_line_index + $file_writer_object->get_output_line_number();
19743
sub write_leader_and_string {
19745
my ( $leading_space_count, $str, $side_comment_length, $outdent_long_lines,
19746
$rvertical_tightness_flags )
19749
# handle outdenting of long lines:
19750
if ($outdent_long_lines) {
19753
$side_comment_length +
19754
$leading_space_count -
19755
$rOpts_maximum_line_length;
19756
if ( $excess > 0 ) {
19757
$leading_space_count = 0;
19758
$last_outdented_line_at =
19759
$file_writer_object->get_output_line_number();
19761
unless ($outdented_line_count) {
19762
$first_outdented_line_at = $last_outdented_line_at;
19764
$outdented_line_count++;
19768
# Make preliminary leading whitespace. It could get changed
19769
# later by entabbing, so we have to keep track of any changes
19770
# to the leading_space_count from here on.
19771
my $leading_string =
19772
$leading_space_count > 0 ? ( ' ' x $leading_space_count ) : "";
19774
# Unpack any recombination data; it was packed by
19775
# sub send_lines_to_vertical_aligner. Contents:
19777
# [0] type: 1=opening 2=closing 3=opening block brace
19778
# [1] flag: if opening: 1=no multiple steps, 2=multiple steps ok
19779
# if closing: spaces of padding to use
19780
# [2] sequence number of container
19781
# [3] valid flag: do not append if this flag is false
19783
my ( $open_or_close, $tightness_flag, $seqno, $valid, $seqno_beg,
19785
if ($rvertical_tightness_flags) {
19787
$open_or_close, $tightness_flag, $seqno, $valid, $seqno_beg,
19789
) = @{$rvertical_tightness_flags};
19792
$seqno_string = $seqno_end;
19794
# handle any cached line ..
19795
# either append this line to it or write it out
19796
if ( length($cached_line_text) ) {
19798
if ( !$cached_line_valid ) {
19799
entab_and_output( $cached_line_text,
19800
$cached_line_leading_space_count,
19801
$last_group_level_written );
19804
# handle cached line with opening container token
19805
elsif ( $cached_line_type == 1 || $cached_line_type == 3 ) {
19807
my $gap = $leading_space_count - length($cached_line_text);
19809
# handle option of just one tight opening per line:
19810
if ( $cached_line_flag == 1 ) {
19811
if ( defined($open_or_close) && $open_or_close == 1 ) {
19817
$leading_string = $cached_line_text . ' ' x $gap;
19818
$leading_space_count = $cached_line_leading_space_count;
19819
$seqno_string = $cached_seqno_string . ':' . $seqno_beg;
19822
entab_and_output( $cached_line_text,
19823
$cached_line_leading_space_count,
19824
$last_group_level_written );
19828
# handle cached line to place before this closing container token
19830
my $test_line = $cached_line_text . ' ' x $cached_line_flag . $str;
19832
if ( length($test_line) <= $rOpts_maximum_line_length ) {
19834
$seqno_string = $cached_seqno_string . ':' . $seqno_beg;
19836
# Patch to outdent closing tokens ending # in ');'
19837
# If we are joining a line like ');' to a previous stacked
19838
# set of closing tokens, then decide if we may outdent the
19839
# combined stack to the indentation of the ');'. Since we
19840
# should not normally outdent any of the other tokens more than
19841
# the indentation of the lines that contained them, we will
19842
# only do this if all of the corresponding opening
19843
# tokens were on the same line. This can happen with
19844
# -sot and -sct. For example, it is ok here:
19845
# __PACKAGE__->load_components( qw(
19850
# But, for example, we do not outdent in this example because
19851
# that would put the closing sub brace out farther than the
19852
# opening sub brace:
19854
# perltidy -sot -sct
19856
# '<Control-f>' => sub {
19858
# my $e = $c->XEvent;
19859
# itemsUnderArea $c;
19862
if ( $str =~ /^\);/ && $cached_line_text =~ /^[\)\}\]\s]*$/ ) {
19864
# The way to tell this is if the stacked sequence numbers
19865
# of this output line are the reverse of the stacked
19866
# sequence numbers of the previous non-blank line of
19867
# sequence numbers. So we can join if the previous
19868
# nonblank string of tokens is the mirror image. For
19869
# example if stack )}] is 13:8:6 then we are looking for a
19870
# leading stack like [{( which is 6:8:13 We only need to
19871
# check the two ends, because the intermediate tokens must
19872
# fall in order. Note on speed: having to split on colons
19873
# and eliminate multiple colons might appear to be slow,
19874
# but it's not an issue because we almost never come
19875
# through here. In a typical file we don't.
19876
$seqno_string =~ s/^:+//;
19877
$last_nonblank_seqno_string =~ s/^:+//;
19878
$seqno_string =~ s/:+/:/g;
19879
$last_nonblank_seqno_string =~ s/:+/:/g;
19881
# how many spaces can we outdent?
19883
$cached_line_leading_space_count - $leading_space_count;
19885
&& length($seqno_string)
19886
&& length($last_nonblank_seqno_string) ==
19887
length($seqno_string) )
19890
( split ':', $last_nonblank_seqno_string );
19891
my @seqno_now = ( split ':', $seqno_string );
19892
if ( $seqno_now[-1] == $seqno_last[0]
19893
&& $seqno_now[0] == $seqno_last[-1] )
19897
# for absolute safety, be sure we only remove
19899
my $ws = substr( $test_line, 0, $diff );
19900
if ( ( length($ws) == $diff ) && $ws =~ /^\s+$/ ) {
19902
$test_line = substr( $test_line, $diff );
19903
$cached_line_leading_space_count -= $diff;
19906
# shouldn't happen, but not critical:
19908
## ERROR transferring indentation here
19915
$leading_string = "";
19916
$leading_space_count = $cached_line_leading_space_count;
19919
entab_and_output( $cached_line_text,
19920
$cached_line_leading_space_count,
19921
$last_group_level_written );
19925
$cached_line_type = 0;
19926
$cached_line_text = "";
19928
# make the line to be written
19929
my $line = $leading_string . $str;
19931
# write or cache this line
19932
if ( !$open_or_close || $side_comment_length > 0 ) {
19933
entab_and_output( $line, $leading_space_count, $group_level );
19936
$cached_line_text = $line;
19937
$cached_line_type = $open_or_close;
19938
$cached_line_flag = $tightness_flag;
19939
$cached_seqno = $seqno;
19940
$cached_line_valid = $valid;
19941
$cached_line_leading_space_count = $leading_space_count;
19942
$cached_seqno_string = $seqno_string;
19945
$last_group_level_written = $group_level;
19946
$last_side_comment_length = $side_comment_length;
19947
$extra_indent_ok = 0;
19950
# entab_and_output( $line, $leading_space_count, $level )
#
# Hands one finished text line to $file_writer_object->write_code_line,
# with a newline appended.  When one of the tab options is active, the
# first $leading_space_count characters of $line are replaced by an
# equivalent tab/space string before the line is written.
#
#   $line                - text of the output line, without its newline
#   $leading_space_count - number of leading whitespace characters in $line
#   $level               - indentation level; used by the one-tab-per-level
#                          option to decide how many tabs to emit
#
# A true $seqno_string is also copied into $last_nonblank_seqno_string.
sub entab_and_output {
19951
my ( $line, $leading_space_count, $level ) = @_;
19953
# The line is currently correct if there is no tabbing (recommended!)
19954
# We may have to lop off some leading spaces and replace with tabs.
19955
if ( $leading_space_count > 0 ) {
19957
# Nothing to do if no tabs
19958
if ( !( $rOpts_tabs || $rOpts_entab_leading_whitespace )
19959
|| $rOpts_indent_columns <= 0 )
19965
# Handle entab option
19966
elsif ($rOpts_entab_leading_whitespace) {
19968
# remainder of the division: presumably the spaces that follow the tabs
$leading_space_count % $rOpts_entab_leading_whitespace;
19970
# integer quotient: presumably the number of whole tabs
int( $leading_space_count / $rOpts_entab_leading_whitespace );
19971
my $leading_string = "\t" x $tab_count . ' ' x $space_count;
19972
# Substitute only if $line really starts with at least
# $leading_space_count whitespace characters.
if ( $line =~ /^\s{$leading_space_count,$leading_space_count}/ ) {
19973
substr( $line, 0, $leading_space_count ) = $leading_string;
19977
# REMOVE AFTER TESTING
19978
# shouldn't happen - program error counting whitespace
19979
# we'll skip entabbing
19981
"Error entabbing in entab_and_output: expected count=$leading_space_count\n"
19986
# Handle option of one tab per level
19988
my $leading_string = ( "\t" x $level );
19990
# whitespace still needed beyond the tabs, one tab standing for
# $rOpts_indent_columns columns
$leading_space_count - $level * $rOpts_indent_columns;
19992
# shouldn't happen:
19993
if ( $space_count < 0 ) {
19995
# NOTE(review): this message names append_line and interpolates
# $group_level, while this sub receives its level as $level -- confirm
# which name and value are intended.
"Error entabbing in append_line: for level=$group_level count=$leading_space_count\n"
19997
# fall back to plain spaces for the whole indentation
$leading_string = ( ' ' x $leading_space_count );
20000
$leading_string .= ( ' ' x $space_count );
20002
# Same guard as in the entab branch: the line must begin with at least
# $leading_space_count whitespace characters.
if ( $line =~ /^\s{$leading_space_count,$leading_space_count}/ ) {
20003
substr( $line, 0, $leading_space_count ) = $leading_string;
20007
# REMOVE AFTER TESTING
20008
# shouldn't happen - program error counting whitespace
20009
# we'll skip entabbing
20011
"Error entabbing in entab_and_output: expected count=$leading_space_count\n"
20016
# write the (possibly entabbed) line, terminated with a newline
$file_writer_object->write_code_line( $line . "\n" );
20017
# remember the sequence-number string of this line if it has one
if ($seqno_string) {
20018
$last_nonblank_seqno_string = $seqno_string;
20022
# Closure holding get_leading_string() and its private result cache.
#
# get_leading_string( $leading_whitespace_count ) returns the whitespace
# string to place in front of a line that needs that many columns of
# indentation: plain spaces, or a tab/space mixture when one of the tab
# options is active.  Computed strings are stored in
# @leading_string_cache, indexed by $leading_whitespace_count.
{ # begin get_leading_string
20024
# previously computed strings, indexed by whitespace count
my @leading_string_cache;
20026
sub get_leading_string {
20028
# define the leading whitespace string for this line..
20029
my $leading_whitespace_count = shift;
20031
# Handle case of zero whitespace, which includes multi-line quotes
20032
# (which may have a finite level; this prevents tab problems)
20033
if ( $leading_whitespace_count <= 0 ) {
20037
# look for previous result
# NOTE(review): the cache is keyed only by the whitespace count, but the
# one-tab-per-level branch below also depends on $group_level -- confirm
# a cached string can never be returned for a different level.
20038
elsif ( $leading_string_cache[$leading_whitespace_count] ) {
20039
return $leading_string_cache[$leading_whitespace_count];
20042
# must compute a string for this number of spaces
20043
my $leading_string;
20045
# Handle simple case of no tabs
20046
if ( !( $rOpts_tabs || $rOpts_entab_leading_whitespace )
20047
|| $rOpts_indent_columns <= 0 )
20049
$leading_string = ( ' ' x $leading_whitespace_count );
20052
# Handle entab option
20053
elsif ($rOpts_entab_leading_whitespace) {
20055
# remainder of the division: presumably the spaces that follow the tabs
$leading_whitespace_count % $rOpts_entab_leading_whitespace;
20056
# number of whole tabs
my $tab_count = int(
20057
$leading_whitespace_count / $rOpts_entab_leading_whitespace );
20058
$leading_string = "\t" x $tab_count . ' ' x $space_count;
20061
# Handle option of one tab per level
20063
$leading_string = ( "\t" x $group_level );
20065
# whitespace still needed beyond the tabs, one tab standing for
# $rOpts_indent_columns columns
$leading_whitespace_count - $group_level * $rOpts_indent_columns;
20067
# shouldn't happen:
20068
if ( $space_count < 0 ) {
20070
"Error in append_line: for level=$group_level count=$leading_whitespace_count\n"
20072
# fall back to plain spaces for the whole indentation
$leading_string = ( ' ' x $leading_whitespace_count );
20075
$leading_string .= ( ' ' x $space_count );
20078
# remember the result for later calls with the same count
$leading_string_cache[$leading_whitespace_count] = $leading_string;
20079
return $leading_string;
20081
} # end get_leading_string
20083
# Writes a short summary of outdented long lines to the log file.
# Nothing about outdenting is reported unless $outdented_line_count is
# positive; the "Last at" entry is added only when more than one line
# was outdented.
sub report_anything_unusual {
20085
if ( $outdented_line_count > 0 ) {
20086
write_logfile_entry(
20087
"$outdented_line_count long lines were outdented:\n");
20088
write_logfile_entry(
20089
" First at output line $first_outdented_line_at\n");
20091
if ( $outdented_line_count > 1 ) {
20092
write_logfile_entry(
20093
" Last at output line $last_outdented_line_at\n");
20095
# tell the user which options control this behavior
write_logfile_entry(
20096
" use -noll to prevent outdenting, -l=n to increase line length\n"
20098
write_logfile_entry("\n");
20102
#####################################################################
20104
# the Perl::Tidy::FileWriter class writes the output file
20106
#####################################################################
20108
package Perl::Tidy::FileWriter;
20110
# Maximum number of little messages; probably need not be changed.
20111
use constant MAX_NAG_MESSAGES => 6;
20113
# Passes its arguments to the write_logfile_entry method of the logger
# object stored in $self->{_logger_object}; the call is skipped when no
# logger object is stored there.
sub write_logfile_entry {
20115
my $logger_object = $self->{_logger_object};
20116
if ($logger_object) {
20117
$logger_object->write_logfile_entry(@_);
20123
my ( $line_sink_object, $rOpts, $logger_object ) = @_;
20126
_line_sink_object => $line_sink_object,
20127
_logger_object => $logger_object,
20129
_output_line_number => 1,
20130
_consecutive_blank_lines => 0,
20131
_consecutive_nonblank_lines => 0,
20132
_first_line_length_error => 0,
20133
_max_line_length_error => 0,
20134
_last_line_length_error => 0,
20135
_first_line_length_error_at => 0,
20136
_max_line_length_error_at => 0,
20137
_last_line_length_error_at => 0,
20138
_line_length_error_count => 0,
20139
_max_output_line_length => 0,
20140
_max_output_line_length_at => 0,
20146
$self->{_line_sink_object}->tee_on();
20151
$self->{_line_sink_object}->tee_off();
20154
# Accessor: returns the value stored in $self->{_output_line_number}.
sub get_output_line_number {
20156
return $self->{_output_line_number};
20159
# Decreases $self->{_output_line_number} by one.
sub decrement_output_line_number {
20161
$self->{_output_line_number}--;
20164
# Accessor: returns the value stored in
# $self->{_consecutive_nonblank_lines}.
sub get_consecutive_nonblank_lines {
20166
return $self->{_consecutive_nonblank_lines};
20169
# Sets the consecutive-blank-line counter of this writer back to zero.
sub reset_consecutive_blank_lines {
20171
$self->{_consecutive_blank_lines} = 0;
20174
# Calls write_blank_code_line() unless the consecutive-blank-line
# counter is already non-zero.
sub want_blank_line {
20176
unless ( $self->{_consecutive_blank_lines} ) {
20177
$self->write_blank_code_line();
20181
# Writes an empty line ("\n") through write_line() and updates the
# blank / non-blank line counters of this writer.
sub write_blank_code_line {
20183
my $rOpts = $self->{_rOpts};
20185
# Condition of a statement modifier: true once the run of blank lines
# has reached the 'maximum-consecutive-blank-lines' option.
# Presumably this guards an early exit -- TODO confirm.
if ( $self->{_consecutive_blank_lines} >=
20186
$rOpts->{'maximum-consecutive-blank-lines'} );
20187
# one more blank line in the current run; the non-blank run ends
$self->{_consecutive_blank_lines}++;
20188
$self->{_consecutive_nonblank_lines} = 0;
20189
$self->write_line("\n");
20192
# Writes one line of code through write_line() and keeps the blank /
# non-blank line counters of this writer up to date.  A line that
# consists only of whitespace is counted as a blank line and is checked
# against the 'maximum-consecutive-blank-lines' option.
#
# NOTE(review): the line text is held in a variable named $a, which is
# also the name Perl's sort uses for its comparison variable -- confirm
# it is a lexical here.
sub write_code_line {
20196
# blank (whitespace-only) line?
if ( $a =~ /^\s*$/ ) {
20197
my $rOpts = $self->{_rOpts};
20199
# Condition of a statement modifier: true once the run of blank lines
# has reached the configured maximum.  Presumably this guards an early
# exit -- TODO confirm.
if ( $self->{_consecutive_blank_lines} >=
20200
$rOpts->{'maximum-consecutive-blank-lines'} );
20201
$self->{_consecutive_blank_lines}++;
20202
$self->{_consecutive_nonblank_lines} = 0;
20205
# bookkeeping for a non-blank line
$self->{_consecutive_blank_lines} = 0;
20206
$self->{_consecutive_nonblank_lines}++;
20208
$self->write_line($a);
20215
# TODO: go through and see if the test is necessary here
20216
if ( $a =~ /\n$/ ) { $self->{_output_line_number}++; }
20218
$self->{_line_sink_object}->write_line($a);
20220
# This calculation of excess line length ignores any internal tabs
20221
my $rOpts = $self->{_rOpts};
20222
my $exceed = length($a) - $rOpts->{'maximum-line-length'} - 1;
20223
if ( $a =~ /^\t+/g ) {
20224
$exceed += pos($a) * ( $rOpts->{'indent-columns'} - 1 );
20227
# Note that we just incremented output line number to future value
20228
# so we must subtract 1 for current line number
20229
if ( length($a) > 1 + $self->{_max_output_line_length} ) {
20230
$self->{_max_output_line_length} = length($a) - 1;
20231
$self->{_max_output_line_length_at} = $self->{_output_line_number} - 1;
20234
if ( $exceed > 0 ) {
20235
my $output_line_number = $self->{_output_line_number};
20236
$self->{_last_line_length_error} = $exceed;
20237
$self->{_last_line_length_error_at} = $output_line_number - 1;
20238
if ( $self->{_line_length_error_count} == 0 ) {
20239
$self->{_first_line_length_error} = $exceed;
20240
$self->{_first_line_length_error_at} = $output_line_number - 1;
20244
$self->{_last_line_length_error} > $self->{_max_line_length_error} )
20246
$self->{_max_line_length_error} = $exceed;
20247
$self->{_max_line_length_error_at} = $output_line_number - 1;
20250
if ( $self->{_line_length_error_count} < MAX_NAG_MESSAGES ) {
20251
$self->write_logfile_entry(
20252
"Line length exceeded by $exceed characters\n");
20254
$self->{_line_length_error_count}++;
20259
# Writes a summary of line-length statistics to the log file, using the
# counters kept in $self.
#
# When no line exceeded the 'maximum-line-length' option, the longest
# output line and its line number are reported.  Otherwise the number of
# offending lines is reported together with the first one and, when
# there was more than one, the maximum and the last one.
sub report_line_length_errors {
20261
my $rOpts = $self->{_rOpts};
20262
my $line_length_error_count = $self->{_line_length_error_count};
20263
if ( $line_length_error_count == 0 ) {
20264
$self->write_logfile_entry(
20265
"No lines exceeded $rOpts->{'maximum-line-length'} characters\n");
20266
my $max_output_line_length = $self->{_max_output_line_length};
20267
my $max_output_line_length_at = $self->{_max_output_line_length_at};
20268
$self->write_logfile_entry(
20269
" Maximum output line length was $max_output_line_length at line $max_output_line_length_at\n"
20275
# plural suffix for the word "line" in the message below
my $word = ( $line_length_error_count > 1 ) ? "s" : "";
20276
$self->write_logfile_entry(
20277
"$line_length_error_count output line$word exceeded $rOpts->{'maximum-line-length'} characters:\n"
20280
# $word is reused: label the first error only when there are several
$word = ( $line_length_error_count > 1 ) ? "First" : "";
20281
my $first_line_length_error = $self->{_first_line_length_error};
20282
my $first_line_length_error_at = $self->{_first_line_length_error_at};
20283
$self->write_logfile_entry(
20284
" $word at line $first_line_length_error_at by $first_line_length_error characters\n"
20287
# the maximum and last errors are only worth listing if there were several
if ( $line_length_error_count > 1 ) {
20288
my $max_line_length_error = $self->{_max_line_length_error};
20289
my $max_line_length_error_at = $self->{_max_line_length_error_at};
20290
my $last_line_length_error = $self->{_last_line_length_error};
20291
my $last_line_length_error_at = $self->{_last_line_length_error_at};
20292
$self->write_logfile_entry(
20293
" Maximum at line $max_line_length_error_at by $max_line_length_error characters\n"
20295
$self->write_logfile_entry(
20296
" Last at line $last_line_length_error_at by $last_line_length_error characters\n"
20302
#####################################################################
20304
# The Perl::Tidy::Debugger class shows line tokenization
20306
#####################################################################
20308
package Perl::Tidy::Debugger;
20312
my ( $class, $filename ) = @_;
20315
_debug_file => $filename,
20316
_debug_file_opened => 0,
20321
# Opens the file named in $self->{_debug_file} for writing, stores the
# handle in $self->{_fh} and sets $self->{_debug_file_opened}.  A warning
# is issued if the file cannot be opened.
#
# NOTE(review): the file name is interpolated into the mode string
# given to IO::File->new ("> name"), so unusual characters in the name
# could be misread as part of the mode -- consider passing name and
# mode as separate arguments.
sub really_open_debug_file {
20324
my $debug_file = $self->{_debug_file};
20326
unless ( $fh = IO::File->new("> $debug_file") ) {
20327
warn("can't open $debug_file: $!\n");
20329
$self->{_debug_file_opened} = 1;
20330
$self->{_fh} = $fh;
20332
"Use -dump-token-types (-dtt) to get a list of token type codes\n";
20335
# Closes the debug file handle if the debug file was opened.  The close
# call is wrapped in eval, so an exception raised by it is not
# propagated to the caller.
sub close_debug_file {
20338
my $fh = $self->{_fh};
20339
if ( $self->{_debug_file_opened} ) {
20341
eval { $self->{_fh}->close() };
20345
# write_debug_entry( $line_of_tokens )
#
# Dumps one tokenized line to the debug file handle: first the line as
# reconstructed from its tokens, then a string of token types in which
# each one-character type is repeated to the length of its token.  Both
# strings are prefixed with the input line number.  The debug file is
# opened on first use.
sub write_debug_entry {
20347
# This is a debug dump routine which may be modified as necessary
20348
# to dump tokens on a line-by-line basis. The output will be written
20349
# to the .DEBUG file when the -D flag is entered.
20351
my $line_of_tokens = shift;
20353
# unpack the fields of the tokenized-line hash
my $input_line = $line_of_tokens->{_line_text};
20354
my $rtoken_type = $line_of_tokens->{_rtoken_type};
20355
my $rtokens = $line_of_tokens->{_rtokens};
20356
my $rlevels = $line_of_tokens->{_rlevels};
20357
my $rslevels = $line_of_tokens->{_rslevels};
20358
my $rblock_type = $line_of_tokens->{_rblock_type};
20359
my $input_line_number = $line_of_tokens->{_line_number};
20360
my $line_type = $line_of_tokens->{_line_type};
20364
# every output string starts with the input line number
my $token_str = "$input_line_number: ";
20365
my $reconstructed_original = "$input_line_number: ";
20366
my $block_str = "$input_line_number: ";
20368
#$token_str .= "$line_type: ";
20369
#$reconstructed_original .= "$line_type: ";
20372
# replacement characters used to display a blank token type
my @next_char = ( '"', '"' );
20374
# open the debug file lazily, on the first entry written
unless ( $self->{_debug_file_opened} ) { $self->really_open_debug_file() }
20375
my $fh = $self->{_fh};
20377
# walk the tokens of this line
for ( $j = 0 ; $j < @$rtoken_type ; $j++ ) {
20380
# $pattern gets the token text for type 'k' tokens ...
if ( $$rtoken_type[$j] eq 'k' ) {
20381
$pattern .= $$rtokens[$j];
20384
# ... and the token type itself (presumably for all other types)
$pattern .= $$rtoken_type[$j];
20386
$reconstructed_original .= $$rtokens[$j];
20387
$block_str .= "($$rblock_type[$j])";
20388
# length of this token, used to stretch its type string
$num = length( $$rtokens[$j] );
20389
my $type_str = $$rtoken_type[$j];
20391
# be sure there are no blank tokens (shouldn't happen)
20392
# This can only happen if a programming error has been made
20393
# because all valid tokens are non-blank
20394
if ( $type_str eq ' ' ) {
20395
print $fh "BLANK TOKEN on the next line\n";
20396
$type_str = $next_char[$i_next];
20397
# alternate between the two entries of @next_char
$i_next = 1 - $i_next;
20400
# repeat a one-character type so it lines up under its token
if ( length($type_str) == 1 ) {
20401
$type_str = $type_str x $num;
20403
$token_str .= $type_str;
20406
# Write what you want here ...
20407
# print $fh "$input_line\n";
20408
# print $fh "$pattern\n";
20409
print $fh "$reconstructed_original\n";
20410
print $fh "$token_str\n";
20412
#print $fh "$block_str\n";
20415
#####################################################################
20417
# The Perl::Tidy::LineBuffer class supplies a 'get_line()'
20418
# method for returning the next line to be parsed, as well as a
20419
# 'peek_ahead()' method
20421
# The input parameter is an object with a 'get_line()' method
20422
# which returns the next line to be parsed
20424
#####################################################################
20426
package Perl::Tidy::LineBuffer;
20431
my $line_source_object = shift;
20434
_line_source_object => $line_source_object,
20435
_rlookahead_buffer => [],
20441
my $buffer_index = shift;
20443
my $line_source_object = $self->{_line_source_object};
20444
my $rlookahead_buffer = $self->{_rlookahead_buffer};
20445
if ( $buffer_index < scalar(@$rlookahead_buffer) ) {
20446
$line = $$rlookahead_buffer[$buffer_index];
20449
$line = $line_source_object->get_line();
20450
push( @$rlookahead_buffer, $line );
20458
my $line_source_object = $self->{_line_source_object};
20459
my $rlookahead_buffer = $self->{_rlookahead_buffer};
20461
if ( scalar(@$rlookahead_buffer) ) {
20462
$line = shift @$rlookahead_buffer;
20465
$line = $line_source_object->get_line();
20470
########################################################################
20472
# the Perl::Tidy::Tokenizer package is essentially a filter which
20473
# reads lines of perl source code from a source object and provides
20474
# corresponding tokenized lines through its get_line() method. Lines
20475
# flow from the source_object to the caller like this:
20477
# source_object --> LineBuffer_object --> Tokenizer --> calling routine
20478
# get_line() get_line() get_line() line_of_tokens
20480
# The source object can be any object with a get_line() method which
20481
# supplies one line (a character string) per call.
20482
# The LineBuffer object is created by the Tokenizer.
20483
# The Tokenizer returns a reference to a data structure 'line_of_tokens'
20484
# containing one tokenized line for each call to its get_line() method.
20486
# WARNING: This is not a real class yet. Only one tokenizer may be used.
20488
########################################################################
20490
package Perl::Tidy::Tokenizer;
20494
# Caution: these debug flags produce a lot of output
20495
# They should all be 0 except when debugging small scripts
20497
use constant TOKENIZER_DEBUG_FLAG_EXPECT => 0;
20498
use constant TOKENIZER_DEBUG_FLAG_NSCAN => 0;
20499
use constant TOKENIZER_DEBUG_FLAG_QUOTE => 0;
20500
use constant TOKENIZER_DEBUG_FLAG_SCAN_ID => 0;
20501
use constant TOKENIZER_DEBUG_FLAG_TOKENIZE => 0;
20503
my $debug_warning = sub {
20504
print "TOKENIZER_DEBUGGING with key $_[0]\n";
20507
TOKENIZER_DEBUG_FLAG_EXPECT && $debug_warning->('EXPECT');
20508
TOKENIZER_DEBUG_FLAG_NSCAN && $debug_warning->('NSCAN');
20509
TOKENIZER_DEBUG_FLAG_QUOTE && $debug_warning->('QUOTE');
20510
TOKENIZER_DEBUG_FLAG_SCAN_ID && $debug_warning->('SCAN_ID');
20511
TOKENIZER_DEBUG_FLAG_TOKENIZE && $debug_warning->('TOKENIZE');
20517
# PACKAGE VARIABLES for processing an entire FILE.
20521
$last_nonblank_token
20522
$last_nonblank_type
20523
$last_nonblank_block_type
20531
%user_function_prototype
20533
%is_block_list_function
20534
%saw_function_definition
20538
$square_bracket_depth
20543
@nesting_sequence_number
20544
@current_sequence_number
20546
@paren_semicolon_count
20547
@paren_structural_type
20549
@brace_structural_type
20550
@brace_statement_type
20553
@square_bracket_type
20554
@square_bracket_structural_type
20556
@nested_ternary_flag
20557
@starting_line_of_current_depth
20560
# GLOBAL CONSTANTS for routines in this package
20562
%is_indirect_object_taker
20564
%expecting_operator_token
20565
%expecting_operator_types
20566
%expecting_term_types
20567
%expecting_term_token
20569
%is_file_test_operator
20571
%is_valid_token_type
20573
%is_code_block_token
20575
@opening_brace_names
20576
@closing_brace_names
20577
%is_keyword_taking_list
20578
%is_q_qq_qw_qx_qr_s_y_tr_m
20581
# possible values of operator_expected()
20582
use constant TERM => -1;
20583
use constant UNKNOWN => 0;
20584
use constant OPERATOR => 1;
20586
# possible values of context
20587
use constant SCALAR_CONTEXT => -1;
20588
use constant UNKNOWN_CONTEXT => 0;
20589
use constant LIST_CONTEXT => 1;
20591
# Maximum number of little messages; probably need not be changed.
20592
use constant MAX_NAG_MESSAGES => 6;
20596
# methods to count instances
20598
# Returns the current value of the instance counter.
sub get_count {
    return $_count;
}
20599
# Adds one to the instance counter and returns the new value.
sub _increment_count {
    return ++$_count;
}
20600
# Subtracts one from the instance counter and returns the new value.
sub _decrement_count {
    return --$_count;
}
20604
$_[0]->_decrement_count();
20611
# Note: 'tabs' and 'indent_columns' are temporary and should be
20614
source_object => undef,
20615
debugger_object => undef,
20616
diagnostics_object => undef,
20617
logger_object => undef,
20618
starting_level => undef,
20619
indent_columns => 4,
20621
look_for_hash_bang => 0,
20623
look_for_autoloader => 1,
20624
look_for_selfloader => 1,
20625
starting_line_number => 1,
20627
my %args = ( %defaults, @_ );
20629
# we are given an object with a get_line() method to supply source lines
20630
my $source_object = $args{source_object};
20632
# we create another object with a get_line() and peek_ahead() method
20633
my $line_buffer_object = Perl::Tidy::LineBuffer->new($source_object);
20635
# Tokenizer state data is as follows:
20636
# _rhere_target_list reference to list of here-doc targets
20637
# _here_doc_target the target string for a here document
20638
# _here_quote_character the type of here-doc quoting (" ' ` or none)
20639
# to determine if interpolation is done
20640
# _quote_target character we seek if chasing a quote
20641
# _line_start_quote line where we started looking for a long quote
20642
# _in_here_doc flag indicating if we are in a here-doc
20643
# _in_pod flag set if we are in pod documentation
20644
# _in_error flag set if we saw severe error (binary in script)
20645
# _in_data flag set if we are in __DATA__ section
20646
# _in_end flag set if we are in __END__ section
20647
# _in_format flag set if we are in a format description
20648
# _in_attribute_list flag telling if we are looking for attributes
20649
# _in_quote flag telling if we are chasing a quote
20650
# _starting_level indentation level of first line
20651
# _input_tabstr string denoting one indentation level of input file
20652
# _know_input_tabstr flag indicating if we know _input_tabstr
20653
# _line_buffer_object object with get_line() method to supply source code
20654
# _diagnostics_object place to write debugging information
20655
# _unexpected_error_count error count used to limit output
20656
# _lower_case_labels_at line numbers where lower case labels seen
20657
$tokenizer_self = {
20658
_rhere_target_list => [],
20660
_here_doc_target => "",
20661
_here_quote_character => "",
20667
_in_attribute_list => 0,
20669
_quote_target => "",
20670
_line_start_quote => -1,
20671
_starting_level => $args{starting_level},
20672
_know_starting_level => defined( $args{starting_level} ),
20673
_tabs => $args{tabs},
20674
_indent_columns => $args{indent_columns},
20675
_look_for_hash_bang => $args{look_for_hash_bang},
20676
_trim_qw => $args{trim_qw},
20677
_input_tabstr => "",
20678
_know_input_tabstr => -1,
20679
_last_line_number => $args{starting_line_number} - 1,
20680
_saw_perl_dash_P => 0,
20681
_saw_perl_dash_w => 0,
20682
_saw_use_strict => 0,
20683
_saw_v_string => 0,
20684
_look_for_autoloader => $args{look_for_autoloader},
20685
_look_for_selfloader => $args{look_for_selfloader},
20686
_saw_autoloader => 0,
20687
_saw_selfloader => 0,
20688
_saw_hash_bang => 0,
20691
_saw_negative_indentation => 0,
20692
_started_tokenizing => 0,
20693
_line_buffer_object => $line_buffer_object,
20694
_debugger_object => $args{debugger_object},
20695
_diagnostics_object => $args{diagnostics_object},
20696
_logger_object => $args{logger_object},
20697
_unexpected_error_count => 0,
20698
_started_looking_for_here_target_at => 0,
20699
_nearly_matched_here_target_at => undef,
20701
_rlower_case_labels_at => undef,
20704
prepare_for_a_new_file();
20705
find_starting_indentation_level();
20707
bless $tokenizer_self, $class;
20709
# This is not a full class yet, so die if an attempt is made to
20710
# create more than one object.
20712
if ( _increment_count() > 1 ) {
20714
"Attempt to create more than 1 object in $class, which is not a true class yet\n";
20717
return $tokenizer_self;
20721
# interface to Perl::Tidy::Logger routines
20723
my $logger_object = $tokenizer_self->{_logger_object};
20724
if ($logger_object) {
20725
$logger_object->warning(@_);
20730
my $logger_object = $tokenizer_self->{_logger_object};
20731
if ($logger_object) {
20732
$logger_object->complain(@_);
20736
# Logger interface: passes its arguments to the write_logfile_entry
# method of the logger object held in $tokenizer_self; the call is
# skipped when no logger object is attached.
sub write_logfile_entry {
20737
my $logger_object = $tokenizer_self->{_logger_object};
20738
if ($logger_object) {
20739
$logger_object->write_logfile_entry(@_);
20743
# Logger interface: calls interrupt_logfile() on the logger object held
# in $tokenizer_self, if one is attached.
sub interrupt_logfile {
20744
my $logger_object = $tokenizer_self->{_logger_object};
20745
if ($logger_object) {
20746
$logger_object->interrupt_logfile();
20750
# Logger interface: calls resume_logfile() on the logger object held in
# $tokenizer_self, if one is attached.
sub resume_logfile {
20751
my $logger_object = $tokenizer_self->{_logger_object};
20752
if ($logger_object) {
20753
$logger_object->resume_logfile();
20757
# Logger interface: calls increment_brace_error() on the logger object
# held in $tokenizer_self, if one is attached.
sub increment_brace_error {
20758
my $logger_object = $tokenizer_self->{_logger_object};
20759
if ($logger_object) {
20760
$logger_object->increment_brace_error();
20764
# Logger interface: calls report_definite_bug() on the logger object
# held in $tokenizer_self, if one is attached.
sub report_definite_bug {
20765
my $logger_object = $tokenizer_self->{_logger_object};
20766
if ($logger_object) {
20767
$logger_object->report_definite_bug();
20771
# Logger interface: passes its arguments to the brace_warning method of
# the logger object held in $tokenizer_self, if one is attached.
sub brace_warning {
20772
my $logger_object = $tokenizer_self->{_logger_object};
20773
if ($logger_object) {
20774
$logger_object->brace_warning(@_);
20778
# Logger interface: calls get_saw_brace_error() on the logger object
# held in $tokenizer_self, if one is attached.
# NOTE(review): what callers receive when no logger is attached is not
# evident from this code -- confirm.
sub get_saw_brace_error {
20779
my $logger_object = $tokenizer_self->{_logger_object};
20780
if ($logger_object) {
20781
$logger_object->get_saw_brace_error();
20788
# interface to Perl::Tidy::Diagnostics routines
20789
# Passes its arguments to the write_diagnostics method of the
# diagnostics object held in $tokenizer_self; the call is skipped when
# no diagnostics object is attached.
sub write_diagnostics {
20790
if ( $tokenizer_self->{_diagnostics_object} ) {
20791
$tokenizer_self->{_diagnostics_object}->write_diagnostics(@_);
20795
# Reports conditions found in the tokenizer state stored in
# $tokenizer_self:
#   - a final indentation level that differs from the starting level
#   - a hash-bang line that was looked for but never seen
#   - input that ended inside a format, pod section, here-document or
#     quote
#   - suggestions about -w / 'use warnings', -P, 'use strict' and
#     lower-case labels, which are written to the log file
# It also calls check_final_nesting_depths().
sub report_tokenization_errors {
20799
my $level = get_indentation_level();
20800
if ( $level != $tokenizer_self->{_starting_level} ) {
20801
warning("final indentation level: $level\n");
20804
check_final_nesting_depths();
20806
# a hash-bang line was requested but never found
if ( $tokenizer_self->{_look_for_hash_bang}
20807
&& !$tokenizer_self->{_saw_hash_bang} )
20810
"hit EOF without seeing hash-bang line; maybe don't need -x?\n");
20813
if ( $tokenizer_self->{_in_format} ) {
20814
warning("hit EOF while in format description\n");
20817
if ( $tokenizer_self->{_in_pod} ) {
20819
# Just write log entry if this is after __END__ or __DATA__
20820
# because this happens too often, and it is not likely to be
20822
if ( $tokenizer_self->{_saw_data} || $tokenizer_self->{_saw_end} ) {
20823
write_logfile_entry(
20824
"hit eof while in pod documentation (no =cut seen)\n\tthis can cause trouble with some pod utilities\n"
20830
"hit eof while in pod documentation (no =cut seen)\n\tthis can cause trouble with some pod utilities\n"
20836
# unterminated here-document: report where the search for its target began
if ( $tokenizer_self->{_in_here_doc} ) {
20837
my $here_doc_target = $tokenizer_self->{_here_doc_target};
20838
my $started_looking_for_here_target_at =
20839
$tokenizer_self->{_started_looking_for_here_target_at};
20840
if ($here_doc_target) {
20842
"hit EOF in here document starting at line $started_looking_for_here_target_at with target: $here_doc_target\n"
20847
"hit EOF in here document starting at line $started_looking_for_here_target_at with empty target string\n"
20850
# mention a line that would have matched the target but for whitespace
my $nearly_matched_here_target_at =
20851
$tokenizer_self->{_nearly_matched_here_target_at};
20852
if ($nearly_matched_here_target_at) {
20854
"NOTE: almost matched at input line $nearly_matched_here_target_at except for whitespace\n"
20859
# unterminated quote (or attribute list)
if ( $tokenizer_self->{_in_quote} ) {
20860
my $line_start_quote = $tokenizer_self->{_line_start_quote};
20861
my $quote_target = $tokenizer_self->{_quote_target};
20863
( $tokenizer_self->{_in_attribute_list} )
20867
"hit EOF seeking end of $what starting at line $line_start_quote ending in $quote_target\n"
20871
# suggest enabling warnings; the wording depends on the running perl ($])
unless ( $tokenizer_self->{_saw_perl_dash_w} ) {
20872
if ( $] < 5.006 ) {
20873
write_logfile_entry("Suggest including '-w parameter'\n");
20876
write_logfile_entry("Suggest including 'use warnings;'\n");
20880
if ( $tokenizer_self->{_saw_perl_dash_P} ) {
20881
write_logfile_entry("Use of -P parameter for defines is discouraged\n");
20884
unless ( $tokenizer_self->{_saw_use_strict} ) {
20885
write_logfile_entry("Suggest including 'use strict;'\n");
20888
# it is suggested that labels have at least one upper case character
20889
# for legibility and to avoid code breakage as new keywords are introduced
20890
if ( $tokenizer_self->{_rlower_case_labels_at} ) {
20891
my @lower_case_labels_at =
20892
@{ $tokenizer_self->{_rlower_case_labels_at} };
20893
write_logfile_entry(
20894
"Suggest using upper case characters in label(s)\n");
20896
write_logfile_entry(" defined at line(s): (@lower_case_labels_at)\n");
20900
# Records the first v-string seen: while $tokenizer_self->{_saw_v_string}
# is still false it is set to the current value of _last_line_number.
# A message about missing v-string support is issued when the running
# perl is older than 5.006.
sub report_v_string {
20902
# warn if this version can't handle v-strings
20904
unless ( $tokenizer_self->{_saw_v_string} ) {
20905
$tokenizer_self->{_saw_v_string} = $tokenizer_self->{_last_line_number};
20907
if ( $] < 5.006 ) {
20909
"Found v-string '$tok' but v-strings are not implemented in your version of perl; see Camel 3 book ch 2\n"
20914
# Accessor: returns the value stored in
# $tokenizer_self->{_last_line_number}.
sub get_input_line_number {
20915
return $tokenizer_self->{_last_line_number};
20918
# returns the next tokenized line
20923
# USES GLOBAL VARIABLES: $tokenizer_self, $brace_depth,
20924
# $square_bracket_depth, $paren_depth
20926
my $input_line = $tokenizer_self->{_line_buffer_object}->get_line();
20927
$tokenizer_self->{_line_text} = $input_line;
20929
return undef unless ($input_line);
20931
my $input_line_number = ++$tokenizer_self->{_last_line_number};
20933
# Find and remove what characters terminate this line, including any
20935
my $input_line_separator = "";
20936
if ( chomp($input_line) ) { $input_line_separator = $/ }
20938
# TODO: what other characters should be included here?
20939
if ( $input_line =~ s/((\r|\035|\032)+)$// ) {
20940
$input_line_separator = $2 . $input_line_separator;
20943
# for backwards compatibility we keep the line text terminated with
20944
# a newline character
20945
$input_line .= "\n";
20946
$tokenizer_self->{_line_text} = $input_line; # update
20948
# create a data structure describing this line which will be
20949
# returned to the caller.
20951
# _line_type codes are:
20952
# SYSTEM - system-specific code before hash-bang line
20953
# CODE - line of perl code (including comments)
20954
# POD_START - line starting pod, such as '=head'
20955
# POD - pod documentation text
20956
# POD_END - last line of pod section, '=cut'
20957
# HERE - text of here-document
20958
# HERE_END - last line of here-doc (target word)
20959
# FORMAT - format section
20960
# FORMAT_END - last line of format section, '.'
20961
# DATA_START - __DATA__ line
20962
# DATA - unidentified text following __DATA__
20963
# END_START - __END__ line
20964
# END - unidentified text following __END__
20965
# ERROR - we are in big trouble, probably not a perl script
20968
# _curly_brace_depth - depth of curly braces at start of line
20969
# _square_bracket_depth - depth of square brackets at start of line
20970
# _paren_depth - depth of parens at start of line
20971
# _starting_in_quote - this line continues a multi-line quote
20972
# (so don't trim leading blanks!)
20973
# _ending_in_quote - this line ends in a multi-line quote
20974
# (so don't trim trailing blanks!)
20975
my $line_of_tokens = {
20976
_line_type => 'EOF',
20977
_line_text => $input_line,
20978
_line_number => $input_line_number,
20979
_rtoken_type => undef,
20982
_rslevels => undef,
20983
_rblock_type => undef,
20984
_rcontainer_type => undef,
20985
_rcontainer_environment => undef,
20986
_rtype_sequence => undef,
20987
_rnesting_tokens => undef,
20988
_rci_levels => undef,
20989
_rnesting_blocks => undef,
20990
_python_indentation_level => -1, ## 0,
20991
_starting_in_quote => 0, # to be set by subroutine
20992
_ending_in_quote => 0,
20993
_curly_brace_depth => $brace_depth,
20994
_square_bracket_depth => $square_bracket_depth,
20995
_paren_depth => $paren_depth,
20996
_quote_character => '',
20999
# must print line unchanged if we are in a here document
21000
if ( $tokenizer_self->{_in_here_doc} ) {
21002
$line_of_tokens->{_line_type} = 'HERE';
21003
my $here_doc_target = $tokenizer_self->{_here_doc_target};
21004
my $here_quote_character = $tokenizer_self->{_here_quote_character};
21005
my $candidate_target = $input_line;
21006
chomp $candidate_target;
21007
if ( $candidate_target eq $here_doc_target ) {
21008
$tokenizer_self->{_nearly_matched_here_target_at} = undef;
21009
$line_of_tokens->{_line_type} = 'HERE_END';
21010
write_logfile_entry("Exiting HERE document $here_doc_target\n");
21012
my $rhere_target_list = $tokenizer_self->{_rhere_target_list};
21013
if (@$rhere_target_list) { # there can be multiple here targets
21014
( $here_doc_target, $here_quote_character ) =
21015
@{ shift @$rhere_target_list };
21016
$tokenizer_self->{_here_doc_target} = $here_doc_target;
21017
$tokenizer_self->{_here_quote_character} =
21018
$here_quote_character;
21019
write_logfile_entry(
21020
"Entering HERE document $here_doc_target\n");
21021
$tokenizer_self->{_nearly_matched_here_target_at} = undef;
21022
$tokenizer_self->{_started_looking_for_here_target_at} =
21023
$input_line_number;
21026
$tokenizer_self->{_in_here_doc} = 0;
21027
$tokenizer_self->{_here_doc_target} = "";
21028
$tokenizer_self->{_here_quote_character} = "";
21032
# check for error of extra whitespace
21033
# note for PERL6: leading whitespace is allowed
21035
$candidate_target =~ s/\s*$//;
21036
$candidate_target =~ s/^\s*//;
21037
if ( $candidate_target eq $here_doc_target ) {
21038
$tokenizer_self->{_nearly_matched_here_target_at} =
21039
$input_line_number;
21042
return $line_of_tokens;
21045
# must print line unchanged if we are in a format section
21046
elsif ( $tokenizer_self->{_in_format} ) {
21048
if ( $input_line =~ /^\.[\s#]*$/ ) {
21049
write_logfile_entry("Exiting format section\n");
21050
$tokenizer_self->{_in_format} = 0;
21051
$line_of_tokens->{_line_type} = 'FORMAT_END';
21054
$line_of_tokens->{_line_type} = 'FORMAT';
21056
return $line_of_tokens;
21059
# must print line unchanged if we are in pod documentation
21060
elsif ( $tokenizer_self->{_in_pod} ) {
21062
$line_of_tokens->{_line_type} = 'POD';
21063
if ( $input_line =~ /^=cut/ ) {
21064
$line_of_tokens->{_line_type} = 'POD_END';
21065
write_logfile_entry("Exiting POD section\n");
21066
$tokenizer_self->{_in_pod} = 0;
21068
if ( $input_line =~ /^\#\!.*perl\b/ ) {
21070
"Hash-bang in pod can cause older versions of perl to fail! \n"
21074
return $line_of_tokens;
21077
# must print line unchanged if we have seen a severe error (i.e., we
21078
# are seeing illegal tokens and cannot continue. Syntax errors do
21079
# not pass this route). Calling routine can decide what to do, but
21080
# the default can be to just pass all lines as if they were after __END__
21081
elsif ( $tokenizer_self->{_in_error} ) {
21082
$line_of_tokens->{_line_type} = 'ERROR';
21083
return $line_of_tokens;
21086
# print line unchanged if we are __DATA__ section
21087
elsif ( $tokenizer_self->{_in_data} ) {
21089
# ...but look for POD
21090
# Note that the _in_data and _in_end flags remain set
21091
# so that we return to that state after seeing the
21092
# end of a pod section
21093
if ( $input_line =~ /^=(?!cut)/ ) {
21094
$line_of_tokens->{_line_type} = 'POD_START';
21095
write_logfile_entry("Entering POD section\n");
21096
$tokenizer_self->{_in_pod} = 1;
21097
return $line_of_tokens;
21100
$line_of_tokens->{_line_type} = 'DATA';
21101
return $line_of_tokens;
21105
# print line unchanged if we are in __END__ section
21106
elsif ( $tokenizer_self->{_in_end} ) {
21108
# ...but look for POD
21109
# Note that the _in_data and _in_end flags remain set
21110
# so that we return to that state after seeing the
21111
# end of a pod section
21112
if ( $input_line =~ /^=(?!cut)/ ) {
21113
$line_of_tokens->{_line_type} = 'POD_START';
21114
write_logfile_entry("Entering POD section\n");
21115
$tokenizer_self->{_in_pod} = 1;
21116
return $line_of_tokens;
21119
$line_of_tokens->{_line_type} = 'END';
21120
return $line_of_tokens;
21124
# check for a hash-bang line if we haven't seen one
21125
if ( !$tokenizer_self->{_saw_hash_bang} ) {
21126
if ( $input_line =~ /^\#\!.*perl\b/ ) {
21127
$tokenizer_self->{_saw_hash_bang} = $input_line_number;
21129
# check for -w and -P flags
21130
if ( $input_line =~ /^\#\!.*perl\s.*-.*P/ ) {
21131
$tokenizer_self->{_saw_perl_dash_P} = 1;
21134
if ( $input_line =~ /^\#\!.*perl\s.*-.*w/ ) {
21135
$tokenizer_self->{_saw_perl_dash_w} = 1;
21138
if ( ( $input_line_number > 1 )
21139
&& ( !$tokenizer_self->{_look_for_hash_bang} ) )
21142
# this is helpful for VMS systems; we may have accidentally
21143
# tokenized some DCL commands
21144
if ( $tokenizer_self->{_started_tokenizing} ) {
21146
"There seems to be a hash-bang after line 1; do you need to run with -x ?\n"
21150
complain("Useless hash-bang after line 1\n");
21154
# Report the leading hash-bang as a system line
21155
# This will prevent -dac from deleting it
21157
$line_of_tokens->{_line_type} = 'SYSTEM';
21158
return $line_of_tokens;
21163
# wait for a hash-bang before parsing if the user invoked us with -x
21164
if ( $tokenizer_self->{_look_for_hash_bang}
21165
&& !$tokenizer_self->{_saw_hash_bang} )
21167
$line_of_tokens->{_line_type} = 'SYSTEM';
21168
return $line_of_tokens;
21171
# a first line of the form ': #' will be marked as SYSTEM
21172
# since lines of this form may be used by tcsh
21173
if ( $input_line_number == 1 && $input_line =~ /^\s*\:\s*\#/ ) {
21174
$line_of_tokens->{_line_type} = 'SYSTEM';
21175
return $line_of_tokens;
21178
# now we know that it is ok to tokenize the line...
21179
# the line tokenizer will modify any of these private variables:
21180
# _rhere_target_list
21187
my $ending_in_quote_last = $tokenizer_self->{_in_quote};
21188
tokenize_this_line($line_of_tokens);
21190
# Now finish defining the return structure and return it
21191
$line_of_tokens->{_ending_in_quote} = $tokenizer_self->{_in_quote};
21193
# handle severe error (binary data in script)
21194
if ( $tokenizer_self->{_in_error} ) {
21195
$tokenizer_self->{_in_quote} = 0; # to avoid any more messages
21196
warning("Giving up after error\n");
21197
$line_of_tokens->{_line_type} = 'ERROR';
21198
reset_indentation_level(0); # avoid error messages
21199
return $line_of_tokens;
21202
# handle start of pod documentation
21203
if ( $tokenizer_self->{_in_pod} ) {
21205
# This gets tricky..above a __DATA__ or __END__ section, perl
21206
# accepts '=cut' as the start of pod section. But afterwards,
21207
# only pod utilities see it and they may ignore an =cut without
21208
# leading =head. In any case, this isn't good.
21209
if ( $input_line =~ /^=cut\b/ ) {
21210
if ( $tokenizer_self->{_saw_data} || $tokenizer_self->{_saw_end} ) {
21211
complain("=cut while not in pod ignored\n");
21212
$tokenizer_self->{_in_pod} = 0;
21213
$line_of_tokens->{_line_type} = 'POD_END';
21216
$line_of_tokens->{_line_type} = 'POD_START';
21218
"=cut starts a pod section .. this can fool pod utilities.\n"
21220
write_logfile_entry("Entering POD section\n");
21225
$line_of_tokens->{_line_type} = 'POD_START';
21226
write_logfile_entry("Entering POD section\n");
21229
return $line_of_tokens;
21232
# update indentation levels for log messages
21233
if ( $input_line !~ /^\s*$/ ) {
21234
my $rlevels = $line_of_tokens->{_rlevels};
21235
my $structural_indentation_level = $$rlevels[0];
21236
my ( $python_indentation_level, $msg ) =
21237
find_indentation_level( $input_line, $structural_indentation_level );
21238
if ($msg) { write_logfile_entry("$msg") }
21239
if ( $tokenizer_self->{_know_input_tabstr} == 1 ) {
21240
$line_of_tokens->{_python_indentation_level} =
21241
$python_indentation_level;
21245
# see if this line contains here doc targets
21246
my $rhere_target_list = $tokenizer_self->{_rhere_target_list};
21247
if (@$rhere_target_list) {
21249
my ( $here_doc_target, $here_quote_character ) =
21250
@{ shift @$rhere_target_list };
21251
$tokenizer_self->{_in_here_doc} = 1;
21252
$tokenizer_self->{_here_doc_target} = $here_doc_target;
21253
$tokenizer_self->{_here_quote_character} = $here_quote_character;
21254
write_logfile_entry("Entering HERE document $here_doc_target\n");
21255
$tokenizer_self->{_started_looking_for_here_target_at} =
21256
$input_line_number;
21259
# NOTE: __END__ and __DATA__ statements are written unformatted
21260
# because they can theoretically contain additional characters
21261
# which are not tokenized (and cannot be read with <DATA> either!).
21262
if ( $tokenizer_self->{_in_data} ) {
21263
$line_of_tokens->{_line_type} = 'DATA_START';
21264
write_logfile_entry("Starting __DATA__ section\n");
21265
$tokenizer_self->{_saw_data} = 1;
21267
# keep parsing after __DATA__ if use SelfLoader was seen
21268
if ( $tokenizer_self->{_saw_selfloader} ) {
21269
$tokenizer_self->{_in_data} = 0;
21270
write_logfile_entry(
21271
"SelfLoader seen, continuing; -nlsl deactivates\n");
21274
return $line_of_tokens;
21277
elsif ( $tokenizer_self->{_in_end} ) {
21278
$line_of_tokens->{_line_type} = 'END_START';
21279
write_logfile_entry("Starting __END__ section\n");
21280
$tokenizer_self->{_saw_end} = 1;
21282
# keep parsing after __END__ if use AutoLoader was seen
21283
if ( $tokenizer_self->{_saw_autoloader} ) {
21284
$tokenizer_self->{_in_end} = 0;
21285
write_logfile_entry(
21286
"AutoLoader seen, continuing; -nlal deactivates\n");
21288
return $line_of_tokens;
21291
# now, finally, we know that this line is type 'CODE'
21292
$line_of_tokens->{_line_type} = 'CODE';
21294
# remember if we have seen any real code
21295
if ( !$tokenizer_self->{_started_tokenizing}
21296
&& $input_line !~ /^\s*$/
21297
&& $input_line !~ /^\s*#/ )
21299
$tokenizer_self->{_started_tokenizing} = 1;
21302
if ( $tokenizer_self->{_debugger_object} ) {
21303
$tokenizer_self->{_debugger_object}->write_debug_entry($line_of_tokens);
21306
# Note: if keyword 'format' occurs in this line code, it is still CODE
21307
# (keyword 'format' need not start a line)
21308
if ( $tokenizer_self->{_in_format} ) {
21309
write_logfile_entry("Entering format section\n");
21312
if ( $tokenizer_self->{_in_quote}
21313
and ( $tokenizer_self->{_line_start_quote} < 0 ) )
21316
#if ( ( my $quote_target = get_quote_target() ) !~ /^\s*$/ ) {
21318
( my $quote_target = $tokenizer_self->{_quote_target} ) !~ /^\s*$/ )
21320
$tokenizer_self->{_line_start_quote} = $input_line_number;
21321
write_logfile_entry(
21322
"Start multi-line quote or pattern ending in $quote_target\n");
21325
elsif ( ( $tokenizer_self->{_line_start_quote} >= 0 )
21326
and !$tokenizer_self->{_in_quote} )
21328
$tokenizer_self->{_line_start_quote} = -1;
21329
write_logfile_entry("End of multi-line quote or pattern\n");
21332
# we are returning a line of CODE
21333
return $line_of_tokens;
21336
sub find_starting_indentation_level {
21338
# USES GLOBAL VARIABLES: $tokenizer_self
21339
my $starting_level = 0;
21340
my $know_input_tabstr = -1; # flag for find_indentation_level
21342
# use value if given as parameter
21343
if ( $tokenizer_self->{_know_starting_level} ) {
21344
$starting_level = $tokenizer_self->{_starting_level};
21347
# if we know there is a hash_bang line, the level must be zero
21348
elsif ( $tokenizer_self->{_look_for_hash_bang} ) {
21349
$tokenizer_self->{_know_starting_level} = 1;
21352
# otherwise figure it out from the input file
21356
my $structural_indentation_level = -1; # flag for find_indentation_level
21360
$tokenizer_self->{_line_buffer_object}->peek_ahead( $i++ ) )
21363
# if first line is #! then assume starting level is zero
21364
if ( $i == 1 && $line =~ /^\#\!/ ) {
21365
$starting_level = 0;
21368
next if ( $line =~ /^\s*#/ ); # must not be comment
21369
next if ( $line =~ /^\s*$/ ); # must not be blank
21370
( $starting_level, $msg ) =
21371
find_indentation_level( $line, $structural_indentation_level );
21372
if ($msg) { write_logfile_entry("$msg") }
21375
$msg = "Line $i implies starting-indentation-level = $starting_level\n";
21377
if ( $starting_level > 0 ) {
21379
my $input_tabstr = $tokenizer_self->{_input_tabstr};
21380
if ( $input_tabstr eq "\t" ) {
21381
$msg .= "by guessing input tabbing uses 1 tab per level\n";
21384
my $cols = length($input_tabstr);
21386
"by guessing input tabbing uses $cols blanks per level\n";
21389
write_logfile_entry("$msg");
21391
$tokenizer_self->{_starting_level} = $starting_level;
21392
reset_indentation_level($starting_level);
21395
# Find indentation level given an input line. At the same time, try to
21396
# figure out the input tabbing scheme.
21398
# There are two types of calls:
21400
# Type 1: $structural_indentation_level < 0
21401
# In this case we have to guess $input_tabstr to figure out the level.
21403
# Type 2: $structural_indentation_level >= 0
21404
# In this case the level of this line is known, and this routine can
21405
# update the tabbing string, if still unknown, to make the level correct.
21407
sub find_indentation_level {
21408
my ( $line, $structural_indentation_level ) = @_;
21410
# USES GLOBAL VARIABLES: $tokenizer_self
21414
my $know_input_tabstr = $tokenizer_self->{_know_input_tabstr};
21415
my $input_tabstr = $tokenizer_self->{_input_tabstr};
21417
# find leading whitespace
21418
my $leading_whitespace = ( $line =~ /^(\s*)/ ) ? $1 : "";
21420
# make first guess at input tabbing scheme if necessary
21421
if ( $know_input_tabstr < 0 ) {
21423
$know_input_tabstr = 0;
21425
if ( $tokenizer_self->{_tabs} ) {
21426
$input_tabstr = "\t";
21427
if ( length($leading_whitespace) > 0 ) {
21428
if ( $leading_whitespace !~ /\t/ ) {
21430
my $cols = $tokenizer_self->{_indent_columns};
21432
if ( length($leading_whitespace) < $cols ) {
21433
$cols = length($leading_whitespace);
21435
$input_tabstr = " " x $cols;
21440
$input_tabstr = " " x $tokenizer_self->{_indent_columns};
21442
if ( length($leading_whitespace) > 0 ) {
21443
if ( $leading_whitespace =~ /^\t/ ) {
21444
$input_tabstr = "\t";
21448
$tokenizer_self->{_know_input_tabstr} = $know_input_tabstr;
21449
$tokenizer_self->{_input_tabstr} = $input_tabstr;
21452
# determine the input tabbing scheme if possible
21453
if ( ( $know_input_tabstr == 0 )
21454
&& ( length($leading_whitespace) > 0 )
21455
&& ( $structural_indentation_level > 0 ) )
21457
my $saved_input_tabstr = $input_tabstr;
21459
# check for common case of one tab per indentation level
21460
if ( $leading_whitespace eq "\t" x $structural_indentation_level ) {
21461
if ( $leading_whitespace eq "\t" x $structural_indentation_level ) {
21462
$input_tabstr = "\t";
21463
$msg = "Guessing old indentation was tab character\n";
21469
# detab any tabs based on 8 blanks per tab
21471
if ( $leading_whitespace =~ s/^\t+/ /g ) {
21472
$entabbed = "entabbed";
21475
# now compute tabbing from number of spaces
21477
length($leading_whitespace) / $structural_indentation_level;
21478
if ( $columns == int $columns ) {
21480
"Guessing old indentation was $columns $entabbed spaces\n";
21483
$columns = int $columns;
21485
"old indentation is unclear, using $columns $entabbed spaces\n";
21487
$input_tabstr = " " x $columns;
21489
$know_input_tabstr = 1;
21490
$tokenizer_self->{_know_input_tabstr} = $know_input_tabstr;
21491
$tokenizer_self->{_input_tabstr} = $input_tabstr;
21493
# see if mistakes were made
21494
if ( ( $tokenizer_self->{_starting_level} > 0 )
21495
&& !$tokenizer_self->{_know_starting_level} )
21498
if ( $input_tabstr ne $saved_input_tabstr ) {
21500
"I made a bad starting level guess; rerun with a value for -sil \n"
21506
# use current guess at input tabbing to get input indentation level
21508
# Patch to handle a common case of entabbed leading whitespace
21509
# If the leading whitespace equals 4 spaces and we also have
21510
# tabs, detab the input whitespace assuming 8 spaces per tab.
21511
if ( length($input_tabstr) == 4 ) {
21512
$leading_whitespace =~ s/^\t+/ /g;
21515
if ( ( my $len_tab = length($input_tabstr) ) > 0 ) {
21518
while ( substr( $leading_whitespace, $pos, $len_tab ) eq $input_tabstr )
21524
return ( $level, $msg );
21527
# This is a currently unused debug routine
21528
sub dump_functions {
21532
foreach $pkg ( keys %is_user_function ) {
21533
print $fh "\nnon-constant subs in package $pkg\n";
21535
foreach $sub ( keys %{ $is_user_function{$pkg} } ) {
21537
if ( $is_block_list_function{$pkg}{$sub} ) {
21538
$msg = 'block_list';
21541
if ( $is_block_function{$pkg}{$sub} ) {
21544
print $fh "$sub $msg\n";
21548
foreach $pkg ( keys %is_constant ) {
21549
print $fh "\nconstants and constant subs in package $pkg\n";
21551
foreach $sub ( keys %{ $is_constant{$pkg} } ) {
21552
print $fh "$sub\n";
21559
sub ones_count {

    # Count the '1' characters in a string of 1's and 0's.
    # example: ones_count("010101010101") gives 6
    #
    # The argument is copied first, so the caller's string is never
    # modified; tr/// with an empty replacement list only counts.
    my $bit_string = $_[0];
    return ( $bit_string =~ tr/1// );
}
21564
sub prepare_for_a_new_file {

    # Reset the tokenizer's package-level variables to their start-of-file
    # values, then reset the lexical tokenizer state by calling
    # initialize_tokenizer_state().  Takes no arguments.

    # previous tokens needed to determine what to expect next
    $last_nonblank_token      = ';';    # the only possible starting state which
    $last_nonblank_type       = ';';    # will make a leading brace a code block
    $last_nonblank_block_type = '';

    # scalars for remembering statement types across multiple lines
    $statement_type    = '';    # '' or 'use' or 'sub..' or 'case..'
    $in_attribute_list = 0;

    # scalars for remembering where we are in the file
    $current_package = "main";
    $context         = UNKNOWN_CONTEXT;

    # hashes used to remember function information
    %is_constant             = ();    # user-defined constants
    %is_user_function        = ();    # user-defined functions
    %user_function_prototype = ();    # their prototypes
    %is_block_function       = ();
    %is_block_list_function  = ();
    %saw_function_definition = ();

    # variables used to track depths of various containers
    # and report nesting errors
    # (the depth counters must be reset before they are used as indices below)
    $paren_depth          = 0;
    $brace_depth          = 0;
    $square_bracket_depth = 0;
    @current_depth[ 0 .. $#closing_brace_names ] =
      (0) x scalar @closing_brace_names;
    $total_depth = 0;
    @total_depth = ();
    @nesting_sequence_number[ 0 .. $#closing_brace_names ] =
      ( 0 .. $#closing_brace_names );
    @current_sequence_number = ();

    # Seed the bottom entry of each container stack.  Every stack is
    # indexed by its own depth counter: the paren_* arrays by $paren_depth,
    # the brace_* arrays by $brace_depth, and the square_bracket_* arrays
    # by $square_bracket_depth.
    $paren_type[$paren_depth]            = '';
    $paren_semicolon_count[$paren_depth] = 0;
    $paren_structural_type[$paren_depth] = '';
    $brace_type[$brace_depth] = ';';    # identify opening brace as code block
    $brace_structural_type[$brace_depth]                   = '';
    $brace_statement_type[$brace_depth]                    = "";
    $brace_context[$brace_depth]                           = UNKNOWN_CONTEXT;
    $brace_package[$brace_depth]                           = $current_package;
    $square_bracket_type[$square_bracket_depth]            = '';
    $square_bracket_structural_type[$square_bracket_depth] = '';

    initialize_tokenizer_state();
}
21613
{ # begin tokenize_this_line
21615
use constant BRACE => 0;
21616
use constant SQUARE_BRACKET => 1;
21617
use constant PAREN => 2;
21618
use constant QUESTION_COLON => 3;
21620
# TV1: scalars for processing one LINE.
21621
# Re-initialized on each entry to sub tokenize_this_line.
21623
$block_type, $container_type, $expecting,
21624
$i, $i_tok, $input_line,
21625
$input_line_number, $last_nonblank_i, $max_token_index,
21626
$next_tok, $next_type, $peeked_ahead,
21627
$prototype, $rhere_target_list, $rtoken_map,
21628
$rtoken_type, $rtokens, $tok,
21629
$type, $type_sequence, $indent_flag,
21632
# TV2: refs to ARRAYS for processing one LINE
21633
# Re-initialized on each call.
21634
my $routput_token_list = []; # stack of output token indexes
21635
my $routput_token_type = []; # token types
21636
my $routput_block_type = []; # types of code block
21637
my $routput_container_type = []; # paren types, such as if, elsif, ..
21638
my $routput_type_sequence = []; # nesting sequential number
21639
my $routput_indent_flag = []; #
21641
# TV3: SCALARS for quote variables. These are initialized with a
21642
# subroutine call and continually updated as lines are processed.
21643
my ( $in_quote, $quote_type, $quote_character, $quote_pos, $quote_depth,
21644
$quoted_string_1, $quoted_string_2, $allowed_quote_modifiers, );
21646
# TV4: SCALARS for multi-line identifiers and
21647
# statements. These are initialized with a subroutine call
21648
# and continually updated as lines are processed.
21649
my ( $id_scan_state, $identifier, $want_paren, $indented_if_level );
21651
# TV5: SCALARS for tracking indentation level.
21652
# Initialized once and continually updated as lines are
21655
$nesting_token_string, $nesting_type_string,
21656
$nesting_block_string, $nesting_block_flag,
21657
$nesting_list_string, $nesting_list_flag,
21658
$ci_string_in_tokenizer, $continuation_string_in_tokenizer,
21659
$in_statement_continuation, $level_in_tokenizer,
21660
$slevel_in_tokenizer, $rslevel_stack,
21663
# TV6: SCALARS for remembering several previous
21664
# tokens. Initialized once and continually updated as
21665
# lines are processed.
21667
$last_nonblank_container_type, $last_nonblank_type_sequence,
21668
$last_last_nonblank_token, $last_last_nonblank_type,
21669
$last_last_nonblank_block_type, $last_last_nonblank_container_type,
21670
$last_last_nonblank_type_sequence, $last_nonblank_prototype,
21673
# ----------------------------------------------------------------
21674
# beginning of tokenizer variable access and manipulation routines
21675
# ----------------------------------------------------------------
21677
sub initialize_tokenizer_state {
21679
# TV1: initialized on each call
21680
# TV2: initialized on each call
21684
$quote_character = "";
21687
$quoted_string_1 = "";
21688
$quoted_string_2 = "";
21689
$allowed_quote_modifiers = "";
21692
$id_scan_state = '';
21695
$indented_if_level = 0;
21698
$nesting_token_string = "";
21699
$nesting_type_string = "";
21700
$nesting_block_string = '1'; # initially in a block
21701
$nesting_block_flag = 1;
21702
$nesting_list_string = '0'; # initially not in a list
21703
$nesting_list_flag = 0; # initially not in a list
21704
$ci_string_in_tokenizer = "";
21705
$continuation_string_in_tokenizer = "0";
21706
$in_statement_continuation = 0;
21707
$level_in_tokenizer = 0;
21708
$slevel_in_tokenizer = 0;
21709
$rslevel_stack = [];
21712
$last_nonblank_container_type = '';
21713
$last_nonblank_type_sequence = '';
21714
$last_last_nonblank_token = ';';
21715
$last_last_nonblank_type = ';';
21716
$last_last_nonblank_block_type = '';
21717
$last_last_nonblank_container_type = '';
21718
$last_last_nonblank_type_sequence = '';
21719
$last_nonblank_prototype = "";
21722
sub save_tokenizer_state {

    # Snapshot the tokenizer's lexical state variables.
    # Returns a reference to an array holding six array references, one
    # per variable group (TV1 .. TV6), in the order expected by
    # restore_tokenizer_state.
    return [

        # TV1: scalars for processing one LINE
        [
            $block_type,        $container_type,    $expecting,
            $i,                 $i_tok,             $input_line,
            $input_line_number, $last_nonblank_i,   $max_token_index,
            $next_tok,          $next_type,         $peeked_ahead,
            $prototype,         $rhere_target_list, $rtoken_map,
            $rtoken_type,       $rtokens,           $tok,
            $type,              $type_sequence,     $indent_flag,
        ],

        # TV2: refs to ARRAYS for processing one LINE
        [
            $routput_token_list,    $routput_token_type,
            $routput_block_type,    $routput_container_type,
            $routput_type_sequence, $routput_indent_flag,
        ],

        # TV3: SCALARS for quote variables
        [
            $in_quote,        $quote_type,
            $quote_character, $quote_pos,
            $quote_depth,     $quoted_string_1,
            $quoted_string_2, $allowed_quote_modifiers,
        ],

        # TV4: SCALARS for multi-line identifiers and statements
        [ $id_scan_state, $identifier, $want_paren, $indented_if_level ],

        # TV5: SCALARS for tracking indentation level
        [
            $nesting_token_string,      $nesting_type_string,
            $nesting_block_string,      $nesting_block_flag,
            $nesting_list_string,       $nesting_list_flag,
            $ci_string_in_tokenizer,    $continuation_string_in_tokenizer,
            $in_statement_continuation, $level_in_tokenizer,
            $slevel_in_tokenizer,       $rslevel_stack,
        ],

        # TV6: SCALARS for remembering several previous tokens
        [
            $last_nonblank_container_type,
            $last_nonblank_type_sequence,
            $last_last_nonblank_token,
            $last_last_nonblank_type,
            $last_last_nonblank_block_type,
            $last_last_nonblank_container_type,
            $last_last_nonblank_type_sequence,
            $last_nonblank_prototype,
        ],
    ];
}
21772
sub restore_tokenizer_state {

    # Restore the tokenizer's lexical state variables from a snapshot.
    #
    # Parameter:
    #   $rstate - reference to an array of six array references
    #             (TV1 .. TV6), as returned by save_tokenizer_state
    #
    # Each target list below must name the same variables, in the same
    # order, as the corresponding list in save_tokenizer_state.
    my ($rstate) = @_;
    my ( $rTV1, $rTV2, $rTV3, $rTV4, $rTV5, $rTV6 ) = @{$rstate};

    # TV1: scalars for processing one LINE
    (
        $block_type,        $container_type,    $expecting,
        $i,                 $i_tok,             $input_line,
        $input_line_number, $last_nonblank_i,   $max_token_index,
        $next_tok,          $next_type,         $peeked_ahead,
        $prototype,         $rhere_target_list, $rtoken_map,
        $rtoken_type,       $rtokens,           $tok,
        $type,              $type_sequence,     $indent_flag,
    ) = @{$rTV1};

    # TV2: refs to ARRAYS for processing one LINE.
    # The sixth slot is $routput_indent_flag.  Naming
    # $routput_type_sequence twice here would leave the indent-flag
    # list unrestored and overwrite the type-sequence list with it.
    (
        $routput_token_list,    $routput_token_type,
        $routput_block_type,    $routput_container_type,
        $routput_type_sequence, $routput_indent_flag,
    ) = @{$rTV2};

    # TV3: SCALARS for quote variables
    (
        $in_quote, $quote_type, $quote_character, $quote_pos, $quote_depth,
        $quoted_string_1, $quoted_string_2, $allowed_quote_modifiers,
    ) = @{$rTV3};

    # TV4: SCALARS for multi-line identifiers and statements
    ( $id_scan_state, $identifier, $want_paren, $indented_if_level ) =
      @{$rTV4};

    # TV5: SCALARS for tracking indentation level
    (
        $nesting_token_string,      $nesting_type_string,
        $nesting_block_string,      $nesting_block_flag,
        $nesting_list_string,       $nesting_list_flag,
        $ci_string_in_tokenizer,    $continuation_string_in_tokenizer,
        $in_statement_continuation, $level_in_tokenizer,
        $slevel_in_tokenizer,       $rslevel_stack,
    ) = @{$rTV5};

    # TV6: SCALARS for remembering several previous tokens
    (
        $last_nonblank_container_type,
        $last_nonblank_type_sequence,
        $last_last_nonblank_token,
        $last_last_nonblank_type,
        $last_last_nonblank_block_type,
        $last_last_nonblank_container_type,
        $last_last_nonblank_type_sequence,
        $last_nonblank_prototype,
    ) = @{$rTV6};
}
21820
sub get_indentation_level {

    # Return the tokenizer's current indentation level.
    # patch to avoid reporting error if indented if is not terminated:
    # while $indented_if_level is set, report one level less.
    return $indented_if_level
      ? $level_in_tokenizer - 1
      : $level_in_tokenizer;
}
21827
sub reset_indentation_level {

    # Set both tokenizer level counters to the given value and push
    # that value onto the $rslevel_stack list.
    #
    # Parameter:
    #   $_[0] - the new indentation level
    my ($new_level) = @_;
    $level_in_tokenizer = $slevel_in_tokenizer = $new_level;
    push @{$rslevel_stack}, $slevel_in_tokenizer;
}
21834
$peeked_ahead = defined( $_[0] ) ? $_[0] : $peeked_ahead;
21837
# ------------------------------------------------------------
21838
# end of tokenizer variable access and manipulation routines
21839
# ------------------------------------------------------------
21841
# ------------------------------------------------------------
21842
# beginning of various scanner interface routines
21843
# ------------------------------------------------------------
21844
sub scan_replacement_text {
21846
# check for here-docs in replacement text invoked by
21847
# a substitution operator with executable modifier 'e'.
21850
# $replacement_text
21852
# $rht = reference to any here-doc targets
21853
my ($replacement_text) = @_;
21856
return undef unless ( $replacement_text =~ /<</ );
21858
write_logfile_entry("scanning replacement text for here-doc targets\n");
21860
# save the logger object for error messages
21861
my $logger_object = $tokenizer_self->{_logger_object};
21863
# localize all package variables
21865
$tokenizer_self, $last_nonblank_token,
21866
$last_nonblank_type, $last_nonblank_block_type,
21867
$statement_type, $in_attribute_list,
21868
$current_package, $context,
21869
%is_constant, %is_user_function,
21870
%user_function_prototype, %is_block_function,
21871
%is_block_list_function, %saw_function_definition,
21872
$brace_depth, $paren_depth,
21873
$square_bracket_depth, @current_depth,
21874
@total_depth, $total_depth,
21875
@nesting_sequence_number, @current_sequence_number,
21876
@paren_type, @paren_semicolon_count,
21877
@paren_structural_type, @brace_type,
21878
@brace_structural_type, @brace_statement_type,
21879
@brace_context, @brace_package,
21880
@square_bracket_type, @square_bracket_structural_type,
21881
@depth_array, @starting_line_of_current_depth,
21882
@nested_ternary_flag,
21885
# save all lexical variables
21886
my $rstate = save_tokenizer_state();
21887
_decrement_count(); # avoid error check for multiple tokenizers
21889
# make a new tokenizer
21891
my $rpending_logfile_message;
21892
my $source_object =
21893
Perl::Tidy::LineSource->new( \$replacement_text, $rOpts,
21894
$rpending_logfile_message );
21895
my $tokenizer = Perl::Tidy::Tokenizer->new(
21896
source_object => $source_object,
21897
logger_object => $logger_object,
21898
starting_line_number => $input_line_number,
21901
# scan the replacement text
21902
1 while ( $tokenizer->get_line() );
21904
# remove any here doc targets
21906
if ( $tokenizer_self->{_in_here_doc} ) {
21910
$tokenizer_self->{_here_doc_target},
21911
$tokenizer_self->{_here_quote_character}
21913
if ( $tokenizer_self->{_rhere_target_list} ) {
21914
push @{$rht}, @{ $tokenizer_self->{_rhere_target_list} };
21915
$tokenizer_self->{_rhere_target_list} = undef;
21917
$tokenizer_self->{_in_here_doc} = undef;
21920
# now its safe to report errors
21921
$tokenizer->report_tokenization_errors();
21923
# restore all tokenizer lexical variables
21924
restore_tokenizer_state($rstate);
21926
# return the here doc targets
21930
sub scan_bare_identifier {

    # Interface to scan_bare_identifier_do: pass it the current-line
    # tokenizer variables and store the four values it returns back
    # into $i, $tok, $type and $prototype.
    my @scan_result = scan_bare_identifier_do(
        $input_line, $i,          $tok, $type,
        $prototype,  $rtoken_map, $max_token_index
    );
    ( $i, $tok, $type, $prototype ) = @scan_result;
}
21936
sub scan_identifier {

    # Interface to scan_identifier_do: pass it the current scan
    # variables and store the five values it returns back into
    # $i, $tok, $type, $id_scan_state and $identifier.
    my @scan_result = scan_identifier_do(
        $i,       $id_scan_state,   $identifier,
        $rtokens, $max_token_index, $expecting
    );
    ( $i, $tok, $type, $id_scan_state, $identifier ) = @scan_result;
}
21943
( $i, $tok, $type, $id_scan_state ) =
21944
scan_id_do( $input_line, $i, $tok, $rtokens, $rtoken_map,
21945
$id_scan_state, $max_token_index );
21950
( $i, $type, $number ) =
21951
scan_number_do( $input_line, $i, $rtoken_map, $type,
21952
$max_token_index );
21956
# a sub to warn if token found where term expected
# Takes no arguments; works on the tokenizer's current-line variables.
# Calls unexpected() for the current token only when $expecting == TERM
# and %really_want_term has a true entry for $last_nonblank_type.
21957
sub error_if_expecting_TERM {
21958
if ( $expecting == TERM ) {
21959
if ( $really_want_term{$last_nonblank_type} ) {
21960
unexpected( $tok, "term", $i_tok, $last_nonblank_i, $rtoken_map,
21961
$rtoken_type, $input_line );
21967
# a sub to warn if token found where operator expected
# Optional argument $_[0]: the text to report as the unexpected thing;
# when it is not defined, the current token $tok is reported instead.
# Does nothing unless $expecting == OPERATOR.
21968
sub error_if_expecting_OPERATOR {
21969
if ( $expecting == OPERATOR ) {
21970
# use the caller-supplied description if given, else the token itself
my $thing = defined $_[0] ? $_[0] : $tok;
21971
unexpected( $thing, "operator", $i_tok, $last_nonblank_i,
21972
$rtoken_map, $rtoken_type, $input_line );
21973
# when the offending token is the first on its line ($i_tok == 0),
# also warn about a possibly missing ';' on a line above
if ( $i_tok == 0 ) {
21974
interrupt_logfile();
21975
warning("Missing ';' above?\n");
21982
# ------------------------------------------------------------
21983
# end scanner interfaces
21984
# ------------------------------------------------------------
21986
my %is_for_foreach;
21987
@_ = qw(for foreach);
21988
@is_for_foreach{@_} = (1) x scalar(@_);
21992
@is_my_our{@_} = (1) x scalar(@_);
21994
# These keywords may introduce blocks after parenthesized expressions,
21996
# keyword ( .... ) { BLOCK }
21997
# patch for SWITCH/CASE: added 'switch' 'case' 'given' 'when'
21998
my %is_blocktype_with_paren;
21999
@_ = qw(if elsif unless while until for foreach switch case given when);
22000
@is_blocktype_with_paren{@_} = (1) x scalar(@_);
22002
# ------------------------------------------------------------
22003
# begin hash of code for handling most token types
22004
# ------------------------------------------------------------
22005
my $tokenization_code = {
22007
# no special code for these types yet, but syntax checks
22042
error_if_expecting_TERM()
22043
if ( $expecting == TERM );
22046
error_if_expecting_TERM()
22047
if ( $expecting == TERM );
22051
# start looking for a scalar
22052
error_if_expecting_OPERATOR("Scalar")
22053
if ( $expecting == OPERATOR );
22056
if ( $identifier eq '$^W' ) {
22057
$tokenizer_self->{_saw_perl_dash_w} = 1;
22060
# Check for identifier in indirect object slot
22061
# (vorboard.pl, sort.t). Something like:
22062
# /^(print|printf|sort|exec|system)$/
22064
$is_indirect_object_taker{$last_nonblank_token}
22066
|| ( ( $last_nonblank_token eq '(' )
22067
&& $is_indirect_object_taker{ $paren_type[$paren_depth] } )
22068
|| ( $last_nonblank_type =~ /^[Uw]$/ ) # possible object
22077
$paren_semicolon_count[$paren_depth] = 0;
22079
$container_type = $want_paren;
22083
$container_type = $last_nonblank_token;
22085
# We can check for a syntax error here of unexpected '(',
22086
# but this is going to get messy...
22088
$expecting == OPERATOR
22090
# be sure this is not a method call of the form
22091
# &method(...), $method->(..), &{method}(...),
22092
# $ref[2](list) is ok & short for $ref[2]->(list)
22093
# NOTE: at present, braces in something like &{ xxx }
22094
# are not marked as a block, we might have a method call
22095
&& $last_nonblank_token !~ /^([\]\}\&]|\-\>)/
22100
# ref: camel 3 p 703.
22101
if ( $last_last_nonblank_token eq 'do' ) {
22103
"do SUBROUTINE is deprecated; consider & or -> notation\n"
22108
# if this is an empty list, (), then it is not an
22109
# error; for example, we might have a constant pi and
22110
# invoke it with pi() or just pi;
22111
my ( $next_nonblank_token, $i_next ) =
22112
find_next_nonblank_token( $i, $rtokens,
22113
$max_token_index );
22114
if ( $next_nonblank_token ne ')' ) {
22116
error_if_expecting_OPERATOR('(');
22118
if ( $last_nonblank_type eq 'C' ) {
22120
"$last_nonblank_token has a void prototype\n";
22122
elsif ( $last_nonblank_type eq 'i' ) {
22124
&& $last_nonblank_token =~ /^\$/ )
22127
"Do you mean '$last_nonblank_token->(' ?\n";
22131
interrupt_logfile();
22135
} ## end if ( $next_nonblank_token...
22136
} ## end else [ if ( $last_last_nonblank_token...
22137
} ## end if ( $expecting == OPERATOR...
22139
$paren_type[$paren_depth] = $container_type;
22140
( $type_sequence, $indent_flag ) =
22141
increase_nesting_depth( PAREN, $$rtoken_map[$i_tok] );
22143
# propagate types down through nested parens
22144
# for example: the second paren in 'if ((' would be structural
22145
# since the first is.
22147
if ( $last_nonblank_token eq '(' ) {
22148
$type = $last_nonblank_type;
22151
# We exclude parens as structural after a ',' because it
22152
# causes subtle problems with continuation indentation for
22153
# something like this, where the first 'or' will not get
22158
# ( not defined $check )
22160
# or $check eq "new"
22161
# or $check eq "old",
22164
# Likewise, we exclude parens where a statement can start
22165
# because of problems with continuation indentation, like
22168
# ($firstline =~ /^#\!.*perl/)
22169
# and (print $File::Find::name, "\n")
22172
# (ref($usage_fref) =~ /CODE/)
22174
# : (&blast_usage, &blast_params, &blast_general_params);
22180
if ( $last_nonblank_type eq ')' ) {
22182
"Syntax error? found token '$last_nonblank_type' then '('\n"
22185
$paren_structural_type[$paren_depth] = $type;
22189
( $type_sequence, $indent_flag ) =
22190
decrease_nesting_depth( PAREN, $$rtoken_map[$i_tok] );
22192
if ( $paren_structural_type[$paren_depth] eq '{' ) {
22196
$container_type = $paren_type[$paren_depth];
22198
# /^(for|foreach)$/
22199
if ( $is_for_foreach{ $paren_type[$paren_depth] } ) {
22200
my $num_sc = $paren_semicolon_count[$paren_depth];
22201
if ( $num_sc > 0 && $num_sc != 2 ) {
22202
warning("Expected 2 ';' in 'for(;;)' but saw $num_sc\n");
22206
if ( $paren_depth > 0 ) { $paren_depth-- }
22209
if ( $last_nonblank_type eq ',' ) {
22210
complain("Repeated ','s \n");
22213
# patch for operator_expected: note if we are in the list (use.t)
22214
if ( $statement_type eq 'use' ) { $statement_type = '_use' }
22215
## FIXME: need to move this elsewhere, perhaps check after a '('
22216
## elsif ($last_nonblank_token eq '(') {
22217
## warning("Leading ','s illegal in some versions of perl\n");
22221
$context = UNKNOWN_CONTEXT;
22222
$statement_type = '';
22224
# /^(for|foreach)$/
22225
if ( $is_for_foreach{ $paren_type[$paren_depth] } )
22226
{ # mark ; in for loop
22228
# Be careful: we do not want a semicolon such as the
22229
# following to be included:
22231
# for (sort {strcoll($a,$b);} keys %investments) {
22233
if ( $brace_depth == $depth_array[PAREN][BRACE][$paren_depth]
22234
&& $square_bracket_depth ==
22235
$depth_array[PAREN][SQUARE_BRACKET][$paren_depth] )
22239
$paren_semicolon_count[$paren_depth]++;
22245
error_if_expecting_OPERATOR("String")
22246
if ( $expecting == OPERATOR );
22249
$allowed_quote_modifiers = "";
22252
error_if_expecting_OPERATOR("String")
22253
if ( $expecting == OPERATOR );
22256
$allowed_quote_modifiers = "";
22259
error_if_expecting_OPERATOR("String")
22260
if ( $expecting == OPERATOR );
22263
$allowed_quote_modifiers = "";
22268
if ( $expecting == UNKNOWN ) { # indeterminte, must guess..
22270
( $is_pattern, $msg ) =
22271
guess_if_pattern_or_division( $i, $rtokens, $rtoken_map,
22272
$max_token_index );
22275
write_diagnostics("DIVIDE:$msg\n");
22276
write_logfile_entry($msg);
22279
else { $is_pattern = ( $expecting == TERM ) }
22284
$allowed_quote_modifiers = '[cgimosxp]';
22286
else { # not a pattern; check for a /= token
22288
if ( $$rtokens[ $i + 1 ] eq '=' ) { # form token /=
22294
#DEBUG - collecting info on what tokens follow a divide
22295
# for development of guessing algorithm
22296
#if ( numerator_expected( $i, $rtokens, $max_token_index ) < 0 ) {
22297
# #write_diagnostics( "DIVIDE? $input_line\n" );
22303
# if we just saw a ')', we will label this block with
22304
# its type. We need to do this to allow sub
22305
# code_block_type to determine if this brace starts a
22306
# code block or anonymous hash. (The type of a paren
22307
# pair is the preceding token, such as 'if', 'else',
22309
$container_type = "";
22311
# ATTRS: for a '{' following an attribute list, reset
22312
# things to look like we just saw the sub name
22313
if ( $statement_type =~ /^sub/ ) {
22314
$last_nonblank_token = $statement_type;
22315
$last_nonblank_type = 'i';
22316
$statement_type = "";
22319
# patch for SWITCH/CASE: hide these keywords from an immediately
22320
# following opening brace
22321
elsif ( ( $statement_type eq 'case' || $statement_type eq 'when' )
22322
&& $statement_type eq $last_nonblank_token )
22324
$last_nonblank_token = ";";
22327
elsif ( $last_nonblank_token eq ')' ) {
22328
$last_nonblank_token = $paren_type[ $paren_depth + 1 ];
22330
# defensive move in case of a nesting error (pbug.t)
22331
# in which this ')' had no previous '('
22332
# this nesting error will have been caught
22333
if ( !defined($last_nonblank_token) ) {
22334
$last_nonblank_token = 'if';
22337
# check for syntax error here;
22338
unless ( $is_blocktype_with_paren{$last_nonblank_token} ) {
22339
my $list = join( ' ', sort keys %is_blocktype_with_paren );
22341
"syntax error at ') {', didn't see one of: $list\n");
22345
# patch for paren-less for/foreach glitch, part 2.
22346
# see note below under 'qw'
22347
elsif ($last_nonblank_token eq 'qw'
22348
&& $is_for_foreach{$want_paren} )
22350
$last_nonblank_token = $want_paren;
22351
if ( $last_last_nonblank_token eq $want_paren ) {
22353
"syntax error at '$want_paren .. {' -- missing \$ loop variable\n"
22360
# now identify which of the three possible types of
22361
# curly braces we have: hash index container, anonymous
22362
# hash reference, or code block.
22364
# non-structural (hash index) curly brace pair
22365
# get marked 'L' and 'R'
22366
if ( is_non_structural_brace() ) {
22369
# patch for SWITCH/CASE:
22370
# allow paren-less identifier after 'when'
22371
# if the brace is preceded by a space
22372
if ( $statement_type eq 'when'
22373
&& $last_nonblank_type eq 'i'
22374
&& $last_last_nonblank_type eq 'k'
22375
&& ( $i_tok == 0 || $rtoken_type->[ $i_tok - 1 ] eq 'b' ) )
22378
$block_type = $statement_type;
22382
# code and anonymous hash have the same type, '{', but are
22383
# distinguished by 'block_type',
22384
# which will be blank for an anonymous hash
22387
$block_type = code_block_type( $i_tok, $rtokens, $rtoken_type,
22388
$max_token_index );
22390
# patch to promote bareword type to function taking block
22392
&& $last_nonblank_type eq 'w'
22393
&& $last_nonblank_i >= 0 )
22395
if ( $routput_token_type->[$last_nonblank_i] eq 'w' ) {
22396
$routput_token_type->[$last_nonblank_i] = 'G';
22400
# patch for SWITCH/CASE: if we find a stray opening block brace
22401
# where we might accept a 'case' or 'when' block, then take it
22402
if ( $statement_type eq 'case'
22403
|| $statement_type eq 'when' )
22405
if ( !$block_type || $block_type eq '}' ) {
22406
$block_type = $statement_type;
22410
$brace_type[ ++$brace_depth ] = $block_type;
22411
$brace_package[$brace_depth] = $current_package;
22412
( $type_sequence, $indent_flag ) =
22413
increase_nesting_depth( BRACE, $$rtoken_map[$i_tok] );
22414
$brace_structural_type[$brace_depth] = $type;
22415
$brace_context[$brace_depth] = $context;
22416
$brace_statement_type[$brace_depth] = $statement_type;
22419
$block_type = $brace_type[$brace_depth];
22420
if ($block_type) { $statement_type = '' }
22421
if ( defined( $brace_package[$brace_depth] ) ) {
22422
$current_package = $brace_package[$brace_depth];
22425
# can happen on brace error (caught elsewhere)
22428
( $type_sequence, $indent_flag ) =
22429
decrease_nesting_depth( BRACE, $$rtoken_map[$i_tok] );
22431
if ( $brace_structural_type[$brace_depth] eq 'L' ) {
22435
# propagate type information for 'do' and 'eval' blocks.
22436
# This is necessary to enable us to know if an operator
22437
# or term is expected next
22438
if ( $is_block_operator{ $brace_type[$brace_depth] } ) {
22439
$tok = $brace_type[$brace_depth];
22442
$context = $brace_context[$brace_depth];
22443
$statement_type = $brace_statement_type[$brace_depth];
22444
if ( $brace_depth > 0 ) { $brace_depth--; }
22446
'&' => sub { # maybe sub call? start looking
22448
# We have to check for sub call unless we are sure we
22449
# are expecting an operator. This example from s2p
22450
# got mistaken as a q operator in an early version:
22451
# print BODY &q(<<'EOT');
22452
if ( $expecting != OPERATOR ) {
22458
'<' => sub { # angle operator or less than?
22460
if ( $expecting != OPERATOR ) {
22462
find_angle_operator_termination( $input_line, $i, $rtoken_map,
22463
$expecting, $max_token_index );
22469
'?' => sub { # ?: conditional or starting pattern?
22473
if ( $expecting == UNKNOWN ) {
22476
( $is_pattern, $msg ) =
22477
guess_if_pattern_or_conditional( $i, $rtokens, $rtoken_map,
22478
$max_token_index );
22480
if ($msg) { write_logfile_entry($msg) }
22482
else { $is_pattern = ( $expecting == TERM ) }
22487
$allowed_quote_modifiers = '[cgimosxp]';
22490
( $type_sequence, $indent_flag ) =
22491
increase_nesting_depth( QUESTION_COLON,
22492
$$rtoken_map[$i_tok] );
22495
'*' => sub { # typeglob, or multiply?
22497
if ( $expecting == TERM ) {
22502
if ( $$rtokens[ $i + 1 ] eq '=' ) {
22507
elsif ( $$rtokens[ $i + 1 ] eq '*' ) {
22511
if ( $$rtokens[ $i + 1 ] eq '=' ) {
22519
'.' => sub { # what kind of . ?
22521
if ( $expecting != OPERATOR ) {
22523
if ( $type eq '.' ) {
22524
error_if_expecting_TERM()
22525
if ( $expecting == TERM );
22533
# if this is the first nonblank character, call it a label
22534
# since perl seems to just swallow it
22535
if ( $input_line_number == 1 && $last_nonblank_i == -1 ) {
22539
# ATTRS: check for a ':' which introduces an attribute list
22540
# (this might eventually get its own token type)
22541
elsif ( $statement_type =~ /^sub/ ) {
22543
$in_attribute_list = 1;
22546
# check for scalar attribute, such as
22547
# my $foo : shared = 1;
22548
elsif ($is_my_our{$statement_type}
22549
&& $current_depth[QUESTION_COLON] == 0 )
22552
$in_attribute_list = 1;
22555
# otherwise, it should be part of a ?/: operator
22557
( $type_sequence, $indent_flag ) =
22558
decrease_nesting_depth( QUESTION_COLON,
22559
$$rtoken_map[$i_tok] );
22560
if ( $last_nonblank_token eq '?' ) {
22561
warning("Syntax error near ? :\n");
22565
'+' => sub { # what kind of plus?
22567
if ( $expecting == TERM ) {
22568
my $number = scan_number();
22570
# unary plus is safest assumption if not a number
22571
if ( !defined($number) ) { $type = 'p'; }
22573
elsif ( $expecting == OPERATOR ) {
22576
if ( $next_type eq 'w' ) { $type = 'p' }
22581
error_if_expecting_OPERATOR("Array")
22582
if ( $expecting == OPERATOR );
22585
'%' => sub { # hash or modulo?
22587
# first guess is hash if no following blank
22588
if ( $expecting == UNKNOWN ) {
22589
if ( $next_type ne 'b' ) { $expecting = TERM }
22591
if ( $expecting == TERM ) {
22596
$square_bracket_type[ ++$square_bracket_depth ] =
22597
$last_nonblank_token;
22598
( $type_sequence, $indent_flag ) =
22599
increase_nesting_depth( SQUARE_BRACKET, $$rtoken_map[$i_tok] );
22601
# It may seem odd, but structural square brackets have
22602
# type '{' and '}'. This simplifies the indentation logic.
22603
if ( !is_non_structural_brace() ) {
22606
$square_bracket_structural_type[$square_bracket_depth] = $type;
22609
( $type_sequence, $indent_flag ) =
22610
decrease_nesting_depth( SQUARE_BRACKET, $$rtoken_map[$i_tok] );
22612
if ( $square_bracket_structural_type[$square_bracket_depth] eq '{' )
22616
if ( $square_bracket_depth > 0 ) { $square_bracket_depth--; }
22618
'-' => sub { # what kind of minus?
22620
if ( ( $expecting != OPERATOR )
22621
&& $is_file_test_operator{$next_tok} )
22623
my ( $next_nonblank_token, $i_next ) =
22624
find_next_nonblank_token( $i + 1, $rtokens,
22625
$max_token_index );
22627
# check for a quoted word like "-w=>xx";
22628
# it is sufficient to just check for a following '='
22629
if ( $next_nonblank_token eq '=' ) {
22638
elsif ( $expecting == TERM ) {
22639
my $number = scan_number();
22641
# maybe part of bareword token? unary is safest
22642
if ( !defined($number) ) { $type = 'm'; }
22645
elsif ( $expecting == OPERATOR ) {
22649
if ( $next_type eq 'w' ) {
22657
# check for special variables like ${^WARNING_BITS}
22658
if ( $expecting == TERM ) {
22660
# FIXME: this should work but will not catch errors
22661
# because we also have to be sure that previous token is
22662
# a type character ($,@,%).
22663
if ( $last_nonblank_token eq '{'
22664
&& ( $next_tok =~ /^[A-Za-z_]/ ) )
22667
if ( $next_tok eq 'W' ) {
22668
$tokenizer_self->{_saw_perl_dash_w} = 1;
22670
$tok = $tok . $next_tok;
22676
unless ( error_if_expecting_TERM() ) {
22678
# Something like this is valid but strange:
22680
complain("The '^' seems unusual here\n");
22686
'::' => sub { # probably a sub call
22687
scan_bare_identifier();
22689
'<<' => sub { # maybe a here-doc?
22691
unless ( $i < $max_token_index )
22692
; # here-doc not possible if end of line
22694
if ( $expecting != OPERATOR ) {
22695
my ( $found_target, $here_doc_target, $here_quote_character,
22698
$found_target, $here_doc_target, $here_quote_character, $i,
22701
= find_here_doc( $expecting, $i, $rtokens, $rtoken_map,
22702
$max_token_index );
22704
if ($found_target) {
22705
push @{$rhere_target_list},
22706
[ $here_doc_target, $here_quote_character ];
22708
if ( length($here_doc_target) > 80 ) {
22709
my $truncated = substr( $here_doc_target, 0, 80 );
22710
complain("Long here-target: '$truncated' ...\n");
22712
elsif ( $here_doc_target !~ /^[A-Z_]\w+$/ ) {
22714
"Unconventional here-target: '$here_doc_target'\n"
22718
elsif ( $expecting == TERM ) {
22719
unless ($saw_error) {
22721
# shouldn't happen..
22722
warning("Program bug; didn't find here doc target\n");
22723
report_definite_bug();
22732
# if -> points to a bare word, we must scan for an identifier,
22733
# otherwise something like ->y would look like the y operator
22737
# type = 'pp' for pre-increment, '++' for post-increment
22739
if ( $expecting == TERM ) { $type = 'pp' }
22740
elsif ( $expecting == UNKNOWN ) {
22741
my ( $next_nonblank_token, $i_next ) =
22742
find_next_nonblank_token( $i, $rtokens, $max_token_index );
22743
if ( $next_nonblank_token eq '$' ) { $type = 'pp' }
22748
if ( $last_nonblank_type eq $tok ) {
22749
complain("Repeated '=>'s \n");
22752
# patch for operator_expected: note if we are in the list (use.t)
22753
# TODO: make version numbers a new token type
22754
if ( $statement_type eq 'use' ) { $statement_type = '_use' }
22757
# type = 'mm' for pre-decrement, '--' for post-decrement
22760
if ( $expecting == TERM ) { $type = 'mm' }
22761
elsif ( $expecting == UNKNOWN ) {
22762
my ( $next_nonblank_token, $i_next ) =
22763
find_next_nonblank_token( $i, $rtokens, $max_token_index );
22764
if ( $next_nonblank_token eq '$' ) { $type = 'mm' }
22769
error_if_expecting_TERM()
22770
if ( $expecting == TERM );
22774
error_if_expecting_TERM()
22775
if ( $expecting == TERM );
22779
error_if_expecting_TERM()
22780
if ( $expecting == TERM );
22784
# ------------------------------------------------------------
22785
# end hash of code for handling individual token types
22786
# ------------------------------------------------------------
22788
# Maps each closing container token ( '}', ']', ')' ) to the opening
# token that pairs with it.
my %matching_start_token = ( '}' => '{', ']' => '[', ')' => '(' );
22790
# These block types terminate statements and do not need a trailing
22792
# patched for SWITCH/CASE:
22793
my %is_zero_continuation_block_type;
22794
@_ = qw( } { BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue ;
22795
if elsif else unless while until for foreach switch case given when);
22796
@is_zero_continuation_block_type{@_} = (1) x scalar(@_);
22798
my %is_not_zero_continuation_block_type;
22799
@_ = qw(sort grep map do eval);
22800
@is_not_zero_continuation_block_type{@_} = (1) x scalar(@_);
22802
my %is_logical_container;
22803
@_ = qw(if elsif unless while and or err not && ! || for foreach);
22804
@is_logical_container{@_} = (1) x scalar(@_);
22806
my %is_binary_type;
22808
@is_binary_type{@_} = (1) x scalar(@_);
22810
my %is_binary_keyword;
22811
@_ = qw(and or err eq ne cmp);
22812
@is_binary_keyword{@_} = (1) x scalar(@_);
22814
# 'L' is token for opening { at hash key
22815
my %is_opening_type;
22816
@_ = qw" L { ( [ ";
22817
@is_opening_type{@_} = (1) x scalar(@_);
22819
# 'R' is token for closing } at hash key
22820
my %is_closing_type;
22821
@_ = qw" R } ) ] ";
22822
@is_closing_type{@_} = (1) x scalar(@_);
22824
# Lookup set for the keywords 'redo', 'last', 'next' and 'goto'.
# Each set below is filled the same way: @_ is loaded with the word
# list, then a hash slice assigns 1 to every listed key.
my %is_redo_last_next_goto;
22825
@_ = qw(redo last next goto);
22826
@is_redo_last_next_goto{@_} = (1) x scalar(@_);
22828
# Lookup set for the keywords 'use' and 'require'.
my %is_use_require;
22829
@_ = qw(use require);
22830
@is_use_require{@_} = (1) x scalar(@_);
22832
# Lookup set for the keywords 'sub' and 'package'.
my %is_sub_package;
22833
@_ = qw(sub package);
22834
@is_sub_package{@_} = (1) x scalar(@_);
22836
# This hash holds the hash key in $tokenizer_self for these keywords:
22837
my %is_format_END_DATA = (
22838
'format' => '_in_format',
22839
'__END__' => '_in_end',
22840
'__DATA__' => '_in_data',
22843
# ref: camel 3 p 147,
22844
# but perl may accept undocumented flags
22845
# perl 5.10 adds 'p' (preserve)
22846
my %quote_modifiers = (
22847
's' => '[cegimosxp]',
22850
'm' => '[cgimosxp]',
22851
'qr' => '[imosxp]',
22858
# table showing how many quoted things to look for after quote operator..
22859
# s, y, tr have 2 (pattern and replacement)
22860
# others have 1 (pattern only)
22861
my %quote_items = (
22873
sub tokenize_this_line {
22875
# This routine breaks a line of perl code into tokens which are of use in
22876
# indentation and reformatting. One of my goals has been to define tokens
22877
# such that a newline may be inserted between any pair of tokens without
22878
# changing or invalidating the program. This version comes close to this,
22879
# although there are necessarily a few exceptions which must be caught by
22880
# the formatter. Many of these involve the treatment of bare words.
22882
# The tokens and their types are returned in arrays. See previous
22883
# routine for their names.
22885
# See also the array "valid_token_types" in the BEGIN section for an
22888
# To simplify things, token types are either a single character, or they
22889
# are identical to the tokens themselves.
22891
# As a debugging aid, the -D flag creates a file containing a side-by-side
22892
# comparison of the input string and its tokenization for each line of a file.
22893
# This is an invaluable debugging aid.
22895
# In addition to tokens, and some associated quantities, the tokenizer
22896
# also returns flags indicating any special line types. These include
22897
# quotes, here_docs, formats.
22899
# -----------------------------------------------------------------------
22901
# How to add NEW_TOKENS:
22903
# New token types will undoubtedly be needed in the future both to keep up
22904
# with changes in perl and to help adapt the tokenizer to other applications.
22906
# Here are some notes on the minimal steps. I wrote these notes while
22907
# adding the 'v' token type for v-strings, which are things like version
22908
# numbers 5.6.0, and ip addresses, and will use that as an example. ( You
22909
# can use your editor to search for the string "NEW_TOKENS" to find the
22910
# appropriate sections to change):
22912
# *. Try to talk somebody else into doing it! If not, ..
22914
# *. Make a backup of your current version in case things don't work out!
22916
# *. Think of a new, unused character for the token type, and add to
22917
# the array @valid_token_types in the BEGIN section of this package.
22918
# For example, I used 'v' for v-strings.
22920
# *. Implement coding to recognize the $type of the token in this routine.
22921
# This is the hardest part, and is best done by imitating or modifying
22922
# some of the existing coding. For example, to recognize v-strings, I
22923
# patched 'sub scan_bare_identifier' to recognize v-strings beginning with
22924
# 'v' and 'sub scan_number' to recognize v-strings without the leading 'v'.
22926
# *. Update sub operator_expected. This update is critically important but
22927
# the coding is trivial. Look at the comments in that routine for help.
22928
# For v-strings, which should behave like numbers, I just added 'v' to the
22929
# regex used to handle numbers and strings (types 'n' and 'Q').
22931
# *. Implement a 'bond strength' rule in sub set_bond_strengths in
22932
# Perl::Tidy::Formatter for breaking lines around this token type. You can
22933
# skip this step and take the default at first, then adjust later to get
22934
# desired results. For adding type 'v', I looked at sub bond_strength and
22935
# saw that number type 'n' was using default strengths, so I didn't do
22936
# anything. I may tune it up someday if I don't like the way line
22937
# breaks with v-strings look.
22939
# *. Implement a 'whitespace' rule in sub set_white_space_flag in
22940
# Perl::Tidy::Formatter. For adding type 'v', I looked at this routine
22941
# and saw that type 'n' used spaces on both sides, so I just added 'v'
22942
# to the array @spaces_both_sides.
22944
# *. Update HtmlWriter package so that users can colorize the token as
22945
# desired. This is quite easy; see comments identified by 'NEW_TOKENS' in
22946
# that package. For v-strings, I initially chose to use a default color
22947
# equal to the default for numbers, but it might be nice to change that
22950
# *. Update comments in Perl::Tidy::Tokenizer::dump_token_types.
22952
# *. Run lots and lots of debug tests. Start with special files designed
22953
# to test the new token type. Run with the -D flag to create a .DEBUG
22954
# file which shows the tokenization. When these work ok, test as many old
22955
# scripts as possible. Start with all of the '.t' files in the 'test'
22956
# directory of the distribution file. Compare .tdy output with previous
22957
# version and updated version to see the differences. Then include as
22958
# many more files as possible. My own technique has been to collect a huge
22959
# number of perl scripts (thousands!) into one directory and run perltidy
22960
# *, then run diff between the output of the previous version and the
22963
# *. For another example, search for the smartmatch operator '~~'
22964
# with your editor to see where updates were made for it.
22966
# -----------------------------------------------------------------------
22968
my $line_of_tokens = shift;
22969
my ($untrimmed_input_line) = $line_of_tokens->{_line_text};
22971
# patch while coding change is underway
22972
# make callers private data to allow access
22973
# $tokenizer_self = $caller_tokenizer_self;
22975
# extract line number for use in error messages
22976
$input_line_number = $line_of_tokens->{_line_number};
22978
# reinitialize for multi-line quote
22979
$line_of_tokens->{_starting_in_quote} = $in_quote && $quote_type eq 'Q';
22981
# check for pod documentation
22982
if ( ( $untrimmed_input_line =~ /^=[A-Za-z_]/ ) ) {
22984
# must not be in multi-line quote
22985
# and must not be in an eqn
22986
if ( !$in_quote and ( operator_expected( 'b', '=', 'b' ) == TERM ) )
22988
$tokenizer_self->{_in_pod} = 1;
22993
$input_line = $untrimmed_input_line;
22997
# trim start of this line unless we are continuing a quoted line
22998
# do not trim end because we might end in a quote (test: deken4.pl)
22999
# Perl::Tidy::Formatter will delete needless trailing blanks
23000
unless ( $in_quote && ( $quote_type eq 'Q' ) ) {
23001
$input_line =~ s/^\s*//; # trim left end
23004
# update the copy of the line for use in error messages
23005
# This must be exactly what we give the pre_tokenizer
23006
$tokenizer_self->{_line_text} = $input_line;
23008
# re-initialize for the main loop
23009
$routput_token_list = []; # stack of output token indexes
23010
$routput_token_type = []; # token types
23011
$routput_block_type = []; # types of code block
23012
$routput_container_type = []; # paren types, such as if, elsif, ..
23013
$routput_type_sequence = []; # nesting sequential number
23015
$rhere_target_list = [];
23017
$tok = $last_nonblank_token;
23018
$type = $last_nonblank_type;
23019
$prototype = $last_nonblank_prototype;
23020
$last_nonblank_i = -1;
23021
$block_type = $last_nonblank_block_type;
23022
$container_type = $last_nonblank_container_type;
23023
$type_sequence = $last_nonblank_type_sequence;
23027
# tokenization is done in two stages..
23028
# stage 1 is a very simple pre-tokenization
23029
my $max_tokens_wanted = 0; # this signals pre_tokenize to get all tokens
23031
# a little optimization for a full-line comment
23032
if ( !$in_quote && ( $input_line =~ /^#/ ) ) {
23033
$max_tokens_wanted = 1 # no use tokenizing a comment
23036
# start by breaking the line into pre-tokens
23037
( $rtokens, $rtoken_map, $rtoken_type ) =
23038
pre_tokenize( $input_line, $max_tokens_wanted );
23040
$max_token_index = scalar(@$rtokens) - 1;
23041
push( @$rtokens, ' ', ' ', ' ' ); # extra whitespace simplifies logic
23042
push( @$rtoken_map, 0, 0, 0 ); # shouldn't be referenced
23043
push( @$rtoken_type, 'b', 'b', 'b' );
23045
# initialize for main loop
23046
for $i ( 0 .. $max_token_index + 3 ) {
23047
$routput_token_type->[$i] = "";
23048
$routput_block_type->[$i] = "";
23049
$routput_container_type->[$i] = "";
23050
$routput_type_sequence->[$i] = "";
23051
$routput_indent_flag->[$i] = 0;
23056
# ------------------------------------------------------------
23057
# begin main tokenization loop
23058
# ------------------------------------------------------------
23060
# we are looking at each pre-token of one line and combining them
23062
while ( ++$i <= $max_token_index ) {
23064
if ($in_quote) { # continue looking for end of a quote
23065
$type = $quote_type;
23067
unless ( @{$routput_token_list} )
23068
{ # initialize if continuation line
23069
push( @{$routput_token_list}, $i );
23070
$routput_token_type->[$i] = $type;
23073
$tok = $quote_character unless ( $quote_character =~ /^\s*$/ );
23075
# scan for the end of the quote or pattern
23077
$i, $in_quote, $quote_character, $quote_pos, $quote_depth,
23078
$quoted_string_1, $quoted_string_2
23081
$i, $in_quote, $quote_character,
23082
$quote_pos, $quote_depth, $quoted_string_1,
23083
$quoted_string_2, $rtokens, $rtoken_map,
23087
# all done if we didn't find it
23088
last if ($in_quote);
23090
# save pattern and replacement text for rescanning
23091
my $qs1 = $quoted_string_1;
23092
my $qs2 = $quoted_string_2;
23094
# re-initialize for next search
23095
$quote_character = '';
23098
$quoted_string_1 = "";
23099
$quoted_string_2 = "";
23100
last if ( ++$i > $max_token_index );
23102
# look for any modifiers
23103
if ($allowed_quote_modifiers) {
23105
# check for exact quote modifiers
23106
if ( $$rtokens[$i] =~ /^[A-Za-z_]/ ) {
23107
my $str = $$rtokens[$i];
23108
my $saw_modifier_e;
23109
while ( $str =~ /\G$allowed_quote_modifiers/gc ) {
23110
my $pos = pos($str);
23111
my $char = substr( $str, $pos - 1, 1 );
23112
$saw_modifier_e ||= ( $char eq 'e' );
23115
# For an 'e' quote modifier we must scan the replacement
23116
# text for here-doc targets.
23117
if ($saw_modifier_e) {
23119
my $rht = scan_replacement_text($qs1);
23121
# Change type from 'Q' to 'h' for quotes with
23122
# here-doc targets so that the formatter (see sub
23123
# print_line_of_tokens) will not make any line
23124
# breaks after this point.
23126
push @{$rhere_target_list}, @{$rht};
23128
if ( $i_tok < 0 ) {
23129
my $ilast = $routput_token_list->[-1];
23130
$routput_token_type->[$ilast] = $type;
23135
if ( defined( pos($str) ) ) {
23138
if ( pos($str) == length($str) ) {
23139
last if ( ++$i > $max_token_index );
23142
# Looks like a joined quote modifier
23143
# and keyword, maybe something like
23144
# s/xxx/yyy/gefor @k=...
23145
# Example is "galgen.pl". Would have to split
23146
# the word and insert a new token in the
23147
# pre-token list. This is so rare that I haven't
23148
# done it. Will just issue a warning citation.
23150
# This error might also be triggered if my quote
23151
# modifier characters are incomplete
23155
Partial match to quote modifier $allowed_quote_modifiers at word: '$str'
23156
Please put a space between quote modifiers and trailing keywords.
23159
# print "token $$rtokens[$i]\n";
23160
# my $num = length($str) - pos($str);
23161
# $$rtokens[$i]=substr($$rtokens[$i],pos($str),$num);
23162
# print "continuing with new token $$rtokens[$i]\n";
23164
# skipping past this token does least damage
23165
last if ( ++$i > $max_token_index );
23170
# example file: rokicki4.pl
23171
# This error might also be triggered if my quote
23172
# modifier characters are incomplete
23173
write_logfile_entry(
23174
"Note: found word $str at quote modifier location\n"
23180
$allowed_quote_modifiers = "";
23184
unless ( $tok =~ /^\s*$/ ) {
23186
# try to catch some common errors
23187
if ( ( $type eq 'n' ) && ( $tok ne '0' ) ) {
23189
if ( $last_nonblank_token eq 'eq' ) {
23190
complain("Should 'eq' be '==' here ?\n");
23192
elsif ( $last_nonblank_token eq 'ne' ) {
23193
complain("Should 'ne' be '!=' here ?\n");
23197
$last_last_nonblank_token = $last_nonblank_token;
23198
$last_last_nonblank_type = $last_nonblank_type;
23199
$last_last_nonblank_block_type = $last_nonblank_block_type;
23200
$last_last_nonblank_container_type =
23201
$last_nonblank_container_type;
23202
$last_last_nonblank_type_sequence =
23203
$last_nonblank_type_sequence;
23204
$last_nonblank_token = $tok;
23205
$last_nonblank_type = $type;
23206
$last_nonblank_prototype = $prototype;
23207
$last_nonblank_block_type = $block_type;
23208
$last_nonblank_container_type = $container_type;
23209
$last_nonblank_type_sequence = $type_sequence;
23210
$last_nonblank_i = $i_tok;
23213
# store previous token type
23214
if ( $i_tok >= 0 ) {
23215
$routput_token_type->[$i_tok] = $type;
23216
$routput_block_type->[$i_tok] = $block_type;
23217
$routput_container_type->[$i_tok] = $container_type;
23218
$routput_type_sequence->[$i_tok] = $type_sequence;
23219
$routput_indent_flag->[$i_tok] = $indent_flag;
23221
my $pre_tok = $$rtokens[$i]; # get the next pre-token
23222
my $pre_type = $$rtoken_type[$i]; # and type
23224
$type = $pre_type; # to be modified as necessary
23225
$block_type = ""; # blank for all tokens except code block braces
23226
$container_type = ""; # blank for all tokens except some parens
23227
$type_sequence = ""; # blank for all tokens except ?/:
23229
$prototype = ""; # blank for all tokens except user defined subs
23232
# this pre-token will start an output token
23233
push( @{$routput_token_list}, $i_tok );
23235
# continue gathering identifier if necessary
23236
# but do not start on blanks and comments
23237
if ( $id_scan_state && $pre_type !~ /[b#]/ ) {
23239
if ( $id_scan_state =~ /^(sub|package)/ ) {
23246
last if ($id_scan_state);
23247
next if ( ( $i > 0 ) || $type );
23249
# didn't find any token; start over
23254
# handle whitespace tokens..
23255
next if ( $type eq 'b' );
23256
my $prev_tok = $i > 0 ? $$rtokens[ $i - 1 ] : ' ';
23257
my $prev_type = $i > 0 ? $$rtoken_type[ $i - 1 ] : 'b';
23259
# Build larger tokens where possible, since we are not in a quote.
23261
# First try to assemble digraphs. The following tokens are
23262
# excluded and handled specially:
23263
# '/=' is excluded because the / might start a pattern.
23264
# 'x=' is excluded since it might be $x=, with $ on previous line
23265
# '**' and *= might be typeglobs of punctuation variables
23266
# I have allowed tokens starting with <, such as <=,
23267
# because I don't think these could be valid angle operators.
23268
# test file: storrs4.pl
23269
my $test_tok = $tok . $$rtokens[ $i + 1 ];
23270
my $combine_ok = $is_digraph{$test_tok};
23272
# check for special cases which cannot be combined
23275
# '//' must be defined_or operator if an operator is expected.
23276
# TODO: Code for other ambiguous digraphs (/=, x=, **, *=)
23277
# could be migrated here for clarity
23278
if ( $test_tok eq '//' ) {
23279
my $next_type = $$rtokens[ $i + 1 ];
23281
operator_expected( $prev_type, $tok, $next_type );
23282
$combine_ok = 0 unless ( $expecting == OPERATOR );
23288
&& ( $test_tok ne '/=' ) # might be pattern
23289
&& ( $test_tok ne 'x=' ) # might be $x
23290
&& ( $test_tok ne '**' ) # typeglob?
23291
&& ( $test_tok ne '*=' ) # typeglob?
23297
# Now try to assemble trigraphs. Note that all possible
23298
# perl trigraphs can be constructed by appending a character
23300
$test_tok = $tok . $$rtokens[ $i + 1 ];
23302
if ( $is_trigraph{$test_tok} ) {
23309
$next_tok = $$rtokens[ $i + 1 ];
23310
$next_type = $$rtoken_type[ $i + 1 ];
23312
TOKENIZER_DEBUG_FLAG_TOKENIZE && do {
23315
$last_nonblank_token, $tok,
23316
$next_tok, $brace_depth,
23317
$brace_type[$brace_depth], $paren_depth,
23318
$paren_type[$paren_depth]
23320
print "TOKENIZE:(@debug_list)\n";
23323
# turn off attribute list on first non-blank, non-bareword
23324
if ( $pre_type ne 'w' ) { $in_attribute_list = 0 }
23326
###############################################################
23327
# We have the next token, $tok.
23328
# Now we have to examine this token and decide what it is
23329
# and define its $type
23331
# section 1: bare words
23332
###############################################################
23334
if ( $pre_type eq 'w' ) {
23335
$expecting = operator_expected( $prev_type, $tok, $next_type );
23336
my ( $next_nonblank_token, $i_next ) =
23337
find_next_nonblank_token( $i, $rtokens, $max_token_index );
23339
# ATTRS: handle sub and variable attributes
23340
if ($in_attribute_list) {
23342
# treat bare word followed by open paren like qw(
23343
if ( $next_nonblank_token eq '(' ) {
23344
$in_quote = $quote_items{'q'};
23345
$allowed_quote_modifiers = $quote_modifiers{'q'};
23351
# handle bareword not followed by open paren
23358
# quote a word followed by => operator
23359
if ( $next_nonblank_token eq '=' ) {
23361
if ( $$rtokens[ $i_next + 1 ] eq '>' ) {
23362
if ( $is_constant{$current_package}{$tok} ) {
23365
elsif ( $is_user_function{$current_package}{$tok} ) {
23368
$user_function_prototype{$current_package}{$tok};
23370
elsif ( $tok =~ /^v\d+$/ ) {
23372
report_v_string($tok);
23374
else { $type = 'w' }
23380
# quote a bare word within braces..like xxx->{s}; note that we
23381
# must be sure this is not a structural brace, to avoid
23382
# mistaking {s} in the following for a quoted bare word:
23383
# for(@[){s}bla}BLA}
23384
# Also treat q in something like var{-q} as a bare word, not quote operator
23385
##if ( ( $last_nonblank_type eq 'L' )
23386
## && ( $next_nonblank_token eq '}' ) )
23388
$next_nonblank_token eq '}'
23390
$last_nonblank_type eq 'L'
23391
|| ( $last_nonblank_type eq 'm'
23392
&& $last_last_nonblank_type eq 'L' )
23400
# a bare word immediately followed by :: is not a keyword;
23401
# use $tok_kw when testing for keywords to avoid a mistake
23403
if ( $$rtokens[ $i + 1 ] eq ':' && $$rtokens[ $i + 2 ] eq ':' )
23408
# handle operator x (now we know it isn't $x=)
23409
if ( ( $tok =~ /^x\d*$/ ) && ( $expecting == OPERATOR ) ) {
23410
if ( $tok eq 'x' ) {
23412
if ( $$rtokens[ $i + 1 ] eq '=' ) { # x=
23422
# FIXME: Patch: mark something like x4 as an integer for now
23423
# It gets fixed downstream. This is easier than
23424
# splitting the pretoken.
23430
elsif ( ( $tok eq 'strict' )
23431
and ( $last_nonblank_token eq 'use' ) )
23433
$tokenizer_self->{_saw_use_strict} = 1;
23434
scan_bare_identifier();
23437
elsif ( ( $tok eq 'warnings' )
23438
and ( $last_nonblank_token eq 'use' ) )
23440
$tokenizer_self->{_saw_perl_dash_w} = 1;
23442
# scan as identifier, so that we pick up something like:
23443
# use warnings::register
23444
scan_bare_identifier();
23448
$tok eq 'AutoLoader'
23449
&& $tokenizer_self->{_look_for_autoloader}
23451
$last_nonblank_token eq 'use'
23453
# these regexes are from AutoSplit.pm, which we want
23455
|| $input_line =~ /^\s*(use|require)\s+AutoLoader\b/
23456
|| $input_line =~ /\bISA\s*=.*\bAutoLoader\b/
23460
write_logfile_entry("AutoLoader seen, -nlal deactivates\n");
23461
$tokenizer_self->{_saw_autoloader} = 1;
23462
$tokenizer_self->{_look_for_autoloader} = 0;
23463
scan_bare_identifier();
23467
$tok eq 'SelfLoader'
23468
&& $tokenizer_self->{_look_for_selfloader}
23469
&& ( $last_nonblank_token eq 'use'
23470
|| $input_line =~ /^\s*(use|require)\s+SelfLoader\b/
23471
|| $input_line =~ /\bISA\s*=.*\bSelfLoader\b/ )
23474
write_logfile_entry("SelfLoader seen, -nlsl deactivates\n");
23475
$tokenizer_self->{_saw_selfloader} = 1;
23476
$tokenizer_self->{_look_for_selfloader} = 0;
23477
scan_bare_identifier();
23480
elsif ( ( $tok eq 'constant' )
23481
and ( $last_nonblank_token eq 'use' ) )
23483
scan_bare_identifier();
23484
my ( $next_nonblank_token, $i_next ) =
23485
find_next_nonblank_token( $i, $rtokens,
23486
$max_token_index );
23488
if ($next_nonblank_token) {
23490
if ( $is_keyword{$next_nonblank_token} ) {
23492
"Attempting to define constant '$next_nonblank_token' which is a perl keyword\n"
23496
# FIXME: could check for error in which next token is
23497
# not a word (number, punctuation, ..)
23499
$is_constant{$current_package}
23500
{$next_nonblank_token} = 1;
23505
# various quote operators
23506
elsif ( $is_q_qq_qw_qx_qr_s_y_tr_m{$tok} ) {
23507
if ( $expecting == OPERATOR ) {
23509
# patch for paren-less for/foreach glitch, part 1
23510
# perl will accept this construct as valid:
23512
# foreach my $key qw\Uno Due Tres Quadro\ {
23513
# print "Set $key\n";
23515
unless ( $tok eq 'qw' && $is_for_foreach{$want_paren} )
23517
error_if_expecting_OPERATOR();
23520
$in_quote = $quote_items{$tok};
23521
$allowed_quote_modifiers = $quote_modifiers{$tok};
23523
# All quote types are 'Q' except possibly qw quotes.
23524
# qw quotes are special in that they may generally be trimmed
23525
# of leading and trailing whitespace. So they are given a
23526
# separate type, 'q', unless requested otherwise.
23528
( $tok eq 'qw' && $tokenizer_self->{_trim_qw} )
23531
$quote_type = $type;
23534
# check for a statement label
23536
( $next_nonblank_token eq ':' )
23537
&& ( $$rtokens[ $i_next + 1 ] ne ':' )
23538
&& ( $i_next <= $max_token_index ) # colon on same line
23542
if ( $tok !~ /[A-Z]/ ) {
23543
push @{ $tokenizer_self->{_rlower_case_labels_at} },
23544
$input_line_number;
23552
# 'sub' || 'package'
23553
elsif ( $is_sub_package{$tok_kw} ) {
23554
error_if_expecting_OPERATOR()
23555
if ( $expecting == OPERATOR );
23559
# Note on token types for format, __DATA__, __END__:
23560
# It simplifies things to give these type ';', so that when we
23561
# start rescanning we will be expecting a token of type TERM.
23562
# We will switch to type 'k' before outputting the tokens.
23563
elsif ( $is_format_END_DATA{$tok_kw} ) {
23564
$type = ';'; # make tokenizer look for TERM next
23565
$tokenizer_self->{ $is_format_END_DATA{$tok_kw} } = 1;
23569
elsif ( $is_keyword{$tok_kw} ) {
23572
# Since for and foreach may not be followed immediately
23573
# by an opening paren, we have to remember which keyword
23574
# is associated with the next '('
23575
if ( $is_for_foreach{$tok} ) {
23576
if ( new_statement_ok() ) {
23577
$want_paren = $tok;
23581
# recognize 'use' statements, which are special
23582
elsif ( $is_use_require{$tok} ) {
23583
$statement_type = $tok;
23584
error_if_expecting_OPERATOR()
23585
if ( $expecting == OPERATOR );
23588
# remember my and our to check for trailing ": shared"
23589
elsif ( $is_my_our{$tok} ) {
23590
$statement_type = $tok;
23593
# Check for misplaced 'elsif' and 'else', but allow isolated
23594
# else or elsif blocks to be formatted. This is indicated
23595
# by a last nonblank token of ';'
23596
elsif ( $tok eq 'elsif' ) {
23597
if ( $last_nonblank_token ne ';'
23598
&& $last_nonblank_block_type !~
23599
/^(if|elsif|unless)$/ )
23602
"expecting '$tok' to follow one of 'if|elsif|unless'\n"
23606
elsif ( $tok eq 'else' ) {
23608
# patched for SWITCH/CASE
23609
if ( $last_nonblank_token ne ';'
23610
&& $last_nonblank_block_type !~
23611
/^(if|elsif|unless|case|when)$/ )
23614
"expecting '$tok' to follow one of 'if|elsif|unless|case|when'\n"
23618
elsif ( $tok eq 'continue' ) {
23619
if ( $last_nonblank_token ne ';'
23620
&& $last_nonblank_block_type !~
23621
/(^(\{|\}|;|while|until|for|foreach)|:$)/ )
23624
# note: ';' '{' and '}' in list above
23625
# because continues can follow bare blocks;
23626
# ':' is labeled block
23628
############################################
23629
# NOTE: This check has been deactivated because
23630
# continue has an alternative usage for given/when
23631
# blocks in perl 5.10
23632
## warning("'$tok' should follow a block\n");
23633
############################################
23637
# patch for SWITCH/CASE if 'case' and 'when' are
23638
# treated as keywords.
23639
elsif ( $tok eq 'when' || $tok eq 'case' ) {
23640
$statement_type = $tok; # next '{' is block
23643
# indent trailing if/unless/while/until
23644
# outdenting will be handled by later indentation loop
23645
if ( $tok =~ /^(if|unless|while|until)$/
23646
&& $next_nonblank_token ne '(' )
23652
# check for inline label following
23653
# /^(redo|last|next|goto)$/
23654
elsif (( $last_nonblank_type eq 'k' )
23655
&& ( $is_redo_last_next_goto{$last_nonblank_token} ) )
23661
# something else --
23664
scan_bare_identifier();
23665
if ( $type eq 'w' ) {
23667
if ( $expecting == OPERATOR ) {
23669
# don't complain about possible indirect object
23673
# sub new($) { ... }
23674
# $b = new A::; # calls A::new
23675
# $c = new A; # same thing but suspicious
23676
# This will call A::new but we have a 'new' in
23677
# main:: which looks like a constant.
23679
if ( $last_nonblank_type eq 'C' ) {
23680
if ( $tok !~ /::$/ ) {
23682
Expecting operator after '$last_nonblank_token' but found bare word '$tok'
23683
Maybe indirectet object notation?
23688
error_if_expecting_OPERATOR("bareword");
23692
# mark bare words immediately followed by a paren as
23694
$next_tok = $$rtokens[ $i + 1 ];
23695
if ( $next_tok eq '(' ) {
23699
# underscore after file test operator is file handle
23700
if ( $tok eq '_' && $last_nonblank_type eq 'F' ) {
23704
# patch for SWITCH/CASE if 'case' and 'when' are
23705
# not treated as keywords:
23709
&& $brace_type[$brace_depth] eq 'switch'
23711
|| ( $tok eq 'when'
23712
&& $brace_type[$brace_depth] eq 'given' )
23715
$statement_type = $tok; # next '{' is block
23716
$type = 'k'; # for keyword syntax coloring
23719
# patch for SWITCH/CASE if switch and given not keywords
23720
# Switch is not a perl 5 keyword, but we will gamble
23721
# and mark switch followed by paren as a keyword. This
23722
# is only necessary to get html syntax coloring nice,
23723
# and does not commit this as being a switch/case.
23724
if ( $next_nonblank_token eq '('
23725
&& ( $tok eq 'switch' || $tok eq 'given' ) )
23727
$type = 'k'; # for keyword syntax coloring
23733
###############################################################
23734
# section 2: strings of digits
23735
###############################################################
23736
elsif ( $pre_type eq 'd' ) {
23737
$expecting = operator_expected( $prev_type, $tok, $next_type );
23738
error_if_expecting_OPERATOR("Number")
23739
if ( $expecting == OPERATOR );
23740
my $number = scan_number();
23741
if ( !defined($number) ) {
23743
# shouldn't happen - we should always get a number
23744
warning("non-number beginning with digit--program bug\n");
23745
report_definite_bug();
23749
###############################################################
23750
# section 3: all other tokens
23751
###############################################################
23754
last if ( $tok eq '#' );
23755
my $code = $tokenization_code->{$tok};
23758
operator_expected( $prev_type, $tok, $next_type );
23765
# -----------------------------
23766
# end of main tokenization loop
23767
# -----------------------------
23769
if ( $i_tok >= 0 ) {
23770
$routput_token_type->[$i_tok] = $type;
23771
$routput_block_type->[$i_tok] = $block_type;
23772
$routput_container_type->[$i_tok] = $container_type;
23773
$routput_type_sequence->[$i_tok] = $type_sequence;
23774
$routput_indent_flag->[$i_tok] = $indent_flag;
23777
unless ( ( $type eq 'b' ) || ( $type eq '#' ) ) {
23778
$last_last_nonblank_token = $last_nonblank_token;
23779
$last_last_nonblank_type = $last_nonblank_type;
23780
$last_last_nonblank_block_type = $last_nonblank_block_type;
23781
$last_last_nonblank_container_type = $last_nonblank_container_type;
23782
$last_last_nonblank_type_sequence = $last_nonblank_type_sequence;
23783
$last_nonblank_token = $tok;
23784
$last_nonblank_type = $type;
23785
$last_nonblank_block_type = $block_type;
23786
$last_nonblank_container_type = $container_type;
23787
$last_nonblank_type_sequence = $type_sequence;
23788
$last_nonblank_prototype = $prototype;
23791
# reset indentation level if necessary at a sub or package
23792
# in an attempt to recover from a nesting error
23793
if ( $level_in_tokenizer < 0 ) {
23794
if ( $input_line =~ /^\s*(sub|package)\s+(\w+)/ ) {
23795
reset_indentation_level(0);
23796
brace_warning("resetting level to 0 at $1 $2\n");
23800
# all done tokenizing this line ...
23801
# now prepare the final list of tokens and types
23803
my @token_type = (); # stack of output token types
23804
my @block_type = (); # stack of output code block types
23805
my @container_type = (); # stack of output code container types
23806
my @type_sequence = (); # stack of output type sequence numbers
23807
my @tokens = (); # output tokens
23808
my @levels = (); # structural brace levels of output tokens
23809
my @slevels = (); # secondary nesting levels of output tokens
23810
my @nesting_tokens = (); # string of tokens leading to this depth
23811
my @nesting_types = (); # string of token types leading to this depth
23812
my @nesting_blocks = (); # string of block types leading to this depth
23813
my @nesting_lists = (); # string of list types leading to this depth
23814
my @ci_string = (); # string needed to compute continuation indentation
23815
my @container_environment = (); # BLOCK or LIST
23816
my $container_environment = '';
23817
my $im = -1; # previous $i value
23819
my $ci_string_sum = ones_count($ci_string_in_tokenizer);
23821
# Computing Token Indentation
23823
# The final section of the tokenizer forms tokens and also computes
23824
# parameters needed to find indentation. It is much easier to do it
23825
# in the tokenizer than elsewhere. Here is a brief description of how
23826
# indentation is computed. Perl::Tidy computes indentation as the sum
23829
# (1) structural indentation, such as if/else/elsif blocks
23830
# (2) continuation indentation, such as long parameter call lists.
23832
# These are occasionally called primary and secondary indentation.
23834
# Structural indentation is introduced by tokens of type '{', although
23835
# the actual tokens might be '{', '(', or '['. Structural indentation
23836
# is of two types: BLOCK and non-BLOCK. Default structural indentation
23837
# is 4 characters if the standard indentation scheme is used.
23839
# Continuation indentation is introduced whenever a line at BLOCK level
23840
# is broken before its termination. Default continuation indentation
23841
# is 2 characters in the standard indentation scheme.
23843
# Both types of indentation may be nested arbitrarily deep and
23844
# interlaced. The distinction between the two is somewhat arbitrary.
23846
# For each token, we will define two variables which would apply if
23847
# the current statement were broken just before that token, so that
23848
# that token started a new line:
23850
# $level = the structural indentation level,
23851
# $ci_level = the continuation indentation level
23853
# The total indentation will be $level * (4 spaces) + $ci_level * (2 spaces),
23854
# assuming defaults. However, in some special cases it is customary
23855
# to modify $ci_level from this strict value.
23857
# The total structural indentation is easy to compute by adding and
23858
# subtracting 1 from a saved value as types '{' and '}' are seen. The
23859
# running value of this variable is $level_in_tokenizer.
23861
# The total continuation is much more difficult to compute, and requires
23862
# several variables. These variables are:
23864
# $ci_string_in_tokenizer = a string of 1's and 0's indicating, for
23865
# each indentation level, if there are intervening open secondary
23866
# structures just prior to that level.
23867
# $continuation_string_in_tokenizer = a string of 1's and 0's indicating
23868
# if the last token at that level is "continued", meaning that it
23869
# is not the first token of an expression.
23870
# $nesting_block_string = a string of 1's and 0's indicating, for each
23871
# indentation level, if the level is of type BLOCK or not.
23872
# $nesting_block_flag = the most recent 1 or 0 of $nesting_block_string
23873
# $nesting_list_string = a string of 1's and 0's indicating, for each
23874
# indentation level, if it is appropriate for list formatting.
23875
# If so, continuation indentation is used to indent long list items.
23876
# $nesting_list_flag = the most recent 1 or 0 of $nesting_list_string
23877
# @{$rslevel_stack} = a stack of total nesting depths at each
23878
# structural indentation level, where "total nesting depth" means
23879
# the nesting depth that would occur if every nesting token -- '{', '[',
23880
# and '(' -- , regardless of context, is used to compute a nesting
23883
#my $nesting_block_flag = ($nesting_block_string =~ /1$/);
23884
#my $nesting_list_flag = ($nesting_list_string =~ /1$/);
23886
my ( $ci_string_i, $level_i, $nesting_block_string_i,
23887
$nesting_list_string_i, $nesting_token_string_i,
23888
$nesting_type_string_i, );
23890
foreach $i ( @{$routput_token_list} )
23891
{ # scan the list of pre-tokens indexes
23893
# self-checking for valid token types
23894
my $type = $routput_token_type->[$i];
23895
my $forced_indentation_flag = $routput_indent_flag->[$i];
23897
# See if we should undo the $forced_indentation_flag.
23898
# Forced indentation after 'if', 'unless', 'while' and 'until'
23899
# expressions without trailing parens is optional and doesn't
23900
# always look good. It is usually okay for a trailing logical
23901
# expression, but if the expression is a function call, code block,
23902
# or some kind of list it puts in an unwanted extra indentation
23903
# level which is hard to remove.
23905
# Example where extra indentation looks ok:
23907
# if $det_a < 0 and $det_b > 0
23908
# or $det_a > 0 and $det_b < 0;
23910
# Example where extra indentation is not needed because
23911
# the eval brace also provides indentation:
23912
# print "not " if defined eval {
23913
# reduce { die if $b > 2; $a + $b } 0, 1, 2, 3, 4;
23916
# The following rule works fairly well:
23917
# Undo the flag if the end of this line, or start of the next
23918
# line, is an opening container token or a comma.
23919
# This almost always works, but if not after another pass it will
23921
if ( $forced_indentation_flag && $type eq 'k' ) {
23923
my $ilast = $routput_token_list->[$ixlast];
23924
my $toklast = $routput_token_type->[$ilast];
23925
if ( $toklast eq '#' ) {
23927
$ilast = $routput_token_list->[$ixlast];
23928
$toklast = $routput_token_type->[$ilast];
23930
if ( $toklast eq 'b' ) {
23932
$ilast = $routput_token_list->[$ixlast];
23933
$toklast = $routput_token_type->[$ilast];
23935
if ( $toklast =~ /^[\{,]$/ ) {
23936
$forced_indentation_flag = 0;
23939
( $toklast, my $i_next ) =
23940
find_next_nonblank_token( $max_token_index, $rtokens,
23941
$max_token_index );
23942
if ( $toklast =~ /^[\{,]$/ ) {
23943
$forced_indentation_flag = 0;
23948
# if we are already in an indented if, see if we should outdent
23949
if ($indented_if_level) {
23951
# don't try to nest trailing if's - shouldn't happen
23952
if ( $type eq 'k' ) {
23953
$forced_indentation_flag = 0;
23956
# check for the normal case - outdenting at next ';'
23957
elsif ( $type eq ';' ) {
23958
if ( $level_in_tokenizer == $indented_if_level ) {
23959
$forced_indentation_flag = -1;
23960
$indented_if_level = 0;
23964
# handle case of missing semicolon
23965
elsif ( $type eq '}' ) {
23966
if ( $level_in_tokenizer == $indented_if_level ) {
23967
$indented_if_level = 0;
23969
# TBD: This could be a subroutine call
23970
$level_in_tokenizer--;
23971
if ( @{$rslevel_stack} > 1 ) {
23972
pop( @{$rslevel_stack} );
23974
if ( length($nesting_block_string) > 1 )
23975
{ # true for valid script
23976
chop $nesting_block_string;
23977
chop $nesting_list_string;
23984
my $tok = $$rtokens[$i]; # the token, but ONLY if same as pretoken
23985
$level_i = $level_in_tokenizer;
23987
# This can happen by running perltidy on non-scripts
23988
# although it could also be bug introduced by programming change.
23989
# Perl silently accepts a 032 (^Z) and takes it as the end
23990
if ( !$is_valid_token_type{$type} ) {
23991
my $val = ord($type);
23993
"unexpected character decimal $val ($type) in script\n");
23994
$tokenizer_self->{_in_error} = 1;
23997
# ----------------------------------------------------------------
23998
# TOKEN TYPE PATCHES
23999
# output __END__, __DATA__, and format as type 'k' instead of ';'
24000
# to make html colors correct, etc.
24001
my $fix_type = $type;
24002
if ( $type eq ';' && $tok =~ /\w/ ) { $fix_type = 'k' }
24004
# output anonymous 'sub' as keyword
24005
if ( $type eq 't' && $tok eq 'sub' ) { $fix_type = 'k' }
24007
# -----------------------------------------------------------------
24009
$nesting_token_string_i = $nesting_token_string;
24010
$nesting_type_string_i = $nesting_type_string;
24011
$nesting_block_string_i = $nesting_block_string;
24012
$nesting_list_string_i = $nesting_list_string;
24014
# set primary indentation levels based on structural braces
24015
# Note: these are set so that the leading braces have a HIGHER
24016
# level than their CONTENTS, which is convenient for indentation
24017
# Also, define continuation indentation for each token.
24018
if ( $type eq '{' || $type eq 'L' || $forced_indentation_flag > 0 )
24021
# use environment before updating
24022
$container_environment =
24023
$nesting_block_flag ? 'BLOCK'
24024
: $nesting_list_flag ? 'LIST'
24027
# if the difference between total nesting levels is not 1,
24028
# there are intervening non-structural nesting types between
24029
# this '{' and the previous unclosed '{'
24030
my $intervening_secondary_structure = 0;
24031
if ( @{$rslevel_stack} ) {
24032
$intervening_secondary_structure =
24033
$slevel_in_tokenizer - $rslevel_stack->[-1];
24036
# Continuation Indentation
24038
# Having tried setting continuation indentation both in the formatter and
24039
# in the tokenizer, I can say that setting it in the tokenizer is much,
24040
# much easier. The formatter already has too much to do, and can't
24041
# make decisions on line breaks without knowing what 'ci' will be at
24042
# arbitrary locations.
24044
# But a problem with setting the continuation indentation (ci) here
24045
# in the tokenizer is that we do not know where line breaks will actually
24046
# be. As a result, we don't know if we should propagate continuation
24047
# indentation to higher levels of structure.
24049
# For nesting of only structural indentation, we never need to do this.
24050
# For example, in a long if statement, like this
24052
# if ( !$output_block_type[$i]
24053
# && ($in_statement_continuation) )
24058
# the second line has ci but we do not normally give the lines within the BLOCK
24059
# any ci. This would be true if we had blocks nested arbitrarily deeply.
24061
# But consider something like this, where we have created a break after
24062
# an opening paren on line 1, and the paren is not (currently) a
24063
# structural indentation token:
24065
# my $file = $menubar->Menubutton(
24066
# qw/-text File -underline 0 -menuitems/ => [
24068
# Cascade => '~View',
24072
# The second line has ci, so it would seem reasonable to propagate it
24073
# down, giving the third line 1 ci + 1 indentation. This suggests the
24074
# following rule, which is currently used to propagate ci down: if there
24075
# are any non-structural opening parens (or brackets, or braces), before
24076
# an opening structural brace, then ci is propagated down, and otherwise
24077
# not. The variable $intervening_secondary_structure contains this
24078
# information for the current token, and the string
24079
# "$ci_string_in_tokenizer" is a stack of previous values of this
24082
# save the current states
24083
push( @{$rslevel_stack}, 1 + $slevel_in_tokenizer );
24084
$level_in_tokenizer++;
24086
if ($forced_indentation_flag) {
24088
# break BEFORE '?' when there is forced indentation
24089
if ( $type eq '?' ) { $level_i = $level_in_tokenizer; }
24090
if ( $type eq 'k' ) {
24091
$indented_if_level = $level_in_tokenizer;
24095
if ( $routput_block_type->[$i] ) {
24096
$nesting_block_flag = 1;
24097
$nesting_block_string .= '1';
24100
$nesting_block_flag = 0;
24101
$nesting_block_string .= '0';
24104
# we will use continuation indentation within containers
24105
# which are not blocks and not logical expressions
24107
if ( !$routput_block_type->[$i] ) {
24109
# propagate flag down at nested open parens
24110
if ( $routput_container_type->[$i] eq '(' ) {
24111
$bit = 1 if $nesting_list_flag;
24114
# use list continuation if not a logical grouping
24115
# /^(if|elsif|unless|while|and|or|not|&&|!|\|\||for|foreach)$/
24119
$is_logical_container{ $routput_container_type->[$i]
24123
$nesting_list_string .= $bit;
24124
$nesting_list_flag = $bit;
24126
$ci_string_in_tokenizer .=
24127
( $intervening_secondary_structure != 0 ) ? '1' : '0';
24128
$ci_string_sum = ones_count($ci_string_in_tokenizer);
24129
$continuation_string_in_tokenizer .=
24130
( $in_statement_continuation > 0 ) ? '1' : '0';
24132
# Sometimes we want to give an opening brace continuation indentation,
24133
# and sometimes not. For code blocks, we don't do it, so that the leading
24134
# '{' gets outdented, like this:
24136
# if ( !$output_block_type[$i]
24137
# && ($in_statement_continuation) )
24140
# For other types, we will give them continuation indentation. For example,
24141
# here is how a list looks with the opening paren indented:
24144
# ( [ "fred", "barney" ], [ "george", "jane", "elroy" ],
24145
# [ "homer", "marge", "bart" ], );
24147
# This looks best when 'ci' is one-half of the indentation (i.e., 2 and 4)
24149
my $total_ci = $ci_string_sum;
24151
!$routput_block_type->[$i] # patch: skip for BLOCK
24152
&& ($in_statement_continuation)
24153
&& !( $forced_indentation_flag && $type eq ':' )
24156
$total_ci += $in_statement_continuation
24157
unless ( $ci_string_in_tokenizer =~ /1$/ );
24160
$ci_string_i = $total_ci;
24161
$in_statement_continuation = 0;
24164
elsif ($type eq '}'
24166
|| $forced_indentation_flag < 0 )
24169
# only a nesting error in the script would prevent popping here
24170
if ( @{$rslevel_stack} > 1 ) { pop( @{$rslevel_stack} ); }
24172
$level_i = --$level_in_tokenizer;
24174
# restore previous level values
24175
if ( length($nesting_block_string) > 1 )
24176
{ # true for valid script
24177
chop $nesting_block_string;
24178
$nesting_block_flag = ( $nesting_block_string =~ /1$/ );
24179
chop $nesting_list_string;
24180
$nesting_list_flag = ( $nesting_list_string =~ /1$/ );
24182
chop $ci_string_in_tokenizer;
24183
$ci_string_sum = ones_count($ci_string_in_tokenizer);
24185
$in_statement_continuation =
24186
chop $continuation_string_in_tokenizer;
24188
# zero continuation flag at terminal BLOCK '}' which
24189
# ends a statement.
24190
if ( $routput_block_type->[$i] ) {
24192
# ...These include non-anonymous subs
24193
# note: could be sub ::abc { or sub 'abc
24194
if ( $routput_block_type->[$i] =~ m/^sub\s*/gc ) {
24196
# note: older versions of perl require the /gc modifier
24197
# here or else the \G does not work.
24198
if ( $routput_block_type->[$i] =~ /\G('|::|\w)/gc )
24200
$in_statement_continuation = 0;
24204
# ...and include all block types except user subs with
24205
# block prototypes and these: (sort|grep|map|do|eval)
24206
# /^(\}|\{|BEGIN|END|CHECK|INIT|AUTOLOAD|DESTROY|UNITCHECK|continue|;|if|elsif|else|unless|while|until|for|foreach)$/
24208
$is_zero_continuation_block_type{
24209
$routput_block_type->[$i] } )
24211
$in_statement_continuation = 0;
24214
# ..but these are not terminal types:
24215
# /^(sort|grep|map|do|eval)$/ )
24217
$is_not_zero_continuation_block_type{
24218
$routput_block_type->[$i] } )
24222
# ..and a block introduced by a label
24223
# /^\w+\s*:$/gc ) {
24224
elsif ( $routput_block_type->[$i] =~ /:$/ ) {
24225
$in_statement_continuation = 0;
24228
# user function with block prototype
24230
$in_statement_continuation = 0;
24234
# If we are in a list, then
24235
# we must set continuation indentation at the closing
24236
# paren of something like this (paren after $check):
24239
# ( not defined $check )
24241
# or $check eq "new"
24242
# or $check eq "old",
24244
elsif ( $tok eq ')' ) {
24245
$in_statement_continuation = 1
24246
if $routput_container_type->[$i] =~ /^[;,\{\}]$/;
24249
elsif ( $tok eq ';' ) { $in_statement_continuation = 0 }
24252
# use environment after updating
24253
$container_environment =
24254
$nesting_block_flag ? 'BLOCK'
24255
: $nesting_list_flag ? 'LIST'
24257
$ci_string_i = $ci_string_sum + $in_statement_continuation;
24258
$nesting_block_string_i = $nesting_block_string;
24259
$nesting_list_string_i = $nesting_list_string;
24262
# not a structural indentation type..
24265
$container_environment =
24266
$nesting_block_flag ? 'BLOCK'
24267
: $nesting_list_flag ? 'LIST'
24270
# zero the continuation indentation at certain tokens so
24271
# that they will be at the same level as its container. For
24272
# commas, this simplifies the -lp indentation logic, which
24273
# counts commas. For ?: it makes them stand out.
24274
if ($nesting_list_flag) {
24275
if ( $type =~ /^[,\?\:]$/ ) {
24276
$in_statement_continuation = 0;
24280
# be sure binary operators get continuation indentation
24282
$container_environment
24283
&& ( $type eq 'k' && $is_binary_keyword{$tok}
24284
|| $is_binary_type{$type} )
24287
$in_statement_continuation = 1;
24290
# continuation indentation is sum of any open ci from previous
24291
# levels plus the current level
24292
$ci_string_i = $ci_string_sum + $in_statement_continuation;
24294
# update continuation flag ...
24295
# if this isn't a blank or comment..
24296
if ( $type ne 'b' && $type ne '#' ) {
24298
# and we are in a BLOCK
24299
if ($nesting_block_flag) {
24301
# the next token after a ';' and label starts a new stmt
24302
if ( $type eq ';' || $type eq 'J' ) {
24303
$in_statement_continuation = 0;
24306
# otherwise, we are continuing the current statement
24308
$in_statement_continuation = 1;
24312
# if we are not in a BLOCK..
24315
# do not use continuation indentation if not list
24316
# environment (could be within if/elsif clause)
24317
if ( !$nesting_list_flag ) {
24318
$in_statement_continuation = 0;
24321
# otherwise, the next token after a ',' starts a new term
24322
elsif ( $type eq ',' ) {
24323
$in_statement_continuation = 0;
24326
# otherwise, we are continuing the current term
24328
$in_statement_continuation = 1;
24334
if ( $level_in_tokenizer < 0 ) {
24335
unless ( $tokenizer_self->{_saw_negative_indentation} ) {
24336
$tokenizer_self->{_saw_negative_indentation} = 1;
24337
warning("Starting negative indentation\n");
24341
# set secondary nesting levels based on all containment token types
24342
# Note: these are set so that the nesting depth is the depth
24343
# of the PREVIOUS TOKEN, which is convenient for setting
24344
# the strength of token bonds
24345
my $slevel_i = $slevel_in_tokenizer;
24348
if ( $is_opening_type{$type} ) {
24349
$slevel_in_tokenizer++;
24350
$nesting_token_string .= $tok;
24351
$nesting_type_string .= $type;
24355
elsif ( $is_closing_type{$type} ) {
24356
$slevel_in_tokenizer--;
24357
my $char = chop $nesting_token_string;
24359
if ( $char ne $matching_start_token{$tok} ) {
24360
$nesting_token_string .= $char . $tok;
24361
$nesting_type_string .= $type;
24364
chop $nesting_type_string;
24368
push( @block_type, $routput_block_type->[$i] );
24369
push( @ci_string, $ci_string_i );
24370
push( @container_environment, $container_environment );
24371
push( @container_type, $routput_container_type->[$i] );
24372
push( @levels, $level_i );
24373
push( @nesting_tokens, $nesting_token_string_i );
24374
push( @nesting_types, $nesting_type_string_i );
24375
push( @slevels, $slevel_i );
24376
push( @token_type, $fix_type );
24377
push( @type_sequence, $routput_type_sequence->[$i] );
24378
push( @nesting_blocks, $nesting_block_string );
24379
push( @nesting_lists, $nesting_list_string );
24381
# now form the previous token
24384
$$rtoken_map[$i] - $$rtoken_map[$im]; # how many characters
24388
substr( $input_line, $$rtoken_map[$im], $num ) );
24394
$num = length($input_line) - $$rtoken_map[$im]; # make the last token
24396
push( @tokens, substr( $input_line, $$rtoken_map[$im], $num ) );
24399
$tokenizer_self->{_in_attribute_list} = $in_attribute_list;
24400
$tokenizer_self->{_in_quote} = $in_quote;
24401
$tokenizer_self->{_quote_target} =
24402
$in_quote ? matching_end_token($quote_character) : "";
24403
$tokenizer_self->{_rhere_target_list} = $rhere_target_list;
24405
$line_of_tokens->{_rtoken_type} = \@token_type;
24406
$line_of_tokens->{_rtokens} = \@tokens;
24407
$line_of_tokens->{_rblock_type} = \@block_type;
24408
$line_of_tokens->{_rcontainer_type} = \@container_type;
24409
$line_of_tokens->{_rcontainer_environment} = \@container_environment;
24410
$line_of_tokens->{_rtype_sequence} = \@type_sequence;
24411
$line_of_tokens->{_rlevels} = \@levels;
24412
$line_of_tokens->{_rslevels} = \@slevels;
24413
$line_of_tokens->{_rnesting_tokens} = \@nesting_tokens;
24414
$line_of_tokens->{_rci_levels} = \@ci_string;
24415
$line_of_tokens->{_rnesting_blocks} = \@nesting_blocks;
24419
} # end tokenize_this_line
#########i#############################################################
# Tokenizer routines which assist in identifying token types
#######################################################################
sub operator_expected {

    # Many perl symbols have two or more meanings.  For example, '<<'
    # can be a shift operator or a here-doc operator.  The
    # interpretation of these symbols depends on the current state of
    # the tokenizer, which may either be expecting a term or an
    # operator.  For this example, a << would be a shift if an operator
    # is expected, and a here-doc if a term is expected.  This routine
    # is called to make this decision for any current token.  It returns
    # one of three possible values:
    #
    #     OPERATOR - operator expected (or at least, not a term)
    #     UNKNOWN  - can't tell
    #     TERM     - a term is expected (or at least, not an operator)
    #
    # The decision is based on what has been seen so far.  This
    # information is stored in the "$last_nonblank_type" and
    # "$last_nonblank_token" variables.  For example, if the
    # $last_nonblank_type is '=~', then we are expecting a TERM, whereas
    # if $last_nonblank_type is 'n' (numeric), we are expecting an
    # OPERATOR.
    #
    # If a UNKNOWN is returned, the calling routine must guess. A major
    # goal of this tokenizer is to minimize the possibility of returning
    # UNKNOWN, because a wrong guess can spoil the formatting of a
    # script.
    #
    # adding NEW_TOKENS: it is critically important that this routine be
    # updated to allow it to determine if an operator or term is to be
    # expected after the new token.  Doing this simply involves adding
    # the new token character to one of the regexes in this routine or
    # to one of the hash lists
    # that it uses, which are initialized in the BEGIN section.
    #
    # Parameters (as used below):
    #   $prev_type - compared with 'b' (blank) in the filehandle case
    #   $tok       - the current token
    #   $next_type - compared with 'b' and with '/' (for the '//' operator)
    #
    # USES GLOBAL VARIABLES: $last_nonblank_type, $last_nonblank_token,
    # $statement_type

    my ( $prev_type, $tok, $next_type ) = @_;

    my $op_expected = UNKNOWN;

#print "tok=$tok last type=$last_nonblank_type last tok=$last_nonblank_token\n";

# Note: function prototype is available for token type 'U' for future
# program development. It contains the leading and trailing parens,
# and no blanks. It might be used to eliminate token type 'C', for
# example (prototype = '()'). Thus:
# if ($last_nonblank_type eq 'U') {
# print "previous token=$last_nonblank_token type=$last_nonblank_type prototype=$last_nonblank_prototype\n";
# }

    # A possible filehandle (or object) requires some care...
    if ( $last_nonblank_type eq 'Z' ) {

        # a bareword-like handle name: cannot tell what follows
        if ( $last_nonblank_token =~ /^[A-Za-z_]/ ) {
            $op_expected = UNKNOWN;
        }

        # For possible file handle like "$a", Perl uses weird parsing rules.
        # For example:
        # print $a/2,"/hi";   - division
        # print $a / 2,"/hi"; - division
        # print $a/ 2,"/hi";  - division
        # print $a /2,"/hi";  - pattern (and error)!
        elsif ( ( $prev_type eq 'b' ) && ( $next_type ne 'b' ) ) {
            $op_expected = TERM;
        }

        # Note when an operation is being done where a
        # filehandle might be expected, since a change in whitespace
        # could change the interpretation of the statement.
        else {
            if ( $tok =~ /^([x\/\+\-\*\%\&\.\?\<]|\>\>)$/ ) {
                complain("operator in print statement not recommended\n");
                $op_expected = OPERATOR;
            }
        }
    }

    # handle something after 'do' and 'eval'
    elsif ( $is_block_operator{$last_nonblank_token} ) {

        # something like $a = eval "expression";
        if ( $last_nonblank_type eq 'k' ) {
            $op_expected = TERM;    # expression or list mode following keyword
        }

        # something like $a = do { BLOCK } / 2;
        else {
            $op_expected = OPERATOR;    # block mode following }
        }
    }

    # handle bare word..
    elsif ( $last_nonblank_type eq 'w' ) {

        # unfortunately, we can't tell what type of token to expect next
        # after most bare words
        $op_expected = UNKNOWN;
    }

    # operator, but not term possible after these types
    # Note: moved ')' from type to token because parens in list context
    # get marked as '{' '}' now.  This is a minor glitch in the following:
    #    my %opts = (ref $_[0] eq 'HASH') ? %{shift()} : ();
    elsif (( $last_nonblank_type =~ /^[\]RnviQh]$/ )
        || ( $last_nonblank_token =~ /^(\)|\$|\-\>)/ ) )
    {
        $op_expected = OPERATOR;

        # in a 'use' statement, numbers and v-strings are not true
        # numbers, so to avoid incorrect error messages, we will
        # mark them as unknown for now (use.t)
        # TODO: it would be much nicer to create a new token V for VERSION
        # number in a use statement.  Then this could be a check on type V
        # and related patches which change $statement_type for '=>'
        # and ',' could be removed.  Further, it would clean things up to
        # scan the 'use' statement with a separate subroutine.
        if (   ( $statement_type eq 'use' )
            && ( $last_nonblank_type =~ /^[nv]$/ ) )
        {
            $op_expected = UNKNOWN;
        }
    }

    # no operator after many keywords, such as "die", "warn", etc
    elsif ( $expecting_term_token{$last_nonblank_token} ) {

        # patch for dor.t (defined or).
        # perl functions which may be unary operators
        # TODO: This list is incomplete, and these should be put
        # into a hash.
        # The alternation is grouped so that both anchors apply to every
        # keyword; ungrouped, /^eof|undef|shift|pop$/ would also match
        # tokens such as 'unshift'.
        if (   $tok eq '/'
            && $next_type eq '/'
            && $last_nonblank_type eq 'k'
            && $last_nonblank_token =~ /^(?:eof|undef|shift|pop)$/ )
        {
            $op_expected = OPERATOR;
        }
        else {
            $op_expected = TERM;
        }
    }

    # no operator after things like + - **  (i.e., other operators)
    elsif ( $expecting_term_types{$last_nonblank_type} ) {
        $op_expected = TERM;
    }

    # a few operators, like "time", have an empty prototype () and so
    # take no parameters but produce a value to operate on
    elsif ( $expecting_operator_token{$last_nonblank_token} ) {
        $op_expected = OPERATOR;
    }

    # post-increment and decrement produce values to be operated on
    elsif ( $expecting_operator_types{$last_nonblank_type} ) {
        $op_expected = OPERATOR;
    }

    # no value to operate on after sub block
    elsif ( $last_nonblank_token =~ /^sub\s/ ) { $op_expected = TERM; }

    # a right brace here indicates the end of a simple block.
    # all non-structural right braces have type 'R'
    # all braces associated with block operator keywords have been given those
    # keywords as "last_nonblank_token" and caught above.
    # (This statement is order dependent, and must come after checking
    # $last_nonblank_token).
    elsif ( $last_nonblank_type eq '}' ) {

        # patch for dor.t (defined or).
        if (   $tok eq '/'
            && $next_type eq '/'
            && $last_nonblank_token eq ']' )
        {
            $op_expected = OPERATOR;
        }
        else {
            $op_expected = TERM;
        }
    }

    # something else..what did I forget?
    else {

        # collecting diagnostics on unknown operator types..see what was missed
        $op_expected = UNKNOWN;
        write_diagnostics(
"OP: unknown after type=$last_nonblank_type token=$last_nonblank_token\n"
        );
    }

    TOKENIZER_DEBUG_FLAG_EXPECT && do {
        print
"EXPECT: returns $op_expected for last type $last_nonblank_type token $last_nonblank_token\n";
    };
    return $op_expected;
}
sub new_statement_ok {

    # return true if the current token can start a new statement,
    # i.e. if a label would be acceptable here or we directly follow a
    # label (token type 'J')
    # USES GLOBAL VARIABLES: $last_nonblank_type

    return label_ok()    # a label would be ok here

      || $last_nonblank_type eq 'J';    # or we follow a label
}
sub label_ok {

    # Decide if a bare word followed by a colon here is a label
    # Returns a true value if a label is allowed at this point.
    # USES GLOBAL VARIABLES: $last_nonblank_token, $last_nonblank_type,
    # $brace_depth, @brace_type

    # if it follows an opening or closing code block curly brace..
    if ( ( $last_nonblank_token eq '{' || $last_nonblank_token eq '}' )
        && $last_nonblank_type eq $last_nonblank_token )
    {

        # it is a label if and only if the curly encloses a code block
        return $brace_type[$brace_depth];
    }

    # otherwise, it is a label if and only if it follows a ';'
    else {
        return ( $last_nonblank_type eq ';' );
    }
}
sub code_block_type {

    # Decide if this is a block of code, and its type.
    # Must be called only when $type = $token = '{'
    # The problem is to distinguish between the start of a block of code
    # and the start of an anonymous hash reference
    # Returns "" if not code block, otherwise returns 'last_nonblank_token'
    # to indicate the type of code block.  (For example, 'last_nonblank_token'
    # might be 'if' for an if block, 'else' for an else block, etc).
    # The arguments are only forwarded to decide_if_code_block, which
    # looks ahead in the token stream for the ambiguous cases.
    # USES GLOBAL VARIABLES: $last_nonblank_token, $last_nonblank_type,
    # $last_nonblank_block_type, $brace_depth, @brace_type

    # handle case of multiple '{'s

# print "BLOCK_TYPE EXAMINING: type=$last_nonblank_type tok=$last_nonblank_token\n";

    my ( $i, $rtokens, $rtoken_type, $max_token_index ) = @_;
    if (   $last_nonblank_token eq '{'
        && $last_nonblank_type eq $last_nonblank_token )
    {

        # opening brace where a statement may appear is probably
        # a code block but might be an anonymous hash reference
        if ( $brace_type[$brace_depth] ) {
            return decide_if_code_block( $i, $rtokens, $rtoken_type,
                $max_token_index );
        }

        # cannot start a code block within an anonymous hash
        else {
            return "";
        }
    }

    elsif ( $last_nonblank_token eq ';' ) {

        # an opening brace where a statement may appear is probably
        # a code block but might be an anonymous hash reference
        return decide_if_code_block( $i, $rtokens, $rtoken_type,
            $max_token_index );
    }

    # handle case of '}{'
    elsif ($last_nonblank_token eq '}'
        && $last_nonblank_type eq $last_nonblank_token )
    {

        # a } { situation ...
        # could be hash reference after code block..(blktype1.t)
        if ($last_nonblank_block_type) {
            return decide_if_code_block( $i, $rtokens, $rtoken_type,
                $max_token_index );
        }

        # must be a block if it follows a closing hash reference
        else {
            return $last_nonblank_token;
        }
    }

    # NOTE: braces after type characters start code blocks, but for
    # simplicity these are not identified as such.  See also
    # sub is_non_structural_brace.
    # elsif ( $last_nonblank_type eq 't' ) {
    #    return $last_nonblank_token;
    # }

    # brace after label:
    elsif ( $last_nonblank_type eq 'J' ) {
        return $last_nonblank_token;
    }

# otherwise, look at previous token.  This must be a code block if
# it follows any of these:
# /^(BEGIN|END|CHECK|INIT|AUTOLOAD|DESTROY|UNITCHECK|continue|if|elsif|else|unless|do|while|until|eval|for|foreach|map|grep|sort)$/
    elsif ( $is_code_block_token{$last_nonblank_token} ) {
        return $last_nonblank_token;
    }

    # or a sub definition
    elsif ( ( $last_nonblank_type eq 'i' || $last_nonblank_type eq 't' )
        && $last_nonblank_token =~ /^sub\b/ )
    {
        return $last_nonblank_token;
    }

    # user-defined subs with block parameters (like grep/map/eval)
    elsif ( $last_nonblank_type eq 'G' ) {
        return $last_nonblank_token;
    }

    # a brace after a bareword is ambiguous; look ahead to decide
    elsif ( $last_nonblank_type eq 'w' ) {
        return decide_if_code_block( $i, $rtokens, $rtoken_type,
            $max_token_index );
    }

    # anything else must be anonymous hash reference
    else {
        return "";
    }
}
sub decide_if_code_block {

    # we are at a '{' where a statement may appear.
    # We must decide if this brace starts an anonymous hash or a code
    # block.
    # return "" if anonymous hash, and $last_nonblank_token otherwise
    #
    # $i is the index of the '{' in the token list @$rtokens (types in
    # @$rtoken_type); $max_token_index is the last index on this line.
    # USES GLOBAL VARIABLES: $last_nonblank_token
    my ( $i, $rtokens, $rtoken_type, $max_token_index ) = @_;
    my ( $next_nonblank_token, $i_next ) =
      find_next_nonblank_token( $i, $rtokens, $max_token_index );

    # initialize to be code BLOCK
    my $code_block_type = $last_nonblank_token;

    # Check for the common case of an empty anonymous hash reference:
    # Maybe something like sub { { } }
    if ( $next_nonblank_token eq '}' ) {
        $code_block_type = "";
    }

    else {

        # To guess if this '{' is an anonymous hash reference, look ahead
        # and test as follows:
        #
        # it is a hash reference if next come:
        #   - a string or digit followed by a comma or =>
        #   - bareword followed by =>
        # otherwise it is a code block
        #
        # Examples of anonymous hash ref:
        # {'aa',};
        # {1,2}
        #
        # Examples of code blocks:
        # {1; print "hello\n", 1;}
        # {$a,1};

        # We are only going to look ahead one more (nonblank/comment) line.
        # Strange formatting could cause a bad guess, but that's unlikely.
        my @pre_types  = @$rtoken_type[ $i + 1 .. $max_token_index ];
        my @pre_tokens = @$rtokens[ $i + 1 .. $max_token_index ];
        my ( $rpre_tokens, $rpre_types ) =
          peek_ahead_for_n_nonblank_pre_tokens(20);    # 20 is arbitrary but
                                                       # generous, and prevents
                                                       # wasting lots of
                                                       # time in mangled files
        if ( defined($rpre_types) && @$rpre_types ) {
            push @pre_types,  @$rpre_types;
            push @pre_tokens, @$rpre_tokens;
        }

        # put a sentinel token to simplify stopping the search
        push @pre_types, '}';

        # skip a leading blank
        my $jbeg = 0;
        $jbeg = 1 if $pre_types[0] eq 'b';

        # first look for one of these
        #  - bareword
        #  - bareword with leading -
        #  - digit
        #  - quoted string
        # $j is advanced past the item if one is found
        my $j = $jbeg;
        if ( $pre_types[$j] =~ /^[\'\"]/ ) {

            # find the closing quote; don't worry about escapes
            my $quote_mark = $pre_types[$j];
            for ( my $k = $j + 1 ; $k < $#pre_types ; $k++ ) {
                if ( $pre_types[$k] eq $quote_mark ) {
                    $j = $k + 1;
                    last;
                }
            }
        }
        elsif ( $pre_types[$j] eq 'd' ) {
            $j++;
        }
        elsif ( $pre_types[$j] eq 'w' ) {
            unless ( $is_keyword{ $pre_tokens[$j] } ) {
                $j++;
            }
        }
        elsif ( $pre_types[$j] eq '-' && $pre_types[ ++$j ] eq 'w' ) {
            $j++;
        }

        # $j moved only if one of the items above was seen
        if ( $j > $jbeg ) {

            $j++ if $pre_types[$j] eq 'b';

            # it's a hash ref if a comma or => follow next
            if ( $pre_types[$j] eq ','
                || ( $pre_types[$j] eq '=' && $pre_types[ ++$j ] eq '>' ) )
            {
                $code_block_type = "";
            }
        }
    }

    return $code_block_type;
}
sub unexpected {

    # report unexpected token type and show where it is
    #
    # Emits (at most MAX_NAG_MESSAGES times) a numbered copy of the input
    # line with a '^' under the offending token at index $i_tok and, when
    # a previous nonblank token exists, dashes under that token as well.
    # USES GLOBAL VARIABLES: $tokenizer_self
    my ( $found, $expecting, $i_tok, $last_nonblank_i, $rpretoken_map,
        $rpretoken_type, $input_line )
      = @_;

    if ( ++$tokenizer_self->{_unexpected_error_count} <= MAX_NAG_MESSAGES ) {
        my $msg = "found $found where $expecting expected";
        my $pos = $$rpretoken_map[$i_tok];
        interrupt_logfile();
        my $input_line_number = $tokenizer_self->{_last_line_number};
        my ( $offset, $numbered_line, $underline ) =
          make_numbered_line( $input_line_number, $input_line, $pos );
        $underline = write_on_underline( $underline, $pos - $offset, '^' );

        my $trailer = "";
        if ( ( $i_tok > 0 ) && ( $last_nonblank_i >= 0 ) ) {
            my $pos_prev = $$rpretoken_map[$last_nonblank_i];

            # length to underline: up to the blank before the current
            # token if there is one, otherwise up to the current token
            my $num;
            if ( $$rpretoken_type[ $i_tok - 1 ] eq 'b' ) {
                $num = $$rpretoken_map[ $i_tok - 1 ] - $pos_prev;
            }
            else {
                $num = $pos - $pos_prev;
            }

            # limit the underlining to the 40 characters before the token
            if ( $num > 40 ) { $num = 40; $pos_prev = $pos - 40; }

            $underline =
              write_on_underline( $underline, $pos_prev - $offset, '-' x $num );
            $trailer = " (previous token underlined)";
        }
        warning( $numbered_line . "\n" );
        warning( $underline . "\n" );
        warning( $msg . $trailer . "\n" );
        resume_logfile();
    }
}
sub is_non_structural_brace {

    # Decide if a brace or bracket is structural or non-structural
    # by looking at the previous token and type
    # Returns true if the brace is non-structural.
    # USES GLOBAL VARIABLES: $last_nonblank_type, $last_nonblank_token

    # EXPERIMENTAL: Mark slices as structural; idea was to improve formatting.
    # Tentatively deactivated because it caused the wrong operator expectation
    # for this code:
    #      $user = @vars[1] / 100;
    # Must update sub operator_expected before re-implementing.
    # if ( $last_nonblank_type eq 'i' && $last_nonblank_token =~ /^@/ ) {
    #    return 0;
    # }

    # NOTE: braces after type characters start code blocks, but for
    # simplicity these are not identified as such.  See also
    # sub code_block_type
    # if ($last_nonblank_type eq 't') {return 0}

    # otherwise, it is non-structural if it is decorated
    # by type information.
    # For example, the '{' here is non-structural:   ${xxx}
    return (
        $last_nonblank_token =~ /^([\$\@\*\&\%\)]|->|::)/

          # or if we follow a hash or array closing curly brace or bracket
          # For example, the second '{' in this is non-structural: $a{'x'}{'y'}
          # because the first '}' would have been given type 'R'
          || $last_nonblank_type =~ /^([R\]])$/
    );
}
#########i#############################################################
# Tokenizer routines for tracking container nesting depths
#######################################################################

# The following routines keep track of nesting depths of the nesting
# types, ( [ { and ?.  This is necessary for determining the indentation
# level, and also for debugging programs.  Not only do they keep track of
# nesting depths of the individual brace types, but they check that each
# of the other brace types is balanced within matching pairs.  For
# example, if the program sees this sequence:
#
#         {  ( ( ) }
#
# then it can determine that there is an extra left paren somewhere
# between the { and the }.  And so on with every other possible
# combination of outer and inner brace types.  For another
# example:
#
#         ( [ ..... ]  ] )
#
# which has an extra ] within the parens.
#
# The brace types have indexes 0 .. 3 which are indexes into
# the matrices.
#
# The pair ? : are treated as just another nesting type, with ? acting
# as the opening brace and : acting as the closing brace.
#
# The matrix
#
#         $depth_array[$a][$b][ $current_depth[$a] ] = $current_depth[$b];
#
# saves the nesting depth of brace type $b (where $b is either of the other
# nesting types) when brace type $a enters a new depth.  When this depth
# decreases, a check is made that the current depth of brace types $b is
# unchanged, or otherwise there must have been an error.  This can
# be very useful for localizing errors, particularly when perl runs to
# the end of a large file (such as this one) and announces that there
# is a problem somewhere.
#
# A numerical sequence number is maintained for every nesting type,
# so that each matching pair can be uniquely identified in a simple
# way.

sub increase_nesting_depth {

    # Record the opening of a new container of nesting type $aa at
    # character position $pos of the current input line.
    # Returns ( $seqno, $indent ): the sequence number assigned to the new
    # container, and a flag which is 1 when a '?' directly nested in
    # another ternary should be indented, 0 otherwise.
    my ( $aa, $pos ) = @_;

    # USES GLOBAL VARIABLES: $tokenizer_self, @current_depth,
    # @current_sequence_number, @depth_array, @starting_line_of_current_depth
    $current_depth[$aa]++;
    $total_depth++;
    $total_depth[$aa][ $current_depth[$aa] ] = $total_depth;
    my $input_line_number = $tokenizer_self->{_last_line_number};
    my $input_line        = $tokenizer_self->{_line_text};

    # Sequence numbers increment by number of items.  This keeps
    # a unique set of numbers but still allows the relative location
    # of any type to be determined.
    $nesting_sequence_number[$aa] += scalar(@closing_brace_names);
    my $seqno = $nesting_sequence_number[$aa];
    $current_sequence_number[$aa][ $current_depth[$aa] ] = $seqno;

    # remember where this container started, for error messages
    $starting_line_of_current_depth[$aa][ $current_depth[$aa] ] =
      [ $input_line_number, $input_line, $pos ];

    # save the current depths of all other nesting types so that they
    # can be checked for balance when this container closes
    for my $bb ( 0 .. $#closing_brace_names ) {
        next if ( $bb == $aa );
        $depth_array[$aa][$bb][ $current_depth[$aa] ] = $current_depth[$bb];
    }

    # set a flag for indenting a nested ternary statement
    my $indent = 0;
    if ( $aa == QUESTION_COLON ) {
        $nested_ternary_flag[ $current_depth[$aa] ] = 0;
        if ( $current_depth[$aa] > 1 ) {
            if ( $nested_ternary_flag[ $current_depth[$aa] - 1 ] == 0 ) {

                # the parent '?' must be the immediately enclosing container
                my $pdepth = $total_depth[$aa][ $current_depth[$aa] - 1 ];
                if ( $pdepth == $total_depth - 1 ) {
                    $indent = 1;
                    $nested_ternary_flag[ $current_depth[$aa] - 1 ] = -1;
                }
            }
        }
    }
    return ( $seqno, $indent );
}
sub decrease_nesting_depth {

    # Record the closing of a container of nesting type $aa at character
    # position $pos of the current input line, and check that all other
    # nesting types are balanced within it.
    # Returns ( $seqno, $outdent ): the sequence number of the container
    # being closed (0 if there was no matching opening token), and the
    # nested ternary flag saved for this depth (0 unless $aa is '?:').
    my ( $aa, $pos ) = @_;

    # USES GLOBAL VARIABLES: $tokenizer_self, @current_depth,
    # @current_sequence_number, @depth_array, @starting_line_of_current_depth
    my $seqno             = 0;
    my $input_line_number = $tokenizer_self->{_last_line_number};
    my $input_line        = $tokenizer_self->{_line_text};

    my $outdent = 0;
    $total_depth--;
    if ( $current_depth[$aa] > 0 ) {

        # set a flag for un-indenting after seeing a nested ternary statement
        $seqno = $current_sequence_number[$aa][ $current_depth[$aa] ];
        if ( $aa == QUESTION_COLON ) {
            $outdent = $nested_ternary_flag[ $current_depth[$aa] ];
        }

        # check that any brace types $bb contained within are balanced
        for my $bb ( 0 .. $#closing_brace_names ) {
            next if ( $bb == $aa );

            unless ( $depth_array[$aa][$bb][ $current_depth[$aa] ] ==
                $current_depth[$bb] )
            {

                # positive: extra opening tokens; negative: extra closing
                my $diff =
                  $current_depth[$bb] -
                  $depth_array[$aa][$bb][ $current_depth[$aa] ];

                # don't whine too many times
                my $saw_brace_error = get_saw_brace_error();
                if (
                    $saw_brace_error <= MAX_NAG_MESSAGES

                    # if too many closing types have occurred, we probably
                    # already caught this error
                    && ( ( $diff > 0 ) || ( $saw_brace_error <= 0 ) )
                  )
                {
                    interrupt_logfile();
                    my $rsl =
                      $starting_line_of_current_depth[$aa]
                      [ $current_depth[$aa] ];
                    my $sl  = $$rsl[0];
                    my $rel = [ $input_line_number, $input_line, $pos ];
                    my $el  = $$rel[0];

                    # plural suffix for the message
                    my $ess = ( $diff == 1 || $diff == -1 ) ? '' : 's';
                    my $bname =
                      ( $diff > 0 )
                      ? $opening_brace_names[$bb]
                      : $closing_brace_names[$bb];
                    write_error_indicator_pair( @$rsl, '^' );
                    my $msg = <<"EOM";
Found $diff extra $bname$ess between $opening_brace_names[$aa] on line $sl and $closing_brace_names[$aa] on line $el
EOM

                    if ( $diff > 0 ) {
                        my $rml =
                          $starting_line_of_current_depth[$bb]
                          [ $current_depth[$bb] ];
                        my $ml = $$rml[0];
                        $msg .=
" The most recent un-matched $bname is on line $ml\n";
                        write_error_indicator_pair( @$rml, '^' );
                    }
                    write_error_indicator_pair( @$rel, '^' );
                    warning($msg);
                    resume_logfile();
                }
                increment_brace_error();
            }
        }
        $current_depth[$aa]--;
    }
    else {

        # a closing token with no opening token at all
        my $saw_brace_error = get_saw_brace_error();
        if ( $saw_brace_error <= MAX_NAG_MESSAGES ) {
            my $msg = <<"EOM";
There is no previous $opening_brace_names[$aa] to match a $closing_brace_names[$aa] on line $input_line_number
EOM
            indicate_error( $msg, $input_line_number, $input_line, $pos, '^' );
        }
        increment_brace_error();
    }
    return ( $seqno, $outdent );
}
sub check_final_nesting_depths {

    # Report an error for every nesting type which still has a nonzero
    # depth, showing the line of the most recent unmatched opening token.
    # USES GLOBAL VARIABLES: @current_depth, @starting_line_of_current_depth

    for my $aa ( 0 .. $#closing_brace_names ) {

        if ( $current_depth[$aa] ) {
            my $rsl =
              $starting_line_of_current_depth[$aa][ $current_depth[$aa] ];
            my $sl  = $$rsl[0];
            my $msg = <<"EOM";
Final nesting depth of $opening_brace_names[$aa]s is $current_depth[$aa]
The most recent un-matched $opening_brace_names[$aa] is on line $sl
EOM
            indicate_error( $msg, @$rsl, '^' );
            increment_brace_error();
        }
    }
}
#########i#############################################################
# Tokenizer routines for looking ahead in input stream
#######################################################################
sub peek_ahead_for_n_nonblank_pre_tokens {

    # returns next n pretokens if they exist
    # returns undef's if hits eof without seeing any pretokens
    # Only the first upcoming line which is neither blank nor a comment
    # is pre-tokenized; the return value is ( $rpre_tokens, $rpre_types ).
    # USES GLOBAL VARIABLES: $tokenizer_self
    my $max_pretokens = shift;
    my $line;
    my $i = 0;
    my ( $rpre_tokens, $rmap, $rpre_types );

    while ( $line = $tokenizer_self->{_line_buffer_object}->peek_ahead( $i++ ) )
    {
        $line =~ s/^\s*//;    # trim leading blanks
        next if ( length($line) <= 0 );    # skip blank
        next if ( $line =~ /^#/ );         # skip comment
        ( $rpre_tokens, $rmap, $rpre_types ) =
          pre_tokenize( $line, $max_pretokens );
        last;
    }
    return ( $rpre_tokens, $rpre_types );
}
# look ahead for next non-blank, non-comment line of code
sub peek_ahead_for_nonblank_token {

    # Appends up to two pretokens from the next line of code to @$rtokens,
    # starting at index $max_token_index + 2 (the index is pre-incremented
    # from $max_token_index + 1), and returns $rtokens.
    # USES GLOBAL VARIABLES: $tokenizer_self
    my ( $rtokens, $max_token_index ) = @_;
    my $line;
    my $i = 0;

    while ( $line = $tokenizer_self->{_line_buffer_object}->peek_ahead( $i++ ) )
    {
        $line =~ s/^\s*//;    # trim leading blanks
        next if ( length($line) <= 0 );    # skip blank
        next if ( $line =~ /^#/ );         # skip comment
        my ( $rtok, $rmap, $rtype ) =
          pre_tokenize( $line, 2 );        # only need 2 pre-tokens
        my $j = $max_token_index + 1;

        foreach my $tok (@$rtok) {
            last if ( $tok =~ /\n/ );      # stop at the end of the line
            $$rtokens[ ++$j ] = $tok;
        }
        last;
    }
    return $rtokens;
}
#########i#############################################################
# Tokenizer guessing routines for ambiguous situations
#######################################################################
sub guess_if_pattern_or_conditional {

    # this routine is called when we have encountered a ? following an
    # unknown bareword, and we must decide if it starts a pattern or not
    # input parameters:
    #   $i - token index of the ? starting possible pattern
    # output parameters:
    #   $is_pattern = 0 if probably not pattern,  =1 if probably a pattern
    #   msg = a warning or diagnostic message
    # USES GLOBAL VARIABLES: $last_nonblank_token
    my ( $i, $rtokens, $rtoken_map, $max_token_index ) = @_;
    my $is_pattern = 0;
    my $msg        = "guessing that ? after $last_nonblank_token starts a ";

    if ( $i >= $max_token_index ) {
        $msg .= "conditional (no end to pattern found on the line)\n";
    }
    else {
        my $ibeg = $i;
        $i = $ibeg + 1;
        my $next_token = $$rtokens[$i];    # first token after ?

        # look for a possible ending ? on this line..
        my $in_quote        = 1;
        my $quote_depth     = 0;
        my $quote_character = '';
        my $quote_pos       = 0;
        my $quoted_string;
        (
            $i, $in_quote, $quote_character, $quote_pos, $quote_depth,
            $quoted_string
          )
          = follow_quoted_string( $ibeg, $in_quote, $rtokens, $quote_character,
            $quote_pos, $quote_depth, $max_token_index );

        if ($in_quote) {

            # we didn't find an ending ? on this line,
            # so we bias towards conditional
            $is_pattern = 0;
            $msg .= "conditional (no ending ? on this line)\n";
        }

        # we found an ending ?, so we bias towards a pattern
        else {

            if ( pattern_expected( $i, $rtokens, $max_token_index ) >= 0 ) {
                $is_pattern = 1;
                $msg .= "pattern (found ending ? and pattern expected)\n";
            }
            else {

                # NOTE(review): $is_pattern stays 0 here although the
                # message says "pattern" - confirm which is intended
                $msg .= "pattern (uncertain, but found ending ?)\n";
            }
        }
    }
    return ( $is_pattern, $msg );
}
sub guess_if_pattern_or_division {

    # this routine is called when we have encountered a / following an
    # unknown bareword, and we must decide if it starts a pattern or is a
    # division
    # input parameters:
    #   $i - token index of the / starting possible pattern
    # output parameters:
    #   $is_pattern = 0 if probably division,  =1 if probably a pattern
    #   msg = a warning or diagnostic message
    # USES GLOBAL VARIABLES: $last_nonblank_token
    my ( $i, $rtokens, $rtoken_map, $max_token_index ) = @_;
    my $is_pattern = 0;
    my $msg        = "guessing that / after $last_nonblank_token starts a ";

    if ( $i >= $max_token_index ) {

        # the reason must be appended to the message; a bare string here
        # would be a constant in void context and would be lost
        $msg .= "division (no end to pattern found on the line)\n";
    }
    else {
        my $ibeg = $i;
        my $divide_expected =
          numerator_expected( $i, $rtokens, $max_token_index );
        $i = $ibeg + 1;
        my $next_token = $$rtokens[$i];    # first token after slash

        # look for a possible ending / on this line..
        my $in_quote        = 1;
        my $quote_depth     = 0;
        my $quote_character = '';
        my $quote_pos       = 0;
        my $quoted_string;
        (
            $i, $in_quote, $quote_character, $quote_pos, $quote_depth,
            $quoted_string
          )
          = follow_quoted_string( $ibeg, $in_quote, $rtokens, $quote_character,
            $quote_pos, $quote_depth, $max_token_index );

        if ($in_quote) {

            # we didn't find an ending / on this line,
            # so we bias towards division
            if ( $divide_expected >= 0 ) {
                $is_pattern = 0;
                $msg .= "division (no ending / on this line)\n";
            }
            else {

                # NOTE(review): this assignment replaces the "guessing
                # that ..." prefix instead of appending to it; left as is
                $msg        = "multi-line pattern (division not possible)\n";
                $is_pattern = 1;
            }
        }

        # we found an ending /, so we bias towards a pattern
        else {

            if ( pattern_expected( $i, $rtokens, $max_token_index ) >= 0 ) {

                if ( $divide_expected >= 0 ) {

                    if ( $i - $ibeg > 60 ) {
                        $msg .= "division (matching / too distant)\n";
                        $is_pattern = 0;
                    }
                    else {
                        $msg .= "pattern (but division possible too)\n";
                        $is_pattern = 1;
                    }
                }
                else {
                    $is_pattern = 1;
                    $msg .= "pattern (division not possible)\n";
                }
            }
            else {

                if ( $divide_expected >= 0 ) {
                    $is_pattern = 0;
                    $msg .= "division (pattern not possible)\n";
                }
                else {
                    $is_pattern = 1;
                    $msg .=
                      "pattern (uncertain, but division would not work here)\n";
                }
            }
        }
    }
    return ( $is_pattern, $msg );
}
# try to resolve here-doc vs. shift by looking ahead for
# non-code or the end token (currently only looks for end token)
# returns 1 if it is probably a here doc, 0 if not
# (and -1 if end of file is reached without seeing the target)
sub guess_if_here_doc {

    # This is how many lines we will search for a target as part of the
    # guessing strategy.  It is a constant because there is probably
    # little reason to change it.
    # USES GLOBAL VARIABLES: $tokenizer_self, $current_package,
    # %is_constant
    use constant HERE_DOC_WINDOW => 40;

    my $next_token        = shift;
    my $here_doc_expected = 0;
    my $line;
    my $k   = 0;
    my $msg = "checking <<";

    while ( $line = $tokenizer_self->{_line_buffer_object}->peek_ahead( $k++ ) )
    {
        chomp $line;

        # the target is matched literally, so quote any characters which
        # would otherwise be special in a regex
        if ( $line =~ /^\Q$next_token\E$/ ) {
            $msg .= " -- found target $next_token ahead $k lines\n";
            $here_doc_expected = 1;    # got it
            last;
        }
        last if ( $k >= HERE_DOC_WINDOW );
    }

    unless ($here_doc_expected) {

        if ( !defined($line) ) {
            $here_doc_expected = -1;    # hit eof without seeing target
            $msg .= " -- must be shift; target $next_token not in file\n";
        }
        else {                          # still unsure..taking a wild guess

            if ( !$is_constant{$current_package}{$next_token} ) {
                $here_doc_expected = 1;
                $msg .=
                  " -- guessing it's a here-doc ($next_token not a constant)\n";
            }
            else {
                $msg .=
                  " -- guessing it's a shift ($next_token is a constant)\n";
            }
        }
    }
    write_logfile_entry($msg);
    return $here_doc_expected;
}
25408
#########i#############################################################
25409
# Tokenizer Routines for scanning identifiers and related items
25410
#######################################################################
25412
sub scan_bare_identifier_do {
25414
# this routine is called to scan a token starting with an alphanumeric
25415
# variable or package separator, :: or '.
25416
# It returns the list ( $i, $tok, $type, $prototype ).
# USES GLOBAL VARIABLES: $current_package, $last_nonblank_token,
25417
# $last_nonblank_type,@paren_type, $paren_depth
25419
my ( $input_line, $i, $tok, $type, $prototype, $rtoken_map,
25423
my $package = undef;
25427
# we have to back up one pretoken at a :: since each : is one pretoken
25428
if ( $tok eq '::' ) { $i_beg-- }
25429
if ( $tok eq '->' ) { $i_beg-- }
25430
# convert the pretoken index to a character position and start the
# \G-anchored match there
my $pos_beg = $$rtoken_map[$i_beg];
25431
pos($input_line) = $pos_beg;
25438
# capture 1 is the (last) package part ending in ' or ::,
# capture 2 is the final word, optionally preceded by '->'
if ( $input_line =~ m/\G\s*((?:\w*(?:'|::)))*(?:(?:->)?(\w+))?/gc ) {
25440
my $pos = pos($input_line);
25441
my $numc = $pos - $pos_beg;
25442
# the token is everything the pattern consumed
$tok = substr( $input_line, $pos_beg, $numc );
25444
# type 'w' includes anything without leading type info
25445
# ($,%,@,*) including something like abc::def::ghi
25449
if ( defined($2) ) { $sub_name = $2; }
25450
if ( defined($1) ) {
25453
# patch: don't allow isolated package name which just ends
25454
# in the old style package separator (single quote). Example:
25456
if ( !($sub_name) && substr( $package, -1, 1 ) eq '\'' ) {
25460
# normalize the package name: old-style ' becomes ::, a leading ::
# means package main, and a trailing :: is dropped
$package =~ s/\'/::/g;
25461
if ( $package =~ /^\:/ ) { $package = 'main' . $package }
25462
$package =~ s/::$//;
25465
$package = $current_package;
25467
if ( $is_keyword{$tok} ) {
25472
# if it is a bareword..
25473
if ( $type eq 'w' ) {
25475
# check for v-string with leading 'v' type character
25476
# (This seems to have precedence over filehandle, type 'Y')
25477
if ( $tok =~ /^v\d[_\d]*$/ ) {
25479
# we only have the first part - something like 'v101' -
25481
# extend the token over any following '.digits' groups
if ( $input_line =~ m/\G(\.\d[_\d]*)+/gc ) {
25482
$pos = pos($input_line);
25483
$numc = $pos - $pos_beg;
25484
$tok = substr( $input_line, $pos_beg, $numc );
25488
# warn if this version can't handle v-strings
25489
report_v_string($tok);
25492
elsif ( $is_constant{$package}{$sub_name} ) {
25496
# bareword after sort has implied empty prototype; for example:
25497
# @sorted = sort numerically ( 53, 29, 11, 32, 7 );
25498
# This has priority over whatever the user has specified.
25499
elsif ($last_nonblank_token eq 'sort'
25500
&& $last_nonblank_type eq 'k' )
25505
# Note: strangely, perl does not seem to really let you create
25506
# functions which act like eval and do, in the sense that eval
25507
# and do may have operators following the final }, but any operators
25508
# that you create with prototype (&) apparently do not allow
25509
# trailing operators, only terms. This seems strange.
25510
# If this ever changes, here is the update
25511
# to make perltidy behave accordingly:
25513
# elsif ( $is_block_function{$package}{$tok} ) {
25514
# $tok='eval'; # patch to do braces like eval - doesn't work
25517
# FIXME: This could become a separate type to allow for different
25519
elsif ( $is_block_function{$package}{$sub_name} ) {
25523
elsif ( $is_block_list_function{$package}{$sub_name} ) {
25526
elsif ( $is_user_function{$package}{$sub_name} ) {
25528
# use the prototype recorded for this user-defined function
$prototype = $user_function_prototype{$package}{$sub_name};
25531
# check for indirect object
25534
# added 2001-03-27: must not be followed immediately by '('
25536
( $input_line !~ m/\G\(/gc )
25541
# preceded by keyword like 'print', 'printf' and friends
25542
$is_indirect_object_taker{$last_nonblank_token}
25544
# or preceded by something like 'print(' or 'printf('
25546
( $last_nonblank_token eq '(' )
25547
&& $is_indirect_object_taker{ $paren_type[$paren_depth]
25555
# may not be indirect object unless followed by a space
25556
if ( $input_line =~ m/\G\s+/gc ) {
25560
# Perl's indirect object notation is a very bad
25561
# thing and can cause subtle bugs, especially for
25562
# beginning programmers. And I haven't even been
25563
# able to figure out a sane warning scheme which
25564
# doesn't get in the way of good scripts.
25566
# Complain if a filehandle has any lower case
25567
# letters. This is suggested good practice.
25568
# Use 'sub_name' because something like
25569
# main::MYHANDLE is ok for filehandle
25570
if ( $sub_name =~ /[a-z]/ ) {
25572
# could be bug caused by older perltidy if
25574
if ( $input_line =~ m/\G\s*\(/gc ) {
25576
"Caution: unknown word '$tok' in indirect object slot\n"
25582
# bareword not followed by a space -- may not be filehandle
25583
# (may be function call defined in a 'use' statement)
25590
# Now we must convert back from character position
25591
# to pre_token index.
25592
# I don't think an error flag can occur here ..but who knows
25595
inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index );
25597
warning("scan_bare_identifier: Possibly invalid tokenization\n");
25601
# no match but line not blank - could be syntax error
25602
# perl will take '::' alone without complaint
25606
# change this warning to log message if it becomes annoying
25607
warning("didn't find identifier after leading ::\n");
25609
return ( $i, $tok, $type, $prototype );
25614
# This is the new scanner and will eventually replace scan_identifier.
25615
# Only type 'sub' and 'package' are implemented.
25616
# Token types $ * % @ & -> are not yet implemented.
25618
# Scan identifier following a type token.
25619
# The type of call depends on $id_scan_state: $id_scan_state = ''
25620
# for starting call, in which case $tok must be the token defining
25623
# If the type token is the last nonblank token on the line, a value
25624
# of $id_scan_state = $tok is returned, indicating that further
25625
# calls must be made to get the identifier. If the type token is
25626
# not the last nonblank token on the line, the identifier is
25627
# scanned and handled and a value of '' is returned.
25628
# The return list is ( $i, $tok, $type, $id_scan_state ).
# USES GLOBAL VARIABLES: $current_package, $last_nonblank_token, $in_attribute_list,
25629
# $statement_type, $tokenizer_self
25631
my ( $input_line, $i, $tok, $rtokens, $rtoken_map, $id_scan_state,
25635
my ( $i_beg, $pos_beg );
25637
#print "NSCAN:entering i=$i, tok=$tok, type=$type, state=$id_scan_state\n";
25638
#my ($a,$b,$c) = caller;
25639
#print "NSCAN: scan_id called with tok=$tok $a $b $c\n";
25641
# on re-entry, start scanning at first token on the line
25642
if ($id_scan_state) {
25647
# on initial entry, start scanning just after type token
25650
# remember which type token ('sub' or 'package') started the scan
$id_scan_state = $tok;
25654
# find $i_beg = index of next nonblank token,
25655
# and handle empty lines
25656
my $blank_line = 0;
25657
my $next_nonblank_token = $$rtokens[$i_beg];
25658
if ( $i_beg > $max_token_index ) {
25663
# only a '#' immediately after a '$' is not a comment
25664
if ( $next_nonblank_token eq '#' ) {
25665
unless ( $tok eq '$' ) {
25670
# skip over leading whitespace to the next nonblank token on this line
if ( $next_nonblank_token =~ /^\s/ ) {
25671
( $next_nonblank_token, $i_beg ) =
25672
find_next_nonblank_token_on_this_line( $i_beg, $rtokens,
25673
$max_token_index );
25674
# a side comment or nothing but whitespace remains
if ( $next_nonblank_token =~ /(^#|^\s*$)/ ) {
25680
# handle non-blank line; identifier, if any, must follow
25681
unless ($blank_line) {
25683
# dispatch on the type token saved in $id_scan_state
if ( $id_scan_state eq 'sub' ) {
25684
( $i, $tok, $type, $id_scan_state ) = do_scan_sub(
25685
$input_line, $i, $i_beg,
25686
$tok, $type, $rtokens,
25687
$rtoken_map, $id_scan_state, $max_token_index
25691
elsif ( $id_scan_state eq 'package' ) {
25692
( $i, $tok, $type ) =
25693
do_scan_package( $input_line, $i, $i_beg, $tok, $type, $rtokens,
25694
$rtoken_map, $max_token_index );
25695
$id_scan_state = '';
25699
warning("invalid token in scan_id: $tok\n");
25700
$id_scan_state = '';
25704
# a scan that is still in progress must have a defined, nonempty type
if ( $id_scan_state && ( !defined($type) || !$type ) ) {
25706
# shouldn't happen:
25708
"Program bug in scan_id: undefined type but scan_state=$id_scan_state\n"
25710
report_definite_bug();
25713
TOKENIZER_DEBUG_FLAG_NSCAN && do {
25715
"NSCAN: returns i=$i, tok=$tok, type=$type, state=$id_scan_state\n";
25717
return ( $i, $tok, $type, $id_scan_state );
25720
sub check_prototype {
25721
# Record what is known about a sub in the lookup tables
# %is_user_function, %user_function_prototype, %is_block_function,
# %is_block_list_function and %is_constant, all keyed by package
# and sub name.
# $proto   - prototype text (may be undefined)
# $package - package name
# $subname - sub name
my ( $proto, $package, $subname ) = @_;
25722
# nothing can be recorded without both a package and a sub name
return unless ( defined($package) && defined($subname) );
25723
if ( defined($proto) ) {
25724
# strip the enclosing parens and the whitespace next to them
$proto =~ s/^\s*\(\s*//;
25725
$proto =~ s/\s*\)$//;
25727
$is_user_function{$package}{$subname} = 1;
25728
# store the prototype in a normalized "(...)" form
$user_function_prototype{$package}{$subname} = "($proto)";
25730
# prototypes containing '&' must be treated specially..
25731
if ( $proto =~ /\&/ ) {
25733
# right curly braces of prototypes ending in
25734
# '&' may be followed by an operator
25735
if ( $proto =~ /\&$/ ) {
25736
$is_block_function{$package}{$subname} = 1;
25739
# right curly braces of prototypes NOT ending in
25740
# '&' may NOT be followed by an operator
25741
elsif ( $proto !~ /\&$/ ) {
25742
$is_block_list_function{$package}{$subname} = 1;
25747
$is_constant{$package}{$subname} = 1;
25751
$is_user_function{$package}{$subname} = 1;
25755
sub do_scan_package {
25757
# do_scan_package parses a package name
25758
# it is called with $i_beg equal to the index of the first nonblank
25759
# token following a 'package' token.
25760
# It returns the list ( $i, $tok, $type ).
# USES GLOBAL VARIABLES: $current_package,
25762
my ( $input_line, $i, $i_beg, $tok, $type, $rtokens, $rtoken_map,
25765
my $package = undef;
25766
# convert the pretoken index to a character position and start the
# \G-anchored match there
my $pos_beg = $$rtoken_map[$i_beg];
25767
pos($input_line) = $pos_beg;
25769
# handle non-blank line; package name, if any, must follow
25770
if ( $input_line =~ m/\G\s*((?:\w*(?:'|::))*\w+)/gc ) {
25772
$package = ( defined($1) && $1 ) ? $1 : 'main';
25773
# normalize the name: old-style ' becomes ::, a leading :: means
# package main, and a trailing :: is dropped
$package =~ s/\'/::/g;
25774
if ( $package =~ /^\:/ ) { $package = 'main' . $package }
25775
$package =~ s/::$//;
25776
my $pos = pos($input_line);
25777
my $numc = $pos - $pos_beg;
25778
# the returned token is 'package ' followed by the matched text
$tok = 'package ' . substr( $input_line, $pos_beg, $numc );
25781
# Now we must convert back from character position
25782
# to pre_token index.
25783
# I don't think an error flag can occur here ..but ?
25786
inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index );
25787
if ($error) { warning("Possibly invalid package\n") }
25788
# later tokens are scanned relative to this package
$current_package = $package;
25791
# only ';' or '}' is accepted after the package name
my ( $next_nonblank_token, $i_next ) =
25792
find_next_nonblank_token( $i, $rtokens, $max_token_index );
25793
if ( $next_nonblank_token !~ /^[;\}]$/ ) {
25795
"Unexpected '$next_nonblank_token' after package name '$tok'\n"
25800
# no match but line not blank --
25801
# could be a label with name package, like package: , for example.
25806
return ( $i, $tok, $type );
25809
sub scan_identifier_do {
25811
# This routine assembles tokens into identifiers. It maintains a
25812
# scan state, id_scan_state. It updates id_scan_state based upon
25813
# current id_scan_state and token, and returns an updated
25814
# id_scan_state and the next index after the identifier.
25815
# The return list is ( $i, $tok, $type, $id_scan_state, $identifier ).
#
# Values of id_scan_state tested in the main loop below:
#   '$' - starting variable name
#   '&' - starting sub call?
#   'A' - looking for alpha (after ::)
#   ':' - looking for :: after alpha
#   '(' - looking for ( of prototype
#   ')' - looking for ) to end prototype
#   ''  - scan is finished
# USES GLOBAL VARIABLES: $context, $last_nonblank_token,
25816
# $last_nonblank_type
25818
my ( $i, $id_scan_state, $identifier, $rtokens, $max_token_index,
25823
# keep the entry values; they are printed by the debug code at the end
my $tok_begin = $$rtokens[$i_begin];
25824
if ( $tok_begin eq ':' ) { $tok_begin = '::' }
25825
my $id_scan_state_begin = $id_scan_state;
25826
my $identifier_begin = $identifier;
25827
my $tok = $tok_begin;
25830
# these flags will be used to help figure out the type:
25831
my $saw_alpha = ( $tok =~ /^[A-Za-z_]/ );
25834
# allow old package separator (') except in 'use' statement
25835
my $allow_tick = ( $last_nonblank_token ne 'use' );
25837
# get started by defining a type and a state if necessary
25838
unless ($id_scan_state) {
25839
$context = UNKNOWN_CONTEXT;
25841
# fixup for digraph
25842
if ( $tok eq '>' ) {
25846
$identifier = $tok;
25848
# choose the initial state (and context) from the first token
if ( $tok eq '$' || $tok eq '*' ) {
25849
$id_scan_state = '$';
25850
$context = SCALAR_CONTEXT;
25852
elsif ( $tok eq '%' || $tok eq '@' ) {
25853
$id_scan_state = '$';
25854
$context = LIST_CONTEXT;
25856
elsif ( $tok eq '&' ) {
25857
$id_scan_state = '&';
25859
elsif ( $tok eq 'sub' or $tok eq 'package' ) {
25860
$saw_alpha = 0; # 'sub' is considered type info here
25861
$id_scan_state = '$';
25862
$identifier .= ' '; # need a space to separate sub from sub name
25864
elsif ( $tok eq '::' ) {
25865
$id_scan_state = 'A';
25867
elsif ( $tok =~ /^[A-Za-z_]/ ) {
25868
$id_scan_state = ':';
25870
elsif ( $tok eq '->' ) {
25871
$id_scan_state = '$';
25876
# none of the expected starting tokens was seen: report a bug
my ( $a, $b, $c ) = caller;
25877
warning("Program Bug: scan_identifier given bad token = $tok \n");
25878
warning(" called from sub $a line: $c\n");
25879
report_definite_bug();
25881
$saw_type = !$saw_alpha;
25885
$saw_type = ( $tok =~ /([\$\%\@\*\&])/ );
25888
# now loop to gather the identifier
25891
while ( $i < $max_token_index ) {
25892
# remember the index of the last nonblank token
$i_save = $i unless ( $tok =~ /^\s*$/ );
25893
$tok = $$rtokens[ ++$i ];
25895
# two adjacent ':' pretokens form the package separator '::'
if ( ( $tok eq ':' ) && ( $$rtokens[ $i + 1 ] eq ':' ) ) {
25900
if ( $id_scan_state eq '$' ) { # starting variable name
25902
if ( $tok eq '$' ) {
25904
$identifier .= $tok;
25906
# we've got a punctuation variable if end of line (punct.t)
25907
if ( $i == $max_token_index ) {
25909
$id_scan_state = '';
25913
elsif ( $tok =~ /^[A-Za-z_]/ ) { # alphanumeric ..
25915
$id_scan_state = ':'; # now need ::
25916
$identifier .= $tok;
25918
elsif ( $tok eq "'" && $allow_tick ) { # alphanumeric ..
25920
$id_scan_state = ':'; # now need ::
25921
$identifier .= $tok;
25923
# Perl will accept leading digits in identifiers,
25924
# although they may not always produce useful results.
25925
# Something like $main::0 is ok. But this also works:
25927
# sub howdy::123::bubba{ print "bubba $54321!\n" }
25928
# howdy::123::bubba();
25931
elsif ( $tok =~ /^[0-9]/ ) { # numeric
25933
$id_scan_state = ':'; # now need ::
25934
$identifier .= $tok;
25936
elsif ( $tok eq '::' ) {
25937
$id_scan_state = 'A';
25938
$identifier .= $tok;
25940
elsif ( ( $tok eq '#' ) && ( $identifier eq '$' ) ) { # $#array
25941
$identifier .= $tok; # keep same state, a $ could follow
25943
elsif ( $tok eq '{' ) {
25945
# check for something like ${#} or ${©}, that is, a single
# character which is neither whitespace nor a word character
# enclosed in braces
25946
if ( $identifier eq '$'
25947
&& $i + 2 <= $max_token_index
25948
&& $$rtokens[ $i + 2 ] eq '}'
25949
&& $$rtokens[ $i + 1 ] !~ /[\s\w]/ )
25951
my $next2 = $$rtokens[ $i + 2 ];
25952
my $next1 = $$rtokens[ $i + 1 ];
25953
$identifier .= $tok . $next1 . $next2;
25955
$id_scan_state = '';
25959
# skip something like ${xxx} or ->{
25960
$id_scan_state = '';
25962
# if this is the first token of a line, any tokens for this
25963
# identifier have already been accumulated
25964
if ( $identifier eq '$' || $i == 0 ) { $identifier = ''; }
25969
# space ok after leading $ % * & @
25970
elsif ( $tok =~ /^\s*$/ ) {
25972
if ( $identifier =~ /^[\$\%\*\&\@]/ ) {
25974
if ( length($identifier) > 1 ) {
25975
$id_scan_state = '';
25977
$type = 'i'; # probably punctuation variable
25982
# spaces after $'s are common, and space after @
25983
# is harmless, so only complain about space
25984
# after other type characters. Space after $ and
25985
# @ will be removed in formatting. Report space
25986
# after % and * because they might indicate a
25987
# parsing error. In other words '% ' might be a
25988
# modulo operator. Delete this warning if it
25990
if ( $identifier !~ /^[\@\$]$/ ) {
25992
"Space in identifier, following $identifier\n";
25998
# space after '->' is ok
26000
elsif ( $tok eq '^' ) {
26002
# check for some special variables like $^W
26003
if ( $identifier =~ /^[\$\*\@\%]$/ ) {
26004
$identifier .= $tok;
26005
$id_scan_state = 'A';
26007
# Perl accepts '$^]' or '@^]', but
26008
# there must not be a space before the ']'.
26009
my $next1 = $$rtokens[ $i + 1 ];
26010
if ( $next1 eq ']' ) {
26012
$identifier .= $next1;
26013
$id_scan_state = "";
26018
$id_scan_state = '';
26021
else { # something else
26023
# check for various punctuation variables
26024
if ( $identifier =~ /^[\$\*\@\%]$/ ) {
26025
$identifier .= $tok;
26028
elsif ( $identifier eq '$#' ) {
26030
if ( $tok eq '{' ) { $type = 'i'; $i = $i_save }
26032
# perl seems to allow just these: $#: $#- $#+
26033
elsif ( $tok =~ /^[\:\-\+]$/ ) {
26035
$identifier .= $tok;
26039
write_logfile_entry( 'Use of $# is deprecated' . "\n" );
26042
elsif ( $identifier eq '$$' ) {
26044
# perl does not allow references to punctuation
26045
# variables without braces. For example, this
26049
# You would have to use
26053
if ( $tok eq '{' ) { $type = 't' }
26054
else { $type = 'i' }
26056
elsif ( $identifier eq '->' ) {
26061
if ( length($identifier) == 1 ) { $identifier = ''; }
26063
$id_scan_state = '';
26067
elsif ( $id_scan_state eq '&' ) { # starting sub call?
26069
if ( $tok =~ /^[\$A-Za-z_]/ ) { # alphanumeric ..
26070
$id_scan_state = ':'; # now need ::
26072
$identifier .= $tok;
26074
elsif ( $tok eq "'" && $allow_tick ) { # alphanumeric ..
26075
$id_scan_state = ':'; # now need ::
26077
$identifier .= $tok;
26079
elsif ( $tok =~ /^[0-9]/ ) { # numeric..see comments above
26080
$id_scan_state = ':'; # now need ::
26082
$identifier .= $tok;
26084
elsif ( $tok =~ /^\s*$/ ) { # allow space
26086
elsif ( $tok eq '::' ) { # leading ::
26087
$id_scan_state = 'A'; # accept alpha next
26088
$identifier .= $tok;
26090
elsif ( $tok eq '{' ) {
26091
if ( $identifier eq '&' || $i == 0 ) { $identifier = ''; }
26093
$id_scan_state = '';
26098
# punctuation variable?
26099
# testfile: cunningham4.pl
26101
# We have to be careful here. If we are in an unknown state,
26102
# we will reject the punctuation variable. In the following
26103
# example the '&' is a binary operator but we are in an unknown
26104
# state because there is no sigil on 'Prima', so we don't
26105
# know what it is. But it is a bad guess that
26106
# '&~' is a punctuation variable.
26107
# $self->{text}->{colorMap}->[
26108
# Prima::PodView::COLOR_CODE_FOREGROUND
26109
# & ~tb::COLOR_INDEX ] =
26110
# $sec->{ColorCode}
26111
if ( $identifier eq '&' && $expecting ) {
26112
$identifier .= $tok;
26119
$id_scan_state = '';
26123
elsif ( $id_scan_state eq 'A' ) { # looking for alpha (after ::)
26125
if ( $tok =~ /^[A-Za-z_]/ ) { # found it
26126
$identifier .= $tok;
26127
$id_scan_state = ':'; # now need ::
26130
elsif ( $tok eq "'" && $allow_tick ) {
26131
$identifier .= $tok;
26132
$id_scan_state = ':'; # now need ::
26135
elsif ( $tok =~ /^[0-9]/ ) { # numeric..see comments above
26136
$identifier .= $tok;
26137
$id_scan_state = ':'; # now need ::
26140
elsif ( ( $identifier =~ /^sub / ) && ( $tok =~ /^\s*$/ ) ) {
26141
$id_scan_state = '(';
26142
$identifier .= $tok;
26144
elsif ( ( $identifier =~ /^sub / ) && ( $tok eq '(' ) ) {
26145
$id_scan_state = ')';
26146
$identifier .= $tok;
26149
$id_scan_state = '';
26154
elsif ( $id_scan_state eq ':' ) { # looking for :: after alpha
26156
if ( $tok eq '::' ) { # got it
26157
$identifier .= $tok;
26158
$id_scan_state = 'A'; # now require alpha
26160
elsif ( $tok =~ /^[A-Za-z_]/ ) { # more alphanumeric is ok here
26161
$identifier .= $tok;
26162
$id_scan_state = ':'; # now need ::
26165
elsif ( $tok =~ /^[0-9]/ ) { # numeric..see comments above
26166
$identifier .= $tok;
26167
$id_scan_state = ':'; # now need ::
26170
elsif ( $tok eq "'" && $allow_tick ) { # tick
26172
if ( $is_keyword{$identifier} ) {
26173
$id_scan_state = ''; # that's all
26177
$identifier .= $tok;
26180
elsif ( ( $identifier =~ /^sub / ) && ( $tok =~ /^\s*$/ ) ) {
26181
$id_scan_state = '(';
26182
$identifier .= $tok;
26184
elsif ( ( $identifier =~ /^sub / ) && ( $tok eq '(' ) ) {
26185
$id_scan_state = ')';
26186
$identifier .= $tok;
26189
$id_scan_state = ''; # that's all
26194
elsif ( $id_scan_state eq '(' ) { # looking for ( of prototype
26196
if ( $tok eq '(' ) { # got it
26197
$identifier .= $tok;
26198
$id_scan_state = ')'; # now find the end of it
26200
elsif ( $tok =~ /^\s*$/ ) { # blank - keep going
26201
$identifier .= $tok;
26204
$id_scan_state = ''; # that's all - no prototype
26209
elsif ( $id_scan_state eq ')' ) { # looking for ) to end
26211
if ( $tok eq ')' ) { # got it
26212
$identifier .= $tok;
26213
$id_scan_state = ''; # all done
26216
elsif ( $tok =~ /^[\s\$\%\\\*\@\&\;]/ ) {
26217
$identifier .= $tok;
26219
else { # probable error in script, but keep going
26220
warning("Unexpected '$tok' while seeking end of prototype\n");
26221
$identifier .= $tok;
26224
else { # can get here due to error in initialization
26225
$id_scan_state = '';
26231
if ( $id_scan_state eq ')' ) {
26232
warning("Hit end of line while seeking ) to end prototype\n");
26235
# once we enter the actual identifier, it may not extend beyond
26236
# the end of the current line
26237
if ( $id_scan_state =~ /^[A\:\(\)]/ ) {
26238
$id_scan_state = '';
26240
if ( $i < 0 ) { $i = 0 }
26247
if ( $identifier =~ /^->/ && $last_nonblank_type eq 'w' ) {
26250
else { $type = 'i' }
26252
elsif ( $identifier eq '->' ) {
26256
( length($identifier) > 1 )
26258
# In something like '@$=' we have an identifier '@$'
26259
# In something like '$${' we have type '$$' (and only
26260
# part of an identifier)
26261
&& !( $identifier =~ /\$$/ && $tok eq '{' )
26262
&& ( $identifier !~ /^(sub |package )$/ )
26267
else { $type = 't' }
26269
elsif ($saw_alpha) {
26271
# type 'w' includes anything without leading type info
26272
# ($,%,@,*) including something like abc::def::ghi
26277
} # this can happen on a restart
26281
# the returned token is the assembled identifier
$tok = $identifier;
26282
if ($message) { write_logfile_entry($message) }
26289
TOKENIZER_DEBUG_FLAG_SCAN_ID && do {
26290
my ( $a, $b, $c ) = caller;
26292
"SCANID: called from $a $b $c with tok, i, state, identifier =$tok_begin, $i_begin, $id_scan_state_begin, $identifier_begin\n";
26294
"SCANID: returned with tok, i, state, identifier =$tok, $i, $id_scan_state, $identifier\n";
26296
return ( $i, $tok, $type, $id_scan_state, $identifier );
26301
# saved package and subnames in case prototype is on separate line
26302
my ( $package_saved, $subname_saved );
26306
# do_scan_sub parses a sub name and prototype
26307
# it is called with $i_beg equal to the index of the first nonblank
26308
# token following a 'sub' token.
26310
# It returns the list ( $i, $tok, $type, $id_scan_state ).
# TODO: add future error checks to be sure we have a valid
26311
# sub name. For example, 'sub &doit' is wrong. Also, be sure
26312
# a name is given if and only if a non-anonymous sub is
26314
# USES GLOBAL VARS: $current_package, $last_nonblank_token,
26315
# $in_attribute_list, %saw_function_definition,
26319
$input_line, $i, $i_beg,
26320
$tok, $type, $rtokens,
26321
$rtoken_map, $id_scan_state, $max_token_index
26323
$id_scan_state = ""; # normally we get everything in one call
26324
my $subname = undef;
26325
my $package = undef;
26330
# convert the pretoken index to a character position and start the
# \G-anchored matches there
my $pos_beg = $$rtoken_map[$i_beg];
26331
pos($input_line) = $pos_beg;
26333
# sub NAME PROTO ATTRS
26335
$input_line =~ m/\G\s*
26336
((?:\w*(?:'|::))*) # package - something that ends in :: or '
26337
(\w+) # NAME - required
26338
(\s*\([^){]*\))? # PROTO - something in parens
26339
(\s*:)? # ATTRS - leading : of attribute list
26348
# a sub name without a package qualifier belongs to the current package
$package = ( defined($1) && $1 ) ? $1 : $current_package;
26349
# normalize the package name: old-style ' becomes ::, a leading ::
# means package main, and a trailing :: is dropped
$package =~ s/\'/::/g;
26350
if ( $package =~ /^\:/ ) { $package = 'main' . $package }
26351
$package =~ s/::$//;
26352
my $pos = pos($input_line);
26353
my $numc = $pos - $pos_beg;
26354
# the returned token is 'sub ' followed by the matched text
$tok = 'sub ' . substr( $input_line, $pos_beg, $numc );
26358
# Look for prototype/attributes not preceded on this line by subname;
26359
# This might be an anonymous sub with attributes,
26360
# or a prototype on a separate line from its sub name
26362
$input_line =~ m/\G(\s*\([^){]*\))? # PROTO
26363
(\s*:)? # ATTRS leading ':'
26372
# Handle prototype on separate line from subname
26373
if ($subname_saved) {
26374
$package = $package_saved;
26375
$subname = $subname_saved;
26376
$tok = $last_nonblank_token;
26383
# ATTRS: if there are attributes, back up and let the ':' be
26384
# found later by the scanner.
26385
my $pos = pos($input_line);
26387
$pos -= length($attrs);
26390
my $next_nonblank_token = $tok;
26392
# catch case of line with leading ATTR ':' after anonymous sub
26393
if ( $pos == $pos_beg && $tok eq ':' ) {
26395
$in_attribute_list = 1;
26398
# We must convert back from character position
26399
# to pre_token index.
26402
# I don't think an error flag can occur here ..but ?
26404
( $i, $error ) = inverse_pretoken_map( $i, $pos, $rtoken_map,
26405
$max_token_index );
26406
if ($error) { warning("Possibly invalid sub\n") }
26408
# check for multiple definitions of a sub
26409
( $next_nonblank_token, my $i_next ) =
26410
find_next_nonblank_token_on_this_line( $i, $rtokens,
26411
$max_token_index );
26414
if ( $next_nonblank_token =~ /^(\s*|#)$/ )
26415
{ # skip blank or side comment
26416
# nothing useful remains on this line, so look ahead for one more
# nonblank pretoken
my ( $rpre_tokens, $rpre_types ) =
26417
peek_ahead_for_n_nonblank_pre_tokens(1);
26418
if ( defined($rpre_tokens) && @$rpre_tokens ) {
26419
$next_nonblank_token = $rpre_tokens->[0];
26422
$next_nonblank_token = '}';
26425
# forget any names saved by an earlier call
$package_saved = "";
26426
$subname_saved = "";
26427
if ( $next_nonblank_token eq '{' ) {
26430
# Check for multiple definitions of a sub, but
26431
# it is ok to have multiple sub BEGIN, etc,
26432
# so we do not complain if name is all caps
26433
if ( $saw_function_definition{$package}{$subname}
26434
&& $subname !~ /^[A-Z]+$/ )
26436
my $lno = $saw_function_definition{$package}{$subname};
26438
"already saw definition of 'sub $subname' in package '$package' at line $lno\n"
26441
# remember the line number at which this sub was defined
$saw_function_definition{$package}{$subname} =
26442
$tokenizer_self->{_last_line_number};
26445
elsif ( $next_nonblank_token eq ';' ) {
26447
elsif ( $next_nonblank_token eq '}' ) {
26450
# ATTRS - if an attribute list follows, remember the name
26451
# of the sub so the next opening brace can be labeled.
26452
# Setting 'statement_type' causes any ':'s to introduce
26454
elsif ( $next_nonblank_token eq ':' ) {
26455
$statement_type = $tok;
26458
# see if PROTO follows on another line:
26459
elsif ( $next_nonblank_token eq '(' ) {
26460
if ( $attrs || $proto ) {
26462
"unexpected '(' after definition or declaration of sub '$subname'\n"
26466
$id_scan_state = 'sub'; # we must come back to get proto
26467
$statement_type = $tok;
26468
# keep the names so the next call can attach the prototype to them
$package_saved = $package;
26469
$subname_saved = $subname;
26472
elsif ($next_nonblank_token) { # EOF technically ok
26474
"expecting ':' or ';' or '{' after definition or declaration of sub '$subname' but saw '$next_nonblank_token'\n"
26477
check_prototype( $proto, $package, $subname );
26480
# no match but line not blank
26483
return ( $i, $tok, $type, $id_scan_state );
26487
#########i###############################################################
26488
# Tokenizer utility routines which may use CONSTANTS but no other GLOBALS
26489
#########################################################################
26491
sub find_next_nonblank_token {
# Return the list ( $next_nonblank_token, $i ): the next token after
# index $i, stepping over at most one all-whitespace token, and the
# index at which it was found.
26492
my ( $i, $rtokens, $max_token_index ) = @_;
26494
# at the end of the current token list: look ahead, unless that has
# already been done
if ( $i >= $max_token_index ) {
26495
if ( !peeked_ahead() ) {
26498
peek_ahead_for_nonblank_token( $rtokens, $max_token_index );
26501
my $next_nonblank_token = $$rtokens[ ++$i ];
26503
# step over a single whitespace token
if ( $next_nonblank_token =~ /^\s*$/ ) {
26504
$next_nonblank_token = $$rtokens[ ++$i ];
26506
return ( $next_nonblank_token, $i );
26509
sub numerator_expected {
26511
# this is a filter for a possible numerator, in support of guessing
26512
# for the / pattern delimiter token.
26517
# Note: I am using the convention that variables ending in
26518
# _expected have these 3 possible values.
26519
my ( $i, $rtokens, $max_token_index ) = @_;
26520
my $next_token = $$rtokens[ $i + 1 ];
26521
if ( $next_token eq '=' ) { $i++; } # handle /=
26522
# the decision is based on the next nonblank token
my ( $next_nonblank_token, $i_next ) =
26523
find_next_nonblank_token( $i, $rtokens, $max_token_index );
26525
# does it contain '(', '$', a word character, '.' or '@' ?
if ( $next_nonblank_token =~ /(\(|\$|\w|\.|\@)/ ) {
26530
# is it empty or all whitespace?
if ( $next_nonblank_token =~ /^\s*$/ ) {
26539
sub pattern_expected {
26541
# This is the start of a filter for a possible pattern.
26542
# It looks at the token after a possible pattern and tries to
26543
# determine if that token could end a pattern.
26548
my ( $i, $rtokens, $max_token_index ) = @_;
26549
my $next_token = $$rtokens[ $i + 1 ];
26550
if ( $next_token =~ /^[cgimosxp]/ ) { $i++; } # skip possible modifier
26551
# the decision is based on the next nonblank token
my ( $next_nonblank_token, $i_next ) =
26552
find_next_nonblank_token( $i, $rtokens, $max_token_index );
26554
# list of tokens which may follow a pattern
26555
# (can probably be expanded)
26556
# NOTE(review): this match is unanchored, so any token which merely
# contains one of these strings also matches -- confirm this is intended
if ( $next_nonblank_token =~ /(\)|\}|\;|\&\&|\|\||and|or|while|if|unless)/ )
26562
# is it empty or all whitespace?
if ( $next_nonblank_token =~ /^\s*$/ ) {
26571
sub find_next_nonblank_token_on_this_line {
# Return the list ( $next_nonblank_token, $i ) for the next token after
# index $i, stepping over at most one all-whitespace token. Indexes are
# only advanced while they are below $max_token_index.
26572
my ( $i, $rtokens, $max_token_index ) = @_;
26573
my $next_nonblank_token;
26575
if ( $i < $max_token_index ) {
26576
$next_nonblank_token = $$rtokens[ ++$i ];
26578
# step over a single whitespace token
if ( $next_nonblank_token =~ /^\s*$/ ) {
26580
if ( $i < $max_token_index ) {
26581
$next_nonblank_token = $$rtokens[ ++$i ];
26586
$next_nonblank_token = "";
26588
return ( $next_nonblank_token, $i );
26591
sub find_angle_operator_termination {
26593
# We are looking at a '<' and want to know if it is an angle operator.
26594
# We are to return:
26595
# $i = pretoken index of ending '>' if found, current $i otherwise
26596
# $type = 'Q' if found, '>' otherwise
26597
my ( $input_line, $i_beg, $rtoken_map, $expecting, $max_token_index ) = @_;
26600
# start searching at the character just after the one that token $i maps to
pos($input_line) = 1 + $$rtoken_map[$i];
26604
# we just have to find the next '>' if a term is expected
26605
if ( $expecting == TERM ) { $filter = '[\>]' }
26607
# we have to guess if we don't know what is expected
26608
elsif ( $expecting == UNKNOWN ) { $filter = '[\>\;\=\#\|\<]' }
26610
# shouldn't happen - we shouldn't be here if operator is expected
26611
else { warning("Program Bug in find_angle_operator_termination\n") }
26613
# To illustrate what we might be looking at, in case we are
26614
# guessing, here are some examples of valid angle operators
26621
# <jskdfjskdfj* op/* jskdjfjkosvk*> ( glob.t)
26622
# <${PREFIX}*img*.$IMAGE_TYPE>
26623
# <img*.$IMAGE_TYPE>
26624
# <Timg*.$IMAGE_TYPE>
26625
# <$LATEX2HTMLVERSIONS${dd}html[1-9].[0-9].pl>
26627
# Here are some examples of lines which do not have angle operators:
26628
# return undef unless $self->[2]++ < $#{$self->[1]};
26631
# the following line from dlister.pl caused trouble:
26632
# print'~'x79,"\n",$D<1024?"0.$D":$D>>10,"K, $C files\n\n\n";
26634
# If the '<' starts an angle operator, it must end on this line and
26635
# it must not have certain characters like ';' and '=' in it. I use
26636
# this to limit the testing. This filter should be improved if
26639
# search forward from the position set above for the first filter character
if ( $input_line =~ /($filter)/g ) {
26643
# We MAY have found an angle operator termination if we get
26644
# here, but we need to do more to be sure we haven't been
26646
my $pos = pos($input_line);
26648
# $str is the text from the start of token $i through the matched character
my $pos_beg = $$rtoken_map[$i];
26649
my $str = substr( $input_line, $pos_beg, ( $pos - $pos_beg ) );
26651
# Reject if the closing '>' follows a '-' as in:
26652
# if ( VERSION < 5.009 && $op-> name eq 'aassign' ) { }
26653
if ( $expecting eq UNKNOWN ) {
26654
# $check is the character just before the matched one
my $check = substr( $input_line, $pos - 2, 1 );
26655
if ( $check eq '-' ) {
26656
return ( $i, $type );
26660
######################################debug#####
26661
#write_diagnostics( "ANGLE? :$str\n");
26662
#print "ANGLE: found $1 at pos=$pos str=$str check=$check\n";
26663
######################################debug#####
26667
inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index );
26669
# It may be possible that a quote ends midway in a pretoken.
26670
# If this happens, it may be necessary to split the pretoken.
26673
"Possible tokinization error..please check this line\n");
26674
report_possible_bug();
26677
# Now let's see where we stand....
26678
# OK if math op not possible
26679
if ( $expecting == TERM ) {
26682
# OK if there are no more than 2 pre-tokens inside
26683
# (not possible to write 2 token math between < and >)
26684
# This catches most common cases
26685
elsif ( $i <= $i_beg + 3 ) {
26686
write_diagnostics("ANGLE(1 or 2 tokens): $str\n");
26692
# Let's try a Brace Test: any braces inside must balance
26694
# each counter goes up for an opening character and down for a closing
# one, so a nonzero result means unbalanced
while ( $str =~ /\{/g ) { $br++ }
26695
while ( $str =~ /\}/g ) { $br-- }
26697
while ( $str =~ /\[/g ) { $sb++ }
26698
while ( $str =~ /\]/g ) { $sb-- }
26700
while ( $str =~ /\(/g ) { $pr++ }
26701
while ( $str =~ /\)/g ) { $pr-- }
26703
# if braces do not balance - not angle operator
26704
if ( $br || $sb || $pr ) {
26708
"NOT ANGLE (BRACE={$br ($pr [$sb ):$str\n");
26711
# we should keep doing more checks here...to be continued
26712
# Tentatively accepting this as a valid angle operator.
26713
# There are lots more things that can be checked.
26716
"ANGLE-Guessing yes: $str expecting=$expecting\n");
26717
write_logfile_entry("Guessing angle operator here: $str\n");
26722
# didn't find ending >
26724
if ( $expecting == TERM ) {
26725
warning("No ending > for angle operator\n");
26729
return ( $i, $type );
26732
sub scan_number_do {
26734
# scan a number in any of the formats that Perl accepts
26735
# Underbars (_) are allowed in decimal numbers.
26736
# input parameters -
26737
# $input_line - the string to scan
26738
# $i - pre_token index to start scanning
26739
# $rtoken_map - reference to the pre_token map giving starting
26740
# character position in $input_line of token $i
# $input_type - token type supplied by the caller; it is the initial
# value of $type and is what the "not a number" path returns
# $max_token_index - upper bound handed to inverse_pretoken_map
26741
# output parameters -
26742
# $i - last pre_token index of the number just scanned
26743
# number - the number (characters); or undef if not a number
# The return list is ( $i, $type, $number ).
26745
my ( $input_line, $i, $rtoken_map, $input_type, $max_token_index ) = @_;
26746
my $pos_beg = $$rtoken_map[$i];
26749
my $number = undef;
26750
my $type = $input_type;
26752
my $first_char = substr( $input_line, $pos_beg, 1 );
26754
# Look for bad starting characters; Shouldn't happen..
# (a number may only start with a digit, '.', a sign, or 'E'/'e')
26755
if ( $first_char !~ /[\d\.\+\-Ee]/ ) {
26756
warning("Program bug - scan_number given character $first_char\n");
26757
report_definite_bug();
26758
return ( $i, $type, $number );
26761
# handle v-string without leading 'v' character ('Two Dot' rule)
26763
# TODO: v-strings may contain underscores
# Each pattern below is anchored with \G, so pos() is reset to the start
# of the token before every attempt.
26764
pos($input_line) = $pos_beg;
26765
if ( $input_line =~ /\G((\d+)?\.\d+(\.\d+)+)/g ) {
26766
$pos = pos($input_line);
26767
my $numc = $pos - $pos_beg;
26768
$number = substr( $input_line, $pos_beg, $numc );
26770
report_v_string($number);
26773
# handle octal, hex, binary
# (tried only if the v-string pattern did not set $number; alternatives
# after the leading 0 are x<hex digits>, <octal digits>, b<binary digits>,
# each allowing underscores)
26774
if ( !defined($number) ) {
26775
pos($input_line) = $pos_beg;
26776
if ( $input_line =~ /\G[+-]?0((x[0-9a-fA-F_]+)|([0-7_]+)|(b[01_]+))/g )
26778
$pos = pos($input_line);
26779
my $numc = $pos - $pos_beg;
26780
$number = substr( $input_line, $pos_beg, $numc );
26786
# last resort: decimal integer or floating point with optional exponent
if ( !defined($number) ) {
26787
pos($input_line) = $pos_beg;
26789
if ( $input_line =~ /\G([+-]?[\d_]*(\.[\d_]*)?([Ee][+-]?(\d+))?)/g ) {
26790
$pos = pos($input_line);
26792
# watch out for things like 0..40 which would give 0. by this;
# (detected as: the match ended on a '.' and the next character is
# also a '.')
26793
if ( ( substr( $input_line, $pos - 1, 1 ) eq '.' )
26794
&& ( substr( $input_line, $pos, 1 ) eq '.' ) )
26798
my $numc = $pos - $pos_beg;
26799
$number = substr( $input_line, $pos_beg, $numc );
26804
# filter out non-numbers like e + - . e2 .e3 +e6
26805
# the rule: at least one digit, and any 'e' must be preceded by a digit
26807
$number !~ /\d/ # no digits
26808
|| ( $number =~ /^(.*)[eE]/
26809
&& $1 !~ /\d/ ) # or no digits before the 'e'
26813
$type = $input_type;
26814
return ( $i, $type, $number );
26817
# Found a number; now we must convert back from character position
26818
# to pre_token index. An error here implies user syntax error.
26819
# An example would be an invalid octal number like '009'.
# NOTE(review): the assignment that receives inverse_pretoken_map's
# ( $i, $error ) result is not visible in this copy - confirm against
# the full source.
26822
inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index );
26823
if ($error) { warning("Possibly invalid number\n") }
26825
return ( $i, $type, $number );
26828
sub inverse_pretoken_map {
26830
# Starting with the current pre_token index $i, scan forward until
26831
# finding the index of the next pre_token whose position is $pos.
# input parameters:
# $i - pre_token index to start from (the search begins at $i+1
# because the loop pre-increments)
# $pos - character position to locate
# $rtoken_map - reference to array of pre_token starting positions
# $max_token_index - last pre_token index that may be examined
# returns ( $i, $error ), where $error is set to 1 if $pos fell before
# the start of the pre_token found, i.e. not on a pre_token boundary.
26832
my ( $i, $pos, $rtoken_map, $max_token_index ) = @_;
26835
while ( ++$i <= $max_token_index ) {
26837
# first pre_token starting at or beyond $pos
if ( $pos <= $$rtoken_map[$i] ) {
26839
# Let the calling routine handle errors in which we do not
26840
# land on a pre-token boundary. It can happen by running
26841
# perltidy on some non-perl scripts, for example.
26842
if ( $pos < $$rtoken_map[$i] ) { $error = 1 }
26847
return ( $i, $error );
26850
sub find_here_doc {
26852
# find the target of a here document, if any
26853
# input parameters:
26854
# $i - token index of the second < of <<
26855
# ($i must be less than the last token index if this is called)
# $expecting - what the tokenizer expects next; compared below against
# the TERM and UNKNOWN constants
# $rtokens - reference to the array of pre_tokens
# $max_token_index - last usable pre_token index
26856
# output parameters:
26857
# $found_target = 0 didn't find target; =1 found target
26858
# HERE_TARGET - the target string (may be empty string)
26859
# $i - unchanged if not here doc,
26860
# or index of the last token of the here target
26861
# $saw_error - flag noting unbalanced quote on here target
# The returned list begins with
# ( $found_target, $here_doc_target, $here_quote_character, $i, ...
26862
my ( $expecting, $i, $rtokens, $rtoken_map, $max_token_index ) = @_;
26864
my $found_target = 0;
26865
my $here_doc_target = '';
26866
my $here_quote_character = '';
26868
my ( $next_nonblank_token, $i_next_nonblank, $next_token );
26869
$next_token = $$rtokens[ $i + 1 ];
26871
# perl allows a backslash before the target string (heredoc.t)
26873
if ( $next_token eq '\\' ) {
26875
# look past the backslash for the real target token
$next_token = $$rtokens[ $i + 2 ];
26878
( $next_nonblank_token, $i_next_nonblank ) =
26879
find_next_nonblank_token_on_this_line( $i, $rtokens, $max_token_index );
26881
# Case 1: quoted target, delimited by ', " or `
if ( $next_nonblank_token =~ /[\'\"\`]/ ) {
26884
my $quote_depth = 0;
26889
$i, $in_quote, $here_quote_character, $quote_pos, $quote_depth,
26892
= follow_quoted_string( $i_next_nonblank, $in_quote, $rtokens,
26893
$here_quote_character, $quote_pos, $quote_depth, $max_token_index );
26895
if ($in_quote) { # didn't find end of quote, so no target found
26897
if ( $expecting == TERM ) {
26899
"Did not find here-doc string terminator ($here_quote_character) before end of line \n"
26904
else { # found ending quote
26909
# rebuild the target text from the tokens between the two quote marks
for ( $j = $i_next_nonblank + 1 ; $j < $i ; $j++ ) {
26910
$tokj = $$rtokens[$j];
26912
# we have to remove any backslash before the quote character
26913
# so that the here-doc-target exactly matches this string
26917
&& $$rtokens[ $j + 1 ] eq $here_quote_character );
26918
$here_doc_target .= $tokj;
26923
# Case 2: nothing but whitespace after << where a term is expected
elsif ( ( $next_token =~ /^\s*$/ ) and ( $expecting == TERM ) ) {
26925
write_logfile_entry(
26926
"found blank here-target after <<; suggest using \"\"\n");
26929
# Case 3: unquoted target word
elsif ( $next_token =~ /^\w/ ) { # simple bareword or integer after <<
26931
my $here_doc_expected;
26932
# when the context is ambiguous, fall back on a heuristic guess
if ( $expecting == UNKNOWN ) {
26933
$here_doc_expected = guess_if_here_doc($next_token);
26936
$here_doc_expected = 1;
26939
if ($here_doc_expected) {
26941
$here_doc_target = $next_token;
26948
if ( $expecting == TERM ) {
26950
write_logfile_entry("Note: bare here-doc operator <<\n");
26957
# patch to neglect any prepended backslash
26958
if ( $found_target && $backslash ) { $i++ }
26960
return ( $found_target, $here_doc_target, $here_quote_character, $i,
26966
# follow (or continue following) quoted string(s)
26967
# $in_quote return code:
26968
# 0 - ok, found end
26969
# 1 - still must find end of quote whose target is $quote_character
26970
# 2 - still looking for end of first of two quotes
26972
# Returns updated strings:
26973
# $quoted_string_1 = quoted string seen while in_quote=1
26974
# $quoted_string_2 = quoted string seen while in_quote=2
26976
$i, $in_quote, $quote_character,
26977
$quote_pos, $quote_depth, $quoted_string_1,
26978
$quoted_string_2, $rtokens, $rtoken_map,
26982
my $in_quote_starting = $in_quote;
26985
if ( $in_quote == 2 ) { # two quotes/quoted_string_1s to follow
26988
$i, $in_quote, $quote_character, $quote_pos, $quote_depth,
26991
= follow_quoted_string( $i, $in_quote, $rtokens, $quote_character,
26992
$quote_pos, $quote_depth, $max_token_index );
26993
$quoted_string_2 .= $quoted_string;
26994
if ( $in_quote == 1 ) {
26995
if ( $quote_character =~ /[\{\[\<\(]/ ) { $i++; }
26996
$quote_character = '';
26999
$quoted_string_2 .= "\n";
27003
if ( $in_quote == 1 ) { # one (more) quote to follow
27006
$i, $in_quote, $quote_character, $quote_pos, $quote_depth,
27009
= follow_quoted_string( $ibeg, $in_quote, $rtokens, $quote_character,
27010
$quote_pos, $quote_depth, $max_token_index );
27011
$quoted_string_1 .= $quoted_string;
27012
if ( $in_quote == 1 ) {
27013
$quoted_string_1 .= "\n";
27016
return ( $i, $in_quote, $quote_character, $quote_pos, $quote_depth,
27017
$quoted_string_1, $quoted_string_2 );
27020
sub follow_quoted_string {
27022
# scan for a specific token, skipping escaped characters
27023
# if the quote character is blank, use the first non-blank character
27024
# input parameters:
27025
# $rtokens = reference to the array of tokens
27026
# $i = the token index of the first character to search
27027
# $in_quote = number of quoted strings being followed
27028
# $beginning_tok = the starting quote character
27029
# $quote_pos = index to check next for alphanumeric delimiter
27030
# output parameters:
27031
# $i = the token index of the ending quote character
27032
# $in_quote = decremented if found end, unchanged if not
27033
# $beginning_tok = the starting quote character
27034
# $quote_pos = index to check next for alphanumeric delimiter
27035
# $quote_depth = nesting depth, since delimiters '{ ( [ <' can be nested.
27036
# $quoted_string = the text of the quote (without quotation tokens)
27037
my ( $i_beg, $in_quote, $rtokens, $beginning_tok, $quote_pos, $quote_depth,
27040
my ( $tok, $end_tok );
27041
# start one token early because every loop below pre-increments $i
my $i = $i_beg - 1;
27042
my $quoted_string = "";
27044
TOKENIZER_DEBUG_FLAG_QUOTE && do {
27046
"QUOTE entering with quote_pos = $quote_pos i=$i beginning_tok =$beginning_tok\n";
27049
# get the corresponding end token
27050
if ( $beginning_tok !~ /^\s*$/ ) {
27051
$end_tok = matching_end_token($beginning_tok);
27054
# a blank token means we must find and use the first non-blank one
27056
my $allow_quote_comments = ( $i < 0 ) ? 1 : 0; # i<0 means we saw a <cr>
27058
while ( $i < $max_token_index ) {
27059
$tok = $$rtokens[ ++$i ];
27061
if ( $tok !~ /^\s*$/ ) {
27063
# a '#' where comments are allowed: jump to the end of the line
if ( ( $tok eq '#' ) && ($allow_quote_comments) ) {
27064
$i = $max_token_index;
27068
# multi-character pre_token: the delimiter is a single character of it
if ( length($tok) > 1 ) {
27069
if ( $quote_pos <= 0 ) { $quote_pos = 1 }
27070
$beginning_tok = substr( $tok, $quote_pos - 1, 1 );
27073
$beginning_tok = $tok;
27076
$end_tok = matching_end_token($beginning_tok);
27082
$allow_quote_comments = 1;
27087
# There are two different loops which search for the ending quote
27088
# character. In the rare case of an alphanumeric quote delimiter, we
27089
# have to look through alphanumeric tokens character-by-character, since
27090
# the pre-tokenization process combines multiple alphanumeric
27091
# characters, whereas for a non-alphanumeric delimiter, only tokens of
27092
# length 1 can match.
27094
###################################################################
27095
# Case 1 (rare): loop for case of alphanumeric quote delimiter..
27096
# "quote_pos" is the position in the current word to begin searching
27097
###################################################################
27098
if ( $beginning_tok =~ /\w/ ) {
27100
# Note this because it is not recommended practice except
27101
# for obfuscated perl contests
27102
if ( $in_quote == 1 ) {
27103
write_logfile_entry(
27104
"Note: alphanumeric quote delimiter ($beginning_tok) \n");
27107
while ( $i < $max_token_index ) {
27109
# advance to a new pre_token only when the current one is used up
if ( $quote_pos == 0 || ( $i < 0 ) ) {
27110
$tok = $$rtokens[ ++$i ];
27112
if ( $tok eq '\\' ) {
27114
# retain backslash unless it hides the end token
27115
$quoted_string .= $tok
27116
unless $$rtokens[ $i + 1 ] eq $end_tok;
27118
last if ( $i >= $max_token_index );
27119
$tok = $$rtokens[ ++$i ];
27122
my $old_pos = $quote_pos;
27124
unless ( defined($tok) && defined($end_tok) && defined($quote_pos) )
27128
# index() returns -1 when not found, so $quote_pos becomes 0 in that
# case and is otherwise one past the position of the delimiter
$quote_pos = 1 + index( $tok, $end_tok, $quote_pos );
27130
if ( $quote_pos > 0 ) {
27133
substr( $tok, $old_pos, $quote_pos - $old_pos - 1 );
27137
if ( $quote_depth == 0 ) {
27143
$quoted_string .= substr( $tok, $old_pos );
27148
########################################################################
27149
# Case 2 (normal): loop for case of a non-alphanumeric quote delimiter..
27150
########################################################################
27153
while ( $i < $max_token_index ) {
27154
$tok = $$rtokens[ ++$i ];
27156
if ( $tok eq $end_tok ) {
27159
if ( $quote_depth == 0 ) {
27164
elsif ( $tok eq $beginning_tok ) {
27167
elsif ( $tok eq '\\' ) {
27169
# retain backslash unless it hides the beginning or end token
27170
$tok = $$rtokens[ ++$i ];
27171
$quoted_string .= '\\'
27172
unless ( $tok eq $end_tok || $tok eq $beginning_tok );
27174
$quoted_string .= $tok;
27177
# the escape handling can step one past the last token; clamp it
if ( $i > $max_token_index ) { $i = $max_token_index }
27178
return ( $i, $in_quote, $beginning_tok, $quote_pos, $quote_depth,
27182
sub indicate_error {
27183
# Report an error at character position $pos of $input_line: interrupt
# the logfile, then print the numbered line with the marker string
# $carrat beneath the position of interest.
# NOTE(review): no use of $msg is visible in this copy - confirm where
# the message itself is emitted.
my ( $msg, $line_number, $input_line, $pos, $carrat ) = @_;
27184
interrupt_logfile();
27186
write_error_indicator_pair( $line_number, $input_line, $pos, $carrat );
27190
sub write_error_indicator_pair {
27191
# Emit two warning lines: the (possibly shortened) numbered source line,
# and beneath it an underline carrying the marker $carrat at $pos.
my ( $line_number, $input_line, $pos, $carrat ) = @_;
27192
my ( $offset, $numbered_line, $underline ) =
27193
make_numbered_line( $line_number, $input_line, $pos );
27194
# $pos - $offset is the marker position within the displayed line
$underline = write_on_underline( $underline, $pos - $offset, $carrat );
27195
warning( $numbered_line . "\n" );
27196
# drop trailing blanks so the underline ends at the marker
$underline =~ s/\s*$//;
27197
warning( $underline . "\n" );
27200
sub make_numbered_line {
27202
# Given an input line, its line number, and a character position of
27203
# interest, create a string not longer than 80 characters of the form
27204
# $lineno: sub_string
27205
# such that the sub_string of $str contains the position of interest
27207
# Here is an example of what we want, in this case we add trailing
27208
# '...' because the line is long.
27210
# 2: (One of QAML 2.0's authors is a member of the World Wide Web Con ...
27212
# Here is another example, this time in which we used leading '...'
27213
# because of excessive length:
27215
# 2: ... er of the World Wide Web Consortium's
27217
# input parameters are:
27218
# $lineno = line number
27219
# $str = the text of the line
27220
# $pos = position of interest (the error) : 0 = first character
# returns ( $offset, $numbered_line, $underline ), where:
27223
# - $offset = an offset which corrects the position in case we only
27224
# display part of a line, such that $pos-$offset is the effective
27225
# position from the start of the displayed line.
27226
# - $numbered_line = the numbered line as above,
27227
# - $underline = a blank 'underline' which is all spaces with the same
27228
# number of characters as the numbered line.
27230
my ( $lineno, $str, $pos ) = @_;
27231
# show the line from its start unless the position of interest is at
# column 60 or beyond; then begin the display 40 characters before it
my $offset = ( $pos < 60 ) ? 0 : $pos - 40;
27232
# number of characters left over beyond a 68 character display window
my $excess = length($str) - $offset - 68;
27233
# $numc is defined only when the tail of the line must be cut off
my $numc = ( $excess > 0 ) ? 68 : undef;
27235
if ( defined($numc) ) {
27236
if ( $offset == 0 ) {
27237
$str = substr( $str, $offset, $numc - 4 ) . " ...";
27240
$str = "... " . substr( $str, $offset + 4, $numc - 4 ) . " ...";
27245
if ( $offset == 0 ) {
27248
$str = "... " . substr( $str, $offset + 4 );
27252
my $numbered_line = sprintf( "%d: ", $lineno );
27253
# account for the width of the "N: " prefix in the returned offset
$offset -= length($numbered_line);
27254
$numbered_line .= $str;
27255
my $underline = " " x length($numbered_line);
27256
return ( $offset, $numbered_line, $underline );
27259
sub write_on_underline {
27261
# The "underline" is a string that shows where an error is; it starts
27262
# out as a string of blanks with the same length as the numbered line of
27263
# code above it, and we have to add marking to show where an error is.
27264
# In the example below, we want to write the string '--^' just below
27265
# the line of bad code:
27267
# 2: (One of QAML 2.0's authors is a member of the World Wide Web Con ...
27269
# We are given the current underline string, plus a position and a
27270
# string to write on it.
27272
# In the above example, there will be 2 calls to do this:
27273
# First call: $pos=19, pos_chr=^
27274
# Second call: $pos=16, pos_chr=---
27276
# This is a trivial thing to do with substr, but there is some
# input parameters:
# $underline = the current underline string
# $pos = position in $underline at which to write
# $pos_chr = the marker string to write there
# returns the updated underline string
27279
my ( $underline, $pos, $pos_chr ) = @_;
27281
# check for error..shouldn't happen
27282
unless ( ( $pos >= 0 ) && ( $pos <= length($underline) ) ) {
27285
# trim the marker if it would run past the end of the underline
my $excess = length($pos_chr) + $pos - length($underline);
27286
if ( $excess > 0 ) {
27287
$pos_chr = substr( $pos_chr, 0, length($pos_chr) - $excess );
27289
# overwrite the blanks in place using 4-argument-style lvalue substr
substr( $underline, $pos, length($pos_chr) ) = $pos_chr;
27290
return ($underline);
27295
# Break a string, $str, into a sequence of preliminary tokens. We
27296
# are interested in these types of tokens:
27297
# words (type='w'), example: 'max_tokens_wanted'
27298
# digits (type = 'd'), example: '0755'
27299
# whitespace (type = 'b'), example: ' '
27300
# any other single character (i.e. punct; type = the character itself).
27301
# We cannot do better than this yet because we might be in a quoted
27302
# string or pattern. Caller sets $max_tokens_wanted to 0 to get all
27304
my ( $str, $max_tokens_wanted ) = @_;
27306
# we return references to these 3 arrays:
27307
my @tokens = (); # array of the tokens themselves
27308
my @token_map = (0); # string position of start of each token
27309
my @type = (); # 'b'=whitespace, 'd'=digits, 'w'=alpha, or punct
27314
if ( $str =~ /\G(\s+)/gc ) { push @type, 'b'; }
27317
# note that this must come before words!
27318
elsif ( $str =~ /\G(\d+)/gc ) { push @type, 'd'; }
27321
elsif ( $str =~ /\G(\w+)/gc ) { push @type, 'w'; }
27323
# single-character punctuation
27324
elsif ( $str =~ /\G(\W)/gc ) { push @type, $1; }
27328
return ( \@tokens, \@token_map, \@type );
27332
push @token_map, pos($str);
27334
} while ( --$max_tokens_wanted != 0 );
27336
return ( \@tokens, \@token_map, \@type );
27341
# this is an old debug routine
27342
my ( $rtokens, $rtoken_map ) = @_;
27343
my $num = scalar(@$rtokens);
27346
for ( $i = 0 ; $i < $num ; $i++ ) {
27347
my $len = length( $$rtokens[$i] );
27348
print "$i:$len:$$rtoken_map[$i]:$$rtokens[$i]:\n";
27352
sub matching_end_token {
27354
# find closing character for a pattern
# Takes the opening delimiter as its only argument and tests it against
# the four bracket-type openers '{', '[', '<' and '('.
# NOTE(review): the value produced by each branch, and the result for any
# other delimiter, are not visible in this copy - confirm against the
# full source.
27355
my $beginning_token = shift;
27357
if ( $beginning_token eq '{' ) {
27360
elsif ( $beginning_token eq '[' ) {
27363
elsif ( $beginning_token eq '<' ) {
27366
elsif ( $beginning_token eq '(' ) {
27374
sub dump_token_types {
27378
# This should be the latest list of token types in use
27379
# adding NEW_TOKENS: add a comment here
# Prints a fixed, human-readable description of the token types and the
# '_line_type' codes to the filehandle $fh. The text is a single-quoted
# here-document, so nothing in it is interpolated.
# NOTE(review): the statements that set up $fh are not visible in this
# copy.
27380
print $fh <<'END_OF_LIST';
27382
Here is a list of the token types currently used for lines of type 'CODE'.
27383
For the following tokens, the "type" of a token is just the token itself.
27385
.. :: << >> ** && .. || // -> => += -= .= %= &= |= ^= *= <>
27386
( ) <= >= == =~ !~ != ++ -- /= x=
27387
... **= <<= >>= &&= ||= //= <=>
27388
, + - / * | % ! x ~ = \ ? : . < > ^ &
27390
The following additional token types are defined:
27393
b blank (white space)
27394
{ indent: opening structural curly brace or square bracket or paren
27395
(code block, anonymous hash reference, or anonymous array reference)
27396
} outdent: right structural curly brace or square bracket or paren
27397
[ left non-structural square bracket (enclosing an array index)
27398
] right non-structural square bracket
27399
( left non-structural paren (all but a list right of an =)
27400
) right non-structural parena
27401
L left non-structural curly brace (enclosing a key)
27402
R right non-structural curly brace
27403
; terminal semicolon
27404
f indicates a semicolon in a "for" statement
27405
h here_doc operator <<
27407
Q indicates a quote or pattern
27408
q indicates a qw quote block
27410
C user-defined constant or constant function (with void prototype = ())
27411
U user-defined function taking parameters
27412
G user-defined function taking block parameter (like grep/map/eval)
27413
M (unused, but reserved for subroutine definition name)
27414
P (unused, but -html uses it to label pod text)
27415
t type indicater such as %,$,@,*,&,sub
27416
w bare word (perhaps a subroutine call)
27417
i identifier of some type (with leading %, $, @, *, &, sub, -> )
27420
F a file test operator (like -e)
27422
Z identifier in indirect object slot: may be file handle, object
27423
J LABEL: code block label
27424
j LABEL after next, last, redo, goto
27427
pp pre-increment operator ++
27428
mm pre-decrement operator --
27429
A : used as attribute separator
27431
Here are the '_line_type' codes used internally:
27432
SYSTEM - system-specific code before hash-bang line
27433
CODE - line of perl code (including comments)
27434
POD_START - line starting pod, such as '=head'
27435
POD - pod documentation text
27436
POD_END - last line of pod section, '=cut'
27437
HERE - text of here-document
27438
HERE_END - last line of here-doc (target word)
27439
FORMAT - format section
27440
FORMAT_END - last line of format section, '.'
27441
DATA_START - __DATA__ line
27442
DATA - unidentified text following __DATA__
27443
END_START - __END__ line
27444
END - unidentified text following __END__
27445
ERROR - we are in big trouble, probably not a perl script
27451
# These names are used in error messages
27452
@opening_brace_names = qw# '{' '[' '(' '?' #;
27453
@closing_brace_names = qw# '}' ']' ')' ':' #;
27456
.. :: << >> ** && .. || // -> => += -= .= %= &= |= ^= *= <>
27457
<= >= == =~ !~ != ++ -- /= x= ~~
27459
@is_digraph{@digraphs} = (1) x scalar(@digraphs);
27461
my @trigraphs = qw( ... **= <<= >>= &&= ||= //= <=> !~~ );
27462
@is_trigraph{@trigraphs} = (1) x scalar(@trigraphs);
27464
# make a hash of all valid token types for self-checking the tokenizer
27465
# (adding NEW_TOKENS : select a new character and add to this list)
27466
my @valid_token_types = qw#
27467
A b C G L R f h Q k t w i q n p m F pp mm U j J Y Z v
27468
{ } ( ) [ ] ; + - / * | % ! x ~ = \ ? : . < > ^ &
27470
push( @valid_token_types, @digraphs );
27471
push( @valid_token_types, @trigraphs );
27472
push( @valid_token_types, '#' );
27473
push( @valid_token_types, ',' );
27474
@is_valid_token_type{@valid_token_types} = (1) x scalar(@valid_token_types);
27476
# a list of file test letters, as in -e (Table 3-4 of 'camel 3')
27477
my @file_test_operators =
27478
qw( A B C M O R S T W X b c d e f g k l o p r s t u w x z);
27479
@is_file_test_operator{@file_test_operators} =
27480
(1) x scalar(@file_test_operators);
27482
# these functions have prototypes of the form (&), so when they are
27483
# followed by a block, that block MAY BE followed by an operator.
27484
@_ = qw( do eval );
27485
@is_block_operator{@_} = (1) x scalar(@_);
27487
# these functions allow an identifier in the indirect object slot
27488
@_ = qw( print printf sort exec system say);
27489
@is_indirect_object_taker{@_} = (1) x scalar(@_);
27491
# These tokens may precede a code block
27492
# patched for SWITCH/CASE
27494
qw( BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue if elsif else
27495
unless do while until eval for foreach map grep sort
27496
switch case given when);
27497
@is_code_block_token{@_} = (1) x scalar(@_);
27499
# I'll build the list of keywords incrementally
27502
# keywords and tokens after which a value or pattern is expected,
27503
# but not an operator. In other words, these should consume terms
27504
# to their right, or at least they are not expected to be followed
27505
# immediately by operators.
27506
my @value_requestor = qw(
27727
# patched above for SWITCH/CASE given/when err say
27728
# 'err' is a fairly safe addition.
27729
# TODO: 'default' still needed if appropriate
27730
# 'use feature' seen, but perltidy works ok without it.
27731
# Concerned that 'default' could break code.
27732
push( @Keywords, @value_requestor );
27734
# These are treated the same but are not keywords:
27739
push( @value_requestor, @extra_vr );
27741
@expecting_term_token{@value_requestor} = (1) x scalar(@value_requestor);
27743
# this list contains keywords which do not look for arguments,
27744
# so that they might be followed by an operator, or at least
27746
my @operator_requestor = qw(
27770
push( @Keywords, @operator_requestor );
27772
# These are treated the same but are not considered keywords:
27779
push( @operator_requestor, @extra_or );
27781
@expecting_operator_token{@operator_requestor} =
27782
(1) x scalar(@operator_requestor);
27784
# these token TYPES expect trailing operator but not a term
27785
# note: ++ and -- are post-increment and decrement, 'C' = constant
27786
my @operator_requestor_types = qw( ++ -- C <> q );
27787
@expecting_operator_types{@operator_requestor_types} =
27788
(1) x scalar(@operator_requestor_types);
27790
# these token TYPES consume values (terms)
27791
# note: pp and mm are pre-increment and decrement
27792
# f=semicolon in for, F=file test operator
27793
my @value_requestor_type = qw#
27794
L { ( [ ~ !~ =~ ; . .. ... A : && ! || // = + - x
27795
**= += -= .= /= *= %= x= &= |= ^= <<= >>= &&= ||= //=
27796
<= >= == != => \ > < % * / ? & | ** <=> ~~ !~~
27797
f F pp mm Y p m U J G j >> << ^ t
27799
push( @value_requestor_type, ',' )
27800
; # (perl doesn't like a ',' in a qw block)
27801
@expecting_term_types{@value_requestor_type} =
27802
(1) x scalar(@value_requestor_type);
27804
# Note: the following valid token types are not assigned here to
27805
# hashes requesting to be followed by values or terms, but are
27806
# instead currently hard-coded into sub operator_expected:
27807
# ) -> :: Q R Z ] b h i k n v w } #
27809
# For simple syntax checking, it is nice to have a list of operators which
27810
# will really be unhappy if not followed by a term. This includes most
27812
%really_want_term = %expecting_term_types;
27814
# with these exceptions...
27815
delete $really_want_term{'U'}; # user sub, depends on prototype
27816
delete $really_want_term{'F'}; # file test works on $_ if no following term
27817
delete $really_want_term{'Y'}; # indirect object, too risky to check syntax;
27820
@_ = qw(q qq qw qx qr s y tr m);
27821
@is_q_qq_qw_qx_qr_s_y_tr_m{@_} = (1) x scalar(@_);
27823
# These keywords are handled specially in the tokenizer code:
27824
my @special_keywords = qw(
27840
push( @Keywords, @special_keywords );
27842
# Keywords after which list formatting may be used
27843
# WARNING: do not include |map|grep|eval or perl may die on
27844
# syntax errors (map1.t).
27845
my @keyword_taking_list = qw(
27917
@is_keyword_taking_list{@keyword_taking_list} =
27918
(1) x scalar(@keyword_taking_list);
27920
# These are not used in any way yet
27921
# my @unused_keywords = qw(
27928
# The list of keywords was extracted from function 'keyword' in
27929
# perl file toke.c version 5.005.03, using this utility, plus a
27930
# little editing: (file getkwd.pl):
27931
# while (<>) { while (/\"(.*)\"/g) { print "$1\n"; } }
27932
# Add 'get' prefix where necessary, then split into the above lists.
27933
# This list should be updated as necessary.
27934
# The list should not contain these special variables:
27935
# ARGV DATA ENV SIG STDERR STDIN STDOUT
27938
@is_keyword{@Keywords} = (1) x scalar(@Keywords);
27945
Perl::Tidy - Parses and beautifies perl source
27951
Perl::Tidy::perltidy(
27953
destination => $destination,
27956
perltidyrc => $perltidyrc,
27957
logfile => $logfile,
27958
errorfile => $errorfile,
27959
formatter => $formatter, # callback object (see below)
27960
dump_options => $dump_options,
27961
dump_options_type => $dump_options_type,
27966
This module makes the functionality of the perltidy utility available to perl
27967
scripts. Any or all of the input parameters may be omitted, in which case the
27968
@ARGV array will be used to provide input parameters as described
27969
in the perltidy(1) man page.
27971
For example, the perltidy script is basically just this:
27974
Perl::Tidy::perltidy();
27976
The module accepts input and output streams by a variety of methods.
27977
The following list of parameters may be any of the following: a
27978
filename, an ARRAY reference, a SCALAR reference, or an object with
27979
either a B<getline> or B<print> method, as appropriate.
27981
source - the source of the script to be formatted
27982
destination - the destination of the formatted output
27983
stderr - standard error output
27984
perltidyrc - the .perltidyrc file
27985
logfile - the .LOG file stream, if any
27986
errorfile - the .ERR file stream, if any
27987
dump_options - ref to a hash to receive parameters (see below),
27988
dump_options_type - controls contents of dump_options
27989
dump_getopt_flags - ref to a hash to receive Getopt flags
27990
dump_options_category - ref to a hash giving category of options
27991
dump_abbreviations - ref to a hash giving all abbreviations
27993
The following chart illustrates the logic used to decide how to
27996
ref($param) $param is assumed to be:
27997
----------- ---------------------
27999
SCALAR ref to string
28001
(other) object with getline (if source) or print method
28003
If the parameter is an object, and the object has a B<close> method, that
28004
close method will be called at the end of the stream.
28010
If the B<source> parameter is given, it defines the source of the
28015
If the B<destination> parameter is given, it will be used to define the
28016
file or memory location to receive output of perltidy.
28020
The B<stderr> parameter allows the calling program to capture the output
28021
to what would otherwise go to the standard error output device.
28025
If the B<perltidyrc> file is given, it will be used instead of any
28026
F<.perltidyrc> configuration file that would otherwise be used.
28030
If the B<argv> parameter is given, it will be used instead of the
28031
B<@ARGV> array. The B<argv> parameter may be a string, a reference to a
28032
string, or a reference to an array. If it is a string or reference to a
28033
string, it will be parsed into an array of items just as if it were a
28034
command line string.
28038
If the B<dump_options> parameter is given, it must be the reference to a hash.
28039
In this case, the parameters contained in any perltidyrc configuration file
28040
will be placed in this hash and perltidy will return immediately. This is
28041
equivalent to running perltidy with --dump-options, except that the parameters
28042
are returned in a hash rather than dumped to standard output. Also, by default
28043
only the parameters in the perltidyrc file are returned, but this can be
28044
changed (see the next parameter). This parameter provides a convenient method
28045
for external programs to read a perltidyrc file. An example program using
28046
this feature, F<perltidyrc_dump.pl>, is included in the distribution.
28048
Any combination of the B<dump_> parameters may be used together.
28050
=item dump_options_type
28052
This parameter is a string which can be used to control the parameters placed
28053
in the hash reference supplied by B<dump_options>. The possible values are
28054
'perltidyrc' (default) and 'full'. The 'full' parameter causes both the
28055
default options plus any options found in a perltidyrc file to be returned.
28057
=item dump_getopt_flags
28059
If the B<dump_getopt_flags> parameter is given, it must be the reference to a
28060
hash. This hash will receive all of the parameters that perltidy understands
28061
and flags that are passed to Getopt::Long. This parameter may be
28062
used alone or with the B<dump_options> flag. Perltidy will
28063
exit immediately after filling this hash. See the demo program
28064
F<perltidyrc_dump.pl> for example usage.
28066
=item dump_options_category
28068
If the B<dump_options_category> parameter is given, it must be the reference to a
28069
hash. This hash will receive a hash with keys equal to all long parameter names
28070
and values equal to the title of the corresponding section of the perltidy manual.
28071
See the demo program F<perltidyrc_dump.pl> for example usage.
28073
=item dump_abbreviations
28075
If the B<dump_abbreviations> parameter is given, it must be the reference to a
28076
hash. This hash will receive all abbreviations used by Perl::Tidy. See the
28077
demo program F<perltidyrc_dump.pl> for example usage.
28083
The following example passes perltidy a snippet as a reference
28084
to a string and receives the result back in a reference to
28089
# some messy source code to format
28090
my $source = <<'EOM';
28092
my @editors=('Emacs', 'Vi '); my $rand = rand();
28093
print "A poll of 10 random programmers gave these results:\n";
28095
my $i=int ($rand+rand());
28096
print " $editors[$i] users are from Venus" . ", " .
28097
"$editors[1-$i] users are from Mars" .
28102
# We'll pass it as ref to SCALAR and receive it in a ref to ARRAY
28104
perltidy( source => \$source, destination => \@dest );
28105
foreach (@dest) {print}
28107
=head1 Using the B<formatter> Callback Object
28109
The B<formatter> parameter is an optional callback object which allows
28110
the calling program to receive tokenized lines directly from perltidy for
28111
further specialized processing. When this parameter is used, the two
28112
formatting options which are built into perltidy (beautification or
28113
html) are ignored. The following diagram illustrates the logical flow:
28115
|-- (normal route) -> code beautification
28116
caller->perltidy->|-- (-html flag ) -> create html
28117
|-- (formatter given)-> callback to write_line
28119
This can be useful for processing perl scripts in some way. The
28120
parameter C<$formatter> in the perltidy call,
28122
formatter => $formatter,
28124
is an object created by the caller with a C<write_line> method which
28125
will accept and process tokenized lines, one line per call. Here is
28126
a simple example of a C<write_line> which merely prints the line number,
28127
the line type (as determined by perltidy), and the text of the line:
28131
# This is called from perltidy line-by-line
28133
my $line_of_tokens = shift;
28134
my $line_type = $line_of_tokens->{_line_type};
28135
my $input_line_number = $line_of_tokens->{_line_number};
28136
my $input_line = $line_of_tokens->{_line_text};
28137
print "$input_line_number:$line_type:$input_line";
28140
The complete program, B<perllinetype>, is contained in the examples section of
28141
the source distribution. As this example shows, the callback method
28142
receives a parameter B<$line_of_tokens>, which is a reference to a hash
28143
of other useful information. This example uses these hash entries:
28145
$line_of_tokens->{_line_number} - the line number (1,2,...)
28146
$line_of_tokens->{_line_text} - the text of the line
28147
$line_of_tokens->{_line_type} - the type of the line, one of:
28149
SYSTEM - system-specific code before hash-bang line
28150
CODE - line of perl code (including comments)
28151
POD_START - line starting pod, such as '=head'
28152
POD - pod documentation text
28153
POD_END - last line of pod section, '=cut'
28154
HERE - text of here-document
28155
HERE_END - last line of here-doc (target word)
28156
FORMAT - format section
28157
FORMAT_END - last line of format section, '.'
28158
DATA_START - __DATA__ line
28159
DATA - unidentified text following __DATA__
28160
END_START - __END__ line
28161
END - unidentified text following __END__
28162
ERROR - we are in big trouble, probably not a perl script
28164
Most applications will be only interested in lines of type B<CODE>. For
28165
another example, let's write a program which checks for one of the
28166
so-called I<naughty matching variables> C<$`>, C<$&>, and C<$'>, which
28167
can slow down processing. Here is a B<write_line>, from the example
28168
program B<find_naughty.pl>, which does that:
28172
# This is called back from perltidy line-by-line
28173
# We're looking for $`, $&, and $'
28174
my ( $self, $line_of_tokens ) = @_;
28176
# pull out some stuff we might need
28177
my $line_type = $line_of_tokens->{_line_type};
28178
my $input_line_number = $line_of_tokens->{_line_number};
28179
my $input_line = $line_of_tokens->{_line_text};
28180
my $rtoken_type = $line_of_tokens->{_rtoken_type};
28181
my $rtokens = $line_of_tokens->{_rtokens};
28184
# skip comments, pod, etc
28185
return if ( $line_type ne 'CODE' );
28187
# loop over tokens looking for $`, $&, and $'
28188
for ( my $j = 0 ; $j < @$rtoken_type ; $j++ ) {
28190
# we only want to examine token types 'i' (identifier)
28191
next unless $$rtoken_type[$j] eq 'i';
28193
# pull out the actual token text
28194
my $token = $$rtokens[$j];
28197
if ( $token =~ /^\$[\`\&\']$/ ) {
28199
"$input_line_number: $token\n";
28204
This example pulls out these tokenization variables from the $line_of_tokens
28207
$rtoken_type = $line_of_tokens->{_rtoken_type};
28208
$rtokens = $line_of_tokens->{_rtokens};
28210
The variable C<$rtoken_type> is a reference to an array of token type codes,
28211
and C<$rtokens> is a reference to a corresponding array of token text.
28212
These are obviously only defined for lines of type B<CODE>.
28213
Perltidy classifies tokens into types, and has a brief code for each type.
28214
You can get a complete list at any time by running perltidy from the
28217
perltidy --dump-token-types
28219
In the present example, we are only looking for tokens of type B<i>
28220
(identifiers), so the for loop skips past all other types. When an
28221
identifier is found, its actual text is checked to see if it is one
28222
being sought. If so, the above write_line prints the token and its
28225
The B<formatter> feature is relatively new in perltidy, and further
28226
documentation needs to be written to complete its description. However,
28227
several example programs have been written and can be found in the
28228
B<examples> section of the source distribution. Probably the best way
28229
to get started is to find one of the examples which most closely matches
28230
your application and start modifying it.
28232
For help with perltidy's peculiar way of breaking lines into tokens, you
28233
might run, from the command line,
28235
perltidy -D filename
28237
where F<filename> is a short script of interest. This will produce
28238
F<filename.DEBUG> with interleaved lines of text and their token types.
28239
The B<-D> flag has been in perltidy from the beginning for this purpose.
28240
If you want to see the code which creates this file, it is
28241
C<write_debug_entry> in Tidy.pm.
28249
Thanks to Hugh Myers who developed the initial modular interface
28254
This man page documents Perl::Tidy version 20071205.
28259
perltidy at users.sourceforge.net
28263
The perltidy(1) man page describes all of the features of perltidy. It
28264
can be found at http://perltidy.sourceforge.net.