~ubuntu-branches/debian/sid/universalindentgui/sid : revision 8

1

############################################################

2

#

3

# perltidy - a perl script indenter and formatter

4

#

5

6

# Distributed under the GPL license agreement; see file COPYING

7

#

8

# This program is free software; you can redistribute it and/or modify

9

# it under the terms of the GNU General Public License as published by

10

# the Free Software Foundation; either version 2 of the License, or

11

# (at your option) any later version.

12

#

13

# This program is distributed in the hope that it will be useful,

14

# but WITHOUT ANY WARRANTY; without even the implied warranty of

15

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

16

# GNU General Public License for more details.

17

#

18

# You should have received a copy of the GNU General Public License

19

# along with this program; if not, write to the Free Software

20

# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

21

#

22

# For brief instructions, try 'perltidy -h'.

23

# For more complete documentation, try 'man perltidy'

24

# or visit http://perltidy.sourceforge.net

25

#

26

# This script is an example of the default style. It was formatted with:

27

#

28

# perltidy Tidy.pm

29

#

30

# Code Contributions:

31

# Michael Cartmell supplied code for adaptation to VMS and helped with

32

# v-strings.

33

# Hugh S. Myers supplied sub streamhandle and the supporting code to

34

# create a Perl::Tidy module which can operate on strings, arrays, etc.

35

# Yves Orton supplied coding to help detect Windows versions.

36

# Axel Rose supplied a patch for MacPerl.

37

# Sebastien Aperghis-Tramoni supplied a patch for the defined or operator.

38

# Dan Tyrell contributed a patch for binary I/O.

39

# Ueli Hugenschmidt contributed a patch for -fpsc

40

# Many others have supplied key ideas, suggestions, and bug reports;

41

# see the CHANGES file.

42

#

43

############################################################

44

45

package Perl::Tidy;

46

use 5.004; # need IO::File from 5.004 or later

47

BEGIN { $^W = 1; } # turn on warnings

48

49

use strict;

50

use Exporter;

51

use Carp;

52

$|++;

53

54

use vars qw{

55

$VERSION

56

@ISA

57

@EXPORT

58

$missing_file_spec

59

};

60

61

@ISA = qw( Exporter );

62

@EXPORT = qw( &perltidy );

63

64

use IO::File;

65

use File::Basename;

66

67

BEGIN {

68

( $VERSION = q($Id: Tidy.pm,v 1.73 2007/12/05 17:51:17 perltidy Exp $) ) =~ s/^.*\s+(\d+)\/(\d+)\/(\d+).*$/$1$2$3/; # all one line for MakeMaker

69

}

70

71

sub streamhandle {

    # Given a filename and a mode ('r' or 'w'), create an object which:
    #   has a 'getline' method if mode='r', and
    #   has a 'print' method if mode='w'.
    # The objects also need a 'close' method.
    #
    # How the object is made:
    #
    # if $filename is:     Make object using:
    # ----------------     -----------------
    # '-'                  (STDIN if mode = 'r', STDOUT if mode='w')
    # string               IO::File
    # ARRAY ref            Perl::Tidy::IOScalarArray (formerly IO::ScalarArray)
    # SCALAR ref           Perl::Tidy::IOScalar (formerly IO::Scalar)
    # object               object
    #                      (check for 'print' method for 'w' mode)
    #                      (check for 'getline' method for 'r' mode)
    #
    # Returns a two element list:
    #   - the stream object (a false value if it could not be created, in
    #     which case a warning has been issued), and
    #   - the reference type of $filename if it is a reference, otherwise
    #     $filename itself.
    #
    # Calls confess if an object is supplied which lacks the method required
    # for the requested mode.
    my $ref  = ref( my $filename = shift );
    my $mode = shift;
    my $New;
    my $fh;

    # handle a reference
    if ($ref) {
        if ( $ref eq 'ARRAY' ) {
            $New = sub { Perl::Tidy::IOScalarArray->new(@_) };
        }
        elsif ( $ref eq 'SCALAR' ) {
            $New = sub { Perl::Tidy::IOScalar->new(@_) };
        }
        else {

            # Accept an object with a getline method for reading.  The
            # lookup uses 'can' so that inherited methods are found; testing
            # for a sub named "${ref}::getline" would only see methods which
            # are defined directly in the object's own package.
            if ( $mode =~ /[rR]/ ) {
                if ( $ref->can('getline') ) {
                    $New = sub { $filename };
                }
                else {
                    confess <<EOM;
------------------------------------------------------------------------
No 'getline' method is defined for object of class $ref
Please check your call to Perl::Tidy::perltidy. Trace follows.
------------------------------------------------------------------------
EOM
                }
            }

            # Accept an object with a print method for writing.
            # Inherited methods are accepted here too.
            if ( $mode =~ /[wW]/ ) {
                if ( $ref->can('print') ) {
                    $New = sub { $filename };
                }
                else {
                    confess <<EOM;
------------------------------------------------------------------------
No 'print' method is defined for object of class $ref
Please check your call to Perl::Tidy::perltidy. Trace follows.
------------------------------------------------------------------------
EOM
                }
            }
        }
    }

    # handle a string
    else {
        if ( $filename eq '-' ) {

            # compare without regard to case, since the object branch above
            # accepts both 'w' and 'W' as a write mode
            $New = sub { lc($mode) eq 'w' ? *STDOUT : *STDIN }
        }
        else {
            $New = sub { IO::File->new(@_) };
        }
    }
    $fh = $New->( $filename, $mode )
      or warn "Couldn't open file:$filename in mode:$mode : $!\n";
    return $fh, ( $ref or $filename );
}

156

157

sub find_input_line_ending {

    # Peek at the first 1024 bytes of a file and return the first line
    # ending sequence seen:
    #   "\015\012"  (dos)
    #   "\015"      (mac)
    #   "\012"      (unix)
    # Quietly return undef in case of any trouble: input from an object or
    # from stdin, a file which cannot be opened, no line ending in the
    # bytes examined, or an unrecognized mixture of ending characters.
    my ($input_file) = @_;
    my $ending;

    # silently ignore input from object or stdin
    if ( ref($input_file) || $input_file eq '-' ) {
        return $ending;
    }

    # Open with an explicit read mode on a private handle.  A two-argument
    # open would interpret characters in the file name itself ('>', '|',
    # leading or trailing blanks) as mode or pipe syntax.
    my $fh = IO::File->new( $input_file, 'r' ) or return $ending;

    # read raw bytes so that no line ending translation takes place
    binmode $fh;
    my $buf;
    read( $fh, $buf, 1024 );
    $fh->close();

    # examine the first run of line ending characters
    if ( $buf && $buf =~ /([\012\015]+)/ ) {
        my $test = $1;

        # dos
        if ( $test =~ /^(\015\012)+$/ ) { $ending = "\015\012" }

        # mac
        elsif ( $test =~ /^\015+$/ ) { $ending = "\015" }

        # unix
        elsif ( $test =~ /^\012+$/ ) { $ending = "\012" }

        # unknown
        else { }
    }

    # no ending seen
    else { }

    return $ending;
}

195

196

sub catfile {

    # Join directory components and a file basename into a single path.
    # The last argument is the basename; any preceding arguments are
    # directory components.  Returns undef if no usable path can be made.

    # find out once, at compile time, whether File::Spec can be loaded
    BEGIN { eval "require File::Spec"; $missing_file_spec = $@; }

    # let File::Spec do the work whenever it is available
    if ( !$missing_file_spec ) {
        return File::Spec->catfile(@_);
    }

    # Perl 5.004 systems may lack File::Spec, so make a simple attempt by
    # hand, assuming File::Basename is available.  A candidate path is
    # accepted only if fileparse recovers the original basename from it.
    my $basename  = pop @_;
    my $directory = join '/', @_;

    # first try plain concatenation, then try with a '/' separator, which
    # should work at least for Windows and Unix
    foreach my $separator ( '', '/' ) {

        # the '/' form is not attempted under VMS
        return undef if ( $separator && $^O eq 'VMS' );

        my $candidate = $directory . $separator . $basename;
        my ($parsed_name) = fileparse($candidate);
        return $candidate if ( $parsed_name eq $basename );
    }
    return undef;
}

224

225

sub make_temporary_filename {

    # Make a temporary filename and return it.
    #
    # The POSIX tmpnam() function tends to be unreliable for non-unix
    # systems (at least for the win32 systems that I've tested), so use
    # a pre-defined name.  A slight disadvantage of this is that two
    # perltidy runs in the same working directory may conflict.
    # However, the chance of that is small and manageable by the user.
    # An alternative would be to check for the file's existence and use,
    # say .TMP0, .TMP1, etc, but that scheme has its own problems.  So,
    # keep it simple.
    #
    # On other systems a name from tmpnam() is returned once a file of that
    # name has been created exclusively (and closed again); the pre-defined
    # name is the fallback if POSIX cannot supply tmpnam or both tries fail.
    my $name = "perltidy.TMP";

    # note: classic Mac OS identifies itself as 'MacOS' in $^O
    if ( $^O =~ /win32|dos/i || $^O eq 'VMS' || $^O eq 'MacOS' ) {
        return $name;
    }
    eval "use POSIX qw(tmpnam)";
    if ($@) { return $name }

    # IO::File supplies the O_RDWR, O_CREAT and O_EXCL constants used below
    use IO::File;

    # just make a couple of tries before giving up and using the default
    for ( 0 .. 1 ) {
        my $tmpname = tmpnam();

        # O_EXCL makes the open fail if a file of this name already exists
        my $fh = IO::File->new( $tmpname, O_RDWR | O_CREAT | O_EXCL );
        if ($fh) {
            $fh->close();
            return ($tmpname);
        }
    }
    return ($name);
}

257

258

# Here is a map of the flow of data from the input source to the output

259

# line sink:

260

#

261

# LineSource-->Tokenizer-->Formatter-->VerticalAligner-->FileWriter-->

262

# input groups output

263

# lines tokens lines of lines lines

264

# lines

265

#

266

# The names correspond to the package names responsible for the unit processes.

267

#

268

# The overall process is controlled by the "main" package.

269

#

270

# LineSource is the stream of input lines

271

#

272

# Tokenizer analyzes a line and breaks it into tokens, peeking ahead

273

# if necessary. A token is any section of the input line which should be

274

# manipulated as a single entity during formatting. For example, a single

275

# ',' character is a token, and so is an entire side comment. It handles

276

# the complexities of Perl syntax, such as distinguishing between '<<' as

277

# a shift operator and as a here-document, or distinguishing between '/'

278

# as a divide symbol and as a pattern delimiter.

279

#

280

# Formatter inserts and deletes whitespace between tokens, and breaks

281

# sequences of tokens at appropriate points as output lines. It bases its

282

# decisions on the default rules as modified by any command-line options.

283

#

284

# VerticalAligner collects groups of lines together and tries to line up

285

# certain tokens, such as '=>', '#', and '=' by adding whitespace.

286

#

287

# FileWriter simply writes lines to the output stream.

288

#

289

# The Logger package, not shown, records significant events and warning

290

# messages. It writes a .LOG file, which may be saved with a

291

# '-log' or a '-g' flag.

292

293

{

294

295

# variables needed by interrupt handler:

296

my $tokenizer;

297

my $input_file;

298

299

# this routine may be called to give a status report if interrupted. If a

300

# parameter is given, it will call exit with that parameter. This is no

301

# longer used because it works under Unix but not under Windows.

302

sub interrupt_handler {

    # Write a one line status report to STDERR, naming the current input
    # line number when a tokenizer exists and the current input source when
    # one is set.  If an argument is supplied and defined, exit with it as
    # the exit status after the report has been written.
    my $exit_flag = shift;

    print STDERR "perltidy interrupted";

    # the line number is only asked for when a tokenizer is active
    if ($tokenizer) {
        print STDERR " at line "
          . Perl::Tidy::Tokenizer::get_input_line_number();
    }

    # say whether the input is a plain file name or a reference
    if ($input_file) {
        my $label = ref($input_file) ? " of reference to:" : " of file:";
        print STDERR $label;
        print STDERR " $input_file";
    }

    print STDERR "\n";

    if ( defined($exit_flag) ) {
        exit $exit_flag;
    }
}

320

321

sub perltidy {

322

323

my %defaults = (

324

argv => undef,

325

destination => undef,

326

formatter => undef,

327

logfile => undef,

328

errorfile => undef,

329

perltidyrc => undef,

330

source => undef,

331

stderr => undef,

332

dump_options => undef,

333

dump_options_type => undef,

334

dump_getopt_flags => undef,

335

dump_options_category => undef,

336

dump_options_range => undef,

337

dump_abbreviations => undef,

338

);

339

340

# don't overwrite callers ARGV

341

local @ARGV = @ARGV;

342

343

my %input_hash = @_;

344

345

if ( my @bad_keys = grep { !exists $defaults{$_} } keys %input_hash ) {

346

local $" = ')(';

347

my @good_keys = sort keys %defaults;

348

@bad_keys = sort @bad_keys;

349

confess <<EOM;

350

------------------------------------------------------------------------

351

Unknown perltidy parameter : (@bad_keys)

352

perltidy only understands : (@good_keys)

353

------------------------------------------------------------------------

354

355

EOM

356

}

357

358

my $get_hash_ref = sub {

359

my ($key) = @_;

360

my $hash_ref = $input_hash{$key};

361

if ( defined($hash_ref) ) {

362

unless ( ref($hash_ref) eq 'HASH' ) {

363

my $what = ref($hash_ref);

364

my $but_is =

365

$what ? "but is ref to $what" : "but is not a reference";

366

croak <<EOM;

367

------------------------------------------------------------------------

368

error in call to perltidy:

369

-$key must be reference to HASH $but_is

370

------------------------------------------------------------------------

371

EOM

372

}

373

}

374

return $hash_ref;

375

};

376

377

%input_hash = ( %defaults, %input_hash );

378

my $argv = $input_hash{'argv'};

379

my $destination_stream = $input_hash{'destination'};

380

my $errorfile_stream = $input_hash{'errorfile'};

381

my $logfile_stream = $input_hash{'logfile'};

382

my $perltidyrc_stream = $input_hash{'perltidyrc'};

383

my $source_stream = $input_hash{'source'};

384

my $stderr_stream = $input_hash{'stderr'};

385

my $user_formatter = $input_hash{'formatter'};

386

387

# various dump parameters

388

my $dump_options_type = $input_hash{'dump_options_type'};

389

my $dump_options = $get_hash_ref->('dump_options');

390

my $dump_getopt_flags = $get_hash_ref->('dump_getopt_flags');

391

my $dump_options_category = $get_hash_ref->('dump_options_category');

392

my $dump_abbreviations = $get_hash_ref->('dump_abbreviations');

393

my $dump_options_range = $get_hash_ref->('dump_options_range');

394

395

# validate dump_options_type

396

if ( defined($dump_options) ) {

397

unless ( defined($dump_options_type) ) {

398

$dump_options_type = 'perltidyrc';

399

}

400

unless ( $dump_options_type =~ /^(perltidyrc|full)$/ ) {

401

croak <<EOM;

402

------------------------------------------------------------------------

403

Please check value of -dump_options_type in call to perltidy;

404

saw: '$dump_options_type'

405

expecting: 'perltidyrc' or 'full'

406

------------------------------------------------------------------------

407

EOM

408

409

}

410

}

411

else {

412

$dump_options_type = "";

413

}

414

415

if ($user_formatter) {

416

417

# if the user defines a formatter, there is no output stream,

418

# but we need a null stream to keep coding simple

419

$destination_stream = Perl::Tidy::DevNull->new();

420

}

421

422

# see if ARGV is overridden

423

if ( defined($argv) ) {

424

425

my $rargv = ref $argv;

426

if ( $rargv eq 'SCALAR' ) { $argv = $$argv; $rargv = undef }

427

428

# ref to ARRAY

429

if ($rargv) {

430

if ( $rargv eq 'ARRAY' ) {

431

@ARGV = @$argv;

432

}

433

else {

434

croak <<EOM;

435

------------------------------------------------------------------------

436

Please check value of -argv in call to perltidy;

437

it must be a string or ref to ARRAY but is: $rargv

438

------------------------------------------------------------------------

439

EOM

440

}

441

}

442

443

# string

444

else {

445

my ( $rargv, $msg ) = parse_args($argv);

446

if ($msg) {

447

die <<EOM;

448

Error parsing this string passed to to perltidy with 'argv':

449

$msg

450

EOM

451

}

452

@ARGV = @{$rargv};

453

}

454

}

455

456

# redirect STDERR if requested

457

if ($stderr_stream) {

458

my ( $fh_stderr, $stderr_file ) =

459

Perl::Tidy::streamhandle( $stderr_stream, 'w' );

460

if ($fh_stderr) { *STDERR = $fh_stderr }

461

else {

462

croak <<EOM;

463

------------------------------------------------------------------------

464

Unable to redirect STDERR to $stderr_stream

465

Please check value of -stderr in call to perltidy

466

------------------------------------------------------------------------

467

EOM

468

}

469

}

470

471

my $rpending_complaint;

472

$$rpending_complaint = "";

473

my $rpending_logfile_message;

474

$$rpending_logfile_message = "";

475

476

my ( $is_Windows, $Windows_type ) =

477

look_for_Windows($rpending_complaint);

478

479

# VMS file names are restricted to a 40.40 format, so we append _tdy

480

# instead of .tdy, etc. (but see also sub check_vms_filename)

481

my $dot;

482

my $dot_pattern;

483

if ( $^O eq 'VMS' ) {

484

$dot = '_';

485

$dot_pattern = '_';

486

}

487

else {

488

$dot = '.';

489

$dot_pattern = '\.'; # must escape for use in regex

490

}

491

492

# handle command line options

493

my ( $rOpts, $config_file, $rraw_options, $saw_extrude, $roption_string,

494

$rexpansion, $roption_category, $roption_range )

495

= process_command_line(

496

$perltidyrc_stream, $is_Windows, $Windows_type,

497

$rpending_complaint, $dump_options_type,

498

);

499

500

# return or exit immediately after all dumps

501

my $quit_now = 0;

502

503

# Getopt parameters and their flags

504

if ( defined($dump_getopt_flags) ) {

505

$quit_now = 1;

506

foreach my $op ( @{$roption_string} ) {

507

my $opt = $op;

508

my $flag = "";

509

510

# Examples:

511

# some-option=s

512

# some-option=i

513

# some-option:i

514

# some-option!

515

if ( $opt =~ /(.*)(!|=.*|:.*)$/ ) {

516

$opt = $1;

517

$flag = $2;

518

}

519

$dump_getopt_flags->{$opt} = $flag;

520

}

521

}

522

523

if ( defined($dump_options_category) ) {

524

$quit_now = 1;

525

%{$dump_options_category} = %{$roption_category};

526

}

527

528

if ( defined($dump_options_range) ) {

529

$quit_now = 1;

530

%{$dump_options_range} = %{$roption_range};

531

}

532

533

if ( defined($dump_abbreviations) ) {

534

$quit_now = 1;

535

%{$dump_abbreviations} = %{$rexpansion};

536

}

537

538

if ( defined($dump_options) ) {

539

$quit_now = 1;

540

%{$dump_options} = %{$rOpts};

541

}

542

543

return if ($quit_now);

544

545

# make printable string of options for this run as possible diagnostic

546

my $readable_options = readable_options( $rOpts, $roption_string );

547

548

# dump from command line

549

if ( $rOpts->{'dump-options'} ) {

550

print STDOUT $readable_options;

551

exit 1;

552

}

553

554

check_options( $rOpts, $is_Windows, $Windows_type,

555

$rpending_complaint );

556

557

if ($user_formatter) {

558

$rOpts->{'format'} = 'user';

559

}

560

561

# there must be one entry here for every possible format

562

my %default_file_extension = (

563

tidy => 'tdy',

564

html => 'html',

565

user => '',

566

);

567

568

# be sure we have a valid output format

569

unless ( exists $default_file_extension{ $rOpts->{'format'} } ) {

570

my $formats = join ' ',

571

sort map { "'" . $_ . "'" } keys %default_file_extension;

572

my $fmt = $rOpts->{'format'};

573

die "-format='$fmt' but must be one of: $formats\n";

574

}

575

576

my $output_extension =

577

make_extension( $rOpts->{'output-file-extension'},

578

$default_file_extension{ $rOpts->{'format'} }, $dot );

579

580

my $backup_extension =

581

make_extension( $rOpts->{'backup-file-extension'}, 'bak', $dot );

582

583

my $html_toc_extension =

584

make_extension( $rOpts->{'html-toc-extension'}, 'toc', $dot );

585

586

my $html_src_extension =

587

make_extension( $rOpts->{'html-src-extension'}, 'src', $dot );

588

589

# check for -b option;

590

my $in_place_modify = $rOpts->{'backup-and-modify-in-place'}

591

&& $rOpts->{'format'} eq 'tidy' # silently ignore unless beautify mode

592

&& @ARGV > 0; # silently ignore if standard input;

593

# this allows -b to be in a .perltidyrc file

594

# without error messages when running from an editor

595

596

# turn off -b with warnings in case of conflicts with other options

597

if ($in_place_modify) {

598

if ( $rOpts->{'standard-output'} ) {

599

warn "Ignoring -b; you may not use -b and -st together\n";

600

$in_place_modify = 0;

601

}

602

if ($destination_stream) {

603

warn

604

"Ignoring -b; you may not specify a destination array and -b together\n";

605

$in_place_modify = 0;

606

}

607

if ($source_stream) {

608

warn

609

"Ignoring -b; you may not specify a source array and -b together\n";

610

$in_place_modify = 0;

611

}

612

if ( $rOpts->{'outfile'} ) {

613

warn "Ignoring -b; you may not use -b and -o together\n";

614

$in_place_modify = 0;

615

}

616

if ( defined( $rOpts->{'output-path'} ) ) {

617

warn "Ignoring -b; you may not use -b and -opath together\n";

618

$in_place_modify = 0;

619

}

620

}

621

622

Perl::Tidy::Formatter::check_options($rOpts);

623

if ( $rOpts->{'format'} eq 'html' ) {

624

Perl::Tidy::HtmlWriter->check_options($rOpts);

625

}

626

627

# make the pattern of file extensions that we shouldn't touch

628

my $forbidden_file_extensions = "(($dot_pattern)(LOG|DEBUG|ERR|TEE)";

629

if ($output_extension) {

630

my $ext = quotemeta($output_extension);

631

$forbidden_file_extensions .= "|$ext";

632

}

633

if ( $in_place_modify && $backup_extension ) {

634

my $ext = quotemeta($backup_extension);

635

$forbidden_file_extensions .= "|$ext";

636

}

637

$forbidden_file_extensions .= ')$';

638

639

# Create a diagnostics object if requested;

640

# This is only useful for code development

641

my $diagnostics_object = undef;

642

if ( $rOpts->{'DIAGNOSTICS'} ) {

643

$diagnostics_object = Perl::Tidy::Diagnostics->new();

644

}

645

646

# no filenames should be given if input is from an array

647

if ($source_stream) {

648

if ( @ARGV > 0 ) {

649

die

650

"You may not specify any filenames when a source array is given\n";

651

}

652

653

# we'll stuff the source array into ARGV

654

unshift( @ARGV, $source_stream );

655

656

# No special treatment for source stream which is a filename.

657

# This will enable checks for binary files and other bad stuff.

658

$source_stream = undef unless ref($source_stream);

659

}

660

661

# use stdin by default if no source array and no args

662

else {

663

unshift( @ARGV, '-' ) unless @ARGV;

664

}

665

666

# loop to process all files in argument list

667

my $number_of_files = @ARGV;

668

my $formatter = undef;

669

$tokenizer = undef;

670

while ( $input_file = shift @ARGV ) {

671

my $fileroot;

672

my $input_file_permissions;

673

674

#---------------------------------------------------------------

675

# determine the input file name

676

#---------------------------------------------------------------

677

if ($source_stream) {

678

$fileroot = "perltidy";

679

}

680

elsif ( $input_file eq '-' ) { # '-' indicates input from STDIN

681

$fileroot = "perltidy"; # root name to use for .ERR, .LOG, etc

682

$in_place_modify = 0;

683

}

684

else {

685

$fileroot = $input_file;

686

unless ( -e $input_file ) {

687

688

# file doesn't exist - check for a file glob

689

if ( $input_file =~ /([\?\*\[\{])/ ) {

690

691

# Windows shell may not remove quotes, so do it

692

my $input_file = $input_file;

693

if ( $input_file =~ /^\'(.+)\'$/ ) { $input_file = $1 }

694

if ( $input_file =~ /^\"(.+)\"$/ ) { $input_file = $1 }

695

my $pattern = fileglob_to_re($input_file);

696

eval "/$pattern/";

697

if ( !$@ && opendir( DIR, './' ) ) {

698

my @files =

699

grep { /$pattern/ && !-d $_ } readdir(DIR);

700

closedir(DIR);

701

if (@files) {

702

unshift @ARGV, @files;

703

next;

704

}

705

}

706

}

707

print "skipping file: '$input_file': no matches found\n";

708

next;

709

}

710

711

unless ( -f $input_file ) {

712

print "skipping file: $input_file: not a regular file\n";

713

next;

714

}

715

716

unless ( ( -T $input_file ) || $rOpts->{'force-read-binary'} ) {

717

print

718

"skipping file: $input_file: Non-text (override with -f)\n";

719

next;

720

}

721

722

# we should have a valid filename now

723

$fileroot = $input_file;

724

$input_file_permissions = ( stat $input_file )[2] & 07777;

725

726

if ( $^O eq 'VMS' ) {

727

( $fileroot, $dot ) = check_vms_filename($fileroot);

728

}

729

730

# add option to change path here

731

if ( defined( $rOpts->{'output-path'} ) ) {

732

733

my ( $base, $old_path ) = fileparse($fileroot);

734

my $new_path = $rOpts->{'output-path'};

735

unless ( -d $new_path ) {

736

unless ( mkdir $new_path, 0777 ) {

737

die "unable to create directory $new_path: $!\n";

738

}

739

}

740

my $path = $new_path;

741

$fileroot = catfile( $path, $base );

742

unless ($fileroot) {

743

die <<EOM;

744

------------------------------------------------------------------------

745

Problem combining $new_path and $base to make a filename; check -opath

746

------------------------------------------------------------------------

747

EOM

748

}

749

}

750

}

751

752

# Skip files with same extension as the output files because

753

# this can lead to a messy situation with files like

754

# script.tdy.tdy.tdy ... or worse problems ... when you

755

# rerun perltidy over and over with wildcard input.

756

if (

757

!$source_stream

758

&& ( $input_file =~ /$forbidden_file_extensions/o

759

|| $input_file eq 'DIAGNOSTICS' )

760

)

761

{

762

print "skipping file: $input_file: wrong extension\n";

763

next;

764

}

765

766

# the 'source_object' supplies a method to read the input file

767

my $source_object =

768

Perl::Tidy::LineSource->new( $input_file, $rOpts,

769

$rpending_logfile_message );

770

next unless ($source_object);

771

772

# register this file name with the Diagnostics package

773

$diagnostics_object->set_input_file($input_file)

774

if $diagnostics_object;

775

776

#---------------------------------------------------------------

777

# determine the output file name

778

#---------------------------------------------------------------

779

my $output_file = undef;

780

my $actual_output_extension;

781

782

if ( $rOpts->{'outfile'} ) {

783

784

if ( $number_of_files <= 1 ) {

785

786

if ( $rOpts->{'standard-output'} ) {

787

die "You may not use -o and -st together\n";

788

}

789

elsif ($destination_stream) {

790

die

791

"You may not specify a destination array and -o together\n";

792

}

793

elsif ( defined( $rOpts->{'output-path'} ) ) {

794

die "You may not specify -o and -opath together\n";

795

}

796

elsif ( defined( $rOpts->{'output-file-extension'} ) ) {

797

die "You may not specify -o and -oext together\n";

798

}

799

$output_file = $rOpts->{outfile};

800

801

# make sure user gives a file name after -o

802

if ( $output_file =~ /^-/ ) {

803

die "You must specify a valid filename after -o\n";

804

}

805

806

# do not overwrite input file with -o

807

if ( defined($input_file_permissions)

808

&& ( $output_file eq $input_file ) )

809

{

810

die

811

"Use 'perltidy -b $input_file' to modify in-place\n";

812

}

813

}

814

else {

815

die "You may not use -o with more than one input file\n";

816

}

817

}

818

elsif ( $rOpts->{'standard-output'} ) {

819

if ($destination_stream) {

820

die

821

"You may not specify a destination array and -st together\n";

822

}

823

$output_file = '-';

824

825

if ( $number_of_files <= 1 ) {

826

}

827

else {

828

die "You may not use -st with more than one input file\n";

829

}

830

}

831

elsif ($destination_stream) {

832

$output_file = $destination_stream;

833

}

834

elsif ($source_stream) { # source but no destination goes to stdout

835

$output_file = '-';

836

}

837

elsif ( $input_file eq '-' ) {

838

$output_file = '-';

839

}

840

else {

841

if ($in_place_modify) {

842

$output_file = IO::File->new_tmpfile()

843

or die "cannot open temp file for -b option: $!\n";

844

}

845

else {

846

$actual_output_extension = $output_extension;

847

$output_file = $fileroot . $output_extension;

848

}

849

}

850

851

# the 'sink_object' knows how to write the output file

852

my $tee_file = $fileroot . $dot . "TEE";

853

854

my $line_separator = $rOpts->{'output-line-ending'};

855

if ( $rOpts->{'preserve-line-endings'} ) {

856

$line_separator = find_input_line_ending($input_file);

857

}

858

859

# Eventually all I/O may be done with binmode, but for now it is

860

# only done when a user requests a particular line separator

861

# through the -ple or -ole flags

862

my $binmode = 0;

863

if ( defined($line_separator) ) { $binmode = 1 }

864

else { $line_separator = "\n" }

865

866

my $sink_object =

867

Perl::Tidy::LineSink->new( $output_file, $tee_file,

868

$line_separator, $rOpts, $rpending_logfile_message, $binmode );

869

870

#---------------------------------------------------------------

871

# initialize the error logger

872

#---------------------------------------------------------------

873

my $warning_file = $fileroot . $dot . "ERR";

874

if ($errorfile_stream) { $warning_file = $errorfile_stream }

875

my $log_file = $fileroot . $dot . "LOG";

876

if ($logfile_stream) { $log_file = $logfile_stream }

877

878

my $logger_object =

879

Perl::Tidy::Logger->new( $rOpts, $log_file, $warning_file,

880

$saw_extrude );

881

write_logfile_header(

882

$rOpts, $logger_object, $config_file,

883

$rraw_options, $Windows_type, $readable_options,

884

);

885

if ($$rpending_logfile_message) {

886

$logger_object->write_logfile_entry($$rpending_logfile_message);

887

}

888

if ($$rpending_complaint) {

889

$logger_object->complain($$rpending_complaint);

890

}

891

892

#---------------------------------------------------------------

893

# initialize the debug object, if any

894

#---------------------------------------------------------------

895

my $debugger_object = undef;

896

if ( $rOpts->{DEBUG} ) {

897

$debugger_object =

898

Perl::Tidy::Debugger->new( $fileroot . $dot . "DEBUG" );

899

}

900

901

#---------------------------------------------------------------

902

# create a formatter for this file : html writer or pretty printer

903

#---------------------------------------------------------------

904

905

# we have to delete any old formatter because, for safety,

906

# the formatter will check to see that there is only one.

907

$formatter = undef;

908

909

if ($user_formatter) {

910

$formatter = $user_formatter;

911

}

912

elsif ( $rOpts->{'format'} eq 'html' ) {

913

$formatter =

914

Perl::Tidy::HtmlWriter->new( $fileroot, $output_file,

915

$actual_output_extension, $html_toc_extension,

916

$html_src_extension );

917

}

918

elsif ( $rOpts->{'format'} eq 'tidy' ) {

919

$formatter = Perl::Tidy::Formatter->new(

920

logger_object => $logger_object,

921

diagnostics_object => $diagnostics_object,

922

sink_object => $sink_object,

923

);

924

}

925

else {

926

die "I don't know how to do -format=$rOpts->{'format'}\n";

927

}

928

929

unless ($formatter) {

930

die "Unable to continue with $rOpts->{'format'} formatting\n";

931

}

932

933

#---------------------------------------------------------------

934

# create the tokenizer for this file

935

#---------------------------------------------------------------

936

$tokenizer = undef; # must destroy old tokenizer

937

$tokenizer = Perl::Tidy::Tokenizer->new(

938

source_object => $source_object,

939

logger_object => $logger_object,

940

debugger_object => $debugger_object,

941

diagnostics_object => $diagnostics_object,

942

starting_level => $rOpts->{'starting-indentation-level'},

943

tabs => $rOpts->{'tabs'},

944

indent_columns => $rOpts->{'indent-columns'},

945

look_for_hash_bang => $rOpts->{'look-for-hash-bang'},

946

look_for_autoloader => $rOpts->{'look-for-autoloader'},

947

look_for_selfloader => $rOpts->{'look-for-selfloader'},

948

trim_qw => $rOpts->{'trim-qw'},

949

);

950

951

#---------------------------------------------------------------

952

# now we can do it

953

#---------------------------------------------------------------

954

process_this_file( $tokenizer, $formatter );

955

956

#---------------------------------------------------------------

957

# close the input source and report errors

958

#---------------------------------------------------------------

959

$source_object->close_input_file();

960

961

# get file names to use for syntax check

962

my $ifname = $source_object->get_input_file_copy_name();

963

my $ofname = $sink_object->get_output_file_copy();

964

965

#---------------------------------------------------------------

966

# handle the -b option (backup and modify in-place)

967

#---------------------------------------------------------------

968

if ($in_place_modify) {

969

unless ( -f $input_file ) {

970

971

# oh, oh, no real file to backup ..

972

# shouldn't happen because of numerous preliminary checks

973

die print

974

"problem with -b backing up input file '$input_file': not a file\n";

975

}

976

my $backup_name = $input_file . $backup_extension;

977

if ( -f $backup_name ) {

978

unlink($backup_name)

979

or die

980

"unable to remove previous '$backup_name' for -b option; check permissions: $!\n";

981

}

982

rename( $input_file, $backup_name )

983

or die

984

"problem renaming $input_file to $backup_name for -b option: $!\n";

985

$ifname = $backup_name;

986

987

seek( $output_file, 0, 0 )

988

or die "unable to rewind tmp file for -b option: $!\n";

989

990

my $fout = IO::File->new("> $input_file")

991

or die

992

"problem opening $input_file for write for -b option; check directory permissions: $!\n";

993

binmode $fout;

994

my $line;

995

while ( $line = $output_file->getline() ) {

996

$fout->print($line);

997

}

998

$fout->close();

999

$output_file = $input_file;

1000

$ofname = $input_file;

1001

}

1002

1003

#---------------------------------------------------------------

1004

# clean up and report errors

1005

#---------------------------------------------------------------

1006

$sink_object->close_output_file() if $sink_object;

1007

$debugger_object->close_debug_file() if $debugger_object;

1008

1009

my $infile_syntax_ok = 0; # -1 no 0=don't know 1 yes

1010

if ($output_file) {

1011

1012

if ($input_file_permissions) {

1013

1014

# give output script same permissions as input script, but

1015

# make it user-writable or else we can't run perltidy again.

1016

# Thus we retain whatever executable flags were set.

1017

if ( $rOpts->{'format'} eq 'tidy' ) {

1018

chmod( $input_file_permissions | 0600, $output_file );

1019

}

1020

1021

# else use default permissions for html and any other format

1022

1023

}

1024

if ( $logger_object && $rOpts->{'check-syntax'} ) {

1025

$infile_syntax_ok =

1026

check_syntax( $ifname, $ofname, $logger_object, $rOpts );

1027

}

1028

}

1029

1030

$logger_object->finish( $infile_syntax_ok, $formatter )

1031

if $logger_object;

1032

} # end of loop to process all files

1033

} # end of main program

1034

}

1035

1036

sub fileglob_to_re {

    # Convert a simple file glob into the text of an anchored regular
    # expression (modified/corrected from the version in find2perl).
    #
    # Given:
    #   $x - a glob string, such as '*.pl' or 'file?.pm'
    # Returns:
    #   a string holding the equivalent pattern, anchored at both ends
    #   with '^' and '\z' so that it must match the whole name.
    #
    # Only the glob wildcards '*' and '?' are translated.  Square brackets
    # are passed through untouched, so a glob character class remains a
    # regex character class.
    my $x = shift;

    # Escape characters which are literal in a glob but special in a
    # regex.  The set includes '+', '|', '{' and '}' so that names such
    # as 'c++.pl' are matched literally instead of being interpreted as
    # quantifiers or alternation.
    $x =~ s#([./^\$()+|{}])#\\$1#g;

    $x =~ s#\*#.*#g;    # '*' -> '.*'
    $x =~ s#\?#.#g;     # '?' -> '.'

    return "^$x\\z";    # match whole word
}

1045

1046

sub make_extension {

    # Build a file extension string, adding a leading separator when
    # one is needed.
    #
    # Given:
    #   $extension - the extension requested by the caller (may be empty)
    #   $default   - the extension to use when none was requested
    #   $dot       - the separator to prepend; normally '.', but it may
    #                actually be an '_' under VMS
    # Returns:
    #   the extension to use
    my ( $extension, $default, $dot ) = @_;

    # Nothing requested: fall back on the default
    if ( !$extension ) { $extension = $default }

    # The separator is only added in front of an extension which begins
    # with a letter or digit.  Anything else is taken as-is, which gives
    # the user some freedom to choose another leading character.
    return ( $extension =~ /^[a-zA-Z0-9]/ )
      ? $dot . $extension
      : $extension;
}

1062

1063

sub write_logfile_header {

    # Write the introductory entries of the log file.
    #
    # Given:
    #   $rOpts            - reference to the hash of options
    #   $logger_object    - object providing write_logfile_entry()
    #   $config_file      - name of the configuration file, if one was found
    #   $rraw_options     - reference to the list of raw option strings
    #   $Windows_type     - Windows type string; false if not applicable
    #   $readable_options - printable text of the final parameter set
    #
    # Side effect: when DEBUG or show-options is set, $rOpts->{'logfile'}
    # is set to 1 so that the logfile will be saved.
    my (
        $rOpts,        $logger_object, $config_file,
        $rraw_options, $Windows_type,  $readable_options
    ) = @_;

    # Identify the program version, the system, and the perl version
    $logger_object->write_logfile_entry(
"perltidy version $VERSION log file on a $^O system, OLD_PERL_VERSION=$]\n"
    );
    $logger_object->write_logfile_entry("Windows type is $Windows_type\n")
      if ($Windows_type);

    my $options_string = join( ' ', @$rraw_options );

    $logger_object->write_logfile_entry(
        "Found Configuration File >>> $config_file \n")
      if ($config_file);

    # Echo the raw parameters
    foreach my $entry (
        "Configuration and command line parameters for this run:\n",
        "$options_string\n"
      )
    {
        $logger_object->write_logfile_entry($entry);
    }

    # Optionally show the fully expanded parameter set between two rules
    my $want_full_dump = $rOpts->{'DEBUG'} || $rOpts->{'show-options'};
    if ($want_full_dump) {
        $rOpts->{'logfile'} = 1;    # force logfile to be saved

        my $rule = "------------------------------------\n";
        foreach my $entry (
            "Final parameter set for this run\n",
            $rule, $readable_options, $rule
          )
        {
            $logger_object->write_logfile_entry($entry);
        }
    }

    $logger_object->write_logfile_entry(
        "To find error messages search for 'WARNING' with your editor\n");
}

1099

1100

sub generate_options {

    ######################################################################
    # Generate and return references to:
    #  @option_string - the list of options to be passed to Getopt::Long
    #  @defaults - the list of default options
    #  %expansion - a hash showing how all abbreviations are expanded
    #  %category - a hash giving the general category of each option
    #  %option_range - a hash giving the valid ranges of certain options
    #
    # Takes no arguments.  The five references are returned as a list in
    # the order shown above.  Dies if an abbreviation is defined twice
    # through the $add_option helper below.
    #
    # Note: a few options are not documented in the man page and usage
    # message.  This is because these are experimental or debug options and
    # may or may not be retained in future versions.
    #
    # Here are the undocumented flags as far as I know.  Any of them
    # may disappear at any time.  They are mainly for fine-tuning
    # and debugging.
    #
    # fll --> fuzzy-line-length           # a trivial parameter which gets
    #                                       turned off for the extrude option
    #                                       which is mainly for debugging
    # chk --> check-multiline-quotes      # check for old bug; to be deleted
    # scl --> short-concatenation-item-length   # helps break at '.'
    # recombine                           # for debugging line breaks
    # valign                              # for debugging vertical alignment
    # I   --> DIAGNOSTICS                 # for debugging
    ######################################################################

    # here is a summary of the Getopt codes:
    # <none> does not take an argument
    # =s takes a mandatory string
    # :s takes an optional string  (DO NOT USE - filenames will get eaten up)
    # =i takes a mandatory integer
    # :i takes an optional integer (NOT RECOMMENDED - can cause trouble)
    # ! does not take an argument and may be negated
    #  i.e., -foo and -nofoo are allowed
    # a double dash signals the end of the options list
    #
    #---------------------------------------------------------------
    # Define the option string passed to GetOptions.
    #---------------------------------------------------------------

    my @option_string   = ();
    my %expansion       = ();
    my %option_category = ();
    my %option_range    = ();

    # names of categories in manual
    # leading integers will allow sorting
    my @category_name = (
        '0. I/O control',
        '1. Basic formatting options',
        '2. Code indentation control',
        '3. Whitespace control',
        '4. Comment controls',
        '5. Linebreak controls',
        '6. Controlling list formatting',
        '7. Retaining or ignoring existing line breaks',
        '8. Blank line control',
        '9. Other controls',
        '10. HTML options',
        '11. pod2html options',
        '12. Controlling HTML properties',
        '13. Debugging',
    );

    #  These options are parsed directly by perltidy:
    #    help h
    #    version v
    #  However, they are included in the option set so that they will
    #  be seen in the options dump.

    # These long option names have no abbreviations or are treated specially
    @option_string = qw(
      html!
      noprofile
      no-profile
      npro
      recombine!
      valign!
    );

    my $category = 13;    # Debugging
    foreach (@option_string) {
        my $opt = $_;     # must avoid changing the actual flag
        $opt =~ s/!$//;
        $option_category{$opt} = $category_name[$category];
    }

    # NOTE(review): index 11 selects '11. pod2html options', although the
    # original comment here said "HTML" ('10. HTML options' is index 10).
    # The existing behavior is kept -- confirm which was intended.
    $category = 11;
    $option_category{html} = $category_name[$category];

    # routine to install and check options
    #   $long_name  - full option name
    #   $short_name - abbreviation (may be empty)
    #   $flag       - Getopt::Long type code ('!', '=s', '=i', ':i' or '')
    # The option is filed under whatever $category holds at call time.
    my $add_option = sub {
        my ( $long_name, $short_name, $flag ) = @_;
        push @option_string, $long_name . $flag;
        $option_category{$long_name} = $category_name[$category];
        if ($short_name) {
            if ( $expansion{$short_name} ) {
                my $existing_name = $expansion{$short_name}[0];
                die
"redefining abbreviation $short_name for $long_name; already used for $existing_name\n";
            }
            $expansion{$short_name} = [$long_name];

            # negatable options also get a negated abbreviation
            if ( $flag eq '!' ) {
                my $nshort_name = 'n' . $short_name;
                my $nolong_name = 'no' . $long_name;
                if ( $expansion{$nshort_name} ) {
                    my $existing_name = $expansion{$nshort_name}[0];
                    die
"attempting to redefine abbreviation $nshort_name for $nolong_name; already used for $existing_name\n";
                }
                $expansion{$nshort_name} = [$nolong_name];
            }
        }
    };

    # Install long option names which have a simple abbreviation.
    # Options with code '!' get standard negation ('no' for long names,
    # 'n' for abbreviations).  Categories follow the manual.

    ###########################
    $category = 0;    # I/O_Control
    ###########################
    $add_option->( 'backup-and-modify-in-place', 'b',     '!' );
    $add_option->( 'backup-file-extension',      'bext',  '=s' );
    $add_option->( 'force-read-binary',          'f',     '!' );
    $add_option->( 'format',                     'fmt',   '=s' );
    $add_option->( 'logfile',                    'log',   '!' );
    $add_option->( 'logfile-gap',                'g',     ':i' );
    $add_option->( 'outfile',                    'o',     '=s' );
    $add_option->( 'output-file-extension',      'oext',  '=s' );
    $add_option->( 'output-path',                'opath', '=s' );
    $add_option->( 'profile',                    'pro',   '=s' );
    $add_option->( 'quiet',                      'q',     '!' );
    $add_option->( 'standard-error-output',      'se',    '!' );
    $add_option->( 'standard-output',            'st',    '!' );
    $add_option->( 'warning-output',             'w',     '!' );

    # options which are both toggle switches and values moved here
    # to hide from tidyview (which does not show category 0 flags):
    # -ole moved here from category 1
    # -sil moved here from category 2
    $add_option->( 'output-line-ending',         'ole', '=s' );
    $add_option->( 'starting-indentation-level', 'sil', '=i' );

    ########################################
    $category = 1;    # Basic formatting options
    ########################################
    $add_option->( 'check-syntax',             'syn',  '!' );
    $add_option->( 'entab-leading-whitespace', 'et',   '=i' );
    $add_option->( 'indent-columns',           'i',    '=i' );
    $add_option->( 'maximum-line-length',      'l',    '=i' );
    $add_option->( 'perl-syntax-check-flags',  'pscf', '=s' );
    $add_option->( 'preserve-line-endings',    'ple',  '!' );
    $add_option->( 'tabs',                     't',    '!' );

    ########################################
    $category = 2;    # Code indentation control
    ########################################
    $add_option->( 'continuation-indentation',           'ci',   '=i' );
    $add_option->( 'line-up-parentheses',                'lp',   '!' );
    $add_option->( 'outdent-keyword-list',               'okwl', '=s' );
    $add_option->( 'outdent-keywords',                   'okw',  '!' );
    $add_option->( 'outdent-labels',                     'ola',  '!' );
    $add_option->( 'outdent-long-quotes',                'olq',  '!' );
    $add_option->( 'indent-closing-brace',               'icb',  '!' );
    $add_option->( 'closing-token-indentation',          'cti',  '=i' );
    $add_option->( 'closing-paren-indentation',          'cpi',  '=i' );
    $add_option->( 'closing-brace-indentation',          'cbi',  '=i' );
    $add_option->( 'closing-square-bracket-indentation', 'csbi', '=i' );
    $add_option->( 'brace-left-and-indent',              'bli',  '!' );
    $add_option->( 'brace-left-and-indent-list',         'blil', '=s' );

    ########################################
    $category = 3;    # Whitespace control
    ########################################
    $add_option->( 'add-semicolons',                            'asc',   '!' );
    $add_option->( 'add-whitespace',                            'aws',   '!' );
    $add_option->( 'block-brace-tightness',                     'bbt',   '=i' );
    $add_option->( 'brace-tightness',                           'bt',    '=i' );
    $add_option->( 'delete-old-whitespace',                     'dws',   '!' );
    $add_option->( 'delete-semicolons',                         'dsm',   '!' );
    $add_option->( 'nospace-after-keyword',                     'nsak',  '=s' );
    $add_option->( 'nowant-left-space',                         'nwls',  '=s' );
    $add_option->( 'nowant-right-space',                        'nwrs',  '=s' );
    $add_option->( 'paren-tightness',                           'pt',    '=i' );
    $add_option->( 'space-after-keyword',                       'sak',   '=s' );
    $add_option->( 'space-for-semicolon',                       'sfs',   '!' );
    $add_option->( 'space-function-paren',                      'sfp',   '!' );
    $add_option->( 'space-keyword-paren',                       'skp',   '!' );
    $add_option->( 'space-terminal-semicolon',                  'sts',   '!' );
    $add_option->( 'square-bracket-tightness',                  'sbt',   '=i' );
    $add_option->( 'square-bracket-vertical-tightness',         'sbvt',  '=i' );
    $add_option->( 'square-bracket-vertical-tightness-closing', 'sbvtc', '=i' );
    $add_option->( 'trim-qw',                                   'tqw',   '!' );
    $add_option->( 'want-left-space',                           'wls',   '=s' );
    $add_option->( 'want-right-space',                          'wrs',   '=s' );

    ########################################
    $category = 4;    # Comment controls
    ########################################
    $add_option->( 'closing-side-comment-else-flag',    'csce', '=i' );
    $add_option->( 'closing-side-comment-interval',     'csci', '=i' );
    $add_option->( 'closing-side-comment-list',         'cscl', '=s' );
    $add_option->( 'closing-side-comment-maximum-text', 'csct', '=i' );
    $add_option->( 'closing-side-comment-prefix',       'cscp', '=s' );
    $add_option->( 'closing-side-comment-warnings',     'cscw', '!' );
    $add_option->( 'closing-side-comments',             'csc',  '!' );
    $add_option->( 'format-skipping',                   'fs',   '!' );
    $add_option->( 'format-skipping-begin',             'fsb',  '=s' );
    $add_option->( 'format-skipping-end',               'fse',  '=s' );
    $add_option->( 'hanging-side-comments',             'hsc',  '!' );
    $add_option->( 'indent-block-comments',             'ibc',  '!' );
    $add_option->( 'indent-spaced-block-comments',      'isbc', '!' );
    $add_option->( 'fixed-position-side-comment',       'fpsc', '=i' );
    $add_option->( 'minimum-space-to-comment',          'msc',  '=i' );
    $add_option->( 'outdent-long-comments',             'olc',  '!' );
    $add_option->( 'outdent-static-block-comments',     'osbc', '!' );
    $add_option->( 'static-block-comment-prefix',       'sbcp', '=s' );
    $add_option->( 'static-block-comments',             'sbc',  '!' );
    $add_option->( 'static-side-comment-prefix',        'sscp', '=s' );
    $add_option->( 'static-side-comments',              'ssc',  '!' );

    ########################################
    $category = 5;    # Linebreak controls
    ########################################
    $add_option->( 'add-newlines',                        'anl',   '!' );
    $add_option->( 'block-brace-vertical-tightness',      'bbvt',  '=i' );
    $add_option->( 'block-brace-vertical-tightness-list', 'bbvtl', '=s' );
    $add_option->( 'brace-vertical-tightness',            'bvt',   '=i' );
    $add_option->( 'brace-vertical-tightness-closing',    'bvtc',  '=i' );
    $add_option->( 'cuddled-else',                        'ce',    '!' );
    $add_option->( 'delete-old-newlines',                 'dnl',   '!' );
    $add_option->( 'opening-brace-always-on-right',       'bar',   '!' );
    $add_option->( 'opening-brace-on-new-line',           'bl',    '!' );
    $add_option->( 'opening-hash-brace-right',            'ohbr',  '!' );
    $add_option->( 'opening-paren-right',                 'opr',   '!' );
    $add_option->( 'opening-square-bracket-right',        'osbr',  '!' );
    $add_option->( 'opening-sub-brace-on-new-line',       'sbl',   '!' );
    $add_option->( 'paren-vertical-tightness',            'pvt',   '=i' );
    $add_option->( 'paren-vertical-tightness-closing',    'pvtc',  '=i' );
    $add_option->( 'stack-closing-hash-brace',            'schb',  '!' );
    $add_option->( 'stack-closing-paren',                 'scp',   '!' );
    $add_option->( 'stack-closing-square-bracket',        'scsb',  '!' );
    $add_option->( 'stack-opening-hash-brace',            'sohb',  '!' );
    $add_option->( 'stack-opening-paren',                 'sop',   '!' );
    $add_option->( 'stack-opening-square-bracket',        'sosb',  '!' );
    $add_option->( 'vertical-tightness',                  'vt',    '=i' );
    $add_option->( 'vertical-tightness-closing',          'vtc',   '=i' );
    $add_option->( 'want-break-after',                    'wba',   '=s' );
    $add_option->( 'want-break-before',                   'wbb',   '=s' );
    $add_option->( 'break-after-all-operators',           'baao',  '!' );
    $add_option->( 'break-before-all-operators',          'bbao',  '!' );
    $add_option->( 'keep-interior-semicolons',            'kis',   '!' );

    ########################################
    $category = 6;    # Controlling list formatting
    ########################################
    $add_option->( 'break-at-old-comma-breakpoints', 'boc', '!' );
    $add_option->( 'comma-arrow-breakpoints',        'cab', '=i' );
    $add_option->( 'maximum-fields-per-table',       'mft', '=i' );

    ########################################
    $category = 7;    # Retaining or ignoring existing line breaks
    ########################################
    $add_option->( 'break-at-old-keyword-breakpoints', 'bok', '!' );
    $add_option->( 'break-at-old-logical-breakpoints', 'bol', '!' );
    $add_option->( 'break-at-old-ternary-breakpoints', 'bot', '!' );
    $add_option->( 'ignore-old-breakpoints',           'iob', '!' );

    ########################################
    $category = 8;    # Blank line control
    ########################################
    $add_option->( 'blanks-before-blocks',            'bbb', '!' );
    $add_option->( 'blanks-before-comments',          'bbc', '!' );
    $add_option->( 'blanks-before-subs',              'bbs', '!' );
    $add_option->( 'long-block-line-count',           'lbl', '=i' );
    $add_option->( 'maximum-consecutive-blank-lines', 'mbl', '=i' );
    $add_option->( 'swallow-optional-blank-lines',    'sob', '!' );

    ########################################
    $category = 9;    # Other controls
    ########################################
    $add_option->( 'delete-block-comments',        'dbc',  '!' );
    $add_option->( 'delete-closing-side-comments', 'dcsc', '!' );
    $add_option->( 'delete-pod',                   'dp',   '!' );
    $add_option->( 'delete-side-comments',         'dsc',  '!' );
    $add_option->( 'tee-block-comments',           'tbc',  '!' );
    $add_option->( 'tee-pod',                      'tp',   '!' );
    $add_option->( 'tee-side-comments',            'tsc',  '!' );
    $add_option->( 'look-for-autoloader',          'lal',  '!' );
    $add_option->( 'look-for-hash-bang',           'x',    '!' );
    $add_option->( 'look-for-selfloader',          'lsl',  '!' );
    $add_option->( 'pass-version-line',            'pvl',  '!' );

    ########################################
    $category = 13;    # Debugging
    ########################################
    $add_option->( 'DEBUG',                           'D',    '!' );
    $add_option->( 'DIAGNOSTICS',                     'I',    '!' );
    $add_option->( 'check-multiline-quotes',          'chk',  '!' );
    $add_option->( 'dump-defaults',                   'ddf',  '!' );
    $add_option->( 'dump-long-names',                 'dln',  '!' );
    $add_option->( 'dump-options',                    'dop',  '!' );
    $add_option->( 'dump-profile',                    'dpro', '!' );
    $add_option->( 'dump-short-names',                'dsn',  '!' );
    $add_option->( 'dump-token-types',                'dtt',  '!' );
    $add_option->( 'dump-want-left-space',            'dwls', '!' );
    $add_option->( 'dump-want-right-space',           'dwrs', '!' );
    $add_option->( 'fuzzy-line-length',               'fll',  '!' );
    $add_option->( 'help',                            'h',    '' );
    $add_option->( 'short-concatenation-item-length', 'scl',  '=i' );
    $add_option->( 'show-options',                    'opt',  '!' );
    $add_option->( 'version',                         'v',    '' );

    #---------------------------------------------------------------------

    # The Perl::Tidy::HtmlWriter will add its own options to the string
    Perl::Tidy::HtmlWriter->make_getopt_long_names( \@option_string );

    ########################################
    # Set categories 10, 11, 12
    ########################################
    # Based on their known order
    # ($category is deliberately "sticky": it changes when the first
    # html-linked or pod2html name is seen and then stays in effect for
    # the uncategorized names which follow.)
    $category = 12;    # HTML properties
    foreach my $opt (@option_string) {
        my $long_name = $opt;
        $long_name =~ s/(!|=.*|:.*)$//;
        unless ( defined( $option_category{$long_name} ) ) {
            if ( $long_name =~ /^html-linked/ ) {
                $category = 10;    # HTML options
            }
            elsif ( $long_name =~ /^pod2html/ ) {
                $category = 11;    # Pod2html
            }
            $option_category{$long_name} = $category_name[$category];
        }
    }

    #---------------------------------------------------------------
    # Assign valid ranges to certain options
    #---------------------------------------------------------------
    # In the future, these may be used to make preliminary checks
    # hash keys are long names
    # If key or value is undefined:
    #   strings may have any value
    #   integer ranges are >=0
    # If value is defined:
    #   value is [qw(any valid words)] for strings
    #   value is [min, max] for integers
    #   if min is undefined, there is no lower limit
    #   if max is undefined, there is no upper limit
    # Parameters not listed here have defaults
    %option_range = (
        'format'             => [ 'tidy', 'html', 'user' ],
        'output-line-ending' => [ 'dos',  'win',  'mac', 'unix' ],

        'block-brace-tightness'    => [ 0, 2 ],
        'brace-tightness'          => [ 0, 2 ],
        'paren-tightness'          => [ 0, 2 ],
        'square-bracket-tightness' => [ 0, 2 ],

        'block-brace-vertical-tightness'            => [ 0, 2 ],
        'brace-vertical-tightness'                  => [ 0, 2 ],
        'brace-vertical-tightness-closing'          => [ 0, 2 ],
        'paren-vertical-tightness'                  => [ 0, 2 ],
        'paren-vertical-tightness-closing'          => [ 0, 2 ],
        'square-bracket-vertical-tightness'         => [ 0, 2 ],
        'square-bracket-vertical-tightness-closing' => [ 0, 2 ],
        'vertical-tightness'                        => [ 0, 2 ],
        'vertical-tightness-closing'                => [ 0, 2 ],

        'closing-brace-indentation'          => [ 0, 3 ],
        'closing-paren-indentation'          => [ 0, 3 ],
        'closing-square-bracket-indentation' => [ 0, 3 ],
        'closing-token-indentation'          => [ 0, 3 ],

        'closing-side-comment-else-flag' => [ 0, 2 ],
        'comma-arrow-breakpoints'        => [ 0, 3 ],
    );

    # Note: we could actually allow negative ci if someone really wants it:
    # $option_range{'continuation-indentation'} = [ undef, undef ];

    #---------------------------------------------------------------
    # Assign default values to the above options here, except
    # for 'outfile' and 'help'.
    # These settings should approximate the perlstyle(1) suggestions.
    #---------------------------------------------------------------
    my @defaults = qw(
      add-newlines
      add-semicolons
      add-whitespace
      blanks-before-blocks
      blanks-before-comments
      blanks-before-subs
      block-brace-tightness=0
      block-brace-vertical-tightness=0
      brace-tightness=1
      brace-vertical-tightness-closing=0
      brace-vertical-tightness=0
      break-at-old-logical-breakpoints
      break-at-old-ternary-breakpoints
      break-at-old-keyword-breakpoints
      comma-arrow-breakpoints=1
      nocheck-syntax
      closing-side-comment-interval=6
      closing-side-comment-maximum-text=20
      closing-side-comment-else-flag=0
      closing-paren-indentation=0
      closing-brace-indentation=0
      closing-square-bracket-indentation=0
      continuation-indentation=2
      delete-old-newlines
      delete-semicolons
      fuzzy-line-length
      hanging-side-comments
      indent-block-comments
      indent-columns=4
      long-block-line-count=8
      look-for-autoloader
      look-for-selfloader
      maximum-consecutive-blank-lines=1
      maximum-fields-per-table=0
      maximum-line-length=80
      minimum-space-to-comment=4
      nobrace-left-and-indent
      nocuddled-else
      nodelete-old-whitespace
      nohtml
      nologfile
      noquiet
      noshow-options
      nostatic-side-comments
      noswallow-optional-blank-lines
      notabs
      nowarning-output
      outdent-labels
      outdent-long-quotes
      outdent-long-comments
      paren-tightness=1
      paren-vertical-tightness-closing=0
      paren-vertical-tightness=0
      pass-version-line
      recombine
      valign
      short-concatenation-item-length=8
      space-for-semicolon
      square-bracket-tightness=1
      square-bracket-vertical-tightness-closing=0
      square-bracket-vertical-tightness=0
      static-block-comments
      trim-qw
      format=tidy
      backup-file-extension=bak
      format-skipping

      pod2html
      html-table-of-contents
      html-entities
    );

    # this default contains a space so it cannot go in the qw list above
    push @defaults, "perl-syntax-check-flags=-c -T";

    #---------------------------------------------------------------
    # Define abbreviations which will be expanded into the above primitives.
    # These may be defined recursively.
    #---------------------------------------------------------------
    %expansion = (
        %expansion,
        'freeze-newlines'    => [qw(noadd-newlines nodelete-old-newlines)],
        'fnl'                => [qw(freeze-newlines)],
        'freeze-whitespace'  => [qw(noadd-whitespace nodelete-old-whitespace)],
        'fws'                => [qw(freeze-whitespace)],
        'indent-only'        => [qw(freeze-newlines freeze-whitespace)],
        'outdent-long-lines' => [qw(outdent-long-quotes outdent-long-comments)],
        'nooutdent-long-lines' =>
          [qw(nooutdent-long-quotes nooutdent-long-comments)],
        'noll' => [qw(nooutdent-long-lines)],
        'io'   => [qw(indent-only)],
        'delete-all-comments' =>
          [qw(delete-block-comments delete-side-comments delete-pod)],
        'nodelete-all-comments' =>
          [qw(nodelete-block-comments nodelete-side-comments nodelete-pod)],
        'dac'  => [qw(delete-all-comments)],
        'ndac' => [qw(nodelete-all-comments)],
        'gnu'  => [qw(gnu-style)],
        'pbp'  => [qw(perl-best-practices)],
        'tee-all-comments' =>
          [qw(tee-block-comments tee-side-comments tee-pod)],
        'notee-all-comments' =>
          [qw(notee-block-comments notee-side-comments notee-pod)],
        'tac'   => [qw(tee-all-comments)],
        'ntac'  => [qw(notee-all-comments)],
        'html'  => [qw(format=html)],
        'nhtml' => [qw(format=tidy)],
        'tidy'  => [qw(format=tidy)],

        'break-after-comma-arrows'   => [qw(cab=0)],
        'nobreak-after-comma-arrows' => [qw(cab=1)],
        'baa'                        => [qw(cab=0)],
        'nbaa'                       => [qw(cab=1)],

        'break-at-old-trinary-breakpoints' => [qw(bot)],

        'cti=0' => [qw(cpi=0 cbi=0 csbi=0)],
        'cti=1' => [qw(cpi=1 cbi=1 csbi=1)],
        'cti=2' => [qw(cpi=2 cbi=2 csbi=2)],
        'icp'   => [qw(cpi=2 cbi=2 csbi=2)],
        'nicp'  => [qw(cpi=0 cbi=0 csbi=0)],

        'closing-token-indentation=0' => [qw(cpi=0 cbi=0 csbi=0)],
        'closing-token-indentation=1' => [qw(cpi=1 cbi=1 csbi=1)],
        'closing-token-indentation=2' => [qw(cpi=2 cbi=2 csbi=2)],
        'indent-closing-paren'        => [qw(cpi=2 cbi=2 csbi=2)],
        'noindent-closing-paren'      => [qw(cpi=0 cbi=0 csbi=0)],

        'vt=0' => [qw(pvt=0 bvt=0 sbvt=0)],
        'vt=1' => [qw(pvt=1 bvt=1 sbvt=1)],
        'vt=2' => [qw(pvt=2 bvt=2 sbvt=2)],

        'vertical-tightness=0' => [qw(pvt=0 bvt=0 sbvt=0)],
        'vertical-tightness=1' => [qw(pvt=1 bvt=1 sbvt=1)],
        'vertical-tightness=2' => [qw(pvt=2 bvt=2 sbvt=2)],

        'vtc=0' => [qw(pvtc=0 bvtc=0 sbvtc=0)],
        'vtc=1' => [qw(pvtc=1 bvtc=1 sbvtc=1)],
        'vtc=2' => [qw(pvtc=2 bvtc=2 sbvtc=2)],

        'vertical-tightness-closing=0' => [qw(pvtc=0 bvtc=0 sbvtc=0)],
        'vertical-tightness-closing=1' => [qw(pvtc=1 bvtc=1 sbvtc=1)],
        'vertical-tightness-closing=2' => [qw(pvtc=2 bvtc=2 sbvtc=2)],

        'otr'                   => [qw(opr ohbr osbr)],
        'opening-token-right'   => [qw(opr ohbr osbr)],
        'notr'                  => [qw(nopr nohbr nosbr)],
        'noopening-token-right' => [qw(nopr nohbr nosbr)],

        'sot'                    => [qw(sop sohb sosb)],
        'nsot'                   => [qw(nsop nsohb nsosb)],
        'stack-opening-tokens'   => [qw(sop sohb sosb)],
        'nostack-opening-tokens' => [qw(nsop nsohb nsosb)],

        # The negated long name here must be 'nostack-closing-tokens'.
        # Repeating the key 'nostack-opening-tokens' would silently
        # replace the entry just above, because a later duplicate key
        # in a hash list wins.
        'sct'                    => [qw(scp schb scsb)],
        'stack-closing-tokens'   => [qw(scp schb scsb)],
        'nsct'                   => [qw(nscp nschb nscsb)],
        'nostack-closing-tokens' => [qw(nscp nschb nscsb)],

        # 'mangle' originally deleted pod and comments, but to keep it
        # reversible, it no longer does.  But if you really want to
        # delete them, just use:
        #   -mangle -dac

        # An interesting use for 'mangle' is to do this:
        #    perltidy -mangle myfile.pl -st | perltidy -o myfile.pl.new
        # which will form as many one-line blocks as possible

        'mangle' => [
            qw(
              check-syntax
              delete-old-newlines
              delete-old-whitespace
              delete-semicolons
              indent-columns=0
              maximum-consecutive-blank-lines=0
              maximum-line-length=100000
              noadd-newlines
              noadd-semicolons
              noadd-whitespace
              noblanks-before-blocks
              noblanks-before-subs
              notabs
              )
        ],

        # 'extrude' originally deleted pod and comments, but to keep it
        # reversible, it no longer does.  But if you really want to
        # delete them, just use
        #   extrude -dac
        #
        # An interesting use for 'extrude' is to do this:
        #    perltidy -extrude myfile.pl -st | perltidy -o myfile.pl.new
        # which will break up all one-line blocks.

        'extrude' => [
            qw(
              check-syntax
              ci=0
              delete-old-newlines
              delete-old-whitespace
              delete-semicolons
              indent-columns=0
              maximum-consecutive-blank-lines=0
              maximum-line-length=1
              noadd-semicolons
              noadd-whitespace
              noblanks-before-blocks
              noblanks-before-subs
              nofuzzy-line-length
              notabs
              norecombine
              )
        ],

        # this style tries to follow the GNU Coding Standards (which do
        # not really apply to perl but which are followed by some perl
        # programmers).
        'gnu-style' => [
            qw(
              lp bl noll pt=2 bt=2 sbt=2 cpi=1 csbi=1 cbi=1
              )
        ],

        # Style suggested in Damian Conway's Perl Best Practices
        'perl-best-practices' => [
            qw(l=78 i=4 ci=4 st se vt=2 cti=0 pt=1 bt=1 sbt=1 bbt=1 nsfs nolq),
q(wbb=% + - * / x != == >= <= =~ !~ < > | & = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=)
        ],

        # Additional styles can be added here
    );

    # The Perl::Tidy::HtmlWriter is handed the expansion hash so that it
    # can install its own abbreviations
    Perl::Tidy::HtmlWriter->make_abbreviated_names( \%expansion );

    # Uncomment next line to dump all expansions for debugging:
    # dump_short_names(\%expansion);
    return (
        \@option_string,   \@defaults, \%expansion,
        \%option_category, \%option_range
    );

}    # end of generate_options

1734

1735

sub process_command_line {

    # Build the option hash for this run from three sources, applied in
    # this order so that later sources override earlier ones:
    #   1. the built-in defaults returned by generate_options(),
    #   2. a perltidyrc configuration file (skipped when -npro is seen),
    #   3. the command line parameters in @ARGV.
    #
    # Parameters:
    #   $perltidyrc_stream  - config source passed by the caller (may be
    #                         empty); a -pro=file on the command line wins
    #   $is_Windows, $Windows_type - forwarded to find_config_file()
    #   $rpending_complaint - ref to a string, forwarded to find_config_file()
    #   $dump_options_type  - when equal to 'perltidyrc' the built-in
    #                         defaults are not loaded
    #
    # Returns the list:
    #   ( \%Opts, $config_file, \@raw_options, $saw_extrude,
    #     $roption_string, $rexpansion, $roption_category, $roption_range )
    #
    # Side effects: rewrites @ARGV (leading '--' becomes '-', abbreviations
    # are expanded, options are consumed by GetOptions); may exit after a
    # dump/help/version request; dies on invalid options.
    my (
        $perltidyrc_stream,  $is_Windows, $Windows_type,
        $rpending_complaint, $dump_options_type
    ) = @_;

    use Getopt::Long;

    my (
        $roption_string,   $rdefaults, $rexpansion,
        $roption_category, $roption_range
    ) = generate_options();

    #---------------------------------------------------------------
    # set the defaults by passing the above list through GetOptions
    #---------------------------------------------------------------
    my %Opts = ();
    {
        local @ARGV;

        # do not load the defaults if we are just dumping perltidyrc
        my $dumping_perltidyrc = defined($dump_options_type)
          && $dump_options_type eq 'perltidyrc';
        if ( !$dumping_perltidyrc ) {
            foreach my $default ( @{$rdefaults} ) {
                push @ARGV, "--" . $default;
            }
        }

        # Patch to save users Getopt::Long configuration
        # and set to Getopt::Long defaults.  Use eval to avoid
        # breaking old versions of Perl without these routines.
        my $glc;
        eval { $glc = Getopt::Long::Configure() };
        unless ($@) {
            eval { Getopt::Long::ConfigDefaults() };
        }
        else { $glc = undef }

        if ( !GetOptions( \%Opts, @$roption_string ) ) {
            die "Programming Bug: error in setting default options";
        }

        # Patch to put the previous Getopt::Long configuration back
        eval { Getopt::Long::Configure($glc) } if defined $glc;
    }

    my @raw_options        = ();
    my $config_file        = "";
    my $saw_ignore_profile = 0;
    my $saw_extrude        = 0;
    my $saw_dump_profile   = 0;

    #---------------------------------------------------------------
    # Take a first look at the command-line parameters.  Do as many
    # immediate dumps as possible, which can avoid confusion if the
    # perltidyrc file has an error.
    #---------------------------------------------------------------
    foreach my $arg (@ARGV) {

        # NOTE: $arg aliases the @ARGV element, so this edits @ARGV in place
        $arg =~ s/^--/-/;
        if ( $arg =~ /^-(npro|noprofile|no-profile)$/ ) {
            $saw_ignore_profile = 1;
        }

        # note: this must come before -pro and -profile, below:
        elsif ( $arg =~ /^-(dump-profile|dpro)$/ ) {
            $saw_dump_profile = 1;
        }
        elsif ( $arg =~ /^-(pro|profile)=(.+)/ ) {
            if ($config_file) {
                warn
"Only one -pro=filename allowed, using '$2' instead of '$config_file'\n";
            }
            $config_file = $2;
            unless ( -e $config_file ) {
                warn "cannot find file given with -pro=$config_file: $!\n";
                $config_file = "";
            }
        }
        elsif ( $arg =~ /^-(pro|profile)=?$/ ) {
            die "usage: -pro=filename or --profile=filename, no spaces\n";
        }
        elsif ( $arg =~ /^-extrude$/ ) {
            $saw_extrude = 1;
        }
        elsif ( $arg =~ /^-(help|h|HELP|H)$/ ) {
            usage();
            exit 1;
        }
        elsif ( $arg =~ /^-(version|v)$/ ) {
            show_version();
            exit 1;
        }
        elsif ( $arg =~ /^-(dump-defaults|ddf)$/ ) {
            dump_defaults(@$rdefaults);
            exit 1;
        }
        elsif ( $arg =~ /^-(dump-long-names|dln)$/ ) {
            dump_long_names(@$roption_string);
            exit 1;
        }
        elsif ( $arg =~ /^-(dump-short-names|dsn)$/ ) {
            dump_short_names($rexpansion);
            exit 1;
        }
        elsif ( $arg =~ /^-(dump-token-types|dtt)$/ ) {
            Perl::Tidy::Tokenizer->dump_token_types(*STDOUT);
            exit 1;
        }
    }

    if ( $saw_dump_profile && $saw_ignore_profile ) {
        warn "No profile to dump because of -npro\n";
        exit 1;
    }

    #---------------------------------------------------------------
    # read any .perltidyrc configuration file
    #---------------------------------------------------------------
    unless ($saw_ignore_profile) {

        # resolve possible conflict between $perltidyrc_stream passed
        # as call parameter to perltidy and -pro=filename on command
        # line.
        if ($perltidyrc_stream) {
            if ($config_file) {
                warn <<EOM;
Conflict: a perltidyrc configuration file was specified both as this
perltidy call parameter: $perltidyrc_stream
and with this -profile=$config_file.
Using -profile=$config_file.
EOM
            }
            else {
                $config_file = $perltidyrc_stream;
            }
        }

        # look for a config file if we don't have one yet
        my $config_file_chatter  = "";
        my $rconfig_file_chatter = \$config_file_chatter;
        $config_file =
          find_config_file( $is_Windows, $Windows_type, $rconfig_file_chatter,
            $rpending_complaint )
          unless $config_file;

        # open any config file
        my $fh_config;
        if ($config_file) {
            ( $fh_config, $config_file ) =
              Perl::Tidy::streamhandle( $config_file, 'r' );
            unless ($fh_config) {
                $$rconfig_file_chatter .=
                  "# $config_file exists but cannot be opened\n";
            }
        }

        if ($saw_dump_profile) {
            dump_config_file( $fh_config, $config_file,
                $rconfig_file_chatter );
            exit 1;
        }

        if ($fh_config) {

            my ( $rconfig_list, $death_message ) =
              read_config_file( $fh_config, $config_file, $rexpansion );
            die $death_message if ($death_message);

            # process any .perltidyrc parameters right now so we can
            # localize errors
            if (@$rconfig_list) {
                local @ARGV = @$rconfig_list;

                expand_command_abbreviations( $rexpansion, \@raw_options,
                    $config_file );

                if ( !GetOptions( \%Opts, @$roption_string ) ) {
                    die
"Error in this config file: $config_file \nUse -npro to ignore this file, -h for help\n";
                }

                # Anything left in this local @ARGV is an error and must be
                # invalid bare words from the configuration file.  We cannot
                # check this earlier because bare words may have been valid
                # values for parameters.  We had to wait for GetOptions to
                # have a look at @ARGV.
                if (@ARGV) {
                    my $count = @ARGV;
                    my $str   = "\'" . pop(@ARGV) . "\'";

                    # test definedness, not truth, so that a bare word
                    # such as '0' does not cut the list short
                    while ( defined( my $param = pop(@ARGV) ) ) {
                        if ( length($str) < 70 ) {
                            $str .= ", '$param'";
                        }
                        else {
                            $str .= ", ...";
                            last;
                        }
                    }
                    die <<EOM;
There are $count unrecognized values in the configuration file '$config_file':
$str
Use leading dashes for parameters. Use -npro to ignore this file.
EOM
                }

                # Undo any options which cause premature exit.  They are not
                # appropriate for a config file, and it could be hard to
                # diagnose the cause of the premature exit.
                foreach my $exit_option (
                    qw{
                    dump-defaults
                    dump-long-names
                    dump-options
                    dump-profile
                    dump-short-names
                    dump-token-types
                    dump-want-left-space
                    dump-want-right-space
                    help
                    stylesheet
                    version
                    }
                  )
                {
                    if ( defined( $Opts{$exit_option} ) ) {
                        delete $Opts{$exit_option};
                        warn
"ignoring --$exit_option in config file: $config_file\n";
                    }
                }
            }
        }
    }

    #---------------------------------------------------------------
    # now process the command line parameters
    #---------------------------------------------------------------
    expand_command_abbreviations( $rexpansion, \@raw_options, $config_file );

    if ( !GetOptions( \%Opts, @$roption_string ) ) {
        die "Error on command line; for help try 'perltidy -h'\n";
    }

    return ( \%Opts, $config_file, \@raw_options, $saw_extrude, $roption_string,
        $rexpansion, $roption_category, $roption_range );
}    # end of process_command_line

1985

1986

sub check_options {

    # Check and resolve interactions among the basic options.  The option
    # hash referenced by $rOpts is modified in place.
    #
    # Parameters:
    #   $rOpts              - ref to the option hash (long names as keys)
    #   $is_Windows         - true if running on an MS system
    #   $Windows_type       - Windows sub-type string (e.g. '98', 'Me')
    #   $rpending_complaint - ref to a string collecting deferred messages
    my ( $rOpts, $is_Windows, $Windows_type, $rpending_complaint ) = @_;

    #---------------------------------------------------------------
    # check and handle any interactions among the basic options..
    #---------------------------------------------------------------

    # Since -vt, -vtc, and -cti are abbreviations, but under
    # msdos, an unquoted input parameter like vtc=1 will be
    # seen as 2 parameters, vtc and 1, so the abbreviations
    # won't be seen.  Therefore, we will catch them here if
    # they get through.

    if ( defined $rOpts->{'vertical-tightness'} ) {
        my $vt = $rOpts->{'vertical-tightness'};
        $rOpts->{'paren-vertical-tightness'}          = $vt;
        $rOpts->{'square-bracket-vertical-tightness'} = $vt;
        $rOpts->{'brace-vertical-tightness'}          = $vt;
    }

    if ( defined $rOpts->{'vertical-tightness-closing'} ) {
        my $vtc = $rOpts->{'vertical-tightness-closing'};
        $rOpts->{'paren-vertical-tightness-closing'}          = $vtc;
        $rOpts->{'square-bracket-vertical-tightness-closing'} = $vtc;
        $rOpts->{'brace-vertical-tightness-closing'}          = $vtc;
    }

    if ( defined $rOpts->{'closing-token-indentation'} ) {
        my $cti = $rOpts->{'closing-token-indentation'};
        $rOpts->{'closing-square-bracket-indentation'} = $cti;
        $rOpts->{'closing-brace-indentation'}          = $cti;
        $rOpts->{'closing-paren-indentation'}          = $cti;
    }

    # In quiet mode, there is no log file and hence no way to report
    # results of syntax check, so don't do it.
    if ( $rOpts->{'quiet'} ) {
        $rOpts->{'check-syntax'} = 0;
    }

    # can't check syntax if no output
    if ( $rOpts->{'format'} ne 'tidy' ) {
        $rOpts->{'check-syntax'} = 0;
    }

    # Never let Windows 9x/Me systems run syntax check -- this will prevent a
    # wide variety of nasty problems on these systems, because they cannot
    # reliably run backticks.  Don't even think about changing this!
    if (   $rOpts->{'check-syntax'}
        && $is_Windows
        && ( !$Windows_type || $Windows_type =~ /^(9|Me)/ ) )
    {
        $rOpts->{'check-syntax'} = 0;
    }

    # It's really a bad idea to check syntax as root unless you wrote
    # the script yourself.  FIXME: not sure if this works with VMS
    unless ($is_Windows) {

        if ( $< == 0 && $rOpts->{'check-syntax'} ) {
            $rOpts->{'check-syntax'} = 0;
            $$rpending_complaint .=
"Syntax check deactivated for safety; you shouldn't run this as root\n";
        }
    }

    # see if user set a non-negative logfile-gap
    if ( defined( $rOpts->{'logfile-gap'} ) && $rOpts->{'logfile-gap'} >= 0 ) {

        # a zero gap will be taken as a 1
        if ( $rOpts->{'logfile-gap'} == 0 ) {
            $rOpts->{'logfile-gap'} = 1;
        }

        # setting a non-negative logfile gap causes logfile to be saved
        $rOpts->{'logfile'} = 1;
    }

    # not setting logfile gap, or setting it negative, causes default of 50
    else {
        $rOpts->{'logfile-gap'} = 50;
    }

    # set short-cut flag when only indentation is to be done.
    # Note that the user may or may not have already set the
    # indent-only flag.
    if (   !$rOpts->{'add-whitespace'}
        && !$rOpts->{'delete-old-whitespace'}
        && !$rOpts->{'add-newlines'}
        && !$rOpts->{'delete-old-newlines'} )
    {
        $rOpts->{'indent-only'} = 1;
    }

    # -isbc implies -ibc
    if ( $rOpts->{'indent-spaced-block-comments'} ) {
        $rOpts->{'indent-block-comments'} = 1;
    }

    # -bli flag implies -bl
    if ( $rOpts->{'brace-left-and-indent'} ) {
        $rOpts->{'opening-brace-on-new-line'} = 1;
    }

    if (   $rOpts->{'opening-brace-always-on-right'}
        && $rOpts->{'opening-brace-on-new-line'} )
    {
        warn <<EOM;
Conflict: you specified both 'opening-brace-always-on-right' (-bar) and
'opening-brace-on-new-line' (-bl). Ignoring -bl.
EOM
        $rOpts->{'opening-brace-on-new-line'} = 0;
    }

    # it simplifies things if -bl is 0 rather than undefined
    if ( !defined( $rOpts->{'opening-brace-on-new-line'} ) ) {
        $rOpts->{'opening-brace-on-new-line'} = 0;
    }

    # -sbl defaults to -bl if not defined
    if ( !defined( $rOpts->{'opening-sub-brace-on-new-line'} ) ) {
        $rOpts->{'opening-sub-brace-on-new-line'} =
          $rOpts->{'opening-brace-on-new-line'};
    }

    # set shortcut flag if no blanks to be written
    unless ( $rOpts->{'maximum-consecutive-blank-lines'} ) {
        $rOpts->{'swallow-optional-blank-lines'} = 1;
    }

    if ( $rOpts->{'entab-leading-whitespace'} ) {
        if ( $rOpts->{'entab-leading-whitespace'} < 0 ) {

            # an invalid -et is dropped, so it must not override 'tabs'
            warn "-et=n must use a positive integer; ignoring -et\n";
            $rOpts->{'entab-leading-whitespace'} = undef;
        }

        # entab leading whitespace has priority over the older 'tabs' option
        elsif ( $rOpts->{'tabs'} ) {
            $rOpts->{'tabs'} = 0;
        }
    }
}

2127

2128

sub expand_command_abbreviations {

    # Go through @ARGV and expand any abbreviations, replacing @ARGV with
    # the expanded list.
    #
    # Parameters:
    #   $rexpansion   - ref to hash mapping an abbreviation to a ref to the
    #                   list of names it expands to
    #   $rraw_options - ref to array; every dash parameter seen on the first
    #                   pass is appended to it (used for debug output)
    #   $config_file  - name of the config file being processed, if any;
    #                   only used to choose the error message
    #
    # Dies if expansions are still being made after the pass limit, which
    # indicates circular references among the abbreviations.
    my ( $rexpansion, $rraw_options, $config_file ) = @_;

    # set a pass limit to prevent an infinite loop;
    # 10 should be plenty, but it may be increased to allow deeply
    # nested expansions.
    my $max_passes = 10;

    # keep looping until all expansions have been converted into actual
    # dash parameters..
    for ( my $pass_count = 0 ; $pass_count <= $max_passes ; $pass_count++ ) {
        my @new_argv     = ();
        my $abbrev_count = 0;

        # loop over each item in @ARGV..
        foreach my $word (@ARGV) {

            # convert any leading 'no-' to just 'no'
            if ( $word =~ /^(-[-]?no)-(.*)/ ) { $word = $1 . $2 }

            # if it is a dash flag (instead of a file name)..
            if ( $word =~ /^-[-]?([\w\-]+)(.*)/ ) {

                my $abr   = $1;
                my $flags = $2;

                # save the raw input for debug output in case of circular refs
                if ( $pass_count == 0 ) {
                    push( @$rraw_options, $word );
                }

                # recombine abbreviation and flag, if necessary,
                # to allow abbreviations with arguments such as '-vt=1'
                if ( $rexpansion->{ $abr . $flags } ) {
                    $abr   = $abr . $flags;
                    $flags = "";
                }

                # if we see this dash item in the expansion hash..
                if ( $rexpansion->{$abr} ) {
                    $abbrev_count++;

                    # stuff all of the words that it expands to into the
                    # new arg list for the next pass
                    foreach my $abbrev ( @{ $rexpansion->{$abr} } ) {
                        next unless $abbrev;    # for safety; shouldn't happen
                        push( @new_argv, '--' . $abbrev . $flags );
                    }
                }

                # not in expansion hash, must be actual long name
                else {
                    push( @new_argv, $word );
                }
            }

            # not a dash item, so just save it for the next pass
            else {
                push( @new_argv, $word );
            }
        }    # end of this pass

        # update parameter list @ARGV to the new one
        @ARGV = @new_argv;
        last unless ( $abbrev_count > 0 );

        # make sure we are not in an infinite loop
        if ( $pass_count == $max_passes ) {
            print STDERR
"I'm tired. We seem to be in an infinite loop trying to expand aliases.\n";
            print STDERR "Here are the raw options\n";
            local $" = ')(';
            print STDERR "(@$rraw_options)\n";
            my $num = @new_argv;

            if ( $num < 50 ) {
                print STDERR "After $max_passes passes here is ARGV\n";
                print STDERR "(@new_argv)\n";
            }
            else {
                print STDERR "After $max_passes passes ARGV has $num entries\n";
            }

            if ($config_file) {
                die <<"DIE";
Please check your configuration file $config_file for circular-references.
To deactivate it, use -npro.
DIE
            }
            else {
                die <<'DIE';
Program bug - circular-references in the %expansion hash, probably due to
a recent program change.
DIE
            }
        }    # end of check for circular references
    }    # end of loop over all passes
}

2231

2232

# Debug routine -- this will dump the expansion hash

2233

sub dump_short_names {

    # Write a header followed by one line per abbreviation to STDOUT.
    # Each line shows the abbreviation and the list it expands to;
    # abbreviations are listed in sorted order.
    #
    # $rexpansion - ref to hash mapping an abbreviation to a ref to the
    #               list of names it expands to
    my ($rexpansion) = @_;

    my $header = <<EOM;
List of short names. This list shows how all abbreviations are
translated into other abbreviations and, eventually, into long names.
New abbreviations may be defined in a .perltidyrc file.
For a list of all long names, use perltidy --dump-long-names (-dln).
--------------------------------------------------------------------------
EOM
    print STDOUT $header;

    for my $short_name ( sort keys %{$rexpansion} ) {
        my $rtargets = $rexpansion->{$short_name};
        print STDOUT "$short_name --> @{$rtargets}\n";
    }
}

2247

2248

sub check_vms_filename {

    # Given a valid filename (the perltidy input file), return a modified
    # filename and a separator character suitable for VMS, as the list
    #   ( $path . $base, $separator )
    #
    # Contributed by Michael Cartmell
    #
    my ($filename) = @_;
    my ( $base, $path ) = fileparse($filename);

    # remove an explicit ';version' suffix; only when there is none do we
    # look for a '.version' suffix instead
    my $removed_semicolon_version = ( $base =~ s/;-?\d*$// );
    if ( !$removed_semicolon_version ) {

        # remove explicit . version ie two dots in filename NB ^ escapes a dot
        $base =~ s/(          # begin capture $1
                  (?:^|[^^])\. # match a dot not preceded by a caret
                  (?:          # followed by nothing
                    |          # or
                    .*[^^]     # anything ending in a non caret
                  )
                )              # end capture $1
                \.-?\d*$       # match . version number
              /$1/x;
    }

    # normalise filename, if there are no unescaped dots then append one
    if ( $base !~ /(?:^|[^^])\./ ) {
        $base .= '.';
    }

    # a name that already ends in a dot needs no separator before a new
    # extension; any other name gets an underscore
    my $separator = "_";
    if ( $base =~ /\.$/ ) {
        $separator = "";
    }

    return ( $path . $base, $separator );
}

2279

2280

sub Win_OS_Type {

    # Returns a string that determines what MS OS we are on:
    #   win32s,95,98,Me,NT3.51,NT4,2000,XP/.Net,Win2003
    # Returns blank string if not an MS system, or if the version numbers
    # reported by the system are not in the table below (in which case a
    # message is appended to $$rpending_complaint).
    # Original code contributed by: Yves Orton
    # We need to know this to decide where to look for config files
    #
    # TODO: are these more standard names?
    # Win32s Win95 Win98 WinMe WinNT3.51 WinNT4 Win2000 WinXP/.Net Win2003
    my ($rpending_complaint) = @_;

    # is it a MS box?
    if ( $^O !~ /win32|dos/i ) {
        return "";
    }

    # Systems built from Perl source may not have Win32.pm
    # But probably have Win32::GetOSVersion() anyway so the
    # following line is not 'required':
    # return $os unless eval('require Win32');

    # Use the standard API call to determine the version; if the call is
    # not available the eval fails quietly and all fields stay undefined
    my ( $undef, $major, $minor, $build, $id );
    eval { ( $undef, $major, $minor, $build, $id ) = Win32::GetOSVersion() };

    #
    #    NAME                   ID   MAJOR  MINOR
    #    Windows NT 4           2      4       0
    #    Windows 2000           2      5       0
    #    Windows XP             2      5       1
    #    Windows Server 2003    2      5       2

    # If id==0 then its a win32s box.
    if ( !$id ) {
        return "win32s";
    }

    # Magic numbers from MSDN documentation of GetOSVersion:
    # first key is the id, second key is the minor version number
    my %name_of = (
        1 => {
            0  => "95",
            10 => "98",
            90 => "Me"
        },
        2 => {
            0  => "2000",      # or NT 4, see below
            1  => "XP/.Net",
            2  => "Win2003",
            51 => "NT3.51"
        }
    );
    my $os = $name_of{$id}->{$minor};

    # If $os is undefined, the above code is out of date.  Suggested updates
    # are welcome.
    if ( !defined $os ) {
        $os = "";
        $$rpending_complaint .= <<EOS;
Error trying to discover Win_OS_Type: $id:$major:$minor Has no name of record!
We won't be able to look for a system-wide config file.
EOS
    }

    # Unfortunately the logic used for the various versions isnt so clever..
    # so we have to handle an outside case.
    if ( $os eq "2000" && $major != 5 ) {
        return "NT4";
    }
    return $os;
}

2341

2342

sub is_unix {

    # True unless the OS name in $^O matches win32/dos (case-insensitive)
    # or is exactly 'VMS', 'OS2' or 'MacOS'.
    my $is_other_system =
         $^O =~ /win32|dos/i
      || $^O eq 'VMS'
      || $^O eq 'OS2'
      || $^O eq 'MacOS';
    return !$is_other_system;
}

2349

2350

sub look_for_Windows {

    # Determine whether we are on an MS system and, if so, its sub-type.
    #
    # $rpending_complaint - ref to a string, forwarded to Win_OS_Type()
    #
    # Returns ( $is_Windows, $Windows_type ); $Windows_type is undefined
    # when $is_Windows is false.
    my $rpending_complaint = shift;
    my $is_Windows         = ( $^O =~ /win32|dos/i );

    # Declare first and assign conditionally: the behavior of
    # 'my $x = ... if COND' is undefined in Perl.
    my $Windows_type;
    if ($is_Windows) {
        $Windows_type = Win_OS_Type($rpending_complaint);
    }
    return ( $is_Windows, $Windows_type );
}

2359

2360

sub find_config_file {

    # Look for a .perltidyrc configuration file and return its name, or
    # return nothing if none is found.  Search order:
    #   1. '.perltidyrc' in the current directory
    #   2. the file named by $ENV{PERLTIDY}, then '.perltidyrc' in each of
    #      the directories $ENV{PERLTIDY}, $ENV{HOME} (plus
    #      $ENV{USERPROFILE} and $ENV{HOMESHARE} when $^O matches win32)
    #   3. system-wide locations, which depend on the OS
    #
    # Parameters:
    #   $is_Windows, $Windows_type - as returned by look_for_Windows()
    #   $rconfig_file_chatter - ref to a string; a record of every test
    #                           made is appended to it
    #   $rpending_complaint   - ref to a string, forwarded to
    #                           Win_Config_Locs()
    my ( $is_Windows, $Windows_type, $rconfig_file_chatter,
        $rpending_complaint )
      = @_;

    $$rconfig_file_chatter .= "# Config file search...system reported as:";
    if ($is_Windows) {
        $$rconfig_file_chatter .= "Windows $Windows_type\n";
    }
    else {
        $$rconfig_file_chatter .= " $^O\n";
    }

    # sub to check file existence and record all tests
    my $exists_config_file = sub {
        my $config_file = shift;
        return 0 unless $config_file;
        $$rconfig_file_chatter .= "# Testing: $config_file\n";
        return -f $config_file;
    };

    my $config_file;

    # look in current directory first
    $config_file = ".perltidyrc";
    return $config_file if $exists_config_file->($config_file);

    # Default environment vars.
    my @envs = qw(PERLTIDY HOME);

    # Check the NT/2k/XP locations, first a local machine def, then a
    # network def
    push @envs, qw(USERPROFILE HOMESHARE) if $^O =~ /win32/i;

    # Now go through the environment ...
    foreach my $var (@envs) {
        $$rconfig_file_chatter .= "# Examining: \$ENV{$var}";
        if ( defined( $ENV{$var} ) ) {
            $$rconfig_file_chatter .= " = $ENV{$var}\n";

            # test ENV{ PERLTIDY } as file:
            if ( $var eq 'PERLTIDY' ) {
                $config_file = "$ENV{$var}";
                return $config_file if $exists_config_file->($config_file);
            }

            # test ENV as directory:
            $config_file = catfile( $ENV{$var}, ".perltidyrc" );
            return $config_file if $exists_config_file->($config_file);
        }
        else {
            $$rconfig_file_chatter .= "\n";
        }
    }

    # then look for a system-wide definition
    # where to look varies with OS
    if ($is_Windows) {

        if ($Windows_type) {
            my ( $os, $system, $allusers ) =
              Win_Config_Locs( $rpending_complaint, $Windows_type );

            # Check All Users directory, if there is one.
            if ($allusers) {
                $config_file = catfile( $allusers, ".perltidyrc" );
                return $config_file if $exists_config_file->($config_file);
            }

            # Check system directory.  Win_Config_Locs returns an empty
            # list for a type it has no locations for, leaving $system
            # undefined; there is then no directory to test.
            if ($system) {
                $config_file = catfile( $system, ".perltidyrc" );
                return $config_file if $exists_config_file->($config_file);
            }
        }
    }

    # Place to add customization code for other systems
    elsif ( $^O eq 'OS2' ) {
    }
    elsif ( $^O eq 'MacOS' ) {
    }
    elsif ( $^O eq 'VMS' ) {
    }

    # Assume some kind of Unix
    else {

        $config_file = "/usr/local/etc/perltidyrc";
        return $config_file if $exists_config_file->($config_file);

        $config_file = "/etc/perltidyrc";
        return $config_file if $exists_config_file->($config_file);
    }

    # Couldn't find a config file
    return;
}

2457

2458

sub Win_Config_Locs {

    # In scalar context returns the OS name (95 98 ME NT3.51 NT4 2000 XP),
    # or undef if its not a win32 OS.  In list context returns OS, System
    # Directory, and All Users Directory.  All Users will be empty on a
    # 9x/Me box.  Contributed by: Yves Orton.
    #
    # Parameters:
    #   $rpending_complaint - ref to a string collecting deferred messages
    #   $os (optional)      - OS name; when omitted it is obtained from
    #                         Win_OS_Type()
    #
    # Returns nothing (empty list / undef) when the OS name is empty or no
    # locations are known for it; in the latter case a message is appended
    # to $$rpending_complaint.

    my $rpending_complaint = shift;

    # pass the complaint buffer along so that any message produced by
    # Win_OS_Type is not lost
    my $os = (@_) ? shift : Win_OS_Type($rpending_complaint);
    return unless $os;

    my $system   = "";
    my $allusers = "";

    if ( $os =~ /9[58]|Me/ ) {
        $system = "C:/Windows";
    }
    elsif ( $os =~ /NT|XP|200?/ ) {
        $system = ( $os =~ /XP/ ) ? "C:/Windows/" : "C:/WinNT/";
        $allusers =
          ( $os =~ /NT/ )
          ? "C:/WinNT/profiles/All Users/"
          : "C:/Documents and Settings/All Users/";
    }
    else {

        # This currently would only happen on a win32s computer.  I dont have
        # one to test, so I am unsure how to proceed.  Suggestions welcome!
        $$rpending_complaint .=
"I dont know a sensible place to look for config files on an $os system.\n";
        return;
    }
    return wantarray ? ( $os, $system, $allusers ) : $os;
}

2492

2493

sub dump_config_file {

    # Print the config file search record, followed by the contents of
    # the config file (if one was opened), to STDOUT.
    #
    # Parameters:
    #   $fh                   - handle object of the opened config file, or
    #                           a false value if there is none; it is
    #                           closed after being dumped
    #   $config_file          - its name, used in the heading line
    #   $rconfig_file_chatter - ref to the search record string
    my $fh                   = shift;
    my $config_file          = shift;
    my $rconfig_file_chatter = shift;
    print STDOUT "$$rconfig_file_chatter";
    if ($fh) {
        print STDOUT "# Dump of file: '$config_file'\n";

        # A method call gets no implicit defined() test in a while
        # condition, so test explicitly; otherwise a line which is false
        # as a string (such as a final '0' without a newline) would end
        # the dump early.
        while ( defined( my $line = $fh->getline() ) ) {
            print STDOUT $line;
        }
        eval { $fh->close() };
    }
    else {
        print STDOUT "# ...no config file found\n";
    }
}

2507

2508

sub read_config_file {

    # Read a configuration file and return ( \@config_list, $death_message ).
    #
    # Each line is either plain parameters, which are appended to
    # @config_list, or part of an alias definition of the general form
    #     newname { body }
    # (possibly spread over several lines), whose body words are stored,
    # without leading dashes, in $rexpansion->{newname}.
    #
    # Parameters:
    #   $fh          - handle object of the opened config file; it is
    #                  closed before returning
    #   $config_file - its name, used in messages
    #   $rexpansion  - ref to the abbreviation expansion hash; new aliases
    #                  are added to it
    #
    # The file is bad if a non-empty $death_message is returned.
    my ( $fh, $config_file, $rexpansion ) = @_;
    my @config_list = ();

    # file is bad if non-empty $death_message is returned
    my $death_message = "";

    my $name    = undef;    # alias currently being defined, if any
    my $line_no = 0;

    # A method call gets no implicit defined() test in a while condition,
    # so test explicitly; otherwise a line which is false as a string
    # would end the read early.
    while ( defined( my $line = $fh->getline() ) ) {
        $line_no++;
        chomp $line;
        next if $line =~ /^\s*#/;    # skip full-line comment
        ( $line, $death_message ) =
          strip_comment( $line, $config_file, $line_no );
        last if ($death_message);
        $line =~ s/^\s*(.*?)\s*$/$1/;    # trim both ends
        next unless $line;

        # look for something of the general form
        #    newname { body }
        # or just
        #    body

        if ( $line =~ /^((\w+)\s*\{)?([^}]*)(\})?$/ ) {
            my ( $newname, $body, $curly ) = ( $2, $3, $4 );

            # handle a new alias definition
            if ($newname) {
                if ($name) {
                    $death_message =
"No '}' seen after $name and before $newname in config file $config_file line $line_no\n";
                    last;
                }
                $name = $newname;

                if ( ${$rexpansion}{$name} ) {
                    local $" = ')(';
                    my @names = sort keys %$rexpansion;
                    $death_message =
                        "Here is a list of all installed aliases\n(@names)\n"
                      . "Attempting to redefine alias ($name) in config file $config_file line $line_no\n";
                    last;
                }
                ${$rexpansion}{$name} = [];
            }

            # now do the body
            if ($body) {

                my ( $rbody_parts, $msg ) = parse_args($body);
                if ($msg) {
                    $death_message = <<EOM;
Error reading file '$config_file' at line number $line_no.
$msg
Please fix this line or use -npro to avoid reading this file
EOM
                    last;
                }

                if ($name) {

                    # remove leading dashes if this is an alias
                    foreach (@$rbody_parts) { s/^\-+//; }
                    push @{ ${$rexpansion}{$name} }, @$rbody_parts;
                }
                else {
                    push( @config_list, @$rbody_parts );
                }
            }

            if ($curly) {
                unless ($name) {
                    $death_message =
"Unexpected '}' seen in config file $config_file line $line_no\n";
                    last;
                }
                $name = undef;
            }
        }
    }
    eval { $fh->close() };
    return ( \@config_list, $death_message );
}

2593

2594

sub strip_comment {

    # Remove a trailing '#' comment from one line of a config file,
    # leaving alone any '#' which is inside a quoted string.
    #
    # Parameters:
    #   $instr       - the input line
    #   $config_file - name of the config file, used in the error message
    #   $line_no     - line number, used in the error message
    #
    # Returns ( $text, $msg ), where $msg is non-empty if a quoted string
    # was still open at the end of the line.
    my ( $instr, $config_file, $line_no ) = @_;
    my $msg = "";

    # nothing to do if no comments
    return ( $instr, $msg ) if ( $instr !~ /#/ );

    # use simple method of no quotes
    if ( $instr !~ /['"]/ ) {
        $instr =~ s/\s*\#.*$//;    # simple trim
        return ( $instr, $msg );
    }

    # handle comments and quotes by scanning one character at a time
    my $outstr     = "";
    my $quote_char = "";
  CHAR:
    while (1) {

        # not in a quoted string: accumulate characters, watching for the
        # start of a quoted string or of a comment
        if ( !$quote_char ) {
            if ( $instr =~ /\G([\"\'])/gc ) {
                $quote_char = $1;
                $outstr .= $quote_char;
                next CHAR;
            }
            last CHAR if ( $instr =~ /\G#/gc );
            last CHAR unless ( $instr =~ /\G(.)/gc );
            $outstr .= $1;
            next CHAR;
        }

        # in a quoted string: looking for ending quote character
        if ( $instr =~ /\G($quote_char)/gc ) {
            $outstr .= $1;
            $quote_char = "";
            next CHAR;
        }
        if ( $instr =~ /\G(.)/gc ) {
            $outstr .= $1;
            next CHAR;
        }

        # error..we reached the end without seeing the ending quote char
        $msg = <<EOM;
Error reading file $config_file at line number $line_no.
Did not see ending quote character <$quote_char> in this text:
$instr
Please fix this line or use -npro to avoid reading this file
EOM
        last CHAR;
    }
    return ( $outstr, $msg );
}

2656

2657

sub parse_args {

    # Parse a command string containing multiple string with possible
    # quotes, into individual commands.  It might look like this, for example:
    #
    #    -wba=" + - "  -some-thing -wbb='. && ||'
    #
    # There is no need, at present, to handle escaped quote characters.
    # (They are not perltidy tokens, so needn't be in strings).
    #
    # Returns ( \@body_parts, $msg ).  Quote characters are removed from
    # the parts, and whitespace separates parts except inside quotes.
    # $msg is non-empty if a quoted string was still open at the end.

    my ($body)     = @_;
    my @body_parts = ();
    my $quote_char = "";
    my $part       = "";
    my $msg        = "";

    # save the part collected so far, unless it is empty
    my $save_part = sub {
        push @body_parts, $part if length($part);
        return;
    };

  CHAR:
    while (1) {

        # not in a quoted string: accumulate characters, watching for the
        # start of a quoted string or for a separator
        if ( !$quote_char ) {
            if ( $body =~ /\G([\"\'])/gc ) {
                $quote_char = $1;
                next CHAR;
            }
            if ( $body =~ /\G(\s+)/gc ) {
                $save_part->();
                $part = "";
                next CHAR;
            }
            if ( $body =~ /\G(.)/gc ) {
                $part .= $1;
                next CHAR;
            }

            # end of the text
            $save_part->();
            last CHAR;
        }

        # in a quoted string: looking for ending quote character
        if ( $body =~ /\G($quote_char)/gc ) {
            $quote_char = "";
            next CHAR;
        }
        if ( $body =~ /\G(.)/gc ) {
            $part .= $1;
            next CHAR;
        }

        # error..we reached the end without seeing the ending quote char
        $save_part->();
        $msg = <<EOM;
Did not see ending quote character <$quote_char> in this text:
$body
EOM
        last CHAR;
    }
    return ( \@body_parts, $msg );
}

2714

2715

sub dump_long_names {

    # Print a legend of the Getopt codes to STDOUT, followed by the given
    # option specifications in sorted order, one per line.
    my $legend = <<EOM;
# Command line long names (passed to GetOptions)
#---------------------------------------------------------------
# here is a summary of the Getopt codes:
# <none> does not take an argument
# =s takes a mandatory string
# :s takes an optional string
# =i takes a mandatory integer
# :i takes an optional integer
# ! does not take an argument and may be negated
# i.e., -foo and -nofoo are allowed
# a double dash signals the end of the options list
#
#---------------------------------------------------------------
EOM
    print STDOUT $legend;

    for my $long_name ( sort @_ ) {
        print STDOUT "$long_name\n";
    }
}

2736

2737

sub dump_defaults {

    # Print a heading followed by the given default options, in sorted
    # order and one per line, to STDOUT.
    my @defaults = sort @_;
    print STDOUT "Default command line options:\n";

    # print the sorted copy; it was previously built but never used
    foreach my $default (@defaults) { print STDOUT "$default\n" }
}

2742

2743

sub readable_options {

    # Return options for this run as a string which could be
    # put in a perltidyrc file.
    #
    # Parameters:
    #   $rOpts          - ref to the option hash (long names as keys)
    #   $roption_string - ref to the list of Getopt specifications, such
    #                     as 'name!' or 'name=i'; it is not modified
    #
    # The result has two comment lines followed by one '--name' line per
    # key of %$rOpts, in sorted order.
    my ( $rOpts, $roption_string ) = @_;
    my %Getopt_flags;
    my $readable_options = "# Final parameter set for this run.\n";
    $readable_options .=
      "# See utility 'perltidyrc_dump.pl' for nicer formatting.\n";

    # Split each specification into a name and its Getopt flag.  Work on
    # a copy: the loop variable aliases the caller's list, so assigning
    # to it would strip the flags from that list.
    foreach my $opt_spec ( @{$roption_string} ) {
        my $opt  = $opt_spec;
        my $flag = "";
        if ( $opt_spec =~ /(.*)(!|=.*)$/ ) {
            $opt  = $1;
            $flag = $2;
        }
        if ( defined( $rOpts->{$opt} ) ) {
            $Getopt_flags{$opt} = $flag;
        }
    }

    foreach my $key ( sort keys %{$rOpts} ) {
        my $flag   = $Getopt_flags{$key};
        my $value  = $rOpts->{$key};
        my $prefix = '--';
        my $suffix = "";
        if ($flag) {

            # option with a value: quote it unless it is all digits
            if ( $flag =~ /^=/ ) {
                if ( $value !~ /^\d+$/ ) { $value = '"' . $value . '"' }
                $suffix = "=" . $value;
            }

            # negatable switch: a false value is written as '--noname'
            elsif ( $flag =~ /^!/ ) {
                $prefix .= "no" unless ($value);
            }
            else {

                # shouldn't happen
                $readable_options .=
                  "# ERROR in dump_options: unrecognized flag $flag for $key\n";
            }
        }
        $readable_options .= $prefix . $key . $suffix . "\n";
    }
    return $readable_options;
}

2787

2788

sub show_version {

    # Print the version banner, followed by the licensing and
    # documentation notes, to the currently selected output handle.
    my $banner = <<"EOM";
This is perltidy, v$VERSION



Perltidy is free software and may be copied under the terms of the GNU
General Public License, which is included in the distribution files.

Complete documentation for perltidy can be found using 'man perltidy'
or on the internet at http://perltidy.sourceforge.net.
EOM
    print $banner;
}

2801

2802

sub usage {

    # Print the brief help text (short option forms only) to STDOUT.
    # The text interpolates $VERSION.
    #
    # Two errors in the help text are corrected here:
    #  - 'recommeded' is now spelled 'recommended' (-t)
    #  - the -vtc values read 0, 1, 2 (the last was a second '1=never')
    print STDOUT <<EOF;
This is perltidy version $VERSION, a perl script indenter. Usage:

perltidy [ options ] file1 file2 file3 ...
(output goes to file1.tdy, file2.tdy, file3.tdy, ...)
perltidy [ options ] file1 -o outfile
perltidy [ options ] file1 -st >outfile
perltidy [ options ] <infile >outfile

Options have short and long forms. Short forms are shown; see
man pages for long forms. Note: '=s' indicates a required string,
and '=n' indicates a required integer.

I/O control
-h show this help
-o=file name of the output file (only if single input file)
-oext=s change output extension from 'tdy' to s
-opath=path change path to be 'path' for output files
-b backup original to .bak and modify file in-place
-bext=s change default backup extension from 'bak' to s
-q deactivate error messages (for running under editor)
-w include non-critical warning messages in the .ERR error output
-syn run perl -c to check syntax (default under unix systems)
-log save .LOG file, which has useful diagnostics
-f force perltidy to read a binary file
-g like -log but writes more detailed .LOG file, for debugging scripts
-opt write the set of options actually used to a .LOG file
-npro ignore .perltidyrc configuration command file
-pro=file read configuration commands from file instead of .perltidyrc
-st send output to standard output, STDOUT
-se send error output to standard error output, STDERR
-v display version number to standard output and quit

Basic Options:
-i=n use n columns per indentation level (default n=4)
-t tabs: use one tab character per indentation level, not recommended
-nt no tabs: use n spaces per indentation level (default)
-et=n entab leading whitespace n spaces per tab; not recommended
-io "indent only": just do indentation, no other formatting.
-sil=n set starting indentation level to n; use if auto detection fails
-ole=s specify output line ending (s=dos or win, mac, unix)
-ple keep output line endings same as input (input must be filename)

Whitespace Control
-fws freeze whitespace; this disables all whitespace changes
and disables the following switches:
-bt=n sets brace tightness, n= (0 = loose, 1=default, 2 = tight)
-bbt same as -bt but for code block braces; same as -bt if not given
-bbvt block braces vertically tight; use with -bl or -bli
-bbvtl=s make -bbvt to apply to selected list of block types
-pt=n paren tightness (n=0, 1 or 2)
-sbt=n square bracket tightness (n=0, 1, or 2)
-bvt=n brace vertical tightness,
n=(0=open, 1=close unless multiple steps on a line, 2=always close)
-pvt=n paren vertical tightness (see -bvt for n)
-sbvt=n square bracket vertical tightness (see -bvt for n)
-bvtc=n closing brace vertical tightness:
n=(0=open, 1=sometimes close, 2=always close)
-pvtc=n closing paren vertical tightness, see -bvtc for n.
-sbvtc=n closing square bracket vertical tightness, see -bvtc for n.
-ci=n sets continuation indentation=n, default is n=2 spaces
-lp line up parentheses, brackets, and non-BLOCK braces
-sfs add space before semicolon in for( ; ; )
-aws allow perltidy to add whitespace (default)
-dws delete all old non-essential whitespace
-icb indent closing brace of a code block
-cti=n closing indentation of paren, square bracket, or non-block brace:
n=0 none, =1 align with opening, =2 one full indentation level
-icp equivalent to -cti=2
-wls=s want space left of tokens in string; i.e. -nwls='+ - * /'
-wrs=s want space right of tokens in string;
-sts put space before terminal semicolon of a statement
-sak=s put space between keywords given in s and '(';
-nsak=s no space between keywords in s and '('; i.e. -nsak='my our local'

Line Break Control
-fnl freeze newlines; this disables all line break changes
and disables the following switches:
-anl add newlines; ok to introduce new line breaks
-bbs add blank line before subs and packages
-bbc add blank line before block comments
-bbb add blank line between major blocks
-sob swallow optional blank lines
-ce cuddled else; use this style: '} else {'
-dnl delete old newlines (default)
-mbl=n maximum consecutive blank lines (default=1)
-l=n maximum line length; default n=80
-bl opening brace on new line
-sbl opening sub brace on new line. value of -bl is used if not given.
-bli opening brace on new line and indented
-bar opening brace always on right, even for long clauses
-vt=n vertical tightness (requires -lp); n controls break after opening
token: 0=never 1=no break if next line balanced 2=no break
-vtc=n vertical tightness of closing container; n controls if closing
token starts new line: 0=always 1=not unless list 2=never
-wba=s want break after tokens in string; i.e. wba=': .'
-wbb=s want break before tokens in string

Following Old Breakpoints
-kis keep interior semicolons. Allows multiple statements per line.
-boc break at old comma breaks: turns off all automatic list formatting
-bol break at old logical breakpoints: or, and, ||, && (default)
-bok break at old list keyword breakpoints such as map, sort (default)
-bot break at old conditional (ternary ?:) operator breakpoints (default)
-cab=n break at commas after a comma-arrow (=>):
n=0 break at all commas after =>
n=1 stable: break unless this breaks an existing one-line container
n=2 break only if a one-line container cannot be formed
n=3 do not treat commas after => specially at all

Comment controls
-ibc indent block comments (default)
-isbc indent spaced block comments; may indent unless no leading space
-msc=n minimum desired spaces to side comment, default 4
-fpsc=n fix position for side comments; default 0;
-csc add or update closing side comments after closing BLOCK brace
-dcsc delete closing side comments created by a -csc command
-cscp=s change closing side comment prefix to be other than '## end'
-cscl=s change closing side comment to apply to selected list of blocks
-csci=n minimum number of lines needed to apply a -csc tag, default n=6
-csct=n maximum number of columns of appended text, default n=20
-cscw causes warning if old side comment is overwritten with -csc

-sbc use 'static block comments' identified by leading '##' (default)
-sbcp=s change static block comment identifier to be other than '##'
-osbc outdent static block comments

-ssc use 'static side comments' identified by leading '##' (default)
-sscp=s change static side comment identifier to be other than '##'

Delete selected text
-dac delete all comments AND pod
-dbc delete block comments
-dsc delete side comments
-dp delete pod

Send selected text to a '.TEE' file
-tac tee all comments AND pod
-tbc tee block comments
-tsc tee side comments
-tp tee pod

Outdenting
-olq outdent long quoted strings (default)
-olc outdent a long block comment line
-ola outdent statement labels
-okw outdent control keywords (redo, next, last, goto, return)
-okwl=s specify alternative keywords for -okw command

Other controls
-mft=n maximum fields per table; default n=40
-x do not format lines before hash-bang line (i.e., for VMS)
-asc allows perltidy to add a ';' when missing (default)
-dsm allows perltidy to delete an unnecessary ';' (default)

Combinations of other parameters
-gnu attempt to follow GNU Coding Standards as applied to perl
-mangle remove as many newlines as possible (but keep comments and pods)
-extrude insert as many newlines as possible

Dump and die, debugging
-dop dump options used in this run to standard output and quit
-ddf dump default options to standard output and quit
-dsn dump all option short names to standard output and quit
-dln dump option long names to standard output and quit
-dpro dump whatever configuration file is in effect to standard output
-dtt dump all token types to standard output and quit

HTML
-html write an html file (see 'man perl2web' for many options)
Note: when -html is used, no indentation or formatting are done.
Hint: try perltidy -html -css=mystyle.css filename.pl
and edit mystyle.css to change the appearance of filename.html.
-nnn gives line numbers
-pre only writes out <pre>..</pre> code section
-toc places a table of contents to subs at the top (default)
-pod passes pod text through pod2html (default)
-frm write html as a frame (3 files)
-text=s extra extension for table of contents if -frm, default='toc'
-sext=s extra extension for file content if -frm, default='src'

A prefix of "n" negates short form toggle switches, and a prefix of "no"
negates the long forms. For example, -nasc means don't add missing
semicolons.

If you are unable to see this entire text, try "perltidy -h | more"
For more detailed information, and additional options, try "man perltidy",
or go to the perltidy home page at http://perltidy.sourceforge.net
EOF

}

2995

2996

sub process_this_file {

    # Feed every line produced by the line source to the formatter,
    # then let both objects finish up.
    #   first argument  = object supplying get_line() and
    #                     report_tokenization_errors()
    #   second argument = object supplying write_line() and
    #                     finish_formatting()
    my ( $source, $formatter ) = @_;

    # keep going until get_line returns a false value
    while (1) {
        my $line_of_tokens = $source->get_line();
        last unless $line_of_tokens;
        $formatter->write_line($line_of_tokens);
    }

    # finish up; any error raised by finish_formatting is trapped by the
    # eval and discarded, so the tokenization report is always made
    eval { $formatter->finish_formatting() };
    $source->report_tokenization_errors();
}

3009

3010

sub check_syntax {

    # Use 'perl -c' to make sure that we did not create bad syntax.
    # This is a very good independent check for programming errors.
    #
    # Arguments: ( $ifname, $ofname, $logger_object, $rOpts ), where
    # $ifname and $ofname are the names of the input and output files.
    #
    # Steps:
    #  - check syntax of the input file
    #  - if bad, all done (could be an incomplete code snippet)
    #  - if infile syntax ok, then check syntax of the output file;
    #    if outfile syntax is bad, issue a warning; this implies a code bug!
    #
    # Returns the "infile_syntax_ok" flag: -1 bad, 0 unknown, 1 good
    my ( $ifname, $ofname, $logger_object, $rOpts ) = @_;
    my $dashes = '-' x 42 . "\n";

    # Be sure we invoke perl with -c.  Perl will accept repeated flags
    # like '-c -c', so it is safest to append another -c rather than try
    # to find an interior bundled c, as in -Tc, because such a 'c' might
    # be in a quoted string, for example.
    my $flags = $rOpts->{'perl-syntax-check-flags'};
    $flags .= " -c" unless ( $flags =~ /(^-c|\s+-c)/ );

    # likewise be sure we invoke perl with -x if requested
    if ( $rOpts->{'look-for-hash-bang'} && $flags !~ /(^-x|\s+-x)/ ) {
        $flags .= " -x";
    }

    # this shouldn't happen unless a temporary file couldn't be made
    if ( $ifname eq '-' ) {
        $logger_object->write_logfile_entry(
            "Cannot run perl -c on STDIN and STDOUT\n");
        return 0;
    }

    $logger_object->write_logfile_entry(
        "checking input file syntax with perl $flags\n");
    $logger_object->write_logfile_entry($dashes);

    # Not all operating systems/shells support redirection of the
    # standard error output.
    my $error_redirection = ( $^O eq 'VMS' ) ? "" : '2>&1';

    my $input_report = do_syntax_check( $ifname, $flags, $error_redirection );
    $logger_object->write_logfile_entry("$input_report\n");

    # Input file problems: report them and skip the output file check.
    if ( $input_report !~ /syntax\s*OK/ ) {

        # Only warn of perl -c syntax errors.  Other messages,
        # such as missing modules, are too common.  They can be
        # seen by running with perltidy -w
        $logger_object->complain("A syntax check using perl $flags gives: \n");
        $logger_object->complain($dashes);
        $logger_object->complain("$input_report\n");
        $logger_object->complain($dashes);
        $logger_object->write_logfile_entry($dashes);
        $logger_object->write_logfile_entry(
"The output file will not be checked because of input file problems\n"
        );
        return -1;
    }

    # The input file is good, so the output file must be good too.
    $logger_object->write_logfile_entry($dashes);
    $logger_object->write_logfile_entry(
        "checking output file syntax with perl $flags ...\n");
    $logger_object->write_logfile_entry($dashes);

    my $output_report = do_syntax_check( $ofname, $flags, $error_redirection );
    $logger_object->write_logfile_entry("$output_report\n");

    if ( $output_report !~ /syntax\s*OK/ ) {
        $logger_object->write_logfile_entry($dashes);
        $logger_object->warning(
"The output file has a syntax error when tested with perl $flags $ofname !\n"
        );
        $logger_object->warning(
            "This implies an error in perltidy; the file $ofname is bad\n");
        $logger_object->report_definite_bug();

        # the perl version number will be helpful for diagnosing the problem
        my $perl_version = qx/perl -v $error_redirection/;
        $logger_object->write_logfile_entry( $perl_version . "\n" );
    }
    return 1;
}

3101

3102

sub do_syntax_check {

    # Run 'perl' on one file and return whatever the command prints.
    #   $fname             = name of the file to check
    #   $flags             = flag string for perl, such as '-c'
    #   $error_redirection = shell redirection text (may be empty)
    #
    # NOTE(review): the file name and flags are wrapped in double quotes
    # and interpolated into a command line without further escaping, so
    # a name containing a double quote or other shell syntax is not
    # protected.
    my ( $fname, $flags, $error_redirection ) = @_;

    # The filename has to be quoted in case it has unusual characters
    # or spaces.  Example: this filename #CM11.pm# gives trouble.
    my $quoted_fname = '"' . $fname . '"';

    # Under VMS something like -T will become -t (and an error) so the
    # flags are quoted as well.  Double quotes seem to work on
    # Unix/Windows/VMS, but this may not work on all systems.  (Single
    # quotes do not work under Windows).  It could become necessary to
    # put double quotes around each flag, such as:  -"c"  -"T"
    # We may eventually need some system-dependent coding here.
    my $quoted_flags = '"' . $flags . '"';

    # now wish for luck...
    my $command = "perl $quoted_flags $quoted_fname $error_redirection";
    return qx/$command/;
}

3120

3121

#####################################################################

3122

#

3123

# This is a stripped down version of IO::Scalar

3124

# Given a reference to a scalar, it supplies either:

3125

# a getline method which reads lines (mode='r'), or

3126

# a print method which writes lines (mode='w')

3127

#

3128

#####################################################################

3129

package Perl::Tidy::IOScalar;
use Carp;

sub new {

    # Create a handle-like object on top of a scalar.
    #   $rscalar = reference to the scalar to be read or written
    #   $mode    = 'r' to read it line by line with getline, or
    #              'w' to append to it with print (it is emptied first)
    # Confesses if $rscalar is not a ref to SCALAR or $mode is neither
    # 'r' nor 'w'.
    my ( $package, $rscalar, $mode ) = @_;
    my $ref = ref $rscalar;
    if ( $ref ne 'SCALAR' ) {
        confess <<EOM;
------------------------------------------------------------------------
expecting ref to SCALAR but got ref to ($ref); trace follows:
------------------------------------------------------------------------
EOM

    }
    if ( $mode eq 'w' ) {
        $$rscalar = "";
        return bless [ $rscalar, $mode ], $package;
    }
    elsif ( $mode eq 'r' ) {

        # Convert a scalar to an array.
        # This avoids looking for "\n" on each call to getline
        #
        # The -1 limit keeps trailing empty fields, so that trailing
        # blank lines of the text are not lost.  With it, a text which
        # ends in a newline yields one extra empty field at the end,
        # which is removed before the newlines are put back.
        my $text = defined( ${$rscalar} ) ? ${$rscalar} : "";
        my @array = split /\n/, $text, -1;
        if ( @array && $array[-1] eq "" ) { pop @array }
        @array = map { $_ . "\n" } @array;
        my $i_next = 0;
        return bless [ \@array, $mode, $i_next ], $package;
    }
    else {
        confess <<EOM;
------------------------------------------------------------------------
expecting mode = 'r' or 'w' but got mode ($mode); trace follows:
------------------------------------------------------------------------
EOM
    }
}

sub getline {

    # Return the next stored line and advance the read position.
    # Returns undef once the position is past the last line.
    # Confesses unless the object was created with mode 'r'.
    my $self = shift;
    my $mode = $self->[1];
    if ( $mode ne 'r' ) {
        confess <<EOM;
------------------------------------------------------------------------
getline call requires mode = 'r' but mode = ($mode); trace follows:
------------------------------------------------------------------------
EOM
    }
    my $i = $self->[2]++;
    return $self->[0]->[$i];
}

sub print {

    # Append the first argument to the underlying scalar.
    # Confesses unless the object was created with mode 'w'.
    my $self = shift;
    my $mode = $self->[1];
    if ( $mode ne 'w' ) {
        confess <<EOM;
------------------------------------------------------------------------
print call requires mode = 'w' but mode = ($mode); trace follows:
------------------------------------------------------------------------
EOM
    }
    ${ $self->[0] } .= $_[0];
}

# nothing to release; present so the object can be closed like a file handle
sub close { return }

3192

3193

#####################################################################

3194

#

3195

# This is a stripped down version of IO::ScalarArray

3196

# Given a reference to an array, it supplies either:

3197

# a getline method which reads lines (mode='r'), or

3198

# a print method which writes lines (mode='w')

3199

#

3200

# NOTE: this routine assumes that there aren't any embedded

3201

# newlines within any of the array elements. There are no checks

3202

# for that.

3203

#

3204

#####################################################################

3205

package Perl::Tidy::IOScalarArray;
use Carp;

sub new {

    # Wrap an array so that it can be used like a file handle.
    #   $rarray = reference to the array of lines
    #   $mode   = 'r' to hand the elements out one by one with getline,
    #             'w' to collect printed lines (the array is emptied first)
    # Confesses on anything other than an ARRAY ref or on an unknown mode.
    my ( $package, $rarray, $mode ) = @_;
    my $ref = ref $rarray;
    if ( $ref ne 'ARRAY' ) {
        confess <<EOM;
------------------------------------------------------------------------
expecting ref to ARRAY but got ref to ($ref); trace follows:
------------------------------------------------------------------------
EOM

    }

    # reading: element 2 of the object is the index of the next line
    if ( $mode eq 'r' ) {
        return bless [ $rarray, $mode, 0 ], $package;
    }

    # writing: start from an empty array
    if ( $mode eq 'w' ) {
        @{$rarray} = ();
        return bless [ $rarray, $mode ], $package;
    }

    confess <<EOM;
------------------------------------------------------------------------
expecting mode = 'r' or 'w' but got mode ($mode); trace follows:
------------------------------------------------------------------------
EOM
}

sub getline {

    # Hand out the next array element and step the read index.
    # Only allowed for objects created with mode 'r'.
    my ($self) = @_;
    my $mode = $self->[1];
    if ( $mode ne 'r' ) {
        confess <<EOM;
------------------------------------------------------------------------
getline requires mode = 'r' but mode = ($mode); trace follows:
------------------------------------------------------------------------
EOM
    }
    return $self->[0]->[ $self->[2]++ ];
}

sub print {

    # Push the first argument onto the end of the array.
    # Only allowed for objects created with mode 'w'.
    my ( $self, $line ) = @_;
    my $mode = $self->[1];
    if ( $mode ne 'w' ) {
        confess <<EOM;
------------------------------------------------------------------------
print requires mode = 'w' but mode = ($mode); trace follows:
------------------------------------------------------------------------
EOM
    }
    push @{ $self->[0] }, $line;
}

# closing is a no-op for an in-memory array
sub close { return }

3263

3264

#####################################################################

3265

#

3266

# the Perl::Tidy::LineSource class supplies an object with a 'get_line()' method

3267

# which returns the next line to be parsed

3268

#

3269

#####################################################################

3270

3271

package Perl::Tidy::LineSource;

sub new {

    # Open the input stream and build the line source object.
    #   $input_file               = the input, as accepted by
    #                               Perl::Tidy::streamhandle
    #   $rOpts                    = ref to the options hash
    #   $rpending_logfile_message = ref to a string collecting notes
    #                               for the log file
    # Returns undef if no input handle could be obtained.
    my ( $class, $input_file, $rOpts, $rpending_logfile_message ) = @_;

    # the line ending is only looked up when it is to be preserved
    my $input_line_ending =
      $rOpts->{'preserve-line-endings'}
      ? Perl::Tidy::find_input_line_ending($input_file)
      : undef;

    ( my $fh, $input_file ) = Perl::Tidy::streamhandle( $input_file, 'r' );
    return undef unless $fh;

    # A syntax check needs a real file.  When the input is standard
    # input or an object the check is switched off, because the
    # temporary files it would need cause problems on many systems.
    my $input_file_copy = undef;
    my $reading_stream = ( $input_file eq '-' || ref $input_file );
    if ( $reading_stream && $rOpts->{'check-syntax'} ) {
        $rOpts->{'check-syntax'} = 0;
        $input_file_copy = '-';

        $$rpending_logfile_message .= <<EOM;
Note: --syntax check will be skipped because standard input is used
EOM

    }

    my %self = (
        _fh                => $fh,
        _fh_copy           => undef,
        _filename          => $input_file,
        _input_file_copy   => $input_file_copy,
        _input_line_ending => $input_line_ending,
        _rinput_buffer     => [],
        _started           => 0,
    );
    return bless \%self, $class;
}

sub get_input_file_copy_name {

    # name of the input file copy if there is one, otherwise the
    # name of the input file itself
    my ($self) = @_;
    return $self->{_input_file_copy} || $self->{_filename};
}

sub close_input_file {

    # close the input handle, and the copy handle if one exists;
    # errors while closing are ignored
    my ($self) = @_;
    eval { $self->{_fh}->close() };
    eval { $self->{_fh_copy}->close() } if $self->{_fh_copy};
}

sub get_line {

    # Return the next input line; the value is false at end of input.
    # Lines left over from splitting a mac-style first line are handed
    # out before anything more is read from the input handle.
    my ($self)  = @_;
    my $fh_copy = $self->{_fh_copy};
    my $rbuffer = $self->{_rinput_buffer};
    my $line    = undef;

    if ( scalar( @{$rbuffer} ) ) {
        $line = shift @{$rbuffer};
    }
    else {
        $line = $self->{_fh}->getline();

        # patch to read raw mac files under unix, dos
        # see if the first line has embedded \r's
        if ( $line && !$self->{_started} ) {
            if ( $line =~ /[\015][^\015\012]/ ) {

                # found one -- break the line up and store in a buffer
                @{$rbuffer} = map { $_ . "\n" } split /\015/, $line;
                $line = shift @{$rbuffer};
            }
            $self->{_started}++;
        }
    }

    # echo the line to the copy handle when there is one
    if ( $line && $fh_copy ) { $fh_copy->print($line); }
    return $line;
}

3359

3360

#####################################################################

3361

#

3362

# the Perl::Tidy::LineSink class supplies a write_line method for

3363

# actual file writing

3364

#

3365

#####################################################################

3366

3367

package Perl::Tidy::LineSink;

sub new {

    # Build the line sink object.
    #   $output_file              = the output, as accepted by
    #                               Perl::Tidy::streamhandle
    #   $tee_file                 = name of the tee file (only opened
    #                               when a teed line is first written)
    #   $line_separator           = text appended to every output line
    #   $rOpts                    = ref to the options hash
    #   $rpending_logfile_message = ref to a string collecting notes
    #                               for the log file
    #   $binmode                  = true to put the handles in binmode
    # Dies if the format is 'tidy' and no output handle is obtained.
    my ( $class, $output_file, $tee_file, $line_separator, $rOpts,
        $rpending_logfile_message, $binmode )
      = @_;
    my $fh               = undef;
    my $fh_copy          = undef;
    my $fh_tee           = undef;
    my $output_file_copy = "";
    my $output_file_open = 0;

    # the output stream is only opened here for the 'tidy' format
    if ( $rOpts->{'format'} eq 'tidy' ) {
        ( $fh, $output_file ) = Perl::Tidy::streamhandle( $output_file, 'w' );
        unless ($fh) { die "Cannot write to output stream\n"; }
        $output_file_open = 1;
        if ($binmode) {
            if ( ref($fh) eq 'IO::File' ) {
                binmode $fh;
            }
            if ( $output_file eq '-' ) { binmode STDOUT }
        }
    }

    # A syntax check needs a real file.  When the output is standard
    # output or an object the check is switched off, because the
    # temporary files it would need cause problems on many systems.
    if ( $output_file eq '-' || ref $output_file ) {
        if ( $rOpts->{'check-syntax'} ) {
            $rOpts->{'check-syntax'} = 0;
            $output_file_copy = '-';
            $$rpending_logfile_message .= <<EOM;
Note: --syntax check will be skipped because standard output is used
EOM

        }
    }

    bless {
        _fh               => $fh,
        _fh_copy          => $fh_copy,
        _fh_tee           => $fh_tee,
        _output_file      => $output_file,
        _output_file_open => $output_file_open,
        _output_file_copy => $output_file_copy,
        _tee_flag         => 0,
        _tee_file         => $tee_file,
        _tee_file_opened  => 0,
        _line_separator   => $line_separator,
        _binmode          => $binmode,
    }, $class;
}

sub write_line {

    # Write one line to the output, and to the copy and tee handles when
    # they are in use.  Any trailing newline is replaced by the line
    # separator given to new.
    #
    # The line is copied before it is changed.  Working on $_[0]
    # directly would modify the caller's variable, and would die with
    # "Modification of a read-only value attempted" for a constant.
    my ( $self, $line ) = @_;
    my $fh      = $self->{_fh};
    my $fh_copy = $self->{_fh_copy};

    chomp $line;
    $line .= $self->{_line_separator};

    $fh->print($line) if ( $self->{_output_file_open} );
    print $fh_copy $line if ( $fh_copy && $self->{_output_file_copy} );

    if ( $self->{_tee_flag} ) {

        # the tee file is opened on first use
        unless ( $self->{_tee_file_opened} ) { $self->really_open_tee_file() }
        my $fh_tee = $self->{_fh_tee};
        print $fh_tee $line;
    }
}

sub get_output_file_copy {

    # name of the output file copy if there is one, otherwise the
    # name of the output file itself
    my $self   = shift;
    my $ofname = $self->{_output_file_copy};
    unless ($ofname) {
        $ofname = $self->{_output_file};
    }
    return $ofname;
}

sub tee_on {

    # start sending written lines to the tee file as well
    my $self = shift;
    $self->{_tee_flag} = 1;
}

sub tee_off {

    # stop sending written lines to the tee file
    my $self = shift;
    $self->{_tee_flag} = 0;
}

sub really_open_tee_file {

    # Open the tee file for writing and remember its handle.
    # Dies if the file cannot be opened.
    my $self     = shift;
    my $tee_file = $self->{_tee_file};
    my $fh_tee;
    $fh_tee = IO::File->new(">$tee_file")
      or die("couldn't open TEE file $tee_file: $!\n");
    binmode $fh_tee if $self->{_binmode};
    $self->{_tee_file_opened} = 1;
    $self->{_fh_tee}          = $fh_tee;
}

sub close_output_file {

    # close the output, copy and tee handles which are in use;
    # errors while closing are ignored
    my $self = shift;
    eval { $self->{_fh}->close() } if $self->{_output_file_open};
    eval { $self->{_fh_copy}->close() } if ( $self->{_output_file_copy} );
    $self->close_tee_file();
}

sub close_tee_file {

    # close the tee file if it was opened
    my $self = shift;

    if ( $self->{_tee_file_opened} ) {
        eval { $self->{_fh_tee}->close() };
        $self->{_tee_file_opened} = 0;
    }
}

3489

3490

#####################################################################

3491

#

3492

# The Perl::Tidy::Diagnostics class writes the DIAGNOSTICS file, which is

3493

# useful for program development.

3494

#

3495

# Only one such file is created regardless of the number of input

3496

# files processed. This allows the results of processing many files

3497

# to be summarized in a single file.

3498

#

3499

#####################################################################

3500

3501

package Perl::Tidy::Diagnostics;

sub new {

    # create the diagnostics object; nothing is opened until the first
    # call to write_diagnostics
    my $class = shift;
    bless {
        _write_diagnostics_count => 0,
        _last_diagnostic_file    => "",
        _input_file              => "",
        _fh                      => undef,
    }, $class;
}

sub set_input_file {

    # remember the name of the file now being processed; it is used to
    # label the messages written by write_diagnostics
    my $self = shift;
    $self->{_input_file} = $_[0];
}

# This is a diagnostic routine which is useful for program development.
# Output from debug messages goes to a file named DIAGNOSTICS, where
# the messages are labeled by file and line.  This allows many files to
# be scanned at once for some particular condition of interest.
sub write_diagnostics {

    # Write the message text in @_ to the DIAGNOSTICS file, prefixed by
    # the current input line number.  A 'FILE:' heading is written first
    # whenever the input file differs from the one of the previous call.
    # Dies if the DIAGNOSTICS file cannot be opened.
    my $self = shift;

    # the file is opened on the first call
    unless ( $self->{_write_diagnostics_count} ) {

        # Report an open failure with die: the 'death' routine which was
        # called here is not defined in this package, so a failure would
        # have ended with an "Undefined subroutine" error instead of
        # this message.
        open DIAGNOSTICS, ">DIAGNOSTICS"
          or die("couldn't open DIAGNOSTICS: $!\n");
    }

    my $last_diagnostic_file = $self->{_last_diagnostic_file};
    my $input_file           = $self->{_input_file};
    if ( $last_diagnostic_file ne $input_file ) {
        print DIAGNOSTICS "\nFILE:$input_file\n";
    }
    $self->{_last_diagnostic_file} = $input_file;
    my $input_line_number = Perl::Tidy::Tokenizer::get_input_line_number();
    print DIAGNOSTICS "$input_line_number:\t@_";
    $self->{_write_diagnostics_count}++;
}

3541

3542

#####################################################################

3543

#

3544

# The Perl::Tidy::Logger class writes the .LOG and .ERR files

3545

#

3546

#####################################################################

3547

3548

package Perl::Tidy::Logger;

3549

3550

sub new {

    # Constructor for the logger.
    #   $rOpts        - reference to the options hash; other methods of
    #                   this class read keys such as 'logfile-gap',
    #                   'warning-output', 'quiet', 'standard-error-output'
    #                   and 'logfile' from it
    #   $log_file     - destination of the log, opened only in finish()
    #   $warning_file - destination of warnings, opened on the first
    #                   warning; may also be a reference
    #   $saw_extrude  - flag consulted by ask_user_for_bug_report()
    # Returns the new object.
    my ( $class, $rOpts, $log_file, $warning_file, $saw_extrude ) = @_;

    # remove any old error output file; this only makes sense when we were
    # given a file name (not a reference, and not undef)
    if ( defined($warning_file) && !ref($warning_file) ) {
        if ( -e $warning_file ) { unlink($warning_file) }
    }

    return bless {
        _log_file                      => $log_file,
        _rOpts                         => $rOpts,
        _fh_warnings                   => undef,
        _last_input_line_written       => 0,
        _at_end_of_file                => 0,
        _use_prefix                    => 1,
        _block_log_output              => 0,
        _line_of_tokens                => undef,
        _output_line_number            => undef,
        _wrote_line_information_string => 0,
        _wrote_column_headings         => 0,
        _warning_file                  => $warning_file,
        _warning_count                 => 0,
        _complaint_count               => 0,
        _saw_code_bug    => -1,             # -1=no 0=maybe 1=for sure
        _saw_brace_error => 0,
        _saw_extrude     => $saw_extrude,
        _output_array    => [],
    }, $class;
}

3582

3583

sub close_log_file {

    # Close the warnings handle, if one was ever opened, and forget it.
    # Any error raised by the close itself is deliberately ignored.
    my ($self) = @_;
    if ( $self->{_fh_warnings} ) {
        my $fh_warnings = $self->{_fh_warnings};
        eval { $fh_warnings->close() };
        $self->{_fh_warnings} = undef;
    }
}

3591

3592

sub get_warning_count {

    # Accessor: number of warnings counted so far.
    my ($self) = @_;
    return $self->{_warning_count};
}

3596

3597

sub get_use_prefix {

    # Accessor: true while log and warning lines should carry a
    # line-number prefix.
    my ($self) = @_;
    return $self->{_use_prefix};
}

3601

3602

sub block_log_output {

    # Make logfile_output ignore messages until unblock_log_output.
    my ($self) = @_;
    $self->{_block_log_output} = 1;
}

3606

3607

sub unblock_log_output {

    # Let logfile_output accept messages again.
    my ($self) = @_;
    $self->{_block_log_output} = 0;
}

3611

3612

sub interrupt_logfile {

    # Switch off line prefixes, emit an empty warning line, and write a
    # WARNING banner into the log.  resume_logfile is the counterpart.
    my ($self) = @_;
    $self->{_use_prefix} = 0;
    $self->warning("\n");
    my $banner = ( '#' x 24 ) . " WARNING " . ( '#' x 25 ) . "\n";
    $self->write_logfile_entry($banner);
}

3618

3619

sub resume_logfile {

    # Write a closing rule of 60 '#' characters into the log and switch
    # line prefixes back on.
    my ($self) = @_;
    my $rule = ( '#' x 60 ) . "\n";
    $self->write_logfile_entry($rule);
    $self->{_use_prefix} = 1;
}

3624

3625

sub we_are_at_the_last_line {

    # Note the end of input: log a "Last line" entry unless line
    # information was already written for the current line, then mark
    # the logger as being at end of file.
    my ($self) = @_;
    if ( !$self->{_wrote_line_information_string} ) {
        $self->write_logfile_entry("Last line\n\n");
    }
    $self->{_at_end_of_file} = 1;
}

3632

3633

# record some stuff in case we go down in flames
sub black_box {

    # Remember the current line for later log messages and, every so
    # often, echo a shortened copy of the input line into the log.
    #   $line_of_tokens     - hash ref; _line_text, _line_number and
    #                         _rlevels are read here
    #   $output_line_number - stored for make_line_information_string
    my ( $self, $line_of_tokens, $output_line_number ) = @_;
    my $text        = $line_of_tokens->{_line_text};
    my $line_number = $line_of_tokens->{_line_number};

    # save line information in case we have to write a logfile message
    $self->{_line_of_tokens}                = $line_of_tokens;
    $self->{_output_line_number}            = $output_line_number;
    $self->{_wrote_line_information_string} = 0;

    my $lines_since_last = $line_number - $self->{_last_input_line_written};
    my $rOpts            = $self->{_rOpts};

    # echo the line when the gap since the last echoed line is large
    # enough, or when the line starts a sub or package
    if (   ( $lines_since_last >= $rOpts->{'logfile-gap'} )
        || ( $text =~ /^\s*(sub|package)\s+(\w+)/ ) )
    {
        my $rlevels = $line_of_tokens->{_rlevels};
        my $level   = $rlevels->[0];
        $self->{_last_input_line_written} = $line_number;

        # strip leading whitespace and the line ending
        my $out_str = $text;
        $out_str =~ s/^\s*//;
        chomp $out_str;

        # one '.' per indentation level, then the code
        $out_str = ( '.' x $level ) . $out_str;

        # keep at most 35 characters, marking any truncation
        if ( length($out_str) > 35 ) {
            $out_str = substr( $out_str, 0, 35 ) . " ....";
        }
        $self->logfile_output( "", "$out_str\n" );
    }
}

3669

3670

sub write_logfile_entry {

    # Send a message to the log.  The pieces of the message are
    # interpolated into one string, as in "@_".
    my ( $self, @message ) = @_;

    # add leading >>> to avoid confusing error messages and code
    $self->logfile_output( ">>>", "@message" );
}

3676

3677

sub write_column_headings {

    # Append the explanatory text and column headings for the line
    # information table to the log buffer (_output_array), and record
    # that this has been done so that it only happens once.
    my $self = shift;

    $self->{_wrote_column_headings} = 1;
    my $routput_array = $self->{_output_array};

    # NOTE(review): the spacing between the column titles below looks
    # collapsed; confirm the alignment against the upstream source.
    push @{$routput_array}, <<EOM;
The nesting depths in the table below are at the start of the lines.
The indicated output line numbers are not always exact.
ci = levels of continuation indentation; bk = 1 if in BLOCK, 0 if not.

in:out indent c b nesting code + messages; (messages begin with >>>)
lines levels i k (code begins with one '.' per indent level)
------ ----- - - -------- -------------------------------------------
EOM
}

3692

3693

sub make_line_information_string {

    # Make columns of information when a logfile message needs to go out.
    # Uses the line saved by black_box (_line_of_tokens and
    # _output_line_number).  Writes the column headings first if they
    # have not been written yet.
    # Returns the formatted prefix, or "" when no input line number has
    # been recorded.
    my $self                    = shift;
    my $line_of_tokens          = $self->{_line_of_tokens};
    my $input_line_number       = $line_of_tokens->{_line_number};
    my $line_information_string = "";
    if ($input_line_number) {

        my $output_line_number   = $self->{_output_line_number};
        my $brace_depth          = $line_of_tokens->{_curly_brace_depth};
        my $paren_depth          = $line_of_tokens->{_paren_depth};
        my $square_bracket_depth = $line_of_tokens->{_square_bracket_depth};
        my $python_indentation_level =
          $line_of_tokens->{_python_indentation_level};
        my $rlevels         = $line_of_tokens->{_rlevels};
        my $rnesting_tokens = $line_of_tokens->{_rnesting_tokens};
        my $rci_levels      = $line_of_tokens->{_rci_levels};
        my $rnesting_blocks = $line_of_tokens->{_rnesting_blocks};

        my $structural_indentation_level = $$rlevels[0];

        $self->write_column_headings() unless $self->{_wrote_column_headings};

        # keep logfile columns aligned for scripts up to 999 lines;
        # for longer scripts it doesn't really matter.
        # A one-digit number needs two pad characters and a two-digit
        # number needs one, so that both are as wide as three digits.
        my $extra_space = "";
        $extra_space .=
            ( $input_line_number < 10 )  ? "  "
          : ( $input_line_number < 100 ) ? " "
          :                                "";
        $extra_space .=
            ( $output_line_number < 10 )  ? "  "
          : ( $output_line_number < 100 ) ? " "
          :                                 "";

        # there are 2 possible nesting strings:
        # the original which looks like this: (0 [1 {2
        # the new one, which looks like this: {{[
        # the new one is easier to read, and shows the order, but
        # could be arbitrarily long, so we use it unless it is too long
        my $nesting_string =
          "($paren_depth [$square_bracket_depth {$brace_depth";
        my $nesting_string_new = $$rnesting_tokens[0];

        # continuation indentation is shown as one character: '*' means
        # more than 9
        my $ci_level = $$rci_levels[0];
        if ( $ci_level > 9 ) { $ci_level = '*' }

        # bk is 1 when the nesting-blocks string ends in a 1
        my $bk = ( $$rnesting_blocks[0] =~ /1$/ ) ? '1' : '0';

        # prefer the new nesting string, padded to 8 columns, if it fits
        if ( length($nesting_string_new) <= 8 ) {
            $nesting_string =
              $nesting_string_new . " " x ( 8 - length($nesting_string_new) );
        }
        if ( $python_indentation_level < 0 ) { $python_indentation_level = 0 }
        $line_information_string =
"L$input_line_number:$output_line_number$extra_space i$python_indentation_level:$structural_indentation_level $ci_level $bk $nesting_string";
    }
    return $line_information_string;
}

3752

3753

sub logfile_output {

    # Append one entry to the log buffer (_output_array).
    #   $prompt - marker placed between the line information and the text
    #   $msg    - the text itself
    # Nothing is recorded while log output is blocked.  The line
    # information prefix is omitted at end of file, when prefixes are
    # switched off, or when no line information is available.
    my ( $self, $prompt, $msg ) = @_;
    return if ( $self->{_block_log_output} );

    my $routput_array = $self->{_output_array};
    my $entry         = "$msg";
    if ( !$self->{_at_end_of_file} && $self->{_use_prefix} ) {
        my $line_information_string = $self->make_line_information_string();
        $self->{_wrote_line_information_string} = 1;
        if ($line_information_string) {
            $entry = "$line_information_string $prompt$msg";
        }
    }
    push @{$routput_array}, $entry;
}

3774

3775

sub get_saw_brace_error {

    # Accessor: number of brace errors counted so far.
    my ($self) = @_;
    return $self->{_saw_brace_error};
}

3779

3780

sub increment_brace_error {

    # Count one more brace error without issuing a warning.
    my ($self) = @_;
    $self->{_saw_brace_error}++;
}

3784

3785

sub brace_warning {

    # Issue a warning about a brace problem, but only for the first
    # BRACE_WARNING_LIMIT of them; the call that reaches the limit also
    # announces that no more will be given.  Every call is counted.
    my ( $self, @msg ) = @_;
    use constant BRACE_WARNING_LIMIT => 10;

    my $error_count = $self->{_saw_brace_error};
    $self->warning(@msg) if ( $error_count < BRACE_WARNING_LIMIT );

    $error_count += 1;
    $self->{_saw_brace_error} = $error_count;

    if ( $error_count == BRACE_WARNING_LIMIT ) {
        $self->warning("No further warnings of this type will be given\n");
    }
}

3800

3801

sub complain {

    # handle non-critical warning messages based on input flag
    my ( $self, @msg ) = @_;
    my $rOpts = $self->{_rOpts};

    if ( !$rOpts->{'warning-output'} ) {

        # without the -w flag they are counted and go to the .LOG file
        $self->{_complaint_count}++;
        $self->write_logfile_entry(@msg);
    }
    else {

        # these appear in .ERR output only if -w flag is used
        $self->warning(@msg);
    }
}

3818

3819

sub warning {

    # report errors to .ERR file (or stdout)
    # Does nothing when the 'quiet' option is set.  Otherwise every call
    # is counted, but only the first WARNING_LIMIT messages are written;
    # the call that reaches the limit adds a final notice.
    my ( $self, @msg ) = @_;
    use constant WARNING_LIMIT => 50;

    my $rOpts = $self->{_rOpts};
    if ( !$rOpts->{'quiet'} ) {

        my $warning_count = $self->{_warning_count};

        # the output handle is set up when the first warning arrives
        if ( !$warning_count ) {
            my $fh_warnings;
            if ( $rOpts->{'standard-error-output'} ) {
                $fh_warnings = *STDERR;
            }
            else {
                my $warning_file = $self->{_warning_file};
                ( $fh_warnings, my $filename ) =
                  Perl::Tidy::streamhandle( $warning_file, 'w' );
                $fh_warnings or die("couldn't open $filename $!\n");
                warn "## Please see file $filename\n";
            }
            $self->{_fh_warnings} = $fh_warnings;
        }

        my $fh_warnings = $self->{_fh_warnings};
        if ( $warning_count < WARNING_LIMIT ) {
            if ( $self->get_use_prefix() > 0 ) {

                # prefixed form: label the message with the input line
                my $input_line_number =
                  Perl::Tidy::Tokenizer::get_input_line_number();
                $fh_warnings->print("$input_line_number:\t@msg");
                $self->write_logfile_entry("WARNING: @msg");
            }
            else {

                # unprefixed form: pass the message through unchanged
                $fh_warnings->print(@msg);
                $self->write_logfile_entry(@msg);
            }
        }

        $warning_count += 1;
        $self->{_warning_count} = $warning_count;

        if ( $warning_count == WARNING_LIMIT ) {
            $fh_warnings->print("No further warnings will be given\n");
        }
    }
}

3865

3866

# programming bug codes:
# -1 = no bug
# 0 = maybe, not sure.
# 1 = definitely
sub report_possible_bug {

    # Raise the bug code from "no bug" to "maybe"; a code that is
    # already 0 or 1 is left as it is.
    my ($self) = @_;
    my $saw_code_bug = $self->{_saw_code_bug};
    if ( $saw_code_bug < 0 ) { $saw_code_bug = 0 }
    $self->{_saw_code_bug} = $saw_code_bug;
}

3875

3876

sub report_definite_bug {

    # Set the bug code to 1 ("definitely").
    my ($self) = @_;
    $self->{_saw_code_bug} = 1;
}

3880

3881

sub ask_user_for_bug_report {

    # Issue a warning asking the user to report a bug, depending on the
    # bug code recorded in _saw_code_bug (-1=no 0=maybe 1=for sure).
    #   $infile_syntax_ok - the "maybe" message is only given when this
    #                       equals 1
    #   $formatter        - object asked for its added-semicolon count;
    #                       the call is wrapped in eval, so a missing
    #                       or unsuitable object is tolerated
    my $self = shift;

    my ( $infile_syntax_ok, $formatter ) = @_;
    my $saw_code_bug = $self->{_saw_code_bug};

    # possible bug, and the input itself was syntactically ok
    if ( ( $saw_code_bug == 0 ) && ( $infile_syntax_ok == 1 ) ) {
        $self->warning(<<EOM);

You may have encountered a code bug in perltidy. If you think so, and
the problem is not listed in the BUGS file at
http://perltidy.sourceforge.net, please report it so that it can be
corrected. Include the smallest possible script which has the problem,
along with the .LOG file. See the manual pages for contact information.
Thank you!
EOM

    }

    # definite bug
    elsif ( $saw_code_bug == 1 ) {

        # with -extrude the message is worded more cautiously
        if ( $self->{_saw_extrude} ) {
            $self->warning(<<EOM);

You may have encountered a bug in perltidy. However, since you are using the
-extrude option, the problem may be with perl or one of its modules, which have
occasional problems with this type of file. If you believe that the
problem is with perltidy, and the problem is not listed in the BUGS file at
http://perltidy.sourceforge.net, please report it so that it can be corrected.
Include the smallest possible script which has the problem, along with the .LOG
file. See the manual pages for contact information.
Thank you!
EOM
        }
        else {
            $self->warning(<<EOM);

Oops, you seem to have encountered a bug in perltidy. Please check the
BUGS file at http://perltidy.sourceforge.net. If the problem is not
listed there, please report it so that it can be corrected. Include the
smallest possible script which produces this message, along with the
.LOG file if appropriate. See the manual pages for contact information.
Your efforts are appreciated.
Thank you!
EOM

            # the count stays 0 if the formatter cannot be queried
            my $added_semicolon_count = 0;
            eval {
                $added_semicolon_count =
                  $formatter->get_added_semicolon_count();
            };

            # added semicolons get an extra hint
            if ( $added_semicolon_count > 0 ) {
                $self->warning(<<EOM);

The log file shows that perltidy added $added_semicolon_count semicolons.
Please rerun with -nasc to see if that is the cause of the syntax error. Even
if that is the problem, please report it so that it can be fixed.
EOM

            }
        }
    }
}

3940

3941

sub finish {

    # called after all formatting to summarize errors
    #   $infile_syntax_ok, $formatter - handed on to
    #                                   ask_user_for_bug_report
    # Issues closing hints as warnings and, when the log is to be kept,
    # writes the buffered log entries to the log file.
    my ( $self, $infile_syntax_ok, $formatter ) = @_;

    my $rOpts         = $self->{_rOpts};
    my $warning_count = $self->{_warning_count};
    my $saw_code_bug  = $self->{_saw_code_bug};
    my $log_file      = $self->{_log_file};

    # the log is kept for a possible or definite bug, or on request
    my $save_logfile =
         ( $saw_code_bug == 0 && $infile_syntax_ok == 1 )
      || $saw_code_bug == 1
      || $rOpts->{'logfile'};

    # closing hints are only given if there were warnings
    if ($warning_count) {
        if ($save_logfile) {
            $self->block_log_output();    # avoid echoing this to the logfile
            $self->warning(
                "The logfile $log_file may contain useful information\n");
            $self->unblock_log_output();
        }

        if ( $self->{_complaint_count} > 0 ) {
            $self->warning(
"To see $self->{_complaint_count} non-critical warnings rerun with -w\n"
            );
        }

        my $log_is_partial = $rOpts->{'logfile-gap'} > 1 || !$save_logfile;
        if ( $self->{_saw_brace_error} && $log_is_partial ) {
            $self->warning("To save a full .LOG file rerun with -g\n");
        }
    }
    $self->ask_user_for_bug_report( $infile_syntax_ok, $formatter );

    # dump the buffered log; a log that cannot be opened is skipped
    if ($save_logfile) {
        my ( $fh, $filename ) = Perl::Tidy::streamhandle( $log_file, 'w' );
        if ($fh) {
            my $routput_array = $self->{_output_array};
            for my $entry ( @{$routput_array} ) { $fh->print($entry) }
            eval { $fh->close() };
        }
    }
}

3988

3989

#####################################################################

3990

#

3991

# The Perl::Tidy::DevNull class supplies a dummy print method

3992

#

3993

#####################################################################

3994

3995

package Perl::Tidy::DevNull;

# A do-nothing output sink: objects of this class accept print and close
# calls and simply return.
sub new {
    my ($class) = @_;
    return bless {}, $class;
}

sub print { return }

sub close { return }

3999

4000

#####################################################################

4001

#

4002

# The Perl::Tidy::HtmlWriter class writes a copy of the input stream in html

4003

#

4004

#####################################################################

4005

4006

package Perl::Tidy::HtmlWriter;

4007

4008

use File::Basename;

4009

4010

# class variables
# The %html_* hashes are lookup tables keyed by short token name; they
# are filled in by check_options.  %token_short_names and
# %short_to_long_names are set up in the BEGIN block of this package.
# $rOpts is the options hash reference stored by check_options.
use vars qw{
  %html_color
  %html_bold
  %html_italic
  %token_short_names
  %short_to_long_names
  $rOpts
  $css_filename
  $css_linkname
  $missing_html_entities
};

# replace unsafe characters with HTML entity representation if HTML::Entities
# is available
# ($missing_html_entities receives the eval error text, so it is true only
# when the module could not be loaded)
{ eval "use HTML::Entities"; $missing_html_entities = $@; }

4026

4027

sub new {

    # Constructor for the html writer.
    #   $input_file         - name of the input; replaced by "NONAME" when
    #                         it is empty, '-', or a reference
    #   $html_file          - destination of the html output, handed to
    #                         Perl::Tidy::streamhandle
    #   $extension          - output extension; required by the frames
    #                         option
    #   $html_toc_extension - inserted before $extension to name the
    #                         frame's table of contents file
    #   $html_src_extension - inserted before $extension to name the
    #                         frame's source file
    # Reads, and in some cases clears, entries of the package variable
    # $rOpts.
    # Returns the new object, or undef (after a warning) if the html
    # output could not be opened.
    my ( $class, $input_file, $html_file, $extension, $html_toc_extension,
        $html_src_extension )
      = @_;

    my $html_file_opened = 0;
    my $html_fh;
    ( $html_fh, my $html_filename ) =
      Perl::Tidy::streamhandle( $html_file, 'w' );
    unless ($html_fh) {
        warn("can't open $html_file: $!\n");
        return undef;
    }
    $html_file_opened = 1;

    # use a placeholder name when there is no usable input file name
    if ( !$input_file || $input_file eq '-' || ref($input_file) ) {
        $input_file = "NONAME";
    }

    # write the table of contents to a string
    my $toc_string;
    my $html_toc_fh = Perl::Tidy::IOScalar->new( \$toc_string, 'w' );

    my $html_pre_fh;
    my @pre_string_stack;
    if ( $rOpts->{'html-pre-only'} ) {

        # pre section goes directly to the output stream
        $html_pre_fh = $html_fh;
        $html_pre_fh->print( <<"PRE_END");
<pre>
PRE_END
    }
    else {

        # pre section go out to a temporary string
        my $pre_string;
        $html_pre_fh = Perl::Tidy::IOScalar->new( \$pre_string, 'w' );
        push @pre_string_stack, \$pre_string;
    }

    # pod text gets diverted if the 'pod2html' is used
    my $html_pod_fh;
    my $pod_string;
    if ( $rOpts->{'pod2html'} ) {

        # pod2html is switched off when only the <pre> section is wanted
        if ( $rOpts->{'html-pre-only'} ) {
            undef $rOpts->{'pod2html'};
        }
        else {

            # pod2html is also switched off if Pod::Html cannot be loaded
            eval "use Pod::Html";
            if ($@) {
                warn
"unable to find Pod::Html; cannot use pod2html\n-npod disables this message\n";
                undef $rOpts->{'pod2html'};
            }
            else {
                $html_pod_fh = Perl::Tidy::IOScalar->new( \$pod_string, 'w' );
            }
        }
    }

    # the frames option needs an extension to build its file names from
    my $toc_filename;
    my $src_filename;
    if ( $rOpts->{'frames'} ) {
        unless ($extension) {
            warn
"cannot use frames without a specified output extension; ignoring -frm\n";
            undef $rOpts->{'frames'};
        }
        else {
            $toc_filename = $input_file . $html_toc_extension . $extension;
            $src_filename = $input_file . $html_src_extension . $extension;
        }
    }

    # ----------------------------------------------------------
    # Output is now directed as follows:
    # html_toc_fh <-- table of contents items
    # html_pre_fh <-- the <pre> section of formatted code, except:
    # html_pod_fh <-- pod goes here with the pod2html option
    # ----------------------------------------------------------

    # the title defaults to the file name part of the input file name
    my $title = $rOpts->{'title'};
    unless ($title) {
        ( $title, my $path ) = fileparse($input_file);
    }
    my $toc_item_count = 0;
    my $in_toc_package = "";
    my $last_level     = 0;
    bless {
        _input_file        => $input_file,          # name of input file
        _title             => $title,               # title, unescaped
        _html_file         => $html_file,           # name of .html output file
        _toc_filename      => $toc_filename,        # for frames option
        _src_filename      => $src_filename,        # for frames option
        _html_file_opened  => $html_file_opened,    # a flag
        _html_fh           => $html_fh,             # the output stream
        _html_pre_fh       => $html_pre_fh,         # pre section goes here
        _rpre_string_stack => \@pre_string_stack,   # stack of pre sections
        _html_pod_fh       => $html_pod_fh,         # pod goes here if pod2html
        _rpod_string       => \$pod_string,         # string holding pod
        _pod_cut_count     => 0,                    # how many =cut's?
        _html_toc_fh       => $html_toc_fh,         # fh for table of contents
        _rtoc_string       => \$toc_string,         # string holding toc
        _rtoc_item_count   => \$toc_item_count,     # how many toc items
        _rin_toc_package   => \$in_toc_package,     # package name
        _rtoc_name_count   => {},                   # hash to track unique names
        _rpackage_stack    => [],                   # stack to check for package
                                                    # name changes
        _rlast_level       => \$last_level,         # brace indentation level
    }, $class;
}

4140

4141

sub add_toc_item {

    # Add an item to the html table of contents.
    # This is called even if no table of contents is written,
    # because we still want to put the anchors in the <pre> text.
    # We are given an anchor name and its type; types are:
    # 'package', 'sub', '__END__', '__DATA__', 'EOF'
    # There must be an 'EOF' call at the end to wrap things up.
    my $self = shift;
    my ( $name, $type ) = @_;
    my $html_toc_fh     = $self->{_html_toc_fh};
    my $html_pre_fh     = $self->{_html_pre_fh};
    my $rtoc_name_count = $self->{_rtoc_name_count};
    my $rtoc_item_count = $self->{_rtoc_item_count};
    my $rlast_level     = $self->{_rlast_level};
    my $rin_toc_package = $self->{_rin_toc_package};
    my $rpackage_stack  = $self->{_rpackage_stack};

    # packages contain sublists of subs, so to avoid errors all package
    # items are written and finished with the following routines

    # close the sublist of the current package, if one is open
    my $end_package_list = sub {
        if ($$rin_toc_package) {
            $html_toc_fh->print("</ul>\n</li>\n");
            $$rin_toc_package = "";
        }
    };

    # open a sublist for a package, first closing any sublist still open
    my $start_package_list = sub {
        my ( $unique_name, $package ) = @_;
        if ($$rin_toc_package) { $end_package_list->() }
        $html_toc_fh->print(<<EOM);
<li><a href=\"#$unique_name\">package $package</a>
<ul>
EOM
        $$rin_toc_package = $package;
    };

    # start the table of contents on the first item
    unless ($$rtoc_item_count) {

        # but just quit if we hit EOF without any other entries
        # in this case, there will be no toc
        return if ( $type eq 'EOF' );
        $html_toc_fh->print( <<"TOC_END");
<a name="code-index"></a>
<ul>
TOC_END
    }
    $$rtoc_item_count++;

    # make a unique anchor name for this location:
    # - packages get a 'package-' prefix
    # - subs use their names
    my $unique_name = $name;
    if ( $type eq 'package' ) { $unique_name = "package-$name" }

    # append '-1', '-2', etc if necessary to make unique; this will
    # be unique because subs and packages cannot have a '-'
    # (names are counted case-insensitively)
    if ( my $count = $rtoc_name_count->{ lc $unique_name }++ ) {
        $unique_name .= "-$count";
    }

    # - all names get terminal '-' if pod2html is used, to avoid
    # conflicts with anchor names created by pod2html
    if ( $rOpts->{'pod2html'} ) { $unique_name .= '-' }

    # start/stop lists of subs
    if ( $type eq 'sub' ) {

        # the package of this sub is looked up by the last brace level;
        # 'main' is assumed if none is recorded
        my $package = $rpackage_stack->[$$rlast_level];
        unless ($package) { $package = 'main' }

        # if we're already in a package/sub list, be sure its the right
        # package or else close it
        if ( $$rin_toc_package && $$rin_toc_package ne $package ) {
            $end_package_list->();
        }

        # start a package/sub list if necessary
        unless ($$rin_toc_package) {
            $start_package_list->( $unique_name, $package );
        }
    }

    # now write an entry in the toc for this item
    if ( $type eq 'package' ) {
        $start_package_list->( $unique_name, $name );
    }
    elsif ( $type eq 'sub' ) {
        $html_toc_fh->print("<li><a href=\"#$unique_name\">$name</a></li>\n");
    }
    else {

        # any other type ends the current package sublist first
        $end_package_list->();
        $html_toc_fh->print("<li><a href=\"#$unique_name\">$name</a></li>\n");
    }

    # write the anchor in the <pre> section
    $html_pre_fh->print("<a name=\"$unique_name\"></a>");

    # end the table of contents, if any, on the end of file
    # NOTE(review): the empty line inside the here-document below may be
    # an extraction artifact; confirm against the upstream source.
    if ( $type eq 'EOF' ) {
        $html_toc_fh->print( <<"TOC_END");
</ul>

TOC_END
    }
}

4247

4248

BEGIN {

    # Set up the two package lookup tables used for html markup:
    # %short_to_long_names and %token_short_names.

    # This is the official list of tokens which may be identified by the
    # user. Long names are used as getopt keys. Short names are
    # convenient short abbreviations for specifying input. Short names
    # somewhat resemble token type characters, but are often different
    # because they may only be alphanumeric, to allow command line
    # input. Also, note that because of case insensitivity of html,
    # this table must be in a single case only (I've chosen to use all
    # lower case).
    # When adding NEW_TOKENS: update this hash table
    # short names => long names
    %short_to_long_names = (
        'n'  => 'numeric',
        'p'  => 'paren',
        'q'  => 'quote',
        's'  => 'structure',
        'c'  => 'comment',
        'v'  => 'v-string',
        'cm' => 'comma',
        'w'  => 'bareword',
        'co' => 'colon',
        'pu' => 'punctuation',
        'i'  => 'identifier',
        'j'  => 'label',
        'h'  => 'here-doc-target',
        'hh' => 'here-doc-text',
        'k'  => 'keyword',
        'sc' => 'semicolon',
        'm'  => 'subroutine',
        'pd' => 'pod-text',
    );

    # Now we have to map actual token types into one of the above short
    # names; any token types not mapped will get 'punctuation'
    # properties.

    # The values of this hash table correspond to the keys of the
    # previous hash table.
    # The keys of this hash table are token types and can be seen
    # by running with --dump-token-types (-dtt).

    # When adding NEW_TOKENS: update this hash table
    # $type => $short_name
    %token_short_names = (
        '#'  => 'c',
        'n'  => 'n',
        'v'  => 'v',
        'k'  => 'k',
        'F'  => 'k',
        'Q'  => 'q',
        'q'  => 'q',
        'J'  => 'j',
        'j'  => 'j',
        'h'  => 'h',
        'H'  => 'hh',
        'w'  => 'w',
        ','  => 'cm',
        '=>' => 'cm',
        ';'  => 'sc',
        ':'  => 'co',
        'f'  => 'sc',
        '('  => 'p',
        ')'  => 'p',
        'M'  => 'm',
        'P'  => 'pd',
        'A'  => 'co',
    );

    # These token types will all be called identifiers for now
    # FIXME: need to separate user defined modules as separate type
    # (hash slice assignment: every listed type maps to 'i')
    my @identifier = qw" i t U C Y Z G :: ";
    @token_short_names{@identifier} = ('i') x scalar(@identifier);

    # These token types will be called 'structure'
    my @structure = qw" { } ";
    @token_short_names{@structure} = ('s') x scalar(@structure);

    # OLD NOTES: save for reference
    # Any of these could be added later if it would be useful.
    # For now, they will by default become punctuation
    # my @list = qw" L R [ ] ";
    # @token_long_names{@list} = ('non-structure') x scalar(@list);
    #
    # my @list = qw"
    # / /= * *= ** **= + += - -= % %= = ++ -- << <<= >> >>= pp p m mm
    # ";
    # @token_long_names{@list} = ('math') x scalar(@list);
    #
    # my @list = qw" & &= ~ ~= ^ ^= | |= ";
    # @token_long_names{@list} = ('bit') x scalar(@list);
    #
    # my @list = qw" == != < > <= <=> ";
    # @token_long_names{@list} = ('numerical-comparison') x scalar(@list);
    #
    # my @list = qw" && || ! &&= ||= //= ";
    # @token_long_names{@list} = ('logical') x scalar(@list);
    #
    # my @list = qw" . .= =~ !~ x x= ";
    # @token_long_names{@list} = ('string-operators') x scalar(@list);
    #
    # # Incomplete..
    # my @list = qw" .. -> <> ... \ ? ";
    # @token_long_names{@list} = ('misc-operators') x scalar(@list);

}

4354

4355

sub make_getopt_long_names {

    # Append the Getopt::Long specifications of all html-related options
    # to the caller's list.
    #   $rgetopt_names - reference to the array being extended
    my ( $class, $rgetopt_names ) = @_;

    # color, italic and bold options for every token type
    while ( my ( $short_name, $name ) = each %short_to_long_names ) {
        push @$rgetopt_names,
          "html-color-$name=s",
          "html-italic-$name!",
          "html-bold-$name!";
    }

    # general html options
    push @$rgetopt_names, qw(
      html-color-background=s
      html-linked-style-sheet=s
      nohtml-style-sheets
      html-pre-only
      html-line-numbers
      html-entities!
      stylesheet
      html-table-of-contents!
      pod2html!
      frames!
      html-toc-extension=s
      html-src-extension=s
    );

    # Pod::Html parameters:
    push @$rgetopt_names, qw(
      backlink=s
      cachedir=s
      htmlroot=s
      libpods=s
      podpath=s
      podroot=s
      title=s
    );

    # Pod::Html parameters with leading 'pod' which will be removed
    # before the call to Pod::Html
    push @$rgetopt_names, qw(
      podquiet!
      podverbose!
      podrecurse!
      podflush
      podheader!
      podindex!
    );
}

4394

4395

sub make_abbreviated_names {

    # We're appending things like this to the expansion list:
    # 'hcc' => [qw(html-color-comment)],
    # 'hck' => [qw(html-color-keyword)],
    # etc
    #   $rexpansion - reference to the hash of abbreviation => [long names]
    my ( $class, $rexpansion ) = @_;

    # abbreviations for color/bold/italic properties
    while ( my ( $short_name, $long_name ) = each %short_to_long_names ) {
        $rexpansion->{"hc$short_name"}  = ["html-color-$long_name"];
        $rexpansion->{"hb$short_name"}  = ["html-bold-$long_name"];
        $rexpansion->{"hi$short_name"}  = ["html-italic-$long_name"];
        $rexpansion->{"nhb$short_name"} = ["nohtml-bold-$long_name"];
        $rexpansion->{"nhi$short_name"} = ["nohtml-italic-$long_name"];
    }

    # abbreviations for all other html options
    $rexpansion->{"hcbg"}  = ["html-color-background"];
    $rexpansion->{"pre"}   = ["html-pre-only"];
    $rexpansion->{"toc"}   = ["html-table-of-contents"];
    $rexpansion->{"ntoc"}  = ["nohtml-table-of-contents"];
    $rexpansion->{"nnn"}   = ["html-line-numbers"];
    $rexpansion->{"hent"}  = ["html-entities"];
    $rexpansion->{"nhent"} = ["nohtml-entities"];
    $rexpansion->{"css"}   = ["html-linked-style-sheet"];
    $rexpansion->{"nss"}   = ["nohtml-style-sheets"];
    $rexpansion->{"ss"}    = ["stylesheet"];
    $rexpansion->{"pod"}   = ["pod2html"];
    $rexpansion->{"npod"}  = ["nopod2html"];
    $rexpansion->{"frm"}   = ["frames"];
    $rexpansion->{"nfrm"}  = ["noframes"];
    $rexpansion->{"text"}  = ["html-toc-extension"];
    $rexpansion->{"sext"}  = ["html-src-extension"];
}

4431

4432

sub check_options {

    # Called once, after the options have been parsed.  Fills in the
    # default html colors and font properties, builds the per-token
    # lookup tables used for markup, and handles the style sheet options
    # (-ss, -css and -nss).
    my $class = shift;
    $rOpts = shift;

    # X11 color names for default settings that seemed to look ok
    # (these color names are only used for programming clarity; the hex
    # numbers are actually written)
    use constant ForestGreen   => "#228B22";
    use constant SaddleBrown   => "#8B4513";
    use constant magenta4      => "#8B008B";
    use constant IndianRed3    => "#CD5555";
    use constant DeepSkyBlue4  => "#00688B";
    use constant MediumOrchid3 => "#B452CD";
    use constant black         => "#000000";
    use constant white         => "#FFFFFF";
    use constant red           => "#FF0000";

    # Default color, bold and italic properties, one row per token type:
    #     [ short_name, default_color, bold?, italic? ]
    # Anything not listed here will be given the default (punctuation)
    # color -- these types are currently not listed and get the default:
    #     ws pu s sc cm co p
    # When adding NEW_TOKENS: add a row here if you don't want defaults
    my @default_properties = (
        [ 'c',  ForestGreen,   0, 0 ],
        [ 'pd', ForestGreen,   0, 1 ],
        [ 'k',  magenta4,      1, 0 ],    # was SaddleBrown
        [ 'q',  IndianRed3,    0, 0 ],
        [ 'hh', IndianRed3,    0, 1 ],
        [ 'h',  IndianRed3,    1, 0 ],
        [ 'i',  DeepSkyBlue4,  0, 0 ],
        [ 'w',  black,         0, 0 ],
        [ 'n',  MediumOrchid3, 0, 0 ],
        [ 'v',  MediumOrchid3, 0, 0 ],
        [ 'j',  IndianRed3,    1, 0 ],
        [ 'm',  red,           1, 0 ],
    );
    foreach my $rproperties (@default_properties) {
        set_default_properties( @{$rproperties} );
    }

    set_default_color( 'html-color-background',  white );
    set_default_color( 'html-color-punctuation', black );

    # Every token type has a short name; index the final option values
    # by that short name for use when the html is marked up.
    foreach my $short_name ( keys %short_to_long_names ) {
        my $long_name = $short_to_long_names{$short_name};
        $html_color{$short_name}  = $rOpts->{"html-color-$long_name"};
        $html_bold{$short_name}   = $rOpts->{"html-bold-$long_name"};
        $html_italic{$short_name} = $rOpts->{"html-italic-$long_name"};
    }

    # -ss: write the style sheet to STDOUT and exit
    if ( defined $rOpts->{'stylesheet'} ) {
        write_style_sheet_file('-');
        exit 1;
    }

    # -css: the user must give a file name, not another option
    my $linked_sheet = $rOpts->{'html-linked-style-sheet'};
    if ( defined $linked_sheet ) {
        $css_linkname = $linked_sheet;
        die "You must specify a valid filename after -css\n"
          if $css_linkname =~ /^-/;
    }

    # -css and -nss conflict; -css wins
    if ( $css_linkname && $rOpts->{'nohtml-style-sheets'} ) {
        $rOpts->{'nohtml-style-sheets'} = 0;
        warning("You can't specify both -css and -nss; -nss ignored\n");
    }

    # Write a style sheet file if necessary.
    # If the selected filename exists, don't write, because the user may
    # have done some work by hand to create it.
    # Also, this avoids a potential disaster in which the user forgets to
    # specify the style sheet, like this:
    #    perltidy -html -css myfile1.pl myfile2.pl
    # This would cause myfile1.pl to be parsed as the style sheet by
    # GetOpts
    if ( $css_linkname && !-e $css_linkname ) {
        write_style_sheet_file($css_linkname);
    }

    $missing_html_entities = 1 unless $rOpts->{'html-entities'};
}

4518

4519

sub write_style_sheet_file {

    # Write the default style sheet to the file named $css_filename.
    # The special name '-' sends the style sheet to STDOUT.
    # Dies if the output cannot be opened.
    my $css_filename = shift;

    # Only the literal '-' is handed to the 2-argument form of open, where
    # '>-' means STDOUT.  A real file name is opened with an explicit
    # mode so that characters in the name (leading or trailing spaces,
    # '>', '&', '|') are never interpreted by open.
    my $fh =
      ( $css_filename eq '-' )
      ? IO::File->new('>-')
      : IO::File->new( $css_filename, 'w' );
    unless ($fh) {
        die "can't open $css_filename: $!\n";
    }
    write_style_sheet_data($fh);
    eval { $fh->close };
}

4529

4530

sub write_style_sheet_data {

    # Print the css for the page, for <pre> sections, and one rule per
    # token type to $fh, which is any object with a 'print' method.
    my $fh = shift;

    my $text_color = $rOpts->{'html-color-punctuation'};
    my $bg_color   = $rOpts->{'html-color-background'};

    # the <pre> background is a newer option and may not be set; fall
    # back on the page background
    my $pre_bg_color = $rOpts->{'html-pre-color-background'} || $bg_color;

    $fh->print(<<"EOM");
/* default style sheet generated by perltidy */
body {background: $bg_color; color: $text_color}
pre { color: $text_color;
background: $pre_bg_color;
font-family: courier;
}

EOM

    # one rule per token type, in sorted order of the short names
    foreach my $short_name ( sort keys %short_to_long_names ) {

        my $selector = ".$short_name";
        $selector .= ' ' if length($short_name) == 1;    # for alignment

        # token types without a color of their own use the text color
        my $color =
          defined( $html_color{$short_name} )
          ? $html_color{$short_name}
          : $text_color;

        $fh->print("$selector \{ color: $color;");
        $fh->print(" font-weight:bold;")  if $html_bold{$short_name};
        $fh->print(" font-style:italic;") if $html_italic{$short_name};
        $fh->print("} /* $short_to_long_names{$short_name} */\n");
    }
}

4571

4572

sub set_default_color {

    # Make sure that $rOpts->{$key} holds a checked color: a (true) value
    # which is already there is kept, otherwise $color becomes the value.
    # Either way the result is passed through check_RGB.
    my ( $key, $color ) = @_;
    my $selected = $rOpts->{$key} || $color;
    $rOpts->{$key} = check_RGB($selected);
}

4579

4580

sub check_RGB {

    # A color written as exactly 6 hex digits is an RGB value and gets a
    # leading '#'.  Anything else is returned unchanged, on the
    # assumption that it is a valid ascii color name.
    my ($color) = @_;
    return ( $color =~ /^[0-9a-fA-F]{6,6}$/ ) ? "#$color" : $color;
}

4588

4589

sub set_default_properties {

    # Install the default color, bold and italic settings for the token
    # type with short name $short_name.  Bold and italic values already
    # defined in the options hash are left alone.
    my ( $short_name, $color, $bold, $italic ) = @_;
    my $long_name = $short_to_long_names{$short_name};

    set_default_color( "html-color-$long_name", $color );

    foreach my $rpair ( [ 'bold', $bold ], [ 'italic', $italic ] ) {
        my ( $property, $default ) = @{$rpair};
        my $key = "html-$property-$long_name";
        $rOpts->{$key} = $default unless defined $rOpts->{$key};
    }
}

4599

4600

sub pod_to_html {

    # Use Pod::Html to process the pod and make the page,
    # then merge the perltidy code sections into it.
    #
    # Arguments (after $self):
    #   $pod_string        - the pod text to convert
    #   $css_string        - css (or a link to it) to copy in; may be empty
    #   $toc_string        - perltidy table of contents; may be empty
    #   $rpre_string_stack - ref to array of refs to the code strings
    #                        which go into <pre> sections
    #
    # Returns 1 if success, 0 otherwise.
    #
    # NOTE(review): the marker '<!-- pre section -->' must be the text
    # left in the pod stream at each place where a code section was cut;
    # confirm that it agrees with the writer of the pod stream.
    my $self = shift;
    my ( $pod_string, $css_string, $toc_string, $rpre_string_stack ) = @_;
    my $title        = $self->{_title};
    my $success_flag = 0;

    # don't try to use pod2html if no pod
    return $success_flag unless $pod_string;

    # Pod::Html requires a real temporary filename.
    # If we are making a frame, we have a name available;
    # otherwise, we have to find one.
    my $tmpfile =
        $rOpts->{'frames'}
      ? $self->{_toc_filename}
      : Perl::Tidy::make_temporary_filename();
    my $fh_tmp = IO::File->new( $tmpfile, 'w' );
    unless ($fh_tmp) {
        warn "unable to open temporary file $tmpfile; cannot use pod2html\n";
        return $success_flag;
    }

    #------------------------------------------------------------------
    # Warning: a temporary file now exists; we have to clean up if
    # things go bad.  From here on every return must first unlink the
    # temporary file.
    #------------------------------------------------------------------

    # write the pod text to the temporary file
    $fh_tmp->print($pod_string);
    $fh_tmp->close();

    # Hand off the pod to pod2html.
    # Note that we can use the same temporary filename for input and output
    # because of the way pod2html works.
    {

        my @args =
          ( "--infile=$tmpfile", "--outfile=$tmpfile", "--title=$title" );

        # Flags with string args:
        # "backlink=s", "cachedir=s", "htmlroot=s", "libpods=s",
        # "podpath=s", "podroot=s"
        # Note: -css=s is handled by perltidy itself
        foreach my $kw (qw(backlink cachedir htmlroot libpods podpath podroot))
        {
            if ( $rOpts->{$kw} ) { push @args, "--$kw=$rOpts->{$kw}" }
        }

        # Toggle switches; these have extra leading 'pod'
        # "header!", "index!", "recurse!", "quiet!", "verbose!"
        foreach my $kw (qw(podheader podindex podrecurse podquiet podverbose))
        {
            next unless defined( $rOpts->{$kw} );
            my $kwd = $kw;
            $kwd =~ s/^pod//;    # pod2html does not want the 'pod' prefix
            push @args, ( $rOpts->{$kw} ? "--$kwd" : "--no$kwd" );
        }

        # "flush",
        if ( $rOpts->{'podflush'} ) { push @args, "--flush" }

        # Must clean up if pod2html dies (it can);
        # Be careful not to overwrite callers __DIE__ routine
        local $SIG{__DIE__} = sub {
            print $_[0];
            unlink $tmpfile if -e $tmpfile;
            exit 1;
        };

        pod2html(@args);
    }

    $fh_tmp = IO::File->new( $tmpfile, 'r' );
    unless ($fh_tmp) {

        # this error shouldn't happen ... we just used this filename
        warn "unable to open temporary file $tmpfile; cannot use pod2html\n";

        # The html output handle is deliberately left open here: nothing
        # has been written to it yet, and the caller may still use it.
        unlink $tmpfile if -e $tmpfile;
        return $success_flag;
    }

    my $html_fh = $self->{_html_fh};
    my @toc;
    my $in_toc;
    my $no_print;

    # This routine will write the html selectively and store the toc
    my $html_print = sub {
        foreach (@_) {
            $html_fh->print($_) unless ($no_print);
            if ($in_toc) { push @toc, $_ }
        }
    };

    # loop over lines of html output from pod2html and merge in
    # the necessary perltidy html sections
    my ( $saw_body, $saw_index, $saw_body_end );
    while ( my $line = $fh_tmp->getline() ) {

        if ( $line =~ /^\s*<html>\s*$/i ) {
            my $date = localtime;
            $html_print->("<!-- Generated by perltidy on $date -->\n");
            $html_print->($line);
        }

        # Copy the perltidy css, if any, after <body> tag
        elsif ( $line =~ /^\s*<body.*>\s*$/i ) {
            $saw_body = 1;
            $html_print->($css_string) if $css_string;
            $html_print->($line);

            # add a top anchor and heading
            $html_print->("<a name=\"-top-\"></a>\n");
            $title = escape_html($title);
            $html_print->("<h1>$title</h1>\n");
        }

        # Start of the table of contents made by Pod::Html
        elsif ( $line =~ /^\s*<!-- INDEX BEGIN -->\s*$/i ) {
            $in_toc = 1;

            # when frames are used, an extra table of contents in the
            # contents panel is confusing, so don't print it
            $no_print = $rOpts->{'frames'}
              || !$rOpts->{'html-table-of-contents'};
            $html_print->("<h2>Doc Index:</h2>\n") if $rOpts->{'frames'};
            $html_print->($line);
        }

        # Copy the perltidy toc, if any, after the Pod::Html toc
        elsif ( $line =~ /^\s*<!-- INDEX END -->\s*$/i ) {
            $saw_index = 1;
            $html_print->($line);
            if ($toc_string) {
                $html_print->("<hr />\n") if $rOpts->{'frames'};
                $html_print->("<h2>Code Index:</h2>\n");
                my @code_toc = map { $_ .= "\n" } split /\n/, $toc_string;
                $html_print->(@code_toc);
            }
            $in_toc   = 0;
            $no_print = 0;
        }

        # Copy one perltidy section after each marker
        elsif ( $line =~ /^(.*)<!-- pre section -->(.*)$/ ) {
            $line = $2;
            $html_print->($1) if $1;

            # Intermingle code and pod sections if we saw multiple =cut's.
            if ( $self->{_pod_cut_count} > 1 ) {
                my $rpre_string = shift(@$rpre_string_stack);
                if ( $rpre_string && $$rpre_string ) {
                    $html_print->('<pre>');
                    $html_print->($$rpre_string);
                    $html_print->('</pre>');
                }
                else {

                    # shouldn't happen: we stored a string before writing
                    # each marker.
                    warn
"Problem merging html stream with pod2html; order may be wrong\n";
                }
                $html_print->($line);
            }

            # If didn't see multiple =cut lines, we'll put the pod out first
            # and then the code, because it's less confusing.
            else {

                # since we are not intermixing code and pod, we don't need
                # or want any <hr> lines which separated pod and code
                $html_print->($line) unless ( $line =~ /^\s*<hr>\s*$/i );
            }
        }

        # Copy any remaining code section before the </body> tag
        elsif ( $line =~ /^\s*<\/body>\s*$/i ) {
            $saw_body_end = 1;
            if (@$rpre_string_stack) {
                unless ( $self->{_pod_cut_count} > 1 ) {
                    $html_print->('<hr />');
                }
                while ( my $rpre_string = shift(@$rpre_string_stack) ) {
                    $html_print->('<pre>');
                    $html_print->($$rpre_string);
                    $html_print->('</pre>');
                }
            }
            $html_print->($line);
        }
        else {
            $html_print->($line);
        }
    }

    $success_flag = 1;
    unless ($saw_body) {
        warn "Did not see <body> in pod2html output\n";
        $success_flag = 0;
    }
    unless ($saw_body_end) {
        warn "Did not see </body> in pod2html output\n";
        $success_flag = 0;
    }
    unless ($saw_index) {
        warn "Did not find INDEX END in pod2html output\n";
        $success_flag = 0;
    }

    # finished reading; an open file cannot be removed on some systems
    $fh_tmp->close();
    eval { $html_fh->close() };

    # note that we have to unlink tmpfile before making frames
    # because the tmpfile may be one of the names used for frames
    unlink $tmpfile if -e $tmpfile;
    if ( $success_flag && $rOpts->{'frames'} ) {
        $self->make_frame( \@toc );
    }
    return $success_flag;
}

4830

4831

sub make_frame {

    # Make a frame with table of contents in the left panel
    # and the text in the right panel.
    # On entry:
    #  $self->{_html_file} names the no-frames html output
    #  $rtoc is a reference to an array with the table of contents
    #
    # Three html files are produced:
    #  1. the table of contents
    #  2. the contents (source code) itself
    #  3. the frame which contains them
    my ( $self, $rtoc ) = @_;
    my $html_filename = $self->{_html_file};
    my $toc_filename  = $self->{_toc_filename};
    my $src_filename  = $self->{_src_filename};
    my $title         = escape_html( $self->{_title} );

    # FUTURE input parameter:
    my $top_basename = "";

    # the panels refer to each other by basename (relative links)
    my $toc_basename   = ( fileparse($toc_filename) )[0];
    my $src_basename   = ( fileparse($src_filename) )[0];
    my $src_frame_name = 'SRC';

    # 1. Make the table of contents panel, with appropriate changes
    # to the anchor names
    write_toc_html( $title, $toc_filename, $src_basename, $rtoc,
        $src_frame_name );

    # 2. The current .html filename is renamed to be the contents panel
    rename( $html_filename, $src_filename )
      or die "Cannot rename $html_filename to $src_filename:$!\n";

    # 3. Then use the original html filename for the frame
    write_frame_html(
        $title,        $html_filename, $top_basename,
        $toc_basename, $src_basename,  $src_frame_name
    );
}

4876

4877

sub write_toc_html {

    # Write a separate html table of contents file for frames.
    #
    #   $title          - page title (already escaped for html)
    #   $toc_filename   - name of the file to write
    #   $src_basename   - basename of the source panel file; every anchor
    #                     in the table of contents is made to point to it
    #   $rtoc           - ref to array of table of contents lines; the
    #                     lines are modified in place
    #   $src_frame_name - name of the frame which shows the source panel
    #
    # Dies if the file cannot be opened.
    # Returns the first anchor found in the table of contents, if any.
    my ( $title, $toc_filename, $src_basename, $rtoc, $src_frame_name ) = @_;
    my $fh = IO::File->new( $toc_filename, 'w' )
      or die "Cannot open $toc_filename:$!\n";
    $fh->print(<<EOM);
<html>
<head>
<title>$title</title>
</head>
<body>
<h1><a href=\"$src_basename#-top-" target="$src_frame_name">$title</a></h1>
EOM

    my $first_anchor =
      change_anchor_names( $rtoc, $src_basename, "$src_frame_name" );
    $fh->print( join "", @$rtoc );

    $fh->print(<<EOM);
</body>
</html>
EOM

    # buffered write errors only show up when the file is closed
    $fh->close() or warn "Problem closing $toc_filename:$!\n";
    return $first_anchor;
}

4902

4903

sub write_frame_html {

    # Write the html file which defines the frames.
    #
    #   $title          - page title (already escaped for html)
    #   $frame_filename - name of the file to write
    #   $top_basename   - basename of a master index file, or "" if none
    #   $toc_basename   - basename of the table of contents panel file
    #   $src_basename   - basename of the source code panel file
    #   $src_frame_name - name given to the frame showing the source
    #
    # Dies if the file cannot be opened.
    my (
        $title,        $frame_filename, $top_basename,
        $toc_basename, $src_basename,   $src_frame_name
    ) = @_;

    my $fh = IO::File->new( $frame_filename, 'w' )
      or die "Cannot open $frame_filename:$!\n";

    $fh->print(<<EOM);
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Frameset//EN"
    "http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd">
<?xml version="1.0" encoding="iso-8859-1" ?>
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>$title</title>
</head>
EOM

    # two left panels, one right, if master index file
    if ($top_basename) {
        $fh->print(<<EOM);
<frameset cols="20%,80%">
<frameset rows="30%,70%">
<frame src = "$top_basename" />
<frame src = "$toc_basename" />
</frameset>
EOM
    }

    # one left panel, one right, if no master index file
    else {
        $fh->print(<<EOM);
<frameset cols="20%,*">
<frame src = "$toc_basename" />
EOM
    }

    # the right panel, and a fallback for clients without frames
    $fh->print(<<EOM);
<frame src = "$src_basename" name = "$src_frame_name" />
<noframes>
<body>
<p>If you see this message, you are using a non-frame-capable web client.</p>
<p>This document contains:</p>
<ul>
<li><a href="$toc_basename">A table of contents</a></li>
<li><a href="$src_basename">The source code</a></li>
</ul>
</body>
</noframes>
</frameset>
</html>
EOM

    # buffered write errors only show up when the file is closed
    $fh->close() or warn "Problem closing $frame_filename:$!\n";
    return;
}

4958

4959

sub change_anchor_names {

    # Rewrite, in place, every line of @$rlines which holds an anchor
    # with a fragment so that the anchor points into file $filename and
    # opens in the frame named $target.
    # Returns the first rewritten href, or undef if no line matched.
    my ( $rlines, $filename, $target ) = @_;
    my $first_anchor;

    foreach my $line (@$rlines) {

        # We're looking for lines like this:
        #  <LI><A HREF="#synopsis">SYNOPSIS</A></LI>
        #  ----         -  --------  -----------------
        #  $1              $4            $5
        next
          unless $line =~
          /^(.*)<a(.*)href\s*=\s*"([^#]*)#([^"]+)"[^>]*>(.*)$/i;
        my ( $before, $fragment, $after ) = ( $1, $4, $5 );

        my $href = "$filename#$fragment";
        $line = "$before<a href=\"$href\" target=\"$target\">$after\n";
        $first_anchor ||= $href;
    }
    return $first_anchor;
}

4982

4983

sub close_html_file {

    # Finish and close the html output.  Does nothing unless the html
    # file was opened.  There are 3 basic paths to html output:
    #   1. -pre mode: just end the <pre> section
    #   2. pod2html:  let pod_to_html build the page around the pod
    #   3. otherwise (or if path 2 fails): write a complete page holding
    #      the table of contents and the code
    my $self = shift;
    return unless $self->{_html_file_opened};

    my $html_fh     = $self->{_html_fh};
    my $rtoc_string = $self->{_rtoc_string};

    # ---------------------------------
    # Path 1: finish up if in -pre mode
    # ---------------------------------
    if ( $rOpts->{'html-pre-only'} ) {
        $html_fh->print( <<"PRE_END");
</pre>
PRE_END
        eval { $html_fh->close() };
        return;
    }

    # Finish the index
    $self->add_toc_item( 'EOF', 'EOF' );

    my $rpre_string_stack = $self->{_rpre_string_stack};

    # Patch to darken the <pre> background color in case of pod2html and
    # interleaved code/documentation.  Otherwise, the distinction
    # between code and documentation is blurred.
    if (   $rOpts->{pod2html}
        && $self->{_pod_cut_count} >= 1
        && $rOpts->{'html-color-background'} eq '#FFFFFF' )
    {
        $rOpts->{'html-pre-color-background'} = '#F0F0F0';
    }

    # put the css or its link into a string, if used
    my $css_string;
    my $fh_css = Perl::Tidy::IOScalar->new( \$css_string, 'w' );

    # use css linked to another file
    if ( $rOpts->{'html-linked-style-sheet'} ) {
        $fh_css->print(
            qq(<link rel="stylesheet" href="$css_linkname" type="text/css" />)
        );
    }

    # use css embedded in this file
    elsif ( !$rOpts->{'nohtml-style-sheets'} ) {
        $fh_css->print( <<'ENDCSS');
<style type="text/css">
<!--
ENDCSS
        write_style_sheet_data($fh_css);
        $fh_css->print( <<"ENDCSS");
-->
</style>
ENDCSS
    }

    # -----------------------------------------------------------
    # path 2: use pod2html if requested
    #         If we fail for some reason, continue on to path 3
    # -----------------------------------------------------------
    if ( $rOpts->{'pod2html'} ) {
        my $rpod_string = $self->{_rpod_string};
        $self->pod_to_html( $$rpod_string, $css_string, $$rtoc_string,
            $rpre_string_stack )
          && return;
    }

    # --------------------------------------------------
    # path 3: write code in html, with pod only in italics
    # --------------------------------------------------
    my $input_file = $self->{_input_file};
    my $title      = escape_html($input_file);
    my $date       = localtime;
    $html_fh->print( <<"HTML_START");
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
   "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<!-- Generated by perltidy on $date -->
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>$title</title>
HTML_START

    # output the css, if used
    if ($css_string) {
        $html_fh->print($css_string);
        $html_fh->print( <<"ENDCSS");
</head>
<body>
ENDCSS
    }

    # without css the page colors go on the <body> tag
    else {

        $html_fh->print( <<"HTML_START");
</head>
<body bgcolor=\"$rOpts->{'html-color-background'}\" text=\"$rOpts->{'html-color-punctuation'}\">
HTML_START
    }

    $html_fh->print("<a name=\"-top-\"></a>\n");
    $html_fh->print( <<"EOM");
<h1>$title</h1>
EOM

    # copy the table of contents
    if (   $$rtoc_string
        && !$rOpts->{'frames'}
        && $rOpts->{'html-table-of-contents'} )
    {
        $html_fh->print($$rtoc_string);
    }

    # copy the pre section(s)
    my $fname_comment = $input_file;
    $fname_comment =~ s/--+/-/g;    # protect HTML comment tags
    $html_fh->print( <<"END_PRE");
<hr />
<!-- contents of filename: $fname_comment -->
<pre>
END_PRE

    foreach my $rpre_string (@$rpre_string_stack) {
        $html_fh->print($$rpre_string);
    }

    # and finish the html page
    $html_fh->print( <<"HTML_END");
</pre>
</body>
</html>
HTML_END
    eval { $html_fh->close() };    # could be object without close method

    # with frames, the page just written becomes the source panel
    if ( $rOpts->{'frames'} ) {
        my @toc = map { $_ .= "\n" } split /\n/, $$rtoc_string;
        $self->make_frame( \@toc );
    }
}

5123

5124

sub markup_tokens {

    # Add html markup to the tokens of one line of code.
    #   $rtokens     - ref to array of token text
    #   $rtoken_type - ref to array of token types
    #   $rlevels     - ref to array of nesting levels
    # Also keeps the package stack up to date and adds sub and package
    # names to the table of contents.
    # Returns a ref to an array of the marked up tokens.
    my $self = shift;
    my ( $rtokens, $rtoken_type, $rlevels ) = @_;
    my $rlast_level    = $self->{_rlast_level};
    my $rpackage_stack = $self->{_rpackage_stack};
    my @colored_tokens;

    foreach my $j ( 0 .. $#{$rtoken_type} ) {
        my $type  = $rtoken_type->[$j];
        my $token = $rtokens->[$j];
        my $level = $rlevels->[$j];
        $level = 0 if ( $level < 0 );

        #-------------------------------------------------------
        # Update the package stack.  The package stack is needed to keep
        # the toc correct because some packages may be declared within
        # blocks and go out of scope when we leave the block.
        #-------------------------------------------------------
        if ( $level > $$rlast_level ) {

            # going deeper: the new level inherits the enclosing package
            $rpackage_stack->[ $level - 1 ] ||= 'main';
            $rpackage_stack->[$level] = $rpackage_stack->[ $level - 1 ];
        }
        elsif ( $level < $$rlast_level ) {
            my $package = $rpackage_stack->[$level] || 'main';

            # if we change packages due to a nesting change, we
            # have to make an entry in the toc
            if ( $package ne $rpackage_stack->[ $level + 1 ] ) {
                $self->add_toc_item( $package, 'package' );
            }
        }
        $$rlast_level = $level;

        #-------------------------------------------------------
        # Intercept a sub name here; split it
        # into keyword 'sub' and sub name; and add an
        # entry in the toc
        #-------------------------------------------------------
        if ( $type eq 'i' && $token =~ /^(sub\s+)(\w.*)$/ ) {
            my ( $keyword, $name ) = ( $1, $2 );
            push @colored_tokens, $self->markup_html_element( $keyword, 'k' );
            $token = $name;
            $type  = 'M';

            # but don't include sub declarations in the toc;
            # these will have leading token types 'i;'
            my $signature = join "", @$rtoken_type;
            unless ( $signature =~ /^i;/ ) {
                my $subname = $token;
                $subname =~ s/[\s\(].*$//; # remove any attributes and prototype
                $self->add_toc_item( $subname, 'sub' );
            }
        }

        #-------------------------------------------------------
        # Intercept a package name here; split it
        # into keyword 'package' and name; add to the toc,
        # and update the package stack
        #-------------------------------------------------------
        if ( $type eq 'i' && $token =~ /^(package\s+)(\w.*)$/ ) {
            my ( $keyword, $name ) = ( $1, $2 );
            push @colored_tokens, $self->markup_html_element( $keyword, 'k' );
            $token = $name;
            $type  = 'i';
            $self->add_toc_item( "$token", 'package' );
            $rpackage_stack->[$level] = $token;
        }

        push @colored_tokens, $self->markup_html_element( $token, $type );
    }
    return ( \@colored_tokens );
}

5200

5201

sub markup_html_element {

    # Return $token with its html markup for a token of type $type.
    # Blank tokens are returned unchanged.  Other tokens are escaped and
    # then wrapped either in a css <span> or, when style sheets are
    # turned off, in <font>, <i> and <b> tags.
    my ( $self, $token, $type ) = @_;

    # skip a blank token or a blank line
    return $token if ( $type eq 'b' || $token =~ /^\s*$/ );

    $token = escape_html($token);

    # get the short abbreviation for this token type;
    # punctuation is the default
    my $short_name = $token_short_names{$type};
    $short_name = "pu" unless defined($short_name);

    # handle no style sheets..
    if ( $rOpts->{'nohtml-style-sheets'} ) {
        my $color = $html_color{$short_name};

        # the punctuation color is the page text color; no tag needed
        if ( $color && ( $color ne $rOpts->{'html-color-punctuation'} ) ) {
            $token = qq(<font color="$color">) . $token . "</font>";
        }
        $token = "<i>$token</i>" if $html_italic{$short_name};
        $token = "<b>$token</b>" if $html_bold{$short_name};
    }

    # handle style sheets..
    elsif ( $short_name ne 'pu' ) {
        $token = qq(<span class="$short_name">) . $token . "</span>";
    }
    return $token;
}

5234

5235

sub escape_html {

    # Return a copy of $token which is safe to place in html text or in
    # a double quoted attribute value.
    # When $missing_html_entities is set, only the four characters which
    # are special in html are replaced; otherwise HTML::Entities does
    # the encoding.
    my $token = shift;
    if ($missing_html_entities) {

        # '&' must go first so that the '&' of the entities which are
        # added below is not escaped again
        $token =~ s/\&/&amp;/g;
        $token =~ s/\</&lt;/g;
        $token =~ s/\>/&gt;/g;
        $token =~ s/\"/&quot;/g;
    }
    else {
        HTML::Entities::encode_entities($token);
    }
    return $token;
}

5249

5250

sub finish_formatting {

    # Called after the last line has been written: all that remains is
    # to finish and close the html file.  Returns nothing.
    my ($self) = @_;
    $self->close_html_file();
    return;
}

5257

5258

sub write_line {

    # Write one line of the input file to the html output.
    #   $line_of_tokens - ref to hash describing the line; the keys used
    #                     are _line_type, _line_text, _line_number and,
    #                     for lines of code, _rtoken_type, _rtokens and
    #                     _rlevels
    # Lines of code are marked up token by token; other lines are marked
    # up as a whole.  With pod2html, pod lines go to the pod stream
    # instead and nothing is written to the <pre> section.
    # Does nothing unless the html file was opened.
    #
    # NOTE(review): the marker '<!-- pre section -->' must be the text
    # which the reader of the pod stream looks for when it merges the
    # code sections back in; confirm that the two agree.
    my $self = shift;
    return unless $self->{_html_file_opened};
    my $html_pre_fh      = $self->{_html_pre_fh};
    my ($line_of_tokens) = @_;
    my $line_type        = $line_of_tokens->{_line_type};
    my $input_line       = $line_of_tokens->{_line_text};
    my $line_number      = $line_of_tokens->{_line_number};
    chomp $input_line;

    # markup line of code..
    my $html_line;
    if ( $line_type eq 'CODE' ) {
        my $rtoken_type = $line_of_tokens->{_rtoken_type};
        my $rtokens     = $line_of_tokens->{_rtokens};
        my $rlevels     = $line_of_tokens->{_rlevels};

        # keep the leading whitespace of the input line
        if ( $input_line =~ /(^\s*)/ ) {
            $html_line = $1;
        }
        else {
            $html_line = "";
        }
        my ($rcolored_tokens) =
          $self->markup_tokens( $rtokens, $rtoken_type, $rlevels );
        $html_line .= join '', @$rcolored_tokens;
    }

    # markup line of non-code..
    else {
        my $line_character;
        if    ( $line_type eq 'HERE' )       { $line_character = 'H' }
        elsif ( $line_type eq 'HERE_END' )   { $line_character = 'h' }
        elsif ( $line_type eq 'FORMAT' )     { $line_character = 'H' }
        elsif ( $line_type eq 'FORMAT_END' ) { $line_character = 'h' }
        elsif ( $line_type eq 'SYSTEM' )     { $line_character = 'c' }
        elsif ( $line_type eq 'END_START' ) {
            $line_character = 'k';
            $self->add_toc_item( '__END__', '__END__' );
        }
        elsif ( $line_type eq 'DATA_START' ) {
            $line_character = 'k';
            $self->add_toc_item( '__DATA__', '__DATA__' );
        }
        elsif ( $line_type =~ /^POD/ ) {
            $line_character = 'P';
            if ( $rOpts->{'pod2html'} ) {
                my $html_pod_fh = $self->{_html_pod_fh};
                if ( $line_type eq 'POD_START' ) {

                    my $rpre_string_stack = $self->{_rpre_string_stack};
                    my $rpre_string       = $rpre_string_stack->[-1];

                    # if we have written any non-blank lines to the
                    # current pre section, start writing to a new output
                    # string
                    if ( $$rpre_string =~ /\S/ ) {
                        my $pre_string;
                        $html_pre_fh =
                          Perl::Tidy::IOScalar->new( \$pre_string, 'w' );
                        $self->{_html_pre_fh} = $html_pre_fh;
                        push @$rpre_string_stack, \$pre_string;

                        # leave a marker in the pod stream so we know
                        # where to put the pre section we just
                        # finished.
                        my $for_html = '=for html';    # don't confuse pod utils
                        $html_pod_fh->print(<<EOM);

$for_html
<!-- pre section -->

EOM
                    }

                    # otherwise, just clear the current string and start
                    # over
                    else {
                        $$rpre_string = "";
                        $html_pod_fh->print("\n");
                    }
                }
                $html_pod_fh->print( $input_line . "\n" );
                if ( $line_type eq 'POD_END' ) {
                    $self->{_pod_cut_count}++;
                    $html_pod_fh->print("\n");
                }
                return;
            }
        }
        else { $line_character = 'Q' }
        $html_line = $self->markup_html_element( $input_line, $line_character );
    }

    # add the line number if requested, right justified in a field of
    # 4 columns (wider numbers simply take the room they need)
    if ( $rOpts->{'html-line-numbers'} ) {
        $html_line = sprintf( "%4d ", $line_number ) . $html_line;
    }

    # write the line
    $html_pre_fh->print("$html_line\n");
}

5366

5367

#####################################################################

5368

#

5369

# The Perl::Tidy::Formatter package adds indentation, whitespace, and

5370

# line breaks to the token stream

5371

#

5372

# WARNING: This is not a real class for speed reasons. Only one

5373

# Formatter may be used.

5374

#

5375

#####################################################################

5376

5377

package Perl::Tidy::Formatter;

5378

5379

BEGIN {

    # Caution: these debug flags produce a lot of output
    # They should all be 0 except when debugging small scripts
    use constant FORMATTER_DEBUG_FLAG_BOND    => 0;
    use constant FORMATTER_DEBUG_FLAG_BREAK   => 0;
    use constant FORMATTER_DEBUG_FLAG_CI      => 0;
    use constant FORMATTER_DEBUG_FLAG_FLUSH   => 0;
    use constant FORMATTER_DEBUG_FLAG_FORCE   => 0;
    use constant FORMATTER_DEBUG_FLAG_LIST    => 0;
    use constant FORMATTER_DEBUG_FLAG_NOBREAK => 0;
    use constant FORMATTER_DEBUG_FLAG_OUTPUT  => 0;
    use constant FORMATTER_DEBUG_FLAG_SPARSE  => 0;
    use constant FORMATTER_DEBUG_FLAG_STORE   => 0;
    use constant FORMATTER_DEBUG_FLAG_UNDOBP  => 0;
    use constant FORMATTER_DEBUG_FLAG_WHITE   => 0;

    # Announce, at compile time, each debug flag which is switched on
    my @debug_flags = (
        [ 'BOND',    FORMATTER_DEBUG_FLAG_BOND ],
        [ 'BREAK',   FORMATTER_DEBUG_FLAG_BREAK ],
        [ 'CI',      FORMATTER_DEBUG_FLAG_CI ],
        [ 'FLUSH',   FORMATTER_DEBUG_FLAG_FLUSH ],
        [ 'FORCE',   FORMATTER_DEBUG_FLAG_FORCE ],
        [ 'LIST',    FORMATTER_DEBUG_FLAG_LIST ],
        [ 'NOBREAK', FORMATTER_DEBUG_FLAG_NOBREAK ],
        [ 'OUTPUT',  FORMATTER_DEBUG_FLAG_OUTPUT ],
        [ 'SPARSE',  FORMATTER_DEBUG_FLAG_SPARSE ],
        [ 'STORE',   FORMATTER_DEBUG_FLAG_STORE ],
        [ 'UNDOBP',  FORMATTER_DEBUG_FLAG_UNDOBP ],
        [ 'WHITE',   FORMATTER_DEBUG_FLAG_WHITE ],
    );
    foreach my $rflag (@debug_flags) {
        my ( $key, $is_on ) = @{$rflag};
        print "FORMATTER_DEBUGGING with key $key\n" if $is_on;
    }
}

5413

5414

use Carp;

5415

# Package variables holding all formatter state.  They are globals rather
# than object attributes (see the WARNING in the package header).  Each name
# is declared exactly once; comments cannot be placed inside the qw{} list
# because they would be taken as variable names.
use vars qw{

  @gnu_stack
  $max_gnu_stack_index
  $gnu_position_predictor
  $line_start_index_to_go
  $last_indentation_written
  $last_unadjusted_indentation
  $last_leading_token

  $saw_VERSION_in_this_file
  $saw_END_or_DATA_

  @gnu_item_list
  $max_gnu_item_index
  $gnu_sequence_number
  $last_output_indentation
  %last_gnu_equals
  %gnu_comma_count
  %gnu_arrow_count

  @block_type_to_go
  @type_sequence_to_go
  @container_environment_to_go
  @bond_strength_to_go
  @forced_breakpoint_to_go
  @lengths_to_go
  @levels_to_go
  @leading_spaces_to_go
  @reduced_spaces_to_go
  @matching_token_to_go
  @mate_index_to_go
  @nesting_blocks_to_go
  @ci_levels_to_go
  @nesting_depth_to_go
  @nobreak_to_go
  @old_breakpoint_to_go
  @tokens_to_go
  @types_to_go

  %saved_opening_indentation

  $max_index_to_go
  $comma_count_in_batch
  $old_line_count_in_batch
  $last_nonblank_index_to_go
  $last_nonblank_type_to_go
  $last_nonblank_token_to_go
  $last_last_nonblank_index_to_go
  $last_last_nonblank_type_to_go
  $last_last_nonblank_token_to_go
  @nonblank_lines_at_depth
  $starting_in_quote
  $ending_in_quote

  $in_format_skipping_section
  $format_skipping_pattern_begin
  $format_skipping_pattern_end

  $forced_breakpoint_count
  $forced_breakpoint_undo_count
  @forced_breakpoint_undo_stack
  %postponed_breakpoint

  $tabbing
  $embedded_tab_count
  $first_embedded_tab_at
  $last_embedded_tab_at
  $deleted_semicolon_count
  $first_deleted_semicolon_at
  $last_deleted_semicolon_at
  $added_semicolon_count
  $first_added_semicolon_at
  $last_added_semicolon_at
  $first_tabbing_disagreement
  $last_tabbing_disagreement
  $in_tabbing_disagreement
  $tabbing_disagreement_count
  $input_line_tabbing

  $last_line_type
  $last_line_leading_type
  $last_line_leading_level
  $last_last_line_leading_level

  %block_leading_text
  %block_opening_line_number
  $csc_new_statement_ok
  $accumulating_text_for_block
  $leading_block_text
  $rleading_block_if_elsif_text
  $leading_block_text_level
  $leading_block_text_length_exceeded
  $leading_block_text_line_length
  $leading_block_text_line_number
  $closing_side_comment_prefix_pattern
  $closing_side_comment_list_pattern

  $last_nonblank_token
  $last_nonblank_type
  $last_last_nonblank_token
  $last_last_nonblank_type
  $last_nonblank_block_type
  $last_output_level
  %is_do_follower
  %is_if_brace_follower
  %space_after_keyword
  $rbrace_follower
  $looking_for_else
  %is_last_next_redo_return
  %is_other_brace_follower
  %is_else_brace_follower
  %is_anon_sub_brace_follower
  %is_anon_sub_1_brace_follower
  %is_sort_map_grep
  %is_sort_map_grep_eval
  %is_sort_map_grep_eval_do
  %is_block_without_semicolon
  %is_if_unless
  %is_and_or
  %is_assignment
  %is_chain_operator
  %is_if_unless_and_or_last_next_redo_return
  %is_until_while_for_if_elsif_else

  @has_broken_sublist
  @dont_align
  @want_comma_break

  $is_static_block_comment
  $index_start_one_line_block
  $semicolons_before_block_self_destruct
  $index_max_forced_break
  $input_line_number
  $diagnostics_object
  $vertical_aligner_object
  $logger_object
  $file_writer_object
  $formatter_self
  @ci_stack
  $last_line_had_side_comment
  %want_break_before
  %outdent_keyword
  $static_block_comment_pattern
  $static_side_comment_pattern
  %opening_vertical_tightness
  %closing_vertical_tightness
  %closing_token_indentation

  %opening_token_right
  %stack_opening_token
  %stack_closing_token

  $block_brace_vertical_tightness_pattern

  $rOpts_add_newlines
  $rOpts_add_whitespace
  $rOpts_block_brace_tightness
  $rOpts_block_brace_vertical_tightness
  $rOpts_brace_left_and_indent
  $rOpts_comma_arrow_breakpoints
  $rOpts_break_at_old_keyword_breakpoints
  $rOpts_break_at_old_comma_breakpoints
  $rOpts_break_at_old_logical_breakpoints
  $rOpts_break_at_old_ternary_breakpoints
  $rOpts_closing_side_comment_else_flag
  $rOpts_closing_side_comment_maximum_text
  $rOpts_continuation_indentation
  $rOpts_cuddled_else
  $rOpts_delete_old_whitespace
  $rOpts_fuzzy_line_length
  $rOpts_indent_columns
  $rOpts_line_up_parentheses
  $rOpts_maximum_fields_per_table
  $rOpts_maximum_line_length
  $rOpts_short_concatenation_item_length
  $rOpts_swallow_optional_blank_lines
  $rOpts_ignore_old_breakpoints
  $rOpts_format_skipping
  $rOpts_space_function_paren
  $rOpts_space_keyword_paren
  $rOpts_keep_interior_semicolons

  $half_maximum_line_length

  %is_opening_type
  %is_closing_type
  %is_keyword_returning_list
  %tightness
  %matching_token
  $rOpts
  %right_bond_strength
  %left_bond_strength
  %binary_ws_rules
  %want_left_space
  %want_right_space
  %is_digraph
  %is_trigraph
  $bli_pattern
  $bli_list_string
  %is_closing_token
  %is_opening_token
};

5620

5621

BEGIN {

    # Build the constant lookup tables used by the formatter.  Each table
    # is a hash whose keys are the listed words and whose values are 1.

    # default list of block types for which -bli would apply
    $bli_list_string = 'if else elsif unless while for foreach do : sub';

    # Scratch list for the table keys.  A lexical is used instead of the
    # global @_ so that building the tables cannot clobber an argument list.
    my @q;

    # two-character operators
    @q = qw(
      .. :: << >> ** && || // -> => += -= .= %= &= |= ^= *= <>
      <= >= == =~ !~ != ++ -- /= x=
    );
    @is_digraph{@q} = (1) x scalar(@q);

    # three-character operators
    @q = qw( ... **= <<= >>= &&= ||= //= <=> );
    @is_trigraph{@q} = (1) x scalar(@q);

    # assignment operators
    @q = qw(
      = **= += *= &= <<= &&=
      -= /= |= >>= ||= //=
      .= %= ^=
      x=
    );
    @is_assignment{@q} = (1) x scalar(@q);

    @q = qw(
      grep
      keys
      map
      reverse
      sort
      split
    );
    @is_keyword_returning_list{@q} = (1) x scalar(@q);

    # NOTE(review): 'is' is not a Perl keyword and looks like a typo for
    # 'if'.  It is kept so the table contents are unchanged -- confirm
    # against the users of this hash before removing it.
    @q = qw(is if unless and or err last next redo return);
    @is_if_unless_and_or_last_next_redo_return{@q} = (1) x scalar(@q);

    # always break after a closing curly of these block types:
    @q = qw(until while for if elsif else);
    @is_until_while_for_if_elsif_else{@q} = (1) x scalar(@q);

    @q = qw(last next redo return);
    @is_last_next_redo_return{@q} = (1) x scalar(@q);

    @q = qw(sort map grep);
    @is_sort_map_grep{@q} = (1) x scalar(@q);

    @q = qw(sort map grep eval);
    @is_sort_map_grep_eval{@q} = (1) x scalar(@q);

    @q = qw(sort map grep eval do);
    @is_sort_map_grep_eval_do{@q} = (1) x scalar(@q);

    @q = qw(if unless);
    @is_if_unless{@q} = (1) x scalar(@q);

    @q = qw(and or err);
    @is_and_or{@q} = (1) x scalar(@q);

    # Identify certain operators which often occur in chains.
    # Note: the minus (-) causes a side effect of padding of the first line in
    # something like this (by sub set_logical_padding):
    #    Checkbutton => 'Transmission checked',
    #   -variable    => \$TRANS
    # This usually improves appearance so it seems ok.
    @q = qw(&& || and or : ? . + - * /);
    @is_chain_operator{@q} = (1) x scalar(@q);

    # We can remove semicolons after blocks preceded by these keywords
    @q =
      qw(BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue if elsif else
      unless while until for foreach);
    @is_block_without_semicolon{@q} = (1) x scalar(@q);

    # 'L' is token for opening { at hash key
    @q = qw" L { ( [ ";
    @is_opening_type{@q} = (1) x scalar(@q);

    # 'R' is token for closing } at hash key
    @q = qw" R } ) ] ";
    @is_closing_type{@q} = (1) x scalar(@q);

    @q = qw" { ( [ ";
    @is_opening_token{@q} = (1) x scalar(@q);

    @q = qw" } ) ] ";
    @is_closing_token{@q} = (1) x scalar(@q);
}

5707

5708

# Whitespace codes, listed from lowest to highest value.
use constant WS_NO       => -1;
use constant WS_OPTIONAL => 0;
use constant WS_YES      => 1;

# Token bond strengths, listed from weakest to strongest.
use constant VERY_WEAK   => 0.55;
use constant WEAK        => 0.8;
use constant NOMINAL     => 1.1;
use constant STRONG      => 2.1;
use constant VERY_STRONG => 100;
use constant NO_BREAK    => 10000;

# Value used to mark an index into the output arrays as "not set".
use constant UNDEFINED_INDEX => -1;

# Maximum number of little messages; probably need not be changed.
use constant MAX_NAG_MESSAGES => 6;

# Increment between sequence numbers for each type.
# For example, ?: pairs might have numbers 7,11,15,...
use constant TYPE_SEQUENCE_INCREMENT => 4;

5730

5731

{

    # Instance counter, private to the three subs in this block.
    #   get_count        - return the current count
    #   _increment_count - add one and return the new count
    #   _decrement_count - subtract one and return the new count
    my $instance_count = 0;

    sub get_count {
        return $instance_count;
    }

    sub _increment_count {
        return ++$instance_count;
    }

    sub _decrement_count {
        return --$instance_count;
    }
}

5739

5740

sub trim {

    # Remove leading and trailing whitespace from a string.
    # The caller's variable is modified in place (through the @_ alias)
    # and the trimmed string is also returned.
    for ( $_[0] ) {
        s/^\s+//;
        s/\s+$//;
    }
    return $_[0];
}

5747

5748

sub split_words {

    # Given a string containing words separated by whitespace,
    # return the list of words.
    #
    # Returns an empty list (undef in scalar context) for an undefined or
    # empty string.  The test is on definedness and length rather than on
    # truth, so that the one-word string "0" is not mistaken for "no words".
    my ($str) = @_;
    return unless defined $str && length $str;

    # split ' ' skips leading whitespace and splits on runs of whitespace,
    # so no separate trimming step is needed.
    return split( ' ', $str );
}

5758

5759

# interface to Perl::Tidy::Logger routines

5760

sub warning {

    # Pass all arguments to the logger's warning method.
    # Does nothing when no logger object has been set.
    return $logger_object && $logger_object->warning(@_);
}

5765

5766

sub complain {

    # Pass all arguments to the logger's complain method.
    # Does nothing when no logger object has been set.
    return $logger_object && $logger_object->complain(@_);
}

5771

5772

sub write_logfile_entry {

    # Pass all arguments to the logger's write_logfile_entry method.
    # Does nothing when no logger object has been set.
    return $logger_object && $logger_object->write_logfile_entry(@_);
}

5777

5778

sub black_box {

    # Pass all arguments to the logger's black_box method.
    # Does nothing when no logger object has been set.
    return $logger_object && $logger_object->black_box(@_);
}

5783

5784

sub report_definite_bug {

    # Call the logger's report_definite_bug method (no arguments are
    # passed on).  Does nothing when no logger object has been set.
    return $logger_object && $logger_object->report_definite_bug();
}

5789

5790

sub get_saw_brace_error {

    # Return the result of the logger's get_saw_brace_error method,
    # or a false value when no logger object has been set.
    return $logger_object && $logger_object->get_saw_brace_error();
}

5795

5796

sub we_are_at_the_last_line {

    # Call the logger's we_are_at_the_last_line method (no arguments are
    # passed on).  Does nothing when no logger object has been set.
    return $logger_object && $logger_object->we_are_at_the_last_line();
}

5801

5802

# interface to Perl::Tidy::Diagnostics routine

5803

sub write_diagnostics {

    # Pass all arguments to the diagnostics object's write_diagnostics
    # method.  Does nothing when no diagnostics object has been set.
    return $diagnostics_object
      && $diagnostics_object->write_diagnostics(@_);
}

5809

5810

sub get_added_semicolon_count {

    # Method returning the current value of $added_semicolon_count.
    # The count is a package variable, so the invocant is not consulted.
    return $added_semicolon_count;
}

5814

5815

sub DESTROY {

    # Destructor: decrement the instance count through the object's
    # _decrement_count method.
    my ($self) = @_;
    return $self->_decrement_count();
}

5818

5819

sub new {

    # Constructor.  Named arguments (each defaults to undef):
    #   sink_object        - handed to the FileWriter created below
    #   diagnostics_object - stored for sub write_diagnostics
    #   logger_object      - stored for the logger interface subs
    #
    # The formatter keeps its state in package variables rather than in the
    # returned object, so this sub resets that state.  Returns the blessed
    # (empty) formatter object, and confesses if the instance count then
    # exceeds one.
    my ( $class, @named_args ) = @_;

    my %args = (
        sink_object        => undef,
        diagnostics_object => undef,
        logger_object      => undef,
        @named_args,
    );

    $logger_object      = $args{logger_object};
    $diagnostics_object = $args{diagnostics_object};

    # wrap the caller's sink object in a FileWriter
    $file_writer_object =
      Perl::Tidy::FileWriter->new( $args{sink_object}, $rOpts,
        $logger_object );

    # initialize the leading whitespace stack to negative levels
    # so that we can never run off the end of the stack
    $gnu_position_predictor = 0;    # where the current token is predicted to be
    $max_gnu_stack_index    = 0;
    $max_gnu_item_index     = -1;
    $gnu_stack[0]  = new_lp_indentation_item( 0, -1, -1, 0, 0 );
    @gnu_item_list = ();

    $last_leading_token       = "";
    $saw_VERSION_in_this_file = !$rOpts->{'pass-version-line'};

    # arrays describing the tokens of the current batch start out empty
    foreach my $rbatch_array (
        \@block_type_to_go,            \@type_sequence_to_go,
        \@container_environment_to_go, \@bond_strength_to_go,
        \@forced_breakpoint_to_go,     \@lengths_to_go,
        \@levels_to_go,                \@matching_token_to_go,
        \@mate_index_to_go,            \@nesting_blocks_to_go,
        \@ci_levels_to_go,             \@nobreak_to_go,
        \@old_breakpoint_to_go,        \@tokens_to_go,
        \@types_to_go,                 \@leading_spaces_to_go,
        \@reduced_spaces_to_go,        \@dont_align,
        \@has_broken_sublist,          \@want_comma_break,
      )
    {
        @{$rbatch_array} = ();
    }

    # these two start with a single element
    @nesting_depth_to_go = (0);
    @ci_stack            = ("");

    # counters, positions and flags which start at zero
    foreach my $zeroed (
        $last_output_indentation,    $last_indentation_written,
        $last_unadjusted_indentation, $saw_END_or_DATA_,
        $first_tabbing_disagreement, $last_tabbing_disagreement,
        $tabbing_disagreement_count, $in_tabbing_disagreement,
        $last_last_line_leading_level, $last_line_leading_level,
        $last_output_level,          $looking_for_else,
        $embedded_tab_count,         $first_embedded_tab_at,
        $last_embedded_tab_at,       $deleted_semicolon_count,
        $first_deleted_semicolon_at, $last_deleted_semicolon_at,
        $added_semicolon_count,      $first_added_semicolon_at,
        $last_added_semicolon_at,    $last_line_had_side_comment,
        $is_static_block_comment,    $in_format_skipping_section,
      )
    {
        $zeroed = 0;
    }

    $input_line_tabbing = undef;

    # memory of the previous line and of the previous nonblank tokens
    $last_line_type           = "";
    $last_line_leading_type   = '#';
    $last_nonblank_token      = ';';
    $last_nonblank_type       = ';';
    $last_last_nonblank_token = ';';
    $last_last_nonblank_type  = ';';
    $last_nonblank_block_type = "";

    %postponed_breakpoint      = ();
    %saved_opening_indentation = ();

    # variables for adding side comments
    %block_leading_text        = ();
    %block_opening_line_number = ();
    $csc_new_statement_ok      = 1;

    reset_block_text_accumulator();

    prepare_for_new_input_lines();

    $vertical_aligner_object =
      Perl::Tidy::VerticalAligner->initialize( $rOpts, $file_writer_object,
        $logger_object, $diagnostics_object );

    # note the indentation scheme in the log file
    my $indentation_note;
    if ( $rOpts->{'entab-leading-whitespace'} ) {
        $indentation_note =
"Leading whitespace will be entabbed with $rOpts->{'entab-leading-whitespace'} spaces per tab\n";
    }
    elsif ( $rOpts->{'tabs'} ) {
        $indentation_note = "Indentation will be with a tab character\n";
    }
    else {
        $indentation_note =
          "Indentation will be with $rOpts->{'indent-columns'} spaces\n";
    }
    write_logfile_entry($indentation_note);

    # This was the start of a formatter referent, but object-oriented
    # coding has turned out to be too slow here.
    $formatter_self = bless {}, $class;

    # Safety check..this is not a class yet
    if ( _increment_count() > 1 ) {
        confess
"Attempt to create more than 1 object in $class, which is not a true class yet\n";
    }
    return $formatter_self;
}

5951

5952

sub prepare_for_new_input_lines {

    # Reset the package variables which describe the current batch of
    # tokens, ready for a new batch to be accumulated.

    $gnu_sequence_number++;    # increment output batch counter

    %last_gnu_equals = ();
    %gnu_comma_count = ();
    %gnu_arrow_count = ();

    # index values: nothing stored yet
    $line_start_index_to_go = 0;
    $max_gnu_item_index     = $index_max_forced_break = $max_index_to_go =
      $last_nonblank_index_to_go = $last_last_nonblank_index_to_go =
      UNDEFINED_INDEX;

    # no previous nonblank tokens in this batch
    $last_nonblank_type_to_go = $last_nonblank_token_to_go =
      $last_last_nonblank_type_to_go = $last_last_nonblank_token_to_go = '';

    # counters and flags
    $forced_breakpoint_count = $forced_breakpoint_undo_count =
      $comma_count_in_batch  = $starting_in_quote = 0;
    $old_line_count_in_batch = 1;
    $lengths_to_go[0]        = 0;
    $rbrace_follower         = undef;

    destroy_one_line_block();
}

5978

5979

sub write_line {

    # Method which accepts one input line for formatting.  The single
    # argument is a hash ref; its _line_type and _line_text entries are
    # read here.  Lines of type CODE are passed to print_line_of_tokens;
    # all other lines are written out unindented (or dropped, for pod
    # lines when the delete-pod option is set).
    my ( $self, $line_of_tokens ) = @_;

    my $input_line = $line_of_tokens->{_line_text};
    my $line_type  = $line_of_tokens->{_line_type};

    # _line_type codes are:
    #   SYSTEM         - system-specific code before hash-bang line
    #   CODE           - line of perl code (including comments)
    #   POD_START      - line starting pod, such as '=head'
    #   POD            - pod documentation text
    #   POD_END        - last line of pod section, '=cut'
    #   HERE           - text of here-document
    #   HERE_END       - last line of here-doc (target word)
    #   FORMAT         - format section
    #   FORMAT_END     - last line of format section, '.'
    #   DATA_START     - __DATA__ line
    #   DATA           - unidentified text following __DATA__
    #   END_START      - __END__ line
    #   END            - unidentified text following __END__
    #   ERROR          - we are in big trouble, probably not a perl script

    # put a blank line after an =cut which comes before __END__ and __DATA__
    # (required by podchecker)
    if ( $last_line_type eq 'POD_END' && !$saw_END_or_DATA_ ) {
        $file_writer_object->reset_consecutive_blank_lines();
        want_blank_line() unless ( $input_line =~ /^\s*$/ );
    }

    if ( $line_type eq 'CODE' ) {

        # a line of code: let logger see all non-blank lines of code
        unless ( $input_line =~ /^\s*$/ ) {
            my $output_line_number =
              $vertical_aligner_object->get_output_line_number();
            black_box( $line_of_tokens, $output_line_number );
        }
        print_line_of_tokens($line_of_tokens);
    }
    else {

        # a line of non-code: set special flags
        my $skip_line = 0;
        my $tee_line  = 0;

        if ( $line_type =~ /^POD/ ) {

            # Pod docs should have a preceding blank line.  But be
            # very careful in __END__ and __DATA__ sections, because:
            #   1. the user may be using this section for any purpose whatsoever
            #   2. the blank counters are not active there
            # It should be safe to request a blank line between an
            # __END__ or __DATA__ and an immediately following '=head'
            # type line, (types END_START and DATA_START), but not for
            # any other lines of type END or DATA.
            $skip_line = 1 if $rOpts->{'delete-pod'};
            $tee_line  = 1 if $rOpts->{'tee-pod'};
            want_blank_line()
              unless $skip_line
              || $line_type ne 'POD_START'
              || $last_line_type =~ /^(END|DATA)$/;
        }

        # leave the blank counters in a predictable state
        # after __END__ or __DATA__
        elsif ( $line_type =~ /^(END_START|DATA_START)$/ ) {
            $file_writer_object->reset_consecutive_blank_lines();
            $saw_END_or_DATA_ = 1;
        }

        # write unindented non-code line
        unless ($skip_line) {
            $file_writer_object->tee_on() if $tee_line;
            write_unindented_line($input_line);
            $file_writer_object->tee_off() if $tee_line;
        }
    }

    # remember this line's type for the next call
    $last_line_type = $line_type;
}

6064

6065

sub create_one_line_block {

    # Record the two values describing a one-line block:
    #   first argument  - stored in $index_start_one_line_block
    #   second argument - stored in $semicolons_before_block_self_destruct
    my ( $start_index, $semicolon_count ) = @_;
    $index_start_one_line_block            = $start_index;
    $semicolons_before_block_self_destruct = $semicolon_count;
}

6069

6070

sub destroy_one_line_block {

    # Forget any one-line block: mark its starting index as undefined and
    # clear the semicolon countdown.
    $index_start_one_line_block            = UNDEFINED_INDEX;
    $semicolons_before_block_self_destruct = 0;
}

6074

6075

sub leading_spaces_to_go {

    # Return the number of indentation spaces for the token with the given
    # index in the output stream; these were previously stored by
    # 'set_leading_whitespace'.
    my ($index) = @_;
    return get_SPACES( $leading_spaces_to_go[$index] );
}

6083

6084

sub get_SPACES {

    # Return the number of leading spaces associated with an indentation
    # variable.  The argument is either a plain number of spaces, which is
    # returned as is, or an object, whose get_SPACES method is called.
    my ($indentation) = @_;
    return $indentation unless ref($indentation);
    return $indentation->get_SPACES();
}

6092

6093

sub get_RECOVERABLE_SPACES {

    # Return the number of spaces (+ means shift right, - means shift left)
    # that we would like to shift a group of lines with the same indentation
    # to get them to line up with their opening parens.  A plain number
    # carries no such information, so 0 is returned for it.
    my ($indentation) = @_;
    return 0 unless ref($indentation);
    return $indentation->get_RECOVERABLE_SPACES();
}

6101

6102

sub get_AVAILABLE_SPACES_to_go {

    # Return the number of available leading spaces for the token with the
    # given index.  The stored indentation is either a constant number of
    # spaces, for which 0 is returned, or an object with a
    # get_AVAILABLE_SPACES method.
    my ($index) = @_;
    my $indentation = $leading_spaces_to_go[$index];
    return 0 unless ref($indentation);
    return $indentation->get_AVAILABLE_SPACES();
}

6111

6112

sub new_lp_indentation_item {

    # This is an interface to the IndentationItem class: create a new item
    # and return it.  The item is also saved in @gnu_item_list, under a
    # newly allocated index, unless the level is negative.
    my ( $spaces, $level, $ci_level, $available_spaces, $align_paren ) = @_;

    # A negative level implies not to store the item in the item_list
    my $store_item = ( $level >= 0 );
    my $index      = $store_item ? ++$max_gnu_item_index : 0;

    my $item = Perl::Tidy::IndentationItem->new(
        $spaces,      $level,
        $ci_level,    $available_spaces,
        $index,       $gnu_sequence_number,
        $align_paren, $max_gnu_stack_index,
        $line_start_index_to_go,
    );

    $gnu_item_list[$max_gnu_item_index] = $item if $store_item;

    return $item;
}

6135

6136

sub set_leading_whitespace {

6137

6138

# This routine defines leading whitespace

6139

# given: the level and continuation_level of a token,

6140

# define: space count of leading string which would apply if it

6141

# were the first token of a new line.

6142

6143

my ( $level, $ci_level, $in_continued_quote ) = @_;

6144

6145

# modify for -bli, which adds one continuation indentation for

6146

# opening braces

6147

if ( $rOpts_brace_left_and_indent

6148

&& $max_index_to_go == 0

6149

&& $block_type_to_go[$max_index_to_go] =~ /$bli_pattern/o )

6150

{

6151

$ci_level++;

6152

}

6153

6154

# patch to avoid trouble when input file has negative indentation.

6155

# other logic should catch this error.

6156

if ( $level < 0 ) { $level = 0 }

6157

6158

#-------------------------------------------

6159

# handle the standard indentation scheme

6160

#-------------------------------------------

6161

unless ($rOpts_line_up_parentheses) {

6162

my $space_count =

6163

$ci_level * $rOpts_continuation_indentation +

6164

$level * $rOpts_indent_columns;

6165

my $ci_spaces =

6166

( $ci_level == 0 ) ? 0 : $rOpts_continuation_indentation;

6167

6168

if ($in_continued_quote) {

6169

$space_count = 0;

6170

$ci_spaces = 0;

6171

}

6172

$leading_spaces_to_go[$max_index_to_go] = $space_count;

6173

$reduced_spaces_to_go[$max_index_to_go] = $space_count - $ci_spaces;

6174

return;

6175

}

6176

6177

#-------------------------------------------------------------

6178

# handle case of -lp indentation..

6179

#-------------------------------------------------------------

6180

6181

# The continued_quote flag means that this is the first token of a

6182

# line, and it is the continuation of some kind of multi-line quote

6183

# or pattern. It requires special treatment because it must have no

6184

# added leading whitespace. So we create a special indentation item

6185

# which is not in the stack.

6186

if ($in_continued_quote) {

6187

my $space_count = 0;

6188

my $available_space = 0;

6189

$level = -1; # flag to prevent storing in item_list

6190

$leading_spaces_to_go[$max_index_to_go] =

6191

$reduced_spaces_to_go[$max_index_to_go] =

6192

new_lp_indentation_item( $space_count, $level, $ci_level,

6193

$available_space, 0 );

6194

return;

6195

}

6196

6197

# get the top state from the stack

6198

my $space_count = $gnu_stack[$max_gnu_stack_index]->get_SPACES();

6199

my $current_level = $gnu_stack[$max_gnu_stack_index]->get_LEVEL();

6200

my $current_ci_level = $gnu_stack[$max_gnu_stack_index]->get_CI_LEVEL();

6201

6202

my $type = $types_to_go[$max_index_to_go];

6203

my $token = $tokens_to_go[$max_index_to_go];

6204

my $total_depth = $nesting_depth_to_go[$max_index_to_go];

6205

6206

if ( $type eq '{' || $type eq '(' ) {

6207

6208

$gnu_comma_count{ $total_depth + 1 } = 0;

6209

$gnu_arrow_count{ $total_depth + 1 } = 0;

6210

6211

# If we come to an opening token after an '=' token of some type,

6212

# see if it would be helpful to 'break' after the '=' to save space

6213

my $last_equals = $last_gnu_equals{$total_depth};

6214

if ( $last_equals && $last_equals > $line_start_index_to_go ) {

6215

6216

# find the position if we break at the '='

6217

my $i_test = $last_equals;

6218

if ( $types_to_go[ $i_test + 1 ] eq 'b' ) { $i_test++ }

6219

6220

# TESTING

6221

##my $too_close = ($i_test==$max_index_to_go-1);

6222

6223

my $test_position = total_line_length( $i_test, $max_index_to_go );

6224

6225

if (

6226

6227

# the equals is not just before an open paren (testing)

6228

##!$too_close &&

6229

6230

# if we are beyond the midpoint

6231

$gnu_position_predictor > $half_maximum_line_length

6232

6233

# or we are beyond the 1/4 point and there was an old

6234

# break at the equals

6235

|| (

6236

$gnu_position_predictor > $half_maximum_line_length / 2

6237

&& (

6238

$old_breakpoint_to_go[$last_equals]

6239

|| ( $last_equals > 0

6240

&& $old_breakpoint_to_go[ $last_equals - 1 ] )

6241

|| ( $last_equals > 1

6242

&& $types_to_go[ $last_equals - 1 ] eq 'b'

6243

&& $old_breakpoint_to_go[ $last_equals - 2 ] )

6244

)

6245

)

6246

)

6247

{

6248

6249

# then make the switch -- note that we do not set a real

6250

# breakpoint here because we may not really need one; sub

6251

# scan_list will do that if necessary

6252

$line_start_index_to_go = $i_test + 1;

6253

$gnu_position_predictor = $test_position;

6254

}

6255

}

6256

}

6257

6258

# Check for decreasing depth ..

6259

# Note that one token may have both decreasing and then increasing

6260

# depth. For example, (level, ci) can go from (1,1) to (2,0). So,

6261

# in this example we would first go back to (1,0) then up to (2,0)

6262

# in a single call.

6263

if ( $level < $current_level || $ci_level < $current_ci_level ) {

6264

6265

# loop to find the first entry at or completely below this level

6266

my ( $lev, $ci_lev );

6267

while (1) {

6268

if ($max_gnu_stack_index) {

6269

6270

# save index of token which closes this level

6271

$gnu_stack[$max_gnu_stack_index]->set_CLOSED($max_index_to_go);

6272

6273

# Undo any extra indentation if we saw no commas

6274

my $available_spaces =

6275

$gnu_stack[$max_gnu_stack_index]->get_AVAILABLE_SPACES();

6276

6277

my $comma_count = 0;

6278

my $arrow_count = 0;

6279

if ( $type eq '}' || $type eq ')' ) {

6280

$comma_count = $gnu_comma_count{$total_depth};

6281

$arrow_count = $gnu_arrow_count{$total_depth};

6282

$comma_count = 0 unless $comma_count;

6283

$arrow_count = 0 unless $arrow_count;

6284

}

6285

$gnu_stack[$max_gnu_stack_index]->set_COMMA_COUNT($comma_count);

6286

$gnu_stack[$max_gnu_stack_index]->set_ARROW_COUNT($arrow_count);

6287

6288

if ( $available_spaces > 0 ) {

6289

6290

if ( $comma_count <= 0 || $arrow_count > 0 ) {

6291

6292

my $i = $gnu_stack[$max_gnu_stack_index]->get_INDEX();

6293

my $seqno =

6294

$gnu_stack[$max_gnu_stack_index]

6295

->get_SEQUENCE_NUMBER();

6296

6297

# Be sure this item was created in this batch. This

6298

# should be true because we delete any available

6299

# space from open items at the end of each batch.

6300

if ( $gnu_sequence_number != $seqno

6301

|| $i > $max_gnu_item_index )

6302

{

6303

warning(

6304

"Program bug with -lp. seqno=$seqno should be $gnu_sequence_number and i=$i should be less than max=$max_gnu_item_index\n"

6305

);

6306

report_definite_bug();

6307

}

6308

6309

else {

6310

if ( $arrow_count == 0 ) {

6311

$gnu_item_list[$i]

6312

->permanently_decrease_AVAILABLE_SPACES(

6313

$available_spaces);

6314

}

6315

else {

6316

$gnu_item_list[$i]

6317

->tentatively_decrease_AVAILABLE_SPACES(

6318

$available_spaces);

6319

}

6320

6321

my $j;

6322

for (

6323

$j = $i + 1 ;

6324

$j <= $max_gnu_item_index ;

6325

$j++

6326

)

6327

{

6328

$gnu_item_list[$j]

6329

->decrease_SPACES($available_spaces);

6330

}

6331

}

6332

}

6333

}

6334

6335

# go down one level

6336

--$max_gnu_stack_index;

6337

$lev = $gnu_stack[$max_gnu_stack_index]->get_LEVEL();

6338

$ci_lev = $gnu_stack[$max_gnu_stack_index]->get_CI_LEVEL();

6339

6340

# stop when we reach a level at or below the current level

6341

if ( $lev <= $level && $ci_lev <= $ci_level ) {

6342

$space_count =

6343

$gnu_stack[$max_gnu_stack_index]->get_SPACES();

6344

$current_level = $lev;

6345

$current_ci_level = $ci_lev;

6346

last;

6347

}

6348

}

6349

6350

# reached bottom of stack .. should never happen because

6351

# only negative levels can get here, and $level was forced

6352

# to be positive above.

6353

else {

6354

warning(

6355

"program bug with -lp: stack_error. level=$level; lev=$lev; ci_level=$ci_level; ci_lev=$ci_lev; rerun with -nlp\n"

6356

);

6357

report_definite_bug();

6358

last;

6359

}

6360

}

6361

}

6362

6363

# handle increasing depth

6364

if ( $level > $current_level || $ci_level > $current_ci_level ) {

6365

6366

# Compute the standard incremental whitespace. This will be

6367

# the minimum incremental whitespace that will be used. This

6368

# choice results in a smooth transition between the gnu-style

6369

# and the standard style.

6370

my $standard_increment =

6371

( $level - $current_level ) * $rOpts_indent_columns +

6372

( $ci_level - $current_ci_level ) * $rOpts_continuation_indentation;

6373

6374

# Now we have to define how much extra incremental space

6375

# ("$available_space") we want. This extra space will be

6376

# reduced as necessary when long lines are encountered or when

6377

# it becomes clear that we do not have a good list.

6378

my $available_space = 0;

6379

my $align_paren = 0;

6380

my $excess = 0;

6381

6382

# initialization on empty stack..

6383

if ( $max_gnu_stack_index == 0 ) {

6384

$space_count = $level * $rOpts_indent_columns;

6385

}

6386

6387

# if this is a BLOCK, add the standard increment

6388

elsif ($last_nonblank_block_type) {

6389

$space_count += $standard_increment;

6390

}

6391

6392

# if last nonblank token was not structural indentation,

6393

# just use standard increment

6394

elsif ( $last_nonblank_type ne '{' ) {

6395

$space_count += $standard_increment;

6396

}

6397

6398

# otherwise use the space to the first non-blank level change token

6399

else {

6400

6401

$space_count = $gnu_position_predictor;

6402

6403

my $min_gnu_indentation =

6404

$gnu_stack[$max_gnu_stack_index]->get_SPACES();

6405

6406

$available_space = $space_count - $min_gnu_indentation;

6407

if ( $available_space >= $standard_increment ) {

6408

$min_gnu_indentation += $standard_increment;

6409

}

6410

elsif ( $available_space > 1 ) {

6411

$min_gnu_indentation += $available_space + 1;

6412

}

6413

elsif ( $last_nonblank_token =~ /^[\{\[\(]$/ ) {

6414

if ( ( $tightness{$last_nonblank_token} < 2 ) ) {

6415

$min_gnu_indentation += 2;

6416

}

6417

else {

6418

$min_gnu_indentation += 1;

6419

}

6420

}

6421

else {

6422

$min_gnu_indentation += $standard_increment;

6423

}

6424

$available_space = $space_count - $min_gnu_indentation;

6425

6426

if ( $available_space < 0 ) {

6427

$space_count = $min_gnu_indentation;

6428

$available_space = 0;

6429

}

6430

$align_paren = 1;

6431

}

6432

6433

# update state, but not on a blank token

6434

if ( $types_to_go[$max_index_to_go] ne 'b' ) {

6435

6436

$gnu_stack[$max_gnu_stack_index]->set_HAVE_CHILD(1);

6437

6438

++$max_gnu_stack_index;

6439

$gnu_stack[$max_gnu_stack_index] =

6440

new_lp_indentation_item( $space_count, $level, $ci_level,

6441

$available_space, $align_paren );

6442

6443

# If the opening paren is beyond the half-line length, then

6444

# we will use the minimum (standard) indentation. This will

6445

# help avoid problems associated with running out of space

6446

# near the end of a line. As a result, in deeply nested

6447

# lists, there will be some indentations which are limited

6448

# to this minimum standard indentation. But the most deeply

6449

# nested container will still probably be able to shift its

6450

# parameters to the right for proper alignment, so in most

6451

# cases this will not be noticeable.

6452

if ( $available_space > 0

6453

&& $space_count > $half_maximum_line_length )

6454

{

6455

$gnu_stack[$max_gnu_stack_index]

6456

->tentatively_decrease_AVAILABLE_SPACES($available_space);

6457

}

6458

}

6459

}

6460

6461

# Count commas and look for non-list characters. Once we see a

6462

# non-list character, we give up and don't look for any more commas.

6463

if ( $type eq '=>' ) {

6464

$gnu_arrow_count{$total_depth}++;

6465

6466

# tentatively treating '=>' like '=' for estimating breaks

6467

# TODO: this could use some experimentation

6468

$last_gnu_equals{$total_depth} = $max_index_to_go;

6469

}

6470

6471

elsif ( $type eq ',' ) {

6472

$gnu_comma_count{$total_depth}++;

6473

}

6474

6475

elsif ( $is_assignment{$type} ) {

6476

$last_gnu_equals{$total_depth} = $max_index_to_go;

6477

}

6478

6479

# this token might start a new line

6480

# if this is a non-blank..

6481

if ( $type ne 'b' ) {

6482

6483

# and if ..

6484

if (

6485

6486

# this is the first nonblank token of the line

6487

$max_index_to_go == 1 && $types_to_go[0] eq 'b'

6488

6489

# or previous character was one of these:

6490

|| $last_nonblank_type_to_go =~ /^([\:\?\,f])$/

6491

6492

# or previous character was opening and this does not close it

6493

|| ( $last_nonblank_type_to_go eq '{' && $type ne '}' )

6494

|| ( $last_nonblank_type_to_go eq '(' and $type ne ')' )

6495

6496

# or this token is one of these:

6497

|| $type =~ /^([\.]|\|\||\&\&)$/

6498

6499

# or this is a closing structure

6500

|| ( $last_nonblank_type_to_go eq '}'

6501

&& $last_nonblank_token_to_go eq $last_nonblank_type_to_go )

6502

6503

# or previous token was keyword 'return'

6504

|| ( $last_nonblank_type_to_go eq 'k'

6505

&& ( $last_nonblank_token_to_go eq 'return' && $type ne '{' ) )

6506

6507

# or starting a new line at certain keywords is fine

6508

|| ( $type eq 'k'

6509

&& $is_if_unless_and_or_last_next_redo_return{$token} )

6510

6511

# or this is after an assignment after a closing structure

6512

|| (

6513

$is_assignment{$last_nonblank_type_to_go}

6514

&& (

6515

$last_last_nonblank_type_to_go =~ /^[\}\)\]]$/

6516

6517

# and it is significantly to the right

6518

|| $gnu_position_predictor > $half_maximum_line_length

6519

)

6520

)

6521

)

6522

{

6523

check_for_long_gnu_style_lines();

6524

$line_start_index_to_go = $max_index_to_go;

6525

6526

# back up 1 token if we want to break before that type

6527

# otherwise, we may strand tokens like '?' or ':' on a line

6528

if ( $line_start_index_to_go > 0 ) {

6529

if ( $last_nonblank_type_to_go eq 'k' ) {

6530

6531

if ( $want_break_before{$last_nonblank_token_to_go} ) {

6532

$line_start_index_to_go--;

6533

}

6534

}

6535

elsif ( $want_break_before{$last_nonblank_type_to_go} ) {

6536

$line_start_index_to_go--;

6537

}

6538

}

6539

}

6540

}

6541

6542

# remember the predicted position of this token on the output line

6543

if ( $max_index_to_go > $line_start_index_to_go ) {

6544

$gnu_position_predictor =

6545

total_line_length( $line_start_index_to_go, $max_index_to_go );

6546

}

6547

else {

6548

$gnu_position_predictor = $space_count +

6549

token_sequence_length( $max_index_to_go, $max_index_to_go );

6550

}

6551

6552

# store the indentation object for this token

6553

# this allows us to manipulate the leading whitespace

6554

# (in case we have to reduce indentation to fit a line) without

6555

# having to change any token values

6556

$leading_spaces_to_go[$max_index_to_go] = $gnu_stack[$max_gnu_stack_index];

6557

$reduced_spaces_to_go[$max_index_to_go] =

6558

( $max_gnu_stack_index > 0 && $ci_level )

6559

? $gnu_stack[ $max_gnu_stack_index - 1 ]

6560

: $gnu_stack[$max_gnu_stack_index];

6561

return;

6562

}

6563

6564

sub check_for_long_gnu_style_lines {

    # Look at the current estimated maximum line length, and give back
    # some leading whitespace if it exceeds the desired maximum.
    #
    # Takes no arguments.  Works on the file-level state
    # $gnu_position_predictor, @gnu_item_list and $max_gnu_item_index;
    # on success $gnu_position_predictor is reduced by the number of
    # spaces removed.  Returns nothing useful.

    # this is only for the '-lp' style
    return unless ($rOpts_line_up_parentheses);

    # nothing can be done if no stack items defined for this line
    return if ( $max_gnu_item_index == UNDEFINED_INDEX );

    # see if we have exceeded the maximum desired line length
    # keep 2 extra free because they are needed in some cases
    # (result of trial-and-error testing)
    my $spaces_needed =
      $gnu_position_predictor - $rOpts_maximum_line_length + 2;

    return if ( $spaces_needed < 0 );

    # We are over the limit, so try to remove a requested number of
    # spaces from leading whitespace.  We are only allowed to remove
    # from whitespace items created on this batch, since others have
    # already been used and cannot be undone.
    my @candidates = ();

    # loop over all whitespace items created for the current batch
    foreach my $i ( 0 .. $max_gnu_item_index ) {
        my $item = $gnu_item_list[$i];

        # item must still be open to be a candidate (otherwise it
        # cannot influence the current token)
        next if ( $item->get_CLOSED() >= 0 );

        my $available_spaces = $item->get_AVAILABLE_SPACES();

        if ( $available_spaces > 0 ) {
            push( @candidates, [ $i, $available_spaces ] );
        }
    }

    return unless (@candidates);

    # sort by available whitespace so that we can remove whitespace
    # from the maximum available first
    @candidates = sort { $b->[1] <=> $a->[1] } @candidates;

    # keep removing whitespace until we are done or have no more
    foreach my $candidate (@candidates) {
        my ( $i_candidate, $available_spaces ) = @{$candidate};

        # take no more than this item has, and no more than we still need
        my $deleted_spaces =
          ( $available_spaces > $spaces_needed )
          ? $spaces_needed
          : $available_spaces;

        # remove the incremental space from this item
        $gnu_item_list[$i_candidate]
          ->decrease_AVAILABLE_SPACES($deleted_spaces);

        # update the leading whitespace of this item and all items
        # that came after it
        foreach my $i ( $i_candidate .. $max_gnu_item_index ) {

            my $old_spaces = $gnu_item_list[$i]->get_SPACES();
            if ( $old_spaces > $deleted_spaces ) {
                $gnu_item_list[$i]->decrease_SPACES($deleted_spaces);
            }

            # shouldn't happen except for code bug:
            else {

                # $level/$ci_level describe the candidate whose space is
                # being removed; $old_level/$old_ci_level describe the
                # later item that could not absorb the reduction
                my $level    = $gnu_item_list[$i_candidate]->get_LEVEL();
                my $ci_level = $gnu_item_list[$i_candidate]->get_CI_LEVEL();
                my $old_level    = $gnu_item_list[$i]->get_LEVEL();
                my $old_ci_level = $gnu_item_list[$i]->get_CI_LEVEL();
                warning(
"program bug with -lp: want to delete $deleted_spaces from item $i, but old=$old_spaces deleted: lev=$level ci=$ci_level deleted: level=$old_level ci=$old_ci_level\n"
                );
                report_definite_bug();
            }
        }
        $gnu_position_predictor -= $deleted_spaces;
        $spaces_needed          -= $deleted_spaces;
        last unless ( $spaces_needed > 0 );
    }
    return;
}

6651

6652

sub finish_lp_batch {

    # Called once after each output stream batch is finished, to undo
    # indentation for all incomplete -lp indentation levels.  Leaving a
    # level open is too risky, because then we can't backtrack in case
    # of a long line to follow.  As a consequence, comments and blank
    # lines will disrupt this indentation style, although the vertical
    # aligner may be able to get the space back if there are side
    # comments.
    #
    # Takes no arguments and returns nothing; it works on the file-level
    # @gnu_item_list / $max_gnu_item_index.

    # this is only for the 'lp' style
    return unless ($rOpts_line_up_parentheses);

    # nothing can be done if no stack items defined for this line
    return if ( $max_gnu_item_index == UNDEFINED_INDEX );

    # visit every whitespace item created for the current batch
    foreach my $i ( 0 .. $max_gnu_item_index ) {
        my $item = $gnu_item_list[$i];

        # closed items are of no interest here
        next if ( $item->get_CLOSED() >= 0 );

        # skip items that have nothing left to give back
        my $available_spaces = $item->get_AVAILABLE_SPACES();
        next unless ( $available_spaces > 0 );

        # Tentatively remove all of the incremental space for this item
        # (The vertical aligner will try to get it back later)
        $item->tentatively_decrease_AVAILABLE_SPACES($available_spaces);

        # Any nodes that follow must necessarily be dependents of this
        # node, so their total indentation shrinks by the same amount.
        foreach my $j ( $i + 1 .. $max_gnu_item_index ) {
            $gnu_item_list[$j]->decrease_SPACES($available_spaces);
        }
    }
    return;
}

6695

6696

sub reduce_lp_indentation {

    # Try to reduce the leading whitespace at token $i by $spaces_wanted
    # (passing a large value of $spaces_wanted removes all excess space).
    # Returns the number of spaces actually removed, or 0 if none.
    #
    # NOTE: to be called from scan_list only for a sequence of tokens
    # contained between opening and closing parens/braces/brackets

    my ( $i, $spaces_wanted ) = @_;

    my $item             = $leading_spaces_to_go[$i];
    my $available_spaces = $item->get_AVAILABLE_SPACES();

    # nothing to give back
    return 0 unless ( $available_spaces > 0 );

    # a request larger than what is available is only honored for an
    # item which has no child
    my $request_fits = ( $spaces_wanted <= $available_spaces );
    return 0 if ( !$request_fits && $item->get_HAVE_CHILD() );

    # we'll remove these spaces, but mark them as recoverable
    my $deleted_spaces =
      $item->tentatively_decrease_AVAILABLE_SPACES($spaces_wanted);

    return $deleted_spaces;
}

6723

6724

sub token_sequence_length {

    # Return the summed length of tokens $ifirst .. $ilast, inclusive,
    # using the cumulative table @lengths_to_go.
    # Returns 0 if $ilast is negative or if $ifirst > $ilast.
    my ( $ifirst, $ilast ) = @_;

    return 0 if ( $ilast < 0 );
    return 0 if ( $ifirst > $ilast );

    my $length_through_last = $lengths_to_go[ $ilast + 1 ];

    # a negative starting index means nothing is subtracted
    return $length_through_last if ( $ifirst < 0 );

    return $length_through_last - $lengths_to_go[$ifirst];
}

6734

6735

sub total_line_length {

    # Return the length of a line made of tokens ($ifirst .. $ilast):
    # the leading spaces of the first token plus the token lengths.
    # A negative $ifirst is treated as 0.
    my ( $ifirst, $ilast ) = @_;
    $ifirst = 0 if ( $ifirst < 0 );

    my $leading_space = leading_spaces_to_go($ifirst);
    return $leading_space + token_sequence_length( $ifirst, $ilast );
}

6745

6746

sub excess_line_length {

    # Return the number of characters by which a line made of tokens
    # ($ifirst .. $ilast) exceeds the allowable line length
    # ($rOpts_maximum_line_length).  The result is negative when the
    # line is shorter than the limit.  A negative $ifirst is treated as 0.
    my ( $ifirst, $ilast ) = @_;
    $ifirst = 0 if ( $ifirst < 0 );

    my $leading_space = leading_spaces_to_go($ifirst);
    return $leading_space +
      token_sequence_length( $ifirst, $ilast ) -
      $rOpts_maximum_line_length;
}

6756

6757

sub finish_formatting {

    # Flush the output buffer, then write closing notes to the logfile:
    # counts and locations of added semicolons, deleted semicolons and
    # embedded tabs, a summary of indentation disagreements, and finally
    # whatever the vertical aligner and file writer have to report.
    my $self = shift;

    flush();

    # fix up line number since it was incremented
    $file_writer_object->decrement_output_line_number();
    we_are_at_the_last_line();

    # semicolons which were added
    if ( $added_semicolon_count > 0 ) {
        my $several = ( $added_semicolon_count > 1 );
        my ( $first, $what ) =
          $several
          ? ( "First", "semicolons were" )
          : ( "",      "semicolon was" );

        write_logfile_entry("$added_semicolon_count $what added:\n");
        write_logfile_entry(
            " $first at input line $first_added_semicolon_at\n");
        write_logfile_entry(
            " Last at input line $last_added_semicolon_at\n")
          if ($several);
        write_logfile_entry(" (Use -nasc to prevent semicolon addition)\n");
        write_logfile_entry("\n");
    }

    # semicolons which were deleted
    if ( $deleted_semicolon_count > 0 ) {
        my $several = ( $deleted_semicolon_count > 1 );
        my ( $first, $what ) =
          $several
          ? ( "First", "semicolons were" )
          : ( "",      "semicolon was" );

        write_logfile_entry(
            "$deleted_semicolon_count unnecessary $what deleted:\n");
        write_logfile_entry(
            " $first at input line $first_deleted_semicolon_at\n");
        write_logfile_entry(
            " Last at input line $last_deleted_semicolon_at\n")
          if ($several);
        write_logfile_entry(" (Use -ndsc to prevent semicolon deletion)\n");
        write_logfile_entry("\n");
    }

    # quotes or patterns containing tab characters
    if ( $embedded_tab_count > 0 ) {
        my $several = ( $embedded_tab_count > 1 );
        my ( $first, $what ) =
          $several
          ? ( "First", "quotes or patterns" )
          : ( "",      "quote or pattern" );

        write_logfile_entry("$embedded_tab_count $what had embedded tabs:\n");
        write_logfile_entry(
"This means the display of this script could vary with device or software\n"
        );
        write_logfile_entry(" $first at input line $first_embedded_tab_at\n");
        write_logfile_entry(
            " Last at input line $last_embedded_tab_at\n")
          if ($several);
        write_logfile_entry("\n");
    }

    # where the first indentation disagreement was seen, if any
    write_logfile_entry(
"First indentation disagreement seen at input line $first_tabbing_disagreement\n"
    ) if ($first_tabbing_disagreement);

    # how the file ended with respect to indentation disagreements
    if ($in_tabbing_disagreement) {
        write_logfile_entry(
"Ending with indentation disagreement which started at input line $in_tabbing_disagreement\n"
        );
    }
    elsif ($last_tabbing_disagreement) {
        write_logfile_entry(
"Last indentation disagreement seen at input line $last_tabbing_disagreement\n"
        );
    }
    else {
        write_logfile_entry("No indentation disagreement seen\n");
    }
    write_logfile_entry("\n");

    $vertical_aligner_object->report_anything_unusual();

    $file_writer_object->report_line_length_errors();
}

6849

6850

sub check_options {

6851

6852

# This routine is called to check the Opts hash after it is defined

6853

6854

($rOpts) = @_;

6855

my ( $tabbing_string, $tab_msg );

6856

6857

make_static_block_comment_pattern();

6858

make_static_side_comment_pattern();

6859

make_closing_side_comment_prefix();

6860

make_closing_side_comment_list_pattern();

6861

$format_skipping_pattern_begin =

6862

make_format_skipping_pattern( 'format-skipping-begin', '#<<<' );

6863

$format_skipping_pattern_end =

6864

make_format_skipping_pattern( 'format-skipping-end', '#>>>' );

6865

6866

# If closing side comments ARE selected, then we can safely

6867

# delete old closing side comments unless closing side comment

6868

# warnings are requested. This is a good idea because it will

6869

# eliminate any old csc's which fall below the line count threshold.

6870

# We cannot do this if warnings are turned on, though, because we

6871

# might delete some text which has been added. So that must

6872

# be handled when comments are created.

6873

if ( $rOpts->{'closing-side-comments'} ) {

6874

if ( !$rOpts->{'closing-side-comment-warnings'} ) {

6875

$rOpts->{'delete-closing-side-comments'} = 1;

6876

}

6877

}

6878

6879

# If closing side comments ARE NOT selected, but warnings ARE

6880

# selected and we ARE DELETING csc's, then we will pretend to be

6881

# adding with a huge interval. This will force the comments to be

6882

# generated for comparison with the old comments, but not added.

6883

elsif ( $rOpts->{'closing-side-comment-warnings'} ) {

6884

if ( $rOpts->{'delete-closing-side-comments'} ) {

6885

$rOpts->{'delete-closing-side-comments'} = 0;

6886

$rOpts->{'closing-side-comments'} = 1;

6887

$rOpts->{'closing-side-comment-interval'} = 100000000;

6888

}

6889

}

6890

6891

make_bli_pattern();

6892

make_block_brace_vertical_tightness_pattern();

6893

6894

if ( $rOpts->{'line-up-parentheses'} ) {

6895

6896

if ( $rOpts->{'indent-only'}

6897

|| !$rOpts->{'add-newlines'}

6898

|| !$rOpts->{'delete-old-newlines'} )

6899

{

6900

warn <<EOM;

6901

-----------------------------------------------------------------------

6902

Conflict: -lp conflicts with -io, -fnl, -nanl, or -ndnl; ignoring -lp

6903

6904

The -lp indentation logic requires that perltidy be able to coordinate

6905

arbitrarily large numbers of line breakpoints. This isn't possible

6906

with these flags. Sometimes an acceptable workaround is to use -wocb=3

6907

-----------------------------------------------------------------------

6908

EOM

6909

$rOpts->{'line-up-parentheses'} = 0;

6910

}

6911

}

6912

6913

# At present, tabs are not compatible with the line-up-parentheses style

6914

# (it would be possible to entab the total leading whitespace

6915

# just prior to writing the line, if desired).

6916

if ( $rOpts->{'line-up-parentheses'} && $rOpts->{'tabs'} ) {

6917

warn <<EOM;

6918

Conflict: -t (tabs) cannot be used with the -lp option; ignoring -t; see -et.

6919

EOM

6920

$rOpts->{'tabs'} = 0;

6921

}

6922

6923

# Likewise, tabs are not compatible with outdenting..

6924

if ( $rOpts->{'outdent-keywords'} && $rOpts->{'tabs'} ) {

6925

warn <<EOM;

6926

Conflict: -t (tabs) cannot be used with the -okw options; ignoring -t; see -et.

6927

EOM

6928

$rOpts->{'tabs'} = 0;

6929

}

6930

6931

if ( $rOpts->{'outdent-labels'} && $rOpts->{'tabs'} ) {

6932

warn <<EOM;

6933

Conflict: -t (tabs) cannot be used with the -ola option; ignoring -t; see -et.

6934

EOM

6935

$rOpts->{'tabs'} = 0;

6936

}

6937

6938

if ( !$rOpts->{'space-for-semicolon'} ) {

6939

$want_left_space{'f'} = -1;

6940

}

6941

6942

if ( $rOpts->{'space-terminal-semicolon'} ) {

6943

$want_left_space{';'} = 1;

6944

}

6945

6946

# implement outdenting preferences for keywords

6947

%outdent_keyword = ();

6948

unless ( @_ = split_words( $rOpts->{'outdent-keyword-okl'} ) ) {

6949

@_ = qw(next last redo goto return); # defaults

6950

}

6951

6952

# FUTURE: if not a keyword, assume that it is an identifier

6953

foreach (@_) {

6954

if ( $Perl::Tidy::Tokenizer::is_keyword{$_} ) {

6955

$outdent_keyword{$_} = 1;

6956

}

6957

else {

6958

warn "ignoring '$_' in -okwl list; not a perl keyword";

6959

}

6960

}

6961

6962

# implement user whitespace preferences

6963

if ( @_ = split_words( $rOpts->{'want-left-space'} ) ) {

6964

@want_left_space{@_} = (1) x scalar(@_);

6965

}

6966

6967

if ( @_ = split_words( $rOpts->{'want-right-space'} ) ) {

6968

@want_right_space{@_} = (1) x scalar(@_);

6969

}

6970

6971

if ( @_ = split_words( $rOpts->{'nowant-left-space'} ) ) {

6972

@want_left_space{@_} = (-1) x scalar(@_);

6973

}

6974

6975

if ( @_ = split_words( $rOpts->{'nowant-right-space'} ) ) {

6976

@want_right_space{@_} = (-1) x scalar(@_);

6977

}

6978

if ( $rOpts->{'dump-want-left-space'} ) {

6979

dump_want_left_space(*STDOUT);

6980

exit 1;

6981

}

6982

6983

if ( $rOpts->{'dump-want-right-space'} ) {

6984

dump_want_right_space(*STDOUT);

6985

exit 1;

6986

}

6987

6988

# default keywords for which space is introduced before an opening paren

6989

# (at present, including them messes up vertical alignment)

6990

@_ = qw(my local our and or err eq ne if else elsif until

6991

unless while for foreach return switch case given when);

6992

@space_after_keyword{@_} = (1) x scalar(@_);

6993

6994

# allow user to modify these defaults

6995

if ( @_ = split_words( $rOpts->{'space-after-keyword'} ) ) {

6996

@space_after_keyword{@_} = (1) x scalar(@_);

6997

}

6998

6999

if ( @_ = split_words( $rOpts->{'nospace-after-keyword'} ) ) {

7000

@space_after_keyword{@_} = (0) x scalar(@_);

7001

}

7002

7003

# implement user break preferences

7004

my @all_operators = qw(% + - * / x != == >= <= =~ !~ < > | &

7005

= **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=

7006

. : ? && || and or err xor

7007

);

7008

7009

my $break_after = sub {

7010

foreach my $tok (@_) {

7011

if ( $tok eq '?' ) { $tok = ':' } # patch to coordinate ?/:

7012

my $lbs = $left_bond_strength{$tok};

7013

my $rbs = $right_bond_strength{$tok};

7014

if ( defined($lbs) && defined($rbs) && $lbs < $rbs ) {

7015

( $right_bond_strength{$tok}, $left_bond_strength{$tok} ) =

7016

( $lbs, $rbs );

7017

}

7018

}

7019

};

7020

7021

my $break_before = sub {

7022

foreach my $tok (@_) {

7023

my $lbs = $left_bond_strength{$tok};

7024

my $rbs = $right_bond_strength{$tok};

7025

if ( defined($lbs) && defined($rbs) && $rbs < $lbs ) {

7026

( $right_bond_strength{$tok}, $left_bond_strength{$tok} ) =

7027

( $lbs, $rbs );

7028

}

7029

}

7030

};

7031

7032

$break_after->(@all_operators) if ( $rOpts->{'break-after-all-operators'} );

7033

$break_before->(@all_operators)

7034

if ( $rOpts->{'break-before-all-operators'} );

7035

7036

$break_after->( split_words( $rOpts->{'want-break-after'} ) );

7037

$break_before->( split_words( $rOpts->{'want-break-before'} ) );

7038

7039

# make note if breaks are before certain key types

7040

%want_break_before = ();

7041

foreach my $tok ( @all_operators, ',' ) {

7042

$want_break_before{$tok} =

7043

$left_bond_strength{$tok} < $right_bond_strength{$tok};

7044

}

7045

7046

# Coordinate ?/: breaks, which must be similar

7047

if ( !$want_break_before{':'} ) {

7048

$want_break_before{'?'} = $want_break_before{':'};

7049

$right_bond_strength{'?'} = $right_bond_strength{':'} + 0.01;

7050

$left_bond_strength{'?'} = NO_BREAK;

7051

}

7052

7053

# Define here tokens which may follow the closing brace of a do statement

7054

# on the same line, as in:

7055

# } while ( $something);

7056

@_ = qw(until while unless if ; : );

7057

push @_, ',';

7058

@is_do_follower{@_} = (1) x scalar(@_);

7059

7060

# These tokens may follow the closing brace of an if or elsif block.

7061

# In other words, for cuddled else we want code to look like:

7062

# } elsif ( $something) {

7063

# } else {

7064

if ( $rOpts->{'cuddled-else'} ) {

7065

@_ = qw(else elsif);

7066

@is_if_brace_follower{@_} = (1) x scalar(@_);

7067

}

7068

else {

7069

%is_if_brace_follower = ();

7070

}

7071

7072

# nothing can follow the closing curly of an else { } block:

7073

%is_else_brace_follower = ();

7074

7075

# what can follow a multi-line anonymous sub definition closing curly:

7076

@_ = qw# ; : => or and && || ~~ !~~ ) #;

7077

push @_, ',';

7078

@is_anon_sub_brace_follower{@_} = (1) x scalar(@_);

7079

7080

# what can follow a one-line anonymous sub closing curly:

7081

# one-line anonymous subs also have ']' here...

7082

# see tk3.t and PP.pm

7083

@_ = qw# ; : => or and && || ) ] ~~ !~~ #;

7084

push @_, ',';

7085

@is_anon_sub_1_brace_follower{@_} = (1) x scalar(@_);

7086

7087

# What can follow a closing curly of a block

7088

# which is not an if/elsif/else/do/sort/map/grep/eval/sub

7089

# Testfiles: 'Toolbar.pm', 'Menubar.pm', bless.t, '3rules.pl'

7090

@_ = qw# ; : => or and && || ) #;

7091

push @_, ',';

7092

7093

# allow cuddled continue if cuddled else is specified

7094

if ( $rOpts->{'cuddled-else'} ) { push @_, 'continue'; }

7095

7096

@is_other_brace_follower{@_} = (1) x scalar(@_);

7097

7098

$right_bond_strength{'{'} = WEAK;

7099

$left_bond_strength{'{'} = VERY_STRONG;

7100

7101

# make -l=0 equal to -l=infinite

7102

if ( !$rOpts->{'maximum-line-length'} ) {

7103

$rOpts->{'maximum-line-length'} = 1000000;

7104

}

7105

7106

# make -lbl=0 equal to -lbl=infinite

7107

if ( !$rOpts->{'long-block-line-count'} ) {

7108

$rOpts->{'long-block-line-count'} = 1000000;

7109

}

7110

7111

my $ole = $rOpts->{'output-line-ending'};

7112

if ($ole) {

7113

my %endings = (

7114

dos => "\015\012",

7115

win => "\015\012",

7116

mac => "\015",

7117

unix => "\012",

7118

);

7119

$ole = lc $ole;

7120

unless ( $rOpts->{'output-line-ending'} = $endings{$ole} ) {

7121

my $str = join " ", keys %endings;

7122

die <<EOM;

7123

Unrecognized line ending '$ole'; expecting one of: $str

7124

EOM

7125

}

7126

if ( $rOpts->{'preserve-line-endings'} ) {

7127

warn "Ignoring -ple; conflicts with -ole\n";

7128

$rOpts->{'preserve-line-endings'} = undef;

7129

}

7130

}

7131

7132

# hashes used to simplify setting whitespace

7133

%tightness = (

7134

'{' => $rOpts->{'brace-tightness'},

7135

'}' => $rOpts->{'brace-tightness'},

7136

'(' => $rOpts->{'paren-tightness'},

7137

')' => $rOpts->{'paren-tightness'},

7138

'[' => $rOpts->{'square-bracket-tightness'},

7139

']' => $rOpts->{'square-bracket-tightness'},

7140

);

7141

%matching_token = (

7142

'{' => '}',

7143

'(' => ')',

7144

'[' => ']',

7145

'?' => ':',

7146

);

7147

7148

# frequently used parameters

7149

$rOpts_add_newlines = $rOpts->{'add-newlines'};

7150

$rOpts_add_whitespace = $rOpts->{'add-whitespace'};

7151

$rOpts_block_brace_tightness = $rOpts->{'block-brace-tightness'};

7152

$rOpts_block_brace_vertical_tightness =

7153

$rOpts->{'block-brace-vertical-tightness'};

7154

$rOpts_brace_left_and_indent = $rOpts->{'brace-left-and-indent'};

7155

$rOpts_comma_arrow_breakpoints = $rOpts->{'comma-arrow-breakpoints'};

7156

$rOpts_break_at_old_ternary_breakpoints =

7157

$rOpts->{'break-at-old-ternary-breakpoints'};

7158

$rOpts_break_at_old_comma_breakpoints =

7159

$rOpts->{'break-at-old-comma-breakpoints'};

7160

$rOpts_break_at_old_keyword_breakpoints =

7161

$rOpts->{'break-at-old-keyword-breakpoints'};

7162

$rOpts_break_at_old_logical_breakpoints =

7163

$rOpts->{'break-at-old-logical-breakpoints'};

7164

$rOpts_closing_side_comment_else_flag =

7165

$rOpts->{'closing-side-comment-else-flag'};

7166

$rOpts_closing_side_comment_maximum_text =

7167

$rOpts->{'closing-side-comment-maximum-text'};

7168

$rOpts_continuation_indentation = $rOpts->{'continuation-indentation'};

7169

$rOpts_cuddled_else = $rOpts->{'cuddled-else'};

7170

$rOpts_delete_old_whitespace = $rOpts->{'delete-old-whitespace'};

7171

$rOpts_fuzzy_line_length = $rOpts->{'fuzzy-line-length'};

7172

$rOpts_indent_columns = $rOpts->{'indent-columns'};

7173

$rOpts_line_up_parentheses = $rOpts->{'line-up-parentheses'};

7174

$rOpts_maximum_fields_per_table = $rOpts->{'maximum-fields-per-table'};

7175

$rOpts_maximum_line_length = $rOpts->{'maximum-line-length'};

7176

$rOpts_short_concatenation_item_length =

7177

$rOpts->{'short-concatenation-item-length'};

7178

$rOpts_swallow_optional_blank_lines =

7179

$rOpts->{'swallow-optional-blank-lines'};

7180

$rOpts_ignore_old_breakpoints = $rOpts->{'ignore-old-breakpoints'};

7181

$rOpts_format_skipping = $rOpts->{'format-skipping'};

7182

$rOpts_space_function_paren = $rOpts->{'space-function-paren'};

7183

$rOpts_space_keyword_paren = $rOpts->{'space-keyword-paren'};

7184

$rOpts_keep_interior_semicolons = $rOpts->{'keep-interior-semicolons'};

7185

$half_maximum_line_length = $rOpts_maximum_line_length / 2;

7186

7187

# Note that both opening and closing tokens can access the opening

7188

# and closing flags of their container types.

7189

%opening_vertical_tightness = (

7190

'(' => $rOpts->{'paren-vertical-tightness'},

7191

'{' => $rOpts->{'brace-vertical-tightness'},

7192

'[' => $rOpts->{'square-bracket-vertical-tightness'},

7193

')' => $rOpts->{'paren-vertical-tightness'},

7194

'}' => $rOpts->{'brace-vertical-tightness'},

7195

']' => $rOpts->{'square-bracket-vertical-tightness'},

7196

);

7197

7198

%closing_vertical_tightness = (

7199

'(' => $rOpts->{'paren-vertical-tightness-closing'},

7200

'{' => $rOpts->{'brace-vertical-tightness-closing'},

7201

'[' => $rOpts->{'square-bracket-vertical-tightness-closing'},

7202

')' => $rOpts->{'paren-vertical-tightness-closing'},

7203

'}' => $rOpts->{'brace-vertical-tightness-closing'},

7204

']' => $rOpts->{'square-bracket-vertical-tightness-closing'},

7205

);

7206

7207

# assume flag for '>' same as ')' for closing qw quotes

7208

%closing_token_indentation = (

7209

')' => $rOpts->{'closing-paren-indentation'},

7210

'}' => $rOpts->{'closing-brace-indentation'},

7211

']' => $rOpts->{'closing-square-bracket-indentation'},

7212

'>' => $rOpts->{'closing-paren-indentation'},

7213

);

7214

7215

%opening_token_right = (

7216

'(' => $rOpts->{'opening-paren-right'},

7217

'{' => $rOpts->{'opening-hash-brace-right'},

7218

'[' => $rOpts->{'opening-square-bracket-right'},

7219

);

7220

7221

%stack_opening_token = (

7222

'(' => $rOpts->{'stack-opening-paren'},

7223

'{' => $rOpts->{'stack-opening-hash-brace'},

7224

'[' => $rOpts->{'stack-opening-square-bracket'},

7225

);

7226

7227

%stack_closing_token = (

7228

')' => $rOpts->{'stack-closing-paren'},

7229

'}' => $rOpts->{'stack-closing-hash-brace'},

7230

']' => $rOpts->{'stack-closing-square-bracket'},

7231

);

7232

}

7233

7234

sub make_static_block_comment_pattern {

    # Create the pattern used to identify static block comments and store
    # it in $static_block_comment_pattern.
    #
    # The default pattern is '^\s*##'.  If the 'static-block-comment-prefix'
    # option (-sbcp) is set, the user's prefix replaces the '##'.  The prefix
    # must begin with '#', or with '^#' to force matching of left-justified
    # comments only.
    #
    # Dies with an error message if the prefix does not begin with '#' or
    # '^#', or if the resulting pattern is not a valid regex.
    $static_block_comment_pattern = '^\s*##';

    # allow the user to change it
    if ( $rOpts->{'static-block-comment-prefix'} ) {
        my $prefix = $rOpts->{'static-block-comment-prefix'};
        $prefix =~ s/^\s*//;
        my $pattern = $prefix;

        # user may give leading caret to force matching left comments only
        if ( $prefix !~ /^\^#/ ) {
            if ( $prefix !~ /^#/ ) {
                die
"ERROR: the -sbcp prefix is '$prefix' but must begin with '#' or '^#'\n";
            }
            $pattern = '^\s*' . $prefix;
        }

        # Check that the pattern compiles.  A block eval with qr// is used
        # rather than a string eval so that the user's text is only ever
        # treated as a regex, never as Perl source; a string eval also
        # wrongly rejects valid prefixes which contain a '/'.
        my $compiled = eval { qr/$pattern/ };
        if ( !defined($compiled) ) {
            die
"ERROR: the -sbcp prefix '$prefix' causes the invalid regex '$pattern'\n";
        }
        $static_block_comment_pattern = $pattern;
    }
}

7261

7262

sub make_format_skipping_pattern {

    # Create the pattern used to recognize a format-skipping marker.
    #
    #   $opt_name - key in $rOpts holding the user's marker text
    #   $default  - marker text to use when that option is unset or false
    #
    # Returns the pattern string '^' . marker . '\s', where leading
    # whitespace has been removed from the marker.
    #
    # Dies with an error message if the marker does not begin with '#' or
    # if the resulting pattern is not a valid regex.
    my ( $opt_name, $default ) = @_;
    my $param = $rOpts->{$opt_name};
    unless ($param) { $param = $default }
    $param =~ s/^\s*//;
    if ( $param !~ /^#/ ) {
        die "ERROR: the $opt_name parameter '$param' must begin with '#'\n";
    }
    my $pattern = '^' . $param . '\s';

    # Check that the pattern compiles.  A block eval with qr// is used
    # rather than a string eval so that the user's text is only ever
    # treated as a regex, never as Perl source; a string eval also wrongly
    # rejects valid markers which contain a '/'.
    my $compiled = eval { qr/$pattern/ };
    if ( !defined($compiled) ) {
        die
"ERROR: the $opt_name parameter '$param' causes the invalid regex '$pattern'\n";
    }
    return $pattern;
}

7278

7279

sub make_closing_side_comment_list_pattern {

    # Set $closing_side_comment_list_pattern, the regex for recognizing
    # selected block types.  The default '^\w+' is used unless the
    # 'closing-side-comment-list' option holds a true value, in which case
    # that list is turned into a regex by make_block_pattern.
    $closing_side_comment_list_pattern = '^\w+';

    my $block_list = $rOpts->{'closing-side-comment-list'};
    if ($block_list) {
        $closing_side_comment_list_pattern =
          make_block_pattern( '-cscl', $block_list );
    }
}

7290

7291

sub make_bli_pattern {

    # Set $bli_pattern to the regex built by make_block_pattern from
    # $bli_list_string.  If the 'brace-left-and-indent-list' option holds a
    # true value it first replaces $bli_list_string; otherwise the existing
    # value of $bli_list_string is used as is.
    my $user_list = $rOpts->{'brace-left-and-indent-list'};
    if ($user_list) {
        $bli_list_string = $user_list;
    }

    $bli_pattern = make_block_pattern( '-blil', $bli_list_string );
}

7301

7302

sub make_block_brace_vertical_tightness_pattern {

    # Set $block_brace_vertical_tightness_pattern, the regex for
    # recognizing selected block types.  A built-in default is used unless
    # the 'block-brace-vertical-tightness-list' option holds a true value,
    # in which case that list is turned into a regex by make_block_pattern.
    $block_brace_vertical_tightness_pattern =
      '^((if|else|elsif|unless|while|for|foreach|do|\w+:)$|sub)';

    my $block_list = $rOpts->{'block-brace-vertical-tightness-list'};
    if ($block_list) {
        $block_brace_vertical_tightness_pattern =
          make_block_pattern( '-bbvtl', $block_list );
    }
}

7316

7317

sub make_block_pattern {

    # Given a string of block-type keywords, return a regex (as a string)
    # which matches them.
    #
    #   $abbrev - abbreviation of the option being processed; it is only
    #             used in the warning about unrecognized block types
    #   $string - the list of block types, split up by split_words
    #
    # Two entries get special treatment:
    #   ':'   stands for any label and becomes '\w+:'
    #   'sub' is matched as a prefix only, because the 'sub' token text may
    #         have additional text after it (the name of the sub)
    # All other accepted entries must match in full.
    #
    # Example (assuming split_words splits on whitespace):
    #
    #  input string: "if else elsif unless while for foreach do : sub";
    #  pattern:    '^((if|else|elsif|unless|while|for|foreach|do|\w+:)$|sub)'

    my ( $abbrev, $string ) = @_;

    my %seen;
    my @alternatives;

    # visit each distinct entry once, in order of first appearance
    foreach my $block_type ( grep { !$seen{$_}++ } split_words($string) ) {

        # 'sub' is added to the pattern after the loop
        next if ( $block_type eq 'sub' );

        if ( $block_type eq ':' ) {
            push @alternatives, '\w+:';
        }
        elsif ( $block_type =~ /^\w/ ) {
            push @alternatives, $block_type;
        }
        else {
            warn "unrecognized block type $block_type after $abbrev, ignoring\n";
        }
    }

    my $pattern = '(' . join( '|', @alternatives ) . ')$';
    $pattern = '(' . $pattern . '|sub)' if ( $seen{'sub'} );
    return '^' . $pattern;
}

7355

7356

sub make_static_side_comment_pattern {

    # Create the pattern used to identify static side comments and store
    # it in $static_side_comment_pattern.
    #
    # The default pattern is '^##'.  If the 'static-side-comment-prefix'
    # option (-sscp) is set, the pattern becomes '^' followed by the user's
    # prefix with any leading whitespace removed.
    #
    # Dies with an error message if the resulting pattern is not a valid
    # regex.
    $static_side_comment_pattern = '^##';

    # allow the user to change it
    if ( $rOpts->{'static-side-comment-prefix'} ) {
        my $prefix = $rOpts->{'static-side-comment-prefix'};
        $prefix =~ s/^\s*//;
        my $pattern = '^' . $prefix;

        # Check that the pattern compiles.  A block eval with qr// is used
        # rather than a string eval so that the user's text is only ever
        # treated as a regex, never as Perl source; a string eval also
        # wrongly rejects valid prefixes which contain a '/'.
        my $compiled = eval { qr/$pattern/ };
        if ( !defined($compiled) ) {
            die
"ERROR: the -sscp prefix '$prefix' causes the invalid regex '$pattern'\n";
        }
        $static_side_comment_pattern = $pattern;
    }
}

7374

7375

sub make_closing_side_comment_prefix {

    # Be sure we have a valid closing side comment prefix (-cscp), and
    # make a regex to recognize it.
    #
    # On return:
    #   $rOpts->{'closing-side-comment-prefix'} holds the prefix to use; a
    #     leading '#' is added to a user prefix which lacks one
    #   $closing_side_comment_prefix_pattern holds the matching pattern
    #
    # If no prefix was given the defaults are used.  They are also used,
    # after issuing warnings, if the pattern made from the user's prefix
    # does not compile.

    # start with the defaults
    my $csc_prefix         = '## end';
    my $csc_prefix_pattern = '^##\s+end';

    my $user_prefix = $rOpts->{'closing-side-comment-prefix'};
    if ( defined($user_prefix) ) {
        my $test_csc_prefix = $user_prefix;
        if ( $test_csc_prefix !~ /^#/ ) {
            $test_csc_prefix = '#' . $test_csc_prefix;
        }

        # make a regex to recognize the prefix
        my $test_csc_prefix_pattern = $test_csc_prefix;

        # escape any special characters
        $test_csc_prefix_pattern =~ s/([^#\s\w])/\\$1/g;

        $test_csc_prefix_pattern = '^' . $test_csc_prefix_pattern;

        # allow exact number of intermediate spaces to vary
        $test_csc_prefix_pattern =~ s/\s+/\\s\+/g;

        # make sure we have a good pattern
        # if we fail this we probably have an error in escaping
        # characters.
        my $compiled = eval { qr/$test_csc_prefix_pattern/ };
        if ( defined($compiled) ) {
            $csc_prefix         = $test_csc_prefix;
            $csc_prefix_pattern = $test_csc_prefix_pattern;
        }
        else {

            # shouldn't happen..must have screwed up escaping, above
            report_definite_bug();

            # report the pattern which actually failed
            warn
"Program Error: the -cscp prefix '$user_prefix' caused the invalid regex '$test_csc_prefix_pattern'\n";

            # just warn and keep going with defaults; $csc_prefix and
            # $csc_prefix_pattern still hold the default values here
            warn "Please consider using a simpler -cscp prefix\n";
            warn "Using default -cscp instead; please check output\n";
        }
    }
    $rOpts->{'closing-side-comment-prefix'} = $csc_prefix;
    $closing_side_comment_prefix_pattern = $csc_prefix_pattern;
}

7424

7425

sub dump_want_left_space {

    # Write a description of the %want_left_space table to filehandle $fh:
    # an explanatory header followed by one line for each key, in sorted
    # order, holding the key, a tab, and its value.
    my ($fh) = @_;

    # NOTE(review): nothing printed here interpolates an array, so this
    # setting looks unnecessary; it is retained to keep behavior unchanged.
    local $" = "\n";

    print {$fh} <<EOM;
These values are the main control of whitespace to the left of a token type;
They may be altered with the -wls parameter.
For a list of token types, use perltidy --dump-token-types (-dtt)
1 means the token wants a space to its left
-1 means the token does not want a space to its left
------------------------------------------------------------------------
EOM

    for my $token_type ( sort keys %want_left_space ) {
        print {$fh} "$token_type\t$want_left_space{$token_type}\n";
    }
}

7440

7441

sub dump_want_right_space {

    # Write a description of the %want_right_space table to filehandle $fh:
    # an explanatory header followed by one line for each key, in sorted
    # order, holding the key, a tab, and its value.
    my ($fh) = @_;

    # NOTE(review): nothing printed here interpolates an array, so this
    # setting looks unnecessary; it is retained to keep behavior unchanged.
    local $" = "\n";

    print {$fh} <<EOM;
These values are the main control of whitespace to the right of a token type;
They may be altered with the -wrs parameter.
For a list of token types, use perltidy --dump-token-types (-dtt)
1 means the token wants a space to its right
-1 means the token does not want a space to its right
------------------------------------------------------------------------
EOM

    for my $token_type ( sort keys %want_right_space ) {
        print {$fh} "$token_type\t$want_right_space{$token_type}\n";
    }
}

7456

7457

{    # begin is_essential_whitespace

    # lookup tables private to sub is_essential_whitespace
    my %is_sort_grep_map;
    my %is_for_foreach;

    BEGIN {
        %is_sort_grep_map = map { $_ => 1 } qw(sort grep map);
        %is_for_foreach   = map { $_ => 1 } qw(for foreach);
    }

    sub is_essential_whitespace {

        # Essential whitespace means whitespace which cannot be safely
        # deleted without risking the introduction of a syntax error.
        #
        # The six arguments describe three tokens and their types:
        #   ($prev_token,  $prev_type)  previous nonblank token to the left
        #                               of the left token
        #   ($left_token,  $left_type)  token to the left of the space
        #   ($right_token, $right_type) token to the right of the space
        #
        # Returns a true value if the space must be kept, otherwise a
        # false value.
        #
        # This is a slow routine but is not needed too often except when
        # -mangle is used.
        #
        # Note: This routine should almost never need to be changed.  It is
        # for avoiding syntax problems rather than for formatting.
        my (
            $prev_token, $prev_type,   $left_token,
            $left_type,  $right_token, $right_type
        ) = @_;

        # The tests are combined into one scalar expression; the value of
        # this long logic sequence is the result we want.
        my $is_essential =

          # never combine two bare words or numbers
          # examples:  and ::ok(1)
          #            return ::spw(...)
          #            for bla::bla:: abc
          # example is "%overload:: and" in files Dumpvalue.pm or colonbug.pl
          #            $input eq"quit" to make $inputeq"quit"
          #            my $size=-s::SINK if $file;  <==OK but we won't do it
          (      ( $left_token =~ /([\'\w]|\:\:)$/ )
              && ( $right_token =~ /^([\'\w]|\:\:)/ ) )

          # do not combine a number with a concatenation dot
          # example: pom.caputo:
          # $vt100_compatible ? "\e[0;0H" : ('-' x 78 . "\n");
          || ( ( $left_type eq 'n' )  && ( $right_token eq '.' ) )
          || ( ( $right_type eq 'n' ) && ( $left_token eq '.' ) )

          # do not join a minus with a bare word, because you might form
          # a file test operator.  Example from Complex.pm:
          # if (CORE::abs($z - i) < $eps); "z-i" would be taken as a file test.
          || ( ( $left_token eq '-' ) && ( $right_token =~ /^[_A-Za-z]$/ ) )

          # and something like this could become ambiguous without space
          # after the '-':
          #   use constant III=>1;
          #   $a = $b - III;
          # and even this:
          #   $a = - III;
          || ( ( $left_token eq '-' )
            && ( $right_type =~ /^[wC]$/ && $right_token =~ /^[_A-Za-z]/ ) )

          # '= -' should not become =- or you will get a warning
          # about reversed -=
          # || ($right_token eq '-')

          # keep a space between a quote and a bareword to prevent the
          # bareword from becoming a quote modifier.
          || ( ( $left_type eq 'Q' ) && ( $right_token =~ /^[a-zA-Z_]/ ) )

          # keep a space between a token ending in '$' and any word;
          # this caused trouble:  "die @$ if $@"
          || ( ( $left_type eq 'i' && $left_token =~ /\$$/ )
            && ( $right_token =~ /^[a-zA-Z_]/ ) )

          # perl is very fussy about spaces before <<
          || ( $right_token =~ /^\<\</ )

          # avoid combining tokens to create new meanings. Example:
          #     $a+ +$b must not become $a++$b
          || ( $is_digraph{ $left_token . $right_token } )
          || ( $is_trigraph{ $left_token . $right_token } )

          # another example: do not combine these two &'s:
          #     allow_options & &OPT_EXECCGI
          || ( $is_digraph{ $left_token . substr( $right_token, 0, 1 ) } )

          # don't combine $$ or $# with any alphanumeric
          # (testfile mangle.t with --mangle)
          || ( ( $left_token =~ /^\$[\$\#]$/ ) && ( $right_token =~ /^\w/ ) )

          # retain any space after possible filehandle
          # (testfiles prnterr1.t with --extrude and mangle.t with --mangle)
          || ( $left_type eq 'Z' )

          # Perl is sensitive to whitespace after the + here:
          #  $b = xvals $a + 0.1 * yvals $a;
          || ( $prev_type eq 'Z' && $left_type =~ /^[\/\?\+\-\*]$/ )

          # keep paren separate in 'use Foo::Bar ()'
          || ( $right_token eq '('
            && $left_type eq 'w'
            && $prev_type eq 'k'
            && $prev_token eq 'use' )

          # keep any space between filehandle and paren:
          # file mangle.t with --mangle:
          || ( $left_type eq 'Y' && $right_token eq '(' )

          # retain any space after here doc operator ( hereerr.t)
          || ( $left_type eq 'h' )

          # be careful with a space around ++ and --, to avoid ambiguity as
          # to which token it applies
          || ( ( $right_type =~ /^(pp|mm)$/ )
            && ( $left_token !~ /^[\;\{\(\[]/ ) )
          || ( ( $left_type =~ /^(\+\+|\-\-)$/ )
            && ( $right_token !~ /^[\;\}\)\]]/ ) )

          # need space after foreach my; for example, this will fail in
          # older versions of Perl:
          # foreach my$ft(@filetypes)...
          || ( $left_token eq 'my'
            && $is_for_foreach{$prev_token}
            && $right_token =~ /^\$/ )

          # must have space between grep and left paren; "grep(" will fail
          || ( $right_token eq '(' && $is_sort_grep_map{$left_token} )

          # don't stick numbers next to left parens, as in:
          #use Mail::Internet 1.28 (); (see Entity.pm, Head.pm, Test.pm)
          || ( ( $left_type eq 'n' ) && ( $right_token eq '(' ) )

          # We must be sure that a space between a ? and a quoted string
          # remains if the space before the ? remains.  [Loca.pm, lockarea]
          # ie,
          #    $b=join $comma ? ',' : ':', @_;  # ok
          #    $b=join $comma?',' : ':', @_;    # ok!
          #    $b=join $comma ?',' : ':', @_;   # error!
          # Not really required:
          ## || ( ( $left_type eq '?' ) && ( $right_type eq 'Q' ) )

          # do not remove space between an '&' and a bare word because
          # it may turn into a function evaluation, like here
          # between '&' and 'O_ACCMODE', producing a syntax error [File.pm]
          #    $opts{rdonly} = (($opts{mode} & O_ACCMODE) == O_RDONLY);
          || ( ( $left_type eq '&' ) && ( $right_token =~ /^[a-zA-Z_]/ ) );

        return $is_essential;
    }
}

7613

7614

sub set_white_space_flag {

7615

7616

# This routine examines each pair of nonblank tokens and

7617

# sets values for array @white_space_flag.

7618

#

7619

# $white_space_flag[$j] is a flag indicating whether a white space

7620

# BEFORE token $j is needed, with the following values:

7621

#

7622

# -1 do not want a space before token $j

7623

# 0 optional space or $j is a whitespace

7624

# 1 want a space before token $j

7625

#

7626

#

7627

# The values for the first token will be defined based

7628

# upon the contents of the "to_go" output array.

7629

#

7630

# Note: retain debug print statements because they are usually

7631

# required after adding new token types.

7632

7633

BEGIN {

7634

7635

# initialize these global hashes, which control the use of

7636

# whitespace around tokens:

7637

#

7638

# %binary_ws_rules

7639

# %want_left_space

7640

# %want_right_space

7641

# %space_after_keyword

7642

#

7643

# Many token types are identical to the tokens themselves.

7644

# See the tokenizer for a complete list. Here are some special types:

7645

# k = perl keyword

7646

# f = semicolon in for statement

7647

# m = unary minus

7648

# p = unary plus

7649

# Note that :: is excluded since it should be contained in an identifier

7650

# Note that '->' is excluded because it never gets space

7651

# parentheses and brackets are excluded since they are handled specially

7652

# curly braces are included but may be overridden by logic, such as

7653

# newline logic.

7654

7655

# NEW_TOKENS: create a whitespace rule here. This can be as

7656

# simple as adding your new letter to @spaces_both_sides, for

7657

# example.

7658

7659

@_ = qw" L { ( [ ";

7660

@is_opening_type{@_} = (1) x scalar(@_);

7661

7662

@_ = qw" R } ) ] ";

7663

@is_closing_type{@_} = (1) x scalar(@_);

7664

7665

my @spaces_both_sides = qw"

7666

+ - * / % ? = . : x < > | & ^ .. << >> ** && .. || // => += -=

7667

.= %= x= &= |= ^= *= <> <= >= == =~ !~ /= != ... <<= >>= ~~ !~~

7668

&&= ||= //= <=> A k f w F n C Y U G v

7669

";

7670

7671

my @spaces_left_side = qw"

7672

t ! ~ m p { \ h pp mm Z j

7673

";

7674

push( @spaces_left_side, '#' ); # avoids warning message

7675

7676

my @spaces_right_side = qw"

7677

; } ) ] R J ++ -- **=

7678

";

7679

push( @spaces_right_side, ',' ); # avoids warning message

7680

@want_left_space{@spaces_both_sides} = (1) x scalar(@spaces_both_sides);

7681

@want_right_space{@spaces_both_sides} =

7682

(1) x scalar(@spaces_both_sides);

7683

@want_left_space{@spaces_left_side} = (1) x scalar(@spaces_left_side);

7684

@want_right_space{@spaces_left_side} = (-1) x scalar(@spaces_left_side);

7685

@want_left_space{@spaces_right_side} =

7686

(-1) x scalar(@spaces_right_side);

7687

@want_right_space{@spaces_right_side} =

7688

(1) x scalar(@spaces_right_side);

7689

$want_left_space{'L'} = WS_NO;

7690

$want_left_space{'->'} = WS_NO;

7691

$want_right_space{'->'} = WS_NO;

7692

$want_left_space{'**'} = WS_NO;

7693

$want_right_space{'**'} = WS_NO;

7694

7695

# hash type information must stay tightly bound

7696

# as in : ${xxxx}

7697

$binary_ws_rules{'i'}{'L'} = WS_NO;

7698

$binary_ws_rules{'i'}{'{'} = WS_YES;

7699

$binary_ws_rules{'k'}{'{'} = WS_YES;

7700

$binary_ws_rules{'U'}{'{'} = WS_YES;

7701

$binary_ws_rules{'i'}{'['} = WS_NO;

7702

$binary_ws_rules{'R'}{'L'} = WS_NO;

7703

$binary_ws_rules{'R'}{'{'} = WS_NO;

7704

$binary_ws_rules{'t'}{'L'} = WS_NO;

7705

$binary_ws_rules{'t'}{'{'} = WS_NO;

7706

$binary_ws_rules{'}'}{'L'} = WS_NO;

7707

$binary_ws_rules{'}'}{'{'} = WS_NO;

7708

$binary_ws_rules{'$'}{'L'} = WS_NO;

7709

$binary_ws_rules{'$'}{'{'} = WS_NO;

7710

$binary_ws_rules{'@'}{'L'} = WS_NO;

7711

$binary_ws_rules{'@'}{'{'} = WS_NO;

7712

$binary_ws_rules{'='}{'L'} = WS_YES;

7713

7714

# the following includes ') {'

7715

# as in : if ( xxx ) { yyy }

7716

$binary_ws_rules{']'}{'L'} = WS_NO;

7717

$binary_ws_rules{']'}{'{'} = WS_NO;

7718

$binary_ws_rules{')'}{'{'} = WS_YES;

7719

$binary_ws_rules{')'}{'['} = WS_NO;

7720

$binary_ws_rules{']'}{'['} = WS_NO;

7721

$binary_ws_rules{']'}{'{'} = WS_NO;

7722

$binary_ws_rules{'}'}{'['} = WS_NO;

7723

$binary_ws_rules{'R'}{'['} = WS_NO;

7724

7725

$binary_ws_rules{']'}{'++'} = WS_NO;

7726

$binary_ws_rules{']'}{'--'} = WS_NO;

7727

$binary_ws_rules{')'}{'++'} = WS_NO;

7728

$binary_ws_rules{')'}{'--'} = WS_NO;

7729

7730

$binary_ws_rules{'R'}{'++'} = WS_NO;

7731

$binary_ws_rules{'R'}{'--'} = WS_NO;

7732

7733

########################################################

7734

# should no longer be necessary (see niek.pl)

7735

##$binary_ws_rules{'k'}{':'} = WS_NO; # keep colon with label

7736

##$binary_ws_rules{'w'}{':'} = WS_NO;

7737

########################################################

7738

$binary_ws_rules{'i'}{'Q'} = WS_YES;

7739

$binary_ws_rules{'n'}{'('} = WS_YES; # occurs in 'use package n ()'

7740

7741

# FIXME: we need to split 'i' into variables and functions

7742

# and have no space for functions but space for variables. For now,

7743

# I have a special patch in the special rules below

7744

$binary_ws_rules{'i'}{'('} = WS_NO;

7745

7746

$binary_ws_rules{'w'}{'('} = WS_NO;

7747

$binary_ws_rules{'w'}{'{'} = WS_YES;

7748

}

7749

my ( $jmax, $rtokens, $rtoken_type, $rblock_type ) = @_;

7750

my ( $last_token, $last_type, $last_block_type, $token, $type,

7751

$block_type );

7752

my (@white_space_flag);

7753

my $j_tight_closing_paren = -1;

7754

7755

if ( $max_index_to_go >= 0 ) {

7756

$token = $tokens_to_go[$max_index_to_go];

7757

$type = $types_to_go[$max_index_to_go];

7758

$block_type = $block_type_to_go[$max_index_to_go];

7759

}

7760

else {

7761

$token = ' ';

7762

$type = 'b';

7763

$block_type = '';

7764

}

7765

7766

# loop over all tokens

7767

my ( $j, $ws );

7768

7769

for ( $j = 0 ; $j <= $jmax ; $j++ ) {

7770

7771

if ( $$rtoken_type[$j] eq 'b' ) {

7772

$white_space_flag[$j] = WS_OPTIONAL;

7773

next;

7774

}

7775

7776

# set a default value, to be changed as needed

7777

$ws = undef;

7778

$last_token = $token;

7779

$last_type = $type;

7780

$last_block_type = $block_type;

7781

$token = $$rtokens[$j];

7782

$type = $$rtoken_type[$j];

7783

$block_type = $$rblock_type[$j];

7784

7785

#---------------------------------------------------------------

7786

# section 1:

7787

# handle space on the inside of opening braces

7788

#---------------------------------------------------------------

7789

7790

# /^[L\{\(\[]$/

7791

if ( $is_opening_type{$last_type} ) {

7792

7793

$j_tight_closing_paren = -1;

7794

7795

# let's keep empty matched braces together: () {} []

7796

# except for BLOCKS

7797

if ( $token eq $matching_token{$last_token} ) {

7798

if ($block_type) {

7799

$ws = WS_YES;

7800

}

7801

else {

7802

$ws = WS_NO;

7803

}

7804

}

7805

else {

7806

7807

# we're considering the right of an opening brace

7808

# tightness = 0 means always pad inside with space

7809

# tightness = 1 means pad inside if "complex"

7810

# tightness = 2 means never pad inside with space

7811

7812

my $tightness;

7813

if ( $last_type eq '{'

7814

&& $last_token eq '{'

7815

&& $last_block_type )

7816

{

7817

$tightness = $rOpts_block_brace_tightness;

7818

}

7819

else { $tightness = $tightness{$last_token} }

7820

7821

if ( $tightness <= 0 ) {

7822

$ws = WS_YES;

7823

}

7824

elsif ( $tightness > 1 ) {

7825

$ws = WS_NO;

7826

}

7827

else {

7828

7829

# Patch to count '-foo' as single token so that

7830

# each of $a{-foo} and $a{foo} and $a{'foo'} do

7831

# not get spaces with default formatting.

7832

my $j_here = $j;

7833

++$j_here

7834

if ( $token eq '-'

7835

&& $last_token eq '{'

7836

&& $$rtoken_type[ $j + 1 ] eq 'w' );

7837

7838

# $j_next is where a closing token should be if

7839

# the container has a single token

7840

my $j_next =

7841

( $$rtoken_type[ $j_here + 1 ] eq 'b' )

7842

? $j_here + 2

7843

: $j_here + 1;

7844

my $tok_next = $$rtokens[$j_next];

7845

my $type_next = $$rtoken_type[$j_next];

7846

7847

# for tightness = 1, if there is just one token

7848

# within the matching pair, we will keep it tight

7849

if (

7850

$tok_next eq $matching_token{$last_token}

7851

7852

# but watch out for this: [ [ ] (misc.t)

7853

&& $last_token ne $token

7854

)

7855

{

7856

7857

# remember where to put the space for the closing paren

7858

$j_tight_closing_paren = $j_next;

7859

$ws = WS_NO;

7860

}

7861

else {

7862

$ws = WS_YES;

7863

}

7864

}

7865

}

7866

} # done with opening braces and brackets

7867

my $ws_1 = $ws

7868

if FORMATTER_DEBUG_FLAG_WHITE;

7869

7870

#---------------------------------------------------------------

7871

# section 2:

7872

# handle space on inside of closing brace pairs

7873

#---------------------------------------------------------------

7874

7875

# /[\}\)\]R]/

7876

if ( $is_closing_type{$type} ) {

7877

7878

if ( $j == $j_tight_closing_paren ) {

7879

7880

$j_tight_closing_paren = -1;

7881

$ws = WS_NO;

7882

}

7883

else {

7884

7885

if ( !defined($ws) ) {

7886

7887

my $tightness;

7888

if ( $type eq '}' && $token eq '}' && $block_type ) {

7889

$tightness = $rOpts_block_brace_tightness;

7890

}

7891

else { $tightness = $tightness{$token} }

7892

7893

$ws = ( $tightness > 1 ) ? WS_NO : WS_YES;

7894

}

7895

}

7896

}

7897

7898

my $ws_2 = $ws

7899

if FORMATTER_DEBUG_FLAG_WHITE;

7900

7901

#---------------------------------------------------------------

7902

# section 3:

7903

# use the binary table

7904

#---------------------------------------------------------------

7905

if ( !defined($ws) ) {

7906

$ws = $binary_ws_rules{$last_type}{$type};

7907

}

7908

my $ws_3 = $ws

7909

if FORMATTER_DEBUG_FLAG_WHITE;

7910

7911

#---------------------------------------------------------------

7912

# section 4:

7913

# some special cases

7914

#---------------------------------------------------------------

7915

if ( $token eq '(' ) {

7916

7917

# This will have to be tweaked as tokenization changes.

7918

# We usually want a space at '} (', for example:

7919

# map { 1 * $_; } ( $y, $M, $w, $d, $h, $m, $s );

7920

#

7921

# But not others:

7922

# &{ $_->[1] }( delete $_[$#_]{ $_->[0] } );

7923

# At present, the above & block is marked as type L/R so this case

7924

# won't go through here.

7925

if ( $last_type eq '}' ) { $ws = WS_YES }

7926

7927

# NOTE: some older versions of Perl had occasional problems if

7928

# spaces are introduced between keywords or functions and opening

7929

# parens. So the default is not to do this except in certain

7930

# cases. The current Perl seems to tolerate spaces.

7931

7932

# Space between keyword and '('

7933

elsif ( $last_type eq 'k' ) {

7934

$ws = WS_NO

7935

unless ( $rOpts_space_keyword_paren

7936

|| $space_after_keyword{$last_token} );

7937

}

7938

7939

# Space between function and '('

7940

# -----------------------------------------------------

7941

# 'w' and 'i' checks for something like:

7942

# myfun( &myfun( ->myfun(

7943

# -----------------------------------------------------

7944

elsif (( $last_type =~ /^[wU]$/ )

7945

|| ( $last_type =~ /^[wi]$/ && $last_token =~ /^(\&|->)/ ) )

7946

{

7947

$ws = WS_NO unless ($rOpts_space_function_paren);

7948

}

7949

7950

# space between something like $i and ( in

7951

# for $i ( 0 .. 20 ) {

7952

# FIXME: eventually, type 'i' needs to be split into multiple

7953

# token types so this can be a hardwired rule.

7954

elsif ( $last_type eq 'i' && $last_token =~ /^[\$\%\@]/ ) {

7955

$ws = WS_YES;

7956

}

7957

7958

# allow constant function followed by '()' to retain no space

7959

elsif ( $last_type eq 'C' && $$rtokens[ $j + 1 ] eq ')' ) {

7960

$ws = WS_NO;

7961

}

7962

}

7963

7964

# patch for SWITCH/CASE: make space at ']{' optional

7965

# since the '{' might begin a case or when block

7966

elsif ( ( $token eq '{' && $type ne 'L' ) && $last_token eq ']' ) {

7967

$ws = WS_OPTIONAL;

7968

}

7969

7970

# keep space between 'sub' and '{' for anonymous sub definition

7971

if ( $type eq '{' ) {

7972

if ( $last_token eq 'sub' ) {

7973

$ws = WS_YES;

7974

}

7975

7976

# this is needed to avoid no space in '){'

7977

if ( $last_token eq ')' && $token eq '{' ) { $ws = WS_YES }

7978

7979

# avoid any space before the brace or bracket in something like

7980

# @opts{'a','b',...}

7981

if ( $last_type eq 'i' && $last_token =~ /^\@/ ) {

7982

$ws = WS_NO;

7983

}

7984

}

7985

7986

elsif ( $type eq 'i' ) {

7987

7988

# never a space before ->

7989

if ( $token =~ /^\-\>/ ) {

7990

$ws = WS_NO;

7991

}

7992

}

7993

7994

# retain any space between '-' and bare word

7995

elsif ( $type eq 'w' || $type eq 'C' ) {

7996

$ws = WS_OPTIONAL if $last_type eq '-';

7997

7998

# never a space before ->

7999

if ( $token =~ /^\-\>/ ) {

8000

$ws = WS_NO;

8001

}

8002

}

8003

8004

# retain any space between '-' and bare word

8005

# example: avoid space between 'USER' and '-' here:

8006

# $myhash{USER-NAME}='steve';

8007

elsif ( $type eq 'm' || $type eq '-' ) {

8008

$ws = WS_OPTIONAL if ( $last_type eq 'w' );

8009

}

8010

8011

# always space before side comment

8012

elsif ( $type eq '#' ) { $ws = WS_YES if $j > 0 }

8013

8014

# always preserve whatever space was used after a possible

8015

# filehandle (except _) or here doc operator

8016

if (

8017

$type ne '#'

8018

&& ( ( $last_type eq 'Z' && $last_token ne '_' )

8019

|| $last_type eq 'h' )

8020

)

8021

{

8022

$ws = WS_OPTIONAL;

8023

}

8024

8025

my $ws_4 = $ws

8026

if FORMATTER_DEBUG_FLAG_WHITE;

8027

8028

#---------------------------------------------------------------

8029

# section 5:

8030

# default rules not covered above

8031

#---------------------------------------------------------------

8032

# if we fall through to here,

8033

# look at the pre-defined hash tables for the two tokens, and

8034

# if (they are equal) use the common value

8035

# if (either is zero or undef) use the other

8036

# if (either is -1) use it

8037

# That is,

8038

# left vs right

8039

# 1 vs 1 --> 1

8040

# 0 vs 0 --> 0

8041

# -1 vs -1 --> -1

8042

#

8043

# 0 vs -1 --> -1

8044

# 0 vs 1 --> 1

8045

# 1 vs 0 --> 1

8046

# -1 vs 0 --> -1

8047

#

8048

# -1 vs 1 --> -1

8049

# 1 vs -1 --> -1

8050

if ( !defined($ws) ) {

8051

my $wl = $want_left_space{$type};

8052

my $wr = $want_right_space{$last_type};

8053

if ( !defined($wl) ) { $wl = 0 }

8054

if ( !defined($wr) ) { $wr = 0 }

8055

$ws = ( ( $wl == $wr ) || ( $wl == -1 ) || !$wr ) ? $wl : $wr;

8056

}

8057

8058

if ( !defined($ws) ) {

8059

$ws = 0;

8060

write_diagnostics(

8061

"WS flag is undefined for tokens $last_token $token\n");

8062

}

8063

8064

# Treat newline as a whitespace. Otherwise, we might combine

8065

# 'Send' and '-recipients' here according to the above rules:

8066

# my $msg = new Fax::Send

8067

# -recipients => $to,

8068

# -data => $data;

8069

if ( $ws == 0 && $j == 0 ) { $ws = 1 }

8070

8071

if ( ( $ws == 0 )

8072

&& $j > 0

8073

&& $j < $jmax

8074

&& ( $last_type !~ /^[Zh]$/ ) )

8075

{

8076

8077

# If this happens, we have a non-fatal but undesirable

8078

# hole in the above rules which should be patched.

8079

write_diagnostics(

8080

"WS flag is zero for tokens $last_token $token\n");

8081

}

8082

$white_space_flag[$j] = $ws;

8083

8084

FORMATTER_DEBUG_FLAG_WHITE && do {

8085

my $str = substr( $last_token, 0, 15 );

8086

$str .= ' ' x ( 16 - length($str) );

8087

if ( !defined($ws_1) ) { $ws_1 = "*" }

8088

if ( !defined($ws_2) ) { $ws_2 = "*" }

8089

if ( !defined($ws_3) ) { $ws_3 = "*" }

8090

if ( !defined($ws_4) ) { $ws_4 = "*" }

8091

print

8092

"WHITE: i=$j $str $last_type $type $ws_1 : $ws_2 : $ws_3 : $ws_4 : $ws \n";

8093

};

8094

}

8095

return \@white_space_flag;

8096

}

8097

8098

{ # begin print_line_of_tokens

8099

8100

# References to the parallel per-token arrays of the input line being
# processed, together with the raw text of that line.  All of them are
# reloaded from the $line_of_tokens hash by print_line_of_tokens.
my (
    $rtoken_type,
    $rtokens,
    $rlevels,
    $rslevels,
    $rblock_type,
    $rcontainer_type,
    $rcontainer_environment,
    $rtype_sequence,
    $input_line,
    $rnesting_tokens,
    $rci_levels,
    $rnesting_blocks,
);

# Per-line values also taken from $line_of_tokens: the ending quote state
# and the indentation level reported with the line.
my ( $in_quote, $python_indentation_level );

# The "current token".  These are filled in by extract_token (or set by
# hand), consumed by store_token_to_go, and snapshotted as a group by
# save_current_token / restore_current_token.
my (
    $block_type,
    $ci_level,
    $container_environment,
    $container_type,
    $in_continued_quote,
    $level,
    $nesting_blocks,
    $no_internal_newlines,
    $slevel,
    $token,
    $type,
    $type_sequence,
);

8129

8130

# Load the "current token" variables from one position of the per-line
# token arrays.
#
#   $_[0] - index of the wanted token within the arrays referenced by
#           $rtokens, $rtoken_type, $rlevels, and the other per-line refs
#
# Only the shared current-token variables are updated.  Note that
# $in_continued_quote and $no_internal_newlines are not touched here.
sub extract_token {
    my ($index) = @_;

    # the token text and its type code
    $token = $rtokens->[$index];
    $type  = $rtoken_type->[$index];

    # block / container bookkeeping for this token
    $block_type            = $rblock_type->[$index];
    $container_type        = $rcontainer_type->[$index];
    $container_environment = $rcontainer_environment->[$index];
    $type_sequence         = $rtype_sequence->[$index];

    # indentation and nesting information
    $level          = $rlevels->[$index];
    $slevel         = $rslevels->[$index];
    $nesting_blocks = $rnesting_blocks->[$index];

    # kept as the final statement so the implicit return value is unchanged
    $ci_level = $rci_levels->[$index];
}

8144

8145

{

    # Single-slot snapshot of the current-token variables.  It is private
    # to the two subs below; each save overwrites the previous snapshot.
    my @token_snapshot;

    # Copy all twelve current-token variables into the snapshot.
    sub save_current_token {

        @token_snapshot = (
            $block_type,
            $ci_level,
            $container_environment,
            $container_type,
            $in_continued_quote,
            $level,
            $nesting_blocks,
            $no_internal_newlines,
            $slevel,
            $token,
            $type,
            $type_sequence,
        );
    }

    # Put back the values captured by the most recent save_current_token.
    # The variables must be listed in the same order as in the save.
    sub restore_current_token {
        (
            $block_type,
            $ci_level,
            $container_environment,
            $container_type,
            $in_continued_quote,
            $level,
            $nesting_blocks,
            $no_internal_newlines,
            $slevel,
            $token,
            $type,
            $type_sequence,
        ) = @token_snapshot;
    }
}

8171

8172

# Append the current token (held in $token, $type, $level, and the other
# current-token variables) to the '_to_go' batch arrays.
# Called once per output token.
#
#   $_[0] - optional; when true, the value stored in @nobreak_to_go for
#           this token is 1 regardless of $no_internal_newlines
sub store_token_to_go {

    # an explicit true argument overrides the current newline setting
    my $nobreak = $_[0] ? 1 : $no_internal_newlines;

    # claim the next slot of the batch
    my $slot = ++$max_index_to_go;

    # values copied from the current token
    $tokens_to_go[$slot]                = $token;
    $types_to_go[$slot]                 = $type;
    $block_type_to_go[$slot]            = $block_type;
    $type_sequence_to_go[$slot]         = $type_sequence;
    $container_environment_to_go[$slot] = $container_environment;
    $nesting_blocks_to_go[$slot]        = $nesting_blocks;
    $ci_levels_to_go[$slot]             = $ci_level;
    $nobreak_to_go[$slot]               = $nobreak;

    # per-token fields which start out in a neutral state
    $old_breakpoint_to_go[$slot]    = 0;
    $forced_breakpoint_to_go[$slot] = 0;
    $mate_index_to_go[$slot]        = -1;
    $matching_token_to_go[$slot]    = '';
    $bond_strength_to_go[$slot]     = 0;

    # Note: negative levels are currently retained as a diagnostic so that
    # the 'final indentation level' is correctly reported for bad scripts.
    # But this means that every use of $level as an index must be checked.
    # If this becomes too much of a problem, we might give up and just clip
    # them at zero.
    ## $levels_to_go[$max_index_to_go] = ( $level > 0 ) ? $level : 0;
    $levels_to_go[$slot] = $level;

    # the nesting depth, unlike the level, is clipped at zero
    $nesting_depth_to_go[$slot] = ( $slevel >= 0 ) ? $slevel : 0;

    # running total: the entry after this slot is the entry for this slot
    # plus the length of this token
    $lengths_to_go[ $slot + 1 ] = $lengths_to_go[$slot] + length($token);

    # Define the indentation that this token would have if it started
    # a new line.  We have to do this now because we need to know this
    # when considering one-line blocks.
    set_leading_whitespace( $level, $ci_level, $in_continued_quote );

    # shift the record of the last two nonblank tokens and count commas
    unless ( $type eq 'b' ) {
        (
            $last_last_nonblank_index_to_go,
            $last_last_nonblank_type_to_go,
            $last_last_nonblank_token_to_go,
          )
          = (
            $last_nonblank_index_to_go,
            $last_nonblank_type_to_go,
            $last_nonblank_token_to_go,
          );

        $last_nonblank_index_to_go = $max_index_to_go;
        $last_nonblank_type_to_go  = $type;
        $last_nonblank_token_to_go = $token;

        $comma_count_in_batch++ if $type eq ',';
    }

    # optional trace; caller() yields ( package, filename, line )
    FORMATTER_DEBUG_FLAG_STORE && do {
        my ( $a, $b, $c ) = caller();
        print
"STORE: from $a $c: storing token $token type $type lev=$level slev=$slevel at $max_index_to_go\n";
    };
}

8227

8228

# Insert an additional token into the output batch while leaving the
# current-token variables as they were on entry.
#
#   @_ = ( $token, $type, $slevel, $no_internal_newlines ) of the new token
#
# The level, nesting blocks, continuation level and container environment
# are copied from the token at $max_index_to_go, so at least one token is
# assumed to be in the batch already; a warning is issued otherwise.
sub insert_new_token_to_go {

    # set the caller's current token aside; it is put back before returning
    save_current_token();

    ( $token, $type, $slevel, $no_internal_newlines ) = @_;

    warning("code bug: bad call to insert_new_token_to_go\n")
      if $max_index_to_go == UNDEFINED_INDEX;

    # inherit these from the most recently stored token
    my $prev = $max_index_to_go;
    $level                 = $levels_to_go[$prev];
    $nesting_blocks        = $nesting_blocks_to_go[$prev];
    $ci_level              = $ci_levels_to_go[$prev];
    $container_environment = $container_environment_to_go[$prev];

    # FIXME: it seems to be necessary to use the next, rather than
    # previous, value of $slevel when creating a new blank (align.t),
    # which is why it is passed in instead of being taken from
    # $nesting_depth_to_go[$max_index_to_go].

    # an inserted token carries no quote, block or sequence information
    $in_continued_quote = 0;
    $block_type         = "";
    $type_sequence      = "";

    store_token_to_go();
    restore_current_token();
    return;
}

8253

8254

sub print_line_of_tokens {

8255

8256

my $line_of_tokens = shift;

8257

8258

# This routine is called once per input line to process all of

8259

# the tokens on that line. This is the first stage of

8260

# beautification.

8261

#

8262

# Full-line comments and blank lines may be processed immediately.

8263

#

8264

# For normal lines of code, the tokens are stored one-by-one,

8265

# via calls to 'sub store_token_to_go', until a known line break

8266

# point is reached. Then, the batch of collected tokens is

8267

# passed along to 'sub output_line_to_go' for further

8268

# processing. This routine decides if there should be

8269

# whitespace between each pair of non-white tokens, so later

8270

# routines only need to decide on any additional line breaks.

8271

# Any whitespace is initially a single space character. Later,

8272

# the vertical aligner may expand that to be multiple space

8273

# characters if necessary for alignment.

8274

8275

# extract input line number for error messages

8276

$input_line_number = $line_of_tokens->{_line_number};

8277

8278

$rtoken_type = $line_of_tokens->{_rtoken_type};

8279

$rtokens = $line_of_tokens->{_rtokens};

8280

$rlevels = $line_of_tokens->{_rlevels};

8281

$rslevels = $line_of_tokens->{_rslevels};

8282

$rblock_type = $line_of_tokens->{_rblock_type};

8283

$rcontainer_type = $line_of_tokens->{_rcontainer_type};

8284

$rcontainer_environment = $line_of_tokens->{_rcontainer_environment};

8285

$rtype_sequence = $line_of_tokens->{_rtype_sequence};

8286

$input_line = $line_of_tokens->{_line_text};

8287

$rnesting_tokens = $line_of_tokens->{_rnesting_tokens};

8288

$rci_levels = $line_of_tokens->{_rci_levels};

8289

$rnesting_blocks = $line_of_tokens->{_rnesting_blocks};

8290

8291

$in_continued_quote = $starting_in_quote =

8292

$line_of_tokens->{_starting_in_quote};

8293

$in_quote = $line_of_tokens->{_ending_in_quote};

8294

$ending_in_quote = $in_quote;

8295

$python_indentation_level =

8296

$line_of_tokens->{_python_indentation_level};

8297

8298

my $j;

8299

my $j_next;

8300

my $jmax;

8301

my $next_nonblank_token;

8302

my $next_nonblank_token_type;

8303

my $rwhite_space_flag;

8304

8305

$jmax = @$rtokens - 1;

8306

$block_type = "";

8307

$container_type = "";

8308

$container_environment = "";

8309

$type_sequence = "";

8310

$no_internal_newlines = 1 - $rOpts_add_newlines;

8311

$is_static_block_comment = 0;

8312

8313

# Handle a continued quote..

8314

if ($in_continued_quote) {

8315

8316

# A line which is entirely a quote or pattern must go out

8317

# verbatim. Note: the \n is contained in $input_line.

8318

if ( $jmax <= 0 ) {

8319

if ( ( $input_line =~ "\t" ) ) {

8320

note_embedded_tab();

8321

}

8322

write_unindented_line("$input_line");

8323

$last_line_had_side_comment = 0;

8324

return;

8325

}

8326

8327

# prior to version 20010406, perltidy had a bug which placed

8328

# continuation indentation before the last line of some multiline

8329

# quotes and patterns -- exactly the lines passing this way.

8330

# To help find affected lines in scripts run with these

8331

# versions, run with '-chk', and it will warn of any quotes or

8332

# patterns which might have been modified by these early

8333

# versions.

8334

if ( $rOpts->{'check-multiline-quotes'} && $input_line =~ /^ / ) {

8335

warning(

8336

"-chk: please check this line for extra leading whitespace\n"

8337

);

8338

}

8339

}

8340

8341

# Write line verbatim if we are in a formatting skip section

8342

if ($in_format_skipping_section) {

8343

write_unindented_line("$input_line");

8344

$last_line_had_side_comment = 0;

8345

8346

# Note: extra space appended to comment simplifies pattern matching

8347

if ( $jmax == 0

8348

&& $$rtoken_type[0] eq '#'

8349

&& ( $$rtokens[0] . " " ) =~ /$format_skipping_pattern_end/o )

8350

{

8351

$in_format_skipping_section = 0;

8352

write_logfile_entry("Exiting formatting skip section\n");

8353

}

8354

return;

8355

}

8356

8357

# See if we are entering a formatting skip section

8358

if ( $rOpts_format_skipping

8359

&& $jmax == 0

8360

&& $$rtoken_type[0] eq '#'

8361

&& ( $$rtokens[0] . " " ) =~ /$format_skipping_pattern_begin/o )

8362

{

8363

flush();

8364

$in_format_skipping_section = 1;

8365

write_logfile_entry("Entering formatting skip section\n");

8366

write_unindented_line("$input_line");

8367

$last_line_had_side_comment = 0;

8368

return;

8369

}

8370

8371

# delete trailing blank tokens

8372

if ( $jmax > 0 && $$rtoken_type[$jmax] eq 'b' ) { $jmax-- }

8373

8374

# Handle a blank line..

8375

if ( $jmax < 0 ) {

8376

8377

# For the 'swallow-optional-blank-lines' option, we delete all

8378

# old blank lines and let the blank line rules generate any

8379

# needed blanks.

8380

if ( !$rOpts_swallow_optional_blank_lines ) {

8381

flush();

8382

$file_writer_object->write_blank_code_line();

8383

$last_line_leading_type = 'b';

8384

}

8385

$last_line_had_side_comment = 0;

8386

return;

8387

}

8388

8389

# see if this is a static block comment (starts with ## by default)

8390

my $is_static_block_comment_without_leading_space = 0;

8391

if ( $jmax == 0

8392

&& $$rtoken_type[0] eq '#'

8393

&& $rOpts->{'static-block-comments'}

8394

&& $input_line =~ /$static_block_comment_pattern/o )

8395

{

8396

$is_static_block_comment = 1;

8397

$is_static_block_comment_without_leading_space =

8398

substr( $input_line, 0, 1 ) eq '#';

8399

}

8400

8401

# Check for comments which are line directives

8402

# Treat exactly as static block comments without leading space

8403

# reference: perlsyn, near end, section Plain Old Comments (Not!)

8404

# example: '# line 42 "new_filename.plx"'

8405

if (

8406

$jmax == 0

8407

&& $$rtoken_type[0] eq '#'

8408

&& $input_line =~ /^\# \s*

8409

line \s+ (\d+) \s*

8410

(?:\s("?)([^"]+)\2)? \s*

8411

$/x

8412

)

8413

{

8414

$is_static_block_comment = 1;

8415

$is_static_block_comment_without_leading_space = 1;

8416

}

8417

8418

# create a hanging side comment if appropriate

8419

if (

8420

$jmax == 0

8421

&& $$rtoken_type[0] eq '#' # only token is a comment

8422

&& $last_line_had_side_comment # last line had side comment

8423

&& $input_line =~ /^\s/ # there is some leading space

8424

&& !$is_static_block_comment # do not make static comment hanging

8425

&& $rOpts->{'hanging-side-comments'} # user is allowing this

8426

)

8427

{

8428

8429

# We will insert an empty qw string at the start of the token list

8430

# to force this comment to be a side comment. The vertical aligner

8431

# should then line it up with the previous side comment.

8432

unshift @$rtoken_type, 'q';

8433

unshift @$rtokens, '';

8434

unshift @$rlevels, $$rlevels[0];

8435

unshift @$rslevels, $$rslevels[0];

8436

unshift @$rblock_type, '';

8437

unshift @$rcontainer_type, '';

8438

unshift @$rcontainer_environment, '';

8439

unshift @$rtype_sequence, '';

8440

unshift @$rnesting_tokens, $$rnesting_tokens[0];

8441

unshift @$rci_levels, $$rci_levels[0];

8442

unshift @$rnesting_blocks, $$rnesting_blocks[0];

8443

$jmax = 1;

8444

}

8445

8446

# remember if this line has a side comment

8447

$last_line_had_side_comment =

8448

( $jmax > 0 && $$rtoken_type[$jmax] eq '#' );

8449

8450

# Handle a block (full-line) comment..

8451

if ( ( $jmax == 0 ) && ( $$rtoken_type[0] eq '#' ) ) {

8452

8453

if ( $rOpts->{'delete-block-comments'} ) { return }

8454

8455

if ( $rOpts->{'tee-block-comments'} ) {

8456

$file_writer_object->tee_on();

8457

}

8458

8459

destroy_one_line_block();

8460

output_line_to_go();

8461

8462

# output a blank line before block comments

8463

if (

8464

$last_line_leading_type !~ /^[#b]$/

8465

&& $rOpts->{'blanks-before-comments'} # only if allowed

8466

&& !

8467

$is_static_block_comment # never before static block comments

8468

)

8469

{

8470

flush(); # switching to new output stream

8471

$file_writer_object->write_blank_code_line();

8472

$last_line_leading_type = 'b';

8473

}

8474

8475

# TRIM COMMENTS -- This could be turned off as an option

8476

$$rtokens[0] =~ s/\s*$//; # trim right end

8477

8478

if (

8479

$rOpts->{'indent-block-comments'}

8480

&& ( !$rOpts->{'indent-spaced-block-comments'}

8481

|| $input_line =~ /^\s+/ )

8482

&& !$is_static_block_comment_without_leading_space

8483

)

8484

{

8485

extract_token(0);

8486

store_token_to_go();

8487

output_line_to_go();

8488

}

8489

else {

8490

flush(); # switching to new output stream

8491

$file_writer_object->write_code_line( $$rtokens[0] . "\n" );

8492

$last_line_leading_type = '#';

8493

}

8494

if ( $rOpts->{'tee-block-comments'} ) {

8495

$file_writer_object->tee_off();

8496

}

8497

return;

8498

}

8499

8500

# compare input/output indentation except for continuation lines

8501

# (because they have an unknown amount of initial blank space)

8502

# and lines which are quotes (because they may have been outdented)

8503

# Note: this test is placed here because we know the continuation flag

8504

# at this point, which allows us to avoid non-meaningful checks.

8505

my $structural_indentation_level = $$rlevels[0];

8506

compare_indentation_levels( $python_indentation_level,

8507

$structural_indentation_level )

8508

unless ( $python_indentation_level < 0

8509

|| ( $$rci_levels[0] > 0 )

8510

|| ( ( $python_indentation_level == 0 ) && $$rtoken_type[0] eq 'Q' )

8511

);

8512

8513

# Patch needed for MakeMaker. Do not break a statement

8514

# in which $VERSION may be calculated. See MakeMaker.pm;

8515

# this is based on the coding in it.

8516

# The first line of a file that matches this will be eval'd:

8517

# /([\$*])(([\w\:\']*)\bVERSION)\b.*\=/

8518

# Examples:

8519

# *VERSION = \'1.01';

8520

# ( $VERSION ) = '$Revision: 1.73 $ ' =~ /\$Revision:\s+([^\s]+)/;

8521

# We will pass such a line straight through without breaking

8522

# it unless -npvl is used

8523

8524

my $is_VERSION_statement = 0;

8525

8526

if (

8527

!$saw_VERSION_in_this_file

8528

&& $input_line =~ /VERSION/ # quick check to reject most lines

8529

&& $input_line =~ /([\$*])(([\w\:\']*)\bVERSION)\b.*\=/

8530

)

8531

{

8532

$saw_VERSION_in_this_file = 1;

8533

$is_VERSION_statement = 1;

8534

write_logfile_entry("passing VERSION line; -npvl deactivates\n");

8535

$no_internal_newlines = 1;

8536

}

8537

8538

# take care of indentation-only

8539

# NOTE: In previous versions we sent all qw lines out immediately here.

8540

# No longer doing this: also write a line which is entirely a 'qw' list

8541

# to allow stacking of opening and closing tokens. Note that interior

8542

# qw lines will still go out at the end of this routine.

8543

if ( $rOpts->{'indent-only'} ) {

8544

flush();

8545

trim($input_line);

8546

8547

extract_token(0);

8548

$token = $input_line;

8549

$type = 'q';

8550

$block_type = "";

8551

$container_type = "";

8552

$container_environment = "";

8553

$type_sequence = "";

8554

store_token_to_go();

8555

output_line_to_go();

8556

return;

8557

}

8558

8559

push( @$rtokens, ' ', ' ' ); # making $j+2 valid simplifies coding

8560

push( @$rtoken_type, 'b', 'b' );

8561

($rwhite_space_flag) =

8562

set_white_space_flag( $jmax, $rtokens, $rtoken_type, $rblock_type );

8563

8564

# find input tabbing to allow checks for tabbing disagreement

8565

## not used for now

8566

##$input_line_tabbing = "";

8567

##if ( $input_line =~ /^(\s*)/ ) { $input_line_tabbing = $1; }

8568

8569

# if the buffer hasn't been flushed, add a leading space if

8570

# necessary to keep essential whitespace. This is really only

8571

# necessary if we are squeezing out all ws.

8572

if ( $max_index_to_go >= 0 ) {

8573

8574

$old_line_count_in_batch++;

8575

8576

if (

8577

is_essential_whitespace(

8578

$last_last_nonblank_token,

8579

$last_last_nonblank_type,

8580

$tokens_to_go[$max_index_to_go],

8581

$types_to_go[$max_index_to_go],

8582

$$rtokens[0],

8583

$$rtoken_type[0]

8584

)

8585

)

8586

{

8587

my $slevel = $$rslevels[0];

8588

insert_new_token_to_go( ' ', 'b', $slevel,

8589

$no_internal_newlines );

8590

}

8591

}

8592

8593

# If we just saw the end of an elsif block, write nag message

8594

# if we do not see another elsif or an else.

8595

if ($looking_for_else) {

8596

8597

unless ( $$rtokens[0] =~ /^(elsif|else)$/ ) {

8598

write_logfile_entry("(No else block)\n");

8599

}

8600

$looking_for_else = 0;

8601

}

8602

8603

# This is a good place to kill incomplete one-line blocks

8604

if ( ( $semicolons_before_block_self_destruct == 0 )

8605

&& ( $max_index_to_go >= 0 )

8606

&& ( $types_to_go[$max_index_to_go] eq ';' )

8607

&& ( $$rtokens[0] ne '}' ) )

8608

{

8609

destroy_one_line_block();

8610

output_line_to_go();

8611

}

8612

8613

# loop to process the tokens one-by-one

8614

$type = 'b';

8615

$token = "";

8616

8617

foreach $j ( 0 .. $jmax ) {

8618

8619

# pull out the local values for this token

8620

extract_token($j);

8621

8622

if ( $type eq '#' ) {

8623

8624

# trim trailing whitespace

8625

# (there is no option at present to prevent this)

8626

$token =~ s/\s*$//;

8627

8628

if (

8629

$rOpts->{'delete-side-comments'}

8630

8631

# delete closing side comments if necessary

8632

|| ( $rOpts->{'delete-closing-side-comments'}

8633

&& $token =~ /$closing_side_comment_prefix_pattern/o

8634

&& $last_nonblank_block_type =~

8635

/$closing_side_comment_list_pattern/o )

8636

)

8637

{

8638

if ( $types_to_go[$max_index_to_go] eq 'b' ) {

8639

unstore_token_to_go();

8640

}

8641

last;

8642

}

8643

}

8644

8645

# If we are continuing after seeing a right curly brace, flush

8646

# buffer unless we see what we are looking for, as in

8647

# } else ...

8648

if ( $rbrace_follower && $type ne 'b' ) {

8649

8650

unless ( $rbrace_follower->{$token} ) {

8651

output_line_to_go();

8652

}

8653

$rbrace_follower = undef;

8654

}

8655

8656

$j_next = ( $$rtoken_type[ $j + 1 ] eq 'b' ) ? $j + 2 : $j + 1;

8657

$next_nonblank_token = $$rtokens[$j_next];

8658

$next_nonblank_token_type = $$rtoken_type[$j_next];

8659

8660

#--------------------------------------------------------

8661

# Start of section to patch token text

8662

#--------------------------------------------------------

8663

8664

# Modify certain tokens here for whitespace

8665

# The following is not yet done, but could be:

8666

# sub (x x x)

8667

if ( $type =~ /^[wit]$/ ) {

8668

8669

# Examples:

8670

# change '$ var' to '$var' etc

8671

# '-> new' to '->new'

8672

if ( $token =~ /^([\$\&\%\*\@]|\-\>)\s/ ) {

8673

$token =~ s/\s*//g;

8674

}

8675

8676

if ( $token =~ /^sub/ ) { $token =~ s/\s+/ /g }

8677

}

8678

8679

# change 'LABEL :' to 'LABEL:'

8680

elsif ( $type eq 'J' ) { $token =~ s/\s+//g }

8681

8682

# patch to add space to something like "x10"

8683

# This avoids having to split this token in the pre-tokenizer

8684

elsif ( $type eq 'n' ) {

8685

if ( $token =~ /^x\d+/ ) { $token =~ s/x/x / }

8686

}

8687

8688

elsif ( $type eq 'Q' ) {

8689

note_embedded_tab() if ( $token =~ "\t" );

8690

8691

# make note of something like '$var = s/xxx/yyy/;'

8692

# in case it should have been '$var =~ s/xxx/yyy/;'

8693

if (

8694

$token =~ /^(s|tr|y|m|\/)/

8695

&& $last_nonblank_token =~ /^(=|==|!=)$/

8696

8697

# preceded by simple scalar

8698

&& $last_last_nonblank_type eq 'i'

8699

&& $last_last_nonblank_token =~ /^\$/

8700

8701

# followed by some kind of termination

8702

# (but give complaint if we can't see far enough ahead)

8703

&& $next_nonblank_token =~ /^[; \)\}]$/

8704

8705

# scalar is not declared

8706

&& !(

8707

$types_to_go[0] eq 'k'

8708

&& $tokens_to_go[0] =~ /^(my|our|local)$/

8709

)

8710

)

8711

{

8712

my $guess = substr( $last_nonblank_token, 0, 1 ) . '~';

8713

complain(

8714

"Note: be sure you want '$last_nonblank_token' instead of '$guess' here\n"

8715

);

8716

}

8717

}

8718

8719

# trim blanks from right of qw quotes

8720

# (To avoid trimming qw quotes use -ntqw; the tokenizer handles this)

8721

elsif ( $type eq 'q' ) {

8722

$token =~ s/\s*$//;

8723

note_embedded_tab() if ( $token =~ "\t" );

8724

}

8725

8726

#--------------------------------------------------------

8727

# End of section to patch token text

8728

#--------------------------------------------------------

8729

8730

# insert any needed whitespace

8731

if ( ( $type ne 'b' )

8732

&& ( $max_index_to_go >= 0 )

8733

&& ( $types_to_go[$max_index_to_go] ne 'b' )

8734

&& $rOpts_add_whitespace )

8735

{

8736

my $ws = $$rwhite_space_flag[$j];

8737

8738

if ( $ws == 1 ) {

8739

insert_new_token_to_go( ' ', 'b', $slevel,

8740

$no_internal_newlines );

8741

}

8742

}

8743

8744

# Do not allow breaks which would promote a side comment to a

8745

# block comment. In order to allow a break before an opening

8746

# or closing BLOCK, followed by a side comment, those sections

8747

# of code will handle this flag separately.

8748

my $side_comment_follows = ( $next_nonblank_token_type eq '#' );

8749

my $is_opening_BLOCK =

8750

( $type eq '{'

8751

&& $token eq '{'

8752

&& $block_type

8753

&& $block_type ne 't' );

8754

my $is_closing_BLOCK =

8755

( $type eq '}'

8756

&& $token eq '}'

8757

&& $block_type

8758

&& $block_type ne 't' );

8759

8760

if ( $side_comment_follows

8761

&& !$is_opening_BLOCK

8762

&& !$is_closing_BLOCK )

8763

{

8764

$no_internal_newlines = 1;

8765

}

8766

8767

# We're only going to handle breaking for code BLOCKS at this

8768

# (top) level. Other indentation breaks will be handled by

8769

# sub scan_list, which is better suited to dealing with them.

8770

if ($is_opening_BLOCK) {

8771

8772

# Tentatively output this token. This is required before

8773

# calling starting_one_line_block. We may have to unstore

8774

# it, though, if we have to break before it.

8775

store_token_to_go($side_comment_follows);

8776

8777

# Look ahead to see if we might form a one-line block

8778

my $too_long =

8779

starting_one_line_block( $j, $jmax, $level, $slevel,

8780

$ci_level, $rtokens, $rtoken_type, $rblock_type );

8781

clear_breakpoint_undo_stack();

8782

8783

# to simplify the logic below, set a flag to indicate if

8784

# this opening brace is far from the keyword which introduces it

8785

my $keyword_on_same_line = 1;

8786

if ( ( $max_index_to_go >= 0 )

8787

&& ( $last_nonblank_type eq ')' ) )

8788

{

8789

if ( $block_type =~ /^(if|else|elsif)$/

8790

&& ( $tokens_to_go[0] eq '}' )

8791

&& $rOpts_cuddled_else )

8792

{

8793

$keyword_on_same_line = 1;

8794

}

8795

elsif ( ( $slevel < $nesting_depth_to_go[0] ) || $too_long )

8796

{

8797

$keyword_on_same_line = 0;

8798

}

8799

}

8800

8801

# decide if user requested break before '{'

8802

my $want_break =

8803

8804

# use -bl flag if not a sub block of any type

8805

$block_type !~ /^sub/

8806

? $rOpts->{'opening-brace-on-new-line'}

8807

8808

# use -sbl flag unless this is an anonymous sub block

8809

: $block_type !~ /^sub\W*$/

8810

? $rOpts->{'opening-sub-brace-on-new-line'}

8811

8812

# do not break for anonymous subs

8813

: 0;

8814

8815

# Break before an opening '{' ...

8816

if (

8817

8818

# if requested

8819

$want_break

8820

8821

# and we were unable to start looking for a block,

8822

&& $index_start_one_line_block == UNDEFINED_INDEX

8823

8824

# or if it will not be on same line as its keyword, so that

8825

# it will be outdented (eval.t, overload.t), and the user

8826

# has not insisted on keeping it on the right

8827

|| ( !$keyword_on_same_line

8828

&& !$rOpts->{'opening-brace-always-on-right'} )

8829

8830

)

8831

{

8832

8833

# but only if allowed

8834

unless ($no_internal_newlines) {

8835

8836

# since we already stored this token, we must unstore it

8837

unstore_token_to_go();

8838

8839

# then output the line

8840

output_line_to_go();

8841

8842

# and now store this token at the start of a new line

8843

store_token_to_go($side_comment_follows);

8844

}

8845

}

8846

8847

# Now update for side comment

8848

if ($side_comment_follows) { $no_internal_newlines = 1 }

8849

8850

# now output this line

8851

unless ($no_internal_newlines) {

8852

output_line_to_go();

8853

}

8854

}

8855

8856

elsif ($is_closing_BLOCK) {

8857

8858

# If there is a pending one-line block ..

8859

if ( $index_start_one_line_block != UNDEFINED_INDEX ) {

8860

8861

# we have to terminate it if..

8862

if (

8863

8864

# it is too long (final length may be different from

8865

# initial estimate). note: must allow 1 space for this token

8866

excess_line_length( $index_start_one_line_block,

8867

$max_index_to_go ) >= 0

8868

8869

# or if it has too many semicolons

8870

|| ( $semicolons_before_block_self_destruct == 0

8871

&& $last_nonblank_type ne ';' )

8872

)

8873

{

8874

destroy_one_line_block();

8875

}

8876

}

8877

8878

# put a break before this closing curly brace if appropriate

8879

unless ( $no_internal_newlines

8880

|| $index_start_one_line_block != UNDEFINED_INDEX )

8881

{

8882

8883

# add missing semicolon if ...

8884

# there are some tokens

8885

if (

8886

( $max_index_to_go > 0 )

8887

8888

# and we don't have one

8889

&& ( $last_nonblank_type ne ';' )

8890

8891

# patch until some block type issues are fixed:

8892

# Do not add semi-colon for block types '{',

8893

# '}', and ';' because we cannot be sure yet

8894

# that this is a block and not an anonymous

8895

# hash (blktype.t, blktype1.t)

8896

&& ( $block_type !~ /^[\{\};]$/ )

8897

8898

# it seems best not to add semicolons in these

8899

# special block types: sort|map|grep

8900

&& ( !$is_sort_map_grep{$block_type} )

8901

8902

# and we are allowed to do so.

8903

&& $rOpts->{'add-semicolons'}

8904

)

8905

{

8906

8907

save_current_token();

8908

$token = ';';

8909

$type = ';';

8910

$level = $levels_to_go[$max_index_to_go];

8911

$slevel = $nesting_depth_to_go[$max_index_to_go];

8912

$nesting_blocks =

8913

$nesting_blocks_to_go[$max_index_to_go];

8914

$ci_level = $ci_levels_to_go[$max_index_to_go];

8915

$block_type = "";

8916

$container_type = "";

8917

$container_environment = "";

8918

$type_sequence = "";

8919

8920

# Note - we remove any blank AFTER extracting its

8921

# parameters such as level, etc, above

8922

if ( $types_to_go[$max_index_to_go] eq 'b' ) {

8923

unstore_token_to_go();

8924

}

8925

store_token_to_go();

8926

8927

note_added_semicolon();

8928

restore_current_token();

8929

}

8930

8931

# then write out everything before this closing curly brace

8932

output_line_to_go();

8933

8934

}

8935

8936

# Now update for side comment

8937

if ($side_comment_follows) { $no_internal_newlines = 1 }

8938

8939

# store the closing curly brace

8940

store_token_to_go();

8941

8942

# ok, we just stored a closing curly brace. Often, but

8943

# not always, we want to end the line immediately.

8944

# So now we have to check for special cases.

8945

8946

# if this '}' successfully ends a one-line block..

8947

my $is_one_line_block = 0;

8948

my $keep_going = 0;

8949

if ( $index_start_one_line_block != UNDEFINED_INDEX ) {

8950

8951

# Remember the type of token just before the

8952

# opening brace. It would be more general to use

8953

# a stack, but this will work for one-line blocks.

8954

$is_one_line_block =

8955

$types_to_go[$index_start_one_line_block];

8956

8957

# we have to actually make it by removing tentative

8958

# breaks that were set within it

8959

undo_forced_breakpoint_stack(0);

8960

set_nobreaks( $index_start_one_line_block,

8961

$max_index_to_go - 1 );

8962

8963

# then re-initialize for the next one-line block

8964

destroy_one_line_block();

8965

8966

# then decide if we want to break after the '}' ..

8967

# We will keep going to allow certain brace followers as in:

8968

# do { $ifclosed = 1; last } unless $losing;

8969

#

8970

# But make a line break if the curly ends a

8971

# significant block:

8972

if (

8973

$is_block_without_semicolon{$block_type}

8974

8975

# if needless semicolon follows we handle it later

8976

&& $next_nonblank_token ne ';'

8977

)

8978

{

8979

output_line_to_go() unless ($no_internal_newlines);

8980

}

8981

}

8982

8983

# set string indicating what we need to look for brace follower

8984

# tokens

8985

if ( $block_type eq 'do' ) {

8986

$rbrace_follower = \%is_do_follower;

8987

}

8988

elsif ( $block_type =~ /^(if|elsif|unless)$/ ) {

8989

$rbrace_follower = \%is_if_brace_follower;

8990

}

8991

elsif ( $block_type eq 'else' ) {

8992

$rbrace_follower = \%is_else_brace_follower;

8993

}

8994

8995

# added eval for borris.t

8996

elsif ($is_sort_map_grep_eval{$block_type}

8997

|| $is_one_line_block eq 'G' )

8998

{

8999

$rbrace_follower = undef;

9000

$keep_going = 1;

9001

}

9002

9003

# anonymous sub

9004

elsif ( $block_type =~ /^sub\W*$/ ) {

9005

9006

if ($is_one_line_block) {

9007

$rbrace_follower = \%is_anon_sub_1_brace_follower;

9008

}

9009

else {

9010

$rbrace_follower = \%is_anon_sub_brace_follower;

9011

}

9012

}

9013

9014

# None of the above: specify what can follow a closing

9015

# brace of a block which is not an

9016

# if/elsif/else/do/sort/map/grep/eval

9017

# Testfiles:

9018

# 'Toolbar.pm', 'Menubar.pm', bless.t, '3rules.pl', 'break1.t

9019

else {

9020

$rbrace_follower = \%is_other_brace_follower;

9021

}

9022

9023

# See if an elsif block is followed by another elsif or else;

9024

# complain if not.

9025

if ( $block_type eq 'elsif' ) {

9026

9027

if ( $next_nonblank_token_type eq 'b' ) { # end of line?

9028

$looking_for_else = 1; # ok, check on next line

9029

}

9030

else {

9031

9032

unless ( $next_nonblank_token =~ /^(elsif|else)$/ ) {

9033

write_logfile_entry("No else block :(\n");

9034

}

9035

}

9036

}

9037

9038

# keep going after certain block types (map,sort,grep,eval)

9039

# added eval for borris.t

9040

if ($keep_going) {

9041

9042

# keep going

9043

}

9044

9045

# if no more tokens, postpone decision until re-entering

9046

elsif ( ( $next_nonblank_token_type eq 'b' )

9047

&& $rOpts_add_newlines )

9048

{

9049

unless ($rbrace_follower) {

9050

output_line_to_go() unless ($no_internal_newlines);

9051

}

9052

}

9053

9054

elsif ($rbrace_follower) {

9055

9056

unless ( $rbrace_follower->{$next_nonblank_token} ) {

9057

output_line_to_go() unless ($no_internal_newlines);

9058

}

9059

$rbrace_follower = undef;

9060

}

9061

9062

else {

9063

output_line_to_go() unless ($no_internal_newlines);

9064

}

9065

9066

} # end treatment of closing block token

9067

9068

# handle semicolon

9069

elsif ( $type eq ';' ) {

9070

9071

# kill one-line blocks with too many semicolons

9072

$semicolons_before_block_self_destruct--;

9073

if (

9074

( $semicolons_before_block_self_destruct < 0 )

9075

|| ( $semicolons_before_block_self_destruct == 0

9076

&& $next_nonblank_token_type !~ /^[b\}]$/ )

9077

)

9078

{

9079

destroy_one_line_block();

9080

}

9081

9082

# Remove unnecessary semicolons, but not after bare

9083

# blocks, where it could be unsafe if the brace is

9084

# mistokenized.

9085

if (

9086

(

9087

$last_nonblank_token eq '}'

9088

&& (

9089

$is_block_without_semicolon{

9090

$last_nonblank_block_type}

9091

|| $last_nonblank_block_type =~ /^sub\s+\w/

9092

|| $last_nonblank_block_type =~ /^\w+:$/ )

9093

)

9094

|| $last_nonblank_type eq ';'

9095

)

9096

{

9097

9098

if (

9099

$rOpts->{'delete-semicolons'}

9100

9101

# don't delete ; before a # because it would promote it

9102

# to a block comment

9103

&& ( $next_nonblank_token_type ne '#' )

9104

)

9105

{

9106

note_deleted_semicolon();

9107

output_line_to_go()

9108

unless ( $no_internal_newlines

9109

|| $index_start_one_line_block != UNDEFINED_INDEX );

9110

next;

9111

}

9112

else {

9113

write_logfile_entry("Extra ';'\n");

9114

}

9115

}

9116

store_token_to_go();

9117

9118

output_line_to_go()

9119

unless ( $no_internal_newlines

9120

|| ( $rOpts_keep_interior_semicolons && $j < $jmax )

9121

|| ( $next_nonblank_token eq '}' ) );

9122

9123

}

9124

9125

# handle here_doc target string

9126

elsif ( $type eq 'h' ) {

9127

$no_internal_newlines =

9128

1; # no newlines after seeing here-target

9129

destroy_one_line_block();

9130

store_token_to_go();

9131

}

9132

9133

# handle all other token types

9134

else {

9135

9136

# if this is a blank...

9137

if ( $type eq 'b' ) {

9138

9139

# make it just one character

9140

$token = ' ' if $rOpts_add_whitespace;

9141

9142

# delete it if unwanted by whitespace rules

9143

# or we are deleting all whitespace

9144

my $ws = $$rwhite_space_flag[ $j + 1 ];

9145

if ( ( defined($ws) && $ws == -1 )

9146

|| $rOpts_delete_old_whitespace )

9147

{

9148

9149

# unless it might make a syntax error

9150

unless is_essential_whitespace(

9152

$last_last_nonblank_token,

9153

$last_last_nonblank_type,

9154

$tokens_to_go[$max_index_to_go],

9155

$types_to_go[$max_index_to_go],

9156

$$rtokens[ $j + 1 ],

9157

$$rtoken_type[ $j + 1 ]

9158

);

9159

}

9160

}

9161

store_token_to_go();

9162

}

9163

9164

# remember two previous nonblank OUTPUT tokens

9165

if ( $type ne '#' && $type ne 'b' ) {

9166

$last_last_nonblank_token = $last_nonblank_token;

9167

$last_last_nonblank_type = $last_nonblank_type;

9168

$last_nonblank_token = $token;

9169

$last_nonblank_type = $type;

9170

$last_nonblank_block_type = $block_type;

9171

}

9172

9173

# unset the continued-quote flag since it only applies to the

9174

# first token, and we want to resume normal formatting if

9175

# there are additional tokens on the line

9176

$in_continued_quote = 0;

9177

9178

} # end of loop over all tokens in this 'line_of_tokens'

9179

9180

# we have to flush ..

9181

if (

9182

9183

# if there is a side comment

9184

( ( $type eq '#' ) && !$rOpts->{'delete-side-comments'} )

9185

9186

# if this line ends in a quote

9187

# NOTE: This is critically important for ensuring that quoted lines

9188

# do not get processed by things like -sot and -sct

9189

|| $in_quote

9190

9191

# if this is a VERSION statement

9192

|| $is_VERSION_statement

9193

9194

# to keep a label on one line if that is how it is now

9195

|| ( ( $type eq 'J' ) && ( $max_index_to_go == 0 ) )

9196

9197

# if we are instructed to keep all old line breaks

9198

|| !$rOpts->{'delete-old-newlines'}

9199

)

9200

{

9201

destroy_one_line_block();

9202

output_line_to_go();

9203

}

9204

9205

# mark old line breakpoints in current output stream

9206

if ( $max_index_to_go >= 0 && !$rOpts_ignore_old_breakpoints ) {

9207

$old_breakpoint_to_go[$max_index_to_go] = 1;

9208

}

9209

} # end sub print_line_of_tokens

9210

} # end print_line_of_tokens

9211

9212

# sub output_line_to_go sends one logical line of tokens on down the

9213

# pipeline to the VerticalAligner package, breaking the line into continuation

9214

# lines as necessary. The line of tokens is ready to go in the "to_go"

9215

# arrays.

9216

sub output_line_to_go {

    # Send the batch of tokens currently held in the "to_go" arrays on down
    # the pipeline, splitting it into continuation lines where necessary.
    #
    # Takes no arguments; it works on the formatter's shared batch
    # variables.  If a one-line block is still being collected, only a
    # tentative breakpoint is recorded and the routine returns early.
    # Otherwise the batch is (optionally) preceded by a blank line, broken
    # into lines, passed to send_lines_to_vertical_aligner(), and the batch
    # state is reset with prepare_for_new_input_lines().

    # debug stuff; this routine can be called from many points
    FORMATTER_DEBUG_FLAG_OUTPUT && do {

        # Named like the FLUSH debug block further down; this also avoids
        # shadowing the sort variables $a and $b.  The message text is
        # unchanged: it reports the caller's package and line number.
        my ( $package, $file, $line ) = caller;
        write_diagnostics(
"OUTPUT: output_line_to_go called: $package $line $last_nonblank_type $last_nonblank_token, one_line=$index_start_one_line_block, tokens to write=$max_index_to_go\n"
        );
        my $output_str = join "", @tokens_to_go[ 0 .. $max_index_to_go ];
        write_diagnostics("$output_str\n");
    };

    # just set a tentative breakpoint if we might be in a one-line block
    if ( $index_start_one_line_block != UNDEFINED_INDEX ) {
        set_forced_breakpoint($max_index_to_go);
        return;
    }

    # the closing side comment must be requested before the batch is
    # written; any block comment it returns is written at the very end
    my $cscw_block_comment;
    $cscw_block_comment = add_closing_side_comment()
      if ( $rOpts->{'closing-side-comments'} && $max_index_to_go >= 0 );

    match_opening_and_closing_tokens();

    # tell the -lp option we are outputting a batch so it can close
    # any unfinished items in its stack
    finish_lp_batch();

    # If this line ends in a code block brace, set breaks at any
    # previous closing code block braces to breakup a chain of code
    # blocks on one line.  This is very rare but can happen for
    # user-defined subs.  For example we might be looking at this:
    #  BOOL { $server_data{uptime} > 0; } NUM { $server_data{load}; } STR {
    my $saw_good_break = 0;    # flag to force breaks even if short line
    if (

        # looking for opening or closing block brace
        $block_type_to_go[$max_index_to_go]

        # but not one of the block types flagged in
        # %is_block_without_semicolon, which are never duplicated on a line
        && !$is_block_without_semicolon{ $block_type_to_go[$max_index_to_go] }
      )
    {
        my $lev = $nesting_depth_to_go[$max_index_to_go];

        # Walk backwards from the end and
        # set break at any closing block braces at the same level.
        # But quit if we are not in a chain of blocks.
        for ( my $i = $max_index_to_go - 1 ; $i >= 0 ; $i-- ) {
            last if ( $levels_to_go[$i] < $lev );    # stop at a lower level
            next if ( $levels_to_go[$i] > $lev );    # skip past higher level

            if ( $block_type_to_go[$i] ) {
                if ( $tokens_to_go[$i] eq '}' ) {
                    set_forced_breakpoint($i);
                    $saw_good_break = 1;
                }
            }

            # quit if we see anything besides words, function, blanks
            # at this level
            elsif ( $types_to_go[$i] !~ /^[Gwib]$/ ) { last }
        }
    }

    my $imin = 0;
    my $imax = $max_index_to_go;

    # trim any blank tokens
    if ( $max_index_to_go >= 0 ) {
        if ( $types_to_go[$imin] eq 'b' ) { $imin++ }
        if ( $types_to_go[$imax] eq 'b' ) { $imax-- }
    }

    # anything left to write?
    if ( $imin <= $imax ) {

        # add a blank line before certain key types
        if ( $last_line_leading_type !~ /^[#b]/ ) {
            my $want_blank    = 0;
            my $leading_token = $tokens_to_go[$imin];
            my $leading_type  = $types_to_go[$imin];

            # blank lines before subs except declarations and one-liners
            # MCONVERSION LOCATION - for sub tokenization change
            if ( $leading_token =~ /^(sub\s)/ && $leading_type eq 'i' ) {
                $want_blank = ( $rOpts->{'blanks-before-subs'} )
                  && (
                    terminal_type( \@types_to_go, \@block_type_to_go, $imin,
                        $imax ) !~ /^[\;\}]$/
                  );
            }

            # break before all package declarations
            # MCONVERSION LOCATION - for tokenization change
            elsif ($leading_token =~ /^(package\s)/
                && $leading_type eq 'i' )
            {
                $want_blank = ( $rOpts->{'blanks-before-subs'} );
            }

            # break before certain key blocks except one-liners
            if ( $leading_token =~ /^(BEGIN|END)$/ && $leading_type eq 'k' ) {
                $want_blank = ( $rOpts->{'blanks-before-subs'} )
                  && (
                    terminal_type( \@types_to_go, \@block_type_to_go, $imin,
                        $imax ) ne '}'
                  );
            }

            # Break before certain block types if we haven't had a
            # break at this level for a while.  This is the
            # difficult decision..
            # NOTE(review): the opening line of this condition was missing
            # from this copy of the file; the keyword list below was
            # restored from the documented behaviour of -bbb (if, unless,
            # while, until, for, foreach) -- confirm against upstream.
            elsif ($leading_token =~ /^(unless|if|while|until|for|foreach)$/
                && $leading_type eq 'k' )
            {
                my $lc = $nonblank_lines_at_depth[$last_line_leading_level];
                if ( !defined($lc) ) { $lc = 0 }

                $want_blank =
                     $rOpts->{'blanks-before-blocks'}
                  && $lc >= $rOpts->{'long-block-line-count'}
                  && $file_writer_object->get_consecutive_nonblank_lines() >=
                  $rOpts->{'long-block-line-count'}
                  && (
                    terminal_type( \@types_to_go, \@block_type_to_go, $imin,
                        $imax ) ne '}'
                  );
            }

            if ($want_blank) {

                # future: send blank line down normal path to VerticalAligner
                Perl::Tidy::VerticalAligner::flush();
                $file_writer_object->write_blank_code_line();
            }
        }

        # update blank line variables and count number of consecutive
        # non-blank, non-comment lines at this level
        $last_last_line_leading_level = $last_line_leading_level;
        $last_line_leading_level      = $levels_to_go[$imin];
        if ( $last_line_leading_level < 0 ) { $last_line_leading_level = 0 }
        $last_line_leading_type = $types_to_go[$imin];
        if (   $last_line_leading_level == $last_last_line_leading_level
            && $last_line_leading_type ne 'b'
            && $last_line_leading_type ne '#'
            && defined( $nonblank_lines_at_depth[$last_line_leading_level] ) )
        {
            $nonblank_lines_at_depth[$last_line_leading_level]++;
        }
        else {
            $nonblank_lines_at_depth[$last_line_leading_level] = 1;
        }

        FORMATTER_DEBUG_FLAG_FLUSH && do {
            my ( $package, $file, $line ) = caller;
            print
"FLUSH: flushing from $package $file $line, types= $types_to_go[$imin] to $types_to_go[$imax]\n";
        };

        # add a couple of extra terminal blank tokens
        pad_array_to_go();

        # set all forced breakpoints for good list formatting
        my $is_long_line = excess_line_length( $imin, $max_index_to_go ) > 0;

        if (
            $max_index_to_go > 0
            && (
                   $is_long_line
                || $old_line_count_in_batch > 1
                || is_unbalanced_batch()
                || (
                    $comma_count_in_batch
                    && (   $rOpts_maximum_fields_per_table > 0
                        || $rOpts_comma_arrow_breakpoints == 0 )
                )
            )
          )
        {
            $saw_good_break ||= scan_list();
        }

        # let $ri_first and $ri_last be references to lists of
        # first and last tokens of line fragments to output..
        my ( $ri_first, $ri_last );

        # write a single line if..
        if (

            # we aren't allowed to add any newlines
            !$rOpts_add_newlines

            # or, we don't already have an interior breakpoint
            # and we didn't see a good breakpoint
            || (
                   !$forced_breakpoint_count
                && !$saw_good_break

                # and this line is 'short'
                && !$is_long_line
            )
          )
        {
            @$ri_first = ($imin);
            @$ri_last  = ($imax);
        }

        # otherwise use multiple lines
        else {

            ( $ri_first, $ri_last, my $colon_count ) =
              set_continuation_breaks($saw_good_break);

            break_all_chain_tokens( $ri_first, $ri_last );

            break_equals( $ri_first, $ri_last );

            # now we do a correction step to clean this up a bit
            # (The only time we would not do this is for debugging)
            if ( $rOpts->{'recombine'} ) {
                ( $ri_first, $ri_last ) =
                  recombine_breakpoints( $ri_first, $ri_last );
            }

            insert_final_breaks( $ri_first, $ri_last ) if $colon_count;
        }

        # do corrector step if -lp option is used
        my $do_not_pad = 0;
        if ($rOpts_line_up_parentheses) {
            $do_not_pad = correct_lp_indentation( $ri_first, $ri_last );
        }
        send_lines_to_vertical_aligner( $ri_first, $ri_last, $do_not_pad );
    }
    prepare_for_new_input_lines();

    # output any new -cscw block comment
    if ($cscw_block_comment) {
        flush();
        $file_writer_object->write_code_line( $cscw_block_comment . "\n" );
    }
}

9461

9462

sub note_added_semicolon {

    # Bookkeeping for a semicolon inserted by the formatter: remember the
    # input line of the first and of the most recent insertion, count it,
    # and leave a note in the log file.
    my $line_number = $input_line_number;

    # the very first insertion also defines the 'first' location
    $first_added_semicolon_at = $line_number
      if ( $added_semicolon_count == 0 );

    $last_added_semicolon_at = $line_number;
    $added_semicolon_count++;
    write_logfile_entry("Added ';' here\n");
}

9470

9471

sub note_deleted_semicolon {

    # Bookkeeping for a semicolon removed by the formatter: remember the
    # input line of the first and of the most recent deletion, count it,
    # and leave a note in the log file.
    my $line_number = $input_line_number;

    # the very first deletion also defines the 'first' location
    $first_deleted_semicolon_at = $line_number
      if ( $deleted_semicolon_count == 0 );

    $last_deleted_semicolon_at = $line_number;
    $deleted_semicolon_count++;
    write_logfile_entry("Deleted unnecessary ';'\n");
}

9479

9480

sub note_embedded_tab {

    # Bookkeeping for a tab character found inside a quote or pattern:
    # count it and remember the input line of the first and of the most
    # recent occurrence.
    $embedded_tab_count++;
    $last_embedded_tab_at = $input_line_number;

    # only set once, while no first location has been recorded yet
    $first_embedded_tab_at ||= $last_embedded_tab_at;

    # stop writing log entries once MAX_NAG_MESSAGES have been issued
    write_logfile_entry("Embedded tabs in quote or pattern\n")
      if ( $embedded_tab_count <= MAX_NAG_MESSAGES );
}

9491

9492

sub starting_one_line_block {

    # Called after an opening curly brace has been seen: scan ahead for
    # the matching closing brace and decide whether the whole block looks
    # like it fits on one line.  The estimate uses the old whitespace and
    # is therefore only preliminary; a second check is made at the closing
    # brace.  The preliminary check is still needed, because otherwise we
    # would always break at a semicolon inside a multi-statement block.
    #
    # Parameters:
    #   $j, $jmax    - index of the opening brace and of the last token in
    #                  the input token arrays
    #   $level, $slevel, $ci_level - accepted but not used here
    #   $rtokens, $rtoken_type, $rblock_type - references to the token
    #                  text, token type and block type arrays of the line
    #
    # Return value:
    #   1 = distance from start of block to opening brace exceeds the
    #       line length (or the start of the block cannot be located)
    #   0 = otherwise
    #
    # Side effect: may call create_one_line_block() to start tracking a
    # one-line block.

    my ( $j, $jmax, $level, $slevel, $ci_level, $rtokens, $rtoken_type,
        $rblock_type )
      = @_;

    # kill any current block - we can only go 1 deep
    destroy_one_line_block();

    if ( $max_index_to_go < 0 ) {

        # shouldn't happen: there must have been a prior call to
        # store_token_to_go to put the opening brace in the output stream
        warning("program bug: store_token_to_go called incorrectly\n");
        report_definite_bug();
    }
    elsif ( ( $tokens_to_go[0] eq '}' ) && $rOpts_cuddled_else ) {

        # cannot use one-line blocks with cuddled else/elsif lines
        return 0;
    }

    my $block_type = $rblock_type->[$j];

    # Locate the block keyword just after the most recent forced break,
    # skipping one leading blank.  Yields the token index, or undef when
    # the token found there is not the block keyword.
    my $keyword_index_after_break = sub {
        my $k = $index_max_forced_break + 1;
        $k++ if ( $types_to_go[$k] eq 'b' );
        return ( $tokens_to_go[$k] eq $block_type ) ? $k : undef;
    };

    # find the starting keyword for this block (such as 'if', 'else', ...)
    my $i_start;
    if ( $block_type =~ /^[\{\}\;\:]$/ ) {
        $i_start = $max_index_to_go;
    }
    elsif ( $last_last_nonblank_token_to_go eq ')' ) {

        # For something like "if (xxx) {", the keyword "if" will be
        # just after the most recent break. This will be 0 unless
        # we have just killed a one-line block and are starting another.
        # (doif.t)
        $i_start = $keyword_index_after_break->();
        return 0 unless ( defined($i_start) );
    }
    elsif (
        $last_last_nonblank_token_to_go eq $block_type
        || (   $block_type =~ /^sub/
            && $last_last_nonblank_token_to_go =~ /^sub/ )
      )
    {

        # the previous nonblank token starts these block types
        $i_start = $last_last_nonblank_index_to_go;
    }
    elsif ( $block_type eq 'case' || $block_type eq 'when' ) {

        # patch for SWITCH/CASE to retain one-line case/when blocks
        $i_start = $keyword_index_after_break->();
        return 0 unless ( defined($i_start) );
    }
    else {
        return 1;
    }

    # see if length is too long to even start
    my $pos = total_line_length( $i_start, $max_index_to_go ) - 1;
    return 1 if ( $pos > $rOpts_maximum_line_length );

    foreach my $i ( $j + 1 .. $jmax ) {
        my $tok_i  = $rtokens->[$i];
        my $type_i = $rtoken_type->[$i];

        # old whitespace could be arbitrarily large, so don't use it
        $pos += ( $type_i eq 'b' ) ? 1 : length($tok_i);

        # Return false result if we exceed the maximum line length,
        return 0 if ( $pos > $rOpts_maximum_line_length );

        # only braces belonging to code blocks are of interest below
        next unless ( $rblock_type->[$i] );

        # or encounter another opening brace before finding the closing
        # brace.
        return 0 if ( $tok_i eq '{' && $type_i eq '{' );

        # just keep going for anything but our closing brace
        next unless ( $tok_i eq '}' && $type_i eq '}' );

        # be sure any trailing comment also fits on the line
        my $i_nonblank =
          ( $rtoken_type->[ $i + 1 ] eq 'b' ) ? $i + 2 : $i + 1;

        if ( $rtoken_type->[$i_nonblank] eq '#' ) {
            $pos += length( $rtokens->[$i_nonblank] );

            # include the blank separating the brace from the comment
            if ( $i_nonblank > $i + 1 ) {
                $pos += length( $rtokens->[ $i + 1 ] );
            }

            return 0 if ( $pos > $rOpts_maximum_line_length );
        }

        # ok, it's a one-line block
        create_one_line_block( $i_start, 20 );
        return 0;
    }

    # Allow certain types of new one-line blocks to form by joining
    # input lines.  These can be safely done, but for other block types,
    # we keep old one-line blocks but do not form new ones. It is not
    # always a good idea to make as many one-line blocks as possible,
    # so other types are not done.  The user can always use -mangle.
    create_one_line_block( $i_start, 1 )
      if ( $is_sort_map_grep_eval{$block_type} );

    return 0;
}

9649

9650

sub unstore_token_to_go {

    # Remove the most recent token from the output stream by stepping the
    # batch index back by one; when no earlier token remains the index is
    # reset to UNDEFINED_INDEX.
    $max_index_to_go =
      ( $max_index_to_go > 0 )
      ? $max_index_to_go - 1
      : UNDEFINED_INDEX;
}

9661

9662

sub want_blank_line {

    # Flush whatever is pending, then forward the blank line request to
    # the file writer object.  The writer's return value is passed back.
    flush();
    return $file_writer_object->want_blank_line();
}

9666

9667

sub write_unindented_line {

    # Flush whatever is pending, then hand the caller's line (the first
    # argument) to the file writer object as is.  The writer's return
    # value is passed back.
    flush();
    return $file_writer_object->write_line( $_[0] );
}

9671

9672

sub undo_lp_ci {

    # If there is a single, long parameter within parens, like this:
    #
    #  $self->command( "/msg "
    #        . $infoline->chan
    #        . " You said $1, but did you know that it's square was "
    #        . $1 * $1 . " ?" );
    #
    # we can remove the continuation indentation of the 2nd and higher lines
    # to achieve this effect, which is more pleasing:
    #
    #  $self->command("/msg "
    #                 . $infoline->chan
    #                 . " You said $1, but did you know that it's square was "
    #                 . $1 * $1 . " ?");
    #
    # Parameters:
    #   $line_open     - line number (index into @$ri_first) after which the
    #                    candidate continuation lines begin
    #   $i_start       - token index supplying the reference level and
    #                    continuation-indentation level
    #   $closing_index - token index at which the scan stops
    #   $ri_first, $ri_last - references to arrays holding the first and
    #                    last token index of each line of the batch
    #
    # Returns nothing.  On success the ci levels of the leading tokens of
    # the affected lines are set to 0 and their leading spaces are replaced
    # by the corresponding reduced spaces.

    my ( $line_open, $i_start, $closing_index, $ri_first, $ri_last ) = @_;
    my $max_line = @$ri_first - 1;

    # must be multiple lines
    return unless $max_line > $line_open;

    my $lev_start     = $levels_to_go[$i_start];
    my $ci_start_plus = 1 + $ci_levels_to_go[$i_start];

    # see if all additional lines in this container have continuation
    # indentation
    my $n;
    my $line_1 = 1 + $line_open;
    for ( $n = $line_1 ; $n <= $max_line ; ++$n ) {
        my $ibeg = $$ri_first[$n];
        my $iend = $$ri_last[$n];

        # token indexes are numbers, so compare them numerically; a line
        # starting with the closing token is itself excluded
        if ( $ibeg == $closing_index ) { $n--; last }
        return if ( $lev_start != $levels_to_go[$ibeg] );
        return if ( $ci_start_plus != $ci_levels_to_go[$ibeg] );
        last   if ( $closing_index <= $iend );
    }

    # If the scan ran off the end without reaching the closing token, $n
    # is one past the last line.  Clamp it so the slices below never use
    # an undefined index (which Perl would treat as token 0).
    if ( $n > $max_line ) { $n = $max_line }

    # we can reduce the indentation of all continuation lines
    my $continuation_line_count = $n - $line_open;
    @ci_levels_to_go[ @$ri_first[ $line_1 .. $n ] ] =
      (0) x ($continuation_line_count);
    @leading_spaces_to_go[ @$ri_first[ $line_1 .. $n ] ] =
      @reduced_spaces_to_go[ @$ri_first[ $line_1 .. $n ] ];
}

9718

9719

sub set_logical_padding {

9720

9721

# Look at a batch of lines and see if extra padding can improve the

9722

# alignment when there are certain leading operators. Here is an

9723

# example, in which some extra space is introduced before

9724

# '( $year' to make it line up with the subsequent lines:

9725

#

9726

# if ( ( $Year < 1601 )

9727

# || ( $Year > 2899 )

9728

# || ( $EndYear < 1601 )

9729

# || ( $EndYear > 2899 ) )

9730

# {

9731

# &Error_OutOfRange;

9732

# }

9733

#

9734

my ( $ri_first, $ri_last ) = @_;

9735

my $max_line = @$ri_first - 1;

9736

9737

my ( $ibeg, $ibeg_next, $ibegm, $iend, $iendm, $ipad, $line, $pad_spaces,

9738

$tok_next, $type_next, $has_leading_op_next, $has_leading_op );

9739

9740

# looking at each line of this batch..

9741

foreach $line ( 0 .. $max_line - 1 ) {

9742

9743

# see if the next line begins with a logical operator

9744

$ibeg = $$ri_first[$line];

9745

$iend = $$ri_last[$line];

9746

$ibeg_next = $$ri_first[ $line + 1 ];

9747

$tok_next = $tokens_to_go[$ibeg_next];

9748

$type_next = $types_to_go[$ibeg_next];

9749

9750

$has_leading_op_next = ( $tok_next =~ /^\w/ )

9751

? $is_chain_operator{$tok_next} # + - * / : ? && ||

9752

: $is_chain_operator{$type_next}; # and, or

9753

9754

next unless ($has_leading_op_next);

9755

9756

# next line must not be at lesser depth

9757

if ( $nesting_depth_to_go[$ibeg] > $nesting_depth_to_go[$ibeg_next] );

9759

9760

# identify the token in this line to be padded on the left

9761

$ipad = undef;

9762

9763

# handle lines at same depth...

9764

if ( $nesting_depth_to_go[$ibeg] == $nesting_depth_to_go[$ibeg_next] ) {

9765

9766

# if this is not first line of the batch ...

9767

if ( $line > 0 ) {

9768

9769

# and we have leading operator..

9770

next if $has_leading_op;

9771

9772

# Introduce padding if..

9773

# 1. the previous line is at lesser depth, or

9774

# 2. the previous line ends in an assignment

9775

# 3. the previous line ends in a 'return'

9776

# 4. the previous line ends in a comma

9777

# Example 1: previous line at lesser depth

9778

# if ( ( $Year < 1601 ) # <- we are here but

9779

# || ( $Year > 2899 ) # list has not yet

9780

# || ( $EndYear < 1601 ) # collapsed vertically

9781

# || ( $EndYear > 2899 ) )

9782

# {

9783

#

9784

# Example 2: previous line ending in assignment:

9785

# $leapyear =

9786

# $year % 4 ? 0 # <- We are here

9787

# : $year % 100 ? 1

9788

# : $year % 400 ? 0

9789

# : 1;

9790

#

9791

# Example 3: previous line ending in comma:

9792

# push @expr,

9793

# /test/ ? undef

9794

# : eval($_) ? 1

9795

# : eval($_) ? 1

9796

# : 0;

9797

9798

# be sure levels agree (do not indent after an indented 'if')

9799

next if ( $levels_to_go[$ibeg] ne $levels_to_go[$ibeg_next] );

9800

9801

# allow padding on first line after a comma but only if:

9802

# (1) this is line 2 and

9803

# (2) there are at more than three lines and

9804

# (3) lines 3 and 4 have the same leading operator

9805

# These rules try to prevent padding within a long

9806

# comma-separated list.

9807

my $ok_comma;

9808

if ( $types_to_go[$iendm] eq ','

9809

&& $line == 1

9810

&& $max_line > 2 )

9811

{

9812

my $ibeg_next_next = $$ri_first[ $line + 2 ];

9813

my $tok_next_next = $tokens_to_go[$ibeg_next_next];

9814

$ok_comma = $tok_next_next eq $tok_next;

9815

}

9816

9817

unless (

9819

$is_assignment{ $types_to_go[$iendm] }

9820

|| $ok_comma

9821

|| ( $nesting_depth_to_go[$ibegm] <

9822

$nesting_depth_to_go[$ibeg] )

9823

|| ( $types_to_go[$iendm] eq 'k'

9824

&& $tokens_to_go[$iendm] eq 'return' )

9825

);

9826

9827

# we will add padding before the first token

9828

$ipad = $ibeg;

9829

}

9830

9831

# for first line of the batch..

9832

else {

9833

9834

# WARNING: Never indent if first line is starting in a

9835

# continued quote, which would change the quote.

9836

next if $starting_in_quote;

9837

9838

# if this is text after closing '}'

9839

# then look for an interior token to pad

9840

if ( $types_to_go[$ibeg] eq '}' ) {

9841

9842

}

9843

9844

# otherwise, we might pad if it looks really good

9845

else {

9846

9847

# we might pad token $ibeg, so be sure that it

9848

# is at the same depth as the next line.

9849

if ( $nesting_depth_to_go[$ibeg] !=

9851

$nesting_depth_to_go[$ibeg_next] );

9852

9853

# We can pad on line 1 of a statement if at least 3

9854

# lines will be aligned. Otherwise, it

9855

# can look very confusing.

9856

9857

# We have to be careful not to pad if there are too few

9858

# lines. The current rule is:

9859

# (1) in general we require at least 3 consecutive lines

9860

# with the same leading chain operator token,

9861

# (2) but an exception is that we only require two lines

9862

# with leading colons if there are no more lines. For example,

9863

# the first $i in the following snippet would get padding

9864

# by the second rule:

9865

#

9866

# $i == 1 ? ( "First", "Color" )

9867

# : $i == 2 ? ( "Then", "Rarity" )

9868

# : ( "Then", "Name" );

9869

9870

if ( $max_line > 1 ) {

9871

my $leading_token = $tokens_to_go[$ibeg_next];

9872

my $tokens_differ;

9873

9874

# never indent line 1 of a '.' series because

9875

# previous line is most likely at same level.

9876

# TODO: we should also look at the leading_spaces

9877

# of the last output line and skip if it is same

9878

# as this line.

9879

next if ( $leading_token eq '.' );

9880

9881

my $count = 1;

9882

foreach my $l ( 2 .. 3 ) {

9883

last if ( $line + $l > $max_line );

9884

my $ibeg_next_next = $$ri_first[ $line + $l ];

9885

if ( $tokens_to_go[$ibeg_next_next] ne

9886

$leading_token )

9887

{

9888

$tokens_differ = 1;

9889

last;

9890

}

9891

$count++;

9892

}

9893

next if ($tokens_differ);

9894

next if ( $count < 3 && $leading_token ne ':' );

9895

$ipad = $ibeg;

9896

}

9897

else {

9898

next;

9899

}

9900

}

9901

}

9902

}

9903

9904

# find interior token to pad if necessary

9905

if ( !defined($ipad) ) {

9906

9907

for ( my $i = $ibeg ; ( $i < $iend ) && !$ipad ; $i++ ) {

9908

9909

# find any unclosed container

9910

unless ( $type_sequence_to_go[$i]

9912

&& $mate_index_to_go[$i] > $iend );

9913

9914

# find next nonblank token to pad

9915

$ipad = $i + 1;

9916

if ( $types_to_go[$ipad] eq 'b' ) {

9917

$ipad++;

9918

last if ( $ipad > $iend );

9919

}

9920

}

9921

last unless $ipad;

9922

}

9923

9924

# next line must not be at greater depth

9925

my $iend_next = $$ri_last[ $line + 1 ];

9926

if ( $nesting_depth_to_go[ $iend_next + 1 ] >

9928

$nesting_depth_to_go[$ipad] );

9929

9930

# lines must be somewhat similar to be padded..

9931

my $inext_next = $ibeg_next + 1;

9932

if ( $types_to_go[$inext_next] eq 'b' ) {

9933

$inext_next++;

9934

}

9935

my $type = $types_to_go[$ipad];

9936

my $type_next = $types_to_go[ $ipad + 1 ];

9937

9938

# see if there are multiple continuation lines

9939

my $logical_continuation_lines = 1;

9940

if ( $line + 2 <= $max_line ) {

9941

my $leading_token = $tokens_to_go[$ibeg_next];

9942

my $ibeg_next_next = $$ri_first[ $line + 2 ];

9943

if ( $tokens_to_go[$ibeg_next_next] eq $leading_token

9944

&& $nesting_depth_to_go[$ibeg_next] eq

9945

$nesting_depth_to_go[$ibeg_next_next] )

9946

{

9947

$logical_continuation_lines++;

9948

}

9949

}

9950

9951

# see if leading types match

9952

my $types_match = $types_to_go[$inext_next] eq $type;

9953

my $matches_without_bang;

9954

9955

# if first line has leading ! then compare the following token

9956

if ( !$types_match && $type eq '!' ) {

9957

$types_match = $matches_without_bang =

9958

$types_to_go[$inext_next] eq $types_to_go[ $ipad + 1 ];

9959

}

9960

9961

if (

9962

9963

# either we have multiple continuation lines to follow

9964

# and we are not padding the first token

9965

( $logical_continuation_lines > 1 && $ipad > 0 )

9966

9967

# or..

9968

|| (

9969

9970

# types must match

9971

$types_match

9972

9973

# and keywords must match if keyword

9974

&& !(

9975

$type eq 'k'

9976

&& $tokens_to_go[$ipad] ne $tokens_to_go[$inext_next]

9977

)

9978

)

9979

)

9980

{

9981

9982

#----------------------begin special checks--------------

9983

#

9984

# SPECIAL CHECK 1:

9985

# A check is needed before we can make the pad.

9986

# If we are in a list with some long items, we want each

9987

# item to stand out. So in the following example, the

9988

# first line beginning with '$casefold->' would look good

9989

# padded to align with the next line, but then it

9990

# would be indented more than the last line, so we

9991

# won't do it.

9992

#

9993

# ok(

9994

# $casefold->{code} eq '0041'

9995

# && $casefold->{status} eq 'C'

9996

# && $casefold->{mapping} eq '0061',

9997

# 'casefold 0x41'

9998

# );

9999

#

10000

# Note:

10001

# It would be faster, and almost as good, to use a comma

10002

# count, and not pad if comma_count > 1 and the previous

10003

# line did not end with a comma.

10004

#

10005

my $ok_to_pad = 1;

10006

10007

my $ibg = $$ri_first[ $line + 1 ];

10008

my $depth = $nesting_depth_to_go[ $ibg + 1 ];

10009

10010

# just use simplified formula for leading spaces to avoid

10011

# needless sub calls

10012

my $lsp = $levels_to_go[$ibg] + $ci_levels_to_go[$ibg];

10013

10014

# look at each line beyond the next ..

10015

my $l = $line + 1;

10016

foreach $l ( $line + 2 .. $max_line ) {

10017

my $ibg = $$ri_first[$l];

10018

10019

# quit looking at the end of this container

10020

last

10021

if ( $nesting_depth_to_go[ $ibg + 1 ] < $depth )

10022

|| ( $nesting_depth_to_go[$ibg] < $depth );

10023

10024

# cannot do the pad if a later line would be

10025

# outdented more

10026

if ( $levels_to_go[$ibg] + $ci_levels_to_go[$ibg] < $lsp ) {

10027

$ok_to_pad = 0;

10028

last;

10029

}

10030

}

10031

10032

# don't pad if we end in a broken list

10033

if ( $l == $max_line ) {

10034

my $i2 = $$ri_last[$l];

10035

if ( $types_to_go[$i2] eq '#' ) {

10036

my $i1 = $$ri_first[$l];

10037

if (

10039

terminal_type( \@types_to_go, \@block_type_to_go, $i1,

10040

$i2 ) eq ','

10041

);

10042

}

10043

}

10044

10045

# SPECIAL CHECK 2:

10046

# a minus may introduce a quoted variable, and we will

10047

# add the pad only if this line begins with a bare word,

10048

# such as for the word 'Button' here:

10049

# [

10050

# Button => "Print letter \"~$_\"",

10051

# -command => [ sub { print "$_[0]\n" }, $_ ],

10052

# -accelerator => "Meta+$_"

10053

# ];

10054

#

10055

# On the other hand, if 'Button' is quoted, it looks best

10056

# not to pad:

10057

# [

10058

# 'Button' => "Print letter \"~$_\"",

10059

# -command => [ sub { print "$_[0]\n" }, $_ ],

10060

# -accelerator => "Meta+$_"

10061

# ];

10062

if ( $types_to_go[$ibeg_next] eq 'm' ) {

10063

$ok_to_pad = 0 if $types_to_go[$ibeg] eq 'Q';

10064

}

10065

10066

next unless $ok_to_pad;

10067

10068

#----------------------end special check---------------

10069

10070

my $length_1 = total_line_length( $ibeg, $ipad - 1 );

10071

my $length_2 = total_line_length( $ibeg_next, $inext_next - 1 );

10072

$pad_spaces = $length_2 - $length_1;

10073

10074

# If the first line has a leading ! and the second does

10075

# not, then remove one space to try to align the next

10076

# leading characters, which are often the same. For example:

10077

# if ( !$ts

10078

# || $ts == $self->Holder

10079

# || $self->Holder->Type eq "Arena" )

10080

#

10081

# This usually helps readability, but if there are subsequent

10082

# ! operators things will still get messed up. For example:

10083

#

10084

# if ( !exists $Net::DNS::typesbyname{$qtype}

10085

# && exists $Net::DNS::classesbyname{$qtype}

10086

# && !exists $Net::DNS::classesbyname{$qclass}

10087

# && exists $Net::DNS::typesbyname{$qclass} )

10088

# We can't fix that.

10089

if ($matches_without_bang) { $pad_spaces-- }

10090

10091

# make sure this won't change if -lp is used

10092

my $indentation_1 = $leading_spaces_to_go[$ibeg];

10093

if ( ref($indentation_1) ) {

10094

if ( $indentation_1->get_RECOVERABLE_SPACES() == 0 ) {

10095

my $indentation_2 = $leading_spaces_to_go[$ibeg_next];

10096

unless ( $indentation_2->get_RECOVERABLE_SPACES() == 0 ) {

10097

$pad_spaces = 0;

10098

}

10099

}

10100

}

10101

10102

# we might be able to handle a pad of -1 by removing a blank

10103

# token

10104

if ( $pad_spaces < 0 ) {

10105

10106

if ( $pad_spaces == -1 ) {

10107

if ( $ipad > $ibeg && $types_to_go[ $ipad - 1 ] eq 'b' ) {

10108

$tokens_to_go[ $ipad - 1 ] = '';

10109

}

10110

}

10111

$pad_spaces = 0;

10112

}

10113

10114

# now apply any padding for alignment

10115

if ( $ipad >= 0 && $pad_spaces ) {

10116

10117

my $length_t = total_line_length( $ibeg, $iend );

10118

if ( $pad_spaces + $length_t <= $rOpts_maximum_line_length ) {

10119

$tokens_to_go[$ipad] =

10120

' ' x $pad_spaces . $tokens_to_go[$ipad];

10121

}

10122

}

10123

}

10124

}

10125

continue {

10126

$iendm = $iend;

10127

$ibegm = $ibeg;

10128

$has_leading_op = $has_leading_op_next;

10129

} # end of loop over lines

10130

return;

10131

}

10132

10133

sub correct_lp_indentation {

    # When the -lp option is used, a last pass is made through each line
    # to correct the indentation positions in case they differ from the
    # predictions.  perltidy uses a predictor/corrector method for aligning
    # with opening parens: the predictor is usually good but sometimes
    # stumbles, and this corrector patches things up once the actual
    # opening paren locations are known.
    #
    # Parameters:
    #   $ri_first - ref to array of first token index of each line in batch
    #   $ri_last  - ref to array of last token index of each line in batch
    #
    # Returns the flag $do_not_pad, which is set to 1 when an indentation
    # item being aligned with a paren is first seen at the first token of
    # the second line of the batch.
    #
    # Note on flag '$do_not_pad':
    # We want to avoid having the aligner insert whitespace before an '='
    # to align it with a previous '=' when that would push the opening
    # '(' forward beyond where we want it, as here:
    #
    #  $mkFloor::currentRoom = '';
    #  $mkFloor::c_entry     = $c->Entry(
    #                                 -width        => '10',
    #                                 -relief       => 'sunken',
    #                                 ...
    #                                 );
    #
    # We leave it to the aligner to decide how to do this.
    my ( $ri_first, $ri_last ) = @_;
    my $do_not_pad = 0;

    my $max_line = $#{$ri_first};

    # examine each line of this batch ..
    for my $line ( 0 .. $max_line ) {
        my $ibeg = $ri_first->[$line];
        my $iend = $ri_last->[$line];

        # .. and each token in the line
        for my $i ( $ibeg .. $iend ) {

            # only the first visit to an indentation item is of interest
            my $indentation = $leading_spaces_to_go[$i];
            next if ( $indentation->get_MARKED() );
            $indentation->set_MARKED(1);

            # we want items which are aligned with parens, braces, and
            # brackets
            next unless ( $indentation->get_ALIGN_PAREN() );

            # skip a container which is closed on this same line
            if ( $i > $ibeg ) {
                my $i_prev = $i - 1;
                $i_prev--
                  if ( $types_to_go[$i_prev] eq 'b' && $i_prev > $ibeg );
                next
                  if ( $type_sequence_to_go[$i_prev]
                    && $mate_index_to_go[$i_prev] <= $iend );
            }

            $do_not_pad = 1 if ( $line == 1 && $i == $ibeg );

            # Find the error between predicted and actual positions
            my $predicted_pos = $indentation->get_SPACES();
            my $actual_pos;
            if ( $i > $ibeg ) {

                # mid-line token: measure up to the previous token
                $actual_pos = total_line_length( $ibeg, $i - 1 );

                # For a mid-line token, continuation indentation on the
                # following lines is removed if present; otherwise we do
                # not get good alignment.
                my $closing_index = $indentation->get_CLOSED();
                if ( $closing_index > $iend ) {
                    my $ibeg_next = $ri_first->[ $line + 1 ];
                    undo_lp_ci( $line, $i, $closing_index, $ri_first,
                        $ri_last )
                      if ( $ci_levels_to_go[$ibeg_next] > 0 );
                }
            }
            elsif ( $line > 0 ) {

                # token starts a new line: measure the previous line
                my $ibeg_prev = $ri_first->[ $line - 1 ];
                my $iend_prev = $ri_last->[ $line - 1 ];
                $actual_pos = total_line_length( $ibeg_prev, $iend_prev );

                # follow -pt style
                $actual_pos++ if ( $types_to_go[ $iend_prev + 1 ] eq 'b' );
            }
            else {

                # first token of the first line of the batch: no error
                $actual_pos = $predicted_pos;
            }

            my $move_right = $actual_pos - $predicted_pos;

            # done if no error to correct (gnu2.t)
            if ( $move_right == 0 ) {
                $indentation->set_RECOVERABLE_SPACES($move_right);
                next;
            }

            # If closure for this indentation has not been seen in this
            # batch, all we can do is pass a request on to the vertical
            # aligner.
            my $closing_index = $indentation->get_CLOSED();
            if ( $closing_index < 0 ) {
                $indentation->set_RECOVERABLE_SPACES($move_right);
                next;
            }

            # If necessary, look ahead to see which leading whitespace
            # depends on this whitespace, and find the longest line using
            # it.  It is always safe to move left if there are no
            # dependents, so the look-ahead is only needed if there may be
            # dependent nodes or if we need to move right.
            my $right_margin    = 0;
            my $line_count      = 1;
            my %saw_indentation = ( $indentation => $indentation );

            if ( $indentation->get_HAVE_CHILD() || $move_right > 0 ) {
                my $max_length =
                  ( $i == $ibeg ) ? total_line_length( $ibeg, $iend ) : 0;

                # scan the remaining lines of this batch up to the closure
                for my $line_ahead ( $line + 1 .. $max_line ) {
                    my $ibeg_ahead = $ri_first->[$line_ahead];
                    my $iend_ahead = $ri_last->[$line_ahead];
                    last if ( $closing_index <= $ibeg_ahead );

                    # collect each distinct indentation object
                    my $indentation_ahead = $leading_spaces_to_go[$ibeg_ahead];
                    $saw_indentation{$indentation_ahead} = $indentation_ahead;
                    $line_count++;

                    # track the longest line in the group
                    my $length_ahead =
                      total_line_length( $ibeg_ahead, $iend_ahead );
                    $max_length = $length_ahead
                      if ( $length_ahead > $max_length );
                }
                $right_margin = $rOpts_maximum_line_length - $max_length;
                $right_margin = 0 if ( $right_margin < 0 );
            }

            my $first_line_comma_count =
              scalar grep { $_ eq ',' } @types_to_go[ $ibeg .. $iend ];
            my $comma_count = $indentation->get_COMMA_COUNT();
            my $arrow_count = $indentation->get_ARROW_COUNT();

            # A simple approximate test for vertical alignment: if we broke
            # just after an opening paren, brace, or bracket, and there are
            # 2 or more commas in the first line, and there are no '=>'s,
            # then we are probably vertically aligned.  An exact flag could
            # be set in sub scan_list, but this is good enough.
            my $indentation_count = keys %saw_indentation;
            my $is_vertically_aligned =
              (      $i == $ibeg
                  && $first_line_comma_count > 1
                  && $indentation_count == 1
                  && ( $arrow_count == 0 || $arrow_count == $line_count ) );

            # We can always move left, but we should only move right if we
            # are sure it will not spoil vertical alignment.
            my $ok_to_move =
                 $move_right < 0
              || ( $comma_count == 0 )
              || ( $comma_count > 0 && !$is_vertically_aligned );

            if ($ok_to_move) {

                # a move to the right is limited by the available margin
                my $move = $right_margin;
                $move = $move_right if ( $move_right <= $right_margin );

                for my $item ( values %saw_indentation ) {
                    $item->permanently_decrease_AVAILABLE_SPACES( -$move );
                }
            }
            else {

                # record what we want; the vertical aligner will try to
                # recover it
                $indentation->set_RECOVERABLE_SPACES($move_right);
            }
        }
    }
    return $do_not_pad;
}

10355

10356

# flush is called to output any tokens in the pipeline, so that

10357

# an alternate source of lines can be written in the correct order

10358

10359

sub flush {

    # Output any tokens still in the pipeline so that lines from an
    # alternate source can be written in the correct order.  The steps
    # run in a fixed sequence: finish any one-line block, output the
    # pending batch of tokens, then flush the vertical aligner.
    destroy_one_line_block();
    output_line_to_go();
    return Perl::Tidy::VerticalAligner::flush();
}

10364

10365

sub reset_block_text_accumulator {

    # Clear the state used to accumulate leading block text for closing
    # side comments.  Text accumulated for an 'if' or 'elsif' is first
    # saved on the if/elsif list so that it can be appended after a
    # following 'else'.
    my $is_if_or_elsif = $accumulating_text_for_block
      && $accumulating_text_for_block =~ /^(if|elsif)$/;
    if ($is_if_or_elsif) {
        push @{$rleading_block_if_elsif_text}, $leading_block_text;
    }

    # text state
    $accumulating_text_for_block = "";
    $leading_block_text          = "";

    # numeric state
    $leading_block_text_level           = 0;
    $leading_block_text_length_exceeded = 0;
    $leading_block_text_line_number     = 0;
    $leading_block_text_line_length     = 0;
}

10381

10382

sub set_block_text_accumulator {

    # Begin accumulating leading block text for the block keyword at
    # token index $i.
    my ($i) = @_;
    my $keyword = $tokens_to_go[$i];
    my $level   = $levels_to_go[$i];

    $accumulating_text_for_block = $keyword;

    # a keyword other than 'else' or 'elsif' starts a fresh list of
    # saved if/elsif text
    unless ( $keyword =~ /^els/ ) {
        $rleading_block_if_elsif_text = [];
    }

    $leading_block_text       = "";
    $leading_block_text_level = $level;
    $leading_block_text_line_number =
      $vertical_aligner_object->get_output_line_number();
    $leading_block_text_length_exceeded = 0;

    # this will contain the column number of the last character
    # of the closing side comment
    $leading_block_text_line_length =
      length($keyword) +
      length( $rOpts->{'closing-side-comment-prefix'} ) +
      $level * $rOpts_indent_columns + 3;
}

10401

10402

sub accumulate_block_text {

    # Append the token at index $i to the leading block text being
    # accumulated for -csc, or mark the text as truncated with '...' once
    # the limits are exceeded.  Side comments are ignored.
    my ($i) = @_;

    # nothing to do unless we are accumulating, have not already
    # truncated the text, and this token is not a side comment
    return
      unless ( $accumulating_text_for_block
        && !$leading_block_text_length_exceeded
        && $types_to_go[$i] ne '#' );

    # the first token of a batch gets one extra space in front
    my $token           = $tokens_to_go[$i];
    my $added_length    = length($token) + ( $i == 0 ? 1 : 0 );
    my $new_line_length = $leading_block_text_line_length + $added_length;

    # we can add this text if we don't exceed some limits..
    if (

        # we must not have already exceeded the text length limit
        length($leading_block_text) < $rOpts_closing_side_comment_maximum_text

        # and either:
        # the new total line length must be below the line length limit
        # or the new length must be below the text length limit
        # (ie, we may allow one token to exceed the text length limit)
        && ( $new_line_length < $rOpts_maximum_line_length
            || length($leading_block_text) + $added_length <
            $rOpts_closing_side_comment_maximum_text )

        # UNLESS: we are adding a closing paren before the brace we seek.
        # This is an attempt to avoid situations where the ... to be
        # added are longer than the omitted right paren, as in:
        #
        #   foreach my $item (@a_rather_long_variable_name_here) {
        #      &whatever;
        #   } ## end foreach my $item (@a_rather_long_variable_name_here...
        || (
            $token eq ')'
            && (
                (
                    $i + 1 <= $max_index_to_go
                    && $block_type_to_go[ $i + 1 ] eq
                    $accumulating_text_for_block
                )
                || ( $i + 2 <= $max_index_to_go
                    && $block_type_to_go[ $i + 2 ] eq
                    $accumulating_text_for_block )
            )
        )
      )
    {

        # add the token text, with an extra space at each newline
        $leading_block_text .= ( $i == 0 ? ' ' : '' ) . $token;
        $leading_block_text_line_length = $new_line_length;
    }

    # show that text was truncated if necessary
    elsif ( $types_to_go[$i] ne 'b' ) {
        $leading_block_text_length_exceeded = 1;
        $leading_block_text .= '...';
    }
}

10469

10470

{

    # Block keywords which may have text between the keyword and the
    # opening curly brace.  Note: 'else' does not, but must be included to
    # allow trailing if/elsif text to be appended.
    # patch for SWITCH/CASE: added 'case' and 'when'
    my %is_if_elsif_else_unless_while_until_for_foreach;

    BEGIN {
        my @block_keywords =
          qw(if elsif else unless while until for foreach case when);
        @is_if_elsif_else_unless_while_until_for_foreach{@block_keywords} =
          (1) x scalar(@block_keywords);
    }

    sub accumulate_csc_text {

        # Called once per output buffer when -csc is used.  Accumulates
        # the text placed after certain closing block braces.
        #
        # Takes no arguments; scans tokens 0 .. $max_index_to_go of the
        # current batch.
        #
        # Returns the list:
        #   $terminal_type        - type of the last token which is neither
        #                           a blank nor a side comment ('b' if none)
        #   $i_terminal           - index of that token (0 if none)
        #   $i_block_leading_text - index of the '}' token owning
        #                           $block_leading_text, or -1
        #   $block_leading_text   - the leading text of the last '}'
        #   $block_line_count     - how many lines the block spans
        #                           (initialized to 100)

        my $block_leading_text = "";
        my $rblock_leading_if_elsif_text;
        my $i_block_leading_text = -1;
        my $block_line_count     = 100;
        my $terminal_type        = 'b';
        my $i_terminal           = 0;
        my $terminal_block_type  = "";

        for my $i ( 0 .. $max_index_to_go ) {
            my $type       = $types_to_go[$i];
            my $block_type = $block_type_to_go[$i];
            my $token      = $tokens_to_go[$i];

            # remember last nonblank token type
            if ( $type ne '#' && $type ne 'b' ) {
                $terminal_type       = $type;
                $terminal_block_type = $block_type;
                $i_terminal          = $i;
            }

            my $type_sequence = $type_sequence_to_go[$i];
            if ( $block_type && $type_sequence ) {

                if ( $token eq '}' ) {

                    # restore any leading text saved when we entered this
                    # block
                    if ( defined( $block_leading_text{$type_sequence} ) ) {
                        ( $block_leading_text, $rblock_leading_if_elsif_text )
                          = @{ $block_leading_text{$type_sequence} };
                        $i_block_leading_text = $i;
                        delete $block_leading_text{$type_sequence};
                        $rleading_block_if_elsif_text =
                          $rblock_leading_if_elsif_text;
                    }

                    # if we run into a '}' then we probably started
                    # accumulating at something like a trailing 'if'
                    # clause..no harm done.
                    if (   $accumulating_text_for_block
                        && $levels_to_go[$i] <= $leading_block_text_level )
                    {
                        reset_block_text_accumulator();
                    }

                    if ( defined( $block_opening_line_number{$type_sequence} ) )
                    {
                        my $output_line_number =
                          $vertical_aligner_object->get_output_line_number();
                        $block_line_count =
                          $output_line_number -
                          $block_opening_line_number{$type_sequence} + 1;
                        delete $block_opening_line_number{$type_sequence};
                    }
                    else {

                        # Error: block opening line undefined for this
                        # line..  This shouldn't be possible, but it is
                        # not a significant problem.
                    }
                }

                elsif ( $token eq '{' ) {

                    my $line_number =
                      $vertical_aligner_object->get_output_line_number();
                    $block_opening_line_number{$type_sequence} = $line_number;

                    if (   $accumulating_text_for_block
                        && $levels_to_go[$i] == $leading_block_text_level )
                    {

                        if ( $accumulating_text_for_block eq $block_type ) {

                            # save any leading text before we enter this
                            # block
                            $block_leading_text{$type_sequence} = [
                                $leading_block_text,
                                $rleading_block_if_elsif_text
                            ];
                            $block_opening_line_number{$type_sequence} =
                              $leading_block_text_line_number;
                            reset_block_text_accumulator();
                        }
                        else {

                            # shouldn't happen, but not a serious error.
                            # We were accumulating -csc text for block
                            # type $accumulating_text_for_block and
                            # unexpectedly encountered a '{' for block
                            # type $block_type.
                        }
                    }
                }
            }

            # Deliberately no /o modifier on the pattern match below: /o
            # compiles the pattern only once per process, so any later
            # change to $closing_side_comment_list_pattern would be
            # silently ignored.
            if (   $type eq 'k'
                && $csc_new_statement_ok
                && $is_if_elsif_else_unless_while_until_for_foreach{$token}
                && $token =~ /$closing_side_comment_list_pattern/ )
            {
                set_block_text_accumulator($i);
            }
            else {

                # note: ignoring type 'q' because of tricks being played
                # with 'q' for hanging side comments
                if ( $type ne 'b' && $type ne '#' && $type ne 'q' ) {
                    $csc_new_statement_ok =
                      ( $block_type || $type eq 'J' || $type eq ';' );
                }
                if (   $type eq ';'
                    && $accumulating_text_for_block
                    && $levels_to_go[$i] == $leading_block_text_level )
                {
                    reset_block_text_accumulator();
                }
                else {
                    accumulate_block_text($i);
                }
            }
        }

        # Treat an 'else' block specially by adding preceding 'if' and
        # 'elsif' text.  Otherwise, the 'end else' is not helpful,
        # especially for cuddled-else formatting.
        if ( $terminal_block_type =~ /^els/ && $rblock_leading_if_elsif_text ) {
            $block_leading_text =
              make_else_csc_text( $i_terminal, $terminal_block_type,
                $block_leading_text, $rblock_leading_if_elsif_text );
        }

        return ( $terminal_type, $i_terminal, $i_block_leading_text,
            $block_leading_text, $block_line_count );
    }
}

10623

10624

sub make_else_csc_text {

    # Create additional -csc text for an 'else' and optionally 'elsif',
    # depending on the value of switch
    # $rOpts_closing_side_comment_else_flag:
    #
    #  = 0 add 'if' text to trailing else
    #  = 1 same as 0 plus:
    #      add 'if' to 'elsif's if can fit in line length
    #      add last 'elsif' to trailing else if can fit in one line
    #  = 2 same as 1 but do not check if exceed line length
    #
    # Parameters:
    #   $i_terminal         - index of the terminal token; its level is
    #                         used in the line length estimate
    #   $block_type         - block type of the terminal token
    #   $block_leading_text - the closing side comment text so far
    #   $rif_elsif_text     - a reference to a list of all previous closing
    #                         side comments created for this if block
    #
    # Returns the (possibly extended) closing side comment text.
    my ( $i_terminal, $block_type, $block_leading_text, $rif_elsif_text ) = @_;

    # an 'elsif' gets nothing extra when the flag is 0
    return $block_leading_text
      if ( $block_type eq 'elsif'
        && $rOpts_closing_side_comment_else_flag == 0 );

    # nothing to add without previous if/elsif text
    my @previous_text = @{$rif_elsif_text};
    return $block_leading_text unless (@previous_text);

    my $is_else = $block_type eq 'else';
    my $if_text = '[ if' . $previous_text[0];

    # always show the leading 'if' text on 'else'
    my $short_text = $block_leading_text;
    $short_text .= $if_text if ($is_else);

    # see if that's all
    return $short_text if ( $rOpts_closing_side_comment_else_flag == 0 );

    # tentatively append one more item: the last 'elsif' text for an
    # 'else', or the 'if' text for an 'elsif'
    my $long_text = $short_text;
    if ($is_else) {
        my $last_elsif_text = "";
        if ( @previous_text > 1 ) {
            $last_elsif_text = ' [elsif' . $previous_text[-1];
            $last_elsif_text = ' [...' . $last_elsif_text
              if ( @previous_text > 2 );
        }
        $long_text .= $last_elsif_text;
    }
    else {
        $long_text .= ' ' . $if_text;
    }

    # all done if no length checks requested
    return $long_text if ( $rOpts_closing_side_comment_else_flag == 2 );

    # fall back to the shorter text if the line length would be exceeded
    my $length =
      length($long_text) +
      length($block_type) +
      length( $rOpts->{'closing-side-comment-prefix'} ) +
      $levels_to_go[$i_terminal] * $rOpts_indent_columns + 3;

    return ( $length > $rOpts_maximum_line_length )
      ? $short_text
      : $long_text;
}

10693

10694

sub add_closing_side_comment {

    # Add closing side comments after closing block braces if -csc used.
    #
    # Takes no arguments; works on the current batch of output tokens.
    #
    # Returns $cscw_block_comment: a '## perltidy -cscw ...' block comment
    # holding an old side comment which was replaced under the
    # 'closing-side-comment-warnings' option, or undef if none was
    # replaced.
    my $cscw_block_comment;

    #---------------------------------------------------------------
    # Step 1: loop through all tokens of this line to accumulate
    # the text needed to create the closing side comments. Also see
    # how the line ends.
    #---------------------------------------------------------------

    my ( $terminal_type, $i_terminal, $i_block_leading_text,
        $block_leading_text, $block_line_count )
      = accumulate_csc_text();

    #---------------------------------------------------------------
    # Step 2: make the closing side comment if this ends a block
    #---------------------------------------------------------------
    my $have_side_comment = $i_terminal != $max_index_to_go;

    # Note: the pattern matches below deliberately omit the /o modifier.
    # /o compiles a pattern only once per process, so any later change to
    # the pattern variables would be silently ignored.

    # if this line might end in a block closure..
    if (
        $terminal_type eq '}'

        # ..and either
        && (

            # the block is long enough
            ( $block_line_count >= $rOpts->{'closing-side-comment-interval'} )

            # or there is an existing comment to check
            || (   $have_side_comment
                && $rOpts->{'closing-side-comment-warnings'} )
        )

        # .. and if this is one of the types of interest
        && $block_type_to_go[$i_terminal] =~
        /$closing_side_comment_list_pattern/

        # .. but not an anonymous sub
        # These are not normally of interest, and their closing braces are
        # often followed by commas or semicolons anyway.  This also avoids
        # possible erratic output due to line numbering inconsistencies
        # in the cases where their closing braces terminate a line.
        && $block_type_to_go[$i_terminal] ne 'sub'

        # ..and the corresponding opening brace must not be in this batch
        # (because we do not need to tag one-line blocks, although this
        # should also be caught with a positive -csci value)
        && $mate_index_to_go[$i_terminal] < 0

        # ..and either
        && (

            # this is the last token (line doesn't have a side comment)
            !$have_side_comment

            # or the old side comment is a closing side comment
            || $tokens_to_go[$max_index_to_go] =~
            /$closing_side_comment_prefix_pattern/
        )
      )
    {

        # then make the closing side comment text
        my $token =
"$rOpts->{'closing-side-comment-prefix'} $block_type_to_go[$i_terminal]";

        # append any extra descriptive text collected above
        if ( $i_block_leading_text == $i_terminal ) {
            $token .= $block_leading_text;
        }
        $token =~ s/\s*$//;    # trim any trailing whitespace

        # handle case of existing closing side comment
        if ($have_side_comment) {

            # warn if requested and tokens differ significantly
            if ( $rOpts->{'closing-side-comment-warnings'} ) {
                my $old_csc = $tokens_to_go[$max_index_to_go];
                my $new_csc = $token;

                # trim trailing '...'; only trust $1 if the substitution
                # actually matched, since a failed match leaves the
                # capture variables unchanged
                my $new_trailing_dots =
                  ( $new_csc =~ s/(\.\.\.)\s*$// ) ? $1 : undef;
                $old_csc =~ s/\.\.\.\s*$//;
                $new_csc =~ s/\s+//g;    # trim all whitespace
                $old_csc =~ s/\s+//g;

                # Patch to handle multiple closing side comments at
                # else and elsif's.  These have become too complicated
                # to check, so if we see an indication of
                # '[ if' or '[ # elsif', then assume they were made
                # by perltidy.
                if ( $block_type_to_go[$i_terminal] eq 'else' ) {
                    if ( $old_csc =~ /\[\s*elsif/ ) { $old_csc = $new_csc }
                }
                elsif ( $block_type_to_go[$i_terminal] eq 'elsif' ) {
                    if ( $old_csc =~ /\[\s*if/ ) { $old_csc = $new_csc }
                }

                # if old comment is contained in new comment,
                # only compare the common part.
                if ( length($new_csc) > length($old_csc) ) {
                    $new_csc = substr( $new_csc, 0, length($old_csc) );
                }

                # if the new comment is shorter and has been limited,
                # only compare the common part.
                if ( length($new_csc) < length($old_csc) && $new_trailing_dots )
                {
                    $old_csc = substr( $old_csc, 0, length($new_csc) );
                }

                # any remaining difference?
                if ( $new_csc ne $old_csc ) {

                    # just leave the old comment if we are below the
                    # threshold for creating side comments
                    if ( $block_line_count <
                        $rOpts->{'closing-side-comment-interval'} )
                    {
                        $token = undef;
                    }

                    # otherwise we'll make a note of it
                    else {

                        warning(
"perltidy -cscw replaced: $tokens_to_go[$max_index_to_go]\n"
                        );

                        # save the old side comment in a new trailing
                        # block comment
                        my ( $day, $month, $year ) = (localtime)[ 3, 4, 5 ];
                        $year  += 1900;
                        $month += 1;
                        $cscw_block_comment =
"## perltidy -cscw $year-$month-$day: $tokens_to_go[$max_index_to_go]";
                    }
                }
                else {

                    # No differences.. we can safely delete old comment if
                    # we are below the threshold
                    if ( $block_line_count <
                        $rOpts->{'closing-side-comment-interval'} )
                    {
                        $token = undef;
                        unstore_token_to_go()
                          if ( $types_to_go[$max_index_to_go] eq '#' );
                        unstore_token_to_go()
                          if ( $types_to_go[$max_index_to_go] eq 'b' );
                    }
                }
            }

            # switch to the new csc (unless we deleted it!)
            $tokens_to_go[$max_index_to_go] = $token if $token;
        }

        # handle case of NO existing closing side comment
        else {

            # insert the new side comment into the output token stream,
            # using the nesting depth of the current last token
            my $slevel               = $nesting_depth_to_go[$max_index_to_go];
            my $no_internal_newlines = 0;

            # first insert a blank token
            insert_new_token_to_go( ' ', 'b', $slevel, $no_internal_newlines );

            # then the side comment
            insert_new_token_to_go( $token, '#', $slevel,
                $no_internal_newlines );
        }
    }
    return $cscw_block_comment;
}

10879

10880

sub previous_nonblank_token {

    # Return the text of the token just before index $i in the current
    # batch, stepping over at most one blank (type 'b') token.  An empty
    # string is returned when there is no such token.
    my ($i) = @_;

    my $i_prev = $i - 1;
    return "" if ( $i_prev < 0 );

    # step over a single blank
    $i_prev-- if ( $types_to_go[$i_prev] eq 'b' );
    return "" if ( $i_prev < 0 );

    my $text = $tokens_to_go[$i_prev];
    return $text unless ( $text eq '->' );

    # An isolated '->' gets the token in front of it prepended (provided
    # that token is not a blank) to avoid unwanted alignments
    # [test case is test8/penco.pl]
    my $i_before_arrow = $i_prev - 1;
    if ( $i_before_arrow >= 0 && $types_to_go[$i_before_arrow] ne 'b' ) {
        $text = $tokens_to_go[$i_before_arrow] . $text;
    }
    return $text;
}

10899

10900

sub send_lines_to_vertical_aligner {

    # Prepare every output line of the current batch and pass it to the
    # vertical aligner.
    #
    # given:
    #   $ri_first   - reference to list of the first index $i for each
    #                 output line in this batch
    #   $ri_last    - reference to list of the last index $i for each
    #                 output line in this batch
    #   $do_not_pad - flag handed to the aligner with the first line; it is
    #                 reset to 0 after each line has been sent
    my ( $ri_first, $ri_last, $do_not_pad ) = @_;

    # ref to indentations for each line
    my $rindentation_list = [0];

    # Define the array @matching_token_to_go for the output tokens; it
    # will be non-blank for each special token (such as =>) for which
    # alignment is required.
    set_vertical_alignment_markers( $ri_first, $ri_last );

    # Flush before a long (multi-line) 'if' or 'unless' statement to avoid
    # unwanted alignment.
    if (   @$ri_first > 1
        && $types_to_go[0] eq 'k'
        && $tokens_to_go[0] =~ /^(if|unless)$/ )
    {
        Perl::Tidy::VerticalAligner::flush();
    }

    set_logical_padding( $ri_first, $ri_last );

    # loop to prepare each line for shipment
    my $n_last_line = $#{$ri_first};
    my $in_comma_list;
    foreach my $n ( 0 .. $n_last_line ) {
        my $ibeg = $ri_first->[$n];
        my $iend = $ri_last->[$n];

        my ( $rtokens, $rfields, $rpatterns ) =
          make_alignment_patterns( $ibeg, $iend );

        my (
            $indentation,             $lev,
            $level_end,               $terminal_type,
            $is_semicolon_terminated, $is_outdented_line
          )
          = set_adjusted_indentation(
            $ibeg,     $iend,    $rfields, $rpatterns,
            $ri_first, $ri_last, $rindentation_list
          );

        # We will allow outdenting of long lines which are long quotes, if
        # allowed ...
        my $is_outdentable_quote =
          $types_to_go[$ibeg] eq 'Q' && $rOpts->{'outdent-long-quotes'};

        # ... or which are long block comments, if allowed, but not if this
        # is a static block comment.
        my $outdent_long_lines = $is_outdentable_quote
          || ( $types_to_go[$ibeg] eq '#'
            && $rOpts->{'outdent-long-comments'}
            && !$is_static_block_comment );

        my $level_jump =
          $nesting_depth_to_go[ $iend + 1 ] - $nesting_depth_to_go[$ibeg];

        my $rvertical_tightness_flags =
          set_vertical_tightness_flags( $n, $n_last_line, $ibeg, $iend,
            $ri_first, $ri_last );

        # flush an outdented line to avoid any unwanted vertical alignment
        Perl::Tidy::VerticalAligner::flush() if ($is_outdented_line);

        # A line is marked as a terminal ternary when it begins with ':',
        # or the previous line of the batch ended with ':', and the level
        # at its end does not exceed (with a terminal ';') or is below its
        # starting level.
        my $follows_colon = $tokens_to_go[$ibeg] eq ':'
          || ( $n > 0 && $tokens_to_go[ $ri_last->[ $n - 1 ] ] eq ':' );
        my $is_terminal_ternary = 0;
        if (
            $follows_colon
            && (   ( $terminal_type eq ';' && $level_end <= $lev )
                || ( $level_end < $lev ) )
          )
        {
            $is_terminal_ternary = 1;
        }

        # send this new line down the pipe
        my $forced_breakpoint = $forced_breakpoint_to_go[$iend];
        Perl::Tidy::VerticalAligner::append_line(
            $lev,
            $level_end,
            $indentation,
            $rfields,
            $rtokens,
            $rpatterns,
            $forced_breakpoint || $in_comma_list,
            $outdent_long_lines,
            $is_terminal_ternary,
            $is_semicolon_terminated,
            $do_not_pad,
            $rvertical_tightness_flags,
            $level_jump,
        );

        # remember for the next line whether this one ended with a comma
        # at a forced breakpoint
        $in_comma_list =
          $tokens_to_go[$iend] eq ',' && $forced_breakpoint_to_go[$iend];

        # flush an outdented line to avoid any unwanted vertical alignment
        Perl::Tidy::VerticalAligner::flush() if ($is_outdented_line);

        $do_not_pad = 0;

    }    # end of loop to output each line

    # remember indentation of lines containing opening containers for
    # later use by sub set_adjusted_indentation
    save_opening_indentation( $ri_first, $ri_last, $rindentation_list );
}

11009

11010

{    # begin make_alignment_patterns

    # lookup tables shared with sub make_alignment_patterns
    my %block_type_map;
    my %keyword_map;

    BEGIN {

        # Related block names are mapped into a common name so that their
        # opening braces are allowed to align.
        @block_type_map{qw(unless else elsif when default case)} =
          ('if') x 6;
        @block_type_map{qw(sort grep)} = ('map') x 2;

        # Certain keywords are mapped to the same 'if' class to align
        # long if/elsif sequences. [elsif.pl]
        @keyword_map{qw(unless else elsif)} = ('if') x 3;
        @keyword_map{qw(when default)}      = ('given') x 2;
        $keyword_map{'case'}                = 'switch';

        # treat an 'undef' similar to numbers and quotes
        $keyword_map{'undef'} = 'Q';
    }

    sub make_alignment_patterns {

        # Do some important preliminary work for the vertical aligner:
        # build three arrays for one output line.  They hold strings which
        # the vertical aligner can test to see if consecutive lines can be
        # aligned vertically.
        #
        # given:
        #   $ibeg, $iend - index of the first and last token of the line
        #
        # returns references to three arrays, indexed on the vertical
        # alignment fields:
        #   @tokens   - the vertical alignment tokens of this line.  These
        #               are tokens, such as '=' '&&' '#' etc, which we might
        #               want to align vertically.  They are decorated with
        #               information such as nesting depth to prevent
        #               unwanted vertical alignment matches.
        #   @fields   - the actual text of the line between the vertical
        #               alignment tokens.
        #   @patterns - a modified list of token types, one for each
        #               alignment field.  These should normally each match
        #               before alignment is allowed, even when the
        #               alignment tokens match.
        my ( $ibeg, $iend ) = @_;

        my @tokens;
        my @fields;
        my @patterns = ("");

        my $i_start = $ibeg;    # first token index of the current field
        my $j       = 0;        # field index

        my $depth                 = 0;
        my @container_name        = ("");
        my @multiple_comma_arrows = (undef);

        for my $i ( $ibeg .. $iend ) {

            my $token = $tokens_to_go[$i];

            # Track only those paren containers which open and close on
            # this line; their names are used below to prevent unwanted
            # cross-line alignments.  Unbalanced containers already avoid
            # aligning across container boundaries.
            if ( $token eq '(' ) {

                my $i_mate = $mate_index_to_go[$i];

                # is the container balanced on this line?
                if ( $i_mate > $i && $i_mate <= $iend ) {
                    $depth++;
                    my $seqno = $type_sequence_to_go[$i];
                    my $count = comma_arrow_count($seqno);
                    $multiple_comma_arrows[$depth] = $count && $count > 1;

                    # The container name is built from the previous token
                    # to make it more unique.  Commas within this container
                    # are given the same name, which helps avoid
                    # undesirable alignments of different types of
                    # containers.
                    my $name = previous_nonblank_token($i);
                    $name =~ s/^->//;
                    $container_name[$depth] = "+" . $name;

                    # The name is made even more unique when the opening
                    # paren itself is not being vertically aligned, because
                    # it usually looks bad to align commas within
                    # containers whose opening parens do not align.  Here
                    # is an example of very BAD comma alignment (the atan2
                    # functions are not all aligned):
                    #    $XY =
                    #      $X * $RTYSQP1 * atan2( $X, $RTYSQP1 ) +
                    #      $Y * $RTXSQP1 * atan2( $Y, $RTXSQP1 ) -
                    #      $X * atan2( $X,            1 ) -
                    #      $Y * atan2( $Y,            1 );
                    #
                    # It is usually okay to align commas when the opening
                    # parens do align, such as:
                    #    glVertex3d( $cx + $s * $xs, $cy,            $z );
                    #    glVertex3d( $cx,            $cy + $s * $ys, $z );
                    #    glVertex3d( $cx - $s * $xs, $cy,            $z );
                    #    glVertex3d( $cx,            $cy - $s * $ys, $z );
                    #
                    # To tell these apart, the length of the line from the
                    # previous matching token, or beginning of line, is
                    # appended to the name so that the vertical aligner can
                    # reject undesirable matches.
                    if ( $matching_token_to_go[$i] eq '' ) {

                        # Token lengths have to be summed here because
                        # padding has been done and so array
                        # $lengths_to_go is now wrong.
                        my $len = length(
                            join( '', @tokens_to_go[ $i_start .. $i - 1 ] )
                        );
                        $len += leading_spaces_to_go($i_start)
                          if ( $i_start == $ibeg );

                        # tack length onto the container name
                        $container_name[$depth] .= "-" . $len;
                    }
                }
            }
            elsif ( $token eq ')' ) {
                $depth-- if $depth > 0;
            }

            # A new synchronization token finishes the current field
            if ( $i > $i_start && $matching_token_to_go[$i] ne '' ) {

                my $raw_tok = $matching_token_to_go[$i];
                my $tok     = $raw_tok;

                # separators in different nesting depths are made unique
                # by appending the nesting depth
                if ( $raw_tok ne '#' ) {
                    $tok .= "$nesting_depth_to_go[$i]";
                }

                # commas and comma arrows are also decorated with any
                # container name to avoid unwanted cross-line alignments
                if ( ( $raw_tok eq ',' || $raw_tok eq '=>' )
                    && $container_name[$depth] )
                {
                    $tok .= $container_name[$depth];
                }

                # Patch to avoid aligning leading and trailing if, unless:
                # trailing if, unless statements are marked with container
                # names.  This makes them different from leading if, unless
                # which are not so marked at present.  If we ever need to
                # name them too, we could use ci to distinguish them.
                # Example problem to avoid:
                #    return ( 2, "DBERROR" )
                #      if ( $retval == 2 );
                #    if   ( scalar @_ ) {
                #        my ( $a, $b, $c, $d, $e, $f ) = @_;
                #    }
                if (   $raw_tok eq '('
                    && $container_name[$depth] =~ /^\+(if|unless)/
                    && $ci_levels_to_go[$ibeg] )
                {
                    $tok .= $container_name[$depth];
                }

                # Block braces are decorated with block types to avoid
                # unwanted alignments such as the following:
                #    foreach ( @{$routput_array} ) { $fh->print($_) }
                #    eval                          { $fh->close() };
                if ( $raw_tok eq '{' && $block_type_to_go[$i] ) {
                    my $block_type = $block_type_to_go[$i];

                    # related block types share a name so that, for
                    # example, else blocks can align
                    $block_type = $block_type_map{$block_type}
                      if ( defined( $block_type_map{$block_type} ) );

                    # sub names are removed so that one-line sub braces
                    # align regardless of name
                    $block_type = 'sub' if ( $block_type =~ /^sub / );

                    # allow all control-type blocks to align
                    $block_type = 'BEGIN' if ( $block_type =~ /^[A-Z]+$/ );

                    $tok .= $block_type;
                }

                # the field is the concatenated text of its tokens
                push @fields,
                  join( '', @tokens_to_go[ $i_start .. $i - 1 ] );

                # store the alignment token for this field
                push @tokens, $tok;

                # get ready for the next batch
                $i_start = $i;
                $j++;
                $patterns[$j] = "";
            }

            # continue accumulating the pattern of the current field

            # keywords contribute their actual text ...
            if ( $types_to_go[$i] eq 'k' ) {
                my $tok = $token;

                # ... except that certain keywords are mapped to a common
                # string to allow alignment.
                $tok = $keyword_map{$tok}
                  if ( defined( $keyword_map{$tok} ) );
                $patterns[$j] .= $tok;
            }

            # all other tokens contribute their type
            else {
                my $type = $types_to_go[$i];

                # Mark most things before arrows as a quote to
                # get them to line up. Testfile: mixed.pl.
                if ( ( $i < $iend - 1 ) && ( $type =~ /^[wnC]$/ ) ) {
                    my $next_type = $types_to_go[ $i + 1 ];
                    my $i_next_nonblank =
                      ( $next_type eq 'b' ) ? $i + 2 : $i + 1;

                    if ( $types_to_go[$i_next_nonblank] eq '=>' ) {
                        $type = 'Q';

                        # Patch to ignore a leading minus before words, by
                        # changing pattern 'mQ' into just 'Q', so that we
                        # can align things like this:
                        #     Button   => "Print letter \"~$_\"",
                        #     -command => [ sub { print "$_[0]\n" }, $_ ],
                        $patterns[$j] = "" if ( $patterns[$j] eq 'm' );
                    }
                }

                # patch to make numbers and quotes align
                $type = 'Q' if ( $type eq 'n' );

                # patch to ignore any ! in patterns
                $type = '' if ( $type eq '!' );

                $patterns[$j] .= $type;
            }
        }

        # done with this line .. the remaining tokens make the last field
        push @fields, join( '', @tokens_to_go[ $i_start .. $iend ] );
        return ( \@tokens, \@fields, \@patterns );
    }

}    # end make_alignment_patterns

11280

11281

{    # begin unmatched_indexes

    # Closure to keep track of unbalanced containers.
    # These variables are shared by the routines in this block:
    my @unmatched_opening_indexes_in_this_batch;
    my @unmatched_closing_indexes_in_this_batch;
    my %comma_arrow_count;

    sub is_unbalanced_batch {

        # Return the total number of unmatched opening and closing
        # tokens recorded for this batch (zero if it is balanced).
        return scalar(@unmatched_opening_indexes_in_this_batch) +
          scalar(@unmatched_closing_indexes_in_this_batch);
    }

    sub comma_arrow_count {

        # Return the number of '=>' tokens counted for the container
        # with the given sequence number, or undef if none was recorded.
        my ($seqno) = @_;
        return $comma_arrow_count{$seqno};
    }

    sub match_opening_and_closing_tokens {

        # Match up indexes of opening and closing braces, etc, in this
        # batch.  This has to be done after all tokens are stored because
        # unstoring of tokens would otherwise cause trouble.

        @unmatched_opening_indexes_in_this_batch = ();
        @unmatched_closing_indexes_in_this_batch = ();
        %comma_arrow_count                       = ();

        for my $i ( 0 .. $max_index_to_go ) {

            my $token = $tokens_to_go[$i];

            # Tokens without a sequence number are not containers; the
            # only thing to do is to count a '=>' for the innermost
            # container which is still open.
            if ( !$type_sequence_to_go[$i] ) {
                if (   $token eq '=>'
                    && @unmatched_opening_indexes_in_this_batch )
                {
                    my $i_container =
                      $unmatched_opening_indexes_in_this_batch[-1];
                    my $seqno = $type_sequence_to_go[$i_container];
                    $comma_arrow_count{$seqno}++;
                }
                next;
            }

            # opening token: remember it until its mate shows up
            if ( $token =~ /^[\(\[\{\?]$/ ) {
                push @unmatched_opening_indexes_in_this_batch, $i;
            }

            # closing token: try to pair it with the latest opening token
            elsif ( $token =~ /^[\)\]\}\:]$/ ) {

                my $i_mate = pop @unmatched_opening_indexes_in_this_batch;

                # nothing is open, so this closing token is unmatched
                if ( !( defined($i_mate) && $i_mate >= 0 ) ) {
                    push @unmatched_closing_indexes_in_this_batch, $i;
                }

                # same sequence number: record the pair in both directions
                elsif ( $type_sequence_to_go[$i_mate] ==
                    $type_sequence_to_go[$i] )
                {
                    $mate_index_to_go[$i]      = $i_mate;
                    $mate_index_to_go[$i_mate] = $i;
                }

                # different sequence numbers: put the opening index back
                # and record the closing token as unmatched
                else {
                    push @unmatched_opening_indexes_in_this_batch, $i_mate;
                    push @unmatched_closing_indexes_in_this_batch, $i;
                }
            }
        }
    }

    sub save_opening_indentation {

        # This should be called after each batch of tokens is output. It
        # saves indentations of lines of all unmatched opening tokens.
        # These will be used by sub get_opening_indentation.

        my ( $ri_first, $ri_last, $rindentation_list ) = @_;

        # Saved indentations belonging to unmatched closing tokens of this
        # batch are no longer needed because we will never encounter them
        # again; deleting them keeps the hash size down.
        foreach my $i_closing (@unmatched_closing_indexes_in_this_batch) {
            my $seqno = $type_sequence_to_go[$i_closing];
            delete $saved_opening_indentation{$seqno};
        }

        # Indentations of any unmatched opening tokens in this batch are
        # saved because we may need them in a subsequent batch.
        foreach my $i_opening (@unmatched_opening_indexes_in_this_batch) {
            my $seqno = $type_sequence_to_go[$i_opening];
            $saved_opening_indentation{$seqno} = [
                lookup_opening_indentation(
                    $i_opening, $ri_first, $ri_last, $rindentation_list
                )
            ];
        }
    }
}    # end unmatched_indexes

11376

11377

sub get_opening_indentation {

    # Get the indentation of the line which output the opening token
    # corresponding to a given closing token in the current output batch.
    #
    # given:
    # $i_closing - index in this line of a closing token ')' '}' or ']'
    #
    # $ri_first - reference to list of the first index $i for each output
    #               line in this batch
    # $ri_last - reference to list of the last index $i for each output line
    #              in this batch
    # $rindentation_list - reference to a list containing the indentation
    #            used for each line.
    #
    # return:
    #   -the indentation of the line which contained the opening token
    #    which matches the token at index $i_closing
    #   -its offset (number of columns) from the start of the line
    #   -a flag which is true if that opening token began its line
    #   -a flag which is 1 if the indentation was found and 0 if not; in
    #    the latter case the first three values are all 0
    #
    my ( $i_closing, $ri_first, $ri_last, $rindentation_list ) = @_;

    my ( $indent, $offset, $is_leading );

    # first, see if the opening token is in the current batch
    my $i_opening = $mate_index_to_go[$i_closing];
    if ( $i_opening >= 0 ) {

        # it is..look up the indentation
        ( $indent, $offset, $is_leading ) =
          lookup_opening_indentation( $i_opening, $ri_first, $ri_last,
            $rindentation_list );
        return ( $indent, $offset, $is_leading, 1 );
    }

    # if not, it should have been stored in the hash by a previous batch
    my $seqno = $type_sequence_to_go[$i_closing];
    if ( $seqno && $saved_opening_indentation{$seqno} ) {
        ( $indent, $offset, $is_leading ) =
          @{ $saved_opening_indentation{$seqno} };
        return ( $indent, $offset, $is_leading, 1 );
    }

    # Nothing was found.  With a sequence number this is some kind of
    # serious error (example is badfile.t); without a sequence number it
    # must be an unbalanced container.
    return ( 0, 0, 0, 0 );
}

11440

11441

sub lookup_opening_indentation {

    # Get the indentation of the line in the current output batch
    # which output a selected opening token.
    #
    # given:
    #   $i_opening - index of an opening token in the current output batch
    #                whose line indentation we need
    #   $ri_start - reference to list of the first index $i for each output
    #               line in this batch
    #   $ri_last - reference to list of the last index $i for each output
    #              line in this batch
    #   $rindentation_list - reference to a list containing the indentation
    #            used for each line.  (NOTE: the first slot in
    #            this list is the last returned line number, and this is
    #            followed by the list of indentations).
    #
    # return
    #   -the indentation of the line which contained token $i_opening
    #   -its offset (number of columns) from the start of the line
    #   -a flag which is true if $i_opening is the first token of its line

    my ( $i_opening, $ri_start, $ri_last, $rindentation_list ) = @_;

    # begin at the line found by the previous lookup, but start over at
    # the first line if the token lies before it
    my $nline = $rindentation_list->[0];
    $nline = 0 if ( $i_opening < $ri_start->[$nline] );

    # error - token index is out of bounds - shouldn't happen
    if ( $i_opening > $ri_last->[-1] ) {
        warning(
"non-fatal program bug in lookup_opening_indentation - index out of range\n"
        );
        report_definite_bug();
        $nline = $#{$ri_last};
    }

    # otherwise move forward to the line which contains the token
    else {
        $nline++ while ( $i_opening > $ri_last->[$nline] );
    }

    # save line number to start looking next call
    $rindentation_list->[0] = $nline;

    my $ibeg       = $ri_start->[$nline];
    my $offset     = token_sequence_length( $ibeg, $i_opening ) - 1;
    my $is_leading = ( $ibeg == $i_opening );
    return ( $rindentation_list->[ $nline + 1 ], $offset, $is_leading );
}

11490

11491

{

11492

my %is_if_elsif_else_unless_while_until_for_foreach;

11493

11494

BEGIN {

11495

11496

# These block types may have text between the keyword and opening

11497

# curly. Note: 'else' does not, but must be included to allow trailing

11498

# if/elsif text to be appended.

11499

# patch for SWITCH/CASE: added 'case' and 'when'

11500

@_ = qw(if elsif else unless while until for foreach case when);

11501

@is_if_elsif_else_unless_while_until_for_foreach{@_} = (1) x scalar(@_);

11502

}

11503

11504

sub set_adjusted_indentation {

11505

11506

# This routine has the final say regarding the actual indentation of

11507

# a line. It starts with the basic indentation which has been

11508

# defined for the leading token, and then takes into account any

11509

# options that the user has set regarding special indenting and

11510

# outdenting.

11511

11512

my ( $ibeg, $iend, $rfields, $rpatterns, $ri_first, $ri_last,

11513

$rindentation_list )

11514

= @_;

11515

11516

# we need to know the last token of this line

11517

my ( $terminal_type, $i_terminal ) =

11518

terminal_type( \@types_to_go, \@block_type_to_go, $ibeg, $iend );

11519

11520

my $is_outdented_line = 0;

11521

11522

my $is_semicolon_terminated = $terminal_type eq ';'

11523

&& $nesting_depth_to_go[$iend] < $nesting_depth_to_go[$ibeg];

11524

11525

##########################################################

11526

# Section 1: set a flag and a default indentation

11527

#

11528

# Most lines are indented according to the initial token.

11529

# But it is common to outdent to the level just after the

11530

# terminal token in certain cases...

11531

# adjust_indentation flag:

11532

# 0 - do not adjust

11533

# 1 - outdent

11534

# 2 - vertically align with opening token

11535

# 3 - indent

11536

##########################################################

11537

my $adjust_indentation = 0;

11538

my $default_adjust_indentation = $adjust_indentation;

11539

11540

my (

11541

$opening_indentation, $opening_offset,

11542

$is_leading, $opening_exists

11543

);

11544

11545

# if we are at a closing token of some type..

11546

if ( $types_to_go[$ibeg] =~ /^[\)\}\]]$/ ) {

11547

11548

# get the indentation of the line containing the corresponding

11549

# opening token

11550

(

11551

$opening_indentation, $opening_offset,

11552

$is_leading, $opening_exists

11553

)

11554

= get_opening_indentation( $ibeg, $ri_first, $ri_last,

11555

$rindentation_list );

11556

11557

# First set the default behavior:

11558

# default behavior is to outdent closing lines

11559

# of the form: "); }; ]; )->xxx;"

11560

if (

11561

$is_semicolon_terminated

11562

11563

# and 'cuddled parens' of the form: ")->pack("

11564

|| (

11565

$terminal_type eq '('

11566

&& $types_to_go[$ibeg] eq ')'

11567

&& ( $nesting_depth_to_go[$iend] + 1 ==

11568

$nesting_depth_to_go[$ibeg] )

11569

)

11570

)

11571

{

11572

$adjust_indentation = 1;

11573

}

11574

11575

# TESTING: outdent something like '),'

11576

if (

11577

$terminal_type eq ','

11578

11579

# allow just one character before the comma

11580

&& $i_terminal == $ibeg + 1

11581

11582

# require LIST environment; otherwise, we may outdent too much --

11583

# this can happen in calls without parentheses (overload.t);

11584

&& $container_environment_to_go[$i_terminal] eq 'LIST'

11585

)

11586

{

11587

$adjust_indentation = 1;

11588

}

11589

11590

# undo continuation indentation of a terminal closing token if

11591

# it is the last token before a level decrease. This will allow

11592

# a closing token to line up with its opening counterpart, and

11593

# avoids an indentation jump larger than 1 level.

11594

if ( $types_to_go[$i_terminal] =~ /^[\}\]\)R]$/

11595

&& $i_terminal == $ibeg )

11596

{

11597

my $ci = $ci_levels_to_go[$ibeg];

11598

my $lev = $levels_to_go[$ibeg];

11599

my $next_type = $types_to_go[ $ibeg + 1 ];

11600

my $i_next_nonblank =

11601

( ( $next_type eq 'b' ) ? $ibeg + 2 : $ibeg + 1 );

11602

if ( $i_next_nonblank <= $max_index_to_go

11603

&& $levels_to_go[$i_next_nonblank] < $lev )

11604

{

11605

$adjust_indentation = 1;

11606

}

11607

}

11608

11609

$default_adjust_indentation = $adjust_indentation;

11610

11611

# Now modify default behavior according to user request:

11612

# handle option to indent non-blocks of the form ); }; ];

11613

# But don't do special indentation to something like ')->pack('

11614

if ( !$block_type_to_go[$ibeg] ) {

11615

my $cti = $closing_token_indentation{ $tokens_to_go[$ibeg] };

11616

if ( $cti == 1 ) {

11617

if ( $i_terminal <= $ibeg + 1

11618

|| $is_semicolon_terminated )

11619

{

11620

$adjust_indentation = 2;

11621

}

11622

else {

11623

$adjust_indentation = 0;

11624

}

11625

}

11626

elsif ( $cti == 2 ) {

11627

if ($is_semicolon_terminated) {

11628

$adjust_indentation = 3;

11629

}

11630

else {

11631

$adjust_indentation = 0;

11632

}

11633

}

11634

elsif ( $cti == 3 ) {

11635

$adjust_indentation = 3;

11636

}

11637

}

11638

11639

# handle option to indent blocks

11640

else {

11641

if (

11642

$rOpts->{'indent-closing-brace'}

11643

&& (

11644

$i_terminal == $ibeg # isolated terminal '}'

11645

|| $is_semicolon_terminated

11646

)

11647

) # } xxxx ;

11648

{

11649

$adjust_indentation = 3;

11650

}

11651

}

11652

}

11653

11654

# if at ');', '};', '>;', and '];' of a terminal qw quote

11655

elsif ($$rpatterns[0] =~ /^qb*;$/

11656

&& $$rfields[0] =~ /^([\)\}\]\>]);$/ )

11657

{

11658

if ( $closing_token_indentation{$1} == 0 ) {

11659

$adjust_indentation = 1;

11660

}

11661

else {

11662

$adjust_indentation = 3;

11663

}

11664

}

11665

11666

# if line begins with a ':', align it with any

11667

# previous line leading with corresponding ?

11668

elsif ( $types_to_go[$ibeg] eq ':' ) {

11669

(

11670

$opening_indentation, $opening_offset,

11671

$is_leading, $opening_exists

11672

)

11673

= get_opening_indentation( $ibeg, $ri_first, $ri_last,

11674

$rindentation_list );

11675

if ($is_leading) { $adjust_indentation = 2; }

11676

}

11677

11678

##########################################################

11679

# Section 2: set indentation according to flag set above

11680

#

11681

# Select the indentation object to define leading

11682

# whitespace. If we are outdenting something like '} } );'

11683

# then we want to use one level below the last token

11684

# ($i_terminal) in order to get it to fully outdent through

11685

# all levels.

11686

##########################################################

11687

my $indentation;

11688

my $lev;

11689

my $level_end = $levels_to_go[$iend];

11690

11691

if ( $adjust_indentation == 0 ) {

11692

$indentation = $leading_spaces_to_go[$ibeg];

11693

$lev = $levels_to_go[$ibeg];

11694

}

11695

elsif ( $adjust_indentation == 1 ) {

11696

$indentation = $reduced_spaces_to_go[$i_terminal];

11697

$lev = $levels_to_go[$i_terminal];

11698

}

11699

11700

# handle indented closing token which aligns with opening token

11701

elsif ( $adjust_indentation == 2 ) {

11702

11703

# handle option to align closing token with opening token

11704

$lev = $levels_to_go[$ibeg];

11705

11706

# calculate spaces needed to align with opening token

11707

my $space_count =

11708

get_SPACES($opening_indentation) + $opening_offset;

11709

11710

# Indent less than the previous line.

11711

#

11712

# Problem: For -lp we don't exactly know what it was if there

11713

# were recoverable spaces sent to the aligner. A good solution

11714

# would be to force a flush of the vertical alignment buffer, so

11715

# that we would know. For now, this rule is used for -lp:

11716

#

11717

# When the last line did not start with a closing token we will

11718

# be optimistic that the aligner will recover everything wanted.

11719

#

11720

# This rule will prevent us from breaking a hierarchy of closing

11721

# tokens, and in a worst case will leave a closing paren too far

11722

# indented, but this is better than frequently leaving it not

11723

# indented enough.

11724

my $last_spaces = get_SPACES($last_indentation_written);

11725

if ( $last_leading_token !~ /^[\}\]\)]$/ ) {

11726

$last_spaces +=

11727

get_RECOVERABLE_SPACES($last_indentation_written);

11728

}

11729

11730

# reset the indentation to the new space count if it works

11731

# only options are all or none: nothing in-between looks good

11732

$lev = $levels_to_go[$ibeg];

11733

if ( $space_count < $last_spaces ) {

11734

if ($rOpts_line_up_parentheses) {

11735

my $lev = $levels_to_go[$ibeg];

11736

$indentation =

11737

new_lp_indentation_item( $space_count, $lev, 0, 0, 0 );

11738

}

11739

else {

11740

$indentation = $space_count;

11741

}

11742

}

11743

11744

# revert to default if it doesn't work

11745

else {

11746

$space_count = leading_spaces_to_go($ibeg);

11747

if ( $default_adjust_indentation == 0 ) {

11748

$indentation = $leading_spaces_to_go[$ibeg];

11749

}

11750

elsif ( $default_adjust_indentation == 1 ) {

11751

$indentation = $reduced_spaces_to_go[$i_terminal];

11752

$lev = $levels_to_go[$i_terminal];

11753

}

11754

}

11755

}

11756

11757

# Full indentation of closing tokens (-icb and -icp or -cti=2)

11758

else {

11759

11760

# handle -icb (indented closing code block braces)

11761

# Updated method for indented block braces: indent one full level if

11762

# there is no continuation indentation. This will occur for major

11763

# structures such as sub, if, else, but not for things like map

11764

# blocks.

11765

#

11766

# Note: only code blocks without continuation indentation are

11767

# handled here (if, else, unless, ..). In the following snippet,

11768

# the terminal brace of the sort block will have continuation

11769

# indentation as shown so it will not be handled by the coding

11770

# here. We would have to undo the continuation indentation to do

11771

# this, but it probably looks ok as is. This is a possible future

11772

# update for semicolon terminated lines.

11773

#

11774

# if ($sortby eq 'date' or $sortby eq 'size') {

11775

# @files = sort {

11776

# $file_data{$a}{$sortby} <=> $file_data{$b}{$sortby}

11777

# or $a cmp $b

11778

# } @files;

11779

# }

11780

#

11781

if ( $block_type_to_go[$ibeg]

11782

&& $ci_levels_to_go[$i_terminal] == 0 )

11783

{

11784

my $spaces = get_SPACES( $leading_spaces_to_go[$i_terminal] );

11785

$indentation = $spaces + $rOpts_indent_columns;

11786

11787

# NOTE: for -lp we could create a new indentation object, but

11788

# there is probably no need to do it

11789

}

11790

11791

# handle -icp and any -icb block braces which fall through above

11792

# test such as the 'sort' block mentioned above.

11793

else {

11794

11795

# There are currently two ways to handle -icp...

11796

# One way is to use the indentation of the previous line:

11797

# $indentation = $last_indentation_written;

11798

11799

# The other way is to use the indentation that the previous line

11800

# would have had if it hadn't been adjusted:

11801

$indentation = $last_unadjusted_indentation;

11802

11803

# Current method: use the minimum of the two. This avoids

11804

# inconsistent indentation.

11805

if ( get_SPACES($last_indentation_written) <

11806

get_SPACES($indentation) )

11807

{

11808

$indentation = $last_indentation_written;

11809

}

11810

}

11811

11812

# use previous indentation but use own level

11813

# to cause list to be flushed properly

11814

$lev = $levels_to_go[$ibeg];

11815

}

11816

11817

# remember indentation except for multi-line quotes, which get

11818

# no indentation

11819

unless ( $ibeg == 0 && $starting_in_quote ) {

11820

$last_indentation_written = $indentation;

11821

$last_unadjusted_indentation = $leading_spaces_to_go[$ibeg];

11822

$last_leading_token = $tokens_to_go[$ibeg];

11823

}

11824

11825

# be sure lines with leading closing tokens are not outdented more

11826

# than the line which contained the corresponding opening token.

11827

11828

#############################################################

11829

# updated per bug report in alex_bug.pl: we must not

11830

# mess with the indentation of closing logical braces so

11831

# we must treat something like '} else {' as if it were

11832

# an isolated brace my $is_isolated_block_brace = (

11833

# $iend == $ibeg ) && $block_type_to_go[$ibeg];

11834

#############################################################

11835

my $is_isolated_block_brace = $block_type_to_go[$ibeg]

11836

&& ( $iend == $ibeg

11837

|| $is_if_elsif_else_unless_while_until_for_foreach{

11838

$block_type_to_go[$ibeg] } );

11839

11840

# only do this for a ':' which is aligned with its leading '?'

11841

my $is_unaligned_colon = $types_to_go[$ibeg] eq ':' && !$is_leading;

11842

if ( defined($opening_indentation)

11843

&& !$is_isolated_block_brace

11844

&& !$is_unaligned_colon )

11845

{

11846

if ( get_SPACES($opening_indentation) > get_SPACES($indentation) ) {

11847

$indentation = $opening_indentation;

11848

}

11849

}

11850

11851

# remember the indentation of each line of this batch

11852

push @{$rindentation_list}, $indentation;

11853

11854

# outdent lines with certain leading tokens...

11855

if (

11856

11857

# must be first word of this batch

11858

$ibeg == 0

11859

11860

# and ...

11861

&& (

11862

11863

# certain leading keywords if requested

11864

(

11865

$rOpts->{'outdent-keywords'}

11866

&& $types_to_go[$ibeg] eq 'k'

11867

&& $outdent_keyword{ $tokens_to_go[$ibeg] }

11868

)

11869

11870

# or labels if requested

11871

|| ( $rOpts->{'outdent-labels'} && $types_to_go[$ibeg] eq 'J' )

11872

11873

# or static block comments if requested

11874

|| ( $types_to_go[$ibeg] eq '#'

11875

&& $rOpts->{'outdent-static-block-comments'}

11876

&& $is_static_block_comment )

11877

)

11878

)

11879

11880

{

11881

my $space_count = leading_spaces_to_go($ibeg);

11882

if ( $space_count > 0 ) {

11883

$space_count -= $rOpts_continuation_indentation;

11884

$is_outdented_line = 1;

11885

if ( $space_count < 0 ) { $space_count = 0 }

11886

11887

# do not promote a spaced static block comment to non-spaced;

11888

# this is not normally necessary but could be for some

11889

# unusual user inputs (such as -ci = -i)

11890

if ( $types_to_go[$ibeg] eq '#' && $space_count == 0 ) {

11891

$space_count = 1;

11892

}

11893

11894

if ($rOpts_line_up_parentheses) {

11895

$indentation =

11896

new_lp_indentation_item( $space_count, $lev, 0, 0, 0 );

11897

}

11898

else {

11899

$indentation = $space_count;

11900

}

11901

}

11902

}

11903

11904

return ( $indentation, $lev, $level_end, $terminal_type,

11905

$is_semicolon_terminated, $is_outdented_line );

11906

}

11907

}

11908

11909

sub set_vertical_tightness_flags {

    # Define vertical tightness controls for the nth line of a batch.
    #
    # Parameters:
    #   $n           - index of the current line within the batch
    #   $n_last_line - index of the last line of the batch
    #   $ibeg, $iend - indexes of the first and last tokens of line $n
    #   $ri_first    - ref to array holding the first token index of each line
    #   $ri_last     - ref to array holding the last token index of each line
    #
    # Returns a reference to an array of parameters which tell the vertical
    # aligner if we should combine this line with the next line to achieve
    # the desired vertical tightness.  The array of parameters contains:
    #
    #   [0] type: 1=is opening tok 2=is closing tok 3=is opening block brace
    #   [1] flag: if opening: 1=no multiple steps, 2=multiple steps ok
    #             if closing: spaces of padding to use
    #             if opening block brace: the -bbvt option value
    #   [2] sequence number of container
    #   [3] valid flag: do not append if this flag is false. Will be
    #       true if appropriate -vt flag is set.  Otherwise, Will be
    #       made true only for 2 line container in parens with -lp
    #   [4] value of get_seqno() for the first token of this line
    #   [5] value of get_seqno() for the last token of this line
    #
    # These flags are used by sub set_leading_whitespace in
    # the vertical aligner
    my ( $n, $n_last_line, $ibeg, $iend, $ri_first, $ri_last ) = @_;

    my $rvertical_tightness_flags = [ 0, 0, 0, 0, 0, 0 ];

    # For non-BLOCK tokens, we will need to examine the next line
    # too, so we won't consider the last line.
    if ( $n < $n_last_line ) {

        # token index range of the next line, and the last token of this one
        my $ibeg_next = $ri_first->[ $n + 1 ];
        my $iend_next = $ri_last->[ $n + 1 ];
        my $token_end = $tokens_to_go[$iend];

        # see if last token is an opening token...not a BLOCK...
        if (
               $type_sequence_to_go[$iend]
            && !$block_type_to_go[$iend]
            && $is_opening_token{$token_end}
            && (
                $opening_vertical_tightness{$token_end} > 0

                # allow 2-line method call to be closed up
                || (   $rOpts_line_up_parentheses
                    && $token_end eq '('
                    && $iend > $ibeg
                    && $types_to_go[ $iend - 1 ] ne 'b' )
            )
          )
        {

            # avoid multiple jumps in nesting depth in one line if
            # requested
            my $ovt = $opening_vertical_tightness{$token_end};
            unless (
                $ovt < 2
                && ( $nesting_depth_to_go[ $iend_next + 1 ] !=
                    $nesting_depth_to_go[$ibeg_next] )
              )
            {

                # If -vt flag has not been set, mark this as invalid
                # and aligner will validate it if it sees the closing paren
                # within 2 lines.
                my $valid_flag = $ovt;
                @{$rvertical_tightness_flags} =
                  ( 1, $ovt, $type_sequence_to_go[$iend], $valid_flag );
            }
        }

        # see if first token of next line is a closing token...
        # ..and be sure this line does not have a side comment
        my $token_next = $tokens_to_go[$ibeg_next];
        if (   $type_sequence_to_go[$ibeg_next]
            && !$block_type_to_go[$ibeg_next]
            && $is_closing_token{$token_next}
            && $types_to_go[$iend] ne '#' )    # for safety, shouldn't happen!
        {
            my $cvt = $closing_vertical_tightness{$token_next};
            if (

                # never append a trailing line like   )->pack(
                # because it will throw off later alignment
                (
                    $nesting_depth_to_go[$ibeg_next] ==
                    $nesting_depth_to_go[ $iend_next + 1 ] + 1
                )
                && (
                    $cvt == 2
                    || (
                        $container_environment_to_go[$ibeg_next] ne 'LIST'
                        && (
                            $cvt == 1

                            # allow closing up 2-line method calls
                            || (   $rOpts_line_up_parentheses
                                && $token_next eq ')' )
                        )
                    )
                )
              )
            {

                # decide which trailing closing tokens to append..
                my $ok = 0;
                if ( $cvt == 2 || $iend_next == $ibeg_next ) { $ok = 1 }
                else {
                    my $str = join( '',
                        @types_to_go[ $ibeg_next + 1 .. $ibeg_next + 2 ] );

                    # append closing token if followed by comment or ';'
                    if ( $str =~ /^b?[#;]/ ) { $ok = 1 }
                }

                if ($ok) {
                    my $valid_flag = $cvt;
                    @{$rvertical_tightness_flags} = (
                        2,
                        $tightness{$token_next} == 2 ? 0 : 1,
                        $type_sequence_to_go[$ibeg_next], $valid_flag,
                    );
                }
            }
        }

        # Opening Token Right
        # If requested, move an isolated trailing opening token to the end of
        # the previous line which ended in a comma.  We could do this
        # in sub recombine_breakpoints but that would cause problems
        # with -lp formatting.  The problem is that indentation will
        # quickly move far to the right in nested expressions.  By
        # doing it after indentation has been set, we avoid changes
        # to the indentation.  Actual movement of the token takes place
        # in sub write_leader_and_string.
        if (
            $opening_token_right{ $tokens_to_go[$ibeg_next] }

            # previous line is not opening
            # (use -sot to combine with it)
            && !$is_opening_token{$token_end}

            # previous line ended in one of these
            # (add other cases if necessary; '=>' and '.' are not necessary
            ##&& ($is_opening_token{$token_end} || $token_end eq ',')
            && !$block_type_to_go[$ibeg_next]

            # this is a line with just an opening token
            && (   $iend_next == $ibeg_next
                || $iend_next == $ibeg_next + 2
                && $types_to_go[$iend_next] eq '#' )

            # looks bad if we align vertically with the wrong container
            && $tokens_to_go[$ibeg] ne $tokens_to_go[$ibeg_next]
          )
        {
            my $valid_flag = 1;
            my $spaces = ( $types_to_go[ $ibeg_next - 1 ] eq 'b' ) ? 1 : 0;
            @{$rvertical_tightness_flags} =
              ( 2, $spaces, $type_sequence_to_go[$ibeg_next], $valid_flag, );
        }

        # Stacking of opening and closing tokens
        my $stackable;
        my $token_beg_next = $tokens_to_go[$ibeg_next];

        # patch to make something like 'qw(' behave like an opening paren
        # (aran.t)
        if ( $types_to_go[$ibeg_next] eq 'q' ) {
            if ( $token_beg_next =~ /^qw\s*([\[\(\{])$/ ) {
                $token_beg_next = $1;
            }
        }

        if (   $is_closing_token{$token_end}
            && $is_closing_token{$token_beg_next} )
        {
            $stackable = $stack_closing_token{$token_beg_next}
              unless ( $block_type_to_go[$ibeg_next] )
              ;    # shouldn't happen; just checking
        }
        elsif ($is_opening_token{$token_end}
            && $is_opening_token{$token_beg_next} )
        {
            $stackable = $stack_opening_token{$token_beg_next}
              unless ( $block_type_to_go[$ibeg_next] )
              ;    # shouldn't happen; just checking
        }

        if ($stackable) {

            # the semicolon test is only made when the next line is the
            # last line of the batch
            my $is_semicolon_terminated;
            if ( $n + 1 == $n_last_line ) {
                my ( $terminal_type, $i_terminal ) = terminal_type(
                    \@types_to_go, \@block_type_to_go,
                    $ibeg_next,    $iend_next
                );
                $is_semicolon_terminated = $terminal_type eq ';'
                  && $nesting_depth_to_go[$iend_next] <
                  $nesting_depth_to_go[$ibeg_next];
            }

            # this must be a line with just an opening token
            # or end in a semicolon
            if (
                $is_semicolon_terminated
                || (   $iend_next == $ibeg_next
                    || $iend_next == $ibeg_next + 2
                    && $types_to_go[$iend_next] eq '#' )
              )
            {
                my $valid_flag = 1;
                my $spaces = ( $types_to_go[ $ibeg_next - 1 ] eq 'b' ) ? 1 : 0;
                @{$rvertical_tightness_flags} =
                  ( 2, $spaces, $type_sequence_to_go[$ibeg_next], $valid_flag,
                  );
            }
        }
    }

    # Check for a last line with isolated opening BLOCK curly
    # (token indexes are integers, so compare them numerically)
    elsif ($rOpts_block_brace_vertical_tightness
        && $ibeg == $iend
        && $types_to_go[$iend] eq '{'
        && $block_type_to_go[$iend] =~
        /$block_brace_vertical_tightness_pattern/o )
    {
        @{$rvertical_tightness_flags} =
          ( 3, $rOpts_block_brace_vertical_tightness, 0, 1 );
    }

    # pack in the sequence numbers of the ends of this line
    $rvertical_tightness_flags->[4] = get_seqno($ibeg);
    $rvertical_tightness_flags->[5] = get_seqno($iend);
    return $rvertical_tightness_flags;
}

12141

12142

sub get_seqno {

    # Return the sequence number of token $ii for the vertical aligner.
    #
    # Normally this is just $type_sequence_to_go[$ii].  A token of type
    # 'q' is given the special value -1 instead when it looks like one
    # end of a qw quote, so that qw opening and closing tokens can be
    # treated somewhat like opening and closing container tokens when
    # the vertical aligner stacks tokens:
    #   - at index > 0: the token starts with 'qw' followed by an
    #     opening '(', '{' or '['
    #   - at index 0: we are not ending in a quote and the token ends
    #     with a closing ')', '}' or ']'
    my ($ii) = @_;
    my $seqno = $type_sequence_to_go[$ii];

    # only quote tokens can receive the special value
    return ($seqno) if ( $types_to_go[$ii] ne 'q' );

    my $SEQ_QW = -1;
    my $is_qw_end =
        ( $ii > 0 )
      ? ( $tokens_to_go[$ii] =~ /^qw\s*[\(\{\[]/ )
      : ( !$ending_in_quote && $tokens_to_go[$ii] =~ /[\)\}\]]$/ );

    return ( $is_qw_end ? $SEQ_QW : $seqno );
}

12163

12164

{

    # Lookup tables private to sub set_vertical_alignment_markers
    my %is_vertical_alignment_type;       # token types we may align before
    my %is_vertical_alignment_keyword;    # keywords we may align before

    BEGIN {

        my @alignment_types = qw#
          = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=
          { ? : => =~ && || // ~~ !~~
          #;
        @is_vertical_alignment_type{@alignment_types} =
          (1) x scalar(@alignment_types);

        my @alignment_keywords =
          qw(if unless and or err eq ne for foreach while until);
        @is_vertical_alignment_keyword{@alignment_keywords} =
          (1) x scalar(@alignment_keywords);
    }

    sub set_vertical_alignment_markers {

        # This routine takes the first step toward vertical alignment of
        # the lines of output text.  It looks for certain tokens which can
        # serve as vertical alignment markers (such as an '=').
        #
        # Parameters: references to the arrays of first and last token
        # indexes of each line of the batch.
        #
        # Method: We look at each token $i in this output batch and set
        # $matching_token_to_go[$i] equal to those tokens at which we
        # would accept vertical alignment (or '' where we would not).

        # nothing to do if we aren't allowed to change whitespace
        unless ($rOpts_add_whitespace) {
            $matching_token_to_go[$_] = '' for ( 0 .. $max_index_to_go );
            return;
        }

        my ( $ri_first, $ri_last ) = @_;

        # remember the index of last nonblank token before any sidecomment
        my $i_terminal = $max_index_to_go;
        if ( $types_to_go[$i_terminal] eq '#' && $i_terminal > 0 ) {

            # step back over the side comment ...
            $i_terminal--;

            # ... and over a blank in front of it
            if ( $types_to_go[$i_terminal] eq 'b' && $i_terminal > 0 ) {
                $i_terminal--;
            }
        }

        # look at each line of this batch..
        my $max_line = @$ri_first - 1;
        for my $line ( 0 .. $max_line ) {
            my $ibeg = $ri_first->[$line];
            my $iend = $ri_last->[$line];

            # per-line state: where we last aligned, and a description
            # of the most recent nonblank token
            my $last_vertical_alignment_before_index = -1;
            my ( $prev_type, $prev_token, $prev_block_type ) = ( '', '', '' );

            # look at each token in this output line..
            for my $i ( $ibeg .. $iend ) {
                my $type       = $types_to_go[$i];
                my $block_type = $block_type_to_go[$i];
                my $token      = $tokens_to_go[$i];

                # check for flag indicating that we should not align
                # this token
                if ( $matching_token_to_go[$i] ) {
                    $matching_token_to_go[$i] = '';
                    next;
                }

                my $alignment_type = '';

                #--------------------------------------------------------
                # First see if we want to align BEFORE this token
                #--------------------------------------------------------

                # The first possible token that we can align before
                # is index 2 because: 1) it doesn't normally make sense to
                # align before the first token and 2) the second
                # token must be a blank if we are to align before
                # the third.  In any case we must follow a blank token.
                if ( $i >= $ibeg + 2 && $types_to_go[ $i - 1 ] eq 'b' ) {

                    # align a side comment -- unless it is a static side
                    # comment or a closing side comment
                    if ( $type eq '#' ) {
                        if (
                            !(
                                $rOpts->{'static-side-comments'}
                                && $token =~ /$static_side_comment_pattern/o
                            )
                            && !(
                                   $prev_block_type
                                && $token =~
                                /$closing_side_comment_prefix_pattern/o
                            )
                          )
                        {
                            $alignment_type = $type;
                        }
                    }

                    # otherwise, do not align two in a row to create a
                    # blank field
                    elsif ( $last_vertical_alignment_before_index == $i - 2 )
                    {
                    }

                    # align before one of these keywords
                    # (within a line, since $i>1)
                    elsif ( $type eq 'k' ) {
                        $alignment_type = $token
                          if ( $is_vertical_alignment_keyword{$token} );
                    }

                    # align before one of these types..
                    # Note: add '.' after new vertical aligner is operational
                    elsif ( $is_vertical_alignment_type{$type} ) {

                        # Do not align a terminal token.  Although it might
                        # occasionally look ok to do this, it has been found
                        # to be a good general rule.  The main problems are:
                        # (1) that the terminal token (such as an = or :)
                        # might get moved far to the right where it is hard
                        # to see because nothing follows it, and
                        # (2) doing so may prevent other good alignments.
                        my $is_terminal =
                          ( $i == $iend || $i >= $i_terminal );

                        # Do not align leading ': (' or '. ('.  This would
                        # prevent alignment in something like the following:
                        #   $extra_space .=
                        #       ( $input_line_number < 10 )  ? "  "
                        #     : ( $input_line_number < 100 ) ? " "
                        #     :                                "";
                        my $follows_leading_operator =
                          (      $i == $ibeg + 2
                              && $types_to_go[$ibeg] =~ /^[\.\:]$/
                              && $types_to_go[ $i - 1 ] eq 'b' );

                        # For a paren after keyword, only align something
                        # like this:
                        #    if    ( $a ) { &a }
                        #    elsif ( $b ) { &b }
                        my $is_other_keyword_paren =
                          (      $token eq '('
                              && $prev_type eq 'k'
                              && $prev_token !~ /^(if|unless|elsif)$/ );

                        $alignment_type = $token
                          unless ( $is_terminal
                            || $follows_leading_operator
                            || $is_other_keyword_paren );
                    }
                }

                $last_vertical_alignment_before_index = $i
                  if ($alignment_type);

                #--------------------------------------------------------
                # Next see if we want to align AFTER the previous nonblank
                #--------------------------------------------------------

                # We want to line up ',' and interior ';' tokens, with the
                # added space AFTER these tokens.  (Note: interior ';' is
                # included because it may occur in short blocks).
                if (

                    # we haven't already set it
                    !$alignment_type

                    # and its not the first token of the line
                    && $i > $ibeg

                    # and it follows a blank
                    && $types_to_go[ $i - 1 ] eq 'b'

                    # and previous token IS one of these:
                    && $prev_type =~ /^[\,\;]$/

                    # and it's NOT one of these
                    && $type !~ /^[b\#\)\]\}]$/
                  )
                {
                    $alignment_type = $prev_type;
                }

                #--------------------------------------------------------
                # then store the value
                #--------------------------------------------------------
                $matching_token_to_go[$i] = $alignment_type;
                if ( $type ne 'b' ) {
                    ( $prev_type, $prev_token, $prev_block_type ) =
                      ( $type, $token, $block_type );
                }
            }
        }
    }
}

12388

12389

sub terminal_type {

    # Find the terminal (last significant) token type of a line.
    #
    # Parameters:
    #   $rtype       - ref to array of token types
    #   $rblock_type - ref to array of block types
    #   $ibeg, $iend - index range of the line within those arrays
    #
    # Returns, in list context, ( $terminal_type, $index ); in scalar
    # context just $terminal_type, where the type is:
    #   '#'  for a full-line comment (index is $ibeg)
    #   ' '  for a blank line (index is $ibeg)
    #   otherwise the type of the last token which is neither a blank
    #   nor a side comment, except that a '}' reads as 'b' when it is
    #   not a block brace or closes a sort/map/grep/eval/do block.
    my ( $rtype, $rblock_type, $ibeg, $iend ) = @_;

    # default result describes an empty line
    my ( $found_type, $found_index ) = ( ' ', $ibeg );

    # check for full-line comment..
    if ( $rtype->[$ibeg] eq '#' ) {
        $found_type = $rtype->[$ibeg];
    }
    else {

        # start at end and walk backwards past any side comment and blanks
        my $i = $iend;
        $i--
          while ( $i >= $ibeg
            && ( $rtype->[$i] eq 'b' || $rtype->[$i] eq '#' ) );

        if ( $i >= $ibeg ) {

            # found it..make sure it is a BLOCK termination,
            # but hide a terminal } after sort/grep/map because it is not
            # necessarily the end of the line.  (terminal.t)
            my $type = $rtype->[$i];
            my $is_hidden_brace =
              (      $type eq '}'
                  && ( !$rblock_type->[$i]
                      || $is_sort_map_grep_eval_do{ $rblock_type->[$i] } ) );

            ( $found_type, $found_index ) =
              ( $is_hidden_brace ? 'b' : $type, $i );
        }
    }

    return wantarray ? ( $found_type, $found_index ) : $found_type;
}

12430

12431

{

12432

my %is_good_keyword_breakpoint;

12433

my %is_lt_gt_le_ge;

12434

12435

sub set_bond_strengths {

12436

12437

BEGIN {

12438

12439

@_ = qw(if unless while until for foreach);

12440

@is_good_keyword_breakpoint{@_} = (1) x scalar(@_);

12441

12442

@_ = qw(lt gt le ge);

12443

@is_lt_gt_le_ge{@_} = (1) x scalar(@_);

12444

12445

###############################################################

12446

# NOTE: NO_BREAK's set here are HINTS which may not be honored;

12447

# essential NO_BREAKS's must be enforced in section 2, below.

12448

###############################################################

12449

12450

# adding NEW_TOKENS: add a left and right bond strength by

12451

# mimicking what is done for an existing token type. You

12452

# can skip this step at first and take the default, then

12453

# tweak later to get desired results.

12454

12455

# The bond strengths should roughly follow precedence order where

12456

# possible. If you make changes, please check the results very

12457

# carefully on a variety of scripts.

12458

12459

# no break around possible filehandle

12460

$left_bond_strength{'Z'} = NO_BREAK;

12461

$right_bond_strength{'Z'} = NO_BREAK;

12462

12463

# never put a bare word on a new line:

12464

# example print (STDERR, "bla"); will fail with break after (

12465

$left_bond_strength{'w'} = NO_BREAK;

12466

12467

# blanks always have infinite strength to force breaks after real tokens

12468

$right_bond_strength{'b'} = NO_BREAK;

12469

12470

# try not to break on exponentiation

12471

@_ = qw" ** .. ... <=> ";

12472

@left_bond_strength{@_} = (STRONG) x scalar(@_);

12473

@right_bond_strength{@_} = (STRONG) x scalar(@_);

12474

12475

# The comma-arrow has very low precedence but not a good break point

12476

$left_bond_strength{'=>'} = NO_BREAK;

12477

$right_bond_strength{'=>'} = NOMINAL;

12478

12479

# ok to break after label

12480

$left_bond_strength{'J'} = NO_BREAK;

12481

$right_bond_strength{'J'} = NOMINAL;

12482

$left_bond_strength{'j'} = STRONG;

12483

$right_bond_strength{'j'} = STRONG;

12484

$left_bond_strength{'A'} = STRONG;

12485

$right_bond_strength{'A'} = STRONG;

12486

12487

$left_bond_strength{'->'} = STRONG;

12488

$right_bond_strength{'->'} = VERY_STRONG;

12489

12490

# breaking AFTER modulus operator is ok:

12491

@_ = qw" % ";

12492

@left_bond_strength{@_} = (STRONG) x scalar(@_);

12493

@right_bond_strength{@_} =

12494

( 0.1 * NOMINAL + 0.9 * STRONG ) x scalar(@_);

12495

12496

# Break AFTER math operators * and /

12497

@_ = qw" * / x ";

12498

@left_bond_strength{@_} = (STRONG) x scalar(@_);

12499

@right_bond_strength{@_} = (NOMINAL) x scalar(@_);

12500

12501

# Break AFTER weakest math operators + and -

12502

# Make them weaker than * but a bit stronger than '.'

12503

@_ = qw" + - ";

12504

@left_bond_strength{@_} = (STRONG) x scalar(@_);

12505

@right_bond_strength{@_} =

12506

( 0.91 * NOMINAL + 0.09 * WEAK ) x scalar(@_);

12507

12508

# breaking BEFORE these is just ok:

12509

@_ = qw" >> << ";

12510

@right_bond_strength{@_} = (STRONG) x scalar(@_);

12511

@left_bond_strength{@_} = (NOMINAL) x scalar(@_);

12512

12513

# breaking before the string concatenation operator seems best

12514

# because it can be hard to see at the end of a line

12515

$right_bond_strength{'.'} = STRONG;

12516

$left_bond_strength{'.'} = 0.9 * NOMINAL + 0.1 * WEAK;

12517

12518

@_ = qw"} ] ) ";

12519

@left_bond_strength{@_} = (STRONG) x scalar(@_);

12520

@right_bond_strength{@_} = (NOMINAL) x scalar(@_);

12521

12522

# make these a little weaker than nominal so that they get

12523

# favored for end-of-line characters

12524

@_ = qw"!= == =~ !~ ~~ !~~";

12525

@left_bond_strength{@_} = (STRONG) x scalar(@_);

12526

@right_bond_strength{@_} =

12527

( 0.9 * NOMINAL + 0.1 * WEAK ) x scalar(@_);

12528

12529

# break AFTER these

12530

@_ = qw" < > | & >= <=";

12531

@left_bond_strength{@_} = (VERY_STRONG) x scalar(@_);

12532

@right_bond_strength{@_} =

12533

( 0.8 * NOMINAL + 0.2 * WEAK ) x scalar(@_);

12534

12535

# breaking either before or after a quote is ok

12536

# but bias for breaking before a quote

12537

$left_bond_strength{'Q'} = NOMINAL;

12538

$right_bond_strength{'Q'} = NOMINAL + 0.02;

12539

$left_bond_strength{'q'} = NOMINAL;

12540

$right_bond_strength{'q'} = NOMINAL;

12541

12542

# starting a line with a keyword is usually ok

12543

$left_bond_strength{'k'} = NOMINAL;

12544

12545

# we usually want to bond a keyword strongly to what immediately

12546

# follows, rather than leaving it stranded at the end of a line

12547

$right_bond_strength{'k'} = STRONG;

12548

12549

$left_bond_strength{'G'} = NOMINAL;

12550

$right_bond_strength{'G'} = STRONG;

12551

12552

# it is good to break AFTER various assignment operators

12553

@_ = qw(

12554

= **= += *= &= <<= &&=

12555

-= /= |= >>= ||= //=

12556

.= %= ^=

12557

x=

12558

);

12559

@left_bond_strength{@_} = (STRONG) x scalar(@_);

12560

@right_bond_strength{@_} =

12561

( 0.4 * WEAK + 0.6 * VERY_WEAK ) x scalar(@_);

12562

12563

# break BEFORE '&&' and '||' and '//'

12564

# set strength of '||' to same as '=' so that chains like

12565

# $a = $b || $c || $d will break before the first '||'

12566

$right_bond_strength{'||'} = NOMINAL;

12567

$left_bond_strength{'||'} = $right_bond_strength{'='};

12568

12569

# same thing for '//'

12570

$right_bond_strength{'//'} = NOMINAL;

12571

$left_bond_strength{'//'} = $right_bond_strength{'='};

12572

12573

# set strength of && a little higher than ||

12574

$right_bond_strength{'&&'} = NOMINAL;

12575

$left_bond_strength{'&&'} = $left_bond_strength{'||'} + 0.1;

12576

12577

$left_bond_strength{';'} = VERY_STRONG;

12578

$right_bond_strength{';'} = VERY_WEAK;

12579

$left_bond_strength{'f'} = VERY_STRONG;

12580

12581

# make right strength of for ';' a little less than '='

12582

# to make for contents break after the ';' to avoid this:

12583

# for ( $j = $number_of_fields - 1 ; $j < $item_count ; $j +=

12584

# $number_of_fields )

12585

# and make it weaker than ',' and 'and' too

12586

$right_bond_strength{'f'} = VERY_WEAK - 0.03;

12587

12588

# The strengths of ?/: should be somewhere between

12589

# an '=' and a quote (NOMINAL),

12590

# make strength of ':' slightly less than '?' to help

12591

# break long chains of ? : after the colons

12592

$left_bond_strength{':'} = 0.4 * WEAK + 0.6 * NOMINAL;

12593

$right_bond_strength{':'} = NO_BREAK;

12594

$left_bond_strength{'?'} = $left_bond_strength{':'} + 0.01;

12595

$right_bond_strength{'?'} = NO_BREAK;

12596

12597

$left_bond_strength{','} = VERY_STRONG;

12598

$right_bond_strength{','} = VERY_WEAK;

12599

12600

# Set bond strengths of certain keywords

12601

# make 'or', 'err', 'and' slightly weaker than a ','

12602

$left_bond_strength{'and'} = VERY_WEAK - 0.01;

12603

$left_bond_strength{'or'} = VERY_WEAK - 0.02;

12604

$left_bond_strength{'err'} = VERY_WEAK - 0.02;

12605

$left_bond_strength{'xor'} = NOMINAL;

12606

$right_bond_strength{'and'} = NOMINAL;

12607

$right_bond_strength{'or'} = NOMINAL;

12608

$right_bond_strength{'err'} = NOMINAL;

12609

$right_bond_strength{'xor'} = STRONG;

12610

}

12611

12612

# patch-its always ok to break at end of line

12613

$nobreak_to_go[$max_index_to_go] = 0;

12614

12615

# adding a small 'bias' to strengths is a simple way to make a line

12616

# break at the first of a sequence of identical terms. For example,

12617

# to force long string of conditional operators to break with

12618

# each line ending in a ':', we can add a small number to the bond

12619

# strength of each ':'

12620

my $colon_bias = 0;

12621

my $amp_bias = 0;

12622

my $bar_bias = 0;

12623

my $and_bias = 0;

12624

my $or_bias = 0;

12625

my $dot_bias = 0;

12626

my $f_bias = 0;

12627

my $code_bias = -.01;

12628

my $type = 'b';

12629

my $token = ' ';

12630

my $last_type;

12631

my $last_nonblank_type = $type;

12632

my $last_nonblank_token = $token;

12633

my $delta_bias = 0.0001;

12634

my $list_str = $left_bond_strength{'?'};

12635

12636

my ( $block_type, $i_next, $i_next_nonblank, $next_nonblank_token,

12637

$next_nonblank_type, $next_token, $next_type, $total_nesting_depth,

12638

);

12639

12640

# preliminary loop to compute bond strengths

12641

for ( my $i = 0 ; $i <= $max_index_to_go ; $i++ ) {

12642

$last_type = $type;

12643

if ( $type ne 'b' ) {

12644

$last_nonblank_type = $type;

12645

$last_nonblank_token = $token;

12646

}

12647

$type = $types_to_go[$i];

12648

12649

# strength on both sides of a blank is the same

12650

if ( $type eq 'b' && $last_type ne 'b' ) {

12651

$bond_strength_to_go[$i] = $bond_strength_to_go[ $i - 1 ];

12652

next;

12653

}

12654

12655

$token = $tokens_to_go[$i];

12656

$block_type = $block_type_to_go[$i];

12657

$i_next = $i + 1;

12658

$next_type = $types_to_go[$i_next];

12659

$next_token = $tokens_to_go[$i_next];

12660

$total_nesting_depth = $nesting_depth_to_go[$i_next];

12661

$i_next_nonblank = ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 );

12662

$next_nonblank_type = $types_to_go[$i_next_nonblank];

12663

$next_nonblank_token = $tokens_to_go[$i_next_nonblank];

12664

12665

# Some token chemistry... The decision about where to break a

12666

# line depends upon a "bond strength" between tokens. The LOWER

12667

# the bond strength, the MORE likely a break. The strength

12668

# values are based on trial-and-error, and need to be tweaked

12669

# occasionally to get desired results. Things to keep in mind

12670

# are:

12671

# 1. relative strengths are important. small differences

12672

# in strengths can make big formatting differences.

12673

# 2. each indentation level adds one unit of bond strength

12674

# 3. a value of NO_BREAK makes an unbreakable bond

12675

# 4. a value of VERY_WEAK is the strength of a ','

12676

# 5. values below NOMINAL are considered ok break points

12677

# 6. values above NOMINAL are considered poor break points

12678

# We are computing the strength of the bond between the current

12679

# token and the NEXT token.

12680

my $bond_str = VERY_STRONG; # a default, high strength

12681

12682

#---------------------------------------------------------------

12683

# section 1:

12684

# use minimum of left and right bond strengths if defined;

12685

# digraphs and trigraphs like to break on their left

12686

#---------------------------------------------------------------

12687

my $bsr = $right_bond_strength{$type};

12688

12689

if ( !defined($bsr) ) {

12690

12691

if ( $is_digraph{$type} || $is_trigraph{$type} ) {

12692

$bsr = STRONG;

12693

}

12694

else {

12695

$bsr = VERY_STRONG;

12696

}

12697

}

12698

12699

# define right bond strengths of certain keywords

12700

if ( $type eq 'k' && defined( $right_bond_strength{$token} ) ) {

12701

$bsr = $right_bond_strength{$token};

12702

}

12703

elsif ( $token eq 'ne' or $token eq 'eq' ) {

12704

$bsr = NOMINAL;

12705

}

12706

my $bsl = $left_bond_strength{$next_nonblank_type};

12707

12708

# set terminal bond strength to the nominal value

12709

# this will cause good preceding breaks to be retained

12710

if ( $i_next_nonblank > $max_index_to_go ) {

12711

$bsl = NOMINAL;

12712

}

12713

12714

if ( !defined($bsl) ) {

12715

12716

if ( $is_digraph{$next_nonblank_type}

12717

|| $is_trigraph{$next_nonblank_type} )

12718

{

12719

$bsl = WEAK;

12720

}

12721

else {

12722

$bsl = VERY_STRONG;

12723

}

12724

}

12725

12726

# define left bond strengths of certain keywords

12727

if ( $next_nonblank_type eq 'k'

12728

&& defined( $left_bond_strength{$next_nonblank_token} ) )

12729

{

12730

$bsl = $left_bond_strength{$next_nonblank_token};

12731

}

12732

elsif ($next_nonblank_token eq 'ne'

12733

or $next_nonblank_token eq 'eq' )

12734

{

12735

$bsl = NOMINAL;

12736

}

12737

elsif ( $is_lt_gt_le_ge{$next_nonblank_token} ) {

12738

$bsl = 0.9 * NOMINAL + 0.1 * STRONG;

12739

}

12740

12741

# Note: it might seem that we would want to keep a NO_BREAK if

12742

# either token has this value. This didn't work, because in an

12743

# arrow list, it prevents the comma from separating from the

12744

# following bare word (which is probably quoted by its arrow).

12745

# So necessary NO_BREAK's have to be handled as special cases

12746

# in the final section.

12747

$bond_str = ( $bsr < $bsl ) ? $bsr : $bsl;

12748

my $bond_str_1 = $bond_str;

12749

12750

#---------------------------------------------------------------

12751

# section 2:

12752

# special cases

12753

#---------------------------------------------------------------

12754

12755

# allow long lines before final { in an if statement, as in:

12756

# if (..........

12757

# ..........)

12758

# {

12759

#

12760

# Otherwise, the line before the { tends to be too short.

12761

if ( $type eq ')' ) {

12762

if ( $next_nonblank_type eq '{' ) {

12763

$bond_str = VERY_WEAK + 0.03;

12764

}

12765

}

12766

12767

elsif ( $type eq '(' ) {

12768

if ( $next_nonblank_type eq '{' ) {

12769

$bond_str = NOMINAL;

12770

}

12771

}

12772

12773

# break on something like '} (', but keep this stronger than a ','

12774

# example is in 'howe.pl'

12775

elsif ( $type eq 'R' or $type eq '}' ) {

12776

if ( $next_nonblank_type eq '(' ) {

12777

$bond_str = 0.8 * VERY_WEAK + 0.2 * WEAK;

12778

}

12779

}

12780

12781

#-----------------------------------------------------------------

12782

# adjust bond strength bias

12783

#-----------------------------------------------------------------

12784

12785

# TESTING: add any bias set by sub scan_list at old comma

12786

# break points.

12787

elsif ( $type eq ',' ) {

12788

$bond_str += $bond_strength_to_go[$i];

12789

}

12790

12791

elsif ( $type eq 'f' ) {

12792

$bond_str += $f_bias;

12793

$f_bias += $delta_bias;

12794

}

12795

12796

# in long ?: conditionals, bias toward just one set per line (colon.t)

12797

elsif ( $type eq ':' ) {

12798

if ( !$want_break_before{$type} ) {

12799

$bond_str += $colon_bias;

12800

$colon_bias += $delta_bias;

12801

}

12802

}

12803

12804

if ( $next_nonblank_type eq ':'

12805

&& $want_break_before{$next_nonblank_type} )

12806

{

12807

$bond_str += $colon_bias;

12808

$colon_bias += $delta_bias;

12809

}

12810

12811

# if leading '.' is used, align all but 'short' quotes;

12812

# the idea is to not place something like "\n" on a single line.

12813

elsif ( $next_nonblank_type eq '.' ) {

12814

if ( $want_break_before{'.'} ) {

12815

unless (

12816

$last_nonblank_type eq '.'

12817

&& (

12818

length($token) <=

12819

$rOpts_short_concatenation_item_length )

12820

&& ( $token !~ /^[\)\]\}]$/ )

12821

)

12822

{

12823

$dot_bias += $delta_bias;

12824

}

12825

$bond_str += $dot_bias;

12826

}

12827

}

12828

elsif ($next_nonblank_type eq '&&'

12829

&& $want_break_before{$next_nonblank_type} )

12830

{

12831

$bond_str += $amp_bias;

12832

$amp_bias += $delta_bias;

12833

}

12834

elsif ($next_nonblank_type eq '||'

12835

&& $want_break_before{$next_nonblank_type} )

12836

{

12837

$bond_str += $bar_bias;

12838

$bar_bias += $delta_bias;

12839

}

12840

elsif ( $next_nonblank_type eq 'k' ) {

12841

12842

if ( $next_nonblank_token eq 'and'

12843

&& $want_break_before{$next_nonblank_token} )

12844

{

12845

$bond_str += $and_bias;

12846

$and_bias += $delta_bias;

12847

}

12848

elsif ($next_nonblank_token =~ /^(or|err)$/

12849

&& $want_break_before{$next_nonblank_token} )

12850

{

12851

$bond_str += $or_bias;

12852

$or_bias += $delta_bias;

12853

}

12854

12855

# FIXME: needs more testing

12856

elsif ( $is_keyword_returning_list{$next_nonblank_token} ) {

12857

$bond_str = $list_str if ( $bond_str > $list_str );

12858

}

12859

elsif ( $token eq 'err'

12860

&& !$want_break_before{$token} )

12861

{

12862

$bond_str += $or_bias;

12863

$or_bias += $delta_bias;

12864

}

12865

}

12866

12867

if ( $type eq ':'

12868

&& !$want_break_before{$type} )

12869

{

12870

$bond_str += $colon_bias;

12871

$colon_bias += $delta_bias;

12872

}

12873

elsif ( $type eq '&&'

12874

&& !$want_break_before{$type} )

12875

{

12876

$bond_str += $amp_bias;

12877

$amp_bias += $delta_bias;

12878

}

12879

elsif ( $type eq '||'

12880

&& !$want_break_before{$type} )

12881

{

12882

$bond_str += $bar_bias;

12883

$bar_bias += $delta_bias;

12884

}

12885

elsif ( $type eq 'k' ) {

12886

12887

if ( $token eq 'and'

12888

&& !$want_break_before{$token} )

12889

{

12890

$bond_str += $and_bias;

12891

$and_bias += $delta_bias;

12892

}

12893

elsif ( $token eq 'or'

12894

&& !$want_break_before{$token} )

12895

{

12896

$bond_str += $or_bias;

12897

$or_bias += $delta_bias;

12898

}

12899

}

12900

12901

# keep matrix and hash indices together

12902

# but make them a little below STRONG to allow breaking open

12903

# something like {'some-word'}{'some-very-long-word'} at the }{

12904

# (bracebrk.t)

12905

if ( ( $type eq ']' or $type eq 'R' )

12906

&& ( $next_nonblank_type eq '[' or $next_nonblank_type eq 'L' )

12907

)

12908

{

12909

$bond_str = 0.9 * STRONG + 0.1 * NOMINAL;

12910

}

12911

12912

if ( $next_nonblank_token =~ /^->/ ) {

12913

12914

# increase strength to the point where a break in the following

12915

# will be after the opening paren rather than at the arrow:

12916

# $a->$b($c);

12917

if ( $type eq 'i' ) {

12918

$bond_str = 1.45 * STRONG;

12919

}

12920

12921

elsif ( $type =~ /^[\)\]\}R]$/ ) {

12922

$bond_str = 0.1 * STRONG + 0.9 * NOMINAL;

12923

}

12924

12925

# otherwise make strength before an '->' a little over a '+'

12926

else {

12927

if ( $bond_str <= NOMINAL ) {

12928

$bond_str = NOMINAL + 0.01;

12929

}

12930

}

12931

}

12932

12933

if ( $token eq ')' && $next_nonblank_token eq '[' ) {

12934

$bond_str = 0.2 * STRONG + 0.8 * NOMINAL;

12935

}

12936

12937

# map1.t -- correct for a quirk in perl

12938

if ( $token eq '('

12939

&& $next_nonblank_type eq 'i'

12940

&& $last_nonblank_type eq 'k'

12941

&& $is_sort_map_grep{$last_nonblank_token} )

12942

12943

# /^(sort|map|grep)$/ )

12944

{

12945

$bond_str = NO_BREAK;

12946

}

12947

12948

# extrude.t: do not break before paren at:

12949

# -l pid_filename(

12950

if ( $last_nonblank_type eq 'F' && $next_nonblank_token eq '(' ) {

12951

$bond_str = NO_BREAK;

12952

}

12953

12954

# good to break after end of code blocks

12955

if ( $type eq '}' && $block_type ) {

12956

12957

$bond_str = 0.5 * WEAK + 0.5 * VERY_WEAK + $code_bias;

12958

$code_bias += $delta_bias;

12959

}

12960

12961

if ( $type eq 'k' ) {

12962

12963

# allow certain control keywords to stand out

12964

if ( $next_nonblank_type eq 'k'

12965

&& $is_last_next_redo_return{$token} )

12966

{

12967

$bond_str = 0.45 * WEAK + 0.55 * VERY_WEAK;

12968

}

12969

12970

# Don't break after keyword my. This is a quick fix for a

12971

# rare problem with perl. An example is this line from file

12972

# Container.pm:

12973

# foreach my $question( Debian::DebConf::ConfigDb::gettree( $this->{'question'} ) )

12974

12975

if ( $token eq 'my' ) {

12976

$bond_str = NO_BREAK;

12977

}

12978

12979

}

12980

12981

# good to break before 'if', 'unless', etc

12982

if ( $is_if_brace_follower{$next_nonblank_token} ) {

12983

$bond_str = VERY_WEAK;

12984

}

12985

12986

if ( $next_nonblank_type eq 'k' ) {

12987

12988

# keywords like 'unless', 'if', etc, within statements

12989

# make good breaks

12990

if ( $is_good_keyword_breakpoint{$next_nonblank_token} ) {

12991

$bond_str = VERY_WEAK / 1.05;

12992

}

12993

}

12994

12995

# try not to break before a comma-arrow

12996

elsif ( $next_nonblank_type eq '=>' ) {

12997

if ( $bond_str < STRONG ) { $bond_str = STRONG }

12998

}

12999

13000

#----------------------------------------------------------------------

13001

# only set NO_BREAK's from here on

13002

#----------------------------------------------------------------------

13003

if ( $type eq 'C' or $type eq 'U' ) {

13004

13005

# use strict requires that bare word and => not be separated

13006

if ( $next_nonblank_type eq '=>' ) {

13007

$bond_str = NO_BREAK;

13008

}

13009

13010

# Never break between a bareword and a following paren because

13011

# perl may give an error. For example, if a break is placed

13012

# between 'to_filehandle' and its '(' the following line will

13013

# give a syntax error [Carp.pm]: my( $no) =fileno(

13014

# to_filehandle( $in)) ;

13015

if ( $next_nonblank_token eq '(' ) {

13016

$bond_str = NO_BREAK;

13017

}

13018

}

13019

13020

# use strict requires that bare word within braces not start new line

13021

elsif ( $type eq 'L' ) {

13022

13023

if ( $next_nonblank_type eq 'w' ) {

13024

$bond_str = NO_BREAK;

13025

}

13026

}

13027

13028

# in older versions of perl, use strict can cause problems with

13029

# breaks before bare words following opening parens. For example,

13030

# this will fail under older versions if a break is made between

13031

# '(' and 'MAIL':

13032

# use strict;

13033

# open( MAIL, "a long filename or command");

13034

# close MAIL;

13035

elsif ( $type eq '{' ) {

13036

13037

if ( $token eq '(' && $next_nonblank_type eq 'w' ) {

13038

13039

# but it's fine to break if the word is followed by a '=>'

13040

# or if it is obviously a sub call

13041

my $i_next_next_nonblank = $i_next_nonblank + 1;

13042

my $next_next_type = $types_to_go[$i_next_next_nonblank];

13043

if ( $next_next_type eq 'b'

13044

&& $i_next_nonblank < $max_index_to_go )

13045

{

13046

$i_next_next_nonblank++;

13047

$next_next_type = $types_to_go[$i_next_next_nonblank];

13048

}

13049

13050

##if ( $next_next_type ne '=>' ) {

13051

# these are ok: '->xxx', '=>', '('

13052

13053

# We'll check for an old breakpoint and keep a leading

13054

# bareword if it was that way in the input file.

13055

# Presumably it was ok that way. For example, the

13056

# following would remain unchanged:

13057

#

13058

# @months = (

13059

# January, February, March, April,

13060

# May, June, July, August,

13061

# September, October, November, December,

13062

# );

13063

#

13064

# This should be sufficient:

13065

if ( !$old_breakpoint_to_go[$i]

13066

&& ( $next_next_type eq ',' || $next_next_type eq '}' )

13067

)

13068

{

13069

$bond_str = NO_BREAK;

13070

}

13071

}

13072

}

13073

13074

elsif ( $type eq 'w' ) {

13075

13076

if ( $next_nonblank_type eq 'R' ) {

13077

$bond_str = NO_BREAK;

13078

}

13079

13080

# use strict requires that bare word and => not be separated

13081

if ( $next_nonblank_type eq '=>' ) {

13082

$bond_str = NO_BREAK;

13083

}

13084

}

13085

13086

# in fact, use strict hates bare words on any new line. For

13087

# example, a break before the underscore here provokes the

13088

# wrath of use strict:

13089

# if ( -r $fn && ( -s _ || $AllowZeroFilesize)) {

13090

elsif ( $type eq 'F' ) {

13091

$bond_str = NO_BREAK;

13092

}

13093

13094

# use strict does not allow separating type info from trailing { }

13095

# testfile is readmail.pl

13096

elsif ( $type eq 't' or $type eq 'i' ) {

13097

13098

if ( $next_nonblank_type eq 'L' ) {

13099

$bond_str = NO_BREAK;

13100

}

13101

}

13102

13103

# Do not break between a possible filehandle and a ? or / and do

13104

# not introduce a break after it if there is no blank

13105

# (extrude.t)

13106

elsif ( $type eq 'Z' ) {

13107

13108

# don't break..

13109

if (

13110

13111

# if there is no blank and we do not want one. Examples:

13112

# print $x++ # do not break after $x

13113

# print HTML"HELLO" # break ok after HTML

13114

(

13115

$next_type ne 'b'

13116

&& defined( $want_left_space{$next_type} )

13117

&& $want_left_space{$next_type} == WS_NO

13118

)

13119

13120

# or we might be followed by the start of a quote

13121

|| $next_nonblank_type =~ /^[\/\?]$/

13122

)

13123

{

13124

$bond_str = NO_BREAK;

13125

}

13126

}

13127

13128

# Do not break before a possible file handle

13129

if ( $next_nonblank_type eq 'Z' ) {

13130

$bond_str = NO_BREAK;

13131

}

13132

13133

# As a defensive measure, do not break between a '(' and a

13134

# filehandle. In some cases, this can cause an error. For

13135

# example, the following program works:

13136

# my $msg="hi!\n";

13137

# print

13138

# ( STDOUT

13139

# $msg

13140

# );

13141

#

13142

# But this program fails:

13143

# my $msg="hi!\n";

13144

# print

13145

# (

13146

# STDOUT

13147

# $msg

13148

# );

13149

#

13150

# This is normally only a problem with the 'extrude' option

13151

if ( $next_nonblank_type eq 'Y' && $token eq '(' ) {

13152

$bond_str = NO_BREAK;

13153

}

13154

13155

# Breaking before a ++ can cause perl to guess wrong. For

13156

# example the following line will cause a syntax error

13157

# with -extrude if we break between '$i' and '++' [fixstyle2]

13158

# print( ( $i++ & 1 ) ? $_ : ( $change{$_} || $_ ) );

13159

elsif ( $next_nonblank_type eq '++' ) {

13160

$bond_str = NO_BREAK;

13161

}

13162

13163

# Breaking before a ? before a quote can cause trouble if

13164

# they are not separated by a blank.

13165

# Example: a syntax error occurs if you break before the ? here

13166

# my$logic=join$all?' && ':' || ',@regexps;

13167

# From: Professional_Perl_Programming_Code/multifind.pl

13168

elsif ( $next_nonblank_type eq '?' ) {

13169

$bond_str = NO_BREAK

13170

if ( $types_to_go[ $i_next_nonblank + 1 ] eq 'Q' );

13171

}

13172

13173

# Breaking before a . followed by a number

13174

# can cause trouble if there is no intervening space

13175

# Example: a syntax error occurs if you break before the .2 here

13176

# $str .= pack($endian.2, ensurrogate($ord));

13177

# From: perl58/Unicode.pm

13178

elsif ( $next_nonblank_type eq '.' ) {

13179

$bond_str = NO_BREAK

13180

if ( $types_to_go[ $i_next_nonblank + 1 ] eq 'n' );

13181

}

13182

13183

# patch to put cuddled elses back together when on multiple

13184

# lines, as in: } \n else \n { \n

13185

if ($rOpts_cuddled_else) {

13186

13187

if ( ( $token eq 'else' ) && ( $next_nonblank_type eq '{' )

13188

|| ( $type eq '}' ) && ( $next_nonblank_token eq 'else' ) )

13189

{

13190

$bond_str = NO_BREAK;

13191

}

13192

}

13193

13194

# keep '}' together with ';'

13195

if ( ( $token eq '}' ) && ( $next_nonblank_type eq ';' ) ) {

13196

$bond_str = NO_BREAK;

13197

}

13198

13199

# never break between sub name and opening paren

13200

if ( ( $type eq 'w' ) && ( $next_nonblank_token eq '(' ) ) {

13201

$bond_str = NO_BREAK;

13202

}

13203

13204

#---------------------------------------------------------------

13205

# section 3:

13206

# now take nesting depth into account

13207

#---------------------------------------------------------------

13208

# final strength incorporates the bond strength and nesting depth

13209

my $strength;

13210

13211

if ( defined($bond_str) && !$nobreak_to_go[$i] ) {

13212

if ( $total_nesting_depth > 0 ) {

13213

$strength = $bond_str + $total_nesting_depth;

13214

}

13215

else {

13216

$strength = $bond_str;

13217

}

13218

}

13219

else {

13220

$strength = NO_BREAK;

13221

}

13222

13223

# always break after side comment

13224

if ( $type eq '#' ) { $strength = 0 }

13225

13226

$bond_strength_to_go[$i] = $strength;

13227

13228

FORMATTER_DEBUG_FLAG_BOND && do {

13229

my $str = substr( $token, 0, 15 );

13230

$str .= ' ' x ( 16 - length($str) );

13231

print

13232

"BOND: i=$i $str $type $next_nonblank_type depth=$total_nesting_depth strength=$bond_str_1 -> $bond_str -> $strength \n";

13233

};

13234

}

13235

}

13236

13237

}

13238

13239

sub pad_array_to_go {

    # Append blank padding entries after the last token of the current
    # batch.  This simplifies the coding in scan_list and
    # set_bond_strengths, which can then index one or two tokens past
    # the end of the batch.
    #
    # Writes:
    #   $tokens_to_go[ max+1 ], $tokens_to_go[ max+2 ]   = ''
    #   $types_to_go[ max+1 ],  $types_to_go[ max+2 ]    = 'b'
    #   $nesting_depth_to_go[ max+1 ] = depth of the last token, adjusted
    #       by -1 / +1 if the last token is a closing / opening type
    my $i_last = $max_index_to_go;
    my ( $i_pad_1, $i_pad_2 ) = ( $i_last + 1, $i_last + 2 );

    @tokens_to_go[ $i_pad_1, $i_pad_2 ] = ( '',  '' );
    @types_to_go[ $i_pad_1, $i_pad_2 ]  = ( 'b', 'b' );

    # start from the depth of the last real token
    $nesting_depth_to_go[$i_pad_1] = $nesting_depth_to_go[$i_last];

    my $last_type = $types_to_go[$i_last];

    # closing types: /^[R\}\)\]]$/
    if ( $is_closing_type{$last_type} ) {

        if ( $nesting_depth_to_go[$i_last] <= 0 ) {

            # shouldn't happen: a closing token at depth zero or below;
            # report it once unless a brace error was already seen
            if ( !get_saw_brace_error() ) {
                warning(
"Program bug in scan_list: hit nesting error which should have been caught\n"
                );
                report_definite_bug();
            }
        }
        else {

            # the pad token sits one level outside the closed container
            $nesting_depth_to_go[$i_pad_1] -= 1;
        }
    }

    # opening types: /^[L\{\(\[]$/
    elsif ( $is_opening_type{$last_type} ) {

        # the pad token sits one level inside the opened container
        $nesting_depth_to_go[$i_pad_1] += 1;
    }
}

13272

13273

{ # begin scan_list

13274

13275

# Lexical state shared by scan_list and the helper subs of this closure.

# scalars
my (
    $block_type,
    $current_depth,
    $depth,
    $i,
    $i_last_nonblank_token,
    $last_colon_sequence_number,
    $last_nonblank_token,
    $last_nonblank_type,
    $last_old_breakpoint_count,
    $minimum_depth,
    $next_nonblank_block_type,
    $next_nonblank_token,
    $next_nonblank_type,
    $old_breakpoint_count,
    $starting_breakpoint_count,
    $starting_depth,
    $token,
    $type,
    $type_sequence,
);

# arrays indexed by nesting depth; each one is (re)initialized for a
# depth in check_for_new_minimum_depth
my (
    @breakpoint_stack,
    @breakpoint_undo_stack,
    @comma_index,
    @container_type,
    @identifier_count_stack,
    @index_before_arrow,
    @interrupted_list,
    @item_count_stack,
    @last_comma_index,
    @last_dot_index,
    @last_nonblank_type,
    @old_breakpoint_count_stack,
    @opening_structure_index_stack,
    @rfor_semicolon_list,
    @has_old_logical_breakpoints,
    @rand_or_list,
    @i_equals,
);

13299

13300

# Define the essential per-depth variables when we go 'up' to a new
# depth.  If the given depth is below the minimum depth seen so far, it
# becomes the new minimum and all per-depth bookkeeping for it is set
# to its starting values; otherwise nothing is changed.
sub check_for_new_minimum_depth {
    my ($new_depth) = @_;

    if ( $new_depth < $minimum_depth ) {

        $minimum_depth = $new_depth;

        #------------------------------------------------------
        # these arrays need not retain values between calls
        #------------------------------------------------------
        $breakpoint_stack[$new_depth] = $starting_breakpoint_count;
        $interrupted_list[$new_depth] = 1;

        # entries which start out as an empty string
        foreach my $rstack ( \@container_type, \@last_nonblank_type ) {
            $rstack->[$new_depth] = "";
        }

        # counters and flags which start out at zero
        foreach my $rstack (
            \@identifier_count_stack,
            \@item_count_stack,
            \@has_old_logical_breakpoints,
          )
        {
            $rstack->[$new_depth] = 0;
        }

        # token indexes, with -1 meaning 'none yet'
        foreach my $rstack (
            \@index_before_arrow,
            \@opening_structure_index_stack,
            \@i_equals,
          )
        {
            $rstack->[$new_depth] = -1;
        }

        # entries which start out undefined
        foreach my $rstack (
            \@breakpoint_undo_stack,
            \@comma_index,
            \@last_comma_index,
            \@last_dot_index,
            \@old_breakpoint_count_stack,
          )
        {
            $rstack->[$new_depth] = undef;
        }

        # each of these gets its own fresh, empty list
        $rand_or_list[$new_depth]        = [];
        $rfor_semicolon_list[$new_depth] = [];

        #------------------------------------------------------
        # these arrays must retain values between calls, so they
        # are only initialized the first time a depth is seen
        #------------------------------------------------------
        if ( !defined( $has_broken_sublist[$new_depth] ) ) {
            $dont_align[$new_depth]         = 0;
            $has_broken_sublist[$new_depth] = 0;
            $want_comma_break[$new_depth]   = 0;
        }
    }
}

13336

13337

# Decide which commas to break at for the list at depth $dd.
#
# Returns the two-element list ( $bp_count, $do_not_break_apart ):
#   $bp_count           = number of comma breakpoints set
#   $do_not_break_apart = a flag indicating if the container need not
#                         be broken open
sub set_comma_breakpoints {

    my ($dd) = @_;

    # nothing to do if no items were counted at this depth
    return ( 0, 0 ) unless ( $item_count_stack[$dd] );

    # commas not in containers: bias toward the old breakpoints and
    # report no forced comma breakpoints
    if ( $dont_align[$dd] ) {
        do_uncontained_comma_breaks($dd);
        return ( 0, 0 );
    }

    # commas within containers...

    # always open comma lists not preceded by keywords, barewords,
    # identifiers (that is, anything that doesn't look like a
    # function call)
    my $must_break_open = $last_nonblank_type[$dd] !~ /^[kwiU]$/;

    # set_comma_breakpoints_do reports 'do not break apart' through
    # this reference; breakpoints set are measured by the change in
    # the global forced breakpoint count
    my $do_not_break_apart = 0;
    my $count_before       = $forced_breakpoint_count;

    set_comma_breakpoints_do(
        $dd,
        $opening_structure_index_stack[$dd],
        $i,
        $item_count_stack[$dd],
        $identifier_count_stack[$dd],
        $comma_index[$dd],
        $next_nonblank_type,
        $container_type[$dd],
        $interrupted_list[$dd],
        \$do_not_break_apart,
        $must_break_open,
    );

    my $bp_count = $forced_breakpoint_count - $count_before;

    # a list which must be opened can never be left unbroken
    if ($must_break_open) { $do_not_break_apart = 0 }

    return ( $bp_count, $do_not_break_apart );
}

13384

13385

sub do_uncontained_comma_breaks {

    # Handle commas not in containers...
    # This is a catch-all routine for commas that we
    # don't know what to do with because they don't fall
    # within containers.  We will bias the bond strength
    # to break at commas which ended lines in the input
    # file.  This usually works better than just trying
    # to put as many items on a line as possible.  A
    # downside is that if the input file is garbage it
    # won't work very well. However, the user can always
    # prevent following the old breakpoints with the
    # -iob flag.
    #
    # Parameter:
    #   $dd = depth whose comma list ( $comma_index[$dd] ) is processed
    # Returns nothing; results are left in @bond_strength_to_go and
    # possibly in one forced breakpoint.
    my $dd = shift;

    # Nothing to do without any recorded commas.  Checking here avoids
    # autovivifying $comma_index[$dd] and avoids using an undefined
    # value as an index into @old_breakpoint_to_go and @levels_to_go
    # below (which would silently read element 0 and warn).
    my $rcomma_indexes = $comma_index[$dd];
    return unless ( $rcomma_indexes && @{$rcomma_indexes} );

    my $bias = -.01;
    foreach my $ii ( @{$rcomma_indexes} ) {
        if ( $old_breakpoint_to_go[$ii] ) {
            $bond_strength_to_go[$ii] = $bias;

            # reduce bias magnitude to force breaks in order
            $bias *= 0.99;
        }
    }

    # Also put a break before the first comma if
    # (1) there was a break there in the input, and
    # (2) there was exactly one previous break in the input
    #
    # For example, we will follow the user and break after
    # 'print' in this snippet:
    #    print
    #      "conformability (Not the same dimension)\n",
    #      "\t", $have, " is ", text_unit($hu), "\n",
    #      "\t", $want, " is ", text_unit($wu), "\n",
    #      ;
    my $i_first_comma = $rcomma_indexes->[0];
    if ( $old_breakpoint_to_go[$i_first_comma] ) {
        my $level_comma = $levels_to_go[$i_first_comma];
        my $ibreak      = -1;
        my $obp_count   = 0;

        # scan backward from the first comma, counting old breakpoints
        # and giving up as soon as a second one is seen
        for ( my $ii = $i_first_comma - 1 ; $ii >= 0 ; $ii -= 1 ) {
            if ( $old_breakpoint_to_go[$ii] ) {
                $obp_count++;
                last if ( $obp_count > 1 );

                # only a break at the same level as the comma qualifies
                $ibreak = $ii
                  if ( $levels_to_go[$ii] == $level_comma );
            }
        }
        if ( $ibreak >= 0 && $obp_count == 1 ) {
            set_forced_breakpoint($ibreak);
        }
    }
    return;
}

13438

13439

# Lookup table of the container types which set_logical_breakpoints
# treats as logical containers (it is indexed there with
# $container_type[$dd]).
my %is_logical_container;

BEGIN {
    $is_logical_container{$_} = 1
      for qw( if elsif unless while and or err not && | || ? : ! );
}

13445

13446

sub set_for_semicolon_breakpoints {

    # Force a line break at every token index that scan_list recorded in
    # @rfor_semicolon_list for nesting depth $dd (tokens of type 'f').
    #
    # Given:   $dd = nesting depth whose recorded indexes are processed
    # Returns: nothing useful; called for its side effects only
    my $dd = shift;
    foreach my $i_semicolon ( @{ $rfor_semicolon_list[$dd] } ) {
        set_forced_breakpoint($i_semicolon);
    }
    return;
}

13452

13453

sub set_logical_breakpoints {

    # Force breaks at the logical operators recorded for nesting depth
    # $dd, provided that either
    #   - the container at that depth has no list items and its type is
    #     one of the %is_logical_container tokens, or
    #   - old logical breakpoints were seen at that depth.
    #
    # Only the highest-ranking operator class that is present gets
    # breaks; the recorded 'if'/'unless' positions (slot 4) are broken
    # along with it.  The recorded list for this depth is then emptied.
    #
    # Given:   $dd = nesting depth to process
    # Returns: nothing useful; called for its side effects only
    my $dd = shift;
    if (
        (
               $item_count_stack[$dd] == 0
            && $is_logical_container{ $container_type[$dd] }
        )

        # TESTING:
        || $has_old_logical_breakpoints[$dd]
      )
    {

        # Look for breaks in this order:
        # 0   1    2   3
        # or  and  ||  &&
        foreach my $rank ( 0 .. 3 ) {
            if ( $rand_or_list[$dd][$rank] ) {
                foreach my $i_operator ( @{ $rand_or_list[$dd][$rank] } ) {
                    set_forced_breakpoint($i_operator);
                }

                # break at any 'if' and 'unless' too
                foreach my $i_keyword ( @{ $rand_or_list[$dd][4] } ) {
                    set_forced_breakpoint($i_keyword);
                }

                # forget these positions so they are not broken twice
                $rand_or_list[$dd] = [];
                last;
            }
        }
    }
    return;
}

13483

13484

sub is_unbreakable_container {

    # never break a container of one of these types
    # because bad things can happen (map1.t)
    #
    # Given:   $dd = nesting depth of the container
    # Returns: the %is_sort_map_grep entry for the container type stored
    #          at that depth (true if the container must not be broken)
    my $dd = shift;
    return $is_sort_map_grep{ $container_type[$dd] };
}

13491

13492

sub scan_list {

    # This routine is responsible for setting line breaks for all lists,
    # so that hierarchical structure can be displayed and so that list
    # items can be vertically aligned.  The output of this routine is
    # stored in the array @forced_breakpoint_to_go, which is used to set
    # final breakpoints.
    #
    # Takes no arguments.  It walks the tokens of the current batch
    # (indexes 0 .. $max_index_to_go of the *_to_go arrays) and keeps its
    # per-depth bookkeeping in variables declared outside this sub, which
    # are shared with the helper subs it calls.
    #
    # Returns: $saw_good_breakpoint, which is set to 1 if an 'or' outside
    # of a logical container sat at an old line break (with
    # $rOpts_break_at_old_logical_breakpoints set and the line not long),
    # or if old logical breakpoints were recorded at the final depth;
    # otherwise it is undefined.

    $starting_depth = $nesting_depth_to_go[0];

    $block_type                 = ' ';
    $current_depth              = $starting_depth;
    $i                          = -1;
    $last_colon_sequence_number = -1;
    $last_nonblank_token        = ';';
    $last_nonblank_type         = ';';
    $last_nonblank_block_type   = ' ';
    $last_old_breakpoint_count  = 0;
    $minimum_depth = $current_depth + 1;    # forces update in check below
    $old_breakpoint_count      = 0;
    $starting_breakpoint_count = $forced_breakpoint_count;
    $token                     = ';';
    $type                      = ';';
    $type_sequence             = '';

    check_for_new_minimum_depth($current_depth);

    my $is_long_line = excess_line_length( 0, $max_index_to_go ) > 0;
    my $want_previous_breakpoint = -1;

    my $saw_good_breakpoint;
    my $i_line_end   = -1;
    my $i_line_start = -1;

    # loop over all tokens in this batch
    while ( ++$i <= $max_index_to_go ) {
        if ( $type ne 'b' ) {
            $i_last_nonblank_token    = $i - 1;
            $last_nonblank_type       = $type;
            $last_nonblank_token      = $token;
            $last_nonblank_block_type = $block_type;
        }
        $type          = $types_to_go[$i];
        $block_type    = $block_type_to_go[$i];
        $token         = $tokens_to_go[$i];
        $type_sequence = $type_sequence_to_go[$i];
        my $next_type = $types_to_go[ $i + 1 ];
        my $i_next_nonblank = ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 );
        $next_nonblank_type       = $types_to_go[$i_next_nonblank];
        $next_nonblank_token      = $tokens_to_go[$i_next_nonblank];
        $next_nonblank_block_type = $block_type_to_go[$i_next_nonblank];

        # set break if flag was set
        if ( $want_previous_breakpoint >= 0 ) {
            set_forced_breakpoint($want_previous_breakpoint);
            $want_previous_breakpoint = -1;
        }

        $last_old_breakpoint_count = $old_breakpoint_count;
        if ( $old_breakpoint_to_go[$i] ) {
            $i_line_end   = $i;
            $i_line_start = $i_next_nonblank;

            $old_breakpoint_count++;

            # Break before certain keywords if user broke there and
            # this is a 'safe' break point. The idea is to retain
            # any preferred breaks for sequential list operations,
            # like a schwartzian transform.
            if ($rOpts_break_at_old_keyword_breakpoints) {
                if (
                       $next_nonblank_type eq 'k'
                    && $is_keyword_returning_list{$next_nonblank_token}
                    && (   $type =~ /^[=\)\]\}Riw]$/
                        || $type eq 'k'
                        && $is_keyword_returning_list{$token} )
                  )
                {

                    # we actually have to set this break next time through
                    # the loop because if we are at a closing token (such
                    # as '}') which forms a one-line block, this break might
                    # get undone.
                    $want_previous_breakpoint = $i;
                }
            }
        }
        next if ( $type eq 'b' );
        $depth = $nesting_depth_to_go[ $i + 1 ];

        # True if this token is at the most recent old breakpoint, or is
        # the first nonblank token following it.  Computed once here
        # because several branches below test it.
        my $at_old_line_edge =
          ( $i == $i_line_start || $i == $i_line_end );
        my $at_old_logical_break =
          ( $at_old_line_edge && $rOpts_break_at_old_logical_breakpoints );

        # safety check - be sure we always break after a comment
        # Shouldn't happen .. an error here probably means that the
        # nobreak flag did not get turned off correctly during
        # formatting.
        if ( $type eq '#' ) {
            if ( $i != $max_index_to_go ) {
                warning(
"Non-fatal program bug: backup logic needed to break after a comment\n"
                );
                report_definite_bug();
                $nobreak_to_go[$i] = 0;
                set_forced_breakpoint($i);
            }
        }

        # Force breakpoints at certain tokens in long lines.
        # Note that such breakpoints will be undone later if these tokens
        # are fully contained within parens on a line.
        if (

            # break before a keyword within a line
            $type eq 'k'
            && $i > 0

            # if one of these keywords:
            && $token =~ /^(if|unless|while|until|for)$/

            # but do not break at something like '1 while'
            && ( $last_nonblank_type ne 'n' || $i > 2 )

            # and let keywords follow a closing 'do' brace
            && $last_nonblank_block_type ne 'do'

            && (
                $is_long_line

                # or container is broken (by side-comment, etc)
                || (   $next_nonblank_token eq '('
                    && $mate_index_to_go[$i_next_nonblank] < $i )
            )
          )
        {
            set_forced_breakpoint( $i - 1 );
        }

        # remember locations of '||' and '&&' for possible breaks if we
        # decide this is a long logical expression.
        if ( $type eq '||' ) {
            push @{ $rand_or_list[$depth][2] }, $i;
            ++$has_old_logical_breakpoints[$depth]
              if ($at_old_logical_break);
        }
        elsif ( $type eq '&&' ) {
            push @{ $rand_or_list[$depth][3] }, $i;
            ++$has_old_logical_breakpoints[$depth]
              if ($at_old_logical_break);
        }
        elsif ( $type eq 'f' ) {
            push @{ $rfor_semicolon_list[$depth] }, $i;
        }
        elsif ( $type eq 'k' ) {
            if ( $token eq 'and' ) {
                push @{ $rand_or_list[$depth][1] }, $i;
                ++$has_old_logical_breakpoints[$depth]
                  if ($at_old_logical_break);
            }

            # break immediately at 'or's which are probably not in a logical
            # block -- but we will break in logical breaks below so that
            # they do not add to the forced_breakpoint_count
            elsif ( $token eq 'or' ) {
                push @{ $rand_or_list[$depth][0] }, $i;
                ++$has_old_logical_breakpoints[$depth]
                  if ($at_old_logical_break);
                unless ( $is_logical_container{ $container_type[$depth] } )
                {
                    if ($is_long_line) { set_forced_breakpoint($i) }
                    elsif ($at_old_logical_break) {
                        $saw_good_breakpoint = 1;
                    }
                }
            }
            elsif ( $token eq 'if' || $token eq 'unless' ) {
                push @{ $rand_or_list[$depth][4] }, $i;
                if ($at_old_logical_break) {
                    set_forced_breakpoint($i);
                }
            }
        }
        elsif ( $is_assignment{$type} ) {
            $i_equals[$depth] = $i;
        }

        if ($type_sequence) {

            # handle any postponed closing breakpoints
            if ( $token =~ /^[\)\]\}\:]$/ ) {
                if ( $type eq ':' ) {
                    $last_colon_sequence_number = $type_sequence;

                    # TESTING: retain break at a ':' line break
                    if (   $at_old_line_edge
                        && $rOpts_break_at_old_ternary_breakpoints )
                    {

                        # TESTING:
                        set_forced_breakpoint($i);

                        # break at previous '='
                        if ( $i_equals[$depth] > 0 ) {
                            set_forced_breakpoint( $i_equals[$depth] );
                            $i_equals[$depth] = -1;
                        }
                    }
                }
                if ( defined( $postponed_breakpoint{$type_sequence} ) ) {
                    my $inc = ( $type eq ':' ) ? 0 : 1;
                    set_forced_breakpoint( $i - $inc );
                    delete $postponed_breakpoint{$type_sequence};
                }
            }

            # set breaks at ?/: if they will get separated (and are
            # not a ?/: chain), or if the '?' is at the end of the
            # line
            elsif ( $token eq '?' ) {
                my $i_colon = $mate_index_to_go[$i];
                if (
                    $i_colon <= 0    # the ':' is not in this batch
                    || $i == 0       # this '?' is the first token of the line
                    || $i ==
                    $max_index_to_go    # or this '?' is the last token
                  )
                {

                    # don't break at a '?' if preceded by the ':' of a
                    # previous ?/: pair on this line.
                    # This is an attempt to preserve a chain of ?/:
                    # expressions (elsif2.t).  And don't break if
                    # this has a side comment.
                    # NOTE(review): the side-comment test compares the
                    # last *token* text with '#'; confirm that this is
                    # intended rather than a test of the token type.
                    set_forced_breakpoint($i)
                      unless (
                        $type_sequence == (
                            $last_colon_sequence_number +
                              TYPE_SEQUENCE_INCREMENT
                        )
                        || $tokens_to_go[$max_index_to_go] eq '#'
                      );
                    set_closing_breakpoint($i);
                }
            }
        }

#print "LISTX sees: i=$i type=$type  tok=$token  block=$block_type depth=$depth\n";

        #------------------------------------------------------------
        # Handle Increasing Depth..
        #
        # prepare for a new list when depth increases
        # token $i is a '(','{', or '['
        #------------------------------------------------------------
        if ( $depth > $current_depth ) {

            $breakpoint_stack[$depth]       = $forced_breakpoint_count;
            $breakpoint_undo_stack[$depth]  = $forced_breakpoint_undo_count;
            $has_broken_sublist[$depth]     = 0;
            $identifier_count_stack[$depth] = 0;
            $index_before_arrow[$depth]     = -1;
            $interrupted_list[$depth]       = 0;
            $item_count_stack[$depth]       = 0;
            $last_comma_index[$depth]       = undef;
            $last_dot_index[$depth]         = undef;
            $last_nonblank_type[$depth]     = $last_nonblank_type;
            $old_breakpoint_count_stack[$depth]    = $old_breakpoint_count;
            $opening_structure_index_stack[$depth] = $i;
            $rand_or_list[$depth]                  = [];
            $rfor_semicolon_list[$depth]           = [];
            $i_equals[$depth]                      = -1;
            $want_comma_break[$depth]              = 0;
            $container_type[$depth] =
              ( $last_nonblank_type =~ /^(k|=>|&&|\|\||\?|\:|\.)$/ )
              ? $last_nonblank_token
              : "";
            $has_old_logical_breakpoints[$depth] = 0;

            # if line ends here then signal closing token to break
            if ( $next_nonblank_type eq 'b' || $next_nonblank_type eq '#' )
            {
                set_closing_breakpoint($i);
            }

            # Not all lists of values should be vertically aligned..
            $dont_align[$depth] =

              # code BLOCKS are handled at a higher level
              ( $block_type ne "" )

              # certain paren lists
              || ( $type eq '(' ) && (

                # it does not usually look good to align a list of
                # identifiers in a parameter list, as in:
                #    my($var1, $var2, ...)
                # (This test should probably be refined, for now I'm just
                # testing for any keyword)
                ( $last_nonblank_type eq 'k' )

                # a trailing '(' usually indicates a non-list
                || ( $next_nonblank_type eq '(' )
              );

            # patch to outdent opening brace of long if/for/..
            # statements (like this one).  See similar coding in
            # set_continuation_breaks.  We also have to catch it here for
            # short line fragments which otherwise will not go through
            # set_continuation_breaks.
            if (
                $block_type

                # if we have the ')' but not its '(' in this batch..
                && ( $last_nonblank_token eq ')' )
                && $mate_index_to_go[$i_last_nonblank_token] < 0

                # and user wants brace to left
                && !$rOpts->{'opening-brace-always-on-right'}

                && ( $type  eq '{' )    # should be true
                && ( $token eq '{' )    # should be true
              )
            {
                set_forced_breakpoint( $i - 1 );
            }
        }

        #------------------------------------------------------------
        # Handle Decreasing Depth..
        #
        # finish off any old list when depth decreases
        # token $i is a ')','}', or ']'
        #------------------------------------------------------------
        elsif ( $depth < $current_depth ) {

            check_for_new_minimum_depth($depth);

            # force all outer logical containers to break after we see an
            # old breakpoint
            $has_old_logical_breakpoints[$depth] ||=
              $has_old_logical_breakpoints[$current_depth];

            # Patch to break between ') {' if the paren list is broken.
            # There is similar logic in set_continuation_breaks for
            # non-broken lists.
            if (   $token eq ')'
                && $next_nonblank_block_type
                && $interrupted_list[$current_depth]
                && $next_nonblank_type eq '{'
                && !$rOpts->{'opening-brace-always-on-right'} )
            {
                set_forced_breakpoint($i);
            }

#print "LISTY sees: i=$i type=$type  tok=$token  block=$block_type depth=$depth next=$next_nonblank_type next_block=$next_nonblank_block_type inter=$interrupted_list[$current_depth]\n";

            # set breaks at commas if necessary
            my ( $bp_count, $do_not_break_apart ) =
              set_comma_breakpoints($current_depth);

            my $i_opening = $opening_structure_index_stack[$current_depth];
            my $saw_opening_structure = ( $i_opening >= 0 );

            # this term is long if we had to break at interior commas..
            my $is_long_term = $bp_count > 0;

            # ..or if the length between opening and closing parens exceeds
            # allowed line length
            if ( !$is_long_term && $saw_opening_structure ) {
                my $i_opening_minus = find_token_starting_list($i_opening);

                # Note: we have to allow for one extra space after a
                # closing token so that we do not strand a comma or
                # semicolon, hence the '>=' here (oneline.t)
                $is_long_term =
                  excess_line_length( $i_opening_minus, $i ) >= 0;
            }

            # We've set breaks after all comma-arrows.  Now we have to
            # undo them if this can be a one-line block
            # (the only breakpoints set will be due to comma-arrows)
            if (

                # user doesn't require breaking after all comma-arrows
                ( $rOpts_comma_arrow_breakpoints != 0 )

                # and if the opening structure is in this batch
                && $saw_opening_structure

                # and either on the same old line
                && (
                    $old_breakpoint_count_stack[$current_depth] ==
                    $last_old_breakpoint_count

                    # or user wants to form long blocks with arrows
                    || $rOpts_comma_arrow_breakpoints == 2
                )

                # and we made some breakpoints between the opening and closing
                && ( $breakpoint_undo_stack[$current_depth] <
                    $forced_breakpoint_undo_count )

                # and this block is short enough to fit on one line
                # Note: use < because need 1 more space for possible comma
                && !$is_long_term

              )
            {
                undo_forced_breakpoint_stack(
                    $breakpoint_undo_stack[$current_depth] );
            }

            # now see if we have any comma breakpoints left
            my $has_comma_breakpoints =
              ( $breakpoint_stack[$current_depth] !=
                  $forced_breakpoint_count );

            # update broken-sublist flag of the outer container
            $has_broken_sublist[$depth] =
                 $has_broken_sublist[$depth]
              || $has_broken_sublist[$current_depth]
              || $is_long_term
              || $has_comma_breakpoints;

            # Having come to the closing ')', '}', or ']', now we have to
            # decide if we should 'open up' the structure by placing breaks
            # at the opening and closing containers.  This is a tricky
            # decision.  Here are some of the basic considerations:
            #
            # -If this is a BLOCK container, then any breakpoints will have
            # already been set (and according to user preferences), so we
            # need do nothing here.
            #
            # -If we have a comma-separated list for which we can align the
            # list items, then we need to do so because otherwise the
            # vertical aligner cannot currently do the alignment.
            #
            # -If this container does itself contain a container which has
            # been broken open, then it should be broken open to properly
            # show the structure.
            #
            # -If there is nothing to align, and no other reason to break
            # apart, then do not do it.
            #
            # We will not break open the parens of a long but 'simple'
            # logical expression.  For example:
            #
            # This is an example of a simple logical expression and its
            # formatting:
            #
            #     if ( $bigwasteofspace1 && $bigwasteofspace2
            #         || $bigwasteofspace3 && $bigwasteofspace4 )
            #
            # Most people would prefer this to the 'spacey' version:
            #
            #     if (
            #         $bigwasteofspace1 && $bigwasteofspace2
            #         || $bigwasteofspace3 && $bigwasteofspace4
            #     )
            #
            # To illustrate the rules for breaking logical expressions,
            # consider:
            #
            #             FULLY DENSE:
            #             if ( $opt_excl
            #                 and ( exists $ids_excl_uc{$id_uc}
            #                     or grep $id_uc =~ /$_/, @ids_excl_uc ))
            #
            # This is on the verge of being difficult to read.  The current
            # default is to open it up like this:
            #
            #             DEFAULT:
            #             if (
            #                 $opt_excl
            #                 and ( exists $ids_excl_uc{$id_uc}
            #                     or grep $id_uc =~ /$_/, @ids_excl_uc )
            #               )
            #
            # This is a compromise which tries to avoid being too dense and
            # too spacey.  A more spaced version would be:
            #
            #             SPACEY:
            #             if (
            #                 $opt_excl
            #                 and (
            #                     exists $ids_excl_uc{$id_uc}
            #                     or grep $id_uc =~ /$_/, @ids_excl_uc
            #                 )
            #               )
            #
            # Some people might prefer the spacey version -- an option could
            # be added.  The innermost expression contains a long block
            # '( exists $ids_...  ')'.
            #
            # Here is how the logic goes: We will force a break at the 'or'
            # that the innermost expression contains, but we will not break
            # apart its opening and closing containers because (1) it
            # contains no multi-line sub-containers itself, and (2) there is
            # no alignment to be gained by breaking it open like this
            #
            #             and (
            #                 exists $ids_excl_uc{$id_uc}
            #                 or grep $id_uc =~ /$_/, @ids_excl_uc
            #             )
            #
            # (although this looks perfectly ok and might be good for long
            # expressions).  The outer 'if' container, though, contains a
            # broken sub-container, so it will be broken open to avoid too
            # much density.  Also, since it contains no 'or's, there will be
            # a forced break at its 'and'.

            # set some flags telling something about this container..
            my $is_simple_logical_expression = 0;
            if (   $item_count_stack[$current_depth] == 0
                && $saw_opening_structure
                && $tokens_to_go[$i_opening] eq '('
                && $is_logical_container{ $container_type[$current_depth] }
              )
            {

                # This seems to be a simple logical expression with
                # no existing breakpoints.  Set a flag to prevent
                # opening it up.
                if ( !$has_comma_breakpoints ) {
                    $is_simple_logical_expression = 1;
                }

                # This seems to be a simple logical expression with
                # breakpoints (broken sublists, for example).  Break
                # at all 'or's and '||'s.
                else {
                    set_logical_breakpoints($current_depth);
                }
            }

            if ( $is_long_term
                && @{ $rfor_semicolon_list[$current_depth] } )
            {
                set_for_semicolon_breakpoints($current_depth);

                # open up a long 'for' or 'foreach' container to allow
                # leading term alignment unless -lp is used.
                $has_comma_breakpoints = 1
                  unless $rOpts_line_up_parentheses;
            }

            if (

                # breaks for code BLOCKS are handled at a higher level
                !$block_type

                # we do not need to break at the top level of an 'if'
                # type expression
                && !$is_simple_logical_expression

                ## modification to keep ': (' containers vertically tight;
                ## but probably better to let user set -vt=1 to avoid
                ## inconsistency with other paren types
                ## && ($container_type[$current_depth] ne ':')

                # otherwise, we require one of these reasons for breaking:
                && (

                    # - this term has forced line breaks
                    $has_comma_breakpoints

                    # - the opening container is separated from this batch
                    #   for some reason (comment, blank line, code block)
                    # - this is a non-paren container spanning multiple lines
                    || !$saw_opening_structure

                    # - this is a long block contained in another breakable
                    #   container
                    || (   $is_long_term
                        && $container_environment_to_go[$i_opening] ne
                        'BLOCK' )
                )
              )
            {

                # For -lp option, we must put a breakpoint before
                # the token which has been identified as starting
                # this indentation level.  This is necessary for
                # proper alignment.
                if ( $rOpts_line_up_parentheses && $saw_opening_structure )
                {
                    my $item = $leading_spaces_to_go[ $i_opening + 1 ];
                    if (   $i_opening + 1 < $max_index_to_go
                        && $types_to_go[ $i_opening + 1 ] eq 'b' )
                    {
                        $item = $leading_spaces_to_go[ $i_opening + 2 ];
                    }
                    if ( defined($item) ) {
                        my $i_start_2 = $item->get_STARTING_INDEX();
                        if (
                            defined($i_start_2)

                            # we are breaking after an opening brace, paren,
                            # so don't break before it too
                            && $i_start_2 ne $i_opening
                          )
                        {

                            # Only break for breakpoints at the same
                            # indentation level as the opening paren
                            my $test1 = $nesting_depth_to_go[$i_opening];
                            my $test2 = $nesting_depth_to_go[$i_start_2];
                            if ( $test2 == $test1 ) {
                                set_forced_breakpoint( $i_start_2 - 1 );
                            }
                        }
                    }
                }

                # break after opening structure.
                # note: break before closing structure will be automatic
                if ( $minimum_depth <= $current_depth ) {

                    set_forced_breakpoint($i_opening)
                      unless ( $do_not_break_apart
                        || is_unbreakable_container($current_depth) );

                    # break at '.' of lower depth level before opening token
                    if ( $last_dot_index[$depth] ) {
                        set_forced_breakpoint( $last_dot_index[$depth] );
                    }

                    # break before opening structure if preceded by another
                    # closing structure and a comma.  This is normally
                    # done by the previous closing brace, but not
                    # if it was a one-line block.
                    if ( $i_opening > 2 ) {
                        my $i_prev =
                          ( $types_to_go[ $i_opening - 1 ] eq 'b' )
                          ? $i_opening - 2
                          : $i_opening - 1;

                        if (   $types_to_go[$i_prev] eq ','
                            && $types_to_go[ $i_prev - 1 ] =~ /^[\)\}]$/ )
                        {
                            set_forced_breakpoint($i_prev);
                        }

                        # also break before something like ':('  or '?('
                        # if appropriate.
                        elsif (
                            $types_to_go[$i_prev] =~ /^([k\:\?]|&&|\|\|)$/ )
                        {
                            my $token_prev = $tokens_to_go[$i_prev];
                            if ( $want_break_before{$token_prev} ) {
                                set_forced_breakpoint($i_prev);
                            }
                        }
                    }
                }

                # break after comma following closing structure
                if ( $next_type eq ',' ) {
                    set_forced_breakpoint( $i + 1 );
                }

                # break before an '=' following closing structure
                if (
                    $is_assignment{$next_nonblank_type}
                    && ( $breakpoint_stack[$current_depth] !=
                        $forced_breakpoint_count )
                  )
                {
                    set_forced_breakpoint($i);
                }

                # break at any comma before the opening structure.  Added
                # for -lp, but seems to be good in general.  It isn't
                # obvious how far back to look; the '5' below seems to
                # work well and will catch the comma in something like
                #  push @list, myfunc( $param, $param, ..

                my $icomma = $last_comma_index[$depth];
                if ( defined($icomma) && ( $i_opening - $icomma ) < 5 ) {
                    unless ( $forced_breakpoint_to_go[$icomma] ) {
                        set_forced_breakpoint($icomma);
                    }
                }
            }    # end logic to open up a container

            # Break open a logical container if it was already open
            elsif ($is_simple_logical_expression
                && $has_old_logical_breakpoints[$current_depth] )
            {
                set_logical_breakpoints($current_depth);
            }

            # Handle long container which does not get opened up
            elsif ($is_long_term) {

                # must set fake breakpoint to alert outer containers that
                # they are complex
                set_fake_breakpoint();
            }
        }

        #------------------------------------------------------------
        # Handle this token
        #------------------------------------------------------------

        $current_depth = $depth;

        # handle comma-arrow
        if ( $type eq '=>' ) {
            next if ( $last_nonblank_type eq '=>' );
            next if $rOpts_break_at_old_comma_breakpoints;
            next if $rOpts_comma_arrow_breakpoints == 3;
            $want_comma_break[$depth]   = 1;
            $index_before_arrow[$depth] = $i_last_nonblank_token;
            next;
        }

        elsif ( $type eq '.' ) {
            $last_dot_index[$depth] = $i;
        }

        # Turn off alignment if we are sure that this is not a list
        # environment.  To be safe, we will do this if we see certain
        # non-list tokens, such as ';', and also the environment is
        # not a list.  Note that '=' could be in any of the = operators
        # (lextest.t). We can't just use the reported environment
        # because it can be incorrect in some cases.
        elsif ( ( $type =~ /^[\;\<\>\~]$/ || $is_assignment{$type} )
            && $container_environment_to_go[$i] ne 'LIST' )
        {
            $dont_align[$depth]         = 1;
            $want_comma_break[$depth]   = 0;
            $index_before_arrow[$depth] = -1;
        }

        # now just handle any commas
        next unless ( $type eq ',' );

        $last_dot_index[$depth]   = undef;
        $last_comma_index[$depth] = $i;

        # break here if this comma follows a '=>'
        # but not if there is a side comment after the comma
        if ( $want_comma_break[$depth] ) {

            if ( $next_nonblank_type =~ /^[\)\}\]R]$/ ) {
                $want_comma_break[$depth]   = 0;
                $index_before_arrow[$depth] = -1;
                next;
            }

            set_forced_breakpoint($i) unless ( $next_nonblank_type eq '#' );

            # break before the previous token if it looks safe
            # Example of something that we will not try to break before:
            #   DBI::SQL_SMALLINT() => $ado_consts->{adSmallInt},
            # Also we don't want to break at a binary operator (like +):
            # $c->createOval(
            #    $x + $R, $y +
            #    $R => $x - $R,
            #    $y - $R, -fill   => 'black',
            # );
            my $ibreak = $index_before_arrow[$depth] - 1;
            if (   $ibreak > 0
                && $tokens_to_go[ $ibreak + 1 ] !~ /^[\)\}\]]$/ )
            {
                if ( $tokens_to_go[$ibreak] eq '-' ) { $ibreak-- }
                if ( $types_to_go[$ibreak]  eq 'b' ) { $ibreak-- }
                if ( $types_to_go[$ibreak] =~ /^[,wiZCUG\(\{\[]$/ ) {

                    # don't break pointer calls, such as the following:
                    #  File::Spec->curdir  => 1,
                    # (This is tokenized as adjacent 'w' tokens)
                    if ( $tokens_to_go[ $ibreak + 1 ] !~ /^->/ ) {
                        set_forced_breakpoint($ibreak);
                    }
                }
            }

            $want_comma_break[$depth]   = 0;
            $index_before_arrow[$depth] = -1;

            # handle list which mixes '=>'s and ','s:
            # treat any list items so far as an interrupted list
            $interrupted_list[$depth] = 1;
            next;
        }

        # break after all commas above starting depth
        if ( $depth < $starting_depth && !$dont_align[$depth] ) {
            set_forced_breakpoint($i) unless ( $next_nonblank_type eq '#' );
            next;
        }

        # add this comma to the list..
        my $item_count = $item_count_stack[$depth];
        if ( $item_count == 0 ) {

            # but do not form a list with no opening structure
            # for example:

            #            open INFILE_COPY, ">$input_file_copy"
            #              or die ("very long message");

            if ( ( $opening_structure_index_stack[$depth] < 0 )
                && $container_environment_to_go[$i] eq 'BLOCK' )
            {
                $dont_align[$depth] = 1;
            }
        }

        $comma_index[$depth][$item_count] = $i;
        ++$item_count_stack[$depth];
        if ( $last_nonblank_type =~ /^[iR\]]$/ ) {
            $identifier_count_stack[$depth]++;
        }
    }

    #-------------------------------------------
    # end of loop over all tokens in this batch
    #-------------------------------------------

    # set breaks for any unfinished lists ..
    for ( my $dd = $current_depth ; $dd >= $minimum_depth ; $dd-- ) {

        $interrupted_list[$dd] = 1;
        $has_broken_sublist[$dd] = 1 if ( $dd < $current_depth );
        set_comma_breakpoints($dd);
        set_logical_breakpoints($dd)
          if ( $has_old_logical_breakpoints[$dd] );
        set_for_semicolon_breakpoints($dd);

        # break open container...
        my $i_opening = $opening_structure_index_stack[$dd];
        set_forced_breakpoint($i_opening)
          unless (
            is_unbreakable_container($dd)

            # Avoid a break which would place an isolated ' or "
            # on a line
            || (   $type eq 'Q'
                && $i_opening >= $max_index_to_go - 2
                && $token =~ /^['"]$/ )
          );
    }

    # Return a flag indicating if the input file had some good breakpoints.
    # This flag will be used to force a break in a line shorter than the
    # allowed line length.
    if ( $has_old_logical_breakpoints[$current_depth] ) {
        $saw_good_breakpoint = 1;
    }
    return $saw_good_breakpoint;
}

14346

} # end scan_list

14347

14348

sub find_token_starting_list {

    # When testing to see if a block will fit on one line, some
    # previous token(s) may also need to be on the line; particularly
    # if this is a sub call.  So we will look back at least one
    # token. NOTE: This isn't perfect, but not critical, because
    # if we mis-identify a block, it will be wrapped and therefore
    # fixed the next time it is formatted.
    #
    # Given:   $i_opening_paren = index in @tokens_to_go / @types_to_go
    #          of an opening container token
    # Returns: the index at which the length estimate for the container
    #          should start; never greater than $i_opening_paren
    my $i_opening_paren = shift;
    my $i_opening_minus = $i_opening_paren;

    # Nothing precedes the first token of the batch.  Without this guard
    # the lookups below would use index -1, which Perl resolves to the
    # LAST token of the batch, and the sub could return -1.
    return $i_opening_minus if ( $i_opening_paren <= 0 );

    my $im1    = $i_opening_paren - 1;
    my $im2    = $i_opening_paren - 2;
    my $typem1 = $types_to_go[$im1];
    my $typem2 = $im2 >= 0 ? $types_to_go[$im2] : 'b';

    if ( $typem1 eq ',' || ( $typem1 eq 'b' && $typem2 eq ',' ) ) {

        # the previous nonblank token is a comma: start at the opening
        # token itself, so there is nothing to change
    }
    elsif ( $tokens_to_go[$i_opening_paren] eq '(' ) {
        $i_opening_minus = $im1;

        # walk back to improve length estimate; stop at any container
        # token, blank, or comma.  The parens in the character class are
        # escaped literals -- an unescaped '$' here would be taken as the
        # start of a variable to interpolate.
        for ( my $j = $im1 ; $j >= 0 ; $j-- ) {
            last if ( $types_to_go[$j] =~ /^[\(\[\{L\}\]\)Rb,]$/ );
            $i_opening_minus = $j;
        }

        # do not start the estimate on a blank
        if ( $types_to_go[$i_opening_minus] eq 'b' ) { $i_opening_minus++ }
    }

    # for other containers, include a keyword which directly precedes
    # the opening token (with or without an intervening blank)
    elsif ( $typem1 eq 'k' ) { $i_opening_minus = $im1 }
    elsif ( $typem1 eq 'b' && $im2 >= 0 && $types_to_go[$im2] eq 'k' ) {
        $i_opening_minus = $im2;
    }
    return $i_opening_minus;
}

14382

14383

{ # begin set_comma_breakpoints_do

14384

14385

# Lookup table of keywords whose first argument is special (see the
# note in the BEGIN block); a true value marks such a keyword.
my %is_keyword_with_special_leading_term;

BEGIN {

    # These keywords have prototypes which allow a special leading item
    # followed by a list.  A lexical list is used rather than the global
    # @_ so that nothing outside this block is disturbed.
    my @keywords =
      qw(formline grep kill map printf sprintf push chmod join pack unshift);
    @is_keyword_with_special_leading_term{@keywords} =
      (1) x scalar(@keywords);
}

14395

14396

sub set_comma_breakpoints_do {

14397

14398

# Given a list with some commas, set breakpoints at some of the

14399

# commas, if necessary, to make it easy to read. This list is

14400

# an example:

14401

my (

14402

$depth, $i_opening_paren, $i_closing_paren,

14403

$item_count, $identifier_count, $rcomma_index,

14404

$next_nonblank_type, $list_type, $interrupted,

14405

$rdo_not_break_apart, $must_break_open,

14406

) = @_;

14407

14408

# nothing to do if no commas seen

14409

return if ( $item_count < 1 );

14410

my $i_first_comma = $$rcomma_index[0];

14411

my $i_true_last_comma = $$rcomma_index[ $item_count - 1 ];

14412

my $i_last_comma = $i_true_last_comma;

14413

if ( $i_last_comma >= $max_index_to_go ) {

14414

$i_last_comma = $$rcomma_index[ --$item_count - 1 ];

14415

return if ( $item_count < 1 );

14416

}

14417

14418

#---------------------------------------------------------------

14419

# find lengths of all items in the list to calculate page layout

14420

#---------------------------------------------------------------

14421

my $comma_count = $item_count;

14422

my @item_lengths;

14423

my @i_term_begin;

14424

my @i_term_end;

14425

my @i_term_comma;

14426

my $i_prev_plus;

14427

my @max_length = ( 0, 0 );

14428

my $first_term_length;

14429

my $i = $i_opening_paren;

14430

my $is_odd = 1;

14431

14432

for ( my $j = 0 ; $j < $comma_count ; $j++ ) {

14433

$is_odd = 1 - $is_odd;

14434

$i_prev_plus = $i + 1;

14435

$i = $$rcomma_index[$j];

14436

14437

my $i_term_end =

14438

( $types_to_go[ $i - 1 ] eq 'b' ) ? $i - 2 : $i - 1;

14439

my $i_term_begin =

14440

( $types_to_go[$i_prev_plus] eq 'b' )

14441

? $i_prev_plus + 1

14442

: $i_prev_plus;

14443

push @i_term_begin, $i_term_begin;

14444

push @i_term_end, $i_term_end;

14445

push @i_term_comma, $i;

14446

14447

# note: currently adding 2 to all lengths (for comma and space)

14448

my $length =

14449

2 + token_sequence_length( $i_term_begin, $i_term_end );

14450

push @item_lengths, $length;

14451

14452

if ( $j == 0 ) {

14453

$first_term_length = $length;

14454

}

14455

else {

14456

14457

if ( $length > $max_length[$is_odd] ) {

14458

$max_length[$is_odd] = $length;

14459

}

14460

}

14461

}

14462

14463

# now we have to make a distinction between the comma count and item

14464

# count, because the item count will be one greater than the comma

14465

# count if the last item is not terminated with a comma

14466

my $i_b =

14467

( $types_to_go[ $i_last_comma + 1 ] eq 'b' )

14468

? $i_last_comma + 1

14469

: $i_last_comma;

14470

my $i_e =

14471

( $types_to_go[ $i_closing_paren - 1 ] eq 'b' )

14472

? $i_closing_paren - 2

14473

: $i_closing_paren - 1;

14474

my $i_effective_last_comma = $i_last_comma;

14475

14476

my $last_item_length = token_sequence_length( $i_b + 1, $i_e );

14477

14478

if ( $last_item_length > 0 ) {

14479

14480

# add 2 to length because other lengths include a comma and a blank

14481

$last_item_length += 2;

14482

push @item_lengths, $last_item_length;

14483

push @i_term_begin, $i_b + 1;

14484

push @i_term_end, $i_e;

14485

push @i_term_comma, undef;

14486

14487

my $i_odd = $item_count % 2;

14488

14489

if ( $last_item_length > $max_length[$i_odd] ) {

14490

$max_length[$i_odd] = $last_item_length;

14491

}

14492

14493

$item_count++;

14494

$i_effective_last_comma = $i_e + 1;

14495

14496

if ( $types_to_go[ $i_b + 1 ] =~ /^[iR\]]$/ ) {

14497

$identifier_count++;

14498

}

14499

}

14500

14501

#---------------------------------------------------------------

14502

# End of length calculations

14503

#---------------------------------------------------------------

14504

14505

#---------------------------------------------------------------

14506

# Compound List Rule 1:

14507

# Break at (almost) every comma for a list containing a broken

14508

# sublist. This has higher priority than the Interrupted List

14509

# Rule.

14510

#---------------------------------------------------------------

14511

if ( $has_broken_sublist[$depth] ) {

14512

14513

# Break at every comma except for a comma between two

14514

# simple, small terms. This prevents long vertical

14515

# columns of, say, just 0's.

14516

my $small_length = 10; # 2 + actual maximum length wanted

14517

14518

# We'll insert a break in long runs of small terms to

14519

# allow alignment in uniform tables.

14520

my $skipped_count = 0;

14521

my $columns = table_columns_available($i_first_comma);

14522

my $fields = int( $columns / $small_length );

14523

if ( $rOpts_maximum_fields_per_table

14524

&& $fields > $rOpts_maximum_fields_per_table )

14525

{

14526

$fields = $rOpts_maximum_fields_per_table;

14527

}

14528

my $max_skipped_count = $fields - 1;

14529

14530

my $is_simple_last_term = 0;

14531

my $is_simple_next_term = 0;

14532

foreach my $j ( 0 .. $item_count ) {

14533

$is_simple_last_term = $is_simple_next_term;

14534

$is_simple_next_term = 0;

14535

if ( $j < $item_count

14536

&& $i_term_end[$j] == $i_term_begin[$j]

14537

&& $item_lengths[$j] <= $small_length )

14538

{

14539

$is_simple_next_term = 1;

14540

}

14541

next if $j == 0;

14542

if ( $is_simple_last_term

14543

&& $is_simple_next_term

14544

&& $skipped_count < $max_skipped_count )

14545

{

14546

$skipped_count++;

14547

}

14548

else {

14549

$skipped_count = 0;

14550

my $i = $i_term_comma[ $j - 1 ];

14551

last unless defined $i;

14552

set_forced_breakpoint($i);

14553

}

14554

}

14555

14556

# always break at the last comma if this list is

14557

# interrupted; we wouldn't want to leave a terminal '{', for

14558

# example.

14559

if ($interrupted) { set_forced_breakpoint($i_true_last_comma) }

14560

return;

14561

}

14562

14563

#my ( $a, $b, $c ) = caller();

14564

#print "LISTX: in set_list $a $c interupt=$interrupted count=$item_count

14565

#i_first = $i_first_comma i_last=$i_last_comma max=$max_index_to_go\n";

14566

#print "depth=$depth has_broken=$has_broken_sublist[$depth] is_multi=$is_multiline opening_paren=($i_opening_paren) \n";

14567

14568

#---------------------------------------------------------------

14569

# Interrupted List Rule:

14570

# A list is is forced to use old breakpoints if it was interrupted

14571

# by side comments or blank lines, or requested by user.

14572

#---------------------------------------------------------------

14573

if ( $rOpts_break_at_old_comma_breakpoints

14574

|| $interrupted

14575

|| $i_opening_paren < 0 )

14576

{

14577

copy_old_breakpoints( $i_first_comma, $i_true_last_comma );

14578

return;

14579

}

14580

14581

#---------------------------------------------------------------

14582

# Looks like a list of items. We have to look at it and size it up.

14583

#---------------------------------------------------------------

14584

14585

my $opening_token = $tokens_to_go[$i_opening_paren];

14586

my $opening_environment =

14587

$container_environment_to_go[$i_opening_paren];

14588

14589

#-------------------------------------------------------------------

14590

# Return if this will fit on one line

14591

#-------------------------------------------------------------------

14592

14593

my $i_opening_minus = find_token_starting_list($i_opening_paren);

14594

return

14595

unless excess_line_length( $i_opening_minus, $i_closing_paren ) > 0;

14596

14597

#-------------------------------------------------------------------

14598

# Now we know that this block spans multiple lines; we have to set

14599

# at least one breakpoint -- real or fake -- as a signal to break

14600

# open any outer containers.

14601

#-------------------------------------------------------------------

14602

set_fake_breakpoint();

14603

14604

# be sure we do not extend beyond the current list length

14605

if ( $i_effective_last_comma >= $max_index_to_go ) {

14606

$i_effective_last_comma = $max_index_to_go - 1;

14607

}

14608

14609

# Set a flag indicating if we need to break open to keep -lp

14610

# items aligned. This is necessary if any of the list terms

14611

# exceeds the available space after the '('.

14612

my $need_lp_break_open = $must_break_open;

14613

if ( $rOpts_line_up_parentheses && !$must_break_open ) {

14614

my $columns_if_unbroken = $rOpts_maximum_line_length -

14615

total_line_length( $i_opening_minus, $i_opening_paren );

14616

$need_lp_break_open =

14617

( $max_length[0] > $columns_if_unbroken )

14618

|| ( $max_length[1] > $columns_if_unbroken )

14619

|| ( $first_term_length > $columns_if_unbroken );

14620

}

14621

14622

# Specify if the list must have an even number of fields or not.

14623

# It is generally safest to assume an even number, because the

14624

# list items might be a hash list. But if we can be sure that

14625

# it is not a hash, then we can allow an odd number for more

14626

# flexibility.

14627

my $odd_or_even = 2; # 1 = odd field count ok, 2 = want even count

14628

14629

if ( $identifier_count >= $item_count - 1

14630

|| $is_assignment{$next_nonblank_type}

14631

|| ( $list_type && $list_type ne '=>' && $list_type !~ /^[\:\?]$/ )

14632

)

14633

{

14634

$odd_or_even = 1;

14635

}

14636

14637

# do we have a long first term which should be

14638

# left on a line by itself?

14639

my $use_separate_first_term = (

14640

$odd_or_even == 1 # only if we can use 1 field/line

14641

&& $item_count > 3 # need several items

14642

&& $first_term_length >

14643

2 * $max_length[0] - 2 # need long first term

14644

&& $first_term_length >

14645

2 * $max_length[1] - 2 # need long first term

14646

);

14647

14648

# or do we know from the type of list that the first term should

14649

# be placed alone?

14650

if ( !$use_separate_first_term ) {

14651

if ( $is_keyword_with_special_leading_term{$list_type} ) {

14652

$use_separate_first_term = 1;

14653

14654

# should the container be broken open?

14655

if ( $item_count < 3 ) {

14656

if ( $i_first_comma - $i_opening_paren < 4 ) {

14657

$$rdo_not_break_apart = 1;

14658

}

14659

}

14660

elsif ($first_term_length < 20

14661

&& $i_first_comma - $i_opening_paren < 4 )

14662

{

14663

my $columns = table_columns_available($i_first_comma);

14664

if ( $first_term_length < $columns ) {

14665

$$rdo_not_break_apart = 1;

14666

}

14667

}

14668

}

14669

}

14670

14671

# if so,

14672

if ($use_separate_first_term) {

14673

14674

# ..set a break and update starting values

14675

$use_separate_first_term = 1;

14676

set_forced_breakpoint($i_first_comma);

14677

$i_opening_paren = $i_first_comma;

14678

$i_first_comma = $$rcomma_index[1];

14679

$item_count--;

14680

return if $comma_count == 1;

14681

shift @item_lengths;

14682

shift @i_term_begin;

14683

shift @i_term_end;

14684

shift @i_term_comma;

14685

}

14686

14687

# if not, update the metrics to include the first term

14688

else {

14689

if ( $first_term_length > $max_length[0] ) {

14690

$max_length[0] = $first_term_length;

14691

}

14692

}

14693

14694

# Field width parameters

14695

my $pair_width = ( $max_length[0] + $max_length[1] );

14696

my $max_width =

14697

( $max_length[0] > $max_length[1] ) ? $max_length[0] : $max_length[1];

14698

14699

# Number of free columns across the page width for laying out tables

14700

my $columns = table_columns_available($i_first_comma);

14701

14702

# Estimated maximum number of fields which fit this space

14703

# This will be our first guess

14704

my $number_of_fields_max =

14705

maximum_number_of_fields( $columns, $odd_or_even, $max_width,

14706

$pair_width );

14707

my $number_of_fields = $number_of_fields_max;

14708

14709

# Find the best-looking number of fields

14710

# and make this our second guess if possible

14711

my ( $number_of_fields_best, $ri_ragged_break_list,

14712

$new_identifier_count )

14713

= study_list_complexity( \@i_term_begin, \@i_term_end, \@item_lengths,

14714

$max_width );

14715

14716

if ( $number_of_fields_best != 0

14717

&& $number_of_fields_best < $number_of_fields_max )

14718

{

14719

$number_of_fields = $number_of_fields_best;

14720

}

14721

14722

# ----------------------------------------------------------------------

14723

# If we are crowded and the -lp option is being used, try to

14724

# undo some indentation

14725

# ----------------------------------------------------------------------

14726

if (

14727

$rOpts_line_up_parentheses

14728

&& (

14729

$number_of_fields == 0

14730

|| ( $number_of_fields == 1

14731

&& $number_of_fields != $number_of_fields_best )

14732

)

14733

)

14734

{

14735

my $available_spaces = get_AVAILABLE_SPACES_to_go($i_first_comma);

14736

if ( $available_spaces > 0 ) {

14737

14738

my $spaces_wanted = $max_width - $columns; # for 1 field

14739

14740

if ( $number_of_fields_best == 0 ) {

14741

$number_of_fields_best =

14742

get_maximum_fields_wanted( \@item_lengths );

14743

}

14744

14745

if ( $number_of_fields_best != 1 ) {

14746

my $spaces_wanted_2 =

14747

1 + $pair_width - $columns; # for 2 fields

14748

if ( $available_spaces > $spaces_wanted_2 ) {

14749

$spaces_wanted = $spaces_wanted_2;

14750

}

14751

}

14752

14753

if ( $spaces_wanted > 0 ) {

14754

my $deleted_spaces =

14755

reduce_lp_indentation( $i_first_comma, $spaces_wanted );

14756

14757

# redo the math

14758

if ( $deleted_spaces > 0 ) {

14759

$columns = table_columns_available($i_first_comma);

14760

$number_of_fields_max =

14761

maximum_number_of_fields( $columns, $odd_or_even,

14762

$max_width, $pair_width );

14763

$number_of_fields = $number_of_fields_max;

14764

14765

if ( $number_of_fields_best == 1

14766

&& $number_of_fields >= 1 )

14767

{

14768

$number_of_fields = $number_of_fields_best;

14769

}

14770

}

14771

}

14772

}

14773

}

14774

14775

# try for one column if two won't work

14776

if ( $number_of_fields <= 0 ) {

14777

$number_of_fields = int( $columns / $max_width );

14778

}

14779

14780

# The user can place an upper bound on the number of fields,

14781

# which can be useful for doing maintenance on tables

14782

if ( $rOpts_maximum_fields_per_table

14783

&& $number_of_fields > $rOpts_maximum_fields_per_table )

14784

{

14785

$number_of_fields = $rOpts_maximum_fields_per_table;

14786

}

14787

14788

# How many columns (characters) and lines would this container take

14789

# if no additional whitespace were added?

14790

my $packed_columns = token_sequence_length( $i_opening_paren + 1,

14791

$i_effective_last_comma + 1 );

14792

if ( $columns <= 0 ) { $columns = 1 } # avoid divide by zero

14793

my $packed_lines = 1 + int( $packed_columns / $columns );

14794

14795

# are we an item contained in an outer list?

14796

my $in_hierarchical_list = $next_nonblank_type =~ /^[\}\,]$/;

14797

14798

if ( $number_of_fields <= 0 ) {

14799

14800

# #---------------------------------------------------------------

14801

# # We're in trouble. We can't find a single field width that works.

14802

# # There is no simple answer here; we may have a single long list

14803

# # item, or many.

14804

# #---------------------------------------------------------------

14805

#

14806

# In many cases, it may be best to not force a break if there is just one

14807

# comma, because the standard continuation break logic will do a better

14808

# job without it.

14809

#

14810

# In the common case that all but one of the terms can fit

14811

# on a single line, it may look better not to break open the

14812

# containing parens. Consider, for example

14813

#

14814

# $color =

14815

# join ( '/',

14816

# sort { $color_value{$::a} <=> $color_value{$::b}; }

14817

# keys %colors );

14818

#

14819

# which will look like this with the container broken:

14820

#

14821

# $color = join (

14822

# '/',

14823

# sort { $color_value{$::a} <=> $color_value{$::b}; } keys %colors

14824

# );

14825

#

14826

# Here is an example of this rule for a long last term:

14827

#

14828

# log_message( 0, 256, 128,

14829

# "Number of routes in adj-RIB-in to be considered: $peercount" );

14830

#

14831

# And here is an example with a long first term:

14832

#

14833

# $s = sprintf(

14834

# "%2d wallclock secs (%$f usr %$f sys + %$f cusr %$f csys = %$f CPU)",

14835

# $r, $pu, $ps, $cu, $cs, $tt

14836

# )

14837

# if $style eq 'all';

14838

14839

my $i_last_comma = $$rcomma_index[ $comma_count - 1 ];

14840

my $long_last_term = excess_line_length( 0, $i_last_comma ) <= 0;

14841

my $long_first_term =

14842

excess_line_length( $i_first_comma + 1, $max_index_to_go ) <= 0;

14843

14844

# break at every comma ...

14845

if (

14846

14847

# if requested by user or is best looking

14848

$number_of_fields_best == 1

14849

14850

# or if this is a sublist of a larger list

14851

|| $in_hierarchical_list

14852

14853

# or if multiple commas and we dont have a long first or last

14854

# term

14855

|| ( $comma_count > 1

14856

&& !( $long_last_term || $long_first_term ) )

14857

)

14858

{

14859

foreach ( 0 .. $comma_count - 1 ) {

14860

set_forced_breakpoint( $$rcomma_index[$_] );

14861

}

14862

}

14863

elsif ($long_last_term) {

14864

14865

set_forced_breakpoint($i_last_comma);

14866

$$rdo_not_break_apart = 1 unless $must_break_open;

14867

}

14868

elsif ($long_first_term) {

14869

14870

set_forced_breakpoint($i_first_comma);

14871

}

14872

else {

14873

14874

# let breaks be defined by default bond strength logic

14875

}

14876

return;

14877

}

14878

14879

# --------------------------------------------------------

14880

# We have a tentative field count that seems to work.

14881

# How many lines will this require?

14882

# --------------------------------------------------------

14883

my $formatted_lines = $item_count / ($number_of_fields);

14884

if ( $formatted_lines != int $formatted_lines ) {

14885

$formatted_lines = 1 + int $formatted_lines;

14886

}

14887

14888

# So far we've been trying to fill out to the right margin. But

14889

# compact tables are easier to read, so let's see if we can use fewer

14890

# fields without increasing the number of lines.

14891

$number_of_fields =

14892

compactify_table( $item_count, $number_of_fields, $formatted_lines,

14893

$odd_or_even );

14894

14895

# How many spaces across the page will we fill?

14896

my $columns_per_line =

14897

( int $number_of_fields / 2 ) * $pair_width +

14898

( $number_of_fields % 2 ) * $max_width;

14899

14900

my $formatted_columns;

14901

14902

if ( $number_of_fields > 1 ) {

14903

$formatted_columns =

14904

( $pair_width * ( int( $item_count / 2 ) ) +

14905

( $item_count % 2 ) * $max_width );

14906

}

14907

else {

14908

$formatted_columns = $max_width * $item_count;

14909

}

14910

if ( $formatted_columns < $packed_columns ) {

14911

$formatted_columns = $packed_columns;

14912

}

14913

14914

my $unused_columns = $formatted_columns - $packed_columns;

14915

14916

# set some empirical parameters to help decide if we should try to

14917

# align; high sparsity does not look good, especially with few lines

14918

my $sparsity = ($unused_columns) / ($formatted_columns);

14919

my $max_allowed_sparsity =

14920

( $item_count < 3 ) ? 0.1

14921

: ( $packed_lines == 1 ) ? 0.15

14922

: ( $packed_lines == 2 ) ? 0.4

14923

: 0.7;

14924

14925

# Begin check for shortcut methods, which avoid treating a list

14926

# as a table for relatively small parenthesized lists. These

14927

# are usually easier to read if not formatted as tables.

14928

if (

14929

$packed_lines <= 2 # probably can fit in 2 lines

14930

&& $item_count < 9 # doesn't have too many items

14931

&& $opening_environment eq 'BLOCK' # not a sub-container

14932

&& $opening_token eq '(' # is paren list

14933

)

14934

{

14935

14936

# Shortcut method 1: for -lp and just one comma:

14937

# This is a no-brainer, just break at the comma.

14938

if (

14939

$rOpts_line_up_parentheses # -lp

14940

&& $item_count == 2 # two items, one comma

14941

&& !$must_break_open

14942

)

14943

{

14944

my $i_break = $$rcomma_index[0];

14945

set_forced_breakpoint($i_break);

14946

$$rdo_not_break_apart = 1;

14947

set_non_alignment_flags( $comma_count, $rcomma_index );

14948

return;

14949

14950

}

14951

14952

# method 2 is for most small ragged lists which might look

14953

# best if not displayed as a table.

14954

if (

14955

( $number_of_fields == 2 && $item_count == 3 )

14956

|| (

14957

$new_identifier_count > 0 # isn't all quotes

14958

&& $sparsity > 0.15

14959

) # would be fairly spaced gaps if aligned

14960

)

14961

{

14962

14963

my $break_count = set_ragged_breakpoints( \@i_term_comma,

14964

$ri_ragged_break_list );

14965

++$break_count if ($use_separate_first_term);

14966

14967

# NOTE: we should really use the true break count here,

14968

# which can be greater if there are large terms and

14969

# little space, but usually this will work well enough.

14970

unless ($must_break_open) {

14971

14972

if ( $break_count <= 1 ) {

14973

$$rdo_not_break_apart = 1;

14974

}

14975

elsif ( $rOpts_line_up_parentheses && !$need_lp_break_open )

14976

{

14977

$$rdo_not_break_apart = 1;

14978

}

14979

}

14980

set_non_alignment_flags( $comma_count, $rcomma_index );

14981

return;

14982

}

14983

14984

} # end shortcut methods

14985

14986

# debug stuff

14987

14988

FORMATTER_DEBUG_FLAG_SPARSE && do {

14989

print

14990

"SPARSE:cols=$columns commas=$comma_count items:$item_count ids=$identifier_count pairwidth=$pair_width fields=$number_of_fields lines packed: $packed_lines packed_cols=$packed_columns fmtd:$formatted_lines cols /line:$columns_per_line unused:$unused_columns fmtd:$formatted_columns sparsity=$sparsity allow=$max_allowed_sparsity\n";

14991

14992

};

14993

14994

#---------------------------------------------------------------

14995

# Compound List Rule 2:

14996

# If this list is too long for one line, and it is an item of a

14997

# larger list, then we must format it, regardless of sparsity

14998

# (ian.t). One reason that we have to do this is to trigger

14999

# Compound List Rule 1, above, which causes breaks at all commas of

15000

# all outer lists. In this way, the structure will be properly

15001

# displayed.

15002

#---------------------------------------------------------------

15003

15004

# Decide if this list is too long for one line unless broken

15005

my $total_columns = table_columns_available($i_opening_paren);

15006

my $too_long = $packed_columns > $total_columns;

15007

15008

# For a paren list, include the length of the token just before the

15009

# '(' because this is likely a sub call, and we would have to

15010

# include the sub name on the same line as the list. This is still

15011

# imprecise, but not too bad. (steve.t)

15012

if ( !$too_long && $i_opening_paren > 0 && $opening_token eq '(' ) {

15013

15014

$too_long = excess_line_length( $i_opening_minus,

15015

$i_effective_last_comma + 1 ) > 0;

15016

}

15017

15018

# FIXME: For an item after a '=>', try to include the length of the

15019

# thing before the '=>'. This is crude and should be improved by

15020

# actually looking back token by token.

15021

if ( !$too_long && $i_opening_paren > 0 && $list_type eq '=>' ) {

15022

my $i_opening_minus = $i_opening_paren - 4;

15023

if ( $i_opening_minus >= 0 ) {

15024

$too_long = excess_line_length( $i_opening_minus,

15025

$i_effective_last_comma + 1 ) > 0;

15026

}

15027

}

15028

15029

# Always break lists contained in '[' and '{' if too long for 1 line,

15030

# and always break lists which are too long and part of a more complex

15031

# structure.

15032

my $must_break_open_container = $must_break_open

15033

|| ( $too_long

15034

&& ( $in_hierarchical_list || $opening_token ne '(' ) );

15035

15036

#print "LISTX: next=$next_nonblank_type avail cols=$columns packed=$packed_columns must format = $must_break_open_container too-long=$too_long opening=$opening_token list_type=$list_type formatted_lines=$formatted_lines packed=$packed_lines max_sparsity= $max_allowed_sparsity sparsity=$sparsity \n";

15037

15038

#---------------------------------------------------------------

15039

# The main decision:

15040

# Now decide if we will align the data into aligned columns. Do not

15041

# attempt to align columns if this is a tiny table or it would be

15042

# too spaced. It seems that the more packed lines we have, the

15043

# sparser the list that can be allowed and still look ok.

15044

#---------------------------------------------------------------

15045

15046

if ( ( $formatted_lines < 3 && $packed_lines < $formatted_lines )

15047

|| ( $formatted_lines < 2 )

15048

|| ( $unused_columns > $max_allowed_sparsity * $formatted_columns )

15049

)

15050

{

15051

15052

#---------------------------------------------------------------

15053

# too sparse: would look ugly if aligned in a table;

15054

#---------------------------------------------------------------

15055

15056

# use old breakpoints if this is a 'big' list

15057

# FIXME: goal is to improve set_ragged_breakpoints so that

15058

# this is not necessary.

15059

if ( $packed_lines > 2 && $item_count > 10 ) {

15060

write_logfile_entry("List sparse: using old breakpoints\n");

15061

copy_old_breakpoints( $i_first_comma, $i_last_comma );

15062

}

15063

15064

# let the continuation logic handle it if 2 lines

15065

else {

15066

15067

my $break_count = set_ragged_breakpoints( \@i_term_comma,

15068

$ri_ragged_break_list );

15069

++$break_count if ($use_separate_first_term);

15070

15071

unless ($must_break_open_container) {

15072

if ( $break_count <= 1 ) {

15073

$$rdo_not_break_apart = 1;

15074

}

15075

elsif ( $rOpts_line_up_parentheses && !$need_lp_break_open )

15076

{

15077

$$rdo_not_break_apart = 1;

15078

}

15079

}

15080

set_non_alignment_flags( $comma_count, $rcomma_index );

15081

}

15082

return;

15083

}

15084

15085

#---------------------------------------------------------------

15086

# go ahead and format as a table

15087

#---------------------------------------------------------------

15088

write_logfile_entry(

15089

"List: auto formatting with $number_of_fields fields/row\n");

15090

15091

my $j_first_break =

15092

$use_separate_first_term ? $number_of_fields : $number_of_fields - 1;

15093

15094

for (

15095

my $j = $j_first_break ;

15096

$j < $comma_count ;

15097

$j += $number_of_fields

15098

)

15099

{

15100

my $i = $$rcomma_index[$j];

15101

set_forced_breakpoint($i);

15102

}

15103

return;

15104

}

15105

}

15106

15107

sub set_non_alignment_flags {

    # Flag the first $comma_count commas listed in @$rcomma_index as
    # commas which should not be aligned.  The flag is a 1 stored in
    # @matching_token_to_go at the index of each comma.
    my ( $comma_count, $rcomma_index ) = @_;
    for my $nth_comma ( 0 .. $comma_count - 1 ) {
        my $i_comma = $rcomma_index->[$nth_comma];
        $matching_token_to_go[$i_comma] = 1;
    }
}

15116

15117

sub study_list_complexity {

    # Look for complex tables which should be formatted with one term per
    # line.
    #
    # Parameters:
    #   $ri_term_begin - ref to array: index of the first token of each item
    #   $ri_term_end   - ref to array: index of the last token of each item
    #   $ritem_lengths - ref to array: length of each item; 2 is subtracted
    #                    from each length before it is weighted
    #   $max_width     - compared with 12 in the final decision
    #
    # Returns a list of three values, in this order:
    #   $number_of_fields_best = suggested number of fields based on
    #        complexity; = 0 if any number may be used.
    #   \@i_ragged_break_list  = list of good breakpoints (item numbers)
    #        to avoid lines which are hard to read
    #   $identifier_count      = number of items whose first token is not
    #        of type q, Q, w or -
    #
    my ( $ri_term_begin, $ri_term_end, $ritem_lengths, $max_width ) = @_;
    my $item_count            = @{$ri_term_begin};
    my $complex_item_count    = 0;
    my $number_of_fields_best = $rOpts_maximum_fields_per_table;
    my $i_max                 = @{$ritem_lengths} - 1;
    ##my @item_complexity;

    # item numbers of the two most recent ragged breaks; the starting
    # values can never match a real item number
    my $i_last_last_break = -3;
    my $i_last_break      = -2;
    my @i_ragged_break_list;

    # thresholds on the weighted length of an item
    my $definitely_complex = 30;
    my $definitely_simple  = 12;
    my $quote_count        = 0;

    for my $i ( 0 .. $i_max ) {
        my $ib = $ri_term_begin->[$i];
        my $ie = $ri_term_end->[$i];

        # define complexity: start with the actual term length
        my $weighted_length = ( $ritem_lengths->[$i] - 2 );

        ##TBD: join types here and check for variations
        ##my $str=join "", @tokens_to_go[$ib..$ie];

        my $is_quote = 0;
        if ( $types_to_go[$ib] =~ /^[qQ]$/ ) {
            $is_quote = 1;
            $quote_count++;
        }
        elsif ( $types_to_go[$ib] =~ /^[w\-]$/ ) {
            $quote_count++;
        }

        # a single-token item is complex only if it is a quote which
        # contains whitespace (the indexes are numbers, so compare with ==)
        if ( $ib == $ie ) {
            if ( $is_quote && $tokens_to_go[$ib] =~ /\s/ ) {
                $complex_item_count++;
                $weighted_length *= 2;
            }
        }

        # a multi-token item is complex if it contains a blank; a '..'
        # token adds some extra weight
        else {
            if ( grep { $_ eq 'b' } @types_to_go[ $ib .. $ie ] ) {
                $complex_item_count++;
                $weighted_length *= 2;
            }
            if ( grep { $_ eq '..' } @types_to_go[ $ib .. $ie ] ) {
                $weighted_length += 4;
            }
        }

        # add weight for extra tokens.
        $weighted_length += 2 * ( $ie - $ib );

        ##        my $BUB = join '', @tokens_to_go[$ib..$ie];
        ##        print "# COMPLEXITY:$weighted_length   $BUB\n";

        ##push @item_complexity, $weighted_length;

        # now mark a ragged break after this item if it is 'long and
        # complex':
        if ( $weighted_length >= $definitely_complex ) {

            # if we broke after the previous term
            # then break before it too
            if (   $i_last_break == $i - 1
                && $i > 1
                && $i_last_last_break != $i - 2 )
            {

                ## FIXME: don't strand a small term
                pop @i_ragged_break_list;
                push @i_ragged_break_list, $i - 2;
                push @i_ragged_break_list, $i - 1;
            }

            push @i_ragged_break_list, $i;
            $i_last_last_break = $i_last_break;
            $i_last_break      = $i;
        }

        # don't break before a small last term -- it will
        # not look good on a line by itself.
        elsif ($i == $i_max
            && $i_last_break == $i - 1
            && $weighted_length <= $definitely_simple )
        {
            pop @i_ragged_break_list;
        }
    }

    my $identifier_count = $i_max + 1 - $quote_count;

    # Need more tuning here..
    # Suggest one field per line if the list is wide and more than half
    # of its items are complex, unless exactly 2 fields were requested.
    if (   $max_width > 12
        && $complex_item_count > $item_count / 2
        && $number_of_fields_best != 2 )
    {
        $number_of_fields_best = 1;
    }

    return ( $number_of_fields_best, \@i_ragged_break_list, $identifier_count );
}

15231

15232

sub get_maximum_fields_wanted {

    # Not all tables look good with more than one field of items.
    # This routine looks at a table and decides if it should be
    # formatted with just one field or not.
    # This coding is still under development.
    #
    # Parameter:
    #   $ritem_lengths - reference to an array of item lengths
    # Returns:
    #   1 if a single field is suggested, otherwise 0 (no preference)
    my ($ritem_lengths) = @_;

    # For just a few items, we tentatively assume just 1 field.
    my $item_count = @{$ritem_lengths};
    return (1) if $item_count <= 5;

    # For larger tables, look at it both ways and see what looks best.
    # $total_variation_1 adds up the change in length from each item to
    # the next.  $total_variation_2 adds up the change in length between
    # items which are two apart, that is, between successive items of the
    # same parity.
    my $total_variation_1 = 0;
    my $total_variation_2 = 0;
    for my $j ( 1 .. $item_count - 1 ) {
        my $length = $ritem_lengths->[$j];
        $total_variation_1 += abs( $length - $ritem_lengths->[ $j - 1 ] );
        next if $j < 2;
        $total_variation_2 += abs( $length - $ritem_lengths->[ $j - 2 ] );
    }

    # Keep more than one field only if the second variation is clearly
    # smaller than the first; lists of up to 10 items must beat it by a
    # wider margin.
    my $factor = ( $item_count > 10 ) ? 1 : 0.75;
    return ( $total_variation_2 < $factor * $total_variation_1 ) ? (0) : (1);
}

15292

15293

sub table_columns_available {

    # Return the number of columns available for a table, given the index
    # of a token (callers pass the first comma of the list, or its opening
    # token).  This is the maximum line length less the value returned by
    # leading_spaces_to_go for that token, less 1.
    my ($i_first_comma) = @_;

    # Patch: the vertical formatter does not line up lines whose lengths
    # exactly equal the available line length because of allowances
    # that must be made for side comments.  Therefore, the number of
    # available columns is reduced by 1 character.
    return $rOpts_maximum_line_length
      - leading_spaces_to_go($i_first_comma)
      - 1;
}

15305

15306

sub maximum_number_of_fields {

    # How many fields will fit in the available space?
    #
    #   $columns     - number of columns available
    #   $odd_or_even - 1 if an odd number of fields is acceptable
    #   $max_width   - width of one extra field
    #   $pair_width  - width of a pair of fields
    my ( $columns, $odd_or_even, $max_width, $pair_width ) = @_;

    # begin with as many whole pairs of fields as will fit
    my $whole_pairs = int( $columns / $pair_width );
    my $field_total = $whole_pairs * 2;

    # only an 'odd ok' list may take one more single field
    return $field_total if $odd_or_even != 1;

    # ... and only if that extra field still fits beside the pairs
    my $width_with_extra = $whole_pairs * $pair_width + $max_width;
    return $field_total if $width_with_extra > $columns;
    return $field_total + 1;
}

15319

15320

sub compactify_table {

    # Given a table with a certain number of fields and a certain number
    # of lines, see if reducing the number of fields will make it look
    # better.  Returns the smallest field count, reached in steps of
    # $odd_or_even, which still holds $item_count items in
    # $formatted_lines lines; the count is returned unchanged if it is
    # less than twice $odd_or_even or if there are no lines.
    my ( $item_count, $number_of_fields, $formatted_lines, $odd_or_even ) = @_;

    if ( $formatted_lines > 0 && $number_of_fields >= 2 * $odd_or_even ) {

        # keep stepping down while the smaller table still has room
        my $trial_fields = $number_of_fields;
        while ($trial_fields >= $odd_or_even
            && $trial_fields * $formatted_lines >= $item_count )
        {
            $number_of_fields = $trial_fields;
            $trial_fields -= $odd_or_even;
        }
    }
    return $number_of_fields;
}

15341

15342

sub set_ragged_breakpoints {

    # Set breakpoints in a list that cannot be formatted nicely as a
    # table.
    #
    #   $ri_term_comma        - ref to array giving, for each item, the
    #                           index of the comma which ends it
    #   $ri_ragged_break_list - ref to array of the item numbers after
    #                           which a break is wanted
    #
    # Returns the number of calls made to set_forced_breakpoint.
    my ( $ri_term_comma, $ri_ragged_break_list ) = @_;

    my $break_count = 0;
    for my $item_number ( @{$ri_ragged_break_list} ) {
        my $i_comma = $ri_term_comma->[$item_number];

        # skip an item which has no comma index (undefined or zero)
        next unless $i_comma;
        set_forced_breakpoint($i_comma);
        ++$break_count;
    }
    return $break_count;
}

15358

15359

sub copy_old_breakpoints {

    # Request a forced breakpoint at every index from $i_first_comma
    # through $i_last_comma which has a true entry in
    # @old_breakpoint_to_go.
    my ( $i_first_comma, $i_last_comma ) = @_;
    foreach my $index ( $i_first_comma .. $i_last_comma ) {
        next unless $old_breakpoint_to_go[$index];
        set_forced_breakpoint($index);
    }
}

15367

15368

sub set_nobreaks {

    # Set the entries of @nobreak_to_go for indexes $i through $j to 1.
    # The range must satisfy 0 <= $i <= $j <= $max_index_to_go; for any
    # other range nothing is changed.
    my ( $i, $j ) = @_;

    # shouldn't happen; non-critical error
    if ( $i < 0 || $i > $j || $j > $max_index_to_go ) {
        FORMATTER_DEBUG_FLAG_NOBREAK && do {
            my ( $a, $b, $c ) = caller();
            print(
"NOBREAK ERROR: from $a $c with i=$i j=$j max=$max_index_to_go\n"
            );
        };
    }

    # the normal case: a valid range
    else {

        FORMATTER_DEBUG_FLAG_NOBREAK && do {
            my ( $a, $b, $c ) = caller();
            print(
"NOBREAK: forced_breakpoint $forced_breakpoint_count from $a $c with i=$i max=$max_index_to_go type=$types_to_go[$i]\n"
            );
        };

        @nobreak_to_go[ $i .. $j ] = (1) x ( $j - $i + 1 );
    }
}

15392

15393

sub set_fake_breakpoint {

    # Signal that breaks exist without committing to a location: only the
    # forced breakpoint counter is incremented; no token is marked.  This
    # lets the decision on where to break be postponed.
    $forced_breakpoint_count++;
}

15400

15401

sub set_forced_breakpoint {

    # Mark a forced breakpoint at (or just before) token index $i.
    #
    # - Nothing is done if $i is undefined or negative.
    # - For certain operator tokens the break is moved to the preceding
    #   token when %want_break_before says the break goes before them;
    #   for 'if' and 'unless' it is always moved to the preceding token.
    # - If the chosen token is a blank, the token before it is used.
    # - Nothing is marked if the no-break flag is set for that token.
    # - Each break set is pushed on the undo stack, and a break at an
    #   opening container token or '?' also requests a break at its
    #   closing token via set_closing_breakpoint.
    my $i = shift;

    return unless defined $i && $i >= 0;

    # when called with certain tokens, use bond strengths to decide
    # if we break before or after it
    my $token = $tokens_to_go[$i];

    if ( $token =~ /^([\=\.\,\:\?]|and|or|xor|&&|\|\|)$/ ) {
        $i-- if ( $want_break_before{$token} && $i >= 0 );
    }

    # breaks are forced before 'if' and 'unless'
    elsif ( $is_if_unless{$token} ) {
        $i--;
    }

    # the adjusted index must still be within the current batch
    return unless ( $i >= 0 && $i <= $max_index_to_go );

    # step back over a blank
    my $i_nonblank = $i;
    $i_nonblank-- if ( $types_to_go[$i] eq 'b' );

    FORMATTER_DEBUG_FLAG_FORCE && do {
        my ( $a, $b, $c ) = caller();
        print
"FORCE forced_breakpoint $forced_breakpoint_count from $a $c with i=$i_nonblank max=$max_index_to_go tok=$tokens_to_go[$i_nonblank] type=$types_to_go[$i_nonblank] nobr=$nobreak_to_go[$i_nonblank]\n";
    };

    # honor any no-break flag
    return
      unless ( $i_nonblank >= 0 && $nobreak_to_go[$i_nonblank] == 0 );

    $forced_breakpoint_to_go[$i_nonblank] = 1;

    $index_max_forced_break = $i_nonblank
      if ( $i_nonblank > $index_max_forced_break );

    # count the break and remember it so that it can be undone
    $forced_breakpoint_count++;
    $forced_breakpoint_undo_stack[$forced_breakpoint_undo_count] =
      $i_nonblank;
    $forced_breakpoint_undo_count++;

    # if we break at an opening container..break at the closing
    set_closing_breakpoint($i_nonblank)
      if ( $tokens_to_go[$i_nonblank] =~ /^[\{\[\(\?]$/ );
}

15443

15444

sub clear_breakpoint_undo_stack {

    # Empty the undo stack by resetting its element count; breakpoints
    # which have already been set are left in place.
    $forced_breakpoint_undo_count = 0;
}

15447

15448

sub undo_forced_breakpoint_stack {

    # Undo forced breakpoints, most recent first, until only $i_start
    # entries remain on the undo stack.  For each popped token index which
    # lies within the current batch, the forced breakpoint flag is cleared
    # and the forced breakpoint count is decremented.
    #
    #   $i_start - number of undo stack entries to keep; a negative value
    #              is reported as a program bug and treated as 0.
    my $i_start = shift;
    if ( $i_start < 0 ) {
        my ( $a, $b, $c ) = caller();

        # issue the warning BEFORE clamping so that the message shows
        # the offending value rather than always showing 0
        warning(
"Program Bug: undo_forced_breakpoint_stack from $a $c has i=$i_start "
        );
        $i_start = 0;
    }

    while ( $forced_breakpoint_undo_count > $i_start ) {
        my $i =
          $forced_breakpoint_undo_stack[ --$forced_breakpoint_undo_count ];
        if ( $i >= 0 && $i <= $max_index_to_go ) {
            $forced_breakpoint_to_go[$i] = 0;
            $forced_breakpoint_count--;

            FORMATTER_DEBUG_FLAG_UNDOBP && do {
                my ( $a, $b, $c ) = caller();
                print(
"UNDOBP: undo forced_breakpoint i=$i $forced_breakpoint_undo_count from $a $c max=$max_index_to_go\n"
                );
            };
        }

        # shouldn't happen, but not a critical error
        else {
            FORMATTER_DEBUG_FLAG_UNDOBP && do {
                my ( $a, $b, $c ) = caller();
                print(
"Program Bug: undo_forced_breakpoint from $a $c has i=$i but max=$max_index_to_go"
                );
            };
        }
    }
}

15485

15486

{    # begin recombine_breakpoints

    # lookup tables private to recombine_breakpoints
    my %is_amp_amp;
    my %is_ternary;
    my %is_math_op;

    BEGIN {

        my @list;

        @list = qw( && || );
        @is_amp_amp{@list} = (1) x scalar(@list);

        @list = qw( ? : );
        @is_ternary{@list} = (1) x scalar(@list);

        @list = qw( + - * / );
        @is_math_op{@list} = (1) x scalar(@list);
    }

    sub recombine_breakpoints {

        # sub set_continuation_breaks is very liberal in setting line breaks
        # for long lines, always setting breaks at good breakpoints, even
        # when that creates small lines.  Occasionally small line fragments
        # are produced which would look better if they were combined.
        # That's the task of this routine, recombine_breakpoints.
        #
        # $ri_beg = ref to array of BEGinning indexes of each line
        # $ri_end = ref to array of ENDing indexes of each line
        #
        # Both arrays are modified in place (one entry is spliced out of
        # each for every pair of lines joined) and the same two references
        # are returned.
        my ( $ri_beg, $ri_end ) = @_;

        my $more_to_do = 1;

        # We keep looping over all of the lines of this batch
        # until there are no more possible recombinations
        my $nmax_last = @$ri_end;
        while ($more_to_do) {
            my $n_best = 0;
            my $bs_best;
            my $n;
            my $nmax = @$ri_end - 1;

            # safety check for infinite loop
            unless ( $nmax < $nmax_last ) {

                # shouldn't happen because splice below decreases nmax on
                # each pass: but i get paranoid sometimes
                die "Program bug-infinite loop in recombine breakpoints\n";
            }
            $nmax_last  = $nmax;
            $more_to_do = 0;
            my $previous_outdentable_closing_paren;
            my $leading_amp_count = 0;
            my $this_line_is_semicolon_terminated;

            # loop over all remaining lines in this batch
            for $n ( 1 .. $nmax ) {

                #----------------------------------------------------------
                # If we join the current pair of lines,
                # line $n-1 will become the left part of the joined line
                # line $n will become the right part of the joined line
                #
                # Here are Indexes of the endpoint tokens of the two lines:
                #
                #  -----line $n-1--- | -----line $n-----
                #  $ibeg_1   $iend_1 | $ibeg_2   $iend_2
                #                    ^
                #                    |
                # We want to decide if we should remove the line break
                # between the tokens at $iend_1 and $ibeg_2
                #
                # We will apply a number of ad-hoc tests to see if joining
                # here will look ok.  The code will just issue a 'next'
                # command if the join doesn't look good.  If we get through
                # the gauntlet of tests, the lines will be recombined.
                #----------------------------------------------------------
                #
                # beginning and ending tokens of the lines we are working on
                my $ibeg_1 = $$ri_beg[ $n - 1 ];
                my $iend_1 = $$ri_end[ $n - 1 ];
                my $iend_2 = $$ri_end[$n];
                my $ibeg_2 = $$ri_beg[$n];

                my $ibeg_nmax = $$ri_beg[$nmax];

                # some beginning indexes of other lines, which may not exist
                my $ibeg_0 = $n > 1          ? $$ri_beg[ $n - 2 ] : -1;
                my $ibeg_3 = $n < $nmax      ? $$ri_beg[ $n + 1 ] : -1;
                my $ibeg_4 = $n + 2 <= $nmax ? $$ri_beg[ $n + 2 ] : -1;

                my $bs_tweak = 0;

                #my $depth_increase=( $nesting_depth_to_go[$ibeg_2] -
                #        $nesting_depth_to_go[$ibeg_1] );

##print "RECOMBINE: n=$n imid=$iend_1 if=$ibeg_1 type=$types_to_go[$ibeg_1] =$tokens_to_go[$ibeg_1] next_type=$types_to_go[$ibeg_2] next_tok=$tokens_to_go[$ibeg_2]\n";

                # If line $n is the last line, we set some flags and
                # do any special checks for it
                if ( $n == $nmax ) {

                    # a terminal '{' should stay where it is
                    next if $types_to_go[$ibeg_2] eq '{';

                    # set flag if statement $n ends in ';'
                    $this_line_is_semicolon_terminated =
                      $types_to_go[$iend_2] eq ';'

                      # with possible side comment
                      || ( $types_to_go[$iend_2] eq '#'
                        && $iend_2 - $ibeg_2 >= 2
                        && $types_to_go[ $iend_2 - 2 ] eq ';'
                        && $types_to_go[ $iend_2 - 1 ] eq 'b' );
                }

                #----------------------------------------------------------
                # Section 1: examine token at $iend_1 (right end of first line
                # of pair)
                #----------------------------------------------------------

                # an isolated '}' may join with a ';' terminated segment
                if ( $types_to_go[$iend_1] eq '}' ) {

                    # Check for cases where combining a semicolon terminated
                    # statement with a previous isolated closing paren will
                    # allow the combined line to be outdented.  This is
                    # generally a good move.  For example, we can join up
                    # the last two lines here:
                    #  (
                    #      $dev,  $ino,   $mode,  $nlink, $uid,     $gid, $rdev,
                    #      $size, $atime, $mtime, $ctime, $blksize, $blocks
                    #    )
                    #    = stat($file);
                    #
                    # to get:
                    #  (
                    #      $dev,  $ino,   $mode,  $nlink, $uid,     $gid, $rdev,
                    #      $size, $atime, $mtime, $ctime, $blksize, $blocks
                    #  ) = stat($file);
                    #
                    # which makes the parens line up.
                    #
                    # Another example, from Joe Matarazzo, probably looks best
                    # with the 'or' clause appended to the trailing paren:
                    #  $self->some_method(
                    #      PARAM1 => 'foo',
                    #      PARAM2 => 'bar'
                    #  ) or die "Some_method didn't work";
                    #
                    $previous_outdentable_closing_paren =
                      $this_line_is_semicolon_terminated    # ends in ';'
                      && $ibeg_1 == $iend_1    # only one token on last line
                      && $tokens_to_go[$iend_1] eq
                      ')'                      # must be structural paren

                      # only &&, ||, and : if no others seen
                      # (but note: our count made below could be wrong
                      # due to intervening comments)
                      && ( $leading_amp_count == 0
                        || $types_to_go[$ibeg_2] !~ /^(:|\&\&|\|\|)$/ )

                      # but leading colons probably line up with a
                      # previous colon or question (count could be wrong).
                      && $types_to_go[$ibeg_2] ne ':'

                      # only one step in depth allowed.  this line must not
                      # begin with a ')' itself.
                      && ( $nesting_depth_to_go[$iend_1] ==
                        $nesting_depth_to_go[$iend_2] + 1 );

                    next
                      unless (
                        $previous_outdentable_closing_paren

                        # handle '.' and '?' specially below
                        || ( $types_to_go[$ibeg_2] =~ /^[\.\?]$/ )
                      );
                }

                # do not recombine lines with ending &&, ||,
                elsif ( $is_amp_amp{ $types_to_go[$iend_1] } ) {
                    next unless $want_break_before{ $types_to_go[$iend_1] };
                }

                # keep a terminal colon
                elsif ( $types_to_go[$iend_1] eq ':' ) {
                    next unless $want_break_before{ $types_to_go[$iend_1] };
                }

                # Identify and recombine a broken ?/: chain
                elsif ( $types_to_go[$iend_1] eq '?' ) {

                    # Do not recombine different levels
                    next
                      if ( $levels_to_go[$ibeg_1] ne $levels_to_go[$ibeg_2] );

                    # do not recombine unless next line ends in :
                    next unless $types_to_go[$iend_2] eq ':';
                }

                # for lines ending in a comma...
                elsif ( $types_to_go[$iend_1] eq ',' ) {

                    # Do not recombine at comma which is following the
                    # input bias.
                    # TODO: might be best to make a special flag
                    next if ( $old_breakpoint_to_go[$iend_1] );

                    # an isolated '},' may join with an identifier + ';'
                    # this is useful for the class of a 'bless' statement
                    # (bless.t)
                    if (   $types_to_go[$ibeg_1] eq '}'
                        && $types_to_go[$ibeg_2] eq 'i' )
                    {
                        next
                          unless ( ( $ibeg_1 == ( $iend_1 - 1 ) )
                            && ( $iend_2 == ( $ibeg_2 + 1 ) )
                            && $this_line_is_semicolon_terminated );

                        # override breakpoint
                        $forced_breakpoint_to_go[$iend_1] = 0;
                    }

                    # but otherwise ..
                    else {

                        # do not recombine after a comma unless this will leave
                        # just 1 more line
                        next unless ( $n + 1 >= $nmax );

                        # do not recombine if there is a change in
                        # indentation depth
                        next
                          if (
                            $levels_to_go[$iend_1] != $levels_to_go[$iend_2] );

                        # do not recombine a "complex expression" after a
                        # comma.  Here "complex" means that the line to be
                        # appended contains an opening paren.
                        my $saw_paren;
                        foreach my $ii ( $ibeg_2 .. $iend_2 ) {
                            if ( $tokens_to_go[$ii] eq '(' ) {
                                $saw_paren = 1;
                                last;
                            }
                        }
                        next if $saw_paren;
                    }
                }

                # opening paren..
                elsif ( $types_to_go[$iend_1] eq '(' ) {

                    # No longer doing this
                }

                elsif ( $types_to_go[$iend_1] eq ')' ) {

                    # No longer doing this
                }

                # keep a terminal for-semicolon
                elsif ( $types_to_go[$iend_1] eq 'f' ) {
                    next;
                }

                # if '=' at end of line ...
                elsif ( $is_assignment{ $types_to_go[$iend_1] } ) {

                    my $is_short_quote =
                      (      $types_to_go[$ibeg_2] eq 'Q'
                          && $ibeg_2 == $iend_2
                          && length( $tokens_to_go[$ibeg_2] ) <
                          $rOpts_short_concatenation_item_length );
                    my $is_ternary =
                      ( $types_to_go[$ibeg_1] eq '?'
                          && ( $ibeg_3 >= 0 && $types_to_go[$ibeg_3] eq ':' ) );

                    # always join an isolated '=', a short quote, or if this
                    # will put ?/: at start of adjacent lines
                    if (   $ibeg_1 != $iend_1
                        && !$is_short_quote
                        && !$is_ternary )
                    {
                        next
                          unless (
                            (

                                # unless we can reduce this to two lines
                                $nmax < $n + 2

                                # or three lines, the last with a leading
                                # semicolon
                                || (   $nmax == $n + 2
                                    && $types_to_go[$ibeg_nmax] eq ';' )

                                # or the next line ends with a here doc
                                || $types_to_go[$iend_2] eq 'h'

                                # or the next line ends in an open paren or
                                # brace and the break hasn't been forced
                                # [dima.t]
                                || (  !$forced_breakpoint_to_go[$iend_1]
                                    && $types_to_go[$iend_2] eq '{' )
                            )

                            # do not recombine if the two lines might align well
                            # this is a very approximate test for this
                            && (   $ibeg_3 >= 0
                                && $types_to_go[$ibeg_2] ne
                                $types_to_go[$ibeg_3] )
                          );

                        # -lp users often prefer this:
                        #  my $title = function($env, $env, $sysarea,
                        #                       "bubba Borrower Entry");
                        #  so we will recombine if -lp is used and we have
                        #  an ending comma
                        if (  !$rOpts_line_up_parentheses
                            || $types_to_go[$iend_2] ne ',' )
                        {

                            # otherwise, scan the rhs line up to last token for
                            # complexity.  Note that we are not counting the
                            # last token in case it is an opening paren.
                            my $tv    = 0;
                            my $depth = $nesting_depth_to_go[$ibeg_2];
                            for ( my $i = $ibeg_2 + 1 ; $i < $iend_2 ; $i++ ) {
                                if ( $nesting_depth_to_go[$i] != $depth ) {
                                    $tv++;
                                    last if ( $tv > 1 );
                                }
                                $depth = $nesting_depth_to_go[$i];
                            }

                            # ok to recombine if no level changes before last
                            # token
                            if ( $tv > 0 ) {

                                # otherwise, do not recombine if more than two
                                # level changes.
                                next if ( $tv > 1 );

                                # check total complexity of the two adjacent
                                # lines that will occur if we do this join
                                my $istop =
                                  ( $n < $nmax ) ? $$ri_end[ $n + 1 ] : $iend_2;
                                for ( my $i = $iend_2 ; $i <= $istop ; $i++ ) {
                                    if ( $nesting_depth_to_go[$i] != $depth ) {
                                        $tv++;
                                        last if ( $tv > 2 );
                                    }
                                    $depth = $nesting_depth_to_go[$i];
                                }

                                # do not recombine if total is more than 2
                                # level changes
                                next if ( $tv > 2 );
                            }
                        }
                    }

                    unless ( $tokens_to_go[$ibeg_2] =~ /^[\{\(\[]$/ ) {
                        $forced_breakpoint_to_go[$iend_1] = 0;
                    }
                }

                # for keywords..
                elsif ( $types_to_go[$iend_1] eq 'k' ) {

                    # make major control keywords stand out
                    # (recombine.t)
                    next
                      if (

                        #/^(last|next|redo|return)$/
                        $is_last_next_redo_return{ $tokens_to_go[$iend_1] }

                        # but only if followed by multiple lines
                        && $n < $nmax
                      );

                    if ( $is_and_or{ $tokens_to_go[$iend_1] } ) {
                        next
                          unless $want_break_before{ $tokens_to_go[$iend_1] };
                    }
                }

                # handle trailing + - * /
                elsif ( $is_math_op{ $types_to_go[$iend_1] } ) {

                    # combine lines if next line has single number
                    # or a short term followed by same operator
                    my $i_next_nonblank = $ibeg_2;
                    my $i_next_next     = $i_next_nonblank + 1;
                    $i_next_next++ if ( $types_to_go[$i_next_next] eq 'b' );
                    my $number_follows = $types_to_go[$i_next_nonblank] eq 'n'
                      && (
                        $i_next_nonblank == $iend_2
                        || (   $i_next_next == $iend_2
                            && $is_math_op{ $types_to_go[$i_next_next] } )
                        || $types_to_go[$i_next_next] eq ';'
                      );

                    # find token before last operator of previous line
                    my $iend_1_minus = $iend_1;
                    $iend_1_minus--
                      if ( $iend_1_minus > $ibeg_1 );
                    $iend_1_minus--
                      if ( $types_to_go[$iend_1_minus] eq 'b'
                        && $iend_1_minus > $ibeg_1 );

                    my $short_term_follows =
                      (      $types_to_go[$iend_2] eq $types_to_go[$iend_1]
                          && $types_to_go[$iend_1_minus] =~ /^[in]$/
                          && $iend_2 <= $ibeg_2 + 2
                          && length( $tokens_to_go[$ibeg_2] ) <
                          $rOpts_short_concatenation_item_length );

                    next
                      unless ( $number_follows || $short_term_follows );
                }

                #----------------------------------------------------------
                # Section 2: Now examine token at $ibeg_2 (left end of second
                # line of pair)
                #----------------------------------------------------------

                # join lines identified above as capable of
                # causing an outdented line with leading closing paren
                if ($previous_outdentable_closing_paren) {
                    $forced_breakpoint_to_go[$iend_1] = 0;
                }

                # do not recombine lines with leading :
                elsif ( $types_to_go[$ibeg_2] eq ':' ) {
                    $leading_amp_count++;
                    next if $want_break_before{ $types_to_go[$ibeg_2] };
                }

                # handle lines with leading &&, ||
                elsif ( $is_amp_amp{ $types_to_go[$ibeg_2] } ) {

                    $leading_amp_count++;

                    # ok to recombine if it follows a ? or :
                    # and is followed by an open paren..
                    my $ok =
                      (      $is_ternary{ $types_to_go[$ibeg_1] }
                          && $tokens_to_go[$iend_2] eq '(' )

                    # or is followed by a ? or : at same depth
                    #
                    # We are looking for something like this. We can
                    # recombine the && line with the line above to make the
                    # structure more clear:
                    #  return
                    #    exists $G->{Attr}->{V}
                    #    && exists $G->{Attr}->{V}->{$u}
                    #    ? %{ $G->{Attr}->{V}->{$u} }
                    #    : ();
                    #
                    # We should probably leave something like this alone:
                    #  return
                    #       exists $G->{Attr}->{E}
                    #    && exists $G->{Attr}->{E}->{$u}
                    #    && exists $G->{Attr}->{E}->{$u}->{$v}
                    #    ? %{ $G->{Attr}->{E}->{$u}->{$v} }
                    #    : ();
                    # so that we either have all of the &&'s (or ||'s)
                    # on one line, as in the first example, or break at
                    # each one as in the second example.  However, it
                    # sometimes makes things worse to check for this because
                    # it prevents multiple recombinations.  So this is not done.
                      || ( $ibeg_3 >= 0
                        && $is_ternary{ $types_to_go[$ibeg_3] }
                        && $nesting_depth_to_go[$ibeg_3] ==
                        $nesting_depth_to_go[$ibeg_2] );

                    next if !$ok && $want_break_before{ $types_to_go[$ibeg_2] };
                    $forced_breakpoint_to_go[$iend_1] = 0;

                    # tweak the bond strength to give this joint priority
                    # over ? and :
                    $bs_tweak = 0.25;
                }

                # Identify and recombine a broken ?/: chain
                elsif ( $types_to_go[$ibeg_2] eq '?' ) {

                    # Do not recombine different levels
                    my $lev = $levels_to_go[$ibeg_2];
                    next if ( $lev ne $levels_to_go[$ibeg_1] );

                    # Do not recombine a '?' if either next line or
                    # previous line does not start with a ':'.  The reasons
                    # are that (1) no alignment of the ? will be possible
                    # and (2) the expression is somewhat complex, so the
                    # '?' is harder to see in the interior of the line.
                    my $follows_colon =
                      $ibeg_1 >= 0 && $types_to_go[$ibeg_1] eq ':';
                    my $precedes_colon =
                      $ibeg_3 >= 0 && $types_to_go[$ibeg_3] eq ':';
                    next unless ( $follows_colon || $precedes_colon );

                    # we will always combine a ? line following a : line
                    if ( !$follows_colon ) {

                        # ...otherwise recombine only if it looks like a chain.
                        # we will just look at a few nearby lines to see if
                        # this looks like a chain.
                        my $local_count = 0;
                        foreach my $ii ( $ibeg_0, $ibeg_1, $ibeg_3, $ibeg_4 ) {
                            $local_count++
                              if $ii >= 0
                                  && $types_to_go[$ii] eq ':'
                                  && $levels_to_go[$ii] == $lev;
                        }
                        next unless ( $local_count > 1 );
                    }
                    $forced_breakpoint_to_go[$iend_1] = 0;
                }

                # do not recombine lines with leading '.'
                elsif ( $types_to_go[$ibeg_2] =~ /^(\.)$/ ) {
                    my $i_next_nonblank = $ibeg_2 + 1;
                    if ( $types_to_go[$i_next_nonblank] eq 'b' ) {
                        $i_next_nonblank++;
                    }

                    next
                      unless (

                   # ... unless there is just one and we can reduce
                   # this to two lines if we do.  For example, this
                   #
                   #
                   #  $bodyA .=
                   #    '($dummy, $pat) = &get_next_tex_cmd;' . '$args .= $pat;'
                   #
                   #  looks better than this:
                   #  $bodyA .= '($dummy, $pat) = &get_next_tex_cmd;'
                   #    . '$args .= $pat;'

                        (
                               $n == 2
                            && $n == $nmax
                            && $types_to_go[$ibeg_1] ne $types_to_go[$ibeg_2]
                        )

                        #  ... or this would strand a short quote , like this
                        #                . "some long quote"
                        #                . "\n";
                        || (   $types_to_go[$i_next_nonblank] eq 'Q'
                            && $i_next_nonblank >= $iend_2 - 1
                            && length( $tokens_to_go[$i_next_nonblank] ) <
                            $rOpts_short_concatenation_item_length )
                      );
                }

                # handle leading keyword..
                elsif ( $types_to_go[$ibeg_2] eq 'k' ) {

                    # handle leading "or"
                    if ( $tokens_to_go[$ibeg_2] eq 'or' ) {
                        next
                          unless (
                            $this_line_is_semicolon_terminated
                            && (

                                # following 'if' or 'unless' or 'or'
                                $types_to_go[$ibeg_1] eq 'k'
                                && $is_if_unless{ $tokens_to_go[$ibeg_1] }

                                # important: only combine a very simple or
                                # statement because the step below may have
                                # combined a trailing 'and' with this or,
                                # and we do not want to then combine
                                # everything together
                                && ( $iend_2 - $ibeg_2 <= 7 )
                            )
                          );
                    }

                    # handle leading 'and'
                    elsif ( $tokens_to_go[$ibeg_2] eq 'and' ) {

                        # Decide if we will combine a single terminal 'and'
                        # after an 'if' or 'unless'.

                        #     This looks best with the 'and' on the same
                        #     line as the 'if':
                        #
                        #         $a = 1
                        #           if $seconds and $nu < 2;
                        #
                        #     But this looks better as shown:
                        #
                        #         $a = 1
                        #           if !$this->{Parents}{$_}
                        #           or $this->{Parents}{$_} eq $_;
                        #
                        next
                          unless (
                            $this_line_is_semicolon_terminated
                            && (

                                # following 'if' or 'unless' or 'or'
                                $types_to_go[$ibeg_1] eq 'k'
                                && (   $is_if_unless{ $tokens_to_go[$ibeg_1] }
                                    || $tokens_to_go[$ibeg_1] eq 'or' )
                            )
                          );
                    }

                    # handle leading "if" and "unless"
                    elsif ( $is_if_unless{ $tokens_to_go[$ibeg_2] } ) {

                      # FIXME: This is still experimental..may not be too useful
                        next
                          unless (
                            $this_line_is_semicolon_terminated

                            #  previous line begins with 'and' or 'or'
                            && $types_to_go[$ibeg_1] eq 'k'
                            && $is_and_or{ $tokens_to_go[$ibeg_1] }

                          );
                    }

                    # handle all other leading keywords
                    else {

                        # keywords look best at start of lines,
                        # but combine things like "1 while"
                        unless ( $is_assignment{ $types_to_go[$iend_1] } ) {
                            next
                              if ( ( $types_to_go[$iend_1] ne 'k' )
                                && ( $tokens_to_go[$ibeg_2] ne 'while' ) );
                        }
                    }
                }

                # similar treatment of && and || as above for 'and' and 'or':
                # NOTE: This block of code is currently bypassed because
                # of a previous block but is retained for possible future use.
                elsif ( $is_amp_amp{ $types_to_go[$ibeg_2] } ) {

                    # maybe looking at something like:
                    # unless $TEXTONLY || $item =~ m%</?(hr>|p>|a|img)%i;

                    next
                      unless (
                        $this_line_is_semicolon_terminated

                        # previous line begins with an 'if' or 'unless' keyword
                        && $types_to_go[$ibeg_1] eq 'k'
                        && $is_if_unless{ $tokens_to_go[$ibeg_1] }

                      );
                }

                # handle leading + - * /
                elsif ( $is_math_op{ $types_to_go[$ibeg_2] } ) {
                    my $i_next_nonblank = $ibeg_2 + 1;
                    if ( $types_to_go[$i_next_nonblank] eq 'b' ) {
                        $i_next_nonblank++;
                    }

                    my $i_next_next = $i_next_nonblank + 1;
                    $i_next_next++ if ( $types_to_go[$i_next_next] eq 'b' );

                    my $is_number = (
                        $types_to_go[$i_next_nonblank] eq 'n'
                          && ( $i_next_nonblank >= $iend_2 - 1
                            || $types_to_go[$i_next_next] eq ';' )
                    );

                    my $iend_1_nonblank =
                      $types_to_go[$iend_1] eq 'b' ? $iend_1 - 1 : $iend_1;
                    my $iend_2_nonblank =
                      $types_to_go[$iend_2] eq 'b' ? $iend_2 - 1 : $iend_2;

                    my $is_short_term =
                      (      $types_to_go[$ibeg_2] eq $types_to_go[$ibeg_1]
                          && $types_to_go[$iend_2_nonblank] =~ /^[in]$/
                          && $types_to_go[$iend_1_nonblank] =~ /^[in]$/
                          && $iend_2_nonblank <= $ibeg_2 + 2
                          && length( $tokens_to_go[$iend_2_nonblank] ) <
                          $rOpts_short_concatenation_item_length );

                    # Combine these lines if this line is a single
                    # number, or if it is a short term with same
                    # operator as the previous line.  For example, in
                    # the following code we will combine all of the
                    # short terms $A, $B, $C, $D, $E, $F, together
                    # instead of leaving them one per line:
                    #  my $time =
                    #    $A * $B * $C * $D * $E * $F *
                    #    ( 2. * $eps * $sigma * $area ) *
                    #    ( 1. / $tcold**3 - 1. / $thot**3 );
                    # This can be important in math-intensive code.
                    next
                      unless (
                           $is_number
                        || $is_short_term

                        # or if we can reduce this to two lines if we do.
                        || (   $n == 2
                            && $n == $nmax
                            && $types_to_go[$ibeg_1] ne $types_to_go[$ibeg_2] )
                      );
                }

                # handle line with leading = or similar
                elsif ( $is_assignment{ $types_to_go[$ibeg_2] } ) {
                    next unless $n == 1;
                    next
                      unless (

                        # unless we can reduce this to two lines
                        $nmax == 2

                        # or three lines, the last with a leading semicolon
                        || ( $nmax == 3 && $types_to_go[$ibeg_nmax] eq ';' )

                        # or the next line ends with a here doc
                        || $types_to_go[$iend_2] eq 'h'
                      );
                }

                #----------------------------------------------------------
                # Section 3:
                # Combine the lines if we arrive here and it is possible
                #----------------------------------------------------------

                # honor hard breakpoints
                next if ( $forced_breakpoint_to_go[$iend_1] > 0 );

                my $bs = $bond_strength_to_go[$iend_1] + $bs_tweak;

                # combined line cannot be too long
                next
                  if excess_line_length( $ibeg_1, $iend_2 ) > 0;

                # do not recombine if we would skip in indentation levels
                if ( $n < $nmax ) {
                    my $if_next = $$ri_beg[ $n + 1 ];
                    next
                      if (
                           $levels_to_go[$ibeg_1] < $levels_to_go[$ibeg_2]
                        && $levels_to_go[$ibeg_2] < $levels_to_go[$if_next]

                        # but an isolated 'if (' is undesirable
                        && !(
                               $n == 1
                            && $iend_1 - $ibeg_1 <= 2
                            && $types_to_go[$ibeg_1]  eq 'k'
                            && $tokens_to_go[$ibeg_1] eq 'if'
                            && $tokens_to_go[$iend_1] ne '('
                        )
                      );
                }

                # honor no-break's
                next if ( $bs == NO_BREAK );

                # remember the pair with the greatest bond strength
                if ( !$n_best ) {
                    $n_best  = $n;
                    $bs_best = $bs;
                }
                else {

                    if ( $bs > $bs_best ) {
                        $n_best  = $n;
                        $bs_best = $bs;
                    }
                }
            }

            # recombine the pair with the greatest bond strength
            if ($n_best) {
                splice @$ri_beg, $n_best, 1;
                splice @$ri_end, $n_best - 1, 1;

                # keep going if we are still making progress
                $more_to_do++;
            }
        }
        return ( $ri_beg, $ri_end );
    }
}    # end recombine_breakpoints

16272

16273

sub break_all_chain_tokens {

    # Scan the current breakpoints looking for breaks at certain "chain
    # operators" (. : && || + etc) which often occur repeatedly in a long
    # statement.  If we see a break at any one, break at all similar tokens
    # within the same container.
    #
    #   $ri_left  - ref to array of indexes of the leftmost token of each line
    #   $ri_right - ref to array of indexes of the rightmost token of each line
    #
    # Any new break points found are handed to insert_additional_breaks
    # together with the two array references.
    my ( $ri_left, $ri_right ) = @_;

    my %saw_chain_type;
    my %left_chain_type;
    my %right_chain_type;
    my %interior_chain_type;
    my $nmax = @$ri_right - 1;

    # treat + and - the same, and treat * and / the same
    my $fold_type = sub {
        my $type = shift;
        return '+' if ( $type eq '-' );
        return '*' if ( $type eq '/' );
        return $type;
    };

    # scan the left and right end tokens of all lines
    my $count = 0;
  LINE:
    foreach my $n ( 0 .. $nmax ) {
        my ( $il, $ir ) = ( $$ri_left[$n], $$ri_right[$n] );
        my $typel = $fold_type->( $types_to_go[$il] );
        my $typer = $fold_type->( $types_to_go[$ir] );

        # a chain operator at the left end, where breaks go before it
        if (   $is_chain_operator{ $tokens_to_go[$il] }
            && $want_break_before{$typel} )
        {
            next LINE if ( $typel eq '?' );
            push @{ $left_chain_type{$typel} }, $il;
            $saw_chain_type{$typel} = 1;
            $count++;
        }

        # a chain operator at the right end, where breaks go after it
        if (   $is_chain_operator{ $tokens_to_go[$ir] }
            && !$want_break_before{$typer} )
        {
            next LINE if ( $typer eq '?' );
            push @{ $right_chain_type{$typer} }, $ir;
            $saw_chain_type{$typer} = 1;
            $count++;
        }
    }
    return unless $count;

    # now look for any interior tokens of the same types
    $count = 0;
    foreach my $n ( 0 .. $nmax ) {
        my $i_first = $$ri_left[$n] + 1;
        my $i_last  = $$ri_right[$n] - 1;
        foreach my $i ( $i_first .. $i_last ) {
            my $type = $fold_type->( $types_to_go[$i] );
            next unless ( $saw_chain_type{$type} );
            push @{ $interior_chain_type{$type} }, $i;
            $count++;
        }
    }
    return unless $count;

    # now make a list of all new break points
    my @insert_list;

    # loop over all chain types
  TYPE:
    foreach my $type ( keys %saw_chain_type ) {

        # quit if just ONE continuation line with leading .  For example--
        # print LATEXFILE '\framebox{\parbox[c][' . $h . '][t]{' . $w . '}{'
        #  . $contents;
        last TYPE if ( $nmax == 1 && $type =~ /^[\.\+]$/ );

        # loop over all interior chain tokens
      ITEST:
        foreach my $itest ( @{ $interior_chain_type{$type} } ) {

            # loop over all left end tokens of same type
            if ( $left_chain_type{$type} ) {
                next ITEST if $nobreak_to_go[ $itest - 1 ];
              LEFT:
                foreach my $i ( @{ $left_chain_type{$type} } ) {
                    next LEFT unless in_same_container( $i, $itest );
                    push @insert_list, $itest - 1;

                    # Break at matching ? if this : is at a different level.
                    # For example, the ? before $THRf_DEAD in the following
                    # should get a break if its : gets a break.
                    #
                    # my $flags =
                    #     ( $_ & 1 ) ? ( $_ & 4 ) ? $THRf_DEAD : $THRf_ZOMBIE
                    #   : ( $_ & 4 ) ? $THRf_R_DETACHED
                    #   :              $THRf_R_JOINABLE;
                    if (   $type eq ':'
                        && $levels_to_go[$i] != $levels_to_go[$itest] )
                    {
                        my $i_question = $mate_index_to_go[$itest];
                        push @insert_list, $i_question - 1
                          if ( $i_question > 0 );
                    }

                    # only the first left end token in the same container
                    # is used
                    last LEFT;
                }
            }

            # loop over all right end tokens of same type
            if ( $right_chain_type{$type} ) {
                next ITEST if $nobreak_to_go[$itest];
              RIGHT:
                foreach my $i ( @{ $right_chain_type{$type} } ) {
                    next RIGHT unless in_same_container( $i, $itest );
                    push @insert_list, $itest;

                    # break at matching ? if this : is at a different level
                    if (   $type eq ':'
                        && $levels_to_go[$i] != $levels_to_go[$itest] )
                    {
                        my $i_question = $mate_index_to_go[$itest];
                        push @insert_list, $i_question
                          if ( $i_question >= 0 );
                    }

                    # only the first right end token in the same container
                    # is used
                    last RIGHT;
                }
            }
        }
    }

    # insert any new break points
    if (@insert_list) {
        insert_additional_breaks( \@insert_list, $ri_left, $ri_right );
    }
}

16402

16403

sub break_equals {

    # Look for an assignment operator on the first line which deserves a
    # break because the lines after it form a chain of operators.
    # For example, in the following snippet
    #
    #    $HOME = $ENV{HOME}
    #      || $ENV{LOGDIR}
    #      || $pw[7]
    #      || die "no home directory for user $<";
    #
    # breaking at the '=' gives this, which is a little nicer:
    #
    #    $HOME =
    #         $ENV{HOME}
    #      || $ENV{LOGDIR}
    #      || $pw[7]
    #      || die "no home directory for user $<";
    #
    # The tests made here follow those in set_logical_padding, which will
    # add the padding in the second line to improve alignment.
    #
    # Input:
    #   $ri_left, $ri_right = references to the arrays holding the index of
    #   the first and last token of each line.  Any new breaks are made by
    #   insert_additional_breaks, which updates these two arrays in place.
    my ( $ri_left, $ri_right ) = @_;

    # we need a first line plus at least two continuation lines
    my $last_line = $#{$ri_right};
    return if ( $last_line < 2 );

    # The second and third lines must both start with the same chain
    # operator, at the same nesting depth.
    my $first_op_token = "";
    my $chain_depth;
    foreach my $line ( 1, 2 ) {
        my $i_lead     = $ri_left->[$line];
        my $lead_token = $tokens_to_go[$i_lead];

        # a token starting with a word character (and, or) is looked up
        # by its text; anything else is looked up by its token type
        my $chain_key =
          ( $lead_token =~ /^\w/ ) ? $lead_token : $types_to_go[$i_lead];
        return unless ( $is_chain_operator{$chain_key} );

        my $lead_depth = $nesting_depth_to_go[$i_lead];
        if ( $line == 2 ) {
            return if ( $lead_token ne $first_op_token );
            return if ( $lead_depth ne $chain_depth );
        }
        $first_op_token = $lead_token;
        $chain_depth    = $lead_depth;
    }

    # limits of the first line
    my $i_first_beg = $ri_left->[0];
    my $i_first_end = $ri_right->[0];

    # Make a list of break points at assignment operators in the interior
    # of the first line, working from right to left.
    my @insert_list;
    foreach my $i ( reverse( $i_first_beg + 1 .. $i_first_end - 1 ) ) {
        my $type = $types_to_go[$i];
        next unless ( $is_assignment{$type} );
        next unless ( $nesting_depth_to_go[$i] eq $chain_depth );
        push @insert_list, ( $want_break_before{$type} ? $i - 1 : $i );
    }

    # Break after a 'return' followed by a chain of operators
    #  return ( $^O !~ /win32|dos/i )
    #    && ( $^O ne 'VMS' )
    #    && ( $^O ne 'OS2' )
    #    && ( $^O ne 'MacOS' );
    # To give:
    #  return
    #       ( $^O !~ /win32|dos/i )
    #    && ( $^O ne 'VMS' )
    #    && ( $^O ne 'OS2' )
    #    && ( $^O ne 'MacOS' );
    if (   $types_to_go[0] eq 'k'
        && $tokens_to_go[0] eq 'return'
        && $i_first_end > $i_first_beg
        && $nesting_depth_to_go[0] eq $chain_depth )
    {
        push @insert_list, 0;
    }

    return unless (@insert_list);

    # One final check...
    # Scan the second and third lines and be sure that they hold no
    # assignments at the chain depth.  We want to avoid breaking at an =
    # to make something like this:
    #    unless ( $icon =
    #           $html_icons{"$type-$state"}
    #        or $icon = $html_icons{$type}
    #        or $icon = $html_icons{$state} )
    foreach my $line ( 1, 2 ) {
        foreach my $i ( $ri_left->[$line] + 1 .. $ri_right->[$line] ) {
            return
              if ( $is_assignment{ $types_to_go[$i] }
                && $nesting_depth_to_go[$i] eq $chain_depth );
        }
    }

    # ok, insert the new break points (the list cannot be empty here)
    insert_additional_breaks( \@insert_list, $ri_left, $ri_right );
}

16513

16514

sub insert_final_breaks {

    # For long ternary chains: if the '?' which matches the first line-end
    # ':' lies in the interior of a preceding line, look for a good
    # breakpoint before that '?' and insert it.
    #
    # Input:
    #   $ri_left, $ri_right = references to the arrays holding the index of
    #   the first and last token of each line.  A new break is made by
    #   insert_additional_breaks, which updates these two arrays in place.
    my ( $ri_left, $ri_right ) = @_;

    # Scan the left and right end tokens of the lines, looking for the
    # first ':'.  Give up if a '?' is seen at a line end first.
    my $i_first_colon = -1;
    foreach my $line ( 0 .. $#{$ri_right} ) {
        my $i_left     = $ri_left->[$line];
        my $i_right    = $ri_right->[$line];
        my $type_left  = $types_to_go[$i_left];
        my $type_right = $types_to_go[$i_right];
        return if ( $type_left eq '?' || $type_right eq '?' );

        if ( $type_left eq ':' ) {
            $i_first_colon = $i_left;
            last;
        }
        if ( $type_right eq ':' ) {
            $i_first_colon = $i_right;
            last;
        }
    }
    return unless ( $i_first_colon > 0 );

    # locate the '?' belonging to this ':'
    my $i_question = $mate_index_to_go[$i_first_colon];
    return unless ( $i_question > 0 );

    # Search backwards from the '?' for the nearest good break.
    # For now, a good break is either a comma or a 'return'.
    foreach my $ii ( reverse( 0 .. $i_question - 1 ) ) {
        my $is_good_break = $types_to_go[$ii] eq ','
          || ( $types_to_go[$ii] eq 'k' && $tokens_to_go[$ii] eq 'return' );
        next unless ($is_good_break);
        next unless ( in_same_container( $ii, $i_question ) );

        # insert the new break point; only the nearest one is used
        insert_additional_breaks( [$ii], $ri_left, $ri_right );
        last;
    }
}

16562

16563

sub in_same_container {

    # Return 1 if the tokens at indexes $i1 and $i2 are in the same
    # container and are not separated by a comma, ? or : at that depth.
    # Otherwise make a bare return (false).
    my ( $i1, $i2 ) = @_;

    # the reference type and depth are always taken from the first index
    my $ref_type  = $types_to_go[$i1];
    my $ref_depth = $nesting_depth_to_go[$i1];
    return if ( $nesting_depth_to_go[$i2] != $ref_depth );

    # put the two indexes in increasing order
    my ( $i_lo, $i_hi ) = ( $i2 < $i1 ) ? ( $i2, $i1 ) : ( $i1, $i2 );

    ###########################################################
    # This is potentially a very slow routine and not critical.
    # For safety just give up for large differences.
    # See test file 'infinite_loop.txt'
    # TODO: replace this loop with a data structure
    ###########################################################
    return if ( $i_hi - $i_lo > 200 );

    foreach my $i ( $i_lo + 1 .. $i_hi - 1 ) {

        # skip over anything nested more deeply; quit if we leave
        # the container
        my $depth = $nesting_depth_to_go[$i];
        next   if ( $depth > $ref_depth );
        return if ( $depth < $ref_depth );

        # treat => same as ,
        my $tok = $tokens_to_go[$i];
        if ( $tok eq '=>' ) { $tok = ',' }

        if ( $ref_type eq ':' ) {

            # for a ':' only a comma is a separator
            return if ( $tok =~ /^[\,]$/ );
        }
        else {

            # Example: we would not want to break at any of these .'s
            #  : "<A HREF=\"#item_" . htmlify( 0, $s2 ) . "\">$str</A>"
            return if ( $tok =~ /^[\,\:\?]$/ );
            return if ( $tok eq '||' );
            return if ( $tok eq 'or' );
        }
    }
    return 1;
}

16599

16600

sub set_continuation_breaks {

    # Define an array of indexes for inserting newline characters to
    # keep the line lengths below the maximum desired length.  There is
    # an implied break after the last token, so it need not be included.

    # Method:
    # This routine is part of series of routines which adjust line
    # lengths.  It is only called if a statement is longer than the
    # maximum line length, or if a preliminary scanning located
    # desirable break points.   Sub scan_list has already looked at
    # these tokens and set breakpoints (in array
    # $forced_breakpoint_to_go[$i]) where it wants breaks (for example
    # after commas, after opening parens, and before closing parens).
    # This routine will honor these breakpoints and also add additional
    # breakpoints as necessary to keep the line length below the maximum
    # requested.  It bases its decision on where the 'bond strength' is
    # lowest.

    # Input:
    #   $saw_good_break = true if the old line had a good break; it is
    #   used below to force at least one breakpoint in that case.

    # Output: returns the list ( \@i_first, \@i_last, $colon_count ) where
    #     @i_first
    #     @i_last
    # contain the indexes $i of the first and last tokens on each
    # line, and $colon_count is the number of '?' and ':' tokens seen.

    # In addition, the array:
    #   $forced_breakpoint_to_go[$i]
    # may be updated to be =1 for any index $i after which there must be
    # a break.  This signals later routines not to undo the breakpoint.

    my $saw_good_break = shift;
    my @i_first        = ();      # the first index to output
    my @i_last         = ();      # the last index to output
    my @i_colon_breaks = ();      # needed to decide if we have to break at ?'s
    if ( $types_to_go[0] eq ':' ) { push @i_colon_breaks, 0 }

    set_bond_strengths();

    my $imin = 0;
    my $imax = $max_index_to_go;
    if ( $types_to_go[$imin] eq 'b' ) { $imin++ }
    if ( $types_to_go[$imax] eq 'b' ) { $imax-- }
    my $i_begin = $imin;          # index for starting next iteration

    my $leading_spaces          = leading_spaces_to_go($imin);
    my $line_count              = 0;
    my $last_break_strength     = NO_BREAK;
    my $i_last_break            = -1;
    my $max_bias                = 0.001;
    my $tiny_bias               = 0.0001;
    my $leading_alignment_token = "";
    my $leading_alignment_type  = "";

    # see if any ?/:'s are in order
    my $colons_in_order = 1;
    my $last_tok        = "";
    my @colon_list  = grep /^[\?\:]$/, @tokens_to_go[ 0 .. $max_index_to_go ];
    my $colon_count = @colon_list;
    foreach (@colon_list) {
        if ( $_ eq $last_tok ) { $colons_in_order = 0; last }
        $last_tok = $_;
    }

    # This is a sufficient but not necessary condition for colon chain
    my $is_colon_chain = ( $colons_in_order && @colon_list > 2 );

    #-------------------------------------------------------
    # BEGINNING of main loop to set continuation breakpoints
    # Keep iterating until we reach the end
    #-------------------------------------------------------
    while ( $i_begin <= $imax ) {
        my $lowest_strength        = NO_BREAK;
        my $starting_sum           = $lengths_to_go[$i_begin];
        my $i_lowest               = -1;
        my $i_test                 = -1;
        my $lowest_next_token      = '';
        my $lowest_next_type       = 'b';
        my $i_lowest_next_nonblank = -1;

        #-------------------------------------------------------
        # BEGINNING of inner loop to find the best next breakpoint
        #-------------------------------------------------------
        for ( $i_test = $i_begin ; $i_test <= $imax ; $i_test++ ) {
            my $type       = $types_to_go[$i_test];
            my $token      = $tokens_to_go[$i_test];
            my $next_type  = $types_to_go[ $i_test + 1 ];
            my $next_token = $tokens_to_go[ $i_test + 1 ];
            my $i_next_nonblank =
              ( ( $next_type eq 'b' ) ? $i_test + 2 : $i_test + 1 );
            my $next_nonblank_type       = $types_to_go[$i_next_nonblank];
            my $next_nonblank_token      = $tokens_to_go[$i_next_nonblank];
            my $next_nonblank_block_type = $block_type_to_go[$i_next_nonblank];
            my $strength                 = $bond_strength_to_go[$i_test];
            my $must_break               = 0;

            # FIXME: TESTING: Might want to be able to break after these
            # force an immediate break at certain operators
            # with lower level than the start of the line
            if (
                (
                    $next_nonblank_type =~ /^(\.|\&\&|\|\|)$/
                    || (   $next_nonblank_type eq 'k'
                        && $next_nonblank_token =~ /^(and|or)$/ )
                )
                && ( $nesting_depth_to_go[$i_begin] >
                    $nesting_depth_to_go[$i_next_nonblank] )
              )
            {
                set_forced_breakpoint($i_next_nonblank);
            }

            if (

                # Try to put a break where requested by scan_list
                $forced_breakpoint_to_go[$i_test]

                # break between ) { in a continued line so that the '{' can
                # be outdented
                # See similar logic in scan_list which catches instances
                # where a line is just something like ') {'
                || (   $line_count
                    && ( $token eq ')' )
                    && ( $next_nonblank_type eq '{' )
                    && ($next_nonblank_block_type)
                    && !$rOpts->{'opening-brace-always-on-right'} )

                # There is an implied forced break at a terminal opening brace
                || ( ( $type eq '{' ) && ( $i_test == $imax ) )
              )
            {

                # Forced breakpoints must sometimes be overridden, for example
                # because of a side comment causing a NO_BREAK.  It is easier
                # to catch this here than when they are set.
                if ( $strength < NO_BREAK ) {
                    $strength   = $lowest_strength - $tiny_bias;
                    $must_break = 1;
                }
            }

            # quit if a break here would put a good terminal token on
            # the next line and we already have a possible break
            if (
                   !$must_break
                && ( $next_nonblank_type =~ /^[\;\,]$/ )
                && (
                    (
                        $leading_spaces +
                        $lengths_to_go[ $i_next_nonblank + 1 ] -
                        $starting_sum
                    ) > $rOpts_maximum_line_length
                )
              )
            {
                last if ( $i_lowest >= 0 );
            }

            # Avoid a break which would strand a single punctuation
            # token.  For example, we do not want to strand a leading
            # '.' which is followed by a long quoted string.
            if (
                   !$must_break
                && ( $i_test == $i_begin )
                && ( $i_test < $imax )
                && ( $token eq $type )
                && (
                    (
                        $leading_spaces +
                        $lengths_to_go[ $i_test + 1 ] -
                        $starting_sum
                    ) <= $rOpts_maximum_line_length
                )
              )
            {
                $i_test++;

                if ( ( $i_test < $imax ) && ( $next_type eq 'b' ) ) {
                    $i_test++;
                }

                # restart this pass at the new $i_test without the loop
                # increment
                redo;
            }

            if ( ( $strength <= $lowest_strength ) && ( $strength < NO_BREAK ) )
            {

                # break at previous best break if it would have produced
                # a leading alignment of certain common tokens, and it
                # is different from the latest candidate break
                last
                  if ($leading_alignment_type);

                # Force at least one breakpoint if old code had good
                # break.  It is only called if a breakpoint is required or
                # desired.  This will probably need some adjustments
                # over time.  A goal is to try to be sure that, if a new
                # side comment is introduced into formatted text, then
                # the same breakpoints will occur.  scbreak.t
                last
                  if (
                    $i_test == $imax                # we are at the end
                    && !$forced_breakpoint_count    #
                    && $saw_good_break              # old line had good break
                    && $type =~ /^[#;\{]$/          # and this line ends in
                                                    # ';' or side comment
                    && $i_last_break < 0        # and we haven't made a break
                    && $i_lowest > 0            # and we saw a possible break
                    && $i_lowest < $imax - 1    # (but not just before this ;)
                    && $strength - $lowest_strength < 0.5 * WEAK # and it's good
                  );

                $lowest_strength        = $strength;
                $i_lowest               = $i_test;
                $lowest_next_token      = $next_nonblank_token;
                $lowest_next_type       = $next_nonblank_type;
                $i_lowest_next_nonblank = $i_next_nonblank;
                last if $must_break;

                # set flags to remember if a break here will produce a
                # leading alignment of certain common tokens
                if (   $line_count > 0
                    && $i_test < $imax
                    && ( $lowest_strength - $last_break_strength <= $max_bias )
                  )
                {
                    my $i_last_end = $i_begin - 1;
                    if ( $types_to_go[$i_last_end] eq 'b' ) { $i_last_end -= 1 }
                    my $tok_beg  = $tokens_to_go[$i_begin];
                    my $type_beg = $types_to_go[$i_begin];
                    if (

                        # check for leading alignment of certain tokens
                        (
                               $tok_beg eq $next_nonblank_token
                            && $is_chain_operator{$tok_beg}
                            && (   $type_beg eq 'k'
                                || $type_beg eq $tok_beg )
                            && $nesting_depth_to_go[$i_begin] >=
                            $nesting_depth_to_go[$i_next_nonblank]
                        )

                        || (   $tokens_to_go[$i_last_end] eq $token
                            && $is_chain_operator{$token}
                            && ( $type eq 'k' || $type eq $token )
                            && $nesting_depth_to_go[$i_last_end] >=
                            $nesting_depth_to_go[$i_test] )
                      )
                    {
                        $leading_alignment_token = $next_nonblank_token;
                        $leading_alignment_type  = $next_nonblank_type;
                    }
                }
            }

            my $too_long =
              ( $i_test >= $imax )
              ? 1
              : (
                (
                    $leading_spaces +
                      $lengths_to_go[ $i_test + 2 ] -
                      $starting_sum
                ) > $rOpts_maximum_line_length
              );

            FORMATTER_DEBUG_FLAG_BREAK
              && print
"BREAK: testing i = $i_test imax=$imax $types_to_go[$i_test] $next_nonblank_type leading sp=($leading_spaces) next length = $lengths_to_go[$i_test+2] too_long=$too_long str=$strength\n";

            # allow one extra terminal token after exceeding line length
            # if it would strand this token.
            if (   $rOpts_fuzzy_line_length
                && $too_long
                && ( $i_lowest == $i_test )
                && ( length($token) > 1 )
                && ( $next_nonblank_type =~ /^[\;\,]$/ ) )
            {
                $too_long = 0;
            }

            last
              if (
                ( $i_test == $imax )    # we're done if no more tokens,
                || (
                    ( $i_lowest >= 0 )    # or no more space and we have a break
                    && $too_long
                )
              );
        }

        #-------------------------------------------------------
        # END of inner loop to find the best next breakpoint
        # Now decide exactly where to put the breakpoint
        #-------------------------------------------------------

        # it's always ok to break at imax if no other break was found
        if ( $i_lowest < 0 ) { $i_lowest = $imax }

        # semi-final index calculation
        my $i_next_nonblank = (
            ( $types_to_go[ $i_lowest + 1 ] eq 'b' )
            ? $i_lowest + 2
            : $i_lowest + 1
        );
        my $next_nonblank_type  = $types_to_go[$i_next_nonblank];
        my $next_nonblank_token = $tokens_to_go[$i_next_nonblank];

        #-------------------------------------------------------
        # ?/: rule 1 : if a break here will separate a '?' on this
        # line from its closing ':', then break at the '?' instead.
        #-------------------------------------------------------
        my $i;
        foreach $i ( $i_begin + 1 .. $i_lowest - 1 ) {
            next unless ( $tokens_to_go[$i] eq '?' );

            # do not break if probable sequence of ?/: statements
            next if ($is_colon_chain);

            # do not break if statement is broken by side comment
            next
              if (
                $tokens_to_go[$max_index_to_go] eq '#'
                && terminal_type( \@types_to_go, \@block_type_to_go, 0,
                    $max_index_to_go ) !~ /^[\;\}]$/
              );

            # no break needed if matching : is also on the line
            next
              if ( $mate_index_to_go[$i] >= 0
                && $mate_index_to_go[$i] <= $i_next_nonblank );

            $i_lowest = $i;
            if ( $want_break_before{'?'} ) { $i_lowest-- }
            last;
        }

        #-------------------------------------------------------
        # END of search for the best next breakpoint:
        # Break the line after the token with index i=$i_lowest
        #-------------------------------------------------------

        # final index calculation
        $i_next_nonblank = (
            ( $types_to_go[ $i_lowest + 1 ] eq 'b' )
            ? $i_lowest + 2
            : $i_lowest + 1
        );
        $next_nonblank_type  = $types_to_go[$i_next_nonblank];
        $next_nonblank_token = $tokens_to_go[$i_next_nonblank];

        FORMATTER_DEBUG_FLAG_BREAK
          && print "BREAK: best is i = $i_lowest strength = $lowest_strength\n";

        #-------------------------------------------------------
        # ?/: rule 2 : if we break at a '?', then break at its ':'
        #
        # Note: this rule is also in sub scan_list to handle a break
        # at the start and end of a line (in case breaks are dictated
        # by side comments).
        #-------------------------------------------------------
        if ( $next_nonblank_type eq '?' ) {
            set_closing_breakpoint($i_next_nonblank);
        }
        elsif ( $types_to_go[$i_lowest] eq '?' ) {
            set_closing_breakpoint($i_lowest);
        }

        #-------------------------------------------------------
        # ?/: rule 3 : if we break at a ':' then we save
        # its location for further work below.  We may need to go
        # back and break at its '?'.
        #-------------------------------------------------------
        if ( $next_nonblank_type eq ':' ) {
            push @i_colon_breaks, $i_next_nonblank;
        }
        elsif ( $types_to_go[$i_lowest] eq ':' ) {
            push @i_colon_breaks, $i_lowest;
        }

        # here we should set breaks for all '?'/':' pairs which are
        # separated by this line

        $line_count++;

        # save this line segment, after trimming blanks at the ends
        push( @i_first,
            ( $types_to_go[$i_begin] eq 'b' ) ? $i_begin + 1 : $i_begin );
        push( @i_last,
            ( $types_to_go[$i_lowest] eq 'b' ) ? $i_lowest - 1 : $i_lowest );

        # set a forced breakpoint at a container opening, if necessary, to
        # signal a break at a closing container.  Excepting '(' for now.
        if ( $tokens_to_go[$i_lowest] =~ /^[\{\[]$/
            && !$forced_breakpoint_to_go[$i_lowest] )
        {
            set_closing_breakpoint($i_lowest);
        }

        # get ready to go again
        $i_begin                 = $i_lowest + 1;
        $last_break_strength     = $lowest_strength;
        $i_last_break            = $i_lowest;
        $leading_alignment_token = "";
        $leading_alignment_type  = "";
        $lowest_next_token       = '';
        $lowest_next_type        = 'b';

        if ( ( $i_begin <= $imax ) && ( $types_to_go[$i_begin] eq 'b' ) ) {
            $i_begin++;
        }

        # update indentation size
        if ( $i_begin <= $imax ) {
            $leading_spaces = leading_spaces_to_go($i_begin);
        }
    }

    #-------------------------------------------------------
    # END of main loop to set continuation breakpoints
    # Now go back and make any necessary corrections
    #-------------------------------------------------------

    #-------------------------------------------------------
    # ?/: rule 4 -- if we broke at a ':', then break at
    # corresponding '?' unless this is a chain of ?: expressions
    #-------------------------------------------------------
    if (@i_colon_breaks) {

        # using a simple method for deciding if we are in a ?/: chain --
        # this is a chain if it has multiple ?/: pairs all in order;
        # otherwise not.
        # Note that if line starts in a ':' we count that above as a break
        my $is_chain = ( $colons_in_order && @i_colon_breaks > 1 );

        unless ($is_chain) {
            my @insert_list = ();
            foreach (@i_colon_breaks) {
                my $i_question = $mate_index_to_go[$_];
                if ( $i_question >= 0 ) {
                    if ( $want_break_before{'?'} ) {
                        $i_question--;
                        if (   $i_question > 0
                            && $types_to_go[$i_question] eq 'b' )
                        {
                            $i_question--;
                        }
                    }

                    if ( $i_question >= 0 ) {
                        push @insert_list, $i_question;
                    }
                }

                # note: this is called once per ':' with the list
                # accumulated so far
                insert_additional_breaks( \@insert_list, \@i_first, \@i_last );
            }
        }
    }
    return ( \@i_first, \@i_last, $colon_count );
}

17057

17058

sub insert_additional_breaks {

    # Add line breaks at requested locations after sub
    # set_continuation_breaks has made its preliminary breaks.
    #
    # Input:
    #   $ri_break_list = reference to a list of token indexes; a break is
    #       wanted after each of these tokens
    #   $ri_first, $ri_last = references to the arrays holding the index
    #       of the first and last token of each line.  Both arrays are
    #       updated in place when a line is split.
    my ( $ri_break_list, $ri_first, $ri_last ) = @_;

    # The break points are taken in increasing order, so the line search
    # never has to back up.
    my $line = 0;
    foreach my $i_break_left ( sort { $a <=> $b } @$ri_break_list ) {

        # move forward to the first line which ends after the break
        while ( $i_break_left >= $ri_last->[$line] ) {
            $line++;

            # shouldn't happen unless caller passes bad indexes
            if ( $line >= @$ri_last ) {
                warning(
"Non-fatal program bug: couldn't set break at $i_break_left\n"
                );
                report_definite_bug();
                return;
            }
        }
        my $i_f = $ri_first->[$line];
        my $i_l = $ri_last->[$line];

        # the new line starts at the next token, skipping one blank
        my $i_break_right = $i_break_left + 1;
        $i_break_right++ if ( $types_to_go[$i_break_right] eq 'b' );

        # both sides of the break must fall within this line
        next if ( $i_break_left < $i_f  || $i_break_left >= $i_l );
        next if ( $i_break_right <= $i_f || $i_break_right > $i_l );

        # replace the line with its two pieces
        splice( @$ri_first, $line, 1, $i_f,          $i_break_right );
        splice( @$ri_last,  $line, 1, $i_break_left, $i_l );
    }
}

17100

17101

sub set_closing_breakpoint {

    # Arrange for a breakpoint at the token which closes the token at
    # index $i_break.  Sub set_continuation_breaks calls this for a '?'
    # and for an opening '{' or '['.
    my $i_break = shift;
    my $i_mate  = $mate_index_to_go[$i_break];

    if ( $i_mate < 0 ) {

        # No mate index is available, so remember the sequence number of
        # the container (if it has one) as a postponed breakpoint.
        my $type_sequence = $type_sequence_to_go[$i_break];
        if ($type_sequence) {
            $postponed_breakpoint{$type_sequence} = 1;
        }
    }

    # CAUTION: infinite recursion possible here:
    #   set_closing_breakpoint calls set_forced_breakpoint, and
    #   set_forced_breakpoint call set_closing_breakpoint
    #   ( test files attrib.t, BasicLyx.pm.html).
    # Don't reduce the '2' in the statement below
    elsif ( $i_mate > $i_break + 2 ) {

        # break before } ] and ), but sub set_forced_breakpoint will decide
        # to break before or after a ? and :
        my $i_forced = $i_mate;
        if ( $tokens_to_go[$i_break] ne '?' ) { $i_forced -= 1 }
        set_forced_breakpoint($i_forced);
    }
}

17130

17131

# Check to see if the output line tabbing agrees with the input line.
# This can be very useful for debugging a script which has an extra
# or missing brace.
#
# The two arguments are the levels to be compared; in the log file the
# first is reported as 'input' and the second as 'output'.  The start and
# end of each run of disagreeing lines is written to the log file, up to
# MAX_NAG_MESSAGES runs.
sub compare_indentation_levels {

    my ( $python_indentation_level, $structural_indentation_level ) = @_;

    if ( $python_indentation_level eq $structural_indentation_level ) {

        # The levels agree.  If we were in a run of disagreements then
        # this line ends it.
        return unless ($in_tabbing_disagreement);

        if ( $tabbing_disagreement_count <= MAX_NAG_MESSAGES ) {
            write_logfile_entry(
"End indentation disagreement from input line $in_tabbing_disagreement\n"
            );

            if ( $tabbing_disagreement_count == MAX_NAG_MESSAGES ) {
                write_logfile_entry(
                    "No further tabbing disagreements will be noted\n");
            }
        }
        $in_tabbing_disagreement = 0;
        return;
    }

    # The levels disagree
    $last_tabbing_disagreement = $input_line_number;

    # nothing more to do if this line continues an existing run
    return if ($in_tabbing_disagreement);

    # otherwise this line starts a new run of disagreements
    $tabbing_disagreement_count++;

    if ( $tabbing_disagreement_count <= MAX_NAG_MESSAGES ) {
        write_logfile_entry(
"Start indentation disagreement: input=$python_indentation_level; output=$structural_indentation_level\n"
        );
    }
    $in_tabbing_disagreement = $input_line_number;
    $first_tabbing_disagreement ||= $in_tabbing_disagreement;
    return;
}

17173

17174

#####################################################################

17175

#

17176

# the Perl::Tidy::IndentationItem class supplies items which contain

17177

# how much whitespace should be used at the start of a line

17178

#

17179

#####################################################################

17180

17181

package Perl::Tidy::IndentationItem;

# Indexes for indentation items.
# An indentation item is an anonymous array (see sub new); each constant
# below names one slot of that array.

# total leading white spaces
use constant SPACES => 0;

# the indentation 'level'
use constant LEVEL => 1;

# the 'continuation level'
use constant CI_LEVEL => 2;

# how many left spaces available for this level
use constant AVAILABLE_SPACES => 3;

# index where we saw closing '}'
use constant CLOSED => 4;

# how many commas at this level?
use constant COMMA_COUNT => 5;

# output batch number
use constant SEQUENCE_NUMBER => 6;

# index in output batch list
use constant INDEX => 7;

# any dependents?
use constant HAVE_CHILD => 8;

# how many spaces to the right we would like to move to get
# alignment (negative if left)
use constant RECOVERABLE_SPACES => 9;

# do we want to try to align with an opening structure?
use constant ALIGN_PAREN => 10;

# if visited by corrector logic
use constant MARKED => 11;

# indentation nesting depth
use constant STACK_DEPTH => 12;

# first token index of this level
use constant STARTING_INDEX => 13;

# how many =>'s
use constant ARROW_COUNT => 14;

17203

17204

sub new {

17205

17206

# Create an 'indentation_item' which describes one level of leading

17207

# whitespace when the '-lp' indentation is used. We return

17208

# a reference to an anonymous array of associated variables.

17209

# See above constants for storage scheme.

17210

my (

17211

$class, $spaces, $level,

17212

$ci_level, $available_spaces, $index,

17213

$gnu_sequence_number, $align_paren, $stack_depth,

17214

$starting_index,

17215

) = @_;

17216

my $closed = -1;

17217

my $arrow_count = 0;

17218

my $comma_count = 0;

17219

my $have_child = 0;

17220

my $want_right_spaces = 0;

17221

my $marked = 0;

17222

bless [

17223

$spaces, $level, $ci_level,

17224

$available_spaces, $closed, $comma_count,

17225

$gnu_sequence_number, $index, $have_child,

17226

$want_right_spaces, $align_paren, $marked,

17227

$stack_depth, $starting_index, $arrow_count,

17228

], $class;

17229

}

17230

17231

sub permanently_decrease_AVAILABLE_SPACES {

17232

17233

# make a permanent reduction in the available indentation spaces

17234

# at one indentation item. NOTE: if there are child nodes, their

17235

# total SPACES must be reduced by the caller.

17236

17237

my ( $item, $spaces_needed ) = @_;

17238

my $available_spaces = $item->get_AVAILABLE_SPACES();

17239

my $deleted_spaces =

17240

( $available_spaces > $spaces_needed )

17241

? $spaces_needed

17242

: $available_spaces;

17243

$item->decrease_AVAILABLE_SPACES($deleted_spaces);

17244

$item->decrease_SPACES($deleted_spaces);

17245

$item->set_RECOVERABLE_SPACES(0);

17246

17247

return $deleted_spaces;

17248

}

17249

17250

sub tentatively_decrease_AVAILABLE_SPACES {

17251

17252

# We are asked to tentatively delete $spaces_needed of indentation

17253

# for a indentation item. We may want to undo this later. NOTE: if

17254

# there are child nodes, their total SPACES must be reduced by the

17255

# caller.

17256

my ( $item, $spaces_needed ) = @_;

17257

my $available_spaces = $item->get_AVAILABLE_SPACES();

17258

my $deleted_spaces =

17259

( $available_spaces > $spaces_needed )

17260

? $spaces_needed

17261

: $available_spaces;

17262

$item->decrease_AVAILABLE_SPACES($deleted_spaces);

17263

$item->decrease_SPACES($deleted_spaces);

17264

$item->increase_RECOVERABLE_SPACES($deleted_spaces);

17265

return $deleted_spaces;

17266

}

17267

17268

sub get_STACK_DEPTH {

17269

my $self = shift;

17270

return $self->[STACK_DEPTH];

17271

}

17272

17273

sub get_SPACES {

17274

my $self = shift;

17275

return $self->[SPACES];

17276

}

17277

17278

sub get_MARKED {

17279

my $self = shift;

17280

return $self->[MARKED];

17281

}

17282

17283

sub set_MARKED {

17284

my ( $self, $value ) = @_;

17285

if ( defined($value) ) {

17286

$self->[MARKED] = $value;

17287

}

17288

return $self->[MARKED];

17289

}

17290

17291

sub get_AVAILABLE_SPACES {

17292

my $self = shift;

17293

return $self->[AVAILABLE_SPACES];

17294

}

17295

17296

sub decrease_SPACES {

17297

my ( $self, $value ) = @_;

17298

if ( defined($value) ) {

17299

$self->[SPACES] -= $value;

17300

}

17301

return $self->[SPACES];

17302

}

17303

17304

sub decrease_AVAILABLE_SPACES {

17305

my ( $self, $value ) = @_;

17306

if ( defined($value) ) {

17307

$self->[AVAILABLE_SPACES] -= $value;

17308

}

17309

return $self->[AVAILABLE_SPACES];

17310

}

17311

17312

sub get_ALIGN_PAREN {

17313

my $self = shift;

17314

return $self->[ALIGN_PAREN];

17315

}

17316

17317

sub get_RECOVERABLE_SPACES {

17318

my $self = shift;

17319

return $self->[RECOVERABLE_SPACES];

17320

}

17321

17322

sub set_RECOVERABLE_SPACES {

17323

my ( $self, $value ) = @_;

17324

if ( defined($value) ) {

17325

$self->[RECOVERABLE_SPACES] = $value;

17326

}

17327

return $self->[RECOVERABLE_SPACES];

17328

}

17329

17330

sub increase_RECOVERABLE_SPACES {

17331

my ( $self, $value ) = @_;

17332

if ( defined($value) ) {

17333

$self->[RECOVERABLE_SPACES] += $value;

17334

}

17335

return $self->[RECOVERABLE_SPACES];

17336

}

17337

17338

sub get_CI_LEVEL {

17339

my $self = shift;

17340

return $self->[CI_LEVEL];

17341

}

17342

17343

sub get_LEVEL {

17344

my $self = shift;

17345

return $self->[LEVEL];

17346

}

17347

17348

sub get_SEQUENCE_NUMBER {

17349

my $self = shift;

17350

return $self->[SEQUENCE_NUMBER];

17351

}

17352

17353

sub get_INDEX {

17354

my $self = shift;

17355

return $self->[INDEX];

17356

}

17357

17358

sub get_STARTING_INDEX {

17359

my $self = shift;

17360

return $self->[STARTING_INDEX];

17361

}

17362

17363

sub set_HAVE_CHILD {

17364

my ( $self, $value ) = @_;

17365

if ( defined($value) ) {

17366

$self->[HAVE_CHILD] = $value;

17367

}

17368

return $self->[HAVE_CHILD];

17369

}

17370

17371

sub get_HAVE_CHILD {

17372

my $self = shift;

17373

return $self->[HAVE_CHILD];

17374

}

17375

17376

sub set_ARROW_COUNT {

17377

my ( $self, $value ) = @_;

17378

if ( defined($value) ) {

17379

$self->[ARROW_COUNT] = $value;

17380

}

17381

return $self->[ARROW_COUNT];

17382

}

17383

17384

sub get_ARROW_COUNT {

17385

my $self = shift;

17386

return $self->[ARROW_COUNT];

17387

}

17388

17389

sub set_COMMA_COUNT {

17390

my ( $self, $value ) = @_;

17391

if ( defined($value) ) {

17392

$self->[COMMA_COUNT] = $value;

17393

}

17394

return $self->[COMMA_COUNT];

17395

}

17396

17397

sub get_COMMA_COUNT {

17398

my $self = shift;

17399

return $self->[COMMA_COUNT];

17400

}

17401

17402

sub set_CLOSED {

17403

my ( $self, $value ) = @_;

17404

if ( defined($value) ) {

17405

$self->[CLOSED] = $value;

17406

}

17407

return $self->[CLOSED];

17408

}

17409

17410

sub get_CLOSED {

17411

my $self = shift;

17412

return $self->[CLOSED];

17413

}

17414

17415

#####################################################################

17416

#

17417

# the Perl::Tidy::VerticalAligner::Line class supplies an object to

17418

# contain a single output line

17419

#

17420

#####################################################################

17421

17422

package Perl::Tidy::VerticalAligner::Line;

{

    use strict;
    use Carp;

    # Symbolic indexes into the blessed array that backs each Line object
    use constant JMAX                      => 0;
    use constant JMAX_ORIGINAL_LINE        => 1;
    use constant RTOKENS                   => 2;
    use constant RFIELDS                   => 3;
    use constant RPATTERNS                 => 4;
    use constant INDENTATION               => 5;
    use constant LEADING_SPACE_COUNT       => 6;
    use constant OUTDENT_LONG_LINES        => 7;
    use constant LIST_TYPE                 => 8;
    use constant IS_HANGING_SIDE_COMMENT   => 9;
    use constant RALIGNMENTS               => 10;
    use constant MAXIMUM_LINE_LENGTH       => 11;
    use constant RVERTICAL_TIGHTNESS_FLAGS => 12;

    # Correspondence between constructor argument names and array indexes
    my %_index_map;
    $_index_map{jmax}                      = JMAX;
    $_index_map{jmax_original_line}        = JMAX_ORIGINAL_LINE;
    $_index_map{rtokens}                   = RTOKENS;
    $_index_map{rfields}                   = RFIELDS;
    $_index_map{rpatterns}                 = RPATTERNS;
    $_index_map{indentation}               = INDENTATION;
    $_index_map{leading_space_count}       = LEADING_SPACE_COUNT;
    $_index_map{outdent_long_lines}        = OUTDENT_LONG_LINES;
    $_index_map{list_type}                 = LIST_TYPE;
    $_index_map{is_hanging_side_comment}   = IS_HANGING_SIDE_COMMENT;
    $_index_map{ralignments}               = RALIGNMENTS;
    $_index_map{maximum_line_length}       = MAXIMUM_LINE_LENGTH;
    $_index_map{rvertical_tightness_flags} = RVERTICAL_TIGHTNESS_FLAGS;

    # Values used for slots which the constructor caller does not supply.
    # RALIGNMENTS deliberately has no default here: a single array
    # reference stored in this table would be shared (and mutated) by every
    # object built from the defaults.  Sub new creates a fresh list instead.
    my @_default_data = ();
    $_default_data[JMAX]                      = undef;
    $_default_data[JMAX_ORIGINAL_LINE]        = undef;
    $_default_data[RTOKENS]                   = undef;
    $_default_data[RFIELDS]                   = undef;
    $_default_data[RPATTERNS]                 = undef;
    $_default_data[INDENTATION]               = undef;
    $_default_data[LEADING_SPACE_COUNT]       = undef;
    $_default_data[OUTDENT_LONG_LINES]        = undef;
    $_default_data[LIST_TYPE]                 = undef;
    $_default_data[IS_HANGING_SIDE_COMMENT]   = undef;
    $_default_data[RALIGNMENTS]               = undef;
    $_default_data[MAXIMUM_LINE_LENGTH]       = undef;
    $_default_data[RVERTICAL_TIGHTNESS_FLAGS] = undef;

    {

        # methods to count object population
        my $_count = 0;
        sub get_count        { return $_count }
        sub _increment_count { return ++$_count }
        sub _decrement_count { return --$_count }
    }

    # Constructor; may be called as a class method or as an object method.
    #
    #   Perl::Tidy::VerticalAligner::Line->new( name => value, ... )
    #   $line->new( name => value, ... )
    #
    # Recognized names are the keys of %_index_map.  A slot which is not
    # named in the call is copied from the invoking object when called as
    # an object method (references are copied, not cloned), or else taken
    # from @_default_data.  When called as a class method without a
    # 'ralignments' argument, the new object gets its own empty alignment
    # list.  Returns the new object and increments the population count.
    sub new {
        my ( $caller, %arg ) = @_;
        my $caller_is_obj = ref($caller);
        my $class         = $caller_is_obj || $caller;
        my $self          = bless [], $class;

        foreach my $name ( keys %_index_map ) {
            my $index = $_index_map{$name};
            if    ( exists $arg{$name} ) { $self->[$index] = $arg{$name} }
            elsif ($caller_is_obj) { $self->[$index] = $caller->[$index] }
            else { $self->[$index] = $_default_data[$index] }
        }

        # A line built purely from the class defaults must own a private
        # alignment list; otherwise set_alignment / set_alignments on one
        # line would silently modify every other such line.
        if ( !$caller_is_obj && !exists $arg{ralignments} ) {
            $self->[RALIGNMENTS] = [];
        }

        $self->_increment_count();
        return $self;
    }

    # keep the population count in step with object destruction
    sub DESTROY {
        $_[0]->_decrement_count();
    }

    # simple slot readers
    sub get_jmax                { return $_[0]->[JMAX] }
    sub get_jmax_original_line  { return $_[0]->[JMAX_ORIGINAL_LINE] }
    sub get_rtokens             { return $_[0]->[RTOKENS] }
    sub get_rfields             { return $_[0]->[RFIELDS] }
    sub get_rpatterns           { return $_[0]->[RPATTERNS] }
    sub get_indentation         { return $_[0]->[INDENTATION] }
    sub get_leading_space_count { return $_[0]->[LEADING_SPACE_COUNT] }
    sub get_outdent_long_lines  { return $_[0]->[OUTDENT_LONG_LINES] }
    sub get_list_type           { return $_[0]->[LIST_TYPE] }

    sub get_is_hanging_side_comment {
        return $_[0]->[IS_HANGING_SIDE_COMMENT];
    }

    sub get_rvertical_tightness_flags {
        return $_[0]->[RVERTICAL_TIGHTNESS_FLAGS];
    }

    # The next group of methods takes an alignment index $j as its first
    # argument and delegates to the alignment object stored at that index.
    sub set_column {
        return $_[0]->[RALIGNMENTS]->[ $_[1] ]->set_column( $_[2] );
    }

    # the alignment object at index $j
    sub get_alignment { return $_[0]->[RALIGNMENTS]->[ $_[1] ] }

    # all alignment objects of this line, as a list
    sub get_alignments { return @{ $_[0]->[RALIGNMENTS] } }

    sub get_column {
        return $_[0]->[RALIGNMENTS]->[ $_[1] ]->get_column();
    }

    sub get_starting_column {
        return $_[0]->[RALIGNMENTS]->[ $_[1] ]->get_starting_column();
    }

    sub increment_column {
        return $_[0]->[RALIGNMENTS]->[ $_[1] ]->increment_column( $_[2] );
    }

    # replace the contents of this line's alignment list with the given
    # alignment objects (the list itself is modified in place)
    sub set_alignments {
        my $self = shift;
        return @{ $self->[RALIGNMENTS] } = @_;
    }

    # Width of field $j: the distance between the column of alignment $j
    # and the column of the alignment before it (or the column itself for
    # the first field).
    sub current_field_width {
        my ( $self, $j ) = @_;
        return $self->get_column($j) if $j == 0;
        return $self->get_column($j) - $self->get_column( $j - 1 );
    }

    # How far the column of alignment $j has moved from its starting column
    sub field_width_growth {
        my ( $self, $j ) = @_;
        return $self->get_column($j) - $self->get_starting_column($j);
    }

    # Same as current_field_width, but based on the starting columns
    sub starting_field_width {
        my ( $self, $j ) = @_;
        return $self->get_starting_column($j) if $j == 0;
        return $self->get_starting_column($j) -
          $self->get_starting_column( $j - 1 );
    }

    # Widen field $j by $pad: the columns of alignment $j and of all
    # alignments after it, up to index jmax, are incremented by $pad.
    sub increase_field_width {
        my ( $self, $j, $pad ) = @_;
        my $jmax = $self->get_jmax();
        for my $k ( $j .. $jmax ) {
            $self->increment_column( $k, $pad );
        }
    }

    # Maximum line length minus the column of the last (jmax) alignment
    sub get_available_space_on_right {
        my $self = shift;
        my $jmax = $self->get_jmax();
        return $self->[MAXIMUM_LINE_LENGTH] - $self->get_column($jmax);
    }

    # simple slot writers; each returns the value stored
    sub set_jmax                { return $_[0]->[JMAX]                = $_[1] }
    sub set_jmax_original_line  { return $_[0]->[JMAX_ORIGINAL_LINE]  = $_[1] }
    sub set_rtokens             { return $_[0]->[RTOKENS]             = $_[1] }
    sub set_rfields             { return $_[0]->[RFIELDS]             = $_[1] }
    sub set_rpatterns           { return $_[0]->[RPATTERNS]           = $_[1] }
    sub set_indentation         { return $_[0]->[INDENTATION]         = $_[1] }
    sub set_leading_space_count { return $_[0]->[LEADING_SPACE_COUNT] = $_[1] }
    sub set_outdent_long_lines  { return $_[0]->[OUTDENT_LONG_LINES]  = $_[1] }
    sub set_list_type           { return $_[0]->[LIST_TYPE]           = $_[1] }

    sub set_is_hanging_side_comment {
        return $_[0]->[IS_HANGING_SIDE_COMMENT] = $_[1];
    }

    # store alignment object $_[2] at alignment index $_[1]
    sub set_alignment { return $_[0]->[RALIGNMENTS]->[ $_[1] ] = $_[2] }

}

17592

17593

#####################################################################

17594

#

17595

# the Perl::Tidy::VerticalAligner::Alignment class holds information

17596

# on a single column being aligned

17597

#

17598

#####################################################################

17599

package Perl::Tidy::VerticalAligner::Alignment;

{

    use strict;

    #use Carp;

    # Symbolic array indexes
    use constant COLUMN          => 0;    # the current column number
    use constant STARTING_COLUMN => 1;    # column number when created
    use constant MATCHING_TOKEN  => 2;    # what token we are matching
    use constant STARTING_LINE   => 3;    # the line index of creation
    use constant ENDING_LINE     => 4;    # the most recent line to use it
    use constant SAVED_COLUMN    => 5;    # column stashed by save_column
    use constant SERIAL_NUMBER   => 6;    # unique number for this alignment
                                          # (just its index in an array)

    # Correspondence between variables and array indexes
    my %_index_map = (
        column          => COLUMN,
        starting_column => STARTING_COLUMN,
        matching_token  => MATCHING_TOKEN,
        starting_line   => STARTING_LINE,
        ending_line     => ENDING_LINE,
        saved_column    => SAVED_COLUMN,
        serial_number   => SERIAL_NUMBER,
    );

    # Slot values used when neither the caller's arguments nor an invoking
    # object supply one; every slot defaults to undef.
    my @_default_data = ();
    foreach my $slot ( values %_index_map ) {
        $_default_data[$slot] = undef;
    }

    # class population count
    {
        my $_count = 0;
        sub get_count        { return $_count }
        sub _increment_count { return ++$_count }
        sub _decrement_count { return --$_count }
    }

    # Constructor; may be called on the class or on an existing object.
    # Arguments are name => value pairs using the keys of %_index_map.
    # A slot not named in the call is copied from the invoking object if
    # there is one, and otherwise gets its default (undef).
    sub new {
        my ( $caller, %arg ) = @_;
        my $caller_is_obj = ref($caller);
        my $self          = bless [], ( $caller_is_obj || $caller );

        foreach my $name ( keys %_index_map ) {
            my $slot = $_index_map{$name};
            $self->[$slot] =
                exists $arg{$name} ? $arg{$name}
              : $caller_is_obj     ? $caller->[$slot]
              :                      $_default_data[$slot];
        }

        $self->_increment_count();
        return $self;
    }

    # keep the population count in step with object destruction
    sub DESTROY {
        my ($self) = @_;
        $self->_decrement_count();
    }

    # slot readers
    sub get_column          { my ($self) = @_; return $self->[COLUMN] }
    sub get_starting_column { my ($self) = @_; return $self->[STARTING_COLUMN] }
    sub get_matching_token  { my ($self) = @_; return $self->[MATCHING_TOKEN] }
    sub get_starting_line   { my ($self) = @_; return $self->[STARTING_LINE] }
    sub get_ending_line     { my ($self) = @_; return $self->[ENDING_LINE] }
    sub get_serial_number   { my ($self) = @_; return $self->[SERIAL_NUMBER] }

    # slot writers; each returns the value stored
    sub set_column          { return $_[0]->[COLUMN]          = $_[1] }
    sub set_starting_column { return $_[0]->[STARTING_COLUMN] = $_[1] }
    sub set_matching_token  { return $_[0]->[MATCHING_TOKEN]  = $_[1] }
    sub set_starting_line   { return $_[0]->[STARTING_LINE]   = $_[1] }
    sub set_ending_line     { return $_[0]->[ENDING_LINE]     = $_[1] }

    # move the current column by the given amount; returns the new column
    sub increment_column { return $_[0]->[COLUMN] += $_[1] }

    # stash the current column, and put the stashed value back
    sub save_column    { return $_[0]->[SAVED_COLUMN] = $_[0]->[COLUMN] }
    sub restore_column { return $_[0]->[COLUMN]       = $_[0]->[SAVED_COLUMN] }

}

17684

17685

package Perl::Tidy::VerticalAligner;

17686

17687

# The Perl::Tidy::VerticalAligner package collects output lines and

17688

# attempts to line up certain common tokens, such as => and #, which are

17689

# identified by the calling routine.

17690

#

17691

# There are two main routines: append_line and flush. Append acts as a

17692

# storage buffer, collecting lines into a group which can be vertically

17693

# aligned. When alignment is no longer possible or desirable, it dumps

17694

# the group to flush.

17695

#

17696

#     append_line -----> flush
#
#     collects          writes
#     vertical          one
#     groups            group

17701

17702

BEGIN {

    # Compile-time debug switches for the vertical aligner.
    #
    # Caution: these debug flags produce a lot of output
    # They should all be 0 except when debugging small scripts

    use constant VALIGN_DEBUG_FLAG_APPEND  => 0;
    use constant VALIGN_DEBUG_FLAG_APPEND0 => 0;
    use constant VALIGN_DEBUG_FLAG_TERNARY => 0;

    # announce, at compile time, that a debug flag has been left switched on
    my $debug_warning = sub {
        print "VALIGN_DEBUGGING with key $_[0]\n";
    };

    # every flag declared above gets a matching announcement here
    VALIGN_DEBUG_FLAG_APPEND  && $debug_warning->('APPEND');
    VALIGN_DEBUG_FLAG_APPEND0 && $debug_warning->('APPEND0');
    VALIGN_DEBUG_FLAG_TERNARY && $debug_warning->('TERNARY');

}

17719

17720

# Package-level state of the vertical aligner.  The variables are declared
# with 'use vars' (package globals) and are given their starting values in
# sub initialize and sub initialize_for_new_group.

# the aligner object, the current group and its alignments
use vars qw(
  $vertical_aligner_self
  $current_line
  $maximum_alignment_index
  $ralignment_list
  $maximum_jmax_seen
  $minimum_jmax_seen
  $previous_minimum_jmax_seen
  $previous_maximum_jmax_seen
  $maximum_line_index
  $group_level
  $group_type
  $group_maximum_gap
  $marginal_match
  $last_group_level_written
  $last_leading_space_count
  $extra_indent_ok
  $zero_count
  @group_lines
);

# side comments and outdented lines
use vars qw(
  $last_comment_column
  $last_side_comment_line_number
  $last_side_comment_length
  $last_side_comment_level
  $outdented_line_count
  $first_outdented_line_at
  $last_outdented_line_at
  @side_comment_history
  $comment_leading_space_count
  $is_matching_terminal_line
);

# helper objects handed to sub initialize
use vars qw(
  $diagnostics_object
  $logger_object
  $file_writer_object
);

# cached line
use vars qw(
  $cached_line_text
  $cached_line_type
  $cached_line_flag
  $cached_seqno
  $cached_line_valid
  $cached_line_leading_space_count
  $cached_seqno_string
);

# strings of sequence numbers
use vars qw(
  $seqno_string
  $last_nonblank_seqno_string
);

# the options hash and frequently used option values
use vars qw(
  $rOpts
  $rOpts_maximum_line_length
  $rOpts_continuation_indentation
  $rOpts_indent_columns
  $rOpts_tabs
  $rOpts_entab_leading_whitespace
  $rOpts_valign
  $rOpts_fixed_position_side_comment
  $rOpts_minimum_space_to_comment
);

17777

17778

sub initialize {

    # Reset the package-level state of the vertical aligner and create the
    # aligner object.
    #
    # Arguments: $class, $rOpts (options hash reference), and the file
    # writer, logger and diagnostics objects; the last four are stored in
    # package variables.
    # Returns the new aligner object (an empty hash blessed into $class),
    # which is also kept in $vertical_aligner_self.
    my $class = shift;
    ( $rOpts, $file_writer_object, $logger_object, $diagnostics_object ) = @_;

    # variables describing the entire space group:
    $ralignment_list          = [];
    $last_group_level_written = -1;
    $group_level              = 0;
    $extra_indent_ok          = 0;    # can we move all lines to the right?
    $last_side_comment_length = 0;
    $maximum_jmax_seen          = $minimum_jmax_seen          = 0;
    $previous_minimum_jmax_seen = $previous_maximum_jmax_seen = 0;

    # variables describing each line of the group
    @group_lines = ();                # list of all lines in group

    $outdented_line_count          = 0;
    $first_outdented_line_at       = $last_outdented_line_at = 0;
    $last_side_comment_line_number = 0;
    $last_side_comment_level       = -1;
    $is_matching_terminal_line     = 0;

    # most recent 3 side comments; [ line number, column ]
    @side_comment_history[ 0 .. 2 ] =
      ( [ -300, 0 ], [ -200, 0 ], [ -100, 0 ] );

    # write_leader_and_string cache:
    $cached_line_text = $cached_seqno_string = "";
    $cached_line_type = $cached_line_flag    = 0;
    $cached_seqno     = $cached_line_valid   = 0;
    $cached_line_leading_space_count = 0;

    # string of sequence numbers joined together
    $seqno_string = $last_nonblank_seqno_string = "";

    # frequently used parameters
    $rOpts_indent_columns           = $rOpts->{'indent-columns'};
    $rOpts_tabs                     = $rOpts->{'tabs'};
    $rOpts_entab_leading_whitespace = $rOpts->{'entab-leading-whitespace'};
    $rOpts_fixed_position_side_comment =
      $rOpts->{'fixed-position-side-comment'};
    $rOpts_minimum_space_to_comment = $rOpts->{'minimum-space-to-comment'};
    $rOpts_maximum_line_length      = $rOpts->{'maximum-line-length'};
    $rOpts_valign                   = $rOpts->{'valign'};

    forget_side_comment();

    initialize_for_new_group();

    $vertical_aligner_self = bless {}, $class;
    return $vertical_aligner_self;
}

17842

17843

sub initialize_for_new_group {

    # Reset the package variables which describe the group of lines
    # currently being collected.  Takes no arguments.

    # no lines and no alignments in the current group yet
    $maximum_line_index = $maximum_alignment_index = -1;

    $current_line = undef;    # line being matched for alignment
    $group_type   = "";

    # zero_count: count of consecutive lines without tokens
    # group_maximum_gap: largest gap introduced
    $zero_count = $group_maximum_gap = $marginal_match =
      $comment_leading_space_count = $last_leading_space_count = 0;
}

17854

17855

# interface to Perl::Tidy::Diagnostics routines

17856

sub write_diagnostics {

    # Forward all arguments to the write_diagnostics method of the
    # diagnostics object; does nothing when no such object is set.
    return $diagnostics_object
      && $diagnostics_object->write_diagnostics(@_);
}

17861

17862

# interface to Perl::Tidy::Logger routines

17863

sub warning {

    # Forward all arguments to the warning method of the logger object;
    # does nothing when no logger object is set.
    return $logger_object && $logger_object->warning(@_);
}

17868

17869

sub write_logfile_entry {

    # Forward all arguments to the write_logfile_entry method of the
    # logger object; does nothing when no logger object is set.
    return $logger_object && $logger_object->write_logfile_entry(@_);
}

17874

17875

sub report_definite_bug {

    # Call the report_definite_bug method of the logger object (no
    # arguments are passed on); does nothing when no logger object is set.
    return $logger_object && $logger_object->report_definite_bug();
}

17880

17881

sub get_SPACES {

    # Return the number of leading spaces associated with an indentation.
    # $indentation is either a plain number of spaces, which is returned
    # unchanged, or an object (any reference), in which case the result of
    # its get_SPACES method is returned.
    my ($indentation) = @_;
    return $indentation unless ref $indentation;
    return $indentation->get_SPACES();
}

17889

17890

sub get_RECOVERABLE_SPACES {

    # Return the number of spaces (+ means shift right, - means shift left)
    # that we would like to shift a group of lines with the same
    # indentation to get them to line up with their opening parens.
    # A plain (non-reference) indentation has no recoverable spaces, so 0
    # is returned for it; otherwise the object's own method is consulted.
    my ($indentation) = @_;
    return 0 unless ref $indentation;
    return $indentation->get_RECOVERABLE_SPACES();
}

17898

17899

sub get_STACK_DEPTH {

    # Return the stack depth of an indentation: the result of its
    # get_STACK_DEPTH method when $indentation is an object (any
    # reference), and 0 when it is a plain number of spaces.
    my ($indentation) = @_;
    return 0 unless ref $indentation;
    return $indentation->get_STACK_DEPTH();
}

17904

17905

sub make_alignment {

    # Make one new alignment at column $col which aligns token $token.
    #
    # The new alignment object starts and ends at the current line index
    # ($maximum_line_index); its serial number is its position in
    # $ralignment_list, where it is also stored.  Increments
    # $maximum_alignment_index.  Returns the new alignment object.
    my ( $col, $token ) = @_;

    ++$maximum_alignment_index;

    # Class->new form rather than indirect object syntax ('new Class'),
    # whose parsing depends on which subs are visible at compile time
    my $alignment = Perl::Tidy::VerticalAligner::Alignment->new(
        column          => $col,
        starting_column => $col,
        matching_token  => $token,
        starting_line   => $maximum_line_index,
        ending_line     => $maximum_line_index,
        serial_number   => $maximum_alignment_index,
    );
    $ralignment_list->[$maximum_alignment_index] = $alignment;
    return $alignment;
}

17921

17922

sub dump_alignments {

    # Debugging aid: print a tab-separated table of the alignments in
    # $ralignment_list, from index 0 through $maximum_alignment_index,
    # preceded by a header line.
    print
"Current Alignments:\ni\ttoken\tstarting_column\tcolumn\tstarting_line\tending_line\n";

    foreach my $i ( 0 .. $maximum_alignment_index ) {
        my $alignment       = $ralignment_list->[$i];
        my $column          = $alignment->get_column();
        my $starting_column = $alignment->get_starting_column();
        my $matching_token  = $alignment->get_matching_token();
        my $starting_line   = $alignment->get_starting_line();
        my $ending_line     = $alignment->get_ending_line();
        print
"$i\t$matching_token\t$starting_column\t$column\t$starting_line\t$ending_line\n";
    }
}

17935

17936

sub save_alignment_columns {

    # Ask every alignment in $ralignment_list, from index 0 through
    # $maximum_alignment_index, to save its column (save_column method).
    $ralignment_list->[$_]->save_column() for 0 .. $maximum_alignment_index;
}

17941

17942

sub restore_alignment_columns {

    # Ask every alignment in $ralignment_list, from index 0 through
    # $maximum_alignment_index, to restore its column (restore_column
    # method).
    $ralignment_list->[$_]->restore_column()
      for 0 .. $maximum_alignment_index;
}

17947

17948

sub forget_side_comment {

    # Reset the remembered side comment column ($last_comment_column) to 0.
    return $last_comment_column = 0;
}

17951

17952

sub append_line {

17953

17954

# sub append is called to place one line in the current vertical group.

17955

#

17956

# The input parameters are:

17957

# $level = indentation level of this line

17958

# $rfields = reference to array of fields

17959

# $rpatterns = reference to array of patterns, one per field

17960

# $rtokens = reference to array of tokens starting fields 1,2,..

17961

#

17962

# Here is an example of what this package does. In this example,

17963

# we are trying to line up both the '=>' and the '#'.

17964

#

17965

# '18' => 'grave', # \`

17966

# '19' => 'acute', # `'

17967

# '20' => 'caron', # \v

17968

# <-tabs-><f1-><--field 2 ---><-f3->

17969

# | | | |

17970

# | | | |

17971

# col1 col2 col3 col4

17972

#

17973

# The calling routine has already broken the entire line into 3 fields as

17974

# indicated. (So the work of identifying promising common tokens has

17975

# already been done).

17976

#

17977

# In this example, there will be 2 tokens being matched: '=>' and '#'.

17978

# They are the leading parts of fields 2 and 3, but we do need to know

17979

# what they are so that we can dump a group of lines when these tokens

17980

# change.

17981

#

17982

# The fields contain the actual characters of each field. The patterns

17983

# are like the fields, but they contain mainly token types instead

17984

# of tokens, so they have fewer characters. They are used to be

17985

# sure we are matching fields of similar type.

17986

#

17987

# In this example, there will be 4 column indexes being adjusted. The

17988

# first one is always at zero. The interior columns are at the start of

17989

# the matching tokens, and the last one tracks the maximum line length.

17990

#

17991

# Basically, each time a new line comes in, it joins the current vertical

17992

# group if possible. Otherwise it causes the current group to be dumped

17993

# and a new group is started.

17994

#

17995

# For each new group member, the column locations are increased, as

17996

# necessary, to make room for the new fields. When the group is finally

17997

# output, these column numbers are used to compute the amount of spaces of

17998

# padding needed for each field.

17999

#

18000

# Programming note: the fields are assumed not to have any tab characters.

18001

# Tabs have been previously removed except for tabs in quoted strings and

18002

# side comments. Tabs in these fields can mess up the column counting.

18003

# The log file warns the user if there are any such tabs.

18004

18005

my (

18006

$level, $level_end,

18007

$indentation, $rfields,

18008

$rtokens, $rpatterns,

18009

$is_forced_break, $outdent_long_lines,

18010

$is_terminal_ternary, $is_terminal_statement,

18011

$do_not_pad, $rvertical_tightness_flags,

18012

$level_jump,

18013

) = @_;

18014

18015

# number of fields is $jmax

18016

# number of tokens between fields is $jmax-1

18017

my $jmax = $#{$rfields};

18018

18019

my $leading_space_count = get_SPACES($indentation);

18020

18021

# set outdented flag to be sure we either align within statements or

18022

# across statement boundaries, but not both.

18023

my $is_outdented = $last_leading_space_count > $leading_space_count;

18024

$last_leading_space_count = $leading_space_count;

18025

18026

# Patch: undo for hanging side comment

18027

my $is_hanging_side_comment =

18028

( $jmax == 1 && $rtokens->[0] eq '#' && $rfields->[0] =~ /^\s*$/ );

18029

$is_outdented = 0 if $is_hanging_side_comment;

18030

18031

VALIGN_DEBUG_FLAG_APPEND0 && do {

18032

print

18033

"APPEND0: entering lines=$maximum_line_index new #fields= $jmax, leading_count=$leading_space_count last_cmt=$last_comment_column force=$is_forced_break\n";

18034

};

18035

18036

# Validate cached line if necessary: If we can produce a container

18037

# with just 2 lines total by combining an existing cached opening

18038

# token with the closing token to follow, then we will mark both

18039

# cached flags as valid.

18040

if ($rvertical_tightness_flags) {

18041

if ( $maximum_line_index <= 0

18042

&& $cached_line_type

18043

&& $cached_seqno

18044

&& $rvertical_tightness_flags->[2]

18045

&& $rvertical_tightness_flags->[2] == $cached_seqno )

18046

{

18047

$rvertical_tightness_flags->[3] ||= 1;

18048

$cached_line_valid ||= 1;

18049

}

18050

}

18051

18052

# do not join an opening block brace with an unbalanced line

18053

# unless requested with a flag value of 2

18054

if ( $cached_line_type == 3

18055

&& $maximum_line_index < 0

18056

&& $cached_line_flag < 2

18057

&& $level_jump != 0 )

18058

{

18059

$cached_line_valid = 0;

18060

}

18061

18062

# patch until new aligner is finished

18063

if ($do_not_pad) { my_flush() }

18064

18065

# shouldn't happen:

18066

if ( $level < 0 ) { $level = 0 }

18067

18068

# do not align code across indentation level changes

18069

# or if vertical alignment is turned off for debugging

18070

if ( $level != $group_level || $is_outdented || !$rOpts_valign ) {

18071

18072

# we are allowed to shift a group of lines to the right if its

18073

# level is greater than the previous and next group

18074

$extra_indent_ok =

18075

( $level < $group_level && $last_group_level_written < $group_level );

18076

18077

my_flush();

18078

18079

# If we know that this line will get flushed out by itself because

18080

# of level changes, we can leave the extra_indent_ok flag set.

18081

# That way, if we get an external flush call, we will still be

18082

# able to do some -lp alignment if necessary.

18083

$extra_indent_ok = ( $is_terminal_statement && $level > $group_level );

18084

18085

$group_level = $level;

18086

18087

# wait until after the above flush to get the leading space

18088

# count because it may have been changed if the -icp flag is in

18089

# effect

18090

$leading_space_count = get_SPACES($indentation);

18091

18092

}

18093

18094

# --------------------------------------------------------------------

18095

# Patch to collect outdentable block COMMENTS

18096

# --------------------------------------------------------------------

18097

my $is_blank_line = "";

18098

my $is_block_comment = ( $jmax == 0 && $rfields->[0] =~ /^#/ );

18099

if ( $group_type eq 'COMMENT' ) {

18100

if (

18101

(

18102

$is_block_comment

18103

&& $outdent_long_lines

18104

&& $leading_space_count == $comment_leading_space_count

18105

)

18106

|| $is_blank_line

18107

)

18108

{

18109

$group_lines[ ++$maximum_line_index ] = $rfields->[0];

18110

return;

18111

}

18112

else {

18113

my_flush();

18114

}

18115

}

18116

18117

# --------------------------------------------------------------------

18118

# add dummy fields for terminal ternary

18119

# --------------------------------------------------------------------

18120

my $j_terminal_match;

18121

if ( $is_terminal_ternary && $current_line ) {

18122

$j_terminal_match =

18123

fix_terminal_ternary( $rfields, $rtokens, $rpatterns );

18124

$jmax = @{$rfields} - 1;

18125

}

18126

18127

# --------------------------------------------------------------------

18128

# add dummy fields for else statement

18129

# --------------------------------------------------------------------

18130

if ( $rfields->[0] =~ /^else\s*$/

18131

&& $current_line

18132

&& $level_jump == 0 )

18133

{

18134

$j_terminal_match = fix_terminal_else( $rfields, $rtokens, $rpatterns );

18135

$jmax = @{$rfields} - 1;

18136

}

18137

18138

# --------------------------------------------------------------------

18139

# Step 1. Handle simple line of code with no fields to match.

18140

# --------------------------------------------------------------------

18141

if ( $jmax <= 0 ) {

18142

$zero_count++;

18143

18144

if ( $maximum_line_index >= 0

18145

&& !get_RECOVERABLE_SPACES( $group_lines[0]->get_indentation() ) )

18146

{

18147

18148

# flush the current group if it has some aligned columns..

18149

if ( $group_lines[0]->get_jmax() > 1 ) { my_flush() }

18150

18151

# flush current group if we are just collecting side comments..

18152

elsif (

18153

18154

# ...and we haven't seen a comment lately

18155

( $zero_count > 3 )

18156

18157

# ..or if this new line doesn't fit to the left of the comments

18158

|| ( ( $leading_space_count + length( $$rfields[0] ) ) >

18159

$group_lines[0]->get_column(0) )

18160

)

18161

{

18162

my_flush();

18163

}

18164

}

18165

18166

# patch to start new COMMENT group if this comment may be outdented

18167

if ( $is_block_comment

18168

&& $outdent_long_lines

18169

&& $maximum_line_index < 0 )

18170

{

18171

$group_type = 'COMMENT';

18172

$comment_leading_space_count = $leading_space_count;

18173

$group_lines[ ++$maximum_line_index ] = $rfields->[0];

18174

return;

18175

}

18176

18177

# just write this line directly if no current group, no side comment,

18178

# and no space recovery is needed.

18179

if ( $maximum_line_index < 0 && !get_RECOVERABLE_SPACES($indentation) )

18180

{

18181

write_leader_and_string( $leading_space_count, $$rfields[0], 0,

18182

$outdent_long_lines, $rvertical_tightness_flags );

18183

return;

18184

}

18185

}

18186

else {

18187

$zero_count = 0;

18188

}

18189

18190

# programming check: (shouldn't happen)

18191

# an error here implies an incorrect call was made

18192

if ( $jmax > 0 && ( $#{$rtokens} != ( $jmax - 1 ) ) ) {

18193

warning(

18194

"Program bug in Perl::Tidy::VerticalAligner - number of tokens = $#{$rtokens} should be one less than number of fields: $#{$rfields})\n"

18195

);

18196

report_definite_bug();

18197

}

18198

18199

# --------------------------------------------------------------------

18200

# create an object to hold this line

18201

# --------------------------------------------------------------------

18202

my $new_line = new Perl::Tidy::VerticalAligner::Line(

18203

jmax => $jmax,

18204

jmax_original_line => $jmax,

18205

rtokens => $rtokens,

18206

rfields => $rfields,

18207

rpatterns => $rpatterns,

18208

indentation => $indentation,

18209

leading_space_count => $leading_space_count,

18210

outdent_long_lines => $outdent_long_lines,

18211

list_type => "",

18212

is_hanging_side_comment => $is_hanging_side_comment,

18213

maximum_line_length => $rOpts->{'maximum-line-length'},

18214

rvertical_tightness_flags => $rvertical_tightness_flags,

18215

);

18216

18217

# Initialize a global flag saying if the last line of the group should

18218

# match end of group and also terminate the group. There should be no

18219

# returns between here and where the flag is handled at the bottom.

18220

my $col_matching_terminal = 0;

18221

if ( defined($j_terminal_match) ) {

18222

18223

# remember the column of the terminal ? or { to match with

18224

$col_matching_terminal = $current_line->get_column($j_terminal_match);

18225

18226

# set global flag for sub decide_if_aligned

18227

$is_matching_terminal_line = 1;

18228

}

18229

18230

# --------------------------------------------------------------------

18231

# It simplifies things to create a zero length side comment

18232

# if none exists.

18233

# --------------------------------------------------------------------

18234

make_side_comment( $new_line, $level_end );

18235

18236

# --------------------------------------------------------------------

18237

# Decide if this is a simple list of items.

18238

# There are 3 list types: none, comma, comma-arrow.

18239

# We use this below to be less restrictive in deciding what to align.

18240

# --------------------------------------------------------------------

18241

if ($is_forced_break) {

18242

decide_if_list($new_line);

18243

}

18244

18245

if ($current_line) {

18246

18247

# --------------------------------------------------------------------

18248

# Allow hanging side comment to join current group, if any

18249

# This will help keep side comments aligned, because otherwise we

18250

# will have to start a new group, making alignment less likely.

18251

# --------------------------------------------------------------------

18252

join_hanging_comment( $new_line, $current_line )

18253

if $is_hanging_side_comment;

18254

18255

# --------------------------------------------------------------------

18256

# If there is just one previous line, and it has more fields

18257

# than the new line, try to join fields together to get a match with

18258

# the new line. At the present time, only a single leading '=' is

18259

# allowed to be compressed out. This is useful in rare cases where

18260

# a table is forced to use old breakpoints because of side comments,

18261

# and the table starts out something like this:

18262

# my %MonthChars = ('0', 'Jan', # side comment

18263

# '1', 'Feb',

18264

# '2', 'Mar',

18265

# Eliminating the '=' field will allow the remaining fields to line up.

18266

# This situation does not occur if there are no side comments

18267

# because scan_list would put a break after the opening '('.

18268

# --------------------------------------------------------------------

18269

eliminate_old_fields( $new_line, $current_line );

18270

18271

# --------------------------------------------------------------------

18272

# If the new line has more fields than the current group,

18273

# see if we can match the first fields and combine the remaining

18274

# fields of the new line.

18275

# --------------------------------------------------------------------

18276

eliminate_new_fields( $new_line, $current_line );

18277

18278

# --------------------------------------------------------------------

18279

# Flush previous group unless all common tokens and patterns match..

18280

# --------------------------------------------------------------------

18281

check_match( $new_line, $current_line );

18282

18283

# --------------------------------------------------------------------

18284

# See if there is space for this line in the current group (if any)

18285

# --------------------------------------------------------------------

18286

if ($current_line) {

18287

check_fit( $new_line, $current_line );

18288

}

18289

}

18290

18291

# --------------------------------------------------------------------

18292

# Append this line to the current group (or start new group)

18293

# --------------------------------------------------------------------

18294

accept_line($new_line);

18295

18296

# Future update to allow this to vary:

18297

$current_line = $new_line if ( $maximum_line_index == 0 );

18298

18299

# output this group if it ends in a terminal else or ternary line

18300

if ( defined($j_terminal_match) ) {

18301

18302

# if there is only one line in the group (maybe due to failure to match

18303

# perfectly with previous lines), then align the ? or { of this

18304

# terminal line with the previous one unless that would make the line

18305

# too long

18306

if ( $maximum_line_index == 0 ) {

18307

my $col_now = $current_line->get_column($j_terminal_match);

18308

my $pad = $col_matching_terminal - $col_now;

18309

my $padding_available =

18310

$current_line->get_available_space_on_right();

18311

if ( $pad > 0 && $pad <= $padding_available ) {

18312

$current_line->increase_field_width( $j_terminal_match, $pad );

18313

}

18314

}

18315

my_flush();

18316

$is_matching_terminal_line = 0;

18317

}

18318

18319

# --------------------------------------------------------------------

18320

# Step 8. Some old debugging stuff

18321

# --------------------------------------------------------------------

18322

VALIGN_DEBUG_FLAG_APPEND && do {

18323

print "APPEND fields:";

18324

dump_array(@$rfields);

18325

print "APPEND tokens:";

18326

dump_array(@$rtokens);

18327

print "APPEND patterns:";

18328

dump_array(@$rpatterns);

18329

dump_alignments();

18330

};

18331

18332

return;

18333

}

18334

18335

sub join_hanging_comment {

    # Let a hanging side comment join the current alignment group.
    #
    # A line qualifies if it has exactly two fields, its only token is the
    # side comment token '#', and its first field is blank.  Such a line is
    # widened to the field count of the old line: the comment is moved to
    # the last field and every field in between becomes a single blank.
    #
    # Arguments:
    #   $line     - the new line object (modified in place on success)
    #   $old_line - the line object it is being compared against
    #
    # Returns 1 if the new line was modified, 0 if it did not qualify.
    my ( $line, $old_line ) = @_;

    # must be 2 fields
    return 0 if ( $line->get_jmax() != 1 );

    # the second field is a comment..
    my $tokens = $line->get_rtokens();
    return 0 if ( $tokens->[0] ne '#' );

    # the first field is empty...
    my $fields = $line->get_rfields();
    return 0 if ( $fields->[0] !~ /^\s*$/ );

    # the old line must have more fields than the two of the new line
    my $group_jmax = $old_line->get_jmax();
    return 0 if ( $group_jmax <= 1 );

    my $patterns = $line->get_rpatterns();

    $line->set_is_hanging_side_comment(1);
    $line->set_jmax($group_jmax);

    # move the comment, with its token and pattern, out to the end
    my $j_last_token = $group_jmax - 1;
    $fields->[$group_jmax]      = $fields->[1];
    $tokens->[$j_last_token]    = $tokens->[0];
    $patterns->[$j_last_token]  = $patterns->[0];

    # blank out everything between the first field and the comment
    foreach my $j ( 1 .. $j_last_token ) {
        $fields->[$j]         = " ";  # NOTE: caused glitch unless 1 blank, why?
        $tokens->[ $j - 1 ]   = "";
        $patterns->[ $j - 1 ] = "";
    }
    return 1;
}

18363

18364

sub eliminate_old_fields {

    # If there is exactly one previous line in the group and it has more
    # fields than the new line, try to merge fields of the previous line so
    # that its tokens line up with those of the new line.
    #
    # Arguments:
    #   $new_line - the incoming line object
    #   $old_line - the single line currently in the group (may be modified)
    #
    # Side effects: always updates $maximum_jmax_seen / $minimum_jmax_seen
    # from the new line; on success rewrites the alignments, field count,
    # tokens, fields and patterns of $old_line.
    my ( $new_line, $old_line ) = @_;

    # keep the running extremes of the field counts up to date
    my $jmax = $new_line->get_jmax();
    $maximum_jmax_seen = $jmax if ( $jmax > $maximum_jmax_seen );
    $minimum_jmax_seen = $jmax if ( $jmax < $minimum_jmax_seen );

    # there must be one previous line
    return unless ( $maximum_line_index == 0 );

    # this line must have fewer fields
    my $old_jmax = $old_line->get_jmax();
    return unless ( $old_jmax > $jmax );

    # Identify specific cases where field elimination is allowed:
    # case=1: both lines have comma-separated lists, and the first
    #         line has an equals
    # case=2: both lines have leading equals
    #
    # Case 1 is the default.  Case 2 requires both lines to start with the
    # same '=' token and to have similar (here: identical) leading patterns.
    my $old_rtokens   = $old_line->get_rtokens();
    my $rtokens       = $new_line->get_rtokens();
    my $rpatterns     = $new_line->get_rpatterns();
    my $old_rpatterns = $old_line->get_rpatterns();

    my $is_case_2 =
      (      $rtokens->[0] =~ /^=\d*$/
          && $old_rtokens->[0] eq $rtokens->[0]
          && $old_rpatterns->[0] eq $rpatterns->[0] );
    my $is_case_1 = !$is_case_2;

    # not too many fewer fields in new line for case 1
    return if ( $is_case_1 && $old_jmax - 2 > $jmax );

    # case 1 must have side comment
    my $old_rfields = $old_line->get_rfields();
    return if ( $is_case_1 && length( $$old_rfields[$old_jmax] ) == 0 );

    my $rfields = $new_line->get_rfields();

    my $hid_equals = 0;
    my $in_match   = 0;
    my $j          = 0;    # index of the next new-line token to be matched
    my ( @merged_alignments, @merged_fields, @merged_patterns,
        @merged_tokens );
    my ( $pending_field, $pending_pattern ) = ( '', '' );

    # loop over all old tokens, accumulating old fields until an old token
    # matches the next token of the new line
  OLD_TOKEN:
    foreach my $k ( 0 .. $old_jmax - 1 ) {
        $pending_field   .= $$old_rfields[$k];
        $pending_pattern .= $$old_rpatterns[$k];
        last OLD_TOKEN if ( $j >= $jmax );

        my $old_token = $$old_rtokens[$k];
        if ( $old_token eq $$rtokens[$j] ) {
            $in_match              = 1;
            $merged_fields[$j]     = $pending_field;
            $merged_patterns[$j]   = $pending_pattern;
            $merged_tokens[$j]     = $old_token;
            $merged_alignments[$j] = $old_line->get_alignment($k);
            ( $pending_field, $pending_pattern ) = ( '', '' );
            $j++;
            next OLD_TOKEN;
        }

        # this old token will be hidden inside a merged field
        if ( $old_token =~ /^\=\d*$/ ) {

            # avoid problems with stuff like: $a=$b=$c=$d;
            last OLD_TOKEN if ($is_case_2);
            $hid_equals = 1;
        }

        # disallow gaps in matching field types in case 1
        last OLD_TOKEN if ( $in_match && $is_case_1 );
    }

    # Modify the current state only if we are successful.
    # We must exactly reach the ends of both lists for success.
    return
      unless ( ( $j == $jmax )
        && ( $pending_field eq '' )
        && ( $is_case_2 || $hid_equals ) );

    # carry the last old field (the side comment field) across
    $merged_fields[$j]     = $pending_field . $$old_rfields[$old_jmax];
    $merged_patterns[$j]   = $pending_pattern . $$old_rpatterns[$old_jmax];
    $merged_alignments[$j] = $old_line->get_alignment($old_jmax);

    $old_line->set_alignments(@merged_alignments);
    $old_line->set_jmax($jmax);
    $old_line->set_rtokens( \@merged_tokens );
    $old_line->set_rfields( \@merged_fields );

    # NOTE(review): the old line is given the pattern array of the NEW
    # line here; the merged patterns collected above are not installed.
    # This matches the original code - confirm whether it is intended.
    $old_line->set_rpatterns( \@{$rpatterns} );
    return;
}

18478

18479

# Create an empty side comment if none exists.
#
# Arguments:
#   $new_line  - the line object to examine (modified if it lacks a
#                side comment)
#   $level_end - level recorded as $last_side_comment_level when the line
#                already has a side comment
sub make_side_comment {
    my ( $new_line, $level_end ) = @_;

    my $jmax    = $new_line->get_jmax();
    my $rtokens = $new_line->get_rtokens();

    # a line has a side comment if its last token is '#'
    my $has_side_comment = ( $jmax != 0 && $$rtokens[ $jmax - 1 ] eq '#' );

    if ($has_side_comment) {

        my $line_number = $vertical_aligner_self->get_output_line_number();
        my $rfields     = $new_line->get_rfields();

        # don't remember old side comment location for very long,
        # and don't remember comment location across block level changes
        my $forget =
          ( $line_number - $last_side_comment_line_number > 12
              || ( $level_end < $last_side_comment_level
                && $$rfields[0] =~ /^}/ ) );
        forget_side_comment() if ($forget);

        $last_side_comment_line_number = $line_number;
        $last_side_comment_level       = $level_end;
    }
    else {

        # append a '#' token with a zero length comment field
        my $rfields   = $new_line->get_rfields();
        my $rpatterns = $new_line->get_rpatterns();
        my $jnew      = $jmax + 1;
        $$rtokens[$jmax]   = '#';
        $$rfields[$jnew]   = '';
        $$rpatterns[$jnew] = '#';
        $new_line->set_jmax($jnew);
        $new_line->set_jmax_original_line($jnew);
    }
}

18516

18517

sub decide_if_list {

    # Decide whether a line is a simple list and, if so, record its type.
    #
    # A list will be taken to be a line with a forced break in which all
    # of the field separators are commas or comma-arrows (except for the
    # trailing #).
    #
    # List separator tokens are things like ',3' or '=>2',
    # where the trailing digit is the nesting depth.  Opening braces are
    # accepted among the later separators to allow nested list items.
    #
    # The list type is the first token if every separator qualifies, and
    # the empty string otherwise.  Nothing is set if the first token is
    # not a comma or comma-arrow.
    my $line = shift;

    my $rtokens    = $line->get_rtokens();
    my $lead_token = $$rtokens[0];
    return unless ( $lead_token =~ /^(\,|=>)/ );

    my $jmax      = $line->get_jmax();
    my $list_type = $lead_token;

    # check the separators between the first one and the trailing '#'
    foreach my $j ( 1 .. $jmax - 2 ) {
        next if ( $$rtokens[$j] =~ /^(\,|=>|\{)/ );
        $list_type = "";
        last;
    }
    $line->set_list_type($list_type);
}

18543

18544

sub eliminate_new_fields {

    # If the new line has more fields than the old line, see if the
    # leading tokens and patterns agree; if they do, fold the surplus
    # fields of the new line into one field so that both lines end up
    # with the same number of fields.
    #
    # Arguments:
    #   $new_line - the incoming line object (modified in place)
    #   $old_line - the line object it is compared against
    return unless ( $maximum_line_index >= 0 );
    my ( $new_line, $old_line ) = @_;

    my $jmax        = $new_line->get_jmax();
    my $old_rtokens = $old_line->get_rtokens();
    my $rtokens     = $new_line->get_rtokens();

    # both lines start with the same '=' token
    my $is_assignment =
      ( $rtokens->[0] =~ /^=\d*$/ && ( $old_rtokens->[0] eq $rtokens->[0] ) );

    # must be monotonic variation
    return unless ( $is_assignment || $previous_maximum_jmax_seen <= $jmax );

    # must be more fields in the new line
    my $old_jmax = $old_line->get_jmax();
    return unless ( $old_jmax < $jmax );

    if ( !$is_assignment ) {

        # only if monotonic
        return
          if ( $old_line->get_jmax_original_line() != $minimum_jmax_seen );

        # never combine fields of a comma list
        return
          if ( $old_jmax <= 1 || $new_line->get_list_type() =~ /^,/ );
    }

    my $rfields       = $new_line->get_rfields();
    my $rpatterns     = $new_line->get_rpatterns();
    my $old_rpatterns = $old_line->get_rpatterns();

    # loop over all OLD tokens except comment and check match
    my $match = 1;
    foreach my $k ( 0 .. $old_jmax - 2 ) {
        next
          if ( $$old_rtokens[$k] eq $$rtokens[$k]
            && $$old_rpatterns[$k] eq $$rpatterns[$k] );
        $match = 0;
        last;
    }

    # first tokens agree, so combine extra new tokens
    if ($match) {

        # index of the field which absorbs the surplus fields
        my $j_keep = $old_jmax - 1;
        foreach my $k ( $old_jmax .. $jmax - 1 ) {
            $$rfields[$j_keep]   .= $$rfields[$k];
            $$rfields[$k]         = "";
            $$rpatterns[$j_keep] .= $$rpatterns[$k];
            $$rpatterns[$k]       = "";
        }

        # the last field of the new line becomes the last field of the
        # shortened line, behind a '#' token
        $$rtokens[$j_keep]     = '#';
        $$rfields[$old_jmax]   = $$rfields[$jmax];
        $$rpatterns[$old_jmax] = $$rpatterns[$jmax];
        $jmax                  = $old_jmax;
    }
    $new_line->set_jmax($jmax);
}

18606

18607

sub fix_terminal_ternary {

    # Add empty fields as necessary to align a ternary term
    # like this:
    #
    #  my $leapyear =
    #      $year % 4   ? 0
    #    : $year % 100 ? 1
    #    : $year % 400 ? 0
    #    :               1;
    #
    # Arguments (array references describing the terminal line; they are
    # modified in place, but only if the whole operation succeeds):
    #   $rfields   - the fields of the line
    #   $rtokens   - the alignment tokens between the fields
    #   $rpatterns - the patterns of the fields
    #
    # Returns the index of the matching '?' token in the last line of the
    # current group, or nothing if the line cannot be fixed up.

    my ( $rfields, $rtokens, $rpatterns ) = @_;

    my $jmax        = @{$rfields} - 1;
    my $old_line    = $group_lines[$maximum_line_index];
    my $rfields_old = $old_line->get_rfields();

    my $rpatterns_old       = $old_line->get_rpatterns();
    my $rtokens_old         = $old_line->get_rtokens();
    my $maximum_field_index = $old_line->get_jmax();

    # look for the question mark after the :
    my ($jquestion);
    my $depth_question;
    my $pad = "";
    for ( my $j = 0 ; $j < $maximum_field_index ; $j++ ) {
        my $tok = $rtokens_old->[$j];
        if ( $tok =~ /^\?(\d+)$/ ) {
            $depth_question = $1;

            # depth must be correct
            next unless ( $depth_question eq $group_level );

            $jquestion = $j;

            # the padding is as wide as the leading '? ' of the old field
            if ( $rfields_old->[ $j + 1 ] =~ /^(\?\s*)/ ) {
                $pad = " " x length($1);
            }
            else {
                return;    # shouldn't happen
            }
            last;
        }
    }
    return unless ( defined($jquestion) );    # shouldn't happen

    # Now splice the tokens and patterns of the previous line
    # into the terminal line to ensure a match.  Add empty fields
    # as necessary.
    my $jadd = $jquestion;

    # Work on copies of the actual arrays in case we have
    # to return due to an error
    my @fields   = @{$rfields};
    my @patterns = @{$rpatterns};
    my @tokens   = @{$rtokens};

    VALIGN_DEBUG_FLAG_TERNARY && do {
        local $" = '><';
        print "CURRENT FIELDS=<@{$rfields_old}>\n";
        print "CURRENT TOKENS=<@{$rtokens_old}>\n";
        print "CURRENT PATTERNS=<@{$rpatterns_old}>\n";
        print "UNMODIFIED FIELDS=<@{$rfields}>\n";
        print "UNMODIFIED TOKENS=<@{$rtokens}>\n";
        print "UNMODIFIED PATTERNS=<@{$rpatterns}>\n";
    };

    # handle cases of leading colon on this line
    if ( $fields[0] =~ /^(:\s*)(.*)$/ ) {

        my ( $colon, $therest ) = ( $1, $2 );

        # Handle sub-case of first field with leading colon plus additional code
        # This is the usual situation as at the '1' below:
        #  ...
        #  : $year % 400 ? 0
        #  :               1;
        #
        # Test the length, not the truth, of the remaining text: a terminal
        # value of just '0' is false as a string but is still code.
        if ( length($therest) ) {

            # Split the first field after the leading colon and insert padding.
            # Note that this padding will remain even if the terminal value goes
            # out on a separate line.  This does not seem to look too bad, so no
            # mechanism has been included to undo it.
            splice( @fields, 0, 1, $colon, $pad . $therest );

            # change the leading pattern from : to ?
            return unless ( $patterns[0] =~ s/^\:/?/ );

            # install leading tokens and patterns of existing line
            unshift( @tokens,   @{$rtokens_old}[ 0 .. $jquestion ] );
            unshift( @patterns, @{$rpatterns_old}[ 0 .. $jquestion ] );

            # insert appropriate number of empty fields
            splice( @fields, 1, 0, ('') x $jadd ) if $jadd;
        }

        # handle sub-case of first field just equal to leading colon.
        # This can happen for example in the example below where
        # the leading '(' would create a new alignment token
        # : ( $name =~ /[]}]$/ ) ? ( $mname = $name )
        # :                        ( $mname = $name . '->' );
        else {

            return unless ( $jmax > 0 && $tokens[0] ne '#' ); # shouldn't happen

            # prepend a leading ? onto the second pattern
            $patterns[1] = "?b" . $patterns[1];

            # pad the second field
            $fields[1] = $pad . $fields[1];

            # install leading tokens and patterns of existing line, replacing
            # leading token and inserting appropriate number of empty fields
            splice( @tokens,   0, 1, @{$rtokens_old}[ 0 .. $jquestion ] );
            splice( @patterns, 1, 0, @{$rpatterns_old}[ 1 .. $jquestion ] );
            splice( @fields, 1, 0, ('') x $jadd ) if $jadd;
        }
    }

    # Handle case of no leading colon on this line.  This will
    # be the case when -wba=':' is used.  For example,
    #  $year % 400 ? 0 :
    #                1;
    else {

        # install leading tokens and patterns of existing line
        $patterns[0] = '?' . 'b' . $patterns[0];
        unshift( @tokens,   @{$rtokens_old}[ 0 .. $jquestion ] );
        unshift( @patterns, @{$rpatterns_old}[ 0 .. $jquestion ] );

        # insert appropriate number of empty fields
        $jadd = $jquestion + 1;
        $fields[0] = $pad . $fields[0];
        splice( @fields, 0, 0, ('') x $jadd ) if $jadd;
    }

    VALIGN_DEBUG_FLAG_TERNARY && do {
        local $" = '><';
        print "MODIFIED TOKENS=<@tokens>\n";
        print "MODIFIED PATTERNS=<@patterns>\n";
        print "MODIFIED FIELDS=<@fields>\n";
    };

    # all ok .. update the arrays
    @{$rfields}   = @fields;
    @{$rtokens}   = @tokens;
    @{$rpatterns} = @patterns;

    # hand back the index of the matched '?' token
    return $jquestion;
}

18760

18761

sub fix_terminal_else {

    # Add empty fields as necessary to align a balanced terminal
    # else block to a previous if/elsif/unless block,
    # like this:
    #
    #  if   ( 1 || $x ) { print "ok 13\n"; }
    #  else             { print "not ok 13\n"; }
    #
    # Arguments (array references describing the else line; modified in
    # place on success):
    #   $rfields, $rtokens, $rpatterns
    #
    # Returns the index of the opening block brace token in the current
    # line, or nothing if the else line cannot be matched up.
    my ( $rfields, $rtokens, $rpatterns ) = @_;

    # the else line needs at least two fields
    return unless ( @{$rfields} > 1 );

    # check for balanced else block following if/elsif/unless
    # TBD: add handling for 'case'
    my $rfields_old = $current_line->get_rfields();
    return unless ( $rfields_old->[0] =~ /^(if|elsif|unless)\s*$/ );

    # look for the opening brace after the else, and extract the depth;
    # if it is missing this is probably: "else # side_comment"
    my $tok_brace = $rtokens->[0];
    my ($depth_brace) = ( $tok_brace =~ /^\{(\d+)/ );
    return unless ( defined($depth_brace) );

    my $rpatterns_old = $current_line->get_rpatterns();
    my $rtokens_old   = $current_line->get_rtokens();
    my $old_jmax      = $current_line->get_jmax();

    # be sure the previous if/elsif is followed by an opening paren
    # at the same depth; it must be the first token of that line
    return
      unless ( $rtokens_old->[0] eq '(' . $depth_brace );  # shouldn't happen

    # Now find the opening block brace
    my $jbrace;
    foreach my $j ( 1 .. $old_jmax - 1 ) {
        next unless ( $rtokens_old->[$j] eq $tok_brace );
        $jbrace = $j;
        last;
    }
    return unless ( defined($jbrace) );    # shouldn't happen

    # Now splice the tokens and patterns of the previous line
    # into the else line to ensure a match.  Add empty fields
    # as necessary.
    splice( @{$rtokens},   0, 0, @{$rtokens_old}[ 0 .. $jbrace - 1 ] );
    splice( @{$rpatterns}, 1, 0, @{$rpatterns_old}[ 1 .. $jbrace ] );
    splice( @{$rfields},   1, 0, ('') x $jbrace );

    # force a flush after this line if it does not follow a case
    return $jbrace
      unless ( $rfields_old->[0] =~ /^case\s*$/ );
}

18823

18824

{    # sub check_match

    # Alignment tokens on which vertical alignment is usually acceptable;
    # seeing one of them lets a marginal match be accepted more readily.
    my %is_good_alignment;

    BEGIN {

        # the comma is listed outside of qw() so that it is not part of the
        # quoted word list
        my @good_tokens = ( qw( { ? => = ), ',' );
        @is_good_alignment{@good_tokens} = (1) x scalar(@good_tokens);
    }

    sub check_match {

        # Decide whether $new_line matches the current vertical alignment
        # group, represented by $old_line.  If it does not match, the
        # current group is flushed with my_flush().  Nothing is returned.
        #
        # Uses global variables:
        #   $previous_minimum_jmax_seen
        #   $maximum_jmax_seen
        #   $maximum_line_index
        #   $marginal_match  (may be set to 1 or 2, or cleared to 0, here)
        my ( $new_line, $old_line ) = @_;

        my $jmax                = $new_line->get_jmax();
        my $maximum_field_index = $old_line->get_jmax();

        # no match if this line has too many fields
        if ( $jmax > $maximum_field_index ) {
            my_flush();
            return;
        }

        my $has_fewer_fields = ( $maximum_field_index > $jmax );

        # no match if adding this line would make a non-monotonic field count
        if (
            $has_fewer_fields
            && ( $previous_minimum_jmax_seen < $jmax
                || $old_line->get_jmax_original_line() != $maximum_jmax_seen )
          )
        {
            my_flush();
            return;
        }

        # otherwise compare this line with the current group in detail
        my $jmax_original_line      = $new_line->get_jmax_original_line();
        my $is_hanging_side_comment = $new_line->get_is_hanging_side_comment();
        my $rtokens                 = $new_line->get_rtokens();
        my $rfields                 = $new_line->get_rfields();
        my $rpatterns               = $new_line->get_rpatterns();
        my $list_type               = $new_line->get_list_type();

        my $group_list_type = $old_line->get_list_type();
        my $old_rpatterns   = $old_line->get_rpatterns();
        my $old_rtokens     = $old_line->get_rtokens();

        # index of the last alignment token to be compared
        my $jlimit = $jmax - 1;
        if ($has_fewer_fields) {
            $jlimit = $jmax_original_line;
            --$jlimit unless ( length( $new_line->get_rfields()->[$jmax] ) );
        }

        # Comma-separated lists of the same type always match, unless the
        # match would align a '=>' with a ','.
        if ( $group_list_type && ( $list_type eq $group_list_type ) ) {
            for my $j ( 0 .. $jlimit ) {
                my $old_tok = $old_rtokens->[$j];
                my $new_tok = $rtokens->[$j];
                next unless ( $old_tok && $new_tok );

                my $mixes_fat_comma_with_comma =
                     ( $old_tok =~ /^=>/ && $new_tok =~ /^,/ )
                  || ( $new_tok =~ /^=>/ && $old_tok =~ /^,/ );
                if ($mixes_fat_comma_with_comma) {
                    my_flush();
                    return;
                }
            }
        }

        # Everything else, except hanging side comments, gets a detailed
        # token-by-token check.
        elsif ( !$is_hanging_side_comment ) {

            my $leading_space_count = $new_line->get_leading_space_count();

            my $max_pad = 0;
            my $min_pad = 0;
            my $saw_good_alignment;

            for my $j ( 0 .. $jlimit ) {

                my $old_tok = $old_rtokens->[$j];
                my $new_tok = $rtokens->[$j];

                # Note on the encoding used for alignment tokens:
                # -----------------------------------------------
                # Tokens are "decorated" with information which helps to
                # prevent unwanted alignments.  Consider these two lines:
                #   local ( $xn, $xd ) = split( '/', &'rnorm(@_) );
                #   local ( $i, $f ) = &'bdiv( $xn, $xd );
                # Each has three alignment tokens: a comma, an =, a comma.
                # In the first line they are encoded as
                #   ,4+local-18     =3      ,4+split-7
                # and in the second line as
                #   ,4+local-18     =3      ,4+&'bdiv-8
                # A token always carries at least its name and nesting
                # depth (here depth 3 for the ='s and depth 4 for the ,'s),
                # which prevents aligning tokens of different depths.
                # Commas carry more:
                #   , {depth} + {container name} - {spaces to opening paren}
                # so the rightmost commas above do not match, since they
                # belong to different function calls.  The encoding is done
                # in 'sub send_lines_to_vertical_aligner'.

                # The actual token is everything up to the first digit.
                my $alignment_token =
                  ( $new_tok =~ /^([^\d]+)/ ) ? $1 : $new_tok;

                # Decorated tokens match if they are equal, or if they are
                # commas in containers prefixed by ':' (new) and '?' (old),
                # which handles the terminal ':' of a ternary statement.
                my $tokens_match = $new_tok eq $old_tok
                  || ( $new_tok =~ /^,\d*\+\:/ && $old_tok =~ /^,\d*\+\?/ );

                if ( !$tokens_match ) {

                    # A mismatch at the last token is tolerated (it is
                    # the side comment position) if there is at least one
                    # alignment token, or if this is a single item
                    # following a list.  The latter rule lets 'December'
                    # join the following list:
                    #   my (@months) = (
                    #       '',       'January',   'February', 'March',
                    #       'April',  'May',       'June',     'July',
                    #       'August', 'September', 'October',  'November',
                    #       'December'
                    #   );
                    # If it doesn't then the -lp formatting will fail.
                    if ( $j == $jlimit && ( $j > 0 || $old_tok =~ /^,/ ) ) {
                        if ( $marginal_match == 0 && $maximum_line_index == 0 )
                        {
                            $marginal_match = 1;
                        }
                        last;
                    }

                    # any other difference in alignment tokens: no match
                    my_flush();
                    return;
                }

                # $pad is the number of spaces by which the current field
                # would have to grow to hold this line's field.
                my $pad =
                  length( $rfields->[$j] ) - $old_line->current_field_width($j);
                $pad += $leading_space_count if ( $j == 0 );

                # remember the extreme pads (side comments excluded) to
                # limit marginal cases
                if ( $alignment_token ne '#' ) {
                    $max_pad = $pad if ( $pad > $max_pad );
                    $min_pad = $pad if ( $pad < $min_pad );
                }
                $saw_good_alignment = 1
                  if ( $is_good_alignment{$alignment_token} );

                # If the patterns to the left differ, be careful ...
                if ( $old_rpatterns->[$j] ne $rpatterns->[$j] ) {

                    # flag this as a marginal match since patterns differ
                    if ( $marginal_match == 0 && $maximum_line_index == 0 ) {
                        $marginal_match = 1;
                    }

                    # Creating an alignment where none is needed can be
                    # worse than omitting one, so commas are aligned only
                    # if they are in named containers.  The first line
                    # below should not match the next two:
                    #   ( $a, $b ) = ( $b, $r );
                    #   ( $x1, $x2 ) = ( $x2 - $q * $x1, $x1 );
                    #   ( $y1, $y2 ) = ( $y2 - $q * $y1, $y1 );
                    if ( $alignment_token eq ',' ) {
                        unless ( $new_tok =~ /[A-Za-z]/ ) {
                            my_flush();
                            return;
                        }
                    }

                    # Parens with differing patterns are aligned only if
                    # no padding is needed; otherwise large ugly spaces
                    # can occur in math expressions.
                    elsif ( $alignment_token eq '(' ) {
                        if ( $pad != 0 ) {
                            my_flush();
                            return;
                        }
                    }

                    # An '=' with differing patterns to the left.
                    elsif ( $alignment_token eq '=' ) {

                        # Be a little restrictive here.  These two lines
                        # are not aligned:
                        #   my $variable=6;
                        #   $bb=4;
                        # because one is a 'my' declaration and the other
                        # is not.  Such pairs are filtered out, crudely,
                        # by comparing the first letter of the patterns.
                        if ( substr( $old_rpatterns->[$j], 0, 1 ) ne
                            substr( $rpatterns->[$j], 0, 1 ) )
                        {
                            my_flush();
                            return;
                        }

                        # Otherwise call it a marginal match, e.g.:
                        #   $done{$$op} = 1;
                        #   $op         = compile_bblock($op);
                        # Both left tokens are identifiers but only one
                        # accesses a hash.  The tentative match is undone
                        # later unless the group has more than 2 lines.
                        elsif ( $maximum_line_index == 0 ) {

                            # =2 prevents being undone below
                            $marginal_match = 2;
                        }
                    }
                }

                # Don't let a line with fewer fields increase column widths
                # ( align3.t )
                if ( $has_fewer_fields && $pad > 0 ) {
                    my_flush();
                    return;
                }
            }    ## end for my $j ( 0 .. $jlimit)

            # Turn off the "marginal match" flag in some cases...
            # A "marginal match" occurs when the alignment tokens agree but
            # there are differences in the other tokens (patterns).  If the
            # flag stays set, the group is aligned only if it has more than
            # two lines.  The flag is turned off if we almost have a match
            # and either a good alignment token was seen or only a small
            # pad (2 spaces) is needed.  These rules are the result of
            # experimentation: tokens misaligned by one or two characters
            # are annoying, but so are large gaps to less important
            # alignment tokens.
            my $small_pad_suffices = ( $max_pad < 3 && $min_pad > -3 );
            if (   $marginal_match == 1
                && $jmax == $maximum_field_index
                && ( $saw_good_alignment || $small_pad_suffices ) )
            {
                $marginal_match = 0;
            }
        }

        # We have a match (even if marginal).
        # If this line has fewer fields than the group but otherwise
        # matches, copy the remaining group tokens and patterns into it.
        if ($has_fewer_fields) {
            my $comment = $rfields->[$jmax];
            for my $k ( $jlimit .. $maximum_field_index ) {
                $rtokens->[$k]         = $old_rtokens->[$k];
                $rfields->[ $k + 1 ]   = '';
                $rpatterns->[ $k + 1 ] = $old_rpatterns->[ $k + 1 ];
            }

            # NOTE: $jmax is not changed by the loop above, so the saved
            # field goes back to its original index and set_jmax() receives
            # the same value that get_jmax() returned.
            $rfields->[$jmax] = $comment;
            $new_line->set_jmax($jmax);
        }
        return;
    }
}

19117

19118

sub check_fit {

    # See whether $new_line can be squeezed into the columns of the current
    # alignment group, represented by $old_line.  Field widths of $old_line
    # are increased as needed.  If a field needs more padding than is
    # available on the right, the saved columns are restored and the group
    # is flushed with my_flush().
    #
    # Uses global variables:
    #   $maximum_line_index
    #   $group_maximum_gap  (updated with the largest gap seen)
    return unless ( $maximum_line_index >= 0 );
    my $new_line = shift;
    my $old_line = shift;

    my $jmax                = $new_line->get_jmax();
    my $leading_space_count = $new_line->get_leading_space_count();
    my $rfields             = $new_line->get_rfields();

    my $padding_available = $old_line->get_available_space_on_right();

    # save current columns in case this doesn't work
    save_alignment_columns();

    my $maximum_field_index = $old_line->get_jmax();
    for my $j ( 0 .. $jmax ) {

        # spaces by which field $j of the group would have to grow
        my $pad = length( $$rfields[$j] ) - $old_line->current_field_width($j);

        if ( $j == 0 ) {
            $pad += $leading_space_count;
        }

        # a gap is counted only for fields after the first and before the
        # last two (this excludes the gap to the side comment)
        my $counts_toward_gap = ( $j > 0 && $j < $jmax - 1 );

        # remember largest gap of the group, excluding gap to side comment
        if (   $pad < 0
            && $group_maximum_gap < -$pad
            && $counts_toward_gap )
        {
            $group_maximum_gap = -$pad;
        }

        next if $pad < 0;

        # This line will need space; give up if it won't fit.
        # (Previously there were also upper bounds on the padding here,
        # maximum_whitespace_columns, but they were not really helpful.)
        if ( $pad > $padding_available ) {

            # revert to starting state then flush; things didn't work out
            restore_alignment_columns();
            my_flush();
            last;
        }

        # patch to avoid excessive gaps in previous lines,
        # due to a line of fewer fields.
        #   return join( ".",
        #       $self->{"dfi"},  $self->{"aa"}, $self->rsvd,     $self->{"rd"},
        #       $self->{"area"}, $self->{"id"}, $self->{"sel"} );
        next if ( $jmax < $maximum_field_index && $j == $jmax - 1 );

        # looks ok, squeeze this field in
        $old_line->increase_field_width( $j, $pad );
        $padding_available -= $pad;

        # remember largest gap of the group, excluding gap to side comment
        if ( $pad > $group_maximum_gap && $counts_toward_gap ) {
            $group_maximum_gap = $pad;
        }
    }
    return;
}

19202

19203

sub accept_line {

    # Append $new_line to the current alignment group.  The line either
    # starts a new group (it becomes line 0) or joins the existing one.
    #
    # Uses global variables:
    #   @group_lines, $maximum_line_index
    #   $maximum_jmax_seen, $minimum_jmax_seen
    #   $previous_minimum_jmax_seen, $previous_maximum_jmax_seen
    my ($new_line) = @_;

    $maximum_line_index++;
    $group_lines[$maximum_line_index] = $new_line;

    if ( $maximum_line_index != 0 ) {

        # joining an existing group: reuse the previous line's alignments
        my $previous_line = $group_lines[ $maximum_line_index - 1 ];
        $new_line->set_alignments( $previous_line->get_alignments() );
    }
    else {

        # starting a new group: create one alignment per field, located at
        # the running column where that field ends
        my $jmax    = $new_line->get_jmax();
        my $rfields = $new_line->get_rfields();
        my $rtokens = $new_line->get_rtokens();
        my $col     = $new_line->get_leading_space_count();

        for my $j ( 0 .. $jmax ) {
            $col += length( $rfields->[$j] );

            # the last field has no alignment token after it
            my $token = ( $j < $jmax ) ? $rtokens->[$j] : "";
            $new_line->set_alignment( $j, make_alignment( $col, $token ) );
        }

        $maximum_jmax_seen = $jmax;
        $minimum_jmax_seen = $jmax;
    }

    # remember group jmax extremes for next call to append_line
    $previous_minimum_jmax_seen = $minimum_jmax_seen;
    $previous_maximum_jmax_seen = $maximum_jmax_seen;
}

19244

19245

sub dump_array {

    # Debug routine: print the arguments on one line, each wrapped in
    # parentheses, e.g. dump_array('a','b') prints "(a)(b)\n".
    print '(' . join( ')(', @_ ) . ")\n";
}

19251

19252

# flush() sends the current Perl::Tidy::VerticalAligner group down the

19253

# pipeline to Perl::Tidy::FileWriter.

19254

19255

# This is the external flush, which also empties the cache

19256

sub flush {

    # External flush.  If lines are buffered in the current group they are
    # written by my_flush().  Otherwise any cached line is written out with
    # entab_and_output() and the cache variables are cleared.
    if ( $maximum_line_index >= 0 ) {
        my_flush();
    }
    elsif ($cached_line_type) {
        $seqno_string = $cached_seqno_string;
        entab_and_output(
            $cached_line_text,
            $cached_line_leading_space_count,
            $last_group_level_written
        );

        # the cache is now empty
        $cached_line_type    = 0;
        $cached_line_text    = "";
        $cached_seqno_string = "";
    }
}

19273

19274

# This is the internal flush, which leaves the cache intact

19275

sub my_flush {

    # Internal flush: write out every line buffered in @group_lines and
    # then call initialize_for_new_group().  Does nothing if no lines are
    # buffered.  Unlike flush(), the cached line variables are not touched
    # here.
    return if ( $maximum_line_index < 0 );

    # handle a group of code lines
    if ( $group_type ne 'COMMENT' ) {

        VALIGN_DEBUG_FLAG_APPEND0 && do {
            my $group_list_type = $group_lines[0]->get_list_type();
            my ( $a, $b, $c ) = caller();
            my $maximum_field_index = $group_lines[0]->get_jmax();
            print
"APPEND0: Flush called from $a $b $c fields=$maximum_field_index list=$group_list_type lines=$maximum_line_index extra=$extra_indent_ok\n";

        };

        # some small groups are best left unaligned; then optimize the
        # side comment location, which may revise that decision
        my $do_not_align = adjust_side_comment( decide_if_aligned() );

        # recover spaces for -lp option if possible
        my $extra_leading_spaces = get_extra_leading_spaces();

        # all lines of this group have the same basic leading spacing
        my $group_leader_length = $group_lines[0]->get_leading_space_count();

        # add extra leading spaces if helpful
        my $min_ci_gap =
          improve_continuation_indentation( $do_not_align,
            $group_leader_length );

        # output all lines
        for my $i ( 0 .. $maximum_line_index ) {
            write_vertically_aligned_line(
                $group_lines[$i],     $min_ci_gap, $do_not_align,
                $group_leader_length, $extra_leading_spaces
            );
        }
    }

    # handle a group of comment lines; here the group lines are used
    # directly as strings
    else {

        VALIGN_DEBUG_FLAG_APPEND0 && do {
            my ( $a, $b, $c ) = caller();
            print
"APPEND0: Flush called from $a $b $c for COMMENT group: lines=$maximum_line_index \n";

        };
        my $leading_space_count = $comment_leading_space_count;

        # NOTE(review): the returned string is not used in this sub; the
        # call is kept in case get_leading_string has side effects.
        my $leading_string = get_leading_string($leading_space_count);

        # find the largest amount by which any line exceeds the maximum
        # line length
        my $max_excess = 0;
        for my $i ( 0 .. $maximum_line_index ) {
            my $excess =
              length( $group_lines[$i] ) +
              $leading_space_count -
              $rOpts_maximum_line_length;
            $max_excess = $excess if ( $excess > $max_excess );
        }

        # reduce the leading space (but not below zero) if any line is too
        # long, and record the outdenting
        if ( $max_excess > 0 ) {
            $leading_space_count -= $max_excess;
            $leading_space_count = 0 if ( $leading_space_count < 0 );
            $last_outdented_line_at =
              $file_writer_object->get_output_line_number();
            $first_outdented_line_at = $last_outdented_line_at
              unless ($outdented_line_count);
            $outdented_line_count += ( $maximum_line_index + 1 );
        }

        # write the group of lines, with no side comment length, no
        # outdenting of long lines and no vertical tightness flags
        for my $i ( 0 .. $maximum_line_index ) {
            write_leader_and_string( $leading_space_count, $group_lines[$i],
                0, 0, "" );
        }
    }
    initialize_for_new_group();
}

19358

19359

sub decide_if_aligned {

    # Decide whether a group of exactly two lines should be left unaligned
    # because the lines are not really similar.  Returns a true value if
    # the group should NOT be aligned.  Returns nothing (false) for groups
    # of any other size and for a matching terminal line.
    #
    # Uses global variables:
    #   $maximum_line_index, $is_matching_terminal_line, @group_lines
    #   $marginal_match, $group_maximum_gap
    #   $previous_maximum_jmax_seen, $previous_minimum_jmax_seen
    return unless $maximum_line_index == 1;
    return if ($is_matching_terminal_line);

    my $first_line      = $group_lines[0];
    my $group_list_type = $first_line->get_list_type();

    # Reasons to leave a pair of lines alone:
    #  - it was just a marginal match, or
    #  - there is a big gap between the two lines, or
    #  - the lines have differing numbers of alignment tokens.
    #    TODO: this could be improved.  It occasionally rejects good
    #    matches.
    my $lines_look_dissimilar = $marginal_match
      || $group_maximum_gap > 12
      || $previous_maximum_jmax_seen != $previous_minimum_jmax_seen;

    # ... but lists are always aligned
    my $do_not_align = ( !$group_list_type && $lines_look_dissimilar );

    # If the first line has a side comment, convert the pair into a simple
    # side comment group instead, so that the comments can still align.
    my $rfields             = $first_line->get_rfields();
    my $maximum_field_index = $first_line->get_jmax();
    if ( $do_not_align && length( $rfields->[$maximum_field_index] ) > 0 ) {
        combine_fields();
        $do_not_align = 0;
    }
    return $do_not_align;
}

19400

19401

sub adjust_side_comment {

    # See if the side comment field of the current group can be moved out
    # a little to improve readability (the last field is always the side
    # comment field).  Takes the current $do_not_align flag and returns it,
    # possibly cleared to 0.
    #
    # Uses global variables:
    #   @group_lines, $maximum_line_index
    #   $last_comment_column  (read, and updated when the group is aligned)
    #   $last_side_comment_length, $group_level, $last_group_level_written
    #   $rOpts_minimum_space_to_comment
    my $do_not_align = shift;

    my $maximum_field_index = $group_lines[0]->get_jmax();

    # find the first line of the group with a non-empty last field
    my $first_side_comment_line = -1;
    for my $i ( 0 .. $maximum_line_index ) {
        my $rfields = $group_lines[$i]->get_rfields();
        if ( length( $rfields->[$maximum_field_index] ) ) {
            $first_side_comment_line = $i;
            last;
        }
    }

    # nothing to adjust if no line has a side comment
    return $do_not_align if ( $first_side_comment_line < 0 );

    my $line = $group_lines[0];

    # index of the column in front of the side comment field
    my $comment_column_index = $maximum_field_index - 1;
    my $has_fields           = ( $maximum_field_index >= 0 );

    # the maximum space without exceeding the line length:
    my $avail = $line->get_available_space_on_right();

    # try to use the previous comment column
    my $side_comment_column = $line->get_column($comment_column_index);
    my $move                = $last_comment_column - $side_comment_column;

    if ( $has_fields && !$do_not_align ) {

        my $default_move = $rOpts_minimum_space_to_comment - 1;

        # if the previous column can't be reached, give up and use the
        # minimum space
        $move = $default_move if ( $move > $avail );

        # We want some minimum space to the comment, except when the
        # comment can stay at or right of the previous comment column, the
        # last line written had a side comment, the first line of this
        # group has one, and the group level has not changed.
        my $continues_previous_column =
             $move >= 0
          && $last_side_comment_length > 0
          && ( $first_side_comment_line == 0 )
          && $group_level == $last_group_level_written;
        my $min_move = $continues_previous_column ? 0 : $default_move;

        $move = $min_move if ( $move < $min_move );

        # previously, an upper bound was placed on $move here,
        # (maximum_space_to_comment), but it was not helpful

        # don't exceed the available space
        $move = $avail if ( $move > $avail );

        # we can only increase space, never decrease
        if ( $move > 0 ) {
            $line->increase_field_width( $comment_column_index, $move );
        }

        # remember this column for the next group
        $last_comment_column = $line->get_column($comment_column_index);
    }

    # not aligning: try to at least line up the existing side comment
    # location
    elsif ( $has_fields && $move > 0 && $move < $avail ) {
        $line->increase_field_width( $comment_column_index, $move );
        $do_not_align = 0;
    }

    # reset side comment column if we can't align
    else {
        forget_side_comment();
    }
    return $do_not_align;
}

19499

19500

sub improve_continuation_indentation {

    # Return the extra indentation ('$min_ci_gap') to add to the lines of
    # the current group which already have continuation indentation.
    #
    # This feature is DEACTIVATED: the sub always returns 0.
    #
    # The idea was to move all continuation lines closer to the next field
    # (unless it is a comment), to turn something like this (Complex.pm):
    #
    #   use overload '+' => \&plus,
    #     '-'            => \&minus,
    #     '*'            => \&multiply,
    #     ...
    #     'tan'          => \&tan,
    #     'atan2'        => \&atan2,
    #
    # into this:
    #
    #   use overload '+' => \&plus,
    #                '-' => \&minus,
    #                '*' => \&multiply,
    #                ...
    #                'tan'   => \&tan,
    #                'atan2' => \&atan2,
    #
    # The former implementation took, over all lines indented more than
    # $group_leader_length, the smallest gap between the end of the first
    # field and its alignment column.  It was switched off because it may,
    # for example, move in some 'or's or ':'s and leave some out, so that
    # the left edge alignment suffers.  The code that followed the
    # unconditional return could never run and has been removed.
    #
    # Parameters (accepted for interface compatibility, currently unused):
    #   $do_not_align        - true if the group will not be aligned
    #   $group_leader_length - leading space count of the group's first line
    my ( $do_not_align, $group_leader_length ) = @_;

    return 0;
}

19564

19565

sub write_vertically_aligned_line {

    # Build the text of one line of the current group by joining its
    # fields with the padding needed to reach their alignment columns, and
    # pass it to write_leader_and_string().
    #
    # Parameters:
    #   $line                 - the line object to write
    #   $min_ci_gap           - extra spaces for lines indented more than
    #                           the group leader
    #   $do_not_align         - true to skip alignment padding
    #   $group_leader_length  - leading space count of the group's first line
    #   $extra_leading_spaces - extra leading spaces (for the -lp option)
    my ( $line, $min_ci_gap, $do_not_align, $group_leader_length,
        $extra_leading_spaces )
      = @_;

    my $rfields                   = $line->get_rfields();
    my $leading_space_count       = $line->get_leading_space_count();
    my $outdent_long_lines        = $line->get_outdent_long_lines();
    my $maximum_field_index       = $line->get_jmax();
    my $rvertical_tightness_flags = $line->get_rvertical_tightness_flags();

    # add any extra spaces
    $leading_space_count += $min_ci_gap
      if ( $leading_space_count > $group_leader_length );

    # concatenate all fields of this line with the needed padding
    my $str = $rfields->[0];
    for my $j ( 1 .. $maximum_field_index ) {

        my $field           = $rfields->[$j];
        my $is_side_comment = ( $j == $maximum_field_index );

        # skip zero-length side comments
        last
          if ( $is_side_comment
            && ( !defined($field) || length($field) == 0 ) );

        # compute spaces of padding before this field
        my $col = $line->get_column( $j - 1 );
        my $pad = $col - ( length($str) + $leading_space_count );

        # unaligned lines get no padding, except for the minimum space in
        # front of a side comment
        if ($do_not_align) {
            $pad = $is_side_comment ? $rOpts_minimum_space_to_comment - 1 : 0;
        }

        # if the -fpsc flag is set, move the side comment to the selected
        # column if and only if it is possible, ignoring constraints on
        # line length and minimum space to comment
        if ( $rOpts_fixed_position_side_comment && $is_side_comment ) {
            my $newpad = $pad + $rOpts_fixed_position_side_comment - $col - 1;
            $pad = $newpad if ( $newpad >= 0 );
        }

        if ( !defined $field ) {
            write_diagnostics("UNDEFined field at j=$j\n");
        }

        # only add padding when we have a finite field;
        # this avoids extra terminal spaces if we have empty fields
        if ( length($field) > 0 ) {
            my $space_count = ( $pad > 0 ) ? $pad : 0;
            $str .= ( ' ' x $space_count ) . $field;
        }

        # update side comment history buffer
        if ($is_side_comment) {
            my $lineno = $file_writer_object->get_output_line_number();
            shift @side_comment_history;
            push @side_comment_history, [ $lineno, $col ];
        }
    }

    my $side_comment_length = length( $rfields->[$maximum_field_index] );

    # ship this line off
    write_leader_and_string( $leading_space_count + $extra_leading_spaces,
        $str, $side_comment_length, $outdent_long_lines,
        $rvertical_tightness_flags );
}

19648

19649

sub get_extra_leading_spaces {

    #----------------------------------------------------------
    # Return the number of extra indentation spaces to use for the current
    # group (for the -lp option), or 0 if none.
    #
    # Here is why this is needed:
    # If a list has side comments, sub scan_list must dump the list before
    # it sees everything.  When this happens, it sets the indentation to
    # the standard scheme, but notes how many spaces it would have liked
    # to use.  We may be able to recover that space here in the event that
    # all of the lines of a list are back together again.
    #----------------------------------------------------------
    return 0 unless ($extra_indent_ok);

    # only an indentation object (a reference) can have recoverable spaces
    my $object = $group_lines[0]->get_indentation();
    return 0 unless ( ref($object) );

    my $spaces_wanted = get_RECOVERABLE_SPACES($object);

    # all indentation objects of the group must be the same
    for my $i ( 1 .. $maximum_line_index ) {
        if ( $object != $group_lines[$i]->get_indentation() ) {
            $spaces_wanted = 0;
            last;
        }
    }
    return 0 unless ($spaces_wanted);

    # use as much as is wanted, but no more than the maximum space
    # available without exceeding the line length
    my $avail = $group_lines[0]->get_available_space_on_right();
    my $extra_leading_spaces =
      ( $avail > $spaces_wanted ) ? $spaces_wanted : $avail;

    # update the indentation object because with -icp the terminal
    # ');' will use the same adjustment.
    $object->permanently_decrease_AVAILABLE_SPACES( -$extra_leading_spaces );

    return $extra_leading_spaces;
}

19696

19697

sub combine_fields {

    # Reduce every line of the current group to two fields: all fields
    # except the last one are joined into field 0, and the last field (the
    # comment field) becomes field 1.  ( sidecmt.t )
    #
    # Uses global variables:
    #   @group_lines
    #   $maximum_line_index
    my $maximum_field_index = $group_lines[0]->get_jmax();

    # Pass 1: merge the fields and reset both columns on every line.
    # This is kept separate from pass 2 so that all columns are reset
    # before any of them is widened again.
    for my $i ( 0 .. $maximum_line_index ) {
        my $line    = $group_lines[$i];
        my $rfields = $line->get_rfields();

        $rfields->[0] .= join( '', @{$rfields}[ 1 .. $maximum_field_index - 1 ] );
        $rfields->[1] = $rfields->[$maximum_field_index];

        $line->set_jmax(1);
        $line->set_column( 0, 0 );
        $line->set_column( 1, 0 );
    }

    # Pass 2: widen the two fields until each line's text fits.
    for my $i ( 0 .. $maximum_line_index ) {
        my $line    = $group_lines[$i];
        my $rfields = $line->get_rfields();
        for my $k ( 0, 1 ) {
            my $pad = length( $rfields->[$k] ) - $line->current_field_width($k);

            # the first field also has to hold the leading space
            $pad += $line->get_leading_space_count() if ( $k == 0 );

            $line->increase_field_width( $k, $pad ) if ( $pad > 0 );
        }
    }
}

19734

19735

sub get_output_line_number {

    # The output line number reported to a caller is the number of items
    # written (as reported by the file writer object) plus the number of
    # items in the buffer.  The invocant is not used.
    my $buffered_line_count = 1 + $maximum_line_index;
    return $buffered_line_count + $file_writer_object->get_output_line_number();
}

19742

19743

sub write_leader_and_string {

    # Send one finished line to the output, or hold it in the one-line
    # cache so that the following line can be joined to it.
    #
    # Arguments:
    #   $leading_space_count       - number of leading spaces for the line
    #   $str                       - the line text without those spaces
    #   $side_comment_length       - subtracted from the length of $str in
    #                                the long-line test; a positive value
    #                                also prevents caching of the line
    #   $outdent_long_lines        - true if an over-long line may be
    #                                written with no leading spaces
    #   $rvertical_tightness_flags - optional reference to the
    #                                recombination data described below
    my (
        $leading_space_count, $str, $side_comment_length,
        $outdent_long_lines,  $rvertical_tightness_flags
    ) = @_;

    # handle outdenting of long lines:
    if ($outdent_long_lines) {
        my $code_length = length($str) - $side_comment_length;
        my $excess =
          $code_length + $leading_space_count - $rOpts_maximum_line_length;

        if ( $excess > 0 ) {
            $leading_space_count = 0;
            $last_outdented_line_at =
              $file_writer_object->get_output_line_number();
            $first_outdented_line_at = $last_outdented_line_at
              if ( !$outdented_line_count );
            $outdented_line_count++;
        }
    }

    # Make preliminary leading whitespace.  It could get changed
    # later by entabbing, so we have to keep track of any changes
    # to the leading_space_count from here on.
    my $leading_string = "";
    if ( $leading_space_count > 0 ) {
        $leading_string = ' ' x $leading_space_count;
    }

    # Unpack any recombination data; it was packed by
    # sub send_lines_to_vertical_aligner. Contents:
    #
    #   [0] type: 1=opening  2=closing  3=opening block brace
    #   [1] flag: if opening: 1=no multiple steps, 2=multiple steps ok
    #             if closing: spaces of padding to use
    #   [2] sequence number of container
    #   [3] valid flag: do not append if this flag is false
    #
    # Elements [4] and [5] are unpacked as $seqno_beg and $seqno_end.
    my ( $open_or_close, $tightness_flag, $seqno, $valid, $seqno_beg,
        $seqno_end )
      = $rvertical_tightness_flags ? @{$rvertical_tightness_flags} : ();

    # note: entab_and_output reads this global, so it must be set before
    # any cached line is written out below
    $seqno_string = $seqno_end;

    # writes the cached line by itself
    my $flush_cached_line = sub {
        entab_and_output( $cached_line_text,
            $cached_line_leading_space_count,
            $last_group_level_written );
    };

    # handle any cached line ..
    # either append this line to it or write it out
    if ( length($cached_line_text) ) {

        if ( !$cached_line_valid ) {
            $flush_cached_line->();
        }

        # handle cached line with opening container token
        elsif ( $cached_line_type == 1 || $cached_line_type == 3 ) {

            my $gap = $leading_space_count - length($cached_line_text);

            # handle option of just one tight opening per line:
            my $one_opening_only = $cached_line_flag == 1
              && defined($open_or_close)
              && $open_or_close == 1;

            if ( $one_opening_only || $gap < 0 ) {
                $flush_cached_line->();
            }
            else {
                $leading_string      = $cached_line_text . ' ' x $gap;
                $leading_space_count = $cached_line_leading_space_count;
                $seqno_string        = $cached_seqno_string . ':' . $seqno_beg;
            }
        }

        # handle cached line to place before this closing container token
        else {
            my $test_line = $cached_line_text . ' ' x $cached_line_flag . $str;

            if ( length($test_line) > $rOpts_maximum_line_length ) {
                $flush_cached_line->();
            }
            else {

                $seqno_string = $cached_seqno_string . ':' . $seqno_beg;

                # Patch to outdent closing tokens ending # in ');'
                # If we are joining a line like ');' to a previous stacked
                # set of closing tokens, then decide if we may outdent the
                # combined stack to the indentation of the ');'.  Since we
                # should not normally outdent any of the other tokens more
                # than the indentation of the lines that contained them, we
                # will only do this if all of the corresponding opening
                # tokens were on the same line.  This can happen with
                # -sot and -sct.  For example, it is ok here:
                #   __PACKAGE__->load_components( qw(
                #         PK::Auto
                #         Core
                #   ));
                #
                # But, for example, we do not outdent in this example
                # because that would put the closing sub brace out farther
                # than the opening sub brace:
                #
                #   perltidy -sot -sct
                #   $c->Tk::bind(
                #       '<Control-f>' => sub {
                #           my ($c) = @_;
                #           my $e = $c->XEvent;
                #           itemsUnderArea $c;
                #       } );
                #
                if ( $str =~ /^\);/ && $cached_line_text =~ /^[\)\}\]\s]*$/ ) {

                    # The way to tell this is if the stacked sequence
                    # numbers of this output line are the reverse of the
                    # stacked sequence numbers of the previous non-blank
                    # line of sequence numbers.  So we can join if the
                    # previous nonblank string of tokens is the mirror
                    # image.  For example if stack )}] is 13:8:6 then we
                    # are looking for a leading stack like [{( which is
                    # 6:8:13.  We only need to check the two ends, because
                    # the intermediate tokens must fall in order.  Note on
                    # speed: having to split on colons and eliminate
                    # multiple colons might appear to be slow, but it's not
                    # an issue because we almost never come through here.
                    foreach
                      my $stack ( $seqno_string, $last_nonblank_seqno_string )
                    {
                        $stack =~ s/^:+//;
                        $stack =~ s/:+/:/g;
                    }

                    # how many spaces can we outdent?
                    my $diff =
                      $cached_line_leading_space_count - $leading_space_count;

                    if (   $diff > 0
                        && length($seqno_string)
                        && length($last_nonblank_seqno_string) ==
                        length($seqno_string) )
                    {
                        my @stack_last =
                          ( split ':', $last_nonblank_seqno_string );
                        my @stack_now = ( split ':', $seqno_string );

                        my $is_mirror_image =
                             $stack_now[-1] == $stack_last[0]
                          && $stack_now[0] == $stack_last[-1];

                        if ($is_mirror_image) {

                            # OK to outdent ..
                            # for absolute safety, be sure we only remove
                            # whitespace; if that test fails (shouldn't
                            # happen, but not critical) the line is simply
                            # left as it is
                            my $removable = substr( $test_line, 0, $diff );
                            if (   ( length($removable) == $diff )
                                && $removable =~ /^\s+$/ )
                            {
                                $test_line = substr( $test_line, $diff );
                                $cached_line_leading_space_count -= $diff;
                            }
                        }
                    }
                }

                $str                 = $test_line;
                $leading_string      = "";
                $leading_space_count = $cached_line_leading_space_count;
            }
        }
    }
    $cached_line_type = 0;
    $cached_line_text = "";

    # make the line to be written
    my $line = $leading_string . $str;

    # write or cache this line: it is cached only if it carries
    # recombination data and has no side comment
    if ( $open_or_close && !( $side_comment_length > 0 ) ) {
        $cached_line_text                = $line;
        $cached_line_type                = $open_or_close;
        $cached_line_flag                = $tightness_flag;
        $cached_seqno                    = $seqno;
        $cached_line_valid               = $valid;
        $cached_line_leading_space_count = $leading_space_count;
        $cached_seqno_string             = $seqno_string;
    }
    else {
        entab_and_output( $line, $leading_space_count, $group_level );
    }

    $last_group_level_written = $group_level;
    $last_side_comment_length = $side_comment_length;
    $extra_indent_ok          = 0;
}

19949

19950

sub entab_and_output {

    # Write one line of code to the file writer, first replacing its
    # leading spaces with tabs if a tab option is in effect.
    #
    # Arguments:
    #   $line                - the complete line, without a trailing newline
    #   $leading_space_count - number of leading whitespace characters
    #   $level               - indentation level used for the one tab per
    #                          level option
    #
    # Uses global variables:
    #   $rOpts_tabs, $rOpts_entab_leading_whitespace, $rOpts_indent_columns
    #   $file_writer_object
    #   $seqno_string, $last_nonblank_seqno_string
    my ( $line, $leading_space_count, $level ) = @_;

    # The line is currently correct if there is no tabbing (recommended!)
    # We may have to lop off some leading spaces and replace with tabs.
    my $tabs_wanted = ( $rOpts_tabs || $rOpts_entab_leading_whitespace )
      && $rOpts_indent_columns > 0;

    if ( $leading_space_count > 0 && $tabs_wanted ) {

        my $leading_string;

        # Handle entab option
        if ($rOpts_entab_leading_whitespace) {
            my $space_count =
              $leading_space_count % $rOpts_entab_leading_whitespace;
            my $tab_count =
              int( $leading_space_count / $rOpts_entab_leading_whitespace );
            $leading_string = "\t" x $tab_count . ' ' x $space_count;
        }

        # Handle option of one tab per level
        else {
            my $space_count =
              $leading_space_count - $level * $rOpts_indent_columns;

            # shouldn't happen: fall back to plain spaces.  The message
            # reports $level because that is the value used in the
            # calculation (it can differ from $group_level when a cached
            # line is being written).
            if ( $space_count < 0 ) {
                warning(
"Error entabbing in entab_and_output: for level=$level count=$leading_space_count\n"
                );
                $leading_string = ( ' ' x $leading_space_count );
            }
            else {
                $leading_string = ( "\t" x $level ) . ( ' ' x $space_count );
            }
        }

        # Only splice in the new leader if the line really starts with the
        # expected amount of whitespace
        if ( $line =~ /^\s{$leading_space_count}/ ) {
            substr( $line, 0, $leading_space_count ) = $leading_string;
        }
        else {

            # REMOVE AFTER TESTING
            # shouldn't happen - program error counting whitespace
            # we'll skip entabbing
            warning(
"Error entabbing in entab_and_output: expected count=$leading_space_count\n"
            );
        }
    }

    $file_writer_object->write_code_line( $line . "\n" );

    # remember the sequence number string of the last line which had one
    if ($seqno_string) {
        $last_nonblank_seqno_string = $seqno_string;
    }
}

20021

20022

{    # begin get_leading_string

    # Cache of leading strings which have already been built.  With the
    # one tab per level option the string depends on the group level as
    # well as on the number of spaces, so in that mode the level is part
    # of the cache key; otherwise the key is just the number of spaces.
    my %leading_string_cache;

    sub get_leading_string {

        # define the leading whitespace string for this line..
        #
        # Argument: the number of leading whitespace columns wanted.
        # Returns the string of tabs and/or spaces to use.
        #
        # Uses global variables:
        #   $rOpts_tabs, $rOpts_entab_leading_whitespace,
        #   $rOpts_indent_columns, $group_level
        my $leading_whitespace_count = shift;

        # Handle case of zero whitespace, which includes multi-line quotes
        # (which may have a finite level; this prevents tab problems)
        if ( $leading_whitespace_count <= 0 ) {
            return "";
        }

        my $no_tabs = !( $rOpts_tabs || $rOpts_entab_leading_whitespace )
          || $rOpts_indent_columns <= 0;
        my $tab_per_level = !$no_tabs && !$rOpts_entab_leading_whitespace;
        my $level = defined($group_level) ? $group_level : 0;

        # look for previous result
        my $cache_key =
            $tab_per_level
          ? $leading_whitespace_count . ':' . $level
          : $leading_whitespace_count;
        my $cached_string = $leading_string_cache{$cache_key};
        if ( defined($cached_string) ) {
            return $cached_string;
        }

        # must compute a string for this number of spaces
        my $leading_string;

        # Handle simple case of no tabs
        if ($no_tabs) {
            $leading_string = ( ' ' x $leading_whitespace_count );
        }

        # Handle entab option
        elsif ($rOpts_entab_leading_whitespace) {
            my $space_count =
              $leading_whitespace_count % $rOpts_entab_leading_whitespace;
            my $tab_count = int(
                $leading_whitespace_count / $rOpts_entab_leading_whitespace );
            $leading_string = "\t" x $tab_count . ' ' x $space_count;
        }

        # Handle option of one tab per level
        else {
            $leading_string = ( "\t" x $level );
            my $space_count =
              $leading_whitespace_count - $level * $rOpts_indent_columns;

            # shouldn't happen:
            if ( $space_count < 0 ) {
                warning(
"Error in append_line: for level=$group_level count=$leading_whitespace_count\n"
                );
                $leading_string = ( ' ' x $leading_whitespace_count );
            }
            else {
                $leading_string .= ( ' ' x $space_count );
            }
        }
        $leading_string_cache{$cache_key} = $leading_string;
        return $leading_string;
    }
}    # end get_leading_string

20082

20083

sub report_anything_unusual {

    # Write a summary to the log file if any long lines were outdented;
    # nothing is written otherwise
    my $self = shift;
    return unless ( $outdented_line_count > 0 );

    my @entries = (
        "$outdented_line_count long lines were outdented:\n",
        " First at output line $first_outdented_line_at\n",
    );

    # the last location is only of interest if it differs from the first
    if ( $outdented_line_count > 1 ) {
        push @entries, " Last at output line $last_outdented_line_at\n";
    }

    push @entries,
      " use -noll to prevent outdenting, -l=n to increase line length\n",
      "\n";

    foreach my $entry (@entries) {
        write_logfile_entry($entry);
    }
    return;
}

20101

20102

#####################################################################

20103

#

20104

# the Perl::Tidy::FileWriter class writes the output file

20105

#

20106

#####################################################################

20107

20108

package Perl::Tidy::FileWriter;

20109

20110

# Maximum number of little messages; probably need not be changed.

20111

use constant MAX_NAG_MESSAGES => 6;

20112

20113

sub write_logfile_entry {

    # Pass the given message(s) on to the logger object, if there is one;
    # without a logger object the messages are dropped
    my $self   = shift;
    my $logger = $self->{_logger_object};
    return unless ($logger);
    $logger->write_logfile_entry(@_);
}

20120

20121

sub new {

    # Create a file writer.
    #
    # Arguments (after the class name):
    #   $line_sink_object - object which receives each output line
    #   $rOpts            - reference to the hash of options
    #   $logger_object    - object for log messages (may be undefined)
    my ( $class, $line_sink_object, $rOpts, $logger_object ) = @_;

    my %state = (
        _line_sink_object   => $line_sink_object,
        _logger_object      => $logger_object,
        _rOpts              => $rOpts,
        _output_line_number => 1,
    );

    # all of the remaining counters and statistics start at zero
    foreach my $counter (
        qw(
        _consecutive_blank_lines
        _consecutive_nonblank_lines
        _first_line_length_error
        _max_line_length_error
        _last_line_length_error
        _first_line_length_error_at
        _max_line_length_error_at
        _last_line_length_error_at
        _line_length_error_count
        _max_output_line_length
        _max_output_line_length_at
        )
      )
    {
        $state{$counter} = 0;
    }

    return bless \%state, $class;
}

20143

20144

sub tee_on {

    # Forward a tee_on request to the line sink object
    my ($self) = @_;
    my $sink = $self->{_line_sink_object};
    $sink->tee_on();
}

20148

20149

sub tee_off {

    # Forward a tee_off request to the line sink object
    my ($self) = @_;
    my $sink = $self->{_line_sink_object};
    $sink->tee_off();
}

20153

20154

sub get_output_line_number {

    # Return the current value of the output line counter
    my ($self) = @_;
    return $self->{_output_line_number};
}

20158

20159

sub decrement_output_line_number {

    # Reduce the output line counter by one
    my ($self) = @_;
    return $self->{_output_line_number}--;
}

20163

20164

sub get_consecutive_nonblank_lines {

    # Return the current count of consecutive nonblank lines
    my ($self) = @_;
    return $self->{_consecutive_nonblank_lines};
}

20168

20169

sub reset_consecutive_blank_lines {

    # Set the count of consecutive blank lines back to zero
    my ($self) = @_;
    return $self->{_consecutive_blank_lines} = 0;
}

20173

20174

sub want_blank_line {

    # Request a blank line: one is written only if no blank line has been
    # counted since the last nonblank line
    my ($self) = @_;
    if ( !$self->{_consecutive_blank_lines} ) {
        $self->write_blank_code_line();
    }
}

20180

20181

sub write_blank_code_line {

    # Write a blank line, unless the number of consecutive blank lines
    # has already reached the 'maximum-consecutive-blank-lines' option
    my ($self) = @_;
    my $blank_limit = $self->{_rOpts}->{'maximum-consecutive-blank-lines'};
    return if ( $self->{_consecutive_blank_lines} >= $blank_limit );

    $self->{_consecutive_blank_lines} += 1;
    $self->{_consecutive_nonblank_lines} = 0;
    $self->write_line("\n");
}

20191

20192

sub write_code_line {

    # Write a line of code and keep the counts of consecutive blank and
    # nonblank lines up to date.  A line holding only whitespace counts
    # as blank, and is dropped if the number of consecutive blank lines
    # has already reached the 'maximum-consecutive-blank-lines' option.
    my ( $self, $text ) = @_;

    if ( $text =~ /^\s*$/ ) {
        my $blank_limit =
          $self->{_rOpts}->{'maximum-consecutive-blank-lines'};
        return if ( $self->{_consecutive_blank_lines} >= $blank_limit );

        $self->{_consecutive_blank_lines} += 1;
        $self->{_consecutive_nonblank_lines} = 0;
    }
    else {
        $self->{_consecutive_nonblank_lines} += 1;
        $self->{_consecutive_blank_lines} = 0;
    }
    $self->write_line($text);
}

20210

20211

sub write_line {

    # Send one line to the line sink and update the line number and the
    # line length statistics
    my ( $self, $text ) = @_;

    # TODO: go through and see if the test is necessary here
    $self->{_output_line_number}++ if ( $text =~ /\n$/ );

    $self->{_line_sink_object}->write_line($text);

    # This calculation of excess line length ignores any internal tabs;
    # each leading tab is counted as 'indent-columns' characters
    my $rOpts       = $self->{_rOpts};
    my $text_length = length($text);
    my $exceed      = $text_length - $rOpts->{'maximum-line-length'} - 1;
    if ( $text =~ /^\t+/g ) {
        my $leading_tabs = pos($text);
        $exceed += $leading_tabs * ( $rOpts->{'indent-columns'} - 1 );
    }

    # Note that we just incremented output line number to future value
    # so we must subtract 1 for current line number
    my $this_line_number = $self->{_output_line_number} - 1;

    if ( $text_length > 1 + $self->{_max_output_line_length} ) {
        $self->{_max_output_line_length}    = $text_length - 1;
        $self->{_max_output_line_length_at} = $this_line_number;
    }

    # the rest is bookkeeping for lines which are too long
    return unless ( $exceed > 0 );

    $self->{_last_line_length_error}    = $exceed;
    $self->{_last_line_length_error_at} = $this_line_number;

    if ( $self->{_line_length_error_count} == 0 ) {
        $self->{_first_line_length_error}    = $exceed;
        $self->{_first_line_length_error_at} = $this_line_number;
    }

    if ( $exceed > $self->{_max_line_length_error} ) {
        $self->{_max_line_length_error}    = $exceed;
        $self->{_max_line_length_error_at} = $this_line_number;
    }

    # only the first few occurrences are logged individually
    if ( $self->{_line_length_error_count} < MAX_NAG_MESSAGES ) {
        $self->write_logfile_entry(
            "Line length exceeded by $exceed characters\n");
    }
    $self->{_line_length_error_count}++;
}

20258

20259

sub report_line_length_errors {

    # Write a summary of the line length statistics to the log file
    my ($self) = @_;
    my $rOpts       = $self->{_rOpts};
    my $error_count = $self->{_line_length_error_count};

    # No long lines: report the longest line that was written
    if ( $error_count == 0 ) {
        my $longest    = $self->{_max_output_line_length};
        my $longest_at = $self->{_max_output_line_length_at};
        $self->write_logfile_entry(
            "No lines exceeded $rOpts->{'maximum-line-length'} characters\n");
        $self->write_logfile_entry(
" Maximum output line length was $longest at line $longest_at\n"
        );
        return;
    }

    # Otherwise report the count and the first occurrence ..
    my $several = ( $error_count > 1 );
    my $plural  = $several ? "s" : "";
    $self->write_logfile_entry(
"$error_count output line$plural exceeded $rOpts->{'maximum-line-length'} characters:\n"
    );

    my $label    = $several ? "First" : "";
    my $first    = $self->{_first_line_length_error};
    my $first_at = $self->{_first_line_length_error_at};
    $self->write_logfile_entry(
        " $label at line $first_at by $first characters\n");

    # .. and, if there was more than one, the largest and the last
    return unless ($several);

    my $largest    = $self->{_max_line_length_error};
    my $largest_at = $self->{_max_line_length_error_at};
    my $last       = $self->{_last_line_length_error};
    my $last_at    = $self->{_last_line_length_error_at};
    $self->write_logfile_entry(
        " Maximum at line $largest_at by $largest characters\n");
    $self->write_logfile_entry(
        " Last at line $last_at by $last characters\n");
    return;
}

20301

20302

#####################################################################

20303

#

20304

# The Perl::Tidy::Debugger class shows line tokenization

20305

#

20306

#####################################################################

20307

20308

package Perl::Tidy::Debugger;

20309

20310

sub new {

    # Create a debugger object for the given debug file name.  The file
    # itself is not opened here.
    my ( $class, $filename ) = @_;

    my $self = {
        _debug_file        => $filename,
        _debug_file_opened => 0,
        _fh                => undef,
    };
    return bless $self, $class;
}

20320

20321

sub really_open_debug_file {

    # Open the debug file for writing and write the header line to it.
    #
    # The attempt is recorded in _debug_file_opened whether or not it
    # succeeds, so the open is tried only once.  If the file cannot be
    # opened, a warning is issued and _fh is left undefined.
    my $self       = shift;
    my $debug_file = $self->{_debug_file};

    # The mode is passed separately from the file name so that the name
    # is used exactly as given, whatever characters it contains
    my $fh = IO::File->new( $debug_file, '>' );

    $self->{_debug_file_opened} = 1;
    $self->{_fh}                = $fh;

    unless ($fh) {
        warn("can't open $debug_file: $!\n");
        return;
    }

    print $fh
      "Use -dump-token-types (-dtt) to get a list of token type codes\n";
    return;
}

20334

20335

sub close_debug_file {

    # Close the debug file if an attempt was made to open it.  The close
    # is wrapped in an eval so that any failure is ignored.
    my ($self) = @_;
    my $fh = $self->{_fh};
    if ( $self->{_debug_file_opened} ) {
        eval { $fh->close() };
    }
}

20344

20345

sub write_debug_entry {

    # This is a debug dump routine which may be modified as necessary
    # to dump tokens on a line-by-line basis.  The output will be written
    # to the .DEBUG file when the -D flag is entered.
    #
    # Argument: a reference to a 'line_of_tokens' hash.  As written, two
    # lines are output for each input line: the text rebuilt from the
    # tokens, and below it a string in which each token is replaced by
    # its type (a one-character type is repeated to the token's length).
    # Nothing is written if there is no debug file handle.
    my ( $self, $line_of_tokens ) = @_;

    # Some of these are only needed by the optional output statements
    # which are commented out below
    my $input_line        = $line_of_tokens->{_line_text};
    my $rtoken_type       = $line_of_tokens->{_rtoken_type};
    my $rtokens           = $line_of_tokens->{_rtokens};
    my $rlevels           = $line_of_tokens->{_rlevels};
    my $rslevels          = $line_of_tokens->{_rslevels};
    my $rblock_type       = $line_of_tokens->{_rblock_type};
    my $input_line_number = $line_of_tokens->{_line_number};
    my $line_type         = $line_of_tokens->{_line_type};

    my $token_str              = "$input_line_number: ";
    my $reconstructed_original = "$input_line_number: ";
    my $block_str              = "$input_line_number: ";

    #$token_str .= "$line_type: ";
    #$reconstructed_original .= "$line_type: ";

    my $pattern   = "";
    my @next_char = ( '"', '"' );
    my $i_next    = 0;
    unless ( $self->{_debug_file_opened} ) { $self->really_open_debug_file() }
    my $fh = $self->{_fh};

    # Printing to an undefined file handle is a fatal error, so give up
    # quietly if there is no debug file to write to
    return unless ($fh);

    foreach my $j ( 0 .. $#{$rtoken_type} ) {

        # testing patterns
        if ( $rtoken_type->[$j] eq 'k' ) {
            $pattern .= $rtokens->[$j];
        }
        else {
            $pattern .= $rtoken_type->[$j];
        }
        $reconstructed_original .= $rtokens->[$j];
        $block_str .= "($$rblock_type[$j])";
        my $num      = length( $rtokens->[$j] );
        my $type_str = $rtoken_type->[$j];

        # be sure there are no blank tokens (shouldn't happen)
        # This can only happen if a programming error has been made
        # because all valid tokens are non-blank
        if ( $type_str eq ' ' ) {
            print $fh "BLANK TOKEN on the next line\n";
            $type_str = $next_char[$i_next];
            $i_next   = 1 - $i_next;
        }

        if ( length($type_str) == 1 ) {
            $type_str = $type_str x $num;
        }
        $token_str .= $type_str;
    }

    # Write what you want here ...
    # print $fh "$input_line\n";
    # print $fh "$pattern\n";
    print $fh "$reconstructed_original\n";
    print $fh "$token_str\n";

    #print $fh "$block_str\n";
    return;
}

20414

20415

#####################################################################

20416

#

20417

# The Perl::Tidy::LineBuffer class supplies a 'get_line()'

20418

# method for returning the next line to be parsed, as well as a

20419

# 'peek_ahead()' method

20420

#

20421

# The input parameter is an object with a 'get_line()' method

20422

# which returns the next line to be parsed

20423

#

20424

#####################################################################

20425

20426

package Perl::Tidy::LineBuffer;

20427

20428

sub new {

    # Create a line buffer around the given line source object.  The
    # lookahead buffer starts out empty.
    my ( $class, $line_source_object ) = @_;

    my $self = {
        _line_source_object => $line_source_object,
        _rlookahead_buffer  => [],
    };
    return bless $self, $class;
}

20438

20439

sub peek_ahead {

    # Return a line ahead of the current position without consuming it.
    #
    # If $buffer_index lies within the lookahead buffer the buffered line
    # is returned.  Otherwise exactly one more line is read from the
    # source, appended to the buffer and returned (this may be undef if
    # the source returns undef).
    # NOTE(review): only one line is read however large $buffer_index is,
    # so callers presumably peek at consecutive indexes starting from 0
    # -- confirm against the callers.
    my ( $self, $buffer_index ) = @_;
    my $rbuffer = $self->{_rlookahead_buffer};

    if ( $buffer_index < scalar( @{$rbuffer} ) ) {
        return $rbuffer->[$buffer_index];
    }

    my $line = $self->{_line_source_object}->get_line();
    push @{$rbuffer}, $line;
    return $line;
}

20454

20455

sub get_line {

    # Return the next line to be parsed: the oldest line in the lookahead
    # buffer if it holds any, otherwise a fresh line from the source
    my ($self) = @_;
    my $rbuffer = $self->{_rlookahead_buffer};

    if ( scalar( @{$rbuffer} ) ) {
        my $buffered_line = shift @{$rbuffer};
        return $buffered_line;
    }

    my $line = $self->{_line_source_object}->get_line();
    return $line;
}

20469

20470

########################################################################

20471

#

20472

# the Perl::Tidy::Tokenizer package is essentially a filter which

20473

# reads lines of perl source code from a source object and provides

20474

# corresponding tokenized lines through its get_line() method. Lines

20475

# flow from the source_object to the caller like this:

20476

#

20477

# source_object --> LineBuffer_object --> Tokenizer --> calling routine

20478

# get_line() get_line() get_line() line_of_tokens

20479

#

20480

# The source object can be any object with a get_line() method which

20481

# supplies one line (a character string) per call.

20482

# The LineBuffer object is created by the Tokenizer.

20483

# The Tokenizer returns a reference to a data structure 'line_of_tokens'

20484

# containing one tokenized line for each call to its get_line() method.

20485

#

20486

# WARNING: This is not a real class yet. Only one tokenizer may be used.

20487

#

20488

########################################################################

20489

20490

package Perl::Tidy::Tokenizer;

20491

20492

BEGIN {

    # Caution: these debug flags produce a lot of output
    # They should all be 0 except when debugging small scripts
    use constant TOKENIZER_DEBUG_FLAG_EXPECT   => 0;
    use constant TOKENIZER_DEBUG_FLAG_NSCAN    => 0;
    use constant TOKENIZER_DEBUG_FLAG_QUOTE    => 0;
    use constant TOKENIZER_DEBUG_FLAG_SCAN_ID  => 0;
    use constant TOKENIZER_DEBUG_FLAG_TOKENIZE => 0;

    # Announce each flag which has been switched on
    my @flag_settings = (
        [ 'EXPECT',   TOKENIZER_DEBUG_FLAG_EXPECT() ],
        [ 'NSCAN',    TOKENIZER_DEBUG_FLAG_NSCAN() ],
        [ 'QUOTE',    TOKENIZER_DEBUG_FLAG_QUOTE() ],
        [ 'SCAN_ID',  TOKENIZER_DEBUG_FLAG_SCAN_ID() ],
        [ 'TOKENIZE', TOKENIZER_DEBUG_FLAG_TOKENIZE() ],
    );
    foreach my $setting (@flag_settings) {
        my ( $key, $is_on ) = @{$setting};
        print "TOKENIZER_DEBUGGING with key $key\n" if ($is_on);
    }

}

20514

20515

use Carp;

20516

20517

# PACKAGE VARIABLES for processing an entire FILE.

20518

use vars qw{

20519

$tokenizer_self

20520

20521

$last_nonblank_token

20522

$last_nonblank_type

20523

$last_nonblank_block_type

20524

$statement_type

20525

$in_attribute_list

20526

$current_package

20527

$context

20528

20529

%is_constant

20530

%is_user_function

20531

%user_function_prototype

20532

%is_block_function

20533

%is_block_list_function

20534

%saw_function_definition

20535

20536

$brace_depth

20537

$paren_depth

20538

$square_bracket_depth

20539

20540

@current_depth

20541

@total_depth

20542

$total_depth

20543

@nesting_sequence_number

20544

@current_sequence_number

20545

@paren_type

20546

@paren_semicolon_count

20547

@paren_structural_type

20548

@brace_type

20549

@brace_structural_type

20550

@brace_statement_type

20551

@brace_context

20552

@brace_package

20553

@square_bracket_type

20554

@square_bracket_structural_type

20555

@depth_array

20556

@nested_ternary_flag

20557

@starting_line_of_current_depth

20558

};

20559

20560

# GLOBAL CONSTANTS for routines in this package

20561

use vars qw{

20562

%is_indirect_object_taker

20563

%is_block_operator

20564

%expecting_operator_token

20565

%expecting_operator_types

20566

%expecting_term_types

20567

%expecting_term_token

20568

%is_digraph

20569

%is_file_test_operator

20570

%is_trigraph

20571

%is_valid_token_type

20572

%is_keyword

20573

%is_code_block_token

20574

%really_want_term

20575

@opening_brace_names

20576

@closing_brace_names

20577

%is_keyword_taking_list

20578

%is_q_qq_qw_qx_qr_s_y_tr_m

20579

};

20580

20581

# possible values of operator_expected()
use constant TERM     => -1;
use constant UNKNOWN  => 0;
use constant OPERATOR => 1;

# possible values of context
use constant SCALAR_CONTEXT  => -1;
use constant UNKNOWN_CONTEXT => 0;
use constant LIST_CONTEXT    => 1;

# Maximum number of little messages; probably need not be changed.
use constant MAX_NAG_MESSAGES => 6;

{

    # methods to count instances
    #
    # $_count is private to this bare block; it is only reachable through
    # the three accessors below.
    my $_count = 0;

    # return the current number of live instances
    sub get_count { $_count; }

    # add one instance and return the new count
    sub _increment_count { ++$_count }

    # remove one instance and return the new count
    sub _decrement_count { --$_count }
}

sub DESTROY {

    # Destructor: keep the instance count in step so that a new tokenizer
    # may be created after this one has gone away.
    $_[0]->_decrement_count();
}

sub new {

    # Constructor.  Called as a class method with optional key => value
    # pairs; any key given overrides the matching entry in %defaults.
    # Returns the blessed tokenizer object, which is also stored in the
    # package global $tokenizer_self.  Dies (confess) if a second object
    # is created while one already exists.
    my $class = shift;

    # Note: 'tabs' and 'indent_columns' are temporary and should be
    # removed asap
    my %defaults = (
        source_object        => undef,
        debugger_object      => undef,
        diagnostics_object   => undef,
        logger_object        => undef,
        starting_level       => undef,
        indent_columns       => 4,
        tabs                 => 0,
        look_for_hash_bang   => 0,
        trim_qw              => 1,
        look_for_autoloader  => 1,
        look_for_selfloader  => 1,
        starting_line_number => 1,
    );
    my %args = ( %defaults, @_ );

    # we are given an object with a get_line() method to supply source lines
    my $source_object = $args{source_object};

    # we create another object with a get_line() and peek_ahead() method
    my $line_buffer_object = Perl::Tidy::LineBuffer->new($source_object);

    # Tokenizer state data is as follows:
    # _rhere_target_list    reference to list of here-doc targets
    # _here_doc_target      the target string for a here document
    # _here_quote_character the type of here-doc quoting (" ' ` or none)
    #                       to determine if interpolation is done
    # _quote_target         character we seek if chasing a quote
    # _line_start_quote     line where we started looking for a long quote
    # _in_here_doc          flag indicating if we are in a here-doc
    # _in_pod               flag set if we are in pod documentation
    # _in_error             flag set if we saw severe error (binary in script)
    # _in_data              flag set if we are in __DATA__ section
    # _in_end               flag set if we are in __END__ section
    # _in_format            flag set if we are in a format description
    # _in_attribute_list    flag telling if we are looking for attributes
    # _in_quote             flag telling if we are chasing a quote
    # _starting_level       indentation level of first line
    # _know_starting_level  true if the caller supplied 'starting_level'
    # _input_tabstr         string denoting one indentation level of input file
    # _know_input_tabstr    flag indicating if we know _input_tabstr
    #                       (-1 = no guess made yet, 0 = guessed, 1 = known)
    # _line_buffer_object   object with get_line() method to supply source code
    # _diagnostics_object   place to write debugging information
    # _unexpected_error_count  error count used to limit output
    # _rlower_case_labels_at   line numbers where lower case labels seen
    $tokenizer_self = {
        _rhere_target_list                  => [],
        _in_here_doc                        => 0,
        _here_doc_target                    => "",
        _here_quote_character               => "",
        _in_data                            => 0,
        _in_end                             => 0,
        _in_format                          => 0,
        _in_error                           => 0,
        _in_pod                             => 0,
        _in_attribute_list                  => 0,
        _in_quote                           => 0,
        _quote_target                       => "",
        _line_start_quote                   => -1,
        _starting_level                     => $args{starting_level},
        _know_starting_level                => defined( $args{starting_level} ),
        _tabs                               => $args{tabs},
        _indent_columns                     => $args{indent_columns},
        _look_for_hash_bang                 => $args{look_for_hash_bang},
        _trim_qw                            => $args{trim_qw},
        _input_tabstr                       => "",
        _know_input_tabstr                  => -1,
        _last_line_number                   => $args{starting_line_number} - 1,
        _saw_perl_dash_P                    => 0,
        _saw_perl_dash_w                    => 0,
        _saw_use_strict                     => 0,
        _saw_v_string                       => 0,
        _look_for_autoloader                => $args{look_for_autoloader},
        _look_for_selfloader                => $args{look_for_selfloader},
        _saw_autoloader                     => 0,
        _saw_selfloader                     => 0,
        _saw_hash_bang                      => 0,
        _saw_end                            => 0,
        _saw_data                           => 0,
        _saw_negative_indentation           => 0,
        _started_tokenizing                 => 0,
        _line_buffer_object                 => $line_buffer_object,
        _debugger_object                    => $args{debugger_object},
        _diagnostics_object                 => $args{diagnostics_object},
        _logger_object                      => $args{logger_object},
        _unexpected_error_count             => 0,
        _started_looking_for_here_target_at => 0,
        _nearly_matched_here_target_at      => undef,
        _line_text                          => "",
        _rlower_case_labels_at              => undef,
    };

    # These two calls work on the global $tokenizer_self, so they must
    # come after it has been assigned above.
    prepare_for_a_new_file();
    find_starting_indentation_level();

    bless $tokenizer_self, $class;

    # This is not a full class yet, so die if an attempt is made to
    # create more than one object.

    if ( _increment_count() > 1 ) {
        confess
"Attempt to create more than 1 object in $class, which is not a true class yet\n";
    }

    return $tokenizer_self;

}

# interface to Perl::Tidy::Logger routines

# Pass a warning message on to the logger object, if there is one.
sub warning {
    my $logger_object = $tokenizer_self->{_logger_object};
    if ($logger_object) {
        $logger_object->warning(@_);
    }
}

# Pass a complaint on to the logger object, if there is one.
sub complain {
    my $logger_object = $tokenizer_self->{_logger_object};
    if ($logger_object) {
        $logger_object->complain(@_);
    }
}

# Write a message to the logfile via the logger object, if there is one.
sub write_logfile_entry {
    my $logger_object = $tokenizer_self->{_logger_object};
    if ($logger_object) {
        $logger_object->write_logfile_entry(@_);
    }
}

# Forward an interrupt_logfile request to the logger object, if there is one.
sub interrupt_logfile {
    my $logger_object = $tokenizer_self->{_logger_object};
    if ($logger_object) {
        $logger_object->interrupt_logfile();
    }
}

# Forward a resume_logfile request to the logger object, if there is one.
sub resume_logfile {
    my $logger_object = $tokenizer_self->{_logger_object};
    if ($logger_object) {
        $logger_object->resume_logfile();
    }
}

# Tell the logger object, if there is one, that a brace error was seen.
sub increment_brace_error {
    my $logger_object = $tokenizer_self->{_logger_object};
    if ($logger_object) {
        $logger_object->increment_brace_error();
    }
}

# Forward a report_definite_bug request to the logger object, if there is one.
sub report_definite_bug {
    my $logger_object = $tokenizer_self->{_logger_object};
    if ($logger_object) {
        $logger_object->report_definite_bug();
    }
}

# Pass a brace warning message on to the logger object, if there is one.
sub brace_warning {
    my $logger_object = $tokenizer_self->{_logger_object};
    if ($logger_object) {
        $logger_object->brace_warning(@_);
    }
}

# Return the logger object's get_saw_brace_error() value, or 0 if there
# is no logger object.
sub get_saw_brace_error {
    my $logger_object = $tokenizer_self->{_logger_object};
    if ($logger_object) {
        $logger_object->get_saw_brace_error();
    }
    else {
        0;
    }
}

# interface to Perl::Tidy::Diagnostics routines

# Pass a message on to the diagnostics object, if there is one.
sub write_diagnostics {
    if ( $tokenizer_self->{_diagnostics_object} ) {
        $tokenizer_self->{_diagnostics_object}->write_diagnostics(@_);
    }
}

sub report_tokenization_errors {

    # Write end-of-file warnings, complaints and suggestions based on the
    # final tokenizer state: unbalanced indentation, an unterminated pod,
    # here-doc, format or quote, and missing -w / 'use strict'.
    # USES GLOBAL VARIABLES: $tokenizer_self

    my $self = shift;

    my $level = get_indentation_level();
    if ( $level != $tokenizer_self->{_starting_level} ) {
        warning("final indentation level: $level\n");
    }

    check_final_nesting_depths();

    if (   $tokenizer_self->{_look_for_hash_bang}
        && !$tokenizer_self->{_saw_hash_bang} )
    {
        warning(
            "hit EOF without seeing hash-bang line; maybe don't need -x?\n");
    }

    if ( $tokenizer_self->{_in_format} ) {
        warning("hit EOF while in format description\n");
    }

    if ( $tokenizer_self->{_in_pod} ) {

        my $pod_msg =
"hit eof while in pod documentation (no =cut seen)\n\tthis can cause trouble with some pod utilities\n";

        # Just write log entry if this is after __END__ or __DATA__
        # because this happens too often, and it is not likely to be
        # a parsing error.
        if ( $tokenizer_self->{_saw_data} || $tokenizer_self->{_saw_end} ) {
            write_logfile_entry($pod_msg);
        }

        else {
            complain($pod_msg);
        }

    }

    if ( $tokenizer_self->{_in_here_doc} ) {
        my $here_doc_target = $tokenizer_self->{_here_doc_target};
        my $started_looking_for_here_target_at =
          $tokenizer_self->{_started_looking_for_here_target_at};

        # test the length, not the truth, of the target so that a target
        # of "0" is not reported as an empty target
        if ( defined($here_doc_target) && length($here_doc_target) ) {
            warning(
"hit EOF in here document starting at line $started_looking_for_here_target_at with target: $here_doc_target\n"
            );
        }
        else {
            warning(
"hit EOF in here document starting at line $started_looking_for_here_target_at with empty target string\n"
            );
        }
        my $nearly_matched_here_target_at =
          $tokenizer_self->{_nearly_matched_here_target_at};
        if ($nearly_matched_here_target_at) {
            warning(
"NOTE: almost matched at input line $nearly_matched_here_target_at except for whitespace\n"
            );
        }
    }

    if ( $tokenizer_self->{_in_quote} ) {
        my $line_start_quote = $tokenizer_self->{_line_start_quote};
        my $quote_target     = $tokenizer_self->{_quote_target};
        my $what =
          ( $tokenizer_self->{_in_attribute_list} )
          ? "attribute list"
          : "quote/pattern";
        warning(
"hit EOF seeking end of $what starting at line $line_start_quote ending in $quote_target\n"
        );
    }

    unless ( $tokenizer_self->{_saw_perl_dash_w} ) {

        # $] is the version of the perl which is running this script
        if ( $] < 5.006 ) {
            write_logfile_entry("Suggest including '-w parameter'\n");
        }
        else {
            write_logfile_entry("Suggest including 'use warnings;'\n");
        }
    }

    if ( $tokenizer_self->{_saw_perl_dash_P} ) {
        write_logfile_entry("Use of -P parameter for defines is discouraged\n");
    }

    unless ( $tokenizer_self->{_saw_use_strict} ) {
        write_logfile_entry("Suggest including 'use strict;'\n");
    }

    # it is suggested that labels have at least one upper case character
    # for legibility and to avoid code breakage as new keywords are introduced
    if ( $tokenizer_self->{_rlower_case_labels_at} ) {
        my @lower_case_labels_at =
          @{ $tokenizer_self->{_rlower_case_labels_at} };
        write_logfile_entry(
            "Suggest using upper case characters in label(s)\n");

        # the list separator makes the interpolated list read (n1)(n2)...
        local $" = ')(';
        write_logfile_entry(" defined at line(s): (@lower_case_labels_at)\n");
    }
}

sub report_v_string {

    # warn if this version can't handle v-strings
    #
    # $tok is the v-string token which was found.  The line number of the
    # first v-string seen is remembered in _saw_v_string.
    my $tok = shift;
    unless ( $tokenizer_self->{_saw_v_string} ) {
        $tokenizer_self->{_saw_v_string} = $tokenizer_self->{_last_line_number};
    }
    if ( $] < 5.006 ) {
        warning(
"Found v-string '$tok' but v-strings are not implemented in your version of perl; see Camel 3 book ch 2\n"
        );
    }
}

# Return the number of the input line most recently read by get_line().
sub get_input_line_number {
    return $tokenizer_self->{_last_line_number};
}

# returns the next tokenized line
#
# The return value is a reference to a hash describing the line (see the
# description of $line_of_tokens below), or undef at the end of the input.
sub get_line {

    my $self = shift;

    # USES GLOBAL VARIABLES: $tokenizer_self, $brace_depth,
    # $square_bracket_depth, $paren_depth

    my $input_line = $tokenizer_self->{_line_buffer_object}->get_line();
    $tokenizer_self->{_line_text} = $input_line;

    # An undefined or empty line marks the end of the input.  The length
    # is tested, rather than the truth, so that a final line consisting of
    # "0" with no line terminator is not mistaken for the end of the file.
    # (An explicit undef is returned, as before, for existing callers.)
    return undef unless ( defined($input_line) && length($input_line) );

    my $input_line_number = ++$tokenizer_self->{_last_line_number};

    # Remove the characters which terminate this line, including any
    # control r
    chomp $input_line;

    # TODO: what other characters should be included here?
    $input_line =~ s/[\r\035\032]+$//;

    # for backwards compatibility we keep the line text terminated with
    # a newline character
    $input_line .= "\n";
    $tokenizer_self->{_line_text} = $input_line;    # update

    # create a data structure describing this line which will be
    # returned to the caller.

    # _line_type codes are:
    #   SYSTEM         - system-specific code before hash-bang line
    #   CODE           - line of perl code (including comments)
    #   POD_START      - line starting pod, such as '=head'
    #   POD            - pod documentation text
    #   POD_END        - last line of pod section, '=cut'
    #   HERE           - text of here-document
    #   HERE_END       - last line of here-doc (target word)
    #   FORMAT         - format section
    #   FORMAT_END     - last line of format section, '.'
    #   DATA_START     - __DATA__ line
    #   DATA           - unidentified text following __DATA__
    #   END_START      - __END__ line
    #   END            - unidentified text following __END__
    #   ERROR          - we are in big trouble, probably not a perl script

    # Other variables:
    #   _curly_brace_depth     - depth of curly braces at start of line
    #   _square_bracket_depth  - depth of square brackets at start of line
    #   _paren_depth           - depth of parens at start of line
    #   _starting_in_quote     - this line continues a multi-line quote
    #                            (so don't trim leading blanks!)
    #   _ending_in_quote       - this line ends in a multi-line quote
    #                            (so don't trim trailing blanks!)
    my $line_of_tokens = {
        _line_type                => 'EOF',
        _line_text                => $input_line,
        _line_number              => $input_line_number,
        _rtoken_type              => undef,
        _rtokens                  => undef,
        _rlevels                  => undef,
        _rslevels                 => undef,
        _rblock_type              => undef,
        _rcontainer_type          => undef,
        _rcontainer_environment   => undef,
        _rtype_sequence           => undef,
        _rnesting_tokens          => undef,
        _rci_levels               => undef,
        _rnesting_blocks          => undef,
        _python_indentation_level => -1,                      ## 0,
        _starting_in_quote        => 0,    # to be set by subroutine
        _ending_in_quote          => 0,
        _curly_brace_depth        => $brace_depth,
        _square_bracket_depth     => $square_bracket_depth,
        _paren_depth              => $paren_depth,
        _quote_character          => '',
    };

    # must print line unchanged if we are in a here document
    if ( $tokenizer_self->{_in_here_doc} ) {

        $line_of_tokens->{_line_type} = 'HERE';
        my $here_doc_target      = $tokenizer_self->{_here_doc_target};
        my $here_quote_character = $tokenizer_self->{_here_quote_character};
        my $candidate_target     = $input_line;
        chomp $candidate_target;
        if ( $candidate_target eq $here_doc_target ) {
            $tokenizer_self->{_nearly_matched_here_target_at} = undef;
            $line_of_tokens->{_line_type} = 'HERE_END';
            write_logfile_entry("Exiting HERE document $here_doc_target\n");

            my $rhere_target_list = $tokenizer_self->{_rhere_target_list};
            if (@$rhere_target_list) {    # there can be multiple here targets
                ( $here_doc_target, $here_quote_character ) =
                  @{ shift @$rhere_target_list };
                $tokenizer_self->{_here_doc_target} = $here_doc_target;
                $tokenizer_self->{_here_quote_character} =
                  $here_quote_character;
                write_logfile_entry(
                    "Entering HERE document $here_doc_target\n");
                $tokenizer_self->{_nearly_matched_here_target_at} = undef;
                $tokenizer_self->{_started_looking_for_here_target_at} =
                  $input_line_number;
            }
            else {
                $tokenizer_self->{_in_here_doc}          = 0;
                $tokenizer_self->{_here_doc_target}      = "";
                $tokenizer_self->{_here_quote_character} = "";
            }
        }

        # check for error of extra whitespace
        # note for PERL6: leading whitespace is allowed
        else {
            $candidate_target =~ s/\s*$//;
            $candidate_target =~ s/^\s*//;
            if ( $candidate_target eq $here_doc_target ) {
                $tokenizer_self->{_nearly_matched_here_target_at} =
                  $input_line_number;
            }
        }
        return $line_of_tokens;
    }

    # must print line unchanged if we are in a format section
    elsif ( $tokenizer_self->{_in_format} ) {

        if ( $input_line =~ /^\.[\s#]*$/ ) {
            write_logfile_entry("Exiting format section\n");
            $tokenizer_self->{_in_format} = 0;
            $line_of_tokens->{_line_type} = 'FORMAT_END';
        }
        else {
            $line_of_tokens->{_line_type} = 'FORMAT';
        }
        return $line_of_tokens;
    }

    # must print line unchanged if we are in pod documentation
    elsif ( $tokenizer_self->{_in_pod} ) {

        $line_of_tokens->{_line_type} = 'POD';
        if ( $input_line =~ /^=cut/ ) {
            $line_of_tokens->{_line_type} = 'POD_END';
            write_logfile_entry("Exiting POD section\n");
            $tokenizer_self->{_in_pod} = 0;
        }
        if ( $input_line =~ /^\#\!.*perl\b/ ) {
            warning(
                "Hash-bang in pod can cause older versions of perl to fail! \n"
            );
        }

        return $line_of_tokens;
    }

    # must print line unchanged if we have seen a severe error (i.e., we
    # are seeing illegal tokens and cannot continue.  Syntax errors do
    # not pass this route).  Calling routine can decide what to do, but
    # the default can be to just pass all lines as if they were after __END__
    elsif ( $tokenizer_self->{_in_error} ) {
        $line_of_tokens->{_line_type} = 'ERROR';
        return $line_of_tokens;
    }

    # print line unchanged if we are __DATA__ section
    elsif ( $tokenizer_self->{_in_data} ) {

        # ...but look for POD
        # Note that the _in_data and _in_end flags remain set
        # so that we return to that state after seeing the
        # end of a pod section
        if ( $input_line =~ /^=(?!cut)/ ) {
            $line_of_tokens->{_line_type} = 'POD_START';
            write_logfile_entry("Entering POD section\n");
            $tokenizer_self->{_in_pod} = 1;
            return $line_of_tokens;
        }
        else {
            $line_of_tokens->{_line_type} = 'DATA';
            return $line_of_tokens;
        }
    }

    # print line unchanged if we are in __END__ section
    elsif ( $tokenizer_self->{_in_end} ) {

        # ...but look for POD
        # Note that the _in_data and _in_end flags remain set
        # so that we return to that state after seeing the
        # end of a pod section
        if ( $input_line =~ /^=(?!cut)/ ) {
            $line_of_tokens->{_line_type} = 'POD_START';
            write_logfile_entry("Entering POD section\n");
            $tokenizer_self->{_in_pod} = 1;
            return $line_of_tokens;
        }
        else {
            $line_of_tokens->{_line_type} = 'END';
            return $line_of_tokens;
        }
    }

    # check for a hash-bang line if we haven't seen one
    if ( !$tokenizer_self->{_saw_hash_bang} ) {
        if ( $input_line =~ /^\#\!.*perl\b/ ) {
            $tokenizer_self->{_saw_hash_bang} = $input_line_number;

            # check for -w and -P flags
            if ( $input_line =~ /^\#\!.*perl\s.*-.*P/ ) {
                $tokenizer_self->{_saw_perl_dash_P} = 1;
            }

            if ( $input_line =~ /^\#\!.*perl\s.*-.*w/ ) {
                $tokenizer_self->{_saw_perl_dash_w} = 1;
            }

            if (   ( $input_line_number > 1 )
                && ( !$tokenizer_self->{_look_for_hash_bang} ) )
            {

                # this is helpful for VMS systems; we may have accidentally
                # tokenized some DCL commands
                if ( $tokenizer_self->{_started_tokenizing} ) {
                    warning(
"There seems to be a hash-bang after line 1; do you need to run with -x ?\n"
                    );
                }
                else {
                    complain("Useless hash-bang after line 1\n");
                }
            }

            # Report the leading hash-bang as a system line
            # This will prevent -dac from deleting it
            else {
                $line_of_tokens->{_line_type} = 'SYSTEM';
                return $line_of_tokens;
            }
        }
    }

    # wait for a hash-bang before parsing if the user invoked us with -x
    if (   $tokenizer_self->{_look_for_hash_bang}
        && !$tokenizer_self->{_saw_hash_bang} )
    {
        $line_of_tokens->{_line_type} = 'SYSTEM';
        return $line_of_tokens;
    }

    # a first line of the form ': #' will be marked as SYSTEM
    # since lines of this form may be used by tcsh
    if ( $input_line_number == 1 && $input_line =~ /^\s*\:\s*\#/ ) {
        $line_of_tokens->{_line_type} = 'SYSTEM';
        return $line_of_tokens;
    }

    # now we know that it is ok to tokenize the line...
    # the line tokenizer will modify any of these private variables:
    #        _rhere_target_list
    #        _in_data
    #        _in_end
    #        _in_format
    #        _in_error
    #        _in_pod
    #        _in_quote
    tokenize_this_line($line_of_tokens);

    # Now finish defining the return structure and return it
    $line_of_tokens->{_ending_in_quote} = $tokenizer_self->{_in_quote};

    # handle severe error (binary data in script)
    if ( $tokenizer_self->{_in_error} ) {
        $tokenizer_self->{_in_quote} = 0;    # to avoid any more messages
        warning("Giving up after error\n");
        $line_of_tokens->{_line_type} = 'ERROR';
        reset_indentation_level(0);          # avoid error messages
        return $line_of_tokens;
    }

    # handle start of pod documentation
    if ( $tokenizer_self->{_in_pod} ) {

        # This gets tricky..above a __DATA__ or __END__ section, perl
        # accepts '=cut' as the start of pod section. But afterwards,
        # only pod utilities see it and they may ignore an =cut without
        # leading =head.  In any case, this isn't good.
        if ( $input_line =~ /^=cut\b/ ) {
            if ( $tokenizer_self->{_saw_data} || $tokenizer_self->{_saw_end} ) {
                complain("=cut while not in pod ignored\n");
                $tokenizer_self->{_in_pod}    = 0;
                $line_of_tokens->{_line_type} = 'POD_END';
            }
            else {
                $line_of_tokens->{_line_type} = 'POD_START';
                complain(
"=cut starts a pod section .. this can fool pod utilities.\n"
                );
                write_logfile_entry("Entering POD section\n");
            }
        }

        else {
            $line_of_tokens->{_line_type} = 'POD_START';
            write_logfile_entry("Entering POD section\n");
        }

        return $line_of_tokens;
    }

    # update indentation levels for log messages
    if ( $input_line !~ /^\s*$/ ) {
        my $rlevels                      = $line_of_tokens->{_rlevels};
        my $structural_indentation_level = $rlevels->[0];
        my ( $python_indentation_level, $msg ) =
          find_indentation_level( $input_line, $structural_indentation_level );
        if ($msg) { write_logfile_entry("$msg") }
        if ( $tokenizer_self->{_know_input_tabstr} == 1 ) {
            $line_of_tokens->{_python_indentation_level} =
              $python_indentation_level;
        }
    }

    # see if this line contains here doc targets
    my $rhere_target_list = $tokenizer_self->{_rhere_target_list};
    if (@$rhere_target_list) {

        my ( $here_doc_target, $here_quote_character ) =
          @{ shift @$rhere_target_list };
        $tokenizer_self->{_in_here_doc}          = 1;
        $tokenizer_self->{_here_doc_target}      = $here_doc_target;
        $tokenizer_self->{_here_quote_character} = $here_quote_character;
        write_logfile_entry("Entering HERE document $here_doc_target\n");
        $tokenizer_self->{_started_looking_for_here_target_at} =
          $input_line_number;
    }

    # NOTE: __END__ and __DATA__ statements are written unformatted
    # because they can theoretically contain additional characters
    # which are not tokenized (and cannot be read with <DATA> either!).
    if ( $tokenizer_self->{_in_data} ) {
        $line_of_tokens->{_line_type} = 'DATA_START';
        write_logfile_entry("Starting __DATA__ section\n");
        $tokenizer_self->{_saw_data} = 1;

        # keep parsing after __DATA__ if use SelfLoader was seen
        if ( $tokenizer_self->{_saw_selfloader} ) {
            $tokenizer_self->{_in_data} = 0;
            write_logfile_entry(
                "SelfLoader seen, continuing; -nlsl deactivates\n");
        }

        return $line_of_tokens;
    }

    elsif ( $tokenizer_self->{_in_end} ) {
        $line_of_tokens->{_line_type} = 'END_START';
        write_logfile_entry("Starting __END__ section\n");
        $tokenizer_self->{_saw_end} = 1;

        # keep parsing after __END__ if use AutoLoader was seen
        if ( $tokenizer_self->{_saw_autoloader} ) {
            $tokenizer_self->{_in_end} = 0;
            write_logfile_entry(
                "AutoLoader seen, continuing; -nlal deactivates\n");
        }
        return $line_of_tokens;
    }

    # now, finally, we know that this line is type 'CODE'
    $line_of_tokens->{_line_type} = 'CODE';

    # remember if we have seen any real code
    if (   !$tokenizer_self->{_started_tokenizing}
        && $input_line !~ /^\s*$/
        && $input_line !~ /^\s*#/ )
    {
        $tokenizer_self->{_started_tokenizing} = 1;
    }

    if ( $tokenizer_self->{_debugger_object} ) {
        $tokenizer_self->{_debugger_object}->write_debug_entry($line_of_tokens);
    }

    # Note: if keyword 'format' occurs in this line code, it is still CODE
    # (keyword 'format' need not start a line)
    if ( $tokenizer_self->{_in_format} ) {
        write_logfile_entry("Entering format section\n");
    }

    if ( $tokenizer_self->{_in_quote}
        and ( $tokenizer_self->{_line_start_quote} < 0 ) )
    {

        #if ( ( my $quote_target = get_quote_target() ) !~ /^\s*$/ ) {
        if (
            ( my $quote_target = $tokenizer_self->{_quote_target} ) !~ /^\s*$/ )
        {
            $tokenizer_self->{_line_start_quote} = $input_line_number;
            write_logfile_entry(
                "Start multi-line quote or pattern ending in $quote_target\n");
        }
    }
    elsif ( ( $tokenizer_self->{_line_start_quote} >= 0 )
        and !$tokenizer_self->{_in_quote} )
    {
        $tokenizer_self->{_line_start_quote} = -1;
        write_logfile_entry("End of multi-line quote or pattern\n");
    }

    # we are returning a line of CODE
    return $line_of_tokens;
}

sub find_starting_indentation_level {

    # Determine the indentation level of the first line of code, store it
    # in $tokenizer_self->{_starting_level}, and reset the indentation
    # level to it.
    # USES GLOBAL VARIABLES: $tokenizer_self
    my $starting_level = 0;

    # use value if given as parameter
    if ( $tokenizer_self->{_know_starting_level} ) {
        $starting_level = $tokenizer_self->{_starting_level};
    }

    # if we know there is a hash_bang line, the level must be zero
    elsif ( $tokenizer_self->{_look_for_hash_bang} ) {
        $tokenizer_self->{_know_starting_level} = 1;
    }

    # otherwise figure it out from the input file
    else {
        my $line;
        my $i                            = 0;
        my $structural_indentation_level = -1; # flag for find_indentation_level

        my $msg = "";

        # look ahead, without consuming any input, for the first line
        # which is neither blank nor a comment
        while ( $line =
            $tokenizer_self->{_line_buffer_object}->peek_ahead( $i++ ) )
        {

            # if first line is #! then assume starting level is zero
            if ( $i == 1 && $line =~ /^\#\!/ ) {
                $starting_level = 0;
                last;
            }
            next if ( $line =~ /^\s*#/ );    # must not be comment
            next if ( $line =~ /^\s*$/ );    # must not be blank
            ( $starting_level, $msg ) =
              find_indentation_level( $line, $structural_indentation_level );
            if ($msg) { write_logfile_entry("$msg") }
            last;
        }
        $msg = "Line $i implies starting-indentation-level = $starting_level\n";

        if ( $starting_level > 0 ) {

            my $input_tabstr = $tokenizer_self->{_input_tabstr};
            if ( $input_tabstr eq "\t" ) {
                $msg .= "by guessing input tabbing uses 1 tab per level\n";
            }
            else {
                my $cols = length($input_tabstr);
                $msg .=
                  "by guessing input tabbing uses $cols blanks per level\n";
            }
        }
        write_logfile_entry("$msg");
    }
    $tokenizer_self->{_starting_level} = $starting_level;
    reset_indentation_level($starting_level);
}

# Find indentation level given an input line.  At the same time, try to
# figure out the input tabbing scheme.
#
# There are two types of calls:
#
# Type 1: $structural_indentation_level < 0
#  In this case we have to guess $input_tabstr to figure out the level.
#
# Type 2: $structural_indentation_level >= 0
#  In this case the level of this line is known, and this routine can
#  update the tabbing string, if still unknown, to make the level correct.
#
# Returns the list ( $level, $msg ), where $level is the number of
# $input_tabstr units at the start of the line and $msg is a log message
# which is empty unless the tabbing scheme was determined on this call.

sub find_indentation_level {
    my ( $line, $structural_indentation_level ) = @_;

    # USES GLOBAL VARIABLES: $tokenizer_self
    my $level = 0;
    my $msg   = "";

    my $know_input_tabstr = $tokenizer_self->{_know_input_tabstr};
    my $input_tabstr      = $tokenizer_self->{_input_tabstr};

    # find leading whitespace
    my $leading_whitespace = ( $line =~ /^(\s*)/ ) ? $1 : "";

    # make first guess at input tabbing scheme if necessary
    if ( $know_input_tabstr < 0 ) {

        $know_input_tabstr = 0;

        if ( $tokenizer_self->{_tabs} ) {
            $input_tabstr = "\t";
            if ( length($leading_whitespace) > 0 ) {
                if ( $leading_whitespace !~ /\t/ ) {

                    my $cols = $tokenizer_self->{_indent_columns};

                    if ( length($leading_whitespace) < $cols ) {
                        $cols = length($leading_whitespace);
                    }
                    $input_tabstr = " " x $cols;
                }
            }
        }
        else {
            $input_tabstr = " " x $tokenizer_self->{_indent_columns};

            if ( length($leading_whitespace) > 0 ) {
                if ( $leading_whitespace =~ /^\t/ ) {
                    $input_tabstr = "\t";
                }
            }
        }
        $tokenizer_self->{_know_input_tabstr} = $know_input_tabstr;
        $tokenizer_self->{_input_tabstr}      = $input_tabstr;
    }

    # determine the input tabbing scheme if possible
    if (   ( $know_input_tabstr == 0 )
        && ( length($leading_whitespace) > 0 )
        && ( $structural_indentation_level > 0 ) )
    {
        my $saved_input_tabstr = $input_tabstr;

        # check for common case of one tab per indentation level
        if ( $leading_whitespace eq "\t" x $structural_indentation_level ) {
            $input_tabstr = "\t";
            $msg          = "Guessing old indentation was tab character\n";
        }

        else {

            # detab any leading tabs based on 8 blanks per tab
            my $entabbed = "";
            if ( $leading_whitespace =~ s/^(\t+)/' ' x ( 8 * length($1) )/e ) {
                $entabbed = "entabbed";
            }

            # now compute tabbing from number of spaces
            my $columns =
              length($leading_whitespace) / $structural_indentation_level;
            if ( $columns == int $columns ) {
                $msg =
                  "Guessing old indentation was $columns $entabbed spaces\n";
            }
            else {
                $columns = int $columns;
                $msg =
"old indentation is unclear, using $columns $entabbed spaces\n";
            }
            $input_tabstr = " " x $columns;
        }
        $know_input_tabstr                    = 1;
        $tokenizer_self->{_know_input_tabstr} = $know_input_tabstr;
        $tokenizer_self->{_input_tabstr}      = $input_tabstr;

        # see if mistakes were made
        if ( ( $tokenizer_self->{_starting_level} > 0 )
            && !$tokenizer_self->{_know_starting_level} )
        {

            if ( $input_tabstr ne $saved_input_tabstr ) {
                complain(
"I made a bad starting level guess; rerun with a value for -sil \n"
                );
            }
        }
    }

    # use current guess at input tabbing to get input indentation level
    #
    # Patch to handle a common case of entabbed leading whitespace
    # If the leading whitespace equals 4 spaces and we also have
    # tabs, detab the input whitespace assuming 8 spaces per tab.
    if ( length($input_tabstr) == 4 ) {
        $leading_whitespace =~ s/^(\t+)/' ' x ( 8 * length($1) )/e;
    }

    # count how many whole copies of the tab string start the line
    if ( ( my $len_tab = length($input_tabstr) ) > 0 ) {
        my $pos = 0;

        while ( substr( $leading_whitespace, $pos, $len_tab ) eq $input_tabstr )
        {
            $pos += $len_tab;
            $level++;
        }
    }
    return ( $level, $msg );
}

# This is a currently unused debug routine
#
# It prints to STDOUT, package by package, the names of the user-defined
# subs recorded in %is_user_function (flagged 'block' or 'block_list'
# where applicable), followed by the names recorded in %is_constant.
sub dump_functions {

    my $fh = *STDOUT;
    foreach my $pkg ( keys %is_user_function ) {
        print $fh "\nnon-constant subs in package $pkg\n";

        foreach my $sub ( keys %{ $is_user_function{$pkg} } ) {
            my $msg = "";
            if ( $is_block_list_function{$pkg}{$sub} ) {
                $msg = 'block_list';
            }

            # 'block' takes precedence if a sub is marked as both
            if ( $is_block_function{$pkg}{$sub} ) {
                $msg = 'block';
            }
            print $fh "$sub $msg\n";
        }
    }

    foreach my $pkg ( keys %is_constant ) {
        print $fh "\nconstants and constant subs in package $pkg\n";

        foreach my $sub ( keys %{ $is_constant{$pkg} } ) {
            print $fh "$sub\n";
        }
    }
}

sub ones_count {

    # count number of 1's in a string of 1's and 0's
    # example: ones_count("010101010101") gives 6
    #
    # tr/// with an empty replacement list only counts the matching
    # characters; the argument is copied first so that the caller's
    # value is never touched.
    my $bits = $_[0];
    return ( $bits =~ tr/1// );
}

sub prepare_for_a_new_file {

    # Reset the package globals which hold the tokenizer state so that
    # tokenizing can begin at the top of a new file.

    # previous tokens needed to determine what to expect next
    # (';' is the only possible starting state which will make a leading
    # brace a code block)
    $last_nonblank_token      = ';';
    $last_nonblank_type       = ';';
    $last_nonblank_block_type = '';

    # scalars for remembering statement types across multiple lines
    $statement_type    = '';    # '' or 'use' or 'sub..' or 'case..'
    $in_attribute_list = 0;

    # scalars for remembering where we are in the file
    $current_package = "main";
    $context         = UNKNOWN_CONTEXT;

    # hashes used to remember function information
    %is_constant             = ();    # user-defined constants
    %is_user_function        = ();    # user-defined functions
    %user_function_prototype = ();    # their prototypes
    %is_block_function       = ();
    %is_block_list_function  = ();
    %saw_function_definition = ();

    # variables used to track depths of various containers
    # and report nesting errors
    $paren_depth          = 0;
    $brace_depth          = 0;
    $square_bracket_depth = 0;
    @current_depth[ 0 .. $#closing_brace_names ] =
      (0) x scalar @closing_brace_names;
    $total_depth = 0;
    @total_depth = ();
    @nesting_sequence_number[ 0 .. $#closing_brace_names ] =
      ( 0 .. $#closing_brace_names );
    @current_sequence_number = ();

    # Each container array is indexed by the depth of its own container
    # type.  All three depths are zero here.
    $paren_type[$paren_depth]            = '';
    $paren_semicolon_count[$paren_depth] = 0;
    $paren_structural_type[$paren_depth] = '';
    $brace_type[$brace_depth] = ';';    # identify opening brace as code block
    $brace_structural_type[$brace_depth]                   = '';
    $brace_statement_type[$brace_depth]                    = "";
    $brace_context[$brace_depth]                           = UNKNOWN_CONTEXT;
    $brace_package[$brace_depth]                           = $current_package;
    $square_bracket_type[$square_bracket_depth]            = '';
    $square_bracket_structural_type[$square_bracket_depth] = '';

    initialize_tokenizer_state();
}

{ # begin tokenize_this_line

21614

21615

use constant BRACE => 0;

21616

use constant SQUARE_BRACKET => 1;

21617

use constant PAREN => 2;

21618

use constant QUESTION_COLON => 3;

21619

21620

# TV1: scalars for processing one LINE.

21621

# Re-initialized on each entry to sub tokenize_this_line.

21622

my (

21623

$block_type, $container_type, $expecting,

21624

$i, $i_tok, $input_line,

21625

$input_line_number, $last_nonblank_i, $max_token_index,

21626

$next_tok, $next_type, $peeked_ahead,

21627

$prototype, $rhere_target_list, $rtoken_map,

21628

$rtoken_type, $rtokens, $tok,

21629

$type, $type_sequence, $indent_flag,

21630

);

21631

21632

# TV2: refs to ARRAYS for processing one LINE

21633

# Re-initialized on each call.

21634

my $routput_token_list = []; # stack of output token indexes

21635

my $routput_token_type = []; # token types

21636

my $routput_block_type = []; # types of code block

21637

my $routput_container_type = []; # paren types, such as if, elsif, ..

21638

my $routput_type_sequence = []; # nesting sequential number

21639

my $routput_indent_flag = []; #

21640

21641

# TV3: SCALARS for quote variables. These are initialized with a

21642

# subroutine call and continually updated as lines are processed.

21643

my ( $in_quote, $quote_type, $quote_character, $quote_pos, $quote_depth,

21644

$quoted_string_1, $quoted_string_2, $allowed_quote_modifiers, );

21645

21646

# TV4: SCALARS for multi-line identifiers and

21647

# statements. These are initialized with a subroutine call

21648

# and continually updated as lines are processed.

21649

my ( $id_scan_state, $identifier, $want_paren, $indented_if_level );

21650

21651

# TV5: SCALARS for tracking indentation level.

21652

# Initialized once and continually updated as lines are

21653

# processed.

21654

my (

21655

$nesting_token_string, $nesting_type_string,

21656

$nesting_block_string, $nesting_block_flag,

21657

$nesting_list_string, $nesting_list_flag,

21658

$ci_string_in_tokenizer, $continuation_string_in_tokenizer,

21659

$in_statement_continuation, $level_in_tokenizer,

21660

$slevel_in_tokenizer, $rslevel_stack,

21661

);

21662

21663

# TV6: SCALARS for remembering several previous

21664

# tokens. Initialized once and continually updated as

21665

# lines are processed.

21666

my (

21667

$last_nonblank_container_type, $last_nonblank_type_sequence,

21668

$last_last_nonblank_token, $last_last_nonblank_type,

21669

$last_last_nonblank_block_type, $last_last_nonblank_container_type,

21670

$last_last_nonblank_type_sequence, $last_nonblank_prototype,

21671

);

21672

21673

# ----------------------------------------------------------------

21674

# beginning of tokenizer variable access and manipulation routines

21675

# ----------------------------------------------------------------

21676

21677

sub initialize_tokenizer_state {

    # Put the tokenizer variables of groups TV3 through TV6 into their
    # initial state.  Groups TV1 and TV2 are not set here; per the notes
    # on their declarations they are re-initialized on each call.

    # TV3: quote variables
    $quote_type = 'Q';
    $in_quote   = $quote_pos = $quote_depth = 0;
    $quote_character = $quoted_string_1 = $quoted_string_2 =
      $allowed_quote_modifiers = "";

    # TV4: multi-line identifiers and statements
    $id_scan_state     = $identifier = $want_paren = '';
    $indented_if_level = 0;

    # TV5: indentation level tracking
    $nesting_token_string = $nesting_type_string = $ci_string_in_tokenizer =
      "";
    $nesting_block_string = '1';    # initially in a block
    $nesting_block_flag   = 1;
    $nesting_list_string  = '0';    # initially not in a list
    $nesting_list_flag    = 0;      # initially not in a list
    $continuation_string_in_tokenizer = "0";
    $in_statement_continuation = $level_in_tokenizer = $slevel_in_tokenizer =
      0;
    $rslevel_stack = [];

    # TV6: memory of several previous tokens
    $last_last_nonblank_token = $last_last_nonblank_type = ';';
    $last_nonblank_container_type = $last_nonblank_type_sequence =
      $last_last_nonblank_block_type = $last_last_nonblank_container_type =
      $last_last_nonblank_type_sequence = $last_nonblank_prototype = "";
}

21721

21722

sub save_tokenizer_state {

    # Take a snapshot of the current values of the tokenizer variables
    # in groups TV1 through TV6.
    #
    # Returns a reference to an array of six array references, one per
    # group and in group order; restore_tokenizer_state() accepts this
    # structure.

    # TV1: scalars for processing one line
    my @tv1 = (
        $block_type,        $container_type,    $expecting,
        $i,                 $i_tok,             $input_line,
        $input_line_number, $last_nonblank_i,   $max_token_index,
        $next_tok,          $next_type,         $peeked_ahead,
        $prototype,         $rhere_target_list, $rtoken_map,
        $rtoken_type,       $rtokens,           $tok,
        $type,              $type_sequence,     $indent_flag,
    );

    # TV2: references to the per-line output arrays
    my @tv2 = (
        $routput_token_list,    $routput_token_type,
        $routput_block_type,    $routput_container_type,
        $routput_type_sequence, $routput_indent_flag,
    );

    # TV3: quote variables
    my @tv3 = (
        $in_quote,        $quote_type,
        $quote_character, $quote_pos,
        $quote_depth,     $quoted_string_1,
        $quoted_string_2, $allowed_quote_modifiers,
    );

    # TV4: multi-line identifiers and statements
    my @tv4 =
      ( $id_scan_state, $identifier, $want_paren, $indented_if_level );

    # TV5: indentation level tracking
    my @tv5 = (
        $nesting_token_string,      $nesting_type_string,
        $nesting_block_string,      $nesting_block_flag,
        $nesting_list_string,       $nesting_list_flag,
        $ci_string_in_tokenizer,    $continuation_string_in_tokenizer,
        $in_statement_continuation, $level_in_tokenizer,
        $slevel_in_tokenizer,       $rslevel_stack,
    );

    # TV6: memory of several previous tokens
    my @tv6 = (
        $last_nonblank_container_type,
        $last_nonblank_type_sequence,
        $last_last_nonblank_token,
        $last_last_nonblank_type,
        $last_last_nonblank_block_type,
        $last_last_nonblank_container_type,
        $last_last_nonblank_type_sequence,
        $last_nonblank_prototype,
    );

    return [ \@tv1, \@tv2, \@tv3, \@tv4, \@tv5, \@tv6 ];
}

21771

21772

sub restore_tokenizer_state {

    # Copy a snapshot made by save_tokenizer_state() back into the
    # tokenizer variables of groups TV1 through TV6.
    #
    # given:
    #   $rstate = reference to an array of six array references, in the
    #             order and layout produced by save_tokenizer_state()
    my ($rstate) = @_;
    my ( $rTV1, $rTV2, $rTV3, $rTV4, $rTV5, $rTV6 ) = @{$rstate};

    # TV1: scalars for processing one line
    (
        $block_type,        $container_type,    $expecting,
        $i,                 $i_tok,             $input_line,
        $input_line_number, $last_nonblank_i,   $max_token_index,
        $next_tok,          $next_type,         $peeked_ahead,
        $prototype,         $rhere_target_list, $rtoken_map,
        $rtoken_type,       $rtokens,           $tok,
        $type,              $type_sequence,     $indent_flag,
    ) = @{$rTV1};

    # TV2: references to the per-line output arrays.
    # The last target must be $routput_indent_flag, matching the order
    # used by save_tokenizer_state().  Listing $routput_type_sequence
    # twice here would overwrite it with the saved indent-flag reference
    # and leave $routput_indent_flag unrestored.
    (
        $routput_token_list,    $routput_token_type,
        $routput_block_type,    $routput_container_type,
        $routput_type_sequence, $routput_indent_flag,
    ) = @{$rTV2};

    # TV3: quote variables
    (
        $in_quote, $quote_type, $quote_character, $quote_pos, $quote_depth,
        $quoted_string_1, $quoted_string_2, $allowed_quote_modifiers,
    ) = @{$rTV3};

    # TV4: multi-line identifiers and statements
    ( $id_scan_state, $identifier, $want_paren, $indented_if_level ) =
      @{$rTV4};

    # TV5: indentation level tracking
    (
        $nesting_token_string,      $nesting_type_string,
        $nesting_block_string,      $nesting_block_flag,
        $nesting_list_string,       $nesting_list_flag,
        $ci_string_in_tokenizer,    $continuation_string_in_tokenizer,
        $in_statement_continuation, $level_in_tokenizer,
        $slevel_in_tokenizer,       $rslevel_stack,
    ) = @{$rTV5};

    # TV6: memory of several previous tokens
    (
        $last_nonblank_container_type,
        $last_nonblank_type_sequence,
        $last_last_nonblank_token,
        $last_last_nonblank_type,
        $last_last_nonblank_block_type,
        $last_last_nonblank_container_type,
        $last_last_nonblank_type_sequence,
        $last_nonblank_prototype,
    ) = @{$rTV6};
}

21819

21820

sub get_indentation_level {

    # Return the tokenizer's current indentation level.
    #
    # Patch to avoid reporting an error if an indented 'if' is not
    # terminated: while $indented_if_level is set, report one level less.
    return $indented_if_level
      ? $level_in_tokenizer - 1
      : $level_in_tokenizer;
}

21826

21827

sub reset_indentation_level {

    # Set both $level_in_tokenizer and $slevel_in_tokenizer to the given
    # level and push that level onto the stack in $rslevel_stack.
    my ($new_level) = @_;
    $level_in_tokenizer = $slevel_in_tokenizer = $new_level;
    push @{$rslevel_stack}, $slevel_in_tokenizer;
}

21832

21833

sub peeked_ahead {

    # Combined setter/getter for $peeked_ahead: a defined argument is
    # stored; with no argument (or undef) the value is left unchanged.
    # The current value is returned either way.
    $peeked_ahead = $_[0] if defined( $_[0] );
    return $peeked_ahead;
}

21836

21837

# ------------------------------------------------------------

21838

# end of tokenizer variable access and manipulation routines

21839

# ------------------------------------------------------------

21840

21841

# ------------------------------------------------------------

21842

# beginning of various scanner interface routines

21843

# ------------------------------------------------------------

21844

sub scan_replacement_text {

    # Look for here-docs in the replacement text of a substitution
    # operator that has the executable modifier 'e'.  The text is run
    # through a second, temporary tokenizer.
    #
    # given:
    #   $replacement_text
    # return:
    #   $rht = reference to any here-doc targets, as a list of
    #          [ target, quote character ] pairs, or undef if none
    my ($replacement_text) = @_;

    # quick check: without '<<' there cannot be a here-doc
    if ( $replacement_text !~ /<</ ) { return undef }

    write_logfile_entry("scanning replacement text for here-doc targets\n");

    # save the logger object for error messages; this must be read
    # before $tokenizer_self is localized below
    my $logger_object = $tokenizer_self->{_logger_object};

    # localize all package variables so that the temporary tokenizer
    # cannot disturb the state of the calling one
    local ( $tokenizer_self, $current_package, $context );
    local ( $last_nonblank_token, $last_nonblank_type,
        $last_nonblank_block_type );
    local ( $statement_type, $in_attribute_list );
    local (
        %is_constant,            %is_user_function,
        %user_function_prototype, %is_block_function,
        %is_block_list_function, %saw_function_definition,
    );
    local ( $brace_depth, $paren_depth, $square_bracket_depth, $total_depth );
    local (
        @current_depth,           @total_depth,
        @nesting_sequence_number, @current_sequence_number,
        @paren_type,              @paren_semicolon_count,
        @paren_structural_type,   @brace_type,
        @brace_structural_type,   @brace_statement_type,
        @brace_context,           @brace_package,
        @square_bracket_type,     @square_bracket_structural_type,
        @depth_array,             @starting_line_of_current_depth,
        @nested_ternary_flag,
    );

    # save all lexical variables
    my $rstate = save_tokenizer_state();
    _decrement_count();    # avoid error check for multiple tokenizers

    # make a new tokenizer which reads from the replacement text
    my $rOpts = {};
    my $rpending_logfile_message;
    my $source_object =
      Perl::Tidy::LineSource->new( \$replacement_text, $rOpts,
        $rpending_logfile_message );
    my $tokenizer = Perl::Tidy::Tokenizer->new(
        source_object        => $source_object,
        logger_object        => $logger_object,
        starting_line_number => $input_line_number,
    );

    # scan the replacement text
    while ( $tokenizer->get_line() ) { }

    # remove any here doc targets: the one being collected first,
    # followed by any that are still queued
    my $rht;
    if ( $tokenizer_self->{_in_here_doc} ) {
        $rht = [
            [
                $tokenizer_self->{_here_doc_target},
                $tokenizer_self->{_here_quote_character}
            ]
        ];
        my $rqueued_targets = $tokenizer_self->{_rhere_target_list};
        if ($rqueued_targets) {
            push @{$rht}, @{$rqueued_targets};
            $tokenizer_self->{_rhere_target_list} = undef;
        }
        $tokenizer_self->{_in_here_doc} = undef;
    }

    # now it's safe to report errors
    $tokenizer->report_tokenization_errors();

    # restore all tokenizer lexical variables
    restore_tokenizer_state($rstate);

    # return the here doc targets
    return $rht;
}

21929

21930

sub scan_bare_identifier {

    # Interface to scan_bare_identifier_do: pass it the current line
    # variables and store the values it returns back into
    # $i, $tok, $type and $prototype.
    my @scan_result =
      scan_bare_identifier_do( $input_line, $i, $tok, $type, $prototype,
        $rtoken_map, $max_token_index );
    ( $i, $tok, $type, $prototype ) = @scan_result;
}

21935

21936

sub scan_identifier {

    # Interface to scan_identifier_do: pass it the current scan state
    # and store the values it returns back into
    # $i, $tok, $type, $id_scan_state and $identifier.
    my @scan_result =
      scan_identifier_do( $i, $id_scan_state, $identifier, $rtokens,
        $max_token_index, $expecting );
    ( $i, $tok, $type, $id_scan_state, $identifier ) = @scan_result;
}

21941

21942

sub scan_id {

    # Interface to scan_id_do: pass it the current line variables and
    # store the values it returns back into
    # $i, $tok, $type and $id_scan_state.
    my @scan_result =
      scan_id_do( $input_line, $i, $tok, $rtokens, $rtoken_map,
        $id_scan_state, $max_token_index );
    ( $i, $tok, $type, $id_scan_state ) = @scan_result;
}

21947

21948

sub scan_number {

    # Interface to scan_number_do: pass it the current line variables,
    # store the first two values it returns back into $i and $type, and
    # return the third value ($number) to the caller.
    my @scan_result =
      scan_number_do( $input_line, $i, $rtoken_map, $type,
        $max_token_index );
    ( $i, $type, my $number ) = @scan_result;
    return $number;
}

21955

21956

# a sub to warn if token found where term expected

21957

sub error_if_expecting_TERM {

    # Warn if a token was found where a term was expected.  The problem
    # is reported through unexpected(), but only when a term is expected
    # and the type of the previous nonblank token has an entry in
    # %really_want_term.  Returns 1 when a report was made.
    if (   $expecting == TERM
        && $really_want_term{$last_nonblank_type} )
    {
        unexpected( $tok, "term", $i_tok, $last_nonblank_i, $rtoken_map,
            $rtoken_type, $input_line );
        return 1;
    }
}

21966

21967

# a sub to warn if token found where operator expected

21968

sub error_if_expecting_OPERATOR {

    # Warn if a token was found where an operator was expected.
    #
    # given:
    #   optional description of the thing found (such as "Scalar");
    #   the current token text $tok is used when none is given
    # Returns 1 when a report was made.
    if ( $expecting == OPERATOR ) {
        my ($thing) = @_;
        $thing = $tok unless defined $thing;

        unexpected( $thing, "operator", $i_tok, $last_nonblank_i,
            $rtoken_map, $rtoken_type, $input_line );

        # the unexpected token is the first one on its line, so suggest
        # that the previous line may be missing its terminator
        if ( $i_tok == 0 ) {
            interrupt_logfile();
            warning("Missing ';' above?\n");
            resume_logfile();
        }
        return 1;
    }
}

21981

21982

# ------------------------------------------------------------

21983

# end scanner interfaces

21984

# ------------------------------------------------------------

21985

21986

my %is_for_foreach;

21987

@_ = qw(for foreach);

21988

@is_for_foreach{@_} = (1) x scalar(@_);

21989

21990

my %is_my_our;

21991

@_ = qw(my our);

21992

@is_my_our{@_} = (1) x scalar(@_);

21993

21994

# These keywords may introduce blocks after parenthesized expressions,

21995

# in the form:

21996

# keyword ( .... ) { BLOCK }

21997

# patch for SWITCH/CASE: added 'switch' 'case' 'given' 'when'

21998

my %is_blocktype_with_paren;

21999

@_ = qw(if elsif unless while until for foreach switch case given when);

22000

@is_blocktype_with_paren{@_} = (1) x scalar(@_);

22001

22002

# ------------------------------------------------------------

22003

# begin hash of code for handling most token types

22004

# ------------------------------------------------------------

22005

my $tokenization_code = {

22006

22007

# no special code for these types yet, but syntax checks

22008

# could be added

22009

22010

## '!' => undef,

22011

## '!=' => undef,

22012

## '!~' => undef,

22013

## '%=' => undef,

22014

## '&&=' => undef,

22015

## '&=' => undef,

22016

## '+=' => undef,

22017

## '-=' => undef,

22018

## '..' => undef,

22019

## '..' => undef,

22020

## '...' => undef,

22021

## '.=' => undef,

22022

## '<<=' => undef,

22023

## '<=' => undef,

22024

## '<=>' => undef,

22025

## '<>' => undef,

22026

## '=' => undef,

22027

## '==' => undef,

22028

## '=~' => undef,

22029

## '>=' => undef,

22030

## '>>' => undef,

22031

## '>>=' => undef,

22032

## '\\' => undef,

22033

## '^=' => undef,

22034

## '|=' => undef,

22035

## '||=' => undef,

22036

## '//=' => undef,

22037

## '~' => undef,

22038

## '~~' => undef,

22039

## '!~~' => undef,

22040

22041

'>' => sub {

22042

error_if_expecting_TERM()

22043

if ( $expecting == TERM );

22044

},

22045

'|' => sub {

22046

error_if_expecting_TERM()

22047

if ( $expecting == TERM );

22048

},

22049

'$' => sub {

22050

22051

# start looking for a scalar

22052

error_if_expecting_OPERATOR("Scalar")

22053

if ( $expecting == OPERATOR );

22054

scan_identifier();

22055

22056

if ( $identifier eq '$^W' ) {

22057

$tokenizer_self->{_saw_perl_dash_w} = 1;

22058

}

22059

22060

# Check for identifier in indirect object slot

22061

# (vorboard.pl, sort.t). Something like:

22062

# /^(print|printf|sort|exec|system)$/

22063

if (

22064

$is_indirect_object_taker{$last_nonblank_token}

22065

22066

|| ( ( $last_nonblank_token eq '(' )

22067

&& $is_indirect_object_taker{ $paren_type[$paren_depth] } )

22068

|| ( $last_nonblank_type =~ /^[Uw]$/ ) # possible object

22069

)

22070

{

22071

$type = 'Z';

22072

}

22073

},

22074

'(' => sub {

22075

22076

++$paren_depth;

22077

$paren_semicolon_count[$paren_depth] = 0;

22078

if ($want_paren) {

22079

$container_type = $want_paren;

22080

$want_paren = "";

22081

}

22082

else {

22083

$container_type = $last_nonblank_token;

22084

22085

# We can check for a syntax error here of unexpected '(',

22086

# but this is going to get messy...

22087

if (

22088

$expecting == OPERATOR

22089

22090

# be sure this is not a method call of the form

22091

# &method(...), $method->(..), &{method}(...),

22092

# $ref[2](list) is ok & short for $ref[2]->(list)

22093

# NOTE: at present, braces in something like &{ xxx }

22094

# are not marked as a block, we might have a method call

22095

&& $last_nonblank_token !~ /^([\]\}\&]|\-\>)/

22096

22097

)

22098

{

22099

22100

# ref: camel 3 p 703.

22101

if ( $last_last_nonblank_token eq 'do' ) {

22102

complain(

22103

"do SUBROUTINE is deprecated; consider & or -> notation\n"

22104

);

22105

}

22106

else {

22107

22108

# if this is an empty list, (), then it is not an

22109

# error; for example, we might have a constant pi and

22110

# invoke it with pi() or just pi;

22111

my ( $next_nonblank_token, $i_next ) =

22112

find_next_nonblank_token( $i, $rtokens,

22113

$max_token_index );

22114

if ( $next_nonblank_token ne ')' ) {

22115

my $hint;

22116

error_if_expecting_OPERATOR('(');

22117

22118

if ( $last_nonblank_type eq 'C' ) {

22119

$hint =

22120

"$last_nonblank_token has a void prototype\n";

22121

}

22122

elsif ( $last_nonblank_type eq 'i' ) {

22123

if ( $i_tok > 0

22124

&& $last_nonblank_token =~ /^\$/ )

22125

{

22126

$hint =

22127

"Do you mean '$last_nonblank_token->(' ?\n";

22128

}

22129

}

22130

if ($hint) {

22131

interrupt_logfile();

22132

warning($hint);

22133

resume_logfile();

22134

}

22135

} ## end if ( $next_nonblank_token...

22136

} ## end else [ if ( $last_last_nonblank_token...

22137

} ## end if ( $expecting == OPERATOR...

22138

}

22139

$paren_type[$paren_depth] = $container_type;

22140

( $type_sequence, $indent_flag ) =

22141

increase_nesting_depth( PAREN, $$rtoken_map[$i_tok] );

22142

22143

# propagate types down through nested parens

22144

# for example: the second paren in 'if ((' would be structural

22145

# since the first is.

22146

22147

if ( $last_nonblank_token eq '(' ) {

22148

$type = $last_nonblank_type;

22149

}

22150

22151

# We exclude parens as structural after a ',' because it

22152

# causes subtle problems with continuation indentation for

22153

# something like this, where the first 'or' will not get

22154

# indented.

22155

#

22156

# assert(

22157

# __LINE__,

22158

# ( not defined $check )

22159

# or ref $check

22160

# or $check eq "new"

22161

# or $check eq "old",

22162

# );

22163

#

22164

# Likewise, we exclude parens where a statement can start

22165

# because of problems with continuation indentation, like

22166

# these:

22167

#

22168

# ($firstline =~ /^#\!.*perl/)

22169

# and (print $File::Find::name, "\n")

22170

# and (return 1);

22171

#

22172

# (ref($usage_fref) =~ /CODE/)

22173

# ? &$usage_fref

22174

# : (&blast_usage, &blast_params, &blast_general_params);

22175

22176

else {

22177

$type = '{';

22178

}

22179

22180

if ( $last_nonblank_type eq ')' ) {

22181

warning(

22182

"Syntax error? found token '$last_nonblank_type' then '('\n"

22183

);

22184

}

22185

$paren_structural_type[$paren_depth] = $type;

22186

22187

},

22188

')' => sub {

22189

( $type_sequence, $indent_flag ) =

22190

decrease_nesting_depth( PAREN, $$rtoken_map[$i_tok] );

22191

22192

if ( $paren_structural_type[$paren_depth] eq '{' ) {

22193

$type = '}';

22194

}

22195

22196

$container_type = $paren_type[$paren_depth];

22197

22198

# /^(for|foreach)$/

22199

if ( $is_for_foreach{ $paren_type[$paren_depth] } ) {

22200

my $num_sc = $paren_semicolon_count[$paren_depth];

22201

if ( $num_sc > 0 && $num_sc != 2 ) {

22202

warning("Expected 2 ';' in 'for(;;)' but saw $num_sc\n");

22203

}

22204

}

22205

22206

if ( $paren_depth > 0 ) { $paren_depth-- }

22207

},

22208

',' => sub {

22209

if ( $last_nonblank_type eq ',' ) {

22210

complain("Repeated ','s \n");

22211

}

22212

22213

# patch for operator_expected: note if we are in the list (use.t)

22214

if ( $statement_type eq 'use' ) { $statement_type = '_use' }

22215

## FIXME: need to move this elsewhere, perhaps check after a '('

22216

## elsif ($last_nonblank_token eq '(') {

22217

## warning("Leading ','s illegal in some versions of perl\n");

22218

## }

22219

},

22220

';' => sub {

22221

$context = UNKNOWN_CONTEXT;

22222

$statement_type = '';

22223

22224

# /^(for|foreach)$/

22225

if ( $is_for_foreach{ $paren_type[$paren_depth] } )

22226

{ # mark ; in for loop

22227

22228

# Be careful: we do not want a semicolon such as the

22229

# following to be included:

22230

#

22231

# for (sort {strcoll($a,$b);} keys %investments) {

22232

22233

if ( $brace_depth == $depth_array[PAREN][BRACE][$paren_depth]

22234

&& $square_bracket_depth ==

22235

$depth_array[PAREN][SQUARE_BRACKET][$paren_depth] )

22236

{

22237

22238

$type = 'f';

22239

$paren_semicolon_count[$paren_depth]++;

22240

}

22241

}

22242

22243

},

22244

'"' => sub {

22245

error_if_expecting_OPERATOR("String")

22246

if ( $expecting == OPERATOR );

22247

$in_quote = 1;

22248

$type = 'Q';

22249

$allowed_quote_modifiers = "";

22250

},

22251

"'" => sub {

22252

error_if_expecting_OPERATOR("String")

22253

if ( $expecting == OPERATOR );

22254

$in_quote = 1;

22255

$type = 'Q';

22256

$allowed_quote_modifiers = "";

22257

},

22258

'`' => sub {

22259

error_if_expecting_OPERATOR("String")

22260

if ( $expecting == OPERATOR );

22261

$in_quote = 1;

22262

$type = 'Q';

22263

$allowed_quote_modifiers = "";

22264

},

22265

'/' => sub {

22266

my $is_pattern;

22267

22268

if ( $expecting == UNKNOWN ) { # indeterminte, must guess..

22269

my $msg;

22270

( $is_pattern, $msg ) =

22271

guess_if_pattern_or_division( $i, $rtokens, $rtoken_map,

22272

$max_token_index );

22273

22274

if ($msg) {

22275

write_diagnostics("DIVIDE:$msg\n");

22276

write_logfile_entry($msg);

22277

}

22278

}

22279

else { $is_pattern = ( $expecting == TERM ) }

22280

22281

if ($is_pattern) {

22282

$in_quote = 1;

22283

$type = 'Q';

22284

$allowed_quote_modifiers = '[cgimosxp]';

22285

}

22286

else { # not a pattern; check for a /= token

22287

22288

if ( $$rtokens[ $i + 1 ] eq '=' ) { # form token /=

22289

$i++;

22290

$tok = '/=';

22291

$type = $tok;

22292

}

22293

22294

#DEBUG - collecting info on what tokens follow a divide

22295

# for development of guessing algorithm

22296

#if ( numerator_expected( $i, $rtokens, $max_token_index ) < 0 ) {

22297

# #write_diagnostics( "DIVIDE? $input_line\n" );

22298

#}

22299

}

22300

},

22301

'{' => sub {

22302

22303

# if we just saw a ')', we will label this block with

22304

# its type. We need to do this to allow sub

22305

# code_block_type to determine if this brace starts a

22306

# code block or anonymous hash. (The type of a paren

22307

# pair is the preceding token, such as 'if', 'else',

22308

# etc).

22309

$container_type = "";

22310

22311

# ATTRS: for a '{' following an attribute list, reset

22312

# things to look like we just saw the sub name

22313

if ( $statement_type =~ /^sub/ ) {

22314

$last_nonblank_token = $statement_type;

22315

$last_nonblank_type = 'i';

22316

$statement_type = "";

22317

}

22318

22319

# patch for SWITCH/CASE: hide these keywords from an immediately

22320

# following opening brace

22321

elsif ( ( $statement_type eq 'case' || $statement_type eq 'when' )

22322

&& $statement_type eq $last_nonblank_token )

22323

{

22324

$last_nonblank_token = ";";

22325

}

22326

22327

elsif ( $last_nonblank_token eq ')' ) {

22328

$last_nonblank_token = $paren_type[ $paren_depth + 1 ];

22329

22330

# defensive move in case of a nesting error (pbug.t)

22331

# in which this ')' had no previous '('

22332

# this nesting error will have been caught

22333

if ( !defined($last_nonblank_token) ) {

22334

$last_nonblank_token = 'if';

22335

}

22336

22337

# check for syntax error here;

22338

unless ( $is_blocktype_with_paren{$last_nonblank_token} ) {

22339

my $list = join( ' ', sort keys %is_blocktype_with_paren );

22340

warning(

22341

"syntax error at ') {', didn't see one of: $list\n");

22342

}

22343

}

22344

22345

# patch for paren-less for/foreach glitch, part 2.

22346

# see note below under 'qw'

22347

elsif ($last_nonblank_token eq 'qw'

22348

&& $is_for_foreach{$want_paren} )

22349

{

22350

$last_nonblank_token = $want_paren;

22351

if ( $last_last_nonblank_token eq $want_paren ) {

22352

warning(

22353

"syntax error at '$want_paren .. {' -- missing \$ loop variable\n"

22354

);

22355

22356

}

22357

$want_paren = "";

22358

}

22359

22360

# now identify which of the three possible types of

22361

# curly braces we have: hash index container, anonymous

22362

# hash reference, or code block.

22363

22364

# non-structural (hash index) curly brace pair

22365

# get marked 'L' and 'R'

22366

if ( is_non_structural_brace() ) {

22367

$type = 'L';

22368

22369

# patch for SWITCH/CASE:

22370

# allow paren-less identifier after 'when'

22371

# if the brace is preceded by a space

22372

if ( $statement_type eq 'when'

22373

&& $last_nonblank_type eq 'i'

22374

&& $last_last_nonblank_type eq 'k'

22375

&& ( $i_tok == 0 || $rtoken_type->[ $i_tok - 1 ] eq 'b' ) )

22376

{

22377

$type = '{';

22378

$block_type = $statement_type;

22379

}

22380

}

22381

22382

# code and anonymous hash have the same type, '{', but are

22383

# distinguished by 'block_type',

22384

# which will be blank for an anonymous hash

22385

else {

22386

22387

$block_type = code_block_type( $i_tok, $rtokens, $rtoken_type,

22388

$max_token_index );

22389

22390

# patch to promote bareword type to function taking block

22391

if ( $block_type

22392

&& $last_nonblank_type eq 'w'

22393

&& $last_nonblank_i >= 0 )

22394

{

22395

if ( $routput_token_type->[$last_nonblank_i] eq 'w' ) {

22396

$routput_token_type->[$last_nonblank_i] = 'G';

22397

}

22398

}

22399

22400

# patch for SWITCH/CASE: if we find a stray opening block brace

22401

# where we might accept a 'case' or 'when' block, then take it

22402

if ( $statement_type eq 'case'

22403

|| $statement_type eq 'when' )

22404

{

22405

if ( !$block_type || $block_type eq '}' ) {

22406

$block_type = $statement_type;

22407

}

22408

}

22409

}

22410

$brace_type[ ++$brace_depth ] = $block_type;

22411

$brace_package[$brace_depth] = $current_package;

22412

( $type_sequence, $indent_flag ) =

22413

increase_nesting_depth( BRACE, $$rtoken_map[$i_tok] );

22414

$brace_structural_type[$brace_depth] = $type;

22415

$brace_context[$brace_depth] = $context;

22416

$brace_statement_type[$brace_depth] = $statement_type;

22417

},

22418

'}' => sub {

22419

$block_type = $brace_type[$brace_depth];

22420

if ($block_type) { $statement_type = '' }

22421

if ( defined( $brace_package[$brace_depth] ) ) {

22422

$current_package = $brace_package[$brace_depth];

22423

}

22424

22425

# can happen on brace error (caught elsewhere)

22426

else {

22427

}

22428

( $type_sequence, $indent_flag ) =

22429

decrease_nesting_depth( BRACE, $$rtoken_map[$i_tok] );

22430

22431

if ( $brace_structural_type[$brace_depth] eq 'L' ) {

22432

$type = 'R';

22433

}

22434

22435

# propagate type information for 'do' and 'eval' blocks.

22436

# This is necessary to enable us to know if an operator

22437

# or term is expected next

22438

if ( $is_block_operator{ $brace_type[$brace_depth] } ) {

22439

$tok = $brace_type[$brace_depth];

22440

}

22441

22442

$context = $brace_context[$brace_depth];

22443

$statement_type = $brace_statement_type[$brace_depth];

22444

if ( $brace_depth > 0 ) { $brace_depth--; }

22445

},

22446

'&' => sub { # maybe sub call? start looking

22447

22448

# We have to check for sub call unless we are sure we

22449

# are expecting an operator. This example from s2p

22450

# got mistaken as a q operator in an early version:

22451

# print BODY &q(<<'EOT');

22452

if ( $expecting != OPERATOR ) {

22453

scan_identifier();

22454

}

22455

else {

22456

}

22457

},

22458

'<' => sub { # angle operator or less than?

22459

22460

if ( $expecting != OPERATOR ) {

22461

( $i, $type ) =

22462

find_angle_operator_termination( $input_line, $i, $rtoken_map,

22463

$expecting, $max_token_index );

22464

22465

}

22466

else {

22467

}

22468

},

22469

'?' => sub { # ?: conditional or starting pattern?

22470

22471

my $is_pattern;

22472

22473

if ( $expecting == UNKNOWN ) {

22474

22475

my $msg;

22476

( $is_pattern, $msg ) =

22477

guess_if_pattern_or_conditional( $i, $rtokens, $rtoken_map,

22478

$max_token_index );

22479

22480

if ($msg) { write_logfile_entry($msg) }

22481

}

22482

else { $is_pattern = ( $expecting == TERM ) }

22483

22484

if ($is_pattern) {

22485

$in_quote = 1;

22486

$type = 'Q';

22487

$allowed_quote_modifiers = '[cgimosxp]';

22488

}

22489

else {

22490

( $type_sequence, $indent_flag ) =

22491

increase_nesting_depth( QUESTION_COLON,

22492

$$rtoken_map[$i_tok] );

22493

}

22494

},

22495

'*' => sub { # typeglob, or multiply?

22496

22497

if ( $expecting == TERM ) {

22498

scan_identifier();

22499

}

22500

else {

22501

22502

if ( $$rtokens[ $i + 1 ] eq '=' ) {

22503

$tok = '*=';

22504

$type = $tok;

22505

$i++;

22506

}

22507

elsif ( $$rtokens[ $i + 1 ] eq '*' ) {

22508

$tok = '**';

22509

$type = $tok;

22510

$i++;

22511

if ( $$rtokens[ $i + 1 ] eq '=' ) {

22512

$tok = '**=';

22513

$type = $tok;

22514

$i++;

22515

}

22516

}

22517

}

22518

},

22519

'.' => sub { # what kind of . ?

22520

22521

if ( $expecting != OPERATOR ) {

22522

scan_number();

22523

if ( $type eq '.' ) {

22524

error_if_expecting_TERM()

22525

if ( $expecting == TERM );

22526

}

22527

}

22528

else {

22529

}

22530

},

22531

':' => sub {

    # A ':' as the very first nonblank character of the input is called a
    # label (type 'J'), since perl seems to just swallow it.
    if ( $input_line_number == 1 && $last_nonblank_i == -1 ) {
        $type = 'J';
    }

    # ATTRS: a ':' introduces an attribute list (type 'A'; this might
    # eventually get its own token type) either in a sub statement or,
    # outside of any ?: nesting, after my/our, as in
    #     my $foo : shared = 1;
    elsif (
        $statement_type =~ /^sub/
        || (   $is_my_our{$statement_type}
            && $current_depth[QUESTION_COLON] == 0 )
      )
    {
        $type              = 'A';
        $in_attribute_list = 1;
    }

    # Anything else should be the second half of a ?: operator.
    else {
        ( $type_sequence, $indent_flag ) =
          decrease_nesting_depth( QUESTION_COLON, $rtoken_map->[$i_tok] );
        warning("Syntax error near ? :\n")
          if ( $last_nonblank_token eq '?' );
    }
},

22565

'+' => sub {

    # What kind of plus?  The type becomes 'p' (unary plus) in two cases;
    # where an operator is expected nothing is changed.
    if ( $expecting == TERM ) {

        # unary plus is the safest assumption if no number was scanned
        $type = 'p' unless defined( scan_number() );
    }
    elsif ( $expecting != OPERATOR ) {

        # expectation unknown: a following bareword suggests unary plus
        $type = 'p' if ( $next_type eq 'w' );
    }
},

22579

'@' => sub {

    # An array where an operator is expected is reported as an error; the
    # identifier is scanned in either case.
    if ( $expecting == OPERATOR ) {
        error_if_expecting_OPERATOR("Array");
    }
    scan_identifier();
},

22585

'%' => sub {

    # Hash or modulo?  When the expectation is unknown, the first guess is
    # a hash if the '%' is not followed by a blank.
    $expecting = TERM
      if ( $expecting == UNKNOWN && $next_type ne 'b' );

    # in term position, scan the rest as an identifier
    scan_identifier() if ( $expecting == TERM );
},

22595

'[' => sub {

    # go one level deeper and remember which token preceded this bracket
    $square_bracket_depth++;
    $square_bracket_type[$square_bracket_depth] = $last_nonblank_token;

    ( $type_sequence, $indent_flag ) =
      increase_nesting_depth( SQUARE_BRACKET, $rtoken_map->[$i_tok] );

    # It may seem odd, but structural square brackets are given the types
    # '{' and '}'.  This simplifies the indentation logic.
    $type = '{' unless is_non_structural_brace();

    # record the chosen type so that the closing ']' can be matched to it
    $square_bracket_structural_type[$square_bracket_depth] = $type;
},

22608

']' => sub {

    ( $type_sequence, $indent_flag ) =
      decrease_nesting_depth( SQUARE_BRACKET, $rtoken_map->[$i_tok] );

    # a bracket which was opened with type '{' is closed with type '}'
    $type = '}'
      if ( $square_bracket_structural_type[$square_bracket_depth] eq '{' );

    # leave this level, but never let the depth go below zero
    $square_bracket_depth-- if ( $square_bracket_depth > 0 );
},

22618

'-' => sub {

    # What kind of minus?

    # Possible file test operator, such as -e: the next pre-token is one
    # of the file test letters and no operator is expected here.
    if ( $expecting != OPERATOR && $is_file_test_operator{$next_tok} ) {

        my ($following_token) =
          find_next_nonblank_token( $i + 1, $rtokens, $max_token_index );

        if ( $following_token ne '=' ) {

            # join the '-' and the letter into one token of type 'F'
            $tok .= $next_tok;
            $i++;
            $type = 'F';
        }
        else {

            # This is a quoted word like "-w=>xx"; it is sufficient to
            # just check for a following '='.
            $type = 'm';
        }
    }
    elsif ( $expecting == TERM ) {

        # maybe part of a bareword token?  unary minus is safest if no
        # number was scanned
        $type = 'm' unless defined( scan_number() );
    }
    elsif ( $expecting != OPERATOR ) {

        # expectation unknown: a following bareword suggests unary minus
        $type = 'm' if ( $next_type eq 'w' );
    }
},

22654

22655

'^' => sub {

    # In term position, check for special variables like ${^WARNING_BITS}.
    if ( $expecting == TERM ) {

        # FIXME: this should work but will not catch errors because we
        # also have to be sure that the previous token is a type
        # character ($,@,%).
        my $is_caret_variable =
          ( $last_nonblank_token eq '{' && $next_tok =~ /^[A-Za-z_]/ );

        if ( !$is_caret_variable ) {

            # Something like this is valid but strange:
            #     undef ^I;
            complain("The '^' seems unusual here\n")
              unless error_if_expecting_TERM();
        }
        else {

            # a following 'W' is recorded as having seen perl's -w
            $tokenizer_self->{_saw_perl_dash_w} = 1
              if ( $next_tok eq 'W' );

            # absorb the name into this token and call it a bareword
            $tok .= $next_tok;
            $i++;
            $type = 'w';
        }
    }
},

22685

22686

'::' => sub {

    # A leading '::' is probably the start of a sub call, so the rest is
    # scanned as a bare identifier.
    scan_bare_identifier();
},

22689

'<<' => sub {

    # Maybe a here-doc?  That is not possible if '<<' is at the end of the
    # line.
    return if ( $i >= $max_token_index );

    # nothing is changed where an operator is expected
    if ( $expecting != OPERATOR ) {

        # note that find_here_doc() also returns an updated value for $i
        (
            my $found_target, my $here_doc_target,
            my $here_quote_character, $i, my $saw_error
          )
          = find_here_doc( $expecting, $i, $rtokens, $rtoken_map,
            $max_token_index );

        if ( !$found_target ) {

            # No target although a term was required and no error was
            # reported: shouldn't happen..
            if ( $expecting == TERM && !$saw_error ) {
                warning("Program bug; didn't find here doc target\n");
                report_definite_bug();
            }
        }
        else {

            # queue the target and mark the token as type 'h'
            push @{$rhere_target_list},
              [ $here_doc_target, $here_quote_character ];
            $type = 'h';

            # complain about very long or unconventional target names
            if ( length($here_doc_target) > 80 ) {
                my $truncated = substr( $here_doc_target, 0, 80 );
                complain("Long here-target: '$truncated' ...\n");
            }
            elsif ( $here_doc_target !~ /^[A-Z_]\w+$/ ) {
                complain(
                    "Unconventional here-target: '$here_doc_target'\n");
            }
        }
    }
},

22730

'->' => sub {

    # If the -> points to a bare word we must scan for an identifier;
    # otherwise something like ->y would look like the y operator.
    scan_identifier();
},

22736

22737

# The type is changed to 'pp' for a pre-increment; a post-increment keeps
# the type '++'.
'++' => sub {
    if ( $expecting == TERM ) {
        $type = 'pp';
    }
    elsif ( $expecting == UNKNOWN ) {

        # undecided: call it a pre-increment if a '$' comes next
        my ($upcoming_token) =
          find_next_nonblank_token( $i, $rtokens, $max_token_index );
        $type = 'pp' if ( $upcoming_token eq '$' );
    }
},

22746

22747

'=>' => sub {

    # two of these in a row look like a mistake
    complain("Repeated '=>'s \n") if ( $last_nonblank_type eq $tok );

    # patch for operator_expected: note if we are in the list (use.t)
    # TODO: make version numbers a new token type
    $statement_type = '_use' if ( $statement_type eq 'use' );
},

22756

22757

# The type is changed to 'mm' for a pre-decrement; a post-decrement keeps
# the type '--'.
'--' => sub {
    if ( $expecting == TERM ) {
        $type = 'mm';
    }
    elsif ( $expecting == UNKNOWN ) {

        # undecided: call it a pre-decrement if a '$' comes next
        my ($upcoming_token) =
          find_next_nonblank_token( $i, $rtokens, $max_token_index );
        $type = 'mm' if ( $upcoming_token eq '$' );
    }
},

22767

22768

'&&' => sub {

    # a binary operator is an error where a term is expected
    if ( $expecting == TERM ) {
        error_if_expecting_TERM();
    }
},

22772

22773

'||' => sub {

    # a binary operator is an error where a term is expected
    if ( $expecting == TERM ) {
        error_if_expecting_TERM();
    }
},

22777

22778

'//' => sub {

    # a binary operator is an error where a term is expected
    if ( $expecting == TERM ) {
        error_if_expecting_TERM();
    }
},

22782

};

22783

22784

# ------------------------------------------------------------

22785

# end hash of code for handling individual token types

22786

# ------------------------------------------------------------

22787

22788

# closing container token => its opening partner
my %matching_start_token = ( '}' => '{', ']' => '[', ')' => '(' );

# The lookup sets below map every listed word to 1.  They are built
# directly with map instead of staging each word list in @_, because @_ is
# a global and assigning to it at file scope clobbers it as a side effect
# of loading this code.

# These block types terminate statements and do not need a trailing
# semicolon
# patched for SWITCH/CASE:
my %is_zero_continuation_block_type = map { $_ => 1 }
  qw( } { BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue ;
  if elsif else unless while until for foreach switch case given when);

my %is_not_zero_continuation_block_type =
  map { $_ => 1 } qw(sort grep map do eval);

my %is_logical_container = map { $_ => 1 }
  qw(if elsif unless while and or err not && ! || for foreach);

my %is_binary_type = map { $_ => 1 } qw(|| &&);

my %is_binary_keyword = map { $_ => 1 } qw(and or err eq ne cmp);

# 'L' is token for opening { at hash key
my %is_opening_type = map { $_ => 1 } qw" L { ( [ ";

# 'R' is token for closing } at hash key
my %is_closing_type = map { $_ => 1 } qw" R } ) ] ";

my %is_redo_last_next_goto = map { $_ => 1 } qw(redo last next goto);

my %is_use_require = map { $_ => 1 } qw(use require);

my %is_sub_package = map { $_ => 1 } qw(sub package);

22835

22836

# For each of these keywords, the name of the associated hash key in
# $tokenizer_self:
my %is_format_END_DATA = (
    '__DATA__' => '_in_data',
    '__END__'  => '_in_end',
    'format'   => '_in_format',
);

22842

22843

# Character class of the modifier letters allowed after each quote-like
# operator; an empty string means that no modifiers are allowed.
# ref: camel 3 p 147, but perl may accept undocumented flags.
# perl 5.10 adds 'p' (preserve).
my %quote_modifiers = (
    's'  => '[cegimosxp]',
    'm'  => '[cgimosxp]',
    'qr' => '[imosxp]',
    ( map { ( $_, '[cds]' ) } qw(y tr) ),
    ( map { ( $_, "" ) } qw(q qq qw qx) ),
);

22857

22858

# Number of quoted things to look for after each quote operator:
#   2 (pattern and replacement) for s, y and tr
#   1 (pattern only) for all of the others
my %quote_items = (
    ( map { ( $_, 2 ) } qw(s y tr) ),
    ( map { ( $_, 1 ) } qw(m qr q qq qw qx) ),
);

22872

22873

sub tokenize_this_line {

22874

22875

# This routine breaks a line of perl code into tokens which are of use in

22876

# indentation and reformatting. One of my goals has been to define tokens

22877

# such that a newline may be inserted between any pair of tokens without

22878

# changing or invalidating the program. This version comes close to this,

22879

# although there are necessarily a few exceptions which must be caught by

22880

# the formatter. Many of these involve the treatment of bare words.

22881

#

22882

# The tokens and their types are returned in arrays. See previous

22883

# routine for their names.

22884

#

22885

# See also the array "valid_token_types" in the BEGIN section for an

22886

# up-to-date list.

22887

#

22888

# To simplify things, token types are either a single character, or they

22889

# are identical to the tokens themselves.

22890

#

22891

# As a debugging aid, the -D flag creates a file containing a side-by-side

22892

# comparison of the input string and its tokenization for each line of a file.

22893

# This is an invaluable debugging aid.

22894

#

22895

# In addition to tokens, and some associated quantities, the tokenizer

22896

# also returns flags indicating any special line types. These include

22897

# quotes, here_docs, formats.

22898

#

22899

# -----------------------------------------------------------------------

22900

#

22901

# How to add NEW_TOKENS:

22902

#

22903

# New token types will undoubtedly be needed in the future both to keep up

22904

# with changes in perl and to help adapt the tokenizer to other applications.

22905

#

22906

# Here are some notes on the minimal steps. I wrote these notes while

22907

# adding the 'v' token type for v-strings, which are things like version

22908

# numbers 5.6.0, and ip addresses, and will use that as an example. ( You

22909

# can use your editor to search for the string "NEW_TOKENS" to find the

22910

# appropriate sections to change):

22911

#

22912

# *. Try to talk somebody else into doing it! If not, ..

22913

#

22914

# *. Make a backup of your current version in case things don't work out!

22915

#

22916

# *. Think of a new, unused character for the token type, and add to

22917

# the array @valid_token_types in the BEGIN section of this package.

22918

# For example, I used 'v' for v-strings.

22919

#

22920

# *. Implement coding to recognize the $type of the token in this routine.

22921

# This is the hardest part, and is best done by imitating or modifying

22922

# some of the existing coding. For example, to recognize v-strings, I

22923

# patched 'sub scan_bare_identifier' to recognize v-strings beginning with

22924

# 'v' and 'sub scan_number' to recognize v-strings without the leading 'v'.

22925

#

22926

# *. Update sub operator_expected. This update is critically important but

22927

# the coding is trivial. Look at the comments in that routine for help.

22928

# For v-strings, which should behave like numbers, I just added 'v' to the

22929

# regex used to handle numbers and strings (types 'n' and 'Q').

22930

#

22931

# *. Implement a 'bond strength' rule in sub set_bond_strengths in

22932

# Perl::Tidy::Formatter for breaking lines around this token type. You can

22933

# skip this step and take the default at first, then adjust later to get

22934

# desired results. For adding type 'v', I looked at sub bond_strength and

22935

# saw that number type 'n' was using default strengths, so I didn't do

22936

# anything. I may tune it up someday if I don't like the way line

22937

# breaks with v-strings look.

22938

#

22939

# *. Implement a 'whitespace' rule in sub set_white_space_flag in

22940

# Perl::Tidy::Formatter. For adding type 'v', I looked at this routine

22941

# and saw that type 'n' used spaces on both sides, so I just added 'v'

22942

# to the array @spaces_both_sides.

22943

#

22944

# *. Update HtmlWriter package so that users can colorize the token as

22945

# desired. This is quite easy; see comments identified by 'NEW_TOKENS' in

22946

# that package. For v-strings, I initially chose to use a default color

22947

# equal to the default for numbers, but it might be nice to change that

22948

# eventually.

22949

#

22950

# *. Update comments in Perl::Tidy::Tokenizer::dump_token_types.

22951

#

22952

# *. Run lots and lots of debug tests. Start with special files designed

22953

# to test the new token type. Run with the -D flag to create a .DEBUG

22954

# file which shows the tokenization. When these work ok, test as many old

22955

# scripts as possible. Start with all of the '.t' files in the 'test'

22956

# directory of the distribution file. Compare .tdy output with previous

22957

# version and updated version to see the differences. Then include as

22958

# many more files as possible. My own technique has been to collect a huge

22959

# number of perl scripts (thousands!) into one directory and run perltidy

22960

# *, then run diff between the output of the previous version and the

22961

# current version.

22962

#

22963

# *. For another example, search for the smartmatch operator '~~'

22964

# with your editor to see where updates were made for it.

22965

#

22966

# -----------------------------------------------------------------------

22967

22968

my $line_of_tokens = shift;

22969

my ($untrimmed_input_line) = $line_of_tokens->{_line_text};

22970

22971

# patch while coding change is underway

22972

# make callers private data to allow access

22973

# $tokenizer_self = $caller_tokenizer_self;

22974

22975

# extract line number for use in error messages

22976

$input_line_number = $line_of_tokens->{_line_number};

22977

22978

# reinitialize for multi-line quote

22979

$line_of_tokens->{_starting_in_quote} = $in_quote && $quote_type eq 'Q';

22980

22981

# check for pod documentation

22982

if ( ( $untrimmed_input_line =~ /^=[A-Za-z_]/ ) ) {

22983

22984

# must not be in multi-line quote

22985

# and must not be in an eqn

22986

if ( !$in_quote and ( operator_expected( 'b', '=', 'b' ) == TERM ) )

22987

{

22988

$tokenizer_self->{_in_pod} = 1;

22989

return;

22990

}

22991

}

22992

22993

$input_line = $untrimmed_input_line;

22994

22995

chomp $input_line;

22996

22997

# trim start of this line unless we are continuing a quoted line

22998

# do not trim end because we might end in a quote (test: deken4.pl)

22999

# Perl::Tidy::Formatter will delete needless trailing blanks

23000

unless ( $in_quote && ( $quote_type eq 'Q' ) ) {

23001

$input_line =~ s/^\s*//; # trim left end

23002

}

23003

23004

# update the copy of the line for use in error messages

23005

# This must be exactly what we give the pre_tokenizer

23006

$tokenizer_self->{_line_text} = $input_line;

23007

23008

# re-initialize for the main loop

23009

$routput_token_list = []; # stack of output token indexes

23010

$routput_token_type = []; # token types

23011

$routput_block_type = []; # types of code block

23012

$routput_container_type = []; # paren types, such as if, elsif, ..

23013

$routput_type_sequence = []; # nesting sequential number

23014

23015

$rhere_target_list = [];

23016

23017

$tok = $last_nonblank_token;

23018

$type = $last_nonblank_type;

23019

$prototype = $last_nonblank_prototype;

23020

$last_nonblank_i = -1;

23021

$block_type = $last_nonblank_block_type;

23022

$container_type = $last_nonblank_container_type;

23023

$type_sequence = $last_nonblank_type_sequence;

23024

$indent_flag = 0;

23025

$peeked_ahead = 0;

23026

23027

# tokenization is done in two stages..

23028

# stage 1 is a very simple pre-tokenization

23029

my $max_tokens_wanted = 0; # this signals pre_tokenize to get all tokens

23030

23031

# a little optimization for a full-line comment

23032

if ( !$in_quote && ( $input_line =~ /^#/ ) ) {

23033

$max_tokens_wanted = 1 # no use tokenizing a comment

23034

}

23035

23036

# start by breaking the line into pre-tokens

23037

( $rtokens, $rtoken_map, $rtoken_type ) =

23038

pre_tokenize( $input_line, $max_tokens_wanted );

23039

23040

$max_token_index = scalar(@$rtokens) - 1;

23041

push( @$rtokens, ' ', ' ', ' ' ); # extra whitespace simplifies logic

23042

push( @$rtoken_map, 0, 0, 0 ); # shouldn't be referenced

23043

push( @$rtoken_type, 'b', 'b', 'b' );

23044

23045

# initialize for main loop

23046

for $i ( 0 .. $max_token_index + 3 ) {

23047

$routput_token_type->[$i] = "";

23048

$routput_block_type->[$i] = "";

23049

$routput_container_type->[$i] = "";

23050

$routput_type_sequence->[$i] = "";

23051

$routput_indent_flag->[$i] = 0;

23052

}

23053

$i = -1;

23054

$i_tok = -1;

23055

23056

# ------------------------------------------------------------

23057

# begin main tokenization loop

23058

# ------------------------------------------------------------

23059

23060

# we are looking at each pre-token of one line and combining them

23061

# into tokens

23062

while ( ++$i <= $max_token_index ) {

23063

23064

if ($in_quote) { # continue looking for end of a quote

23065

$type = $quote_type;

23066

23067

unless ( @{$routput_token_list} )

23068

{ # initialize if continuation line

23069

push( @{$routput_token_list}, $i );

23070

$routput_token_type->[$i] = $type;

23071

23072

}

23073

$tok = $quote_character unless ( $quote_character =~ /^\s*$/ );

23074

23075

# scan for the end of the quote or pattern

23076

(

23077

$i, $in_quote, $quote_character, $quote_pos, $quote_depth,

23078

$quoted_string_1, $quoted_string_2

23079

)

23080

= do_quote(

23081

$i, $in_quote, $quote_character,

23082

$quote_pos, $quote_depth, $quoted_string_1,

23083

$quoted_string_2, $rtokens, $rtoken_map,

23084

$max_token_index

23085

);

23086

23087

# all done if we didn't find it

23088

last if ($in_quote);

23089

23090

# save pattern and replacement text for rescanning

23091

my $qs1 = $quoted_string_1;

23092

my $qs2 = $quoted_string_2;

23093

23094

# re-initialize for next search

23095

$quote_character = '';

23096

$quote_pos = 0;

23097

$quote_type = 'Q';

23098

$quoted_string_1 = "";

23099

$quoted_string_2 = "";

23100

last if ( ++$i > $max_token_index );

23101

23102

# look for any modifiers

23103

if ($allowed_quote_modifiers) {

23104

23105

# check for exact quote modifiers

23106

if ( $$rtokens[$i] =~ /^[A-Za-z_]/ ) {

23107

my $str = $$rtokens[$i];

23108

my $saw_modifier_e;

23109

while ( $str =~ /\G$allowed_quote_modifiers/gc ) {

23110

my $pos = pos($str);

23111

my $char = substr( $str, $pos - 1, 1 );

23112

$saw_modifier_e ||= ( $char eq 'e' );

23113

}

23114

23115

# For an 'e' quote modifier we must scan the replacement

23116

# text for here-doc targets.

23117

if ($saw_modifier_e) {

23118

23119

my $rht = scan_replacement_text($qs1);

23120

23121

# Change type from 'Q' to 'h' for quotes with

23122

# here-doc targets so that the formatter (see sub

23123

# print_line_of_tokens) will not make any line

23124

# breaks after this point.

23125

if ($rht) {

23126

push @{$rhere_target_list}, @{$rht};

23127

$type = 'h';

23128

if ( $i_tok < 0 ) {

23129

my $ilast = $routput_token_list->[-1];

23130

$routput_token_type->[$ilast] = $type;

23131

}

23132

}

23133

}

23134

23135

if ( defined( pos($str) ) ) {

23136

23137

# matched

23138

if ( pos($str) == length($str) ) {

23139

last if ( ++$i > $max_token_index );

23140

}

23141

23142

# Looks like a joined quote modifier

23143

# and keyword, maybe something like

23144

# s/xxx/yyy/gefor @k=...

23145

# Example is "galgen.pl". Would have to split

23146

# the word and insert a new token in the

23147

# pre-token list. This is so rare that I haven't

23148

# done it. Will just issue a warning citation.

23149

23150

# This error might also be triggered if my quote

23151

# modifier characters are incomplete

23152

else {

23153

warning(<<EOM);

23154

23155

Partial match to quote modifier $allowed_quote_modifiers at word: '$str'

23156

Please put a space between quote modifiers and trailing keywords.

23157

EOM

23158

23159

# print "token $$rtokens[$i]\n";

23160

# my $num = length($str) - pos($str);

23161

# $$rtokens[$i]=substr($$rtokens[$i],pos($str),$num);

23162

# print "continuing with new token $$rtokens[$i]\n";

23163

23164

# skipping past this token does least damage

23165

last if ( ++$i > $max_token_index );

23166

}

23167

}

23168

else {

23169

23170

# example file: rokicki4.pl

23171

# This error might also be triggered if my quote

23172

# modifier characters are incomplete

23173

write_logfile_entry(

23174

"Note: found word $str at quote modifier location\n"

23175

);

23176

}

23177

}

23178

23179

# re-initialize

23180

$allowed_quote_modifiers = "";

23181

}

23182

}

23183

23184

unless ( $tok =~ /^\s*$/ ) {

23185

23186

# try to catch some common errors

23187

if ( ( $type eq 'n' ) && ( $tok ne '0' ) ) {

23188

23189

if ( $last_nonblank_token eq 'eq' ) {

23190

complain("Should 'eq' be '==' here ?\n");

23191

}

23192

elsif ( $last_nonblank_token eq 'ne' ) {

23193

complain("Should 'ne' be '!=' here ?\n");

23194

}

23195

}

23196

23197

$last_last_nonblank_token = $last_nonblank_token;

23198

$last_last_nonblank_type = $last_nonblank_type;

23199

$last_last_nonblank_block_type = $last_nonblank_block_type;

23200

$last_last_nonblank_container_type =

23201

$last_nonblank_container_type;

23202

$last_last_nonblank_type_sequence =

23203

$last_nonblank_type_sequence;

23204

$last_nonblank_token = $tok;

23205

$last_nonblank_type = $type;

23206

$last_nonblank_prototype = $prototype;

23207

$last_nonblank_block_type = $block_type;

23208

$last_nonblank_container_type = $container_type;

23209

$last_nonblank_type_sequence = $type_sequence;

23210

$last_nonblank_i = $i_tok;

23211

}

23212

23213

# store previous token type

23214

if ( $i_tok >= 0 ) {

23215

$routput_token_type->[$i_tok] = $type;

23216

$routput_block_type->[$i_tok] = $block_type;

23217

$routput_container_type->[$i_tok] = $container_type;

23218

$routput_type_sequence->[$i_tok] = $type_sequence;

23219

$routput_indent_flag->[$i_tok] = $indent_flag;

23220

}

23221

my $pre_tok = $$rtokens[$i]; # get the next pre-token

23222

my $pre_type = $$rtoken_type[$i]; # and type

23223

$tok = $pre_tok;

23224

$type = $pre_type; # to be modified as necessary

23225

$block_type = ""; # blank for all tokens except code block braces

23226

$container_type = ""; # blank for all tokens except some parens

23227

$type_sequence = ""; # blank for all tokens except ?/:

23228

$indent_flag = 0;

23229

$prototype = ""; # blank for all tokens except user defined subs

23230

$i_tok = $i;

23231

23232

# this pre-token will start an output token

23233

push( @{$routput_token_list}, $i_tok );

23234

23235

# continue gathering identifier if necessary

23236

# but do not start on blanks and comments

23237

if ( $id_scan_state && $pre_type !~ /[b#]/ ) {

23238

23239

if ( $id_scan_state =~ /^(sub|package)/ ) {

23240

scan_id();

23241

}

23242

else {

23243

scan_identifier();

23244

}

23245

23246

last if ($id_scan_state);

23247

next if ( ( $i > 0 ) || $type );

23248

23249

# didn't find any token; start over

23250

$type = $pre_type;

23251

$tok = $pre_tok;

23252

}

23253

23254

# handle whitespace tokens..

23255

next if ( $type eq 'b' );

23256

my $prev_tok = $i > 0 ? $$rtokens[ $i - 1 ] : ' ';

23257

my $prev_type = $i > 0 ? $$rtoken_type[ $i - 1 ] : 'b';

23258

23259

# Build larger tokens where possible, since we are not in a quote.

23260

#

23261

# First try to assemble digraphs. The following tokens are

23262

# excluded and handled specially:

23263

# '/=' is excluded because the / might start a pattern.

23264

# 'x=' is excluded since it might be $x=, with $ on previous line

23265

# '**' and *= might be typeglobs of punctuation variables

23266

# I have allowed tokens starting with <, such as <=,

23267

# because I don't think these could be valid angle operators.

23268

# test file: storrs4.pl

23269

my $test_tok = $tok . $$rtokens[ $i + 1 ];

23270

my $combine_ok = $is_digraph{$test_tok};

23271

23272

# check for special cases which cannot be combined

23273

if ($combine_ok) {

23274

23275

# '//' must be defined_or operator if an operator is expected.

23276

# TODO: Code for other ambiguous digraphs (/=, x=, **, *=)

23277

# could be migrated here for clarity

23278

if ( $test_tok eq '//' ) {

23279

my $next_type = $$rtokens[ $i + 1 ];

23280

my $expecting =

23281

operator_expected( $prev_type, $tok, $next_type );

23282

$combine_ok = 0 unless ( $expecting == OPERATOR );

23283

}

23284

}

23285

23286

if (

23287

$combine_ok

23288

&& ( $test_tok ne '/=' ) # might be pattern

23289

&& ( $test_tok ne 'x=' ) # might be $x

23290

&& ( $test_tok ne '**' ) # typeglob?

23291

&& ( $test_tok ne '*=' ) # typeglob?

23292

)

23293

{

23294

$tok = $test_tok;

23295

$i++;

23296

23297

# Now try to assemble trigraphs. Note that all possible

23298

# perl trigraphs can be constructed by appending a character

23299

# to a digraph.

23300

$test_tok = $tok . $$rtokens[ $i + 1 ];

23301

23302

if ( $is_trigraph{$test_tok} ) {

23303

$tok = $test_tok;

23304

$i++;

23305

}

23306

}

23307

23308

$type = $tok;

23309

$next_tok = $$rtokens[ $i + 1 ];

23310

$next_type = $$rtoken_type[ $i + 1 ];

23311

23312

TOKENIZER_DEBUG_FLAG_TOKENIZE && do {

23313

local $" = ')(';

23314

my @debug_list = (

23315

$last_nonblank_token, $tok,

23316

$next_tok, $brace_depth,

23317

$brace_type[$brace_depth], $paren_depth,

23318

$paren_type[$paren_depth]

23319

);

23320

print "TOKENIZE:(@debug_list)\n";

23321

};

23322

23323

# turn off attribute list on first non-blank, non-bareword

23324

if ( $pre_type ne 'w' ) { $in_attribute_list = 0 }

23325

23326

###############################################################

23327

# We have the next token, $tok.

23328

# Now we have to examine this token and decide what it is

23329

# and define its $type

23330

#

23331

# section 1: bare words

23332

###############################################################

23333

23334

if ( $pre_type eq 'w' ) {

23335

$expecting = operator_expected( $prev_type, $tok, $next_type );

23336

my ( $next_nonblank_token, $i_next ) =

23337

find_next_nonblank_token( $i, $rtokens, $max_token_index );

23338

23339

# ATTRS: handle sub and variable attributes

23340

if ($in_attribute_list) {

23341

23342

# treat bare word followed by open paren like qw(

23343

if ( $next_nonblank_token eq '(' ) {

23344

$in_quote = $quote_items{'q'};

23345

$allowed_quote_modifiers = $quote_modifiers{'q'};

23346

$type = 'q';

23347

$quote_type = 'q';

23348

next;

23349

}

23350

23351

# handle bareword not followed by open paren

23352

else {

23353

$type = 'w';

23354

next;

23355

}

23356

}

23357

23358

# quote a word followed by => operator

23359

if ( $next_nonblank_token eq '=' ) {

23360

23361

if ( $$rtokens[ $i_next + 1 ] eq '>' ) {

23362

if ( $is_constant{$current_package}{$tok} ) {

23363

$type = 'C';

23364

}

23365

elsif ( $is_user_function{$current_package}{$tok} ) {

23366

$type = 'U';

23367

$prototype =

23368

$user_function_prototype{$current_package}{$tok};

23369

}

23370

elsif ( $tok =~ /^v\d+$/ ) {

23371

$type = 'v';

23372

report_v_string($tok);

23373

}

23374

else { $type = 'w' }

23375

23376

next;

23377

}

23378

}

23379

23380

# quote a bare word within braces..like xxx->{s}; note that we

23381

# must be sure this is not a structural brace, to avoid

23382

# mistaking {s} in the following for a quoted bare word:

23383

# for(@[){s}bla}BLA}

23384

# Also treat q in something like var{-q} as a bare word, not qoute operator

23385

##if ( ( $last_nonblank_type eq 'L' )

23386

## && ( $next_nonblank_token eq '}' ) )

23387

if (

23388

$next_nonblank_token eq '}'

23389

&& (

23390

$last_nonblank_type eq 'L'

23391

|| ( $last_nonblank_type eq 'm'

23392

&& $last_last_nonblank_type eq 'L' )

23393

)

23394

)

23395

{

23396

$type = 'w';

23397

next;

23398

}

23399

23400

# a bare word immediately followed by :: is not a keyword;

23401

# use $tok_kw when testing for keywords to avoid a mistake

23402

my $tok_kw = $tok;

23403

if ( $$rtokens[ $i + 1 ] eq ':' && $$rtokens[ $i + 2 ] eq ':' )

23404

{

23405

$tok_kw .= '::';

23406

}

23407

23408

# handle operator x (now we know it isn't $x=)

23409

if ( ( $tok =~ /^x\d*$/ ) && ( $expecting == OPERATOR ) ) {

23410

if ( $tok eq 'x' ) {

23411

23412

if ( $$rtokens[ $i + 1 ] eq '=' ) { # x=

23413

$tok = 'x=';

23414

$type = $tok;

23415

$i++;

23416

}

23417

else {

23418

$type = 'x';

23419

}

23420

}

23421

23422

# FIXME: Patch: mark something like x4 as an integer for now

23423

# It gets fixed downstream. This is easier than

23424

# splitting the pretoken.

23425

else {

23426

$type = 'n';

23427

}

23428

}

23429

23430

elsif ( ( $tok eq 'strict' )

23431

and ( $last_nonblank_token eq 'use' ) )

23432

{

23433

$tokenizer_self->{_saw_use_strict} = 1;

23434

scan_bare_identifier();

23435

}

23436

23437

elsif ( ( $tok eq 'warnings' )

23438

and ( $last_nonblank_token eq 'use' ) )

23439

{

23440

$tokenizer_self->{_saw_perl_dash_w} = 1;

23441

23442

# scan as identifier, so that we pick up something like:

23443

# use warnings::register

23444

scan_bare_identifier();

23445

}

23446

23447

elsif (

23448

$tok eq 'AutoLoader'

23449

&& $tokenizer_self->{_look_for_autoloader}

23450

&& (

23451

$last_nonblank_token eq 'use'

23452

23453

# these regexes are from AutoSplit.pm, which we want

23454

# to mimic

23455

|| $input_line =~ /^\s*(use|require)\s+AutoLoader\b/

23456

|| $input_line =~ /\bISA\s*=.*\bAutoLoader\b/

23457

)

23458

)

23459

{

23460

write_logfile_entry("AutoLoader seen, -nlal deactivates\n");

23461

$tokenizer_self->{_saw_autoloader} = 1;

23462

$tokenizer_self->{_look_for_autoloader} = 0;

23463

scan_bare_identifier();

23464

}

23465

23466

elsif (

23467

$tok eq 'SelfLoader'

23468

&& $tokenizer_self->{_look_for_selfloader}

23469

&& ( $last_nonblank_token eq 'use'

23470

|| $input_line =~ /^\s*(use|require)\s+SelfLoader\b/

23471

|| $input_line =~ /\bISA\s*=.*\bSelfLoader\b/ )

23472

)

23473

{

23474

write_logfile_entry("SelfLoader seen, -nlsl deactivates\n");

23475

$tokenizer_self->{_saw_selfloader} = 1;

23476

$tokenizer_self->{_look_for_selfloader} = 0;

23477

scan_bare_identifier();

23478

}

23479

23480

elsif ( ( $tok eq 'constant' )

23481

and ( $last_nonblank_token eq 'use' ) )

23482

{

23483

scan_bare_identifier();

23484

my ( $next_nonblank_token, $i_next ) =

23485

find_next_nonblank_token( $i, $rtokens,

23486

$max_token_index );

23487

23488

if ($next_nonblank_token) {

23489

23490

if ( $is_keyword{$next_nonblank_token} ) {

23491

warning(

23492

"Attempting to define constant '$next_nonblank_token' which is a perl keyword\n"

23493

);

23494

}

23495

23496

# FIXME: could check for error in which next token is

23497

# not a word (number, punctuation, ..)

23498

else {

23499

$is_constant{$current_package}

23500

{$next_nonblank_token} = 1;

23501

}

23502

}

23503

}

23504

23505

# various quote operators

23506

elsif ( $is_q_qq_qw_qx_qr_s_y_tr_m{$tok} ) {

23507

if ( $expecting == OPERATOR ) {

23508

23509

# patch for paren-less for/foreach glitch, part 1

23510

# perl will accept this construct as valid:

23511

#

23512

# foreach my $key qw\Uno Due Tres Quadro\ {

23513

# print "Set $key\n";

23514

# }

23515

unless ( $tok eq 'qw' && $is_for_foreach{$want_paren} )

23516

{

23517

error_if_expecting_OPERATOR();

23518

}

23519

}

23520

$in_quote = $quote_items{$tok};

23521

$allowed_quote_modifiers = $quote_modifiers{$tok};

23522

23523

# All quote types are 'Q' except possibly qw quotes.

23524

# qw quotes are special in that they may generally be trimmed

23525

# of leading and trailing whitespace. So they are given a

23526

# separate type, 'q', unless requested otherwise.

23527

$type =

23528

( $tok eq 'qw' && $tokenizer_self->{_trim_qw} )

23529

? 'q'

23530

: 'Q';

23531

$quote_type = $type;

23532

}

23533

23534

# check for a statement label

23535

elsif (

23536

( $next_nonblank_token eq ':' )

23537

&& ( $$rtokens[ $i_next + 1 ] ne ':' )

23538

&& ( $i_next <= $max_token_index ) # colon on same line

23539

&& label_ok()

23540

)

23541

{

23542

if ( $tok !~ /[A-Z]/ ) {

23543

push @{ $tokenizer_self->{_rlower_case_labels_at} },

23544

$input_line_number;

23545

}

23546

$type = 'J';

23547

$tok .= ':';

23548

$i = $i_next;

23549

next;

23550

}

23551

23552

# 'sub' || 'package'

23553

elsif ( $is_sub_package{$tok_kw} ) {

23554

error_if_expecting_OPERATOR()

23555

if ( $expecting == OPERATOR );

23556

scan_id();

23557

}

23558

23559

# Note on token types for format, __DATA__, __END__:

23560

# It simplifies things to give these type ';', so that when we

23561

# start rescanning we will be expecting a token of type TERM.

23562

# We will switch to type 'k' before outputting the tokens.

23563

elsif ( $is_format_END_DATA{$tok_kw} ) {

23564

$type = ';'; # make tokenizer look for TERM next

23565

$tokenizer_self->{ $is_format_END_DATA{$tok_kw} } = 1;

23566

last;

23567

}

23568

23569

elsif ( $is_keyword{$tok_kw} ) {

23570

$type = 'k';

23571

23572

# Since for and foreach may not be followed immediately

23573

# by an opening paren, we have to remember which keyword

23574

# is associated with the next '('

23575

if ( $is_for_foreach{$tok} ) {

23576

if ( new_statement_ok() ) {

23577

$want_paren = $tok;

23578

}

23579

}

23580

23581

# recognize 'use' statements, which are special

23582

elsif ( $is_use_require{$tok} ) {

23583

$statement_type = $tok;

23584

error_if_expecting_OPERATOR()

23585

if ( $expecting == OPERATOR );

23586

}

23587

23588

# remember my and our to check for trailing ": shared"

23589

elsif ( $is_my_our{$tok} ) {

23590

$statement_type = $tok;

23591

}

23592

23593

# Check for misplaced 'elsif' and 'else', but allow isolated

23594

# else or elsif blocks to be formatted. This is indicated

23595

# by a last nonblank token of ';'

23596

elsif ( $tok eq 'elsif' ) {

23597

if ( $last_nonblank_token ne ';'

23598

&& $last_nonblank_block_type !~

23599

/^(if|elsif|unless)$/ )

23600

{

23601

warning(

23602

"expecting '$tok' to follow one of 'if|elsif|unless'\n"

23603

);

23604

}

23605

}

23606

elsif ( $tok eq 'else' ) {

23607

23608

# patched for SWITCH/CASE

23609

if ( $last_nonblank_token ne ';'

23610

&& $last_nonblank_block_type !~

23611

/^(if|elsif|unless|case|when)$/ )

23612

{

23613

warning(

23614

"expecting '$tok' to follow one of 'if|elsif|unless|case|when'\n"

23615

);

23616

}

23617

}

23618

elsif ( $tok eq 'continue' ) {

23619

if ( $last_nonblank_token ne ';'

23620

&& $last_nonblank_block_type !~

23621

/(^(\{|\}|;|while|until|for|foreach)|:$)/ )

23622

{

23623

23624

# note: ';' '{' and '}' in list above

23625

# because continues can follow bare blocks;

23626

# ':' is labeled block

23627

#

23628

############################################

23629

# NOTE: This check has been deactivated because

23630

# continue has an alternative usage for given/when

23631

# blocks in perl 5.10

23632

## warning("'$tok' should follow a block\n");

23633

############################################

23634

}

23635

}

23636

23637

# patch for SWITCH/CASE if 'case' and 'when' are

23638

# treated as keywords.

23639

elsif ( $tok eq 'when' || $tok eq 'case' ) {

23640

$statement_type = $tok; # next '{' is block

23641

}

23642

23643

# indent trailing if/unless/while/until

23644

# outdenting will be handled by later indentation loop

23645

if ( $tok =~ /^(if|unless|while|until)$/

23646

&& $next_nonblank_token ne '(' )

23647

{

23648

$indent_flag = 1;

23649

}

23650

}

23651

23652

# check for inline label following

23653

# /^(redo|last|next|goto)$/

23654

elsif (( $last_nonblank_type eq 'k' )

23655

&& ( $is_redo_last_next_goto{$last_nonblank_token} ) )

23656

{

23657

$type = 'j';

23658

next;

23659

}

23660

23661

# something else --

23662

else {

23663

23664

scan_bare_identifier();

23665

if ( $type eq 'w' ) {

23666

23667

if ( $expecting == OPERATOR ) {

23668

23669

# don't complain about possible indirect object

23670

# notation.

23671

# For example:

23672

# package main;

23673

# sub new($) { ... }

23674

# $b = new A::; # calls A::new

23675

# $c = new A; # same thing but suspicious

23676

# This will call A::new but we have a 'new' in

23677

# main:: which looks like a constant.

23678

#

23679

if ( $last_nonblank_type eq 'C' ) {

23680

if ( $tok !~ /::$/ ) {

23681

complain(<<EOM);

23682

Expecting operator after '$last_nonblank_token' but found bare word '$tok'

23683

Maybe indirectet object notation?

23684

EOM

23685

}

23686

}

23687

else {

23688

error_if_expecting_OPERATOR("bareword");

23689

}

23690

}

23691

23692

# mark bare words immediately followed by a paren as

23693

# functions

23694

$next_tok = $$rtokens[ $i + 1 ];

23695

if ( $next_tok eq '(' ) {

23696

$type = 'U';

23697

}

23698

23699

# underscore after file test operator is file handle

23700

if ( $tok eq '_' && $last_nonblank_type eq 'F' ) {

23701

$type = 'Z';

23702

}

23703

23704

# patch for SWITCH/CASE if 'case' and 'when' are

23705

# not treated as keywords:

23706

if (

23707

(

23708

$tok eq 'case'

23709

&& $brace_type[$brace_depth] eq 'switch'

23710

)

23711

|| ( $tok eq 'when'

23712

&& $brace_type[$brace_depth] eq 'given' )

23713

)

23714

{

23715

$statement_type = $tok; # next '{' is block

23716

$type = 'k'; # for keyword syntax coloring

23717

}

23718

23719

# patch for SWITCH/CASE if switch and given not keywords

23720

# Switch is not a perl 5 keyword, but we will gamble

23721

# and mark switch followed by paren as a keyword. This

23722

# is only necessary to get html syntax coloring nice,

23723

# and does not commit this as being a switch/case.

23724

if ( $next_nonblank_token eq '('

23725

&& ( $tok eq 'switch' || $tok eq 'given' ) )

23726

{

23727

$type = 'k'; # for keyword syntax coloring

23728

}

23729

}

23730

}

23731

}

23732

23733

###############################################################

23734

# section 2: strings of digits

23735

###############################################################

23736

elsif ( $pre_type eq 'd' ) {

23737

$expecting = operator_expected( $prev_type, $tok, $next_type );

23738

error_if_expecting_OPERATOR("Number")

23739

if ( $expecting == OPERATOR );

23740

my $number = scan_number();

23741

if ( !defined($number) ) {

23742

23743

# shouldn't happen - we should always get a number

23744

warning("non-number beginning with digit--program bug\n");

23745

report_definite_bug();

23746

}

23747

}

23748

23749

###############################################################

23750

# section 3: all other tokens

23751

###############################################################

23752

23753

else {

23754

last if ( $tok eq '#' );

23755

my $code = $tokenization_code->{$tok};

23756

if ($code) {

23757

$expecting =

23758

operator_expected( $prev_type, $tok, $next_type );

23759

$code->();

23760

redo if $in_quote;

23761

}

23762

}

23763

}

23764

23765

# -----------------------------

23766

# end of main tokenization loop

23767

# -----------------------------

23768

23769

if ( $i_tok >= 0 ) {

23770

$routput_token_type->[$i_tok] = $type;

23771

$routput_block_type->[$i_tok] = $block_type;

23772

$routput_container_type->[$i_tok] = $container_type;

23773

$routput_type_sequence->[$i_tok] = $type_sequence;

23774

$routput_indent_flag->[$i_tok] = $indent_flag;

23775

}

23776

23777

unless ( ( $type eq 'b' ) || ( $type eq '#' ) ) {

23778

$last_last_nonblank_token = $last_nonblank_token;

23779

$last_last_nonblank_type = $last_nonblank_type;

23780

$last_last_nonblank_block_type = $last_nonblank_block_type;

23781

$last_last_nonblank_container_type = $last_nonblank_container_type;

23782

$last_last_nonblank_type_sequence = $last_nonblank_type_sequence;

23783

$last_nonblank_token = $tok;

23784

$last_nonblank_type = $type;

23785

$last_nonblank_block_type = $block_type;

23786

$last_nonblank_container_type = $container_type;

23787

$last_nonblank_type_sequence = $type_sequence;

23788

$last_nonblank_prototype = $prototype;

23789

}

23790

23791

# reset indentation level if necessary at a sub or package

23792

# in an attempt to recover from a nesting error

23793

if ( $level_in_tokenizer < 0 ) {

23794

if ( $input_line =~ /^\s*(sub|package)\s+(\w+)/ ) {

23795

reset_indentation_level(0);

23796

brace_warning("resetting level to 0 at $1 $2\n");

23797

}

23798

}

23799

23800

# all done tokenizing this line ...

23801

# now prepare the final list of tokens and types

23802

23803

my @token_type = (); # stack of output token types

23804

my @block_type = (); # stack of output code block types

23805

my @container_type = (); # stack of output code container types

23806

my @type_sequence = (); # stack of output type sequence numbers

23807

my @tokens = (); # output tokens

23808

my @levels = (); # structural brace levels of output tokens

23809

my @slevels = (); # secondary nesting levels of output tokens

23810

my @nesting_tokens = (); # string of tokens leading to this depth

23811

my @nesting_types = (); # string of token types leading to this depth

23812

my @nesting_blocks = (); # string of block types leading to this depth

23813

my @nesting_lists = (); # string of list types leading to this depth

23814

my @ci_string = (); # string needed to compute continuation indentation

23815

my @container_environment = (); # BLOCK or LIST

23816

my $container_environment = '';

23817

my $im = -1; # previous $i value

23818

my $num;

23819

my $ci_string_sum = ones_count($ci_string_in_tokenizer);

23820

23821

# Computing Token Indentation

23822

#

23823

# The final section of the tokenizer forms tokens and also computes

23824

# parameters needed to find indentation. It is much easier to do it

23825

# in the tokenizer than elsewhere. Here is a brief description of how

23826

# indentation is computed. Perl::Tidy computes indentation as the sum

23827

# of 2 terms:

23828

#

23829

# (1) structural indentation, such as if/else/elsif blocks

23830

# (2) continuation indentation, such as long parameter call lists.

23831

#

23832

# These are occasionally called primary and secondary indentation.

23833

#

23834

# Structural indentation is introduced by tokens of type '{', although

23835

# the actual tokens might be '{', '(', or '['. Structural indentation

23836

# is of two types: BLOCK and non-BLOCK. Default structural indentation

23837

# is 4 characters if the standard indentation scheme is used.

23838

#

23839

# Continuation indentation is introduced whenever a line at BLOCK level

23840

# is broken before its termination. Default continuation indentation

23841

# is 2 characters in the standard indentation scheme.

23842

#

23843

# Both types of indentation may be nested arbitrarily deep and

23844

# interlaced. The distinction between the two is somewhat arbitrary.

23845

#

23846

# For each token, we will define two variables which would apply if

23847

# the current statement were broken just before that token, so that

23848

# that token started a new line:

23849

#

23850

# $level = the structural indentation level,

23851

# $ci_level = the continuation indentation level

23852

#

23853

# The total indentation will be $level * (4 spaces) + $ci_level * (2 spaces),

23854

# assuming defaults. However, in some special cases it is customary

23855

# to modify $ci_level from this strict value.

23856

#

23857

# The total structural indentation is easy to compute by adding and

23858

# subtracting 1 from a saved value as types '{' and '}' are seen. The

23859

# running value of this variable is $level_in_tokenizer.

23860

#

23861

# The total continuation is much more difficult to compute, and requires

23862

# several variables. These variables are:

23863

#

23864

# $ci_string_in_tokenizer = a string of 1's and 0's indicating, for

23865

# each indentation level, if there are intervening open secondary

23866

# structures just prior to that level.

23867

# $continuation_string_in_tokenizer = a string of 1's and 0's indicating

23868

# if the last token at that level is "continued", meaning that it

23869

# is not the first token of an expression.

23870

# $nesting_block_string = a string of 1's and 0's indicating, for each

23871

# indentation level, if the level is of type BLOCK or not.

23872

# $nesting_block_flag = the most recent 1 or 0 of $nesting_block_string

23873

# $nesting_list_string = a string of 1's and 0's indicating, for each

23874

# indentation level, if it is appropriate for list formatting.

23875

# If so, continuation indentation is used to indent long list items.

23876

# $nesting_list_flag = the most recent 1 or 0 of $nesting_list_string

23877

# @{$rslevel_stack} = a stack of total nesting depths at each

23878

# structural indentation level, where "total nesting depth" means

23879

# the nesting depth that would occur if every nesting token -- '{', '[',

23880

# and '(' -- , regardless of context, is used to compute a nesting

23881

# depth.

23882

23883

#my $nesting_block_flag = ($nesting_block_string =~ /1$/);

23884

#my $nesting_list_flag = ($nesting_list_string =~ /1$/);

23885

23886

my ( $ci_string_i, $level_i, $nesting_block_string_i,

23887

$nesting_list_string_i, $nesting_token_string_i,

23888

$nesting_type_string_i, );

23889

23890

foreach $i ( @{$routput_token_list} )

23891

{ # scan the list of pre-tokens indexes

23892

23893

# self-checking for valid token types

23894

my $type = $routput_token_type->[$i];

23895

my $forced_indentation_flag = $routput_indent_flag->[$i];

23896

23897

# See if we should undo the $forced_indentation_flag.

23898

# Forced indentation after 'if', 'unless', 'while' and 'until'

23899

# expressions without trailing parens is optional and doesn't

23900

# always look good. It is usually okay for a trailing logical

23901

# expression, but if the expression is a function call, code block,

23902

# or some kind of list it puts in an unwanted extra indentation

23903

# level which is hard to remove.

23904

#

23905

# Example where extra indentation looks ok:

23906

# return 1

23907

# if $det_a < 0 and $det_b > 0

23908

# or $det_a > 0 and $det_b < 0;

23909

#

23910

# Example where extra indentation is not needed because

23911

# the eval brace also provides indentation:

23912

# print "not " if defined eval {

23913

# reduce { die if $b > 2; $a + $b } 0, 1, 2, 3, 4;

23914

# };

23915

#

23916

# The following rule works fairly well:

23917

# Undo the flag if the end of this line, or start of the next

23918

# line, is an opening container token or a comma.

23919

# This almost always works, but if not after another pass it will

23920

# be stable.

23921

if ( $forced_indentation_flag && $type eq 'k' ) {

23922

my $ixlast = -1;

23923

my $ilast = $routput_token_list->[$ixlast];

23924

my $toklast = $routput_token_type->[$ilast];

23925

if ( $toklast eq '#' ) {

23926

$ixlast--;

23927

$ilast = $routput_token_list->[$ixlast];

23928

$toklast = $routput_token_type->[$ilast];

23929

}

23930

if ( $toklast eq 'b' ) {

23931

$ixlast--;

23932

$ilast = $routput_token_list->[$ixlast];

23933

$toklast = $routput_token_type->[$ilast];

23934

}

23935

if ( $toklast =~ /^[\{,]$/ ) {

23936

$forced_indentation_flag = 0;

23937

}

23938

else {

23939

( $toklast, my $i_next ) =

23940

find_next_nonblank_token( $max_token_index, $rtokens,

23941

$max_token_index );

23942

if ( $toklast =~ /^[\{,]$/ ) {

23943

$forced_indentation_flag = 0;

23944

}

23945

}

23946

}

23947

23948

# if we are already in an indented if, see if we should outdent

23949

if ($indented_if_level) {

23950

23951

# don't try to nest trailing if's - shouldn't happen

23952

if ( $type eq 'k' ) {

23953

$forced_indentation_flag = 0;

23954

}

23955

23956

# check for the normal case - outdenting at next ';'

23957

elsif ( $type eq ';' ) {

23958

if ( $level_in_tokenizer == $indented_if_level ) {

23959

$forced_indentation_flag = -1;

23960

$indented_if_level = 0;

23961

}

23962

}

23963

23964

# handle case of missing semicolon

23965

elsif ( $type eq '}' ) {

23966

if ( $level_in_tokenizer == $indented_if_level ) {

23967

$indented_if_level = 0;

23968

23969

# TBD: This could be a subroutine call

23970

$level_in_tokenizer--;

23971

if ( @{$rslevel_stack} > 1 ) {

23972

pop( @{$rslevel_stack} );

23973

}

23974

if ( length($nesting_block_string) > 1 )

23975

{ # true for valid script

23976

chop $nesting_block_string;

23977

chop $nesting_list_string;

23978

}

23979

23980

}

23981

}

23982

}

23983

23984

my $tok = $$rtokens[$i]; # the token, but ONLY if same as pretoken

23985

$level_i = $level_in_tokenizer;

23986

23987

# This can happen by running perltidy on non-scripts

23988

# although it could also be a bug introduced by a programming change.

23989

# Perl silently accepts a 032 (^Z) and takes it as the end

23990

if ( !$is_valid_token_type{$type} ) {

23991

my $val = ord($type);

23992

warning(

23993

"unexpected character decimal $val ($type) in script\n");

23994

$tokenizer_self->{_in_error} = 1;

23995

}

23996

23997

# ----------------------------------------------------------------

23998

# TOKEN TYPE PATCHES

23999

# output __END__, __DATA__, and format as type 'k' instead of ';'

24000

# to make html colors correct, etc.

24001

my $fix_type = $type;

24002

if ( $type eq ';' && $tok =~ /\w/ ) { $fix_type = 'k' }

24003

24004

# output anonymous 'sub' as keyword

24005

if ( $type eq 't' && $tok eq 'sub' ) { $fix_type = 'k' }

24006

24007

# -----------------------------------------------------------------

24008

24009

$nesting_token_string_i = $nesting_token_string;

24010

$nesting_type_string_i = $nesting_type_string;

24011

$nesting_block_string_i = $nesting_block_string;

24012

$nesting_list_string_i = $nesting_list_string;

24013

24014

# set primary indentation levels based on structural braces

24015

# Note: these are set so that the leading braces have a HIGHER

24016

# level than their CONTENTS, which is convenient for indentation

24017

# Also, define continuation indentation for each token.

24018

if ( $type eq '{' || $type eq 'L' || $forced_indentation_flag > 0 )

24019

{

24020

24021

# use environment before updating

24022

$container_environment =

24023

$nesting_block_flag ? 'BLOCK'

24024

: $nesting_list_flag ? 'LIST'

24025

: "";

24026

24027

# if the difference between total nesting levels is not 1,

24028

# there are intervening non-structural nesting types between

24029

# this '{' and the previous unclosed '{'

24030

my $intervening_secondary_structure = 0;

24031

if ( @{$rslevel_stack} ) {

24032

$intervening_secondary_structure =

24033

$slevel_in_tokenizer - $rslevel_stack->[-1];

24034

}

24035

24036

# Continuation Indentation

24037

#

24038

# Having tried setting continuation indentation both in the formatter and

24039

# in the tokenizer, I can say that setting it in the tokenizer is much,

24040

# much easier. The formatter already has too much to do, and can't

24041

# make decisions on line breaks without knowing what 'ci' will be at

24042

# arbitrary locations.

24043

#

24044

# But a problem with setting the continuation indentation (ci) here

24045

# in the tokenizer is that we do not know where line breaks will actually

24046

# be. As a result, we don't know if we should propagate continuation

24047

# indentation to higher levels of structure.

24048

#

24049

# For nesting of only structural indentation, we never need to do this.

24050

# For example, in a long if statement, like this

24051

#

24052

# if ( !$output_block_type[$i]

24053

# && ($in_statement_continuation) )

24054

# { <--outdented

24055

# do_something();

24056

# }

24057

#

24058

# the second line has ci but we do not normally give the lines within the BLOCK

24059

# any ci. This would be true if we had blocks nested arbitrarily deeply.

24060

#

24061

# But consider something like this, where we have created a break after

24062

# an opening paren on line 1, and the paren is not (currently) a

24063

# structural indentation token:

24064

#

24065

# my $file = $menubar->Menubutton(

24066

# qw/-text File -underline 0 -menuitems/ => [

24067

# [

24068

# Cascade => '~View',

24069

# -menuitems => [

24070

# ...

24071

#

24072

# The second line has ci, so it would seem reasonable to propagate it

24073

# down, giving the third line 1 ci + 1 indentation. This suggests the

24074

# following rule, which is currently used to propagate ci down: if there

24075

# are any non-structural opening parens (or brackets, or braces), before

24076

# an opening structural brace, then ci is propagated down, and otherwise

24077

# not. The variable $intervening_secondary_structure contains this

24078

# information for the current token, and the string

24079

# "$ci_string_in_tokenizer" is a stack of previous values of this

24080

# variable.

24081

24082

# save the current states

24083

push( @{$rslevel_stack}, 1 + $slevel_in_tokenizer );

24084

$level_in_tokenizer++;

24085

24086

if ($forced_indentation_flag) {

24087

24088

# break BEFORE '?' when there is forced indentation

24089

if ( $type eq '?' ) { $level_i = $level_in_tokenizer; }

24090

if ( $type eq 'k' ) {

24091

$indented_if_level = $level_in_tokenizer;

24092

}

24093

}

24094

24095

if ( $routput_block_type->[$i] ) {

24096

$nesting_block_flag = 1;

24097

$nesting_block_string .= '1';

24098

}

24099

else {

24100

$nesting_block_flag = 0;

24101

$nesting_block_string .= '0';

24102

}

24103

24104

# we will use continuation indentation within containers

24105

# which are not blocks and not logical expressions

24106

my $bit = 0;

24107

if ( !$routput_block_type->[$i] ) {

24108

24109

# propagate flag down at nested open parens

24110

if ( $routput_container_type->[$i] eq '(' ) {

24111

$bit = 1 if $nesting_list_flag;

24112

}

24113

24114

# use list continuation if not a logical grouping

24115

# /^(if|elsif|unless|while|and|or|not|&&|!|\|\||for|foreach)$/

24116

else {

24117

$bit = 1

24118

unless

24119

$is_logical_container{ $routput_container_type->[$i]

24120

};

24121

}

24122

}

24123

$nesting_list_string .= $bit;

24124

$nesting_list_flag = $bit;

24125

24126

$ci_string_in_tokenizer .=

24127

( $intervening_secondary_structure != 0 ) ? '1' : '0';

24128

$ci_string_sum = ones_count($ci_string_in_tokenizer);

24129

$continuation_string_in_tokenizer .=

24130

( $in_statement_continuation > 0 ) ? '1' : '0';

24131

24132

# Sometimes we want to give an opening brace continuation indentation,

24133

# and sometimes not. For code blocks, we don't do it, so that the leading

24134

# '{' gets outdented, like this:

24135

#

24136

# if ( !$output_block_type[$i]

24137

# && ($in_statement_continuation) )

24138

# { <--outdented

24139

#

24140

# For other types, we will give them continuation indentation. For example,

24141

# here is how a list looks with the opening paren indented:

24142

#

24143

# @LoL =

24144

# ( [ "fred", "barney" ], [ "george", "jane", "elroy" ],

24145

# [ "homer", "marge", "bart" ], );

24146

#

24147

# This looks best when 'ci' is one-half of the indentation (i.e., 2 and 4)

24148

24149

my $total_ci = $ci_string_sum;

24150

if (

24151

!$routput_block_type->[$i] # patch: skip for BLOCK

24152

&& ($in_statement_continuation)

24153

&& !( $forced_indentation_flag && $type eq ':' )

24154

)

24155

{

24156

$total_ci += $in_statement_continuation

24157

unless ( $ci_string_in_tokenizer =~ /1$/ );

24158

}

24159

24160

$ci_string_i = $total_ci;

24161

$in_statement_continuation = 0;

24162

}

24163

24164

elsif ($type eq '}'

24165

|| $type eq 'R'

24166

|| $forced_indentation_flag < 0 )

24167

{

24168

24169

# only a nesting error in the script would prevent popping here

24170

if ( @{$rslevel_stack} > 1 ) { pop( @{$rslevel_stack} ); }

24171

24172

$level_i = --$level_in_tokenizer;

24173

24174

# restore previous level values

24175

if ( length($nesting_block_string) > 1 )

24176

{ # true for valid script

24177

chop $nesting_block_string;

24178

$nesting_block_flag = ( $nesting_block_string =~ /1$/ );

24179

chop $nesting_list_string;

24180

$nesting_list_flag = ( $nesting_list_string =~ /1$/ );

24181

24182

chop $ci_string_in_tokenizer;

24183

$ci_string_sum = ones_count($ci_string_in_tokenizer);

24184

24185

$in_statement_continuation =

24186

chop $continuation_string_in_tokenizer;

24187

24188

# zero continuation flag at terminal BLOCK '}' which

24189

# ends a statement.

24190

if ( $routput_block_type->[$i] ) {

24191

24192

# ...These include non-anonymous subs

24193

# note: could be sub ::abc { or sub 'abc

24194

if ( $routput_block_type->[$i] =~ m/^sub\s*/gc ) {

24195

24196

# note: older versions of perl require the /gc modifier

24197

# here or else the \G does not work.

24198

if ( $routput_block_type->[$i] =~ /\G('|::|\w)/gc )

24199

{

24200

$in_statement_continuation = 0;

24201

}

24202

}

24203

24204

# ...and include all block types except user subs with

24205

# block prototypes and these: (sort|grep|map|do|eval)

24206

# /^(\}|\{|BEGIN|END|CHECK|INIT|AUTOLOAD|DESTROY|UNITCHECK|continue|;|if|elsif|else|unless|while|until|for|foreach)$/

24207

elsif (

24208

$is_zero_continuation_block_type{

24209

$routput_block_type->[$i] } )

24210

{

24211

$in_statement_continuation = 0;

24212

}

24213

24214

# ..but these are not terminal types:

24215

# /^(sort|grep|map|do|eval)$/ )

24216

elsif (

24217

$is_not_zero_continuation_block_type{

24218

$routput_block_type->[$i] } )

24219

{

24220

}

24221

24222

# ..and a block introduced by a label

24223

# /^\w+\s*:$/gc ) {

24224

elsif ( $routput_block_type->[$i] =~ /:$/ ) {

24225

$in_statement_continuation = 0;

24226

}

24227

24228

# user function with block prototype

24229

else {

24230

$in_statement_continuation = 0;

24231

}

24232

}

24233

24234

# If we are in a list, then

24235

# we must set continuation indentation at the closing

24236

# paren of something like this (paren after $check):

24237

# assert(

24238

# __LINE__,

24239

# ( not defined $check )

24240

# or ref $check

24241

# or $check eq "new"

24242

# or $check eq "old",

24243

# );

24244

elsif ( $tok eq ')' ) {

24245

$in_statement_continuation = 1

24246

if $routput_container_type->[$i] =~ /^[;,\{\}]$/;

24247

}

24248

24249

elsif ( $tok eq ';' ) { $in_statement_continuation = 0 }

24250

}

24251

24252

# use environment after updating

24253

$container_environment =

24254

$nesting_block_flag ? 'BLOCK'

24255

: $nesting_list_flag ? 'LIST'

24256

: "";

24257

$ci_string_i = $ci_string_sum + $in_statement_continuation;

24258

$nesting_block_string_i = $nesting_block_string;

24259

$nesting_list_string_i = $nesting_list_string;

24260

}

24261

24262

# not a structural indentation type..

24263

else {

24264

24265

$container_environment =

24266

$nesting_block_flag ? 'BLOCK'

24267

: $nesting_list_flag ? 'LIST'

24268

: "";

24269

24270

# zero the continuation indentation at certain tokens so

24271

# that they will be at the same level as its container. For

24272

# commas, this simplifies the -lp indentation logic, which

24273

# counts commas. For ?: it makes them stand out.

24274

if ($nesting_list_flag) {

24275

if ( $type =~ /^[,\?\:]$/ ) {

24276

$in_statement_continuation = 0;

24277

}

24278

}

24279

24280

# be sure binary operators get continuation indentation

24281

if (

24282

$container_environment

24283

&& ( $type eq 'k' && $is_binary_keyword{$tok}

24284

|| $is_binary_type{$type} )

24285

)

24286

{

24287

$in_statement_continuation = 1;

24288

}

24289

24290

# continuation indentation is sum of any open ci from previous

24291

# levels plus the current level

24292

$ci_string_i = $ci_string_sum + $in_statement_continuation;

24293

24294

# update continuation flag ...

24295

# if this isn't a blank or comment..

24296

if ( $type ne 'b' && $type ne '#' ) {

24297

24298

# and we are in a BLOCK

24299

if ($nesting_block_flag) {

24300

24301

# the next token after a ';' and label starts a new stmt

24302

if ( $type eq ';' || $type eq 'J' ) {

24303

$in_statement_continuation = 0;

24304

}

24305

24306

# otherwise, we are continuing the current statement

24307

else {

24308

$in_statement_continuation = 1;

24309

}

24310

}

24311

24312

# if we are not in a BLOCK..

24313

else {

24314

24315

# do not use continuation indentation if not list

24316

# environment (could be within if/elsif clause)

24317

if ( !$nesting_list_flag ) {

24318

$in_statement_continuation = 0;

24319

}

24320

24321

# otherwise, the next token after a ',' starts a new term

24322

elsif ( $type eq ',' ) {

24323

$in_statement_continuation = 0;

24324

}

24325

24326

# otherwise, we are continuing the current term

24327

else {

24328

$in_statement_continuation = 1;

24329

}

24330

}

24331

}

24332

}

24333

24334

if ( $level_in_tokenizer < 0 ) {

24335

unless ( $tokenizer_self->{_saw_negative_indentation} ) {

24336

$tokenizer_self->{_saw_negative_indentation} = 1;

24337

warning("Starting negative indentation\n");

24338

}

24339

}

24340

24341

# set secondary nesting levels based on all continment token types

24342

# Note: these are set so that the nesting depth is the depth

24343

# of the PREVIOUS TOKEN, which is convenient for setting

24344

# the stength of token bonds

24345

my $slevel_i = $slevel_in_tokenizer;

24346

24347

# /^[L\{\(\[]$/

24348

if ( $is_opening_type{$type} ) {

24349

$slevel_in_tokenizer++;

24350

$nesting_token_string .= $tok;

24351

$nesting_type_string .= $type;

24352

}

24353

24354

# /^[R\}\)\]]$/

24355

elsif ( $is_closing_type{$type} ) {

24356

$slevel_in_tokenizer--;

24357

my $char = chop $nesting_token_string;

24358

24359

if ( $char ne $matching_start_token{$tok} ) {

24360

$nesting_token_string .= $char . $tok;

24361

$nesting_type_string .= $type;

24362

}

24363

else {

24364

chop $nesting_type_string;

24365

}

24366

}

24367

24368

push( @block_type, $routput_block_type->[$i] );

24369

push( @ci_string, $ci_string_i );

24370

push( @container_environment, $container_environment );

24371

push( @container_type, $routput_container_type->[$i] );

24372

push( @levels, $level_i );

24373

push( @nesting_tokens, $nesting_token_string_i );

24374

push( @nesting_types, $nesting_type_string_i );

24375

push( @slevels, $slevel_i );

24376

push( @token_type, $fix_type );

24377

push( @type_sequence, $routput_type_sequence->[$i] );

24378

push( @nesting_blocks, $nesting_block_string );

24379

push( @nesting_lists, $nesting_list_string );

24380

24381

# now form the previous token

24382

if ( $im >= 0 ) {

24383

$num =

24384

$$rtoken_map[$i] - $$rtoken_map[$im]; # how many characters

24385

24386

if ( $num > 0 ) {

24387

push( @tokens,

24388

substr( $input_line, $$rtoken_map[$im], $num ) );

24389

}

24390

}

24391

$im = $i;

24392

}

24393

24394

$num = length($input_line) - $$rtoken_map[$im]; # make the last token

24395

if ( $num > 0 ) {

24396

push( @tokens, substr( $input_line, $$rtoken_map[$im], $num ) );

24397

}

24398

24399

$tokenizer_self->{_in_attribute_list} = $in_attribute_list;

24400

$tokenizer_self->{_in_quote} = $in_quote;

24401

$tokenizer_self->{_quote_target} =

24402

$in_quote ? matching_end_token($quote_character) : "";

24403

$tokenizer_self->{_rhere_target_list} = $rhere_target_list;

24404

24405

$line_of_tokens->{_rtoken_type} = \@token_type;

24406

$line_of_tokens->{_rtokens} = \@tokens;

24407

$line_of_tokens->{_rblock_type} = \@block_type;

24408

$line_of_tokens->{_rcontainer_type} = \@container_type;

24409

$line_of_tokens->{_rcontainer_environment} = \@container_environment;

24410

$line_of_tokens->{_rtype_sequence} = \@type_sequence;

24411

$line_of_tokens->{_rlevels} = \@levels;

24412

$line_of_tokens->{_rslevels} = \@slevels;

24413

$line_of_tokens->{_rnesting_tokens} = \@nesting_tokens;

24414

$line_of_tokens->{_rci_levels} = \@ci_string;

24415

$line_of_tokens->{_rnesting_blocks} = \@nesting_blocks;

24416

24417

return;

24418

}

24419

} # end tokenize_this_line

24420

24421

#########i#############################################################

24422

# Tokenizer routines which assist in identifying token types

24423

#######################################################################

24424

24425

sub operator_expected {

    # Many perl symbols have two or more meanings.  For example, '<<'
    # can be a shift operator or a here-doc operator.  The
    # interpretation of these symbols depends on the current state of
    # the tokenizer, which may either be expecting a term or an
    # operator.  For this example, a << would be a shift if an operator
    # is expected, and a here-doc if a term is expected.  This routine
    # is called to make this decision for any current token.
    #
    # Parameters:
    #   $prev_type - type of the token immediately before $tok
    #                ('b' means a blank)
    #   $tok       - the current token
    #   $next_type - type of the token immediately after $tok
    #
    # Returns one of three possible values:
    #
    #     OPERATOR - operator expected (or at least, not a term)
    #     UNKNOWN  - can't tell
    #     TERM     - a term is expected (or at least, not an operator)
    #
    # The decision is based on what has been seen so far.  This
    # information is stored in the "$last_nonblank_type" and
    # "$last_nonblank_token" variables.  For example, if the
    # $last_nonblank_type is '=~', then we are expecting a TERM, whereas
    # if $last_nonblank_type is 'n' (numeric), we are expecting an
    # OPERATOR.
    #
    # If UNKNOWN is returned, the calling routine must guess.  A major
    # goal of this tokenizer is to minimize the possibility of returning
    # UNKNOWN, because a wrong guess can spoil the formatting of a
    # script.
    #
    # adding NEW_TOKENS: it is critically important that this routine be
    # updated to allow it to determine if an operator or term is to be
    # expected after the new token.  Doing this simply involves adding
    # the new token character to one of the regexes in this routine or
    # to one of the hash lists that it uses, which are initialized in
    # the BEGIN section.
    #
    # USES GLOBAL VARIABLES: $last_nonblank_type, $last_nonblank_token,
    # $statement_type

    my ( $prev_type, $tok, $next_type ) = @_;

    my $op_expected = UNKNOWN;

#print "tok=$tok last type=$last_nonblank_type last tok=$last_nonblank_token\n";

    # Note: function prototype is available for token type 'U' for future
    # program development.  It contains the leading and trailing parens,
    # and no blanks.  It might be used to eliminate token type 'C', for
    # example (prototype = '()'). Thus:
    # if ($last_nonblank_type eq 'U') {
    #     print "previous token=$last_nonblank_token  type=$last_nonblank_type prototype=$last_nonblank_prototype\n";
    # }

    # A possible filehandle (or object) requires some care...
    if ( $last_nonblank_type eq 'Z' ) {

        # angle.t
        if ( $last_nonblank_token =~ /^[A-Za-z_]/ ) {
            $op_expected = UNKNOWN;
        }

        # For possible file handle like "$a", Perl uses weird parsing rules.
        # For example:
        # print $a/2,"/hi";   - division
        # print $a / 2,"/hi"; - division
        # print $a/ 2,"/hi";  - division
        # print $a /2,"/hi";  - pattern (and error)!
        elsif ( ( $prev_type eq 'b' ) && ( $next_type ne 'b' ) ) {
            $op_expected = TERM;
        }

        # Note when an operation is being done where a
        # filehandle might be expected, since a change in whitespace
        # could change the interpretation of the statement.
        else {
            if ( $tok =~ /^([x\/\+\-\*\%\&\.\?\<]|\>\>)$/ ) {
                complain("operator in print statement not recommended\n");
                $op_expected = OPERATOR;
            }
        }
    }

    # handle something after 'do' and 'eval'
    elsif ( $is_block_operator{$last_nonblank_token} ) {

        # something like $a = eval "expression";
        #                          ^
        if ( $last_nonblank_type eq 'k' ) {
            $op_expected = TERM;    # expression or list mode following keyword
        }

        # something like $a = do { BLOCK } / 2;
        #                                  ^
        else {
            $op_expected = OPERATOR;    # block mode following }
        }
    }

    # handle bare word..
    elsif ( $last_nonblank_type eq 'w' ) {

        # unfortunately, we can't tell what type of token to expect next
        # after most bare words
        $op_expected = UNKNOWN;
    }

    # operator, but not term possible after these types
    # Note: moved ')' from type to token because parens in list context
    # get marked as '{' '}' now.  This is a minor glitch in the following:
    #    my %opts = (ref $_[0] eq 'HASH') ? %{shift()} : ();
    #
    elsif (( $last_nonblank_type =~ /^[\]RnviQh]$/ )
        || ( $last_nonblank_token =~ /^(\)|\$|\-\>)/ ) )
    {
        $op_expected = OPERATOR;

        # in a 'use' statement, numbers and v-strings are not true
        # numbers, so to avoid incorrect error messages, we will
        # mark them as unknown for now (use.t)
        # TODO: it would be much nicer to create a new token V for VERSION
        # number in a use statement.  Then this could be a check on type V
        # and related patches which change $statement_type for '=>'
        # and ',' could be removed.  Further, it would clean things up to
        # scan the 'use' statement with a separate subroutine.
        if (   ( $statement_type eq 'use' )
            && ( $last_nonblank_type =~ /^[nv]$/ ) )
        {
            $op_expected = UNKNOWN;
        }
    }

    # no operator after many keywords, such as "die", "warn", etc
    elsif ( $expecting_term_token{$last_nonblank_token} ) {

        # patch for dor.t (defined or).
        # perl functions which may be unary operators
        # TODO: This list is incomplete, and these should be put
        # into a hash.
        #
        # The alternation is grouped so that both anchors apply to every
        # keyword.  Written as /^eof|undef|shift|pop$/ the anchors bind
        # only to the first and last alternatives, so any token merely
        # containing 'undef' or 'shift' (for example 'unshift') matched.
        if (   $tok eq '/'
            && $next_type eq '/'
            && $last_nonblank_type eq 'k'
            && $last_nonblank_token =~ /^(?:eof|undef|shift|pop)$/ )
        {
            $op_expected = OPERATOR;
        }
        else {
            $op_expected = TERM;
        }
    }

    # no operator after things like + - **  (i.e., other operators)
    elsif ( $expecting_term_types{$last_nonblank_type} ) {
        $op_expected = TERM;
    }

    # a few operators, like "time", have an empty prototype () and so
    # take no parameters but produce a value to operate on
    elsif ( $expecting_operator_token{$last_nonblank_token} ) {
        $op_expected = OPERATOR;
    }

    # post-increment and decrement produce values to be operated on
    elsif ( $expecting_operator_types{$last_nonblank_type} ) {
        $op_expected = OPERATOR;
    }

    # no value to operate on after sub block
    elsif ( $last_nonblank_token =~ /^sub\s/ ) { $op_expected = TERM; }

    # a right brace here indicates the end of a simple block.
    # all non-structural right braces have type 'R'
    # all braces associated with block operator keywords have been given those
    # keywords as "last_nonblank_token" and caught above.
    # (This statement is order dependent, and must come after checking
    # $last_nonblank_token).
    elsif ( $last_nonblank_type eq '}' ) {

        # patch for dor.t (defined or).
        if (   $tok eq '/'
            && $next_type eq '/'
            && $last_nonblank_token eq ']' )
        {
            $op_expected = OPERATOR;
        }
        else {
            $op_expected = TERM;
        }
    }

    # something else..what did I forget?
    else {

        # collecting diagnostics on unknown operator types..see what was missed
        $op_expected = UNKNOWN;
        write_diagnostics(
"OP: unknown after type=$last_nonblank_type  token=$last_nonblank_token\n"
        );
    }

    TOKENIZER_DEBUG_FLAG_EXPECT && do {
        print
"EXPECT: returns $op_expected for last type $last_nonblank_type token $last_nonblank_token\n";
    };
    return $op_expected;
}

24627

24628

sub new_statement_ok {

    # Report whether the current token is allowed to begin a new
    # statement.  That is the case wherever a label would be accepted,
    # and also directly after a label (token type 'J').
    # USES GLOBAL VARIABLES: $last_nonblank_type

    my $can_start = label_ok();    # a label would be ok here
    $can_start ||= ( $last_nonblank_type eq 'J' );    # or we follow a label
    return $can_start;
}

24638

24639

sub label_ok {

    # Decide whether a bare word followed by a colon at this point
    # should be treated as a label.
    # USES GLOBAL VARIABLES: $last_nonblank_token, $last_nonblank_type,
    # $brace_depth, @brace_type

    # Did we just pass an opening or closing curly brace whose type
    # equals its token text?
    my $follows_curly =
      $last_nonblank_type eq $last_nonblank_token
      && ( $last_nonblank_token eq '{' || $last_nonblank_token eq '}' );

    # After such a curly, it is a label if and only if the curly
    # encloses a code block.
    return $brace_type[$brace_depth] if $follows_curly;

    # Anywhere else, it is a label if and only if it follows a ';'
    # (real or fake).
    return ( $last_nonblank_type eq ';' );
}

24660

24661

sub code_block_type {

    # Classify the '{' at token index $i: does it open a block of code
    # or an anonymous hash reference?
    # Must be called only when $type = $token = '{'.
    #
    # Returns "" if this is not a code block.  Otherwise returns the
    # block type; where the decision is made here that is
    # $last_nonblank_token (for example 'if' for an if block, 'else'
    # for an else block, etc), and in the ambiguous cases it is
    # whatever decide_if_code_block returns.
    #
    # USES GLOBAL VARIABLES: $last_nonblank_token, $last_nonblank_type,
    # $last_nonblank_block_type, $brace_depth, @brace_type

    # print "BLOCK_TYPE EXAMINING: type=$last_nonblank_type tok=$last_nonblank_token\n";

    my ( $i, $rtokens, $rtoken_type, $max_token_index ) = @_;

    # true when the previous token's type is the same as its text
    my $type_equals_token = $last_nonblank_type eq $last_nonblank_token;

    # case of multiple '{'s
    if ( $last_nonblank_token eq '{' && $type_equals_token ) {

        # a code block cannot start within an anonymous hash
        return "" unless $brace_type[$brace_depth];

        # an opening brace where a statement may appear is probably
        # a code block but might be an anonymous hash reference
        return decide_if_code_block( $i, $rtokens, $rtoken_type,
            $max_token_index );
    }

    # an opening brace where a statement may appear is probably
    # a code block but might be an anonymous hash reference
    if ( $last_nonblank_token eq ';' ) {
        return decide_if_code_block( $i, $rtokens, $rtoken_type,
            $max_token_index );
    }

    # case of '}{'
    if ( $last_nonblank_token eq '}' && $type_equals_token ) {

        # could be a hash reference after a code block..(blktype1.t);
        # it must be a block if it follows a closing hash reference
        return $last_nonblank_block_type
          ? decide_if_code_block( $i, $rtokens, $rtoken_type,
            $max_token_index )
          : $last_nonblank_token;
    }

    # NOTE: braces after type characters start code blocks, but for
    # simplicity these are not identified as such (the check on type
    # 't' is deliberately left out here).  See also
    # sub is_non_structural_brace.

    # brace after label
    return $last_nonblank_token if $last_nonblank_type eq 'J';

    # otherwise, look at the previous token.  This must be a code
    # block if it follows one of the code block tokens
    return $last_nonblank_token if $is_code_block_token{$last_nonblank_token};

    # or a sub definition
    if ( ( $last_nonblank_type eq 'i' || $last_nonblank_type eq 't' )
        && $last_nonblank_token =~ /^sub\b/ )
    {
        return $last_nonblank_token;
    }

    # user-defined subs with block parameters (like grep/map/eval)
    return $last_nonblank_token if $last_nonblank_type eq 'G';

    # a bareword needs a closer look
    if ( $last_nonblank_type eq 'w' ) {
        return decide_if_code_block( $i, $rtokens, $rtoken_type,
            $max_token_index );
    }

    # anything else must be an anonymous hash reference
    return "";
}

24763

24764

sub decide_if_code_block {

    # We are at a '{' where a statement may appear, and must decide if
    # this brace starts an anonymous hash or a code block.
    #
    # Parameters:
    #   $i               - index of the '{' in the token arrays
    #   $rtokens         - reference to the array of pre-tokens
    #   $rtoken_type     - reference to the array of pre-token types
    #   $max_token_index - last index considered in those arrays
    #
    # Returns "" if anonymous hash, and $last_nonblank_token otherwise.
    #
    # USES GLOBAL VARIABLES: $last_nonblank_token
    my ( $i, $rtokens, $rtoken_type, $max_token_index ) = @_;

    # only the token itself is needed; the index that
    # find_next_nonblank_token also returns is not used here
    my ($next_nonblank_token) =
      find_next_nonblank_token( $i, $rtokens, $max_token_index );

    # initialize to be code BLOCK
    my $code_block_type = $last_nonblank_token;

    # Check for the common case of an empty anonymous hash reference:
    # Maybe something like sub { { } }
    if ( $next_nonblank_token eq '}' ) {
        $code_block_type = "";
    }

    else {

        # To guess if this '{' is an anonymous hash reference, look ahead
        # and test as follows:
        #
        #   it is a hash reference if next come:
        #     - a string or digit followed by a comma or =>
        #     - bareword followed by =>
        #   otherwise it is a code block
        #
        # Examples of anonymous hash ref:
        # {'aa',};
        # {1,2}
        #
        # Examples of code blocks:
        # {1; print "hello\n", 1;}
        # {$a,1};

        # We are only going to look ahead one more (nonblank/comment) line.
        # Strange formatting could cause a bad guess, but that's unlikely.
        my @pre_types  = @$rtoken_type[ $i + 1 .. $max_token_index ];
        my @pre_tokens = @$rtokens[ $i + 1 .. $max_token_index ];

        # 20 is arbitrary but generous, and prevents wasting lots of
        # time in mangled files
        my ( $rpre_tokens, $rpre_types ) =
          peek_ahead_for_n_nonblank_pre_tokens(20);
        if ( defined($rpre_types) && @$rpre_types ) {
            push @pre_types,  @$rpre_types;
            push @pre_tokens, @$rpre_tokens;
        }

        # put a sentinel type to simplify stopping the search
        # (note that it is appended to @pre_types only)
        push @pre_types, '}';

        # skip one leading blank
        my $jbeg = 0;
        $jbeg = 1 if $pre_types[0] eq 'b';

        # first look for one of these
        #  - bareword
        #  - bareword with leading -
        #  - digit
        #  - quoted string
        my $j = $jbeg;
        if ( $pre_types[$j] =~ /^[\'\"]/ ) {

            # find the closing quote; don't worry about escapes
            my $quote_mark = $pre_types[$j];
            for ( my $k = $j + 1 ; $k < $#pre_types ; $k++ ) {
                if ( $pre_types[$k] eq $quote_mark ) {
                    $j = $k + 1;
                    last;
                }
            }
        }
        elsif ( $pre_types[$j] eq 'd' ) {
            $j++;
        }
        elsif ( $pre_types[$j] eq 'w' ) {
            unless ( $is_keyword{ $pre_tokens[$j] } ) {
                $j++;
            }
        }

        # NOTE: the ++$j in this condition advances $j even when the
        # token after '-' is not a bareword; this is order dependent
        # and is intentionally left as it was.
        elsif ( $pre_types[$j] eq '-' && $pre_types[ ++$j ] eq 'w' ) {
            $j++;
        }
        if ( $j > $jbeg ) {

            $j++ if $pre_types[$j] eq 'b';

            # it's a hash ref if a comma or => follow next
            if ( $pre_types[$j] eq ','
                || ( $pre_types[$j] eq '=' && $pre_types[ ++$j ] eq '>' ) )
            {
                $code_block_type = "";
            }
        }
    }

    return $code_block_type;
}

24867

24868

sub unexpected {

    # Issue a warning about a token of an unexpected type, showing the
    # input line with a '^' under the offending token and, when a
    # previous nonblank token is known, dashes under that token.
    # Nothing is reported once the running count of these errors has
    # passed MAX_NAG_MESSAGES.
    # USES GLOBAL VARIABLES: $tokenizer_self
    my (
        $found,         $expecting,      $i_tok, $last_nonblank_i,
        $rpretoken_map, $rpretoken_type, $input_line
    ) = @_;

    if ( ++$tokenizer_self->{_unexpected_error_count} <= MAX_NAG_MESSAGES ) {
        my $msg = "found $found where $expecting expected";
        my $pos = $rpretoken_map->[$i_tok];
        interrupt_logfile();
        my $input_line_number = $tokenizer_self->{_last_line_number};
        my ( $offset, $numbered_line, $underline ) =
          make_numbered_line( $input_line_number, $input_line, $pos );

        # mark the position of the unexpected token
        $underline = write_on_underline( $underline, $pos - $offset, '^' );

        my $trailer = "";
        if ( ( $i_tok > 0 ) && ( $last_nonblank_i >= 0 ) ) {
            my $pos_prev = $rpretoken_map->[$last_nonblank_i];

            # number of characters to underline: measured up to the
            # preceding pre-token when that one is a blank, otherwise
            # up to the unexpected token itself
            my $num =
              ( $rpretoken_type->[ $i_tok - 1 ] eq 'b' )
              ? $rpretoken_map->[ $i_tok - 1 ] - $pos_prev
              : $pos - $pos_prev;

            # keep the dashed underline to at most 40 characters
            if ( $num > 40 ) {
                $num      = 40;
                $pos_prev = $pos - 40;
            }

            $underline =
              write_on_underline( $underline, $pos_prev - $offset,
                '-' x $num );
            $trailer = " (previous token underlined)";
        }
        warning( $numbered_line . "\n" );
        warning( $underline . "\n" );
        warning( $msg . $trailer . "\n" );
        resume_logfile();
    }
}

24907

24908

sub is_non_structural_brace {

    # Decide if a brace or bracket is structural or non-structural
    # by examining the previous token and its type; the result is true
    # for a non-structural one.
    # USES GLOBAL VARIABLES: $last_nonblank_type, $last_nonblank_token

    # EXPERIMENTAL: Mark slices as structural; idea was to improve formatting.
    # Tentatively deactivated because it caused the wrong operator expectation
    # for this code:
    #      $user = @vars[1] / 100;
    # Must update sub operator_expected before re-implementing.
    # if ( $last_nonblank_type eq 'i' && $last_nonblank_token =~ /^@/ ) {
    #     return 0;
    # }

    # NOTE: braces after type characters start code blocks, but for
    # simplicity these are not identified as such.  See also
    # sub code_block_type
    # if ($last_nonblank_type eq 't') {return 0}

    # It is non-structural if it is decorated by type information.
    # For example, the '{' here is non-structural:   ${xxx}
    return 1 if $last_nonblank_token =~ /^([\$\@\*\&\%\)]|->|::)/;

    # It is also non-structural if we follow a hash or array closing
    # curly brace or bracket.  For example, the second '{' in this is
    # non-structural: $a{'x'}{'y'}
    # because the first '}' would have been given type 'R'
    return $last_nonblank_type =~ /^([R\]])$/;
}

24940

24941

#########i#############################################################

24942

# Tokenizer routines for tracking container nesting depths

24943

#######################################################################

24944

24945

# The following routines keep track of nesting depths of the nesting

24946

# types, ( [ { and ?. This is necessary for determining the indentation

24947

# level, and also for debugging programs. Not only do they keep track of

24948

# nesting depths of the individual brace types, but they check that each

24949

# of the other brace types is balanced within matching pairs. For

24950

# example, if the program sees this sequence:

24951

#

24952

# { ( ( ) }

24953

#

24954

# then it can determine that there is an extra left paren somewhere

24955

# between the { and the }. And so on with every other possible

24956

# combination of outer and inner brace types. For another

24957

# example:

24958

#

24959

# ( [ ..... ] ] )

24960

#

24961

# which has an extra ] within the parens.

24962

#

24963

# The brace types have indexes 0 .. 3 which are indexes into

24964

# the matrices.

24965

#

24966

# The pair ? : are treated as just another nesting type, with ? acting

24967

# as the opening brace and : acting as the closing brace.

24968

#

24969

# The matrix

24970

#

24971

# $depth_array[$a][$b][ $current_depth[$a] ] = $current_depth[$b];

24972

#

24973

# saves the nesting depth of brace type $b (where $b is any of the other

24974

# nesting types) when brace type $a enters a new depth. When this depth

24975

# decreases, a check is made that the current depth of brace types $b is

24976

# unchanged, or otherwise there must have been an error. This can

24977

# be very useful for localizing errors, particularly when perl runs to

24978

# the end of a large file (such as this one) and announces that there

24979

# is a problem somewhere.

24980

#

24981

# A numerical sequence number is maintained for every nesting type,

24982

# so that each matching pair can be uniquely identified in a simple

24983

# way.

24984

24985

sub increase_nesting_depth {

    # Enter one more level of nesting type $aa, opened at position $pos
    # of the current line.  Returns the list ( $seqno, $indent ): the
    # sequence number given to the new container, and a flag which is 1
    # when this is a nested ternary that should be indented, else 0.
    #
    # USES GLOBAL VARIABLES: $tokenizer_self, @current_depth,
    # @current_sequence_number, @depth_array, @starting_line_of_current_depth
    my ( $aa, $pos ) = @_;

    my $depth = ++$current_depth[$aa];
    $total_depth++;
    $total_depth[$aa][$depth] = $total_depth;

    # Sequence numbers increment by number of items.  This keeps
    # a unique set of numbers but still allows the relative location
    # of any type to be determined.
    $nesting_sequence_number[$aa] += scalar(@closing_brace_names);
    my $seqno = $nesting_sequence_number[$aa];
    $current_sequence_number[$aa][$depth] = $seqno;

    # remember where this level was opened
    $starting_line_of_current_depth[$aa][$depth] = [
        $tokenizer_self->{_last_line_number},
        $tokenizer_self->{_line_text}, $pos
    ];

    # record the present depth of each of the other nesting types
    foreach my $bb ( 0 .. $#closing_brace_names ) {
        $depth_array[$aa][$bb][$depth] = $current_depth[$bb]
          unless $bb == $aa;
    }

    # set a flag for indenting a nested ternary statement
    my $indent = 0;
    if ( $aa == QUESTION_COLON ) {
        $nested_ternary_flag[$depth] = 0;
        if (   $depth > 1
            && $nested_ternary_flag[ $depth - 1 ] == 0
            && $total_depth[$aa][ $depth - 1 ] == $total_depth - 1 )
        {
            $indent = 1;
            $nested_ternary_flag[ $depth - 1 ] = -1;
        }
    }
    return ( $seqno, $indent );
}

25028

25029

sub decrease_nesting_depth {

    # Leave one level of nesting type $aa, closed at position $pos of
    # the current line, checking that every other nesting type is at
    # the depth it had when this level was opened.  Returns the list
    # ( $seqno, $outdent ): the sequence number of the container being
    # closed (0 if there was no opener), and the nested ternary flag
    # stored for this level (0 unless $aa is QUESTION_COLON).
    #
    # USES GLOBAL VARIABLES: $tokenizer_self, @current_depth,
    # @current_sequence_number, @depth_array, @starting_line_of_current_depth
    my ( $aa, $pos ) = @_;

    my $seqno             = 0;
    my $outdent           = 0;
    my $input_line_number = $tokenizer_self->{_last_line_number};
    my $input_line        = $tokenizer_self->{_line_text};

    $total_depth--;

    # a closing token without any opening token
    if ( $current_depth[$aa] <= 0 ) {
        my $saw_brace_error = get_saw_brace_error();
        if ( $saw_brace_error <= MAX_NAG_MESSAGES ) {
            my $msg = <<"EOM";
There is no previous $opening_brace_names[$aa] to match a $closing_brace_names[$aa] on line $input_line_number
EOM
            indicate_error( $msg, $input_line_number, $input_line, $pos, '^' );
        }
        increment_brace_error();
        return ( $seqno, $outdent );
    }

    my $depth = $current_depth[$aa];
    $seqno = $current_sequence_number[$aa][$depth];

    # set a flag for un-indenting after seeing a nested ternary statement
    $outdent = $nested_ternary_flag[$depth] if $aa == QUESTION_COLON;

    # check that any brace types $bb contained within are balanced
    foreach my $bb ( 0 .. $#closing_brace_names ) {
        next if $bb == $aa;

        my $depth_at_open = $depth_array[$aa][$bb][$depth];
        next if $depth_at_open == $current_depth[$bb];

        my $diff = $current_depth[$bb] - $depth_at_open;

        # don't whine too many times; and if too many closing types
        # have occurred, we probably already caught this error
        my $saw_brace_error = get_saw_brace_error();
        if ( $saw_brace_error <= MAX_NAG_MESSAGES
            && ( ( $diff > 0 ) || ( $saw_brace_error <= 0 ) ) )
        {
            interrupt_logfile();
            my $rsl = $starting_line_of_current_depth[$aa][$depth];
            my $sl  = $rsl->[0];
            my $rel = [ $input_line_number, $input_line, $pos ];
            my $el  = $rel->[0];

            # plural ending, unless exactly one is extra
            my $ess = ( $diff == 1 || $diff == -1 ) ? '' : 's';

            my $bname =
              ( $diff > 0 )
              ? $opening_brace_names[$bb]
              : $closing_brace_names[$bb];
            write_error_indicator_pair( @$rsl, '^' );
            my $msg = <<"EOM";
Found $diff extra $bname$ess between $opening_brace_names[$aa] on line $sl and $closing_brace_names[$aa] on line $el
EOM

            if ( $diff > 0 ) {
                my $rml =
                  $starting_line_of_current_depth[$bb][ $current_depth[$bb] ];
                my $ml = $rml->[0];
                $msg .= " The most recent un-matched $bname is on line $ml\n";
                write_error_indicator_pair( @$rml, '^' );
            }
            write_error_indicator_pair( @$rel, '^' );
            warning($msg);
            resume_logfile();
        }
        increment_brace_error();
    }

    $current_depth[$aa]--;
    return ( $seqno, $outdent );
}

25126

25127

sub check_final_nesting_depths {

    # Report an error for every nesting type whose depth is still
    # nonzero, pointing at the line stored for its current depth.
    # USES GLOBAL VARIABLES: @current_depth, @starting_line_of_current_depth

    foreach my $aa ( 0 .. $#closing_brace_names ) {

        # nothing to report when this type is balanced
        next unless $current_depth[$aa];

        my $rsl = $starting_line_of_current_depth[$aa][ $current_depth[$aa] ];
        my $sl  = $rsl->[0];
        my $msg = <<"EOM";
Final nesting depth of $opening_brace_names[$aa]s is $current_depth[$aa]
The most recent un-matched $opening_brace_names[$aa] is on line $sl
EOM
        indicate_error( $msg, @$rsl, '^' );
        increment_brace_error();
    }
}

25147

25148

#########i#############################################################

25149

# Tokenizer routines for looking ahead in input stream

25150

#######################################################################

25151

25152

sub peek_ahead_for_n_nonblank_pre_tokens {

    # Pre-tokenize the next line which is neither blank nor a comment,
    # passing $max_pretokens through to pre_tokenize, and return
    # ( $rpre_tokens, $rpre_types ).  Both are undef if the lookahead
    # is exhausted before such a line is seen.
    # USES GLOBAL VARIABLES: $tokenizer_self
    my $max_pretokens = shift;

    my ( $rpre_tokens, $rpre_types );
    my $line_buffer = $tokenizer_self->{_line_buffer_object};

    for ( my $k = 0 ; ; $k++ ) {
        my $line = $line_buffer->peek_ahead($k);
        last unless $line;

        # trim leading blanks, then skip blank lines and comments
        $line =~ s/^\s*//;
        next if ( length($line) <= 0 );
        next if ( $line =~ /^#/ );

        # the token map in the middle of the result is not needed
        ( $rpre_tokens, undef, $rpre_types ) =
          pre_tokenize( $line, $max_pretokens );
        last;
    }
    return ( $rpre_tokens, $rpre_types );
}

25173

25174

# look ahead for next non-blank, non-comment line of code

25175

sub peek_ahead_for_nonblank_token {

    # Look ahead for the next line of code which is neither blank nor
    # a comment, and store its leading pre-tokens in @$rtokens beyond
    # $max_token_index.  Returns $rtokens.
    # USES GLOBAL VARIABLES: $tokenizer_self
    my ( $rtokens, $max_token_index ) = @_;

    my $line_buffer = $tokenizer_self->{_line_buffer_object};

    for ( my $k = 0 ; ; $k++ ) {
        my $line = $line_buffer->peek_ahead($k);
        last unless $line;

        # trim leading blanks, then skip blank lines and comments
        $line =~ s/^\s*//;
        next if ( length($line) <= 0 );
        next if ( $line =~ /^#/ );

        # only need 2 pre-tokens
        my ($rtok) = pre_tokenize( $line, 2 );

        # the first token is stored at index $max_token_index + 2
        my $j = $max_token_index + 1;
        foreach my $tok (@$rtok) {
            last if ( $tok =~ "\n" );
            $rtokens->[ ++$j ] = $tok;
        }
        last;
    }
    return $rtokens;
}

25200

25201

#########i#############################################################

25202

# Tokenizer guessing routines for ambiguous situations

25203

#######################################################################

25204

25205

sub guess_if_pattern_or_conditional {

    # Called when a ? has been seen after an unknown bareword; decide
    # whether the ? begins a pattern or is the conditional operator.
    # input parameters:
    #   $i               - token index of the ? starting possible pattern
    #   $rtokens         - reference to the token array
    #   $rtoken_map      - accepted but not used by this routine
    #   $max_token_index - upper token index for this line
    # output parameters:
    #   $is_pattern = 0 if probably not pattern,  =1 if probably a pattern
    #   msg = a warning or diagnostic message
    # USES GLOBAL VARIABLES: $last_nonblank_token
    my ( $i, $rtokens, $rtoken_map, $max_token_index ) = @_;
    my $msg = "guessing that ? after $last_nonblank_token starts a ";

    # the ? is at (or past) the last token: nothing on this line can end it
    if ( $i >= $max_token_index ) {
        $msg .= "conditional (no end to pattern found on the line)\n";
        return ( 0, $msg );
    }

    # look for a possible ending ? on this line..
    my $in_quote        = 1;
    my $quote_character = '';
    my $quote_pos       = 0;
    my $quote_depth     = 0;
    my $i_end;
    ( $i_end, $in_quote ) =
      follow_quoted_string( $i, $in_quote, $rtokens, $quote_character,
        $quote_pos, $quote_depth, $max_token_index );

    my $is_pattern = 0;
    if ($in_quote) {

        # we didn't find an ending ? on this line,
        # so we bias towards conditional
        $msg .= "conditional (no ending ? on this line)\n";
    }
    elsif ( pattern_expected( $i_end, $rtokens, $max_token_index ) >= 0 ) {

        # we found an ending ?, and a pattern is possible here
        $is_pattern = 1;
        $msg .= "pattern (found ending ? and pattern expected)\n";
    }
    else {

        # NOTE: the message says 'pattern' but the returned flag stays 0
        $msg .= "pattern (uncertain, but found ending ?)\n";
    }
    return ( $is_pattern, $msg );
}

25262

25263

sub guess_if_pattern_or_division {

    # This routine is called when we have encountered a / following an
    # unknown bareword, and we must decide if it starts a pattern or is a
    # division.
    # input parameters:
    #   $i               - token index of the / starting possible pattern
    #   $rtokens         - reference to the token array
    #   $rtoken_map      - accepted but not used by this routine
    #   $max_token_index - upper token index for this line
    # output parameters:
    #   $is_pattern = 0 if probably division,  =1 if probably a pattern
    #   msg = a warning or diagnostic message
    # USES GLOBAL VARIABLES: $last_nonblank_token
    my ( $i, $rtokens, $rtoken_map, $max_token_index ) = @_;
    my $is_pattern = 0;
    my $msg        = "guessing that / after $last_nonblank_token starts a ";

    if ( $i >= $max_token_index ) {

        # The reason must be appended to the message; previously this
        # string was a bare expression in void context and was lost.
        $msg .= "division (no end to pattern found on the line)\n";
    }
    else {
        my $ibeg = $i;
        my $divide_expected =
          numerator_expected( $i, $rtokens, $max_token_index );

        # look for a possible ending / on this line..
        my $in_quote        = 1;
        my $quote_depth     = 0;
        my $quote_character = '';
        my $quote_pos       = 0;
        my $quoted_string;
        (
            $i, $in_quote, $quote_character, $quote_pos, $quote_depth,
            $quoted_string
          )
          = follow_quoted_string( $ibeg, $in_quote, $rtokens, $quote_character,
            $quote_pos, $quote_depth, $max_token_index );

        if ($in_quote) {

            # we didn't find an ending / on this line,
            # so we bias towards division
            if ( $divide_expected >= 0 ) {
                $is_pattern = 0;
                $msg .= "division (no ending / on this line)\n";
            }
            else {

                # NOTE(review): this assignment replaces the message
                # prefix instead of extending it -- confirm whether '.='
                # was intended; left unchanged here.
                $msg        = "multi-line pattern (division not possible)\n";
                $is_pattern = 1;
            }

        }

        # we found an ending /, so we bias towards a pattern
        else {

            if ( pattern_expected( $i, $rtokens, $max_token_index ) >= 0 ) {

                if ( $divide_expected >= 0 ) {

                    # both are possible; a distant closing / is taken as
                    # evidence for division
                    if ( $i - $ibeg > 60 ) {
                        $msg .= "division (matching / too distant)\n";
                        $is_pattern = 0;
                    }
                    else {
                        $msg .= "pattern (but division possible too)\n";
                        $is_pattern = 1;
                    }
                }
                else {
                    $is_pattern = 1;
                    $msg .= "pattern (division not possible)\n";
                }
            }
            else {

                if ( $divide_expected >= 0 ) {
                    $is_pattern = 0;
                    $msg .= "division (pattern not possible)\n";
                }
                else {
                    $is_pattern = 1;
                    $msg .=
                      "pattern (uncertain, but division would not work here)\n";
                }
            }
        }
    }
    return ( $is_pattern, $msg );
}

25353

25354

# try to resolve here-doc vs. shift by looking ahead for

25355

# non-code or the end token (currently only looks for end token)

25356

# returns 1 if it is probably a here doc, 0 if it is probably a shift, and -1 if end of file was reached without seeing the target

25357

sub guess_if_here_doc {

    # Try to resolve here-doc vs. shift for '<<' followed by $next_token
    # by looking ahead for a line consisting solely of the target.
    #
    # input parameter:
    #   the possible here-doc target
    # returns:
    #    1 if the target was found within the search window, or if it was
    #      not found but the target is not a known constant (a guess)
    #    0 if the target was not found and it is a known constant in the
    #      current package (guessing a shift)
    #   -1 if the lookahead ended (no more lines) without seeing the target
    # A description of the decision is written to the logfile.
    # USES GLOBAL VARIABLES: $tokenizer_self, $current_package
    # %is_constant,

    # This is how many lines we will search for a target as part of the
    # guessing strategy.  It is a constant because there is probably
    # little reason to change it.
    use constant HERE_DOC_WINDOW => 40;

    my $next_token        = shift;
    my $here_doc_expected = 0;
    my $line;
    my $k   = 0;
    my $msg = "checking <<";

    while ( $line = $tokenizer_self->{_line_buffer_object}->peek_ahead( $k++ ) )
    {
        chomp $line;

        # The target is compared literally: \Q..\E keeps any regex
        # metacharacters in it from being interpreted as a pattern.
        if ( $line =~ /^\Q$next_token\E$/ ) {
            $msg .= " -- found target $next_token ahead $k lines\n";
            $here_doc_expected = 1;    # got it
            last;
        }
        last if ( $k >= HERE_DOC_WINDOW );
    }

    unless ($here_doc_expected) {

        if ( !defined($line) ) {
            $here_doc_expected = -1;    # hit eof without seeing target
            $msg .= " -- must be shift; target $next_token not in file\n";

        }
        else {    # still unsure..taking a wild guess

            if ( !$is_constant{$current_package}{$next_token} ) {
                $here_doc_expected = 1;
                $msg .=
                  " -- guessing it's a here-doc ($next_token not a constant)\n";
            }
            else {
                $msg .=
                  " -- guessing it's a shift ($next_token is a constant)\n";
            }
        }
    }
    write_logfile_entry($msg);
    return $here_doc_expected;
}

25407

25408

#########i#############################################################

25409

# Tokenizer Routines for scanning identifiers and related items

25410

#######################################################################

25411

25412

sub scan_bare_identifier_do {

    # Scan a bareword: a token which starts with an alphanumeric
    # character or with a package separator, :: or '.
    #
    # The text is re-scanned in $input_line starting at the character
    # position of the current pretoken; the token text and a type are
    # worked out, and the character position reached is converted back
    # to a pretoken index.
    #
    # returns ( $i, $tok, $type, $prototype ); $prototype is changed only
    # when the word is a known user function.
    # USES GLOBAL VARIABLES: $current_package, $last_nonblank_token,
    #    $last_nonblank_type,@paren_type, $paren_depth
    # and the lookup tables %is_keyword, %is_constant, %is_block_function,
    #    %is_block_list_function, %is_user_function,
    #    %user_function_prototype, %is_indirect_object_taker

    my (
        $input_line, $i, $tok, $type,
        $prototype,  $rtoken_map, $max_token_index
    ) = @_;

    # we have to back up one pretoken at a :: since each : is one pretoken
    # (the same is done for a leading ->)
    my $i_start = ( $tok eq '::' || $tok eq '->' ) ? $i - 1 : $i;
    my $pos_start = $rtoken_map->[$i_start];
    pos($input_line) = $pos_start;

    #  Examples:
    #  A::B::C
    #  A::
    #  ::A
    #  A'B
    if ( $input_line =~ m/\G\s*((?:\w*(?:'|::)))*(?:(?:->)?(\w+))?/gc ) {

        # save the captures before any other pattern match is made
        my ( $package_part, $name_part ) = ( $1, $2 );

        my $pos_end = pos($input_line);
        $tok = substr( $input_line, $pos_start, $pos_end - $pos_start );

        # type 'w' includes anything without leading type info
        # ($,%,@,*) including something like abc::def::ghi
        $type = 'w';

        my $sub_name = defined($name_part) ? $name_part : "";
        my $package;
        if ( defined($package_part) ) {
            $package = $package_part;

            # patch: don't allow isolated package name which just ends
            # in the old style package separator (single quote).  Example:
            #   use CGI':all';
            if ( !$sub_name && substr( $package, -1, 1 ) eq "'" ) {
                $pos_end--;
            }

            # normalize: ' becomes ::, a leading :: means main, and a
            # trailing :: is dropped
            $package =~ s/\'/::/g;
            $package = 'main' . $package if ( $package =~ /^\:/ );
            $package =~ s/::$//;
        }
        else {
            $package = $current_package;
            $type = 'k' if ( $is_keyword{$tok} );
        }

        # if it is a bareword..
        # (the order of the tests below is significant)
        if ( $type eq 'w' ) {

            # check for v-string with leading 'v' type character
            # (This seems to have precedence over filehandle, type 'Y')
            if ( $tok =~ /^v\d[_\d]*$/ ) {

                # we only have the first part - something like 'v101' -
                # look for more
                if ( $input_line =~ m/\G(\.\d[_\d]*)+/gc ) {
                    $pos_end = pos($input_line);
                    $tok =
                      substr( $input_line, $pos_start, $pos_end - $pos_start );
                }
                $type = 'v';

                # warn if this version can't handle v-strings
                report_v_string($tok);
            }

            elsif ( $is_constant{$package}{$sub_name} ) {
                $type = 'C';
            }

            # bareword after sort has implied empty prototype; for example:
            # @sorted = sort numerically ( 53, 29, 11, 32, 7 );
            # This has priority over whatever the user has specified.
            elsif ( $last_nonblank_type eq 'k'
                && $last_nonblank_token eq 'sort' )
            {
                $type = 'Z';
            }

            # Note: strangely, perl does not seem to really let you create
            # functions which act like eval and do, in the sense that eval
            # and do may have operators following the final }, but any
            # operators that you create with prototype (&) apparently do
            # not allow trailing operators, only terms.  This seems strange.
            # If this ever changes, here is the update
            # to make perltidy behave accordingly:

            # elsif ( $is_block_function{$package}{$tok} ) {
            #    $tok='eval'; # patch to do braces like eval  - doesn't work
            #    $type = 'k';
            #}
            # FIXME: This could become a separate type to allow for different
            # future behavior:
            elsif ($is_block_function{$package}{$sub_name}
                || $is_block_list_function{$package}{$sub_name} )
            {
                $type = 'G';
            }
            elsif ( $is_user_function{$package}{$sub_name} ) {
                $type      = 'U';
                $prototype = $user_function_prototype{$package}{$sub_name};
            }

            # check for indirect object
            elsif (

                # added 2001-03-27: must not be followed immediately by '('
                # see fhandle.t
                ( $input_line !~ m/\G\(/gc )

                # and
                && (

                    # preceded by keyword like 'print', 'printf' and friends
                    $is_indirect_object_taker{$last_nonblank_token}

                    # or preceded by something like 'print(' or 'printf('
                    || (
                        ( $last_nonblank_token eq '(' )
                        && $is_indirect_object_taker{ $paren_type[$paren_depth]
                        }
                    )
                )
              )
            {

                # may not be indirect object unless followed by a space
                if ( $input_line =~ m/\G\s+/gc ) {
                    $type = 'Y';

                    # Abandon Hope ...
                    # Perl's indirect object notation is a very bad
                    # thing and can cause subtle bugs, especially for
                    # beginning programmers.  And I haven't even been
                    # able to figure out a sane warning scheme which
                    # doesn't get in the way of good scripts.

                    # Complain if a filehandle has any lower case
                    # letters.  This is suggested good practice.
                    # Use 'sub_name' because something like
                    # main::MYHANDLE is ok for filehandle
                    # (could be bug caused by older perltidy if
                    # followed by '(')
                    if (   $sub_name =~ /[a-z]/
                        && $input_line =~ m/\G\s*\(/gc )
                    {
                        complain(
"Caution: unknown word '$tok' in indirect object slot\n"
                        );
                    }
                }

                # bareword not followed by a space -- may not be filehandle
                # (may be function call defined in a 'use' statement)
                else {
                    $type = 'Z';
                }
            }
        }

        # Now we must convert back from character position
        # to pre_token index.
        # I don't think an error flag can occur here ..but who knows
        my $error;
        ( $i, $error ) =
          inverse_pretoken_map( $i, $pos_end, $rtoken_map, $max_token_index );
        warning("scan_bare_identifier: Possibly invalid tokenization\n")
          if ($error);
    }

    # no match but line not blank - could be syntax error
    # perl will take '::' alone without complaint
    else {
        $type = 'w';

        # change this warning to log message if it becomes annoying
        warning("didn't find identifier after leading ::\n");
    }
    return ( $i, $tok, $type, $prototype );
}

25611

25612

sub scan_id_do {

    # This is the new scanner and will eventually replace scan_identifier.
    # Only type 'sub' and 'package' are implemented.
    # Token types $ * % @ & -> are not yet implemented.
    #
    # Scan the identifier which follows a type token.
    #
    # On a starting call $id_scan_state is '' and $tok must be the token
    # defining the type.  If that token is the last nonblank token on the
    # line, $id_scan_state = $tok is returned, which tells the caller that
    # further calls are needed to get the identifier.  Otherwise the
    # identifier is scanned and handled and a value of '' is returned
    # (for 'sub', the state returned by do_scan_sub is passed along).
    #
    # returns ( $i, $tok, $type, $id_scan_state )
    # USES GLOBAL VARIABLES: $current_package, $last_nonblank_token,
    #    $in_attribute_list, $statement_type, $tokenizer_self

    my (
        $input_line, $i, $tok, $rtokens,
        $rtoken_map, $id_scan_state, $max_token_index
    ) = @_;
    my $type = '';
    my $i_beg;

    #print "NSCAN:entering i=$i, tok=$tok, type=$type, state=$id_scan_state\n";
    #my ($a,$b,$c) = caller;
    #print "NSCAN: scan_id called with tok=$tok $a $b $c\n";

    if ($id_scan_state) {

        # on re-entry, start scanning at first token on the line
        $i_beg = $i;
    }
    else {

        # on initial entry, start scanning just after type token
        $i_beg         = $i + 1;
        $id_scan_state = $tok;
        $type          = 't';
    }

    # find $i_beg = index of next nonblank token,
    # and handle empty lines
    my $blank_line = 0;
    if ( $i_beg > $max_token_index ) {
        $blank_line = 1;
    }
    else {
        my $next_nonblank_token = $rtokens->[$i_beg];

        # only a '#' immediately after a '$' is not a comment
        if ( $next_nonblank_token eq '#' && $tok ne '$' ) {
            $blank_line = 1;
        }

        # step over leading whitespace; the rest of the line may still
        # turn out to be empty or a comment
        if ( $next_nonblank_token =~ /^\s/ ) {
            ( $next_nonblank_token, $i_beg ) =
              find_next_nonblank_token_on_this_line( $i_beg, $rtokens,
                $max_token_index );
            $blank_line = 1 if ( $next_nonblank_token =~ /(^#|^\s*$)/ );
        }
    }

    # handle non-blank line; identifier, if any, must follow
    if ( !$blank_line ) {

        if ( $id_scan_state eq 'sub' ) {
            ( $i, $tok, $type, $id_scan_state ) = do_scan_sub(
                $input_line, $i,             $i_beg,
                $tok,        $type,          $rtokens,
                $rtoken_map, $id_scan_state, $max_token_index
            );
        }
        elsif ( $id_scan_state eq 'package' ) {
            ( $i, $tok, $type ) =
              do_scan_package( $input_line, $i, $i_beg, $tok, $type, $rtokens,
                $rtoken_map, $max_token_index );
            $id_scan_state = '';
        }
        else {
            warning("invalid token in scan_id: $tok\n");
            $id_scan_state = '';
        }
    }

    # a pending scan state must always come with a type
    if ( $id_scan_state && !$type ) {

        # shouldn't happen:
        warning(
"Program bug in scan_id: undefined type but scan_state=$id_scan_state\n"
        );
        report_definite_bug();
    }

    TOKENIZER_DEBUG_FLAG_NSCAN && do {
        print
          "NSCAN: returns i=$i, tok=$tok, type=$type, state=$id_scan_state\n";
    };
    return ( $i, $tok, $type, $id_scan_state );
}

25719

25720

sub check_prototype {

    # Record what is known about a sub from its prototype.
    #
    # input parameters:
    #   $proto   - prototype text, with or without the enclosing
    #              parentheses; undef if the sub has no prototype
    #   $package - package name of the sub
    #   $subname - name of the sub
    # Nothing is recorded unless both $package and $subname are defined.
    #
    # Updates these tables, keyed by {$package}{$subname}:
    #   %is_user_function, %user_function_prototype - no prototype, or a
    #       non-empty one
    #   %is_block_function      - prototype contains '&' and ends in '&'
    #   %is_block_list_function - prototype contains '&', not at the end
    #   %is_constant            - prototype is empty (or otherwise false
    #       after the parentheses have been removed)
    my ( $proto, $package, $subname ) = @_;
    return unless ( defined($package) && defined($subname) );

    if ( !defined($proto) ) {
        $is_user_function{$package}{$subname} = 1;
    }
    else {

        # strip the enclosing parentheses and adjacent whitespace
        $proto =~ s/^\s*\(\s*//;
        $proto =~ s/\s*\)$//;

        if ( !$proto ) {
            $is_constant{$package}{$subname} = 1;
        }
        else {
            $is_user_function{$package}{$subname}        = 1;
            $user_function_prototype{$package}{$subname} = "($proto)";

            # prototypes containing '&' must be treated specially..
            if ( $proto =~ /\&/ ) {

                # right curly braces of prototypes ending in
                # '&' may be followed by an operator
                if ( $proto =~ /\&$/ ) {
                    $is_block_function{$package}{$subname} = 1;
                }

                # right curly braces of prototypes NOT ending in
                # '&' may NOT be followed by an operator
                else {
                    $is_block_list_function{$package}{$subname} = 1;
                }
            }
        }
    }
}

25754

25755

sub do_scan_package {

    # do_scan_package parses a package name.
    # It is called with $i_beg equal to the index of the first nonblank
    # token following a 'package' token.
    #
    # returns ( $i, $tok, $type ):
    #   if a package name is found, $tok is 'package ' followed by the
    #   scanned text, $type is 'i', $i is the pretoken index at the end
    #   of the name, and $current_package is updated;
    #   otherwise only $type is changed, to 'k'
    # USES GLOBAL VARIABLES: $current_package,

    my (
        $input_line, $i,       $i_beg,      $tok,
        $type,       $rtokens, $rtoken_map, $max_token_index
    ) = @_;
    my $pos_beg = $rtoken_map->[$i_beg];
    pos($input_line) = $pos_beg;

    # handle non-blank line; package name, if any, must follow
    if ( $input_line =~ m/\G\s*((?:\w*(?:'|::))*\w+)/gc ) {
        my $package = $1 || 'main';
        my $pos     = pos($input_line);

        # normalize: ' becomes ::, a leading :: means main, and a
        # trailing :: is dropped
        $package =~ s/\'/::/g;
        $package = 'main' . $package if ( $package =~ /^\:/ );
        $package =~ s/::$//;

        $tok  = 'package ' . substr( $input_line, $pos_beg, $pos - $pos_beg );
        $type = 'i';

        # Now we must convert back from character position
        # to pre_token index.
        # I don't think an error flag can occur here ..but ?
        my $error;
        ( $i, $error ) =
          inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index );
        warning("Possibly invalid package\n") if ($error);
        $current_package = $package;

        # check for error: only ';' or '}' is expected after the name
        my ($next_nonblank_token) =
          find_next_nonblank_token( $i, $rtokens, $max_token_index );
        unless ( $next_nonblank_token =~ /^[;\}]$/ ) {
            warning(
                "Unexpected '$next_nonblank_token' after package name '$tok'\n"
            );
        }
    }

    # no match but line not blank --
    # could be a label with name package, like package:  , for example.
    else {
        $type = 'k';
    }

    return ( $i, $tok, $type );
}

25808

25809

sub scan_identifier_do {

25810

25811

# This routine assembles tokens into identifiers. It maintains a

25812

# scan state, id_scan_state. It updates id_scan_state based upon

25813

# current id_scan_state and token, and returns an updated

25814

# id_scan_state and the next index after the identifier.

25815

# USES GLOBAL VARIABLES: $context, $last_nonblank_token,

25816

# $last_nonblank_type

25817

25818

my ( $i, $id_scan_state, $identifier, $rtokens, $max_token_index,

25819

$expecting )

25820

= @_;

25821

my $i_begin = $i;

25822

my $type = '';

25823

my $tok_begin = $$rtokens[$i_begin];

25824

if ( $tok_begin eq ':' ) { $tok_begin = '::' }

25825

my $id_scan_state_begin = $id_scan_state;

25826

my $identifier_begin = $identifier;

25827

my $tok = $tok_begin;

25828

my $message = "";

25829

25830

# these flags will be used to help figure out the type:

25831

my $saw_alpha = ( $tok =~ /^[A-Za-z_]/ );

25832

my $saw_type;

25833

25834

# allow old package separator (') except in 'use' statement

25835

my $allow_tick = ( $last_nonblank_token ne 'use' );

25836

25837

# get started by defining a type and a state if necessary

25838

unless ($id_scan_state) {

25839

$context = UNKNOWN_CONTEXT;

25840

25841

# fixup for digraph

25842

if ( $tok eq '>' ) {

25843

$tok = '->';

25844

$tok_begin = $tok;

25845

}

25846

$identifier = $tok;

25847

25848

if ( $tok eq '$' || $tok eq '*' ) {

25849

$id_scan_state = '$';

25850

$context = SCALAR_CONTEXT;

25851

}

25852

elsif ( $tok eq '%' || $tok eq '@' ) {

25853

$id_scan_state = '$';

25854

$context = LIST_CONTEXT;

25855

}

25856

elsif ( $tok eq '&' ) {

25857

$id_scan_state = '&';

25858

}

25859

elsif ( $tok eq 'sub' or $tok eq 'package' ) {

25860

$saw_alpha = 0; # 'sub' is considered type info here

25861

$id_scan_state = '$';

25862

$identifier .= ' '; # need a space to separate sub from sub name

25863

}

25864

elsif ( $tok eq '::' ) {

25865

$id_scan_state = 'A';

25866

}

25867

elsif ( $tok =~ /^[A-Za-z_]/ ) {

25868

$id_scan_state = ':';

25869

}

25870

elsif ( $tok eq '->' ) {

25871

$id_scan_state = '$';

25872

}

25873

else {

25874

25875

# shouldn't happen

25876

my ( $a, $b, $c ) = caller;

25877

warning("Program Bug: scan_identifier given bad token = $tok \n");

25878

warning(" called from sub $a line: $c\n");

25879

report_definite_bug();

25880

}

25881

$saw_type = !$saw_alpha;

25882

}

25883

else {

25884

$i--;

25885

$saw_type = ( $tok =~ /([\$\%\@\*\&])/ );

25886

}

25887

25888

# now loop to gather the identifier

25889

my $i_save = $i;

25890

25891

while ( $i < $max_token_index ) {

25892

$i_save = $i unless ( $tok =~ /^\s*$/ );

25893

$tok = $$rtokens[ ++$i ];

25894

25895

if ( ( $tok eq ':' ) && ( $$rtokens[ $i + 1 ] eq ':' ) ) {

25896

$tok = '::';

25897

$i++;

25898

}

25899

25900

if ( $id_scan_state eq '$' ) { # starting variable name

25901

25902

if ( $tok eq '$' ) {

25903

25904

$identifier .= $tok;

25905

25906

# we've got a punctuation variable if end of line (punct.t)

25907

if ( $i == $max_token_index ) {

25908

$type = 'i';

25909

$id_scan_state = '';

25910

last;

25911

}

25912

}

25913

elsif ( $tok =~ /^[A-Za-z_]/ ) { # alphanumeric ..

25914

$saw_alpha = 1;

25915

$id_scan_state = ':'; # now need ::

25916

$identifier .= $tok;

25917

}

25918

elsif ( $tok eq "'" && $allow_tick ) { # alphanumeric ..

25919

$saw_alpha = 1;

25920

$id_scan_state = ':'; # now need ::

25921

$identifier .= $tok;

25922

25923

# Perl will accept leading digits in identifiers,

25924

# although they may not always produce useful results.

25925

# Something like $main::0 is ok. But this also works:

25926

#

25927

# sub howdy::123::bubba{ print "bubba $54321!\n" }

25928

# howdy::123::bubba();

25929

#

25930

}

25931

elsif ( $tok =~ /^[0-9]/ ) { # numeric

25932

$saw_alpha = 1;

25933

$id_scan_state = ':'; # now need ::

25934

$identifier .= $tok;

25935

}

25936

elsif ( $tok eq '::' ) {

25937

$id_scan_state = 'A';

25938

$identifier .= $tok;

25939

}

25940

elsif ( ( $tok eq '#' ) && ( $identifier eq '$' ) ) { # $#array

25941

$identifier .= $tok; # keep same state, a $ could follow

25942

}

25943

elsif ( $tok eq '{' ) {

25944

25945

# check for something like ${#} or ${©}

25946

if ( $identifier eq '$'

25947

&& $i + 2 <= $max_token_index

25948

&& $$rtokens[ $i + 2 ] eq '}'

25949

&& $$rtokens[ $i + 1 ] !~ /[\s\w]/ )

25950

{

25951

my $next2 = $$rtokens[ $i + 2 ];

25952

my $next1 = $$rtokens[ $i + 1 ];

25953

$identifier .= $tok . $next1 . $next2;

25954

$i += 2;

25955

$id_scan_state = '';

25956

last;

25957

}

25958

25959

# skip something like ${xxx} or ->{

25960

$id_scan_state = '';

25961

25962

# if this is the first token of a line, any tokens for this

25963

# identifier have already been accumulated

25964

if ( $identifier eq '$' || $i == 0 ) { $identifier = ''; }

25965

$i = $i_save;

25966

last;

25967

}

25968

25969

# space ok after leading $ % * & @

25970

elsif ( $tok =~ /^\s*$/ ) {

25971

25972

if ( $identifier =~ /^[\$\%\*\&\@]/ ) {

25973

25974

if ( length($identifier) > 1 ) {

25975

$id_scan_state = '';

25976

$i = $i_save;

25977

$type = 'i'; # probably punctuation variable

25978

last;

25979

}

25980

else {

25981

25982

# spaces after $'s are common, and space after @

25983

# is harmless, so only complain about space

25984

# after other type characters. Space after $ and

25985

# @ will be removed in formatting. Report space

25986

# after % and * because they might indicate a

25987

# parsing error. In other words '% ' might be a

25988

# modulo operator. Delete this warning if it

25989

# gets annoying.

25990

if ( $identifier !~ /^[\@\$]$/ ) {

25991

$message =

25992

"Space in identifier, following $identifier\n";

25993

}

25994

}

25995

}

25996

25997

# else:

25998

# space after '->' is ok

25999

}

26000

elsif ( $tok eq '^' ) {

26001

26002

# check for some special variables like $^W

26003

if ( $identifier =~ /^[\$\*\@\%]$/ ) {

26004

$identifier .= $tok;

26005

$id_scan_state = 'A';

26006

26007

# Perl accepts '$^]' or '@^]', but

26008

# there must not be a space before the ']'.

26009

my $next1 = $$rtokens[ $i + 1 ];

26010

if ( $next1 eq ']' ) {

26011

$i++;

26012

$identifier .= $next1;

26013

$id_scan_state = "";

26014

last;

26015

}

26016

}

26017

else {

26018

$id_scan_state = '';

26019

}

26020

}

26021

else { # something else

26022

26023

# check for various punctuation variables

26024

if ( $identifier =~ /^[\$\*\@\%]$/ ) {

26025

$identifier .= $tok;

26026

}

26027

26028

elsif ( $identifier eq '$#' ) {

26029

26030

if ( $tok eq '{' ) { $type = 'i'; $i = $i_save }

26031

26032

# perl seems to allow just these: $#: $#- $#+

26033

elsif ( $tok =~ /^[\:\-\+]$/ ) {

26034

$type = 'i';

26035

$identifier .= $tok;

26036

}

26037

else {

26038

$i = $i_save;

26039

write_logfile_entry( 'Use of $# is deprecated' . "\n" );

26040

}

26041

}

26042

elsif ( $identifier eq '$$' ) {

26043

26044

# perl does not allow references to punctuation

26045

# variables without braces. For example, this

26046

# won't work:

26047

# $:=\4;

26048

# $a = $$:;

26049

# You would have to use

26050

# $a = ${$:};

26051

26052

$i = $i_save;

26053

if ( $tok eq '{' ) { $type = 't' }

26054

else { $type = 'i' }

26055

}

26056

elsif ( $identifier eq '->' ) {

26057

$i = $i_save;

26058

}

26059

else {

26060

$i = $i_save;

26061

if ( length($identifier) == 1 ) { $identifier = ''; }

26062

}

26063

$id_scan_state = '';

26064

last;

26065

}

26066

}

26067

elsif ( $id_scan_state eq '&' ) { # starting sub call?

26068

26069

if ( $tok =~ /^[\$A-Za-z_]/ ) { # alphanumeric ..

26070

$id_scan_state = ':'; # now need ::

26071

$saw_alpha = 1;

26072

$identifier .= $tok;

26073

}

26074

elsif ( $tok eq "'" && $allow_tick ) { # alphanumeric ..

26075

$id_scan_state = ':'; # now need ::

26076

$saw_alpha = 1;

26077

$identifier .= $tok;

26078

}

26079

elsif ( $tok =~ /^[0-9]/ ) { # numeric..see comments above

26080

$id_scan_state = ':'; # now need ::

26081

$saw_alpha = 1;

26082

$identifier .= $tok;

26083

}

26084

elsif ( $tok =~ /^\s*$/ ) { # allow space

26085

}

26086

elsif ( $tok eq '::' ) { # leading ::

26087

$id_scan_state = 'A'; # accept alpha next

26088

$identifier .= $tok;

26089

}

26090

elsif ( $tok eq '{' ) {

26091

if ( $identifier eq '&' || $i == 0 ) { $identifier = ''; }

26092

$i = $i_save;

26093

$id_scan_state = '';

26094

last;

26095

}

26096

else {

26097

26098

# punctuation variable?

26099

# testfile: cunningham4.pl

26100

#

26101

# We have to be careful here. If we are in an unknown state,

26102

# we will reject the punctuation variable. In the following

26103

# example the '&' is a binary operator but we are in an unknown

26104

# state because there is no sigil on 'Prima', so we don't

26105

# know what it is. But it is a bad guess that

26106

# '&~' is a punctuation variable.

26107

# $self->{text}->{colorMap}->[

26108

# Prima::PodView::COLOR_CODE_FOREGROUND

26109

# & ~tb::COLOR_INDEX ] =

26110

# $sec->{ColorCode}

26111

if ( $identifier eq '&' && $expecting ) {

26112

$identifier .= $tok;

26113

}

26114

else {

26115

$identifier = '';

26116

$i = $i_save;

26117

$type = '&';

26118

}

26119

$id_scan_state = '';

26120

last;

26121

}

26122

}

26123

elsif ( $id_scan_state eq 'A' ) { # looking for alpha (after ::)

26124

26125

if ( $tok =~ /^[A-Za-z_]/ ) { # found it

26126

$identifier .= $tok;

26127

$id_scan_state = ':'; # now need ::

26128

$saw_alpha = 1;

26129

}

26130

elsif ( $tok eq "'" && $allow_tick ) {

26131

$identifier .= $tok;

26132

$id_scan_state = ':'; # now need ::

26133

$saw_alpha = 1;

26134

}

26135

elsif ( $tok =~ /^[0-9]/ ) { # numeric..see comments above

26136

$identifier .= $tok;

26137

$id_scan_state = ':'; # now need ::

26138

$saw_alpha = 1;

26139

}

26140

elsif ( ( $identifier =~ /^sub / ) && ( $tok =~ /^\s*$/ ) ) {

26141

$id_scan_state = '(';

26142

$identifier .= $tok;

26143

}

26144

elsif ( ( $identifier =~ /^sub / ) && ( $tok eq '(' ) ) {

26145

$id_scan_state = ')';

26146

$identifier .= $tok;

26147

}

26148

else {

26149

$id_scan_state = '';

26150

$i = $i_save;

26151

last;

26152

}

26153

}

26154

elsif ( $id_scan_state eq ':' ) { # looking for :: after alpha

26155

26156

if ( $tok eq '::' ) { # got it

26157

$identifier .= $tok;

26158

$id_scan_state = 'A'; # now require alpha

26159

}

26160

elsif ( $tok =~ /^[A-Za-z_]/ ) { # more alphanumeric is ok here

26161

$identifier .= $tok;

26162

$id_scan_state = ':'; # now need ::

26163

$saw_alpha = 1;

26164

}

26165

elsif ( $tok =~ /^[0-9]/ ) { # numeric..see comments above

26166

$identifier .= $tok;

26167

$id_scan_state = ':'; # now need ::

26168

$saw_alpha = 1;

26169

}

26170

elsif ( $tok eq "'" && $allow_tick ) { # tick

26171

26172

if ( $is_keyword{$identifier} ) {

26173

$id_scan_state = ''; # that's all

26174

$i = $i_save;

26175

}

26176

else {

26177

$identifier .= $tok;

26178

}

26179

}

26180

elsif ( ( $identifier =~ /^sub / ) && ( $tok =~ /^\s*$/ ) ) {

26181

$id_scan_state = '(';

26182

$identifier .= $tok;

26183

}

26184

elsif ( ( $identifier =~ /^sub / ) && ( $tok eq '(' ) ) {

26185

$id_scan_state = ')';

26186

$identifier .= $tok;

26187

}

26188

else {

26189

$id_scan_state = ''; # that's all

26190

$i = $i_save;

26191

last;

26192

}

26193

}

26194

elsif ( $id_scan_state eq '(' ) { # looking for ( of prototype

26195

26196

if ( $tok eq '(' ) { # got it

26197

$identifier .= $tok;

26198

$id_scan_state = ')'; # now find the end of it

26199

}

26200

elsif ( $tok =~ /^\s*$/ ) { # blank - keep going

26201

$identifier .= $tok;

26202

}

26203

else {

26204

$id_scan_state = ''; # that's all - no prototype

26205

$i = $i_save;

26206

last;

26207

}

26208

}

26209

elsif ( $id_scan_state eq ')' ) { # looking for ) to end

26210

26211

if ( $tok eq ')' ) { # got it

26212

$identifier .= $tok;

26213

$id_scan_state = ''; # all done

26214

last;

26215

}

26216

elsif ( $tok =~ /^[\s\$\%\\\*\@\&\;]/ ) {

26217

$identifier .= $tok;

26218

}

26219

else { # probable error in script, but keep going

26220

warning("Unexpected '$tok' while seeking end of prototype\n");

26221

$identifier .= $tok;

26222

}

26223

}

26224

else { # can get here due to error in initialization

26225

$id_scan_state = '';

26226

$i = $i_save;

26227

last;

26228

}

26229

}

26230

26231

if ( $id_scan_state eq ')' ) {

26232

warning("Hit end of line while seeking ) to end prototype\n");

26233

}

26234

26235

# once we enter the actual identifier, it may not extend beyond

26236

# the end of the current line

26237

if ( $id_scan_state =~ /^[A\:]/ ) {

26238

$id_scan_state = '';

26239

}

26240

if ( $i < 0 ) { $i = 0 }

26241

26242

unless ($type) {

26243

26244

if ($saw_type) {

26245

26246

if ($saw_alpha) {

26247

if ( $identifier =~ /^->/ && $last_nonblank_type eq 'w' ) {

26248

$type = 'w';

26249

}

26250

else { $type = 'i' }

26251

}

26252

elsif ( $identifier eq '->' ) {

26253

$type = '->';

26254

}

26255

elsif (

26256

( length($identifier) > 1 )

26257

26258

# In something like '@$=' we have an identifier '@$'

26259

# In something like '$${' we have type '$$' (and only

26260

# part of an identifier)

26261

&& !( $identifier =~ /\$$/ && $tok eq '{' )

26262

&& ( $identifier !~ /^(sub |package )$/ )

26263

)

26264

{

26265

$type = 'i';

26266

}

26267

else { $type = 't' }

26268

}

26269

elsif ($saw_alpha) {

26270

26271

# type 'w' includes anything without leading type info

26272

# ($,%,@,*) including something like abc::def::ghi

26273

$type = 'w';

26274

}

26275

else {

26276

$type = '';

26277

} # this can happen on a restart

26278

}

26279

26280

if ($identifier) {

26281

$tok = $identifier;

26282

if ($message) { write_logfile_entry($message) }

26283

}

26284

else {

26285

$tok = $tok_begin;

26286

$i = $i_begin;

26287

}

26288

26289

TOKENIZER_DEBUG_FLAG_SCAN_ID && do {

26290

my ( $a, $b, $c ) = caller;

26291

print

26292

"SCANID: called from $a $b $c with tok, i, state, identifier =$tok_begin, $i_begin, $id_scan_state_begin, $identifier_begin\n";

26293

print

26294

"SCANID: returned with tok, i, state, identifier =$tok, $i, $id_scan_state, $identifier\n";

26295

};

26296

return ( $i, $tok, $type, $id_scan_state, $identifier );

26297

}

26298

26299

{

    # saved package and subnames in case prototype is on separate line
    my ( $package_saved, $subname_saved );

    sub do_scan_sub {

        # do_scan_sub parses a sub name and prototype
        # it is called with $i_beg equal to the index of the first nonblank
        # token following a 'sub' token.
        #
        # input parameters:
        #   $input_line      - the text of the line being scanned
        #   $i, $i_beg       - pre_token indexes (see above for $i_beg)
        #   $tok, $type      - current token and type; returned unchanged if
        #                      nothing matches
        #   $rtokens         - reference to the array of pre_tokens
        #   $rtoken_map      - reference to the pre_token map giving starting
        #                      character position in $input_line of each token
        #   $id_scan_state   - ignored on input (reset to "")
        #   $max_token_index - index of the last pre_token of this line
        # returns ( $i, $tok, $type, $id_scan_state ), where $id_scan_state
        # is 'sub' if we must come back to get a prototype on a later line.

        # TODO: add future error checks to be sure we have a valid
        # sub name.  For example, 'sub &doit' is wrong.  Also, be sure
        # a name is given if and only if a non-anonymous sub is
        # appropriate.
        # USES GLOBAL VARS: $current_package, $last_nonblank_token,
        # $in_attribute_list, %saw_function_definition,
        # $statement_type

        my (
            $input_line, $i,             $i_beg,
            $tok,        $type,          $rtokens,
            $rtoken_map, $id_scan_state, $max_token_index
        ) = @_;
        $id_scan_state = "";    # normally we get everything in one call
        my $subname = undef;
        my $package = undef;
        my $proto   = undef;
        my $attrs   = undef;
        my $match;

        my $pos_beg = $$rtoken_map[$i_beg];
        pos($input_line) = $pos_beg;

        # sub NAME PROTO ATTRS
        if (
            $input_line =~ m/\G\s*
        ((?:\w*(?:'|::))*)  # package - something that ends in :: or '
        (\w+)               # NAME    - required
        (\s*\([^){]*\))?    # PROTO   - something in parens
        (\s*:)?             # ATTRS   - leading : of attribute list
        /gcx
          )
        {
            $match   = 1;
            $subname = $2;
            $proto   = $3;
            $attrs   = $4;

            $package = ( defined($1) && $1 ) ? $1 : $current_package;
            $package =~ s/\'/::/g;
            if ( $package =~ /^\:/ ) { $package = 'main' . $package }
            $package =~ s/::$//;
            my $pos  = pos($input_line);
            my $numc = $pos - $pos_beg;
            $tok = 'sub ' . substr( $input_line, $pos_beg, $numc );
            $type = 'i';
        }

        # Look for prototype/attributes not preceded on this line by subname;
        # This might be an anonymous sub with attributes,
        # or a prototype on a separate line from its sub name
        elsif (
            $input_line =~ m/\G(\s*\([^){]*\))?  # PROTO
            (\s*:)?                              # ATTRS leading ':'
            /gcx
            && ( $1 || $2 )
          )
        {
            $match = 1;
            $proto = $1;
            $attrs = $2;

            # Handle prototype on separate line from subname
            if ($subname_saved) {
                $package = $package_saved;
                $subname = $subname_saved;
                $tok     = $last_nonblank_token;
            }
            $type = 'i';
        }

        if ($match) {

            # ATTRS: if there are attributes, back up and let the ':' be
            # found later by the scanner.
            my $pos = pos($input_line);
            if ($attrs) {
                $pos -= length($attrs);
            }

            my $next_nonblank_token = $tok;

            # catch case of line with leading ATTR ':' after anonymous sub
            if ( $pos == $pos_beg && $tok eq ':' ) {
                $type              = 'A';
                $in_attribute_list = 1;
            }

            # We must convert back from character position
            # to pre_token index.
            else {

                # I don't think an error flag can occur here ..but ?
                my $error;
                ( $i, $error ) = inverse_pretoken_map( $i, $pos, $rtoken_map,
                    $max_token_index );
                if ($error) { warning("Possibly invalid sub\n") }

                # check for multiple definitions of a sub
                ( $next_nonblank_token, my $i_next ) =
                  find_next_nonblank_token_on_this_line( $i, $rtokens,
                    $max_token_index );
            }

            if ( $next_nonblank_token =~ /^(\s*|#)$/ )
            {    # skip blank or side comment
                my ( $rpre_tokens, $rpre_types ) =
                  peek_ahead_for_n_nonblank_pre_tokens(1);
                if ( defined($rpre_tokens) && @$rpre_tokens ) {
                    $next_nonblank_token = $rpre_tokens->[0];
                }
                else {
                    $next_nonblank_token = '}';
                }
            }
            $package_saved = "";
            $subname_saved = "";
            if ( $next_nonblank_token eq '{' ) {
                if ($subname) {

                    # Check for multiple definitions of a sub, but
                    # it is ok to have multiple sub BEGIN, etc,
                    # so we do not complain if name is all caps
                    if (   $saw_function_definition{$package}{$subname}
                        && $subname !~ /^[A-Z]+$/ )
                    {
                        my $lno = $saw_function_definition{$package}{$subname};
                        warning(
"already saw definition of 'sub $subname' in package '$package' at line $lno\n"
                        );
                    }
                    $saw_function_definition{$package}{$subname} =
                      $tokenizer_self->{_last_line_number};
                }
            }

            # the next two empty branches are deliberate: ';' and '}' are
            # acceptable here and must not reach the warning at the end
            elsif ( $next_nonblank_token eq ';' ) {
            }
            elsif ( $next_nonblank_token eq '}' ) {
            }

            # ATTRS - if an attribute list follows, remember the name
            # of the sub so the next opening brace can be labeled.
            # Setting 'statement_type' causes any ':'s to introduce
            # attributes.
            elsif ( $next_nonblank_token eq ':' ) {
                $statement_type = $tok;
            }

            # see if PROTO follows on another line:
            elsif ( $next_nonblank_token eq '(' ) {
                if ( $attrs || $proto ) {
                    warning(
"unexpected '(' after definition or declaration of sub '$subname'\n"
                    );
                }
                else {
                    $id_scan_state  = 'sub';    # we must come back to get proto
                    $statement_type = $tok;
                    $package_saved  = $package;
                    $subname_saved  = $subname;
                }
            }
            elsif ($next_nonblank_token) {      # EOF technically ok
                warning(
"expecting ':' or ';' or '{' after definition or declaration of sub '$subname' but saw '$next_nonblank_token'\n"
                );
            }
            check_prototype( $proto, $package, $subname );
        }

        # no match but line not blank
        else {
        }
        return ( $i, $tok, $type, $id_scan_state );
    }
}

26486

26487

#########################################################################

26488

# Tokenizer utility routines which may use CONSTANTS but no other GLOBALS

26489

#########################################################################

26490

26491

sub find_next_nonblank_token {

    # Return the next nonblank pre_token following index $i.
    # input parameters:
    #   $i               - current pre_token index
    #   $rtokens         - reference to the array of pre_tokens
    #   $max_token_index - index of the last pre_token
    # returns ( $next_nonblank_token, $i ) where $i is the index of the
    # returned token.
    #
    # If $i is already at or beyond $max_token_index and peeked_ahead()
    # returns false, then peeked_ahead(1) is called and the token list is
    # replaced by the result of peek_ahead_for_nonblank_token().
    my ( $i, $rtokens, $max_token_index ) = @_;

    if ( $i >= $max_token_index ) {
        if ( !peeked_ahead() ) {
            peeked_ahead(1);
            $rtokens =
              peek_ahead_for_nonblank_token( $rtokens, $max_token_index );
        }
    }
    my $next_nonblank_token = $$rtokens[ ++$i ];

    # at most one blank token is skipped
    if ( $next_nonblank_token =~ /^\s*$/ ) {
        $next_nonblank_token = $$rtokens[ ++$i ];
    }
    return ( $next_nonblank_token, $i );
}

26508

26509

sub numerator_expected {

    # this is a filter for a possible numerator, in support of guessing
    # for the / pattern delimiter token.
    # input parameters:
    #   $i               - pre_token index; the token at $i+1 is examined
    #   $rtokens         - reference to the array of pre_tokens
    #   $max_token_index - index of the last pre_token
    # returns -
    #   1 - yes
    #   0 - can't tell
    #  -1 - no
    # Note: I am using the convention that variables ending in
    # _expected have these 3 possible values.
    my ( $i, $rtokens, $max_token_index ) = @_;
    my $next_token = $$rtokens[ $i + 1 ];
    if ( $next_token eq '=' ) { $i++; }    # handle /=
    my ($next_nonblank_token) =
      find_next_nonblank_token( $i, $rtokens, $max_token_index );

    # something which can start a term follows
    if ( $next_nonblank_token =~ /(\(|\$|\w|\.|\@)/ ) {
        return 1;
    }

    # blank: no information
    if ( $next_nonblank_token =~ /^\s*$/ ) {
        return 0;
    }
    return -1;
}

26538

26539

sub pattern_expected {

    # This is the start of a filter for a possible pattern.
    # It looks at the token after a possible pattern and tries to
    # determine if that token could end a pattern.
    # input parameters:
    #   $i               - pre_token index; the token at $i+1 is examined
    #   $rtokens         - reference to the array of pre_tokens
    #   $max_token_index - index of the last pre_token
    # returns -
    #   1 - yes
    #   0 - can't tell
    #  -1 - no
    my ( $i, $rtokens, $max_token_index ) = @_;
    my $next_token = $$rtokens[ $i + 1 ];
    if ( $next_token =~ /^[cgimosxp]/ ) { $i++; }    # skip possible modifier
    my ($next_nonblank_token) =
      find_next_nonblank_token( $i, $rtokens, $max_token_index );

    # list of tokens which may follow a pattern
    # (can probably be expanded)
    # NOTE(review): this match is unanchored, so any token merely
    # containing one of these strings also counts as a 'yes'.
    if ( $next_nonblank_token =~ /(\)|\}|\;|\&\&|\|\||and|or|while|if|unless)/ )
    {
        return 1;
    }

    # blank: no information
    if ( $next_nonblank_token =~ /^\s*$/ ) {
        return 0;
    }
    return -1;
}

26570

26571

sub find_next_nonblank_token_on_this_line {

    # Return the next nonblank pre_token following index $i without
    # looking beyond $max_token_index.
    # input parameters:
    #   $i               - current pre_token index
    #   $rtokens         - reference to the array of pre_tokens
    #   $max_token_index - index of the last pre_token
    # returns ( $next_nonblank_token, $i ):
    #   - if $i is already at or beyond $max_token_index, the token is the
    #     empty string and $i is unchanged
    #   - otherwise the token at $i+1, or at $i+2 if the token at $i+1 is
    #     blank and $i+2 does not exceed $max_token_index (so a trailing
    #     blank token can be returned)
    my ( $i, $rtokens, $max_token_index ) = @_;
    my $next_nonblank_token;

    if ( $i < $max_token_index ) {
        $next_nonblank_token = $$rtokens[ ++$i ];

        if ( $next_nonblank_token =~ /^\s*$/ ) {

            if ( $i < $max_token_index ) {
                $next_nonblank_token = $$rtokens[ ++$i ];
            }
        }
    }
    else {
        $next_nonblank_token = "";
    }
    return ( $next_nonblank_token, $i );
}

26590

26591

sub find_angle_operator_termination {

    # We are looking at a '<' and want to know if it is an angle operator.
    # input parameters:
    #   $input_line      - the text of the line being scanned
    #   $i_beg           - pre_token index of the '<'
    #   $rtoken_map      - reference to the pre_token map giving starting
    #                      character position in $input_line of each token
    #   $expecting       - TERM or UNKNOWN (anything else is a program bug)
    #   $max_token_index - index of the last pre_token
    # We are to return:
    #   $i = pretoken index of ending '>' if found, current $i otherwise
    #   $type = 'Q' if found, '<' otherwise
    my ( $input_line, $i_beg, $rtoken_map, $expecting, $max_token_index ) = @_;
    my $i    = $i_beg;
    my $type = '<';

    # start searching just after the '<'
    pos($input_line) = 1 + $$rtoken_map[$i];

    my $filter;

    # we just have to find the next '>' if a term is expected
    if ( $expecting == TERM ) { $filter = '[\>]' }

    # we have to guess if we don't know what is expected
    elsif ( $expecting == UNKNOWN ) { $filter = '[\>\;\=\#\|\<]' }

    # shouldn't happen - we shouldn't be here if operator is expected
    else { warning("Program Bug in find_angle_operator_termination\n") }

    # To illustrate what we might be looking at, in case we are
    # guessing, here are some examples of valid angle operators
    # (or file globs):
    #  <tmp_imp/*>
    #  <FH>
    #  <$fh>
    #  <*.c *.h>
    #  <_>
    #  <jskdfjskdfj* op/* jskdjfjkosvk*> ( glob.t)
    #  <${PREFIX}*img*.$IMAGE_TYPE>
    #  <img*.$IMAGE_TYPE>
    #  <Timg*.$IMAGE_TYPE>
    #  <$LATEX2HTMLVERSIONS${dd}html[1-9].[0-9].pl>
    #
    # Here are some examples of lines which do not have angle operators:
    #  return undef unless $self->[2]++ < $#{$self->[1]};
    #  < 2  || @$t >
    #
    # the following line from dlister.pl caused trouble:
    #  print'~'x79,"\n",$D<1024?"0.$D":$D>>10,"K, $C files\n\n\n";
    #
    # If the '<' starts an angle operator, it must end on this line and
    # it must not have certain characters like ';' and '=' in it.  I use
    # this to limit the testing.  This filter should be improved if
    # possible.

    if ( $input_line =~ /($filter)/g ) {

        if ( $1 eq '>' ) {

            # We MAY have found an angle operator termination if we get
            # here, but we need to do more to be sure we haven't been
            # fooled.
            my $pos = pos($input_line);

            my $pos_beg = $$rtoken_map[$i];
            my $str = substr( $input_line, $pos_beg, ( $pos - $pos_beg ) );

            # Reject if the closing '>' follows a '-' as in:
            # if ( VERSION < 5.009 && $op-> name eq 'aassign' ) { }
            if ( $expecting == UNKNOWN ) {
                my $check = substr( $input_line, $pos - 2, 1 );
                if ( $check eq '-' ) {
                    return ( $i, $type );
                }
            }

            ######################################debug#####
            #write_diagnostics( "ANGLE? :$str\n");
            #print "ANGLE: found $1 at pos=$pos str=$str check=$check\n";
            ######################################debug#####
            $type = 'Q';
            my $error;
            ( $i, $error ) =
              inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index );

            # It may be possible that a quote ends midway in a pretoken.
            # If this happens, it may be necessary to split the pretoken.
            if ($error) {
                warning(
                    "Possible tokinization error..please check this line\n");
                report_possible_bug();
            }

            # Now let's see where we stand....
            # OK if math op not possible
            if ( $expecting == TERM ) {
            }

            # OK if there are no more than 2 pre-tokens inside
            # (not possible to write 2 token math between < and >)
            # This catches most common cases
            elsif ( $i <= $i_beg + 3 ) {
                write_diagnostics("ANGLE(1 or 2 tokens): $str\n");
            }

            # Not sure..
            else {

                # Let's try a Brace Test: any braces inside must balance
                my $br = 0;
                while ( $str =~ /\{/g ) { $br++ }
                while ( $str =~ /\}/g ) { $br-- }
                my $sb = 0;
                while ( $str =~ /\[/g ) { $sb++ }
                while ( $str =~ /\]/g ) { $sb-- }
                my $pr = 0;
                while ( $str =~ /\(/g ) { $pr++ }
                while ( $str =~ /\)/g ) { $pr-- }

                # if braces do not balance - not angle operator
                if ( $br || $sb || $pr ) {
                    $i    = $i_beg;
                    $type = '<';
                    write_diagnostics(
                        "NOT ANGLE (BRACE={$br ($pr [$sb ):$str\n");
                }

                # we should keep doing more checks here...to be continued
                # Tentatively accepting this as a valid angle operator.
                # There are lots more things that can be checked.
                else {
                    write_diagnostics(
                        "ANGLE-Guessing yes: $str expecting=$expecting\n");
                    write_logfile_entry("Guessing angle operator here: $str\n");
                }
            }
        }

        # didn't find ending >
        else {
            if ( $expecting == TERM ) {
                warning("No ending > for angle operator\n");
            }
        }
    }
    return ( $i, $type );
}

26731

26732

sub scan_number_do {

    #  scan a number in any of the formats that Perl accepts
    #  Underbars (_) are allowed in decimal numbers.
    #  input parameters -
    #      $input_line  - the string to scan
    #      $i           - pre_token index to start scanning
    #      $rtoken_map  - reference to the pre_token map giving starting
    #                     character position in $input_line of token $i
    #      $input_type  - the type to return if no number is found
    #      $max_token_index - index of the last pre_token
    #  returns ( $i, $type, $number ) -
    #      $i      - last pre_token index of the number just scanned
    #                (the input $i if no number was found)
    #      $type   - 'v' for a v-string, 'n' for any other number,
    #                or $input_type if not a number
    #      $number - the number (characters); or undef if not a number

    my ( $input_line, $i, $rtoken_map, $input_type, $max_token_index ) = @_;
    my $pos_beg = $$rtoken_map[$i];
    my $pos;
    my $i_begin = $i;
    my $number  = undef;
    my $type    = $input_type;

    my $first_char = substr( $input_line, $pos_beg, 1 );

    # Look for bad starting characters; Shouldn't happen..
    if ( $first_char !~ /[\d\.\+\-Ee]/ ) {
        warning("Program bug - scan_number given character $first_char\n");
        report_definite_bug();
        return ( $i, $type, $number );
    }

    # handle v-string without leading 'v' character ('Two Dot' rule)
    # (vstring.t)
    # TODO: v-strings may contain underscores
    pos($input_line) = $pos_beg;
    if ( $input_line =~ /\G((\d+)?\.\d+(\.\d+)+)/g ) {
        $pos = pos($input_line);
        my $numc = $pos - $pos_beg;
        $number = substr( $input_line, $pos_beg, $numc );
        $type = 'v';
        report_v_string($number);
    }

    # handle octal, hex, binary
    if ( !defined($number) ) {
        pos($input_line) = $pos_beg;
        if ( $input_line =~ /\G[+-]?0((x[0-9a-fA-F_]+)|([0-7_]+)|(b[01_]+))/g )
        {
            $pos = pos($input_line);
            my $numc = $pos - $pos_beg;
            $number = substr( $input_line, $pos_beg, $numc );
            $type = 'n';
        }
    }

    # handle decimal
    if ( !defined($number) ) {
        pos($input_line) = $pos_beg;

        # every part of this pattern is optional, so it can match the
        # empty string; such non-numbers are filtered out below
        if ( $input_line =~ /\G([+-]?[\d_]*(\.[\d_]*)?([Ee][+-]?(\d+))?)/g ) {
            $pos = pos($input_line);

            # watch out for things like 0..40 which would give 0. by this;
            if (   ( substr( $input_line, $pos - 1, 1 ) eq '.' )
                && ( substr( $input_line, $pos, 1 ) eq '.' ) )
            {
                $pos--;
            }
            my $numc = $pos - $pos_beg;
            $number = substr( $input_line, $pos_beg, $numc );
            $type = 'n';
        }
    }

    # filter out non-numbers like e + - . e2  .e3 +e6
    # the rule: at least one digit, and any 'e' must be preceded by a digit
    if (
        $number !~ /\d/    # no digits
        || (   $number =~ /^(.*)[eE]/
            && $1 !~ /\d/ )    # or no digits before the 'e'
      )
    {
        $number = undef;
        $type   = $input_type;
        return ( $i, $type, $number );
    }

    # Found a number; now we must convert back from character position
    # to pre_token index. An error here implies user syntax error.
    # An example would be an invalid octal number like '009'.
    my $error;
    ( $i, $error ) =
      inverse_pretoken_map( $i, $pos, $rtoken_map, $max_token_index );
    if ($error) { warning("Possibly invalid number\n") }

    return ( $i, $type, $number );
}

26827

26828

sub inverse_pretoken_map {

    # Starting with the current pre_token index $i, scan forward until
    # finding the index of the next pre_token whose position is $pos.
    # input parameters:
    #   $i               - pre_token index at which to start
    #   $pos             - character position to locate
    #   $rtoken_map      - reference to the pre_token map giving the
    #                      starting character position of each pre_token
    #   $max_token_index - index of the last pre_token
    # returns ( $i, $error ):
    #   $i     - index of the pre_token just before the first one (after
    #            the starting index) whose position is >= $pos; if there is
    #            no such pre_token, the loop runs off the end and $i is
    #            one past $max_token_index (when the starting index was
    #            not already beyond it)
    #   $error - 1 if $pos falls inside a pre_token rather than on a
    #            pre_token boundary, 0 otherwise
    my ( $i, $pos, $rtoken_map, $max_token_index ) = @_;
    my $error = 0;

    while ( ++$i <= $max_token_index ) {

        if ( $pos <= $$rtoken_map[$i] ) {

            # Let the calling routine handle errors in which we do not
            # land on a pre-token boundary.  It can happen by running
            # perltidy on some non-perl scripts, for example.
            if ( $pos < $$rtoken_map[$i] ) { $error = 1 }
            $i--;
            last;
        }
    }
    return ( $i, $error );
}

26849

26850

sub find_here_doc {

    # find the target of a here document, if any
    # input parameters:
    #   $expecting - TERM or UNKNOWN, as compared below
    #   $i - token index of the second < of <<
    #   ($i must be less than the last token index if this is called)
    #   $rtokens, $rtoken_map - references to the pre_token array and map
    #   $max_token_index - index of the last pre_token
    # output parameters:
    #   $found_target = 0 didn't find target; =1 found target
    #   HERE_TARGET - the target string (may be empty string)
    #   $here_quote_character - the quote character of a quoted target,
    #           or the empty string for an unquoted target
    #   $i - unchanged if not here doc,
    #            or index of the last token of the here target
    #   $saw_error - flag noting unbalanced quote on here target
    my ( $expecting, $i, $rtokens, $rtoken_map, $max_token_index ) = @_;
    my $ibeg                 = $i;
    my $found_target         = 0;
    my $here_doc_target      = '';
    my $here_quote_character = '';
    my $saw_error            = 0;
    my ( $next_nonblank_token, $i_next_nonblank, $next_token );
    $next_token = $$rtokens[ $i + 1 ];

    # perl allows a backslash before the target string (heredoc.t)
    my $backslash = 0;
    if ( $next_token eq '\\' ) {
        $backslash  = 1;
        $next_token = $$rtokens[ $i + 2 ];
    }

    ( $next_nonblank_token, $i_next_nonblank ) =
      find_next_nonblank_token_on_this_line( $i, $rtokens, $max_token_index );

    if ( $next_nonblank_token =~ /[\'\"\`]/ ) {

        my $in_quote    = 1;
        my $quote_depth = 0;
        my $quote_pos   = 0;
        my $quoted_string;

        (
            $i, $in_quote, $here_quote_character, $quote_pos, $quote_depth,
            $quoted_string
          )
          = follow_quoted_string( $i_next_nonblank, $in_quote, $rtokens,
            $here_quote_character, $quote_pos, $quote_depth, $max_token_index );

        if ($in_quote) {    # didn't find end of quote, so no target found
            $i = $ibeg;
            if ( $expecting == TERM ) {
                warning(
"Did not find here-doc string terminator ($here_quote_character) before end of line \n"
                );
                $saw_error = 1;
            }
        }
        else {              # found ending quote
            my $j;
            $found_target = 1;

            my $tokj;
            for ( $j = $i_next_nonblank + 1 ; $j < $i ; $j++ ) {
                $tokj = $$rtokens[$j];

                # we have to remove any backslash before the quote character
                # so that the here-doc-target exactly matches this string
                next
                  if ( $tokj eq "\\"
                    && $j < $i - 1
                    && $$rtokens[ $j + 1 ] eq $here_quote_character );
                $here_doc_target .= $tokj;
            }
        }
    }

    elsif ( ( $next_token =~ /^\s*$/ ) and ( $expecting == TERM ) ) {
        $found_target = 1;
        write_logfile_entry(
            "found blank here-target after <<; suggest using \"\"\n");
        $i = $ibeg;
    }
    elsif ( $next_token =~ /^\w/ ) {    # simple bareword or integer after <<

        my $here_doc_expected;
        if ( $expecting == UNKNOWN ) {
            $here_doc_expected = guess_if_here_doc($next_token);
        }
        else {
            $here_doc_expected = 1;
        }

        if ($here_doc_expected) {
            $found_target    = 1;
            $here_doc_target = $next_token;
            $i               = $ibeg + 1;
        }

    }
    else {

        if ( $expecting == TERM ) {
            $found_target = 1;
            write_logfile_entry("Note: bare here-doc operator <<\n");
        }
        else {
            $i = $ibeg;
        }
    }

    # patch to neglect any prepended backslash
    if ( $found_target && $backslash ) { $i++ }

    return ( $found_target, $here_doc_target, $here_quote_character, $i,
        $saw_error );
}

26963

26964

sub do_quote {

    # follow (or continue following) quoted string(s)
    # $in_quote return code:
    #   0 - ok, found end
    #   1 - still must find end of quote whose target is $quote_character
    #   2 - still looking for end of first of two quotes
    #
    # Returns updated strings:
    #  $quoted_string_1 = quoted string seen while in_quote=1
    #  $quoted_string_2 = quoted string seen while in_quote=2
    #
    # The full return list is
    #  ( $i, $in_quote, $quote_character, $quote_pos, $quote_depth,
    #    $quoted_string_1, $quoted_string_2 )
    my (
        $i,               $in_quote,    $quote_character,
        $quote_pos,       $quote_depth, $quoted_string_1,
        $quoted_string_2, $rtokens,     $rtoken_map,
        $max_token_index
    ) = @_;

    my $in_quote_starting = $in_quote;

    my $quoted_string;
    if ( $in_quote == 2 ) {    # two quotes/quoted_string_1s to follow
        (
            $i, $in_quote, $quote_character, $quote_pos, $quote_depth,
            $quoted_string
          )
          = follow_quoted_string( $i, $in_quote, $rtokens, $quote_character,
            $quote_pos, $quote_depth, $max_token_index );
        $quoted_string_2 .= $quoted_string;
        if ( $in_quote == 1 ) {

            # found the end of the first quote; for a bracketing delimiter
            # step over one more token, then let the second quote pick
            # its own delimiter
            if ( $quote_character =~ /[\{\[\<\(]/ ) { $i++; }
            $quote_character = '';
        }
        else {

            # first quote continues past the tokens scanned here
            $quoted_string_2 .= "\n";
        }
    }

    if ( $in_quote == 1 ) {    # one (more) quote to follow
        my $ibeg = $i;
        (
            $i, $in_quote, $quote_character, $quote_pos, $quote_depth,
            $quoted_string
          )
          = follow_quoted_string( $ibeg, $in_quote, $rtokens, $quote_character,
            $quote_pos, $quote_depth, $max_token_index );
        $quoted_string_1 .= $quoted_string;
        if ( $in_quote == 1 ) {

            # quote continues past the tokens scanned here
            $quoted_string_1 .= "\n";
        }
    }
    return ( $i, $in_quote, $quote_character, $quote_pos, $quote_depth,
        $quoted_string_1, $quoted_string_2 );
}

27019

27020

sub follow_quoted_string {

    # scan for a specific token, skipping escaped characters
    # if the quote character is blank, use the first non-blank character
    # input parameters:
    #   $rtokens = reference to the array of tokens
    #   $i = the token index of the first character to search
    #   $in_quote = number of quoted strings being followed
    #   $beginning_tok = the starting quote character
    #   $quote_pos = index to check next for alphanumeric delimiter
    # output parameters:
    #   $i = the token index of the ending quote character
    #   $in_quote = decremented if found end, unchanged if not
    #   $beginning_tok = the starting quote character
    #   $quote_pos = index to check next for alphanumeric delimiter
    #   $quote_depth = nesting depth, since delimiters '{ ( [ <' can be nested.
    #   $quoted_string = the text of the quote (without quotation tokens)
    my ( $i_beg, $in_quote, $rtokens, $beginning_tok, $quote_pos, $quote_depth,
        $max_token_index )
      = @_;
    my ( $tok, $end_tok );
    my $i             = $i_beg - 1;
    my $quoted_string = "";

    TOKENIZER_DEBUG_FLAG_QUOTE && do {
        print
"QUOTE entering with quote_pos = $quote_pos i=$i beginning_tok =$beginning_tok\n";
    };

    # get the corresponding end token
    if ( $beginning_tok !~ /^\s*$/ ) {
        $end_tok = matching_end_token($beginning_tok);
    }

    # a blank token means we must find and use the first non-blank one
    else {
        my $allow_quote_comments = ( $i < 0 ) ? 1 : 0; # i<0 means we saw a <cr>

        while ( $i < $max_token_index ) {
            $tok = $$rtokens[ ++$i ];

            if ( $tok !~ /^\s*$/ ) {

                # a '#' here starts a comment: skip the rest of the tokens
                if ( ( $tok eq '#' ) && ($allow_quote_comments) ) {
                    $i = $max_token_index;
                }
                else {

                    if ( length($tok) > 1 ) {
                        if ( $quote_pos <= 0 ) { $quote_pos = 1 }
                        $beginning_tok = substr( $tok, $quote_pos - 1, 1 );
                    }
                    else {
                        $beginning_tok = $tok;
                        $quote_pos     = 0;
                    }
                    $end_tok     = matching_end_token($beginning_tok);
                    $quote_depth = 1;
                    last;
                }
            }
            else {
                $allow_quote_comments = 1;
            }
        }
    }

    # There are two different loops which search for the ending quote
    # character.  In the rare case of an alphanumeric quote delimiter, we
    # have to look through alphanumeric tokens character-by-character, since
    # the pre-tokenization process combines multiple alphanumeric
    # characters, whereas for a non-alphanumeric delimiter, only tokens of
    # length 1 can match.

    ###################################################################
    # Case 1 (rare): loop for case of alphanumeric quote delimiter..
    # "quote_pos" is the position the current word to begin searching
    ###################################################################
    if ( $beginning_tok =~ /\w/ ) {

        # Note this because it is not recommended practice except
        # for obfuscated perl contests
        if ( $in_quote == 1 ) {
            write_logfile_entry(
                "Note: alphanumeric quote delimiter ($beginning_tok) \n");
        }

        while ( $i < $max_token_index ) {

            if ( $quote_pos == 0 || ( $i < 0 ) ) {
                $tok = $$rtokens[ ++$i ];

                if ( $tok eq '\\' ) {

                    # retain backslash unless it hides the end token
                    $quoted_string .= $tok
                      unless $$rtokens[ $i + 1 ] eq $end_tok;
                    $quote_pos++;
                    last if ( $i >= $max_token_index );
                    $tok = $$rtokens[ ++$i ];
                }
            }
            my $old_pos = $quote_pos;

            # index() returns -1 when the end token is not in $tok, so
            # $quote_pos becomes 0 in that case
            $quote_pos = 1 + index( $tok, $end_tok, $quote_pos );

            if ( $quote_pos > 0 ) {

                $quoted_string .=
                  substr( $tok, $old_pos, $quote_pos - $old_pos - 1 );

                $quote_depth--;

                if ( $quote_depth == 0 ) {
                    $in_quote--;
                    last;
                }
            }
            else {
                $quoted_string .= substr( $tok, $old_pos );
            }
        }
    }

    ########################################################################
    # Case 2 (normal): loop for case of a non-alphanumeric quote delimiter..
    ########################################################################
    else {

        while ( $i < $max_token_index ) {
            $tok = $$rtokens[ ++$i ];

            if ( $tok eq $end_tok ) {
                $quote_depth--;

                if ( $quote_depth == 0 ) {
                    $in_quote--;
                    last;
                }
            }
            elsif ( $tok eq $beginning_tok ) {
                $quote_depth++;
            }
            elsif ( $tok eq '\\' ) {

                # retain backslash unless it hides the beginning or end token
                $tok = $$rtokens[ ++$i ];
                $quoted_string .= '\\'
                  unless ( $tok eq $end_tok || $tok eq $beginning_tok );
            }
            $quoted_string .= $tok;
        }
    }
    if ( $i > $max_token_index ) { $i = $max_token_index }
    return ( $i, $in_quote, $beginning_tok, $quote_pos, $quote_depth,
        $quoted_string );
}

27181

27182

sub indicate_error {

    # Write an error message, followed by a numbered copy of the input
    # line with a marker beneath the position of interest.
    #
    # input parameters are:
    #  $msg         = the message text handed to warning()
    #  $line_number = the number used to label the displayed line
    #  $input_line  = the text of the line
    #  $pos         = position of interest (the error) : 0 = first character
    #  $carrat      = the marker string to write beneath that position
    #
    # The output is bracketed by interrupt_logfile() / resume_logfile(),
    # both defined elsewhere in this file (presumably they suspend and
    # restore normal logfile output -- confirm against their definitions).
    my ( $msg, $line_number, $input_line, $pos, $carrat ) = @_;
    interrupt_logfile();
    warning($msg);
    write_error_indicator_pair( $line_number, $input_line, $pos, $carrat );
    resume_logfile();
}

27189

27190

sub write_error_indicator_pair {

    # Send two lines to warning(): a numbered (possibly shortened) copy
    # of the input line, and beneath it an underline carrying the marker
    # string $carrat at the position of interest.
    #
    # input parameters are:
    #  $line_number = the number used to label the displayed line
    #  $input_line  = the text of the line
    #  $pos         = position of interest : 0 = first character
    #  $carrat      = the marker string to write on the underline
    my ( $line_number, $input_line, $pos, $carrat ) = @_;
    my ( $offset, $numbered_line, $underline ) =
      make_numbered_line( $line_number, $input_line, $pos );

    # $pos refers to the input line; subtracting $offset converts it to a
    # position within the displayed line
    $underline = write_on_underline( $underline, $pos - $offset, $carrat );
    warning( $numbered_line . "\n" );

    # the underline starts out as all blanks, so trim what is left
    # after the marker
    $underline =~ s/\s*$//;
    warning( $underline . "\n" );
}

27199

27200

sub make_numbered_line {

    # Given an input line, its line number, and a character position of
    # interest, create a shortened string of the form
    #    $lineno: sub_string
    # such that the sub_string of $str contains the position of interest
    #
    # Here is an example of what we want, in this case we add trailing
    # '...' because the line is long.
    #
    # 2: (One of QAML 2.0's authors is a member of the World Wide Web Con ...
    #
    # Here is another example, this time in which we used leading '...'
    # because of excessive length:
    #
    # 2: ... er of the World Wide Web Consortium's
    #
    # input parameters are:
    #  $lineno = line number
    #  $str = the text of the line
    #  $pos = position of interest (the error) : 0 = first character
    #
    # We return :
    #  - $offset = an offset which corrects the position in case we only
    #    display part of a line, such that $pos-$offset is the effective
    #    position from the start of the displayed line.
    #  - $numbered_line = the numbered line as above,
    #  - $underline = a blank 'underline' which is all spaces with the same
    #    number of characters as the numbered line.

    my ( $lineno, $str, $pos ) = @_;

    # Display from the start of the line when the position of interest is
    # within the first 60 characters; otherwise begin 40 characters
    # before it.
    my $offset = ( $pos < 60 ) ? 0 : $pos - 40;

    # The tail is cut when more than 68 characters remain after $offset
    my $is_too_long = ( length($str) - $offset - 68 ) > 0;

    if ( $offset == 0 ) {

        # 64 characters of text plus 4 for the trailing ' ...'
        if ($is_too_long) {
            $str = substr( $str, 0, 64 ) . " ...";
        }
    }
    else {

        # the leading '... ' takes the place of the first 4 characters of
        # the displayed window, so the text starts at $offset + 4
        my $shown =
          $is_too_long
          ? substr( $str, $offset + 4, 64 ) . " ..."
          : substr( $str, $offset + 4 );
        $str = "... " . $shown;
    }

    my $numbered_line = sprintf( "%d: ", $lineno );

    # account for the width of the 'lineno: ' prefix
    $offset -= length($numbered_line);
    $numbered_line .= $str;
    my $underline = " " x length($numbered_line);
    return ( $offset, $numbered_line, $underline );
}

27258

27259

sub write_on_underline {

    # The "underline" is a string that shows where an error is; it starts
    # out as a string of blanks with the same length as the numbered line of
    # code above it, and we have to add marking to show where an error is.
    # In the example below, we want to write the string '---^' just below
    # the line of bad code:
    #
    # 2: (One of QAML 2.0's authors is a member of the World Wide Web Con ...
    #                 ---^
    # We are given the current underline string, plus a position and a
    # string to write on it.
    #
    # In the above example, there will be 2 calls to do this:
    # First call:  $pos=19, pos_chr=^
    # Second call: $pos=16, pos_chr=---
    #
    # This is a trivial thing to do with substr, but there is some
    # checking to do.
    #
    # Returns the updated underline; its length is never changed.

    my ( $underline, $pos, $pos_chr ) = @_;

    # check for error..shouldn't happen
    unless ( ( $pos >= 0 ) && ( $pos <= length($underline) ) ) {
        return $underline;
    }

    # drop whatever part of the marking would run past the end of the
    # underline
    my $excess = length($pos_chr) + $pos - length($underline);
    if ( $excess > 0 ) {
        $pos_chr = substr( $pos_chr, 0, length($pos_chr) - $excess );
    }
    substr( $underline, $pos, length($pos_chr) ) = $pos_chr;
    return ($underline);
}

27292

27293

sub pre_tokenize {

    # Break a string, $str, into a sequence of preliminary tokens.  We
    # are interested in these types of tokens:
    #   words       (type='w'),            example: 'max_tokens_wanted'
    #   digits      (type = 'd'),          example: '0755'
    #   whitespace  (type = 'b'),          example: ' '
    #   any other single character (i.e. punct; type = the character itself).
    # We cannot do better than this yet because we might be in a quoted
    # string or pattern.  Caller sets $max_tokens_wanted to 0 to get all
    # tokens.
    #
    # Note that the returned token map holds one more entry than there
    # are tokens: entry $i is the starting position of token $i, and the
    # final entry is the position just past the last token.
    my ( $str, $max_tokens_wanted ) = @_;

    # we return references to these 3 arrays:
    my @tokens    = ();     # array of the tokens themselves
    my @token_map = (0);    # string position of start of each token
    my @type      = ();     # 'b'=whitespace, 'd'=digits, 'w'=alpha, or punct

    do {

        # whitespace
        if ( $str =~ /\G(\s+)/gc ) { push @type, 'b'; }

        # numbers
        # note that this must come before words!
        elsif ( $str =~ /\G(\d+)/gc ) { push @type, 'd'; }

        # words
        elsif ( $str =~ /\G(\w+)/gc ) { push @type, 'w'; }

        # single-character punctuation
        elsif ( $str =~ /\G(\W)/gc ) { push @type, $1; }

        # that's all..
        else {
            return ( \@tokens, \@token_map, \@type );
        }

        # $1 still holds the text captured by whichever pattern matched
        push @tokens,    $1;
        push @token_map, pos($str);

        # a starting count of 0 goes negative here and so never stops
        # the loop early
    } while ( --$max_tokens_wanted != 0 );

    return ( \@tokens, \@token_map, \@type );
}

27338

27339

sub show_tokens {

    # this is an old debug routine
    #
    # Print one line per token to the currently selected output handle,
    # in the form
    #    index:length:map entry:token text:
    #
    # input parameters are:
    #  $rtokens    = reference to the array of tokens
    #  $rtoken_map = reference to an array indexed in parallel with the
    #                tokens (presumably the token starting positions
    #                produced by pre_tokenize -- confirm against callers)
    my ( $rtokens, $rtoken_map ) = @_;

    foreach my $i ( 0 .. $#{$rtokens} ) {
        my $len = length( $$rtokens[$i] );
        print "$i:$len:$$rtoken_map[$i]:$$rtokens[$i]:\n";
    }
}

27351

27352

sub matching_end_token {

    # find closing character for a pattern
    #
    # Given the opening delimiter, return the delimiter which closes it:
    # the partner for the four bracketing characters { [ < ( , and the
    # same character for anything else.
    my $beginning_token = shift;

    my %closing_token_for = (
        '{' => '}',
        '[' => ']',
        '<' => '>',
        '(' => ')',
    );

    return exists $closing_token_for{$beginning_token}
      ? $closing_token_for{$beginning_token}
      : $beginning_token;
}

27373

27374

sub dump_token_types {

    # Print a description of the token types and line types used by the
    # tokenizer.
    #
    # input parameters are:
    #  $class = the invocant (this is called as a class method; the
    #           value itself is not used)
    #  $fh    = the filehandle to print to
    my $class = shift;
    my $fh    = shift;

    # This should be the latest list of token types in use
    # adding NEW_TOKENS: add a comment here
    print $fh <<'END_OF_LIST';

Here is a list of the token types currently used for lines of type 'CODE'.
For the following tokens, the "type" of a token is just the token itself.

.. :: << >> ** && || // -> => += -= .= %= &= |= ^= *= <> ~~
( ) <= >= == =~ !~ != ++ -- /= x=
... **= <<= >>= &&= ||= //= <=> !~~
, + - / * | % ! x ~ = \ ? : . < > ^ &

The following additional token types are defined:

 type    meaning
    b    blank (white space)
    {    indent: opening structural curly brace or square bracket or paren
         (code block, anonymous hash reference, or anonymous array reference)
    }    outdent: right structural curly brace or square bracket or paren
    [    left non-structural square bracket (enclosing an array index)
    ]    right non-structural square bracket
    (    left non-structural paren (all but a list right of an =)
    )    right non-structural paren
    L    left non-structural curly brace (enclosing a key)
    R    right non-structural curly brace
    ;    terminal semicolon
    f    indicates a semicolon in a "for" statement
    h    here_doc operator <<
    #    a comment
    Q    indicates a quote or pattern
    q    indicates a qw quote block
    k    a perl keyword
    C    user-defined constant or constant function (with void prototype = ())
    U    user-defined function taking parameters
    G    user-defined function taking block parameter (like grep/map/eval)
    M    (unused, but reserved for subroutine definition name)
    P    (unused, but -html uses it to label pod text)
    t    type indicator such as %,$,@,*,&,sub
    w    bare word (perhaps a subroutine call)
    i    identifier of some type (with leading %, $, @, *, &, sub, -> )
    n    a number
    v    a v-string
    F    a file test operator (like -e)
    Y    File handle
    Z    identifier in indirect object slot: may be file handle, object
    J    LABEL:  code block label
    j    LABEL after next, last, redo, goto
    p    unary +
    m    unary -
    pp   pre-increment operator ++
    mm   pre-decrement operator --
    A    : used as attribute separator

Here are the '_line_type' codes used internally:
    SYSTEM         - system-specific code before hash-bang line
    CODE           - line of perl code (including comments)
    POD_START      - line starting pod, such as '=head'
    POD            - pod documentation text
    POD_END        - last line of pod section, '=cut'
    HERE           - text of here-document
    HERE_END       - last line of here-doc (target word)
    FORMAT         - format section
    FORMAT_END     - last line of format section, '.'
    DATA_START     - __DATA__ line
    DATA           - unidentified text following __DATA__
    END_START      - __END__ line
    END            - unidentified text following __END__
    ERROR          - we are in big trouble, probably not a perl script
END_OF_LIST
}

27448

27449

BEGIN {

    # Fill, at compile time, the package-level lookup tables which are
    # used by the tokenizer.  The variables assigned here are declared
    # elsewhere in this file.

    # These names are used in error messages
    @opening_brace_names = qw# '{' '[' '(' '?' #;
    @closing_brace_names = qw# '}' ']' ')' ':' #;

    my @digraphs = qw(
      .. :: << >> ** && .. || // -> => += -= .= %= &= |= ^= *= <>
      <= >= == =~ !~ != ++ -- /= x= ~~
    );
    @is_digraph{@digraphs} = (1) x scalar(@digraphs);

    my @trigraphs = qw( ... **= <<= >>= &&= ||= //= <=> !~~ );
    @is_trigraph{@trigraphs} = (1) x scalar(@trigraphs);

    # make a hash of all valid token types for self-checking the tokenizer
    # (adding NEW_TOKENS : select a new character and add to this list)
    my @valid_token_types = qw#
      A b C G L R f h Q k t w i q n p m F pp mm U j J Y Z v
      { } ( ) [ ] ; + - / * | % ! x ~ = \ ? : . < > ^ &
      #;
    push( @valid_token_types, @digraphs );
    push( @valid_token_types, @trigraphs );

    # '#' and ',' cannot be written inside the qw block above
    push( @valid_token_types, '#' );
    push( @valid_token_types, ',' );
    @is_valid_token_type{@valid_token_types} = (1) x scalar(@valid_token_types);

    # a list of file test letters, as in -e (Table 3-4 of 'camel 3')
    my @file_test_operators =
      qw( A B C M O R S T W X b c d e f g k l o p r s t u w x z);
    @is_file_test_operator{@file_test_operators} =
      (1) x scalar(@file_test_operators);

    # these functions have prototypes of the form (&), so when they are
    # followed by a block, that block MAY BE followed by an operator.
    my @block_operators = qw( do eval );
    @is_block_operator{@block_operators} = (1) x scalar(@block_operators);

    # these functions allow an identifier in the indirect object slot
    my @indirect_object_takers = qw( print printf sort exec system say);
    @is_indirect_object_taker{@indirect_object_takers} =
      (1) x scalar(@indirect_object_takers);

    # These tokens may precede a code block
    # patched for SWITCH/CASE
    my @code_block_tokens =
      qw( BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue if elsif else
      unless do while until eval for foreach map grep sort
      switch case given when);
    @is_code_block_token{@code_block_tokens} =
      (1) x scalar(@code_block_tokens);

    # I'll build the list of keywords incrementally
    my @Keywords = ();

    # keywords and tokens after which a value or pattern is expected,
    # but not an operator.  In other words, these should consume terms
    # to their right, or at least they are not expected to be followed
    # immediately by operators.
    #
    # NOTE(review): 'next' was absent from the copy of this list being
    # edited, although 'last', 'redo' and 'goto' were present and the list
    # is otherwise alphabetical at that point; it has been restored.
    # Confirm against the upstream Perl::Tidy source.
    my @value_requestor = qw(
      AUTOLOAD BEGIN CHECK DESTROY END EQ GE GT INIT LE LT NE UNITCHECK
      abs accept alarm and atan2
      bind binmode bless break
      caller chdir chmod chomp chop chown chr chroot close closedir cmp
      connect continue cos crypt
      dbmclose dbmopen defined delete die dump
      each else elsif eof eq exec exists exit exp
      fcntl fileno flock for foreach formline
      ge getc getgrgid getgrnam gethostbyaddr gethostbyname getnetbyaddr
      getnetbyname getpeername getpgrp getpriority getprotobyname
      getprotobynumber getpwnam getpwuid getservbyname getservbyport
      getsockname getsockopt glob gmtime goto grep gt
      hex
      if index int ioctl
      join
      keys kill
      last lc lcfirst le length link listen local localtime lock log lstat lt
      map mkdir msgctl msgget msgrcv msgsnd my
      ne next no not
      oct open opendir or ord our
      pack pipe pop pos print printf prototype push
      quotemeta
      rand read readdir readlink readline readpipe recv redo ref rename
      require reset return reverse rewinddir rindex rmdir
      scalar seek seekdir select semctl semget semop send sethostent
      setnetent setpgrp setpriority setprotoent setservent setsockopt shift
      shmctl shmget shmread shmwrite shutdown sin sleep socket socketpair
      sort splice split sprintf sqrt srand stat study substr symlink syscall
      sysopen sysread sysseek system syswrite
      tell telldir tie tied truncate
      uc ucfirst umask undef unless unlink unpack unshift untie until use
      utime
      values vec
      waitpid warn while write
      xor

      switch case given when err say
    );

    # patched above for SWITCH/CASE given/when err say
    # 'err' is a fairly safe addition.
    # TODO: 'default' still needed if appropriate
    # 'use feature' seen, but perltidy works ok without it.
    # Concerned that 'default' could break code.
    push( @Keywords, @value_requestor );

    # These are treated the same but are not keywords:
    my @extra_vr = qw(
      constant
      vars
    );
    push( @value_requestor, @extra_vr );

    @expecting_term_token{@value_requestor} = (1) x scalar(@value_requestor);

    # this list contains keywords which do not look for arguments,
    # so that they might be followed by an operator, or at least
    # not a term.
    my @operator_requestor = qw(
      endgrent endhostent endnetent endprotoent endpwent endservent
      fork
      getgrent gethostent getlogin getnetent getppid getprotoent getpwent
      getservent
      setgrent setpwent
      time times
      wait wantarray
    );

    push( @Keywords, @operator_requestor );

    # These are treated the same but are not considered keywords:
    my @extra_or = qw(
      STDERR
      STDIN
      STDOUT
    );

    push( @operator_requestor, @extra_or );

    @expecting_operator_token{@operator_requestor} =
      (1) x scalar(@operator_requestor);

    # these token TYPES expect trailing operator but not a term
    # note: ++ and -- are post-increment and decrement, 'C' = constant
    my @operator_requestor_types = qw( ++ -- C <> q );
    @expecting_operator_types{@operator_requestor_types} =
      (1) x scalar(@operator_requestor_types);

    # these token TYPES consume values (terms)
    # note: pp and mm are pre-increment and decrement
    # f=semicolon in for,  F=file test operator
    my @value_requestor_type = qw#
      L { ( [ ~ !~ =~ ; . .. ... A : && ! || // = + - x
      **= += -= .= /= *= %= x= &= |= ^= <<= >>= &&= ||= //=
      <= >= == != => \ > < % * / ? & | ** <=> ~~ !~~
      f F pp mm Y p m U J G j >> << ^ t
      #;
    push( @value_requestor_type, ',' )
      ;    # (perl doesn't like a ',' in a qw block)
    @expecting_term_types{@value_requestor_type} =
      (1) x scalar(@value_requestor_type);

    # Note: the following valid token types are not assigned here to
    # hashes requesting to be followed by values or terms, but are
    # instead currently hard-coded into sub operator_expected:
    # ) -> :: Q R Z ] b h i k n v w } #

    # For simple syntax checking, it is nice to have a list of operators which
    # will really be unhappy if not followed by a term.  This includes most
    # of the above...
    %really_want_term = %expecting_term_types;

    # with these exceptions...
    delete $really_want_term{'U'};    # user sub, depends on prototype
    delete $really_want_term{'F'};    # file test works on $_ if no following term
    delete $really_want_term{'Y'};    # indirect object, too risky to check syntax;
                                      # let perl do it

    my @quote_operators = qw(q qq qw qx qr s y tr m);
    @is_q_qq_qw_qx_qr_s_y_tr_m{@quote_operators} =
      (1) x scalar(@quote_operators);

    # These keywords are handled specially in the tokenizer code:
    my @special_keywords = qw(
      do eval format m package q qq qr qw qx s sub tr y
    );
    push( @Keywords, @special_keywords );

    # Keywords after which list formatting may be used
    # WARNING: do not include |map|grep|eval or perl may die on
    # syntax errors (map1.t).
    my @keyword_taking_list = qw(
      and
      chmod chomp chop chown
      dbmopen die
      elsif exec
      fcntl for foreach formline
      getsockopt
      if index ioctl
      join
      kill
      local
      msgctl msgrcv msgsnd my
      open or our
      pack print printf push
      read readpipe recv return reverse rindex
      seek select semctl semget send setpriority setsockopt shmctl shmget
      shmread shmwrite socket socketpair sort splice split sprintf substr
      syscall sysopen sysread sysseek system syswrite
      tie
      unless unlink unpack unshift until
      vec
      warn while
    );
    @is_keyword_taking_list{@keyword_taking_list} =
      (1) x scalar(@keyword_taking_list);

    # These are not used in any way yet
    #    my @unused_keywords = qw(
    #      CORE
    #     __FILE__
    #     __LINE__
    #     __PACKAGE__
    #     );

    #  The list of keywords was extracted from function 'keyword' in
    #  perl file toke.c version 5.005.03, using this utility, plus a
    #  little editing: (file getkwd.pl):
    #  while (<>) { while (/\"(.*)\"/g) { print "$1\n"; } }
    #  Add 'get' prefix where necessary, then split into the above lists.
    #  This list should be updated as necessary.
    #  The list should not contain these special variables:
    #  ARGV DATA ENV SIG STDERR STDIN STDOUT
    #  __DATA__ __END__

    @is_keyword{@Keywords} = (1) x scalar(@Keywords);
}

27940

1;

27941

__END__

27942

27943

=head1 NAME

27944

27945

Perl::Tidy - Parses and beautifies perl source

27946

27947

=head1 SYNOPSIS

27948

27949

use Perl::Tidy;

27950

27951

Perl::Tidy::perltidy(

27952

source => $source,

27953

destination => $destination,

27954

stderr => $stderr,

27955

argv => $argv,

27956

perltidyrc => $perltidyrc,

27957

logfile => $logfile,

27958

errorfile => $errorfile,

27959

formatter => $formatter, # callback object (see below)

27960

dump_options => $dump_options,

27961

dump_options_type => $dump_options_type,

27962

);

27963

27964

=head1 DESCRIPTION

27965

27966

This module makes the functionality of the perltidy utility available to perl

27967

scripts. Any or all of the input parameters may be omitted, in which case the

27968

@ARGV array will be used to provide input parameters as described

27969

in the perltidy(1) man page.

27970

27971

For example, the perltidy script is basically just this:

27972

27973

use Perl::Tidy;

27974

Perl::Tidy::perltidy();

27975

27976

The module accepts input and output streams by a variety of methods.

27977

The following list of parameters may be any of the following: a

27978

filename, an ARRAY reference, a SCALAR reference, or an object with

27979

either a B<getline> or B<print> method, as appropriate.

27980

27981

source - the source of the script to be formatted

27982

destination - the destination of the formatted output

27983

stderr - standard error output

27984

perltidyrc - the .perltidyrc file

27985

logfile - the .LOG file stream, if any

27986

errorfile - the .ERR file stream, if any

27987

dump_options - ref to a hash to receive parameters (see below),

27988

dump_options_type - controls contents of dump_options

27989

dump_getopt_flags - ref to a hash to receive Getopt flags

27990

dump_options_category - ref to a hash giving category of options

27991

dump_abbreviations - ref to a hash giving all abbreviations

27992

27993

The following chart illustrates the logic used to decide how to

27994

treat a parameter.

27995

27996

ref($param) $param is assumed to be:

27997

----------- ---------------------

27998

undef a filename

27999

SCALAR ref to string

28000

ARRAY ref to array

28001

(other) object with getline (if source) or print method

28002

28003

If the parameter is an object, and the object has a B<close> method, that

28004

close method will be called at the end of the stream.

28005

28006

=over 4

28007

28008

=item source

28009

28010

If the B<source> parameter is given, it defines the source of the

28011

input stream.

28012

28013

=item destination

28014

28015

If the B<destination> parameter is given, it will be used to define the

28016

file or memory location to receive output of perltidy.

28017

28018

=item stderr

28019

28020

The B<stderr> parameter allows the calling program to capture the output

28021

to what would otherwise go to the standard error output device.

28022

28023

=item perltidyrc

28024

28025

If the B<perltidyrc> file is given, it will be used instead of any

28026

F<.perltidyrc> configuration file that would otherwise be used.

28027

28028

=item argv

28029

28030

If the B<argv> parameter is given, it will be used instead of the

28031

B<@ARGV> array. The B<argv> parameter may be a string, a reference to a

28032

string, or a reference to an array. If it is a string or reference to a

28033

string, it will be parsed into an array of items just as if it were a

28034

command line string.

28035

28036

=item dump_options

28037

28038

If the B<dump_options> parameter is given, it must be the reference to a hash.

28039

In this case, the parameters contained in any perltidyrc configuration file

28040

will be placed in this hash and perltidy will return immediately. This is

28041

equivalent to running perltidy with --dump-options, except that the parameters

28042

are returned in a hash rather than dumped to standard output. Also, by default

28043

only the parameters in the perltidyrc file are returned, but this can be

28044

changed (see the next parameter). This parameter provides a convenient method

28045

for external programs to read a perltidyrc file. An example program using

28046

this feature, F<perltidyrc_dump.pl>, is included in the distribution.

28047

28048

Any combination of the B<dump_> parameters may be used together.

28049

28050

=item dump_options_type

28051

28052

This parameter is a string which can be used to control the parameters placed

28053

in the hash reference supplied by B<dump_options>. The possible values are

28054

'perltidyrc' (default) and 'full'. The 'full' parameter causes both the

28055

default options plus any options found in a perltidyrc file to be returned.

28056

28057

=item dump_getopt_flags

28058

28059

If the B<dump_getopt_flags> parameter is given, it must be the reference to a

28060

hash. This hash will receive all of the parameters that perltidy understands

28061

and flags that are passed to Getopt::Long. This parameter may be

28062

used alone or with the B<dump_options> flag. Perltidy will

28063

exit immediately after filling this hash. See the demo program

28064

F<perltidyrc_dump.pl> for example usage.

28065

28066

=item dump_options_category

28067

28068

If the B<dump_options_category> parameter is given, it must be the reference to a

28069

hash. This hash will receive a hash with keys equal to all long parameter names

28070

and values equal to the title of the corresponding section of the perltidy manual.

28071

See the demo program F<perltidyrc_dump.pl> for example usage.

28072

28073

=item dump_abbreviations

28074

28075

If the B<dump_abbreviations> parameter is given, it must be the reference to a

28076

hash. This hash will receive all abbreviations used by Perl::Tidy. See the

28077

demo program F<perltidyrc_dump.pl> for example usage.

28078

28079

=back

28080

28081

=head1 EXAMPLE

28082

28083

The following example passes perltidy a snippet as a reference

28084

to a string and receives the result back in a reference to

28085

an array.

28086

28087

use Perl::Tidy;

28088

28089

# some messy source code to format

28090

my $source = <<'EOM';

28091

use strict;

28092

my @editors=('Emacs', 'Vi '); my $rand = rand();

28093

print "A poll of 10 random programmers gave these results:\n";

28094

foreach(0..10) {

28095

my $i=int ($rand+rand());

28096

print " $editors[$i] users are from Venus" . ", " .

28097

"$editors[1-$i] users are from Mars" .

28098

"\n";

28099

}

28100

EOM

28101

28102

# We'll pass it as ref to SCALAR and receive it in a ref to ARRAY

28103

my @dest;

28104

perltidy( source => \$source, destination => \@dest );

28105

foreach (@dest) {print}

28106

28107

=head1 Using the B<formatter> Callback Object

28108

28109

The B<formatter> parameter is an optional callback object which allows

28110

the calling program to receive tokenized lines directly from perltidy for

28111

further specialized processing. When this parameter is used, the two

28112

formatting options which are built into perltidy (beautification or

28113

html) are ignored. The following diagram illustrates the logical flow:

28114

28115

|-- (normal route) -> code beautification

28116

caller->perltidy->|-- (-html flag ) -> create html

28117

|-- (formatter given)-> callback to write_line

28118

28119

This can be useful for processing perl scripts in some way. The

28120

parameter C<$formatter> in the perltidy call,

28121

28122

formatter => $formatter,

28123

28124

is an object created by the caller with a C<write_line> method which

28125

will accept and process tokenized lines, one line per call. Here is

28126

a simple example of a C<write_line> which merely prints the line number,

28127

the line type (as determined by perltidy), and the text of the line:

28128

28129

sub write_line {

28130

28131

# This is called from perltidy line-by-line

28132

my $self = shift;

28133

my $line_of_tokens = shift;

28134

my $line_type = $line_of_tokens->{_line_type};

28135

my $input_line_number = $line_of_tokens->{_line_number};

28136

my $input_line = $line_of_tokens->{_line_text};

28137

print "$input_line_number:$line_type:$input_line";

28138

}

28139

28140

The complete program, B<perllinetype>, is contained in the examples section of

28141

the source distribution. As this example shows, the callback method

28142

receives a parameter B<$line_of_tokens>, which is a reference to a hash

28143

of other useful information. This example uses these hash entries:

28144

28145

$line_of_tokens->{_line_number} - the line number (1,2,...)

28146

$line_of_tokens->{_line_text} - the text of the line

28147

$line_of_tokens->{_line_type} - the type of the line, one of:

28148

28149

SYSTEM - system-specific code before hash-bang line

28150

CODE - line of perl code (including comments)

28151

POD_START - line starting pod, such as '=head'

28152

POD - pod documentation text

28153

POD_END - last line of pod section, '=cut'

28154

HERE - text of here-document

28155

HERE_END - last line of here-doc (target word)

28156

FORMAT - format section

28157

FORMAT_END - last line of format section, '.'

28158

DATA_START - __DATA__ line

28159

DATA - unidentified text following __DATA__

28160

END_START - __END__ line

28161

END - unidentified text following __END__

28162

ERROR - we are in big trouble, probably not a perl script

28163

28164

Most applications will only be interested in lines of type B<CODE>. For

28165

another example, let's write a program which checks for one of the

28166

so-called I<naughty matching variables> C<$`>, C<$&>, and C<$'>, which

28167

can slow down processing. Here is a B<write_line>, from the example

28168

program B<find_naughty.pl>, which does that:

28169

28170

sub write_line {

28171

28172

# This is called back from perltidy line-by-line

28173

# We're looking for $`, $&, and $'

28174

my ( $self, $line_of_tokens ) = @_;

28175

28176

# pull out some stuff we might need

28177

my $line_type = $line_of_tokens->{_line_type};

28178

my $input_line_number = $line_of_tokens->{_line_number};

28179

my $input_line = $line_of_tokens->{_line_text};

28180

my $rtoken_type = $line_of_tokens->{_rtoken_type};

28181

my $rtokens = $line_of_tokens->{_rtokens};

28182

chomp $input_line;

28183

28184

# skip comments, pod, etc

28185

return if ( $line_type ne 'CODE' );

28186

28187

# loop over tokens looking for $`, $&, and $'

28188

for ( my $j = 0 ; $j < @$rtoken_type ; $j++ ) {

28189

28190

# we only want to examine token types 'i' (identifier)

28191

next unless $$rtoken_type[$j] eq 'i';

28192

28193

# pull out the actual token text

28194

my $token = $$rtokens[$j];

28195

28196

# and check it

28197

if ( $token =~ /^\$[\`\&\']$/ ) {

28198

print STDERR

28199

"$input_line_number: $token\n";

28200

}

28201

}

28202

}

28203

28204

This example pulls out these tokenization variables from the $line_of_tokens

28205

hash reference:

28206

28207

$rtoken_type = $line_of_tokens->{_rtoken_type};

28208

$rtokens = $line_of_tokens->{_rtokens};

28209

28210

The variable C<$rtoken_type> is a reference to an array of token type codes,

28211

and C<$rtokens> is a reference to a corresponding array of token text.

28212

These are obviously only defined for lines of type B<CODE>.

28213

Perltidy classifies tokens into types, and has a brief code for each type.

28214

You can get a complete list at any time by running perltidy from the

28215

command line with

28216

28217

perltidy --dump-token-types

28218

28219

In the present example, we are only looking for tokens of type B<i>

28220

(identifiers), so the for loop skips past all other types. When an

28221

identifier is found, its actual text is checked to see if it is one

28222

being sought. If so, the above write_line prints the token and its

28223

line number.

28224

28225

The B<formatter> feature is relatively new in perltidy, and further

28226

documentation needs to be written to complete its description. However,

28227

several example programs have been written and can be found in the

28228

B<examples> section of the source distribution. Probably the best way

28229

to get started is to find one of the examples which most closely matches

28230

your application and start modifying it.

28231

28232

For help with perltidy's peculiar way of breaking lines into tokens, you

28233

might run, from the command line,

28234

28235

perltidy -D filename

28236

28237

where F<filename> is a short script of interest. This will produce

28238

F<filename.DEBUG> with interleaved lines of text and their token types.

28239

The B<-D> flag has been in perltidy from the beginning for this purpose.

28240

If you want to see the code which creates this file, it is

28241

C<write_debug_entry> in Tidy.pm.

28242

28243

=head1 EXPORT

28244

28245

&perltidy

28246

28247

=head1 CREDITS

28248

28249

Thanks to Hugh Myers who developed the initial modular interface

28250

to perltidy.

28251

28252

=head1 VERSION

28253

28254

This man page documents Perl::Tidy version 20071205.

28255

28256

=head1 AUTHOR

28257

28258

Steve Hancock

28259

perltidy at users.sourceforge.net

28260

28261

=head1 SEE ALSO

28262

28263

The perltidy(1) man page describes all of the features of perltidy. It

28264

can be found at http://perltidy.sourceforge.net.

28265

28266

=cut