174
175
$lines[-1]{presig} .= $lines[-1]{quoter} = defn $2;
175
176
$lines[-1]{presig} .= $lines[-1]{quotespace} = defn $3;
177
$lines[-1]{hang} = $args{lists} ? Hang->new($_) : NullHang->new();
178
$lines[-1]{hang} = Hang->new($_, $args{lists});
179
180
s/([ \t]*)(.*?)(\s*)$//
180
181
or die "Internal Error ($@) on '$_'";
273
275
my $lastignored = 1;
274
276
for my $index (0..$#paras) {
275
277
local $_ = $paras[$index]{raw} . "\n";
277
$paras[$index]{ignore} = $args{ignore}($lastignored);
278
next unless $args{mail} && /^--$/;
278
$paras[$index]{ignore} = $args{ignore}($lastignored);
279
$lastignored &&= $paras[$index]{ignore};
280
next unless $args{mail} && /^--\s*$/;
279
281
$remainder = join "\n", map { $_->{raw} } splice @paras, $index;
280
282
$remainder .= "\n" unless $remainder =~ /\n\z/;
528
530
s/ ( [:;] \s+ ) ($alword) /$1 . recase($2,'title')/ex;
531
my $abbrev = join '|', qw{
532
etc[.] pp[.] ph[.]?d[.]
533
(?:[A-Z][A-Za-z]+[.])+
534
(?:[A-Z][.])(?:[A-Z][.])+
537
my $gen_abbrev = join '|', $abbrev, qw{
538
(^[^a-z]*([a-z][.])+)
533
sub inv($@) { my ($k, %inv)=shift; for(0..$#_) {$inv{$_[$_]}=$_*$k} %inv }
534
my @unit= ( "" , qw ( I II III IV V VI VII VIII IX ));
535
my @ten = ( "" , qw ( X XX XXX XL L LX LXX LXXX XC ));
536
my @hund= ( "" , qw ( C CC CCC CD D DC DCC DCCC CM ));
537
my @thou= ( "" , qw ( M MM MMM ));
538
my %rval= (inv(1,@unit),inv(10,@ten),inv(100,@hund),inv(1000,@thou));
539
my $rbpat= join ")(",join("|",reverse @thou), join("|",reverse @hund), join("|",reverse @ten), join("|",reverse @unit);
540
my $rpat= join ")(?:",join("|",reverse @thou), join("|",reverse @hund), join("|",reverse @ten), join("|",reverse @unit);
541
my $rom = qq/(?:(?=[MDCLXVI])(?:$rpat))/;
543
my $abbrev = join '|', qw{ etc[.] pp[.] ph[.]?d[.] },
544
"(?!$rom)(?:[A-Z][A-Za-z]+[.])+",
545
'(?:[A-Z][.])(?:[A-Z][.])+';
547
my $gen_abbrev = join '|',
548
qw{ etc[.] pp[.] ph[.]?d[.] },
549
'(?:[A-Z][.])(?:[A-Z][.])+',
550
'(^[^a-zA-Z]*([a-z][.])+)';
541
552
my $term = q{(?:[.]|[!?]+)};
558
569
$str =~ s/([a-z])/uc $1/ie;
559
570
$brsent = $str =~ /^[[(]/;
561
$eos = $str !~ /($gen_abbrev)[^a-z]*\s/i
572
$eos = $str !~ /^($gen_abbrev)[^a-z]*\s/i
562
573
&& $str =~ /[a-z][^a-z]*$term([^a-z]*)\s/
563
574
&& !($1=~/[])]/ && !$brsent);
564
575
$str =~ s/\s+$/$trailer/ if $eos && $trailer;
629
sub inv($@) { my ($k, %inv)=shift; for(0..$#_) {$inv{$_[$_]}=$_*$k} %inv }
630
my @unit= ( "" , qw ( I II III IV V VI VII VIII IX ));
631
my @ten = ( "" , qw ( X XX XXX XL L LX LXX LXXX XC ));
632
my @hund= ( "" , qw ( C CC CCC CD D DC DCC DCCC CM ));
633
my @thou= ( "" , qw ( M MM MMM ));
634
my %rval= (inv(1,@unit),inv(10,@ten),inv(100,@hund),inv(1000,@thou));
635
my $rbpat= join ")(",join("|",reverse @thou), join("|",reverse @hund), join("|",reverse @ten), join("|",reverse @unit);
636
my $rpat= join ")(?:",join("|",reverse @thou), join("|",reverse @hund), join("|",reverse @ten), join("|",reverse @unit);
652
655
# BITS OF A NUMERIC VALUE
654
my $num = q/(?:\d{1,3}\b)/;
655
my $rom = qq/(?:(?=[MDCLXVI])(?:$rpat))/;
657
my $num = q/(?:\d{1,3}\b(?!:\d\d\b))/; # Ignore 8:20 etc.
656
658
my $let = q/[A-Za-z]/;
657
659
my $pbr = q/[[(<]/;
658
660
my $sbr = q/])>/;
671
my ($class, $orig) = @_;
673
my ($class, $orig, $lists_mode) = @_;
674
return NullHang->new() if !$lists_mode;
672
676
my $origlen = length $orig;
674
678
if ($_[1] =~ s#\A($hangPS)##) {
675
679
@vals = { type => 'ps', val => $1 }
677
elsif ($_[1] =~ s#\A($hang)##) {
681
elsif ($lists_mode =~ /1|bullet/i && $_[1] =~ s#\A($hang)##) {
678
682
@vals = { type => 'bul', val => $1 }
680
elsif ($_[1] =~ m#\([^\s)]+\s#) {
684
elsif ($_[1] =~ m#\A\([^\s)]+\s#) {
687
691
last if $_[1] =~ m#\A($ows)($abbrev)#
688
692
&& (length $1 || !@vals); # ws-separated or first
694
last if $_[1] =~ m{\A $ows $pbr [^$sbr \t]* \s}xms;
690
696
$cut = $origlen - length $_[1];
691
697
my $pre = $_[1] =~ s#\A($ows$pbr$ows)## ? $1 : "";
692
my $val = $_[1] =~ s#\A($num)## && { type=>'num', val=>$1 }
693
|| $_[1] =~ s#\A($rom)##i && { type=>'rom', val=>$1, nval=>fromRoman($1) }
694
|| $_[1] =~ s#\A($let(?!$let))##i && { type=>'let', val=>$1 }
695
|| { val => "", type => "" };
699
= ($lists_mode =~ /1|number/i && $_[1] =~ s#\A($num)##)
700
? { type=>'num', val=>$1 }
701
: ($lists_mode =~ /1|roman/i && $_[1] =~ s#\A($rom)\b##i)
702
? { type=>'rom', val=>$1, nval=>fromRoman($1) }
703
: ($lists_mode =~ /1|alpha/i && $_[1] =~ s#\A($let(?!$let))##i)
704
? { type=>'let', val=>$1 }
705
: { val => "", type => "" };
696
706
$_[1] = $pre.$_[1] and last unless $val->{val};
697
707
$val->{post} = $pre && $_[1] =~ s#\A($ows()[.:/]?[$close{$pre}][.:/]?)## && $1
698
708
|| $_[1] =~ s#\A($ows()[$sbr.:/])## && $1
705
715
$_[1] = substr($orig,pop(@vals)->{cut});
708
# check for orphaned years...
709
if (@vals==1 && $vals[0]->{type} eq 'num'
710
&& $vals[0]->{val} >= 1000
711
&& $vals[0]->{post} eq '.') {
712
$_[1] = substr($orig,pop(@vals)->{cut});
719
# check for orphaned years or unlikely Roman numerals...
720
if (@vals==1 && defined $vals[0]->{post} && $vals[0]->{post} =~ /[\.>)]/) {
722
if ($v->{type} eq 'num' && $v->{val} >= 1000) {
723
$_[1] = substr($orig,pop(@vals)->{cut});
715
727
return NullHang->new if !@vals;
716
728
bless \@vals, $class;
882
894
$formatted = autoformat $rawtext, { squeeze=>0 };
896
# Select appropriate tabspacing (default is 8 spaces per tab):
898
$formatted = autoformat $rawtext, { tabspace=>4 };
884
900
# Case conversions...
886
902
$formatted = autoformat $rawtext, { case => 'lower' };
1093
1109
disordered lists of letters and Roman numerals presents an interesting
1094
1110
challenge. A list such as:
1098
1114
E. Activate Geiger counter.
1100
1116
should be reordered as C<A.> C<B.> C<C.,> whereas:
1104
1120
XLI. Activate Geiger counter.
1110
1126
only of valid Roman numerals, at least one of which is two or
1111
1127
more characters long.
1129
Note that renumbering starts at the first number actually given, rather than
1130
restarting at the first possible number. To renumber from 1 (or A.) you must
1131
change the first numbered bullet to that.
1113
1133
If automatic renumbering isn't wanted, just specify the C<'renumber'>
1114
1134
option with a false value.
1116
Note that numbers above 1000 at the start of a line are no longer
1136
Note that normal numbers above 1000 at the start of a line are no longer
1117
1137
considered to be paragraph numbering. Numbered paragraphs running that
1118
1138
high are exceptionally rare, and much rarer than paragraphs that look
1121
1141
Although it has long been popular (especially in the year
1122
1142
2001) to point out that we now live in the Future, many
1148
1168
treated as bullets) to be ignored in this way, you can turn of list formatting
1149
1169
entirely by setting the C<'lists'> option to a false value.
1171
You can also select which kinds of lists are recognized, by using a string as
1174
# Don't recognize Roman numerals or alphabetics as list markers...
1175
autoformat { lists => 'number, bullet' }, $text;
1177
# Don't recognize bullets or numbers as list markers...
1178
autoformat { lists => 'roman, alpha' }, $text;
1180
# Recognize everything except Roman numerals as list markers...
1181
autoformat { lists => 'number, bullet, alpha' }, $text;
1183
The string should contain one or more of the following words: C<number>,
1184
C<bullet>, C<alpha>, C<roman>. C<autoformat()> will ignore any list type that
1185
doesn't appear in the C<'lists'> string.
1196
1231
When reformatted (see below), the indentation and the attribution
1197
1232
structure will be preserved:
1199
"We are all of us in the gutter, but some of us are looking at
1234
"We are all of us in the gutter, but some of us are looking
1203
1238
=head2 Widow control
1234
1269
(but I<not> on any quoters before that text). For example, with
1235
1270
C<'right'> justification:
1237
R3> Now is the Winter of our discontent made
1238
R4> glorious Summer by this son of York. And all
1239
R5> the clouds that lour'd upon our house In the
1240
R6> deep bosom of the ocean buried.
1272
R3> Now is the Winter of our discontent made
1273
R3> glorious Summer by this son of York. And all
1274
R3> the clouds that lour'd upon our house In the
1275
R3> deep bosom of the ocean buried.
1242
1277
Full justification is interesting in a fixed-width medium like plaintext
1243
1278
because it usually results in uneven spacing between words. Typically,
1244
1279
formatters provide this by distributing the extra spaces into the first
1245
1280
available gaps of each line:
1247
R7> Now is the Winter of our discontent made
1248
R8> glorious Summer by this son of York. And all
1249
R9> the clouds that lour'd upon our house In
1250
R10> the deep bosom of the ocean buried.
1282
R3> Now is the Winter of our discontent made
1283
R3> glorious Summer by this son of York. And all
1284
R3> the clouds that lour'd upon our house In
1285
R3> the deep bosom of the ocean buried.
1252
1287
This produces a rather jarring visual effect, so C<autoformat> reverses
1253
1288
the strategy and inserts extra spaces at the end of lines:
1255
R11> Now is the Winter of our discontent made
1256
R12> glorious Summer by this son of York. And all
1257
R13> the clouds that lour'd upon our house In
1258
R14> the deep bosom of the ocean buried.
1290
R3> Now is the Winter of our discontent made
1291
R3> glorious Summer by this son of York. And all
1292
R3> the clouds that lour'd upon our house In
1293
R3> the deep bosom of the ocean buried.
1260
1295
Most readers find this less disconcerting.
1365
1400
print autoformat { all => 1, ignore => 'indented' }, $text;
1403
=head2 Handling tabs
1405
Text::Autoformat replaces any tabs in the text it's formatting with the
1406
appropriate number of spaces (using Text::Tabs to do its dirty work). It
1407
normally assumes that each tab is equivalent to 8 space characters, but you
1408
can change that default using the 'tabspace' option:
1410
print autoformat { tabspace => 4 }, $text;
1368
1413
=head1 SEE ALSO
1370
1415
The Text::Reform module
1378
1423
There are undoubtedly serious bugs lurking somewhere in code this funky
1379
1424
:-) Bug reports and other feedback are most welcome.
1383
Copyright (c) 1997-2000, Damian Conway. All Rights Reserved. This module
1384
is free software. It may be used, redistributed and/or modified under
1385
the terms of the Perl Artistic License (see
1386
http://www.perl.com/perl/misc/Artistic.html)
1426
=head1 LICENCE AND COPYRIGHT
1428
Copyright (c) 1997-2007, Damian Conway C<< <DCONWAY@CPAN.org> >>. All rights reserved.
1430
This module is free software; you can redistribute it and/or
1431
modify it under the same terms as Perl itself. See L<perlartistic>.
1434
=head1 DISCLAIMER OF WARRANTY
1436
BECAUSE THIS SOFTWARE IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
1437
FOR THE SOFTWARE, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
1438
OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
1439
PROVIDE THE SOFTWARE "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER
1440
EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
1441
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE
1442
ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE SOFTWARE IS WITH
1443
YOU. SHOULD THE SOFTWARE PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL
1444
NECESSARY SERVICING, REPAIR, OR CORRECTION.
1446
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
1447
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
1448
REDISTRIBUTE THE SOFTWARE AS PERMITTED BY THE ABOVE LICENCE, BE
1449
LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL,
1450
OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE
1451
THE SOFTWARE (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING
1452
RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A
1453
FAILURE OF THE SOFTWARE TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF
1454
SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF