682
758
# It'd be nice to do this with a foreach loop, but with only three
683
759
# possibilities right now, it's easier not to...
685
if ($self->{conf}->{rewrite_header}->{Subject}) {
761
if (defined $self->{conf}->{rewrite_header}->{Subject}) {
762
$subject = "\n" if !defined $subject;
687
763
my $tag = $self->_replace_tags($self->{conf}->{rewrite_header}->{Subject});
688
764
$tag =~ s/\n/ /gs; # strip tag's newlines
689
765
$subject =~ s/^(?:\Q${tag}\E )?/${tag} /g; # For some reason the tag may already be there!?
692
if ($self->{conf}->{rewrite_header}->{To}) {
768
if (defined $self->{conf}->{rewrite_header}->{To}) {
769
$to = "\n" if !defined $to;
694
770
my $tag = $self->_replace_tags($self->{conf}->{rewrite_header}->{To});
695
771
$tag =~ s/\n/ /gs; # strip tag's newlines
696
772
$to =~ s/(?:\t\Q(${tag})\E)?$/\t(${tag})/;
699
if ($self->{conf}->{rewrite_header}->{From}) {
775
if (defined $self->{conf}->{rewrite_header}->{From}) {
776
$from = "\n" if !defined $from;
701
777
my $tag = $self->_replace_tags($self->{conf}->{rewrite_header}->{From});
702
778
$tag =~ s/\n+//gs; # strip tag's newlines
703
779
$from =~ s/(?:\t\Q(${tag})\E)?$/\t(${tag})/;
706
782
# add report headers to message
707
$newmsg .= "From: $from" if $from;
708
$newmsg .= "To: $to" if $to;
709
$newmsg .= "Cc: $cc" if $cc;
710
$newmsg .= "Subject: $subject" if $subject;
711
$newmsg .= "Date: $date" if $date;
712
$newmsg .= "Message-Id: $msgid" if $msgid;
714
foreach my $header (keys %{$self->{conf}->{headers_spam}}) {
715
my $data = $self->{conf}->{headers_spam}->{$header};
716
my $line = $self->_process_header($header,$data);
717
$line = $self->qp_encode_header($line);
718
$newmsg .= "X-Spam-$header: $line\n" # add even if empty
783
$newmsg .= "From: $from" if defined $from;
784
$newmsg .= "To: $to" if defined $to;
785
$newmsg .= "Cc: $cc" if defined $cc;
786
$newmsg .= "Subject: $subject" if defined $subject;
787
$newmsg .= "Date: $date" if defined $date;
788
$newmsg .= "Message-Id: $msgid" if defined $msgid;
789
$newmsg .= $self->_get_added_headers('headers_spam');
721
791
if (defined $self->{conf}->{report_safe_copy_headers}) {
722
792
my %already_added = map { $_ => 1 } qw/from to cc subject date message-id/;
869
939
# The tag should be a comment for this header ...
870
940
$tag = "($tag)" if ($hdr =~ /^(?:From|To)$/);
872
943
s/^([^:]+:)[ \t]*(?:\Q${tag}\E )?/$1 ${tag} /i;
875
946
$addition = 'headers_spam';
878
# Break the pristine header set up into two blocks; "pre" is the stuff that
879
# we want to ensure comes before any SpamAssassin markup headers, like the
880
# Return-Path header (see bug 3409).
949
# Break the pristine header set into two blocks; $new_hdrs_pre is the stuff
950
# that we want to ensure comes before any SpamAssassin markup headers,
951
# like the Return-Path header (see bug 3409).
882
# "post" is all the rest of the message headers, placed after the
883
# SpamAssassin markup hdrs. Once one of those headers is seen, all further
884
# headers go into that set; it's assumed that it's an old copy of the
885
# header, or attempted spoofing, if it crops up halfway through the
953
# all the rest of the message headers (as left in @pristine_headers), is
954
# to be placed after the SpamAssassin markup hdrs. Once one of those headers
955
# is seen, all further headers go into that set; it's assumed that it's an
956
# old copy of the header, or attempted spoofing, if it crops up halfway
957
# through the headers.
888
959
my $new_hdrs_pre = '';
889
my $new_hdrs_post = '';
890
foreach my $hdr (@pristine_headers) {
891
if ($new_hdrs_post eq '' && $hdr =~ /^Return-Path:/i) {
892
$new_hdrs_pre .= $hdr;
894
$new_hdrs_post .= $hdr;
960
if (@pristine_headers && $pristine_headers[0] =~ /^Return-Path:/i) {
961
$new_hdrs_pre .= shift(@pristine_headers);
962
while (@pristine_headers && $pristine_headers[0] =~ /^[ \t]/) {
963
$new_hdrs_pre .= shift(@pristine_headers);
898
# use string appends to put this back together -- I finally benchmarked it.
899
# join() is 56% of the speed of just using string appends. ;)
900
while (my ($header, $data) = each %{$self->{conf}->{$addition}}) {
901
my $line = $self->_process_header($header,$data);
902
$line = $self->qp_encode_header($line);
903
$new_hdrs_pre .= "X-Spam-$header: $line\n";
966
$new_hdrs_pre .= $self->_get_added_headers($addition);
906
968
# fix up line endings appropriately
907
my $newmsg = $new_hdrs_pre.$new_hdrs_post.$separator;
969
my $newmsg = $new_hdrs_pre . join('',@pristine_headers) . $separator;
908
970
$self->_fixup_report_line_endings(\$newmsg);
910
972
return $newmsg.$self->{msg}->get_pristine_body();
967
1030
# a tag for it (bug 4793)
970
1034
$text =~ s{(_(\w+?)(?:\((.*?)\))?_)}{
973
my $result = $self->_get_tag($tag,$3);
974
(defined $result) ? $result : $full;
1038
if ($tag =~ /^ADDEDHEADER(?:HAM|SPAM|)\z/) {
1039
# Bug 6278: break infinite recursion through _get_added_headers and
1040
# _get_tag on an attempt to use such tag in add_header template
1042
$result = $self->_get_tag($tag,$3);
1044
defined $result ? $result : $full;
980
sub bayes_report_make_list {
983
my $param = shift || "5";
984
my ($limit,$fmt_arg,$more) = split /,/, $param;
986
return "Tokens not available." unless defined $info;
990
Short => 'Token: \"$t\"',
991
compact => '$p-$D--$t',
992
Compact => 'Probability $p -declassification distance $D (\"+\" means > 9) --token: \"$t\"',
993
medium => '$p-$D-$N--$t',
994
long => '$p-$d--${h}h-${s}s--${a}d--$t',
995
Long => 'Probability $p -declassification distance $D --in ${h} ham messages -and ${s} spam messages --$a} days old--token:\"$t\"'
998
my $raw_fmt = (!$fmt_arg ? '$p-$D--$t' : $formats{$fmt_arg});
1000
return "Invalid format, must be one of: ".join(",",keys %formats)
1001
unless defined $raw_fmt;
1003
my $fmt = '"'.$raw_fmt.'"';
1004
my $amt = $limit < @$info ? $limit : @$info;
1005
return "" unless $amt;
1007
my $Bayes = $self->{main}{bayes_scanner};
1008
return "Bayes not available" unless defined $Bayes;
1009
my $ns = $self->{bayes_nspam};
1010
my $nh = $self->{bayes_nham};
1011
my $digit = sub { $_[0] > 9 ? "+" : $_[0] };
1015
my($t,$prob,$s,$h,$u) = @$_;
1016
my $a = int(($now - $u)/(3600 * 24));
1017
my $d = $Bayes->compute_declassification_distance($ns,$nh,$s,$h,$prob);
1018
my $p = sprintf "%.3f", $prob;
1020
my ($c,$o) = $prob < 0.5 ? ($h,$s) : ($s,$h);
1021
my ($D,$S,$H,$C,$O,$N) = map &$digit($_), ($d,$s,$h,$c,$o,$n);
1022
eval $fmt; ## no critic
1023
} @{$info}[0..$amt-1];
1026
1050
###########################################################################
1028
1052
# public API for plugins
1177
1201
REMOTEHOSTNAME => sub {
1178
$self->{tag_data}->{'REMOTEHOSTNAME'} ||
1202
$self->{tag_data}->{'REMOTEHOSTNAME'} || "localhost";
1181
1204
REMOTEHOSTADDR => sub {
1182
$self->{tag_data}->{'REMOTEHOSTADDR'} ||
1205
$self->{tag_data}->{'REMOTEHOSTADDR'} || "127.0.0.1";
1186
1208
LASTEXTERNALIP => sub {
1187
1209
my $lasthop = $self->{relays_external}->[0];
1188
return $lasthop ? $lasthop->{ip} : '';
1210
$lasthop ? $lasthop->{ip} : '';
1191
1213
LASTEXTERNALRDNS => sub {
1192
1214
my $lasthop = $self->{relays_external}->[0];
1193
return $lasthop ? $lasthop->{rdns} : '';
1215
$lasthop ? $lasthop->{rdns} : '';
1196
1218
LASTEXTERNALHELO => sub {
1197
1219
my $lasthop = $self->{relays_external}->[0];
1198
return $lasthop ? $lasthop->{helo} : '';
1220
$lasthop ? $lasthop->{helo} : '';
1201
CONTACTADDRESS => sub { $self->{conf}->{report_contact}; },
1223
CONTACTADDRESS => sub { $self->{conf}->{report_contact} },
1204
1226
defined($self->{bayes_score}) ?
1205
1227
sprintf("%3.4f", $self->{bayes_score}) : "0.5"
1208
HAMMYTOKENS => sub {
1209
$self->bayes_report_make_list
1210
( $self->{bayes_token_info_hammy}, shift );
1213
SPAMMYTOKENS => sub {
1214
$self->bayes_report_make_list
1215
( $self->{bayes_token_info_spammy}, shift );
1218
TOKENSUMMARY => sub {
1219
if( defined $self->{tag_data}{BAYESTC} )
1221
my $tcount_neutral = $self->{tag_data}{BAYESTCLEARNED}
1222
- $self->{tag_data}{BAYESTCSPAMMY}
1223
- $self->{tag_data}{BAYESTCHAMMY};
1224
my $tcount_new = $self->{tag_data}{BAYESTC}
1225
- $self->{tag_data}{BAYESTCLEARNED};
1226
"Tokens: new, $tcount_new; "
1227
."hammy, $self->{tag_data}{BAYESTCHAMMY}; "
1228
."neutral, $tcount_neutral; "
1229
."spammy, $self->{tag_data}{BAYESTCSPAMMY}."
1235
1230
DATE => \&Mail::SpamAssassin::Util::time_to_rfc822_date,
1238
1233
my $arg = (shift || "*");
1239
1234
my $length = int($self->{score});
1240
1235
$length = 50 if $length > 50;
1241
return $arg x $length;
1244
AUTOLEARN => sub { return $self->get_autolearn_status(); },
1239
AUTOLEARN => sub { $self->get_autolearn_status() },
1246
AUTOLEARNSCORE => sub { return $self->get_autolearn_points(); },
1241
AUTOLEARNSCORE => sub { $self->get_autolearn_points() },
1249
1244
my $arg = (shift || ',');
1250
return (join($arg, sort(@{$self->{test_names_hit}})) || "none");
1245
join($arg, sort(@{$self->{test_names_hit}})) || "none";
1253
1248
SUBTESTS => sub {
1254
1249
my $arg = (shift || ',');
1255
return (join($arg, sort(@{$self->{subtest_names_hit}})) || "none");
1250
join($arg, sort(@{$self->{subtest_names_hit}})) || "none";
1258
1253
TESTSSCORES => sub {
1265
1260
$line .= $arg . $test . "=" . $self->{conf}->{scores}->{$test};
1268
return $line ? $line : 'none';
1263
$line ? $line : 'none';
1271
1266
PREVIEW => sub { $self->get_content_preview() },
1274
return "\n" . ($self->{tag_data}->{REPORT} || "");
1268
REPORT => sub { "\n" . ($self->{tag_data}->{REPORT} || "") },
1277
1270
HEADER => sub {
1278
1271
my $hdr = shift || return;
1279
return $self->get($hdr);
1272
$self->get($hdr,undef);
1275
TIMING => sub { $self->{main}->timer_report() },
1277
ADDEDHEADERHAM => sub { $self->_get_added_headers('headers_ham') },
1279
ADDEDHEADERSPAM=> sub { $self->_get_added_headers('headers_spam') },
1281
ADDEDHEADER => sub {
1282
$self->_get_added_headers(
1283
$self->{is_spam} ? 'headers_spam' : 'headers_ham');
1285
1289
if (exists $tags{$tag}) {
1286
$data = $tags{$tag}->(@_);
1288
elsif (exists($self->{tag_data}->{$tag})) {
1290
$data = $tags{$tag};
1291
$data = $data->(@_) if ref $data eq 'CODE';
1292
$data = "" if !defined $data;
1293
} elsif (exists $self->{tag_data}->{$tag}) {
1289
1294
$data = $self->{tag_data}->{$tag};
1290
if (ref $data eq 'CODE') {
1291
$data = $data->(@_);
1294
# known valid tags that might not get defined in some circumstances
1295
elsif ($tag !~ /^(?:BAYESTC(?:|LEARNED|SPAMMY|HAMMY)|RBL)$/) {
1298
$data = "" unless defined $data;
1295
$data = $data->(@_) if ref $data eq 'CODE';
1296
$data = "" if !defined $data;
1661
1667
return $result;
1664
# heavily optimized for speed
1670
# optimized for speed
1665
1671
# $_[0] is self
1666
1672
# $_[1] is request
1667
1673
# $_[2] is defval
1669
# return cache entry if it is defined
1670
return $_[0]->{c}->{$_[1]} if defined $_[0]->{c}->{$_[1]};
1672
# fill in cache entry if it is empty
1673
if (!exists $_[0]->{c}->{$_[1]}) {
1674
$_[0]->{c}->{$_[1]} = _get(@_);
1675
return $_[0]->{c}->{$_[1]} if defined $_[0]->{c}->{$_[1]};
1675
my $cache = $_[0]->{c};
1677
if (exists $cache->{$_[1]}) {
1678
# return cache entry if it is known
1679
# (measured hit/attempts rate on a production mailer is about 47%)
1680
$found = $cache->{$_[1]};
1682
# fill in a cache entry
1684
$cache->{$_[1]} = $found;
1678
# if the requested header wasn't found, we should return either
1679
# a default value as specified by the caller, or the blank string ''
1686
# if the requested header wasn't found, we should return a default value
1687
# as specified by the caller: if defval argument is present it represents
1688
# a default value even if undef; if defval argument is absent a default
1689
# value is an empty string for upwards compatibility
1690
return (defined $found ? $found : @_ > 2 ? $_[2] : '');
1683
1693
###########################################################################
1685
# Taken from URI and URI::Find
1686
my $reserved = q(;/?:@&=+$,[]\#|);
1687
my $mark = q(-_.!~*'()); #'; emacs
1688
my $unreserved = "A-Za-z0-9\Q$mark\E\x00-\x08\x0b\x0c\x0e-\x1a\x1c-\x1f";
1689
my $uricSet = quotemeta($reserved) . $unreserved . "%";
1691
my $schemeRE = qr/(?:https?|ftp|mailto|javascript|file)/i;
1693
my $uricCheat = $uricSet;
1694
$uricCheat =~ tr/://d;
1695
# uri parsing from plain text:
1696
# The goals are to find URIs in plain text spam that are intended to be clicked on or copy/pasted, but
1697
# ignore random strings that might look like URIs, for example in uuencoded files, and to ignore
1698
# URIs that spammers might seed in spam in ways not visible or clickable to add work to spam filters.
1699
# When we extract a domain and look it up in an RBL, an FP on decding that the text is a URI is not much
1700
# of a problem, as the only cost is an extra RBL lookup. The same FP is worse if the URI is used in matching rule
1701
# because it could lead to a rule FP, as in bug 5780 with WIERD_PORT matching random uuencoded strings.
1702
# The principles of the following code are 1) if ThunderBird or Outlook Express would linkify a string,
1703
# then we should attempt to parse it as a URI; 2) Where TBird and OE parse differently, choose to do what is most
1704
# likely to find a domain for the RBL tests; 3) If it begins with a scheme or www\d*\. or ftp\. assume that
1705
# it is a URI; 4) If it does not then require that the start of the string looks like a FQDN with a valid TLD;
1706
# 5) Reject strings that after parsing, URLDecoding, and redirection processing don't have a valid TLD
1708
# We get the entire URI that would be linkified before dealing with it, in order to do the right thing
1709
# with URI-encodings and redirecting URIs.
1711
# The delimiters for start of a URI in TBird are @(`{|[\"'<>,\s in OE they are ("<\s
1713
# Tbird allows .,?';-! in a URI but ignores [.,?';-!]* at the end.
1714
# TBird's end delimiters are )`{}|[]"<>\s but ) is only an end delmiter if there is no ( in the URI
1715
# OE only uses space as a delimiter, but ignores [~!@#^&*()_+`-={}|[]:";'<>?,.]* at the end.
1717
# Both TBird and OE decide that a URI is an email address when there is '@' character embedded in it.
1718
# TBird has some additional restrictions on email URIs: They cannot contain non-ASCII characters and their end
1719
# delimiters include ( and '
1721
# bug 4522: ISO2022 format mail, most commonly Japanese SHIFT-JIS, inserts a three character escape sequence ESC ( .
1723
# a hybrid of tbird and oe's version of uri parsing
1724
my $tbirdstartdelim = '><"\'`,{[(|\s' . "\x1b"; # The \x1b as per bug 4522
1725
my $iso2022shift = "\x1b" . '\(.'; # bug 4522
1726
my $tbirdenddelim = '><"`}\]{[|\s' . "\x1b"; # The \x1b as per bug 4522
1727
my $oeignoreatend = '-~!@#^&*()_+=:;\'?,.';
1728
my $nonASCII = '\x80-\xff';
1729
my $tbirdenddelimemail = $tbirdenddelim . '(\'' . $nonASCII; # tbird ignores non-ASCII mail addresses for now, until RFC changes
1730
my $tbirdenddelimplusat = $tbirdenddelimemail . '@';
1732
# regexps for finding plain text non-scheme hostnames with valid TLDs.
1696
1734
# the list from %VALID_TLDS in Util/RegistrarBoundaries.pm, as a
1697
# Regexp::Optimize optimized regexp ;) accurate as of 20050318
1735
# Regexp::List optimized regexp ;) accurate as of 20080208
1698
1736
my $tldsRE = qr/
1700
(?:a(?:e(?:ro)?|r(?:pa)?|[cdfgilmnoqstuwzx])|b(?:iz?|[abdefghjmnorstvwyz])
1701
|c(?:o(?:m|op)?|[acdfghiklmnrsu])|d[ejkmoz]|e[ceghrst]|f[ijkmor]
1702
|g(?:[abdefghilmnpqrstuwy]|ov)|h[kmnrtu]|i(?:n(?:fo|t)?|[delmoqrst])
1703
|j[emop]|k[eghimnprwyz]|l[abcikrstuvy]
1704
|m(?:u(?:seum)?|[acdghkmnopqrstvwxyz]|i?l)|n(?:a(?:me)?|et?|[cfgilopruz])
1705
|o(?:m|rg)|p(?:ro?|[aefghklmnstwy])|r[eouw]|s[abcdeghijklmnortvyzu]
1706
|t[cdfghjklmnoprtvwz]|u[agkmsyz]|v[aceginu]|w[fs]|xxx|y[etu]|z[amw]|ed?u|qa
1709
# from RFC 1035, but allowing domains starting with numbers:
1710
# $label = q/[A-Za-z\d](?:[A-Za-z\d-]{0,61}[A-Za-z\d])?/;
1711
# $domain = qq<$label(?:\.$label)*>;
1712
# length($host) <= 255 && $host =~ /^($domain)$/
1714
# massively simplified from grammar, only matches known TLDs, a single
1715
# dot at end of TLD works
1716
# negative look-behinds:
1717
# (?<![a-z\d][.-]) = don't let there be more hostname behind, but
1718
# don't miss ".....www.bar.com" or "-----www.foo.com"
1719
# (?<!.\@) = this will be caught by the email address regular expression
1720
my $schemelessRE = qr/(?<![a-z\d][._-])(?<!.\@)\b[a-z\d]
1726
my $uriRe = qr/\b(?:$schemeRE:[$uricCheat]|$schemelessRE)[$uricSet#]*/o;
1728
# Taken from Email::Find (thanks Tatso!)
1729
# This is the BNF from RFC 822
1734
my $close_br = '\]';
1735
my $nonASCII = '\x80-\xff';
1736
my $ctrl = '\000-\037';
1737
my $cr_list = '\n\015';
1738
my $qtext = qq/[^$esc$nonASCII$cr_list\"]/; #"
1739
my $dtext = qq/[^$esc$nonASCII$cr_list$open_br$close_br]/;
1740
my $quoted_pair = qq<$esc>.qq<[^$nonASCII]>;
1741
my $atom_char = qq/[^($space)<>\@,;:\".$esc$open_br$close_br$ctrl$nonASCII]/;
1743
my $atom = qq{(?>$atom_char+)};
1744
my $quoted_str = qq<\"$qtext*(?:$quoted_pair$qtext*)*\">; #"
1745
my $word = qq<(?:$atom|$quoted_str)>;
1746
my $local_part = qq<$word(?:$period$word)*>;
1748
# This is a combination of the domain name BNF from RFC 1035 plus the
1749
# domain literal definition from RFC 822, but allowing domains starting
1751
my $label = q/[A-Za-z\d](?:[A-Za-z\d-]*[A-Za-z\d])?/;
1752
my $domain_ref = qq<$label(?:$period$label)*>;
1753
my $domain_lit = qq<$open_br(?:$dtext|$quoted_pair)*$close_br>;
1754
my $domain = qq<(?:$domain_ref|$domain_lit)>;
1756
# Finally, the address-spec regex (more or less)
1757
my $Addr_spec_re = qr<$local_part\s*\@\s*$domain>o;
1759
# TVD: This really belongs in metadata
1738
(?:a(?:e(?:ro)?|r(?:pa)?|s(?:ia)?|[cdfgilmnoqtuwxz])|b(?:iz?|[abdefghjmnorstwyz])
1739
|c(?:at?|o(?:m|op)?|[cdfghiklmnruvxyz])|d[ejkmoz]|e(?:[cegrst]|d?u)|f[ijkmor]
1740
|g(?:[adefghilmnpqrstuwy]|ov)|h[kmnrtu]|i(?:n(?:fo|t)?|[delmoqrst])
1741
|j(?:o(?:bs)?|[emp])|k[eghimnprwyz]|l[abcikrstuvy]
1742
|m(?:o(?:bi)?|u(?:seum)?|[acdeghkmnpqrstvwxyz]|i?l)|n(?:a(?:me)?|et?|[cfgilopruz])
1743
|o(?:m|rg)|p(?:ro?|[aefghklnstwy])|r[eosuw]|s[abcdeghiklmnrtuvyz]
1744
|t(?:r(?:avel)?|[cdfghjkmnoptvwz]|e?l)|u[agksyz]|v[aceginu]|w[fs]|y[eu]|z[amw]|qa
1747
# knownscheme regexp looks for either a https?: or ftp: scheme, or www\d*\. or ftp\. prefix, i.e., likely to start a URL
1748
# schemeless regexp looks for a valid TLD at the end of what may be a FQDN, followed by optional ., optional :portnum, optional /rest_of_uri
1749
my $urischemeless = qr/[a-z\d][a-z\d._-]{0,251}\.${tldsRE}\.?(?::\d{1,5})?(?:\/[^$tbirdenddelim]{1,251})?/io;
1750
my $uriknownscheme = qr/(?:(?:(?:(?:https?)|(?:ftp)):(?:\/\/)?)|(?:(?:www\d{0,2}|ftp)\.))[^$tbirdenddelim]{1,251}/io;
1751
my $urimailscheme = qr/(?:mailto:)?[^$tbirdenddelimplusat]{1,251}@[^$tbirdenddelimemail]{1,251}/io;
1752
my $tbirdurire = qr/(?:\b|(?<=$iso2022shift)|(?<=[$tbirdstartdelim]))
1753
(?:(?:($uriknownscheme)(?=[$tbirdenddelim])) |
1754
(?:($urimailscheme)(?=[$tbirdenddelimemail])) |
1755
(?:\b($urischemeless)(?=[$tbirdenddelim])))/xo;
1761
1757
=item $status->get_uri_list ()
1950
1949
# also, if we allow $textary to be passed in, we need to invalidate
1951
1950
# the cache first. fyi.
1952
1951
my $textary = $self->get_decoded_stripped_body_text_array();
1952
my $redirector_patterns = $self->{conf}->{redirector_patterns};
1954
1954
my ($rulename, $pat, @uris);
1960
# NOTE: do not modify $_ in this loop
1961
while (/($uriRe)/igo) {
1964
# skip mismatches from URI regular expression
1965
next if $uri =~ /^[a-z\d.-]*\.\./i; # skip ".."
1967
$uri =~ s/^<(.*)>$/$1/;
1968
$uri =~ s/[\]\)>#]$//;
1970
if ($uri !~ /^${schemeRE}:/io) {
1971
# If it's a hostname that was just sitting out in the
1972
# open, without a protocol, and not inside of an HTML tag,
1973
# the we should add the proper protocol in front, rather
1974
# than using the base URI.
1957
for my $entry (@$textary) {
1959
# a workaround for [perl #69973] bug:
1960
# Invalid and tainted utf-8 char crashes perl 5.10.1 in regexp evaluation
1961
# Bug 6225, regexp and string should both be utf8, or none of them;
1962
# untainting string also seems to avoid the crash
1964
# Bug 6225: untaint the string in an attempt to work around a perl crash
1965
local $_ = untaint_var($entry);
1968
while (/$tbirdurire/igo) {
1969
my $rawuri = $1||$2||$3;
1970
$rawuri =~ s/(^[^(]*)\).*$/$1/; # as per ThunderBird, ) is an end delimiter if there is no ( preceeding it
1971
$rawuri =~ s/[$oeignoreatend]*$//; # remove trailing string of punctuations that TBird ignores
1972
# skip if there is '..' in the hostname portion of the URI, something we can't catch in the general URI regexp
1973
next if $rawuri =~ /^(?:(?:https?|ftp|mailto):(?:\/\/)?)?[a-z\d.-]*\.\./i;
1975
# If it's a hostname that was just sitting out in the
1976
# open, without a protocol, and not inside of an HTML tag,
1977
# the we should add the proper protocol in front, rather
1978
# than using the base URI.
1981
if ($uri !~ /^(?:https?|ftp|mailto|javascript|file):/i) {
1975
1982
if ($uri =~ /^ftp\./i) {
1977
1983
$uri = "ftp://$uri";
1985
elsif ($uri =~ /^www\d{0,2}\./i) {
1986
$uri = "http://$uri";
1988
elsif ($uri =~ /\@/) {
1981
1989
$uri = "mailto:$uri";
1983
else # if ($uri =~ /^www\d*\./i)
1985
1992
# some spammers are using unschemed URIs to escape filters
1993
$rblonly = 1; # flag that this is a URI that MUAs don't linkify so only use for RBLs
1987
1994
$uri = "http://$uri";
1991
# warn("uri: got URI: $uri\n");
1994
while (/($Addr_spec_re)/igo) {
1997
# skip mismatches from email address regular expression
1998
next unless $uri =~ /\.${tldsRE}\W*$/io; # skip non-TLDs
2000
$uri =~ s/\s*\@\s*/@/; # remove spaces around the '@'
2001
$uri = "mailto:$uri"; # prepend mailto:
2003
#warn("uri: got URI: $uri\n");
1998
if ($uri =~ /^mailto:/) {
1999
# skip a mail link that does not have a valid TLD or other than one @ after decoding any URLEncoded characters
2000
$uri = Mail::SpamAssassin::Util::url_encode($uri) if ($uri =~ /\%(?:2[1-9a-fA-F]|[3-6][0-9a-fA-f]|7[0-9a-eA-E])/);
2001
next if ($uri !~ /^[^@]+@[^@]+$/);
2002
my $domuri = Mail::SpamAssassin::Util::uri_to_domain($uri);
2003
next unless $domuri;
2004
push (@uris, $rawuri);
2005
push (@uris, $uri) unless ($rawuri eq $uri);
2008
next unless ($uri =~/^(?:https?|ftp):/); # at this point only valid if one or the other of these
2010
my @tmp = Mail::SpamAssassin::Util::uri_list_canonify($redirector_patterns, $uri);
2011
my $goodurifound = 0;
2012
foreach my $cleanuri (@tmp) {
2013
my $domain = Mail::SpamAssassin::Util::uri_to_domain($cleanuri);
2015
# bug 5780: Stop after domain to avoid FP, but do that after all deobfuscation of urlencoding and redirection
2018
$cleanuri =~ s/^(https?:\/\/[^:\/]+).*$/$1/;
2020
push (@uris, $cleanuri);
2024
next unless $goodurifound;
2025
push @uris, $rawuri unless $rblonly;
2226
2267
my ($self, $rule, $area, %params) = @_;
2228
# ensure that rule values always result in an *increase* of
2229
# $self->{tests_already_hit}->{$rule}:
2230
my $value = $params{value}; if (!$value || $value <= 0) { $value = 1; }
2269
my $conf_ref = $self->{conf};
2271
my $dynamic_score_provided;
2272
my $score = $params{score};
2273
if (defined $score) {
2274
$dynamic_score_provided = 1;
2276
$score = $conf_ref->{scores}->{$rule};
2279
# adding a hit does nothing if we don't have a score -- we probably
2280
# shouldn't have run it in the first place
2281
return unless $score;
2283
# ensure that rule values always result in an *increase*
2284
# of $self->{tests_already_hit}->{$rule}:
2285
my $value = $params{value};
2286
if (!$value || $value <= 0) { $value = 1 }
2288
my $tflags_ref = $conf_ref->{tflags};
2289
my $tflags_add = $params{tflags};
2290
if (defined $tflags_add && $tflags_add ne '') {
2291
$_ = (!defined $_ || $_ eq '') ? $tflags_add : ($_ . ' ' . $tflags_add)
2292
for $tflags_ref->{$rule};
2232
2295
my $already_hit = $self->{tests_already_hit}->{$rule} || 0;
2234
2296
# don't count hits multiple times, unless 'tflags multiple' is on
2235
if ($already_hit && ($self->{conf}->{tflags}->{$rule}||'') !~ /\bmultiple\b/) {
2297
if ($already_hit && ($tflags_ref->{$rule}||'') !~ /\bmultiple\b/) {
2502
2585
# Resent- headers take priority, if present. see bug 672
2503
# http://www.hughes-family.org/bugzilla/show_bug.cgi?id=672
2504
my $resent = $self->get('Resent-From');
2586
my $resent = $self->get('Resent-From',undef);
2505
2587
if (defined $resent && $resent =~ /\S/) {
2506
2588
@addrs = $self->{main}->find_all_addrs_in_line ($resent);
2510
2591
# bug 2292: Used to use find_all_addrs_in_line() with the same
2511
2592
# headers, but the would catch addresses in comments which caused
2512
2593
# FNs for things like whitelist_from. Since all of these are From
2513
# headers, there should only be 1 address in each anyway, so use the
2594
# headers, there should only be 1 address in each anyway (not exactly
2595
# true, RFC 2822 allows multiple addresses in a From header field),
2596
# so use the :addr code...
2515
2597
# bug 3366: some addresses come in as 'foo@bar...', which is invalid.
2516
2598
# so deal with the multiple periods.
2518
@addrs = grep { defined($_) && length($_) > 0 } map { tr/././s; $_; }
2600
@addrs = map { tr/././s; $_ } grep { $_ ne '' }
2519
2601
($self->get('From:addr'), # std
2520
2602
$self->get('Envelope-Sender:addr'), # qmail: new-inject(1)
2521
2603
$self->get('Resent-Sender:addr'), # procmailrc manpage
2559
2637
$rcvd =~ s/\n+/\n/gs;
2561
2639
my @rcvdlines = split(/\n/, $rcvd, 4); pop @rcvdlines; # forget last one
2563
2641
foreach my $line (@rcvdlines) {
2564
2642
if ($line =~ / for (\S+\@\S+);/) { push (@rcvdaddrs, $1); }
2567
2645
@addrs = $self->{main}->find_all_addrs_in_line (
2568
join(" ", @rcvdaddrs)."\n" .
2569
$self->get('To') . # std
2570
$self->get('Apparently-To') . # sendmail, from envelope
2571
$self->get('Delivered-To') . # Postfix, poss qmail
2572
$self->get('Envelope-Recipients') . # qmail: new-inject(1)
2573
$self->get('Apparently-Resent-To') . # procmailrc manpage
2574
$self->get('X-Envelope-To') . # procmailrc manpage
2575
$self->get('Envelope-To') . # exim
2576
$self->get('X-Delivered-To') . # procmail quick start
2577
$self->get('X-Original-To') . # procmail quick start
2578
$self->get('X-Rcpt-To') . # procmail quick start
2579
$self->get('X-Real-To') . # procmail quick start
2580
$self->get('Cc')); # std
2647
join(" ", @rcvdaddrs)."\n",
2648
$self->get('To'), # std
2649
$self->get('Apparently-To'), # sendmail, from envelope
2650
$self->get('Delivered-To'), # Postfix, poss qmail
2651
$self->get('Envelope-Recipients'), # qmail: new-inject(1)
2652
$self->get('Apparently-Resent-To'), # procmailrc manpage
2653
$self->get('X-Envelope-To'), # procmailrc manpage
2654
$self->get('Envelope-To'), # exim
2655
$self->get('X-Delivered-To'), # procmail quick start
2656
$self->get('X-Original-To'), # procmail quick start
2657
$self->get('X-Rcpt-To'), # procmail quick start
2658
$self->get('X-Real-To'), # procmail quick start
2659
$self->get('Cc'))); # std
2581
2660
# those are taken from various sources; thanks to Nancy McGough, who
2582
2661
# noted some in <http://www.ii.com/internet/robots/procmail/qs/#envelope>