~ubuntu-branches/ubuntu/utopic/spamassassin/utopic-proposed

« back to all changes in this revision

Viewing changes to lib/Mail/SpamAssassin/PerMsgStatus.pm

Committer: Bazaar Package Importer
Author(s): Noah Meyerhans
Date: 2010-01-26 22:53:12 UTC
mfrom: (1.1.13 upstream) (5.1.7 sid)
Revision ID: james.westby@ubuntu.com-20100126225312-wkftb10idc1kz2aq

Tags: 3.3.0-1

* New upstream version.
* Switch to dpkg-source 3.0 (quilt) format

files added:
META.yml

debian/patches/.dpkg-source-applied

debian/patches/10_change_config_paths

debian/patches/20_edit_spamc_pod

debian/patches/30_edit_README

debian/patches/50_sa-learn_fix_empty_list_handling

debian/patches/60_fix-pod

debian/patches/70_fix-whatis

debian/patches/80_fix_man_warnings

debian/patches/81_fix_man_speling

debian/patches/series

debian/source

debian/source/format

lib/Mail/SpamAssassin/BayesStore/BDB.pm

lib/Mail/SpamAssassin/Plugin/FreeMail.pm

lib/Mail/SpamAssassin/Plugin/PhishTag.pm

lib/Mail/SpamAssassin/Plugin/Reuse.pm

lib/Mail/SpamAssassin/Util/ScopedTimer.pm

pkgrules

pkgrules/10_default_prefs.cf

pkgrules/20_advance_fee.cf

pkgrules/20_body_tests.cf

pkgrules/20_compensate.cf

pkgrules/20_dnsbl_tests.cf

pkgrules/20_drugs.cf

pkgrules/20_dynrdns.cf

pkgrules/20_fake_helo_tests.cf

pkgrules/20_freemail.cf

pkgrules/20_freemail_domains.cf

pkgrules/20_head_tests.cf

pkgrules/20_html_tests.cf

pkgrules/20_imageinfo.cf

pkgrules/20_meta_tests.cf

pkgrules/20_net_tests.cf

pkgrules/20_phrases.cf

pkgrules/20_porn.cf

pkgrules/20_ratware.cf

pkgrules/20_uri_tests.cf

pkgrules/20_vbounce.cf

pkgrules/23_bayes.cf

pkgrules/25_accessdb.cf

pkgrules/25_antivirus.cf

pkgrules/25_asn.cf

pkgrules/25_dcc.cf

pkgrules/25_dkim.cf

pkgrules/25_hashcash.cf

pkgrules/25_pyzor.cf

pkgrules/25_razor2.cf

pkgrules/25_replace.cf

pkgrules/25_spf.cf

pkgrules/25_textcat.cf

pkgrules/25_uribl.cf

pkgrules/30_text_de.cf

pkgrules/30_text_fr.cf

pkgrules/30_text_it.cf

pkgrules/30_text_nl.cf

pkgrules/30_text_pl.cf

pkgrules/30_text_pt_br.cf

pkgrules/50_scores.cf

pkgrules/60_adsp_override_dkim.cf

pkgrules/60_awl.cf

pkgrules/60_shortcircuit.cf

pkgrules/60_whitelist.cf

pkgrules/60_whitelist_dkim.cf

pkgrules/60_whitelist_spf.cf

pkgrules/60_whitelist_subject.cf

pkgrules/72_active.cf

rules/v330.pre

sa-awl.raw

sa-check_spamd.raw

t/basic_meta.t

t/bayesbdb.t

t/config_tree_recurse.t

t/cross_user_config_leak.t

t/data/01_test_rules.pre

t/data/dkim

t/data/dkim/test-adsp-11.msg

t/data/dkim/test-adsp-12.msg

t/data/dkim/test-adsp-13.msg

t/data/dkim/test-adsp-14.msg

t/data/dkim/test-adsp-15.msg

t/data/dkim/test-adsp-16.msg

t/data/dkim/test-adsp-17.msg

t/data/dkim/test-adsp-18.msg

t/data/dkim/test-adsp-19.msg

t/data/dkim/test-adsp-20.msg

t/data/dkim/test-adsp-21.msg

t/data/dkim/test-adsp-22.msg

t/data/dkim/test-fail-01.msg

t/data/dkim/test-fail-02.msg

t/data/dkim/test-fail-03.msg

t/data/dkim/test-fail-04.msg

t/data/dkim/test-fail-05.msg

t/data/dkim/test-fail-06.msg

t/data/dkim/test-fail-07.msg

t/data/dkim/test-fail-08.msg

t/data/dkim/test-fail-09.msg

t/data/dkim/test-pass-01.msg

t/data/dkim/test-pass-02.msg

t/data/dkim/test-pass-03.msg

t/data/dkim/test-pass-04.msg

t/data/dkim/test-pass-05.msg

t/data/dkim/test-pass-06.msg

t/data/dkim/test-pass-07.msg

t/data/dkim/test-pass-08.msg

t/data/dkim/test-pass-09.msg

t/data/dkim/test-pass-10.msg

t/data/dkim/test-pass-11.msg

t/data/dkim/test-pass-12.msg

t/data/dkim/test-pass-13.msg

t/data/dkim/test-pass-14.msg

t/data/dkim/test-pass-15.msg

t/data/dkim/test-pass-16.msg

t/data/dkim/test-pass-17.msg

t/data/dkim/test-pass-18.msg

t/data/dkim/test-pass-19.msg

t/data/whitelists/ibm_enews_de

t/data/whitelists/infoworld

t/data/whitelists/netsol_renewal

t/data/whitelists/networkworld

t/db_awl_perms.t

t/dkim2.t

t/if_can.t

t/reuse.t

t/root_spamd_virtual.t

t/root_spamd_x_u.t

t/sa_awl.t

t/sa_check_spamd.t

t/sa_compile.t

t/spamc_bug6176.t

t/spamd_client.t

t/spamd_sql_prefs.t

t/spamd_whitelist_leak.t

t/stop_always_matching_regexps.t

t/timeout.t

files removed:
debian/patches/00list

debian/patches/05_fix_MakeMaker_DESTDIR.dpatch

debian/patches/10_change_config_paths.dpatch

debian/patches/20_edit_spamc_pod.dpatch

debian/patches/21_fix_spamc-undocumented-exit-status-98.dpatch

debian/patches/30_edit_README.dpatch

debian/patches/40_remote_dsbl.dpatch

debian/patches/41_remove_openwhois.dpatch

debian/patches/50_sa-learn_fix_empty_list_handling.dpatch

debian/patches/60_fix-pod.dpatch

debian/patches/70_fix-whatis.dpatch

debian/patches/80_fix_man_warnings.dpatch

debian/patches/81_fix_man_speling.dpatch

debian/patches/90_ipv6_header_support.dpatch

debian/patches/95_fix_FH_DATE_PAST_20XX.dpatch

lib/Mail/SpamAssassin/Plugin/DomainKeys.pm

rules/10_default_prefs.cf

rules/20_advance_fee.cf

rules/20_body_tests.cf

rules/20_compensate.cf

rules/20_dnsbl_tests.cf

rules/20_drugs.cf

rules/20_dynrdns.cf

rules/20_fake_helo_tests.cf

rules/20_head_tests.cf

rules/20_html_tests.cf

rules/20_imageinfo.cf

rules/20_meta_tests.cf

rules/20_net_tests.cf

rules/20_phrases.cf

rules/20_porn.cf

rules/20_ratware.cf

rules/20_uri_tests.cf

rules/20_vbounce.cf

rules/23_bayes.cf

rules/25_accessdb.cf

rules/25_antivirus.cf

rules/25_asn.cf

rules/25_dcc.cf

rules/25_dkim.cf

rules/25_domainkeys.cf

rules/25_hashcash.cf

rules/25_pyzor.cf

rules/25_razor2.cf

rules/25_replace.cf

rules/25_spf.cf

rules/25_textcat.cf

rules/25_uribl.cf

rules/30_text_de.cf

rules/30_text_fr.cf

rules/30_text_it.cf

rules/30_text_nl.cf

rules/30_text_pl.cf

rules/30_text_pt_br.cf

rules/50_scores.cf

rules/60_awl.cf

rules/60_shortcircuit.cf

rules/60_whitelist.cf

rules/60_whitelist_dk.cf

rules/60_whitelist_dkim.cf

rules/60_whitelist_spf.cf

rules/60_whitelist_subject.cf

rules/72_active.cf

rules/72_removed.cf

t/meta.t

t/rule_tests.t

files modified:
CREDITS

Changes

INSTALL

MANIFEST

MANIFEST.SKIP

Makefile.PL

PACKAGING

README

UPGRADE

USAGE

build/mkrules

build/parse-rules-for-masses

build/preprocessor

build/sha1sum.pl

debian/changelog

debian/control

debian/rules

ldap/README

lib/Mail/SpamAssassin.pm

lib/Mail/SpamAssassin/AICache.pm

lib/Mail/SpamAssassin/ArchiveIterator.pm

lib/Mail/SpamAssassin/AsyncLoop.pm

lib/Mail/SpamAssassin/AutoWhitelist.pm

lib/Mail/SpamAssassin/Bayes.pm

lib/Mail/SpamAssassin/Bayes/CombineChi.pm

lib/Mail/SpamAssassin/Bayes/CombineNaiveBayes.pm

lib/Mail/SpamAssassin/BayesStore.pm

lib/Mail/SpamAssassin/BayesStore/DBM.pm

lib/Mail/SpamAssassin/BayesStore/MySQL.pm

lib/Mail/SpamAssassin/BayesStore/PgSQL.pm

lib/Mail/SpamAssassin/BayesStore/SDBM.pm

lib/Mail/SpamAssassin/BayesStore/SQL.pm

lib/Mail/SpamAssassin/Client.pm

lib/Mail/SpamAssassin/Conf.pm

lib/Mail/SpamAssassin/Conf/LDAP.pm

lib/Mail/SpamAssassin/Conf/Parser.pm

lib/Mail/SpamAssassin/Conf/SQL.pm

lib/Mail/SpamAssassin/Constants.pm

lib/Mail/SpamAssassin/DBBasedAddrList.pm

lib/Mail/SpamAssassin/Dns.pm

lib/Mail/SpamAssassin/DnsResolver.pm

lib/Mail/SpamAssassin/HTML.pm

lib/Mail/SpamAssassin/Locales.pm

lib/Mail/SpamAssassin/Locker.pm

lib/Mail/SpamAssassin/Locker/Flock.pm

lib/Mail/SpamAssassin/Locker/UnixNFSSafe.pm

lib/Mail/SpamAssassin/Locker/Win32.pm

lib/Mail/SpamAssassin/Logger.pm

lib/Mail/SpamAssassin/Logger/File.pm

lib/Mail/SpamAssassin/Logger/Stderr.pm

lib/Mail/SpamAssassin/Logger/Syslog.pm

lib/Mail/SpamAssassin/MailingList.pm

lib/Mail/SpamAssassin/Message.pm

lib/Mail/SpamAssassin/Message/Metadata.pm

lib/Mail/SpamAssassin/Message/Metadata/Received.pm

lib/Mail/SpamAssassin/Message/Node.pm

lib/Mail/SpamAssassin/NetSet.pm

lib/Mail/SpamAssassin/PerMsgLearner.pm

lib/Mail/SpamAssassin/PerMsgStatus.pm

lib/Mail/SpamAssassin/PersistentAddrList.pm

lib/Mail/SpamAssassin/Plugin.pm

lib/Mail/SpamAssassin/Plugin/ASN.pm

lib/Mail/SpamAssassin/Plugin/AWL.pm

lib/Mail/SpamAssassin/Plugin/AccessDB.pm

lib/Mail/SpamAssassin/Plugin/AntiVirus.pm

lib/Mail/SpamAssassin/Plugin/AutoLearnThreshold.pm

lib/Mail/SpamAssassin/Plugin/Bayes.pm

lib/Mail/SpamAssassin/Plugin/BodyEval.pm

lib/Mail/SpamAssassin/Plugin/BodyRuleBaseExtractor.pm

lib/Mail/SpamAssassin/Plugin/Check.pm

lib/Mail/SpamAssassin/Plugin/DCC.pm

lib/Mail/SpamAssassin/Plugin/DKIM.pm

lib/Mail/SpamAssassin/Plugin/DNSEval.pm

lib/Mail/SpamAssassin/Plugin/HTMLEval.pm

lib/Mail/SpamAssassin/Plugin/HTTPSMismatch.pm

lib/Mail/SpamAssassin/Plugin/Hashcash.pm

lib/Mail/SpamAssassin/Plugin/HeaderEval.pm

lib/Mail/SpamAssassin/Plugin/ImageInfo.pm

lib/Mail/SpamAssassin/Plugin/MIMEEval.pm

lib/Mail/SpamAssassin/Plugin/MIMEHeader.pm

lib/Mail/SpamAssassin/Plugin/OneLineBodyRuleType.pm

lib/Mail/SpamAssassin/Plugin/Pyzor.pm

lib/Mail/SpamAssassin/Plugin/Razor2.pm

lib/Mail/SpamAssassin/Plugin/RelayCountry.pm

lib/Mail/SpamAssassin/Plugin/RelayEval.pm

lib/Mail/SpamAssassin/Plugin/ReplaceTags.pm

lib/Mail/SpamAssassin/Plugin/Rule2XSBody.pm

lib/Mail/SpamAssassin/Plugin/SPF.pm

lib/Mail/SpamAssassin/Plugin/Shortcircuit.pm

lib/Mail/SpamAssassin/Plugin/SpamCop.pm

lib/Mail/SpamAssassin/Plugin/Test.pm

lib/Mail/SpamAssassin/Plugin/TextCat.pm

lib/Mail/SpamAssassin/Plugin/URIDNSBL.pm

lib/Mail/SpamAssassin/Plugin/URIDetail.pm

lib/Mail/SpamAssassin/Plugin/URIEval.pm

lib/Mail/SpamAssassin/Plugin/VBounce.pm

lib/Mail/SpamAssassin/Plugin/WLBLEval.pm

lib/Mail/SpamAssassin/Plugin/WhiteListSubject.pm

lib/Mail/SpamAssassin/PluginHandler.pm

lib/Mail/SpamAssassin/Reporter.pm

lib/Mail/SpamAssassin/SQLBasedAddrList.pm

lib/Mail/SpamAssassin/SpamdForkScaling.pm

lib/Mail/SpamAssassin/SubProcBackChannel.pm

lib/Mail/SpamAssassin/Timeout.pm

lib/Mail/SpamAssassin/Util.pm

lib/Mail/SpamAssassin/Util/DependencyInfo.pm

lib/Mail/SpamAssassin/Util/Progress.pm

lib/Mail/SpamAssassin/Util/RegistrarBoundaries.pm

lib/Mail/SpamAssassin/Util/TieOneStringHash.pm

lib/spamassassin-run.pod

rules/STATISTICS-set0.txt

rules/STATISTICS-set1.txt

rules/STATISTICS-set2.txt

rules/STATISTICS-set3.txt

rules/active.list

rules/local.cf

rules/user_prefs.template

rules/v310.pre

rules/v312.pre

sa-compile.raw

sa-learn.raw

sa-update.raw

spamassassin.raw

spamassassin.spec

spamc/getopt.c

spamc/libspamc.c

spamc/libspamc.h

spamc/spamc.c

spamc/spamc.h.in

spamc/spamc.pod

spamc/utils.c

spamc/utils.h

spamd-apache2/bin/apache-spamd.pl

spamd-apache2/lib/Mail/SpamAssassin/Spamd.pm

spamd-apache2/lib/Mail/SpamAssassin/Spamd/Apache2.pm

spamd-apache2/lib/Mail/SpamAssassin/Spamd/Apache2/Config.pm

spamd/PROTOCOL

spamd/README

spamd/README.vpopmail

spamd/redhat-rc-script.sh

spamd/spamd.raw

sql/README

sql/README.awl

sql/README.bayes

sql/awl_mysql.sql

sql/awl_pg.sql

sql/bayes_mysql.sql

sql/bayes_pg.sql

t/SATest.pm

t/bayesdbm.t

t/bayesdbm_flock.t

t/bayessdbm.t

t/bayessdbm_seen_delete.t

t/bayessql.t

t/cidrs.t

t/config.dist

t/data/01_test_rules.cf

t/data/Dumpheaders.pm

t/data/spam/004

t/data/spam/008

t/data/spam/011

t/data/spam/dnsbl.eml

t/data/testplugin.pm

t/db_based_whitelist.t

t/db_based_whitelist_ips.t

t/dcc.t

t/debug.t

t/dnsbl.t

t/dnsbl_sc_meta.t

t/duplicates.t

t/get_all_headers.t *

t/get_headers.t

t/html_obfu.t

t/ip_addrs.t

t/lang_lint.t

t/line_endings.t

t/mimeparse.t

t/missing_hb_separator.t

t/mkrules.t

t/razor2.t

t/rcvd_parser.t

t/re_base_extraction.t

t/recreate.t

t/root_spamd_tell.t

t/root_spamd_tell_paranoid.t

t/root_spamd_tell_x.t

t/root_spamd_tell_x_paranoid.t

t/root_spamd_u.t *

t/root_spamd_u_dcc.t *

t/rule_names.t

t/sha1.t

t/shortcircuit.t

t/spamc_H.t *

t/spamc_x_E_R.t

t/spamc_x_e.t *

t/spamd_hup.t

t/spamd_kill_restart.t

t/spamd_kill_restart_rr.t

t/spamd_maxsize.t

t/spamd_prefork_stress.t

t/spamd_prefork_stress_2.t

t/spamd_prefork_stress_3.t

t/spamd_prefork_stress_4.t

t/spamd_protocol_10.t

t/spamd_unix.t

t/spamd_user_rules_leak.t

t/spf.t

t/strip2.t

t/strip_no_subject.t

t/trust_path.t

t/uri.t

t/uri_text.t

t/uribl.t

t/whitelist_addrs.t

t/whitelist_from.t

Show diffs side-by-side

added added

removed removed

lib/Mail/SpamAssassin/PerMsgStatus.pm

use strict;

use warnings;

use re 'taint';

use Time::HiRes qw(time);

use Mail::SpamAssassin::Constants qw(:sa);

use Mail::SpamAssassin::AsyncLoop;

use Mail::SpamAssassin::Conf;

use Mail::SpamAssassin::Util;

use Mail::SpamAssassin::Util qw(untaint_var);

use Mail::SpamAssassin::Timeout;

use Mail::SpamAssassin::Logger;

use vars qw{

'spamd_result_log_items' => [ ],

'tests_already_hit' => { },

'c' => { },

'tag_data' => { },

'rule_errors' => 0,

'disable_auto_learning' => 0,

'auto_learn_status' => undef,

'conf' => $main->{conf},

'async' => Mail::SpamAssassin::AsyncLoop->new($main)

'async' => Mail::SpamAssassin::AsyncLoop->new($main),

100

'master_deadline' => $msg->{master_deadline}, # dflt inherited from msg

101

'deadline_exceeded' => 0, # time limit exceeded, skipping further tests

102

};

103

#$self->{main}->{use_rule_subs} = 1;

104

105

dbg("check: pms new, time limit in %.3f s",

106

$self->{master_deadline} - time) if $self->{master_deadline};

107

108

if (defined $opts && $opts->{disable_auto_learning}) {

109

$self->{disable_auto_learning} = 1;

100

110

}

111

121

$self->{should_log_rule_hits} = 1;

112

122

}

113

123

124

# known valid tags that might not get their entry in pms->{tag_data}

125

# in some circumstances

126

my $tag_data_ref = $self->{tag_data};

127

foreach (qw(SUMMARY REPORT RBL)) { $tag_data_ref->{$_} = '' }

128

foreach (qw(AWL AWLMEAN AWLCOUNT AWLPRESCORE

129

DCCB DCCR DCCREP PYZOR DKIMIDENTITY DKIMDOMAIN

130

BAYESTC BAYESTCLEARNED BAYESTCSPAMMY BAYESTCHAMMY

131

HAMMYTOKENS SPAMMYTOKENS TOKENSUMMARY)) {

132

$tag_data_ref->{$_} = undef; # exist, but undefined

133

}

134

114

135

bless ($self, $class);

115

136

$self;

116

137

}

124

145

=cut

125

146

126

147

sub check {

148

my ($self) = shift;

149

my $master_deadline = $self->{master_deadline};

150

if (!$master_deadline) {

151

$self->check_timed(@_);

152

} else {

153

my $t = Mail::SpamAssassin::Timeout->new({ deadline => $master_deadline });

154

my $err = $t->run(sub { $self->check_timed(@_) });

155

if (time > $master_deadline && !$self->{deadline_exceeded}) {

156

info("check: exceeded time limit in pms check");

157

$self->{deadline_exceeded} = 1;

158

}

159

}

160

}

161

162

sub check_timed {

127

163

my ($self) = @_;

128

164

local ($_);

129

165

161

197

{

162

198

# did anything happen? if not, this is fatal

163

199

if (!$self->{main}->have_plugin("check_main")) {

164

die "check: no loaded plugin implements 'check_main': cannot scan!";

200

die "check: no loaded plugin implements 'check_main': cannot scan!\n".

201

"Check the necessary '.pre' files are in the config directory.\n";

165

202

}

166

203

}

167

204

202

239

=cut

203

240

204

241

sub learn {

242

my ($self) = shift;

243

my $master_deadline = $self->{master_deadline};

244

if (!$master_deadline) {

245

$self->learn_timed(@_);

246

} else {

247

my $t = Mail::SpamAssassin::Timeout->new({ deadline => $master_deadline });

248

my $err = $t->run(sub { $self->learn_timed(@_) });

249

if (time > $master_deadline && !$self->{deadline_exceeded}) {

250

info("learn: exceeded time limit in pms learn");

251

$self->{deadline_exceeded} = 1;

252

}

253

}

254

}

255

256

sub learn_timed {

205

257

my ($self) = @_;

206

258

207

259

if (!$self->{conf}->{bayes_auto_learn} ||

221

273

return;

222

274

}

223

275

276

my $timer = $self->{main}->time_method("learn");

277

224

278

$self->{main}->call_plugins ("autolearn", {

225

279

permsgstatus => $self,

226

280

isspam => $isspam

229

283

# bug 3704: temporarily override learn's ability to re-learn a message

230

284

my $orig_learner = $self->{main}->init_learner({ "no_relearn" => 1 });

231

285

286

my $eval_stat;

232

287

eval {

233

288

my $learnstatus = $self->{main}->learn ($self->{msg}, undef, $isspam, 0);

234

289

if ($learnstatus->did_learn()) {

239

294

$self->{main}->finish_learner(); # for now

240

295

241

296

if (exists $self->{main}->{bayes_scanner}) {

242

$self->{main}->{bayes_scanner}->sanity_check_is_untied();

297

$self->{main}->{bayes_scanner}->force_close();

243

298

}

299

300

} or do {

301

$eval_stat = $@ ne '' ? $@ : "errno=$!"; chomp $eval_stat;

244

302

};

245

303

246

304

# reset learner options to their original values

247

305

$self->{main}->init_learner($orig_learner);

248

306

249

if ($@) {

250

dbg("learn: auto-learning failed: $@");

307

if (defined $eval_stat) {

308

dbg("learn: auto-learning failed: $eval_stat");

251

309

$self->{auto_learn_status} = "failed";

252

310

}

253

311

}

504

562

505

563

if (!exists $self->{'report'}) {

506

564

my $report;

565

566

my $timer = $self->{main}->time_method("get_report");

507

567

$report = $self->{conf}->{report_template};

508

568

$report ||= '(no report template found)';

509

569

543

603

chomp ($str); $str .= " [...]\n";

544

604

545

605

# in case the last line was huge, trim it back to around 200 chars

606

local $1;

546

607

$str =~ s/^(.{,200}).*$/$1/gs;

547

608

548

609

# now, some tidy-ups that make things look a bit prettier

617

678

sub rewrite_mail {

618

679

my ($self) = @_;

619

680

681

my $timer = $self->{main}->time_method("rewrite_mail");

620

682

my $msg = $self->{msg}->get_mbox_separator() || '';

621

683

622

684

if ($self->{is_spam} && $self->{conf}->{report_safe}) {

640

702

}

641

703

}

642

704

705

sub _get_added_headers($) {

706

my ($self, $which) = @_;

707

my $str = '';

708

# use string appends to put this back together -- I finally benchmarked it.

709

# join() is 56% of the speed of just using string appends. ;)

710

foreach my $hf_ref (@{$self->{conf}->{$which}}) {

711

my($hfname, $hfbody) = @$hf_ref;

712

my $line = $self->_process_header($hfname,$hfbody);

713

$line = $self->qp_encode_header($line);

714

$str .= "X-Spam-$hfname: $line\n";

715

}

716

return $str;

717

};

718

643

719

# rewrite the message in report_safe mode

644

720

# should not be called directly, use rewrite_mail instead

645

721

682

758

# It'd be nice to do this with a foreach loop, but with only three

683

759

# possibilities right now, it's easier not to...

684

760

685

if ($self->{conf}->{rewrite_header}->{Subject}) {

686

$subject ||= "\n";

761

if (defined $self->{conf}->{rewrite_header}->{Subject}) {

762

$subject = "\n" if !defined $subject;

687

763

my $tag = $self->_replace_tags($self->{conf}->{rewrite_header}->{Subject});

688

764

$tag =~ s/\n/ /gs; # strip tag's newlines

689

765

$subject =~ s/^(?:\Q${tag}\E )?/${tag} /g; # For some reason the tag may already be there!?

690

766

}

691

767

692

if ($self->{conf}->{rewrite_header}->{To}) {

693

$to ||= "\n";

768

if (defined $self->{conf}->{rewrite_header}->{To}) {

769

$to = "\n" if !defined $to;

694

770

my $tag = $self->_replace_tags($self->{conf}->{rewrite_header}->{To});

695

771

$tag =~ s/\n/ /gs; # strip tag's newlines

696

772

$to =~ s/(?:\t\Q(${tag})\E)?$/\t(${tag})/;

697

773

}

698

774

699

if ($self->{conf}->{rewrite_header}->{From}) {

700

$from ||= "\n";

775

if (defined $self->{conf}->{rewrite_header}->{From}) {

776

$from = "\n" if !defined $from;

701

777

my $tag = $self->_replace_tags($self->{conf}->{rewrite_header}->{From});

702

778

$tag =~ s/\n+//gs; # strip tag's newlines

703

779

$from =~ s/(?:\t\Q(${tag})\E)?$/\t(${tag})/;

704

780

}

705

781

706

782

# add report headers to message

707

$newmsg .= "From: $from" if $from;

708

$newmsg .= "To: $to" if $to;

709

$newmsg .= "Cc: $cc" if $cc;

710

$newmsg .= "Subject: $subject" if $subject;

711

$newmsg .= "Date: $date" if $date;

712

$newmsg .= "Message-Id: $msgid" if $msgid;

713

714

foreach my $header (keys %{$self->{conf}->{headers_spam}}) {

715

my $data = $self->{conf}->{headers_spam}->{$header};

716

my $line = $self->_process_header($header,$data);

717

$line = $self->qp_encode_header($line);

718

$newmsg .= "X-Spam-$header: $line\n" # add even if empty

719

}

783

$newmsg .= "From: $from" if defined $from;

784

$newmsg .= "To: $to" if defined $to;

785

$newmsg .= "Cc: $cc" if defined $cc;

786

$newmsg .= "Subject: $subject" if defined $subject;

787

$newmsg .= "Date: $date" if defined $date;

788

$newmsg .= "Message-Id: $msgid" if defined $msgid;

789

$newmsg .= $self->_get_added_headers('headers_spam');

720

790

721

791

if (defined $self->{conf}->{report_safe_copy_headers}) {

722

792

my %already_added = map { $_ => 1 } qw/from to cc subject date message-id/;

825

895

my @pristine_headers = split(/^/m, $self->{msg}->get_pristine_header());

826

896

for (my $line = 0; $line <= $#pristine_headers; $line++) {

827

897

next unless ($pristine_headers[$line] =~ /^X-Spam-(?!Prev-)/i);

828

splice @pristine_headers, $line, 1 while ($pristine_headers[$line] =~ /^(?:X-Spam-(?!Prev-)|\s+\S)/i);

898

splice @pristine_headers, $line, 1 while ($pristine_headers[$line] =~ /^(?:X-Spam-(?!Prev-)|[ \t])/i);

829

899

$line--;

830

900

}

831

901

my $separator = '';

832

if ($pristine_headers[$#pristine_headers] =~ /^\s*$/) {

902

if (@pristine_headers && $pristine_headers[$#pristine_headers] =~ /^\s*$/) {

833

903

$separator = pop @pristine_headers;

834

904

}

835

905

869

939

# The tag should be a comment for this header ...

870

940

$tag = "($tag)" if ($hdr =~ /^(?:From|To)$/);

871

941

942

local $1;

872

943

s/^([^:]+:)[ \t]*(?:\Q${tag}\E )?/$1 ${tag} /i;

873

944

}

874

945

875

946

$addition = 'headers_spam';

876

947

}

877

948

878

# Break the pristine header set up into two blocks; "pre" is the stuff that

879

# we want to ensure comes before any SpamAssassin markup headers, like the

880

# Return-Path header (see bug 3409).

949

# Break the pristine header set into two blocks; $new_hdrs_pre is the stuff

950

# that we want to ensure comes before any SpamAssassin markup headers,

951

# like the Return-Path header (see bug 3409).

881

952

882

# "post" is all the rest of the message headers, placed after the

883

# SpamAssassin markup hdrs. Once one of those headers is seen, all further

884

# headers go into that set; it's assumed that it's an old copy of the

885

# header, or attempted spoofing, if it crops up halfway through the

886

# headers.

953

# all the rest of the message headers (as left in @pristine_headers), is

954

# to be placed after the SpamAssassin markup hdrs. Once one of those headers

955

# is seen, all further headers go into that set; it's assumed that it's an

956

# old copy of the header, or attempted spoofing, if it crops up halfway

957

# through the headers.

887

958

888

959

my $new_hdrs_pre = '';

889

my $new_hdrs_post = '';

890

foreach my $hdr (@pristine_headers) {

891

if ($new_hdrs_post eq '' && $hdr =~ /^Return-Path:/i) {

892

$new_hdrs_pre .= $hdr;

893

} else {

894

$new_hdrs_post .= $hdr;

960

if (@pristine_headers && $pristine_headers[0] =~ /^Return-Path:/i) {

961

$new_hdrs_pre .= shift(@pristine_headers);

962

while (@pristine_headers && $pristine_headers[0] =~ /^[ \t]/) {

963

$new_hdrs_pre .= shift(@pristine_headers);

895

964

}

896

965

}

897

898

# use string appends to put this back together -- I finally benchmarked it.

899

# join() is 56% of the speed of just using string appends. ;)

900

while (my ($header, $data) = each %{$self->{conf}->{$addition}}) {

901

my $line = $self->_process_header($header,$data);

902

$line = $self->qp_encode_header($line);

903

$new_hdrs_pre .= "X-Spam-$header: $line\n";

904

}

966

$new_hdrs_pre .= $self->_get_added_headers($addition);

905

967

906

968

# fix up line endings appropriately

907

my $newmsg = $new_hdrs_pre.$new_hdrs_post.$separator;

969

my $newmsg = $new_hdrs_pre . join('',@pristine_headers) . $separator;

908

970

$self->_fixup_report_line_endings(\$newmsg);

909

971

910

972

return $newmsg.$self->{msg}->get_pristine_body();

923

985

924

986

my @hexchars = split('', '0123456789abcdef');

925

987

my $ord;

988

local $1;

926

989

$text =~ s{([\x80-\xff])}{

927

990

$ord = ord $1;

928

991

'='.$hexchars[($ord & 0xf0) >> 4].$hexchars[$ord & 0x0f]

967

1030

# a tag for it (bug 4793)

968

1031

my $t;

969

1032

my $v;

1033

local($1,$2,$3);

970

1034

$text =~ s{(_(\w+?)(?:$(.*?)$)?_)}{

971

my $full = $1;

1035

my $full = $1;

972

1036

my $tag = $2;

973

my $result = $self->_get_tag($tag,$3);

974

(defined $result) ? $result : $full;

1037

my $result;

1038

if ($tag =~ /^ADDEDHEADER(?:HAM|SPAM|)\z/) {

1039

# Bug 6278: break infinite recursion through _get_added_headers and

1040

# _get_tag on an attempt to use such tag in add_header template

1041

} else {

1042

$result = $self->_get_tag($tag,$3);

1043

}

1044

defined $result ? $result : $full;

975

1045

}ge;

976

1046

977

1047

return $text;

978

1048

}

979

1049

980

sub bayes_report_make_list {

981

my $self = shift;

982

my $info = shift;

983

my $param = shift || "5";

984

my ($limit,$fmt_arg,$more) = split /,/, $param;

985

986

return "Tokens not available." unless defined $info;

987

988

my %formats = (

989

short => '$t',

990

Short => 'Token: \"$t\"',

991

compact => '$p-$D--$t',

992

Compact => 'Probability $p -declassification distance $D (\"+\" means > 9) --token: \"$t\"',

993

medium => '$p-$D-$N--$t',

994

long => '$p-$d--${h}h-${s}s--${a}d--$t',

995

Long => 'Probability $p -declassification distance $D --in ${h} ham messages -and ${s} spam messages --$a} days old--token:\"$t\"'

996

);

997

998

my $raw_fmt = (!$fmt_arg ? '$p-$D--$t' : $formats{$fmt_arg});

999

1000

return "Invalid format, must be one of: ".join(",",keys %formats)

1001

unless defined $raw_fmt;

1002

1003

my $fmt = '"'.$raw_fmt.'"';

1004

my $amt = $limit < @$info ? $limit : @$info;

1005

return "" unless $amt;

1006

1007

my $Bayes = $self->{main}{bayes_scanner};

1008

return "Bayes not available" unless defined $Bayes;

1009

my $ns = $self->{bayes_nspam};

1010

my $nh = $self->{bayes_nham};

1011

my $digit = sub { $_[0] > 9 ? "+" : $_[0] };

1012

my $now = time;

1013

1014

join ', ', map {

1015

my($t,$prob,$s,$h,$u) = @$_;

1016

my $a = int(($now - $u)/(3600 * 24));

1017

my $d = $Bayes->compute_declassification_distance($ns,$nh,$s,$h,$prob);

1018

my $p = sprintf "%.3f", $prob;

1019

my $n = $s + $h;

1020

my ($c,$o) = $prob < 0.5 ? ($h,$s) : ($s,$h);

1021

my ($D,$S,$H,$C,$O,$N) = map &$digit($_), ($d,$s,$h,$c,$o,$n);

1022

eval $fmt; ## no critic

1023

} @{$info}[0..$amt-1];

1024

}

1025

1026

1050

###########################################################################

1027

1051

1028

1052

# public API for plugins

1110

1134

# called by spamd

1111

1135

sub get_spamd_result_log_items {

1112

1136

my ($self) = @_;

1113

my @ret = ();

1137

my @ret;

1114

1138

foreach my $ref (@{$self->{spamd_result_log_items}}) {

1115

1139

push @ret, &$ref;

1116

1140

}

1154

1178

1155

1179

# tag data also comes from $self->{tag_data}->{TAG}

1156

1180

1157

$tag = "" unless defined $tag; # can be "0", so use defined test

1181

$tag = "" unless defined $tag; # can be "0", so use a defined test

1158

1182

1159

1183

%tags = ( YESNO => sub { $self->_get_tag_value_for_yesno() },

1160

1184

1175

1199

1176

1200

1177

1201

REMOTEHOSTNAME => sub {

1178

$self->{tag_data}->{'REMOTEHOSTNAME'} ||

1179

"localhost";

1202

$self->{tag_data}->{'REMOTEHOSTNAME'} || "localhost";

1180

1203

1181

1204

REMOTEHOSTADDR => sub {

1182

$self->{tag_data}->{'REMOTEHOSTADDR'} ||

1183

"127.0.0.1";

1205

$self->{tag_data}->{'REMOTEHOSTADDR'} || "127.0.0.1";

1184

1206

1185

1207

1186

1208

LASTEXTERNALIP => sub {

1187

1209

my $lasthop = $self->{relays_external}->[0];

1188

return $lasthop ? $lasthop->{ip} : '';

1210

$lasthop ? $lasthop->{ip} : '';

1189

1211

1190

1212

1191

1213

LASTEXTERNALRDNS => sub {

1192

1214

my $lasthop = $self->{relays_external}->[0];

1193

return $lasthop ? $lasthop->{rdns} : '';

1215

$lasthop ? $lasthop->{rdns} : '';

1194

1216

1195

1217

1196

1218

LASTEXTERNALHELO => sub {

1197

1219

my $lasthop = $self->{relays_external}->[0];

1198

return $lasthop ? $lasthop->{helo} : '';

1220

$lasthop ? $lasthop->{helo} : '';

1199

1221

1200

1222

1201

CONTACTADDRESS => sub { $self->{conf}->{report_contact}; },

1223

CONTACTADDRESS => sub { $self->{conf}->{report_contact} },

1202

1224

1203

1225

BAYES => sub {

1204

1226

defined($self->{bayes_score}) ?

1205

1227

sprintf("%3.4f", $self->{bayes_score}) : "0.5"

1206

1228

1207

1229

1208

HAMMYTOKENS => sub {

1209

$self->bayes_report_make_list

1210

( $self->{bayes_token_info_hammy}, shift );

1211

1212

1213

SPAMMYTOKENS => sub {

1214

$self->bayes_report_make_list

1215

( $self->{bayes_token_info_spammy}, shift );

1216

1217

1218

TOKENSUMMARY => sub {

1219

if( defined $self->{tag_data}{BAYESTC} )

1220

{

1221

my $tcount_neutral = $self->{tag_data}{BAYESTCLEARNED}

1222

- $self->{tag_data}{BAYESTCSPAMMY}

1223

- $self->{tag_data}{BAYESTCHAMMY};

1224

my $tcount_new = $self->{tag_data}{BAYESTC}

1225

- $self->{tag_data}{BAYESTCLEARNED};

1226

"Tokens: new, $tcount_new; "

1227

."hammy, $self->{tag_data}{BAYESTCHAMMY}; "

1228

."neutral, $tcount_neutral; "

1229

."spammy, $self->{tag_data}{BAYESTCSPAMMY}."

1230

} else {

1231

"Bayes not run.";

1232

}

1233

1234

1235

1230

DATE => \&Mail::SpamAssassin::Util::time_to_rfc822_date,

1236

1231

1237

1232

STARS => sub {

1238

1233

my $arg = (shift || "*");

1239

1234

my $length = int($self->{score});

1240

1235

$length = 50 if $length > 50;

1241

return $arg x $length;

1236

$arg x $length;

1242

1237

1243

1238

1244

AUTOLEARN => sub { return $self->get_autolearn_status(); },

1239

AUTOLEARN => sub { $self->get_autolearn_status() },

1245

1240

1246

AUTOLEARNSCORE => sub { return $self->get_autolearn_points(); },

1241

AUTOLEARNSCORE => sub { $self->get_autolearn_points() },

1247

1242

1248

1243

TESTS => sub {

1249

1244

my $arg = (shift || ',');

1250

return (join($arg, sort(@{$self->{test_names_hit}})) || "none");

1245

join($arg, sort(@{$self->{test_names_hit}})) || "none";

1251

1246

1252

1247

1253

1248

SUBTESTS => sub {

1254

1249

my $arg = (shift || ',');

1255

return (join($arg, sort(@{$self->{subtest_names_hit}})) || "none");

1250

join($arg, sort(@{$self->{subtest_names_hit}})) || "none";

1256

1251

1257

1252

1258

1253

TESTSSCORES => sub {

1265

1260

$line .= $arg . $test . "=" . $self->{conf}->{scores}->{$test};

1266

1261

}

1267

1262

}

1268

return $line ? $line : 'none';

1263

$line ? $line : 'none';

1269

1264

1270

1265

1271

1266

PREVIEW => sub { $self->get_content_preview() },

1272

1267

1273

REPORT => sub {

1274

return "\n" . ($self->{tag_data}->{REPORT} || "");

1275

1268

REPORT => sub { "\n" . ($self->{tag_data}->{REPORT} || "") },

1276

1269

1277

1270

HEADER => sub {

1278

1271

my $hdr = shift || return;

1279

return $self->get($hdr);

1272

$self->get($hdr,undef);

1280

1273

1281

1274

1275

TIMING => sub { $self->{main}->timer_report() },

1276

1277

ADDEDHEADERHAM => sub { $self->_get_added_headers('headers_ham') },

1278

1279

ADDEDHEADERSPAM=> sub { $self->_get_added_headers('headers_spam') },

1280

1281

ADDEDHEADER => sub {

1282

$self->_get_added_headers(

1283

$self->{is_spam} ? 'headers_spam' : 'headers_ham');

1284

1285

1282

1286

);

1283

1287

1284

my $data = "";

1288

my $data;

1285

1289

if (exists $tags{$tag}) {

1286

$data = $tags{$tag}->(@_);

1287

}

1288

elsif (exists($self->{tag_data}->{$tag})) {

1290

$data = $tags{$tag};

1291

$data = $data->(@_) if ref $data eq 'CODE';

1292

$data = "" if !defined $data;

1293

} elsif (exists $self->{tag_data}->{$tag}) {

1289

1294

$data = $self->{tag_data}->{$tag};

1290

if (ref $data eq 'CODE') {

1291

$data = $data->(@_);

1292

}

1293

}

1294

# known valid tags that might not get defined in some circumstances

1295

elsif ($tag !~ /^(?:BAYESTC(?:|LEARNED|SPAMMY|HAMMY)|RBL)$/) {

1296

return;

1297

}

1298

$data = "" unless defined $data;

1295

$data = $data->(@_) if ref $data eq 'CODE';

1296

$data = "" if !defined $data;

1297

}

1299

1298

return $data;

1300

1299

}

1301

1300

1355

1354

sub extract_message_metadata {

1356

1355

my ($self) = @_;

1357

1356

1357

my $timer = $self->{main}->time_method("extract_message_metadata");

1358

$self->{msg}->extract_message_metadata($self);

1359

1360

foreach my $item (qw(

1455

=back

1456

1457

Appending C<:name> to the header name will cause everything except

1458

the first real name to be removed from the header. For example,

1458

the first display name to be removed from the header. For example,

1459

all of the following will result in "Foo Blah"

1460

1461

=over 4

1526

my $getname = 0;

1527

my $getraw = 0;

1528

1529

# special queries

1530

if (index($request, ':') != -1) {

1531

$getaddr = ($request =~ s/:addr$//);

1532

$getname = ($request =~ s/:name$//);

1533

$getraw = ($request =~ s/:raw$//);

1529

# special queries - process and strip modifiers

1530

if (index($request,':') >= 0) { # triage

1531

local $1;

1532

while ($request =~ s/:([^:]*)//) {

1533

if ($1 eq 'raw') { $getraw = 1 }

1534

elsif ($1 eq 'addr') { $getaddr = $getraw = 1 }

1535

elsif ($1 eq 'name') { $getname = 1 }

1536

}

1534

1537

}

1535

1538

1536

# ALL: entire raw headers

1539

# ALL: entire pristine or semi-raw headers

1537

1540

if ($request eq 'ALL') {

1538

$result = $self->{msg}->get_all_headers(1);

1541

$result = $getraw ? $self->{msg}->get_pristine_header()

1542

: $self->{msg}->get_all_headers(1);

1539

1543

}

1540

1544

# ALL-TRUSTED: entire trusted raw headers

1541

1545

elsif ($request eq 'ALL-TRUSTED') {

1585

1589

# ToCc: the combined recipients list

1586

1590

elsif ($request eq 'ToCc') {

1587

1591

$result = join("\n", $self->{msg}->get_header('To', $getraw));

1588

if ($result) {

1592

if ($result ne '') {

1589

1593

chomp $result;

1590

1594

$result .= ", " if $result =~ /\S/;

1591

1595

}

1592

1596

$result .= join("\n", $self->{msg}->get_header('Cc', $getraw));

1593

$result = undef if !$result;

1597

$result = undef if $result eq '';

1594

1598

}

1595

1599

# MESSAGEID: handle lists which move the real message-id to another

1596

1600

# header for resending.

1597

1601

elsif ($request eq 'MESSAGEID') {

1598

$result = join("\n", grep { defined($_) && length($_) > 0 }

1602

$result = join("\n", grep { defined($_) && $_ ne '' }

1599

1603

$self->{msg}->get_header('X-Message-Id', $getraw),

1600

1604

$self->{msg}->get_header('Resent-Message-Id', $getraw),

1601

1605

$self->{msg}->get_header('X-Original-Message-ID', $getraw),

1603

1607

}

1604

1608

# a conventional header

1605

1609

else {

1606

if ($getraw) {

1607

$result = join('', $self->{msg}->raw_header($request));

1608

} else {

1609

$result = join('', $self->{msg}->get_header($request));

1610

}

1611

1612

# metadata

1613

if (!$result) {

1610

my @results = $getraw ? $self->{msg}->raw_header($request)

1611

: $self->{msg}->get_header($request);

1612

# dbg("message: get(%s) = %s", $request, join(", ",@results));

1613

if (@results) {

1614

$result = join('', @results);

1615

} else { # metadata

1614

1616

$result = $self->{msg}->get_metadata($request);

1615

$result = undef if !$result;

1616

1617

}

1617

1618

}

1618

1619

1620

# special queries

1620

1621

if (defined $result && ($getaddr || $getname)) {

1622

local $1;

1621

1623

$result =~ s/^[^:]+:(.*);\s*$/$1/gs; # 'undisclosed-recipients: ;'

1622

1624

$result =~ s/\s+/ /g; # reduce whitespace

1623

1625

$result =~ s/^\s+//; # leading whitespace

1636

1638

1637

1639

# strip out the (comments)

1638

1640

$result =~ s/\s*$.*?$//g;

1639

# strip out the "quoted text"

1640

$result =~ s/(?<!<)"[^"]*"(?!@)//g;

1641

# strip out the "quoted text", unless it's the only thing in the string

1642

if ($result !~ /^".*"$/) {

1643

$result =~ s/(?<!<)"[^"]*"(?!@)//g; #" emacs

1644

}

1641

1645

# Foo Blah <jm@xxx> or <jm@xxx>

1642

$result =~ s/^[^<]*?<(.*?)>.*$/$1/;

1646

local $1;

1647

$result =~ s/^[^"<]*?<(.*?)>.*$/$1/;

1643

1648

# multiple addresses on one line? remove all but first

1644

1649

$result =~ s/,.*$//;

1645

1650

}

1654

1659

# "Foo Blah" <jm@foo>

1655

1660

# "'Foo Blah'" <jm@foo>

1656

1661

1662

local $1;

1657

1663

$result =~ s/^[\'\"]*(.*?)[\'\"]*\s*<.+>\s*$/$1/g

1658

1664

or $result =~ s/^.+\s$(.*?)$\s*$/$1/g; # jm@foo (Foo Blah)

1659

1665

}

1661

1667

return $result;

1662

1668

}

1663

1669

1664

# heavily optimized for speed

1670

# optimized for speed

1665

1671

# $_[0] is self

1666

1672

# $_[1] is request

1667

1673

# $_[2] is defval

1668

1674

sub get {

1669

# return cache entry if it is defined

1670

return $_[0]->{c}->{$_[1]} if defined $_[0]->{c}->{$_[1]};

1671

1672

# fill in cache entry if it is empty

1673

if (!exists $_[0]->{c}->{$_[1]}) {

1674

$_[0]->{c}->{$_[1]} = _get(@_);

1675

return $_[0]->{c}->{$_[1]} if defined $_[0]->{c}->{$_[1]};

1675

my $cache = $_[0]->{c};

1676

my $found;

1677

if (exists $cache->{$_[1]}) {

1678

# return cache entry if it is known

1679

# (measured hit/attempts rate on a production mailer is about 47%)

1680

$found = $cache->{$_[1]};

1681

} else {

1682

# fill in a cache entry

1683

$found = _get(@_);

1684

$cache->{$_[1]} = $found;

1676

1685

}

1677

1678

# if the requested header wasn't found, we should return either

1679

# a default value as specified by the caller, or the blank string ''

1680

return $_[2] || '';

1686

# if the requested header wasn't found, we should return a default value

1687

# as specified by the caller: if defval argument is present it represents

1688

# a default value even if undef; if defval argument is absent a default

1689

# value is an empty string for upwards compatibility

1690

return (defined $found ? $found : @_ > 2 ? $_[2] : '');

1681

1691

}

1682

1692

1683

1693

###########################################################################

1684

1694

1685

# Taken from URI and URI::Find

1686

my $reserved = q(;/?:@&=+$,[]\#|);

1687

my $mark = q(-_.!~*'()); #'; emacs

1688

my $unreserved = "A-Za-z0-9\Q$mark\E\x00-\x08\x0b\x0c\x0e-\x1a\x1c-\x1f";

1689

my $uricSet = quotemeta($reserved) . $unreserved . "%";

1690

1691

my $schemeRE = qr/(?:https?|ftp|mailto|javascript|file)/i;

1692

1693

my $uricCheat = $uricSet;

1694

$uricCheat =~ tr/://d;

1695

# uri parsing from plain text:

1696

# The goals are to find URIs in plain text spam that are intended to be clicked on or copy/pasted, but

1697

# ignore random strings that might look like URIs, for example in uuencoded files, and to ignore

1698

# URIs that spammers might seed in spam in ways not visible or clickable to add work to spam filters.

1699

# When we extract a domain and look it up in an RBL, an FP on decding that the text is a URI is not much

1700

# of a problem, as the only cost is an extra RBL lookup. The same FP is worse if the URI is used in matching rule

1701

# because it could lead to a rule FP, as in bug 5780 with WIERD_PORT matching random uuencoded strings.

1702

# The principles of the following code are 1) if ThunderBird or Outlook Express would linkify a string,

1703

# then we should attempt to parse it as a URI; 2) Where TBird and OE parse differently, choose to do what is most

1704

# likely to find a domain for the RBL tests; 3) If it begins with a scheme or www\d*\. or ftp\. assume that

1705

# it is a URI; 4) If it does not then require that the start of the string looks like a FQDN with a valid TLD;

1706

# 5) Reject strings that after parsing, URLDecoding, and redirection processing don't have a valid TLD

1707

1708

# We get the entire URI that would be linkified before dealing with it, in order to do the right thing

1709

# with URI-encodings and redirecting URIs.

1710

1711

# The delimiters for start of a URI in TBird are @(`{|[\"'<>,\s in OE they are ("<\s

1712

1713

# Tbird allows .,?';-! in a URI but ignores [.,?';-!]* at the end.

1714

# TBird's end delimiters are )`{}|[]"<>\s but ) is only an end delmiter if there is no ( in the URI

1715

# OE only uses space as a delimiter, but ignores [~!@#^&*()_+`-={}|[]:";'<>?,.]* at the end.

1716

1717

# Both TBird and OE decide that a URI is an email address when there is '@' character embedded in it.

1718

# TBird has some additional restrictions on email URIs: They cannot contain non-ASCII characters and their end

1719

# delimiters include ( and '

1720

1721

# bug 4522: ISO2022 format mail, most commonly Japanese SHIFT-JIS, inserts a three character escape sequence ESC ( .

1722

1723

# a hybrid of tbird and oe's version of uri parsing

1724

my $tbirdstartdelim = '><"\'`,{[(|\s' . "\x1b"; # The \x1b as per bug 4522

1725

my $iso2022shift = "\x1b" . '\(.'; # bug 4522

1726

my $tbirdenddelim = '><"`}\]{[|\s' . "\x1b"; # The \x1b as per bug 4522

1727

my $oeignoreatend = '-~!@#^&*()_+=:;\'?,.';

1728

my $nonASCII = '\x80-\xff';

1729

my $tbirdenddelimemail = $tbirdenddelim . '(\'' . $nonASCII; # tbird ignores non-ASCII mail addresses for now, until RFC changes

1730

my $tbirdenddelimplusat = $tbirdenddelimemail . '@';

1731

1732

# regexps for finding plain text non-scheme hostnames with valid TLDs.

1695

1733

1696

1734

# the list from %VALID_TLDS in Util/RegistrarBoundaries.pm, as a

1697

# Regexp::Optimize optimized regexp ;) accurate as of 20050318

1735

# Regexp::List optimized regexp ;) accurate as of 20080208

1698

1736

my $tldsRE = qr/

1699

(?=[a-wyz])

1700

(?:a(?:e(?:ro)?|r(?:pa)?|[cdfgilmnoqstuwzx])|b(?:iz?|[abdefghjmnorstvwyz])

1701

1702

1703

|j[emop]|k[eghimnprwyz]|l[abcikrstuvy]

1704

1705

|o(?:m|rg)|p(?:ro?|[aefghklmnstwy])|r[eouw]|s[abcdeghijklmnortvyzu]

1706

|t[cdfghjklmnoprtvwz]|u[agkmsyz]|v[aceginu]|w[fs]|xxx|y[etu]|z[amw]|ed?u|qa

1707

)/ix;

1708

1709

# from RFC 1035, but allowing domains starting with numbers:

1710

# $label = q/[A-Za-z\d](?:[A-Za-z\d-]{0,61}[A-Za-z\d])?/;

1711

# $domain = qq<$label(?:\.$label)*>;

1712

# length($host) <= 255 && $host =~ /^($domain)$/

1713

# changes:

1714

# massively simplified from grammar, only matches known TLDs, a single

1715

# dot at end of TLD works

1716

# negative look-behinds:

1717

# (?<![a-z\d][.-]) = don't let there be more hostname behind, but

1718

# don't miss ".....www.bar.com" or "-----www.foo.com"

1719

# (?<!.\@) = this will be caught by the email address regular expression

1720

my $schemelessRE = qr/(?<![a-z\d][._-])(?<!.\@)\b[a-z\d]

1721

[a-z\d._-]{0,251}

1722

\.${tldsRE}\.?\b

1723

(?![a-z\d._-])

1724

/ix;

1725

1726

my $uriRe = qr/\b(?:$schemeRE:[$uricCheat]|$schemelessRE)[$uricSet#]*/o;

1727

1728

# Taken from Email::Find (thanks Tatso!)

1729

# This is the BNF from RFC 822

1730

my $esc = '\\\\';

1731

my $period = '\.';

1732

my $space = '\040';

1733

my $open_br = '\[';

1734

my $close_br = '\]';

1735

my $nonASCII = '\x80-\xff';

1736

my $ctrl = '\000-\037';

1737

my $cr_list = '\n\015';

1738

my $qtext = qq/[^$esc$nonASCII$cr_list\"]/; #"

1739

my $dtext = qq/[^$esc$nonASCII$cr_list$open_br$close_br]/;

1740

my $quoted_pair = qq<$esc>.qq<[^$nonASCII]>;

1741

my $atom_char = qq/[^($space)<>\@,;:\".$esc$open_br$close_br$ctrl$nonASCII]/;

1742

1743

my $atom = qq{(?>$atom_char+)};

1744

my $quoted_str = qq<\"$qtext*(?:$quoted_pair$qtext*)*\">; #"

1745

my $word = qq<(?:$atom|$quoted_str)>;

1746

my $local_part = qq<$word(?:$period$word)*>;

1747

1748

# This is a combination of the domain name BNF from RFC 1035 plus the

1749

# domain literal definition from RFC 822, but allowing domains starting

1750

# with numbers.

1751

my $label = q/[A-Za-z\d](?:[A-Za-z\d-]*[A-Za-z\d])?/;

1752

my $domain_ref = qq<$label(?:$period$label)*>;

1753

my $domain_lit = qq<$open_br(?:$dtext|$quoted_pair)*$close_br>;

1754

my $domain = qq<(?:$domain_ref|$domain_lit)>;

1755

1756

# Finally, the address-spec regex (more or less)

1757

my $Addr_spec_re = qr<$local_part\s*\@\s*$domain>o;

1758

1759

# TVD: This really belongs in metadata

1737

(?=[a-wyz])

1738

(?:a(?:e(?:ro)?|r(?:pa)?|s(?:ia)?|[cdfgilmnoqtuwxz])|b(?:iz?|[abdefghjmnorstwyz])

1739

|c(?:at?|o(?:m|op)?|[cdfghiklmnruvxyz])|d[ejkmoz]|e(?:[cegrst]|d?u)|f[ijkmor]

1740

1741

|j(?:o(?:bs)?|[emp])|k[eghimnprwyz]|l[abcikrstuvy]

1742

1743

|o(?:m|rg)|p(?:ro?|[aefghklnstwy])|r[eosuw]|s[abcdeghiklmnrtuvyz]

1744

|t(?:r(?:avel)?|[cdfghjkmnoptvwz]|e?l)|u[agksyz]|v[aceginu]|w[fs]|y[eu]|z[amw]|qa

1745

)/ix;

1746

1747

# knownscheme regexp looks for either a https?: or ftp: scheme, or www\d*\. or ftp\. prefix, i.e., likely to start a URL

1748

# schemeless regexp looks for a valid TLD at the end of what may be a FQDN, followed by optional ., optional :portnum, optional /rest_of_uri

1749

my $urischemeless = qr/[a-z\d][a-z\d._-]{0,251}\.${tldsRE}\.?(?::\d{1,5})?(?:\/[^$tbirdenddelim]{1,251})?/io;

1750

my $uriknownscheme = qr/(?:(?:(?:(?:https?)|(?:ftp)):(?:\/\/)?)|(?:(?:www\d{0,2}|ftp)\.))[^$tbirdenddelim]{1,251}/io;

1751

my $urimailscheme = qr/(?:mailto:)?[^$tbirdenddelimplusat]{1,251}@[^$tbirdenddelimemail]{1,251}/io;

1752

my $tbirdurire = qr/(?:\b|(?<=$iso2022shift)|(?<=[$tbirdstartdelim]))

1753

(?:(?:($uriknownscheme)(?=[$tbirdenddelim])) |

1754

(?:($urimailscheme)(?=[$tbirdenddelimemail])) |

1755

(?:\b($urischemeless)(?=[$tbirdenddelim])))/xo;

1760

1756

1761

1757

=item $status->get_uri_list ()

1762

1758

1780

1776

return @{$self->{uri_list}};

1781

1777

}

1782

1778

1783

my @uris = ();

1779

my @uris;

1784

1780

# $self->{redirect_num} = 0;

1785

1781

1786

1782

# get URIs from HTML parsing

1846

1842

return $self->{uri_detail_list};

1847

1843

}

1848

1844

1845

my $timer = $self->{main}->time_method("get_uri_detail_list");

1846

1849

1847

$self->{uri_domain_count} = 0;

1850

1848

1851

1849

# do this so we're sure metadata->html is setup

1852

1850

my %parsed = map { $_ => 'parsed' } $self->_get_parsed_uri_list();

1853

1851

1854

1852

# Look for the domain in DK/DKIM headers

1855

my $dk = join(" ", $self->get('DomainKey-Signature'), $self->get('DKIM-Signature'));

1853

my $dk = join(" ", grep {defined} ( $self->get('DomainKey-Signature',undef),

1854

$self->get('DKIM-Signature',undef) ));

1856

1855

while ($dk =~ /\bd\s*=\s*([^;]+)/g) {

1857

1856

my $dom = $1;

1858

1857

$dom =~ s/\s+//g;

1898

1897

$detail->{$uri}->{types}->{$type} = 1;

1899

1898

my $info = $detail->{$uri};

1900

1899

1901

my @uris = ();

1900

my @uris;

1902

1901

1903

1902

if (!exists $info->{cleaned}) {

1904

1903

if ($type eq 'parsed') {

1950

1949

# also, if we allow $textary to be passed in, we need to invalidate

1951

1950

# the cache first. fyi.

1952

1951

my $textary = $self->get_decoded_stripped_body_text_array();

1952

my $redirector_patterns = $self->{conf}->{redirector_patterns};

1953

1954

my ($rulename, $pat, @uris);

1955

local ($_);

1956

1957

1955

my $text;

1958

1956

1959

for (@$textary) {

1960

# NOTE: do not modify $_ in this loop

1961

while (/($uriRe)/igo) {

1962

my $uri = $1;

1963

1964

# skip mismatches from URI regular expression

1965

next if $uri =~ /^[a-z\d.-]*\.\./i; # skip ".."

1966

1967

$uri =~ s/^<(.*)>$/$1/;

1968

$uri =~ s/[\]\)>#]$//;

1969

1970

if ($uri !~ /^${schemeRE}:/io) {

1971

# If it's a hostname that was just sitting out in the

1972

# open, without a protocol, and not inside of an HTML tag,

1973

# the we should add the proper protocol in front, rather

1974

# than using the base URI.

1957

for my $entry (@$textary) {

1958

1959

# a workaround for [perl #69973] bug:

1960

# Invalid and tainted utf-8 char crashes perl 5.10.1 in regexp evaluation

1961

# Bug 6225, regexp and string should both be utf8, or none of them;

1962

# untainting string also seems to avoid the crash

1963

1964

# Bug 6225: untaint the string in an attempt to work around a perl crash

1965

local $_ = untaint_var($entry);

1966

1967

local($1,$2,$3);

1968

while (/$tbirdurire/igo) {

1969

my $rawuri = $1||$2||$3;

1970

$rawuri =~ s/(^[^(]*)\).*$/$1/; # as per ThunderBird, ) is an end delimiter if there is no ( preceeding it

1971

$rawuri =~ s/[$oeignoreatend]*$//; # remove trailing string of punctuations that TBird ignores

1972

# skip if there is '..' in the hostname portion of the URI, something we can't catch in the general URI regexp

1973

next if $rawuri =~ /^(?:(?:https?|ftp|mailto):(?:\/\/)?)?[a-z\d.-]*\.\./i;

1974

1975

# If it's a hostname that was just sitting out in the

1976

# open, without a protocol, and not inside of an HTML tag,

1977

# the we should add the proper protocol in front, rather

1978

# than using the base URI.

1979

my $uri = $rawuri;

1980

my $rblonly;

1981

if ($uri !~ /^(?:https?|ftp|mailto|javascript|file):/i) {

1975

1982

if ($uri =~ /^ftp\./i) {

1976

push (@uris, $uri);

1977

1983

$uri = "ftp://$uri";

1978

1984

}

1979

if ($uri =~ /\@/) {

1980

push (@uris, $uri);

1985

elsif ($uri =~ /^www\d{0,2}\./i) {

1986

$uri = "http://$uri";

1987

}

1988

elsif ($uri =~ /\@/) {

1981

1989

$uri = "mailto:$uri";

1982

1990

}

1983

else # if ($uri =~ /^www\d*\./i)

1984

{

1991

else {

1985

1992

# some spammers are using unschemed URIs to escape filters

1986

push (@uris, $uri);

1993

$rblonly = 1; # flag that this is a URI that MUAs don't linkify so only use for RBLs

1987

1994

$uri = "http://$uri";

1988

1995

}

1989

1996

}

1990

1997

1991

# warn("uri: got URI: $uri\n");

1992

push @uris, $uri;

1993

}

1994

while (/($Addr_spec_re)/igo) {

1995

my $uri = $1;

1996

1997

# skip mismatches from email address regular expression

1998

next unless $uri =~ /\.${tldsRE}\W*$/io; # skip non-TLDs

1999

2000

$uri =~ s/\s*\@\s*/@/; # remove spaces around the '@'

2001

$uri = "mailto:$uri"; # prepend mailto:

2002

2003

#warn("uri: got URI: $uri\n");

2004

push @uris, $uri;

1998

if ($uri =~ /^mailto:/) {

1999

# skip a mail link that does not have a valid TLD or other than one @ after decoding any URLEncoded characters

2000

$uri = Mail::SpamAssassin::Util::url_encode($uri) if ($uri =~ /\%(?:2[1-9a-fA-F]|[3-6][0-9a-fA-f]|7[0-9a-eA-E])/);

2001

next if ($uri !~ /^[^@]+@[^@]+$/);

2002

my $domuri = Mail::SpamAssassin::Util::uri_to_domain($uri);

2003

next unless $domuri;

2004

push (@uris, $rawuri);

2005

push (@uris, $uri) unless ($rawuri eq $uri);

2006

}

2007

2008

next unless ($uri =~/^(?:https?|ftp):/); # at this point only valid if one or the other of these

2009

2010

my @tmp = Mail::SpamAssassin::Util::uri_list_canonify($redirector_patterns, $uri);

2011

my $goodurifound = 0;

2012

foreach my $cleanuri (@tmp) {

2013

my $domain = Mail::SpamAssassin::Util::uri_to_domain($cleanuri);

2014

if ($domain) {

2015

# bug 5780: Stop after domain to avoid FP, but do that after all deobfuscation of urlencoding and redirection

2016

if ($rblonly) {

2017

local $1;

2018

$cleanuri =~ s/^(https?:\/\/[^:\/]+).*$/$1/;

2019

}

2020

push (@uris, $cleanuri);

2021

$goodurifound = 1;

2022

}

2023

}

2024

next unless $goodurifound;

2025

push @uris, $rawuri unless $rblonly;

2005

2026

}

2006

2027

}

2007

2028

2032

2053

next if ($self->is_rule_complete($r));

2033

2054

2034

2055

dbg("rules: meta rule $metarule depends on pending rule $r, blocking");

2056

my $timer = $self->{main}->time_method("wait_for_pending_rules");

2057

2035

2058

my $start = time;

2036

2059

$self->harvest_until_rule_completes($r);

2037

2060

my $elapsed = time - $start;

2038

2061

2039

2062

if (!$self->is_rule_complete($r)) {

2040

dbg ("rules: rule $r is still not complete; exited early?");

2063

dbg("rules: rule $r is still not complete; exited early?");

2041

2064

}

2042

2065

elsif ($elapsed > 0) {

2043

2066

info("rules: $r took $elapsed seconds to complete, for $metarule");

2087

2110

2088

2111

}

2089

2112

ENDOFEVAL

2090

eval $evalstr; ## no critic

2091

2092

if ($@) {

2093

warn "rules: failed to run header tests, skipping some: $@\n";

2113

eval $evalstr . '; 1' ## no critic

2114

or do {

2115

my $eval_stat = $@ ne '' ? $@ : "errno=$!"; chomp $eval_stat;

2116

warn "rules: failed to run header tests, skipping some: $eval_stat\n";

2094

2117

$self->{rule_errors}++;

2095

}

2118

};

2096

2119

2097

2120

# ensure this method is deleted if finish_tests() is called

2098

2121

push (@TEMPORARY_METHODS, $function);

2141

2164

return;

2142

2165

}

2143

2166

2167

# this should not happen; warn about NaN (bug 3364)

2168

if ($score != $score) {

2169

warn "rules: score '$score' for rule '$rule' in '$area' '$desc'";

2170

return;

2171

}

2172

2144

2173

# Add the rule hit to the score

2145

2174

$self->{score} += $score;

2146

2175

2214

2243

C<hit_rule> plugin call, called by this method. If unset, I<'unknown'> is

2215

2244

used.

2216

2245

2246

=item tflags => $string

2247

2248

Optional: a string, i.e. a space-separated list of additional tflags

2249

to be appended to an existing list of flags in $self->{conf}->{tflags},

2250

such as: "nice noautolearn multiple". No syntax checks are performed.

2251

2252

=item description => $string

2253

2254

Optional: a custom rule description string. This is used in the

2255

C<hit_rule> plugin call, called by this method. If unset, the static

2256

description is used.

2257

2217

2258

=back

2218

2259

2219

2260

Backwards compatibility: the two mandatory arguments have been part of this API

2225

2266

sub got_hit {

2226

2267

my ($self, $rule, $area, %params) = @_;

2227

2268

2228

# ensure that rule values always result in an *increase* of

2229

# $self->{tests_already_hit}->{$rule}:

2230

my $value = $params{value}; if (!$value || $value <= 0) { $value = 1; }

2269

my $conf_ref = $self->{conf};

2270

2271

my $dynamic_score_provided;

2272

my $score = $params{score};

2273

if (defined $score) {

2274

$dynamic_score_provided = 1;

2275

} else {

2276

$score = $conf_ref->{scores}->{$rule};

2277

}

2278

2279

# adding a hit does nothing if we don't have a score -- we probably

2280

# shouldn't have run it in the first place

2281

return unless $score;

2282

2283

# ensure that rule values always result in an *increase*

2284

# of $self->{tests_already_hit}->{$rule}:

2285

my $value = $params{value};

2286

if (!$value || $value <= 0) { $value = 1 }

2287

2288

my $tflags_ref = $conf_ref->{tflags};

2289

my $tflags_add = $params{tflags};

2290

if (defined $tflags_add && $tflags_add ne '') {

2291

$_ = (!defined $_ || $_ eq '') ? $tflags_add : ($_ . ' ' . $tflags_add)

2292

for $tflags_ref->{$rule};

2293

};

2231

2294

2232

2295

my $already_hit = $self->{tests_already_hit}->{$rule} || 0;

2233

2234

2296

# don't count hits multiple times, unless 'tflags multiple' is on

2235

if ($already_hit && ($self->{conf}->{tflags}->{$rule}||'') !~ /\bmultiple\b/) {

2297

if ($already_hit && ($tflags_ref->{$rule}||'') !~ /\bmultiple\b/) {

2236

2298

return;

2237

2299

}

2238

2300

2241

2303

# default ruletype, if not specified:

2242

2304

$params{ruletype} ||= 'unknown';

2243

2305

2306

if ($dynamic_score_provided) { # copy it to static for proper reporting

2307

$conf_ref->{scoreset}->[$_]->{$rule} = $score for (0..3);

2308

$conf_ref->{scores}->{$rule} = $score;

2309

}

2310

2311

my $rule_descr = $params{description};

2312

if (defined $rule_descr) {

2313

$conf_ref->{descriptions}->{$rule} = $rule_descr; # save dynamic descr.

2314

} else {

2315

$rule_descr = $conf_ref->get_description_for_rule($rule); # static

2316

}

2317

$rule_descr = $rule if !defined $rule_descr || $rule_descr eq '';

2244

2318

$self->_handle_hit($rule,

2245

$params{score} || $self->{conf}->{scores}->{$rule},

2319

$score,

2246

2320

$area,

2247

2321

$params{ruletype},

2248

$self->{conf}->get_description_for_rule($rule) || $rule);

2322

$rule_descr);

2249

2323

2250

2324

# take care of duplicate rules, too (bug 5206)

2251

my $dups = $self->{conf}->{duplicate_rules}->{$rule};

2325

my $dups = $conf_ref->{duplicate_rules}->{$rule};

2252

2326

if ($dups && @{$dups}) {

2253

2327

foreach my $dup (@{$dups}) {

2254

2328

$self->got_hit($dup, $area, %params);

2300

2374

# This will prevent us falling through and picking up inappropriate headers.

2301

2375

if (defined $self->{conf}->{envelope_sender_header}) {

2302

2376

# make sure we get the most recent copy - there can be only one EnvelopeSender.

2303

$envf = $self->get($self->{conf}->{envelope_sender_header}.":addr");

2377

$envf = $self->get($self->{conf}->{envelope_sender_header}.":addr",undef);

2304

2378

# ok if it contains an "@" sign, or is "" (ie. "<>" without the < and >)

2305

2379

goto ok if defined $envf && ($envf =~ /\@/ || $envf =~ /^$/);

2306

2380

# Warn them if it's configured, but not there or not usable.

2307

2381

if (defined $envf) {

2308

2382

chomp $envf;

2309

dbg("message: envelope_sender_header '$self->{conf}->{envelope_sender_header}: $envf' is not an FQDN, ignoring");

2383

dbg("message: envelope_sender_header '%s: %s' is not an FQDN, ignoring",

2384

$self->{conf}->{envelope_sender_header}, $envf);

2310

2385

} else {

2311

dbg("message: envelope_sender_header '".$self->{conf}->{envelope_sender_header}."' not found in message");

2386

dbg("message: envelope_sender_header '%s' not found in message",

2387

$self->{conf}->{envelope_sender_header});

2312

2388

}

2313

2389

# Couldn't get envelope-sender using the configured header.

2314

2390

return;

2343

2419

# lines, we cannot trust any Envelope-From headers, since they're likely to

2344

2420

# be incorrect fetchmail guesses.

2345

2421

2346

if ($self->get ("X-Sender") =~ /\@/) {

2347

my $rcvd = join (' ', $self->get ("Received"));

2422

if ($self->get("X-Sender") =~ /\@/) {

2423

my $rcvd = join(' ', $self->get("Received"));

2348

2424

if ($rcvd =~ /\(fetchmail/) {

2349

2425

dbg("message: X-Sender and fetchmail signatures found, cannot trust envelope-from");

2350

2426

return;

2351

2427

}

2352

2428

}

2353

2429

2354

# procmailrc notes this, amavisd are adding it, we recommend it

2355

# (although we now recommend adding to Received instead)

2356

if ($envf = $self->get ("X-Envelope-From")) {

2430

# procmailrc notes this (we now recommend adding it to Received instead)

2431

if ($envf = $self->get("X-Envelope-From")) {

2357

2432

# heuristic: this could have been relayed via a list which then used

2358

2433

# a *new* Envelope-from. check

2359

if ($self->get ("ALL") =~ /(?:^|\n)Received:\s.*\nX-Envelope-From:\s/s) {

2434

if ($self->get("ALL:raw") =~ /^Received:.*^X-Envelope-From:/smi) {

2360

2435

dbg("message: X-Envelope-From header found after 1 or more Received lines, cannot trust envelope-from");

2361

2436

return;

2362

2437

} else {

2365

2440

}

2366

2441

2367

2442

# qmail, new-inject(1)

2368

if ($envf = $self->get ("Envelope-Sender")) {

2443

if ($envf = $self->get("Envelope-Sender")) {

2369

2444

# heuristic: this could have been relayed via a list which then used

2370

2445

# a *new* Envelope-from. check

2371

if ($self->get ("ALL") =~ /(?:^|\n)Received:\s.*\nEnvelope-Sender:\s/s) {

2446

if ($self->get("ALL:raw") =~ /^Received:.*^Envelope-Sender:/smi) {

2372

2447

dbg("message: Envelope-Sender header found after 1 or more Received lines, cannot trust envelope-from");

2373

2448

} else {

2374

2449

goto ok;

2375

2450

}

2376

2451

}

2377

2452

2378

# Postfix, sendmail, also mentioned in RFC821

2379

if ($envf = $self->get ("Return-Path")) {

2453

# Postfix, sendmail, amavisd-new, ...

2454

# RFC 2821 requires it:

2455

# When the delivery SMTP server makes the "final delivery" of a

2456

# message, it inserts a return-path line at the beginning of the mail

2457

# data. This use of return-path is required; mail systems MUST support

2458

# it. The return-path line preserves the information in the <reverse-

2459

# path> from the MAIL command.

2460

if ($envf = $self->get("Return-Path")) {

2380

2461

# heuristic: this could have been relayed via a list which then used

2381

2462

# a *new* Envelope-from. check

2382

if ($self->get ("ALL") =~ /(?:^|\n)Received:\s.*\nReturn-Path:\s/s) {

2463

if ($self->get("ALL:raw") =~ /^Received:.*^Return-Path:/smi) {

2383

2464

dbg("message: Return-Path header found after 1 or more Received lines, cannot trust envelope-from");

2384

2465

} else {

2385

2466

goto ok;

2419

2500

2420

2501

my $cur_rcvd_index = -1; # none found yet

2421

2502

my $result = '';

2422

foreach my $hdr (split("\n", $self->get('ALL'))) {

2423

if ($hdr =~ /^received: /i) {

2503

2504

foreach my $hdr (split(/^/m, $self->{msg}->get_pristine_header())) {

2505

if ($hdr =~ /^Received:/i) {

2424

2506

$cur_rcvd_index++;

2425

2507

next if (defined $start_rcvd && !$include_start_rcvd &&

2426

2508

$start_rcvd == $cur_rcvd_index);

2467

2549

}

2468

2550

2469

2551

my ($tmpf, $tmpfh) = Mail::SpamAssassin::Util::secure_tmpfile();

2470

print $tmpfh $$fulltext;

2471

close $tmpfh;

2552

print $tmpfh $$fulltext or die "error writing to $tmpf: $!";

2553

close $tmpfh or die "error closing $tmpf: $!";

2472

2554

2473

2555

$self->{fulltext_tmpfile} = $tmpf;

2474

2556

2485

2567

sub delete_fulltext_tmpfile {

2486

2568

my ($self) = @_;

2487

2569

if (defined $self->{fulltext_tmpfile}) {

2488

unlink $self->{fulltext_tmpfile};

2570

unlink $self->{fulltext_tmpfile}

2571

or die "cannot unlink ".$self->{fulltext_tmpfile}.": $!";

2489

2572

$self->{fulltext_tmpfile} = undef;

2490

2573

}

2491

2574

}

2500

2583

my @addrs;

2501

2584

2502

2585

# Resent- headers take priority, if present. see bug 672

2503

# http://www.hughes-family.org/bugzilla/show_bug.cgi?id=672

2504

my $resent = $self->get('Resent-From');

2586

my $resent = $self->get('Resent-From',undef);

2505

2587

if (defined $resent && $resent =~ /\S/) {

2506

2588

@addrs = $self->{main}->find_all_addrs_in_line ($resent);

2507

2508

2589

}

2509

2590

else {

2510

2591

# bug 2292: Used to use find_all_addrs_in_line() with the same

2511

2592

# headers, but the would catch addresses in comments which caused

2512

2593

# FNs for things like whitelist_from. Since all of these are From

2513

# headers, there should only be 1 address in each anyway, so use the

2514

# :addr code...

2594

# headers, there should only be 1 address in each anyway (not exactly

2595

# true, RFC 2822 allows multiple addresses in a From header field),

2596

# so use the :addr code...

2515

2597

# bug 3366: some addresses come in as 'foo@bar...', which is invalid.

2516

2598

# so deal with the multiple periods.

2517

2599

## no critic

2518

@addrs = grep { defined($_) && length($_) > 0 } map { tr/././s; $_; }

2600

@addrs = map { tr/././s; $_ } grep { $_ ne '' }

2519

2601

($self->get('From:addr'), # std

2520

2602

$self->get('Envelope-Sender:addr'), # qmail: new-inject(1)

2521

2603

$self->get('Resent-Sender:addr'), # procmailrc manpage

2541

2623

my @addrs;

2542

2624

2543

2625

# Resent- headers take priority, if present. see bug 672

2544

# http://www.hughes-family.org/bugzilla/show_bug.cgi?id=672

2545

my $resent = $self->get('Resent-To') . $self->get('Resent-Cc');

2546

if (defined $resent && $resent =~ /\S/) {

2547

@addrs = $self->{main}->find_all_addrs_in_line (

2548

$self->get('Resent-To') . # std, rfc822

2549

$self->get('Resent-Cc')); # std, rfc822

2550

2626

my $resent = join('', $self->get('Resent-To'), $self->get('Resent-Cc'));

2627

if ($resent =~ /\S/) {

2628

@addrs = $self->{main}->find_all_addrs_in_line($resent);

2551

2629

} else {

2552

2630

# OK, a fetchmail trick: try to find the recipient address from

2553

2631

# the most recent 3 Received lines. This is required for sendmail,

2559

2637

$rcvd =~ s/\n+/\n/gs;

2560

2638

2561

2639

my @rcvdlines = split(/\n/, $rcvd, 4); pop @rcvdlines; # forget last one

2562

my @rcvdaddrs = ();

2640

my @rcvdaddrs;

2563

2641

foreach my $line (@rcvdlines) {

2564

2642

if ($line =~ / for (\S+\@\S+);/) { push (@rcvdaddrs, $1); }

2565

2643

}

2566

2644

2567

2645

@addrs = $self->{main}->find_all_addrs_in_line (

2568

join(" ", @rcvdaddrs)."\n" .

2569

$self->get('To') . # std

2570

$self->get('Apparently-To') . # sendmail, from envelope

2571

$self->get('Delivered-To') . # Postfix, poss qmail

2572

$self->get('Envelope-Recipients') . # qmail: new-inject(1)

2573

$self->get('Apparently-Resent-To') . # procmailrc manpage

2574

$self->get('X-Envelope-To') . # procmailrc manpage

2575

$self->get('Envelope-To') . # exim

2576

$self->get('X-Delivered-To') . # procmail quick start

2577

$self->get('X-Original-To') . # procmail quick start

2578

$self->get('X-Rcpt-To') . # procmail quick start

2579

$self->get('X-Real-To') . # procmail quick start

2580

$self->get('Cc')); # std

2646

join('',

2647

join(" ", @rcvdaddrs)."\n",

2648

$self->get('To'), # std

2649

$self->get('Apparently-To'), # sendmail, from envelope

2650

$self->get('Delivered-To'), # Postfix, poss qmail

2651

$self->get('Envelope-Recipients'), # qmail: new-inject(1)

2652

$self->get('Apparently-Resent-To'), # procmailrc manpage

2653

$self->get('X-Envelope-To'), # procmailrc manpage

2654

$self->get('Envelope-To'), # exim

2655

$self->get('X-Delivered-To'), # procmail quick start

2656

$self->get('X-Original-To'), # procmail quick start

2657

$self->get('X-Rcpt-To'), # procmail quick start

2658

$self->get('X-Real-To'), # procmail quick start

2659

$self->get('Cc'))); # std

2581

2660

# those are taken from various sources; thanks to Nancy McGough, who

2582

2661

# noted some in <http://www.ii.com/internet/robots/procmail/qs/#envelope>

2583

2662

}

Older »