~ubuntu-branches/ubuntu/utopic/spamassassin/utopic-updates

« back to all changes in this revision

Viewing changes to .pc/98_bug721565-syntax-5.18/lib/Mail/SpamAssassin/PerMsgStatus.pm

Committer: Package Import Robot
Author(s): Noah Meyerhans
Date: 2014-02-14 22:45:15 UTC
mfrom: (0.8.1) (0.6.2) (5.1.22 sid)
Revision ID: package-import@ubuntu.com-20140214224515-z1es2twos8xh7n2y

Tags: 3.4.0-1

http://bugs.debian.org/738963

http://bugs.debian.org/738872

http://bugs.debian.org/738867

http://bugs.debian.org/738951

http://bugs.debian.org/738974

* New upstream version! (Closes: 738963, 738872, 738867)
* Scrub the environment when switching to the debian-spamd user in
  postinst and cron.daily. (Closes: 738951)
* Enhancements to postinst to better manage ownership of
  /var/lib/spamassassin, via Iain Lane <iain.lane@canonical.com>
  (Closes: 738974)

files added:
.pc/90_pod_cleanup/lib/Mail/SpamAssassin/Plugin

.pc/90_pod_cleanup/lib/Mail/SpamAssassin/Plugin/DCC.pm

.pc/90_pod_cleanup/lib/Mail/SpamAssassin/Plugin/DNSEval.pm

.pc/98_sa-compile-quiet

.pc/98_sa-compile-quiet/lib

.pc/98_sa-compile-quiet/lib/Mail

.pc/98_sa-compile-quiet/lib/Mail/SpamAssassin

.pc/98_sa-compile-quiet/lib/Mail/SpamAssassin/Plugin

.pc/98_sa-compile-quiet/lib/Mail/SpamAssassin/Plugin/BodyRuleBaseExtractor.pm

META.json

debian/patches/98_sa-compile-quiet

lib/Mail/SpamAssassin/BayesStore/Redis.pm

lib/Mail/SpamAssassin/Plugin/AskDNS.pm

lib/Mail/SpamAssassin/Util/TinyRedis.pm

pkgrules/10_hasbase.cf

pkgrules/20_mailspike.cf

rules/v340.pre

t/autolearn.t

t/autolearn_force.t

t/autolearn_force_fail.t

t/basic_lint_without_sandbox.t

t/data/spam/badmime3.txt

t/dnsbl_subtests.t

files removed:
.pc/50_sa-learn_fix_empty_list_handling

.pc/50_sa-learn_fix_empty_list_handling/sa-learn.raw

.pc/60_bug_684709

.pc/60_bug_684709/lib

.pc/60_bug_684709/lib/Mail

.pc/60_bug_684709/lib/Mail/SpamAssassin

.pc/60_bug_684709/lib/Mail/SpamAssassin/Message.pm

.pc/85_disable_SSLv2

.pc/85_disable_SSLv2/spamc

.pc/85_disable_SSLv2/spamc/libspamc.c

.pc/85_disable_SSLv2/spamc/libspamc.h

.pc/85_disable_SSLv2/spamc/spamc.c

.pc/85_disable_SSLv2/spamc/spamc.pod

.pc/85_disable_SSLv2/spamd

.pc/85_disable_SSLv2/spamd/spamd.raw

.pc/90_missing_tld

.pc/90_missing_tld/lib

.pc/90_missing_tld/lib/Mail

.pc/90_missing_tld/lib/Mail/SpamAssassin

.pc/90_missing_tld/lib/Mail/SpamAssassin/Util

.pc/90_missing_tld/lib/Mail/SpamAssassin/Util/RegistrarBoundaries.pm

.pc/90_missing_tld/t

.pc/90_missing_tld/t/uri_text.t

.pc/91_no_rfc_ignorant

.pc/91_no_rfc_ignorant/pkgrules

.pc/91_no_rfc_ignorant/pkgrules/20_dnsbl_tests.cf

.pc/91_no_rfc_ignorant/pkgrules/30_text_de.cf

.pc/91_no_rfc_ignorant/pkgrules/50_scores.cf

.pc/91_no_rfc_ignorant/pkgrules/local.cf

.pc/91_no_rfc_ignorant/rules

.pc/91_no_rfc_ignorant/rules/STATISTICS-set1.txt

.pc/91_no_rfc_ignorant/rules/STATISTICS-set3.txt

.pc/91_no_rfc_ignorant/rules/active.list

.pc/95_bug694504-spamdforkscaling-crash

.pc/95_bug694504-spamdforkscaling-crash/lib

.pc/95_bug694504-spamdforkscaling-crash/lib/Mail

.pc/95_bug694504-spamdforkscaling-crash/lib/Mail/SpamAssassin

.pc/95_bug694504-spamdforkscaling-crash/lib/Mail/SpamAssassin/Logger

.pc/95_bug694504-spamdforkscaling-crash/lib/Mail/SpamAssassin/Logger/Syslog.pm

.pc/95_bug694504-spamdforkscaling-crash/spamd

.pc/95_bug694504-spamdforkscaling-crash/spamd/spamd.raw

.pc/96_disable_njabl

.pc/96_disable_njabl/pkgrules

.pc/96_disable_njabl/pkgrules/20_dnsbl_tests.cf

.pc/96_disable_njabl/pkgrules/30_text_de.cf

.pc/96_disable_njabl/pkgrules/30_text_fr.cf

.pc/96_disable_njabl/pkgrules/30_text_nl.cf

.pc/96_disable_njabl/pkgrules/30_text_pl.cf

.pc/96_disable_njabl/pkgrules/50_scores.cf

.pc/98_bug721565-syntax-5.18

.pc/98_bug721565-syntax-5.18/lib

.pc/98_bug721565-syntax-5.18/lib/Mail

.pc/98_bug721565-syntax-5.18/lib/Mail/SpamAssassin

.pc/98_bug721565-syntax-5.18/lib/Mail/SpamAssassin/AsyncLoop.pm

.pc/98_bug721565-syntax-5.18/lib/Mail/SpamAssassin/Conf

.pc/98_bug721565-syntax-5.18/lib/Mail/SpamAssassin/Conf/Parser.pm

.pc/98_bug721565-syntax-5.18/lib/Mail/SpamAssassin/DnsResolver.pm

.pc/98_bug721565-syntax-5.18/lib/Mail/SpamAssassin/Message.pm

.pc/98_bug721565-syntax-5.18/lib/Mail/SpamAssassin/PerMsgStatus.pm

.pc/98_bug721565-syntax-5.18/lib/Mail/SpamAssassin/Util.pm

debian/patches/50_sa-learn_fix_empty_list_handling

debian/patches/60_bug_684709

debian/patches/85_disable_SSLv2

debian/patches/90_missing_tld

debian/patches/91_no_rfc_ignorant

debian/patches/95_bug694504-spamdforkscaling-crash

debian/patches/96_disable_njabl

debian/patches/98_bug721565-syntax-5.18

rules/STATISTICS-set0.txt

rules/STATISTICS-set1.txt

rules/STATISTICS-set2.txt

rules/STATISTICS-set3.txt

t/dkim2.t

t/spamd_ssl_v2.t

t/spamd_ssl_v23.t

files modified:
.pc/10_change_config_paths/INSTALL

.pc/10_change_config_paths/README

.pc/10_change_config_paths/UPGRADE

.pc/10_change_config_paths/lib/Mail/SpamAssassin/Conf.pm

.pc/10_change_config_paths/lib/Mail/SpamAssassin/Plugin/Test.pm

.pc/10_change_config_paths/lib/spamassassin-run.pod

.pc/10_change_config_paths/sa-compile.raw

.pc/10_change_config_paths/sa-learn.raw

.pc/10_change_config_paths/spamc/spamc.pod

.pc/10_change_config_paths/spamd/spamd.raw

.pc/10_change_config_paths/sql/README

.pc/20_edit_spamc_pod/spamc/spamc.pod

.pc/30_edit_README/README

.pc/55_disable_nagios_epm/sa-check_spamd.raw

.pc/90_pod_cleanup/lib/Mail/SpamAssassin/Conf.pm

.pc/97_bug720499-pod-5.18/sa-check_spamd.raw

.pc/applied-patches

CREDITS

Changes

INSTALL

MANIFEST

MANIFEST.SKIP

META.yml

Makefile.PL

README

UPGRADE

build/mkrules

build/parse-rules-for-masses

debian/changelog

debian/control

debian/patches/10_change_config_paths

debian/patches/20_edit_spamc_pod

debian/patches/90_pod_cleanup

debian/patches/series

debian/sa-compile.postinst

debian/spamassassin.cron.daily

debian/spamassassin.postinst

debian/tests/daemon *

debian/tests/get_spam_points.py *

lib/Mail/SpamAssassin.pm

lib/Mail/SpamAssassin/AICache.pm

lib/Mail/SpamAssassin/ArchiveIterator.pm

lib/Mail/SpamAssassin/AsyncLoop.pm

lib/Mail/SpamAssassin/AutoWhitelist.pm

lib/Mail/SpamAssassin/Bayes/CombineChi.pm

lib/Mail/SpamAssassin/Bayes/CombineNaiveBayes.pm

lib/Mail/SpamAssassin/BayesStore.pm

lib/Mail/SpamAssassin/BayesStore/BDB.pm

lib/Mail/SpamAssassin/BayesStore/DBM.pm

lib/Mail/SpamAssassin/BayesStore/MySQL.pm

lib/Mail/SpamAssassin/BayesStore/PgSQL.pm

lib/Mail/SpamAssassin/BayesStore/SQL.pm

lib/Mail/SpamAssassin/Client.pm

lib/Mail/SpamAssassin/Conf.pm

lib/Mail/SpamAssassin/Conf/LDAP.pm

lib/Mail/SpamAssassin/Conf/Parser.pm

lib/Mail/SpamAssassin/Conf/SQL.pm

lib/Mail/SpamAssassin/Constants.pm

lib/Mail/SpamAssassin/Dns.pm

lib/Mail/SpamAssassin/DnsResolver.pm

lib/Mail/SpamAssassin/HTML.pm

lib/Mail/SpamAssassin/Locales.pm

lib/Mail/SpamAssassin/Logger.pm

lib/Mail/SpamAssassin/Logger/File.pm

lib/Mail/SpamAssassin/Logger/Syslog.pm

lib/Mail/SpamAssassin/MailingList.pm

lib/Mail/SpamAssassin/Message.pm

lib/Mail/SpamAssassin/Message/Metadata.pm

lib/Mail/SpamAssassin/Message/Metadata/Received.pm

lib/Mail/SpamAssassin/Message/Node.pm

lib/Mail/SpamAssassin/NetSet.pm

lib/Mail/SpamAssassin/PerMsgStatus.pm

lib/Mail/SpamAssassin/Plugin.pm

lib/Mail/SpamAssassin/Plugin/ASN.pm

lib/Mail/SpamAssassin/Plugin/AWL.pm

lib/Mail/SpamAssassin/Plugin/AutoLearnThreshold.pm

lib/Mail/SpamAssassin/Plugin/Bayes.pm

lib/Mail/SpamAssassin/Plugin/BodyEval.pm

lib/Mail/SpamAssassin/Plugin/BodyRuleBaseExtractor.pm

lib/Mail/SpamAssassin/Plugin/Check.pm

lib/Mail/SpamAssassin/Plugin/DCC.pm

lib/Mail/SpamAssassin/Plugin/DKIM.pm

lib/Mail/SpamAssassin/Plugin/DNSEval.pm

lib/Mail/SpamAssassin/Plugin/FreeMail.pm

lib/Mail/SpamAssassin/Plugin/Hashcash.pm

lib/Mail/SpamAssassin/Plugin/HeaderEval.pm

lib/Mail/SpamAssassin/Plugin/ImageInfo.pm

lib/Mail/SpamAssassin/Plugin/MIMEEval.pm

lib/Mail/SpamAssassin/Plugin/PhishTag.pm

lib/Mail/SpamAssassin/Plugin/Pyzor.pm

lib/Mail/SpamAssassin/Plugin/Razor2.pm

lib/Mail/SpamAssassin/Plugin/RelayCountry.pm

lib/Mail/SpamAssassin/Plugin/Reuse.pm

lib/Mail/SpamAssassin/Plugin/SPF.pm

lib/Mail/SpamAssassin/Plugin/Test.pm

lib/Mail/SpamAssassin/Plugin/TextCat.pm

lib/Mail/SpamAssassin/Plugin/URIDNSBL.pm

lib/Mail/SpamAssassin/Plugin/URIEval.pm

lib/Mail/SpamAssassin/Plugin/VBounce.pm

lib/Mail/SpamAssassin/Plugin/WLBLEval.pm

lib/Mail/SpamAssassin/Plugin/WhiteListSubject.pm

lib/Mail/SpamAssassin/PluginHandler.pm

lib/Mail/SpamAssassin/Reporter.pm

lib/Mail/SpamAssassin/SQLBasedAddrList.pm

lib/Mail/SpamAssassin/SpamdForkScaling.pm

lib/Mail/SpamAssassin/Timeout.pm

lib/Mail/SpamAssassin/Util.pm

lib/Mail/SpamAssassin/Util/DependencyInfo.pm

lib/Mail/SpamAssassin/Util/Progress.pm

lib/Mail/SpamAssassin/Util/RegistrarBoundaries.pm

lib/Mail/SpamAssassin/Util/ScopedTimer.pm

lib/spamassassin-run.pod

pkgrules/10_default_prefs.cf

pkgrules/20_aux_tlds.cf

pkgrules/20_dnsbl_tests.cf

pkgrules/20_drugs.cf

pkgrules/20_dynrdns.cf

pkgrules/20_fake_helo_tests.cf

pkgrules/20_freemail.cf

pkgrules/20_freemail_domains.cf

pkgrules/20_head_tests.cf

pkgrules/20_html_tests.cf

pkgrules/20_phrases.cf

pkgrules/20_ratware.cf

pkgrules/20_uri_tests.cf

pkgrules/20_vbounce.cf

pkgrules/25_dcc.cf

pkgrules/25_replace.cf

pkgrules/25_spf.cf

pkgrules/25_uribl.cf

pkgrules/30_text_de.cf

pkgrules/30_text_fr.cf

pkgrules/30_text_nl.cf

pkgrules/30_text_pl.cf

pkgrules/30_text_pt_br.cf

pkgrules/50_scores.cf

pkgrules/60_adsp_override_dkim.cf

pkgrules/60_whitelist_dkim.cf

pkgrules/72_active.cf

pkgrules/72_scores.cf

pkgrules/73_sandbox_manual_scores.cf

pkgrules/STATISTICS-set0-72_scores.cf.txt

pkgrules/STATISTICS-set1-72_scores.cf.txt

pkgrules/local.cf

pkgrules/regression_tests.cf

rules/active.list

rules/init.pre

rules/local.cf

rules/regression_tests.cf

sa-awl.raw

sa-check_spamd.raw

sa-compile.raw

sa-learn.raw

sa-update.raw

spamassassin.raw

spamc/libspamc.c

spamc/libspamc.h

spamc/spamc.c

spamc/spamc.pod

spamc/utils.c

spamc/utils.h

spamd/netbsd-rc-script.sh

spamd/spamd.raw

sql/README

sql/README.bayes

sql/awl_mysql.sql

sql/bayes_mysql.sql

sql/userpref_pg.sql

t/SATest.pm

t/bayesbdb.t

t/bayesdbm.t

t/bayesdbm_flock.t

t/bayessdbm.t

t/bayessdbm_seen_delete.t

t/bayessql.t

t/config.dist

t/config_errs.t

t/cross_user_config_leak.t

t/data/01_test_rules.cf

t/dcc.t

t/debug.t

t/dkim.t

t/duplicates.t

t/html_colors.t

t/if_can.t

t/lang_pl_tests.t

t/mimeparse.t

t/mkrules_else.t *

t/priorities.t

t/rcvd_parser.t

t/re_base_extraction.t

t/rule_multiple.t

t/rule_names.t

t/sa_check_spamd.t

t/sa_compile.t

t/spamd_hup.t

t/spamd_maxsize.t

t/spamd_prefork_stress_4.t

t/spamd_protocol_10.t

t/spamd_sql_prefs.t

t/spamd_unix_and_tcp.t

t/spf.t

t/trust_path.t

t/uri_text.t

t/uribl.t

t/uribl_all_types.t

t/uribl_domains_only.t

t/uribl_ips_only.t

t/whitelist_addrs.t

Show diffs side-by-side

added added

removed removed

.pc/98_bug721565-syntax-5.18/lib/Mail/SpamAssassin/PerMsgStatus.pm

# <@LICENSE>

# Licensed to the Apache Software Foundation (ASF) under one or more

# contributor license agreements. See the NOTICE file distributed with

# this work for additional information regarding copyright ownership.

# The ASF licenses this file to you under the Apache License, Version 2.0

# (the "License"); you may not use this file except in compliance with

# the License. You may obtain a copy of the License at:

# http://www.apache.org/licenses/LICENSE-2.0

# Unless required by applicable law or agreed to in writing, software

# distributed under the License is distributed on an "AS IS" BASIS,

# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

# See the License for the specific language governing permissions and

# limitations under the License.

# </@LICENSE>

=head1 NAME

Mail::SpamAssassin::PerMsgStatus - per-message status (spam or not-spam)

=head1 SYNOPSIS

my $spamtest = new Mail::SpamAssassin ({

'rules_filename' => '/etc/spamassassin.rules',

'userprefs_filename' => $ENV{HOME}.'/.spamassassin/user_prefs'

});

my $mail = $spamtest->parse();

my $status = $spamtest->check ($mail);

my $rewritten_mail;

if ($status->is_spam()) {

$rewritten_mail = $status->rewrite_mail ();

}

...

=head1 DESCRIPTION

The Mail::SpamAssassin C<check()> method returns an object of this

class. This object encapsulates all the per-message state.

=head1 METHODS

=over 4

=cut

package Mail::SpamAssassin::PerMsgStatus;

use strict;

use warnings;

use re 'taint';

use Time::HiRes qw(time);

use Mail::SpamAssassin::Constants qw(:sa);

use Mail::SpamAssassin::AsyncLoop;

use Mail::SpamAssassin::Conf;

use Mail::SpamAssassin::Util qw(untaint_var);

use Mail::SpamAssassin::Util::RegistrarBoundaries;

use Mail::SpamAssassin::Timeout;

use Mail::SpamAssassin::Logger;

use vars qw{

@ISA @TEMPORARY_METHODS %TEMPORARY_EVAL_GLUE_METHODS

};

@ISA = qw();

# methods defined by the compiled ruleset; deleted in finish_tests()

@TEMPORARY_METHODS = ();

# methods defined by register_plugin_eval_glue(); deleted in finish_tests()

%TEMPORARY_EVAL_GLUE_METHODS = ();

###########################################################################

sub new {

my $class = shift;

$class = ref($class) || $class;

my ($main, $msg, $opts) = @_;

my $self = {

'main' => $main,

'msg' => $msg,

'score' => 0,

'test_logs' => '',

'test_names_hit' => [ ],

'subtest_names_hit' => [ ],

'spamd_result_log_items' => [ ],

'tests_already_hit' => { },

'c' => { },

'tag_data' => { },

'rule_errors' => 0,

'disable_auto_learning' => 0,

'auto_learn_status' => undef,

'conf' => $main->{conf},

100

'async' => Mail::SpamAssassin::AsyncLoop->new($main),

101

'master_deadline' => $msg->{master_deadline}, # dflt inherited from msg

102

'deadline_exceeded' => 0, # time limit exceeded, skipping further tests

103

};

104

#$self->{main}->{use_rule_subs} = 1;

105

106

dbg("check: pms new, time limit in %.3f s",

107

$self->{master_deadline} - time) if $self->{master_deadline};

108

109

if (defined $opts && $opts->{disable_auto_learning}) {

110

$self->{disable_auto_learning} = 1;

111

}

112

113

# used with "mass-check --loghits"

114

if ($self->{main}->{save_pattern_hits}) {

115

$self->{save_pattern_hits} = 1;

116

$self->{pattern_hits} = { };

117

}

118

119

delete $self->{should_log_rule_hits};

120

my $dbgcache = would_log('dbg', 'rules');

121

if ($dbgcache || $self->{save_pattern_hits}) {

122

$self->{should_log_rule_hits} = 1;

123

}

124

125

# known valid tags that might not get their entry in pms->{tag_data}

126

# in some circumstances

127

my $tag_data_ref = $self->{tag_data};

128

foreach (qw(SUMMARY REPORT RBL)) { $tag_data_ref->{$_} = '' }

129

foreach (qw(AWL AWLMEAN AWLCOUNT AWLPRESCORE

130

DCCB DCCR DCCREP PYZOR DKIMIDENTITY DKIMDOMAIN

131

BAYESTC BAYESTCLEARNED BAYESTCSPAMMY BAYESTCHAMMY

132

HAMMYTOKENS SPAMMYTOKENS TOKENSUMMARY)) {

133

$tag_data_ref->{$_} = undef; # exist, but undefined

134

}

135

136

bless ($self, $class);

137

$self;

138

}

139

140

###########################################################################

141

142

=item $status->check ()

143

144

Runs the SpamAssassin rules against the message pointed to by the object.

145

146

=cut

147

148

sub check {

149

my ($self) = shift;

150

my $master_deadline = $self->{master_deadline};

151

if (!$master_deadline) {

152

$self->check_timed(@_);

153

} else {

154

my $t = Mail::SpamAssassin::Timeout->new({ deadline => $master_deadline });

155

my $err = $t->run(sub { $self->check_timed(@_) });

156

if (time > $master_deadline && !$self->{deadline_exceeded}) {

157

info("check: exceeded time limit in pms check");

158

$self->{deadline_exceeded} = 1;

159

}

160

}

161

}

162

163

sub check_timed {

164

my ($self) = @_;

165

local ($_);

166

167

$self->{learned_points} = 0;

168

$self->{body_only_points} = 0;

169

$self->{head_only_points} = 0;

170

$self->{score} = 0;

171

172

$self->{main}->call_plugins ("check_start", { permsgstatus => $self });

173

174

# in order of slowness; fastest first, slowest last.

175

# we do ALL the tests, even if a spam triggers lots of them early on.

176

# this lets us see ludicrously spammish mails (score: 40) etc., which

177

# we can then immediately submit to spamblocking services.

178

179

# TODO: change this to do whitelist/blacklists first? probably a plan

180

# NOTE: definitely need AWL stuff last, for regression-to-mean of score

181

182

# TVD: we may want to do more than just clearing out the headers, but ...

183

$self->{msg}->delete_header('X-Spam-.*');

184

185

# Resident Mail::SpamAssassin code will possibly never change score

186

# sets, even if bayes becomes available. So we should do a quick check

187

# to see if we should go from {0,1} to {2,3}. We of course don't need

188

# to do this switch if we're already using bayes ... ;)

189

my $set = $self->{conf}->get_score_set();

190

if (($set & 2) == 0 && $self->{main}->{bayes_scanner} && $self->{main}->{bayes_scanner}->is_scan_available()) {

191

dbg("check: scoreset $set but bayes is available, switching scoresets");

192

$self->{conf}->set_score_set ($set|2);

193

}

194

195

# The primary check functionality occurs via a plugin call. For more

196

# information, please see: Mail::SpamAssassin::Plugin::Check

197

if (!$self->{main}->call_plugins ("check_main", { permsgstatus => $self }))

198

{

199

# did anything happen? if not, this is fatal

200

if (!$self->{main}->have_plugin("check_main")) {

201

die "check: no loaded plugin implements 'check_main': cannot scan!\n".

202

"Check the necessary '.pre' files are in the config directory.\n";

203

}

204

}

205

206

# delete temporary storage and memory allocation used during checking

207

$self->delete_fulltext_tmpfile();

208

209

# now that we've finished checking the mail, clear out this cache

210

# to avoid unforeseen side-effects.

211

$self->{c} = { };

212

213

# Round the score to 3 decimal places to avoid rounding issues

214

# We assume required_score to be properly rounded already.

215

# add 0 to force it back to numeric representation instead of string.

216

$self->{score} = (sprintf "%0.3f", $self->{score}) + 0;

217

218

dbg("check: is spam? score=".$self->{score}.

219

" required=".$self->{conf}->{required_score});

220

dbg("check: tests=".$self->get_names_of_tests_hit());

221

dbg("check: subtests=".$self->get_names_of_subtests_hit());

222

$self->{is_spam} = $self->is_spam();

223

224

$self->{main}->{resolver}->bgabort();

225

$self->{main}->call_plugins ("check_end", { permsgstatus => $self });

226

227

228

}

229

230

###########################################################################

231

232

=item $status->learn()

233

234

After a mail message has been checked, this method can be called. If the score

235

is outside a certain range around the threshold, ie. if the message is judged

236

more-or-less definitely spam or definitely non-spam, it will be fed into

237

SpamAssassin's learning systems (currently the naive Bayesian classifier),

238

so that future similar mails will be caught.

239

240

=cut

241

242

sub learn {

243

my ($self) = shift;

244

my $master_deadline = $self->{master_deadline};

245

if (!$master_deadline) {

246

$self->learn_timed(@_);

247

} else {

248

my $t = Mail::SpamAssassin::Timeout->new({ deadline => $master_deadline });

249

my $err = $t->run(sub { $self->learn_timed(@_) });

250

if (time > $master_deadline && !$self->{deadline_exceeded}) {

251

info("learn: exceeded time limit in pms learn");

252

$self->{deadline_exceeded} = 1;

253

}

254

}

255

}

256

257

sub learn_timed {

258

my ($self) = @_;

259

260

if (!$self->{conf}->{bayes_auto_learn} ||

261

!$self->{conf}->{use_bayes} ||

262

$self->{disable_auto_learning})

263

{

264

$self->{auto_learn_status} = "disabled";

265

return;

266

}

267

268

my $isspam = $self->{main}->call_plugins ("autolearn_discriminator", {

269

permsgstatus => $self

270

});

271

272

if (!defined $isspam) {

273

$self->{auto_learn_status} = 'no';

274

return;

275

}

276

277

my $timer = $self->{main}->time_method("learn");

278

279

$self->{main}->call_plugins ("autolearn", {

280

permsgstatus => $self,

281

isspam => $isspam

282

});

283

284

# bug 3704: temporarily override learn's ability to re-learn a message

285

my $orig_learner = $self->{main}->init_learner({ "no_relearn" => 1 });

286

287

my $eval_stat;

288

eval {

289

my $learnstatus = $self->{main}->learn ($self->{msg}, undef, $isspam, 0);

290

if ($learnstatus->did_learn()) {

291

$self->{auto_learn_status} = $isspam ? "spam" : "ham";

292

}

293

# This must wait until the did_learn call.

294

$learnstatus->finish();

295

$self->{main}->finish_learner(); # for now

296

297

if (exists $self->{main}->{bayes_scanner}) {

298

$self->{main}->{bayes_scanner}->force_close();

299

}

300

301

} or do {

302

$eval_stat = $@ ne '' ? $@ : "errno=$!"; chomp $eval_stat;

303

};

304

305

# reset learner options to their original values

306

$self->{main}->init_learner($orig_learner);

307

308

if (defined $eval_stat) {

309

dbg("learn: auto-learning failed: $eval_stat");

310

$self->{auto_learn_status} = "failed";

311

}

312

}

313

314

=item $score = $status->get_autolearn_points()

315

316

Return the message's score as computed for auto-learning. Certain tests are

317

ignored:

318

319

- rules with tflags set to 'learn' (the Bayesian rules)

320

321

- rules with tflags set to 'userconf' (user white/black-listing rules, etc)

322

323

- rules with tflags set to 'noautolearn'

324

325

Also note that auto-learning occurs using scores from either scoreset 0 or 1,

326

depending on what scoreset is used during message check. It is likely that the

327

message check and auto-learn scores will be different.

328

329

=cut

330

331

sub get_autolearn_points {

332

my ($self) = @_;

333

$self->_get_autolearn_points();

334

return $self->{autolearn_points};

335

}

336

337

=item $score = $status->get_head_only_points()

338

339

Return the message's score as computed for auto-learning, ignoring

340

all rules except for header-based ones.

341

342

=cut

343

344

sub get_head_only_points {

345

my ($self) = @_;

346

$self->_get_autolearn_points();

347

return $self->{head_only_points};

348

}

349

350

=item $score = $status->get_learned_points()

351

352

Return the message's score as computed for auto-learning, ignoring

353

all rules except for learning-based ones.

354

355

=cut

356

357

sub get_learned_points {

358

my ($self) = @_;

359

$self->_get_autolearn_points();

360

return $self->{learned_points};

361

}

362

363

=item $score = $status->get_body_only_points()

364

365

Return the message's score as computed for auto-learning, ignoring

366

all rules except for body-based ones.

367

368

=cut

369

370

sub get_body_only_points {

371

my ($self) = @_;

372

$self->_get_autolearn_points();

373

return $self->{body_only_points};

374

}

375

376

sub _get_autolearn_points {

377

my ($self) = @_;

378

379

return if (exists $self->{autolearn_points});

380

# ensure it only gets computed once, even if we return early

381

$self->{autolearn_points} = 0;

382

383

# This function needs to use use sum($score[scoreset % 2]) not just {score}.

384

# otherwise we shift what we autolearn on and it gets really wierd. - tvd

385

my $orig_scoreset = $self->{conf}->get_score_set();

386

my $new_scoreset = $orig_scoreset;

387

my $scores = $self->{conf}->{scores};

388

389

if (($orig_scoreset & 2) == 0) { # we don't need to recompute

390

dbg("learn: auto-learn: currently using scoreset $orig_scoreset");

391

}

392

else {

393

$new_scoreset = $orig_scoreset & ~2;

394

dbg("learn: auto-learn: currently using scoreset $orig_scoreset, recomputing score based on scoreset $new_scoreset");

395

$scores = $self->{conf}->{scoreset}->[$new_scoreset];

396

}

397

398

my $tflags = $self->{conf}->{tflags};

399

my $points = 0;

400

401

# Just in case this function is called multiple times, clear out the

402

# previous calculated values

403

$self->{learned_points} = 0;

404

$self->{body_only_points} = 0;

405

$self->{head_only_points} = 0;

406

407

foreach my $test (@{$self->{test_names_hit}}) {

408

# According to the documentation, noautolearn, userconf, and learn

409

# rules are ignored for autolearning.

410

if (exists $tflags->{$test}) {

411

next if $tflags->{$test} =~ /\bnoautolearn\b/;

412

next if $tflags->{$test} =~ /\buserconf\b/;

413

414

# Keep track of the learn points for an additional autolearn check.

415

# Use the original scoreset since it'll be 0 in sets 0 and 1.

416

if ($tflags->{$test} =~ /\blearn\b/) {

417

# we're guaranteed that the score will be defined

418

$self->{learned_points} += $self->{conf}->{scoreset}->[$orig_scoreset]->{$test};

419

next;

420

}

421

}

422

423

# ignore tests with 0 score in this scoreset

424

next if ($scores->{$test} == 0);

425

426

# Go ahead and add points to the proper locations

427

if (!$self->{conf}->maybe_header_only ($test)) {

428

$self->{body_only_points} += $scores->{$test};

429

}

430

if (!$self->{conf}->maybe_body_only ($test)) {

431

$self->{head_only_points} += $scores->{$test};

432

}

433

434

$points += $scores->{$test};

435

}

436

437

# Figure out the final value we'll use for autolearning

438

$points = (sprintf "%0.3f", $points) + 0;

439

dbg("learn: auto-learn: message score: ".$self->{score}.", computed score for autolearn: $points");

440

441

$self->{autolearn_points} = $points;

442

}

443

444

###########################################################################

445

446

=item $isspam = $status->is_spam ()

447

448

After a mail message has been checked, this method can be called. It will

449

return 1 for mail determined likely to be spam, 0 if it does not seem

450

spam-like.

451

452

=cut

453

454

sub is_spam {

455

my ($self) = @_;

456

# changed to test this so sub-tests can ask "is_spam" during a run

457

return ($self->{score} >= $self->{conf}->{required_score});

458

}

459

460

###########################################################################

461

462

=item $list = $status->get_names_of_tests_hit ()

463

464

After a mail message has been checked, this method can be called. It will

465

return a comma-separated string, listing all the symbolic test names

466

of the tests which were trigged by the mail.

467

468

=cut

469

470

sub get_names_of_tests_hit {

471

my ($self) = @_;

472

473

return join(',', sort(@{$self->{test_names_hit}}));

474

}

475

476

###########################################################################

477

478

=item $list = $status->get_names_of_subtests_hit ()

479

480

After a mail message has been checked, this method can be called. It will

481

return a comma-separated string, listing all the symbolic test names of the

482

meta-rule sub-tests which were trigged by the mail. Sub-tests are the

483

normally-hidden rules, which score 0 and have names beginning with two

484

underscores, used in meta rules.

485

486

=cut

487

488

sub get_names_of_subtests_hit {

489

my ($self) = @_;

490

491

return join(',', sort(@{$self->{subtest_names_hit}}));

492

}

493

494

###########################################################################

495

496

=item $num = $status->get_score ()

497

498

After a mail message has been checked, this method can be called. It will

499

return the message's score.

500

501

=cut

502

503

sub get_score {

504

my ($self) = @_;

505

return $self->{score};

506

}

507

508

# left as backward compatibility

509

sub get_hits {

510

my ($self) = @_;

511

return $self->{score};

512

}

513

514

###########################################################################

515

516

=item $num = $status->get_required_score ()

517

518

After a mail message has been checked, this method can be called. It will

519

return the score required for a mail to be considered spam.

520

521

=cut

522

523

sub get_required_score {

524

my ($self) = @_;

525

return $self->{conf}->{required_score};

526

}

527

528

# left as backward compatibility

529

sub get_required_hits {

530

my ($self) = @_;

531

return $self->{conf}->{required_score};

532

}

533

534

###########################################################################

535

536

=item $num = $status->get_autolearn_status ()

537

538

After a mail message has been checked, this method can be called. It will

539

return one of the following strings depending on whether the mail was

540

auto-learned or not: "ham", "no", "spam", "disabled", "failed", "unavailable".

541

542

=cut

543

544

sub get_autolearn_status {

545

my ($self) = @_;

546

return ($self->{auto_learn_status} || "unavailable");

547

}

548

549

###########################################################################

550

551

=item $report = $status->get_report ()

552

553

Deliver a "spam report" on the checked mail message. This contains details of

554

how many spam detection rules it triggered.

555

556

The report is returned as a multi-line string, with the lines separated by

557

C<\n> characters.

558

559

=cut

560

561

sub get_report {

562

my ($self) = @_;

563

564

if (!exists $self->{'report'}) {

565

my $report;

566

567

my $timer = $self->{main}->time_method("get_report");

568

$report = $self->{conf}->{report_template};

569

$report ||= '(no report template found)';

570

571

$report = $self->_replace_tags($report);

572

573

$report =~ s/\n*$/\n\n/s;

574

$self->{report} = $report;

575

}

576

577

return $self->{report};

578

}

579

580

###########################################################################

581

582

=item $preview = $status->get_content_preview ()

583

584

Give a "preview" of the content.

585

586

This is returned as a multi-line string, with the lines separated by C<\n>

587

characters, containing a fully-decoded, safe, plain-text sample of the first

588

few lines of the message body.

589

590

=cut

591

592

sub get_content_preview {

593

my ($self) = @_;

594

595

my $str = '';

596

my $ary = $self->get_decoded_stripped_body_text_array();

597

shift @{$ary}; # drop the subject line

598

599

my $numlines = 3;

600

while (length ($str) < 200 && @{$ary} && $numlines-- > 0) {

601

$str .= shift @{$ary};

602

}

603

undef $ary;

604

chomp ($str); $str .= " [...]\n";

605

606

# in case the last line was huge, trim it back to around 200 chars

607

local $1;

608

$str =~ s/^(.{,200}).*$/$1/gs;

609

610

# now, some tidy-ups that make things look a bit prettier

611

$str =~ s/-----Original Message-----.*$//gs;

612

$str =~ s/This is a multi-part message in MIME format\.//gs;

613

$str =~ s/[-_\*\.]{10,}//gs;

614

$str =~ s/\s+/ /gs;

615

616

# add "Content preview:" ourselves, so that the text aligns

617

# correctly with the template -- then trim it off. We don't

618

# have to get this *exactly* right, but it's nicer if we

619

# make a bit of an effort ;)

620

$str = Mail::SpamAssassin::Util::wrap($str, " ", "Content preview: ", 75, 1);

621

$str =~ s/^Content preview:\s+//gs;

622

623

return $str;

624

}

625

626

###########################################################################

627

628

=item $msg = $status->get_message()

629

630

Return the object representing the message being scanned.

631

632

=cut

633

634

sub get_message {

635

my ($self) = @_;

636

return $self->{msg};

637

}

638

639

###########################################################################

640

641

=item $status->rewrite_mail ()

642

643

Rewrite the mail message. This will at minimum add headers, and at

644

maximum MIME-encapsulate the message text, to reflect its spam or not-spam

645

status. The function will return a scalar of the rewritten message.

646

647

The actual modifications depend on the configuration (see

648

C<Mail::SpamAssassin::Conf> for more information).

649

650

The possible modifications are as follows:

651

652

=over 4

653

654

=item To:, From: and Subject: modification on spam mails

655

656

Depending on the configuration, the To: and From: lines can have a

657

user-defined RFC 2822 comment appended for spam mail. The subject line

658

may have a user-defined string prepended to it for spam mail.

659

660

=item X-Spam-* headers for all mails

661

662

Depending on the configuration, zero or more headers with names

663

beginning with C<X-Spam-> will be added to mail depending on whether

664

it is spam or ham.

665

666

=item spam message with report_safe

667

668

If report_safe is set to true (1), then spam messages are encapsulated

669

into their own message/rfc822 MIME attachment without any modifications

670

being made.

671

672

If report_safe is set to false (0), then the message will only have the

673

above headers added/modified.

674

675

=back

676

677

=cut

678

679

sub rewrite_mail {

680

my ($self) = @_;

681

682

my $timer = $self->{main}->time_method("rewrite_mail");

683

my $msg = $self->{msg}->get_mbox_separator() || '';

684

685

if ($self->{is_spam} && $self->{conf}->{report_safe}) {

686

$msg .= $self->rewrite_report_safe();

687

}

688

else {

689

$msg .= $self->rewrite_no_report_safe();

690

}

691

692

return $msg;

693

}

694

695

# Make the line endings in the passed string reference appropriate

696

# for the original mail. Callers must note bug 5250: don't rewrite

697

# the message body, since that will corrupt 8bit attachments/MIME parts.

698

699

sub _fixup_report_line_endings {

700

my ($self, $strref) = @_;

701

if ($self->{msg}->{line_ending} ne "\n") {

702

$$strref =~ s/\r?\n/$self->{msg}->{line_ending}/gs;

703

}

704

}

705

706

sub _get_added_headers($) {

707

my ($self, $which) = @_;

708

my $str = '';

709

# use string appends to put this back together -- I finally benchmarked it.

710

# join() is 56% of the speed of just using string appends. ;)

711

foreach my $hf_ref (@{$self->{conf}->{$which}}) {

712

my($hfname, $hfbody) = @$hf_ref;

713

my $line = $self->_process_header($hfname,$hfbody);

714

$line = $self->qp_encode_header($line);

715

$str .= "X-Spam-$hfname: $line\n";

716

}

717

return $str;

718

};

719

720

# rewrite the message in report_safe mode

721

# should not be called directly, use rewrite_mail instead

722

723

sub rewrite_report_safe {

724

my ($self) = @_;

725

726

# This is the original message. We do not want to make any modifications so

727

# we may recover it if necessary. It will be put into the new message as a

728

# message/rfc822 MIME part.

729

my $original = $self->{msg}->get_pristine();

730

731

# This is the new message.

732

my $newmsg = '';

733

734

# the report charset

735

my $report_charset = "; charset=iso-8859-1";

736

if ($self->{conf}->{report_charset}) {

737

$report_charset = "; charset=" . $self->{conf}->{report_charset};

738

}

739

740

# the SpamAssassin report

741

my $report = $self->get_report();

742

743

# If there are any wide characters, need to MIME-encode in UTF-8

744

# TODO: If $report_charset is something other than iso-8859-1/us-ascii, then

745

# we could try converting to that charset if possible

746

unless ($] < 5.008 || utf8::downgrade($report, 1)) {

747

$report_charset = "; charset=utf-8";

748

utf8::encode($report);

749

}

750

751

# get original headers, "pristine" if we can do it

752

my $from = $self->{msg}->get_pristine_header("From");

753

my $to = $self->{msg}->get_pristine_header("To");

754

my $cc = $self->{msg}->get_pristine_header("Cc");

755

my $subject = $self->{msg}->get_pristine_header("Subject");

756

my $msgid = $self->{msg}->get_pristine_header('Message-Id');

757

my $date = $self->{msg}->get_pristine_header("Date");

758

759

# It'd be nice to do this with a foreach loop, but with only three

760

# possibilities right now, it's easier not to...

761

762

if (defined $self->{conf}->{rewrite_header}->{Subject}) {

763

$subject = "\n" if !defined $subject;

764

my $tag = $self->_replace_tags($self->{conf}->{rewrite_header}->{Subject});

765

$tag =~ s/\n/ /gs; # strip tag's newlines

766

$subject =~ s/^(?:\Q${tag}\E )?/${tag} /g; # For some reason the tag may already be there!?

767

}

768

769

if (defined $self->{conf}->{rewrite_header}->{To}) {

770

$to = "\n" if !defined $to;

771

my $tag = $self->_replace_tags($self->{conf}->{rewrite_header}->{To});

772

$tag =~ s/\n/ /gs; # strip tag's newlines

773

$to =~ s/(?:\t\Q(${tag})\E)?$/\t(${tag})/;

774

}

775

776

if (defined $self->{conf}->{rewrite_header}->{From}) {

777

$from = "\n" if !defined $from;

778

my $tag = $self->_replace_tags($self->{conf}->{rewrite_header}->{From});

779

$tag =~ s/\n+//gs; # strip tag's newlines

780

$from =~ s/(?:\t\Q(${tag})\E)?$/\t(${tag})/;

781

}

782

783

# add report headers to message

784

$newmsg .= "From: $from" if defined $from;

785

$newmsg .= "To: $to" if defined $to;

786

$newmsg .= "Cc: $cc" if defined $cc;

787

$newmsg .= "Subject: $subject" if defined $subject;

788

$newmsg .= "Date: $date" if defined $date;

789

$newmsg .= "Message-Id: $msgid" if defined $msgid;

790

$newmsg .= $self->_get_added_headers('headers_spam');

791

792

if (defined $self->{conf}->{report_safe_copy_headers}) {

793

my %already_added = map { $_ => 1 } qw/from to cc subject date message-id/;

794

795

foreach my $hdr (@{$self->{conf}->{report_safe_copy_headers}}) {

796

next if exists $already_added{lc $hdr};

797

my @hdrtext = $self->{msg}->get_pristine_header($hdr);

798

$already_added{lc $hdr}++;

799

800

if (lc $hdr eq "received") { # add Received at the top ...

801

my $rhdr = "";

802

foreach (@hdrtext) {

803

$rhdr .= "$hdr: $_";

804

}

805

$newmsg = "$rhdr$newmsg";

806

}

807

else {

808

foreach (@hdrtext) {

809

$newmsg .= "$hdr: $_";

810

}

811

}

812

}

813

}

814

815

# jm: add a SpamAssassin Received header to note markup time etc.

816

# emulates the fetchmail style.

817

# tvd: do this after report_safe_copy_headers so Received will be done correctly

818

$newmsg = "Received: from localhost by " .

819

Mail::SpamAssassin::Util::fq_hostname() . "\n" .

820

"\twith SpamAssassin (version " .

821

Mail::SpamAssassin::Version() . ");\n" .

822

"\t" . Mail::SpamAssassin::Util::time_to_rfc822_date() . "\n" .

823

$newmsg;

824

825

# MIME boundary

826

my $boundary = "----------=_" . sprintf("%08X.%08X",time,int(rand(2 ** 32)));

827

828

# ensure it's unique, so we can't be attacked this way

829

while ($original =~ /^\Q${boundary}\E(?:--)?$/m) {

830

$boundary .= "/".sprintf("%08X",int(rand(2 ** 32)));

831

}

832

833

# determine whether Content-Disposition should be "attachment" or "inline"

834

my $disposition;

835

my $ct = $self->{msg}->get_header("Content-Type");

836

if (defined $ct && $ct ne '' && $ct !~ m{text/plain}i) {

837

$disposition = "attachment";

838

$report .= $self->_replace_tags($self->{conf}->{unsafe_report_template});

839

# if we wanted to defang the attachment, this would be the place

840

}

841

else {

842

$disposition = "inline";

843

}

844

845

my $type = "message/rfc822";

846

$type = "text/plain" if $self->{conf}->{report_safe} > 1;

847

848

my $description = $self->{conf}->{'encapsulated_content_description'};

849

850

# Note: the message should end in blank line since mbox format wants

851

# blank line at end and messages may be concatenated! In addition, the

852

# x-spam-type parameter is fixed since we will use it later to recognize

853

# original messages that can be extracted.

854

$newmsg .= <<"EOM";

855

MIME-Version: 1.0

856

Content-Type: multipart/mixed; boundary="$boundary"

857

858

This is a multi-part message in MIME format.

859

860

--$boundary

861

Content-Type: text/plain$report_charset

862

Content-Disposition: inline

863

Content-Transfer-Encoding: 8bit

864

865

$report

866

867

--$boundary

868

Content-Type: $type; x-spam-type=original

869

Content-Description: $description

870

Content-Disposition: $disposition

871

Content-Transfer-Encoding: 8bit

872

873

EOM

874

875

my $newmsgtrailer = "\n--$boundary--\n\n";

876

877

# now fix line endings in both headers, report_safe body parts,

878

# and new MIME boundaries and structure

879

$self->_fixup_report_line_endings(\$newmsg);

880

$self->_fixup_report_line_endings(\$newmsgtrailer);

881

$newmsg .= $original.$newmsgtrailer;

882

883

return $newmsg;

884

}

885

886

# rewrite the message in non-report_safe mode (just headers)

887

# should not be called directly, use rewrite_mail instead

888

889

sub rewrite_no_report_safe {

890

my ($self) = @_;

891

892

# put the pristine headers into an array

893

# skip the X-Spam- headers, but allow the X-Spam-Prev headers to remain.

894

# since there may be a missing header/body

895

896

my @pristine_headers = split(/^/m, $self->{msg}->get_pristine_header());

897

for (my $line = 0; $line <= $#pristine_headers; $line++) {

898

next unless ($pristine_headers[$line] =~ /^X-Spam-(?!Prev-)/i);

899

splice @pristine_headers, $line, 1 while ($pristine_headers[$line] =~ /^(?:X-Spam-(?!Prev-)|[ \t])/i);

900

$line--;

901

}

902

my $separator = '';

903

if (@pristine_headers && $pristine_headers[$#pristine_headers] =~ /^\s*$/) {

904

$separator = pop @pristine_headers;

905

}

906

907

my $addition = 'headers_ham';

908

909

if($self->{is_spam})

910

{

911

# special-case: Subject lines. ensure one exists, if we're

912

# supposed to mark it up.

913

my $created_subject = 0;

914

my $subject = $self->{msg}->get_pristine_header('Subject');

915

if (!defined($subject) && $self->{is_spam}

916

&& exists $self->{conf}->{rewrite_header}->{'Subject'})

917

{

918

push(@pristine_headers, "Subject: \n");

919

$created_subject = 1;

920

}

921

922

# Deal with header rewriting

923

foreach (@pristine_headers) {

924

# if we're not going to do a rewrite, skip this header!

925

next if (!/^(From|Subject|To):/i);

926

my $hdr = ucfirst(lc($1));

927

next if (!defined $self->{conf}->{rewrite_header}->{$hdr});

928

929

# pop the original version onto the end of the header array

930

if ($created_subject) {

931

push(@pristine_headers, "X-Spam-Prev-Subject: (nonexistent)\n");

932

} else {

933

push(@pristine_headers, "X-Spam-Prev-$_");

934

}

935

936

# Figure out the rewrite piece

937

my $tag = $self->_replace_tags($self->{conf}->{rewrite_header}->{$hdr});

938

$tag =~ s/\n/ /gs;

939

940

# The tag should be a comment for this header ...

941

$tag = "($tag)" if ($hdr =~ /^(?:From|To)$/);

942

943

local $1;

944

s/^([^:]+:)[ \t]*(?:\Q${tag}\E )?/$1 ${tag} /i;

945

}

946

947

$addition = 'headers_spam';

948

}

949

950

# Break the pristine header set into two blocks; $new_hdrs_pre is the stuff

951

# that we want to ensure comes before any SpamAssassin markup headers,

952

# like the Return-Path header (see bug 3409).

953

954

# all the rest of the message headers (as left in @pristine_headers), is

955

# to be placed after the SpamAssassin markup hdrs. Once one of those headers

956

# is seen, all further headers go into that set; it's assumed that it's an

957

# old copy of the header, or attempted spoofing, if it crops up halfway

958

# through the headers.

959

960

my $new_hdrs_pre = '';

961

if (@pristine_headers && $pristine_headers[0] =~ /^Return-Path:/i) {

962

$new_hdrs_pre .= shift(@pristine_headers);

963

while (@pristine_headers && $pristine_headers[0] =~ /^[ \t]/) {

964

$new_hdrs_pre .= shift(@pristine_headers);

965

}

966

}

967

$new_hdrs_pre .= $self->_get_added_headers($addition);

968

969

# fix up line endings appropriately

970

my $newmsg = $new_hdrs_pre . join('',@pristine_headers) . $separator;

971

$self->_fixup_report_line_endings(\$newmsg);

972

973

return $newmsg.$self->{msg}->get_pristine_body();

974

}

975

976

sub qp_encode_header {

977

my ($self, $text) = @_;

978

979

# do nothing unless there's an 8-bit char

980

return $text unless ($text =~ /[\x80-\xff]/);

981

982

my $cs = 'ISO-8859-1';

983

if ($self->{report_charset}) {

984

$cs = $self->{report_charset};

985

}

986

987

my @hexchars = split('', '0123456789abcdef');

988

my $ord;

989

local $1;

990

$text =~ s{([\x80-\xff])}{

991

$ord = ord $1;

992

'='.$hexchars[($ord & 0xf0) >> 4].$hexchars[$ord & 0x0f]

993

}ges;

994

995

$text = '=?'.$cs.'?Q?'.$text.'?=';

996

997

dbg("markup: encoding header in $cs: $text");

998

return $text;

999

}

1000

1001

sub _process_header {

1002

my ($self, $hdr_name, $hdr_data) = @_;

1003

1004

$hdr_data = $self->_replace_tags($hdr_data);

1005

$hdr_data =~ s/(?:\r?\n)+$//; # make sure there are no trailing newlines ...

1006

1007

if ($self->{conf}->{fold_headers}) {

1008

if ($hdr_data =~ /\n/) {

1009

$hdr_data =~ s/\s*\n\s*/\n\t/g;

1010

return $hdr_data;

1011

}

1012

else {

1013

# use '!!' instead of ': ' so it doesn't wrap on the space

1014

my $hdr = "X-Spam-$hdr_name!!$hdr_data";

1015

$hdr = Mail::SpamAssassin::Util::wrap($hdr, "\t", "", 79, 0, '(?<=[\s,])');

1016

$hdr =~ s/^\t\n//gm;

1017

return (split (/!!/, $hdr, 2))[1]; # just return the data part

1018

}

1019

}

1020

else {

1021

$hdr_data =~ s/\n/ /g; # Can't have newlines in headers, unless folded

1022

return $hdr_data;

1023

}

1024

}

1025

1026

sub _replace_tags {

1027

my $self = shift;

1028

my $text = shift;

1029

1030

# default to leaving the original string in place, if we cannot find

1031

# a tag for it (bug 4793)

1032

my $t;

1033

my $v;

1034

local($1,$2,$3);

1035

$text =~ s{(_(\w+?)(?:$(.*?)$)?_)}{

1036

my $full = $1;

1037

my $tag = $2;

1038

my $result;

1039

if ($tag =~ /^ADDEDHEADER(?:HAM|SPAM|)\z/) {

1040

# Bug 6278: break infinite recursion through _get_added_headers and

1041

# _get_tag on an attempt to use such tag in add_header template

1042

} else {

1043

$result = $self->_get_tag($tag,$3);

1044

}

1045

defined $result ? $result : $full;

1046

}ge;

1047

1048

return $text;

1049

}

1050

1051

###########################################################################

1052

1053

# public API for plugins

1054

1055

=item $status->set_tag($tagname, $value)

1056

1057

Set a template tag, as used in C<add_header>, report templates, etc. This API

1058

is intended for use by plugins. Tag names will be converted to an

1059

all-uppercase representation internally.

1060

1061

C<$value> can be a subroutine reference, which will be evaluated each time

1062

the template is expanded. Note that perl supports closures, which means

1063

that variables set in the caller's scope can be accessed inside this C<sub>.

1064

For example:

1065

1066

my $text = "hello world!";

1067

$status->set_tag("FOO", sub {

1068

return $text;

1069

});

1070

1071

See C<Mail::SpamAssassin::Conf>'s C<TEMPLATE TAGS> section for more details on

1072

how template tags are used.

1073

1074

C<undef> will be returned if a tag by that name has not been defined.

1075

1076

=cut

1077

1078

sub set_tag {

1079

my $self = shift;

1080

my $tag = uc shift;

1081

my $val = shift;

1082

1083

$self->{tag_data}->{$tag} = $val;

1084

}

1085

1086

# public API for plugins

1087

1088

=item $string = $status->get_tag($tagname)

1089

1090

Get the current value of a template tag, as used in C<add_header>, report

1091

templates, etc. This API is intended for use by plugins. Tag names will be

1092

converted to an all-uppercase representation internally. See

1093

C<Mail::SpamAssassin::Conf>'s C<TEMPLATE TAGS> section for more details on

1094

tags.

1095

1096

C<undef> will be returned if a tag by that name has not been defined.

1097

1098

=cut

1099

1100

sub get_tag {

1101

# expose this previously-private API

1102

return shift->_get_tag(uc shift);

1103

}

1104

1105

###########################################################################

1106

1107

# public API for plugins

1108

1109

=item $status->set_spamd_result_item($subref)

1110

1111

Set an entry for the spamd result log line. C<$subref> should be a code

1112

reference for a subroutine which will return a string in C<'name=VALUE'>

1113

format, similar to the other entries in the spamd result line:

1114

1115

Jul 17 14:10:47 radish spamd[16670]: spamd: result: Y 22 - ALL_NATURAL,

1116

DATE_IN_FUTURE_03_06,DIET_1,DRUGS_ERECTILE,DRUGS_PAIN,

1117

TEST_FORGED_YAHOO_RCVD,TEST_INVALID_DATE,TEST_NOREALNAME,

1118

TEST_NORMAL_HTTP_TO_IP,UNDISC_RECIPS scantime=0.4,size=3138,user=jm,

1119

uid=1000,required_score=5.0,rhost=localhost,raddr=127.0.0.1,

1120

rport=33153,mid=<9PS291LhupY>,autolearn=spam

1121

1122

C<name> and C<VALUE> must not contain C<=> or C<,> characters, as it

1123

is important that these log lines are easy to parse.

1124

1125

The code reference will be called by spamd after the message has been scanned,

1126

and the C<PerMsgStatus::check()> method has returned.

1127

1128

=cut

1129

1130

sub set_spamd_result_item {

1131

my ($self, $ref) = @_;

1132

push @{$self->{spamd_result_log_items}}, $ref;

1133

}

1134

1135

# called by spamd

1136

sub get_spamd_result_log_items {

1137

my ($self) = @_;

1138

my @ret;

1139

foreach my $ref (@{$self->{spamd_result_log_items}}) {

1140

push @ret, &$ref;

1141

}

1142

return @ret;

1143

}

1144

1145

###########################################################################

1146

1147

sub _get_tag_value_for_yesno {

1148

my($self, $arg) = @_;

1149

my($arg_spam, $arg_ham);

1150

($arg_spam, $arg_ham) = split(/,/, $arg, 2) if defined $arg;

1151

return $self->{is_spam} ? (defined $arg_spam ? $arg_spam : 'Yes')

1152

: (defined $arg_ham ? $arg_ham : 'No');

1153

}

1154

1155

sub _get_tag_value_for_score {

1156

#$pad parameter never used. removed.

1157

my ($self) = @_;

1158

1159

my $score = sprintf("%2.1f", $self->{score});

1160

my $rscore = $self->_get_tag_value_for_required_score();

1161

1162

#Change due to bug 6419 to use Util function for consistency with spamd

1163

#and PerMessageStatus

1164

return Mail::SpamAssassin::Util::get_tag_value_for_score($score, $rscore, $self->{is_spam});

1165

}

1166

1167

sub _get_tag_value_for_required_score {

1168

my $self = shift;

1169

return sprintf("%2.1f", $self->{conf}->{required_score});

1170

}

1171

1172

sub _get_tag {

1173

my $self = shift;

1174

my $tag = shift;

1175

my %tags;

1176

1177

# tag data also comes from $self->{tag_data}->{TAG}

1178

1179

$tag = "" unless defined $tag; # can be "0", so use a defined test

1180

1181

%tags = ( YESNO => sub { $self->_get_tag_value_for_yesno(@_) },

1182

1183

YESNOCAPS => sub { uc $self->_get_tag_value_for_yesno(@_) },

1184

1185

SCORE => sub { $self->_get_tag_value_for_score(shift) },

1186

HITS => sub { $self->_get_tag_value_for_score(shift) },

1187

1188

REQD => sub { $self->_get_tag_value_for_required_score() },

1189

1190

VERSION => \&Mail::SpamAssassin::Version,

1191

1192

SUBVERSION => sub { $Mail::SpamAssassin::SUB_VERSION },

1193

1194

HOSTNAME => sub {

1195

$self->{conf}->{report_hostname} ||

1196

Mail::SpamAssassin::Util::fq_hostname();

1197

1198

1199

REMOTEHOSTNAME => sub {

1200

$self->{tag_data}->{'REMOTEHOSTNAME'} || "localhost";

1201

1202

REMOTEHOSTADDR => sub {

1203

$self->{tag_data}->{'REMOTEHOSTADDR'} || "127.0.0.1";

1204

1205

1206

LASTEXTERNALIP => sub {

1207

my $lasthop = $self->{relays_external}->[0];

1208

$lasthop ? $lasthop->{ip} : '';

1209

1210

1211

LASTEXTERNALRDNS => sub {

1212

my $lasthop = $self->{relays_external}->[0];

1213

$lasthop ? $lasthop->{rdns} : '';

1214

1215

1216

LASTEXTERNALHELO => sub {

1217

my $lasthop = $self->{relays_external}->[0];

1218

$lasthop ? $lasthop->{helo} : '';

1219

1220

1221

CONTACTADDRESS => sub { $self->{conf}->{report_contact} },

1222

1223

BAYES => sub {

1224

defined($self->{bayes_score}) ?

1225

sprintf("%3.4f", $self->{bayes_score}) : "0.5"

1226

1227

1228

DATE => \&Mail::SpamAssassin::Util::time_to_rfc822_date,

1229

1230

STARS => sub {

1231

my $arg = (shift || "*");

1232

my $length = int($self->{score});

1233

$length = 50 if $length > 50;

1234

$arg x $length;

1235

1236

1237

AUTOLEARN => sub { $self->get_autolearn_status() },

1238

1239

AUTOLEARNSCORE => sub { $self->get_autolearn_points() },

1240

1241

TESTS => sub {

1242

my $arg = (shift || ',');

1243

join($arg, sort(@{$self->{test_names_hit}})) || "none";

1244

1245

1246

SUBTESTS => sub {

1247

my $arg = (shift || ',');

1248

join($arg, sort(@{$self->{subtest_names_hit}})) || "none";

1249

1250

1251

TESTSSCORES => sub {

1252

my $arg = (shift || ",");

1253

my $line = '';

1254

foreach my $test (sort @{$self->{test_names_hit}}) {

1255

if (!$line) {

1256

$line .= $test . "=" . $self->{conf}->{scores}->{$test};

1257

} else {

1258

$line .= $arg . $test . "=" . $self->{conf}->{scores}->{$test};

1259

}

1260

}

1261

$line ? $line : 'none';

1262

1263

1264

PREVIEW => sub { $self->get_content_preview() },

1265

1266

REPORT => sub { "\n" . ($self->{tag_data}->{REPORT} || "") },

1267

1268

HEADER => sub {

1269

my $hdr = shift || return;

1270

$self->get($hdr,undef);

1271

1272

1273

TIMING => sub { $self->{main}->timer_report() },

1274

1275

ADDEDHEADERHAM => sub { $self->_get_added_headers('headers_ham') },

1276

1277

ADDEDHEADERSPAM=> sub { $self->_get_added_headers('headers_spam') },

1278

1279

ADDEDHEADER => sub {

1280

$self->_get_added_headers(

1281

$self->{is_spam} ? 'headers_spam' : 'headers_ham');

1282

1283

1284

);

1285

1286

my $data;

1287

if (exists $tags{$tag}) {

1288

$data = $tags{$tag};

1289

$data = $data->(@_) if ref $data eq 'CODE';

1290

$data = "" if !defined $data;

1291

} elsif (exists $self->{tag_data}->{$tag}) {

1292

$data = $self->{tag_data}->{$tag};

1293

$data = $data->(@_) if ref $data eq 'CODE';

1294

$data = "" if !defined $data;

1295

}

1296

return $data;

1297

}

1298

1299

###########################################################################

1300

1301

=item $status->finish ()

1302

1303

Indicate that this C<$status> object is finished with, and can be destroyed.

1304

1305

If you are using SpamAssassin in a persistent environment, or checking many

1306

mail messages from one C<Mail::SpamAssassin> factory, this method should be

1307

called to ensure Perl's garbage collection will clean up old status objects.

1308

1309

=cut

1310

1311

sub finish {

1312

my ($self) = @_;

1313

1314

$self->{main}->call_plugins ("per_msg_finish", {

1315

permsgstatus => $self

1316

});

1317

1318

# Delete out all of the members of $self. This will remove any direct

1319

# circular references and let the memory get reclaimed while also being more

1320

# efficient than a foreach() loop over the keys.

1321

%{$self} = ();

1322

}

1323

1324

sub finish_tests {

1325

my ($conf) = @_;

1326

foreach my $method (@TEMPORARY_METHODS) {

1327

if (defined &{$method}) {

1328

undef &{$method};

1329

}

1330

}

1331

@TEMPORARY_METHODS = (); # clear for next time

1332

%TEMPORARY_EVAL_GLUE_METHODS = ();

1333

}

1334

1335

1336

=item $name = $status->get_current_eval_rule_name()

1337

1338

Return the name of the currently-running eval rule. C<undef> is

1339

returned if no eval rule is currently being run. Useful for plugins

1340

to determine the current rule name while inside an eval test function

1341

call.

1342

1343

=cut

1344

1345

sub get_current_eval_rule_name {

1346

my ($self) = @_;

1347

return $self->{current_rule_name};

1348

}

1349

1350

###########################################################################

1351

1352

sub extract_message_metadata {

1353

my ($self) = @_;

1354

1355

my $timer = $self->{main}->time_method("extract_message_metadata");

1356

$self->{msg}->extract_message_metadata($self);

1357

1358

foreach my $item (qw(

1359

relays_trusted relays_trusted_str num_relays_trusted

1360

relays_untrusted relays_untrusted_str num_relays_untrusted

1361

relays_internal relays_internal_str num_relays_internal

1362

relays_external relays_external_str num_relays_external

1363

num_relays_unparseable last_trusted_relay_index

1364

last_internal_relay_index

1365

))

1366

{

1367

$self->{$item} = $self->{msg}->{metadata}->{$item};

1368

}

1369

1370

$self->{tag_data}->{RELAYSTRUSTED} = $self->{relays_trusted_str};

1371

$self->{tag_data}->{RELAYSUNTRUSTED} = $self->{relays_untrusted_str};

1372

$self->{tag_data}->{RELAYSINTERNAL} = $self->{relays_internal_str};

1373

$self->{tag_data}->{RELAYSEXTERNAL} = $self->{relays_external_str};

1374

$self->{tag_data}->{LANGUAGES} = $self->{msg}->get_metadata("X-Languages");

1375

1376

# This should happen before we get called, but just in case.

1377

if (!defined $self->{msg}->{metadata}->{html}) {

1378

$self->get_decoded_stripped_body_text_array();

1379

}

1380

$self->{html} = $self->{msg}->{metadata}->{html};

1381

1382

# allow plugins to add more metadata, read the stuff that's there, etc.

1383

$self->{main}->call_plugins ("parsed_metadata", { permsgstatus => $self });

1384

}

1385

1386

###########################################################################

1387

1388

=item $status->get_decoded_body_text_array ()

1389

1390

Returns the message body, with B<base64> or B<quoted-printable> encodings

1391

decoded, and non-text parts or non-inline attachments stripped.

1392

1393

It is returned as an array of strings, with each string representing

1394

one newline-separated line of the body.

1395

1396

=cut

1397

1398

sub get_decoded_body_text_array {

1399

return $_[0]->{msg}->get_decoded_body_text_array();

1400

}

1401

1402

=item $status->get_decoded_stripped_body_text_array ()

1403

1404

Returns the message body, decoded (as described in

1405

get_decoded_body_text_array()), with HTML rendered, and with whitespace

1406

normalized.

1407

1408

It will always render text/html, and will use a heuristic to determine if other

1409

text/* parts should be considered text/html.

1410

1411

It is returned as an array of strings, with each string representing one

1412

'paragraph'. Paragraphs, in plain-text mails, are double-newline-separated

1413

blocks of multi-line text.

1414

1415

=cut

1416

1417

sub get_decoded_stripped_body_text_array {

1418

return $_[0]->{msg}->get_rendered_body_text_array();

1419

}

1420

1421

###########################################################################

1422

1423

=item $status->get (header_name [, default_value])

1424

1425

Returns a message header, pseudo-header, real name or address.

1426

C<header_name> is the name of a mail header, such as 'Subject', 'To',

1427

etc. If C<default_value> is given, it will be used if the requested

1428

C<header_name> does not exist.

1429

1430

Appending C<:raw> to the header name will inhibit decoding of quoted-printable

1431

or base-64 encoded strings.

1432

1433

Appending C<:addr> to the header name will cause everything except

1434

the first email address to be removed from the header. For example,

1435

all of the following will result in "example@foo":

1436

1437

=over 4

1438

1439

=item example@foo

1440

1441

=item example@foo (Foo Blah)

1442

1443

=item example@foo, example@bar

1444

1445

=item display: example@foo (Foo Blah), example@bar ;

1446

1447

=item Foo Blah <example@foo>

1448

1449

=item "Foo Blah" <example@foo>

1450

1451

=item "'Foo Blah'" <example@foo>

1452

1453

=back

1454

1455

Appending C<:name> to the header name will cause everything except

1456

the first display name to be removed from the header. For example,

1457

all of the following will result in "Foo Blah"

1458

1459

=over 4

1460

1461

=item example@foo (Foo Blah)

1462

1463

=item example@foo (Foo Blah), example@bar

1464

1465

=item display: example@foo (Foo Blah), example@bar ;

1466

1467

=item Foo Blah <example@foo>

1468

1469

=item "Foo Blah" <example@foo>

1470

1471

=item "'Foo Blah'" <example@foo>

1472

1473

=back

1474

1475

There are several special pseudo-headers that can be specified:

1476

1477

=over 4

1478

1479

=item C<ALL> can be used to mean the text of all the message's headers.

1480

1481

=item C<ALL-TRUSTED> can be used to mean the text of all the message's headers

1482

that could only have been added by trusted relays.

1483

1484

=item C<ALL-INTERNAL> can be used to mean the text of all the message's headers

1485

that could only have been added by internal relays.

1486

1487

=item C<ALL-UNTRUSTED> can be used to mean the text of all the message's

1488

headers that may have been added by untrusted relays. To make this

1489

pseudo-header more useful for header rules the 'Received' header that was added

1490

by the last trusted relay is included, even though it can be trusted.

1491

1492

=item C<ALL-EXTERNAL> can be used to mean the text of all the message's headers

1493

that may have been added by external relays. Like C<ALL-UNTRUSTED> the

1494

'Received' header added by the last internal relay is included.

1495

1496

=item C<ToCc> can be used to mean the contents of both the 'To' and 'Cc'

1497

headers.

1498

1499

=item C<EnvelopeFrom> is the address used in the 'MAIL FROM:' phase of the SMTP

1500

transaction that delivered this message, if this data has been made available

1501

by the SMTP server.

1502

1503

=item C<MESSAGEID> is a symbol meaning all Message-Id's found in the message;

1504

some mailing list software moves the real 'Message-Id' to 'Resent-Message-Id'

1505

or 'X-Message-Id', then uses its own one in the 'Message-Id' header. The value

1506

returned for this symbol is the text from all 3 headers, separated by newlines.

1507

1508

=item C<X-Spam-Relays-Untrusted> is the generated metadata of untrusted relays

1509

the message has passed through

1510

1511

=item C<X-Spam-Relays-Trusted> is the generated metadata of trusted relays

1512

the message has passed through

1513

1514

=back

1515

1516

=cut

1517

1518

# only uses two arguments, ignores $defval

1519

sub _get {

1520

my ($self, $request) = @_;

1521

1522

my $result;

1523

my $getaddr = 0;

1524

my $getname = 0;

1525

my $getraw = 0;

1526

1527

# special queries - process and strip modifiers

1528

if (index($request,':') >= 0) { # triage

1529

local $1;

1530

while ($request =~ s/:([^:]*)//) {

1531

if ($1 eq 'raw') { $getraw = 1 }

1532

elsif ($1 eq 'addr') { $getaddr = $getraw = 1 }

1533

elsif ($1 eq 'name') { $getname = 1 }

1534

}

1535

}

1536

my $request_lc = lc $request;

1537

1538

# ALL: entire pristine or semi-raw headers

1539

if ($request eq 'ALL') {

1540

$result = $getraw ? $self->{msg}->get_pristine_header()

1541

: $self->{msg}->get_all_headers(1);

1542

}

1543

# ALL-TRUSTED: entire trusted raw headers

1544

elsif ($request eq 'ALL-TRUSTED') {

1545

# '+1' since we added the received header even though it's not considered

1546

# trusted, so we know that those headers can be trusted too

1547

return $self->get_all_hdrs_in_rcvd_index_range(

1548

undef, $self->{last_trusted_relay_index}+1);

1549

}

1550

# ALL-INTERNAL: entire internal raw headers

1551

elsif ($request eq 'ALL-INTERNAL') {

1552

# '+1' for the same reason as in ALL-TRUSTED above

1553

return $self->get_all_hdrs_in_rcvd_index_range(

1554

undef, $self->{last_internal_relay_index}+1);

1555

}

1556

# ALL-UNTRUSTED: entire untrusted raw headers

1557

elsif ($request eq 'ALL-UNTRUSTED') {

1558

# '+1' for the same reason as in ALL-TRUSTED above

1559

return $self->get_all_hdrs_in_rcvd_index_range(

1560

$self->{last_trusted_relay_index}+1, undef);

1561

}

1562

# ALL-EXTERNAL: entire external raw headers

1563

elsif ($request eq 'ALL-EXTERNAL') {

1564

# '+1' for the same reason as in ALL-TRUSTED above

1565

return $self->get_all_hdrs_in_rcvd_index_range(

1566

$self->{last_internal_relay_index}+1, undef);

1567

}

1568

# EnvelopeFrom: the SMTP MAIL FROM: address

1569

elsif ($request_lc eq "\LEnvelopeFrom") {

1570

$result = $self->get_envelope_from();

1571

}

1572

# untrusted relays list, as string

1573

elsif ($request_lc eq "\LX-Spam-Relays-Untrusted") {

1574

$result = $self->{relays_untrusted_str};

1575

}

1576

# trusted relays list, as string

1577

elsif ($request_lc eq "\LX-Spam-Relays-Trusted") {

1578

$result = $self->{relays_trusted_str};

1579

}

1580

# external relays list, as string

1581

elsif ($request_lc eq "\LX-Spam-Relays-External") {

1582

$result = $self->{relays_external_str};

1583

}

1584

# internal relays list, as string

1585

elsif ($request_lc eq "\LX-Spam-Relays-Internal") {

1586

$result = $self->{relays_internal_str};

1587

}

1588

# ToCc: the combined recipients list

1589

elsif ($request_lc eq "\LToCc") {

1590

$result = join("\n", $self->{msg}->get_header('To', $getraw));

1591

if ($result ne '') {

1592

chomp $result;

1593

$result .= ", " if $result =~ /\S/;

1594

}

1595

$result .= join("\n", $self->{msg}->get_header('Cc', $getraw));

1596

$result = undef if $result eq '';

1597

}

1598

# MESSAGEID: handle lists which move the real message-id to another

1599

# header for resending.

1600

elsif ($request eq 'MESSAGEID') {

1601

$result = join("\n", grep { defined($_) && $_ ne '' }

1602

$self->{msg}->get_header('X-Message-Id', $getraw),

1603

$self->{msg}->get_header('Resent-Message-Id', $getraw),

1604

$self->{msg}->get_header('X-Original-Message-ID', $getraw),

1605

$self->{msg}->get_header('Message-Id', $getraw));

1606

}

1607

# a conventional header

1608

else {

1609

my @results = $getraw ? $self->{msg}->raw_header($request)

1610

: $self->{msg}->get_header($request);

1611

# dbg("message: get(%s) = %s", $request, join(", ",@results));

1612

if (@results) {

1613

$result = join('', @results);

1614

} else { # metadata

1615

$result = $self->{msg}->get_metadata($request);

1616

}

1617

}

1618

1619

# special queries

1620

if (defined $result && ($getaddr || $getname)) {

1621

local $1;

1622

$result =~ s/^[^:]+:(.*);\s*$/$1/gs; # 'undisclosed-recipients: ;'

1623

$result =~ s/\s+/ /g; # reduce whitespace

1624

$result =~ s/^\s+//; # leading whitespace

1625

$result =~ s/\s+$//; # trailing whitespace

1626

1627

if ($getaddr) {

1628

# Get the email address out of the header

1629

# All of these should result in "jm@foo":

1630

# jm@foo

1631

# jm@foo (Foo Blah)

1632

# jm@foo, jm@bar

1633

# display: jm@foo (Foo Blah), jm@bar ;

1634

# Foo Blah <jm@foo>

1635

# "Foo Blah" <jm@foo>

1636

# "'Foo Blah'" <jm@foo>

1637

1638

# strip out the (comments)

1639

$result =~ s/\s*$.*?$//g;

1640

# strip out the "quoted text", unless it's the only thing in the string

1641

if ($result !~ /^".*"$/) {

1642

$result =~ s/(?<!<)"[^"]*"(?!@)//g; #" emacs

1643

}

1644

# Foo Blah <jm@xxx> or <jm@xxx>

1645

local $1;

1646

$result =~ s/^[^"<]*?<(.*?)>.*$/$1/;

1647

# multiple addresses on one line? remove all but first

1648

$result =~ s/,.*$//;

1649

}

1650

elsif ($getname) {

1651

# Get the real name out of the header

1652

# All of these should result in "Foo Blah":

1653

1654

# jm@foo (Foo Blah)

1655

# jm@foo (Foo Blah), jm@bar

1656

# display: jm@foo (Foo Blah), jm@bar ;

1657

# Foo Blah <jm@foo>

1658

# "Foo Blah" <jm@foo>

1659

# "'Foo Blah'" <jm@foo>

1660

1661

local $1;

1662

$result =~ s/^[\'\"]*(.*?)[\'\"]*\s*<.+>\s*$/$1/g

1663

or $result =~ s/^.+\s$(.*?)$\s*$/$1/g; # jm@foo (Foo Blah)

1664

}

1665

}

1666

return $result;

1667

}

1668

1669

# optimized for speed

1670

# $_[0] is self

1671

# $_[1] is request

1672

# $_[2] is defval

1673

sub get {

1674

my $cache = $_[0]->{c};

1675

my $found;

1676

if (exists $cache->{$_[1]}) {

1677

# return cache entry if it is known

1678

# (measured hit/attempts rate on a production mailer is about 47%)

1679

$found = $cache->{$_[1]};

1680

} else {

1681

# fill in a cache entry

1682

$found = _get(@_);

1683

$cache->{$_[1]} = $found;

1684

}

1685

# if the requested header wasn't found, we should return a default value

1686

# as specified by the caller: if defval argument is present it represents

1687

# a default value even if undef; if defval argument is absent a default

1688

# value is an empty string for upwards compatibility

1689

return (defined $found ? $found : @_ > 2 ? $_[2] : '');

1690

}

1691

1692

###########################################################################

1693

1694

# uri parsing from plain text:

1695

# The goals are to find URIs in plain text spam that are intended to be clicked on or copy/pasted, but

1696

# ignore random strings that might look like URIs, for example in uuencoded files, and to ignore

1697

# URIs that spammers might seed in spam in ways not visible or clickable to add work to spam filters.

1698

# When we extract a domain and look it up in an RBL, an FP on decding that the text is a URI is not much

1699

# of a problem, as the only cost is an extra RBL lookup. The same FP is worse if the URI is used in matching rule

1700

# because it could lead to a rule FP, as in bug 5780 with WIERD_PORT matching random uuencoded strings.

1701

# The principles of the following code are 1) if ThunderBird or Outlook Express would linkify a string,

1702

# then we should attempt to parse it as a URI; 2) Where TBird and OE parse differently, choose to do what is most

1703

# likely to find a domain for the RBL tests; 3) If it begins with a scheme or www\d*\. or ftp\. assume that

1704

# it is a URI; 4) If it does not then require that the start of the string looks like a FQDN with a valid TLD;

1705

# 5) Reject strings that after parsing, URLDecoding, and redirection processing don't have a valid TLD

1706

1707

# We get the entire URI that would be linkified before dealing with it, in order to do the right thing

1708

# with URI-encodings and redirecting URIs.

1709

1710

# The delimiters for start of a URI in TBird are @(`{|[\"'<>,\s in OE they are ("<\s

1711

1712

# Tbird allows .,?';-! in a URI but ignores [.,?';-!]* at the end.

1713

# TBird's end delimiters are )`{}|[]"<>\s but ) is only an end delmiter if there is no ( in the URI

1714

# OE only uses space as a delimiter, but ignores [~!@#^&*()_+`-={}|[]:";'<>?,.]* at the end.

1715

1716

# Both TBird and OE decide that a URI is an email address when there is '@' character embedded in it.

1717

# TBird has some additional restrictions on email URIs: They cannot contain non-ASCII characters and their end

1718

# delimiters include ( and '

1719

1720

# bug 4522: ISO2022 format mail, most commonly Japanese SHIFT-JIS, inserts a three character escape sequence ESC ( .

1721

1722

# a hybrid of tbird and oe's version of uri parsing

1723

my $tbirdstartdelim = '><"\'`,{[(|\s' . "\x1b"; # The \x1b as per bug 4522

1724

my $iso2022shift = "\x1b" . '\(.'; # bug 4522

1725

my $tbirdenddelim = '><"`}\]{[|\s' . "\x1b"; # The \x1b as per bug 4522

1726

my $oeignoreatend = '-~!@#^&*()_+=:;\'?,.';

1727

my $nonASCII = '\x80-\xff';

1728

my $tbirdenddelimemail = $tbirdenddelim . '(\'' . $nonASCII; # tbird ignores non-ASCII mail addresses for now, until RFC changes

1729

my $tbirdenddelimplusat = $tbirdenddelimemail . '@';

1730

1731

# valid TLDs

1732

my $tldsRE = $Mail::SpamAssassin::Util::RegistrarBoundaries::VALID_TLDS_RE;

1733

1734

# knownscheme regexp looks for either a https?: or ftp: scheme, or www\d*\. or ftp\. prefix, i.e., likely to start a URL

1735

# schemeless regexp looks for a valid TLD at the end of what may be a FQDN, followed by optional ., optional :portnum, optional /rest_of_uri

1736

my $urischemeless = qr/[a-z\d][a-z\d._-]{0,251}\.${tldsRE}\.?(?::\d{1,5})?(?:\/[^$tbirdenddelim]{1,251})?/io;

1737

my $uriknownscheme = qr/(?:(?:(?:(?:https?)|(?:ftp)):(?:\/\/)?)|(?:(?:www\d{0,2}|ftp)\.))[^$tbirdenddelim]{1,251}/io;

1738

my $urimailscheme = qr/(?:mailto:)?[^$tbirdenddelimplusat]{1,251}@[^$tbirdenddelimemail]{1,251}/io;

1739

my $tbirdurire = qr/(?:\b|(?<=$iso2022shift)|(?<=[$tbirdstartdelim]))

1740

(?:(?:($uriknownscheme)(?=(?:[$tbirdenddelim]|\z))) |

1741

(?:($urimailscheme)(?=(?:[$tbirdenddelimemail]|\z))) |

1742

(?:\b($urischemeless)(?=(?:[$tbirdenddelim]|\z))))/xo;

1743

1744

=item $status->get_uri_list ()

1745

1746

Returns an array of all unique URIs found in the message. It takes

1747

a combination of the URIs found in the rendered (decoded and HTML

1748

stripped) body and the URIs found when parsing the HTML in the message.

1749

Will also set $status->{uri_list} (the array as returned by this function).

1750

1751

The returned array will include the "raw" URI as well as

1752

"slightly cooked" versions. For example, the single URI

1753

'http://%77w%77.example.com/' will get turned into:

1754

( 'http://%77w%77.example.com/', 'http://www.example.com/' )

1755

1756

=cut

1757

1758

sub get_uri_list {

1759

my ($self) = @_;

1760

1761

# use cached answer if available

1762

if (defined $self->{uri_list}) {

1763

return @{$self->{uri_list}};

1764

}

1765

1766

my @uris;

1767

# $self->{redirect_num} = 0;

1768

1769

# get URIs from HTML parsing

1770

while(my($uri, $info) = each %{ $self->get_uri_detail_list() }) {

1771

if ($info->{cleaned}) {

1772

foreach (@{$info->{cleaned}}) {

1773

push(@uris, $_);

1774

1775

# count redirection attempts and log it

1776

# if (my @http = m{\b(https?:/{0,2})}gi) {

1777

# $self->{redirect_num} = $#http if ($#http > $self->{redirect_num});

1778

# }

1779

}

1780

}

1781

}

1782

1783

$self->{uri_list} = \@uris;

1784

1785

return @uris;

1786

}

1787

1788

=item $status->get_uri_detail_list ()

1789

1790

Returns a hash reference of all unique URIs found in the message and

1791

various data about where the URIs were found in the message. It takes a

1792

combination of the URIs found in the rendered (decoded and HTML stripped)

1793

body and the URIs found when parsing the HTML in the message. Will also

1794

set $status->{uri_detail_list} (the hash reference as returned by this

1795

function). This function will also set $status->{uri_domain_count} (count of

1796

unique domains).

1797

1798

The hash format looks something like this:

1799

1800

raw_uri => {

1801

types => { a => 1, img => 1, parsed => 1 },

1802

cleaned => [ canonified_uri ],

1803

anchor_text => [ "click here", "no click here" ],

1804

domains => { domain1 => 1, domain2 => 1 },

1805

}

1806

1807

C<raw_uri> is whatever the URI was in the message itself

1808

(http://spamassassin.apache%2Eorg/).

1809

1810

C<types> is a hash of the HTML tags (lowercase) which referenced

1811

the raw_uri. I<parsed> is a faked type which specifies that the

1812

raw_uri was seen in the rendered text.

1813

1814

C<cleaned> is an array of the raw and canonified version of the raw_uri

1815

(http://spamassassin.apache%2Eorg/, http://spamassassin.apache.org/).

1816

1817

C<anchor_text> is an array of the anchor text (text between <a> and

1818

</a>), if any, which linked to the URI.

1819

1820

C<domains> is a hash of the domains found in the canonified URIs.

1821

1822

=cut

1823

1824

sub get_uri_detail_list {

1825

my ($self) = @_;

1826

1827

# use cached answer if available

1828

if (defined $self->{uri_detail_list}) {

1829

return $self->{uri_detail_list};

1830

}

1831

1832

my $timer = $self->{main}->time_method("get_uri_detail_list");

1833

1834

$self->{uri_domain_count} = 0;

1835

1836

# do this so we're sure metadata->html is setup

1837

my %parsed = map { $_ => 'parsed' } $self->_get_parsed_uri_list();

1838

1839

# Look for the domain in DK/DKIM headers

1840

my $dk = join(" ", grep {defined} ( $self->get('DomainKey-Signature',undef),

1841

$self->get('DKIM-Signature',undef) ));

1842

while ($dk =~ /\bd\s*=\s*([^;]+)/g) {

1843

my $dom = $1;

1844

$dom =~ s/\s+//g;

1845

$parsed{$dom} = 'domainkeys';

1846

}

1847

1848

# get URIs from HTML parsing

1849

# use the metadata version since $self->{html} may not be setup

1850

my $detail = $self->{msg}->{metadata}->{html}->{uri_detail} || { };

1851

$self->{'uri_truncated'} = 1 if $self->{msg}->{metadata}->{html}->{uri_truncated};

1852

1853

# don't keep dereferencing ...

1854

my $redirector_patterns = $self->{conf}->{redirector_patterns};

1855

1856

# canonify the HTML parsed URIs

1857

while(my($uri, $info) = each %{ $detail }) {

1858

my @tmp = Mail::SpamAssassin::Util::uri_list_canonify($redirector_patterns, $uri);

1859

$info->{cleaned} = \@tmp;

1860

1861

foreach (@tmp) {

1862

my $domain = Mail::SpamAssassin::Util::uri_to_domain($_);

1863

if ($domain && !$info->{domains}->{$domain}) {

1864

$info->{domains}->{$domain} = 1;

1865

$self->{uri_domain_count}++;

1866

}

1867

}

1868

1869

if (would_log('dbg', 'uri') == 2) {

1870

dbg("uri: html uri found, $uri");

1871

foreach my $nuri (@tmp) {

1872

dbg("uri: cleaned html uri, $nuri");

1873

}

1874

if ($info->{domains}) {

1875

foreach my $domain (keys %{$info->{domains}}) {

1876

dbg("uri: html domain, $domain");

1877

}

1878

}

1879

}

1880

}

1881

1882

# canonify the text parsed URIs

1883

while (my($uri, $type) = each %parsed) {

1884

$detail->{$uri}->{types}->{$type} = 1;

1885

my $info = $detail->{$uri};

1886

1887

my @uris;

1888

1889

if (!exists $info->{cleaned}) {

1890

if ($type eq 'parsed') {

1891

@uris = Mail::SpamAssassin::Util::uri_list_canonify($redirector_patterns, $uri);

1892

}

1893

else {

1894

@uris = ( $uri );

1895

}

1896

$info->{cleaned} = \@uris;

1897

1898

foreach (@uris) {

1899

my $domain = Mail::SpamAssassin::Util::uri_to_domain($_);

1900

if ($domain && !$info->{domains}->{$domain}) {

1901

$info->{domains}->{$domain} = 1;

1902

$self->{uri_domain_count}++;

1903

}

1904

}

1905

}

1906

1907

if (would_log('dbg', 'uri') == 2) {

1908

dbg("uri: parsed uri found of type $type, $uri");

1909

foreach my $nuri (@uris) {

1910

dbg("uri: cleaned parsed uri, $nuri");

1911

}

1912

if ($info->{domains}) {

1913

foreach my $domain (keys %{$info->{domains}}) {

1914

dbg("uri: parsed domain, $domain");

1915

}

1916

}

1917

}

1918

}

1919

1920

# setup the cache

1921

$self->{uri_detail_list} = $detail;

1922

1923

return $detail;

1924

}

1925

1926

sub _get_parsed_uri_list {

1927

my ($self) = @_;

1928

1929

# use cached answer if available

1930

unless (defined $self->{parsed_uri_list}) {

1931

# TVD: we used to use decoded_body which is fine, except then we'll

1932

# try parsing URLs out of HTML, which is what the HTML code is going

1933

# to do (note: we know the HTML parsing occurs, because we call for the

1934

# rendered text which does HTML parsing...) trying to get URLs out of

1935

# HTML w/out parsing causes issues, so let's not do it.

1936

# also, if we allow $textary to be passed in, we need to invalidate

1937

# the cache first. fyi.

1938

my $textary = $self->get_decoded_stripped_body_text_array();

1939

my $redirector_patterns = $self->{conf}->{redirector_patterns};

1940

1941

my ($rulename, $pat, @uris);

1942

my $text;

1943

1944

for my $entry (@$textary) {

1945

1946

# a workaround for [perl #69973] bug:

1947

# Invalid and tainted utf-8 char crashes perl 5.10.1 in regexp evaluation

1948

# Bug 6225, regexp and string should both be utf8, or none of them;

1949

# untainting string also seems to avoid the crash

1950

1951

# Bug 6225: untaint the string in an attempt to work around a perl crash

1952

local $_ = untaint_var($entry);

1953

1954

local($1,$2,$3);

1955

while (/$tbirdurire/igo) {

1956

my $rawuri = $1||$2||$3;

1957

$rawuri =~ s/(^[^(]*)\).*$/$1/; # as per ThunderBird, ) is an end delimiter if there is no ( preceeding it

1958

$rawuri =~ s/[$oeignoreatend]*$//; # remove trailing string of punctuations that TBird ignores

1959

# skip if there is '..' in the hostname portion of the URI, something we can't catch in the general URI regexp

1960

next if $rawuri =~ /^(?:(?:https?|ftp|mailto):(?:\/\/)?)?[a-z\d.-]*\.\./i;

1961

1962

# If it's a hostname that was just sitting out in the

1963

# open, without a protocol, and not inside of an HTML tag,

1964

# the we should add the proper protocol in front, rather

1965

# than using the base URI.

1966

my $uri = $rawuri;

1967

my $rblonly;

1968

if ($uri !~ /^(?:https?|ftp|mailto|javascript|file):/i) {

1969

if ($uri =~ /^ftp\./i) {

1970

$uri = "ftp://$uri";

1971

}

1972

elsif ($uri =~ /^www\d{0,2}\./i) {

1973

$uri = "http://$uri";

1974

}

1975

elsif ($uri =~ /\@/) {

1976

$uri = "mailto:$uri";

1977

}

1978

else {

1979

# some spammers are using unschemed URIs to escape filters

1980

$rblonly = 1; # flag that this is a URI that MUAs don't linkify so only use for RBLs

1981

$uri = "http://$uri";

1982

}

1983

}

1984

1985

if ($uri =~ /^mailto:/i) {

1986

# skip a mail link that does not have a valid TLD or other than one @ after decoding any URLEncoded characters

1987

$uri = Mail::SpamAssassin::Util::url_encode($uri) if ($uri =~ /\%(?:2[1-9a-fA-F]|[3-6][0-9a-fA-f]|7[0-9a-eA-E])/);

1988

next if ($uri !~ /^[^@]+@[^@]+$/);

1989

my $domuri = Mail::SpamAssassin::Util::uri_to_domain($uri);

1990

next unless $domuri;

1991

push (@uris, $rawuri);

1992

push (@uris, $uri) unless ($rawuri eq $uri);

1993

}

1994

1995

next unless ($uri =~/^(?:https?|ftp):/i); # at this point only valid if one or the other of these

1996

1997

my @tmp = Mail::SpamAssassin::Util::uri_list_canonify($redirector_patterns, $uri);

1998

my $goodurifound = 0;

1999

foreach my $cleanuri (@tmp) {

2000

my $domain = Mail::SpamAssassin::Util::uri_to_domain($cleanuri);

2001

if ($domain) {

2002

# bug 5780: Stop after domain to avoid FP, but do that after all deobfuscation of urlencoding and redirection

2003

if ($rblonly) {

2004

local $1;

2005

$cleanuri =~ s/^(https?:\/\/[^:\/]+).*$/$1/i;

2006

}

2007

push (@uris, $cleanuri);

2008

$goodurifound = 1;

2009

}

2010

}

2011

next unless $goodurifound;

2012

push @uris, $rawuri unless $rblonly;

2013

}

2014

}

2015

2016

# Make sure all the URIs are nice and short

2017

foreach my $uri ( @uris ) {

2018

if (length $uri > MAX_URI_LENGTH) {

2019

$self->{'uri_truncated'} = 1;

2020

$uri = substr $uri, 0, MAX_URI_LENGTH;

2021

}

2022

}

2023

2024

# setup the cache and return

2025

$self->{parsed_uri_list} = \@uris;

2026

}

2027

2028

return @{$self->{parsed_uri_list}};

2029

}

2030

2031

###########################################################################

2032

2033

sub ensure_rules_are_complete {

2034

my $self = shift;

2035

my $metarule = shift;

2036

# @_ is now the list of rules

2037

2038

foreach my $r (@_) {

2039

# dbg("rules: meta rule depends on net rule $r");

2040

next if ($self->is_rule_complete($r));

2041

2042

dbg("rules: meta rule $metarule depends on pending rule $r, blocking");

2043

my $timer = $self->{main}->time_method("wait_for_pending_rules");

2044

2045

my $start = time;

2046

$self->harvest_until_rule_completes($r);

2047

my $elapsed = time - $start;

2048

2049

if (!$self->is_rule_complete($r)) {

2050

dbg("rules: rule $r is still not complete; exited early?");

2051

}

2052

elsif ($elapsed > 0) {

2053

info("rules: $r took $elapsed seconds to complete, for $metarule");

2054

}

2055

}

2056

}

2057

2058

###########################################################################

2059

2060

# use a separate sub here, for brevity

2061

# called out of generated eval

2062

sub handle_eval_rule_errors {

2063

my ($self, $rulename) = @_;

2064

warn "rules: failed to run $rulename test, skipping:\n\t($@)\n";

2065

$self->{rule_errors}++;

2066

}

2067

2068

sub register_plugin_eval_glue {

2069

my ($self, $function) = @_;

2070

2071

if (!$function) {

2072

warn "rules: empty function name";

2073

return;

2074

}

2075

2076

# only need to call this once per fn (globally)

2077

return if exists $TEMPORARY_EVAL_GLUE_METHODS{$function};

2078

$TEMPORARY_EVAL_GLUE_METHODS{$function} = undef;

2079

2080

# return if it's not an eval_plugin function

2081

return if (!exists $self->{conf}->{eval_plugins}->{$function});

2082

2083

# return if it's been registered already

2084

return if ($self->can ($function) &&

2085

defined &{'Mail::SpamAssassin::PerMsgStatus::'.$function});

2086

2087

my $evalstr = <<"ENDOFEVAL";

2088

{

2089

package Mail::SpamAssassin::PerMsgStatus;

2090

2091

sub $function {

2092

my (\$self) = shift;

2093

my \$plugin = \$self->{conf}->{eval_plugins}->{$function};

2094

return \$plugin->$function (\$self, \@_);

2095

}

2096

2097

2098

}

2099

ENDOFEVAL

2100

eval $evalstr . '; 1' ## no critic

2101

or do {

2102

my $eval_stat = $@ ne '' ? $@ : "errno=$!"; chomp $eval_stat;

2103

warn "rules: failed to run header tests, skipping some: $eval_stat\n";

2104

$self->{rule_errors}++;

2105

};

2106

2107

# ensure this method is deleted if finish_tests() is called

2108

push (@TEMPORARY_METHODS, $function);

2109

}

2110

2111

###########################################################################

2112

2113

# note: only eval tests should store state in $self->{test_log_msgs};

2114

# pattern tests do not.

2115

2116

# the clearing of the test state is now inlined as:

2117

2118

# $self->{test_log_msgs} = (); # clear test state

2119

2120

# except for this public API for plugin use:

2121

2122

=item $status->clear_test_state()

2123

2124

Clear test state, including test log messages from C<$status-E<gt>test_log()>.

2125

2126

=cut

2127

2128

sub clear_test_state {

2129

my ($self) = @_;

2130

$self->{test_log_msgs} = ();

2131

}

2132

2133

# internal API, called only by get_hit()

2134

# TODO: refactor and merge this into that function

2135

sub _handle_hit {

2136

my ($self, $rule, $score, $area, $ruletype, $desc) = @_;

2137

2138

$self->{main}->call_plugins ("hit_rule", {

2139

permsgstatus => $self,

2140

rulename => $rule,

2141

ruletype => $ruletype,

2142

score => $score

2143

});

2144

2145

# ignore meta-match sub-rules.

2146

if ($rule =~ /^__/) { push(@{$self->{subtest_names_hit}}, $rule); return; }

2147

2148

# this should not happen; warn about it

2149

if (!defined $score) {

2150

warn "rules: score undef for rule '$rule' in '$area' '$desc'";

2151

return;

2152

}

2153

2154

# this should not happen; warn about NaN (bug 3364)

2155

if ($score != $score) {

2156

warn "rules: score '$score' for rule '$rule' in '$area' '$desc'";

2157

return;

2158

}

2159

2160

# Add the rule hit to the score

2161

$self->{score} += $score;

2162

2163

push(@{$self->{test_names_hit}}, $rule);

2164

$area ||= '';

2165

2166

if ($score >= 10 || $score <= -10) {

2167

$score = sprintf("%4.0f", $score);

2168

}

2169

else {

2170

$score = sprintf("%4.1f", $score);

2171

}

2172

2173

# save both summaries

2174

# TODO: this is slower than necessary, if we only need one

2175

$self->{tag_data}->{REPORT} .= sprintf ("* %s %s %s%s\n%s",

2176

$score, $rule, $area,

2177

$self->_wrap_desc($desc,

2178

4+length($rule)+length($score)+length($area), "* "),

2179

($self->{test_log_msgs}->{TERSE} ?

2180

"* " . $self->{test_log_msgs}->{TERSE} : ''));

2181

2182

$self->{tag_data}->{SUMMARY} .= sprintf ("%s %-22s %s%s\n%s",

2183

$score, $rule, $area,

2184

$self->_wrap_desc($desc,

2185

3+length($rule)+length($score)+length($area), " " x 28),

2186

($self->{test_log_msgs}->{LONG} || ''));

2187

2188

$self->{test_log_msgs} = (); # clear test logs

2189

}

2190

2191

sub _wrap_desc {

2192

my ($self, $desc, $firstlinelength, $prefix) = @_;

2193

2194

my $firstline = " " x $firstlinelength;

2195

my $wrapped = Mail::SpamAssassin::Util::wrap($desc, $prefix, $firstline, 75, 0);

2196

$wrapped =~ s/^\s+//s;

2197

$wrapped;

2198

}

2199

2200

###########################################################################

2201

2202

=item $status->got_hit ($rulename, $desc_prepend [, name => value, ...])

2203

2204

2205

2206

There are two mandatory arguments. These are C<$rulename>, the name of the rule

2207

that fired, and C<$desc_prepend>, which is a short string that will be

2208

prepended to the rules C<describe> string in output reports.

2209

2210

In addition, callers can supplement that with the following optional

2211

data:

2212

2213

=over 4

2214

2215

=item score => $num

2216

2217

Optional: the score to use for the rule hit. If unspecified,

2218

the value from the C<Mail::SpamAssassin::Conf> object's C<{scores}>

2219

hash will be used (a configured score), and in its absence the

2220

C<defscore> option value.

2221

2222

=item defscore => $num

2223

2224

Optional: the score to use for the rule hit if neither the

2225

option C<score> is provided, nor a configured score value is provided.

2226

2227

=item value => $num

2228

2229

Optional: the value to assign to the rule; the default value is C<1>.

2230

I<tflags multiple> rules use values of greater than 1 to indicate

2231

multiple hits. This value is accessible to meta rules.

2232

2233

=item ruletype => $type

2234

2235

Optional, but recommended: the rule type string. This is used in the

2236

C<hit_rule> plugin call, called by this method. If unset, I<'unknown'> is

2237

used.

2238

2239

=item tflags => $string

2240

2241

Optional: a string, i.e. a space-separated list of additional tflags

2242

to be appended to an existing list of flags in $self->{conf}->{tflags},

2243

such as: "nice noautolearn multiple". No syntax checks are performed.

2244

2245

=item description => $string

2246

2247

Optional: a custom rule description string. This is used in the

2248

C<hit_rule> plugin call, called by this method. If unset, the static

2249

description is used.

2250

2251

=back

2252

2253

Backwards compatibility: the two mandatory arguments have been part of this API

2254

since SpamAssassin 2.x. The optional I<name=<gt>value> pairs, however, are a

2255

new addition in SpamAssassin 3.2.0.

2256

2257

=cut

2258

2259

sub got_hit {

2260

my ($self, $rule, $area, %params) = @_;

2261

2262

my $conf_ref = $self->{conf};

2263

2264

my $dynamic_score_provided;

2265

my $score = $params{score};

2266

if (defined $score) { # overrides any configured scores

2267

$dynamic_score_provided = 1;

2268

} else {

2269

$score = $conf_ref->{scores}->{$rule};

2270

$score = $params{defscore} if !defined $score;

2271

}

2272

2273

# adding a hit does nothing if we don't have a score -- we probably

2274

# shouldn't have run it in the first place

2275

return unless $score;

2276

2277

# ensure that rule values always result in an *increase*

2278

# of $self->{tests_already_hit}->{$rule}:

2279

my $value = $params{value};

2280

if (!$value || $value <= 0) { $value = 1 }

2281

2282

my $tflags_ref = $conf_ref->{tflags};

2283

my $tflags_add = $params{tflags};

2284

if (defined $tflags_add && $tflags_add ne '') {

2285

$_ = (!defined $_ || $_ eq '') ? $tflags_add : ($_ . ' ' . $tflags_add)

2286

for $tflags_ref->{$rule};

2287

};

2288

2289

my $already_hit = $self->{tests_already_hit}->{$rule} || 0;

2290

# don't count hits multiple times, unless 'tflags multiple' is on

2291

if ($already_hit && ($tflags_ref->{$rule}||'') !~ /\bmultiple\b/) {

2292

return;

2293

}

2294

2295

$self->{tests_already_hit}->{$rule} = $already_hit + $value;

2296

2297

# default ruletype, if not specified:

2298

$params{ruletype} ||= 'unknown';

2299

2300

if ($dynamic_score_provided) { # copy it to static for proper reporting

2301

$conf_ref->{scoreset}->[$_]->{$rule} = $score for (0..3);

2302

$conf_ref->{scores}->{$rule} = $score;

2303

}

2304

2305

my $rule_descr = $params{description};

2306

if (defined $rule_descr) {

2307

$conf_ref->{descriptions}->{$rule} = $rule_descr; # save dynamic descr.

2308

} else {

2309

$rule_descr = $conf_ref->get_description_for_rule($rule); # static

2310

}

2311

$rule_descr = $rule if !defined $rule_descr || $rule_descr eq '';

2312

$self->_handle_hit($rule,

2313

$score,

2314

$area,

2315

$params{ruletype},

2316

$rule_descr);

2317

2318

# take care of duplicate rules, too (bug 5206)

2319

my $dups = $conf_ref->{duplicate_rules}->{$rule};

2320

if ($dups && @{$dups}) {

2321

foreach my $dup (@{$dups}) {

2322

$self->got_hit($dup, $area, %params);

2323

}

2324

}

2325

2326

return 1;

2327

}

2328

2329

###########################################################################

2330

2331

# TODO: this needs API doc

2332

sub test_log {

2333

my ($self, $msg) = @_;

2334

while ($msg =~ s/^(.{30,48})\s//) {

2335

$self->_test_log_line ($1);

2336

}

2337

$self->_test_log_line ($msg);

2338

}

2339

2340

sub _test_log_line {

2341

my ($self, $msg) = @_;

2342

2343

$self->{test_log_msgs}->{TERSE} .= sprintf ("[%s]\n", $msg);

2344

if (length($msg) > 47) {

2345

$self->{test_log_msgs}->{LONG} .= sprintf ("%78s\n", "[$msg]");

2346

} else {

2347

$self->{test_log_msgs}->{LONG} .= sprintf ("%27s [%s]\n", "", $msg);

2348

}

2349

}

2350

2351

###########################################################################

2352

2353

# helper for get(). Do not call directly, as get() caches its results

2354

# and this does not!

2355

sub get_envelope_from {

2356

my ($self) = @_;

2357

2358

# bug 2142:

2359

# Get the SMTP MAIL FROM:, aka. the "envelope sender", if our

2360

# calling app has helpfully marked up the source message

2361

# with it. Various MTAs and calling apps each have their

2362

# own idea of what header to use for this! see

2363

2364

my $envf;

2365

2366

# Rely on the 'envelope-sender-header' header if the user has configured one.

2367

# Assume that because they have configured it, their MTA will always add it.

2368

# This will prevent us falling through and picking up inappropriate headers.

2369

if (defined $self->{conf}->{envelope_sender_header}) {

2370

# make sure we get the most recent copy - there can be only one EnvelopeSender.

2371

$envf = $self->get($self->{conf}->{envelope_sender_header}.":addr",undef);

2372

# ok if it contains an "@" sign, or is "" (ie. "<>" without the < and >)

2373

goto ok if defined $envf && ($envf =~ /\@/ || $envf =~ /^$/);

2374

# Warn them if it's configured, but not there or not usable.

2375

if (defined $envf) {

2376

chomp $envf;

2377

dbg("message: envelope_sender_header '%s: %s' is not an FQDN, ignoring",

2378

$self->{conf}->{envelope_sender_header}, $envf);

2379

} else {

2380

dbg("message: envelope_sender_header '%s' not found in message",

2381

$self->{conf}->{envelope_sender_header});

2382

}

2383

# Couldn't get envelope-sender using the configured header.

2384

return;

2385

}

2386

2387

# User hasn't given us a header to trust, so try to guess the sender.

2388

2389

# use the "envelope-sender" string found in the Received headers,

2390

# if possible... use the last untrusted header, in case there's

2391

# trusted headers.

2392

my $lasthop = $self->{relays_untrusted}->[0];

2393

if (!defined $lasthop) {

2394

# no untrusted headers? in that case, the message is ALL_TRUSTED.

2395

# use the first trusted header (ie. the oldest, originating one).

2396

$lasthop = $self->{relays_trusted}->[-1];

2397

}

2398

2399

if (defined $lasthop) {

2400

$envf = $lasthop->{envfrom};

2401

# TODO FIXME: Received.pm puts both null senders and absence-of-sender

2402

# into the relays array as '', so we can't distinguish them :(

2403

if ($envf && ($envf =~ /\@/)) {

2404

goto ok;

2405

}

2406

}

2407

2408

# WARNING: a lot of list software adds an X-Sender for the original env-from

2409

# (including Yahoo! Groups). Unfortunately, fetchmail will pick it up and

2410

# reuse it as the env-from for *its* delivery -- even though the list

2411

# software had used a different env-from in the intervening delivery. Hence,

2412

# if this header is present, and there's a fetchmail sig in the Received

2413

# lines, we cannot trust any Envelope-From headers, since they're likely to

2414

# be incorrect fetchmail guesses.

2415

2416

if ($self->get("X-Sender") =~ /\@/) {

2417

my $rcvd = join(' ', $self->get("Received"));

2418

if ($rcvd =~ /\(fetchmail/) {

2419

dbg("message: X-Sender and fetchmail signatures found, cannot trust envelope-from");

2420

return;

2421

}

2422

}

2423

2424

# procmailrc notes this (we now recommend adding it to Received instead)

2425

if ($envf = $self->get("X-Envelope-From")) {

2426

# heuristic: this could have been relayed via a list which then used

2427

# a *new* Envelope-from. check

2428

if ($self->get("ALL:raw") =~ /^Received:.*^X-Envelope-From:/smi) {

2429

dbg("message: X-Envelope-From header found after 1 or more Received lines, cannot trust envelope-from");

2430

return;

2431

} else {

2432

goto ok;

2433

}

2434

}

2435

2436

# qmail, new-inject(1)

2437

if ($envf = $self->get("Envelope-Sender")) {

2438

# heuristic: this could have been relayed via a list which then used

2439

# a *new* Envelope-from. check

2440

if ($self->get("ALL:raw") =~ /^Received:.*^Envelope-Sender:/smi) {

2441

dbg("message: Envelope-Sender header found after 1 or more Received lines, cannot trust envelope-from");

2442

} else {

2443

goto ok;

2444

}

2445

}

2446

2447

# Postfix, sendmail, amavisd-new, ...

2448

# RFC 2821 requires it:

2449

# When the delivery SMTP server makes the "final delivery" of a

2450

# message, it inserts a return-path line at the beginning of the mail

2451

# data. This use of return-path is required; mail systems MUST support

2452

# it. The return-path line preserves the information in the <reverse-

2453

# path> from the MAIL command.

2454

if ($envf = $self->get("Return-Path")) {

2455

# heuristic: this could have been relayed via a list which then used

2456

# a *new* Envelope-from. check

2457

if ($self->get("ALL:raw") =~ /^Received:.*^Return-Path:/smi) {

2458

dbg("message: Return-Path header found after 1 or more Received lines, cannot trust envelope-from");

2459

} else {

2460

goto ok;

2461

}

2462

}

2463

2464

# give up.

2465

return;

2466

2467

ok:

2468

$envf =~ s/^<*//gs; # remove <

2469

$envf =~ s/>*\s*$//gs; # remove >, whitespace, newlines

2470

return $envf;

2471

}

2472

2473

###########################################################################

2474

2475

# helper for get(ALL-*). get() caches its results, so don't call this

2476

# directly unless you need a range of headers not covered by the ALL-*

2477

# psuedo-headers!

2478

2479

# Get all the headers found between an index range of received headers, the

2480

# index doesn't care if we could parse the received headers or not.

2481

# Use undef for the $start_rcvd or $end_rcvd numbers to start/end with the

2482

# first/last header in the message, otherwise indicate the index number you

2483

# want to start/end at. Set $include_start_rcvd or $include_end_rcvd to 0 to

2484

# indicate you don't want to include the received header found at the start or

2485

# end indexes... basically toggles between [s,e], [s,e), (s,e], (s,e).

2486

sub get_all_hdrs_in_rcvd_index_range {

2487

my ($self, $start_rcvd, $end_rcvd, $include_start_rcvd, $include_end_rcvd) = @_;

2488

2489

# prevent bad input causing us to return the first header found

2490

return if (defined $end_rcvd && $end_rcvd < 0);

2491

2492

$include_start_rcvd = 1 unless defined $include_start_rcvd;

2493

$include_end_rcvd = 1 unless defined $include_end_rcvd;

2494

2495

my $cur_rcvd_index = -1; # none found yet

2496

my $result = '';

2497

2498

foreach my $hdr (split(/^/m, $self->{msg}->get_pristine_header())) {

2499

if ($hdr =~ /^Received:/i) {

2500

$cur_rcvd_index++;

2501

next if (defined $start_rcvd && !$include_start_rcvd &&

2502

$start_rcvd == $cur_rcvd_index);

2503

last if (defined $end_rcvd && !$include_end_rcvd &&

2504

$end_rcvd == $cur_rcvd_index);

2505

}

2506

if ((!defined $start_rcvd || $start_rcvd <= $cur_rcvd_index) &&

2507

(!defined $end_rcvd || $cur_rcvd_index < $end_rcvd)) {

2508

$result .= $hdr."\n";

2509

}

2510

elsif (defined $end_rcvd && $cur_rcvd_index == $end_rcvd) {

2511

$result .= $hdr."\n";

2512

last;

2513

}

2514

}

2515

return ($result eq '' ? undef : $result);

2516

}

2517

2518

###########################################################################

2519

2520

sub sa_die { Mail::SpamAssassin::sa_die(@_); }

2521

2522

###########################################################################

2523

2524

=item $status->create_fulltext_tmpfile (fulltext_ref)

2525

2526

This function creates a temporary file containing the passed scalar

2527

reference data (typically the full/pristine text of the message).

2528

This is typically used by external programs like pyzor and dccproc, to

2529

avoid hangs due to buffering issues. Methods that need this, should

2530

call $self->create_fulltext_tmpfile($fulltext) to retrieve the temporary

2531

filename; it will be created if it has not already been.

2532

2533

Note: This can only be called once until $status->delete_fulltext_tmpfile() is

2534

called.

2535

2536

=cut

2537

2538

sub create_fulltext_tmpfile {

2539

my ($self, $fulltext) = @_;

2540

2541

if (defined $self->{fulltext_tmpfile}) {

2542

return $self->{fulltext_tmpfile};

2543

}

2544

2545

my ($tmpf, $tmpfh) = Mail::SpamAssassin::Util::secure_tmpfile();

2546

print $tmpfh $$fulltext or die "error writing to $tmpf: $!";

2547

close $tmpfh or die "error closing $tmpf: $!";

2548

2549

$self->{fulltext_tmpfile} = $tmpf;

2550

2551

return $self->{fulltext_tmpfile};

2552

}

2553

2554

=item $status->delete_fulltext_tmpfile ()

2555

2556

Will cleanup after a $status->create_fulltext_tmpfile() call. Deletes the

2557

temporary file and uncaches the filename.

2558

2559

=cut

2560

2561

sub delete_fulltext_tmpfile {

2562

my ($self) = @_;

2563

if (defined $self->{fulltext_tmpfile}) {

2564

unlink $self->{fulltext_tmpfile}

2565

or die "cannot unlink ".$self->{fulltext_tmpfile}.": $!";

2566

$self->{fulltext_tmpfile} = undef;

2567

}

2568

}

2569

2570

###########################################################################

2571

2572

sub all_from_addrs {

2573

my ($self) = @_;

2574

2575

if (exists $self->{all_from_addrs}) { return @{$self->{all_from_addrs}}; }

2576

2577

my @addrs;

2578

2579

# Resent- headers take priority, if present. see bug 672

2580

my $resent = $self->get('Resent-From',undef);

2581

if (defined $resent && $resent =~ /\S/) {

2582

@addrs = $self->{main}->find_all_addrs_in_line ($resent);

2583

}

2584

else {

2585

# bug 2292: Used to use find_all_addrs_in_line() with the same

2586

# headers, but the would catch addresses in comments which caused

2587

# FNs for things like whitelist_from. Since all of these are From

2588

# headers, there should only be 1 address in each anyway (not exactly

2589

# true, RFC 2822 allows multiple addresses in a From header field),

2590

# so use the :addr code...

2591

# bug 3366: some addresses come in as 'foo@bar...', which is invalid.

2592

# so deal with the multiple periods.

2593

## no critic

2594

@addrs = map { tr/././s; $_ } grep { $_ ne '' }

2595

($self->get('From:addr'), # std

2596

$self->get('Envelope-Sender:addr'), # qmail: new-inject(1)

2597

$self->get('Resent-Sender:addr'), # procmailrc manpage

2598

$self->get('X-Envelope-From:addr'), # procmailrc manpage

2599

$self->get('EnvelopeFrom:addr')); # SMTP envelope

2600

# http://www.cs.tut.fi/~jkorpela/headers.html is useful here

2601

}

2602

2603

# Remove duplicate addresses

2604

my %addrs = map { $_ => 1 } @addrs;

2605

@addrs = keys %addrs;

2606

2607

dbg("eval: all '*From' addrs: " . join(" ", @addrs));

2608

$self->{all_from_addrs} = \@addrs;

2609

return @addrs;

2610

}

2611

2612

sub all_to_addrs {

2613

my ($self) = @_;

2614

2615

if (exists $self->{all_to_addrs}) { return @{$self->{all_to_addrs}}; }

2616

2617

my @addrs;

2618

2619

# Resent- headers take priority, if present. see bug 672

2620

my $resent = join('', $self->get('Resent-To'), $self->get('Resent-Cc'));

2621

if ($resent =~ /\S/) {

2622

@addrs = $self->{main}->find_all_addrs_in_line($resent);

2623

} else {

2624

# OK, a fetchmail trick: try to find the recipient address from

2625

# the most recent 3 Received lines. This is required for sendmail,

2626

# since it does not add a helpful header like exim, qmail

2627

# or Postfix do.

2628

2629

my $rcvd = $self->get('Received');

2630

$rcvd =~ s/\n[ \t]+/ /gs;

2631

$rcvd =~ s/\n+/\n/gs;

2632

2633

my @rcvdlines = split(/\n/, $rcvd, 4); pop @rcvdlines; # forget last one

2634

my @rcvdaddrs;

2635

foreach my $line (@rcvdlines) {

2636

if ($line =~ / for (\S+\@\S+);/) { push (@rcvdaddrs, $1); }

2637

}

2638

2639

@addrs = $self->{main}->find_all_addrs_in_line (

2640

join('',

2641

join(" ", @rcvdaddrs)."\n",

2642

$self->get('To'), # std

2643

$self->get('Apparently-To'), # sendmail, from envelope

2644

$self->get('Delivered-To'), # Postfix, poss qmail

2645

$self->get('Envelope-Recipients'), # qmail: new-inject(1)

2646

$self->get('Apparently-Resent-To'), # procmailrc manpage

2647

$self->get('X-Envelope-To'), # procmailrc manpage

2648

$self->get('Envelope-To'), # exim

2649

$self->get('X-Delivered-To'), # procmail quick start

2650

$self->get('X-Original-To'), # procmail quick start

2651

$self->get('X-Rcpt-To'), # procmail quick start

2652

$self->get('X-Real-To'), # procmail quick start

2653

$self->get('Cc'))); # std

2654

# those are taken from various sources; thanks to Nancy McGough, who

2655

# noted some in <http://www.ii.com/internet/robots/procmail/qs/#envelope>

2656

}

2657

2658

dbg("eval: all '*To' addrs: " . join(" ", @addrs));

2659

$self->{all_to_addrs} = \@addrs;

2660

return @addrs;

2661

2662

# http://www.cs.tut.fi/~jkorpela/headers.html is useful here, also

2663

# http://www.exim.org/pipermail/exim-users/Week-of-Mon-20001009/021672.html

2664

}

2665

2666

###########################################################################

2667

2668

2669

__END__

2670

2671

=back

2672

2673

=head1 SEE ALSO

2674

2675

C<Mail::SpamAssassin>

2676

C<spamassassin>

2677

Older »