~ubuntu-branches/ubuntu/utopic/spamassassin/utopic-updates

« back to all changes in this revision

Viewing changes to .pc/98_bug721565-syntax-5.18/lib/Mail/SpamAssassin/Util.pm

Committer: Package Import Robot
Author(s): Noah Meyerhans
Date: 2014-02-14 22:45:15 UTC
mfrom: (0.8.1) (0.6.2) (5.1.22 sid)
Revision ID: package-import@ubuntu.com-20140214224515-z1es2twos8xh7n2y

Tags: 3.4.0-1

http://bugs.debian.org/738963

http://bugs.debian.org/738872

http://bugs.debian.org/738867

http://bugs.debian.org/738951

http://bugs.debian.org/738974

* New upstream version! (Closes: 738963, 738872, 738867)
* Scrub the environment when switching to the debian-spamd user in
  postinst and cron.daily. (Closes: 738951)
* Enhancements to postinst to better manage ownership of
  /var/lib/spamassassin, via Iain Lane <iain.lane@canonical.com>
  (Closes: 738974)

files added:
.pc/90_pod_cleanup/lib/Mail/SpamAssassin/Plugin

.pc/90_pod_cleanup/lib/Mail/SpamAssassin/Plugin/DCC.pm

.pc/90_pod_cleanup/lib/Mail/SpamAssassin/Plugin/DNSEval.pm

.pc/98_sa-compile-quiet

.pc/98_sa-compile-quiet/lib

.pc/98_sa-compile-quiet/lib/Mail

.pc/98_sa-compile-quiet/lib/Mail/SpamAssassin

.pc/98_sa-compile-quiet/lib/Mail/SpamAssassin/Plugin

.pc/98_sa-compile-quiet/lib/Mail/SpamAssassin/Plugin/BodyRuleBaseExtractor.pm

META.json

debian/patches/98_sa-compile-quiet

lib/Mail/SpamAssassin/BayesStore/Redis.pm

lib/Mail/SpamAssassin/Plugin/AskDNS.pm

lib/Mail/SpamAssassin/Util/TinyRedis.pm

pkgrules/10_hasbase.cf

pkgrules/20_mailspike.cf

rules/v340.pre

t/autolearn.t

t/autolearn_force.t

t/autolearn_force_fail.t

t/basic_lint_without_sandbox.t

t/data/spam/badmime3.txt

t/dnsbl_subtests.t

files removed:
.pc/50_sa-learn_fix_empty_list_handling

.pc/50_sa-learn_fix_empty_list_handling/sa-learn.raw

.pc/60_bug_684709

.pc/60_bug_684709/lib

.pc/60_bug_684709/lib/Mail

.pc/60_bug_684709/lib/Mail/SpamAssassin

.pc/60_bug_684709/lib/Mail/SpamAssassin/Message.pm

.pc/85_disable_SSLv2

.pc/85_disable_SSLv2/spamc

.pc/85_disable_SSLv2/spamc/libspamc.c

.pc/85_disable_SSLv2/spamc/libspamc.h

.pc/85_disable_SSLv2/spamc/spamc.c

.pc/85_disable_SSLv2/spamc/spamc.pod

.pc/85_disable_SSLv2/spamd

.pc/85_disable_SSLv2/spamd/spamd.raw

.pc/90_missing_tld

.pc/90_missing_tld/lib

.pc/90_missing_tld/lib/Mail

.pc/90_missing_tld/lib/Mail/SpamAssassin

.pc/90_missing_tld/lib/Mail/SpamAssassin/Util

.pc/90_missing_tld/lib/Mail/SpamAssassin/Util/RegistrarBoundaries.pm

.pc/90_missing_tld/t

.pc/90_missing_tld/t/uri_text.t

.pc/91_no_rfc_ignorant

.pc/91_no_rfc_ignorant/pkgrules

.pc/91_no_rfc_ignorant/pkgrules/20_dnsbl_tests.cf

.pc/91_no_rfc_ignorant/pkgrules/30_text_de.cf

.pc/91_no_rfc_ignorant/pkgrules/50_scores.cf

.pc/91_no_rfc_ignorant/pkgrules/local.cf

.pc/91_no_rfc_ignorant/rules

.pc/91_no_rfc_ignorant/rules/STATISTICS-set1.txt

.pc/91_no_rfc_ignorant/rules/STATISTICS-set3.txt

.pc/91_no_rfc_ignorant/rules/active.list

.pc/95_bug694504-spamdforkscaling-crash

.pc/95_bug694504-spamdforkscaling-crash/lib

.pc/95_bug694504-spamdforkscaling-crash/lib/Mail

.pc/95_bug694504-spamdforkscaling-crash/lib/Mail/SpamAssassin

.pc/95_bug694504-spamdforkscaling-crash/lib/Mail/SpamAssassin/Logger

.pc/95_bug694504-spamdforkscaling-crash/lib/Mail/SpamAssassin/Logger/Syslog.pm

.pc/95_bug694504-spamdforkscaling-crash/spamd

.pc/95_bug694504-spamdforkscaling-crash/spamd/spamd.raw

.pc/96_disable_njabl

.pc/96_disable_njabl/pkgrules

.pc/96_disable_njabl/pkgrules/20_dnsbl_tests.cf

.pc/96_disable_njabl/pkgrules/30_text_de.cf

.pc/96_disable_njabl/pkgrules/30_text_fr.cf

.pc/96_disable_njabl/pkgrules/30_text_nl.cf

.pc/96_disable_njabl/pkgrules/30_text_pl.cf

.pc/96_disable_njabl/pkgrules/50_scores.cf

.pc/98_bug721565-syntax-5.18

.pc/98_bug721565-syntax-5.18/lib

.pc/98_bug721565-syntax-5.18/lib/Mail

.pc/98_bug721565-syntax-5.18/lib/Mail/SpamAssassin

.pc/98_bug721565-syntax-5.18/lib/Mail/SpamAssassin/AsyncLoop.pm

.pc/98_bug721565-syntax-5.18/lib/Mail/SpamAssassin/Conf

.pc/98_bug721565-syntax-5.18/lib/Mail/SpamAssassin/Conf/Parser.pm

.pc/98_bug721565-syntax-5.18/lib/Mail/SpamAssassin/DnsResolver.pm

.pc/98_bug721565-syntax-5.18/lib/Mail/SpamAssassin/Message.pm

.pc/98_bug721565-syntax-5.18/lib/Mail/SpamAssassin/PerMsgStatus.pm

.pc/98_bug721565-syntax-5.18/lib/Mail/SpamAssassin/Util.pm

debian/patches/50_sa-learn_fix_empty_list_handling

debian/patches/60_bug_684709

debian/patches/85_disable_SSLv2

debian/patches/90_missing_tld

debian/patches/91_no_rfc_ignorant

debian/patches/95_bug694504-spamdforkscaling-crash

debian/patches/96_disable_njabl

debian/patches/98_bug721565-syntax-5.18

rules/STATISTICS-set0.txt

rules/STATISTICS-set1.txt

rules/STATISTICS-set2.txt

rules/STATISTICS-set3.txt

t/dkim2.t

t/spamd_ssl_v2.t

t/spamd_ssl_v23.t

files modified:
.pc/10_change_config_paths/INSTALL

.pc/10_change_config_paths/README

.pc/10_change_config_paths/UPGRADE

.pc/10_change_config_paths/lib/Mail/SpamAssassin/Conf.pm

.pc/10_change_config_paths/lib/Mail/SpamAssassin/Plugin/Test.pm

.pc/10_change_config_paths/lib/spamassassin-run.pod

.pc/10_change_config_paths/sa-compile.raw

.pc/10_change_config_paths/sa-learn.raw

.pc/10_change_config_paths/spamc/spamc.pod

.pc/10_change_config_paths/spamd/spamd.raw

.pc/10_change_config_paths/sql/README

.pc/20_edit_spamc_pod/spamc/spamc.pod

.pc/30_edit_README/README

.pc/55_disable_nagios_epm/sa-check_spamd.raw

.pc/90_pod_cleanup/lib/Mail/SpamAssassin/Conf.pm

.pc/97_bug720499-pod-5.18/sa-check_spamd.raw

.pc/applied-patches

CREDITS

Changes

INSTALL

MANIFEST

MANIFEST.SKIP

META.yml

Makefile.PL

README

UPGRADE

build/mkrules

build/parse-rules-for-masses

debian/changelog

debian/control

debian/patches/10_change_config_paths

debian/patches/20_edit_spamc_pod

debian/patches/90_pod_cleanup

debian/patches/series

debian/sa-compile.postinst

debian/spamassassin.cron.daily

debian/spamassassin.postinst

debian/tests/daemon *

debian/tests/get_spam_points.py *

lib/Mail/SpamAssassin.pm

lib/Mail/SpamAssassin/AICache.pm

lib/Mail/SpamAssassin/ArchiveIterator.pm

lib/Mail/SpamAssassin/AsyncLoop.pm

lib/Mail/SpamAssassin/AutoWhitelist.pm

lib/Mail/SpamAssassin/Bayes/CombineChi.pm

lib/Mail/SpamAssassin/Bayes/CombineNaiveBayes.pm

lib/Mail/SpamAssassin/BayesStore.pm

lib/Mail/SpamAssassin/BayesStore/BDB.pm

lib/Mail/SpamAssassin/BayesStore/DBM.pm

lib/Mail/SpamAssassin/BayesStore/MySQL.pm

lib/Mail/SpamAssassin/BayesStore/PgSQL.pm

lib/Mail/SpamAssassin/BayesStore/SQL.pm

lib/Mail/SpamAssassin/Client.pm

lib/Mail/SpamAssassin/Conf.pm

lib/Mail/SpamAssassin/Conf/LDAP.pm

lib/Mail/SpamAssassin/Conf/Parser.pm

lib/Mail/SpamAssassin/Conf/SQL.pm

lib/Mail/SpamAssassin/Constants.pm

lib/Mail/SpamAssassin/Dns.pm

lib/Mail/SpamAssassin/DnsResolver.pm

lib/Mail/SpamAssassin/HTML.pm

lib/Mail/SpamAssassin/Locales.pm

lib/Mail/SpamAssassin/Logger.pm

lib/Mail/SpamAssassin/Logger/File.pm

lib/Mail/SpamAssassin/Logger/Syslog.pm

lib/Mail/SpamAssassin/MailingList.pm

lib/Mail/SpamAssassin/Message.pm

lib/Mail/SpamAssassin/Message/Metadata.pm

lib/Mail/SpamAssassin/Message/Metadata/Received.pm

lib/Mail/SpamAssassin/Message/Node.pm

lib/Mail/SpamAssassin/NetSet.pm

lib/Mail/SpamAssassin/PerMsgStatus.pm

lib/Mail/SpamAssassin/Plugin.pm

lib/Mail/SpamAssassin/Plugin/ASN.pm

lib/Mail/SpamAssassin/Plugin/AWL.pm

lib/Mail/SpamAssassin/Plugin/AutoLearnThreshold.pm

lib/Mail/SpamAssassin/Plugin/Bayes.pm

lib/Mail/SpamAssassin/Plugin/BodyEval.pm

lib/Mail/SpamAssassin/Plugin/BodyRuleBaseExtractor.pm

lib/Mail/SpamAssassin/Plugin/Check.pm

lib/Mail/SpamAssassin/Plugin/DCC.pm

lib/Mail/SpamAssassin/Plugin/DKIM.pm

lib/Mail/SpamAssassin/Plugin/DNSEval.pm

lib/Mail/SpamAssassin/Plugin/FreeMail.pm

lib/Mail/SpamAssassin/Plugin/Hashcash.pm

lib/Mail/SpamAssassin/Plugin/HeaderEval.pm

lib/Mail/SpamAssassin/Plugin/ImageInfo.pm

lib/Mail/SpamAssassin/Plugin/MIMEEval.pm

lib/Mail/SpamAssassin/Plugin/PhishTag.pm

lib/Mail/SpamAssassin/Plugin/Pyzor.pm

lib/Mail/SpamAssassin/Plugin/Razor2.pm

lib/Mail/SpamAssassin/Plugin/RelayCountry.pm

lib/Mail/SpamAssassin/Plugin/Reuse.pm

lib/Mail/SpamAssassin/Plugin/SPF.pm

lib/Mail/SpamAssassin/Plugin/Test.pm

lib/Mail/SpamAssassin/Plugin/TextCat.pm

lib/Mail/SpamAssassin/Plugin/URIDNSBL.pm

lib/Mail/SpamAssassin/Plugin/URIEval.pm

lib/Mail/SpamAssassin/Plugin/VBounce.pm

lib/Mail/SpamAssassin/Plugin/WLBLEval.pm

lib/Mail/SpamAssassin/Plugin/WhiteListSubject.pm

lib/Mail/SpamAssassin/PluginHandler.pm

lib/Mail/SpamAssassin/Reporter.pm

lib/Mail/SpamAssassin/SQLBasedAddrList.pm

lib/Mail/SpamAssassin/SpamdForkScaling.pm

lib/Mail/SpamAssassin/Timeout.pm

lib/Mail/SpamAssassin/Util.pm

lib/Mail/SpamAssassin/Util/DependencyInfo.pm

lib/Mail/SpamAssassin/Util/Progress.pm

lib/Mail/SpamAssassin/Util/RegistrarBoundaries.pm

lib/Mail/SpamAssassin/Util/ScopedTimer.pm

lib/spamassassin-run.pod

pkgrules/10_default_prefs.cf

pkgrules/20_aux_tlds.cf

pkgrules/20_dnsbl_tests.cf

pkgrules/20_drugs.cf

pkgrules/20_dynrdns.cf

pkgrules/20_fake_helo_tests.cf

pkgrules/20_freemail.cf

pkgrules/20_freemail_domains.cf

pkgrules/20_head_tests.cf

pkgrules/20_html_tests.cf

pkgrules/20_phrases.cf

pkgrules/20_ratware.cf

pkgrules/20_uri_tests.cf

pkgrules/20_vbounce.cf

pkgrules/25_dcc.cf

pkgrules/25_replace.cf

pkgrules/25_spf.cf

pkgrules/25_uribl.cf

pkgrules/30_text_de.cf

pkgrules/30_text_fr.cf

pkgrules/30_text_nl.cf

pkgrules/30_text_pl.cf

pkgrules/30_text_pt_br.cf

pkgrules/50_scores.cf

pkgrules/60_adsp_override_dkim.cf

pkgrules/60_whitelist_dkim.cf

pkgrules/72_active.cf

pkgrules/72_scores.cf

pkgrules/73_sandbox_manual_scores.cf

pkgrules/STATISTICS-set0-72_scores.cf.txt

pkgrules/STATISTICS-set1-72_scores.cf.txt

pkgrules/local.cf

pkgrules/regression_tests.cf

rules/active.list

rules/init.pre

rules/local.cf

rules/regression_tests.cf

sa-awl.raw

sa-check_spamd.raw

sa-compile.raw

sa-learn.raw

sa-update.raw

spamassassin.raw

spamc/libspamc.c

spamc/libspamc.h

spamc/spamc.c

spamc/spamc.pod

spamc/utils.c

spamc/utils.h

spamd/netbsd-rc-script.sh

spamd/spamd.raw

sql/README

sql/README.bayes

sql/awl_mysql.sql

sql/bayes_mysql.sql

sql/userpref_pg.sql

t/SATest.pm

t/bayesbdb.t

t/bayesdbm.t

t/bayesdbm_flock.t

t/bayessdbm.t

t/bayessdbm_seen_delete.t

t/bayessql.t

t/config.dist

t/config_errs.t

t/cross_user_config_leak.t

t/data/01_test_rules.cf

t/dcc.t

t/debug.t

t/dkim.t

t/duplicates.t

t/html_colors.t

t/if_can.t

t/lang_pl_tests.t

t/mimeparse.t

t/mkrules_else.t *

t/priorities.t

t/rcvd_parser.t

t/re_base_extraction.t

t/rule_multiple.t

t/rule_names.t

t/sa_check_spamd.t

t/sa_compile.t

t/spamd_hup.t

t/spamd_maxsize.t

t/spamd_prefork_stress_4.t

t/spamd_protocol_10.t

t/spamd_sql_prefs.t

t/spamd_unix_and_tcp.t

t/spf.t

t/trust_path.t

t/uri_text.t

t/uribl.t

t/uribl_all_types.t

t/uribl_domains_only.t

t/uribl_ips_only.t

t/whitelist_addrs.t

Show diffs side-by-side

added added

removed removed

.pc/98_bug721565-syntax-5.18/lib/Mail/SpamAssassin/Util.pm

# <@LICENSE>

# Licensed to the Apache Software Foundation (ASF) under one or more

# contributor license agreements. See the NOTICE file distributed with

# this work for additional information regarding copyright ownership.

# The ASF licenses this file to you under the Apache License, Version 2.0

# (the "License"); you may not use this file except in compliance with

# the License. You may obtain a copy of the License at:

# http://www.apache.org/licenses/LICENSE-2.0

# Unless required by applicable law or agreed to in writing, software

# distributed under the License is distributed on an "AS IS" BASIS,

# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

# See the License for the specific language governing permissions and

# limitations under the License.

# </@LICENSE>

=head1 NAME

Mail::SpamAssassin::Util - utility functions

=head1 DESCRIPTION

A general class for utility functions. Please use this for functions that

stand alone, without requiring a $self object. Portability functions

especially.

NOTE: The functions in this module are to be considered private. Their API may

change at any point, and it's expected that they'll only be used by other

Mail::SpamAssassin modules. (TODO: we should probably revisit this if

it's useful for plugin development.)

NOTE: Utility functions should not be changing global variables such

as $_, $1, $2, ... $/, etc. unless explicitly documented. If these

variables are in use by these functions, they should be localized.

=over 4

=cut

package Mail::SpamAssassin::Util;

use strict;

use warnings;

use bytes;

use re 'taint';

use Mail::SpamAssassin::Logger;

BEGIN {

use Exporter ();

use vars qw (

@ISA @EXPORT @EXPORT_OK

$AM_TAINTED

);

@ISA = qw(Exporter);

@EXPORT = ();

@EXPORT_OK = qw(&local_tz &base64_decode &untaint_var &untaint_file_path

&exit_status_str &proc_status_ok &am_running_on_windows);

}

use Mail::SpamAssassin;

use Mail::SpamAssassin::Util::RegistrarBoundaries;

use Config;

use IO::Handle;

use File::Spec;

use File::Basename;

use Time::Local;

use Sys::Hostname (); # don't import hostname() into this namespace!

use Fcntl;

use Errno qw(ENOENT EACCES EEXIST);

use POSIX qw(:sys_wait_h WIFEXITED WIFSIGNALED WIFSTOPPED WEXITSTATUS

WTERMSIG WSTOPSIG);

###########################################################################

use constant HAS_MIME_BASE64 => eval { require MIME::Base64; };

use constant RUNNING_ON_WINDOWS => ($^O =~ /^(?:mswin|dos|os2)/oi);

###########################################################################

# find an executable in the current $PATH (or whatever for that platform)

{
  # Remembers whether the search PATH has already been written to the debug
  # log, so that it is shown only on the first lookup.
  my $displayed_path = 0;

  # Search every directory returned by File::Spec->path() for a plain file
  # called $filename that is executable.
  #
  # Returns the full name of the first executable match, or undef when no
  # directory contains one.  Files that exist but are not executable are
  # reported through dbg() and skipped.
  sub find_executable_in_env_path {
    my ($filename) = @_;

    # make sure PATH has been sanitized before it is consulted
    clean_path_in_taint_mode();

    my @search_dirs = File::Spec->path();
    unless ($displayed_path++) {
      dbg("util: current PATH is: ".join($Config{'path_sep'}, @search_dirs));
    }

    for my $dir (@search_dirs) {
      my $fname = File::Spec->catfile($dir, $filename);
      next unless -f $fname;

      if (-x $fname) {
        dbg("util: executable for $filename was found at $fname");
        return $fname;
      }

      dbg("util: $filename was found at $fname, but isn't executable");
    }

    return undef;
  }
}

112

113

###########################################################################

114

115

# taint mode: delete more unsafe vars for exec, as per perlsec

116

{
  # Counts invocations; the environment is only examined on the first call,
  # every later call returns immediately.
  my $cleaned_taint_path = 0;

  # When running in taint mode on a non-Windows platform, delete the
  # environment variables IFS, CDPATH, ENV and BASH_ENV and rebuild
  # $ENV{PATH} from the current PATH entries, keeping only directories that
  # are absolute, can be stat()ed, really are directories and are not
  # world-writable.  Does nothing outside taint mode.
  sub clean_path_in_taint_mode {
    return if $cleaned_taint_path++;
    am_running_in_taint_mode() or return;

    dbg("util: taint mode: deleting unsafe environment variables, resetting PATH");

    if (RUNNING_ON_WINDOWS) {
      dbg("util: running on Win32, skipping PATH cleaning");
      return;
    }

    delete $ENV{$_} for qw(IFS CDPATH ENV BASH_ENV);

    # Collect the PATH entries that survive all the checks below.
    my @kept;
    for my $entry (File::Spec->path()) {
      next unless $entry;
      my $dir = $entry;

      # untaint if at least 1 char and no NL (is the restriction intentional?)
      local ($1);
      $dir = untaint_var($1)  if $dir =~ /^(.+)$/;

      # normalize, e.g. 'foo/./bar' -> 'foo/bar'
      $dir = File::Spec->canonpath($dir);

      unless (File::Spec->file_name_is_absolute($dir)) {
        dbg("util: PATH included '$dir', which is not absolute, dropping");
        next;
      }

      my @st = stat($dir);
      unless (@st) {
        dbg("util: PATH included '$dir', which is unusable, dropping: $!");
        next;
      }

      # '_' re-uses the result of the stat() call just above
      unless (-d _) {
        dbg("util: PATH included '$dir', which isn't a directory, dropping");
        next;
      }

      # Mode bit 2 is "writable by others".  World-writable directories are
      # considered insecure.  The parent directories are not checked.
      if ($st[2] & 2) {
        dbg("util: PATH included '$dir', which is world writable, dropping");
        next;
      }

      dbg("util: PATH included '$dir', keeping");
      push @kept, $dir;
    }

    $ENV{'PATH'} = join($Config{'path_sep'}, @kept);
    dbg("util: final PATH set to: ".$ENV{'PATH'});
  }
}

173

174

# taint mode: are we running in taint mode? 1 for yes, 0 for no.

175

# Report whether perl is running in taint mode.  The answer is computed on
# the first call and cached in the package variable $AM_TAINTED; later calls
# return the cached value.
#
# On perl 5.8 and later the result is the value of ${^TAINT}; on older perls
# it is the boolean outcome of the probe below.
sub am_running_in_taint_mode {
  return $AM_TAINTED if defined $AM_TAINTED;

  if ($] < 5.008) {
    # Older perls: obtain a string from the outside world, preferably a
    # directory entry read from the current, root or temporary directory.
    my $blank;
    for my $d ((File::Spec->curdir, File::Spec->rootdir, File::Spec->tmpdir)) {
      opendir(TAINT, $d) || next;
      $blank = readdir(TAINT);
      closedir(TAINT)  or die "error closing directory $d: $!";
      last;
    }
    if (!defined $blank || !$blank) {
      # these are sometimes untainted, so this is less preferable than readdir
      $blank = join('', values %ENV, $0, @ARGV);
    }
    # reduce it to an empty string (presumably one that keeps its taintedness)
    $blank = substr($blank, 0, 0);
    # seriously mind-bending perl: the outcome is true only when the inner
    # string eval makes the outer block eval fail
    $AM_TAINTED = not eval { eval "1 || $blank" || 1 };
  }
  else {
    # perl 5.8 and above; ${^TAINT} is a syntax violation in 5.005, hence
    # the string eval
    $AM_TAINTED = eval q(no warnings q(syntax); ${^TAINT});
  }

  dbg("util: running in taint mode? %s", $AM_TAINTED ? "yes" : "no");
  return $AM_TAINTED;
}

202

203

###########################################################################

204

205

# Accessor for the RUNNING_ON_WINDOWS constant, so that callers outside this
# package (the function is listed in @EXPORT_OK) can query the platform.
sub am_running_on_windows {
  return RUNNING_ON_WINDOWS();
}

208

209

###########################################################################

210

211

# untaint a path to a file, e.g. "/home/jm/.spamassassin/foo",

212

# "C:\Program Files\SpamAssassin\tmp\foo", "/home/��t/etc".

213

214

# TODO: this does *not* handle locales well. We cannot use "use locale"

215

# and \w, since that will not detaint the data. So instead just allow the

216

# high-bit chars from ISO-8859-1, none of which have special metachar

217

# meanings (as far as I know).

218

219

# Untaint a file path after checking it against a whitelist of characters.
#
# Returns nothing for an undefined argument and '' for the empty string
# (a safe path).  A path that matches the whitelist is returned untainted,
# with any leading whitespace removed.  Any other path triggers a warning
# and is returned unchanged (i.e. still tainted).
sub untaint_file_path {
  my ($path) = @_;

  defined($path) or return;
  return '' if $path eq '';

  local ($1);
  # Barry Jaspan: allow ~ and spaces, good for Windows.  They are accepted
  # anywhere except as the first character of the path.
  my $chars = '-_A-Za-z\xA0-\xFF0-9\.\%\@\=\+\,\/\\\:';
  my $safe_path_re = qr/^\s*([$chars][${chars}~ ]*)$/o;

  unless ($path =~ $safe_path_re) {
    warn "util: refusing to untaint suspicious path: \"$path\"\n";
    return $path;
  }

  return untaint_var($1);
}

238

239

# Untaint a host name after validating its syntax.
#
# Returns nothing for an undefined argument and '' for the empty string.
# A name of at most 255 characters made of dot-separated labels is returned
# untainted; anything else triggers a warning and is returned unchanged.
sub untaint_hostname {
  my ($host) = @_;

  defined($host) or return;
  return '' if $host eq '';

  # from RFC 1035, but allowing domains starting with numbers:
  #   $label = q/[A-Za-z\d](?:[A-Za-z\d-]{0,61}[A-Za-z\d])?/;
  #   $domain = qq<$label(?:\.$label)*>;
  #   length($host) <= 255 && $host =~ /^($domain)$/
  # expanded (no variables in the re) because of a tainting bug in Perl 5.8.0
  my $looks_valid = length($host) <= 255 && $host =~ /^[a-z\d](?:[a-z\d-]{0,61}[a-z\d])?(?:\.[a-z\d](?:[a-z\d-]{0,61}[a-z\d])?)*$/i;

  unless ($looks_valid) {
    warn "util: cannot untaint hostname: \"$host\"\n";
    return $host;
  }

  return untaint_var($host);
}

258

259

# This sub takes a scalar or a reference to an array, hash, scalar or another

260

# reference and recursively untaints all its values (and keys if it's a

261

# reference to a hash). It should be used with caution as blindly untainting

262

# values subverts the purpose of working in taint mode. It will return the

263

# untainted value if requested but to avoid unnecessary copying, the return

264

# value should be ignored when working on lists.

265

# Bad:

266

# %ENV = untaint_var(\%ENV);

267

# Better:

268

# untaint_var(\%ENV);

269

270

# Recursively untaint the argument.
#
#   plain scalar     -> an untainted copy is returned (undef stays undef)
#   array reference  -> every element is untainted in place
#   hash reference   -> every key and value is untainted in place; for %ENV
#                       entries with an undefined value are deleted
#   scalar/ref ref   -> the referenced value is untainted in place
#   anything else    -> a warning is issued and the argument is left alone
#
# For references the reference itself is returned, except that in list
# context an array or hash reference yields its (untainted) contents.
sub untaint_var {
  no re 'taint';  # override a  "use re 'taint'"  from outer scope
  my ($arg) = @_;
  return undef unless defined $arg;

  my $type = ref $arg;

  unless ($type) {
    # avoid Perl taint bug: tainted global $1 propagates taintedness
    local ($1);
    $arg =~ /^(.*)\z/s;
    return $1;
  }

  if ($type eq 'ARRAY') {
    for my $element (@{$arg}) {
      $element = untaint_var($element);
    }
    return @{$arg} if wantarray;
  }
  elsif ($type eq 'HASH') {
    my $is_env = ($arg == \%ENV);
    while (my ($k, $v) = each %{$arg}) {
      if ($is_env && !defined $v) {
        delete $arg->{$k};
        next;
      }
      $arg->{untaint_var($k)} = untaint_var($v);
    }
    return %{$arg} if wantarray;
  }
  elsif ($type eq 'SCALAR' || $type eq 'REF') {
    ${$arg} = untaint_var(${$arg});
  }
  else {
    warn "util: can't untaint a " . $type . "!\n";
  }

  return $arg;
}

302

303

###########################################################################

304

305

# Return a tainted copy of the argument.  An undefined argument is returned
# as is, since "undef" cannot be tainted.
sub taint_var {
  my ($v) = @_;

  if (defined $v) {
    # $^X is apparently "always tainted".
    # Concatenating an empty tainted string taints the result.
    my $tainted_empty = substr($^X, 0, 0);
    return $v . $tainted_empty;
  }

  return $v;
}

313

314

###########################################################################

315

316

# map process termination status number to an informative string, and

317

# append optional message (dual-valued errno or a string or a number),

318

# returning the resulting string

319

320

# Turn a process termination status (as found in $?) into a readable string.
#
#   $stat   the wait status; undef yields '(no status)'
#   $errno  optional; a dual-valued $!, a string or a number.  When it is
#           neither zero nor empty it is appended after a comma.
#
# Returns the resulting string.
sub exit_status_str($;$) {
  my ($stat, $errno) = @_;

  # names for the signals that get a specific label; all others are 'DIED'
  my %signal_label = (
    1  => 'HANGUP',
    2  => 'interrupted',
    6  => 'ABORTED',
    9  => 'KILLED',
    15 => 'TERMINATED',
  );

  my $str;
  if (!defined($stat)) {
    $str = '(no status)';
  }
  elsif (am_running_on_windows()) {
    $str = 'exit (running under Windows, cannot determine exit status)';
  }
  elsif (WIFEXITED($stat)) {
    $str = sprintf("exit %d", WEXITSTATUS($stat));
  }
  elsif (WIFSTOPPED($stat)) {
    $str = sprintf("stopped, signal %d", WSTOPSIG($stat));
  }
  else {
    my $sig = WTERMSIG($stat);
    my $label = exists $signal_label{$sig} ? $signal_label{$sig} : 'DIED';
    $str = sprintf("%s, signal %d (%04x)", $label, $sig, $stat);
  }

  if (defined $errno) {  # deal with dual-valued and plain variables
    my $worth_showing = (0+$errno) != 0 || ($errno ne '' && $errno ne '0');
    $str .= ', '.$errno if $worth_showing;
  }

  return $str;
}

344

345

###########################################################################

346

347

# check errno to be 0 and a process exit status to be in the list of success

348

# status codes, returning true if both are ok, and false otherwise

349

350

# Decide whether a child process finished successfully.
#
#   $exit_status  the wait status of the process
#   $errno        optional; when defined it must be 0
#   @success      optional list of exit codes regarded as success; an empty
#                 list means that only exit code 0 is good
#
# Returns true when errno is acceptable, the process exited normally and its
# exit code is one of the accepted codes; false otherwise.
sub proc_status_ok($;$@) {
  my ($exit_status, $errno, @success) = @_;

  return 0 if defined $errno && $errno != 0;
  return 0 unless WIFEXITED($exit_status);

  my $code = WEXITSTATUS($exit_status);

  # empty list implies only status 0 is good
  return $code == 0 unless @success;

  return 1 if grep { $_ == $code } @success;
  return 0;
}

360

361

###########################################################################

362

363

# timezone mappings: in case of conflicts, use RFC 2822, then most

364

# common and least conflicting mapping

365

my %TZ = (

366

# standard

367

'UT' => '+0000',

368

'UTC' => '+0000',

369

# US and Canada

370

'NDT' => '-0230',

371

'AST' => '-0400',

372

'ADT' => '-0300',

373

'NST' => '-0330',

374

'EST' => '-0500',

375

'EDT' => '-0400',

376

'CST' => '-0600',

377

'CDT' => '-0500',

378

'MST' => '-0700',

379

'MDT' => '-0600',

380

'PST' => '-0800',

381

'PDT' => '-0700',

382

'HST' => '-1000',

383

'AKST' => '-0900',

384

'AKDT' => '-0800',

385

'HADT' => '-0900',

386

'HAST' => '-1000',

387

# Europe

388

'GMT' => '+0000',

389

'BST' => '+0100',

390

'IST' => '+0100',

391

'WET' => '+0000',

392

'WEST' => '+0100',

393

'CET' => '+0100',

394

'CEST' => '+0200',

395

'EET' => '+0200',

396

'EEST' => '+0300',

397

'MSK' => '+0300',

398

'MSD' => '+0400',

399

'MET' => '+0100',

400

'MEZ' => '+0100',

401

'MEST' => '+0200',

402

'MESZ' => '+0200',

403

# South America

404

'BRST' => '-0200',

405

'BRT' => '-0300',

406

# Australia

407

'AEST' => '+1000',

408

'AEDT' => '+1100',

409

'ACST' => '+0930',

410

'ACDT' => '+1030',

411

'AWST' => '+0800',

412

# New Zealand

413

'NZST' => '+1200',

414

'NZDT' => '+1300',

415

# Asia

416

'JST' => '+0900',

417

'KST' => '+0900',

418

'HKT' => '+0800',

419

'SGT' => '+0800',

420

'PHT' => '+0800',

421

# Middle East

422

'IDT' => '+0300',

423

);

424

425

# month mappings

426

my %MONTH = (jan => 1, feb => 2, mar => 3, apr => 4, may => 5, jun => 6,

427

jul => 7, aug => 8, sep => 9, oct => 10, nov => 11, dec => 12);

428

429

my $LOCALTZ;

430

431

# Return the local time zone as a numeric offset string in the form used by
# RFC 822/2822 dates, e.g. "+0100" or "-0500".  The value is computed once
# and cached in $LOCALTZ.
sub local_tz {
  return $LOCALTZ if defined($LOCALTZ);

  # Compare the broken-down local time and UTC for the same instant.
  my $time = time;
  my @g = gmtime($time);
  my @t = localtime($time);

  # The two calendar dates differ by at most one day, so only the direction
  # matters: compare the years first and the day-of-year second.  (Scaling
  # the yday and year differences by fixed 1440 / 525600 minute factors
  # assumes 365-day years and is off by a whole day when the two dates
  # straddle New Year after a leap year.)
  my $day_diff = ($t[5] <=> $g[5]) || ($t[7] <=> $g[7]);

  # offset east of UTC, in minutes
  my $z = ($t[1] - $g[1]) + ($t[2] - $g[2]) * 60 + $day_diff * 1440;

  # Format sign and magnitude separately; integer division and % on a
  # negative value would otherwise mangle offsets such as -0245 or -0030.
  my $sign = $z < 0 ? '-' : '+';
  my $abs  = abs($z);
  $LOCALTZ = sprintf("%s%02d%02d", $sign, int($abs / 60), $abs % 60);

  return $LOCALTZ;
}

442

443

# Parse a date string as found in mail headers and return the corresponding
# time in seconds since the epoch (UTC).
#
# Three layouts of the date part are recognized: "dd Mon yyyy",
# "Mon dd hh:mm:ss yyyy" and "dd Mon yy" / "dd Mon yyy".  Out-of-range
# fields are clamped rather than rejected, and the year is forced into the
# range 1970..2037.  Numeric time zones and the abbreviations listed in %TZ
# are honoured; any other zone counts as "-0000".
#
# Returns undef when no date part can be found or when timegm() rejects the
# resulting values.
sub parse_rfc822_date {
  my ($date) = @_;
  local ($_); local ($1,$2,$3,$4);
  my ($yyyy, $mmm, $dd, $hh, $mm, $ss, $mon, $tzoff);

  # make it a bit easier to match
  $_ = " $date "; s/, */ /gs; s/\s+/ /gs;

  # now match it in parts.  Date part first:
  if (s/ (\d+) (Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec) (\d{4}) / /i) {
    $dd = $1; $mon = lc($2); $yyyy = $3;
  } elsif (s/ (Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec) +(\d+) \d+:\d+:\d+ (\d{4}) / /i) {
    # NOTE(review): this substitution also removes the hh:mm:ss text, so the
    # time-of-day match below finds nothing for this layout - confirm that
    # this is intended.
    $dd = $2; $mon = lc($1); $yyyy = $3;
  } elsif (s/ (\d+) (Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec) (\d{2,3}) / /i) {
    $dd = $1; $mon = lc($2); $yyyy = $3;
  } else {
    dbg("util: time cannot be parsed: $date");
    return undef;
  }

  # handle two and three digit dates as specified by RFC 2822
  if (defined $yyyy) {
    if (length($yyyy) == 2 && $yyyy < 50) {
      $yyyy += 2000;
    }
    elsif (length($yyyy) != 4) {
      # three digit years and two digit years with values between 50 and 99
      $yyyy += 1900;
    }
  }

  # hh:mm:ss
  if (s/ (\d?\d):(\d\d)(:(\d\d))? / /) {
    $hh = $1; $mm = $2; $ss = $4 || 0;
  }

  # numeric timezones
  if (s/ ([-+]\d{4}) / /) {
    $tzoff = $1;
  }
  # common timezones
  elsif (s/\b([A-Z]{2,4}(?:-DST)?)\b/ / && exists $TZ{$1}) {
    $tzoff = $TZ{$1};
  }
  # all other timezones are considered equivalent to "-0000"
  $tzoff ||= '-0000';

  # months
  if (exists $MONTH{$mon}) {
    $mmm = $MONTH{$mon};
  }

  $hh ||= 0; $mm ||= 0; $ss ||= 0; $dd ||= 0; $mmm ||= 0; $yyyy ||= 0;

  # Fudge invalid times so that we get a usable date.
  if ($ss > 59) {  # rfc2822 does recognize leap seconds, not handled here
    dbg("util: second after supported range, forcing second to 59: $date");
    $ss = 59;
  }

  if ($mm > 59) {
    dbg("util: minute after supported range, forcing minute to 59: $date");
    $mm = 59;
  }

  if ($hh > 23) {
    dbg("util: hour after supported range, forcing hour to 23: $date");
    $hh = 23;
  }

  my $max_dd = 31;
  if ($mmm == 4 || $mmm == 6 || $mmm == 9 || $mmm == 11) {
    $max_dd = 30;
  }
  elsif ($mmm == 2) {
    $max_dd = (!($yyyy % 4) && (($yyyy % 100) || !($yyyy % 400))) ? 29 : 28;
  }
  if ($dd > $max_dd) {
    dbg("util: day is too high, incrementing date to next valid date: $date");
    $dd = 1;
    $mmm++;
    if ($mmm > 12) {
      $mmm = 1;
      $yyyy++;
    }
  }

  # Time::Local (v1.10 at least, also 1.17) throws warnings when dates cause
  # a signed 32-bit integer overflow.  So force a min/max for year.
  if ($yyyy > 2037) {
    dbg("util: year after supported range, forcing year to 2037: $date");
    $yyyy = 2037;
  }
  elsif ($yyyy < 1970) {
    dbg("util: year before supported range, forcing year to 1970: $date");
    $yyyy = 1970;
  }

  my $time;
  eval {  # could croak
    $time = timegm($ss, $mm, $hh, $dd, $mmm-1, $yyyy);
    # Finish with an explicit true value: timegm() legitimately returns 0
    # for 1970-01-01 00:00:00, and that must not trigger the error branch.
    1;
  } or do {
    my $eval_stat = $@ ne '' ? $@ : "errno=$!";  chomp $eval_stat;
    dbg("util: time cannot be parsed: $date, $yyyy-$mmm-$dd $hh:$mm:$ss, $eval_stat");
    return undef;
  };

  if ($tzoff =~ /([-+])(\d\d)(\d\d)$/)  # convert to seconds difference
  {
    $tzoff = (($2 * 60) + $3) * 60;
    if ($1 eq '-') {
      $time += $tzoff;
    } elsif ($time < $tzoff) {  # careful with year 1970 and '+' time zones
      $time = 0;
    } else {
      $time -= $tzoff;
    }
  }

  return $time;
}

565

566

# Format a time (seconds since the epoch) as an RFC 822 style date string in
# the local time zone, e.g. "Fri, 14 Feb 2014 22:45:15 +0000".  A missing or
# false argument means "now".
sub time_to_rfc822_date {
  my($time) = @_;

  my @day_names   = qw/Sun Mon Tue Wed Thu Fri Sat/;
  my @month_names = qw/Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec/;

  my ($sec, $min, $hour, $mday, $mon, $year, $wday) = localtime($time || time);

  # localtime() reports the year as an offset from 1900
  return sprintf("%s, %02d %s %4d %02d:%02d:%02d %s",
                 $day_names[$wday], $mday, $month_names[$mon],
                 $year + 1900, $hour, $min, $sec, local_tz());
}

577

578

###########################################################################

579

580

# This used to be a wrapper for Text::Wrap. Now we do basically the same

581

# function as Text::Wrap::wrap(). See bug 5056 and 2165 for more information

582

# about why things aren't using that function anymore.

583

584

# It accepts values for almost all options which can be set

585

# in Text::Wrap. All parameters are optional (leaving away the first one

586

# probably doesn't make too much sense though), either a missing or a false

587

# value will fall back to the default.

588

589

# The parameters are:

590

# 1st: The string to wrap. Only one string is allowed.

591

# (default: "")

592

# 2nd: The prefix to be put in front of all lines except the first one.

593

# (default: "")

594

# 3rd: The prefix for the first line. (default: "")

595

# 4th: The number of columns available (no line will be longer than this

596

# unless overflow is set below). (default: 77)

597

# 5th: Enable or disable overflow mode. (default: 0)

598

# 6th: The sequence/expression to wrap at. (default: '\s');

599

# 7th: The string to join the lines again. (default: "\n")

600

601

# Wrap a string into lines, much like Text::Wrap::wrap().
#
# Positional parameters; a missing or false value selects the default:
#   1. string to wrap                                  (default: "")
#   2. prefix for every line except the first          (default: "")
#   3. prefix for the first line                       (default: "")
#   4. number of columns available                     (default: 77)
#   5. overflow mode on/off                            (default: 0)
#   6. sequence/expression to break at                 (default: qr/\s/)
#   7. string used to join the resulting lines         (default: "\n")
#
# Returns the wrapped text as a single string.
sub wrap {
  my $string   = shift || '';
  my $prefix   = shift || '';
  my $first    = shift || '';
  my $length   = shift || 77;
  my $overflow = shift || 0;
  my $break    = shift || qr/\s/;
  # honour the documented 7th parameter instead of always joining with "\n"
  my $sep      = shift || "\n";

  # go ahead and break apart the string, keeping the break chars
  my @arr = split(/($break)/, $string);

  # tack the first prefix line at the start
  splice @arr, 0, 0, $first if $first;

  # go ahead and make up the lines in the array
  my $pos = 0;       # index of the line currently being assembled
  my $pos_mod = 0;   # becomes 1 once something was merged into that line
  while ($#arr > $pos) {
    my $len = length $arr[$pos];

    # if we don't want to have lines > $length (overflow==0), we
    # need to verify what will happen with the next line.  if we don't
    # care if a single line goes longer, don't care about the next
    # line.
    # we also want this to be true for the first entry on the line
    if ($pos_mod != 0 && $overflow == 0) {
      $len += length $arr[$pos+1];
    }

    if ($len <= $length) {
      # if the length determined above is within bounds, go ahead and
      # merge the next line with the current one
      $arr[$pos] .= splice @arr, $pos+1, 1;
      $pos_mod = 1;
    }
    else {
      # ok, the current line is the right length, but there's more text!
      # prep the current line and then go onto the next one

      # strip any trailing whitespace from the next line that's ready
      $arr[$pos] =~ s/\s+$//;

      # go to the next line and reset pos_mod
      $pos++;
      $pos_mod = 0;

      # put the appropriate prefix at the front of the line
      splice @arr, $pos, 0, $prefix;
    }
  }

  # go ahead and return the wrapped text, with the separator in between
  return join($sep, @arr);
}

656

657

###########################################################################

658

659

# Some base64 decoders will remove intermediate "=" characters, others

660

# will stop decoding on the first "=" character, this one translates "="

661

# characters to null.

662

# Decode base64 text.  An optional second argument limits the result to
# that many decoded bytes.  Well-formed input is handed to MIME::Base64
# when it is available; anything else goes through a uudecode-based
# fallback that tolerates stray characters.
sub base64_decode {
  my ($encoded, $decoded_length) = @_;

  $encoded =~ s/\s+//g;

  if (HAS_MIME_BASE64 && (length($encoded) % 4 == 0) &&
      $encoded =~ m|^(?:[A-Za-z0-9+/=]{2,}={0,2})$|s)
  {
    # only use MIME::Base64 when the XS and Perl are both correct and quiet;
    # "=" characters that are not trailing padding are rewritten to 'A'
    $encoded =~ s/(=+)(?!=*$)/'A' x length($1)/ge;

    # no limit requested: decode the whole thing
    return MIME::Base64::decode_base64($encoded) unless defined $decoded_length;

    # limit requested: decode only as many 4-char groups as are needed,
    # then cut the result to the requested size
    my $head = substr $encoded, 0, 4 * (int($decoded_length/3) + 1);
    my $decoded = MIME::Base64::decode_base64($head);
    return substr($decoded, 0, $decoded_length);
  }

  # fallback decoder
  $encoded =~ tr{A-Za-z0-9+/=}{}cd;        # remove non-base64 characters
  $encoded =~ s/=+$//;                     # remove terminating padding
  $encoded =~ tr{A-Za-z0-9+/=}{ -_`};      # translate to uuencode
  # unpack cannot cope with extra byte
  $encoded =~ s/.$// if (length($encoded) % 4 == 1);

  my $out = '';
  while ($encoded) {
    # consume at most 84 characters per round, prefixed with the matching
    # uuencode length byte
    my $take = (length($encoded) >= 84) ? 84 : length($encoded);
    $out .= unpack("u", chr(32 + $take * 3/4) . substr($encoded, 0, $take, ''));
    last if (defined $decoded_length && length($out) >= $decoded_length);
  }

  # hand back only the requested number of bytes, if a limit was given
  return defined $decoded_length ? substr($out, 0, $decoded_length) : $out;
}

705

706

# Decode quoted-printable text: drop soft line breaks ("=" at end of line)
# and turn every "=XX" hex escape into the corresponding byte.
sub qp_decode {
  my ($text) = @_;

  $text =~ s/\=\r?\n//gs;                          # soft line breaks
  $text =~ s/\=([0-9a-fA-F]{2})/chr(hex($1))/ge;   # =XX escapes

  return $text;
}

713

714

# Encode data as base64.  Uses MIME::Base64 when available, otherwise
# derives the encoding from Perl's built-in uuencode packer.
sub base64_encode {
  my ($data) = @_;

  if (HAS_MIME_BASE64) {
    return MIME::Base64::encode_base64($data);
  }

  # uuencode in 57-byte lines, then strip each line's leading length byte
  my $encoded = pack("u57", $data);
  $encoded =~ s/^.//mg;

  # map the uuencode alphabet onto the base64 alphabet
  $encoded =~ tr| -_`|A-Za-z0-9+/A|;  # -> #`# <- kluge against vim syntax issues

  # a trailing run of 'A' is rewritten as '=' padding
  $encoded =~ s/(A+)$/'=' x length $1/e;

  return $encoded;
}

727

728

###########################################################################

729

730

# Portable getpwuid(): on the first call a wrapper sub is compiled via a
# string eval (real getpwuid() off Windows, _fake_getpwuid() on Windows);
# every call then delegates to that wrapper with the caller's arguments.
#
# Returns whatever the wrapper returns.  If the wrapper cannot be defined,
# a warning is issued and nothing (undef / empty list) is returned.
sub portable_getpwuid {
  # wrapper already compiled by an earlier call
  if (defined &Mail::SpamAssassin::Util::_getpwuid_wrapper) {
    return Mail::SpamAssassin::Util::_getpwuid_wrapper(@_);
  }

  my $sts;
  if (!RUNNING_ON_WINDOWS) {
    $sts = eval ' sub _getpwuid_wrapper { getpwuid($_[0]); }; 1 ';
  } else {
    dbg("util: defining getpwuid() wrapper using 'unknown' as username");
    $sts = eval ' sub _getpwuid_wrapper { _fake_getpwuid($_[0]); }; 1 ';
  }

  if (!$sts) {
    my $eval_stat = $@ ne '' ? $@ : "errno=$!";  chomp $eval_stat;
    warn "util: failed to define getpwuid() wrapper: $eval_stat\n";
    # return nothing explicitly; falling off the end would hand the
    # caller warn()'s true return value as if it were a passwd entry
    return;
  }

  return Mail::SpamAssassin::Util::_getpwuid_wrapper(@_);
}

749

750

# Stand-in for getpwuid(): returns a fixed passwd-style record for the
# given uid.  Field order:
#   name, passwd, uid, gid, quota, comment, gcos, dir, shell, expire
sub _fake_getpwuid {
  my ($uid) = @_;

  return ('unknown', 'x', $uid, 0, '', '', '', '/', '', '');
}

764

765

###########################################################################

766

767

# Given a string, extract an IPv4 address from it. Required, since

768

# we currently have no way to portably unmarshal an IPv4 address from

769

# an IPv6 one without kludging elsewhere.

770

771

# Return the first dotted-quad IPv4 address (each octet 0-255) found in
# the given string, or nothing if the string is undefined or contains none.
sub extract_ipv4_addr_from_string {
  my ($str) = @_;

  return unless defined($str);

  return $1 if $str =~ /\b(
                        (?:1\d\d|2[0-4]\d|25[0-5]|\d\d|\d)\.
                        (?:1\d\d|2[0-4]\d|25[0-5]|\d\d|\d)\.
                        (?:1\d\d|2[0-4]\d|25[0-5]|\d\d|\d)\.
                        (?:1\d\d|2[0-4]\d|25[0-5]|\d\d|\d)
                      )\b/ix;

  # ignore native IPv6 addresses; currently we have no way to deal with
  # these if we could extract them, as the DNSBLs don't provide a way
  # to query them!  TODO, eventually, once IPv6 spam starts to appear ;)
  return;
}

791

792

###########################################################################

793

794

{
  # results are computed once and then cached for both subs below
  my($hostname, $fq_hostname);

  # get the current host's unqualified domain name (better: return whatever
  # Sys::Hostname thinks our hostname is, might also be a fully qualified one)
  sub hostname {
    unless (defined($hostname)) {
      # Sys::Hostname isn't taint safe and might fall back to `hostname`. So
      # we've got to clean PATH before we may call it.
      clean_path_in_taint_mode();
      $hostname = Sys::Hostname::hostname();
      $hostname =~ s/[()]//gs;            # bug 5929
    }
    return $hostname;
  }

  # get the current host's fully-qualified domain name, if possible.  If
  # not possible, return the unqualified hostname.
  sub fq_hostname {
    return $fq_hostname if defined($fq_hostname);

    $fq_hostname = hostname();
    unless ($fq_hostname =~ /\./) {
      # no dot in the hostname, so it can't be a FQDN: look through the
      # canonical name and aliases reported by gethostbyname()
      my @candidates = map { split } (gethostbyname($fq_hostname))[0 .. 1];
      # keep only names of the form "<hostname>.<something>"
      my @names = grep(/^\Q${fq_hostname}.\E/o, @candidates);
      $fq_hostname = $names[0] if (@names); # take the first FQDN, if any
      $fq_hostname =~ s/[()]//gs;           # bug 5929
    }

    return $fq_hostname;
  }
}

827

828

###########################################################################

829

830

# Given two array refs of dotted IPv4 strings, return 1 if any address in
# the first set shares its first two octets with any address in the
# second set, else 0.  Undefined or non-matching entries are skipped.
sub ips_match_in_16_mask {
  my ($ipset1, $ipset2) = @_;

  foreach my $left (@{$ipset1}) {
    next unless defined $left && $left =~ /^(\d+\.\d+\.)/;
    my $left_net = $1;

    foreach my $right (@{$ipset2}) {
      next unless defined $right && $right =~ /^(\d+\.\d+\.)/;
      return 1 if $left_net eq $1;
    }
  }

  return 0;
}

846

847

# Given two array refs of dotted IPv4 strings, return 1 if any address in
# the first set shares its first three octets with any address in the
# second set, else 0.  Undefined or non-matching entries are skipped.
sub ips_match_in_24_mask {
  my ($ipset1, $ipset2) = @_;

  foreach my $left (@{$ipset1}) {
    next unless defined $left && $left =~ /^(\d+\.\d+\.\d+\.)/;
    my $left_net = $1;

    foreach my $right (@{$ipset2}) {
      next unless defined $right && $right =~ /^(\d+\.\d+\.\d+\.)/;
      return 1 if $left_net eq $1;
    }
  }

  return 0;
}

863

864

###########################################################################

865

866

# Convert a dotted-quad string into its unsigned 32-bit integer value
# (first octet most significant).
sub my_inet_aton {
  my @octets = split(/\./, $_[0]);
  return unpack("N", pack("C4", @octets));
}

867

868

###########################################################################

869

870

# Parse a Content-Type header value.
#
# The routine is typically given the result of get_header("content-type")
# in array context, i.e. every Content-Type header of the message.  When
# there are several (invalid, but it happens) MUAs seem to use the last
# one, so only the last argument is examined.
#
# Returns the lowercased MIME type in scalar context, and
# (type, boundary, charset, name) in list context.
sub parse_content_type {
  my $header = $_[-1] || 'text/plain; charset=us-ascii';

  # Boundary parameter.  This could be made a bit more rigid ...
  # the actual ABNF, BTW (RFC 1521, section 7.2.1):
  #  boundary := 0*69<bchars> bcharsnospace
  #  bchars := bcharsnospace / " "
  #  bcharsnospace :=    DIGIT / ALPHA / "'" / "(" / ")" / "+" /"_"
  #               / "," / "-" / "." / "/" / ":" / "=" / "?"
  #
  # The boundary may be surrounded by double quotes and must not end in
  # white space (trailing white space is presumed to be added by a gateway).
  #
  # In practice:
  #  - MUAs accept whitespace before and after the "=" character
  #  - only an opening double quote seems to be needed
  #  - non-quoted boundaries should be followed by space, ";", or end of line
  #  - blank boundaries seem to not work
  my($boundary) = $header =~ m!\bboundary\s*=\s*("[^"]+|[^\s";]+(?=[\s;]|$))!i;

  # remove double-quotes in boundary (should only be at start and end)
  $boundary =~ tr/"//d  if defined $boundary;

  # charset and (file)name parameters, if they exist
  my($charset) = $header =~ /\bcharset\s*=\s*["']?(.*?)["']?(?:;|$)/i;
  my($name) = $header =~ /\b(?:file)?name\s*=\s*["']?(.*?)["']?(?:;|$)/i;

  # Now isolate the MIME type itself.  The header content may not be
  # whitespace unfolded, so the REs use /s where appropriate.
  # correct:
  # Content-type: text/plain; charset=us-ascii
  # missing a semi-colon, CT shouldn't have whitespace anyway:
  # Content-type: text/plain charset=us-ascii
  my $type = $header;
  $type =~ s/^\s+//;                                  # strip leading whitespace
  $type =~ s/;.*$//s;                                 # strip everything after first ';'
  $type =~ s@^([^/]+(?:/[^/\s]*)?).*$@$1@s;           # only something/something ...
  $type = lc $type;

  # bug 4298: If at this point we don't have a content-type, assume text/plain;
  # also, bug 5399: if the content-type *starts* with "text", and isn't in a
  # list of known bad/non-plain formats, do likewise.
  my $is_generic_text =
    $type =~ /^text\b/ && $type !~ /^text\/(?:x-vcard|calendar|html)$/;
  $type = "text/plain" if !$type || $is_generic_text;

  # strip inappropriate chars (bug 5399: after the text/plain fixup)
  $type =~ tr/\000-\040\177-\377\042\050\051\054\056\072-\077\100\133-\135//d;

  return $type unless wantarray;
  return ($type,$boundary,$charset,$name);
}

941

942

###########################################################################

943

944

# Normalise the %-encoding of a URL.
#
# The URL is split into "%XX" escapes and the text between them.  An
# escape is decoded to its literal character unless the pattern check
# below says its encoding must be kept; in the remaining text, control
# characters, space, high-bit bytes and the characters ", %, < and > are
# replaced by "%xx" escapes.
#
# Returns the rewritten URL in scalar context.  In list context returns
# (rewritten URL, all characters that were decoded, all characters that
# were encoded), the latter two each joined into one string.
sub url_encode {
  my ($url) = @_;
  my @pieces = split(/(\%[0-9a-fA-F]{2})/, $url);
  my (@unencoded, @encoded);

  # the loop variable aliases the array elements, so edits land in @pieces
  foreach my $piece (@pieces) {
    if ($piece !~ /\%[0-9a-fA-F]{2}/) {
      # plain text: encode 0x00-0x20, 0x7f-0xff, ", %, <, >
      $piece =~ s/([\000-\040\177-\377\042\045\074\076])
                 /push(@encoded, $1) && sprintf "%%%02x", unpack("C",$1)/egx;
      next;
    }

    # escaped character: leave it encoded when this check matches
    next if $piece =~ /(20|7f|[0189a-fA-F][0-9a-fA-F])/i;

    # otherwise decode it and remember what was decoded
    $piece =~ s/\%([2-7][0-9a-fA-F])/sprintf "%c", hex($1)/e;
    push(@unencoded, $piece);
  }

  my $rewritten = join("", @pieces);
  return $rewritten unless wantarray;
  return($rewritten, join("", @unencoded), join("", @encoded));
}

976

977

###########################################################################

978

979

=item $module = first_available_module (@module_list)

980

981

Return the name of the first module that can be successfully loaded with

982

C<require> from the list. Returns C<undef> if none are available.

983

984

This is used instead of C<AnyDBM_File> as follows:

985

986

my $module = Mail::SpamAssassin::Util::first_available_module

987

(qw(DB_File GDBM_File NDBM_File SDBM_File));

988

tie %hash, $module, $path, [... args];

989

990

Note that C<SDBM_File> is guaranteed to be present, since it comes

991

with Perl.

992

993

=cut

994

995

# Try to load each named module in turn and return the name of the first
# one that loads; returns undef if none of them can be loaded.
sub first_available_module {
  my (@packages) = @_;

  for my $candidate (@packages) {
    # string eval so the module name is resolved at run time
    return $candidate if eval 'require '.$candidate.'; 1; ';
  }

  return undef;
}

1004

1005

###########################################################################

1006

1007

=item my ($filepath, $filehandle) = secure_tmpfile();

1008

1009

Generates a filename for a temporary file, opens it exclusively and

1010

securely, and returns a filehandle to the open file (opened O_RDWR).

1011

1012

If it cannot open a file after 20 tries, it returns C<undef>.

1013

1014

=cut

1015

1016

# thanks to http://www2.picante.com:81/~gtaylor/autobuse/ for this code

1017

# Create a private temporary file and return ($path, $filehandle), with
# the handle opened O_RDWR in binmode.  Up to 20 random names are tried;
# on failure a warning is issued and nothing is returned.  The process
# umask is set to 077 while the file is created and restored afterwards.
sub secure_tmpfile {
  my $tmpdir = untaint_file_path($ENV{'TMPDIR'} || File::Spec->tmpdir());

  if (!$tmpdir) {
    # Note: we would prefer to keep this fatal, as not being able to
    # find a writable tmpdir is a big deal for the calling code too.
    # That would be quite a psychotic case, also.
    warn "util: cannot find a temporary directory, set TMP or TMPDIR in environment";
    return;
  }

  my ($reportfile, $tmpfile);
  my $umask = umask 077;

  for (my $retries = 20; $retries > 0; $retries--) {
    # we do not rely on the obscurity of this name for security,
    # we use a average-quality PRG since this is all we need
    my $suffix = join('', (0..9,'A'..'Z','a'..'z')[rand 62, rand 62, rand 62,
                                                   rand 62, rand 62, rand 62]);
    $reportfile = File::Spec->catfile($tmpdir,".spamassassin${$}${suffix}tmp");

    # instead, we require O_EXCL|O_CREAT to guarantee us proper
    # ownership of our file, read the open(2) man page
    if (sysopen($tmpfile, $reportfile, O_RDWR|O_CREAT|O_EXCL, 0600)) {
      if (!binmode $tmpfile) {
        my $binmode_error = "$!";
        umask $umask;       # don't leave the restrictive umask behind
        die "cannot set $reportfile to binmode: $binmode_error";
      }
      last;
    }

    # capture the failure reason before anything can overwrite $!
    my $already_exists = $!{EEXIST};
    my $open_error = "$!";

    # A failed sysopen may still have autovivified $tmpfile into a
    # (closed) handle reference.  Reset it so that the "giving up" check
    # below is reliable and an unopened handle is never returned.
    undef $tmpfile;

    # it is acceptable if $reportfile already exists, try another
    next if $already_exists;

    # error, maybe "out of quota" or "too many open files" (bug 4017)
    warn "util: secure_tmpfile failed to create file '$reportfile': $open_error\n";
  }

  umask $umask;

  if (!$tmpfile) {
    warn "util: secure_tmpfile failed to create file, giving up";
    return;     # undef
  }

  return ($reportfile, $tmpfile);
}

1068

1069

=item my ($dirpath) = secure_tmpdir();

1070

1071

Generates a directory for temporary files. Creates it securely and

1072

returns the path to the directory.

1073

1074

If it cannot create a directory after 20 tries, it returns C<undef>.

1075

1076

=cut

1077

1078

# stolen from secure_tmpfile()

1079

# Create a private (mode 0700) temporary directory under the system
# temporary directory and return its path.  Up to 20 random names are
# tried; if none can be created a warning is issued and undef is returned.
sub secure_tmpdir {
  my $tmpdir = untaint_file_path(File::Spec->tmpdir());

  unless ($tmpdir) {
    # Note: we would prefer to keep this fatal, as not being able to
    # find a writable tmpdir is a big deal for the calling code too.
    # That would be quite a psychotic case, also.
    warn "util: cannot find a temporary directory, set TMP or TMPDIR in environment";
    return;
  }

  my $tmppath;                       # set once a directory has been created
  my $saved_umask = umask 077;
  my @alphabet = (0..9,'A'..'Z','a'..'z');

  ATTEMPT:
  foreach my $attempt (1 .. 20) {
    # we do not rely on the obscurity of this name for security,
    # we use a average-quality PRG since this is all we need
    my $suffix = join('', map { $alphabet[rand 62] } 1 .. 6);
    my $reportpath = File::Spec->catfile($tmpdir,".spamassassin${$}${suffix}tmp");

    # mkdir fails if the name is already taken, so a successful call
    # means the directory is newly created
    if (mkdir $reportpath, 0700) {
      $tmppath = $reportpath;
      last ATTEMPT;
    }

    # it is acceptable if $reportpath already exists, try another
    next ATTEMPT if $!{EEXIST};

    # error, maybe "out of quota" or "too many open files" (bug 4017)
    warn "util: secure_tmpdir failed to create file '$reportpath': $!\n";
  }

  umask $saved_umask;

  warn "util: secure_tmpdir failed to create a directory, giving up" if (!$tmppath);

  return $tmppath;
}

1122

1123

1124

###########################################################################

1125

1126

# Reduce a URI to its lowercased registrable domain, or to the bare IP
# address when the host is a dotted quad.  Returns nothing for
# javascript: URIs, for hosts that still contain %-escapes of printable
# characters, and for domains RegistrarBoundaries considers invalid.
sub uri_to_domain {
  my ($uri) = @_;

  # Javascript is not going to help us, so return.
  return if ($uri =~ /^javascript:/i);

  for ($uri) {
    s,#.*$,,gs;                       # drop fragment
    s#^[a-z]+:/{0,2}##gsi;            # drop the protocol
    s,^[^/]*\@,,gs;                   # username/passwd

    # strip path and CGI params.  note: bug 4213 shows that "&" should
    # *not* be likewise stripped here -- it's permitted in hostnames by
    # some common MUAs!
    s,[/\?].*$,,gs;

    s,:\d*$,,gs;                      # port, bug 4191: sometimes the # is missing
  }

  # skip undecoded URIs if the encoded bits shouldn't be.
  # we'll see the decoded version as well.  see url_encode()
  return if $uri =~ /\%(?:2[1-9a-fA-F]|[3-6][0-9a-fA-F]|7[0-9a-eA-E])/;

  # keep IPs intact; $uri is already the domain only
  return lc $uri if $uri =~ /^\d+\.\d+\.\d+\.\d+$/;

  # get rid of hostname part of domain, understanding delegation
  $uri = Mail::SpamAssassin::Util::RegistrarBoundaries::trim_domain($uri);

  # ignore invalid domains
  return unless
      (Mail::SpamAssassin::Util::RegistrarBoundaries::is_domain_valid($uri));

  # $uri is now the domain only
  return lc $uri;
}

1160

1161

# Canonify a list of URIs.
#
# Arguments: an array ref of redirector patterns (each is matched against
# a whole URI and is expected to capture the redirect target in $1),
# followed by the URIs themselves.
#
# For every URI a number of normalised variants are generated (slashes
# fixed, scheme added, %-encoding and numeric HTML entities decoded,
# user@ part removed, trailing punctuation removed, obfuscated IP hosts
# rewritten as dotted quads).  Redirect targets found inside a URI are
# appended to the list being iterated, so they get canonified too.
#
# Returns the de-duplicated union of the (cleaned) input URIs and all
# variants, in unspecified order (hash keys).
sub uri_list_canonify {
  my($redirector_patterns, @uris) = @_;

  # make sure we catch bad encoding tricks
  my @nuris;
  # NOTE: $uri aliases the elements of @uris, so the clean-ups below
  # modify @uris in place; the loop also visits elements pushed onto
  # @uris further down
  for my $uri (@uris) {
    # we're interested in http:// and so on, skip mailto: and
    # email addresses with no protocol
    next if $uri =~ /^mailto:/i || $uri =~ /^[^:]*\@/;

    # sometimes we catch URLs on multiple lines
    $uri =~ s/\n//g;

    # URLs won't have leading/trailing whitespace
    $uri =~ s/^\s+//;
    $uri =~ s/\s+$//;

    # CRs just confuse things down below, so trash them now
    $uri =~ s/\r//g;

    # Make a copy so we don't trash the original in the array
    my $nuri = $uri;

    # bug 4390: certain MUAs treat back slashes as front slashes.
    # since backslashes are supposed to be encoded in a URI, swap non-encoded
    # ones with front slashes.
    $nuri =~ tr@\\@/@;

    # http:www.foo.biz -> http://www.foo.biz
    $nuri =~ s#^(https?:)/{0,2}#$1//#i;

    # *always* make a dup with all %-encoding decoded, since
    # important parts of the URL may be encoded (such as the
    # scheme). (bug 4213)
    if ($nuri =~ /\%[0-9a-fA-F]{2}/) {
      $nuri = Mail::SpamAssassin::Util::url_encode($nuri);
    }

    # www.foo.biz -> http://www.foo.biz
    # unschemed URIs: assume default of "http://" as most MUAs do
    if ($nuri !~ /^[-_a-z0-9]+:/i) {
      if ($nuri =~ /^ftp\./) {
        $nuri =~ s@^@ftp://@g;
      }
      else {
        $nuri =~ s@^@http://@g;
      }
    }

    # http://www.foo.biz?id=3 -> http://www.foo.biz/?id=3
    $nuri =~ s@^(https?://[^/?]+)\?@$1/?@i;

    # deal with encoding of chars, this is just the set of printable
    # chars minus ' ' (that is, dec 33-126, hex 21-7e)
    $nuri =~ s/\&\#0*(3[3-9]|[4-9]\d|1[01]\d|12[0-6]);/sprintf "%c",$1/ge;
    $nuri =~ s/\&\#x0*(2[1-9]|[3-6][a-fA-F0-9]|7[0-9a-eA-E]);/sprintf "%c",hex($1)/ge;

    # put the new URI on the new list if it's different
    if ($nuri ne $uri) {
      push(@nuris, $nuri);
    }

    # deal with weird hostname parts, remove user/pass, etc.
    if ($nuri =~ m{^(https?://)([^/]+?)((?::\d*)?\/.*)?$}i) {
      my($proto, $host, $rest) = ($1,$2,$3);

      # not required
      $rest ||= '';

      # bug 4146: deal with non-US ASCII 7-bit chars in the host portion
      # of the URI according to RFC 1738 that's invalid, and the tested
      # browsers (Firefox, IE) remove them before usage...
      if ($host =~ tr/\000-\040\200-\377//d) {
        push(@nuris, join ('', $proto, $host, $rest));
      }

      # deal with http redirectors.  strip off one level of redirector
      # and add back to the array.  the foreach loop will go over those
      # and deal appropriately.
      # bug 3308: redirectors like yahoo only need one '/' ... <grrr>
      if ($rest =~ m{(https?:/{0,2}.+)$}i) {
        push(@uris, $1);
      }

      # resort to redirector pattern matching if the generic https? check
      # doesn't result in a match -- bug 4176
      else {
        foreach (@{$redirector_patterns}) {
          if ("$proto$host$rest" =~ $_) {
            # a pattern that matched without capturing anything is skipped
            next unless defined $1;
            dbg("uri: parsed uri pattern: $_");
            dbg("uri: parsed uri found: $1 in redirector: $proto$host$rest");
            push (@uris, $1);
            last;
          }
        }
      }

      ########################
      ## TVD: known issue, if host has multiple combinations of the following,
      ## all permutations will be put onto @nuris.  shouldn't be an issue.

      # Get rid of cruft that could cause confusion for rules...

      # remove "www.fakehostname.com@" username part
      if ($host =~ s/^[^\@]+\@//gs) {
        push(@nuris, join ('', $proto, $host, $rest));
      }

      # bug 3186: If in a sentence, we might pick up odd characters ...
      # ie: "visit http://example.biz." or "visit http://example.biz!!!"
      # the host portion should end in some form of alpha-numeric, strip off
      # the rest.
      if ($host =~ s/[^0-9A-Za-z]+$//) {
        push(@nuris, join ('', $proto, $host, $rest));
      }

      ########################

      # deal with hosts which are IPs
      # also handle things like:
      # http://89.0x00000000000000000000068.0000000000000000000000160.0x00000000000011
      #    both hex (0x) and oct (0+) encoded octets, etc.

      if ($host =~ /^
                    ((?:0x[0-9a-f]+|\d+)\.)
                    ((?:0x[0-9a-f]+|\d+)\.)
                    ((?:0x[0-9a-f]+|\d+)\.)
                    (0x[0-9a-f]+|\d+)
                    $/ix)
      {
        # the first three chunks keep their trailing dot, so joining the
        # chunks without a separator below rebuilds a dotted quad
        my @chunk = ($1,$2,$3,$4);
        foreach my $octet (@chunk) {
          # hex octet -> decimal
          $octet =~ s/^0x([0-9a-f]+)/sprintf "%d",hex($1)/gei;
          # zero-prefixed octal octet -> decimal
          $octet =~ s/^0+([1-3][0-7]{0,2}|[4-7][0-7]?)\b/sprintf "%d",oct($1)/ge;
          # drop any remaining leading zeros
          $octet =~ s/^0+//;
        }
        push(@nuris, join ('', $proto, @chunk, $rest));
      }

      # "http://0x7f000001/"
      elsif ($host =~ /^0x[0-9a-f]+$/i) {
        # only take last 4 octets
        $host =~ s/^0x[0-9a-f]*?([0-9a-f]{1,8})$/sprintf "%d",hex($1)/gei;
        push(@nuris, join ('', $proto, decode_ulong_to_ip($host), $rest));
      }

      # "http://1113343453/"
      elsif ($host =~ /^[0-9]+$/) {
        push(@nuris, join ('', $proto, decode_ulong_to_ip($host), $rest));
      }

    }
  }

  # remove duplicates, merge nuris and uris
  my %uris = map { $_ => 1 } @uris, @nuris;

  return keys %uris;
}

1321

1322

# Turn an unsigned 32-bit integer into dotted-quad notation
# (most significant byte first).
sub decode_ulong_to_ip {
  my $hex   = sprintf "%08lx", $_[0];      # zero-padded hex digits
  my @bytes = unpack("CCCC", pack("H*", $hex));
  return join(".", @bytes);
}

1325

1326

###########################################################################

1327

1328

# Return the first true value that parse_rfc822_date() yields for the
# given strings, trying them in order; undef if none of them does.
sub first_date {
  my (@strings) = @_;

  for my $candidate (@strings) {
    my $time = parse_rfc822_date($candidate);
    return $time if $time;     # skips both undef and 0
  }

  return undef;
}

1337

1338

# Work out when a message was received, given its header text.
#
# Sources are tried in this order and the first that yields a date wins:
#   1. the newest Received header that is not a local hand-off
#   2. the local hand-off Received headers that were set aside
#   3. a "From " / "X-From-Line:" line (local timezone is appended
#      when the line carries none)
#   4. the remaining Received headers
#   5. Resent-Date
#   6. Date
# If nothing parses, the current time is returned.
sub receive_date {
  my ($header) = @_;

  $header ||= '';
  $header =~ s/\n[ \t]+/ /gs;  # fix continuation lines

  my @received = ($header =~ /^Received:(.*)/img);
  my @local_hops;

  if (@received) {
    # a newest header written by qmail's local injection or by a hop from
    # localhost / 127.0.0.1 is set aside
    my $newest_is_local =
      $received[0] =~ /qmail \d+ invoked by uid \d+/ ||
      $received[0] =~ /\bfrom (?:localhost\s|(?:\S+ ){1,2}\S*\b127\.0\.0\.1\b)/;
    push @local_hops, (shift @received) if $newest_is_local;

    if (@received && ($received[0] =~ m/\bby localhost with \w+ \(fetchmail-[\d.]+/)) {
      # a fetchmail hop follows: set it aside as well
      push @local_hops, (shift @received);
    }
    elsif (@local_hops) {
      # no fetchmail hop, so put the header set aside above back in front
      unshift @received, (shift @local_hops);
    }
  }

  my $stamp;

  if (@received) {
    $stamp = first_date(shift @received);
    return $stamp if defined($stamp);
  }

  if (@local_hops) {
    $stamp = first_date(@local_hops);
    return $stamp if defined($stamp);
  }

  if ($header =~ /^(?:From|X-From-Line:)\s+(.+)$/im) {
    my $from_line = $1;
    # add our own timezone unless the line already has an offset or zone name
    $from_line .= " ".local_tz() unless $from_line =~ /(?:[-+]\d{4}|\b[A-Z]{2,4}\b)/;
    $stamp = first_date($from_line);
    return $stamp if defined($stamp);
  }

  if (@received) {
    $stamp = first_date(@received);
    return $stamp if defined($stamp);
  }

  if ($header =~ /^Resent-Date:\s*(.+)$/im) {
    $stamp = first_date($1);
    return $stamp if defined($stamp);
  }

  if ($header =~ /^Date:\s*(.+)$/im) {
    $stamp = first_date($1);
    return $stamp if defined($stamp);
  }

  return time;
}

1391

1392

###########################################################################

1393

1394

# Make the real uid equal to the effective uid.  Does nothing on Windows
# or when the two already match; dies if the change does not take effect.
sub setuid_to_euid {
  return if (RUNNING_ON_WINDOWS);

  # remember the target uid, the first number is the important one
  my $touid = $>;

  # nothing to do when real and effective uid already agree
  return if $< == $touid;

  dbg("util: changing real uid from $< to match effective uid $touid");

  # bug 3586: kludges needed to work around platform dependent behavior
  # assigning to $<.  The POSIX functions deal with that so just use it here
  POSIX::setuid($touid);
  $< = $touid; $> = $touid;       # bug 5574

  # Check that we have now accomplished the setuid: catch bug 3586 if it
  # comes back.  keep this fatal: it's a serious security problem if it fails
  die "util: setuid $< to $touid failed!" if $< != $touid;
}

1414

1415

# helper app command-line open

1416

# helper app command-line open: hands all arguments to the
# platform-specific implementation and returns its result.
sub helper_app_pipe_open {
  return helper_app_pipe_open_windows(@_) if (RUNNING_ON_WINDOWS);
  return helper_app_pipe_open_unix(@_);
}

1423

1424

# Windows variant of helper_app_pipe_open(): builds one command string
# and opens a read pipe from it.  Returns the result of open().
#
# NOTE(review): the command line is joined with spaces and passed to a
# two-argument pipe open, so it is interpreted by the command shell;
# arguments are not quoted here.
sub helper_app_pipe_open_windows {
  my ($fh, $stdinfile, $duperr2out, @cmdline) = @_;

  # use a traditional open(FOO, "cmd |")
  my $cmd = join(' ', @cmdline);
  $cmd .= qq/ < "$stdinfile"/ if $stdinfile;     # redirect stdin from file
  $cmd .= " 2>&1"             if $duperr2out;    # merge stderr into stdout

  return open ($fh, $cmd.'|');
}

1433

1434

# Report $msg and terminate the process immediately, without running END
# blocks or destructors.
sub force_die {
  my $msg = shift;

  # note use of eval { } scope in logging -- paranoia to ensure that a broken
  # $SIG{__WARN__} implementation will not interfere with the flow of control
  # here, where we *have* to die.
  eval { warn $msg };  # hmm, STDERR may no longer be open
  eval { dbg("util: force_die: $msg") };

  POSIX::_exit(6);  # avoid END and destructor processing
  kill('KILL',$$);  # still kicking? die!
}

1446

1447

# Unix variant of helper_app_pipe_open(): fork a child whose STDOUT is
# connected to $fh in the parent, then exec @cmdline in the child.
#
# Arguments:
#   $fh         - handle the parent will read the child's output from
#   $stdinfile  - file to use as the child's STDIN ("/dev/null" if false)
#   $duperr2out - if true, the child's STDERR is redirected to its STDOUT
#   @cmdline    - command and arguments, passed to exec as a list
#
# Returns the child's pid in the parent.  Dies in the parent if the fork
# fails.  The child never returns: it either execs or ends in force_die().
sub helper_app_pipe_open_unix {
  my ($fh, $stdinfile, $duperr2out, @cmdline) = @_;

  my $pid;
  # do a fork-open, so we can setuid() back
  eval {
    $pid = open ($fh, '-|'); 1;
  } or do {
    my $eval_stat = $@ ne '' ? $@ : "errno=$!"; chomp $eval_stat;
    die "util: cannot fork: $eval_stat";
  };
  if (!defined $pid) {
    # acceptable to die() here, calling code catches it
    die "util: cannot open a pipe to a forked process: $!";
  }

  if ($pid != 0) {
    return $pid; # parent process; return the child pid
  }

  # else, child process.

  # from now on, we cannot die(), it could create a cloned process
  # use force_die() instead (bug 4370, cmt 2)
  # (die() inside this eval is fine: it is caught and turned into
  # force_die() below)
  eval {
    # go setuid...
    setuid_to_euid();
    dbg("util: setuid: ruid=$< euid=$>");

    # now set up the fds. due to some weirdness, we may have to ensure that
    # we *really* close the correct fd number, since some other code may have
    # redirected the meaning of STDOUT/STDIN/STDERR it seems... (bug 3649).
    # use POSIX::close() for that. it's safe to call close() and POSIX::close()
    # on the same fd; the latter is a no-op in that case.

    if (!$stdinfile) { # < $tmpfile
      # ensure we have *some* kind of fd 0.
      $stdinfile = "/dev/null";
    }

    # remember which fd number STDIN had before closing it
    my $f = fileno(STDIN);
    close STDIN or die "error closing STDIN: $!";

    # sanity: was that the *real* STDIN? if not, close that one too ;)
    if ($f != 0) {
      POSIX::close(0);
    }

    open (STDIN, "<$stdinfile") or die "cannot open $stdinfile: $!";

    # this should be impossible; if we just closed fd 0, UNIX
    # fd behaviour dictates that the next fd opened (the new STDIN)
    # will be the lowest unused fd number, which should be 0.
    # so die with a useful error if this somehow isn't the case.
    if (fileno(STDIN) != 0) {
      die "oops: fileno(STDIN) [".fileno(STDIN)."] != 0";
    }

    # Ensure STDOUT is open. As we just created a pipe to ensure this, it has
    # to be open to that pipe, and if it isn't, something's seriously screwy.
    # Update: actually, this fails! see bug 3649 comment 37. For some reason,
    # fileno(STDOUT) can be 0; possibly because open("-|") didn't change the fh
    # named STDOUT, instead changing fileno(1) directly. So this is now
    # commented.
    # if (fileno(STDOUT) != 1) {
    # die "setuid: oops: fileno(STDOUT) [".fileno(STDOUT)."] != 1";
    # }

    STDOUT->autoflush(1);

    if ($duperr2out) { # 2>&1
      # same procedure as for STDIN above, this time for fd 2
      my $f = fileno(STDERR);
      close STDERR or die "error closing STDERR: $!";

      # sanity: was that the *real* STDERR? if not, close that one too ;)
      if ($f != 2) {
        POSIX::close(2);
      }

      open (STDERR, ">&STDOUT") or die "dup STDOUT failed: $!";
      STDERR->autoflush(1); # make sure not to lose diagnostics if exec fails

      # STDERR must be fd 2 to be useful to subprocesses! (bug 3649)
      if (fileno(STDERR) != 2) {
        die "oops: fileno(STDERR) [".fileno(STDERR)."] != 2";
      }
    }

    # list-form exec; only returns if the exec itself failed
    exec @cmdline;
    die "exec failed: $!";
  };
  # only reached in the child, and only when something above failed
  my $eval_stat = $@ ne '' ? $@ : "errno=$!"; chomp $eval_stat;

  # bug 4370: we really have to exit here; break any eval traps
  force_die(sprintf('util: failed to spawn a process "%s": %s',
                    join(", ",@cmdline), $eval_stat));
  die; # must be a die() otherwise -w will complain
}

1545

1546

###########################################################################

1547

1548

# As "perldoc perlvar" notes, in perl 5.8.0, the concept of "safe" signal

1549

# handling was added, which means that signals cannot interrupt a running OP.

1550

# unfortunately, a regexp match is a single OP, so a psychotic m// can

1551

# effectively "hang" the interpreter as a result, and a $SIG{ALRM} handler

1552

# will never get called.

1553

1554

# However, by using "unsafe" signals, we can still interrupt that -- and

1555

# POSIX::sigaction can create an unsafe handler on 5.8.x. So this function

1556

# provides a portable way to do that.

1557

1558

# Install $handler as the SIGALRM handler.  On perl 5.8 and later (and
# not on Windows) it is installed through POSIX::sigaction instead of
# %SIG, to avoid "safe" signal handling; otherwise %SIG is used.
sub trap_sigalrm_fully {
  my ($handler) = @_;

  if ($] >= 5.008 && !am_running_on_windows()) {
    # may be using "safe" signals with %SIG; use POSIX to avoid it
    POSIX::sigaction(POSIX::SIGALRM(), POSIX::SigAction->new($handler));
  } else {
    # signals are always unsafe on perl older than 5.008, just use %SIG
    # Bug 6359, no POSIX::SIGALRM on Windows, just use %SIG
    $SIG{ALRM} = $handler;
  }
}

1569

1570

###########################################################################

1571

1572

# Removes any normal perl-style regexp delimiters at

1573

# the start and end, and modifiers at the end (if present).

1574

# If modifiers are found, they are inserted into the pattern using

1575

# the /(?i)/ idiom.

1576

1577

# Strip perl-style regexp delimiters from a pattern string and fold any
# trailing modifiers into the pattern using the (?i) idiom.
#
#   "/foo/i"     -> "(?i)foo"
#   "m{foo/bar}" -> "foo/bar"
#   "m#foo#x"    -> "(?x)foo"
#
# Returns the bare pattern.  Returns undef (with a warning) for an
# undefined or empty argument.  If the closing delimiter is missing, a
# warning is issued and the pattern is returned with only the opening
# delimiter removed.  A string that does not start with a delimiter at
# all is returned unchanged.
sub regexp_remove_delimiters {
  my ($re) = @_;

  if (!defined $re || $re eq '') {
    warn "cannot remove delimiters from null regexp";
    return undef;   # invalid
  }

  # Work out the closing delimiter from the opening one.  The brace is
  # escaped: an unescaped "{" in a pattern is deprecated since perl 5.18.
  my $delim;
  if    ($re =~ s/^m\{//)   { $delim = '}'; }   # m{foo/bar}
  elsif ($re =~ s/^m\(//)   { $delim = ')'; }   # m(foo/bar)
  elsif ($re =~ s/^m<//)    { $delim = '>'; }   # m<foo/bar>
  elsif ($re =~ s/^m\[//)   { $delim = ']'; }   # m[foo/bar]
  elsif ($re =~ s/^m(\W)//) { $delim = $1;  }   # m#foo/bar#
  elsif ($re =~ s/^(\W)//)  { $delim = $1;  }   # /foo\/bar/ or !foo/bar!
  else {
    # no recognisable opening delimiter: nothing to strip
    return $re;
  }

  # Only read $1 when the substitution really matched; after a failed
  # match $1 still holds the capture of an earlier successful one.
  my $mods = '';
  if ($re =~ s/\Q${delim}\E([imsx]*)$//) {
    $mods = $1;
  } else {
    warn "unbalanced re: $re";
  }

  if ($mods ne '') {
    $re = "(?".$mods.")".$re;
  }

  return $re;
}

1609

1610

# turn "/foobar/i" into qr/(?i)foobar/

1611

1612

# Compile a delimited pattern string into a regexp object,
# e.g. "/foobar/i" becomes qr/(?i)foobar/.
sub make_qr {
  my ($re) = @_;

  my $pattern = regexp_remove_delimiters($re);
  return qr/$pattern/;
}

1617

1618

###########################################################################

1619

1620

# Return the whitespace-separated locales in $ok_locales as a list,
# followed by the locale from the environment if one is defined.  The
# environment is consulted in the order LC_ALL, LANGUAGE, LC_MESSAGES,
# LANG; the first non-empty value is used.
sub get_my_locales {
  my ($ok_locales) = @_;

  my @locales = split(' ', $ok_locales);

  my $lang = $ENV{'LC_ALL'}
          || $ENV{'LANGUAGE'}
          || $ENV{'LC_MESSAGES'}
          || $ENV{'LANG'};
  push (@locales, $lang) if defined($lang);

  return @locales;
}

1631

1632

###########################################################################

1633

1634

# bug 5612: work around for bugs in Berkeley db 4.2

1635

1636

# on 4.2 having the __db.[DBNAME] file will cause a loop that will never finish

1637

# on 4.3+ the loop will timeout after 301 open attempts, but we will still

1638

# be unable to open the database. This workaround solves both problems.

1639

1640

# Remove the Berkeley DB temporary files that belong to the database at
# $path, i.e. "__db.<basename>" and "__db.<basename>.*" in the same
# directory.  Files that do not exist are skipped; a failed removal only
# produces a warning.
sub avoid_db_file_locking_bug {
  my ($path) = @_;

  my $db_tmpfile = untaint_file_path(
    File::Spec->catfile(dirname($path), '__db.'.basename($path)));

  # delete "__db.[DBNAME]" and "__db.[DBNAME].*"
  for my $candidate ($db_tmpfile, glob("$db_tmpfile.*")) {
    my $file = untaint_file_path($candidate);

    # nothing to clean up if the file simply isn't there; any other stat
    # failure still falls through to the unlink attempt
    next if !stat($file) && 0+$! == ENOENT;

    dbg("Berkeley DB bug work-around: cleaning tmp file $file");
    unlink($file) or warn "cannot remove Berkeley DB tmp file $file: $!\n";
  }
}

1656

1657

###########################################################################

1658

1659

# Shuffle the array referenced by $deck in place: walking from the last
# index down to 1, each element is swapped with a randomly chosen
# element at or below its own index.
sub fisher_yates_shuffle {
  my ($deck) = @_;

  my $i = $#{$deck};
  while ($i > 0) {
    my $j = int rand($i+1);
    ($deck->[$i], $deck->[$j]) = ($deck->[$j], $deck->[$i]);
    $i--;
  }
}

1666

1667

###########################################################################

1668

1669

1670

###########################################################################

1671

1672

# bugs 6419 and 2607 relate to returning a score 1/10th lower than the

1673

# required score if the rounded to the 10th version of the score is equal

1674

# to the required score

1675

1676

# moved from PerMessageStatus.pm to here and modified to allow for a

1677

# non-class version of the routine to be called from PerMessageStatus

1678

# and from spamd

1679

1680

# Return the score to report for a message.  Both the score and the
# required score are first rounded to one decimal place.  The rounded
# score is returned when the message is spam or when it is below the
# rounded required score; otherwise the rounded required score minus 0.1
# is returned, so a non-spam message never shows a score that looks like
# it reached the threshold.
sub get_tag_value_for_score {
  my ($score, $rscore, $is_spam) = @_;

  #BASED ON _get_tag_value_for_score from PerMsgStatus.pm

  my $shown    = sprintf("%2.1f", $score);
  my $required = sprintf("%2.1f", $rscore);

  if (!$is_spam && !($shown < $required)) {
    # NOT spam, yet the rounded score is not below the rounded threshold:
    # report one tenth less than the threshold
    return $required - 0.1;
  }

  # spam, or genuinely below the threshold: the accurate score
  return $shown;
}

1699

1700

###########################################################################

1701

1702

1703

1704

1705

=back

1706

1707

=cut

Older »