3
# Copyright (C) 2002 Laird Breyer
5
# This program is free software; you can redistribute it and/or modify
6
# it under the terms of the GNU General Public License as published by
7
# the Free Software Foundation; either version 2 of the License, or
8
# (at your option) any later version.
10
# This program is distributed in the hope that it will be useful,
11
# but WITHOUT ANY WARRANTY; without even the implied warranty of
12
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
# GNU General Public License for more details.
15
# You should have received a copy of the GNU General Public License
16
# along with this program; if not, write to the Free Software
17
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19
# Author: Laird Breyer <laird@lbreyer.com>
21
# IMPLEMENTATION NOTES
23
# This script follows the mailcross testsuite interface
24
# requirements. Type man mailcross for details.
26
# The script accepts one or more commands on the command line,
27
# and may read STDIN and write STDOUT as follows:
30
# In this case, a single email is expected on STDIN,
31
# and a list of category filenames is expected in $2, $3, etc.
32
# The script writes the category name corresponding to the
33
# input email on STDOUT.
36
# In this case, a standard mbox stream is expected on STDIN,
37
# while a suitable category file name is expected in $2. No output
38
# is written to STDOUT.
41
# In this case, a directory is expected in $2, which is examined
42
# for old database information. If any old databases are found, they
43
# are purged or reset. No output is written to STDOUT.
45
# If $1 == "describe":
46
# In this case, STDIN and the command line are ignored. A single
47
# line is written on STDOUT, describing the filter functionality.
49
# If $1 == "bootstrap":
50
# In this case, the current script is copied to the directory $2,
51
# provided the classifier we're wrapping exists on the system.
54
# The POPFile API code is a quick and dirty adaptation of insert.pl
55
# I have no idea how this really works, and I wouldn't call it robust.
56
# It's been tested on popfile 0.20.1 and may not work on any other version.
57
# It seems that the corpus directory is always created in the current working
58
# directory. That's not thrilling, but we can make do.
60
# We call the API code directly instead of using the insert.pl and bayes.pl
61
# scripts. This is necessary because those scripts don't create buckets, and
62
# assume a single usual corpus location. For our tests, we need lots of
63
# different corpus locations.
65
# this variable is modified by bootstrap
67
# Default the scratch directory to /tmp when the caller did not set one.
# The value is exported because the embedded perl snippets read it back
# through $ENV{"TEMPDIR"}; an unexported shell variable is invisible there.
[ -z "$TEMPDIR" ] && TEMPDIR=/tmp
export TEMPDIR
72
# BOOKAY is the file-name part of $1; quoted so that paths containing
# whitespace or glob characters survive intact.
export BOOKAY="$(basename "$1")"
73
# DBPATH is the directory part of $1; quoted so that paths containing
# whitespace or glob characters survive intact.
export DBPATH="$(dirname "$1")"
74
if [ -n "$POPDIR" ] ; then
77
# assume POPDIR is filled (done by bootstrap)
78
# also, we assume that the buckets on the command line are
79
# the only ones in the current corpus
83
use Classifier::Bayes;
84
use POPFile::Configuration;
87
my $c = new POPFile::Configuration;
88
my $mq = new POPFile::MQ;
89
my $b = new Classifier::Bayes;
91
$c->configuration( $c );
94
$mq->configuration( $c );
97
$b->configuration( $c );
102
$c->load_configuration();
106
print $b->classify("mailcross.tmp");
117
# BOOKAY is the file-name part of $1; the perl code below reads it via
# $ENV{"BOOKAY"} as the bucket name. Quoted so that paths containing
# whitespace or glob characters survive intact.
export BOOKAY="$(basename "$1")"
118
# DBPATH is the directory part of $1; quoted so that paths containing
# whitespace or glob characters survive intact.
export DBPATH="$(dirname "$1")"
120
if [ -n "$POPDIR" ] ; then
121
# assume POPDIR is filled (done by bootstrap)
123
perl -I"$POPDIR" -e '
126
use Classifier::Bayes;
127
use POPFile::Configuration;
130
my $c = new POPFile::Configuration;
131
my $mq = new POPFile::MQ;
132
my $b = new Classifier::Bayes;
134
$c->configuration( $c );
137
$mq->configuration( $c );
140
$b->configuration( $c );
145
$c->load_configuration();
149
$b->create_bucket($ENV{"BOOKAY"});
150
$b->clear_bucket($ENV{"BOOKAY"});
152
open(TMPFILE, ">mailcross.tmp") || die;
157
$b->add_message_to_bucket($ENV{"BOOKAY"}, "mailcross.tmp");
159
open(TMPFILE, ">mailcross.tmp") || die;
164
$b->add_message_to_bucket($ENV{"BOOKAY"}, "mailcross.tmp");
175
find "$1" -name "table.db" -exec rm -f {} \;
176
find "$1" -name "params" -exec rm -f {} \;
180
# we look for the popfile.pl script (not executable, can't use which)
183
for d in `echo "$PATH:/usr/share/popfile:$HOME/popfile:."` ; do
184
if [ -f "$d/popfile.pl" ] ; then
190
if [ -n "$POPDIR" -a -n "`which perl`" ] ; then
191
# Pull the version out of the CORE_version line of popfile.pl: strip
# everything up to the last "(" and from the first ")" onward, then turn
# ", " separators into dots. POPDIR is quoted so a directory name with
# whitespace still resolves to a single file argument.
VER="$(grep CORE_version "$POPDIR/popfile.pl" | sed -e 's/^.*(//' -e 's/).*$//' -e 's/, /./g')"
193
echo "POPFile $VER with default options"
196
if [ -d "$2" ] ; then
197
# we look for the popfile.pl script (not executable, can't use which)
200
for d in `echo "$PATH:$HOME/popfile"` ; do
201
if [ -f "$d/popfile.pl" ] ; then
207
if [ -n "$POPDIR" -a -n "`which perl`" ] ; then
209
# Write a copy of this script into the target directory $2, replacing the
# empty POPDIR="" placeholder with the current value of POPDIR. The script
# name is quoted end to end so paths with whitespace are not split.
sed -e "s|POPDIR=\"\"|POPDIR=\"$POPDIR\"|" "$0" > "$2/$(basename "$0")"
210
# Make the copied script executable; the name is quoted so paths with
# whitespace are not split.
chmod +x "$2/$(basename "$0")"
212
echo "POPFile appears to be missing"
215
echo "bad target directory $2"
222
# HYACINTH is the file-name part of $1; the perl code below reads it via
# $ENV{"HYACINTH"} and compares it with the classification result. Quoted
# so that paths containing whitespace or glob characters survive intact.
export HYACINTH="$(basename "$1")"
223
# DBPATH is the directory part of $1; quoted so that paths containing
# whitespace or glob characters survive intact.
export DBPATH="$(dirname "$1")"
225
cat > "$TEMPDIR/mailtoe.tmp"
228
if [ -n "$POPDIR" ] ; then
230
# assume POPDIR is filled (done by bootstrap)
231
# also, we assume that the buckets on the command line are
232
# the only ones in the current corpus
233
perl -I"$POPDIR" -e '
236
use Classifier::Bayes;
237
use POPFile::Configuration;
240
my $c = new POPFile::Configuration;
241
my $mq = new POPFile::MQ;
242
my $b = new Classifier::Bayes;
244
$c->configuration( $c );
247
$mq->configuration( $c );
250
$b->configuration( $c );
255
$c->load_configuration();
259
my $bookay = $b->classify($ENV{"TEMPDIR"}."/mailtoe.tmp");
260
if( $bookay ne $ENV{"HYACINTH"} ) {
261
$b->add_message_to_bucket($ENV{"HYACINTH"}, $ENV{"TEMPDIR"}."/mailtoe.tmp");
277
# HYACINTH is the file-name part of $1; the perl code below reads it via
# $ENV{"HYACINTH"} as the bucket the message is added to. Quoted so that
# paths containing whitespace or glob characters survive intact.
export HYACINTH="$(basename "$1")"
278
# DBPATH is the directory part of $1; quoted so that paths containing
# whitespace or glob characters survive intact.
export DBPATH="$(dirname "$1")"
280
cat > "$TEMPDIR/mailtoe.tmp"
283
if [ -n "$POPDIR" ] ; then
285
# assume POPDIR is filled (done by bootstrap)
286
# also, we assume that the buckets on the command line are
287
# the only ones in the current corpus
288
perl -I"$POPDIR" -e '
291
use Classifier::Bayes;
292
use POPFile::Configuration;
295
my $c = new POPFile::Configuration;
296
my $mq = new POPFile::MQ;
297
my $b = new Classifier::Bayes;
299
$c->configuration( $c );
302
$mq->configuration( $c );
305
$b->configuration( $c );
310
$c->load_configuration();
314
my $bookay = $b->classify($ENV{"TEMPDIR"}."/mailtoe.tmp");
315
$b->add_message_to_bucket($ENV{"HYACINTH"}, $ENV{"TEMPDIR"}."/mailtoe.tmp");