3
# Copyright (C) 2002 Laird Breyer
5
# This program is free software; you can redistribute it and/or modify
6
# it under the terms of the GNU General Public License as published by
7
# the Free Software Foundation; either version 2 of the License, or
8
# (at your option) any later version.
10
# This program is distributed in the hope that it will be useful,
11
# but WITHOUT ANY WARRANTY; without even the implied warranty of
12
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
# GNU General Public License for more details.
15
# You should have received a copy of the GNU General Public License
16
# along with this program; if not, write to the Free Software
17
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19
# Author: Laird Breyer <laird@lbreyer.com>
21
# IMPLEMENTATION NOTES
23
# This script follows the mailcross testsuite interface
24
# requirements. Type man mailcross for details.
26
# The script accepts one or more commands on the command line,
27
# and may read STDIN and write STDOUT as follows:
30
# In this case, a single email is expected on STDIN,
31
# and a list of category filenames is expected in $2, $3, etc.
32
# The script writes the category name corresponding to the
33
# input email on STDOUT.
36
# In this case, a standard mbox stream is expected on STDIN,
37
# while a suitable category file name is expected in $2. No output
38
# is written to STDOUT.
41
# In this case, a directory is expected in $2, which is examined
42
# for old database information. If any old databases are found, they
43
# are purged or reset. No output is written to STDOUT.
45
# If $1 == "describe":
46
# In this case, STDIN and the command line are ignored. A single
47
# line is written on STDOUT, describing the filter functionality.
49
# If $1 == "bootstrap":
50
# In this case, the current script is copied to the directory $2,
51
# provided the classifier we're wrapping exists on the system.
54
# If $1 == "toe":
# In this case, a single email is expected on STDIN,
55
# and a list of category filenames is expected in $2, $3, etc.
56
# The category name in $2 represents the "true" category, and $3 $4 etc
57
# are a complete list of possible categories.
58
# The script writes the classified category name corresponding to the
59
# input email on STDOUT, and if this differs from the true category $2,
60
# then, and only then, the email is learned.
63
# If $1 == "foot":
# Like "toe", but the input email is always learned.
66
# Base dbacl invocation used by the commands below. It is expanded
# unquoted on purpose so the shell splits it into the program name and
# its options.
DBACL="dbacl -j -q 2 -T email -e cef -T email:headers -T html:links -T html:alt"
# Directory for scratch files; an unset or empty TEMPDIR falls back to /tmp.
: "${TEMPDIR:=/tmp}"
73
# Run dbacl over its standard input with every argument offered as a
# candidate category: each name is passed behind its own -c switch.
$DBACL -m -v $(for category in "$@"; do printf '%s' "-c $category "; done)
78
# Learn dbacl's standard input into the category named by the arguments.
# "$@" is quoted so that a category path containing whitespace or glob
# characters reaches dbacl as a single, unmodified word.
$DBACL -h 17 -H 19 -l "$@"
83
# Purge stale training state found anywhere below the directory "$1".
#
# Each *.mbox file marks a file of the same name minus the ".mbox" suffix
# (presumably the category learned from that mbox -- confirm); that file
# is deleted, the mbox itself is kept. The path is handed to a small sh
# helper as a positional parameter instead of being piped through sed and
# xargs, so paths containing blanks, quotes or backslashes are removed
# correctly.
find "$1" -name "*.mbox" -exec sh -c 'rm -f -- "${1%.mbox}"' sh {} \;
# Leftover *.toe, *.foot, *.tmp and *.onl files are removed outright, in a
# single walk of the tree.
find "$1" \( -name "*.toe" -o -name "*.foot" -o -name "*.tmp" -o -name "*.onl" \) \
    -exec rm -f -- {} \;
92
# Describe: when dbacl is installed, print one line naming its version and
# the feature switches this wrapper enables.
# `command -v` is used instead of `which`: `which` is not standardised and
# some implementations report a miss on stdout, which a non-empty-output
# test would mistake for success.
if command -v dbacl >/dev/null 2>&1 ; then
    # Keep only the text following "version " on the first line of `dbacl -V`.
    VER=$(dbacl -V | head -n 1 | sed 's/.*version //')
    echo "dbacl $VER with cef,headers,alt,links"
100
# Proceed only when dbacl can be found. `command -v` is the POSIX way to
# test for a program; `which` is not standardised and some implementations
# report a miss on stdout, which a non-empty-output test would mistake for
# success.
if command -v dbacl >/dev/null 2>&1 ; then
104
echo "dbacl appears to be missing"
107
echo "bad target directory $2"
115
# Train on error: classify the email arriving on STDIN and learn it into
# $TRUECAT only when dbacl's verdict is not the true category.
#
# One -c switch per candidate category named in the arguments.
CATS=$(for category in "$@"; do printf '%s' "-c $category "; done)
# The message is read twice (classify, then possibly learn), so spool it.
# NOTE(review): fixed file name inside $TEMPDIR -- concurrent runs would
# share it; consider mktemp.
cat > "$TEMPDIR/mailtoe.tmp"
# $CATS is left unquoted on purpose: it holds several words.
VERDICT=$($DBACL -v $CATS < "$TEMPDIR/mailtoe.tmp")
# The verdict is compared with the base name of the true category file.
if [ "x$VERDICT" != "x$(basename "$TRUECAT")" ] ; then
    #cat "$TEMPDIR/mailtoe.tmp" >> $TRUECAT.toe
    #cat $TRUECAT.toe | $DBACL -h 17 -H 19 -l $TRUECAT
    # Misclassified: learn this message, passing $TRUECAT.onl to dbacl -o.
    $DBACL -h 17 -H 19 -l "$TRUECAT" -o "$TRUECAT.onl" < "$TEMPDIR/mailtoe.tmp"
130
# Like the train-on-error case, but the email arriving on STDIN is learned
# into $TRUECAT every time, whatever the verdict.
#
# One -c switch per candidate category named in the arguments.
CATS=$(for category in "$@"; do printf '%s' "-c $category "; done)
# The message is read twice (classify, then learn), so spool it.
# NOTE(review): fixed file name inside $TEMPDIR -- concurrent runs would
# share it; consider mktemp.
cat > "$TEMPDIR/mailfoot.tmp"
# $CATS is left unquoted on purpose: it holds several words.
VERDICT=$($DBACL -v $CATS < "$TEMPDIR/mailfoot.tmp")
# Always learn, passing $TRUECAT.onl to dbacl -o; the paths are quoted so
# a category name containing blanks stays one word.
$DBACL -h 17 -H 19 -l "$TRUECAT" -o "$TRUECAT.onl" < "$TEMPDIR/mailfoot.tmp"
#cat "$TEMPDIR/mailfoot.tmp" >> "$TRUECAT.foot"
#cat "$TRUECAT.foot" | $DBACL -h 17 -H 19 -l $TRUECAT