~ubuntu-branches/ubuntu/hardy/dbacl/hardy

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
#!/bin/bash
# 
# Copyright (C) 2002 Laird Breyer
#  
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
# 
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
# 
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
# 
# Author:   Laird Breyer <laird@lbreyer.com>
#
# IMPLEMENTATION NOTES
#
# This script follows the mailcross testsuite interface
# requirements. Type man mailcross for details.
#
# The script accepts one of several commands on the command line,
# and may read STDIN and write STDOUT as follows:
#
# If $1 == "filter":
# In this case, a single email is expected on STDIN,
# and a list of category filenames is expected in $2, $3, etc.
# The script writes the category name corresponding to the 
# input email on STDOUT.
#
# If $1 == "learn":
# In this case, a standard mbox stream is expected on STDIN,
# while a suitable category file name is expected in $2. No output
# is written to STDOUT.
#
# If $1 == "clean":
# In this case, a directory is expected in $2, which is examined
# for old database information. If any old databases are found, they
# are purged or reset. No output is written to STDOUT.
#
# If $1 == "describe":
# In this case, STDIN and the command line are ignored. A single
# line is written on STDOUT, describing the filter functionality.
#
# If $1 == "bootstrap":
# In this case, the current script is copied to the directory $2,
# provided the classifier we're wrapping exists on the system.
#
# If $1 == "toe":
# In this case, a single email is expected on STDIN,
# and a list of category filenames is expected in $2, $3, etc.
# The category name in $2 represents the "true" category, and $3 $4 etc
# are a complete list of possible categories.
# The script writes the classified category name corresponding to the 
# input email on STDOUT, and if this differs from the true category $2,
# then, and only then, the email is learned.
#
# If $1 == "foot":
# Like "toe", but the input email is always learned.
#

# ---------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------
# ADIR     directory where annoyance-filter is expected to live
# AF       path of the annoyance-filter executable
# TRAIN    same as AF; not used by any command below, kept unchanged
# TEMPDIR  directory for the temporary message spool used by toe/foot;
#          taken from the environment, defaulting to /tmp
ADIR="$HOME/.annoyance-filter"
AF="$ADIR/annoyance-filter"
TRAIN="$AF"
: "${TEMPDIR:=/tmp}"

# af_filter CATEGORY_FILE...
# Classifies the single email on STDIN and writes the category name
# ("spam" or "notspam") on STDOUT. Only the directory part of the first
# category file is used: the fast dictionary fdict.bin is read from it.
af_filter() {
    local dbpath
    dbpath=$(dirname -- "$1")
    # Translate annoyance-filter's verdict words into category names.
    # INDT (presumably "indeterminate") is counted as notspam.
    "$AF" --fread "$dbpath/fdict.bin" --classify - |
        sed -e 's/JUNK/spam/' -e 's/MAIL/notspam/' -e 's/INDT/notspam/'
}

# af_learn CATEGORY_FILE
# Learns the mbox stream on STDIN. A category file whose basename is
# "spam" is trained as junk, anything else as mail. The dictionaries
# dict.bin and fdict.bin are (re)written next to the category file.
af_learn() {
    local category dbpath kind
    local -a seed
    category=$(basename -- "$1")
    dbpath=$(dirname -- "$1")

    if [ "$category" = "spam" ]; then
        kind="--junk"
    else
        kind="--mail"
    fi

    # Continue from the existing dictionary if there is one, otherwise
    # start from empty junk/mail counts. An array keeps a dictionary
    # path containing whitespace as a single argument.
    if [ -e "$dbpath/dict.bin" ]; then
        seed=(--read "$dbpath/dict.bin")
    else
        seed=(--clearjunk --clearmail)
    fi

    "$AF" "${seed[@]}" "$kind" - --prune \
        --write "$dbpath/dict.bin" --fwrite "$dbpath/fdict.bin"
}

# af_clean DIRECTORY
# Removes every dict.bin, fdict.bin and *.tmp found below DIRECTORY.
af_clean() {
    find "$1" \( -name "dict.bin" -o -name "fdict.bin" -o -name "*.tmp" \) \
        -exec rm -- {} +
}

# af_describe
# Writes a one-line description of the filter on STDOUT, including the
# annoyance-filter version when the executable is present.
af_describe() {
    local ver="(unavailable?)"
    if [ -e "$AF" ]; then
        ver=$("$AF" --version | head -n 1 | sed 's/annoyance-filter //')
    fi
    echo "Annoyance Filter $ver with prune"
}

# af_bootstrap DIRECTORY
# Copies this script into DIRECTORY, provided DIRECTORY exists and the
# annoyance-filter executable is present. Progress and problems are
# reported on STDOUT.
af_bootstrap() {
    if [ ! -d "$1" ]; then
        echo "bad target directory $1"
    elif [ ! -e "$AF" ]; then
        echo "annoyance-filter appears to be missing"
    else
        echo "selecting $0"
        cp "$0" "$1"
        printf '\tannoyance-filter is hard-coded for use only with exactly\n'
        printf "\ttwo categories named 'spam' and 'notspam'.\n"
    fi
}

# af_classify_then_learn MODE TRUE_CATEGORY_FILE CATEGORY_FILE...
# Shared implementation of "toe" and "foot". Classifies the single email
# on STDIN, then learns it into TRUE_CATEGORY_FILE:
#   MODE=toe   only when the verdict differs from the true category
#   MODE=foot  always
# The verdict is written on STDOUT without a trailing newline.
# Returns 1 if the temporary spool file cannot be created.
af_classify_then_learn() {
    local mode="$1" truecat="$2"
    local msg verdict
    shift
    [ $# -gt 0 ] && shift

    # STDIN has to be consumed twice (classify, then learn), so spool it
    # to a private, unpredictably named file rather than a fixed name that
    # concurrent runs would share.
    msg=$(mktemp "$TEMPDIR/mail$mode.XXXXXX") || return 1
    cat > "$msg"

    verdict=$(af_filter "$@" < "$msg")
    if [ "$mode" = "foot" ] || [ "$verdict" != "$(basename -- "$truecat")" ]; then
        af_learn "$truecat" < "$msg"
    fi

    rm -f -- "$msg"
    # printf, unlike echo -e, never interprets backslashes in the verdict.
    printf '%s' "$verdict"
}

# main COMMAND [ARG...]
# Dispatches on the command named in the first argument. An unknown or
# missing command is silently ignored.
main() {
    local cmd="$1"
    [ $# -gt 0 ] && shift

    case "$cmd" in
        filter)
            af_filter "$@"
            ;;
        learn)
            af_learn "$1"
            ;;
        clean)
            af_clean "$1"
            ;;
        describe)
            af_describe
            ;;
        bootstrap)
            af_bootstrap "$1"
            ;;
        toe | foot)
            af_classify_then_learn "$cmd" "$@"
            ;;
    esac
}

main "$@"