3
# Copyright (C) 2002 Laird Breyer
5
# This program is free software; you can redistribute it and/or modify
6
# it under the terms of the GNU General Public License as published by
7
# the Free Software Foundation; either version 2 of the License, or
8
# (at your option) any later version.
10
# This program is distributed in the hope that it will be useful,
11
# but WITHOUT ANY WARRANTY; without even the implied warranty of
12
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
# GNU General Public License for more details.
15
# You should have received a copy of the GNU General Public License
16
# along with this program; if not, write to the Free Software
17
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19
# Author: Laird Breyer <laird@lbreyer.com>
21
# These functions are included by the test scripts. They could be
22
# just sourced, but then where to put the common functions in the filesystem?
24
# begin mailtest.functions
26
# Check that an external command is available.
#   $1 - command name, looked up with the bash builtin "type -p"
#   $2 - package name suggested in the error message
# When the lookup prints nothing, an error message is written to stdout.
prerequisite_command() {
27
if [ -z "`type -p $1`" ]; then
28
echo "Error: $1 not found. Please install $2 to proceed."
33
# Clean up the working tree at $MXDIR.
# Tests whether $MXDIR exists; "Nothing to clean" is presumably the message
# for the case where it does not -- TODO confirm.
# NOTE(review): $MXDIR is unquoted in the test, so with an empty value the
# test collapses to "[ -e ]", which evaluates to true.
clean_working_tree() {
34
if [ -e $MXDIR ]; then
37
echo "Nothing to clean"
42
# Write a placeholder "folder internal data" message to
# $MXDIR/mbox/dummy.mailbox using an unquoted here-document.
# NOTE(review): inside a here-document a backslash before a single quote is
# kept as is, so the Subject line is written with a literal backslash
# (DON\'T) -- confirm whether that is intended.
cat > $MXDIR/mbox/dummy.mailbox <<EOF
43
From MAILER-DAEMON Thu Dec 4 13:50:55 2003
44
Date: 04 Dec 2003 13:50:55 +1000
45
From: Mail System Internal Data <MAILER-DAEMON@scooby>
46
Subject: DON\'T DELETE THIS MESSAGE -- FOLDER INTERNAL DATA
47
Message-ID: <1070509855@scooby>
48
X-IMAP: 1023007291 0000010227
51
This text is part of the internal format of your mail folder, and is not
52
a real message. It is created automatically by the mail system software.
53
If deleted, important folder data will be lost, and it will be re-created
54
with the data reset to initial values.
59
# Build the directory skeleton for a test run under $MXDIR.
#   $1 - must be non-empty, otherwise "Please specify a number." is printed
# Reports an error when $MXDIR is already a directory. Otherwise it requires
# the "seq" command, creates the tmp, log, plots, mbox, review and filters
# subdirectories plus one directory per value of "seq 0 $NUM", and appends a
# "=== prepare_working_tree ..." marker line to $ALOG.
# NOTE(review): NUM is presumably derived from $1 (the test accepts NUM >= 0
# while the message asks for a number greater than zero) -- TODO confirm.
# The mkdir chain is joined with "&& \", so it stops at the first failure.
prepare_working_tree() {
60
if [ -d $MXDIR ]; then
61
echo "Error: Directory $MXDIR already exists. Remove it or use it.";
63
elif [ -n "$1" ]; then
64
prerequisite_command "seq" "shellutils"
66
if [ $NUM -gt -1 ]; then
68
mkdir "$MXDIR/tmp" && \
69
mkdir "$MXDIR/log" && \
70
mkdir "$MXDIR/plots" && \
71
mkdir "$MXDIR/mbox" && \
72
mkdir "$MXDIR/review" && \
73
mkdir "$MXDIR/filters" && \
74
for i in `seq 0 $NUM`; do mkdir "$MXDIR/$i"; done && \
75
echo "=== prepare_working_tree $*" >> $ALOG
78
echo "Error: Please specify a number greater than zero."
83
echo "Error: Please specify a number."
89
# Set CATS to the sorted, de-duplicated base names of every regular file
# named *.mbox below $MXDIR; print a notice when the list is empty.
CATS=`find $MXDIR -type f -name '*.mbox' -exec basename {} \; | sort -u`
90
[ -z "$CATS" ] && echo "No categories found"
94
# Set FILTS to the sorted, de-duplicated base names of every regular file
# below $FILTERS that has at least one execute bit set; print a notice when
# the list is empty.
# The three -perm tests spell out "u+x or g+x or o+x" in POSIX syntax; the
# older "-perm +0111" spelling is rejected by current GNU find.
FILTS=`find $FILTERS -type f \( -perm -u+x -o -perm -g+x -o -perm -o+x \) -exec basename {} \; | sort -u`
95
[ -z "$FILTS" ] && echo "No filters found"
98
# Store in NUM the number of entries in $MXDIR whose names start with a
# digit, and print an error message when that count is not positive.
get_number_of_subsets() {
99
NUM=`ls $MXDIR | grep '^[0-9]' | wc -l`
100
# Arithmetic expansion yields a plain integer even if wc pads its output.
if [ $(($NUM)) -le 0 ]; then
101
echo "error: you need to prepare first."
106
# Copy selected messages into $MXDIR/review for manual inspection.
#   $1, $2 - two fields matched, space-delimited, against the lines of $CLOG;
#            they are also passed on to the helper script $SM
# For each matching log line, fields 1, 2, 4 and 5.. become POSITION, BOX,
# EMAIL and DATE. The mailbox $MXDIR/$POSITION/$BOX.mbox (or, if that file
# does not exist, $MXDIR/mbox/$BOX.mbox) is split by formail, which runs
# /bin/sh "$SM" "$1" "$2" on every message.
# The lines with escaped dollar signs are presumably the text of the helper
# script $SM: it saves one message to a temporary file and keeps it in the
# review directory only if its first line matches "$EMAIL.*$DATE" -- confirm.
# Prints an error when the review directory or the log $CLOG is missing.
review_misclassified() {
111
cat > msg.tmp.\$POSITION.\$FILENO
112
M=\`head -1 msg.tmp.\$POSITION.\$FILENO | grep -e "\$EMAIL.*\$DATE"\`
113
if [ -n "\$M" ]; then
114
mv msg.tmp.\$POSITION.\$FILENO $MXDIR/review/\$1.\$2.\$POSITION.\$FILENO
116
rm msg.tmp.\$POSITION.\$FILENO
120
# Empty the review directory. The glob has to stay outside the quotes,
# otherwise rm looks for a file literally named "*" and removes nothing.
if [ -d "$MXDIR/review" ]; then
121
rm -f "$MXDIR"/review/*
123
echo "Error: You need to prepare first."
127
if [ -s "$CLOG" ]; then
128
grep " $1 $2 " "$CLOG" | \
130
BOX=`echo $f | cut -d' ' -f2`
131
export POSITION=`echo $f | cut -d' ' -f1`
132
export EMAIL=`echo $f | cut -d' ' -f4`
133
export DATE=`echo $f | cut -d' ' -f5-`
135
[ -e "$MXDIR/$POSITION/$BOX.mbox" ] || export POSITION=mbox
136
cat "$MXDIR/$POSITION/$BOX.mbox" | formail -s /bin/sh "$SM" "$1" "$2"
139
echo "Error: There are no logs - run the classifier(s) first"
145
# List the classification wrappers found in $BOOTSTRAP.
# For every executable entry, prints "name - description", where the
# description is the output of running the entry with the argument
# "describe". A "does not exist" message is printed otherwise, presumably
# when $BOOTSTRAP is not a directory -- TODO confirm.
testsuite_list_wrappers() {
146
if [ -d $BOOTSTRAP ] ; then
147
echo -ne "The following classification wrappers are selectable:\n\n"
148
for f in `ls $BOOTSTRAP` ; do
149
[ -x "$BOOTSTRAP/$f" ] && echo "$f - `$BOOTSTRAP/$f describe`"
152
echo "Bootstrap directory $BOOTSTRAP does not exist."
156
# Deselect filters by name.
#   $* - filter names; the empty case is tested first
# For a name f, prints "deselecting f" when $FILTERS/f exists and
# "f: no such filter." otherwise.
# NOTE(review): presumably f iterates over the arguments and the existing
# entry is removed from $FILTERS -- TODO confirm.
testsuite_deselect_wrappers() {
158
if [ -z "$*" ] ; then
162
if [ -e $FILTERS/$f ] ; then
163
echo "deselecting $f"
166
echo "$f: no such filter."
172
# Select wrappers by name.
#   $* - wrapper names; the empty case is tested first
# For a name f that is executable in $BOOTSTRAP, runs
# "$BOOTSTRAP/f bootstrap $FILTERS"; otherwise reports that the wrapper
# cannot be selected and is skipped.
# NOTE(review): presumably f iterates over the arguments -- TODO confirm.
testsuite_select_wrappers() {
174
if [ -z "$*" ] ; then
178
if [ -x $BOOTSTRAP/$f ] ; then
179
$BOOTSTRAP/$f bootstrap $FILTERS
181
echo "The wrapper $f cannot be selected, skipping."
188
awk -v "num=$NUM" -v "cats=${CATS//.mbox/}" '
# Summary report: two percentage tables followed by an overall total.
# num is the shell value NUM; cats is CATS with every ".mbox" removed.
# Table 1 prints 100 * f[true,predicted] / fp[true] for each pair.
# Table 2 prints 100 * f[other,category] / fn[category] for each pair.
# x sums the diagonal entries f[c,c], y sums entries f[c,d]; the total
# printed at the end is 100 * x / y.
# NOTE(review): the divisions by fp[...], fn[...] and y are unguarded.
203
printf("Where do misclassifications go?\n(numbers on diagonal represent \"recall\")\n\n")
205
printf(" true | but predicted as...\n")
207
for(c in names) printf("%10s", names[c])
211
printf("%-10s | ", names[c])
213
printf("%9.2f%%", 100 * f[names[c],names[d]]/fp[names[c]])
220
printf("What is really in each category after prediction?\n(numbers on diagonal represent \"precision\")\n\n")
222
printf("category | contains mixture of...\n")
224
for(c in names) printf("%10s", names[c])
228
printf("%-10s | ", names[c])
230
printf("%9.2f%%", 100 * f[names[d],names[c]]/fn[names[c]])
239
x += f[names[c],names[c]]
241
y += f[names[c],names[d]]
245
printf("Total correct classifications: %9.2f%%\n\n", (100 * x)/y)
252
# Plot file names derived from the log named in $1: OUTFILE is the base name
# of $1 without ".log", placed under $MXDIR/plots; DATAFILE adds ".plotdata".
# The pipeline keeps fields 1-3 of its input and, per value of field 1,
# prints a running line count and a running count of lines whose fields 2
# and 3 differ, with an empty line before the first entry of each value.
OUTFILE=$MXDIR/plots/`basename $1 .log`
254
DATAFILE=$OUTFILE.plotdata
258
| cut -f1,2,3 -d ' ' \
259
| awk '{count[$1]++; ecount[$1] += ($2 != $3); print (count[$1] == 1) ? "\n" : "", count[$1], ecount[$1]}' \
262
# Default plot settings (gnuplot syntax): x11 terminal, linear scale, and a
# pause until a key is pressed.
OUTPUT="set terminal x11"
263
SCALE="unset logscale"
264
PAUSE="pause -1 \"Press any key...\""
267
# The option value in o switches to postscript output written to
# $OUTFILE.ps, or selects the log scale variant.
if [ "$o" = "ps" ]; then
268
OUTPUT="set terminal postscript"
269
OUTPRINT="set output \"$OUTFILE.ps\""
271
echo "writing $OUTFILE.ps"
272
elif [ "$o" = "logscale" ]; then
282
plot "$DATAFILE" with points
289
# Plot file names for the plot named by $1: OUTFILE is $MXDIR/plots/$1,
# DATAFILE adds ".plotdata" and ERRORFILE adds ".ploterrors".
# One pipeline prints fields 5 and 14 of every input line. The other first
# keeps only lines where fields 2 and 3 are equal and field 5 exceeds
# field 14, or fields 2 and 3 differ and field 5 is below field 14, then
# prints fields 5 and 14 of those lines.
# The final plot command draws DATAFILE, ERRORFILE and the line x.
OUTFILE=$MXDIR/plots/$1
291
DATAFILE=$OUTFILE.plotdata
292
ERRORFILE=$OUTFILE.ploterrors
296
| awk '{print $5, $14}' \
300
| awk '{if( (($2 == $3) && ($5 > $14)) || (($2 != $3) && ($5 < $14)) ) print }' \
301
| awk '{print $5, $14}' \
304
# Default plot settings (gnuplot syntax): x11 terminal, linear scale, and a
# pause until a key is pressed.
OUTPUT="set terminal x11"
305
SCALE="unset logscale"
306
PAUSE="pause -1 \"Press any key...\""
309
# The option value in o switches to postscript output written to
# $OUTFILE.ps, or selects the log scale variant.
if [ "$o" = "ps" ]; then
310
OUTPUT="set terminal postscript"
311
OUTPRINT="set output \"$OUTFILE.ps\""
313
echo "writing $OUTFILE.ps"
314
elif [ "$o" = "logscale" ]; then
324
plot "$DATAFILE" using 1:2 pt 1, "$ERRORFILE" using 1:2 pt 5, x
331
# end mailtest.functions