3
# Copyright (C) 2002 Laird Breyer
5
# This program is free software; you can redistribute it and/or modify
6
# it under the terms of the GNU General Public License as published by
7
# the Free Software Foundation; either version 2 of the License, or
8
# (at your option) any later version.
10
# This program is distributed in the hope that it will be useful,
11
# but WITHOUT ANY WARRANTY; without even the implied warranty of
12
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
# GNU General Public License for more details.
15
# You should have received a copy of the GNU General Public License
16
# along with this program; if not, write to the Free Software
17
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19
# Author: Laird Breyer <laird@lbreyer.com>
23
# Short program name (directory part removed), used in the command synopsis
# lines further down. $0 is quoted so that an install path containing spaces
# or glob characters is passed to basename as a single argument.
PROGNAME2=$(basename "$0")
24
# Version banner. @VERSION@ looks like a build-time substitution placeholder.
# NOTE(review): PROGNAME is not assigned in the visible part of the file - confirm it is set earlier.
VERSION="$PROGNAME version @VERSION@\nTrain On Error simulator"
25
# Working tree root: a mailtoe.d directory below the current directory.
MXDIR="$PWD/mailtoe.d"
26
# Activity log: commands append their invocation lines here.
ALOG="$MXDIR/log/activity.log"
27
# Classification log written by the run command and read by summarize.
CLOG="$MXDIR/log/toe.log"
28
# Summary log appended to by the testsuite run command.
SLOG="$MXDIR/log/summary.log"
30
# Location of the bundled testsuite data. @PKGDATADIR@ looks like a build-time placeholder.
BOOTSTRAP="@PKGDATADIR@/testsuite"
31
# Directory holding the filter wrapper scripts invoked by the testsuite commands.
FILTERS="$MXDIR/filters"
33
# use this for debugging
40
$PROGNAME2 prepare size
41
$PROGNAME2 add category [MBOX]...
43
$PROGNAME2 summarize [LEVEL]
44
$PROGNAME2 plot [ps|logscale]
45
$PROGNAME2 review TRUECAT PREDCAT
49
$PROGNAME2 testsuite select [FILTER]...
50
$PROGNAME2 testsuite deselect [FILTER]...
51
$PROGNAME2 testsuite list
52
$PROGNAME2 testsuite status
53
$PROGNAME2 testsuite run [plots]
54
$PROGNAME2 testsuite summarize
61
# usage: $0 seqno seed [MBOX]... -s COMMAND ARGS
63
# Ignore SIGPIPE so that writing to a pipe whose reader has gone away does not terminate this script.
$SIG{PIPE} = "IGNORE";
71
# Walk over the command line arguments one at a time.
foreach my $g (@ARGV) {
74
# Branch taken once a command string has been collected.
# NOTE(review): presumably this is after the -s marker from the usage line - confirm.
} elsif( $cmd ne "" ) {
77
# Open the argument as a file for reading; the handle is kept in the mbox hash, keyed by file name.
if( open($mbox{$g}, "<$g") ) {
86
# Rewrite every /mbox/ path component in the command arguments to the directory named by n.
$args =~ s|/mbox/|/$n/|g;
89
# Keep going while the mbox hash still has entries.
while( scalar keys %mbox > 0 ) {
90
# Draw a random integer from 0 up to (number of entries - 1).
my $j = int(rand scalar keys %mbox);
91
foreach my $f (keys %mbox) {
93
# Test whether this mailbox has been read to its end.
if( eof($mbox{$f}) ) {
101
# Two-argument open on the command string.
# NOTE(review): this only starts a process if the string begins or ends with a pipe character - confirm how cmd is built.
open(CPIPE, "$cmd $c $args");
102
# Advance until the buffered line starts with the mbox separator (From followed by a space) or the file ends.
while( ($line{$f} !~ /^From /) && !eof($mbox{$f}) ) {
103
$line{$f} = readline $mbox{$f};
106
# Keep a copy of the separator line with its leading From keyword removed.
my $fromline = $line{$f};
107
$fromline =~ s/^From//;
111
# Send the separator line itself to CPIPE, then fetch the next line.
print CPIPE $line{$f};
112
$line{$f} = readline $mbox{$f};
113
# Forward lines to CPIPE until the next separator line or end of file.
# The line that stops the loop stays buffered in the line hash.
while( !eof($mbox{$f}) && ($line{$f} !~ /^From /) ) {
114
print CPIPE $line{$f};
115
$line{$f} = readline $mbox{$f};
118
# expect the piped command to output result without trailing newline
128
# this is the default filter
129
# Only assign the default when MAILTOE_FILTER is unset or empty, so a value
# already present in the environment is kept.
if [ -z "$MAILTOE_FILTER" ]; then
130
# Default command: the dbaclB wrapper from the package data directory, invoked with the toe subcommand.
MAILTOE_FILTER="@PKGDATADIR@/dbaclB toe"
134
# begin mailtest.functions
135
# end mailtest.functions
137
# check this file for environment variable overrides
138
# Source the per-user settings file when it exists. The path is quoted so that
# a home directory containing spaces or glob characters is tested and sourced
# as a single path instead of being word-split.
[ -e "$HOME/.mailtoerc" ] && . "$HOME/.mailtoerc"
141
# main switch statement - this processes commands
148
clean) # delete working tree
153
# NOTE(review): prerequisite_command is defined outside this view; presumably it
# checks that the first argument is an installed command and names the package to install - confirm.
prerequisite_command "killall" "killall"
154
# Send SIGKILL to the process groups of all processes named mailtoe.
killall -9 -g mailtoe
157
prepare) # create directory tree
159
# Forward all remaining arguments to the tree builder (defined outside this view).
prepare_working_tree "$@"
161
# use parent process id to randomize
# NOTE(review): the visible loop body below takes the seed from the shell RANDOM variable - confirm the comment above still applies.
163
# Write one seed file per numbered subset directory, 0 through NUM inclusive.
for i in `seq 0 $NUM`; do
164
echo $RANDOM > "$MXDIR/$i/seed"
171
# A category name is mandatory for this command.
if [ -z "$CATNAME" ]; then
172
echo "error: missing category name."
176
prerequisite_command "formail" "mailutils"
177
# NOTE(review): helper defined outside this view; other call sites read NUM after it - confirm it sets NUM.
get_number_of_subsets
179
# Record the invocation in the activity log.
echo "=== $PROGNAME $*" >> $ALOG
182
# use formail to ensure mbox format is clean
184
# Concatenate the named mailbox files; formail -s splits the stream into
# messages and runs the given command once per message, so each message is
# appended to the category mailbox under the working tree.
cat "$@" | formail -s /bin/bash -c \
185
"cat >> $MXDIR/mbox/$CATNAME.mbox"
187
# Same per-message append, but with no explicit input files, so formail reads standard input.
formail -s /bin/bash -c \
188
"cat >> $MXDIR/mbox/$CATNAME.mbox"
193
echo "This command is not meaningful."
198
# NOTE(review): helper defined outside this view; the next statement reads NUM, so presumably it sets NUM - confirm.
get_number_of_subsets
199
NUM=`expr $NUM - 1` # we count from zero to NUM-1
201
# Scratch file path under the log directory; its use is not visible in this excerpt.
STUFF="$MXDIR/log/run.stuff"
203
# External tools needed by this command.
prerequisite_command "perl" "perl"
204
prerequisite_command "sed" "sed"
205
prerequisite_command "seq" "shellutils"
207
# Record the invocation in the activity log.
echo "=== $PROGNAME run $*" >> $ALOG
209
# Start a fresh classification log (truncating any previous one) with a column header line.
echo "# location | true | predicted | from" > $CLOG
211
# One pass per subset index, 0 through NUM inclusive.
for i in `seq 0 $NUM`; do
213
# The filter command line; the trailing space is part of the stored value.
COMMAND="$MAILTOE_FILTER "
214
# Seed previously stored for this subset.
SEED=`cat $MXDIR/$i/seed`
215
# Space separated list of mailbox paths, one per entry in CATS.
# NOTE(review): CATS is assigned outside this view.
CATPATHS=`for n in $CATS; do echo -ne "$MXDIR/mbox/$n "; done`
216
echo "| $COMMAND" >> $ALOG
218
# Feed the mailboxes through the multiplexer, matching its usage line
# (seqno seed MBOX... -s COMMAND), and append its output to the classification log.
mbox_multiplex $i $SEED $CATPATHS -s $COMMAND >> $CLOG
220
echo " toe $COMMAND |" >> $ALOG
226
get_number_of_subsets # includes check that directory tree is present
229
prerequisite_command "awk" "awk or equivalent"
231
# Record the invocation in the activity log.
echo "=== $PROGNAME summarize $*" >> $ALOG
233
# Only summarize when the classification log exists and is not empty.
if [ -s $CLOG ]; then
234
# summarize_log is defined outside this view; it receives the log on standard input.
cat $CLOG | summarize_log
236
echo "Error: No results found. You must run the TOE simulations first."
243
# External tools needed by the review command.
prerequisite_command "formail" "mailutils"
244
prerequisite_command "grep" "grep"
246
# Both the true and the predicted category must be given.
if [ -z "$1" -o -z "$2" ]; then
247
echo "Error: Missing category, e.g. $PROGNAME review notspam spam"
249
# review_misclassified is defined outside this view; it is passed a script
# path under the tmp directory followed by the two categories.
review_misclassified "$MXDIR/tmp/save_msg.sh" "$1" "$2"
255
prerequisite_command "gnuplot" "gnuplot"
257
# Plot only when the classification log exists.
if [ -e "$MXDIR/log/toe.log" ]; then
258
# plot_errors is defined outside this view; it is passed the log path, a
# title that includes the active filter command, and the remaining arguments.
plot_errors "$MXDIR/log/toe.log" "Misclassifications over time in TOE simulation\n$MAILTOE_FILTER" "$@"
266
# The three wrapper helpers below are defined outside this view.
testsuite_list_wrappers
270
testsuite_deselect_wrappers "$@"
274
testsuite_select_wrappers "$@"
278
echo -e "The following categories are ready to be TOE tested:\n"
281
# Print the category name without a newline.
# NOTE(review): presumably the count is printed on the same line afterwards - confirm.
echo -n "$c - counting... "
282
# Count mbox separator lines (From followed by a space) in this category file
# across every immediate subdirectory of the working tree.
NUM=`grep '^From ' $MXDIR/*/$c | wc -l`
286
echo -e "\nThe following classifiers are ready to be TOE tested:\n"
289
# Show each wrapper next to the output of its describe subcommand.
echo "$f - `$FILTERS/$f describe`"
296
# NOTE(review): helper defined outside this view; NUM is read below, so presumably it sets NUM - confirm.
get_number_of_subsets
301
echo -ne "Now testing: "
302
# Each wrapper script prints its own description.
"$FILTERS/$f" describe
305
# Call the clean subcommand of the wrapper with the working tree as argument.
"$FILTERS/$f" clean "$MXDIR"
307
# before we can classify, we need to create all the
308
# category databases - we use a dummy mailbox for this
309
for i in `seq 0 $NUM`; do
311
# Learn from the dummy mailbox into the category path for subset i; the
# parameter expansion removes the first .mbox occurrence from j.
cat $MXDIR/mbox/dummy.mailbox | "$FILTERS/$f" learn "$MXDIR/$i/${j/.mbox/}"
315
# Export the wrapper as the active filter so child processes see it.
export MAILTOE_FILTER="$FILTERS/$f toe"
320
echo "Writing results."
321
# Append a separator, the wrapper description and the summary to the summary log.
echo -e "\n---------------" >> "$SLOG"
322
"$FILTERS/$f" describe >> "$SLOG"
324
echo "---------------" >> "$SLOG"
325
"$PROGNAME" summarize >> "$SLOG"
327
# Optional plotting, requested by passing plots as the second argument.
if [ "$2" = "plots" ]; then
328
prerequisite_command "gnuplot" "gnuplot"
329
# plot_errors is defined outside this view; here it is passed a path under the plots directory, a title and ps.
plot_errors "$MXDIR/plots/$f.toe.ps" "Misclassifications over time in TOE simulation\n$f" ps
336
# Proceed only when the summary log exists and is not empty.
if [ -s "$SLOG" ]; then
339
echo "Error: No results found. You must run the testsuite first."
b'\\ No newline at end of file'