3
# Copyright (C) 2002 Laird Breyer
5
# This program is free software; you can redistribute it and/or modify
6
# it under the terms of the GNU General Public License as published by
7
# the Free Software Foundation; either version 2 of the License, or
8
# (at your option) any later version.
10
# This program is distributed in the hope that it will be useful,
11
# but WITHOUT ANY WARRANTY; without even the implied warranty of
12
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
# GNU General Public License for more details.
15
# You should have received a copy of the GNU General Public License
16
# along with this program; if not, write to the Free Software
17
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19
# Author: Laird Breyer <laird@lbreyer.com>
23
# Short program name: basename of the path this script was invoked as.
PROGNAME2=`basename $0`
24
# Version banner text.
# NOTE(review): $PROGNAME is not assigned in the visible lines -- presumably
# set earlier in the file; confirm. @VERSION@ looks like a build-time
# placeholder -- confirm.
VERSION="$PROGNAME version @VERSION@\nFull Online Ordered Training simulator"
25
# Working directory tree, located under the current directory.
MXDIR="$PWD/mailfoot.d"
26
# Activity log: the commands below append "=== ..." trace lines here.
ALOG="$MXDIR/log/activity.log"
27
# Classification log: receives a "# location | true | predicted | from"
# header followed by the multiplexer output.
CLOG="$MXDIR/log/foot.log"
28
# Summary log: the testsuite appends classifier descriptions and
# summaries here.
SLOG="$MXDIR/log/summary.log"
30
# Path of the testsuite data.
# NOTE(review): @PKGDATADIR@ is presumably substituted at build time -- confirm.
BOOTSTRAP="@PKGDATADIR@/testsuite"
31
# Directory holding the classifier wrapper scripts, which are invoked as
# "$FILTERS/<name> describe|clean|learn|foot".
FILTERS="$MXDIR/filters"
33
# use this for debugging
40
$PROGNAME2 prepare size
41
$PROGNAME2 add category [MBOX]...
43
$PROGNAME2 summarize [LEVEL]
44
$PROGNAME2 plot [ps|logscale]
45
$PROGNAME2 review TRUECAT PREDCAT
49
$PROGNAME2 testsuite select [FILTER]...
50
$PROGNAME2 testsuite deselect [FILTER]...
51
$PROGNAME2 testsuite list
52
$PROGNAME2 testsuite status
53
$PROGNAME2 testsuite run [plots]
54
$PROGNAME2 testsuite summarize
61
# usage: $0 seqno seed [MBOX]... -s COMMAND ARGS
63
$SIG{PIPE} = "IGNORE";
71
foreach my $g (@ARGV) {
74
} elsif( $cmd ne "" ) {
77
if( open($mbox{$g}, "<$g") ) {
86
$args =~ s|/mbox/|/$n/|g;
89
while( scalar keys %mbox > 0 ) {
90
my $j = int(rand scalar keys %mbox);
91
foreach my $f (keys %mbox) {
93
if( eof($mbox{$f}) ) {
101
open(CPIPE, "$cmd $c $args");
102
while( ($line{$f} !~ /^From /) && !eof($mbox{$f}) ) {
103
$line{$f} = readline $mbox{$f};
106
my $fromline = $line{$f};
107
$fromline =~ s/^From//;
111
print CPIPE $line{$f};
112
$line{$f} = readline $mbox{$f};
113
while( !eof($mbox{$f}) && ($line{$f} !~ /^From /) ) {
114
print CPIPE $line{$f};
115
$line{$f} = readline $mbox{$f};
118
# expect the piped command to output result without trailing newline
128
# this is the default filter
129
if [ -z "$MAILFOOT_FILTER" ]; then
130
MAILFOOT_FILTER="@PKGDATADIR@/dbaclB foot"
134
# begin mailtest.functions
135
# end mailtest.functions
137
# check this for environment variable overrides
138
[ -e $HOME/.mailfootrc ] && . $HOME/.mailfootrc
141
# main switch statement - this processes commands
148
clean) # delete working tree
153
prerequisite_command "killall" "killall"
154
killall -9 -g mailfoot
157
prepare) # create directory tree
159
prepare_working_tree "$@"
161
# use parent process id to randomize
163
for i in `seq 0 $NUM`; do
164
echo $RANDOM > "$MXDIR/$i/seed"
171
if [ -z "$CATNAME" ]; then
172
echo "error: missing category name."
176
prerequisite_command "formail" "mailutils"
177
get_number_of_subsets
179
echo "=== $PROGNAME $*" >> $ALOG
182
# use formail to ensure mbox format is clean
184
cat "$@" | formail -s /bin/bash -c \
185
"cat >> $MXDIR/mbox/$CATNAME.mbox"
187
formail -s /bin/bash -c \
188
"cat >> $MXDIR/mbox/$CATNAME.mbox"
193
echo "This command is not meaningful."
198
get_number_of_subsets
199
NUM=$(($NUM - 1)) # we count from zero to NUM-1
201
STUFF="$MXDIR/log/run.stuff"
203
prerequisite_command "perl" "perl"
204
prerequisite_command "sed" "sed"
205
prerequisite_command "seq" "shellutils"
207
echo "=== $PROGNAME run $*" >> $ALOG
210
echo "# location | true | predicted | from" > $CLOG
212
for i in `seq 0 $NUM`; do
214
COMMAND="$MAILFOOT_FILTER "
215
SEED=`cat $MXDIR/$i/seed`
216
CATPATHS=`for n in $CATS; do echo -ne "$MXDIR/mbox/$n "; done`
217
echo "| $COMMAND" >> $ALOG
219
mbox_multiplex $i $SEED $CATPATHS -s $COMMAND >> $CLOG
221
echo " toe $COMMAND |" >> $ALOG
227
get_number_of_subsets # includes check that directory tree is present
230
prerequisite_command "awk" "awk or equivalent"
232
echo "=== $PROGNAME summarize $*" >> $ALOG
234
if [ -s $CLOG ]; then
235
cat $CLOG | summarize_log
237
echo "Error: No results found. You must run the FOOT simulations first."
244
prerequisite_command "formail" "mailutils"
245
prerequisite_command "grep" "grep"
247
if [ -z "$1" -o -z "$2" ]; then
248
echo "Error: Missing category, e.g. $PROGNAME review notspam spam"
250
review_misclassified "$MXDIR/tmp/save_msg.sh" "$1" "$2"
256
prerequisite_command "gnuplot" "gnuplot"
258
if [ -e "$MXDIR/log/foot.log" ]; then
259
plot_errors "$MXDIR/log/foot.log" "Misclassifications over time in FOOT simulation\n$MAILFOOT_FILTER" "$@"
267
testsuite_list_wrappers
271
testsuite_deselect_wrappers "$@"
275
testsuite_select_wrappers "$@"
279
echo -e "The following categories are ready to be FOOT tested:\n"
282
echo -n "$c - counting... "
283
NUM=`grep '^From ' $MXDIR/*/$c | wc -l`
287
echo -e "\nThe following classifiers are ready to be FOOT tested:\n"
290
echo "$f - `$FILTERS/$f describe`"
297
get_number_of_subsets
302
echo -ne "Now testing: "
303
"$FILTERS/$f" describe
306
"$FILTERS/$f" clean "$MXDIR"
308
# before we can classify, we need to create all the
309
# category databases - we use a dummy mailbox for this
310
for i in `seq 0 $NUM`; do
312
cat $MXDIR/mbox/dummy.mailbox | "$FILTERS/$f" learn "$MXDIR/$i/${j/.mbox/}"
316
export MAILFOOT_FILTER="$FILTERS/$f foot"
321
echo "Writing results."
322
echo -e "\n---------------" >> "$SLOG"
323
"$FILTERS/$f" describe >> "$SLOG"
325
echo "---------------" >> "$SLOG"
326
"$PROGNAME" summarize >> "$SLOG"
328
if [ "$2" = "plots" ]; then
329
prerequisite_command "gnuplot" "gnuplot"
330
plot_errors "$MXDIR/plots/$f.foot.ps" "Misclassifications over time in FOOT simulation\n$f" ps
337
if [ -s "$SLOG" ]; then
340
echo "Error: No results found. You must run the testsuite first."
b'\\ No newline at end of file'