1
/* ifile - intelligent mail filter for EXMH/MH
2
ifile is Copyright (C) 1997 Jason Rennie <jrennie@ai.mit.edu>
4
This program is free software; you can redistribute it and/or
5
modify it under the terms of the GNU General Public License
6
as published by the Free Software Foundation; either version 2
7
of the License, or (at your option) any later version.
9
This program is distributed in the hope that it will be useful,
10
but WITHOUT ANY WARRANTY; without even the implied warranty of
11
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12
GNU General Public License for more details.
14
You should have received a copy of the GNU General Public License
15
along with this program (see file 'COPYING'); if not, write to the Free
16
Software Foundation, Inc., 59 Temple Place - Suite 330,
17
Boston, MA 02111-1307, USA.
22
#include <ifile.h> /* main ifile function library */
24
/* variables for keeping track of time/speed of ifile */
25
extern clock_t DMZ_start, DMZ_end, DMZ2_start;
27
/* returns a hash value for the string S */
28
/* written by Jason Rennie <jrennie@ai.mit.edu> */
/* Hashes the NUL-terminated string S into the range [0, SIZE).
 *
 * s:    string to hash; NULL hashes to 0.
 * size: modulus applied after every character (typically the number of
 *       buckets in the table).  A value that is zero or negative
 *       yields 0 instead of dividing by zero.
 *
 * Each step computes hashval * 31 + c, spelled (hashval << 5) - hashval,
 * and reduces right away so the running value never grows large. */
unsigned long
hash (const char * s, long int size)
{
  unsigned long hashval = 0;
  unsigned long modulus;

  /* A zero modulus would trap in the '%' below; a negative one would be
   * converted to a huge unsigned value and defeat the reduction. */
  if (s == NULL || size <= 0) return 0;

  modulus = (unsigned long) size;
  for (; *s != '\0'; s++)
    hashval = (*s + (hashval << 5) - hashval) % modulus;

  return hashval;
}
43
/* Given a printf style format string and an arbitrarily long list of
44
* arguments which are in accordance with the format string, ifile_sprintf
45
* will allocate memory for and create a string according to the given
* format string and arguments. */
47
/* Written by Jason Rennie <jrennie@ai.mit.edu> for ifile. */
/* Formats the printf-style arguments into a newly malloc()ed string that
 * is sized to hold the result exactly.
 *
 * format: printf-style format string; the variadic arguments must match.
 * Returns the formatted, NUL-terminated string; the caller owns it and
 * releases it with free().  Aborts on a formatting error or when memory
 * cannot be allocated.
 *
 * The formatting call is deliberately kept out of assert(): an assert()
 * argument is not evaluated when NDEBUG is defined, and writing into a
 * fixed buffer before checking the length would already have overflowed. */
char *
ifile_sprintf (char * format, ...)
{
  va_list ap;
  char * rtn;
  int needed;

  /* First pass: write nothing, only learn how many characters are needed. */
  va_start(ap, format);
  needed = vsnprintf(NULL, 0, format, ap);
  va_end(ap);
  if (needed < 0) abort();

  rtn = malloc((size_t) needed + 1);
  if (!rtn) abort();

  /* Second pass: the argument list must be restarted before it is reused. */
  va_start(ap, format);
  if (vsnprintf(rtn, (size_t) needed + 1, format, ap) != needed) abort();
  va_end(ap);

  return rtn;
}
66
/* Returns a string which is the concatenation of an arbitrary number
67
* of strings passed as arguments to the function.
68
* First argument passed to function is the number of strings passed to
69
* the function which are to be concatenated. */
70
/* written by Jason Rennie <jrennie@ai.mit.edu> for ifile */
/* Concatenates NUM_STRINGS C strings into one newly malloc()ed string.
 *
 * num_strings: how many 'char *' arguments follow; each must be a valid
 *              NUL-terminated string.  Zero yields an empty string.
 * Returns the concatenation; the caller owns it and releases it with
 * free().  Aborts when memory cannot be allocated.
 *
 * The pieces are appended with memcpy() at a running offset rather than
 * strcat(), which would rescan the growing result for every piece. */
char *
ifile_cats (long int num_strings, ...)
{
  va_list ap;
  size_t string_size = 0;
  size_t offset = 0;
  char * new_string = NULL;
  long int i;

  /* First pass: total length of all pieces. */
  va_start(ap, num_strings);
  for (i = 0; i < num_strings; i++)
    string_size += strlen(va_arg(ap, char *));
  va_end(ap);

  new_string = malloc(string_size + 1);
  if (!new_string) abort();

  /* Second pass: copy each piece directly behind the previous one. */
  va_start(ap, num_strings);
  for (i = 0; i < num_strings; i++)
    {
      const char * piece = va_arg(ap, char *);
      size_t piece_len = strlen(piece);

      memcpy(new_string + offset, piece, piece_len);
      offset += piece_len;
    }
  va_end(ap);

  /* Terminate explicitly; this also covers the zero-piece case. */
  new_string[offset] = '\0';

  return new_string;
}
97
/* Given an integer value, allocates space for and returns a string
98
* representing the integer in character form */
99
/* Written by Jason Rennie <jrennie@ai.mit.edu> for ifile */
/* Converts NUMBER to its decimal text form in a newly malloc()ed string.
 *
 * number: value to convert; negative values get a leading '-'.
 * Returns the NUL-terminated decimal string; the caller owns it and
 * releases it with free().  Aborts on a formatting error or when memory
 * cannot be allocated.
 *
 * The conversion is left to snprintf("%ld"): abs() takes an int and so
 * would truncate a long, and negating the most negative long overflows. */
char *
itoa (long int number)
{
  char * tmp;
  int len;

  /* Sizing call: nothing is written, only the required length is returned. */
  len = snprintf(NULL, 0, "%ld", number);
  if (len < 0) abort();

  tmp = malloc((size_t) len + 1);
  if (!tmp) abort();

  if (snprintf(tmp, (size_t) len + 1, "%ld", number) != len) abort();

  return tmp;
}
163
/* Reads up to and including the next linefeed (\n). Returns a pointer to
* STRING on success, and NULL on EOF or error. Updates bufp also. */
166
/* Written by Jason Rennie <jrennie@ai.mit.edu> for ifile */
168
/* Line reader over an in-memory buffer: *bufp is the position at which
 * the current line starts.
 * NOTE(review): the return type and everything after the strchr() call
 * are not visible in this excerpt; only the shown lines are described. */
readline (char **bufp)
170
/* first: start of the current line, taken from *bufp.
 * last:  receives the position of the line's terminating '\n'. */
char *first = *bufp, *last;
172
/* Find the end of the current line.  strchr() returns NULL when no '\n'
 * is left in the buffer. */
last = strchr(first, '\n');
184
/* Wrapper for standard free() function. Frees memory and then sets
185
* pointer equal to NULL. */
186
/* Written by Jason Rennie <jrennie@ai.mit.edu> for ifile */
188
/* This seems like a big noop that's costing us efficiency -- jhbrown */
190
/* NOTE(review): VAR is received by value, so assigning NULL to it inside
 * this function cannot change the caller's copy of the pointer.  The body
 * is not visible in this excerpt -- confirm what it actually does. */
ifile_free (void * var)
197
/* Accepts a pointer to a message and prints out all the words of that
* message along with their stored values. */
200
/* Writes every element of the hash table MESSAGE to stdout.
 * NOTE(review): the return type and the declaration of 'elem' are not
 * visible in this excerpt. */
ifile_print_message (htable * message)
204
/* Walk the table with the htable traversal API until it yields NULL. */
for (elem = htable_init_traversal(message);
205
elem != NULL; elem = htable_next_traversal(message, elem))
206
/* One "(entry,index) " pair per element: entry is printed as a long int
 * and index as a C string.  Presumably entry is the word's count and
 * index the word itself -- verify against how the table is filled. */
printf("(%ld,%s) ", (long int) elem->entry, (char *) elem->index);
211
/* Accepts a file pointer and reads/lexes text from the associated file.
212
* Returns a hash table which maps words appearing in the message to
213
* their frequency in the message. */
214
/* The GOOD code! This uses Andrew's cool lexing code :) */
215
/* Written by Jason Rennie <jrennie@ai.mit.edu> and others for ifile */
217
/* Lexes the text available on FP with ifile_default_lexer and counts the
 * tokens it produces in a freshly allocated hash table.
 * NOTE(review): the return type, the braces and several statements of
 * this function are not visible in this excerpt; the comments below
 * describe only the lines that are shown. */
ifile_read_message (FILE * FP)
219
ifile_lex * document;
220
/* Receives each token from the lexer; MAX_STR_LEN is passed to get_word
 * as the buffer size further down. */
char token[MAX_STR_LEN];
221
long int token_len; /* length of token */
222
long int old_freq; /* previous frequency of word */
223
/* The table is heap-allocated; running out of memory is fatal. */
htable * message = malloc(sizeof(htable));
224
if (!message) abort();
226
ifile_verbosify(ifile_verbose, "Reading message...\n");
227
/* hash() takes 'const char *', so it is cast to the pointer type that
 * htable_init expects.  The 100 is presumably the initial table size --
 * TODO confirm against htable_init.
 * NOTE(review): calling a function through a pointer of a different
 * function type is formally undefined in ISO C, although it works on
 * common ABIs where all object pointers share a representation. */
htable_init(message, 100, (unsigned long (*)(const void *, long int)) hash);
229
/* Start of the interval reported in the final timing message. */
DMZ2_start = clock();
231
document = ifile_default_lexer->open_text_fp (ifile_default_lexer, FP);
235
/* Fetch the first token (the rest of this call is not visible here). */
token_len = ifile_default_lexer->get_word (ifile_default_lexer, document,
237
/* Keep going until get_word reports a length of 0 -- presumably meaning
 * no more words are available; confirm against the lexer. */
while (token_len != 0)
239
/* NOTE(review): the format uses %d; if the argument on the hidden
 * continuation line is token_len (a long int) the specifier should be
 * %ld -- confirm. */
ifile_verbosify(ifile_debug, "Read \'%s\'. length=%d\n", token,
241
/* update arrays which strictly concern message */
242
/* The stored value is used directly as the count.  Presumably a word not
 * yet in the table looks up as NULL, i.e. a count of 0 -- confirm. */
old_freq = (long int) htable_lookup(message, (void *) token);
243
/* Store the incremented count back under the same token. */
htable_put(message, ((char *) token),
244
(void *) (old_freq+1));
246
/* Advance to the next token for the loop test above. */
token_len = ifile_default_lexer->get_word (ifile_default_lexer,
247
document, token, MAX_STR_LEN);
250
ifile_default_lexer->close (ifile_default_lexer, document);
254
/* Failure report; the condition guarding it is not visible here. */
ifile_verbosify(ifile_quiet, "Unable to read message.\n");
259
ifile_verbosify(ifile_debug, "\n");
261
/* NOTE(review): the elapsed time is computed from DMZ_end, but no
 * assignment to DMZ_end is visible in the shown lines.  Also,
 * CLOCKS_PER_SECOND is not the ISO C macro (that is CLOCKS_PER_SEC), so
 * it must be supplied by a project header -- confirm both. */
ifile_verbosify(ifile_verbose,
262
"Finishing reading message. Time used: %.3f sec\n",
263
((float)(DMZ_end-DMZ2_start))/CLOCKS_PER_SECOND);
269
/* Given a hash table representing a message, changes all non-zero
270
* frequency values to one */
272
/* Collapses the values stored in MESSAGE to presence flags: every entry
 * greater than zero becomes 1; entries that are zero or negative are left
 * untouched.
 * NOTE(review): the return type and the declaration of 'elem' are not
 * visible in this excerpt. */
ifile_bitify_document(htable * message)
276
/* Walk the table with the htable traversal API until it yields NULL. */
for (elem = htable_init_traversal(message);
277
elem != NULL; elem = htable_next_traversal(message, elem))
278
/* The entry pointer is read as an integer count. */
if ((long int) elem->entry > 0)
279
/* The count is stored in the pointer field itself, hence the cast. */
elem->entry = (void *) 1U;
283
/* Given an array of categories and their respective ratings, prints
284
* the information to the given file */
285
/* Written by Jason Rennie <jrennie@ai.mit.edu> for ifile */
287
/* Writes one "category rating" line per folder to FP, optionally followed
 * by a relative-difference line for the first two ratings, and ends with
 * a separator line.
 * NOTE(review): the return type, the end of the parameter list, the
 * declaration of 'i' and the start of the 'if' condition are not visible
 * in this excerpt. */
ifile_print_ratings (FILE * FP, category_rating * ratings, ifile_db * idata,
292
/* One line per folder, rating printed with 8 decimals. */
for (i = 0; i < idata->num_folders; i++)
293
fprintf(FP, "%s %.8f\n", ratings[i].category, ratings[i].rating);
296
/* NOTE(review): the clauses shown only require num_folders > 0, yet
 * ratings[1] is read below.  Unless the hidden first clause guarantees at
 * least two folders, a single-folder database reads past the first
 * element -- confirm. */
idata->num_folders > 0 &&
297
/* Guards the division below against a zero denominator. */
(ratings[0].rating + ratings[1].rating) != 0)
299
/* Difference between the first two ratings relative to their sum,
 * negated and scaled by 100; the label prints a literal '%'. */
fprintf(FP, "diff[%s,%s](%%) %.2f\n",
300
ratings[0].category, ratings[1].category,
301
-(ratings[0].rating - ratings[1].rating) /
302
(ratings[0].rating + ratings[1].rating) * 100);
304
/* Separator line. */
fprintf(FP, "---------\n");
307
/* Written by Karl Vogel <vogelke@dnaco.net> for ifile */
309
/* Writes a single line to FP: PATH (when non-NULL) followed either by the
 * first two categories separated by a comma or by the first category
 * alone, depending on how DIFF compares with THRESH.
 * NOTE(review): the return type, the declaration and initial value of
 * 'diff', and the start of the first 'if' condition are not visible in
 * this excerpt. */
ifile_concise_ratings (char * path, FILE * FP, category_rating * ratings,
310
ifile_db * idata, int thresh)
312
//if (path) fprintf (FP, "%s ", path);
318
/* NOTE(review): as in ifile_print_ratings, ratings[1] is read although
 * the visible clauses only require num_folders > 0 -- confirm that at
 * least two ratings exist. */
idata->num_folders > 0 &&
319
/* Guards the division below against a zero denominator. */
(ratings[0].rating + ratings[1].rating) != 0)
321
/* Same relative difference as in ifile_print_ratings, but scaled by 1000
 * here instead of 100; THRESH is compared against this scaled value. */
diff = -(ratings[0].rating - ratings[1].rating) /
322
(ratings[0].rating + ratings[1].rating) * 1000;
324
if (path) fprintf (FP, "%s ", path);
325
/* Top two ratings closer than THRESH: report both categories.
 * NOTE(review): when the assignment above was skipped, 'diff' keeps
 * whatever its hidden declaration gave it -- confirm it is initialised. */
if (idata->num_folders > 0 && diff < thresh)
326
fprintf (FP, "%s,%s\n", ratings[0].category, ratings[1].category);
328
/* Otherwise report only the first category (presumably the 'else' branch;
 * the keyword itself is not visible here). */
fprintf (FP, "%s\n", ratings[0].category);
331
/* Returns a pointer to a new string that is an exact duplicate of the
332
* string pointed to by the s1 parameter. The malloc() function is
333
* used to allocate space for the new string. */
334
/* written by Jason Rennie <jrennie@ai.mit.edu> for ifile */
336
ifile_strdup (const char *s1)
338
char *s = (char *) malloc(strlen(s1)+1);