~ubuntu-branches/ubuntu/feisty/clamav/feisty

« back to all changes in this revision

Viewing changes to contrib/entitynorm/generate_entitylist.c

  • Committer: Bazaar Package Importer
  • Author(s): Kees Cook
  • Date: 2007-02-20 10:33:44 UTC
  • mto: This revision was merged to the branch mainline in revision 16.
  • Revision ID: james.westby@ubuntu.com-20070220103344-zgcu2psnx9d98fpa
Tags: upstream-0.90
Import upstream version 0.90

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
/*
 
2
 *  Copyright (C) 2006 Török Edvin <edwin@clamav.net>
 
3
 *
 
4
 *  This program is free software; you can redistribute it and/or modify
 
5
 *  it under the terms of the GNU General Public License as published by
 
6
 *  the Free Software Foundation; either version 2 of the License, or
 
7
 *  (at your option) any later version.
 
8
 *
 
9
 *  This program is distributed in the hope that it will be useful,
 
10
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 
11
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 
12
 *  GNU General Public License for more details.
 
13
 *
 
14
 *  You should have received a copy of the GNU General Public License
 
15
 *  along with this program; if not, write to the Free Software
 
16
 *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
 
17
 *  MA 02110-1301, USA.
 
18
 *
 
19
 */
 
20
 
 
21
 
 
22
#include <clamav-config.h>

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <sys/types.h>
#include <dirent.h>
#include <regex.h>

#include <others.h>
#include <htmlnorm.h>
#include <hashtab.h>
#include <entitiesConverter.h>
 
30
 
 
31
#define MAX_LINE 1024
 
32
/* ------------ generating entity tables from .ent files ---------------- */
 
33
/* TODO: move this into contrib/entitynorm/ ------------*/
 
34
 
 
35
 
 
36
/*
 * Copy the part of `l` selected by one regexec() match slot into a newly
 * allocated, NUL-terminated string.
 *
 * l      - the line the regular expression was run against
 * pmatch - match slot whose rm_so/rm_eo byte offsets delimit the text
 *
 * Returns NULL when the slot holds no match (rm_so == -1) or when the
 * allocation fails; otherwise a malloc()ed string owned by the caller,
 * who must free() it.
 */
static char* extract_str(const char* l,const regmatch_t* pmatch)
{
        size_t len;
        char *s;

        /* Check for "no match" before doing arithmetic on the offsets. */
        if (pmatch->rm_so == -1)
                return NULL;

        len = (size_t)(pmatch->rm_eo - pmatch->rm_so);
        s = malloc(len + 1);
        if (!s)
                return NULL;

        /* The matched range lies inside `l`, so an exact-length copy is safe. */
        memcpy(s, l + pmatch->rm_so, len);
        s[len] = '\0';
        return s;
}
 
47
 
 
48
static regex_t entity_regex;
 
49
static int entity_extract(const char* line,char* entity_name,int* entity_value)
 
50
{
 
51
        regmatch_t pmatch[3];
 
52
 
 
53
        if(regexec(&entity_regex,line,3,pmatch,0)==0) {
 
54
                const char* entity_val;
 
55
                strncpy(entity_name,extract_str(line,&pmatch[1]),MAX_LINE); 
 
56
                entity_val  = extract_str(line,&pmatch[2]);
 
57
                if(sscanf( entity_val,"%d",entity_value)!=1)
 
58
                        return -2;
 
59
                return 1;
 
60
        }
 
61
        else return 0;  
 
62
}
 
63
static void loadEntitiesFromFile(const char* file,struct hashtable* s,char* xt[])
 
64
{
 
65
        char line[MAX_LINE];
 
66
        FILE* f = fopen(file,"rt");
 
67
        if(!f) {
 
68
                fprintf(stderr,"Unable to open file:%s",file);
 
69
                exit(3);
 
70
        }
 
71
 
 
72
        while( fgets(line,MAX_LINE,f) ) {
 
73
                unsigned char name[MAX_LINE];
 
74
                int val;
 
75
                int rc = entity_extract(line,name,&val);
 
76
                if(rc<0) {
 
77
                        printf("error during extraction:%s!",line);
 
78
                        exit(3);
 
79
                }
 
80
                else if(rc) {
 
81
                        struct element* elem;
 
82
                        if(elem = hashtab_find(s,name,strlen(name))) {
 
83
                                if(elem->data != val)
 
84
                                        cli_dbgmsg("Overriding entity value for %s: %d -> %d\n", name, elem->data, val);
 
85
                                else {
 
86
                                        cli_dbgmsg("Duplicate entity value for %s:%d\n",name, elem->data);
 
87
                                        continue;
 
88
                                }
 
89
                        }
 
90
                        if(xt[val] && strcmp(xt[val],name))
 
91
                                cli_dbgmsg("Duplicate entity reference to same code:%s->%d<-%s\n",name,val,xt[val]);
 
92
                        else if(xt[val])
 
93
                                printf("%s:%d\n",xt[val],val);
 
94
                        xt[val] = strdup(name);
 
95
                        hashtab_insert(s,name,strlen(name),val);
 
96
                }
 
97
        }
 
98
        fclose(f);
 
99
}
 
100
 
 
101
 
 
102
static void init_entity_parser(void)
 
103
{
 
104
        int rc;
 
105
        char errbuff[MAX_LINE];
 
106
        if(( rc = regcomp(&entity_regex,".*<!ENTITY +([^ ]+) +\"&#([0-9]+);\">.*",REG_EXTENDED) )) {
 
107
                regerror(rc,&entity_regex,errbuff,MAX_LINE);
 
108
                fprintf(stderr,"Error compiling regex:%s\n",errbuff);
 
109
                exit(1);
 
110
        }
 
111
}
 
112
 
 
113
 
 
114
int main(int argc, char* argv[])
 
115
{
 
116
        struct entity_conv conv;
 
117
        const char* ent_dir;
 
118
        struct dirent* entry;
 
119
        struct hashtable ht;
 
120
        char* xt[65536];
 
121
 
 
122
        memset(xt,0,65536*sizeof(xt[0]));
 
123
        cl_debug();
 
124
        init_entity_parser();
 
125
        hashtab_init(&ht,512);
 
126
 
 
127
        if(argc<2) {
 
128
                fprintf(stderr,"Usage: %s <entity directory>\n",argv[0]);
 
129
                return 1;
 
130
        }
 
131
 
 
132
        ent_dir = argv[1];
 
133
        DIR* dir = opendir(ent_dir);
 
134
        if(!dir) {
 
135
                cli_errmsg("Can't open directory\n");
 
136
                return 2;
 
137
        }
 
138
        do {
 
139
                entry = readdir(dir);
 
140
                if(entry) {
 
141
                        char buffer[4096];
 
142
                        snprintf(buffer,4095,"%s/%s",ent_dir,entry->d_name);
 
143
                        buffer[4095] = '\0';
 
144
                        cli_dbgmsg("Loading entities from:%s\n", entry->d_name);
 
145
                        loadEntitiesFromFile(buffer,&ht,xt);
 
146
                }
 
147
        } while(entry);
 
148
        closedir(dir);
 
149
#if 0   
 
150
        FILE* f1=fopen("/tmp/test.out","w");
 
151
        hashtab_store(&ht,f1);
 
152
        fclose(f1);
 
153
        init_entity_converter(&conv,UNKNOWN,8192);      
 
154
        FILE* f = fopen(argv[1],"rb");
 
155
        if(!f) {
 
156
                perror("FIle not found!\n");
 
157
                exit(1);
 
158
        }
 
159
        /*
 
160
        int c;
 
161
        while((c=fgetc(f))!=EOF) {
 
162
                const char* x = process_byte(&conv,c);
 
163
                if(x)
 
164
                        printf("%s",x);
 
165
        }
 
166
        */
 
167
        unsigned char* s;
 
168
        do{
 
169
                s = encoding_norm_readline(&conv, f, NULL, 8192);
 
170
                if(s)
 
171
                        printf("%s",s);
 
172
                free(s);
 
173
        } while(s);
 
174
        encoding_norm_done(&conv);
 
175
        fflush(stdout);
 
176
#endif  
 
177
        hashtab_generate_c(&ht,"entities_htable");
 
178
        return 0;
 
179
}
 
180