2
* Copyright (C) 2006 Török Edvin <edwin@clamav.net>
4
* This program is free software; you can redistribute it and/or modify
5
* it under the terms of the GNU General Public License as published by
6
* the Free Software Foundation; either version 2 of the License, or
7
* (at your option) any later version.
9
* This program is distributed in the hope that it will be useful,
10
* but WITHOUT ANY WARRANTY; without even the implied warranty of
11
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12
* GNU General Public License for more details.
14
* You should have received a copy of the GNU General Public License
15
* along with this program; if not, write to the Free Software
16
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301, USA.
22
#include <clamav-config.h>
26
#include <entitiesConverter.h>
28
#include <sys/types.h>
32
/* ------------ generating entity tables from .ent files ---------------- */
33
/* TODO: move this into contrib/entitynorm/ ------------*/
36
/*
 * extract_str: copies the text of one regex submatch out of line `l`.
 * The submatch is delimited by the byte offsets pmatch->rm_so (start)
 * and pmatch->rm_eo (end) within `l`.
 * NOTE(review): only part of this function is visible in this excerpt;
 * where `s` comes from, how it is NUL-terminated and what is returned
 * are not shown -- confirm against the full file.
 */
static char* extract_str(const char* l,const regmatch_t* pmatch)
38
/* number of bytes covered by the submatch */
const int len = pmatch->rm_eo - pmatch->rm_so;
43
/* strncpy copies at most `len` bytes and adds no terminator when the
 * source has no NUL within them, so `s` has to be terminated separately
 * -- presumably done on a line not visible here; verify. */
strncpy(s, l+pmatch->rm_so, len);
48
/* Compiled entity-declaration pattern: filled in by regcomp() in
 * init_entity_parser() and matched against input lines by regexec() in
 * entity_extract(). */
static regex_t entity_regex;
49
/*
 * entity_extract: matches `line` against entity_regex. On a match, the
 * first submatch is copied into entity_name and the second submatch is
 * parsed as a decimal integer into *entity_value.
 * NOTE(review): the return values and the no-match path are not visible
 * in this excerpt -- confirm against the full file.
 */
static int entity_extract(const char* line,char* entity_name,int* entity_value)
53
/* request 3 match slots: pmatch[0] is the whole match, pmatch[1] and
 * pmatch[2] are the two parenthesised groups used below */
if(regexec(&entity_regex,line,3,pmatch,0)==0) {
54
const char* entity_val;
55
/* NOTE(review): the pointer returned by extract_str() is not stored, so
 * if it is heap-allocated it cannot be released afterwards -- confirm.
 * strncpy also leaves entity_name without a terminator when the name is
 * MAX_LINE bytes or longer. */
strncpy(entity_name,extract_str(line,&pmatch[1]),MAX_LINE);
56
entity_val = extract_str(line,&pmatch[2]);
57
/* sscanf returns the number of converted fields; anything other than 1
 * means no integer could be read from the second submatch */
if(sscanf( entity_val,"%d",entity_value)!=1)
63
/*
 * loadEntitiesFromFile: reads `file` line by line, runs entity_extract()
 * on each line, and records the resulting name/value pair in hashtable
 * `s` (keyed by name) and in `xt` (indexed by value, holding a copy of
 * the name). Messages are emitted for an unopenable file, for extraction
 * errors and for duplicate names/codes.
 * NOTE(review): several lines of this function (declarations, branch
 * conditions, closing braces, fclose) are not visible in this excerpt,
 * so the exact control flow must be confirmed against the full file.
 */
static void loadEntitiesFromFile(const char* file,struct hashtable* s,char* xt[])
66
FILE* f = fopen(file,"rt");
68
/* reported on stderr; presumably guarded by a check of `f` on a line
 * not visible here */
fprintf(stderr,"Unable to open file:%s",file);
72
/* process the file one line (at most MAX_LINE-1 characters) at a time */
while( fgets(line,MAX_LINE,f) ) {
73
/* NOTE(review): declared unsigned char but passed below to functions
 * taking char* (entity_extract, strlen, strcmp, strdup) */
unsigned char name[MAX_LINE];
75
int rc = entity_extract(line,name,&val);
77
/* presumably printed when rc signals a failed extraction -- the
 * condition is not visible here */
printf("error during extraction:%s!",line);
82
/* assignment inside the condition: looks up `name` and enters the
 * branch when an existing element was found */
if(elem = hashtab_find(s,name,strlen(name))) {
84
cli_dbgmsg("Overriding entity value for %s: %d -> %d\n", name, elem->data, val);
86
cli_dbgmsg("Duplicate entity value for %s:%d\n",name, elem->data);
90
/* a different name is already recorded for this code
 * NOTE(review): `val` comes from the input file and is used as an index
 * into xt; no range check is visible in this excerpt */
if(xt[val] && strcmp(xt[val],name))
91
cli_dbgmsg("Duplicate entity reference to same code:%s->%d<-%s\n",name,val,xt[val]);
93
printf("%s:%d\n",xt[val],val);
94
/* store a private copy of the name for this code; any earlier pointer
 * in xt[val] is overwritten here */
xt[val] = strdup(name);
95
/* register name -> val in the hashtable */
hashtab_insert(s,name,strlen(name),val);
102
/*
 * init_entity_parser: compiles the extended regular expression used to
 * recognise lines of the form  <!ENTITY name "&#NNN;">  into
 * entity_regex. Group 1 captures the name (a run of non-space
 * characters), group 2 the decimal digits of the character code.
 * On a compile failure the regerror() text is written to stderr.
 * NOTE(review): what happens after the error message (exit/return) is
 * not visible in this excerpt.
 */
static void init_entity_parser(void)
105
char errbuff[MAX_LINE];
106
/* regcomp returns non-zero on failure; the code is kept in rc for
 * regerror() */
if(( rc = regcomp(&entity_regex,".*<!ENTITY +([^ ]+) +\"&#([0-9]+);\">.*",REG_EXTENDED) )) {
107
regerror(rc,&entity_regex,errbuff,MAX_LINE);
108
fprintf(stderr,"Error compiling regex:%s\n",errbuff);
114
/*
 * main: driver for the entity-table generator / converter test.
 * Visible steps: clear the code->name table `xt`, compile the entity
 * regex, initialise hashtable `ht`, load entity files found in a
 * directory, dump the hashtable to a file, push an input file through
 * the entity converter, and finally emit the hashtable as C source under
 * the name "entities_htable".
 * NOTE(review): many lines (declarations, conditions, loops, possible
 * preprocessor branches) are not visible in this excerpt, so how these
 * steps are sequenced or guarded must be confirmed against the full file.
 */
int main(int argc, char* argv[])
116
struct entity_conv conv;
118
struct dirent* entry;
122
/* start with every one of the 65536 code slots empty */
memset(xt,0,65536*sizeof(xt[0]));
124
init_entity_parser();
125
hashtab_init(&ht,512);
128
/* usage text; presumably shown when the argument count is wrong -- the
 * check is not visible here */
fprintf(stderr,"Usage: %s <entity directory>\n",argv[0]);
133
DIR* dir = opendir(ent_dir);
135
cli_errmsg("Can't open directory\n");
139
entry = readdir(dir);
142
/* build "<ent_dir>/<file name>"
 * NOTE(review): the declared size of `buffer` is not visible; 4095 is
 * assumed to fit -- confirm */
snprintf(buffer,4095,"%s/%s",ent_dir,entry->d_name);
144
cli_dbgmsg("Loading entities from:%s\n", entry->d_name);
145
loadEntitiesFromFile(buffer,&ht,xt);
150
/* NOTE(review): hard-coded output path; no check of f1 is visible
 * before it is used */
FILE* f1=fopen("/tmp/test.out","w");
151
hashtab_store(&ht,f1);
153
init_entity_converter(&conv,UNKNOWN,8192);
154
/* input is opened in binary mode from the first command-line argument */
FILE* f = fopen(argv[1],"rb");
156
/* NOTE(review): perror() appends ": <error text>" and a newline after
 * its argument, so the embedded "\n" splits the message; "FIle" also
 * looks like a typo */
perror("FIle not found!\n");
161
/* feed the input to the converter one byte at a time until end of file */
while((c=fgetc(f))!=EOF) {
162
const char* x = process_byte(&conv,c);
169
s = encoding_norm_readline(&conv, f, NULL, 8192);
174
encoding_norm_done(&conv);
177
/* write the collected entities out as a C table named entities_htable */
hashtab_generate_c(&ht,"entities_htable");