3
snowdrop - text watermarking and watermark recovery
4
---------------------------------------------------
6
Copyright (C) 2002 by Michal Zalewski <lcamtuf@coredump.cx>
8
This program is free software; you can redistribute it and/or modify
9
it under the terms of the GNU General Public License as published by
10
the Free Software Foundation; either version 2 of the License, or
11
(at your option) any later version.
13
As a special exception, this program may be linked with the
14
OpenSSL library, despite that library's more restrictive license.
16
English language backend.
28
#include <openssl/md5.h>
30
#include <md5global.h>
32
#define MD5_Init MD5Init
33
#define MD5_Final MD5Final
34
#define MD5_Update MD5Update
35
#endif /* USE_OPENSSL */
39
// Max synonym cache entries
43
// How often do you want typos (number of atoms)?
51
// File-scope state for this backend.
static int word_cnt; // Original term counter; get_storage() tests it against TYPORATIO
52
static char* use_quot; // Close quotes using this string (0 when no closing quote is pending)
53
static int indent_val=-1; // Indentation, if any (-1 means none recorded)
54
static char prev_punct; // Previous atom was a punctuation mark (copied from cur_punct)
55
static int cur_size; // Current input atom storage capacity
56
static int cur_mod; // Current input atom modification
57
static int just_testing; // Dry-run flag: when set, set_value() leaves use_quot untouched
59
// Kinds of modification that can be applied to an atom. get_storage() records
// the selected kind through CHECK_STOR. The values are not contiguous
// (3 and 6 are not defined in this list).
#define MOD_NONE 0 // No modification
60
#define MOD_SYNONYM 1 // Put a synonym
61
#define MOD_TYPO 2 // Make a typo
62
#define MOD_QUOTE 4 // Change quotes
63
#define MOD_PSPACE 5 // Add spaces
64
#define MOD_CAPS 7 // Capitalization
65
#define MOD_PERIOD 8 // ; -> .
66
#define MOD_DASH 9 // - -> 0xad
77
// Per-term synonym cache filled by load_synonyms(): each used entry has a
// 'from' word, a 'to' list and a 'tcnt' count.
struct syncache scache[MAXCACHE+1];
80
// One dictionary entry: 'from' and 'to' are the two words, 'bid' is 0 for a
// '>' line and 1 for a '|' line (the reverse mapping is added only "if bid").
struct sd_syns { char *from, *to, bid; };
81
// Dictionary entries in load order; the first entry with from==0 ends the list.
static struct sd_syns syn[MAXWORD+1];
84
// Load synonym database, of course...
85
// Fills syn[] with the dictionary entries and scache[] with the per-term
// synonym lists. Does nothing if syn[0].from is already set. A dictionary
// that cannot be opened, or any malformed, duplicate, no-op or uppercase
// entry, ends in fatal().
static void load_synonyms(void) {
86
int line=0,added=0,z,m;
90
// Already loaded by an earlier call.
if (syn[0].from) return;
92
// Dictionary location: SD_SYNONYMS if set; the sprintf below builds
// $HOME/.snowdrop/synonyms (presumably the else branch - not visible here).
// NOTE(review): strcpy copies the environment value with no length check,
// and getenv("HOME") is handed to sprintf with no NULL check - confirm
// against the size of buf.
if (getenv("SD_SYNONYMS")) {
93
strcpy(buf,getenv("SD_SYNONYMS"));
96
sprintf(buf,"%s/.snowdrop/synonyms",getenv("HOME"));
98
// Fall back to the system-wide copy, then to the current directory.
if (!f) f=fopen("/usr/share/snowdrop/synonyms","r");
99
if (!f) f=fopen("synonyms","r");
101
if (!f) fatal("cannot find synonym dictionary (%s)",buf);
103
while (fgets(buf,MAXBUF,f)) {
105
char w1[MAXBUF], w2[MAXBUF],c;
108
// Drop the trailing newline, then cut the line at the first '#'.
if (buf[strlen(buf)-1]=='\n') buf[strlen(buf)-1]=0;
109
if (strchr(buf,'#')) *strchr(buf,'#')=0;
111
// Skip lines with nothing but whitespace left (bcop presumably starts at buf).
while (isspace(*bcop)) bcop++;
112
if (!(*bcop)) continue;
113
// Expected form: <word> <one character> <word>.
if (sscanf(buf,"%s %c %s",w1,&c,w2)!=3)
114
fatal("malformed dictionary line %d [1]",line);
116
if (!strcasecmp(w1,w2)) fatal("NOOP dictionary entry at line %d",line);
119
// Walk the entries stored so far and reject a repeat of this pair, in
// either order (case-insensitive).
while (syn[i].from) {
121
if (!strcasecmp(syn[i].from,w1))
122
if (!strcasecmp(syn[i].to,w2))
123
fatal("duplicate dictionary entry for %s - %s at line %d",w1,w2,line);
125
if (!strcasecmp(syn[i].from,w2))
126
if (!strcasecmp(syn[i].to,w1))
127
fatal("duplicate dictionary entry for %s - %s (reverse) at line %d",w1,w2,line);
133
// Dictionary words must not contain uppercase letters.
for (z=0;z<strlen(w1);z++) if (isupper(w1[z])) fatal("line %d: uppercase entry (%s)",line,w1);
134
for (z=0;z<strlen(w2);z++) if (isupper(w2[z])) fatal("line %d: uppercase entry (%s)",line,w2);
136
// Keep heap copies of both words in entry i.
syn[i].from=strdup(w1);
137
if (!syn[i].from) fatal("not enough memory");
138
syn[i].to=strdup(w2);
139
if (!syn[i].to) fatal("not enough memory");
141
// '>' sets bid=0, '|' sets bid=1; any other separator is an error.
if (c=='>') syn[i].bid=0; else
142
if (c=='|') syn[i].bid=1; else
143
fatal("malformed dictionary line %d [2]",line);
146
// Add a synonym for w1 -> w2
148
// Find the cache slot for w1, or take a new one at sctop.
// NOTE(review): the loop counter is m but the comparison reads scache[i];
// scache[m] looks intended - confirm.
for (m=0;m<sctop;m++) if (!strcmp(scache[i].from,w1)) break;
149
if (m==sctop) sctop++;
150
if (sctop>=MAXCACHE) fatal("MAXCACHE exceeded");
152
// The cache shares the strings owned by syn[i]; nothing is copied here.
scache[m].from=syn[i].from;
153
scache[m].to[(int)scache[m].tcnt]=syn[i].to;
155
if (scache[m].tcnt>=MAXSYN) fatal("MAXSYN for %s exceeded",w1);
157
// Add a synonym for w2 -> w1 if bid
160
// NOTE(review): same scache[i] / m mismatch as in the w1 lookup - confirm.
for (m=0;m<sctop;m++) if (!strcmp(scache[i].from,w2)) break;
161
if (m==sctop) sctop++;
162
if (sctop>=MAXCACHE) fatal("MAXCACHE exceeded");
164
scache[m].from=syn[i].to;
165
scache[m].to[(int)scache[m].tcnt]=syn[i].from;
167
if (scache[m].tcnt>=MAXSYN) fatal("MAXSYN for %s exceeded",w2);
173
debug("[+] Loaded %d synonyms (%d lines parsed).\n",added,line);
177
// Cached result of md5_importantstuff(); 0 is treated as "not computed yet".
static unsigned int got_md5;
179
// Returns a checksum of the loaded synonym table: an MD5 over every syn[]
// entry, folded into one unsigned int by XOR-ing the four words of result[].
// The value is stored in got_md5 and returned from there on later calls.
// NOTE(review): the raw bytes of the int index are hashed and the digest is
// read back through unsigned int, so the value depends on the host's int size
// and byte order - confirm this is acceptable for stored watermarks.
// NOTE(review): a checksum that happens to be 0 is never served from the
// cache and is recomputed on every call.
unsigned int md5_importantstuff(void) {
181
unsigned int result[4];
184
if (got_md5) return got_md5;
186
while (syn[i].from) {
187
// Per entry: index, 'from' with its terminating NUL, the bid byte, 'to' with
// its terminating NUL, then a fixed three-byte separator.
MD5_Update(&kuku,&i,sizeof(int));
188
MD5_Update(&kuku,syn[i].from,strlen(syn[i].from)+1);
189
MD5_Update(&kuku,&syn[i].bid,1);
190
MD5_Update(&kuku,syn[i].to,strlen(syn[i].to)+1);
191
MD5_Update(&kuku,"-|-",3);
194
MD5_Final((char*)result,&kuku);
196
return got_md5=(result[0] ^ result[1] ^ result[2] ^ result[3]);
201
// Get the number of synonyms matching the term.
202
// The term is compared with scache[].from ignoring case; on a hit the
// entry's tcnt is returned. The loop over q and the no-match result are not
// visible here.
static int lookup_syn_cnt(const char* term) {
205
if (!strcasecmp(scache[q].from,term)) return scache[q].tcnt;
212
// Work buffer edited in place by handle_caps().
static char caps[MAXBUF+1];
215
// Try to copy the capitalization scheme from original term...
216
// Assume that first letter is uppercase or all letters are uppercase.
217
// This is for compatibility with MOD_CAPS.
218
// orig:  term whose capitalization is imitated.
// nterm: replacement term; an empty string is a fatal error.
// NOTE(review): caps presumably holds a copy of nterm by the time it is
// modified below (the copy is not visible here) - confirm.
static char* handle_caps(const char* orig,const char* nterm) {
221
if (!nterm[0]) fatal("handle_caps with empty nterm");
225
// orig starts with a capital: capitalize the first character of caps.
if (isupper(orig[0])) caps[0]=toupper(caps[0]);
227
// Second character of orig is uppercase and a third character exists:
// uppercase caps from index q up to its terminator.
if (isupper(orig[1]) && orig[2])
228
while (caps[q]) { caps[q]=toupper(caps[q]); q++; }
234
// Ok, give me the actual num-th synonym for 'term'.
235
// The term is matched against scache[].from ignoring case; the chosen
// synonym is passed through handle_caps() so it follows the capitalization
// of 'term'. No range check on num is visible here.
static char* lookup_syn_no(const char* term,const int num) {
238
if (!strcasecmp(scache[q].from,term)) return handle_caps(term,scache[q].to[num]);
245
// Reader state for the original text (get_orig_atom) and the watermarked
// text (get_water_atom).
static const char* input_data; // Original text; must be set before get_orig_atom()
246
static int input_off; // Offset into input_data where the next atom starts
247
static const char* water_data; // Watermarked text; must be set before get_water_atom()
248
static int water_off; // Offset into water_data where the next atom starts
250
static int cur_punct; // Current atom starts with a non-alphanumeric char (0 for newline)
251
static int linesofar; // How many things in this line?
252
static int atomsofar; // How many atoms in this line?
255
// Registers the original (unmarked) text. A NULL buffer is a fatal error.
// The rest of the body is not visible here.
void set_original(const char* buf) {
256
if (!buf) fatal("set_original(NULL)");
276
// Registers the watermarked text. A NULL buffer is a fatal error, and so is
// calling this while input_data is still unset, i.e. before set_original().
// The rest of the body is not visible here.
void set_watermarked(const char* buf) {
277
if (!buf) fatal("set_watermarked(NULL)");
278
if (!input_data) fatal("set_watermarked before set_original");
284
// Sentinel object: get_orig_atom() clears use_quot when it points at this array.
char FOOBAR[]=".f00.b4r.";
286
// Holds the atom most recently assembled by get_orig_atom().
static char orig_buf[MAXBUF+1];
289
// Reads the next atom from the original text at input_off and advances
// input_off past it. An atom is built in orig_buf from any leading spaces
// and tabs, one further character, and the characters that belong with it.
// Returns 0 when the end of the text is reached with nothing collected.
// Side effects visible here: updates prev_punct, cur_punct, linesofar,
// atomsofar, use_quot and indent_val.
char* get_orig_atom(void) {
290
char* now=(char*)input_data+input_off, sth[2];
293
if (!input_data) fatal("get_orig_atom before set_original");
299
// Remember whether the atom returned last time was punctuation.
prev_punct=cur_punct;
301
// The previous atom contained a newline: start counting a new line.
if (resetinnext) { linesofar=0; atomsofar=0; resetinnext=0; }
305
// Collect leading blanks. Two strcat variants are visible; the condition
// choosing between them is not.
while (*now == ' ' || *now == '\t') {
307
strcat(orig_buf,*now==' '?" ":"\t");
309
strcat(orig_buf,*now==' '?" ":" ");
313
// nospaced marks where the non-blank part of the atom begins.
nospaced=orig_buf+strlen(orig_buf);
314
// Append next one character unconditionally.
317
strcat(orig_buf,sth);
319
// Certain chars should be kept together...
320
// 'or' is the character allowed to repeat: ', ` or , stay as they are;
// anything else becomes 0xff, which the loop below treats as "alphanumeric
// run". 0xfe is used when the first character is not alphanumeric.
if (!(or=='\'' || or=='`' || or==',')) or=0xff;
321
if (or==0xff && !isalnum(sth[0])) or=0xfe;
325
// Copy the rest. Stop on something that does not belong.
326
while ((or==0xff && isalnum(*now)) || (*now==or)) {
328
strcat(orig_buf,sth);
332
// If we stopped because of ' surrounded by chars, continue.
333
if (isalnum(*(now-1)) && *now=='\'' && isalnum(*(now+1))) {
335
strcat(orig_buf,sth);
340
input_off=now-input_data;
341
if (!*now) { if (!strlen(orig_buf)) return 0; }
343
if (use_quot==FOOBAR) use_quot=0;
344
// A closing quote is pending and this atom starts with ' or ": overwrite it
// with the pending string and adjust linesofar by the change in length.
if (use_quot && (*nospaced=='\'' || *nospaced=='"')) {
345
int q=strlen(nospaced);
346
strcpy(nospaced,use_quot);
347
linesofar-=strlen(nospaced)-q;
351
// With an indentation recorded, an atom that opens a line with a space is
// rebuilt as indent_val spaces followed by its non-blank part.
if (indent_val!=-1) {
352
if (orig_buf[0]==' ' && !linesofar) {
356
for (i=0;i<indent_val;i++) strcat(tmp," ");
357
strcat(tmp,nospaced);
358
strcpy(orig_buf,tmp);
359
linesofar-=indent_val;
363
// Punctuation means "does not start with an alphanumeric"; newline is exempt.
cur_punct=!isalnum(*nospaced);
364
if (*nospaced=='\n') cur_punct=0;
366
if (strchr(orig_buf,'\n')) resetinnext=1;
368
linesofar+=strlen(orig_buf);
372
// Too late. Paragraph ended. You die.
373
// Forget the recorded indentation and any pending closing quote.
if (!linesofar && strchr(orig_buf,'\n')) {
374
indent_val=-1; use_quot=0;
381
// Holds the atom most recently assembled by get_water_atom().
static char water_buf[MAXBUF+1];
383
// Reads the next atom from the watermarked text at water_off and advances
// water_off past it. The visible steps mirror get_orig_atom(): leading
// blanks, one character, then the characters that belong with it. None of
// the quote or indentation handling of get_orig_atom() is visible here.
// Returns 0 when the end of the text is reached with nothing collected.
char* get_water_atom(void) {
384
char* now=(char*)water_data+water_off, sth[2];
387
if (!water_data) fatal("get_water_atom before set_watermarked");
392
// Collect leading spaces and tabs unchanged.
while (*now == ' ' || *now == '\t') {
393
strcat(water_buf,*now==' '?" ":"\t");
397
// nospaced marks where the non-blank part of the atom begins.
nospaced=water_buf+strlen(water_buf);
398
// Append next one character unconditionally.
401
strcat(water_buf,sth);
403
// Certain chars should be kept together...
404
// ', ` and , may repeat; 0xff stands for "alphanumeric run"; 0xfe is used
// when the first character is not alphanumeric.
if (!(or=='\'' || or=='`' || or==',')) or=0xff;
405
if (or==0xff && !isalnum(sth[0])) or=0xfe;
409
// Copy the rest. Stop on something that does not belong.
410
while ((or==0xff && isalnum(*now)) || (*now==or)) {
412
strcat(water_buf,sth);
416
// If we stopped because of ' surrounded by chars, continue.
417
if (isalnum(*(now-1)) && *now=='\'' && isalnum(*(now+1))) {
419
strcat(water_buf,sth);
424
water_off=now-water_data;
425
if (!*now) { if (!strlen(water_buf)) return 0; }
432
// Offer a candidate modification for the current atom: when its capacity
// (siz) is at least the best capacity seen so far, store its kind (id) in
// mod_type and its capacity in top_storage. On a tie the later candidate wins.
// Requires assignable variables named mod_type and top_storage in the
// calling scope.
// The do { } while (0) wrapper makes the macro a single statement, so
// "if (x) CHECK_STOR(a,b); else ..." parses as written, and siz is evaluated
// exactly once (it is often a storcap() call).
#define CHECK_STOR(siz,id) do { \
    const int check_stor_siz_ = (siz); \
    if (check_stor_siz_ >= top_storage) { \
      mod_type = (id); top_storage = check_stor_siz_; } \
  } while (0)
436
// Converts a count into a storage capacity for CHECK_STOR.
// NOTE(review): the body is not visible here. Going by the comment in
// get_storage() and the (1<<cap) test in set_value(), this presumably yields
// the bit count of the largest power of two not above max - confirm.
static inline int storcap(int max) {
448
// Works out how much data the atom 'orig' can carry in the given domain.
// Each applicable modification is offered through CHECK_STOR, which keeps the
// candidate with the largest capacity in mod_type / top_storage. set_value()
// treats the result as a bit count (it checks value against 1<<cap).
// orig:   atom text, may start with spaces; NULL is a fatal error.
// domain: channel selector; an unknown value is a fatal error. Only the
//         DOMAIN_SYNONYMS label is visible here.
int get_storage(const char* orig, const int domain) {
449
const char* text=orig;
453
if (!orig) fatal("get_storage(NULL...)");
// text skips the leading spaces; tsp counts them.
while (*text == ' ') {tsp++; text++; }
460
// Spacing after punctuation can carry one bit.
if (prev_punct && orig[0]==' ') {
461
CHECK_STOR(1,MOD_PSPACE);
465
// Atoms containing a newline: capacity depends on the current line length
// (linesofar). Two variants are visible; what selects between them is not.
if (strchr(orig,'\n')) {
467
if (linesofar>81) { CHECK_STOR(1,MOD_PSPACE); } else
468
if (linesofar<76) CHECK_STOR(1,MOD_PSPACE);
470
if (linesofar>81) { CHECK_STOR(4,MOD_PSPACE); } else
471
if (linesofar<79) CHECK_STOR(storcap(79-linesofar),MOD_PSPACE);
479
// FIXME: make typos a bit less predictable?
480
// Every TYPORATIO-th term that starts with an alphanumeric may take a typo,
// worth 4 or 5 bits (the choice between the two is not visible here).
if (!(word_cnt % TYPORATIO) && isalnum(*text)) {
482
CHECK_STOR(4,MOD_TYPO);
484
CHECK_STOR(5,MOD_TYPO);
492
// If the word has at least two uppercase letters,
493
// either make it first-only or all uppercase. This gives us
494
// one bit and would not break synonym capitalization.
496
for (i=0;i<strlen(text);i++) {
497
if (isupper(text[i])) gu++;
499
if (gu > 1) CHECK_STOR(1,MOD_CAPS);
503
// We can ruin some ;s ;-)
504
if (*text==';') CHECK_STOR(1,MOD_PERIOD);
507
// We can ruin some -s ;-)
508
if (*text=='-' || *(unsigned char*)text==0xad) CHECK_STOR(1,MOD_DASH);
510
// We can also mess with quotes. This is good.
512
// Three bits: one value for each of the eight quote styles in set_value().
if (!strcmp(text,"''") || !strcmp(text,"`") || !strcmp(text,"'") ||
513
!strcmp(text,"``") || !strcmp(text,"\"") || !strcmp(text,",,")) CHECK_STOR(3,MOD_QUOTE);
517
case DOMAIN_SYNONYMS:
518
// Determine how many synonyms can be substituted for a word.
519
// If any, add 1 to the number (as we can leave the word unchanged,
520
// as well). Now, determine largest power of two less or equal to
521
// the number we got. This is our storage capacity.
524
i=lookup_syn_cnt(text);
526
CHECK_STOR(storcap(i+1),MOD_SYNONYM);
531
default: fatal("bogus domain in get_storage");
539
// Output buffer of set_value(); get_value() saves and restores it around
// its own set_value() calls.
static char setv[MAXBUF+1];
541
// Replacement characters for typos, indexed by the value being stored.
static char typovals[]="abcdefghijklmnopqrstuvwxyz0123456789";
544
// Builds in setv a variant of atom 'orig' that encodes 'value' in 'domain'.
// orig:   atom text, may start with spaces.
// value:  must satisfy 0 <= value < (1<<cap), where cap is
//         get_storage(orig,domain); cap must be in 1..16. Anything else is
//         a fatal error.
// Returns setv on the one visible return path.
char* set_value(const char* orig,int value, const int domain) {
546
const char* text=orig;
548
cap=get_storage(orig,domain);
549
if (cap <= 0) fatal("set_value with fixed atom");
550
if (cap > 16) fatal("set_value with atom of excessive storage capacity");
551
if (value >= (1<<cap)) fatal("set_value: new value exceeds storage capacity");
552
if (value < 0) fatal("set_value: new value less than zero");
555
// text points past the leading spaces of orig.
while (*text == ' ') text++;
561
// "Bite my shiny metal ass!"
562
// Appends value+1 spaces to setv.
for (i=0;i<value+1;i++) strcat(setv," ");
568
// There is a rare potential glitch here when capitalization
569
// changes are ruined by digits. Should happen rarely, fix it
572
// Typo: overwrite the character at position word_cnt % strlen(text) of the
// term with typovals[value]; one of the visible assignments uppercases the
// replacement when the character being replaced is uppercase.
i=word_cnt % strlen(text);
573
if (isupper(setv[text-orig+i]))
574
setv[text-orig+i]=toupper(typovals[value]);
576
setv[text-orig+i]=typovals[value];
582
// Capitalization: nonzero value lowercases everything, zero uppercases
// everything; the first character of the term is then made uppercase.
for (i=0;i<strlen(setv);i++) setv[i]=value?tolower(setv[i]):toupper(setv[i]);
583
setv[text-orig]=toupper(setv[text-orig]);
588
// Period: nonzero value keeps ';', zero turns it into '.'.
// NOTE(review): dereferences strchr() directly, so this relies on setv
// containing ';' at this point - confirm.
*strchr(setv,';')=value?';':'.';
593
// Dash: locate '-' or, failing that, 0xad in setv.
foo=strchr(setv,'-');
594
if (!foo) foo=strchr(setv,0xad);
600
// Quotes: copy the leading spaces, then emit the style chosen by value.
// Unless just_testing is set, use_quot is set to a closing string for
// get_orig_atom() to substitute later.
while (orig[i]==' ') { strcat(setv," "); i++; }
602
case 0: strcat(setv,"``"); if (!just_testing) use_quot="''"; break;
603
case 1: strcat(setv,"''"); if (!just_testing) use_quot="''"; break;
604
case 2: strcat(setv,"'"); if (!just_testing) use_quot="'"; break;
605
case 3: strcat(setv,"`"); if (!just_testing) use_quot="'"; break;
606
case 4: strcat(setv,"\""); if (!just_testing) use_quot="\""; break;
607
case 5: strcat(setv,",,"); if (!just_testing) use_quot="''"; break;
608
case 6: strcat(setv,"\xb4"); if (!just_testing) use_quot="\xb4"; break;
609
case 7: strcat(setv,"\xbd"); if (!just_testing) use_quot="\xbd"; break;
610
default: fatal("gremlins in the keyboard");
617
// Synonym: copy the leading spaces; value 0 keeps the original term, any
// other value v substitutes synonym number v-1.
while (orig[i]==' ') { strcat(setv," "); i++; }
618
if (!value) { strcat(setv,text); return setv; }
619
strcat(setv,lookup_syn_no(text,value-1));
622
default: fatal("bogus mod_type in set_value");
631
// Scratch copy of setv; get_value() saves setv here and restores it afterwards.
char setval_copy[MAXBUF+1];
634
// Mooom! This is worse than terrible!
635
// Compares a and b after skipping leading whitespace in both. If either
// string has nothing left after that, returns 31337 (nonzero, so "no match").
// The comparison itself is not visible here.
int strspcmp(const char* a,const char* b) {
636
while (isspace(*a)) a++;
637
while (isspace(*b)) b++;
638
if (!*a || !*b) return 31337; // Bleh.
643
// Case-insensitive comparison of a and b after skipping leading whitespace
// in both. If either string has nothing left after that, returns 31337
// (nonzero, so "no match"); otherwise returns the strcasecmp() result.
int strspcasecmp(const char* a,const char* b) {
644
while (isspace(*a)) a++;
645
while (isspace(*b)) b++;
646
if (!*a || !*b) return 31337; // Bleh.
647
return strcasecmp(a,b);
653
// Recovers the values stored in a watermarked atom by brute force: for each
// of the four domains it asks get_storage() for the capacity of 'orig', then
// tries every combination of values with set_value() until the result
// matches 'water'.
// orig:  original atom; water: watermarked atom.
// scr:   receives four capacities, domains 0..3 in that order.
// va:    receives the four matching values, in the same order.
// test:  when nonzero, the set_value(wbuf,0,...) calls near the end are skipped.
// Any NULL pointer argument is a fatal error. The return value is not
// visible here.
int get_value(const char* orig,const char* water,int* scr,int* va,char test) {
658
if (!orig) fatal("get_value with orig==NULL");
659
if (!water) fatal("get_value with water==NULL");
660
if (!scr) fatal("get_value with sc==NULL");
661
if (!va) fatal("get_value with va==NULL");
665
// Enter dummy mode, save buffer.
668
// Capacity of the original atom in each domain.
sc[3]=get_storage(orig,3);
669
sc[2]=get_storage(orig,2);
670
sc[1]=get_storage(orig,1);
671
sc[0]=get_storage(orig,0);
673
// Domains are applied from 3 down to 0, each on top of the previous result
// (orig -> b3 -> b2 -> b1 -> b0). A domain with capacity 0 runs its loop
// once and passes the text through unchanged.
for (i3=0;i3<(1<<sc[3]);i3++) {
676
if (!sc[3]) strcpy(b3,orig); else strcpy(b3,set_value(orig,i3,3));
678
for (i2=0;i2<(1<<sc[2]);i2++) {
680
if (!sc[2]) strcpy(b2,b3); else strcpy(b2,set_value(b3,i2,2));
682
for (i1=0;i1<(1<<sc[1]);i1++) {
684
if (!sc[1]) strcpy(b1,b2); else strcpy(b1,set_value(b2,i1,1));
686
for (i0=0;i0<(1<<sc[0]);i0++) {
688
if (!sc[0]) strcpy(b0,b1); else strcpy(b0,set_value(b1,i0,0));
689
// If final result matches...
690
// First loop: just compare
691
// Second loop: without spaces; compare with strspcmp
692
// Third loop: without spaces and grammar; compare with strspcmp
693
// Fourth loop: without spaces, grammar and notation; strspcasecmp
694
// Note that strspcmp and strspcasecmp return nonzero (31337) when either
695
// string has no non-space characters, to avoid stupid problems.
697
// cnt is the pass number: each later pass compares against a buffer with
// one more of the low domains left out.
if ((cnt==0 && !strcmp(water,b0)) ||
698
(cnt==1 && !strspcmp(water,b1)) ||
699
(cnt==2 && !strspcmp(water,b2)) ||
700
(cnt==3 && !strspcasecmp(water,b3))) {
705
// Match found: call set_value() again for each used domain with the matching
// value, keeping the caller-visible contents of setv intact around it.
strcpy(setval_copy,setv);
707
if (sc[3]) set_value(orig,i3,3);
708
if (sc[2]) set_value(orig,i2,2);
709
if (sc[1]) set_value(orig,i1,1);
710
if (sc[0]) set_value(orig,i0,0);
711
strcpy(setv,setval_copy);
714
// Write capacities and values back...
715
*(scr++)=sc[0]; *(va++)=i0;
716
*(scr++)=sc[1]; *(va++)=i1;
717
*(scr++)=sc[2]; *(va++)=i2;
718
*(scr++)=sc[3]; *(va++)=i3;
719
if (cnt && !warned) {
720
// debug("[!] Data missing in certain channels (spell-checked or reformatted file?).\n");
731
// Nothing matched on this pass: retry with the next, looser comparison.
if (cnt<=3) goto redome;
733
// No pass matched. Unless only testing, call set_value() with value 0 for
// every domain that has capacity, again preserving setv.
strcpy(setval_copy,setv);
738
if (sc[3] && !test) {
740
strcpy(wbuf,set_value(wbuf,0,3));
744
if (sc[2] && !test) {
746
strcpy(wbuf,set_value(wbuf,0,2));
750
if (sc[1] && !test) {
752
strcpy(wbuf,set_value(wbuf,0,1));
756
if (sc[0] && !test) {
758
strcpy(wbuf,set_value(wbuf,0,0));
763
strcpy(setv,setval_copy);
776
// Reports water_off, the offset at which get_water_atom() reads next.
int get_water_pos(void)
{
  const int pos = water_off;
  return pos;
}
777
// Sets water_off, the offset at which get_water_atom() reads next, to x.
// The value is stored as given, without any range check.
void set_water_pos(int x)
{
  water_off = x;
}
781
// Returns a constant description string for this backend. The result is a
// string literal and must not be modified by the caller.
// NOTE(review): two return statements are visible; presumably a preprocessor
// conditional (not visible here) selects one of them - confirm.
char* get_langdesc(void) {
783
return "fine quality technical text";
785
return "draft / e-mail quality technical text";
790
// Emits, through debug(), the help text for this module: it describes the
// SD_SYNONYMS environment variable that load_synonyms() reads.
void module_help(void) {
791
debug("This module supports SD_SYNONYMS environment variable that should\n"
792
"point to an alternative 'synonyms' file, if necessary. Make sure\n"
793
"to keep the copy of used alternative file for further reference.\n");
797
void md5_wrong(void) {
798
debug("You have used more than one synonyms file to generate your watermarks.\n"
799
"Make sure you've passed the right one using SD_SYNONYMS environment\n"