/* Origin: ~ubuntu-branches/ubuntu/oneiric/foremost/oneiric */
/* FOREMOST
 *
 * By Jesse Kornblum
 *
 * This is a work of the US Government. In accordance with 17 USC 105,
 * copyright protection is not available for any work of the US Government.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 *
 */
 
//#define DEBUG 1
   
#ifndef __FOREMOST_H
#define __FOREMOST_H

/* Version information is defined in the Makefile */

#define AUTHOR      "Jesse Kornblum, Kris Kendall, and Nick Mikus"

/* We use \r\n for newlines as this has to work on Win32. It's redundant for
   everybody else, but shouldn't cause any harm. */
#define COPYRIGHT   "This program is a work of the US Government. "\
"In accordance with 17 USC 105,\r\n"\
"copyright protection is not available for any work of the US Government.\r\n"\
"This is free software; see the source for copying conditions. There is NO\r\n"\
"warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\r\n"

#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <limits.h>
#include <dirent.h>
#include <errno.h>
#include <string.h>
#include <unistd.h>
#include <time.h>
#include <math.h>
#include <ctype.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <signal.h>

/* For va_arg */
#include <stdarg.h>

#ifdef __LINUX
#include <sys/ioctl.h>
#include <sys/mount.h>
#define   u_int64_t   unsigned long long
#endif 


#ifdef __LINUX

#ifndef __USE_BSD
#define __USE_BSD
#endif
#include <endian.h>

#elif defined (__SOLARIS)

#define BIG_ENDIAN    4321
#define LITTLE_ENDIAN 1234

#include <sys/isa_defs.h>
#ifdef _BIG_ENDIAN       
#define BYTE_ORDER BIG_ENDIAN
#else
#define BYTE_ORDER LITTLE_ENDIAN
#endif

#elif defined (__WIN32)
#include <sys/param.h>

#elif defined (__MACOSX)
#include <machine/endian.h>
#define __U16_TYPE unsigned short
#endif


#define TRUE   1
#define FALSE  0
#define ONE_MEGABYTE  1048576


/* RBF - Do we need these type definitions? */ 
#ifdef __SOLARIS
#define   u_int32_t   unsigned int
#define   u_int64_t   unsigned long long
#endif 


/* The only time we're *not* on a UNIX system is when we're on Windows */
#ifndef __WIN32
#ifndef __UNIX
#define __UNIX
#endif  /* ifndef __UNIX */
#endif  /* ifndef __WIN32 */


#ifdef __UNIX

#ifndef __U16_TYPE
#define __U16_TYPE unsigned short
#endif

#include <libgen.h>

#ifndef BYTE_ORDER 

#define BIG_ENDIAN    4321
#define LITTLE_ENDIAN 1234

#define BYTE_ORDER LITTLE_ENDIAN

#endif
/* This avoids compiler warnings on older systems */
int fseeko(FILE *stream, off_t offset, int whence);
off_t ftello(FILE *stream);


#define CMD_PROMPT "$"
#define DIR_SEPARATOR   '/'
#define NEWLINE "\n"
#define LINE_LENGTH 74
#define BLANK_LINE \
"                                                                          "

#endif /* #ifdef __UNIX */

/* This allows us to open standard input in binary mode by default 
   See http://gnuwin32.sourceforge.net/compile.html for more */
#include <fcntl.h>

/* Code specific to Microsoft Windows */
#ifdef __WIN32

/* By default, Windows uses long for off_t. This won't do. We
   need an unsigned number at minimum. Windows doesn't have 64 bit
   numbers though. */
#ifdef off_t
#undef off_t
#endif
#define off_t unsigned long

#define CMD_PROMPT "c:\\>"
#define  DIR_SEPARATOR   '\\'
#define NEWLINE "\r\n"
#define LINE_LENGTH 72
#define BLANK_LINE \
"                                                                        "


/* It would be nice to use 64-bit file lengths in Windows */
#define ftello   ftell
#define fseeko   fseek

#ifndef __CYGWIN
#define  snprintf         _snprintf
#endif

#define  u_int32_t        unsigned long

/* We create macros for the Windows equivalent UNIX functions.
   No worries about lstat to stat; Windows doesn't have symbolic links */
#define lstat(A,B)      stat(A,B)

#define u_int64_t unsigned __int64

#ifndef __CYGWIN
	#define realpath(A,B)   _fullpath(B,A,PATH_MAX) 
#endif
/* Not used in md5deep anymore, but left in here in case I 
   ever need it again. Win32 documentation searches are evil.
   int asprintf(char **strp, const char *fmt, ...);
*/

char *basename(char *a);
extern char *optarg;
extern int optind;
int getopt(int argc, char *const argv[], const char *optstring);

#endif   /* ifdef __WIN32 */


/* On non-glibc systems we have to manually set the __progname variable */
#ifdef __GLIBC__
extern char *__progname;
#else
char *__progname;
#endif /* ifdef __GLIBC__ */

/* -----------------------------------------------------------------
   Program Defaults
   ----------------------------------------------------------------- */
#define MAX_STRING_LENGTH   1024
#define COMMENT_LENGTH   64

/* Modes refer to options that can be set by the user. */

/* Every mode except mode_none occupies its own bit, so several modes can
   be OR-ed together into a single value. The expansions are parenthesized
   so that they stay intact when used next to operators that bind tighter
   than <<, e.g. "mode_quiet * 2" or "mode_verbose + 1". */
#define mode_none             0
#define mode_verbose          (1<<1)
#define mode_quiet            (1<<2)
#define mode_ind_blk          (1<<3)
#define mode_quick            (1<<4)
#define mode_write_all        (1<<5)
#define mode_write_audit      (1<<6)
#define mode_multi_file       (1<<7)

#define MAX_NEEDLES                   254
#define NUM_SEARCH_SPEC_ELEMENTS        6
#define MAX_SUFFIX_LENGTH               8
#define MAX_FILE_TYPES                100
#define FOREMOST_NOEXTENSION_SUFFIX "NONE"
/* Mode bits 8 to 31 are reserved for future use. We shouldn't use
   mode bits higher than 31 as Win32 can't go that high. */

#define DEFAULT_MODE              mode_none
#define DEFAULT_CONFIG_FILE       "foremost.conf"
#define DEFAULT_OUTPUT_DIRECTORY  "output"
#define AUDIT_FILE_NAME           "audit.txt"
#define FOREMOST_DIVIDER          "------------------------------------------------------------------"

/* Numeric identifiers, one per type name, numbered consecutively from
   0 (JPEG) through 32 (MP4).
   NOTE(review): presumably these are the values stored in s_spec.type --
   confirm against the code that fills in search_spec. */
#define JPEG 0
#define GIF 1
#define BMP 2
#define MPG 3
#define PDF 4
#define DOC 5
#define AVI 6
#define WMV 7
#define HTM 8
#define ZIP 9
#define MOV 10
#define XLS 11
#define PPT 12
#define WPD 13
#define CPP 14
#define OLE 15
#define GZIP 16
#define RIFF 17
#define WAV 18
#define VJPEG 19
#define SXW 20
#define SXC 21
#define SXI 22
#define CONF 23
#define PNG 24
#define RAR 25
#define EXE 26
#define ELF 27
#define REG 28
#define DOCX 29
#define XLSX 30
#define PPTX 31
#define MP4 32


/* Binary size multiples, each 1024 times the previous one.
   Every compound expansion is parenthesized so the macros can safely be
   used as divisors or next to other multiplicative operators
   (unparenthesized, "x / MEGABYTE" would parse as "x / 1024 * 1024").
   TERABYTE and above do not fit in a 32-bit int, so they are computed
   as unsigned long long; KILOBYTE, MEGABYTE and GIGABYTE remain int. */
#define KILOBYTE                  1024
#define MEGABYTE                  (1024 * KILOBYTE)
#define GIGABYTE                  (1024 * MEGABYTE)
#define TERABYTE                  (1024ULL * GIGABYTE)
#define PETABYTE                  (1024ULL * TERABYTE)
#define EXABYTE                   (1024ULL * PETABYTE)

#define UNITS_BYTES                     0
#define UNITS_KILOB                     1
#define UNITS_MEGAB                     2
#define UNITS_GIGAB                     3
#define UNITS_TERAB                     4
#define UNITS_PETAB                     5
#define UNITS_EXAB                      6

#define SEARCHTYPE_FORWARD      0
#define SEARCHTYPE_REVERSE      1
#define SEARCHTYPE_FORWARD_NEXT 2
#define SEARCHTYPE_ASCII        3

#define FOREMOST_BIG_ENDIAN 0
#define FOREMOST_LITTLE_ENDIAN 1
/*DEFAULT CHUNK SIZE In MB*/
#define CHUNK_SIZE 100 


/* Wildcard is a global variable because it's used by very simple
   functions that don't need the whole state passed to them */

/* -----------------------------------------------------------------
   State Variable and Global Variables
   ----------------------------------------------------------------- */
char wildcard;
/* Program-wide state. A pointer to this structure is the first argument
   of most of the functions declared in this header. */
typedef struct f_state 
{
  /* NOTE(review): presumably a bitmask built from the mode_* flags and
     handled by set_mode()/get_mode() -- confirm in the implementation. */
  off_t mode;
  char *config_file;
  char *input_file;
  char *output_directory;
  char *start_time;
  char *invocation;
  char *audit_file_name;
  FILE *audit_file;
  /* NOTE(review): looks like a boolean flag paired with audit_file --
     confirm against set_audit_file_open()/get_audit_file_open(). */
  int audit_file_open;
  int num_builtin;
  int chunk_size; /*IN MB*/
  int fileswritten;
  int block_size;
  int skip;
  
  int time_stamp;
} f_state;

/* A byte pattern (value, len) together with a lookup table that has one
   size_t entry for every possible unsigned char value.
   NOTE(review): the table is presumably the Boyer-Moore skip table filled
   in by init_bm_table() -- confirm in the implementation. */
typedef struct marker
{
    unsigned char* value;
    int len;
    size_t marker_bm_table[UCHAR_MAX+1];
}marker;

/* One search specification: a suffix, a type, a maximum length, a header
   and a footer byte pattern (each with its own length and per-byte lookup
   table), up to five additional markers, and bookkeeping fields. */
typedef struct s_spec
{
    char* suffix;
    /* NOTE(review): presumably one of the numeric type identifiers
       (JPEG, GIF, ...) defined above -- confirm. */
    int type;
    u_int64_t max_len;
    unsigned char* header;
    unsigned int header_len;
    size_t header_bm_table[UCHAR_MAX+1];

    unsigned char* footer;
    unsigned int footer_len;
    size_t footer_bm_table[UCHAR_MAX+1];
    /* Fixed capacity of five markers; num_markers is kept alongside it.
       NOTE(review): presumably num_markers counts the entries in use --
       confirm. */
    marker markerlist[5];
    int num_markers;
    /* NOTE(review): presumably one of the SEARCHTYPE_* values -- confirm. */
    int searchtype;                               

    /* NOTE(review): looks like a case-sensitivity flag -- confirm. */
    int case_sen;
    
    int found;
    
    char comment[MAX_STRING_LENGTH];/*Used for audit*/
    int written; /*used for -a mode*/
}s_spec;

s_spec search_spec[50];  /*ARRAY OF BUILTIN SEARCH TYPES*/

/* Bookkeeping for one input: its name, byte/megabyte counters and the
   stdio stream it is read through. */
typedef struct f_info {
  char *file_name;
  off_t total_bytes;

  /* We never use the total number of bytes in a file, 
     only the number of megabytes when we display a time estimate */
  off_t total_megs;
  off_t bytes_read;

#ifdef __WIN32
  /* Win32 is a 32-bit operating system and can't handle file sizes
     larger than 4GB. We use this to keep track of overflows */
  off_t last_read;
  off_t overflow_count;
#endif

  FILE *handle;
  /* NOTE(review): presumably non-zero when the input is standard input
     rather than a named file -- confirm against process_stdin(). */
  int is_stdin;
} f_info;

/* Set if the user hits ctrl-c */
int signal_caught;

/* -----------------------------------------------------------------
   Function definitions
   ----------------------------------------------------------------- */

/* State functions */

int initialize_state(f_state *s, int argc, char **argv);
void free_state(f_state *s);

char *get_invocation(f_state *s);
char *get_start_time(f_state *s);

int set_config_file(f_state *s, char *fn);
char* get_config_file(f_state *s);

int set_output_directory(f_state *s, char *fn);
char* get_output_directory(f_state *s);

void set_audit_file_open(f_state *s);
int get_audit_file_open(f_state *s);

void set_mode(f_state *s, off_t new_mode);
int get_mode(f_state *s, off_t check_mode);

int set_search_def(f_state *s,char* ft,u_int64_t max_file_size);
void get_search_def(f_state s);

void set_input_file(f_state *s,char* filename);
void get_input_file(f_state *s);

void set_chunk(f_state *s, int size);

void init_bm_table(unsigned char *needle, size_t table[UCHAR_MAX + 1], size_t len, int casesensitive,int searchtype);

void set_skip(f_state *s, int size);
void set_block(f_state *s, int size);


#ifdef __DEBUG
void dump_state(f_state *s);
#endif

/* The audit file */
int open_audit_file(f_state *s);
void audit_msg(f_state *s, char *format, ...);
int close_audit_file(f_state *s);


/* Set up our output directory */
int create_output_directory(f_state *s);
int write_to_disk(f_state *s,s_spec * needle,u_int64_t len,unsigned char* buf,  u_int64_t t_offset);
int create_sub_dirs(f_state *s);
void cleanup_output(f_state *s);

/* Configuration Files */
int load_config_file(f_state *s);


/* Helper functions */
char *current_time(void);
off_t find_file_size(FILE *f);
char *human_readable(off_t size, char *buffer);
char *units(unsigned int c);
unsigned int chop(char *buf);
void print_search_specs(f_state *s);
int memwildcardcmp(const void *s1, const void *s2,size_t n,int caseSensitive);
int charactersMatch(char a, char b, int caseSensitive);
void printx(unsigned char* buf,int start, int end);
unsigned short htos(unsigned char s[],int endian);
unsigned int htoi(unsigned char s[],int endian);
u_int64_t htoll(unsigned char s[],int endian);
int displayPosition(f_state* s,f_info* i,u_int64_t pos);


/* Interface functions 
   These functions stay the same regardless if we're using a
   command line interface or a GUI */
void fatal_error(f_state *s, char *msg);
void print_error(f_state *s, char *fn, char *msg);
void print_message(f_state *s, char *format, va_list argp);
void print_stats(f_state *s);

/* Engine */
int process_file(f_state *s);
int process_stdin(f_state *s);
unsigned char *bm_search(unsigned char *needle, size_t needle_len,unsigned char *haystack, size_t haystack_len,
	size_t table[UCHAR_MAX + 1], int case_sen,int searchtype);
unsigned char *bm_search_skipn(unsigned char *needle, size_t needle_len,unsigned char *haystack, size_t haystack_len,
	size_t table[UCHAR_MAX + 1], int casesensitive,int searchtype, int start_pos) ;	
/* BUILTIN */
/* Kept inside the include guard: this prototype depends on f_state and
   s_spec, which are only defined while the guard is open. */
unsigned char* extract_file(f_state *s,  u_int64_t c_offset,unsigned char *foundat,  u_int64_t buflen, s_spec * needle, u_int64_t f_offset);

#endif /* __FOREMOST_H */