2
* duff - Duplicate file finder
3
* Copyright (c) 2005 Camilla Berglund <elmindreda@elmindreda.org>
5
* This software is provided 'as-is', without any express or implied
6
* warranty. In no event will the authors be held liable for any
7
* damages arising from the use of this software.
9
* Permission is granted to anyone to use this software for any
10
* purpose, including commercial applications, and to alter it and
11
* redistribute it freely, subject to the following restrictions:
13
* 1. The origin of this software must not be misrepresented; you
14
* must not claim that you wrote the original software. If you use
15
* this software in a product, an acknowledgment in the product
16
* documentation would be appreciated but is not required.
18
* 2. Altered source versions must be plainly marked as such, and
19
* must not be misrepresented as being the original software.
21
* 3. This notice may not be removed or altered from any source
27
/* Shorthand macro for gettext.
29
#define _(String) gettext(String)
31
/* Only use __attribute__ on GCC and compatible compilers */
33
/* Fallback definition: expands every __attribute__((...)) annotation to
 * nothing, so the annotated declarations still compile.
 * NOTE(review): the compiler check described by the comment above is not
 * visible in this view; confirm that this definition is conditionally guarded.
 */
#define __attribute__(x)
36
/* The number of bytes to use as read buffer when reading files.
37
* NOTE: This must be at least 1 and should likely be a multiple of 4096.
39
#define BUFFER_SIZE 8192
41
/* The number of bytes to sample from the beginning of potential duplicates.
42
* NOTE: This must be at least 1 but should likely not be larger than 4096.
44
#define SAMPLE_SIZE 4096
46
/* The number of bits of file size to use as bucket index.
47
* NOTE: This must be at least 1.
51
/* Typedefs for structs and enums.
53
typedef enum Status Status;
54
typedef enum SymlinkMode SymlinkMode;
55
typedef enum Function Function;
56
typedef struct File File;
57
typedef struct FileList FileList;
59
/* Status modes for files.
63
/* The file has been stat'd but its data has not been touched.
66
/* The beginning of the file has been hashed.
69
/* The entire file has been hashed.
72
/* An error occurred when reading from the file.
75
/* The file has been reported as a duplicate.
80
/* Symlink dereferencing modes.
84
/* Do not dereference any directory symlinks.
87
/* Dereference all directory symlinks encountered.
90
/* Dereference only those directory symlinks listed on the command line.
95
/* Represents a collected file and potential duplicate.
108
/* Represents a list of files.
117
/* Message digest functions.
127
/* These are defined and documented in dufffile.c */
128
void init_file(File* file, const char* path, const struct stat* sb);
129
void free_file(File* file);
130
int compare_files(File* first, File* second);
131
void generate_file_digest(File* file);
133
/* These are defined and documented in duffutil.c */
134
void init_file_list(FileList* list);
135
File* alloc_file(FileList* list);
136
void empty_file_list(FileList* list);
137
void free_file_list(FileList* list);
138
char* read_path(FILE* stream);
139
void kill_trailing_slashes(char* path);
140
size_t get_field_terminator(void);
141
void set_digest_function(Function function);
142
size_t get_digest_size(void);
143
void digest_init(void);
144
void digest_update(const void* data, size_t size);
145
void digest_finish(uint8_t* digest);
146
/* Takes a printf-style format string followed by its arguments.
 * The format attribute lets GCC-compatible compilers check the variadic
 * arguments (starting at parameter 2) against the format string (parameter 1);
 * the noreturn attribute declares that this function never returns to its
 * caller.
 */
void error(const char* format, ...) __attribute__((format(printf, 1, 2))) __attribute__((noreturn));
147
/* Takes a printf-style format string followed by its arguments.
 * The format attribute lets GCC-compatible compilers check the variadic
 * arguments (starting at parameter 2) against the format string (parameter 1).
 * No noreturn attribute is declared here, so callers continue after the call.
 */
void warning(const char* format, ...) __attribute__((format(printf, 1, 2)));
148
int cluster_header_uses_digest(const char* format);
149
void print_cluster_header(const char* format,
153
const uint8_t* digest);
155
/* These are defined and documented in duffdriver.c */
156
void process_args(int argc, char** argv);