/* Revision keyword string; printed by the version banner in main(). */
#define VERSION "$Id: unsort.c 1324 2008-06-07 20:38:28Z wsl $"
51
/*
 * Reverse the order of the 32 bits in i (bit 0 swaps with bit 31, bit 1
 * with bit 30, and so on) and return the result.
 *
 * Each step swaps neighbouring groups of bits, doubling the group width:
 * single bits, pairs, nibbles, bytes and finally the two 16-bit halves.
 * The function is its own inverse: u32reverse(u32reverse(x)) == x.
 */
static uint32_t u32reverse(uint32_t i) {
	i = ((i & 0xAAAAAAAAu) >> 1) | ((i & 0x55555555u) << 1);	/* adjacent bits */
	i = ((i & 0xCCCCCCCCu) >> 2) | ((i & 0x33333333u) << 2);	/* bit pairs */
	i = ((i & 0xF0F0F0F0u) >> 4) | ((i & 0x0F0F0F0Fu) << 4);	/* nibbles */
	i = ((i & 0xFF00FF00u) >> 8) | ((i & 0x00FF00FFu) << 8);	/* bytes */
	return (i << 16) | (i >> 16);					/* 16-bit halves */
}
59
/*
 * Three-way comparison of two uint32_t values, with the signature that
 * qsort() expects for its comparison callback.
 *
 * ap and bp each point to a uint32_t. Returns -1 if *ap < *bp, 1 if
 * *ap > *bp and 0 if they are equal. The values are compared directly
 * rather than subtracted, so large unsigned values cannot wrap around
 * and yield the wrong sign.
 */
static int u32cmp(const void *ap, const void *bp) {
	const uint32_t a = *(const uint32_t *)ap;
	const uint32_t b = *(const uint32_t *)bp;

	if(a < b)
		return -1;
	if(a > b)
		return 1;
	return 0;
}
65
static void u32swap(uint32_t *a, uint32_t *b) {
72
53
static const struct option long_options[] = {
74
{"version", 0, 0, 'v'},
75
{"random", 0, 0, 'r'},
76
{"permutation", 0, 0, 'p'},
78
{"zero-terminated", 0, 0, 'z'},
80
{"linefeed", 0, 0, 'l'},
54
{"help\0 Print this message to stdout", 0, 0, 'h'},
55
{"version\0 Print the program version", 0, 0, 'v'},
56
{"random\0 Use a random permutation", 0, 0, 'r'},
57
{"heuristic\0 Use a heuristic permutation (default)", 0, 0, 'p'},
58
{"identity\0 Do not change the order of lines", 0, 0, 'n'},
59
{"concatenate\0 Concatenate input before shuffling", 0, 0, 'c'},
60
{"merge\0 Merge input after shuffling in given order", 0, 0, 'm'},
61
{"merge-random\0 Merge input after shuffling (default)", 0, 0, 'M'},
62
{"seed\0 <integer> Seed the permutation", 1, 0, 's'},
63
{"zero-terminated\0 Use \\0 line endings", 0, 0, 'z'},
64
{"null\0 Use \\0 line endings", 0, 0, '0'},
65
{"linefeed\0 Use \\n line endings (default)", 0, 0, 'l'},
84
69
static void usage(FILE *fh, const char *progname) {
86
"Usage: %s [-hvrpz0l] [-s <integer>] [file...]\n"
87
"\t-h, --help Print this message to stdout\n"
88
"\t-v, --version Print the program version\n"
89
"\t-r, --random Use a random permutation\n"
90
"\t-p, --heuristic Use a heuristic permutation (default)\n"
91
"\t-s, --seed <integer> Seed the permutation\n"
92
"\t-z, --zero-terminated Use \\0 line endings\n"
93
"\t-0, --null Use \\0 line endings\n"
94
"\t-l, --linefeed Use \\n line endings (default)\n",
71
fprintf(fh, "Usage: %s [-", progname);
72
for(i = 0; long_options[i].name; i++)
73
if(long_options[i].val && !long_options[i].has_arg)
74
fputc(long_options[i].val, fh);
75
fprintf(fh, "] [-s <integer>] [file...]\n");
76
for(i = 0; long_options[i].name; i++)
77
fprintf(fh, "\t-%c, --%s%s\n",
80
long_options[i].name + strlen(long_options[i].name) + 1);
104
83
int main(int argc, char **argv) {
105
84
int i, fd, option_index;
107
85
struct iovec *iov;
86
uint32_t u, numfiles, count, chunk_count, chunk_start;
87
uint32_t *tlb, *chunk_tlb;
88
filebuf_t *fb, *ds, **dd;
109
90
uint32_t seed = 0;
110
91
bool manual_seed = false;
111
algorithm_t algo = ALGO_HEURISTIC;
93
shuffle_algo_t shuffle_algo = shuffle_heuristic;
94
shuffle_algo_t shuffle_files = shuffle_random;
116
while((i = getopt_long(argc, argv, ":hvrps:z0l", long_options, &option_index)) != EOF) {
99
while((i = getopt_long(argc, argv, ":hvrpncmMs:z0l", long_options, &option_index)) != EOF) {
119
102
puts("unsort - reorder files semi-randomly");
120
103
usage(stdout, *argv);
121
104
exit(ERROR_NONE);
123
printf("unsort %s\ncopyright 2007 Wessel Dankers <wsl@fruit.je>\n", VERSION);
106
printf("unsort %s\ncopyright 2007, 2008 Wessel Dankers <wsl@fruit.je>\n", VERSION);
124
107
exit(ERROR_NONE);
109
shuffle_algo = shuffle_random;
129
algo = ALGO_HEURISTIC;
112
shuffle_algo = shuffle_heuristic;
115
shuffle_algo = shuffle_none;
122
shuffle_files = shuffle_none;
126
shuffle_files = shuffle_random;
133
129
if(optarg && *optarg) {
135
131
seed = strtoul(optarg, &end, 0);
137
133
exit_perror(ERROR_USER, "Can't parse seed '%s' as an unsigned integer", optarg);
139
135
exit_error(ERROR_USER, "Can't parse seed '%s' as an unsigned integer", optarg);
141
139
manual_seed = false;
162
numfiles = argc - optind;
169
if(!mt_init_urandom())
170
exit_perror(ERROR_SYSTEM, "Can't read from /dev/urandom");
171
seed = mt_genrand32();
175
dd = xalloc(numfiles * sizeof *dd);
176
ds = xalloc(numfiles * sizeof *ds);
177
tlb = (uint32_t *)ds;
179
shuffle_files(NULL, tlb, numfiles);
180
for(u = 0; u < numfiles; u++)
164
184
if(argc > optind) {
165
185
for(i = optind; i < argc; i++) {
166
188
if(strcmp(argv[i], "-")) {
167
189
fd = open(argv[i], O_RDONLY | O_LARGEFILE);
169
191
warn_perror("Can't open %s", argv[i]);
194
filebuf_init(fb, fd);
175
filebuf_add(STDIN_FILENO);
198
filebuf_init(fb, STDIN_FILENO);
179
filebuf_add(STDIN_FILENO);
202
filebuf_init(*dd, STDIN_FILENO);
182
count = iovec_parse(sep, NULL, NULL);
206
for(u = 0; u < numfiles; u++) {
208
if(iovec_parse(fb, sep, NULL, NULL)) {
210
warn_error("%s: missing linebreak at end of file – line skipped", fb->name);
212
warn_error("missing linebreak at end of input – line skipped");
221
tlb = xalloc(count * sizeof *tlb);
187
222
iov = xalloc(count * sizeof *iov);
188
tlb = xalloc(count * sizeof *tlb);
224
chunk_tlb = (uint32_t *)iov;
225
shuffle_tmp(chunk_tlb + count);
228
merge(dd, numfiles, NULL, chunk_tlb);
230
for(u = 0; u < numfiles; u++) {
232
chunk_start = fb->start;
233
chunk_count = fb->count;
234
shuffle_algo(chunk_tlb + chunk_start, tlb + chunk_start, chunk_count);
193
if(!mt_init_urandom())
194
exit_perror(ERROR_SYSTEM, "Can't read from /dev/urandom");
195
seed = mt_genrand32();
203
for(u = 0; u < count; u++)
204
tlb[u] = u32reverse(u ^ seed);
205
qsort(tlb, (size_t)count, sizeof *tlb, u32cmp);
206
for(u = 0; u < count; u++)
207
tlb[u] = u32reverse(tlb[u]) ^ seed;
210
for(u = 0; u < count; u++)
212
for(u = count - 1; u > 0; u--)
213
u32swap(tlb + mt_genrand32_bounded(0, u + 1), tlb + u);
217
iovec_parse(sep, iov, tlb);
237
shuffle_algo(NULL, tlb, count);
240
for(u = 0; u < numfiles; u++)
241
iovec_parse(dd[u], sep, iov, tlb);
219
243
writev_all(STDOUT_FILENO, iov, count);