~ubuntu-branches/ubuntu/karmic/unsort/karmic

« back to all changes in this revision

Viewing changes to unsort.c

  • Committer: Bazaar Package Importer
  • Author(s): Guus Sliepen
  • Date: 2008-06-08 12:53:54 UTC
  • mfrom: (1.1.1 upstream)
  • Revision ID: james.westby@ubuntu.com-20080608125354-yizugn2e7fxzxuix
Tags: 1.1.0-1
New upstream release.

Show diffs side-by-side

added added

removed removed

Lines of Context:
1
1
/*****************************************************************************
2
2
 
3
3
        unsort - reorder files semi-randomly
4
 
        Copyright (C) 2007  Wessel Dankers <wsl@fruit.je>
 
4
        Copyright (C) 2007, 2008  Wessel Dankers <wsl@fruit.je>
5
5
 
6
6
        This program is free software: you can redistribute it and/or modify
7
7
        it under the terms of the GNU General Public License as published by
16
16
        You should have received a copy of the GNU General Public License
17
17
        along with this program.  If not, see <http://www.gnu.org/licenses/>.
18
18
 
19
 
        $Id: unsort.c 1267 2007-12-23 00:59:47Z wsl $
 
19
        $Id: unsort.c 1324 2008-06-07 20:38:28Z wsl $
20
20
        $URL: http://rot.zo.oi/svn/wsl/src/unsort/unsort.c $
21
21
 
22
22
*****************************************************************************/
37
37
#include "error.h"
38
38
#include "filebuf.h"
39
39
#include "iovec.h"
 
40
#include "shuffle.h"
 
41
#include "merge.h"
40
42
#include "mt19937ar.h"
41
43
#include "mt19937ar_init.h"
42
44
 
45
47
#endif
46
48
 
47
49
#ifndef VERSION
48
 
#define VERSION "$Id: unsort.c 1267 2007-12-23 00:59:47Z wsl $"
 
50
#define VERSION "$Id: unsort.c 1324 2008-06-07 20:38:28Z wsl $"
49
51
#endif
50
52
 
51
 
static uint32_t u32reverse(uint32_t i) {
52
 
        i = (i & 0xAAAAAAAA) >> 1 | (i & 0x55555555) << 1;
53
 
        i = (i & 0xCCCCCCCC) >> 2 | (i & 0x33333333) << 2;
54
 
        i = (i & 0xF0F0F0F0) >> 4 | (i & 0x0F0F0F0F) << 4;
55
 
        i = (i & 0xFF00FF00) >> 8 | (i & 0x00FF00FF) << 8;
56
 
        return (i << 16) | (i >> 16);
57
 
}
58
 
 
59
 
static int u32cmp(const void *ap, const void *bp) {
60
 
        uint32_t a = *(const uint32_t *)ap;
61
 
        uint32_t b = *(const uint32_t *)bp;
62
 
        return a < b ? -1 : a > b ? 1 : 0;
63
 
}
64
 
 
65
 
static void u32swap(uint32_t *a, uint32_t *b) {
66
 
        uint32_t u;
67
 
        u = *a;
68
 
        *a = *b;
69
 
        *b = u;
70
 
}
71
 
 
72
53
static const struct option long_options[] = {
73
 
        {"help", 0, 0, 'h'},
74
 
        {"version", 0, 0, 'v'},
75
 
        {"random", 0, 0, 'r'},
76
 
        {"permutation", 0, 0, 'p'},
77
 
        {"seed", 1, 0, 's'},
78
 
        {"zero-terminated", 0, 0, 'z'},
79
 
        {"null", 0, 0, '0'},
80
 
        {"linefeed", 0, 0, 'l'},
 
54
        {"help\0             Print this message to stdout", 0, 0, 'h'},
 
55
        {"version\0          Print the program version", 0, 0, 'v'},
 
56
        {"random\0           Use a random permutation", 0, 0, 'r'},
 
57
        {"heuristic\0        Use a heuristic permutation (default)", 0, 0, 'p'},
 
58
        {"identity\0         Do not change the order of lines", 0, 0, 'n'},
 
59
        {"concatenate\0      Concatenate input before shuffling", 0, 0, 'c'},
 
60
        {"merge\0            Merge input after shuffling in given order", 0, 0, 'm'},
 
61
        {"merge-random\0     Merge input after shuffling (default)", 0, 0, 'M'},
 
62
        {"seed\0 <integer>   Seed the permutation", 1, 0, 's'},
 
63
        {"zero-terminated\0  Use \\0 line endings", 0, 0, 'z'},
 
64
        {"null\0             Use \\0 line endings", 0, 0, '0'},
 
65
        {"linefeed\0         Use \\n line endings (default)", 0, 0, 'l'},
81
66
        {0, 0, 0, 0}
82
67
};
83
68
 
84
69
static void usage(FILE *fh, const char *progname) {
85
 
        fprintf(fh,
86
 
                        "Usage: %s [-hvrpz0l] [-s <integer>] [file...]\n"
87
 
                        "\t-h, --help             Print this message to stdout\n"
88
 
                        "\t-v, --version          Print the program version\n"
89
 
                        "\t-r, --random           Use a random permutation\n"
90
 
                        "\t-p, --heuristic        Use a heuristic permutation (default)\n"
91
 
                        "\t-s, --seed <integer>   Seed the permutation\n"
92
 
                        "\t-z, --zero-terminated  Use \\0 line endings\n"
93
 
                        "\t-0, --null             Use \\0 line endings\n"
94
 
                        "\t-l, --linefeed         Use \\n line endings (default)\n",
95
 
                progname);
 
70
        int i;
 
71
        fprintf(fh, "Usage: %s [-", progname);
 
72
        for(i = 0; long_options[i].name; i++)
 
73
                if(long_options[i].val && !long_options[i].has_arg)
 
74
                        fputc(long_options[i].val, fh);
 
75
        fprintf(fh, "] [-s <integer>] [file...]\n");
 
76
        for(i = 0; long_options[i].name; i++)
 
77
                fprintf(fh, "\t-%c, --%s%s\n",
 
78
                        long_options[i].val,
 
79
                        long_options[i].name,
 
80
                        long_options[i].name + strlen(long_options[i].name) + 1);
96
81
}
97
82
 
98
 
typedef enum {
99
 
        ALGO_NONE,
100
 
        ALGO_HEURISTIC,
101
 
        ALGO_RANDOM
102
 
} algorithm_t;
103
 
 
104
83
int main(int argc, char **argv) {
105
84
        int i, fd, option_index;
106
 
        uint32_t count, u;
107
85
        struct iovec *iov;
108
 
        uint32_t *tlb;
 
86
        uint32_t u, numfiles, count, chunk_count, chunk_start;
 
87
        uint32_t *tlb, *chunk_tlb;
 
88
        filebuf_t *fb, *ds, **dd;
 
89
 
109
90
        uint32_t seed = 0;
110
91
        bool manual_seed = false;
111
 
        algorithm_t algo = ALGO_HEURISTIC;
 
92
        bool multi = true;
 
93
        shuffle_algo_t shuffle_algo = shuffle_heuristic;
 
94
        shuffle_algo_t shuffle_files = shuffle_random;
112
95
        char *end;
113
96
        int sep = '\n';
114
97
 
115
98
        opterr = 0;
116
 
        while((i = getopt_long(argc, argv, ":hvrps:z0l", long_options, &option_index)) != EOF) {
 
99
        while((i = getopt_long(argc, argv, ":hvrpncmMs:z0l", long_options, &option_index)) != EOF) {
117
100
                switch(i) {
118
101
                        case 'h':
119
102
                                puts("unsort - reorder files semi-randomly");
120
103
                                usage(stdout, *argv);
121
104
                                exit(ERROR_NONE);
122
105
                        case 'v':
123
 
                                printf("unsort %s\ncopyright 2007 Wessel Dankers <wsl@fruit.je>\n", VERSION);
 
106
                                printf("unsort %s\ncopyright 2007, 2008 Wessel Dankers <wsl@fruit.je>\n", VERSION);
124
107
                                exit(ERROR_NONE);
125
108
                        case 'r':
126
 
                                algo = ALGO_RANDOM;
 
109
                                shuffle_algo = shuffle_random;
127
110
                                break;
128
111
                        case 'p':
129
 
                                algo = ALGO_HEURISTIC;
 
112
                                shuffle_algo = shuffle_heuristic;
 
113
                                break;
 
114
                        case 'n':
 
115
                                shuffle_algo = shuffle_none;
 
116
                                break;
 
117
                        case 'c':
 
118
                                multi = false;
 
119
                                break;
 
120
                        case 'm':
 
121
                                multi = true;
 
122
                                shuffle_files = shuffle_none;
 
123
                                break;
 
124
                        case 'M':
 
125
                                multi = true;
 
126
                                shuffle_files = shuffle_random;
130
127
                                break;
131
128
                        case 's':
132
 
                                errno = 0;
133
129
                                if(optarg && *optarg) {
134
 
                                        manual_seed = true;
 
130
                                        errno = 0;
135
131
                                        seed = strtoul(optarg, &end, 0);
136
132
                                        if(errno)
137
133
                                                exit_perror(ERROR_USER, "Can't parse seed '%s' as an unsigned integer", optarg);
138
134
                                        if(end && *end)
139
135
                                                exit_error(ERROR_USER, "Can't parse seed '%s' as an unsigned integer", optarg);
 
136
                                        manual_seed = true;
140
137
                                } else {
 
138
                                        seed = UINT32_C(0);
141
139
                                        manual_seed = false;
142
 
                                        seed = UINT32_C(0);
143
140
                                }
144
141
                                break;
145
142
                        case '0':
161
158
                }
162
159
        }
163
160
 
 
161
        if(argc > optind)
 
162
                numfiles = argc - optind;
 
163
        else
 
164
                numfiles = 1;
 
165
 
 
166
        if(manual_seed) {
 
167
                mt_seed(seed);
 
168
        } else {
 
169
                if(!mt_init_urandom())
 
170
                        exit_perror(ERROR_SYSTEM, "Can't read from /dev/urandom");
 
171
                seed = mt_genrand32();
 
172
        }
 
173
        shuffle_seed(seed);
 
174
 
 
175
        dd = xalloc(numfiles * sizeof *dd);
 
176
        ds = xalloc(numfiles * sizeof *ds);
 
177
        tlb = (uint32_t *)ds;
 
178
 
 
179
        shuffle_files(NULL, tlb, numfiles);
 
180
        for(u = 0; u < numfiles; u++)
 
181
                dd[u] = ds + tlb[u];
 
182
 
 
183
        u = 0;
164
184
        if(argc > optind) {
165
185
                for(i = optind; i < argc; i++) {
 
186
                        fb = dd[u++];
 
187
                        *fb = filebuf_0;
166
188
                        if(strcmp(argv[i], "-")) {
167
189
                                fd = open(argv[i], O_RDONLY | O_LARGEFILE);
168
190
                                if(fd == -1) {
169
191
                                        warn_perror("Can't open %s", argv[i]);
170
192
                                        continue;
171
193
                                }
172
 
                                filebuf_add(fd);
 
194
                                filebuf_init(fb, fd);
173
195
                                close(fd);
 
196
                                fb->name = argv[i];
174
197
                        } else {
175
 
                                filebuf_add(STDIN_FILENO);
 
198
                                filebuf_init(fb, STDIN_FILENO);
176
199
                        }
177
200
                }
178
201
        } else {
179
 
                filebuf_add(STDIN_FILENO);
 
202
                filebuf_init(*dd, STDIN_FILENO);
180
203
        }
181
204
 
182
 
        count = iovec_parse(sep, NULL, NULL);
 
205
        count = 0;
 
206
        for(u = 0; u < numfiles; u++) {
 
207
                fb = dd[u];
 
208
                if(iovec_parse(fb, sep, NULL, NULL)) {
 
209
                        if(fb->name)
 
210
                                warn_error("%s: missing linebreak at end of file – line skipped", fb->name);
 
211
                        else
 
212
                                warn_error("missing linebreak at end of input – line skipped");
 
213
                }
 
214
                fb->start = count;
 
215
                count += fb->count;
 
216
        }
183
217
 
184
218
        if(!count)
185
219
                return 0;
186
220
 
 
221
        tlb = xalloc(count * sizeof *tlb);
187
222
        iov = xalloc(count * sizeof *iov);
188
 
        tlb = xalloc(count * sizeof *tlb);
189
 
 
190
 
        if(manual_seed) {
191
 
                mt_seed(seed);
 
223
 
 
224
        chunk_tlb = (uint32_t *)iov;
 
225
        shuffle_tmp(chunk_tlb + count);
 
226
 
 
227
        if(multi) {
 
228
                merge(dd, numfiles, NULL, chunk_tlb);
 
229
 
 
230
                for(u = 0; u < numfiles; u++) {
 
231
                        fb = dd[u];
 
232
                        chunk_start = fb->start;
 
233
                        chunk_count = fb->count;
 
234
                        shuffle_algo(chunk_tlb + chunk_start, tlb + chunk_start, chunk_count);
 
235
                }
192
236
        } else {
193
 
                if(!mt_init_urandom())
194
 
                        exit_perror(ERROR_SYSTEM, "Can't read from /dev/urandom");
195
 
                seed = mt_genrand32();
196
 
        }
197
 
 
198
 
        switch(algo) {
199
 
                case ALGO_NONE:
200
 
                        tlb = NULL;
201
 
                        break;
202
 
                case ALGO_HEURISTIC:
203
 
                        for(u = 0; u < count; u++)
204
 
                                tlb[u] = u32reverse(u ^ seed);
205
 
                        qsort(tlb, (size_t)count, sizeof *tlb, u32cmp);
206
 
                        for(u = 0; u < count; u++)
207
 
                                tlb[u] = u32reverse(tlb[u]) ^ seed;
208
 
                        break;
209
 
                case ALGO_RANDOM:
210
 
                        for(u = 0; u < count; u++)
211
 
                                tlb[u] = u;
212
 
                        for(u = count - 1; u > 0; u--)
213
 
                                u32swap(tlb + mt_genrand32_bounded(0, u + 1), tlb + u);
214
 
                        break;
215
 
        }
216
 
 
217
 
        iovec_parse(sep, iov, tlb);
 
237
                shuffle_algo(NULL, tlb, count);
 
238
        }
 
239
 
 
240
        for(u = 0; u < numfiles; u++)
 
241
                iovec_parse(dd[u], sep, iov, tlb);
218
242
 
219
243
        writev_all(STDOUT_FILENO, iov, count);
220
244