~ubuntu-branches/ubuntu/quantal/9base/quantal

« back to all changes in this revision

Viewing changes to join/join.c

  • Committer: Bazaar Package Importer
  • Author(s): Daniel Baumann
  • Date: 2010-06-04 17:22:03 UTC
  • mfrom: (1.1.6 upstream)
  • Revision ID: james.westby@ubuntu.com-20100604172203-ei85j0da495sr8ut
Tags: 1:6-1
* Adding Kai as co-maintainer.
* Merging upstream version 6.

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
/*      join F1 F2 on stuff */
 
2
#include <u.h>
 
3
#include <libc.h>
 
4
#include <stdio.h>
 
5
#include <ctype.h>
 
6
#define F1 0
 
7
#define F2 1
 
8
#define F0 3
 
9
#define NFLD    100     /* max field per line */
 
10
#define comp() runecmp(ppi[F1][j1],ppi[F2][j2])
 
11
FILE *f[2];
 
12
Rune buf[2][BUFSIZ];    /*input lines */
 
13
Rune *ppi[2][NFLD+1];   /* pointers to fields in lines */
 
14
Rune *s1,*s2;
 
15
#define j1 joinj1
 
16
#define j2 joinj2
 
17
 
 
18
int     j1      = 1;    /* join of this field of file 1 */
 
19
int     j2      = 1;    /* join of this field of file 2 */
 
20
int     olist[2*NFLD];  /* output these fields */
 
21
int     olistf[2*NFLD]; /* from these files */
 
22
int     no;             /* number of entries in olist */
 
23
Rune    sep1    = ' ';  /* default field separator */
 
24
Rune    sep2    = '\t';
 
25
char *sepstr=" ";
 
26
int     discard;        /* count of truncated lines */
 
27
Rune    null[BUFSIZ]/*  = L""*/;
 
28
int     a1;
 
29
int     a2;
 
30
 
 
31
char *getoptarg(int*, char***);
 
32
void output(int, int);
 
33
int input(int);
 
34
void oparse(char*);
 
35
void error(char*, char*);
 
36
void seek1(void), seek2(void);
 
37
Rune *strtorune(Rune *, char *);
 
38
 
 
39
 
 
40
void
 
41
main(int argc, char **argv)
 
42
{
 
43
        int i;
 
44
 
 
45
        while (argc > 1 && argv[1][0] == '-') {
 
46
                if (argv[1][1] == '\0')
 
47
                        break;
 
48
                switch (argv[1][1]) {
 
49
                case '-':
 
50
                        argc--;
 
51
                        argv++;
 
52
                        goto proceed;
 
53
                case 'a':
 
54
                        switch(*getoptarg(&argc, &argv)) {
 
55
                        case '1':
 
56
                                a1++;
 
57
                                break;
 
58
                        case '2':
 
59
                                a2++;
 
60
                                break;
 
61
                        default:
 
62
                                error("incomplete option -a","");
 
63
                        }
 
64
                        break;
 
65
                case 'e':
 
66
                        strtorune(null, getoptarg(&argc, &argv));
 
67
                        break;
 
68
                case 't':
 
69
                        sepstr=getoptarg(&argc, &argv);
 
70
                        chartorune(&sep1, sepstr);
 
71
                        sep2 = sep1;
 
72
                        break;
 
73
                case 'o':
 
74
                        if(argv[1][2]!=0 ||
 
75
                           argc>2 && strchr(argv[2],',')!=0)
 
76
                                oparse(getoptarg(&argc, &argv));
 
77
                        else for (no = 0; no<2*NFLD && argc>2; no++){
 
78
                                if (argv[2][0] == '1' && argv[2][1] == '.') {
 
79
                                        olistf[no] = F1;
 
80
                                        olist[no] = atoi(&argv[2][2]);
 
81
                                } else if (argv[2][0] == '2' && argv[2][1] == '.') {
 
82
                                        olist[no] = atoi(&argv[2][2]);
 
83
                                        olistf[no] = F2;
 
84
                                } else if (argv[2][0] == '0')
 
85
                                        olistf[no] = F0;
 
86
                                else
 
87
                                        break;
 
88
                                argc--;
 
89
                                argv++;
 
90
                        }
 
91
                        break;
 
92
                case 'j':
 
93
                        if(argc <= 2)
 
94
                                break;
 
95
                        if (argv[1][2] == '1')
 
96
                                j1 = atoi(argv[2]);
 
97
                        else if (argv[1][2] == '2')
 
98
                                j2 = atoi(argv[2]);
 
99
                        else
 
100
                                j1 = j2 = atoi(argv[2]);
 
101
                        argc--;
 
102
                        argv++;
 
103
                        break;
 
104
                case '1':
 
105
                        j1 = atoi(getoptarg(&argc, &argv));
 
106
                        break;
 
107
                case '2':
 
108
                        j2 = atoi(getoptarg(&argc, &argv));
 
109
                        break;
 
110
                }
 
111
                argc--;
 
112
                argv++;
 
113
        }
 
114
proceed:
 
115
        for (i = 0; i < no; i++)
 
116
                if (olist[i]-- > NFLD)  /* 0 origin */
 
117
                        error("field number too big in -o","");
 
118
        if (argc != 3)
 
119
                error("usage: join [-1 x -2 y] [-o list] file1 file2","");
 
120
        j1--;
 
121
        j2--;   /* everyone else believes in 0 origin */
 
122
        s1 = ppi[F1][j1];
 
123
        s2 = ppi[F2][j2];
 
124
        if (strcmp(argv[1], "-") == 0)
 
125
                f[F1] = stdin;
 
126
        else if ((f[F1] = fopen(argv[1], "r")) == 0)
 
127
                error("can't open %s", argv[1]);
 
128
        if(strcmp(argv[2], "-") == 0) {
 
129
                f[F2] = stdin;
 
130
        } else if ((f[F2] = fopen(argv[2], "r")) == 0)
 
131
                error("can't open %s", argv[2]);
 
132
 
 
133
        if(ftell(f[F2]) >= 0)
 
134
                seek2();
 
135
        else if(ftell(f[F1]) >= 0)
 
136
                seek1();
 
137
        else
 
138
                error("neither file is randomly accessible","");
 
139
        if (discard)
 
140
                error("some input line was truncated", "");
 
141
        exits("");
 
142
}
 
143
/*
 * Compare two NUL-terminated rune strings.
 * Returns 0 when they are identical, -1 when the first differing rune of a
 * is smaller than that of b, and 1 otherwise.
 */
int runecmp(Rune *a, Rune *b){
	for(; *a == *b; a++, b++)
		if(*a == '\0')
			return 0;
	return *a < *b ? -1 : 1;
}
 
152
/*
 * Encode the NUL-terminated rune string r into buf with runetochar() and
 * NUL-terminate the result.  Returns buf.
 * No length is passed in, so the caller must supply a large enough buffer.
 */
char *runetostr(char *buf, Rune *r){
	char *out = buf;

	while(*r != 0){
		out += runetochar(out, r);
		r++;
	}
	*out = '\0';
	return buf;
}
 
158
/*
 * Decode the NUL-terminated string s into runes with chartorune(), store
 * them in buf and terminate buf with a zero rune.  Returns buf.
 * No length is passed in, so the caller must supply a large enough buffer.
 */
Rune *strtorune(Rune *buf, char *s){
	Rune *out = buf;

	while(*s != '\0'){
		s += chartorune(out, s);
		out++;
	}
	*out = '\0';
	return buf;
}
 
164
/* lazy.  there ought to be a clean way to combine seek1 & seek2 */
 
165
#define get1() n1=input(F1)
 
166
#define get2() n2=input(F2)
 
167
void
 
168
seek2(void)
 
169
{
 
170
        int n1, n2;
 
171
        int top2=0;
 
172
        int bot2 = ftell(f[F2]);
 
173
        get1();
 
174
        get2();
 
175
        while(n1>0 && n2>0 || (a1||a2) && n1+n2>0) {
 
176
                if(n1>0 && n2>0 && comp()>0 || n1==0) {
 
177
                        if(a2) output(0, n2);
 
178
                        bot2 = ftell(f[F2]);
 
179
                        get2();
 
180
                } else if(n1>0 && n2>0 && comp()<0 || n2==0) {
 
181
                        if(a1) output(n1, 0);
 
182
                        get1();
 
183
                } else /*(n1>0 && n2>0 && comp()==0)*/ {
 
184
                        while(n2>0 && comp()==0) {
 
185
                                output(n1, n2);
 
186
                                top2 = ftell(f[F2]);
 
187
                                get2();
 
188
                        }
 
189
                        fseek(f[F2], bot2, 0);
 
190
                        get2();
 
191
                        get1();
 
192
                        for(;;) {
 
193
                                if(n1>0 && n2>0 && comp()==0) {
 
194
                                        output(n1, n2);
 
195
                                        get2();
 
196
                                } else if(n1>0 && n2>0 && comp()<0 || n2==0) {
 
197
                                        fseek(f[F2], bot2, 0);
 
198
                                        get2();
 
199
                                        get1();
 
200
                                } else /*(n1>0 && n2>0 && comp()>0 || n1==0)*/{
 
201
                                        fseek(f[F2], top2, 0);
 
202
                                        bot2 = top2;
 
203
                                        get2();
 
204
                                        break;
 
205
                                }
 
206
                        }
 
207
                }
 
208
        }
 
209
}
 
210
void
 
211
seek1(void)
 
212
{
 
213
        int n1, n2;
 
214
        int top1=0;
 
215
        int bot1 = ftell(f[F1]);
 
216
        get1();
 
217
        get2();
 
218
        while(n1>0 && n2>0 || (a1||a2) && n1+n2>0) {
 
219
                if(n1>0 && n2>0 && comp()>0 || n1==0) {
 
220
                        if(a2) output(0, n2);
 
221
                        get2();
 
222
                } else if(n1>0 && n2>0 && comp()<0 || n2==0) {
 
223
                        if(a1) output(n1, 0);
 
224
                        bot1 = ftell(f[F1]);
 
225
                        get1();
 
226
                } else /*(n1>0 && n2>0 && comp()==0)*/ {
 
227
                        while(n2>0 && comp()==0) {
 
228
                                output(n1, n2);
 
229
                                top1 = ftell(f[F1]);
 
230
                                get1();
 
231
                        }
 
232
                        fseek(f[F1], bot1, 0);
 
233
                        get2();
 
234
                        get1();
 
235
                        for(;;) {
 
236
                                if(n1>0 && n2>0 && comp()==0) {
 
237
                                        output(n1, n2);
 
238
                                        get1();
 
239
                                } else if(n1>0 && n2>0 && comp()>0 || n1==0) {
 
240
                                        fseek(f[F1], bot1, 0);
 
241
                                        get2();
 
242
                                        get1();
 
243
                                } else /*(n1>0 && n2>0 && comp()<0 || n2==0)*/{
 
244
                                        fseek(f[F1], top1, 0);
 
245
                                        bot1 = top1;
 
246
                                        get1();
 
247
                                        break;
 
248
                                }
 
249
                        }
 
250
                }
 
251
        }
 
252
}
 
253
 
 
254
int
 
255
input(int n)            /* get input line and split into fields */
 
256
{
 
257
        register int i, c;
 
258
        Rune *bp;
 
259
        Rune **pp;
 
260
        char line[BUFSIZ];
 
261
 
 
262
        bp = buf[n];
 
263
        pp = ppi[n];
 
264
        if (fgets(line, BUFSIZ, f[n]) == 0)
 
265
                return(0);
 
266
        strtorune(bp, line);
 
267
        i = 0;
 
268
        do {
 
269
                i++;
 
270
                if (sep1 == ' ')        /* strip multiples */
 
271
                        while ((c = *bp) == sep1 || c == sep2)
 
272
                                bp++;   /* skip blanks */
 
273
                *pp++ = bp;     /* record beginning */
 
274
                while ((c = *bp) != sep1 && c != '\n' && c != sep2 && c != '\0')
 
275
                        bp++;
 
276
                *bp++ = '\0';   /* mark end by overwriting blank */
 
277
        } while (c != '\n' && c != '\0' && i < NFLD-1);
 
278
        if (c != '\n')
 
279
                discard++;
 
280
 
 
281
        *pp = 0;
 
282
        return(i);
 
283
}
 
284
 
 
285
void
 
286
output(int on1, int on2)        /* print items from olist */
 
287
{
 
288
        int i;
 
289
        Rune *temp;
 
290
        char buf[BUFSIZ];
 
291
 
 
292
        if (no <= 0) {  /* default case */
 
293
                printf("%s", runetostr(buf, on1? ppi[F1][j1]: ppi[F2][j2]));
 
294
                for (i = 0; i < on1; i++)
 
295
                        if (i != j1)
 
296
                                printf("%s%s", sepstr, runetostr(buf, ppi[F1][i]));
 
297
                for (i = 0; i < on2; i++)
 
298
                        if (i != j2)
 
299
                                printf("%s%s", sepstr, runetostr(buf, ppi[F2][i]));
 
300
                printf("\n");
 
301
        } else {
 
302
                for (i = 0; i < no; i++) {
 
303
                        if (olistf[i]==F0 && on1>j1)
 
304
                                temp = ppi[F1][j1];
 
305
                        else if (olistf[i]==F0 && on2>j2)
 
306
                                temp = ppi[F2][j2];
 
307
                        else {
 
308
                                temp = ppi[olistf[i]][olist[i]];
 
309
                                if(olistf[i]==F1 && on1<=olist[i] ||
 
310
                                   olistf[i]==F2 && on2<=olist[i] ||
 
311
                                   *temp==0)
 
312
                                        temp = null;
 
313
                        }
 
314
                        printf("%s", runetostr(buf, temp));
 
315
                        if (i == no - 1)
 
316
                                printf("\n");
 
317
                        else
 
318
                                printf("%s", sepstr);
 
319
                }
 
320
        }
 
321
}
 
322
 
 
323
/*
 * Print "join: " followed by the message built from format s1 and its
 * single string argument s2, then a newline, all on stderr, and terminate
 * the program through exits() with s1 as the exit string.
 */
void
error(char *s1, char *s2)
{
	fputs("join: ", stderr);
	fprintf(stderr, s1, s2);
	fputc('\n', stderr);
	exits(s1);
}
 
331
 
 
332
/*
 * Return the argument of the option currently at argv[1].
 *
 * If text follows the option letter inside argv[1] that text is the
 * argument and the argument vector is left alone.  Otherwise the argument
 * is the next word: it must exist and must not start with '-', or error()
 * is called with "incomplete option".  In that case *argcp and *argvp are
 * advanced by one so that the caller's usual argc--/argv++ steps over both
 * the option and its argument.
 */
char *
getoptarg(int *argcp, char ***argvp)
{
	char **av = *argvp;
	int ac = *argcp;

	if(av[1][2] != '\0')
		return av[1] + 2;
	if(ac <= 2 || av[2][0] == '-')
		error("incomplete option %s", av[1]);
	*argcp = ac - 1;
	*argvp = av + 1;
	return av[2];
}
 
345
 
 
346
void
 
347
oparse(char *s)
 
348
{
 
349
        for (no = 0; no<2*NFLD && *s; no++, s++) {
 
350
                switch(*s) {
 
351
                case 0:
 
352
                        return;
 
353
                case '0':
 
354
                        olistf[no] = F0;
 
355
                        break;
 
356
                case '1':
 
357
                case '2':
 
358
                        if(s[1] == '.' && isdigit((uchar)s[2])) {
 
359
                                olistf[no] = *s=='1'? F1: F2;
 
360
                                olist[no] = atoi(s += 2);
 
361
                                break;
 
362
                        } /* fall thru */
 
363
                default:
 
364
                        error("invalid -o list", "");
 
365
                }
 
366
                if(s[1] == ',')
 
367
                        s++;
 
368
        }
 
369
}