22
29
//usage:#define cut_trivial_usage
23
30
//usage: "[OPTIONS] [FILE]..."
24
31
//usage:#define cut_full_usage "\n\n"
25
//usage: "Print selected fields from each input FILE to stdout\n"
32
//usage: "Print selected fields from FILEs to stdout\n"
26
33
//usage: "\n -b LIST Output only bytes from LIST"
27
34
//usage: "\n -c LIST Output only characters from LIST"
28
//usage: "\n -d CHAR Use CHAR instead of tab as the field delimiter"
29
//usage: "\n -s Output only the lines containing delimiter"
30
//usage: "\n -f N Print only these fields"
35
//usage: "\n -d SEP Field delimiter for input (default -f TAB, -F run of whitespace)"
36
//usage: "\n -O SEP Field delimiter for output (default = -d for -f, one space for -F)"
37
//usage: "\n -D Don't sort/collate sections or match -fF lines without delimiter"
38
//usage: "\n -f LIST Print only these fields (-d is single char)"
39
//usage: IF_FEATURE_CUT_REGEX(
40
//usage: "\n -F LIST Print only these fields (-d is regex)"
42
//usage: "\n -s Output only lines containing delimiter"
31
43
//usage: "\n -n Ignored"
44
//(manpage:-n with -b: don't split multibyte characters)
33
46
//usage:#define cut_example_usage
34
47
//usage: "$ echo \"Hello world\" | cut -f 1 -d ' '\n"
54
#if ENABLE_FEATURE_CUT_REGEX
58
typedef struct { int rm_eo, rm_so; } regmatch_t;
59
#define xregcomp(x, ...) *(x) = 0
60
#define regexec(...) 0
41
63
/* This is a NOEXEC applet. Be very careful! */
45
#define OPT_STR "b:c:f:d:sn"
67
#define OPT_STR "b:c:f:d:O:sD"IF_FEATURE_CUT_REGEX("F:")"n"
46
68
#define CUT_OPT_BYTE_FLGS (1 << 0)
47
69
#define CUT_OPT_CHAR_FLGS (1 << 1)
48
70
#define CUT_OPT_FIELDS_FLGS (1 << 2)
49
71
#define CUT_OPT_DELIM_FLGS (1 << 3)
50
#define CUT_OPT_SUPPRESS_FLGS (1 << 4)
72
#define CUT_OPT_ODELIM_FLGS (1 << 4)
73
#define CUT_OPT_SUPPRESS_FLGS (1 << 5)
74
#define CUT_OPT_NOSORT_FLGS (1 << 6)
75
#define CUT_OPT_REGEX_FLGS ((1 << 7) * ENABLE_FEATURE_CUT_REGEX)
63
82
static int cmpfunc(const void *a, const void *b)
65
84
return (((struct cut_list *) a)->startpos -
66
85
((struct cut_list *) b)->startpos);
69
static void cut_file(FILE *file, char delim, const struct cut_list *cut_lists, unsigned nlists)
88
static void cut_file(FILE *file, const char *delim, const char *odelim,
89
const struct cut_list *cut_lists, unsigned nlists)
72
92
unsigned linenum = 0; /* keep these zero-based to be consistent */
94
int spos, shoe = option_mask32 & CUT_OPT_REGEX_FLGS;
96
if (shoe) xregcomp(&reg, delim, REG_EXTENDED);
74
98
/* go through every line in the file */
75
99
while ((line = xmalloc_fgetline(file)) != NULL) {
79
103
char *printed = xzalloc(linelen + 1);
80
104
char *orig_line = line;
81
105
unsigned cl_pos = 0;
84
107
/* cut based on chars/bytes XXX: only works when sizeof(char) == byte */
85
108
if (option_mask32 & (CUT_OPT_CHAR_FLGS | CUT_OPT_BYTE_FLGS)) {
86
109
/* print the chars specified in each cut list */
87
110
for (; cl_pos < nlists; cl_pos++) {
88
spos = cut_lists[cl_pos].startpos;
89
while (spos < linelen) {
111
for (spos = cut_lists[cl_pos].startpos; spos < linelen;) {
90
112
if (!printed[spos]) {
91
113
printed[spos] = 'X';
92
114
putchar(line[spos]);
95
if (spos > cut_lists[cl_pos].endpos
96
/* NON_RANGE is -1, so if below is true,
97
* the above was true too (spos is >= 0) */
98
/* || cut_lists[cl_pos].endpos == NON_RANGE */
116
if (++spos > cut_lists[cl_pos].endpos) {
104
} else if (delim == '\n') { /* cut by lines */
121
} else if (*delim == '\n') { /* cut by lines */
105
122
spos = cut_lists[cl_pos].startpos;
107
124
/* get out if we have no more lists to process or if the lines
136
151
} else { /* cut by fields */
137
int ndelim = -1; /* zero-based / one-based problem */
138
int nfields_printed = 0;
142
delimiter[0] = delim;
145
/* does this line contain any delimiters? */
146
if (strchr(line, delim) == NULL) {
147
if (!(option_mask32 & CUT_OPT_SUPPRESS_FLGS))
152
/* process each list on this line, for as long as we've got
153
* a line to process */
154
for (; cl_pos < nlists && line; cl_pos++) {
155
spos = cut_lists[cl_pos].startpos;
157
/* find the field we're looking for */
158
while (line && ndelim < spos) {
159
field = strsep(&line, delimiter);
163
/* we found it, and it hasn't been printed yet */
164
if (field && ndelim == spos && !printed[ndelim]) {
165
/* if this isn't our first time through, we need to
166
* print the delimiter after the last field that was
168
if (nfields_printed > 0)
170
fputs(field, stdout);
171
printed[ndelim] = 'X';
172
nfields_printed++; /* shouldn't overflow.. */
177
/* keep going as long as we have a line to work with,
178
* this is a list, and we're not at the end of that
180
} while (spos <= cut_lists[cl_pos].endpos && line
181
&& cut_lists[cl_pos].endpos != NON_RANGE);
152
unsigned uu = 0, start = 0, end = 0, out = 0;
155
/* Loop through bytes, finding next delimiter */
157
/* End of current range? */
158
if (end == linelen || dcount > cut_lists[cl_pos].endpos) {
159
if (++cl_pos >= nlists) break;
160
if (option_mask32 & CUT_OPT_NOSORT_FLGS)
161
start = dcount = uu = 0;
164
/* End of current line? */
166
/* If we've seen no delimiters, check -s */
167
if (!cl_pos && !dcount && !shoe) {
168
if (option_mask32 & CUT_OPT_SUPPRESS_FLGS)
170
} else if (dcount<cut_lists[cl_pos].startpos)
174
/* Find next delimiter */
176
regmatch_t rr = {-1, -1};
178
if (!regexec(&reg, line+uu, 1, &rr, REG_NOTBOL|REG_NOTEOL)) {
185
} else if (line[end = uu++] != *delim)
188
/* Got delimiter. Loop if not yet within range. */
189
if (dcount++ < cut_lists[cl_pos].startpos) {
194
if (end != start || !shoe)
195
printf("%s%.*s", out++ ? odelim : "", end-start, line + start);
184
/* if we printed anything at all, we need to finish it with a
185
* newline cuz we were handed a chomped line */
201
/* if we printed anything, finish with newline */
197
213
/* growable array holding a series of lists */
198
214
struct cut_list *cut_lists = NULL;
199
215
unsigned nlists = 0; /* number of elements in above list */
200
char delim = '\t'; /* delimiter, default is tab */
201
216
char *sopt, *ltok;
217
const char *delim = NULL;
218
const char *odelim = NULL;
221
#define ARG "bcf"IF_FEATURE_CUT_REGEX("F")
204
222
opt = getopt32(argv, "^"
206
"\0" "b--bcf:c--bcf:f--bcf",
207
&sopt, &sopt, &sopt, &ltok
223
OPT_STR // = "b:c:f:d:O:sD"IF_FEATURE_CUT_REGEX("F:")"n"
224
"\0" "b--"ARG":c--"ARG":f--"ARG IF_FEATURE_CUT_REGEX("F--"ARG),
225
&sopt, &sopt, &sopt, &delim, &odelim IF_FEATURE_CUT_REGEX(, &sopt)
227
if (!delim || !*delim)
228
delim = (opt & CUT_OPT_REGEX_FLGS) ? "[[:space:]]+" : "\t";
229
if (!odelim) odelim = (opt & CUT_OPT_REGEX_FLGS) ? " " : delim;
209
231
// argc -= optind;
211
if (!(opt & (CUT_OPT_BYTE_FLGS | CUT_OPT_CHAR_FLGS | CUT_OPT_FIELDS_FLGS)))
212
bb_error_msg_and_die("expected a list of bytes, characters, or fields");
214
if (opt & CUT_OPT_DELIM_FLGS) {
215
if (ltok[0] && ltok[1]) { /* more than 1 char? */
216
bb_error_msg_and_die("the delimiter must be a single character");
233
if (!(opt & (CUT_OPT_BYTE_FLGS | CUT_OPT_CHAR_FLGS | CUT_OPT_FIELDS_FLGS | CUT_OPT_REGEX_FLGS)))
234
bb_simple_error_msg_and_die("expected a list of bytes, characters, or fields");
221
236
/* non-field (char or byte) cutting has some special handling */
222
if (!(opt & CUT_OPT_FIELDS_FLGS)) {
237
if (!(opt & (CUT_OPT_FIELDS_FLGS|CUT_OPT_REGEX_FLGS))) {
223
238
static const char _op_on_field[] ALIGN1 = " only when operating on fields";
225
240
if (opt & CUT_OPT_SUPPRESS_FLGS) {
226
241
bb_error_msg_and_die
227
("suppressing non-delimited lines makes sense%s",
242
("suppressing non-delimited lines makes sense%s", _op_on_field);
244
if (opt & CUT_OPT_DELIM_FLGS) {
231
245
bb_error_msg_and_die
232
246
("a delimiter may be specified%s", _op_on_field);
264
278
/* get the end pos */
265
279
if (ltok == NULL) {
267
281
} else if (!ltok[0]) {
270
284
e = xatoi_positive(ltok);
271
285
/* if the user specified an end position of 0,
272
286
* that means "til the end of the line" */
290
bb_error_msg_and_die("%d<%d", e, s);
275
291
e--; /* again, arrays are zero based, lines are 1 based */
280
294
/* add the new list */
281
295
cut_lists = xrealloc_vector(cut_lists, 4, nlists);
282
/* NB: startpos is always >= 0,
283
* while endpos may be = NON_RANGE (-1) */
296
/* NB: startpos is always >= 0 */
284
297
cut_lists[nlists].startpos = s;
285
298
cut_lists[nlists].endpos = e;
289
302
/* make sure we got some cut positions out of all that */
291
bb_error_msg_and_die("missing list of positions");
304
bb_simple_error_msg_and_die("missing list of positions");
293
306
/* now that the lists are parsed, we need to sort them to make life
294
307
* easier on us when it comes time to print the chars / fields / lines
296
qsort(cut_lists, nlists, sizeof(cut_lists[0]), cmpfunc);
309
if (!(opt & CUT_OPT_NOSORT_FLGS))
310
qsort(cut_lists, nlists, sizeof(cut_lists[0]), cmpfunc);