/******************************************** split.c copyright 1991, Michael D. Brennan This is a source file for mawk, an implementation of the AWK programming language. Mawk is distributed without warranty under the terms of the GNU General Public License, version 2, 1991. ********************************************/ /* $Log: split.c,v $ * Revision 1.3 1996/02/01 04:39:42 mike * dynamic array scheme * * Revision 1.2 1993/07/15 01:55:03 mike * rm SIZE_T & indent * * Revision 1.1.1.1 1993/07/03 18:58:21 mike * move source to cvs * * Revision 5.4 1993/05/08 18:06:00 mike * null_split * * Revision 5.3 1993/01/01 21:30:48 mike * split new_STRING() into new_STRING and new_STRING0 * * Revision 5.2 1992/07/08 21:19:09 brennan * patch2 * change in split() requires that * bi_split() call load_array() even * when cnt is 0. * * Revision 5.1 1991/12/05 07:56:31 brennan * 1.1 pre-release * */ /* split.c */ /* For all splitting up to MAX_SPLIT fields go into split_buff[], the rest go onto split_ov_list ( split overflow list) We can split one of three ways: (1) By space: space_split() and space_ov_split() (2) By regular expression: re_split() and re_ov_split() (3) By "" (null -- split into characters) null_split() and null_ov_split() */ #define TEMPBUFF_GOES_HERE #include "mawk.h" #include "symtype.h" #include "bi_vars.h" #include "bi_funct.h" #include "memory.h" #include "scan.h" #include "regexp.h" #include "field.h" SPLIT_OV *split_ov_list ; static int PROTO(re_ov_split, (char *, PTR)) ; static int PROTO(space_ov_split, (char *, char *)) ; static int PROTO(null_ov_split, (char *)) ; /* split string s of length slen on SPACE without changing s. load the pieces into STRINGS and ptrs into split_buff[] return the number of pieces */ int space_split(s, slen) register char *s ; unsigned slen ; { char *back = s + slen ; int i = 0 ; int len ; char *q ; STRING *sval ; int lcnt = MAX_SPLIT / 3 ; #define EAT_SPACE() while ( scan_code[*(unsigned char*)s] ==\ SC_SPACE ) s++ #define EAT_NON_SPACE() \ *back = ' ' ; /* sentinel */\ while ( scan_code[*(unsigned char*)s] != SC_SPACE ) s++ ;\ *back = 0 while (lcnt--) { EAT_SPACE() ; if (*s == 0) goto done ; /* mark the front with q */ q = s++ ; EAT_NON_SPACE() ; sval = split_buff[i++] = new_STRING0(len = s - q) ; memcpy(sval->str, q, len) ; EAT_SPACE() ; if (*s == 0) goto done ; q = s++ ; EAT_NON_SPACE() ; sval = split_buff[i++] = new_STRING0(len = s - q) ; memcpy(sval->str, q, len) ; EAT_SPACE() ; if (*s == 0) goto done ; q = s++ ; EAT_NON_SPACE() ; sval = split_buff[i++] = new_STRING0(len = s - q) ; memcpy(sval->str, q, len) ; } /* we've overflowed */ return i + space_ov_split(s, back) ; done: return i ; } static int space_ov_split(s, back) register char *s ; char *back ; { SPLIT_OV dummy ; register SPLIT_OV *tail = &dummy ; char *q ; int cnt = 0 ; unsigned len ; while (1) { EAT_SPACE() ; if (*s == 0) break ; /* done */ q = s++ ; EAT_NON_SPACE() ; tail = tail->link = ZMALLOC(SPLIT_OV) ; tail->sval = new_STRING0(len = s - q) ; memcpy(tail->sval->str, q, len) ; cnt++ ; } tail->link = (SPLIT_OV *) 0 ; split_ov_list = dummy.link ; return cnt ; } /* match a string with a regular expression, but only matches of positive length count */ char * re_pos_match(s, re, lenp) register char *s ; PTR re ; unsigned *lenp ; { while ((s = REmatch(s, re, lenp))) if (*lenp) return s ; else if (*s == 0) break ; else s++ ; return (char *) 0 ; } int re_split(s, re) char *s ; PTR re ; { register char *t ; int i = 0 ; unsigned mlen, len ; STRING *sval ; int lcnt = MAX_SPLIT / 3 ; while (lcnt--) { if (!(t = re_pos_match(s, re, &mlen))) goto done ; sval = split_buff[i++] = new_STRING0(len = t - s) ; memcpy(sval->str, s, len) ; s = t + mlen ; if (!(t = re_pos_match(s, re, &mlen))) goto done ; sval = split_buff[i++] = new_STRING0(len = t - s) ; memcpy(sval->str, s, len) ; s = t + mlen ; if (!(t = re_pos_match(s, re, &mlen))) goto done ; sval = split_buff[i++] = new_STRING0(len = t - s) ; memcpy(sval->str, s, len) ; s = t + mlen ; } /* we've overflowed */ return i + re_ov_split(s, re) ; done: split_buff[i++] = new_STRING(s) ; return i ; } /* we've overflowed split_buff[] , put the rest on the split_ov_list return number of pieces */ static int re_ov_split(s, re) char *s ; PTR re ; { SPLIT_OV dummy ; register SPLIT_OV *tail = &dummy ; int cnt = 1 ; char *t ; unsigned len, mlen ; while ((t = re_pos_match(s, re, &mlen))) { tail = tail->link = ZMALLOC(SPLIT_OV) ; tail->sval = new_STRING0(len = t - s) ; memcpy(tail->sval->str, s, len) ; s = t + mlen ; cnt++ ; } /* and one more */ tail = tail->link = ZMALLOC(SPLIT_OV) ; tail->sval = new_STRING(s) ; tail->link = (SPLIT_OV *) 0 ; split_ov_list = dummy.link ; return cnt ; } int null_split(s) char *s ; { int cnt = 0 ; /* number of fields split */ STRING *sval ; int i = 0 ; /* indexes split_buff[] */ while (*s) { if (cnt == MAX_SPLIT) return cnt + null_ov_split(s) ; sval = new_STRING0(1) ; sval->str[0] = *s++ ; split_buff[i++] = sval ; cnt++ ; } return cnt ; } static int null_ov_split(s) char *s ; { SPLIT_OV dummy ; SPLIT_OV *ovp = &dummy ; int cnt = 0 ; while (*s) { ovp = ovp->link = ZMALLOC(SPLIT_OV) ; ovp->sval = new_STRING0(1) ; ovp->sval->str[0] = *s++ ; cnt++ ; } ovp->link = (SPLIT_OV *) 0 ; split_ov_list = dummy.link ; return cnt ; } /* split(s, X, r) split s into array X on r entry: sp[0] holds r sp[-1] pts at X sp[-2] holds s */ CELL * bi_split(sp) register CELL *sp ; { int cnt ; /* the number of pieces */ if (sp->type < C_RE) cast_for_split(sp) ; /* can be C_RE, C_SPACE or C_SNULL */ sp -= 2 ; if (sp->type < C_STRING) cast1_to_s(sp) ; if (string(sp)->len == 0) /* nothing to split */ cnt = 0 ; else switch ((sp + 2)->type) { case C_RE: cnt = re_split(string(sp)->str, (sp + 2)->ptr) ; break ; case C_SPACE: cnt = space_split(string(sp)->str, string(sp)->len) ; break ; case C_SNULL: /* split on empty string */ cnt = null_split(string(sp)->str) ; break ; default: bozo("bad splitting cell in bi_split") ; } free_STRING(string(sp)) ; sp->type = C_DOUBLE ; sp->dval = (double) cnt ; array_load((ARRAY) (sp + 1)->ptr, cnt) ; return sp ; }