1
/* ploticus data display engine. Software, documentation, and examples.
2
* Copyright 1998-2002 Stephen C. Grubb (scg@jax.org).
3
* Covered by GPL; see the file ./Copyright for details. */
5
/* PROC TABULATE - do frequency distributions, 1-D or 2-D */
7
/* Uses data read by most recent getdata; result becomes "current data set"
9
scg 1/11/00 - added support for special units even when categories not in use
10
scg 1/21/00 - changed tab[][], total[], and grantotal from int to double.
11
added accumfield option.
12
scg 1/21/00 - for 2-way dist, column tags are used to set getdata field names
23
static int fsort(), freqsort();
28
char attr[40], val[256];
34
double tab[MAXROWS][MAXCOLS];
35
double total[2][MAXROWS];
37
char list[2][MAXROWS][WORDLEN];
43
int j, m, fld, nlist[3];
45
int order[2][MAXROWS], valuesgiven[2], nval, dopercents;
46
int forcevertical, forcehorizontal;
47
/* int outputbyfreq[2], reverse[2]; */
53
char valuelist[2][256];
69
char hival[80], lowval[80];
70
double hiv[MAXBINS], lowv[MAXBINS];
75
char rangespec[2][80];
76
int axisset[2]; /* added 1/11/00 scg */
77
char tag[80]; /* added 1/11/00 scg */
78
char fieldnamelist[256]; /* added 1/21/00 scg */
83
TDH_errprog( "pl proc tabulate" );
87
field[0] = -1; field[1] = -1;
89
valuesgiven[0] = valuesgiven[1] = 0;
91
forcevertical = 1; forcehorizontal = 0;
92
ordering[0] = '?'; ordering[1] = '?';
93
strcpy( savetable, "" );
94
gran[0] = gran[1] = 0.0;
95
strcpy( selectex, "" );
96
doranges[0] = doranges[1] = 0;
99
strcpy( rangesepchar, "-" );
100
strcpy( showrange, "" );
101
strcpy( rangespec[0], "" );
102
strcpy( rangespec[1], "" );
103
axisset[0] = axisset[1] = 0; /* was axis1 or axis2 set explicitly - added 1/11/00 scg */
107
/* get attributes.. */
110
line = getnextattr( first, attr, val, &lvp, &nt );
111
if( line == NULL ) break;
113
lineval = &line[lvp];
115
if( stricmp( attr, "datafield1" )==0 ) {
116
field[0] = fref( val ) - 1;
119
else if( stricmp( attr, "datafield2" )==0 ) {
120
field[1] = fref( val ) - 1;
123
else if( stricmp( attr, "accumfield" )==0 ) {
124
accumfield = fref( val ) -1;
126
else if( stricmp( attr, "valuelist1" )==0 ) strcpy( valuelist[0], lineval );
127
else if( stricmp( attr, "valuelist2" )==0 ) strcpy( valuelist[1], lineval );
128
else if( stricmp( attr, "doranges1" )==0 ) {
129
if( strnicmp( val, YESANS, 1 )==0 ) doranges[0] = 1;
130
else doranges[0] = 0;
132
else if( stricmp( attr, "doranges2" )==0 ) {
133
if( strnicmp( val, YESANS, 1 )==0 ) doranges[1] = 1;
134
else doranges[1] = 0;
136
else if( stricmp( attr, "showrangelowonly" )==0 ) {
137
if( strnicmp( val, YESANS, 1 )==0 ) strcpy( showrange, "low" );
139
else if( stricmp( attr, "showrange" )==0 ) strcpy( showrange, val );
140
else if( stricmp( attr, "rangesepchar" )==0 ) strcpy( rangesepchar, val );
141
else if( stricmp( attr, "order1" )==0 ) {
142
if( strnicmp( val, "mag", 3 )==0 ) ordering[0] = 'm';
143
else if( strnicmp( val, "rev", 3 )==0 ) ordering[0] = 'r';
144
else if( strnicmp( val, "nat", 3 )==0 ) ordering[0] = 'n';
145
else if( stricmp( val, "none" )==0 ) ordering[0] = '0';
147
else if( stricmp( attr, "ordering2" )==0 ) {
148
if( strnicmp( val, "mag", 3 ) == 0 ) ordering[1] = 'm';
149
else if( strnicmp( val, "rev", 3 ) == 0 ) ordering[1] = 'r';
150
else if( strnicmp( val, "nat", 3 ) == 0 ) ordering[1] = 'n';
151
else if( stricmp( val, "none" ) == 0 ) ordering[1] = '0';
154
else if( stricmp( attr, "percents" )==0 ) {
155
if( strnicmp( val, YESANS, 1 )==0 ) dopercents = 1;
158
else if( stricmp( attr, "savetable" )==0 ) strcpy( savetable, val );
160
else if( stricmp( attr, "select" )==0 ) strcpy( selectex, lineval );
162
else if( stricmp( attr, "axis1" )==0 ) {
163
axis[0] = tolower(val[0]);
166
else if( stricmp( attr, "axis2" )==0 ) {
167
axis[1] = tolower(val[0]);
171
else if( stricmp( attr, "rangespec1" )==0 ) strcpy( rangespec[0], lineval );
172
else if( stricmp( attr, "rangespec2" )==0 ) strcpy( rangespec[1], lineval );
174
else Eerr( 1, "tabulate attribute not recognized", attr );
178
/* overrides and degenerate cases */
179
/* -------------------------- */
180
if( field[0] < 0 ) return( Eerr( 4984, "datafield1 must be specified", "" ) );
182
if( ordering[0] == '?' ) {
183
if( valuelist[0][0] != '\0' ) ordering[0] = '0';
184
else ordering[0] = 'n';
186
if( ordering[1] == '?' ) {
187
if( valuelist[1][0] != '\0' ) ordering[1] = '0';
188
else ordering[1] = 'n';
193
/* now do the computation work.. */
194
/* -------------------------- */
196
if( savetable[0] != '\0' ) {
197
if( stricmp( savetable, "stdout" )==0 ) sfp = stdout;
198
else if( stricmp( savetable, "stderr" )==0 ) sfp = stderr;
199
else sfp = fopen( savetable, "w" ); /* diagnostics */
200
if( sfp == NULL ) sfp = Diagfp;
204
/* initialize tables */
206
for( i = 0; i < MAXROWS; i++ ) {
207
if( i < 2 )nlist[i] = 0;
208
for( j = 0; j < MAXCOLS; j++ ) { tab[i][j] = 0.0; if( i < 2 ) { total[i][j] = 0.0; } }
209
order[0][i] = order[1][i] = i;
212
/* get value list (a comma or space delimited list of values; output distribution will
213
be for only these values (in the order given?) */
214
for( j = 0; j < ndim; j++ ) { /* for all dimensions (1 or 2).. */
215
if( valuelist[j][0] != '\0' ) {
217
for( i = 0, ix = 0; ; i++ ) {
218
if( GL_getseg( tmp, valuelist[j], &ix, " ," ) ) break;
219
strcpy( list[j][i], tmp );
221
if( doranges[j] ) { /* find lowv and hiv */
222
if( i >= MAXBINS ) return( Eerr( 9285, "Sorry, max number of bins exceeded","" ) );
224
GL_getseg( lowval, tmp, &ixx, rangesepchar );
225
strcpy( hival, &tmp[ixx] );
226
if( stricmp( lowval, "c" )==0 && i > 0 ) lowv[i] = hiv[i-1]; /* contiguous*/
227
else lowv[i] = Econv( axis[j], lowval );
228
if( Econv_error( ) ) { /* a non-conformant value.. */
229
lowv[i] = PLHUGE; /* so that we know to compare
230
to value for non-numerics */
234
hiv[i] = Econv( axis[j], hival );
235
Euprint( lowval, axis[j], lowv[i], "" );
236
if( tolower(showrange[0]) == 'l' ) strcpy( list[j][i], lowval );
237
else if( tolower(showrange[0]) == 'a' ) Euprint( list[j][i], axis[j], (lowv[i]+hiv[i])/2.0, "" );
239
else sprintf( list[j][i], "%s%s%s", lowval, rangesepchar, hival );
247
if( rangespec[j][0] != '\0' ) {
248
double binsiz, hilimit, rw;
249
doranges[j] = 1; /* implied */
250
nt = sscanf( rangespec[j], "%s %lf %s", lowval, &binsiz, hival );
251
if( nt < 2 || nt > 3 ) return( Eerr( 2740, "2 or 3 values expected in rangespec",
253
rw = Econv( axis[j], lowval );
254
if( Econv_error() ) return( Eerr( 2750, "warning: error on rangespec lowlimit", rangespec[j] ) );
256
hilimit = Econv(axis[j], hival );
257
if( Econv_error() ) return( Eerr( 2750, "warning: error on rangespec hilimit", rangespec[j] ) );
260
if( !scalebeenset() ) return( Eerr( 2479, "rangespec must have 3 values since scaleing has not yet been set", "" ));
261
hilimit = Elimit( axis[j], 'h', 's' );
263
for( i = 0; i < MAXBINS ; i++ ) {
267
Euprint( lowval, axis[j], lowv[i], "" );
268
Euprint( hival, axis[j], hiv[i], "" );
269
if( tolower(showrange[0]) == 'l' ) strcpy( list[j][i], lowval );
270
else if( tolower(showrange[0]) == 'a' ) Euprint( list[j][i], axis[j], (lowv[i]+hiv[i])/2.0, "" );
271
else sprintf( list[j][i], "%s%s%s", lowval, rangesepchar, hival );
273
if( rw > hilimit ) break;
278
if( doranges[j] && !valuesgiven[j] )
279
return( Eerr( 2052, "A values list must be given when doing ranges.", "" ));
282
/* make a description string */
283
if( ndim == 1 ) sprintf( buf, "// 1-way distribution on field %d %s", field[0]+1, selectex );
284
if( ndim == 2 ) sprintf( buf, "// 2-way distribution on field %d (down) by field %d (across) %s",
285
field[0]+1, field[1]+1, selectex );
286
/* set a DMS variable to the description? */
287
if( savetable[0] != '\0' ) fprintf( sfp, "%s\n", buf );
290
/* process from data already read in earlier.. */
293
if( Nrecords[Dsel] < 1 ) return( Eerr( 32, "No data has been read yet.", "" ) );
295
/* process input data.. */
297
for( irow = 0; irow < Nrecords[Dsel]; irow++ ) {
299
if( selectex[0] != '\0' ) { /* process against selection condition if any.. */
300
stat = do_select( selectex, irow, &result );
301
if( stat != 0 ) { Eerr( stat, "Select error", selectex ); continue; }
302
if( result == 0 ) continue; /* reject */
305
/* for each item requested (1 or 2) */
306
for( i = 0; i < ndim; i++ ) {
308
/* strcpy( val, data[ field[i] ] ); */
309
strcpy( val, da( irow, field[i] ));
310
val[WORDLEN-1] = '\0';
314
/**** if ranges have not been pre defined... */
315
if( !doranges[i] ) { /* classify by value */
317
/**** a. if axis has been set, use special units, Econv, etc. */
318
if( axisset[i] ) { /* code copied from below; uses fval instead of val */
319
fval = Econv( axis[i], val );
321
/* see if already encountered (compare fval) */
322
for( j = 0; j < nlist[i]; j++ )
323
if( GL_close_to( fval, atof(list[i][j]), 0.001 ))
324
{ select[i] = j; break; }
326
if( j == nlist[i] && valuesgiven[i] ) goto NEXT;
328
/* otherwise, add the internal representation to list */
329
if( j == nlist[i] ) {
330
if( (i == 0 && nlist[i] >= MAXROWS) || (i == 1 && nlist[i] >= MAXCOLS) )
331
fprintf( Errfp, "tabulate warning, sorry, table capacity exceeded, skipping %s\n", val );
334
sprintf( list[i][j], "%f", fval );
335
else strcpy( list[i][j], val );
343
/**** b. axis has not been set, dont use Econv */
346
/* see if we've already encountered current val */
347
for( j = 0; j < nlist[i]; j++ ) if( strcmp( val, list[i][j] )== 0 )
348
{ select[i] = j; break; }
350
if( j == nlist[i] && valuesgiven[i] ) goto NEXT; /* not in value list-
354
if( j == nlist[i] ) {
355
if( (i == 0 && nlist[i] >= MAXROWS) || (i == 1 && nlist[i] >= MAXCOLS) )
356
fprintf( Errfp, "tabulate warning, sorry, table capacity exceeded, skipping %s\n", val );
358
strcpy( list[i][j], val );
367
/**** if ranges have been defined.. */
368
else if( doranges[i] ) {
369
fval = Econv( axis[i], val );
371
/* go through list backwards so that boundary values are
372
put into higher category.. */
373
for( j = nlist[i] - 1; j >= 0; j-- ) {
374
if( lowv[j] >= PLHUGE && hiv[j] <= NEGHUGE ) {
375
if( strcmp( list[i][j], val )==0 ) {
380
if( Econv_error() ) continue;
381
if( fval >= lowv[j] && fval <= hiv[j] ) {
386
/* if( j == nlist[i] ) goto NEXT; */ /* not in value list- discard */
387
if( j < 0 ) goto NEXT; /* not in value list- discard */
391
if( accumfield >= 0 ) inc = atof( da( irow, accumfield ) );
395
tab[ 0 ][ select[ 0 ] ] += inc;
398
else if( ndim == 2 ) {
399
( tab[ select[0] ][ select[1] ] ) += inc;
400
total[0][select[0]] += inc;
401
total[1][select[1]] += inc;
411
/* put rows/cols in some kind of order */
413
if( ordering[0] == 'n' ) fsort( list[0], nlist[0], order[0] );
414
else if( ordering[0] == 'm' ) freqsort(tab[0], nlist[0], order[0], 0 );
415
else if( ordering[0] == 'r' ) freqsort(tab[0], nlist[0], order[0], 1 );
417
else if( ndim == 2 ) {
418
if( ordering[0] == 'n' ) fsort( list[0], nlist[0], order[0] );
419
else if( ordering[0] == 'm' ) freqsort(tab[0], nlist[0], order[0], 0 );
420
else if( ordering[0] == 'r' ) freqsort(tab[0], nlist[0], order[0], 1 );
422
if( ordering[1] == 'n' ) fsort( list[1], nlist[1], order[1] );
423
else if( ordering[1] == 'm' ) freqsort(total[1], nlist[1], order[1], 0 );
424
else if( ordering[1] == 'r' ) freqsort(total[1], nlist[1], order[1], 1 );
428
/* --------------------- */
429
/* generate the results */
430
/* --------------------- */
436
StartD[Dsel] = ND[0];
440
/* ------------------ */
441
/* for 1-way tables.. */
442
/* ------------------ */
445
/* set PL data array dimensions for slot 2 */
447
Nrecords[Dsel] = nlist[0];
449
sprintf( buf, "//%-20s N", "Value" );
451
sprintf( buf, "%s Pct", buf );
454
if( savetable[0] != '\0' ) fprintf( sfp, "%s\n\n", buf );
457
/* do output lines.. */
458
for( i = 0; i < nlist[0]; i++ ) {
459
if( !doranges[0] && axisset[0] )
460
Euprint( tag, axis[0], atof(list[0][order[0][i]]), "" );
461
else strcpy( tag, list[0][order[0][i]] );
463
sprintf( buf, "%-20s %5.0f", tag, tab[0][order[0][i]] );
464
if( dopercents ) sprintf( buf, "%s %3.0f", buf,
465
(tab[0][order[0][i]])/(total[0][0]+0.0001)*100 );
466
if( savetable[0] != '\0' ) fprintf( sfp, " %s\n", buf );
468
/* put the results into the PL data array.. */
471
catitem( tag, &curd, &davail );
474
sprintf( buf, "%g", tab[0][order[0][i]] );
475
catitem( buf, &curd, &davail );
478
sprintf( buf, "%g", (double)(tab[0][order[0][i]])/(total[0][0]+0.0001)*100 );
479
catitem( buf, &curd, &davail );
482
ND[Dsel] = curd - StartD[Dsel];
484
/* lastly, do total line */
485
sprintf( buf, "%-20s %5.0f", "//Total:", total[0][0] );
486
if( dopercents ) sprintf( buf, "%s 100", buf );
488
/* Use DMS variable here? */
489
if( savetable[0] != '\0' ) fprintf( sfp, " %s\n", buf );
493
/* ------------------ */
494
/* for 2-way tables.. */
495
/* ------------------ */
496
else if( ndim == 2 ) {
498
/* set PL data array dimensions for slot 2 */
499
if( dopercents ) Nfields[Dsel] = ((nlist[1]+1)*2) + 1;
500
else Nfields[Dsel] = (nlist[1]+1) + 1; /* label, cols, rowtotal */
501
Nrecords[Dsel] = nlist[0];
505
sprintf( buf, "//%-20s", "" );
506
strcpy( fieldnamelist, "rowname" );
507
for( i = 0; i < nlist[1]; i++ ) {
508
if( !doranges[1] && axisset[1] )
509
Euprint( tag, axis[1], atof(list[1][order[1][i]]), "" );
510
else strcpy( tag, list[1][order[1][i]] );
512
sprintf( buf, "%s%6s ", buf, tag );
513
if( dopercents ) sprintf( buf, "%s%s", buf, " Pct " );
515
/* set up the column tags as data field names.. */
516
strcat( fieldnamelist, "," );
517
strcat( fieldnamelist, tag );
520
if( fieldnamelist[0] != '\0' ) {
521
definefieldnames( fieldnamelist );
522
fprintf( Diagfp, "proc tabulate: field names are now: %s\n", fieldnamelist );
525
sprintf( buf, "%s%s ", buf, " Total" );
526
if( dopercents ) sprintf( buf, "%s%s", buf, " Pct " );
527
if( savetable[0] != '\0' ) fprintf( sfp, "%s\n\n", buf );
529
/* do output lines.. */
530
for( j = 0; j < nlist[0]; j++ ) {
531
if( !doranges[0] && axisset[0] )
532
Euprint( tag, axis[0], atof(list[0][order[0][j]]), "" );
533
else strcpy( tag, list[0][order[0][j]] );
535
sprintf( buf, "%-20s", tag );
537
/* put label into PL data */
538
catitem( tag, &curd, &davail );
540
for( i = 0; i < nlist[1]; i++ ) {
541
sprintf( buf, "%s %5.0f ", buf, tab[order[0][j]][order[1][i]] );
544
sprintf( numbuf, "%g", tab[order[0][j]] [order[1][i]] );
545
catitem( numbuf, &curd, &davail );
548
sprintf( buf, "%s %3.0f ", buf,
549
(double)(tab[order[0][j]][order[1][i]])/
550
(total[1][order[1][i]]+0.00001) * 100 );
552
sprintf( numbuf, "%g",
553
(double)(tab[order[0][j]][order[1][i]])/
554
(total[1][order[1][i]]+0.00001) * 100 );
555
catitem( numbuf, &curd, &davail );
559
sprintf( buf, "%s%5.0f ", buf, total[0][order[0][j]] );
561
sprintf( numbuf, "%g", total[0][order[0][j]] );
562
catitem( numbuf, &curd, &davail );
565
sprintf( buf, "%s %3.0f", buf,
566
(double)(total[0][order[0][j]]) / (grantotal+0.00001) * 100 );
567
sprintf( numbuf, "%g",
568
(double)(total[0][order[0][j]]) / (grantotal+0.00001) * 100 );
569
catitem( numbuf, &curd, &davail );
572
if( savetable[0] != '\0' ) fprintf( sfp, " %s\n", buf );
574
ND[Dsel] = curd - StartD[Dsel];
576
/* lastly, do total line.. */
577
sprintf( buf, "%-19s", "//Total:" );
578
for( i = 0; i < nlist[1]; i++ ) {
579
sprintf( buf, "%s %5.0f ", buf, total[1][order[1][i]] );
580
if( dopercents ) sprintf( buf, "%s 100 ", buf );
582
sprintf( buf, "%s %5.0f", buf, grantotal );
583
if( dopercents ) sprintf( buf, "%s 100", buf );
584
/* set DMS var here?? */
585
if( savetable[0] != '\0' ) fprintf( sfp, " %s\n", buf );
589
/* fprintf( Diagfp, "After Xtab: ND=%d Nrecords=%d Nfields=%d\n",
590
ND[Dsel], Nrecords[Dsel], Nfields[Dsel] ); */
592
setintvar( "NRECORDS", Nrecords[Dsel] );
593
setintvar( "NFIELDS", Nfields[Dsel] );
595
/* for( i = 0; i < Nrecords[Dsel]; i++ ) {
596
* for( j = 0; j < Nfields[Dsel]; j++ ) fprintf( Diagfp, "|%s| ", da(i,j) );
597
* fprintf( Diagfp, "\n" );
607
/* ================ */
608
/* sort bin names numerically if all numeric, alphabetically otherwise */
610
fsort( data, nd, order )
611
char data[][WORDLEN];
614
int i, j, used[MAXROWS], mincell, first, allnum, foo, idiff;
618
for( i = 0; i < nd; i++ ) {
620
if( atof( data[i] ) < -31999) strcpy( data[i], "-31000" );
621
if( !GL_goodnum( data[i], &foo )) { allnum = NO; }
624
for( i = 0; i < nd; i++ ) {
626
for( j = 0; j < nd; j++ ) {
627
if( used[j] ) continue;
628
if( first ) { mincell = j; first = NO; }
630
diff = atof( data[j] ) - atof( data[mincell] );
631
if( diff <= 0.0 ) mincell = j;
634
idiff = (strcmp( data[j], data[mincell] ));
635
if( idiff <= 0 ) mincell = j;
643
/* ================ */
645
freqsort( counts, nd, order, reverse )
647
int nd, order[], reverse;
649
int i, j, used[MAXROWS], firstcell, first, diff;
651
for( i = 0; i < nd; i++ )
654
for( i = 0; i < nd; i++ ) {
656
for( j = 0; j < nd; j++ ) {
657
if( used[j] ) continue;
658
if( first ) { firstcell = j; first = NO; }
659
diff = counts[j] - counts[firstcell];
662
case YES: if (diff >= 0) firstcell = j; break;
663
case NO: if (diff <= 0) firstcell = j; break;
666
order[i] = firstcell;