1
/**********************************************************************
2
* $Id: lwgeom_estimate.c 4168 2009-06-11 16:44:03Z pramsey $
4
* PostGIS - Spatial Types for PostgreSQL
5
* http://postgis.refractions.net
6
* Copyright 2001-2006 Refractions Research Inc.
8
* This is free software; you can redistribute and/or modify it under
9
* the terms of the GNU General Public Licence. See the COPYING file.
11
**********************************************************************/
14
#include "executor/spi.h"
16
#include "commands/vacuum.h"
17
#include "nodes/relation.h"
18
#include "parser/parsetree.h"
19
#include "utils/array.h"
20
#include "utils/lsyscache.h"
21
#include "utils/syscache.h"
23
#include "liblwgeom.h"
24
#include "lwgeom_pg.h"
37
* Assign a number to the postgis statistics kind
41
* 1-100: reserved for assignment by the core Postgres project
42
* 100-199: reserved for assignment by PostGIS
43
* 200-9999: reserved for other globally-known stats kinds
44
* 10000-32767: reserved for private site-local use
47
#define STATISTIC_KIND_GEOMETRY 100
50
* Define this if you want to use standard deviation based
51
* histogram extent computation. If you do, you can also
52
* tweak the deviation factor used in computation with SDFACTOR.
55
#define USE_STANDARD_DEVIATION 1
58
typedef struct GEOM_STATS_T
60
/* cols * rows = total boxes in grid */
64
/* average bounding box area of not-null features */
65
float4 avgFeatureArea;
68
* average number of histogram cells
69
* covered by the sample not-null features
71
float4 avgFeatureCells;
74
float4 xmin,ymin, xmax, ymax;
77
* variable length # of floats for histogram
83
static float8 estimate_selectivity(BOX2DFLOAT4 *box, GEOM_STATS *geomstats);
86
#define SHOW_DIGS_DOUBLE 15
87
#define MAX_DIGS_DOUBLE (SHOW_DIGS_DOUBLE + 6 + 1 + 3 +1)
90
* Default geometry selectivity factor
92
#define DEFAULT_GEOMETRY_SEL 0.000005
95
* Default geometry join selectivity factor
97
#define DEFAULT_GEOMETRY_JOINSEL 0.000005
100
* Define this to actually DO join selectivity
101
* (as opposed to just returning the default JOINSEL value)
102
* Note that this is only possible when compiling postgis
103
* against pgsql >= 800
105
#define REALLY_DO_JOINSEL 1
107
Datum LWGEOM_gist_sel(PG_FUNCTION_ARGS);
108
Datum LWGEOM_gist_joinsel(PG_FUNCTION_ARGS);
109
Datum LWGEOM_estimated_extent(PG_FUNCTION_ARGS);
110
Datum LWGEOM_analyze(PG_FUNCTION_ARGS);
113
#if ! REALLY_DO_JOINSEL
115
* JOIN selectivity in the GiST && operator
116
* for all PG versions
118
PG_FUNCTION_INFO_V1(LWGEOM_gist_joinsel);
119
Datum LWGEOM_gist_joinsel(PG_FUNCTION_ARGS)
121
POSTGIS_DEBUGF(2, "LWGEOM_gist_joinsel called (returning %f)",
122
DEFAULT_GEOMETRY_JOINSEL);
124
PG_RETURN_FLOAT8(DEFAULT_GEOMETRY_JOINSEL);
127
#else /* REALLY_DO_JOINSEL */
129
int calculate_column_intersection(BOX2DFLOAT4 *search_box, GEOM_STATS *geomstats1, GEOM_STATS *geomstats2);

/**
 * Calculate the intersection of two columns from their geomstats extents.
 *
 * @param search_box  output: receives the intersection rectangle; it is
 *                    left untouched when the extents do not overlap
 * @param geomstats1  statistics of the first column (only the extent
 *                    members xmin/ymin/xmax/ymax are read)
 * @param geomstats2  statistics of the second column (same members read)
 *
 * @return 1 (true) if a valid intersection was found, 0 (false) if
 *         there is no overlap
 */
int
calculate_column_intersection(BOX2DFLOAT4 *search_box, GEOM_STATS *geomstats1, GEOM_STATS *geomstats2)
{
	float8 i_xmin = LW_MAX(geomstats1->xmin, geomstats2->xmin);
	float8 i_ymin = LW_MAX(geomstats1->ymin, geomstats2->ymin);
	float8 i_xmax = LW_MIN(geomstats1->xmax, geomstats2->xmax);
	float8 i_ymax = LW_MIN(geomstats1->ymax, geomstats2->ymax);

	/* If the rectangles don't intersect, return false */
	if (i_xmin > i_xmax || i_ymin > i_ymax)
		return 0;

	/* Otherwise return the rectangle in search_box */
	search_box->xmin = i_xmin;
	search_box->ymin = i_ymin;
	search_box->xmax = i_xmax;
	search_box->ymax = i_ymax;

	return 1;
}
158
* JOIN selectivity in the GiST && operator
159
* for all PG versions
161
PG_FUNCTION_INFO_V1(LWGEOM_gist_joinsel);
162
Datum LWGEOM_gist_joinsel(PG_FUNCTION_ARGS)
164
PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
166
/* Oid operator = PG_GETARG_OID(1); */
167
List *args = (List *) PG_GETARG_POINTER(2);
168
JoinType jointype = (JoinType) PG_GETARG_INT16(3);
174
HeapTuple stats1_tuple, stats2_tuple, class_tuple;
175
GEOM_STATS *geomstats1, *geomstats2;
177
* These are to avoid casting the corresponding
178
* "type-punned" pointers, which would break
179
* "strict-aliasing rules".
181
GEOM_STATS **gs1ptr=&geomstats1, **gs2ptr=&geomstats2;
182
int geomstats1_nvalues = 0, geomstats2_nvalues = 0;
183
float8 selectivity1 = 0.0, selectivity2 = 0.0;
184
float4 num1_tuples = 0.0, num2_tuples = 0.0;
185
float4 total_tuples = 0.0, rows_returned = 0.0;
186
BOX2DFLOAT4 search_box;
190
* Join selectivity algorithm. To calculation the selectivity we
191
* calculate the intersection of the two column sample extents,
192
* sum the results, and then multiply by two since for each
193
* geometry in col 1 that intersects a geometry in col 2, the same
198
POSTGIS_DEBUGF(3, "LWGEOM_gist_joinsel called with jointype %d", jointype);
201
* We'll only respond to an inner join/unknown context join
203
if (jointype != JOIN_INNER)
205
elog(NOTICE, "LWGEOM_gist_joinsel called with incorrect join type");
206
PG_RETURN_FLOAT8(DEFAULT_GEOMETRY_JOINSEL);
210
* Determine the oids of the geometry columns we are working with
212
arg1 = (Node *) linitial(args);
213
arg2 = (Node *) lsecond(args);
215
if (!IsA(arg1, Var) || !IsA(arg2, Var))
217
elog(DEBUG1, "LWGEOM_gist_joinsel called with arguments that are not column references");
218
PG_RETURN_FLOAT8(DEFAULT_GEOMETRY_JOINSEL);
224
relid1 = getrelid(var1->varno, root->parse->rtable);
225
relid2 = getrelid(var2->varno, root->parse->rtable);
227
POSTGIS_DEBUGF(3, "Working with relations oids: %d %d", relid1, relid2);
229
/* Read the stats tuple from the first column */
230
stats1_tuple = SearchSysCache(STATRELATT, ObjectIdGetDatum(relid1), Int16GetDatum(var1->varattno), 0, 0);
231
if ( ! stats1_tuple )
233
POSTGIS_DEBUG(3, " No statistics, returning default geometry join selectivity");
235
PG_RETURN_FLOAT8(DEFAULT_GEOMETRY_JOINSEL);
240
if ( ! get_attstatsslot(stats1_tuple, 0, 0,
241
STATISTIC_KIND_GEOMETRY, InvalidOid, NULL, NULL,
242
(float4 **)gs1ptr, &geomstats1_nvalues) )
244
POSTGIS_DEBUG(3, " STATISTIC_KIND_GEOMETRY stats not found - returning default geometry join selectivity");
246
ReleaseSysCache(stats1_tuple);
247
PG_RETURN_FLOAT8(DEFAULT_GEOMETRY_JOINSEL);
251
/* Read the stats tuple from the second column */
252
stats2_tuple = SearchSysCache(STATRELATT, ObjectIdGetDatum(relid2), Int16GetDatum(var2->varattno), 0, 0);
253
if ( ! stats2_tuple )
255
POSTGIS_DEBUG(3, " No statistics, returning default geometry join selectivity");
257
free_attstatsslot(0, NULL, 0, (float *)geomstats1,
259
ReleaseSysCache(stats1_tuple);
260
PG_RETURN_FLOAT8(DEFAULT_GEOMETRY_JOINSEL);
264
if ( ! get_attstatsslot(stats2_tuple, 0, 0,
265
STATISTIC_KIND_GEOMETRY, InvalidOid, NULL, NULL,
266
(float4 **)gs2ptr, &geomstats2_nvalues) )
268
POSTGIS_DEBUG(3, " STATISTIC_KIND_GEOMETRY stats not found - returning default geometry join selectivity");
270
free_attstatsslot(0, NULL, 0, (float *)geomstats1,
272
ReleaseSysCache(stats2_tuple);
273
ReleaseSysCache(stats1_tuple);
274
PG_RETURN_FLOAT8(DEFAULT_GEOMETRY_JOINSEL);
279
* Setup the search box - this is the intersection of the two column
282
calculate_column_intersection(&search_box, geomstats1, geomstats2);
284
POSTGIS_DEBUGF(3, " -- geomstats1 box: %.15g %.15g, %.15g %.15g",geomstats1->xmin,geomstats1->ymin,geomstats1->xmax,geomstats1->ymax);
285
POSTGIS_DEBUGF(3, " -- geomstats2 box: %.15g %.15g, %.15g %.15g",geomstats2->xmin,geomstats2->ymin,geomstats2->xmax,geomstats2->ymax);
286
POSTGIS_DEBUGF(3, " -- calculated intersection box is : %.15g %.15g, %.15g %.15g",search_box.xmin,search_box.ymin,search_box.xmax,search_box.ymax);
289
/* Do the selectivity */
290
selectivity1 = estimate_selectivity(&search_box, geomstats1);
291
selectivity2 = estimate_selectivity(&search_box, geomstats2);
293
POSTGIS_DEBUGF(3, "selectivity1: %.15g selectivity2: %.15g", selectivity1, selectivity2);
295
/* Free the statistic tuples */
296
free_attstatsslot(0, NULL, 0, (float *)geomstats1, geomstats1_nvalues);
297
ReleaseSysCache(stats1_tuple);
299
free_attstatsslot(0, NULL, 0, (float *)geomstats2, geomstats2_nvalues);
300
ReleaseSysCache(stats2_tuple);
303
* OK, so before we calculate the join selectivity we also need to
304
* know the number of tuples in each of the columns since
305
* estimate_selectivity returns the number of estimated tuples
306
* divided by the total number of tuples - hence we need to
307
* multiply out the returned selectivity by the total number of rows.
309
class_tuple = SearchSysCache(RELOID, ObjectIdGetDatum(relid1),
312
if (HeapTupleIsValid(class_tuple))
314
Form_pg_class reltup = (Form_pg_class) GETSTRUCT(class_tuple);
315
num1_tuples = reltup->reltuples;
318
ReleaseSysCache(class_tuple);
321
class_tuple = SearchSysCache(RELOID, ObjectIdGetDatum(relid2),
324
if (HeapTupleIsValid(class_tuple))
326
Form_pg_class reltup = (Form_pg_class) GETSTRUCT(class_tuple);
327
num2_tuples = reltup->reltuples;
330
ReleaseSysCache(class_tuple);
334
* Finally calculate the estimate of the number of rows returned
336
* = 2 * (nrows from col1 + nrows from col2) /
337
* total nrows in col1 x total nrows in col2
339
* The factor of 2 accounts for the fact that for each tuple in
340
* col 1 matching col 2,
341
* there will be another match in col 2 matching col 1
344
total_tuples = num1_tuples * num2_tuples;
345
rows_returned = 2 * ((num1_tuples * selectivity1) +
346
(num2_tuples * selectivity2));
348
POSTGIS_DEBUGF(3, "Rows from rel1: %f", num1_tuples * selectivity1);
349
POSTGIS_DEBUGF(3, "Rows from rel2: %f", num2_tuples * selectivity2);
350
POSTGIS_DEBUGF(3, "Estimated rows returned: %f", rows_returned);
353
* One (or both) tuple count is zero...
354
* We return default selectivity estimate.
355
* We could probably attempt at an estimate
356
* w/out looking at tables tuple count, with
357
* a function of selectivity1, selectivity2.
359
if ( ! total_tuples )
361
POSTGIS_DEBUG(3, "Total tuples == 0, returning default join selectivity");
363
PG_RETURN_FLOAT8(DEFAULT_GEOMETRY_JOINSEL);
366
if ( rows_returned > total_tuples )
367
PG_RETURN_FLOAT8(1.0);
369
PG_RETURN_FLOAT8(rows_returned / total_tuples);
372
#endif /* REALLY_DO_JOINSEL */
374
/**************************** FROM POSTGIS ****************/
378
* This function returns an estimate of the selectivity
379
* of a search_box looking at data in the GEOM_STATS
382
* TODO: handle box dimension collapses (probably should be handled
383
* by the statistic generator, avoiding GEOM_STATS with collapsed
387
estimate_selectivity(BOX2DFLOAT4 *box, GEOM_STATS *geomstats)
390
int x_idx_min, x_idx_max, y_idx_min, y_idx_max;
391
double intersect_x, intersect_y, AOI;
392
double cell_area, box_area;
393
double geow, geoh; /* width and height of histogram */
394
int histocols, historows; /* histogram grid size */
396
float overlapping_cells;
397
float avg_feat_cells;
403
* Search box completely miss histogram extent
405
if ( box->xmax < geomstats->xmin ||
406
box->xmin > geomstats->xmax ||
407
box->ymax < geomstats->ymin ||
408
box->ymin > geomstats->ymax )
410
POSTGIS_DEBUG(3, " search_box does not overlaps histogram, returning 0");
416
* Search box completely contains histogram extent
418
if ( box->xmax >= geomstats->xmax &&
419
box->xmin <= geomstats->xmin &&
420
box->ymax >= geomstats->ymax &&
421
box->ymin <= geomstats->ymin )
423
POSTGIS_DEBUG(3, " search_box contains histogram, returning 1");
428
geow = geomstats->xmax-geomstats->xmin;
429
geoh = geomstats->ymax-geomstats->ymin;
431
histocols = geomstats->cols;
432
historows = geomstats->rows;
434
POSTGIS_DEBUGF(3, " histogram has %d cols, %d rows", histocols, historows);
435
POSTGIS_DEBUGF(3, " histogram geosize is %fx%f", geow, geoh);
437
cell_area = (geow*geoh) / (histocols*historows);
438
box_area = (box->xmax-box->xmin)*(box->ymax-box->ymin);
441
/* Find first overlapping column */
442
x_idx_min = (box->xmin-geomstats->xmin) / geow * histocols;
445
POSTGIS_DEBUGF(3, " search_box overlaps %d columns on the left of histogram grid", -x_idx_min);
447
/* should increment the value somehow */
450
if (x_idx_min >= histocols)
452
POSTGIS_DEBUGF(3, " search_box overlaps %d columns on the right of histogram grid", x_idx_min-histocols+1);
454
/* should increment the value somehow */
455
x_idx_min = histocols-1;
458
/* Find first overlapping row */
459
y_idx_min = (box->ymin-geomstats->ymin) / geoh * historows;
462
POSTGIS_DEBUGF(3, " search_box overlaps %d columns on the bottom of histogram grid", -y_idx_min);
464
/* should increment the value somehow */
467
if (y_idx_min >= historows)
469
POSTGIS_DEBUGF(3, " search_box overlaps %d columns on the top of histogram grid", y_idx_min-historows+1);
471
/* should increment the value somehow */
472
y_idx_min = historows-1;
475
/* Find last overlapping column */
476
x_idx_max = (box->xmax-geomstats->xmin) / geow * histocols;
479
/* should increment the value somehow */
482
if (x_idx_max >= histocols )
484
/* should increment the value somehow */
485
x_idx_max = histocols-1;
488
/* Find last overlapping row */
489
y_idx_max = (box->ymax-geomstats->ymin) / geoh * historows;
492
/* should increment the value somehow */
495
if (y_idx_max >= historows)
497
/* should increment the value somehow */
498
y_idx_max = historows-1;
502
* the {x,y}_idx_{min,max}
503
* define the grid squares that the box intersects
505
for (y=y_idx_min; y<=y_idx_max; y++)
507
for (x=x_idx_min; x<=x_idx_max; x++)
512
val = geomstats->value[x+y*histocols];
515
* Of the cell value we get
516
* only the overlap fraction.
519
intersect_x = LW_MIN(box->xmax, geomstats->xmin + (x+1) * geow / histocols) - LW_MAX(box->xmin, geomstats->xmin + x * geow / histocols );
520
intersect_y = LW_MIN(box->ymax, geomstats->ymin + (y+1) * geoh / historows) - LW_MAX(box->ymin, geomstats->ymin+ y * geoh / historows) ;
522
AOI = intersect_x*intersect_y;
523
gain = AOI/cell_area;
525
POSTGIS_DEBUGF(4, " [%d,%d] cell val %.15f",
527
POSTGIS_DEBUGF(4, " [%d,%d] AOI %.15f",
529
POSTGIS_DEBUGF(4, " [%d,%d] gain %.15f",
534
POSTGIS_DEBUGF(4, " [%d,%d] adding %.15f to value",
543
* If the search_box is a point, it will
544
* overlap a single cell and thus get
545
* it's value, which is the fraction of
546
* samples (we can presume of row set also)
547
* which bumped to that cell.
549
* If the table features are points, each
550
* of them will overlap a single histogram cell.
551
* Our search_box value would then be correctly
552
* computed as the sum of the bumped cells values.
554
* If both our search_box AND the sample features
555
* overlap more then a single histogram cell we
556
* need to consider the fact that our sum computation
557
* will have many duplicated included. E.g. each
558
* single sample feature would have contributed to
559
* raise the search_box value by as many times as
560
* many cells in the histogram are commonly overlapped
561
* by both searc_box and feature. We should then
562
* divide our value by the number of cells in the virtual
563
* 'intersection' between average feature cell occupation
564
* and occupation of the search_box. This is as
565
* fuzzy as you understand it :)
567
* Consistency check: whenever the number of cells is
568
* one of whichever part (search_box_occupation,
569
* avg_feature_occupation) the 'intersection' must be 1.
570
* If sounds that our 'intersaction' is actually the
571
* minimun number between search_box_occupation and
572
* avg_feat_occupation.
575
overlapping_cells = (x_idx_max-x_idx_min+1) *
576
(y_idx_max-y_idx_min+1);
577
avg_feat_cells = geomstats->avgFeatureCells;
579
POSTGIS_DEBUGF(3, " search_box overlaps %f cells", overlapping_cells);
580
POSTGIS_DEBUGF(3, " avg feat overlaps %f cells", avg_feat_cells);
582
if ( ! overlapping_cells )
584
POSTGIS_DEBUG(3, " no overlapping cells, returning 0.0");
589
gain = 1/LW_MIN(overlapping_cells, avg_feat_cells);
590
selectivity = value*gain;
592
POSTGIS_DEBUGF(3, " SUM(ov_histo_cells)=%f", value);
593
POSTGIS_DEBUGF(3, " gain=%f", gain);
594
POSTGIS_DEBUGF(3, " selectivity=%f", selectivity);
596
/* prevent rounding overflows */
597
if (selectivity > 1.0) selectivity = 1.0;
598
else if (selectivity < 0) selectivity = 0.0;
604
* This function should return an estimation of the number of
605
* rows returned by a query involving an overlap check
606
* ( it's the restrict function for the && operator )
608
* It can make use (if available) of the statistics collected
609
* by the geometry analyzer function.
611
* Note that the good work is done by estimate_selectivity() above.
612
* This function just tries to find the search_box, loads the statistics
613
* and invoke the work-horse.
615
* This is the one used for PG version >= 7.5
618
PG_FUNCTION_INFO_V1(LWGEOM_gist_sel);
619
Datum LWGEOM_gist_sel(PG_FUNCTION_ARGS)
621
PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
623
/* Oid operator = PG_GETARG_OID(1); */
624
List *args = (List *) PG_GETARG_POINTER(2);
625
/* int varRelid = PG_GETARG_INT32(3); */
627
HeapTuple stats_tuple;
628
GEOM_STATS *geomstats;
630
* This is to avoid casting the corresponding
631
* "type-punned" pointer, which would break
632
* "strict-aliasing rules".
634
GEOM_STATS **gsptr=&geomstats;
635
int geomstats_nvalues=0;
639
BOX2DFLOAT4 search_box;
640
float8 selectivity=0;
642
POSTGIS_DEBUG(2, "LWGEOM_gist_sel called");
644
/* Fail if not a binary opclause (probably shouldn't happen) */
645
if (list_length(args) != 2)
647
POSTGIS_DEBUG(3, "LWGEOM_gist_sel: not a binary opclause");
649
PG_RETURN_FLOAT8(DEFAULT_GEOMETRY_SEL);
654
* Find the constant part
656
other = (Node *) linitial(args);
657
if ( ! IsA(other, Const) )
660
other = (Node *) lsecond(args);
664
self = (Var *) lsecond(args);
667
if ( ! IsA(other, Const) )
669
POSTGIS_DEBUG(3, " no constant arguments - returning default selectivity");
671
PG_RETURN_FLOAT8(DEFAULT_GEOMETRY_SEL);
675
* We are working on two constants..
676
* TODO: check if expression is true,
677
* returned set would be either
680
if ( ! IsA(self, Var) )
682
POSTGIS_DEBUG(3, " no variable argument ? - returning default selectivity");
684
PG_RETURN_FLOAT8(DEFAULT_GEOMETRY_SEL);
688
* Convert the constant to a BOX
691
in = (uchar *)PG_DETOAST_DATUM( ((Const*)other)->constvalue );
692
if ( ! getbox2d_p(in+4, &search_box) )
694
POSTGIS_DEBUG(3, "search box is EMPTY");
696
PG_RETURN_FLOAT8(0.0);
699
POSTGIS_DEBUGF(4, " requested search box is : %.15g %.15g, %.15g %.15g",search_box.xmin,search_box.ymin,search_box.xmax,search_box.ymax);
702
* Get pg_statistic row
705
relid = getrelid(self->varno, root->parse->rtable);
707
stats_tuple = SearchSysCache(STATRELATT, ObjectIdGetDatum(relid), Int16GetDatum(self->varattno), 0, 0);
710
POSTGIS_DEBUG(3, " No statistics, returning default estimate");
712
PG_RETURN_FLOAT8(DEFAULT_GEOMETRY_SEL);
716
if ( ! get_attstatsslot(stats_tuple, 0, 0,
717
STATISTIC_KIND_GEOMETRY, InvalidOid, NULL, NULL,
718
(float4 **)gsptr, &geomstats_nvalues) )
720
POSTGIS_DEBUG(3, " STATISTIC_KIND_GEOMETRY stats not found - returning default geometry selectivity");
722
ReleaseSysCache(stats_tuple);
723
PG_RETURN_FLOAT8(DEFAULT_GEOMETRY_SEL);
726
POSTGIS_DEBUGF(4, " %d read from stats", geomstats_nvalues);
728
POSTGIS_DEBUGF(4, " histo: xmin,ymin: %f,%f",
729
geomstats->xmin, geomstats->ymin);
730
POSTGIS_DEBUGF(4, " histo: xmax,ymax: %f,%f",
731
geomstats->xmax, geomstats->ymax);
732
POSTGIS_DEBUGF(4, " histo: cols: %f", geomstats->rows);
733
POSTGIS_DEBUGF(4, " histo: rows: %f", geomstats->cols);
734
POSTGIS_DEBUGF(4, " histo: avgFeatureArea: %f", geomstats->avgFeatureArea);
735
POSTGIS_DEBUGF(4, " histo: avgFeatureCells: %f", geomstats->avgFeatureCells);
740
selectivity = estimate_selectivity(&search_box, geomstats);
743
POSTGIS_DEBUGF(3, " returning computed value: %f", selectivity);
745
free_attstatsslot(0, NULL, 0, (float *)geomstats, geomstats_nvalues);
746
ReleaseSysCache(stats_tuple);
747
PG_RETURN_FLOAT8(selectivity);
753
* This function is called by the analyze function iff
754
* the geometry_analyze() function gives it its pointer
755
* (this is always the case so far).
756
* The geometry_analyze() function is also responsible
757
* for deciding the number of "sample" rows we will receive
758
* here. It is able to give us other 'custom' data, but we
759
* won't use them so far.
761
* Our job is to build some statistics on the sample data
762
* for use by operator estimators.
764
* Currently we only need statistics to estimate the number of rows
765
* overlapping a given extent (estimation function bound
766
* to the && operator).
770
compute_geometry_stats(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc,
771
int samplerows, double totalrows)
773
MemoryContext old_context;
776
BOX2DFLOAT4 **sampleboxes;
777
GEOM_STATS *geomstats;
779
int null_cnt=0, notnull_cnt=0, examinedsamples=0;
780
BOX2DFLOAT4 *sample_extent=NULL;
781
double total_width=0;
782
double total_boxes_area=0;
783
int total_boxes_cells=0;
787
#if USE_STANDARD_DEVIATION
788
/* for standard deviation */
789
double avgLOWx, avgLOWy, avgHIGx, avgHIGy;
790
double sumLOWx=0, sumLOWy=0, sumHIGx=0, sumHIGy=0;
791
double sdLOWx=0, sdLOWy=0, sdHIGx=0, sdHIGy=0;
792
BOX2DFLOAT4 *newhistobox=NULL;
794
double geow, geoh; /* width and height of histogram */
796
int cols, rows; /* histogram grid size */
797
BOX2DFLOAT4 histobox;
800
* This is where geometry_analyze
801
* should put its' custom parameters.
803
/* void *mystats = stats->extra_data; */
806
* We'll build an histogram having from 40 to 400 boxesPerSide
807
* Total number of cells is determined by attribute stat
808
* target. It can go from 1600 to 160000 (stat target: 10,1000)
810
histocells = 160*stats->attr->attstattarget;
813
POSTGIS_DEBUG(2, "compute_geometry_stats called");
814
POSTGIS_DEBUGF(3, " samplerows: %d", samplerows);
815
POSTGIS_DEBUGF(3, " histogram cells: %d", histocells);
818
* We might need less space, but don't think
819
* its worth saving...
821
sampleboxes = palloc(sizeof(BOX2DFLOAT4 *)*samplerows);
825
* o find extent of the sample rows
826
* o count null-infinite/not-null values
827
* o compute total_width
828
* o compute total features's box area (for avgFeatureArea)
829
* o sum features box coordinates (for standard deviation)
831
for (i=0; i<samplerows; i++)
837
datum = fetchfunc(stats, i, &isnull);
848
geom = (PG_LWGEOM *)PG_DETOAST_DATUM(datum);
850
if ( ! getbox2d_p(SERIALIZED_FORM(geom), &box) )
852
/* Skip empty geometry */
853
POSTGIS_DEBUGF(3, " skipped empty geometry %d", i);
859
* Skip infinite geoms
861
if ( ! finite(box.xmin) ||
862
! finite(box.xmax) ||
863
! finite(box.ymin) ||
866
POSTGIS_DEBUGF(3, " skipped infinite geometry %d", i);
873
* TODO: reduce BOX2DFLOAT4 copies
875
sampleboxes[notnull_cnt] = palloc(sizeof(BOX2DFLOAT4));
876
memcpy(sampleboxes[notnull_cnt], &box, sizeof(BOX2DFLOAT4));
879
* Add to sample extent union
881
if ( ! sample_extent )
883
sample_extent = palloc(sizeof(BOX2DFLOAT4));
884
memcpy(sample_extent, &box, sizeof(BOX2DFLOAT4));
888
sample_extent->xmax = LWGEOM_Maxf(sample_extent->xmax,
890
sample_extent->ymax = LWGEOM_Maxf(sample_extent->ymax,
892
sample_extent->xmin = LWGEOM_Minf(sample_extent->xmin,
894
sample_extent->ymin = LWGEOM_Minf(sample_extent->ymin,
898
/* TODO: ask if we need geom or bvol size for stawidth */
899
total_width += geom->size;
900
total_boxes_area += (box.xmax-box.xmin)*(box.ymax-box.ymin);
902
#if USE_STANDARD_DEVIATION
904
* Add bvol coordinates to sum for standard deviation
915
/* give backend a chance of interrupting us */
916
vacuum_delay_point();
922
elog(NOTICE, " no notnull values, invalid stats");
923
stats->stats_valid = false;
927
#if USE_STANDARD_DEVIATION
929
POSTGIS_DEBUGF(3, " sample_extent: xmin,ymin: %f,%f",
930
sample_extent->xmin, sample_extent->ymin);
931
POSTGIS_DEBUGF(3, " sample_extent: xmax,ymax: %f,%f",
932
sample_extent->xmax, sample_extent->ymax);
936
* o compute standard deviation
938
avgLOWx = sumLOWx/notnull_cnt;
939
avgLOWy = sumLOWy/notnull_cnt;
940
avgHIGx = sumHIGx/notnull_cnt;
941
avgHIGy = sumHIGy/notnull_cnt;
942
for (i=0; i<notnull_cnt; i++)
945
box = (BOX2DFLOAT4 *)sampleboxes[i];
947
sdLOWx += (box->xmin - avgLOWx) * (box->xmin - avgLOWx);
948
sdLOWy += (box->ymin - avgLOWy) * (box->ymin - avgLOWy);
949
sdHIGx += (box->xmax - avgHIGx) * (box->xmax - avgHIGx);
950
sdHIGy += (box->ymax - avgHIGy) * (box->ymax - avgHIGy);
952
sdLOWx = sqrt(sdLOWx/notnull_cnt);
953
sdLOWy = sqrt(sdLOWy/notnull_cnt);
954
sdHIGx = sqrt(sdHIGx/notnull_cnt);
955
sdHIGy = sqrt(sdHIGy/notnull_cnt);
957
POSTGIS_DEBUG(3, " standard deviations:");
958
POSTGIS_DEBUGF(3, " LOWx - avg:%f sd:%f", avgLOWx, sdLOWx);
959
POSTGIS_DEBUGF(3, " LOWy - avg:%f sd:%f", avgLOWy, sdLOWy);
960
POSTGIS_DEBUGF(3, " HIGx - avg:%f sd:%f", avgHIGx, sdHIGx);
961
POSTGIS_DEBUGF(3, " HIGy - avg:%f sd:%f", avgHIGy, sdHIGy);
963
histobox.xmin = LW_MAX((avgLOWx - SDFACTOR * sdLOWx),
964
sample_extent->xmin);
965
histobox.ymin = LW_MAX((avgLOWy - SDFACTOR * sdLOWy),
966
sample_extent->ymin);
967
histobox.xmax = LW_MIN((avgHIGx + SDFACTOR * sdHIGx),
968
sample_extent->xmax);
969
histobox.ymax = LW_MIN((avgHIGy + SDFACTOR * sdHIGy),
970
sample_extent->ymax);
972
POSTGIS_DEBUGF(3, " sd_extent: xmin,ymin: %f,%f",
973
histobox.xmin, histobox.ymin);
974
POSTGIS_DEBUGF(3, " sd_extent: xmax,ymax: %f,%f",
975
histobox.xmin, histobox.ymax);
979
* o skip hard deviants
980
* o compute new histogram box
982
for (i=0; i<notnull_cnt; i++)
985
box = (BOX2DFLOAT4 *)sampleboxes[i];
987
if ( box->xmin > histobox.xmax ||
988
box->xmax < histobox.xmin ||
989
box->ymin > histobox.ymax ||
990
box->ymax < histobox.ymin )
992
POSTGIS_DEBUGF(4, " feat %d is an hard deviant, skipped", i);
994
sampleboxes[i] = NULL;
999
newhistobox = palloc(sizeof(BOX2DFLOAT4));
1000
memcpy(newhistobox, box, sizeof(BOX2DFLOAT4));
1004
if ( box->xmin < newhistobox->xmin )
1005
newhistobox->xmin = box->xmin;
1006
if ( box->ymin < newhistobox->ymin )
1007
newhistobox->ymin = box->ymin;
1008
if ( box->xmax > newhistobox->xmax )
1009
newhistobox->xmax = box->xmax;
1010
if ( box->ymax > newhistobox->ymax )
1011
newhistobox->ymax = box->ymax;
1016
* Set histogram extent as the intersection between
1017
* standard deviation based histogram extent
1018
* and computed sample extent after removal of
1019
* hard deviants (there might be no hard deviants).
1021
if ( histobox.xmin < newhistobox->xmin )
1022
histobox.xmin = newhistobox->xmin;
1023
if ( histobox.ymin < newhistobox->ymin )
1024
histobox.ymin = newhistobox->ymin;
1025
if ( histobox.xmax > newhistobox->xmax )
1026
histobox.xmax = newhistobox->xmax;
1027
if ( histobox.ymax > newhistobox->ymax )
1028
histobox.ymax = newhistobox->ymax;
1031
#else /* ! USE_STANDARD_DEVIATION */
1034
* Set histogram extent box
1036
histobox.xmin = sample_extent->xmin;
1037
histobox.ymin = sample_extent->ymin;
1038
histobox.xmax = sample_extent->xmax;
1039
histobox.ymax = sample_extent->ymax;
1040
#endif /* USE_STANDARD_DEVIATION */
1043
POSTGIS_DEBUGF(3, " histogram_extent: xmin,ymin: %f,%f",
1044
histobox.xmin, histobox.ymin);
1045
POSTGIS_DEBUGF(3, " histogram_extent: xmax,ymax: %f,%f",
1046
histobox.xmax, histobox.ymax);
1049
geow = histobox.xmax - histobox.xmin;
1050
geoh = histobox.ymax - histobox.ymin;
1053
* Compute histogram cols and rows based on aspect ratio
1054
* of histogram extent
1056
if ( ! geow && ! geoh )
1076
cols = ceil(sqrt((double)histocells*(geow/geoh)));
1077
rows = ceil((double)histocells/cols);
1081
rows = ceil(sqrt((double)histocells*(geoh/geow)));
1082
cols = ceil((double)histocells/rows);
1084
histocells = cols*rows;
1087
POSTGIS_DEBUGF(3, " computed histogram grid size (CxR): %dx%d (%d cells)", cols, rows, histocells);
1091
* Create the histogram (GEOM_STATS)
1093
old_context = MemoryContextSwitchTo(stats->anl_context);
1094
geom_stats_size=sizeof(GEOM_STATS)+(histocells-1)*sizeof(float4);
1095
geomstats = palloc(geom_stats_size);
1096
MemoryContextSwitchTo(old_context);
1098
geomstats->avgFeatureArea = total_boxes_area/notnull_cnt;
1099
geomstats->xmin = histobox.xmin;
1100
geomstats->ymin = histobox.ymin;
1101
geomstats->xmax = histobox.xmax;
1102
geomstats->ymax = histobox.ymax;
1103
geomstats->cols = cols;
1104
geomstats->rows = rows;
1106
/* Initialize all values to 0 */
1107
for (i=0;i<histocells; i++) geomstats->value[i] = 0;
1109
cell_width = geow/cols;
1110
cell_height = geoh/rows;
1111
cell_area = cell_width*cell_height;
1113
POSTGIS_DEBUGF(4, "cell_width: %f", cell_width);
1114
POSTGIS_DEBUGF(4, "cell_height: %f", cell_height);
1119
* o fill histogram values with the number of
1120
* features' bbox overlaps: a feature's bvol
1121
* can fully overlap (1) or partially overlap
1122
* (fraction of 1) an histogram cell.
1124
* o compute total cells occupation
1127
for (i=0; i<notnull_cnt; i++)
1130
int x_idx_min, x_idx_max, x;
1131
int y_idx_min, y_idx_max, y;
1134
box = (BOX2DFLOAT4 *)sampleboxes[i];
1135
if ( ! box ) continue; /* hard deviant.. */
1137
/* give backend a chance of interrupting us */
1138
vacuum_delay_point();
1140
POSTGIS_DEBUGF(4, " feat %d box is %f %f, %f %f",
1141
i, box->xmax, box->ymax,
1142
box->xmin, box->ymin);
1144
/* Find first overlapping column */
1145
x_idx_min = (box->xmin-geomstats->xmin) / geow * cols;
1146
if (x_idx_min <0) x_idx_min = 0;
1147
if (x_idx_min >= cols) x_idx_min = cols-1;
1149
/* Find first overlapping row */
1150
y_idx_min = (box->ymin-geomstats->ymin) / geoh * rows;
1151
if (y_idx_min <0) y_idx_min = 0;
1152
if (y_idx_min >= rows) y_idx_min = rows-1;
1154
/* Find last overlapping column */
1155
x_idx_max = (box->xmax-geomstats->xmin) / geow * cols;
1156
if (x_idx_max <0) x_idx_max = 0;
1157
if (x_idx_max >= cols ) x_idx_max = cols-1;
1159
/* Find last overlapping row */
1160
y_idx_max = (box->ymax-geomstats->ymin) / geoh * rows;
1161
if (y_idx_max <0) y_idx_max = 0;
1162
if (y_idx_max >= rows) y_idx_max = rows-1;
1164
POSTGIS_DEBUGF(4, " feat %d overlaps columns %d-%d, rows %d-%d",
1165
i, x_idx_min, x_idx_max, y_idx_min, y_idx_max);
1168
* the {x,y}_idx_{min,max}
1169
* define the grid squares that the box intersects
1171
for (y=y_idx_min; y<=y_idx_max; y++)
1173
for (x=x_idx_min; x<=x_idx_max; x++)
1175
geomstats->value[x+y*cols] += 1;
1181
* before adding to the total cells
1182
* we could decide if we really
1183
* want this feature to count
1185
total_boxes_cells += numcells;
1190
POSTGIS_DEBUGF(3, " examined_samples: %d/%d", examinedsamples, samplerows);
1192
if ( ! examinedsamples )
1194
elog(NOTICE, " no examined values, invalid stats");
1195
stats->stats_valid = false;
1197
POSTGIS_DEBUG(3, " no stats have been gathered");
1202
/* what about null features (TODO) ? */
1203
geomstats->avgFeatureCells = (float4)total_boxes_cells/examinedsamples;
1205
POSTGIS_DEBUGF(3, " histo: total_boxes_cells: %d", total_boxes_cells);
1206
POSTGIS_DEBUGF(3, " histo: avgFeatureArea: %f", geomstats->avgFeatureArea);
1207
POSTGIS_DEBUGF(3, " histo: avgFeatureCells: %f", geomstats->avgFeatureCells);
1211
* Normalize histogram
1213
* We divide each histogram cell value
1214
* by the number of samples examined.
1217
for (i=0; i<histocells; i++)
1218
geomstats->value[i] /= examinedsamples;
1222
for (x=0; x<cols; x++)
1224
for (y=0; y<rows; y++)
1226
POSTGIS_DEBUGF(4, " histo[%d,%d] = %.15f", x, y, geomstats->value[x+y*cols]);
1233
* Write the statistics data
1235
stats->stakind[0] = STATISTIC_KIND_GEOMETRY;
1236
stats->staop[0] = InvalidOid;
1237
stats->stanumbers[0] = (float4 *)geomstats;
1238
stats->numnumbers[0] = geom_stats_size/sizeof(float4);
1240
stats->stanullfrac = (float4)null_cnt/samplerows;
1241
stats->stawidth = total_width/notnull_cnt;
1242
stats->stadistinct = -1.0;
1244
POSTGIS_DEBUGF(3, " out: slot 0: kind %d (STATISTIC_KIND_GEOMETRY)",
1246
POSTGIS_DEBUGF(3, " out: slot 0: op %d (InvalidOid)", stats->staop[0]);
1247
POSTGIS_DEBUGF(3, " out: slot 0: numnumbers %d", stats->numnumbers[0]);
1248
POSTGIS_DEBUGF(3, " out: null fraction: %d/%d=%g", null_cnt, samplerows, stats->stanullfrac);
1249
POSTGIS_DEBUGF(3, " out: average width: %d bytes", stats->stawidth);
1250
POSTGIS_DEBUG(3, " out: distinct values: all (no check done)");
1252
stats->stats_valid = true;
1256
* This function will be called when the ANALYZE command is run
1257
* on a column of the "geometry" type.
1259
* It will need to return a stats builder function reference
1260
* and a "minimum" sample rows to feed it.
1261
* If we want analisys to be completely skipped we can return
1262
* FALSE and leave output vals untouched.
1264
* What we know from this call is:
1266
* o The pg_attribute row referring to the specific column.
1267
* Could be used to get reltuples from pg_class (which
1268
* might quite inexact though...) and use them to set an
1269
* appropriate minimum number of sample rows to feed to
1270
* the stats builder. The stats builder will also receive
1271
* a more accurate "estimation" of the number or rows.
1273
* o The pg_type row for the specific column.
1274
* Could be used to set stat builder / sample rows
1275
* based on domain type (when postgis will be implemented
1278
* Being this experimental we'll stick to a static stat_builder/sample_rows
1282
PG_FUNCTION_INFO_V1(LWGEOM_analyze);
1283
Datum LWGEOM_analyze(PG_FUNCTION_ARGS)
1285
VacAttrStats *stats = (VacAttrStats *)PG_GETARG_POINTER(0);
1286
Form_pg_attribute attr = stats->attr;
1288
POSTGIS_DEBUG(2, "lwgeom_analyze called");
1290
/* If the attstattarget column is negative, use the default value */
1291
/* NB: it is okay to scribble on stats->attr since it's a copy */
1292
if (attr->attstattarget < 0)
1293
attr->attstattarget = default_statistics_target;
1295
POSTGIS_DEBUGF(3, " attribute stat target: %d", attr->attstattarget);
1298
* There might be a reason not to analyze this column
1299
* (can we detect the absence of an index?)
1302
elog(NOTICE, "compute_geometry_stats not implemented yet");
1303
PG_RETURN_BOOL(false);
1306
/* Setup the minimum rows and the algorithm function */
1307
stats->minrows = 300 * stats->attr->attstattarget;
1308
stats->compute_stats = compute_geometry_stats;
1310
POSTGIS_DEBUGF(3, " minrows: %d", stats->minrows);
1312
/* Indicate we are done successfully */
1313
PG_RETURN_BOOL(true);
1317
* Return the estimated extent of the table
1318
* looking at gathered statistics (or NULL if
1319
* no statistics have been gathered).
1321
PG_FUNCTION_INFO_V1(LWGEOM_estimated_extent);
1322
Datum LWGEOM_estimated_extent(PG_FUNCTION_ARGS)
1331
ArrayType *array = NULL;
1333
SPITupleTable *tuptable;
1340
if ( PG_NARGS() == 3 )
1342
txnsp = PG_GETARG_TEXT_P(0);
1343
txtbl = PG_GETARG_TEXT_P(1);
1344
txcol = PG_GETARG_TEXT_P(2);
1346
else if ( PG_NARGS() == 2 )
1348
txtbl = PG_GETARG_TEXT_P(0);
1349
txcol = PG_GETARG_TEXT_P(1);
1353
elog(ERROR, "estimated_extent() called with wrong number of arguments");
1357
POSTGIS_DEBUG(2, "LWGEOM_estimated_extent called");
1359
/* Connect to SPI manager */
1360
SPIcode = SPI_connect();
1361
if (SPIcode != SPI_OK_CONNECT)
1363
elog(ERROR, "LWGEOM_estimated_extent: couldnt open a connection to SPI");
1367
querysize = VARSIZE(txtbl)+VARSIZE(txcol)+516;
1371
nsp = palloc(VARSIZE(txnsp)+1);
1372
memcpy(nsp, VARDATA(txnsp), VARSIZE(txnsp)-VARHDRSZ);
1373
nsp[VARSIZE(txnsp)-VARHDRSZ]='\0';
1374
querysize += VARSIZE(txnsp);
1378
querysize += 32; /* current_schema() */
1381
tbl = palloc(VARSIZE(txtbl)+1);
1382
memcpy(tbl, VARDATA(txtbl), VARSIZE(txtbl)-VARHDRSZ);
1383
tbl[VARSIZE(txtbl)-VARHDRSZ]='\0';
1385
col = palloc(VARSIZE(txcol)+1);
1386
memcpy(col, VARDATA(txcol), VARSIZE(txcol)-VARHDRSZ);
1387
col[VARSIZE(txcol)-VARHDRSZ]='\0';
1389
#if POSTGIS_DEBUG_LEVEL > 0
1392
POSTGIS_DEBUGF(3, " schema:%s table:%s column:%s", nsp, tbl, col);
1396
POSTGIS_DEBUGF(3, " schema:current_schema() table:%s column:%s",
1401
query = palloc(querysize);
1404
/* Security check: because we access information in the pg_statistic table, we must run as the database
1405
superuser (by marking the function as SECURITY DEFINER) and check permissions ourselves */
1408
sprintf(query, "SELECT has_table_privilege((SELECT usesysid FROM pg_user WHERE usename = session_user), '%s.%s', 'select')", nsp, tbl);
1412
sprintf(query, "SELECT has_table_privilege((SELECT usesysid FROM pg_user WHERE usename = session_user), '%s', 'select')", tbl);
1415
POSTGIS_DEBUGF(4, "permission check sql query is: %s", query);
1417
SPIcode = SPI_exec(query, 1);
1418
if (SPIcode != SPI_OK_SELECT)
1421
elog(ERROR, "LWGEOM_estimated_extent: couldn't execute permission check sql via SPI");
1425
tuptable = SPI_tuptable;
1426
tupdesc = SPI_tuptable->tupdesc;
1427
tuple = tuptable->vals[0];
1429
if (!DatumGetBool(SPI_getbinval(tuple, tupdesc, 1, &isnull)))
1432
elog(ERROR, "LWGEOM_estimated_extent: permission denied for relation %s", tbl);
1437
/* Return the stats data */
1440
sprintf(query, "SELECT s.stanumbers1[5:8] FROM pg_statistic s, pg_class c, pg_attribute a, pg_namespace n WHERE c.relname = '%s' AND a.attrelid = c.oid AND a.attname = '%s' AND n.nspname = '%s' AND c.relnamespace = n.oid AND s.starelid=c.oid AND s.staattnum = a.attnum AND staattnum = attnum", tbl, col, nsp);
1444
sprintf(query, "SELECT s.stanumbers1[5:8] FROM pg_statistic s, pg_class c, pg_attribute a, pg_namespace n WHERE c.relname = '%s' AND a.attrelid = c.oid AND a.attname = '%s' AND n.nspname = current_schema() AND c.relnamespace = n.oid AND s.starelid=c.oid AND s.staattnum = a.attnum AND staattnum = attnum", tbl, col);
1447
POSTGIS_DEBUGF(4, " query: %s", query);
1449
SPIcode = SPI_exec(query, 1);
1450
if (SPIcode != SPI_OK_SELECT )
1453
elog(ERROR,"LWGEOM_estimated_extent: couldnt execute sql via SPI");
1456
if (SPI_processed != 1)
1460
POSTGIS_DEBUGF(3, " %d stat rows", SPI_processed);
1462
elog(ERROR, "LWGEOM_estimated_extent: couldn't locate table within current schema");
1467
tuptable = SPI_tuptable;
1468
tupdesc = SPI_tuptable->tupdesc;
1469
tuple = tuptable->vals[0];
1470
array = DatumGetArrayTypeP(SPI_getbinval(tuple, tupdesc, 1, &isnull));
1475
POSTGIS_DEBUG(3, " stats are NULL");
1477
elog(ERROR, "LWGEOM_estimated_extent: couldn't locate statistics for table");
1481
if ( ArrayGetNItems(ARR_NDIM(array), ARR_DIMS(array)) != 4 )
1483
elog(ERROR, " corrupted histogram");
1487
POSTGIS_DEBUGF(3, " stats array has %d elems", ArrayGetNItems(ARR_NDIM(array), ARR_DIMS(array)));
1490
* Construct box2dfloat4.
1491
* Must allocate this in upper executor context
1492
* to keep it alive after SPI_finish().
1494
box = SPI_palloc(sizeof(BOX2DFLOAT4));
1496
/* Construct the box */
1497
memcpy(box, ARR_DATA_PTR(array), sizeof(BOX2DFLOAT4));
1499
POSTGIS_DEBUGF(3, " histogram extent = %g %g, %g %g", box->xmin,
1500
box->ymin, box->xmax, box->ymax);
1502
SPIcode = SPI_finish();
1503
if (SPIcode != SPI_OK_FINISH )
1505
elog(ERROR, "LWGEOM_estimated_extent: couldnt disconnect from SPI");
1508
/* TODO: enlarge the box by some factor */
1510
PG_RETURN_POINTER(box);
1516
/**********************************************************************
1518
* Revision 1.39 2006/05/30 08:38:58 strk
1519
* Added some missing copyright headers.
1521
* Revision 1.38 2006/03/13 10:54:08 strk
1522
* Applied patch from Mark Cave Ayland embedding access control for
1523
* the estimated_extent functions.
1525
* Revision 1.37 2006/01/09 11:48:15 strk
1526
* Fixed "strict-aliasing rule" breaks.
1528
* Revision 1.36 2005/12/30 17:40:37 strk
1529
* Moved PG_LWGEOM WKB I/O and SRID get/set funx
1530
* from lwgeom_api.c to lwgeom_pg.c.
1531
* Made lwgeom_from_ewkb directly invoke grammar parser rather than invoke
1532
* the PG_LWGEOM-specific function.
1533
* Cleaned up signedness-related and comments-related warnings for the files
1534
* being committed (more to do on other files)
1536
* Revision 1.35 2005/10/10 16:19:16 strk
1537
* Fixed null values fraction computation in geometry analyzer as suggested by Michael Fuhr
1539
* Revision 1.34 2005/09/08 19:26:22 strk
1540
* Handled search_box outside of histogram_box case in selectivity estimator
1542
* Revision 1.33 2005/06/28 22:00:09 strk
1543
* Fixed estimators to work with postgresql 8.1.x
1545
* Revision 1.32 2005/04/22 01:07:09 strk
1546
* Fixed bug in join selectivity estimator returning invalid estimates (>1)
1548
* Revision 1.31 2005/04/18 14:12:43 strk
1549
* Slightly changed standard deviation computation to be more corner-case-friendly.
1551
* Revision 1.30 2005/04/18 10:57:13 strk
1552
* Applied patch by Ron Mayer fixing memory leaks and invalid results
1553
* in join selectivity estimator. Fixed some return to use default JOIN
1554
* selectivity estimate instead of default RESTRICT selectivity estimate.
1556
* Revision 1.29 2005/03/25 09:34:25 strk
1559
* Revision 1.28 2005/03/24 16:27:32 strk
1560
* Added comments in estimate_allocation() bugfix point.
1562
* Revision 1.27 2005/03/24 14:45:50 strk
1563
* Fixed bug in estimated_extent() returning pointer to a memory allocated in SPI memory context
1565
* Revision 1.26 2005/03/08 09:27:23 strk
1566
* RESTRICT selectivity estimator use self->varno instead of varRelid.
1567
* Seems to work for subqueries...
1569
* Revision 1.25 2005/03/08 09:23:34 strk
1570
* Fixed debugging lines.
1572
* Revision 1.24 2005/02/21 16:22:32 strk
1573
* Changed min() max() usage with LW_MIN() LW_MAX()
1575
* Revision 1.23 2005/02/10 10:52:53 strk
1576
* Changed 'char' to 'uchar' (unsigned char typedef) wherever octet is actually
1579
* Revision 1.22 2005/01/13 18:26:49 strk
1580
* estimated_extent() implemented for PG<80
1582
* Revision 1.21 2005/01/13 17:41:40 strk
1583
* estimated_extent() prepared for future expansion (support of pre-800 PGSQL)
1585
* Revision 1.20 2005/01/07 09:52:12 strk
1586
* JOINSEL disabled for builds against pgsql<80
1588
* Revision 1.19 2004/12/22 17:12:34 strk
1589
* Added Mark Cave-Ayland implementation of JOIN selectivity estimator.
1591
* Revision 1.18 2004/12/21 12:21:45 mcayland
1592
* Fixed bug in pass 4 where sample boxes were referred as BOXs and not BOX2DFLOAT4. Also increased SDFACTOR to 3.25
1594
* Revision 1.17 2004/12/17 18:00:33 strk
1595
* LWGEOM_gist_joinsel defined for all PG versions
1597
* Revision 1.16 2004/12/17 11:07:48 strk
1598
* Added missing prototype
1600
* Revision 1.15 2004/12/13 14:03:07 strk
1601
* Initial skeleton on join selectivity estimator.
1602
* Current estimators application for box2d && box2d operator.
1604
* Revision 1.14 2004/12/13 12:25:27 strk
1605
* Removed obsoleted function and fixed some warnings.
1607
* Revision 1.13 2004/12/10 12:35:11 strk
1608
* implemented estimated_extent() function
1610
* Revision 1.12 2004/11/04 11:40:08 strk
1611
* Renamed max/min/avg macros to LW_MAX, LW_MIN, LW_AVG.
1613
* Revision 1.11 2004/10/27 11:02:24 strk
1614
* Removed another getbox2d() call.
1616
* Revision 1.10 2004/10/25 17:07:09 strk
1617
* Obsoleted getbox2d(). Use getbox2d_p() or getbox2d_internal() instead.
1619
* Revision 1.9 2004/10/08 13:20:54 strk
1621
* Changed LWGEOM structure to point to an actual BOX2DFLOAT4.
1622
* Renamed most function to reflect a TYPE_method naming convention.
1623
* (you'll need a dump/reload for it to work)
1624
* Added more manipulation functions.
1626
**********************************************************************/