1
/**********************************************************************
2
* $Id: lwgeom_estimate.c 3740 2009-02-19 09:38:32Z mcayland $
4
* PostGIS - Spatial Types for PostgreSQL
5
* http://postgis.refractions.net
6
* Copyright 2001-2006 Refractions Research Inc.
8
* This is free software; you can redistribute and/or modify it under
9
* the terms of the GNU General Public Licence. See the COPYING file.
11
**********************************************************************/
21
#include "executor/spi.h"
23
#include "parser/parsetree.h"
24
#include "utils/array.h"
26
#include "liblwgeom.h"
27
#include "lwgeom_pg.h"
29
#include "utils/selfuncs.h"
30
#include "utils/syscache.h"
31
#include "utils/guc.h"
32
#include "utils/builtins.h"
34
/*#define DEBUG_GEOMETRY_STATS 1*/
39
#include "commands/vacuum.h"
40
#include "utils/lsyscache.h"
43
* Assign a number to the postgis statistics kind
47
* 1-100: reserved for assignment by the core Postgres project
48
* 100-199: reserved for assignment by PostGIS
49
* 200-9999: reserved for other globally-known stats kinds
50
* 10000-32767: reserved for private site-local use
53
#define STATISTIC_KIND_GEOMETRY 100
56
* Define this if you want to use standard deviation based
57
* histogram extent computation. If you do, you can also
58
* tweak the deviation factor used in computation with
61
#define USE_STANDARD_DEVIATION 1
64
typedef struct GEOM_STATS_T
66
/* cols * rows = total boxes in grid */
70
/* average bounding box area of not-null features */
71
float4 avgFeatureArea;
74
* average number of histogram cells
75
* covered by the sample not-null features
77
float4 avgFeatureCells;
80
float4 xmin,ymin, xmax, ymax;
83
* variable length # of floats for histogram
88
static float8 estimate_selectivity(BOX2DFLOAT4 *box, GEOM_STATS *geomstats);
90
#endif /* USE_VERSION >= 80 */
92
#define SHOW_DIGS_DOUBLE 15
93
#define MAX_DIGS_DOUBLE (SHOW_DIGS_DOUBLE + 6 + 1 + 3 +1)
96
* Default geometry selectivity factor
98
#define DEFAULT_GEOMETRY_SEL 0.000005
101
* Default geometry join selectivity factor
103
#define DEFAULT_GEOMETRY_JOINSEL 0.000005
106
* Define this to actually DO join selectivity
107
* (as contrary to just return the default JOINSEL value)
108
* Note that this is only possible when compiling postgis
109
* against pgsql >= 800
111
#define REALLY_DO_JOINSEL 1
113
/* --------------------------------------------
116
* 2d histogram is a bounding box with a bunch of cells in it.
117
* The cells will have width (xmax-xmin)/boxesPerSide
118
* and height(ymax-ymin)/boxesPerSide
119
* The first box is the ll corner's box, the send is directly to the right
122
* Size of structure is:
123
* 4 (size) + 32 (box) + 4 (boxesPerSide) +
124
* boxesPerSide*boxesPerSide*4 (data)
126
typedef struct histotag
128
int32 size; /* postgres variable-length type requirement */
129
int boxesPerSide; /* boxesPerSide * boxesPerSide = total boxes in grid */
130
double avgFeatureArea; /* average bbox area of features in this histogram */
131
double xmin,ymin, xmax, ymax; /* BOX of area */
132
unsigned int value[1]; /* variable length # of ints for histogram */
135
Datum lwhistogram2d_in(PG_FUNCTION_ARGS);
136
Datum lwhistogram2d_out(PG_FUNCTION_ARGS);
137
Datum create_lwhistogram2d(PG_FUNCTION_ARGS);
138
Datum build_lwhistogram2d(PG_FUNCTION_ARGS);
139
Datum explode_lwhistogram2d(PG_FUNCTION_ARGS);
140
Datum estimate_lwhistogram2d(PG_FUNCTION_ARGS);
141
Datum LWGEOM_gist_sel(PG_FUNCTION_ARGS);
142
Datum LWGEOM_gist_joinsel(PG_FUNCTION_ARGS);
143
Datum LWGEOM_estimated_extent(PG_FUNCTION_ARGS);
144
#if USE_VERSION >= 80
145
Datum LWGEOM_analyze(PG_FUNCTION_ARGS);
149
* text form of LWHISTOGRAM2D is:
150
* 'HISTOGRAM2D(xmin,ymin,xmax,ymax,boxesPerSide;value[0],value[1],...')
151
* note the ";" in the middle (for easy parsing)
152
* I dont expect anyone to actually create one by hand
154
PG_FUNCTION_INFO_V1(lwhistogram2d_in);
155
Datum lwhistogram2d_in(PG_FUNCTION_ARGS)
157
char *str = PG_GETARG_CSTRING(0);
158
LWHISTOGRAM2D *histo ;
160
double xmin,ymin,xmax,ymax;
162
double avgFeatureArea;
166
/*elog(NOTICE, "lwhistogram2d parser called");*/
170
while (isspace(*str))
173
if (strstr(str,"HISTOGRAM2D(") != str)
175
elog(ERROR, "lwhistogram2d parser - doesnt start with 'HISTOGRAM2D(\n");
178
if (strstr(str,";") == NULL)
180
elog(ERROR, "lwhistogram2d parser - doesnt have a ; in sring!\n");
184
nitems = sscanf(str,"HISTOGRAM2D(%lf,%lf,%lf,%lf,%i,%lf;",&xmin,&ymin,&xmax,&ymax,&boxesPerSide,&avgFeatureArea);
188
elog(ERROR, "lwhistogram2d parser - couldnt parse initial portion of histogram!\n");
192
if ( (boxesPerSide > 50) || (boxesPerSide <1) )
194
elog(ERROR, "lwhistogram2d parser - boxesPerSide is too big or too small\n");
198
str2 = strstr(str,";");
203
elog(ERROR, "lwhistogram2d parser - no histogram values\n");
207
histo = (LWHISTOGRAM2D *) palloc (sizeof(LWHISTOGRAM2D) + (boxesPerSide*boxesPerSide-1)*4 );
208
histo->size = sizeof(LWHISTOGRAM2D) + (boxesPerSide*boxesPerSide-1)*4;
210
for (t=0;t<boxesPerSide*boxesPerSide;t++)
212
datum = strtol(str2,&str3,10); /* str2=start of int, str3=end of int, base 10 */
213
/* str3 points to "," or ")" */
216
elog(ERROR, "lwhistogram2d parser - histogram values prematurely ended!\n");
219
histo->value[t] = (unsigned int) datum;
220
str2= str3+1; /* move past the "," or ")" */
226
histo->avgFeatureArea = avgFeatureArea;
227
histo->boxesPerSide = boxesPerSide;
229
PG_RETURN_POINTER(histo);
235
PG_FUNCTION_INFO_V1(lwhistogram2d_out);
236
Datum lwhistogram2d_out(PG_FUNCTION_ARGS)
238
LWHISTOGRAM2D *histo = (LWHISTOGRAM2D *)PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
244
size = 26+6*MAX_DIGS_DOUBLE + histo->boxesPerSide*histo->boxesPerSide* (MAX_DIGS_DOUBLE+1);
245
result = palloc(size);
247
sprintf(result,"HISTOGRAM2D(%.15g,%.15g,%.15g,%.15g,%i,%.15g;",
248
histo->xmin,histo->ymin,histo->xmax,histo->ymax,histo->boxesPerSide,histo->avgFeatureArea );
251
elog(NOTICE,"so far: %s",result);
252
elog(NOTICE,"buffsize=%i, size=%i",size,histo->size);
255
for (t=0;t<histo->boxesPerSide*histo->boxesPerSide;t++)
257
if (t) sprintf(temp, ",%u", histo->value[t]);
258
else sprintf(temp, "%u", histo->value[t]);
264
elog(NOTICE,"about to return string (len=%i): -%s-",strlen(result),result);
265
elog(NOTICE, "result@%x", result);
269
PG_RETURN_CSTRING(result);
272
/*create_lwhistogram2d(BOX2D, boxesPerSide)*/
273
/* returns a histgram with 0s in all the boxes.*/
274
PG_FUNCTION_INFO_V1(create_lwhistogram2d);
275
Datum create_lwhistogram2d(PG_FUNCTION_ARGS)
277
/*BOX3D *bbox = (BOX3D *) PG_GETARG_POINTER(0);*/
278
BOX2DFLOAT4 *bbox = (BOX2DFLOAT4 *)PG_GETARG_DATUM(0);
279
int32 boxesPerSide = PG_GETARG_INT32(1);
280
LWHISTOGRAM2D *histo;
283
if ( (boxesPerSide <1) || (boxesPerSide >50) )
285
elog(ERROR, "create_lwhistogram2d - boxesPerSide is too small or big.\n");
289
size = sizeof(LWHISTOGRAM2D) + (boxesPerSide*boxesPerSide-1)*4 ;
291
histo = (LWHISTOGRAM2D *)palloc(size);
294
histo->xmin = bbox->xmin;
295
histo->ymin = bbox->ymin;
296
histo->xmax = bbox->xmax;
297
histo->ymax = bbox->ymax;
299
histo->avgFeatureArea = 0;
301
histo->boxesPerSide = boxesPerSide;
303
for (t=0;t<boxesPerSide*boxesPerSide; t++)
308
/*elog(NOTICE,"create_lwhistogram2d returning");*/
310
PG_RETURN_POINTER(histo);
314
* build_histogram2d (LWHISTOGRAM2D, tablename, columnname)
315
* executes the SPI 'SELECT box3d(columnname) FROM tablename'
316
* and sticks all the results in the histogram
318
PG_FUNCTION_INFO_V1(build_lwhistogram2d);
319
Datum build_lwhistogram2d(PG_FUNCTION_ARGS)
321
LWHISTOGRAM2D *histo = (LWHISTOGRAM2D *)PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
322
char *tablename, *columnname;
323
LWHISTOGRAM2D *result;
326
SPITupleTable *tuptable;
333
double box_area, area_intersect, cell_area;
334
int x_idx_min, x_idx_max;
335
int y_idx_min, y_idx_max;
336
double xmin,ymin, xmax,ymax;
337
double intersect_x, intersect_y;
343
double sum_area_new = 0;
344
int sum_area_numb_new =0;
347
int tuplimit = 500000; /* No. of tuples returned on each cursor fetch */
352
/*elog(NOTICE,"build_lwhistogram2d called");*/
359
#if DEBUG_GEOMETRY_STATS
360
elog(NOTICE, " build_histogram2d: histogram extent = %g %g, %g %g",
361
histo->xmin, histo->ymin, histo->xmax, histo->ymax);
365
result = (LWHISTOGRAM2D *) malloc(histo->size);
366
memcpy(result,histo,histo->size);
370
for(t=0;t<histo->boxesPerSide*histo->boxesPerSide;t++)
372
total+=histo->value[t];
377
sum_area = histo->avgFeatureArea * total;
378
sum_area_numb = total;
382
tablename = DatumGetCString(DirectFunctionCall1(textout,
383
PointerGetDatum(PG_GETARG_DATUM(1))));
385
columnname = DatumGetCString(DirectFunctionCall1(textout,
386
PointerGetDatum(PG_GETARG_DATUM(2))));
388
#if DEBUG_GEOMETRY_STATS
389
elog(NOTICE,"Start build_histogram2d with %i items already existing", sum_area_numb);
390
elog(NOTICE,"table=\"%s\", column = \"%s\"", tablename, columnname);
394
SPIcode = SPI_connect();
396
if (SPIcode != SPI_OK_CONNECT)
398
elog(ERROR,"build_histogram2d: couldnt open a connection to SPI");
403
sprintf(sql,"SELECT box2d(\"%s\") FROM \"%s\"",columnname,tablename);
404
/*elog(NOTICE,"executing %s",sql);*/
406
SPIplan = SPI_prepare(sql, 0, NULL);
409
elog(ERROR,"build_histogram2d: couldnt create query plan via SPI");
413
#if USE_VERSION >= 80
414
SPIportal = SPI_cursor_open(NULL, SPIplan, NULL, NULL, 1);
416
SPIportal = SPI_cursor_open(NULL, SPIplan, NULL, NULL);
418
if (SPIportal == NULL)
420
elog(ERROR,"build_histogram2d: couldn't create cursor via SPI");
426
while (moredata==TRUE)
429
#if DEBUG_GEOMETRY_STATS
430
elog(NOTICE,"about to fetch...");
432
SPI_cursor_fetch(SPIportal, TRUE, tuplimit);
434
ntuples = SPI_processed;
436
#if DEBUG_GEOMETRY_STATS
437
elog(NOTICE,"processing %d records", ntuples);
442
tuptable = SPI_tuptable;
443
tupdesc = SPI_tuptable->tupdesc;
445
cell_area = ( (xmax-xmin)*(ymax-ymin)/(histo->boxesPerSide*histo->boxesPerSide) );
447
for (t=0;t<ntuples;t++)
449
tuple = tuptable->vals[t];
450
datum = SPI_getbinval(tuple, tupdesc, 1, &isnull);
453
box = (BOX2DFLOAT4 *)DatumGetPointer(datum);
454
box_area = (box->xmax-box->xmin)*(box->ymax-box->ymin);
456
sum_area_new += box_area;
457
sum_area_numb_new ++;
459
if (box_area > cell_area )
460
box_area = cell_area;
462
box_area =0; /* for precision! */
464
/* check to see which boxes this intersects */
465
x_idx_min = (box->xmin-xmin)/(xmax-xmin)*histo->boxesPerSide;
468
if (x_idx_min >= histo->boxesPerSide)
469
x_idx_min = histo->boxesPerSide-1;
470
y_idx_min = (box->ymin-ymin)/(ymax-ymin)*histo->boxesPerSide;
473
if (y_idx_min >= histo->boxesPerSide)
474
y_idx_min = histo->boxesPerSide-1;
476
x_idx_max = (box->xmax-xmin)/(xmax-xmin)*histo->boxesPerSide;
479
if (x_idx_max >= histo->boxesPerSide)
480
x_idx_max = histo->boxesPerSide-1;
481
y_idx_max = (box->ymax-ymin)/(ymax-ymin)*histo->boxesPerSide ;
484
if (y_idx_max >= histo->boxesPerSide)
485
y_idx_max = histo->boxesPerSide-1;
488
* the {x,y}_idx_{min,max} define the grid squares that the box intersects
489
* if the area of the intersect between the box and the grid square > 5% of
492
#if DEBUG_GEOMETRY_STATS
493
elog(NOTICE,"box is : (%.15g,%.15g to %.15g,%.15g)",box->xmin,box->ymin, box->xmax, box->ymax);
494
elog(NOTICE," search is in x: %i to %i y: %i to %i",x_idx_min, x_idx_max, y_idx_min,y_idx_max);
497
for (y= y_idx_min; y<=y_idx_max;y++)
499
for (x=x_idx_min;x<= x_idx_max;x++)
501
intersect_x = LW_MIN(box->xmax, xmin+ (x+1) * (xmax-xmin)/histo->boxesPerSide ) - LW_MAX(box->xmin, xmin + x*(xmax-xmin)/histo->boxesPerSide ) ;
503
intersect_y = LW_MIN(box->ymax, ymin+ (y+1) * (ymax-ymin)/histo->boxesPerSide ) - LW_MAX(box->ymin, ymin+ y*(ymax-ymin)/histo->boxesPerSide ) ;
505
/* for a point, intersect_x=0, intersect_y=0, box_area =0*/
506
#if DEBUG_GEOMETRY_STATS
507
elog(NOTICE,"x=%i,y=%i, intersect_x= %.15g, intersect_y = %.15g",x,y,intersect_x,intersect_y);
509
if ( (intersect_x>=0) && (intersect_y>=0) )
511
area_intersect = intersect_x*intersect_y;
512
if (area_intersect >= box_area*0.05)
514
#if DEBUG_GEOMETRY_STATS
518
result->value[x+y*histo->boxesPerSide]++;
526
} /* End of for loop */
529
* Free all the results after each fetch, otherwise all tuples stay
530
* in memory until the end of the table...
532
SPI_freetuptable(tuptable);
536
} /* End of if ntuples > 0 */
538
} /* End of while loop */
541
/* Close the cursor */
542
SPI_cursor_close(SPIportal);
544
SPIcode =SPI_finish();
545
if (SPIcode != SPI_OK_FINISH )
547
elog(ERROR,"build_histogram2d: couldnt disconnect from SPI");
551
#if DEBUG_GEOMETRY_STATS
552
elog(NOTICE,"finishing up build_histogram2d ");
555
/*pfree(tablename);*/
556
/*pfree(columnname);*/
559
for(t=0;t<histo->boxesPerSide*histo->boxesPerSide;t++)
561
total+=result->value[t];
564
#if DEBUG_GEOMETRY_STATS
565
elog(NOTICE ,"histogram finishes with %i items in it - acutally added %i rows and %i bumps\n",total,sum_area_numb_new,bump);
566
elog(NOTICE,"done build_histogram2d ");
570
/* re-calculate statistics on avg bbox size */
571
if (sum_area_numb_new >0)
572
result->avgFeatureArea = (sum_area_new+sum_area)/((double)(sum_area_numb_new+sum_area_numb));
574
PG_RETURN_POINTER(result) ;
578
* explode_lwhistogram2d(histogram2d, tablename::text)
579
* executes CREATE TABLE tablename (the_geom geometry, id int, hits int, percent float)
581
* DOES NOT UPDATE GEOMETRY_COLUMNS
583
PG_FUNCTION_INFO_V1(explode_lwhistogram2d);
584
Datum explode_lwhistogram2d(PG_FUNCTION_ARGS)
586
LWHISTOGRAM2D *histo = (LWHISTOGRAM2D *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
596
cellx = (histo->xmax-histo->xmin)/histo->boxesPerSide;
597
celly = (histo->ymax-histo->ymin)/histo->boxesPerSide;
599
tablename = DatumGetCString(DirectFunctionCall1(textout,
600
PointerGetDatum(PG_GETARG_DATUM(1))));
603
for(t=0;t<histo->boxesPerSide*histo->boxesPerSide;t++)
605
total+=histo->value[t];
610
SPIcode = SPI_connect();
611
if (SPIcode != SPI_OK_CONNECT)
613
elog(ERROR,"build_histogram2d: couldnt open a connection to SPI");
618
sprintf(sql,"CREATE TABLE %s (the_geom geometry, id int, hits int, percent float)",tablename);
620
SPIcode = SPI_exec(sql, 2147483640 ); /* max signed int32 */
622
if (SPIcode != SPI_OK_UTILITY )
624
elog(ERROR,"explode_histogram2d: couldnt create table");
628
for(y=0;y<histo->boxesPerSide;y++)
630
for(x=0;x<histo->boxesPerSide;x++)
633
sprintf(geom,"POLYGON((%.15g %.15g, %.15g %.15g, %.15g %.15g, %.15g %.15g, %.15g %.15g ))",
634
histo->xmin + x*cellx, histo->ymin+y*celly,
635
histo->xmin + (x)*cellx, histo->ymin+ (y+1)*celly,
636
histo->xmin + (x+1)*cellx, histo->ymin+ (y+1)*celly,
637
histo->xmin + (x+1)*cellx, histo->ymin+y*celly,
638
histo->xmin + x*cellx, histo->ymin+y*celly
640
sprintf(sql,"INSERT INTO %s VALUES('%s'::geometry,%i,%i,%.15g)",tablename,geom,t,histo->value[t],histo->value[t]/((double)total)*100.0);
642
SPIcode = SPI_exec(sql, 2147483640 ); /* max signed int32 */
643
if (SPIcode != SPI_OK_INSERT )
645
elog(ERROR,"explode_histogram2d: couldnt insert into");
651
SPIcode =SPI_finish();
652
if (SPIcode != SPI_OK_FINISH )
654
elog(ERROR,"build_histogram2d: couldnt disconnect from SPI");
658
PG_RETURN_POINTER(histo) ;
662
* estimate_histogram2d(histogram2d, box2d)
663
* returns a % estimate of the # of features that will be returned by that box query
665
* For each grid cell that intersects the query box
666
* Calculate area of intersection (AOI)
667
* IF AOI < avgFeatureArea THEN set AOI = avgFeatureArea
668
* SUM AOI/area-of-cell*value-of-cell
670
* change : instead of avgFeatureArea, use avgFeatureArea or 10% of a grid cell (whichever is smaller)
672
PG_FUNCTION_INFO_V1(estimate_lwhistogram2d);
673
Datum estimate_lwhistogram2d(PG_FUNCTION_ARGS)
675
LWHISTOGRAM2D *histo = (LWHISTOGRAM2D *)PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
676
BOX2DFLOAT4 *box = (BOX2DFLOAT4 *) PG_GETARG_POINTER(1);
678
int x_idx_min, x_idx_max, y_idx_min, y_idx_max;
679
double intersect_x, intersect_y, AOI;
681
double xmin,ymin,xmax,ymax;
685
double avg_feature_size;
695
cell_area = ( (xmax-xmin)*(ymax-ymin)/(histo->boxesPerSide*histo->boxesPerSide) );
697
avg_feature_size = histo->avgFeatureArea;
698
if ( avg_feature_size > cell_area*0.1)
700
avg_feature_size = cell_area*0.1;
704
#if DEBUG_GEOMETRY_STATS
705
elog(NOTICE,"start estimate_histogram2d: ");
706
elog(NOTICE,"box is : (%.15g,%.15g to %.15g,%.15g)",box->xmin,box->ymin, box->xmax, box->ymax);
709
box_area = (box->xmax-box->xmin)*(box->ymax-box->ymin);
711
if (box_area<0) box_area = 0; /* for precision! */
714
* check to see which boxes this intersects
716
x_idx_min = (box->xmin-xmin)/(xmax-xmin)*histo->boxesPerSide;
717
if (x_idx_min <0) x_idx_min = 0;
718
if (x_idx_min >= histo->boxesPerSide)
719
x_idx_min = histo->boxesPerSide-1;
720
y_idx_min = (box->ymin-ymin)/(ymax-ymin)*histo->boxesPerSide;
721
if (y_idx_min <0) y_idx_min = 0;
722
if (y_idx_min >= histo->boxesPerSide)
723
y_idx_min = histo->boxesPerSide-1;
725
x_idx_max = (box->xmax-xmin)/(xmax-xmin)*histo->boxesPerSide;
726
if (x_idx_max <0) x_idx_max = 0;
727
if (x_idx_max >= histo->boxesPerSide)
728
x_idx_max = histo->boxesPerSide-1;
729
y_idx_max = (box->ymax-ymin)/(ymax-ymin)*histo->boxesPerSide ;
730
if (y_idx_max <0) y_idx_max = 0;
731
if (y_idx_max >= histo->boxesPerSide)
732
y_idx_max = histo->boxesPerSide-1;
734
/* The {x,y}_idx_{min,max} define the grid squares that the box intersects */
736
#if DEBUG_GEOMETRY_STATS
737
elog(NOTICE," search is in x: %i to %i y: %i to %i",x_idx_min, x_idx_max, y_idx_min,y_idx_max);
740
for (y= y_idx_min; y<=y_idx_max;y++)
742
for (x=x_idx_min;x<= x_idx_max;x++)
744
intersect_x = LW_MIN(box->xmax, xmin+ (x+1) * (xmax-xmin)/histo->boxesPerSide ) - LW_MAX(box->xmin, xmin+ x*(xmax-xmin)/histo->boxesPerSide ) ;
745
intersect_y = LW_MIN(box->ymax, ymin+ (y+1) * (ymax-ymin)/histo->boxesPerSide ) - LW_MAX(box->ymin, ymin+ y*(ymax-ymin)/histo->boxesPerSide ) ;
747
/* for a point, intersect_x=0, intersect_y=0, box_area =0 */
748
/* elog(NOTICE,"x=%i,y=%i, intersect_x= %.15g, intersect_y = %.15g",x,y,intersect_x,intersect_y); */
749
if ( (intersect_x>=0) && (intersect_y>=0) )
751
AOI = intersect_x*intersect_y;
752
if (AOI< avg_feature_size)
753
AOI = avg_feature_size;
754
result_sum += AOI/cell_area *
755
histo->value[x+y*histo->boxesPerSide];
760
for(t=0;t<histo->boxesPerSide*histo->boxesPerSide;t++)
762
total+=histo->value[t];
765
if ( (histo->avgFeatureArea <=0) && (box_area <=0) )
766
PG_RETURN_FLOAT8(1.0/((double)(total)));
768
PG_RETURN_FLOAT8(result_sum/((double)total));
773
#if ! REALLY_DO_JOINSEL || USE_VERSION < 80
775
* JOIN selectivity in the GiST && operator
776
* for all PG versions
778
PG_FUNCTION_INFO_V1(LWGEOM_gist_joinsel);
779
Datum LWGEOM_gist_joinsel(PG_FUNCTION_ARGS)
781
#if DEBUG_GEOMETRY_STATS
782
elog(NOTICE, "LWGEOM_gist_joinsel called (returning %f)",
783
DEFAULT_GEOMETRY_JOINSEL);
785
PG_RETURN_FLOAT8(DEFAULT_GEOMETRY_JOINSEL);
788
#else /* REALLY_DO_JOINSEL && USE_VERSION >= 80 */
790
int calculate_column_intersection(BOX2DFLOAT4 *search_box, GEOM_STATS *geomstats1, GEOM_STATS *geomstats2);
793
calculate_column_intersection(BOX2DFLOAT4 *search_box, GEOM_STATS *geomstats1, GEOM_STATS *geomstats2)
796
* Calculate the intersection of two columns from their geomstats extents - return true
797
* if a valid intersection was found, false if there is no overlap
800
float8 i_xmin = LW_MAX(geomstats1->xmin, geomstats2->xmin);
801
float8 i_ymin = LW_MAX(geomstats1->ymin, geomstats2->ymin);
802
float8 i_xmax = LW_MIN(geomstats1->xmax, geomstats2->xmax);
803
float8 i_ymax = LW_MIN(geomstats1->ymax, geomstats2->ymax);
805
/* If the rectangles don't intersect, return false */
806
if (i_xmin > i_xmax || i_ymin > i_ymax)
809
/* Otherwise return the rectangle in search_box */
810
search_box->xmin = i_xmin;
811
search_box->ymin = i_ymin;
812
search_box->xmax = i_xmax;
813
search_box->ymax = i_ymax;
819
* JOIN selectivity in the GiST && operator
820
* for all PG versions
822
PG_FUNCTION_INFO_V1(LWGEOM_gist_joinsel);
823
Datum LWGEOM_gist_joinsel(PG_FUNCTION_ARGS)
826
Query *root = (Query *) PG_GETARG_POINTER(0);
828
PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
830
/* Oid operator = PG_GETARG_OID(1); */
831
List *args = (List *) PG_GETARG_POINTER(2);
832
JoinType jointype = (JoinType) PG_GETARG_INT16(3);
838
HeapTuple stats1_tuple, stats2_tuple, class_tuple;
839
GEOM_STATS *geomstats1, *geomstats2;
841
* These are to avoid casting the corresponding
842
* "type-punned" pointers, which would break
843
* "strict-aliasing rules".
845
GEOM_STATS **gs1ptr=&geomstats1, **gs2ptr=&geomstats2;
846
int geomstats1_nvalues = 0, geomstats2_nvalues = 0;
847
float8 selectivity1 = 0.0, selectivity2 = 0.0;
848
float4 num1_tuples = 0.0, num2_tuples = 0.0;
849
float4 total_tuples = 0.0, rows_returned = 0.0;
850
BOX2DFLOAT4 search_box;
854
* Join selectivity algorithm. To calculation the selectivity we
855
* calculate the intersection of the two column sample extents,
856
* sum the results, and then multiply by two since for each
857
* geometry in col 1 that intersects a geometry in col 2, the same
862
#if DEBUG_GEOMETRY_STATS
863
elog(NOTICE, "LWGEOM_gist_joinsel called with jointype %d", jointype);
867
* We'll only respond to an inner join/unknown context join
869
if (jointype != JOIN_INNER)
871
elog(NOTICE, "LWGEOM_gist_joinsel called with incorrect join type");
872
PG_RETURN_FLOAT8(DEFAULT_GEOMETRY_JOINSEL);
876
* Determine the oids of the geometry columns we are working with
878
arg1 = (Node *) linitial(args);
879
arg2 = (Node *) lsecond(args);
881
if (!IsA(arg1, Var) || !IsA(arg2, Var))
883
elog(DEBUG1, "LWGEOM_gist_joinsel called with arguments that are not column references");
884
PG_RETURN_FLOAT8(DEFAULT_GEOMETRY_JOINSEL);
890
relid1 = getrelid(var1->varno, root->rtable);
891
relid2 = getrelid(var2->varno, root->rtable);
893
relid1 = getrelid(var1->varno, root->parse->rtable);
894
relid2 = getrelid(var2->varno, root->parse->rtable);
897
#if DEBUG_GEOMETRY_STATS
898
elog(NOTICE, "Working with relations oids: %d %d", relid1, relid2);
901
/* Read the stats tuple from the first column */
902
stats1_tuple = SearchSysCache(STATRELATT, ObjectIdGetDatum(relid1), Int16GetDatum(var1->varattno), 0, 0);
903
if ( ! stats1_tuple )
905
#if DEBUG_GEOMETRY_STATS
906
elog(NOTICE, " No statistics, returning default geometry join selectivity");
908
PG_RETURN_FLOAT8(DEFAULT_GEOMETRY_JOINSEL);
913
if ( ! get_attstatsslot(stats1_tuple, 0, 0,
914
STATISTIC_KIND_GEOMETRY, InvalidOid, NULL, NULL,
915
(float4 **)gs1ptr, &geomstats1_nvalues) )
917
#if DEBUG_GEOMETRY_STATS
918
elog(NOTICE, " STATISTIC_KIND_GEOMETRY stats not found - returning default geometry join selectivity");
920
ReleaseSysCache(stats1_tuple);
921
PG_RETURN_FLOAT8(DEFAULT_GEOMETRY_JOINSEL);
925
/* Read the stats tuple from the second column */
926
stats2_tuple = SearchSysCache(STATRELATT, ObjectIdGetDatum(relid2), Int16GetDatum(var2->varattno), 0, 0);
927
if ( ! stats2_tuple )
929
#if DEBUG_GEOMETRY_STATS
930
elog(NOTICE, " No statistics, returning default geometry join selectivity");
932
free_attstatsslot(0, NULL, 0, (float *)geomstats1,
934
ReleaseSysCache(stats1_tuple);
935
PG_RETURN_FLOAT8(DEFAULT_GEOMETRY_JOINSEL);
939
if ( ! get_attstatsslot(stats2_tuple, 0, 0,
940
STATISTIC_KIND_GEOMETRY, InvalidOid, NULL, NULL,
941
(float4 **)gs2ptr, &geomstats2_nvalues) )
943
#if DEBUG_GEOMETRY_STATS
944
elog(NOTICE, " STATISTIC_KIND_GEOMETRY stats not found - returning default geometry join selectivity");
946
free_attstatsslot(0, NULL, 0, (float *)geomstats1,
948
ReleaseSysCache(stats2_tuple);
949
ReleaseSysCache(stats1_tuple);
950
PG_RETURN_FLOAT8(DEFAULT_GEOMETRY_JOINSEL);
955
* Setup the search box - this is the intersection of the two column
958
calculate_column_intersection(&search_box, geomstats1, geomstats2);
960
#if DEBUG_GEOMETRY_STATS
961
elog(NOTICE," -- geomstats1 box: %.15g %.15g, %.15g %.15g",geomstats1->xmin,geomstats1->ymin,geomstats1->xmax,geomstats1->ymax);
962
elog(NOTICE," -- geomstats2 box: %.15g %.15g, %.15g %.15g",geomstats2->xmin,geomstats2->ymin,geomstats2->xmax,geomstats2->ymax);
963
elog(NOTICE," -- calculated intersection box is : %.15g %.15g, %.15g %.15g",search_box.xmin,search_box.ymin,search_box.xmax,search_box.ymax);
967
/* Do the selectivity */
968
selectivity1 = estimate_selectivity(&search_box, geomstats1);
969
selectivity2 = estimate_selectivity(&search_box, geomstats2);
971
#if DEBUG_GEOMETRY_STATS
972
elog(NOTICE, "selectivity1: %.15g selectivity2: %.15g", selectivity1, selectivity2);
975
/* Free the statistic tuples */
976
free_attstatsslot(0, NULL, 0, (float *)geomstats1, geomstats1_nvalues);
977
ReleaseSysCache(stats1_tuple);
979
free_attstatsslot(0, NULL, 0, (float *)geomstats2, geomstats2_nvalues);
980
ReleaseSysCache(stats2_tuple);
983
* OK, so before we calculate the join selectivity we also need to
984
* know the number of tuples in each of the columns since
985
* estimate_selectivity returns the number of estimated tuples
986
* divided by the total number of tuples - hence we need to
987
* multiply out the returned selectivity by the total number of rows.
989
class_tuple = SearchSysCache(RELOID, ObjectIdGetDatum(relid1),
992
if (HeapTupleIsValid(class_tuple))
994
Form_pg_class reltup = (Form_pg_class) GETSTRUCT(class_tuple);
995
num1_tuples = reltup->reltuples;
998
ReleaseSysCache(class_tuple);
1001
class_tuple = SearchSysCache(RELOID, ObjectIdGetDatum(relid2),
1004
if (HeapTupleIsValid(class_tuple))
1006
Form_pg_class reltup = (Form_pg_class) GETSTRUCT(class_tuple);
1007
num2_tuples = reltup->reltuples;
1010
ReleaseSysCache(class_tuple);
1014
* Finally calculate the estimate of the number of rows returned
1016
* = 2 * (nrows from col1 + nrows from col2) /
1017
* total nrows in col1 x total nrows in col2
1019
* The factor of 2 accounts for the fact that for each tuple in
1020
* col 1 matching col 2,
1021
* there will be another match in col 2 matching col 1
1024
total_tuples = num1_tuples * num2_tuples;
1025
rows_returned = 2 * ((num1_tuples * selectivity1) +
1026
(num2_tuples * selectivity2));
1028
#if DEBUG_GEOMETRY_STATS
1029
elog(NOTICE, "Rows from rel1: %f", num1_tuples * selectivity1);
1030
elog(NOTICE, "Rows from rel2: %f", num2_tuples * selectivity2);
1031
elog(NOTICE, "Estimated rows returned: %f", rows_returned);
1035
* One (or both) tuple count is zero...
1036
* We return default selectivity estimate.
1037
* We could probably attempt at an estimate
1038
* w/out looking at tables tuple count, with
1039
* a function of selectivity1, selectivity2.
1041
if ( ! total_tuples )
1043
#if DEBUG_GEOMETRY_STATS
1044
elog(NOTICE, "Total tuples == 0, returning default join selectivity");
1046
PG_RETURN_FLOAT8(DEFAULT_GEOMETRY_JOINSEL);
1049
if ( rows_returned > total_tuples )
1050
PG_RETURN_FLOAT8(1.0);
1052
PG_RETURN_FLOAT8(rows_returned / total_tuples);
1055
#endif /* REALLY_DO_JOINSEL */
1057
/**************************** FROM POSTGIS ****************/
1060
#if USE_VERSION < 80
1062
* get_restriction_var
1063
* Examine the args of a restriction clause to see if it's of the
1064
* form (var op something) or (something op var). If so, extract
1065
* and return the var and the other argument.
1068
* args: clause argument list
1069
* varRelid: see specs for restriction selectivity functions
1071
* Outputs: (these are set only if TRUE is returned)
1072
* *var: gets Var node
1073
* *other: gets other clause argument
1074
* *varonleft: set TRUE if var is on the left, FALSE if on the right
1076
* Returns TRUE if a Var is identified, otherwise FALSE.
1079
get_restriction_var(List *args, int varRelid, Var **var,
1080
Node **other, bool *varonleft)
1084
if (length(args) != 2) return false;
1086
left = (Node *) lfirst(args);
1087
right = (Node *) lsecond(args);
1089
/* Ignore any binary-compatible relabeling */
1091
if (IsA(left, RelabelType))
1092
left = (Node *)((RelabelType *) left)->arg;
1093
if (IsA(right, RelabelType))
1094
right = (Node *)((RelabelType *) right)->arg;
1096
/* Look for the var */
1098
if (IsA(left, Var) &&
1099
(varRelid == 0 || varRelid == ((Var *) left)->varno))
1101
*var = (Var *) left;
1105
else if (IsA(right, Var) &&
1106
(varRelid == 0 || varRelid == ((Var *) right)->varno))
1108
*var = (Var *) right;
1114
/* Duh, it's too complicated for me... */
1121
/* restriction in the GiST && operator */
1122
PG_FUNCTION_INFO_V1(LWGEOM_gist_sel);
1123
Datum LWGEOM_gist_sel(PG_FUNCTION_ARGS)
1125
Query *root = (Query *) PG_GETARG_POINTER(0);
1126
List *args = (List *) PG_GETARG_POINTER(2);
1127
int varRelid = PG_GETARG_INT32(3);
1129
BOX2DFLOAT4 search_box;
1132
SPITupleTable *tuptable;
1148
PG_RETURN_FLOAT8(DEFAULT_GEOMETRY_SEL);
1151
#if DEBUG_GEOMETRY_STATS
1152
elog(NOTICE,"LWGEOM_gist_sel was called");
1155
if (!get_restriction_var(args, varRelid, &var, &other, &varonleft))
1157
#if DEBUG_GEOMETRY_STATS
1158
elog(NOTICE,"get_restriction_var FAILED -returning early");
1160
PG_RETURN_FLOAT8(DEFAULT_GEOMETRY_SEL);
1163
relid = getrelid(var->varno, root->rtable);
1164
if (relid == InvalidOid)
1166
#if DEBUG_GEOMETRY_STATS
1167
elog(NOTICE,"getrelid FAILED (invalid oid) -returning early");
1169
PG_RETURN_FLOAT8(DEFAULT_GEOMETRY_SEL);
1172
#if DEBUG_GEOMETRY_STATS
1173
elog(NOTICE,"operator's oid = %i (this should be GEOMETRY && GEOMETRY)",operator);
1174
elog(NOTICE,"relations' oid = %i (this should be the relation that the && is working on) ",relid);
1175
elog(NOTICE,"varatt oid = %i (basically relations column #) ",var->varattno);
1179
if (IsA(other, Const) &&((Const *) other)->constisnull)
1181
#if DEBUG_GEOMETRY_STATS
1182
elog(NOTICE,"other operand of && is NULL - returning early");
1184
PG_RETURN_FLOAT8(DEFAULT_GEOMETRY_SEL);
1187
if (IsA(other, Const))
1189
#if DEBUG_GEOMETRY_STATS
1190
elog(NOTICE,"The other side of the && is a constant with type (oid) = %i and length %i. This should be GEOMETRY with length -1 (variable length)",((Const*)other)->consttype,((Const*)other)->constlen);
1196
#if DEBUG_GEOMETRY_STATS
1197
elog(NOTICE,"the other side of && isnt a constant - returning early");
1199
PG_RETURN_FLOAT8(DEFAULT_GEOMETRY_SEL);
1202
/* get the BOX thats being searched in */
1203
in = (char *)PG_DETOAST_DATUM( ((Const*)other)->constvalue );
1205
if ( ! getbox2d_p(in+4, &search_box) )
1208
#if DEBUG_GEOMETRY_STATS
1209
elog(NOTICE, "search box is EMPTY");
1211
PG_RETURN_FLOAT8(0.0);
1214
#if DEBUG_GEOMETRY_STATS
1215
elog(NOTICE,"requested search box is : (%.15g %.15g, %.15g %.15g)",search_box->xmin,search_box->ymin,search_box->xmax,search_box->ymax);
1219
SPIcode = SPI_connect();
1220
if (SPIcode != SPI_OK_CONNECT)
1222
elog(NOTICE,"LWGEOM_gist_sel: couldnt open a connection to SPI:%i",SPIcode);
1223
PG_RETURN_FLOAT8(DEFAULT_GEOMETRY_SEL) ;
1226
sprintf(sql,"SELECT stats FROM GEOMETRY_COLUMNS WHERE attrelid=%u AND varattnum=%i",relid,var->varattno);
1227
#if DEBUG_GEOMETRY_STATS
1228
elog(NOTICE,"sql:%s",sql);
1230
SPIcode = SPI_exec(sql, 1 );
1231
if (SPIcode != SPI_OK_SELECT )
1234
elog(NOTICE,"LWGEOM_gist_sel: couldnt execute sql via SPI");
1235
PG_RETURN_FLOAT8(DEFAULT_GEOMETRY_SEL) ;
1238
if (SPI_processed !=1)
1241
#if DEBUG_GEOMETRY_STATS
1242
elog(NOTICE,"LWGEOM_gist_sel: geometry_columns didnt return a unique value");
1244
PG_RETURN_FLOAT8(DEFAULT_GEOMETRY_SEL) ;
1247
tuptable = SPI_tuptable;
1248
tupdesc = SPI_tuptable->tupdesc;
1249
tuple = tuptable->vals[0];
1250
datum = SPI_getbinval(tuple, tupdesc, 1, &isnull);
1254
#if DEBUG_GEOMETRY_STATS
1255
elog(NOTICE,"LWGEOM_gist_sel: geometry_columns returned a null histogram");
1257
PG_RETURN_FLOAT8(DEFAULT_GEOMETRY_SEL) ;
1259
#if DEBUG_GEOMETRY_STATS
1260
elog(NOTICE,"LWGEOM_gist_sel: checking against estimate_histogram2d");
1263
/* now we have the histogram, and our search box - use the estimate_histogram2d(histo,box) to get the result! */
1264
myest = DatumGetFloat8( DirectFunctionCall2( estimate_lwhistogram2d, datum, PointerGetDatum(&search_box) ) );
1266
if ( (myest<0) || (myest!=myest) ) /* <0? or NaN? */
1268
#if DEBUG_GEOMETRY_STATS
1269
elog(NOTICE,"LWGEOM_gist_sel: got something crazy back from estimate_histogram2d");
1271
PG_RETURN_FLOAT8(DEFAULT_GEOMETRY_SEL) ;
1274
SPIcode =SPI_finish();
1275
if (SPIcode != SPI_OK_FINISH )
1277
#if DEBUG_GEOMETRY_STATS
1278
elog(NOTICE,"LWGEOM_gist_sel: couldnt disconnect from SPI");
1280
PG_RETURN_FLOAT8(DEFAULT_GEOMETRY_SEL) ;
1283
#if DEBUG_GEOMETRY_STATS
1284
elog(NOTICE,"LWGEOM_gist_sel: finished, returning with %lf",myest);
1286
PG_RETURN_FLOAT8(myest);
1290
* Return the extent of the table
1291
* looking at gathered statistics (or NULL if
1292
* no statistics have been gathered).
1294
PG_FUNCTION_INFO_V1(LWGEOM_estimated_extent);
1295
Datum LWGEOM_estimated_extent(PG_FUNCTION_ARGS)
1305
SPITupleTable *tuptable;
1308
LWHISTOGRAM2D *histo;
1314
if ( PG_NARGS() == 3 )
1316
txnsp = PG_GETARG_TEXT_P(0);
1317
nsp = palloc(VARSIZE(txnsp)+1);
1318
memcpy(nsp, VARDATA(txnsp), VARSIZE(txnsp)-VARHDRSZ);
1319
nsp[VARSIZE(txnsp)-VARHDRSZ]='\0';
1321
txtbl = PG_GETARG_TEXT_P(1);
1322
txcol = PG_GETARG_TEXT_P(2);
1324
else if ( PG_NARGS() == 2 )
1326
txtbl = PG_GETARG_TEXT_P(0);
1327
txcol = PG_GETARG_TEXT_P(1);
1331
elog(ERROR, "estimated_extent() called with wrong number of arguments");
1335
tbl = palloc(VARSIZE(txtbl)+1);
1336
memcpy(tbl, VARDATA(txtbl), VARSIZE(txtbl)-VARHDRSZ);
1337
tbl[VARSIZE(txtbl)-VARHDRSZ]='\0';
1339
col = palloc(VARSIZE(txcol)+1);
1340
memcpy(col, VARDATA(txcol), VARSIZE(txcol)-VARHDRSZ);
1341
col[VARSIZE(txcol)-VARHDRSZ]='\0';
1343
#if DEBUG_GEOMETRY_STATS
1344
elog(NOTICE, "LWGEOM_estimated_extent called");
1347
/* Connect to SPI manager */
1348
SPIcode = SPI_connect();
1349
if (SPIcode != SPI_OK_CONNECT)
1351
elog(ERROR, "LWGEOM_estimated_extent: couldnt open a connection to SPI");
1355
querysize = strlen(tbl)+strlen(col)+256;
1358
querysize += strlen(nsp)+32;
1359
query = palloc(querysize);
1360
sprintf(query, "SELECT stats FROM geometry_columns WHERE f_table_schema = '%s' AND f_table_name = '%s' AND f_geometry_column = '%s'", nsp, tbl, col);
1364
query = palloc(querysize);
1365
sprintf(query, "SELECT stats FROM geometry_columns WHERE f_table_name = '%s' AND f_geometry_column = '%s'", tbl, col);
1368
#if DEBUG_GEOMETRY_STATS > 1
1369
elog(NOTICE, " query: %s", query);
1372
SPIcode = SPI_exec(query, 1);
1373
if (SPIcode != SPI_OK_SELECT )
1376
elog(ERROR,"LWGEOM_estimated_extent: couldnt execute sql via SPI");
1379
if (SPI_processed > 1)
1382
elog(NOTICE, " More then a single row (%d) in geometry_columns matches given schema/table/column specs", SPI_processed);
1385
if (SPI_processed == 0)
1388
#if DEBUG_GEOMETRY_STATS
1389
elog(NOTICE, " %d stat rows", SPI_processed);
1391
elog(ERROR, "LWGEOM_estimated_extent: couldn't locate table within current schema");
1395
tuptable = SPI_tuptable;
1396
tupdesc = SPI_tuptable->tupdesc;
1397
tuple = tuptable->vals[0];
1398
datum = SPI_getbinval(tuple, tupdesc, 1, &isnull);
1402
#if DEBUG_GEOMETRY_STATS
1403
elog(NOTICE, " stats are NULL");
1405
elog(ERROR, "LWGEOM_estimated_extent: couldn't locate statistics for table");
1409
histo = (LWHISTOGRAM2D *)PG_DETOAST_DATUM(datum);
1411
#if DEBUG_GEOMETRY_STATS
1412
elog(NOTICE, " histogram extent = %g %g, %g %g", histo->xmin,
1413
histo->ymin, histo->xmax, histo->ymax);
1417
* Construct box2dfloat4.
1418
* Must allocate this in upper executor context
1419
* to keep it alive after SPI_finish().
1421
box = SPI_palloc(sizeof(BOX2DFLOAT4));
1423
box->xmin = histo->xmin;
1424
box->ymin = histo->ymin;
1425
box->xmax = histo->xmax;
1426
box->ymax = histo->ymax;
1428
#if DEBUG_GEOMETRY_STATS
1429
elog(NOTICE, " histogram extent = %f %f, %f %f", box->xmin,
1430
box->ymin, box->xmax, box->ymax);
1433
SPIcode = SPI_finish();
1434
if (SPIcode != SPI_OK_FINISH )
1436
elog(ERROR, "LWGEOM_estimated_extent: couldnt disconnect from SPI");
1439
PG_RETURN_POINTER(box);
1442
#else /* USE_VERSION >= 80 */
1445
* This function returns an estimate of the selectivity
1446
* of a search_box looking at data in the GEOM_STATS
1449
* TODO: handle box dimension collapses (probably should be handled
1450
* by the statistic generator, avoiding GEOM_STATS with collapsed
1454
estimate_selectivity(BOX2DFLOAT4 *box, GEOM_STATS *geomstats)
1457
int x_idx_min, x_idx_max, y_idx_min, y_idx_max;
1458
double intersect_x, intersect_y, AOI;
1459
double cell_area, box_area;
1460
double geow, geoh; /* width and height of histogram */
1461
int histocols, historows; /* histogram grid size */
1463
float overlapping_cells;
1464
float avg_feat_cells;
1470
* Search box completely miss histogram extent
1472
if ( box->xmax < geomstats->xmin ||
1473
box->xmin > geomstats->xmax ||
1474
box->ymax < geomstats->ymin ||
1475
box->ymin > geomstats->ymax )
1477
#if DEBUG_GEOMETRY_STATS
1478
elog(NOTICE, " search_box does not overlaps histogram, returning 0");
1484
* Search box completely contains histogram extent
1486
if ( box->xmax >= geomstats->xmax &&
1487
box->xmin <= geomstats->xmin &&
1488
box->ymax >= geomstats->ymax &&
1489
box->ymin <= geomstats->ymin )
1491
#if DEBUG_GEOMETRY_STATS
1492
elog(NOTICE, " search_box contains histogram, returning 1");
1497
geow = geomstats->xmax-geomstats->xmin;
1498
geoh = geomstats->ymax-geomstats->ymin;
1500
histocols = geomstats->cols;
1501
historows = geomstats->rows;
1503
#if DEBUG_GEOMETRY_STATS
1504
elog(NOTICE, " histogram has %d cols, %d rows", histocols, historows);
1505
elog(NOTICE, " histogram geosize is %fx%f", geow, geoh);
1508
cell_area = (geow*geoh) / (histocols*historows);
1509
box_area = (box->xmax-box->xmin)*(box->ymax-box->ymin);
1512
/* Find first overlapping column */
1513
x_idx_min = (box->xmin-geomstats->xmin) / geow * histocols;
1514
if (x_idx_min < 0) {
1515
#if DEBUG_GEOMETRY_STATS
1516
elog(NOTICE, " search_box overlaps %d columns on the left of histogram grid", -x_idx_min);
1518
/* should increment the value somehow */
1521
if (x_idx_min >= histocols)
1523
#if DEBUG_GEOMETRY_STATS
1524
elog(NOTICE, " search_box overlaps %d columns on the right of histogram grid", x_idx_min-histocols+1);
1526
/* should increment the value somehow */
1527
x_idx_min = histocols-1;
1530
/* Find first overlapping row */
1531
y_idx_min = (box->ymin-geomstats->ymin) / geoh * historows;
1534
#if DEBUG_GEOMETRY_STATS
1535
elog(NOTICE, " search_box overlaps %d columns on the bottom of histogram grid", -y_idx_min);
1537
/* should increment the value somehow */
1540
if (y_idx_min >= historows)
1542
#if DEBUG_GEOMETRY_STATS
1543
elog(NOTICE, " search_box overlaps %d columns on the top of histogram grid", y_idx_min-historows+1);
1545
/* should increment the value somehow */
1546
y_idx_min = historows-1;
1549
/* Find last overlapping column */
1550
x_idx_max = (box->xmax-geomstats->xmin) / geow * histocols;
1553
/* should increment the value somehow */
1556
if (x_idx_max >= histocols )
1558
/* should increment the value somehow */
1559
x_idx_max = histocols-1;
1562
/* Find last overlapping row */
1563
y_idx_max = (box->ymax-geomstats->ymin) / geoh * historows;
1566
/* should increment the value somehow */
1569
if (y_idx_max >= historows)
1571
/* should increment the value somehow */
1572
y_idx_max = historows-1;
1576
* the {x,y}_idx_{min,max}
1577
* define the grid squares that the box intersects
1579
for (y=y_idx_min; y<=y_idx_max; y++)
1581
for (x=x_idx_min; x<=x_idx_max; x++)
1586
val = geomstats->value[x+y*histocols];
1589
* Of the cell value we get
1590
* only the overlap fraction.
1593
intersect_x = LW_MIN(box->xmax, geomstats->xmin + (x+1) * geow / histocols) - LW_MAX(box->xmin, geomstats->xmin + x * geow / histocols );
1594
intersect_y = LW_MIN(box->ymax, geomstats->ymin + (y+1) * geoh / historows) - LW_MAX(box->ymin, geomstats->ymin+ y * geoh / historows) ;
1596
AOI = intersect_x*intersect_y;
1597
gain = AOI/cell_area;
1599
#if DEBUG_GEOMETRY_STATS > 1
1600
elog(NOTICE, " [%d,%d] cell val %.15f",
1602
elog(NOTICE, " [%d,%d] AOI %.15f",
1604
elog(NOTICE, " [%d,%d] gain %.15f",
1610
#if DEBUG_GEOMETRY_STATS > 1
1611
elog(NOTICE, " [%d,%d] adding %.15f to value",
1620
* If the search_box is a point, it will
1621
* overlap a single cell and thus get
1622
* it's value, which is the fraction of
1623
* samples (we can presume of row set also)
1624
* which bumped to that cell.
1626
* If the table features are points, each
1627
* of them will overlap a single histogram cell.
1628
* Our search_box value would then be correctly
1629
* computed as the sum of the bumped cells values.
1631
* If both our search_box AND the sample features
1632
* overlap more then a single histogram cell we
1633
* need to consider the fact that our sum computation
1634
* will have many duplicated included. E.g. each
1635
* single sample feature would have contributed to
1636
* raise the search_box value by as many times as
1637
* many cells in the histogram are commonly overlapped
1638
* by both searc_box and feature. We should then
1639
* divide our value by the number of cells in the virtual
1640
* 'intersection' between average feature cell occupation
1641
* and occupation of the search_box. This is as
1642
* fuzzy as you understand it :)
1644
* Consistency check: whenever the number of cells is
1645
* one of whichever part (search_box_occupation,
1646
* avg_feature_occupation) the 'intersection' must be 1.
1647
* If sounds that our 'intersaction' is actually the
1648
* minimun number between search_box_occupation and
1649
* avg_feat_occupation.
1652
overlapping_cells = (x_idx_max-x_idx_min+1) *
1653
(y_idx_max-y_idx_min+1);
1654
avg_feat_cells = geomstats->avgFeatureCells;
1656
#if DEBUG_GEOMETRY_STATS
1657
elog(NOTICE, " search_box overlaps %f cells", overlapping_cells);
1658
elog(NOTICE, " avg feat overlaps %f cells", avg_feat_cells);
1661
if ( ! overlapping_cells )
1663
#if DEBUG_GEOMETRY_STATS
1664
elog(NOTICE, " no overlapping cells, returning 0.0");
1669
gain = 1/LW_MIN(overlapping_cells, avg_feat_cells);
1670
selectivity = value*gain;
1672
#if DEBUG_GEOMETRY_STATS
1673
elog(NOTICE, " SUM(ov_histo_cells)=%f", value);
1674
elog(NOTICE, " gain=%f", gain);
1675
elog(NOTICE, " selectivity=%f", selectivity);
1678
/* prevent rounding overflows */
1679
if (selectivity > 1.0) selectivity = 1.0;
1680
else if (selectivity < 0) selectivity = 0.0;
1686
* This function should return an estimation of the number of
1687
* rows returned by a query involving an overlap check
1688
* ( it's the restrict function for the && operator )
1690
* It can make use (if available) of the statistics collected
1691
* by the geometry analyzer function.
1693
* Note that the good work is done by estimate_selectivity() above.
1694
* This function just tries to find the search_box, loads the statistics
1695
* and invoke the work-horse.
1697
* This is the one used for PG version >= 7.5
1700
PG_FUNCTION_INFO_V1(LWGEOM_gist_sel);
1701
Datum LWGEOM_gist_sel(PG_FUNCTION_ARGS)
1703
#if USE_VERSION < 81
1704
Query *root = (Query *) PG_GETARG_POINTER(0);
1706
PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
1708
/* Oid operator = PG_GETARG_OID(1); */
1709
List *args = (List *) PG_GETARG_POINTER(2);
1710
/* int varRelid = PG_GETARG_INT32(3); */
1712
HeapTuple stats_tuple;
1713
GEOM_STATS *geomstats;
1715
* This is to avoid casting the corresponding
1716
* "type-punned" pointer, which would break
1717
* "strict-aliasing rules".
1719
GEOM_STATS **gsptr=&geomstats;
1720
int geomstats_nvalues=0;
1724
BOX2DFLOAT4 search_box;
1725
float8 selectivity=0;
1727
#if DEBUG_GEOMETRY_STATS
1728
elog(NOTICE, "LWGEOM_gist_sel called");
1731
/* Fail if not a binary opclause (probably shouldn't happen) */
1732
if (list_length(args) != 2)
1734
#if DEBUG_GEOMETRY_STATS
1735
elog(NOTICE, "LWGEOM_gist_sel: not a binary opclause");
1737
PG_RETURN_FLOAT8(DEFAULT_GEOMETRY_SEL);
1742
* Find the constant part
1744
other = (Node *) linitial(args);
1745
if ( ! IsA(other, Const) )
1747
self = (Var *)other;
1748
other = (Node *) lsecond(args);
1752
self = (Var *) lsecond(args);
1755
if ( ! IsA(other, Const) )
1757
#if DEBUG_GEOMETRY_STATS
1758
elog(NOTICE, " no constant arguments - returning default selectivity");
1760
PG_RETURN_FLOAT8(DEFAULT_GEOMETRY_SEL);
1764
* We are working on two constants..
1765
* TODO: check if expression is true,
1766
* returned set would be either
1767
* the whole or none.
1769
if ( ! IsA(self, Var) )
1771
#if DEBUG_GEOMETRY_STATS
1772
elog(NOTICE, " no variable argument ? - returning default selectivity");
1774
PG_RETURN_FLOAT8(DEFAULT_GEOMETRY_SEL);
1778
* Convert the constant to a BOX
1781
in = (uchar *)PG_DETOAST_DATUM( ((Const*)other)->constvalue );
1782
if ( ! getbox2d_p(in+4, &search_box) )
1784
#if DEBUG_GEOMETRY_STATS
1785
elog(NOTICE, "search box is EMPTY");
1787
PG_RETURN_FLOAT8(0.0);
1790
#if DEBUG_GEOMETRY_STATS > 1
1791
elog(NOTICE," requested search box is : %.15g %.15g, %.15g %.15g",search_box.xmin,search_box.ymin,search_box.xmax,search_box.ymax);
1795
* Get pg_statistic row
1798
#if USE_VERSION < 81
1799
/* relid = getrelid(varRelid, root->rtable); */
1800
relid = getrelid(self->varno, root->rtable);
1802
relid = getrelid(self->varno, root->parse->rtable);
1805
stats_tuple = SearchSysCache(STATRELATT, ObjectIdGetDatum(relid), Int16GetDatum(self->varattno), 0, 0);
1806
if ( ! stats_tuple )
1808
#if DEBUG_GEOMETRY_STATS
1809
elog(NOTICE, " No statistics, returning default estimate");
1811
PG_RETURN_FLOAT8(DEFAULT_GEOMETRY_SEL);
1815
if ( ! get_attstatsslot(stats_tuple, 0, 0,
1816
STATISTIC_KIND_GEOMETRY, InvalidOid, NULL, NULL,
1817
(float4 **)gsptr, &geomstats_nvalues) )
1819
#if DEBUG_GEOMETRY_STATS
1820
elog(NOTICE, " STATISTIC_KIND_GEOMETRY stats not found - returning default geometry selectivity");
1822
ReleaseSysCache(stats_tuple);
1823
PG_RETURN_FLOAT8(DEFAULT_GEOMETRY_SEL);
1827
#if DEBUG_GEOMETRY_STATS > 1
1828
elog(NOTICE, " %d read from stats", geomstats_nvalues);
1831
#if DEBUG_GEOMETRY_STATS > 1
1832
elog(NOTICE, " histo: xmin,ymin: %f,%f",
1833
geomstats->xmin, geomstats->ymin);
1834
elog(NOTICE, " histo: xmax,ymax: %f,%f",
1835
geomstats->xmax, geomstats->ymax);
1836
elog(NOTICE, " histo: cols: %f", geomstats->rows);
1837
elog(NOTICE, " histo: rows: %f", geomstats->cols);
1838
elog(NOTICE, " histo: avgFeatureArea: %f", geomstats->avgFeatureArea);
1839
elog(NOTICE, " histo: avgFeatureCells: %f", geomstats->avgFeatureCells);
1845
selectivity = estimate_selectivity(&search_box, geomstats);
1848
#if DEBUG_GEOMETRY_STATS
1849
elog(NOTICE, " returning computed value: %f", selectivity);
1852
free_attstatsslot(0, NULL, 0, (float *)geomstats, geomstats_nvalues);
1853
ReleaseSysCache(stats_tuple);
1854
PG_RETURN_FLOAT8(selectivity);
1860
* This function is called by the analyze function iff
1861
* the geometry_analyze() function give it its pointer
1862
* (this is always the case so far).
1863
* The geometry_analyze() function is also responsible
1864
* of deciding the number of "sample" rows we will receive
1865
* here. It is able to give use other 'custom' data, but we
1866
* won't use them so far.
1868
* Our job is to build some statistics on the sample data
1869
* for use by operator estimators.
1871
* Currently we only need statistics to estimate the number of rows
1872
* overlapping a given extent (estimation function bound
1873
* to the && operator).
1877
compute_geometry_stats(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc,
1878
int samplerows, double totalrows)
1880
MemoryContext old_context;
1882
int geom_stats_size;
1883
BOX2DFLOAT4 **sampleboxes;
1884
GEOM_STATS *geomstats;
1886
int null_cnt=0, notnull_cnt=0, examinedsamples=0;
1887
BOX2DFLOAT4 *sample_extent=NULL;
1888
double total_width=0;
1889
double total_boxes_area=0;
1890
int total_boxes_cells=0;
1894
#if USE_STANDARD_DEVIATION
1895
/* for standard deviation */
1896
double avgLOWx, avgLOWy, avgHIGx, avgHIGy;
1897
double sumLOWx=0, sumLOWy=0, sumHIGx=0, sumHIGy=0;
1898
double sdLOWx=0, sdLOWy=0, sdHIGx=0, sdHIGy=0;
1899
BOX2DFLOAT4 *newhistobox=NULL;
1901
double geow, geoh; /* width and height of histogram */
1903
int cols, rows; /* histogram grid size */
1904
BOX2DFLOAT4 histobox;
1907
* This is where geometry_analyze
1908
* should put its' custom parameters.
1910
/* void *mystats = stats->extra_data; */
1913
* We'll build an histogram having from 40 to 400 boxesPerSide
1914
* Total number of cells is determined by attribute stat
1915
* target. It can go from 1600 to 160000 (stat target: 10,1000)
1917
histocells = 160*stats->attr->attstattarget;
1920
#if DEBUG_GEOMETRY_STATS
1921
elog(NOTICE, "compute_geometry_stats called");
1922
elog(NOTICE, " samplerows: %d", samplerows);
1923
elog(NOTICE, " histogram cells: %d", histocells);
1927
* We might need less space, but don't think
1928
* its worth saving...
1930
sampleboxes = palloc(sizeof(BOX2DFLOAT4 *)*samplerows);
1934
* o find extent of the sample rows
1935
* o count null-infinite/not-null values
1936
* o compute total_width
1937
* o compute total features's box area (for avgFeatureArea)
1938
* o sum features box coordinates (for standard deviation)
1940
for (i=0; i<samplerows; i++)
1946
datum = fetchfunc(stats, i, &isnull);
1956
geom = (PG_LWGEOM *)PG_DETOAST_DATUM(datum);
1958
if ( ! getbox2d_p(SERIALIZED_FORM(geom), &box) )
1960
/* Skip empty geometry */
1961
#if DEBUG_GEOMETRY_STATS
1962
elog(NOTICE, " skipped empty geometry %d", i);
1968
* Skip infinite geoms
1970
if ( ! finite(box.xmin) ||
1971
! finite(box.xmax) ||
1972
! finite(box.ymin) ||
1973
! finite(box.ymax) )
1975
#if DEBUG_GEOMETRY_STATS
1976
elog(NOTICE, " skipped infinite geometry %d", i);
1982
* Cache bounding box
1983
* TODO: reduce BOX2DFLOAT4 copies
1985
sampleboxes[notnull_cnt] = palloc(sizeof(BOX2DFLOAT4));
1986
memcpy(sampleboxes[notnull_cnt], &box, sizeof(BOX2DFLOAT4));
1989
* Add to sample extent union
1991
if ( ! sample_extent )
1993
sample_extent = palloc(sizeof(BOX2DFLOAT4));
1994
memcpy(sample_extent, &box, sizeof(BOX2DFLOAT4));
1998
sample_extent->xmax = LWGEOM_Maxf(sample_extent->xmax,
2000
sample_extent->ymax = LWGEOM_Maxf(sample_extent->ymax,
2002
sample_extent->xmin = LWGEOM_Minf(sample_extent->xmin,
2004
sample_extent->ymin = LWGEOM_Minf(sample_extent->ymin,
2008
/* TODO: ask if we need geom or bvol size for stawidth */
2009
total_width += geom->size;
2010
total_boxes_area += (box.xmax-box.xmin)*(box.ymax-box.ymin);
2012
#if USE_STANDARD_DEVIATION
2014
* Add bvol coordinates to sum for standard deviation
2017
sumLOWx += box.xmin;
2018
sumLOWy += box.ymin;
2019
sumHIGx += box.xmax;
2020
sumHIGy += box.ymax;
2025
/* give backend a chance of interrupting us */
2026
vacuum_delay_point();
2030
if ( ! notnull_cnt ) {
2031
elog(NOTICE, " no notnull values, invalid stats");
2032
stats->stats_valid = false;
2036
#if USE_STANDARD_DEVIATION
2038
#if DEBUG_GEOMETRY_STATS
2039
elog(NOTICE, " sample_extent: xmin,ymin: %f,%f",
2040
sample_extent->xmin, sample_extent->ymin);
2041
elog(NOTICE, " sample_extent: xmax,ymax: %f,%f",
2042
sample_extent->xmax, sample_extent->ymax);
2047
* o compute standard deviation
2049
avgLOWx = sumLOWx/notnull_cnt;
2050
avgLOWy = sumLOWy/notnull_cnt;
2051
avgHIGx = sumHIGx/notnull_cnt;
2052
avgHIGy = sumHIGy/notnull_cnt;
2053
for (i=0; i<notnull_cnt; i++)
2056
box = (BOX2DFLOAT4 *)sampleboxes[i];
2058
sdLOWx += (box->xmin - avgLOWx) * (box->xmin - avgLOWx);
2059
sdLOWy += (box->ymin - avgLOWy) * (box->ymin - avgLOWy);
2060
sdHIGx += (box->xmax - avgHIGx) * (box->xmax - avgHIGx);
2061
sdHIGy += (box->ymax - avgHIGy) * (box->ymax - avgHIGy);
2063
sdLOWx = sqrt(sdLOWx/notnull_cnt);
2064
sdLOWy = sqrt(sdLOWy/notnull_cnt);
2065
sdHIGx = sqrt(sdHIGx/notnull_cnt);
2066
sdHIGy = sqrt(sdHIGy/notnull_cnt);
2068
#if DEBUG_GEOMETRY_STATS
2069
elog(NOTICE, " standard deviations:");
2070
elog(NOTICE, " LOWx - avg:%f sd:%f", avgLOWx, sdLOWx);
2071
elog(NOTICE, " LOWy - avg:%f sd:%f", avgLOWy, sdLOWy);
2072
elog(NOTICE, " HIGx - avg:%f sd:%f", avgHIGx, sdHIGx);
2073
elog(NOTICE, " HIGy - avg:%f sd:%f", avgHIGy, sdHIGy);
2076
histobox.xmin = LW_MAX((avgLOWx - SDFACTOR * sdLOWx),
2077
sample_extent->xmin);
2078
histobox.ymin = LW_MAX((avgLOWy - SDFACTOR * sdLOWy),
2079
sample_extent->ymin);
2080
histobox.xmax = LW_MIN((avgHIGx + SDFACTOR * sdHIGx),
2081
sample_extent->xmax);
2082
histobox.ymax = LW_MIN((avgHIGy + SDFACTOR * sdHIGy),
2083
sample_extent->ymax);
2085
#if DEBUG_GEOMETRY_STATS
2086
elog(NOTICE, " sd_extent: xmin,ymin: %f,%f",
2087
histobox.xmin, histobox.ymin);
2088
elog(NOTICE, " sd_extent: xmax,ymax: %f,%f",
2089
histobox.xmin, histobox.ymax);
2094
* o skip hard deviants
2095
* o compute new histogram box
2097
for (i=0; i<notnull_cnt; i++)
2100
box = (BOX2DFLOAT4 *)sampleboxes[i];
2102
if ( box->xmin > histobox.xmax ||
2103
box->xmax < histobox.xmin ||
2104
box->ymin > histobox.ymax ||
2105
box->ymax < histobox.ymin )
2107
#if DEBUG_GEOMETRY_STATS > 1
2108
elog(NOTICE, " feat %d is an hard deviant, skipped", i);
2110
sampleboxes[i] = NULL;
2113
if ( ! newhistobox ) {
2114
newhistobox = palloc(sizeof(BOX2DFLOAT4));
2115
memcpy(newhistobox, box, sizeof(BOX2DFLOAT4));
2117
if ( box->xmin < newhistobox->xmin )
2118
newhistobox->xmin = box->xmin;
2119
if ( box->ymin < newhistobox->ymin )
2120
newhistobox->ymin = box->ymin;
2121
if ( box->xmax > newhistobox->xmax )
2122
newhistobox->xmax = box->xmax;
2123
if ( box->ymax > newhistobox->ymax )
2124
newhistobox->ymax = box->ymax;
2129
* Set histogram extent as the intersection between
2130
* standard deviation based histogram extent
2131
* and computed sample extent after removal of
2132
* hard deviants (there might be no hard deviants).
2134
if ( histobox.xmin < newhistobox->xmin )
2135
histobox.xmin = newhistobox->xmin;
2136
if ( histobox.ymin < newhistobox->ymin )
2137
histobox.ymin = newhistobox->ymin;
2138
if ( histobox.xmax > newhistobox->xmax )
2139
histobox.xmax = newhistobox->xmax;
2140
if ( histobox.ymax > newhistobox->ymax )
2141
histobox.ymax = newhistobox->ymax;
2144
#else /* ! USE_STANDARD_DEVIATION */
2147
* Set histogram extent box
2149
histobox.xmin = sample_extent->xmin;
2150
histobox.ymin = sample_extent->ymin;
2151
histobox.xmax = sample_extent->xmax;
2152
histobox.ymax = sample_extent->ymax;
2153
#endif /* USE_STANDARD_DEVIATION */
2156
#if DEBUG_GEOMETRY_STATS
2157
elog(NOTICE, " histogram_extent: xmin,ymin: %f,%f",
2158
histobox.xmin, histobox.ymin);
2159
elog(NOTICE, " histogram_extent: xmax,ymax: %f,%f",
2160
histobox.xmax, histobox.ymax);
2164
geow = histobox.xmax - histobox.xmin;
2165
geoh = histobox.ymax - histobox.ymin;
2168
* Compute histogram cols and rows based on aspect ratio
2169
* of histogram extent
2171
if ( ! geow && ! geoh ) {
2175
} else if ( ! geow ) {
2178
} else if ( ! geoh ) {
2183
cols = ceil(sqrt((double)histocells*(geow/geoh)));
2184
rows = ceil((double)histocells/cols);
2186
rows = ceil(sqrt((double)histocells*(geoh/geow)));
2187
cols = ceil((double)histocells/rows);
2189
histocells = cols*rows;
2191
#if DEBUG_GEOMETRY_STATS
2192
elog(NOTICE, " computed histogram grid size (CxR): %dx%d (%d cells)", cols, rows, histocells);
2197
* Create the histogram (GEOM_STATS)
2199
old_context = MemoryContextSwitchTo(stats->anl_context);
2200
geom_stats_size=sizeof(GEOM_STATS)+(histocells-1)*sizeof(float4);
2201
geomstats = palloc(geom_stats_size);
2202
MemoryContextSwitchTo(old_context);
2204
geomstats->avgFeatureArea = total_boxes_area/notnull_cnt;
2205
geomstats->xmin = histobox.xmin;
2206
geomstats->ymin = histobox.ymin;
2207
geomstats->xmax = histobox.xmax;
2208
geomstats->ymax = histobox.ymax;
2209
geomstats->cols = cols;
2210
geomstats->rows = rows;
2212
/* Initialize all values to 0 */
2213
for (i=0;i<histocells; i++) geomstats->value[i] = 0;
2215
cell_width = geow/cols;
2216
cell_height = geoh/rows;
2217
cell_area = cell_width*cell_height;
2219
#if DEBUG_GEOMETRY_STATS > 2
2220
elog(NOTICE, "cell_width: %f", cell_width);
2221
elog(NOTICE, "cell_height: %f", cell_height);
2227
* o fill histogram values with the number of
2228
* features' bbox overlaps: a feature's bvol
2229
* can fully overlap (1) or partially overlap
2230
* (fraction of 1) an histogram cell.
2232
* o compute total cells occupation
2235
for (i=0; i<notnull_cnt; i++)
2238
int x_idx_min, x_idx_max, x;
2239
int y_idx_min, y_idx_max, y;
2242
box = (BOX2DFLOAT4 *)sampleboxes[i];
2243
if ( ! box ) continue; /* hard deviant.. */
2245
/* give backend a chance of interrupting us */
2246
vacuum_delay_point();
2248
#if DEBUG_GEOMETRY_STATS > 2
2249
elog(NOTICE, " feat %d box is %f %f, %f %f",
2250
i, box->xmax, box->ymax,
2251
box->xmin, box->ymin);
2254
/* Find first overlapping column */
2255
x_idx_min = (box->xmin-geomstats->xmin) / geow * cols;
2256
if (x_idx_min <0) x_idx_min = 0;
2257
if (x_idx_min >= cols) x_idx_min = cols-1;
2259
/* Find first overlapping row */
2260
y_idx_min = (box->ymin-geomstats->ymin) / geoh * rows;
2261
if (y_idx_min <0) y_idx_min = 0;
2262
if (y_idx_min >= rows) y_idx_min = rows-1;
2264
/* Find last overlapping column */
2265
x_idx_max = (box->xmax-geomstats->xmin) / geow * cols;
2266
if (x_idx_max <0) x_idx_max = 0;
2267
if (x_idx_max >= cols ) x_idx_max = cols-1;
2269
/* Find last overlapping row */
2270
y_idx_max = (box->ymax-geomstats->ymin) / geoh * rows;
2271
if (y_idx_max <0) y_idx_max = 0;
2272
if (y_idx_max >= rows) y_idx_max = rows-1;
2273
#if DEBUG_GEOMETRY_STATS > 2
2274
elog(NOTICE, " feat %d overlaps columns %d-%d, rows %d-%d",
2275
i, x_idx_min, x_idx_max, y_idx_min, y_idx_max);
2279
* the {x,y}_idx_{min,max}
2280
* define the grid squares that the box intersects
2282
for (y=y_idx_min; y<=y_idx_max; y++)
2284
for (x=x_idx_min; x<=x_idx_max; x++)
2286
geomstats->value[x+y*cols] += 1;
2292
* before adding to the total cells
2293
* we could decide if we really
2294
* want this feature to count
2296
total_boxes_cells += numcells;
2300
#if DEBUG_GEOMETRY_STATS
2301
elog(NOTICE, " examined_samples: %d/%d", examinedsamples, samplerows);
2304
if ( ! examinedsamples ) {
2305
elog(NOTICE, " no examined values, invalid stats");
2306
stats->stats_valid = false;
2307
#if DEBUG_GEOMETRY_STATS
2308
elog(NOTICE, " no stats have been gathered");
2313
/* what about null features (TODO) ? */
2314
geomstats->avgFeatureCells = (float4)total_boxes_cells/examinedsamples;
2316
#if DEBUG_GEOMETRY_STATS
2317
elog(NOTICE, " histo: total_boxes_cells: %d", total_boxes_cells);
2318
elog(NOTICE, " histo: avgFeatureArea: %f", geomstats->avgFeatureArea);
2319
elog(NOTICE, " histo: avgFeatureCells: %f", geomstats->avgFeatureCells);
2324
* Normalize histogram
2326
* We divide each histogram cell value
2327
* by the number of samples examined.
2330
for (i=0; i<histocells; i++)
2331
geomstats->value[i] /= examinedsamples;
2333
#if DEBUG_GEOMETRY_STATS > 1
2336
for (x=0; x<cols; x++)
2338
for (y=0; y<rows; y++)
2340
elog(NOTICE, " histo[%d,%d] = %.15f", x, y, geomstats->value[x+y*cols]);
2347
* Write the statistics data
2349
stats->stakind[0] = STATISTIC_KIND_GEOMETRY;
2350
stats->staop[0] = InvalidOid;
2351
stats->stanumbers[0] = (float4 *)geomstats;
2352
stats->numnumbers[0] = geom_stats_size/sizeof(float4);
2354
stats->stanullfrac = (float4)null_cnt/samplerows;
2355
stats->stawidth = total_width/notnull_cnt;
2356
stats->stadistinct = -1.0;
2358
#if DEBUG_GEOMETRY_STATS
2359
elog(NOTICE, " out: slot 0: kind %d (STATISTIC_KIND_GEOMETRY)",
2361
elog(NOTICE, " out: slot 0: op %d (InvalidOid)", stats->staop[0]);
2362
elog(NOTICE, " out: slot 0: numnumbers %d", stats->numnumbers[0]);
2363
elog(NOTICE, " out: null fraction: %d/%d=%g", null_cnt, samplerows, stats->stanullfrac);
2364
elog(NOTICE, " out: average width: %d bytes", stats->stawidth);
2365
elog(NOTICE, " out: distinct values: all (no check done)");
2368
stats->stats_valid = true;
2372
* This function will be called when the ANALYZE command is run
2373
* on a column of the "geometry" type.
2375
* It will need to return a stats builder function reference
2376
* and a "minimum" sample rows to feed it.
2377
* If we want analisys to be completely skipped we can return
2378
* FALSE and leave output vals untouched.
2380
* What we know from this call is:
2382
* o The pg_attribute row referring to the specific column.
2383
* Could be used to get reltuples from pg_class (which
2384
* might quite inexact though...) and use them to set an
2385
* appropriate minimum number of sample rows to feed to
2386
* the stats builder. The stats builder will also receive
2387
* a more accurate "estimation" of the number or rows.
2389
* o The pg_type row for the specific column.
2390
* Could be used to set stat builder / sample rows
2391
* based on domain type (when postgis will be implemented
2394
* Being this experimental we'll stick to a static stat_builder/sample_rows
2398
PG_FUNCTION_INFO_V1(LWGEOM_analyze);
2399
Datum LWGEOM_analyze(PG_FUNCTION_ARGS)
2401
VacAttrStats *stats = (VacAttrStats *)PG_GETARG_POINTER(0);
2402
Form_pg_attribute attr = stats->attr;
2404
#if DEBUG_GEOMETRY_STATS
2405
elog(NOTICE, "lwgeom_analyze called");
2408
/* If the attstattarget column is negative, use the default value */
2409
/* NB: it is okay to scribble on stats->attr since it's a copy */
2410
if (attr->attstattarget < 0)
2411
attr->attstattarget = default_statistics_target;
2413
#if DEBUG_GEOMETRY_STATS
2414
elog(NOTICE, " attribute stat target: %d", attr->attstattarget);
2418
* There might be a reason not to analyze this column
2419
* (can we detect the absence of an index?)
2422
elog(NOTICE, "compute_geometry_stats not implemented yet");
2423
PG_RETURN_BOOL(false);
2426
/* Setup the minimum rows and the algorithm function */
2427
stats->minrows = 300 * stats->attr->attstattarget;
2428
stats->compute_stats = compute_geometry_stats;
2430
#if DEBUG_GEOMETRY_STATS
2431
elog(NOTICE, " minrows: %d", stats->minrows);
2434
/* Indicate we are done successfully */
2435
PG_RETURN_BOOL(true);
2439
* Return the estimated extent of the table
2440
* looking at gathered statistics (or NULL if
2441
* no statistics have been gathered).
2443
PG_FUNCTION_INFO_V1(LWGEOM_estimated_extent);
2444
Datum LWGEOM_estimated_extent(PG_FUNCTION_ARGS)
2453
ArrayType *array = NULL;
2455
SPITupleTable *tuptable;
2462
if ( PG_NARGS() == 3 )
2464
txnsp = PG_GETARG_TEXT_P(0);
2465
txtbl = PG_GETARG_TEXT_P(1);
2466
txcol = PG_GETARG_TEXT_P(2);
2468
else if ( PG_NARGS() == 2 )
2470
txtbl = PG_GETARG_TEXT_P(0);
2471
txcol = PG_GETARG_TEXT_P(1);
2475
elog(ERROR, "estimated_extent() called with wrong number of arguments");
2479
#if DEBUG_GEOMETRY_STATS
2480
elog(NOTICE, "LWGEOM_estimated_extent called");
2483
/* Connect to SPI manager */
2484
SPIcode = SPI_connect();
2485
if (SPIcode != SPI_OK_CONNECT)
2487
elog(ERROR, "LWGEOM_estimated_extent: couldnt open a connection to SPI");
2491
querysize = VARSIZE(txtbl)+VARSIZE(txcol)+516;
2494
nsp = palloc(VARSIZE(txnsp)+1);
2495
memcpy(nsp, VARDATA(txnsp), VARSIZE(txnsp)-VARHDRSZ);
2496
nsp[VARSIZE(txnsp)-VARHDRSZ]='\0';
2497
querysize += VARSIZE(txnsp);
2499
querysize += 32; /* current_schema() */
2502
tbl = palloc(VARSIZE(txtbl)+1);
2503
memcpy(tbl, VARDATA(txtbl), VARSIZE(txtbl)-VARHDRSZ);
2504
tbl[VARSIZE(txtbl)-VARHDRSZ]='\0';
2506
col = palloc(VARSIZE(txcol)+1);
2507
memcpy(col, VARDATA(txcol), VARSIZE(txcol)-VARHDRSZ);
2508
col[VARSIZE(txcol)-VARHDRSZ]='\0';
2510
#if DEBUG_GEOMETRY_STATS
2512
elog(NOTICE, " schema:%s table:%s column:%s", nsp, tbl, col);
2514
elog(NOTICE, " schema:current_schema() table:%s column:%s",
2519
query = palloc(querysize);
2522
/* Security check: because we access information in the pg_statistic table, we must run as the database
2523
superuser (by marking the function as SECURITY DEFINER) and check permissions ourselves */
2526
sprintf(query, "SELECT has_table_privilege((SELECT usesysid FROM pg_user WHERE usename = session_user), '%s.%s', 'select')", nsp, tbl);
2530
sprintf(query, "SELECT has_table_privilege((SELECT usesysid FROM pg_user WHERE usename = session_user), '%s', 'select')", tbl);
2533
#if DEBUG_GEOMETRY_STATS > 1
2534
elog(NOTICE, "permission check sql query is: %s", query);
2537
SPIcode = SPI_exec(query, 1);
2538
if (SPIcode != SPI_OK_SELECT)
2541
elog(ERROR, "LWGEOM_estimated_extent: couldn't execute permission check sql via SPI");
2545
tuptable = SPI_tuptable;
2546
tupdesc = SPI_tuptable->tupdesc;
2547
tuple = tuptable->vals[0];
2549
if (!DatumGetBool(SPI_getbinval(tuple, tupdesc, 1, &isnull)))
2552
elog(ERROR, "LWGEOM_estimated_extent: permission denied for relation %s", tbl);
2557
/* Return the stats data */
2560
sprintf(query, "SELECT s.stanumbers1[5:8] FROM pg_statistic s, pg_class c, pg_attribute a, pg_namespace n WHERE c.relname = '%s' AND a.attrelid = c.oid AND a.attname = '%s' AND n.nspname = '%s' AND c.relnamespace = n.oid AND s.starelid=c.oid AND s.staattnum = a.attnum AND staattnum = attnum", tbl, col, nsp);
2564
sprintf(query, "SELECT s.stanumbers1[5:8] FROM pg_statistic s, pg_class c, pg_attribute a, pg_namespace n WHERE c.relname = '%s' AND a.attrelid = c.oid AND a.attname = '%s' AND n.nspname = current_schema() AND c.relnamespace = n.oid AND s.starelid=c.oid AND s.staattnum = a.attnum AND staattnum = attnum", tbl, col);
2567
#if DEBUG_GEOMETRY_STATS > 1
2568
elog(NOTICE, " query: %s", query);
2571
SPIcode = SPI_exec(query, 1);
2572
if (SPIcode != SPI_OK_SELECT )
2575
elog(ERROR,"LWGEOM_estimated_extent: couldnt execute sql via SPI");
2578
if (SPI_processed != 1)
2581
#if DEBUG_GEOMETRY_STATS
2582
elog(NOTICE, " %d stat rows", SPI_processed);
2584
elog(ERROR, "LWGEOM_estimated_extent: couldn't locate table within current schema");
2588
tuptable = SPI_tuptable;
2589
tupdesc = SPI_tuptable->tupdesc;
2590
tuple = tuptable->vals[0];
2591
array = DatumGetArrayTypeP(SPI_getbinval(tuple, tupdesc, 1, &isnull));
2595
#if DEBUG_GEOMETRY_STATS
2596
elog(NOTICE, " stats are NULL");
2598
elog(ERROR, "LWGEOM_estimated_extent: couldn't locate statistics for table");
2601
if ( ArrayGetNItems(ARR_NDIM(array), ARR_DIMS(array)) != 4 )
2603
elog(ERROR, " corrupted histogram");
2607
#if DEBUG_GEOMETRY_STATS
2608
elog(NOTICE, " stats array has %d elems", ArrayGetNItems(ARR_NDIM(array), ARR_DIMS(array)));
2612
* Construct box2dfloat4.
2613
* Must allocate this in upper executor context
2614
* to keep it alive after SPI_finish().
2616
box = SPI_palloc(sizeof(BOX2DFLOAT4));
2618
/* Construct the box */
2619
memcpy(box, ARR_DATA_PTR(array), sizeof(BOX2DFLOAT4));
2621
#if DEBUG_GEOMETRY_STATS
2622
elog(NOTICE, " histogram extent = %g %g, %g %g", box->xmin,
2623
box->ymin, box->xmax, box->ymax);
2626
SPIcode = SPI_finish();
2627
if (SPIcode != SPI_OK_FINISH )
2629
elog(ERROR, "LWGEOM_estimated_extent: couldnt disconnect from SPI");
2632
/* TODO: enlarge the box by some factor */
2634
PG_RETURN_POINTER(box);
2638
#endif /* USE_VERSION >= 80 */
2641
/**********************************************************************
2643
* Revision 1.39 2006/05/30 08:38:58 strk
2644
* Added some missing copyright headers.
2646
* Revision 1.38 2006/03/13 10:54:08 strk
2647
* Applied patch from Mark Cave Ayland embedding access control for
2648
* the estimated_extent functions.
2650
* Revision 1.37 2006/01/09 11:48:15 strk
2651
* Fixed "strict-aliasing rule" breaks.
2653
* Revision 1.36 2005/12/30 17:40:37 strk
2654
* Moved PG_LWGEOM WKB I/O and SRID get/set funx
2655
* from lwgeom_api.c to lwgeom_pg.c.
2656
* Made lwgeom_from_ewkb directly invoke grammar parser rather then invoke
2657
* the PG_LWGEOM-specific function.
2658
* Cleaned up signedness-related and comments-related warnings for the files
2659
* being committed (more to do on other files)
2661
* Revision 1.35 2005/10/10 16:19:16 strk
2662
* Fixed null values fraction computation in geometry analyzer as suggested by Michael Fuhr
2664
* Revision 1.34 2005/09/08 19:26:22 strk
2665
* Handled search_box outside of histogram_box case in selectivity estimator
2667
* Revision 1.33 2005/06/28 22:00:09 strk
2668
* Fixed extimators to work with postgresql 8.1.x
2670
* Revision 1.32 2005/04/22 01:07:09 strk
2671
* Fixed bug in join selectivity estimator returning invalid estimates (>1)
2673
* Revision 1.31 2005/04/18 14:12:43 strk
2674
* Slightly changed standard deviation computation to be more corner-case-friendly.
2676
* Revision 1.30 2005/04/18 10:57:13 strk
2677
* Applied patched by Ron Mayer fixing memory leakages and invalid results
2678
* in join selectivity estimator. Fixed some return to use default JOIN
2679
* selectivity estimate instead of default RESTRICT selectivity estimate.
2681
* Revision 1.29 2005/03/25 09:34:25 strk
2684
* Revision 1.28 2005/03/24 16:27:32 strk
2685
* Added comments in estimate_allocation() bugfix point.
2687
* Revision 1.27 2005/03/24 14:45:50 strk
2688
* Fixed bug in estimated_extent() returning pointer to a memory allocated in SPI memory context
2690
* Revision 1.26 2005/03/08 09:27:23 strk
2691
* RESTRICT selectivity estimator use self->varno instead of varRelid.
2692
* Seems to work for subqueries...
2694
* Revision 1.25 2005/03/08 09:23:34 strk
2695
* Fixed debugging lines.
2697
* Revision 1.24 2005/02/21 16:22:32 strk
2698
* Changed min() max() usage with LW_MIN() LW_MAX()
2700
* Revision 1.23 2005/02/10 10:52:53 strk
2701
* Changed 'char' to 'uchar' (unsigned char typedef) wherever octet is actually
2704
* Revision 1.22 2005/01/13 18:26:49 strk
2705
* estimated_extent() implemented for PG<80
2707
* Revision 1.21 2005/01/13 17:41:40 strk
2708
* estimated_extent() prepared for future expansion (support of pre-800 PGSQL)
2710
* Revision 1.20 2005/01/07 09:52:12 strk
2711
* JOINSEL disabled for builds against pgsql<80
2713
* Revision 1.19 2004/12/22 17:12:34 strk
2714
* Added Mark Cave-Ayland implementation of JOIN selectivity estimator.
2716
* Revision 1.18 2004/12/21 12:21:45 mcayland
2717
* Fixed bug in pass 4 where sample boxes were referred as BOXs and not BOX2DFLOAT4. Also increased SDFACTOR to 3.25
2719
* Revision 1.17 2004/12/17 18:00:33 strk
2720
* LWGEOM_gist_joinsel defined for all PG versions
2722
* Revision 1.16 2004/12/17 11:07:48 strk
2723
* Added missing prototype
2725
* Revision 1.15 2004/12/13 14:03:07 strk
2726
* Initial skeleton on join selectivity estimator.
2727
* Current estimators application for box2d && box2d operator.
2729
* Revision 1.14 2004/12/13 12:25:27 strk
2730
* Removed obsoleted function and fixed some warnings.
2732
* Revision 1.13 2004/12/10 12:35:11 strk
2733
* implemented estimated_extent() function
2735
* Revision 1.12 2004/11/04 11:40:08 strk
2736
* Renamed max/min/avg macros to LW_MAX, LW_MIN, LW_AVG.
2738
* Revision 1.11 2004/10/27 11:02:24 strk
2739
* Removed another getbox2d() call.
2741
* Revision 1.10 2004/10/25 17:07:09 strk
2742
* Obsoleted getbox2d(). Use getbox2d_p() or getbox2d_internal() instead.
2744
* Revision 1.9 2004/10/08 13:20:54 strk
2746
* Changed LWGEOM structure to point to an actual BOX2DFLOAT4.
2747
* Renamed most function to reflect a TYPE_method naming convention.
2748
* (you'll need a dump/reload for it to work)
2749
* Added more manipulation functions.
2751
**********************************************************************/