1
/*-------------------------------------------------------------------------
4
* The query optimizer external interface.
6
* Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
7
* Portions Copyright (c) 1994, Regents of the University of California
11
* $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.177.4.1 2005-01-28 19:36:02 tgl Exp $
13
*-------------------------------------------------------------------------
20
#include "catalog/pg_operator.h"
21
#include "catalog/pg_type.h"
22
#include "executor/executor.h"
23
#include "executor/nodeAgg.h"
24
#include "miscadmin.h"
25
#include "nodes/makefuncs.h"
26
#ifdef OPTIMIZER_DEBUG
27
#include "nodes/print.h"
29
#include "optimizer/clauses.h"
30
#include "optimizer/cost.h"
31
#include "optimizer/pathnode.h"
32
#include "optimizer/paths.h"
33
#include "optimizer/planmain.h"
34
#include "optimizer/planner.h"
35
#include "optimizer/prep.h"
36
#include "optimizer/subselect.h"
37
#include "optimizer/tlist.h"
38
#include "optimizer/var.h"
39
#include "parser/analyze.h"
40
#include "parser/parsetree.h"
41
#include "parser/parse_expr.h"
42
#include "parser/parse_oper.h"
43
#include "utils/selfuncs.h"
44
#include "utils/syscache.h"
47
ParamListInfo PlannerBoundParamList = NULL; /* current boundParams */
50
/* Expression kind codes for preprocess_expression */
51
#define EXPRKIND_QUAL 0
52
#define EXPRKIND_TARGET 1
53
#define EXPRKIND_RTFUNC 2
54
#define EXPRKIND_LIMIT 3
55
#define EXPRKIND_ININFO 4
58
static Node *preprocess_expression(Query *parse, Node *expr, int kind);
59
static void preprocess_qual_conditions(Query *parse, Node *jtnode);
60
static Plan *inheritance_planner(Query *parse, List *inheritlist);
61
static Plan *grouping_planner(Query *parse, double tuple_fraction);
62
static bool hash_safe_grouping(Query *parse);
63
static List *make_subplanTargetList(Query *parse, List *tlist,
64
AttrNumber **groupColIdx, bool *need_tlist_eval);
65
static void locate_grouping_columns(Query *parse,
68
AttrNumber *groupColIdx);
69
static List *postprocess_setop_tlist(List *new_tlist, List *orig_tlist);
72
/*****************************************************************************
74
* Query optimizer entry point
76
*****************************************************************************/
78
planner(Query *parse, bool isCursor, int cursorOptions,
79
ParamListInfo boundParams)
81
double tuple_fraction;
83
Index save_PlannerQueryLevel;
84
List *save_PlannerParamList;
85
ParamListInfo save_PlannerBoundParamList;
88
* The planner can be called recursively (an example is when
89
* eval_const_expressions tries to pre-evaluate an SQL function). So,
90
* these global state variables must be saved and restored.
92
* Query level and the param list cannot be moved into the Query
93
* structure since their whole purpose is communication across
94
* multiple sub-Queries. Also, boundParams is explicitly info from
95
* outside the Query, and so is likewise better handled as a global
98
* Note we do NOT save and restore PlannerPlanId: it exists to assign
99
* unique IDs to SubPlan nodes, and we want those IDs to be unique for
100
* the life of a backend. Also, PlannerInitPlan is saved/restored in
101
* subquery_planner, not here.
103
save_PlannerQueryLevel = PlannerQueryLevel;
104
save_PlannerParamList = PlannerParamList;
105
save_PlannerBoundParamList = PlannerBoundParamList;
107
/* Initialize state for handling outer-level references and params */
108
PlannerQueryLevel = 0; /* will be 1 in top-level subquery_planner */
109
PlannerParamList = NIL;
110
PlannerBoundParamList = boundParams;
112
/* Determine what fraction of the plan is likely to be scanned */
116
* We have no real idea how many tuples the user will ultimately
117
* FETCH from a cursor, but it seems a good bet that he doesn't
118
* want 'em all. Optimize for 10% retrieval (you gotta better
119
* number? Should this be a SETtable parameter?)
121
tuple_fraction = 0.10;
125
/* Default assumption is we need all the tuples */
126
tuple_fraction = 0.0;
129
/* primary planning entry point (may recurse for subqueries) */
130
result_plan = subquery_planner(parse, tuple_fraction);
132
Assert(PlannerQueryLevel == 0);
135
* If creating a plan for a scrollable cursor, make sure it can run
136
* backwards on demand. Add a Material node at the top at need.
138
if (isCursor && (cursorOptions & CURSOR_OPT_SCROLL))
140
if (!ExecSupportsBackwardScan(result_plan))
141
result_plan = materialize_finished_plan(result_plan);
144
/* executor wants to know total number of Params used overall */
145
result_plan->nParamExec = list_length(PlannerParamList);
147
/* final cleanup of the plan */
148
set_plan_references(result_plan, parse->rtable);
150
/* restore state for outer planner, if any */
151
PlannerQueryLevel = save_PlannerQueryLevel;
152
PlannerParamList = save_PlannerParamList;
153
PlannerBoundParamList = save_PlannerBoundParamList;
159
/*--------------------
161
* Invokes the planner on a subquery. We recurse to here for each
162
* sub-SELECT found in the query tree.
164
* parse is the querytree produced by the parser & rewriter.
165
* tuple_fraction is the fraction of tuples we expect will be retrieved.
166
* tuple_fraction is interpreted as explained for grouping_planner, below.
168
* Basically, this routine does the stuff that should only be done once
169
* per Query object. It then calls grouping_planner. At one time,
170
* grouping_planner could be invoked recursively on the same Query object;
171
* that's not currently true, but we keep the separation between the two
172
* routines anyway, in case we need it again someday.
174
* subquery_planner will be called recursively to handle sub-Query nodes
175
* found within the query's expressions and rangetable.
177
* Returns a query plan.
178
*--------------------
181
subquery_planner(Query *parse, double tuple_fraction)
183
List *saved_initplan = PlannerInitPlan;
184
int saved_planid = PlannerPlanId;
191
/* Set up for a new level of subquery */
193
PlannerInitPlan = NIL;
196
* Look for IN clauses at the top level of WHERE, and transform them
197
* into joins. Note that this step only handles IN clauses originally
198
* at top level of WHERE; if we pull up any subqueries in the next
199
* step, their INs are processed just before pulling them up.
201
parse->in_info_list = NIL;
202
if (parse->hasSubLinks)
203
parse->jointree->quals = pull_up_IN_clauses(parse,
204
parse->jointree->quals);
207
* Check to see if any subqueries in the rangetable can be merged into
210
parse->jointree = (FromExpr *)
211
pull_up_subqueries(parse, (Node *) parse->jointree, false);
214
* Detect whether any rangetable entries are RTE_JOIN kind; if not, we
215
* can avoid the expense of doing flatten_join_alias_vars(). Also
216
* check for outer joins --- if none, we can skip
217
* reduce_outer_joins(). This must be done after we have done
218
* pull_up_subqueries, of course.
220
parse->hasJoinRTEs = false;
221
hasOuterJoins = false;
222
foreach(l, parse->rtable)
224
RangeTblEntry *rte = (RangeTblEntry *) lfirst(l);
226
if (rte->rtekind == RTE_JOIN)
228
parse->hasJoinRTEs = true;
229
if (IS_OUTER_JOIN(rte->jointype))
231
hasOuterJoins = true;
232
/* Can quit scanning once we find an outer join */
239
* Do expression preprocessing on targetlist and quals.
241
parse->targetList = (List *)
242
preprocess_expression(parse, (Node *) parse->targetList,
245
preprocess_qual_conditions(parse, (Node *) parse->jointree);
247
parse->havingQual = preprocess_expression(parse, parse->havingQual,
250
parse->limitOffset = preprocess_expression(parse, parse->limitOffset,
252
parse->limitCount = preprocess_expression(parse, parse->limitCount,
255
parse->in_info_list = (List *)
256
preprocess_expression(parse, (Node *) parse->in_info_list,
259
/* Also need to preprocess expressions for function RTEs */
260
foreach(l, parse->rtable)
262
RangeTblEntry *rte = (RangeTblEntry *) lfirst(l);
264
if (rte->rtekind == RTE_FUNCTION)
265
rte->funcexpr = preprocess_expression(parse, rte->funcexpr,
270
* A HAVING clause without aggregates is equivalent to a WHERE clause
271
* (except it can only refer to grouped fields). Transfer any
272
* agg-free clauses of the HAVING qual into WHERE. This may seem like
273
* wasting cycles to cater to stupidly-written queries, but there are
274
* other reasons for doing it. Firstly, if the query contains no aggs
275
* at all, then we aren't going to generate an Agg plan node, and so
276
* there'll be no place to execute HAVING conditions; without this
277
* transfer, we'd lose the HAVING condition entirely, which is wrong.
278
* Secondly, when we push down a qual condition into a sub-query, it's
279
* easiest to push the qual into HAVING always, in case it contains
280
* aggs, and then let this code sort it out.
282
* Note that both havingQual and parse->jointree->quals are in
283
* implicitly-ANDed-list form at this point, even though they are
284
* declared as Node *.
287
foreach(l, (List *) parse->havingQual)
289
Node *havingclause = (Node *) lfirst(l);
291
if (contain_agg_clause(havingclause))
292
newHaving = lappend(newHaving, havingclause);
294
parse->jointree->quals = (Node *)
295
lappend((List *) parse->jointree->quals, havingclause);
297
parse->havingQual = (Node *) newHaving;
300
* If we have any outer joins, try to reduce them to plain inner
301
* joins. This step is most easily done after we've done expression
305
reduce_outer_joins(parse);
308
* See if we can simplify the jointree; opportunities for this may
309
* come from having pulled up subqueries, or from flattening explicit
310
* JOIN syntax. We must do this after flattening JOIN alias
311
* variables, since eliminating explicit JOIN nodes from the jointree
312
* will cause get_relids_for_join() to fail. But it should happen
313
* after reduce_outer_joins, anyway.
315
parse->jointree = (FromExpr *)
316
simplify_jointree(parse, (Node *) parse->jointree);
319
* Do the main planning. If we have an inherited target relation,
320
* that needs special processing, else go straight to
323
if (parse->resultRelation &&
324
(lst = expand_inherited_rtentry(parse, parse->resultRelation)) != NIL)
325
plan = inheritance_planner(parse, lst);
327
plan = grouping_planner(parse, tuple_fraction);
330
* If any subplans were generated, or if we're inside a subplan, build
331
* initPlan list and extParam/allParam sets for plan nodes.
333
if (PlannerPlanId != saved_planid || PlannerQueryLevel > 1)
335
Cost initplan_cost = 0;
337
/* Prepare extParam/allParam sets for all nodes in tree */
338
SS_finalize_plan(plan, parse->rtable);
341
* SS_finalize_plan doesn't handle initPlans, so we have to
342
* manually attach them to the topmost plan node, and add their
343
* extParams to the topmost node's, too.
345
* We also add the total_cost of each initPlan to the startup cost of
346
* the top node. This is a conservative overestimate, since in
347
* fact each initPlan might be executed later than plan startup,
348
* or even not at all.
350
plan->initPlan = PlannerInitPlan;
352
foreach(l, plan->initPlan)
354
SubPlan *initplan = (SubPlan *) lfirst(l);
356
plan->extParam = bms_add_members(plan->extParam,
357
initplan->plan->extParam);
358
/* allParam must include all members of extParam */
359
plan->allParam = bms_add_members(plan->allParam,
361
initplan_cost += initplan->plan->total_cost;
364
plan->startup_cost += initplan_cost;
365
plan->total_cost += initplan_cost;
368
/* Return to outer subquery context */
370
PlannerInitPlan = saved_initplan;
371
/* we do NOT restore PlannerPlanId; that's not an oversight! */
377
* preprocess_expression
378
* Do subquery_planner's preprocessing work for an expression,
379
* which can be a targetlist, a WHERE clause (including JOIN/ON
380
* conditions), or a HAVING clause.
383
preprocess_expression(Query *parse, Node *expr, int kind)
386
* If the query has any join RTEs, replace join alias variables with
387
* base-relation variables. We must do this before sublink processing,
388
* else sublinks expanded out from join aliases wouldn't get
391
if (parse->hasJoinRTEs)
392
expr = flatten_join_alias_vars(parse, expr);
395
* If it's a qual or havingQual, canonicalize it. It seems most
396
* useful to do this before applying eval_const_expressions, since the
397
* latter can optimize flattened AND/ORs better than unflattened ones.
399
* Note: all processing of a qual expression after this point must be
400
* careful to maintain AND/OR flatness --- that is, do not generate a
401
* tree with AND directly under AND, nor OR directly under OR.
403
if (kind == EXPRKIND_QUAL)
405
expr = (Node *) canonicalize_qual((Expr *) expr);
407
#ifdef OPTIMIZER_DEBUG
408
printf("After canonicalize_qual()\n");
414
* Simplify constant expressions.
416
expr = eval_const_expressions(expr);
418
/* Expand SubLinks to SubPlans */
419
if (parse->hasSubLinks)
420
expr = SS_process_sublinks(expr, (kind == EXPRKIND_QUAL));
423
* XXX do not insert anything here unless you have grokked the
424
* comments in SS_replace_correlation_vars ...
427
/* Replace uplevel vars with Param nodes */
428
if (PlannerQueryLevel > 1)
429
expr = SS_replace_correlation_vars(expr);
432
* If it's a qual or havingQual, convert it to implicit-AND format.
433
* (We don't want to do this before eval_const_expressions, since the
434
* latter would be unable to simplify a top-level AND correctly. Also,
435
* SS_process_sublinks expects explicit-AND format.)
437
if (kind == EXPRKIND_QUAL)
438
expr = (Node *) make_ands_implicit((Expr *) expr);
444
* preprocess_qual_conditions
445
* Recursively scan the query's jointree and do subquery_planner's
446
* preprocessing work on each qual condition found therein.
449
preprocess_qual_conditions(Query *parse, Node *jtnode)
453
if (IsA(jtnode, RangeTblRef))
455
/* nothing to do here */
457
else if (IsA(jtnode, FromExpr))
459
FromExpr *f = (FromExpr *) jtnode;
462
foreach(l, f->fromlist)
463
preprocess_qual_conditions(parse, lfirst(l));
465
f->quals = preprocess_expression(parse, f->quals, EXPRKIND_QUAL);
467
else if (IsA(jtnode, JoinExpr))
469
JoinExpr *j = (JoinExpr *) jtnode;
471
preprocess_qual_conditions(parse, j->larg);
472
preprocess_qual_conditions(parse, j->rarg);
474
j->quals = preprocess_expression(parse, j->quals, EXPRKIND_QUAL);
477
elog(ERROR, "unrecognized node type: %d",
478
(int) nodeTag(jtnode));
481
/*--------------------
482
* inheritance_planner
483
* Generate a plan in the case where the result relation is an
486
* We have to handle this case differently from cases where a source
487
* relation is an inheritance set. Source inheritance is expanded at
488
* the bottom of the plan tree (see allpaths.c), but target inheritance
489
* has to be expanded at the top. The reason is that for UPDATE, each
490
* target relation needs a different targetlist matching its own column
491
* set. (This is not so critical for DELETE, but for simplicity we treat
492
* inherited DELETE the same way.) Fortunately, the UPDATE/DELETE target
493
* can never be the nullable side of an outer join, so it's OK to generate
496
* parse is the querytree produced by the parser & rewriter.
497
* inheritlist is an integer list of RT indexes for the result relation set.
499
* Returns a query plan.
500
*--------------------
503
inheritance_planner(Query *parse, List *inheritlist)
505
int parentRTindex = parse->resultRelation;
506
Oid parentOID = getrelid(parentRTindex, parse->rtable);
507
int mainrtlength = list_length(parse->rtable);
508
List *subplans = NIL;
512
foreach(l, inheritlist)
514
int childRTindex = lfirst_int(l);
515
Oid childOID = getrelid(childRTindex, parse->rtable);
519
/* Generate modified query with this rel as target */
520
subquery = (Query *) adjust_inherited_attrs((Node *) parse,
521
parentRTindex, parentOID,
522
childRTindex, childOID);
524
subplan = grouping_planner(subquery, 0.0 /* retrieve all tuples */ );
525
subplans = lappend(subplans, subplan);
528
* XXX my goodness this next bit is ugly. Really need to think about
529
* ways to rein in planner's habit of scribbling on its input.
531
* Planning of the subquery might have modified the rangetable,
532
* either by addition of RTEs due to expansion of inherited source
533
* tables, or by changes of the Query structures inside subquery
534
* RTEs. We have to ensure that this gets propagated back to the
535
* master copy. However, if we aren't done planning yet, we also
536
* need to ensure that subsequent calls to grouping_planner have
537
* virgin sub-Queries to work from. So, if we are at the last
538
* list entry, just copy the subquery rangetable back to the master
539
* copy; if we are not, then extend the master copy by adding
540
* whatever the subquery added. (We assume these added entries
541
* will go untouched by the future grouping_planner calls. We are
542
* also effectively assuming that sub-Queries will get planned
543
* identically each time, or at least that the impacts on their
544
* rangetables will be the same each time. Did I say this is ugly?)
546
if (lnext(l) == NULL)
547
parse->rtable = subquery->rtable;
550
int subrtlength = list_length(subquery->rtable);
552
if (subrtlength > mainrtlength)
556
subrt = list_copy_tail(subquery->rtable, mainrtlength);
557
parse->rtable = list_concat(parse->rtable, subrt);
558
mainrtlength = subrtlength;
562
/* Save preprocessed tlist from first rel for use in Append */
564
tlist = subplan->targetlist;
567
/* Save the target-relations list for the executor, too */
568
parse->resultRelations = inheritlist;
570
/* Mark result as unordered (probably unnecessary) */
571
parse->query_pathkeys = NIL;
573
return (Plan *) make_append(subplans, true, tlist);
576
/*--------------------
578
* Perform planning steps related to grouping, aggregation, etc.
579
* This primarily means adding top-level processing to the basic
580
* query plan produced by query_planner.
582
* parse is the querytree produced by the parser & rewriter.
583
* tuple_fraction is the fraction of tuples we expect will be retrieved
585
* tuple_fraction is interpreted as follows:
586
* 0: expect all tuples to be retrieved (normal case)
587
* 0 < tuple_fraction < 1: expect the given fraction of tuples available
588
* from the plan to be retrieved
589
* tuple_fraction >= 1: tuple_fraction is the absolute number of tuples
590
* expected to be retrieved (ie, a LIMIT specification)
592
* Returns a query plan. Also, parse->query_pathkeys is returned as the
593
* actual output ordering of the plan (in pathkey format).
594
*--------------------
597
grouping_planner(Query *parse, double tuple_fraction)
599
List *tlist = parse->targetList;
601
List *current_pathkeys;
604
if (parse->setOperations)
606
List *set_sortclauses;
609
* Construct the plan for set operations. The result will not
610
* need any work except perhaps a top-level sort and/or LIMIT.
612
result_plan = plan_set_operations(parse,
616
* Calculate pathkeys representing the sort order (if any) of the
617
* set operation's result. We have to do this before overwriting
618
* the sort key information...
620
current_pathkeys = make_pathkeys_for_sortclauses(set_sortclauses,
621
result_plan->targetlist);
622
current_pathkeys = canonicalize_pathkeys(parse, current_pathkeys);
625
* We should not need to call preprocess_targetlist, since we must
626
* be in a SELECT query node. Instead, use the targetlist
627
* returned by plan_set_operations (since this tells whether it
628
* returned any resjunk columns!), and transfer any sort key
629
* information from the original tlist.
631
Assert(parse->commandType == CMD_SELECT);
633
tlist = postprocess_setop_tlist(result_plan->targetlist, tlist);
636
* Can't handle FOR UPDATE here (parser should have checked
637
* already, but let's make sure).
641
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
642
errmsg("SELECT FOR UPDATE is not allowed with UNION/INTERSECT/EXCEPT")));
645
* Calculate pathkeys that represent result ordering requirements
647
sort_pathkeys = make_pathkeys_for_sortclauses(parse->sortClause,
649
sort_pathkeys = canonicalize_pathkeys(parse, sort_pathkeys);
653
/* No set operations, do regular planning */
655
List *group_pathkeys;
656
AttrNumber *groupColIdx = NULL;
657
bool need_tlist_eval = true;
659
double sub_tuple_fraction;
662
double dNumGroups = 0;
664
AggClauseCounts agg_counts;
665
int numGroupCols = list_length(parse->groupClause);
666
bool use_hashed_grouping = false;
668
MemSet(&agg_counts, 0, sizeof(AggClauseCounts));
670
/* Preprocess targetlist in case we are inside an INSERT/UPDATE. */
671
tlist = preprocess_targetlist(tlist,
673
parse->resultRelation,
677
* Add TID targets for rels selected FOR UPDATE (should this be
678
* done in preprocess_targetlist?). The executor uses the TID to
679
* know which rows to lock, much as for UPDATE or DELETE.
686
* We've got trouble if the FOR UPDATE appears inside
687
* grouping, since grouping renders a reference to individual
688
* tuple CTIDs invalid. This is also checked at parse time,
689
* but that's insufficient because of rule substitution, query
692
CheckSelectForUpdate(parse);
695
* Currently the executor only supports FOR UPDATE at top
698
if (PlannerQueryLevel > 1)
700
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
701
errmsg("SELECT FOR UPDATE is not allowed in subqueries")));
703
foreach(l, parse->rowMarks)
705
Index rti = lfirst_int(l);
711
resname = (char *) palloc(32);
712
snprintf(resname, 32, "ctid%u", rti);
713
resdom = makeResdom(list_length(tlist) + 1,
720
SelfItemPointerAttributeNumber,
725
ctid = makeTargetEntry(resdom, (Expr *) var);
726
tlist = lappend(tlist, ctid);
731
* Generate appropriate target list for subplan; may be different
732
* from tlist if grouping or aggregation is needed.
734
sub_tlist = make_subplanTargetList(parse, tlist,
735
&groupColIdx, &need_tlist_eval);
738
* Calculate pathkeys that represent grouping/ordering
741
group_pathkeys = make_pathkeys_for_sortclauses(parse->groupClause,
743
sort_pathkeys = make_pathkeys_for_sortclauses(parse->sortClause,
747
* Will need actual number of aggregates for estimating costs.
749
* Note: we do not attempt to detect duplicate aggregates here; a
750
* somewhat-overestimated count is okay for our present purposes.
752
* Note: think not that we can turn off hasAggs if we find no aggs.
753
* It is possible for constant-expression simplification to remove
754
* all explicit references to aggs, but we still have to follow
755
* the aggregate semantics (eg, producing only one output row).
759
count_agg_clauses((Node *) tlist, &agg_counts);
760
count_agg_clauses(parse->havingQual, &agg_counts);
764
* Figure out whether we need a sorted result from query_planner.
766
* If we have a GROUP BY clause, then we want a result sorted
767
* properly for grouping. Otherwise, if there is an ORDER BY
768
* clause, we want to sort by the ORDER BY clause. (Note: if we
769
* have both, and ORDER BY is a superset of GROUP BY, it would be
770
* tempting to request sort by ORDER BY --- but that might just
771
* leave us failing to exploit an available sort order at all.
772
* Needs more thought...)
774
if (parse->groupClause)
775
parse->query_pathkeys = group_pathkeys;
776
else if (parse->sortClause)
777
parse->query_pathkeys = sort_pathkeys;
779
parse->query_pathkeys = NIL;
782
* Adjust tuple_fraction if we see that we are going to apply
783
* limiting/grouping/aggregation/etc. This is not overridable by
784
* the caller, since it reflects plan actions that this routine
785
* will certainly take, not assumptions about context.
787
if (parse->limitCount != NULL)
790
* A LIMIT clause limits the absolute number of tuples
791
* returned. However, if it's not a constant LIMIT then we
792
* have to punt; for lack of a better idea, assume 10% of the
793
* plan's result is wanted.
795
double limit_fraction = 0.0;
797
if (IsA(parse->limitCount, Const))
799
Const *limitc = (Const *) parse->limitCount;
800
int32 count = DatumGetInt32(limitc->constvalue);
803
* A NULL-constant LIMIT represents "LIMIT ALL", which we
804
* treat the same as no limit (ie, expect to retrieve all
807
if (!limitc->constisnull && count > 0)
809
limit_fraction = (double) count;
810
/* We must also consider the OFFSET, if present */
811
if (parse->limitOffset != NULL)
813
if (IsA(parse->limitOffset, Const))
817
limitc = (Const *) parse->limitOffset;
818
offset = DatumGetInt32(limitc->constvalue);
819
if (!limitc->constisnull && offset > 0)
820
limit_fraction += (double) offset;
824
/* OFFSET is an expression ... punt ... */
825
limit_fraction = 0.10;
832
/* LIMIT is an expression ... punt ... */
833
limit_fraction = 0.10;
836
if (limit_fraction > 0.0)
839
* If we have absolute limits from both caller and LIMIT,
840
* use the smaller value; if one is fractional and the
841
* other absolute, treat the fraction as a fraction of the
842
* absolute value; else we can multiply the two fractions
845
if (tuple_fraction >= 1.0)
847
if (limit_fraction >= 1.0)
850
tuple_fraction = Min(tuple_fraction, limit_fraction);
854
/* caller absolute, limit fractional */
855
tuple_fraction *= limit_fraction;
856
if (tuple_fraction < 1.0)
857
tuple_fraction = 1.0;
860
else if (tuple_fraction > 0.0)
862
if (limit_fraction >= 1.0)
864
/* caller fractional, limit absolute */
865
tuple_fraction *= limit_fraction;
866
if (tuple_fraction < 1.0)
867
tuple_fraction = 1.0;
871
/* both fractional */
872
tuple_fraction *= limit_fraction;
877
/* no info from caller, just use limit */
878
tuple_fraction = limit_fraction;
884
* With grouping or aggregation, the tuple fraction to pass to
885
* query_planner() may be different from what it is at top level.
887
sub_tuple_fraction = tuple_fraction;
889
if (parse->groupClause)
892
* In GROUP BY mode, we have the little problem that we don't
893
* really know how many input tuples will be needed to make a
894
* group, so we can't translate an output LIMIT count into an
895
* input count. For lack of a better idea, assume 25% of the
896
* input data will be processed if there is any output limit.
897
* However, if the caller gave us a fraction rather than an
898
* absolute count, we can keep using that fraction (which
899
* amounts to assuming that all the groups are about the same
902
if (sub_tuple_fraction >= 1.0)
903
sub_tuple_fraction = 0.25;
906
* If both GROUP BY and ORDER BY are specified, we will need
907
* two levels of sort --- and, therefore, certainly need to
908
* read all the input tuples --- unless ORDER BY is a subset
909
* of GROUP BY. (We have not yet canonicalized the pathkeys,
910
* so must use the slower noncanonical comparison method.)
912
if (parse->groupClause && parse->sortClause &&
913
!noncanonical_pathkeys_contained_in(sort_pathkeys,
915
sub_tuple_fraction = 0.0;
917
else if (parse->hasAggs)
920
* Ungrouped aggregate will certainly want all the input
923
sub_tuple_fraction = 0.0;
925
else if (parse->distinctClause)
928
* SELECT DISTINCT, like GROUP, will absorb an unpredictable
929
* number of input tuples per output tuple. Handle the same
932
if (sub_tuple_fraction >= 1.0)
933
sub_tuple_fraction = 0.25;
937
* Generate the best unsorted and presorted paths for this Query
938
* (but note there may not be any presorted path).
940
query_planner(parse, sub_tlist, sub_tuple_fraction,
941
&cheapest_path, &sorted_path);
944
* We couldn't canonicalize group_pathkeys and sort_pathkeys
945
* before running query_planner(), so do it now.
947
group_pathkeys = canonicalize_pathkeys(parse, group_pathkeys);
948
sort_pathkeys = canonicalize_pathkeys(parse, sort_pathkeys);
951
* Consider whether we might want to use hashed grouping.
953
if (parse->groupClause)
956
double cheapest_path_rows;
957
int cheapest_path_width;
960
* Beware in this section of the possibility that
961
* cheapest_path->parent is NULL. This could happen if user
962
* does something silly like SELECT 'foo' GROUP BY 1;
964
if (cheapest_path->parent)
966
cheapest_path_rows = cheapest_path->parent->rows;
967
cheapest_path_width = cheapest_path->parent->width;
971
cheapest_path_rows = 1; /* assume non-set result */
972
cheapest_path_width = 100; /* arbitrary */
976
* Always estimate the number of groups. We can't do this
977
* until after running query_planner(), either.
979
groupExprs = get_sortgrouplist_exprs(parse->groupClause,
981
dNumGroups = estimate_num_groups(parse,
984
/* Also want it as a long int --- but 'ware overflow! */
985
numGroups = (long) Min(dNumGroups, (double) LONG_MAX);
988
* Check can't-do-it conditions, including whether the
989
* grouping operators are hashjoinable.
991
* Executor doesn't support hashed aggregation with DISTINCT
992
* aggregates. (Doing so would imply storing *all* the input
993
* values in the hash table, which seems like a certain
996
if (!enable_hashagg || !hash_safe_grouping(parse))
997
use_hashed_grouping = false;
998
else if (agg_counts.numDistinctAggs != 0)
999
use_hashed_grouping = false;
1003
* Use hashed grouping if (a) we think we can fit the
1004
* hashtable into work_mem, *and* (b) the estimated cost
1005
* is no more than doing it the other way. While avoiding
1006
* the need for sorted input is usually a win, the fact
1007
* that the output won't be sorted may be a loss; so we
1008
* need to do an actual cost comparison.
1012
/* Estimate per-hash-entry space at tuple width... */
1013
hashentrysize = cheapest_path_width;
1014
/* plus space for pass-by-ref transition values... */
1015
hashentrysize += agg_counts.transitionSpace;
1016
/* plus the per-hash-entry overhead */
1017
hashentrysize += hash_agg_entry_size(agg_counts.numAggs);
1019
if (hashentrysize * dNumGroups <= work_mem * 1024L)
1022
* Okay, do the cost comparison. We need to consider
1023
* cheapest_path + hashagg [+ final sort] versus
1024
* either cheapest_path [+ sort] + group or agg [+
1025
* final sort] or presorted_path + group or agg [+
1026
* final sort] where brackets indicate a step that may
1027
* not be needed. We assume query_planner() will have
1028
* returned a presorted path only if it's a winner
1029
* compared to cheapest_path for this purpose.
1031
* These path variables are dummies that just hold cost
1032
* fields; we don't make actual Paths for these steps.
1037
cost_agg(&hashed_p, parse,
1038
AGG_HASHED, agg_counts.numAggs,
1039
numGroupCols, dNumGroups,
1040
cheapest_path->startup_cost,
1041
cheapest_path->total_cost,
1042
cheapest_path_rows);
1043
/* Result of hashed agg is always unsorted */
1045
cost_sort(&hashed_p, parse, sort_pathkeys,
1046
hashed_p.total_cost,
1048
cheapest_path_width);
1052
sorted_p.startup_cost = sorted_path->startup_cost;
1053
sorted_p.total_cost = sorted_path->total_cost;
1054
current_pathkeys = sorted_path->pathkeys;
1058
sorted_p.startup_cost = cheapest_path->startup_cost;
1059
sorted_p.total_cost = cheapest_path->total_cost;
1060
current_pathkeys = cheapest_path->pathkeys;
1062
if (!pathkeys_contained_in(group_pathkeys,
1065
cost_sort(&sorted_p, parse, group_pathkeys,
1066
sorted_p.total_cost,
1068
cheapest_path_width);
1069
current_pathkeys = group_pathkeys;
1072
cost_agg(&sorted_p, parse,
1073
AGG_SORTED, agg_counts.numAggs,
1074
numGroupCols, dNumGroups,
1075
sorted_p.startup_cost,
1076
sorted_p.total_cost,
1077
cheapest_path_rows);
1079
cost_group(&sorted_p, parse,
1080
numGroupCols, dNumGroups,
1081
sorted_p.startup_cost,
1082
sorted_p.total_cost,
1083
cheapest_path_rows);
1084
/* The Agg or Group node will preserve ordering */
1085
if (sort_pathkeys &&
1086
!pathkeys_contained_in(sort_pathkeys,
1089
cost_sort(&sorted_p, parse, sort_pathkeys,
1090
sorted_p.total_cost,
1092
cheapest_path_width);
1096
* Now make the decision using the top-level tuple
1097
* fraction. First we have to convert an absolute
1098
* count (LIMIT) into fractional form.
1100
if (tuple_fraction >= 1.0)
1101
tuple_fraction /= dNumGroups;
1103
if (compare_fractional_path_costs(&hashed_p, &sorted_p,
1104
tuple_fraction) < 0)
1106
/* Hashed is cheaper, so use it */
1107
use_hashed_grouping = true;
1114
* Select the best path and create a plan to execute it.
1116
* If we are doing hashed grouping, we will always read all the input
1117
* tuples, so use the cheapest-total path. Otherwise, trust
1118
* query_planner's decision about which to use.
1120
if (sorted_path && !use_hashed_grouping)
1122
result_plan = create_plan(parse, sorted_path);
1123
current_pathkeys = sorted_path->pathkeys;
1127
result_plan = create_plan(parse, cheapest_path);
1128
current_pathkeys = cheapest_path->pathkeys;
1132
* create_plan() returns a plan with just a "flat" tlist of
1133
* required Vars. Usually we need to insert the sub_tlist as the
1134
* tlist of the top plan node. However, we can skip that if we
1135
* determined that whatever query_planner chose to return will be
1138
if (need_tlist_eval)
1141
* If the top-level plan node is one that cannot do expression
1142
* evaluation, we must insert a Result node to project the
1145
if (!is_projection_capable_plan(result_plan))
1147
result_plan = (Plan *) make_result(sub_tlist, NULL,
1153
* Otherwise, just replace the subplan's flat tlist with
1154
* the desired tlist.
1156
result_plan->targetlist = sub_tlist;
1160
* Also, account for the cost of evaluation of the sub_tlist.
1162
* Up to now, we have only been dealing with "flat" tlists,
1163
* containing just Vars. So their evaluation cost is zero
1164
* according to the model used by cost_qual_eval() (or if you
1165
* prefer, the cost is factored into cpu_tuple_cost). Thus we
1166
* can avoid accounting for tlist cost throughout
1167
* query_planner() and subroutines. But now we've inserted a
1168
* tlist that might contain actual operators, sub-selects, etc
1169
* --- so we'd better account for its cost.
1171
* Below this point, any tlist eval cost for added-on nodes
1172
* should be accounted for as we create those nodes.
1173
* Presently, of the node types we can add on, only Agg and
1174
* Group project new tlists (the rest just copy their input
1175
* tuples) --- so make_agg() and make_group() are responsible
1176
* for computing the added cost.
1178
cost_qual_eval(&tlist_cost, sub_tlist);
1179
result_plan->startup_cost += tlist_cost.startup;
1180
result_plan->total_cost += tlist_cost.startup +
1181
tlist_cost.per_tuple * result_plan->plan_rows;
1186
* Since we're using query_planner's tlist and not the one
1187
* make_subplanTargetList calculated, we have to refigure any
1188
* grouping-column indexes make_subplanTargetList computed.
1190
locate_grouping_columns(parse, tlist, result_plan->targetlist,
1195
* Insert AGG or GROUP node if needed, plus an explicit sort step
1198
* HAVING clause, if any, becomes qual of the Agg node
1200
if (use_hashed_grouping)
1202
/* Hashed aggregate plan --- no sort needed */
1203
result_plan = (Plan *) make_agg(parse,
1205
(List *) parse->havingQual,
1212
/* Hashed aggregation produces randomly-ordered results */
1213
current_pathkeys = NIL;
1215
else if (parse->hasAggs)
1217
/* Plain aggregate plan --- sort if needed */
1218
AggStrategy aggstrategy;
1220
if (parse->groupClause)
1222
if (!pathkeys_contained_in(group_pathkeys, current_pathkeys))
1224
result_plan = (Plan *)
1225
make_sort_from_groupcols(parse,
1229
current_pathkeys = group_pathkeys;
1231
aggstrategy = AGG_SORTED;
1234
* The AGG node will not change the sort ordering of its
1235
* groups, so current_pathkeys describes the result too.
1240
aggstrategy = AGG_PLAIN;
1241
/* Result will be only one row anyway; no sort order */
1242
current_pathkeys = NIL;
1245
result_plan = (Plan *) make_agg(parse,
1247
(List *) parse->havingQual,
1258
* If there are no Aggs, we shouldn't have any HAVING qual
1261
Assert(parse->havingQual == NULL);
1264
* If we have a GROUP BY clause, insert a group node (plus the
1265
* appropriate sort node, if necessary).
1267
if (parse->groupClause)
1270
* Add an explicit sort if we couldn't make the path come
1271
* out the way the GROUP node needs it.
1273
if (!pathkeys_contained_in(group_pathkeys, current_pathkeys))
1275
result_plan = (Plan *)
1276
make_sort_from_groupcols(parse,
1280
current_pathkeys = group_pathkeys;
1283
result_plan = (Plan *) make_group(parse,
1289
/* The Group node won't change sort ordering */
1292
} /* end of if (setOperations) */
1295
* If we were not able to make the plan come out in the right order,
1296
* add an explicit sort step.
1298
if (parse->sortClause)
1300
if (!pathkeys_contained_in(sort_pathkeys, current_pathkeys))
1302
result_plan = (Plan *)
1303
make_sort_from_sortclauses(parse,
1306
current_pathkeys = sort_pathkeys;
1311
* If there is a DISTINCT clause, add the UNIQUE node.
1313
if (parse->distinctClause)
1315
result_plan = (Plan *) make_unique(result_plan, parse->distinctClause);
1318
* If there was grouping or aggregation, leave plan_rows as-is
1319
* (ie, assume the result was already mostly unique). If not,
1320
* it's reasonable to assume the UNIQUE filter has effects
1321
* comparable to GROUP BY.
1323
if (!parse->groupClause && !parse->hasAggs)
1325
List *distinctExprs;
1327
distinctExprs = get_sortgrouplist_exprs(parse->distinctClause,
1329
result_plan->plan_rows = estimate_num_groups(parse,
1331
result_plan->plan_rows);
1336
* Finally, if there is a LIMIT/OFFSET clause, add the LIMIT node.
1338
if (parse->limitOffset || parse->limitCount)
1340
result_plan = (Plan *) make_limit(result_plan,
1346
* Return the actual output ordering in query_pathkeys for possible
1347
* use by an outer query level.
1349
parse->query_pathkeys = current_pathkeys;
1355
* hash_safe_grouping - are grouping operators hashable?
1357
* We assume hashed aggregation will work if the datatype's equality operator
1358
* is marked hashjoinable.
1361
hash_safe_grouping(Query *parse)
1365
foreach(gl, parse->groupClause)
1367
GroupClause *grpcl = (GroupClause *) lfirst(gl);
1368
TargetEntry *tle = get_sortgroupclause_tle(grpcl, parse->targetList);
1372
optup = equality_oper(tle->resdom->restype, true);
1375
oprcanhash = ((Form_pg_operator) GETSTRUCT(optup))->oprcanhash;
1376
ReleaseSysCache(optup);
1384
* make_subplanTargetList
1385
* Generate appropriate target list when grouping is required.
1387
* When grouping_planner inserts Aggregate or Group plan nodes above
1388
* the result of query_planner, we typically want to pass a different
1389
* target list to query_planner than the outer plan nodes should have.
1390
* This routine generates the correct target list for the subplan.
1392
* The initial target list passed from the parser already contains entries
1393
* for all ORDER BY and GROUP BY expressions, but it will not have entries
1394
* for variables used only in HAVING clauses; so we need to add those
1395
* variables to the subplan target list. Also, if we are doing either
1396
* grouping or aggregation, we flatten all expressions except GROUP BY items
1397
* into their component variables; the other expressions will be computed by
1398
* the inserted nodes rather than by the subplan. For example,
1399
* given a query like
1400
* SELECT a+b,SUM(c+d) FROM table GROUP BY a+b;
1401
* we want to pass this targetlist to the subplan:
1403
* where the a+b target will be used by the Sort/Group steps, and the
1404
* other targets will be used for computing the final results. (In the
1405
* above example we could theoretically suppress the a and b targets and
1406
* pass down only c,d,a+b, but it's not really worth the trouble to
1407
* eliminate simple var references from the subplan. We will avoid doing
1408
* the extra computation to recompute a+b at the outer level; see
1409
* replace_vars_with_subplan_refs() in setrefs.c.)
1411
* If we are grouping or aggregating, *and* there are no non-Var grouping
1412
* expressions, then the returned tlist is effectively dummy; we do not
1413
* need to force it to be evaluated, because all the Vars it contains
1414
* should be present in the output of query_planner anyway.
1416
* 'parse' is the query being processed.
1417
* 'tlist' is the query's target list.
1418
* 'groupColIdx' receives an array of column numbers for the GROUP BY
1419
* expressions (if there are any) in the subplan's target list.
1420
* 'need_tlist_eval' is set true if we really need to evaluate the
1423
* The result is the targetlist to be passed to the subplan.
1427
make_subplanTargetList(Query *parse,
1429
AttrNumber **groupColIdx,
1430
bool *need_tlist_eval)
1436
*groupColIdx = NULL;
1439
* If we're not grouping or aggregating, nothing to do here;
1440
* query_planner should receive the unmodified target list.
1442
if (!parse->hasAggs && !parse->groupClause)
1444
*need_tlist_eval = true;
1449
* Otherwise, start with a "flattened" tlist (having just the vars
1450
* mentioned in the targetlist and HAVING qual --- but not upper-
1451
* level Vars; they will be replaced by Params later on).
1453
sub_tlist = flatten_tlist(tlist);
1454
extravars = pull_var_clause(parse->havingQual, false);
1455
sub_tlist = add_to_flat_tlist(sub_tlist, extravars);
1456
list_free(extravars);
1457
*need_tlist_eval = false; /* only eval if not flat tlist */
1460
* If grouping, create sub_tlist entries for all GROUP BY expressions
1461
* (GROUP BY items that are simple Vars should be in the list
1462
* already), and make an array showing where the group columns are in
1465
numCols = list_length(parse->groupClause);
1469
AttrNumber *grpColIdx;
1472
grpColIdx = (AttrNumber *) palloc(sizeof(AttrNumber) * numCols);
1473
*groupColIdx = grpColIdx;
1475
foreach(gl, parse->groupClause)
1477
GroupClause *grpcl = (GroupClause *) lfirst(gl);
1478
Node *groupexpr = get_sortgroupclause_expr(grpcl, tlist);
1479
TargetEntry *te = NULL;
1482
/* Find or make a matching sub_tlist entry */
1483
foreach(sl, sub_tlist)
1485
te = (TargetEntry *) lfirst(sl);
1486
if (equal(groupexpr, te->expr))
1491
te = makeTargetEntry(makeResdom(list_length(sub_tlist) + 1,
1492
exprType(groupexpr),
1493
exprTypmod(groupexpr),
1496
(Expr *) groupexpr);
1497
sub_tlist = lappend(sub_tlist, te);
1498
*need_tlist_eval = true; /* it's not flat anymore */
1501
/* and save its resno */
1502
grpColIdx[keyno++] = te->resdom->resno;
1510
* locate_grouping_columns
1511
* Locate grouping columns in the tlist chosen by query_planner.
1513
* This is only needed if we don't use the sub_tlist chosen by
1514
* make_subplanTargetList. We have to forget the column indexes found
1515
* by that routine and re-locate the grouping vars in the real sub_tlist.
1518
locate_grouping_columns(Query *parse,
1521
AttrNumber *groupColIdx)
1527
* No work unless grouping.
1529
if (!parse->groupClause)
1531
Assert(groupColIdx == NULL);
1534
Assert(groupColIdx != NULL);
1536
foreach(gl, parse->groupClause)
1538
GroupClause *grpcl = (GroupClause *) lfirst(gl);
1539
Node *groupexpr = get_sortgroupclause_expr(grpcl, tlist);
1540
TargetEntry *te = NULL;
1543
foreach(sl, sub_tlist)
1545
te = (TargetEntry *) lfirst(sl);
1546
if (equal(groupexpr, te->expr))
1550
elog(ERROR, "failed to locate grouping columns");
1552
groupColIdx[keyno++] = te->resdom->resno;
1557
* postprocess_setop_tlist
1558
* Fix up targetlist returned by plan_set_operations().
1560
* We need to transpose sort key info from the orig_tlist into new_tlist.
1561
* NOTE: this would not be good enough if we supported resjunk sort keys
1562
* for results of set operations --- then, we'd need to project a whole
1563
* new tlist to evaluate the resjunk columns. For now, just ereport if we
1564
* find any resjunk columns in orig_tlist.
1567
postprocess_setop_tlist(List *new_tlist, List *orig_tlist)
1570
ListCell *orig_tlist_item = list_head(orig_tlist);
1572
foreach(l, new_tlist)
1574
TargetEntry *new_tle = (TargetEntry *) lfirst(l);
1575
TargetEntry *orig_tle;
1577
/* ignore resjunk columns in setop result */
1578
if (new_tle->resdom->resjunk)
1581
Assert(orig_tlist_item != NULL);
1582
orig_tle = (TargetEntry *) lfirst(orig_tlist_item);
1583
orig_tlist_item = lnext(orig_tlist_item);
1584
if (orig_tle->resdom->resjunk) /* should not happen */
1585
elog(ERROR, "resjunk output columns are not implemented");
1586
Assert(new_tle->resdom->resno == orig_tle->resdom->resno);
1587
Assert(new_tle->resdom->restype == orig_tle->resdom->restype);
1588
new_tle->resdom->ressortgroupref = orig_tle->resdom->ressortgroupref;
1590
if (orig_tlist_item != NULL)
1591
elog(ERROR, "resjunk output columns are not implemented");