1
/*-------------------------------------------------------------------------
4
* Routines to find all possible paths for processing a set of joins
6
* Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
7
* Portions Copyright (c) 1994, Regents of the University of California
11
* $PostgreSQL: pgsql/src/backend/optimizer/path/joinpath.c,v 1.91.4.1 2005-01-23 02:22:27 tgl Exp $
13
*-------------------------------------------------------------------------
19
#include "optimizer/clauses.h"
20
#include "optimizer/cost.h"
21
#include "optimizer/pathnode.h"
22
#include "optimizer/paths.h"
23
#include "parser/parsetree.h"
24
#include "utils/lsyscache.h"
27
static void sort_inner_and_outer(Query *root, RelOptInfo *joinrel,
28
RelOptInfo *outerrel, RelOptInfo *innerrel,
29
List *restrictlist, List *mergeclause_list,
31
static void match_unsorted_outer(Query *root, RelOptInfo *joinrel,
32
RelOptInfo *outerrel, RelOptInfo *innerrel,
33
List *restrictlist, List *mergeclause_list,
35
static void hash_inner_and_outer(Query *root, RelOptInfo *joinrel,
36
RelOptInfo *outerrel, RelOptInfo *innerrel,
37
List *restrictlist, JoinType jointype);
38
static List *select_mergejoin_clauses(RelOptInfo *joinrel,
46
* add_paths_to_joinrel
47
* Given a join relation and two component rels from which it can be made,
48
* consider all possible paths that use the two component rels as outer
49
* and inner rel respectively. Add these paths to the join rel's pathlist
50
* if they survive comparison with other paths (and remove any existing
51
* paths that are dominated by these paths).
53
* Modifies the pathlist field of the joinrel node to contain the best
57
add_paths_to_joinrel(Query *root,
64
List *mergeclause_list = NIL;
67
* Find potential mergejoin clauses. We can skip this if we are not
68
* interested in doing a mergejoin. However, mergejoin is currently
69
* our only way of implementing full outer joins, so override
70
* mergejoin disable if it's a full join.
72
if (enable_mergejoin || jointype == JOIN_FULL)
73
mergeclause_list = select_mergejoin_clauses(joinrel,
80
* 1. Consider mergejoin paths where both relations must be explicitly
83
sort_inner_and_outer(root, joinrel, outerrel, innerrel,
84
restrictlist, mergeclause_list, jointype);
87
* 2. Consider paths where the outer relation need not be explicitly
88
* sorted. This includes both nestloops and mergejoins where the outer
89
* path is already ordered.
91
match_unsorted_outer(root, joinrel, outerrel, innerrel,
92
restrictlist, mergeclause_list, jointype);
97
* 3. Consider paths where the inner relation need not be explicitly
98
* sorted. This includes mergejoins only (nestloops were already
99
* built in match_unsorted_outer).
101
* Diked out as redundant 2/13/2000 -- tgl. There isn't any really
102
* significant difference between the inner and outer side of a
103
* mergejoin, so match_unsorted_inner creates no paths that aren't
104
* equivalent to those made by match_unsorted_outer when
105
* add_paths_to_joinrel() is invoked with the two rels given in the
108
match_unsorted_inner(root, joinrel, outerrel, innerrel,
109
restrictlist, mergeclause_list, jointype);
113
* 4. Consider paths where both outer and inner relations must be
114
* hashed before being joined.
117
hash_inner_and_outer(root, joinrel, outerrel, innerrel,
118
restrictlist, jointype);
122
* sort_inner_and_outer
123
* Create mergejoin join paths by explicitly sorting both the outer and
124
* inner join relations on each available merge ordering.
126
* 'joinrel' is the join relation
127
* 'outerrel' is the outer join relation
128
* 'innerrel' is the inner join relation
129
* 'restrictlist' contains all of the RestrictInfo nodes for restriction
130
* clauses that apply to this join
131
* 'mergeclause_list' is a list of RestrictInfo nodes for available
132
* mergejoin clauses in this join
133
* 'jointype' is the type of join to do
136
sort_inner_and_outer(Query *root,
138
RelOptInfo *outerrel,
139
RelOptInfo *innerrel,
141
List *mergeclause_list,
151
* If we are doing a right or full join, we must use *all* the
152
* mergeclauses as join clauses, else we will not have a valid plan.
159
case JOIN_UNIQUE_OUTER:
160
case JOIN_UNIQUE_INNER:
161
useallclauses = false;
165
useallclauses = true;
168
elog(ERROR, "unrecognized join type: %d",
170
useallclauses = false; /* keep compiler quiet */
175
* We only consider the cheapest-total-cost input paths, since we are
176
* assuming here that a sort is required. We will consider
177
* cheapest-startup-cost input paths later, and only if they don't
180
* If unique-ification is requested, do it and then handle as a plain
183
outer_path = outerrel->cheapest_total_path;
184
inner_path = innerrel->cheapest_total_path;
185
if (jointype == JOIN_UNIQUE_OUTER)
187
outer_path = (Path *) create_unique_path(root, outerrel, outer_path);
188
jointype = JOIN_INNER;
190
else if (jointype == JOIN_UNIQUE_INNER)
192
inner_path = (Path *) create_unique_path(root, innerrel, inner_path);
193
jointype = JOIN_INNER;
197
* Each possible ordering of the available mergejoin clauses will
198
* generate a differently-sorted result path at essentially the same
199
* cost. We have no basis for choosing one over another at this level
200
* of joining, but some sort orders may be more useful than others for
201
* higher-level mergejoins, so it's worth considering multiple
204
* Actually, it's not quite true that every mergeclause ordering will
205
* generate a different path order, because some of the clauses may be
206
* redundant. Therefore, what we do is convert the mergeclause list
207
* to a list of canonical pathkeys, and then consider different
208
* orderings of the pathkeys.
210
* Generating a path for *every* permutation of the pathkeys doesn't seem
211
* like a winning strategy; the cost in planning time is too high. For
212
* now, we generate one path for each pathkey, listing that pathkey
213
* first and the rest in random order. This should allow at least a
214
* one-clause mergejoin without re-sorting against any other possible
215
* mergejoin partner path. But if we've not guessed the right
216
* ordering of secondary keys, we may end up evaluating clauses as
217
* qpquals when they could have been done as mergeclauses. We need to
218
* figure out a better way. (Two possible approaches: look at all the
219
* relevant index relations to suggest plausible sort orders, or make
220
* just one output path and somehow mark it as having a sort-order
221
* that can be rearranged freely.)
223
all_pathkeys = make_pathkeys_for_mergeclauses(root,
227
foreach(l, all_pathkeys)
229
List *front_pathkey = (List *) lfirst(l);
231
List *cur_mergeclauses;
234
List *merge_pathkeys;
236
/* Make a pathkey list with this guy first. */
237
if (l != list_head(all_pathkeys))
238
cur_pathkeys = lcons(front_pathkey,
239
list_delete_ptr(list_copy(all_pathkeys),
242
cur_pathkeys = all_pathkeys; /* no work at first one... */
245
* Select mergeclause(s) that match this sort ordering. If we had
246
* redundant merge clauses then we will get a subset of the
247
* original clause list. There had better be some match,
250
cur_mergeclauses = find_mergeclauses_for_pathkeys(root,
253
Assert(cur_mergeclauses != NIL);
255
/* Forget it if can't use all the clauses in right/full join */
257
list_length(cur_mergeclauses) != list_length(mergeclause_list))
261
* Build sort pathkeys for both sides.
263
* Note: it's possible that the cheapest paths will already be sorted
264
* properly. create_mergejoin_path will detect that case and
265
* suppress an explicit sort step, so we needn't do so here.
267
outerkeys = make_pathkeys_for_mergeclauses(root,
270
innerkeys = make_pathkeys_for_mergeclauses(root,
273
/* Build pathkeys representing output sort order. */
274
merge_pathkeys = build_join_pathkeys(root, joinrel, jointype,
278
* And now we can make the path.
280
add_path(joinrel, (Path *)
281
create_mergejoin_path(root,
295
* match_unsorted_outer
296
* Creates possible join paths for processing a single join relation
297
* 'joinrel' by employing either iterative substitution or
298
* mergejoining on each of its possible outer paths (considering
299
* only outer paths that are already ordered well enough for merging).
301
* We always generate a nestloop path for each available outer path.
302
* In fact we may generate as many as four: one on the cheapest-total-cost
303
* inner path, one on the same with materialization, one on the
304
* cheapest-startup-cost inner path (if different),
305
* and one on the best inner-indexscan path (if any).
307
* We also consider mergejoins if mergejoin clauses are available. We have
308
* two ways to generate the inner path for a mergejoin: sort the cheapest
309
* inner path, or use an inner path that is already suitably ordered for the
310
* merge. If we have several mergeclauses, it could be that there is no inner
311
* path (or only a very expensive one) for the full list of mergeclauses, but
312
* better paths exist if we truncate the mergeclause list (thereby discarding
313
* some sort key requirements). So, we consider truncations of the
314
* mergeclause list as well as the full list. (Ideally we'd consider all
315
* subsets of the mergeclause list, but that seems way too expensive.)
317
* 'joinrel' is the join relation
318
* 'outerrel' is the outer join relation
319
* 'innerrel' is the inner join relation
320
* 'restrictlist' contains all of the RestrictInfo nodes for restriction
321
* clauses that apply to this join
322
* 'mergeclause_list' is a list of RestrictInfo nodes for available
323
* mergejoin clauses in this join
324
* 'jointype' is the type of join to do
327
match_unsorted_outer(Query *root,
329
RelOptInfo *outerrel,
330
RelOptInfo *innerrel,
332
List *mergeclause_list,
335
JoinType save_jointype = jointype;
338
Path *inner_cheapest_startup = innerrel->cheapest_startup_path;
339
Path *inner_cheapest_total = innerrel->cheapest_total_path;
340
Path *matpath = NULL;
341
Path *bestinnerjoin = NULL;
345
* Nestloop only supports inner, left, and IN joins. Also, if we are
346
* doing a right or full join, we must use *all* the mergeclauses as
347
* join clauses, else we will not have a valid plan. (Although these
348
* two flags are currently inverses, keep them separate for clarity
349
* and possible future changes.)
356
case JOIN_UNIQUE_OUTER:
357
case JOIN_UNIQUE_INNER:
359
useallclauses = false;
364
useallclauses = true;
367
elog(ERROR, "unrecognized join type: %d",
369
nestjoinOK = false; /* keep compiler quiet */
370
useallclauses = false;
375
* If we need to unique-ify the inner path, we will consider only the
378
if (jointype == JOIN_UNIQUE_INNER)
380
inner_cheapest_total = (Path *)
381
create_unique_path(root, innerrel, inner_cheapest_total);
382
inner_cheapest_startup = inner_cheapest_total;
383
jointype = JOIN_INNER;
388
* If the cheapest inner path is a join or seqscan, we should
389
* consider materializing it. (This is a heuristic: we could
390
* consider it always, but for inner indexscans it's probably a
393
if (!(IsA(inner_cheapest_total, IndexPath) ||
394
IsA(inner_cheapest_total, TidPath)))
396
create_material_path(innerrel, inner_cheapest_total);
399
* Get the best innerjoin indexpath (if any) for this outer rel.
400
* It's the same for all outer paths.
402
bestinnerjoin = best_inner_indexscan(root, innerrel,
403
outerrel->relids, jointype);
406
foreach(l, outerrel->pathlist)
408
Path *outerpath = (Path *) lfirst(l);
409
List *merge_pathkeys;
413
Path *cheapest_startup_inner;
414
Path *cheapest_total_inner;
419
* If we need to unique-ify the outer path, it's pointless to
420
* consider any but the cheapest outer.
422
if (save_jointype == JOIN_UNIQUE_OUTER)
424
if (outerpath != outerrel->cheapest_total_path)
426
outerpath = (Path *) create_unique_path(root, outerrel, outerpath);
427
jointype = JOIN_INNER;
431
* The result will have this sort order (even if it is implemented
432
* as a nestloop, and even if some of the mergeclauses are
433
* implemented by qpquals rather than as true mergeclauses):
435
merge_pathkeys = build_join_pathkeys(root, joinrel, jointype,
436
outerpath->pathkeys);
441
* Always consider a nestloop join with this outer and
442
* cheapest-total-cost inner. When appropriate, also consider
443
* using the materialized form of the cheapest inner, the
444
* cheapest-startup-cost inner path, and the best innerjoin
447
add_path(joinrel, (Path *)
448
create_nestloop_path(root,
452
inner_cheapest_total,
456
add_path(joinrel, (Path *)
457
create_nestloop_path(root,
464
if (inner_cheapest_startup != inner_cheapest_total)
465
add_path(joinrel, (Path *)
466
create_nestloop_path(root,
470
inner_cheapest_startup,
473
if (bestinnerjoin != NULL)
474
add_path(joinrel, (Path *)
475
create_nestloop_path(root,
484
/* Can't do anything else if outer path needs to be unique'd */
485
if (save_jointype == JOIN_UNIQUE_OUTER)
488
/* Look for useful mergeclauses (if any) */
489
mergeclauses = find_mergeclauses_for_pathkeys(root,
494
* Done with this outer path if no chance for a mergejoin.
496
* Special corner case: for "x FULL JOIN y ON true", there will be no
497
* join clauses at all. Ordinarily we'd generate a clauseless
498
* nestloop path, but since mergejoin is our only join type that
499
* supports FULL JOIN, it's necessary to generate a clauseless
500
* mergejoin path instead.
502
* Unfortunately this can't easily be extended to handle the case
503
* where there are joinclauses but none of them use mergejoinable
504
* operators; nodeMergejoin.c can only do a full join correctly if
505
* all the joinclauses are mergeclauses.
507
if (mergeclauses == NIL)
509
if (jointype == JOIN_FULL && restrictlist == NIL)
510
/* okay to try for mergejoin */ ;
514
if (useallclauses && list_length(mergeclauses) != list_length(mergeclause_list))
517
/* Compute the required ordering of the inner path */
518
innersortkeys = make_pathkeys_for_mergeclauses(root,
523
* Generate a mergejoin on the basis of sorting the cheapest
524
* inner. Since a sort will be needed, only cheapest total cost
525
* matters. (But create_mergejoin_path will do the right thing if
526
* inner_cheapest_total is already correctly sorted.)
528
add_path(joinrel, (Path *)
529
create_mergejoin_path(root,
533
inner_cheapest_total,
540
/* Can't do anything else if inner path needs to be unique'd */
541
if (save_jointype == JOIN_UNIQUE_INNER)
545
* Look for presorted inner paths that satisfy the innersortkey
546
* list --- or any truncation thereof, if we are allowed to build
547
* a mergejoin using a subset of the merge clauses. Here, we
548
* consider both cheap startup cost and cheap total cost. Ignore
549
* inner_cheapest_total, since we already made a path with it.
551
num_sortkeys = list_length(innersortkeys);
552
if (num_sortkeys > 1 && !useallclauses)
553
trialsortkeys = list_copy(innersortkeys); /* need modifiable copy */
555
trialsortkeys = innersortkeys; /* won't really truncate */
556
cheapest_startup_inner = NULL;
557
cheapest_total_inner = NULL;
559
for (sortkeycnt = num_sortkeys; sortkeycnt > 0; sortkeycnt--)
562
List *newclauses = NIL;
565
* Look for an inner path ordered well enough for the first
566
* 'sortkeycnt' innersortkeys. NB: trialsortkeys list is
567
* modified destructively, which is why we made a copy...
569
trialsortkeys = list_truncate(trialsortkeys, sortkeycnt);
570
innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist,
573
if (innerpath != NULL &&
574
innerpath != inner_cheapest_total &&
575
(cheapest_total_inner == NULL ||
576
compare_path_costs(innerpath, cheapest_total_inner,
579
/* Found a cheap (or even-cheaper) sorted path */
580
/* Select the right mergeclauses, if we didn't already */
581
if (sortkeycnt < num_sortkeys)
584
find_mergeclauses_for_pathkeys(root,
587
Assert(newclauses != NIL);
590
newclauses = mergeclauses;
591
add_path(joinrel, (Path *)
592
create_mergejoin_path(root,
602
cheapest_total_inner = innerpath;
604
/* Same on the basis of cheapest startup cost ... */
605
innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist,
608
if (innerpath != NULL &&
609
innerpath != inner_cheapest_total &&
610
(cheapest_startup_inner == NULL ||
611
compare_path_costs(innerpath, cheapest_startup_inner,
614
/* Found a cheap (or even-cheaper) sorted path */
615
if (innerpath != cheapest_total_inner)
618
* Avoid rebuilding clause list if we already made
619
* one; saves memory in big join trees...
621
if (newclauses == NIL)
623
if (sortkeycnt < num_sortkeys)
626
find_mergeclauses_for_pathkeys(root,
629
Assert(newclauses != NIL);
632
newclauses = mergeclauses;
634
add_path(joinrel, (Path *)
635
create_mergejoin_path(root,
646
cheapest_startup_inner = innerpath;
650
* Don't consider truncated sortkeys if we need all clauses.
659
* hash_inner_and_outer
660
* Create hashjoin join paths by explicitly hashing both the outer and
661
* inner keys of each available hash clause.
663
* 'joinrel' is the join relation
664
* 'outerrel' is the outer join relation
665
* 'innerrel' is the inner join relation
666
* 'restrictlist' contains all of the RestrictInfo nodes for restriction
667
* clauses that apply to this join
668
* 'jointype' is the type of join to do
671
hash_inner_and_outer(Query *root,
673
RelOptInfo *outerrel,
674
RelOptInfo *innerrel,
683
* Hashjoin only supports inner, left, and IN joins.
689
case JOIN_UNIQUE_OUTER:
690
case JOIN_UNIQUE_INNER:
701
* We need to build only one hashpath for any given pair of outer and
702
* inner relations; all of the hashable clauses will be used as keys.
704
* Scan the join's restrictinfo list to find hashjoinable clauses that
705
* are usable with this pair of sub-relations.
708
foreach(l, restrictlist)
710
RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l);
712
if (!restrictinfo->can_join ||
713
restrictinfo->hashjoinoperator == InvalidOid)
714
continue; /* not hashjoinable */
717
* If processing an outer join, only use its own join clauses for
718
* hashing. For inner joins we need not be so picky.
720
if (isouterjoin && restrictinfo->is_pushed_down)
724
* Check if clause is usable with these input rels.
726
if (bms_is_subset(restrictinfo->left_relids, outerrel->relids) &&
727
bms_is_subset(restrictinfo->right_relids, innerrel->relids))
729
/* righthand side is inner */
731
else if (bms_is_subset(restrictinfo->left_relids, innerrel->relids) &&
732
bms_is_subset(restrictinfo->right_relids, outerrel->relids))
734
/* lefthand side is inner */
737
continue; /* no good for these input relations */
739
hashclauses = lappend(hashclauses, restrictinfo);
742
/* If we found any usable hashclauses, make a path */
746
* We consider both the cheapest-total-cost and
747
* cheapest-startup-cost outer paths. There's no need to consider
748
* any but the cheapest-total-cost inner path, however.
750
Path *cheapest_startup_outer = outerrel->cheapest_startup_path;
751
Path *cheapest_total_outer = outerrel->cheapest_total_path;
752
Path *cheapest_total_inner = innerrel->cheapest_total_path;
754
/* Unique-ify if need be */
755
if (jointype == JOIN_UNIQUE_OUTER)
757
cheapest_total_outer = (Path *)
758
create_unique_path(root, outerrel, cheapest_total_outer);
759
cheapest_startup_outer = cheapest_total_outer;
760
jointype = JOIN_INNER;
762
else if (jointype == JOIN_UNIQUE_INNER)
764
cheapest_total_inner = (Path *)
765
create_unique_path(root, innerrel, cheapest_total_inner);
766
jointype = JOIN_INNER;
769
add_path(joinrel, (Path *)
770
create_hashjoin_path(root,
773
cheapest_total_outer,
774
cheapest_total_inner,
777
if (cheapest_startup_outer != cheapest_total_outer)
778
add_path(joinrel, (Path *)
779
create_hashjoin_path(root,
782
cheapest_startup_outer,
783
cheapest_total_inner,
790
* select_mergejoin_clauses
791
* Select mergejoin clauses that are usable for a particular join.
792
* Returns a list of RestrictInfo nodes for those clauses.
794
* We examine each restrictinfo clause known for the join to see
795
* if it is mergejoinable and involves vars from the two sub-relations
796
* currently of interest.
799
select_mergejoin_clauses(RelOptInfo *joinrel,
800
RelOptInfo *outerrel,
801
RelOptInfo *innerrel,
805
List *result_list = NIL;
806
bool isouterjoin = IS_OUTER_JOIN(jointype);
809
foreach(l, restrictlist)
811
RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l);
814
* If processing an outer join, only use its own join clauses in
815
* the merge. For inner joins we need not be so picky.
817
* Furthermore, if it is a right/full join then *all* the explicit
818
* join clauses must be mergejoinable, else the executor will
819
* fail. If we are asked for a right join then just return NIL to
820
* indicate no mergejoin is possible (we can handle it as a left
821
* join instead). If we are asked for a full join then emit an
822
* error, because there is no fallback.
826
if (restrictinfo->is_pushed_down)
831
if (!restrictinfo->can_join ||
832
restrictinfo->mergejoinoperator == InvalidOid)
833
return NIL; /* not mergejoinable */
836
if (!restrictinfo->can_join ||
837
restrictinfo->mergejoinoperator == InvalidOid)
839
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
840
errmsg("FULL JOIN is only supported with merge-joinable join conditions")));
843
/* otherwise, it's OK to have nonmergeable join quals */
848
if (!restrictinfo->can_join ||
849
restrictinfo->mergejoinoperator == InvalidOid)
850
continue; /* not mergejoinable */
853
* Check if clause is usable with these input rels. All the vars
854
* needed on each side of the clause must be available from one or
855
* the other of the input rels.
857
if (bms_is_subset(restrictinfo->left_relids, outerrel->relids) &&
858
bms_is_subset(restrictinfo->right_relids, innerrel->relids))
860
/* righthand side is inner */
862
else if (bms_is_subset(restrictinfo->left_relids, innerrel->relids) &&
863
bms_is_subset(restrictinfo->right_relids, outerrel->relids))
865
/* lefthand side is inner */
868
continue; /* no good for these input relations */
870
result_list = lcons(restrictinfo, result_list);