1
/*-------------------------------------------------------------------------
4
* routines for fast build of inverted index
7
* Portions Copyright (c) 1996-2011, PostgreSQL Global Development Group
8
* Portions Copyright (c) 1994, Regents of the University of California
11
* src/backend/access/gin/ginbulk.c
12
*-------------------------------------------------------------------------
17
#include "access/gin_private.h"
18
#include "utils/datum.h"
19
#include "utils/memutils.h"
22
#define DEF_NENTRY 2048 /* GinEntryAccumulator allocation quantum */
23
#define DEF_NPTR 5 /* ItemPointer initial allocation quantum */
26
/* Combiner function for rbtree.c */
28
ginCombineData(RBNode *existing, const RBNode *newdata, void *arg)
30
GinEntryAccumulator *eo = (GinEntryAccumulator *) existing;
31
const GinEntryAccumulator *en = (const GinEntryAccumulator *) newdata;
32
BuildAccumulator *accum = (BuildAccumulator *) arg;
35
* Note this code assumes that newdata contains only one itempointer.
37
if (eo->count >= eo->maxcount)
39
accum->allocatedMemory -= GetMemoryChunkSpace(eo->list);
41
eo->list = (ItemPointerData *)
42
repalloc(eo->list, sizeof(ItemPointerData) * eo->maxcount);
43
accum->allocatedMemory += GetMemoryChunkSpace(eo->list);
46
/* If item pointers are not ordered, they will need to be sorted later */
47
if (eo->shouldSort == FALSE)
51
res = ginCompareItemPointers(eo->list + eo->count - 1, en->list);
55
eo->shouldSort = TRUE;
58
eo->list[eo->count] = en->list[0];
62
/* Comparator function for rbtree.c */
64
cmpEntryAccumulator(const RBNode *a, const RBNode *b, void *arg)
66
const GinEntryAccumulator *ea = (const GinEntryAccumulator *) a;
67
const GinEntryAccumulator *eb = (const GinEntryAccumulator *) b;
68
BuildAccumulator *accum = (BuildAccumulator *) arg;
70
return ginCompareAttEntries(accum->ginstate,
71
ea->attnum, ea->key, ea->category,
72
eb->attnum, eb->key, eb->category);
75
/* Allocator function for rbtree.c */
77
ginAllocEntryAccumulator(void *arg)
79
BuildAccumulator *accum = (BuildAccumulator *) arg;
80
GinEntryAccumulator *ea;
83
* Allocate memory by rather big chunks to decrease overhead. We have no
84
* need to reclaim RBNodes individually, so this costs nothing.
86
if (accum->entryallocator == NULL || accum->eas_used >= DEF_NENTRY)
88
accum->entryallocator = palloc(sizeof(GinEntryAccumulator) * DEF_NENTRY);
89
accum->allocatedMemory += GetMemoryChunkSpace(accum->entryallocator);
93
/* Allocate new RBNode from current chunk */
94
ea = accum->entryallocator + accum->eas_used;
101
ginInitBA(BuildAccumulator *accum)
103
/* accum->ginstate is intentionally not set here */
104
accum->allocatedMemory = 0;
105
accum->entryallocator = NULL;
107
accum->tree = rb_create(sizeof(GinEntryAccumulator),
110
ginAllocEntryAccumulator,
111
NULL, /* no freefunc needed */
116
* This is basically the same as datumCopy(), but extended to count
117
* palloc'd space in accum->allocatedMemory.
120
getDatumCopy(BuildAccumulator *accum, OffsetNumber attnum, Datum value)
122
Form_pg_attribute att = accum->ginstate->origTupdesc->attrs[attnum - 1];
129
res = datumCopy(value, false, att->attlen);
130
accum->allocatedMemory += GetMemoryChunkSpace(DatumGetPointer(res));
136
* Find/store one entry from indexed value.
139
ginInsertBAEntry(BuildAccumulator *accum,
140
ItemPointer heapptr, OffsetNumber attnum,
141
Datum key, GinNullCategory category)
143
GinEntryAccumulator eatmp;
144
GinEntryAccumulator *ea;
148
* For the moment, fill only the fields of eatmp that will be looked at by
149
* cmpEntryAccumulator or ginCombineData.
151
eatmp.attnum = attnum;
153
eatmp.category = category;
154
/* temporarily set up single-entry itempointer list */
155
eatmp.list = heapptr;
157
ea = (GinEntryAccumulator *) rb_insert(accum->tree, (RBNode *) &eatmp,
163
* Finish initializing new tree entry, including making permanent
164
* copies of the datum (if it's not null) and itempointer.
166
if (category == GIN_CAT_NORM_KEY)
167
ea->key = getDatumCopy(accum, attnum, key);
168
ea->maxcount = DEF_NPTR;
170
ea->shouldSort = FALSE;
172
(ItemPointerData *) palloc(sizeof(ItemPointerData) * DEF_NPTR);
173
ea->list[0] = *heapptr;
174
accum->allocatedMemory += GetMemoryChunkSpace(ea->list);
179
* ginCombineData did everything needed.
185
* Insert the entries for one heap pointer.
187
* Since the entries are being inserted into a balanced binary tree, you
188
* might think that the order of insertion wouldn't be critical, but it turns
189
* out that inserting the entries in sorted order results in a lot of
190
* rebalancing operations and is slow. To prevent this, we attempt to insert
191
* the nodes in an order that will produce a nearly-balanced tree if the input
194
* We do this as follows. First, we imagine that we have an array whose size
195
* is the smallest power of two greater than or equal to the actual array
196
* size. Second, we insert the middle entry of our virtual array into the
197
* tree; then, we insert the middles of each half of our virtual array, then
198
* middles of quarters, etc.
201
ginInsertBAEntries(BuildAccumulator *accum,
202
ItemPointer heapptr, OffsetNumber attnum,
203
Datum *entries, GinNullCategory *categories,
206
uint32 step = nentries;
211
Assert(ItemPointerIsValid(heapptr) && attnum >= FirstOffsetNumber);
214
* step will contain largest power of 2 and <= nentries
220
step |= (step >> 16);
228
for (i = step - 1; i < nentries && i >= 0; i += step << 1 /* *2 */ )
229
ginInsertBAEntry(accum, heapptr, attnum,
230
entries[i], categories[i]);
237
qsortCompareItemPointers(const void *a, const void *b)
239
int res = ginCompareItemPointers((ItemPointer) a, (ItemPointer) b);
241
/* Assert that there are no equal item pointers being sorted */
246
/* Prepare to read out the rbtree contents using ginGetBAEntry */
248
ginBeginBAScan(BuildAccumulator *accum)
250
rb_begin_iterate(accum->tree, LeftRightWalk);
254
* Get the next entry in sequence from the BuildAccumulator's rbtree.
255
* This consists of a single key datum and a list (array) of one or more
256
* heap TIDs in which that key is found. The list is guaranteed sorted.
259
ginGetBAEntry(BuildAccumulator *accum,
260
OffsetNumber *attnum, Datum *key, GinNullCategory *category,
263
GinEntryAccumulator *entry;
264
ItemPointerData *list;
266
entry = (GinEntryAccumulator *) rb_iterate(accum->tree);
269
return NULL; /* no more entries */
271
*attnum = entry->attnum;
273
*category = entry->category;
277
Assert(list != NULL && entry->count > 0);
279
if (entry->shouldSort && entry->count > 1)
280
qsort(list, entry->count, sizeof(ItemPointerData),
281
qsortCompareItemPointers);