1
/*-------------------------------------------------------------------------
4
* Management of "logical tapes" within temporary files.
6
* This module exists to support sorting via multiple merge passes (see
7
* tuplesort.c). Merging is an ideal algorithm for tape devices, but if
8
* we implement it on disk by creating a separate file for each "tape",
9
* there is an annoying problem: the peak space usage is at least twice
10
* the volume of actual data to be sorted. (This must be so because each
11
* datum will appear in both the input and output tapes of the final
12
* merge pass. For seven-tape polyphase merge, which is otherwise a
13
* pretty good algorithm, peak usage is more like 4x actual data volume.)
15
* We can work around this problem by recognizing that any one tape
16
* dataset (with the possible exception of the final output) is written
17
* and read exactly once in a perfectly sequential manner. Therefore,
18
* a datum once read will not be required again, and we can recycle its
19
* space for use by the new tape dataset(s) being generated. In this way,
20
* the total space usage is essentially just the actual data volume, plus
21
* insignificant bookkeeping and start/stop overhead.
23
* Few OSes allow arbitrary parts of a file to be released back to the OS,
24
* so we have to implement this space-recycling ourselves within a single
25
* logical file. logtape.c exists to perform this bookkeeping and provide
26
* the illusion of N independent tape devices to tuplesort.c. Note that
27
* logtape.c itself depends on buffile.c to provide a "logical file" of
28
* larger size than the underlying OS may support.
30
* For simplicity, we allocate and release space in the underlying file
31
* in BLCKSZ-size blocks. Space allocation boils down to keeping track
32
* of which blocks in the underlying file belong to which logical tape,
33
* plus any blocks that are free (recycled and not yet reused).
34
* The blocks in each logical tape are remembered using a method borrowed
35
* from the Unix HFS filesystem: we store data block numbers in an
36
* "indirect block". If an indirect block fills up, we write it out to
37
* the underlying file and remember its location in a second-level indirect
38
* block. In the same way second-level blocks are remembered in third-
39
* level blocks, and so on if necessary (of course we're talking huge
40
* amounts of data here). The topmost indirect block of a given logical
41
* tape is never actually written out to the physical file, but all lower-
42
* level indirect blocks will be.
44
* The initial write pass is guaranteed to fill the underlying file
45
* perfectly sequentially, no matter how data is divided into logical tapes.
46
* Once we begin merge passes, the access pattern becomes considerably
47
* less predictable --- but the seeking involved should be comparable to
48
* what would happen if we kept each logical tape in a separate file,
49
* so there's no serious performance penalty paid to obtain the space
50
* savings of recycling. We try to localize the write accesses by always
51
* writing to the lowest-numbered free block when we have a choice; it's
52
* not clear this helps much, but it can't hurt. (XXX perhaps a LIFO
53
* policy for free blocks would be better?)
55
* To support the above policy of writing to the lowest free block,
56
* ltsGetFreeBlock sorts the list of free block numbers into decreasing
57
* order each time it is asked for a block and the list isn't currently
58
* sorted. This is an efficient way to handle it because we expect cycles
59
* of releasing many blocks followed by re-using many blocks, due to
60
* tuplesort.c's "preread" behavior.
62
* Since all the bookkeeping and buffer memory is allocated with palloc(),
63
* and the underlying file(s) are made with OpenTemporaryFile, all resources
64
* for a logical tape set are certain to be cleaned up even if processing
65
* is aborted by ereport(ERROR). To avoid confusion, the caller should take
66
* care that all calls for a single LogicalTapeSet are made in the same
69
* Portions Copyright (c) 1996-2011, PostgreSQL Global Development Group
70
* Portions Copyright (c) 1994, Regents of the University of California
73
* src/backend/utils/sort/logtape.c
75
*-------------------------------------------------------------------------
80
#include "storage/buffile.h"
81
#include "utils/logtape.h"
84
* Block indexes are "long"s, so we can fit this many per indirect block.
85
* NB: we assume this is an exact fit!
87
#define BLOCKS_PER_INDIR_BLOCK ((int) (BLCKSZ / sizeof(long)))
90
* We use a struct like this for each active indirection level of each
91
* logical tape. If the indirect block is not the highest level of its
92
* tape, the "nextup" link points to the next higher level. Only the
93
* "ptrs" array is written out if we have to dump the indirect block to
94
* disk. If "ptrs" is not completely full, we store -1L in the first
95
* unused slot at completion of the write phase for the logical tape.
97
typedef struct IndirectBlock
99
int nextSlot; /* next pointer slot to write or read */
100
struct IndirectBlock *nextup; /* parent indirect level, or NULL if
102
long ptrs[BLOCKS_PER_INDIR_BLOCK]; /* indexes of contained blocks */
106
* This data structure represents a single "logical tape" within the set
107
* of logical tapes stored in the same file. We must keep track of the
108
* current partially-read-or-written data block as well as the active
109
* indirect block level(s).
111
typedef struct LogicalTape
113
IndirectBlock *indirect; /* bottom of my indirect-block hierarchy */
114
bool writing; /* T while in write phase */
115
bool frozen; /* T if blocks should not be freed when read */
116
bool dirty; /* does buffer need to be written? */
119
* The total data volume in the logical tape is numFullBlocks * BLCKSZ +
120
* lastBlockBytes. BUT: we do not update lastBlockBytes during writing,
121
* only at completion of a write phase.
123
long numFullBlocks; /* number of complete blocks in log tape */
124
int lastBlockBytes; /* valid bytes in last (incomplete) block */
127
* Buffer for current data block. Note we don't bother to store the
128
* actual file block number of the data block (during the write phase it
129
* hasn't been assigned yet, and during read we don't care anymore). But
130
* we do need the relative block number so we can detect end-of-tape while
133
char *buffer; /* physical buffer (separately palloc'd) */
134
long curBlockNumber; /* this block's logical blk# within tape */
135
int pos; /* next read/write position in buffer */
136
int nbytes; /* total # of valid bytes in buffer */
140
* This data structure represents a set of related "logical tapes" sharing
141
* space in a single underlying file. (But that "file" may be multiple files
142
* if needed to escape OS limits on file size; buffile.c handles that for us.)
143
* The number of tapes is fixed at creation.
145
struct LogicalTapeSet
147
BufFile *pfile; /* underlying file for whole tape set */
148
long nFileBlocks; /* # of blocks used in underlying file */
151
* We store the numbers of recycled-and-available blocks in freeBlocks[].
152
* When there are no such blocks, we extend the underlying file.
154
* If forgetFreeSpace is true then any freed blocks are simply forgotten
155
* rather than being remembered in freeBlocks[]. See notes for
156
* LogicalTapeSetForgetFreeSpace().
158
* If blocksSorted is true then the block numbers in freeBlocks are in
159
* *decreasing* order, so that removing the last entry gives us the lowest
160
* free block. We re-sort the blocks whenever a block is demanded; this
161
* should be reasonably efficient given the expected usage pattern.
163
bool forgetFreeSpace; /* are we remembering free blocks? */
164
bool blocksSorted; /* is freeBlocks[] currently in order? */
165
long *freeBlocks; /* resizable array */
166
int nFreeBlocks; /* # of currently free blocks */
167
int freeBlocksLen; /* current allocated length of freeBlocks[] */
170
* tapes[] is declared size 1 since C wants a fixed size, but actually it
171
* is of length nTapes.
173
int nTapes; /* # of logical tapes in set */
174
LogicalTape tapes[1]; /* must be last in struct! */
177
static void ltsWriteBlock(LogicalTapeSet *lts, long blocknum, void *buffer);
178
static void ltsReadBlock(LogicalTapeSet *lts, long blocknum, void *buffer);
179
static long ltsGetFreeBlock(LogicalTapeSet *lts);
180
static void ltsReleaseBlock(LogicalTapeSet *lts, long blocknum);
181
static void ltsRecordBlockNum(LogicalTapeSet *lts, IndirectBlock *indirect,
183
static long ltsRewindIndirectBlock(LogicalTapeSet *lts,
184
IndirectBlock *indirect,
186
static long ltsRewindFrozenIndirectBlock(LogicalTapeSet *lts,
187
IndirectBlock *indirect);
188
static long ltsRecallNextBlockNum(LogicalTapeSet *lts,
189
IndirectBlock *indirect,
191
static long ltsRecallPrevBlockNum(LogicalTapeSet *lts,
192
IndirectBlock *indirect);
193
static void ltsDumpBuffer(LogicalTapeSet *lts, LogicalTape *lt);
197
* Write a block-sized buffer to the specified block of the underlying file.
199
* NB: should not attempt to write beyond current end of file (ie, create
200
* "holes" in file), since BufFile doesn't allow that. The first write pass
201
* must write blocks sequentially.
203
* No need for an error return convention; we ereport() on any error.
206
ltsWriteBlock(LogicalTapeSet *lts, long blocknum, void *buffer)
208
if (BufFileSeekBlock(lts->pfile, blocknum) != 0 ||
209
BufFileWrite(lts->pfile, buffer, BLCKSZ) != BLCKSZ)
211
/* XXX is it okay to assume errno is correct? */
212
(errcode_for_file_access(),
213
errmsg("could not write block %ld of temporary file: %m",
215
errhint("Perhaps out of disk space?")));
219
* Read a block-sized buffer from the specified block of the underlying file.
221
* No need for an error return convention; we ereport() on any error. This
222
* module should never attempt to read a block it doesn't know is there.
225
ltsReadBlock(LogicalTapeSet *lts, long blocknum, void *buffer)
227
if (BufFileSeekBlock(lts->pfile, blocknum) != 0 ||
228
BufFileRead(lts->pfile, buffer, BLCKSZ) != BLCKSZ)
230
/* XXX is it okay to assume errno is correct? */
231
(errcode_for_file_access(),
232
errmsg("could not read block %ld of temporary file: %m",
237
* qsort comparator for sorting freeBlocks[] into decreasing order.
240
freeBlocks_cmp(const void *a, const void *b)
242
long ablk = *((const long *) a);
243
long bblk = *((const long *) b);
245
/* can't just subtract because long might be wider than int */
254
* Select a currently unused block for writing to.
256
* NB: should only be called when writer is ready to write immediately,
257
* to ensure that first write pass is sequential.
260
ltsGetFreeBlock(LogicalTapeSet *lts)
263
* If there are multiple free blocks, we select the one appearing last in
264
* freeBlocks[] (after sorting the array if needed). If there are none,
265
* assign the next block at the end of the file.
267
if (lts->nFreeBlocks > 0)
269
if (!lts->blocksSorted)
271
qsort((void *) lts->freeBlocks, lts->nFreeBlocks,
272
sizeof(long), freeBlocks_cmp);
273
lts->blocksSorted = true;
275
return lts->freeBlocks[--lts->nFreeBlocks];
278
return lts->nFileBlocks++;
282
* Return a block# to the freelist.
285
ltsReleaseBlock(LogicalTapeSet *lts, long blocknum)
290
* Do nothing if we're no longer interested in remembering free space.
292
if (lts->forgetFreeSpace)
296
* Enlarge freeBlocks array if full.
298
if (lts->nFreeBlocks >= lts->freeBlocksLen)
300
lts->freeBlocksLen *= 2;
301
lts->freeBlocks = (long *) repalloc(lts->freeBlocks,
302
lts->freeBlocksLen * sizeof(long));
306
* Add blocknum to array, and mark the array unsorted if it's no longer in
309
ndx = lts->nFreeBlocks++;
310
lts->freeBlocks[ndx] = blocknum;
311
if (ndx > 0 && lts->freeBlocks[ndx - 1] < blocknum)
312
lts->blocksSorted = false;
316
* These routines manipulate indirect-block hierarchies. All are recursive
317
* so that they don't have any specific limit on the depth of hierarchy.
321
* Record a data block number in a logical tape's lowest indirect block,
322
* or record an indirect block's number in the next higher indirect level.
325
ltsRecordBlockNum(LogicalTapeSet *lts, IndirectBlock *indirect,
328
if (indirect->nextSlot >= BLOCKS_PER_INDIR_BLOCK)
331
* This indirect block is full, so dump it out and recursively save
332
* its address in the next indirection level. Create a new
333
* indirection level if there wasn't one before.
335
long indirblock = ltsGetFreeBlock(lts);
337
ltsWriteBlock(lts, indirblock, (void *) indirect->ptrs);
338
if (indirect->nextup == NULL)
340
indirect->nextup = (IndirectBlock *) palloc(sizeof(IndirectBlock));
341
indirect->nextup->nextSlot = 0;
342
indirect->nextup->nextup = NULL;
344
ltsRecordBlockNum(lts, indirect->nextup, indirblock);
347
* Reset to fill another indirect block at this level.
349
indirect->nextSlot = 0;
351
indirect->ptrs[indirect->nextSlot++] = blocknum;
355
* Reset a logical tape's indirect-block hierarchy after a write pass
356
* to prepare for reading. We dump out partly-filled blocks except
357
* at the top of the hierarchy, and we rewind each level to the start.
358
* This call returns the first data block number, or -1L if the tape
361
* Unless 'freezing' is true, release indirect blocks to the free pool after
365
ltsRewindIndirectBlock(LogicalTapeSet *lts,
366
IndirectBlock *indirect,
369
/* Handle case of never-written-to tape */
370
if (indirect == NULL)
373
/* Insert sentinel if block is not full */
374
if (indirect->nextSlot < BLOCKS_PER_INDIR_BLOCK)
375
indirect->ptrs[indirect->nextSlot] = -1L;
378
* If block is not topmost, write it out, and recurse to obtain address of
379
* first block in this hierarchy level. Read that one in.
381
if (indirect->nextup != NULL)
383
long indirblock = ltsGetFreeBlock(lts);
385
ltsWriteBlock(lts, indirblock, (void *) indirect->ptrs);
386
ltsRecordBlockNum(lts, indirect->nextup, indirblock);
387
indirblock = ltsRewindIndirectBlock(lts, indirect->nextup, freezing);
388
Assert(indirblock != -1L);
389
ltsReadBlock(lts, indirblock, (void *) indirect->ptrs);
391
ltsReleaseBlock(lts, indirblock);
395
* Reset my next-block pointer, and then fetch a block number if any.
397
indirect->nextSlot = 0;
398
if (indirect->ptrs[0] == -1L)
400
return indirect->ptrs[indirect->nextSlot++];
404
* Rewind a previously-frozen indirect-block hierarchy for another read pass.
405
* This call returns the first data block number, or -1L if the tape
409
ltsRewindFrozenIndirectBlock(LogicalTapeSet *lts,
410
IndirectBlock *indirect)
412
/* Handle case of never-written-to tape */
413
if (indirect == NULL)
417
* If block is not topmost, recurse to obtain address of first block in
418
* this hierarchy level. Read that one in.
420
if (indirect->nextup != NULL)
424
indirblock = ltsRewindFrozenIndirectBlock(lts, indirect->nextup);
425
Assert(indirblock != -1L);
426
ltsReadBlock(lts, indirblock, (void *) indirect->ptrs);
430
* Reset my next-block pointer, and then fetch a block number if any.
432
indirect->nextSlot = 0;
433
if (indirect->ptrs[0] == -1L)
435
return indirect->ptrs[indirect->nextSlot++];
439
* Obtain next data block number in the forward direction, or -1L if no more.
441
* Unless 'frozen' is true, release indirect blocks to the free pool after
445
ltsRecallNextBlockNum(LogicalTapeSet *lts,
446
IndirectBlock *indirect,
449
/* Handle case of never-written-to tape */
450
if (indirect == NULL)
453
if (indirect->nextSlot >= BLOCKS_PER_INDIR_BLOCK ||
454
indirect->ptrs[indirect->nextSlot] == -1L)
458
if (indirect->nextup == NULL)
459
return -1L; /* nothing left at this level */
460
indirblock = ltsRecallNextBlockNum(lts, indirect->nextup, frozen);
461
if (indirblock == -1L)
462
return -1L; /* nothing left at this level */
463
ltsReadBlock(lts, indirblock, (void *) indirect->ptrs);
465
ltsReleaseBlock(lts, indirblock);
466
indirect->nextSlot = 0;
468
if (indirect->ptrs[indirect->nextSlot] == -1L)
470
return indirect->ptrs[indirect->nextSlot++];
474
* Obtain next data block number in the reverse direction, or -1L if no more.
476
* Note this fetches the block# before the one last returned, no matter which
477
* direction of call returned that one. If we fail, no change in state.
479
* This routine can only be used in 'frozen' state, so there's no need to
480
* pass a parameter telling whether to release blocks ... we never do.
483
ltsRecallPrevBlockNum(LogicalTapeSet *lts,
484
IndirectBlock *indirect)
486
/* Handle case of never-written-to tape */
487
if (indirect == NULL)
490
if (indirect->nextSlot <= 1)
494
if (indirect->nextup == NULL)
495
return -1L; /* nothing left at this level */
496
indirblock = ltsRecallPrevBlockNum(lts, indirect->nextup);
497
if (indirblock == -1L)
498
return -1L; /* nothing left at this level */
499
ltsReadBlock(lts, indirblock, (void *) indirect->ptrs);
502
* The previous block would only have been written out if full, so we
503
* need not search it for a -1 sentinel.
505
indirect->nextSlot = BLOCKS_PER_INDIR_BLOCK + 1;
507
indirect->nextSlot--;
508
return indirect->ptrs[indirect->nextSlot - 1];
513
* Create a set of logical tapes in a temporary underlying file.
515
* Each tape is initialized in write state.
518
LogicalTapeSetCreate(int ntapes)
525
* Create top-level struct including per-tape LogicalTape structs. First
526
* LogicalTape struct is already counted in sizeof(LogicalTapeSet).
529
lts = (LogicalTapeSet *) palloc(sizeof(LogicalTapeSet) +
530
(ntapes - 1) *sizeof(LogicalTape));
531
lts->pfile = BufFileCreateTemp(false);
532
lts->nFileBlocks = 0L;
533
lts->forgetFreeSpace = false;
534
lts->blocksSorted = true; /* a zero-length array is sorted ... */
535
lts->freeBlocksLen = 32; /* reasonable initial guess */
536
lts->freeBlocks = (long *) palloc(lts->freeBlocksLen * sizeof(long));
537
lts->nFreeBlocks = 0;
538
lts->nTapes = ntapes;
541
* Initialize per-tape structs. Note we allocate the I/O buffer and
542
* first-level indirect block for a tape only when it is first actually
543
* written to. This avoids wasting memory space when tuplesort.c
544
* overestimates the number of tapes needed.
546
for (i = 0; i < ntapes; i++)
553
lt->numFullBlocks = 0L;
554
lt->lastBlockBytes = 0;
556
lt->curBlockNumber = 0L;
564
* Close a logical tape set and release all resources.
567
LogicalTapeSetClose(LogicalTapeSet *lts)
574
BufFileClose(lts->pfile);
575
for (i = 0; i < lts->nTapes; i++)
578
for (ib = lt->indirect; ib != NULL; ib = nextib)
586
pfree(lts->freeBlocks);
591
* Mark a logical tape set as not needing management of free space anymore.
593
* This should be called if the caller does not intend to write any more data
594
* into the tape set, but is reading from un-frozen tapes. Since no more
595
* writes are planned, remembering free blocks is no longer useful. Setting
596
* this flag lets us avoid wasting time and space in ltsReleaseBlock(), which
597
* is not designed to handle large numbers of free blocks.
600
LogicalTapeSetForgetFreeSpace(LogicalTapeSet *lts)
602
lts->forgetFreeSpace = true;
606
* Dump the dirty buffer of a logical tape.
609
ltsDumpBuffer(LogicalTapeSet *lts, LogicalTape *lt)
611
long datablock = ltsGetFreeBlock(lts);
614
ltsWriteBlock(lts, datablock, (void *) lt->buffer);
615
ltsRecordBlockNum(lts, lt->indirect, datablock);
617
/* Caller must do other state update as needed */
621
* Write to a logical tape.
623
* There are no error returns; we ereport() on failure.
626
LogicalTapeWrite(LogicalTapeSet *lts, int tapenum,
627
void *ptr, size_t size)
632
Assert(tapenum >= 0 && tapenum < lts->nTapes);
633
lt = <s->tapes[tapenum];
636
/* Allocate data buffer and first indirect block on first write */
637
if (lt->buffer == NULL)
638
lt->buffer = (char *) palloc(BLCKSZ);
639
if (lt->indirect == NULL)
641
lt->indirect = (IndirectBlock *) palloc(sizeof(IndirectBlock));
642
lt->indirect->nextSlot = 0;
643
lt->indirect->nextup = NULL;
648
if (lt->pos >= BLCKSZ)
650
/* Buffer full, dump it out */
652
ltsDumpBuffer(lts, lt);
655
/* Hmm, went directly from reading to writing? */
656
elog(ERROR, "invalid logtape state: should be dirty");
659
lt->curBlockNumber++;
664
nthistime = BLCKSZ - lt->pos;
665
if (nthistime > size)
667
Assert(nthistime > 0);
669
memcpy(lt->buffer + lt->pos, ptr, nthistime);
672
lt->pos += nthistime;
673
if (lt->nbytes < lt->pos)
674
lt->nbytes = lt->pos;
675
ptr = (void *) ((char *) ptr + nthistime);
681
* Rewind logical tape and switch from writing to reading or vice versa.
683
* Unless the tape has been "frozen" in read state, forWrite must be the
684
* opposite of the previous tape state.
687
LogicalTapeRewind(LogicalTapeSet *lts, int tapenum, bool forWrite)
692
Assert(tapenum >= 0 && tapenum < lts->nTapes);
693
lt = <s->tapes[tapenum];
700
* Completion of a write phase. Flush last partial data block,
701
* flush any partial indirect blocks, rewind for normal
702
* (destructive) read.
705
ltsDumpBuffer(lts, lt);
706
lt->lastBlockBytes = lt->nbytes;
708
datablocknum = ltsRewindIndirectBlock(lts, lt->indirect, false);
713
* This is only OK if tape is frozen; we rewind for (another) read
717
datablocknum = ltsRewindFrozenIndirectBlock(lts, lt->indirect);
719
/* Read the first block, or reset if tape is empty */
720
lt->curBlockNumber = 0L;
723
if (datablocknum != -1L)
725
ltsReadBlock(lts, datablocknum, (void *) lt->buffer);
727
ltsReleaseBlock(lts, datablocknum);
728
lt->nbytes = (lt->curBlockNumber < lt->numFullBlocks) ?
729
BLCKSZ : lt->lastBlockBytes;
735
* Completion of a read phase. Rewind and prepare for write.
737
* NOTE: we assume the caller has read the tape to the end; otherwise
738
* untouched data and indirect blocks will not have been freed. We
739
* could add more code to free any unread blocks, but in current usage
740
* of this module it'd be useless code.
745
Assert(!lt->writing && !lt->frozen);
746
/* Must truncate the indirect-block hierarchy down to one level. */
749
for (ib = lt->indirect->nextup; ib != NULL; ib = nextib)
754
lt->indirect->nextSlot = 0;
755
lt->indirect->nextup = NULL;
759
lt->numFullBlocks = 0L;
760
lt->lastBlockBytes = 0;
761
lt->curBlockNumber = 0L;
768
* Read from a logical tape.
770
* Early EOF is indicated by return value less than #bytes requested.
773
LogicalTapeRead(LogicalTapeSet *lts, int tapenum,
774
void *ptr, size_t size)
780
Assert(tapenum >= 0 && tapenum < lts->nTapes);
781
lt = <s->tapes[tapenum];
782
Assert(!lt->writing);
786
if (lt->pos >= lt->nbytes)
788
/* Try to load more data into buffer. */
789
long datablocknum = ltsRecallNextBlockNum(lts, lt->indirect,
792
if (datablocknum == -1L)
794
lt->curBlockNumber++;
796
ltsReadBlock(lts, datablocknum, (void *) lt->buffer);
798
ltsReleaseBlock(lts, datablocknum);
799
lt->nbytes = (lt->curBlockNumber < lt->numFullBlocks) ?
800
BLCKSZ : lt->lastBlockBytes;
802
break; /* EOF (possible here?) */
805
nthistime = lt->nbytes - lt->pos;
806
if (nthistime > size)
808
Assert(nthistime > 0);
810
memcpy(ptr, lt->buffer + lt->pos, nthistime);
812
lt->pos += nthistime;
813
ptr = (void *) ((char *) ptr + nthistime);
822
* "Freeze" the contents of a tape so that it can be read multiple times
823
* and/or read backwards. Once a tape is frozen, its contents will not
824
* be released until the LogicalTapeSet is destroyed. This is expected
825
* to be used only for the final output pass of a merge.
827
* This *must* be called just at the end of a write pass, before the
828
* tape is rewound (after rewind is too late!). It performs a rewind
829
* and switch to read mode "for free". An immediately following rewind-
830
* for-read call is OK but not necessary.
833
LogicalTapeFreeze(LogicalTapeSet *lts, int tapenum)
838
Assert(tapenum >= 0 && tapenum < lts->nTapes);
839
lt = <s->tapes[tapenum];
843
* Completion of a write phase. Flush last partial data block, flush any
844
* partial indirect blocks, rewind for nondestructive read.
847
ltsDumpBuffer(lts, lt);
848
lt->lastBlockBytes = lt->nbytes;
851
datablocknum = ltsRewindIndirectBlock(lts, lt->indirect, true);
852
/* Read the first block, or reset if tape is empty */
853
lt->curBlockNumber = 0L;
856
if (datablocknum != -1L)
858
ltsReadBlock(lts, datablocknum, (void *) lt->buffer);
859
lt->nbytes = (lt->curBlockNumber < lt->numFullBlocks) ?
860
BLCKSZ : lt->lastBlockBytes;
865
* Backspace the tape a given number of bytes. (We also support a more
866
* general seek interface, see below.)
868
* *Only* a frozen-for-read tape can be backed up; we don't support
869
* random access during write, and an unfrozen read tape may have
870
* already discarded the desired data!
872
* Return value is TRUE if seek successful, FALSE if there isn't that much
873
* data before the current point (in which case there's no state change).
876
LogicalTapeBackspace(LogicalTapeSet *lts, int tapenum, size_t size)
882
Assert(tapenum >= 0 && tapenum < lts->nTapes);
883
lt = <s->tapes[tapenum];
887
* Easy case for seek within current block.
889
if (size <= (size_t) lt->pos)
891
lt->pos -= (int) size;
896
* Not-so-easy case. Figure out whether it's possible at all.
898
size -= (size_t) lt->pos; /* part within this block */
899
nblocks = size / BLCKSZ;
900
size = size % BLCKSZ;
904
newpos = (int) (BLCKSZ - size);
908
if (nblocks > lt->curBlockNumber)
909
return false; /* a seek too far... */
912
* OK, we need to back up nblocks blocks. This implementation would be
913
* pretty inefficient for long seeks, but we really aren't expecting that
914
* (a seek over one tuple is typical).
916
while (nblocks-- > 0)
918
long datablocknum = ltsRecallPrevBlockNum(lts, lt->indirect);
920
if (datablocknum == -1L)
921
elog(ERROR, "unexpected end of tape");
922
lt->curBlockNumber--;
925
ltsReadBlock(lts, datablocknum, (void *) lt->buffer);
934
* Seek to an arbitrary position in a logical tape.
936
* *Only* a frozen-for-read tape can be seeked.
938
* Return value is TRUE if seek successful, FALSE if there isn't that much
939
* data in the tape (in which case there's no state change).
942
LogicalTapeSeek(LogicalTapeSet *lts, int tapenum,
943
long blocknum, int offset)
947
Assert(tapenum >= 0 && tapenum < lts->nTapes);
948
lt = <s->tapes[tapenum];
950
Assert(offset >= 0 && offset <= BLCKSZ);
953
* Easy case for seek within current block.
955
if (blocknum == lt->curBlockNumber && offset <= lt->nbytes)
962
* Not-so-easy case. Figure out whether it's possible at all.
964
if (blocknum < 0 || blocknum > lt->numFullBlocks ||
965
(blocknum == lt->numFullBlocks && offset > lt->lastBlockBytes))
969
* OK, advance or back up to the target block. This implementation would
970
* be pretty inefficient for long seeks, but we really aren't expecting
971
* that (a seek over one tuple is typical).
973
while (lt->curBlockNumber > blocknum)
975
long datablocknum = ltsRecallPrevBlockNum(lts, lt->indirect);
977
if (datablocknum == -1L)
978
elog(ERROR, "unexpected end of tape");
979
if (--lt->curBlockNumber == blocknum)
980
ltsReadBlock(lts, datablocknum, (void *) lt->buffer);
982
while (lt->curBlockNumber < blocknum)
984
long datablocknum = ltsRecallNextBlockNum(lts, lt->indirect,
987
if (datablocknum == -1L)
988
elog(ERROR, "unexpected end of tape");
989
if (++lt->curBlockNumber == blocknum)
990
ltsReadBlock(lts, datablocknum, (void *) lt->buffer);
992
lt->nbytes = (lt->curBlockNumber < lt->numFullBlocks) ?
993
BLCKSZ : lt->lastBlockBytes;
999
* Obtain current position in a form suitable for a later LogicalTapeSeek.
1001
* NOTE: it'd be OK to do this during write phase with intention of using
1002
* the position for a seek after freezing. Not clear if anyone needs that.
1005
LogicalTapeTell(LogicalTapeSet *lts, int tapenum,
1006
long *blocknum, int *offset)
1010
Assert(tapenum >= 0 && tapenum < lts->nTapes);
1011
lt = <s->tapes[tapenum];
1012
*blocknum = lt->curBlockNumber;
1017
* Obtain total disk space currently used by a LogicalTapeSet, in blocks.
1020
LogicalTapeSetBlocks(LogicalTapeSet *lts)
1022
return lts->nFileBlocks;