1
/*-------------------------------------------------------------------------
4
* This code manages relations that reside on magnetic disk.
6
* Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
7
* Portions Copyright (c) 1994, Regents of the University of California
11
* $PostgreSQL: pgsql/src/backend/storage/smgr/md.c,v 1.114 2004-12-31 22:01:13 pgsql Exp $
13
*-------------------------------------------------------------------------
22
#include "catalog/catalog.h"
23
#include "miscadmin.h"
24
#include "postmaster/bgwriter.h"
25
#include "storage/fd.h"
26
#include "storage/smgr.h"
27
#include "utils/hsearch.h"
28
#include "utils/memutils.h"
32
* The magnetic disk storage manager keeps track of open file
33
* descriptors in its own descriptor pool. This is done to make it
34
* easier to support relations that are larger than the operating
35
* system's file size limit (often 2GBytes). In order to do that,
36
* we break relations up into chunks of < 2GBytes and store one chunk
37
* in each of several files that represent the relation. See the
38
* BLCKSZ and RELSEG_SIZE configuration constants in pg_config_manual.h.
39
* All chunks except the last MUST have size exactly equal to RELSEG_SIZE
40
* blocks --- see mdnblocks() and mdtruncate().
42
* The file descriptor pointer (md_fd field) stored in the SMgrRelation
43
* cache is, therefore, just the head of a list of MdfdVec objects.
44
* But note the md_fd pointer can be NULL, indicating relation not open.
46
* Note that mdfd_chain == NULL does not necessarily mean the relation
47
* doesn't have another segment after this one; we may just not have
48
* opened the next segment yet. (We could not have "all segments are
49
* in the chain" as an invariant anyway, since another backend could
50
* extend the relation when we weren't looking.)
52
* All MdfdVec objects are palloc'd in the MdCxt memory context.
55
typedef struct _MdfdVec
57
File mdfd_vfd; /* fd number in fd.c's pool */
58
BlockNumber mdfd_segno; /* segment number, from 0 */
59
#ifndef LET_OS_MANAGE_FILESIZE /* for large relations */
60
struct _MdfdVec *mdfd_chain; /* next segment, or NULL */
64
static MemoryContext MdCxt; /* context for all md.c allocations */
68
* In some contexts (currently, standalone backends and the bgwriter process)
69
* we keep track of pending fsync operations: we need to remember all relation
70
* segments that have been written since the last checkpoint, so that we can
71
* fsync them down to disk before completing the next checkpoint. This hash
72
* table remembers the pending operations. We use a hash table not because
73
* we want to look up individual operations, but simply as a convenient way
74
* of eliminating duplicate requests.
76
* (Regular backends do not track pending operations locally, but forward
77
* them to the bgwriter.)
79
* XXX for WIN32, may want to expand this to track pending deletes, too.
83
RelFileNode rnode; /* the targeted relation */
84
BlockNumber segno; /* which segment */
85
} PendingOperationEntry;
87
static HTAB *pendingOpsTable = NULL;
91
static MdfdVec *mdopen(SMgrRelation reln, bool allowNotFound);
92
static bool register_dirty_segment(SMgrRelation reln, MdfdVec *seg);
93
static MdfdVec *_fdvec_alloc(void);
95
#ifndef LET_OS_MANAGE_FILESIZE
96
static MdfdVec *_mdfd_openseg(SMgrRelation reln, BlockNumber segno,
99
static MdfdVec *_mdfd_getseg(SMgrRelation reln, BlockNumber blkno,
101
static BlockNumber _mdnblocks(File file, Size blcksz);
105
* mdinit() -- Initialize private state for magnetic disk storage manager.
110
MdCxt = AllocSetContextCreate(TopMemoryContext,
112
ALLOCSET_DEFAULT_MINSIZE,
113
ALLOCSET_DEFAULT_INITSIZE,
114
ALLOCSET_DEFAULT_MAXSIZE);
117
* Create pending-operations hashtable if we need it. Currently, we
118
* need it if we are standalone (not under a postmaster) OR if we are
119
* a bootstrap-mode subprocess of a postmaster (that is, a startup or
122
if (!IsUnderPostmaster || IsBootstrapProcessingMode())
126
MemSet(&hash_ctl, 0, sizeof(hash_ctl));
127
hash_ctl.keysize = sizeof(PendingOperationEntry);
128
hash_ctl.entrysize = sizeof(PendingOperationEntry);
129
hash_ctl.hash = tag_hash;
130
hash_ctl.hcxt = MdCxt;
131
pendingOpsTable = hash_create("Pending Ops Table",
134
HASH_ELEM | HASH_FUNCTION | HASH_CONTEXT);
141
* mdcreate() -- Create a new relation on magnetic disk.
143
* If isRedo is true, it's okay for the relation to exist already.
146
mdcreate(SMgrRelation reln, bool isRedo)
151
if (isRedo && reln->md_fd != NULL)
152
return true; /* created and opened already... */
154
Assert(reln->md_fd == NULL);
156
path = relpath(reln->smgr_rnode);
158
fd = FileNameOpenFile(path, O_RDWR | O_CREAT | O_EXCL | PG_BINARY, 0600);
162
int save_errno = errno;
165
* During bootstrap, there are cases where a system relation will
166
* be accessed (by internal backend processes) before the
167
* bootstrap script nominally creates it. Therefore, allow the
168
* file to exist already, even if isRedo is not set. (See also
171
if (isRedo || IsBootstrapProcessingMode())
172
fd = FileNameOpenFile(path, O_RDWR | PG_BINARY, 0600);
176
/* be sure to return the error reported by create, not open */
185
reln->md_fd = _fdvec_alloc();
187
reln->md_fd->mdfd_vfd = fd;
188
reln->md_fd->mdfd_segno = 0;
189
#ifndef LET_OS_MANAGE_FILESIZE
190
reln->md_fd->mdfd_chain = NULL;
197
* mdunlink() -- Unlink a relation.
199
* Note that we're passed a RelFileNode --- by the time this is called,
200
* there won't be an SMgrRelation hashtable entry anymore.
202
* If isRedo is true, it's okay for the relation to be already gone.
205
mdunlink(RelFileNode rnode, bool isRedo)
211
path = relpath(rnode);
213
/* Delete the first segment, or only segment if not doing segmenting */
214
if (unlink(path) < 0)
216
if (!isRedo || errno != ENOENT)
223
#ifndef LET_OS_MANAGE_FILESIZE
224
/* Get the additional segments, if any */
227
char *segpath = (char *) palloc(strlen(path) + 12);
230
for (segno = 1;; segno++)
232
sprintf(segpath, "%s.%u", path, segno);
233
if (unlink(segpath) < 0)
235
/* ENOENT is expected after the last segment... */
255
* mdextend() -- Add a block to the specified relation.
257
* The semantics are basically the same as mdwrite(): write at the
258
* specified position. However, we are expecting to extend the
259
* relation (ie, blocknum is the current EOF), and so in case of
260
* failure we clean up by truncating.
262
* This routine returns true or false, with errno set as appropriate.
264
* Note: this routine used to call mdnblocks() to get the block position
265
* to write at, but that's pretty silly since the caller needs to know where
266
* the block will be written, and accordingly must have done mdnblocks()
267
* already. Might as well pass in the position and save a seek.
270
mdextend(SMgrRelation reln, BlockNumber blocknum, char *buffer, bool isTemp)
276
v = _mdfd_getseg(reln, blocknum, false);
278
#ifndef LET_OS_MANAGE_FILESIZE
279
seekpos = (long) (BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE)));
280
Assert(seekpos < BLCKSZ * RELSEG_SIZE);
282
seekpos = (long) (BLCKSZ * (blocknum));
286
* Note: because caller obtained blocknum by calling mdnblocks, which
287
* did a seek(SEEK_END), this seek is often redundant and will be
288
* optimized away by fd.c. It's not redundant, however, if there is a
289
* partial page at the end of the file. In that case we want to try
290
* to overwrite the partial page with a full page. It's also not
291
* redundant if bufmgr.c had to dump another buffer of the same file
292
* to make room for the new page's buffer.
294
if (FileSeek(v->mdfd_vfd, seekpos, SEEK_SET) != seekpos)
297
if ((nbytes = FileWrite(v->mdfd_vfd, buffer, BLCKSZ)) != BLCKSZ)
301
int save_errno = errno;
303
/* Remove the partially-written page */
304
FileTruncate(v->mdfd_vfd, seekpos);
305
FileSeek(v->mdfd_vfd, seekpos, SEEK_SET);
313
if (!register_dirty_segment(reln, v))
317
#ifndef LET_OS_MANAGE_FILESIZE
318
Assert(_mdnblocks(v->mdfd_vfd, BLCKSZ) <= ((BlockNumber) RELSEG_SIZE));
325
* mdopen() -- Open the specified relation. ereport's on failure.
326
* (Optionally, can return NULL instead of ereport for ENOENT.)
328
* Note we only open the first segment, when there are multiple segments.
331
mdopen(SMgrRelation reln, bool allowNotFound)
337
/* No work if already open */
341
path = relpath(reln->smgr_rnode);
343
fd = FileNameOpenFile(path, O_RDWR | PG_BINARY, 0600);
348
* During bootstrap, there are cases where a system relation will
349
* be accessed (by internal backend processes) before the
350
* bootstrap script nominally creates it. Therefore, accept
351
* mdopen() as a substitute for mdcreate() in bootstrap mode only.
354
if (IsBootstrapProcessingMode())
355
fd = FileNameOpenFile(path, O_RDWR | O_CREAT | O_EXCL | PG_BINARY, 0600);
359
if (allowNotFound && errno == ENOENT)
362
(errcode_for_file_access(),
363
errmsg("could not open relation %u/%u/%u: %m",
364
reln->smgr_rnode.spcNode,
365
reln->smgr_rnode.dbNode,
366
reln->smgr_rnode.relNode)));
372
reln->md_fd = mdfd = _fdvec_alloc();
375
mdfd->mdfd_segno = 0;
376
#ifndef LET_OS_MANAGE_FILESIZE
377
mdfd->mdfd_chain = NULL;
378
Assert(_mdnblocks(fd, BLCKSZ) <= ((BlockNumber) RELSEG_SIZE));
385
* mdclose() -- Close the specified relation, if it isn't closed already.
387
* Returns true or false with errno set as appropriate.
390
mdclose(SMgrRelation reln)
392
MdfdVec *v = reln->md_fd;
394
/* No work if already closed */
398
reln->md_fd = NULL; /* prevent dangling pointer after error */
400
#ifndef LET_OS_MANAGE_FILESIZE
405
/* if not closed already */
406
if (v->mdfd_vfd >= 0)
407
FileClose(v->mdfd_vfd);
408
/* Now free vector */
413
if (v->mdfd_vfd >= 0)
414
FileClose(v->mdfd_vfd);
422
* mdread() -- Read the specified block from a relation.
425
mdread(SMgrRelation reln, BlockNumber blocknum, char *buffer)
432
v = _mdfd_getseg(reln, blocknum, false);
434
#ifndef LET_OS_MANAGE_FILESIZE
435
seekpos = (long) (BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE)));
436
Assert(seekpos < BLCKSZ * RELSEG_SIZE);
438
seekpos = (long) (BLCKSZ * (blocknum));
441
if (FileSeek(v->mdfd_vfd, seekpos, SEEK_SET) != seekpos)
445
if ((nbytes = FileRead(v->mdfd_vfd, buffer, BLCKSZ)) != BLCKSZ)
448
* If we are at or past EOF, return zeroes without complaining.
449
* Also substitute zeroes if we found a partial block at EOF.
451
* XXX this is really ugly, bad design. However the current
452
* implementation of hash indexes requires it, because hash index
453
* pages are initialized out-of-order.
456
(nbytes > 0 && mdnblocks(reln) == blocknum))
457
MemSet(buffer, 0, BLCKSZ);
466
* mdwrite() -- Write the supplied block at the appropriate location.
469
mdwrite(SMgrRelation reln, BlockNumber blocknum, char *buffer, bool isTemp)
474
v = _mdfd_getseg(reln, blocknum, false);
476
#ifndef LET_OS_MANAGE_FILESIZE
477
seekpos = (long) (BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE)));
478
Assert(seekpos < BLCKSZ * RELSEG_SIZE);
480
seekpos = (long) (BLCKSZ * (blocknum));
483
if (FileSeek(v->mdfd_vfd, seekpos, SEEK_SET) != seekpos)
486
if (FileWrite(v->mdfd_vfd, buffer, BLCKSZ) != BLCKSZ)
491
if (!register_dirty_segment(reln, v))
499
* mdnblocks() -- Get the number of blocks stored in a relation.
501
* Important side effect: all segments of the relation are opened
502
* and added to the mdfd_chain list. If this routine has not been
503
* called, then only segments up to the last one actually touched
504
* are present in the chain...
506
* Returns # of blocks, or InvalidBlockNumber on error.
509
mdnblocks(SMgrRelation reln)
511
MdfdVec *v = mdopen(reln, false);
513
#ifndef LET_OS_MANAGE_FILESIZE
515
BlockNumber segno = 0;
518
* Skip through any segments that aren't the last one, to avoid
519
* redundant seeks on them. We have previously verified that these
520
* segments are exactly RELSEG_SIZE long, and it's useless to recheck
521
* that each time. (NOTE: this assumption could only be wrong if
522
* another backend has truncated the relation. We rely on higher code
523
* levels to handle that scenario by closing and re-opening the md
526
while (v->mdfd_chain != NULL)
534
nblocks = _mdnblocks(v->mdfd_vfd, BLCKSZ);
535
if (nblocks > ((BlockNumber) RELSEG_SIZE))
536
elog(FATAL, "segment too big");
537
if (nblocks < ((BlockNumber) RELSEG_SIZE))
538
return (segno * ((BlockNumber) RELSEG_SIZE)) + nblocks;
541
* If segment is exactly RELSEG_SIZE, advance to next one.
545
if (v->mdfd_chain == NULL)
548
* Because we pass O_CREAT, we will create the next segment
549
* (with zero length) immediately, if the last segment is of
550
* length RELSEG_SIZE. This is unnecessary but harmless, and
551
* testing for the case would take more cycles than it seems
554
v->mdfd_chain = _mdfd_openseg(reln, segno, O_CREAT);
555
if (v->mdfd_chain == NULL)
556
return InvalidBlockNumber; /* failed? */
562
return _mdnblocks(v->mdfd_vfd, BLCKSZ);
567
* mdtruncate() -- Truncate relation to specified number of blocks.
569
* Returns # of blocks or InvalidBlockNumber on error.
572
mdtruncate(SMgrRelation reln, BlockNumber nblocks, bool isTemp)
577
#ifndef LET_OS_MANAGE_FILESIZE
578
BlockNumber priorblocks;
582
* NOTE: mdnblocks makes sure we have opened all existing segments, so
583
* that the truncate/delete loop will get them all!
585
curnblk = mdnblocks(reln);
586
if (curnblk == InvalidBlockNumber)
587
return InvalidBlockNumber; /* mdnblocks failed */
588
if (nblocks > curnblk)
589
return InvalidBlockNumber; /* bogus request */
590
if (nblocks == curnblk)
591
return nblocks; /* no work */
593
v = mdopen(reln, false);
595
#ifndef LET_OS_MANAGE_FILESIZE
601
if (priorblocks > nblocks)
604
* This segment is no longer wanted at all (and has already
605
* been unlinked from the mdfd_chain). We truncate the file
606
* before deleting it because if other backends are holding
607
* the file open, the unlink will fail on some platforms.
608
* Better a zero-size file gets left around than a big file...
610
FileTruncate(v->mdfd_vfd, 0);
611
FileUnlink(v->mdfd_vfd);
613
Assert(ov != reln->md_fd); /* we never drop the 1st segment */
616
else if (priorblocks + ((BlockNumber) RELSEG_SIZE) > nblocks)
619
* This is the last segment we want to keep. Truncate the file
620
* to the right length, and clear chain link that points to
621
* any remaining segments (which we shall zap). NOTE: if
622
* nblocks is exactly a multiple K of RELSEG_SIZE, we will
623
* truncate the K+1st segment to 0 length but keep it. This is
624
* mainly so that the right thing happens if nblocks==0.
626
BlockNumber lastsegblocks = nblocks - priorblocks;
628
if (FileTruncate(v->mdfd_vfd, lastsegblocks * BLCKSZ) < 0)
629
return InvalidBlockNumber;
632
if (!register_dirty_segment(reln, v))
633
return InvalidBlockNumber;
636
ov->mdfd_chain = NULL;
641
* We still need this segment and 0 or more blocks beyond it,
642
* so nothing to do here.
646
priorblocks += RELSEG_SIZE;
649
if (FileTruncate(v->mdfd_vfd, nblocks * BLCKSZ) < 0)
650
return InvalidBlockNumber;
653
if (!register_dirty_segment(reln, v))
654
return InvalidBlockNumber;
662
* mdimmedsync() -- Immediately sync a relation to stable storage.
665
mdimmedsync(SMgrRelation reln)
671
* NOTE: mdnblocks makes sure we have opened all existing segments, so
672
* that the fsync loop will get them all!
674
curnblk = mdnblocks(reln);
675
if (curnblk == InvalidBlockNumber)
676
return false; /* mdnblocks failed */
678
v = mdopen(reln, false);
680
#ifndef LET_OS_MANAGE_FILESIZE
683
if (FileSync(v->mdfd_vfd) < 0)
688
if (FileSync(v->mdfd_vfd) < 0)
696
* mdsync() -- Sync previous writes to stable storage.
698
* This is only called during checkpoints, and checkpoints should only
699
* occur in processes that have created a pendingOpsTable.
704
HASH_SEQ_STATUS hstat;
705
PendingOperationEntry *entry;
707
if (!pendingOpsTable)
711
* If we are in the bgwriter, the sync had better include all fsync
712
* requests that were queued by backends before the checkpoint REDO
713
* point was determined. We go that a little better by accepting all
714
* requests queued up to the point where we start fsync'ing.
716
AbsorbFsyncRequests();
718
hash_seq_init(&hstat, pendingOpsTable);
719
while ((entry = (PendingOperationEntry *) hash_seq_search(&hstat)) != NULL)
722
* If fsync is off then we don't have to bother opening the file
723
* at all. (We delay checking until this point so that changing
724
* fsync on the fly behaves sensibly.)
732
* Find or create an smgr hash entry for this relation. This
733
* may seem a bit unclean -- md calling smgr? But it's really
734
* the best solution. It ensures that the open file reference
735
* isn't permanently leaked if we get an error here. (You may
736
* say "but an unreferenced SMgrRelation is still a leak!" Not
737
* really, because the only case in which a checkpoint is done
738
* by a process that isn't about to shut down is in the
739
* bgwriter, and it will periodically do smgrcloseall(). This
740
* fact justifies our not closing the reln in the success path
741
* either, which is a good thing since in non-bgwriter cases
742
* we couldn't safely do that.) Furthermore, in many cases
743
* the relation will have been dirtied through this same smgr
744
* relation, and so we can save a file open/close cycle.
746
reln = smgropen(entry->rnode);
749
* It is possible that the relation has been dropped or
750
* truncated since the fsync request was entered. Therefore,
751
* we have to allow file-not-found errors. This applies both
752
* during _mdfd_getseg() and during FileSync, since fd.c might
753
* have closed the file behind our back.
755
seg = _mdfd_getseg(reln,
756
entry->segno * ((BlockNumber) RELSEG_SIZE),
760
if (FileSync(seg->mdfd_vfd) < 0 &&
764
(errcode_for_file_access(),
765
errmsg("could not fsync segment %u of relation %u/%u/%u: %m",
767
entry->rnode.spcNode,
769
entry->rnode.relNode)));
775
/* Okay, delete this entry */
776
if (hash_search(pendingOpsTable, entry,
777
HASH_REMOVE, NULL) == NULL)
778
elog(ERROR, "pendingOpsTable corrupted");
785
* register_dirty_segment() -- Mark a relation segment as needing fsync
787
* If there is a local pending-ops table, just make an entry in it for
788
* mdsync to process later. Otherwise, try to pass off the fsync request
789
* to the background writer process. If that fails, just do the fsync
790
* locally before returning (we expect this will not happen often enough
791
* to be a performance problem).
793
* A false result implies I/O failure during local fsync. errno will be
794
* valid for error reporting.
797
register_dirty_segment(SMgrRelation reln, MdfdVec *seg)
801
PendingOperationEntry entry;
803
/* ensure any pad bytes in the struct are zeroed */
804
MemSet(&entry, 0, sizeof(entry));
805
entry.rnode = reln->smgr_rnode;
806
entry.segno = seg->mdfd_segno;
808
if (hash_search(pendingOpsTable, &entry, HASH_ENTER, NULL) != NULL)
810
/* out of memory: fall through to do it locally */
814
if (ForwardFsyncRequest(reln->smgr_rnode, seg->mdfd_segno))
818
if (FileSync(seg->mdfd_vfd) < 0)
824
* RememberFsyncRequest() -- callback from bgwriter side of fsync request
826
* We stuff the fsync request into the local hash table for execution
827
* during the bgwriter's next checkpoint.
830
RememberFsyncRequest(RelFileNode rnode, BlockNumber segno)
832
PendingOperationEntry entry;
834
Assert(pendingOpsTable);
836
/* ensure any pad bytes in the struct are zeroed */
837
MemSet(&entry, 0, sizeof(entry));
841
if (hash_search(pendingOpsTable, &entry, HASH_ENTER, NULL) == NULL)
843
(errcode(ERRCODE_OUT_OF_MEMORY),
844
errmsg("out of memory")));
848
* _fdvec_alloc() -- Make a MdfdVec object.
853
return (MdfdVec *) MemoryContextAlloc(MdCxt, sizeof(MdfdVec));
856
#ifndef LET_OS_MANAGE_FILESIZE
859
* Open the specified segment of the relation,
860
* and make a MdfdVec object for it. Returns NULL on failure.
863
_mdfd_openseg(SMgrRelation reln, BlockNumber segno, int oflags)
870
path = relpath(reln->smgr_rnode);
874
/* be sure we have enough space for the '.segno' */
875
fullpath = (char *) palloc(strlen(path) + 12);
876
sprintf(fullpath, "%s.%u", path, segno);
883
fd = FileNameOpenFile(fullpath, O_RDWR | PG_BINARY | oflags, 0600);
890
/* allocate an mdfdvec entry for it */
895
v->mdfd_segno = segno;
896
v->mdfd_chain = NULL;
897
Assert(_mdnblocks(fd, BLCKSZ) <= ((BlockNumber) RELSEG_SIZE));
902
#endif /* LET_OS_MANAGE_FILESIZE */
905
* _mdfd_getseg() -- Find the segment of the relation holding the
906
* specified block. ereport's on failure.
907
* (Optionally, can return NULL instead of ereport for ENOENT.)
910
_mdfd_getseg(SMgrRelation reln, BlockNumber blkno, bool allowNotFound)
912
MdfdVec *v = mdopen(reln, allowNotFound);
914
#ifndef LET_OS_MANAGE_FILESIZE
915
BlockNumber segstogo;
916
BlockNumber nextsegno;
919
return NULL; /* only possible if allowNotFound */
921
for (segstogo = blkno / ((BlockNumber) RELSEG_SIZE), nextsegno = 1;
923
nextsegno++, segstogo--)
925
if (v->mdfd_chain == NULL)
928
* We will create the next segment only if the target block is
929
* within it. This prevents Sorcerer's Apprentice syndrome if
930
* a bug at higher levels causes us to be handed a
931
* ridiculously large blkno --- otherwise we could create many
932
* thousands of empty segment files before reaching the
933
* "target" block. We should never need to create more than
934
* one new segment per call, so this restriction seems
937
* BUT: when doing WAL recovery, disable this logic and create
938
* segments unconditionally. In this case it seems better
939
* to assume the given blkno is good (it presumably came from
940
* a CRC-checked WAL record); furthermore this lets us cope
941
* in the case where we are replaying WAL data that has a write
942
* into a high-numbered segment of a relation that was later
943
* deleted. We want to go ahead and create the segments so
944
* we can finish out the replay.
946
v->mdfd_chain = _mdfd_openseg(reln,
948
(segstogo == 1 || InRecovery) ? O_CREAT : 0);
949
if (v->mdfd_chain == NULL)
951
if (allowNotFound && errno == ENOENT)
954
(errcode_for_file_access(),
955
errmsg("could not open segment %u of relation %u/%u/%u (target block %u): %m",
957
reln->smgr_rnode.spcNode,
958
reln->smgr_rnode.dbNode,
959
reln->smgr_rnode.relNode,
971
* Get number of blocks present in a single disk file
974
_mdnblocks(File file, Size blcksz)
978
len = FileSeek(file, 0L, SEEK_END);
980
return 0; /* on failure, assume file is empty */
981
return (BlockNumber) (len / blcksz);