|
|
|
|
@ -16,8 +16,8 @@ |
|
|
|
|
* including base tables, scratch files (e.g., sort and hash spool |
|
|
|
|
* files), and random calls to C library routines like system(3); it |
|
|
|
|
* is quite easy to exceed system limits on the number of open files a |
|
|
|
|
* single process can have. (This is around 256 on many modern |
|
|
|
|
* operating systems, but can be as low as 32 on others.) |
|
|
|
|
* single process can have. (This is around 1024 on many modern |
|
|
|
|
* operating systems, but may be lower on others.) |
|
|
|
|
* |
|
|
|
|
* VFDs are managed as an LRU pool, with actual OS file descriptors |
|
|
|
|
* being opened and closed as needed. Obviously, if a routine is |
|
|
|
|
@ -167,15 +167,6 @@ int max_safe_fds = 32; /* default if not changed */ |
|
|
|
|
|
|
|
|
|
#define FileIsNotOpen(file) (VfdCache[file].fd == VFD_CLOSED) |
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Note: a VFD's seekPos is normally always valid, but if for some reason |
|
|
|
|
* an lseek() fails, it might become set to FileUnknownPos. We can struggle |
|
|
|
|
* along without knowing the seek position in many cases, but in some places |
|
|
|
|
* we have to fail if we don't have it. |
|
|
|
|
*/ |
|
|
|
|
#define FileUnknownPos ((off_t) -1) |
|
|
|
|
#define FilePosIsUnknown(pos) ((pos) < 0) |
|
|
|
|
|
|
|
|
|
/* these are the assigned bits in fdstate below: */ |
|
|
|
|
#define FD_DELETE_AT_CLOSE (1 << 0) /* T = delete when closed */ |
|
|
|
|
#define FD_CLOSE_AT_EOXACT (1 << 1) /* T = close at eoXact */ |
|
|
|
|
@ -189,7 +180,6 @@ typedef struct vfd |
|
|
|
|
File nextFree; /* link to next free VFD, if in freelist */ |
|
|
|
|
File lruMoreRecently; /* doubly linked recency-of-use list */ |
|
|
|
|
File lruLessRecently; |
|
|
|
|
off_t seekPos; /* current logical file position, or -1 */ |
|
|
|
|
off_t fileSize; /* current size of file (0 if not temporary) */ |
|
|
|
|
char *fileName; /* name of file, or NULL for unused VFD */ |
|
|
|
|
/* NB: fileName is malloc'd, and must be free'd when closing the VFD */ |
|
|
|
|
@ -407,9 +397,7 @@ pg_fdatasync(int fd) |
|
|
|
|
/*
|
|
|
|
|
* pg_flush_data --- advise OS that the described dirty data should be flushed |
|
|
|
|
* |
|
|
|
|
* offset of 0 with nbytes 0 means that the entire file should be flushed; |
|
|
|
|
* in this case, this function may have side-effects on the file's |
|
|
|
|
* seek position! |
|
|
|
|
* offset of 0 with nbytes 0 means that the entire file should be flushed |
|
|
|
|
*/ |
|
|
|
|
void |
|
|
|
|
pg_flush_data(int fd, off_t offset, off_t nbytes) |
|
|
|
|
@ -1029,22 +1017,6 @@ LruDelete(File file) |
|
|
|
|
|
|
|
|
|
vfdP = &VfdCache[file]; |
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Normally we should know the seek position, but if for some reason we |
|
|
|
|
* have lost track of it, try again to get it. If we still can't get it, |
|
|
|
|
* we have a problem: we will be unable to restore the file seek position |
|
|
|
|
* when and if the file is re-opened. But we can't really throw an error |
|
|
|
|
* and refuse to close the file, or activities such as transaction cleanup |
|
|
|
|
* will be broken. |
|
|
|
|
*/ |
|
|
|
|
if (FilePosIsUnknown(vfdP->seekPos)) |
|
|
|
|
{ |
|
|
|
|
vfdP->seekPos = lseek(vfdP->fd, (off_t) 0, SEEK_CUR); |
|
|
|
|
if (FilePosIsUnknown(vfdP->seekPos)) |
|
|
|
|
elog(LOG, "could not seek file \"%s\" before closing: %m", |
|
|
|
|
vfdP->fileName); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Close the file. We aren't expecting this to fail; if it does, better |
|
|
|
|
* to leak the FD than to mess up our internal state. |
|
|
|
|
@ -1113,33 +1085,6 @@ LruInsert(File file) |
|
|
|
|
{ |
|
|
|
|
++nfile; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Seek to the right position. We need no special case for seekPos |
|
|
|
|
* equal to FileUnknownPos, as lseek() will certainly reject that |
|
|
|
|
* (thus completing the logic noted in LruDelete() that we will fail |
|
|
|
|
* to re-open a file if we couldn't get its seek position before |
|
|
|
|
* closing). |
|
|
|
|
*/ |
|
|
|
|
if (vfdP->seekPos != (off_t) 0) |
|
|
|
|
{ |
|
|
|
|
if (lseek(vfdP->fd, vfdP->seekPos, SEEK_SET) < 0) |
|
|
|
|
{ |
|
|
|
|
/*
|
|
|
|
|
* If we fail to restore the seek position, treat it like an |
|
|
|
|
* open() failure. |
|
|
|
|
*/ |
|
|
|
|
int save_errno = errno; |
|
|
|
|
|
|
|
|
|
elog(LOG, "could not seek file \"%s\" after re-opening: %m", |
|
|
|
|
vfdP->fileName); |
|
|
|
|
(void) close(vfdP->fd); |
|
|
|
|
vfdP->fd = VFD_CLOSED; |
|
|
|
|
--nfile; |
|
|
|
|
errno = save_errno; |
|
|
|
|
return -1; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
@ -1406,7 +1351,6 @@ PathNameOpenFilePerm(const char *fileName, int fileFlags, mode_t fileMode) |
|
|
|
|
/* Saved flags are adjusted to be OK for re-opening file */ |
|
|
|
|
vfdP->fileFlags = fileFlags & ~(O_CREAT | O_TRUNC | O_EXCL); |
|
|
|
|
vfdP->fileMode = fileMode; |
|
|
|
|
vfdP->seekPos = 0; |
|
|
|
|
vfdP->fileSize = 0; |
|
|
|
|
vfdP->fdstate = 0x0; |
|
|
|
|
vfdP->resowner = NULL; |
|
|
|
|
@ -1820,7 +1764,6 @@ FileClose(File file) |
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* FilePrefetch - initiate asynchronous read of a given range of the file. |
|
|
|
|
* The logical seek position is unaffected. |
|
|
|
|
* |
|
|
|
|
* Currently the only implementation of this function is using posix_fadvise |
|
|
|
|
* which is the simplest standardized interface that accomplishes this. |
|
|
|
|
@ -1867,10 +1810,6 @@ FileWriteback(File file, off_t offset, off_t nbytes, uint32 wait_event_info) |
|
|
|
|
file, VfdCache[file].fileName, |
|
|
|
|
(int64) offset, (int64) nbytes)); |
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Caution: do not call pg_flush_data with nbytes = 0, it could trash the |
|
|
|
|
* file's seek position. We prefer to define that as a no-op here. |
|
|
|
|
*/ |
|
|
|
|
if (nbytes <= 0) |
|
|
|
|
return; |
|
|
|
|
|
|
|
|
|
@ -1884,7 +1823,8 @@ FileWriteback(File file, off_t offset, off_t nbytes, uint32 wait_event_info) |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
int |
|
|
|
|
FileRead(File file, char *buffer, int amount, uint32 wait_event_info) |
|
|
|
|
FileRead(File file, char *buffer, int amount, off_t offset, |
|
|
|
|
uint32 wait_event_info) |
|
|
|
|
{ |
|
|
|
|
int returnCode; |
|
|
|
|
Vfd *vfdP; |
|
|
|
|
@ -1893,7 +1833,7 @@ FileRead(File file, char *buffer, int amount, uint32 wait_event_info) |
|
|
|
|
|
|
|
|
|
DO_DB(elog(LOG, "FileRead: %d (%s) " INT64_FORMAT " %d %p", |
|
|
|
|
file, VfdCache[file].fileName, |
|
|
|
|
(int64) VfdCache[file].seekPos, |
|
|
|
|
(int64) offset, |
|
|
|
|
amount, buffer)); |
|
|
|
|
|
|
|
|
|
returnCode = FileAccess(file); |
|
|
|
|
@ -1904,16 +1844,10 @@ FileRead(File file, char *buffer, int amount, uint32 wait_event_info) |
|
|
|
|
|
|
|
|
|
retry: |
|
|
|
|
pgstat_report_wait_start(wait_event_info); |
|
|
|
|
returnCode = read(vfdP->fd, buffer, amount); |
|
|
|
|
returnCode = pg_pread(vfdP->fd, buffer, amount, offset); |
|
|
|
|
pgstat_report_wait_end(); |
|
|
|
|
|
|
|
|
|
if (returnCode >= 0) |
|
|
|
|
{ |
|
|
|
|
/* if seekPos is unknown, leave it that way */ |
|
|
|
|
if (!FilePosIsUnknown(vfdP->seekPos)) |
|
|
|
|
vfdP->seekPos += returnCode; |
|
|
|
|
} |
|
|
|
|
else |
|
|
|
|
if (returnCode < 0) |
|
|
|
|
{ |
|
|
|
|
/*
|
|
|
|
|
* Windows may run out of kernel buffers and return "Insufficient |
|
|
|
|
@ -1939,16 +1873,14 @@ retry: |
|
|
|
|
/* OK to retry if interrupted */ |
|
|
|
|
if (errno == EINTR) |
|
|
|
|
goto retry; |
|
|
|
|
|
|
|
|
|
/* Trouble, so assume we don't know the file position anymore */ |
|
|
|
|
vfdP->seekPos = FileUnknownPos; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
return returnCode; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
int |
|
|
|
|
FileWrite(File file, char *buffer, int amount, uint32 wait_event_info) |
|
|
|
|
FileWrite(File file, char *buffer, int amount, off_t offset, |
|
|
|
|
uint32 wait_event_info) |
|
|
|
|
{ |
|
|
|
|
int returnCode; |
|
|
|
|
Vfd *vfdP; |
|
|
|
|
@ -1957,7 +1889,7 @@ FileWrite(File file, char *buffer, int amount, uint32 wait_event_info) |
|
|
|
|
|
|
|
|
|
DO_DB(elog(LOG, "FileWrite: %d (%s) " INT64_FORMAT " %d %p", |
|
|
|
|
file, VfdCache[file].fileName, |
|
|
|
|
(int64) VfdCache[file].seekPos, |
|
|
|
|
(int64) offset, |
|
|
|
|
amount, buffer)); |
|
|
|
|
|
|
|
|
|
returnCode = FileAccess(file); |
|
|
|
|
@ -1976,26 +1908,13 @@ FileWrite(File file, char *buffer, int amount, uint32 wait_event_info) |
|
|
|
|
*/ |
|
|
|
|
if (temp_file_limit >= 0 && (vfdP->fdstate & FD_TEMP_FILE_LIMIT)) |
|
|
|
|
{ |
|
|
|
|
off_t newPos; |
|
|
|
|
off_t past_write = offset + amount; |
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Normally we should know the seek position, but if for some reason |
|
|
|
|
* we have lost track of it, try again to get it. Here, it's fine to |
|
|
|
|
* throw an error if we still can't get it. |
|
|
|
|
*/ |
|
|
|
|
if (FilePosIsUnknown(vfdP->seekPos)) |
|
|
|
|
{ |
|
|
|
|
vfdP->seekPos = lseek(vfdP->fd, (off_t) 0, SEEK_CUR); |
|
|
|
|
if (FilePosIsUnknown(vfdP->seekPos)) |
|
|
|
|
elog(ERROR, "could not seek file \"%s\": %m", vfdP->fileName); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
newPos = vfdP->seekPos + amount; |
|
|
|
|
if (newPos > vfdP->fileSize) |
|
|
|
|
if (past_write > vfdP->fileSize) |
|
|
|
|
{ |
|
|
|
|
uint64 newTotal = temporary_files_size; |
|
|
|
|
|
|
|
|
|
newTotal += newPos - vfdP->fileSize; |
|
|
|
|
newTotal += past_write - vfdP->fileSize; |
|
|
|
|
if (newTotal > (uint64) temp_file_limit * (uint64) 1024) |
|
|
|
|
ereport(ERROR, |
|
|
|
|
(errcode(ERRCODE_CONFIGURATION_LIMIT_EXCEEDED), |
|
|
|
|
@ -2007,7 +1926,7 @@ FileWrite(File file, char *buffer, int amount, uint32 wait_event_info) |
|
|
|
|
retry: |
|
|
|
|
errno = 0; |
|
|
|
|
pgstat_report_wait_start(wait_event_info); |
|
|
|
|
returnCode = write(vfdP->fd, buffer, amount); |
|
|
|
|
returnCode = pg_pwrite(VfdCache[file].fd, buffer, amount, offset); |
|
|
|
|
pgstat_report_wait_end(); |
|
|
|
|
|
|
|
|
|
/* if write didn't set errno, assume problem is no disk space */ |
|
|
|
|
@ -2016,10 +1935,6 @@ retry: |
|
|
|
|
|
|
|
|
|
if (returnCode >= 0) |
|
|
|
|
{ |
|
|
|
|
/* if seekPos is unknown, leave it that way */ |
|
|
|
|
if (!FilePosIsUnknown(vfdP->seekPos)) |
|
|
|
|
vfdP->seekPos += returnCode; |
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Maintain fileSize and temporary_files_size if it's a temp file. |
|
|
|
|
* |
|
|
|
|
@ -2029,12 +1944,12 @@ retry: |
|
|
|
|
*/ |
|
|
|
|
if (vfdP->fdstate & FD_TEMP_FILE_LIMIT) |
|
|
|
|
{ |
|
|
|
|
off_t newPos = vfdP->seekPos; |
|
|
|
|
off_t past_write = offset + amount; |
|
|
|
|
|
|
|
|
|
if (newPos > vfdP->fileSize) |
|
|
|
|
if (past_write > vfdP->fileSize) |
|
|
|
|
{ |
|
|
|
|
temporary_files_size += newPos - vfdP->fileSize; |
|
|
|
|
vfdP->fileSize = newPos; |
|
|
|
|
temporary_files_size += past_write - vfdP->fileSize; |
|
|
|
|
vfdP->fileSize = past_write; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
@ -2060,9 +1975,6 @@ retry: |
|
|
|
|
/* OK to retry if interrupted */ |
|
|
|
|
if (errno == EINTR) |
|
|
|
|
goto retry; |
|
|
|
|
|
|
|
|
|
/* Trouble, so assume we don't know the file position anymore */ |
|
|
|
|
vfdP->seekPos = FileUnknownPos; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
return returnCode; |
|
|
|
|
@ -2090,92 +2002,25 @@ FileSync(File file, uint32 wait_event_info) |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
off_t |
|
|
|
|
FileSeek(File file, off_t offset, int whence) |
|
|
|
|
FileSize(File file) |
|
|
|
|
{ |
|
|
|
|
Vfd *vfdP; |
|
|
|
|
|
|
|
|
|
Assert(FileIsValid(file)); |
|
|
|
|
|
|
|
|
|
DO_DB(elog(LOG, "FileSeek: %d (%s) " INT64_FORMAT " " INT64_FORMAT " %d", |
|
|
|
|
file, VfdCache[file].fileName, |
|
|
|
|
(int64) VfdCache[file].seekPos, |
|
|
|
|
(int64) offset, whence)); |
|
|
|
|
DO_DB(elog(LOG, "FileSize %d (%s)", |
|
|
|
|
file, VfdCache[file].fileName)); |
|
|
|
|
|
|
|
|
|
vfdP = &VfdCache[file]; |
|
|
|
|
|
|
|
|
|
if (FileIsNotOpen(file)) |
|
|
|
|
{ |
|
|
|
|
switch (whence) |
|
|
|
|
{ |
|
|
|
|
case SEEK_SET: |
|
|
|
|
if (offset < 0) |
|
|
|
|
{ |
|
|
|
|
errno = EINVAL; |
|
|
|
|
return (off_t) -1; |
|
|
|
|
} |
|
|
|
|
vfdP->seekPos = offset; |
|
|
|
|
break; |
|
|
|
|
case SEEK_CUR: |
|
|
|
|
if (FilePosIsUnknown(vfdP->seekPos) || |
|
|
|
|
vfdP->seekPos + offset < 0) |
|
|
|
|
{ |
|
|
|
|
errno = EINVAL; |
|
|
|
|
return (off_t) -1; |
|
|
|
|
} |
|
|
|
|
vfdP->seekPos += offset; |
|
|
|
|
break; |
|
|
|
|
case SEEK_END: |
|
|
|
|
if (FileAccess(file) < 0) |
|
|
|
|
return (off_t) -1; |
|
|
|
|
vfdP->seekPos = lseek(vfdP->fd, offset, whence); |
|
|
|
|
break; |
|
|
|
|
default: |
|
|
|
|
elog(ERROR, "invalid whence: %d", whence); |
|
|
|
|
break; |
|
|
|
|
} |
|
|
|
|
if (FileAccess(file) < 0) |
|
|
|
|
return (off_t) -1; |
|
|
|
|
} |
|
|
|
|
else |
|
|
|
|
{ |
|
|
|
|
switch (whence) |
|
|
|
|
{ |
|
|
|
|
case SEEK_SET: |
|
|
|
|
if (offset < 0) |
|
|
|
|
{ |
|
|
|
|
errno = EINVAL; |
|
|
|
|
return (off_t) -1; |
|
|
|
|
} |
|
|
|
|
if (vfdP->seekPos != offset) |
|
|
|
|
vfdP->seekPos = lseek(vfdP->fd, offset, whence); |
|
|
|
|
break; |
|
|
|
|
case SEEK_CUR: |
|
|
|
|
if (offset != 0 || FilePosIsUnknown(vfdP->seekPos)) |
|
|
|
|
vfdP->seekPos = lseek(vfdP->fd, offset, whence); |
|
|
|
|
break; |
|
|
|
|
case SEEK_END: |
|
|
|
|
vfdP->seekPos = lseek(vfdP->fd, offset, whence); |
|
|
|
|
break; |
|
|
|
|
default: |
|
|
|
|
elog(ERROR, "invalid whence: %d", whence); |
|
|
|
|
break; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
return vfdP->seekPos; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* XXX not actually used but here for completeness |
|
|
|
|
*/ |
|
|
|
|
#ifdef NOT_USED |
|
|
|
|
off_t |
|
|
|
|
FileTell(File file) |
|
|
|
|
{ |
|
|
|
|
Assert(FileIsValid(file)); |
|
|
|
|
DO_DB(elog(LOG, "FileTell %d (%s)", |
|
|
|
|
file, VfdCache[file].fileName)); |
|
|
|
|
return VfdCache[file].seekPos; |
|
|
|
|
return lseek(VfdCache[file].fd, 0, SEEK_END); |
|
|
|
|
} |
|
|
|
|
#endif |
|
|
|
|
|
|
|
|
|
int |
|
|
|
|
FileTruncate(File file, off_t offset, uint32 wait_event_info) |
|
|
|
|
|