1
/* File object implementation */
3
#define PY_SSIZE_T_CLEAN
5
#include "structmember.h"
7
#ifdef HAVE_SYS_TYPES_H
9
#endif /* HAVE_SYS_TYPES_H */
12
#define fileno _fileno
13
/* can simulate truncate with Win32 API functions; see file_truncate */
14
#define HAVE_FTRUNCATE
15
#define WIN32_LEAN_AND_MEAN
20
/* Need GetVersion to see if on NT so safe to use _wfopen */
21
#define WIN32_LEAN_AND_MEAN
25
#if defined(PYOS_OS2) && defined(PYCC_GCC)
29
#define BUF(v) PyString_AS_STRING((PyStringObject *)v)
31
#ifndef DONT_HAVE_ERRNO_H
35
#ifdef HAVE_GETC_UNLOCKED
36
#define GETC(f) getc_unlocked(f)
37
#define FLOCKFILE(f) flockfile(f)
38
#define FUNLOCKFILE(f) funlockfile(f)
40
#define GETC(f) getc(f)
42
#define FUNLOCKFILE(f)
45
/* Bits in f_newlinetypes */
46
#define NEWLINE_UNKNOWN 0 /* No newline seen, yet */
47
#define NEWLINE_CR 1 /* \r newline seen */
48
#define NEWLINE_LF 2 /* \n newline seen */
49
#define NEWLINE_CRLF 4 /* \r\n newline seen */
52
* These macros release the GIL while preventing the f_close() function being
53
* called in the interval between them. For that purpose, a running total of
54
* the number of currently running unlocked code sections is kept in
55
* the unlocked_count field of the PyFileObject. The close() method raises
56
* an IOError if that field is non-zero. See issue #815646, #595601.
59
#define FILE_BEGIN_ALLOW_THREADS(fobj) \
61
fobj->unlocked_count++; \
62
Py_BEGIN_ALLOW_THREADS
64
#define FILE_END_ALLOW_THREADS(fobj) \
65
Py_END_ALLOW_THREADS \
66
fobj->unlocked_count--; \
67
assert(fobj->unlocked_count >= 0); \
70
#define FILE_ABORT_ALLOW_THREADS(fobj) \
72
fobj->unlocked_count--; \
73
assert(fobj->unlocked_count >= 0);
80
PyFile_AsFile(PyObject *f)
82
if (f == NULL || !PyFile_Check(f))
85
return ((PyFileObject *)f)->f_fp;
88
void PyFile_IncUseCount(PyFileObject *fobj)
90
fobj->unlocked_count++;
93
void PyFile_DecUseCount(PyFileObject *fobj)
95
fobj->unlocked_count--;
96
assert(fobj->unlocked_count >= 0);
100
PyFile_Name(PyObject *f)
102
if (f == NULL || !PyFile_Check(f))
105
return ((PyFileObject *)f)->f_name;
108
/* This is a safe wrapper around PyObject_Print to print to the FILE
109
of a PyFileObject. PyObject_Print releases the GIL but knows nothing
110
about PyFileObject. */
112
file_PyObject_Print(PyObject *op, PyFileObject *f, int flags)
115
PyFile_IncUseCount(f);
116
result = PyObject_Print(op, f->f_fp, flags);
117
PyFile_DecUseCount(f);
121
/* On Unix, fopen will succeed for directories.
122
In Python, there should be no file objects referring to
123
directories, so we need a check. */
126
dircheck(PyFileObject* f)
128
#if defined(HAVE_FSTAT) && defined(S_IFDIR) && defined(EISDIR)
132
if (fstat(fileno(f->f_fp), &buf) == 0 &&
133
S_ISDIR(buf.st_mode)) {
134
char *msg = strerror(EISDIR);
135
PyObject *exc = PyObject_CallFunction(PyExc_IOError, "(is)",
137
PyErr_SetObject(PyExc_IOError, exc);
147
fill_file_fields(PyFileObject *f, FILE *fp, PyObject *name, char *mode,
148
int (*close)(FILE *))
150
assert(name != NULL);
152
assert(PyFile_Check(f));
153
assert(f->f_fp == NULL);
155
Py_DECREF(f->f_name);
156
Py_DECREF(f->f_mode);
157
Py_DECREF(f->f_encoding);
158
Py_DECREF(f->f_errors);
163
f->f_mode = PyString_FromString(mode);
167
f->f_binary = strchr(mode,'b') != NULL;
169
f->f_univ_newline = (strchr(mode, 'U') != NULL);
170
f->f_newlinetypes = NEWLINE_UNKNOWN;
173
f->f_encoding = Py_None;
175
f->f_errors = Py_None;
177
if (f->f_mode == NULL)
181
return (PyObject *) f;
184
/* check for known incorrect mode strings - problem is, platforms are
185
free to accept any mode characters they like and are supposed to
186
ignore stuff they don't understand... write or append mode with
187
universal newline support is expressly forbidden by PEP 278.
188
Additionally, remove the 'U' from the mode string as platforms
189
won't know what it is. Non-zero return signals an exception */
191
_PyFile_SanitizeMode(char *mode)
194
size_t len = strlen(mode);
197
PyErr_SetString(PyExc_ValueError, "empty mode string");
201
upos = strchr(mode, 'U');
203
memmove(upos, upos+1, len-(upos-mode)); /* incl null char */
205
if (mode[0] == 'w' || mode[0] == 'a') {
206
PyErr_Format(PyExc_ValueError, "universal newline "
207
"mode can only be used with modes "
208
"starting with 'r'");
212
if (mode[0] != 'r') {
213
memmove(mode+1, mode, strlen(mode)+1);
217
if (!strchr(mode, 'b')) {
218
memmove(mode+2, mode+1, strlen(mode));
221
} else if (mode[0] != 'r' && mode[0] != 'w' && mode[0] != 'a') {
222
PyErr_Format(PyExc_ValueError, "mode string must begin with "
223
"one of 'r', 'w', 'a' or 'U', not '%.200s'", mode);
231
open_the_file(PyFileObject *f, char *name, char *mode)
235
assert(PyFile_Check(f));
237
/* windows ignores the passed name in order to support Unicode */
238
assert(f->f_name != NULL);
240
assert(name != NULL);
242
assert(mode != NULL);
243
assert(f->f_fp == NULL);
245
/* probably need to replace 'U' by 'rb' */
246
newmode = PyMem_MALLOC(strlen(mode) + 3);
251
strcpy(newmode, mode);
253
if (_PyFile_SanitizeMode(newmode)) {
258
/* rexec.py can't stop a user from getting the file() constructor --
259
all they have to do is get *any* file object f, and then do
260
type(f). Here we prevent them from doing damage with it. */
261
if (PyEval_GetRestricted()) {
262
PyErr_SetString(PyExc_IOError,
263
"file() constructor not accessible in restricted mode");
270
if (PyUnicode_Check(f->f_name)) {
272
wmode = PyUnicode_DecodeASCII(newmode, strlen(newmode), NULL);
273
if (f->f_name && wmode) {
274
FILE_BEGIN_ALLOW_THREADS(f)
275
/* PyUnicode_AS_UNICODE OK without thread
276
lock as it is a simple dereference. */
277
f->f_fp = _wfopen(PyUnicode_AS_UNICODE(f->f_name),
278
PyUnicode_AS_UNICODE(wmode));
279
FILE_END_ALLOW_THREADS(f)
284
if (NULL == f->f_fp && NULL != name) {
285
FILE_BEGIN_ALLOW_THREADS(f)
286
f->f_fp = fopen(name, newmode);
287
FILE_END_ALLOW_THREADS(f)
290
if (f->f_fp == NULL) {
291
#if defined _MSC_VER && (_MSC_VER < 1400 || !defined(__STDC_SECURE_LIB__))
292
/* MSVC 6 (Microsoft) leaves errno at 0 for bad mode strings,
293
* across all Windows flavors. When it sets EINVAL varies
294
* across Windows flavors, the exact conditions aren't
295
* documented, and the answer lies in the OS's implementation
296
* of Win32's CreateFile function (whose source is secret).
297
* Seems the best we can do is map EINVAL to ENOENT.
298
* Starting with Visual Studio .NET 2005, EINVAL is correctly
299
* set by our CRT error handler (set in exceptions.c.)
301
if (errno == 0) /* bad mode string */
303
else if (errno == EINVAL) /* unknown, but not a mode string */
306
/* EINVAL is returned when an invalid filename or
307
* an invalid mode is supplied. */
308
if (errno == EINVAL) {
311
PyOS_snprintf(message, 100,
312
"invalid mode ('%.50s') or filename", mode);
313
v = Py_BuildValue("(isO)", errno, message, f->f_name);
315
PyErr_SetObject(PyExc_IOError, v);
320
PyErr_SetFromErrnoWithFilenameObject(PyExc_IOError, f->f_name);
329
return (PyObject *)f;
333
close_the_file(PyFileObject *f)
336
int (*local_close)(FILE *);
337
FILE *local_fp = f->f_fp;
338
if (local_fp != NULL) {
339
local_close = f->f_close;
340
if (local_close != NULL && f->unlocked_count > 0) {
341
if (f->ob_refcnt > 0) {
342
PyErr_SetString(PyExc_IOError,
343
"close() called during concurrent "
344
"operation on the same file object.");
346
/* This should not happen unless someone is
347
* carelessly playing with the PyFileObject
348
* struct fields and/or its associated FILE
350
PyErr_SetString(PyExc_SystemError,
351
"PyFileObject locking error in "
352
"destructor (refcnt <= 0 at close).");
356
/* NULL out the FILE pointer before releasing the GIL, because
357
* it will not be valid anymore after the close() function is
360
if (local_close != NULL) {
361
Py_BEGIN_ALLOW_THREADS
363
sts = (*local_close)(local_fp);
366
return PyErr_SetFromErrno(PyExc_IOError);
368
return PyInt_FromLong((long)sts);
375
PyFile_FromFile(FILE *fp, char *name, char *mode, int (*close)(FILE *))
377
PyFileObject *f = (PyFileObject *)PyFile_Type.tp_new(&PyFile_Type,
380
PyObject *o_name = PyString_FromString(name);
383
if (fill_file_fields(f, fp, o_name, mode, close) == NULL) {
389
return (PyObject *) f;
393
PyFile_FromString(char *name, char *mode)
395
extern int fclose(FILE *);
398
f = (PyFileObject *)PyFile_FromFile((FILE *)NULL, name, mode, fclose);
400
if (open_the_file(f, name, mode) == NULL) {
405
return (PyObject *)f;
409
PyFile_SetBufSize(PyObject *f, int bufsize)
411
PyFileObject *file = (PyFileObject *)f;
432
if (type == _IONBF) {
433
PyMem_Free(file->f_setbuf);
434
file->f_setbuf = NULL;
436
file->f_setbuf = (char *)PyMem_Realloc(file->f_setbuf,
440
setvbuf(file->f_fp, file->f_setbuf, type, bufsize);
441
#else /* !HAVE_SETVBUF */
442
setbuf(file->f_fp, file->f_setbuf);
443
#endif /* !HAVE_SETVBUF */
447
/* Set the encoding used to output Unicode strings.
448
Return 1 on success, 0 on failure. */
451
PyFile_SetEncoding(PyObject *f, const char *enc)
453
return PyFile_SetEncodingAndErrors(f, enc, NULL);
457
PyFile_SetEncodingAndErrors(PyObject *f, const char *enc, char* errors)
459
PyFileObject *file = (PyFileObject*)f;
460
PyObject *str, *oerrors;
462
assert(PyFile_Check(f));
463
str = PyString_FromString(enc);
467
oerrors = PyString_FromString(errors);
476
Py_DECREF(file->f_encoding);
477
file->f_encoding = str;
478
Py_DECREF(file->f_errors);
479
file->f_errors = oerrors;
486
PyErr_SetString(PyExc_ValueError, "I/O operation on closed file");
490
/* Refuse regular file I/O if there's data in the iteration-buffer.
491
* Mixing them would cause data to arrive out of order, as the read*
492
* methods don't use the iteration buffer. */
494
err_iterbuffered(void)
496
PyErr_SetString(PyExc_ValueError,
497
"Mixing iteration and read methods would lose data");
501
static void drop_readahead(PyFileObject *);
506
file_dealloc(PyFileObject *f)
509
if (f->weakreflist != NULL)
510
PyObject_ClearWeakRefs((PyObject *) f);
511
ret = close_the_file(f);
513
PySys_WriteStderr("close failed in file object destructor:\n");
519
PyMem_Free(f->f_setbuf);
520
Py_XDECREF(f->f_name);
521
Py_XDECREF(f->f_mode);
522
Py_XDECREF(f->f_encoding);
523
Py_XDECREF(f->f_errors);
525
Py_TYPE(f)->tp_free((PyObject *)f);
529
file_repr(PyFileObject *f)
531
if (PyUnicode_Check(f->f_name)) {
532
#ifdef Py_USING_UNICODE
533
PyObject *ret = NULL;
534
PyObject *name = PyUnicode_AsUnicodeEscapeString(f->f_name);
535
const char *name_str = name ? PyString_AsString(name) : "?";
536
ret = PyString_FromFormat("<%s file u'%s', mode '%s' at %p>",
537
f->f_fp == NULL ? "closed" : "open",
539
PyString_AsString(f->f_mode),
545
return PyString_FromFormat("<%s file '%s', mode '%s' at %p>",
546
f->f_fp == NULL ? "closed" : "open",
547
PyString_AsString(f->f_name),
548
PyString_AsString(f->f_mode),
554
file_close(PyFileObject *f)
556
PyObject *sts = close_the_file(f);
557
PyMem_Free(f->f_setbuf);
563
/* Our very own off_t-like type, 64-bit if possible */
564
#if !defined(HAVE_LARGEFILE_SUPPORT)
565
typedef off_t Py_off_t;
566
#elif SIZEOF_OFF_T >= 8
567
typedef off_t Py_off_t;
568
#elif SIZEOF_FPOS_T >= 8
569
typedef fpos_t Py_off_t;
571
#error "Large file support, but neither off_t nor fpos_t is large enough."
575
/* a portable fseek() function
576
return 0 on success, non-zero on failure (with errno set) */
578
_portable_fseek(FILE *fp, Py_off_t offset, int whence)
580
#if !defined(HAVE_LARGEFILE_SUPPORT)
581
return fseek(fp, offset, whence);
582
#elif defined(HAVE_FSEEKO) && SIZEOF_OFF_T >= 8
583
return fseeko(fp, offset, whence);
584
#elif defined(HAVE_FSEEK64)
585
return fseek64(fp, offset, whence);
586
#elif defined(__BEOS__)
587
return _fseek(fp, offset, whence);
588
#elif SIZEOF_FPOS_T >= 8
589
/* lacking a 64-bit capable fseek(), use a 64-bit capable fsetpos()
590
and fgetpos() to implement fseek()*/
596
if (_lseeki64(fileno(fp), 0, 2) == -1)
599
if (fseek(fp, 0, SEEK_END) != 0)
604
if (fgetpos(fp, &pos) != 0)
608
/* case SEEK_SET: break; */
610
return fsetpos(fp, &offset);
612
#error "Large file support, but no way to fseek."
617
/* a portable ftell() function
618
Return -1 on failure with errno set appropriately, current file
619
position on success */
621
_portable_ftell(FILE* fp)
623
#if !defined(HAVE_LARGEFILE_SUPPORT)
625
#elif defined(HAVE_FTELLO) && SIZEOF_OFF_T >= 8
627
#elif defined(HAVE_FTELL64)
629
#elif SIZEOF_FPOS_T >= 8
631
if (fgetpos(fp, &pos) != 0)
635
#error "Large file support, but no way to ftell."
641
file_seek(PyFileObject *f, PyObject *args)
646
PyObject *offobj, *off_index;
652
if (!PyArg_ParseTuple(args, "O|i:seek", &offobj, &whence))
654
off_index = PyNumber_Index(offobj);
656
if (!PyFloat_Check(offobj))
658
/* Deprecated in 2.6 */
660
if (PyErr_WarnEx(PyExc_DeprecationWarning,
661
"integer argument expected, got float",
667
#if !defined(HAVE_LARGEFILE_SUPPORT)
668
offset = PyInt_AsLong(off_index);
670
offset = PyLong_Check(off_index) ?
671
PyLong_AsLongLong(off_index) : PyInt_AsLong(off_index);
673
Py_DECREF(off_index);
674
if (PyErr_Occurred())
677
FILE_BEGIN_ALLOW_THREADS(f)
679
ret = _portable_fseek(f->f_fp, offset, whence);
680
FILE_END_ALLOW_THREADS(f)
683
PyErr_SetFromErrno(PyExc_IOError);
693
#ifdef HAVE_FTRUNCATE
695
file_truncate(PyFileObject *f, PyObject *args)
698
PyObject *newsizeobj = NULL;
704
if (!PyArg_UnpackTuple(args, "truncate", 0, 1, &newsizeobj))
707
/* Get current file position. If the file happens to be open for
708
* update and the last operation was an input operation, C doesn't
709
* define what the later fflush() will do, but we promise truncate()
710
* won't change the current position (and fflush() *does* change it
711
* then at least on Windows). The easiest thing is to capture
712
* current pos now and seek back to it at the end.
714
FILE_BEGIN_ALLOW_THREADS(f)
716
initialpos = _portable_ftell(f->f_fp);
717
FILE_END_ALLOW_THREADS(f)
718
if (initialpos == -1)
721
/* Set newsize to current postion if newsizeobj NULL, else to the
724
if (newsizeobj != NULL) {
725
#if !defined(HAVE_LARGEFILE_SUPPORT)
726
newsize = PyInt_AsLong(newsizeobj);
728
newsize = PyLong_Check(newsizeobj) ?
729
PyLong_AsLongLong(newsizeobj) :
730
PyInt_AsLong(newsizeobj);
732
if (PyErr_Occurred())
735
else /* default to current position */
736
newsize = initialpos;
738
/* Flush the stream. We're mixing stream-level I/O with lower-level
739
* I/O, and a flush may be necessary to synch both platform views
740
* of the current file state.
742
FILE_BEGIN_ALLOW_THREADS(f)
744
ret = fflush(f->f_fp);
745
FILE_END_ALLOW_THREADS(f)
750
/* MS _chsize doesn't work if newsize doesn't fit in 32 bits,
751
so don't even try using it. */
755
/* Have to move current pos to desired endpoint on Windows. */
756
FILE_BEGIN_ALLOW_THREADS(f)
758
ret = _portable_fseek(f->f_fp, newsize, SEEK_SET) != 0;
759
FILE_END_ALLOW_THREADS(f)
763
/* Truncate. Note that this may grow the file! */
764
FILE_BEGIN_ALLOW_THREADS(f)
766
hFile = (HANDLE)_get_osfhandle(fileno(f->f_fp));
767
ret = hFile == (HANDLE)-1;
769
ret = SetEndOfFile(hFile) == 0;
773
FILE_END_ALLOW_THREADS(f)
778
FILE_BEGIN_ALLOW_THREADS(f)
780
ret = ftruncate(fileno(f->f_fp), newsize);
781
FILE_END_ALLOW_THREADS(f)
784
#endif /* !MS_WINDOWS */
786
/* Restore original file position. */
787
FILE_BEGIN_ALLOW_THREADS(f)
789
ret = _portable_fseek(f->f_fp, initialpos, SEEK_SET) != 0;
790
FILE_END_ALLOW_THREADS(f)
798
PyErr_SetFromErrno(PyExc_IOError);
802
#endif /* HAVE_FTRUNCATE */
805
file_tell(PyFileObject *f)
811
FILE_BEGIN_ALLOW_THREADS(f)
813
pos = _portable_ftell(f->f_fp);
814
FILE_END_ALLOW_THREADS(f)
817
PyErr_SetFromErrno(PyExc_IOError);
821
if (f->f_skipnextlf) {
825
f->f_newlinetypes |= NEWLINE_CRLF;
828
} else if (c != EOF) ungetc(c, f->f_fp);
830
#if !defined(HAVE_LARGEFILE_SUPPORT)
831
return PyInt_FromLong(pos);
833
return PyLong_FromLongLong(pos);
838
file_fileno(PyFileObject *f)
842
return PyInt_FromLong((long) fileno(f->f_fp));
846
file_flush(PyFileObject *f)
852
FILE_BEGIN_ALLOW_THREADS(f)
854
res = fflush(f->f_fp);
855
FILE_END_ALLOW_THREADS(f)
857
PyErr_SetFromErrno(PyExc_IOError);
866
file_isatty(PyFileObject *f)
871
FILE_BEGIN_ALLOW_THREADS(f)
872
res = isatty((int)fileno(f->f_fp));
873
FILE_END_ALLOW_THREADS(f)
874
return PyBool_FromLong(res);
879
#define SMALLCHUNK 8192
881
#define SMALLCHUNK BUFSIZ
885
#define BIGCHUNK (512 * 32)
887
#define BIGCHUNK (512 * 1024)
891
new_buffersize(PyFileObject *f, size_t currentsize)
896
if (fstat(fileno(f->f_fp), &st) == 0) {
898
/* The following is not a bug: we really need to call lseek()
899
*and* ftell(). The reason is that some stdio libraries
900
mistakenly flush their buffer when ftell() is called and
901
the lseek() call it makes fails, thereby throwing away
902
data that cannot be recovered in any way. To avoid this,
903
we first test lseek(), and only call ftell() if lseek()
904
works. We can't use the lseek() value either, because we
905
need to take the amount of buffered data into account.
906
(Yet another reason why stdio stinks. :-) */
907
pos = lseek(fileno(f->f_fp), 0L, SEEK_CUR);
909
pos = ftell(f->f_fp);
913
if (end > pos && pos >= 0)
914
return currentsize + end - pos + 1;
915
/* Add 1 so if the file were to grow we'd notice. */
918
if (currentsize > SMALLCHUNK) {
919
/* Keep doubling until we reach BIGCHUNK;
920
then keep adding BIGCHUNK. */
921
if (currentsize <= BIGCHUNK)
922
return currentsize + currentsize;
924
return currentsize + BIGCHUNK;
926
return currentsize + SMALLCHUNK;
929
#if defined(EWOULDBLOCK) && defined(EAGAIN) && EWOULDBLOCK != EAGAIN
930
#define BLOCKED_ERRNO(x) ((x) == EWOULDBLOCK || (x) == EAGAIN)
933
#define BLOCKED_ERRNO(x) ((x) == EWOULDBLOCK)
936
#define BLOCKED_ERRNO(x) ((x) == EAGAIN)
938
#define BLOCKED_ERRNO(x) 0
944
file_read(PyFileObject *f, PyObject *args)
946
long bytesrequested = -1;
947
size_t bytesread, buffersize, chunksize;
952
/* refuse to mix with f.next() */
953
if (f->f_buf != NULL &&
954
(f->f_bufend - f->f_bufptr) > 0 &&
956
return err_iterbuffered();
957
if (!PyArg_ParseTuple(args, "|l:read", &bytesrequested))
959
if (bytesrequested < 0)
960
buffersize = new_buffersize(f, (size_t)0);
962
buffersize = bytesrequested;
963
if (buffersize > PY_SSIZE_T_MAX) {
964
PyErr_SetString(PyExc_OverflowError,
965
"requested number of bytes is more than a Python string can hold");
968
v = PyString_FromStringAndSize((char *)NULL, buffersize);
973
FILE_BEGIN_ALLOW_THREADS(f)
975
chunksize = Py_UniversalNewlineFread(BUF(v) + bytesread,
976
buffersize - bytesread, f->f_fp, (PyObject *)f);
977
FILE_END_ALLOW_THREADS(f)
978
if (chunksize == 0) {
979
if (!ferror(f->f_fp))
982
/* When in non-blocking mode, data shouldn't
983
* be discarded if a blocking signal was
984
* received. That will also happen if
985
* chunksize != 0, but bytesread < buffersize. */
986
if (bytesread > 0 && BLOCKED_ERRNO(errno))
988
PyErr_SetFromErrno(PyExc_IOError);
992
bytesread += chunksize;
993
if (bytesread < buffersize) {
997
if (bytesrequested < 0) {
998
buffersize = new_buffersize(f, buffersize);
999
if (_PyString_Resize(&v, buffersize) < 0)
1002
/* Got what was requested. */
1006
if (bytesread != buffersize)
1007
_PyString_Resize(&v, bytesread);
1012
file_readinto(PyFileObject *f, PyObject *args)
1016
Py_ssize_t ndone, nnow;
1019
if (f->f_fp == NULL)
1020
return err_closed();
1021
/* refuse to mix with f.next() */
1022
if (f->f_buf != NULL &&
1023
(f->f_bufend - f->f_bufptr) > 0 &&
1024
f->f_buf[0] != '\0')
1025
return err_iterbuffered();
1026
if (!PyArg_ParseTuple(args, "w*", &pbuf))
1032
FILE_BEGIN_ALLOW_THREADS(f)
1034
nnow = Py_UniversalNewlineFread(ptr+ndone, ntodo, f->f_fp,
1036
FILE_END_ALLOW_THREADS(f)
1038
if (!ferror(f->f_fp))
1040
PyErr_SetFromErrno(PyExc_IOError);
1042
PyBuffer_Release(&pbuf);
1048
PyBuffer_Release(&pbuf);
1049
return PyInt_FromSsize_t(ndone);
1052
/**************************************************************************
1053
Routine to get next line using platform fgets().
1057
+ MS threadsafe getc is very slow (multiple layers of function calls before+
1058
after each character, to lock+unlock the stream).
1059
+ The stream-locking functions are MS-internal -- can't access them from user
1061
+ There's nothing Tim could find in the MS C or platform SDK libraries that
1062
can worm around this.
1063
+ MS fgets locks/unlocks only once per line; it's the only hook we have.
1065
So we use fgets for speed(!), despite that it's painful.
1067
MS realloc is also slow.
1069
Reports from other platforms on this method vs getc_unlocked (which MS doesn't
1073
Tru64 Unix getline_via_fgets significantly faster
1075
CAUTION: The C std isn't clear about this: in those cases where fgets
1076
writes something into the buffer, can it write into any position beyond the
1077
required trailing null byte? MSVC 6 fgets does not, and no platform is (yet)
1078
known on which it does; and it would be a strange way to code fgets. Still,
1079
getline_via_fgets may not work correctly if it does. The std test
1080
test_bufio.py should fail if platform fgets() routinely writes beyond the
1081
trailing null byte. #define DONT_USE_FGETS_IN_GETLINE to disable this code.
1082
**************************************************************************/
1084
/* Use this routine if told to, or by default on non-get_unlocked()
1085
* platforms unless told not to. Yikes! Let's spell that out:
1086
* On a platform with getc_unlocked():
1087
* By default, use getc_unlocked().
1088
* If you want to use fgets() instead, #define USE_FGETS_IN_GETLINE.
1089
* On a platform without getc_unlocked():
1090
* By default, use fgets().
1091
* If you don't want to use fgets(), #define DONT_USE_FGETS_IN_GETLINE.
1093
#if !defined(USE_FGETS_IN_GETLINE) && !defined(HAVE_GETC_UNLOCKED)
1094
#define USE_FGETS_IN_GETLINE
1097
#if defined(DONT_USE_FGETS_IN_GETLINE) && defined(USE_FGETS_IN_GETLINE)
1098
#undef USE_FGETS_IN_GETLINE
1101
#ifdef USE_FGETS_IN_GETLINE
1103
getline_via_fgets(PyFileObject *f, FILE *fp)
1105
/* INITBUFSIZE is the maximum line length that lets us get away with the fast
1106
* no-realloc, one-fgets()-call path. Boosting it isn't free, because we have
1107
* to fill this much of the buffer with a known value in order to figure out
1108
* how much of the buffer fgets() overwrites. So if INITBUFSIZE is larger
1109
* than "most" lines, we waste time filling unused buffer slots. 100 is
1110
* surely adequate for most peoples' email archives, chewing over source code,
1111
* etc -- "regular old text files".
1112
* MAXBUFSIZE is the maximum line length that lets us get away with the less
1113
* fast (but still zippy) no-realloc, two-fgets()-call path. See above for
1114
* cautions about boosting that. 300 was chosen because the worst real-life
1115
* text-crunching job reported on Python-Dev was a mail-log crawler where over
1116
* half the lines were 254 chars.
1118
#define INITBUFSIZE 100
1119
#define MAXBUFSIZE 300
1121
char buf[MAXBUFSIZE];
1122
PyObject* v; /* the string object result */
1123
char* pvfree; /* address of next free slot */
1124
char* pvend; /* address one beyond last free slot */
1125
size_t nfree; /* # of free buffer slots; pvend-pvfree */
1126
size_t total_v_size; /* total # of slots in buffer */
1127
size_t increment; /* amount to increment the buffer */
1130
/* Optimize for normal case: avoid _PyString_Resize if at all
1131
* possible via first reading into stack buffer "buf".
1133
total_v_size = INITBUFSIZE; /* start small and pray */
1136
FILE_BEGIN_ALLOW_THREADS(f)
1137
pvend = buf + total_v_size;
1138
nfree = pvend - pvfree;
1139
memset(pvfree, '\n', nfree);
1140
assert(nfree < INT_MAX); /* Should be atmost MAXBUFSIZE */
1141
p = fgets(pvfree, (int)nfree, fp);
1142
FILE_END_ALLOW_THREADS(f)
1146
if (PyErr_CheckSignals())
1148
v = PyString_FromStringAndSize(buf, pvfree - buf);
1151
/* fgets read *something* */
1152
p = memchr(pvfree, '\n', nfree);
1154
/* Did the \n come from fgets or from us?
1155
* Since fgets stops at the first \n, and then writes
1156
* \0, if it's from fgets a \0 must be next. But if
1157
* that's so, it could not have come from us, since
1158
* the \n's we filled the buffer with have only more
1159
* \n's to the right.
1161
if (p+1 < pvend && *(p+1) == '\0') {
1162
/* It's from fgets: we win! In particular,
1163
* we haven't done any mallocs yet, and can
1164
* build the final result on the first try.
1166
++p; /* include \n from fgets */
1169
/* Must be from us: fgets didn't fill the
1170
* buffer and didn't find a newline, so it
1171
* must be the last and newline-free line of
1174
assert(p > pvfree && *(p-1) == '\0');
1175
--p; /* don't include \0 from fgets */
1177
v = PyString_FromStringAndSize(buf, p - buf);
1180
/* yuck: fgets overwrote all the newlines, i.e. the entire
1181
* buffer. So this line isn't over yet, or maybe it is but
1182
* we're exactly at EOF. If we haven't already, try using the
1183
* rest of the stack buffer.
1185
assert(*(pvend-1) == '\0');
1186
if (pvfree == buf) {
1187
pvfree = pvend - 1; /* overwrite trailing null */
1188
total_v_size = MAXBUFSIZE;
1194
/* The stack buffer isn't big enough; malloc a string object and read
1197
total_v_size = MAXBUFSIZE << 1;
1198
v = PyString_FromStringAndSize((char*)NULL, (int)total_v_size);
1201
/* copy over everything except the last null byte */
1202
memcpy(BUF(v), buf, MAXBUFSIZE-1);
1203
pvfree = BUF(v) + MAXBUFSIZE - 1;
1205
/* Keep reading stuff into v; if it ever ends successfully, break
1206
* after setting p one beyond the end of the line. The code here is
1207
* very much like the code above, except reads into v's buffer; see
1208
* the code above for detailed comments about the logic.
1211
FILE_BEGIN_ALLOW_THREADS(f)
1212
pvend = BUF(v) + total_v_size;
1213
nfree = pvend - pvfree;
1214
memset(pvfree, '\n', nfree);
1215
assert(nfree < INT_MAX);
1216
p = fgets(pvfree, (int)nfree, fp);
1217
FILE_END_ALLOW_THREADS(f)
1221
if (PyErr_CheckSignals()) {
1228
p = memchr(pvfree, '\n', nfree);
1230
if (p+1 < pvend && *(p+1) == '\0') {
1231
/* \n came from fgets */
1235
/* \n came from us; last line of file, no newline */
1236
assert(p > pvfree && *(p-1) == '\0');
1240
/* expand buffer and try again */
1241
assert(*(pvend-1) == '\0');
1242
increment = total_v_size >> 2; /* mild exponential growth */
1243
prev_v_size = total_v_size;
1244
total_v_size += increment;
1245
/* check for overflow */
1246
if (total_v_size <= prev_v_size ||
1247
total_v_size > PY_SSIZE_T_MAX) {
1248
PyErr_SetString(PyExc_OverflowError,
1249
"line is longer than a Python string can hold");
1253
if (_PyString_Resize(&v, (int)total_v_size) < 0)
1255
/* overwrite the trailing null byte */
1256
pvfree = BUF(v) + (prev_v_size - 1);
1258
if (BUF(v) + total_v_size != p)
1259
_PyString_Resize(&v, p - BUF(v));
1264
#endif /* ifdef USE_FGETS_IN_GETLINE */
1266
/* Internal routine to get a line.
1267
Size argument interpretation:
1269
<= 0: read arbitrary line
1273
get_line(PyFileObject *f, int n)
1278
size_t total_v_size; /* total # of slots in buffer */
1279
size_t used_v_size; /* # used slots in buffer */
1280
size_t increment; /* amount to increment the buffer */
1282
int newlinetypes = f->f_newlinetypes;
1283
int skipnextlf = f->f_skipnextlf;
1284
int univ_newline = f->f_univ_newline;
1286
#if defined(USE_FGETS_IN_GETLINE)
1287
if (n <= 0 && !univ_newline )
1288
return getline_via_fgets(f, fp);
1290
total_v_size = n > 0 ? n : 100;
1291
v = PyString_FromStringAndSize((char *)NULL, total_v_size);
1295
end = buf + total_v_size;
1298
FILE_BEGIN_ALLOW_THREADS(f)
1301
c = 'x'; /* Shut up gcc warning */
1302
while ( buf != end && (c = GETC(fp)) != EOF ) {
1306
/* Seeing a \n here with
1307
* skipnextlf true means we
1310
newlinetypes |= NEWLINE_CRLF;
1312
if (c == EOF) break;
1314
newlinetypes |= NEWLINE_CR;
1320
} else if ( c == '\n')
1321
newlinetypes |= NEWLINE_LF;
1323
if (c == '\n') break;
1325
if ( c == EOF && skipnextlf )
1326
newlinetypes |= NEWLINE_CR;
1327
} else /* If not universal newlines use the normal loop */
1328
while ((c = GETC(fp)) != EOF &&
1329
(*buf++ = c) != '\n' &&
1333
FILE_END_ALLOW_THREADS(f)
1334
f->f_newlinetypes = newlinetypes;
1335
f->f_skipnextlf = skipnextlf;
1340
PyErr_SetFromErrno(PyExc_IOError);
1346
if (PyErr_CheckSignals()) {
1352
/* Must be because buf == end */
1355
used_v_size = total_v_size;
1356
increment = total_v_size >> 2; /* mild exponential growth */
1357
total_v_size += increment;
1358
if (total_v_size > PY_SSIZE_T_MAX) {
1359
PyErr_SetString(PyExc_OverflowError,
1360
"line is longer than a Python string can hold");
1364
if (_PyString_Resize(&v, total_v_size) < 0)
1366
buf = BUF(v) + used_v_size;
1367
end = BUF(v) + total_v_size;
1370
used_v_size = buf - BUF(v);
1371
if (used_v_size != total_v_size)
1372
_PyString_Resize(&v, used_v_size);
1376
/* External C interface */
1379
PyFile_GetLine(PyObject *f, int n)
1384
PyErr_BadInternalCall();
1388
if (PyFile_Check(f)) {
1389
PyFileObject *fo = (PyFileObject *)f;
1390
if (fo->f_fp == NULL)
1391
return err_closed();
1392
/* refuse to mix with f.next() */
1393
if (fo->f_buf != NULL &&
1394
(fo->f_bufend - fo->f_bufptr) > 0 &&
1395
fo->f_buf[0] != '\0')
1396
return err_iterbuffered();
1397
result = get_line(fo, n);
1403
reader = PyObject_GetAttrString(f, "readline");
1407
args = PyTuple_New(0);
1409
args = Py_BuildValue("(i)", n);
1414
result = PyEval_CallObject(reader, args);
1417
if (result != NULL && !PyString_Check(result) &&
1418
!PyUnicode_Check(result)) {
1421
PyErr_SetString(PyExc_TypeError,
1422
"object.readline() returned non-string");
1426
if (n < 0 && result != NULL && PyString_Check(result)) {
1427
char *s = PyString_AS_STRING(result);
1428
Py_ssize_t len = PyString_GET_SIZE(result);
1432
PyErr_SetString(PyExc_EOFError,
1433
"EOF when reading a line");
1435
else if (s[len-1] == '\n') {
1436
if (result->ob_refcnt == 1)
1437
_PyString_Resize(&result, len-1);
1440
v = PyString_FromStringAndSize(s, len-1);
1446
#ifdef Py_USING_UNICODE
1447
if (n < 0 && result != NULL && PyUnicode_Check(result)) {
1448
Py_UNICODE *s = PyUnicode_AS_UNICODE(result);
1449
Py_ssize_t len = PyUnicode_GET_SIZE(result);
1453
PyErr_SetString(PyExc_EOFError,
1454
"EOF when reading a line");
1456
else if (s[len-1] == '\n') {
1457
if (result->ob_refcnt == 1)
1458
PyUnicode_Resize(&result, len-1);
1461
v = PyUnicode_FromUnicode(s, len-1);
1474
file_readline(PyFileObject *f, PyObject *args)
1478
if (f->f_fp == NULL)
1479
return err_closed();
1480
/* refuse to mix with f.next() */
1481
if (f->f_buf != NULL &&
1482
(f->f_bufend - f->f_bufptr) > 0 &&
1483
f->f_buf[0] != '\0')
1484
return err_iterbuffered();
1485
if (!PyArg_ParseTuple(args, "|i:readline", &n))
1488
return PyString_FromString("");
1491
return get_line(f, n);
1495
/* file.readlines([sizehint]): read the whole file (or roughly sizehint
 * bytes) and split it into a list of line strings.  Uses a stack buffer
 * (small_buffer) that is promoted to a heap PyString (big_buffer) when a
 * single line outgrows SMALLCHUNK; reads happen with the GIL released via
 * FILE_BEGIN/END_ALLOW_THREADS.
 * NOTE(review): fragment — interleaved integers are original line numbers;
 * loop headers, gotos and cleanup lines are missing from this extraction. */
file_readlines(PyFileObject *f, PyObject *args)
1498
PyObject *list = NULL;
1500
char small_buffer[SMALLCHUNK];
1501
char *buffer = small_buffer;
1502
size_t buffersize = SMALLCHUNK;
1503
PyObject *big_buffer = NULL;
1506
size_t totalread = 0;
1511
if (f->f_fp == NULL)
1512
return err_closed();
1513
/* refuse to mix with f.next() */
1514
if (f->f_buf != NULL &&
1515
(f->f_bufend - f->f_bufptr) > 0 &&
1516
f->f_buf[0] != '\0')
1517
return err_iterbuffered();
1518
if (!PyArg_ParseTuple(args, "|l:readlines", &sizehint))
1520
if ((list = PyList_New(0)) == NULL)
1526
/* Read a chunk with the GIL released; universal-newline aware. */
FILE_BEGIN_ALLOW_THREADS(f)
1528
nread = Py_UniversalNewlineFread(buffer+nfilled,
1529
buffersize-nfilled, f->f_fp, (PyObject *)f);
1530
FILE_END_ALLOW_THREADS(f)
1531
shortread = (nread < buffersize-nfilled);
1535
if (!ferror(f->f_fp))
1537
PyErr_SetFromErrno(PyExc_IOError);
1542
p = (char *)memchr(buffer+nfilled, '\n', nread);
1544
/* Need a larger buffer to fit this line */
1547
if (buffersize > PY_SSIZE_T_MAX) {
1548
PyErr_SetString(PyExc_OverflowError,
1549
"line is longer than a Python string can hold");
1552
if (big_buffer == NULL) {
1553
/* Create the big buffer */
1554
big_buffer = PyString_FromStringAndSize(
1556
if (big_buffer == NULL)
1558
buffer = PyString_AS_STRING(big_buffer);
1559
memcpy(buffer, small_buffer, nfilled);
1562
/* Grow the big buffer */
1563
if ( _PyString_Resize(&big_buffer, buffersize) < 0 )
1565
buffer = PyString_AS_STRING(big_buffer);
1569
end = buffer+nfilled+nread;
1572
/* Process complete lines */
1574
line = PyString_FromStringAndSize(q, p-q);
1577
err = PyList_Append(list, line);
1582
p = (char *)memchr(q, '\n', end-q);
1583
} while (p != NULL);
1584
/* Move the remaining incomplete line to the start */
1586
memmove(buffer, q, nfilled);
1588
/* Stop once we have consumed roughly sizehint bytes. */
if (totalread >= (size_t)sizehint)
1592
/* Partial last line */
1593
line = PyString_FromStringAndSize(buffer, nfilled);
1597
/* Need to complete the last line */
1598
PyObject *rest = get_line(f, 0);
1603
PyString_Concat(&line, rest);
1608
err = PyList_Append(list, line);
1615
Py_XDECREF(big_buffer);
1624
/* file.write(str): parse the argument as a buffer ("s*" for binary mode,
 * "t#" for text — selection logic lost in this extraction), fwrite() it
 * with the GIL released, and raise IOError on a short write.
 * NOTE(review): fragment — interleaved integers are original line numbers;
 * declarations and the short-write check are missing here. */
file_write(PyFileObject *f, PyObject *args)
1629
if (f->f_fp == NULL)
1630
return err_closed();
1632
if (!PyArg_ParseTuple(args, "s*", &pbuf))
1637
if (!PyArg_ParseTuple(args, "t#", &s, &n))
1640
FILE_BEGIN_ALLOW_THREADS(f)
1642
n2 = fwrite(s, 1, n, f->f_fp);
1643
FILE_END_ALLOW_THREADS(f)
1645
PyBuffer_Release(&pbuf);
1647
PyErr_SetFromErrno(PyExc_IOError);
1656
/* file.writelines(sequence_of_strings): iterate the argument in CHUNKSIZE
 * batches, coerce non-string items via the buffer APIs (matching
 * file.write() rules), then fwrite the batch with the GIL released.
 * NOTE(review): fragment — interleaved integers are original line numbers;
 * the outer loop structure and error/cleanup paths are incomplete here. */
file_writelines(PyFileObject *f, PyObject *seq)
1658
#define CHUNKSIZE 1000
1659
PyObject *list, *line;
1660
PyObject *it; /* iter(seq) */
1663
Py_ssize_t i, j, nwritten, len;
1665
assert(seq != NULL);
1666
if (f->f_fp == NULL)
1667
return err_closed();
1671
islist = PyList_Check(seq);
1675
it = PyObject_GetIter(seq);
1677
PyErr_SetString(PyExc_TypeError,
1678
"writelines() requires an iterable argument");
1681
/* From here on, fail by going to error, to reclaim "it". */
1682
list = PyList_New(CHUNKSIZE);
1687
/* Strategy: slurp CHUNKSIZE lines into a private list,
1688
checking that they are all strings, then write that list
1689
without holding the interpreter lock, then come back for more. */
1690
for (index = 0; ; index += CHUNKSIZE) {
1693
/* List fast path: slice directly instead of iterating. */
list = PyList_GetSlice(seq, index, index+CHUNKSIZE);
1696
j = PyList_GET_SIZE(list);
1699
for (j = 0; j < CHUNKSIZE; j++) {
1700
line = PyIter_Next(it);
1702
if (PyErr_Occurred())
1706
PyList_SetItem(list, j, line);
1712
/* Check that all entries are indeed strings. If not,
1713
apply the same rules as for file.write() and
1714
convert the results to strings. This is slow, but
1715
seems to be the only way since all conversion APIs
1716
could potentially execute Python code. */
1717
for (i = 0; i < j; i++) {
1718
PyObject *v = PyList_GET_ITEM(list, i);
1719
if (!PyString_Check(v)) {
1721
if (((f->f_binary &&
1722
PyObject_AsReadBuffer(v,
1723
(const void**)&buffer,
1725
PyObject_AsCharBuffer(v,
1728
PyErr_SetString(PyExc_TypeError,
1729
"writelines() argument must be a sequence of strings");
1732
line = PyString_FromStringAndSize(buffer,
1737
PyList_SET_ITEM(list, i, line);
1741
/* Since we are releasing the global lock, the
1742
following code may *not* execute Python code. */
1744
FILE_BEGIN_ALLOW_THREADS(f)
1746
for (i = 0; i < j; i++) {
1747
line = PyList_GET_ITEM(list, i);
1748
len = PyString_GET_SIZE(line);
1749
nwritten = fwrite(PyString_AS_STRING(line),
1751
if (nwritten != len) {
1752
FILE_ABORT_ALLOW_THREADS(f)
1753
PyErr_SetFromErrno(PyExc_IOError);
1758
FILE_END_ALLOW_THREADS(f)
1774
/* Return the file object itself (used for __enter__ and tp_iter),
 * failing if the file is already closed.
 * NOTE(review): fragment — the Py_INCREF before returning f is among the
 * lines lost in this extraction (integers are original line numbers). */
file_self(PyFileObject *f)
1776
if (f->f_fp == NULL)
1777
return err_closed();
1779
return (PyObject *)f;
1783
/* file.xreadlines(): deprecated alias for iterating the file itself;
 * emits a Py3k warning, then returns self via file_self(). */
file_xreadlines(PyFileObject *f)
1785
if (PyErr_WarnPy3k("f.xreadlines() not supported in 3.x, "
1786
"try 'for line in f' instead", 1) < 0)
1788
return file_self(f);
1792
/* file.__exit__(*excinfo): close the file; deliberately does NOT return
 * close()'s result so a truthy value cannot swallow exceptions raised
 * inside the with-block (see comment below).
 * NOTE(review): fragment — interleaved integers are original line numbers. */
file_exit(PyObject *f, PyObject *args)
1794
PyObject *ret = PyObject_CallMethod(f, "close", NULL);
1796
/* If error occurred, pass through */
1799
/* We cannot return the result of close since a true
1800
* value will be interpreted as "yes, swallow the
1801
* exception if one was raised inside the with block". */
1805
/* Docstrings for the file-object methods registered in file_methods[].
 * NOTE(review): fragment — interleaved integers are original line numbers;
 * blank-line continuations inside several docstrings are missing. */
PyDoc_STRVAR(readline_doc,
1806
"readline([size]) -> next line from the file, as a string.\n"
1808
"Retain newline. A non-negative size argument limits the maximum\n"
1809
"number of bytes to return (an incomplete line may be returned then).\n"
1810
"Return an empty string at EOF.");
1812
PyDoc_STRVAR(read_doc,
1813
"read([size]) -> read at most size bytes, returned as a string.\n"
1815
"If the size argument is negative or omitted, read until EOF is reached.\n"
1816
"Notice that when in non-blocking mode, less data than what was requested\n"
1817
"may be returned, even if no size parameter was given.");
1819
PyDoc_STRVAR(write_doc,
1820
"write(str) -> None. Write string str to file.\n"
1822
"Note that due to buffering, flush() or close() may be needed before\n"
1823
"the file on disk reflects the data written.");
1825
PyDoc_STRVAR(fileno_doc,
1826
"fileno() -> integer \"file descriptor\".\n"
1828
"This is needed for lower-level file interfaces, such os.read().");
1830
PyDoc_STRVAR(seek_doc,
1831
"seek(offset[, whence]) -> None. Move to new file position.\n"
1833
"Argument offset is a byte count. Optional argument whence defaults to\n"
1834
"0 (offset from start of file, offset should be >= 0); other values are 1\n"
1835
"(move relative to current position, positive or negative), and 2 (move\n"
1836
"relative to end of file, usually negative, although many platforms allow\n"
1837
"seeking beyond the end of a file). If the file is opened in text mode,\n"
1838
"only offsets returned by tell() are legal. Use of other offsets causes\n"
1839
"undefined behavior."
1841
"Note that not all file objects are seekable.");
1843
#ifdef HAVE_FTRUNCATE
1844
PyDoc_STRVAR(truncate_doc,
1845
"truncate([size]) -> None. Truncate the file to at most size bytes.\n"
1847
"Size defaults to the current file position, as returned by tell().");
1850
PyDoc_STRVAR(tell_doc,
1851
"tell() -> current file position, an integer (may be a long integer).");
1853
PyDoc_STRVAR(readinto_doc,
1854
"readinto() -> Undocumented. Don't use this; it may go away.");
1856
PyDoc_STRVAR(readlines_doc,
1857
"readlines([size]) -> list of strings, each a line from the file.\n"
1859
"Call readline() repeatedly and return a list of the lines so read.\n"
1860
"The optional size argument, if given, is an approximate bound on the\n"
1861
"total number of bytes in the lines returned.");
1863
PyDoc_STRVAR(xreadlines_doc,
1864
"xreadlines() -> returns self.\n"
1866
"For backward compatibility. File objects now include the performance\n"
1867
"optimizations previously implemented in the xreadlines module.");
1869
PyDoc_STRVAR(writelines_doc,
1870
"writelines(sequence_of_strings) -> None. Write the strings to the file.\n"
1872
"Note that newlines are not added. The sequence can be any iterable object\n"
1873
"producing strings. This is equivalent to calling write() for each string.");
1875
PyDoc_STRVAR(flush_doc,
1876
"flush() -> None. Flush the internal I/O buffer.");
1878
PyDoc_STRVAR(close_doc,
1879
"close() -> None or (perhaps) an integer. Close the file.\n"
1881
"Sets data attribute .closed to True. A closed file cannot be used for\n"
1882
"further I/O operations. close() may be called more than once without\n"
1883
"error. Some kinds of file objects (for example, opened by popen())\n"
1884
"may return an exit status upon closing.");
1886
PyDoc_STRVAR(isatty_doc,
1887
"isatty() -> true or false. True if the file is connected to a tty device.");
1889
PyDoc_STRVAR(enter_doc,
1890
"__enter__() -> self.");
1892
PyDoc_STRVAR(exit_doc,
1893
"__exit__(*excinfo) -> None. Closes the file.");
1895
/* Method table wiring the file_* C functions to the Python-level names.
 * NOTE(review): fragment — the #endif closing the HAVE_FTRUNCATE block is
 * among the lines lost in this extraction (integers are original line
 * numbers). */
static PyMethodDef file_methods[] = {
1896
{"readline", (PyCFunction)file_readline, METH_VARARGS, readline_doc},
1897
{"read", (PyCFunction)file_read, METH_VARARGS, read_doc},
1898
{"write", (PyCFunction)file_write, METH_VARARGS, write_doc},
1899
{"fileno", (PyCFunction)file_fileno, METH_NOARGS, fileno_doc},
1900
{"seek", (PyCFunction)file_seek, METH_VARARGS, seek_doc},
1901
#ifdef HAVE_FTRUNCATE
1902
{"truncate", (PyCFunction)file_truncate, METH_VARARGS, truncate_doc},
1904
{"tell", (PyCFunction)file_tell, METH_NOARGS, tell_doc},
1905
{"readinto", (PyCFunction)file_readinto, METH_VARARGS, readinto_doc},
1906
{"readlines", (PyCFunction)file_readlines, METH_VARARGS, readlines_doc},
1907
{"xreadlines",(PyCFunction)file_xreadlines, METH_NOARGS, xreadlines_doc},
1908
{"writelines",(PyCFunction)file_writelines, METH_O, writelines_doc},
1909
{"flush", (PyCFunction)file_flush, METH_NOARGS, flush_doc},
1910
{"close", (PyCFunction)file_close, METH_NOARGS, close_doc},
1911
{"isatty", (PyCFunction)file_isatty, METH_NOARGS, isatty_doc},
1912
{"__enter__", (PyCFunction)file_self, METH_NOARGS, enter_doc},
1913
{"__exit__", (PyCFunction)file_exit, METH_VARARGS, exit_doc},
1914
{NULL, NULL} /* sentinel */
1917
/* Read-only struct members exposed as attributes (mode, name, encoding,
 * errors); OFF() computes each field's offset inside PyFileObject.
 * NOTE(review): fragment — the doc strings for "name" and "encoding"
 * entries are among the lines lost in this extraction. */
#define OFF(x) offsetof(PyFileObject, x)
1919
static PyMemberDef file_memberlist[] = {
1920
{"mode", T_OBJECT, OFF(f_mode), RO,
1921
"file mode ('r', 'U', 'w', 'a', possibly with 'b' or '+' added)"},
1922
{"name", T_OBJECT, OFF(f_name), RO,
1924
{"encoding", T_OBJECT, OFF(f_encoding), RO,
1926
{"errors", T_OBJECT, OFF(f_errors), RO,
1927
"Unicode error handler"},
1928
/* getattr(f, "closed") is implemented without this table */
1929
{NULL} /* Sentinel */
1933
/* Getter for the "closed" attribute: True iff the underlying FILE* is
 * NULL. */
get_closed(PyFileObject *f, void *closure)
1935
return PyBool_FromLong((long)(f->f_fp == 0));
1938
/* Getter for the "newlines" attribute: maps the f_newlinetypes bitmask
 * (NEWLINE_CR / NEWLINE_LF / NEWLINE_CRLF) to a string, a tuple of the
 * newline strings seen, or (for NEWLINE_UNKNOWN) presumably None — that
 * case's return is among the lines lost in this extraction.
 * NOTE(review): fragment — interleaved integers are original line numbers. */
get_newlines(PyFileObject *f, void *closure)
1940
switch (f->f_newlinetypes) {
1941
case NEWLINE_UNKNOWN:
1945
return PyString_FromString("\r");
1947
return PyString_FromString("\n");
1948
case NEWLINE_CR|NEWLINE_LF:
1949
return Py_BuildValue("(ss)", "\r", "\n");
1951
return PyString_FromString("\r\n");
1952
case NEWLINE_CR|NEWLINE_CRLF:
1953
return Py_BuildValue("(ss)", "\r", "\r\n");
1954
case NEWLINE_LF|NEWLINE_CRLF:
1955
return Py_BuildValue("(ss)", "\n", "\r\n");
1956
case NEWLINE_CR|NEWLINE_LF|NEWLINE_CRLF:
1957
return Py_BuildValue("(sss)", "\r", "\n", "\r\n");
1959
/* Should be unreachable: flag an out-of-range bitmask. */
PyErr_Format(PyExc_SystemError,
1960
"Unknown newlines value 0x%x\n",
1967
/* Getter for "softspace" (print-statement bookkeeping); warns that the
 * attribute is gone in Python 3. */
get_softspace(PyFileObject *f, void *closure)
1969
if (PyErr_WarnPy3k("file.softspace not supported in 3.x", 1) < 0)
1971
return PyInt_FromLong(f->f_softspace);
1975
/* Setter for "softspace": rejects deletion, converts the value with
 * PyInt_AsLong (propagating conversion errors), then stores it.
 * NOTE(review): fragment — interleaved integers are original line numbers. */
set_softspace(PyFileObject *f, PyObject *value)
1978
if (PyErr_WarnPy3k("file.softspace not supported in 3.x", 1) < 0)
1981
if (value == NULL) {
1982
PyErr_SetString(PyExc_TypeError,
1983
"can't delete softspace attribute");
1987
new = PyInt_AsLong(value);
1988
/* -1 may be a real value; only fail if an exception is pending. */
if (new == -1 && PyErr_Occurred())
1990
f->f_softspace = new;
1994
/* Computed attributes: closed (read-only), newlines (read-only),
 * softspace (read/write).
 * NOTE(review): fragment — the {0} sentinel entry is among the lines lost
 * in this extraction. */
static PyGetSetDef file_getsetlist[] = {
1995
{"closed", (getter)get_closed, NULL, "True if the file is closed"},
1996
{"newlines", (getter)get_newlines, NULL,
1997
"end-of-line convention used in this file"},
1998
{"softspace", (getter)get_softspace, (setter)set_softspace,
1999
"flag indicating that a space needs to be printed; used by print"},
2004
/* Free the iterator readahead buffer, if any.  (The f_buf = NULL reset is
 * among the lines lost in this extraction.) */
drop_readahead(PyFileObject *f)
2006
if (f->f_buf != NULL) {
2007
PyMem_Free(f->f_buf);
2012
/* Make sure that file has a readahead buffer with at least one byte
2013
(unless at EOF) and no more than bufsize. Returns negative value on
2014
error, will set MemoryError if bufsize bytes cannot be allocated. */
2016
/* NOTE(review): fragment — interleaved integers are original line numbers;
 * the early-return and error-cleanup lines are missing from this
 * extraction. */
readahead(PyFileObject *f, int bufsize)
2018
Py_ssize_t chunksize;
2020
if (f->f_buf != NULL) {
2021
/* Buffer already holds at least one byte: nothing to do. */
if( (f->f_bufend - f->f_bufptr) >= 1)
2026
if ((f->f_buf = (char *)PyMem_Malloc(bufsize)) == NULL) {
2030
FILE_BEGIN_ALLOW_THREADS(f)
2032
chunksize = Py_UniversalNewlineFread(
2033
f->f_buf, bufsize, f->f_fp, (PyObject *)f);
2034
FILE_END_ALLOW_THREADS(f)
2035
if (chunksize == 0) {
2036
if (ferror(f->f_fp)) {
2037
PyErr_SetFromErrno(PyExc_IOError);
2043
f->f_bufptr = f->f_buf;
2044
f->f_bufend = f->f_buf + chunksize;
2048
/* Used by file_iternext. The returned string will start with 'skip'
2049
uninitialized bytes followed by the remainder of the line. Don't be
2050
horrified by the recursive call: maximum recursion depth is limited by
2051
logarithmic buffer growth to about 50 even when reading a 1gb line. */
2053
/* NOTE(review): fragment — interleaved integers are original line numbers;
 * several declarations and the drop_readahead call are missing from this
 * extraction. */
static PyStringObject *
2054
readahead_get_line_skip(PyFileObject *f, int skip, int bufsize)
2061
if (f->f_buf == NULL)
2062
if (readahead(f, bufsize) < 0)
2065
len = f->f_bufend - f->f_bufptr;
2067
/* Empty buffer here means EOF: return just the skip-byte prefix. */
return (PyStringObject *)
2068
PyString_FromStringAndSize(NULL, skip);
2069
bufptr = (char *)memchr(f->f_bufptr, '\n', len);
2070
if (bufptr != NULL) {
2071
bufptr++; /* Count the '\n' */
2072
len = bufptr - f->f_bufptr;
2073
s = (PyStringObject *)
2074
PyString_FromStringAndSize(NULL, skip+len);
2077
memcpy(PyString_AS_STRING(s)+skip, f->f_bufptr, len);
2078
f->f_bufptr = bufptr;
2079
if (bufptr == f->f_bufend)
2082
/* No newline in the buffer: recurse with a ~1.25x larger buffer. */
bufptr = f->f_bufptr;
2084
f->f_buf = NULL; /* Force new readahead buffer */
2085
assert(skip+len < INT_MAX);
2086
s = readahead_get_line_skip(
2087
f, (int)(skip+len), bufsize + (bufsize>>2) );
2092
memcpy(PyString_AS_STRING(s)+skip, bufptr, len);
2098
/* A larger buffer size may actually decrease performance. */
2099
#define READAHEAD_BUFSIZE 8192
2102
/* tp_iternext: return the next line via the readahead machinery; an empty
 * result signals StopIteration (the Py_XDECREF/NULL-return lines are
 * missing from this extraction). */
file_iternext(PyFileObject *f)
2106
if (f->f_fp == NULL)
2107
return err_closed();
2109
l = readahead_get_line_skip(f, 0, READAHEAD_BUFSIZE);
2110
if (l == NULL || PyString_GET_SIZE(l) == 0) {
2114
return (PyObject *)l;
2119
/* tp_new: allocate an uninitialized file object.  name/mode are filled
 * with an interned "<uninitialized file>" placeholder so later code never
 * sees NULL there; encoding/errors default to None.
 * NOTE(review): fragment — interleaved integers are original line numbers;
 * the Py_INCREF(Py_None) lines and the return are missing here. */
file_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
2122
static PyObject *not_yet_string;
2124
assert(type != NULL && type->tp_alloc != NULL);
2126
if (not_yet_string == NULL) {
2127
/* Created once and cached in the static for all future calls. */
not_yet_string = PyString_InternFromString("<uninitialized file>");
2128
if (not_yet_string == NULL)
2132
self = type->tp_alloc(type, 0);
2134
/* Always fill in the name and mode, so that nobody else
2135
needs to special-case NULLs there. */
2136
Py_INCREF(not_yet_string);
2137
((PyFileObject *)self)->f_name = not_yet_string;
2138
Py_INCREF(not_yet_string);
2139
((PyFileObject *)self)->f_mode = not_yet_string;
2141
((PyFileObject *)self)->f_encoding = Py_None;
2143
((PyFileObject *)self)->f_errors = Py_None;
2144
((PyFileObject *)self)->weakreflist = NULL;
2145
((PyFileObject *)self)->unlocked_count = 0;
2151
/* tp_init: file(name[, mode[, buffering]]).  Closes any already-open fp,
 * then on Windows NT tries the wide-string (unicode filename) path first,
 * falling back to the narrow filesystem-encoded path; finally opens the
 * file and applies the buffering setting.
 * NOTE(review): fragment — interleaved integers are original line numbers;
 * declarations, #endif, and the goto-error cleanup are missing here. */
file_init(PyObject *self, PyObject *args, PyObject *kwds)
2153
PyFileObject *foself = (PyFileObject *)self;
2155
static char *kwlist[] = {"name", "mode", "buffering", 0};
2159
int wideargument = 0;
2161
assert(PyFile_Check(self));
2162
if (foself->f_fp != NULL) {
2163
/* Have to close the existing file first. */
2164
PyObject *closeresult = file_close(foself);
2165
if (closeresult == NULL)
2167
Py_DECREF(closeresult);
2170
#ifdef Py_WIN_WIDE_FILENAMES
2171
if (GetVersion() < 0x80000000) { /* On NT, so wide API available */
2173
if (PyArg_ParseTupleAndKeywords(args, kwds, "U|si:file",
2174
kwlist, &po, &mode, &bufsize)) {
2176
if (fill_file_fields(foself, NULL, po, mode,
2180
/* Drop the argument parsing error as narrow
2181
strings are also valid. */
2187
if (!wideargument) {
2190
/* Narrow path: filename decoded with the filesystem encoding. */
if (!PyArg_ParseTupleAndKeywords(args, kwds, "et|si:file", kwlist,
2191
Py_FileSystemDefaultEncoding,
2196
/* We parse again to get the name as a PyObject */
2197
if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|si:file",
2198
kwlist, &o_name, &mode,
2202
if (fill_file_fields(foself, NULL, o_name, mode,
2206
if (open_the_file(foself, name, mode) == NULL)
2208
foself->f_setbuf = NULL;
2209
PyFile_SetBufSize(self, bufsize);
2216
PyMem_Free(name); /* free the encoded string */
2220
/* Type docstring for the built-in file type.
 * NOTE(review): fragment — the blank-line "\n" continuations and the
 * closing ';' are among the lines lost in this extraction. */
PyDoc_VAR(file_doc) =
2222
"file(name[, mode[, buffering]]) -> file object\n"
2224
"Open a file. The mode can be 'r', 'w' or 'a' for reading (default),\n"
2225
"writing or appending. The file will be created if it doesn't exist\n"
2226
"when opened for writing or appending; it will be truncated when\n"
2227
"opened for writing. Add a 'b' to the mode for binary files.\n"
2228
"Add a '+' to the mode to allow simultaneous reading and writing.\n"
2229
"If the buffering argument is given, 0 means unbuffered, 1 means line\n"
2230
"buffered, and larger numbers specify the buffer size. The preferred way\n"
2231
"to open a file is with the builtin open() function.\n"
2234
"Add a 'U' to mode to open the file for input with universal newline\n"
2235
"support. Any line ending in the input file will be seen as a '\\n'\n"
2236
"in Python. Also, a file so opened gains the attribute 'newlines';\n"
2237
"the value for this attribute is one of None (no newline read yet),\n"
2238
"'\\r', '\\n', '\\r\\n' or a tuple containing all the newline types seen.\n"
2240
"'U' cannot be combined with 'w' or '+' mode.\n"
2243
/* Type object for the built-in file type, wiring the slots defined above
 * (dealloc, repr, iteration, methods, members, getset, init/new/alloc).
 * NOTE(review): fragment — interleaved integers are original line numbers;
 * several slot lines (tp_name, hash, call, str, clear, tp_base, etc.) are
 * missing from this extraction. */
PyTypeObject PyFile_Type = {
2244
PyVarObject_HEAD_INIT(&PyType_Type, 0)
2246
sizeof(PyFileObject),
2248
(destructor)file_dealloc, /* tp_dealloc */
2253
(reprfunc)file_repr, /* tp_repr */
2254
0, /* tp_as_number */
2255
0, /* tp_as_sequence */
2256
0, /* tp_as_mapping */
2260
PyObject_GenericGetAttr, /* tp_getattro */
2261
/* softspace is writable: we must supply tp_setattro */
2262
PyObject_GenericSetAttr, /* tp_setattro */
2263
0, /* tp_as_buffer */
2264
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_WEAKREFS, /* tp_flags */
2265
file_doc, /* tp_doc */
2266
0, /* tp_traverse */
2268
0, /* tp_richcompare */
2269
offsetof(PyFileObject, weakreflist), /* tp_weaklistoffset */
2270
(getiterfunc)file_self, /* tp_iter */
2271
(iternextfunc)file_iternext, /* tp_iternext */
2272
file_methods, /* tp_methods */
2273
file_memberlist, /* tp_members */
2274
file_getsetlist, /* tp_getset */
2277
0, /* tp_descr_get */
2278
0, /* tp_descr_set */
2279
0, /* tp_dictoffset */
2280
file_init, /* tp_init */
2281
PyType_GenericAlloc, /* tp_alloc */
2282
file_new, /* tp_new */
2283
PyObject_Del, /* tp_free */
2286
/* Interface for the 'soft space' between print items. */
2289
/* Set f's softspace flag to newflag and return the previous value;
 * fast path pokes the struct field directly, otherwise it goes through
 * getattr/setattr on "softspace".
 * NOTE(review): fragment — interleaved integers are original line numbers;
 * error-clearing and refcount lines are missing here. */
PyFile_SoftSpace(PyObject *f, int newflag)
2295
else if (PyFile_Check(f)) {
2296
oldflag = ((PyFileObject *)f)->f_softspace;
2297
((PyFileObject *)f)->f_softspace = newflag;
2301
v = PyObject_GetAttrString(f, "softspace");
2306
oldflag = PyInt_AsLong(v);
2307
assert(oldflag < INT_MAX);
2310
v = PyInt_FromLong((long)newflag);
2314
if (PyObject_SetAttrString(f, "softspace", v) != 0)
2319
return (int)oldflag;
2322
/* Interfaces to write objects/strings to file-like objects */
2325
/* Write v to file-like object f.  Real PyFileObjects get the direct
 * file_PyObject_Print path (encoding unicode with the file's encoding and
 * error handler under Py_PRINT_RAW); other objects are written via their
 * "write" method with str()/repr() applied per flags.
 * NOTE(review): fragment — interleaved integers are original line numbers;
 * refcount management and several returns are missing here. */
PyFile_WriteObject(PyObject *v, PyObject *f, int flags)
2327
PyObject *writer, *value, *args, *result;
2329
PyErr_SetString(PyExc_TypeError, "writeobject with NULL file");
2332
else if (PyFile_Check(f)) {
2333
PyFileObject *fobj = (PyFileObject *) f;
2334
#ifdef Py_USING_UNICODE
2335
PyObject *enc = fobj->f_encoding;
2338
if (fobj->f_fp == NULL) {
2342
#ifdef Py_USING_UNICODE
2343
if ((flags & Py_PRINT_RAW) &&
2344
PyUnicode_Check(v) && enc != Py_None) {
2345
char *cenc = PyString_AS_STRING(enc);
2346
char *errors = fobj->f_errors == Py_None ?
2347
"strict" : PyString_AS_STRING(fobj->f_errors);
2348
value = PyUnicode_AsEncodedString(v, cenc, errors);
2355
result = file_PyObject_Print(value, fobj, flags);
2359
return file_PyObject_Print(v, fobj, flags);
2362
/* Generic path: call f.write(str_or_repr_of_v). */
writer = PyObject_GetAttrString(f, "write");
2365
if (flags & Py_PRINT_RAW) {
2366
if (PyUnicode_Check(v)) {
2370
value = PyObject_Str(v);
2373
value = PyObject_Repr(v);
2374
if (value == NULL) {
2378
args = PyTuple_Pack(1, value);
2384
result = PyEval_CallObject(writer, args);
2395
/* Write C string s to file-like object f: direct fputs (GIL released) for
 * real PyFileObjects, otherwise via PyFile_WriteObject on a temporary
 * PyString; a NULL f is treated as a pre-existing error condition.
 * NOTE(review): fragment — interleaved integers are original line numbers;
 * the fputs call itself and several returns are missing here. */
PyFile_WriteString(const char *s, PyObject *f)
2399
/* Should be caused by a pre-existing error */
2400
if (!PyErr_Occurred())
2401
PyErr_SetString(PyExc_SystemError,
2402
"null file for PyFile_WriteString");
2405
else if (PyFile_Check(f)) {
2406
PyFileObject *fobj = (PyFileObject *) f;
2407
FILE *fp = PyFile_AsFile(f);
2412
FILE_BEGIN_ALLOW_THREADS(fobj)
2414
FILE_END_ALLOW_THREADS(fobj)
2417
else if (!PyErr_Occurred()) {
2418
PyObject *v = PyString_FromString(s);
2422
err = PyFile_WriteObject(v, f, Py_PRINT_RAW);
2430
/* Try to get a file-descriptor from a Python object. If the object
2431
is an integer or long integer, its value is returned. If not, the
2432
object's fileno() method is called if it exists; the method must return
2433
an integer or long integer, which is returned as the file descriptor value.
2434
-1 is returned on failure.
2437
/* NOTE(review): fragment — interleaved integers are original line numbers;
 * declarations, refcount handling and returns are missing here. */
int PyObject_AsFileDescriptor(PyObject *o)
2442
if (PyInt_Check(o)) {
2443
fd = PyInt_AsLong(o);
2445
else if (PyLong_Check(o)) {
2446
fd = PyLong_AsLong(o);
2448
else if ((meth = PyObject_GetAttrString(o, "fileno")) != NULL)
2450
PyObject *fno = PyEval_CallObject(meth, NULL);
2455
if (PyInt_Check(fno)) {
2456
fd = PyInt_AsLong(fno);
2459
else if (PyLong_Check(fno)) {
2460
fd = PyLong_AsLong(fno);
2464
PyErr_SetString(PyExc_TypeError,
2465
"fileno() returned a non-integer");
2471
PyErr_SetString(PyExc_TypeError,
2472
"argument must be an int, or have a fileno() method.");
2477
/* Negative descriptors are rejected after all conversion paths. */
PyErr_Format(PyExc_ValueError,
2478
"file descriptor cannot be a negative integer (%i)",
2485
/* From here on we need access to the real fgets and fread */
2490
/*
** Py_UniversalNewlineFgets is an fgets variation that understands
2491
** all of \r, \n and \r\n conventions.
2492
** The stream should be opened in binary mode.
2493
** If fobj is NULL the routine always does newline conversion, and
2494
** it may peek one char ahead to gobble the second char in \r\n.
2495
** If fobj is non-NULL it must be a PyFileObject. In this case there
2496
** is no readahead but in stead a flag is used to skip a following
2497
** \n on the next read. Also, if the file is open in binary mode
2498
** the whole conversion is skipped. Finally, the routine keeps track of
2499
** the different types of newlines seen.
2500
** Note that we need no error handling: fgets() treats error and eof
2504
*/
/* NOTE(review): fragment — interleaved integers are original line numbers;
 * declarations, the store loop, FLOCKFILE, and the return are missing
 * from this extraction. */
Py_UniversalNewlineFgets(char *buf, int n, FILE *stream, PyObject *fobj)
2508
int newlinetypes = 0;
2510
int univ_newline = 1;
2513
if (!PyFile_Check(fobj)) {
2514
errno = ENXIO; /* What can you do... */
2517
univ_newline = ((PyFileObject *)fobj)->f_univ_newline;
2518
if ( !univ_newline )
2519
/* Binary / non-universal mode: plain fgets, no translation. */
return fgets(buf, n, stream);
2520
newlinetypes = ((PyFileObject *)fobj)->f_newlinetypes;
2521
skipnextlf = ((PyFileObject *)fobj)->f_skipnextlf;
2524
c = 'x'; /* Shut up gcc warning */
2525
while (--n > 0 && (c = GETC(stream)) != EOF ) {
2529
/* Seeing a \n here with skipnextlf true
2530
** means we saw a \r before.
2532
newlinetypes |= NEWLINE_CRLF;
2534
if (c == EOF) break;
2537
** Note that c == EOF also brings us here,
2538
** so we're okay if the last char in the file
2541
newlinetypes |= NEWLINE_CR;
2545
/* A \r is translated into a \n, and we skip
2546
** an adjacent \n, if any. We don't set the
2547
** newlinetypes flag until we've seen the next char.
2551
} else if ( c == '\n') {
2552
newlinetypes |= NEWLINE_LF;
2555
if (c == '\n') break;
2557
if ( c == EOF && skipnextlf )
2558
newlinetypes |= NEWLINE_CR;
2559
FUNLOCKFILE(stream);
2562
/* Persist newline bookkeeping back into the file object. */
((PyFileObject *)fobj)->f_newlinetypes = newlinetypes;
2563
((PyFileObject *)fobj)->f_skipnextlf = skipnextlf;
2564
} else if ( skipnextlf ) {
2565
/* If we have no file object we cannot save the
2566
** skipnextlf flag. We have to readahead, which
2567
** will cause a pause if we're reading from an
2568
** interactive stream, but that is very unlikely
2569
** unless we're doing something silly like
2570
** execfile("/dev/tty").
2582
** Py_UniversalNewlineFread is an fread variation that understands
2583
** all of \r, \n and \r\n conventions.
2584
** The stream should be opened in binary mode.
2585
** fobj must be a PyFileObject. In this case there
2586
** is no readahead but in stead a flag is used to skip a following
2587
** \n on the next read. Also, if the file is open in binary mode
2588
** the whole conversion is skipped. Finally, the routine keeps track of
2589
** the different types of newlines seen.
2592
Py_UniversalNewlineFread(char *buf, size_t n,
2593
FILE *stream, PyObject *fobj)
2596
PyFileObject *f = (PyFileObject *)fobj;
2597
int newlinetypes, skipnextlf;
2599
assert(buf != NULL);
2600
assert(stream != NULL);
2602
if (!fobj || !PyFile_Check(fobj)) {
2603
errno = ENXIO; /* What can you do... */
2606
if (!f->f_univ_newline)
2607
return fread(buf, 1, n, stream);
2608
newlinetypes = f->f_newlinetypes;
2609
skipnextlf = f->f_skipnextlf;
2610
/* Invariant: n is the number of bytes remaining to be filled
2618
nread = fread(dst, 1, n, stream);
2623
n -= nread; /* assuming 1 byte out for each in; will adjust */
2624
shortread = n != 0; /* true iff EOF or error */
2628
/* Save as LF and set flag to skip next LF. */
2632
else if (skipnextlf && c == '\n') {
2633
/* Skip LF, and remember we saw CR LF. */
2635
newlinetypes |= NEWLINE_CRLF;
2639
/* Normal char to be stored in buffer. Also
2640
* update the newlinetypes flag if either this
2641
* is an LF or the previous char was a CR.
2644
newlinetypes |= NEWLINE_LF;
2645
else if (skipnextlf)
2646
newlinetypes |= NEWLINE_CR;
2652
/* If this is EOF, update type flags. */
2653
if (skipnextlf && feof(stream))
2654
newlinetypes |= NEWLINE_CR;
2658
f->f_newlinetypes = newlinetypes;
2659
f->f_skipnextlf = skipnextlf;