2
/* Write Python objects to files and read them back.
3
This is intended for writing and reading compiled Python code only;
4
a true persistent storage facility would be much harder, since
5
it would have to take circular links and sharing into account. */
7
#define PY_SSIZE_T_CLEAN
10
#include "longintrepr.h"
14
/* High water mark to determine when the marshalled object is dangerously deep
15
* and risks coring the interpreter. When the object stack gets this deep,
16
* raise an exception instead of continuing. */
18
#define MAX_MARSHAL_STACK_DEPTH 2000
22
/* One-character type tags.  The writer emits one of these with w_byte()
   ahead of each value, and r_object() reads it back with r_byte() to
   decide how to decode what follows. */
#define TYPE_FALSE 'F'
24
#define TYPE_STOPITER 'S'
25
#define TYPE_ELLIPSIS '.'
27
#define TYPE_INT64 'I'
28
#define TYPE_FLOAT 'f'
29
#define TYPE_BINARY_FLOAT 'g'
30
#define TYPE_COMPLEX 'x'
31
#define TYPE_BINARY_COMPLEX 'y'
33
#define TYPE_STRING 's'
34
#define TYPE_INTERNED 't'
35
#define TYPE_STRINGREF 'R'
36
#define TYPE_TUPLE '('
40
#define TYPE_UNICODE 'u'
41
#define TYPE_UNKNOWN '?'
43
#define TYPE_FROZENSET '>'
49
/* If fp == NULL, the following are valid: */
53
PyObject *strings; /* dict on marshal, list on unmarshal */
57
#define w_byte(c, p) if (((p)->fp)) putc((c), (p)->fp); \
58
else if ((p)->ptr != (p)->end) *(p)->ptr++ = (c); \
62
w_more(int c, WFILE *p)
64
Py_ssize_t size, newsize;
66
return; /* An error already occurred */
67
size = PyString_Size(p->str);
68
newsize = size + size + 1024;
69
if (newsize > 32*1024*1024) {
70
newsize = size + (size >> 3); /* 12.5% overallocation */
72
if (_PyString_Resize(&p->str, newsize) != 0) {
73
p->ptr = p->end = NULL;
76
p->ptr = PyString_AS_STRING((PyStringObject *)p->str) + size;
78
PyString_AS_STRING((PyStringObject *)p->str) + newsize;
79
*p->ptr++ = Py_SAFE_DOWNCAST(c, int, char);
84
w_string(char *s, int n, WFILE *p)
87
fwrite(s, 1, n, p->fp);
98
w_short(int x, WFILE *p)
100
w_byte((char)( x & 0xff), p);
101
w_byte((char)((x>> 8) & 0xff), p);
105
w_long(long x, WFILE *p)
107
w_byte((char)( x & 0xff), p);
108
w_byte((char)((x>> 8) & 0xff), p);
109
w_byte((char)((x>>16) & 0xff), p);
110
w_byte((char)((x>>24) & 0xff), p);
115
w_long64(long x, WFILE *p)
123
w_object(PyObject *v, WFILE *p)
129
if (p->depth > MAX_MARSHAL_STACK_DEPTH) {
132
else if (v == NULL) {
133
w_byte(TYPE_NULL, p);
135
else if (v == Py_None) {
136
w_byte(TYPE_NONE, p);
138
else if (v == PyExc_StopIteration) {
139
w_byte(TYPE_STOPITER, p);
141
else if (v == Py_Ellipsis) {
142
w_byte(TYPE_ELLIPSIS, p);
144
else if (v == Py_False) {
145
w_byte(TYPE_FALSE, p);
147
else if (v == Py_True) {
148
w_byte(TYPE_TRUE, p);
150
else if (PyInt_CheckExact(v)) {
151
long x = PyInt_AS_LONG((PyIntObject *)v);
153
long y = Py_ARITHMETIC_RIGHT_SHIFT(long, x, 31);
155
w_byte(TYPE_INT64, p);
165
else if (PyLong_CheckExact(v)) {
166
PyLongObject *ob = (PyLongObject *)v;
167
w_byte(TYPE_LONG, p);
172
for (i = 0; i < n; i++)
173
w_short(ob->ob_digit[i], p);
175
else if (PyFloat_CheckExact(v)) {
176
if (p->version > 1) {
177
unsigned char buf[8];
178
if (_PyFloat_Pack8(PyFloat_AsDouble(v),
183
w_byte(TYPE_BINARY_FLOAT, p);
184
w_string((char*)buf, 8, p);
187
char buf[256]; /* Plenty to format any double */
188
PyFloat_AsReprString(buf, (PyFloatObject *)v);
190
w_byte(TYPE_FLOAT, p);
192
w_string(buf, (int)n, p);
195
#ifndef WITHOUT_COMPLEX
196
else if (PyComplex_CheckExact(v)) {
197
if (p->version > 1) {
198
unsigned char buf[8];
199
if (_PyFloat_Pack8(PyComplex_RealAsDouble(v),
204
w_byte(TYPE_BINARY_COMPLEX, p);
205
w_string((char*)buf, 8, p);
206
if (_PyFloat_Pack8(PyComplex_ImagAsDouble(v),
211
w_string((char*)buf, 8, p);
214
char buf[256]; /* Plenty to format any double */
216
w_byte(TYPE_COMPLEX, p);
217
temp = (PyFloatObject*)PyFloat_FromDouble(
218
PyComplex_RealAsDouble(v));
223
PyFloat_AsReprString(buf, temp);
227
w_string(buf, (int)n, p);
228
temp = (PyFloatObject*)PyFloat_FromDouble(
229
PyComplex_ImagAsDouble(v));
234
PyFloat_AsReprString(buf, temp);
238
w_string(buf, (int)n, p);
242
else if (PyString_CheckExact(v)) {
243
if (p->strings && PyString_CHECK_INTERNED(v)) {
244
PyObject *o = PyDict_GetItem(p->strings, v);
246
long w = PyInt_AsLong(o);
247
w_byte(TYPE_STRINGREF, p);
253
o = PyInt_FromSsize_t(PyDict_Size(p->strings));
255
PyDict_SetItem(p->strings, v, o) >= 0;
262
w_byte(TYPE_INTERNED, p);
266
w_byte(TYPE_STRING, p);
268
n = PyString_GET_SIZE(v);
270
/* huge strings are not supported */
276
w_string(PyString_AS_STRING(v), (int)n, p);
278
#ifdef Py_USING_UNICODE
279
else if (PyUnicode_CheckExact(v)) {
281
utf8 = PyUnicode_AsUTF8String(v);
287
w_byte(TYPE_UNICODE, p);
288
n = PyString_GET_SIZE(utf8);
295
w_string(PyString_AS_STRING(utf8), (int)n, p);
299
else if (PyTuple_CheckExact(v)) {
300
w_byte(TYPE_TUPLE, p);
303
for (i = 0; i < n; i++) {
304
w_object(PyTuple_GET_ITEM(v, i), p);
307
else if (PyList_CheckExact(v)) {
308
w_byte(TYPE_LIST, p);
309
n = PyList_GET_SIZE(v);
311
for (i = 0; i < n; i++) {
312
w_object(PyList_GET_ITEM(v, i), p);
315
else if (PyDict_CheckExact(v)) {
317
PyObject *key, *value;
318
w_byte(TYPE_DICT, p);
319
/* This one is NULL object terminated! */
321
while (PyDict_Next(v, &pos, &key, &value)) {
325
w_object((PyObject *)NULL, p);
327
else if (PyAnySet_CheckExact(v)) {
328
PyObject *value, *it;
330
if (PyObject_TypeCheck(v, &PySet_Type))
333
w_byte(TYPE_FROZENSET, p);
334
n = PyObject_Size(v);
341
it = PyObject_GetIter(v);
347
while ((value = PyIter_Next(it)) != NULL) {
352
if (PyErr_Occurred()) {
358
else if (PyCode_Check(v)) {
359
PyCodeObject *co = (PyCodeObject *)v;
360
w_byte(TYPE_CODE, p);
361
w_long(co->co_argcount, p);
362
w_long(co->co_nlocals, p);
363
w_long(co->co_stacksize, p);
364
w_long(co->co_flags, p);
365
w_object(co->co_code, p);
366
w_object(co->co_consts, p);
367
w_object(co->co_names, p);
368
w_object(co->co_varnames, p);
369
w_object(co->co_freevars, p);
370
w_object(co->co_cellvars, p);
371
w_object(co->co_filename, p);
372
w_object(co->co_name, p);
373
w_long(co->co_firstlineno, p);
374
w_object(co->co_lnotab, p);
376
else if (PyObject_CheckReadBuffer(v)) {
377
/* Write unknown buffer-style objects as a string */
379
PyBufferProcs *pb = v->ob_type->tp_as_buffer;
380
w_byte(TYPE_STRING, p);
381
n = (*pb->bf_getreadbuffer)(v, 0, (void **)&s);
388
w_string(s, (int)n, p);
391
w_byte(TYPE_UNKNOWN, p);
398
/* version currently has no effect for writing longs. */
400
PyMarshal_WriteLongToFile(long x, FILE *fp, int version)
407
wf.version = version;
412
PyMarshal_WriteObjectToFile(PyObject *x, FILE *fp, int version)
418
wf.strings = (version > 0) ? PyDict_New() : NULL;
419
wf.version = version;
421
Py_XDECREF(wf.strings);
424
typedef WFILE RFILE; /* Same struct with different invariants */
426
/* Fetch the next byte of p's in-memory buffer as a non-negative value and
   step ptr past it; yields EOF (leaving ptr alone) once ptr has reached end. */
#define rs_byte(p) (((p)->ptr >= (p)->end) ? EOF : (unsigned char)*(p)->ptr++)
428
/* Read one byte from p: from the stdio stream when fp is set, otherwise
   from the in-memory buffer via rs_byte(). */
#define r_byte(p) (!(p)->fp ? rs_byte(p) : getc((p)->fp))
431
r_string(char *s, int n, RFILE *p)
434
/* The result fits into int because it must be <=n. */
435
return (int)fread(s, 1, n, p->fp);
436
if (p->end - p->ptr < n)
437
n = (int)(p->end - p->ptr);
438
memcpy(s, p->ptr, n);
449
/* Sign-extension, in case short is wider than 16 bits */
458
register FILE *fp = p->fp;
461
x |= (long)getc(fp) << 8;
462
x |= (long)getc(fp) << 16;
463
x |= (long)getc(fp) << 24;
467
x |= (long)rs_byte(p) << 8;
468
x |= (long)rs_byte(p) << 16;
469
x |= (long)rs_byte(p) << 24;
472
/* Sign extension for 64-bit machines */
473
x |= -(x & 0x80000000L);
478
/* r_long64 deals with the TYPE_INT64 code. On a machine with
479
sizeof(long) > 4, it returns a Python int object, else a Python long
480
object. Note that w_long64 writes out TYPE_INT if 32 bits is enough,
481
so there's no inefficiency here in returning a PyLong on 32-bit boxes
482
for everything written via TYPE_INT64 (i.e., if an int is written via
483
TYPE_INT64, it *needs* more than 32 bits). */
488
long lo4 = r_long(p);
489
long hi4 = r_long(p);
491
long x = (hi4 << 32) | (lo4 & 0xFFFFFFFFL);
492
return PyInt_FromLong(x);
494
unsigned char buf[8];
496
int is_little_endian = (int)*(char*)&one;
497
if (is_little_endian) {
498
memcpy(buf, &lo4, 4);
499
memcpy(buf+4, &hi4, 4);
502
memcpy(buf, &hi4, 4);
503
memcpy(buf+4, &lo4, 4);
505
return _PyLong_FromByteArray(buf, 8, is_little_endian, 1);
512
/* NULL is a valid return value, it does not necessarily mean that
513
an exception is set. */
516
int type = r_byte(p);
521
if (p->depth > MAX_MARSHAL_STACK_DEPTH) {
523
PyErr_SetString(PyExc_ValueError, "recursion limit exceeded");
530
PyErr_SetString(PyExc_EOFError,
531
"EOF read where object expected");
545
Py_INCREF(PyExc_StopIteration);
546
retval = PyExc_StopIteration;
550
Py_INCREF(Py_Ellipsis);
551
retval = Py_Ellipsis;
565
retval = PyInt_FromLong(r_long(p));
569
retval = r_long64(p);
577
if (n < -INT_MAX || n > INT_MAX) {
578
PyErr_SetString(PyExc_ValueError,
584
ob = _PyLong_New(size);
590
for (i = 0; i < size; i++) {
591
int digit = r_short(p);
594
PyErr_SetString(PyExc_ValueError,
600
ob->ob_digit[i] = digit;
602
retval = (PyObject *)ob;
611
if (n == EOF || r_string(buf, (int)n, p) != n) {
612
PyErr_SetString(PyExc_EOFError,
613
"EOF read where object expected");
619
PyFPE_START_PROTECT("atof", break)
620
dx = PyOS_ascii_atof(buf);
621
PyFPE_END_PROTECT(dx)
622
retval = PyFloat_FromDouble(dx);
626
case TYPE_BINARY_FLOAT:
628
unsigned char buf[8];
630
if (r_string((char*)buf, 8, p) != 8) {
631
PyErr_SetString(PyExc_EOFError,
632
"EOF read where object expected");
636
x = _PyFloat_Unpack8(buf, 1);
637
if (x == -1.0 && PyErr_Occurred()) {
641
retval = PyFloat_FromDouble(x);
645
#ifndef WITHOUT_COMPLEX
651
if (n == EOF || r_string(buf, (int)n, p) != n) {
652
PyErr_SetString(PyExc_EOFError,
653
"EOF read where object expected");
659
PyFPE_START_PROTECT("atof", break;)
660
c.real = PyOS_ascii_atof(buf);
663
if (n == EOF || r_string(buf, (int)n, p) != n) {
664
PyErr_SetString(PyExc_EOFError,
665
"EOF read where object expected");
670
PyFPE_START_PROTECT("atof", break)
671
c.imag = PyOS_ascii_atof(buf);
673
retval = PyComplex_FromCComplex(c);
677
case TYPE_BINARY_COMPLEX:
679
unsigned char buf[8];
681
if (r_string((char*)buf, 8, p) != 8) {
682
PyErr_SetString(PyExc_EOFError,
683
"EOF read where object expected");
687
c.real = _PyFloat_Unpack8(buf, 1);
688
if (c.real == -1.0 && PyErr_Occurred()) {
692
if (r_string((char*)buf, 8, p) != 8) {
693
PyErr_SetString(PyExc_EOFError,
694
"EOF read where object expected");
698
c.imag = _PyFloat_Unpack8(buf, 1);
699
if (c.imag == -1.0 && PyErr_Occurred()) {
703
retval = PyComplex_FromCComplex(c);
711
if (n < 0 || n > INT_MAX) {
712
PyErr_SetString(PyExc_ValueError, "bad marshal data");
716
v = PyString_FromStringAndSize((char *)NULL, n);
721
if (r_string(PyString_AS_STRING(v), (int)n, p) != n) {
723
PyErr_SetString(PyExc_EOFError,
724
"EOF read where object expected");
728
if (type == TYPE_INTERNED) {
729
PyString_InternInPlace(&v);
730
if (PyList_Append(p->strings, v) < 0) {
740
if (n < 0 || n >= PyList_GET_SIZE(p->strings)) {
741
PyErr_SetString(PyExc_ValueError, "bad marshal data");
745
v = PyList_GET_ITEM(p->strings, n);
750
#ifdef Py_USING_UNICODE
756
if (n < 0 || n > INT_MAX) {
757
PyErr_SetString(PyExc_ValueError, "bad marshal data");
761
buffer = PyMem_NEW(char, n);
762
if (buffer == NULL) {
763
retval = PyErr_NoMemory();
766
if (r_string(buffer, (int)n, p) != n) {
768
PyErr_SetString(PyExc_EOFError,
769
"EOF read where object expected");
773
v = PyUnicode_DecodeUTF8(buffer, n, NULL);
782
if (n < 0 || n > INT_MAX) {
783
PyErr_SetString(PyExc_ValueError, "bad marshal data");
787
v = PyTuple_New((int)n);
792
for (i = 0; i < n; i++) {
795
if (!PyErr_Occurred())
796
PyErr_SetString(PyExc_TypeError,
797
"NULL object in marshal data");
802
PyTuple_SET_ITEM(v, (int)i, v2);
809
if (n < 0 || n > INT_MAX) {
810
PyErr_SetString(PyExc_ValueError, "bad marshal data");
814
v = PyList_New((int)n);
819
for (i = 0; i < n; i++) {
822
if (!PyErr_Occurred())
823
PyErr_SetString(PyExc_TypeError,
824
"NULL object in marshal data");
829
PyList_SET_ITEM(v, (int)i, v2);
847
PyDict_SetItem(v, key, val);
851
if (PyErr_Occurred()) {
861
if (n < 0 || n > INT_MAX) {
862
PyErr_SetString(PyExc_ValueError, "bad marshal data");
866
v = (type == TYPE_SET) ? PySet_New(NULL) : PyFrozenSet_New(NULL);
871
for (i = 0; i < n; i++) {
874
if (!PyErr_Occurred())
875
PyErr_SetString(PyExc_TypeError,
876
"NULL object in marshal data");
881
if (PySet_Add(v, v2) == -1) {
893
if (PyEval_GetRestricted()) {
894
PyErr_SetString(PyExc_RuntimeError,
895
"cannot unmarshal code objects in "
896
"restricted execution mode");
905
PyObject *code = NULL;
906
PyObject *consts = NULL;
907
PyObject *names = NULL;
908
PyObject *varnames = NULL;
909
PyObject *freevars = NULL;
910
PyObject *cellvars = NULL;
911
PyObject *filename = NULL;
912
PyObject *name = NULL;
914
PyObject *lnotab = NULL;
918
/* XXX ignore long->int overflows for now */
919
argcount = (int)r_long(p);
920
nlocals = (int)r_long(p);
921
stacksize = (int)r_long(p);
922
flags = (int)r_long(p);
926
consts = r_object(p);
932
varnames = r_object(p);
933
if (varnames == NULL)
935
freevars = r_object(p);
936
if (freevars == NULL)
938
cellvars = r_object(p);
939
if (cellvars == NULL)
941
filename = r_object(p);
942
if (filename == NULL)
947
firstlineno = (int)r_long(p);
948
lnotab = r_object(p);
952
v = (PyObject *) PyCode_New(
953
argcount, nlocals, stacksize, flags,
954
code, consts, names, varnames,
955
freevars, cellvars, filename, name,
956
firstlineno, lnotab);
962
Py_XDECREF(varnames);
963
Py_XDECREF(freevars);
964
Py_XDECREF(cellvars);
965
Py_XDECREF(filename);
974
/* Bogus data got written, which isn't ideal.
975
This will let you keep working and recover. */
976
PyErr_SetString(PyExc_ValueError, "bad marshal data");
986
read_object(RFILE *p)
989
if (PyErr_Occurred()) {
990
fprintf(stderr, "XXX readobject called with exception set\n");
994
if (v == NULL && !PyErr_Occurred())
995
PyErr_SetString(PyExc_TypeError, "NULL object in marshal data");
1000
PyMarshal_ReadShortFromFile(FILE *fp)
1006
rf.end = rf.ptr = NULL;
1007
return r_short(&rf);
1011
PyMarshal_ReadLongFromFile(FILE *fp)
1016
rf.ptr = rf.end = NULL;
1021
/* Return size of file in bytes; < 0 if unknown. */
1023
getfilesize(FILE *fp)
1026
if (fstat(fileno(fp), &st) != 0)
1033
/* If we can get the size of the file up-front, and it's reasonably small,
1034
* read it in one gulp and delegate to ...FromString() instead. Much quicker
1035
* than reading a byte at a time from file; speeds .pyc imports.
1036
* CAUTION: since this may read the entire remainder of the file, don't
1037
* call it unless you know you're done with the file. */
1040
PyMarshal_ReadLastObjectFromFile(FILE *fp)
1042
/* 75% of 2.1's .pyc files can exploit SMALL_FILE_LIMIT.
1043
* REASONABLE_FILE_LIMIT is by definition something big enough for Tkinter.pyc. */
1045
#define SMALL_FILE_LIMIT (1L << 14)
1046
#define REASONABLE_FILE_LIMIT (1L << 18)
1051
filesize = getfilesize(fp);
1053
char buf[SMALL_FILE_LIMIT];
1055
if (filesize <= SMALL_FILE_LIMIT)
1057
else if (filesize <= REASONABLE_FILE_LIMIT)
1058
pBuf = (char *)PyMem_MALLOC(filesize);
1062
/* filesize must fit into an int, because it
1063
is smaller than REASONABLE_FILE_LIMIT */
1064
n = fread(pBuf, 1, (int)filesize, fp);
1065
v = PyMarshal_ReadObjectFromString(pBuf, n);
1073
/* We don't have fstat, or we do but the file is larger than
1074
* REASONABLE_FILE_LIMIT or malloc failed -- read a byte at a time. */
1076
return PyMarshal_ReadObjectFromFile(fp);
1078
#undef SMALL_FILE_LIMIT
1079
#undef REASONABLE_FILE_LIMIT
1083
PyMarshal_ReadObjectFromFile(FILE *fp)
1088
rf.strings = PyList_New(0);
1090
rf.ptr = rf.end = NULL;
1091
result = r_object(&rf);
1092
Py_DECREF(rf.strings);
1097
PyMarshal_ReadObjectFromString(char *str, Py_ssize_t len)
1104
rf.strings = PyList_New(0);
1106
result = r_object(&rf);
1107
Py_DECREF(rf.strings);
1112
PyMarshal_WriteObjectToString(PyObject *x, int version)
1116
wf.str = PyString_FromStringAndSize((char *)NULL, 50);
1119
wf.ptr = PyString_AS_STRING((PyStringObject *)wf.str);
1120
wf.end = wf.ptr + PyString_Size(wf.str);
1123
wf.version = version;
1124
wf.strings = (version > 0) ? PyDict_New() : NULL;
1126
Py_XDECREF(wf.strings);
1127
if (wf.str != NULL) {
1128
char *base = PyString_AS_STRING((PyStringObject *)wf.str);
1129
if (wf.ptr - base > PY_SSIZE_T_MAX) {
1131
PyErr_SetString(PyExc_OverflowError,
1132
"too much marshall data for a string");
1135
_PyString_Resize(&wf.str, (Py_ssize_t)(wf.ptr - base));
1139
PyErr_SetString(PyExc_ValueError,
1140
(wf.error==1)?"unmarshallable object"
1141
:"object too deeply nested to marshal");
1147
/* And an interface for Python programs... */
1150
marshal_dump(PyObject *self, PyObject *args)
1155
int version = Py_MARSHAL_VERSION;
1156
if (!PyArg_ParseTuple(args, "OO|i:dump", &x, &f, &version))
1158
if (!PyFile_Check(f)) {
1159
PyErr_SetString(PyExc_TypeError,
1160
"marshal.dump() 2nd arg must be file");
1163
wf.fp = PyFile_AsFile(f);
1165
wf.ptr = wf.end = NULL;
1168
wf.strings = (version > 0) ? PyDict_New() : 0;
1169
wf.version = version;
1171
Py_XDECREF(wf.strings);
1173
PyErr_SetString(PyExc_ValueError,
1174
(wf.error==1)?"unmarshallable object"
1175
:"object too deeply nested to marshal");
1183
marshal_load(PyObject *self, PyObject *f)
1187
if (!PyFile_Check(f)) {
1188
PyErr_SetString(PyExc_TypeError,
1189
"marshal.load() arg must be file");
1192
rf.fp = PyFile_AsFile(f);
1193
rf.strings = PyList_New(0);
1195
result = read_object(&rf);
1196
Py_DECREF(rf.strings);
1201
marshal_dumps(PyObject *self, PyObject *args)
1204
int version = Py_MARSHAL_VERSION;
1205
if (!PyArg_ParseTuple(args, "O|i:dumps", &x, &version))
1207
return PyMarshal_WriteObjectToString(x, version);
1211
marshal_loads(PyObject *self, PyObject *args)
1217
if (!PyArg_ParseTuple(args, "s#:loads", &s, &n))
1222
rf.strings = PyList_New(0);
1224
result = read_object(&rf);
1225
Py_DECREF(rf.strings);
1229
static PyMethodDef marshal_methods[] = {
1230
{"dump", marshal_dump, METH_VARARGS},
1231
{"load", marshal_load, METH_O},
1232
{"dumps", marshal_dumps, METH_VARARGS},
1233
{"loads", marshal_loads, METH_VARARGS},
1234
{NULL, NULL} /* sentinel */
1238
PyMarshal_Init(void)
1240
PyObject *mod = Py_InitModule("marshal", marshal_methods);
1243
PyModule_AddIntConstant(mod, "version", Py_MARSHAL_VERSION);