/* Copyright (C) 2009 Canonical Ltd
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 3 as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

/* The core of parsing is split into a pure C module, so that we can guarantee
 * that we won't be creating objects in the internal loops.
 */

20
#include "_scanner_core.h"

/* Fallback for Python headers that do not provide Py_TYPE(). */
#ifndef Py_TYPE
#  define Py_TYPE(o) ((o)->ob_type)
#endif

/* SSIZET_FMT is the printf conversion used for Py_ssize_t values.
 *
 * %zd is the gcc convention for formatting a size_t-sized signed value.
 * The Windows C runtime does not understand 'z'.  On 64-bit Windows 'long'
 * is only 32 bits wide, so "%ld" would not match a 64-bit Py_ssize_t; the
 * Microsoft-specific I64 size prefix is used there instead.
 *
 * NOTE(review): the outer guard is assumed to be _WIN32 -- confirm against
 * the build configuration.
 */
#if defined(_WIN32)
#  if defined(_M_X64) || defined(__amd64__)
#    define SSIZET_FMT "%I64d"
#  else
#    define SSIZET_FMT "%d"
#  endif
#  define snprintf _snprintf
#else
#  define SSIZET_FMT "%zd"
#endif

/* Map 'inline' onto whatever spelling the compiler accepts. */
#if defined(__GNUC__)
#  define inline __inline__
#elif defined(_MSC_VER)
#  define inline __inline
#else
#  define inline
#endif

55
void _dump_object_to_ref_info(struct ref_info *info, PyObject *c_obj,
58
static void _write_to_ref_info(struct ref_info *info, const char *fmt_string, ...)
59
__attribute__((format(printf, 2, 3)));
61
static void _write_to_ref_info(struct ref_info *info, const char *fmt_string, ...);
64
/* The address of the last thing we dumped. Stuff like dumping the string
65
* interned dictionary will dump the same string 2x in a row. This helps
68
static PyObject *_last_dumped = NULL;
77
_basic_object_size(PyObject *c_obj)
80
size = c_obj->ob_type->tp_basicsize;
81
if (PyType_HasFeature(c_obj->ob_type, Py_TPFLAGS_HAVE_GC)) {
82
size += sizeof(PyGC_Head);
89
_var_object_size(PyVarObject *c_obj)
91
Py_ssize_t num_entries;
92
num_entries = PyObject_Size((PyObject *)c_obj);
93
if (num_entries < 0) {
94
/* This object doesn't support len() */
98
return _basic_object_size((PyObject *)c_obj)
99
+ num_entries * c_obj->ob_type->tp_itemsize;
103
_size_of_from__sizeof__(PyObject *c_obj)
105
PyObject *size_obj = NULL;
106
Py_ssize_t size = -1;
108
if (PyType_CheckExact(c_obj)) {
109
// Types themselves may have a __sizeof__ attribute, but it is the
110
// unbound method, which takes an instance
113
size_obj = PyObject_CallMethod(c_obj, "__sizeof__", NULL);
114
if (size_obj == NULL) {
115
// Not sure what happened, but this won't work, it could be a simple
116
// attribute error, or it could be something else.
120
size = PyInt_AsSsize_t(size_obj);
122
// Probably an error occurred, we don't know for sure, but we might as
123
// well just claim that we don't know the size. We *could* check
124
// PyErr_Occurred(), but if we are just clearing it anyway...
128
// There is one trick left. Namely, __sizeof__ doesn't seem to include the
129
// GC overhead, so let's add that back in
130
if (PyType_HasFeature(c_obj->ob_type, Py_TPFLAGS_HAVE_GC)) {
131
size += sizeof(PyGC_Head);
138
_size_of_list(PyListObject *c_obj)
141
size = _basic_object_size((PyObject *)c_obj);
142
size += sizeof(PyObject*) * c_obj->allocated;
148
_size_of_set(PySetObject *c_obj)
151
size = _basic_object_size((PyObject *)c_obj);
152
if (c_obj->table != c_obj->smalltable) {
153
size += sizeof(setentry) * (c_obj->mask + 1);
160
_size_of_dict(PyDictObject *c_obj)
163
size = _basic_object_size((PyObject *)c_obj);
164
if (c_obj->ma_table != c_obj->ma_smalltable) {
165
size += sizeof(PyDictEntry) * (c_obj->ma_mask + 1);
172
_size_of_unicode(PyUnicodeObject *c_obj)
175
size = _basic_object_size((PyObject *)c_obj);
176
size += Py_UNICODE_SIZE * c_obj->length;
182
_size_of(PyObject *c_obj)
186
if PyList_Check(c_obj) {
187
return _size_of_list((PyListObject *)c_obj);
188
} else if PyAnySet_Check(c_obj) {
189
return _size_of_set((PySetObject *)c_obj);
190
} else if PyDict_Check(c_obj) {
191
return _size_of_dict((PyDictObject *)c_obj);
192
} else if PyUnicode_Check(c_obj) {
193
return _size_of_unicode((PyUnicodeObject *)c_obj);
196
size = _size_of_from__sizeof__(c_obj);
201
if (c_obj->ob_type->tp_itemsize != 0) {
202
// Variable length object with inline storage
203
// total size is tp_itemsize * ob_size
204
return _var_object_size((PyVarObject *)c_obj);
206
return _basic_object_size(c_obj);
211
_write_to_ref_info(struct ref_info *info, const char *fmt_string, ...)
213
char temp_buf[1024] = {0};
217
va_start(args, fmt_string);
218
n_bytes = vsnprintf(temp_buf, 1024, fmt_string, args);
220
info->write(info->data, temp_buf, n_bytes);
225
_write_static_to_info(struct ref_info *info, const char data[])
227
/* These are static strings, do we need to do strlen() each time? */
228
info->write(info->data, data, strlen(data));
232
_dump_reference(PyObject *c_obj, void* val)
234
struct ref_info *info;
236
char buf[24] = {0}; /* it seems that 64-bit long fits in 20 decimals */
238
info = (struct ref_info*)val;
239
/* TODO: This is casting a pointer into an unsigned long, which we assume
240
* is 'long enough'. We probably should really be using uintptr_t or
241
* something like that.
245
n_bytes = snprintf(buf, 24, "%lu", (unsigned long)c_obj);
247
n_bytes = snprintf(buf, 24, ", %lu", (unsigned long)c_obj);
249
info->write(info->data, buf, n_bytes);
255
_dump_child(PyObject *c_obj, void *val)
257
struct ref_info *info;
258
info = (struct ref_info *)val;
259
// The caller has asked us to dump self, but no recursive children
260
_dump_object_to_ref_info(info, c_obj, 0);
266
_dump_if_no_traverse(PyObject *c_obj, void *val)
268
struct ref_info *info;
269
info = (struct ref_info *)val;
270
/* Objects without traverse are simple things without refs, and built-in
271
* types have a traverse, but they won't be part of gc.get_objects().
273
if (Py_TYPE(c_obj)->tp_traverse == NULL
274
|| (PyType_Check(c_obj)
275
&& !PyType_HasFeature((PyTypeObject*)c_obj, Py_TPFLAGS_HEAPTYPE)))
277
_dump_object_to_ref_info(info, c_obj, 0);
278
} else if (!PyType_HasFeature(Py_TYPE(c_obj), Py_TPFLAGS_HAVE_GC)) {
279
/* This object is not considered part of the garbage collector, even
280
* if it does [not] have a tp_traverse function.
282
_dump_object_to_ref_info(info, c_obj, 1);
289
_dump_json_c_string(struct ref_info *info, const char *buf, Py_ssize_t len)
293
char out_buf[1024] = {0};
295
// Never try to dump more than 100 chars
303
end = out_buf + 1024;
305
for (i = 0; i < len; ++i) {
307
if (c <= 0x1f || c > 0x7e) { // use the unicode escape sequence
308
ptr += snprintf(ptr, end-ptr, "\\u00%02x",
309
((unsigned short)c & 0xFF));
310
} else if (c == '\\' || c == '/' || c == '"') {
321
info->write(info->data, out_buf, ptr-out_buf);
325
_dump_string(struct ref_info *info, PyObject *c_obj)
330
str_buf = PyString_AS_STRING(c_obj);
331
str_size = PyString_GET_SIZE(c_obj);
333
_dump_json_c_string(info, str_buf, str_size);
338
_dump_unicode(struct ref_info *info, PyObject *c_obj)
340
// TODO: consider writing to a small memory buffer, before writing to disk
342
Py_UNICODE *uni_buf, c;
344
char out_buf[1024] = {0}, *ptr, *end;
346
uni_buf = PyUnicode_AS_UNICODE(c_obj);
347
uni_size = PyUnicode_GET_SIZE(c_obj);
349
// Never try to dump more than this many chars
350
if (uni_size > 100) {
354
end = out_buf + 1024;
356
for (i = 0; i < uni_size; ++i) {
358
if (c <= 0x1f || c > 0x7e) {
359
ptr += snprintf(ptr, end-ptr, "\\u%04x",
360
((unsigned short)c & 0xFFFF));
361
} else if (c == '\\' || c == '/' || c == '"') {
370
/* We should fail here */
372
info->write(info->data, out_buf, ptr-out_buf);
377
_dump_object_info(write_callback write, void *callee_data,
378
PyObject *c_obj, PyObject *nodump, int recurse)
380
struct ref_info info;
383
info.data = callee_data;
385
info.nodump = nodump;
386
if (nodump != NULL) {
389
_dump_object_to_ref_info(&info, c_obj, recurse);
390
if (info.nodump != NULL) {
396
_dump_object_to_ref_info(struct ref_info *info, PyObject *c_obj, int recurse)
401
if (info->nodump != NULL &&
402
info->nodump != Py_None
403
&& PyAnySet_Check(info->nodump))
405
if (c_obj == info->nodump) {
406
/* Don't dump the 'nodump' set. */
409
/* note this isn't exactly what we want. It checks for equality, not
410
* the exact object. However, for what it is used for, it is often
413
retval = PySet_Contains(info->nodump, c_obj);
415
/* This object is part of the no-dump set, don't dump the object */
417
} else if (retval == -1) {
418
/* An error was raised, but we don't care, ignore it */
423
if (c_obj == _last_dumped) {
424
/* We just dumped this object, no need to do it again. */
427
_last_dumped = c_obj;
428
size = _size_of(c_obj);
429
_write_to_ref_info(info, "{\"address\": %lu, \"type\": ",
430
(unsigned long)c_obj);
431
_dump_json_c_string(info, c_obj->ob_type->tp_name, -1);
432
_write_to_ref_info(info, ", \"size\": " SSIZET_FMT, _size_of(c_obj));
434
if (PyModule_Check(c_obj)) {
435
_write_static_to_info(info, ", \"name\": ");
436
_dump_json_c_string(info, PyModule_GetName(c_obj), -1);
437
} else if (PyFunction_Check(c_obj)) {
438
_write_static_to_info(info, ", \"name\": ");
439
_dump_string(info, ((PyFunctionObject *)c_obj)->func_name);
440
} else if (PyType_Check(c_obj)) {
441
_write_static_to_info(info, ", \"name\": ");
442
_dump_json_c_string(info, ((PyTypeObject *)c_obj)->tp_name, -1);
443
} else if (PyClass_Check(c_obj)) {
444
/* Old style class */
445
_write_static_to_info(info, ", \"name\": ");
446
_dump_string(info, ((PyClassObject *)c_obj)->cl_name);
448
if (PyString_Check(c_obj)) {
449
_write_to_ref_info(info, ", \"len\": " SSIZET_FMT, PyString_GET_SIZE(c_obj));
450
_write_static_to_info(info, ", \"value\": ");
451
_dump_string(info, c_obj);
452
} else if (PyUnicode_Check(c_obj)) {
453
_write_to_ref_info(info, ", \"len\": " SSIZET_FMT, PyUnicode_GET_SIZE(c_obj));
454
_write_static_to_info(info, ", \"value\": ");
455
_dump_unicode(info, c_obj);
456
} else if (PyBool_Check(c_obj)) {
457
if (c_obj == Py_True) {
458
_write_static_to_info(info, ", \"value\": \"True\"");
459
} else if (c_obj == Py_False) {
460
_write_static_to_info(info, ", \"value\": \"False\"");
462
_write_to_ref_info(info, ", \"value\": %ld", PyInt_AS_LONG(c_obj));
464
} else if (PyInt_CheckExact(c_obj)) {
465
_write_to_ref_info(info, ", \"value\": %ld", PyInt_AS_LONG(c_obj));
466
} else if (PyTuple_Check(c_obj)) {
467
_write_to_ref_info(info, ", \"len\": " SSIZET_FMT, PyTuple_GET_SIZE(c_obj));
468
} else if (PyList_Check(c_obj)) {
469
_write_to_ref_info(info, ", \"len\": " SSIZET_FMT, PyList_GET_SIZE(c_obj));
470
} else if (PyAnySet_Check(c_obj)) {
471
_write_to_ref_info(info, ", \"len\": " SSIZET_FMT, PySet_GET_SIZE(c_obj));
472
} else if (PyDict_Check(c_obj)) {
473
_write_to_ref_info(info, ", \"len\": " SSIZET_FMT, PyDict_Size(c_obj));
475
_write_static_to_info(info, ", \"refs\": [");
476
if (Py_TYPE(c_obj)->tp_traverse != NULL) {
478
Py_TYPE(c_obj)->tp_traverse(c_obj, _dump_reference, info);
480
_write_static_to_info(info, "]}\n");
481
if (Py_TYPE(c_obj)->tp_traverse != NULL && recurse != 0) {
482
if (recurse == 2) { /* Always dump one layer deeper */
483
Py_TYPE(c_obj)->tp_traverse(c_obj, _dump_child, info);
484
} else if (recurse == 1) {
485
/* strings and such aren't in gc.get_objects, so we need to dump
486
* them when they are referenced.
488
Py_TYPE(c_obj)->tp_traverse(c_obj, _dump_if_no_traverse, info);
494
_append_object(PyObject *visiting, void* data)
497
lst = (PyObject *)data;
501
if (PyList_Append(data, visiting) == -1) {
507
* Return a PyList of all objects referenced via tp_traverse.
509
PyObject *_get_referents(PyObject *c_obj)
517
if (Py_TYPE(c_obj)->tp_traverse != NULL) {
518
Py_TYPE(c_obj)->tp_traverse(c_obj, _append_object, lst);