142
142
tree.xmlFreeDoc(c_doc)
145
cdef int _initNewElement(_Element element, bint is_html, name_utf, ns_utf,
146
_BaseParser parser, attrib, nsmap, extra_attrs) except -1:
147
u"""Initialise a new Element object.
149
This is used when users instantiate a Python Element class
150
directly, without it being mapped to an existing XML node.
156
_htmlTagValidOrRaise(name_utf)
157
c_doc = _newHTMLDoc()
159
_tagValidOrRaise(name_utf)
161
c_node = _createElement(c_doc, name_utf)
163
if c_doc is not NULL:
164
tree.xmlFreeDoc(c_doc)
165
return python.PyErr_NoMemory()
166
tree.xmlDocSetRootElement(c_doc, c_node)
167
doc = _documentFactory(c_doc, parser)
168
# add namespaces to node if necessary
169
_initNodeNamespaces(c_node, doc, ns_utf, nsmap)
170
_initNodeAttributes(c_node, doc, attrib, extra_attrs)
171
_registerProxy(element, doc, c_node)
145
175
cdef _Element _makeSubElement(_Element parent, tag, text, tail,
146
176
attrib, nsmap, extra_attrs):
147
177
u"""Create a new child element and initialize text content, namespaces and
264
295
cdef _ns_node_ref* c_nsref_ptr
265
296
cdef xmlNs* c_nsdef
266
297
cdef xmlNode* c_node
267
cdef cstd.size_t c_ns_list_size
268
cdef cstd.size_t c_ns_list_len
298
cdef size_t c_ns_list_size
299
cdef size_t c_ns_list_len
272
303
c_ns_list_size = 0
358
389
cstd.free(c_ns_list)
392
cdef int _replaceNodeByChildren(_Document doc, xmlNode* c_node) except -1:
393
cdef xmlNode* c_parent
394
cdef xmlNode* c_child
395
if c_node.children is NULL:
396
tree.xmlUnlinkNode(c_node)
399
c_parent = c_node.parent
400
# fix parent links of children
401
c_child = c_node.children
402
while c_child is not NULL:
403
c_child.parent = c_parent
404
c_child = c_child.next
406
# fix namespace references of children if their parent's namespace
407
# declarations get lost
408
if c_node.nsDef is not NULL:
409
c_child = c_node.children
410
while c_child is not NULL:
411
moveNodeToDocument(doc, doc._c_doc, c_child)
412
c_child = c_child.next
414
# fix sibling links to/from child slice
415
if c_node.prev is NULL:
416
c_parent.children = c_node.children
418
c_node.prev.next = c_node.children
419
c_node.children.prev = c_node.prev
420
if c_node.next is NULL:
421
c_parent.last = c_node.last
423
c_node.next.prev = c_node.last
424
c_node.last.next = c_node.next
427
c_node.children = c_node.last = NULL
428
c_node.parent = c_node.next = c_node.prev = NULL
361
431
cdef object _attributeValue(xmlNode* c_element, xmlAttr* c_attrib_node):
416
486
value = _utf8(value)
417
487
c_value = _cstr(value)
419
tree.xmlSetProp(element._c_node, c_tag, c_value)
421
491
c_ns = element._doc._findOrBuildNodeNs(element._c_node,
423
tree.xmlSetNsProp(element._c_node, c_ns, c_tag, c_value)
493
tree.xmlSetNsProp(element._c_node, c_ns, c_tag, c_value)
426
496
cdef int _delAttribute(_Element element, key) except -1:
428
497
cdef char* c_href
429
498
ns, tag = _getNsTag(key)
674
743
cdef _collectChildren(_Element element):
675
744
cdef xmlNode* c_node
745
cdef list result = []
677
746
c_node = element._c_node.children
678
747
if c_node is not NULL:
679
748
if not _isElement(c_node):
680
749
c_node = _nextElement(c_node)
681
750
while c_node is not NULL:
682
python.PyList_Append(result, _elementFactory(element._doc, c_node))
751
result.append(_elementFactory(element._doc, c_node))
683
752
c_node = _nextElement(c_node)
840
909
c_target = c_tail
843
cdef void _copyTail(xmlNode* c_tail, xmlNode* c_target):
912
cdef int _copyTail(xmlNode* c_tail, xmlNode* c_target) except -1:
844
913
cdef xmlNode* c_new_tail
845
914
# tail copying support: look for any text nodes trailing this node and
846
915
# copy it to the target node
850
919
c_new_tail = tree.xmlDocCopyNode(c_tail, c_target.doc, 0)
852
921
c_new_tail = tree.xmlCopyNode(c_tail, 0)
922
if c_new_tail is NULL:
923
python.PyErr_NoMemory()
853
924
tree.xmlAddNextSibling(c_target, c_new_tail)
854
925
c_target = c_new_tail
855
926
c_tail = _textNodeOrSkip(c_tail.next)
929
cdef int _copyNonElementSiblings(xmlNode* c_node, xmlNode* c_target) except -1:
931
cdef xmlNode* c_sibling = c_node
932
while c_sibling.prev != NULL and \
933
(c_sibling.prev.type == tree.XML_PI_NODE or \
934
c_sibling.prev.type == tree.XML_COMMENT_NODE):
935
c_sibling = c_sibling.prev
936
while c_sibling != c_node:
937
c_copy = tree.xmlDocCopyNode(c_sibling, c_target.doc, 1)
939
python.PyErr_NoMemory()
940
tree.xmlAddPrevSibling(c_target, c_copy)
941
c_sibling = c_sibling.next
942
while c_sibling.next != NULL and \
943
(c_sibling.next.type == tree.XML_PI_NODE or \
944
c_sibling.next.type == tree.XML_COMMENT_NODE):
945
c_sibling = c_sibling.next
946
c_copy = tree.xmlDocCopyNode(c_sibling, c_target.doc, 1)
948
python.PyErr_NoMemory()
949
tree.xmlAddNextSibling(c_target, c_copy)
857
951
cdef int _deleteSlice(_Document doc, xmlNode* c_node,
858
952
Py_ssize_t count, Py_ssize_t step) except -1:
932
1026
# R->L, remember right neighbour
933
1027
c_orig_neighbour = _nextElement(c_node)
1029
# We remove the original slice elements one by one. Since we hold
1030
# a Python reference to all elements that we will insert, it is
1031
# safe to let _removeNode() try (and fail) to free them even if
1032
# the element itself or one of its descendents will be reinserted.
937
1035
while c_node is not NULL and c < slicelength:
1093
cdef int isutf8py(pystring):
1191
cdef int check_string_utf8(pystring):
1192
u"""Check if a string looks like valid UTF-8 XML content. Returns 0
1193
for ASCII, 1 for UTF-8 and -1 in the case of errors, such as NULL
1194
bytes or ASCII control characters.
1095
1197
cdef char* c_end
1131
1233
return python.PyString_FromStringAndSize(s, slen)
1133
1235
cdef object _utf8(object s):
1134
if python.PyString_Check(s):
1137
u"All strings must be XML compatible: Unicode or ASCII, no NULL bytes"
1138
elif python.PyUnicode_Check(s):
1237
if python.PyString_CheckExact(s):
1238
invalid = check_string_utf8(s)
1239
elif python.PyUnicode_CheckExact(s) or python.PyUnicode_Check(s):
1139
1240
s = python.PyUnicode_AsUTF8String(s)
1140
if isutf8py(s) == -1:
1142
u"All strings must be XML compatible: Unicode or ASCII, no NULL bytes"
1241
invalid = check_string_utf8(s) == -1
1242
elif python.PyString_Check(s):
1243
invalid = check_string_utf8(s)
1144
1245
raise TypeError, u"Argument must be string or unicode."
1248
u"All strings must be XML compatible: Unicode or ASCII, no NULL bytes"
1147
1251
cdef bint _isFilePath(char* c_path):
1148
1252
u"simple heuristic to see if a path is a filename"
1149
1254
# test if it looks like an absolute Unix path or a Windows network path
1150
1255
if c_path[0] == c'/':
1269
1375
return tree.xmlValidateNCName(c_name, 0) == 0
1271
1377
cdef int _htmlNameIsValid(char* c_name):
1272
1379
if c_name is NULL or c_name[0] == c'\0':
1274
1381
while c_name[0] != c'\0':
1275
if c_name[0] == c'&' or \
1276
c_name[0] == c'<' or \
1277
c_name[0] == c'>' or \
1278
c_name[0] == c'/' or \
1279
c_name[0] == c'"' or \
1280
c_name[0] == c"'" or \
1281
c_name[0] == c'\x09' or \
1282
c_name[0] == c'\x0A' or \
1283
c_name[0] == c'\x0B' or \
1284
c_name[0] == c'\x0C' or \
1285
c_name[0] == c'\x20':
1383
if c in (c'&', c'<', c'>', c'/', c'"', c"'",
1384
c'\t', c'\n', c'\x0B', c'\x0C', c'\r', c' '):
1287
1386
c_name = c_name + 1