504
def visit_text(self, node):
505
self.text.append(node.data)
504
507
def visit_element(self, node):
505
508
name = node.localName
506
509
if name is None: # not sure this can happen here (DOM comment node), but just for the case
509
func = getattr(self, "process_" + name, None)
511
func = getattr(self, "process_%s" % name, None)
512
elif name in ('h1', 'h2', 'h3', 'h4', 'h5', 'h6',):
513
self.process_heading(node)
514
elif name in ('ol', 'ul',):
515
self.process_list(node)
517
515
self.process_inline(node)
519
def process_br(self, node):
520
self.text.append('\n') # without this, std multi-line text below some heading misses a whitespace
521
# when it gets merged to float text, like word word wordword word word
523
517
def visit_node_list_element_only(self, nodelist):
524
518
for node in nodelist:
525
519
if node.nodeType == Node.ELEMENT_NODE:
534
528
result.extend(self.node_list_text_only(node.childNodes))
535
529
return "".join(result)
537
def visit_text(self, node):
538
self.text.append(node.data)
540
def process_dl(self, node):
542
indent = " " * self.depth
531
def process_page(self, node):
543
532
for i in node.childNodes:
544
533
if i.nodeType == Node.ELEMENT_NODE:
545
if i.localName == 'dt':
546
self.text.append(indent)
547
text = self.node_list_text_only(i.childNodes)
548
self.text.append(text.replace("\n", " "))
549
elif i.localName == 'dd':
550
self.text.append(":: ")
551
self.process_list_item(i)
553
raise ConvertError("Illegal list element %s" % i.localName)
555
self.text.append("\n")
534
self.visit_element(i)
535
elif i.nodeType == Node.TEXT_NODE: # if this is missing, all std text under a headline is dropped!
536
txt = i.data.strip() # IMPORTANT: don't leave this unstripped or there will be wrong blanks
538
self.text.append(txt)
539
#we use <pre class="comment"> now, so this is currently unused:
540
#elif i.nodeType == Node.COMMENT_NODE:
541
# self.text.append(i.data)
542
# self.text.append("\n")
544
def process_br(self, node):
545
self.text.append('\n') # without this, std multi-line text below some heading misses a whitespace
546
# when it gets merged to float text, like word word wordword word word
558
548
def process_heading(self, node):
559
549
text = self.node_list_text_only(node.childNodes).strip()
563
553
self.text.append(self.new_line)
564
554
self.text.append("%s %s %s" % (hstr, text.replace("\n", " "), hstr))
565
555
self.text.append(self.new_line)
557
process_h1 = process_heading
558
process_h2 = process_heading
559
process_h3 = process_heading
560
process_h4 = process_heading
561
process_h5 = process_heading
562
process_h6 = process_heading
567
564
def _get_list_item_markup(self, list, listitem):
568
markup = " " * self.depth
570
if list.localName == 'ol':
566
#indent = str(self.depth) * self.depth # nice for debugging :)
567
indent = " " * self.depth
569
name = list.localName
571
class_ = listitem.getAttribute("class")
571
574
if list.hasAttribute("type"):
572
575
type = list.getAttribute("type")
575
markup = "%s%s. " % (markup, type)
578
markup = "%s. " % type
577
580
class_ = listitem.getAttribute("class")
578
581
if class_ == "gap":
579
markup = "\n" + markup
580
583
style = listitem.getAttribute("style")
581
if not re.match(u"list-style-type:\s*none", style, re.I):
584
if re.match(u"list-style-type:\s*none", style, re.I):
591
raise ConvertError("Illegal list type %s" % name)
592
return before, indent, markup
594
def process_dl(self, node):
596
markup = ":: " # can there be a dl dd without dt?
597
for i in node.childNodes:
598
if i.nodeType == Node.ELEMENT_NODE:
601
before, indent, markup = self._get_list_item_markup(node, i)
602
self.text.append(before+indent)
603
text = self.node_list_text_only(i.childNodes)
604
self.text.append(text.replace("\n", " "))
606
self.text.append(markup)
607
self.process_list_item(i, indent)
609
raise ConvertError("Illegal list element %s" % i.localName)
612
self.text.append("\n")
585
614
def process_list(self, node):
588
617
if i.nodeType == Node.ELEMENT_NODE:
589
618
name = i.localName
591
self.text.append(self._get_list_item_markup(node, i))
592
self.process_list_item(i)
620
before, indent, markup = self._get_list_item_markup(node, i)
621
self.text.append(before+indent+markup)
622
self.process_list_item(i, indent)
593
623
elif name in ('ol', 'ul',):
594
624
self.process_list(i)
595
625
elif name == 'dl':
596
626
self.process_dl(i)
598
628
raise ConvertError("Illegal list element %s" % i.localName)
600
631
self.text.append("\n")
603
def process_list_item(self, node):
633
process_ul = process_list
634
process_ol = process_list
636
def process_list_item(self, node, indent):
605
639
for i in node.childNodes:
606
640
name = i.localName
643
self.text.append(indent)
608
644
self.process_paragraph_item(i)
609
645
self.text.append("\n")
611
647
elif name == 'pre':
649
self.text.append(indent)
612
650
self.process_preformatted_item(i)
614
652
elif name in ('ol', 'ul',):
618
656
self.process_dl(i)
620
658
elif name == 'table':
660
self.text.append(indent)
621
661
self.process_table(i)
624
664
# self.process_inline(i)
627
668
self.process_paragraph_item(node)
628
669
self.text.append("\n")
630
671
def process_blockquote(self, node):
672
# XXX this does not really work. e.g.:
632
677
for i in node.childNodes:
633
678
if i.nodeType == Node.ELEMENT_NODE:
652
697
self.visit_node_list_element_only(i.childNodes)
653
698
elif name == 'blockquote':
654
699
self.process_blockquote(i)
655
elif name in ('br',):
656
703
self.process_br(i)
658
705
raise ConvertError("process_blockquote: Don't support %s element" % name)
661
def process_page(self, node):
662
for i in node.childNodes:
663
if i.nodeType == Node.ELEMENT_NODE:
664
self.visit_element(i)
665
elif i.nodeType == Node.TEXT_NODE: # if this is missing, all std text under a headline is dropped!
668
self.text.append(txt)
669
#we use <pre class="comment"> now, so this is currently unused:
670
#elif i.nodeType == Node.COMMENT_NODE:
671
# self.text.append(i.data)
672
# self.text.append("\n")
674
708
def process_inline(self, node):
675
709
if node.nodeType == Node.TEXT_NODE:
676
710
self.text.append(node.data.strip('\n'))
679
713
name = node.localName # can be None for DOM Comment nodes
682
func = getattr(self, "process_" + name, None)
717
if name in ('h1', 'h2', 'h3', 'h4', 'h5', 'h6',): # headers are not allowed here (e.g. inside a ul li),
718
text = self.node_list_text_only(node.childNodes).strip() # but can be inserted via the editor
719
self.text.append(text) # so we just drop the header markup and keep the text
722
func = getattr(self, "process_%s" % name, None)
707
747
elif name == 'font':
708
748
command = "" # just throw away font settings
709
elif name in ('h1', 'h2', 'h3', 'h4', 'h5', 'h6',): # headers are not allowed here (e.g. inside a ul li),
710
text = self.node_list_text_only(node.childNodes).strip() # but can be inserted via the editor
711
self.text.append(text) # so we just drop the header markup and keep the text
714
750
raise ConvertError("process_inline: Don't support %s element" % name)
731
767
def process_div(self, node):
732
768
# ignore div tags - just descend
733
769
for i in node.childNodes:
734
self.process_inline(i)
770
self.visit_element(i)
736
772
def process_tt(self, node):
737
773
text = self.node_list_text_only(node.childNodes).replace("\n", " ")
1130
1166
pass #print name, data, filename, alt
1131
1167
raise ConvertError("Unknown smiley icon '%s'" % filename)
1133
elif src and src.startswith("http://") and wikiutil.isPicture(src):
1169
elif src and src.startswith("http:") and wikiutil.isPicture(src):
1134
1170
self.text.extend([self.white_space, src, self.white_space])
1136
1172
raise ConvertError("Strange image src: '%s'" % src)