# ~roeldeconinck/mnemosyne-proj/SMImporter

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
# -*- coding: utf-8 -*-
"""
Created on Wed Aug 31 22:48:09 2011

@author: RDC
"""

import xml.etree.cElementTree as ElementTree
from xml.etree import ElementTree as OldElementTree
import copy

# Parse the XML input file once, at import time.  The path is absolute and
# machine-specific.
tree = ElementTree.parse(
    r'C:\Workspace\Mnemosyne\trunk\import\mnemosyne\tests\files\VeryShort.xml'
)
# Alternative input file, enable by hand when needed:
#tree=ElementTree.parse(r'C:\Workspace\Mnemosyne\trunk\import\mnemosyne\tests\files\Shortestt.xml')

# Root element of the parsed document.
root = tree.getroot()

def show_element(el, maximum=20):
    """Print the tag and text of the first subelements of an element.

    At most ``maximum`` direct children of ``el`` are listed (default 20).
    Pass ``None`` to list all of them; a negative value is treated as 0.

    If ``el`` has attributes, they are printed afterwards as the list of
    (name, value) pairs returned by ``el.items()``.  The output always ends
    with an empty separator (two newlines).
    """

    # Honour the caller's limit.  Slicing already copes with elements that
    # have fewer children than the limit, so no min() with len(el) is needed.
    limit = None if maximum is None else max(maximum, 0)
    for element in list(el)[:limit]:
        # Single-argument print(...) produces the same output under the
        # Python 2 print statement and the Python 3 print function.
        print('tag = %s, text = %s' % (element.tag, element.text))

    # list() so the attributes are shown as a plain list even when the
    # ElementTree implementation hands back a dict view.
    attributes = list(el.items())
    if attributes:
        print('The items are %s' % (attributes,))
    print('\n')

def split(el, topics=None, items=None, cat=None):
    """Split an element tree into a list of topics and a list of items.

    The kind of ``el`` is read from the text of its ``Type`` child:

    * ``'Topic'``: ``el`` is appended to ``topics`` and all of its direct
      children are processed recursively.  The text of its ``Title`` child
      is added to the category path handed down to those children.
    * ``'Item'``: a dict ``{'element': el, 'categories': [...]}`` is
      appended to ``items``; ``categories`` holds the titles of the
      enclosing topics, outermost first.
    * anything else: ignored (and not descended into).

    ``topics`` and ``items`` are accumulators; when given they are extended
    in place, otherwise fresh lists are created.  ``cat`` is the category
    path of the enclosing topics (default: empty); it is never modified.

    Returns the tuple ``(topics, items)``.
    """

    # None sentinels instead of [] defaults: a list default would be shared
    # between calls and keep growing.
    if topics is None:
        topics = []
    if items is None:
        items = []
    if cat is None:
        cat = []

    kind = el.findtext('Type')
    if kind == 'Topic':
        topics.append(el)
        # Build a new path list for this topic's children.  Extending the
        # incoming list in place would leak this title into sibling
        # elements and into item dicts that were recorded earlier.
        child_cat = list(cat) + [el.findtext('Title')]
        for child in el:
            split(child, topics, items, child_cat)
    elif kind == 'Item':
        # Store a private copy so every item owns its category list.
        items.append({'element': el, 'categories': list(cat)})

    # Any other element is skipped; the caller's loop simply continues.
    return topics, items
    

# Collect every element tagged 'SuperMemoElement' that the tree iterator
# yields and dump each one to stdout.
# NOTE: getiterator() is the older spelling of iter(); the standard library
# documents it as deprecated since Python 2.7.
selements = list(tree.getiterator(tag='SuperMemoElement'))
for e in selements:
    show_element(e)

# Take the first 'SuperMemoElement' returned by tree.find() and separate it
# into topics (t) and items (i), starting from empty accumulators.
s = tree.find('SuperMemoElement')
t, i = split(s, topics=[], items=[], cat=[])