1
by Wolter Hellmund
Initial commit |
1 |
#!/usr/bin/env python
"""Rewrite quoted strings of a locale source as ``<UXXXX>`` symbols.

The script is a stdin -> stdout filter.  Inside every ``LC_*`` section except
``LC_IDENTIFICATION`` each character found between double quotes is replaced
by its ``<UXXXX>`` code point symbol.  Everything else is copied through
unchanged: comment lines (starting with ``%`` or ``#``), section markers,
``copy`` directives, blank lines, ``keyword number[;number...]`` lines and
all text outside a section.

NOTE(review): the input looks like a POSIX/glibc locale definition file that
uses ``/`` as its line-continuation character -- confirm against the files
this is actually run on.

Usage: ``python <this script> < locale.src > locale.out``
"""

import re
import sys

# Line classifiers.  They are applied to the raw input line, in this order.
comment = re.compile(r"^(%|#)")
lc_identification = re.compile(r"^LC_IDENTIFICATION")
lc_end_identification = re.compile(r"^END\s+?LC_IDENTIFICATION")
lc_all = re.compile(r"^LC_")
lc_end_all = re.compile(r"^END\s+?LC_")
copy_entity = re.compile(r"^copy")
blank = re.compile(r"^\s*$")
# "keyword", "keyword 3" or "keyword 3;3;0": nothing to encode on such lines.
numeric_keyword = re.compile(r"^\w+?\s*\d*(;\d+)*$")

# Either an already encoded symbol (group 1) or any single character.
_SYMBOL_OR_CHAR = re.compile(
    r"(<U[0-9A-F]{8}>|<U[0-9A-F]{4}>)|.", re.DOTALL)


def _encode_match(match):
    """Return the replacement for one ``_SYMBOL_OR_CHAR`` match."""
    if match.group(1):
        # Already a symbol: keep it, do not encode its '<', 'U', digits...
        return match.group(1)
    code = ord(match.group(0))
    # Four hex digits cover U+0000..U+FFFF; larger code points use the
    # eight digit form so the symbol width stays unambiguous.
    return ("<U%04X>" if code <= 0xFFFF else "<U%08X>") % code


def encode_text(text):
    """Return *text* with every character replaced by its symbol.

    Existing ``<UXXXX>`` / ``<UXXXXXXXX>`` symbols are left untouched.  The
    text is rewritten in a single left-to-right pass, so no character is used
    as a regular expression and symbols produced earlier are never rewritten.
    """
    return _SYMBOL_OR_CHAR.sub(_encode_match, text)


def encode_line(line, in_string=False):
    """Encode the double-quoted parts of one line.

    line      -- the line, with or without its line terminator.  A byte
                 string is decoded as UTF-8 and the result is encoded back,
                 so the return type always matches the argument type.
    in_string -- True when a quoted string left open by the previous line
                 (``/`` continuation) is still running at the start of
                 *line*.

    Returns ``(new_line, still_in_string)``.  Only text between quotes is
    changed; keywords, separators, the quotes themselves, a trailing ``/``
    and the line terminator are copied as they are.

    Raises UnicodeDecodeError if a byte string is not valid UTF-8.
    """
    is_bytes = isinstance(line, bytes)
    text = line.decode("utf-8") if is_bytes else line

    body = text.rstrip("\r\n")
    terminator = text[len(body):]

    # Splitting on '"' yields pieces that alternate between "outside" and
    # "inside" a string; *in_string* says which kind comes first.
    pieces = body.split('"')
    last = len(pieces) - 1
    inside = in_string
    continues = False
    rebuilt = []
    for index, piece in enumerate(pieces):
        if inside:
            if index == last and piece.endswith("/"):
                # Unterminated string ending in the continuation mark:
                # keep the mark, the string goes on in the next line.
                piece = encode_text(piece[:-1]) + "/"
                continues = True
            else:
                piece = encode_text(piece)
        rebuilt.append(piece)
        inside = not inside  # every '"' toggles the state

    result = '"'.join(rebuilt) + terminator
    if is_bytes:
        result = result.encode("utf-8")
    return result, continues


def convert(lines):
    """Yield the converted form of every line in *lines*.

    *lines* is any iterable of native ``str`` lines (what iterating over
    ``sys.stdin`` produces).  The checks are made in a fixed order; the first
    one that applies decides what happens to the line.
    """
    in_identification = False
    process = False      # inside an LC_* section whose strings get encoded
    in_string = False    # a quoted string is continued from the last line

    for line in lines:
        if comment.search(line):
            pass
        elif lc_identification.search(line):
            in_identification = True
        elif lc_end_identification.search(line):
            in_identification = False
        elif in_identification:
            pass
        elif lc_all.search(line):
            process, in_string = True, False
        elif lc_end_all.search(line):
            process, in_string = False, False
        elif copy_entity.search(line):
            pass
        elif (process and not blank.search(line)
              and not numeric_keyword.match(line)):
            line, in_string = encode_line(line, in_string)
        else:
            # Blank or purely numeric line, or text outside any section: a
            # continued string cannot look like this, so drop the state.
            in_string = False
        yield line


def main():
    """Filter standard input to standard output."""
    # write() emits each line exactly as converted; nothing is appended.
    write = sys.stdout.write
    for line in convert(sys.stdin):
        write(line)


if __name__ == "__main__":
    main()