1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
|
#!/usr/bin/python3
import string
import apt_pkg
import sys
import gettext, locale
import os
from utils import parse_descr, utf8
def to_msg(str):
    """Render a string as a quoted gettext msgid/msgstr value.

    The text is first passed through escape().  Text without a line break
    becomes a single double-quoted string.  Text with line breaks becomes
    one double-quoted string per non-empty line, each ending in a literal
    backslash-n sequence, with the quoted strings separated by newlines.
    Empty lines are dropped, so a final line without a trailing line break
    still gets the backslash-n suffix.
    """
    escaped = escape(str)
    if "\n" not in escaped:
        return '"%s"' % escaped
    quoted_lines = [
        '"%s\\n"' % piece for piece in escaped.split("\n") if piece
    ]
    return "\n".join(quoted_lines)
def escape(str):
    """Return the text with backslashes and double quotes backslash-escaped.

    Every backslash is doubled and every double quote gets a backslash in
    front of it, so the result can sit inside a double-quoted PO string.
    """
    # One pass over the text; each source character is mapped independently,
    # so backslashes introduced for quotes are never escaped a second time.
    replacements = {
        ord("\\"): "\\\\",
        ord("\""): "\\\"",
    }
    return str.translate(replacements)
def lookup(parser, tag, value):
    """Position the TagFile parser on the first section where tag == value.

    The parser is rewound with ``parser.jump(0)`` and then stepped section
    by section, so every call is a linear scan from the start of the file.

    parser -- object offering jump(), step() and a ``section`` with get()
    tag -- field name to compare (for example "Package")
    value -- field value to search for

    Returns True when a matching section was found; ``parser.section`` is
    then that section.  Returns False when the scan ran off the end of the
    file without a match.  Earlier versions returned None on both paths,
    which made a failed lookup indistinguishable from a successful one;
    callers that ignore the result are unaffected.
    """
    # FIXME: rescanning the whole file for every lookup is slow
    parser.jump(0)
    while parser.step():
        if parser.section.get(tag) == value:
            return True
    return False
def descr_to_po(pkg, descr, descr_trans, all_chunks):
    """Build the PO records for one description and its translation.

    parse_descr() splits each description into a short description and a
    list of long-description chunks.  Originals and translations are paired
    chunk for chunk, assuming the translator kept the chunk structure.

    pkg -- package name, used only in diagnostics written to stderr
    descr -- original description text
    descr_trans -- translated description text
    all_chunks -- set of original chunks that were already emitted; it is
                  updated in place, and only when records are returned

    Returns the PO text (one "#. Description" record per chunk), or ""
    when the package is skipped: an empty short description or chunk, a
    chunk that was already emitted (or is repeated inside this
    description), or a chunk count that differs between the original and
    the translation.
    """
    short_descr, long_descr_chunks = parse_descr(descr)
    if short_descr == "":
        sys.stderr.write("Invalid short_descr for: %s\n" % pkg)
        return ""
    if short_descr in all_chunks:
        return ""

    # Collect this package's chunks separately and only merge them into
    # all_chunks once we know the records will be emitted.  Marking them as
    # seen before a later bail-out would make every following package skip
    # chunks that never made it into the output.
    pending = {short_descr}
    for chunk in long_descr_chunks:
        if chunk == "":
            sys.stderr.write("Invalid chunk for: %s\n" % pkg)
            return ""
        if chunk in all_chunks or chunk in pending:
            return ""
        pending.add(chunk)

    short_descr_trans, long_descr_trans = parse_descr(descr_trans)

    # number of chunks is unequal, so chunks cannot be paired up
    if len(long_descr_chunks) != len(long_descr_trans):
        sys.stderr.write("BAD CHUNKS LEN: %s\n" % pkg)
        return ""

    all_chunks.update(pending)

    record = "#. Description\nmsgid %s\nmsgstr %s\n\n"
    records = [record % (to_msg(short_descr), to_msg(short_descr_trans))]

    for chunk, chunk_trans in zip(long_descr_chunks, long_descr_trans):
        msgid = to_msg(chunk)
        msgstr = to_msg(chunk_trans)
        # *sigh* msgfmt is unhappy if both strings have a different
        # pattern of \n at the end, fix it here
        id_has_nl = msgid.endswith('\\n"')
        str_has_nl = msgstr.endswith('\\n"')
        if id_has_nl and not str_has_nl:
            sys.stderr.write("different number of \\n for '%s', correcting\n" % pkg)
            # drop the closing quote and re-close after an added \n
            msgstr = msgstr[:-1] + '\\n"'
        elif str_has_nl and not id_has_nl:
            sys.stderr.write("different number of \\n for '%s', correcting\n" % pkg)
            # drop the trailing \n" and re-close the string
            msgstr = msgstr[:-3] + '"'
        records.append(record % (msgid, msgstr))

    return "".join(records)
def translation2po(inf, out, transf, lang):
    """Write a PO file pairing English descriptions with their translation.

    inf -- open Packages file; it is handed to apt_pkg.TagFile
    out -- writable text stream that receives the PO output
    transf -- path of the Translation-$LANG file
    lang -- language code, selects the "Description-<lang>" field

    For every section of ``inf`` the translation file is searched for a
    section with the same Package name.  The translation is used only when
    its Description-md5 field equals the md5 of the English description
    plus a trailing newline.
    """
    # header
    out.write("""
msgid ""
msgstr ""
"Project-Id-Version: PACKAGE VERSION\\n"
"Report-Msgid-Bugs-To: \\n"
"POT-Creation-Date: 2005-06-15 10:23+0200\\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\\n"
"Language-Team: LANGUAGE <LL@li.org>\\n"
"MIME-Version: 1.0\\n"
"Content-Type: text/plain; charset=UTF-8\\n"
"Content-Transfer-Encoding: 8bit\\n"
\n
""")
    all_chunks = set()
    parser = apt_pkg.TagFile(inf)
    # Keep the translation file open only for the duration of the
    # conversion; it used to be opened and never closed.
    with open(transf) as trans_file:
        parser_trans = apt_pkg.TagFile(trans_file)
        while parser.step():
            pkg = parser.section.get("Package")
            descr = parser.section.get("Description-en")
            # jump to the right bit in the file
            lookup(parser_trans, "Package", pkg)
            descr_trans = parser_trans.section.get("Description-%s" % lang)
            # nothing to do unless both descriptions exist
            if descr is None or descr_trans is None:
                continue
            # The result of lookup() is not checked here; the md5
            # comparison is what rejects a translation section that does
            # not belong to this description.
            md5_trans = parser_trans.section.get("Description-md5")
            md5 = str(apt_pkg.md5sum(descr.encode() + b"\n"))
            if md5 == md5_trans:
                out.write(descr_to_po(pkg, descr, descr_trans, all_chunks))
if __name__ == "__main__":
    # Script entry point: Packages file, Translation-$LANG file and the
    # language code are taken from the command line; the PO file is
    # written to stdout.
    if len(sys.argv) < 4:
        print("%s Packages Translation lang" % sys.argv[0])
        print("need a Packages file and a Translation-$LANG file ")
        print("and a lang (e.g. Packages Translation-de de) ")
        print(" it will generate a po file from that")
        sys.exit(1)
    out = sys.stdout
    transf = sys.argv[2]
    lang = sys.argv[3]
    # The with-block closes the Packages file once the conversion is done,
    # also when translation2po() raises.
    with open(sys.argv[1]) as inf:
        translation2po(inf, out, transf, lang)
|