# ~vorlon/apt-ddtp-tools/trunk

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
#!/usr/bin/python3

import string
import apt_pkg
import sys
import gettext, locale
import os

from utils import parse_descr, utf8

def to_msg(str):
	"""Render a string as the quoted value of a gettext msgid/msgstr.

	The text is first passed through escape().  A string without any
	newline becomes a single quoted string.  A string containing newlines
	is split on them; every non-empty line becomes its own quoted string
	ending in a literal backslash-n, and the quoted lines are joined with
	real newlines.  Empty lines are dropped, and the last emitted line gets
	the backslash-n suffix too, whether or not the input ended in a newline.
	"""
	escaped = escape(str)
	if "\n" not in escaped:
		return '"%s"' % escaped
	quoted_lines = ('"%s\\n"' % line for line in escaped.split("\n") if line)
	return "\n".join(quoted_lines)

def escape(str):
	"""Return *str* with backslashes and double quotes backslash-escaped.

	Each character is mapped independently in a single pass, so the
	backslash that is inserted in front of a double quote is never
	escaped a second time.
	"""
	replacements = {
		ord("\\"): "\\\\",
		ord("\""): "\\\"",
	}
	return str.translate(replacements)


def lookup(parser, tag, value):
	"""Position *parser* on the first section whose *tag* equals *value*.

	The parser is rewound to offset 0 and stepped forward one section at
	a time; stepping stops as soon as parser.section.get(tag) == value.
	Nothing is returned: the caller reads the match from parser.section.
	When no section matches, the parser has simply been stepped to the
	end of its input.
	"""
	# FIXME: this is a linear scan from the start on every call
	parser.jump(0)
	matched = False
	while not matched and parser.step():
		matched = parser.section.get(tag) == value


def descr_to_po(pkg, descr, descr_trans, all_chunks):
	"""Build the PO records for one package that has a valid translation.

	Both descriptions are split with parse_descr() into a short
	description and a sequence of long-description chunks; the chunks are
	paired up by position, assuming the translator kept the same layout.

	Args:
		pkg: package name, only used in diagnostics written to stderr.
		descr: the untranslated description text.
		descr_trans: the translated description text.
		all_chunks: set of source strings already written by earlier
			calls.  It is updated in place, but only when this call
			actually returns records.

	Returns:
		The "#. Description" / msgid / msgstr records as one string, or
		"" when the package is skipped: an empty short description or
		chunk, a source string that was already seen, or a different
		number of chunks in original and translation.
	"""
	(short_descr, long_descr_chunks) = parse_descr(descr)

	# Gather this package's source strings in `pending` and merge them
	# into all_chunks only after every check has passed.  Registering them
	# right away would mark strings as "seen" even when we bail out below,
	# and later packages sharing them would then be suppressed although
	# nothing was ever written for them.
	if short_descr == "":
		sys.stderr.write("Invalid short_descr for: %s\n" % pkg)
		return ""
	if short_descr in all_chunks:
		return ""
	pending = {short_descr}
	for chunk in long_descr_chunks:
		if chunk == "":
			sys.stderr.write("Invalid chunk for: %s\n" % pkg)
			return ""
		# `pending` catches a string repeated within this one package,
		# which would otherwise yield a duplicate msgid
		if chunk in all_chunks or chunk in pending:
			return ""
		pending.add(chunk)

	(short_descr_trans, long_descr_trans) = parse_descr(descr_trans)
	# translate chunk for chunk; that only works if the counts agree
	if len(long_descr_chunks) != len(long_descr_trans):
		sys.stderr.write("BAD CHUNKS LEN: %s\n" % pkg)
		return ""

	# from here on the records are definitely emitted
	all_chunks.update(pending)

	records = [
		"#. Description\n",
		'msgid %s\n' % to_msg(short_descr),
		'msgstr %s\n\n' % to_msg(short_descr_trans),
	]
	for chunk, chunk_trans in zip(long_descr_chunks, long_descr_trans):
		msgid = to_msg(chunk)
		msgstr = to_msg(chunk_trans)
		# *sigh* msgfmt is unhappy if both strings have a different
		#        pattern of \n at the end, fix it here
		if msgid.endswith('\\n"') and not msgstr.endswith('\\n"'):
			sys.stderr.write("different number of \\n for '%s', correcting\n" % pkg)
			# drop the closing quote, append \n and re-close
			msgstr = msgstr[:-1] + '\\n"'
		elif msgstr.endswith('\\n"') and not msgid.endswith('\\n"'):
			sys.stderr.write("different number of \\n for '%s', correcting\n" % pkg)
			# strip the trailing \n" and re-close the string
			msgstr = msgstr[:-3] + '"'
		records.append("#. Description\n")
		records.append('msgid %s\n' % msgid)
		records.append('msgstr %s\n\n' % msgstr)
	return "".join(records)
		


def _index_translations(parser_trans, lang):
	"""Map each Package of a Translation TagFile to (translation, md5).

	The first section seen for a package wins, which is the section a
	rewind-and-scan lookup by package name would have stopped at.
	"""
	descr_field = "Description-%s" % lang
	index = {}
	while parser_trans.step():
		section = parser_trans.section
		index.setdefault(
			section.get("Package"),
			(section.get(descr_field), section.get("Description-md5")))
	return index


def translation2po(inf, out, transf, lang):
	"""Write a PO file built from a Packages file and its translation.

	Args:
		inf: open file object of the Packages file (read via apt_pkg.TagFile).
		out: writable text stream receiving the PO header and records.
		transf: path of the Translation-$LANG file.
		lang: language code; the translation is read from the
			"Description-<lang>" field.

	A package contributes records only if it has both a "Description-en"
	and a translation, and the md5 of the English description plus a
	trailing newline equals the translation's "Description-md5".
	"""
	# header
	out.write("""
msgid ""
msgstr ""
"Project-Id-Version: PACKAGE VERSION\\n"
"Report-Msgid-Bugs-To: \\n"
"POT-Creation-Date: 2005-06-15 10:23+0200\\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\\n"
"Language-Team: LANGUAGE <LL@li.org>\\n"
"MIME-Version: 1.0\\n"
"Content-Type: text/plain; charset=UTF-8\\n"
"Content-Transfer-Encoding: 8bit\\n"
\n
""")
	all_chunks = set()
	# Read the translation file once and close it again, instead of
	# rescanning it from the top for every package.
	with open(transf, encoding="utf-8") as trans_file:
		translations = _index_translations(apt_pkg.TagFile(trans_file), lang)

	parser = apt_pkg.TagFile(inf)
	while parser.step():
		pkg = parser.section.get("Package")
		descr = parser.section.get("Description-en")
		descr_trans, md5_trans = translations.get(pkg, (None, None))
		# nothing to do unless we have a descr and a translation for that pkg
		if descr is None or descr_trans is None:
			continue
		md5 = str(apt_pkg.md5sum(descr.encode()+b"\n"))
		# only trust a translation made for exactly this English text
		if md5 == md5_trans:
			out.write(descr_to_po(pkg, descr, descr_trans, all_chunks))


if __name__ == "__main__":
	# usage: <script> Packages Translation-$LANG lang  (PO goes to stdout)
	if len(sys.argv) < 4:
		print("%s Packages Translation lang" % sys.argv[0])
		print("need a Packages file and a Translation-$LANG file ")
		print("and a lang (e.g. Packages Translation-de de) ")
		print(" it will generate a po file from that")
		sys.exit(1)

	out = sys.stdout
	transf = sys.argv[2]
	lang = sys.argv[3]

	# the context manager closes the Packages file even if the
	# conversion raises; the generated PO header declares UTF-8, so
	# read the input as UTF-8 regardless of the locale
	with open(sys.argv[1], encoding="utf-8") as inf:
		translation2po(inf, out, transf, lang)