# DebFile: a Python representation of Debian .deb binary packages.
# Copyright (C) 2007 Stefano Zacchiroli <zack@debian.org>
# Copyright (C) 2007 Filippo Giunchedi <filippo@debian.org>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import gzip
import tarfile

from arfile import ArFile, ArError
from changelog import Changelog
from deb822 import Deb822
# Names of the three ar members every .deb must contain.
DATA_PART = 'data.tar.gz'
CTRL_PART = 'control.tar.gz'
INFO_PART = 'debian-binary'

# Maintainer scripts looked up inside the control part.
MAINT_SCRIPTS = ['preinst', 'postinst', 'prerm', 'postrm', 'config']

# Well-known member names; the changelog templates take the package name.
CONTROL_FILE = 'control'
CHANGELOG_NATIVE = 'usr/share/doc/%s/changelog.gz'  # with package stem
CHANGELOG_DEBIAN = 'usr/share/doc/%s/changelog.Debian.gz'
# NOTE(review): this definition was absent from the visible source although
# DebControl.md5sums() reads MD5_FILE; the value follows the member name used
# in that method's docstring -- confirm against the original file.
MD5_FILE = 'md5sums'
class DebError(ArError):
    """Error raised for problems specific to .deb packages.

    Subclasses ArError, so callers catching ArError also catch DebError.
    """
    pass
class DebPart(object):
    """'Part' of a .deb binary package.

    A .deb package is considered as made of 2 parts: a 'data' part
    (corresponding to the 'data.tar.gz' archive embedded in a .deb) and a
    'control' part (the 'control.tar.gz' archive). Each of them is represented
    by an instance of this class.

    When referring to file members of the underlying .tar.gz archive, file
    names can be specified in one of 3 formats "file", "./file", "/file". In
    all cases the file is considered relative to the root of the archive. For
    the control part the preferred mechanism is the first one (as in
    deb.control.get_content('control') ); for the data part the preferred
    mechanism is the third one (as in deb.data.get_file('/etc/vim/vimrc') ).
    """

    def __init__(self, member):
        """Wrap `member`, a file-like object holding a gzipped tar archive."""
        self.__member = member  # arfile.ArMember file member
        self.__tgz = None       # TarFile over the member, opened lazily by tgz()

    def tgz(self):
        """Return a TarFile object corresponding to this part of a .deb
        package.

        The TarFile is created on first use and reused by later calls.
        """
        if self.__tgz is None:
            gz = gzip.GzipFile(fileobj=self.__member, mode='r')
            self.__tgz = tarfile.TarFile(fileobj=gz, mode='r')
        return self.__tgz

    @staticmethod
    def __normalize_member(fname):
        """ try (not so hard) to obtain a member file name in a form relative
        to the .tar.gz root and with no leading '.' """
        if fname.startswith('./'):
            fname = fname[2:]
        elif fname.startswith('/'):
            fname = fname[1:]
        return fname

    # XXX in some of the following methods, compatibility among >= 2.5 and <<
    # 2.5 python versions had to be taken into account. TarFile << 2.5 indeed
    # was buggy and returned member file names with a leading './' only for
    # the *first* file member. TarFile >= 2.5 fixed this and has the leading
    # './' for all file members.

    def has_file(self, fname):
        """Check if this part contains a given file name."""
        fname = DebPart.__normalize_member(fname)
        names = self.tgz().getnames()
        return (('./' + fname in names)
                or (fname in names))  # XXX python << 2.5 TarFile compatibility

    def get_file(self, fname):
        """Return a file object corresponding to a given file name.

        The './'-prefixed spelling is tried first, then the bare name. The
        KeyError raised by TarFile for the bare name propagates when neither
        spelling is a member.
        """
        fname = DebPart.__normalize_member(fname)
        try:
            return self.tgz().extractfile('./' + fname)
        except KeyError:  # XXX python << 2.5 TarFile compatibility
            return self.tgz().extractfile(fname)

    def get_content(self, fname):
        """Return the string content of a given file, or None (e.g. for
        directories)."""
        f = self.get_file(fname)
        if not f:  # can be None for non regular or link files
            return None
        try:
            return f.read()
        finally:
            # release the member file object even if read() fails
            f.close()

    # container emulation

    def __iter__(self):
        """Iterate over the member names exactly as stored in the archive."""
        return iter(self.tgz().getnames())

    def __contains__(self, fname):
        return self.has_file(fname)

    def __getitem__(self, fname):
        return self.get_content(fname)
class DebData(DebPart):
    """Data part of a .deb package; adds nothing to the DebPart interface."""
    pass
class DebControl(DebPart):
    """Control part of a .deb package, with helpers for its usual members."""

    def scripts(self):
        """ Return a dictionary of maintainer scripts (postinst, prerm, ...)
        mapping script names to script text. """
        scripts = {}
        for fname in MAINT_SCRIPTS:
            if self.has_file(fname):
                scripts[fname] = self.get_content(fname)
        return scripts

    def debcontrol(self):
        """ Return the debian/control as a Deb822 (a Debian-specific dict-like
        class) object.

        For a string representation of debian/control try
        .get_content('control') """
        return Deb822(self.get_content(CONTROL_FILE))

    def md5sums(self):
        """ Return a dictionary mapping filenames (of the data part) to
        md5sums. Fails if the control part does not contain a 'md5sums' file.

        Keys of the returned dictionary are the left-hand side values of lines
        in the md5sums member of control.tar.gz, usually file names relative to
        the file system root (without heading '/' or './'). """
        if not self.has_file(MD5_FILE):
            raise DebError("'%s' file not found, can't list MD5 sums" %
                    MD5_FILE)

        md5_file = self.get_file(MD5_FILE)
        sums = {}
        try:
            for line in md5_file.readlines():
                # we need to support spaces in filenames, .split() is not enough
                md5, fname = line.rstrip('\r\n').split(None, 1)
                sums[fname] = md5
        finally:
            # close the member even when a malformed line aborts the loop
            md5_file.close()
        return sums
class DebFile(ArFile):
    """Representation of a .deb file (a Debian binary package)

    DebFile objects have the following (read-only) properties:
        - version       debian .deb file format version (not related with the
                        contained package version), 2.0 at the time of writing
                        for all .deb packages in the Debian archive
        - data          DebPart object corresponding to the data.tar.gz
                        archive contained in the .deb file
        - control       DebPart object corresponding to the control.tar.gz
                        archive contained in the .deb file
    """

    def __init__(self, filename=None, mode='r', fileobj=None):
        """Open the archive and check that all required members are present.

        Arguments are passed unchanged to ArFile.__init__. Raises DebError
        when any of the debian-binary, control.tar.gz or data.tar.gz members
        is missing.
        """
        ArFile.__init__(self, filename, mode, fileobj)
        required_names = set([INFO_PART, CTRL_PART, DATA_PART])
        actual_names = set(self.getnames())
        if not (required_names <= actual_names):
            # sorted() keeps the message stable; set order is arbitrary
            raise DebError(
                    "the following required .deb members are missing: "
                    + ' '.join(sorted(required_names - actual_names)))

        self.__parts = {}
        self.__parts[CTRL_PART] = DebControl(self.getmember(CTRL_PART))
        self.__parts[DATA_PART] = DebData(self.getmember(DATA_PART))
        self.__pkgname = None   # updated lazily by __updatePkgName

        f = self.getmember(INFO_PART)
        try:
            self.__version = f.read().strip()
        finally:
            f.close()

    def __updatePkgName(self):
        """Cache the package name read from the control file."""
        self.__pkgname = self.debcontrol()['package']

    version = property(lambda self: self.__version)
    data = property(lambda self: self.__parts[DATA_PART])
    control = property(lambda self: self.__parts[CTRL_PART])

    # proxy methods for the appropriate parts

    def debcontrol(self):
        """ See .control.debcontrol() """
        return self.control.debcontrol()

    def scripts(self):
        """ See .control.scripts() """
        return self.control.scripts()

    def md5sums(self):
        """ See .control.md5sums() """
        return self.control.md5sums()

    def changelog(self):
        """ Return a Changelog object for the changelog.Debian.gz of the
        present .deb package. Return None if no changelog can be found. """
        if self.__pkgname is None:
            self.__updatePkgName()

        # changelog.Debian.gz is tried before changelog.gz
        for fname in [ CHANGELOG_DEBIAN % self.__pkgname,
                CHANGELOG_NATIVE % self.__pkgname ]:
            if self.data.has_file(fname):
                gz = gzip.GzipFile(fileobj=self.data.get_file(fname))
                try:
                    raw_changelog = gz.read()
                finally:
                    gz.close()
                return Changelog(raw_changelog)
        return None
if __name__ == '__main__':
    # Ad-hoc smoke test: open the .deb named on the command line and show the
    # tar entry of its control file.
    import sys

    deb = DebFile(filename=sys.argv[1])
    tgz = deb.control.tgz()
    # NOTE(review): this looks up the bare name 'control'; archives whose
    # members are stored as './control' would need that spelling -- confirm.
    print(tgz.getmember('control'))