164
def iter_paragraphs(cls, sequence, fields=None, shared_storage=True):
215
def iter_paragraphs(cls, sequence, fields=None, use_apt_pkg=True,
216
shared_storage=False):
165
217
"""Generator that yields a Deb822 object for each paragraph in sequence.
167
219
:param sequence: same as in __init__.
169
221
:param fields: likewise.
171
:param shared_storage: if sequence is a file(), apt_pkg will be used
172
if available to parse the file, since it's much much faster. On the
173
other hand, yielded objects will share storage, so they can't be
174
kept across iterations. (Also, PGP signatures won't be stripped
175
with apt_pkg.) Set this parameter to False to disable using apt_pkg.
223
:param use_apt_pkg: if sequence is a file(), apt_pkg will be used
224
if available to parse the file, since it's much much faster. Set
225
this parameter to False to disable using apt_pkg.
226
:param shared_storage: if sequence is a file(), use_apt_pkg is True,
227
and shared_storage is True, yielded objects will share storage, so
228
they can't be kept across iterations. (Also, PGP signatures won't
229
be stripped.) By default, this parameter is False, causing a copy
230
of the parsed data to be made through each iteration. Except for
231
with raw Deb822 paragraphs (as opposed to _multivalued subclasses),
232
the speed gained by setting shared_storage=True is marginal. This
233
parameter has no effect if use_apt_pkg is False or apt_pkg is not
178
# TODO Think about still using apt_pkg even if shared_storage is False,
179
# by somehow instructing the constructor to make copy of the data. (If
180
# this is still faster.)
182
if _have_apt_pkg and shared_storage and isinstance(sequence, file):
237
if _have_apt_pkg and use_apt_pkg and isinstance(sequence, file):
183
238
parser = apt_pkg.ParseTagFile(sequence)
184
239
while parser.Step() == 1:
185
yield cls(fields=fields, _parsed=parser.Section)
241
parsed = parser.Section
243
# Since parser.Section doesn't have an items method, we
244
# need to imitate that method here and make a Deb822Dict
245
# from the result in order to preserve order.
246
items = [(key, parser.Section[key])
247
for key in parser.Section.keys()]
248
parsed = Deb822Dict(items)
249
yield cls(fields=fields, _parsed=parsed)
187
252
iterable = iter(sequence)
188
253
x = cls(iterable, fields)
373
446
if not blank_line.match(line):
374
447
lines.append(line)
377
elif state == 'SIGNED MESSAGE' and blank_line.match(line):
379
elif m.group('action') == 'BEGIN':
380
state = m.group('what')
381
elif m.group('action') == 'END':
449
if not gpg_pre_lines:
450
# There's no gpg signature, so we should stop at
453
elif state == 'SIGNED MESSAGE':
454
if blank_line.match(line):
457
gpg_pre_lines.append(line)
458
elif state == 'SIGNATURE':
459
gpg_post_lines.append(line)
461
if m.group('action') == 'BEGIN':
462
state = m.group('what')
463
elif m.group('action') == 'END':
464
gpg_post_lines.append(line)
466
if not blank_line.match(line):
468
gpg_pre_lines.append(line)
470
gpg_post_lines.append(line)
473
return (gpg_pre_lines, lines, gpg_post_lines)
387
475
raise EOFError('only blank lines found in input')
389
gpg_stripped_paragraph = staticmethod(gpg_stripped_paragraph)
477
split_gpg_and_payload = staticmethod(split_gpg_and_payload)
479
def gpg_stripped_paragraph(cls, sequence):
    """Return just the payload part of sequence, without GPG wrapping.

    The splitting itself is delegated to cls.split_gpg_and_payload();
    element 1 of its result is handed back to the caller unchanged.
    """
    pieces = cls.split_gpg_and_payload(sequence)
    payload = pieces[1]
    return payload

# Old-style decoration: rebind the plain function as a class method.
gpg_stripped_paragraph = classmethod(gpg_stripped_paragraph)
484
def get_gpg_info(self):
485
"""Return a GpgInfo object with GPG signature information
487
This method will raise ValueError if the signature is not available
488
(e.g. the original text cannot be found)"""
490
# raw_text is saved (as a string) only for Changes and Dsc (see
491
# _gpg_multivalued.__init__) which is small compared to Packages or
492
# Sources which contain no signature
493
if not hasattr(self, 'raw_text'):
494
raise ValueError, "original text cannot be found"
496
if self.gpg_info is None:
497
self.gpg_info = GpgInfo.from_sequence(self.raw_text)
503
# XXX check what happens if input contains more than one signature
505
"""A wrapper around gnupg parsable output obtained via --status-fd
507
This class is really a dictionary containing parsed output from gnupg plus
508
some methods to make sense of the data.
509
Keys are keywords and values are arguments suitably split.
510
See /usr/share/doc/gnupg/DETAILS.gz"""
512
# keys with format "key keyid uid"
513
uidkeys = ('GOODSIG', 'EXPSIG', 'EXPKEYSIG', 'REVKEYSIG', 'BADSIG')
516
"""Is the signature valid?"""
517
return self.has_key('GOODSIG') or self.has_key('VALIDSIG')
519
# XXX implement as a property?
520
# XXX handle utf-8 %-encoding
522
"""Return the primary ID of the signee key, None if not available"""
526
def from_output(out, err=None):
527
"""Create a new GpgInfo object from gpg(v) --status-fd output (out) and
528
optionally collect stderr as well (err).
530
Both out and err can be lines in newline-terminated sequence or regular strings."""
534
if isinstance(out, basestring):
535
out = out.split('\n')
536
if isinstance(err, basestring):
537
err = err.split('\n')
544
if not l.startswith(header):
550
# str.partition() would be better, 2.5 only though
553
if key in GpgInfo.uidkeys:
554
# value is "keyid UID", don't split UID
555
value = l[s+1:].split(' ', 1)
557
value = l[s+1:].split(' ')
562
# XXX how to handle sequences of lines? file() returns \n-terminated
564
def from_sequence(sequence, keyrings=['/usr/share/keyrings/debian-keyring.gpg'],
565
executable=["/usr/bin/gpgv"]):
566
"""Create a new GpgInfo object from the given sequence.
568
Sequence is a sequence of lines or a string
569
executable is a list of args for subprocess.Popen, the first element being the gpg executable"""
571
# XXX check for gpg as well and use --verify accordingly?
573
#args.extend(["--status-fd", "1", "--no-default-keyring"])
574
args.extend(["--status-fd", "1"])
576
[args.extend(["--keyring", k]) for k in keyrings if os.path.isfile(k) and os.access(k, os.R_OK)]
578
if "--keyring" not in args:
579
raise IOError, "cannot access none of given keyrings"
582
p = subprocess.Popen(args, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
583
# XXX what to do with exit code?
585
if isinstance(sequence, basestring):
586
(out, err) = p.communicate(sequence)
588
(out, err) = p.communicate("\n".join(sequence))
590
return GpgInfo.from_output(out, err)
593
def from_file(target, *args):
    """Create a new GpgInfo object from the file at path target.

    The file is opened and passed to GpgInfo.from_sequence() together with
    any extra positional arguments (keyrings, executable); it is closed
    again once from_sequence() has returned or raised.
    """
    # from_sequence has to be reached through the class: a bare
    # from_sequence(...) is not resolvable from inside this function body.
    # NOTE(review): assumes this def lives in GpgInfo next to from_sequence,
    # as the sibling call GpgInfo.from_output(...) suggests -- confirm.
    signed_file = file(target)
    try:
        return GpgInfo.from_sequence(signed_file, *args)
    finally:
        # from_sequence consumes the whole input before returning, so the
        # handle is no longer needed at this point.
        signed_file.close()
599
class PkgRelation(object):
600
"""Inter-package relationships
602
Structured representation of the relationships of a package to another,
603
i.e. of what can appear in a Deb822 field like Depends, Recommends,
604
Suggests, ... (see Debian Policy 7.1).
607
# XXX *NOT* a real dependency parser, and that is not even a goal here, we
608
# just parse as much as we need to split the various parts composing a
609
# dependency, checking their correctness wrt policy is out of scope
610
__dep_RE = re.compile( \
611
r'^\s*(?P<name>[a-zA-Z0-9.+\-]{2,})(\s*\(\s*(?P<relop>[>=<]+)\s*(?P<version>[0-9a-zA-Z:\-+~.]+)\s*\))?(\s*\[(?P<archs>[\s!\w\-]+)\])?\s*$')
612
__comma_sep_RE = re.compile(r'\s*,\s*')
613
__pipe_sep_RE = re.compile(r'\s*\|\s*')
614
__blank_sep_RE = re.compile(r'\s*')
617
def parse_relations(cls, raw):
618
"""Parse a package relationship string (i.e. the value of a field like
619
Depends, Recommends, Build-Depends ...)
621
def parse_archs(raw):
622
# assumption: no space between '!' and architecture name
624
for arch in cls.__blank_sep_RE.split(raw.strip()):
625
if len(arch) and arch[0] == '!':
626
archs.append((False, arch[1:]))
628
archs.append((True, arch))
632
match = cls.__dep_RE.match(raw)
634
parts = match.groupdict()
635
d = { 'name': parts['name'] }
636
if not (parts['relop'] is None or parts['version'] is None):
637
d['version'] = (parts['relop'], parts['version'])
640
if parts['archs'] is None:
643
d['arch'] = parse_archs(parts['archs'])
646
print >> sys.stderr, \
647
'deb822.py: WARNING: cannot parse package' \
648
' relationship "%s", returning it raw' % raw
649
return { 'name': raw, 'version': None, 'arch': None }
651
tl_deps = cls.__comma_sep_RE.split(raw.strip()) # top-level deps
652
cnf = map(cls.__pipe_sep_RE.split, tl_deps)
653
return map(lambda or_deps: map(parse_rel, or_deps), cnf)
657
"""Format to string structured inter-package relationships
659
Perform the inverse operation of parse_relations, returning a string
660
suitable to be written in a package stanza.
662
def pp_arch(arch_spec):
663
(excl, arch) = arch_spec
669
def pp_atomic_dep(dep):
671
if dep.has_key('version') and dep['version'] is not None:
672
s += ' (%s %s)' % dep['version']
673
if dep.has_key('arch') and dep['arch'] is not None:
674
s += ' [%s]' % string.join(map(pp_arch, dep['arch']))
677
pp_or_dep = lambda deps: string.join(map(pp_atomic_dep, deps), ' | ')
678
return string.join(map(pp_or_dep, rels), ', ')
681
class _lowercase_dict(dict):
682
"""Dictionary wrapper which lowercase keys upon lookup."""
684
def __getitem__(self, key):
685
return dict.__getitem__(self, key.lower())
688
class _PkgRelationMixin(object):
689
"""Package relationship mixin
691
Inheriting from this mixin you can extend a Deb822 object with attributes
692
letting you access inter-package relationship in a structured way, rather
693
than as strings. For example, while you can usually use pkg['depends'] to
694
obtain the Depends string of package pkg, mixing in with this class you
695
gain pkg.depends to access Depends as a Pkgrel instance
697
To use, subclass _PkgRelationMixin from a class with a _relationship_fields
698
attribute. It should be a list of field names for which structured access
699
is desired; for each of them a method will be added to the inherited class.
700
The method name will be the lowercase version of field name; '-' will be
701
mangled as '_'. The method would return relationships in the same format of
702
the PkgRelation' relations property.
704
See Packages and Sources as examples.
707
def __init__(self, *args, **kwargs):
708
self.__relations = _lowercase_dict({})
709
self.__parsed_relations = False
710
for name in self._relationship_fields:
711
# To avoid reimplementing Deb822 key lookup logic we use a really
712
# simple dict subclass which just lowercase keys upon lookup. Since
713
# dictionary building happens only here, we ensure that all keys
714
# are in fact lowercase.
715
# With this trick we enable users to use the same key (i.e. field
716
# name) of Deb822 objects on the dictionary returned by the
717
# relations property.
718
keyname = name.lower()
719
if self.has_key(name):
720
self.__relations[keyname] = None # lazy value
721
# all lazy values will be expanded before setting
722
# __parsed_relations to True
724
self.__relations[keyname] = []
728
"""Return a dictionary of inter-package relationships among the current
731
Dictionary keys depend on the package kind. Binary packages have keys
732
like 'depends', 'recommends', ... while source packages have keys like
733
'build-depends', 'build-depends-indep' and so on. See the Debian policy
734
for the comprehensive field list.
736
Dictionary values are package relationships returned as lists of lists
737
of dictionaries (see below for some examples).
739
The encoding of package relationships is as follows:
740
- the top-level lists corresponds to the comma-separated list of
741
Deb822, their components form a conjunction, i.e. they have to be
743
- the inner lists corresponds to the pipe-separated list of Deb822,
744
their components form a disjunction, i.e. they have to be OR-ed
746
- member of the inner lists are dictionaries with the following keys:
747
- name: package (or virtual package) name
748
- version: A pair <operator, version> if the relationship is
749
versioned, None otherwise. operator is one of "<<",
750
"<=", "=", ">=", ">>"; version is the given version as
752
- arch: A list of pairs <polarity, architecture> if the
753
relationship is architecture specific, None otherwise.
754
Polarity is a boolean (false if the architecture is
755
negated with "!", true otherwise), architecture the
756
Debian architecture name as a string.
760
"emacs | emacsen, make, debianutils (>= 1.7)" becomes
761
[ [ {'name': 'emacs'}, {'name': 'emacsen'} ],
762
[ {'name': 'make'} ],
763
[ {'name': 'debianutils', 'version': ('>=', '1.7')} ] ]
765
"tcl8.4-dev, procps [!hurd-i386]" becomes
766
[ [ {'name': 'tcl8.4-dev'} ],
767
[ {'name': 'procps', 'arch': [(False, 'hurd-i386')]} ] ]
769
if not self.__parsed_relations:
770
lazy_rels = filter(lambda n: self.__relations[n] is None,
771
self.__relations.keys())
773
self.__relations[n] = PkgRelation.parse_relations(self[n])
774
self.__parsed_relations = True
775
return self.__relations
393
777
class _multivalued(Deb822):
394
778
"""A class with (R/W) support for multivalued fields.
396
780
To use, create a subclass with a _multivalued_fields attribute. It should
397
781
be a dictionary with *lower-case* keys, with lists of human-readable
398
782
identifiers of the fields as the values. Please see Dsc, Changes, and
399
783
PdiffIndex as examples.
403
786
def __init__(self, *args, **kwargs):
404
787
Deb822.__init__(self, *args, **kwargs)