~mvo/archive-crawler/mvo

« back to all changes in this revision

Viewing changes to DesktopDataExtractor/__init__.py

  • Committer: Michael Vogt
  • Date: 2011-04-19 13:52:37 UTC
  • mfrom: (120.1.5 fix_symbolic_links)
  • Revision ID: michael.vogt@ubuntu.com-20110419135237-28zw5j0l6njcylh7
merged from lp:~j-johan-edwards/archive-crawler/fix-symbolic-extraction

Show diffs side-by-side

added added

removed removed

Lines of Context:
16
16
import fnmatch
17
17
import glob
18
18
import pickle
 
19
import time
19
20
 
20
21
import ArchiveCrawler
21
22
 
290
291
        else:
291
292
            return None
292
293
 
 
294
    # FIXME: This method works around shortcomings in tarfile.extract().
 
295
    # Once tarfile can reliably extract symlinked data, this method
 
296
    # should be removed; all calls to it will become unnecessary.
 
297
    def _readSymbolicPath(self, tarfile, path):
 
298
        """ converts a path with symbolic links to an non-linked one """
 
299
        member_paths = tarfile.getnames()
 
300
        return_path = ''
 
301
        start = time.time()
 
302
        # go through all directories/files from top to bottom.
 
303
        for path_element in path.split('/'):
 
304
            # tentatively add the current element to the return path
 
305
            next_path = os.path.join(return_path, path_element)
 
306
            if next_path not in member_paths:
 
307
                raise Exception('tar does not contain %s' % next_path)
 
308
            # now see if it's symbolic
 
309
            next_member = tarfile.getmember(next_path)
 
310
            if next_member.issym():
 
311
                # if it is, discard the new element, and add its linked path
 
312
                next_path = os.path.join(return_path, next_member.linkpath)
 
313
                # now resolve all '..' references
 
314
                next_path_stack = []
 
315
                for element in next_path.split('/'):
 
316
                    if element == '..':
 
317
                        next_path_stack.pop()
 
318
                    else:
 
319
                        next_path_stack.append(element)
 
320
                next_path = '/'.join(next_path_stack)
 
321
                # ensure we are not caught in an infinite symlink cycle 
 
322
                if (time.time() - start) > 2:
 
323
                    logging.warn('symlink cycle in %s' % tarfile.name)
 
324
                    return path
 
325
            # update the return path
 
326
            return_path = next_path
 
327
        return return_path
 
328
 
293
329
    def extract_icon(self, tarfile, iconName, newIconName):
294
330
        logging.info("extract_icon: %s %s %s" % (tarfile.name, iconName, newIconName))
295
331
        extractName = iconName
296
332
        if iconName.startswith('/'):
297
333
            extractName = ".%s" % iconName
298
334
        try:
 
335
            extractName = self._readSymbolicPath(tarfile, extractName)
299
336
            iconFile = tarfile.extractfile(extractName)
300
337
            outicon = open(newIconName, "w")
301
338
            outicon.write(iconFile.read())