2
# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors
4
# This module is part of GitPython and is released under
5
# the BSD License: http://www.opensource.org/licenses/bsd-license.php
16
from gitdb.util import (
25
__all__ = ( "stream_copy", "join_path", "to_native_path_windows", "to_native_path_linux",
26
"join_path_native", "Stats", "IndexFileSHA1Writer", "Iterable", "IterableList",
27
"BlockingLockFile", "LockFile", 'Actor', 'get_user_id', 'assure_directory_exists',
28
'RemoteProgress', 'rmtree')
33
"""Remove the given recursively.
34
:note: we use shutil rmtree but adjust its behaviour to see whether files that
35
couldn't be deleted are read-only. Windows will not remove them in that case"""
36
def onerror(func, path, exc_info):
37
if not os.access(path, os.W_OK):
38
# Is the error an access error ?
39
os.chmod(path, stat.S_IWUSR)
44
return shutil.rmtree(path, False, onerror)
48
def stream_copy(source, destination, chunk_size=512*1024):
    """Copy all data from the source stream into the destination stream in chunks
    of size chunk_size

    :return: amount of bytes written"""
    br = 0
    while True:
        chunk = source.read(chunk_size)
        destination.write(chunk)
        br += len(chunk)
        # a short read means the source is exhausted
        if len(chunk) < chunk_size:
            break
    # END reading output stream
    return br
64
"""Join path tokens together similar to os.path.join, but always use
65
'/' instead of possibly '\' on windows."""
72
elif path == '' or path.endswith('/'):
76
# END for each path token to add
79
def to_native_path_windows(path):
    """Return ``path`` with every '/' separator converted to a windows '\\'."""
    converted = path.replace('/', '\\')
    return converted
82
def to_native_path_linux(path):
    """Return ``path`` with every windows '\\' separator converted to '/'."""
    converted = path.replace('\\', '/')
    return converted
85
if sys.platform.startswith('win'):
86
to_native_path = to_native_path_windows
88
# no need for any work on linux
89
def to_native_path_linux(path):
91
to_native_path = to_native_path_linux
93
def join_path_native(a, *p):
    """
    As join path, but makes sure an OS native path is returned. This is only
    needed to play it safe on my dear windows and to assure nice paths that only
    use the native separator"""
    return to_native_path(join_path(a, *p))
100
def assure_directory_exists(path, is_file=False):
    """Assure that the directory pointed to by path exists.

    :param is_file: If True, path is assumed to be a file and handled correctly.
        Otherwise it must be a directory
    :return: True if the directory was created, False if it already existed"""
    if is_file:
        # work on the containing directory of the given file
        path = os.path.dirname(path)
    # END handle file
    if not os.path.isdir(path):
        os.makedirs(path)
        return True
    return False
115
""":return: string identifying the currently active system user as name@node
116
:note: user can be set with the 'USER' environment variable, usually set on windows"""
118
username = os.environ.get('USER', os.environ.get('USERNAME', ukn))
119
if username == ukn and hasattr(os, 'getlogin'):
120
username = os.getlogin()
121
# END get username from login
122
return "%s@%s" % (username, platform.node())
128
class RemoteProgress(object):
130
Handler providing an interface to parse progress information emitted by git-push
131
and git-fetch and to dispatch callbacks allowing subclasses to react to the progress.
134
BEGIN, END, COUNTING, COMPRESSING, WRITING, RECEIVING, RESOLVING = [1 << x for x in range(_num_op_codes)]
135
STAGE_MASK = BEGIN|END
136
OP_MASK = ~STAGE_MASK
138
__slots__ = ("_cur_line", "_seen_ops")
139
re_op_absolute = re.compile("(remote: )?([\w\s]+):\s+()(\d+)()(.*)")
140
re_op_relative = re.compile("(remote: )?([\w\s]+):\s+(\d+)% \((\d+)/(\d+)\)(.*)")
143
def __init__(self):
    """Initialize the progress parser with an empty record of seen operation codes."""
    self._seen_ops = list()
145
def _parse_progress_line(self, line):
146
"""Parse progress information from the given line as retrieved by git-push
149
:return: list(line, ...) list of lines that could not be processed"""
151
# Counting objects: 4, done.
152
# Compressing objects: 50% (1/2) \rCompressing objects: 100% (2/2) \rCompressing objects: 100% (2/2), done.
153
self._cur_line = line
154
sub_lines = line.split('\r')
155
failed_lines = list()
156
for sline in sub_lines:
157
# find esacpe characters and cut them away - regex will not work with
158
# them as they are non-ascii. As git might expect a tty, it will send them
159
last_valid_index = None
160
for i,c in enumerate(reversed(sline)):
163
last_valid_index = -i-1
164
# END character was non-ascii
165
# END for each character in sline
166
if last_valid_index is not None:
167
sline = sline[:last_valid_index]
168
# END cut away invalid part
169
sline = sline.rstrip()
171
cur_count, max_count = None, None
172
match = self.re_op_relative.match(sline)
174
match = self.re_op_absolute.match(sline)
177
self.line_dropped(sline)
178
failed_lines.append(sline)
180
# END could not get match
183
remote, op_name, percent, cur_count, max_count, message = match.groups()
186
if op_name == "Counting objects":
187
op_code |= self.COUNTING
188
elif op_name == "Compressing objects":
189
op_code |= self.COMPRESSING
190
elif op_name == "Writing objects":
191
op_code |= self.WRITING
192
elif op_name == 'Receiving objects':
193
op_code |= self.RECEIVING
194
elif op_name == 'Resolving deltas':
195
op_code |= self.RESOLVING
197
# Note: On windows it can happen that partial lines are sent
198
# Hence we get something like "CompreReceiving objects", which is
199
# a blend of "Compressing objects" and "Receiving objects".
200
# This can't really be prevented, so we drop the line verbosely
201
# to make sure we get informed in case the process spits out new
202
# commands at some point.
203
self.line_dropped(sline)
204
sys.stderr.write("Operation name %r unknown - skipping line '%s'" % (op_name, sline))
205
# Note: Don't add this line to the failed lines, as we have to silently
211
if op_code not in self._seen_ops:
212
self._seen_ops.append(op_code)
213
op_code |= self.BEGIN
218
# END message handling
220
message = message.strip()
221
done_token = ', done.'
222
if message.endswith(done_token):
224
message = message[:-len(done_token)]
225
# END end message handling
227
self.update(op_code, cur_count, max_count, message)
228
# END for each sub line
231
def line_dropped(self, line):
    """Hook invoked for every line that could not be understood and was dropped.

    The default implementation does nothing; subclasses may override it."""
235
def update(self, op_code, cur_count, max_count=None, message=''):
    """Called whenever the progress changes

    :param op_code:
        Integer allowing to be compared against Operation IDs and stage IDs.

        Stage IDs are BEGIN and END. BEGIN will only be set once for each Operation
        ID as well as END. It may be that BEGIN and END are set at once in case only
        one progress message was emitted due to the speed of the operation.
        Between BEGIN and END, none of these flags will be set

        Operation IDs are all held within the OP_MASK.
    :param cur_count: Current absolute count of items

    :param max_count:
        The maximum count of items we expect. It may be None in case there is
        no maximum number of items or if it is (yet) unknown.

    :param message:
        In case of the 'WRITING' operation, it contains the amount of bytes
        transferred. It may possibly be used for other purposes as well.

    You may read the contents of the current line in self._cur_line"""
263
"""Actors hold information about a person acting on the repository. They
264
can be committers and authors or anything with a name and an email as
265
mentioned in the git log entries."""
267
name_only_regex = re.compile( r'<(.+)>' )
268
name_email_regex = re.compile( r'(.*) <(.+?)>' )
270
# ENVIRONMENT VARIABLES
271
# read when creating new commits
272
env_author_name = "GIT_AUTHOR_NAME"
273
env_author_email = "GIT_AUTHOR_EMAIL"
274
env_committer_name = "GIT_COMMITTER_NAME"
275
env_committer_email = "GIT_COMMITTER_EMAIL"
281
__slots__ = ('name', 'email')
283
def __init__(self, name, email):
    """Store the actor's name and email as given."""
    self.name = name
    self.email = email
287
def __eq__(self, other):
    """Actors compare equal when both name and email match."""
    if self.name != other.name:
        return False
    return self.email == other.email
290
def __ne__(self, other):
    """Inverse of equality."""
    equal = (self == other)
    return not equal
294
def __hash__(self):
    """Hash on the same (name, email) pair used by __eq__."""
    return hash((self.name, self.email))
300
def __repr__(self):
    """Debug representation including name and email."""
    return '<git.Actor "%s <%s>">' % (self.name, self.email)
303
def _from_string(cls, string):
304
"""Create an Actor from a string.
305
:param string: is the string, which is expected to be in regular git format
307
John Doe <jdoe@example.com>
310
m = cls.name_email_regex.search(string)
312
name, email = m.groups()
313
return Actor(name, email)
315
m = cls.name_only_regex.search(string)
317
return Actor(m.group(1), None)
319
# assume best and use the whole string as name
320
return Actor(string, None)
321
# END special case name
322
# END handle name/email matching
325
def _main_actor(cls, env_name, env_email, config_reader=None):
326
actor = Actor('', '')
327
default_email = get_user_id()
328
default_name = default_email.split('@')[0]
330
for attr, evar, cvar, default in (('name', env_name, cls.conf_name, default_name),
331
('email', env_email, cls.conf_email, default_email)):
333
setattr(actor, attr, os.environ[evar])
335
if config_reader is not None:
336
setattr(actor, attr, config_reader.get_value('user', cvar, default))
337
#END config-reader handling
338
if not getattr(actor, attr):
339
setattr(actor, attr, default)
341
#END for each item to retrieve
346
def committer(cls, config_reader=None):
    """
    :return: Actor instance corresponding to the configured committer. It behaves
        similar to the git implementation, such that the environment will override
        configuration values of config_reader. If no value is set at all, it will be
        generated
    :param config_reader: ConfigReader to use to retrieve the values from in case
        they are not set in the environment"""
    return cls._main_actor(cls.env_committer_name, cls.env_committer_email, config_reader)
357
def author(cls, config_reader=None):
    """Same as committer(), but resolves the main author instead. It may be
    specified in the environment, but defaults to the committer"""
    name_var = cls.env_author_name
    email_var = cls.env_author_email
    return cls._main_actor(name_var, email_var, config_reader)
364
Represents stat information as presented by git at the end of a merge. It is
365
created from the output of a diff operation.
371
s.total # full-stat-dict
372
s.files # dict( filepath : stat-dict )
376
A dictionary with the following keys and values::
378
deletions = number of deleted lines as int
379
insertions = number of inserted lines as int
380
lines = total number of lines changed as int, or deletions + insertions
384
In addition to the items in the stat-dict, it features additional information::
386
files = number of changed files as int"""
387
__slots__ = ("total", "files")
389
def __init__(self, total, files):
    """Store the aggregate stat-dict and the per-file stat-dict mapping."""
    self.total = total
    self.files = files
394
def _list_from_string(cls, repo, text):
    """Create a Stat object from output retrieved by git-diff.

    :return: git.Stat"""
    hsh = {'total': {'insertions': 0, 'deletions': 0, 'lines': 0, 'files': 0}, 'files': dict()}
    for line in text.splitlines():
        # numstat format: insertions<TAB>deletions<TAB>filename
        (raw_insertions, raw_deletions, filename) = line.split("\t")
        # '-' marks a binary file for which no line counts exist
        insertions = raw_insertions != '-' and int(raw_insertions) or 0
        deletions = raw_deletions != '-' and int(raw_deletions) or 0
        hsh['total']['insertions'] += insertions
        hsh['total']['deletions'] += deletions
        hsh['total']['lines'] += insertions + deletions
        hsh['total']['files'] += 1
        hsh['files'][filename.strip()] = {'insertions': insertions,
                                          'deletions': deletions,
                                          'lines': insertions + deletions}
    # END for each line
    return Stats(hsh['total'], hsh['files'])
413
class IndexFileSHA1Writer(object):
414
"""Wrapper around a file-like object that remembers the SHA1 of
415
the data written to it. It will write a sha when the stream is closed
416
or if the asked for explicitly usign write_sha.
418
Only useful to the indexfile
420
:note: Based on the dulwich project"""
421
__slots__ = ("f", "sha1")
423
def __init__(self, f):
    # wrapped stream receiving all writes (write() forwards to it)
    self.f = f
    # running SHA1 over everything written; make_sha comes from gitdb.util
    self.sha1 = make_sha("")
427
def write(self, data):
    """Feed ``data`` into the SHA1 accumulator, then forward it to the wrapped stream.

    :return: whatever the underlying stream's write returns"""
    self.sha1.update(data)
    written = self.f.write(data)
    return written
432
def write_sha(self):
    """Write the digest of all data hashed so far to the wrapped stream.

    :return: the sha bytes that were written"""
    sha = self.sha1.digest()
    self.f.write(sha)
    return sha
437
def close(self):
    """Write the final sha to the stream, then close it.

    :return: the sha bytes written by write_sha"""
    sha = self.write_sha()
    self.f.close()
    return sha
445
class LockFile(object):
446
"""Provides methods to obtain, check for, and release a file based lock which
447
should be used to handle concurrent access to the same file.
449
As we are a utility class to be derived from, we only use protected methods.
451
Locks will automatically be released on destruction"""
452
__slots__ = ("_file_path", "_owns_lock")
454
def __init__(self, file_path):
    """Remember the path to guard; no lock is held after construction."""
    self._owns_lock = False
    self._file_path = file_path
461
def _lock_file_path(self):
    """:return: Path to lockfile"""
    guarded = self._file_path
    return "%s.lock" % (guarded)
466
""":return: True if we have a lock and if the lockfile still exists
467
:raise AssertionError: if our lock-file does not exist"""
468
if not self._owns_lock:
473
def _obtain_lock_or_raise(self):
    """Create a lock file as flag for other instances, mark our instance as lock-holder

    :raise IOError: if a lock was already present or a lock file could not be written"""
    if self._has_lock():
        return
    lock_file = self._lock_file_path()
    if os.path.isfile(lock_file):
        raise IOError("Lock for file %r did already exist, delete %r in case the lock is illegal" % (self._file_path, lock_file))

    try:
        # O_EXCL makes the creation atomic - it fails if the file already exists
        fd = os.open(lock_file, os.O_WRONLY | os.O_CREAT | os.O_EXCL, 0)
        os.close(fd)
    except OSError as e:
        raise IOError(str(e))

    self._owns_lock = True
491
def _obtain_lock(self):
    """Hook for acquiring the lock - raises if it cannot be obtained.

    Subclasses may override this method to provide a different implementation"""
    return self._obtain_lock_or_raise()
496
def _release_lock(self):
497
"""Release our lock if we have one"""
498
if not self._has_lock():
501
# if someone removed our file beforhand, lets just flag this issue
502
# instead of failing, to make it more usable.
503
lfp = self._lock_file_path()
505
# on bloody windows, the file needs write permissions to be removable.
513
self._owns_lock = False
516
class BlockingLockFile(LockFile):
517
"""The lock file will block until a lock could be obtained, or fail after
520
:note: If the directory containing the lock was removed, an exception will
521
be raised during the blocking period, preventing hangs as the lock
522
can never be obtained."""
523
__slots__ = ("_check_interval", "_max_block_time")
524
def __init__(self, file_path, check_interval_s=0.3, max_block_time_s=sys.maxint):
    """Configure the instance

    :parm check_interval_s:
        Period of time to sleep until the lock is checked the next time.
        By default, it waits a nearly unlimited time
    :parm max_block_time_s: Maximum amount of seconds we may lock"""
    super(BlockingLockFile, self).__init__(file_path)
    self._max_block_time = max_block_time_s
    self._check_interval = check_interval_s
536
def _obtain_lock(self):
537
"""This method blocks until it obtained the lock, or raises IOError if
538
it ran out of time or if the parent directory was not available anymore.
539
If this method returns, you are guranteed to own the lock"""
540
starttime = time.time()
541
maxtime = starttime + float(self._max_block_time)
544
super(BlockingLockFile, self)._obtain_lock()
546
# synity check: if the directory leading to the lockfile is not
547
# readable anymore, raise an execption
548
curtime = time.time()
549
if not os.path.isdir(os.path.dirname(self._lock_file_path())):
550
msg = "Directory containing the lockfile %r was not readable anymore after waiting %g seconds" % (self._lock_file_path(), curtime - starttime)
552
# END handle missing directory
554
if curtime >= maxtime:
555
msg = "Waited %g seconds for lock at %r" % ( maxtime - starttime, self._lock_file_path())
557
# END abort if we wait too long
558
time.sleep(self._check_interval)
564
class IterableList(list):
566
List of iterable objects allowing to query an object by id or by named index::
573
It requires an id_attribute name to be set which will be queried from its
574
contained items to have a means for comparison.
576
A prefix can be specified which is to be used in case the id returned by the
577
items always contains a prefix that does not matter to the user, so it
579
__slots__ = ('_id_attr', '_prefix')
581
def __new__(cls, id_attr, prefix=''):
    """Create the bare list instance - the extra arguments are handled by __init__."""
    instance = super(IterableList, cls).__new__(cls)
    return instance
584
def __init__(self, id_attr, prefix=''):
    """Remember the attribute name used to identify items and the optional prefix.

    :raise ValueError: if id_attr is not a string"""
    self._prefix = prefix
    self._id_attr = id_attr
    if isinstance(id_attr, basestring):
        return
    # END help debugging !
    raise ValueError("First parameter must be a string identifying the name-property. Extend the list after initialization")
591
def __contains__(self, attr):
592
# first try identy match for performance
593
rval = list.__contains__(self, attr)
598
# otherwise make a full name search
602
except (AttributeError, TypeError):
604
#END handle membership
606
def __getattr__(self, attr):
607
attr = self._prefix + attr
609
if getattr(item, self._id_attr) == attr:
612
return list.__getattribute__(self, attr)
614
def __getitem__(self, index):
615
if isinstance(index, int):
616
return list.__getitem__(self,index)
619
return getattr(self, index)
620
except AttributeError:
621
raise IndexError( "No item found with id %r" % (self._prefix + index) )
624
def __delitem__(self, index):
626
if not isinstance(index, int):
628
name = self._prefix + index
629
for i, item in enumerate(self):
630
if getattr(item, self._id_attr) == name:
636
raise IndexError("Item with name %s not found" % name)
638
#END get index to delete
639
list.__delitem__(self, delindex)
642
class Iterable(object):
643
"""Defines an interface for iterable items which is to assure a uniform
644
way to retrieve and iterate items within the git repository"""
646
_id_attribute_ = "attribute that most suitably identifies your instance"
649
def list_items(cls, repo, *args, **kwargs):
    """
    Find all items of this type - subclasses can specify args and kwargs differently.
    If no args are given, subclasses are obliged to return all items if no additional
    args or kwargs are provided.

    :note: Favor the iter_items method as it will avoid building the full list eagerly
    :return: list(Item,...) list of item instances"""
    out_list = IterableList(cls._id_attribute_)
    out_list.extend(cls.iter_items(repo, *args, **kwargs))
    return out_list
664
def iter_items(cls, repo, *args, **kwargs):
    """Iterate all items of this type - for argument details see list_items.

    :return: iterator yielding Items
    :raise NotImplementedError: this base implementation must be overridden"""
    raise NotImplementedError("To be implemented by Subclass")