~ubuntu-branches/ubuntu/precise/dulwich/precise-security

1.2.14 by Jelmer Vernooij
Import upstream version 0.7.0
1
# test_diff_tree.py -- Tests for file and tree diff utilities.
2
# Copyright (C) 2010 Google, Inc.
3
#
4
# This program is free software; you can redistribute it and/or
5
# modify it under the terms of the GNU General Public License
6
# as published by the Free Software Foundation; either version 2
7
# or (at your option) a later version of the License.
8
#
9
# This program is distributed in the hope that it will be useful,
10
# but WITHOUT ANY WARRANTY; without even the implied warranty of
11
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12
# GNU General Public License for more details.
13
#
14
# You should have received a copy of the GNU General Public License
15
# along with this program; if not, write to the Free Software
16
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
17
# MA  02110-1301, USA.
18
19
"""Tests for file and tree diff utilities."""
20
21
from dulwich.diff_tree import (
22
    CHANGE_MODIFY,
23
    CHANGE_RENAME,
24
    CHANGE_COPY,
25
    CHANGE_UNCHANGED,
26
    TreeChange,
27
    _merge_entries,
28
    _merge_entries_py,
29
    tree_changes,
1.2.16 by Jelmer Vernooij
Import upstream version 0.8.0
30
    tree_changes_for_merge,
1.2.14 by Jelmer Vernooij
Import upstream version 0.7.0
31
    _count_blocks,
32
    _count_blocks_py,
33
    _similarity_score,
34
    _tree_change_key,
35
    RenameDetector,
36
    _is_tree,
37
    _is_tree_py
38
    )
39
from dulwich.index import (
40
    commit_tree,
41
    )
42
from dulwich._compat import (
43
    permutations,
44
    )
45
from dulwich.object_store import (
46
    MemoryObjectStore,
47
    )
48
from dulwich.objects import (
49
    ShaFile,
50
    Blob,
51
    TreeEntry,
52
    Tree,
53
    )
54
from dulwich.tests import (
55
    TestCase,
56
    )
57
from dulwich.tests.utils import (
1.2.16 by Jelmer Vernooij
Import upstream version 0.8.0
58
    F,
1.2.14 by Jelmer Vernooij
Import upstream version 0.7.0
59
    make_object,
60
    functest_builder,
61
    ext_functest_builder,
62
    )
63
64
65
class DiffTestCase(TestCase):
66
67
    def setUp(self):
68
        super(DiffTestCase, self).setUp()
69
        self.store = MemoryObjectStore()
70
        self.empty_tree = self.commit_tree([])
71
72
    def commit_tree(self, entries):
73
        commit_blobs = []
74
        for entry in entries:
75
            if len(entry) == 2:
76
                path, obj = entry
77
                mode = F
78
            else:
79
                path, obj, mode = entry
80
            if isinstance(obj, Blob):
81
                self.store.add_object(obj)
82
                sha = obj.id
83
            else:
84
                sha = obj
85
            commit_blobs.append((path, sha, mode))
86
        return self.store[commit_tree(self.store, commit_blobs)]
87
88
89
class TreeChangesTest(DiffTestCase):
90
1.2.16 by Jelmer Vernooij
Import upstream version 0.8.0
91
    def setUp(self):
92
        super(TreeChangesTest, self).setUp()
93
        self.detector = RenameDetector(self.store)
94
1.2.14 by Jelmer Vernooij
Import upstream version 0.7.0
95
    def assertMergeFails(self, merge_entries, name, mode, sha):
96
        t = Tree()
97
        t[name] = (mode, sha)
98
        self.assertRaises(TypeError, merge_entries, '', t, t)
99
100
    def _do_test_merge_entries(self, merge_entries):
101
        blob_a1 = make_object(Blob, data='a1')
102
        blob_a2 = make_object(Blob, data='a2')
103
        blob_b1 = make_object(Blob, data='b1')
104
        blob_c2 = make_object(Blob, data='c2')
105
        tree1 = self.commit_tree([('a', blob_a1, 0100644),
106
                                  ('b', blob_b1, 0100755)])
107
        tree2 = self.commit_tree([('a', blob_a2, 0100644),
108
                                  ('c', blob_c2, 0100755)])
109
110
        self.assertEqual([], merge_entries('', self.empty_tree,
111
                                           self.empty_tree))
112
        self.assertEqual([
113
          ((None, None, None), ('a', 0100644, blob_a1.id)),
114
          ((None, None, None), ('b', 0100755, blob_b1.id)),
115
          ], merge_entries('', self.empty_tree, tree1))
116
        self.assertEqual([
117
          ((None, None, None), ('x/a', 0100644, blob_a1.id)),
118
          ((None, None, None), ('x/b', 0100755, blob_b1.id)),
119
          ], merge_entries('x', self.empty_tree, tree1))
120
121
        self.assertEqual([
122
          (('a', 0100644, blob_a2.id), (None, None, None)),
123
          (('c', 0100755, blob_c2.id), (None, None, None)),
124
          ], merge_entries('', tree2, self.empty_tree))
125
126
        self.assertEqual([
127
          (('a', 0100644, blob_a1.id), ('a', 0100644, blob_a2.id)),
128
          (('b', 0100755, blob_b1.id), (None, None, None)),
129
          ((None, None, None), ('c', 0100755, blob_c2.id)),
130
          ], merge_entries('', tree1, tree2))
131
132
        self.assertEqual([
133
          (('a', 0100644, blob_a2.id), ('a', 0100644, blob_a1.id)),
134
          ((None, None, None), ('b', 0100755, blob_b1.id)),
135
          (('c', 0100755, blob_c2.id), (None, None, None)),
136
          ], merge_entries('', tree2, tree1))
137
138
        self.assertMergeFails(merge_entries, 0xdeadbeef, 0100644, '1' * 40)
139
        self.assertMergeFails(merge_entries, 'a', 'deadbeef', '1' * 40)
140
        self.assertMergeFails(merge_entries, 'a', 0100644, 0xdeadbeef)
141
142
    test_merge_entries = functest_builder(_do_test_merge_entries,
143
                                          _merge_entries_py)
144
    test_merge_entries_extension = ext_functest_builder(_do_test_merge_entries,
145
                                                        _merge_entries)
146
147
    def _do_test_is_tree(self, is_tree):
148
        self.assertFalse(is_tree(TreeEntry(None, None, None)))
149
        self.assertFalse(is_tree(TreeEntry('a', 0100644, 'a' * 40)))
150
        self.assertFalse(is_tree(TreeEntry('a', 0100755, 'a' * 40)))
151
        self.assertFalse(is_tree(TreeEntry('a', 0120000, 'a' * 40)))
152
        self.assertTrue(is_tree(TreeEntry('a', 0040000, 'a' * 40)))
153
        self.assertRaises(TypeError, is_tree, TreeEntry('a', 'x', 'a' * 40))
154
        self.assertRaises(AttributeError, is_tree, 1234)
155
156
    test_is_tree = functest_builder(_do_test_is_tree, _is_tree_py)
157
    test_is_tree_extension = ext_functest_builder(_do_test_is_tree, _is_tree)
158
159
    def assertChangesEqual(self, expected, tree1, tree2, **kwargs):
160
        actual = list(tree_changes(self.store, tree1.id, tree2.id, **kwargs))
161
        self.assertEqual(expected, actual)
162
163
    # For brevity, the following tests use tuples instead of TreeEntry objects.
164
165
    def test_tree_changes_empty(self):
166
        self.assertChangesEqual([], self.empty_tree, self.empty_tree)
167
168
    def test_tree_changes_no_changes(self):
169
        blob = make_object(Blob, data='blob')
170
        tree = self.commit_tree([('a', blob), ('b/c', blob)])
171
        self.assertChangesEqual([], self.empty_tree, self.empty_tree)
172
        self.assertChangesEqual([], tree, tree)
173
        self.assertChangesEqual(
174
          [TreeChange(CHANGE_UNCHANGED, ('a', F, blob.id), ('a', F, blob.id)),
175
           TreeChange(CHANGE_UNCHANGED, ('b/c', F, blob.id),
176
                      ('b/c', F, blob.id))],
177
          tree, tree, want_unchanged=True)
178
179
    def test_tree_changes_add_delete(self):
180
        blob_a = make_object(Blob, data='a')
181
        blob_b = make_object(Blob, data='b')
182
        tree = self.commit_tree([('a', blob_a, 0100644),
183
                                 ('x/b', blob_b, 0100755)])
184
        self.assertChangesEqual(
185
          [TreeChange.add(('a', 0100644, blob_a.id)),
186
           TreeChange.add(('x/b', 0100755, blob_b.id))],
187
          self.empty_tree, tree)
188
        self.assertChangesEqual(
189
          [TreeChange.delete(('a', 0100644, blob_a.id)),
190
           TreeChange.delete(('x/b', 0100755, blob_b.id))],
191
          tree, self.empty_tree)
192
193
    def test_tree_changes_modify_contents(self):
194
        blob_a1 = make_object(Blob, data='a1')
195
        blob_a2 = make_object(Blob, data='a2')
196
        tree1 = self.commit_tree([('a', blob_a1)])
197
        tree2 = self.commit_tree([('a', blob_a2)])
198
        self.assertChangesEqual(
199
          [TreeChange(CHANGE_MODIFY, ('a', F, blob_a1.id),
200
                      ('a', F, blob_a2.id))], tree1, tree2)
201
202
    def test_tree_changes_modify_mode(self):
203
        blob_a = make_object(Blob, data='a')
204
        tree1 = self.commit_tree([('a', blob_a, 0100644)])
205
        tree2 = self.commit_tree([('a', blob_a, 0100755)])
206
        self.assertChangesEqual(
207
          [TreeChange(CHANGE_MODIFY, ('a', 0100644, blob_a.id),
208
                      ('a', 0100755, blob_a.id))], tree1, tree2)
209
210
    def test_tree_changes_change_type(self):
211
        blob_a1 = make_object(Blob, data='a')
212
        blob_a2 = make_object(Blob, data='/foo/bar')
213
        tree1 = self.commit_tree([('a', blob_a1, 0100644)])
214
        tree2 = self.commit_tree([('a', blob_a2, 0120000)])
215
        self.assertChangesEqual(
216
          [TreeChange.delete(('a', 0100644, blob_a1.id)),
217
           TreeChange.add(('a', 0120000, blob_a2.id))],
218
          tree1, tree2)
219
220
    def test_tree_changes_to_tree(self):
221
        blob_a = make_object(Blob, data='a')
222
        blob_x = make_object(Blob, data='x')
223
        tree1 = self.commit_tree([('a', blob_a)])
224
        tree2 = self.commit_tree([('a/x', blob_x)])
225
        self.assertChangesEqual(
226
          [TreeChange.delete(('a', F, blob_a.id)),
227
           TreeChange.add(('a/x', F, blob_x.id))],
228
          tree1, tree2)
229
230
    def test_tree_changes_complex(self):
231
        blob_a_1 = make_object(Blob, data='a1_1')
232
        blob_bx1_1 = make_object(Blob, data='bx1_1')
233
        blob_bx2_1 = make_object(Blob, data='bx2_1')
234
        blob_by1_1 = make_object(Blob, data='by1_1')
235
        blob_by2_1 = make_object(Blob, data='by2_1')
236
        tree1 = self.commit_tree([
237
          ('a', blob_a_1),
238
          ('b/x/1', blob_bx1_1),
239
          ('b/x/2', blob_bx2_1),
240
          ('b/y/1', blob_by1_1),
241
          ('b/y/2', blob_by2_1),
242
          ])
243
244
        blob_a_2 = make_object(Blob, data='a1_2')
245
        blob_bx1_2 = blob_bx1_1
246
        blob_by_2 = make_object(Blob, data='by_2')
247
        blob_c_2 = make_object(Blob, data='c_2')
248
        tree2 = self.commit_tree([
249
          ('a', blob_a_2),
250
          ('b/x/1', blob_bx1_2),
251
          ('b/y', blob_by_2),
252
          ('c', blob_c_2),
253
          ])
254
255
        self.assertChangesEqual(
256
          [TreeChange(CHANGE_MODIFY, ('a', F, blob_a_1.id),
257
                      ('a', F, blob_a_2.id)),
258
           TreeChange.delete(('b/x/2', F, blob_bx2_1.id)),
259
           TreeChange.add(('b/y', F, blob_by_2.id)),
260
           TreeChange.delete(('b/y/1', F, blob_by1_1.id)),
261
           TreeChange.delete(('b/y/2', F, blob_by2_1.id)),
262
           TreeChange.add(('c', F, blob_c_2.id))],
263
          tree1, tree2)
264
265
    def test_tree_changes_name_order(self):
266
        blob = make_object(Blob, data='a')
267
        tree1 = self.commit_tree([('a', blob), ('a.', blob), ('a..', blob)])
268
        # Tree order is the reverse of this, so if we used tree order, 'a..'
269
        # would not be merged.
270
        tree2 = self.commit_tree([('a/x', blob), ('a./x', blob), ('a..', blob)])
271
272
        self.assertChangesEqual(
273
          [TreeChange.delete(('a', F, blob.id)),
274
           TreeChange.add(('a/x', F, blob.id)),
275
           TreeChange.delete(('a.', F, blob.id)),
276
           TreeChange.add(('a./x', F, blob.id))],
277
          tree1, tree2)
278
279
    def test_tree_changes_prune(self):
280
        blob_a1 = make_object(Blob, data='a1')
281
        blob_a2 = make_object(Blob, data='a2')
282
        blob_x = make_object(Blob, data='x')
283
        tree1 = self.commit_tree([('a', blob_a1), ('b/x', blob_x)])
284
        tree2 = self.commit_tree([('a', blob_a2), ('b/x', blob_x)])
285
        # Remove identical items so lookups will fail unless we prune.
286
        subtree = self.store[tree1['b'][1]]
287
        for entry in subtree.iteritems():
288
            del self.store[entry.sha]
289
        del self.store[subtree.id]
290
291
        self.assertChangesEqual(
292
          [TreeChange(CHANGE_MODIFY, ('a', F, blob_a1.id),
293
                      ('a', F, blob_a2.id))],
294
          tree1, tree2)
295
1.2.16 by Jelmer Vernooij
Import upstream version 0.8.0
296
    def test_tree_changes_rename_detector(self):
297
        blob_a1 = make_object(Blob, data='a\nb\nc\nd\n')
298
        blob_a2 = make_object(Blob, data='a\nb\nc\ne\n')
299
        blob_b = make_object(Blob, data='b')
300
        tree1 = self.commit_tree([('a', blob_a1), ('b', blob_b)])
301
        tree2 = self.commit_tree([('c', blob_a2), ('b', blob_b)])
302
        detector = RenameDetector(self.store)
303
304
        self.assertChangesEqual(
305
          [TreeChange.delete(('a', F, blob_a1.id)),
306
           TreeChange.add(('c', F, blob_a2.id))],
307
          tree1, tree2)
308
        self.assertChangesEqual(
309
          [TreeChange.delete(('a', F, blob_a1.id)),
310
           TreeChange(CHANGE_UNCHANGED, ('b', F, blob_b.id),
311
                      ('b', F, blob_b.id)),
312
           TreeChange.add(('c', F, blob_a2.id))],
313
          tree1, tree2, want_unchanged=True)
314
        self.assertChangesEqual(
315
          [TreeChange(CHANGE_RENAME, ('a', F, blob_a1.id),
316
                      ('c', F, blob_a2.id))],
317
          tree1, tree2, rename_detector=detector)
318
        self.assertChangesEqual(
319
          [TreeChange(CHANGE_RENAME, ('a', F, blob_a1.id),
320
                      ('c', F, blob_a2.id)),
321
           TreeChange(CHANGE_UNCHANGED, ('b', F, blob_b.id),
322
                      ('b', F, blob_b.id))],
323
          tree1, tree2, rename_detector=detector, want_unchanged=True)
324
325
    def assertChangesForMergeEqual(self, expected, parent_trees, merge_tree,
326
                                   **kwargs):
327
        parent_tree_ids = [t.id for t in parent_trees]
328
        actual = list(tree_changes_for_merge(
329
          self.store, parent_tree_ids, merge_tree.id, **kwargs))
330
        self.assertEqual(expected, actual)
331
332
        parent_tree_ids.reverse()
333
        expected = [list(reversed(cs)) for cs in expected]
334
        actual = list(tree_changes_for_merge(
335
          self.store, parent_tree_ids, merge_tree.id, **kwargs))
336
        self.assertEqual(expected, actual)
337
338
    def test_tree_changes_for_merge_add_no_conflict(self):
339
        blob = make_object(Blob, data='blob')
340
        parent1 = self.commit_tree([])
341
        parent2 = merge = self.commit_tree([('a', blob)])
342
        self.assertChangesForMergeEqual([], [parent1, parent2], merge)
343
        self.assertChangesForMergeEqual([], [parent2, parent2], merge)
344
345
    def test_tree_changes_for_merge_add_modify_conflict(self):
346
        blob1 = make_object(Blob, data='1')
347
        blob2 = make_object(Blob, data='2')
348
        parent1 = self.commit_tree([])
349
        parent2 = self.commit_tree([('a', blob1)])
350
        merge = self.commit_tree([('a', blob2)])
351
        self.assertChangesForMergeEqual(
352
          [[TreeChange.add(('a', F, blob2.id)),
353
            TreeChange(CHANGE_MODIFY, ('a', F, blob1.id), ('a', F, blob2.id))]],
354
          [parent1, parent2], merge)
355
356
    def test_tree_changes_for_merge_modify_modify_conflict(self):
357
        blob1 = make_object(Blob, data='1')
358
        blob2 = make_object(Blob, data='2')
359
        blob3 = make_object(Blob, data='3')
360
        parent1 = self.commit_tree([('a', blob1)])
361
        parent2 = self.commit_tree([('a', blob2)])
362
        merge = self.commit_tree([('a', blob3)])
363
        self.assertChangesForMergeEqual(
364
          [[TreeChange(CHANGE_MODIFY, ('a', F, blob1.id), ('a', F, blob3.id)),
365
            TreeChange(CHANGE_MODIFY, ('a', F, blob2.id), ('a', F, blob3.id))]],
366
          [parent1, parent2], merge)
367
368
    def test_tree_changes_for_merge_modify_no_conflict(self):
369
        blob1 = make_object(Blob, data='1')
370
        blob2 = make_object(Blob, data='2')
371
        parent1 = self.commit_tree([('a', blob1)])
372
        parent2 = merge = self.commit_tree([('a', blob2)])
373
        self.assertChangesForMergeEqual([], [parent1, parent2], merge)
374
375
    def test_tree_changes_for_merge_delete_delete_conflict(self):
376
        blob1 = make_object(Blob, data='1')
377
        blob2 = make_object(Blob, data='2')
378
        parent1 = self.commit_tree([('a', blob1)])
379
        parent2 = self.commit_tree([('a', blob2)])
380
        merge = self.commit_tree([])
381
        self.assertChangesForMergeEqual(
382
          [[TreeChange.delete(('a', F, blob1.id)),
383
            TreeChange.delete(('a', F, blob2.id))]],
384
          [parent1, parent2], merge)
385
386
    def test_tree_changes_for_merge_delete_no_conflict(self):
387
        blob = make_object(Blob, data='blob')
388
        has = self.commit_tree([('a', blob)])
389
        doesnt_have = self.commit_tree([])
390
        self.assertChangesForMergeEqual([], [has, has], doesnt_have)
391
        self.assertChangesForMergeEqual([], [has, doesnt_have], doesnt_have)
392
393
    def test_tree_changes_for_merge_octopus_no_conflict(self):
394
        r = range(5)
395
        blobs = [make_object(Blob, data=str(i)) for i in r]
396
        parents = [self.commit_tree([('a', blobs[i])]) for i in r]
397
        for i in r:
398
            # Take the SHA from each of the parents.
399
            self.assertChangesForMergeEqual([], parents, parents[i])
400
401
    def test_tree_changes_for_merge_octopus_modify_conflict(self):
402
        # Because the octopus merge strategy is limited, I doubt it's possible
403
        # to create this with the git command line. But the output is well-
404
        # defined, so test it anyway.
405
        r = range(5)
406
        parent_blobs = [make_object(Blob, data=str(i)) for i in r]
407
        merge_blob = make_object(Blob, data='merge')
408
        parents = [self.commit_tree([('a', parent_blobs[i])]) for i in r]
409
        merge = self.commit_tree([('a', merge_blob)])
410
        expected = [[TreeChange(CHANGE_MODIFY, ('a', F, parent_blobs[i].id),
411
                                ('a', F, merge_blob.id)) for i in r]]
412
        self.assertChangesForMergeEqual(expected, parents, merge)
413
414
    def test_tree_changes_for_merge_octopus_delete(self):
415
        blob1 = make_object(Blob, data='1')
416
        blob2 = make_object(Blob, data='3')
417
        parent1 = self.commit_tree([('a', blob1)])
418
        parent2 = self.commit_tree([('a', blob2)])
419
        parent3 = merge = self.commit_tree([])
420
        self.assertChangesForMergeEqual([], [parent1, parent1, parent1], merge)
421
        self.assertChangesForMergeEqual([], [parent1, parent1, parent3], merge)
422
        self.assertChangesForMergeEqual([], [parent1, parent3, parent3], merge)
423
        self.assertChangesForMergeEqual(
424
          [[TreeChange.delete(('a', F, blob1.id)),
425
            TreeChange.delete(('a', F, blob2.id)),
426
            None]],
427
          [parent1, parent2, parent3], merge)
428
429
    def test_tree_changes_for_merge_add_add_same_conflict(self):
430
        blob = make_object(Blob, data='a\nb\nc\nd\n')
431
        parent1 = self.commit_tree([('a', blob)])
432
        parent2 = self.commit_tree([])
433
        merge = self.commit_tree([('b', blob)])
434
        add = TreeChange.add(('b', F, blob.id))
435
        self.assertChangesForMergeEqual([[add, add]], [parent1, parent2], merge)
436
437
    def test_tree_changes_for_merge_add_exact_rename_conflict(self):
438
        blob = make_object(Blob, data='a\nb\nc\nd\n')
439
        parent1 = self.commit_tree([('a', blob)])
440
        parent2 = self.commit_tree([])
441
        merge = self.commit_tree([('b', blob)])
442
        self.assertChangesForMergeEqual(
443
          [[TreeChange(CHANGE_RENAME, ('a', F, blob.id), ('b', F, blob.id)),
444
            TreeChange.add(('b', F, blob.id))]],
445
          [parent1, parent2], merge, rename_detector=self.detector)
446
447
    def test_tree_changes_for_merge_add_content_rename_conflict(self):
448
        blob1 = make_object(Blob, data='a\nb\nc\nd\n')
449
        blob2 = make_object(Blob, data='a\nb\nc\ne\n')
450
        parent1 = self.commit_tree([('a', blob1)])
451
        parent2 = self.commit_tree([])
452
        merge = self.commit_tree([('b', blob2)])
453
        self.assertChangesForMergeEqual(
454
          [[TreeChange(CHANGE_RENAME, ('a', F, blob1.id), ('b', F, blob2.id)),
455
            TreeChange.add(('b', F, blob2.id))]],
456
          [parent1, parent2], merge, rename_detector=self.detector)
457
458
    def test_tree_changes_for_merge_modify_rename_conflict(self):
459
        blob1 = make_object(Blob, data='a\nb\nc\nd\n')
460
        blob2 = make_object(Blob, data='a\nb\nc\ne\n')
461
        parent1 = self.commit_tree([('a', blob1)])
462
        parent2 = self.commit_tree([('b', blob1)])
463
        merge = self.commit_tree([('b', blob2)])
464
        self.assertChangesForMergeEqual(
465
          [[TreeChange(CHANGE_RENAME, ('a', F, blob1.id), ('b', F, blob2.id)),
466
            TreeChange(CHANGE_MODIFY, ('b', F, blob1.id), ('b', F, blob2.id))]],
467
          [parent1, parent2], merge, rename_detector=self.detector)
468
1.2.14 by Jelmer Vernooij
Import upstream version 0.7.0
469
470
class RenameDetectionTest(DiffTestCase):
471
472
    def _do_test_count_blocks(self, count_blocks):
473
        blob = make_object(Blob, data='a\nb\na\n')
474
        self.assertEqual({hash('a\n'): 4, hash('b\n'): 2}, count_blocks(blob))
475
476
    test_count_blocks = functest_builder(_do_test_count_blocks,
477
                                         _count_blocks_py)
478
    test_count_blocks_extension = ext_functest_builder(_do_test_count_blocks,
479
                                                       _count_blocks)
480
481
    def _do_test_count_blocks_no_newline(self, count_blocks):
482
        blob = make_object(Blob, data='a\na')
483
        self.assertEqual({hash('a\n'): 2, hash('a'): 1}, _count_blocks(blob))
484
485
    test_count_blocks_no_newline = functest_builder(
486
      _do_test_count_blocks_no_newline, _count_blocks_py)
487
    test_count_blocks_no_newline_extension = ext_functest_builder(
488
       _do_test_count_blocks_no_newline, _count_blocks)
489
490
    def _do_test_count_blocks_chunks(self, count_blocks):
491
        blob = ShaFile.from_raw_chunks(Blob.type_num, ['a\nb', '\na\n'])
492
        self.assertEqual({hash('a\n'): 4, hash('b\n'): 2}, _count_blocks(blob))
493
494
    test_count_blocks_chunks = functest_builder(_do_test_count_blocks_chunks,
495
                                                _count_blocks_py)
496
    test_count_blocks_chunks_extension = ext_functest_builder(
497
      _do_test_count_blocks_chunks, _count_blocks)
498
499
    def _do_test_count_blocks_long_lines(self, count_blocks):
500
        a = 'a' * 64
501
        data = a + 'xxx\ny\n' + a + 'zzz\n'
502
        blob = make_object(Blob, data=data)
503
        self.assertEqual({hash('a' * 64): 128, hash('xxx\n'): 4, hash('y\n'): 2,
504
                          hash('zzz\n'): 4},
505
                         _count_blocks(blob))
506
507
    test_count_blocks_long_lines = functest_builder(
508
      _do_test_count_blocks_long_lines, _count_blocks_py)
509
    test_count_blocks_long_lines_extension = ext_functest_builder(
510
      _do_test_count_blocks_long_lines, _count_blocks)
511
512
    def assertSimilar(self, expected_score, blob1, blob2):
513
        self.assertEqual(expected_score, _similarity_score(blob1, blob2))
514
        self.assertEqual(expected_score, _similarity_score(blob2, blob1))
515
516
    def test_similarity_score(self):
517
        blob0 = make_object(Blob, data='')
518
        blob1 = make_object(Blob, data='ab\ncd\ncd\n')
519
        blob2 = make_object(Blob, data='ab\n')
520
        blob3 = make_object(Blob, data='cd\n')
521
        blob4 = make_object(Blob, data='cd\ncd\n')
522
523
        self.assertSimilar(100, blob0, blob0)
524
        self.assertSimilar(0, blob0, blob1)
525
        self.assertSimilar(33, blob1, blob2)
526
        self.assertSimilar(33, blob1, blob3)
527
        self.assertSimilar(66, blob1, blob4)
528
        self.assertSimilar(0, blob2, blob3)
529
        self.assertSimilar(50, blob3, blob4)
530
531
    def test_similarity_score_cache(self):
532
        blob1 = make_object(Blob, data='ab\ncd\n')
533
        blob2 = make_object(Blob, data='ab\n')
534
535
        block_cache = {}
536
        self.assertEqual(
537
          50, _similarity_score(blob1, blob2, block_cache=block_cache))
538
        self.assertEqual(set([blob1.id, blob2.id]), set(block_cache))
539
540
        def fail_chunks():
541
            self.fail('Unexpected call to as_raw_chunks()')
542
543
        blob1.as_raw_chunks = blob2.as_raw_chunks = fail_chunks
544
        blob1.raw_length = lambda: 6
545
        blob2.raw_length = lambda: 3
546
        self.assertEqual(
547
          50, _similarity_score(blob1, blob2, block_cache=block_cache))
548
549
    def test_tree_entry_sort(self):
550
        sha = 'abcd' * 10
551
        expected_entries = [
552
          TreeChange.add(TreeEntry('aaa', F, sha)),
553
          TreeChange(CHANGE_COPY, TreeEntry('bbb', F, sha),
554
                     TreeEntry('aab', F, sha)),
555
          TreeChange(CHANGE_MODIFY, TreeEntry('bbb', F, sha),
556
                     TreeEntry('bbb', F, 'dabc' * 10)),
557
          TreeChange(CHANGE_RENAME, TreeEntry('bbc', F, sha),
558
                     TreeEntry('ddd', F, sha)),
559
          TreeChange.delete(TreeEntry('ccc', F, sha)),
560
          ]
561
562
        for perm in permutations(expected_entries):
563
            self.assertEqual(expected_entries,
564
                             sorted(perm, key=_tree_change_key))
565
1.2.16 by Jelmer Vernooij
Import upstream version 0.8.0
566
    def detect_renames(self, tree1, tree2, want_unchanged=False, **kwargs):
567
        detector = RenameDetector(self.store, **kwargs)
568
        return detector.changes_with_renames(tree1.id, tree2.id,
569
                                             want_unchanged=want_unchanged)
1.2.14 by Jelmer Vernooij
Import upstream version 0.7.0
570
571
    def test_no_renames(self):
572
        blob1 = make_object(Blob, data='a\nb\nc\nd\n')
573
        blob2 = make_object(Blob, data='a\nb\ne\nf\n')
574
        blob3 = make_object(Blob, data='a\nb\ng\nh\n')
575
        tree1 = self.commit_tree([('a', blob1), ('b', blob2)])
576
        tree2 = self.commit_tree([('a', blob1), ('b', blob3)])
577
        self.assertEqual(
578
          [TreeChange(CHANGE_MODIFY, ('b', F, blob2.id), ('b', F, blob3.id))],
579
          self.detect_renames(tree1, tree2))
580
581
    def test_exact_rename_one_to_one(self):
582
        blob1 = make_object(Blob, data='1')
583
        blob2 = make_object(Blob, data='2')
584
        tree1 = self.commit_tree([('a', blob1), ('b', blob2)])
585
        tree2 = self.commit_tree([('c', blob1), ('d', blob2)])
586
        self.assertEqual(
587
          [TreeChange(CHANGE_RENAME, ('a', F, blob1.id), ('c', F, blob1.id)),
588
           TreeChange(CHANGE_RENAME, ('b', F, blob2.id), ('d', F, blob2.id))],
589
          self.detect_renames(tree1, tree2))
590
591
    def test_exact_rename_split_different_type(self):
592
        blob = make_object(Blob, data='/foo')
593
        tree1 = self.commit_tree([('a', blob, 0100644)])
594
        tree2 = self.commit_tree([('a', blob, 0120000)])
595
        self.assertEqual(
596
          [TreeChange.add(('a', 0120000, blob.id)),
597
           TreeChange.delete(('a', 0100644, blob.id))],
598
          self.detect_renames(tree1, tree2))
599
600
    def test_exact_rename_and_different_type(self):
601
        blob1 = make_object(Blob, data='1')
602
        blob2 = make_object(Blob, data='2')
603
        tree1 = self.commit_tree([('a', blob1)])
604
        tree2 = self.commit_tree([('a', blob2, 0120000), ('b', blob1)])
605
        self.assertEqual(
606
          [TreeChange.add(('a', 0120000, blob2.id)),
607
           TreeChange(CHANGE_RENAME, ('a', F, blob1.id), ('b', F, blob1.id))],
608
          self.detect_renames(tree1, tree2))
609
610
    def test_exact_rename_one_to_many(self):
611
        blob = make_object(Blob, data='1')
612
        tree1 = self.commit_tree([('a', blob)])
613
        tree2 = self.commit_tree([('b', blob), ('c', blob)])
614
        self.assertEqual(
615
          [TreeChange(CHANGE_RENAME, ('a', F, blob.id), ('b', F, blob.id)),
616
           TreeChange(CHANGE_COPY, ('a', F, blob.id), ('c', F, blob.id))],
617
          self.detect_renames(tree1, tree2))
618
619
    def test_exact_rename_many_to_one(self):
620
        blob = make_object(Blob, data='1')
621
        tree1 = self.commit_tree([('a', blob), ('b', blob)])
622
        tree2 = self.commit_tree([('c', blob)])
623
        self.assertEqual(
624
          [TreeChange(CHANGE_RENAME, ('a', F, blob.id), ('c', F, blob.id)),
625
           TreeChange.delete(('b', F, blob.id))],
626
          self.detect_renames(tree1, tree2))
627
628
    def test_exact_rename_many_to_many(self):
629
        blob = make_object(Blob, data='1')
630
        tree1 = self.commit_tree([('a', blob), ('b', blob)])
631
        tree2 = self.commit_tree([('c', blob), ('d', blob), ('e', blob)])
632
        self.assertEqual(
633
          [TreeChange(CHANGE_RENAME, ('a', F, blob.id), ('c', F, blob.id)),
634
           TreeChange(CHANGE_COPY, ('a', F, blob.id), ('e', F, blob.id)),
635
           TreeChange(CHANGE_RENAME, ('b', F, blob.id), ('d', F, blob.id))],
636
          self.detect_renames(tree1, tree2))
637
1.2.16 by Jelmer Vernooij
Import upstream version 0.8.0
638
    def test_exact_copy_modify(self):
639
        blob1 = make_object(Blob, data='a\nb\nc\nd\n')
640
        blob2 = make_object(Blob, data='a\nb\nc\ne\n')
641
        tree1 = self.commit_tree([('a', blob1)])
642
        tree2 = self.commit_tree([('a', blob2), ('b', blob1)])
643
        self.assertEqual(
644
          [TreeChange(CHANGE_MODIFY, ('a', F, blob1.id), ('a', F, blob2.id)),
645
           TreeChange(CHANGE_COPY, ('a', F, blob1.id), ('b', F, blob1.id))],
646
          self.detect_renames(tree1, tree2))
647
648
    def test_exact_copy_change_mode(self):
649
        blob = make_object(Blob, data='a\nb\nc\nd\n')
650
        tree1 = self.commit_tree([('a', blob)])
651
        tree2 = self.commit_tree([('a', blob, 0100755), ('b', blob)])
652
        self.assertEqual(
653
          [TreeChange(CHANGE_MODIFY, ('a', F, blob.id),
654
                      ('a', 0100755, blob.id)),
655
           TreeChange(CHANGE_COPY, ('a', F, blob.id), ('b', F, blob.id))],
656
          self.detect_renames(tree1, tree2))
657
1.2.14 by Jelmer Vernooij
Import upstream version 0.7.0
658
    def test_rename_threshold(self):
659
        blob1 = make_object(Blob, data='a\nb\nc\n')
660
        blob2 = make_object(Blob, data='a\nb\nd\n')
661
        tree1 = self.commit_tree([('a', blob1)])
662
        tree2 = self.commit_tree([('b', blob2)])
663
        self.assertEqual(
664
          [TreeChange(CHANGE_RENAME, ('a', F, blob1.id), ('b', F, blob2.id))],
665
          self.detect_renames(tree1, tree2, rename_threshold=50))
666
        self.assertEqual(
667
          [TreeChange.delete(('a', F, blob1.id)),
668
           TreeChange.add(('b', F, blob2.id))],
669
          self.detect_renames(tree1, tree2, rename_threshold=75))
670
671
    def test_content_rename_max_files(self):
672
        blob1 = make_object(Blob, data='a\nb\nc\nd')
673
        blob4 = make_object(Blob, data='a\nb\nc\ne\n')
674
        blob2 = make_object(Blob, data='e\nf\ng\nh\n')
675
        blob3 = make_object(Blob, data='e\nf\ng\ni\n')
676
        tree1 = self.commit_tree([('a', blob1), ('b', blob2)])
677
        tree2 = self.commit_tree([('c', blob3), ('d', blob4)])
678
        self.assertEqual(
679
          [TreeChange(CHANGE_RENAME, ('a', F, blob1.id), ('d', F, blob4.id)),
680
           TreeChange(CHANGE_RENAME, ('b', F, blob2.id), ('c', F, blob3.id))],
681
          self.detect_renames(tree1, tree2))
682
        self.assertEqual(
683
          [TreeChange.delete(('a', F, blob1.id)),
684
           TreeChange.delete(('b', F, blob2.id)),
685
           TreeChange.add(('c', F, blob3.id)),
686
           TreeChange.add(('d', F, blob4.id))],
687
          self.detect_renames(tree1, tree2, max_files=1))
688
689
    def test_content_rename_one_to_one(self):
690
        b11 = make_object(Blob, data='a\nb\nc\nd\n')
691
        b12 = make_object(Blob, data='a\nb\nc\ne\n')
692
        b21 = make_object(Blob, data='e\nf\ng\n\h')
693
        b22 = make_object(Blob, data='e\nf\ng\n\i')
694
        tree1 = self.commit_tree([('a', b11), ('b', b21)])
695
        tree2 = self.commit_tree([('c', b12), ('d', b22)])
696
        self.assertEqual(
697
          [TreeChange(CHANGE_RENAME, ('a', F, b11.id), ('c', F, b12.id)),
698
           TreeChange(CHANGE_RENAME, ('b', F, b21.id), ('d', F, b22.id))],
699
          self.detect_renames(tree1, tree2))
700
701
    def test_content_rename_one_to_one_ordering(self):
702
        blob1 = make_object(Blob, data='a\nb\nc\nd\ne\nf\n')
703
        blob2 = make_object(Blob, data='a\nb\nc\nd\ng\nh\n')
704
        # 6/10 match to blob1, 8/10 match to blob2
705
        blob3 = make_object(Blob, data='a\nb\nc\nd\ng\ni\n')
706
        tree1 = self.commit_tree([('a', blob1), ('b', blob2)])
707
        tree2 = self.commit_tree([('c', blob3)])
708
        self.assertEqual(
709
          [TreeChange.delete(('a', F, blob1.id)),
710
           TreeChange(CHANGE_RENAME, ('b', F, blob2.id), ('c', F, blob3.id))],
711
          self.detect_renames(tree1, tree2))
712
713
        tree3 = self.commit_tree([('a', blob2), ('b', blob1)])
714
        tree4 = self.commit_tree([('c', blob3)])
715
        self.assertEqual(
716
          [TreeChange(CHANGE_RENAME, ('a', F, blob2.id), ('c', F, blob3.id)),
717
           TreeChange.delete(('b', F, blob1.id))],
718
          self.detect_renames(tree3, tree4))
719
720
    def test_content_rename_one_to_many(self):
721
        blob1 = make_object(Blob, data='aa\nb\nc\nd\ne\n')
722
        blob2 = make_object(Blob, data='ab\nb\nc\nd\ne\n')  # 8/11 match
723
        blob3 = make_object(Blob, data='aa\nb\nc\nd\nf\n')  # 9/11 match
724
        tree1 = self.commit_tree([('a', blob1)])
725
        tree2 = self.commit_tree([('b', blob2), ('c', blob3)])
726
        self.assertEqual(
727
          [TreeChange(CHANGE_COPY, ('a', F, blob1.id), ('b', F, blob2.id)),
728
           TreeChange(CHANGE_RENAME, ('a', F, blob1.id), ('c', F, blob3.id))],
729
          self.detect_renames(tree1, tree2))
730
731
    def test_content_rename_many_to_one(self):
732
        blob1 = make_object(Blob, data='a\nb\nc\nd\n')
733
        blob2 = make_object(Blob, data='a\nb\nc\ne\n')
734
        blob3 = make_object(Blob, data='a\nb\nc\nf\n')
735
        tree1 = self.commit_tree([('a', blob1), ('b', blob2)])
736
        tree2 = self.commit_tree([('c', blob3)])
737
        self.assertEqual(
738
          [TreeChange(CHANGE_RENAME, ('a', F, blob1.id), ('c', F, blob3.id)),
739
           TreeChange.delete(('b', F, blob2.id))],
740
          self.detect_renames(tree1, tree2))
741
742
    def test_content_rename_many_to_many(self):
743
        blob1 = make_object(Blob, data='a\nb\nc\nd\n')
744
        blob2 = make_object(Blob, data='a\nb\nc\ne\n')
745
        blob3 = make_object(Blob, data='a\nb\nc\nf\n')
746
        blob4 = make_object(Blob, data='a\nb\nc\ng\n')
747
        tree1 = self.commit_tree([('a', blob1), ('b', blob2)])
748
        tree2 = self.commit_tree([('c', blob3), ('d', blob4)])
749
        # TODO(dborowitz): Distribute renames rather than greedily choosing
750
        # copies.
751
        self.assertEqual(
752
          [TreeChange(CHANGE_RENAME, ('a', F, blob1.id), ('c', F, blob3.id)),
753
           TreeChange(CHANGE_COPY, ('a', F, blob1.id), ('d', F, blob4.id)),
754
           TreeChange.delete(('b', F, blob2.id))],
755
          self.detect_renames(tree1, tree2))
756
757
    def test_content_rename_gitlink(self):
758
        blob1 = make_object(Blob, data='blob1')
759
        blob2 = make_object(Blob, data='blob2')
760
        link1 = '1' * 40
761
        link2 = '2' * 40
762
        tree1 = self.commit_tree([('a', blob1), ('b', link1, 0160000)])
763
        tree2 = self.commit_tree([('c', blob2), ('d', link2, 0160000)])
764
        self.assertEqual(
765
          [TreeChange.delete(('a', 0100644, blob1.id)),
766
           TreeChange.delete(('b', 0160000, link1)),
767
           TreeChange.add(('c', 0100644, blob2.id)),
768
           TreeChange.add(('d', 0160000, link2))],
769
          self.detect_renames(tree1, tree2))
770
771
    def test_exact_rename_swap(self):
772
        blob1 = make_object(Blob, data='1')
773
        blob2 = make_object(Blob, data='2')
774
        tree1 = self.commit_tree([('a', blob1), ('b', blob2)])
775
        tree2 = self.commit_tree([('a', blob2), ('b', blob1)])
776
        self.assertEqual(
777
          [TreeChange(CHANGE_MODIFY, ('a', F, blob1.id), ('a', F, blob2.id)),
778
           TreeChange(CHANGE_MODIFY, ('b', F, blob2.id), ('b', F, blob1.id))],
779
          self.detect_renames(tree1, tree2))
780
        self.assertEqual(
781
          [TreeChange(CHANGE_RENAME, ('a', F, blob1.id), ('b', F, blob1.id)),
782
           TreeChange(CHANGE_RENAME, ('b', F, blob2.id), ('a', F, blob2.id))],
783
          self.detect_renames(tree1, tree2, rewrite_threshold=50))
784
785
    def test_content_rename_swap(self):
786
        blob1 = make_object(Blob, data='a\nb\nc\nd\n')
787
        blob2 = make_object(Blob, data='e\nf\ng\nh\n')
788
        blob3 = make_object(Blob, data='a\nb\nc\ne\n')
789
        blob4 = make_object(Blob, data='e\nf\ng\ni\n')
790
        tree1 = self.commit_tree([('a', blob1), ('b', blob2)])
791
        tree2 = self.commit_tree([('a', blob4), ('b', blob3)])
792
        self.assertEqual(
793
          [TreeChange(CHANGE_RENAME, ('a', F, blob1.id), ('b', F, blob3.id)),
794
           TreeChange(CHANGE_RENAME, ('b', F, blob2.id), ('a', F, blob4.id))],
795
          self.detect_renames(tree1, tree2, rewrite_threshold=60))
796
797
    def test_rewrite_threshold(self):
798
        blob1 = make_object(Blob, data='a\nb\nc\nd\n')
799
        blob2 = make_object(Blob, data='a\nb\nc\ne\n')
800
        blob3 = make_object(Blob, data='a\nb\nf\ng\n')
801
802
        tree1 = self.commit_tree([('a', blob1)])
803
        tree2 = self.commit_tree([('a', blob3), ('b', blob2)])
804
805
        no_renames = [
806
          TreeChange(CHANGE_MODIFY, ('a', F, blob1.id), ('a', F, blob3.id)),
1.2.16 by Jelmer Vernooij
Import upstream version 0.8.0
807
          TreeChange(CHANGE_COPY, ('a', F, blob1.id), ('b', F, blob2.id))]
1.2.14 by Jelmer Vernooij
Import upstream version 0.7.0
808
        self.assertEqual(
809
          no_renames, self.detect_renames(tree1, tree2))
810
        self.assertEqual(
811
          no_renames, self.detect_renames(tree1, tree2, rewrite_threshold=40))
812
        self.assertEqual(
813
          [TreeChange.add(('a', F, blob3.id)),
814
           TreeChange(CHANGE_RENAME, ('a', F, blob1.id), ('b', F, blob2.id))],
815
          self.detect_renames(tree1, tree2, rewrite_threshold=80))
816
817
    def test_find_copies_harder_exact(self):
818
        blob = make_object(Blob, data='blob')
819
        tree1 = self.commit_tree([('a', blob)])
820
        tree2 = self.commit_tree([('a', blob), ('b', blob)])
821
        self.assertEqual([TreeChange.add(('b', F, blob.id))],
822
                         self.detect_renames(tree1, tree2))
823
        self.assertEqual(
824
          [TreeChange(CHANGE_COPY, ('a', F, blob.id), ('b', F, blob.id))],
825
          self.detect_renames(tree1, tree2, find_copies_harder=True))
826
827
    def test_find_copies_harder_content(self):
828
        blob1 = make_object(Blob, data='a\nb\nc\nd\n')
829
        blob2 = make_object(Blob, data='a\nb\nc\ne\n')
830
        tree1 = self.commit_tree([('a', blob1)])
831
        tree2 = self.commit_tree([('a', blob1), ('b', blob2)])
832
        self.assertEqual([TreeChange.add(('b', F, blob2.id))],
833
                         self.detect_renames(tree1, tree2))
834
        self.assertEqual(
835
          [TreeChange(CHANGE_COPY, ('a', F, blob1.id), ('b', F, blob2.id))],
836
          self.detect_renames(tree1, tree2, find_copies_harder=True))
837
838
    def test_find_copies_harder_with_rewrites(self):
839
        blob_a1 = make_object(Blob, data='a\nb\nc\nd\n')
840
        blob_a2 = make_object(Blob, data='f\ng\nh\ni\n')
841
        blob_b2 = make_object(Blob, data='a\nb\nc\ne\n')
842
        tree1 = self.commit_tree([('a', blob_a1)])
843
        tree2 = self.commit_tree([('a', blob_a2), ('b', blob_b2)])
844
        self.assertEqual(
845
          [TreeChange(CHANGE_MODIFY, ('a', F, blob_a1.id),
846
                      ('a', F, blob_a2.id)),
847
           TreeChange(CHANGE_COPY, ('a', F, blob_a1.id), ('b', F, blob_b2.id))],
848
          self.detect_renames(tree1, tree2, find_copies_harder=True))
849
        self.assertEqual(
850
          [TreeChange.add(('a', F, blob_a2.id)),
851
           TreeChange(CHANGE_RENAME, ('a', F, blob_a1.id),
852
                      ('b', F, blob_b2.id))],
853
          self.detect_renames(tree1, tree2, rewrite_threshold=50,
854
                              find_copies_harder=True))
1.2.16 by Jelmer Vernooij
Import upstream version 0.8.0
855
856
    def test_reuse_detector(self):
857
        blob = make_object(Blob, data='blob')
858
        tree1 = self.commit_tree([('a', blob)])
859
        tree2 = self.commit_tree([('b', blob)])
860
        detector = RenameDetector(self.store)
861
        changes = [TreeChange(CHANGE_RENAME, ('a', F, blob.id),
862
                              ('b', F, blob.id))]
863
        self.assertEqual(changes,
864
                         detector.changes_with_renames(tree1.id, tree2.id))
865
        self.assertEqual(changes,
866
                         detector.changes_with_renames(tree1.id, tree2.id))
867
868
    def test_want_unchanged(self):
869
        blob_a1 = make_object(Blob, data='a\nb\nc\nd\n')
870
        blob_b = make_object(Blob, data='b')
871
        blob_c2 = make_object(Blob, data='a\nb\nc\ne\n')
872
        tree1 = self.commit_tree([('a', blob_a1), ('b', blob_b)])
873
        tree2 = self.commit_tree([('c', blob_c2), ('b', blob_b)])
874
        detector = RenameDetector(self.store)
875
        self.assertEqual(
876
          [TreeChange(CHANGE_RENAME, ('a', F, blob_a1.id),
877
                      ('c', F, blob_c2.id))],
878
          self.detect_renames(tree1, tree2))
879
        self.assertEqual(
880
          [TreeChange(CHANGE_RENAME, ('a', F, blob_a1.id),
881
                      ('c', F, blob_c2.id)),
882
           TreeChange(CHANGE_UNCHANGED, ('b', F, blob_b.id),
883
                      ('b', F, blob_b.id))],
884
          self.detect_renames(tree1, tree2, want_unchanged=True))