2
* compose_delta.c: Delta window composition.
4
* ====================================================================
5
* Copyright (c) 2000-2004 CollabNet. All rights reserved.
7
* This software is licensed as described in the file COPYING, which
8
* you should have received as part of this distribution. The terms
9
* are also available at http://subversion.tigris.org/license-1.html.
10
* If newer versions of this license are posted there, you may use a
11
* newer version instead, at your option.
13
* This software consists of voluntary contributions made by many
14
* individuals. For exact contribution history, see the revision
15
* history and logs, available at http://subversion.tigris.org/.
16
* ====================================================================
22
#include <apr_general.h> /* For APR_INLINE */
24
#include "svn_delta.h"
25
#include "svn_pools.h"
28
/* Define a MIN macro if this platform doesn't already have one. */
30
#define MIN(a, b) ((a) < (b) ? (a) : (b))
34
/* ==================================================================== */
35
/* Support for efficient small-block allocation from pools. */
37
/* The following structs will be allocated and freed often: */
39
/* A node in the range index tree. */
40
typedef struct range_index_node_t range_index_node_t;
41
struct range_index_node_t
43
/* 'offset' and 'limit' define the range in the source window. */
47
/* 'target_offset' is where that range is represented in the target. */
48
apr_size_t target_offset;
50
/* 'left' and 'right' link the node into a splay tree. */
51
range_index_node_t *left, *right;
53
/* 'prev' and 'next' link it into an ordered, doubly-linked list. */
54
range_index_node_t *prev, *next;
57
/* A node in a list of ranges for source and target op copies. */
64
typedef struct range_list_node_t range_list_node_t;
65
struct range_list_node_t
67
/* Where does the range come from?
68
'offset' and 'limit' always refer to the "virtual" source data
69
for the second delta window. For a target range, the actual
70
offset to use for generating the target op is 'target_offset';
71
that field isn't used by source ranges. */
74
/* 'offset' and 'limit' define the range. */
78
/* 'target_offset' is the start of the range in the target. */
79
apr_size_t target_offset;
81
/* 'prev' and 'next' link the node into an ordered, doubly-linked list. */
82
range_list_node_t *prev, *next;
86
/* This is what will be allocated: */
87
typedef union alloc_block_t alloc_block_t;
90
range_index_node_t index_node;
91
range_list_node_t list_node;
93
/* Links free blocks into a freelist. */
94
alloc_block_t *next_free;
98
/* Allocate a block. */
99
static APR_INLINE void *
100
alloc_block (apr_pool_t *pool, alloc_block_t **free_list)
102
alloc_block_t *block;
103
if (*free_list == NULL)
104
block = apr_palloc(pool, sizeof(*block));
108
*free_list = block->next_free;
113
/* Return the block back to the free list. */
114
static APR_INLINE void
115
free_block (void *ptr, alloc_block_t **free_list)
117
/* Wrapper functions take care of type safety. */
118
alloc_block_t *const block = ptr;
119
block->next_free = *free_list;
125
/* ==================================================================== */
126
/* Mapping offsets in the target streem to txdelta ops. */
128
typedef struct offset_index_t
134
/* Create an index mapping target stream offsets to delta ops in
135
WINDOW. Allocate from POOL. */
137
static offset_index_t *
138
create_offset_index (const svn_txdelta_window_t *window, apr_pool_t *pool)
140
offset_index_t *ndx = apr_palloc(pool, sizeof(*ndx));
141
apr_size_t offset = 0;
144
ndx->length = window->num_ops;
145
ndx->offs = apr_palloc(pool, (ndx->length + 1) * sizeof(*ndx->offs));
147
for (i = 0; i < ndx->length; ++i)
149
ndx->offs[i] = offset;
150
offset += window->ops[i].length;
152
ndx->offs[ndx->length] = offset;
157
/* Find the index of the delta op thet defines that data at OFFSET in
161
search_offset_index (const offset_index_t *ndx, apr_size_t offset)
165
assert(offset < ndx->offs[ndx->length]);
167
for (lo = 0, hi = ndx->length, op = (lo + hi)/2;
171
const apr_size_t this_offset = ndx->offs[op];
172
const apr_size_t next_offset = ndx->offs[op + 1];
173
if (offset < this_offset)
175
else if (offset > next_offset)
179
/* this_offset <= offset <= next_offset */
180
if (offset == next_offset)
186
assert(ndx->offs[op] <= offset && offset < ndx->offs[op + 1]);
192
/* ==================================================================== */
193
/* Mapping ranges in the source stream to ranges in the composed delta. */
195
/* The range index tree. */
196
typedef struct range_index_t
198
range_index_node_t *tree;
199
alloc_block_t *free_list;
203
/* Create a range index tree. Allocate from POOL. */
204
static range_index_t *
205
create_range_index (apr_pool_t *pool)
207
range_index_t *ndx = apr_palloc(pool, sizeof(*ndx));
210
ndx->free_list = NULL;
214
/* Allocate a node for the range index tree. */
215
static range_index_node_t *
216
alloc_range_index_node (range_index_t *ndx,
219
apr_size_t target_offset)
221
range_index_node_t *const node = alloc_block(ndx->pool, &ndx->free_list);
222
node->offset = offset;
224
node->target_offset = target_offset;
225
node->left = node->right = NULL;
226
node->prev = node->next = NULL;
230
/* Free a node from the range index tree. */
232
free_range_index_node (range_index_t *ndx, range_index_node_t *node)
235
node->next->prev = node->prev;
237
node->prev->next = node->next;
238
free_block(node, &ndx->free_list);
242
/* Splay the index tree, using OFFSET as the key. */
245
splay_range_index (apr_size_t offset, range_index_t *ndx)
247
range_index_node_t *tree = ndx->tree;
248
range_index_node_t scratch_node;
249
range_index_node_t *left, *right;
254
scratch_node.left = scratch_node.right = NULL;
255
left = right = &scratch_node;
259
if (offset < tree->offset)
261
if (tree->left != NULL
262
&& offset < tree->left->offset)
265
range_index_node_t *const node = tree->left;
266
tree->left = node->right;
270
if (tree->left == NULL)
273
/* Remember the right subtree */
278
else if (offset > tree->offset)
280
if (tree->right != NULL
281
&& offset > tree->right->offset)
284
range_index_node_t *const node = tree->right;
285
tree->right = node->left;
289
if (tree->right == NULL)
292
/* Remember the left subtree */
301
/* Link in the left and right subtrees */
302
left->right = tree->left;
303
right->left = tree->right;
304
tree->left = scratch_node.right;
305
tree->right = scratch_node.left;
307
/* The basic top-down splay is finished, but we may still need to
308
turn the tree around. What we want is to put the node with the
309
largest offset where node->offset <= offset at the top of the
310
tree, so that we can insert the new data (or search for existing
311
ranges) to the right of the root. This makes cleaning up the
312
tree after an insert much simpler, and -- incidentally -- makes
313
the whole range index magic work. */
314
if (offset < tree->offset && tree->left != NULL)
316
if (tree->left->right == NULL)
318
/* A single right rotation is enough. */
319
range_index_node_t *const node = tree->left;
320
tree->left = node->right; /* Which is always NULL. */
326
/* Slide down to the rightmost node in the left subtree. */
327
range_index_node_t **nodep = &tree->left;
328
while ((*nodep)->right != NULL)
329
nodep = &(*nodep)->right;
331
/* Now move this node to root in one giant promotion. */
336
right->left = tree->right; /* Which is always NULL, too. */
342
/* Sanity check ... */
343
assert ((offset >= tree->offset)
344
|| ((tree->left == NULL)
345
&& (tree->prev == NULL)));
350
/* Remove all ranges from NDX that fall into the root's range. To
351
keep the range index as small as possible, we must also remove
352
nodes that don't fall into the new range, but have become redundant
353
because the new range overlaps the beginning of the next range.
356
new-range: |-----------------|
357
range-1: |-----------------|
358
range-2: |--------------------|
360
Before new-range was inserted, range-1 and range-2 were both
361
necessary. Now the union of new-range and range-2 completely covers
362
range-1, which has become redundant now.
364
FIXME: But, of course, there's a catch. range-1 must still remain
365
in the tree if we want to optimize the number of target copy ops in
366
the case were a copy falls within range-1, but starts before
367
range-2 and ends after new-range. */
370
delete_subtree (range_index_t *ndx, range_index_node_t *node)
374
delete_subtree(ndx, node->left);
375
delete_subtree(ndx, node->right);
376
free_range_index_node(ndx, node);
381
clean_tree (range_index_t *ndx, apr_size_t limit)
383
apr_size_t top_offset = limit + 1;
384
range_index_node_t **nodep = &ndx->tree->right;
385
while (*nodep != NULL)
387
range_index_node_t *const node = *nodep;
388
apr_size_t const offset =
389
(node->right != NULL && node->right->offset < top_offset
390
? node->right->offset
393
if (node->limit <= limit
394
|| (node->offset < limit && offset < limit))
396
*nodep = node->right;
398
delete_subtree(ndx, node);
402
top_offset = node->offset;
409
/* Add a range [OFFSET, LIMIT) into NDX. If NDX already contains a
410
range that encloses [OFFSET, LIMIT), do nothing. Otherwise, remove
411
all ranges from NDX that are superseded by the new range.
412
NOTE: The range index must be splayed to OFFSET! */
415
insert_range (apr_size_t offset, apr_size_t limit, apr_size_t target_offset,
418
range_index_node_t *node = NULL;
420
if (ndx->tree == NULL)
422
node = alloc_range_index_node(ndx, offset, limit, target_offset);
427
if (offset == ndx->tree->offset
428
&& limit > ndx->tree->limit)
430
ndx->tree->limit = limit;
431
ndx->tree->target_offset = target_offset;
432
clean_tree(ndx, limit);
434
else if (offset > ndx->tree->offset
435
&& limit > ndx->tree->limit)
437
/* We have to make the same sort of checks as clean_tree()
438
does for superseded ranges. Have to merge these someday. */
440
const svn_boolean_t insert_range_p =
442
|| ndx->tree->limit < ndx->tree->next->offset
443
|| limit > ndx->tree->next->limit);
447
/* Again, we have to check if the new node and the one
448
to the left of the root override root's range. */
449
if (ndx->tree->prev && ndx->tree->prev->limit > offset)
451
/* Replace the data in the splayed node. */
452
ndx->tree->offset = offset;
453
ndx->tree->limit = limit;
454
ndx->tree->target_offset = target_offset;
458
/* Insert the range to the right of the splayed node. */
459
node = alloc_range_index_node(ndx, offset, limit,
461
if ((node->next = ndx->tree->next) != NULL)
462
node->next->prev = node;
463
ndx->tree->next = node;
464
node->prev = ndx->tree;
466
node->right = ndx->tree->right;
467
ndx->tree->right = NULL;
468
node->left = ndx->tree;
471
clean_tree(ndx, limit);
474
/* Ignore the range */;
476
else if (offset < ndx->tree->offset)
478
assert(ndx->tree->left == NULL);
480
/* Insert the range left of the splayed node */
481
node = alloc_range_index_node(ndx, offset, limit, target_offset);
482
node->left = node->prev = NULL;
483
node->right = node->next = ndx->tree;
484
ndx->tree = node->next->prev = node;
485
clean_tree(ndx, limit);
488
/* Ignore the range */;
494
/* ==================================================================== */
495
/* Juggling with lists of ranges. */
497
/* Allocate a node and add it to the range list. LIST is the head of
498
the range list, TAIL is the last node in the list. NDX holds the
499
freelist; OFFSET, LIMIT and KIND are node data. */
500
static range_list_node_t *
501
alloc_range_list (range_list_node_t **list,
502
range_list_node_t **tail,
504
enum range_kind kind,
507
apr_size_t target_offset)
509
range_list_node_t *const node = alloc_block(ndx->pool, &ndx->free_list);
511
node->offset = offset;
513
node->target_offset = target_offset;
516
node->prev = node->next = NULL;
517
*list = *tail = node;
523
(*tail)->next = node;
529
/* Free a range list. LIST is the head of the list, NDX holds the freelist. */
531
free_range_list (range_list_node_t *list, range_index_t *ndx)
535
range_list_node_t *const node = list;
537
free_block(node, &ndx->free_list);
542
/* Based on the data in NDX, build a list of ranges that cover
543
[OFFSET, LIMIT) in the "virtual" source data.
544
NOTE: The range index must be splayed to OFFSET! */
546
static range_list_node_t *
547
build_range_list (apr_size_t offset, apr_size_t limit, range_index_t *ndx)
549
range_list_node_t *range_list = NULL;
550
range_list_node_t *last_range = NULL;
551
range_index_node_t *node = ndx->tree;
553
while (offset < limit)
556
return alloc_range_list(&range_list, &last_range, ndx,
560
if (offset < node->offset)
562
if (limit <= node->offset)
563
return alloc_range_list(&range_list, &last_range, ndx,
568
alloc_range_list(&range_list, &last_range, ndx,
570
offset, node->offset, 0);
571
offset = node->offset;
576
/* TODO: (Potential optimization) Investigate if it would
577
make sense to forbid range_from_target lengths shorter
578
than, say, VD_KEY_SIZE (see vdelta.c) */
580
if (offset >= node->limit)
584
const apr_size_t target_offset =
585
offset - node->offset + node->target_offset;
587
if (limit <= node->limit)
588
return alloc_range_list(&range_list, &last_range, ndx,
590
offset, limit, target_offset);
593
alloc_range_list(&range_list, &last_range, ndx,
595
offset, node->limit, target_offset);
596
offset = node->limit;
603
assert(!"A range's offset isn't smaller than its limit? Impossible!");
608
/* Copy the instructions from WINDOW that define the range [OFFSET,
609
LIMIT) in WINDOW's target stream to TARGET_OFFSET in the window
610
represented by BUILD_BATON. Use NDX to find the instructions in
611
WINDOW. Allocate space in BUILD_BATON from POOL. */
614
copy_source_ops (apr_size_t offset, apr_size_t limit,
615
apr_size_t target_offset,
616
svn_txdelta__ops_baton_t *build_baton,
617
const svn_txdelta_window_t *window,
618
const offset_index_t *ndx,
621
const int first_op = search_offset_index (ndx, offset);
622
const int last_op = search_offset_index (ndx, limit - 1);
625
for (op_ndx = first_op; op_ndx <= last_op; ++op_ndx)
627
const svn_txdelta_op_t *const op = &window->ops[op_ndx];
628
const apr_size_t *const off = &ndx->offs[op_ndx];
630
const apr_size_t fix_offset = (offset > off[0] ? offset - off[0] : 0);
631
const apr_size_t fix_limit = (off[1] > limit ? off[1] - limit : 0);
633
/* It would be extremely weird if the fixed-up op had zero length. */
634
assert(fix_offset + fix_limit < op->length);
636
if (op->action_code != svn_txdelta_target)
638
/* Delta ops that don't depend on the virtual target can be
639
copied to the composite unchanged. */
640
const char *const new_data = (op->action_code == svn_txdelta_new
641
? (window->new_data->data
642
+ op->offset + fix_offset)
645
svn_txdelta__insert_op(build_baton, op->action_code,
646
op->offset + fix_offset,
647
op->length - fix_offset - fix_limit,
652
/* The source of a target copy must start before the current
653
offset in the (virtual) target stream. */
654
assert(op->offset < off[0]);
656
if (op->offset + op->length - fix_limit <= off[0])
658
/* The recursion _must_ end, otherwise the delta has
659
circular references, and that is not possible. */
660
copy_source_ops(op->offset + fix_offset,
661
op->offset + op->length - fix_limit,
663
build_baton, window, ndx, pool);
667
/* This is an overlapping target copy.
668
The idea here is to transpose the pattern, then generate
669
another overlapping copy. */
670
const apr_size_t ptn_length = off[0] - op->offset;
671
const apr_size_t ptn_overlap = fix_offset % ptn_length;
672
apr_size_t fix_off = fix_offset;
673
apr_size_t tgt_off = target_offset;
674
assert(ptn_length > ptn_overlap);
676
/* ### FIXME: ptn_overlap is unsigned, so the if() condition
677
below is always true! Either it should be '> 0', or the
678
code block should be unconditional. See also r2288. */
679
if (ptn_overlap >= 0)
681
/* Issue second subrange in the pattern. */
682
const apr_size_t length =
683
MIN(op->length - fix_off - fix_limit,
684
ptn_length - ptn_overlap);
685
copy_source_ops(op->offset + ptn_overlap,
686
op->offset + ptn_overlap + length,
688
build_baton, window, ndx, pool);
693
assert(fix_off + fix_limit <= op->length);
695
&& fix_off + fix_limit < op->length)
697
/* Issue the first subrange in the pattern. */
698
const apr_size_t length =
699
MIN(op->length - fix_off - fix_limit, ptn_overlap);
700
copy_source_ops(op->offset,
703
build_baton, window, ndx, pool);
708
assert(fix_off + fix_limit <= op->length);
709
if (fix_off + fix_limit < op->length)
711
/* Now multiply the pattern */
712
svn_txdelta__insert_op(build_baton, svn_txdelta_target,
713
tgt_off - ptn_length,
714
op->length - fix_off - fix_limit,
720
/* Adjust the target offset for the next op in the list. */
721
target_offset += op->length - fix_offset - fix_limit;
727
/* ==================================================================== */
728
/* Bringing it all together. */
731
svn_txdelta_window_t *
732
svn_txdelta__compose_windows (const svn_txdelta_window_t *window_A,
733
const svn_txdelta_window_t *window_B,
736
svn_txdelta__ops_baton_t build_baton = { 0 };
737
svn_txdelta_window_t *composite;
738
apr_pool_t *subpool = svn_pool_create(pool);
739
offset_index_t *offset_index = create_offset_index(window_A, subpool);
740
range_index_t *range_index = create_range_index(subpool);
741
apr_size_t target_offset = 0;
744
/* Read the description of the delta composition algorithm in
745
notes/fs-improvements.txt before going any further.
746
You have been warned. */
747
build_baton.new_data = svn_stringbuf_create("", pool);
748
for (i = 0; i < window_B->num_ops; ++i)
750
const svn_txdelta_op_t *const op = &window_B->ops[i];
751
if (op->action_code != svn_txdelta_source)
753
/* Delta ops that don't depend on the source can be copied
754
to the composite unchanged. */
755
const char *const new_data =
756
(op->action_code == svn_txdelta_new
757
? window_B->new_data->data + op->offset
759
svn_txdelta__insert_op(&build_baton, op->action_code,
760
op->offset, op->length,
765
/* NOTE: Remember that `offset' and `limit' refer to
766
positions in window_B's _source_ stream, which is the
767
same as window_A's _target_ stream! */
768
const apr_size_t offset = op->offset;
769
const apr_size_t limit = op->offset + op->length;
770
range_list_node_t *range_list, *range;
771
apr_size_t tgt_off = target_offset;
773
splay_range_index(offset, range_index);
774
range_list = build_range_list(offset, limit, range_index);
776
for (range = range_list; range; range = range->next)
778
if (range->kind == range_from_target)
779
svn_txdelta__insert_op(&build_baton, svn_txdelta_target,
780
range->target_offset,
781
range->limit - range->offset,
784
copy_source_ops(range->offset, range->limit, tgt_off,
785
&build_baton, window_A, offset_index,
788
tgt_off += range->limit - range->offset;
790
assert (tgt_off == target_offset + op->length);
792
free_range_list(range_list, range_index);
793
insert_range(offset, limit, target_offset, range_index);
796
/* Remember the new offset in the would-be target stream. */
797
target_offset += op->length;
800
svn_pool_destroy(subpool);
802
composite = svn_txdelta__make_window(&build_baton, pool);
803
composite->sview_offset = window_A->sview_offset;
804
composite->sview_len = window_A->sview_len;
805
composite->tview_len = window_B->tview_len;