35
35
#include <assert.h>
36
36
#include "ocfs2/ocfs2.h"
39
* Structures which describe a path through a btree, and functions to
42
* The idea here is to be as generic as possible with the tree
45
struct ocfs2_path_item {
48
struct ocfs2_extent_list *el;
51
#define OCFS2_MAX_PATH_DEPTH 5
55
struct ocfs2_path_item p_node[OCFS2_MAX_PATH_DEPTH];
58
#define path_root_blkno(_path) ((_path)->p_node[0].blkno)
59
#define path_root_buf(_path) ((_path)->p_node[0].buf)
60
#define path_root_el(_path) ((_path)->p_node[0].el)
61
#define path_leaf_blkno(_path) ((_path)->p_node[(_path)->p_tree_depth].blkno)
62
#define path_leaf_buf(_path) ((_path)->p_node[(_path)->p_tree_depth].buf)
63
#define path_leaf_el(_path) ((_path)->p_node[(_path)->p_tree_depth].el)
64
#define path_num_items(_path) ((_path)->p_tree_depth + 1)
68
struct ocfs2_dinode *di;
69
struct ocfs2_extent_rec rec;
72
* Reset the actual path elements so that we can re-use the structure
73
* to build another path. Generally, this involves freeing the buffer
76
static void ocfs2_reinit_path(struct ocfs2_path *path, int keep_root)
78
int i, start = 0, depth = 0;
79
struct ocfs2_path_item *node;
84
for(i = start; i < path_num_items(path); i++) {
85
node = &path->p_node[i];
89
ocfs2_free(&node->buf);
96
* Tree depth may change during truncate, or insert. If we're
97
* keeping the root extent list, then make sure that our path
98
* structure reflects the proper depth.
101
depth = path_root_el(path)->l_tree_depth;
103
path->p_tree_depth = depth;
106
static void ocfs2_free_path(struct ocfs2_path *path)
108
/* We don't free the root because often in libocfs2 the root is a
109
* shared buffer such as the inode. Caller must be responsible for
110
* handling the root of the path.
113
ocfs2_reinit_path(path, 1);
119
* All the elements of src into dest. After this call, src could be freed
120
* without affecting dest.
122
* Both paths should have the same root. Any non-root elements of dest
125
static void ocfs2_cp_path(ocfs2_filesys *fs,
126
struct ocfs2_path *dest,
127
struct ocfs2_path *src)
130
struct ocfs2_extent_block *eb = NULL;
132
assert(path_root_blkno(dest) == path_root_blkno(src));
133
dest->p_tree_depth = src->p_tree_depth;
135
for(i = 1; i < OCFS2_MAX_PATH_DEPTH; i++) {
136
if (!src->p_node[i].buf) {
137
if (dest->p_node[i].buf)
138
ocfs2_free(dest->p_node[i].buf);
139
dest->p_node[i].blkno = 0;
140
dest->p_node[i].buf = NULL;
141
dest->p_node[i].el = NULL;
145
if (!dest->p_node[i].buf)
146
ocfs2_malloc_block(fs->fs_io, &dest->p_node[i].buf);
148
assert(dest->p_node[i].buf);
149
memcpy(dest->p_node[i].buf, src->p_node[i].buf,
151
eb = (struct ocfs2_extent_block *)dest->p_node[i].buf;
152
dest->p_node[i].el = &eb->h_list;
154
dest->p_node[i].blkno = src->p_node[i].blkno;
159
* Make the *dest path the same as src and re-initialize src path to
162
static void ocfs2_mv_path(struct ocfs2_path *dest, struct ocfs2_path *src)
166
assert(path_root_blkno(dest) == path_root_blkno(src));
168
for(i = 1; i < OCFS2_MAX_PATH_DEPTH; i++) {
169
ocfs2_free(&dest->p_node[i].buf);
171
dest->p_node[i].blkno = src->p_node[i].blkno;
172
dest->p_node[i].buf = src->p_node[i].buf;
173
dest->p_node[i].el = src->p_node[i].el;
175
src->p_node[i].blkno = 0;
176
src->p_node[i].buf = NULL;
177
src->p_node[i].el = NULL;
182
* Insert an extent block at given index.
185
* This buf will be inserted into the path, so the caller shouldn't free it.
187
static inline void ocfs2_path_insert_eb(struct ocfs2_path *path, int index,
190
struct ocfs2_extent_block *eb = (struct ocfs2_extent_block *) buf;
192
* Right now, no root buf is an extent block, so this helps
193
* catch code errors with dinode trees. The assertion can be
194
* safely removed if we ever need to insert extent block
195
* structures at the root.
199
path->p_node[index].blkno = eb->h_blkno;
200
path->p_node[index].buf = (char *)buf;
201
path->p_node[index].el = &eb->h_list;
204
static struct ocfs2_path *ocfs2_new_path(ocfs2_filesys* fs, char *buf,
205
struct ocfs2_extent_list *root_el)
208
struct ocfs2_path *path = NULL;
209
struct ocfs2_dinode *di = (struct ocfs2_dinode *)buf;
211
assert(root_el->l_tree_depth < OCFS2_MAX_PATH_DEPTH);
213
ret = ocfs2_malloc0(sizeof(*path), &path);
215
path->p_tree_depth = root_el->l_tree_depth;
216
path->p_node[0].blkno = di->i_blkno;
217
path->p_node[0].buf = buf;
218
path->p_node[0].el = root_el;
225
* Allocate and initialize a new path based on a disk inode tree.
227
static struct ocfs2_path *ocfs2_new_inode_path(ocfs2_filesys *fs,
228
struct ocfs2_dinode *di)
230
struct ocfs2_extent_list *el = &di->id2.i_list;
232
return ocfs2_new_path(fs, (char *)di, el);
235
/* Write all the extent block information to the disk.
236
* We write all paths furthur down than subtree_index.
237
* The caller will handle writing the sub_index.
239
static errcode_t ocfs2_write_path_eb(ocfs2_filesys *fs,
240
struct ocfs2_path *path, int sub_index)
245
for (i = path->p_tree_depth; i > sub_index; i--) {
246
ret = ocfs2_write_extent_block(fs,
247
path->p_node[i].blkno,
248
path->p_node[i].buf);
256
/* some extent blocks is modified and we need to synchronize them to the disk
259
* We will not update the inode if subtree_index is "0" since it should be
260
* updated by the caller.
262
* left_path or right_path can be NULL, but they can't be NULL in the same time.
263
* And if they are both not NULL, we will treat subtree_index in the right_path
264
* as the right extent block information.
266
static errcode_t ocfs2_sync_path_to_disk(ocfs2_filesys *fs,
267
struct ocfs2_path *left_path,
268
struct ocfs2_path *right_path,
272
struct ocfs2_path *path = NULL;
274
assert(left_path || right_path);
276
ret = ocfs2_write_path_eb(fs, left_path, subtree_index);
282
ret = ocfs2_write_path_eb(fs, right_path, subtree_index);
288
/* subtree_index indicates an extent block. */
289
path = right_path ? right_path : left_path;
291
ret = ocfs2_write_extent_block(fs,
292
path->p_node[subtree_index].blkno,
293
path->p_node[subtree_index].buf);
302
* Return the index of the extent record which contains cluster #v_cluster.
303
* -1 is returned if it was not found.
305
* Should work fine on interior and exterior nodes.
307
int ocfs2_search_extent_list(struct ocfs2_extent_list *el, uint32_t v_cluster)
311
struct ocfs2_extent_rec *rec;
312
uint32_t rec_end, rec_start, clusters;
314
for(i = 0; i < el->l_next_free_rec; i++) {
315
rec = &el->l_recs[i];
317
rec_start = rec->e_cpos;
318
clusters = ocfs2_rec_clusters(el->l_tree_depth, rec);
320
rec_end = rec_start + clusters;
322
if (v_cluster >= rec_start && v_cluster < rec_end) {
331
enum ocfs2_contig_type {
339
* NOTE: ocfs2_block_extent_contig(), ocfs2_extents_adjacent() and
340
* ocfs2_extent_contig only work properly against leaf nodes!
342
static inline int ocfs2_block_extent_contig(ocfs2_filesys *fs,
343
struct ocfs2_extent_rec *ext,
346
uint64_t blk_end = ext->e_blkno;
348
blk_end += ocfs2_clusters_to_blocks(fs, ext->e_leaf_clusters);
350
return blkno == blk_end;
353
static inline int ocfs2_extents_adjacent(struct ocfs2_extent_rec *left,
354
struct ocfs2_extent_rec *right)
358
left_range = left->e_cpos + left->e_leaf_clusters;
360
return (left_range == right->e_cpos);
363
static enum ocfs2_contig_type
364
ocfs2_extent_contig(ocfs2_filesys *fs,
365
struct ocfs2_extent_rec *ext,
366
struct ocfs2_extent_rec *insert_rec)
368
uint64_t blkno = insert_rec->e_blkno;
371
* Refuse to coalesce extent records with different flag
372
* fields - we don't want to mix unwritten extents with user
375
if (ext->e_flags != insert_rec->e_flags)
378
if (ocfs2_extents_adjacent(ext, insert_rec) &&
379
ocfs2_block_extent_contig(fs, ext, blkno))
382
blkno = ext->e_blkno;
383
if (ocfs2_extents_adjacent(insert_rec, ext) &&
384
ocfs2_block_extent_contig(fs, insert_rec, blkno))
391
* NOTE: We can have pretty much any combination of contiguousness and
394
* The usefulness of APPEND_TAIL is more in that it lets us know that
395
* we'll have to update the path to that leaf.
397
enum ocfs2_append_type {
402
enum ocfs2_split_type {
408
struct ocfs2_insert_type {
409
enum ocfs2_split_type ins_split;
410
enum ocfs2_append_type ins_appending;
411
enum ocfs2_contig_type ins_contig;
412
int ins_contig_index;
416
struct ocfs2_merge_ctxt {
417
enum ocfs2_contig_type c_contig_type;
418
int c_has_empty_extent;
419
int c_split_covers_rec;
423
* Helper function for ocfs2_add_branch() and shift_tree_depth().
425
* Returns the sum of the rightmost extent rec logical offset and
428
* ocfs2_add_branch() uses this to determine what logical cluster
429
* value should be populated into the leftmost new branch records.
431
* shift_tree_depth() uses this to determine the # clusters
432
* value for the new topmost tree record.
434
static inline uint32_t ocfs2_sum_rightmost_rec(struct ocfs2_extent_list *el)
436
uint16_t i = el->l_next_free_rec - 1;
438
return el->l_recs[i].e_cpos +
439
ocfs2_rec_clusters(el->l_tree_depth, &el->l_recs[i]);
444
* Add an entire tree branch to our inode. eb_buf is the extent block
445
* to start at, if we don't want to start the branch at the dinode
448
* last_eb_buf is required as we have to update it's next_leaf pointer
449
* for the new last extent block.
451
* the new branch will be 'empty' in the sense that every block will
452
* contain a single record with e_clusters == 0.
454
static int ocfs2_add_branch(ocfs2_filesys *fs,
455
struct ocfs2_dinode *fe,
461
uint64_t next_blkno, new_last_eb_blk;
462
struct ocfs2_extent_block *eb;
463
struct ocfs2_extent_list *eb_el;
464
struct ocfs2_extent_list *el;
466
uint64_t *new_blknos = NULL;
467
char **new_eb_bufs = NULL;
470
assert(*last_eb_buf);
473
eb = (struct ocfs2_extent_block *) eb_buf;
476
el = &fe->id2.i_list;
478
/* we never add a branch to a leaf. */
479
assert(el->l_tree_depth);
481
new_blocks = el->l_tree_depth;
483
/* allocate the number of new eb blocks we need new_blocks should be
485
ret = ocfs2_malloc0(sizeof(uint64_t) * new_blocks, &new_blknos);
488
memset(new_blknos, 0, sizeof(uint64_t) * new_blocks);
490
ret = ocfs2_malloc0(sizeof(char *) * new_blocks, &new_eb_bufs);
493
memset(new_eb_bufs, 0, sizeof(char *) * new_blocks);
495
for (i = 0; i < new_blocks; i++) {
496
ret = ocfs2_malloc_block(fs->fs_io, &buf);
499
new_eb_bufs[i] = buf;
501
ret = ocfs2_new_extent_block(fs, &new_blknos[i]);
505
ret = ocfs2_read_extent_block(fs, new_blknos[i], buf);
510
eb = (struct ocfs2_extent_block *)(*last_eb_buf);
511
new_cpos = ocfs2_sum_rightmost_rec(&eb->h_list);
513
/* Note: new_eb_bufs[new_blocks - 1] is the guy which will be
514
* linked with the rest of the tree.
515
* conversly, new_eb_bufs[0] is the new bottommost leaf.
517
* when we leave the loop, new_last_eb_blk will point to the
518
* newest leaf, and next_blkno will point to the topmost extent
521
next_blkno = new_last_eb_blk = 0;
522
for(i = 0; i < new_blocks; i++) {
523
buf = new_eb_bufs[i];
524
eb = (struct ocfs2_extent_block *) buf;
527
eb->h_next_leaf_blk = 0;
528
eb_el->l_tree_depth = i;
529
eb_el->l_next_free_rec = 1;
530
memset(eb_el->l_recs, 0,
531
sizeof(struct ocfs2_extent_rec) * eb_el->l_count);
533
* This actually counts as an empty extent as
536
eb_el->l_recs[0].e_cpos = new_cpos;
537
eb_el->l_recs[0].e_blkno = next_blkno;
539
* eb_el isn't always an interior node, but even leaf
540
* nodes want a zero'd flags and reserved field so
541
* this gets the whole 32 bits regardless of use.
543
eb_el->l_recs[0].e_int_clusters = 0;
545
if (!eb_el->l_tree_depth)
546
new_last_eb_blk = eb->h_blkno;
548
next_blkno = eb->h_blkno;
551
/* Link the new branch into the rest of the tree (el will
552
* either be on the fe, or the extent block passed in.
554
i = el->l_next_free_rec;
555
el->l_recs[i].e_blkno = next_blkno;
556
el->l_recs[i].e_cpos = new_cpos;
557
el->l_recs[i].e_int_clusters = 0;
558
el->l_next_free_rec++;
560
/* fe needs a new last extent block pointer, as does the
561
* next_leaf on the previously last-extent-block.
563
fe->i_last_eb_blk = new_last_eb_blk;
565
/* here all the extent block and the new inode information should be
566
* written back to the disk.
568
for(i = 0; i < new_blocks; i++) {
569
buf = new_eb_bufs[i];
570
ret = ocfs2_write_extent_block(fs, new_blknos[i], buf);
575
/* update last_eb_buf's next_leaf pointer for
576
* the new last extent block.
578
eb = (struct ocfs2_extent_block *)(*last_eb_buf);
579
eb->h_next_leaf_blk = new_last_eb_blk;
580
ret = ocfs2_write_extent_block(fs, eb->h_blkno, *last_eb_buf);
585
eb = (struct ocfs2_extent_block *)eb_buf;
586
ret = ocfs2_write_extent_block(fs, eb->h_blkno, eb_buf);
592
* Some callers want to track the rightmost leaf so pass it
595
memcpy(*last_eb_buf, new_eb_bufs[0], fs->fs_blocksize);
597
/* The inode information isn't updated since we use duplicated extent
598
* block in the insertion and it may fail in other steps.
603
for (i = 0; i < new_blocks; i++)
605
ocfs2_free(&new_eb_bufs[i]);
606
ocfs2_free(&new_eb_bufs);
609
if (ret && new_blknos)
610
for (i = 0; i < new_blocks; i++)
612
ocfs2_delete_extent_block(fs, new_blknos[i]);
615
ocfs2_free(&new_blknos);
621
* Should only be called when there is no space left in any of the
622
* leaf nodes. What we want to do is find the lowest tree depth
623
* non-leaf extent block with room for new records. There are three
624
* valid results of this search:
626
* 1) a lowest extent block is found, then we pass it back in
627
* *target_buf and return '0'
629
* 2) the search fails to find anything, but the dinode has room. We
630
* pass NULL back in *target_buf, but still return '0'
632
* 3) the search fails to find anything AND the dinode is full, in
633
* which case we return > 0
635
* return status < 0 indicates an error.
637
static errcode_t ocfs2_find_branch_target(ocfs2_filesys *fs,
638
struct ocfs2_dinode *fe,
644
struct ocfs2_extent_block *eb;
645
struct ocfs2_extent_list *el;
646
char *buf = NULL, *lowest_buf = NULL;
650
el = &fe->id2.i_list;
652
ret = ocfs2_malloc_block(fs->fs_io, &buf);
656
while(el->l_tree_depth > 1) {
657
if (el->l_next_free_rec == 0) {
658
ret = OCFS2_ET_CORRUPT_EXTENT_BLOCK;
661
i = el->l_next_free_rec - 1;
662
blkno = el->l_recs[i].e_blkno;
664
ret = OCFS2_ET_CORRUPT_EXTENT_BLOCK;
668
ret = ocfs2_read_extent_block(fs, blkno, buf);
672
eb = (struct ocfs2_extent_block *) buf;
675
if (el->l_next_free_rec < el->l_count)
679
/* If we didn't find one and the fe doesn't have any room,
682
&& (fe->id2.i_list.l_next_free_rec == fe->id2.i_list.l_count))
685
*target_buf = lowest_buf;
687
if (buf && !*target_buf)
694
* This function will discard the rightmost extent record.
696
static void ocfs2_shift_records_right(struct ocfs2_extent_list *el)
698
int next_free = el->l_next_free_rec;
699
int count = el->l_count;
700
unsigned int num_bytes;
703
/* This will cause us to go off the end of our extent list. */
704
assert(next_free < count);
706
num_bytes = sizeof(struct ocfs2_extent_rec) * next_free;
708
memmove(&el->l_recs[1], &el->l_recs[0], num_bytes);
711
static void ocfs2_rotate_leaf(struct ocfs2_extent_list *el,
712
struct ocfs2_extent_rec *insert_rec)
714
int i, insert_index, next_free, has_empty, num_bytes;
715
uint32_t insert_cpos = insert_rec->e_cpos;
716
struct ocfs2_extent_rec *rec;
718
next_free = el->l_next_free_rec;
719
has_empty = ocfs2_is_empty_extent(&el->l_recs[0]);
723
/* The tree code before us didn't allow enough room in the leaf. */
724
if (el->l_next_free_rec == el->l_count && !has_empty)
728
* The easiest way to approach this is to just remove the
729
* empty extent and temporarily decrement next_free.
733
* If next_free was 1 (only an empty extent), this
734
* loop won't execute, which is fine. We still want
735
* the decrement above to happen.
737
for(i = 0; i < (next_free - 1); i++)
738
el->l_recs[i] = el->l_recs[i+1];
743
/* Figure out what the new record index should be. */
744
for(i = 0; i < next_free; i++) {
745
rec = &el->l_recs[i];
747
if (insert_cpos < rec->e_cpos)
752
assert(insert_index >= 0);
753
assert(insert_index < el->l_count);
754
assert(insert_index <= next_free);
756
/* No need to memmove if we're just adding to the tail. */
757
if (insert_index != next_free) {
758
assert(next_free < el->l_count);
760
num_bytes = next_free - insert_index;
761
num_bytes *= sizeof(struct ocfs2_extent_rec);
762
memmove(&el->l_recs[insert_index + 1],
763
&el->l_recs[insert_index],
768
* Either we had an empty extent, and need to re-increment or
769
* there was no empty extent on a non full rightmost leaf node,
770
* in which case we still need to increment.
773
el->l_next_free_rec = next_free;
774
/* Make sure none of the math above just messed up our tree. */
775
assert(el->l_next_free_rec <= el->l_count);
777
el->l_recs[insert_index] = *insert_rec;
780
static void ocfs2_remove_empty_extent(struct ocfs2_extent_list *el)
782
int size, num_recs = el->l_next_free_rec;
786
if (ocfs2_is_empty_extent(&el->l_recs[0])) {
788
size = num_recs * sizeof(struct ocfs2_extent_rec);
789
memmove(&el->l_recs[0], &el->l_recs[1], size);
790
memset(&el->l_recs[num_recs], 0,
791
sizeof(struct ocfs2_extent_rec));
792
el->l_next_free_rec = num_recs;
797
* Create an empty extent record .
799
* l_next_free_rec may be updated.
801
* If an empty extent already exists do nothing.
803
static void ocfs2_create_empty_extent(struct ocfs2_extent_list *el)
805
int next_free = el->l_next_free_rec;
807
assert(el->l_tree_depth == 0);
812
if (ocfs2_is_empty_extent(&el->l_recs[0]))
815
ocfs2_shift_records_right(el);
818
el->l_next_free_rec += 1;
819
memset(&el->l_recs[0], 0, sizeof(struct ocfs2_extent_rec));
823
* For a rotation which involves two leaf nodes, the "root node" is
824
* the lowest level tree node which contains a path to both leafs. This
825
* resulting set of information can be used to form a complete "subtree"
827
* This function is passed two full paths from the dinode down to a
828
* pair of adjacent leaves. It's task is to figure out which path
829
* index contains the subtree root - this can be the root index itself
830
* in a worst-case rotation.
832
* The array index of the subtree root is passed back.
834
static int ocfs2_find_subtree_root(struct ocfs2_path *left,
835
struct ocfs2_path *right)
839
/* Check that the caller passed in two paths from the same tree. */
840
assert(path_root_blkno(left) == path_root_blkno(right));
845
/* The caller didn't pass two adjacent paths. */
846
if (i > left->p_tree_depth)
848
} while (left->p_node[i].blkno == right->p_node[i].blkno);
853
typedef errcode_t (path_insert_t)(void *, char *);
856
* Traverse a btree path in search of cpos, starting at root_el.
858
* This code can be called with a cpos larger than the tree, in which
859
* case it will return the rightmost path.
861
static errcode_t __ocfs2_find_path(ocfs2_filesys *fs,
862
struct ocfs2_extent_list *root_el,
871
struct ocfs2_extent_block *eb;
872
struct ocfs2_extent_list *el;
873
struct ocfs2_extent_rec *rec;
876
while (el->l_tree_depth) {
877
if (el->l_next_free_rec == 0) {
878
ret = OCFS2_ET_CORRUPT_EXTENT_BLOCK;
884
for(i = 0; i < el->l_next_free_rec - 1; i++) {
885
rec = &el->l_recs[i];
888
* In the case that cpos is off the allocation
889
* tree, this should just wind up returning the
892
range = rec->e_cpos +
893
ocfs2_rec_clusters(el->l_tree_depth, rec);
894
if (cpos >= rec->e_cpos && cpos < range)
898
blkno = el->l_recs[i].e_blkno;
900
ret = OCFS2_ET_CORRUPT_EXTENT_BLOCK;
904
ret = ocfs2_malloc_block(fs->fs_io, &buf);
908
ret = ocfs2_read_extent_block(fs, blkno, buf);
912
eb = (struct ocfs2_extent_block *) buf;
915
if (el->l_next_free_rec > el->l_count) {
916
ret = OCFS2_ET_CORRUPT_EXTENT_BLOCK;
920
/* The user's callback must give us the tip for how to
921
* handle the buf we allocated by return values.
924
* the function succeeds,and it will use the buf and
925
* take care of the buffer release.
928
* the function succeeds, and there is no need for buf,
929
* so we will release it.
932
* the function fails.
935
ret = func(data, buf);
949
/* Catch any trailing buf that the loop didn't handle. */
957
* Given an initialized path (that is, it has a valid root extent
958
* list), this function will traverse the btree in search of the path
959
* which would contain cpos.
961
* The path traveled is recorded in the path structure.
963
* Note that this will not do any comparisons on leaf node extent
964
* records, so it will work fine in the case that we just added a tree
967
struct find_path_data {
969
struct ocfs2_path *path;
972
static errcode_t find_path_ins(void *data, char *eb)
974
struct find_path_data *fp = data;
976
ocfs2_path_insert_eb(fp->path, fp->index, eb);
982
static int ocfs2_find_path(ocfs2_filesys *fs, struct ocfs2_path *path,
985
struct find_path_data data;
989
return __ocfs2_find_path(fs, path_root_el(path), cpos,
990
find_path_ins, &data);
994
* Find the leaf block in the tree which would contain cpos. No
995
* checking of the actual leaf is done.
997
* This function doesn't handle non btree extent lists.
999
int ocfs2_find_leaf(ocfs2_filesys *fs, struct ocfs2_dinode *di,
1000
uint32_t cpos, char **leaf_buf)
1004
struct ocfs2_path *path = NULL;
1005
struct ocfs2_extent_list *el = &di->id2.i_list;
1007
assert(el->l_tree_depth > 0);
1009
path = ocfs2_new_inode_path(fs, di);
1011
ret = OCFS2_ET_NO_MEMORY;
1015
ret = ocfs2_find_path(fs, path, cpos);
1019
ret = ocfs2_malloc_block(fs->fs_io, &buf);
1023
memcpy(buf, path_leaf_buf(path), fs->fs_blocksize);
1026
ocfs2_free_path(path);
1030
int ocfs2_xattr_find_leaf(ocfs2_filesys *fs, struct ocfs2_xattr_block *xb,
1031
uint32_t cpos, char **leaf_buf)
1035
struct ocfs2_path *path = NULL;
1036
struct ocfs2_extent_list *el = &xb->xb_attrs.xb_root.xt_list;
1038
assert(el->l_tree_depth > 0);
1041
ret = ocfs2_malloc0(sizeof(*path), &path);
1044
ret = OCFS2_ET_NO_MEMORY;
1047
path->p_tree_depth = el->l_tree_depth;
1048
path->p_node[0].blkno = xb->xb_blkno;
1049
path->p_node[0].buf = (char *)xb;
1050
path->p_node[0].el = el;
1053
ret = ocfs2_find_path(fs, path, cpos);
1057
ret = ocfs2_malloc_block(fs->fs_io, &buf);
1061
memcpy(buf, path_leaf_buf(path), fs->fs_blocksize);
1064
ocfs2_free_path(path);
1069
* Adjust the adjacent records (left_rec, right_rec) involved in a rotation.
1071
* Basically, we've moved stuff around at the bottom of the tree and
1072
* we need to fix up the extent records above the changes to reflect
1075
* left_rec: the record on the left.
1076
* left_child_el: is the child list pointed to by left_rec
1077
* right_rec: the record to the right of left_rec
1078
* right_child_el: is the child list pointed to by right_rec
1080
* By definition, this only works on interior nodes.
1082
static void ocfs2_adjust_adjacent_records(struct ocfs2_extent_rec *left_rec,
1083
struct ocfs2_extent_list *left_child_el,
1084
struct ocfs2_extent_rec *right_rec,
1085
struct ocfs2_extent_list *right_child_el)
1087
uint32_t left_clusters, right_end;
1090
* Interior nodes never have holes. Their cpos is the cpos of
1091
* the leftmost record in their child list. Their cluster
1092
* count covers the full theoretical range of their child list
1093
* - the range between their cpos and the cpos of the record
1094
* immediately to their right.
1096
left_clusters = right_child_el->l_recs[0].e_cpos;
1097
if (ocfs2_is_empty_extent(&right_child_el->l_recs[0])) {
1098
assert(right_child_el->l_next_free_rec > 1);
1099
left_clusters = right_child_el->l_recs[1].e_cpos;
1101
left_clusters -= left_rec->e_cpos;
1102
left_rec->e_int_clusters = left_clusters;
1105
* Calculate the rightmost cluster count boundary before
1106
* moving cpos - we will need to adjust clusters after
1107
* updating e_cpos to keep the same highest cluster count.
1109
right_end = right_rec->e_cpos;
1110
right_end += right_rec->e_int_clusters;
1112
right_rec->e_cpos = left_rec->e_cpos;
1113
right_rec->e_cpos += left_clusters;
1115
right_end -= right_rec->e_cpos;
1116
right_rec->e_int_clusters = right_end;
1120
* Adjust the adjacent root node records involved in a
1121
* rotation. left_el_blkno is passed in as a key so that we can easily
1122
* find it's index in the root list.
1124
static void ocfs2_adjust_root_records(struct ocfs2_extent_list *root_el,
1125
struct ocfs2_extent_list *left_el,
1126
struct ocfs2_extent_list *right_el,
1127
uint64_t left_el_blkno)
1131
assert(root_el->l_tree_depth > left_el->l_tree_depth);
1133
for(i = 0; i < root_el->l_next_free_rec - 1; i++) {
1134
if (root_el->l_recs[i].e_blkno == left_el_blkno)
1139
* The path walking code should have never returned a root and
1140
* two paths which are not adjacent.
1142
assert(i < (root_el->l_next_free_rec - 1));
1144
ocfs2_adjust_adjacent_records(&root_el->l_recs[i], left_el,
1145
&root_el->l_recs[i + 1], right_el);
1149
* We've changed a leaf block (in right_path) and need to reflect that
1150
* change back up the subtree.
1152
* This happens in multiple places:
1153
* - When we've moved an extent record from the left path leaf to the right
1154
* path leaf to make room for an empty extent in the left path leaf.
1155
* - When our insert into the right path leaf is at the leftmost edge
1156
* and requires an update of the path immediately to it's left. This
1157
* can occur at the end of some types of rotation and appending inserts.
1159
static void ocfs2_complete_edge_insert(ocfs2_filesys *fs,
1160
struct ocfs2_path *left_path,
1161
struct ocfs2_path *right_path,
1166
struct ocfs2_extent_list *el, *left_el, *right_el;
1167
struct ocfs2_extent_rec *left_rec, *right_rec;
1170
* Update the counts and position values within all the
1171
* interior nodes to reflect the leaf rotation we just did.
1173
* The root node is handled below the loop.
1175
* We begin the loop with right_el and left_el pointing to the
1176
* leaf lists and work our way up.
1178
* NOTE: within this loop, left_el and right_el always refer
1179
* to the *child* lists.
1181
left_el = path_leaf_el(left_path);
1182
right_el = path_leaf_el(right_path);
1183
for(i = left_path->p_tree_depth - 1; i > subtree_index; i--) {
1186
* One nice property of knowing that all of these
1187
* nodes are below the root is that we only deal with
1188
* the leftmost right node record and the rightmost
1191
el = left_path->p_node[i].el;
1192
idx = left_el->l_next_free_rec - 1;
1193
left_rec = &el->l_recs[idx];
1195
el = right_path->p_node[i].el;
1196
right_rec = &el->l_recs[0];
1198
ocfs2_adjust_adjacent_records(left_rec, left_el, right_rec,
1202
* Setup our list pointers now so that the current
1203
* parents become children in the next iteration.
1205
left_el = left_path->p_node[i].el;
1206
right_el = right_path->p_node[i].el;
1210
* At the root node, adjust the two adjacent records which
1211
* begin our path to the leaves.
1214
el = left_path->p_node[subtree_index].el;
1215
left_el = left_path->p_node[subtree_index + 1].el;
1216
right_el = right_path->p_node[subtree_index + 1].el;
1217
blkno = left_path->p_node[subtree_index + 1].blkno;
1219
ocfs2_adjust_root_records(el, left_el, right_el, blkno);
1221
/* ocfs2_adjust_root_records only update the extent block in the left
1222
* path, and actually right_path->p_node[subtree_index].eb indicates the
1223
* same extent block, so we must keep them the same content.
1225
memcpy(right_path->p_node[subtree_index].buf,
1226
left_path->p_node[subtree_index].buf, fs->fs_blocksize);
1229
/* Rotate the subtree to right.
1231
* Note: After successful rotation, the extent block will be flashed
1232
* to disk accordingly.
1234
static errcode_t ocfs2_rotate_subtree_right(ocfs2_filesys *fs,
1235
struct ocfs2_path *left_path,
1236
struct ocfs2_path *right_path,
1241
char *right_leaf_eb;
1242
char *left_leaf_eb = NULL;
1243
struct ocfs2_extent_list *right_el, *left_el;
1244
struct ocfs2_extent_rec move_rec;
1245
struct ocfs2_extent_block *eb;
1247
left_leaf_eb = path_leaf_buf(left_path);
1248
eb = (struct ocfs2_extent_block *)left_leaf_eb;
1249
left_el = path_leaf_el(left_path);
1251
if (left_el->l_next_free_rec != left_el->l_count)
1252
return OCFS2_ET_CORRUPT_EXTENT_BLOCK;
1255
* This extent block may already have an empty record, so we
1256
* return early if so.
1258
if (ocfs2_is_empty_extent(&left_el->l_recs[0]))
1261
assert(left_path->p_node[subtree_index].blkno ==
1262
right_path->p_node[subtree_index].blkno);
1264
right_leaf_eb = path_leaf_buf(right_path);
1265
right_el = path_leaf_el(right_path);
1267
ocfs2_create_empty_extent(right_el);
1269
/* Do the copy now. */
1270
i = left_el->l_next_free_rec - 1;
1271
move_rec = left_el->l_recs[i];
1272
right_el->l_recs[0] = move_rec;
1275
* Clear out the record we just copied and shift everything
1276
* over, leaving an empty extent in the left leaf.
1278
* We temporarily subtract from next_free_rec so that the
1279
* shift will lose the tail record (which is now defunct).
1281
left_el->l_next_free_rec -= 1;
1282
ocfs2_shift_records_right(left_el);
1283
memset(&left_el->l_recs[0], 0, sizeof(struct ocfs2_extent_rec));
1284
left_el->l_next_free_rec += 1;
1286
ocfs2_complete_edge_insert(fs, left_path, right_path, subtree_index);
1288
ret = ocfs2_sync_path_to_disk(fs, left_path, right_path, subtree_index);
1294
* Given a full path, determine what cpos value would return us a path
1295
* containing the leaf immediately to the left of the current one.
1297
* Will return zero if the path passed in is already the leftmost path.
1299
static int ocfs2_find_cpos_for_left_leaf(struct ocfs2_path *path,
1304
struct ocfs2_extent_list *el;
1306
assert(path->p_tree_depth > 0);
1310
blkno = path_leaf_blkno(path);
1312
/* Start at the tree node just above the leaf and work our way up. */
1313
i = path->p_tree_depth - 1;
1315
el = path->p_node[i].el;
1317
/* Find the extent record just before the one in our path. */
1318
for(j = 0; j < el->l_next_free_rec; j++) {
1319
if (el->l_recs[j].e_blkno == blkno) {
1323
* We've determined that the
1324
* path specified is already
1325
* the leftmost one - return a
1331
* The leftmost record points to our
1332
* leaf - we need to travel up the
1338
*cpos = el->l_recs[j - 1].e_cpos;
1339
*cpos = *cpos + ocfs2_rec_clusters(
1341
&el->l_recs[j - 1]);
1348
* If we got here, we never found a valid node where
1349
* the tree indicated one should be.
1351
ret = OCFS2_ET_CORRUPT_EXTENT_BLOCK;
1355
blkno = path->p_node[i].blkno;
1364
* Trap the case where we're inserting into the theoretical range past
1365
* the _actual_ left leaf range. Otherwise, we'll rotate a record
1366
* whose cpos is less than ours into the right leaf.
1368
* It's only necessary to look at the rightmost record of the left
1369
* leaf because the logic that calls us should ensure that the
1370
* theoretical ranges in the path components above the leaves are
1373
static int ocfs2_rotate_requires_path_adjustment(struct ocfs2_path *left_path,
1374
uint32_t insert_cpos)
1376
struct ocfs2_extent_list *left_el;
1377
struct ocfs2_extent_rec *rec;
1380
left_el = path_leaf_el(left_path);
1381
next_free = left_el->l_next_free_rec;
1382
rec = &left_el->l_recs[next_free - 1];
1384
if (insert_cpos > rec->e_cpos)
1389
static int ocfs2_leftmost_rec_contains(struct ocfs2_extent_list *el,
1392
int next_free = el->l_next_free_rec;
1394
struct ocfs2_extent_rec *rec;
1399
rec = &el->l_recs[0];
1400
if (ocfs2_is_empty_extent(rec)) {
1404
rec = &el->l_recs[1];
1407
range = rec->e_cpos + ocfs2_rec_clusters(el->l_tree_depth, rec);
1408
if (cpos >= rec->e_cpos && cpos < range)
1414
* Rotate all the records in a btree right one record, starting at insert_cpos.
1416
* The path to the rightmost leaf should be passed in.
1418
* The array is assumed to be large enough to hold an entire path (tree depth).
1420
* Upon succesful return from this function:
1422
* - The 'right_path' array will contain a path to the leaf block
1423
* whose range contains e_cpos.
1424
* - That leaf block will have a single empty extent in list index 0.
1425
* - In the case that the rotation requires a post-insert update,
1426
* *ret_left_path will contain a valid path which can be passed to
1427
* ocfs2_insert_path().
1429
static int ocfs2_rotate_tree_right(ocfs2_filesys *fs,
1430
enum ocfs2_split_type split,
1431
uint32_t insert_cpos,
1432
struct ocfs2_path *right_path,
1433
struct ocfs2_path **ret_left_path)
1437
struct ocfs2_path *left_path = NULL;
1439
*ret_left_path = NULL;
1441
left_path = ocfs2_new_path(fs, path_root_buf(right_path),
1442
path_root_el(right_path));
1444
ret = OCFS2_ET_NO_MEMORY;
1448
ret = ocfs2_find_cpos_for_left_leaf(right_path, &cpos);
1453
* What we want to do here is:
1455
* 1) Start with the rightmost path.
1457
* 2) Determine a path to the leaf block directly to the left
1460
* 3) Determine the 'subtree root' - the lowest level tree node
1461
* which contains a path to both leaves.
1463
* 4) Rotate the subtree.
1465
* 5) Find the next subtree by considering the left path to be
1466
* the new right path.
1468
* The check at the top of this while loop also accepts
1469
* insert_cpos == cpos because cpos is only a _theoretical_
1470
* value to get us the left path - insert_cpos might very well
1471
* be filling that hole.
1473
* Stop at a cpos of '0' because we either started at the
1474
* leftmost branch (i.e., a tree with one branch and a
1475
* rotation inside of it), or we've gone as far as we can in
1476
* rotating subtrees.
1478
while (cpos && insert_cpos <= cpos) {
1480
ret = ocfs2_find_path(fs, left_path, cpos);
1484
if (path_leaf_blkno(left_path) == path_leaf_blkno(right_path))
1487
if (split == SPLIT_NONE &&
1488
ocfs2_rotate_requires_path_adjustment(left_path,
1491
* We've rotated the tree as much as we
1492
* should. The rest is up to
1493
* ocfs2_insert_path() to complete, after the
1494
* record insertion. We indicate this
1495
* situation by returning the left path.
1497
* The reason we don't adjust the records here
1498
* before the record insert is that an error
1499
* later might break the rule where a parent
1500
* record e_cpos will reflect the actual
1501
* e_cpos of the 1st nonempty record of the
1504
*ret_left_path = left_path;
1508
start = ocfs2_find_subtree_root(left_path, right_path);
1510
ret = ocfs2_rotate_subtree_right(fs, left_path, right_path,
1515
if (split != SPLIT_NONE &&
1516
ocfs2_leftmost_rec_contains(path_leaf_el(right_path),
1519
* A rotate moves the rightmost left leaf
1520
* record over to the leftmost right leaf
1521
* slot. If we're doing an extent split
1522
* instead of a real insert, then we have to
1523
* check that the extent to be split wasn't
1524
* just moved over. If it was, then we can
1525
* exit here, passing left_path back -
1526
* ocfs2_split_extent() is smart enough to
1527
* search both leaves.
1529
*ret_left_path = left_path;
1533
* There is no need to re-read the next right path
1534
* as we know that it'll be our current left
1535
* path. Optimize by copying values instead.
1537
ocfs2_mv_path(right_path, left_path);
1539
ret = ocfs2_find_cpos_for_left_leaf(right_path, &cpos);
1545
ocfs2_free_path(left_path);
1551
static void ocfs2_update_edge_lengths(struct ocfs2_path *path)
1554
struct ocfs2_extent_rec *rec;
1555
struct ocfs2_extent_list *el;
1556
struct ocfs2_extent_block *eb;
1559
/* Path should always be rightmost. */
1560
eb = (struct ocfs2_extent_block *)path_leaf_buf(path);
1561
assert(eb->h_next_leaf_blk == 0ULL);
1564
assert(el->l_next_free_rec > 0);
1565
idx = el->l_next_free_rec - 1;
1566
rec = &el->l_recs[idx];
1567
range = rec->e_cpos + ocfs2_rec_clusters(el->l_tree_depth, rec);
1569
for (i = 0; i < path->p_tree_depth; i++) {
1570
el = path->p_node[i].el;
1571
idx = el->l_next_free_rec - 1;
1572
rec = &el->l_recs[idx];
1574
rec->e_int_clusters = range;
1575
rec->e_int_clusters -= rec->e_cpos;
1579
static errcode_t ocfs2_unlink_path(ocfs2_filesys *fs,
1580
struct ocfs2_path *path, int unlink_start)
1583
struct ocfs2_extent_block *eb;
1584
struct ocfs2_extent_list *el;
1587
for(i = unlink_start; i < path_num_items(path); i++) {
1588
buf = path->p_node[i].buf;
1590
eb = (struct ocfs2_extent_block *)buf;
1592
* Not all nodes might have had their final count
1593
* decremented by the caller - handle this here.
1596
assert(el->l_next_free_rec <= 1);
1598
el->l_next_free_rec = 0;
1599
memset(&el->l_recs[0], 0, sizeof(struct ocfs2_extent_rec));
1600
ret = ocfs2_delete_extent_block(fs, path->p_node[i].blkno);
1608
* ocfs2_unlink_subtree will delete extent blocks in the "right_path"
1609
* from "subtree_index".
1611
static errcode_t ocfs2_unlink_subtree(ocfs2_filesys *fs,
1612
struct ocfs2_path *left_path,
1613
struct ocfs2_path *right_path,
1618
struct ocfs2_extent_list *root_el = left_path->p_node[subtree_index].el;
1619
struct ocfs2_extent_list *el;
1620
struct ocfs2_extent_block *eb;
1622
el = path_leaf_el(left_path);
1624
eb = (struct ocfs2_extent_block *)right_path->p_node[subtree_index + 1].buf;
1626
for(i = 1; i < root_el->l_next_free_rec; i++)
1627
if (root_el->l_recs[i].e_blkno == eb->h_blkno)
1630
assert(i < root_el->l_next_free_rec);
1632
memset(&root_el->l_recs[i], 0, sizeof(struct ocfs2_extent_rec));
1633
root_el->l_next_free_rec -= 1;
1635
eb = (struct ocfs2_extent_block *)path_leaf_buf(left_path);
1636
eb->h_next_leaf_blk = 0;
1638
ret = ocfs2_unlink_path(fs, right_path, subtree_index + 1);
1645
static int ocfs2_rotate_subtree_left(ocfs2_filesys *fs,
1646
struct ocfs2_path *left_path,
1647
struct ocfs2_path *right_path,
1652
int i, del_right_subtree = 0, right_has_empty = 0;
1653
char *root_buf, *di_buf = path_root_buf(right_path);
1654
struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_buf;
1655
struct ocfs2_extent_list *right_leaf_el, *left_leaf_el;
1656
struct ocfs2_extent_block *eb;
1660
right_leaf_el = path_leaf_el(right_path);
1661
left_leaf_el = path_leaf_el(left_path);
1662
root_buf = left_path->p_node[subtree_index].buf;
1663
assert(left_path->p_node[subtree_index].blkno ==
1664
right_path->p_node[subtree_index].blkno);
1666
if (!ocfs2_is_empty_extent(&left_leaf_el->l_recs[0]))
1669
eb = (struct ocfs2_extent_block *)path_leaf_buf(right_path);
1670
if (ocfs2_is_empty_extent(&right_leaf_el->l_recs[0])) {
1672
* It's legal for us to proceed if the right leaf is
1673
* the rightmost one and it has an empty extent. There
1674
* are two cases to handle - whether the leaf will be
1675
* empty after removal or not. If the leaf isn't empty
1676
* then just remove the empty extent up front. The
1677
* next block will handle empty leaves by flagging
1680
* Non rightmost leaves will throw EAGAIN and the
1681
* caller can manually move the subtree and retry.
1684
if (eb->h_next_leaf_blk != 0ULL)
1687
if (right_leaf_el->l_next_free_rec > 1) {
1688
ocfs2_remove_empty_extent(right_leaf_el);
1690
right_has_empty = 1;
1693
if (eb->h_next_leaf_blk == 0ULL &&
1694
right_leaf_el->l_next_free_rec == 1) {
1696
* We have to update i_last_eb_blk during the meta
1699
del_right_subtree = 1;
1703
* Getting here with an empty extent in the right path implies
1704
* that it's the rightmost path and will be deleted.
1706
assert(!right_has_empty || del_right_subtree);
1708
if (!right_has_empty) {
1710
* Only do this if we're moving a real
1711
* record. Otherwise, the action is delayed until
1712
* after removal of the right path in which case we
1713
* can do a simple shift to remove the empty extent.
1715
ocfs2_rotate_leaf(left_leaf_el, &right_leaf_el->l_recs[0]);
1716
memset(&right_leaf_el->l_recs[0], 0,
1717
sizeof(struct ocfs2_extent_rec));
1719
if (eb->h_next_leaf_blk == 0ULL) {
1721
* Move recs over to get rid of empty extent, decrease
1722
* next_free. This is allowed to remove the last
1723
* extent in our leaf (setting l_next_free_rec to
1724
* zero) - the delete code below won't care.
1726
ocfs2_remove_empty_extent(right_leaf_el);
1729
if (del_right_subtree) {
1730
ocfs2_unlink_subtree(fs, left_path, right_path, subtree_index);
1731
ocfs2_update_edge_lengths(left_path);
1734
* Now the good extent block information is stored in left_path, so
1735
* synchronize the right path with it.
1737
for (i = 0; i <= subtree_index; i++)
1738
memcpy(right_path->p_node[i].buf,
1739
left_path->p_node[i].buf,
1742
eb = (struct ocfs2_extent_block *)path_leaf_buf(left_path);
1743
di->i_last_eb_blk = eb->h_blkno;
1746
* Removal of the extent in the left leaf was skipped
1747
* above so we could delete the right path
1750
if (right_has_empty)
1751
ocfs2_remove_empty_extent(left_leaf_el);
1756
* The extent block in the right path belwo subtree_index
1757
* have been deleted, so we don't need to synchronize
1760
ret = ocfs2_sync_path_to_disk(fs, left_path, NULL,
1763
ocfs2_complete_edge_insert(fs, left_path, right_path,
1766
ret = ocfs2_sync_path_to_disk(fs, left_path, right_path,
1774
* Given a full path, determine what cpos value would return us a path
1775
* containing the leaf immediately to the right of the current one.
1777
* Will return zero if the path passed in is already the rightmost path.
1779
* This looks similar, but is subtly different to
1780
* ocfs2_find_cpos_for_left_leaf().
1782
static int ocfs2_find_cpos_for_right_leaf(ocfs2_filesys *fs,
1783
struct ocfs2_path *path,
1788
struct ocfs2_extent_list *el;
1792
if (path->p_tree_depth == 0)
1795
blkno = path_leaf_blkno(path);
1797
/* Start at the tree node just above the leaf and work our way up. */
1798
i = path->p_tree_depth - 1;
1802
el = path->p_node[i].el;
1805
* Find the extent record just after the one in our
1808
next_free = el->l_next_free_rec;
1809
for(j = 0; j < el->l_next_free_rec; j++) {
1810
if (el->l_recs[j].e_blkno == blkno) {
1811
if (j == (next_free - 1)) {
1814
* We've determined that the
1815
* path specified is already
1816
* the rightmost one - return a
1822
* The rightmost record points to our
1823
* leaf - we need to travel up the
1829
*cpos = el->l_recs[j + 1].e_cpos;
1835
* If we got here, we never found a valid node where
1836
* the tree indicated one should be.
1838
ret = OCFS2_ET_CORRUPT_EXTENT_BLOCK;
1842
blkno = path->p_node[i].blkno;
1850
static void ocfs2_rotate_rightmost_leaf_left(ocfs2_filesys *fs,
1851
struct ocfs2_extent_list *el)
1853
if (!ocfs2_is_empty_extent(&el->l_recs[0]))
1856
ocfs2_remove_empty_extent(el);
1861
static int __ocfs2_rotate_tree_left(ocfs2_filesys *fs,
1862
struct ocfs2_path *path,
1863
struct ocfs2_path **empty_extent_path)
1865
int i, ret, subtree_root, deleted;
1866
uint32_t right_cpos;
1867
struct ocfs2_path *left_path = NULL;
1868
struct ocfs2_path *right_path = NULL;
1870
assert(ocfs2_is_empty_extent(&(path_leaf_el(path)->l_recs[0])));
1872
*empty_extent_path = NULL;
1874
ret = ocfs2_find_cpos_for_right_leaf(fs, path, &right_cpos);
1878
left_path = ocfs2_new_path(fs, path_root_buf(path),
1879
path_root_el(path));
1881
ret = OCFS2_ET_NO_MEMORY;
1885
ocfs2_cp_path(fs, left_path, path);
1887
right_path = ocfs2_new_path(fs, path_root_buf(path),
1888
path_root_el(path));
1890
ret = OCFS2_ET_NO_MEMORY;
1894
while (right_cpos) {
1895
ret = ocfs2_find_path(fs, right_path, right_cpos);
1899
subtree_root = ocfs2_find_subtree_root(left_path,
1902
ret = ocfs2_rotate_subtree_left(fs, left_path,
1903
right_path, subtree_root,
1905
if (ret == EAGAIN) {
1907
* The rotation has to temporarily stop due to
1908
* the right subtree having an empty
1909
* extent. Pass it back to the caller for a
1912
*empty_extent_path = right_path;
1920
* The subtree rotate might have removed records on
1921
* the rightmost edge. If so, then rotation is
1927
ocfs2_mv_path(left_path, right_path);
1929
ret = ocfs2_find_cpos_for_right_leaf(fs, left_path,
1936
ocfs2_free_path(right_path);
1937
ocfs2_free_path(left_path);
1940
* the path's information is changed during the process of rotation,
1943
for (i = 1; i <= path->p_tree_depth; i++) {
1944
ret = ocfs2_read_extent_block(fs, path->p_node[i].blkno,
1945
path->p_node[i].buf);
1953
static int ocfs2_remove_rightmost_path(ocfs2_filesys *fs,
1954
struct ocfs2_path *path)
1956
int ret, subtree_index, i;
1958
struct ocfs2_path *left_path = NULL;
1959
struct ocfs2_dinode *di;
1960
struct ocfs2_extent_block *eb;
1961
struct ocfs2_extent_list *el;
1964
* XXX: This code assumes that the root is an inode, which is
1965
* true for now but may change as tree code gets generic.
1967
di = (struct ocfs2_dinode *)path_root_buf(path);
1969
ret = ocfs2_find_cpos_for_left_leaf(path, &cpos);
1975
* We have a path to the left of this one - it needs
1978
left_path = ocfs2_new_path(fs, path_root_buf(path),
1979
path_root_el(path));
1981
ret = OCFS2_ET_NO_MEMORY;
1985
ret = ocfs2_find_path(fs, left_path, cpos);
1989
subtree_index = ocfs2_find_subtree_root(left_path, path);
1991
ocfs2_unlink_subtree(fs, left_path, path,
1993
ocfs2_update_edge_lengths(left_path);
1996
* Now the good extent block information is stored in left_path, so
1997
* synchronize the right path with it.
1999
for (i = 0; i <= subtree_index; i++)
2000
memcpy(path->p_node[i].buf,
2001
left_path->p_node[i].buf,
2003
ret = ocfs2_sync_path_to_disk(fs, left_path,
2004
NULL, subtree_index);
2008
eb = (struct ocfs2_extent_block *)path_leaf_buf(left_path);
2009
di->i_last_eb_blk = eb->h_blkno;
2012
* 'path' is also the leftmost path which
2013
* means it must be the only one. This gets
2014
* handled differently because we want to
2015
* revert the inode back to having extents
2018
ocfs2_unlink_path(fs, path, 1);
2020
el = &di->id2.i_list;
2021
el->l_tree_depth = 0;
2022
el->l_next_free_rec = 0;
2023
memset(&el->l_recs[0], 0, sizeof(struct ocfs2_extent_rec));
2025
di->i_last_eb_blk = 0;
2029
ocfs2_free_path(left_path);
2034
* Left rotation of btree records.
2036
* In many ways, this is (unsurprisingly) the opposite of right
2037
* rotation. We start at some non-rightmost path containing an empty
2038
* extent in the leaf block. The code works its way to the rightmost
2039
* path by rotating records to the left in every subtree.
2041
* This is used by any code which reduces the number of extent records
2042
* in a leaf. After removal, an empty record should be placed in the
2043
* leftmost list position.
2045
* This won't handle a length update of the rightmost path records if
2046
* the rightmost tree leaf record is removed so the caller is
2047
* responsible for detecting and correcting that.
2049
static int ocfs2_rotate_tree_left(ocfs2_filesys *fs, struct ocfs2_path *path)
2052
struct ocfs2_path *tmp_path = NULL, *restart_path = NULL;
2053
struct ocfs2_extent_block *eb;
2054
struct ocfs2_extent_list *el;
2056
el = path_leaf_el(path);
2057
if (!ocfs2_is_empty_extent(&el->l_recs[0]))
2060
if (path->p_tree_depth == 0) {
2061
rightmost_no_delete:
2063
* In-inode extents. This is trivially handled, so do
2066
ocfs2_rotate_rightmost_leaf_left(fs,
2067
path_leaf_el(path));
2069
/* we have to synchronize the modified extent block to disk. */
2070
if (path->p_tree_depth > 0) {
2071
ret = ocfs2_write_extent_block(fs,
2072
path_leaf_blkno(path),
2073
path_leaf_buf(path));
2080
* Handle rightmost branch now. There's several cases:
2081
* 1) simple rotation leaving records in there. That's trivial.
2082
* 2) rotation requiring a branch delete - there's no more
2083
* records left. Two cases of this:
2084
* a) There are branches to the left.
2085
* b) This is also the leftmost (the only) branch.
2087
* 1) is handled via ocfs2_rotate_rightmost_leaf_left()
2088
* 2a) we need the left branch so that we can update it with the unlink
2089
* 2b) we need to bring the inode back to inline extents.
2092
eb = (struct ocfs2_extent_block *)path_leaf_buf(path);
2094
if (eb->h_next_leaf_blk == 0) {
2096
* This gets a bit tricky if we're going to delete the
2097
* rightmost path. Get the other cases out of the way
2100
if (el->l_next_free_rec > 1)
2101
goto rightmost_no_delete;
2103
if (el->l_next_free_rec == 0) {
2104
ret = OCFS2_ET_CORRUPT_EXTENT_BLOCK;
2109
* XXX: The caller can not trust "path" any more after
2110
* this as it will have been deleted. What do we do?
2112
* In theory the rotate-for-merge code will never get
2113
* here because it'll always ask for a rotate in a
2117
ret = ocfs2_remove_rightmost_path(fs, path);
2122
* Now we can loop, remembering the path we get from EAGAIN
2123
* and restarting from there.
2126
ret = __ocfs2_rotate_tree_left(fs, path, &restart_path);
2127
if (ret && ret != EAGAIN) {
2131
while (ret == EAGAIN) {
2132
tmp_path = restart_path;
2133
restart_path = NULL;
2135
ret = __ocfs2_rotate_tree_left(fs, tmp_path, &restart_path);
2136
if (ret && ret != EAGAIN) {
2140
ocfs2_free_path(tmp_path);
2148
ocfs2_free_path(tmp_path);
2149
ocfs2_free_path(restart_path);
2153
static void ocfs2_cleanup_merge(struct ocfs2_extent_list *el,
2156
struct ocfs2_extent_rec *rec = &el->l_recs[index];
2159
if (rec->e_leaf_clusters == 0) {
2161
* We consumed all of the merged-from record. An empty
2162
* extent cannot exist anywhere but the 1st array
2163
* position, so move things over if the merged-from
2164
* record doesn't occupy that position.
2166
* This creates a new empty extent so the caller
2167
* should be smart enough to have removed any existing
2171
assert(!ocfs2_is_empty_extent(&el->l_recs[0]));
2172
size = index * sizeof(struct ocfs2_extent_rec);
2173
memmove(&el->l_recs[1], &el->l_recs[0], size);
2177
* Always memset - the caller doesn't check whether it
2178
* created an empty extent, so there could be junk in
2181
memset(&el->l_recs[0], 0, sizeof(struct ocfs2_extent_rec));
2186
* Remove split_rec clusters from the record at index and merge them
2187
* onto the beginning of the record at index + 1.
2189
static int ocfs2_merge_rec_right(ocfs2_filesys *fs,
2190
struct ocfs2_extent_rec *split_rec,
2191
struct ocfs2_extent_list *el, int index)
2193
unsigned int split_clusters = split_rec->e_leaf_clusters;
2194
struct ocfs2_extent_rec *left_rec;
2195
struct ocfs2_extent_rec *right_rec;
2197
assert(index < el->l_next_free_rec);
2199
left_rec = &el->l_recs[index];
2200
right_rec = &el->l_recs[index + 1];
2202
left_rec->e_leaf_clusters -= split_clusters;
2204
right_rec->e_cpos -= split_clusters;
2205
right_rec->e_blkno -=
2206
ocfs2_clusters_to_blocks(fs, split_clusters);
2207
right_rec->e_leaf_clusters += split_clusters;
2209
ocfs2_cleanup_merge(el, index);
2215
* Remove split_rec clusters from the record at index and merge them
2216
* onto the tail of the record at index - 1.
2218
static int ocfs2_merge_rec_left(ocfs2_filesys *fs,
2219
struct ocfs2_extent_rec *split_rec,
2220
struct ocfs2_extent_list *el, int index)
2222
int has_empty_extent = 0;
2223
unsigned int split_clusters = split_rec->e_leaf_clusters;
2224
struct ocfs2_extent_rec *left_rec;
2225
struct ocfs2_extent_rec *right_rec;
2229
left_rec = &el->l_recs[index - 1];
2230
right_rec = &el->l_recs[index];
2231
if (ocfs2_is_empty_extent(&el->l_recs[0]))
2232
has_empty_extent = 1;
2234
if (has_empty_extent && index == 1) {
2236
* The easy case - we can just plop the record right in.
2238
*left_rec = *split_rec;
2240
has_empty_extent = 0;
2242
left_rec->e_leaf_clusters += split_clusters;
2245
right_rec->e_cpos += split_clusters;
2246
right_rec->e_blkno +=
2247
ocfs2_clusters_to_blocks(fs, split_clusters);
2248
right_rec->e_leaf_clusters -= split_clusters;
2250
ocfs2_cleanup_merge(el, index);
2255
static int ocfs2_try_to_merge_extent(ocfs2_filesys *fs,
2256
struct ocfs2_path *left_path,
2258
struct ocfs2_extent_rec *split_rec,
2259
struct ocfs2_merge_ctxt *ctxt)
2263
struct ocfs2_extent_list *el = path_leaf_el(left_path);
2264
struct ocfs2_extent_rec *rec = &el->l_recs[split_index];
2266
assert(ctxt->c_contig_type != CONTIG_NONE);
2268
if (ctxt->c_split_covers_rec && ctxt->c_has_empty_extent) {
2270
* The merge code will need to create an empty
2271
* extent to take the place of the newly
2272
* emptied slot. Remove any pre-existing empty
2273
* extents - having more than one in a leaf is
2276
ret = ocfs2_rotate_tree_left(fs, left_path);
2281
rec = &el->l_recs[split_index];
2284
if (ctxt->c_contig_type == CONTIG_LEFTRIGHT) {
2286
* Left-right contig implies this.
2288
assert(ctxt->c_split_covers_rec);
2289
assert(split_index != 0);
2292
* Since the leftright insert always covers the entire
2293
* extent, this call will delete the insert record
2294
* entirely, resulting in an empty extent record added to
2297
* Since the adding of an empty extent shifts
2298
* everything back to the right, there's no need to
2299
* update split_index here.
2301
ret = ocfs2_merge_rec_left(fs, split_rec, el, split_index);
2306
* We can only get this from logic error above.
2308
assert(ocfs2_is_empty_extent(&el->l_recs[0]));
2311
* The left merge left us with an empty extent, remove
2314
ret = ocfs2_rotate_tree_left(fs, left_path);
2319
rec = &el->l_recs[split_index];
2322
* Note that we don't pass split_rec here on purpose -
2323
* we've merged it into the left side.
2325
ret = ocfs2_merge_rec_right(fs, rec, el, split_index);
2329
assert(ocfs2_is_empty_extent(&el->l_recs[0]));
2331
ret = ocfs2_rotate_tree_left(fs, left_path);
2333
* Error from this last rotate is not critical, so
2334
* don't bubble it up.
2339
* Merge a record to the left or right.
2341
* 'contig_type' is relative to the existing record,
2342
* so for example, if we're "right contig", it's to
2343
* the record on the left (hence the left merge).
2345
if (ctxt->c_contig_type == CONTIG_RIGHT) {
2346
ret = ocfs2_merge_rec_left(fs,
2352
ret = ocfs2_merge_rec_right(fs,
2359
/* we have to synchronize the modified extent block to disk. */
2360
if (left_path->p_tree_depth > 0) {
2361
ret = ocfs2_write_extent_block(fs,
2362
path_leaf_blkno(left_path),
2363
path_leaf_buf(left_path));
2368
if (ctxt->c_split_covers_rec) {
2370
* The merge may have left an empty extent in
2371
* our leaf. Try to rotate it away.
2373
ret = ocfs2_rotate_tree_left(fs,left_path);
2382
static void ocfs2_subtract_from_rec(ocfs2_filesys *fs,
2383
enum ocfs2_split_type split,
2384
struct ocfs2_extent_rec *rec,
2385
struct ocfs2_extent_rec *split_rec)
2387
uint64_t len_blocks;
2389
len_blocks = ocfs2_clusters_to_blocks(fs, split_rec->e_leaf_clusters);
2391
if (split == SPLIT_LEFT) {
2393
* Region is on the left edge of the existing
2396
rec->e_cpos += split_rec->e_leaf_clusters;
2397
rec->e_blkno += len_blocks;
2398
rec->e_leaf_clusters -= split_rec->e_leaf_clusters;
2401
* Region is on the right edge of the existing
2404
rec->e_leaf_clusters -= split_rec->e_leaf_clusters;
2409
* Change the depth of the tree. That means allocating an extent block,
2410
* copying all extent records from the dinode into the extent block,
2411
* and then pointing the dinode to the new extent_block.
2413
static errcode_t shift_tree_depth(ocfs2_filesys *fs,
2414
struct ocfs2_dinode *di,
2420
struct ocfs2_extent_block *eb;
2421
struct ocfs2_extent_list *el;
2422
uint32_t new_clusters;
2424
el = &di->id2.i_list;
2425
if (el->l_next_free_rec != el->l_count)
2426
return OCFS2_ET_INTERNAL_FAILURE;
2428
ret = ocfs2_malloc_block(fs->fs_io, &buf);
2432
ret = ocfs2_new_extent_block(fs, &blkno);
2436
ret = ocfs2_read_extent_block(fs, blkno, buf);
2440
eb = (struct ocfs2_extent_block *)buf;
2441
eb->h_list.l_tree_depth = el->l_tree_depth;
2442
eb->h_list.l_next_free_rec = el->l_next_free_rec;
2443
memcpy(eb->h_list.l_recs, el->l_recs,
2444
sizeof(struct ocfs2_extent_rec) * el->l_count);
2446
new_clusters = ocfs2_sum_rightmost_rec(&eb->h_list);
2449
memset(el->l_recs, 0,
2450
sizeof(struct ocfs2_extent_rec) * el->l_count);
2451
el->l_recs[0].e_cpos = 0;
2452
el->l_recs[0].e_blkno = blkno;
2453
el->l_recs[0].e_int_clusters = new_clusters;
2454
el->l_next_free_rec = 1;
2456
if (el->l_tree_depth == 1)
2457
di->i_last_eb_blk = blkno;
2459
ret = ocfs2_write_extent_block(fs, blkno, buf);
2463
if (buf && !*new_eb)
2469
static enum ocfs2_contig_type
2470
ocfs2_figure_merge_contig_type(ocfs2_filesys *fs,
2471
struct ocfs2_extent_list *el, int index,
2472
struct ocfs2_extent_rec *split_rec)
2474
struct ocfs2_extent_rec *rec;
2475
enum ocfs2_contig_type ret = CONTIG_NONE;
2478
* We're careful to check for an empty extent record here -
2479
* the merge code will know what to do if it sees one.
2483
rec = &el->l_recs[index - 1];
2484
if (index == 1 && ocfs2_is_empty_extent(rec)) {
2485
if (split_rec->e_cpos == el->l_recs[index].e_cpos)
2488
ret = ocfs2_extent_contig(fs, rec, split_rec);
2492
if (index < el->l_next_free_rec - 1) {
2493
enum ocfs2_contig_type contig_type;
2495
rec = &el->l_recs[index + 1];
2496
contig_type = ocfs2_extent_contig(fs, rec, split_rec);
2498
if (contig_type == CONTIG_LEFT && ret == CONTIG_RIGHT)
2499
ret = CONTIG_LEFTRIGHT;
2500
else if (ret == CONTIG_NONE)
2507
static void ocfs2_figure_contig_type(ocfs2_filesys *fs,
2508
struct ocfs2_insert_type *insert,
2509
struct ocfs2_extent_list *el,
2510
struct ocfs2_extent_rec *insert_rec)
2513
enum ocfs2_contig_type contig_type = CONTIG_NONE;
2515
assert(el->l_tree_depth == 0);
2517
for(i = 0; i < el->l_next_free_rec; i++) {
2518
contig_type = ocfs2_extent_contig(fs, &el->l_recs[i],
2520
if (contig_type != CONTIG_NONE) {
2521
insert->ins_contig_index = i;
2525
insert->ins_contig = contig_type;
2529
* This should only be called against the righmost leaf extent list.
2531
* ocfs2_figure_appending_type() will figure out whether we'll have to
2532
* insert at the tail of the rightmost leaf.
2534
* This should also work against the dinode list for tree's with 0
2535
* depth. If we consider the dinode list to be the rightmost leaf node
2536
* then the logic here makes sense.
2538
static void ocfs2_figure_appending_type(struct ocfs2_insert_type *insert,
2539
struct ocfs2_extent_list *el,
2540
struct ocfs2_extent_rec *insert_rec)
2543
uint32_t cpos = insert_rec->e_cpos;
2544
struct ocfs2_extent_rec *rec;
2546
insert->ins_appending = APPEND_NONE;
2548
assert(el->l_tree_depth == 0);
2550
if (!el->l_next_free_rec)
2551
goto set_tail_append;
2553
if (ocfs2_is_empty_extent(&el->l_recs[0])) {
2554
/* Were all records empty? */
2555
if (el->l_next_free_rec == 1)
2556
goto set_tail_append;
2559
i = el->l_next_free_rec - 1;
2560
rec = &el->l_recs[i];
2562
if (cpos >= (rec->e_cpos + rec->e_leaf_clusters))
2563
goto set_tail_append;
2568
insert->ins_appending = APPEND_TAIL;
2572
* Helper function called at the begining of an insert.
2574
* This computes a few things that are commonly used in the process of
2575
* inserting into the btree:
2576
* - Whether the new extent is contiguous with an existing one.
2577
* - The current tree depth.
2578
* - Whether the insert is an appending one.
2579
* - The total # of free records in the tree.
2581
* All of the information is stored on the ocfs2_insert_type
2584
static int ocfs2_figure_insert_type(struct insert_ctxt *ctxt,
2587
struct ocfs2_insert_type *insert)
2590
struct ocfs2_extent_block *eb;
2591
struct ocfs2_extent_list *el;
2592
struct ocfs2_dinode *di = ctxt->di;
2593
struct ocfs2_extent_rec *insert_rec = &ctxt->rec;
2594
ocfs2_filesys *fs = ctxt->fs;
2595
struct ocfs2_path *path = NULL;
2596
char *buf = *last_eb_buf;
2598
insert->ins_split = SPLIT_NONE;
2600
el = &di->id2.i_list;
2601
insert->ins_tree_depth = el->l_tree_depth;
2603
if (el->l_tree_depth) {
2605
* If we have tree depth, we read in the
2606
* rightmost extent block ahead of time as
2607
* ocfs2_figure_insert_type() and ocfs2_add_branch()
2608
* may want it later.
2611
ret = ocfs2_read_extent_block(fs, di->i_last_eb_blk, buf);
2615
eb = (struct ocfs2_extent_block *) buf;
2619
* Unless we have a contiguous insert, we'll need to know if
2620
* there is room left in our allocation tree for another
2623
* XXX: This test is simplistic, we can search for empty
2624
* extent records too.
2626
*free_records = el->l_count - el->l_next_free_rec;
2628
if (!insert->ins_tree_depth) {
2629
ocfs2_figure_contig_type(fs, insert, el, insert_rec);
2630
ocfs2_figure_appending_type(insert, el, insert_rec);
2634
path = ocfs2_new_inode_path(fs, di);
2636
ret = OCFS2_ET_NO_MEMORY;
2640
* In the case that we're inserting past what the tree
2641
* currently accounts for, ocf2_find_path() will return for
2642
* us the rightmost tree path. This is accounted for below in
2643
* the appending code.
2645
ret = ocfs2_find_path(fs, path, insert_rec->e_cpos);
2649
el = path_leaf_el(path);
2652
* Now that we have the path, there's two things we want to determine:
2653
* 1) Contiguousness (also set contig_index if this is so)
2655
* 2) Are we doing an append? We can trivially break this up
2656
* into two types of appends: simple record append, or a
2657
* rotate inside the tail leaf.
2659
ocfs2_figure_contig_type(fs, insert, el, insert_rec);
2662
* The insert code isn't quite ready to deal with all cases of
2663
* left contiguousness. Specifically, if it's an insert into
2664
* the 1st record in a leaf, it will require the adjustment of
2665
* e_clusters on the last record of the path directly to it's
2666
* left. For now, just catch that case and fool the layers
2667
* above us. This works just fine for tree_depth == 0, which
2668
* is why we allow that above.
2670
if (insert->ins_contig == CONTIG_LEFT &&
2671
insert->ins_contig_index == 0)
2672
insert->ins_contig = CONTIG_NONE;
2675
* Ok, so we can simply compare against last_eb to figure out
2676
* whether the path doesn't exist. This will only happen in
2677
* the case that we're doing a tail append, so maybe we can
2678
* take advantage of that information somehow.
2680
if (di->i_last_eb_blk == path_leaf_blkno(path)) {
2682
* Ok, ocfs2_find_path() returned us the rightmost
2683
* tree path. This might be an appending insert. There are
2685
* 1) We're doing a true append at the tail:
2686
* -This might even be off the end of the leaf
2687
* 2) We're "appending" by rotating in the tail
2689
ocfs2_figure_appending_type(insert, el, insert_rec);
2693
ocfs2_free_path(path);
2699
* Do the final bits of extent record insertion at the target leaf
2700
* list. If this leaf is part of an allocation tree, it is assumed
2701
* that the tree above has been prepared.
2703
static void ocfs2_insert_at_leaf(ocfs2_filesys *fs,
2704
struct ocfs2_extent_rec *insert_rec,
2705
struct ocfs2_extent_list *el,
2706
struct ocfs2_insert_type *insert)
2708
int i = insert->ins_contig_index;
2710
struct ocfs2_extent_rec *rec;
2712
assert(el->l_tree_depth == 0);
2714
if (insert->ins_split != SPLIT_NONE) {
2715
i = ocfs2_search_extent_list(el, insert_rec->e_cpos);
2717
rec = &el->l_recs[i];
2718
ocfs2_subtract_from_rec(fs, insert->ins_split, rec,
2724
* Contiguous insert - either left or right.
2726
if (insert->ins_contig != CONTIG_NONE) {
2727
rec = &el->l_recs[i];
2728
if (insert->ins_contig == CONTIG_LEFT) {
2729
rec->e_blkno = insert_rec->e_blkno;
2730
rec->e_cpos = insert_rec->e_cpos;
2732
rec->e_leaf_clusters += insert_rec->e_leaf_clusters;
2737
* Handle insert into an empty leaf.
2739
if (el->l_next_free_rec == 0 ||
2740
(el->l_next_free_rec == 1 &&
2741
ocfs2_is_empty_extent(&el->l_recs[0]))) {
2742
el->l_recs[0] = *insert_rec;
2743
el->l_next_free_rec = 1;
2750
if (insert->ins_appending == APPEND_TAIL) {
2751
i = el->l_next_free_rec - 1;
2752
rec = &el->l_recs[i];
2753
range = rec->e_cpos + rec->e_leaf_clusters;
2754
assert(insert_rec->e_cpos >= range);
2757
el->l_recs[i] = *insert_rec;
2758
el->l_next_free_rec += 1;
2764
* Ok, we have to rotate.
2766
* At this point, it is safe to assume that inserting into an
2767
* empty leaf and appending to a leaf have both been handled
2770
* This leaf needs to have space, either by the empty 1st
2771
* extent record, or by virtue of an l_next_rec < l_count.
2773
ocfs2_rotate_leaf(el, insert_rec);
2776
static int ocfs2_adjust_rightmost_records(ocfs2_filesys *fs,
2777
struct ocfs2_path *path,
2778
struct ocfs2_extent_rec *insert_rec)
2781
struct ocfs2_extent_list *el;
2782
struct ocfs2_extent_rec *rec;
2785
* Update everything except the leaf block.
2787
for (i = 0; i < path->p_tree_depth; i++) {
2788
el = path->p_node[i].el;
2790
next_free = el->l_next_free_rec;
2792
return OCFS2_ET_CORRUPT_EXTENT_BLOCK;
2794
rec = &el->l_recs[next_free - 1];
2796
rec->e_int_clusters = insert_rec->e_cpos;
2797
rec->e_int_clusters += insert_rec->e_leaf_clusters;
2798
rec->e_int_clusters -= rec->e_cpos;
2804
static int ocfs2_append_rec_to_path(ocfs2_filesys *fs,
2805
struct ocfs2_extent_rec *insert_rec,
2806
struct ocfs2_path *right_path,
2807
struct ocfs2_path **ret_left_path)
2810
struct ocfs2_extent_list *el;
2811
struct ocfs2_path *left_path = NULL;
2813
*ret_left_path = NULL;
2816
* This shouldn't happen for non-trees. The extent rec cluster
2817
* count manipulation below only works for interior nodes.
2819
assert(right_path->p_tree_depth > 0);
2822
* If our appending insert is at the leftmost edge of a leaf,
2823
* then we might need to update the rightmost records of the
2827
el = path_leaf_el(right_path);
2828
next_free = el->l_next_free_rec;
2829
if (next_free == 0 ||
2830
(next_free == 1 && ocfs2_is_empty_extent(&el->l_recs[0]))) {
2833
ret = ocfs2_find_cpos_for_left_leaf(right_path, &left_cpos);
2837
* No need to worry if the append is already in the
2841
left_path = ocfs2_new_path(fs,
2842
path_root_buf(right_path),
2843
path_root_el(right_path));
2845
ret = OCFS2_ET_NO_MEMORY;
2849
ret = ocfs2_find_path(fs, left_path, left_cpos);
2855
ret = ocfs2_adjust_rightmost_records(fs, right_path, insert_rec);
2859
*ret_left_path = left_path;
2863
ocfs2_free_path(left_path);
2867
static void ocfs2_split_record(ocfs2_filesys *fs,
2868
struct ocfs2_path *left_path,
2869
struct ocfs2_path *right_path,
2870
struct ocfs2_extent_rec *split_rec,
2871
enum ocfs2_split_type split)
2874
uint32_t cpos = split_rec->e_cpos;
2875
struct ocfs2_extent_list *left_el = NULL, *right_el, *insert_el, *el;
2876
struct ocfs2_extent_rec *rec, *tmprec;
2878
right_el = path_leaf_el(right_path);;
2880
left_el = path_leaf_el(left_path);
2883
insert_el = right_el;
2884
index = ocfs2_search_extent_list(el, cpos);
2886
if (index == 0 && left_path) {
2887
assert(!ocfs2_is_empty_extent(&el->l_recs[0]));
2890
* This typically means that the record
2891
* started in the left path but moved to the
2892
* right as a result of rotation. We either
2893
* move the existing record to the left, or we
2894
* do the later insert there.
2896
* In this case, the left path should always
2897
* exist as the rotate code will have passed
2898
* it back for a post-insert update.
2901
if (split == SPLIT_LEFT) {
2903
* It's a left split. Since we know
2904
* that the rotate code gave us an
2905
* empty extent in the left path, we
2906
* can just do the insert there.
2908
insert_el = left_el;
2911
* Right split - we have to move the
2912
* existing record over to the left
2913
* leaf. The insert will be into the
2914
* newly created empty extent in the
2917
tmprec = &right_el->l_recs[index];
2918
ocfs2_rotate_leaf(left_el, tmprec);
2921
memset(tmprec, 0, sizeof(*tmprec));
2922
index = ocfs2_search_extent_list(left_el, cpos);
2923
assert(index != -1);
2928
assert(ocfs2_is_empty_extent(&left_el->l_recs[0]));
2930
* Left path is easy - we can just allow the insert to
2934
insert_el = left_el;
2935
index = ocfs2_search_extent_list(el, cpos);
2936
assert(index != -1);
2939
rec = &el->l_recs[index];
2940
ocfs2_subtract_from_rec(fs, split, rec, split_rec);
2941
ocfs2_rotate_leaf(insert_el, split_rec);
2945
* This function only does inserts on an allocation b-tree. For dinode
2946
* lists, ocfs2_insert_at_leaf() is called directly.
2948
* right_path is the path we want to do the actual insert
2949
* in. left_path should only be passed in if we need to update that
2950
* portion of the tree after an edge insert.
2952
static int ocfs2_insert_path(struct insert_ctxt* ctxt,
2953
struct ocfs2_path *left_path,
2954
struct ocfs2_path *right_path,
2955
struct ocfs2_extent_rec *insert_rec,
2956
struct ocfs2_insert_type *insert)
2958
int ret, subtree_index;
2960
if (insert->ins_split != SPLIT_NONE) {
2962
* We could call ocfs2_insert_at_leaf() for some types
2963
* of splits, but it's easier to just let one seperate
2964
* function sort it all out.
2966
ocfs2_split_record(ctxt->fs, left_path, right_path,
2967
insert_rec, insert->ins_split);
2969
ocfs2_insert_at_leaf(ctxt->fs, insert_rec, path_leaf_el(right_path),
2974
* The rotate code has indicated that we need to fix
2975
* up portions of the tree after the insert.
2977
subtree_index = ocfs2_find_subtree_root(left_path, right_path);
2978
ocfs2_complete_edge_insert(ctxt->fs, left_path,
2979
right_path, subtree_index);
2983
ret = ocfs2_sync_path_to_disk(ctxt->fs, left_path,
2984
right_path, subtree_index);
2993
static int ocfs2_do_insert_extent(struct insert_ctxt* ctxt,
2994
struct ocfs2_insert_type *type)
2996
int ret, rotate = 0;
2998
struct ocfs2_path *right_path = NULL;
2999
struct ocfs2_path *left_path = NULL;
3000
struct ocfs2_extent_rec *insert_rec = &ctxt->rec;
3001
ocfs2_filesys *fs = ctxt->fs;
3002
struct ocfs2_dinode *di = ctxt->di;
3003
struct ocfs2_extent_list *el = &di->id2.i_list;
3005
if (el->l_tree_depth == 0) {
3006
ocfs2_insert_at_leaf(fs, insert_rec, el, type);
3007
goto out_update_clusters;
3010
right_path = ocfs2_new_inode_path(fs, di);
3012
ret = OCFS2_ET_NO_MEMORY;
3017
* Determine the path to start with. Rotations need the
3018
* rightmost path, everything else can go directly to the
3021
cpos = insert_rec->e_cpos;
3022
if (type->ins_appending == APPEND_NONE &&
3023
type->ins_contig == CONTIG_NONE) {
3028
ret = ocfs2_find_path(fs, right_path, cpos);
3033
* Rotations and appends need special treatment - they modify
3034
* parts of the tree's above them.
3036
* Both might pass back a path immediate to the left of the
3037
* one being inserted to. This will be cause
3038
* ocfs2_insert_path() to modify the rightmost records of
3039
* left_path to account for an edge insert.
3041
* XXX: When modifying this code, keep in mind that an insert
3042
* can wind up skipping both of these two special cases...
3046
ret = ocfs2_rotate_tree_right(fs, type->ins_split,
3048
right_path, &left_path);
3051
} else if (type->ins_appending == APPEND_TAIL
3052
&& type->ins_contig != CONTIG_LEFT) {
3053
ret = ocfs2_append_rec_to_path(fs, insert_rec,
3054
right_path, &left_path);
3059
ret = ocfs2_insert_path(ctxt, left_path, right_path, insert_rec, type);
3063
out_update_clusters:
3064
if (type->ins_split == SPLIT_NONE)
3065
di->i_clusters += insert_rec->e_leaf_clusters;
3069
ocfs2_free_path(left_path);
3070
ocfs2_free_path(right_path);
3075
struct duplicate_ctxt {
3076
struct ocfs2_dinode *di;
3077
uint64_t next_leaf_blk;
3080
static errcode_t duplicate_extent_block(ocfs2_filesys *fs,
3081
struct ocfs2_extent_list *old_el,
3082
struct ocfs2_extent_list *new_el,
3083
struct duplicate_ctxt *ctxt)
3087
uint64_t blkno, new_blkno;
3088
struct ocfs2_extent_rec *rec = NULL;
3089
char *eb_buf = NULL, *new_eb_buf = NULL;
3090
struct ocfs2_extent_block *eb = NULL;
3091
struct ocfs2_extent_list *child_old_el = NULL, *child_new_el = NULL;
3093
assert (old_el->l_tree_depth > 0);
3095
/* empty the whole extent list at first. */
3097
new_el->l_next_free_rec = 0;
3098
memset(new_el->l_recs, 0,
3099
sizeof(struct ocfs2_extent_rec) * new_el->l_count);
3101
if (old_el->l_next_free_rec == 0) {
3103
* We have a tree depth > 0 and no extent record in it,
3104
* should it be a corrupted block?
3106
ret = OCFS2_ET_CORRUPT_EXTENT_BLOCK;
3110
ret = ocfs2_malloc_block(fs->fs_io, &eb_buf);
3113
ret = ocfs2_malloc_block(fs->fs_io, &new_eb_buf);
3117
/* we iterate the extent list from the last one for recording
3118
* the next_leaf_blk for the previous leaf.
3120
for (i = old_el->l_next_free_rec - 1; i >= 0; i--) {
3121
rec = &old_el->l_recs[i];
3123
if (!ocfs2_rec_clusters(old_el->l_tree_depth, rec))
3126
blkno = rec->e_blkno;
3127
ret = ocfs2_read_extent_block(fs, blkno, eb_buf);
3131
/* First make the new_buf the same as the old buf. */
3132
memcpy(new_eb_buf, eb_buf, fs->fs_blocksize);
3134
eb = (struct ocfs2_extent_block *)eb_buf;
3135
child_old_el = &eb->h_list;
3136
eb = (struct ocfs2_extent_block *)new_eb_buf;
3137
child_new_el = &eb->h_list;
3139
if (child_old_el->l_tree_depth > 0) {
3140
/* the extent record in our list still has child extent
3141
* block, so we have to iterate it.
3143
ret = duplicate_extent_block(fs,
3151
/* now we allocate a new extent block and save it. */
3152
ret = ocfs2_new_extent_block(fs, &new_blkno);
3156
eb = (struct ocfs2_extent_block *)new_eb_buf;
3157
eb->h_blkno = new_blkno;
3158
if (child_old_el->l_tree_depth == 0) {
3160
* This is the leaf blkno, we have to set its
3161
* h_next_leaf_blk and then record itself for
3164
eb->h_next_leaf_blk = ctxt->next_leaf_blk;
3165
ctxt->next_leaf_blk = new_blkno;
3168
ret = ocfs2_write_extent_block(fs, new_blkno, new_eb_buf);
3172
memcpy(&new_el->l_recs[i], rec, sizeof(struct ocfs2_extent_rec));
3173
new_el->l_recs[i].e_blkno = new_blkno;
3175
eb = (struct ocfs2_extent_block *)new_eb_buf;
3176
/* set the new i_last_eb_blk in the new dinode. */
3177
if (ctxt->di->i_last_eb_blk == blkno)
3178
ctxt->di->i_last_eb_blk = new_blkno;
3181
new_el->l_next_free_rec = old_el->l_next_free_rec;
3186
ocfs2_free(&eb_buf);
3188
ocfs2_free(&new_eb_buf);
3189
/* Free all the extent block we allocate. */
3191
for (i = 0; i < old_el->l_next_free_rec; i++) {
3192
rec = &new_el->l_recs[i];
3194
ocfs2_delete_extent_block(fs, rec->e_blkno);
3201
static errcode_t duplicate_extent_block_dinode(ocfs2_filesys *fs,
3202
char *old_buf, char *new_buf)
3205
struct ocfs2_dinode *old_di = NULL, *new_di = NULL;
3206
struct ocfs2_extent_list *old_el = NULL, *new_el = NULL;
3207
struct duplicate_ctxt ctxt;
3209
old_di = (struct ocfs2_dinode *)old_buf;
3210
old_el = &old_di->id2.i_list;
3211
new_di = (struct ocfs2_dinode *)new_buf;
3212
new_el = &new_di->id2.i_list;
3214
assert(old_el->l_tree_depth > 0);
3216
/* empty the whole extent list at first. */
3218
memset(new_el->l_recs, 0,
3219
sizeof(struct ocfs2_extent_rec) * new_el->l_count);
3220
new_el->l_next_free_rec = 0;
3222
memset(&ctxt, 0, sizeof(ctxt));
3224
ctxt.next_leaf_blk = 0;
3225
ret = duplicate_extent_block(fs, old_el, new_el, &ctxt);
3230
static void free_duplicated_extent_block(ocfs2_filesys *fs,
3231
struct ocfs2_extent_list *el)
3236
struct ocfs2_extent_rec *rec;
3237
struct ocfs2_extent_list *child_el;
3238
struct ocfs2_extent_block *eb;
3240
assert(el->l_tree_depth > 0);
3242
ret = ocfs2_malloc_block(fs->fs_io, &buf);
3246
for (i = 0; i < el->l_next_free_rec; i ++) {
3247
rec = &el->l_recs[i];
3249
if (!ocfs2_rec_clusters(el->l_tree_depth, rec))
3252
ret = ocfs2_read_extent_block(fs, rec->e_blkno, buf);
3256
eb = (struct ocfs2_extent_block *)buf;
3257
child_el = &eb->h_list;
3258
if (child_el->l_tree_depth > 0)
3259
free_duplicated_extent_block(fs, child_el);
3261
ocfs2_delete_extent_block(fs, rec->e_blkno);
3268
static void free_duplicated_extent_block_dinode(ocfs2_filesys *fs,
3271
struct ocfs2_dinode *di = NULL;
3272
struct ocfs2_extent_list *el = NULL;
3274
di = (struct ocfs2_dinode *)di_buf;
3275
el = &di->id2.i_list;
3277
assert(el->l_tree_depth > 0);
3279
free_duplicated_extent_block(fs, el);
3283
* Grow a b-tree so that it has more records.
3285
* We might shift the tree depth in which case existing paths should
3286
* be considered invalid.
3288
* Tree depth after the grow is returned via *final_depth.
3290
* *last_eb will be updated by ocfs2_add_branch().
3292
static int ocfs2_grow_tree(ocfs2_filesys *fs, struct ocfs2_dinode *di,
3293
int *final_depth, char **last_eb)
3296
char *eb_buf = NULL;
3298
int depth = di->id2.i_list.l_tree_depth;
3300
shift = ocfs2_find_branch_target(fs, di, &eb_buf);
3306
/* We traveled all the way to the bottom of the allocation tree
3307
* and didn't find room for any more extents - we need to add
3308
* another tree level */
3311
/* shift_tree_depth will return us a buffer with
3312
* the new extent block (so we can pass that to
3313
* ocfs2_add_branch). */
3314
ret = shift_tree_depth(fs, di, &eb_buf);
3321
* Special case: we have room now if we shifted from
3322
* tree_depth 0, so no more work needs to be done.
3324
* We won't be calling add_branch, so pass
3325
* back *last_eb as the new leaf.
3328
memcpy(*last_eb, eb_buf, fs->fs_blocksize);
3333
/* call ocfs2_add_branch to add the final part of the tree with
3335
ret = ocfs2_add_branch(fs, di, eb_buf, last_eb);
3339
*final_depth = depth;
37
#include "extent_tree.h"
3344
40
* Insert an extent into an inode btree.
3346
errcode_t ocfs2_insert_extent(ocfs2_filesys *fs, uint64_t ino, uint32_t cpos,
3347
uint64_t c_blkno, uint32_t clusters,
42
errcode_t ocfs2_inode_insert_extent(ocfs2_filesys *fs, uint64_t ino,
43
uint32_t cpos, uint64_t c_blkno,
44
uint32_t clusters, uint16_t flag)
3351
47
ocfs2_cached_inode *ci = NULL;