2
* Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
6
* Written by Anatoly P. Pinchuk pap@namesys.botik.ru
7
* Programm System Institute
8
* Pereslavl-Zalessky Russia
12
* This file contains functions dealing with S+tree
27
* pathrelse_and_restore
31
* search_for_position_by_key
33
* prepare_for_direct_item
34
* prepare_for_direntry_item
35
* prepare_for_delete_or_cut
36
* calc_deleted_bytes_number
39
* reiserfs_delete_item
40
* reiserfs_delete_solid_item
41
* reiserfs_delete_object
42
* maybe_indirect_to_direct
43
* indirect_to_direct_roll_back
44
* reiserfs_cut_from_item
46
* reiserfs_do_truncate
47
* reiserfs_paste_into_item
48
* reiserfs_insert_item
51
#include <linux/time.h>
52
#include <linux/string.h>
53
#include <linux/pagemap.h>
54
#include <linux/reiserfs_fs.h>
55
#include <linux/buffer_head.h>
56
#include <linux/quotaops.h>
58
/* Does the buffer contain a disk block which is in the tree. */
59
inline int B_IS_IN_TREE(const struct buffer_head *bh)
62
RFALSE(B_LEVEL(bh) > MAX_HEIGHT,
63
"PAP-1010: block (%b) has too big level (%z)", bh, bh);
65
return (B_LEVEL(bh) != FREE_LEVEL);
69
// to gets item head in le form
71
inline void copy_item_head(struct item_head *to,
72
const struct item_head *from)
74
memcpy(to, from, IH_SIZE);
77
/* k1 is pointer to on-disk structure which is stored in little-endian
78
form. k2 is pointer to cpu variable. For key of items of the same
79
object this returns 0.
80
Returns: -1 if key1 < key2
83
inline int comp_short_keys(const struct reiserfs_key *le_key,
84
const struct cpu_key *cpu_key)
87
n = le32_to_cpu(le_key->k_dir_id);
88
if (n < cpu_key->on_disk_key.k_dir_id)
90
if (n > cpu_key->on_disk_key.k_dir_id)
92
n = le32_to_cpu(le_key->k_objectid);
93
if (n < cpu_key->on_disk_key.k_objectid)
95
if (n > cpu_key->on_disk_key.k_objectid)
100
/* k1 is pointer to on-disk structure which is stored in little-endian
101
form. k2 is pointer to cpu variable.
102
Compare keys using all 4 key fields.
103
Returns: -1 if key1 < key2 0
104
if key1 = key2 1 if key1 > key2 */
105
static inline int comp_keys(const struct reiserfs_key *le_key,
106
const struct cpu_key *cpu_key)
110
retval = comp_short_keys(le_key, cpu_key);
113
if (le_key_k_offset(le_key_version(le_key), le_key) <
114
cpu_key_k_offset(cpu_key))
116
if (le_key_k_offset(le_key_version(le_key), le_key) >
117
cpu_key_k_offset(cpu_key))
120
if (cpu_key->key_length == 3)
123
/* this part is needed only when tail conversion is in progress */
124
if (le_key_k_type(le_key_version(le_key), le_key) <
125
cpu_key_k_type(cpu_key))
128
if (le_key_k_type(le_key_version(le_key), le_key) >
129
cpu_key_k_type(cpu_key))
135
inline int comp_short_le_keys(const struct reiserfs_key *key1,
136
const struct reiserfs_key *key2)
138
__u32 *k1_u32, *k2_u32;
139
int key_length = REISERFS_SHORT_KEY_LEN;
141
k1_u32 = (__u32 *) key1;
142
k2_u32 = (__u32 *) key2;
143
for (; key_length--; ++k1_u32, ++k2_u32) {
144
if (le32_to_cpu(*k1_u32) < le32_to_cpu(*k2_u32))
146
if (le32_to_cpu(*k1_u32) > le32_to_cpu(*k2_u32))
152
inline void le_key2cpu_key(struct cpu_key *to, const struct reiserfs_key *from)
155
to->on_disk_key.k_dir_id = le32_to_cpu(from->k_dir_id);
156
to->on_disk_key.k_objectid = le32_to_cpu(from->k_objectid);
158
// find out version of the key
159
version = le_key_version(from);
160
to->version = version;
161
to->on_disk_key.k_offset = le_key_k_offset(version, from);
162
to->on_disk_key.k_type = le_key_k_type(version, from);
165
// this does not say which one is bigger, it only returns 1 if keys
166
// are not equal, 0 otherwise
167
inline int comp_le_keys(const struct reiserfs_key *k1,
168
const struct reiserfs_key *k2)
170
return memcmp(k1, k2, sizeof(struct reiserfs_key));
173
/**************************************************************************
174
* Binary search toolkit function *
175
* Search for an item in the array by the item key *
176
* Returns: 1 if found, 0 if not found; *
177
* *pos = number of the searched element if found, else the *
178
* number of the first element that is larger than key. *
179
**************************************************************************/
180
/* For those not familiar with binary search: lbound is the leftmost item that it
181
could be, rbound the rightmost item that it could be. We examine the item
182
halfway between lbound and rbound, and that tells us either that we can increase
183
lbound, or decrease rbound, or that we have found it, or if lbound <= rbound that
184
there are no possible items, and we have not found it. With each examination we
185
cut the number of possible items it could be by one more than half rounded down,
187
static inline int bin_search(const void *key, /* Key to search for. */
188
const void *base, /* First item in the array. */
189
int num, /* Number of items in the array. */
190
int width, /* Item size in the array.
191
searched. Lest the reader be
192
confused, note that this is crafted
193
as a general function, and when it
194
is applied specifically to the array
195
of item headers in a node, width
196
is actually the item header size not
198
int *pos /* Number of the searched for element. */
201
int rbound, lbound, j;
203
for (j = ((rbound = num - 1) + (lbound = 0)) / 2;
204
lbound <= rbound; j = (rbound + lbound) / 2)
206
((struct reiserfs_key *)((char *)base + j * width),
207
(struct cpu_key *)key)) {
216
return ITEM_FOUND; /* Key found in the array. */
219
/* bin_search did not find given key, it returns position of key,
220
that is minimal and greater than the given one. */
222
return ITEM_NOT_FOUND;
226
/* Minimal possible key. It is never in the tree. */
227
const struct reiserfs_key MIN_KEY = { 0, 0, {{0, 0},} };
229
/* Maximal possible key. It is never in the tree. */
230
static const struct reiserfs_key MAX_KEY = {
231
__constant_cpu_to_le32(0xffffffff),
232
__constant_cpu_to_le32(0xffffffff),
233
{{__constant_cpu_to_le32(0xffffffff),
234
__constant_cpu_to_le32(0xffffffff)},}
237
/* Get delimiting key of the buffer by looking for it in the buffers in the path, starting from the bottom
238
of the path, and going upwards. We must check the path's validity at each step. If the key is not in
239
the path, there is no delimiting key in the tree (buffer is first or last buffer in tree), and in this
240
case we return a special key, either MIN_KEY or MAX_KEY. */
241
static inline const struct reiserfs_key *get_lkey(const struct treepath *chk_path,
242
const struct super_block *sb)
244
int position, path_offset = chk_path->path_length;
245
struct buffer_head *parent;
247
RFALSE(path_offset < FIRST_PATH_ELEMENT_OFFSET,
248
"PAP-5010: invalid offset in the path");
250
/* While not higher in path than first element. */
251
while (path_offset-- > FIRST_PATH_ELEMENT_OFFSET) {
253
RFALSE(!buffer_uptodate
254
(PATH_OFFSET_PBUFFER(chk_path, path_offset)),
255
"PAP-5020: parent is not uptodate");
257
/* Parent at the path is not in the tree now. */
260
PATH_OFFSET_PBUFFER(chk_path, path_offset)))
262
/* Check whether position in the parent is correct. */
264
PATH_OFFSET_POSITION(chk_path,
268
/* Check whether parent at the path really points to the child. */
269
if (B_N_CHILD_NUM(parent, position) !=
270
PATH_OFFSET_PBUFFER(chk_path,
271
path_offset + 1)->b_blocknr)
273
/* Return delimiting key if position in the parent is not equal to zero. */
275
return B_N_PDELIM_KEY(parent, position - 1);
277
/* Return MIN_KEY if we are in the root of the buffer tree. */
278
if (PATH_OFFSET_PBUFFER(chk_path, FIRST_PATH_ELEMENT_OFFSET)->
279
b_blocknr == SB_ROOT_BLOCK(sb))
284
/* Get delimiting key of the buffer at the path and its right neighbor. */
285
inline const struct reiserfs_key *get_rkey(const struct treepath *chk_path,
286
const struct super_block *sb)
288
int position, path_offset = chk_path->path_length;
289
struct buffer_head *parent;
291
RFALSE(path_offset < FIRST_PATH_ELEMENT_OFFSET,
292
"PAP-5030: invalid offset in the path");
294
while (path_offset-- > FIRST_PATH_ELEMENT_OFFSET) {
296
RFALSE(!buffer_uptodate
297
(PATH_OFFSET_PBUFFER(chk_path, path_offset)),
298
"PAP-5040: parent is not uptodate");
300
/* Parent at the path is not in the tree now. */
303
PATH_OFFSET_PBUFFER(chk_path, path_offset)))
305
/* Check whether position in the parent is correct. */
307
PATH_OFFSET_POSITION(chk_path,
311
/* Check whether parent at the path really points to the child. */
312
if (B_N_CHILD_NUM(parent, position) !=
313
PATH_OFFSET_PBUFFER(chk_path,
314
path_offset + 1)->b_blocknr)
316
/* Return delimiting key if position in the parent is not the last one. */
317
if (position != B_NR_ITEMS(parent))
318
return B_N_PDELIM_KEY(parent, position);
320
/* Return MAX_KEY if we are in the root of the buffer tree. */
321
if (PATH_OFFSET_PBUFFER(chk_path, FIRST_PATH_ELEMENT_OFFSET)->
322
b_blocknr == SB_ROOT_BLOCK(sb))
327
/* Check whether a key is contained in the tree rooted from a buffer at a path. */
328
/* This works by looking at the left and right delimiting keys for the buffer in the last path_element in
329
the path. These delimiting keys are stored at least one level above that buffer in the tree. If the
330
buffer is the first or last node in the tree order then one of the delimiting keys may be absent, and in
331
this case get_lkey and get_rkey return a special key which is MIN_KEY or MAX_KEY. */
332
static inline int key_in_buffer(struct treepath *chk_path, /* Path which should be checked. */
333
const struct cpu_key *key, /* Key which should be checked. */
334
struct super_block *sb
338
RFALSE(!key || chk_path->path_length < FIRST_PATH_ELEMENT_OFFSET
339
|| chk_path->path_length > MAX_HEIGHT,
340
"PAP-5050: pointer to the key(%p) is NULL or invalid path length(%d)",
341
key, chk_path->path_length);
342
RFALSE(!PATH_PLAST_BUFFER(chk_path)->b_bdev,
343
"PAP-5060: device must not be NODEV");
345
if (comp_keys(get_lkey(chk_path, sb), key) == 1)
346
/* left delimiting key is bigger, that the key we look for */
348
/* if ( comp_keys(key, get_rkey(chk_path, sb)) != -1 ) */
349
if (comp_keys(get_rkey(chk_path, sb), key) != 1)
350
/* key must be less than right delimitiing key */
355
int reiserfs_check_path(struct treepath *p)
357
RFALSE(p->path_length != ILLEGAL_PATH_ELEMENT_OFFSET,
358
"path not properly relsed");
362
/* Drop the reference to each buffer in a path and restore
363
* dirty bits clean when preparing the buffer for the log.
364
* This version should only be called from fix_nodes() */
365
void pathrelse_and_restore(struct super_block *sb,
366
struct treepath *search_path)
368
int path_offset = search_path->path_length;
370
RFALSE(path_offset < ILLEGAL_PATH_ELEMENT_OFFSET,
371
"clm-4000: invalid path offset");
373
while (path_offset > ILLEGAL_PATH_ELEMENT_OFFSET) {
374
struct buffer_head *bh;
375
bh = PATH_OFFSET_PBUFFER(search_path, path_offset--);
376
reiserfs_restore_prepared_buffer(sb, bh);
379
search_path->path_length = ILLEGAL_PATH_ELEMENT_OFFSET;
382
/* Drop the reference to each buffer in a path */
383
void pathrelse(struct treepath *search_path)
385
int path_offset = search_path->path_length;
387
RFALSE(path_offset < ILLEGAL_PATH_ELEMENT_OFFSET,
388
"PAP-5090: invalid path offset");
390
while (path_offset > ILLEGAL_PATH_ELEMENT_OFFSET)
391
brelse(PATH_OFFSET_PBUFFER(search_path, path_offset--));
393
search_path->path_length = ILLEGAL_PATH_ELEMENT_OFFSET;
396
static int is_leaf(char *buf, int blocksize, struct buffer_head *bh)
398
struct block_head *blkh;
399
struct item_head *ih;
405
blkh = (struct block_head *)buf;
406
if (blkh_level(blkh) != DISK_LEAF_NODE_LEVEL) {
407
reiserfs_warning(NULL, "reiserfs-5080",
408
"this should be caught earlier");
412
nr = blkh_nr_item(blkh);
413
if (nr < 1 || nr > ((blocksize - BLKH_SIZE) / (IH_SIZE + MIN_ITEM_LEN))) {
414
/* item number is too big or too small */
415
reiserfs_warning(NULL, "reiserfs-5081",
416
"nr_item seems wrong: %z", bh);
419
ih = (struct item_head *)(buf + BLKH_SIZE) + nr - 1;
420
used_space = BLKH_SIZE + IH_SIZE * nr + (blocksize - ih_location(ih));
421
if (used_space != blocksize - blkh_free_space(blkh)) {
422
/* free space does not match to calculated amount of use space */
423
reiserfs_warning(NULL, "reiserfs-5082",
424
"free space seems wrong: %z", bh);
427
// FIXME: it is_leaf will hit performance too much - we may have
430
/* check tables of item heads */
431
ih = (struct item_head *)(buf + BLKH_SIZE);
432
prev_location = blocksize;
433
for (i = 0; i < nr; i++, ih++) {
434
if (le_ih_k_type(ih) == TYPE_ANY) {
435
reiserfs_warning(NULL, "reiserfs-5083",
436
"wrong item type for item %h",
440
if (ih_location(ih) >= blocksize
441
|| ih_location(ih) < IH_SIZE * nr) {
442
reiserfs_warning(NULL, "reiserfs-5084",
443
"item location seems wrong: %h",
447
if (ih_item_len(ih) < 1
448
|| ih_item_len(ih) > MAX_ITEM_LEN(blocksize)) {
449
reiserfs_warning(NULL, "reiserfs-5085",
450
"item length seems wrong: %h",
454
if (prev_location - ih_location(ih) != ih_item_len(ih)) {
455
reiserfs_warning(NULL, "reiserfs-5086",
456
"item location seems wrong "
457
"(second one): %h", ih);
460
prev_location = ih_location(ih);
463
// one may imagine much more checks
467
/* returns 1 if buf looks like an internal node, 0 otherwise */
468
static int is_internal(char *buf, int blocksize, struct buffer_head *bh)
470
struct block_head *blkh;
474
blkh = (struct block_head *)buf;
475
nr = blkh_level(blkh);
476
if (nr <= DISK_LEAF_NODE_LEVEL || nr > MAX_HEIGHT) {
477
/* this level is not possible for internal nodes */
478
reiserfs_warning(NULL, "reiserfs-5087",
479
"this should be caught earlier");
483
nr = blkh_nr_item(blkh);
484
if (nr > (blocksize - BLKH_SIZE - DC_SIZE) / (KEY_SIZE + DC_SIZE)) {
485
/* for internal which is not root we might check min number of keys */
486
reiserfs_warning(NULL, "reiserfs-5088",
487
"number of key seems wrong: %z", bh);
491
used_space = BLKH_SIZE + KEY_SIZE * nr + DC_SIZE * (nr + 1);
492
if (used_space != blocksize - blkh_free_space(blkh)) {
493
reiserfs_warning(NULL, "reiserfs-5089",
494
"free space seems wrong: %z", bh);
497
// one may imagine much more checks
501
// make sure that bh contains formatted node of reiserfs tree of
503
static int is_tree_node(struct buffer_head *bh, int level)
505
if (B_LEVEL(bh) != level) {
506
reiserfs_warning(NULL, "reiserfs-5090", "node level %d does "
507
"not match to the expected one %d",
511
if (level == DISK_LEAF_NODE_LEVEL)
512
return is_leaf(bh->b_data, bh->b_size, bh);
514
return is_internal(bh->b_data, bh->b_size, bh);
517
#define SEARCH_BY_KEY_READA 16
520
* The function is NOT SCHEDULE-SAFE!
521
* It might unlock the write lock if we needed to wait for a block
522
* to be read. Note that in this case it won't recover the lock to avoid
523
* high contention resulting from too much lock requests, especially
524
* the caller (search_by_key) will perform other schedule-unsafe
525
* operations just after calling this function.
527
* @return true if we have unlocked
529
static bool search_by_key_reada(struct super_block *s,
530
struct buffer_head **bh,
531
b_blocknr_t *b, int num)
534
bool unlocked = false;
536
for (i = 0; i < num; i++) {
537
bh[i] = sb_getblk(s, b[i]);
540
* We are going to read some blocks on which we
541
* have a reference. It's safe, though we might be
542
* reading blocks concurrently changed if we release
543
* the lock. But it's still fine because we check later
544
* if the tree changed
546
for (j = 0; j < i; j++) {
548
* note, this needs attention if we are getting rid of the BKL
549
* you have to make sure the prepared bit isn't set on this buffer
551
if (!buffer_uptodate(bh[j])) {
553
reiserfs_write_unlock(s);
556
ll_rw_block(READA, 1, bh + j);
563
/**************************************************************************
564
* Algorithm SearchByKey *
565
* look for item in the Disk S+Tree by its key *
566
* Input: sb - super block *
567
* key - pointer to the key to search *
568
* Output: ITEM_FOUND, ITEM_NOT_FOUND or IO_ERROR *
569
* search_path - path from the root to the needed leaf *
570
**************************************************************************/
572
/* This function fills up the path from the root to the leaf as it
573
descends the tree looking for the key. It uses reiserfs_bread to
574
try to find buffers in the cache given their block number. If it
575
does not find them in the cache it reads them from disk. For each
576
node search_by_key finds using reiserfs_bread it then uses
577
bin_search to look through that node. bin_search will find the
578
position of the block_number of the next node if it is looking
579
through an internal node. If it is looking through a leaf node
580
bin_search will find the position of the item which has key either
581
equal to given key, or which is the maximal key less than the given
582
key. search_by_key returns a path that must be checked for the
583
correctness of the top of the path but need not be checked for the
584
correctness of the bottom of the path */
585
/* The function is NOT SCHEDULE-SAFE! */
586
int search_by_key(struct super_block *sb, const struct cpu_key *key, /* Key to search. */
587
struct treepath *search_path,/* This structure was
588
allocated and initialized
590
function. It is filled up
592
int stop_level /* How far down the tree to search. To
593
stop at leaf level - set to
594
DISK_LEAF_NODE_LEVEL */
597
b_blocknr_t block_number;
599
struct buffer_head *bh;
600
struct path_element *last_element;
601
int node_level, retval;
602
int right_neighbor_of_leaf_node;
604
struct buffer_head *reada_bh[SEARCH_BY_KEY_READA];
605
b_blocknr_t reada_blocks[SEARCH_BY_KEY_READA];
608
#ifdef CONFIG_REISERFS_CHECK
609
int repeat_counter = 0;
612
PROC_INFO_INC(sb, search_by_key);
614
/* As we add each node to a path we increase its count. This means that
615
we must be careful to release all nodes in a path before we either
616
discard the path struct or re-use the path struct, as we do here. */
618
pathrelse(search_path);
620
right_neighbor_of_leaf_node = 0;
622
/* With each iteration of this loop we search through the items in the
623
current node, and calculate the next current node(next path element)
624
for the next iteration of this loop.. */
625
block_number = SB_ROOT_BLOCK(sb);
629
#ifdef CONFIG_REISERFS_CHECK
630
if (!(++repeat_counter % 50000))
631
reiserfs_warning(sb, "PAP-5100",
632
"%s: there were %d iterations of "
633
"while loop looking for key %K",
634
current->comm, repeat_counter,
638
/* prep path to have another element added to it. */
640
PATH_OFFSET_PELEMENT(search_path,
641
++search_path->path_length);
642
fs_gen = get_generation(sb);
644
/* Read the next tree node, and set the last element in the path to
645
have a pointer to it. */
646
if ((bh = last_element->pe_buffer =
647
sb_getblk(sb, block_number))) {
648
bool unlocked = false;
650
if (!buffer_uptodate(bh) && reada_count > 1)
651
/* may unlock the write lock */
652
unlocked = search_by_key_reada(sb, reada_bh,
653
reada_blocks, reada_count);
655
* If we haven't already unlocked the write lock,
656
* then we need to do that here before reading
659
if (!buffer_uptodate(bh) && !unlocked) {
660
reiserfs_write_unlock(sb);
663
ll_rw_block(READ, 1, &bh);
667
reiserfs_write_lock(sb);
668
if (!buffer_uptodate(bh))
672
search_path->path_length--;
673
pathrelse(search_path);
677
if (expected_level == -1)
678
expected_level = SB_TREE_HEIGHT(sb);
681
/* It is possible that schedule occurred. We must check whether the key
682
to search is still in the tree rooted from the current buffer. If
683
not then repeat search from the root. */
684
if (fs_changed(fs_gen, sb) &&
685
(!B_IS_IN_TREE(bh) ||
686
B_LEVEL(bh) != expected_level ||
687
!key_in_buffer(search_path, key, sb))) {
688
PROC_INFO_INC(sb, search_by_key_fs_changed);
689
PROC_INFO_INC(sb, search_by_key_restarted);
691
sbk_restarted[expected_level - 1]);
692
pathrelse(search_path);
694
/* Get the root block number so that we can repeat the search
695
starting from the root. */
696
block_number = SB_ROOT_BLOCK(sb);
698
right_neighbor_of_leaf_node = 0;
700
/* repeat search from the root */
704
/* only check that the key is in the buffer if key is not
705
equal to the MAX_KEY. Latter case is only possible in
706
"finish_unfinished()" processing during mount. */
707
RFALSE(comp_keys(&MAX_KEY, key) &&
708
!key_in_buffer(search_path, key, sb),
709
"PAP-5130: key is not in the buffer");
710
#ifdef CONFIG_REISERFS_CHECK
711
if (REISERFS_SB(sb)->cur_tb) {
712
print_cur_tb("5140");
713
reiserfs_panic(sb, "PAP-5140",
714
"schedule occurred in do_balance!");
718
// make sure, that the node contents look like a node of
720
if (!is_tree_node(bh, expected_level)) {
721
reiserfs_error(sb, "vs-5150",
722
"invalid format found in block %ld. "
723
"Fsck?", bh->b_blocknr);
724
pathrelse(search_path);
728
/* ok, we have acquired next formatted node in the tree */
729
node_level = B_LEVEL(bh);
731
PROC_INFO_BH_STAT(sb, bh, node_level - 1);
733
RFALSE(node_level < stop_level,
734
"vs-5152: tree level (%d) is less than stop level (%d)",
735
node_level, stop_level);
737
retval = bin_search(key, B_N_PITEM_HEAD(bh, 0),
740
DISK_LEAF_NODE_LEVEL) ? IH_SIZE :
742
&(last_element->pe_position));
743
if (node_level == stop_level) {
747
/* we are not in the stop level */
748
if (retval == ITEM_FOUND)
749
/* item has been found, so we choose the pointer which is to the right of the found one */
750
last_element->pe_position++;
752
/* if item was not found we choose the position which is to
753
the left of the found item. This requires no code,
754
bin_search did it already. */
756
/* So we have chosen a position in the current node which is
757
an internal node. Now we calculate child block number by
758
position in the node. */
760
B_N_CHILD_NUM(bh, last_element->pe_position);
762
/* if we are going to read leaf nodes, try for read ahead as well */
763
if ((search_path->reada & PATH_READA) &&
764
node_level == DISK_LEAF_NODE_LEVEL + 1) {
765
int pos = last_element->pe_position;
766
int limit = B_NR_ITEMS(bh);
767
struct reiserfs_key *le_key;
769
if (search_path->reada & PATH_READA_BACK)
771
while (reada_count < SEARCH_BY_KEY_READA) {
774
reada_blocks[reada_count++] =
775
B_N_CHILD_NUM(bh, pos);
776
if (search_path->reada & PATH_READA_BACK)
782
* check to make sure we're in the same object
784
le_key = B_N_PDELIM_KEY(bh, pos);
785
if (le32_to_cpu(le_key->k_objectid) !=
786
key->on_disk_key.k_objectid) {
794
/* Form the path to an item and position in this item which contains
795
file byte defined by key. If there is no such item
796
corresponding to the key, we point the path to the item with
797
maximal key less than key, and *pos_in_item is set to one
798
past the last entry/byte in the item. If searching for entry in a
799
directory item, and it is not found, *pos_in_item is set to one
800
entry more than the entry with maximal key which is less than the
803
Note that if there is no entry in this same node which is one more,
804
then we point to an imaginary entry. for direct items, the
805
position is in units of bytes, for indirect items the position is
806
in units of blocknr entries, for directory items the position is in
807
units of directory entries. */
809
/* The function is NOT SCHEDULE-SAFE! */
810
int search_for_position_by_key(struct super_block *sb, /* Pointer to the super block. */
811
const struct cpu_key *p_cpu_key, /* Key to search (cpu variable) */
812
struct treepath *search_path /* Filled up by this function. */
815
struct item_head *p_le_ih; /* pointer to on-disk structure */
817
loff_t item_offset, offset;
818
struct reiserfs_dir_entry de;
821
/* If searching for directory entry. */
822
if (is_direntry_cpu_key(p_cpu_key))
823
return search_by_entry_key(sb, p_cpu_key, search_path,
826
/* If not searching for directory entry. */
828
/* If item is found. */
829
retval = search_item(sb, p_cpu_key, search_path);
830
if (retval == IO_ERROR)
832
if (retval == ITEM_FOUND) {
836
(PATH_PLAST_BUFFER(search_path),
837
PATH_LAST_POSITION(search_path))),
838
"PAP-5165: item length equals zero");
840
pos_in_item(search_path) = 0;
841
return POSITION_FOUND;
844
RFALSE(!PATH_LAST_POSITION(search_path),
845
"PAP-5170: position equals zero");
847
/* Item is not found. Set path to the previous item. */
849
B_N_PITEM_HEAD(PATH_PLAST_BUFFER(search_path),
850
--PATH_LAST_POSITION(search_path));
851
blk_size = sb->s_blocksize;
853
if (comp_short_keys(&(p_le_ih->ih_key), p_cpu_key)) {
854
return FILE_NOT_FOUND;
856
// FIXME: quite ugly this far
858
item_offset = le_ih_k_offset(p_le_ih);
859
offset = cpu_key_k_offset(p_cpu_key);
861
/* Needed byte is contained in the item pointed to by the path. */
862
if (item_offset <= offset &&
863
item_offset + op_bytes_number(p_le_ih, blk_size) > offset) {
864
pos_in_item(search_path) = offset - item_offset;
865
if (is_indirect_le_ih(p_le_ih)) {
866
pos_in_item(search_path) /= blk_size;
868
return POSITION_FOUND;
871
/* Needed byte is not contained in the item pointed to by the
872
path. Set pos_in_item out of the item. */
873
if (is_indirect_le_ih(p_le_ih))
874
pos_in_item(search_path) =
875
ih_item_len(p_le_ih) / UNFM_P_SIZE;
877
pos_in_item(search_path) = ih_item_len(p_le_ih);
879
return POSITION_NOT_FOUND;
882
/* Compare given item and item pointed to by the path. */
883
int comp_items(const struct item_head *stored_ih, const struct treepath *path)
885
struct buffer_head *bh = PATH_PLAST_BUFFER(path);
886
struct item_head *ih;
888
/* Last buffer at the path is not in the tree. */
889
if (!B_IS_IN_TREE(bh))
892
/* Last path position is invalid. */
893
if (PATH_LAST_POSITION(path) >= B_NR_ITEMS(bh))
896
/* we need only to know, whether it is the same item */
898
return memcmp(stored_ih, ih, IH_SIZE);
901
/* unformatted nodes are not logged anymore, ever. This is safe
904
#define held_by_others(bh) (atomic_read(&(bh)->b_count) > 1)
906
// block can not be forgotten as it is in I/O or held by someone
907
#define block_in_use(bh) (buffer_locked(bh) || (held_by_others(bh)))
909
// prepare for delete or cut of direct item
910
static inline int prepare_for_direct_item(struct treepath *path,
911
struct item_head *le_ih,
913
loff_t new_file_length, int *cut_size)
917
if (new_file_length == max_reiserfs_offset(inode)) {
918
/* item has to be deleted */
919
*cut_size = -(IH_SIZE + ih_item_len(le_ih));
922
// new file gets truncated
923
if (get_inode_item_key_version(inode) == KEY_FORMAT_3_6) {
925
round_len = ROUND_UP(new_file_length);
926
/* this was new_file_length < le_ih ... */
927
if (round_len < le_ih_k_offset(le_ih)) {
928
*cut_size = -(IH_SIZE + ih_item_len(le_ih));
929
return M_DELETE; /* Delete this item. */
931
/* Calculate first position and size for cutting from item. */
932
pos_in_item(path) = round_len - (le_ih_k_offset(le_ih) - 1);
933
*cut_size = -(ih_item_len(le_ih) - pos_in_item(path));
935
return M_CUT; /* Cut from this item. */
938
// old file: items may have any length
940
if (new_file_length < le_ih_k_offset(le_ih)) {
941
*cut_size = -(IH_SIZE + ih_item_len(le_ih));
942
return M_DELETE; /* Delete this item. */
944
/* Calculate first position and size for cutting from item. */
945
*cut_size = -(ih_item_len(le_ih) -
947
new_file_length + 1 - le_ih_k_offset(le_ih)));
948
return M_CUT; /* Cut from this item. */
951
static inline int prepare_for_direntry_item(struct treepath *path,
952
struct item_head *le_ih,
954
loff_t new_file_length,
957
if (le_ih_k_offset(le_ih) == DOT_OFFSET &&
958
new_file_length == max_reiserfs_offset(inode)) {
959
RFALSE(ih_entry_count(le_ih) != 2,
960
"PAP-5220: incorrect empty directory item (%h)", le_ih);
961
*cut_size = -(IH_SIZE + ih_item_len(le_ih));
962
return M_DELETE; /* Delete the directory item containing "." and ".." entry. */
965
if (ih_entry_count(le_ih) == 1) {
966
/* Delete the directory item such as there is one record only
968
*cut_size = -(IH_SIZE + ih_item_len(le_ih));
972
/* Cut one record from the directory item. */
975
entry_length(get_last_bh(path), le_ih, pos_in_item(path)));
979
#define JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD (2 * JOURNAL_PER_BALANCE_CNT + 1)
981
/* If the path points to a directory or direct item, calculate mode and the size cut, for balance.
982
If the path points to an indirect item, remove some number of its unformatted nodes.
983
In case of file truncate calculate whether this item must be deleted/truncated or last
984
unformatted node of this item will be converted to a direct item.
985
This function returns a determination of what balance mode the calling function should employ. */
986
static char prepare_for_delete_or_cut(struct reiserfs_transaction_handle *th, struct inode *inode, struct treepath *path, const struct cpu_key *item_key, int *removed, /* Number of unformatted nodes which were removed
987
from end of the file. */
988
int *cut_size, unsigned long long new_file_length /* MAX_KEY_OFFSET in case of delete. */
991
struct super_block *sb = inode->i_sb;
992
struct item_head *p_le_ih = PATH_PITEM_HEAD(path);
993
struct buffer_head *bh = PATH_PLAST_BUFFER(path);
995
BUG_ON(!th->t_trans_id);
997
/* Stat_data item. */
998
if (is_statdata_le_ih(p_le_ih)) {
1000
RFALSE(new_file_length != max_reiserfs_offset(inode),
1001
"PAP-5210: mode must be M_DELETE");
1003
*cut_size = -(IH_SIZE + ih_item_len(p_le_ih));
1007
/* Directory item. */
1008
if (is_direntry_le_ih(p_le_ih))
1009
return prepare_for_direntry_item(path, p_le_ih, inode,
1014
if (is_direct_le_ih(p_le_ih))
1015
return prepare_for_direct_item(path, p_le_ih, inode,
1016
new_file_length, cut_size);
1018
/* Case of an indirect item. */
1020
int blk_size = sb->s_blocksize;
1021
struct item_head s_ih;
1027
if ( new_file_length == max_reiserfs_offset (inode) ) {
1028
/* prepare_for_delete_or_cut() is called by
1029
* reiserfs_delete_item() */
1030
new_file_length = 0;
1037
bh = PATH_PLAST_BUFFER(path);
1038
copy_item_head(&s_ih, PATH_PITEM_HEAD(path));
1039
pos = I_UNFM_NUM(&s_ih);
1041
while (le_ih_k_offset (&s_ih) + (pos - 1) * blk_size > new_file_length) {
1045
/* Each unformatted block deletion may involve one additional
1046
* bitmap block into the transaction, thereby the initial
1047
* journal space reservation might not be enough. */
1048
if (!delete && (*cut_size) != 0 &&
1049
reiserfs_transaction_free_space(th) < JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD)
1052
unfm = (__le32 *)B_I_PITEM(bh, &s_ih) + pos - 1;
1053
block = get_block_num(unfm, 0);
1056
reiserfs_prepare_for_journal(sb, bh, 1);
1057
put_block_num(unfm, 0, 0);
1058
journal_mark_dirty(th, sb, bh);
1059
reiserfs_free_block(th, inode, block, 1);
1062
reiserfs_write_unlock(sb);
1064
reiserfs_write_lock(sb);
1066
if (item_moved (&s_ih, path)) {
1073
(*cut_size) -= UNFM_P_SIZE;
1076
(*cut_size) -= IH_SIZE;
1081
/* a trick. If the buffer has been logged, this will do nothing. If
1082
** we've broken the loop without logging it, it will restore the
1084
reiserfs_restore_prepared_buffer(sb, bh);
1085
} while (need_re_search &&
1086
search_for_position_by_key(sb, item_key, path) == POSITION_FOUND);
1087
pos_in_item(path) = pos * UNFM_P_SIZE;
1089
if (*cut_size == 0) {
1090
/* Nothing were cut. maybe convert last unformatted node to the
1098
/* Calculate number of bytes which will be deleted or cut during balance */
1099
static int calc_deleted_bytes_number(struct tree_balance *tb, char mode)
1102
struct item_head *p_le_ih = PATH_PITEM_HEAD(tb->tb_path);
1104
if (is_statdata_le_ih(p_le_ih))
1109
M_DELETE) ? ih_item_len(p_le_ih) : -tb->insert_size[0];
1110
if (is_direntry_le_ih(p_le_ih)) {
1111
/* return EMPTY_DIR_SIZE; We delete emty directoris only.
1112
* we can't use EMPTY_DIR_SIZE, as old format dirs have a different
1113
* empty size. ick. FIXME, is this right? */
1117
if (is_indirect_le_ih(p_le_ih))
1118
del_size = (del_size / UNFM_P_SIZE) *
1119
(PATH_PLAST_BUFFER(tb->tb_path)->b_size);
1123
static void init_tb_struct(struct reiserfs_transaction_handle *th,
1124
struct tree_balance *tb,
1125
struct super_block *sb,
1126
struct treepath *path, int size)
1129
BUG_ON(!th->t_trans_id);
1131
memset(tb, '\0', sizeof(struct tree_balance));
1132
tb->transaction_handle = th;
1135
PATH_OFFSET_PBUFFER(path, ILLEGAL_PATH_ELEMENT_OFFSET) = NULL;
1136
PATH_OFFSET_POSITION(path, ILLEGAL_PATH_ELEMENT_OFFSET) = 0;
1137
tb->insert_size[0] = size;
1140
void padd_item(char *item, int total_length, int length)
1144
for (i = total_length; i > length;)
1148
#ifdef REISERQUOTA_DEBUG
1149
char key2type(struct reiserfs_key *ih)
1151
if (is_direntry_le_key(2, ih))
1153
if (is_direct_le_key(2, ih))
1155
if (is_indirect_le_key(2, ih))
1157
if (is_statdata_le_key(2, ih))
1162
char head2type(struct item_head *ih)
1164
if (is_direntry_le_ih(ih))
1166
if (is_direct_le_ih(ih))
1168
if (is_indirect_le_ih(ih))
1170
if (is_statdata_le_ih(ih))
1176
/* Delete object item.
1177
* th - active transaction handle
1178
* path - path to the deleted item
1179
* item_key - key to search for the deleted item
1180
* indode - used for updating i_blocks and quotas
1181
* un_bh - NULL or unformatted node pointer
1183
int reiserfs_delete_item(struct reiserfs_transaction_handle *th,
1184
struct treepath *path, const struct cpu_key *item_key,
1185
struct inode *inode, struct buffer_head *un_bh)
1187
struct super_block *sb = inode->i_sb;
1188
struct tree_balance s_del_balance;
1189
struct item_head s_ih;
1190
struct item_head *q_ih;
1191
int quota_cut_bytes;
1192
int ret_value, del_size, removed;
1194
#ifdef CONFIG_REISERFS_CHECK
1199
BUG_ON(!th->t_trans_id);
1201
init_tb_struct(th, &s_del_balance, sb, path,
1202
0 /*size is unknown */ );
1207
#ifdef CONFIG_REISERFS_CHECK
1211
prepare_for_delete_or_cut(th, inode, path,
1214
max_reiserfs_offset(inode));
1216
RFALSE(mode != M_DELETE, "PAP-5320: mode must be M_DELETE");
1218
copy_item_head(&s_ih, PATH_PITEM_HEAD(path));
1219
s_del_balance.insert_size[0] = del_size;
1221
ret_value = fix_nodes(M_DELETE, &s_del_balance, NULL, NULL);
1222
if (ret_value != REPEAT_SEARCH)
1225
PROC_INFO_INC(sb, delete_item_restarted);
1227
// file system changed, repeat search
1229
search_for_position_by_key(sb, item_key, path);
1230
if (ret_value == IO_ERROR)
1232
if (ret_value == FILE_NOT_FOUND) {
1233
reiserfs_warning(sb, "vs-5340",
1234
"no items of the file %K found",
1240
if (ret_value != CARRY_ON) {
1241
unfix_nodes(&s_del_balance);
1244
// reiserfs_delete_item returns item length when success
1245
ret_value = calc_deleted_bytes_number(&s_del_balance, M_DELETE);
1246
q_ih = get_ih(path);
1247
quota_cut_bytes = ih_item_len(q_ih);
1249
/* hack so the quota code doesn't have to guess if the file
1250
** has a tail. On tail insert, we allocate quota for 1 unformatted node.
1251
** We test the offset because the tail might have been
1252
** split into multiple items, and we only want to decrement for
1253
** the unfm node once
1255
if (!S_ISLNK(inode->i_mode) && is_direct_le_ih(q_ih)) {
1256
if ((le_ih_k_offset(q_ih) & (sb->s_blocksize - 1)) == 1) {
1257
quota_cut_bytes = sb->s_blocksize + UNFM_P_SIZE;
1259
quota_cut_bytes = 0;
1267
/* We are in direct2indirect conversion, so move tail contents
1268
to the unformatted node */
1269
/* note, we do the copy before preparing the buffer because we
1270
** don't care about the contents of the unformatted node yet.
1271
** the only thing we really care about is the direct item's data
1272
** is in the unformatted node.
1274
** Otherwise, we would have to call reiserfs_prepare_for_journal on
1275
** the unformatted node, which might schedule, meaning we'd have to
1276
** loop all the way back up to the start of the while loop.
1278
** The unformatted node must be dirtied later on. We can't be
1279
** sure here if the entire tail has been deleted yet.
1281
** un_bh is from the page cache (all unformatted nodes are
1282
** from the page cache) and might be a highmem page. So, we
1283
** can't use un_bh->b_data.
1287
data = kmap_atomic(un_bh->b_page, KM_USER0);
1288
off = ((le_ih_k_offset(&s_ih) - 1) & (PAGE_CACHE_SIZE - 1));
1290
B_I_PITEM(PATH_PLAST_BUFFER(path), &s_ih),
1292
kunmap_atomic(data, KM_USER0);
1294
/* Perform balancing after all resources have been collected at once. */
1295
do_balance(&s_del_balance, NULL, NULL, M_DELETE);
1297
#ifdef REISERQUOTA_DEBUG
1298
reiserfs_debug(sb, REISERFS_DEBUG_CODE,
1299
"reiserquota delete_item(): freeing %u, id=%u type=%c",
1300
quota_cut_bytes, inode->i_uid, head2type(&s_ih));
1302
dquot_free_space_nodirty(inode, quota_cut_bytes);
1304
/* Return deleted body length */
1308
/* Summary Of Mechanisms For Handling Collisions Between Processes:
1310
deletion of the body of the object is performed by iput(), with the
1311
result that if multiple processes are operating on a file, the
1312
deletion of the body of the file is deferred until the last process
1313
that has an open inode performs its iput().
1315
writes and truncates are protected from collisions by use of
1318
creates, linking, and mknod are protected from collisions with other
1319
processes by making the reiserfs_add_entry() the last step in the
1320
creation, and then rolling back all changes if there was a collision.
1324
/* this deletes item which never gets split */
1325
void reiserfs_delete_solid_item(struct reiserfs_transaction_handle *th,
1326
struct inode *inode, struct reiserfs_key *key)
1328
struct tree_balance tb;
1329
INITIALIZE_PATH(path);
1332
struct cpu_key cpu_key;
1334
int quota_cut_bytes = 0;
1336
BUG_ON(!th->t_trans_id);
1338
le_key2cpu_key(&cpu_key, key);
1341
retval = search_item(th->t_super, &cpu_key, &path);
1342
if (retval == IO_ERROR) {
1343
reiserfs_error(th->t_super, "vs-5350",
1344
"i/o failure occurred trying "
1345
"to delete %K", &cpu_key);
1348
if (retval != ITEM_FOUND) {
1350
// No need for a warning, if there is just no free space to insert '..' item into the newly-created subdir
1352
((unsigned long long)
1353
GET_HASH_VALUE(le_key_k_offset
1354
(le_key_version(key), key)) == 0
1355
&& (unsigned long long)
1356
GET_GENERATION_NUMBER(le_key_k_offset
1357
(le_key_version(key),
1359
reiserfs_warning(th->t_super, "vs-5355",
1360
"%k not found", key);
1365
item_len = ih_item_len(PATH_PITEM_HEAD(&path));
1366
init_tb_struct(th, &tb, th->t_super, &path,
1367
-(IH_SIZE + item_len));
1369
quota_cut_bytes = ih_item_len(PATH_PITEM_HEAD(&path));
1371
retval = fix_nodes(M_DELETE, &tb, NULL, NULL);
1372
if (retval == REPEAT_SEARCH) {
1373
PROC_INFO_INC(th->t_super, delete_solid_item_restarted);
1377
if (retval == CARRY_ON) {
1378
do_balance(&tb, NULL, NULL, M_DELETE);
1379
if (inode) { /* Should we count quota for item? (we don't count quotas for save-links) */
1380
#ifdef REISERQUOTA_DEBUG
1381
reiserfs_debug(th->t_super, REISERFS_DEBUG_CODE,
1382
"reiserquota delete_solid_item(): freeing %u id=%u type=%c",
1383
quota_cut_bytes, inode->i_uid,
1386
dquot_free_space_nodirty(inode,
1391
// IO_ERROR, NO_DISK_SPACE, etc
1392
reiserfs_warning(th->t_super, "vs-5360",
1393
"could not delete %K due to fix_nodes failure",
1399
reiserfs_check_path(&path);
1402
int reiserfs_delete_object(struct reiserfs_transaction_handle *th,
1403
struct inode *inode)
1407
BUG_ON(!th->t_trans_id);
1409
/* for directory this deletes item containing "." and ".." */
1411
reiserfs_do_truncate(th, inode, NULL, 0 /*no timestamp updates */ );
1415
#if defined( USE_INODE_GENERATION_COUNTER )
1416
if (!old_format_only(th->t_super)) {
1417
__le32 *inode_generation;
1420
&REISERFS_SB(th->t_super)->s_rs->s_inode_generation;
1421
le32_add_cpu(inode_generation, 1);
1423
/* USE_INODE_GENERATION_COUNTER */
1425
reiserfs_delete_solid_item(th, inode, INODE_PKEY(inode));
1430
static void unmap_buffers(struct page *page, loff_t pos)
1432
struct buffer_head *bh;
1433
struct buffer_head *head;
1434
struct buffer_head *next;
1435
unsigned long tail_index;
1436
unsigned long cur_index;
1439
if (page_has_buffers(page)) {
1440
tail_index = pos & (PAGE_CACHE_SIZE - 1);
1442
head = page_buffers(page);
1445
next = bh->b_this_page;
1447
/* we want to unmap the buffers that contain the tail, and
1448
** all the buffers after it (since the tail must be at the
1449
** end of the file). We don't want to unmap file data
1450
** before the tail, since it might be dirty and waiting to
1453
cur_index += bh->b_size;
1454
if (cur_index > tail_index) {
1455
reiserfs_unmap_buffer(bh);
1458
} while (bh != head);
1463
static int maybe_indirect_to_direct(struct reiserfs_transaction_handle *th,
1464
struct inode *inode,
1466
struct treepath *path,
1467
const struct cpu_key *item_key,
1468
loff_t new_file_size, char *mode)
1470
struct super_block *sb = inode->i_sb;
1471
int block_size = sb->s_blocksize;
1473
BUG_ON(!th->t_trans_id);
1474
BUG_ON(new_file_size != inode->i_size);
1476
/* the page being sent in could be NULL if there was an i/o error
1477
** reading in the last block. The user will hit problems trying to
1478
** read the file, but for now we just skip the indirect2direct
1480
if (atomic_read(&inode->i_count) > 1 ||
1481
!tail_has_to_be_packed(inode) ||
1482
!page || (REISERFS_I(inode)->i_flags & i_nopack_mask)) {
1483
/* leave tail in an unformatted node */
1484
*mode = M_SKIP_BALANCING;
1486
block_size - (new_file_size & (block_size - 1));
1490
/* Perform the conversion to a direct_item. */
1491
/* return indirect_to_direct(inode, path, item_key,
1492
new_file_size, mode); */
1493
return indirect2direct(th, inode, page, path, item_key,
1494
new_file_size, mode);
1497
/* we did indirect_to_direct conversion. And we have inserted direct
1498
item successesfully, but there were no disk space to cut unfm
1499
pointer being converted. Therefore we have to delete inserted
1501
static void indirect_to_direct_roll_back(struct reiserfs_transaction_handle *th,
1502
struct inode *inode, struct treepath *path)
1504
struct cpu_key tail_key;
1507
BUG_ON(!th->t_trans_id);
1509
make_cpu_key(&tail_key, inode, inode->i_size + 1, TYPE_DIRECT, 4); // !!!!
1510
tail_key.key_length = 4;
1513
(cpu_key_k_offset(&tail_key) & (inode->i_sb->s_blocksize - 1)) - 1;
1515
/* look for the last byte of the tail */
1516
if (search_for_position_by_key(inode->i_sb, &tail_key, path) ==
1518
reiserfs_panic(inode->i_sb, "vs-5615",
1519
"found invalid item");
1520
RFALSE(path->pos_in_item !=
1521
ih_item_len(PATH_PITEM_HEAD(path)) - 1,
1522
"vs-5616: appended bytes found");
1523
PATH_LAST_POSITION(path)--;
1526
reiserfs_delete_item(th, path, &tail_key, inode,
1527
NULL /*unbh not needed */ );
1529
|| removed > tail_len,
1530
"vs-5617: there was tail %d bytes, removed item length %d bytes",
1532
tail_len -= removed;
1533
set_cpu_key_k_offset(&tail_key,
1534
cpu_key_k_offset(&tail_key) - removed);
1536
reiserfs_warning(inode->i_sb, "reiserfs-5091", "indirect_to_direct "
1537
"conversion has been rolled back due to "
1538
"lack of disk space");
1539
//mark_file_without_tail (inode);
1540
mark_inode_dirty(inode);
1543
/* (Truncate or cut entry) or delete object item. Returns < 0 on failure */
1544
int reiserfs_cut_from_item(struct reiserfs_transaction_handle *th,
1545
struct treepath *path,
1546
struct cpu_key *item_key,
1547
struct inode *inode,
1548
struct page *page, loff_t new_file_size)
1550
struct super_block *sb = inode->i_sb;
1551
/* Every function which is going to call do_balance must first
1552
create a tree_balance structure. Then it must fill up this
1553
structure by using the init_tb_struct and fix_nodes functions.
1554
After that we can make tree balancing. */
1555
struct tree_balance s_cut_balance;
1556
struct item_head *p_le_ih;
1557
int cut_size = 0, /* Amount to be cut. */
1558
ret_value = CARRY_ON, removed = 0, /* Number of the removed unformatted nodes. */
1559
is_inode_locked = 0;
1560
char mode; /* Mode of the balance. */
1562
int quota_cut_bytes;
1563
loff_t tail_pos = 0;
1565
BUG_ON(!th->t_trans_id);
1567
init_tb_struct(th, &s_cut_balance, inode->i_sb, path,
1570
/* Repeat this loop until we either cut the item without needing
1571
to balance, or we fix_nodes without schedule occurring */
1573
/* Determine the balance mode, position of the first byte to
1574
be cut, and size to be cut. In case of the indirect item
1575
free unformatted nodes which are pointed to by the cut
1579
prepare_for_delete_or_cut(th, inode, path,
1581
&cut_size, new_file_size);
1582
if (mode == M_CONVERT) {
1583
/* convert last unformatted node to direct item or leave
1584
tail in the unformatted node */
1585
RFALSE(ret_value != CARRY_ON,
1586
"PAP-5570: can not convert twice");
1589
maybe_indirect_to_direct(th, inode, page,
1591
new_file_size, &mode);
1592
if (mode == M_SKIP_BALANCING)
1593
/* tail has been left in the unformatted node */
1596
is_inode_locked = 1;
1598
/* removing of last unformatted node will change value we
1599
have to return to truncate. Save it */
1600
retval2 = ret_value;
1601
/*retval2 = sb->s_blocksize - (new_file_size & (sb->s_blocksize - 1)); */
1603
/* So, we have performed the first part of the conversion:
1604
inserting the new direct item. Now we are removing the
1605
last unformatted node pointer. Set key to search for
1607
set_cpu_key_k_type(item_key, TYPE_INDIRECT);
1608
item_key->key_length = 4;
1610
(new_file_size & (sb->s_blocksize - 1));
1611
tail_pos = new_file_size;
1612
set_cpu_key_k_offset(item_key, new_file_size + 1);
1613
if (search_for_position_by_key
1615
path) == POSITION_NOT_FOUND) {
1616
print_block(PATH_PLAST_BUFFER(path), 3,
1617
PATH_LAST_POSITION(path) - 1,
1618
PATH_LAST_POSITION(path) + 1);
1619
reiserfs_panic(sb, "PAP-5580", "item to "
1620
"convert does not exist (%K)",
1625
if (cut_size == 0) {
1630
s_cut_balance.insert_size[0] = cut_size;
1632
ret_value = fix_nodes(mode, &s_cut_balance, NULL, NULL);
1633
if (ret_value != REPEAT_SEARCH)
1636
PROC_INFO_INC(sb, cut_from_item_restarted);
1639
search_for_position_by_key(sb, item_key, path);
1640
if (ret_value == POSITION_FOUND)
1643
reiserfs_warning(sb, "PAP-5610", "item %K not found",
1645
unfix_nodes(&s_cut_balance);
1646
return (ret_value == IO_ERROR) ? -EIO : -ENOENT;
1649
// check fix_nodes results (IO_ERROR or NO_DISK_SPACE)
1650
if (ret_value != CARRY_ON) {
1651
if (is_inode_locked) {
1652
// FIXME: this seems to be not needed: we are always able
1654
indirect_to_direct_roll_back(th, inode, path);
1656
if (ret_value == NO_DISK_SPACE)
1657
reiserfs_warning(sb, "reiserfs-5092",
1659
unfix_nodes(&s_cut_balance);
1663
/* go ahead and perform balancing */
1665
RFALSE(mode == M_PASTE || mode == M_INSERT, "invalid mode");
1667
/* Calculate number of bytes that need to be cut from the item. */
1670
M_DELETE) ? ih_item_len(get_ih(path)) : -s_cut_balance.
1673
ret_value = calc_deleted_bytes_number(&s_cut_balance, mode);
1675
ret_value = retval2;
1677
/* For direct items, we only change the quota when deleting the last
1680
p_le_ih = PATH_PITEM_HEAD(s_cut_balance.tb_path);
1681
if (!S_ISLNK(inode->i_mode) && is_direct_le_ih(p_le_ih)) {
1682
if (mode == M_DELETE &&
1683
(le_ih_k_offset(p_le_ih) & (sb->s_blocksize - 1)) ==
1685
// FIXME: this is to keep 3.5 happy
1686
REISERFS_I(inode)->i_first_direct_byte = U32_MAX;
1687
quota_cut_bytes = sb->s_blocksize + UNFM_P_SIZE;
1689
quota_cut_bytes = 0;
1692
#ifdef CONFIG_REISERFS_CHECK
1693
if (is_inode_locked) {
1694
struct item_head *le_ih =
1695
PATH_PITEM_HEAD(s_cut_balance.tb_path);
1696
/* we are going to complete indirect2direct conversion. Make
1697
sure, that we exactly remove last unformatted node pointer
1699
if (!is_indirect_le_ih(le_ih))
1700
reiserfs_panic(sb, "vs-5652",
1701
"item must be indirect %h", le_ih);
1703
if (mode == M_DELETE && ih_item_len(le_ih) != UNFM_P_SIZE)
1704
reiserfs_panic(sb, "vs-5653", "completing "
1705
"indirect2direct conversion indirect "
1706
"item %h being deleted must be of "
1707
"4 byte long", le_ih);
1710
&& s_cut_balance.insert_size[0] != -UNFM_P_SIZE) {
1711
reiserfs_panic(sb, "vs-5654", "can not complete "
1712
"indirect2direct conversion of %h "
1713
"(CUT, insert_size==%d)",
1714
le_ih, s_cut_balance.insert_size[0]);
1716
/* it would be useful to make sure, that right neighboring
1717
item is direct item of this file */
1721
do_balance(&s_cut_balance, NULL, NULL, mode);
1722
if (is_inode_locked) {
1723
/* we've done an indirect->direct conversion. when the data block
1724
** was freed, it was removed from the list of blocks that must
1725
** be flushed before the transaction commits, make sure to
1726
** unmap and invalidate it
1728
unmap_buffers(page, tail_pos);
1729
REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask;
1731
#ifdef REISERQUOTA_DEBUG
1732
reiserfs_debug(inode->i_sb, REISERFS_DEBUG_CODE,
1733
"reiserquota cut_from_item(): freeing %u id=%u type=%c",
1734
quota_cut_bytes, inode->i_uid, '?');
1736
dquot_free_space_nodirty(inode, quota_cut_bytes);
1740
static void truncate_directory(struct reiserfs_transaction_handle *th,
1741
struct inode *inode)
1743
BUG_ON(!th->t_trans_id);
1745
reiserfs_error(inode->i_sb, "vs-5655", "link count != 0");
1747
set_le_key_k_offset(KEY_FORMAT_3_5, INODE_PKEY(inode), DOT_OFFSET);
1748
set_le_key_k_type(KEY_FORMAT_3_5, INODE_PKEY(inode), TYPE_DIRENTRY);
1749
reiserfs_delete_solid_item(th, inode, INODE_PKEY(inode));
1750
reiserfs_update_sd(th, inode);
1751
set_le_key_k_offset(KEY_FORMAT_3_5, INODE_PKEY(inode), SD_OFFSET);
1752
set_le_key_k_type(KEY_FORMAT_3_5, INODE_PKEY(inode), TYPE_STAT_DATA);
1755
/* Truncate file to the new size. Note, this must be called with a transaction
1757
int reiserfs_do_truncate(struct reiserfs_transaction_handle *th,
1758
struct inode *inode, /* ->i_size contains new size */
1759
struct page *page, /* up to date for last block */
1760
int update_timestamps /* when it is called by
1761
file_release to convert
1762
the tail - no timestamps
1763
should be updated */
1766
INITIALIZE_PATH(s_search_path); /* Path to the current object item. */
1767
struct item_head *p_le_ih; /* Pointer to an item header. */
1768
struct cpu_key s_item_key; /* Key to search for a previous file item. */
1769
loff_t file_size, /* Old file size. */
1770
new_file_size; /* New file size. */
1771
int deleted; /* Number of deleted or truncated bytes. */
1775
BUG_ON(!th->t_trans_id);
1777
(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode)
1778
|| S_ISLNK(inode->i_mode)))
1781
if (S_ISDIR(inode->i_mode)) {
1782
// deletion of directory - no need to update timestamps
1783
truncate_directory(th, inode);
1787
/* Get new file size. */
1788
new_file_size = inode->i_size;
1790
// FIXME: note, that key type is unimportant here
1791
make_cpu_key(&s_item_key, inode, max_reiserfs_offset(inode),
1795
search_for_position_by_key(inode->i_sb, &s_item_key,
1797
if (retval == IO_ERROR) {
1798
reiserfs_error(inode->i_sb, "vs-5657",
1799
"i/o failure occurred trying to truncate %K",
1804
if (retval == POSITION_FOUND || retval == FILE_NOT_FOUND) {
1805
reiserfs_error(inode->i_sb, "PAP-5660",
1806
"wrong result %d of search for %K", retval,
1813
s_search_path.pos_in_item--;
1815
/* Get real file size (total length of all file items) */
1816
p_le_ih = PATH_PITEM_HEAD(&s_search_path);
1817
if (is_statdata_le_ih(p_le_ih))
1820
loff_t offset = le_ih_k_offset(p_le_ih);
1822
op_bytes_number(p_le_ih, inode->i_sb->s_blocksize);
1824
/* this may mismatch with real file size: if last direct item
1825
had no padding zeros and last unformatted node had no free
1826
space, this file would have this file size */
1827
file_size = offset + bytes - 1;
1830
* are we doing a full truncate or delete, if so
1831
* kick in the reada code
1833
if (new_file_size == 0)
1834
s_search_path.reada = PATH_READA | PATH_READA_BACK;
1836
if (file_size == 0 || file_size < new_file_size) {
1837
goto update_and_out;
1840
/* Update key to search for the last file item. */
1841
set_cpu_key_k_offset(&s_item_key, file_size);
1844
/* Cut or delete file item. */
1846
reiserfs_cut_from_item(th, &s_search_path, &s_item_key,
1847
inode, page, new_file_size);
1849
reiserfs_warning(inode->i_sb, "vs-5665",
1850
"reiserfs_cut_from_item failed");
1851
reiserfs_check_path(&s_search_path);
1855
RFALSE(deleted > file_size,
1856
"PAP-5670: reiserfs_cut_from_item: too many bytes deleted: deleted %d, file_size %lu, item_key %K",
1857
deleted, file_size, &s_item_key);
1859
/* Change key to search the last file item. */
1860
file_size -= deleted;
1862
set_cpu_key_k_offset(&s_item_key, file_size);
1864
/* While there are bytes to truncate and previous file item is presented in the tree. */
1867
** This loop could take a really long time, and could log
1868
** many more blocks than a transaction can hold. So, we do a polite
1869
** journal end here, and if the transaction needs ending, we make
1870
** sure the file is consistent before ending the current trans
1871
** and starting a new one
1873
if (journal_transaction_should_end(th, 0) ||
1874
reiserfs_transaction_free_space(th) <= JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD) {
1875
int orig_len_alloc = th->t_blocks_allocated;
1876
pathrelse(&s_search_path);
1878
if (update_timestamps) {
1879
inode->i_mtime = CURRENT_TIME_SEC;
1880
inode->i_ctime = CURRENT_TIME_SEC;
1882
reiserfs_update_sd(th, inode);
1884
err = journal_end(th, inode->i_sb, orig_len_alloc);
1887
err = journal_begin(th, inode->i_sb,
1888
JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD + JOURNAL_PER_BALANCE_CNT * 4) ;
1891
reiserfs_update_inode_transaction(inode);
1893
} while (file_size > ROUND_UP(new_file_size) &&
1894
search_for_position_by_key(inode->i_sb, &s_item_key,
1895
&s_search_path) == POSITION_FOUND);
1897
RFALSE(file_size > ROUND_UP(new_file_size),
1898
"PAP-5680: truncate did not finish: new_file_size %Ld, current %Ld, oid %d",
1899
new_file_size, file_size, s_item_key.on_disk_key.k_objectid);
1902
if (update_timestamps) {
1903
// this is truncate, not file closing
1904
inode->i_mtime = CURRENT_TIME_SEC;
1905
inode->i_ctime = CURRENT_TIME_SEC;
1907
reiserfs_update_sd(th, inode);
1910
pathrelse(&s_search_path);
1914
#ifdef CONFIG_REISERFS_CHECK
1915
// this makes sure, that we __append__, not overwrite or add holes
1916
static void check_research_for_paste(struct treepath *path,
1917
const struct cpu_key *key)
1919
struct item_head *found_ih = get_ih(path);
1921
if (is_direct_le_ih(found_ih)) {
1922
if (le_ih_k_offset(found_ih) +
1923
op_bytes_number(found_ih,
1924
get_last_bh(path)->b_size) !=
1925
cpu_key_k_offset(key)
1926
|| op_bytes_number(found_ih,
1927
get_last_bh(path)->b_size) !=
1929
reiserfs_panic(NULL, "PAP-5720", "found direct item "
1930
"%h or position (%d) does not match "
1931
"to key %K", found_ih,
1932
pos_in_item(path), key);
1934
if (is_indirect_le_ih(found_ih)) {
1935
if (le_ih_k_offset(found_ih) +
1936
op_bytes_number(found_ih,
1937
get_last_bh(path)->b_size) !=
1938
cpu_key_k_offset(key)
1939
|| I_UNFM_NUM(found_ih) != pos_in_item(path)
1940
|| get_ih_free_space(found_ih) != 0)
1941
reiserfs_panic(NULL, "PAP-5730", "found indirect "
1942
"item (%h) or position (%d) does not "
1943
"match to key (%K)",
1944
found_ih, pos_in_item(path), key);
1947
#endif /* config reiserfs check */
1949
/* Paste bytes to the existing item. Returns bytes number pasted into the item. */
1950
int reiserfs_paste_into_item(struct reiserfs_transaction_handle *th, struct treepath *search_path, /* Path to the pasted item. */
1951
const struct cpu_key *key, /* Key to search for the needed item. */
1952
struct inode *inode, /* Inode item belongs to */
1953
const char *body, /* Pointer to the bytes to paste. */
1955
{ /* Size of pasted bytes. */
1956
struct tree_balance s_paste_balance;
1960
BUG_ON(!th->t_trans_id);
1962
fs_gen = get_generation(inode->i_sb);
1964
#ifdef REISERQUOTA_DEBUG
1965
reiserfs_debug(inode->i_sb, REISERFS_DEBUG_CODE,
1966
"reiserquota paste_into_item(): allocating %u id=%u type=%c",
1967
pasted_size, inode->i_uid,
1968
key2type(&(key->on_disk_key)));
1971
retval = dquot_alloc_space_nodirty(inode, pasted_size);
1973
pathrelse(search_path);
1976
init_tb_struct(th, &s_paste_balance, th->t_super, search_path,
1978
#ifdef DISPLACE_NEW_PACKING_LOCALITIES
1979
s_paste_balance.key = key->on_disk_key;
1982
/* DQUOT_* can schedule, must check before the fix_nodes */
1983
if (fs_changed(fs_gen, inode->i_sb)) {
1988
fix_nodes(M_PASTE, &s_paste_balance, NULL,
1989
body)) == REPEAT_SEARCH) {
1991
/* file system changed while we were in the fix_nodes */
1992
PROC_INFO_INC(th->t_super, paste_into_item_restarted);
1994
search_for_position_by_key(th->t_super, key,
1996
if (retval == IO_ERROR) {
2000
if (retval == POSITION_FOUND) {
2001
reiserfs_warning(inode->i_sb, "PAP-5710",
2002
"entry or pasted byte (%K) exists",
2007
#ifdef CONFIG_REISERFS_CHECK
2008
check_research_for_paste(search_path, key);
2012
/* Perform balancing after all resources are collected by fix_nodes, and
2013
accessing them will not risk triggering schedule. */
2014
if (retval == CARRY_ON) {
2015
do_balance(&s_paste_balance, NULL /*ih */ , body, M_PASTE);
2018
retval = (retval == NO_DISK_SPACE) ? -ENOSPC : -EIO;
2020
/* this also releases the path */
2021
unfix_nodes(&s_paste_balance);
2022
#ifdef REISERQUOTA_DEBUG
2023
reiserfs_debug(inode->i_sb, REISERFS_DEBUG_CODE,
2024
"reiserquota paste_into_item(): freeing %u id=%u type=%c",
2025
pasted_size, inode->i_uid,
2026
key2type(&(key->on_disk_key)));
2028
dquot_free_space_nodirty(inode, pasted_size);
2032
/* Insert new item into the buffer at the path.
2033
* th - active transaction handle
2034
* path - path to the inserted item
2035
* ih - pointer to the item header to insert
2036
* body - pointer to the bytes to insert
2038
int reiserfs_insert_item(struct reiserfs_transaction_handle *th,
2039
struct treepath *path, const struct cpu_key *key,
2040
struct item_head *ih, struct inode *inode,
2043
struct tree_balance s_ins_balance;
2046
int quota_bytes = 0;
2048
BUG_ON(!th->t_trans_id);
2050
if (inode) { /* Do we count quotas for item? */
2051
fs_gen = get_generation(inode->i_sb);
2052
quota_bytes = ih_item_len(ih);
2054
/* hack so the quota code doesn't have to guess if the file has
2055
** a tail, links are always tails, so there's no guessing needed
2057
if (!S_ISLNK(inode->i_mode) && is_direct_le_ih(ih))
2058
quota_bytes = inode->i_sb->s_blocksize + UNFM_P_SIZE;
2059
#ifdef REISERQUOTA_DEBUG
2060
reiserfs_debug(inode->i_sb, REISERFS_DEBUG_CODE,
2061
"reiserquota insert_item(): allocating %u id=%u type=%c",
2062
quota_bytes, inode->i_uid, head2type(ih));
2064
/* We can't dirty inode here. It would be immediately written but
2065
* appropriate stat item isn't inserted yet... */
2066
retval = dquot_alloc_space_nodirty(inode, quota_bytes);
2072
init_tb_struct(th, &s_ins_balance, th->t_super, path,
2073
IH_SIZE + ih_item_len(ih));
2074
#ifdef DISPLACE_NEW_PACKING_LOCALITIES
2075
s_ins_balance.key = key->on_disk_key;
2077
/* DQUOT_* can schedule, must check to be sure calling fix_nodes is safe */
2078
if (inode && fs_changed(fs_gen, inode->i_sb)) {
2083
fix_nodes(M_INSERT, &s_ins_balance, ih,
2084
body)) == REPEAT_SEARCH) {
2086
/* file system changed while we were in the fix_nodes */
2087
PROC_INFO_INC(th->t_super, insert_item_restarted);
2088
retval = search_item(th->t_super, key, path);
2089
if (retval == IO_ERROR) {
2093
if (retval == ITEM_FOUND) {
2094
reiserfs_warning(th->t_super, "PAP-5760",
2095
"key %K already exists in the tree",
2102
/* make balancing after all resources will be collected at a time */
2103
if (retval == CARRY_ON) {
2104
do_balance(&s_ins_balance, ih, body, M_INSERT);
2108
retval = (retval == NO_DISK_SPACE) ? -ENOSPC : -EIO;
2110
/* also releases the path */
2111
unfix_nodes(&s_ins_balance);
2112
#ifdef REISERQUOTA_DEBUG
2113
reiserfs_debug(th->t_super, REISERFS_DEBUG_CODE,
2114
"reiserquota insert_item(): freeing %u id=%u type=%c",
2115
quota_bytes, inode->i_uid, head2type(ih));
2118
dquot_free_space_nodirty(inode, quota_bytes);