43
39
#include "xfs_inode_item.h"
44
40
#include "xfs_bmap.h"
45
41
#include "xfs_btree_trace.h"
46
#include "xfs_dir2_trace.h"
42
#include "xfs_trace.h"
46
* Define xfs inode iolock lockdep classes. We need to ensure that all active
47
* inodes are considered the same for lockdep purposes, including inodes that
48
* are recycled through the XFS_IRECLAIMABLE state. This is the only way to
49
* guarantee the locks are considered the same when there are multiple lock
50
* initialisation sites. Also, define a reclaimable inode class so it is
51
* obvious in lockdep reports which class the report is against.
53
static struct lock_class_key xfs_iolock_active;
54
struct lock_class_key xfs_iolock_reclaimable;
50
57
* Allocate and initialise an xfs_inode.
88
100
ip->i_new_size = 0;
91
* Initialize inode's trace buffers.
93
#ifdef XFS_INODE_TRACE
94
ip->i_trace = ktrace_alloc(INODE_TRACE_SIZE, KM_NOFS);
97
ip->i_xtrace = ktrace_alloc(XFS_BMAP_KTRACE_SIZE, KM_NOFS);
99
#ifdef XFS_BTREE_TRACE
100
ip->i_btrace = ktrace_alloc(XFS_BMBT_KTRACE_SIZE, KM_NOFS);
103
ip->i_rwtrace = ktrace_alloc(XFS_RW_KTRACE_SIZE, KM_NOFS);
105
#ifdef XFS_ILOCK_TRACE
106
ip->i_lock_trace = ktrace_alloc(XFS_ILOCK_KTRACE_SIZE, KM_NOFS);
108
#ifdef XFS_DIR2_TRACE
109
ip->i_dir_trace = ktrace_alloc(XFS_DIR2_KTRACE_SIZE, KM_NOFS);
112
/* prevent anyone from using this yet */
113
VFS_I(ip)->i_state = I_NEW|I_LOCK;
106
xfs_inode_free_callback(
107
struct rcu_head *head)
109
struct inode *inode = container_of(head, struct inode, i_rcu);
110
struct xfs_inode *ip = XFS_I(inode);
112
INIT_LIST_HEAD(&inode->i_dentry);
113
kmem_zone_free(xfs_inode_zone, ip);
120
118
struct xfs_inode *ip)
177
156
ASSERT(!spin_is_locked(&ip->i_flags_lock));
178
157
ASSERT(completion_done(&ip->i_flush));
180
kmem_zone_free(xfs_inode_zone, ip);
160
* Because we use RCU freeing we need to ensure the inode always
161
* appears to be reclaimed with an invalid inode number when in the
162
* free state. The ip->i_flags_lock provides the barrier against lookup
165
spin_lock(&ip->i_flags_lock);
166
ip->i_flags = XFS_IRECLAIM;
168
spin_unlock(&ip->i_flags_lock);
170
call_rcu(&VFS_I(ip)->i_rcu, xfs_inode_free_callback);
187
177
xfs_iget_cache_hit(
188
178
struct xfs_perag *pag,
189
179
struct xfs_inode *ip,
191
int lock_flags) __releases(pag->pag_ici_lock)
182
int lock_flags) __releases(RCU)
193
184
struct inode *inode = VFS_I(ip);
194
185
struct xfs_mount *mp = ip->i_mount;
189
* Check for re-use of an inode within an RCU grace period due to the
190
* radix tree nodes not being updated yet. We monitor for this by
191
* setting the inode number to zero before freeing the inode structure.
192
* If the inode has been reallocated and set up, then the inode number
193
* will not match, so check for that, too.
197
195
spin_lock(&ip->i_flags_lock);
196
if (ip->i_ino != ino) {
197
trace_xfs_iget_skip(ip);
198
XFS_STATS_INC(xs_ig_frecycle);
200
205
* If we are racing with another cache hit that is currently
225
231
* Need to carefully get it back into useable state.
227
233
if (ip->i_flags & XFS_IRECLAIMABLE) {
228
xfs_itrace_exit_tag(ip, "xfs_iget.alloc");
234
trace_xfs_iget_reclaim(ip);
231
* We need to set XFS_INEW atomically with clearing the
232
* reclaimable tag so that we do have an indicator of the
233
* inode still being initialized.
237
* We need to set XFS_IRECLAIM to prevent xfs_reclaim_inode
238
* from stomping over us while we recycle the inode. We can't
239
* clear the radix tree reclaimable tag yet as it requires
240
* pag_ici_lock to be held exclusive.
235
ip->i_flags |= XFS_INEW;
236
ip->i_flags &= ~XFS_IRECLAIMABLE;
237
__xfs_inode_clear_reclaim_tag(mp, pag, ip);
242
ip->i_flags |= XFS_IRECLAIM;
239
244
spin_unlock(&ip->i_flags_lock);
240
read_unlock(&pag->pag_ici_lock);
242
247
error = -inode_init_always(mp->m_super, inode);
245
250
* Re-initializing the inode failed, and we are in deep
246
251
* trouble. Try to re-add it to the reclaim list.
248
read_lock(&pag->pag_ici_lock);
249
254
spin_lock(&ip->i_flags_lock);
251
256
ip->i_flags &= ~XFS_INEW;
252
257
ip->i_flags |= XFS_IRECLAIMABLE;
253
258
__xfs_inode_set_reclaim_tag(pag, ip);
259
trace_xfs_iget_reclaim_fail(ip);
256
inode->i_state = I_LOCK|I_NEW;
263
spin_lock(&pag->pag_ici_lock);
264
spin_lock(&ip->i_flags_lock);
265
ip->i_flags &= ~(XFS_IRECLAIMABLE | XFS_IRECLAIM);
266
ip->i_flags |= XFS_INEW;
267
__xfs_inode_clear_reclaim_tag(mp, pag, ip);
268
inode->i_state = I_NEW;
270
ASSERT(!rwsem_is_locked(&ip->i_iolock.mr_lock));
271
mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino);
272
lockdep_set_class_and_name(&ip->i_iolock.mr_lock,
273
&xfs_iolock_active, "xfs_iolock_active");
275
spin_unlock(&ip->i_flags_lock);
276
spin_unlock(&pag->pag_ici_lock);
258
278
/* If the VFS inode is being torn down, pause and try again. */
259
279
if (!igrab(inode)) {
280
trace_xfs_iget_skip(ip);
264
285
/* We've got a live one. */
265
286
spin_unlock(&ip->i_flags_lock);
266
read_unlock(&pag->pag_ici_lock);
288
trace_xfs_iget_hit(ip);
269
291
if (lock_flags != 0)
270
292
xfs_ilock(ip, lock_flags);
272
294
xfs_iflags_clear(ip, XFS_ISTALE);
273
xfs_itrace_exit_tag(ip, "xfs_iget.found");
274
295
XFS_STATS_INC(xs_ig_found);
278
300
spin_unlock(&ip->i_flags_lock);
279
read_unlock(&pag->pag_ici_lock);
290
312
struct xfs_inode **ipp,
293
int lock_flags) __releases(pag->pag_ici_lock)
295
316
struct xfs_inode *ip;
297
unsigned long first_index, mask;
298
318
xfs_agino_t agino = XFS_INO_TO_AGINO(mp, ino);
300
320
ip = xfs_inode_alloc(mp, ino);
304
error = xfs_iread(mp, tp, ip, bno, flags);
324
error = xfs_iread(mp, tp, ip, flags);
306
326
goto out_destroy;
308
xfs_itrace_exit_tag(ip, "xfs_iget.alloc");
328
trace_xfs_iget_miss(ip);
310
330
if ((ip->i_d.di_mode == 0) && !(flags & XFS_IGET_CREATE)) {
348
366
ip->i_udquot = ip->i_gdquot = NULL;
349
367
xfs_iflags_set(ip, XFS_INEW);
351
write_unlock(&pag->pag_ici_lock);
369
spin_unlock(&pag->pag_ici_lock);
352
370
radix_tree_preload_end();
357
write_unlock(&pag->pag_ici_lock);
376
spin_unlock(&pag->pag_ici_lock);
358
377
radix_tree_preload_end();
360
379
xfs_iunlock(ip, lock_flags);
403
419
xfs_perag_t *pag;
404
420
xfs_agino_t agino;
406
/* the radix tree exists only in inode capable AGs */
407
if (XFS_INO_TO_AGNO(mp, ino) >= mp->m_maxagi)
422
/* reject inode numbers outside existing AGs */
423
if (!ino || XFS_INO_TO_AGNO(mp, ino) >= mp->m_sb.sb_agcount)
410
426
/* get the perag structure and ensure that it's inode capable */
411
pag = xfs_get_perag(mp, ino);
412
if (!pag->pagi_inodeok)
414
ASSERT(pag->pag_ici_init);
427
pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ino));
415
428
agino = XFS_INO_TO_AGINO(mp, ino);
419
read_lock(&pag->pag_ici_lock);
420
433
ip = radix_tree_lookup(&pag->pag_ici_root, agino);
423
error = xfs_iget_cache_hit(pag, ip, flags, lock_flags);
436
error = xfs_iget_cache_hit(pag, ip, ino, flags, lock_flags);
425
438
goto out_error_or_again;
427
read_unlock(&pag->pag_ici_lock);
428
441
XFS_STATS_INC(xs_ig_missed);
430
error = xfs_iget_cache_miss(mp, pag, tp, ino, &ip, bno,
443
error = xfs_iget_cache_miss(mp, pag, tp, ino, &ip,
431
444
flags, lock_flags);
433
446
goto out_error_or_again;
435
xfs_put_perag(mp, pag);
454
xfs_put_perag(mp, pag);
459
* Decrement reference count of an inode structure and unlock it.
461
* ip -- the inode being released
462
* lock_flags -- this parameter indicates the inode's locks to be
463
* released. See the comment on xfs_iunlock() for a list
467
xfs_iput(xfs_inode_t *ip,
470
xfs_itrace_entry(ip);
471
xfs_iunlock(ip, lock_flags);
476
* Special iput for brand-new inodes that are still locked
483
struct inode *inode = VFS_I(ip);
485
xfs_itrace_entry(ip);
487
if ((ip->i_d.di_mode == 0)) {
488
ASSERT(!xfs_iflags_test(ip, XFS_IRECLAIMABLE));
489
make_bad_inode(inode);
491
if (inode->i_state & I_NEW)
492
unlock_new_inode(inode);
494
xfs_iunlock(ip, lock_flags);
499
* This is called to free all the memory associated with an inode.
500
* It must free the inode itself and any buffers allocated for
501
* if_extents/if_data and if_broot. It must also free the lock
502
* associated with the inode.
504
* Note: because we don't initialise everything on reallocation out
505
* of the zone, we must ensure we nullify everything correctly before
506
* freeing the structure.
510
struct xfs_inode *ip)
512
struct xfs_mount *mp = ip->i_mount;
513
struct xfs_perag *pag;
515
XFS_STATS_INC(xs_ig_reclaims);
518
* Remove the inode from the per-AG radix tree. It doesn't matter
519
* if it was never added to it because radix_tree_delete can deal
520
* with that case just fine.
522
pag = xfs_get_perag(mp, ip->i_ino);
523
write_lock(&pag->pag_ici_lock);
524
radix_tree_delete(&pag->pag_ici_root, XFS_INO_TO_AGINO(mp, ip->i_ino));
525
write_unlock(&pag->pag_ici_lock);
526
xfs_put_perag(mp, pag);
529
* Here we do an (almost) spurious inode lock in order to coordinate
530
* with inode cache radix tree lookups. This is because the lookup
531
* can reference the inodes in the cache without taking references.
533
* We make that OK here by ensuring that we wait until the inode is
534
* unlocked after the lookup before we go ahead and free it. We get
535
* both the ilock and the iolock because the code may need to drop the
536
* ilock one but will still hold the iolock.
538
xfs_ilock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
540
xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
546
472
* This is a wrapper routine around the xfs_ilock() routine
547
473
* used to centralize some grungy code. It is used in places
548
474
* that wish to lock the inode solely for reading the extents.
762
688
mrdemote(&ip->i_lock);
763
689
if (lock_flags & XFS_IOLOCK_EXCL)
764
690
mrdemote(&ip->i_iolock);
692
trace_xfs_ilock_demote(ip, lock_flags, _RET_IP_);
769
* Debug-only routine, without additional rw_semaphore APIs, we can
770
* now only answer requests regarding whether we hold the lock for write
771
* (reader state is outside our visibility, we only track writer state).
773
* Note: this means !xfs_isilocked would give false positives, so don't do that.
780
if ((lock_flags & (XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)) ==
782
if (!ip->i_lock.mr_writer)
786
if ((lock_flags & (XFS_IOLOCK_EXCL|XFS_IOLOCK_SHARED)) ==
788
if (!ip->i_iolock.mr_writer)
701
if (lock_flags & (XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)) {
702
if (!(lock_flags & XFS_ILOCK_SHARED))
703
return !!ip->i_lock.mr_writer;
704
return rwsem_is_locked(&ip->i_lock.mr_lock);
707
if (lock_flags & (XFS_IOLOCK_EXCL|XFS_IOLOCK_SHARED)) {
708
if (!(lock_flags & XFS_IOLOCK_SHARED))
709
return !!ip->i_iolock.mr_writer;
710
return rwsem_is_locked(&ip->i_iolock.mr_lock);
796
#ifdef XFS_INODE_TRACE
798
#define KTRACE_ENTER(ip, vk, s, line, ra) \
799
ktrace_enter((ip)->i_trace, \
800
/* 0 */ (void *)(__psint_t)(vk), \
801
/* 1 */ (void *)(s), \
802
/* 2 */ (void *)(__psint_t) line, \
803
/* 3 */ (void *)(__psint_t)atomic_read(&VFS_I(ip)->i_count), \
804
/* 4 */ (void *)(ra), \
806
/* 6 */ (void *)(__psint_t)current_cpu(), \
807
/* 7 */ (void *)(__psint_t)current_pid(), \
808
/* 8 */ (void *)__return_address, \
809
/* 9 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL)
812
* Vnode tracing code.
815
_xfs_itrace_entry(xfs_inode_t *ip, const char *func, inst_t *ra)
817
KTRACE_ENTER(ip, INODE_KTRACE_ENTRY, func, 0, ra);
821
_xfs_itrace_exit(xfs_inode_t *ip, const char *func, inst_t *ra)
823
KTRACE_ENTER(ip, INODE_KTRACE_EXIT, func, 0, ra);
827
xfs_itrace_hold(xfs_inode_t *ip, char *file, int line, inst_t *ra)
829
KTRACE_ENTER(ip, INODE_KTRACE_HOLD, file, line, ra);
833
_xfs_itrace_ref(xfs_inode_t *ip, char *file, int line, inst_t *ra)
835
KTRACE_ENTER(ip, INODE_KTRACE_REF, file, line, ra);
839
xfs_itrace_rele(xfs_inode_t *ip, char *file, int line, inst_t *ra)
841
KTRACE_ENTER(ip, INODE_KTRACE_RELE, file, line, ra);
843
#endif /* XFS_INODE_TRACE */