2
* Copyright (C) International Business Machines Corp., 2000-2005
3
* Portions Copyright (C) Christoph Hellwig, 2001-2002
5
* This program is free software; you can redistribute it and/or modify
6
* it under the terms of the GNU General Public License as published by
7
* the Free Software Foundation; either version 2 of the License, or
8
* (at your option) any later version.
10
* This program is distributed in the hope that it will be useful,
11
* but WITHOUT ANY WARRANTY; without even the implied warranty of
12
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
13
* the GNU General Public License for more details.
15
* You should have received a copy of the GNU General Public License
16
* along with this program; if not, write to the Free Software
17
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22
#include <linux/module.h>
23
#include <linux/bio.h>
24
#include <linux/slab.h>
25
#include <linux/init.h>
26
#include <linux/buffer_head.h>
27
#include <linux/mempool.h>
28
#include <linux/seq_file.h>
29
#include "jfs_incore.h"
30
#include "jfs_superblock.h"
31
#include "jfs_filsys.h"
32
#include "jfs_metapage.h"
33
#include "jfs_txnmgr.h"
34
#include "jfs_debug.h"
36
/*
 * Statistics counters, compiled only when CONFIG_JFS_STATISTICS is defined.
 * Code further down updates counters of these names through
 * INCREMENT(mpStat.<name>), so these are presumably the members of mpStat.
 * NOTE(review): the enclosing declaration is not visible in this listing.
 */
#ifdef CONFIG_JFS_STATISTICS
38
uint pagealloc; /* # of page allocations */
39
uint pagefree; /* # of page frees */
40
uint lockwait; /* # of sleeping lock_metapage() calls */
44
/*
 * The metapage lock is the META_locked bit of mp->flag.
 * metapage_locked() only tests the bit.  trylock_metapage() expands to
 * test_and_set_bit_lock(), so it yields the bit's previous value: non-zero
 * means the lock was already held and was NOT acquired by this call.
 */
#define metapage_locked(mp) test_bit(META_locked, &(mp)->flag)
45
#define trylock_metapage(mp) test_and_set_bit_lock(META_locked, &(mp)->flag)
47
/*
 * Release the metapage lock by clearing META_locked in mp->flag with
 * clear_bit_unlock().
 * NOTE(review): this listing does not show the whole body; any wake-up of
 * tasks sleeping on mp->wait would be in the elided lines -- confirm.
 */
static inline void unlock_metapage(struct metapage *mp)
49
clear_bit_unlock(META_locked, &mp->flag);
53
/*
 * Contended path for taking the metapage lock.
 *
 * Counts the wait in mpStat.lockwait, queues the current task on mp->wait
 * as an exclusive waiter, and loops until trylock_metapage() returns zero
 * (lock acquired).  On each pass the task state is set to
 * TASK_UNINTERRUPTIBLE and, if the metapage is still locked, the lock on
 * mp->page is dropped.  Afterwards the task is set back to TASK_RUNNING
 * and removed from the wait queue.
 *
 * NOTE(review): the loop opener and the rest of the "still locked" branch
 * are not visible in this listing -- presumably the task sleeps there and
 * re-takes the page lock before retrying; confirm against the full file.
 */
static inline void __lock_metapage(struct metapage *mp)
55
DECLARE_WAITQUEUE(wait, current);
56
INCREMENT(mpStat.lockwait);
57
add_wait_queue_exclusive(&mp->wait, &wait);
59
set_current_state(TASK_UNINTERRUPTIBLE);
60
if (metapage_locked(mp)) {
61
unlock_page(mp->page);
65
} while (trylock_metapage(mp));
66
__set_current_state(TASK_RUNNING);
67
remove_wait_queue(&mp->wait, &wait);
71
* Must have mp->page locked
73
/*
 * Take the metapage lock; the caller must already hold the lock on
 * mp->page.  The fast path is a single trylock_metapage(); a non-zero
 * result means the lock bit was already set by someone else.
 * NOTE(review): the statement guarded by the if is not visible here --
 * presumably the __lock_metapage() slow path.
 */
static inline void lock_metapage(struct metapage *mp)
75
if (trylock_metapage(mp))
79
/*
 * Allocation backing for struct metapage.  metapage_init() creates the
 * slab cache (named "jfs_mp") and a slab-backed mempool on top of it,
 * passing METAPOOL_MIN_PAGES as the pool's minimum element count.
 * alloc_metapage()/free_metapage() go through the mempool.
 */
#define METAPOOL_MIN_PAGES 32
80
static struct kmem_cache *metapage_cache;
81
static mempool_t *metapage_mempool;
83
/*
 * MPS_PER_PAGE: PAGE_CACHE_SIZE divided by 2^L2PSIZE, i.e. how many
 * metapages fit in one page-cache page.
 *
 * The mp[] array holds one metapage pointer per slot; it is accessed below
 * as a member of struct meta_anchor (a->mp[index]).
 * NOTE(review): the struct's header and other members (mp_count, io_count
 * are used below) are not visible in this listing.
 *
 * mp_anchor(): the page's private word, cast to struct meta_anchor *.
 */
#define MPS_PER_PAGE (PAGE_CACHE_SIZE >> L2PSIZE)
90
struct metapage *mp[MPS_PER_PAGE];
92
#define mp_anchor(page) ((struct meta_anchor *)page_private(page))
94
/*
 * Anchor-based lookup: return the metapage registered for byte `offset`
 * of `page`, found in slot (offset >> L2PSIZE) of the page's meta_anchor.
 * A page without PagePrivate set has no anchor.
 * NOTE(review): the statement guarded by the PagePrivate() test is not
 * visible here -- presumably "return NULL", matching the other
 * page_to_mp() definition in this file.
 */
static inline struct metapage *page_to_mp(struct page *page, int offset)
96
if (!PagePrivate(page))
98
return mp_anchor(page)->mp[offset >> L2PSIZE];
101
/*
 * Anchor-based variant: register `mp` with `page`.
 *
 * A struct meta_anchor is obtained with kzalloc(GFP_NOFS), stored in the
 * page's private word, and PagePrivate is set.  The slot for `mp` is
 * computed from mp->index: shift right by l2mp_blocks (L2PSIZE minus the
 * inode's i_blkbits, i.e. log2 of blocks per metapage) and mask with
 * MPS_PER_PAGE - 1.
 *
 * Callers treat a non-zero return as failure (see __get_metapage()), and
 * metapage_readpage() passes mp == NULL just to attach an anchor.
 *
 * NOTE(review): the branch bodies, the kzalloc() failure handling, the
 * NULL-mp handling and the final store into the anchor are not visible in
 * this listing; confirm against the full file.
 */
static inline int insert_metapage(struct page *page, struct metapage *mp)
103
struct meta_anchor *a;
105
int l2mp_blocks; /* log2 blocks per metapage */
107
if (PagePrivate(page))
110
a = kzalloc(sizeof(struct meta_anchor), GFP_NOFS);
113
set_page_private(page, (unsigned long)a);
114
SetPagePrivate(page);
119
l2mp_blocks = L2PSIZE - page->mapping->host->i_blkbits;
120
index = (mp->index >> l2mp_blocks) & (MPS_PER_PAGE - 1);
128
/*
 * Anchor-based variant: unregister `mp` from `page`.
 *
 * The slot index is derived from mp->index exactly as in
 * insert_metapage().  It is a BUG if that slot does not currently hold
 * `mp`.  When the anchor's mp_count drops to zero the page's private word
 * is cleared and PagePrivate is reset.
 *
 * NOTE(review): clearing of the slot and disposal of the anchor itself are
 * not visible in this listing.
 */
static inline void remove_metapage(struct page *page, struct metapage *mp)
130
struct meta_anchor *a = mp_anchor(page);
131
int l2mp_blocks = L2PSIZE - page->mapping->host->i_blkbits;
134
index = (mp->index >> l2mp_blocks) & (MPS_PER_PAGE - 1);
136
BUG_ON(a->mp[index] != mp);
139
if (--a->mp_count == 0) {
141
set_page_private(page, 0);
142
ClearPagePrivate(page);
147
/*
 * Anchor-based variant: atomically add one to the io_count kept in the
 * page's meta_anchor.  The page must already have an anchor attached,
 * since mp_anchor(page) is dereferenced unconditionally.
 */
static inline void inc_io(struct page *page)
149
atomic_inc(&mp_anchor(page)->io_count);
152
/*
 * Anchor-based variant: atomically subtract one from the anchor's
 * io_count.  atomic_dec_and_test() is true only for the decrement that
 * brings the count to zero.
 * NOTE(review): the guarded statement is not visible here -- presumably
 * handler(page), as in the macro form of dec_io() in this file.
 */
static inline void dec_io(struct page *page, void (*handler) (struct page *))
154
if (atomic_dec_and_test(&mp_anchor(page)->io_count))
159
/*
 * Direct variant: the page's private word is the metapage pointer itself.
 * Returns it when PagePrivate is set, NULL otherwise; `offset` is unused.
 * NOTE(review): this is a second definition of the same name, so it is
 * presumably the alternative branch of a preprocessor conditional whose
 * directives are not visible in this listing.
 */
static inline struct metapage *page_to_mp(struct page *page, int offset)
161
return PagePrivate(page) ? (struct metapage *)page_private(page) : NULL;
164
/*
 * Direct variant: store `mp` in the page's private word and set
 * PagePrivate.
 * NOTE(review): any guard around these two statements and the return
 * value are not visible in this listing.
 */
static inline int insert_metapage(struct page *page, struct metapage *mp)
167
set_page_private(page, (unsigned long)mp);
168
SetPagePrivate(page);
174
/*
 * Direct variant: clear the page's private word and reset PagePrivate.
 * `mp` is not referenced by the visible statements.
 */
static inline void remove_metapage(struct page *page, struct metapage *mp)
176
set_page_private(page, 0);
177
ClearPagePrivate(page);
181
/*
 * Macro forms of the I/O accounting helpers: inc_io() expands to an empty
 * statement and dec_io() calls the completion handler on the page
 * immediately, with no counter involved.
 */
#define inc_io(page) do {} while(0)
182
#define dec_io(page, handler) handler(page)
186
/*
 * Initialiser for a struct metapage object; `foo` is the object, passed as
 * void *.  The visible statements set META_free in mp->flag and initialise
 * the mp->wait wait-queue head.
 * NOTE(review): other field initialisation between these lines is not
 * visible; presumably this is the constructor handed to
 * kmem_cache_create() in metapage_init() -- confirm.
 */
static void init_once(void *foo)
188
struct metapage *mp = (struct metapage *)foo;
196
set_bit(META_free, &mp->flag);
197
init_waitqueue_head(&mp->wait);
200
/*
 * Allocate a struct metapage from metapage_mempool using the caller's
 * gfp_mask.  Returns whatever mempool_alloc() returns.
 */
static inline struct metapage *alloc_metapage(gfp_t gfp_mask)
202
return mempool_alloc(metapage_mempool, gfp_mask);
205
/*
 * Return a struct metapage to metapage_mempool.  META_free is set in
 * mp->flag first, so the object goes back in the same flag state that
 * init_once() establishes.
 * NOTE(review): a line between the signature and set_bit() is elided in
 * this listing.
 */
static inline void free_metapage(struct metapage *mp)
208
set_bit(META_free, &mp->flag);
210
mempool_free(mp, metapage_mempool);
213
/*
 * Module-init helper: create the "jfs_mp" slab cache sized for
 * struct metapage, then a slab-backed mempool with METAPOOL_MIN_PAGES
 * reserved elements.  If the mempool cannot be created the slab cache is
 * destroyed again so nothing is left allocated.
 * NOTE(review): the remaining arguments of both create calls and the
 * return statements are not visible in this listing; the return value is
 * an int, presumably 0 on success and a negative errno on failure.
 */
int __init metapage_init(void)
216
* Allocate the metapage structures
218
metapage_cache = kmem_cache_create("jfs_mp", sizeof(struct metapage),
220
if (metapage_cache == NULL)
223
metapage_mempool = mempool_create_slab_pool(METAPOOL_MIN_PAGES,
226
if (metapage_mempool == NULL) {
227
kmem_cache_destroy(metapage_cache);
234
/*
 * Tear down what metapage_init() created, in reverse order: the mempool
 * first, then the slab cache it draws from.
 */
void metapage_exit(void)
236
mempool_destroy(metapage_mempool);
237
kmem_cache_destroy(metapage_cache);
240
/*
 * Detach `mp` from `page` if nothing still needs it.  The guard tests for
 * a non-zero mp->count, a non-zero mp->nohomeok, or either META_dirty or
 * META_io set in mp->flag.  Past the guard the metapage is unregistered
 * from the page and mpStat.pagefree is incremented.
 * NOTE(review): the statement under the guard (presumably an early return)
 * and whatever follows the INCREMENT (presumably freeing mp) are not
 * visible in this listing.
 */
static inline void drop_metapage(struct page *page, struct metapage *mp)
242
if (mp->count || mp->nohomeok || test_bit(META_dirty, &mp->flag) ||
243
test_bit(META_io, &mp->flag))
245
remove_metapage(page, mp);
246
INCREMENT(mpStat.pagefree);
251
* Metapage address space operations
254
/*
 * Translate logical block `lblock` of `inode` into a block address and
 * tell the caller how many blocks are covered through *len.
 *
 * file_blocks is the file size in blocks, rounded up.  A request starting
 * at or beyond file_blocks is rejected, and *len is clipped so the range
 * does not run past the end of the file.  xtLookup() is then asked for the
 * extent; when it succeeds with a non-zero length, lblock is replaced by
 * the returned address xaddr.
 *
 * NOTE(review): the rest of the signature, the shift applied to
 * file_blocks, the condition selecting the xtLookup() path, and the
 * return statements are not visible in this listing.  Callers pass `len`
 * as an int pointer (&xlen).
 */
static sector_t metapage_get_blocks(struct inode *inode, sector_t lblock,
260
sector_t file_blocks = (inode->i_size + inode->i_sb->s_blocksize - 1) >>
263
if (lblock >= file_blocks)
265
if (lblock + *len > file_blocks)
266
*len = file_blocks - lblock;
269
rc = xtLookup(inode, (s64)lblock, *len, &xflag, &xaddr, len, 0);
270
if ((rc == 0) && *len)
271
lblock = (sector_t)xaddr;
274
} /* else no mapping */
279
/*
 * Completion handler passed to dec_io() by the read paths.  Marks the page
 * up to date unless PageError is set on it.
 * NOTE(review): statements after SetPageUptodate() (presumably unlocking
 * the page) are not visible in this listing.
 */
static void last_read_complete(struct page *page)
281
if (!PageError(page))
282
SetPageUptodate(page);
286
/*
 * bio completion callback for metapage reads (installed as bi_end_io by
 * metapage_readpage()).  The page is recovered from bio->bi_private.  If
 * the bio lacks BIO_UPTODATE an error is logged; in all cases one I/O
 * reference is dropped with last_read_complete as the final handler.
 * NOTE(review): the rest of the error branch and any release of the bio
 * are not visible in this listing.
 */
static void metapage_read_end_io(struct bio *bio, int err)
288
struct page *page = bio->bi_private;
290
if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) {
291
printk(KERN_ERR "metapage_read_end_io: I/O error\n");
295
dec_io(page, last_read_complete);
299
/*
 * Take `mp` off its log's sync list.  The log pointer is sampled from
 * mp->log, and the list removal is done between LOGSYNC_LOCK() and
 * LOGSYNC_UNLOCK() on that log.  As the in-body comment says, this can
 * race, so the log pointer and the lsn are re-checked.
 * NOTE(review): those re-checks and any field resets around list_del()
 * are not visible in this listing.
 */
static void remove_from_logsync(struct metapage *mp)
301
struct jfs_log *log = mp->log;
304
* This can race. Recheck that log hasn't been set to null, and after
305
* acquiring logsync lock, recheck lsn
310
LOGSYNC_LOCK(log, flags);
316
list_del(&mp->synclist);
318
LOGSYNC_UNLOCK(log, flags);
321
/*
 * Completion handler passed to dec_io() by the write paths.  Walks the
 * page in PSIZE steps; every metapage found with META_io set is removed
 * from the log sync list and has META_io cleared.  Finally page writeback
 * is ended.  Metapages are deliberately not dropped here (see the comment
 * in the loop: the page lock is not held).
 */
static void last_write_complete(struct page *page)
326
for (offset = 0; offset < PAGE_CACHE_SIZE; offset += PSIZE) {
327
mp = page_to_mp(page, offset);
328
if (mp && test_bit(META_io, &mp->flag)) {
330
remove_from_logsync(mp);
331
clear_bit(META_io, &mp->flag);
334
* I'd like to call drop_metapage here, but I don't think it's
335
* safe unless I have the page locked
338
end_page_writeback(page);
341
/*
 * bio completion callback for metapage writes (installed as bi_end_io by
 * metapage_writepage()).  The page comes from bio->bi_private and must
 * have PagePrivate set.  A bio without BIO_UPTODATE is logged as an I/O
 * error; then one I/O reference is dropped with last_write_complete as the
 * final handler.
 * NOTE(review): the rest of the error branch and any release of the bio
 * are not visible in this listing.
 */
static void metapage_write_end_io(struct bio *bio, int err)
343
struct page *page = bio->bi_private;
345
BUG_ON(!PagePrivate(page));
347
if (! test_bit(BIO_UPTODATE, &bio->bi_flags)) {
348
printk(KERN_ERR "metapage_write_end_io: I/O error\n");
351
dec_io(page, last_write_complete);
355
/*
 * ->writepage for the metapage address space.
 *
 * Preconditions enforced with BUG_ON: the page is locked and not already
 * under writeback.  The page is then marked as under writeback and
 * scanned in PSIZE steps:
 *
 *  - slots with no metapage, or whose metapage is not META_dirty, are
 *    not written;
 *  - a metapage with nohomeok set and META_forcewrite clear is not
 *    written now; if it has a log that is not in logGC_PAGEOUT state the
 *    journal is flushed so the page is not held back indefinitely;
 *  - otherwise META_dirty is cleared, META_io is set, and the metapage's
 *    blocks are added to the I/O being built.
 *
 * bio construction: when the metapage's first logical block equals
 * next_block and the current extent still has length left (xlen), the
 * pending byte count is simply extended.  Otherwise any pending bio is
 * finalised with bio_add_page() and submitted, a new extent is looked up
 * with metapage_get_blocks(), and a fresh single-vec bio is set up with
 * metapage_write_end_io as its completion callback.  After the loop the
 * last pending bio is added and submitted in the same way.
 *
 * NOTE(review): many lines are elided in this listing, including several
 * local declarations, the continue/goto statements, the inc_io() calls
 * referred to by the comments, the conditions guarding
 * redirty_page_for_writepage() and the error labels, and the return
 * statements.  Confirm control flow against the full file before relying
 * on this summary.
 */
static int metapage_writepage(struct page *page, struct writeback_control *wbc)
357
struct bio *bio = NULL;
358
int block_offset; /* block offset of mp within page */
359
struct inode *inode = page->mapping->host;
360
int blocks_per_mp = JFS_SBI(inode->i_sb)->nbperpage;
368
sector_t next_block = 0;
370
unsigned long bio_bytes = 0;
371
unsigned long bio_offset = 0;
375
page_start = (sector_t)page->index <<
376
(PAGE_CACHE_SHIFT - inode->i_blkbits);
377
BUG_ON(!PageLocked(page));
378
BUG_ON(PageWriteback(page));
379
set_page_writeback(page);
381
for (offset = 0; offset < PAGE_CACHE_SIZE; offset += PSIZE) {
382
mp = page_to_mp(page, offset);
384
if (!mp || !test_bit(META_dirty, &mp->flag))
387
if (mp->nohomeok && !test_bit(META_forcewrite, &mp->flag)) {
390
* Make sure this page isn't blocked indefinitely.
391
* If the journal isn't undergoing I/O, push it
393
if (mp->log && !(mp->log->cflag & logGC_PAGEOUT))
394
jfs_flush_journal(mp->log, 0);
398
clear_bit(META_dirty, &mp->flag);
399
set_bit(META_io, &mp->flag);
400
block_offset = offset >> inode->i_blkbits;
401
lblock = page_start + block_offset;
403
if (xlen && lblock == next_block) {
404
/* Contiguous, in memory & on disk */
405
len = min(xlen, blocks_per_mp);
407
bio_bytes += len << inode->i_blkbits;
411
if (bio_add_page(bio, page, bio_bytes, bio_offset) <
415
* Increment counter before submitting i/o to keep
416
* count from hitting zero before we're through
421
submit_bio(WRITE, bio);
426
xlen = (PAGE_CACHE_SIZE - offset) >> inode->i_blkbits;
427
pblock = metapage_get_blocks(inode, lblock, &xlen);
429
printk(KERN_ERR "JFS: metapage_get_blocks failed\n");
431
* We already called inc_io(), but can't cancel it
432
* with dec_io() until we're done with the page
437
len = min(xlen, (int)JFS_SBI(inode->i_sb)->nbperpage);
439
bio = bio_alloc(GFP_NOFS, 1);
440
bio->bi_bdev = inode->i_sb->s_bdev;
441
bio->bi_sector = pblock << (inode->i_blkbits - 9);
442
bio->bi_end_io = metapage_write_end_io;
443
bio->bi_private = page;
445
/* Don't call bio_add_page yet, we may add to this vec */
447
bio_bytes = len << inode->i_blkbits;
450
next_block = lblock + len;
453
if (bio_add_page(bio, page, bio_bytes, bio_offset) < bio_bytes)
458
submit_bio(WRITE, bio);
462
redirty_page_for_writepage(wbc, page);
469
if (nr_underway == 0)
470
end_page_writeback(page);
474
/* We should never reach here, since we're only adding one vec */
475
printk(KERN_ERR "JFS: bio_add_page failed unexpectedly\n");
478
print_hex_dump(KERN_ERR, "JFS: dump of bio: ", DUMP_PREFIX_ADDRESS, 16,
479
4, bio, sizeof(*bio), 0);
483
dec_io(page, last_write_complete);
486
dec_io(page, last_write_complete);
490
/*
 * ->readpage for the metapage address space.  `fp` is not referenced by
 * the visible statements.
 *
 * The page must be locked (BUG_ON otherwise).  page_start is the page's
 * first file-system block.  The loop covers the page's blocks: for the
 * range starting at block_offset it asks metapage_get_blocks() for an
 * extent of up to the remaining blocks.  For a mapped extent it makes sure
 * the page has private data attached (insert_metapage(page, NULL)),
 * submits any bio built on the previous pass, and sets up a new
 * single-vec READ bio whose completion callback is metapage_read_end_io;
 * block_offset then advances by the extent length.  The last bio is
 * submitted after the loop.
 *
 * The bi_sector computation shifts the block number left by
 * (i_blkbits - 9), i.e. converts blocks to 512-byte units.
 *
 * NOTE(review): several declarations, the inc_io() calls, the handling of
 * unmapped ranges, the error label structure and the return statements are
 * not visible in this listing.
 */
static int metapage_readpage(struct file *fp, struct page *page)
492
struct inode *inode = page->mapping->host;
493
struct bio *bio = NULL;
495
int blocks_per_page = PAGE_CACHE_SIZE >> inode->i_blkbits;
496
sector_t page_start; /* address of page in fs blocks */
502
BUG_ON(!PageLocked(page));
503
page_start = (sector_t)page->index <<
504
(PAGE_CACHE_SHIFT - inode->i_blkbits);
507
while (block_offset < blocks_per_page) {
508
xlen = blocks_per_page - block_offset;
509
pblock = metapage_get_blocks(inode, page_start + block_offset,
512
if (!PagePrivate(page))
513
insert_metapage(page, NULL);
516
submit_bio(READ, bio);
518
bio = bio_alloc(GFP_NOFS, 1);
519
bio->bi_bdev = inode->i_sb->s_bdev;
520
bio->bi_sector = pblock << (inode->i_blkbits - 9);
521
bio->bi_end_io = metapage_read_end_io;
522
bio->bi_private = page;
523
len = xlen << inode->i_blkbits;
524
offset = block_offset << inode->i_blkbits;
525
if (bio_add_page(bio, page, len, offset) < len)
527
block_offset += xlen;
532
submit_bio(READ, bio);
539
printk(KERN_ERR "JFS: bio_add_page failed unexpectedly\n");
541
dec_io(page, last_read_complete);
545
/*
 * ->releasepage for the metapage address space.  `gfp_mask` is not
 * referenced by the visible statements.
 *
 * Walks the page in PSIZE steps.  A metapage that still has a non-zero
 * count or nohomeok, or is META_dirty, is reported via jfs_info and not
 * released.  Any other metapage is taken off the log sync list,
 * unregistered from the page, and counted in mpStat.pagefree.
 *
 * NOTE(review): the NULL-mp check, the bookkeeping of the return value,
 * the freeing of the metapage and the return statement are not visible in
 * this listing.
 */
static int metapage_releasepage(struct page *page, gfp_t gfp_mask)
551
for (offset = 0; offset < PAGE_CACHE_SIZE; offset += PSIZE) {
552
mp = page_to_mp(page, offset);
557
jfs_info("metapage_releasepage: mp = 0x%p", mp);
558
if (mp->count || mp->nohomeok ||
559
test_bit(META_dirty, &mp->flag)) {
560
jfs_info("count = %ld, nohomeok = %d", mp->count,
566
remove_from_logsync(mp);
567
remove_metapage(page, mp);
568
INCREMENT(mpStat.pagefree);
574
/*
 * ->invalidatepage for the metapage address space.  The page must not be
 * under writeback (BUG_ON otherwise); the work is delegated to
 * metapage_releasepage() with a gfp mask of 0, and its return value is
 * ignored.
 * NOTE(review): a line before the BUG_ON is elided in this listing --
 * possibly a check on `offset`; confirm.
 */
static void metapage_invalidatepage(struct page *page, unsigned long offset)
578
BUG_ON(PageWriteback(page));
580
metapage_releasepage(page, 0);
583
/*
 * Address-space operations table for mappings that hold metapages.  Read,
 * write, release and invalidate are implemented in this file; dirtying
 * uses the generic __set_page_dirty_nobuffers.
 */
const struct address_space_operations jfs_metapage_aops = {
584
.readpage = metapage_readpage,
585
.writepage = metapage_writepage,
586
.releasepage = metapage_releasepage,
587
.invalidatepage = metapage_invalidatepage,
588
.set_page_dirty = __set_page_dirty_nobuffers,
591
/*
 * Look up, or create, the metapage for block `lblock`, `size` bytes long.
 *
 * page_index and page_offset locate the block inside the page cache:
 * l2BlocksPerPage is PAGE_CACHE_SHIFT minus the inode's i_blkbits.
 * Metadata that would cross a page boundary (page_offset + size >
 * PAGE_CACHE_SIZE) is reported with jfs_err.
 *
 * Two mappings are used: the superblock's direct_inode mapping, or the
 * inode's own mapping.  In the latter case a block at or beyond i_size is
 * detected first (see the in-body comment about NFS clients).
 *
 * The page is obtained with grab_cache_page() and marked up to date when
 * `new` is set and a metapage fills the whole page; otherwise it is read
 * with read_mapping_page() and must come back without error and up to
 * date.
 *
 * If the page already carries a metapage at page_offset, its logical_size
 * must match `size`, and a metapage flagged META_discard is reported as an
 * error when reused for existing data, or has the flag cleared.
 * Otherwise a new metapage is allocated (counted in mpStat.pagealloc),
 * initialised (xflag = COMMIT_PAGE, logical_size, data pointer into the
 * page) and registered with insert_metapage(); a non-zero result from
 * that call is treated as failure.  For new data the buffer is zeroed for
 * PSIZE bytes.
 *
 * NOTE(review): the end of the parameter list (the body uses `new`),
 * several declarations, the branch structure choosing between the paths
 * above, reference counting / locking of the metapage, the error exits
 * and the return statements are not visible in this listing.
 */
struct metapage *__get_metapage(struct inode *inode, unsigned long lblock,
592
unsigned int size, int absolute,
597
struct address_space *mapping;
598
struct metapage *mp = NULL;
600
unsigned long page_index;
601
unsigned long page_offset;
603
jfs_info("__get_metapage: ino = %ld, lblock = 0x%lx, abs=%d",
604
inode->i_ino, lblock, absolute);
606
l2bsize = inode->i_blkbits;
607
l2BlocksPerPage = PAGE_CACHE_SHIFT - l2bsize;
608
page_index = lblock >> l2BlocksPerPage;
609
page_offset = (lblock - (page_index << l2BlocksPerPage)) << l2bsize;
610
if ((page_offset + size) > PAGE_CACHE_SIZE) {
611
jfs_err("MetaData crosses page boundary!!");
612
jfs_err("lblock = %lx, size = %d", lblock, size);
617
mapping = JFS_SBI(inode->i_sb)->direct_inode->i_mapping;
620
* If an nfs client tries to read an inode that is larger
621
* than any existing inodes, we may try to read past the
622
* end of the inode map
624
if ((lblock << inode->i_blkbits) >= inode->i_size)
626
mapping = inode->i_mapping;
629
if (new && (PSIZE == PAGE_CACHE_SIZE)) {
630
page = grab_cache_page(mapping, page_index);
632
jfs_err("grab_cache_page failed!");
635
SetPageUptodate(page);
637
page = read_mapping_page(mapping, page_index, NULL);
638
if (IS_ERR(page) || !PageUptodate(page)) {
639
jfs_err("read_mapping_page failed!");
645
mp = page_to_mp(page, page_offset);
647
if (mp->logical_size != size) {
648
jfs_error(inode->i_sb,
649
"__get_metapage: mp->logical_size != size");
650
jfs_err("logical_size = %d, size = %d",
651
mp->logical_size, size);
657
if (test_bit(META_discard, &mp->flag)) {
659
jfs_error(inode->i_sb,
660
"__get_metapage: using a "
661
"discarded metapage");
662
discard_metapage(mp);
665
clear_bit(META_discard, &mp->flag);
668
INCREMENT(mpStat.pagealloc);
669
mp = alloc_metapage(GFP_NOFS);
672
mp->xflag = COMMIT_PAGE;
675
mp->logical_size = size;
676
mp->data = page_address(page) + page_offset;
678
if (unlikely(insert_metapage(page, mp))) {
686
jfs_info("zeroing mp = 0x%p", mp);
687
memset(mp->data, 0, PSIZE);
691
jfs_info("__get_metapage: returning = 0x%p data = 0x%p", mp, mp->data);
699
/*
 * Take an additional hold on an existing metapage: a page-cache reference
 * is added to mp->page, and the page is unlocked at the end.
 * NOTE(review): the statements between those two calls (presumably locking
 * the page, bumping mp->count and taking the metapage lock) are not
 * visible in this listing.
 */
void grab_metapage(struct metapage * mp)
701
jfs_info("grab_metapage: mp = 0x%p", mp);
702
page_cache_get(mp->page);
706
unlock_page(mp->page);
709
/*
 * Write a metapage's page out now.  META_forcewrite is set for the
 * duration of the write (metapage_writepage() tests it to let a nohomeok
 * metapage through) and META_sync is cleared.  A page reference is held
 * across set_page_dirty() + write_one_page(page, 1) and released
 * afterwards.
 * NOTE(review): a line between page_cache_get() and set_page_dirty()
 * is elided in this listing -- presumably lock_page(), since
 * metapage_writepage() requires a locked page; confirm.
 */
void force_metapage(struct metapage *mp)
711
struct page *page = mp->page;
712
jfs_info("force_metapage: mp = 0x%p", mp);
713
set_bit(META_forcewrite, &mp->flag);
714
clear_bit(META_sync, &mp->flag);
715
page_cache_get(page);
717
set_page_dirty(page);
718
write_one_page(page, 1);
719
clear_bit(META_forcewrite, &mp->flag);
720
page_cache_release(page);
723
void hold_metapage(struct metapage *mp)
728
/*
 * Counterpart of hold_metapage().  If the metapage still has a non-zero
 * count or nohomeok, another holder will release it, so only the page is
 * unlocked.  Otherwise a page reference is taken, the page is unlocked,
 * and the metapage goes through release_metapage().
 * NOTE(review): the return inside the first branch and the statements
 * between page_cache_get() and unlock_page() are not visible in this
 * listing.
 */
void put_metapage(struct metapage *mp)
730
if (mp->count || mp->nohomeok) {
731
/* Someone else will release this */
732
unlock_page(mp->page);
735
page_cache_get(mp->page);
738
unlock_page(mp->page);
739
release_metapage(mp);
742
/*
 * Drop one hold on a metapage.
 *
 * mp->count is decremented; if holds remain, or nohomeok is set, only the
 * page reference is released.  Otherwise:
 *  - a META_dirty metapage dirties its page, and if META_sync is set the
 *    flag is cleared and the page is written synchronously with
 *    write_one_page(page, 1), after which the page is locked again
 *    because write_one_page() unlocks it;
 *  - a clean metapage that still has an lsn is removed from the log sync
 *    list;
 *  - drop_metapage() is then tried to limit memory use, and the page
 *    reference is released.
 *
 * NOTE(review): the page locking/unlocking around this sequence, the
 * metapage unlock, and the early return in the first branch are not
 * visible in this listing.
 */
void release_metapage(struct metapage * mp)
744
struct page *page = mp->page;
745
jfs_info("release_metapage: mp = 0x%p, flag = 0x%lx", mp, mp->flag);
753
if (--mp->count || mp->nohomeok) {
755
page_cache_release(page);
759
if (test_bit(META_dirty, &mp->flag)) {
760
set_page_dirty(page);
761
if (test_bit(META_sync, &mp->flag)) {
762
clear_bit(META_sync, &mp->flag);
763
write_one_page(page, 1);
764
lock_page(page); /* write_one_page unlocks the page */
766
} else if (mp->lsn) /* discard_metapage doesn't remove it */
767
remove_from_logsync(mp);
769
/* Try to keep metapages from using up too much memory */
770
drop_metapage(page, mp);
773
page_cache_release(page);
776
/*
 * Mark every cached metapage in the block range [addr, addr + len) as
 * discarded so that it is never written back.
 *
 * The range is walked one page-cache page at a time in the superblock's
 * direct_inode mapping, starting from addr rounded down to a page
 * boundary.  Each page present in the cache is locked via
 * find_lock_page(); its metapages whose index lies inside the range get
 * META_dirty cleared and META_discard set, and are removed from the log
 * sync list.  The page reference is then dropped.
 *
 * NOTE(review): handling of a page not found in the cache, of empty
 * slots, the statements under the two range checks (presumably continue /
 * break), and the page unlock are not visible in this listing.
 */
void __invalidate_metapages(struct inode *ip, s64 addr, int len)
779
int l2BlocksPerPage = PAGE_CACHE_SHIFT - ip->i_blkbits;
780
int BlocksPerPage = 1 << l2BlocksPerPage;
781
/* All callers are interested in block device's mapping */
782
struct address_space *mapping =
783
JFS_SBI(ip->i_sb)->direct_inode->i_mapping;
789
* Mark metapages to discard. They will eventually be
790
* released, but should not be written.
792
for (lblock = addr & ~(BlocksPerPage - 1); lblock < addr + len;
793
lblock += BlocksPerPage) {
794
page = find_lock_page(mapping, lblock >> l2BlocksPerPage);
797
for (offset = 0; offset < PAGE_CACHE_SIZE; offset += PSIZE) {
798
mp = page_to_mp(page, offset);
801
if (mp->index < addr)
803
if (mp->index >= addr + len)
806
clear_bit(META_dirty, &mp->flag);
807
set_bit(META_discard, &mp->flag);
809
remove_from_logsync(mp);
812
page_cache_release(page);
816
#ifdef CONFIG_JFS_STATISTICS
817
/*
 * seq_file "show" callback registered through single_open() in
 * jfs_mpstat_proc_open().  The visible format-string fragments are a
 * two-line banner followed by a "page allocations" line.
 * NOTE(review): the print call itself, the remaining format lines and
 * their arguments, and the return statement are not visible in this
 * listing.
 */
static int jfs_mpstat_proc_show(struct seq_file *m, void *v)
820
"JFS Metapage statistics\n"
821
"=======================\n"
822
"page allocations = %d\n"
831
/*
 * open() handler for the statistics file: sets up a single-record
 * seq_file whose content comes from jfs_mpstat_proc_show(), with no
 * private data (NULL).  `inode` is not used.
 */
static int jfs_mpstat_proc_open(struct inode *inode, struct file *file)
833
return single_open(file, jfs_mpstat_proc_show, NULL);
836
const struct file_operations jfs_mpstat_proc_fops = {
837
.owner = THIS_MODULE,
838
.open = jfs_mpstat_proc_open,
841
.release = single_release,