98
* We need this because the bdev->unplug_fn can sleep and we cannot
99
* hold swap_lock while calling the unplug_fn. And swap_lock
100
* cannot be turned into a mutex.
102
static DECLARE_RWSEM(swap_unplug_sem);
104
void swap_unplug_io_fn(struct backing_dev_info *unused_bdi, struct page *page)
108
down_read(&swap_unplug_sem);
109
entry.val = page_private(page);
110
if (PageSwapCache(page)) {
111
struct block_device *bdev = swap_info[swp_type(entry)]->bdev;
112
struct backing_dev_info *bdi;
115
* If the page is removed from swapcache from under us (with a
116
* racy try_to_unuse/swapoff) we need an additional reference
117
* count to avoid reading garbage from page_private(page) above.
118
* If the WARN_ON triggers during a swapoff it may be the race
119
* condition and it's harmless. However if it triggers without
120
* swapoff it signals a problem.
122
WARN_ON(page_count(page) <= 1);
124
bdi = bdev->bd_inode->i_mapping->backing_dev_info;
125
blk_run_backing_dev(bdi, page);
127
up_read(&swap_unplug_sem);
131
98
* swapon tells the device that all the old swap contents can be discarded,
132
99
* to allow the swap device to optimize its wear-levelling.
1520
static void enable_swap_info(struct swap_info_struct *p, int prio,
1521
unsigned char *swap_map)
1525
spin_lock(&swap_lock);
1529
p->prio = --least_priority;
1530
p->swap_map = swap_map;
1531
p->flags |= SWP_WRITEOK;
1532
nr_swap_pages += p->pages;
1533
total_swap_pages += p->pages;
1535
/* insert swap space into swap_list: */
1537
for (i = swap_list.head; i >= 0; i = swap_info[i]->next) {
1538
if (p->prio >= swap_info[i]->prio)
1544
swap_list.head = swap_list.next = p->type;
1546
swap_info[prev]->next = p->type;
1547
spin_unlock(&swap_lock);
1553
1550
SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
1555
1552
struct swap_info_struct *p = NULL;
1621
1618
current->flags &= ~PF_OOM_ORIGIN;
1622
* reading p->prio and p->swap_map outside the lock is
1623
* safe here because only sys_swapon and sys_swapoff
1624
* change them, and there can be no other sys_swapon or
1625
* sys_swapoff for this swap_info_struct at this point.
1624
1627
/* re-insert swap space back into swap_list */
1625
spin_lock(&swap_lock);
1627
p->prio = --least_priority;
1629
for (i = swap_list.head; i >= 0; i = swap_info[i]->next) {
1630
if (p->prio >= swap_info[i]->prio)
1636
swap_list.head = swap_list.next = type;
1638
swap_info[prev]->next = type;
1639
nr_swap_pages += p->pages;
1640
total_swap_pages += p->pages;
1641
p->flags |= SWP_WRITEOK;
1642
spin_unlock(&swap_lock);
1628
enable_swap_info(p, p->prio, p->swap_map);
1646
/* wait for any unplug function to finish */
1647
down_write(&swap_unplug_sem);
1648
up_write(&swap_unplug_sem);
1650
1632
destroy_swap_extents(p);
1651
1633
if (p->flags & SWP_CONTINUED)
1652
1634
free_swap_count_continuations(p);
1844
1826
late_initcall(max_swapfiles_check);
1848
* Written 01/25/92 by Simmule Turner, heavily changed by Linus.
1850
* The swapon system call
1852
SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
1829
static struct swap_info_struct *alloc_swap_info(void)
1854
1831
struct swap_info_struct *p;
1856
struct block_device *bdev = NULL;
1857
struct file *swap_file = NULL;
1858
struct address_space *mapping;
1859
1832
unsigned int type;
1862
union swap_header *swap_header;
1863
unsigned int nr_good_pages;
1866
unsigned long maxpages;
1867
unsigned long swapfilepages;
1868
unsigned char *swap_map = NULL;
1869
struct page *page = NULL;
1870
struct inode *inode = NULL;
1873
if (!capable(CAP_SYS_ADMIN))
1876
1834
p = kzalloc(sizeof(*p), GFP_KERNEL);
1836
return ERR_PTR(-ENOMEM);
1880
1838
spin_lock(&swap_lock);
1881
1839
for (type = 0; type < nr_swapfiles; type++) {
1882
1840
if (!(swap_info[type]->flags & SWP_USED))
1886
1843
if (type >= MAX_SWAPFILES) {
1887
1844
spin_unlock(&swap_lock);
1846
return ERR_PTR(-EPERM);
1891
1848
if (type >= nr_swapfiles) {
1892
1849
p->type = type;
1912
1869
spin_unlock(&swap_lock);
1914
name = getname(specialfile);
1915
error = PTR_ERR(name);
1920
swap_file = filp_open(name, O_RDWR|O_LARGEFILE, 0);
1921
error = PTR_ERR(swap_file);
1922
if (IS_ERR(swap_file)) {
1927
p->swap_file = swap_file;
1928
mapping = swap_file->f_mapping;
1929
inode = mapping->host;
1932
for (i = 0; i < nr_swapfiles; i++) {
1933
struct swap_info_struct *q = swap_info[i];
1935
if (i == type || !q->swap_file)
1937
if (mapping == q->swap_file->f_mapping)
1874
static int claim_swapfile(struct swap_info_struct *p, struct inode *inode)
1942
1878
if (S_ISBLK(inode->i_mode)) {
1943
bdev = bdgrab(I_BDEV(inode));
1944
error = blkdev_get(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL,
1879
p->bdev = bdgrab(I_BDEV(inode));
1880
error = blkdev_get(p->bdev,
1881
FMODE_READ | FMODE_WRITE | FMODE_EXCL,
1946
1883
if (error < 0) {
1951
p->old_block_size = block_size(bdev);
1952
error = set_blocksize(bdev, PAGE_SIZE);
1887
p->old_block_size = block_size(p->bdev);
1888
error = set_blocksize(p->bdev, PAGE_SIZE);
1956
1891
p->flags |= SWP_BLKDEV;
1957
1892
} else if (S_ISREG(inode->i_mode)) {
1958
1893
p->bdev = inode->i_sb->s_bdev;
1959
1894
mutex_lock(&inode->i_mutex);
1961
if (IS_SWAPFILE(inode)) {
1969
swapfilepages = i_size_read(inode) >> PAGE_SHIFT;
1972
* Read the swap header.
1974
if (!mapping->a_ops->readpage) {
1978
page = read_mapping_page(mapping, 0, swap_file);
1980
error = PTR_ERR(page);
1983
swap_header = kmap(page);
1895
if (IS_SWAPFILE(inode))
1903
static unsigned long read_swap_header(struct swap_info_struct *p,
1904
union swap_header *swap_header,
1905
struct inode *inode)
1908
unsigned long maxpages;
1909
unsigned long swapfilepages;
1985
1911
if (memcmp("SWAPSPACE2", swap_header->magic.magic, 10)) {
1986
1912
printk(KERN_ERR "Unable to find swap-space signature\n");
1991
1916
/* swap partition endianness hack... */
2034
1958
p->highest_bit = maxpages - 1;
1962
swapfilepages = i_size_read(inode) >> PAGE_SHIFT;
2039
1963
if (swapfilepages && maxpages > swapfilepages) {
2040
1964
printk(KERN_WARNING
2041
1965
"Swap area shorter than signature indicates\n");
2044
1968
if (swap_header->info.nr_badpages && S_ISREG(inode->i_mode))
2046
1970
if (swap_header->info.nr_badpages > MAX_SWAP_BADPAGES)
2049
/* OK, set up the swap map and apply the bad block list */
2050
swap_map = vmalloc(maxpages);
2056
memset(swap_map, 0, maxpages);
1976
static int setup_swap_map_and_extents(struct swap_info_struct *p,
1977
union swap_header *swap_header,
1978
unsigned char *swap_map,
1979
unsigned long maxpages,
1983
unsigned int nr_good_pages;
2057
1986
nr_good_pages = maxpages - 1; /* omit header page */
2059
1988
for (i = 0; i < swap_header->info.nr_badpages; i++) {
2060
1989
unsigned int page_nr = swap_header->info.badpages[i];
2061
if (page_nr == 0 || page_nr > swap_header->info.last_page) {
1990
if (page_nr == 0 || page_nr > swap_header->info.last_page)
2065
1992
if (page_nr < maxpages) {
2066
1993
swap_map[page_nr] = SWAP_MAP_BAD;
2067
1994
nr_good_pages--;
2071
error = swap_cgroup_swapon(type, maxpages);
2075
1998
if (nr_good_pages) {
2076
1999
swap_map[0] = SWAP_MAP_BAD;
2077
2000
p->max = maxpages;
2078
2001
p->pages = nr_good_pages;
2079
nr_extents = setup_swap_extents(p, &span);
2080
if (nr_extents < 0) {
2002
nr_extents = setup_swap_extents(p, span);
2084
2005
nr_good_pages = p->pages;
2086
2007
if (!nr_good_pages) {
2087
2008
printk(KERN_WARNING "Empty swap-file\n");
2015
SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
2017
struct swap_info_struct *p;
2019
struct file *swap_file = NULL;
2020
struct address_space *mapping;
2024
union swap_header *swap_header;
2027
unsigned long maxpages;
2028
unsigned char *swap_map = NULL;
2029
struct page *page = NULL;
2030
struct inode *inode = NULL;
2032
if (!capable(CAP_SYS_ADMIN))
2035
p = alloc_swap_info();
2039
name = getname(specialfile);
2041
error = PTR_ERR(name);
2045
swap_file = filp_open(name, O_RDWR|O_LARGEFILE, 0);
2046
if (IS_ERR(swap_file)) {
2047
error = PTR_ERR(swap_file);
2052
p->swap_file = swap_file;
2053
mapping = swap_file->f_mapping;
2055
for (i = 0; i < nr_swapfiles; i++) {
2056
struct swap_info_struct *q = swap_info[i];
2058
if (q == p || !q->swap_file)
2060
if (mapping == q->swap_file->f_mapping) {
2066
inode = mapping->host;
2067
/* If S_ISREG(inode->i_mode), claim_swapfile() will do mutex_lock(&inode->i_mutex); */
2068
error = claim_swapfile(p, inode);
2069
if (unlikely(error))
2073
* Read the swap header.
2075
if (!mapping->a_ops->readpage) {
2079
page = read_mapping_page(mapping, 0, swap_file);
2081
error = PTR_ERR(page);
2084
swap_header = kmap(page);
2086
maxpages = read_swap_header(p, swap_header, inode);
2087
if (unlikely(!maxpages)) {
2092
/* OK, set up the swap map and apply the bad block list */
2093
swap_map = vzalloc(maxpages);
2099
error = swap_cgroup_swapon(p->type, maxpages);
2103
nr_extents = setup_swap_map_and_extents(p, swap_header, swap_map,
2105
if (unlikely(nr_extents < 0)) {
2101
2119
mutex_lock(&swapon_mutex);
2102
spin_lock(&swap_lock);
2103
2121
if (swap_flags & SWAP_FLAG_PREFER)
2105
2123
(swap_flags & SWAP_FLAG_PRIO_MASK) >> SWAP_FLAG_PRIO_SHIFT;
2107
p->prio = --least_priority;
2108
p->swap_map = swap_map;
2109
p->flags |= SWP_WRITEOK;
2110
nr_swap_pages += nr_good_pages;
2111
total_swap_pages += nr_good_pages;
2124
enable_swap_info(p, prio, swap_map);
2113
2126
printk(KERN_INFO "Adding %uk swap on %s. "
2114
2127
"Priority:%d extents:%d across:%lluk %s%s\n",
2115
nr_good_pages<<(PAGE_SHIFT-10), name, p->prio,
2128
p->pages<<(PAGE_SHIFT-10), name, p->prio,
2116
2129
nr_extents, (unsigned long long)span<<(PAGE_SHIFT-10),
2117
2130
(p->flags & SWP_SOLIDSTATE) ? "SS" : "",
2118
2131
(p->flags & SWP_DISCARDABLE) ? "D" : "");
2120
/* insert swap space into swap_list: */
2122
for (i = swap_list.head; i >= 0; i = swap_info[i]->next) {
2123
if (p->prio >= swap_info[i]->prio)
2129
swap_list.head = swap_list.next = type;
2131
swap_info[prev]->next = type;
2132
spin_unlock(&swap_lock);
2133
2133
mutex_unlock(&swapon_mutex);
2134
2134
atomic_inc(&proc_poll_event);
2135
2135
wake_up_interruptible(&proc_poll_wait);
2137
if (S_ISREG(inode->i_mode))
2138
inode->i_flags |= S_SWAPFILE;
2141
set_blocksize(bdev, p->old_block_size);
2142
blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
2142
if (inode && S_ISBLK(inode->i_mode) && p->bdev) {
2143
set_blocksize(p->bdev, p->old_block_size);
2144
blkdev_put(p->bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
2144
2146
destroy_swap_extents(p);
2145
swap_cgroup_swapoff(type);
2147
swap_cgroup_swapoff(p->type);
2147
2148
spin_lock(&swap_lock);
2148
2149
p->swap_file = NULL;
2150
2151
spin_unlock(&swap_lock);
2151
2152
vfree(swap_map);
2154
if (inode && S_ISREG(inode->i_mode)) {
2155
mutex_unlock(&inode->i_mutex);
2153
2158
filp_close(swap_file, NULL);
2155
2161
if (page && !IS_ERR(page)) {