#define SS_DEBUG_SUBSYS SS_KMEM

/*
 * Within the scope of the spl-kmem.c file the kmem_cache_* definitions
 * are removed to allow access to the real Linux slab allocator.
 */
#undef kmem_cache_destroy
#undef kmem_cache_create
#undef kmem_cache_alloc
#undef kmem_cache_free

/*
 * Cache expiration was implemented because it was part of the default Solaris
 * kmem_cache behavior. The idea is that per-cpu objects which haven't been
 * accessed in several seconds should be returned to the cache. On the other
 * hand Linux slabs never move objects back to the slabs unless there is
 * memory pressure on the system. By default the Linux method is enabled
 * because it has been shown to improve responsiveness on low memory systems.
 * This policy may be changed by setting KMC_EXPIRE_AGE or KMC_EXPIRE_MEM.
 */
unsigned int spl_kmem_cache_expire = KMC_EXPIRE_MEM;
EXPORT_SYMBOL(spl_kmem_cache_expire);
module_param(spl_kmem_cache_expire, uint, 0644);
MODULE_PARM_DESC(spl_kmem_cache_expire, "By age (0x1) or low memory (0x2)");
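
/*
 * For example, because the parameter above is registered with mode 0644 the
 * expiration policy can be changed at runtime through sysfs. A sketch,
 * assuming the module is loaded under its usual name "spl":
 *
 *   # Expire per-cpu objects by age (KMC_EXPIRE_AGE)
 *   echo 1 > /sys/module/spl/parameters/spl_kmem_cache_expire
 *
 *   # Expire only under memory pressure (KMC_EXPIRE_MEM, the default)
 *   echo 2 > /sys/module/spl/parameters/spl_kmem_cache_expire
 */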

/*
 * KMC_RECLAIM_ONCE is set as the default until zfsonlinux/spl#268 is
 * definitively resolved. Depending on the system configuration and
 * workload this may increase the likelihood of out of memory events.
 * For those cases it is advised that this option be set to zero.
 */
unsigned int spl_kmem_cache_reclaim = KMC_RECLAIM_ONCE;
module_param(spl_kmem_cache_reclaim, uint, 0644);
MODULE_PARM_DESC(spl_kmem_cache_reclaim, "Single reclaim pass (0x1)");
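
/*
 * A sketch of acting on the advice above and disabling the single-pass
 * behavior at module load time (assumes the standard module name "spl"):
 *
 *   modprobe spl spl_kmem_cache_reclaim=0
 */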

unsigned int spl_kmem_cache_obj_per_slab = SPL_KMEM_CACHE_OBJ_PER_SLAB;
module_param(spl_kmem_cache_obj_per_slab, uint, 0644);
MODULE_PARM_DESC(spl_kmem_cache_obj_per_slab, "Number of objects per slab");

unsigned int spl_kmem_cache_obj_per_slab_min = SPL_KMEM_CACHE_OBJ_PER_SLAB_MIN;
module_param(spl_kmem_cache_obj_per_slab_min, uint, 0644);
MODULE_PARM_DESC(spl_kmem_cache_obj_per_slab_min,
    "Minimum number of objects per slab");

unsigned int spl_kmem_cache_max_size = 32;
module_param(spl_kmem_cache_max_size, uint, 0644);
MODULE_PARM_DESC(spl_kmem_cache_max_size, "Maximum size of slab in MB");

unsigned int spl_kmem_cache_slab_limit = 0;
module_param(spl_kmem_cache_slab_limit, uint, 0644);
MODULE_PARM_DESC(spl_kmem_cache_slab_limit,
    "Objects less than N bytes use the Linux slab");

unsigned int spl_kmem_cache_kmem_limit = (PAGE_SIZE / 4);
module_param(spl_kmem_cache_kmem_limit, uint, 0644);
MODULE_PARM_DESC(spl_kmem_cache_kmem_limit,
    "Objects less than N bytes use kmalloc");

/*
 * The minimum amount of memory measured in pages to be free at all
 * times on the system. This is similar to Linux's zone->pages_min
 * multiplied by the number of zones and is sized based on that.
 */

    atomic_inc(&skc->skc_ref);

    if (!(skc->skc_flags & KMC_NOMAGAZINE))
        spl_on_each_cpu(spl_magazine_age, skc, 1);

    spl_slab_reclaim(skc, skc->skc_reap, 0);

    while (!test_bit(KMC_BIT_DESTROY, &skc->skc_flags) && !id) {

/*
 * Size a slab based on the size of each aligned object plus spl_kmem_obj_t.
 * When on-slab we want to target spl_kmem_cache_obj_per_slab. However,
 * for very small objects we may end up with more than this so as not
 * to waste space in the minimal allocation of a single page. Also for
 * very large objects we may use as few as spl_kmem_cache_obj_per_slab_min,
 * lower than this and we will fail.
 */

    if (skc->skc_flags & KMC_KMEM)
        max_size = ((uint32_t)1 << (MAX_ORDER-3)) * PAGE_SIZE;
    else
        max_size = (spl_kmem_cache_max_size * 1024 * 1024);

    /* Power of two sized slab */
    for (*size = PAGE_SIZE; *size <= max_size; *size *= 2) {
        *objs = (*size - sks_size) / obj_size;
        if (*objs >= spl_kmem_cache_obj_per_slab)
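
/*
 * A worked pass through the sizing loop above, assuming 4 KiB pages,
 * obj_size = 512, sks_size = 256, and spl_kmem_cache_obj_per_slab taken
 * as 16 (illustrative values only):
 *
 *   *size = 4096:  *objs = (4096 - 256) / 512 = 7    (< 16, double *size)
 *   *size = 8192:  *objs = (8192 - 256) / 512 = 15   (< 16, double *size)
 *   *size = 16384: *objs = (16384 - 256) / 512 = 31  (>= 16, done)
 */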

/*
 * KMC_NOTOUCH       Disable cache object aging (unsupported)
 * KMC_NODEBUG       Disable debugging (unsupported)
 * KMC_NOHASH        Disable hashing (unsupported)
 * KMC_QCACHE        Disable qcache (unsupported)
 * KMC_NOMAGAZINE    Enabled for kmem/vmem, Disabled for Linux slab
 * KMC_KMEM          Force kmem backed cache
 * KMC_VMEM          Force vmem backed cache
 * KMC_SLAB          Force Linux slab backed cache
 * KMC_OFFSLAB       Locate objects off the slab
 */
spl_kmem_cache_t *
spl_kmem_cache_create(char *name, size_t size, size_t align,
    spl_kmem_ctor_t ctor, spl_kmem_dtor_t dtor, spl_kmem_reclaim_t reclaim,
    void *priv, void *vmp, int flags)
{

    skc->skc_obj_emergency = 0;
    skc->skc_obj_emergency_max = 0;

    /*
     * Verify the requested alignment restriction is sane.
     */
    if (align) {
        VERIFY(ISP2(align));
        VERIFY3U(align, >=, SPL_KMEM_CACHE_ALIGN);
        VERIFY3U(align, <=, PAGE_SIZE);
        skc->skc_obj_align = align;
    }
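
    /*
     * For instance, passing align = 64 to spl_kmem_cache_create() would
     * cache-line align every object on typical x86 systems and satisfies
     * the checks above: it is a power of two, at least
     * SPL_KMEM_CACHE_ALIGN, and no larger than PAGE_SIZE. Passing 0
     * skips the block above and keeps the default object alignment.
     */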

    /*
     * When no specific type of slab is requested (kmem, vmem, or
     * linuxslab) then select a cache type based on the object size
     * and default tunables.
     */
    if (!(skc->skc_flags & (KMC_KMEM | KMC_VMEM | KMC_SLAB))) {

        /*
         * Objects smaller than spl_kmem_cache_slab_limit can
         * use the Linux slab for better space-efficiency. By
         * default this functionality is disabled until its
         * performance characteristics are fully understood.
         */
        if (spl_kmem_cache_slab_limit &&
            size <= (size_t)spl_kmem_cache_slab_limit)
            skc->skc_flags |= KMC_SLAB;

        /*
         * Small objects, less than spl_kmem_cache_kmem_limit per
         * object should use kmem because their slabs are small.
         */
        else if (spl_obj_size(skc) <= spl_kmem_cache_kmem_limit)
            skc->skc_flags |= KMC_KMEM;

        /*
         * All other objects are considered large and are placed
         * on vmem backed slabs.
         */
        else
            skc->skc_flags |= KMC_VMEM;
    }

    /*
     * Given the type of slab allocate the required resources.
     */
    if (skc->skc_flags & (KMC_KMEM | KMC_VMEM)) {
        rc = spl_slab_size(skc,
            &skc->skc_slab_objs, &skc->skc_slab_size);
        if (rc)
            SGOTO(out, rc);

        rc = spl_magazine_create(skc);
        if (rc)
            SGOTO(out, rc);
    } else {
        skc->skc_linux_cache = kmem_cache_create(
            skc->skc_name, size, align, 0, NULL);
        if (skc->skc_linux_cache == NULL)
            SGOTO(out, rc = ENOMEM);

        kmem_cache_set_allocflags(skc, __GFP_COMP);
        skc->skc_flags |= KMC_NOMAGAZINE;
    }

    if (spl_kmem_cache_expire & KMC_EXPIRE_AGE)
        skc->skc_taskqid = taskq_dispatch_delay(spl_kmem_cache_taskq,
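
/*
 * A minimal usage sketch of the create/alloc/free/destroy cycle documented
 * above (hypothetical cache name and my_obj_t object type; constructor,
 * destructor, reclaim callback, and private data are omitted, and
 * align = 0 selects the default alignment):
 *
 *   spl_kmem_cache_t *skc;
 *   my_obj_t *obj;
 *
 *   skc = spl_kmem_cache_create("my_cache", sizeof (my_obj_t), 0,
 *       NULL, NULL, NULL, NULL, NULL, KMC_SLAB);
 *   obj = spl_kmem_cache_alloc(skc, KM_SLEEP);
 *   ...
 *   spl_kmem_cache_free(skc, obj);
 *   spl_kmem_cache_destroy(skc);
 */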

    /* Wait until all current callers complete, this is mainly
     * to catch the case where a low memory situation triggers a
     * cache reaping action which races with this destroy. */
    wait_event(wq, atomic_read(&skc->skc_ref) == 0);

    if (skc->skc_flags & (KMC_KMEM | KMC_VMEM)) {
        spl_magazine_destroy(skc);
        spl_slab_reclaim(skc, 0, 1);
    } else {
        ASSERT(skc->skc_flags & KMC_SLAB);
        kmem_cache_destroy(skc->skc_linux_cache);
    }

    spin_lock(&skc->skc_lock);

    /* Validate there are no objects in use and free all the
     * spl_kmem_slab_t, spl_kmem_obj_t, and object buffers. */

void *
spl_kmem_cache_alloc(spl_kmem_cache_t *skc, int flags)
{
    spl_kmem_magazine_t *skm;
    void *obj = NULL;

    ASSERT(skc->skc_magic == SKC_MAGIC);
    ASSERT(!test_bit(KMC_BIT_DESTROY, &skc->skc_flags));
    ASSERT(flags & KM_SLEEP);

    atomic_inc(&skc->skc_ref);

    /*
     * Allocate directly from a Linux slab. All optimizations are left
     * to the underlying cache; we only need to guarantee that KM_SLEEP
     * callers will never fail.
     */
    if (skc->skc_flags & KMC_SLAB) {
        struct kmem_cache *slc = skc->skc_linux_cache;

        do {
            obj = kmem_cache_alloc(slc, flags | __GFP_COMP);
            if (obj && skc->skc_ctor)
                skc->skc_ctor(obj, skc->skc_private, flags);

        } while ((obj == NULL) && !(flags & KM_NOSLEEP));

        atomic_dec(&skc->skc_ref);
        SRETURN(obj);
    }

    local_irq_disable();
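
    /*
     * Note for callers: because of the retry loop above, a KM_SLEEP
     * allocation from a KMC_SLAB backed cache never returns NULL. A
     * sketch, assuming a previously created cache skc:
     *
     *   obj = spl_kmem_cache_alloc(skc, KM_SLEEP);
     *   ASSERT(obj != NULL);
     */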

    /* Safe to update per-cpu structure without lock, but

    atomic_inc(&skc->skc_ref);

    /*
     * Free the object from the underlying Linux slab.
     */
    if (skc->skc_flags & KMC_SLAB) {
        if (skc->skc_dtor)
            skc->skc_dtor(obj, skc->skc_private);

        kmem_cache_free(skc->skc_linux_cache, obj);

    /*
     * Only virtual slabs may have emergency objects and these objects
     * are guaranteed to have physical addresses. They must be removed
     * from the tree of emergency objects and then freed.
     */

            MAX(sc->nr_to_scan >> fls64(skc->skc_slab_objs), 1));

        /*
         * Presume everything alloc'ed is reclaimable, this ensures
         * we are called again with nr_to_scan > 0 so we can try and
         * reclaim. The exact number is not important either so
         * we forgo taking this already highly contended lock.
         */
        alloc += skc->skc_obj_alloc;
    }

    up_read(&spl_kmem_cache_sem);

    /*
     * When KMC_RECLAIM_ONCE is set allow only a single reclaim pass.
     * This functionality only exists to work around a rare issue where
     * shrink_slabs() is repeatedly invoked by many cores causing the
     * system to thrash.
     */
    if ((spl_kmem_cache_reclaim & KMC_RECLAIM_ONCE) && sc->nr_to_scan)
        return (-1);

    return MAX((alloc * sysctl_vfs_cache_pressure) / 100, 0);
}

SPL_SHRINKER_CALLBACK_WRAPPER(spl_kmem_cache_generic_shrinker);
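
/*
 * For example, with the default sysctl_vfs_cache_pressure of 100 the count
 * returned above is simply the number of allocated objects: alloc = 10000
 * yields (10000 * 100) / 100 = 10000. Raising vfs_cache_pressure to 200
 * would report 20000, making these caches look twice as large to the VM
 * and therefore twice as attractive to reclaim (illustrative numbers only).
 */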

    ASSERT(skc->skc_magic == SKC_MAGIC);
    ASSERT(!test_bit(KMC_BIT_DESTROY, &skc->skc_flags));

    atomic_inc(&skc->skc_ref);

    /*
     * Execute the registered reclaim callback if it exists. The
     * per-cpu caches will be drained when KMC_EXPIRE_MEM is set.
     */
    if (skc->skc_flags & KMC_SLAB) {
        if (skc->skc_reclaim)
            skc->skc_reclaim(skc->skc_private);

        if (spl_kmem_cache_expire & KMC_EXPIRE_MEM)
            kmem_cache_shrink(skc->skc_linux_cache);

        SGOTO(out, 0);
    }

    /*
     * Prevent concurrent cache reaping when contended.
     */
    if (test_and_set_bit(KMC_BIT_REAPING, &skc->skc_flags))
        SGOTO(out, 0);

    /*
     * When a reclaim function is available it may be invoked repeatedly
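
/*
 * A sketch of supplying a reclaim callback so the reaping logic above has
 * something to invoke (hypothetical callback, cache, and private data; the
 * callback signature matches the skc->skc_reclaim(skc->skc_private) call
 * made above):
 *
 *   static void
 *   my_cache_reclaim(void *priv)
 *   {
 *           release any cheaply rebuilt objects referenced by priv;
 *   }
 *
 *   skc = spl_kmem_cache_create("my_cache", sizeof (my_obj_t), 0,
 *       NULL, NULL, my_cache_reclaim, priv, NULL, 0);
 */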