#define SS_DEBUG_SUBSYS SS_KMEM

/*
 * Within the scope of the spl-kmem.c file the kmem_cache_* definitions
 * are removed to allow access to the real Linux slab allocator.
 */
#undef kmem_cache_destroy
#undef kmem_cache_create
#undef kmem_cache_alloc
#undef kmem_cache_free

/*
 * Cache expiration was implemented because it was part of the default Solaris
 * kmem_cache behavior. The idea is that per-cpu objects which haven't been
 * accessed in several seconds should be returned to the cache. On the other
 * hand Linux slabs never move objects back to the slabs unless there is
 * memory pressure on the system. By default the Linux method is enabled
 * because it has been shown to improve responsiveness on low memory systems.
 * This policy may be changed by setting KMC_EXPIRE_AGE or KMC_EXPIRE_MEM.
 */
unsigned int spl_kmem_cache_expire = KMC_EXPIRE_MEM;
EXPORT_SYMBOL(spl_kmem_cache_expire);
module_param(spl_kmem_cache_expire, uint, 0644);
MODULE_PARM_DESC(spl_kmem_cache_expire, "By age (0x1) or low memory (0x2)");
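
/*
 * For example, because the parameter above is registered with mode 0644 the
 * expiration policy can be changed at runtime through sysfs. A sketch,
 * assuming the module is loaded under its usual name "spl":
 *
 *   # Expire per-cpu objects by age (KMC_EXPIRE_AGE)
 *   echo 1 > /sys/module/spl/parameters/spl_kmem_cache_expire
 *
 *   # Expire only under memory pressure (KMC_EXPIRE_MEM, the default)
 *   echo 2 > /sys/module/spl/parameters/spl_kmem_cache_expire
 */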

/*
 * KMC_RECLAIM_ONCE is set as the default until zfsonlinux/spl#268 is
 * definitively resolved. Depending on the system configuration and
 * workload this may increase the likelihood of out of memory events.
 * For those cases it is advised that this option be set to zero.
 */
unsigned int spl_kmem_cache_reclaim = KMC_RECLAIM_ONCE;
module_param(spl_kmem_cache_reclaim, uint, 0644);
MODULE_PARM_DESC(spl_kmem_cache_reclaim, "Single reclaim pass (0x1)");
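
/*
 * A sketch of acting on the advice above and disabling the single-pass
 * behavior at module load time (assumes the standard module name "spl"):
 *
 *   modprobe spl spl_kmem_cache_reclaim=0
 */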

unsigned int spl_kmem_cache_obj_per_slab = SPL_KMEM_CACHE_OBJ_PER_SLAB;
module_param(spl_kmem_cache_obj_per_slab, uint, 0644);
MODULE_PARM_DESC(spl_kmem_cache_obj_per_slab, "Number of objects per slab");

unsigned int spl_kmem_cache_obj_per_slab_min = SPL_KMEM_CACHE_OBJ_PER_SLAB_MIN;
module_param(spl_kmem_cache_obj_per_slab_min, uint, 0644);
MODULE_PARM_DESC(spl_kmem_cache_obj_per_slab_min,
    "Minimum number of objects per slab");

unsigned int spl_kmem_cache_max_size = 32;
module_param(spl_kmem_cache_max_size, uint, 0644);
MODULE_PARM_DESC(spl_kmem_cache_max_size, "Maximum size of slab in MB");

unsigned int spl_kmem_cache_slab_limit = 0;
module_param(spl_kmem_cache_slab_limit, uint, 0644);
MODULE_PARM_DESC(spl_kmem_cache_slab_limit,
    "Objects less than N bytes use the Linux slab");

unsigned int spl_kmem_cache_kmem_limit = (PAGE_SIZE / 4);
module_param(spl_kmem_cache_kmem_limit, uint, 0644);
MODULE_PARM_DESC(spl_kmem_cache_kmem_limit,
    "Objects less than N bytes use kmalloc");

/*
 * The minimum amount of memory measured in pages to be free at all
 * times on the system. This is similar to Linux's zone->pages_min
 * multiplied by the number of zones and is sized based on that.
 */

    atomic_inc(&skc->skc_ref);

    if (!(skc->skc_flags & KMC_NOMAGAZINE))
        spl_on_each_cpu(spl_magazine_age, skc, 1);

    spl_slab_reclaim(skc, skc->skc_reap, 0);

    while (!test_bit(KMC_BIT_DESTROY, &skc->skc_flags) && !id) {

/*
 * Size a slab based on the size of each aligned object plus spl_kmem_obj_t.
 * When on-slab we want to target spl_kmem_cache_obj_per_slab. However,
 * for very small objects we may end up with more than this so as not
 * to waste space in the minimal allocation of a single page. Also for
 * very large objects we may use as few as spl_kmem_cache_obj_per_slab_min,
 * lower than this and we will fail.
 */

    if (skc->skc_flags & KMC_KMEM)
        max_size = ((uint32_t)1 << (MAX_ORDER-3)) * PAGE_SIZE;
    else
        max_size = (spl_kmem_cache_max_size * 1024 * 1024);

    /* Power of two sized slab */
    for (*size = PAGE_SIZE; *size <= max_size; *size *= 2) {
        *objs = (*size - sks_size) / obj_size;
        if (*objs >= spl_kmem_cache_obj_per_slab)
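
/*
 * A worked pass through the sizing loop above, assuming 4 KiB pages,
 * obj_size = 512, sks_size = 256, and spl_kmem_cache_obj_per_slab taken
 * as 16 (illustrative values only):
 *
 *   *size = 4096:  *objs = (4096 - 256) / 512 = 7    (< 16, double *size)
 *   *size = 8192:  *objs = (8192 - 256) / 512 = 15   (< 16, double *size)
 *   *size = 16384: *objs = (16384 - 256) / 512 = 31  (>= 16, done)
 */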

/*
 * KMC_NOTOUCH       Disable cache object aging (unsupported)
 * KMC_NODEBUG       Disable debugging (unsupported)
 * KMC_NOHASH        Disable hashing (unsupported)
 * KMC_QCACHE        Disable qcache (unsupported)
 * KMC_NOMAGAZINE    Enabled for kmem/vmem, Disabled for Linux slab
 * KMC_KMEM          Force kmem backed cache
 * KMC_VMEM          Force vmem backed cache
 * KMC_SLAB          Force Linux slab backed cache
 * KMC_OFFSLAB       Locate objects off the slab
 */
spl_kmem_cache_t *
spl_kmem_cache_create(char *name, size_t size, size_t align,
    spl_kmem_ctor_t ctor, spl_kmem_dtor_t dtor, spl_kmem_reclaim_t reclaim,
    void *priv, void *vmp, int flags)
{

    skc->skc_obj_emergency = 0;
    skc->skc_obj_emergency_max = 0;

    /*
     * Verify the requested alignment restriction is sane.
     */
    if (align) {
        VERIFY(ISP2(align));
        VERIFY3U(align, >=, SPL_KMEM_CACHE_ALIGN);
        VERIFY3U(align, <=, PAGE_SIZE);
        skc->skc_obj_align = align;
    }
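
    /*
     * For instance, passing align = 64 to spl_kmem_cache_create() would
     * cache-line align every object on typical x86 systems and satisfies
     * the checks above: it is a power of two, at least
     * SPL_KMEM_CACHE_ALIGN, and no larger than PAGE_SIZE. Passing 0
     * skips the block above and keeps the default object alignment.
     */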

    /*
     * When no specific type of slab is requested (kmem, vmem, or
     * linuxslab) then select a cache type based on the object size
     * and default tunables.
     */
    if (!(skc->skc_flags & (KMC_KMEM | KMC_VMEM | KMC_SLAB))) {

        /*
         * Objects smaller than spl_kmem_cache_slab_limit can
         * use the Linux slab for better space-efficiency. By
         * default this functionality is disabled until its
         * performance characteristics are fully understood.
         */
        if (spl_kmem_cache_slab_limit &&
            size <= (size_t)spl_kmem_cache_slab_limit)
            skc->skc_flags |= KMC_SLAB;

        /*
         * Small objects, less than spl_kmem_cache_kmem_limit per
         * object should use kmem because their slabs are small.
         */
        else if (spl_obj_size(skc) <= spl_kmem_cache_kmem_limit)
            skc->skc_flags |= KMC_KMEM;

        /*
         * All other objects are considered large and are placed
         * on vmem backed slabs.
         */
        else
            skc->skc_flags |= KMC_VMEM;
    }

    /*
     * Given the type of slab allocate the required resources.
     */
    if (skc->skc_flags & (KMC_KMEM | KMC_VMEM)) {
        rc = spl_slab_size(skc,
            &skc->skc_slab_objs, &skc->skc_slab_size);
        if (rc)
            SGOTO(out, rc);

        rc = spl_magazine_create(skc);
        if (rc)
            SGOTO(out, rc);
    } else {
        skc->skc_linux_cache = kmem_cache_create(
            skc->skc_name, size, align, 0, NULL);
        if (skc->skc_linux_cache == NULL)
            SGOTO(out, rc = ENOMEM);

        kmem_cache_set_allocflags(skc, __GFP_COMP);
        skc->skc_flags |= KMC_NOMAGAZINE;
    }

    if (spl_kmem_cache_expire & KMC_EXPIRE_AGE)
        skc->skc_taskqid = taskq_dispatch_delay(spl_kmem_cache_taskq,
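
/*
 * A minimal usage sketch of the create/alloc/free/destroy cycle documented
 * above (hypothetical cache name and my_obj_t object type; constructor,
 * destructor, reclaim callback, and private data are omitted, and
 * align = 0 selects the default alignment):
 *
 *   spl_kmem_cache_t *skc;
 *   my_obj_t *obj;
 *
 *   skc = spl_kmem_cache_create("my_cache", sizeof (my_obj_t), 0,
 *       NULL, NULL, NULL, NULL, NULL, KMC_SLAB);
 *   obj = spl_kmem_cache_alloc(skc, KM_SLEEP);
 *   ...
 *   spl_kmem_cache_free(skc, obj);
 *   spl_kmem_cache_destroy(skc);
 */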

    /* Wait until all current callers complete, this is mainly
     * to catch the case where a low memory situation triggers a
     * cache reaping action which races with this destroy. */
    wait_event(wq, atomic_read(&skc->skc_ref) == 0);

    if (skc->skc_flags & (KMC_KMEM | KMC_VMEM)) {
        spl_magazine_destroy(skc);
        spl_slab_reclaim(skc, 0, 1);
    } else {
        ASSERT(skc->skc_flags & KMC_SLAB);
        kmem_cache_destroy(skc->skc_linux_cache);
    }

    spin_lock(&skc->skc_lock);

    /* Validate there are no objects in use and free all the
     * spl_kmem_slab_t, spl_kmem_obj_t, and object buffers. */

void *
spl_kmem_cache_alloc(spl_kmem_cache_t *skc, int flags)
{
    spl_kmem_magazine_t *skm;
    void *obj = NULL;

    ASSERT(skc->skc_magic == SKC_MAGIC);
    ASSERT(!test_bit(KMC_BIT_DESTROY, &skc->skc_flags));
    ASSERT(flags & KM_SLEEP);

    atomic_inc(&skc->skc_ref);

    /*
     * Allocate directly from a Linux slab. All optimizations are left
     * to the underlying cache; we only need to guarantee that KM_SLEEP
     * callers will never fail.
     */
    if (skc->skc_flags & KMC_SLAB) {
        struct kmem_cache *slc = skc->skc_linux_cache;

        do {
            obj = kmem_cache_alloc(slc, flags | __GFP_COMP);
            if (obj && skc->skc_ctor)
                skc->skc_ctor(obj, skc->skc_private, flags);

        } while ((obj == NULL) && !(flags & KM_NOSLEEP));

        atomic_dec(&skc->skc_ref);
        SRETURN(obj);
    }

    local_irq_disable();
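
    /*
     * Note for callers: because of the retry loop above, a KM_SLEEP
     * allocation from a KMC_SLAB backed cache never returns NULL. A
     * sketch, assuming a previously created cache skc:
     *
     *   obj = spl_kmem_cache_alloc(skc, KM_SLEEP);
     *   ASSERT(obj != NULL);
     */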

    /* Safe to update per-cpu structure without lock, but

    atomic_inc(&skc->skc_ref);

    /*
     * Free the object from the underlying Linux slab.
     */
    if (skc->skc_flags & KMC_SLAB) {
        if (skc->skc_dtor)
            skc->skc_dtor(obj, skc->skc_private);

        kmem_cache_free(skc->skc_linux_cache, obj);

    /*
     * Only virtual slabs may have emergency objects and these objects
     * are guaranteed to have physical addresses. They must be removed
     * from the tree of emergency objects and then freed.
     */

            MAX(sc->nr_to_scan >> fls64(skc->skc_slab_objs), 1));

        /*
         * Presume everything alloc'ed is reclaimable, this ensures
         * we are called again with nr_to_scan > 0 so we can try and
         * reclaim. The exact number is not important either so
         * we forgo taking this already highly contended lock.
         */
        alloc += skc->skc_obj_alloc;
    }

    up_read(&spl_kmem_cache_sem);

    /*
     * When KMC_RECLAIM_ONCE is set allow only a single reclaim pass.
     * This functionality only exists to work around a rare issue where
     * shrink_slabs() is repeatedly invoked by many cores causing the
     * system to thrash.
     */
    if ((spl_kmem_cache_reclaim & KMC_RECLAIM_ONCE) && sc->nr_to_scan)
        return (-1);

    return MAX((alloc * sysctl_vfs_cache_pressure) / 100, 0);
}

SPL_SHRINKER_CALLBACK_WRAPPER(spl_kmem_cache_generic_shrinker);
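
/*
 * For example, with the default sysctl_vfs_cache_pressure of 100 the count
 * returned above is simply the number of allocated objects: alloc = 10000
 * yields (10000 * 100) / 100 = 10000. Raising vfs_cache_pressure to 200
 * would report 20000, making these caches look twice as large to the VM
 * and therefore twice as attractive to reclaim (illustrative numbers only).
 */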

    ASSERT(skc->skc_magic == SKC_MAGIC);
    ASSERT(!test_bit(KMC_BIT_DESTROY, &skc->skc_flags));

    atomic_inc(&skc->skc_ref);

    /*
     * Execute the registered reclaim callback if it exists. The
     * per-cpu caches will be drained when KMC_EXPIRE_MEM is set.
     */
    if (skc->skc_flags & KMC_SLAB) {
        if (skc->skc_reclaim)
            skc->skc_reclaim(skc->skc_private);

        if (spl_kmem_cache_expire & KMC_EXPIRE_MEM)
            kmem_cache_shrink(skc->skc_linux_cache);

        SGOTO(out, 0);
    }

    /*
     * Prevent concurrent cache reaping when contended.
     */
    if (test_and_set_bit(KMC_BIT_REAPING, &skc->skc_flags))
        SGOTO(out, 0);

    /*
     * When a reclaim function is available it may be invoked repeatedly
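
/*
 * A sketch of supplying a reclaim callback so the reaping logic above has
 * something to invoke (hypothetical callback, cache, and private data; the
 * callback signature matches the skc->skc_reclaim(skc->skc_private) call
 * made above):
 *
 *   static void
 *   my_cache_reclaim(void *priv)
 *   {
 *           release any cheaply rebuilt objects referenced by priv;
 *   }
 *
 *   skc = spl_kmem_cache_create("my_cache", sizeof (my_obj_t), 0,
 *       NULL, NULL, my_cache_reclaim, priv, NULL, 0);
 */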