/*
 * Copyright (C) 2011 Citrix Systems Inc.
 *
 * This file is part of Blktap2.
 *
 * Blktap2 is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2
 * as published by the Free Software Foundation.
 *
 * Blktap2 is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License version 2 for more details.
 *
 * You should have received a copy of the GNU General Public License
 * version 2 along with Blktap2.  If not, see
 * <http://www.gnu.org/licenses/>.
 */
#include <linux/mempool.h>
#include <linux/spinlock.h>
#include <linux/mutex.h>
#include <linux/sched.h>
#include <linux/device.h>
#include <linux/slab.h>

/* NOTE(review): BLKTAP_SEGMENT_MAX / BLKTAP_RING_SIZE and the struct
 * definitions come from the driver's own header, included above this
 * chunk — confirm against the full file. */

/* max pages per shared pool. just to prevent accidental dos. */
#define POOL_MAX_PAGES           (256*BLKTAP_SEGMENT_MAX)

/* default page pool size. when considering to shrink a shared pool,
 * note that paused tapdisks may grab a whole lot of pages for a long
 * time. */
#define POOL_DEFAULT_PAGES       (2 * BLKTAP_RING_SIZE * BLKTAP_SEGMENT_MAX)

/* max number of pages allocatable per request. */
#define POOL_MAX_REQUEST_PAGES   BLKTAP_SEGMENT_MAX

/* min request structs per pool. These grow dynamically. */
#define POOL_MIN_REQS            BLKTAP_RING_SIZE

/* sysfs kset under which every shared page pool is registered */
static struct kset *pool_set;

#define kobj_to_pool(_kobj) \
	container_of(_kobj, struct blktap_page_pool, kobj)

/* slab cache and mempool backing struct blktap_request allocations */
static struct kmem_cache *request_cache;
static mempool_t *request_pool;
55
__page_pool_wake(struct blktap_page_pool *pool)
57
mempool_t *mem = pool->bufs;
60
NB. slightly wasteful to always wait for a full segment
61
set. but this ensures the next disk makes
62
progress. presently, the repeated request struct
63
alloc/release cycles would otherwise keep everyone spinning.
66
if (mem->curr_nr >= POOL_MAX_REQUEST_PAGES)
71
blktap_request_get_pages(struct blktap *tap,
72
struct blktap_request *request, int nr_pages)
74
struct blktap_page_pool *pool = tap->pool;
75
mempool_t *mem = pool->bufs;
78
BUG_ON(request->nr_pages != 0);
79
BUG_ON(nr_pages > POOL_MAX_REQUEST_PAGES);
81
if (mem->curr_nr < nr_pages)
84
/* NB. avoid thundering herds of tapdisks colliding. */
85
spin_lock(&pool->lock);
87
if (mem->curr_nr < nr_pages) {
88
spin_unlock(&pool->lock);
92
while (request->nr_pages < nr_pages) {
93
page = mempool_alloc(mem, GFP_NOWAIT);
95
request->pages[request->nr_pages++] = page;
98
spin_unlock(&pool->lock);
104
blktap_request_put_pages(struct blktap *tap,
105
struct blktap_request *request)
107
struct blktap_page_pool *pool = tap->pool;
110
while (request->nr_pages) {
111
page = request->pages[--request->nr_pages];
112
mempool_free(page, pool->bufs);
117
blktap_request_debug(struct blktap *tap, char *buf, size_t size)
119
struct blktap_page_pool *pool = tap->pool;
120
mempool_t *mem = pool->bufs;
121
char *s = buf, *end = buf + size;
123
s += snprintf(buf, end - s,
124
"pool:%s pages:%d free:%d\n",
125
kobject_name(&pool->kobj),
126
mem->min_nr, mem->curr_nr);
131
struct blktap_request*
132
blktap_request_alloc(struct blktap *tap)
134
struct blktap_request *request;
136
request = mempool_alloc(request_pool, GFP_NOWAIT);
144
blktap_request_free(struct blktap *tap,
145
struct blktap_request *request)
147
blktap_request_put_pages(tap, request);
149
mempool_free(request, request_pool);
151
__page_pool_wake(tap->pool);
155
blktap_request_bounce_seg(struct blktap *tap,
156
struct blktap_request *request,
159
struct scatterlist *sg = &request->sg_table[seg];
162
BUG_ON(seg >= request->nr_pages);
165
p = page_address(request->pages[seg]) + sg->offset;
168
memcpy(p, s, sg->length);
170
memcpy(s, p, sg->length);
174
blktap_request_bounce(struct blktap *tap,
175
struct blktap_request *request,
180
for (seg = 0; seg < request->nr_pages; seg++)
181
blktap_request_bounce_seg(tap, request, seg, write);
185
blktap_request_ctor(void *obj)
187
struct blktap_request *request = obj;
189
memset(request, 0, sizeof(*request));
190
sg_init_table(request->sg_table, ARRAY_SIZE(request->sg_table));
194
blktap_page_pool_resize(struct blktap_page_pool *pool, int target)
196
mempool_t *bufs = pool->bufs;
199
/* NB. mempool asserts min_nr >= 1 */
200
target = max(1, target);
202
err = mempool_resize(bufs, target, GFP_KERNEL);
206
__page_pool_wake(pool);
211
struct pool_attribute {
212
struct attribute attr;
214
ssize_t (*show)(struct blktap_page_pool *pool,
217
ssize_t (*store)(struct blktap_page_pool *pool,
218
const char *buf, size_t count);
221
#define kattr_to_pool_attr(_kattr) \
222
container_of(_kattr, struct pool_attribute, attr)
225
blktap_page_pool_show_size(struct blktap_page_pool *pool,
228
mempool_t *mem = pool->bufs;
229
return sprintf(buf, "%d", mem->min_nr);
233
blktap_page_pool_store_size(struct blktap_page_pool *pool,
234
const char *buf, size_t size)
239
* NB. target fixup to avoid undesired results. less than a
240
* full segment set can wedge the disk. much more than a
241
* couple times the physical queue depth is rarely useful.
244
target = simple_strtoul(buf, NULL, 0);
245
target = max(POOL_MAX_REQUEST_PAGES, target);
246
target = min(target, POOL_MAX_PAGES);
248
return blktap_page_pool_resize(pool, target) ? : size;
251
static struct pool_attribute blktap_page_pool_attr_size =
252
__ATTR(size, S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH,
253
blktap_page_pool_show_size,
254
blktap_page_pool_store_size);
257
blktap_page_pool_show_free(struct blktap_page_pool *pool,
260
mempool_t *mem = pool->bufs;
261
return sprintf(buf, "%d", mem->curr_nr);
264
static struct pool_attribute blktap_page_pool_attr_free =
265
__ATTR(free, S_IRUSR|S_IRGRP|S_IROTH,
266
blktap_page_pool_show_free,
269
static struct attribute *blktap_page_pool_attrs[] = {
270
&blktap_page_pool_attr_size.attr,
271
&blktap_page_pool_attr_free.attr,
275
static inline struct kobject*
276
__blktap_kset_find_obj(struct kset *kset, const char *name)
279
struct kobject *ret = NULL;
281
spin_lock(&kset->list_lock);
282
list_for_each_entry(k, &kset->list, entry) {
283
if (kobject_name(k) && !strcmp(kobject_name(k), name)) {
284
ret = kobject_get(k);
288
spin_unlock(&kset->list_lock);
293
blktap_page_pool_show_attr(struct kobject *kobj, struct attribute *kattr,
296
struct blktap_page_pool *pool = kobj_to_pool(kobj);
297
struct pool_attribute *attr = kattr_to_pool_attr(kattr);
300
return attr->show(pool, buf);
306
blktap_page_pool_store_attr(struct kobject *kobj, struct attribute *kattr,
307
const char *buf, size_t size)
309
struct blktap_page_pool *pool = kobj_to_pool(kobj);
310
struct pool_attribute *attr = kattr_to_pool_attr(kattr);
313
return attr->store(pool, buf, size);
318
static struct sysfs_ops blktap_page_pool_sysfs_ops = {
319
.show = blktap_page_pool_show_attr,
320
.store = blktap_page_pool_store_attr,
324
blktap_page_pool_release(struct kobject *kobj)
326
struct blktap_page_pool *pool = kobj_to_pool(kobj);
327
mempool_destroy(pool->bufs);
331
struct kobj_type blktap_page_pool_ktype = {
332
.release = blktap_page_pool_release,
333
.sysfs_ops = &blktap_page_pool_sysfs_ops,
334
.default_attrs = blktap_page_pool_attrs,
338
__mempool_page_alloc(gfp_t gfp_mask, void *pool_data)
342
if (!(gfp_mask & __GFP_WAIT))
345
page = alloc_page(gfp_mask);
347
SetPageReserved(page);
/* mempool page destructor: undo the Reserved mark and free the page. */
static void
__mempool_page_free(void *element, void *pool_data)
{
	struct page *page = element;

	ClearPageReserved(page);
	__free_page(page);
}
361
static struct kobject*
362
blktap_page_pool_create(const char *name, int nr_pages)
364
struct blktap_page_pool *pool;
367
pool = kzalloc(sizeof(*pool), GFP_KERNEL);
371
spin_lock_init(&pool->lock);
372
init_waitqueue_head(&pool->wait);
374
pool->bufs = mempool_create(nr_pages,
375
__mempool_page_alloc, __mempool_page_free,
380
kobject_init(&pool->kobj, &blktap_page_pool_ktype);
381
pool->kobj.kset = pool_set;
382
err = kobject_add(&pool->kobj, &pool_set->kobj, "%s", name);
388
kobject_del(&pool->kobj);
390
mempool_destroy(pool->bufs);
397
struct blktap_page_pool*
398
blktap_page_pool_get(const char *name)
400
struct kobject *kobj;
402
kobj = __blktap_kset_find_obj(pool_set, name);
404
kobj = blktap_page_pool_create(name,
407
return ERR_PTR(-ENOMEM);
409
return kobj_to_pool(kobj);
413
blktap_page_pool_init(struct kobject *parent)
416
kmem_cache_create("blktap-request",
417
sizeof(struct blktap_request), 0,
418
0, blktap_request_ctor);
423
mempool_create_slab_pool(POOL_MIN_REQS, request_cache);
427
pool_set = kset_create_and_add("pools", NULL, parent);
435
blktap_page_pool_exit(void)
438
BUG_ON(!list_empty(&pool_set->list));
439
kset_unregister(pool_set);
444
mempool_destroy(request_pool);
449
kmem_cache_destroy(request_cache);
450
request_cache = NULL;