~ubuntu-branches/ubuntu/precise/linux-ti-omap4/precise


Viewing changes to mm/page_cgroup.c

  • Committer: Bazaar Package Importer
  • Author(s): Paolo Pisati
  • Date: 2011-06-29 15:23:51 UTC
  • mfrom: (26.1.1 natty-proposed)
  • Revision ID: james.westby@ubuntu.com-20110629152351-xs96tm303d95rpbk
Tags: 3.0.0-1200.2
* Rebased against 3.0.0-6.7
* BSP from TI based on 3.0.0

--- a/mm/page_cgroup.c
+++ b/mm/page_cgroup.c
@@ -11,12 +11,11 @@
 #include <linux/swapops.h>
 #include <linux/kmemleak.h>
 
-static void __meminit
-__init_page_cgroup(struct page_cgroup *pc, unsigned long pfn)
+static void __meminit init_page_cgroup(struct page_cgroup *pc, unsigned long id)
 {
         pc->flags = 0;
+        set_page_cgroup_array_id(pc, id);
         pc->mem_cgroup = NULL;
-        pc->page = pfn_to_page(pfn);
         INIT_LIST_HEAD(&pc->lru);
 }
 static unsigned long total_usage;
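
The hunk above removes the per-descriptor back-pointer: init_page_cgroup() no longer stores pc->page and instead records an array id via set_page_cgroup_array_id(), from which lookup_cgroup_page() (added below) recomputes the page. A rough sketch of the saving; field layout is simplified, the names page_cgroup_before/after are invented for illustration, and where the id actually lives is hidden behind the helper and not visible in this diff:

struct page_cgroup_before {             /* sketch of the old layout */
        unsigned long flags;
        struct mem_cgroup *mem_cgroup;
        struct page *page;              /* one pointer per page of RAM */
        struct list_head lru;
};

struct page_cgroup_after {              /* sketch of the new layout */
        unsigned long flags;            /* set_page_cgroup_array_id() records the array id */
        struct mem_cgroup *mem_cgroup;
        struct list_head lru;           /* the page is recomputed by lookup_cgroup_page() */
};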
@@ -43,6 +42,19 @@
         return base + offset;
 }
 
+struct page *lookup_cgroup_page(struct page_cgroup *pc)
+{
+        unsigned long pfn;
+        struct page *page;
+        pg_data_t *pgdat;
+
+        pgdat = NODE_DATA(page_cgroup_array_id(pc));
+        pfn = pc - pgdat->node_page_cgroup + pgdat->node_start_pfn;
+        page = pfn_to_page(pfn);
+        VM_BUG_ON(pc != lookup_page_cgroup(page));
+        return page;
+}
+
 static int __init alloc_node_page_cgroup(int nid)
 {
         struct page_cgroup *base, *pc;
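
In this flat-array (!SPARSEMEM) variant, lookup_cgroup_page() recovers the pfn purely from the descriptor's offset inside the node's array. An illustrative helper showing the same arithmetic; the function name and the example values in the comments are invented:

/* Illustration only: the descriptor-to-pfn arithmetic used above. */
static unsigned long example_pc_to_pfn(struct page_cgroup *pc, pg_data_t *pgdat)
{
        unsigned long index = pc - pgdat->node_page_cgroup;   /* e.g. 5 */
        return index + pgdat->node_start_pfn;                 /* e.g. 0x40000 + 5 = 0x40005 */
}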
@@ -63,7 +75,7 @@
                 return -ENOMEM;
         for (index = 0; index < nr_pages; index++) {
                 pc = base + index;
-                __init_page_cgroup(pc, start_pfn + index);
+                init_page_cgroup(pc, nid);
         }
         NODE_DATA(nid)->node_page_cgroup = base;
         total_usage += table_size;
@@ -105,46 +117,74 @@
         return section->page_cgroup + pfn;
 }
 
-/* __alloc_bootmem...() is protected by !slab_available() */
-static int __init_refok init_section_page_cgroup(unsigned long pfn)
-{
-        struct mem_section *section = __pfn_to_section(pfn);
+struct page *lookup_cgroup_page(struct page_cgroup *pc)
+{
+        struct mem_section *section;
+        struct page *page;
+        unsigned long nr;
+
+        nr = page_cgroup_array_id(pc);
+        section = __nr_to_section(nr);
+        page = pfn_to_page(pc - section->page_cgroup);
+        VM_BUG_ON(pc != lookup_page_cgroup(page));
+        return page;
+}
+
+static void *__meminit alloc_page_cgroup(size_t size, int nid)
+{
+        void *addr = NULL;
+
+        addr = alloc_pages_exact_nid(nid, size, GFP_KERNEL | __GFP_NOWARN);
+        if (addr)
+                return addr;
+
+        if (node_state(nid, N_HIGH_MEMORY))
+                addr = vmalloc_node(size, nid);
+        else
+                addr = vmalloc(size);
+
+        return addr;
+}
+
+#ifdef CONFIG_MEMORY_HOTPLUG
+static void free_page_cgroup(void *addr)
+{
+        if (is_vmalloc_addr(addr)) {
+                vfree(addr);
+        } else {
+                struct page *page = virt_to_page(addr);
+                size_t table_size =
+                        sizeof(struct page_cgroup) * PAGES_PER_SECTION;
+
+                BUG_ON(PageReserved(page));
+                free_pages_exact(addr, table_size);
+        }
+}
+#endif
+
+static int __meminit init_section_page_cgroup(unsigned long pfn, int nid)
+{
         struct page_cgroup *base, *pc;
+        struct mem_section *section;
         unsigned long table_size;
-        int nid, index;
-
-        if (!section->page_cgroup) {
-                nid = page_to_nid(pfn_to_page(pfn));
-                table_size = sizeof(struct page_cgroup) * PAGES_PER_SECTION;
-                VM_BUG_ON(!slab_is_available());
-                if (node_state(nid, N_HIGH_MEMORY)) {
-                        base = kmalloc_node(table_size,
-                                GFP_KERNEL | __GFP_NOWARN, nid);
-                        if (!base)
-                                base = vmalloc_node(table_size, nid);
-                } else {
-                        base = kmalloc(table_size, GFP_KERNEL | __GFP_NOWARN);
-                        if (!base)
-                                base = vmalloc(table_size);
-                }
-                /*
-                 * The value stored in section->page_cgroup is (base - pfn)
-                 * and it does not point to the memory block allocated above,
-                 * causing kmemleak false positives.
-                 */
-                kmemleak_not_leak(base);
-        } else {
-                /*
-                 * We don't have to allocate page_cgroup again, but
-                 * address of memmap may be changed. So, we have to initialize
-                 * again.
-                 */
-                base = section->page_cgroup + pfn;
-                table_size = 0;
-                /* check address of memmap is changed or not. */
-                if (base->page == pfn_to_page(pfn))
-                        return 0;
-        }
+        unsigned long nr;
+        int index;
+
+        nr = pfn_to_section_nr(pfn);
+        section = __nr_to_section(nr);
+
+        if (section->page_cgroup)
+                return 0;
+
+        table_size = sizeof(struct page_cgroup) * PAGES_PER_SECTION;
+        base = alloc_page_cgroup(table_size, nid);
+
+        /*
+         * The value stored in section->page_cgroup is (base - pfn)
+         * and it does not point to the memory block allocated above,
+         * causing kmemleak false positives.
+         */
+        kmemleak_not_leak(base);
 
         if (!base) {
                 printk(KERN_ERR "page cgroup allocation failure\n");
@@ -153,9 +193,13 @@
 
         for (index = 0; index < PAGES_PER_SECTION; index++) {
                 pc = base + index;
-                __init_page_cgroup(pc, pfn + index);
+                init_page_cgroup(pc, nr);
         }
-
+        /*
+         * The passed "pfn" may not be aligned to SECTION.  For the calculation
+         * we need to apply a mask.
+         */
+        pfn &= PAGE_SECTION_MASK;
         section->page_cgroup = base - pfn;
         total_usage += table_size;
         return 0;
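
The new "pfn &= PAGE_SECTION_MASK;" matters because the caller may pass any pfn within the section, while the stored pointer is biased by the section's first pfn so that lookup_page_cgroup() can simply add the pfn back. A small worked example; PAGES_PER_SECTION is architecture-dependent and 0x8000 is only an illustrative value:

/*
 * PAGE_SECTION_MASK == ~(PAGES_PER_SECTION - 1).
 * With PAGES_PER_SECTION == 0x8000 (example value):
 *
 *     pfn passed in        = 0x12345
 *     pfn &= mask          = 0x10000          (first pfn of the section)
 *     section->page_cgroup = base - 0x10000
 *     lookup(pfn 0x12345)  = base - 0x10000 + 0x12345 = base + 0x2345
 *
 * Without the mask, descriptors for pfns smaller than the one passed in
 * would be looked up in front of the allocated table.
 */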
@@ -170,16 +214,8 @@
         if (!ms || !ms->page_cgroup)
                 return;
         base = ms->page_cgroup + pfn;
-        if (is_vmalloc_addr(base)) {
-                vfree(base);
-                ms->page_cgroup = NULL;
-        } else {
-                struct page *page = virt_to_page(base);
-                if (!PageReserved(page)) { /* Is bootmem ? */
-                        kfree(base);
-                        ms->page_cgroup = NULL;
-                }
-        }
+        free_page_cgroup(base);
+        ms->page_cgroup = NULL;
 }
 
 int __meminit online_page_cgroup(unsigned long start_pfn,
@@ -192,10 +228,20 @@
         start = start_pfn & ~(PAGES_PER_SECTION - 1);
         end = ALIGN(start_pfn + nr_pages, PAGES_PER_SECTION);
 
+        if (nid == -1) {
+                /*
+                 * In this case, "nid" already exists and contains valid memory.
+                 * "start_pfn" passed to us is a pfn which is an arg for
+                 * online__pages(), and start_pfn should exist.
+                 */
+                nid = pfn_to_nid(start_pfn);
+                VM_BUG_ON(!node_state(nid, N_ONLINE));
+        }
+
         for (pfn = start; !fail && pfn < end; pfn += PAGES_PER_SECTION) {
                 if (!pfn_present(pfn))
                         continue;
-                fail = init_section_page_cgroup(pfn);
+                fail = init_section_page_cgroup(pfn, nid);
         }
         if (!fail)
                 return 0;
@@ -243,12 +289,7 @@
                 break;
         }
 
-        if (ret)
-                ret = notifier_from_errno(ret);
-        else
-                ret = NOTIFY_OK;
-
-        return ret;
+        return notifier_from_errno(ret);
 }
 
 #endif
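
The removed if/else was redundant: notifier_from_errno(0) already evaluates to NOTIFY_OK, and only a non-zero errno is turned into a stop value. Roughly, paraphrased from include/linux/notifier.h of that era and offered as a sketch rather than a quote:

static inline int notifier_from_errno(int err)
{
        if (err)
                return NOTIFY_STOP_MASK | (NOTIFY_OK - err);

        return NOTIFY_OK;
}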
@@ -256,25 +297,47 @@
 void __init page_cgroup_init(void)
 {
         unsigned long pfn;
-        int fail = 0;
+        int nid;
 
         if (mem_cgroup_disabled())
                 return;
 
-        for (pfn = 0; !fail && pfn < max_pfn; pfn += PAGES_PER_SECTION) {
-                if (!pfn_present(pfn))
-                        continue;
-                fail = init_section_page_cgroup(pfn);
-        }
-        if (fail) {
-                printk(KERN_CRIT "try 'cgroup_disable=memory' boot option\n");
-                panic("Out of memory");
-        } else {
-                hotplug_memory_notifier(page_cgroup_callback, 0);
-        }
+        for_each_node_state(nid, N_HIGH_MEMORY) {
+                unsigned long start_pfn, end_pfn;
+
+                start_pfn = node_start_pfn(nid);
+                end_pfn = node_end_pfn(nid);
+                /*
+                 * start_pfn and end_pfn may not be aligned to SECTION and the
+                 * page->flags of out of node pages are not initialized.  So we
+                 * scan [start_pfn, the biggest section's pfn < end_pfn) here.
+                 */
+                for (pfn = start_pfn;
+                     pfn < end_pfn;
+                     pfn = ALIGN(pfn + 1, PAGES_PER_SECTION)) {
+
+                        if (!pfn_valid(pfn))
+                                continue;
+                        /*
+                         * Nodes's pfns can be overlapping.
+                         * We know some arch can have a nodes layout such as
+                         * -------------pfn-------------->
+                         * N0 | N1 | N2 | N0 | N1 | N2|....
+                         */
+                        if (pfn_to_nid(pfn) != nid)
+                                continue;
+                        if (init_section_page_cgroup(pfn, nid))
+                                goto oom;
+                }
+        }
+        hotplug_memory_notifier(page_cgroup_callback, 0);
         printk(KERN_INFO "allocated %ld bytes of page_cgroup\n", total_usage);
-        printk(KERN_INFO "please try 'cgroup_disable=memory' option if you don't"
-        " want memory cgroups\n");
+        printk(KERN_INFO "please try 'cgroup_disable=memory' option if you "
+                         "don't want memory cgroups\n");
+        return;
+oom:
+        printk(KERN_CRIT "try 'cgroup_disable=memory' boot option\n");
+        panic("Out of memory");
 }
 
 void __meminit pgdat_page_cgroup_init(struct pglist_data *pgdat)
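
The rewritten page_cgroup_init() walks each node's pfn range and steps one section at a time with ALIGN(pfn + 1, PAGES_PER_SECTION), so unaligned node boundaries and interleaved nodes are handled without touching out-of-node pages. How the stepping behaves, again with an illustrative PAGES_PER_SECTION of 0x8000:

/*
 * ALIGN(x, a) rounds x up to a multiple of a (a is a power of two):
 *
 *     pfn = 0x10000  ->  ALIGN(0x10001, 0x8000) = 0x18000   (next section)
 *     pfn = 0x17fff  ->  ALIGN(0x18000, 0x8000) = 0x18000
 *
 * so exactly one pfn per section is inspected even when start_pfn is
 * not section-aligned.
 */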
@@ -349,7 +412,7 @@
  * @new: new id
  *
  * Returns old id at success, 0 at failure.
- * (There is no mem_cgroup useing 0 as its id)
+ * (There is no mem_cgroup using 0 as its id)
  */
 unsigned short swap_cgroup_cmpxchg(swp_entry_t ent,
                                         unsigned short old, unsigned short new)
@@ -447,7 +510,7 @@
         if (!do_swap_account)
                 return 0;
 
-        length = ((max_pages/SC_PER_PAGE) + 1);
+        length = DIV_ROUND_UP(max_pages, SC_PER_PAGE);
         array_size = length * sizeof(void *);
 
         array = vmalloc(array_size);
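
DIV_ROUND_UP(max_pages, SC_PER_PAGE) replaces the old "(max_pages/SC_PER_PAGE) + 1", which allocated one extra map slot whenever max_pages was an exact multiple of SC_PER_PAGE. A worked example; the SC_PER_PAGE value is chosen only for illustration:

/*
 * DIV_ROUND_UP(n, d) == (n + d - 1) / d
 *
 * With SC_PER_PAGE == 2048 (illustrative):
 *     max_pages = 4096:  old formula = 3 slots,  DIV_ROUND_UP = 2
 *     max_pages = 4097:  old formula = 3 slots,  DIV_ROUND_UP = 3
 */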
@@ -464,8 +527,8 @@
                 /* memory shortage */
                 ctrl->map = NULL;
                 ctrl->length = 0;
+                mutex_unlock(&swap_cgroup_mutex);
                 vfree(array);
-                mutex_unlock(&swap_cgroup_mutex);
                 goto nomem;
         }
         mutex_unlock(&swap_cgroup_mutex);
@@ -480,7 +543,8 @@
 
 void swap_cgroup_swapoff(int type)
 {
-        int i;
+        struct page **map;
+        unsigned long i, length;
         struct swap_cgroup_ctrl *ctrl;
 
         if (!do_swap_account)
@@ -488,17 +552,20 @@
 
         mutex_lock(&swap_cgroup_mutex);
         ctrl = &swap_cgroup_ctrl[type];
-        if (ctrl->map) {
-                for (i = 0; i < ctrl->length; i++) {
-                        struct page *page = ctrl->map[i];
+        map = ctrl->map;
+        length = ctrl->length;
+        ctrl->map = NULL;
+        ctrl->length = 0;
+        mutex_unlock(&swap_cgroup_mutex);
+
+        if (map) {
+                for (i = 0; i < length; i++) {
+                        struct page *page = map[i];
                         if (page)
                                 __free_page(page);
                 }
-                vfree(ctrl->map);
-                ctrl->map = NULL;
-                ctrl->length = 0;
+                vfree(map);
         }
-        mutex_unlock(&swap_cgroup_mutex);
 }
 
 #endif
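
The reworked swap_cgroup_swapoff() detaches the map while holding swap_cgroup_mutex and does all the freeing afterwards, so __free_page() and vfree() no longer run inside the critical section. The same detach-then-free pattern in miniature, as a generic userspace sketch; it is pthread-based, not kernel code, and the names are invented:

#include <stdlib.h>
#include <pthread.h>

struct ctrl {
        void **map;
        unsigned long length;
};

static pthread_mutex_t ctrl_lock = PTHREAD_MUTEX_INITIALIZER;

static void teardown(struct ctrl *c)
{
        void **map;
        unsigned long i, length;

        pthread_mutex_lock(&ctrl_lock);
        map = c->map;                   /* detach under the lock */
        length = c->length;
        c->map = NULL;
        c->length = 0;
        pthread_mutex_unlock(&ctrl_lock);

        if (!map)
                return;
        for (i = 0; i < length; i++)    /* free outside the lock */
                free(map[i]);
        free(map);
}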