   30    30   #include <linux/pagevec.h>
   31    31   #include <linux/blkdev.h>
   32    32   #include <linux/slab.h>
         33 + #include <linux/ratelimit.h>
   33    34   #include <linux/oom.h>
   34    35   #include <linux/notifier.h>
   35    36   #include <linux/topology.h>
   39    40   #include <linux/memory_hotplug.h>
   40    41   #include <linux/nodemask.h>
   41    42   #include <linux/vmalloc.h>
         43 + #include <linux/vmstat.h>
   42    44   #include <linux/mempolicy.h>
   43    45   #include <linux/stop_machine.h>
   44    46   #include <linux/sort.h>
   53    55   #include <linux/compaction.h>
   54    56   #include <trace/events/kmem.h>
   55    57   #include <linux/ftrace_event.h>
   56       - #include <trace/page_alloc.h>
         58 + #include <linux/memcontrol.h>
         59 + #include <linux/prefetch.h>
   58    61   #include <asm/tlbflush.h>
   59    62   #include <asm/div64.h>
  569   569   if (unlikely(page_mapcount(page) |
  570   570       (page->mapping != NULL) |
  571   571       (atomic_read(&page->_count) != 0) |
  572       -     (page->flags & PAGE_FLAGS_CHECK_AT_FREE))) {
        572 +     (page->flags & PAGE_FLAGS_CHECK_AT_FREE) |
        573 +     (mem_cgroup_bad_page_check(page)))) {
  618   619           list = &pcp->lists[migratetype];
  619   620   } while (list_empty(list));
        622 + /* This is the only non-empty list. Free them all. */
        623 + if (batch_free == MIGRATE_PCPTYPES)
        624 +         batch_free = to_free;
  622   627   page = list_entry(list->prev, struct page, lru);
  623   628   /* must delete as __free_one_page list manipulates */
  651   656   trace_mm_page_free_direct(page, order);
  652   657   kmemcheck_free_shadow(page, order);
  654       - trace_page_free(page, order);
  656   659   if (PageAnon(page))
  657   660           page->mapping = NULL;
  658   661   for (i = 0; i < (1 << order); i++)
  756   759   if (unlikely(page_mapcount(page) |
  757   760       (page->mapping != NULL) |
  758   761       (atomic_read(&page->_count) != 0) |
  759       -     (page->flags & PAGE_FLAGS_CHECK_AT_PREP))) {
        762 +     (page->flags & PAGE_FLAGS_CHECK_AT_PREP) |
        763 +     (mem_cgroup_bad_page_check(page)))) {
  942   945    * If breaking a large block of pages, move all free
  943   946    * pages to the preferred allocation list. If falling
  944   947    * back for a reclaimable kernel allocation, be more
  945       -  * agressive about taking ownership of free pages
        948 +  * aggressive about taking ownership of free pages
  947   950   if (unlikely(current_order >= (pageblock_order >> 1)) ||
  948   951       start_migratetype == MIGRATE_RECLAIMABLE ||
 1341  1344   __count_zone_vm_events(PGALLOC, zone, 1 << order);
 1342       - zone_statistics(preferred_zone, zone);
       1345 + zone_statistics(preferred_zone, zone, gfp_flags);
 1343  1346   local_irq_restore(flags);
 1345  1348   VM_BUG_ON(bad_range(zone, page));
       1727 +  * Large machines with many possible nodes should not always dump per-node
       1728 +  * meminfo in irq context.
       1730 + static inline bool should_suppress_show_mem(void)
       1735 +         ret = in_interrupt();
       1740 + static DEFINE_RATELIMIT_STATE(nopage_rs,
       1741 +                               DEFAULT_RATELIMIT_INTERVAL,
       1742 +                               DEFAULT_RATELIMIT_BURST);
       1744 + void warn_alloc_failed(gfp_t gfp_mask, int order, const char *fmt, ...)
       1747 +         unsigned int filter = SHOW_MEM_FILTER_NODES;
       1749 +         if ((gfp_mask & __GFP_NOWARN) || !__ratelimit(&nopage_rs))
       1753 +          * This documents exceptions given to allocations in certain
       1754 +          * contexts that are allowed to allocate outside current's set
       1757 +         if (!(gfp_mask & __GFP_NOMEMALLOC))
       1758 +                 if (test_thread_flag(TIF_MEMDIE) ||
       1759 +                     (current->flags & (PF_MEMALLOC | PF_EXITING)))
       1760 +                         filter &= ~SHOW_MEM_FILTER_NODES;
       1761 +         if (in_interrupt() || !(gfp_mask & __GFP_WAIT))
       1762 +                 filter &= ~SHOW_MEM_FILTER_NODES;
       1765 +         printk(KERN_WARNING);
       1766 +         va_start(args, fmt);
       1771 +         pr_warning("%s: page allocation failure: order:%d, mode:0x%x\n",
       1772 +                    current->comm, order, gfp_mask);
       1775 +         if (!should_suppress_show_mem())
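The new warn_alloc_failed() helper is rate limited through the kernel's <linux/ratelimit.h> API (hence the new include at the top of the file): nopage_rs is declared with DEFINE_RATELIMIT_STATE() and the function returns early whenever __ratelimit() says the burst budget is exhausted. A minimal sketch of that same pattern in an unrelated caller is shown below; my_warn_rs and my_driver_warn() are hypothetical names, not part of this patch.

#include <linux/kernel.h>
#include <linux/ratelimit.h>

/* Allow at most DEFAULT_RATELIMIT_BURST messages per DEFAULT_RATELIMIT_INTERVAL. */
static DEFINE_RATELIMIT_STATE(my_warn_rs,
                              DEFAULT_RATELIMIT_INTERVAL,
                              DEFAULT_RATELIMIT_BURST);

/* Hypothetical caller: warn about a failed allocation without flooding the log. */
static void my_driver_warn(int order)
{
        if (!__ratelimit(&my_warn_rs))  /* nonzero means "still within budget" */
                return;

        pr_warning("my_driver: allocation of order %d failed\n", order);
}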
 1723  1779   static inline int
 1724  1780   should_alloc_retry(gfp_t gfp_mask, unsigned int order,
 1725  1781                      unsigned long pages_reclaimed)
 2050  2106   first_zones_zonelist(zonelist, high_zoneidx, NULL,
 2051  2107                        &preferred_zone);
 2053  2110   /* This is the last chance, in general, before the goto nopage. */
 2054  2111   page = get_page_from_freelist(gfp_mask, nodemask, order, zonelist,
 2055  2112                   high_zoneidx, alloc_flags & ~ALLOC_NO_WATERMARKS,
 2165       - if (!(gfp_mask & __GFP_NOWARN) && printk_ratelimit()) {
 2166       -         printk(KERN_WARNING "%s: page allocation failure."
 2167       -                " order:%d, mode:0x%x\n",
 2168       -                current->comm, order, gfp_mask);
       2221 + warn_alloc_failed(gfp_mask, order, NULL);
 2174       - trace_page_alloc(page, order);
 2175  2224   if (kmemcheck_enabled)
 2176  2225           kmemcheck_pagealloc_alloc(page, order, gfp_mask);
 2291  2340   EXPORT_SYMBOL(free_pages);
       2342 + static void *make_alloc_exact(unsigned long addr, unsigned order, size_t size)
       2345 +         unsigned long alloc_end = addr + (PAGE_SIZE << order);
       2346 +         unsigned long used = addr + PAGE_ALIGN(size);
       2348 +         split_page(virt_to_page((void *)addr), order);
       2349 +         while (used < alloc_end) {
       2354 +         return (void *)addr;
 2294  2358    * alloc_pages_exact - allocate an exact number physically-contiguous pages.
 2295  2359    * @size: the number of bytes to allocate
 2309  2373   unsigned long addr;
 2311  2375   addr = __get_free_pages(gfp_mask, order);
 2313       - unsigned long alloc_end = addr + (PAGE_SIZE << order);
 2314       - unsigned long used = addr + PAGE_ALIGN(size);
 2316       - split_page(virt_to_page((void *)addr), order);
 2317       - while (used < alloc_end) {
 2323       - return (void *)addr;
       2376 + return make_alloc_exact(addr, order, size);
 2325  2378   EXPORT_SYMBOL(alloc_pages_exact);
       2381 +  * alloc_pages_exact_nid - allocate an exact number of physically-contiguous
       2383 +  * @nid: the preferred node ID where memory should be allocated
       2384 +  * @size: the number of bytes to allocate
       2385 +  * @gfp_mask: GFP flags for the allocation
       2387 +  * Like alloc_pages_exact(), but try to allocate on node nid first before falling
       2389 +  * Note this is not alloc_pages_exact_node() which allocates on a specific node,
       2392 + void *alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask)
       2394 +         unsigned order = get_order(size);
       2395 +         struct page *p = alloc_pages_node(nid, gfp_mask, order);
       2398 +         return make_alloc_exact((unsigned long)page_address(p), order, size);
       2400 + EXPORT_SYMBOL(alloc_pages_exact_nid);
 2328  2403    * free_pages_exact - release memory allocated via alloc_pages_exact()
 2329  2404    * @virt: the value returned by alloc_pages_exact.
 2330  2405    * @size: size of allocation, same value as passed to alloc_pages_exact().
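The hunk above moves the trimming arithmetic that alloc_pages_exact() used to open-code into a shared make_alloc_exact() helper, so the new alloc_pages_exact_nid() can reuse it: a power-of-two block is allocated, split_page() breaks it into individual pages, and the loop (whose body is elided in this view) walks from PAGE_ALIGN(size) up to the end of the block, releasing the unused tail. A small user-space sketch of just that arithmetic follows; PAGE_SIZE, get_order_demo() and the sample request size are assumptions for illustration, not kernel code.

/* cc -o exact_demo exact_demo.c && ./exact_demo */
#include <stdio.h>
#include <stddef.h>

#define PAGE_SIZE     4096UL                            /* assumed page size */
#define PAGE_ALIGN(x) (((x) + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))

/* Smallest order such that (PAGE_SIZE << order) >= size, like get_order(). */
static unsigned int get_order_demo(size_t size)
{
        unsigned int order = 0;

        while ((PAGE_SIZE << order) < size)
                order++;
        return order;
}

int main(void)
{
        size_t size = 5 * PAGE_SIZE + 123;              /* an "exact" request */
        unsigned int order = get_order_demo(size);
        unsigned long addr = 0x100000UL;                /* pretend base address */
        unsigned long alloc_end = addr + (PAGE_SIZE << order);
        unsigned long used = addr + PAGE_ALIGN(size);

        printf("request %zu bytes -> order %u (%lu bytes)\n",
               size, order, PAGE_SIZE << order);
        printf("pages kept: %lu, tail pages handed back: %lu\n",
               (used - addr) / PAGE_SIZE, (alloc_end - used) / PAGE_SIZE);
        return 0;
}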
       2497 +  * Determine whether the node should be displayed or not, depending on whether
       2498 +  * SHOW_MEM_FILTER_NODES was passed to show_free_areas().
       2500 + bool skip_free_areas_node(unsigned int flags, int nid)
       2504 +         if (!(flags & SHOW_MEM_FILTER_NODES))
       2508 +         ret = !node_isset(nid, cpuset_current_mems_allowed);
 2421  2514   #define K(x) ((x) << (PAGE_SHIFT-10))
 2424  2517    * Show free area list (used inside shift_scroll-lock stuff)
 2425  2518    * We also calculate the percentage fragmentation. We do this by counting the
 2426  2519    * memory on each free list with the exception of the first item on the list.
       2520 +  * Suppresses nodes that are not allowed by current's cpuset if
       2521 +  * SHOW_MEM_FILTER_NODES is passed.
 2428       - void show_free_areas(void)
       2523 + void show_free_areas(unsigned int filter)
 2431  2526   struct zone *zone;
 2433  2528   for_each_populated_zone(zone) {
       2529 +         if (skip_free_areas_node(filter, zone_to_nid(zone)))
 2434  2531           show_node(zone);
 2435  2532           printk("%s per-cpu:\n", zone->name);
 2539  2638   for_each_populated_zone(zone) {
 2540  2639           unsigned long nr[MAX_ORDER], flags, order, total = 0;
       2641 +         if (skip_free_areas_node(filter, zone_to_nid(zone)))
 2542  2643           show_node(zone);
 2543  2644           printk("%s: ", zone->name);
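show_free_areas() now takes a filter argument and asks skip_free_areas_node() whether a zone's node should be printed, so that with SHOW_MEM_FILTER_NODES set, nodes outside the current task's cpuset are suppressed. The following user-space sketch mirrors that flag-gated filtering; SHOW_NODE_FILTER, allowed_node() and the four-node loop are invented for illustration only.

/* cc -o filter_demo filter_demo.c && ./filter_demo */
#include <stdbool.h>
#include <stdio.h>

#define SHOW_NODE_FILTER 0x1            /* stand-in for SHOW_MEM_FILTER_NODES */

/* Toy policy: only node 0 is "allowed" for the current task. */
static bool allowed_node(int nid)
{
        return nid == 0;
}

/* Analogue of skip_free_areas_node(): skip only when the filter flag is set. */
static bool skip_node(unsigned int flags, int nid)
{
        if (!(flags & SHOW_NODE_FILTER))
                return false;
        return !allowed_node(nid);
}

static void show_nodes(unsigned int filter)
{
        int nid;

        for (nid = 0; nid < 4; nid++) {
                if (skip_node(filter, nid))
                        continue;
                printf("node %d: ... per-node statistics ...\n", nid);
        }
}

int main(void)
{
        printf("unfiltered:\n");
        show_nodes(0);
        printf("filtered:\n");
        show_nodes(SHOW_NODE_FILTER);
        return 0;
}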
 3228  3329   #define LONG_ALIGN(x) (((x)+(sizeof(long))-1)&~((sizeof(long))-1))
       3332 +  * Check if a pageblock contains reserved pages
       3334 + static int pageblock_is_reserved(unsigned long start_pfn, unsigned long end_pfn)
       3338 +         for (pfn = start_pfn; pfn < end_pfn; pfn++) {
       3339 +                 if (!pfn_valid_within(pfn) || PageReserved(pfn_to_page(pfn)))
 3231  3346    * Mark a number of pageblocks as MIGRATE_RESERVE. The number
 3232  3347    * of blocks reserved is based on min_wmark_pages(zone). The memory within
 3233  3348    * the reserve will tend to store contiguous free pages. Setting min_free_kbytes
 3237  3352   static void setup_zone_migrate_reserve(struct zone *zone)
 3239       - unsigned long start_pfn, pfn, end_pfn;
       3354 + unsigned long start_pfn, pfn, end_pfn, block_end_pfn;
 3240  3355   struct page *page;
 3241  3356   unsigned long block_migratetype;
 3268  3383   /* Blocks with reserved pages will never free, skip them. */
 3269       - if (PageReserved(page))
       3384 + block_end_pfn = min(pfn + pageblock_nr_pages, end_pfn);
       3385 + if (pageblock_is_reserved(pfn, block_end_pfn))
 3272  3388   block_migratetype = get_pageblock_migratetype(page);
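Where setup_zone_migrate_reserve() previously tested a single struct page with PageReserved(), it now calls the new pageblock_is_reserved() helper, which scans every pfn in the block, so a block is skipped if any of its pages is reserved. A self-contained user-space sketch of the difference; PAGES_PER_BLOCK, page_reserved() and the toy reserved map are made up for illustration.

/* cc -o block_check_demo block_check_demo.c && ./block_check_demo */
#include <stdbool.h>
#include <stdio.h>

#define PAGES_PER_BLOCK 8               /* stand-in for pageblock_nr_pages */

/* Toy "reserved" map: only page 5 of the block is reserved. */
static bool page_reserved(unsigned long pfn)
{
        return pfn == 5;
}

/* Analogue of pageblock_is_reserved(): true if any page in [start, end) is reserved. */
static bool block_is_reserved(unsigned long start, unsigned long end)
{
        unsigned long pfn;

        for (pfn = start; pfn < end; pfn++)
                if (page_reserved(pfn))
                        return true;
        return false;
}

int main(void)
{
        /* Checking only the block's first page misses the reserved page... */
        printf("first-page check: %s\n", page_reserved(0) ? "reserved" : "free");
        /* ...while the whole-block scan catches it. */
        printf("whole-block check: %s\n",
               block_is_reserved(0, PAGES_PER_BLOCK) ? "reserved" : "free");
        return 0;
}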
 3506  3622   if (!slab_is_available()) {
 3507  3623           zone->wait_table = (wait_queue_head_t *)
 3508       -                 alloc_bootmem_node(pgdat, alloc_size);
       3624 +                 alloc_bootmem_node_nopanic(pgdat, alloc_size);
 3511  3627    * This case means that a zone whose size was 0 gets new memory
 3708  3824   #ifdef CONFIG_HAVE_MEMBLOCK
       3826 +  * Basic iterator support. Return the last range of PFNs for a node
       3827 +  * Note: nid == MAX_NUMNODES returns last region regardless of node
       3829 + static int __meminit last_active_region_index_in_nid(int nid)
       3833 +         for (i = nr_nodemap_entries - 1; i >= 0; i--)
       3834 +                 if (nid == MAX_NUMNODES || early_node_map[i].nid == nid)
       3841 +  * Basic iterator support. Return the previous active range of PFNs for a node
       3842 +  * Note: nid == MAX_NUMNODES returns next region regardless of node
       3844 + static int __meminit previous_active_region_index_in_nid(int index, int nid)
       3846 +         for (index = index - 1; index >= 0; index--)
       3847 +                 if (nid == MAX_NUMNODES || early_node_map[index].nid == nid)
       3853 + #define for_each_active_range_index_in_nid_reverse(i, nid) \
       3854 +         for (i = last_active_region_index_in_nid(nid); i != -1; \
       3855 +              i = previous_active_region_index_in_nid(i, nid))
 3709  3857   u64 __init find_memory_core_early(int nid, u64 size, u64 align,
 3710  3858                                     u64 goal, u64 limit)
 3714  3862   /* Need to go over early_node_map to find out good range for node */
 3715       - for_each_active_range_index_in_nid(i, nid) {
       3863 + for_each_active_range_index_in_nid_reverse(i, nid) {
 3717  3865           u64 ei_start, ei_last;
 3718  3866           u64 final_start, final_end;
 3755  3903   return nr_range;
 3758       - #ifdef CONFIG_NO_BOOTMEM
 3759       - void * __init __alloc_memory_core_early(int nid, u64 size, u64 align,
 3760       -                                         u64 goal, u64 limit)
 3765       -         if (limit > memblock.current_limit)
 3766       -                 limit = memblock.current_limit;
 3768       -         addr = find_memory_core_early(nid, size, align, goal, limit);
 3770       -         if (addr == MEMBLOCK_ERROR)
 3773       -         ptr = phys_to_virt(addr);
 3774       -         memset(ptr, 0, size);
 3775       -         memblock_x86_reserve_range(addr, addr + size, "BOOTMEM");
 3777       -          * The min_count is set to 0 so that bootmem allocated blocks
 3778       -          * are never reported as leaks.
 3780       -         kmemleak_alloc(ptr, size, 0, 0);
 3786  3906   void __init work_with_active_regions(int nid, work_fn_t work_fn, void *data)
 3865  3985    * The zone ranges provided by the architecture do not include ZONE_MOVABLE
 3866       -  * because it is sized independant of architecture. Unlike the other zones,
       3986 +  * because it is sized independent of architecture. Unlike the other zones,
 3867  3987    * the starting point for ZONE_MOVABLE is not fixed. It may be different
 3868  3988    * in each node depending on the size of each node and how evenly kernelcore
 3869  3989    * is distributed. This helper function adjusts the zone ranges
 4078  4198   unsigned long usemapsize = usemap_size(zonesize);
 4079  4199   zone->pageblock_flags = NULL;
 4080  4200   if (usemapsize)
 4081       -         zone->pageblock_flags = alloc_bootmem_node(pgdat, usemapsize);
       4201 +         zone->pageblock_flags = alloc_bootmem_node_nopanic(pgdat,
 4084  4205   static inline void setup_usemap(struct pglist_data *pgdat,
 4198  4319   zone->zone_pgdat = pgdat;
 4200  4321   zone_pcp_init(zone);
 4202  4323           INIT_LIST_HEAD(&zone->lru[l].list);
 4203       -         zone->reclaim_stat.nr_saved_scan[l] = 0;
 4205  4324   zone->reclaim_stat.recent_rotated[0] = 0;
 4206  4325   zone->reclaim_stat.recent_rotated[1] = 0;
 4207  4326   zone->reclaim_stat.recent_scanned[0] = 0;
 4244  4363   size = (end - start) * sizeof(struct page);
 4245  4364   map = alloc_remap(pgdat->node_id, size);
 4247       -         map = alloc_bootmem_node(pgdat, size);
       4366 +         map = alloc_bootmem_node_nopanic(pgdat, size);
 4248  4367   pgdat->node_mem_map = map + (pgdat->node_start_pfn - start);
 4250  4369   #ifndef CONFIG_NEED_MULTIPLE_NODES
 4816  4935   dma_reserve = new_dma_reserve;
 4819       - #ifndef CONFIG_NEED_MULTIPLE_NODES
 4820       - struct pglist_data __refdata contig_page_data = {
 4821       - #ifndef CONFIG_NO_BOOTMEM
 4822       -         .bdata = &bootmem_node_data[0]
 4825       - EXPORT_SYMBOL(contig_page_data);
 4828  4938   void __init free_area_init(unsigned long *zones_size)
 4830  4940   free_area_init_node(0, zones_size,
 5633  5742           page, atomic_read(&page->_count), page_mapcount(page),
 5634  5743           page->mapping, page->index);
 5635  5744   dump_page_flags(page->flags);
       5745 + mem_cgroup_print_bad_page(page);