	preempt_enable();
}

static unsigned long
mem_cgroup_get_zonestat_node(struct mem_cgroup *mem, int nid, enum lru_list idx)
{
	struct mem_cgroup_per_zone *mz;
	u64 total = 0;
	int zid;

	for (zid = 0; zid < MAX_NR_ZONES; zid++) {
		mz = mem_cgroup_zoneinfo(mem, nid, zid);
		total += MEM_CGROUP_ZSTAT(mz, idx);
	}
	return total;
}

static unsigned long mem_cgroup_get_local_zonestat(struct mem_cgroup *mem,
					enum lru_list idx)
{
	int nid;
	u64 total = 0;

	for_each_online_node(nid)
		total += mem_cgroup_get_zonestat_node(mem, nid, idx);
	return total;
}
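/*
 * Illustrative userspace sketch (not from memcontrol.c): the same two-level
 * accumulation pattern as above, with an assumed array shape standing in for
 * the per-zone statistics. It sums one counter across the zones of a node,
 * then across all nodes.
 */
#include <stdio.h>

#define SKETCH_NR_NODES 2
#define SKETCH_NR_ZONES 3

static unsigned long sketch_stat[SKETCH_NR_NODES][SKETCH_NR_ZONES] = {
	{ 10, 20, 30 },
	{ 40, 50, 60 },
};

static unsigned long sketch_node_total(int nid)
{
	unsigned long total = 0;
	int zid;

	for (zid = 0; zid < SKETCH_NR_ZONES; zid++)
		total += sketch_stat[nid][zid];
	return total;
}

static unsigned long sketch_global_total(void)
{
	unsigned long total = 0;
	int nid;

	for (nid = 0; nid < SKETCH_NR_NODES; nid++)
		total += sketch_node_total(nid);
	return total;
}

int main(void)
{
	printf("total=%lu\n", sketch_global_total());	/* prints total=210 */
	return 0;
}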
		mem_cgroup_threshold(mem);
		__mem_cgroup_target_update(mem, MEM_CGROUP_TARGET_THRESH);
		if (unlikely(__memcg_event_check(mem,
			     MEM_CGROUP_TARGET_SOFTLIMIT))) {
			mem_cgroup_update_tree(mem, page);
			__mem_cgroup_target_update(mem,
						   MEM_CGROUP_TARGET_SOFTLIMIT);
		}
#if MAX_NUMNODES > 1
		if (unlikely(__memcg_event_check(mem,
			     MEM_CGROUP_TARGET_NUMAINFO))) {
			atomic_inc(&mem->numainfo_events);
			__mem_cgroup_target_update(mem,
						   MEM_CGROUP_TARGET_NUMAINFO);
		}
#endif
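/*
 * Illustrative standalone sketch (an assumption for exposition, not the
 * kernel implementation): the event-target pattern above fires progressively
 * rarer actions as a counter crosses increasing thresholds, then pushes each
 * threshold forward. The target spacings below are made up for the example.
 */
#include <stdio.h>

#define SKETCH_THRESH_TARGET	128
#define SKETCH_SOFTLIMIT_TARGET	1024
#define SKETCH_NUMAINFO_TARGET	8192

static unsigned long events;
static unsigned long next_thresh = SKETCH_THRESH_TARGET;
static unsigned long next_softlimit = SKETCH_SOFTLIMIT_TARGET;
static unsigned long next_numainfo = SKETCH_NUMAINFO_TARGET;

static void sketch_count_event(void)
{
	events++;
	if (events < next_thresh)
		return;			/* cheap common path */
	next_thresh = events + SKETCH_THRESH_TARGET;
	printf("threshold check at %lu\n", events);
	if (events >= next_softlimit) {	/* rarer: soft limit tree update */
		next_softlimit = events + SKETCH_SOFTLIMIT_TARGET;
		printf("soft limit update at %lu\n", events);
	}
	if (events >= next_numainfo) {	/* rarest: NUMA info refresh request */
		next_numainfo = events + SKETCH_NUMAINFO_TARGET;
		printf("numainfo refresh at %lu\n", events);
	}
}

int main(void)
{
	for (int i = 0; i < 10000; i++)
		sketch_count_event();
	return 0;
}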
	return MEM_CGROUP_ZSTAT(mz, lru);
}

static unsigned long mem_cgroup_node_nr_file_lru_pages(struct mem_cgroup *memcg,
							int nid)
{
	return mem_cgroup_get_zonestat_node(memcg, nid, LRU_INACTIVE_FILE) +
		mem_cgroup_get_zonestat_node(memcg, nid, LRU_ACTIVE_FILE);
}

static unsigned long mem_cgroup_node_nr_anon_lru_pages(struct mem_cgroup *memcg,
							int nid)
{
	return mem_cgroup_get_zonestat_node(memcg, nid, LRU_INACTIVE_ANON) +
		mem_cgroup_get_zonestat_node(memcg, nid, LRU_ACTIVE_ANON);
}

#if MAX_NUMNODES > 1
static unsigned long mem_cgroup_nr_file_lru_pages(struct mem_cgroup *memcg)
{
	u64 total = 0;
	int nid;

	for_each_node_state(nid, N_HIGH_MEMORY)
		total += mem_cgroup_node_nr_file_lru_pages(memcg, nid);
	return total;
}

static unsigned long mem_cgroup_nr_anon_lru_pages(struct mem_cgroup *memcg)
{
	u64 total = 0;
	int nid;

	for_each_node_state(nid, N_HIGH_MEMORY)
		total += mem_cgroup_node_nr_anon_lru_pages(memcg, nid);
	return total;
}

static unsigned long
mem_cgroup_node_nr_unevictable_lru_pages(struct mem_cgroup *memcg, int nid)
{
	return mem_cgroup_get_zonestat_node(memcg, nid, LRU_UNEVICTABLE);
}

static unsigned long
mem_cgroup_nr_unevictable_lru_pages(struct mem_cgroup *memcg)
{
	u64 total = 0;
	int nid;

	for_each_node_state(nid, N_HIGH_MEMORY)
		total += mem_cgroup_node_nr_unevictable_lru_pages(memcg, nid);
	return total;
}

static unsigned long mem_cgroup_node_nr_lru_pages(struct mem_cgroup *memcg,
							int nid)
{
	enum lru_list l;
	u64 total = 0;

	for_each_lru(l)
		total += mem_cgroup_get_zonestat_node(memcg, nid, l);
	return total;
}

static unsigned long mem_cgroup_nr_lru_pages(struct mem_cgroup *memcg)
{
	u64 total = 0;
	int nid;

	for_each_node_state(nid, N_HIGH_MEMORY)
		total += mem_cgroup_node_nr_lru_pages(memcg, nid);
	return total;
}
#endif /* CONFIG_NUMA */
struct zone_reclaim_stat *mem_cgroup_get_reclaim_stat(struct mem_cgroup *memcg,
						      struct zone *zone)
/**
 * test_mem_cgroup_node_reclaimable
 * @mem: the target memcg
 * @nid: the node ID to be checked.
 * @noswap: specify true here if the user wants file-only information.
 *
 * This function returns whether the specified memcg contains any
 * reclaimable pages on a node. Returns true if there are any reclaimable
 * pages in the node.
 */
static bool test_mem_cgroup_node_reclaimable(struct mem_cgroup *mem,
		int nid, bool noswap)
{
	if (mem_cgroup_node_nr_file_lru_pages(mem, nid))
		return true;
	if (noswap || !total_swap_pages)
		return false;
	if (mem_cgroup_node_nr_anon_lru_pages(mem, nid))
		return true;
	return false;
}

#if MAX_NUMNODES > 1

/*
 * Always updating the nodemask is not very good - even if we have an empty
 * list or the wrong list here, we can start from some node and traverse all
 * nodes based on the zonelist. So update the list loosely once per 10 secs.
 */
static void mem_cgroup_may_update_nodemask(struct mem_cgroup *mem)
{
	int nid;
	/*
	 * numainfo_events > 0 means there was at least NUMAINFO_EVENTS_TARGET
	 * pagein/pageout changes since the last update.
	 */
	if (!atomic_read(&mem->numainfo_events))
		return;
	if (atomic_inc_return(&mem->numainfo_updating) > 1)
		return;

	/* make a nodemask where this memcg uses memory from */
	mem->scan_nodes = node_states[N_HIGH_MEMORY];

	for_each_node_mask(nid, node_states[N_HIGH_MEMORY]) {
		if (!test_mem_cgroup_node_reclaimable(mem, nid, false))
			node_clear(nid, mem->scan_nodes);
	}

	atomic_set(&mem->numainfo_events, 0);
	atomic_set(&mem->numainfo_updating, 0);
}
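/*
 * Illustrative userspace sketch of the "first caller wins" guard above:
 * atomic_inc_return(&updating) > 1 means another task is already rebuilding
 * the mask, so we back off instead of racing. C11 atomics stand in for the
 * kernel's atomic_t; the names mirror the fields above, but this is not
 * kernel code.
 */
#include <stdatomic.h>
#include <stdbool.h>

static atomic_int sketch_numainfo_events;
static atomic_int sketch_numainfo_updating;

static bool sketch_try_begin_update(void)
{
	/* nothing happened since the last rebuild: skip the work entirely */
	if (atomic_load(&sketch_numainfo_events) == 0)
		return false;
	/* fetch_add returns the old value; old >= 1 means a rebuild is in flight */
	if (atomic_fetch_add(&sketch_numainfo_updating, 1) >= 1)
		return false;
	return true;
}

static void sketch_end_update(void)
{
	atomic_store(&sketch_numainfo_events, 0);
	atomic_store(&sketch_numainfo_updating, 0);
}

int main(void)
{
	atomic_store(&sketch_numainfo_events, 5);	/* pretend events occurred */
	if (sketch_try_begin_update()) {
		/* rebuild the scan mask here */
		sketch_end_update();
	}
	return 0;
}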
/*
 * Selecting a node where we start reclaim from. Because what we need is just
 * reducing the usage counter, starting from anywhere is OK. Considering
 * memory reclaim from the current node, there are pros and cons.
 *
 * Freeing memory from the current node means freeing memory from a node which
 * we'll use or we've used. So, it may make the LRU bad. And if several threads
 * hit limits, they will contend on one node. But freeing from a remote node
 * means more cost for memory reclaim because of memory latency.
 *
 * Now, we use round-robin. A better algorithm is welcome.
 */
int mem_cgroup_select_victim_node(struct mem_cgroup *mem)
{
	int node;

	mem_cgroup_may_update_nodemask(mem);
	node = mem->last_scanned_node;

	node = next_node(node, mem->scan_nodes);
	if (node == MAX_NUMNODES)
		node = first_node(mem->scan_nodes);
	/*
	 * We call this when we hit the limit, not when pages are added to an
	 * LRU. No LRU may hold pages because all pages are UNEVICTABLE or
	 * the memcg is too small and all pages are not on an LRU. In that
	 * case, we use the current node.
	 */
	if (unlikely(node == MAX_NUMNODES))
		node = numa_node_id();

	mem->last_scanned_node = node;
	return node;
}
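/*
 * Illustrative sketch of the round-robin selection above, using a plain
 * bitmask in place of nodemask_t: take the next set bit after the last
 * scanned node, wrap to the first set bit, and fall back to a default
 * when the mask is empty. Not kernel code.
 */
#include <stdio.h>

#define SKETCH_NR_NODES 8

static int sketch_next_node(unsigned mask, int last)
{
	for (int nid = last + 1; nid < SKETCH_NR_NODES; nid++)
		if (mask & (1u << nid))
			return nid;
	for (int nid = 0; nid < SKETCH_NR_NODES; nid++)	/* wrap around */
		if (mask & (1u << nid))
			return nid;
	return -1;					/* empty mask */
}

int main(void)
{
	unsigned scan_nodes = 0x16;	/* nodes 1, 2 and 4 look reclaimable */
	int node = 0;			/* last_scanned_node */

	for (int i = 0; i < 6; i++) {
		node = sketch_next_node(scan_nodes, node);
		if (node < 0)
			node = 0;	/* stand-in for numa_node_id() */
		printf("victim node: %d\n", node);	/* 1 2 4 1 2 4 */
	}
	return 0;
}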
/*
 * Check all nodes whether they contain reclaimable pages or not.
 * For a quick scan, we make use of scan_nodes. This will allow us to skip
 * unused nodes. But scan_nodes is lazily updated and may not contain
 * enough new information. We need to double-check.
 */
bool mem_cgroup_reclaimable(struct mem_cgroup *mem, bool noswap)
{
	int nid;

	/*
	 * Quick check... making use of scan_nodes.
	 * We can skip unused nodes.
	 */
	if (!nodes_empty(mem->scan_nodes)) {
		for (nid = first_node(mem->scan_nodes);
		     nid < MAX_NUMNODES;
		     nid = next_node(nid, mem->scan_nodes)) {
			if (test_mem_cgroup_node_reclaimable(mem, nid, noswap))
				return true;
		}
	}
	/*
	 * Check the rest of the nodes.
	 */
	for_each_node_state(nid, N_HIGH_MEMORY) {
		if (node_isset(nid, mem->scan_nodes))
			continue;
		if (test_mem_cgroup_node_reclaimable(mem, nid, noswap))
			return true;
	}
	return false;
}

#else
int mem_cgroup_select_victim_node(struct mem_cgroup *mem)
{
	return 0;
}

bool mem_cgroup_reclaimable(struct mem_cgroup *mem, bool noswap)
{
	return test_mem_cgroup_node_reclaimable(mem, 0, noswap);
}
#endif
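/*
 * Illustrative sketch of the two-phase scan above: first walk only the
 * cached candidate set (scan_nodes), then double-check the remaining nodes,
 * because the cached set is updated lazily and may be stale. The predicate
 * below is a stand-in, not kernel code.
 */
#include <stdbool.h>

#define SKETCH_NODES 8

static bool sketch_node_reclaimable(int nid)
{
	return nid == 5;	/* pretend only node 5 has reclaimable pages */
}

static bool sketch_any_reclaimable(unsigned scan_nodes)
{
	/* quick pass: nodes believed reclaimable at the last mask update */
	for (int nid = 0; nid < SKETCH_NODES; nid++)
		if ((scan_nodes & (1u << nid)) && sketch_node_reclaimable(nid))
			return true;
	/* slow pass: the mask may be stale, so check the rest as well */
	for (int nid = 0; nid < SKETCH_NODES; nid++)
		if (!(scan_nodes & (1u << nid)) && sketch_node_reclaimable(nid))
			return true;
	return false;
}

int main(void)
{
	/* node 5 is not in the stale mask, so the slow pass finds it */
	return sketch_any_reclaimable(0x06) ? 0 : 1;
}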
/*
 * Scan the hierarchy if needed to reclaim memory. We remember the last child
 * we reclaimed from, so that we don't end up penalizing one child extensively
 * based on its position in the children list.
 */
static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem,
						struct zone *zone,
						gfp_t gfp_mask,
						unsigned long reclaim_options,
						unsigned long *total_scanned)
{
	struct mem_cgroup *victim;
	int ret, total = 0;
	int loop = 0;
	bool noswap = false;
	bool shrink = reclaim_options & MEM_CGROUP_RECLAIM_SHRINK;
	bool check_soft = reclaim_options & MEM_CGROUP_RECLAIM_SOFT;
	unsigned long excess;
	unsigned long nr_scanned;

	excess = res_counter_soft_limit_excess(&root_mem->res) >> PAGE_SHIFT;

	/* If memsw_is_minimum==1, swap-out is of no use. */
	if (!check_soft && !shrink && root_mem->memsw_is_minimum)
		noswap = true;

	while (1) {
		victim = mem_cgroup_select_victim(root_mem);
		if (victim == root_mem) {
			/*
			 * We are not draining per cpu cached charges during
			 * soft limit reclaim because global reclaim doesn't
			 * care about charges. It tries to free some memory and
			 * charges will not give any.
			 */
			if (!check_soft && loop >= 1)
				drain_all_stock_async(root_mem);
			if (loop >= 2) {
				/*
				 * If we have not been able to reclaim
				 * anything, it might be because there are
				 * no reclaimable pages under this hierarchy.
				 */
				if (!check_soft || !total) {
					css_put(&victim->css);
					break;
				}
			}
		}
		if (!mem_cgroup_reclaimable(victim, noswap)) {
			/* this cgroup's local usage == 0 */
			css_put(&victim->css);
			continue;
		}
		/* we use swappiness of local cgroup */
		if (check_soft) {
			ret = mem_cgroup_shrink_node_zone(victim, gfp_mask,
				noswap, get_swappiness(victim), zone,
				&nr_scanned);
			*total_scanned += nr_scanned;
		} else
			ret = try_to_free_mem_cgroup_pages(victim, gfp_mask,
						noswap, get_swappiness(victim));
		css_put(&victim->css);
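/*
 * Worked example for the excess calculation above: the res_counter tracks
 * bytes, and ">> PAGE_SHIFT" converts the soft-limit excess into pages.
 * The values and the 4KiB page size are assumptions for illustration.
 */
#include <stdio.h>

#define SKETCH_PAGE_SHIFT 12			/* 4KiB pages */

int main(void)
{
	unsigned long long usage = 300ULL << 20;	/* 300 MiB, in bytes */
	unsigned long long soft_limit = 256ULL << 20;	/* 256 MiB, in bytes */
	unsigned long long excess_bytes =
		usage > soft_limit ? usage - soft_limit : 0;
	unsigned long long excess_pages = excess_bytes >> SKETCH_PAGE_SHIFT;

	/* 44 MiB of excess = 11264 pages */
	printf("excess = %llu pages\n", excess_pages);
	return 0;
}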
/*
 * Tries to drain stocked charges on other cpus. This function is asynchronous
 * and just puts a work item per cpu for draining locally on each cpu. The
 * caller can expect some charges to come back to the res_counter later, but
 * it cannot wait for that to happen.
 */
static void drain_all_stock_async(struct mem_cgroup *root_mem)
{
	int cpu, curcpu;
	/*
	 * If someone calls draining, avoid adding more kworker runs.
	 */
	if (!mutex_trylock(&percpu_charge_mutex))
		return;
	/* Notify other cpus that system-wide "drain" is running */
	get_online_cpus();
	/*
	 * Get a hint for avoiding draining charges on the current cpu,
	 * which must be exhausted by our charging. It is not required that
	 * this be a precise check, so we use raw_smp_processor_id() instead of
	 * getcpu()/putcpu().
	 */
	curcpu = raw_smp_processor_id();
	for_each_online_cpu(cpu) {
		struct memcg_stock_pcp *stock = &per_cpu(memcg_stock, cpu);
		struct mem_cgroup *mem;

		if (cpu == curcpu)
			continue;

		mem = stock->cached;
		if (!mem)
			continue;
		if (mem != root_mem) {
			if (!root_mem->use_hierarchy)
				continue;
			/* check whether "mem" is under the tree of "root_mem" */
			if (!css_is_ancestor(&mem->css, &root_mem->css))
				continue;
		}
		if (!test_and_set_bit(FLUSHING_CACHED_CHARGE, &stock->flags))
			schedule_work_on(cpu, &stock->work);
	}
	put_online_cpus();
	mutex_unlock(&percpu_charge_mutex);
	/* We don't wait for flush_work */
}
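/*
 * Illustrative userspace sketch of the draining pattern above: a trylock
 * makes concurrent callers back off instead of queueing duplicate flush
 * work, and a "current cpu" hint lets the caller skip its own cache.
 * pthreads stand in for the kernel primitives; this is not kernel code.
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

#define SKETCH_NR_CPUS 4

static pthread_mutex_t sketch_charge_mutex = PTHREAD_MUTEX_INITIALIZER;
static bool sketch_flushing[SKETCH_NR_CPUS];	/* FLUSHING_CACHED_CHARGE analogue */

static void sketch_schedule_drain(int cpu)
{
	printf("drain scheduled on cpu %d\n", cpu);
}

static void sketch_drain_all_async(int curcpu)
{
	/* someone is already draining: do nothing rather than pile up work */
	if (pthread_mutex_trylock(&sketch_charge_mutex) != 0)
		return;
	for (int cpu = 0; cpu < SKETCH_NR_CPUS; cpu++) {
		if (cpu == curcpu)	/* our own cache was just exhausted */
			continue;
		if (!sketch_flushing[cpu]) {	/* test_and_set_bit analogue (not atomic here) */
			sketch_flushing[cpu] = true;
			sketch_schedule_drain(cpu);
		}
	}
	pthread_mutex_unlock(&sketch_charge_mutex);
}

int main(void)
{
	sketch_drain_all_async(0);	/* drains cpus 1..3, skips cpu 0 */
	return 0;
}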
	{"pgpgin", "total_pgpgin"},
	{"pgpgout", "total_pgpgout"},
	{"swap", "total_swap"},
	{"pgfault", "total_pgfault"},
	{"pgmajfault", "total_pgmajfault"},
	{"inactive_anon", "total_inactive_anon"},
	{"active_anon", "total_active_anon"},
	{"inactive_file", "total_inactive_file"},

		mem_cgroup_get_local_stat(iter, s);
#ifdef CONFIG_NUMA
static int mem_control_numa_stat_show(struct seq_file *m, void *arg)
{
	int nid;
	unsigned long total_nr, file_nr, anon_nr, unevictable_nr;
	unsigned long node_nr;
	struct cgroup *cont = m->private;
	struct mem_cgroup *mem_cont = mem_cgroup_from_cont(cont);

	total_nr = mem_cgroup_nr_lru_pages(mem_cont);
	seq_printf(m, "total=%lu", total_nr);
	for_each_node_state(nid, N_HIGH_MEMORY) {
		node_nr = mem_cgroup_node_nr_lru_pages(mem_cont, nid);
		seq_printf(m, " N%d=%lu", nid, node_nr);
	}
	seq_putc(m, '\n');

	file_nr = mem_cgroup_nr_file_lru_pages(mem_cont);
	seq_printf(m, "file=%lu", file_nr);
	for_each_node_state(nid, N_HIGH_MEMORY) {
		node_nr = mem_cgroup_node_nr_file_lru_pages(mem_cont, nid);
		seq_printf(m, " N%d=%lu", nid, node_nr);
	}
	seq_putc(m, '\n');

	anon_nr = mem_cgroup_nr_anon_lru_pages(mem_cont);
	seq_printf(m, "anon=%lu", anon_nr);
	for_each_node_state(nid, N_HIGH_MEMORY) {
		node_nr = mem_cgroup_node_nr_anon_lru_pages(mem_cont, nid);
		seq_printf(m, " N%d=%lu", nid, node_nr);
	}
	seq_putc(m, '\n');

	unevictable_nr = mem_cgroup_nr_unevictable_lru_pages(mem_cont);
	seq_printf(m, "unevictable=%lu", unevictable_nr);
	for_each_node_state(nid, N_HIGH_MEMORY) {
		node_nr = mem_cgroup_node_nr_unevictable_lru_pages(mem_cont,
									nid);
		seq_printf(m, " N%d=%lu", nid, node_nr);
	}
	seq_putc(m, '\n');
	return 0;
}
#endif /* CONFIG_NUMA */
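/*
 * Illustrative sketch of the memory.numa_stat line format produced above:
 * one "<key>=<total>" followed by a " N<nid>=<count>" pair per node, for
 * example "total=2048 N0=1024 N1=1024". The counts here are made up.
 */
#include <stdio.h>

int main(void)
{
	unsigned long per_node[] = { 1024, 1024 };	/* assumed per-node counts */
	unsigned long total = per_node[0] + per_node[1];

	printf("total=%lu", total);
	for (int nid = 0; nid < 2; nid++)
		printf(" N%d=%lu", nid, per_node[nid]);
	printf("\n");		/* -> total=2048 N0=1024 N1=1024 */
	return 0;
}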
static int mem_control_stat_show(struct cgroup *cont, struct cftype *cft,
				 struct cgroup_map_cb *cb)
static int __init enable_swap_account(char *s)
{
	/* enable if "1" is given, disable if "0" is given */
	if (!strcmp(s, "1"))
		really_do_swap_account = 1;
	else if (!strcmp(s, "0"))
		really_do_swap_account = 0;
	return 1;
}
__setup("swapaccount=", enable_swap_account);