~ubuntu-branches/ubuntu/quantal/linux-linaro-mx51/quantal : revision 10

35

#include <linux/limits.h>

36

#include <linux/mutex.h>

37

#include <linux/rbtree.h>

38

#include <linux/shmem_fs.h>

38

39

#include <linux/slab.h>

39

40

#include <linux/swap.h>

40

41

#include <linux/swapops.h>

94

95

MEM_CGROUP_EVENTS_PGPGIN, /* # of pages paged in */

95

96

MEM_CGROUP_EVENTS_PGPGOUT, /* # of pages paged out */

96

97

MEM_CGROUP_EVENTS_COUNT, /* # of pages paged in/out */

98

MEM_CGROUP_EVENTS_PGFAULT, /* # of page-faults */

99

MEM_CGROUP_EVENTS_PGMAJFAULT, /* # of major page-faults */

97

100

MEM_CGROUP_EVENTS_NSTATS,

98

101

};

99

102

/*

105

108

enum mem_cgroup_events_target {

106

109

MEM_CGROUP_TARGET_THRESH,

107

110

MEM_CGROUP_TARGET_SOFTLIMIT,

111

MEM_CGROUP_TARGET_NUMAINFO,

108

112

MEM_CGROUP_NTARGETS,

109

113

};

110

114

#define THRESHOLDS_EVENTS_TARGET (128)

111

115

#define SOFTLIMIT_EVENTS_TARGET (1024)

116

#define NUMAINFO_EVENTS_TARGET (1024)

112

117

113

118

struct mem_cgroup_stat_cpu {

114

119

long count[MEM_CGROUP_STAT_NSTATS];

231

236

* reclaimed from.

232

237

*/

233

238

int last_scanned_child;

239

int last_scanned_node;

240

#if MAX_NUMNODES > 1

241

nodemask_t scan_nodes;

242

atomic_t numainfo_events;

243

atomic_t numainfo_updating;

244

#endif

234

245

/*

235

246

* Should the accounting and control be hierarchical, per subtree?

236

247

*/

352

363

static void mem_cgroup_get(struct mem_cgroup *mem);

353

364

static void mem_cgroup_put(struct mem_cgroup *mem);

354

365

static struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *mem);

355

static void drain_all_stock_async(void);

366

static void drain_all_stock_async(struct mem_cgroup *mem);

356

367

357

368

static struct mem_cgroup_per_zone *

358

369

mem_cgroup_zoneinfo(struct mem_cgroup *mem, int nid, int zid)

569

580

return val;

570

581

}

571

582

572

static long mem_cgroup_local_usage(struct mem_cgroup *mem)

573

{

574

long ret;

575

576

ret = mem_cgroup_read_stat(mem, MEM_CGROUP_STAT_RSS);

577

ret += mem_cgroup_read_stat(mem, MEM_CGROUP_STAT_CACHE);

578

return ret;

579

}

580

581

583

static void mem_cgroup_swap_statistics(struct mem_cgroup *mem,

582

584

bool charge)

583

585

{

585

587

this_cpu_add(mem->stat->count[MEM_CGROUP_STAT_SWAPOUT], val);

586

588

}

587

589

590

/*
 * mem_cgroup_pgfault - account page-fault events to a memcg.
 * @mem: memcg whose per-cpu event counter is updated
 * @val: amount added to the counter
 *
 * Adds @val to mem->stat->events[MEM_CGROUP_EVENTS_PGFAULT] with
 * this_cpu_add().
 */
void mem_cgroup_pgfault(struct mem_cgroup *mem, int val)

591

{

592

this_cpu_add(mem->stat->events[MEM_CGROUP_EVENTS_PGFAULT], val);

593

}

594

595

/*
 * mem_cgroup_pgmajfault - account major page-fault events to a memcg.
 * @mem: memcg whose per-cpu event counter is updated
 * @val: amount added to the counter
 *
 * Adds @val to mem->stat->events[MEM_CGROUP_EVENTS_PGMAJFAULT] with
 * this_cpu_add().
 */
void mem_cgroup_pgmajfault(struct mem_cgroup *mem, int val)

596

{

597

this_cpu_add(mem->stat->events[MEM_CGROUP_EVENTS_PGMAJFAULT], val);

598

}

599

588

600

static unsigned long mem_cgroup_read_events(struct mem_cgroup *mem,

589

601

enum mem_cgroup_events_index idx)

590

602

{

624

636

preempt_enable();

625

637

}

626

638

639

/*
 * Sum one LRU statistic of @mem over every zone of node @nid.
 * @mem: memcg to read
 * @nid: node whose zones are walked
 * @idx: LRU list index passed to MEM_CGROUP_ZSTAT()
 *
 * Walks zone ids 0..MAX_NR_ZONES-1 and returns the accumulated
 * MEM_CGROUP_ZSTAT() value.
 */
static unsigned long

640

mem_cgroup_get_zonestat_node(struct mem_cgroup *mem, int nid, enum lru_list idx)

641

{

642

struct mem_cgroup_per_zone *mz;

643

/* accumulated as u64; converted to unsigned long on return */
u64 total = 0;

644

int zid;

645

646

for (zid = 0; zid < MAX_NR_ZONES; zid++) {

647

mz = mem_cgroup_zoneinfo(mem, nid, zid);

648

total += MEM_CGROUP_ZSTAT(mz, idx);

649

}

650

return total;

651

}

627

652

static unsigned long mem_cgroup_get_local_zonestat(struct mem_cgroup *mem,

628

653

enum lru_list idx)

629

654

{

630

int nid, zid;

631

struct mem_cgroup_per_zone *mz;

655

int nid;

632

656

u64 total = 0;

633

657

634

658

for_each_online_node(nid)

635

for (zid = 0; zid < MAX_NR_ZONES; zid++) {

636

mz = mem_cgroup_zoneinfo(mem, nid, zid);

637

total += MEM_CGROUP_ZSTAT(mz, idx);

638

}

659

total += mem_cgroup_get_zonestat_node(mem, nid, idx);

639

660

return total;

640

661

}

641

662

683

case MEM_CGROUP_TARGET_SOFTLIMIT:

663

684

next = val + SOFTLIMIT_EVENTS_TARGET;

664

685

break;

686

case MEM_CGROUP_TARGET_NUMAINFO:

687

next = val + NUMAINFO_EVENTS_TARGET;

688

break;

665

689

default:

666

690

return;

667

691

}

680

704

mem_cgroup_threshold(mem);

681

705

__mem_cgroup_target_update(mem, MEM_CGROUP_TARGET_THRESH);

682

706

if (unlikely(__memcg_event_check(mem,

683

MEM_CGROUP_TARGET_SOFTLIMIT))){

707

MEM_CGROUP_TARGET_SOFTLIMIT))) {

684

708

mem_cgroup_update_tree(mem, page);

685

709

__mem_cgroup_target_update(mem,

686

MEM_CGROUP_TARGET_SOFTLIMIT);

687

}

710

MEM_CGROUP_TARGET_SOFTLIMIT);

711

}

712

#if MAX_NUMNODES > 1

713

if (unlikely(__memcg_event_check(mem,

714

MEM_CGROUP_TARGET_NUMAINFO))) {

715

atomic_inc(&mem->numainfo_events);

716

__mem_cgroup_target_update(mem,

717

MEM_CGROUP_TARGET_NUMAINFO);

718

}

719

#endif

688

720

}

689

721

}

690

722

709

741

struct mem_cgroup, css);

710

742

}

711

743

712

static struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm)

744

struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm)

713

745

{

714

746

struct mem_cgroup *mem = NULL;

715

747

813

845

return (mem == root_mem_cgroup);

814

846

}

815

847

848

/*
 * mem_cgroup_count_vm_event - charge a vm event to the memcg owning @mm.
 * @mm: address space whose owner task selects the memcg; NULL is ignored
 * @idx: vm event; only PGFAULT and PGMAJFAULT are handled
 *
 * Looks up the memcg of mm->owner under rcu_read_lock() and bumps the
 * matching per-memcg fault counter by one. Does nothing when @mm is NULL
 * or no memcg is found. Any other @idx reaches BUG().
 */
void mem_cgroup_count_vm_event(struct mm_struct *mm, enum vm_event_item idx)

849

{

850

struct mem_cgroup *mem;

851

852

if (!mm)

853

return;

854

855

rcu_read_lock();

856

mem = mem_cgroup_from_task(rcu_dereference(mm->owner));

857

if (unlikely(!mem))

858

goto out;

859

860

switch (idx) {

861

case PGMAJFAULT:

862

mem_cgroup_pgmajfault(mem, 1);

863

break;

864

case PGFAULT:

865

mem_cgroup_pgfault(mem, 1);

866

break;

867

default:

868

/* callers must only pass the two fault events handled above */
BUG();

869

}

870

out:

871

rcu_read_unlock();

872

}

873

EXPORT_SYMBOL(mem_cgroup_count_vm_event);

874

816

875

/*

817

876

* Following LRU functions are allowed to be used without PCG_LOCK.

818

877

* Operations are called by routine of global LRU independently from memcg.

1064

1123

return (active > inactive);

1065

1124

}

1066

1125

1067

unsigned long mem_cgroup_zone_nr_pages(struct mem_cgroup *memcg,

1068

struct zone *zone,

1069

enum lru_list lru)

1126

unsigned long mem_cgroup_zone_nr_lru_pages(struct mem_cgroup *memcg,

1127

struct zone *zone,

1128

enum lru_list lru)

1070

1129

{

1071

1130

int nid = zone_to_nid(zone);

1072

1131

int zid = zone_idx(zone);

1075

1134

return MEM_CGROUP_ZSTAT(mz, lru);

1076

1135

}

1077

1136

1137

/*
 * Return the number of file LRU pages (LRU_INACTIVE_FILE + LRU_ACTIVE_FILE)
 * that @memcg has on node @nid, as reported by
 * mem_cgroup_get_zonestat_node().
 */
static unsigned long mem_cgroup_node_nr_file_lru_pages(struct mem_cgroup *memcg,

1138

int nid)

1139

{

1140

unsigned long ret;

1141

1142

ret = mem_cgroup_get_zonestat_node(memcg, nid, LRU_INACTIVE_FILE) +

1143

mem_cgroup_get_zonestat_node(memcg, nid, LRU_ACTIVE_FILE);

1144

1145

return ret;

1146

}

1147

1148

/*
 * Return the number of anon LRU pages (LRU_INACTIVE_ANON + LRU_ACTIVE_ANON)
 * that @memcg has on node @nid, as reported by
 * mem_cgroup_get_zonestat_node().
 */
static unsigned long mem_cgroup_node_nr_anon_lru_pages(struct mem_cgroup *memcg,

1149

int nid)

1150

{

1151

unsigned long ret;

1152

1153

ret = mem_cgroup_get_zonestat_node(memcg, nid, LRU_INACTIVE_ANON) +

1154

mem_cgroup_get_zonestat_node(memcg, nid, LRU_ACTIVE_ANON);

1155

return ret;

1156

}

1157

1158

#if MAX_NUMNODES > 1

1159

/*
 * Total file LRU pages of @memcg: sums mem_cgroup_node_nr_file_lru_pages()
 * over every node in the N_HIGH_MEMORY node state.
 */
static unsigned long mem_cgroup_nr_file_lru_pages(struct mem_cgroup *memcg)

1160

{

1161

u64 total = 0;

1162

int nid;

1163

1164

for_each_node_state(nid, N_HIGH_MEMORY)

1165

total += mem_cgroup_node_nr_file_lru_pages(memcg, nid);

1166

1167

return total;

1168

}

1169

1170

/*
 * Total anon LRU pages of @memcg: sums mem_cgroup_node_nr_anon_lru_pages()
 * over every node in the N_HIGH_MEMORY node state.
 */
static unsigned long mem_cgroup_nr_anon_lru_pages(struct mem_cgroup *memcg)

1171

{

1172

u64 total = 0;

1173

int nid;

1174

1175

for_each_node_state(nid, N_HIGH_MEMORY)

1176

total += mem_cgroup_node_nr_anon_lru_pages(memcg, nid);

1177

1178

return total;

1179

}

1180

1181

/*
 * Return the LRU_UNEVICTABLE page count of @memcg on node @nid, as reported
 * by mem_cgroup_get_zonestat_node().
 */
static unsigned long

1182

mem_cgroup_node_nr_unevictable_lru_pages(struct mem_cgroup *memcg, int nid)

1183

{

1184

return mem_cgroup_get_zonestat_node(memcg, nid, LRU_UNEVICTABLE);

1185

}

1186

1187

/*
 * Total unevictable LRU pages of @memcg: sums
 * mem_cgroup_node_nr_unevictable_lru_pages() over every node in the
 * N_HIGH_MEMORY node state.
 */
static unsigned long

1188

mem_cgroup_nr_unevictable_lru_pages(struct mem_cgroup *memcg)

1189

{

1190

u64 total = 0;

1191

int nid;

1192

1193

for_each_node_state(nid, N_HIGH_MEMORY)

1194

total += mem_cgroup_node_nr_unevictable_lru_pages(memcg, nid);

1195

1196

return total;

1197

}

1198

1199

/*
 * Return the number of pages @memcg has on node @nid across all LRU lists:
 * iterates for_each_lru() and sums mem_cgroup_get_zonestat_node() for each.
 */
static unsigned long mem_cgroup_node_nr_lru_pages(struct mem_cgroup *memcg,

1200

int nid)

1201

{

1202

enum lru_list l;

1203

u64 total = 0;

1204

1205

for_each_lru(l)

1206

total += mem_cgroup_get_zonestat_node(memcg, nid, l);

1207

1208

return total;

1209

}

1210

1211

/*
 * Total LRU pages of @memcg: sums mem_cgroup_node_nr_lru_pages() over every
 * node in the N_HIGH_MEMORY node state.
 */
static unsigned long mem_cgroup_nr_lru_pages(struct mem_cgroup *memcg)

1212

{

1213

u64 total = 0;

1214

int nid;

1215

1216

for_each_node_state(nid, N_HIGH_MEMORY)

1217

total += mem_cgroup_node_nr_lru_pages(memcg, nid);

1218

1219

return total;

1220

}

1221

#endif /* MAX_NUMNODES > 1 */

1222

1078

1223

struct zone_reclaim_stat *mem_cgroup_get_reclaim_stat(struct mem_cgroup *memcg,

1079

1224

struct zone *zone)

1080

1225

{

1418

1563

return ret;

1419

1564

}

1420

1565

1566

/**

1567

* test_mem_cgroup_node_reclaimable

1568

* @mem: the target memcg

1569

* @nid: the node ID to be checked.

1570

* @noswap : specify true here if the user wants file only information.

1571

*

1572

* This function returns whether the specified memcg contains any

1573

* reclaimable pages on a node. Returns true if there are any reclaimable

1574

* pages in the node.

1575

*/

1576

static bool test_mem_cgroup_node_reclaimable(struct mem_cgroup *mem,

1577

int nid, bool noswap)

1578

{

1579

/* any file LRU page on the node counts as reclaimable */
if (mem_cgroup_node_nr_file_lru_pages(mem, nid))

1580

return true;

1581

/* anon pages are only considered when swap is allowed and present */
if (noswap || !total_swap_pages)

1582

return false;

1583

if (mem_cgroup_node_nr_anon_lru_pages(mem, nid))

1584

return true;

1585

return false;

1586

1587

}

1588

#if MAX_NUMNODES > 1

1589

1590

/*

1591

* Always updating the nodemask is not very good - even if we have an empty

1592

* list or the wrong list here, we can start from some node and traverse all

1593

* nodes based on the zonelist. So update the list loosely once per 10 secs.

1594

*
* NOTE(review): the "once per 10 secs" wording does not match the code
* below, which rebuilds the mask only when numainfo_events is non-zero and
* no other update is in progress - confirm and reword.
*
* Rebuilds mem->scan_nodes from node_states[N_HIGH_MEMORY], clearing every
* node for which test_mem_cgroup_node_reclaimable(mem, nid, false) is false.

1595

*/

1596

static void mem_cgroup_may_update_nodemask(struct mem_cgroup *mem)

1597

{

1598

int nid;

1599

/*

1600

* numainfo_events > 0 means there was at least NUMAINFO_EVENTS_TARGET

1601

* pagein/pageout changes since the last update.

1602

*/

1603

if (!atomic_read(&mem->numainfo_events))

1604

return;

1605

/* only the caller that raises the counter from 0 to 1 does the update */
if (atomic_inc_return(&mem->numainfo_updating) > 1)

1606

return;

1607

1608

/* make a nodemask where this memcg uses memory from */

1609

mem->scan_nodes = node_states[N_HIGH_MEMORY];

1610

1611

for_each_node_mask(nid, node_states[N_HIGH_MEMORY]) {

1612

1613

if (!test_mem_cgroup_node_reclaimable(mem, nid, false))

1614

node_clear(nid, mem->scan_nodes);

1615

}

1616

1617

/* reset both counters so the next batch of events triggers a new update */
atomic_set(&mem->numainfo_events, 0);

1618

atomic_set(&mem->numainfo_updating, 0);

1619

}

1620

1621

/*

1622

* Selecting a node where we start reclaim from. Because what we need is just

1623

* reducing usage counter, start from anywhere is O.K. Considering

1624

* memory reclaim from current node, there are pros. and cons.

1625

*

1626

* Freeing memory from current node means freeing memory from a node which

1627

* we'll use or we've used. So, it may make LRU bad. And if several threads

1628

* hit limits, it will see a contention on a node. But freeing from remote

1629

* node means more costs for memory reclaim because of memory latency.

1630

*

1631

* Now, we use round-robin. Better algorithm is welcomed.

1632

*/

1633

/*
 * Pick the node to start reclaim from for @mem and remember it in
 * mem->last_scanned_node. Returns the chosen node id.
 */
int mem_cgroup_select_victim_node(struct mem_cgroup *mem)

1634

{

1635

int node;

1636

1637

mem_cgroup_may_update_nodemask(mem);

1638

node = mem->last_scanned_node;

1639

1640

/* advance to the node after the last one used, wrapping to the first */
node = next_node(node, mem->scan_nodes);

1641

if (node == MAX_NUMNODES)

1642

node = first_node(mem->scan_nodes);

1643

/*

1644

* We call this when we hit limit, not when pages are added to LRU.

1645

* No LRU may hold pages because all pages are UNEVICTABLE or

1646

* memcg is too small and all pages are not on LRU. In that case,

1647

* we use current node.

1648

*/

1649

if (unlikely(node == MAX_NUMNODES))

1650

node = numa_node_id();

1651

1652

mem->last_scanned_node = node;

1653

return node;

1654

}

1655

1656

/*

1657

* Check all nodes whether it contains reclaimable pages or not.

1658

* For quick scan, we make use of scan_nodes. This will allow us to skip

1659

* unused nodes. But scan_nodes is lazily updated and may not contain

1660

* enough new information. We need to do double check.

1661

*/

1662

/*
 * Return true if @mem has reclaimable pages on any node, per
 * test_mem_cgroup_node_reclaimable(); @noswap is passed through to it.
 * Nodes set in mem->scan_nodes are tested first, then the remaining
 * N_HIGH_MEMORY nodes.
 */
bool mem_cgroup_reclaimable(struct mem_cgroup *mem, bool noswap)

1663

{

1664

int nid;

1665

1666

/*

1667

* quick check...making use of scan_node.

1668

* We can skip unused nodes.

1669

*/

1670

if (!nodes_empty(mem->scan_nodes)) {

1671

for (nid = first_node(mem->scan_nodes);

1672

nid < MAX_NUMNODES;

1673

nid = next_node(nid, mem->scan_nodes)) {

1674

1675

if (test_mem_cgroup_node_reclaimable(mem, nid, noswap))

1676

return true;

1677

}

1678

}

1679

/*

1680

* Check rest of nodes.

1681

*/

1682

for_each_node_state(nid, N_HIGH_MEMORY) {

1683

/* already tested in the scan_nodes pass above */
if (node_isset(nid, mem->scan_nodes))

1684

continue;

1685

if (test_mem_cgroup_node_reclaimable(mem, nid, noswap))

1686

return true;

1687

}

1688

return false;

1689

}

1690

1691

#else

1692

/* Variant for the #else branch of the node-count check: always node 0. */
int mem_cgroup_select_victim_node(struct mem_cgroup *mem)

1693

{

1694

return 0;

1695

}

1696

1697

/*
 * Variant for the #else branch of the node-count check: only node 0 is
 * tested for reclaimable pages.
 */
bool mem_cgroup_reclaimable(struct mem_cgroup *mem, bool noswap)

1698

{

1699

return test_mem_cgroup_node_reclaimable(mem, 0, noswap);

1700

}

1701

#endif

1702

1421

1703

/*

1422

1704

* Scan the hierarchy if needed to reclaim memory. We remember the last child

1423

1705

* we reclaimed from, so that we don't end up penalizing one child extensively

1433

1715

static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem,

1434

1716

struct zone *zone,

1435

1717

gfp_t gfp_mask,

1436

unsigned long reclaim_options)

1718

unsigned long reclaim_options,

1719

unsigned long *total_scanned)

1437

1720

{

1438

1721

struct mem_cgroup *victim;

1439

1722

int ret, total = 0;

1442

1725

bool shrink = reclaim_options & MEM_CGROUP_RECLAIM_SHRINK;

1443

1726

bool check_soft = reclaim_options & MEM_CGROUP_RECLAIM_SOFT;

1444

1727

unsigned long excess;

1728

unsigned long nr_scanned;

1445

1729

1446

1730

excess = res_counter_soft_limit_excess(&root_mem->res) >> PAGE_SHIFT;

1447

1731

1448

1732

/* If memsw_is_minimum==1, swap-out is of-no-use. */

1449

if (root_mem->memsw_is_minimum)

1733

if (!check_soft && !shrink && root_mem->memsw_is_minimum)

1450

1734

noswap = true;

1451

1735

1452

1736

while (1) {

1453

1737

victim = mem_cgroup_select_victim(root_mem);

1454

1738

if (victim == root_mem) {

1455

1739

loop++;

1456

if (loop >= 1)

1457

drain_all_stock_async();

1740

/*

1741

* We are not draining per cpu cached charges during

1742

* soft limit reclaim because global reclaim doesn't

1743

* care about charges. It tries to free some memory and

1744

* charges will not give any.

1745

*/

1746

if (!check_soft && loop >= 1)

1747

drain_all_stock_async(root_mem);

1458

1748

if (loop >= 2) {

1459

1749

/*

1460

1750

* If we have not been able to reclaim

1478

1768

}

1479

1769

}

1480

1770

}

1481

if (!mem_cgroup_local_usage(victim)) {

1771

if (!mem_cgroup_reclaimable(victim, noswap)) {

1482

1772

/* this cgroup's local usage == 0 */

1483

1773

css_put(&victim->css);

1484

1774

continue;

1485

1775

}

1486

1776

/* we use swappiness of local cgroup */

1487

if (check_soft)

1777

if (check_soft) {

1488

1778

ret = mem_cgroup_shrink_node_zone(victim, gfp_mask,

1489

noswap, get_swappiness(victim), zone);

1490

else

1779

noswap, get_swappiness(victim), zone,

1780

&nr_scanned);

1781

*total_scanned += nr_scanned;

1782

} else

1491

1783

ret = try_to_free_mem_cgroup_pages(victim, gfp_mask,

1492

1784

noswap, get_swappiness(victim));

1493

1785

css_put(&victim->css);

1503

1795

if (!res_counter_soft_limit_excess(&root_mem->res))

1504

1796

return total;

1505

1797

} else if (mem_cgroup_margin(root_mem))

1506

return 1 + total;

1798

return total;

1507

1799

}

1508

1800

return total;

1509

1801

}

1715

2007

struct mem_cgroup *cached; /* this never be root cgroup */

1716

2008

unsigned int nr_pages;

1717

2009

struct work_struct work;

2010

unsigned long flags;

2011

#define FLUSHING_CACHED_CHARGE (0)

1718

2012

};

1719

2013

static DEFINE_PER_CPU(struct memcg_stock_pcp, memcg_stock);

1720

static atomic_t memcg_drain_count;

2014

static DEFINE_MUTEX(percpu_charge_mutex);

1721

2015

1722

2016

/*

1723

2017

* Try to consume stocked charge on this cpu. If success, one page is consumed

1765

2059

{

1766

2060

struct memcg_stock_pcp *stock = &__get_cpu_var(memcg_stock);

1767

2061

drain_stock(stock);

2062

clear_bit(FLUSHING_CACHED_CHARGE, &stock->flags);

1768

2063

}

1769

2064

1770

2065

/*

1789

2084

* expects some charges will be back to res_counter later but cannot wait for

1790

2085

* it.

1791

2086

*/

1792

static void drain_all_stock_async(void)

2087

static void drain_all_stock_async(struct mem_cgroup *root_mem)

1793

2088

{

1794

int cpu;

1795

/* This function is for scheduling "drain" in asynchronous way.

1796

* The result of "drain" is not directly handled by callers. Then,

1797

* if someone is calling drain, we don't have to call drain more.

1798

* Anyway, WORK_STRUCT_PENDING check in queue_work_on() will catch if

1799

* there is a race. We just do loose check here.

2089

int cpu, curcpu;

2090

/*

2091

* If someone calls draining, avoid adding more kworker runs.

1800

2092

*/

1801

if (atomic_read(&memcg_drain_count))

2093

if (!mutex_trylock(&percpu_charge_mutex))

1802

2094

return;

1803

2095

/* Notify other cpus that system-wide "drain" is running */

1804

atomic_inc(&memcg_drain_count);

1805

2096

get_online_cpus();

2097

/*

2098

* Get a hint for avoiding draining charges on the current cpu,

2099

* which must be exhausted by our charging. It is not required that

2100

* this be a precise check, so we use raw_smp_processor_id() instead of

2101

* getcpu()/putcpu().

2102

*/

2103

curcpu = raw_smp_processor_id();

1806

2104

for_each_online_cpu(cpu) {

1807

2105

struct memcg_stock_pcp *stock = &per_cpu(memcg_stock, cpu);

1808

schedule_work_on(cpu, &stock->work);

2106

struct mem_cgroup *mem;

2107

2108

if (cpu == curcpu)

2109

continue;

2110

2111

mem = stock->cached;

2112

if (!mem)

2113

continue;

2114

if (mem != root_mem) {

2115

if (!root_mem->use_hierarchy)

2116

continue;

2117

/* check whether "mem" is under tree of "root_mem" */

2118

if (!css_is_ancestor(&mem->css, &root_mem->css))

2119

continue;

2120

}

2121

if (!test_and_set_bit(FLUSHING_CACHED_CHARGE, &stock->flags))

2122

schedule_work_on(cpu, &stock->work);

1809

2123

}

1810

2124

put_online_cpus();

1811

atomic_dec(&memcg_drain_count);

2125

mutex_unlock(&percpu_charge_mutex);

1812

2126

/* We don't wait for flush_work */

1813

2127

}

1814

2128

1816

2130

static void drain_all_stock_sync(void)

1817

2131

{

1818

2132

/* called when force_empty is called */

1819

atomic_inc(&memcg_drain_count);

2133

mutex_lock(&percpu_charge_mutex);

1820

2134

schedule_on_each_cpu(drain_local_stock);

1821

atomic_dec(&memcg_drain_count);

2135

mutex_unlock(&percpu_charge_mutex);

1822

2136

}

1823

2137

1824

2138

/*

1928

2242

return CHARGE_WOULDBLOCK;

1929

2243

1930

2244

ret = mem_cgroup_hierarchical_reclaim(mem_over_limit, NULL,

1931

gfp_mask, flags);

2245

gfp_mask, flags, NULL);

1932

2246

if (mem_cgroup_margin(mem_over_limit) >= nr_pages)

1933

2247

return CHARGE_RETRY;

1934

2248

/*

3211

3525

break;

3212

3526

3213

3527

mem_cgroup_hierarchical_reclaim(memcg, NULL, GFP_KERNEL,

3214

MEM_CGROUP_RECLAIM_SHRINK);

3528

MEM_CGROUP_RECLAIM_SHRINK,

3529

NULL);

3215

3530

curusage = res_counter_read_u64(&memcg->res, RES_USAGE);

3216

3531

/* Usage is reduced ? */

3217

3532

if (curusage >= oldusage)

3271

3586

3272

3587

mem_cgroup_hierarchical_reclaim(memcg, NULL, GFP_KERNEL,

3273

3588

MEM_CGROUP_RECLAIM_NOSWAP |

3274

MEM_CGROUP_RECLAIM_SHRINK);

3589

MEM_CGROUP_RECLAIM_SHRINK,

3590

NULL);

3275

3591

curusage = res_counter_read_u64(&memcg->memsw, RES_USAGE);

3276

3592

/* Usage is reduced ? */

3277

3593

if (curusage >= oldusage)

3285

3601

}

3286

3602

3287

3603

unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,

3288

gfp_t gfp_mask)

3604

gfp_t gfp_mask,

3605

unsigned long *total_scanned)

3289

3606

{

3290

3607

unsigned long nr_reclaimed = 0;

3291

3608

struct mem_cgroup_per_zone *mz, *next_mz = NULL;

3293

3610

int loop = 0;

3294

3611

struct mem_cgroup_tree_per_zone *mctz;

3295

3612

unsigned long long excess;

3613

unsigned long nr_scanned;

3296

3614

3297

3615

if (order > 0)

3298

3616

return 0;

3311

3629

if (!mz)

3312

3630

break;

3313

3631

3632

nr_scanned = 0;

3314

3633

reclaimed = mem_cgroup_hierarchical_reclaim(mz->mem, zone,

3315

3634

gfp_mask,

3316

MEM_CGROUP_RECLAIM_SOFT);

3635

MEM_CGROUP_RECLAIM_SOFT,

3636

&nr_scanned);

3317

3637

nr_reclaimed += reclaimed;

3638

*total_scanned += nr_scanned;

3318

3639

spin_lock(&mctz->lock);

3319

3640

3320

3641

/*

3337

3658

*/

3338

3659

next_mz =

3339

3660

__mem_cgroup_largest_soft_limit_node(mctz);

3340

if (next_mz == mz) {

3661

if (next_mz == mz)

3341

3662

css_put(&next_mz->mem->css);

3342

next_mz = NULL;

3343

} else /* next_mz == NULL or other memcg */

3663

else /* next_mz == NULL or other memcg */

3344

3664

break;

3345

3665

} while (1);

3346

3666

}

3772

4092

MCS_PGPGIN,

3773

4093

MCS_PGPGOUT,

3774

4094

MCS_SWAP,

4095

MCS_PGFAULT,

4096

MCS_PGMAJFAULT,

3775

4097

MCS_INACTIVE_ANON,

3776

4098

MCS_ACTIVE_ANON,

3777

4099

MCS_INACTIVE_FILE,

3794

4116

{"pgpgin", "total_pgpgin"},

3795

4117

{"pgpgout", "total_pgpgout"},

3796

4118

{"swap", "total_swap"},

4119

{"pgfault", "total_pgfault"},

4120

{"pgmajfault", "total_pgmajfault"},

3797

4121

{"inactive_anon", "total_inactive_anon"},

3798

4122

{"active_anon", "total_active_anon"},

3799

4123

{"inactive_file", "total_inactive_file"},

3822

4146

val = mem_cgroup_read_stat(mem, MEM_CGROUP_STAT_SWAPOUT);

3823

4147

s->stat[MCS_SWAP] += val * PAGE_SIZE;

3824

4148

}

4149

val = mem_cgroup_read_events(mem, MEM_CGROUP_EVENTS_PGFAULT);

4150

s->stat[MCS_PGFAULT] += val;

4151

val = mem_cgroup_read_events(mem, MEM_CGROUP_EVENTS_PGMAJFAULT);

4152

s->stat[MCS_PGMAJFAULT] += val;

3825

4153

3826

4154

/* per zone stat */

3827

4155

val = mem_cgroup_get_local_zonestat(mem, LRU_INACTIVE_ANON);

3845

4173

mem_cgroup_get_local_stat(iter, s);

3846

4174

}

3847

4175

4176

#ifdef CONFIG_NUMA

4177

/*
 * seq_file show callback for the per-memcg "numa_stat" file.
 * @m: seq_file; m->private holds the struct cgroup
 * @arg: unused
 *
 * Emits four lines - "total", "file", "anon" and "unevictable" - each in
 * the form "<name>=<sum> N<nid>=<count> ..." with one N<nid> entry per
 * node in the N_HIGH_MEMORY node state. Always returns 0.
 */
static int mem_control_numa_stat_show(struct seq_file *m, void *arg)

4178

{

4179

int nid;

4180

unsigned long total_nr, file_nr, anon_nr, unevictable_nr;

4181

unsigned long node_nr;

4182

struct cgroup *cont = m->private;

4183

struct mem_cgroup *mem_cont = mem_cgroup_from_cont(cont);

4184

4185

/* all LRU lists */
total_nr = mem_cgroup_nr_lru_pages(mem_cont);

4186

seq_printf(m, "total=%lu", total_nr);

4187

for_each_node_state(nid, N_HIGH_MEMORY) {

4188

node_nr = mem_cgroup_node_nr_lru_pages(mem_cont, nid);

4189

seq_printf(m, " N%d=%lu", nid, node_nr);

4190

}

4191

seq_putc(m, '\n');

4192

4193

/* file LRU lists */
file_nr = mem_cgroup_nr_file_lru_pages(mem_cont);

4194

seq_printf(m, "file=%lu", file_nr);

4195

for_each_node_state(nid, N_HIGH_MEMORY) {

4196

node_nr = mem_cgroup_node_nr_file_lru_pages(mem_cont, nid);

4197

seq_printf(m, " N%d=%lu", nid, node_nr);

4198

}

4199

seq_putc(m, '\n');

4200

4201

/* anon LRU lists */
anon_nr = mem_cgroup_nr_anon_lru_pages(mem_cont);

4202

seq_printf(m, "anon=%lu", anon_nr);

4203

for_each_node_state(nid, N_HIGH_MEMORY) {

4204

node_nr = mem_cgroup_node_nr_anon_lru_pages(mem_cont, nid);

4205

seq_printf(m, " N%d=%lu", nid, node_nr);

4206

}

4207

seq_putc(m, '\n');

4208

4209

/* unevictable LRU list */
unevictable_nr = mem_cgroup_nr_unevictable_lru_pages(mem_cont);

4210

seq_printf(m, "unevictable=%lu", unevictable_nr);

4211

for_each_node_state(nid, N_HIGH_MEMORY) {

4212

node_nr = mem_cgroup_node_nr_unevictable_lru_pages(mem_cont,

4213

nid);

4214

seq_printf(m, " N%d=%lu", nid, node_nr);

4215

}

4216

seq_putc(m, '\n');

4217

return 0;

4218

}

4219

#endif /* CONFIG_NUMA */

4220

3848

4221

static int mem_control_stat_show(struct cgroup *cont, struct cftype *cft,

3849

4222

struct cgroup_map_cb *cb)

3850

4223

{

3855

4228

memset(&mystat, 0, sizeof(mystat));

3856

4229

mem_cgroup_get_local_stat(mem_cont, &mystat);

3857

4230

4231

3858

4232

for (i = 0; i < NR_MCS_STAT; i++) {

3859

4233

if (i == MCS_SWAP && !do_swap_account)

3860

4234

continue;

4278

4652

return 0;

4279

4653

}

4280

4654

4655

#ifdef CONFIG_NUMA

4656

/*
 * File operations installed on the "numa_stat" file by
 * mem_control_numa_stat_open(); all three handlers are the generic
 * seq_file / single_open helpers.
 */
static const struct file_operations mem_control_numa_stat_file_operations = {

4657

.read = seq_read,

4658

.llseek = seq_lseek,

4659

.release = single_release,

4660

};

4661

4662

/*
 * open handler for the "numa_stat" cgroup file.
 * @unused: inode, not used
 * @file: file being opened
 *
 * Replaces file->f_op with the seq_file operations above and sets up
 * single_open() with mem_control_numa_stat_show() as the show callback.
 * Returns the result of single_open().
 */
static int mem_control_numa_stat_open(struct inode *unused, struct file *file)

4663

{

4664

/* the cgroup is read from the parent dentry's d_fsdata */
struct cgroup *cont = file->f_dentry->d_parent->d_fsdata;

4665

4666

file->f_op = &mem_control_numa_stat_file_operations;

4667

return single_open(file, mem_control_numa_stat_show, cont);

4668

}

4669

#endif /* CONFIG_NUMA */

4670

4281

4671

static struct cftype mem_cgroup_files[] = {

4282

4672

{

4283

4673

.name = "usage_in_bytes",

4341

4731

.unregister_event = mem_cgroup_oom_unregister_event,

4342

4732

.private = MEMFILE_PRIVATE(_OOM_TYPE, OOM_CONTROL),

4343

4733

},

4734

#ifdef CONFIG_NUMA

4735

{

4736

.name = "numa_stat",

4737

.open = mem_control_numa_stat_open,

4738

.mode = S_IRUGO,

4739

},

4740

#endif

4344

4741

};

4345

4742

4346

4743

#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP

4596

4993

res_counter_init(&mem->memsw, NULL);

4597

4994

}

4598

4995

mem->last_scanned_child = 0;

4996

mem->last_scanned_node = MAX_NUMNODES;

4599

4997

INIT_LIST_HEAD(&mem->oom_notify);

4600

4998

4601

4999

if (parent)

4953

5351

4954

5352

static int mem_cgroup_can_attach(struct cgroup_subsys *ss,

4955

5353

struct cgroup *cgroup,

4956

struct task_struct *p,

4957

bool threadgroup)

5354

struct task_struct *p)

4958

5355

{

4959

5356

int ret = 0;

4960

5357

struct mem_cgroup *mem = mem_cgroup_from_cont(cgroup);

4993

5390

4994

5391

static void mem_cgroup_cancel_attach(struct cgroup_subsys *ss,

4995

5392

struct cgroup *cgroup,

4996

struct task_struct *p,

4997

bool threadgroup)

5393

struct task_struct *p)

4998

5394

{

4999

5395

mem_cgroup_clear_mc();

5000

5396

}

5112

5508

static void mem_cgroup_move_task(struct cgroup_subsys *ss,

5113

5509

struct cgroup *cont,

5114

5510

struct cgroup *old_cont,

5115

struct task_struct *p,

5116

bool threadgroup)

5511

struct task_struct *p)

5117

5512

{

5118

struct mm_struct *mm;

5119

5120

if (!mc.to)

5121

/* no need to move charge */

5122

return;

5123

5124

mm = get_task_mm(p);

5513

struct mm_struct *mm = get_task_mm(p);

5514

5125

5515

if (mm) {

5126

mem_cgroup_move_charge(mm);

5516

if (mc.to)

5517

mem_cgroup_move_charge(mm);

5518

put_swap_token(mm);

5127

5519

mmput(mm);

5128

5520

}

5129

mem_cgroup_clear_mc();

5521

if (mc.to)

5522

mem_cgroup_clear_mc();

5130

5523

}

5131

5524

#else /* !CONFIG_MMU */

5132

5525

static int mem_cgroup_can_attach(struct cgroup_subsys *ss,

5133

5526

struct cgroup *cgroup,

5134

struct task_struct *p,

5135

bool threadgroup)

5527

struct task_struct *p)

5136

5528

{

5137

5529

return 0;

5138

5530

}

5139

5531

static void mem_cgroup_cancel_attach(struct cgroup_subsys *ss,

5140

5532

struct cgroup *cgroup,

5141

struct task_struct *p,

5142

bool threadgroup)

5533

struct task_struct *p)

5143

5534

{

5144

5535

}

5145

5536

static void mem_cgroup_move_task(struct cgroup_subsys *ss,

5146

5537

struct cgroup *cont,

5147

5538

struct cgroup *old_cont,

5148

struct task_struct *p,

5149

bool threadgroup)

5539

struct task_struct *p)

5150

5540

{

5151

5541

}

5152

5542

#endif

5169

5559

static int __init enable_swap_account(char *s)

5170

5560

{

5171

5561

/* consider enabled if no parameter or 1 is given */

5172

if (!(*s) || !strcmp(s, "=1"))

5562

if (!strcmp(s, "1"))

5173

5563

really_do_swap_account = 1;

5174

else if (!strcmp(s, "=0"))

5564

else if (!strcmp(s, "0"))

5175

5565

really_do_swap_account = 0;

5176

5566

return 1;

5177

5567

}

5178

__setup("swapaccount", enable_swap_account);

5568

__setup("swapaccount=", enable_swap_account);

5179

5569

5180

static int __init disable_swap_account(char *s)

5181

{

5182

printk_once("noswapaccount is deprecated and will be removed in 2.6.40. Use swapaccount=0 instead\n");

5183

enable_swap_account("=0");

5184

return 1;

5185

}

5186

__setup("noswapaccount", disable_swap_account);

5187

5570

#endif