~ubuntu-branches/ubuntu/precise/libcgroup/precise-proposed : revision 1

1

/*

2

* Copyright IBM Corporation. 2007

3

*

4

* Author: Dhaval Giani <dhaval@linux.vnet.ibm.com>

5

* Author: Balbir Singh <balbir@linux.vnet.ibm.com>

6

*

7

* This program is free software; you can redistribute it and/or modify it

8

* under the terms of version 2.1 of the GNU Lesser General Public License

9

* as published by the Free Software Foundation.

10

*

11

* This program is distributed in the hope that it would be useful, but

12

* WITHOUT ANY WARRANTY; without even the implied warranty of

13

* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.

14

*

15

* TODOs:

16

* 1. Convert comments to Docbook style.

17

* 2. Add more APIs for the control groups.

18

* 3. Handle the configuration related APIs.

19

* 4. Error handling.

20

*

21

* Code initiated and designed by Dhaval Giani. All faults are most likely

22

* his mistake.

23

*

24

* Bharata B Rao <bharata@linux.vnet.ibm.com> is willing to take blame

25

* for mistakes in APIs for reading statistics.

26

*/

27

28

#ifndef _GNU_SOURCE

29

#define _GNU_SOURCE

30

#endif

31

32

#include <dirent.h>

33

#include <errno.h>

34

#include <libcgroup.h>

35

#include <libcgroup-internal.h>

36

#include <mntent.h>

37

#include <pthread.h>

38

#include <stdio.h>

39

#include <stdlib.h>

40

#include <string.h>

41

#include <sys/types.h>

42

#include <sys/stat.h>

43

#include <sys/socket.h>

44

#include <fcntl.h>

45

#include <sys/syscall.h>

46

#include <unistd.h>

47

#include <fts.h>

48

#include <ctype.h>

49

#include <pwd.h>

50

#include <libgen.h>

51

#include <assert.h>

52

#include <linux/un.h>

53

54

#ifndef PACKAGE_VERSION
#define PACKAGE_VERSION 0.01
#endif

/*
 * Turn the *expansion* of a macro argument into a string literal.
 *
 * The extra level of indirection is required because the # operator does not
 * macro-expand its operand: a single-level VERSION(PACKAGE_VERSION) produces
 * the literal text "PACKAGE_VERSION" rather than the version number.
 *
 * NOTE(review): if the build system defines PACKAGE_VERSION as an already
 * quoted string, the quotes become part of the result -- confirm.
 */
#define VERSION_STRINGIFY(ver) #ver
#define VERSION(ver) VERSION_STRINGIFY(ver)

59

60

/*

61

* The errno which happend the last time (have to be thread specific)

62

*/

63

__thread int last_errno;

64

65

#define MAXLEN 256

66

67

/* the value have to be thread specific */

68

__thread char errtext[MAXLEN];

69

70

/* Task command name length */

71

#define TASK_COMM_LEN 16

72

73

/*

74

* Remember to bump this up for major API changes.

75

*/

76

const static char cg_version[] = VERSION(PACKAGE_VERSION);

77

78

struct cg_mount_table_s cg_mount_table[CG_CONTROLLER_MAX];

79

static pthread_rwlock_t cg_mount_table_lock = PTHREAD_RWLOCK_INITIALIZER;

80

81

/* Check if cgroup_init has been called or not. */

82

static int cgroup_initialized;

83

84

/* Check if the rules cache has been loaded or not. */

85

static bool cgroup_rules_loaded;

86

87

/* List of configuration rules */

88

static struct cgroup_rule_list rl;

89

90

/* Temporary list of configuration rules (for non-cache apps) */

91

static struct cgroup_rule_list trl;

92

93

/* Lock for the list of rules (rl) */

94

static pthread_rwlock_t rl_lock = PTHREAD_RWLOCK_INITIALIZER;

95

96

/*
 * Human readable messages for the library's error codes.
 * NOTE(review): presumably indexed by (error code - first library error
 * code); the lookup is not visible here -- keep the order in sync with the
 * error code definitions.
 */
char *cgroup_strerror_codes[] = {
	[0]  = "Cgroup is not compiled in",
	[1]  = "Cgroup is not mounted",
	[2]  = "Cgroup does not exist",
	[3]  = "Cgroup has not been created",
	[4]  = "Cgroup one of the needed subsystems is not mounted",
	[5]  = "Cgroup, request came in from non owner",
	[6]  = "Cgroup controllers controllers are bound to different mount points",
	[7]  = "Cgroup, operation not allowed",
	[8]  = "Cgroup value set exceeds maximum",
	[9]  = "Cgroup controller already exists",
	[10] = "Cgroup value already exists",
	[11] = "Cgroup invalid operation",
	[12] = "Cgroup, creation of controller failed",
	[13] = "Cgroup operation failed",
	[14] = "Cgroup not initialized",
	[15] = "Cgroup trying to set value for control that does not exist",
	[16] = "Cgroup generic error",
	[17] = "Cgroup values are not equal",
	[18] = "Cgroup controllers are different",
	[19] = "Cgroup parsing failed",
	[20] = "Cgroup, rules file does not exist",
	[21] = "Cgroup mounting failed",
	[22] = "The config file can not be opend",
	[23] = "End of File or iterator",
};

122

123

static int cg_chown_file(FTS *fts, FTSENT *ent, uid_t owner, gid_t group)

124

{

125

int ret = 0;

126

const char *filename = fts->fts_path;

127

cgroup_dbg("seeing file %s\n", filename);

128

switch (ent->fts_info) {

129

case FTS_ERR:

130

errno = ent->fts_errno;

131

break;

132

case FTS_D:

133

case FTS_DC:

134

case FTS_NSOK:

135

case FTS_NS:

136

case FTS_DNR:

137

case FTS_DP:

138

ret = chown(filename, owner, group);

139

if (ret)

140

goto fail_chown;

141

ret = chmod(filename, S_IRUSR | S_IWUSR | S_IXUSR | S_IRGRP |

142

S_IWGRP | S_IXGRP | S_IROTH | S_IXOTH);

143

break;

144

case FTS_F:

145

case FTS_DEFAULT:

146

ret = chown(filename, owner, group);

147

if (ret)

148

goto fail_chown;

149

ret = chmod(filename, S_IRUSR | S_IWUSR | S_IRGRP |

150

S_IWGRP | S_IROTH);

151

break;

152

}

153

fail_chown:

154

if (ret < 0) {

155

last_errno = errno;

156

ret = ECGOTHER;

157

}

158

return ret;

159

}

160

161

/*

162

* TODO: Need to decide a better place to put this function.

163

*/

164

static int cg_chown_recursive(char **path, uid_t owner, gid_t group)

165

{

166

int ret = 0;

167

cgroup_dbg("path is %s\n", *path);

168

FTS *fts = fts_open(path, FTS_PHYSICAL | FTS_NOCHDIR |

169

FTS_NOSTAT, NULL);

170

while (1) {

171

FTSENT *ent;

172

ent = fts_read(fts);

173

if (!ent) {

174

cgroup_dbg("fts_read failed\n");

175

break;

176

}

177

ret = cg_chown_file(fts, ent, owner, group);

178

}

179

fts_close(fts);

180

return ret;

181

}

182

183

static int cgroup_test_subsys_mounted(const char *name)

184

{

185

int i;

186

187

pthread_rwlock_rdlock(&cg_mount_table_lock);

188

189

for (i = 0; cg_mount_table[i].name[0] != '\0'; i++) {

190

if (strncmp(cg_mount_table[i].name, name,

191

sizeof(cg_mount_table[i].name)) == 0) {

192

pthread_rwlock_unlock(&cg_mount_table_lock);

193

return 1;

194

}

195

}

196

pthread_rwlock_unlock(&cg_mount_table_lock);

197

return 0;

198

}

199

200

/**

201

* Free a single cgroup_rule struct.

202

* @param r The rule to free from memory

203

*/

204

static void cgroup_free_rule(struct cgroup_rule *r)

205

{

206

/* Loop variable */

207

int i = 0;

208

209

/* Make sure our rule is not NULL, first. */

210

if (!r) {

211

cgroup_dbg("Warning: Attempted to free NULL rule.\n");

212

return;

213

}

214

if (r->procname) {

215

free(r->procname);

216

r->procname = NULL;

217

}

218

/* We must free any used controller strings, too. */

219

for(i = 0; i < MAX_MNT_ELEMENTS; i++) {

220

if (r->controllers[i])

221

free(r->controllers[i]);

222

}

223

224

free(r);

225

}

226

227

/**

228

* Free a list of cgroup_rule structs. If rl is the main list of rules,

229

* the lock must be taken for writing before calling this function!

230

* @param rl Pointer to the list of rules to free from memory

231

*/

232

static void cgroup_free_rule_list(struct cgroup_rule_list *rl)

233

{

234

/* Temporary pointer */

235

struct cgroup_rule *tmp = NULL;

236

237

/* Make sure we're not freeing NULL memory! */

238

if (!(rl->head)) {

239

cgroup_dbg("Warning: Attempted to free NULL list.\n");

240

return;

241

}

242

243

while (rl->head) {

244

tmp = rl->head;

245

rl->head = tmp->next;

246

cgroup_free_rule(tmp);

247

}

248

249

/* Don't leave wild pointers around! */

250

rl->head = NULL;

251

rl->tail = NULL;

252

}

253

254

/*
 * Strip the parts of a rule line that carry no information.
 *
 * The line is cut (in place) at the first '#' (start of a comment) or
 * newline, then leading blanks (spaces and tabs) are skipped.
 *
 * @param rule The line to clean up; it is modified
 * @return Pointer to the first non-blank character inside rule, or NULL if
 *	nothing is left
 */
static char *cg_skip_unused_charactors_in_rule(char *rule)
{
	char *itr;

	/* Anything after a # sign is a comment; the newline goes as well. */
	rule[strcspn(rule, "#\n")] = '\0';

	/*
	 * Skip leading tabs and spaces. The cast is required: passing a
	 * negative char (a non-ASCII byte) to isblank() is undefined.
	 */
	itr = rule;
	while (isblank((unsigned char)*itr))
		itr++;

	/* If there's nothing left, the line can be ignored. */
	if (*itr == '\0')
		return NULL;

	return itr;
}

279

280

/**

281

* Parse the configuration file that maps UID/GIDs to cgroups. If ever the

282

* configuration file is modified, applications should call this function to

283

* load the new configuration rules. The function caller is responsible for

284

* calling free() on each rule in the list.

285

*

286

* The cache parameter alters the behavior of this function. If true, this

287

* function will read the entire configuration file and store the results in

288

* rl (global rules list). If false, this function will only parse until it

289

* finds a rule matching the given UID or GID. It will store this rule in rl,

290

* as well as any children rules (rules that begin with a %) that it has.

291

*

292

* This function is NOT thread safe!

293

* @param cache True to cache rules, else false

294

* @param muid If cache is false, the UID to match against

295

* @param mgid If cache is false, the GID to match against

296

* @return 0 on success, -1 if no cache and match found, > 0 on error.

297

* TODO: Make this function thread safe!

298

*/

299

static int cgroup_parse_rules(bool cache, uid_t muid,

300

gid_t mgid, char *mprocname)

301

{

302

/* File descriptor for the configuration file */

303

FILE *fp = NULL;

304

305

/* Buffer to store the line we're working on */

306

char buff[CGROUP_RULE_MAXLINE] = { '\0' };

307

308

/* Iterator for the line we're working on */

309

char *itr = NULL;

310

311

/* Pointer to process name in a line of the configuration file */

312

char *procname = NULL;

313

314

/* Pointer to the list that we're using */

315

struct cgroup_rule_list *lst = NULL;

316

317

/* Rule to add to the list */

318

struct cgroup_rule *newrule = NULL;

319

320

/* Structure to get GID from group name */

321

struct group *grp = NULL;

322

323

/* Structure to get UID from user name */

324

struct passwd *pwd = NULL;

325

326

/* Temporary storage for a configuration rule */

327

char key[CGROUP_RULE_MAXKEY] = { '\0' };

328

char user[LOGIN_NAME_MAX] = { '\0' };

329

char controllers[CG_CONTROLLER_MAX] = { '\0' };

330

char destination[FILENAME_MAX] = { '\0' };

331

uid_t uid = CGRULE_INVALID;

332

gid_t gid = CGRULE_INVALID;

333

int len_username;

334

int len_procname;

335

336

/* The current line number */

337

unsigned int linenum = 0;

338

339

/* Did we skip the previous line? */

340

bool skipped = false;

341

342

/* Have we found a matching rule (non-cache mode)? */

343

bool matched = false;

344

345

/* Return codes */

346

int ret = 0;

347

348

/* Temporary buffer for strtok() */

349

char *stok_buff = NULL;

350

351

/* Loop variable. */

352

int i = 0;

353

354

/* Open the configuration file. */

355

pthread_rwlock_wrlock(&rl_lock);

356

fp = fopen(CGRULES_CONF_FILE, "r");

357

if (!fp) {

358

cgroup_dbg("Failed to open configuration file %s with"

359

" error: %s\n", CGRULES_CONF_FILE,

360

strerror(errno));

361

last_errno = errno;

362

ret = ECGOTHER;

363

goto unlock;

364

}

365

366

/* Determine which list we're using. */

367

if (cache)

368

lst = &rl;

369

else

370

lst = &trl;

371

372

/* If our list already exists, clean it. */

373

if (lst->head)

374

cgroup_free_rule_list(lst);

375

376

/* Now, parse the configuration file one line at a time. */

377

cgroup_dbg("Parsing configuration file.\n");

378

while (fgets(buff, sizeof(buff), fp) != NULL) {

379

linenum++;

380

381

itr = cg_skip_unused_charactors_in_rule(buff);

382

if (!itr)

383

continue;

384

385

/*

386

* If we skipped the last rule and this rule is a continuation

387

* of it (begins with %), then we should skip this rule too.

388

*/

389

if (skipped && *itr == '%') {

390

cgroup_dbg("Warning: Skipped child of invalid rule,"

391

" line %d.\n", linenum);

392

continue;

393

}

394

395

/*

396

* If there is something left, it should be a rule. Otherwise,

397

* there's an error in the configuration file.

398

*/

399

skipped = false;

400

i = sscanf(itr, "%s%s%s", key, controllers, destination);

401

if (i != 3) {

402

cgroup_dbg("Failed to parse configuration file on"

403

" line %d.\n", linenum);

404

goto parsefail;

405

}

406

procname = strchr(key, ':');

407

if (procname) {

408

/* <user>:<procname> <subsystem> <destination> */

409

procname++; /* skip ':' */

410

len_username = procname - key - 1;

411

len_procname = strlen(procname);

412

if (len_procname < 0) {

413

cgroup_dbg("Failed to parse configuration file"

414

" on line %d.\n", linenum);

415

goto parsefail;

416

}

417

} else {

418

len_username = strlen(key);

419

len_procname = 0;

420

}

421

len_username = min(len_username, sizeof(user) - 1);

422

memset(user, '\0', sizeof(user));

423

strncpy(user, key, len_username);

424

425

/*

426

* Next, check the user/group. If it's a % sign, then we

427

* are continuing another rule and UID/GID should not be

428

* reset. If it's a @, we're dealing with a GID rule. If

429

* it's a *, then we do not need to do a lookup because the

430

* rule always applies (it's a wildcard). If we're using

431

* non-cache mode and we've found a matching rule, we only

432

* continue to parse if we're looking at a child rule.

433

*/

434

if ((!cache) && matched && (strncmp(user, "%", 1) != 0)) {

435

/* If we make it here, we finished (non-cache). */

436

cgroup_dbg("Parsing of configuration file"

437

" complete.\n\n");

438

ret = -1;

439

goto close;

440

}

441

if (strncmp(user, "@", 1) == 0) {

442

/* New GID rule. */

443

itr = &(user[1]);

444

if ((grp = getgrnam(itr))) {

445

uid = CGRULE_INVALID;

446

gid = grp->gr_gid;

447

} else {

448

cgroup_dbg("Warning: Entry for %s not"

449

"found. Skipping rule on line"

450

" %d.\n", itr, linenum);

451

skipped = true;

452

continue;

453

}

454

} else if (strncmp(user, "*", 1) == 0) {

455

/* Special wildcard rule. */

456

uid = CGRULE_WILD;

457

gid = CGRULE_WILD;

458

} else if (*itr != '%') {

459

/* New UID rule. */

460

if ((pwd = getpwnam(user))) {

461

uid = pwd->pw_uid;

462

gid = CGRULE_INVALID;

463

} else {

464

cgroup_dbg("Warning: Entry for %s not"

465

"found. Skipping rule on line"

466

" %d.\n", user, linenum);

467

skipped = true;

468

continue;

469

}

470

} /* Else, we're continuing another rule (UID/GID are okay). */

471

472

/*

473

* If we are not caching rules, then we need to check for a

474

* match before doing anything else. We consider four cases:

475

* The UID matches, the GID matches, the UID is a member of the

476

* GID, or we're looking at the wildcard rule, which always

477

* matches. If none of these are true, we simply continue to

478

* the next line in the file.

479

*/

480

if (grp && muid != CGRULE_INVALID) {

481

pwd = getpwuid(muid);

482

for (i = 0; grp->gr_mem[i]; i++) {

483

if (!(strcmp(pwd->pw_name, grp->gr_mem[i])))

484

matched = true;

485

}

486

}

487

488

if (uid == muid || gid == mgid || uid == CGRULE_WILD) {

489

matched = true;

490

}

491

492

if (!cache) {

493

if (!matched)

494

continue;

495

if (len_procname) {

496

/*

497

* If there is a rule based on process name,

498

* it should be matched with mprocname.

499

*/

500

if (!mprocname) {

501

uid = CGRULE_INVALID;

502

gid = CGRULE_INVALID;

503

matched = false;

504

continue;

505

}

506

if (strcmp(mprocname, procname) &&

507

strcmp(basename(mprocname), procname)) {

508

uid = CGRULE_INVALID;

509

gid = CGRULE_INVALID;

510

matched = false;

511

continue;

512

}

513

}

514

}

515

516

/*

517

* Now, we're either caching rules or we found a match. Either

518

* way, copy everything into a new rule and push it into the

519

* list.

520

*/

521

newrule = calloc(1, sizeof(struct cgroup_rule));

522

if (!newrule) {

523

cgroup_dbg("Out of memory? Error: %s\n",

524

strerror(errno));

525

last_errno = errno;

526

ret = ECGOTHER;

527

goto close;

528

}

529

530

newrule->uid = uid;

531

newrule->gid = gid;

532

len_username = min(len_username, sizeof(newrule->username) - 1);

533

strncpy(newrule->username, user, len_username);

534

if (len_procname) {

535

newrule->procname = strdup(procname);

536

if (!newrule->procname) {

537

last_errno = errno;

538

ret = ECGOTHER;

539

goto close;

540

}

541

} else {

542

newrule->procname = NULL;

543

}

544

strncpy(newrule->destination, destination,

545

sizeof(newrule->destination) - 1);

546

newrule->next = NULL;

547

548

/* Parse the controller list, and add that to newrule too. */

549

stok_buff = strtok(controllers, ",");

550

if (!stok_buff) {

551

cgroup_dbg("Failed to parse controllers on line"

552

" %d\n", linenum);

553

goto destroyrule;

554

}

555

556

i = 0;

557

do {

558

if (i >= MAX_MNT_ELEMENTS) {

559

cgroup_dbg("Too many controllers listed"

560

" on line %d\n", linenum);

561

goto destroyrule;

562

}

563

564

newrule->controllers[i] = strndup(stok_buff,

565

strlen(stok_buff) + 1);

566

if (!(newrule->controllers[i])) {

567

cgroup_dbg("Out of memory? Error was: %s\n",

568

strerror(errno));

569

goto destroyrule;

570

}

571

i++;

572

} while ((stok_buff = strtok(NULL, ",")));

573

574

/* Now, push the rule. */

575

if (lst->head == NULL) {

576

lst->head = newrule;

577

lst->tail = newrule;

578

} else {

579

lst->tail->next = newrule;

580

lst->tail = newrule;

581

}

582

583

cgroup_dbg("Added rule %s (UID: %d, GID: %d) -> %s for"

584

" controllers:", lst->tail->username, lst->tail->uid,

585

lst->tail->gid, lst->tail->destination);

586

for (i = 0; lst->tail->controllers[i]; i++) {

587

cgroup_dbg(" %s", lst->tail->controllers[i]);

588

}

589

cgroup_dbg("\n");

590

591

/* Finally, clear the buffer. */

592

grp = NULL;

593

pwd = NULL;

594

}

595

596

/* If we make it here, there were no errors. */

597

cgroup_dbg("Parsing of configuration file complete.\n\n");

598

ret = (matched && !cache) ? -1 : 0;

599

goto close;

600

601

destroyrule:

602

cgroup_free_rule(newrule);

603

604

parsefail:

605

ret = ECGROUPPARSEFAIL;

606

607

close:

608

fclose(fp);

609

unlock:

610

pthread_rwlock_unlock(&rl_lock);

611

return ret;

612

}

613

614

/**

615

* cgroup_init(), initializes the MOUNT_POINT.

616

*

617

* This code is theoretically thread safe now. Its not really tested

618

* so it can blow up. If does for you, please let us know with your

619

* test case and we can really make it thread safe.

620

*

621

*/

622

int cgroup_init()

623

{

624

FILE *proc_mount = NULL;

625

struct mntent *ent = NULL;

626

struct mntent *temp_ent = NULL;

627

int found_mnt = 0;

628

int ret = 0;

629

static char *controllers[CG_CONTROLLER_MAX];

630

FILE *proc_cgroup = NULL;

631

char subsys_name[FILENAME_MAX];

632

int hierarchy, num_cgroups, enabled;

633

int i=0;

634

int j;

635

int duplicate = 0;

636

char *mntopt = NULL;

637

int err;

638

char *buf = NULL;

639

char mntent_buffer[4 * FILENAME_MAX];

640

char *strtok_buffer = NULL;

641

642

pthread_rwlock_wrlock(&cg_mount_table_lock);

643

644

proc_cgroup = fopen("/proc/cgroups", "r");

645

646

if (!proc_cgroup) {

647

last_errno = errno;

648

ret = ECGOTHER;

649

goto unlock_exit;

650

}

651

652

/*

653

* The first line of the file has stuff we are not interested in.

654

* So just read it and discard the information.

655

*

656

* XX: fix the size for fgets

657

*/

658

buf = malloc(FILENAME_MAX);

659

if (!buf) {

660

last_errno = errno;

661

ret = ECGOTHER;

662

goto unlock_exit;

663

}

664

if (!fgets(buf, FILENAME_MAX, proc_cgroup)) {

665

free(buf);

666

last_errno = errno;

667

ret = ECGOTHER;

668

goto unlock_exit;

669

}

670

free(buf);

671

672

while (!feof(proc_cgroup)) {

673

err = fscanf(proc_cgroup, "%s %d %d %d", subsys_name,

674

&hierarchy, &num_cgroups, &enabled);

675

if (err < 0)

676

break;

677

controllers[i] = strdup(subsys_name);

678

i++;

679

}

680

controllers[i] = NULL;

681

682

proc_mount = fopen("/proc/mounts", "r");

683

if (proc_mount == NULL) {

684

ret = ECGFAIL;

685

goto unlock_exit;

686

}

687

688

temp_ent = (struct mntent *) malloc(sizeof(struct mntent));

689

690

if (!temp_ent) {

691

last_errno = errno;

692

ret = ECGOTHER;

693

goto unlock_exit;

694

}

695

696

while ((ent = getmntent_r(proc_mount, temp_ent,

697

mntent_buffer,

698

sizeof(mntent_buffer))) != NULL) {

699

if (strcmp(ent->mnt_type, "cgroup"))

700

continue;

701

702

for (i = 0; controllers[i] != NULL; i++) {

703

mntopt = hasmntopt(ent, controllers[i]);

704

705

if (!mntopt)

706

continue;

707

708

mntopt = strtok_r(mntopt, ",", &strtok_buffer);

709

710

if (strcmp(mntopt, controllers[i]))

711

continue;

712

713

cgroup_dbg("matched %s:%s\n", mntopt, controllers[i]);

714

715

/* do not have duplicates in mount table */

716

duplicate = 0;

717

for (j = 0; j < found_mnt; j++) {

718

if (strncmp(mntopt, cg_mount_table[j].name, FILENAME_MAX)

719

== 0) {

720

duplicate = 1;

721

break;

722

}

723

}

724

if (duplicate) {

725

cgroup_dbg("controller %s is already mounted on %s\n",

726

mntopt, cg_mount_table[j].path);

727

continue;

728

}

729

730

strcpy(cg_mount_table[found_mnt].name, controllers[i]);

731

strcpy(cg_mount_table[found_mnt].path, ent->mnt_dir);

732

cgroup_dbg("Found cgroup option %s, count %d\n",

733

ent->mnt_opts, found_mnt);

734

found_mnt++;

735

}

736

}

737

738

free(temp_ent);

739

740

if (!found_mnt) {

741

cg_mount_table[0].name[0] = '\0';

742

ret = ECGROUPNOTMOUNTED;

743

goto unlock_exit;

744

}

745

746

found_mnt++;

747

cg_mount_table[found_mnt].name[0] = '\0';

748

749

cgroup_initialized = 1;

750

751

unlock_exit:

752

if (proc_cgroup)

753

fclose(proc_cgroup);

754

755

if (proc_mount)

756

fclose(proc_mount);

757

758

for (i = 0; controllers[i]; i++) {

759

free(controllers[i]);

760

controllers[i] = NULL;

761

}

762

763

pthread_rwlock_unlock(&cg_mount_table_lock);

764

765

return ret;

766

}

767

768

/*
 * Check whether any file system of type "cgroup" is listed in /proc/mounts.
 *
 * @return 1 if such a mount exists; 0 if there is none, if /proc/mounts
 *	cannot be opened or if memory is exhausted
 */
static int cg_test_mounted_fs()
{
	char mntent_buff[4 * FILENAME_MAX];
	struct mntent *temp_ent = NULL;
	struct mntent *ent = NULL;
	FILE *proc_mount = NULL;
	int found = 0;

	proc_mount = fopen("/proc/mounts", "r");
	if (proc_mount == NULL)
		return 0;

	temp_ent = (struct mntent *) malloc(sizeof(struct mntent));
	if (!temp_ent) {
		/* We just fail at the moment. */
		fclose(proc_mount);
		return 0;
	}

	/* Walk the mount list until a cgroup entry turns up or it ends. */
	for (;;) {
		ent = getmntent_r(proc_mount, temp_ent, mntent_buff,
					sizeof(mntent_buff));
		if (ent == NULL)
			break;
		if (strcmp(ent->mnt_type, "cgroup") == 0) {
			found = 1;
			break;
		}
	}

	fclose(proc_mount);
	free(temp_ent);
	return found;
}

809

810

/* Return the id of the calling thread, obtained with the gettid system call. */
static inline pid_t cg_gettid()
{
	pid_t tid = syscall(__NR_gettid);

	return tid;
}

814

815

816

/* Call with cg_mount_table_lock taken */

817

static char *cg_build_path_locked(char *name, char *path, char *type)

818

{

819

int i;

820

for (i = 0; cg_mount_table[i].name[0] != '\0'; i++) {

821

/*

822

* XX: Change to snprintf once you figure what n should be

823

*/

824

if (strcmp(cg_mount_table[i].name, type) == 0) {

825

sprintf(path, "%s/", cg_mount_table[i].path);

826

if (name) {

827

char *tmp;

828

tmp = strdup(path);

829

sprintf(path, "%s%s/", tmp, name);

830

free(tmp);

831

}

832

return path;

833

}

834

}

835

return NULL;

836

}

837

838

char *cg_build_path(char *name, char *path, char *type)

839

{

840

pthread_rwlock_rdlock(&cg_mount_table_lock);

841

path = cg_build_path_locked(name, path, type);

842

pthread_rwlock_unlock(&cg_mount_table_lock);

843

844

return path;

845

}

846

847

static int __cgroup_attach_task_pid(char *path, pid_t tid)

848

{

849

int ret = 0;

850

FILE *tasks = NULL;

851

852

tasks = fopen(path, "w");

853

if (!tasks) {

854

switch (errno) {

855

case EPERM:

856

return ECGROUPNOTOWNER;

857

case ENOENT:

858

return ECGROUPNOTEXIST;

859

default:

860

return ECGROUPNOTALLOWED;

861

}

862

}

863

ret = fprintf(tasks, "%d", tid);

864

if (ret < 0) {

865

last_errno = errno;

866

ret = ECGOTHER;

867

goto err;

868

}

869

ret = fflush(tasks);

870

if (ret) {

871

last_errno = errno;

872

ret = ECGOTHER;

873

goto err;

874

}

875

fclose(tasks);

876

return 0;

877

err:

878

cgroup_dbg("Error writing tid %d to %s:%s\n",

879

tid, path, strerror(errno));

880

fclose(tasks);

881

return ret;

882

}

883

884

/** cgroup_attach_task_pid is used to assign tasks to a cgroup.

885

* struct cgroup *cgroup: The cgroup to assign the thread to.

886

* pid_t tid: The thread to be assigned to the cgroup.

887

*

888

* returns 0 on success.

889

* returns ECGROUPNOTOWNER if the caller does not have access to the cgroup.

890

* returns ECGROUPNOTALLOWED for other causes of failure.

891

*/

892

int cgroup_attach_task_pid(struct cgroup *cgroup, pid_t tid)

893

{

894

char path[FILENAME_MAX];

895

int i, ret = 0;

896

897

if (!cgroup_initialized) {

898

cgroup_dbg("libcgroup is not initialized\n");

899

return ECGROUPNOTINITIALIZED;

900

}

901

if(!cgroup)

902

{

903

pthread_rwlock_rdlock(&cg_mount_table_lock);

904

for(i = 0; i < CG_CONTROLLER_MAX &&

905

cg_mount_table[i].name[0]!='\0'; i++) {

906

if (!cg_build_path_locked(NULL, path,

907

cg_mount_table[i].name))

908

continue;

909

strncat(path, "/tasks", sizeof(path) - strlen(path));

910

ret = __cgroup_attach_task_pid(path, tid);

911

if (ret) {

912

pthread_rwlock_unlock(&cg_mount_table_lock);

913

return ret;

914

}

915

}

916

pthread_rwlock_unlock(&cg_mount_table_lock);

917

} else {

918

for (i = 0; i < cgroup->index; i++) {

919

if (!cgroup_test_subsys_mounted(cgroup->controller[i]->name)) {

920

cgroup_dbg("subsystem %s is not mounted\n",

921

cgroup->controller[i]->name);

922

return ECGROUPSUBSYSNOTMOUNTED;

923

}

924

}

925

926

for (i = 0; i < cgroup->index; i++) {

927

if (!cg_build_path(cgroup->name, path,

928

cgroup->controller[i]->name))

929

continue;

930

strncat(path, "/tasks", sizeof(path) - strlen(path));

931

ret = __cgroup_attach_task_pid(path, tid);

932

if (ret)

933

return ret;

934

}

935

}

936

return 0;

937

}

938

939

/** cgroup_attach_task attaches the calling thread to a cgroup.
 * struct cgroup *cgroup: The cgroup to assign the current thread to.
 *
 * The thread id is taken from cg_gettid(); the return value is the one of
 * cgroup_attach_task_pid().
 */
int cgroup_attach_task(struct cgroup *cgroup)
{
	return cgroup_attach_task_pid(cgroup, cg_gettid());
}

953

954

/**

955

* cg_mkdir_p, emulate the mkdir -p command (recursively creating paths)

956

* @path: path to create

957

*/

958

int cg_mkdir_p(const char *path)

959

{

960

char *real_path = NULL;

961

char *wd = NULL;

962

int i = 0, j = 0;

963

char pos;

964

char *str = NULL;

965

int ret = 0;

966

char cwd[FILENAME_MAX];

967

char *buf = NULL;

968

969

buf = getcwd(cwd, FILENAME_MAX);

970

971

if (!buf) {

972

last_errno = errno;

973

return ECGOTHER;

974

}

975

976

real_path = strdup(path);

977

if (!real_path) {

978

last_errno = errno;

979

return ECGOTHER;

980

}

981

982

do {

983

while (real_path[j] != '\0' && real_path[j] != '/')

984

j++;

985

while (real_path[j] != '\0' && real_path[j] == '/')

986

j++;

987

if (i == j)

988

continue;

989

pos = real_path[j];

990

real_path[j] = '\0'; /* Temporarily overwrite "/" */

991

str = &real_path[i];

992

ret = mkdir(str, S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH);

993

wd = strdup(str);

994

if (!wd) {

995

last_errno = errno;

996

ret = ECGOTHER;

997

break;

998

}

999

real_path[j] = pos;

1000

if (ret) {

1001

switch (errno) {

1002

case EEXIST:

1003

ret = 0; /* Not fatal really */

1004

break;

1005

case EPERM:

1006

ret = ECGROUPNOTOWNER;

1007

free(wd);

1008

goto done;

1009

default:

1010

ret = ECGROUPNOTALLOWED;

1011

free(wd);

1012

goto done;

1013

}

1014

}

1015

i = j;

1016

ret = chdir(wd);

1017

if (ret) {

1018

cgroup_dbg("could not chdir to child directory (%s)\n",

1019

wd);

1020

break;

1021

}

1022

free(wd);

1023

} while (real_path[i]);

1024

1025

ret = chdir(buf);

1026

if (ret) {

1027

last_errno = errno;

1028

ret = ECGOTHER;

1029

cgroup_dbg("could not go back to old directory (%s)\n", cwd);

1030

}

1031

1032

done:

1033

free(real_path);

1034

return ret;

1035

}

1036

1037

/*

1038

* create_control_group()

1039

* This is the basic function used to create the control group. This function

1040

* just makes the group. It does not set any permissions, or any control values.

1041

* The argument path is the fully qualified path name to make it generic.

1042

*/

1043

static int cg_create_control_group(char *path)

1044

{

1045

int error;

1046

if (!cg_test_mounted_fs())

1047

return ECGROUPNOTMOUNTED;

1048

error = cg_mkdir_p(path);

1049

return error;

1050

}

1051

1052

/*

1053

* set_control_value()

1054

* This is the low level function for putting in a value in a control file.

1055

* This function takes in the complete path and sets the value in val in that

1056

* file.

1057

*/

1058

static int cg_set_control_value(char *path, char *val)

1059

{

1060

FILE *control_file = NULL;

1061

if (!cg_test_mounted_fs())

1062

return ECGROUPNOTMOUNTED;

1063

1064

control_file = fopen(path, "r+");

1065

1066

if (!control_file) {

1067

if (errno == EPERM) {

1068

/*

1069

* We need to set the correct error value, does the

1070

* group exist but we don't have the subsystem

1071

* mounted at that point, or is it that the group

1072

* does not exist. So we check if the tasks file

1073

* exist. Before that, we need to extract the path.

1074

*/

1075

int len = strlen(path);

1076

1077

while (*(path+len) != '/')

1078

len--;

1079

*(path+len+1) = '\0';

1080

strncat(path, "tasks", sizeof(path) - strlen(path));

1081

control_file = fopen(path, "r");

1082

if (!control_file) {

1083

if (errno == ENOENT)

1084

return ECGROUPSUBSYSNOTMOUNTED;

1085

}

1086

fclose(control_file);

1087

return ECGROUPNOTALLOWED;

1088

}

1089

return ECGROUPVALUENOTEXIST;

1090

}

1091

1092

fprintf(control_file, "%s", val);

1093

fclose(control_file);

1094

return 0;

1095

}

1096

1097

/** cgroup_modify_cgroup modifies the cgroup control files.

1098

* struct cgroup *cgroup: The name will be the cgroup to be modified.

1099

* The values will be the values to be modified, those not mentioned

1100

* in the structure will not be modified.

1101

*

1102

* The uids cannot be modified yet.

1103

*

1104

* returns 0 on success.

1105

*

1106

*/

1107

1108

int cgroup_modify_cgroup(struct cgroup *cgroup)

1109

{

1110

char *path, base[FILENAME_MAX];

1111

int i;

1112

int error;

1113

int ret;

1114

1115

if (!cgroup_initialized)

1116

return ECGROUPNOTINITIALIZED;

1117

1118

if (!cgroup)

1119

return ECGROUPNOTALLOWED;

1120

1121

for (i = 0; i < cgroup->index; i++) {

1122

if (!cgroup_test_subsys_mounted(cgroup->controller[i]->name)) {

1123

cgroup_dbg("subsystem %s is not mounted\n",

1124

cgroup->controller[i]->name);

1125

return ECGROUPSUBSYSNOTMOUNTED;

1126

}

1127

}

1128

1129

for (i = 0; i < cgroup->index; i++) {

1130

int j;

1131

if (!cg_build_path(cgroup->name, base,

1132

cgroup->controller[i]->name))

1133

continue;

1134

for (j = 0; j < cgroup->controller[i]->index; j++) {

1135

ret = asprintf(&path, "%s%s", base,

1136

cgroup->controller[i]->values[j]->name);

1137

if (ret < 0) {

1138

last_errno = errno;

1139

error = ECGOTHER;

1140

goto err;

1141

}

1142

error = cg_set_control_value(path,

1143

cgroup->controller[i]->values[j]->value);

1144

free(path);

1145

path = NULL;

1146

if (error)

1147

goto err;

1148

}

1149

}

1150

if (path)

1151

free(path);

1152

return 0;

1153

err:

1154

if (path)

1155

free(path);

1156

return error;

1157

1158

}

1159

1160

/**

1161

* @dst: Destination controller

1162

* @src: Source controller from which values will be copied to dst

1163

*

1164

* Create a duplicate copy of values under the specified controller

1165

*/

1166

int cgroup_copy_controller_values(struct cgroup_controller *dst,

1167

struct cgroup_controller *src)

1168

{

1169

int i, ret = 0;

1170

1171

if (!dst || !src)

1172

return ECGFAIL;

1173

1174

strncpy(dst->name, src->name, FILENAME_MAX);

1175

for (i = 0; i < src->index; i++, dst->index++) {

1176

struct control_value *src_val = src->values[i];

1177

struct control_value *dst_val;

1178

1179

dst->values[i] = calloc(1, sizeof(struct control_value));

1180

if (!dst->values[i]) {

1181

ret = ECGFAIL;

1182

goto err;

1183

}

1184

1185

dst_val = dst->values[i];

1186

strncpy(dst_val->value, src_val->value, CG_VALUE_MAX);

1187

strncpy(dst_val->name, src_val->name, FILENAME_MAX);

1188

}

1189

err:

1190

return ret;

1191

}

1192

1193

/**

1194

* @dst: Destination control group

1195

* @src: Source from which values will be copied to dst

1196

*

1197

* Create a duplicate copy of src in dst. This will be useful for those who

1198

* that intend to create new instances based on an existing control group

1199

*/

1200

int cgroup_copy_cgroup(struct cgroup *dst, struct cgroup *src)

1201

{

1202

int ret = 0, i;

1203

1204

if (!dst || !src)

1205

return ECGROUPNOTEXIST;

1206

1207

/*

1208

* Should we just use the restrict keyword instead?

1209

*/

1210

if (dst == src)

1211

return ECGFAIL;

1212

1213

cgroup_free_controllers(dst);

1214

1215

for (i = 0; i < src->index; i++, dst->index++) {

1216

struct cgroup_controller *src_ctlr = src->controller[i];

1217

struct cgroup_controller *dst_ctlr;

1218

1219

dst->controller[i] = calloc(1, sizeof(struct cgroup_controller));

1220

if (!dst->controller[i]) {

1221

ret = ECGFAIL;

1222

goto err;

1223

}

1224

1225

dst_ctlr = dst->controller[i];

1226

ret = cgroup_copy_controller_values(dst_ctlr, src_ctlr);

1227

if (ret)

1228

goto err;

1229

}

1230

err:

1231

return ret;

1232

}

1233

1234

/** cgroup_create_cgroup creates a new control group.

1235

* struct cgroup *cgroup: The control group to be created

1236

*

1237

* returns 0 on success. We recommend calling cg_delete_cgroup

1238

* if this routine fails. That should do the cleanup operation.

1239

*/

1240

int cgroup_create_cgroup(struct cgroup *cgroup, int ignore_ownership)

1241

{

1242

char *fts_path[2];

1243

char *base = NULL;

1244

char *path = NULL;

1245

int i, j, k;

1246

int error = 0;

1247

int retval = 0;

1248

int ret;

1249

1250

if (!cgroup_initialized)

1251

return ECGROUPNOTINITIALIZED;

1252

1253

if (!cgroup)

1254

return ECGROUPNOTALLOWED;

1255

1256

for (i = 0; i < cgroup->index; i++) {

1257

if (!cgroup_test_subsys_mounted(cgroup->controller[i]->name))

1258

return ECGROUPSUBSYSNOTMOUNTED;

1259

}

1260

1261

fts_path[0] = (char *)malloc(FILENAME_MAX);

1262

if (!fts_path[0]) {

1263

last_errno = errno;

1264

return ECGOTHER;

1265

}

1266

fts_path[1] = NULL;

1267

path = fts_path[0];

1268

1269

/*

1270

* XX: One important test to be done is to check, if you have multiple

1271

* subsystems mounted at one point, all of them *have* be on the cgroup

1272

* data structure. If not, we fail.

1273

*/

1274

for (k = 0; k < cgroup->index; k++) {

1275

if (!cg_build_path(cgroup->name, path,

1276

cgroup->controller[k]->name))

1277

continue;

1278

1279

error = cg_create_control_group(path);

1280

if (error)

1281

goto err;

1282

1283

base = strdup(path);

1284

1285

if (!base) {

1286

last_errno = errno;

1287

error = ECGOTHER;

1288

goto err;

1289

}

1290

1291

if (!ignore_ownership) {

1292

cgroup_dbg("Changing ownership of %s\n", fts_path[0]);

1293

error = cg_chown_recursive(fts_path,

1294

cgroup->control_uid, cgroup->control_gid);

1295

}

1296

1297

if (error)

1298

goto err;

1299

1300

for (j = 0; j < cgroup->controller[k]->index; j++) {

1301

ret = snprintf(path, FILENAME_MAX, "%s%s", base,

1302

cgroup->controller[k]->values[j]->name);

1303

cgroup_dbg("setting %s to %s, error %d\n", path,

1304

cgroup->controller[k]->values[j]->name, ret);

1305

if (ret < 0 || ret >= FILENAME_MAX) {

1306

last_errno = errno;

1307

error = ECGOTHER;

1308

goto err;

1309

}

1310

error = cg_set_control_value(path,

1311

cgroup->controller[k]->values[j]->value);

1312

/*

1313

* Should we undo, what we've done in the loops above?

1314

* An error should not be treated as fatal, since we

1315

* have several read-only files and several files that

1316

* are only conditionally created in the child.

1317

*

1318

* A middle ground would be to track that there

1319

* was an error and return that value.

1320

*/

1321

if (error) {

1322

retval = error;

1323

continue;

1324

}

1325

}

1326

1327

if (!ignore_ownership) {

1328

ret = snprintf(path, FILENAME_MAX, "%s/tasks", base);

1329

if (ret < 0 || ret >= FILENAME_MAX) {

1330

last_errno = errno;

1331

error = ECGOTHER;

1332

goto err;

1333

}

1334

error = chown(path, cgroup->tasks_uid,

1335

cgroup->tasks_gid);

1336

if (error) {

1337

last_errno = errno;

1338

error = ECGOTHER;

1339

goto err;

1340

}

1341

}

1342

free(base);

1343

base = NULL;

1344

}

1345

1346

err:

1347

if (path)

1348

free(path);

1349

if (base)

1350

free(base);

1351

if (retval && !error)

1352

error = retval;

1353

return error;

1354

}

1355

1356

/**

1357

* Find the parent of the specified directory. It returns the parent (the

1358

* parent is usually name/.. unless name is a mount point.

1359

*/

1360

char *cgroup_find_parent(char *name)

1361

{

1362

char child[FILENAME_MAX];

1363

char *parent = NULL;

1364

struct stat stat_child, stat_parent;

1365

char *type = NULL;

1366

char *dir = NULL;

1367

1368

pthread_rwlock_rdlock(&cg_mount_table_lock);

1369

type = cg_mount_table[0].name;

1370

if (!cg_build_path_locked(name, child, type)) {

1371

pthread_rwlock_unlock(&cg_mount_table_lock);

1372

return NULL;

1373

}

1374

pthread_rwlock_unlock(&cg_mount_table_lock);

1375

1376

cgroup_dbg("path is %s\n", child);

1377

dir = dirname(child);

1378

cgroup_dbg("directory name is %s\n", dir);

1379

1380

if (asprintf(&parent, "%s/..", dir) < 0)

1381

return NULL;

1382

1383

cgroup_dbg("parent's name is %s\n", parent);

1384

1385

if (stat(dir, &stat_child) < 0)

1386

goto free_parent;

1387

1388

if (stat(parent, &stat_parent) < 0)

1389

goto free_parent;

1390

1391

/*

1392

* Is the specified "name" a mount point?

1393

*/

1394

if (stat_parent.st_dev != stat_child.st_dev) {

1395

cgroup_dbg("parent is a mount point\n");

1396

strcpy(parent, ".");

1397

} else {

1398

dir = strdup(name);

1399

if (!dir)

1400

goto free_parent;

1401

dir = dirname(dir);

1402

if (strcmp(dir, ".") == 0)

1403

strcpy(parent, "..");

1404

else

1405

strcpy(parent, dir);

1406

}

1407

1408

return parent;

1409

1410

free_parent:

1411

free(parent);

1412

return NULL;

1413

}

1414

1415

/**

1416

* @cgroup: cgroup data structure to be filled with parent values and then

1417

* passed down for creation

1418

* @ignore_ownership: Ignore doing a chown on the newly created cgroup

1419

*/

1420

int cgroup_create_cgroup_from_parent(struct cgroup *cgroup,

1421

int ignore_ownership)

1422

{

1423

char *parent = NULL;

1424

struct cgroup *parent_cgroup = NULL;

1425

int ret = ECGFAIL;

1426

1427

if (!cgroup_initialized)

1428

return ECGROUPNOTINITIALIZED;

1429

1430

parent = cgroup_find_parent(cgroup->name);

1431

if (!parent)

1432

return ret;

1433

1434

cgroup_dbg("parent is %s\n", parent);

1435

parent_cgroup = cgroup_new_cgroup(parent);

1436

if (!parent_cgroup)

1437

goto err_nomem;

1438

1439

if (cgroup_get_cgroup(parent_cgroup))

1440

goto err_parent;

1441

1442

cgroup_dbg("got parent group for %s\n", parent_cgroup->name);

1443

ret = cgroup_copy_cgroup(cgroup, parent_cgroup);

1444

if (ret)

1445

goto err_parent;

1446

1447

cgroup_dbg("copied parent group %s to %s\n", parent_cgroup->name,

1448

cgroup->name);

1449

ret = cgroup_create_cgroup(cgroup, ignore_ownership);

1450

1451

err_parent:

1452

cgroup_free(&parent_cgroup);

1453

err_nomem:

1454

free(parent);

1455

return ret;

1456

}

1457

1458

/** cgroup_delete cgroup deletes a control group.

1459

* struct cgroup *cgroup takes the group which is to be deleted.

1460

*

1461

* returns 0 on success.

1462

*/

1463

int cgroup_delete_cgroup(struct cgroup *cgroup, int ignore_migration)

1464

{

1465

FILE *delete_tasks = NULL, *base_tasks = NULL;

1466

int tids;

1467

char path[FILENAME_MAX];

1468

int error = ECGROUPNOTALLOWED;

1469

int i, ret;

1470

1471

if (!cgroup_initialized)

1472

return ECGROUPNOTINITIALIZED;

1473

1474

if (!cgroup)

1475

return ECGROUPNOTALLOWED;

1476

1477

for (i = 0; i < cgroup->index; i++) {

1478

if (!cgroup_test_subsys_mounted(cgroup->controller[i]->name))

1479

return ECGROUPSUBSYSNOTMOUNTED;

1480

}

1481

1482

for (i = 0; i < cgroup->index; i++) {

1483

if (!cg_build_path(cgroup->name, path,

1484

cgroup->controller[i]->name))

1485

continue;

1486

strncat(path, "../tasks", sizeof(path) - strlen(path));

1487

1488

base_tasks = fopen(path, "w");

1489

if (!base_tasks)

1490

goto open_err;

1491

1492

if (!cg_build_path(cgroup->name, path,

1493

cgroup->controller[i]->name)) {

1494

fclose(base_tasks);

1495

continue;

1496

}

1497

1498

strncat(path, "tasks", sizeof(path) - strlen(path));

1499

1500

delete_tasks = fopen(path, "r");

1501

if (!delete_tasks) {

1502

fclose(base_tasks);

1503

goto open_err;

1504

}

1505

1506

while (!feof(delete_tasks)) {

1507

ret = fscanf(delete_tasks, "%d", &tids);

1508

if (ret == EOF || ret < 1)

1509

break;

1510

fprintf(base_tasks, "%d", tids);

1511

}

1512

1513

fclose(delete_tasks);

1514

fclose(base_tasks);

1515

1516

if (!cg_build_path(cgroup->name, path,

1517

cgroup->controller[i]->name))

1518

continue;

1519

error = rmdir(path);

1520

last_errno = errno;

1521

}

1522

open_err:

1523

if (ignore_migration) {

1524

for (i = 0; i < cgroup->index; i++) {

1525

if (!cg_build_path(cgroup->name, path,

1526

cgroup->controller[i]->name))

1527

continue;

1528

error = rmdir(path);

1529

if (error < 0 && errno == ENOENT) {

1530

last_errno = errno;

1531

error = 0;

1532

}

1533

}

1534

}

1535

if (error)

1536

return ECGOTHER;

1537

1538

return error;

1539

}

1540

1541

/*

1542

* This function should really have more checks, but this version

1543

* will assume that the callers have taken care of everything.

1544

* Including the locking.

1545

*/

1546

static int cg_rd_ctrl_file(char *subsys, char *cgroup, char *file, char **value)

1547

{

1548

char path[FILENAME_MAX];

1549

FILE *ctrl_file = NULL;

1550

int ret;

1551

1552

if (!cg_build_path_locked(cgroup, path, subsys))

1553

return ECGFAIL;

1554

1555

strncat(path, file, sizeof(path) - strlen(path));

1556

ctrl_file = fopen(path, "r");

1557

if (!ctrl_file)

1558

return ECGROUPVALUENOTEXIST;

1559

1560

*value = malloc(CG_VALUE_MAX);

1561

if (!*value) {

1562

last_errno = errno;

1563

return ECGOTHER;

1564

}

1565

1566

/*

1567

* using %as crashes when we try to read from files like

1568

* memory.stat

1569

*/

1570

ret = fscanf(ctrl_file, "%s", *value);

1571

if (ret == 0 || ret == EOF) {

1572

free(*value);

1573

*value = NULL;

1574

}

1575

1576

fclose(ctrl_file);

1577

1578

return 0;

1579

}

1580

1581

/*

1582

* Call this function with required locks taken.

1583

*/

1584

static int cgroup_fill_cgc(struct dirent *ctrl_dir, struct cgroup *cgroup,

1585

struct cgroup_controller *cgc, int index)

1586

{

1587

char *ctrl_name = NULL;

1588

char *ctrl_file = NULL;

1589

char *ctrl_value = NULL;

1590

char *d_name = NULL;

1591

char path[FILENAME_MAX+1];

1592

char *buffer = NULL;

1593

int error = 0;

1594

struct stat stat_buffer;

1595

1596

d_name = strdup(ctrl_dir->d_name);

1597

1598

if (!strcmp(d_name, ".") || !strcmp(d_name, "..")) {

1599

error = ECGINVAL;

1600

goto fill_error;

1601

}

1602

1603

1604

/*

1605

* This part really needs to be optimized out. Probably use

1606

* some sort of a flag, but this is fine for now.

1607

*/

1608

1609

cg_build_path_locked(cgroup->name, path, cg_mount_table[index].name);

1610

strncat(path, d_name, sizeof(path) - strlen(path));

1611

1612

error = stat(path, &stat_buffer);

1613

1614

if (error) {

1615

error = ECGFAIL;

1616

goto fill_error;

1617

}

1618

1619

cgroup->control_uid = stat_buffer.st_uid;

1620

cgroup->control_gid = stat_buffer.st_gid;

1621

1622

ctrl_name = strtok_r(d_name, ".", &buffer);

1623

1624

if (!ctrl_name) {

1625

error = ECGFAIL;

1626

goto fill_error;

1627

}

1628

1629

ctrl_file = strtok_r(NULL, ".", &buffer);

1630

1631

if (!ctrl_file) {

1632

error = ECGINVAL;

1633

goto fill_error;

1634

}

1635

1636

if (strcmp(ctrl_name, cg_mount_table[index].name) == 0) {

1637

error = cg_rd_ctrl_file(cg_mount_table[index].name,

1638

cgroup->name, ctrl_dir->d_name, &ctrl_value);

1639

if (error || !ctrl_value)

1640

goto fill_error;

1641

1642

if (cgroup_add_value_string(cgc, ctrl_dir->d_name,

1643

ctrl_value)) {

1644

error = ECGFAIL;

1645

goto fill_error;

1646

}

1647

}

1648

fill_error:

1649

if (ctrl_value)

1650

free(ctrl_value);

1651

free(d_name);

1652

return error;

1653

}

1654

1655

/*

1656

* cgroup_get_cgroup reads the cgroup data from the filesystem.

1657

* struct cgroup has the name of the group to be populated

1658

*

1659

* return 0 on success.

1660

*/

1661

int cgroup_get_cgroup(struct cgroup *cgroup)

1662

{

1663

int i;

1664

char path[FILENAME_MAX];

1665

DIR *dir = NULL;

1666

struct dirent *ctrl_dir = NULL;

1667

char *control_path = NULL;

1668

int error;

1669

int ret;

1670

1671

if (!cgroup_initialized) {

1672

/* ECGROUPNOTINITIALIZED */

1673

return ECGROUPNOTINITIALIZED;

1674

}

1675

1676

if (!cgroup) {

1677

/* ECGROUPNOTALLOWED */

1678

return ECGROUPNOTALLOWED;

1679

}

1680

1681

pthread_rwlock_rdlock(&cg_mount_table_lock);

1682

for (i = 0; i < CG_CONTROLLER_MAX &&

1683

cg_mount_table[i].name[0] != '\0'; i++) {

1684

/*

1685

* cgc will not leak, since it has to be freed using

1686

* cgroup_free_cgroup

1687

*/

1688

struct cgroup_controller *cgc;

1689

struct stat stat_buffer;

1690

int path_len;

1691

1692

if (!cg_build_path_locked(NULL, path,

1693

cg_mount_table[i].name))

1694

continue;

1695

1696

path_len = strlen(path);

1697

strncat(path, cgroup->name, FILENAME_MAX - path_len - 1);

1698

1699

if (access(path, F_OK))

1700

continue;

1701

1702

if (!cg_build_path_locked(cgroup->name, path,

1703

cg_mount_table[i].name)) {

1704

/*

1705

* This fails when the cgroup does not exist

1706

* for that controller.

1707

*/

1708

continue;

1709

}

1710

1711

/*

1712

* Get the uid and gid information

1713

*/

1714

1715

ret = asprintf(&control_path, "%s/tasks", path);

1716

1717

if (ret < 0) {

1718

last_errno = errno;

1719

error = ECGOTHER;

1720

goto unlock_error;

1721

}

1722

1723

if (stat(control_path, &stat_buffer)) {

1724

last_errno = errno;

1725

free(control_path);

1726

error = ECGOTHER;

1727

goto unlock_error;

1728

}

1729

1730

cgroup->tasks_uid = stat_buffer.st_uid;

1731

cgroup->tasks_gid = stat_buffer.st_gid;

1732

1733

free(control_path);

1734

1735

cgc = cgroup_add_controller(cgroup,

1736

cg_mount_table[i].name);

1737

if (!cgc) {

1738

error = ECGINVAL;

1739

goto unlock_error;

1740

}

1741

1742

dir = opendir(path);

1743

if (!dir) {

1744

last_errno = errno;

1745

error = ECGOTHER;

1746

goto unlock_error;

1747

}

1748

1749

while ((ctrl_dir = readdir(dir)) != NULL) {

1750

/*

1751

* Skip over non regular files

1752

*/

1753

if (ctrl_dir->d_type != DT_REG)

1754

continue;

1755

1756

error = cgroup_fill_cgc(ctrl_dir, cgroup, cgc, i);

1757

if (error == ECGFAIL) {

1758

closedir(dir);

1759

goto unlock_error;

1760

}

1761

}

1762

closedir(dir);

1763

}

1764

1765

/* Check if the group really exists or not */

1766

if (!cgroup->index) {

1767

error = ECGROUPNOTEXIST;

1768

goto unlock_error;

1769

}

1770

1771

pthread_rwlock_unlock(&cg_mount_table_lock);

1772

return 0;

1773

1774

unlock_error:

1775

pthread_rwlock_unlock(&cg_mount_table_lock);

1776

/*

1777

* XX: Need to figure out how to cleanup? Cleanup just the stuff

1778

* we added, or the whole structure.

1779

*/

1780

cgroup_free_controllers(cgroup);

1781

cgroup = NULL;

1782

return error;

1783

}

1784

1785

/** cg_prepare_cgroup

1786

* Process the selected rule. Prepare the cgroup structure which can be

1787

* used to add the task to destination cgroup.

1788

*

1789

*

1790

* returns 0 on success.

1791

*/

1792

static int cg_prepare_cgroup(struct cgroup *cgroup, pid_t pid,

1793

const char *dest,

1794

char *controllers[])

1795

{

1796

int ret = 0, i;

1797

char *controller = NULL;

1798

struct cgroup_controller *cptr = NULL;

1799

1800

/* Fill in cgroup details. */

1801

cgroup_dbg("Will move pid %d to cgroup '%s'\n", pid, dest);

1802

1803

strcpy(cgroup->name, dest);

1804

1805

/* Scan all the controllers */

1806

for (i = 0; i < CG_CONTROLLER_MAX; i++) {

1807

if (!controllers[i])

1808

return 0;

1809

controller = controllers[i];

1810

1811

/* If first string is "*" that means all the mounted

1812

* controllers. */

1813

if (strcmp(controller, "*") == 0) {

1814

pthread_rwlock_rdlock(&cg_mount_table_lock);

1815

for (i = 0; i < CG_CONTROLLER_MAX &&

1816

cg_mount_table[i].name[0] != '\0'; i++) {

1817

cgroup_dbg("Adding controller %s\n",

1818

cg_mount_table[i].name);

1819

cptr = cgroup_add_controller(cgroup,

1820

cg_mount_table[i].name);

1821

if (!cptr) {

1822

cgroup_dbg("Adding controller '%s'"

1823

" failed\n",

1824

cg_mount_table[i].name);

1825

pthread_rwlock_unlock(&cg_mount_table_lock);

1826

cgroup_free_controllers(cgroup);

1827

return ECGROUPNOTALLOWED;

1828

}

1829

}

1830

pthread_rwlock_unlock(&cg_mount_table_lock);

1831

return ret;

1832

}

1833

1834

/* it is individual controller names and not "*" */

1835

cgroup_dbg("Adding controller %s\n", controller);

1836

cptr = cgroup_add_controller(cgroup, controller);

1837

if (!cptr) {

1838

cgroup_dbg("Adding controller '%s' failed\n",

1839

controller);

1840

cgroup_free_controllers(cgroup);

1841

return ECGROUPNOTALLOWED;

1842

}

1843

}

1844

1845

return ret;

1846

}

1847

1848

static struct cgroup_rule *cgroup_find_matching_rule_uid_gid(const uid_t uid,

1849

const gid_t gid, struct cgroup_rule *rule)

1850

{

1851

/* Temporary user data */

1852

struct passwd *usr = NULL;

1853

1854

/* Temporary group data */

1855

struct group *grp = NULL;

1856

1857

/* Temporary string pointer */

1858

char *sp = NULL;

1859

1860

/* Loop variable */

1861

int i = 0;

1862

1863

while (rule) {

1864

/* Skip "%" which indicates continuation of previous rule. */

1865

if (rule->username[0] == '%') {

1866

rule = rule->next;

1867

continue;

1868

}

1869

/* The wildcard rule always matches. */

1870

if ((rule->uid == CGRULE_WILD) && (rule->gid == CGRULE_WILD))

1871

return rule;

1872

1873

/* This is the simple case of the UID matching. */

1874

if (rule->uid == uid)

1875

return rule;

1876

1877

/* This is the simple case of the GID matching. */

1878

if (rule->gid == gid)

1879

return rule;

1880

1881

/* If this is a group rule, the UID might be a member. */

1882

if (rule->username[0] == '@') {

1883

/* Get the group data. */

1884

sp = &(rule->username[1]);

1885

grp = getgrnam(sp);

1886

if (!grp)

1887

continue;

1888

1889

/* Get the data for UID. */

1890

usr = getpwuid(uid);

1891

if (!usr)

1892

continue;

1893

1894

/* If UID is a member of group, we matched. */

1895

for (i = 0; grp->gr_mem[i]; i++) {

1896

if (!(strcmp(usr->pw_name, grp->gr_mem[i])))

1897

return rule;

1898

}

1899

}

1900

1901

/* If we haven't matched, try the next rule. */

1902

rule = rule->next;

1903

}

1904

1905

/* If we get here, no rules matched. */

1906

return NULL;

1907

}

1908

1909

/**

1910

* Finds the first rule in the cached list that matches the given UID, GID

1911

* or PROCESS NAME, and returns a pointer to that rule.

1912

* This function uses rl_lock.

1913

*

1914

* This function may NOT be thread safe.

1915

* @param uid The UID to match

1916

* @param gid The GID to match

1917

* @param procname The PROCESS NAME to match

1918

* @return Pointer to the first matching rule, or NULL if no match

1919

* TODO: Determine thread-safeness and fix if not safe.

1920

*/

1921

static struct cgroup_rule *cgroup_find_matching_rule(const uid_t uid,

1922

const gid_t gid, char *procname)

1923

{

1924

/* Return value */

1925

struct cgroup_rule *ret = rl.head;

1926

1927

pthread_rwlock_wrlock(&rl_lock);

1928

while (ret) {

1929

ret = cgroup_find_matching_rule_uid_gid(uid, gid, ret);

1930

if (!ret)

1931

break;

1932

if (!procname)

1933

/* If procname is NULL, return a rule matching

1934

* UID or GID */

1935

break;

1936

if (!ret->procname)

1937

/* If no process name in a rule, that means wildcard */

1938

break;

1939

if (!strcmp(ret->procname, procname))

1940

break;

1941

if (!strcmp(ret->procname, basename(procname)))

1942

/* Check a rule of basename. */

1943

break;

1944

ret = ret->next;

1945

}

1946

pthread_rwlock_unlock(&rl_lock);

1947

1948

return ret;

1949

}

1950

1951

int cgroup_change_cgroup_flags(const uid_t uid, const gid_t gid,

1952

char *procname, const pid_t pid, const int flags)

1953

{

1954

/* Temporary pointer to a rule */

1955

struct cgroup_rule *tmp = NULL;

1956

1957

/* Return codes */

1958

int ret = 0;

1959

1960

/* We need to check this before doing anything else! */

1961

if (!cgroup_initialized) {

1962

cgroup_dbg("libcgroup is not initialized\n");

1963

ret = ECGROUPNOTINITIALIZED;

1964

goto finished;

1965

}

1966

1967

/*

1968

* If the user did not ask for cached rules, we must parse the

1969

* configuration to find a matching rule (if one exists). Else, we'll

1970

* find the first match in the cached list (rl).

1971

*/

1972

if (!(flags & CGFLAG_USECACHE)) {

1973

cgroup_dbg("Not using cached rules for PID %d.\n", pid);

1974

ret = cgroup_parse_rules(false, uid, gid, procname);

1975

1976

/* The configuration file has an error! We must exit now. */

1977

if (ret != -1 && ret != 0) {

1978

cgroup_dbg("Failed to parse the configuration"

1979

" rules.\n");

1980

goto finished;

1981

}

1982

1983

/* We did not find a matching rule, so we're done. */

1984

if (ret == 0) {

1985

cgroup_dbg("No rule found to match PID: %d, UID: %d, "

1986

"GID: %d\n", pid, uid, gid);

1987

goto finished;

1988

}

1989

1990

/* Otherwise, we did match a rule and it's in trl. */

1991

tmp = trl.head;

1992

} else {

1993

/* Find the first matching rule in the cached list. */

1994

tmp = cgroup_find_matching_rule(uid, gid, procname);

1995

if (!tmp) {

1996

cgroup_dbg("No rule found to match PID: %d, UID: %d, "

1997

"GID: %d\n", pid, uid, gid);

1998

ret = 0;

1999

goto finished;

2000

}

2001

}

2002

cgroup_dbg("Found matching rule %s for PID: %d, UID: %d, GID: %d\n",

2003

tmp->username, pid, uid, gid);

2004

2005

/* If we are here, then we found a matching rule, so execute it. */

2006

do {

2007

cgroup_dbg("Executing rule %s for PID %d... ", tmp->username,

2008

pid);

2009

ret = cgroup_change_cgroup_path(tmp->destination,

2010

pid, tmp->controllers);

2011

if (ret) {

2012

cgroup_dbg("FAILED! (Error Code: %d)\n", ret);

2013

goto finished;

2014

}

2015

cgroup_dbg("OK!\n");

2016

2017

/* Now, check for multi-line rules. As long as the "next"

2018

* rule starts with '%', it's actually part of the rule that

2019

* we just executed.

2020

*/

2021

tmp = tmp->next;

2022

} while (tmp && (tmp->username[0] == '%'));

2023

2024

finished:

2025

return ret;

2026

}

2027

2028

/**
 * Same as cgroup_change_cgroup_flags(), called without a process name
 * (NULL is passed for procname).
 *
 * @param uid The UID to match
 * @param gid The GID to match
 * @param pid The PID of the process to move
 * @param flags Bit flags passed through to cgroup_change_cgroup_flags()
 * @return The value returned by cgroup_change_cgroup_flags()
 */
int cgroup_change_cgroup_uid_gid_flags(const uid_t uid, const gid_t gid,
				const pid_t pid, const int flags)
{
	int result;

	result = cgroup_change_cgroup_flags(uid, gid, NULL, pid, flags);
	return result;
}

2033

2034

/**
 * Backwards-compatible entry point kept for users of the older API. It is
 * deprecated in favour of cgroup_change_cgroup_uid_gid_flags(), which it
 * calls with flags set to 0 (none).
 * @param uid The UID to match
 * @param gid The GID to match
 * @param pid The PID of the process to move
 * @return 0 on success, > 0 on error
 *
 */
int cgroup_change_cgroup_uid_gid(uid_t uid, gid_t gid, pid_t pid)
{
	const int no_flags = 0;

	return cgroup_change_cgroup_uid_gid_flags(uid, gid, pid, no_flags);
}

2049

2050

/**

2051

* Changes the cgroup of a program based on the path provided. In this case,

2052

* the user must already know into which cgroup the task should be placed and

2053

* no rules will be parsed.

2054

*

2055

* returns 0 on success.

2056

*/

2057

int cgroup_change_cgroup_path(char *dest, pid_t pid, char *controllers[])

2058

{

2059

int ret;

2060

struct cgroup cgroup;

2061

2062

if (!cgroup_initialized) {

2063

cgroup_dbg("libcgroup is not initialized\n");

2064

return ECGROUPNOTINITIALIZED;

2065

}

2066

memset(&cgroup, 0, sizeof(struct cgroup));

2067

2068

ret = cg_prepare_cgroup(&cgroup, pid, dest, controllers);

2069

if (ret)

2070

return ret;

2071

/* Add task to cgroup */

2072

ret = cgroup_attach_task_pid(&cgroup, pid);

2073

if (ret)

2074

cgroup_dbg("cgroup_attach_task_pid failed:%d\n", ret);

2075

cgroup_free_controllers(&cgroup);

2076

return ret;

2077

}

2078

2079

/**

2080

* Print the cached rules table. This function should be called only after

2081

* first calling cgroup_parse_config(), but it will work with an empty rule

2082

* list.

2083

* @param fp The file stream to print to

2084

*/

2085

void cgroup_print_rules_config(FILE *fp)

2086

{

2087

/* Iterator */

2088

struct cgroup_rule *itr = NULL;

2089

2090

/* Loop variable */

2091

int i = 0;

2092

2093

pthread_rwlock_rdlock(&rl_lock);

2094

2095

if (!(rl.head)) {

2096

fprintf(fp, "The rules table is empty.\n\n");

2097

pthread_rwlock_unlock(&rl_lock);

2098

return;

2099

}

2100

2101

itr = rl.head;

2102

while (itr) {

2103

fprintf(fp, "Rule: %s", itr->username);

2104

if (itr->procname)

2105

fprintf(fp, ":%s", itr->procname);

2106

fprintf(fp, "\n");

2107

2108

if (itr->uid == CGRULE_WILD)

2109

fprintf(fp, " UID: any\n");

2110

else if (itr->uid == CGRULE_INVALID)

2111

fprintf(fp, " UID: N/A\n");

2112

else

2113

fprintf(fp, " UID: %d\n", itr->uid);

2114

2115

if (itr->gid == CGRULE_WILD)

2116

fprintf(fp, " GID: any\n");

2117

else if (itr->gid == CGRULE_INVALID)

2118

fprintf(fp, " GID: N/A\n");

2119

else

2120

fprintf(fp, " GID: %d\n", itr->gid);

2121

2122

fprintf(fp, " DEST: %s\n", itr->destination);

2123

2124

fprintf(fp, " CONTROLLERS:\n");

2125

for (i = 0; i < MAX_MNT_ELEMENTS; i++) {

2126

if (itr->controllers[i]) {

2127

fprintf(fp, " %s\n", itr->controllers[i]);

2128

}

2129

}

2130

fprintf(fp, "\n");

2131

itr = itr->next;

2132

}

2133

pthread_rwlock_unlock(&rl_lock);

2134

}

2135

2136

/**

2137

* Reloads the rules list, using the given configuration file. This function

2138

* is probably NOT thread safe (calls cgroup_parse_rules()).

2139

* @return 0 on success, > 0 on failure

2140

*/

2141

int cgroup_reload_cached_rules()

2142

{

2143

/* Return codes */

2144

int ret = 0;

2145

2146

cgroup_dbg("Reloading cached rules from %s.\n", CGRULES_CONF_FILE);

2147

ret = cgroup_parse_rules(true, CGRULE_INVALID, CGRULE_INVALID, NULL);

2148

if (ret) {

2149

cgroup_dbg("Error parsing configuration file \"%s\": %d.\n",

2150

CGRULES_CONF_FILE, ret);

2151

ret = ECGROUPPARSEFAIL;

2152

goto finished;

2153

}

2154

2155

#ifdef CGROUP_DEBUG

2156

cgroup_print_rules_config(stdout);

2157

#endif

2158

2159

finished:

2160

return ret;

2161

}

2162

2163

/**

2164

* Initializes the rules cache.

2165

* @return 0 on success, > 0 on error

2166

*/

2167

int cgroup_init_rules_cache()

2168

{

2169

/* Return codes */

2170

int ret = 0;

2171

2172

/* Attempt to read the configuration file and cache the rules. */

2173

ret = cgroup_parse_rules(true, CGRULE_INVALID, CGRULE_INVALID, NULL);

2174

if (ret) {

2175

cgroup_dbg("Could not initialize rule cache, error was: %d\n",

2176

ret);

2177

cgroup_rules_loaded = false;

2178

} else {

2179

cgroup_rules_loaded = true;

2180

}

2181

2182

return ret;

2183

}

2184

2185

/**

2186

* cgroup_get_current_controller_path

2187

* @pid: pid of the current process for which the path is to be determined

2188

* @controller: name of the controller for which to determine current path

2189

* @current_path: a pointer that is filled with the value of the current

2190

* path as seen in /proc/<pid>/cgroup

2191

*/

2192

int cgroup_get_current_controller_path(pid_t pid, const char *controller,

2193

char **current_path)

2194

{

2195

char *path = NULL;

2196

int ret;

2197

FILE *pid_cgroup_fd = NULL;

2198

2199

if (!controller)

2200

return ECGOTHER;

2201

2202

if (!cgroup_initialized) {

2203

cgroup_dbg("libcgroup is not initialized\n");

2204

return ECGROUPNOTINITIALIZED;

2205

}

2206

2207

ret = asprintf(&path, "/proc/%d/cgroup", pid);

2208

if (ret <= 0) {

2209

cgroup_dbg("cannot allocate memory (/proc/pid/cgroup) ret %d\n",

2210

ret);

2211

return ret;

2212

}

2213

2214

ret = ECGROUPNOTEXIST;

2215

pid_cgroup_fd = fopen(path, "r");

2216

if (!pid_cgroup_fd)

2217

goto cleanup_path;

2218

2219

/*

2220

* Why do we grab the cg_mount_table_lock?, the reason is that

2221

* the cgroup of a pid can change via the cgroup_attach_task_pid()

2222

* call. To make sure, we return consitent and safe results,

2223

* we acquire the lock upfront. We can optimize by acquiring

2224

* and releasing the lock in the while loop, but that

2225

* will be more expensive.

2226

*/

2227

pthread_rwlock_rdlock(&cg_mount_table_lock);

2228

while (!feof(pid_cgroup_fd)) {

2229

char controllers[FILENAME_MAX];

2230

char cgroup_path[FILENAME_MAX];

2231

int num;

2232

char *savedptr;

2233

char *token;

2234

2235

ret = fscanf(pid_cgroup_fd, "%d:%[^:]:%s\n", &num, controllers,

2236

cgroup_path);

2237

/*

2238

* Magic numbers like "3" seem to be integrating into

2239

* my daily life, I need some magic to help make them

2240

* disappear :)

2241

*/

2242

if (ret != 3 || ret == EOF) {

2243

cgroup_dbg("read failed for pid_cgroup_fd ret %d\n",

2244

ret);

2245

last_errno = errno;

2246

ret = ECGOTHER;

2247

goto done;

2248

}

2249

2250

token = strtok_r(controllers, ",", &savedptr);

2251

do {

2252

if (strncmp(controller, token, strlen(controller) + 1)

2253

== 0) {

2254

*current_path = strdup(cgroup_path);

2255

if (!*current_path) {

2256

last_errno = errno;

2257

ret = ECGOTHER;

2258

goto done;

2259

}

2260

ret = 0;

2261

goto done;

2262

}

2263

token = strtok_r(NULL, ",", &savedptr);

2264

} while (token);

2265

}

2266

2267

done:

2268

pthread_rwlock_unlock(&cg_mount_table_lock);

2269

fclose(pid_cgroup_fd);

2270

cleanup_path:

2271

free(path);

2272

return ret;

2273

}

2274

2275

char *cgroup_strerror(int code)

2276

{

2277

assert((code >= ECGROUPNOTCOMPILED) && (code < ECGSENTINEL));

2278

if (code == ECGOTHER) {

2279

snprintf(errtext, MAXLEN, "%s, error message: %s",

2280

cgroup_strerror_codes[code % ECGROUPNOTCOMPILED],

2281

strerror(cgroup_get_last_errno()));

2282

return errtext;

2283

}

2284

return cgroup_strerror_codes[code % ECGROUPNOTCOMPILED];

2285

}

2286

2287

/**

2288

* Return last errno, which caused ECGOTHER error.

2289

*/

2290

int cgroup_get_last_errno()

2291

{

2292

return last_errno;

2293

}

2294

2295

2296

static int cg_walk_node(FTS *fts, FTSENT *ent, const int depth,

2297

struct cgroup_file_info *info, int dir)

2298

{

2299

int ret = 0;

2300

2301

if (!cgroup_initialized)

2302

return ECGROUPNOTINITIALIZED;

2303

2304

cgroup_dbg("seeing file %s\n", ent->fts_path);

2305

2306

info->path = ent->fts_name;

2307

info->parent = ent->fts_parent->fts_name;

2308

info->full_path = ent->fts_path;

2309

info->depth = ent->fts_level;

2310

info->type = CGROUP_FILE_TYPE_OTHER;

2311

2312

if (depth && (info->depth > depth))

2313

return 0;

2314

2315

switch (ent->fts_info) {

2316

case FTS_DNR:

2317

case FTS_ERR:

2318

errno = ent->fts_errno;

2319

break;

2320

case FTS_D:

2321

if (dir & CGROUP_WALK_TYPE_PRE_DIR)

2322

info->type = CGROUP_FILE_TYPE_DIR;

2323

break;

2324

case FTS_DC:

2325

case FTS_NSOK:

2326

case FTS_NS:

2327

case FTS_DP:

2328

if (dir & CGROUP_WALK_TYPE_POST_DIR)

2329

info->type = CGROUP_FILE_TYPE_DIR;

2330

break;

2331

case FTS_F:

2332

info->type = CGROUP_FILE_TYPE_FILE;

2333

break;

2334

case FTS_DEFAULT:

2335

break;

2336

}

2337

return ret;

2338

}

2339

2340

int cgroup_walk_tree_next(const int depth, void **handle,

2341

struct cgroup_file_info *info, int base_level)

2342

{

2343

int ret = 0;

2344

struct cgroup_tree_handle *entry;

2345

FTSENT *ent;

2346

2347

if (!cgroup_initialized)

2348

return ECGROUPNOTINITIALIZED;

2349

2350

if (!handle)

2351

return ECGINVAL;

2352

2353

entry = (struct cgroup_tree_handle *) *handle;

2354

2355

ent = fts_read(entry->fts);

2356

if (!ent)

2357

return ECGEOF;

2358

if (!base_level && depth)

2359

base_level = ent->fts_level + depth;

2360

2361

ret = cg_walk_node(entry->fts, ent, base_level, info, entry->flags);

2362

2363

*handle = entry;

2364

return ret;

2365

}

2366

2367

int cgroup_walk_tree_end(void **handle)

2368

{

2369

struct cgroup_tree_handle *entry;

2370

2371

if (!cgroup_initialized)

2372

return ECGROUPNOTINITIALIZED;

2373

2374

if (!handle)

2375

return ECGINVAL;

2376

2377

entry = (struct cgroup_tree_handle *) *handle;

2378

2379

fts_close(entry->fts);

2380

free(entry);

2381

*handle = NULL;

2382

return 0;

2383

}

2384

2385

/*

2386

* TODO: Need to decide a better place to put this function.

2387

*/

2388

int cgroup_walk_tree_begin(char *controller, char *base_path, const int depth,

2389

void **handle, struct cgroup_file_info *info,

2390

int *base_level)

2391

{

2392

int ret = 0;

2393

cgroup_dbg("path is %s\n", base_path);

2394

char *cg_path[2];

2395

char full_path[FILENAME_MAX];

2396

FTSENT *ent;

2397

struct cgroup_tree_handle *entry;

2398

2399

if (!cgroup_initialized)

2400

return ECGROUPNOTINITIALIZED;

2401

2402

if (!handle)

2403

return ECGINVAL;

2404

2405

if (!cg_build_path(base_path, full_path, controller))

2406

return ECGOTHER;

2407

2408

entry = calloc(sizeof(struct cgroup_tree_handle), 1);

2409

2410

if (!entry) {

2411

last_errno = errno;

2412

return ECGOTHER;

2413

}

2414

2415

entry->flags |= CGROUP_WALK_TYPE_PRE_DIR;

2416

2417

*base_level = 0;

2418

cg_path[0] = full_path;

2419

cg_path[1] = NULL;

2420

2421

entry->fts = fts_open(cg_path, FTS_LOGICAL | FTS_NOCHDIR |

2422

FTS_NOSTAT, NULL);

2423

ent = fts_read(entry->fts);

2424

if (!ent) {

2425

cgroup_dbg("fts_read failed\n");

2426

return ECGINVAL;

2427

}

2428

if (!*base_level && depth)

2429

*base_level = ent->fts_level + depth;

2430

2431

ret = cg_walk_node(entry->fts, ent, *base_level, info, entry->flags);

2432

2433

*handle = entry;

2434

return ret;

2435

}

2436

2437

int cgroup_walk_tree_set_flags(void **handle, int flags)

2438

{

2439

struct cgroup_tree_handle *entry;

2440

2441

if (!cgroup_initialized)

2442

return ECGROUPNOTINITIALIZED;

2443

2444

if (!handle)

2445

return ECGINVAL;

2446

2447

if ((flags & CGROUP_WALK_TYPE_PRE_DIR) &&

2448

(flags & CGROUP_WALK_TYPE_POST_DIR))

2449

return ECGINVAL;

2450

2451

entry = (struct cgroup_tree_handle *) *handle;

2452

entry->flags = flags;

2453

2454

*handle = entry;

2455

return 0;

2456

}

2457

2458

/*

2459

* This parses a stat line which is in the form of (name value) pair

2460

* separated by a space.

2461

*/

2462

int cg_read_stat(FILE *fp, struct cgroup_stat *stat)

2463

{

2464

int ret = 0;

2465

char *line = NULL;

2466

size_t len = 0;

2467

ssize_t read;

2468

char *token, *saveptr;

2469

2470

read = getline(&line, &len, fp);

2471

if (read == -1)

2472

return ECGEOF;

2473

2474

token = strtok_r(line, " ", &saveptr);

2475

if (!token) {

2476

ret = ECGINVAL;

2477

goto out_free;

2478

}

2479

strncpy(stat->name, token, FILENAME_MAX);

2480

2481

token = strtok_r(NULL, " ", &saveptr);

2482

if (!token) {

2483

ret = ECGINVAL;

2484

goto out_free;

2485

}

2486

strncpy(stat->value, token, CG_VALUE_MAX);

2487

2488

out_free:

2489

free(line);

2490

return 0;

2491

}

2492

2493

int cgroup_read_stats_end(void **handle)

2494

{

2495

FILE *fp;

2496

2497

if (!cgroup_initialized)

2498

return ECGROUPNOTINITIALIZED;

2499

2500

if (!handle)

2501

return ECGINVAL;

2502

2503

fp = (FILE *)*handle;

2504

fclose(fp);

2505

return 0;

2506

}

2507

2508

int cgroup_read_stats_next(void **handle, struct cgroup_stat *stat)

2509

{

2510

int ret = 0;

2511

FILE *fp;

2512

2513

if (!cgroup_initialized)

2514

return ECGROUPNOTINITIALIZED;

2515

2516

if (!handle || !stat)

2517

return ECGINVAL;

2518

2519

fp = (FILE *)*handle;

2520

ret = cg_read_stat(fp, stat);

2521

*handle = fp;

2522

return ret;

2523

}

2524

2525

/*

2526

* TODO: Need to decide a better place to put this function.

2527

*/

2528

int cgroup_read_stats_begin(char *controller, char *path, void **handle,

2529

struct cgroup_stat *stat)

2530

{

2531

int ret = 0;

2532

char stat_file[FILENAME_MAX];

2533

FILE *fp;

2534

2535

if (!cgroup_initialized)

2536

return ECGROUPNOTINITIALIZED;

2537

2538

if (!stat || !handle)

2539

return ECGINVAL;

2540

2541

if (!cg_build_path(path, stat_file, controller))

2542

return ECGOTHER;

2543

2544

sprintf(stat_file, "%s/%s.stat", stat_file, controller);

2545

2546

fp = fopen(stat_file, "r");

2547

if (!fp) {

2548

cgroup_dbg("fopen failed\n");

2549

return ECGINVAL;

2550

}

2551

2552

ret = cg_read_stat(fp, stat);

2553

*handle = fp;

2554

return ret;

2555

}

2556

2557

int cgroup_get_task_end(void **handle)

2558

{

2559

if (!cgroup_initialized)

2560

return ECGROUPNOTINITIALIZED;

2561

2562

if (!*handle)

2563

return ECGINVAL;

2564

2565

fclose((FILE *) *handle);

2566

*handle = NULL;

2567

2568

return 0;

2569

}

2570

2571

int cgroup_get_task_next(void **handle, pid_t *pid)

2572

{

2573

int ret;

2574

2575

if (!cgroup_initialized)

2576

return ECGROUPNOTINITIALIZED;

2577

2578

if (!handle)

2579

return ECGINVAL;

2580

2581

ret = fscanf((FILE *) *handle, "%u", pid);

2582

2583

if (ret != 1) {

2584

if (ret == EOF)

2585

return ECGEOF;

2586

last_errno = errno;

2587

return ECGOTHER;

2588

}

2589

2590

return 0;

2591

}

2592

2593

int cgroup_get_task_begin(char *cgroup, char *controller, void **handle,

2594

pid_t *pid)

2595

{

2596

int ret = 0;

2597

char path[FILENAME_MAX];

2598

char *fullpath = NULL;

2599

2600

if (!cgroup_initialized)

2601

return ECGROUPNOTINITIALIZED;

2602

2603

if (!cg_build_path(cgroup, path, controller))

2604

return ECGOTHER;

2605

2606

ret = asprintf(&fullpath, "%s/tasks", path);

2607

2608

if (ret < 0) {

2609

last_errno = errno;

2610

return ECGOTHER;

2611

}

2612

2613

*handle = (void *) fopen(fullpath, "r");

2614

free(fullpath);

2615

2616

if (!*handle) {

2617

last_errno = errno;

2618

return ECGOTHER;

2619

}

2620

ret = cgroup_get_task_next(handle, pid);

2621

2622

return ret;

2623

}

2624

2625

2626

int cgroup_get_controller_end(void **handle)

2627

{

2628

int *pos = (int *) *handle;

2629

2630

if (!cgroup_initialized)

2631

return ECGROUPNOTINITIALIZED;

2632

2633

if (!pos)

2634

return ECGINVAL;

2635

2636

free(pos);

2637

*handle = NULL;

2638

2639

return 0;

2640

}

2641

2642

int cgroup_get_controller_next(void **handle, struct cgroup_mount_point *info)

2643

{

2644

int *pos = (int *) *handle;

2645

int ret = 0;

2646

2647

if (!cgroup_initialized)

2648

return ECGROUPNOTINITIALIZED;

2649

2650

if (!pos)

2651

return ECGINVAL;

2652

2653

if (!info)

2654

return ECGINVAL;

2655

2656

pthread_rwlock_rdlock(&cg_mount_table_lock);

2657

2658

if (cg_mount_table[*pos].name[0] == '\0') {

2659

ret = ECGEOF;

2660

goto out_unlock;

2661

}

2662

2663

strncpy(info->name, cg_mount_table[*pos].name, FILENAME_MAX);

2664

2665

strncpy(info->path, cg_mount_table[*pos].path, FILENAME_MAX);

2666

2667

(*pos)++;

2668

*handle = pos;

2669

2670

out_unlock:

2671

pthread_rwlock_unlock(&cg_mount_table_lock);

2672

return ret;

2673

}

2674

2675

int cgroup_get_controller_begin(void **handle, struct cgroup_mount_point *info)

2676

{

2677

int *pos;

2678

2679

if (!cgroup_initialized)

2680

return ECGROUPNOTINITIALIZED;

2681

2682

if (!info)

2683

return ECGINVAL;

2684

2685

pos = malloc(sizeof(int));

2686

2687

if (!pos) {

2688

last_errno = errno;

2689

return ECGOTHER;

2690

}

2691

2692

*pos = 0;

2693

2694

*handle = pos;

2695

2696

return cgroup_get_controller_next(handle, info);

2697

}

2698

2699

/**

2700

* Get process data (euid and egid) from /proc/<pid>/status file.

2701

* @param pid: The process id

2702

* @param euid: The uid of param pid

2703

* @param egid: The gid of param pid

2704

* @return 0 on success, > 0 on error.

2705

*/

2706

int cgroup_get_uid_gid_from_procfs(pid_t pid, uid_t *euid, gid_t *egid)

2707

{

2708

FILE *f;

2709

char path[FILENAME_MAX];

2710

char buf[4092];

2711

uid_t ruid, suid, fsuid;

2712

gid_t rgid, sgid, fsgid;

2713

bool found_euid = false;

2714

bool found_egid = false;

2715

2716

sprintf(path, "/proc/%d/status", pid);

2717

f = fopen(path, "r");

2718

if (!f)

2719

return ECGROUPNOTEXIST;

2720

2721

while (fgets(buf, sizeof(buf), f)) {

2722

if (!strncmp(buf, "Uid:", 4)) {

2723

if (sscanf((buf + strlen("Uid:") + 1), "%d%d%d%d",

2724

&ruid, euid, &suid, &fsuid) != 4)

2725

break;

2726

cgroup_dbg("Scanned proc values are %d %d %d %d\n",

2727

ruid, *euid, suid, fsuid);

2728

found_euid = true;

2729

} else if (!strncmp(buf, "Gid:", 4)) {

2730

if (sscanf((buf + strlen("Gid:") + 1), "%d%d%d%d",

2731

&rgid, egid, &sgid, &fsgid) != 4)

2732

break;

2733

cgroup_dbg("Scanned proc values are %d %d %d %d\n",

2734

rgid, *egid, sgid, fsgid);

2735

found_egid = true;

2736

}

2737

if (found_euid && found_egid)

2738

break;

2739

}

2740

fclose(f);

2741

if (!found_euid || !found_egid) {

2742

/*

2743

* This method doesn't match the file format of

2744

* /proc/<pid>/status. The format has been changed

2745

* and we should catch up the change.

2746

*/

2747

cgroup_dbg("The invlid file format of /proc/%d/status.\n", pid);

2748

return ECGFAIL;

2749

}

2750

return 0;

2751

}

2752

2753

/**

2754

* Get process name from /proc/<pid>/status file.

2755

* @param pid: The process id

2756

* @param pname_status : The process name

2757

* @return 0 on success, > 0 on error.

2758

*/

2759

static int cg_get_procname_from_proc_status(pid_t pid, char **procname_status)

2760

{

2761

int ret = ECGFAIL;

2762

int len;

2763

FILE *f;

2764

char path[FILENAME_MAX];

2765

char buf[4092];

2766

2767

sprintf(path, "/proc/%d/status", pid);

2768

f = fopen(path, "r");

2769

if (!f)

2770

return ECGROUPNOTEXIST;

2771

2772

while (fgets(buf, sizeof(buf), f)) {

2773

if (!strncmp(buf, "Name:", 5)) {

2774

len = strlen(buf);

2775

if (buf[len - 1] == '\n')

2776

buf[len - 1] = '\0';

2777

*procname_status = strdup(buf + strlen("Name:") + 1);

2778

if (*procname_status == NULL) {

2779

last_errno = errno;

2780

ret = ECGOTHER;

2781

break;

2782

}

2783

ret = 0;

2784

break;

2785

}

2786

}

2787

fclose(f);

2788

return ret;

2789

}

2790

2791

/**

2792

* Get process name from /proc/<pid>/cmdline file.

2793

* This function is mainly for getting a script name (shell, perl,

2794

* etc). A script name is written into the second or later argument

2795

* of /proc/<pid>/cmdline. This function gets each argument and

2796

* compares it to a process name taken from /proc/<pid>/status.

2797

* @param pid: The process id

2798

* @param pname_status : The process name taken from /proc/<pid>/status

2799

* @param pname_cmdline: The process name taken from /proc/<pid>/cmdline

2800

* @return 0 on success, > 0 on error.

2801

*/

2802

static int cg_get_procname_from_proc_cmdline(pid_t pid, char *pname_status,

2803

char **pname_cmdline)

2804

{

2805

FILE *f;

2806

int ret = ECGFAIL;

2807

int c = 0;

2808

int len = 0;

2809

char path[FILENAME_MAX];

2810

char buf_pname[FILENAME_MAX];

2811

char buf_cwd[FILENAME_MAX];

2812

2813

memset(buf_cwd, '\0', sizeof(buf_cwd));

2814

sprintf(path, "/proc/%d/cwd", pid);

2815

if (readlink(path, buf_cwd, sizeof(buf_cwd)) < 0)

2816

return ECGROUPNOTEXIST;

2817

2818

sprintf(path, "/proc/%d/cmdline", pid);

2819

f = fopen(path, "r");

2820

if (!f)

2821

return ECGROUPNOTEXIST;

2822

2823

while (c != EOF) {

2824

c = fgetc(f);

2825

if ((c != EOF) && (c != '\0')) {

2826

buf_pname[len] = c;

2827

len++;

2828

continue;

2829

}

2830

buf_pname[len] = '\0';

2831

2832

/*

2833

* The taken process name from /proc/<pid>/status is

2834

* shortened to 15 characters if it is over. So the

2835

* name should be compared by its length.

2836

*/

2837

if (strncmp(pname_status, basename(buf_pname),

2838

TASK_COMM_LEN - 1)) {

2839

len = 0;

2840

continue;

2841

}

2842

if (buf_pname[0] == '/') {

2843

*pname_cmdline = strdup(buf_pname);

2844

if (*pname_cmdline == NULL) {

2845

last_errno = errno;

2846

ret = ECGOTHER;

2847

break;

2848

}

2849

ret = 0;

2850

break;

2851

} else {

2852

strcat(buf_cwd, "/");

2853

strcat(buf_cwd, buf_pname);

2854

if (!realpath(buf_cwd, path)) {

2855

last_errno = errno;

2856

ret = ECGOTHER;

2857

break;

2858

}

2859

*pname_cmdline = strdup(path);

2860

if (*pname_cmdline == NULL) {

2861

last_errno = errno;

2862

ret = ECGOTHER;

2863

break;

2864

}

2865

ret = 0;

2866

break;

2867

}

2868

len = 0;

2869

}

2870

fclose(f);

2871

return ret;

2872

}

2873

2874

/**

2875

* Get a process name from /proc file system.

2876

* This function allocates memory for a process name, writes a process

2877

* name onto it. So a caller should free the memory when unusing it.

2878

* @param pid: The process id

2879

* @param procname: The process name

2880

* @return 0 on success, > 0 on error.

2881

*/

2882

int cgroup_get_procname_from_procfs(pid_t pid, char **procname)

2883

{

2884

int ret;

2885

char *pname_status;

2886

char *pname_cmdline;

2887

char path[FILENAME_MAX];

2888

char buf[FILENAME_MAX];

2889

2890

ret = cg_get_procname_from_proc_status(pid, &pname_status);

2891

if (ret)

2892

return ret;

2893

2894

/*

2895

* Get the full patch of process name from /proc/<pid>/exe.

2896

*/

2897

memset(buf, '\0', sizeof(buf));

2898

sprintf(path, "/proc/%d/exe", pid);

2899

if (readlink(path, buf, sizeof(buf)) < 0) {

2900

/*

2901

* readlink() fails if a kernel thread, and a process

2902

* name is taken from /proc/<pid>/status.

2903

*/

2904

*procname = pname_status;

2905

return 0;

2906

}

2907

if (!strncmp(pname_status, basename(buf), TASK_COMM_LEN - 1)) {

2908

/*

2909

* The taken process name from /proc/<pid>/status is

2910

* shortened to 15 characters if it is over. So the

2911

* name should be compared by its length.

2912

*/

2913

free(pname_status);

2914

*procname = strdup(buf);

2915

if (*procname == NULL) {

2916

last_errno = errno;

2917

return ECGOTHER;

2918

}

2919

return 0;

2920

}

2921

2922

/*

2923

* The above strncmp() is not 0 if a shell script, because

2924

* /proc/<pid>/exe links a shell command (/bin/bash etc.)

2925

* and the pname_status represents a shell script name.

2926

* Then the full path of a shell script is taken from

2927

* /proc/<pid>/cmdline.

2928

*/

2929

ret = cg_get_procname_from_proc_cmdline(pid, pname_status,

2930

&pname_cmdline);

2931

if (!ret)

2932

*procname = pname_cmdline;

2933

2934

free(pname_status);

2935

return ret;

2936

}

2937

2938

int cgroup_register_unchanged_process(pid_t pid, int flags)

2939

{

2940

int sk;

2941

int ret = 1;

2942

char buff[sizeof(CGRULE_SUCCESS_STORE_PID)];

2943

struct sockaddr_un addr;

2944

2945

sk = socket(PF_UNIX, SOCK_STREAM, 0);

2946

if (sk < 0)

2947

return 1;

2948

2949

bzero((char *)&addr, sizeof(addr));

2950

addr.sun_family = AF_UNIX;

2951

strcpy(addr.sun_path, CGRULE_CGRED_SOCKET_PATH);

2952

2953

if (connect(sk, (struct sockaddr *)&addr,

2954

sizeof(addr.sun_family) + strlen(CGRULE_CGRED_SOCKET_PATH)) < 0) {

2955

/* If the daemon does not work, this function returns 0

2956

* as success. */

2957

ret = 0;

2958

goto close;

2959

}

2960

if (write(sk, &pid, sizeof(pid)) < 0)

2961

goto close;

2962

2963

if (write(sk, &flags, sizeof(flags)) < 0)

2964

goto close;

2965

2966

if (read(sk, buff, sizeof(buff)) < 0)

2967

goto close;

2968

2969

if (strncmp(buff, CGRULE_SUCCESS_STORE_PID, sizeof(buff)))

2970

goto close;

2971

2972

ret = 0;

2973

close:

2974

close(sk);

2975

return ret;

2976

}

2977

2978

int cgroup_get_subsys_mount_point(char *controller, char **mount_point)

2979

{

2980

int i;

2981

int ret = ECGROUPNOTEXIST;

2982

2983

if (!cgroup_initialized)

2984

return ECGROUPNOTINITIALIZED;

2985

2986

pthread_rwlock_rdlock(&cg_mount_table_lock);

2987

for (i = 0; cg_mount_table[i].name[0] != '\0'; i++) {

2988

if (strncmp(cg_mount_table[i].name, controller, FILENAME_MAX))

2989

continue;

2990

2991

*mount_point = strdup(cg_mount_table[i].path);

2992

2993

if (!*mount_point) {

2994

last_errno = errno;

2995

ret = ECGOTHER;

2996

goto out_exit;

2997

}

2998

2999

ret = 0;

3000

break;

3001

}

3002

out_exit:

3003

pthread_rwlock_unlock(&cg_mount_table_lock);

3004

return ret;

3005

}