2
* Copyright IBM Corporation. 2007
4
* Author: Dhaval Giani <dhaval@linux.vnet.ibm.com>
5
* Author: Balbir Singh <balbir@linux.vnet.ibm.com>
7
* This program is free software; you can redistribute it and/or modify it
8
* under the terms of version 2.1 of the GNU Lesser General Public License
9
* as published by the Free Software Foundation.
11
* This program is distributed in the hope that it would be useful, but
12
* WITHOUT ANY WARRANTY; without even the implied warranty of
13
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
16
* 1. Convert comments to Docbook style.
17
* 2. Add more APIs for the control groups.
18
* 3. Handle the configuration related APIs.
21
* Code initiated and designed by Dhaval Giani. All faults are most likely
24
* Bharata B Rao <bharata@linux.vnet.ibm.com> is willing to take blame
25
* for mistakes in APIs for reading statistics.
34
#include <libcgroup.h>
35
#include <libcgroup-internal.h>
41
#include <sys/types.h>
43
#include <sys/socket.h>
45
#include <sys/syscall.h>
54
#ifndef PACKAGE_VERSION
55
#define PACKAGE_VERSION 0.01
58
#define VERSION(ver) #ver
61
* The errno which happened the last time (has to be thread specific)
63
__thread int last_errno;
67
/* the value has to be thread specific */
68
__thread char errtext[MAXLEN];
70
/* Task command name length */
71
#define TASK_COMM_LEN 16
74
* Remember to bump this up for major API changes.
76
const static char cg_version[] = VERSION(PACKAGE_VERSION);
78
struct cg_mount_table_s cg_mount_table[CG_CONTROLLER_MAX];
79
static pthread_rwlock_t cg_mount_table_lock = PTHREAD_RWLOCK_INITIALIZER;
81
/* Check if cgroup_init has been called or not. */
82
static int cgroup_initialized;
84
/* Check if the rules cache has been loaded or not. */
85
static bool cgroup_rules_loaded;
87
/* List of configuration rules */
88
static struct cgroup_rule_list rl;
90
/* Temporary list of configuration rules (for non-cache apps) */
91
static struct cgroup_rule_list trl;
93
/* Lock for the list of rules (rl) */
94
static pthread_rwlock_t rl_lock = PTHREAD_RWLOCK_INITIALIZER;
96
char *cgroup_strerror_codes[] = {
97
"Cgroup is not compiled in",
98
"Cgroup is not mounted",
99
"Cgroup does not exist",
100
"Cgroup has not been created",
101
"Cgroup one of the needed subsystems is not mounted",
102
"Cgroup, request came in from non owner",
103
"Cgroup controllers controllers are bound to different mount points",
104
"Cgroup, operation not allowed",
105
"Cgroup value set exceeds maximum",
106
"Cgroup controller already exists",
107
"Cgroup value already exists",
108
"Cgroup invalid operation",
109
"Cgroup, creation of controller failed",
110
"Cgroup operation failed",
111
"Cgroup not initialized",
112
"Cgroup trying to set value for control that does not exist",
113
"Cgroup generic error",
114
"Cgroup values are not equal",
115
"Cgroup controllers are different",
116
"Cgroup parsing failed",
117
"Cgroup, rules file does not exist",
118
"Cgroup mounting failed",
119
"The config file can not be opend",
120
"End of File or iterator",
123
static int cg_chown_file(FTS *fts, FTSENT *ent, uid_t owner, gid_t group)
126
const char *filename = fts->fts_path;
127
cgroup_dbg("seeing file %s\n", filename);
128
switch (ent->fts_info) {
130
errno = ent->fts_errno;
138
ret = chown(filename, owner, group);
141
ret = chmod(filename, S_IRUSR | S_IWUSR | S_IXUSR | S_IRGRP |
142
S_IWGRP | S_IXGRP | S_IROTH | S_IXOTH);
146
ret = chown(filename, owner, group);
149
ret = chmod(filename, S_IRUSR | S_IWUSR | S_IRGRP |
162
* TODO: Need to decide a better place to put this function.
164
static int cg_chown_recursive(char **path, uid_t owner, gid_t group)
167
cgroup_dbg("path is %s\n", *path);
168
FTS *fts = fts_open(path, FTS_PHYSICAL | FTS_NOCHDIR |
174
cgroup_dbg("fts_read failed\n");
177
ret = cg_chown_file(fts, ent, owner, group);
183
static int cgroup_test_subsys_mounted(const char *name)
187
pthread_rwlock_rdlock(&cg_mount_table_lock);
189
for (i = 0; cg_mount_table[i].name[0] != '\0'; i++) {
190
if (strncmp(cg_mount_table[i].name, name,
191
sizeof(cg_mount_table[i].name)) == 0) {
192
pthread_rwlock_unlock(&cg_mount_table_lock);
196
pthread_rwlock_unlock(&cg_mount_table_lock);
201
* Free a single cgroup_rule struct.
202
* @param r The rule to free from memory
204
static void cgroup_free_rule(struct cgroup_rule *r)
209
/* Make sure our rule is not NULL, first. */
211
cgroup_dbg("Warning: Attempted to free NULL rule.\n");
218
/* We must free any used controller strings, too. */
219
for(i = 0; i < MAX_MNT_ELEMENTS; i++) {
220
if (r->controllers[i])
221
free(r->controllers[i]);
228
* Free a list of cgroup_rule structs. If rl is the main list of rules,
229
* the lock must be taken for writing before calling this function!
230
* @param rl Pointer to the list of rules to free from memory
232
static void cgroup_free_rule_list(struct cgroup_rule_list *rl)
234
/* Temporary pointer */
235
struct cgroup_rule *tmp = NULL;
237
/* Make sure we're not freeing NULL memory! */
239
cgroup_dbg("Warning: Attempted to free NULL list.\n");
245
rl->head = tmp->next;
246
cgroup_free_rule(tmp);
249
/* Don't leave wild pointers around! */
254
static char *cg_skip_unused_charactors_in_rule(char *rule)
258
/* We ignore anything after a # sign as comments. */
259
itr = strchr(rule, '#');
263
/* We also need to remove the newline character. */
264
itr = strchr(rule, '\n');
268
/* Now, skip any leading tabs and spaces. */
270
while (itr && isblank(*itr))
273
/* If there's nothing left, we can ignore this line. */
281
* Parse the configuration file that maps UID/GIDs to cgroups. If ever the
282
* configuration file is modified, applications should call this function to
283
* load the new configuration rules. The function caller is responsible for
284
* calling free() on each rule in the list.
286
* The cache parameter alters the behavior of this function. If true, this
287
* function will read the entire configuration file and store the results in
288
* rl (global rules list). If false, this function will only parse until it
289
* finds a rule matching the given UID or GID. It will store this rule in rl,
290
* as well as any children rules (rules that begin with a %) that it has.
292
* This function is NOT thread safe!
293
* @param cache True to cache rules, else false
294
* @param muid If cache is false, the UID to match against
295
* @param mgid If cache is false, the GID to match against
* @param mprocname The process name to match against rules that specify one
296
* @return 0 on success, -1 if no cache and match found, > 0 on error.
297
* TODO: Make this function thread safe!
299
static int cgroup_parse_rules(bool cache, uid_t muid,
300
gid_t mgid, char *mprocname)
302
/* File descriptor for the configuration file */
305
/* Buffer to store the line we're working on */
306
char buff[CGROUP_RULE_MAXLINE] = { '\0' };
308
/* Iterator for the line we're working on */
311
/* Pointer to process name in a line of the configuration file */
312
char *procname = NULL;
314
/* Pointer to the list that we're using */
315
struct cgroup_rule_list *lst = NULL;
317
/* Rule to add to the list */
318
struct cgroup_rule *newrule = NULL;
320
/* Structure to get GID from group name */
321
struct group *grp = NULL;
323
/* Structure to get UID from user name */
324
struct passwd *pwd = NULL;
326
/* Temporary storage for a configuration rule */
327
char key[CGROUP_RULE_MAXKEY] = { '\0' };
328
char user[LOGIN_NAME_MAX] = { '\0' };
329
char controllers[CG_CONTROLLER_MAX] = { '\0' };
330
char destination[FILENAME_MAX] = { '\0' };
331
uid_t uid = CGRULE_INVALID;
332
gid_t gid = CGRULE_INVALID;
336
/* The current line number */
337
unsigned int linenum = 0;
339
/* Did we skip the previous line? */
340
bool skipped = false;
342
/* Have we found a matching rule (non-cache mode)? */
343
bool matched = false;
348
/* Temporary buffer for strtok() */
349
char *stok_buff = NULL;
354
/* Open the configuration file. */
355
pthread_rwlock_wrlock(&rl_lock);
356
fp = fopen(CGRULES_CONF_FILE, "r");
358
cgroup_dbg("Failed to open configuration file %s with"
359
" error: %s\n", CGRULES_CONF_FILE,
366
/* Determine which list we're using. */
372
/* If our list already exists, clean it. */
374
cgroup_free_rule_list(lst);
376
/* Now, parse the configuration file one line at a time. */
377
cgroup_dbg("Parsing configuration file.\n");
378
while (fgets(buff, sizeof(buff), fp) != NULL) {
381
itr = cg_skip_unused_charactors_in_rule(buff);
386
* If we skipped the last rule and this rule is a continuation
387
* of it (begins with %), then we should skip this rule too.
389
if (skipped && *itr == '%') {
390
cgroup_dbg("Warning: Skipped child of invalid rule,"
391
" line %d.\n", linenum);
396
* If there is something left, it should be a rule. Otherwise,
397
* there's an error in the configuration file.
400
i = sscanf(itr, "%s%s%s", key, controllers, destination);
402
cgroup_dbg("Failed to parse configuration file on"
403
" line %d.\n", linenum);
406
procname = strchr(key, ':');
408
/* <user>:<procname> <subsystem> <destination> */
409
procname++; /* skip ':' */
410
len_username = procname - key - 1;
411
len_procname = strlen(procname);
412
if (len_procname < 0) {
413
cgroup_dbg("Failed to parse configuration file"
414
" on line %d.\n", linenum);
418
len_username = strlen(key);
421
len_username = min(len_username, sizeof(user) - 1);
422
memset(user, '\0', sizeof(user));
423
strncpy(user, key, len_username);
426
* Next, check the user/group. If it's a % sign, then we
427
* are continuing another rule and UID/GID should not be
428
* reset. If it's a @, we're dealing with a GID rule. If
429
* it's a *, then we do not need to do a lookup because the
430
* rule always applies (it's a wildcard). If we're using
431
* non-cache mode and we've found a matching rule, we only
432
* continue to parse if we're looking at a child rule.
434
if ((!cache) && matched && (strncmp(user, "%", 1) != 0)) {
435
/* If we make it here, we finished (non-cache). */
436
cgroup_dbg("Parsing of configuration file"
441
if (strncmp(user, "@", 1) == 0) {
444
if ((grp = getgrnam(itr))) {
445
uid = CGRULE_INVALID;
448
cgroup_dbg("Warning: Entry for %s not"
449
"found. Skipping rule on line"
450
" %d.\n", itr, linenum);
454
} else if (strncmp(user, "*", 1) == 0) {
455
/* Special wildcard rule. */
458
} else if (*itr != '%') {
460
if ((pwd = getpwnam(user))) {
462
gid = CGRULE_INVALID;
464
cgroup_dbg("Warning: Entry for %s not"
465
"found. Skipping rule on line"
466
" %d.\n", user, linenum);
470
} /* Else, we're continuing another rule (UID/GID are okay). */
473
* If we are not caching rules, then we need to check for a
474
* match before doing anything else. We consider four cases:
475
* The UID matches, the GID matches, the UID is a member of the
476
* GID, or we're looking at the wildcard rule, which always
477
* matches. If none of these are true, we simply continue to
478
* the next line in the file.
480
if (grp && muid != CGRULE_INVALID) {
481
pwd = getpwuid(muid);
482
for (i = 0; grp->gr_mem[i]; i++) {
483
if (!(strcmp(pwd->pw_name, grp->gr_mem[i])))
488
if (uid == muid || gid == mgid || uid == CGRULE_WILD) {
497
* If there is a rule based on process name,
498
* it should be matched with mprocname.
501
uid = CGRULE_INVALID;
502
gid = CGRULE_INVALID;
506
if (strcmp(mprocname, procname) &&
507
strcmp(basename(mprocname), procname)) {
508
uid = CGRULE_INVALID;
509
gid = CGRULE_INVALID;
517
* Now, we're either caching rules or we found a match. Either
518
* way, copy everything into a new rule and push it into the
521
newrule = calloc(1, sizeof(struct cgroup_rule));
523
cgroup_dbg("Out of memory? Error: %s\n",
532
len_username = min(len_username, sizeof(newrule->username) - 1);
533
strncpy(newrule->username, user, len_username);
535
newrule->procname = strdup(procname);
536
if (!newrule->procname) {
542
newrule->procname = NULL;
544
strncpy(newrule->destination, destination,
545
sizeof(newrule->destination) - 1);
546
newrule->next = NULL;
548
/* Parse the controller list, and add that to newrule too. */
549
stok_buff = strtok(controllers, ",");
551
cgroup_dbg("Failed to parse controllers on line"
558
if (i >= MAX_MNT_ELEMENTS) {
559
cgroup_dbg("Too many controllers listed"
560
" on line %d\n", linenum);
564
newrule->controllers[i] = strndup(stok_buff,
565
strlen(stok_buff) + 1);
566
if (!(newrule->controllers[i])) {
567
cgroup_dbg("Out of memory? Error was: %s\n",
572
} while ((stok_buff = strtok(NULL, ",")));
574
/* Now, push the rule. */
575
if (lst->head == NULL) {
579
lst->tail->next = newrule;
583
cgroup_dbg("Added rule %s (UID: %d, GID: %d) -> %s for"
584
" controllers:", lst->tail->username, lst->tail->uid,
585
lst->tail->gid, lst->tail->destination);
586
for (i = 0; lst->tail->controllers[i]; i++) {
587
cgroup_dbg(" %s", lst->tail->controllers[i]);
591
/* Finally, clear the buffer. */
596
/* If we make it here, there were no errors. */
597
cgroup_dbg("Parsing of configuration file complete.\n\n");
598
ret = (matched && !cache) ? -1 : 0;
602
cgroup_free_rule(newrule);
605
ret = ECGROUPPARSEFAIL;
610
pthread_rwlock_unlock(&rl_lock);
615
* cgroup_init(), initializes the MOUNT_POINT.
617
* This code is theoretically thread safe now. It's not really tested
618
* so it can blow up. If it does for you, please let us know with your
619
* test case and we can really make it thread safe.
624
FILE *proc_mount = NULL;
625
struct mntent *ent = NULL;
626
struct mntent *temp_ent = NULL;
629
static char *controllers[CG_CONTROLLER_MAX];
630
FILE *proc_cgroup = NULL;
631
char subsys_name[FILENAME_MAX];
632
int hierarchy, num_cgroups, enabled;
639
char mntent_buffer[4 * FILENAME_MAX];
640
char *strtok_buffer = NULL;
642
pthread_rwlock_wrlock(&cg_mount_table_lock);
644
proc_cgroup = fopen("/proc/cgroups", "r");
653
* The first line of the file has stuff we are not interested in.
654
* So just read it and discard the information.
656
* XX: fix the size for fgets
658
buf = malloc(FILENAME_MAX);
664
if (!fgets(buf, FILENAME_MAX, proc_cgroup)) {
672
while (!feof(proc_cgroup)) {
673
err = fscanf(proc_cgroup, "%s %d %d %d", subsys_name,
674
&hierarchy, &num_cgroups, &enabled);
677
controllers[i] = strdup(subsys_name);
680
controllers[i] = NULL;
682
proc_mount = fopen("/proc/mounts", "r");
683
if (proc_mount == NULL) {
688
temp_ent = (struct mntent *) malloc(sizeof(struct mntent));
696
while ((ent = getmntent_r(proc_mount, temp_ent,
698
sizeof(mntent_buffer))) != NULL) {
699
if (strcmp(ent->mnt_type, "cgroup"))
702
for (i = 0; controllers[i] != NULL; i++) {
703
mntopt = hasmntopt(ent, controllers[i]);
708
mntopt = strtok_r(mntopt, ",", &strtok_buffer);
710
if (strcmp(mntopt, controllers[i]))
713
cgroup_dbg("matched %s:%s\n", mntopt, controllers[i]);
715
/* do not have duplicates in mount table */
717
for (j = 0; j < found_mnt; j++) {
718
if (strncmp(mntopt, cg_mount_table[j].name, FILENAME_MAX)
725
cgroup_dbg("controller %s is already mounted on %s\n",
726
mntopt, cg_mount_table[j].path);
730
strcpy(cg_mount_table[found_mnt].name, controllers[i]);
731
strcpy(cg_mount_table[found_mnt].path, ent->mnt_dir);
732
cgroup_dbg("Found cgroup option %s, count %d\n",
733
ent->mnt_opts, found_mnt);
741
cg_mount_table[0].name[0] = '\0';
742
ret = ECGROUPNOTMOUNTED;
747
cg_mount_table[found_mnt].name[0] = '\0';
749
cgroup_initialized = 1;
758
for (i = 0; controllers[i]; i++) {
759
free(controllers[i]);
760
controllers[i] = NULL;
763
pthread_rwlock_unlock(&cg_mount_table_lock);
768
static int cg_test_mounted_fs()
770
FILE *proc_mount = NULL;
771
struct mntent *ent = NULL;
772
struct mntent *temp_ent = NULL;
773
char mntent_buff[4 * FILENAME_MAX];
776
proc_mount = fopen("/proc/mounts", "r");
777
if (proc_mount == NULL) {
781
temp_ent = (struct mntent *) malloc(sizeof(struct mntent));
783
/* We just fail at the moment. */
788
ent = getmntent_r(proc_mount, temp_ent, mntent_buff,
789
sizeof(mntent_buff));
796
while (strcmp(ent->mnt_type, "cgroup") !=0) {
797
ent = getmntent_r(proc_mount, temp_ent, mntent_buff,
798
sizeof(mntent_buff));
810
static inline pid_t cg_gettid()
812
return syscall(__NR_gettid);
816
/* Call with cg_mount_table_lock taken */
817
static char *cg_build_path_locked(char *name, char *path, char *type)
820
for (i = 0; cg_mount_table[i].name[0] != '\0'; i++) {
822
* XX: Change to snprintf once you figure what n should be
824
if (strcmp(cg_mount_table[i].name, type) == 0) {
825
sprintf(path, "%s/", cg_mount_table[i].path);
829
sprintf(path, "%s%s/", tmp, name);
838
char *cg_build_path(char *name, char *path, char *type)
840
pthread_rwlock_rdlock(&cg_mount_table_lock);
841
path = cg_build_path_locked(name, path, type);
842
pthread_rwlock_unlock(&cg_mount_table_lock);
847
static int __cgroup_attach_task_pid(char *path, pid_t tid)
852
tasks = fopen(path, "w");
856
return ECGROUPNOTOWNER;
858
return ECGROUPNOTEXIST;
860
return ECGROUPNOTALLOWED;
863
ret = fprintf(tasks, "%d", tid);
878
cgroup_dbg("Error writing tid %d to %s:%s\n",
879
tid, path, strerror(errno));
884
/** cgroup_attach_task_pid is used to assign tasks to a cgroup.
885
* struct cgroup *cgroup: The cgroup to assign the thread to.
886
* pid_t tid: The thread to be assigned to the cgroup.
888
* returns 0 on success.
889
* returns ECGROUPNOTOWNER if the caller does not have access to the cgroup.
890
* returns ECGROUPNOTALLOWED for other causes of failure.
892
int cgroup_attach_task_pid(struct cgroup *cgroup, pid_t tid)
894
char path[FILENAME_MAX];
897
if (!cgroup_initialized) {
898
cgroup_dbg("libcgroup is not initialized\n");
899
return ECGROUPNOTINITIALIZED;
903
pthread_rwlock_rdlock(&cg_mount_table_lock);
904
for(i = 0; i < CG_CONTROLLER_MAX &&
905
cg_mount_table[i].name[0]!='\0'; i++) {
906
if (!cg_build_path_locked(NULL, path,
907
cg_mount_table[i].name))
909
strncat(path, "/tasks", sizeof(path) - strlen(path));
910
ret = __cgroup_attach_task_pid(path, tid);
912
pthread_rwlock_unlock(&cg_mount_table_lock);
916
pthread_rwlock_unlock(&cg_mount_table_lock);
918
for (i = 0; i < cgroup->index; i++) {
919
if (!cgroup_test_subsys_mounted(cgroup->controller[i]->name)) {
920
cgroup_dbg("subsystem %s is not mounted\n",
921
cgroup->controller[i]->name);
922
return ECGROUPSUBSYSNOTMOUNTED;
926
for (i = 0; i < cgroup->index; i++) {
927
if (!cg_build_path(cgroup->name, path,
928
cgroup->controller[i]->name))
930
strncat(path, "/tasks", sizeof(path) - strlen(path));
931
ret = __cgroup_attach_task_pid(path, tid);
939
/** cgroup_attach_task is used to attach the current thread to a cgroup.
940
* struct cgroup *cgroup: The cgroup to assign the current thread to.
942
* See cgroup_attach_task_pid for return values.
944
int cgroup_attach_task(struct cgroup *cgroup)
946
pid_t tid = cg_gettid();
949
error = cgroup_attach_task_pid(cgroup, tid);
955
* cg_mkdir_p, emulate the mkdir -p command (recursively creating paths)
956
* @path: path to create
958
int cg_mkdir_p(const char *path)
960
char *real_path = NULL;
966
char cwd[FILENAME_MAX];
969
buf = getcwd(cwd, FILENAME_MAX);
976
real_path = strdup(path);
983
while (real_path[j] != '\0' && real_path[j] != '/')
985
while (real_path[j] != '\0' && real_path[j] == '/')
990
real_path[j] = '\0'; /* Temporarily overwrite "/" */
992
ret = mkdir(str, S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH);
1003
ret = 0; /* Not fatal really */
1006
ret = ECGROUPNOTOWNER;
1010
ret = ECGROUPNOTALLOWED;
1018
cgroup_dbg("could not chdir to child directory (%s)\n",
1023
} while (real_path[i]);
1029
cgroup_dbg("could not go back to old directory (%s)\n", cwd);
1038
* cg_create_control_group()
1039
* This is the basic function used to create the control group. This function
1040
* just makes the group. It does not set any permissions, or any control values.
1041
* The argument path is the fully qualified path name to make it generic.
1043
static int cg_create_control_group(char *path)
1046
if (!cg_test_mounted_fs())
1047
return ECGROUPNOTMOUNTED;
1048
error = cg_mkdir_p(path);
1053
* cg_set_control_value()
1054
* This is the low level function for putting in a value in a control file.
1055
* This function takes in the complete path and sets the value in val in that
1058
static int cg_set_control_value(char *path, char *val)
1060
FILE *control_file = NULL;
1061
if (!cg_test_mounted_fs())
1062
return ECGROUPNOTMOUNTED;
1064
control_file = fopen(path, "r+");
1066
if (!control_file) {
1067
if (errno == EPERM) {
1069
* We need to set the correct error value, does the
1070
* group exist but we don't have the subsystem
1071
* mounted at that point, or is it that the group
1072
* does not exist. So we check if the tasks file
1073
* exist. Before that, we need to extract the path.
1075
int len = strlen(path);
1077
while (*(path+len) != '/')
1079
*(path+len+1) = '\0';
1080
strncat(path, "tasks", sizeof(path) - strlen(path));
1081
control_file = fopen(path, "r");
1082
if (!control_file) {
1083
if (errno == ENOENT)
1084
return ECGROUPSUBSYSNOTMOUNTED;
1086
fclose(control_file);
1087
return ECGROUPNOTALLOWED;
1089
return ECGROUPVALUENOTEXIST;
1092
fprintf(control_file, "%s", val);
1093
fclose(control_file);
1097
/** cgroup_modify_cgroup modifies the cgroup control files.
1098
* struct cgroup *cgroup: The name will be the cgroup to be modified.
1099
* The values will be the values to be modified, those not mentioned
1100
* in the structure will not be modified.
1102
* The uids cannot be modified yet.
1104
* returns 0 on success.
1108
int cgroup_modify_cgroup(struct cgroup *cgroup)
1110
char *path, base[FILENAME_MAX];
1115
if (!cgroup_initialized)
1116
return ECGROUPNOTINITIALIZED;
1119
return ECGROUPNOTALLOWED;
1121
for (i = 0; i < cgroup->index; i++) {
1122
if (!cgroup_test_subsys_mounted(cgroup->controller[i]->name)) {
1123
cgroup_dbg("subsystem %s is not mounted\n",
1124
cgroup->controller[i]->name);
1125
return ECGROUPSUBSYSNOTMOUNTED;
1129
for (i = 0; i < cgroup->index; i++) {
1131
if (!cg_build_path(cgroup->name, base,
1132
cgroup->controller[i]->name))
1134
for (j = 0; j < cgroup->controller[i]->index; j++) {
1135
ret = asprintf(&path, "%s%s", base,
1136
cgroup->controller[i]->values[j]->name);
1142
error = cg_set_control_value(path,
1143
cgroup->controller[i]->values[j]->value);
1161
* @dst: Destination controller
1162
* @src: Source controller from which values will be copied to dst
1164
* Create a duplicate copy of values under the specified controller
1166
int cgroup_copy_controller_values(struct cgroup_controller *dst,
1167
struct cgroup_controller *src)
1174
strncpy(dst->name, src->name, FILENAME_MAX);
1175
for (i = 0; i < src->index; i++, dst->index++) {
1176
struct control_value *src_val = src->values[i];
1177
struct control_value *dst_val;
1179
dst->values[i] = calloc(1, sizeof(struct control_value));
1180
if (!dst->values[i]) {
1185
dst_val = dst->values[i];
1186
strncpy(dst_val->value, src_val->value, CG_VALUE_MAX);
1187
strncpy(dst_val->name, src_val->name, FILENAME_MAX);
1194
* @dst: Destination control group
1195
* @src: Source from which values will be copied to dst
1197
* Create a duplicate copy of src in dst. This will be useful for those who
1198
* intend to create new instances based on an existing control group
1200
int cgroup_copy_cgroup(struct cgroup *dst, struct cgroup *src)
1205
return ECGROUPNOTEXIST;
1208
* Should we just use the restrict keyword instead?
1213
cgroup_free_controllers(dst);
1215
for (i = 0; i < src->index; i++, dst->index++) {
1216
struct cgroup_controller *src_ctlr = src->controller[i];
1217
struct cgroup_controller *dst_ctlr;
1219
dst->controller[i] = calloc(1, sizeof(struct cgroup_controller));
1220
if (!dst->controller[i]) {
1225
dst_ctlr = dst->controller[i];
1226
ret = cgroup_copy_controller_values(dst_ctlr, src_ctlr);
1234
/** cgroup_create_cgroup creates a new control group.
1235
* struct cgroup *cgroup: The control group to be created
1237
* returns 0 on success. We recommend calling cgroup_delete_cgroup
1238
* if this routine fails. That should do the cleanup operation.
1240
int cgroup_create_cgroup(struct cgroup *cgroup, int ignore_ownership)
1250
if (!cgroup_initialized)
1251
return ECGROUPNOTINITIALIZED;
1254
return ECGROUPNOTALLOWED;
1256
for (i = 0; i < cgroup->index; i++) {
1257
if (!cgroup_test_subsys_mounted(cgroup->controller[i]->name))
1258
return ECGROUPSUBSYSNOTMOUNTED;
1261
fts_path[0] = (char *)malloc(FILENAME_MAX);
1270
* XX: One important test to be done is to check, if you have multiple
1271
* subsystems mounted at one point, all of them *have* to be on the cgroup
1272
* data structure. If not, we fail.
1274
for (k = 0; k < cgroup->index; k++) {
1275
if (!cg_build_path(cgroup->name, path,
1276
cgroup->controller[k]->name))
1279
error = cg_create_control_group(path);
1283
base = strdup(path);
1291
if (!ignore_ownership) {
1292
cgroup_dbg("Changing ownership of %s\n", fts_path[0]);
1293
error = cg_chown_recursive(fts_path,
1294
cgroup->control_uid, cgroup->control_gid);
1300
for (j = 0; j < cgroup->controller[k]->index; j++) {
1301
ret = snprintf(path, FILENAME_MAX, "%s%s", base,
1302
cgroup->controller[k]->values[j]->name);
1303
cgroup_dbg("setting %s to %s, error %d\n", path,
1304
cgroup->controller[k]->values[j]->name, ret);
1305
if (ret < 0 || ret >= FILENAME_MAX) {
1310
error = cg_set_control_value(path,
1311
cgroup->controller[k]->values[j]->value);
1313
* Should we undo, what we've done in the loops above?
1314
* An error should not be treated as fatal, since we
1315
* have several read-only files and several files that
1316
* are only conditionally created in the child.
1318
* A middle ground would be to track that there
1319
* was an error and return that value.
1327
if (!ignore_ownership) {
1328
ret = snprintf(path, FILENAME_MAX, "%s/tasks", base);
1329
if (ret < 0 || ret >= FILENAME_MAX) {
1334
error = chown(path, cgroup->tasks_uid,
1351
if (retval && !error)
1357
* Find the parent of the specified directory. It returns the parent (the
1358
* parent is usually name/.. unless name is a mount point).
1360
char *cgroup_find_parent(char *name)
1362
char child[FILENAME_MAX];
1363
char *parent = NULL;
1364
struct stat stat_child, stat_parent;
1368
pthread_rwlock_rdlock(&cg_mount_table_lock);
1369
type = cg_mount_table[0].name;
1370
if (!cg_build_path_locked(name, child, type)) {
1371
pthread_rwlock_unlock(&cg_mount_table_lock);
1374
pthread_rwlock_unlock(&cg_mount_table_lock);
1376
cgroup_dbg("path is %s\n", child);
1377
dir = dirname(child);
1378
cgroup_dbg("directory name is %s\n", dir);
1380
if (asprintf(&parent, "%s/..", dir) < 0)
1383
cgroup_dbg("parent's name is %s\n", parent);
1385
if (stat(dir, &stat_child) < 0)
1388
if (stat(parent, &stat_parent) < 0)
1392
* Is the specified "name" a mount point?
1394
if (stat_parent.st_dev != stat_child.st_dev) {
1395
cgroup_dbg("parent is a mount point\n");
1396
strcpy(parent, ".");
1402
if (strcmp(dir, ".") == 0)
1403
strcpy(parent, "..");
1405
strcpy(parent, dir);
1416
* @cgroup: cgroup data structure to be filled with parent values and then
1417
* passed down for creation
1418
* @ignore_ownership: Ignore doing a chown on the newly created cgroup
1420
int cgroup_create_cgroup_from_parent(struct cgroup *cgroup,
1421
int ignore_ownership)
1423
char *parent = NULL;
1424
struct cgroup *parent_cgroup = NULL;
1427
if (!cgroup_initialized)
1428
return ECGROUPNOTINITIALIZED;
1430
parent = cgroup_find_parent(cgroup->name);
1434
cgroup_dbg("parent is %s\n", parent);
1435
parent_cgroup = cgroup_new_cgroup(parent);
1439
if (cgroup_get_cgroup(parent_cgroup))
1442
cgroup_dbg("got parent group for %s\n", parent_cgroup->name);
1443
ret = cgroup_copy_cgroup(cgroup, parent_cgroup);
1447
cgroup_dbg("copied parent group %s to %s\n", parent_cgroup->name,
1449
ret = cgroup_create_cgroup(cgroup, ignore_ownership);
1452
cgroup_free(&parent_cgroup);
1458
/** cgroup_delete_cgroup deletes a control group.
1459
* struct cgroup *cgroup takes the group which is to be deleted.
1461
* returns 0 on success.
1463
int cgroup_delete_cgroup(struct cgroup *cgroup, int ignore_migration)
1465
FILE *delete_tasks = NULL, *base_tasks = NULL;
1467
char path[FILENAME_MAX];
1468
int error = ECGROUPNOTALLOWED;
1471
if (!cgroup_initialized)
1472
return ECGROUPNOTINITIALIZED;
1475
return ECGROUPNOTALLOWED;
1477
for (i = 0; i < cgroup->index; i++) {
1478
if (!cgroup_test_subsys_mounted(cgroup->controller[i]->name))
1479
return ECGROUPSUBSYSNOTMOUNTED;
1482
for (i = 0; i < cgroup->index; i++) {
1483
if (!cg_build_path(cgroup->name, path,
1484
cgroup->controller[i]->name))
1486
strncat(path, "../tasks", sizeof(path) - strlen(path));
1488
base_tasks = fopen(path, "w");
1492
if (!cg_build_path(cgroup->name, path,
1493
cgroup->controller[i]->name)) {
1498
strncat(path, "tasks", sizeof(path) - strlen(path));
1500
delete_tasks = fopen(path, "r");
1501
if (!delete_tasks) {
1506
while (!feof(delete_tasks)) {
1507
ret = fscanf(delete_tasks, "%d", &tids);
1508
if (ret == EOF || ret < 1)
1510
fprintf(base_tasks, "%d", tids);
1513
fclose(delete_tasks);
1516
if (!cg_build_path(cgroup->name, path,
1517
cgroup->controller[i]->name))
1519
error = rmdir(path);
1523
if (ignore_migration) {
1524
for (i = 0; i < cgroup->index; i++) {
1525
if (!cg_build_path(cgroup->name, path,
1526
cgroup->controller[i]->name))
1528
error = rmdir(path);
1529
if (error < 0 && errno == ENOENT) {
1542
* This function should really have more checks, but this version
1543
* will assume that the callers have taken care of everything.
1544
* Including the locking.
1546
static int cg_rd_ctrl_file(char *subsys, char *cgroup, char *file, char **value)
1548
char path[FILENAME_MAX];
1549
FILE *ctrl_file = NULL;
1552
if (!cg_build_path_locked(cgroup, path, subsys))
1555
strncat(path, file, sizeof(path) - strlen(path));
1556
ctrl_file = fopen(path, "r");
1558
return ECGROUPVALUENOTEXIST;
1560
*value = malloc(CG_VALUE_MAX);
1567
* using %as crashes when we try to read from files like
1570
ret = fscanf(ctrl_file, "%s", *value);
1571
if (ret == 0 || ret == EOF) {
1582
* Call this function with required locks taken.
1584
static int cgroup_fill_cgc(struct dirent *ctrl_dir, struct cgroup *cgroup,
1585
struct cgroup_controller *cgc, int index)
1587
char *ctrl_name = NULL;
1588
char *ctrl_file = NULL;
1589
char *ctrl_value = NULL;
1590
char *d_name = NULL;
1591
char path[FILENAME_MAX+1];
1592
char *buffer = NULL;
1594
struct stat stat_buffer;
1596
d_name = strdup(ctrl_dir->d_name);
1598
if (!strcmp(d_name, ".") || !strcmp(d_name, "..")) {
1605
* This part really needs to be optimized out. Probably use
1606
* some sort of a flag, but this is fine for now.
1609
cg_build_path_locked(cgroup->name, path, cg_mount_table[index].name);
1610
strncat(path, d_name, sizeof(path) - strlen(path));
1612
error = stat(path, &stat_buffer);
1619
cgroup->control_uid = stat_buffer.st_uid;
1620
cgroup->control_gid = stat_buffer.st_gid;
1622
ctrl_name = strtok_r(d_name, ".", &buffer);
1629
ctrl_file = strtok_r(NULL, ".", &buffer);
1636
if (strcmp(ctrl_name, cg_mount_table[index].name) == 0) {
1637
error = cg_rd_ctrl_file(cg_mount_table[index].name,
1638
cgroup->name, ctrl_dir->d_name, &ctrl_value);
1639
if (error || !ctrl_value)
1642
if (cgroup_add_value_string(cgc, ctrl_dir->d_name,
/*
 * cgroup_get_cgroup: fills *cgroup from the mounted hierarchies.
 * Holding a read lock on cg_mount_table_lock, the visible code walks
 * cg_mount_table and, per mounted controller, builds the group path,
 * probes it with access(), stats "<path>/tasks" to fill tasks_uid and
 * tasks_gid, adds a controller with cgroup_add_controller() and hands each
 * regular file of the directory to cgroup_fill_cgc().
 * After the walk a zero cgroup->index is reported as ECGROUPNOTEXIST.
 * A later path calls cgroup_free_controllers(); presumably that is the
 * error exit - TODO confirm, the labels are not visible.
 * NOTE(review): bare numeric lines are extraction residue; declarations of
 * i, ret, error, dir and path_len, and most error handling, are not visible.
 */
1656
* cgroup_get_cgroup reads the cgroup data from the filesystem.
1657
* struct cgroup has the name of the group to be populated
1659
* return 0 on success.
1661
int cgroup_get_cgroup(struct cgroup *cgroup)
1664
char path[FILENAME_MAX];
1666
struct dirent *ctrl_dir = NULL;
1667
char *control_path = NULL;
1671
if (!cgroup_initialized) {
1672
/* ECGROUPNOTINITIALIZED */
1673
return ECGROUPNOTINITIALIZED;
1677
/* ECGROUPNOTALLOWED */
1678
return ECGROUPNOTALLOWED;
1681
pthread_rwlock_rdlock(&cg_mount_table_lock);
1682
/* The mount table is treated as terminated by the first empty name. */
for (i = 0; i < CG_CONTROLLER_MAX &&
1683
cg_mount_table[i].name[0] != '\0'; i++) {
1685
* cgc will not leak, since it has to be freed using
1686
* cgroup_free_cgroup
1688
struct cgroup_controller *cgc;
1689
struct stat stat_buffer;
1692
if (!cg_build_path_locked(NULL, path,
1693
cg_mount_table[i].name))
1696
path_len = strlen(path);
1697
strncat(path, cgroup->name, FILENAME_MAX - path_len - 1);
1699
if (access(path, F_OK))
1702
if (!cg_build_path_locked(cgroup->name, path,
1703
cg_mount_table[i].name)) {
1705
* This fails when the cgroup does not exist
1706
* for that controller.
1712
* Get the uid and gid information
1715
/* control_path is heap memory from asprintf(); its release is not visible here. */
ret = asprintf(&control_path, "%s/tasks", path);
1723
if (stat(control_path, &stat_buffer)) {
1730
cgroup->tasks_uid = stat_buffer.st_uid;
1731
cgroup->tasks_gid = stat_buffer.st_gid;
1735
cgc = cgroup_add_controller(cgroup,
1736
cg_mount_table[i].name);
1742
dir = opendir(path);
1749
while ((ctrl_dir = readdir(dir)) != NULL) {
1751
* Skip over non regular files
1753
if (ctrl_dir->d_type != DT_REG)
1756
error = cgroup_fill_cgc(ctrl_dir, cgroup, cgc, i);
1757
if (error == ECGFAIL) {
1765
/* Check if the group really exists or not */
1766
if (!cgroup->index) {
1767
error = ECGROUPNOTEXIST;
1771
pthread_rwlock_unlock(&cg_mount_table_lock);
1775
pthread_rwlock_unlock(&cg_mount_table_lock);
1777
* XX: Need to figure out how to cleanup? Cleanup just the stuff
1778
* we added, or the whole structure.
1780
cgroup_free_controllers(cgroup);
/*
 * cg_prepare_cgroup: copies dest into cgroup->name and attaches controllers
 * to cgroup. For a controllers[] entry equal to "*" every mounted controller
 * from cg_mount_table is added under a read lock; otherwise the named
 * controller is added. The visible failure paths call
 * cgroup_free_controllers() and return ECGROUPNOTALLOWED.
 * NOTE(review): strcpy() into cgroup->name has no visible length bound.
 * NOTE(review): the wildcard branch reuses the outer loop index i for its
 * own loop over cg_mount_table.
 * NOTE(review): bare numeric lines are extraction residue; the parameter
 * line declaring dest and the failure tests are not visible in this span.
 */
1785
/** cg_prepare_cgroup
1786
* Process the selected rule. Prepare the cgroup structure which can be
1787
* used to add the task to destination cgroup.
1790
* returns 0 on success.
1792
static int cg_prepare_cgroup(struct cgroup *cgroup, pid_t pid,
1794
char *controllers[])
1797
char *controller = NULL;
1798
struct cgroup_controller *cptr = NULL;
1800
/* Fill in cgroup details. */
1801
cgroup_dbg("Will move pid %d to cgroup '%s'\n", pid, dest);
1803
strcpy(cgroup->name, dest);
1805
/* Scan all the controllers */
1806
for (i = 0; i < CG_CONTROLLER_MAX; i++) {
1807
if (!controllers[i])
1809
controller = controllers[i];
1811
/* If first string is "*" that means all the mounted
1813
if (strcmp(controller, "*") == 0) {
1814
pthread_rwlock_rdlock(&cg_mount_table_lock);
1815
for (i = 0; i < CG_CONTROLLER_MAX &&
1816
cg_mount_table[i].name[0] != '\0'; i++) {
1817
cgroup_dbg("Adding controller %s\n",
1818
cg_mount_table[i].name);
1819
cptr = cgroup_add_controller(cgroup,
1820
cg_mount_table[i].name);
1822
cgroup_dbg("Adding controller '%s'"
1824
cg_mount_table[i].name);
1825
pthread_rwlock_unlock(&cg_mount_table_lock);
1826
cgroup_free_controllers(cgroup);
1827
return ECGROUPNOTALLOWED;
1830
pthread_rwlock_unlock(&cg_mount_table_lock);
1834
/* it is individual controller names and not "*" */
1835
cgroup_dbg("Adding controller %s\n", controller);
1836
cptr = cgroup_add_controller(cgroup, controller);
1838
cgroup_dbg("Adding controller '%s' failed\n",
1840
cgroup_free_controllers(cgroup);
1841
return ECGROUPNOTALLOWED;
/*
 * cgroup_find_matching_rule_uid_gid: scans rules starting at rule for one
 * that applies to uid/gid. The visible tests are, in order: skip rules whose
 * username starts with the continuation marker, accept a rule where both
 * uid and gid are CGRULE_WILD, accept an exact uid match, accept an exact
 * gid match, and for a username starting with the group marker compare the
 * name from getpwuid(uid) against the members of that group.
 * NOTE(review): usr and grp are dereferenced in the member loop; whether
 * they are NULL-checked first cannot be seen here - TODO confirm.
 * NOTE(review): bare numeric lines are extraction residue; the loop
 * construct, the group lookup and the return statements are not visible.
 */
1848
static struct cgroup_rule *cgroup_find_matching_rule_uid_gid(const uid_t uid,
1849
const gid_t gid, struct cgroup_rule *rule)
1851
/* Temporary user data */
1852
struct passwd *usr = NULL;
1854
/* Temporary group data */
1855
struct group *grp = NULL;
1857
/* Temporary string pointer */
1864
/* Skip "%" which indicates continuation of previous rule. */
1865
if (rule->username[0] == '%') {
1869
/* The wildcard rule always matches. */
1870
if ((rule->uid == CGRULE_WILD) && (rule->gid == CGRULE_WILD))
1873
/* This is the simple case of the UID matching. */
1874
if (rule->uid == uid)
1877
/* This is the simple case of the GID matching. */
1878
if (rule->gid == gid)
1881
/* If this is a group rule, the UID might be a member. */
1882
if (rule->username[0] == '@') {
1883
/* Get the group data. */
1884
sp = &(rule->username[1]);
1889
/* Get the data for UID. */
1890
usr = getpwuid(uid);
1894
/* If UID is a member of group, we matched. */
1895
for (i = 0; grp->gr_mem[i]; i++) {
1896
if (!(strcmp(usr->pw_name, grp->gr_mem[i])))
1901
/* If we haven't matched, try the next rule. */
1905
/* If we get here, no rules matched. */
/*
 * cgroup_find_matching_rule: looks up the cached rule list (rl) for a rule
 * matching uid/gid via cgroup_find_matching_rule_uid_gid() and then compares
 * the rule's procname against procname and against basename(procname).
 * NOTE(review): rl.head is read in the initializer, before rl_lock is taken.
 * NOTE(review): the lookup takes the write lock although only reads of the
 * list are visible here.
 * NOTE(review): POSIX allows basename() to modify its argument, and it is
 * applied to the caller-supplied procname.
 * NOTE(review): bare numeric lines are extraction residue; the loop and the
 * NULL tests on ret and procname are not visible in this span.
 */
1910
* Finds the first rule in the cached list that matches the given UID, GID
1911
* or PROCESS NAME, and returns a pointer to that rule.
1912
* This function uses rl_lock.
1914
* This function may NOT be thread safe.
1915
* @param uid The UID to match
1916
* @param gid The GID to match
1917
* @param procname The PROCESS NAME to match
1918
* @return Pointer to the first matching rule, or NULL if no match
1919
* TODO: Determine thread-safeness and fix if not safe.
1921
static struct cgroup_rule *cgroup_find_matching_rule(const uid_t uid,
1922
const gid_t gid, char *procname)
1925
struct cgroup_rule *ret = rl.head;
1927
pthread_rwlock_wrlock(&rl_lock);
1929
ret = cgroup_find_matching_rule_uid_gid(uid, gid, ret);
1933
/* If procname is NULL, return a rule matching
1937
/* If no process name in a rule, that means wildcard */
1939
if (!strcmp(ret->procname, procname))
1941
if (!strcmp(ret->procname, basename(procname)))
1942
/* Check a rule of basename. */
1946
pthread_rwlock_unlock(&rl_lock);
/*
 * cgroup_change_cgroup_flags: moves pid according to the first rule that
 * matches uid, gid and procname.
 * Without CGFLAG_USECACHE the rules are parsed on each call through
 * cgroup_parse_rules(false, ...), where a result other than -1 or 0 is
 * treated as a configuration error. With the flag set the cached list is
 * searched through cgroup_find_matching_rule().
 * The matched rule is executed with cgroup_change_cgroup_path(), and the
 * loop repeats while the current rule's username starts with the
 * continuation marker.
 * NOTE(review): bare numeric lines are extraction residue; the declaration
 * of ret, the locking and the exit label are not visible in this span.
 */
1951
int cgroup_change_cgroup_flags(const uid_t uid, const gid_t gid,
1952
char *procname, const pid_t pid, const int flags)
1954
/* Temporary pointer to a rule */
1955
struct cgroup_rule *tmp = NULL;
1960
/* We need to check this before doing anything else! */
1961
if (!cgroup_initialized) {
1962
cgroup_dbg("libcgroup is not initialized\n");
1963
ret = ECGROUPNOTINITIALIZED;
1968
* If the user did not ask for cached rules, we must parse the
1969
* configuration to find a matching rule (if one exists). Else, we'll
1970
* find the first match in the cached list (rl).
1972
if (!(flags & CGFLAG_USECACHE)) {
1973
cgroup_dbg("Not using cached rules for PID %d.\n", pid);
1974
ret = cgroup_parse_rules(false, uid, gid, procname);
1976
/* The configuration file has an error! We must exit now. */
1977
if (ret != -1 && ret != 0) {
1978
cgroup_dbg("Failed to parse the configuration"
1983
/* We did not find a matching rule, so we're done. */
1985
cgroup_dbg("No rule found to match PID: %d, UID: %d, "
1986
"GID: %d\n", pid, uid, gid);
1990
/* Otherwise, we did match a rule and it's in trl. */
1993
/* Find the first matching rule in the cached list. */
1994
tmp = cgroup_find_matching_rule(uid, gid, procname);
1996
cgroup_dbg("No rule found to match PID: %d, UID: %d, "
1997
"GID: %d\n", pid, uid, gid);
2002
cgroup_dbg("Found matching rule %s for PID: %d, UID: %d, GID: %d\n",
2003
tmp->username, pid, uid, gid);
2005
/* If we are here, then we found a matching rule, so execute it. */
2007
cgroup_dbg("Executing rule %s for PID %d... ", tmp->username,
2009
ret = cgroup_change_cgroup_path(tmp->destination,
2010
pid, tmp->controllers);
2012
cgroup_dbg("FAILED! (Error Code: %d)\n", ret);
2015
cgroup_dbg("OK!\n");
2017
/* Now, check for multi-line rules. As long as the "next"
2018
* rule starts with '%', it's actually part of the rule that
2022
} while (tmp && (tmp->username[0] == '%'));
/**
 * Rule-based move of a process that matches on UID and GID only.
 * This is a thin wrapper: it forwards to cgroup_change_cgroup_flags() with
 * a NULL process name, so no process-name matching is requested.
 * @param uid The UID to match
 * @param gid The GID to match
 * @param pid The PID of the process to move
 * @param flags Bit flags forwarded unchanged (e.g. CGFLAG_USECACHE)
 * @return The result of cgroup_change_cgroup_flags()
 */
int cgroup_change_cgroup_uid_gid_flags(const uid_t uid, const gid_t gid,
				const pid_t pid, const int flags)
{
	char *no_procname = NULL;

	return cgroup_change_cgroup_flags(uid, gid, no_procname, pid, flags);
}
/**
 * Backwards-compatible entry point for older users of the API. This function
 * is deprecated; cgroup_change_cgroup_uid_gid_flags() should be used instead.
 * It simply calls the newer function with a flags value of 0.
 * @param uid The UID to match
 * @param gid The GID to match
 * @param pid The PID of the process to move
 * @return 0 on success, > 0 on error
 */
int cgroup_change_cgroup_uid_gid(uid_t uid, gid_t gid, pid_t pid)
{
	const int no_flags = 0;

	return cgroup_change_cgroup_uid_gid_flags(uid, gid, pid, no_flags);
}
2051
* Changes the cgroup of a program based on the path provided. In this case,
2052
* the user must already know into which cgroup the task should be placed and
2053
* no rules will be parsed.
2055
* returns 0 on success.
2057
int cgroup_change_cgroup_path(char *dest, pid_t pid, char *controllers[])
2060
struct cgroup cgroup;
2062
if (!cgroup_initialized) {
2063
cgroup_dbg("libcgroup is not initialized\n");
2064
return ECGROUPNOTINITIALIZED;
2066
memset(&cgroup, 0, sizeof(struct cgroup));
2068
ret = cg_prepare_cgroup(&cgroup, pid, dest, controllers);
2071
/* Add task to cgroup */
2072
ret = cgroup_attach_task_pid(&cgroup, pid);
2074
cgroup_dbg("cgroup_attach_task_pid failed:%d\n", ret);
2075
cgroup_free_controllers(&cgroup);
/*
 * cgroup_print_rules_config: writes the cached rules table to fp while
 * holding a read lock on rl_lock. For each rule the visible code prints the
 * username, the procname, the UID and GID (as "any" for CGRULE_WILD, "N/A"
 * for CGRULE_INVALID, otherwise the number), the destination and every
 * non-NULL entry of controllers[]. An empty table gets a fixed message.
 * NOTE(review): uid and gid are printed with %d; uid_t and gid_t are
 * unsigned on Linux - TODO confirm the intended format.
 * NOTE(review): bare numeric lines are extraction residue; the list
 * iteration and the declaration of i are not visible in this span.
 */
2080
* Print the cached rules table. This function should be called only after
2081
* first calling cgroup_parse_config(), but it will work with an empty rule
2083
* @param fp The file stream to print to
2085
void cgroup_print_rules_config(FILE *fp)
2088
struct cgroup_rule *itr = NULL;
2093
pthread_rwlock_rdlock(&rl_lock);
2096
fprintf(fp, "The rules table is empty.\n\n");
2097
pthread_rwlock_unlock(&rl_lock);
2103
fprintf(fp, "Rule: %s", itr->username);
2105
fprintf(fp, ":%s", itr->procname);
2108
if (itr->uid == CGRULE_WILD)
2109
fprintf(fp, " UID: any\n");
2110
else if (itr->uid == CGRULE_INVALID)
2111
fprintf(fp, " UID: N/A\n");
2113
fprintf(fp, " UID: %d\n", itr->uid);
2115
if (itr->gid == CGRULE_WILD)
2116
fprintf(fp, " GID: any\n");
2117
else if (itr->gid == CGRULE_INVALID)
2118
fprintf(fp, " GID: N/A\n");
2120
fprintf(fp, " GID: %d\n", itr->gid);
2122
fprintf(fp, " DEST: %s\n", itr->destination);
2124
fprintf(fp, " CONTROLLERS:\n");
2125
for (i = 0; i < MAX_MNT_ELEMENTS; i++) {
2126
if (itr->controllers[i]) {
2127
fprintf(fp, " %s\n", itr->controllers[i]);
2133
pthread_rwlock_unlock(&rl_lock);
/*
 * cgroup_reload_cached_rules: re-parses CGRULES_CONF_FILE into the rules
 * cache by calling cgroup_parse_rules(true, CGRULE_INVALID, CGRULE_INVALID,
 * NULL). A parse failure is logged and reported as ECGROUPPARSEFAIL.
 * The call to cgroup_print_rules_config(stdout) is presumably debug-only -
 * TODO confirm, any surrounding conditional is not visible.
 * NOTE(review): bare numeric lines are extraction residue; the declaration
 * of ret and the failure test are not visible in this span.
 */
2137
* Reloads the rules list, using the given configuration file. This function
2138
* is probably NOT thread safe (calls cgroup_parse_rules()).
2139
* @return 0 on success, > 0 on failure
2141
int cgroup_reload_cached_rules()
2146
cgroup_dbg("Reloading cached rules from %s.\n", CGRULES_CONF_FILE);
2147
ret = cgroup_parse_rules(true, CGRULE_INVALID, CGRULE_INVALID, NULL);
2149
cgroup_dbg("Error parsing configuration file \"%s\": %d.\n",
2150
CGRULES_CONF_FILE, ret);
2151
ret = ECGROUPPARSEFAIL;
2156
cgroup_print_rules_config(stdout);
/*
 * cgroup_init_rules_cache: loads the rules cache through
 * cgroup_parse_rules(true, CGRULE_INVALID, CGRULE_INVALID, NULL) and records
 * the outcome in the file-scope flag cgroup_rules_loaded (false after a
 * logged failure, true otherwise).
 * NOTE(review): bare numeric lines are extraction residue; the declaration
 * of ret, the failure test and the return statements are not visible.
 */
2164
* Initializes the rules cache.
2165
* @return 0 on success, > 0 on error
2167
int cgroup_init_rules_cache()
2172
/* Attempt to read the configuration file and cache the rules. */
2173
ret = cgroup_parse_rules(true, CGRULE_INVALID, CGRULE_INVALID, NULL);
2175
cgroup_dbg("Could not initialize rule cache, error was: %d\n",
2177
cgroup_rules_loaded = false;
2179
cgroup_rules_loaded = true;
/*
 * cgroup_get_current_controller_path: reads /proc/<pid>/cgroup and returns,
 * through *current_path, a strdup()ed copy of the cgroup path of the line
 * whose comma-separated controller list contains controller.
 * The scan runs under a read lock on cg_mount_table_lock and expects each
 * line as "<num>:<controllers>:<path>".
 * NOTE(review): the loop is driven by feof(), which only turns true after a
 * read has already failed.
 * NOTE(review): the %[^:] and %s conversions carry no field width while the
 * targets are FILENAME_MAX-byte arrays.
 * NOTE(review): in the fscanf() result test, "ret == EOF" is already covered
 * by "ret != 3".
 * NOTE(review): bare numeric lines are extraction residue; declarations,
 * error exits and the release of the asprintf() buffer are not visible.
 */
2186
* cgroup_get_current_controller_path
2187
* @pid: pid of the current process for which the path is to be determined
2188
* @controller: name of the controller for which to determine current path
2189
* @current_path: a pointer that is filled with the value of the current
2190
* path as seen in /proc/<pid>/cgroup
2192
int cgroup_get_current_controller_path(pid_t pid, const char *controller,
2193
char **current_path)
2197
FILE *pid_cgroup_fd = NULL;
2202
if (!cgroup_initialized) {
2203
cgroup_dbg("libcgroup is not initialized\n");
2204
return ECGROUPNOTINITIALIZED;
2207
ret = asprintf(&path, "/proc/%d/cgroup", pid);
2209
cgroup_dbg("cannot allocate memory (/proc/pid/cgroup) ret %d\n",
2214
ret = ECGROUPNOTEXIST;
2215
pid_cgroup_fd = fopen(path, "r");
2220
* Why do we grab the cg_mount_table_lock?, the reason is that
2221
* the cgroup of a pid can change via the cgroup_attach_task_pid()
2222
* call. To make sure, we return consitent and safe results,
2223
* we acquire the lock upfront. We can optimize by acquiring
2224
* and releasing the lock in the while loop, but that
2225
* will be more expensive.
2227
pthread_rwlock_rdlock(&cg_mount_table_lock);
2228
while (!feof(pid_cgroup_fd)) {
2229
char controllers[FILENAME_MAX];
2230
char cgroup_path[FILENAME_MAX];
2235
ret = fscanf(pid_cgroup_fd, "%d:%[^:]:%s\n", &num, controllers,
2238
* Magic numbers like "3" seem to be integrating into
2239
* my daily life, I need some magic to help make them
2242
if (ret != 3 || ret == EOF) {
2243
cgroup_dbg("read failed for pid_cgroup_fd ret %d\n",
2250
token = strtok_r(controllers, ",", &savedptr);
2252
/* Comparing strlen + 1 bytes includes the NUL, so only whole tokens match. */
if (strncmp(controller, token, strlen(controller) + 1)
2254
*current_path = strdup(cgroup_path);
2255
if (!*current_path) {
2263
token = strtok_r(NULL, ",", &savedptr);
2268
pthread_rwlock_unlock(&cg_mount_table_lock);
2269
fclose(pid_cgroup_fd);
2275
char *cgroup_strerror(int code)
2277
assert((code >= ECGROUPNOTCOMPILED) && (code < ECGSENTINEL));
2278
if (code == ECGOTHER) {
2279
snprintf(errtext, MAXLEN, "%s, error message: %s",
2280
cgroup_strerror_codes[code % ECGROUPNOTCOMPILED],
2281
strerror(cgroup_get_last_errno()));
2284
return cgroup_strerror_codes[code % ECGROUPNOTCOMPILED];
/*
 * cgroup_get_last_errno: accessor for the errno behind the most recent
 * ECGOTHER error; cgroup_strerror() feeds its result to strerror().
 * NOTE(review): the body is not visible in this span; presumably it returns
 * the thread-local last_errno declared at the top of the file - TODO confirm.
 */
2288
* Return last errno, which caused ECGOTHER error.
2290
int cgroup_get_last_errno()
/*
 * cg_walk_node: translates one FTS entry into *info. The visible code copies
 * the entry name, the parent name, the full path and the level, defaults the
 * type to CGROUP_FILE_TYPE_OTHER, and then switches on ent->fts_info to mark
 * directories (for pre-order or post-order visits, depending on which bit is
 * set in dir) and regular files.
 * info->path, info->parent and info->full_path point into the FTS entry, so
 * they are not independent copies.
 * NOTE(review): bare numeric lines are extraction residue; the case labels,
 * the handling of entries deeper than depth and the return statements are
 * not visible in this span.
 */
2296
static int cg_walk_node(FTS *fts, FTSENT *ent, const int depth,
2297
struct cgroup_file_info *info, int dir)
2301
if (!cgroup_initialized)
2302
return ECGROUPNOTINITIALIZED;
2304
cgroup_dbg("seeing file %s\n", ent->fts_path);
2306
info->path = ent->fts_name;
2307
info->parent = ent->fts_parent->fts_name;
2308
info->full_path = ent->fts_path;
2309
info->depth = ent->fts_level;
2310
info->type = CGROUP_FILE_TYPE_OTHER;
2312
/* A depth of 0 disables the depth limit. */
if (depth && (info->depth > depth))
2315
switch (ent->fts_info) {
2318
errno = ent->fts_errno;
2321
if (dir & CGROUP_WALK_TYPE_PRE_DIR)
2322
info->type = CGROUP_FILE_TYPE_DIR;
2328
if (dir & CGROUP_WALK_TYPE_POST_DIR)
2329
info->type = CGROUP_FILE_TYPE_DIR;
2332
info->type = CGROUP_FILE_TYPE_FILE;
/*
 * cgroup_walk_tree_next: advances a tree walk. *handle is interpreted as a
 * struct cgroup_tree_handle, the next entry is fetched with fts_read() and
 * described through cg_walk_node(). When base_level is 0 and depth is set,
 * base_level is derived from the level of the entry just read.
 * NOTE(review): bare numeric lines are extraction residue; the handle and
 * end-of-walk checks and the return statements are not visible.
 */
2340
int cgroup_walk_tree_next(const int depth, void **handle,
2341
struct cgroup_file_info *info, int base_level)
2344
struct cgroup_tree_handle *entry;
2347
if (!cgroup_initialized)
2348
return ECGROUPNOTINITIALIZED;
2353
entry = (struct cgroup_tree_handle *) *handle;
2355
ent = fts_read(entry->fts);
2358
if (!base_level && depth)
2359
base_level = ent->fts_level + depth;
2361
ret = cg_walk_node(entry->fts, ent, base_level, info, entry->flags);
/*
 * cgroup_walk_tree_end: finishes a tree walk by closing the FTS stream held
 * in the struct cgroup_tree_handle behind *handle.
 * NOTE(review): bare numeric lines are extraction residue; the handle check,
 * the release of the handle itself and the return value are not visible.
 */
2367
int cgroup_walk_tree_end(void **handle)
2369
struct cgroup_tree_handle *entry;
2371
if (!cgroup_initialized)
2372
return ECGROUPNOTINITIALIZED;
2377
entry = (struct cgroup_tree_handle *) *handle;
2379
fts_close(entry->fts);
/*
 * cgroup_walk_tree_begin: starts a walk over the cgroup tree of controller
 * rooted at base_path. The visible code builds the full path with
 * cg_build_path(), allocates a zeroed struct cgroup_tree_handle, defaults
 * its flags to CGROUP_WALK_TYPE_PRE_DIR, opens the hierarchy with fts_open()
 * and reports the first entry through cg_walk_node().
 * When *base_level is 0 and depth is set, *base_level is derived from the
 * level of the first entry.
 * NOTE(review): bare numeric lines are extraction residue; the last
 * parameter line, the allocation and fts_open() checks, and the assignment
 * of *handle are not visible in this span.
 */
2386
* TODO: Need to decide a better place to put this function.
2388
int cgroup_walk_tree_begin(char *controller, char *base_path, const int depth,
2389
void **handle, struct cgroup_file_info *info,
2393
cgroup_dbg("path is %s\n", base_path);
2395
char full_path[FILENAME_MAX];
2397
struct cgroup_tree_handle *entry;
2399
if (!cgroup_initialized)
2400
return ECGROUPNOTINITIALIZED;
2405
if (!cg_build_path(base_path, full_path, controller))
2408
/* calloc() arguments are (size, count) here; the product is the same. */
entry = calloc(sizeof(struct cgroup_tree_handle), 1);
2415
entry->flags |= CGROUP_WALK_TYPE_PRE_DIR;
2418
cg_path[0] = full_path;
2421
entry->fts = fts_open(cg_path, FTS_LOGICAL | FTS_NOCHDIR |
2423
ent = fts_read(entry->fts);
2425
cgroup_dbg("fts_read failed\n");
2428
if (!*base_level && depth)
2429
*base_level = ent->fts_level + depth;
2431
ret = cg_walk_node(entry->fts, ent, *base_level, info, entry->flags);
/*
 * cgroup_walk_tree_set_flags: replaces the walk flags stored in the
 * struct cgroup_tree_handle behind *handle. A request that sets both
 * CGROUP_WALK_TYPE_PRE_DIR and CGROUP_WALK_TYPE_POST_DIR is singled out
 * before the assignment; presumably it is rejected - TODO confirm, the
 * statement after the test is not visible.
 * NOTE(review): bare numeric lines are extraction residue; the handle check
 * and the return statements are not visible in this span.
 */
2437
int cgroup_walk_tree_set_flags(void **handle, int flags)
2439
struct cgroup_tree_handle *entry;
2441
if (!cgroup_initialized)
2442
return ECGROUPNOTINITIALIZED;
2447
if ((flags & CGROUP_WALK_TYPE_PRE_DIR) &&
2448
(flags & CGROUP_WALK_TYPE_POST_DIR))
2451
entry = (struct cgroup_tree_handle *) *handle;
2452
entry->flags = flags;
/*
 * cg_read_stat: reads one line from fp with getline() and splits it on a
 * space into stat->name (first token) and stat->value (second token).
 * NOTE(review): strncpy() with the full destination size leaves the field
 * unterminated when the token is that long or longer.
 * NOTE(review): bare numeric lines are extraction residue; declarations,
 * the end-of-file and token checks, and the release of the getline() buffer
 * are not visible in this span.
 */
2459
* This parses a stat line which is in the form of (name value) pair
2460
* separated by a space.
2462
int cg_read_stat(FILE *fp, struct cgroup_stat *stat)
2468
char *token, *saveptr;
2470
read = getline(&line, &len, fp);
2474
token = strtok_r(line, " ", &saveptr);
2479
strncpy(stat->name, token, FILENAME_MAX);
2481
token = strtok_r(NULL, " ", &saveptr);
2486
strncpy(stat->value, token, CG_VALUE_MAX);
/*
 * cgroup_read_stats_end: finishes a statistics read; *handle is taken to be
 * the FILE pointer of the open stat file.
 * NOTE(review): bare numeric lines are extraction residue; the handle check,
 * the close of the stream and the return value are not visible.
 */
2493
int cgroup_read_stats_end(void **handle)
2497
if (!cgroup_initialized)
2498
return ECGROUPNOTINITIALIZED;
2503
fp = (FILE *)*handle;
/*
 * cgroup_read_stats_next: reads the next name/value pair of an open stat
 * file into *stat by passing the FILE pointer behind *handle to
 * cg_read_stat(). A NULL handle or stat is tested for first.
 * NOTE(review): bare numeric lines are extraction residue; the result of
 * the NULL test and the return statements are not visible.
 */
2508
int cgroup_read_stats_next(void **handle, struct cgroup_stat *stat)
2513
if (!cgroup_initialized)
2514
return ECGROUPNOTINITIALIZED;
2516
if (!handle || !stat)
2519
fp = (FILE *)*handle;
2520
ret = cg_read_stat(fp, stat);
/*
 * cgroup_read_stats_begin: opens "<cgroup dir>/<controller>.stat" for the
 * group at path and reads the first name/value pair into *stat through
 * cg_read_stat(). The directory is built by cg_build_path().
 * NOTE(review): the sprintf() call uses stat_file both as the destination
 * and as a %s source argument; the C standard leaves copying between
 * overlapping objects undefined, and the write is unbounded.
 * NOTE(review): bare numeric lines are extraction residue; declarations,
 * error returns and the assignment of *handle are not visible.
 */
2526
* TODO: Need to decide a better place to put this function.
2528
int cgroup_read_stats_begin(char *controller, char *path, void **handle,
2529
struct cgroup_stat *stat)
2532
char stat_file[FILENAME_MAX];
2535
if (!cgroup_initialized)
2536
return ECGROUPNOTINITIALIZED;
2538
if (!stat || !handle)
2541
if (!cg_build_path(path, stat_file, controller))
2544
sprintf(stat_file, "%s/%s.stat", stat_file, controller);
2546
fp = fopen(stat_file, "r");
2548
cgroup_dbg("fopen failed\n");
2552
ret = cg_read_stat(fp, stat);
/*
 * cgroup_get_task_end: finishes a task iteration by closing the FILE
 * stream stored in *handle.
 * NOTE(review): bare numeric lines are extraction residue; the handle check
 * and the return value are not visible in this span.
 */
2557
int cgroup_get_task_end(void **handle)
2559
if (!cgroup_initialized)
2560
return ECGROUPNOTINITIALIZED;
2565
fclose((FILE *) *handle);
/*
 * cgroup_get_task_next: reads the next PID from the tasks stream stored in
 * *handle into *pid.
 * NOTE(review): the "%u" conversion expects an unsigned int pointer, while
 * pid is a pid_t pointer (a signed type).
 * NOTE(review): bare numeric lines are extraction residue; the handle check
 * and the handling of the fscanf() result are not visible.
 */
2571
int cgroup_get_task_next(void **handle, pid_t *pid)
2575
if (!cgroup_initialized)
2576
return ECGROUPNOTINITIALIZED;
2581
ret = fscanf((FILE *) *handle, "%u", pid);
/*
 * cgroup_get_task_begin: starts iterating the tasks of a cgroup. The visible
 * code builds the group directory with cg_build_path(), formats
 * "<dir>/tasks" with asprintf(), stores the opened stream in *handle and
 * fetches the first PID through cgroup_get_task_next().
 * NOTE(review): bare numeric lines are extraction residue; the last
 * parameter line (declaring pid), the error checks and the release of
 * fullpath are not visible in this span.
 */
2593
int cgroup_get_task_begin(char *cgroup, char *controller, void **handle,
2597
char path[FILENAME_MAX];
2598
char *fullpath = NULL;
2600
if (!cgroup_initialized)
2601
return ECGROUPNOTINITIALIZED;
2603
if (!cg_build_path(cgroup, path, controller))
2606
ret = asprintf(&fullpath, "%s/tasks", path);
2613
*handle = (void *) fopen(fullpath, "r");
2620
ret = cgroup_get_task_next(handle, pid);
/*
 * cgroup_get_controller_end: finishes a controller iteration; *handle is
 * interpreted as a pointer to the int position counter.
 * NOTE(review): *handle is dereferenced in the initializer, before any
 * check of handle that may follow.
 * NOTE(review): bare numeric lines are extraction residue; the release of
 * the counter and the return value are not visible.
 */
2626
int cgroup_get_controller_end(void **handle)
2628
int *pos = (int *) *handle;
2630
if (!cgroup_initialized)
2631
return ECGROUPNOTINITIALIZED;
/*
 * cgroup_get_controller_next: copies the name and mount path of the mount
 * table entry at position *pos into *info while holding a read lock on
 * cg_mount_table_lock. An entry with an empty name is tested for first and
 * appears to mark the end of the table.
 * NOTE(review): strncpy() with FILENAME_MAX does not guarantee NUL
 * termination of info->name or info->path.
 * NOTE(review): bare numeric lines are extraction residue; the pointer
 * checks, the increment of *pos and the return statements are not visible.
 */
2642
int cgroup_get_controller_next(void **handle, struct cgroup_mount_point *info)
2644
int *pos = (int *) *handle;
2647
if (!cgroup_initialized)
2648
return ECGROUPNOTINITIALIZED;
2656
pthread_rwlock_rdlock(&cg_mount_table_lock);
2658
if (cg_mount_table[*pos].name[0] == '\0') {
2663
strncpy(info->name, cg_mount_table[*pos].name, FILENAME_MAX);
2665
strncpy(info->path, cg_mount_table[*pos].path, FILENAME_MAX);
2671
pthread_rwlock_unlock(&cg_mount_table_lock);
/*
 * cgroup_get_controller_begin: starts iterating the mounted controllers. It
 * allocates an int position counter and returns the first entry through
 * cgroup_get_controller_next().
 * NOTE(review): bare numeric lines are extraction residue; the allocation
 * check, the initialisation of the counter and the assignment of *handle
 * are not visible in this span.
 */
2675
int cgroup_get_controller_begin(void **handle, struct cgroup_mount_point *info)
2679
if (!cgroup_initialized)
2680
return ECGROUPNOTINITIALIZED;
2685
pos = malloc(sizeof(int));
2696
return cgroup_get_controller_next(handle, info);
/*
 * cgroup_get_uid_gid_from_procfs: scans /proc/<pid>/status line by line for
 * the "Uid:" and "Gid:" rows and stores the second number of each row (the
 * effective id) in *euid and *egid. A missing file yields ECGROUPNOTEXIST;
 * failing to find both rows is logged as an invalid file format.
 * NOTE(review): the sscanf() calls use %d with uid_t/gid_t pointers, which
 * are unsigned types on Linux; %u would match.
 * NOTE(review): bare numeric lines are extraction residue; the declarations
 * of f and buf, the updates of the found flags, the fclose() and the return
 * statements are not visible in this span.
 */
2700
* Get process data (euid and egid) from /proc/<pid>/status file.
2701
* @param pid: The process id
2702
* @param euid: The uid of param pid
2703
* @param egid: The gid of param pid
2704
* @return 0 on success, > 0 on error.
2706
int cgroup_get_uid_gid_from_procfs(pid_t pid, uid_t *euid, gid_t *egid)
2709
char path[FILENAME_MAX];
2711
uid_t ruid, suid, fsuid;
2712
gid_t rgid, sgid, fsgid;
2713
bool found_euid = false;
2714
bool found_egid = false;
2716
sprintf(path, "/proc/%d/status", pid);
2717
f = fopen(path, "r");
2719
return ECGROUPNOTEXIST;
2721
while (fgets(buf, sizeof(buf), f)) {
2722
if (!strncmp(buf, "Uid:", 4)) {
2723
/* The row holds four ids: real, effective, saved and filesystem. */
if (sscanf((buf + strlen("Uid:") + 1), "%d%d%d%d",
2724
&ruid, euid, &suid, &fsuid) != 4)
2726
cgroup_dbg("Scanned proc values are %d %d %d %d\n",
2727
ruid, *euid, suid, fsuid);
2729
} else if (!strncmp(buf, "Gid:", 4)) {
2730
if (sscanf((buf + strlen("Gid:") + 1), "%d%d%d%d",
2731
&rgid, egid, &sgid, &fsgid) != 4)
2733
cgroup_dbg("Scanned proc values are %d %d %d %d\n",
2734
rgid, *egid, sgid, fsgid);
2737
if (found_euid && found_egid)
2741
if (!found_euid || !found_egid) {
2743
* This method doesn't match the file format of
2744
* /proc/<pid>/status. The format has been changed
2745
* and we should catch up the change.
2747
cgroup_dbg("The invlid file format of /proc/%d/status.\n", pid);
/*
 * cg_get_procname_from_proc_status: finds the "Name:" row of
 * /proc/<pid>/status, strips a trailing newline and returns a strdup()ed
 * copy of the text after the label in *procname_status. A missing file
 * yields ECGROUPNOTEXIST.
 * NOTE(review): bare numeric lines are extraction residue; the declarations
 * of f, buf and len, the fclose() and the return statements are not visible
 * in this span.
 */
2754
* Get process name from /proc/<pid>/status file.
2755
* @param pid: The process id
2756
* @param pname_status : The process name
2757
* @return 0 on success, > 0 on error.
2759
static int cg_get_procname_from_proc_status(pid_t pid, char **procname_status)
2764
char path[FILENAME_MAX];
2767
sprintf(path, "/proc/%d/status", pid);
2768
f = fopen(path, "r");
2770
return ECGROUPNOTEXIST;
2772
while (fgets(buf, sizeof(buf), f)) {
2773
if (!strncmp(buf, "Name:", 5)) {
2775
if (buf[len - 1] == '\n')
2776
buf[len - 1] = '\0';
2777
/* The + 1 skips the single separator character after the label. */
*procname_status = strdup(buf + strlen("Name:") + 1);
2778
if (*procname_status == NULL) {
/*
 * cg_get_procname_from_proc_cmdline: resolves the full path of a script by
 * walking the NUL-separated arguments of /proc/<pid>/cmdline. Each argument
 * whose basename equals pname_status in the first TASK_COMM_LEN - 1
 * characters is returned in *pname_cmdline: as is when it is absolute,
 * otherwise joined to the target of /proc/<pid>/cwd and canonicalised with
 * realpath().
 * NOTE(review): readlink() does not NUL-terminate; passing the full buffer
 * size leaves no terminator if the link target fills the buffer.
 * NOTE(review): the strcat() calls on buf_cwd have no visible bounds check.
 * NOTE(review): realpath() needs a PATH_MAX-byte buffer, while path is
 * FILENAME_MAX bytes - TODO confirm the two are equal on the targets.
 * NOTE(review): bare numeric lines are extraction residue; the read loop,
 * declarations, fclose() and return statements are not visible.
 */
2792
* Get process name from /proc/<pid>/cmdline file.
2793
* This function is mainly for getting a script name (shell, perl,
2794
* etc). A script name is written into the second or later argument
2795
* of /proc/<pid>/cmdline. This function gets each argument and
2796
* compares it to a process name taken from /proc/<pid>/status.
2797
* @param pid: The process id
2798
* @param pname_status : The process name taken from /proc/<pid>/status
2799
* @param pname_cmdline: The process name taken from /proc/<pid>/cmdline
2800
* @return 0 on success, > 0 on error.
2802
static int cg_get_procname_from_proc_cmdline(pid_t pid, char *pname_status,
2803
char **pname_cmdline)
2809
char path[FILENAME_MAX];
2810
char buf_pname[FILENAME_MAX];
2811
char buf_cwd[FILENAME_MAX];
2813
memset(buf_cwd, '\0', sizeof(buf_cwd));
2814
sprintf(path, "/proc/%d/cwd", pid);
2815
if (readlink(path, buf_cwd, sizeof(buf_cwd)) < 0)
2816
return ECGROUPNOTEXIST;
2818
sprintf(path, "/proc/%d/cmdline", pid);
2819
f = fopen(path, "r");
2821
return ECGROUPNOTEXIST;
2825
if ((c != EOF) && (c != '\0')) {
2830
buf_pname[len] = '\0';
2833
* The taken process name from /proc/<pid>/status is
2834
* shortened to 15 characters if it is over. So the
2835
* name should be compared by its length.
2837
if (strncmp(pname_status, basename(buf_pname),
2838
TASK_COMM_LEN - 1)) {
2842
if (buf_pname[0] == '/') {
2843
*pname_cmdline = strdup(buf_pname);
2844
if (*pname_cmdline == NULL) {
2852
strcat(buf_cwd, "/");
2853
strcat(buf_cwd, buf_pname);
2854
if (!realpath(buf_cwd, path)) {
2859
*pname_cmdline = strdup(path);
2860
if (*pname_cmdline == NULL) {
/*
 * cgroup_get_procname_from_procfs: determines a process name for pid and
 * returns it in heap memory through *procname.
 * The short name comes from cg_get_procname_from_proc_status(). If
 * readlink() on /proc/<pid>/exe fails, that short name is returned. If the
 * basename of the exe target equals the short name in its first
 * TASK_COMM_LEN - 1 characters, a copy of the exe path is returned.
 * Otherwise the path is looked up with cg_get_procname_from_proc_cmdline().
 * NOTE(review): readlink() does not NUL-terminate; passing the full buffer
 * size leaves no terminator if the link target fills the buffer.
 * NOTE(review): bare numeric lines are extraction residue; the declarations
 * of ret and pname_status, the release of pname_status and the return
 * statements are not visible in this span.
 */
2875
* Get a process name from /proc file system.
2876
* This function allocates memory for a process name, writes a process
2877
* name onto it. So a caller should free the memory when unusing it.
2878
* @param pid: The process id
2879
* @param procname: The process name
2880
* @return 0 on success, > 0 on error.
2882
int cgroup_get_procname_from_procfs(pid_t pid, char **procname)
2886
char *pname_cmdline;
2887
char path[FILENAME_MAX];
2888
char buf[FILENAME_MAX];
2890
ret = cg_get_procname_from_proc_status(pid, &pname_status);
2895
* Get the full patch of process name from /proc/<pid>/exe.
2897
memset(buf, '\0', sizeof(buf));
2898
sprintf(path, "/proc/%d/exe", pid);
2899
if (readlink(path, buf, sizeof(buf)) < 0) {
2901
* readlink() fails if a kernel thread, and a process
2902
* name is taken from /proc/<pid>/status.
2904
*procname = pname_status;
2907
if (!strncmp(pname_status, basename(buf), TASK_COMM_LEN - 1)) {
2909
* The taken process name from /proc/<pid>/status is
2910
* shortened to 15 characters if it is over. So the
2911
* name should be compared by its length.
2914
*procname = strdup(buf);
2915
if (*procname == NULL) {
2923
* The above strncmp() is not 0 if a shell script, because
2924
* /proc/<pid>/exe links a shell command (/bin/bash etc.)
2925
* and the pname_status represents a shell script name.
2926
* Then the full path of a shell script is taken from
2927
* /proc/<pid>/cmdline.
2929
ret = cg_get_procname_from_proc_cmdline(pid, pname_status,
2932
*procname = pname_cmdline;
/*
 * cgroup_register_unchanged_process: talks to a daemon over the UNIX stream
 * socket at CGRULE_CGRED_SOCKET_PATH. The visible code connects, writes pid
 * and then flags as raw binary values, reads a reply and compares it with
 * CGRULE_SUCCESS_STORE_PID.
 * NOTE(review): strcpy() into addr.sun_path has no visible length check.
 * NOTE(review): only negative results of write() and read() are tested, so
 * short transfers are not detected here.
 * NOTE(review): bare numeric lines are extraction residue; the declaration
 * of sk, the close of the socket and the return statements are not visible.
 */
2938
int cgroup_register_unchanged_process(pid_t pid, int flags)
2942
char buff[sizeof(CGRULE_SUCCESS_STORE_PID)];
2943
struct sockaddr_un addr;
2945
sk = socket(PF_UNIX, SOCK_STREAM, 0);
2949
bzero((char *)&addr, sizeof(addr));
2950
addr.sun_family = AF_UNIX;
2951
strcpy(addr.sun_path, CGRULE_CGRED_SOCKET_PATH);
2953
if (connect(sk, (struct sockaddr *)&addr,
2954
sizeof(addr.sun_family) + strlen(CGRULE_CGRED_SOCKET_PATH)) < 0) {
2955
/* If the daemon does not work, this function returns 0
2960
if (write(sk, &pid, sizeof(pid)) < 0)
2963
if (write(sk, &flags, sizeof(flags)) < 0)
2966
if (read(sk, buff, sizeof(buff)) < 0)
2969
if (strncmp(buff, CGRULE_SUCCESS_STORE_PID, sizeof(buff)))
2978
int cgroup_get_subsys_mount_point(char *controller, char **mount_point)
2981
int ret = ECGROUPNOTEXIST;
2983
if (!cgroup_initialized)
2984
return ECGROUPNOTINITIALIZED;
2986
pthread_rwlock_rdlock(&cg_mount_table_lock);
2987
for (i = 0; cg_mount_table[i].name[0] != '\0'; i++) {
2988
if (strncmp(cg_mount_table[i].name, controller, FILENAME_MAX))
2991
*mount_point = strdup(cg_mount_table[i].path);
2993
if (!*mount_point) {
3003
pthread_rwlock_unlock(&cg_mount_table_lock);