2
* lxc: linux Container library
4
* (C) Copyright IBM Corp. 2007, 2008
7
* Daniel Lezcano <dlezcano at fr.ibm.com>
9
* This library is free software; you can redistribute it and/or
10
* modify it under the terms of the GNU Lesser General Public
11
* License as published by the Free Software Foundation; either
12
* version 2.1 of the License, or (at your option) any later version.
14
* This library is distributed in the hope that it will be useful,
15
* but WITHOUT ANY WARRANTY; without even the implied warranty of
16
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17
* Lesser General Public License for more details.
19
* You should have received a copy of the GNU Lesser General Public
20
* License along with this library; if not, write to the Free Software
21
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
36
#include <linux/loop.h>
38
#include <sys/types.h>
39
#include <sys/utsname.h>
40
#include <sys/param.h>
42
#include <sys/socket.h>
43
#include <sys/mount.h>
45
#include <sys/prctl.h>
46
#include <sys/capability.h>
47
#include <sys/personality.h>
49
#include <arpa/inet.h>
51
#include <netinet/in.h>
62
#include "lxc.h" /* for lxc_cgroup_set() */
63
#include "caps.h" /* for lxc_caps_last_cap() */
65
lxc_log_define(lxc_conf, lxc);
68
#define MAXINDEXLEN 20
70
#define MAXLINELEN 128
73
#define MS_DIRSYNC 128
85
#define MS_RELATIME (1 << 21)
88
#ifndef MS_STRICTATIME
89
#define MS_STRICTATIME (1 << 24)
93
#define CAP_SETFCAP 31
96
#ifndef CAP_MAC_OVERRIDE
97
#define CAP_MAC_OVERRIDE 32
100
#ifndef CAP_MAC_ADMIN
101
#define CAP_MAC_ADMIN 33
104
#ifndef PR_CAPBSET_DROP
105
#define PR_CAPBSET_DROP 24
108
/*
 * Names of the container hook points. The array is sized by
 * NUM_LXC_HOOKS; the initializer lists five names.
 */
char *lxchook_names[NUM_LXC_HOOKS] = {
109
"pre-start", "pre-mount", "mount", "start", "post-stop" };
111
extern int pivot_root(const char * new_root, const char * put_old);
113
typedef int (*instanciate_cb)(struct lxc_handler *, struct lxc_netdev *);
126
static int instanciate_veth(struct lxc_handler *, struct lxc_netdev *);
127
static int instanciate_macvlan(struct lxc_handler *, struct lxc_netdev *);
128
static int instanciate_vlan(struct lxc_handler *, struct lxc_netdev *);
129
static int instanciate_phys(struct lxc_handler *, struct lxc_netdev *);
130
static int instanciate_empty(struct lxc_handler *, struct lxc_netdev *);
132
/*
 * Dispatch table indexed by LXC_NET_* network type; each slot holds the
 * instanciate_*() function that creates a device of that type.
 */
static instanciate_cb netdev_conf[LXC_NET_MAXCONFTYPE + 1] = {
133
[LXC_NET_VETH] = instanciate_veth,
134
[LXC_NET_MACVLAN] = instanciate_macvlan,
135
[LXC_NET_VLAN] = instanciate_vlan,
136
[LXC_NET_PHYS] = instanciate_phys,
137
[LXC_NET_EMPTY] = instanciate_empty,
140
/*
 * Mount option table walked by parse_mntopt(). Each entry is
 * { option name, middle field, MS_* bit }.
 *
 * NOTE(review): the middle field is 1 for the negating spellings
 * ("rw", "suid", "async", ...) and 0 otherwise, so it presumably
 * selects clearing the bit instead of setting it; confirm against the
 * declaration of struct mount_opt.
 * NOTE(review): parse_mntopt() stops at an entry whose name is NULL;
 * that terminating entry is not visible in this listing.
 */
static struct mount_opt mount_opt[] = {
141
{ "defaults", 0, 0 },
142
{ "ro", 0, MS_RDONLY },
143
{ "rw", 1, MS_RDONLY },
144
{ "suid", 1, MS_NOSUID },
145
{ "nosuid", 0, MS_NOSUID },
146
{ "dev", 1, MS_NODEV },
147
{ "nodev", 0, MS_NODEV },
148
{ "exec", 1, MS_NOEXEC },
149
{ "noexec", 0, MS_NOEXEC },
150
{ "sync", 0, MS_SYNCHRONOUS },
151
{ "async", 1, MS_SYNCHRONOUS },
152
{ "dirsync", 0, MS_DIRSYNC },
153
{ "remount", 0, MS_REMOUNT },
154
{ "mand", 0, MS_MANDLOCK },
155
{ "nomand", 1, MS_MANDLOCK },
156
{ "atime", 1, MS_NOATIME },
157
{ "noatime", 0, MS_NOATIME },
158
{ "diratime", 1, MS_NODIRATIME },
159
{ "nodiratime", 0, MS_NODIRATIME },
160
{ "bind", 0, MS_BIND },
161
{ "rbind", 0, MS_BIND|MS_REC },
162
{ "relatime", 0, MS_RELATIME },
163
{ "norelatime", 1, MS_RELATIME },
164
{ "strictatime", 0, MS_STRICTATIME },
165
{ "nostrictatime", 1, MS_STRICTATIME },
169
/*
 * Table mapping capability names to their CAP_* numbers. setup_caps()
 * compares a drop entry against the name field with strcmp() and uses
 * the value field as the capability id. Some entries are only compiled
 * in when the corresponding CAP_* macro is defined.
 */
static struct caps_opt caps_opt[] = {
170
{ "chown", CAP_CHOWN },
171
{ "dac_override", CAP_DAC_OVERRIDE },
172
{ "dac_read_search", CAP_DAC_READ_SEARCH },
173
{ "fowner", CAP_FOWNER },
174
{ "fsetid", CAP_FSETID },
175
{ "kill", CAP_KILL },
176
{ "setgid", CAP_SETGID },
177
{ "setuid", CAP_SETUID },
178
{ "setpcap", CAP_SETPCAP },
179
{ "linux_immutable", CAP_LINUX_IMMUTABLE },
180
{ "net_bind_service", CAP_NET_BIND_SERVICE },
181
{ "net_broadcast", CAP_NET_BROADCAST },
182
{ "net_admin", CAP_NET_ADMIN },
183
{ "net_raw", CAP_NET_RAW },
184
{ "ipc_lock", CAP_IPC_LOCK },
185
{ "ipc_owner", CAP_IPC_OWNER },
186
{ "sys_module", CAP_SYS_MODULE },
187
{ "sys_rawio", CAP_SYS_RAWIO },
188
{ "sys_chroot", CAP_SYS_CHROOT },
189
{ "sys_ptrace", CAP_SYS_PTRACE },
190
{ "sys_pacct", CAP_SYS_PACCT },
191
{ "sys_admin", CAP_SYS_ADMIN },
192
{ "sys_boot", CAP_SYS_BOOT },
193
{ "sys_nice", CAP_SYS_NICE },
194
{ "sys_resource", CAP_SYS_RESOURCE },
195
{ "sys_time", CAP_SYS_TIME },
196
{ "sys_tty_config", CAP_SYS_TTY_CONFIG },
197
{ "mknod", CAP_MKNOD },
198
{ "lease", CAP_LEASE },
199
#ifdef CAP_AUDIT_WRITE
200
{ "audit_write", CAP_AUDIT_WRITE },
202
#ifdef CAP_AUDIT_CONTROL
203
{ "audit_control", CAP_AUDIT_CONTROL },
205
{ "setfcap", CAP_SETFCAP },
206
{ "mac_override", CAP_MAC_OVERRIDE },
207
{ "mac_admin", CAP_MAC_ADMIN },
209
{ "syslog", CAP_SYSLOG },
211
#ifdef CAP_WAKE_ALARM
212
{ "wake_alarm", CAP_WAKE_ALARM },
216
/*
 * Run a script through popen() and relay its output to the DEBUG log.
 *
 * The command line is "<script> <name> <section>" followed by every
 * string of the NULL-terminated variadic list, each preceded by a
 * single space.
 *
 * NOTE(review): the command is assembled by plain concatenation and
 * handed to popen(), which runs it through the shell; nothing visible
 * here quotes or escapes the arguments.
 */
static int run_script(const char *name, const char *section,
217
const char *script, ...)
221
char *buffer, *p, *output;
225
INFO("Executing script '%s' for container '%s', config section '%s'",
226
script, name, section);
/* First pass over the variadic arguments: add up the room they need
 * (string length plus one byte each). */
228
va_start(ap, script);
229
while ((p = va_arg(ap, char *)))
230
size += strlen(p) + 1;
233
size += strlen(script);
234
size += strlen(name);
235
size += strlen(section);
/* The command buffer is taken from the stack, sized by the inputs. */
241
buffer = alloca(size);
243
ERROR("failed to allocate memory");
247
ret = snprintf(buffer, size, "%s %s %s", script, name, section);
248
if (ret < 0 || ret >= size) {
249
ERROR("Script name too long");
/* Second pass: append each variadic argument after the fixed part. */
254
va_start(ap, script);
255
while ((p = va_arg(ap, char *))) {
258
rc = snprintf(buffer + ret, len, " %s", p);
259
if (rc < 0 || rc >= len) {
261
ERROR("Script args too long");
268
f = popen(buffer, "r");
270
SYSERROR("popen failed");
274
output = malloc(LXC_LOG_BUFFER_SIZE);
276
ERROR("failed to allocate memory for script output");
/* Log the script output one fgets() chunk at a time. */
280
while(fgets(output, LXC_LOG_BUFFER_SIZE, f))
281
DEBUG("script output: %s", output);
/* Only a pclose() failure (-1) is reported here. */
285
if (pclose(f) == -1) {
286
SYSERROR("Script exited on error");
293
/*
 * Line callback handed to lxc_file_for_each_line() by mount_unknow_fs().
 * Lines containing "nodev" are not tried. For other lines the
 * filesystem name is adjusted at both ends with lxc_char_left_gc() and
 * lxc_char_right_gc() (presumably stripping surrounding whitespace;
 * confirm) and a mount of cbarg->rootfs on cbarg->target is attempted
 * with that type and cbarg->mntopt as flags.
 *
 * NOTE(review): how fstype and cbarg are obtained from buffer/data and
 * the values returned are not visible in this listing.
 */
static int find_fstype_cb(char* buffer, void *data)
303
/* we don't try 'nodev' entries */
304
if (strstr(buffer, "nodev"))
308
fstype += lxc_char_left_gc(fstype, strlen(fstype));
309
fstype[lxc_char_right_gc(fstype, strlen(fstype))] = '\0';
311
DEBUG("trying to mount '%s'->'%s' with fstype '%s'",
312
cbarg->rootfs, cbarg->target, fstype);
/* A failed attempt is only logged at DEBUG level. */
314
if (mount(cbarg->rootfs, cbarg->target, fstype, cbarg->mntopt, NULL)) {
315
DEBUG("mount failed with error: %s", strerror(errno));
319
INFO("mounted '%s' on '%s', with fstype '%s'",
320
cbarg->rootfs, cbarg->target, fstype);
325
/*
 * Mount rootfs on target without knowing its filesystem type: every
 * accessible file listed in fsfile[] is fed line by line to
 * find_fstype_cb(), which attempts the mount. An error is logged when
 * a file cannot be parsed or when no type could be determined.
 */
static int mount_unknow_fs(const char *rootfs, const char *target, int mntopt)
340
* find the filesystem type with brute force:
341
* first we check with /etc/filesystems, in case the modules
342
* are auto-loaded and fall back to the supported kernel fs
349
for (i = 0; i < sizeof(fsfile)/sizeof(fsfile[0]); i++) {
/* access() returns non-zero when the file does not exist. */
353
if (access(fsfile[i], F_OK))
356
ret = lxc_file_for_each_line(fsfile[i], find_fstype_cb, &cbarg);
358
ERROR("failed to parse '%s'", fsfile[i]);
366
ERROR("failed to determine fs type for '%s'", rootfs);
370
/*
 * Directory rootfs: recursively bind-mount rootfs onto target and
 * return the result of mount().
 */
static int mount_rootfs_dir(const char *rootfs, const char *target)
372
return mount(rootfs, target, "none", MS_BIND | MS_REC, NULL);
375
/*
 * Attach the file rootfs to the loop device open on fd.
 * loinfo is zeroed, given LO_FLAGS_AUTOCLEAR, and applied with
 * LOOP_SET_STATUS64 after the backing file has been bound with
 * LOOP_SET_FD.
 */
static int setup_lodev(const char *rootfs, int fd, struct loop_info64 *loinfo)
/* Open the backing file read-write. */
380
rfd = open(rootfs, O_RDWR);
382
SYSERROR("failed to open '%s'", rootfs);
386
memset(loinfo, 0, sizeof(*loinfo));
388
loinfo->lo_flags = LO_FLAGS_AUTOCLEAR;
390
if (ioctl(fd, LOOP_SET_FD, rfd)) {
391
SYSERROR("failed to LOOP_SET_FD");
395
if (ioctl(fd, LOOP_SET_STATUS64, loinfo)) {
396
SYSERROR("failed to LOOP_SET_STATUS64");
407
/*
 * Regular-file rootfs: scan /dev for entries whose name starts with
 * "loop", probe each one with LOOP_GET_STATUS64, attach rootfs to a
 * free one through setup_lodev() and mount that device on target with
 * mount_unknow_fs().
 *
 * NOTE(review): readdir_r() is deprecated in current glibc; readdir()
 * is the recommended replacement.
 */
static int mount_rootfs_file(const char *rootfs, const char *target)
409
struct dirent dirent, *direntp;
410
struct loop_info64 loinfo;
411
int ret = -1, fd = -1, rc;
413
char path[MAXPATHLEN];
415
dir = opendir("/dev");
417
SYSERROR("failed to open '/dev'");
421
while (!readdir_r(dir, &dirent, &direntp)) {
426
if (!strcmp(direntp->d_name, "."))
429
if (!strcmp(direntp->d_name, ".."))
/* strncmp() is non-zero for names that do not begin with "loop". */
432
if (strncmp(direntp->d_name, "loop", 4))
435
rc = snprintf(path, MAXPATHLEN, "/dev/%s", direntp->d_name);
436
if (rc < 0 || rc >= MAXPATHLEN)
439
fd = open(path, O_RDWR);
/* A successful status query presumably means the device is already
 * in use; the handling of that case is not visible here. */
443
if (ioctl(fd, LOOP_GET_STATUS64, &loinfo) == 0) {
448
if (errno != ENXIO) {
449
WARN("unexpected error for ioctl on '%s': %m",
454
DEBUG("found '%s' free lodev", path);
456
ret = setup_lodev(rootfs, fd, &loinfo);
458
ret = mount_unknow_fs(path, target, 0);
465
WARN("failed to close directory");
470
/*
 * Block-device rootfs: let mount_unknow_fs() probe the filesystem type
 * and mount it on target with no extra mount flags.
 */
static int mount_rootfs_block(const char *rootfs, const char *target)
472
return mount_unknow_fs(rootfs, target, 0);
477
* if rootfs is a directory, then open ${rootfs}.hold for writing for the
478
* duration of the container run, to prevent the container from marking the
479
* underlying fs readonly on shutdown.
480
* return -1 on error.
481
* return -2 if nothing needed to be pinned.
482
* return an open fd (>=0) if we pinned it.
484
/*
 * Open "<absolute rootfs>.hold" read-write (creating it with owner
 * read/write permission) when rootfs resolves to a directory.
 * The path is resolved with realpath() and checked with access() and
 * stat() first.
 *
 * NOTE(review): the values returned on each path are not visible in
 * this listing.
 */
int pin_rootfs(const char *rootfs)
486
char absrootfs[MAXPATHLEN];
487
char absrootfspin[MAXPATHLEN];
/* NULL or empty rootfs is handled first. */
491
if (rootfs == NULL || strlen(rootfs) == 0)
494
if (!realpath(rootfs, absrootfs)) {
495
SYSERROR("failed to get real path for '%s'", rootfs);
499
if (access(absrootfs, F_OK)) {
500
SYSERROR("'%s' is not accessible", absrootfs);
504
if (stat(absrootfs, &s)) {
505
SYSERROR("failed to stat '%s'", absrootfs);
/* Anything that is not a directory is handled separately. */
509
if (!__S_ISTYPE(s.st_mode, S_IFDIR))
/* NOTE(review): truncation is reported with SYSERROR although no
 * system call failed here, and a negative snprintf() result is not
 * tested. */
512
ret = snprintf(absrootfspin, MAXPATHLEN, "%s%s", absrootfs, ".hold");
513
if (ret >= MAXPATHLEN) {
514
SYSERROR("pathname too long for rootfs hold file");
/* NOTE(review): the descriptor is logged without a visible check that
 * open() succeeded. */
518
fd = open(absrootfspin, O_CREAT | O_RDWR, S_IWUSR|S_IRUSR);
519
INFO("opened %s as fd %d\n", absrootfspin, fd);
523
/*
 * Mount rootfs on target according to what rootfs is.
 * The path is made absolute with realpath(), checked with access() and
 * stat(), then dispatched through a small table keyed on file type:
 * directory, block device or regular file. Any other type is reported
 * as unsupported.
 */
static int mount_rootfs(const char *rootfs, const char *target)
525
char absrootfs[MAXPATHLEN];
529
typedef int (*rootfs_cb)(const char *, const char *);
535
{ S_IFDIR, mount_rootfs_dir },
536
{ S_IFBLK, mount_rootfs_block },
537
{ S_IFREG, mount_rootfs_file },
540
if (!realpath(rootfs, absrootfs)) {
541
SYSERROR("failed to get real path for '%s'", rootfs);
545
if (access(absrootfs, F_OK)) {
546
SYSERROR("'%s' is not accessible", absrootfs);
550
if (stat(absrootfs, &s)) {
551
SYSERROR("failed to stat '%s'", absrootfs);
/* The first table entry whose type matches st_mode does the mount. */
555
for (i = 0; i < sizeof(rtfs_type)/sizeof(rtfs_type[0]); i++) {
557
if (!__S_ISTYPE(s.st_mode, rtfs_type[i].type))
560
return rtfs_type[i].cb(absrootfs, target);
563
ERROR("unsupported rootfs type for '%s'", absrootfs);
567
/*
 * Set the hostname to utsname->nodename with sethostname(); a failure
 * is logged with SYSERROR, success with INFO.
 */
static int setup_utsname(struct utsname *utsname)
572
if (sethostname(utsname->nodename, strlen(utsname->nodename))) {
573
SYSERROR("failed to set the hostname to '%s'", utsname->nodename);
577
INFO("'%s' hostname has been setup", utsname->nodename);
582
/*
 * Expose each allocated pty of tty_info inside the container rootfs as
 * <rootfs->mount>/dev/tty<N>, N starting at 1.
 *
 * Two layouts are visible: one that bind-mounts the pty on
 * <rootfs->mount>/dev/<ttydir>/tty<N> and makes dev/tty<N> a symlink to
 * "<ttydir>/tty<N>", and one that bind-mounts the pty directly on
 * dev/tty<N>. The condition choosing between them is not visible in
 * this listing (presumably whether ttydir is set; confirm).
 */
static int setup_tty(const struct lxc_rootfs *rootfs,
583
const struct lxc_tty_info *tty_info, char *ttydir)
585
char path[MAXPATHLEN], lxcpath[MAXPATHLEN];
591
for (i = 0; i < tty_info->nbtty; i++) {
593
struct lxc_pty_info *pty_info = &tty_info->pty_info[i];
595
ret = snprintf(path, sizeof(path), "%s/dev/tty%d",
596
rootfs->mount, i + 1);
597
if (ret >= sizeof(path)) {
598
ERROR("pathname too long for ttys");
602
/* create dev/lxc/tty%d" */
603
ret = snprintf(lxcpath, sizeof(lxcpath), "%s/dev/%s/tty%d",
604
rootfs->mount, ttydir, i + 1);
605
if (ret >= sizeof(lxcpath)) {
606
ERROR("pathname too long for ttys");
/* An already existing mount point file is tolerated. */
609
ret = creat(lxcpath, 0660);
610
if (ret==-1 && errno != EEXIST) {
611
SYSERROR("error creating %s\n", lxcpath);
616
if (ret && errno != ENOENT) {
617
SYSERROR("error unlinking %s\n", path);
/* NOTE(review): the mount target is lxcpath but the warning below
 * prints path. */
621
if (mount(pty_info->name, lxcpath, "none", MS_BIND, 0)) {
622
WARN("failed to mount '%s'->'%s'",
623
pty_info->name, path);
/* lxcpath is reused for the relative symlink target. */
627
ret = snprintf(lxcpath, sizeof(lxcpath), "%s/tty%d", ttydir, i+1);
628
if (ret >= sizeof(lxcpath)) {
629
ERROR("tty pathname too long");
632
ret = symlink(lxcpath, path);
634
SYSERROR("failed to create symlink for tty %d\n", i+1);
638
if (mount(pty_info->name, path, "none", MS_BIND, 0)) {
639
WARN("failed to mount '%s'->'%s'",
640
pty_info->name, path);
646
INFO("%d tty(s) has been setup", tty_info->nbtty);
651
/*
 * Line callback for a mounts file. data is an array of two pointers:
 * [0] the list collecting mount points, [1] the pivot directory
 * prefix. The second space-separated field of the line is kept when it
 * starts with the pivot directory and is not yet in the list; a copy
 * of it is appended to the list.
 *
 * NOTE(review): strtok() keeps hidden static state and is not
 * reentrant.
 */
static int setup_rootfs_pivot_root_cb(char *buffer, void *data)
653
struct lxc_list *mountlist, *listentry, *iterator;
654
char *pivotdir, *mountpoint, *mountentry;
659
cbparm = (void **)data;
661
mountlist = cbparm[0];
662
pivotdir = cbparm[1];
664
/* parse entry, first field is mountname, ignore */
665
mountpoint = strtok(mountentry, " ");
669
/* second field is mountpoint */
670
mountpoint = strtok(NULL, " ");
674
/* only consider mountpoints below old root fs */
675
if (strncmp(mountpoint, pivotdir, strlen(pivotdir)))
678
/* filter duplicate mountpoints */
680
lxc_list_for_each(iterator, mountlist) {
681
if (!strcmp(iterator->elem, mountpoint)) {
689
/* add entry to list */
690
listentry = malloc(sizeof(*listentry));
692
SYSERROR("malloc for mountpoint listentry failed");
696
listentry->elem = strdup(mountpoint);
697
if (!listentry->elem) {
698
SYSERROR("strdup failed");
701
lxc_list_add_tail(mountlist, listentry);
706
/*
 * Unmount everything mounted below the old root after pivot_root.
 *
 * The mount points are collected from <oldrootfs>/proc/mounts by
 * setup_rootfs_pivot_root_cb(), using "/<oldrootfs>" as the prefix to
 * match. Plain umount() is retried over the list until nothing is left
 * or a pass makes no progress; what remains is then tried with a lazy
 * unmount (MNT_DETACH) and finally a forced one (MNT_FORCE).
 */
static int umount_oldrootfs(const char *oldrootfs)
708
char path[MAXPATHLEN];
710
struct lxc_list mountlist, *iterator;
711
int ok, still_mounted, last_still_mounted;
714
/* read and parse /proc/mounts in old root fs */
715
lxc_list_init(&mountlist);
717
/* oldrootfs is on the top tree directory now */
718
rc = snprintf(path, sizeof(path), "/%s", oldrootfs);
719
if (rc >= sizeof(path)) {
720
ERROR("rootfs name too long");
/* Callback arguments: the list to fill and the prefix to match. */
723
cbparm[0] = &mountlist;
725
cbparm[1] = strdup(path);
727
SYSERROR("strdup failed");
731
rc = snprintf(path, sizeof(path), "%s/proc/mounts", oldrootfs);
732
if (rc >= sizeof(path)) {
733
ERROR("container proc/mounts name too long");
737
ok = lxc_file_for_each_line(path,
738
setup_rootfs_pivot_root_cb, &cbparm);
740
SYSERROR("failed to read or parse mount list '%s'", path);
744
/* umount filesystems until none left or list no longer shrinks */
747
last_still_mounted = still_mounted;
750
lxc_list_for_each(iterator, &mountlist) {
752
/* umount normally */
753
if (!umount(iterator->elem)) {
754
DEBUG("umounted '%s'", (char *)iterator->elem);
755
lxc_list_del(iterator);
762
} while (still_mounted > 0 && still_mounted != last_still_mounted);
765
lxc_list_for_each(iterator, &mountlist) {
767
/* let's try a lazy umount */
768
if (!umount2(iterator->elem, MNT_DETACH)) {
769
INFO("lazy unmount of '%s'", (char *)iterator->elem);
773
/* be more brutal (nfs) */
774
if (!umount2(iterator->elem, MNT_FORCE)) {
775
INFO("forced unmount of '%s'", (char *)iterator->elem);
779
WARN("failed to unmount '%s'", (char *)iterator->elem);
785
/*
 * Make rootfs the new root with pivot_root(), parking the old root on
 * <rootfs>/<pivotdir>.
 *
 * Visible steps: change directory into rootfs, create the pivot
 * directory with mkdir_p() when it does not exist, call
 * pivot_root(".", path), change directory to "/", unmount the old root
 * tree through umount_oldrootfs(pivotdir) and finally rmdir() the
 * pivot directory when remove_pivotdir is set (a failure there only
 * produces a warning).
 */
static int setup_rootfs_pivot_root(const char *rootfs, const char *pivotdir)
787
char path[MAXPATHLEN];
788
int remove_pivotdir = 0;
791
/* change into new root fs */
793
SYSERROR("can't chdir to new rootfs '%s'", rootfs);
800
/* compute the full path to pivotdir under rootfs */
801
rc = snprintf(path, sizeof(path), "%s/%s", rootfs, pivotdir);
802
if (rc >= sizeof(path)) {
803
ERROR("pivot dir name too long");
807
if (access(path, F_OK)) {
809
if (mkdir_p(path, 0755)) {
810
SYSERROR("failed to create pivotdir '%s'", path);
815
DEBUG("created '%s' directory", path);
818
DEBUG("mountpoint for old rootfs is '%s'", path);
820
/* pivot_root into our new root fs */
821
if (pivot_root(".", path)) {
822
SYSERROR("pivot_root syscall failed");
827
SYSERROR("can't chdir to / after pivot_root");
831
DEBUG("pivot_root syscall to '%s' successful", rootfs);
833
/* we switch from absolute path to relative path */
834
if (umount_oldrootfs(pivotdir))
837
/* remove temporary mount point, we don't consider the removing
839
if (remove_pivotdir && rmdir(pivotdir))
840
WARN("can't remove mountpoint '%s': %m", pivotdir);
845
/*
 * Mount the container root: check that the mount point rootfs->mount
 * exists, then mount rootfs->path on it with mount_rootfs().
 */
static int setup_rootfs(const struct lxc_rootfs *rootfs)
850
if (access(rootfs->mount, F_OK)) {
851
SYSERROR("failed to access to '%s', check it is present",
856
if (mount_rootfs(rootfs->path, rootfs->mount)) {
857
ERROR("failed to mount rootfs");
861
DEBUG("mounted '%s' on '%s'", rootfs->path, rootfs->mount);
866
/*
 * Pivot into rootfs->mount using rootfs->pivot as the directory that
 * receives the old root; a failure of setup_rootfs_pivot_root() is
 * logged.
 */
int setup_pivot_root(const struct lxc_rootfs *rootfs)
871
if (setup_rootfs_pivot_root(rootfs->mount, rootfs->pivot)) {
872
ERROR("failed to setup pivot root");
879
/*
 * Mount a private devpts instance on /dev/pts and make /dev/ptmx refer
 * to its ptmx node.
 *
 * An existing /dev/pts (detected through /dev/pts/ptmx) is unmounted
 * first. /dev/ptmx is then created as a symlink when missing, left
 * alone when it already resolves to /dev/pts/ptmx, and otherwise
 * covered by a bind mount of /dev/pts/ptmx.
 */
static int setup_pts(int pts)
881
char target[PATH_MAX];
886
if (!access("/dev/pts/ptmx", F_OK) && umount("/dev/pts")) {
887
SYSERROR("failed to umount 'dev/pts'");
891
if (mount("devpts", "/dev/pts", "devpts", MS_MGC_VAL,
892
"newinstance,ptmxmode=0666")) {
893
SYSERROR("failed to mount a new instance of '/dev/pts'");
/* No /dev/ptmx yet: a symlink is enough. */
897
if (access("/dev/ptmx", F_OK)) {
898
if (!symlink("/dev/pts/ptmx", "/dev/ptmx"))
900
SYSERROR("failed to symlink '/dev/pts/ptmx'->'/dev/ptmx'");
904
if (realpath("/dev/ptmx", target) && !strcmp(target, "/dev/pts/ptmx"))
907
/* fallback here, /dev/pts/ptmx exists just mount bind */
908
if (mount("/dev/pts/ptmx", "/dev/ptmx", "none", MS_BIND, 0)) {
909
SYSERROR("mount failed '/dev/pts/ptmx'->'/dev/ptmx'");
913
INFO("created new pts instance");
919
/*
 * Apply the execution domain persona with personality(); a negative
 * result is logged as an error, success is logged with INFO.
 */
static int setup_personality(int persona)
924
if (personality(persona) < 0) {
925
SYSERROR("failed to set personality to '0x%x'", persona);
929
INFO("set personality to '0x%x'", persona);
934
/*
 * Bind-mount the console pty (console->name) over
 * <rootfs->mount>/dev/console.
 *
 * A missing dev/console in the rootfs only produces a warning, and
 * console->peer == -1 means no console output is wanted. Before the
 * mount, the pty is given the same mode bits as the dev/console node
 * found in the rootfs.
 */
static int setup_dev_console(const struct lxc_rootfs *rootfs,
935
const struct lxc_console *console)
937
char path[MAXPATHLEN];
941
ret = snprintf(path, sizeof(path), "%s/dev/console", rootfs->mount);
942
if (ret >= sizeof(path)) {
943
ERROR("console path too long\n");
947
if (access(path, F_OK)) {
948
WARN("rootfs specified but no console found at '%s'", path);
952
if (console->peer == -1) {
953
INFO("no console output required");
957
if (stat(path, &s)) {
958
SYSERROR("failed to stat '%s'", path);
/* Copy the mode of the rootfs console node onto the pty. */
962
if (chmod(console->name, s.st_mode)) {
963
SYSERROR("failed to set mode '0%o' to '%s'",
964
s.st_mode, console->name);
968
if (mount(console->name, path, "none", MS_BIND, 0)) {
969
ERROR("failed to mount '%s' on '%s'", console->name, path);
973
INFO("console has been setup");
977
/*
 * Console setup for the tty-directory layout: the console pty is
 * bind-mounted on <rootfs->mount>/dev/<ttydir>/console and
 * <rootfs->mount>/dev/console becomes a symlink to "<ttydir>/console".
 *
 * The directory is created with mode 0755 and the mount point file
 * with mode 0660; an already existing directory or file is tolerated.
 * console->peer == -1 means no console output is wanted.
 */
static int setup_ttydir_console(const struct lxc_rootfs *rootfs,
978
const struct lxc_console *console,
981
char path[MAXPATHLEN], lxcpath[MAXPATHLEN];
984
/* create rootfs/dev/<ttydir> directory */
985
ret = snprintf(path, sizeof(path), "%s/dev/%s", rootfs->mount,
987
if (ret >= sizeof(path))
989
ret = mkdir(path, 0755);
990
if (ret && errno != EEXIST) {
991
SYSERROR("failed with errno %d to create %s\n", errno, path);
994
INFO("created %s\n", path);
996
ret = snprintf(lxcpath, sizeof(lxcpath), "%s/dev/%s/console",
997
rootfs->mount, ttydir);
998
if (ret >= sizeof(lxcpath)) {
999
ERROR("console path too long\n");
/* NOTE(review): the result of this snprintf() is not checked. */
1003
snprintf(path, sizeof(path), "%s/dev/console", rootfs->mount);
1005
if (ret && errno != ENOENT) {
1006
SYSERROR("error unlinking %s\n", path);
1010
ret = creat(lxcpath, 0660);
1011
if (ret==-1 && errno != EEXIST) {
1012
SYSERROR("error %d creating %s\n", errno, lxcpath);
1017
if (console->peer == -1) {
1018
INFO("no console output required");
1022
if (mount(console->name, lxcpath, "none", MS_BIND, 0)) {
1023
ERROR("failed to mount '%s' on '%s'", console->name, lxcpath);
1027
/* create symlink from rootfs/dev/console to 'lxc/console' */
1028
ret = snprintf(lxcpath, sizeof(lxcpath), "%s/console", ttydir);
1029
if (ret >= sizeof(lxcpath)) {
1030
ERROR("lxc/console path too long");
1033
ret = symlink(lxcpath, path);
1035
SYSERROR("failed to create symlink for console");
1039
INFO("console has been setup on %s", lxcpath);
1044
/*
 * Set up the container console by delegating to setup_dev_console() or
 * setup_ttydir_console().
 * NOTE(review): the conditions selecting each path are not visible in
 * this listing (presumably whether a rootfs and a ttydir are
 * configured; confirm).
 */
static int setup_console(const struct lxc_rootfs *rootfs,
1045
const struct lxc_console *console,
1048
/* We don't have a rootfs, /dev/console will be shared */
1052
return setup_dev_console(rootfs, console);
1054
return setup_ttydir_console(rootfs, console, ttydir);
1057
/*
 * Apply every configured cgroup setting of the list to the container
 * called name: each element carries a subsystem and a value that are
 * passed to lxc_cgroup_set(). An empty list is handled up front.
 */
static int setup_cgroup(const char *name, struct lxc_list *cgroups)
1059
struct lxc_list *iterator;
1060
struct lxc_cgroup *cg;
1063
if (lxc_list_empty(cgroups))
1066
lxc_list_for_each(iterator, cgroups) {
1068
cg = iterator->elem;
/* A non-zero result from lxc_cgroup_set() is treated as failure. */
1070
if (lxc_cgroup_set(name, cg->subsystem, cg->value))
1073
DEBUG("cgroup '%s' set to '%s'", cg->subsystem, cg->value);
1077
INFO("cgroup has been setup");
1082
/*
 * Classify one mount option against the mount_opt table, stopping at
 * the entry whose name is NULL.
 *
 * NOTE(review): the comparison is strncmp() over the length of the
 * table name, i.e. a prefix match; an option that merely begins with a
 * table name (for example one starting with "ro" or "dev") is treated
 * as that flag. Confirm whether an exact match was intended.
 */
static void parse_mntopt(char *opt, unsigned long *flags, char **data)
1084
struct mount_opt *mo;
1086
/* If opt is found in mount_opt, set or clear flags.
1087
* Otherwise append it to data. */
1089
for (mo = &mount_opt[0]; mo->name != NULL; mo++) {
1090
if (!strncmp(opt, mo->name, strlen(mo->name))) {
1092
*flags &= ~mo->flag;
1104
/*
 * Split a comma-separated mount option string and feed each option to
 * parse_mntopt(), which updates *mntflags and the data buffer.
 * The input is duplicated before tokenising because strtok_r()
 * modifies the string it scans; the data buffer is allocated with the
 * same size as the option string.
 */
static int parse_mntopts(const char *mntopts, unsigned long *mntflags,
1108
char *p, *saveptr = NULL;
1116
s = strdup(mntopts);
1118
SYSERROR("failed to allocate memory");
1122
data = malloc(strlen(s) + 1);
1124
SYSERROR("failed to allocate memory");
1130
for (p = strtok_r(s, ",", &saveptr); p != NULL;
1131
p = strtok_r(NULL, ",", &saveptr))
1132
parse_mntopt(p, mntflags, &data);
1143
/*
 * Mount fsname on target.
 * The first mount() call is made with MS_REMOUNT masked out. When the
 * caller asked for MS_REMOUNT or MS_BIND, a second mount() call with
 * MS_REMOUNT added follows so that the remaining flags are applied.
 */
static int mount_entry(const char *fsname, const char *target,
1144
const char *fstype, unsigned long mountflags,
1147
if (mount(fsname, target, fstype, mountflags & ~MS_REMOUNT, data)) {
1148
SYSERROR("failed to mount '%s' on '%s'", fsname, target);
1152
if ((mountflags & MS_REMOUNT) || (mountflags & MS_BIND)) {
1154
DEBUG("remounting %s on %s to respect bind or remount options",
1157
if (mount(fsname, target, fstype,
1158
mountflags | MS_REMOUNT, data)) {
1159
SYSERROR("failed to mount '%s' on '%s'",
1165
DEBUG("mounted '%s' on '%s', type '%s'", fsname, target, fstype);
1170
/*
 * Mount an fstab entry exactly where it says (mnt_dir is used as is):
 * parse the option string into flags and data, then call
 * mount_entry().
 */
static inline int mount_entry_on_systemfs(struct mntent *mntent)
1172
unsigned long mntflags;
1176
if (parse_mntopts(mntent->mnt_opts, &mntflags, &mntdata) < 0) {
1177
ERROR("failed to parse mount option '%s'", mntent->mnt_opts);
1181
ret = mount_entry(mntent->mnt_fsname, mntent->mnt_dir,
1182
mntent->mnt_type, mntflags, mntdata);
1189
/*
 * Mount an fstab entry whose mount point is an absolute path that
 * refers to the container rootfs from the host side.
 *
 * The mount point is searched first for "/var/lib/lxc/<lxc_name>/rootfs"
 * and then for rootfs->path; offset is set to the length of the prefix
 * that was found. The target is then rebuilt below rootfs->mount. An
 * entry matching neither prefix is ignored with a warning.
 *
 * NOTE(review): strstr() finds the prefix anywhere in mnt_dir, not only
 * at its start; confirm that this is acceptable.
 */
static int mount_entry_on_absolute_rootfs(struct mntent *mntent,
1190
const struct lxc_rootfs *rootfs,
1191
const char *lxc_name)
1194
char path[MAXPATHLEN];
1195
unsigned long mntflags;
1197
int r, ret = 0, offset;
1199
if (parse_mntopts(mntent->mnt_opts, &mntflags, &mntdata) < 0) {
1200
ERROR("failed to parse mount option '%s'", mntent->mnt_opts);
1204
/* if rootfs->path is a blockdev path, allow container fstab to
1205
* use /var/lib/lxc/CN/rootfs as the target prefix */
1206
r = snprintf(path, MAXPATHLEN, "/var/lib/lxc/%s/rootfs", lxc_name);
1207
if (r < 0 || r >= MAXPATHLEN)
1210
aux = strstr(mntent->mnt_dir, path);
1212
offset = strlen(path);
1217
aux = strstr(mntent->mnt_dir, rootfs->path);
1219
WARN("ignoring mount point '%s'", mntent->mnt_dir);
1222
offset = strlen(rootfs->path);
1226
r = snprintf(path, MAXPATHLEN, "%s/%s", rootfs->mount,
1228
if (r < 0 || r >= MAXPATHLEN) {
1229
WARN("pathnme too long for '%s'", mntent->mnt_dir);
1235
ret = mount_entry(mntent->mnt_fsname, path, mntent->mnt_type,
1243
/*
 * Mount an fstab entry whose mount point is relative: the target is
 * "<rootfs>/<mnt_dir>". Options are parsed with parse_mntopts() and
 * the mount is done by mount_entry().
 */
static int mount_entry_on_relative_rootfs(struct mntent *mntent,
1246
char path[MAXPATHLEN];
1247
unsigned long mntflags;
1251
if (parse_mntopts(mntent->mnt_opts, &mntflags, &mntdata) < 0) {
1252
ERROR("failed to parse mount option '%s'", mntent->mnt_opts);
1256
/* relative to root mount point */
1257
ret = snprintf(path, sizeof(path), "%s/%s", rootfs, mntent->mnt_dir);
1258
if (ret >= sizeof(path)) {
1259
ERROR("path name too long");
1263
ret = mount_entry(mntent->mnt_fsname, path, mntent->mnt_type,
1271
/*
 * Mount every entry read from file with getmntent().
 * Without a container rootfs (rootfs->path unset) entries are mounted
 * as written; otherwise a mount point not starting with '/' is taken
 * relative to the rootfs and an absolute one goes through
 * mount_entry_on_absolute_rootfs().
 */
static int mount_file_entries(const struct lxc_rootfs *rootfs, FILE *file,
1272
const char *lxc_name)
1274
struct mntent *mntent;
1277
while ((mntent = getmntent(file))) {
1279
if (!rootfs->path) {
1280
if (mount_entry_on_systemfs(mntent))
1285
/* We have a separate root, mounts are relative to it */
1286
if (mntent->mnt_dir[0] != '/') {
1287
if (mount_entry_on_relative_rootfs(mntent,
1293
if (mount_entry_on_absolute_rootfs(mntent, rootfs, lxc_name))
1299
INFO("mount points have been setup");
1304
/*
 * Mount the entries of the fstab file: open it with setmntent() for
 * reading and hand the stream to mount_file_entries().
 */
static int setup_mount(const struct lxc_rootfs *rootfs, const char *fstab,
1305
const char *lxc_name)
1313
file = setmntent(fstab, "r");
1315
SYSERROR("failed to use '%s'", fstab);
1319
ret = mount_file_entries(rootfs, file, lxc_name);
1325
/*
 * Mount the entries held in the mount list. Each element is written as
 * one line to a temporary file, which is then processed by
 * mount_file_entries() like an fstab.
 */
static int setup_mount_entries(const struct lxc_rootfs *rootfs, struct lxc_list *mount,
1326
const char *lxc_name)
1329
struct lxc_list *iterator;
1335
ERROR("tmpfile error: %m");
1339
lxc_list_for_each(iterator, mount) {
1340
mount_entry = iterator->elem;
1341
fprintf(file, "%s\n", mount_entry);
1346
ret = mount_file_entries(rootfs, file, lxc_name);
1352
/*
 * Drop every capability named in the caps list from the bounding set
 * with prctl(PR_CAPBSET_DROP).
 *
 * Each entry is first looked up by name in caps_opt. An entry that is
 * not found there is parsed as a decimal number with strtol() and
 * compared against lxc_caps_last_cap(). An entry that cannot be
 * resolved is reported as an unknown capability.
 */
static int setup_caps(struct lxc_list *caps)
1354
struct lxc_list *iterator;
1359
lxc_list_for_each(iterator, caps) {
1361
drop_entry = iterator->elem;
1365
for (i = 0; i < sizeof(caps_opt)/sizeof(caps_opt[0]); i++) {
1367
if (strcmp(drop_entry, caps_opt[i].name))
1370
capid = caps_opt[i].value;
1375
/* try to see if it's numeric, so the user may specify
1376
* capabilities that the running kernel knows about but
1378
capid = strtol(drop_entry, &ptr, 10);
1379
if (!ptr || *ptr != '\0' ||
1380
capid == LONG_MIN || capid == LONG_MAX)
1381
/* not a valid number */
1383
else if (capid > lxc_caps_last_cap())
1384
/* we have a number but it's not a valid
1390
ERROR("unknown capability %s", drop_entry);
1394
DEBUG("drop capability '%s' (%d)", drop_entry, capid);
1396
if (prctl(PR_CAPBSET_DROP, capid, 0, 0, 0)) {
1397
SYSERROR("failed to remove %s capability", drop_entry);
1403
DEBUG("capabilities has been setup");
1408
/*
 * Set the MAC address of interface ifname.
 * hwaddr is converted to a struct sockaddr by lxc_convert_mac() and
 * applied with the SIOCSIFHWADDR ioctl on an AF_INET datagram socket.
 */
static int setup_hw_addr(char *hwaddr, const char *ifname)
1410
struct sockaddr sockaddr;
1414
ret = lxc_convert_mac(hwaddr, &sockaddr);
1416
ERROR("mac address '%s' conversion failed : %s",
1417
hwaddr, strerror(-ret));
/* NOTE(review): IFNAMSIZ bytes are copied whatever the length of
 * ifname; a caller passing a shorter string makes this read past its
 * terminator. Confirm that callers always pass an IFNAMSIZ buffer. */
1421
memcpy(ifr.ifr_name, ifname, IFNAMSIZ);
1422
memcpy((char *) &ifr.ifr_hwaddr, (char *) &sockaddr, sizeof(sockaddr));
1424
fd = socket(AF_INET, SOCK_DGRAM, 0);
1426
ERROR("socket failure : %s", strerror(errno));
1430
ret = ioctl(fd, SIOCSIFHWADDR, &ifr);
1433
ERROR("ioctl failure : %s", strerror(errno));
1435
DEBUG("mac address '%s' on '%s' has been setup", hwaddr, ifname);
1440
/*
 * Add every IPv4 address of the list to the interface ifindex.
 * Each element supplies address, broadcast address and prefix length
 * for lxc_ipv4_addr_add(); its error value is logged through
 * strerror(-err).
 */
static int setup_ipv4_addr(struct lxc_list *ip, int ifindex)
1442
struct lxc_list *iterator;
1443
struct lxc_inetdev *inetdev;
1446
lxc_list_for_each(iterator, ip) {
1448
inetdev = iterator->elem;
1450
err = lxc_ipv4_addr_add(ifindex, &inetdev->addr,
1451
&inetdev->bcast, inetdev->prefix);
1453
ERROR("failed to setup_ipv4_addr ifindex %d : %s",
1454
ifindex, strerror(-err));
1462
/*
 * Add every IPv6 address of the list to the interface ifindex.
 * Each element supplies the address together with its multicast and
 * anycast addresses for lxc_ipv6_addr_add(); its error value is logged
 * through strerror(-err).
 */
static int setup_ipv6_addr(struct lxc_list *ip, int ifindex)
1464
struct lxc_list *iterator;
1465
struct lxc_inet6dev *inet6dev;
1468
lxc_list_for_each(iterator, ip) {
1470
inet6dev = iterator->elem;
1472
err = lxc_ipv6_addr_add(ifindex, &inet6dev->addr,
1473
&inet6dev->mcast, &inet6dev->acast,
1476
ERROR("failed to setup_ipv6_addr ifindex %d : %s",
1477
ifindex, strerror(-err));
1485
/*
 * Configure one network device.
 *
 * A device without an ifindex stands for an empty network namespace:
 * only the loopback is brought up, and only when IFF_UP is requested.
 * Otherwise the visible steps are: look up the current interface name,
 * rename it to netdev->name (defaulting to netdev->link for physical
 * devices and to the "eth%d" pattern for the others), re-read the
 * name, set the MAC address when one is configured, add the IPv4 and
 * IPv6 addresses, bring the interface and the loopback up when IFF_UP
 * is set, and finally add the IPv4 and IPv6 default gateways.
 */
static int setup_netdev(struct lxc_netdev *netdev)
1487
char ifname[IFNAMSIZ];
1488
char *current_ifname = ifname;
1491
/* empty network namespace */
1492
if (!netdev->ifindex) {
1493
if (netdev->flags & IFF_UP) {
1494
err = lxc_netdev_up("lo");
1496
ERROR("failed to set the loopback up : %s",
1504
/* retrieve the name of the interface */
1505
if (!if_indextoname(netdev->ifindex, current_ifname)) {
1506
ERROR("no interface corresponding to index '%d'",
1511
/* default: let the system to choose one interface name */
1513
netdev->name = netdev->type == LXC_NET_PHYS ?
1514
netdev->link : "eth%d";
1516
/* rename the interface name */
1517
err = lxc_netdev_rename_by_name(ifname, netdev->name);
1519
ERROR("failed to rename %s->%s : %s", ifname, netdev->name,
1524
/* Re-read the name of the interface because its name has changed
1525
* and would be automatically allocated by the system
1527
if (!if_indextoname(netdev->ifindex, current_ifname)) {
1528
ERROR("no interface corresponding to index '%d'",
1533
/* set a mac address */
1534
if (netdev->hwaddr) {
1535
if (setup_hw_addr(netdev->hwaddr, current_ifname)) {
1536
ERROR("failed to setup hw address for '%s'",
1542
/* setup ipv4 addresses on the interface */
1543
if (setup_ipv4_addr(&netdev->ipv4, netdev->ifindex)) {
1544
ERROR("failed to setup ip addresses for '%s'",
1549
/* setup ipv6 addresses on the interface */
1550
if (setup_ipv6_addr(&netdev->ipv6, netdev->ifindex)) {
1551
ERROR("failed to setup ipv6 addresses for '%s'",
1556
/* set the network device up */
1557
if (netdev->flags & IFF_UP) {
1560
err = lxc_netdev_up(current_ifname);
1562
ERROR("failed to set '%s' up : %s", current_ifname,
1567
/* the network is up, make the loopback up too */
1568
err = lxc_netdev_up("lo");
1570
ERROR("failed to set the loopback up : %s",
1576
/* We can only set up the default routes after bringing
1577
* up the interface, since bringing up the interface adds
1578
* the link-local routes and we can't add a default
1579
* route if the gateway is not reachable. */
1581
/* setup ipv4 gateway on the interface */
1582
if (netdev->ipv4_gateway) {
1583
if (!(netdev->flags & IFF_UP)) {
1584
ERROR("Cannot add ipv4 gateway for %s when not bringing up the interface", ifname);
1588
if (lxc_list_empty(&netdev->ipv4)) {
1589
ERROR("Cannot add ipv4 gateway for %s when not assigning an address", ifname);
1593
err = lxc_ipv4_gateway_add(netdev->ifindex, netdev->ipv4_gateway);
1595
ERROR("failed to setup ipv4 gateway for '%s': %s",
1596
ifname, strerror(-err));
1597
if (netdev->ipv4_gateway_auto) {
1598
char buf[INET_ADDRSTRLEN];
1599
inet_ntop(AF_INET, netdev->ipv4_gateway, buf, sizeof(buf));
1600
ERROR("tried to set autodetected ipv4 gateway '%s'", buf);
1606
/* setup ipv6 gateway on the interface */
1607
if (netdev->ipv6_gateway) {
1608
if (!(netdev->flags & IFF_UP)) {
1609
ERROR("Cannot add ipv6 gateway for %s when not bringing up the interface", ifname);
1613
if (lxc_list_empty(&netdev->ipv6) && !IN6_IS_ADDR_LINKLOCAL(netdev->ipv6_gateway)) {
1614
ERROR("Cannot add ipv6 gateway for %s when not assigning an address", ifname);
1618
err = lxc_ipv6_gateway_add(netdev->ifindex, netdev->ipv6_gateway);
1620
ERROR("failed to setup ipv6 gateway for '%s': %s",
1621
ifname, strerror(-err));
1622
if (netdev->ipv6_gateway_auto) {
1623
char buf[INET6_ADDRSTRLEN];
1624
inet_ntop(AF_INET6, netdev->ipv6_gateway, buf, sizeof(buf));
1625
ERROR("tried to set autodetected ipv6 gateway '%s'", buf);
1631
DEBUG("'%s' has been setup", current_ifname);
1636
/*
 * Run setup_netdev() on every device of the network list; a failure is
 * logged. The closing INFO message is only emitted for a non-empty
 * list.
 */
static int setup_network(struct lxc_list *network)
1638
struct lxc_list *iterator;
1639
struct lxc_netdev *netdev;
1641
lxc_list_for_each(iterator, network) {
1643
netdev = iterator->elem;
1645
if (setup_netdev(netdev)) {
1646
ERROR("failed to setup netdev");
1651
if (!lxc_list_empty(network))
1652
INFO("network has been setup");
1657
/*
 * Rewrite the MAC address of interface veth1 so that its first byte is
 * 0xfe: the current address is read with SIOCGIFHWADDR, the first byte
 * replaced, and the result written back with SIOCSIFHWADDR.
 */
static int setup_private_host_hw_addr(char *veth1)
1663
sockfd = socket(AF_INET, SOCK_DGRAM, 0);
/* snprintf() keeps the interface name NUL-terminated within IFNAMSIZ. */
1667
snprintf((char *)ifr.ifr_name, IFNAMSIZ, "%s", veth1);
1668
err = ioctl(sockfd, SIOCGIFHWADDR, &ifr);
1674
ifr.ifr_hwaddr.sa_data[0] = 0xfe;
1675
err = ioctl(sockfd, SIOCSIFHWADDR, &ifr);
1680
DEBUG("mac address of host interface '%s' changed to private "
1681
"%02x:%02x:%02x:%02x:%02x:%02x", veth1,
1682
ifr.ifr_hwaddr.sa_data[0] & 0xff,
1683
ifr.ifr_hwaddr.sa_data[1] & 0xff,
1684
ifr.ifr_hwaddr.sa_data[2] & 0xff,
1685
ifr.ifr_hwaddr.sa_data[3] & 0xff,
1686
ifr.ifr_hwaddr.sa_data[4] & 0xff,
1687
ifr.ifr_hwaddr.sa_data[5] & 0xff);
1692
/*
 * Allocate a struct lxc_conf and give it its default values.
 * The structure is zeroed first; the personality and the console
 * descriptors (peer, master, slave) are then set to -1, the rootfs
 * mount point to LXCROOTFSMOUNT, and the cgroup, network, mount, caps
 * and per-hook lists are initialised empty.
 */
struct lxc_conf *lxc_conf_init(void)
1694
struct lxc_conf *new;
1697
new = malloc(sizeof(*new));
1699
ERROR("lxc_conf_init : %m");
1702
memset(new, 0, sizeof(*new));
1704
new->umount_proc = 0;
1705
new->personality = -1;
1706
new->console.path = NULL;
1707
new->console.peer = -1;
1708
new->console.master = -1;
1709
new->console.slave = -1;
1710
new->console.name[0] = '\0';
1711
new->rootfs.mount = LXCROOTFSMOUNT;
1712
lxc_list_init(&new->cgroup);
1713
lxc_list_init(&new->network);
1714
lxc_list_init(&new->mount_list);
1715
lxc_list_init(&new->caps);
1716
new->aa_profile = NULL;
1717
for (i=0; i<NUM_LXC_HOOKS; i++)
1718
lxc_list_init(&new->hooks[i]);
1723
/*
 * Create a veth pair for netdev.
 *
 * veth1 is the host-side name: netdev->priv.veth_attr.pair when set,
 * otherwise a generated "vethXXXXXX" name. veth2 is always generated
 * and its interface index is stored in netdev->ifindex. After the
 * pair is created, the visible steps give veth1 a private MAC address,
 * apply netdev->mtu to both ends, attach veth1 to the bridge
 * netdev->link, bring veth1 up and run netdev->upscript when one is
 * configured. The last visible statement deletes veth1 by name
 * (presumably the error path; confirm).
 *
 * NOTE(review): mktemp() only generates a name and is documented as
 * unsafe for files; here it is used purely as a name generator, so two
 * concurrent callers could still pick the same name.
 * NOTE(review): netdev->mtu is converted with atoi(), which cannot
 * report malformed input.
 */
static int instanciate_veth(struct lxc_handler *handler, struct lxc_netdev *netdev)
1725
char veth1buf[IFNAMSIZ], *veth1;
1726
char veth2buf[IFNAMSIZ], *veth2;
1729
if (netdev->priv.veth_attr.pair)
1730
veth1 = netdev->priv.veth_attr.pair;
1732
err = snprintf(veth1buf, sizeof(veth1buf), "vethXXXXXX");
1733
if (err >= sizeof(veth1buf)) { /* can't *really* happen, but... */
1734
ERROR("veth1 name too long");
1737
veth1 = mktemp(veth1buf);
1740
snprintf(veth2buf, sizeof(veth2buf), "vethXXXXXX");
1741
veth2 = mktemp(veth2buf);
/* mktemp() leaves an empty string behind when it cannot build a name. */
1743
if (!strlen(veth1) || !strlen(veth2)) {
1744
ERROR("failed to allocate a temporary name");
1748
err = lxc_veth_create(veth1, veth2);
1750
ERROR("failed to create %s-%s : %s", veth1, veth2,
1755
/* changing the high byte of the mac address to 0xfe, the bridge interface
1756
* will always keep the host's mac address and not take the mac address
1758
err = setup_private_host_hw_addr(veth1);
1760
ERROR("failed to change mac address of host interface '%s' : %s",
1761
veth1, strerror(-err));
1766
err = lxc_netdev_set_mtu(veth1, atoi(netdev->mtu));
1768
err = lxc_netdev_set_mtu(veth2, atoi(netdev->mtu));
1770
ERROR("failed to set mtu '%s' for %s-%s : %s",
1771
netdev->mtu, veth1, veth2, strerror(-err));
1777
err = lxc_bridge_attach(netdev->link, veth1);
1779
ERROR("failed to attach '%s' to the bridge '%s' : %s",
1780
veth1, netdev->link, strerror(-err));
1785
netdev->ifindex = if_nametoindex(veth2);
1786
if (!netdev->ifindex) {
1787
ERROR("failed to retrieve the index for %s", veth2);
1791
err = lxc_netdev_up(veth1);
1793
ERROR("failed to set %s up : %s", veth1, strerror(-err));
1797
if (netdev->upscript) {
1798
err = run_script(handler->name, "net", netdev->upscript, "up",
1799
"veth", veth1, (char*) NULL);
1804
DEBUG("instanciated veth '%s/%s', index is '%d'",
1805
veth1, veth2, netdev->ifindex);
1810
lxc_netdev_delete_by_name(veth1);
1814
/*
 * Create a macvlan interface on top of netdev->link.
 *
 * A link is mandatory. The new interface gets a name generated from the
 * "mcXXXXXX" template and is created with the mode stored in
 * netdev->priv.macvlan_attr.mode. Its interface index is recorded in
 * netdev->ifindex; when the index cannot be resolved the interface is
 * deleted again. The optional up script is run with the arguments
 * "up", "macvlan", netdev->link.
 */
static int instanciate_macvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
1816
char peerbuf[IFNAMSIZ], *peer;
1819
if (!netdev->link) {
1820
ERROR("no link specified for macvlan netdev");
1824
err = snprintf(peerbuf, sizeof(peerbuf), "mcXXXXXX");
1825
if (err >= sizeof(peerbuf))
1828
/* NOTE(review): mktemp() only generates a name, it does not reserve it */
peer = mktemp(peerbuf);
1829
/* an empty name is taken as a failed name generation */
if (!strlen(peer)) {
1830
ERROR("failed to make a temporary name");
1834
err = lxc_macvlan_create(netdev->link, peer,
1835
netdev->priv.macvlan_attr.mode);
1837
ERROR("failed to create macvlan interface '%s' on '%s' : %s",
1838
peer, netdev->link, strerror(-err));
1842
netdev->ifindex = if_nametoindex(peer);
1843
if (!netdev->ifindex) {
1844
ERROR("failed to retrieve the index for %s", peer);
1845
/* do not leave the freshly created interface behind */
lxc_netdev_delete_by_name(peer);
1849
if (netdev->upscript) {
1850
err = run_script(handler->name, "net", netdev->upscript, "up",
1851
"macvlan", netdev->link, (char*) NULL);
1856
DEBUG("instanciated macvlan '%s', index is '%d' and mode '%d'",
1857
peer, netdev->ifindex, netdev->priv.macvlan_attr.mode);
1862
/* XXX: merge with instanciate_macvlan */
1863
static int instanciate_vlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
1865
char peer[IFNAMSIZ];
1868
if (!netdev->link) {
1869
ERROR("no link specified for vlan netdev");
1873
err = snprintf(peer, sizeof(peer), "vlan%d", netdev->priv.vlan_attr.vid);
1874
if (err >= sizeof(peer)) {
1875
ERROR("peer name too long");
1879
err = lxc_vlan_create(netdev->link, peer, netdev->priv.vlan_attr.vid);
1881
ERROR("failed to create vlan interface '%s' on '%s' : %s",
1882
peer, netdev->link, strerror(-err));
1886
netdev->ifindex = if_nametoindex(peer);
1887
if (!netdev->ifindex) {
1888
ERROR("failed to retrieve the ifindex for %s", peer);
1889
lxc_netdev_delete_by_name(peer);
1893
DEBUG("instanciated vlan '%s', ifindex is '%d'", " vlan1000",
1899
/*
 * Use the existing physical interface netdev->link for 'netdev'.
 *
 * Nothing is created: a link is mandatory, its interface index is looked
 * up and stored in netdev->ifindex, and the optional up script is run with
 * the arguments "up", "phys", netdev->link.
 */
static int instanciate_phys(struct lxc_handler *handler, struct lxc_netdev *netdev)
1901
if (!netdev->link) {
1902
ERROR("no link specified for the physical interface");
1906
netdev->ifindex = if_nametoindex(netdev->link);
1907
/* if_nametoindex() yields 0 when the name cannot be resolved */
if (!netdev->ifindex) {
1908
ERROR("failed to retrieve the index for %s", netdev->link);
1912
if (netdev->upscript) {
1914
err = run_script(handler->name, "net", netdev->upscript,
1915
"up", "phys", netdev->link, (char*) NULL);
1923
/*
 * Handle a netdev of the "empty" kind: no interface is created.
 *
 * netdev->ifindex is set to 0 and the optional up script is run with the
 * arguments "up", "empty".
 */
static int instanciate_empty(struct lxc_handler *handler, struct lxc_netdev *netdev)
1925
/* lxc_delete_network() and lxc_assign_network() test for an index of 0 */
netdev->ifindex = 0;
1926
if (netdev->upscript) {
1928
err = run_script(handler->name, "net", netdev->upscript,
1929
"up", "empty", (char*) NULL);
1936
/*
 * Instantiate every network device configured for the container of
 * 'handler'.
 *
 * For each element of handler->conf->network the type is range checked and
 * the matching entry of the netdev_conf[] callback table is invoked with
 * (handler, netdev). A non-zero callback result is reported as an error.
 */
int lxc_create_network(struct lxc_handler *handler)
1938
struct lxc_list *network = &handler->conf->network;
1939
struct lxc_list *iterator;
1940
struct lxc_netdev *netdev;
1942
lxc_list_for_each(iterator, network) {
1944
netdev = iterator->elem;
1946
/*
 * The type is used as an index into netdev_conf[] below.
 * NOTE(review): type == LXC_NET_MAXCONFTYPE passes this check; confirm
 * that netdev_conf[] has an entry at that index.
 */
if (netdev->type < 0 || netdev->type > LXC_NET_MAXCONFTYPE) {
1947
ERROR("invalid network configuration type '%d'",
1952
/* dispatch to the instanciate_* callback of this type */
if (netdev_conf[netdev->type](handler, netdev)) {
1953
ERROR("failed to create netdev");
1962
/*
 * Tear down the network devices of the 'network' list.
 *
 * For every element: an interface index of 0 is tested first (the
 * statement guarded by that test is not visible here, presumably the
 * element is skipped). A device of type LXC_NET_PHYS is renamed back to
 * netdev->link through its index. The last visible step deletes the
 * interface by index. Failures of the rename and of the delete are only
 * logged as warnings.
 */
void lxc_delete_network(struct lxc_list *network)
1964
struct lxc_list *iterator;
1965
struct lxc_netdev *netdev;
1967
lxc_list_for_each(iterator, network) {
1968
netdev = iterator->elem;
1969
if (netdev->ifindex == 0)
1972
if (netdev->type == LXC_NET_PHYS) {
1973
if (lxc_netdev_rename_by_index(netdev->ifindex, netdev->link))
1974
WARN("failed to rename to the initial name the " \
1975
"netdev '%s'", netdev->link);
1979
/* Recent kernel remove the virtual interfaces when the network
1980
* namespace is destroyed but in case we did not moved the
1981
* interface to the network namespace, we have to destroy it
1983
if (lxc_netdev_delete_by_index(netdev->ifindex))
1984
WARN("failed to remove interface '%s'", netdev->name);
1988
/*
 * Hand the network devices of the 'network' list over to the process
 * 'pid'.
 *
 * Every element with a non-zero interface index is passed to
 * lxc_netdev_move_by_index() together with 'pid'. A failure is reported
 * with an error message, a successful move with a debug message.
 */
int lxc_assign_network(struct lxc_list *network, pid_t pid)
1990
struct lxc_list *iterator;
1991
struct lxc_netdev *netdev;
1994
lxc_list_for_each(iterator, network) {
1996
netdev = iterator->elem;
1998
/* empty network namespace, nothing to move */
1999
if (!netdev->ifindex)
2002
err = lxc_netdev_move_by_index(netdev->ifindex, pid);
2004
/*
 * NOTE(review): the error message prints netdev->link while the debug
 * message below prints netdev->name; confirm which one is intended.
 */
ERROR("failed to move '%s' to the container : %s",
2005
netdev->link, strerror(-err));
2009
DEBUG("move '%s' to '%d'", netdev->name, pid);
2015
/*
 * Resolve the gateways that were configured as "auto".
 *
 * Only elements of handler->conf->network with ipv4_gateway_auto or
 * ipv6_gateway_auto set are considered. Such an element must be of type
 * LXC_NET_VETH or LXC_NET_MACVLAN and must name a link interface. The
 * address of that link interface is then queried per address family with
 * lxc_ipv4_addr_get() / lxc_ipv6_addr_get(), which receive the address of
 * the matching netdev gateway field as output argument.
 */
int lxc_find_gateway_addresses(struct lxc_handler *handler)
2017
struct lxc_list *network = &handler->conf->network;
2018
struct lxc_list *iterator;
2019
struct lxc_netdev *netdev;
2022
lxc_list_for_each(iterator, network) {
2023
netdev = iterator->elem;
2025
/* nothing was requested for this device */
if (!netdev->ipv4_gateway_auto && !netdev->ipv6_gateway_auto)
2028
if (netdev->type != LXC_NET_VETH && netdev->type != LXC_NET_MACVLAN) {
2029
ERROR("gateway = auto only supported for "
2030
"veth and macvlan");
2034
if (!netdev->link) {
2035
ERROR("gateway = auto needs a link interface");
2039
/* the address lookups below work on the index of the link interface */
link_index = if_nametoindex(netdev->link);
2043
if (netdev->ipv4_gateway_auto) {
2044
if (lxc_ipv4_addr_get(link_index, &netdev->ipv4_gateway)) {
2045
ERROR("failed to automatically find ipv4 gateway "
2046
"address from link interface '%s'", netdev->link);
2051
if (netdev->ipv6_gateway_auto) {
2052
if (lxc_ipv6_addr_get(link_index, &netdev->ipv6_gateway)) {
2053
ERROR("failed to automatically find ipv6 gateway "
2054
"address from link interface '%s'", netdev->link);
2063
/*
 * Allocate the pseudo terminals requested by conf->tty.
 *
 * An array of conf->tty pty_info entries is allocated in conf->tty_info
 * and one pty is opened per entry with openpty(), which fills in the
 * master descriptor, the slave descriptor and the name. Both descriptors
 * are flagged close-on-exec. When an openpty() call fails, the entries
 * opened so far are released through lxc_delete_tty().
 */
int lxc_create_tty(const char *name, struct lxc_conf *conf)
2065
struct lxc_tty_info *tty_info = &conf->tty_info;
2068
/* no tty in the configuration */
2072
tty_info->pty_info =
2073
malloc(sizeof(*tty_info->pty_info)*conf->tty);
2074
if (!tty_info->pty_info) {
2075
SYSERROR("failed to allocate pty_info");
2079
for (i = 0; i < conf->tty; i++) {
2081
struct lxc_pty_info *pty_info = &tty_info->pty_info[i];
2083
if (openpty(&pty_info->master, &pty_info->slave,
2084
pty_info->name, NULL, NULL)) {
2085
SYSERROR("failed to create pty #%d", i);
2086
/* entries 0..i-1 were opened, let lxc_delete_tty() close exactly those */
tty_info->nbtty = i;
2087
lxc_delete_tty(tty_info);
2091
DEBUG("allocated pty '%s' (%d/%d)",
2092
pty_info->name, pty_info->master, pty_info->slave);
2094
/* Prevent leaking the file descriptors to the container */
2095
/* the fcntl() results are not checked */
fcntl(pty_info->master, F_SETFD, FD_CLOEXEC);
2096
fcntl(pty_info->slave, F_SETFD, FD_CLOEXEC);
2101
tty_info->nbtty = conf->tty;
2103
INFO("tty's configured");
2108
void lxc_delete_tty(struct lxc_tty_info *tty_info)
2112
for (i = 0; i < tty_info->nbtty; i++) {
2113
struct lxc_pty_info *pty_info = &tty_info->pty_info[i];
2115
close(pty_info->master);
2116
close(pty_info->slave);
2119
free(tty_info->pty_info);
2120
tty_info->nbtty = 0;
2124
* make sure /proc/self exists, and points to '1', since we are the
2126
* Else mount /proc. Return 0 if proc was
2127
* already mounted, 1 if we mounted it, -1 if we failed.
2129
/*
 * Mount proc below the container root unless the right one is already
 * there.
 *
 * root_src is only tested for being non-NULL: when it is set, paths are
 * built below rootfs_tgt, otherwise below the empty prefix. The target of
 * <prefix>/proc/self is read with readlink() and compared with "1". If the
 * link cannot be read, proc is taken as not mounted; if it points
 * elsewhere, the existing mount on <prefix>/proc is lazily detached. The
 * last visible step mounts a new proc instance on <prefix>/proc.
 */
static int mount_proc_if_needed(char *root_src, char *rootfs_tgt)
2131
char path[MAXPATHLEN];
2135
ret = snprintf(path, MAXPATHLEN, "%s/proc/self", root_src ? rootfs_tgt : "");
2136
if (ret < 0 || ret >= MAXPATHLEN) {
2137
SYSERROR("proc path name too long");
2140
/*
 * readlink() does not append a terminating NUL, hence the buffer is
 * zeroed first.
 * NOTE(review): 20 presumably equals the size of 'link' (declaration not
 * visible here); a link target of 20 bytes or more would then leave the
 * buffer unterminated for the INFO() below.
 */
memset(link, 0, 20);
2141
linklen = readlink(path, link, 20);
2142
INFO("I am %d, /proc/self points to %s\n", getpid(), link);
2143
/* from here on 'path' names the proc mount point itself */
ret = snprintf(path, MAXPATHLEN, "%s/proc", root_src ? rootfs_tgt : "");
2144
if (linklen < 0) /* /proc not mounted */
2146
/* can't be longer than rootfs/proc/1 */
2147
if (strncmp(link, "1", linklen) != 0) {
2148
/* wrong /procs mounted */
2149
umount2(path, MNT_DETACH); /* ignore failure */
2152
/* the right proc is already mounted */
2156
if (mount("proc", path, "proc", 0, NULL))
2158
INFO("Mounted /proc for the container\n");
2162
/*
 * Apply the configuration 'lxc_conf' to the container called 'name'.
 *
 * The steps visible in this function run in this order: utsname, network,
 * "pre-mount" hooks, rootfs, fstab mounts, mount entries, cgroups, "mount"
 * hooks, console, ttys, mounting /proc if needed (umount_proc is set to 1
 * when this function mounted it), pivot root, pts, personality and
 * capabilities. A failing step is reported with an error message naming
 * the step. A notice is logged once everything is set up.
 */
int lxc_setup(const char *name, struct lxc_conf *lxc_conf)
2166
if (setup_utsname(lxc_conf->utsname)) {
2167
ERROR("failed to setup the utsname for '%s'", name);
2171
if (setup_network(&lxc_conf->network)) {
2172
ERROR("failed to setup the network for '%s'", name);
2176
/* the pre-mount hooks run before the rootfs is set up */
if (run_lxc_hooks(name, "pre-mount", lxc_conf)) {
2177
ERROR("failed to run pre-mount hooks for container '%s'.", name);
2181
if (setup_rootfs(&lxc_conf->rootfs)) {
2182
ERROR("failed to setup rootfs for '%s'", name);
2186
if (setup_mount(&lxc_conf->rootfs, lxc_conf->fstab, name)) {
2187
ERROR("failed to setup the mounts for '%s'", name);
2191
if (setup_mount_entries(&lxc_conf->rootfs, &lxc_conf->mount_list, name)) {
2192
ERROR("failed to setup the mount entries for '%s'", name);
2196
if (setup_cgroup(name, &lxc_conf->cgroup)) {
2197
ERROR("failed to setup the cgroups for '%s'", name);
2200
/* the mount hooks run after the fstab and mount entries were processed */
if (run_lxc_hooks(name, "mount", lxc_conf)) {
2201
ERROR("failed to run mount hooks for container '%s'.", name);
2205
if (setup_console(&lxc_conf->rootfs, &lxc_conf->console, lxc_conf->ttydir)) {
2206
ERROR("failed to setup the console for '%s'", name);
2210
if (setup_tty(&lxc_conf->rootfs, &lxc_conf->tty_info, lxc_conf->ttydir)) {
2211
ERROR("failed to setup the ttys for '%s'", name);
2215
/* aa_change_onexec makes more sense since we want to transition when
2216
* /sbin/init is exec'd. But the transitions doesn't seem to work
2217
* then (refused). aa_change_onexec will work since we're doing it
2218
* right before the exec, so we'll just use that for now.
2219
* In case the container fstab didn't mount /proc, we mount it.
2221
INFO("rootfs path is .%s., mount is .%s.", lxc_conf->rootfs.path,
2222
lxc_conf->rootfs.mount);
2224
mounted = mount_proc_if_needed(lxc_conf->rootfs.path, lxc_conf->rootfs.mount);
2225
if (mounted == -1) {
2226
SYSERROR("failed to mount /proc in the container.");
2228
} else if (mounted == 1) {
2229
lxc_conf->umount_proc = 1;
2232
if (setup_pivot_root(&lxc_conf->rootfs)) {
2233
ERROR("failed to set rootfs for '%s'", name);
2237
if (setup_pts(lxc_conf->pts)) {
2238
ERROR("failed to setup the new pts instance");
2242
if (setup_personality(lxc_conf->personality)) {
2243
ERROR("failed to setup personality");
2247
if (setup_caps(&lxc_conf->caps)) {
2248
ERROR("failed to drop capabilities");
2252
NOTICE("'%s' is setup.", name);
2257
/*
 * Run the hook scripts registered for one hook point.
 *
 * 'hook' is the textual hook name; it is mapped to an LXCHOOK_* index
 * which selects the list conf->hooks[which]. Every element of that list is
 * a script path that is started through run_script() with the container
 * name, the section "lxc" and the hook name as arguments.
 *
 * Recognised names: "pre-start", "pre-mount", "mount", "start",
 * "post-stop". The handling of any other name is not visible here.
 */
int run_lxc_hooks(const char *name, char *hook, struct lxc_conf *conf)
2260
struct lxc_list *it;
2262
if (strcmp(hook, "pre-start") == 0)
2263
which = LXCHOOK_PRESTART;
2264
else if (strcmp(hook, "pre-mount") == 0)
2265
which = LXCHOOK_PREMOUNT;
2266
else if (strcmp(hook, "mount") == 0)
2267
which = LXCHOOK_MOUNT;
2268
else if (strcmp(hook, "start") == 0)
2269
which = LXCHOOK_START;
2270
else if (strcmp(hook, "post-stop") == 0)
2271
which = LXCHOOK_POSTSTOP;
2274
lxc_list_for_each(it, &conf->hooks[which]) {
2276
/* each list element is the path of one hook script */
char *hookname = it->elem;
2277
ret = run_script(name, "lxc", hookname, hook, NULL);
2284
/*
 * Release the network device stored in the list element 'it'.
 *
 * The statements visible here free the up script, the hardware address and
 * both gateway addresses of the netdev and then walk its ipv4 and ipv6
 * address lists (the loop bodies are not visible here).
 */
static void lxc_remove_nic(struct lxc_list *it)
2286
struct lxc_netdev *netdev = it->elem;
2287
struct lxc_list *it2;
2295
if (netdev->upscript)
2296
free(netdev->upscript);
2298
free(netdev->hwaddr);
2301
if (netdev->ipv4_gateway)
2302
free(netdev->ipv4_gateway);
2303
if (netdev->ipv6_gateway)
2304
free(netdev->ipv6_gateway);
2305
/* per-family address lists of this device */
lxc_list_for_each(it2, &netdev->ipv4) {
2310
lxc_list_for_each(it2, &netdev->ipv6) {
2318
/* we get passed in something like '0', '0.ipv4' or '1.ipv6' */
/*
 * Clear one network device of 'c', or a single setting of it.
 *
 * 'key' starts with the decimal index of the device inside c->network. An
 * optional ".<field>" suffix selects what is cleared; the fields handled
 * in the visible branches are "ipv4", "ipv6", "link", "name", "script.up",
 * "hwaddr", "mtu", "ipv4_gateway" and "ipv6_gateway". A key that does not
 * start with a number yields -1.
 */
int lxc_clear_nic(struct lxc_conf *c, char *key)
2323
struct lxc_list *it;
2324
struct lxc_netdev *netdev;
2326
/* locate the optional ".<field>" suffix */
p1 = index(key, '.');
2327
/* no dot at all, or nothing behind the dot */
if (!p1 || *(p1+1) == '\0')
2330
ret = sscanf(key, "%d", &idx);
2331
if (ret != 1) return -1;
2336
/* walk the device list looking for element number idx */
lxc_list_for_each(it, &c->network) {
2341
if (i < idx) // we don't have that many nics defined
2344
if (!it || !it->elem)
2351
} else if (strcmp(p1, "ipv4") == 0) {
2352
struct lxc_list *it2;
2353
lxc_list_for_each(it2, &netdev->ipv4) {
2358
} else if (strcmp(p1, "ipv6") == 0) {
2359
struct lxc_list *it2;
2360
lxc_list_for_each(it2, &netdev->ipv6) {
2365
} else if (strcmp(p1, "link") == 0) {
2368
netdev->link = NULL;
2370
} else if (strcmp(p1, "name") == 0) {
2373
netdev->name = NULL;
2375
} else if (strcmp(p1, "script.up") == 0) {
2376
if (netdev->upscript) {
2377
free(netdev->upscript);
2378
/* reset the pointer so the freed script is not referenced again */
netdev->upscript = NULL;
2380
} else if (strcmp(p1, "hwaddr") == 0) {
2381
if (netdev->hwaddr) {
2382
free(netdev->hwaddr);
2383
netdev->hwaddr = NULL;
2385
} else if (strcmp(p1, "mtu") == 0) {
2390
} else if (strcmp(p1, "ipv4_gateway") == 0) {
2391
if (netdev->ipv4_gateway) {
2392
free(netdev->ipv4_gateway);
2393
netdev->ipv4_gateway = NULL;
2395
} else if (strcmp(p1, "ipv6_gateway") == 0) {
2396
if (netdev->ipv6_gateway) {
2397
free(netdev->ipv6_gateway);
2398
netdev->ipv6_gateway = NULL;
2406
/*
 * Walk all network devices of 'c' (c->network).
 *
 * The loop body is not visible here; going by the function name each
 * device is presumably released - confirm against the complete source.
 */
int lxc_clear_config_network(struct lxc_conf *c)
2408
struct lxc_list *it;
2409
lxc_list_for_each(it, &c->network) {
2415
/*
 * Walk all capability entries of 'c' (c->caps).
 *
 * The loop body is not visible here; going by the function name each
 * entry is presumably released - confirm against the complete source.
 */
int lxc_clear_config_caps(struct lxc_conf *c)
2417
struct lxc_list *it;
2419
lxc_list_for_each(it, &c->caps) {
2427
/*
 * Clear cgroup settings of 'c'.
 *
 * The key "lxc.cgroup" is special-cased; judging by the '!all' test in the
 * loop it presumably selects every entry. Otherwise only entries of
 * c->cgroup whose subsystem string equals 'k' are processed ('k' is
 * derived from 'key' in lines not visible here). For a processed entry the
 * subsystem string is freed.
 */
int lxc_clear_cgroups(struct lxc_conf *c, char *key)
2429
struct lxc_list *it;
2433
if (strcmp(key, "lxc.cgroup") == 0)
2436
lxc_list_for_each(it, &c->cgroup) {
2437
struct lxc_cgroup *cg = it->elem;
2438
/* entry belongs to another subsystem and not everything was requested */
if (!all && strcmp(cg->subsystem, k) != 0)
2441
free(cg->subsystem);
2449
/*
 * Walk all mount entries of 'c' (c->mount_list).
 *
 * The loop body is not visible here; going by the function name each
 * entry is presumably released - confirm against the complete source.
 */
int lxc_clear_mount_entries(struct lxc_conf *c)
2451
struct lxc_list *it;
2453
lxc_list_for_each(it, &c->mount_list) {
2461
/*
 * Walk the hook lists of 'c', one list per hook type
 * (c->hooks[0 .. NUM_LXC_HOOKS-1]).
 *
 * The inner loop body is not visible here; going by the function name each
 * hook entry is presumably released - confirm against the complete source.
 */
int lxc_clear_hooks(struct lxc_conf *c)
2463
struct lxc_list *it;
2466
for (i=0; i<NUM_LXC_HOOKS; i++) {
2467
lxc_list_for_each(it, &c->hooks[i]) {
2476
void lxc_conf_free(struct lxc_conf *conf)
2480
if (conf->console.path)
2481
free(conf->console.path);
2482
if (conf->rootfs.mount != LXCROOTFSMOUNT)
2483
free(conf->rootfs.mount);
2484
lxc_clear_config_network(conf);
2485
if (conf->aa_profile)
2486
free(conf->aa_profile);
2487
lxc_clear_config_caps(conf);
2488
lxc_clear_cgroups(conf, "lxc.cgroup");
2489
lxc_clear_hooks(conf);
2490
lxc_clear_mount_entries(conf);