2
* lxc: linux Container library
4
* (C) Copyright IBM Corp. 2007, 2008
7
* Daniel Lezcano <dlezcano at fr.ibm.com>
9
* This library is free software; you can redistribute it and/or
10
* modify it under the terms of the GNU Lesser General Public
11
* License as published by the Free Software Foundation; either
12
* version 2.1 of the License, or (at your option) any later version.
14
* This library is distributed in the hope that it will be useful,
15
* but WITHOUT ANY WARRANTY; without even the implied warranty of
16
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17
* Lesser General Public License for more details.
19
* You should have received a copy of the GNU Lesser General Public
20
* License along with this library; if not, write to the Free Software
21
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24
#include "../config.h"
35
#include <sys/param.h>
37
#include <sys/mount.h>
38
#include <sys/types.h>
39
#include <sys/prctl.h>
40
#include <sys/capability.h>
45
#ifdef HAVE_SYS_SIGNALFD_H
46
# include <sys/signalfd.h>
48
# ifndef __NR_signalfd4
49
/* assume kernel headers are too old */
51
# define __NR_signalfd4 327
53
# define __NR_signalfd4 289
55
# define __NR_signalfd4 313
57
# define __NR_signalfd4 322
61
# ifndef __NR_signalfd
62
/* assume kernel headers are too old */
64
# define __NR_signalfd 321
66
# define __NR_signalfd 282
68
# define __NR_signalfd 305
70
# define __NR_signalfd 316
74
int signalfd(int fd, const sigset_t *mask, int flags)
78
retval = syscall (__NR_signalfd4, fd, mask, _NSIG / 8, flags);
79
/* Fall back to the legacy syscall only when signalfd4 itself failed:
 * errno is meaningful only after a failing call, a stale ENOSYS must
 * not discard a valid descriptor. */
if (retval < 0 && errno == ENOSYS && flags == 0)
80
retval = syscall (__NR_signalfd, fd, mask, _NSIG / 8);
85
#if !HAVE_DECL_PR_CAPBSET_DROP
86
#define PR_CAPBSET_DROP 24
96
lxc_log_define(lxc_start, lxc);
99
LXC_TTY_HANDLER(SIGINT);
100
LXC_TTY_HANDLER(SIGQUIT);
102
static int setup_sigchld_fd(sigset_t *oldmask)
107
if (sigprocmask(SIG_BLOCK, NULL, &mask)) {
108
SYSERROR("failed to get mask signal");
112
if (sigaddset(&mask, SIGCHLD) || sigprocmask(SIG_BLOCK, &mask, oldmask)) {
113
SYSERROR("failed to set mask signal");
117
fd = signalfd(-1, &mask, 0);
119
SYSERROR("failed to create the signal fd");
123
if (fcntl(fd, F_SETFD, FD_CLOEXEC)) {
124
SYSERROR("failed to set sigfd to close-on-exec");
132
static int setup_tty_service(const char *name, int *ttyfd)
135
struct sockaddr_un addr = { 0 };
136
char *offset = &addr.sun_path[1];
138
strcpy(offset, name);
139
addr.sun_path[0] = '\0';
141
fd = lxc_af_unix_open(addr.sun_path, SOCK_STREAM, 0);
145
if (fcntl(fd, F_SETFD, FD_CLOEXEC)) {
146
SYSERROR("failed to close-on-exec flag");
156
static int sigchld_handler(int fd, void *data,
157
struct lxc_epoll_descr *descr)
161
waitpid(*pid, NULL, 0);
166
static int ttyclient_handler(int fd, void *data,
167
struct lxc_epoll_descr *descr)
170
struct lxc_tty_info *tty_info = data;
172
for (i = 0; i < tty_info->nbtty; i++) {
174
if (tty_info->pty_info[i].busy != fd)
177
lxc_mainloop_del_handler(descr, fd);
178
tty_info->pty_info[i].busy = 0;
185
static int ttyservice_handler(int fd, void *data,
186
struct lxc_epoll_descr *descr)
188
int conn, ttynum, val = 1, ret = -1;
189
struct lxc_tty_info *tty_info = data;
191
conn = accept(fd, NULL, 0);
193
SYSERROR("failed to accept tty client");
197
if (setsockopt(conn, SOL_SOCKET, SO_PASSCRED, &val, sizeof(val))) {
198
SYSERROR("failed to enable credential on socket");
202
if (lxc_af_unix_rcv_credential(conn, &ttynum, sizeof(ttynum)))
205
if (ttynum <= 0 || ttynum > tty_info->nbtty)
208
/* fixup index array (eg. tty1 is index 0) */
211
if (tty_info->pty_info[ttynum].busy)
214
if (lxc_af_unix_send_fd(conn, tty_info->pty_info[ttynum].master,
216
ERROR("failed to send tty to client");
220
if (lxc_mainloop_add_handler(descr, conn,
221
ttyclient_handler, tty_info)) {
222
ERROR("failed to add tty client handler");
226
tty_info->pty_info[ttynum].busy = conn;
237
static int mainloop(const char *name, pid_t pid, int sigfd,
238
const struct lxc_tty_info *tty_info)
240
int nfds, ttyfd = -1, ret = -1;
241
struct lxc_epoll_descr descr;
243
if (tty_info->nbtty && setup_tty_service(name, &ttyfd)) {
244
ERROR("failed to create the tty service point");
248
/* sigfd + nb tty + tty service
249
* if tty is enabled */
250
/* The conditional must be parenthesized: ?: binds looser than +, so
 * without the parentheses the whole sum became the condition and
 * nfds was always 1. */
nfds = tty_info->nbtty + 1 + (tty_info->nbtty ? 1 : 0);
252
if (lxc_mainloop_open(nfds, &descr)) {
253
ERROR("failed to create mainloop");
257
if (lxc_mainloop_add_handler(&descr, sigfd, sigchld_handler, &pid)) {
258
ERROR("failed to add handler for the signal");
259
goto out_mainloop_open;
262
if (tty_info->nbtty) {
263
if (lxc_mainloop_add_handler(&descr, ttyfd,
266
ERROR("failed to add handler for the tty");
267
goto out_mainloop_open;
271
ret = lxc_mainloop(&descr);
277
lxc_mainloop_close(&descr);
285
int lxc_start(const char *name, char *argv[])
287
struct lxc_tty_info tty_info = { 0 };
289
char init[MAXPATHLEN];
290
char tty[MAXPATHLEN];
292
int fd, sigfd, lock, sv[2], sync = 0, err = -LXC_ERROR_INTERNAL;
296
lock = lxc_get_lock(name);
300
/* Begin by setting the state to STARTING */
301
if (lxc_setstate(name, STARTING)) {
302
ERROR("failed to set state '%s'",
303
lxc_state2str(STARTING));
307
/* If we are not attached to a tty, disable it */
308
if (ttyname_r(0, tty, sizeof(tty)))
311
if (lxc_create_tty(name, &tty_info)) {
312
ERROR("failed to create the ttys");
316
/* the signal fd has to be created before forking otherwise
317
* if the child process exits before we setup the signal fd,
318
* the event will be lost and the command will be stuck */
319
sigfd = setup_sigchld_fd(&oldmask);
321
ERROR("failed to set sigchild fd handler");
325
/* Synchro socketpair */
326
if (socketpair(AF_LOCAL, SOCK_STREAM, 0, sv)) {
327
SYSERROR("failed to create communication socketpair");
331
/* Avoid signals from terminal */
332
LXC_TTY_ADD_HANDLER(SIGINT);
333
LXC_TTY_ADD_HANDLER(SIGQUIT);
335
clone_flags = CLONE_NEWPID|CLONE_NEWIPC|CLONE_NEWNS;
336
if (conf_has_utsname(name))
337
clone_flags |= CLONE_NEWUTS;
338
if (conf_has_network(name))
339
clone_flags |= CLONE_NEWNET;
341
/* Create a process in a new set of namespaces */
342
pid = fork_ns(clone_flags);
344
SYSERROR("failed to fork into a new namespace");
350
if (sigprocmask(SIG_SETMASK, &oldmask, NULL)) {
351
SYSERROR("failed to set sigprocmask");
357
/* Be sure we don't inherit this after the exec */
358
fcntl(sv[0], F_SETFD, FD_CLOEXEC);
360
/* Tell our father he can begin to configure the container */
361
if (write(sv[0], &sync, sizeof(sync)) < 0) {
362
SYSERROR("failed to write socket");
366
/* Wait for the father to finish the configuration */
367
if (read(sv[0], &sync, sizeof(sync)) < 0) {
368
SYSERROR("failed to read socket");
372
/* Setup the container, ip, names, utsname, ... */
373
err = lxc_setup(name, tty, &tty_info);
375
ERROR("failed to setup the container");
376
if (write(sv[0], &err, sizeof(err)) < 0)
377
SYSERROR("failed to write the socket");
381
if (prctl(PR_CAPBSET_DROP, CAP_SYS_BOOT, 0, 0, 0)) {
382
SYSERROR("failed to remove CAP_SYS_BOOT capability");
386
execvp(argv[0], argv);
387
SYSERROR("failed to exec %s", argv[0]);
389
err = LXC_ERROR_WRONG_COMMAND;
390
/* If the exec fails, tell that to our father */
391
if (write(sv[0], &err, sizeof(err)) < 0)
392
SYSERROR("failed to write the socket");
400
/* Wait for the child to be ready */
401
if (read(sv[1], &sync, sizeof(sync)) < 0) {
402
SYSERROR("failed to read the socket");
406
if (lxc_link_nsgroup(name, pid))
407
WARN("cgroupfs not found: cgroup disabled");
409
/* Create the network configuration */
410
if (clone_flags & CLONE_NEWNET && conf_create_network(name, pid)) {
411
ERROR("failed to create the configured network");
412
goto err_create_network;
415
/* Tell the child to continue its initialization */
416
if (write(sv[1], &sync, sizeof(sync)) < 0) {
417
SYSERROR("failed to write the socket");
421
/* Wait for the child to exec or to return an error */
422
err = read(sv[1], &sync, sizeof(sync));
424
ERROR("failed to read the socket");
430
waitpid(pid, NULL, 0);
431
goto err_child_failed;
434
/* asprintf() returns -1 on failure (never 0 for this format), so the
 * error must be detected with a negative-return check. */
if (asprintf(&val, "%d\n", pid) < 0) {
435
SYSERROR("failed to allocate memory");
436
goto err_child_failed;
439
snprintf(init, MAXPATHLEN, LXCPATH "/%s/init", name);
441
fd = open(init, O_WRONLY|O_CREAT|O_TRUNC, S_IRUSR|S_IWUSR);
443
SYSERROR("failed to open '%s'", init);
447
if (write(fd, val, strlen(val)) < 0) {
448
SYSERROR("failed to write the init pid");
454
if (lxc_setstate(name, RUNNING)) {
455
ERROR("failed to set state to %s",
456
lxc_state2str(RUNNING));
457
goto err_state_failed;
460
if (mainloop(name, pid, sigfd, &tty_info)) {
461
ERROR("mainloop exited with an error");
462
goto err_mailoop_failed;
465
if (lxc_setstate(name, STOPPING))
466
ERROR("failed to set state %s", lxc_state2str(STOPPING));
468
if (clone_flags & CLONE_NEWNET && conf_destroy_network(name))
469
ERROR("failed to destroy the network");
473
if (lxc_setstate(name, STOPPED))
474
ERROR("failed to set state %s", lxc_state2str(STOPPED));
476
lxc_delete_tty(&tty_info);
477
lxc_unlink_nsgroup(name);
481
LXC_TTY_DEL_HANDLER(SIGQUIT);
482
LXC_TTY_DEL_HANDLER(SIGINT);
493
if (clone_flags & CLONE_NEWNET)
494
conf_destroy_network(name);
498
if (lxc_setstate(name, ABORTING))
499
ERROR("failed to set state %s", lxc_state2str(STOPPED));