@@ -40,6 +40,7 @@
 #include <linux/tracehook.h>
 #include <linux/futex.h>
 #include <linux/compat.h>
+#include <linux/kthread.h>
 #include <linux/task_io_accounting_ops.h>
 #include <linux/rcupdate.h>
 #include <linux/ptrace.h>
@@ -112,4 +111,6 @@
 #ifndef __HAVE_ARCH_TASK_STRUCT_ALLOCATOR
-# define alloc_task_struct()	kmem_cache_alloc(task_struct_cachep, GFP_KERNEL)
-# define free_task_struct(tsk)	kmem_cache_free(task_struct_cachep, (tsk))
+# define alloc_task_struct_node(node) \
+		kmem_cache_alloc_node(task_struct_cachep, GFP_KERNEL, node)
+# define free_task_struct(tsk) \
+		kmem_cache_free(task_struct_cachep, (tsk))
 static struct kmem_cache *task_struct_cachep;
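
The generic fallback now takes an explicit NUMA node and allocates through kmem_cache_alloc_node(). The #ifndef __HAVE_ARCH_TASK_STRUCT_ALLOCATOR guard is the usual arch-override idiom: an architecture that provides its own allocator pre-empts these defaults. A minimal userspace sketch of the same compile-time override idiom (macro names invented for illustration, not kernel API):

    /* Hypothetical analogue of the __HAVE_ARCH_..._ALLOCATOR guard: a
     * build-time override with a generic fallback. Compile with and
     * without -DUSE_ZEROING_ALLOC to take either path. */
    #include <stdio.h>
    #include <stdlib.h>

    #ifdef USE_ZEROING_ALLOC
    # define my_alloc(sz)  calloc(1, (sz))   /* "arch" override */
    #else
    # define my_alloc(sz)  malloc(sz)        /* generic fallback */
    #endif

    int main(void)
    {
        char *p = my_alloc(64);
        if (!p)
            return 1;
    #ifdef USE_ZEROING_ALLOC
        printf("zeroing allocator, first byte: %d\n", p[0]);
    #else
        printf("plain malloc at %p\n", (void *)p);
    #endif
        free(p);
        return 0;
    }
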
@@ -118,12 +119,15 @@
 #ifndef __HAVE_ARCH_THREAD_INFO_ALLOCATOR
-static inline struct thread_info *alloc_thread_info(struct task_struct *tsk)
+static struct thread_info *alloc_thread_info_node(struct task_struct *tsk,
+						  int node)
 {
 #ifdef CONFIG_DEBUG_STACK_USAGE
 	gfp_t mask = GFP_KERNEL | __GFP_ZERO;
 #else
 	gfp_t mask = GFP_KERNEL;
 #endif
-	return (struct thread_info *)__get_free_pages(mask, THREAD_SIZE_ORDER);
+	struct page *page = alloc_pages_node(node, mask, THREAD_SIZE_ORDER);
+
+	return page ? page_address(page) : NULL;
 }

 static inline void free_thread_info(struct thread_info *ti)
@@ -250,3 +255,4 @@
 	struct task_struct *tsk;
 	struct thread_info *ti;
 	unsigned long *stackend;
+	int node = tsk_fork_get_node(orig);
@@ -256,3 +261,3 @@
 	prepare_to_copy(orig);

-	tsk = alloc_task_struct();
+	tsk = alloc_task_struct_node(node);
@@ -262,3 +267,3 @@
-	ti = alloc_thread_info(tsk);
+	ti = alloc_thread_info_node(tsk, node);
 	if (!ti) {
 		free_task_struct(tsk);
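
dup_task_struct() now asks tsk_fork_get_node() which node the child should live on and allocates both the task_struct and the thread_info stack there, so a kthread spawned for a particular node gets node-local memory. A rough userspace analogue using libnuma (assumes libnuma is installed; link with -lnuma; this is not the kernel API):

    /* Userspace analogue of node-local allocation, via libnuma. */
    #include <stdio.h>
    #include <numa.h>

    int main(void)
    {
        if (numa_available() < 0) {
            puts("no NUMA support on this machine");
            return 0;
        }
        /* Like alloc_pages_node(node, ...): ask for memory on node 0. */
        void *stack = numa_alloc_onnode(16 * 1024, 0);
        if (!stack)
            return 1;
        printf("16 KiB allocated on node 0 at %p\n", stack);
        numa_free(stack, 16 * 1024);
        return 0;
    }
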
@@ -379,11 +384,10 @@
 		if (tmp->vm_flags & VM_DENYWRITE)
 			atomic_dec(&inode->i_writecount);
-		spin_lock(&mapping->i_mmap_lock);
+		mutex_lock(&mapping->i_mmap_mutex);
 		if (tmp->vm_flags & VM_SHARED)
 			mapping->i_mmap_writable++;
-		tmp->vm_truncate_count = mpnt->vm_truncate_count;
 		flush_dcache_mmap_lock(mapping);
 		/* insert tmp into the share list, just after mpnt */
 		vma_prio_tree_add(tmp, mpnt);
 		flush_dcache_mmap_unlock(mapping);
-		spin_unlock(&mapping->i_mmap_lock);
+		mutex_unlock(&mapping->i_mmap_mutex);
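
i_mmap_lock (a spinlock) becomes i_mmap_mutex: the holder may now sleep, at the cost of the lock being preemptible. Illustration only, in pthreads terms, of what a sleeping lock permits that a spinlock should not:

    /* Compile with -pthread. A mutex holder may block (e.g. on I/O);
     * a spinlock holder should never, since waiters burn a CPU. */
    #include <pthread.h>
    #include <stdio.h>
    #include <unistd.h>

    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

    int main(void)
    {
        pthread_mutex_lock(&lock);
        usleep(1000);            /* legal while holding a mutex */
        pthread_mutex_unlock(&lock);
        puts("slept while holding a mutex");
        /* With pthread_spin_lock() the same sleep would spin any
         * contending thread for the whole millisecond. */
        return 0;
    }
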
@@ -517,8 +521,9 @@
 	struct mm_struct * mm;

 	mm = allocate_mm();
-	if (mm) {
-		memset(mm, 0, sizeof(*mm));
-		mm = mm_init(mm, current);
-	}
-	return mm;
+	if (!mm)
+		return NULL;
+
+	memset(mm, 0, sizeof(*mm));
+	mm_init_cpumask(mm);
+	return mm_init(mm, current);
@@ -569,4 +574,55 @@
 EXPORT_SYMBOL_GPL(mmput);

+/*
+ * We added or removed a vma mapping the executable. The vmas are only mapped
+ * during exec and are not mapped with the mmap system call.
+ * Callers must hold down_write() on the mm's mmap_sem for these
+ */
+void added_exe_file_vma(struct mm_struct *mm)
+{
+	mm->num_exe_file_vmas++;
+}
+
+void removed_exe_file_vma(struct mm_struct *mm)
+{
+	mm->num_exe_file_vmas--;
+	if ((mm->num_exe_file_vmas == 0) && mm->exe_file){
+		fput(mm->exe_file);
+		mm->exe_file = NULL;
+	}
+
+}
+
+void set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file)
+{
+	if (new_exe_file)
+		get_file(new_exe_file);
+	if (mm->exe_file)
+		fput(mm->exe_file);
+	mm->exe_file = new_exe_file;
+	mm->num_exe_file_vmas = 0;
+}
+
+struct file *get_mm_exe_file(struct mm_struct *mm)
+{
+	struct file *exe_file;
+
+	/* We need mmap_sem to protect against races with removal of
+	 * VM_EXECUTABLE vmas */
+	down_read(&mm->mmap_sem);
+	exe_file = mm->exe_file;
+	if (exe_file)
+		get_file(exe_file);
+	up_read(&mm->mmap_sem);
+	return exe_file;
+}
+
+static void dup_mm_exe_file(struct mm_struct *oldmm, struct mm_struct *newmm)
+{
+	/* It's safe to write the exe_file pointer without exe_file_lock because
+	 * this is called during fork when the task is not yet in /proc */
+	newmm->exe_file = get_mm_exe_file(oldmm);
+}
+
 /**
  * get_task_mm - acquire a reference to the task's mm
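
These helpers, moved into fork.c here, maintain mm->exe_file, the file that backs the /proc/<pid>/exe symlink; get_mm_exe_file() takes mmap_sem so the pointer cannot be dropped underneath it by removal of the last VM_EXECUTABLE vma. The effect is visible from userspace:

    /* Reads the /proc/self/exe symlink, which the kernel resolves from
     * mm->exe_file (set by set_mm_exe_file() at exec time). */
    #include <stdio.h>
    #include <unistd.h>
    #include <limits.h>

    int main(void)
    {
        char buf[PATH_MAX];
        ssize_t n = readlink("/proc/self/exe", buf, sizeof(buf) - 1);
        if (n < 0) {
            perror("readlink");
            return 1;
        }
        buf[n] = '\0';
        printf("running binary: %s\n", buf);
        return 0;
    }
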
@@ -922,6 +979,10 @@
 	tty_audit_fork(sig);
 	sched_autogroup_fork(sig);

+#ifdef CONFIG_CGROUPS
+	init_rwsem(&sig->threadgroup_fork_lock);
+#endif
+
 	sig->oom_adj = current->signal->oom_adj;
 	sig->oom_score_adj = current->signal->oom_score_adj;
 	sig->oom_score_adj_min = current->signal->oom_score_adj_min;
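
copy_signal() copies the OOM-score knobs into the child's fresh signal_struct, which is why an adjusted oom_score_adj survives fork(). A small demonstration (raising the value, i.e. making yourself more killable, needs no privilege):

    #include <stdio.h>
    #include <stdlib.h>
    #include <unistd.h>
    #include <sys/wait.h>

    int main(void)
    {
        FILE *f = fopen("/proc/self/oom_score_adj", "w");
        if (f) {
            fputs("200\n", f);
            fclose(f);
        }
        if (fork() == 0) {       /* child: read back the inherited value */
            char buf[16] = "";
            FILE *g = fopen("/proc/self/oom_score_adj", "r");
            if (g && fgets(buf, sizeof(buf), g))
                printf("child oom_score_adj: %s", buf);
            if (g)
                fclose(g);
            exit(0);
        }
        wait(NULL);
        return 0;
    }
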
@@ -1099,11 +1160,12 @@
 	posix_cpu_timers_init(p);

-	p->lock_depth = -1;		/* -1 = no lock */
 	do_posix_clock_monotonic_gettime(&p->start_time);
 	p->real_start_time = p->start_time;
 	monotonic_to_bootbased(&p->real_start_time);
 	p->io_context = NULL;
 	p->audit_context = NULL;
+	if (clone_flags & CLONE_THREAD)
+		threadgroup_fork_read_lock(current);
 	cgroup_fork(p);
 #ifdef CONFIG_NUMA
 	p->mempolicy = mpol_dup(p->mempolicy);
@@ -1195,10 +1251,4 @@
 	if (clone_flags & CLONE_THREAD)
 		p->tgid = current->tgid;

-	if (current->nsproxy != p->nsproxy) {
-		retval = ns_cgroup_clone(p, pid);
-		if (retval)
-			goto bad_fork_free_pid;
-	}
-
 	p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL;
@@ -1205,7 +1255,10 @@
 	/*
 	 * Clear TID on mm_release()?
 	 */
 	p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr: NULL;
+#ifdef CONFIG_BLOCK
+	p->plug = NULL;
+#endif
 #ifdef CONFIG_FUTEX
 	p->robust_list = NULL;
 #ifdef CONFIG_COMPAT
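
The context above records where CLONE_CHILD_SETTID and CLONE_CHILD_CLEARTID point. The clear_child_tid slot can also be (re)set later with set_tid_address(2); at thread exit the kernel stores 0 there and does a futex wake, which is what pthread_join() waits on. A short demonstration:

    /* set_tid_address(2) updates the same field that CLONE_CHILD_CLEARTID
     * initialises; the call returns the caller's TID. */
    #define _GNU_SOURCE
    #include <stdio.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    int main(void)
    {
        int tid_slot = 0;
        long tid = syscall(SYS_set_tid_address, &tid_slot);
        printf("set_tid_address returned TID %ld (getpid=%d)\n",
               tid, (int)getpid());
        /* On thread exit the kernel writes 0 to tid_slot and futex-wakes
         * any waiter there - the mechanism behind pthread_join(). */
        return 0;
    }
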
@@ -1252,15 +1305,6 @@
 	/* Need tasklist lock for parent etc handling! */
 	write_lock_irq(&tasklist_lock);

-	/*
-	 * The state of the parent's TIF_KTRACE flag may have changed
-	 * since it was copied in dup_task_struct() so we re-copy it here.
-	 */
-	if (test_thread_flag(TIF_KERNEL_TRACE))
-		set_tsk_thread_flag(p, TIF_KERNEL_TRACE);
-	else
-		clear_tsk_thread_flag(p, TIF_KERNEL_TRACE);
-
 	/* CLONE_PARENT re-uses the old parent */
 	if (clone_flags & (CLONE_PARENT|CLONE_THREAD)) {
 		p->real_parent = current->real_parent;
@@ -1300,7 +1344,7 @@
 	tracehook_finish_clone(p, clone_flags, trace);

 	if (thread_group_leader(p)) {
-		if (clone_flags & CLONE_NEWPID)
+		if (is_child_reaper(pid))
 			p->nsproxy->pid_ns->child_reaper = p;

 		p->signal->leader_pid = pid;
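
is_child_reaper(pid) replaces the CLONE_NEWPID test: the first task of a new PID namespace becomes that namespace's init. Observable from userspace with clone() (needs CAP_SYS_ADMIN, so run as root; assumes a stack-grows-down architecture):

    #define _GNU_SOURCE
    #include <sched.h>
    #include <signal.h>
    #include <stdio.h>
    #include <unistd.h>
    #include <sys/wait.h>

    static char stack[64 * 1024];

    static int child(void *arg)
    {
        /* This task is the namespace's child reaper: getpid() == 1. */
        printf("in new pid namespace, getpid() = %d\n", (int)getpid());
        return 0;
    }

    int main(void)
    {
        pid_t pid = clone(child, stack + sizeof(stack),
                          CLONE_NEWPID | SIGCHLD, NULL);
        if (pid < 0) {
            perror("clone(CLONE_NEWPID)");   /* needs CAP_SYS_ADMIN */
            return 1;
        }
        waitpid(pid, NULL, 0);
        return 0;
    }
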
@@ -1515,6 +1563,13 @@
 	fs_cachep = kmem_cache_create("fs_cache",
 			sizeof(struct fs_struct), 0,
 			SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK, NULL);
+	/*
+	 * FIXME! The "sizeof(struct mm_struct)" currently includes the
+	 * whole struct cpumask for the OFFSTACK case. We could change
+	 * this to *only* allocate as much of it as required by the
+	 * maximum number of CPU's we can ever have. The cpumask_allocation
+	 * is at the end of the structure, exactly for that reason.
+	 */
 	mm_cachep = kmem_cache_create("mm_struct",
 			sizeof(struct mm_struct), ARCH_MIN_MMSTRUCT_ALIGN,
 			SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK, NULL);
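
The FIXME reasons about a real trick: because the cpumask sits at the end of struct mm_struct, the slab object could be sized to the actual number of possible CPUs rather than NR_CPUS. The same sizing trick in miniature, with an invented struct:

    /* Put the variable-sized member last, then allocate only the bytes
     * actually needed. */
    #include <stdio.h>
    #include <stdlib.h>
    #include <stddef.h>
    #include <string.h>

    struct mini_mm {
        int users;
        unsigned long bitmap[];     /* "cpumask" at the end on purpose */
    };

    static struct mini_mm *mini_mm_alloc(int nr_cpus)
    {
        size_t bits_per_word = 8 * sizeof(unsigned long);
        size_t words = (nr_cpus + bits_per_word - 1) / bits_per_word;
        size_t sz = offsetof(struct mini_mm, bitmap) +
                    words * sizeof(unsigned long);
        struct mini_mm *mm = malloc(sz);
        if (mm)
            memset(mm, 0, sz);
        return mm;
    }

    int main(void)
    {
        struct mini_mm *mm = mini_mm_alloc(4);  /* room for 4 CPUs, not NR_CPUS */
        if (!mm)
            return 1;
        printf("header is %zu bytes plus a 4-cpu bitmap\n",
               offsetof(struct mini_mm, bitmap));
        free(mm);
        return 0;
    }
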
@@ -1525,36 +1580,22 @@
 /*
- * Check constraints on flags passed to the unshare system call and
- * force unsharing of additional process context as appropriate.
+ * Check constraints on flags passed to the unshare system call.
  */
-static void check_unshare_flags(unsigned long *flags_ptr)
+static int check_unshare_flags(unsigned long unshare_flags)
 {
-	/*
-	 * If unsharing a thread from a thread group, must also
-	 * unshare vm.
-	 */
-	if (*flags_ptr & CLONE_THREAD)
-		*flags_ptr |= CLONE_VM;
-
-	/*
-	 * If unsharing vm, must also unshare signal handlers.
-	 */
-	if (*flags_ptr & CLONE_VM)
-		*flags_ptr |= CLONE_SIGHAND;
-
-	/*
-	 * If unsharing namespace, must also unshare filesystem information.
-	 */
-	if (*flags_ptr & CLONE_NEWNS)
-		*flags_ptr |= CLONE_FS;
-}
-
-/*
- * Unsharing of tasks created with CLONE_THREAD is not supported yet
- */
-static int unshare_thread(unsigned long unshare_flags)
-{
-	if (unshare_flags & CLONE_THREAD)
+	if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND|
+				CLONE_VM|CLONE_FILES|CLONE_SYSVSEM|
+				CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWNET))
 		return -EINVAL;
+	/*
+	 * Not implemented, but pretend it works if there is nothing to
+	 * unshare. Note that unsharing CLONE_THREAD or CLONE_SIGHAND
+	 * needs to unshare vm.
+	 */
+	if (unshare_flags & (CLONE_THREAD | CLONE_SIGHAND | CLONE_VM)) {
+		/* FIXME: get_task_mm() increments ->mm_users */
+		if (atomic_read(&current->mm->mm_users) > 1)
+			return -EINVAL;
+	}

 	return 0;
 }
@@ -1583,32 +1623,5 @@
-/*
- * Unsharing of sighand is not supported yet
- */
-static int unshare_sighand(unsigned long unshare_flags, struct sighand_struct **new_sighp)
-{
-	struct sighand_struct *sigh = current->sighand;
-
-	if ((unshare_flags & CLONE_SIGHAND) && atomic_read(&sigh->count) > 1)
-		return -EINVAL;
-	else
-		return 0;
-}
-
-/*
- * Unshare vm if it is being shared
- */
-static int unshare_vm(unsigned long unshare_flags, struct mm_struct **new_mmp)
-{
-	struct mm_struct *mm = current->mm;
-
-	if ((unshare_flags & CLONE_VM) &&
-	    (mm && atomic_read(&mm->mm_users) > 1)) {
-		return -EINVAL;
-	}
-
-	return 0;
-}
-
 /*
  * Unshare file descriptor table if it is being shared
  */
 static int unshare_fd(unsigned long unshare_flags, struct files_struct **new_fdp)
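
check_unshare_flags() is now pure validation: any flag outside the accepted set fails the call instead of being silently adjusted. For example, CLONE_VFORK is not in the list, so from userspace:

    /* A flag outside the accepted set makes unshare(2) fail with EINVAL. */
    #define _GNU_SOURCE
    #include <sched.h>
    #include <errno.h>
    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
        if (unshare(CLONE_VFORK) != 0)
            printf("unshare(CLONE_VFORK): %s\n", strerror(errno));
        return 0;
    }
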
@@ -1637,44 +1650,36 @@
 SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags)
 {
-	int err = 0;
 	struct fs_struct *fs, *new_fs = NULL;
-	struct sighand_struct *new_sigh = NULL;
-	struct mm_struct *mm, *new_mm = NULL, *active_mm = NULL;
 	struct files_struct *fd, *new_fd = NULL;
 	struct nsproxy *new_nsproxy = NULL;
 	int do_sysvsem = 0;
+	int err;

-	check_unshare_flags(&unshare_flags);
-
-	/* Return -EINVAL for all unsupported flags */
-	err = -EINVAL;
-	if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND|
-				CLONE_VM|CLONE_FILES|CLONE_SYSVSEM|
-				CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWNET))
+	err = check_unshare_flags(unshare_flags);
+	if (err)
 		goto bad_unshare_out;

 	/*
+	 * If unsharing namespace, must also unshare filesystem information.
+	 */
+	if (unshare_flags & CLONE_NEWNS)
+		unshare_flags |= CLONE_FS;
+	/*
 	 * CLONE_NEWIPC must also detach from the undolist: after switching
 	 * to a new ipc namespace, the semaphore arrays from the old
 	 * namespace are unreachable.
 	 */
 	if (unshare_flags & (CLONE_NEWIPC|CLONE_SYSVSEM))
 		do_sysvsem = 1;
-	if ((err = unshare_thread(unshare_flags)))
-		goto bad_unshare_out;
 	if ((err = unshare_fs(unshare_flags, &new_fs)))
-		goto bad_unshare_cleanup_thread;
-	if ((err = unshare_sighand(unshare_flags, &new_sigh)))
+		goto bad_unshare_out;
+	if ((err = unshare_fd(unshare_flags, &new_fd)))
 		goto bad_unshare_cleanup_fs;
-	if ((err = unshare_vm(unshare_flags, &new_mm)))
-		goto bad_unshare_cleanup_sigh;
-	if ((err = unshare_fd(unshare_flags, &new_fd)))
-		goto bad_unshare_cleanup_vm;
 	if ((err = unshare_nsproxy_namespaces(unshare_flags, &new_nsproxy,
 			new_fs)))
 		goto bad_unshare_cleanup_fd;

-	if (new_fs || new_mm || new_fd || do_sysvsem || new_nsproxy) {
+	if (new_fs || new_fd || do_sysvsem || new_nsproxy) {
 		if (do_sysvsem) {
 			/*
 			 * CLONE_SYSVSEM is equivalent to sys_exit().
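
With the helper functions gone, sys_unshare() folds the CLONE_NEWNS-implies-CLONE_FS rule in here and then peels off each resource in turn. A working call with one of the accepted flags (needs CAP_SYS_ADMIN, so run as root):

    /* After unshare(CLONE_NEWUTS), a hostname change is invisible
     * outside the new UTS namespace. */
    #define _GNU_SOURCE
    #include <sched.h>
    #include <stdio.h>
    #include <unistd.h>
    #include <sys/utsname.h>

    int main(void)
    {
        struct utsname u;

        if (unshare(CLONE_NEWUTS) != 0) {
            perror("unshare(CLONE_NEWUTS)");
            return 1;
        }
        sethostname("unshared", 8);   /* affects only the new namespace */
        uname(&u);
        printf("hostname in private UTS namespace: %s\n", u.nodename);
        return 0;
    }
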
@@ -1700,19 +1705,6 @@
 			spin_unlock(&fs->lock);
 		}

-		if (new_mm) {
-			mm = current->mm;
-			active_mm = current->active_mm;
-			current->mm = new_mm;
-			current->active_mm = new_mm;
-			if (current->signal->oom_score_adj == OOM_SCORE_ADJ_MIN) {
-				atomic_dec(&mm->oom_disable_count);
-				atomic_inc(&new_mm->oom_disable_count);
-			}
-			activate_mm(active_mm, new_mm);
-			new_mm = mm;
-		}
-
 		if (new_fd) {
 			fd = current->files;
 			current->files = new_fd;
@@ -1730,17 +1722,7 @@
 		put_files_struct(new_fd);

-bad_unshare_cleanup_vm:
-	if (new_mm)
-		mmput(new_mm);
-
-bad_unshare_cleanup_sigh:
-	if (new_sigh)
-		if (atomic_dec_and_test(&new_sigh->count))
-			kmem_cache_free(sighand_cachep, new_sigh);
-
 bad_unshare_cleanup_fs:
 	if (new_fs)
 		free_fs_struct(new_fs);

-bad_unshare_cleanup_thread:
 bad_unshare_out: