~ubuntu-branches/ubuntu/precise/linux-ti-omap4/precise


Viewing changes to kernel/fork.c

  • Committer: Bazaar Package Importer
  • Author(s): Paolo Pisati
  • Date: 2011-06-29 15:23:51 UTC
  • mfrom: (26.1.1 natty-proposed)
  • Revision ID: james.westby@ubuntu.com-20110629152351-xs96tm303d95rpbk
Tags: 3.0.0-1200.2
* Rebased against 3.0.0-6.7
* BSP from TI based on 3.0.0

--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -40,6 +40,7 @@
 #include <linux/tracehook.h>
 #include <linux/futex.h>
 #include <linux/compat.h>
+#include <linux/kthread.h>
 #include <linux/task_io_accounting_ops.h>
 #include <linux/rcupdate.h>
 #include <linux/ptrace.h>
@@ -58,7 +59,6 @@
 #include <linux/taskstats_kern.h>
 #include <linux/random.h>
 #include <linux/tty.h>
-#include <linux/proc_fs.h>
 #include <linux/blkdev.h>
 #include <linux/fs_struct.h>
 #include <linux/magic.h>
@@ -88,7 +88,6 @@
 DEFINE_PER_CPU(unsigned long, process_counts) = 0;
 
 __cacheline_aligned DEFINE_RWLOCK(tasklist_lock);  /* outer */
-EXPORT_SYMBOL(tasklist_lock);
 
 #ifdef CONFIG_PROVE_RCU
 int lockdep_tasklist_lock_is_held(void)
@@ -110,20 +109,25 @@
 }
 
 #ifndef __HAVE_ARCH_TASK_STRUCT_ALLOCATOR
-# define alloc_task_struct()    kmem_cache_alloc(task_struct_cachep, GFP_KERNEL)
-# define free_task_struct(tsk)  kmem_cache_free(task_struct_cachep, (tsk))
+# define alloc_task_struct_node(node)           \
+                kmem_cache_alloc_node(task_struct_cachep, GFP_KERNEL, node)
+# define free_task_struct(tsk)                  \
+                kmem_cache_free(task_struct_cachep, (tsk))
 static struct kmem_cache *task_struct_cachep;
 #endif
 
 #ifndef __HAVE_ARCH_THREAD_INFO_ALLOCATOR
-static inline struct thread_info *alloc_thread_info(struct task_struct *tsk)
+static struct thread_info *alloc_thread_info_node(struct task_struct *tsk,
+                                                  int node)
 {
 #ifdef CONFIG_DEBUG_STACK_USAGE
         gfp_t mask = GFP_KERNEL | __GFP_ZERO;
 #else
         gfp_t mask = GFP_KERNEL;
 #endif
-        return (struct thread_info *)__get_free_pages(mask, THREAD_SIZE_ORDER);
+        struct page *page = alloc_pages_node(node, mask, THREAD_SIZE_ORDER);
+
+        return page ? page_address(page) : NULL;
 }
 
 static inline void free_thread_info(struct thread_info *ti)
@@ -195,6 +199,7 @@
         if (!profile_handoff_task(tsk))
                 free_task(tsk);
 }
+EXPORT_SYMBOL_GPL(__put_task_struct);
 
 /*
  * macro override instead of weak attribute alias, to workaround
@@ -250,16 +255,16 @@
         struct task_struct *tsk;
         struct thread_info *ti;
         unsigned long *stackend;
-
+        int node = tsk_fork_get_node(orig);
         int err;
 
         prepare_to_copy(orig);
 
-        tsk = alloc_task_struct();
+        tsk = alloc_task_struct_node(node);
         if (!tsk)
                 return NULL;
 
-        ti = alloc_thread_info(tsk);
+        ti = alloc_thread_info_node(tsk, node);
         if (!ti) {
                 free_task_struct(tsk);
                 return NULL;
@@ -378,15 +383,14 @@
                         get_file(file);
                         if (tmp->vm_flags & VM_DENYWRITE)
                                 atomic_dec(&inode->i_writecount);
-                        spin_lock(&mapping->i_mmap_lock);
+                        mutex_lock(&mapping->i_mmap_mutex);
                         if (tmp->vm_flags & VM_SHARED)
                                 mapping->i_mmap_writable++;
-                        tmp->vm_truncate_count = mpnt->vm_truncate_count;
                         flush_dcache_mmap_lock(mapping);
                         /* insert tmp into the share list, just after mpnt */
                         vma_prio_tree_add(tmp, mpnt);
                         flush_dcache_mmap_unlock(mapping);
-                        spin_unlock(&mapping->i_mmap_lock);
+                        mutex_unlock(&mapping->i_mmap_mutex);
                 }
 
                 /*
@@ -517,11 +521,12 @@
         struct mm_struct * mm;
 
         mm = allocate_mm();
-        if (mm) {
-                memset(mm, 0, sizeof(*mm));
-                mm = mm_init(mm, current);
-        }
-        return mm;
+        if (!mm)
+                return NULL;
+
+        memset(mm, 0, sizeof(*mm));
+        mm_init_cpumask(mm);
+        return mm_init(mm, current);
 }
 
 /*
@@ -568,6 +573,57 @@
 }
 EXPORT_SYMBOL_GPL(mmput);
 
+/*
+ * We added or removed a vma mapping the executable. The vmas are only mapped
+ * during exec and are not mapped with the mmap system call.
+ * Callers must hold down_write() on the mm's mmap_sem for these
+ */
+void added_exe_file_vma(struct mm_struct *mm)
+{
+        mm->num_exe_file_vmas++;
+}
+
+void removed_exe_file_vma(struct mm_struct *mm)
+{
+        mm->num_exe_file_vmas--;
+        if ((mm->num_exe_file_vmas == 0) && mm->exe_file){
+                fput(mm->exe_file);
+                mm->exe_file = NULL;
+        }
+
+}
+
+void set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file)
+{
+        if (new_exe_file)
+                get_file(new_exe_file);
+        if (mm->exe_file)
+                fput(mm->exe_file);
+        mm->exe_file = new_exe_file;
+        mm->num_exe_file_vmas = 0;
+}
+
+struct file *get_mm_exe_file(struct mm_struct *mm)
+{
+        struct file *exe_file;
+
+        /* We need mmap_sem to protect against races with removal of
+         * VM_EXECUTABLE vmas */
+        down_read(&mm->mmap_sem);
+        exe_file = mm->exe_file;
+        if (exe_file)
+                get_file(exe_file);
+        up_read(&mm->mmap_sem);
+        return exe_file;
+}
+
+static void dup_mm_exe_file(struct mm_struct *oldmm, struct mm_struct *newmm)
+{
+        /* It's safe to write the exe_file pointer without exe_file_lock because
+         * this is called during fork when the task is not yet in /proc */
+        newmm->exe_file = get_mm_exe_file(oldmm);
+}
+
 /**
  * get_task_mm - acquire a reference to the task's mm
  *
@@ -674,6 +730,7 @@
                 goto fail_nomem;
 
         memcpy(mm, oldmm, sizeof(*mm));
+        mm_init_cpumask(mm);
 
         /* Initializing for Swap token stuff */
         mm->token_priority = 0;
@@ -922,6 +979,10 @@
         tty_audit_fork(sig);
         sched_autogroup_fork(sig);
 
+#ifdef CONFIG_CGROUPS
+        init_rwsem(&sig->threadgroup_fork_lock);
+#endif
+
         sig->oom_adj = current->signal->oom_adj;
         sig->oom_score_adj = current->signal->oom_score_adj;
         sig->oom_score_adj_min = current->signal->oom_score_adj_min;
@@ -1098,12 +1159,13 @@
 
         posix_cpu_timers_init(p);
 
-        p->lock_depth = -1;             /* -1 = no lock */
         do_posix_clock_monotonic_gettime(&p->start_time);
         p->real_start_time = p->start_time;
         monotonic_to_bootbased(&p->real_start_time);
         p->io_context = NULL;
         p->audit_context = NULL;
+        if (clone_flags & CLONE_THREAD)
+                threadgroup_fork_read_lock(current);
         cgroup_fork(p);
 #ifdef CONFIG_NUMA
         p->mempolicy = mpol_dup(p->mempolicy);
@@ -1148,7 +1210,7 @@
 #endif
 
         /* Perform scheduler related setup. Assign this task to a CPU. */
-        sched_fork(p, clone_flags);
+        sched_fork(p);
 
         retval = perf_event_init_task(p);
         if (retval)
@@ -1182,12 +1244,6 @@
                 pid = alloc_pid(p->nsproxy->pid_ns);
                 if (!pid)
                         goto bad_fork_cleanup_io;
-
-                if (clone_flags & CLONE_NEWPID) {
-                        retval = pid_ns_prepare_proc(p->nsproxy->pid_ns);
-                        if (retval < 0)
-                                goto bad_fork_free_pid;
-                }
         }
 
         p->pid = pid_nr(pid);
@@ -1195,17 +1251,14 @@
         if (clone_flags & CLONE_THREAD)
                 p->tgid = current->tgid;
 
-        if (current->nsproxy != p->nsproxy) {
-                retval = ns_cgroup_clone(p, pid);
-                if (retval)
-                        goto bad_fork_free_pid;
-        }
-
         p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL;
         /*
          * Clear TID on mm_release()?
          */
         p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr: NULL;
+#ifdef CONFIG_BLOCK
+        p->plug = NULL;
+#endif
 #ifdef CONFIG_FUTEX
         p->robust_list = NULL;
 #ifdef CONFIG_COMPAT
@@ -1252,15 +1305,6 @@
         /* Need tasklist lock for parent etc handling! */
         write_lock_irq(&tasklist_lock);
 
-        /*
-         * The state of the parent's TIF_KTRACE flag may have changed
-         * since it was copied in dup_task_struct() so we re-copy it here.
-         */
-        if (test_thread_flag(TIF_KERNEL_TRACE))
-                set_tsk_thread_flag(p, TIF_KERNEL_TRACE);
-        else
-                clear_tsk_thread_flag(p, TIF_KERNEL_TRACE);
-
         /* CLONE_PARENT re-uses the old parent */
         if (clone_flags & (CLONE_PARENT|CLONE_THREAD)) {
                 p->real_parent = current->real_parent;
@@ -1300,7 +1344,7 @@
                 tracehook_finish_clone(p, clone_flags, trace);
 
                 if (thread_group_leader(p)) {
-                        if (clone_flags & CLONE_NEWPID)
+                        if (is_child_reaper(pid))
                                 p->nsproxy->pid_ns->child_reaper = p;
 
                         p->signal->leader_pid = pid;
@@ -1320,6 +1364,8 @@
         write_unlock_irq(&tasklist_lock);
         proc_fork_connector(p);
         cgroup_post_fork(p);
+        if (clone_flags & CLONE_THREAD)
+                threadgroup_fork_read_unlock(current);
         perf_event_fork(p);
         return p;
 
@@ -1358,6 +1404,8 @@
         mpol_put(p->mempolicy);
 bad_fork_cleanup_cgroup:
 #endif
+        if (clone_flags & CLONE_THREAD)
+                threadgroup_fork_read_unlock(current);
         cgroup_exit(p, cgroup_callbacks_done);
         delayacct_tsk_free(p);
         module_put(task_thread_info(p)->exec_domain->module);
@@ -1471,7 +1519,7 @@
                  */
                 p->flags &= ~PF_STARTING;
 
-                wake_up_new_task(p, clone_flags);
+                wake_up_new_task(p);
 
                 tracehook_report_clone_complete(trace, regs,
                                                 clone_flags, nr, p);
@@ -1515,6 +1563,13 @@
         fs_cachep = kmem_cache_create("fs_cache",
                         sizeof(struct fs_struct), 0,
                         SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK, NULL);
+        /*
+         * FIXME! The "sizeof(struct mm_struct)" currently includes the
+         * whole struct cpumask for the OFFSTACK case. We could change
+         * this to *only* allocate as much of it as required by the
+         * maximum number of CPU's we can ever have.  The cpumask_allocation
+         * is at the end of the structure, exactly for that reason.
+         */
         mm_cachep = kmem_cache_create("mm_struct",
                         sizeof(struct mm_struct), ARCH_MIN_MMSTRUCT_ALIGN,
                         SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK, NULL);
@@ -1523,38 +1578,24 @@
 }
 
 /*
- * Check constraints on flags passed to the unshare system call and
- * force unsharing of additional process context as appropriate.
- */
-static void check_unshare_flags(unsigned long *flags_ptr)
-{
-        /*
-         * If unsharing a thread from a thread group, must also
-         * unshare vm.
-         */
-        if (*flags_ptr & CLONE_THREAD)
-                *flags_ptr |= CLONE_VM;
-
-        /*
-         * If unsharing vm, must also unshare signal handlers.
-         */
-        if (*flags_ptr & CLONE_VM)
-                *flags_ptr |= CLONE_SIGHAND;
-
-        /*
-         * If unsharing namespace, must also unshare filesystem information.
-         */
-        if (*flags_ptr & CLONE_NEWNS)
-                *flags_ptr |= CLONE_FS;
-}
-
-/*
- * Unsharing of tasks created with CLONE_THREAD is not supported yet
- */
-static int unshare_thread(unsigned long unshare_flags)
-{
-        if (unshare_flags & CLONE_THREAD)
+ * Check constraints on flags passed to the unshare system call.
+ */
+static int check_unshare_flags(unsigned long unshare_flags)
+{
+        if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND|
+                                CLONE_VM|CLONE_FILES|CLONE_SYSVSEM|
+                                CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWNET))
                 return -EINVAL;
+        /*
+         * Not implemented, but pretend it works if there is nothing to
+         * unshare. Note that unsharing CLONE_THREAD or CLONE_SIGHAND
+         * needs to unshare vm.
+         */
+        if (unshare_flags & (CLONE_THREAD | CLONE_SIGHAND | CLONE_VM)) {
+                /* FIXME: get_task_mm() increments ->mm_users */
+                if (atomic_read(&current->mm->mm_users) > 1)
+                        return -EINVAL;
+        }
 
         return 0;
 }
@@ -1581,34 +1622,6 @@
 }
 
 /*
- * Unsharing of sighand is not supported yet
- */
-static int unshare_sighand(unsigned long unshare_flags, struct sighand_struct **new_sighp)
-{
-        struct sighand_struct *sigh = current->sighand;
-
-        if ((unshare_flags & CLONE_SIGHAND) && atomic_read(&sigh->count) > 1)
-                return -EINVAL;
-        else
-                return 0;
-}
-
-/*
- * Unshare vm if it is being shared
- */
-static int unshare_vm(unsigned long unshare_flags, struct mm_struct **new_mmp)
-{
-        struct mm_struct *mm = current->mm;
-
-        if ((unshare_flags & CLONE_VM) &&
-            (mm && atomic_read(&mm->mm_users) > 1)) {
-                return -EINVAL;
-        }
-
-        return 0;
-}
-
-/*
  * Unshare file descriptor table if it is being shared
  */
 static int unshare_fd(unsigned long unshare_flags, struct files_struct **new_fdp)
@@ -1636,45 +1649,37 @@
  */
SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags)
 {
-        int err = 0;
         struct fs_struct *fs, *new_fs = NULL;
-        struct sighand_struct *new_sigh = NULL;
-        struct mm_struct *mm, *new_mm = NULL, *active_mm = NULL;
         struct files_struct *fd, *new_fd = NULL;
         struct nsproxy *new_nsproxy = NULL;
         int do_sysvsem = 0;
-
-        check_unshare_flags(&unshare_flags);
-
-        /* Return -EINVAL for all unsupported flags */
-        err = -EINVAL;
-        if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND|
-                                CLONE_VM|CLONE_FILES|CLONE_SYSVSEM|
-                                CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWNET))
+        int err;
+
+        err = check_unshare_flags(unshare_flags);
+        if (err)
                 goto bad_unshare_out;
 
         /*
+         * If unsharing namespace, must also unshare filesystem information.
+         */
+        if (unshare_flags & CLONE_NEWNS)
+                unshare_flags |= CLONE_FS;
+        /*
         * CLONE_NEWIPC must also detach from the undolist: after switching
         * to a new ipc namespace, the semaphore arrays from the old
         * namespace are unreachable.
         */
         if (unshare_flags & (CLONE_NEWIPC|CLONE_SYSVSEM))
                 do_sysvsem = 1;
-        if ((err = unshare_thread(unshare_flags)))
-                goto bad_unshare_out;
         if ((err = unshare_fs(unshare_flags, &new_fs)))
-                goto bad_unshare_cleanup_thread;
-        if ((err = unshare_sighand(unshare_flags, &new_sigh)))
+                goto bad_unshare_out;
+        if ((err = unshare_fd(unshare_flags, &new_fd)))
                 goto bad_unshare_cleanup_fs;
-        if ((err = unshare_vm(unshare_flags, &new_mm)))
-                goto bad_unshare_cleanup_sigh;
-        if ((err = unshare_fd(unshare_flags, &new_fd)))
-                goto bad_unshare_cleanup_vm;
         if ((err = unshare_nsproxy_namespaces(unshare_flags, &new_nsproxy,
                         new_fs)))
                 goto bad_unshare_cleanup_fd;
 
-        if (new_fs ||  new_mm || new_fd || do_sysvsem || new_nsproxy) {
+        if (new_fs || new_fd || do_sysvsem || new_nsproxy) {
                 if (do_sysvsem) {
                         /*
                          * CLONE_SYSVSEM is equivalent to sys_exit().
@@ -1700,19 +1705,6 @@
                         spin_unlock(&fs->lock);
                 }
 
-                if (new_mm) {
-                        mm = current->mm;
-                        active_mm = current->active_mm;
-                        current->mm = new_mm;
-                        current->active_mm = new_mm;
-                        if (current->signal->oom_score_adj == OOM_SCORE_ADJ_MIN) {
-                                atomic_dec(&mm->oom_disable_count);
-                                atomic_inc(&new_mm->oom_disable_count);
-                        }
-                        activate_mm(active_mm, new_mm);
-                        new_mm = mm;
-                }
-
                 if (new_fd) {
                         fd = current->files;
                         current->files = new_fd;
@@ -1729,20 +1721,10 @@
         if (new_fd)
                 put_files_struct(new_fd);
 
-bad_unshare_cleanup_vm:
-        if (new_mm)
-                mmput(new_mm);
-
-bad_unshare_cleanup_sigh:
-        if (new_sigh)
-                if (atomic_dec_and_test(&new_sigh->count))
-                        kmem_cache_free(sighand_cachep, new_sigh);
-
 bad_unshare_cleanup_fs:
         if (new_fs)
                 free_fs_struct(new_fs);
 
-bad_unshare_cleanup_thread:
 bad_unshare_out:
         return err;
 }