@@ -183 +183 @@
 	return ktime_to_ns(rt_rq->tg->rt_bandwidth.rt_period);
 }
 
+static inline void list_add_leaf_rt_rq(struct rt_rq *rt_rq)
+{
+	list_add_rcu(&rt_rq->leaf_rt_rq_list,
+			&rq_of_rt_rq(rt_rq)->leaf_rt_rq_list);
+}
+
+static inline void list_del_leaf_rt_rq(struct rt_rq *rt_rq)
+{
+	list_del_rcu(&rt_rq->leaf_rt_rq_list);
+}
+
 #define for_each_leaf_rt_rq(rt_rq, rq) \
 	list_for_each_entry_rcu(rt_rq, &rq->leaf_rt_rq_list, leaf_rt_rq_list)
@@ -194 +205 @@
 	return rt_se->my_q;
 }
 
-static void enqueue_rt_entity(struct sched_rt_entity *rt_se);
+static void enqueue_rt_entity(struct sched_rt_entity *rt_se, bool head);
 static void dequeue_rt_entity(struct sched_rt_entity *rt_se);
 
 static void sched_rt_rq_enqueue(struct rt_rq *rt_rq)
 {
 	struct task_struct *curr = rq_of_rt_rq(rt_rq)->curr;
-	struct sched_rt_entity *rt_se = rt_rq->rt_se;
+	struct sched_rt_entity *rt_se;
+
+	int cpu = cpu_of(rq_of_rt_rq(rt_rq));
+
+	rt_se = rt_rq->tg->rt_se[cpu];
 
 	if (rt_rq->rt_nr_running) {
 		if (rt_se && !on_rt_rq(rt_se))
-			enqueue_rt_entity(rt_se);
+			enqueue_rt_entity(rt_se, false);
 		if (rt_rq->highest_prio.curr < curr->prio)
 			resched_task(curr);
 	}
 }
 
 static void sched_rt_rq_dequeue(struct rt_rq *rt_rq)
 {
-	struct sched_rt_entity *rt_se = rt_rq->rt_se;
+	struct sched_rt_entity *rt_se;
+	int cpu = cpu_of(rq_of_rt_rq(rt_rq));
+
+	rt_se = rt_rq->tg->rt_se[cpu];
 
 	if (rt_se && on_rt_rq(rt_se))
 		dequeue_rt_entity(rt_se);
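The group-scheduling helpers above stop caching the group entity in rt_rq->rt_se and instead resolve it per CPU through rt_rq->tg->rt_se[cpu], because a task group keeps one scheduling entity per runqueue. A minimal sketch of that layout; the types here are toy stand-ins for this example, not the kernel's:

#include <stdio.h>

#define NR_CPUS 4

struct entity { int cpu; };

struct task_group {
	struct entity *se[NR_CPUS];	/* one entity per CPU */
};

static struct entity *group_entity_on(struct task_group *tg, int cpu)
{
	return tg->se[cpu];		/* mirrors rt_rq->tg->rt_se[cpu] */
}

int main(void)
{
	static struct entity ents[NR_CPUS];
	struct task_group tg;

	for (int i = 0; i < NR_CPUS; i++) {
		ents[i].cpu = i;
		tg.se[i] = &ents[i];
	}
	printf("entity for cpu 2 lives on cpu %d\n", group_entity_on(&tg, 2)->cpu);
	return 0;
}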
@@ -270 +288 @@
 	return ktime_to_ns(def_rt_bandwidth.rt_period);
 }
 
+static inline void list_add_leaf_rt_rq(struct rt_rq *rt_rq)
+{
+}
+
+static inline void list_del_leaf_rt_rq(struct rt_rq *rt_rq)
+{
+}
+
 #define for_each_leaf_rt_rq(rt_rq, rq) \
 	for (rt_rq = &rq->rt; rt_rq; rt_rq = NULL)
@@ -328 +354 @@
 	weight = cpumask_weight(rd->span);
 
-	spin_lock(&rt_b->rt_runtime_lock);
+	raw_spin_lock(&rt_b->rt_runtime_lock);
 	rt_period = ktime_to_ns(rt_b->rt_period);
 	for_each_cpu(i, rd->span) {
 		struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i);
@@ -358 +384 @@
 			rt_rq->rt_runtime += diff;
 			more = 1;
 			if (rt_rq->rt_runtime == rt_period) {
-				spin_unlock(&iter->rt_runtime_lock);
+				raw_spin_unlock(&iter->rt_runtime_lock);
 				break;
 			}
 		}
 next:
-		spin_unlock(&iter->rt_runtime_lock);
+		raw_spin_unlock(&iter->rt_runtime_lock);
 	}
-	spin_unlock(&rt_b->rt_runtime_lock);
+	raw_spin_unlock(&rt_b->rt_runtime_lock);
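Two independent things happen in do_balance_runtime() here. The locking moves to the raw_spin_* variants: on PREEMPT_RT a plain spinlock_t may become a sleeping lock, and the bandwidth-balancing path runs in contexts that must never sleep, so the core scheduler locks are annotated as raw. The borrowing arithmetic itself is unchanged: each peer donates a 1/weight share of its spare time, capped so the borrower never holds more than one full period. A self-contained toy rendering of that cap, with plain C stand-ins rather than kernel code:

#include <stdio.h>

typedef long long s64;

static s64 borrow(s64 *mine, s64 *peer_spare, s64 period, int weight)
{
	s64 diff = *peer_spare / weight;	/* 1/n of the peer's spare time */

	if (*mine + diff > period)
		diff = period - *mine;		/* never exceed one full period */
	*peer_spare -= diff;
	*mine += diff;
	return diff;
}

int main(void)
{
	s64 mine = 600, spare = 800, period = 1000;

	borrow(&mine, &spare, period, 2);
	printf("mine=%lld spare=%lld\n", mine, spare);	/* mine=1000 spare=400 */
	return 0;
}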
@@ -389 +415 @@
-		spin_lock(&rt_b->rt_runtime_lock);
-		spin_lock(&rt_rq->rt_runtime_lock);
+		raw_spin_lock(&rt_b->rt_runtime_lock);
+		raw_spin_lock(&rt_rq->rt_runtime_lock);
 		/*
 		 * Either we're all inf and nobody needs to borrow, or we're
 		 * already disabled and thus have nothing to do, or we have
 		 * exactly the right amount of runtime to take out.
 		 */
 		if (rt_rq->rt_runtime == RUNTIME_INF ||
 				rt_rq->rt_runtime == rt_b->rt_runtime)
 			goto balanced;
-		spin_unlock(&rt_rq->rt_runtime_lock);
+		raw_spin_unlock(&rt_rq->rt_runtime_lock);
 
 		/*
 		 * Calculate the difference between what we started out with
@@ -418 +444 @@
 			if (iter == rt_rq || iter->rt_runtime == RUNTIME_INF)
 				continue;
 
-			spin_lock(&iter->rt_runtime_lock);
+			raw_spin_lock(&iter->rt_runtime_lock);
 			if (want > 0) {
 				diff = min_t(s64, iter->rt_runtime, want);
 				iter->rt_runtime -= diff;
 				want -= diff;
 			} else {
 				iter->rt_runtime -= want;
 				want -= want;
 			}
-			spin_unlock(&iter->rt_runtime_lock);
+			raw_spin_unlock(&iter->rt_runtime_lock);
@@ -436 +462 @@
-		spin_lock(&rt_rq->rt_runtime_lock);
+		raw_spin_lock(&rt_rq->rt_runtime_lock);
 		/*
 		 * We cannot be left wanting - that would mean some runtime
 		 * leaked out of the system.
 		 */
 		BUG_ON(want);
 balanced:
 		/*
 		 * Disable all the borrow logic by pretending we have inf
 		 * runtime - in which case borrowing doesn't make sense.
 		 */
 		rt_rq->rt_runtime = RUNTIME_INF;
-		spin_unlock(&rt_rq->rt_runtime_lock);
-		spin_unlock(&rt_b->rt_runtime_lock);
+		raw_spin_unlock(&rt_rq->rt_runtime_lock);
+		raw_spin_unlock(&rt_b->rt_runtime_lock);
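__disable_runtime() is the inverse of the borrowing above: "want" is the runtime this runqueue lent out, and the loop greedily takes back min(iter->rt_runtime, want) from each peer until nothing is owed; BUG_ON(want) documents the invariant that runtime can never leak. A compact simulation of just that reclaim arithmetic, with made-up numbers:

#include <assert.h>
#include <stdio.h>

typedef long long s64;

#define min_t(t, a, b) ((t)(a) < (t)(b) ? (t)(a) : (t)(b))

int main(void)
{
	s64 peers[] = { 300, 50, 500 };
	s64 want = 400;			/* runtime we lent to peers */

	for (int i = 0; i < 3 && want > 0; i++) {
		s64 diff = min_t(s64, peers[i], want);

		peers[i] -= diff;
		want -= diff;
	}
	assert(want == 0);		/* mirrors the BUG_ON(want) invariant */
	printf("reclaimed everything: peers now %lld %lld %lld\n",
	       peers[0], peers[1], peers[2]);
	return 0;
}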
@@ -455 +481 @@
 	unsigned long flags;
 
-	spin_lock_irqsave(&rq->lock, flags);
+	raw_spin_lock_irqsave(&rq->lock, flags);
 	__disable_runtime(rq);
-	spin_unlock_irqrestore(&rq->lock, flags);
+	raw_spin_unlock_irqrestore(&rq->lock, flags);
@@ -462 +488 @@
 static void __enable_runtime(struct rq *rq)
 {
 	struct rt_rq *rt_rq;
 
 	if (unlikely(!scheduler_running))
 		return;
 
 	/*
 	 * Reset each runqueue's bandwidth settings
 	 */
 	for_each_leaf_rt_rq(rt_rq, rq) {
 		struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
 
-		spin_lock(&rt_b->rt_runtime_lock);
-		spin_lock(&rt_rq->rt_runtime_lock);
+		raw_spin_lock(&rt_b->rt_runtime_lock);
+		raw_spin_lock(&rt_rq->rt_runtime_lock);
 		rt_rq->rt_runtime = rt_b->rt_runtime;
 		rt_rq->rt_time = 0;
 		rt_rq->rt_throttled = 0;
-		spin_unlock(&rt_rq->rt_runtime_lock);
-		spin_unlock(&rt_b->rt_runtime_lock);
+		raw_spin_unlock(&rt_rq->rt_runtime_lock);
+		raw_spin_unlock(&rt_b->rt_runtime_lock);
@@ -487 +513 @@
 	unsigned long flags;
 
-	spin_lock_irqsave(&rq->lock, flags);
+	raw_spin_lock_irqsave(&rq->lock, flags);
 	__enable_runtime(rq);
-	spin_unlock_irqrestore(&rq->lock, flags);
+	raw_spin_unlock_irqrestore(&rq->lock, flags);
@@ -494 +520 @@
 static int balance_runtime(struct rt_rq *rt_rq)
 {
 	int more = 0;
 
 	if (rt_rq->rt_time > rt_rq->rt_runtime) {
-		spin_unlock(&rt_rq->rt_runtime_lock);
+		raw_spin_unlock(&rt_rq->rt_runtime_lock);
 		more = do_balance_runtime(rt_rq);
-		spin_lock(&rt_rq->rt_runtime_lock);
+		raw_spin_lock(&rt_rq->rt_runtime_lock);
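balance_runtime() is entered with rt_rq->rt_runtime_lock held, but do_balance_runtime() must take rt_b->rt_runtime_lock and then each peer's lock, so the caller drops its own lock first to respect the bandwidth-before-runqueue lock order. The same drop-and-reacquire shape as a self-contained pthread sketch (userspace mutexes standing in for the kernel's raw spinlocks):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t local_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t global_lock = PTHREAD_MUTEX_INITIALIZER;

static int do_balance(void)
{
	/* takes the "larger" lock (and, in the kernel, every peer's lock) */
	pthread_mutex_lock(&global_lock);
	pthread_mutex_unlock(&global_lock);
	return 1;
}

static int balance(int need)
{
	int more = 0;

	pthread_mutex_lock(&local_lock);
	if (need) {
		pthread_mutex_unlock(&local_lock);	/* avoid local->global inversion */
		more = do_balance();
		pthread_mutex_lock(&local_lock);	/* retake before touching state */
	}
	pthread_mutex_unlock(&local_lock);
	return more;
}

int main(void)
{
	printf("more=%d\n", balance(1));
	return 0;
}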
@@ -524 +550 @@
 		struct rt_rq *rt_rq = sched_rt_period_rt_rq(rt_b, i);
 		struct rq *rq = rq_of_rt_rq(rt_rq);
 
-		spin_lock(&rq->lock);
+		raw_spin_lock(&rq->lock);
 		if (rt_rq->rt_time) {
 			u64 runtime;
 
-			spin_lock(&rt_rq->rt_runtime_lock);
+			raw_spin_lock(&rt_rq->rt_runtime_lock);
 			if (rt_rq->rt_throttled)
 				balance_runtime(rt_rq);
 			runtime = rt_rq->rt_runtime;
@@ -540 +566 @@
 			if (rt_rq->rt_time || rt_rq->rt_nr_running)
 				idle = 0;
-			spin_unlock(&rt_rq->rt_runtime_lock);
-		} else if (rt_rq->rt_nr_running)
+			raw_spin_unlock(&rt_rq->rt_runtime_lock);
+		} else if (rt_rq->rt_nr_running) {
 			idle = 0;
+			if (!rt_rq_throttled(rt_rq))
+				enqueue = 1;
+		}
 
 		if (enqueue)
 			sched_rt_rq_enqueue(rt_rq);
-		spin_unlock(&rq->lock);
+		raw_spin_unlock(&rq->lock);
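do_sched_rt_period_timer() behaves like a token bucket: rt_time accrues while RT tasks run, each period the debt is wound back (in the full function, by rt_time -= min(rt_time, overrun * runtime), a line that falls outside this excerpt), and a throttled queue whose debt drops below its budget is unthrottled and re-enqueued. A conceptual model with made-up numbers, not the kernel's types:

#include <stdio.h>

typedef unsigned long long u64;

struct toy_rt_rq {
	u64 rt_time;		/* consumed this period */
	u64 rt_runtime;		/* allowed per period */
	int rt_throttled;
};

static u64 min_u64(u64 a, u64 b) { return a < b ? a : b; }

static void period_tick(struct toy_rt_rq *rt_rq, u64 overrun)
{
	rt_rq->rt_time -= min_u64(rt_rq->rt_time, overrun * rt_rq->rt_runtime);
	if (rt_rq->rt_throttled && rt_rq->rt_time < rt_rq->rt_runtime)
		rt_rq->rt_throttled = 0;	/* the re-enqueue would happen here */
}

int main(void)
{
	struct toy_rt_rq rq = { .rt_time = 950, .rt_runtime = 400, .rt_throttled = 1 };

	period_tick(&rq, 2);
	printf("rt_time=%llu throttled=%d\n", rq.rt_time, rq.rt_throttled);
	return 0;
}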
@@ -600 +629 @@
 	struct rt_rq *rt_rq = rt_rq_of_se(rt_se);
 	u64 delta_exec;
 
-	if (!task_has_rt_policy(curr))
+	if (curr->sched_class != &rt_sched_class)
 		return;
 
-	delta_exec = rq->clock - curr->se.exec_start;
+	delta_exec = rq->clock_task - curr->se.exec_start;
 	if (unlikely((s64)delta_exec < 0))
 		delta_exec = 0;
 
-	schedstat_set(curr->se.exec_max, max(curr->se.exec_max, delta_exec));
+	schedstat_set(curr->se.statistics.exec_max, max(curr->se.statistics.exec_max, delta_exec));
 
 	curr->se.sum_exec_runtime += delta_exec;
 	account_group_exec_runtime(curr, delta_exec);
 
-	curr->se.exec_start = rq->clock;
+	curr->se.exec_start = rq->clock_task;
 	cpuacct_charge(curr, delta_exec);
 
 	sched_rt_avg_update(rq, delta_exec);
@@ -624 +653 @@
 		rt_rq = rt_rq_of_se(rt_se);
 
 		if (sched_rt_runtime(rt_rq) != RUNTIME_INF) {
-			spin_lock(&rt_rq->rt_runtime_lock);
+			raw_spin_lock(&rt_rq->rt_runtime_lock);
 			rt_rq->rt_time += delta_exec;
 			if (sched_rt_runtime_exceeded(rt_rq))
 				resched_task(curr);
-			spin_unlock(&rt_rq->rt_runtime_lock);
+			raw_spin_unlock(&rt_rq->rt_runtime_lock);
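update_curr_rt() charges the time elapsed since exec_start to the running task and to every rt_rq up the group hierarchy. The switch from rq->clock to rq->clock_task matters because clock_task is intended to exclude time consumed by interrupt handling (when the kernel's IRQ-time accounting is available), so an RT task is neither billed nor throttled for IRQ work that merely interrupted it. A userspace analog of the exec_start/delta_exec pattern, using CLOCK_MONOTONIC in place of the per-rq clocks:

#include <stdio.h>
#include <time.h>

typedef long long s64;

static s64 now_ns(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return (s64)ts.tv_sec * 1000000000LL + ts.tv_nsec;
}

int main(void)
{
	s64 exec_start = now_ns();
	s64 sum_exec_runtime = 0;

	for (volatile long i = 0; i < 10000000; i++)
		;			/* "run" for a while */

	s64 delta_exec = now_ns() - exec_start;
	if (delta_exec < 0)		/* guard against clock warps, as the kernel does */
		delta_exec = 0;
	sum_exec_runtime += delta_exec;
	exec_start = now_ns();		/* restart the accounting window */
	(void)exec_start;

	printf("charged %lld ns\n", sum_exec_runtime);
	return 0;
}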
@@ -803 +832 @@
 	dec_rt_group(rt_se, rt_rq);
 }
 
-static void __enqueue_rt_entity(struct sched_rt_entity *rt_se)
+static void __enqueue_rt_entity(struct sched_rt_entity *rt_se, bool head)
 {
 	struct rt_rq *rt_rq = rt_rq_of_se(rt_se);
 	struct rt_prio_array *array = &rt_rq->active;
@@ -819 +848 @@
 	if (group_rq && (rt_rq_throttled(group_rq) || !group_rq->rt_nr_running))
 		return;
 
-	list_add_tail(&rt_se->run_list, queue);
+	if (!rt_rq->rt_nr_running)
+		list_add_leaf_rt_rq(rt_rq);
+
+	if (head)
+		list_add(&rt_se->run_list, queue);
+	else
+		list_add_tail(&rt_se->run_list, queue);
 	__set_bit(rt_se_prio(rt_se), array->bitmap);
 
 	inc_rt_tasks(rt_se, rt_rq);
@@ -835 +870 @@
 		__clear_bit(rt_se_prio(rt_se), array->bitmap);
 
 	dec_rt_tasks(rt_se, rt_rq);
+	if (!rt_rq->rt_nr_running)
+		list_del_leaf_rt_rq(rt_rq);
@@ -859 +896 @@
-static void enqueue_rt_entity(struct sched_rt_entity *rt_se)
+static void enqueue_rt_entity(struct sched_rt_entity *rt_se, bool head)
 {
 	dequeue_rt_stack(rt_se);
 	for_each_sched_rt_entity(rt_se)
-		__enqueue_rt_entity(rt_se);
+		__enqueue_rt_entity(rt_se, head);
 }
 
 static void dequeue_rt_entity(struct sched_rt_entity *rt_se)
 {
 	dequeue_rt_stack(rt_se);
 
 	for_each_sched_rt_entity(rt_se) {
 		struct rt_rq *rt_rq = group_rt_rq(rt_se);
 
 		if (rt_rq && rt_rq->rt_nr_running)
-			__enqueue_rt_entity(rt_se);
+			__enqueue_rt_entity(rt_se, false);
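The new bool head plumbed through the enqueue path selects list_add() versus list_add_tail() on the priority queue, i.e. whether an entity joins the front or the back of its priority level, while the leaf-list hooks fire only on the empty<->non-empty transitions so each rq tracks just the rt_rqs that actually hold tasks. Head-versus-tail in a self-contained rewrite of the intrusive list idiom (a sketch, not <linux/list.h> itself):

#include <stddef.h>
#include <stdio.h>

struct list_head { struct list_head *next, *prev; };

static void list_add_between(struct list_head *n, struct list_head *prev,
			     struct list_head *next)
{
	next->prev = n;
	n->next = next;
	n->prev = prev;
	prev->next = n;
}

static void list_add(struct list_head *n, struct list_head *head)
{
	list_add_between(n, head, head->next);	/* enqueue at the front */
}

static void list_add_tail(struct list_head *n, struct list_head *head)
{
	list_add_between(n, head->prev, head);	/* enqueue at the back */
}

struct task { int id; struct list_head run_list; };

int main(void)
{
	struct list_head queue = { &queue, &queue };
	struct task a = { 1, { NULL, NULL } };
	struct task b = { 2, { NULL, NULL } };
	struct task c = { 3, { NULL, NULL } };

	list_add_tail(&a.run_list, &queue);
	list_add_tail(&b.run_list, &queue);
	list_add(&c.run_list, &queue);		/* head == true: c runs first */

	for (struct list_head *p = queue.next; p != &queue; p = p->next) {
		struct task *t = (struct task *)((char *)p -
				 offsetof(struct task, run_list));
		printf("task %d\n", t->id);	/* prints 3, 1, 2 */
	}
	return 0;
}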
@@ -879 +916 @@
  * Adding/removing a task to/from a priority array:
  */
-static void enqueue_task_rt(struct rq *rq, struct task_struct *p, int wakeup)
+static void
+enqueue_task_rt(struct rq *rq, struct task_struct *p, int flags)
 {
 	struct sched_rt_entity *rt_se = &p->rt;
 
-	if (wakeup)
+	if (flags & ENQUEUE_WAKEUP)
 		rt_se->timeout = 0;
 
-	enqueue_rt_entity(rt_se);
+	enqueue_rt_entity(rt_se, flags & ENQUEUE_HEAD);
 
 	if (!task_current(rq, p) && p->rt.nr_cpus_allowed > 1)
 		enqueue_pushable_task(rq, p);
 }
 
-static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int sleep)
+static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int flags)
 {
 	struct sched_rt_entity *rt_se = &p->rt;
@@ -938 +976 @@
 #ifdef CONFIG_SMP
 static int find_lowest_rq(struct task_struct *task);
 
-static int select_task_rq_rt(struct task_struct *p, int sd_flag, int flags)
+static int
+select_task_rq_rt(struct rq *rq, struct task_struct *p, int sd_flag, int flags)
 {
-	struct rq *rq = task_rq(p);
-
 	if (sd_flag != SD_BALANCE_WAKE)
 		return smp_processor_id();
@@ -951 +988 @@
 	 * runqueue. Otherwise simply start this RT task
 	 * on its current runqueue.
 	 *
-	 * We want to avoid overloading runqueues. Even if
-	 * the RT task is of higher priority than the current RT task.
-	 * RT tasks behave differently than other tasks. If
-	 * one gets preempted, we try to push it off to another queue.
-	 * So trying to keep a preempting RT task on the same
-	 * cache hot CPU will force the running RT task to
-	 * a cold CPU. So we waste all the cache for the lower
-	 * RT task in hopes of saving some of a RT task
-	 * that is just being woken and probably will have
-	 * cache hot data on this CPU.
+	 * We want to avoid overloading runqueues. If the woken
+	 * task is a higher priority, then it will stay on this CPU
+	 * and the lower prio task should be moved to another CPU.
+	 * Even though this will probably make the lower prio task
+	 * lose its cache, we do not want to bounce a higher task
+	 * around just because it gave up its CPU, perhaps for a
+	 * lock?
+	 *
+	 * For equal prio tasks, we just let the scheduler sort it out.
 	 */
 	if (unlikely(rt_task(rq->curr)) &&
+	    (rq->curr->rt.nr_cpus_allowed < 2 ||
+	     rq->curr->prio < p->prio) &&
 	    (p->rt.nr_cpus_allowed > 1)) {
 		int cpu = find_lowest_rq(p);
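The rewritten comment and condition encode the new wakeup-placement policy: go hunting for a lower-priority CPU only when the currently running task is RT and either is effectively pinned (nr_cpus_allowed < 2) or outranks the waking task, and the waking task itself is movable. The same predicate as a pure function with toy types (lower prio value means more important, as in the kernel):

#include <assert.h>

struct toy_task {
	int rt;			/* is an RT task */
	int prio;		/* smaller is higher priority */
	int nr_cpus_allowed;
};

static int should_find_lowest_rq(const struct toy_task *curr,
				 const struct toy_task *p)
{
	return curr->rt &&
	       (curr->nr_cpus_allowed < 2 || curr->prio < p->prio) &&
	       p->nr_cpus_allowed > 1;
}

int main(void)
{
	struct toy_task curr = { .rt = 1, .prio = 10, .nr_cpus_allowed = 4 };
	struct toy_task woken = { .rt = 1, .prio = 20, .nr_cpus_allowed = 4 };

	/* curr outranks the woken task: place the woken task elsewhere */
	assert(should_find_lowest_rq(&curr, &woken));

	/* the woken task outranks curr: let it preempt here instead */
	woken.prio = 5;
	assert(!should_find_lowest_rq(&curr, &woken));
	return 0;
}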
@@ -1130 +1168 @@
 	for_each_leaf_rt_rq(rt_rq, rq) {
 		array = &rt_rq->active;
 		idx = sched_find_first_bit(array->bitmap);
 next_idx:
 		if (idx >= MAX_RT_PRIO)
 			continue;
 		if (next && next->prio < idx)
 			continue;
 		list_for_each_entry(rt_se, array->queue + idx, run_list) {
-			struct task_struct *p = rt_task_of(rt_se);
+			struct task_struct *p;
+
+			if (!rt_entity_is_task(rt_se))
+				continue;
+
+			p = rt_task_of(rt_se);
 			if (pick_rt_task(rq, p, cpu)) {
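pick_next_highest_task_rt() scans each rt_rq's priority bitmap with sched_find_first_bit(): one bit per priority level, and the lowest set bit is the best priority with queued entities; the new rt_entity_is_task() check skips group entities, which share these queues with tasks and must not be cast with rt_task_of(). A toy version of the bitmap scan:

#include <assert.h>
#include <stdint.h>

#define TOY_MAX_PRIO 100

static int toy_find_first_bit(const uint64_t *bitmap, int bits)
{
	for (int w = 0; w * 64 < bits; w++)
		if (bitmap[w])
			return w * 64 + __builtin_ctzll(bitmap[w]);
	return bits;			/* nothing queued */
}

int main(void)
{
	uint64_t bitmap[2] = { 0, 0 };

	assert(toy_find_first_bit(bitmap, TOY_MAX_PRIO) >= TOY_MAX_PRIO);

	bitmap[1] |= 1ULL << (70 - 64);	/* a task at priority 70 */
	bitmap[0] |= 1ULL << 42;	/* a better task at priority 42 */
	assert(toy_find_first_bit(bitmap, TOY_MAX_PRIO) == 42);
	return 0;
}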
@@ -1154 +1197 @@
 static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask);
 
-static inline int pick_optimal_cpu(int this_cpu,
-				   const struct cpumask *mask)
-{
-	int first;
-
-	/* "this_cpu" is cheaper to preempt than a remote processor */
-	if ((this_cpu != -1) && cpumask_test_cpu(this_cpu, mask))
-		return this_cpu;
-
-	first = cpumask_first(mask);
-	if (first < nr_cpu_ids)
-		return first;
-
-	return -1;
-}
-
 static int find_lowest_rq(struct task_struct *task)
 {
 	struct sched_domain *sd;
 	struct cpumask *lowest_mask = __get_cpu_var(local_cpu_mask);
 	int this_cpu = smp_processor_id();
 	int cpu = task_cpu(task);
-	cpumask_var_t domain_mask;
 
 	if (task->rt.nr_cpus_allowed == 1)
 		return -1; /* No other targets possible */
@@ -1198 +1224 @@
 	 * Otherwise, we consult the sched_domains span maps to figure
 	 * out which cpu is logically closest to our hot cache data.
 	 */
-	if (this_cpu == cpu)
-		this_cpu = -1; /* Skip this_cpu opt if the same */
+	if (!cpumask_test_cpu(this_cpu, lowest_mask))
+		this_cpu = -1; /* Skip this_cpu opt if not among lowest */
 
-	if (alloc_cpumask_var(&domain_mask, GFP_ATOMIC)) {
-		for_each_domain(cpu, sd) {
-			if (sd->flags & SD_WAKE_AFFINE) {
-				int best_cpu;
-
-				cpumask_and(domain_mask,
-					    sched_domain_span(sd),
-					    lowest_mask);
-
-				best_cpu = pick_optimal_cpu(this_cpu,
-							    domain_mask);
-
-				if (best_cpu != -1) {
-					free_cpumask_var(domain_mask);
-					return best_cpu;
-				}
-			}
-		}
-		free_cpumask_var(domain_mask);
-	}
+	for_each_domain(cpu, sd) {
+		if (sd->flags & SD_WAKE_AFFINE) {
+			int best_cpu;
+
+			/*
+			 * "this_cpu" is cheaper to preempt than a
+			 * remote processor.
+			 */
+			if (this_cpu != -1 &&
+			    cpumask_test_cpu(this_cpu, sched_domain_span(sd)))
+				return this_cpu;
+
+			best_cpu = cpumask_first_and(lowest_mask,
+						     sched_domain_span(sd));
+			if (best_cpu < nr_cpu_ids)
+				return best_cpu;
+		}
+	}
 
 	/*
 	 * And finally, if there were no matches within the domains
 	 * just give the caller *something* to work with from the compatible
 	 * locations.
 	 */
-	return pick_optimal_cpu(this_cpu, lowest_mask);
+	if (this_cpu != -1)
+		return this_cpu;
+
+	cpu = cpumask_any(lowest_mask);
+	if (cpu < nr_cpu_ids)
+		return cpu;
+	return -1;
 }
 
 /* Will lock the rq it finds */
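The rewrite deletes pick_optimal_cpu() and, with it, a GFP_ATOMIC cpumask allocation on a hot path: instead of materializing the intersection of the domain span and lowest_mask, cpumask_first_and() computes "first CPU present in both masks" directly, with a result >= nr_cpu_ids meaning no match. The same operation over a single 64-bit word:

#include <assert.h>
#include <stdint.h>

#define TOY_NR_CPUS 64

static int toy_first_and(uint64_t a, uint64_t b)
{
	uint64_t both = a & b;

	return both ? __builtin_ctzll(both) : TOY_NR_CPUS;	/* >= nr_cpus: none */
}

int main(void)
{
	uint64_t lowest_mask = (1ULL << 3) | (1ULL << 5);	/* CPUs 3, 5 run low prio */
	uint64_t domain_span = (1ULL << 4) | (1ULL << 5);	/* CPUs 4, 5 share cache */

	assert(toy_first_and(lowest_mask, domain_span) == 5);
	assert(toy_first_and(lowest_mask, 1ULL << 7) == TOY_NR_CPUS);
	return 0;
}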
@@ -1485 +1515 @@
  * If we are not running and we are not going to reschedule soon, we should
  * try to push tasks away now
  */
-static void task_wake_up_rt(struct rq *rq, struct task_struct *p)
+static void task_woken_rt(struct rq *rq, struct task_struct *p)
 {
 	if (!task_running(rq, p) &&
 	    !test_tsk_need_resched(rq->curr) &&
 	    has_pushable_tasks(rq) &&
-	    p->rt.nr_cpus_allowed > 1)
+	    p->rt.nr_cpus_allowed > 1 &&
+	    rt_task(rq->curr) &&
+	    (rq->curr->rt.nr_cpus_allowed < 2 ||
+	     rq->curr->prio < p->prio))
 		push_rt_tasks(rq);
@@ -1497 +1530 @@
-static unsigned long
-load_balance_rt(struct rq *this_rq, int this_cpu, struct rq *busiest,
-		unsigned long max_load_move,
-		struct sched_domain *sd, enum cpu_idle_type idle,
-		int *all_pinned, int *this_best_prio)
-{
-	/* don't touch RT tasks */
-	return 0;
-}
-
-static int
-move_one_task_rt(struct rq *this_rq, int this_cpu, struct rq *busiest,
-		 struct sched_domain *sd, enum cpu_idle_type idle)
-{
-	/* don't touch RT tasks */
-	return 0;
-}
-
 static void set_cpus_allowed_rt(struct task_struct *p,
 				const struct cpumask *new_mask)
@@ -1681 +1696 @@
 	unsigned long soft, hard;
 
-	soft = p->signal->rlim[RLIMIT_RTTIME].rlim_cur;
-	hard = p->signal->rlim[RLIMIT_RTTIME].rlim_max;
+	/* max may change after cur was read, this will be fixed next tick */
+	soft = task_rlimit(p, RLIMIT_RTTIME);
+	hard = task_rlimit_max(p, RLIMIT_RTTIME);
 
 	if (soft != RLIM_INFINITY) {
 		unsigned long next;
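task_rlimit() and task_rlimit_max() wrap the same p->signal->rlim[] slots behind helpers that read each value once; the added comment concedes that the soft/hard pair may be read inconsistently and that the next tick repairs it. RLIMIT_RTTIME, the limit the watchdog enforces (a per-process cap, in microseconds, on CPU time an RT task may consume without sleeping), is visible from userspace too:

#include <stdio.h>
#include <sys/resource.h>

int main(void)
{
	struct rlimit rl;

	if (getrlimit(RLIMIT_RTTIME, &rl) == 0) {
		if (rl.rlim_cur == RLIM_INFINITY)
			printf("no RT runtime watchdog for this process\n");
		else
			printf("soft=%llu us hard=%llu us\n",
			       (unsigned long long)rl.rlim_cur,
			       (unsigned long long)rl.rlim_max);
	}
	return 0;
}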
@@ -1729 +1742 @@
 	struct task_struct *p = rq->curr;
 
-	p->se.exec_start = rq->clock;
+	p->se.exec_start = rq->clock_task;
 
 	/* The running task is never eligible for pushing */
 	dequeue_pushable_task(rq, p);
 }
 
-unsigned int get_rr_interval_rt(struct task_struct *task)
+static unsigned int get_rr_interval_rt(struct rq *rq, struct task_struct *task)
 {
 	/*
 	 * Time slice is 0 for SCHED_FIFO tasks
@@ -1759 +1772 @@
 #ifdef CONFIG_SMP
 	.select_task_rq		= select_task_rq_rt,
 
-	.load_balance		= load_balance_rt,
-	.move_one_task		= move_one_task_rt,
 	.set_cpus_allowed	= set_cpus_allowed_rt,
 	.rq_online		= rq_online_rt,
 	.rq_offline		= rq_offline_rt,
 	.pre_schedule		= pre_schedule_rt,
 	.post_schedule		= post_schedule_rt,
-	.task_wake_up		= task_wake_up_rt,
+	.task_woken		= task_woken_rt,
 	.switched_from		= switched_from_rt,