static ERTS_INLINE int
ongoing_multi_scheduling_block(void)
return erts_smp_atomic_read(&schdlr_sspnd.msb.ongoing) != 0;
return erts_smp_atomic32_read(&schdlr_sspnd.msb.ongoing) != 0;
static ERTS_INLINE void
empty_runq(ErtsRunQueue *rq)
long oifls = erts_smp_atomic_band(&rq->info_flags, ~ERTS_RUNQ_IFLG_NONEMPTY);
erts_aint32_t oifls = erts_smp_atomic32_band(&rq->info_flags,
~ERTS_RUNQ_IFLG_NONEMPTY);
if (oifls & ERTS_RUNQ_IFLG_NONEMPTY) {
long empty = erts_smp_atomic_read(&no_empty_run_queues);
ASSERT(0 <= empty && empty < erts_no_run_queues);
erts_aint32_t empty = erts_smp_atomic32_read(&no_empty_run_queues);
* For a short period of time no_empty_run_queues may have
* been increased twice for a specific run queue.
ASSERT(0 <= empty && empty < 2*erts_no_run_queues);
erts_smp_atomic_inc(&no_empty_run_queues);
erts_smp_atomic32_inc(&no_empty_run_queues);
static ERTS_INLINE void
non_empty_runq(ErtsRunQueue *rq)
long oifls = erts_smp_atomic_bor(&rq->info_flags, ERTS_RUNQ_IFLG_NONEMPTY);
erts_aint32_t oifls = erts_smp_atomic32_bor(&rq->info_flags,
ERTS_RUNQ_IFLG_NONEMPTY);
if (!(oifls & ERTS_RUNQ_IFLG_NONEMPTY)) {
long empty = erts_smp_atomic_read(&no_empty_run_queues);
ASSERT(0 < empty && empty <= erts_no_run_queues);
erts_smp_atomic_dec(&no_empty_run_queues);
static ERTS_INLINE int
sched_spin_wake(ErtsRunQueue *rq)
#if ERTS_SCHED_SLEEP_SPINCOUNT == 0
ERTS_SMP_LC_ASSERT(erts_smp_lc_runq_is_locked(rq));
val = erts_smp_atomic_read(&rq->spin_waiter);
erts_smp_atomic_inc(&rq->spin_wake);
static ERTS_INLINE int
sched_spin_wake_all(ErtsRunQueue *rq)
#if ERTS_SCHED_SLEEP_SPINCOUNT == 0
ERTS_SMP_LC_ASSERT(erts_smp_lc_runq_is_locked(rq));
val = erts_smp_atomic_read(&rq->spin_waiter);
erts_smp_atomic_add(&rq->spin_wake, val);
erts_aint32_t empty = erts_smp_atomic32_read(&no_empty_run_queues);
* For a short period of time no_empty_run_queues may have
* been increased twice for a specific run queue.
ASSERT(0 < empty && empty <= 2*erts_no_run_queues);
erts_smp_atomic32_dec(&no_empty_run_queues);
sched_prep_spin_wait(ErtsSchedulerSleepInfo *ssi)
erts_aint32_t nflgs = (ERTS_SSI_FLG_SLEEPING
| ERTS_SSI_FLG_WAITING);
erts_aint32_t xflgs = 0;
oflgs = erts_smp_atomic32_cmpxchg(&ssi->flags, nflgs, xflgs);
} while (!(oflgs & ERTS_SSI_FLG_SUSPENDED));
sched_prep_cont_spin_wait(ErtsSchedulerSleepInfo *ssi)
erts_aint32_t nflgs = (ERTS_SSI_FLG_SLEEPING
| ERTS_SSI_FLG_WAITING);
erts_aint32_t xflgs = ERTS_SSI_FLG_WAITING;
oflgs = erts_smp_atomic32_cmpxchg(&ssi->flags, nflgs, xflgs);
nflgs |= oflgs & ERTS_SSI_FLG_SUSPENDED;
} while (oflgs & ERTS_SSI_FLG_WAITING);
sched_spin_wait(ErtsSchedulerSleepInfo *ssi, int spincount)
int until_yield = ERTS_SCHED_SPIN_UNTIL_YIELD;
flgs = erts_smp_atomic32_read(&ssi->flags);
if ((flgs & (ERTS_SSI_FLG_SLEEPING|ERTS_SSI_FLG_WAITING))
!= (ERTS_SSI_FLG_SLEEPING|ERTS_SSI_FLG_WAITING)) {
if (--until_yield == 0) {
until_yield = ERTS_SCHED_SPIN_UNTIL_YIELD;
sched_set_sleeptype(ErtsSchedulerSleepInfo *ssi, erts_aint32_t sleep_type)
erts_aint32_t nflgs = ERTS_SSI_FLG_SLEEPING|ERTS_SSI_FLG_WAITING|sleep_type;
erts_aint32_t xflgs = ERTS_SSI_FLG_SLEEPING|ERTS_SSI_FLG_WAITING;
if (sleep_type == ERTS_SSI_FLG_TSE_SLEEPING)
erts_tse_reset(ssi->event);
oflgs = erts_smp_atomic32_cmpxchg(&ssi->flags, nflgs, xflgs);
if ((oflgs & (ERTS_SSI_FLG_SLEEPING|ERTS_SSI_FLG_WAITING))
!= (ERTS_SSI_FLG_SLEEPING|ERTS_SSI_FLG_WAITING)) {
nflgs |= oflgs & ERTS_SSI_FLG_SUSPENDED;
#define ERTS_SCHED_WAIT_WOKEN(FLGS) \
(((FLGS) & (ERTS_SSI_FLG_WAITING|ERTS_SSI_FLG_SUSPENDED)) \
!= ERTS_SSI_FLG_WAITING)
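/*
 * Illustrative sketch (not part of the original source): the
 * sched_prep_spin_wait()/sched_set_sleeptype() helpers above follow a
 * standard compare-and-swap retry loop on a 32-bit flags word. The
 * generic shape of that loop, written with C11 <stdatomic.h> instead of
 * the erts_smp_atomic32_* wrappers, looks roughly like this:
 */
#include <stdatomic.h>
#include <stdint.h>

#define FLG_SLEEPING  (1u << 0)
#define FLG_WAITING   (1u << 1)
#define FLG_SUSPENDED (1u << 2)

/* Try to move a flags word from an expected state to a new state,
 * re-reading and retrying until either the swap succeeds or a flag that
 * forbids the transition (here FLG_SUSPENDED) shows up. */
static uint32_t
prep_spin_wait_sketch(_Atomic uint32_t *flags)
{
    uint32_t nflgs = FLG_SLEEPING | FLG_WAITING;
    uint32_t xflgs = 0;                       /* expected old value */

    while (!atomic_compare_exchange_weak(flags, &xflgs, nflgs)) {
        /* xflgs now holds the value that was actually observed */
        if (xflgs & FLG_SUSPENDED)
            return xflgs;                     /* give up; caller must suspend */
        /* otherwise retry, using the observed value as the new expected one */
    }
    return nflgs;
}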
sched_sys_wait(Uint no, ErtsRunQueue *rq)
scheduler_wait(int *fcalls, ErtsSchedulerData *esdp, ErtsRunQueue *rq)
#if ERTS_SCHED_SLEEP_SPINCOUNT != 0
int spincount = ERTS_SCHED_SLEEP_SPINCOUNT;
ErtsSchedulerSleepInfo *ssi = esdp->ssi;
#if defined(ERTS_SCHED_NEED_NONBLOCKABLE_AUX_WORK) \
|| defined(ERTS_SCHED_NEED_BLOCKABLE_AUX_WORK)
erts_aint32_t aux_work;
ERTS_SMP_LC_ASSERT(erts_smp_lc_runq_is_locked(rq));
sched_waiting_sys(no, rq);
#if ERTS_SCHED_SLEEP_SPINCOUNT != 0
erts_smp_atomic_inc(&rq->spin_waiter);
erts_smp_runq_unlock(rq);
erl_sys_schedule(1); /* Might give us something to do */
dt = do_time_read_and_reset();
if (dt) bump_timer(dt);
while (spincount-- > 0) {
val = erts_smp_atomic_read(&rq->spin_wake);
erts_smp_runq_lock(rq);
val = erts_smp_atomic_read(&rq->spin_wake);
erts_smp_runq_unlock(rq);
erts_smp_spin_lock(&rq->sleepers.lock);
flgs = sched_prep_spin_wait(ssi);
if (flgs & ERTS_SSI_FLG_SUSPENDED) {
/* Go suspend instead... */
erts_smp_spin_unlock(&rq->sleepers.lock);
ssi->next = rq->sleepers.list;
if (rq->sleepers.list)
rq->sleepers.list->prev = ssi;
rq->sleepers.list = ssi;
erts_smp_spin_unlock(&rq->sleepers.lock);
* If all schedulers are waiting, one of them *should*
* be waiting in erl_sys_schedule()
if (!prepare_for_sys_schedule()) {
sched_waiting(esdp->no, rq);
erts_smp_runq_unlock(rq);
spincount = ERTS_SCHED_TSE_SLEEP_SPINCOUNT;
#ifdef ERTS_SCHED_NEED_BLOCKABLE_AUX_WORK
aux_work = erts_smp_atomic32_read(&ssi->aux_work);
tse_blockable_aux_work:
aux_work = blockable_aux_work(esdp, ssi, aux_work);
erts_smp_activity_begin(ERTS_ACTIVITY_WAIT, NULL, NULL, NULL);
#ifdef ERTS_SCHED_NEED_NONBLOCKABLE_AUX_WORK
#ifndef ERTS_SCHED_NEED_BLOCKABLE_AUX_WORK
aux_work = erts_smp_atomic32_read(&ssi->aux_work);
nonblockable_aux_work(esdp, ssi, aux_work);
flgs = sched_spin_wait(ssi, spincount);
if (flgs & ERTS_SSI_FLG_SLEEPING) {
ASSERT(flgs & ERTS_SSI_FLG_WAITING);
flgs = sched_set_sleeptype(ssi, ERTS_SSI_FLG_TSE_SLEEPING);
if (flgs & ERTS_SSI_FLG_SLEEPING) {
ASSERT(flgs & ERTS_SSI_FLG_TSE_SLEEPING);
ASSERT(flgs & ERTS_SSI_FLG_WAITING);
res = erts_tse_wait(ssi->event);
} while (res == EINTR);
if (!(flgs & ERTS_SSI_FLG_WAITING)) {
ASSERT(!(flgs & ERTS_SSI_FLG_SLEEPING));
flgs = sched_prep_cont_spin_wait(ssi);
spincount = ERTS_SCHED_TSE_SLEEP_SPINCOUNT;
if (!(flgs & ERTS_SSI_FLG_WAITING)) {
ASSERT(!(flgs & ERTS_SSI_FLG_SLEEPING));
#ifdef ERTS_SCHED_NEED_BLOCKABLE_AUX_WORK
aux_work = erts_smp_atomic32_read(&ssi->aux_work);
if (aux_work & ERTS_SSI_BLOCKABLE_AUX_WORK_MASK) {
erts_smp_activity_end(ERTS_ACTIVITY_WAIT, NULL, NULL, NULL);
goto tse_blockable_aux_work;
erts_smp_runq_lock(rq);
val = erts_smp_atomic_read(&rq->spin_wake);
erts_smp_atomic_dec(&rq->spin_wake);
ASSERT(erts_smp_atomic_read(&rq->spin_wake) >= 0);
erts_smp_atomic_dec(&rq->spin_waiter);
ASSERT(erts_smp_atomic_read(&rq->spin_waiter) >= 0);
erts_smp_activity_end(ERTS_ACTIVITY_WAIT, NULL, NULL, NULL);
if (flgs & ~ERTS_SSI_FLG_SUSPENDED)
erts_smp_atomic32_band(&ssi->flags, ERTS_SSI_FLG_SUSPENDED);
erts_smp_runq_lock(rq);
sched_active(esdp->no, rq);
erts_smp_atomic_dec(&rq->spin_waiter);
ASSERT(erts_smp_atomic_read(&rq->spin_waiter) >= 0);
erts_smp_atomic32_set(&function_calls, 0);
sched_waiting_sys(esdp->no, rq);
erts_smp_runq_unlock(rq);
spincount = ERTS_SCHED_SYS_SLEEP_SPINCOUNT;
while (spincount-- > 0) {
ASSERT(!erts_port_task_have_outstanding_io_tasks());
erl_sys_schedule(1); /* Might give us something to do */
dt = erts_do_time_read_and_reset();
if (dt) erts_bump_timer(dt);
#ifdef ERTS_SCHED_NEED_BLOCKABLE_AUX_WORK
aux_work = erts_smp_atomic32_read(&ssi->aux_work);
aux_work = blockable_aux_work(esdp, ssi, aux_work);
#ifdef ERTS_SCHED_NEED_NONBLOCKABLE_AUX_WORK
#ifndef ERTS_SCHED_NEED_BLOCKABLE_AUX_WORK
aux_work = erts_smp_atomic32_read(&ssi->aux_work);
nonblockable_aux_work(esdp, ssi, aux_work);
flgs = erts_smp_atomic32_read(&ssi->flags);
if (!(flgs & ERTS_SSI_FLG_WAITING)) {
ASSERT(!(flgs & ERTS_SSI_FLG_SLEEPING));
if (!(flgs & ERTS_SSI_FLG_SLEEPING)) {
flgs = sched_prep_cont_spin_wait(ssi);
if (!(flgs & ERTS_SSI_FLG_WAITING)) {
ASSERT(!(flgs & ERTS_SSI_FLG_SLEEPING));
* If we got new I/O tasks we aren't allowed to
* call erl_sys_schedule() until it is handled.
if (erts_port_task_have_outstanding_io_tasks()) {
clear_sys_scheduling();
* Got to check that we still got I/O tasks; otherwise
* we have to continue checking for I/O...
if (!prepare_for_sys_schedule()) {
spincount *= ERTS_SCHED_TSE_SLEEP_SPINCOUNT_FACT;
erts_smp_runq_lock(rq);
* If we got new I/O tasks we aren't allowed to
* sleep in erl_sys_schedule().
if (!erts_port_task_have_outstanding_io_tasks()) {
if (erts_port_task_have_outstanding_io_tasks()) {
clear_sys_scheduling();
* Got to check that we still got I/O tasks; otherwise
* we have to wait in erl_sys_schedule() after all...
if (prepare_for_sys_schedule())
goto do_sys_schedule;
* Not allowed to wait in erl_sys_schedule;
* do tse wait instead...
sched_change_waiting_sys_to_waiting(esdp->no, rq);
erts_smp_runq_unlock(rq);
erts_sys_schedule_interrupt(0);
flgs = sched_set_sleeptype(ssi, ERTS_SSI_FLG_POLL_SLEEPING);
if (!(flgs & ERTS_SSI_FLG_SLEEPING)) {
if (!(flgs & ERTS_SSI_FLG_WAITING))
goto sys_locked_woken;
erts_smp_runq_unlock(rq);
flgs = sched_prep_cont_spin_wait(ssi);
if (!(flgs & ERTS_SSI_FLG_WAITING)) {
ASSERT(!(flgs & ERTS_SSI_FLG_SLEEPING));
ASSERT(!erts_port_task_have_outstanding_io_tasks());
goto sys_poll_aux_work;
ASSERT(flgs & ERTS_SSI_FLG_POLL_SLEEPING);
ASSERT(flgs & ERTS_SSI_FLG_WAITING);
erts_smp_runq_unlock(rq);
ASSERT(!erts_port_task_have_outstanding_io_tasks());
erl_sys_schedule(0);
dt = do_time_read_and_reset();
if (dt) bump_timer(dt);
dt = erts_do_time_read_and_reset();
if (dt) erts_bump_timer(dt);
flgs = sched_prep_cont_spin_wait(ssi);
if (flgs & ERTS_SSI_FLG_WAITING)
erts_smp_runq_lock(rq);
#if ERTS_SCHED_SLEEP_SPINCOUNT != 0
clear_sys_scheduling();
if (flgs & ~ERTS_SSI_FLG_SUSPENDED)
erts_smp_atomic32_band(&ssi->flags, ERTS_SSI_FLG_SUSPENDED);
sched_active_sys(esdp->no, rq);
sched_active_sys(no, rq);
ERTS_SMP_LC_ASSERT(erts_smp_lc_runq_is_locked(rq));
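/*
 * Illustrative sketch (not part of the original source): scheduler_wait()
 * above first spins for a bounded number of iterations in the hope of a
 * cheap wakeup, and only then arms a sleep flag and blocks on an
 * event/poll. A minimal version of that "spin, then block" pattern, using
 * C11 atomics and a pthread condition variable instead of the ERTS thread
 * events:
 */
#include <pthread.h>
#include <stdatomic.h>
#include <sched.h>

typedef struct {
    _Atomic int woken;          /* set by the waker */
    pthread_mutex_t mtx;
    pthread_cond_t cnd;
} waiter_t;

static void
waiter_wait_sketch(waiter_t *w, int spincount)
{
    /* Phase 1: spin, yielding the CPU, hoping to be woken cheaply. */
    while (spincount-- > 0) {
        if (atomic_load(&w->woken))
            goto done;
        sched_yield();
    }
    /* Phase 2: block until the waker signals us. */
    pthread_mutex_lock(&w->mtx);
    while (!atomic_load(&w->woken))
        pthread_cond_wait(&w->cnd, &w->mtx);
    pthread_mutex_unlock(&w->mtx);
done:
    atomic_store(&w->woken, 0);
}

static void
waiter_wake_sketch(waiter_t *w)
{
    pthread_mutex_lock(&w->mtx);
    atomic_store(&w->woken, 1);
    pthread_cond_signal(&w->cnd);
    pthread_mutex_unlock(&w->mtx);
}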
static ERTS_INLINE erts_aint32_t
ssi_flags_set_wake(ErtsSchedulerSleepInfo *ssi)
/* reset all flags but suspended */
erts_aint32_t oflgs;
erts_aint32_t nflgs = 0;
erts_aint32_t xflgs = ERTS_SSI_FLG_SLEEPING|ERTS_SSI_FLG_WAITING;
oflgs = erts_smp_atomic32_cmpxchg(&ssi->flags, nflgs, xflgs);
nflgs = oflgs & ERTS_SSI_FLG_SUSPENDED;
sched_cnd_wait(Uint no, ErtsRunQueue *rq)
wake_scheduler(ErtsRunQueue *rq, int incq, int one)
#if ERTS_SCHED_SLEEP_SPINCOUNT != 0
int spincount = ERTS_SCHED_SLEEP_SPINCOUNT;
ERTS_SMP_LC_ASSERT(erts_smp_lc_runq_is_locked(rq));
sched_waiting(no, rq);
erts_smp_activity_begin(ERTS_ACTIVITY_WAIT,
#if ERTS_SCHED_SLEEP_SPINCOUNT == 0
erts_smp_cnd_wait(&rq->cnd, &rq->mtx);
erts_smp_atomic_inc(&rq->spin_waiter);
erts_smp_mtx_unlock(&rq->mtx);
while (spincount-- > 0) {
val = erts_smp_atomic_read(&rq->spin_wake);
erts_smp_mtx_lock(&rq->mtx);
val = erts_smp_atomic_read(&rq->spin_wake);
erts_smp_mtx_unlock(&rq->mtx);
ErtsSchedulerSleepInfo *ssi;
ErtsSchedulerSleepList *sl;
* The unlocked run queue is not strictly necessary
* from a thread safety or deadlock prevention
* perspective. It will, however, cost us performance
* if it is locked during wakeup of another scheduler,
* so all code *should* handle this without having
* the lock on the run queue.
ERTS_SMP_LC_ASSERT(!erts_smp_lc_runq_is_locked(rq));
erts_smp_spin_lock(&sl->lock);
erts_smp_spin_unlock(&sl->lock);
ssi->prev->next = ssi->next;
ASSERT(sl->list == ssi);
sl->list = ssi->next;
erts_smp_mtx_lock(&rq->mtx);
val = erts_smp_atomic_read(&rq->spin_wake);
erts_smp_atomic_dec(&rq->spin_waiter);
ASSERT(erts_smp_atomic_read(&rq->spin_waiter) >= 0);
erts_smp_cnd_wait(&rq->cnd, &rq->mtx);
ssi->next->prev = ssi->prev;
res = sl->list != NULL;
erts_smp_spin_unlock(&sl->lock);
ERTS_THR_MEMORY_BARRIER;
flgs = ssi_flags_set_wake(ssi);
erts_sched_finish_poke(ssi, flgs);
if (incq && !erts_common_run_queue && (flgs & ERTS_SSI_FLG_WAITING))
erts_smp_atomic_dec(&rq->spin_wake);
ASSERT(erts_smp_atomic_read(&rq->spin_wake) >= 0);
erts_smp_atomic_dec(&rq->spin_waiter);
ASSERT(erts_smp_atomic_read(&rq->spin_waiter) >= 0);
erts_smp_activity_end(ERTS_ACTIVITY_WAIT,
sched_active(no, rq);
wake_one_scheduler(void)
ASSERT(erts_common_run_queue);
ERTS_SMP_LC_ASSERT(erts_smp_lc_runq_is_locked(erts_common_run_queue));
if (erts_common_run_queue->waiting) {
if (!sched_spin_wake(erts_common_run_queue)) {
if (erts_common_run_queue->waiting == -1) /* One scheduler waiting
erts_sys_schedule_interrupt(1);
erts_smp_cnd_signal(&erts_common_run_queue->cnd);
wake_scheduler(ErtsRunQueue *rq, int incq)
ASSERT(!erts_common_run_queue);
ASSERT(-1 <= rq->waiting && rq->waiting <= 1);
ERTS_SMP_LC_ASSERT(erts_smp_lc_runq_is_locked(rq));
if (rq->waiting && !rq->woken) {
if (!sched_spin_wake(rq)) {
erts_sys_schedule_interrupt(1);
erts_smp_cnd_signal(&rq->cnd);
erts_smp_spin_unlock(&sl->lock);
ERTS_THR_MEMORY_BARRIER;
ErtsSchedulerSleepInfo *wake_ssi = ssi;
erts_sched_finish_poke(wake_ssi, ssi_flags_set_wake(wake_ssi));
wake_all_schedulers(void)
if (erts_common_run_queue) {
erts_smp_runq_lock(erts_common_run_queue);
if (erts_common_run_queue->waiting) {
if (erts_common_run_queue->waiting < 0)
erts_sys_schedule_interrupt(1);
sched_spin_wake_all(erts_common_run_queue);
erts_smp_cnd_broadcast(&erts_common_run_queue->cnd);
erts_smp_runq_unlock(erts_common_run_queue);
if (erts_common_run_queue)
wake_scheduler(erts_common_run_queue, 0, 0);
for (ix = 0; ix < erts_no_run_queues; ix++) {
ErtsRunQueue *rq = ERTS_RUNQ_IX(ix);
erts_smp_runq_lock(rq);
wake_scheduler(rq, 0);
erts_smp_runq_unlock(rq);
wake_scheduler(rq, 0, 1);
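/*
 * Illustrative sketch (not part of the original source): wake_scheduler()
 * above pops a sleeper off an intrusive doubly linked list (rq->sleepers)
 * while holding the list's spinlock. The unlink itself is the usual
 * prev/next surgery, roughly:
 */
#include <stddef.h>

typedef struct sleeper {
    struct sleeper *next;
    struct sleeper *prev;
} sleeper_t;

/* Remove 'ssi' from the list headed by '*listp'; assumes the caller holds
 * whatever lock protects the list. */
static void
sleeper_unlink_sketch(sleeper_t **listp, sleeper_t *ssi)
{
    if (ssi->prev)
        ssi->prev->next = ssi->next;
    else
        *listp = ssi->next;       /* ssi was the head of the list */
    if (ssi->next)
        ssi->next->prev = ssi->prev;
    ssi->next = ssi->prev = NULL;
}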
#endif /* ERTS_SMP */
int_cmp(const void *vx, const void *vy)
return *((int *) vx) - *((int *) vy);
cpu_spread_order_cmp(const void *vx, const void *vy)
erts_cpu_topology_t *x = (erts_cpu_topology_t *) vx;
erts_cpu_topology_t *y = (erts_cpu_topology_t *) vy;
if (x->thread != y->thread)
return x->thread - y->thread;
if (x->core != y->core)
return x->core - y->core;
if (x->processor_node != y->processor_node)
return x->processor_node - y->processor_node;
if (x->processor != y->processor)
return x->processor - y->processor;
if (x->node != y->node)
return x->node - y->node;
cpu_processor_spread_order_cmp(const void *vx, const void *vy)
erts_cpu_topology_t *x = (erts_cpu_topology_t *) vx;
erts_cpu_topology_t *y = (erts_cpu_topology_t *) vy;
if (x->thread != y->thread)
return x->thread - y->thread;
if (x->processor_node != y->processor_node)
return x->processor_node - y->processor_node;
if (x->core != y->core)
return x->core - y->core;
if (x->node != y->node)
return x->node - y->node;
if (x->processor != y->processor)
return x->processor - y->processor;
cpu_thread_spread_order_cmp(const void *vx, const void *vy)
erts_cpu_topology_t *x = (erts_cpu_topology_t *) vx;
erts_cpu_topology_t *y = (erts_cpu_topology_t *) vy;
if (x->thread != y->thread)
return x->thread - y->thread;
if (x->node != y->node)
return x->node - y->node;
if (x->processor != y->processor)
return x->processor - y->processor;
if (x->processor_node != y->processor_node)
return x->processor_node - y->processor_node;
if (x->core != y->core)
return x->core - y->core;
cpu_thread_no_node_processor_spread_order_cmp(const void *vx, const void *vy)
erts_cpu_topology_t *x = (erts_cpu_topology_t *) vx;
erts_cpu_topology_t *y = (erts_cpu_topology_t *) vy;
if (x->thread != y->thread)
return x->thread - y->thread;
if (x->node != y->node)
return x->node - y->node;
if (x->core != y->core)
return x->core - y->core;
if (x->processor != y->processor)
return x->processor - y->processor;
cpu_no_node_processor_spread_order_cmp(const void *vx, const void *vy)
erts_cpu_topology_t *x = (erts_cpu_topology_t *) vx;
erts_cpu_topology_t *y = (erts_cpu_topology_t *) vy;
if (x->node != y->node)
return x->node - y->node;
if (x->thread != y->thread)
return x->thread - y->thread;
if (x->core != y->core)
return x->core - y->core;
if (x->processor != y->processor)
return x->processor - y->processor;
cpu_no_node_thread_spread_order_cmp(const void *vx, const void *vy)
erts_cpu_topology_t *x = (erts_cpu_topology_t *) vx;
erts_cpu_topology_t *y = (erts_cpu_topology_t *) vy;
if (x->node != y->node)
return x->node - y->node;
if (x->thread != y->thread)
return x->thread - y->thread;
if (x->processor != y->processor)
return x->processor - y->processor;
if (x->core != y->core)
return x->core - y->core;
cpu_no_spread_order_cmp(const void *vx, const void *vy)
erts_cpu_topology_t *x = (erts_cpu_topology_t *) vx;
erts_cpu_topology_t *y = (erts_cpu_topology_t *) vy;
if (x->node != y->node)
return x->node - y->node;
if (x->processor != y->processor)
return x->processor - y->processor;
if (x->processor_node != y->processor_node)
return x->processor_node - y->processor_node;
if (x->core != y->core)
return x->core - y->core;
if (x->thread != y->thread)
return x->thread - y->thread;
static ERTS_INLINE void
make_cpudata_id_seq(erts_cpu_topology_t *cpudata, int size, int no_node)
int processor_node = -1;
int processor_node_node = -1;
int old_processor = -1;
int old_processor_node = -1;
int old_thread = -1;
for (ix = 0; ix < size; ix++) {
if (!no_node || cpudata[ix].node >= 0) {
if (old_node == cpudata[ix].node)
cpudata[ix].node = node;
old_node = cpudata[ix].node;
old_processor = processor = -1;
old_processor_node = processor_node = -1;
old_core = core = -1;
old_thread = thread = -1;
if (no_node || cpudata[ix].node >= 0)
cpudata[ix].node = ++node;
if (old_processor == cpudata[ix].processor)
cpudata[ix].processor = processor;
old_processor = cpudata[ix].processor;
processor_node_node = old_processor_node = processor_node = -1;
old_core = core = -1;
old_thread = thread = -1;
cpudata[ix].processor = ++processor;
if (no_node && cpudata[ix].processor_node < 0)
old_processor_node = -1;
if (old_processor_node == cpudata[ix].processor_node) {
cpudata[ix].node = cpudata[ix].processor_node = node;
if (processor_node_node >= 0)
cpudata[ix].node = processor_node_node;
cpudata[ix].processor_node = processor_node;
old_processor_node = cpudata[ix].processor_node;
old_core = core = -1;
old_thread = thread = -1;
cpudata[ix].node = cpudata[ix].processor_node = ++node;
cpudata[ix].node = processor_node_node = ++node;
cpudata[ix].processor_node = ++processor_node;
if (!no_node && cpudata[ix].processor_node < 0)
cpudata[ix].processor_node = 0;
if (old_core == cpudata[ix].core)
cpudata[ix].core = core;
old_core = cpudata[ix].core;
old_thread = thread = -1;
cpudata[ix].core = ++core;
if (old_thread == cpudata[ix].thread)
cpudata[ix].thread = thread;
old_thread = cpudata[ix].thread = ++thread;
cpu_bind_order_sort(erts_cpu_topology_t *cpudata,
ErtsCpuBindOrder bind_order,
int (*cmp_func)(const void *, const void *);
switch (bind_order) {
case ERTS_CPU_BIND_SPREAD:
cmp_func = cpu_spread_order_cmp;
case ERTS_CPU_BIND_PROCESSOR_SPREAD:
cmp_func = cpu_processor_spread_order_cmp;
case ERTS_CPU_BIND_THREAD_SPREAD:
cmp_func = cpu_thread_spread_order_cmp;
case ERTS_CPU_BIND_THREAD_NO_NODE_PROCESSOR_SPREAD:
cmp_func = cpu_thread_no_node_processor_spread_order_cmp;
case ERTS_CPU_BIND_NO_NODE_PROCESSOR_SPREAD:
cmp_func = cpu_no_node_processor_spread_order_cmp;
case ERTS_CPU_BIND_NO_NODE_THREAD_SPREAD:
cmp_func = cpu_no_node_thread_spread_order_cmp;
case ERTS_CPU_BIND_NO_SPREAD:
cmp_func = cpu_no_spread_order_cmp;
erl_exit(ERTS_ABORT_EXIT,
"Bad cpu bind type: %d\n",
(int) cpu_bind_order);
make_cpudata_id_seq(cpudata, size, no_node);
qsort(cpudata, size, sizeof(erts_cpu_topology_t), cmp_func);
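/*
 * Illustrative sketch (not part of the original source): the
 * cpu_*_order_cmp() functions above are plain qsort(3) comparators that
 * compare one topology level after another, so the order of the keys
 * decides how schedulers spread over the topology. In miniature:
 */
#include <stdlib.h>

struct cpu {
    int node, processor, core, thread, logical;
};

/* Spread over threads first, then cores, then processors, then nodes. */
static int
spread_cmp_sketch(const void *vx, const void *vy)
{
    const struct cpu *x = vx, *y = vy;
    if (x->thread != y->thread)       return x->thread - y->thread;
    if (x->core != y->core)           return x->core - y->core;
    if (x->processor != y->processor) return x->processor - y->processor;
    return x->node - y->node;
}

/* Usage: qsort(cpus, n, sizeof(struct cpu), spread_cmp_sketch); */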
processor_order_cmp(const void *vx, const void *vy)
erts_cpu_topology_t *x = (erts_cpu_topology_t *) vx;
erts_cpu_topology_t *y = (erts_cpu_topology_t *) vy;
if (x->processor != y->processor)
return x->processor - y->processor;
if (x->node != y->node)
return x->node - y->node;
if (x->processor_node != y->processor_node)
return x->processor_node - y->processor_node;
if (x->core != y->core)
return x->core - y->core;
if (x->thread != y->thread)
return x->thread - y->thread;
check_cpu_bind(ErtsSchedulerData *esdp)
erts_smp_runq_unlock(esdp->run_queue);
erts_smp_rwmtx_rwlock(&erts_cpu_bind_rwmtx);
cpu_id = scheduler2cpu_map[esdp->no].bind_id;
if (cpu_id >= 0 && cpu_id != scheduler2cpu_map[esdp->no].bound_id) {
res = erts_bind_to_cpu(erts_cpuinfo, cpu_id);
esdp->cpu_id = scheduler2cpu_map[esdp->no].bound_id = cpu_id;
erts_dsprintf_buf_t *dsbufp = erts_create_logger_dsbuf();
erts_dsprintf(dsbufp, "Scheduler %d failed to bind to cpu %d: %s\n",
(int) esdp->no, cpu_id, erl_errno_id(-res));
erts_send_error_to_logger_nogl(dsbufp);
if (scheduler2cpu_map[esdp->no].bound_id >= 0)
else if (cpu_id < 0 && scheduler2cpu_map[esdp->no].bound_id >= 0) {
/* Get rid of old binding */
res = erts_unbind_from_cpu(erts_cpuinfo);
esdp->cpu_id = scheduler2cpu_map[esdp->no].bound_id = -1;
erts_dsprintf_buf_t *dsbufp = erts_create_logger_dsbuf();
erts_dsprintf(dsbufp, "Scheduler %d failed to unbind from cpu %d: %s\n",
(int) esdp->no, cpu_id, erl_errno_id(-res));
erts_send_error_to_logger_nogl(dsbufp);
erts_smp_runq_lock(esdp->run_queue);
if (erts_common_run_queue)
erts_smp_atomic_set(&esdp->chk_cpu_bind, 0);
esdp->run_queue->flags &= ~ERTS_RUNQ_FLG_CHK_CPU_BIND;
erts_smp_rwmtx_rwunlock(&erts_cpu_bind_rwmtx);
signal_schedulers_bind_change(erts_cpu_topology_t *cpudata, int size)
if (cpu_bind_order != ERTS_CPU_BIND_NONE) {
cpu_bind_order_sort(cpudata, size, cpu_bind_order, 1);
for (cpu_ix = 0; cpu_ix < size && cpu_ix < erts_no_schedulers; cpu_ix++)
if (erts_is_cpu_available(erts_cpuinfo, cpudata[cpu_ix].logical))
scheduler2cpu_map[s_ix++].bind_id = cpudata[cpu_ix].logical;
if (s_ix <= erts_no_schedulers)
for (; s_ix <= erts_no_schedulers; s_ix++)
scheduler2cpu_map[s_ix].bind_id = -1;
if (erts_common_run_queue) {
for (s_ix = 0; s_ix < erts_no_schedulers; s_ix++)
erts_smp_atomic_set(&ERTS_SCHEDULER_IX(s_ix)->chk_cpu_bind, 1);
wake_all_schedulers();
ERTS_FOREACH_RUNQ(rq,
rq->flags |= ERTS_RUNQ_FLG_CHK_CPU_BIND;
wake_scheduler(rq, 0);
check_cpu_bind(erts_get_scheduler_data());
erts_init_scheduler_bind_type(char *how)
if (erts_bind_to_cpu(erts_cpuinfo, -1) == -ENOTSUP)
return ERTS_INIT_SCHED_BIND_TYPE_NOT_SUPPORTED;
if (!system_cpudata && !user_cpudata)
return ERTS_INIT_SCHED_BIND_TYPE_ERROR_NO_CPU_TOPOLOGY;
if (sys_strcmp(how, "s") == 0)
cpu_bind_order = ERTS_CPU_BIND_SPREAD;
else if (sys_strcmp(how, "ps") == 0)
cpu_bind_order = ERTS_CPU_BIND_PROCESSOR_SPREAD;
else if (sys_strcmp(how, "ts") == 0)
cpu_bind_order = ERTS_CPU_BIND_THREAD_SPREAD;
else if (sys_strcmp(how, "db") == 0
|| sys_strcmp(how, "tnnps") == 0)
cpu_bind_order = ERTS_CPU_BIND_THREAD_NO_NODE_PROCESSOR_SPREAD;
else if (sys_strcmp(how, "nnps") == 0)
cpu_bind_order = ERTS_CPU_BIND_NO_NODE_PROCESSOR_SPREAD;
else if (sys_strcmp(how, "nnts") == 0)
cpu_bind_order = ERTS_CPU_BIND_NO_NODE_THREAD_SPREAD;
else if (sys_strcmp(how, "ns") == 0)
cpu_bind_order = ERTS_CPU_BIND_NO_SPREAD;
else if (sys_strcmp(how, "u") == 0)
cpu_bind_order = ERTS_CPU_BIND_NONE;
return ERTS_INIT_SCHED_BIND_TYPE_ERROR_NO_BAD_TYPE;
return ERTS_INIT_SCHED_BIND_TYPE_SUCCESS;
ErtsCpuTopIdSeq logical;
ErtsCpuTopIdSeq thread;
ErtsCpuTopIdSeq core;
ErtsCpuTopIdSeq processor_node;
ErtsCpuTopIdSeq processor;
ErtsCpuTopIdSeq node;
init_cpu_top_entry(ErtsCpuTopEntry *cte)
cte->logical.id = erts_alloc(ERTS_ALC_T_TMP_CPU_IDS,
cte->logical.size = size;
cte->thread.id = erts_alloc(ERTS_ALC_T_TMP_CPU_IDS,
cte->thread.size = size;
cte->core.id = erts_alloc(ERTS_ALC_T_TMP_CPU_IDS,
cte->core.size = size;
cte->processor_node.id = erts_alloc(ERTS_ALC_T_TMP_CPU_IDS,
cte->processor_node.size = size;
cte->processor.id = erts_alloc(ERTS_ALC_T_TMP_CPU_IDS,
cte->processor.size = size;
cte->node.id = erts_alloc(ERTS_ALC_T_TMP_CPU_IDS,
cte->node.size = size;
destroy_cpu_top_entry(ErtsCpuTopEntry *cte)
erts_free(ERTS_ALC_T_TMP_CPU_IDS, cte->logical.id);
erts_free(ERTS_ALC_T_TMP_CPU_IDS, cte->thread.id);
erts_free(ERTS_ALC_T_TMP_CPU_IDS, cte->core.id);
erts_free(ERTS_ALC_T_TMP_CPU_IDS, cte->processor_node.id);
erts_free(ERTS_ALC_T_TMP_CPU_IDS, cte->processor.id);
erts_free(ERTS_ALC_T_TMP_CPU_IDS, cte->node.id);
get_cput_value_or_range(int *v, int *vr, char **str)
if (!isdigit((unsigned char)*c))
return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID;
l = strtol(c, &c, 10);
if (errno != 0 || l < 0 || ERTS_MAX_CPU_TOPOLOGY_ID < l)
return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID;
if (!isdigit((unsigned char)*c))
return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_RANGE;
l = strtol(c, &c, 10);
if (errno != 0 || l < 0 || ERTS_MAX_CPU_TOPOLOGY_ID < l)
return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_RANGE;
return ERTS_INIT_CPU_TOPOLOGY_OK;
get_cput_id_seq(ErtsCpuTopIdSeq *idseq, char **str)
res = get_cput_value_or_range(&val, &val_range, &c);
if (res != ERTS_INIT_CPU_TOPOLOGY_OK)
if (val_range < 0 || val_range == val)
if (val_range > val)
nids = val_range - val + 1;
nids = val - val_range + 1;
if (need_size > idseq->size) {
idseq->size = need_size + 10;
idseq->id = erts_realloc(ERTS_ALC_T_TMP_CPU_IDS,
sizeof(int)*idseq->size);
idseq->id[ix++] = val;
else if (val_range > val) {
for (; val <= val_range; val++)
idseq->id[ix++] = val;
for (; val >= val_range; val--)
idseq->id[ix++] = val;
return ERTS_INIT_CPU_TOPOLOGY_OK;
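/*
 * Illustrative sketch (not part of the original source):
 * get_cput_value_or_range() above parses either a single id or a
 * "<from>-<to>" range out of the topology string with strtol(). A
 * stripped-down version of that parse step:
 */
#include <ctype.h>
#include <errno.h>
#include <stdlib.h>

/* Parse "<n>" or "<n>-<m>" at *str; advance *str past what was consumed.
 * Returns 0 on success, -1 on a malformed id. For a single value, *to is
 * set equal to *from. */
static int
parse_value_or_range_sketch(int *from, int *to, char **str)
{
    char *c = *str;
    long l;

    if (!isdigit((unsigned char)*c))
        return -1;
    errno = 0;
    l = strtol(c, &c, 10);
    if (errno != 0 || l < 0 || l > 65535)
        return -1;
    *from = *to = (int) l;

    if (*c == '-') {                 /* optional "-<m>" part */
        c++;
        if (!isdigit((unsigned char)*c))
            return -1;
        errno = 0;
        l = strtol(c, &c, 10);
        if (errno != 0 || l < 0 || l > 65535)
            return -1;
        *to = (int) l;
    }
    *str = c;
    return 0;
}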
get_cput_entry(ErtsCpuTopEntry *cput, char **str)
cput->logical.used = 0;
cput->thread.id[0] = 0;
cput->thread.used = 1;
cput->core.id[0] = 0;
cput->core.used = 1;
cput->processor_node.id[0] = -1;
cput->processor_node.used = 1;
cput->processor.id[0] = 0;
cput->processor.used = 1;
cput->node.id[0] = -1;
cput->node.used = 1;
h = ERTS_TOPOLOGY_MAX_DEPTH;
while (*c != ':' && *c != '\0') {
ErtsCpuTopIdSeq *idseqp;
if (h <= ERTS_TOPOLOGY_LOGICAL)
return ERTS_INIT_CPU_TOPOLOGY_INVALID_HIERARCHY;
idseqp = &cput->logical;
h = ERTS_TOPOLOGY_LOGICAL;
if (h <= ERTS_TOPOLOGY_THREAD)
return ERTS_INIT_CPU_TOPOLOGY_INVALID_HIERARCHY;
idseqp = &cput->thread;
h = ERTS_TOPOLOGY_THREAD;
if (h <= ERTS_TOPOLOGY_CORE)
return ERTS_INIT_CPU_TOPOLOGY_INVALID_HIERARCHY;
idseqp = &cput->core;
h = ERTS_TOPOLOGY_CORE;
if (h <= ERTS_TOPOLOGY_PROCESSOR)
return ERTS_INIT_CPU_TOPOLOGY_INVALID_HIERARCHY;
idseqp = &cput->processor;
h = ERTS_TOPOLOGY_PROCESSOR;
if (h <= ERTS_TOPOLOGY_PROCESSOR) {
if (h <= ERTS_TOPOLOGY_NODE)
return ERTS_INIT_CPU_TOPOLOGY_INVALID_HIERARCHY;
idseqp = &cput->node;
h = ERTS_TOPOLOGY_NODE;
while (*p_chk != '\0' && *p_chk != ':') {
if (*p_chk == 'p' || *p_chk == 'P') {
if (h <= ERTS_TOPOLOGY_PROCESSOR_NODE)
return ERTS_INIT_CPU_TOPOLOGY_INVALID_HIERARCHY;
idseqp = &cput->processor_node;
h = ERTS_TOPOLOGY_PROCESSOR_NODE;
return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_TYPE;
res = get_cput_id_seq(idseqp, &c);
if (res != ERTS_INIT_CPU_TOPOLOGY_OK)
if (cput->logical.used < 1)
return ERTS_INIT_CPU_TOPOLOGY_MISSING_LID;
if (cput->thread.used != 1
&& cput->thread.used != cput->logical.used)
return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_RANGE;
if (cput->core.used != 1
&& cput->core.used != cput->logical.used)
return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_RANGE;
if (cput->processor_node.used != 1
&& cput->processor_node.used != cput->logical.used)
return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_RANGE;
if (cput->processor.used != 1
&& cput->processor.used != cput->logical.used)
return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_RANGE;
if (cput->node.used != 1
&& cput->node.used != cput->logical.used)
return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_RANGE;
return ERTS_INIT_CPU_TOPOLOGY_OK;
verify_topology(erts_cpu_topology_t *cpudata, int size)
int node, processor, no_nodes, i;
/* Verify logical ids */
logical = erts_alloc(ERTS_ALC_T_TMP, sizeof(int)*size);
for (i = 0; i < user_cpudata_size; i++)
logical[i] = user_cpudata[i].logical;
qsort(logical, user_cpudata_size, sizeof(int), int_cmp);
for (i = 0; i < user_cpudata_size-1; i++) {
if (logical[i] == logical[i+1]) {
erts_free(ERTS_ALC_T_TMP, logical);
return ERTS_INIT_CPU_TOPOLOGY_NOT_UNIQUE_LIDS;
erts_free(ERTS_ALC_T_TMP, logical);
qsort(cpudata, size, sizeof(erts_cpu_topology_t), processor_order_cmp);
/* Verify unique entities */
for (i = 1; i < user_cpudata_size; i++) {
if (user_cpudata[i-1].processor == user_cpudata[i].processor
&& user_cpudata[i-1].node == user_cpudata[i].node
&& (user_cpudata[i-1].processor_node
== user_cpudata[i].processor_node)
&& user_cpudata[i-1].core == user_cpudata[i].core
&& user_cpudata[i-1].thread == user_cpudata[i].thread) {
return ERTS_INIT_CPU_TOPOLOGY_NOT_UNIQUE_ENTITIES;
/* Verify numa nodes */
node = cpudata[0].node;
processor = cpudata[0].processor;
no_nodes = cpudata[0].node < 0 && cpudata[0].processor_node < 0;
for (i = 1; i < size; i++) {
if (cpudata[i].node >= 0 || cpudata[i].processor_node >= 0)
return ERTS_INIT_CPU_TOPOLOGY_INVALID_NODES;
if (cpudata[i].processor == processor && cpudata[i].node != node)
return ERTS_INIT_CPU_TOPOLOGY_INVALID_NODES;
node = cpudata[i].node;
processor = cpudata[i].processor;
if (node >= 0 && cpudata[i].processor_node >= 0)
return ERTS_INIT_CPU_TOPOLOGY_INVALID_NODES;
if (node < 0 && cpudata[i].processor_node < 0)
return ERTS_INIT_CPU_TOPOLOGY_INVALID_NODES;
return ERTS_INIT_CPU_TOPOLOGY_OK;
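/*
 * Illustrative sketch (not part of the original source): verify_topology()
 * above checks that all logical ids are unique by copying them out,
 * sorting with int_cmp(), and scanning adjacent pairs. The same idea in
 * isolation:
 */
#include <stdlib.h>
#include <string.h>

static int
int_cmp_sketch(const void *vx, const void *vy)
{
    return *((const int *) vx) - *((const int *) vy);
}

/* Return 1 if the array contains only unique values, 0 otherwise. */
static int
all_unique_sketch(const int *ids, int n)
{
    int i, ok = 1;
    int *tmp = malloc(sizeof(int) * (size_t) n);
    if (!tmp)
        return 0;                 /* treat allocation failure as "not verified" */
    memcpy(tmp, ids, sizeof(int) * (size_t) n);
    qsort(tmp, (size_t) n, sizeof(int), int_cmp_sketch);
    for (i = 0; i < n - 1; i++) {
        if (tmp[i] == tmp[i + 1]) {
            ok = 0;               /* duplicate found in adjacent pair */
            break;
        }
    }
    free(tmp);
    return ok;
}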
erts_init_cpu_topology(char *topology_str)
ErtsCpuTopEntry cput;
int error = ERTS_INIT_CPU_TOPOLOGY_OK;
erts_free(ERTS_ALC_T_CPUDATA, user_cpudata);
user_cpudata_size = 10;
user_cpudata = erts_alloc(ERTS_ALC_T_CPUDATA,
(sizeof(erts_cpu_topology_t)
* user_cpudata_size));
init_cpu_top_entry(&cput);
error = ERTS_INIT_CPU_TOPOLOGY_MISSING;
error = get_cput_entry(&cput, &c);
if (error != ERTS_INIT_CPU_TOPOLOGY_OK)
need_size += cput.logical.used;
if (user_cpudata_size < need_size) {
user_cpudata_size = need_size + 10;
user_cpudata = erts_realloc(ERTS_ALC_T_CPUDATA,
(sizeof(erts_cpu_topology_t)
* user_cpudata_size));
ASSERT(cput.thread.used == 1
|| cput.thread.used == cput.logical.used);
ASSERT(cput.core.used == 1
|| cput.core.used == cput.logical.used);
ASSERT(cput.processor_node.used == 1
|| cput.processor_node.used == cput.logical.used);
ASSERT(cput.processor.used == 1
|| cput.processor.used == cput.logical.used);
ASSERT(cput.node.used == 1
|| cput.node.used == cput.logical.used);
for (r = 0; r < cput.logical.used; r++) {
user_cpudata[ix].logical = cput.logical.id[r];
user_cpudata[ix].thread =
cput.thread.id[cput.thread.used == 1 ? 0 : r];
user_cpudata[ix].core =
cput.core.id[cput.core.used == 1 ? 0 : r];
user_cpudata[ix].processor_node =
cput.processor_node.id[cput.processor_node.used == 1 ? 0 : r];
user_cpudata[ix].processor =
cput.processor.id[cput.processor.used == 1 ? 0 : r];
user_cpudata[ix].node =
cput.node.id[cput.node.used == 1 ? 0 : r];
} while (*c != '\0');
if (user_cpudata_size != ix) {
user_cpudata_size = ix;
user_cpudata = erts_realloc(ERTS_ALC_T_CPUDATA,
(sizeof(erts_cpu_topology_t)
* user_cpudata_size));
error = verify_topology(user_cpudata, user_cpudata_size);
if (error == ERTS_INIT_CPU_TOPOLOGY_OK) {
destroy_cpu_top_entry(&cput);
return ERTS_INIT_CPU_TOPOLOGY_OK;
erts_free(ERTS_ALC_T_CPUDATA, user_cpudata);
user_cpudata_size = 0;
destroy_cpu_top_entry(&cput);
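/*
 * Illustrative sketch (not part of the original source):
 * erts_init_cpu_topology() above accumulates parsed entries into
 * user_cpudata, growing the array with erts_realloc() whenever the next
 * entry would not fit and shrinking it to the exact size at the end. The
 * generic grow-as-you-append pattern, with plain realloc():
 */
#include <stdlib.h>

typedef struct {
    int logical, thread, core, processor_node, processor, node;
} cpu_entry_t;

typedef struct {
    cpu_entry_t *data;
    int used;
    int size;
} cpu_vec_t;

/* Append one entry, growing the backing array with some slack so that we
 * do not realloc on every append. Returns 0 on success, -1 on OOM. */
static int
cpu_vec_push_sketch(cpu_vec_t *v, cpu_entry_t e)
{
    if (v->used == v->size) {
        int new_size = v->size + 10;
        cpu_entry_t *p = realloc(v->data,
                                 sizeof(cpu_entry_t) * (size_t) new_size);
        if (!p)
            return -1;
        v->data = p;
        v->size = new_size;
    }
    v->data[v->used++] = e;
    return 0;
}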
#define ERTS_GET_CPU_TOPOLOGY_ERROR -1
#define ERTS_GET_USED_CPU_TOPOLOGY 0
#define ERTS_GET_DETECTED_CPU_TOPOLOGY 1
#define ERTS_GET_DEFINED_CPU_TOPOLOGY 2
static Eterm get_cpu_topology_term(Process *c_p, int type);
erts_set_cpu_topology(Process *c_p, Eterm term)
erts_cpu_topology_t *cpudata = NULL;
int cpudata_size = 0;
erts_smp_rwmtx_rwlock(&erts_cpu_bind_rwmtx);
res = get_cpu_topology_term(c_p, ERTS_GET_USED_CPU_TOPOLOGY);
if (term == am_undefined) {
erts_free(ERTS_ALC_T_CPUDATA, user_cpudata);
user_cpudata = NULL;
user_cpudata_size = 0;
if (cpu_bind_order != ERTS_CPU_BIND_NONE && system_cpudata) {
cpudata_size = system_cpudata_size;
cpudata = erts_alloc(ERTS_ALC_T_TMP,
(sizeof(erts_cpu_topology_t)
sys_memcpy((void *) cpudata,
(void *) system_cpudata,
sizeof(erts_cpu_topology_t)*cpudata_size);
else if (is_not_list(term)) {
res = THE_NON_VALUE;
cpudata = erts_alloc(ERTS_ALC_T_TMP,
(sizeof(erts_cpu_topology_t)
while (is_list(list)) {
Eterm *lp = list_val(list);
Eterm cpu = CAR(lp);
if (is_not_tuple(cpu))
tp = tuple_val(cpu);
if (arityval(tp[0]) != 7 || tp[1] != am_cpu)
if (ix >= cpudata_size) {
cpudata_size += 100;
cpudata = erts_realloc(ERTS_ALC_T_TMP,
(sizeof(erts_cpu_topology_t)
id = signed_val(tp[2]);
if (id < -1 || ERTS_MAX_CPU_TOPOLOGY_ID < id)
cpudata[ix].node = (int) id;
id = signed_val(tp[3]);
if (id < -1 || ERTS_MAX_CPU_TOPOLOGY_ID < id)
cpudata[ix].processor = (int) id;
id = signed_val(tp[4]);
if (id < -1 || ERTS_MAX_CPU_TOPOLOGY_ID < id)
cpudata[ix].processor_node = (int) id;
id = signed_val(tp[5]);
if (id < -1 || ERTS_MAX_CPU_TOPOLOGY_ID < id)
cpudata[ix].core = (int) id;
id = signed_val(tp[6]);
if (id < -1 || ERTS_MAX_CPU_TOPOLOGY_ID < id)
cpudata[ix].thread = (int) id;
id = signed_val(tp[7]);
if (id < -1 || ERTS_MAX_CPU_TOPOLOGY_ID < id)
cpudata[ix].logical = (int) id;
if (is_not_nil(list))
if (ERTS_INIT_CPU_TOPOLOGY_OK != verify_topology(cpudata, cpudata_size))
if (user_cpudata_size != cpudata_size) {
erts_free(ERTS_ALC_T_CPUDATA, user_cpudata);
user_cpudata = erts_alloc(ERTS_ALC_T_CPUDATA,
sizeof(erts_cpu_topology_t)*cpudata_size);
user_cpudata_size = cpudata_size;
sys_memcpy((void *) user_cpudata,
sizeof(erts_cpu_topology_t)*cpudata_size);
signal_schedulers_bind_change(cpudata, cpudata_size);
erts_smp_rwmtx_rwunlock(&erts_cpu_bind_rwmtx);
erts_free(ERTS_ALC_T_TMP, cpudata);
bound_schedulers_term(ErtsCpuBindOrder order)
case ERTS_CPU_BIND_SPREAD: {
ERTS_DECL_AM(spread);
case ERTS_CPU_BIND_PROCESSOR_SPREAD: {
ERTS_DECL_AM(processor_spread);
return AM_processor_spread;
case ERTS_CPU_BIND_THREAD_SPREAD: {
ERTS_DECL_AM(thread_spread);
return AM_thread_spread;
case ERTS_CPU_BIND_THREAD_NO_NODE_PROCESSOR_SPREAD: {
ERTS_DECL_AM(thread_no_node_processor_spread);
return AM_thread_no_node_processor_spread;
case ERTS_CPU_BIND_NO_NODE_PROCESSOR_SPREAD: {
ERTS_DECL_AM(no_node_processor_spread);
return AM_no_node_processor_spread;
case ERTS_CPU_BIND_NO_NODE_THREAD_SPREAD: {
ERTS_DECL_AM(no_node_thread_spread);
return AM_no_node_thread_spread;
case ERTS_CPU_BIND_NO_SPREAD: {
ERTS_DECL_AM(no_spread);
return AM_no_spread;
case ERTS_CPU_BIND_NONE: {
ERTS_DECL_AM(unbound);
return THE_NON_VALUE;
erts_bound_schedulers_term(Process *c_p)
ErtsCpuBindOrder order;
erts_smp_rwmtx_rlock(&erts_cpu_bind_rwmtx);
order = cpu_bind_order;
erts_smp_rwmtx_runlock(&erts_cpu_bind_rwmtx);
return bound_schedulers_term(order);
create_tmp_cpu_topology_copy(erts_cpu_topology_t **cpudata, int *cpudata_size)
*cpudata_size = user_cpudata_size;
*cpudata = erts_alloc(ERTS_ALC_T_TMP,
(sizeof(erts_cpu_topology_t)
* (*cpudata_size)));
sys_memcpy((void *) *cpudata,
(void *) user_cpudata,
sizeof(erts_cpu_topology_t)*(*cpudata_size));
else if (system_cpudata) {
*cpudata_size = system_cpudata_size;
*cpudata = erts_alloc(ERTS_ALC_T_TMP,
(sizeof(erts_cpu_topology_t)
* (*cpudata_size)));
sys_memcpy((void *) *cpudata,
(void *) system_cpudata,
sizeof(erts_cpu_topology_t)*(*cpudata_size));
destroy_tmp_cpu_topology_copy(erts_cpu_topology_t *cpudata)
erts_free(ERTS_ALC_T_TMP, cpudata);
erts_bind_schedulers(Process *c_p, Eterm how)
erts_cpu_topology_t *cpudata;
ErtsCpuBindOrder old_cpu_bind_order;
erts_smp_rwmtx_rwlock(&erts_cpu_bind_rwmtx);
if (erts_bind_to_cpu(erts_cpuinfo, -1) == -ENOTSUP) {
ERTS_BIF_PREP_ERROR(res, c_p, EXC_NOTSUP);
old_cpu_bind_order = cpu_bind_order;
if (ERTS_IS_ATOM_STR("spread", how))
cpu_bind_order = ERTS_CPU_BIND_SPREAD;
else if (ERTS_IS_ATOM_STR("processor_spread", how))
cpu_bind_order = ERTS_CPU_BIND_PROCESSOR_SPREAD;
else if (ERTS_IS_ATOM_STR("thread_spread", how))
cpu_bind_order = ERTS_CPU_BIND_THREAD_SPREAD;
else if (ERTS_IS_ATOM_STR("default_bind", how)
|| ERTS_IS_ATOM_STR("thread_no_node_processor_spread", how))
cpu_bind_order = ERTS_CPU_BIND_THREAD_NO_NODE_PROCESSOR_SPREAD;
else if (ERTS_IS_ATOM_STR("no_node_processor_spread", how))
cpu_bind_order = ERTS_CPU_BIND_NO_NODE_PROCESSOR_SPREAD;
else if (ERTS_IS_ATOM_STR("no_node_thread_spread", how))
cpu_bind_order = ERTS_CPU_BIND_NO_NODE_THREAD_SPREAD;
else if (ERTS_IS_ATOM_STR("no_spread", how))
cpu_bind_order = ERTS_CPU_BIND_NO_SPREAD;
else if (ERTS_IS_ATOM_STR("unbound", how))
cpu_bind_order = ERTS_CPU_BIND_NONE;
cpu_bind_order = old_cpu_bind_order;
ERTS_BIF_PREP_ERROR(res, c_p, BADARG);
create_tmp_cpu_topology_copy(&cpudata, &cpudata_size);
cpu_bind_order = old_cpu_bind_order;
ERTS_BIF_PREP_ERROR(res, c_p, BADARG);
signal_schedulers_bind_change(cpudata, cpudata_size);
destroy_tmp_cpu_topology_copy(cpudata);
res = bound_schedulers_term(old_cpu_bind_order);
erts_smp_rwmtx_rwunlock(&erts_cpu_bind_rwmtx);
erts_fake_scheduler_bindings(Process *p, Eterm how)
ErtsCpuBindOrder fake_cpu_bind_order;
erts_cpu_topology_t *cpudata;
if (ERTS_IS_ATOM_STR("spread", how))
fake_cpu_bind_order = ERTS_CPU_BIND_SPREAD;
else if (ERTS_IS_ATOM_STR("processor_spread", how))
fake_cpu_bind_order = ERTS_CPU_BIND_PROCESSOR_SPREAD;
else if (ERTS_IS_ATOM_STR("thread_spread", how))
fake_cpu_bind_order = ERTS_CPU_BIND_THREAD_SPREAD;
else if (ERTS_IS_ATOM_STR("default_bind", how)
|| ERTS_IS_ATOM_STR("thread_no_node_processor_spread", how))
fake_cpu_bind_order = ERTS_CPU_BIND_THREAD_NO_NODE_PROCESSOR_SPREAD;
else if (ERTS_IS_ATOM_STR("no_node_processor_spread", how))
fake_cpu_bind_order = ERTS_CPU_BIND_NO_NODE_PROCESSOR_SPREAD;
else if (ERTS_IS_ATOM_STR("no_node_thread_spread", how))
fake_cpu_bind_order = ERTS_CPU_BIND_NO_NODE_THREAD_SPREAD;
else if (ERTS_IS_ATOM_STR("no_spread", how))
fake_cpu_bind_order = ERTS_CPU_BIND_NO_SPREAD;
else if (ERTS_IS_ATOM_STR("unbound", how))
fake_cpu_bind_order = ERTS_CPU_BIND_NONE;
ERTS_BIF_PREP_ERROR(res, p, BADARG);
erts_smp_rwmtx_rlock(&erts_cpu_bind_rwmtx);
create_tmp_cpu_topology_copy(&cpudata, &cpudata_size);
erts_smp_rwmtx_runlock(&erts_cpu_bind_rwmtx);
if (!cpudata || fake_cpu_bind_order == ERTS_CPU_BIND_NONE)
ERTS_BIF_PREP_RET(res, am_false);
cpu_bind_order_sort(cpudata, cpudata_size, fake_cpu_bind_order, 1);
#ifdef ERTS_FAKE_SCHED_BIND_PRINT_SORTED_CPU_DATA
erts_fprintf(stderr, "node: ");
for (i = 0; i < cpudata_size; i++)
erts_fprintf(stderr, " %2d", cpudata[i].node);
erts_fprintf(stderr, "\n");
erts_fprintf(stderr, "processor: ");
for (i = 0; i < cpudata_size; i++)
erts_fprintf(stderr, " %2d", cpudata[i].processor);
erts_fprintf(stderr, "\n");
if (fake_cpu_bind_order != ERTS_CPU_BIND_THREAD_NO_NODE_PROCESSOR_SPREAD
&& fake_cpu_bind_order != ERTS_CPU_BIND_NO_NODE_PROCESSOR_SPREAD
&& fake_cpu_bind_order != ERTS_CPU_BIND_NO_NODE_THREAD_SPREAD) {
erts_fprintf(stderr, "processor_node:");
for (i = 0; i < cpudata_size; i++)
erts_fprintf(stderr, " %2d", cpudata[i].processor_node);
erts_fprintf(stderr, "\n");
erts_fprintf(stderr, "core: ");
for (i = 0; i < cpudata_size; i++)
erts_fprintf(stderr, " %2d", cpudata[i].core);
erts_fprintf(stderr, "\n");
erts_fprintf(stderr, "thread: ");
for (i = 0; i < cpudata_size; i++)
erts_fprintf(stderr, " %2d", cpudata[i].thread);
erts_fprintf(stderr, "\n");
erts_fprintf(stderr, "logical: ");
for (i = 0; i < cpudata_size; i++)
erts_fprintf(stderr, " %2d", cpudata[i].logical);
erts_fprintf(stderr, "\n");
hp = HAlloc(p, cpudata_size+1);
ERTS_BIF_PREP_RET(res, make_tuple(hp));
*hp++ = make_arityval((Uint) cpudata_size);
for (i = 0; i < cpudata_size; i++)
*hp++ = make_small((Uint) cpudata[i].logical);
destroy_tmp_cpu_topology_copy(cpudata);
erts_get_schedulers_binds(Process *c_p)
ERTS_DECL_AM(unbound);
Eterm *hp = HAlloc(c_p, erts_no_schedulers+1);
Eterm res = make_tuple(hp);
*(hp++) = make_arityval(erts_no_schedulers);
erts_smp_rwmtx_rlock(&erts_cpu_bind_rwmtx);
for (ix = 1; ix <= erts_no_schedulers; ix++)
*(hp++) = (scheduler2cpu_map[ix].bound_id >= 0
? make_small(scheduler2cpu_map[ix].bound_id)
erts_smp_rwmtx_runlock(&erts_cpu_bind_rwmtx);
bld_topology_term(Eterm **hpp,
erts_cpu_topology_t *cpudata,
return am_undefined;
for (i = size-1; i >= 0; i--) {
res = erts_bld_cons(hpp,
make_small(cpudata[i].node),
make_small(cpudata[i].processor),
make_small(cpudata[i].processor_node),
make_small(cpudata[i].core),
make_small(cpudata[i].thread),
make_small(cpudata[i].logical)),
get_cpu_topology_term(Process *c_p, int type)
Eterm res = THE_NON_VALUE;
erts_cpu_topology_t *cpudata = NULL;
case ERTS_GET_USED_CPU_TOPOLOGY:
case ERTS_GET_DETECTED_CPU_TOPOLOGY:
if (!system_cpudata)
size = system_cpudata_size;
cpudata = erts_alloc(ERTS_ALC_T_TMP,
(sizeof(erts_cpu_topology_t)
sys_memcpy((void *) cpudata,
(void *) system_cpudata,
sizeof(erts_cpu_topology_t)*size);
case ERTS_GET_DEFINED_CPU_TOPOLOGY:
size = user_cpudata_size;
cpudata = user_cpudata;
erl_exit(ERTS_ABORT_EXIT, "Bad cpu topology type: %d\n", type);
if (res == am_undefined) {
bld_topology_term(NULL, &hsz,
hp = HAlloc(c_p, hsz);
res = bld_topology_term(&hp, NULL,
ASSERT(hp_end == hp);
if (cpudata && cpudata != system_cpudata && cpudata != user_cpudata)
erts_free(ERTS_ALC_T_TMP, cpudata);
erts_get_cpu_topology_term(Process *c_p, Eterm which)
erts_smp_rwmtx_rlock(&erts_cpu_bind_rwmtx);
if (ERTS_IS_ATOM_STR("used", which))
type = ERTS_GET_USED_CPU_TOPOLOGY;
else if (ERTS_IS_ATOM_STR("detected", which))
type = ERTS_GET_DETECTED_CPU_TOPOLOGY;
else if (ERTS_IS_ATOM_STR("defined", which))
type = ERTS_GET_DEFINED_CPU_TOPOLOGY;
type = ERTS_GET_CPU_TOPOLOGY_ERROR;
if (type == ERTS_GET_CPU_TOPOLOGY_ERROR)
res = THE_NON_VALUE;
res = get_cpu_topology_term(c_p, type);
erts_smp_rwmtx_runlock(&erts_cpu_bind_rwmtx);
early_cpu_bind_init(void)
user_cpudata = NULL;
user_cpudata_size = 0;
system_cpudata_size = erts_get_cpu_topology_size(erts_cpuinfo);
system_cpudata = erts_alloc(ERTS_ALC_T_CPUDATA,
(sizeof(erts_cpu_topology_t)
* system_cpudata_size));
cpu_bind_order = ERTS_CPU_BIND_NONE;
if (!erts_get_cpu_topology(erts_cpuinfo, system_cpudata)
|| ERTS_INIT_CPU_TOPOLOGY_OK != verify_topology(system_cpudata,
system_cpudata_size)) {
erts_free(ERTS_ALC_T_CPUDATA, system_cpudata);
system_cpudata = NULL;
system_cpudata_size = 0;
late_cpu_bind_init(void)
erts_smp_rwmtx_init(&erts_cpu_bind_rwmtx, "cpu_bind");
scheduler2cpu_map = erts_alloc(ERTS_ALC_T_CPUDATA,
(sizeof(ErtsCpuBindData)
* (erts_no_schedulers+1)));
for (ix = 1; ix <= erts_no_schedulers; ix++) {
scheduler2cpu_map[ix].bind_id = -1;
scheduler2cpu_map[ix].bound_id = -1;
if (cpu_bind_order != ERTS_CPU_BIND_NONE) {
erts_cpu_topology_t *cpudata;
create_tmp_cpu_topology_copy(&cpudata, &cpudata_size);
signal_schedulers_bind_change(cpudata, cpudata_size);
destroy_tmp_cpu_topology_copy(cpudata);
#ifdef ERTS_SMP