/*
 * x86 SMP booting functions
 *
 * (c) 1995 Alan Cox, Building #3 <alan@redhat.com>
 * (c) 1998, 1999, 2000 Ingo Molnar <mingo@redhat.com>
 *
 * Much of the core SMP work is based on previous work by Thomas Radke, to
 * whom a great many thanks are extended.
 *
 * Thanks to Intel for making available several different Pentium,
 * Pentium Pro and Pentium-II/Xeon MP machines.
 * Original development of Linux SMP code supported by Caldera.
 *
 * This code is released under the GNU General Public License version 2 or
 * later.
 *
 * Fixes
 *	Felix Koop	:	NR_CPUS used properly
 *	Jose Renau	:	Handle single CPU case.
 *	Alan Cox	:	By repeated request 8) - Total BogoMIP report.
 *	Greg Wright	:	Fix for kernel stacks panic.
 *	Erich Boleyn	:	MP v1.4 and additional changes.
 *	Matthias Sattler:	Changes for 2.1 kernel map.
 *	Michel Lespinasse:	Changes for 2.1 kernel map.
 *	Michael Chastain:	Change trampoline.S to gnu as.
 *	Alan Cox	:	Dumb bug: 'B' step PPro's are fine
 *	Ingo Molnar	:	Added APIC timers, based on code
 *	Ingo Molnar	:	various cleanups and rewrites
 *	Tigran Aivazian	:	fixed "0.00 in /proc/uptime on SMP" bug.
 *	Maciej W. Rozycki:	Bits for genuine 82489DX APICs
 *	Martin J. Bligh	:	Added support for multi-quad systems
 */
35
#include <xeno/config.h>
36
#include <xeno/init.h>
37
#include <xeno/interrupt.h>
40
#include <xeno/slab.h>
41
#include <asm/pgalloc.h>
42
#include <asm/mc146818rtc.h>
43
#include <asm/smpboot.h>
46
#include <asm/system.h>
47
#include <xeno/sched.h>
48
#include <xeno/delay.h>
53
/* Set if we find a B stepping CPU */
54
static int smp_b_stepping;
56
/* Setup configured maximum number of CPUs to activate */
57
static int max_cpus = -1;
59
/* Total count of live CPUs */
62
/* Bitmask of currently online CPUs */
63
unsigned long cpu_online_map;
65
static volatile unsigned long cpu_callin_map;
66
static volatile unsigned long cpu_callout_map;
68
/* Per CPU bogomips and other parameters */
69
struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned;
71
/* Set when the idlers are all forked */
72
int smp_threads_ready;
75
* Trampoline 80x86 program as an array.
78
extern unsigned char trampoline_data [];
79
extern unsigned char trampoline_end [];
80
static unsigned char *trampoline_base;
83
* Currently trivial. Write the real->protected mode
84
* bootstrap into the page concerned. The caller
85
* has made sure it's suitably aligned.
88
static unsigned long __init setup_trampoline(void)
90
memcpy(trampoline_base, trampoline_data, trampoline_end - trampoline_data);
91
return virt_to_phys(trampoline_base);
95
* We are called very early to get the low memory for the
96
* SMP bootup trampoline page.
98
void __init smp_alloc_memory(void)
101
* Has to be in very low memory so we can execute
104
trampoline_base = __va(0x90000);
108
* The bootstrap kernel entry code has set these up. Save them for
112
void __init smp_store_cpu_info(int id)
114
struct cpuinfo_x86 *c = cpu_data + id;
120
c->pgtable_cache_sz = 0;
123
* Mask B, Pentium, but not Pentium MMX
125
if (c->x86_vendor == X86_VENDOR_INTEL &&
127
c->x86_mask >= 1 && c->x86_mask <= 4 &&
130
* Remember we have B step Pentia with bugs
136
* Architecture specific routine called by the kernel just before init is
137
* fired off. This allows the BP to have everything in order [we hope].
138
* At the end of this all the APs will hit the system scheduling and off
139
* we go. Each AP will load the system gdt's and jump through the kernel
140
* init into idle(). At this point the scheduler will one day take over
141
* and give them jobs to do. smp_callin is a standard routine
142
* we use to track CPUs as they power up.
145
static atomic_t smp_commenced = ATOMIC_INIT(0);
147
void __init smp_commence(void)
150
* Lets the callins below out of their loop.
152
Dprintk("Setting commenced=1, go go go\n");
155
atomic_set(&smp_commenced,1);
159
* TSC synchronization.
161
* We first check wether all CPUs have their TSC's synchronized,
162
* then we print a warning if not, and always resync.
165
static atomic_t tsc_start_flag = ATOMIC_INIT(0);
166
static atomic_t tsc_count_start = ATOMIC_INIT(0);
167
static atomic_t tsc_count_stop = ATOMIC_INIT(0);
168
static unsigned long long tsc_values[NR_CPUS];
173
/*
 * Boot-time helper: an approximate but accurate-enough 64-bit/32-bit
 * division, expanded to 32-bit divisions and 64-bit multiplication.
 * Not terribly optimized but we need it at boot time only anyway.
 *
 * result == a / b
 *	== (a1 + a2*(2^32)) / b
 *	== a1/b + a2*(2^32/b)
 *	== a1/b + a2*((2^32-1)/b) + a2/b + (a2*((2^32-1) % b))/b
 *		    ^---- (this multiplication can overflow)
 */
static unsigned long long div64 (unsigned long long a, unsigned long b0)
{
	unsigned int a1, a2;
	unsigned long long res;

	/* Split dividend into low (a1) / high (a2) 32-bit halves.
	 * NOTE(review): relies on little-endian layout -- fine on x86. */
	a1 = ((unsigned int*)&a)[0];
	a2 = ((unsigned int*)&a)[1];

	res = a1/b0 +
		(unsigned long long)a2 * (unsigned long long)(0xffffffff/b0) +
		a2 / b0 +
		(a2 * (0xffffffff % b0)) / b0;

	return res;
}
200
/*
 * NOTE(review): this function was garbled by the tool that produced this
 * file -- the bare numeric lines below are leftover source line numbers,
 * and several declarations, statements and braces are missing, so the
 * block cannot compile as-is.  The surviving fragments are preserved
 * byte-for-byte; restore the full body from revision history.
 *
 * From what survives: the boot processor rendezvouses with the APs over
 * the tsc_count_start/tsc_count_stop atomics for NR_LOOPS iterations,
 * samples every CPU's TSC via rdtscll(), averages the samples with
 * div64(), and reports (as a BIOS bug) any CPU whose TSC deviates from
 * the average by more than 2*ticks_per_usec.
 */
static void __init synchronize_tsc_bp (void)
203
unsigned long long t0;
204
unsigned long long sum, avg;
208
printk("checking TSC synchronization across CPUs: ");
210
atomic_set(&tsc_start_flag, 1);
214
* We loop a few times to get a primed instruction cache,
215
* then the last pass is more or less synchronized and
216
* the BP and APs set their cycle counters to zero all at
217
* once. This reduces the chance of having random offsets
218
* between the processors, and guarantees that the maximum
219
* delay between the cycle counters is never bigger than
220
* the latency of information-passing (cachelines) between
223
for (i = 0; i < NR_LOOPS; i++) {
225
* all APs synchronize but they loop on '== num_cpus'
227
while (atomic_read(&tsc_count_start) != smp_num_cpus-1) mb();
228
atomic_set(&tsc_count_stop, 0);
231
* this lets the APs save their current TSC:
233
atomic_inc(&tsc_count_start);
235
rdtscll(tsc_values[smp_processor_id()]);
237
* We clear the TSC in the last loop:
243
* Wait for all APs to leave the synchronization point:
245
while (atomic_read(&tsc_count_stop) != smp_num_cpus-1) mb();
246
atomic_set(&tsc_count_start, 0);
248
atomic_inc(&tsc_count_stop);
252
for (i = 0; i < smp_num_cpus; i++) {
256
avg = div64(sum, smp_num_cpus);
259
for (i = 0; i < smp_num_cpus; i++) {
260
delta = tsc_values[i] - avg;
264
* We report bigger than 2 microseconds clock differences.
266
if (delta > 2*ticks_per_usec) {
272
realdelta = div64(delta, ticks_per_usec);
273
if (tsc_values[i] < avg)
274
realdelta = -realdelta;
276
printk("BIOS BUG: CPU#%d improperly initialized, has %ld usecs TSC skew! FIXED.\n",
286
/*
 * AP-side counterpart of synchronize_tsc_bp(): rendezvous with the boot
 * processor over the tsc_count_start/stop atomics and record this CPU's
 * TSC into tsc_values[].
 *
 * NOTE(review): the local declaration and the write_tsc() reset on the
 * final loop were lost in extraction and are restored from the upstream
 * source -- confirm against revision history.
 */
static void __init synchronize_tsc_ap (void)
{
	int i;

	/*
	 * smp_num_cpus is not necessarily known at the time
	 * this gets called, so we first wait for the BP to
	 * finish SMP initialization:
	 */
	while (!atomic_read(&tsc_start_flag)) mb();

	for (i = 0; i < NR_LOOPS; i++) {
		atomic_inc(&tsc_count_start);
		while (atomic_read(&tsc_count_start) != smp_num_cpus) mb();

		rdtscll(tsc_values[smp_processor_id()]);
		/* Clear the TSC on the last pass so all CPUs restart at ~0. */
		if (i == NR_LOOPS-1)
			write_tsc(0, 0);

		atomic_inc(&tsc_count_stop);
		while (atomic_read(&tsc_count_stop) != smp_num_cpus) mb();
	}
}
311
/*
 * NOTE(review): this function was garbled during extraction -- the bare
 * numeric lines are leftover source line numbers and many statements and
 * braces are missing; it cannot compile as-is.  Fragments preserved
 * byte-for-byte; restore from revision history.
 *
 * From what survives: this is the AP-side check-in.  The AP waits for the
 * INIT-deassert flag, reads its physical APIC ID, marks itself in
 * cpu_online_map, waits (up to ~200 iterations) for the BP's callout bit,
 * initializes MTRRs, saves its cpuinfo, optionally arms the local-APIC
 * NMI watchdog, sets its bit in cpu_callin_map to release the BP, and
 * finally synchronizes its TSC with the BP.
 */
static atomic_t init_deasserted;
313
void __init smp_callin(void)
315
int cpuid, phys_id, i;
318
* If waken up by an INIT in an 82489DX configuration
319
* we may get here before an INIT-deassert IPI reaches
320
* our local APIC. We have to wait for the IPI or we'll
321
* lock up on an APIC access.
323
while (!atomic_read(&init_deasserted));
326
* (This works even if the APIC is not enabled.)
328
phys_id = GET_APIC_ID(apic_read(APIC_ID));
329
cpuid = smp_processor_id();
330
if (test_and_set_bit(cpuid, &cpu_online_map)) {
331
printk("huh, phys CPU#%d, CPU#%d already present??\n",
335
Dprintk("CPU#%d (phys ID: %d) waiting for CALLOUT\n", cpuid, phys_id);
338
* STARTUP IPIs are fragile beasts as they might sometimes
339
* trigger some glue motherboard logic. Complete APIC bus
340
* silence for 1 second, this overestimates the time the
341
* boot CPU is spending to send the up to 2 STARTUP IPIs
342
* by a factor of two. This should be enough.
345
for ( i = 0; i < 200; i++ )
347
if ( test_bit(cpuid, &cpu_callout_map) ) break;
351
if (!test_bit(cpuid, &cpu_callout_map)) {
352
printk("BUG: CPU%d started up but did not get a callout!\n",
358
* the boot CPU has finished the init stage and is spinning
359
* on callin_map until we finish. We are free to set up this
360
* CPU, first the APIC. (this is probably redundant on most
364
Dprintk("CALLIN, before setup_local_APIC().\n");
372
* Must be done before calibration delay is computed
374
mtrr_init_secondary_cpu ();
377
Dprintk("Stack at about %p\n",&cpuid);
380
* Save our processor parameters
382
smp_store_cpu_info(cpuid);
384
if (nmi_watchdog == NMI_LOCAL_APIC)
385
setup_apic_nmi_watchdog();
388
* Allow the master to continue.
390
set_bit(cpuid, &cpu_callin_map);
393
* Synchronize the TSC with the BP
395
synchronize_tsc_ap();
401
/*
 * NOTE(review): garbled by extraction -- bare numeric lines are leftover
 * source line numbers; braces and some statements are missing.  Fragments
 * preserved byte-for-byte; restore from revision history.
 *
 * From what survives: entry point for a freshly booted AP.  It installs
 * the per-CPU idle task as current, spins until smp_commence() sets
 * smp_commenced, allocates a private copy of the boot CPU's IDT and loads
 * it with 'lidt', and ends up in startup_cpu_idle_loop().
 *
 * Activate a secondary processor.
 */
403
void __init start_secondary(void)
405
unsigned int cpu = cpucount;
406
/* 6 bytes suitable for passing to LIDT instruction. */
407
unsigned char idt_load[6];
409
extern void cpu_init(void);
411
set_current(idle_task[cpu]);
414
* Dont put anything before smp_callin(), SMP
415
* booting is too fragile that we want to limit the
416
* things done here to the most necessary things.
421
while (!atomic_read(&smp_commenced))
425
* At this point, boot CPU has fully initialised the IDT. It is
426
* now safe to make ourselves a private copy.
428
idt_tables[cpu] = kmalloc(IDT_ENTRIES*8, GFP_KERNEL);
429
memcpy(idt_tables[cpu], idt_table, IDT_ENTRIES*8);
430
*(unsigned short *)(&idt_load[0]) = (IDT_ENTRIES*8)-1;
431
*(unsigned long *)(&idt_load[2]) = (unsigned long)idt_tables[cpu];
432
__asm__ __volatile__ ( "lidt %0" : "=m" (idt_load) );
435
* low-memory mappings have been cleared, flush them from the local TLBs
440
startup_cpu_idle_loop();
446
unsigned long esp, ss;
449
/* which physical APIC ID maps to which logical CPU number */
450
volatile int physical_apicid_2_cpu[MAX_APICID];
451
/* which logical CPU number maps to which physical APIC ID */
452
volatile int cpu_2_physical_apicid[NR_CPUS];
454
/* which logical APIC ID maps to which logical CPU number */
455
volatile int logical_apicid_2_cpu[MAX_APICID];
456
/* which logical CPU number maps to which logical APIC ID */
457
volatile int cpu_2_logical_apicid[NR_CPUS];
459
static inline void init_cpu_to_apicid(void)
460
/* Initialize all maps between cpu number and apicids */
464
for (apicid = 0; apicid < MAX_APICID; apicid++) {
465
physical_apicid_2_cpu[apicid] = -1;
466
logical_apicid_2_cpu[apicid] = -1;
468
for (cpu = 0; cpu < NR_CPUS; cpu++) {
469
cpu_2_physical_apicid[cpu] = -1;
470
cpu_2_logical_apicid[cpu] = -1;
474
static inline void map_cpu_to_boot_apicid(int cpu, int apicid)
476
* set up a mapping between cpu and apicid. Uses logical apicids for multiquad,
477
* else physical apic ids
480
physical_apicid_2_cpu[apicid] = cpu;
481
cpu_2_physical_apicid[cpu] = apicid;
484
static inline void unmap_cpu_to_boot_apicid(int cpu, int apicid)
486
* undo a mapping between cpu and apicid. Uses logical apicids for multiquad,
487
* else physical apic ids
490
physical_apicid_2_cpu[apicid] = -1;
491
cpu_2_physical_apicid[cpu] = -1;
495
/*
 * NOTE(review): garbled by extraction -- bare numeric lines are leftover
 * source line numbers; the timeout declaration, do-loop head, switch
 * statement and closing braces are missing.  Fragments preserved
 * byte-for-byte; restore from revision history.
 *
 * From what survives: a boot-failure debugging aid that uses remote-read
 * ICR commands to dump the ID, VERSION and SPIV registers of a remote
 * (presumably wedged) APIC.
 */
static inline void inquire_remote_apic(int apicid)
497
int i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 };
498
char *names[] = { "ID", "VERSION", "SPIV" };
501
printk("Inquiring remote APIC #%d...\n", apicid);
503
for (i = 0; i < sizeof(regs) / sizeof(*regs); i++) {
504
printk("... APIC #%d %s: ", apicid, names[i]);
509
apic_wait_icr_idle();
511
apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(apicid));
512
apic_write_around(APIC_ICR, APIC_DM_REMRD | regs[i]);
517
status = apic_read(APIC_ICR) & APIC_ICR_RR_MASK;
518
} while (status == APIC_ICR_RR_INPROG && timeout++ < 1000);
521
case APIC_ICR_RR_VALID:
522
status = apic_read(APIC_RRR);
523
printk("%08x\n", status);
533
/*
 * NOTE(review): garbled by extraction -- bare numeric lines are leftover
 * source line numbers; many statements (timeout resets, udelay calls,
 * ESR handling, braces) are missing.  Fragments preserved byte-for-byte;
 * restore from revision history.
 *
 * From what survives: the MP-spec AP wakeup sequence.  Assert then
 * deassert INIT on the target APIC (setting init_deasserted for
 * smp_callin()), then -- on integrated APICs only -- send up to
 * num_starts STARTUP IPIs carrying the trampoline page number
 * (start_eip >> 12), polling APIC_ICR_BUSY after each send and
 * collecting errors from the ESR.  Returns the OR of the send and
 * accept error status (0 on success).
 */
static int wakeup_secondary_via_INIT(int phys_apicid, unsigned long start_eip)
535
unsigned long send_status = 0, accept_status = 0;
536
int maxlvt, timeout, num_starts, j;
538
Dprintk("Asserting INIT.\n");
541
* Turn INIT on target chip
543
apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
548
apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_INT_ASSERT
551
Dprintk("Waiting for send to finish...\n");
556
send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
557
} while (send_status && (timeout++ < 1000));
561
Dprintk("Deasserting INIT.\n");
564
apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
567
apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT);
569
Dprintk("Waiting for send to finish...\n");
574
send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
575
} while (send_status && (timeout++ < 1000));
577
atomic_set(&init_deasserted, 1);
580
* Should we send STARTUP IPIs ?
582
* Determine this based on the APIC version.
583
* If we don't have an integrated APIC, don't send the STARTUP IPIs.
585
if (APIC_INTEGRATED(apic_version[phys_apicid]))
591
* Run STARTUP IPI loop.
593
Dprintk("#startup loops: %d.\n", num_starts);
595
maxlvt = get_maxlvt();
597
for (j = 1; j <= num_starts; j++) {
598
Dprintk("Sending STARTUP #%d.\n",j);
600
apic_read_around(APIC_SPIV);
601
apic_write(APIC_ESR, 0);
603
Dprintk("After apic_write.\n");
610
apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
612
/* Boot on the stack */
613
/* Kick the second */
614
apic_write_around(APIC_ICR, APIC_DM_STARTUP
615
| (start_eip >> 12));
618
* Give the other CPU some time to accept the IPI.
622
Dprintk("Startup point 1.\n");
624
Dprintk("Waiting for send to finish...\n");
629
send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
630
} while (send_status && (timeout++ < 1000));
633
* Give the other CPU some time to accept the IPI.
637
* Due to the Pentium erratum 3AP.
640
apic_read_around(APIC_SPIV);
641
apic_write(APIC_ESR, 0);
643
accept_status = (apic_read(APIC_ESR) & 0xEF);
644
if (send_status || accept_status)
647
Dprintk("After Startup.\n");
650
printk("APIC never delivered???\n");
652
printk("APIC delivery error (%lx).\n", accept_status);
654
return (send_status | accept_status);
657
/*
 * NOTE(review): garbled by extraction -- bare numeric lines are leftover
 * source line numbers; declarations (cpu, timeout), the warm-reset CMOS
 * sequence, error-path control flow and braces are missing.  Fragments
 * preserved byte-for-byte; restore from revision history.
 *
 * From what survives: boots one AP.  Creates an idle domain for the CPU,
 * records the cpu<->apicid mapping, installs the real-mode trampoline and
 * the warm-reset vector (CMOS 0xf=0xa, vector at 0x467), clears pending
 * APIC errors, kicks the AP via wakeup_secondary_via_INIT(), then polls
 * cpu_callin_map (up to 50000 iterations, ~5s per the comment) for the
 * AP to check in.  On failure it inspects the trampoline word to
 * distinguish "stuck" from "not responding", dumps the remote APIC, and
 * unwinds the apicid mapping plus the callout/initialized/online bits.
 */
extern unsigned long cpu_initialized;
659
static void __init do_boot_cpu (int apicid)
661
* NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad
662
* (ie clustered apic addressing mode), this is a LOGICAL apic ID.
665
struct task_struct *idle;
666
unsigned long boot_error = 0;
668
unsigned long start_eip;
672
if ( (idle = do_createdomain(IDLE_DOMAIN_ID, cpu)) == NULL )
673
panic("failed 'createdomain' for CPU %d", cpu);
675
idle->mm.pagetable = mk_pagetable(__pa(idle_pg_table));
677
map_cpu_to_boot_apicid(cpu, apicid);
679
SET_DEFAULT_FAST_TRAP(&idle->thread);
681
idle_task[cpu] = idle;
683
/* start_eip had better be page-aligned! */
684
start_eip = setup_trampoline();
686
/* So we see what's up. */
687
printk("Booting processor %d/%d eip %lx\n", cpu, apicid, start_eip);
688
stack_start.esp = __pa(get_free_page(GFP_KERNEL)) + 4000;
691
* This grunge runs the startup process for
692
* the targeted processor.
695
atomic_set(&init_deasserted, 0);
697
Dprintk("Setting warm reset code and vector.\n");
699
CMOS_WRITE(0xa, 0xf);
702
*((volatile unsigned short *) TRAMPOLINE_HIGH) = start_eip >> 4;
704
*((volatile unsigned short *) TRAMPOLINE_LOW) = start_eip & 0xf;
708
* Be paranoid about clearing APIC errors.
710
if ( APIC_INTEGRATED(apic_version[apicid]) )
712
apic_read_around(APIC_SPIV);
713
apic_write(APIC_ESR, 0);
718
* Status is now clean
723
* Starting actual IPI sequence...
726
boot_error = wakeup_secondary_via_INIT(apicid, start_eip);
730
* allow APs to start initializing.
732
Dprintk("Before Callout %d.\n", cpu);
733
set_bit(cpu, &cpu_callout_map);
734
Dprintk("After Callout %d.\n", cpu);
737
* Wait 5s total for a response
739
for (timeout = 0; timeout < 50000; timeout++) {
740
if (test_bit(cpu, &cpu_callin_map))
741
break; /* It has booted */
745
if (test_bit(cpu, &cpu_callin_map)) {
746
/* number CPUs logically, starting from 1 (BSP is 0) */
747
printk("CPU%d has booted.\n", cpu);
750
if (*((volatile unsigned long *)phys_to_virt(start_eip))
752
/* trampoline started but...? */
753
printk("Stuck ??\n");
755
/* trampoline code not run */
756
printk("Not responding.\n");
758
inquire_remote_apic(apicid);
763
/* Try to put things back the way they were before ... */
764
unmap_cpu_to_boot_apicid(cpu, apicid);
765
clear_bit(cpu, &cpu_callout_map); /* was set here (do_boot_cpu()) */
766
clear_bit(cpu, &cpu_initialized); /* was set by cpu_init() */
767
clear_bit(cpu, &cpu_online_map); /* was set in smp_callin() */
774
* Cycle through the processors sending APIC IPIs to boot each.
777
static int boot_cpu_logical_apicid;
778
/* Where the IO area was mapped on multiquad, always 0 otherwise */
779
void *xquad_portio = NULL;
781
/*
 * NOTE(review): garbled by extraction -- bare numeric lines are leftover
 * source line numbers; locals, several control-flow constructs, the
 * uniprocessor bail-out paths and braces are missing.  Fragments
 * preserved byte-for-byte; restore from revision history.
 *
 * From what survives: the BP-side SMP bring-up driver.  It initializes
 * MTRRs and the cpu<->apicid maps, records the boot CPU's info and marks
 * it online, then bails to dummy-APIC uniprocessor operation when no SMP
 * config / no local APIC is found or SMP was deactivated.  Otherwise it
 * walks phys_cpu_present_map calling do_boot_cpu() for each AP (skipping
 * the boot CPU and honouring max_cpus), unmaps non-responding CPUs,
 * restores the warm-reset vector, prints the activation summary, warns
 * about B-stepping processors, sets up the IO-APIC if present, and
 * finally resynchronizes the TSCs via synchronize_tsc_bp().
 */
void __init smp_boot_cpus(void)
786
/* Must be done before other processors booted */
787
mtrr_init_boot_cpu ();
789
/* Initialize the logical to physical CPU number mapping */
790
init_cpu_to_apicid();
793
* Setup boot CPU information
795
smp_store_cpu_info(0); /* Final full version of the data */
796
printk("CPU%d booted\n", 0);
799
* We have the boot CPU online for sure.
801
set_bit(0, &cpu_online_map);
802
boot_cpu_logical_apicid = logical_smp_processor_id();
803
map_cpu_to_boot_apicid(0, boot_cpu_apicid);
806
* If we couldnt find an SMP configuration at boot time,
807
* get out of here now!
809
if (!smp_found_config) {
810
printk("SMP motherboard not detected.\n");
812
cpu_online_map = phys_cpu_present_map = 1;
814
if (APIC_init_uniprocessor())
815
printk("Local APIC not detected."
816
" Using dummy APIC emulation.\n");
821
* Should not be necessary because the MP table should list the boot
822
* CPU too, but we do it for the sake of robustness anyway.
824
if (!test_bit(boot_cpu_physical_apicid, &phys_cpu_present_map)) {
825
printk("weird, boot CPU (#%d) not listed by the BIOS.\n",
826
boot_cpu_physical_apicid);
827
phys_cpu_present_map |= (1 << hard_smp_processor_id());
831
* If we couldn't find a local APIC, then get out of here now!
833
if (APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid]) &&
834
!test_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability)) {
835
printk("BIOS bug, local APIC #%d not detected!...\n",
836
boot_cpu_physical_apicid);
837
printk("... forcing use of dummy APIC emulation. (tell your hw vendor)\n");
839
cpu_online_map = phys_cpu_present_map = 1;
847
* If SMP should be disabled, then really disable it!
850
smp_found_config = 0;
851
printk("SMP mode deactivated, forcing use of dummy APIC emulation.\n");
853
cpu_online_map = phys_cpu_present_map = 1;
861
if (GET_APIC_ID(apic_read(APIC_ID)) != boot_cpu_physical_apicid)
865
* Scan the CPU present map and fire up the other CPUs via do_boot_cpu
867
* In clustered apic mode, phys_cpu_present_map is a constructed thus:
868
* bits 0-3 are quad0, 4-7 are quad1, etc. A perverse twist on the
871
Dprintk("CPU present map: %lx\n", phys_cpu_present_map);
873
for (bit = 0; bit < NR_CPUS; bit++) {
874
apicid = cpu_present_to_apicid(bit);
876
* Don't even attempt to start the boot CPU!
878
if (apicid == boot_cpu_apicid)
881
if (!(phys_cpu_present_map & (1 << bit)))
883
if ((max_cpus >= 0) && (max_cpus <= cpucount+1))
889
* Make sure we unmap all failed CPUs
891
if ((boot_apicid_to_cpu(apicid) == -1) &&
892
(phys_cpu_present_map & (1 << bit)))
893
printk("CPU #%d not responding - cannot use it.\n",
898
* Cleanup possible dangling ends...
901
* Install writable page 0 entry to set BIOS data area.
906
* Paranoid: Set warm reset code and vector here back
911
*((volatile long *) phys_to_virt(0x467)) = 0;
914
printk("Error: only one processor found.\n");
916
printk("Total of %d processors activated.\n", cpucount+1);
918
smp_num_cpus = cpucount + 1;
921
printk("WARNING: SMP operation may"
922
" be unreliable with B stepping processors.\n");
923
Dprintk("Boot done.\n");
926
* Here we can be sure that there is an IO-APIC in the system. Let's
929
if ( nr_ioapics ) setup_IO_APIC();
931
/* Set up all local APIC timers in the system. */
934
/* Synchronize the TSC with the AP(s). */
935
if ( cpucount ) synchronize_tsc_bp();
941
#endif /* CONFIG_SMP */