1
diff --git a/Makefile b/Makefile
2
index 17bfe08..69cc998 100644
5
@@ -525,6 +525,10 @@ endif
6
# Arch Makefiles may override this setting
7
KBUILD_CFLAGS += $(call cc-option, -fno-stack-protector)
9
+ifdef CONFIG_IPIPE_TRACE_MCOUNT
13
ifdef CONFIG_FRAME_POINTER
14
KBUILD_CFLAGS += -fno-omit-frame-pointer -fno-optimize-sibling-calls
16
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
17
index 2187c60..1179860 100644
18
--- a/arch/x86/Kconfig
19
+++ b/arch/x86/Kconfig
20
@@ -381,6 +381,7 @@ config SCHED_NO_NO_OMIT_FRAME_POINTER
22
menuconfig PARAVIRT_GUEST
23
bool "Paravirtualized guest support"
26
Say Y here to get to see options related to running Linux under
27
various hypervisors. This option alone does not add any kernel code.
28
@@ -426,7 +427,7 @@ source "arch/x86/lguest/Kconfig"
31
bool "Enable paravirtualization code"
32
- depends on !X86_VOYAGER
33
+ depends on !(X86_VOYAGER || IPIPE)
35
This changes the kernel so it can modify itself when it is run
36
under a hypervisor, potentially improving performance significantly
37
@@ -623,6 +624,8 @@ config SCHED_MC
39
source "kernel/Kconfig.preempt"
41
+source "kernel/ipipe/Kconfig"
44
bool "Local APIC support on uniprocessors"
45
depends on X86_32 && !SMP && !(X86_VOYAGER || X86_GENERICARCH)
46
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
47
index d1a47ad..7769508 100644
48
--- a/arch/x86/Makefile
49
+++ b/arch/x86/Makefile
50
@@ -187,7 +187,7 @@ all: bzImage
51
zImage zlilo zdisk: KBUILD_IMAGE := $(boot)/zImage
53
zImage bzImage: vmlinux
54
- $(Q)$(MAKE) $(build)=$(boot) $(KBUILD_IMAGE)
55
+ $(Q)$(MAKE) $(build)=$(boot) mflags-y="$(mflags-y)" $(KBUILD_IMAGE)
56
$(Q)mkdir -p $(objtree)/arch/$(UTS_MACHINE)/boot
57
$(Q)ln -fsn ../../x86/boot/bzImage $(objtree)/arch/$(UTS_MACHINE)/boot/$@
59
diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile
60
index cd48c72..76c116d 100644
61
--- a/arch/x86/boot/Makefile
62
+++ b/arch/x86/boot/Makefile
63
@@ -115,7 +115,7 @@ $(obj)/setup.bin: $(obj)/setup.elf FORCE
64
$(call if_changed,objcopy)
66
$(obj)/compressed/vmlinux: FORCE
67
- $(Q)$(MAKE) $(build)=$(obj)/compressed $@
68
+ $(Q)$(MAKE) $(build)=$(obj)/compressed mflags-y="$(mflags-y)" $@
70
# Set this if you want to pass append arguments to the zdisk/fdimage/isoimage kernel
72
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
73
index 1771c80..8b0cef0 100644
74
--- a/arch/x86/boot/compressed/Makefile
75
+++ b/arch/x86/boot/compressed/Makefile
76
@@ -9,7 +9,7 @@ targets := vmlinux vmlinux.bin vmlinux.bin.gz head_$(BITS).o misc.o piggy.o
77
KBUILD_CFLAGS := -m$(BITS) -D__KERNEL__ $(LINUX_INCLUDE) -O2
78
KBUILD_CFLAGS += -fno-strict-aliasing -fPIC
79
cflags-$(CONFIG_X86_64) := -mcmodel=small
80
-KBUILD_CFLAGS += $(cflags-y)
81
+KBUILD_CFLAGS += $(cflags-y) $(mflags-y)
82
KBUILD_CFLAGS += $(call cc-option,-ffreestanding)
83
KBUILD_CFLAGS += $(call cc-option,-fno-stack-protector)
85
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
86
index 3b1510b..710ba4c 100644
87
--- a/arch/x86/include/asm/apic.h
88
+++ b/arch/x86/include/asm/apic.h
89
@@ -143,7 +143,13 @@ static inline void ack_x2APIC_irq(void)
94
+#define ack_APIC_irq() do { } while(0)
95
+static inline void __ack_APIC_irq(void)
96
+#else /* !CONFIG_IPIPE */
97
+#define __ack_APIC_irq() ack_APIC_irq()
98
static inline void ack_APIC_irq(void)
99
+#endif /* CONFIG_IPIPE */
102
* ack_APIC_irq() actually gets compiled as a single instruction
103
diff --git a/arch/x86/include/asm/apicdef.h b/arch/x86/include/asm/apicdef.h
104
index 63134e3..fd9eed6 100644
105
--- a/arch/x86/include/asm/apicdef.h
106
+++ b/arch/x86/include/asm/apicdef.h
108
# define MAX_LOCAL_APIC 32768
111
+#ifndef __ASSEMBLY__
113
* All x86-64 systems are xAPIC compatible.
114
* In the following, "apicid" is a physical APIC ID.
115
@@ -414,4 +415,7 @@ struct local_apic {
117
#define BAD_APICID 0xFFFFu
120
+#endif /* !__ASSEMBLY__ */
122
#endif /* _ASM_X86_APICDEF_H */
123
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
124
index b97aecb..f3a15ec 100644
125
--- a/arch/x86/include/asm/hw_irq.h
126
+++ b/arch/x86/include/asm/hw_irq.h
127
@@ -33,6 +33,13 @@ extern void error_interrupt(void);
128
extern void spurious_interrupt(void);
129
extern void thermal_interrupt(void);
130
extern void reschedule_interrupt(void);
132
+void ipipe_ipi0(void);
133
+void ipipe_ipi1(void);
134
+void ipipe_ipi2(void);
135
+void ipipe_ipi3(void);
136
+void ipipe_ipiX(void);
139
extern void invalidate_interrupt(void);
140
extern void invalidate_interrupt0(void);
141
diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h
142
index 48f0004..e636ef3 100644
143
--- a/arch/x86/include/asm/i387.h
144
+++ b/arch/x86/include/asm/i387.h
145
@@ -283,11 +283,14 @@ static inline void __clear_fpu(struct task_struct *tsk)
146
static inline void kernel_fpu_begin(void)
148
struct thread_info *me = current_thread_info();
149
+ unsigned long flags;
151
+ local_irq_save_hw_cond(flags);
152
if (me->status & TS_USEDFPU)
153
__save_init_fpu(me->task);
156
+ local_irq_restore_hw_cond(flags);
159
static inline void kernel_fpu_end(void)
160
diff --git a/arch/x86/include/asm/i8259.h b/arch/x86/include/asm/i8259.h
161
index 58d7091..ac8bd15 100644
162
--- a/arch/x86/include/asm/i8259.h
163
+++ b/arch/x86/include/asm/i8259.h
164
@@ -24,7 +24,7 @@ extern unsigned int cached_irq_mask;
165
#define SLAVE_ICW4_DEFAULT 0x01
166
#define PIC_ICW4_AEOI 2
168
-extern spinlock_t i8259A_lock;
169
+extern ipipe_spinlock_t i8259A_lock;
171
extern void init_8259A(int auto_eoi);
172
extern void enable_8259A_irq(unsigned int irq);
173
diff --git a/arch/x86/include/asm/ipi.h b/arch/x86/include/asm/ipi.h
174
index f89dffb..a8779c0 100644
175
--- a/arch/x86/include/asm/ipi.h
176
+++ b/arch/x86/include/asm/ipi.h
177
@@ -66,6 +66,9 @@ static inline void __send_IPI_shortcut(unsigned int shortcut, int vector,
181
+ unsigned long flags;
183
+ local_irq_save_hw(flags);
187
@@ -81,6 +84,8 @@ static inline void __send_IPI_shortcut(unsigned int shortcut, int vector,
188
* Send the IPI. The write to APIC_ICR fires this off.
190
native_apic_mem_write(APIC_ICR, cfg);
192
+ local_irq_restore_hw(flags);
196
@@ -90,8 +95,9 @@ static inline void __send_IPI_shortcut(unsigned int shortcut, int vector,
197
static inline void __send_IPI_dest_field(unsigned int mask, int vector,
201
+ unsigned long cfg, flags;
203
+ local_irq_save_hw(flags);
207
@@ -115,11 +121,12 @@ static inline void __send_IPI_dest_field(unsigned int mask, int vector,
208
* Send the IPI. The write to APIC_ICR fires this off.
210
native_apic_mem_write(APIC_ICR, cfg);
212
+ local_irq_restore_hw(flags);
215
static inline void send_IPI_mask_sequence(cpumask_t mask, int vector)
217
- unsigned long flags;
218
unsigned long query_cpu;
221
@@ -127,12 +134,10 @@ static inline void send_IPI_mask_sequence(cpumask_t mask, int vector)
222
* to an arbitrary mask, so I do a unicast to each CPU instead.
225
- local_irq_save(flags);
226
for_each_cpu_mask_nr(query_cpu, mask) {
227
__send_IPI_dest_field(per_cpu(x86_cpu_to_apicid, query_cpu),
228
vector, APIC_DEST_PHYSICAL);
230
- local_irq_restore(flags);
233
#endif /* _ASM_X86_IPI_H */
234
diff --git a/arch/x86/include/asm/ipipe.h b/arch/x86/include/asm/ipipe.h
236
index 0000000..4bec994
238
+++ b/arch/x86/include/asm/ipipe.h
241
+ * arch/x86/include/asm/ipipe.h
243
+ * Copyright (C) 2007 Philippe Gerum.
245
+ * This program is free software; you can redistribute it and/or modify
246
+ * it under the terms of the GNU General Public License as published by
247
+ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139,
248
+ * USA; either version 2 of the License, or (at your option) any later
251
+ * This program is distributed in the hope that it will be useful,
252
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
253
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
254
+ * GNU General Public License for more details.
256
+ * You should have received a copy of the GNU General Public License
257
+ * along with this program; if not, write to the Free Software
258
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
261
+#ifndef __X86_IPIPE_H
262
+#define __X86_IPIPE_H
266
+#ifndef IPIPE_ARCH_STRING
267
+#define IPIPE_ARCH_STRING "2.2-07"
268
+#define IPIPE_MAJOR_NUMBER 2
269
+#define IPIPE_MINOR_NUMBER 2
270
+#define IPIPE_PATCH_NUMBER 7
273
+DECLARE_PER_CPU(struct pt_regs, __ipipe_tick_regs);
275
+static inline unsigned __ipipe_get_irq_vector(int irq)
277
+#ifdef CONFIG_X86_IO_APIC
278
+ unsigned __ipipe_get_ioapic_irq_vector(int irq);
279
+ return __ipipe_get_ioapic_irq_vector(irq);
280
+#elif defined(CONFIG_X86_LOCAL_APIC)
281
+ return irq >= IPIPE_FIRST_APIC_IRQ && irq < IPIPE_NR_XIRQS ?
282
+ ipipe_apic_irq_vector(irq) : irq + IRQ0_VECTOR;
284
+ return irq + IRQ0_VECTOR;
288
+#ifdef CONFIG_X86_32
289
+# include "ipipe_32.h"
291
+# include "ipipe_64.h"
295
+ * The logical processor id and the current Linux task are read from the PDA,
296
+ * so this is always safe, regardless of the underlying stack.
298
+#define ipipe_processor_id() raw_smp_processor_id()
299
+#define ipipe_safe_current() current
301
+#define prepare_arch_switch(next) \
303
+ ipipe_schedule_notify(current, next); \
304
+ local_irq_disable_hw(); \
307
+#define task_hijacked(p) \
308
+ ({ int x = !ipipe_root_domain_p; \
309
+ __clear_bit(IPIPE_SYNC_FLAG, &ipipe_root_cpudom_var(status)); \
310
+ local_irq_enable_hw(); x; })
312
+struct ipipe_domain;
314
+struct ipipe_sysinfo {
316
+ int ncpus; /* Number of CPUs on board */
317
+ u64 cpufreq; /* CPU frequency (in Hz) */
319
+ /* Arch-dependent block */
322
+ unsigned tmirq; /* Timer tick IRQ */
323
+ u64 tmfreq; /* Timer frequency */
327
+/* Private interface -- Internal use only */
329
+#define __ipipe_check_platform() do { } while(0)
330
+#define __ipipe_init_platform() do { } while(0)
331
+#define __ipipe_enable_irq(irq) irq_to_desc(irq)->chip->enable(irq)
332
+#define __ipipe_disable_irq(irq) irq_to_desc(irq)->chip->disable(irq)
335
+void __ipipe_hook_critical_ipi(struct ipipe_domain *ipd);
337
+#define __ipipe_hook_critical_ipi(ipd) do { } while(0)
340
+#define __ipipe_disable_irqdesc(ipd, irq) do { } while(0)
342
+void __ipipe_enable_irqdesc(struct ipipe_domain *ipd, unsigned irq);
344
+void __ipipe_enable_pipeline(void);
346
+void __ipipe_do_critical_sync(unsigned irq, void *cookie);
348
+void __ipipe_serial_debug(const char *fmt, ...);
350
+extern int __ipipe_tick_irq;
352
+#ifdef CONFIG_X86_LOCAL_APIC
353
+#define ipipe_update_tick_evtdev(evtdev) \
355
+ if (strcmp((evtdev)->name, "lapic") == 0) \
356
+ __ipipe_tick_irq = \
357
+ ipipe_apic_vector_irq(LOCAL_TIMER_VECTOR); \
359
+ __ipipe_tick_irq = 0; \
362
+#define ipipe_update_tick_evtdev(evtdev) \
363
+ __ipipe_tick_irq = 0
366
+int __ipipe_check_lapic(void);
368
+int __ipipe_check_tickdev(const char *devname);
370
+#define __ipipe_syscall_watched_p(p, sc) \
371
+ (((p)->flags & PF_EVNOTIFY) || (unsigned long)(sc) >= NR_syscalls) /* (sc) parenthesized so the cast covers the whole argument expression */
373
+#define __ipipe_root_tick_p(regs) ((regs)->flags & X86_EFLAGS_IF)
376
+#define __ipipe_move_root_irq(irq) \
378
+ if (irq < NR_IRQS) { \
379
+ struct irq_chip *chip = irq_to_desc(irq)->chip; \
384
+#else /* !CONFIG_SMP */
385
+#define __ipipe_move_root_irq(irq) do { } while (0)
386
+#endif /* !CONFIG_SMP */
388
+#else /* !CONFIG_IPIPE */
390
+#define ipipe_update_tick_evtdev(evtdev) do { } while (0)
391
+#define task_hijacked(p) 0
393
+#endif /* CONFIG_IPIPE */
395
+#endif /* !__X86_IPIPE_H */
396
diff --git a/arch/x86/include/asm/ipipe_32.h b/arch/x86/include/asm/ipipe_32.h
398
index 0000000..235c846
400
+++ b/arch/x86/include/asm/ipipe_32.h
403
+ * arch/x86/include/asm/ipipe_32.h
405
+ * Copyright (C) 2002-2005 Philippe Gerum.
407
+ * This program is free software; you can redistribute it and/or modify
408
+ * it under the terms of the GNU General Public License as published by
409
+ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139,
410
+ * USA; either version 2 of the License, or (at your option) any later
413
+ * This program is distributed in the hope that it will be useful,
414
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
415
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
416
+ * GNU General Public License for more details.
418
+ * You should have received a copy of the GNU General Public License
419
+ * along with this program; if not, write to the Free Software
420
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
423
+#ifndef __X86_IPIPE_32_H
424
+#define __X86_IPIPE_32_H
426
+#include <linux/cpumask.h>
427
+#include <linux/list.h>
428
+#include <linux/threads.h>
429
+#include <linux/ipipe_percpu.h>
430
+#include <asm/ptrace.h>
432
+#define ipipe_read_tsc(t) __asm__ __volatile__("rdtsc" : "=A" (t))
433
+#define ipipe_cpu_freq() ({ unsigned long long __freq = cpu_has_tsc?(1000LL * cpu_khz):CLOCK_TICK_RATE; __freq; })
435
+#define ipipe_tsc2ns(t) \
437
+ unsigned long long delta = (t)*1000; \
438
+ do_div(delta, cpu_khz/1000+1); \
439
+ (unsigned long)delta; \
442
+#define ipipe_tsc2us(t) \
444
+ unsigned long long delta = (t); \
445
+ do_div(delta, cpu_khz/1000+1); \
446
+ (unsigned long)delta; \
449
+/* Private interface -- Internal use only */
451
+int __ipipe_handle_irq(struct pt_regs *regs);
453
+static inline unsigned long __ipipe_ffnz(unsigned long ul)
455
+ __asm__("bsrl %1, %0":"=r"(ul)
462
+void __ipipe_ack_edge_irq(unsigned irq, struct irq_desc *desc);
464
+void __ipipe_end_edge_irq(unsigned irq, struct irq_desc *desc);
466
+static inline void __ipipe_call_root_xirq_handler(unsigned irq,
467
+ ipipe_irq_handler_t handler)
469
+ struct pt_regs *regs = &__raw_get_cpu_var(__ipipe_tick_regs);
471
+ regs->orig_ax = ~__ipipe_get_irq_vector(irq);
473
+ __asm__ __volatile__("pushfl\n\t"
475
+ "pushl $__xirq_end\n\t"
487
+ "movl %2,%%eax\n\t"
489
+ "jmp ret_from_intr\n\t"
490
+ "__xirq_end: cli\n"
492
+ : "a" (~irq), "r" (handler), "rm" (regs));
495
+void irq_enter(void);
496
+void irq_exit(void);
498
+static inline void __ipipe_call_root_virq_handler(unsigned irq,
499
+ ipipe_irq_handler_t handler,
503
+ __asm__ __volatile__("pushfl\n\t"
505
+ "pushl $__virq_end\n\t"
522
+ : "a" (irq), "r" (handler), "d" (cookie));
524
+ __asm__ __volatile__("jmp ret_from_intr\n\t"
525
+ "__virq_end: cli\n"
531
+ * When running handlers, enable hw interrupts for all domains but the
532
+ * one heading the pipeline, so that IRQs can never be significantly
533
+ * deferred for the latter.
535
+#define __ipipe_run_isr(ipd, irq) \
537
+ local_irq_enable_nohead(ipd); \
538
+ if (ipd == ipipe_root_domain) { \
539
+ if (likely(!ipipe_virtual_irq_p(irq))) { \
540
+ __ipipe_move_root_irq(irq); \
541
+ __ipipe_call_root_xirq_handler(irq, ipd->irqs[irq].handler); \
543
+ __ipipe_call_root_virq_handler(irq, ipd->irqs[irq].handler, ipd->irqs[irq].cookie); \
545
+ __clear_bit(IPIPE_SYNC_FLAG, &ipipe_cpudom_var(ipd, status)); \
546
+ ipd->irqs[irq].handler(irq, ipd->irqs[irq].cookie); \
547
+ __set_bit(IPIPE_SYNC_FLAG, &ipipe_cpudom_var(ipd, status)); \
549
+ local_irq_disable_nohead(ipd); \
552
+#endif /* !__X86_IPIPE_32_H */
553
diff --git a/arch/x86/include/asm/ipipe_64.h b/arch/x86/include/asm/ipipe_64.h
555
index 0000000..4e719d4
557
+++ b/arch/x86/include/asm/ipipe_64.h
560
+ * arch/x86/include/asm/ipipe_64.h
562
+ * Copyright (C) 2007 Philippe Gerum.
564
+ * This program is free software; you can redistribute it and/or modify
565
+ * it under the terms of the GNU General Public License as published by
566
+ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139,
567
+ * USA; either version 2 of the License, or (at your option) any later
570
+ * This program is distributed in the hope that it will be useful,
571
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
572
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
573
+ * GNU General Public License for more details.
575
+ * You should have received a copy of the GNU General Public License
576
+ * along with this program; if not, write to the Free Software
577
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
580
+#ifndef __X86_IPIPE_64_H
581
+#define __X86_IPIPE_64_H
583
+#include <asm/ptrace.h>
584
+#include <asm/irq.h>
585
+#include <linux/cpumask.h>
586
+#include <linux/list.h>
587
+#include <linux/ipipe_percpu.h>
589
+#include <asm/mpspec.h>
590
+#include <linux/thread_info.h>
593
+#define ipipe_read_tsc(t) do { \
594
+ unsigned int __a,__d; \
595
+ asm volatile("rdtsc" : "=a" (__a), "=d" (__d)); \
596
+ (t) = ((unsigned long)__a) | (((unsigned long)__d)<<32); \
599
+extern unsigned cpu_khz;
600
+#define ipipe_cpu_freq() ({ unsigned long __freq = (1000UL * cpu_khz); __freq; })
601
+#define ipipe_tsc2ns(t) (((t) * 1000UL) / (ipipe_cpu_freq() / 1000000UL))
602
+#define ipipe_tsc2us(t) ((t) / (ipipe_cpu_freq() / 1000000UL))
604
+/* Private interface -- Internal use only */
606
+int __ipipe_handle_irq(struct pt_regs *regs);
608
+static inline unsigned long __ipipe_ffnz(unsigned long ul)
610
+ __asm__("bsrq %1, %0":"=r"(ul)
617
+void __ipipe_ack_edge_irq(unsigned irq, struct irq_desc *desc);
619
+void __ipipe_end_edge_irq(unsigned irq, struct irq_desc *desc);
621
+static inline void __ipipe_call_root_xirq_handler(unsigned irq,
622
+ void (*handler)(unsigned, void *))
624
+ struct pt_regs *regs = &__raw_get_cpu_var(__ipipe_tick_regs);
626
+ regs->orig_ax = ~__ipipe_get_irq_vector(irq);
628
+ __asm__ __volatile__("movq %%rsp, %%rax\n\t"
632
+ "pushq %[kernel_cs]\n\t"
633
+ "pushq $__xirq_end\n\t"
634
+ "pushq %[vector]\n\t"
635
+ "subq $9*8,%%rsp\n\t"
636
+ "movq %%rdi,8*8(%%rsp)\n\t"
637
+ "movq %%rsi,7*8(%%rsp)\n\t"
638
+ "movq %%rdx,6*8(%%rsp)\n\t"
639
+ "movq %%rcx,5*8(%%rsp)\n\t"
640
+ "movq %%rax,4*8(%%rsp)\n\t"
641
+ "movq %%r8,3*8(%%rsp)\n\t"
642
+ "movq %%r9,2*8(%%rsp)\n\t"
643
+ "movq %%r10,1*8(%%rsp)\n\t"
644
+ "movq %%r11,(%%rsp)\n\t"
645
+ "call *%[handler]\n\t"
646
+ "jmp exit_intr\n\t"
647
+ "__xirq_end: cli\n"
649
+ : [kernel_cs] "i" (__KERNEL_CS),
650
+ [vector] "rm" (regs->orig_ax),
651
+ [handler] "r" (handler), "D" (regs)
655
+void irq_enter(void);
656
+void irq_exit(void);
658
+static inline void __ipipe_call_root_virq_handler(unsigned irq,
659
+ void (*handler)(unsigned, void *),
663
+ __asm__ __volatile__("movq %%rsp, %%rax\n\t"
667
+ "pushq %[kernel_cs]\n\t"
668
+ "pushq $__virq_end\n\t"
670
+ "subq $9*8,%%rsp\n\t"
671
+ "movq %%rdi,8*8(%%rsp)\n\t"
672
+ "movq %%rsi,7*8(%%rsp)\n\t"
673
+ "movq %%rdx,6*8(%%rsp)\n\t"
674
+ "movq %%rcx,5*8(%%rsp)\n\t"
675
+ "movq %%rax,4*8(%%rsp)\n\t"
676
+ "movq %%r8,3*8(%%rsp)\n\t"
677
+ "movq %%r9,2*8(%%rsp)\n\t"
678
+ "movq %%r10,1*8(%%rsp)\n\t"
679
+ "movq %%r11,(%%rsp)\n\t"
680
+ "call *%[handler]\n\t"
682
+ : [kernel_cs] "i" (__KERNEL_CS),
683
+ [handler] "r" (handler), "D" (irq), "S" (cookie)
686
+ __asm__ __volatile__("jmp exit_intr\n\t"
687
+ "__virq_end: cli\n"
693
+ * When running handlers, enable hw interrupts for all domains but the
694
+ * one heading the pipeline, so that IRQs can never be significantly
695
+ * deferred for the latter.
697
+#define __ipipe_run_isr(ipd, irq) \
699
+ local_irq_enable_nohead(ipd); \
700
+ if (ipd == ipipe_root_domain) { \
701
+ if (likely(!ipipe_virtual_irq_p(irq))) { \
702
+ __ipipe_move_root_irq(irq); \
703
+ __ipipe_call_root_xirq_handler( \
704
+ irq, (ipd)->irqs[irq].handler); \
706
+ __ipipe_call_root_virq_handler( \
707
+ irq, (ipd)->irqs[irq].handler, \
708
+ (ipd)->irqs[irq].cookie); \
710
+ __clear_bit(IPIPE_SYNC_FLAG, &ipipe_cpudom_var(ipd, status)); \
711
+ ipd->irqs[irq].handler(irq, ipd->irqs[irq].cookie); \
712
+ __set_bit(IPIPE_SYNC_FLAG, &ipipe_cpudom_var(ipd, status)); \
714
+ local_irq_disable_nohead(ipd); \
717
+#endif /* !__X86_IPIPE_64_H */
718
diff --git a/arch/x86/include/asm/ipipe_base.h b/arch/x86/include/asm/ipipe_base.h
720
index 0000000..05338e4
722
+++ b/arch/x86/include/asm/ipipe_base.h
725
+ * arch/x86/include/asm/ipipe_base.h
727
+ * Copyright (C) 2007 Philippe Gerum.
729
+ * This program is free software; you can redistribute it and/or modify
730
+ * it under the terms of the GNU General Public License as published by
731
+ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139,
732
+ * USA; either version 2 of the License, or (at your option) any later
735
+ * This program is distributed in the hope that it will be useful,
736
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
737
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
738
+ * GNU General Public License for more details.
740
+ * You should have received a copy of the GNU General Public License
741
+ * along with this program; if not, write to the Free Software
742
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
745
+#ifndef __X86_IPIPE_BASE_H
746
+#define __X86_IPIPE_BASE_H
748
+#ifdef CONFIG_X86_32
749
+# include "ipipe_base_32.h"
751
+# include "ipipe_base_64.h"
754
+#define ex_do_divide_error 0
755
+#define ex_do_debug 1
756
+/* NMI not pipelined. */
757
+#define ex_do_int3 3
758
+#define ex_do_overflow 4
759
+#define ex_do_bounds 5
760
+#define ex_do_invalid_op 6
761
+#define ex_do_device_not_available 7
762
+/* Double fault not pipelined. */
763
+#define ex_do_coprocessor_segment_overrun 9
764
+#define ex_do_invalid_TSS 10
765
+#define ex_do_segment_not_present 11
766
+#define ex_do_stack_segment 12
767
+#define ex_do_general_protection 13
768
+#define ex_do_page_fault 14
769
+#define ex_do_spurious_interrupt_bug 15
770
+#define ex_do_coprocessor_error 16
771
+#define ex_do_alignment_check 17
772
+#define ex_machine_check_vector 18
773
+#define ex_reserved ex_machine_check_vector
774
+#define ex_do_simd_coprocessor_error 19
775
+#define ex_do_iret_error 32
777
+#if !defined(__ASSEMBLY__) && !defined(CONFIG_SMP)
780
+/* Alias to ipipe_root_cpudom_var(status) */
781
+extern unsigned long __ipipe_root_status;
783
+extern unsigned long *const __ipipe_root_status_addr;
784
+#define __ipipe_root_status (*__ipipe_root_status_addr)
787
+static inline void __ipipe_stall_root(void)
789
+ volatile unsigned long *p = &__ipipe_root_status;
790
+ __asm__ __volatile__("btsl $0,%0;"
791
+ :"+m" (*p) : : "memory");
794
+static inline unsigned long __ipipe_test_and_stall_root(void)
796
+ volatile unsigned long *p = &__ipipe_root_status;
799
+ __asm__ __volatile__("btsl $0,%1;"
801
+ :"=r" (oldbit), "+m" (*p)
806
+static inline unsigned long __ipipe_test_root(void)
808
+ volatile unsigned long *p = &__ipipe_root_status;
811
+ __asm__ __volatile__("btl $0,%1;"
818
+#endif /* !__ASSEMBLY__ && !CONFIG_SMP */
820
+#endif /* !__X86_IPIPE_BASE_H */
821
diff --git a/arch/x86/include/asm/ipipe_base_32.h b/arch/x86/include/asm/ipipe_base_32.h
823
index 0000000..0709d01
825
+++ b/arch/x86/include/asm/ipipe_base_32.h
828
+ * arch/x86/include/asm/ipipe_base_32.h
830
+ * Copyright (C) 2007 Philippe Gerum.
832
+ * This program is free software; you can redistribute it and/or modify
833
+ * it under the terms of the GNU General Public License as published by
834
+ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139,
835
+ * USA; either version 2 of the License, or (at your option) any later
838
+ * This program is distributed in the hope that it will be useful,
839
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
840
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
841
+ * GNU General Public License for more details.
843
+ * You should have received a copy of the GNU General Public License
844
+ * along with this program; if not, write to the Free Software
845
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
848
+#ifndef __X86_IPIPE_BASE_32_H
849
+#define __X86_IPIPE_BASE_32_H
851
+#include <linux/threads.h>
852
+#include <asm/apicdef.h>
853
+#include <asm/irq_vectors.h>
855
+#ifdef CONFIG_X86_LOCAL_APIC
856
+/* System interrupts are mapped beyond the last defined external IRQ
858
+#define IPIPE_FIRST_APIC_IRQ NR_IRQS
859
+#define IPIPE_NR_XIRQS (NR_IRQS + 256 - FIRST_SYSTEM_VECTOR)
860
+#define ipipe_apic_irq_vector(irq) ((irq) - IPIPE_FIRST_APIC_IRQ + FIRST_SYSTEM_VECTOR)
861
+#define ipipe_apic_vector_irq(vec) ((vec) - FIRST_SYSTEM_VECTOR + IPIPE_FIRST_APIC_IRQ)
862
+/* If the APIC is enabled, then we expose four service vectors in the
863
+ * APIC space which are freely available to domains. */
864
+#define IPIPE_SERVICE_VECTOR0 0xf5
865
+#define IPIPE_SERVICE_IPI0 ipipe_apic_vector_irq(IPIPE_SERVICE_VECTOR0)
866
+#define IPIPE_SERVICE_VECTOR1 0xf6
867
+#define IPIPE_SERVICE_IPI1 ipipe_apic_vector_irq(IPIPE_SERVICE_VECTOR1)
868
+#define IPIPE_SERVICE_VECTOR2 0xf7
869
+#define IPIPE_SERVICE_IPI2 ipipe_apic_vector_irq(IPIPE_SERVICE_VECTOR2)
870
+#define IPIPE_SERVICE_VECTOR3 0xf8
871
+#define IPIPE_SERVICE_IPI3 ipipe_apic_vector_irq(IPIPE_SERVICE_VECTOR3)
872
+#define IPIPE_CRITICAL_VECTOR 0xf9 /* SMP-only: used by ipipe_critical_enter/exit() */
873
+#define IPIPE_CRITICAL_IPI ipipe_apic_vector_irq(IPIPE_CRITICAL_VECTOR)
874
+#else /* !CONFIG_X86_LOCAL_APIC */
875
+#define IPIPE_NR_XIRQS NR_IRQS
876
+#endif /* !CONFIG_X86_LOCAL_APIC */
878
+#define IPIPE_IRQ_ISHIFT 5 /* 2^5 for 32bits arch. */
880
+/* IDT fault vectors */
881
+#define IPIPE_NR_FAULTS 33 /* 32 from IDT + iret_error */
882
+/* Pseudo-vectors used for kernel events */
883
+#define IPIPE_FIRST_EVENT IPIPE_NR_FAULTS
884
+#define IPIPE_EVENT_SYSCALL (IPIPE_FIRST_EVENT)
885
+#define IPIPE_EVENT_SCHEDULE (IPIPE_FIRST_EVENT + 1)
886
+#define IPIPE_EVENT_SIGWAKE (IPIPE_FIRST_EVENT + 2)
887
+#define IPIPE_EVENT_SETSCHED (IPIPE_FIRST_EVENT + 3)
888
+#define IPIPE_EVENT_INIT (IPIPE_FIRST_EVENT + 4)
889
+#define IPIPE_EVENT_EXIT (IPIPE_FIRST_EVENT + 5)
890
+#define IPIPE_EVENT_CLEANUP (IPIPE_FIRST_EVENT + 6)
891
+#define IPIPE_LAST_EVENT IPIPE_EVENT_CLEANUP
892
+#define IPIPE_NR_EVENTS (IPIPE_LAST_EVENT + 1)
894
+#ifndef __ASSEMBLY__
896
+#include <asm/alternative.h>
900
+#define GET_ROOT_STATUS_ADDR \
902
+ "movl %%fs:per_cpu__this_cpu_off, %%eax;" \
903
+ "lea per_cpu__ipipe_percpu_darray(%%eax), %%eax;"
904
+#define PUT_ROOT_STATUS_ADDR "popfl;"
906
+static inline void __ipipe_stall_root(void)
908
+ __asm__ __volatile__(GET_ROOT_STATUS_ADDR
911
+ PUT_ROOT_STATUS_ADDR
912
+ : : : "eax", "memory");
915
+static inline unsigned long __ipipe_test_and_stall_root(void)
919
+ __asm__ __volatile__(GET_ROOT_STATUS_ADDR
923
+ PUT_ROOT_STATUS_ADDR
925
+ : : "eax", "memory");
929
+static inline unsigned long __ipipe_test_root(void)
933
+ __asm__ __volatile__(GET_ROOT_STATUS_ADDR
936
+ PUT_ROOT_STATUS_ADDR
942
+#endif /* CONFIG_SMP */
944
+void __ipipe_serial_debug(const char *fmt, ...);
946
+#endif /* !__ASSEMBLY__ */
948
+#endif /* !__X86_IPIPE_BASE_32_H */
949
diff --git a/arch/x86/include/asm/ipipe_base_64.h b/arch/x86/include/asm/ipipe_base_64.h
951
index 0000000..02aa014
953
+++ b/arch/x86/include/asm/ipipe_base_64.h
956
+ * arch/x86/include/asm/ipipe_base_64.h
958
+ * Copyright (C) 2007 Philippe Gerum.
960
+ * This program is free software; you can redistribute it and/or modify
961
+ * it under the terms of the GNU General Public License as published by
962
+ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139,
963
+ * USA; either version 2 of the License, or (at your option) any later
966
+ * This program is distributed in the hope that it will be useful,
967
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
968
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
969
+ * GNU General Public License for more details.
971
+ * You should have received a copy of the GNU General Public License
972
+ * along with this program; if not, write to the Free Software
973
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
976
+#ifndef __X86_IPIPE_BASE_64_H
977
+#define __X86_IPIPE_BASE_64_H
979
+#include <linux/threads.h>
981
+/* Local APIC is always compiled in on x86_64. Reserve 32 IRQs for
982
+ APIC interrupts, we don't want them to mess with the normally
983
+ assigned interrupts. */
984
+#define IPIPE_NR_XIRQS (NR_IRQS + 32)
985
+#define IPIPE_FIRST_APIC_IRQ NR_IRQS
987
+#define ipipe_apic_irq_vector(irq) ((irq) - IPIPE_FIRST_APIC_IRQ + FIRST_SYSTEM_VECTOR)
988
+#define ipipe_apic_vector_irq(vec) ((vec) - FIRST_SYSTEM_VECTOR + IPIPE_FIRST_APIC_IRQ)
990
+/* If the APIC is enabled, then we expose four service vectors in the
991
+ APIC space which are freely available to domains. */
992
+#define IPIPE_SERVICE_VECTOR0 (INVALIDATE_TLB_VECTOR_END + 1)
993
+#define IPIPE_SERVICE_IPI0 ipipe_apic_vector_irq(IPIPE_SERVICE_VECTOR0)
994
+#define IPIPE_SERVICE_VECTOR1 (INVALIDATE_TLB_VECTOR_END + 2)
995
+#define IPIPE_SERVICE_IPI1 ipipe_apic_vector_irq(IPIPE_SERVICE_VECTOR1)
996
+#define IPIPE_SERVICE_VECTOR2 (INVALIDATE_TLB_VECTOR_END + 3)
997
+#define IPIPE_SERVICE_IPI2 ipipe_apic_vector_irq(IPIPE_SERVICE_VECTOR2)
998
+#define IPIPE_SERVICE_VECTOR3 (INVALIDATE_TLB_VECTOR_END + 4)
999
+#define IPIPE_SERVICE_IPI3 ipipe_apic_vector_irq(IPIPE_SERVICE_VECTOR3)
1001
+#define IPIPE_CRITICAL_VECTOR 0xf7 /* Used by ipipe_critical_enter/exit() */
1002
+#define IPIPE_CRITICAL_IPI ipipe_apic_vector_irq(IPIPE_CRITICAL_VECTOR)
1005
+#define IPIPE_IRQ_ISHIFT 6 /* 2^6 for 64bits arch. */
1007
+/* IDT fault vectors */
1008
+#define IPIPE_NR_FAULTS 32
1009
+/* Pseudo-vectors used for kernel events */
1010
+#define IPIPE_FIRST_EVENT IPIPE_NR_FAULTS
1011
+#define IPIPE_EVENT_SYSCALL (IPIPE_FIRST_EVENT)
1012
+#define IPIPE_EVENT_SCHEDULE (IPIPE_FIRST_EVENT + 1)
1013
+#define IPIPE_EVENT_SIGWAKE (IPIPE_FIRST_EVENT + 2)
1014
+#define IPIPE_EVENT_SETSCHED (IPIPE_FIRST_EVENT + 3)
1015
+#define IPIPE_EVENT_INIT (IPIPE_FIRST_EVENT + 4)
1016
+#define IPIPE_EVENT_EXIT (IPIPE_FIRST_EVENT + 5)
1017
+#define IPIPE_EVENT_CLEANUP (IPIPE_FIRST_EVENT + 6)
1018
+#define IPIPE_LAST_EVENT IPIPE_EVENT_CLEANUP
1019
+#define IPIPE_NR_EVENTS (IPIPE_LAST_EVENT + 1)
1021
+#ifndef __ASSEMBLY__
1023
+#include <asm/alternative.h>
1027
+ * Ugly: depends on x8664_pda layout and actual implementation of
1028
+ * percpu accesses.
1030
+#define GET_ROOT_STATUS_ADDR \
1032
+ "movq %%gs:8, %%rax;" /* x8664_pda.data_offset */ \
1033
+ "addq $per_cpu__ipipe_percpu_darray, %%rax;"
1034
+#define PUT_ROOT_STATUS_ADDR "popfq;"
1036
+static inline void __ipipe_stall_root(void)
1038
+ __asm__ __volatile__(GET_ROOT_STATUS_ADDR
1040
+ "btsl $0,(%%rax);"
1041
+ PUT_ROOT_STATUS_ADDR
1042
+ : : : "rax", "memory");
1045
+static inline unsigned long __ipipe_test_and_stall_root(void)
1049
+ __asm__ __volatile__(GET_ROOT_STATUS_ADDR
1051
+ "btsl $0,(%%rax);"
1053
+ PUT_ROOT_STATUS_ADDR
1055
+ : : "rax", "memory");
1059
+static inline unsigned long __ipipe_test_root(void)
1063
+ __asm__ __volatile__(GET_ROOT_STATUS_ADDR
1066
+ PUT_ROOT_STATUS_ADDR
1072
+#endif /* CONFIG_SMP */
1074
+#endif /* !__ASSEMBLY__ */
1076
+#endif /* !__X86_IPIPE_BASE_64_H */
1077
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
1078
index 0005adb..3792e5d 100644
1079
--- a/arch/x86/include/asm/irq_vectors.h
1080
+++ b/arch/x86/include/asm/irq_vectors.h
1082
#define THERMAL_APIC_VECTOR 0xfa
1083
#define THRESHOLD_APIC_VECTOR 0xf9
1084
#define UV_BAU_MESSAGE 0xf8
1085
+#ifdef CONFIG_IPIPE
1086
+#define INVALIDATE_TLB_VECTOR_END 0xf2
1087
+#define INVALIDATE_TLB_VECTOR_START 0xf0 /* f0-f2 used for TLB flush */
1088
+#define NUM_INVALIDATE_TLB_VECTORS 3 /* f3-f7 used by I-pipe */
1089
+#else /* !CONFIG_IPIPE */
1090
#define INVALIDATE_TLB_VECTOR_END 0xf7
1091
#define INVALIDATE_TLB_VECTOR_START 0xf0 /* f0-f7 used for TLB flush */
1093
#define NUM_INVALIDATE_TLB_VECTORS 8
1098
+#define FIRST_SYSTEM_VECTOR 0xef
1101
* Local APIC timer IRQ vector is on a different priority level,
1102
* to work around the 'lost local interrupt if more than 2 IRQ
1103
diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h
1104
index 2bdab21..bcd2f5f 100644
1105
--- a/arch/x86/include/asm/irqflags.h
1106
+++ b/arch/x86/include/asm/irqflags.h
1108
#include <asm/processor-flags.h>
1110
#ifndef __ASSEMBLY__
1112
+#include <linux/ipipe_base.h>
1113
+#include <linux/ipipe_trace.h>
1116
* Interrupt control:
1118
@@ -12,35 +16,59 @@ static inline unsigned long native_save_fl(void)
1120
unsigned long flags;
1122
+#ifdef CONFIG_IPIPE
1123
+ flags = (!__ipipe_test_root()) << 9;
1126
asm volatile("# __raw_save_flags\n\t"
1136
static inline void native_restore_fl(unsigned long flags)
1138
+#ifdef CONFIG_IPIPE
1140
+ __ipipe_restore_root(!(flags & X86_EFLAGS_IF));
1142
asm volatile("push %0 ; popf"
1149
static inline void native_irq_disable(void)
1151
+#ifdef CONFIG_IPIPE
1152
+ ipipe_check_context(ipipe_root_domain);
1153
+ __ipipe_stall_root();
1156
asm volatile("cli": : :"memory");
1160
static inline void native_irq_enable(void)
1162
+#ifdef CONFIG_IPIPE
1164
+ __ipipe_unstall_root();
1166
asm volatile("sti": : :"memory");
1170
static inline void native_safe_halt(void)
1172
+#ifdef CONFIG_IPIPE_TRACE_IRQSOFF
1173
+ ipipe_trace_end(0x8000000E);
1175
asm volatile("sti; hlt": : :"memory");
1178
@@ -66,6 +94,71 @@ static inline void raw_local_irq_restore(unsigned long flags)
1179
native_restore_fl(flags);
1182
+static inline unsigned long raw_mangle_irq_bits(int virt, unsigned long real)
1185
+ * Merge virtual and real interrupt mask bits into a single
1188
+ return (real & ~(1UL << 31)) | ((unsigned long)(virt != 0) << 31); /* widen before shifting: an int shifted into bit 31 is undefined and would sign-extend into bits 32-63 on x86_64 */
1191
+static inline int raw_demangle_irq_bits(unsigned long *x)
1193
+ int virt = (*x & (1UL << 31)) != 0; /* bit 31 is where raw_mangle_irq_bits() stores the virtual mask state; unsigned constant avoids a signed shift overflow on 32-bit */
1194
+ *x &= ~(1UL << 31); /* strip that bit so *x holds only the remaining flag bits */
1198
+#define local_irq_save_hw_notrace(x) \
1199
+ __asm__ __volatile__("pushf ; pop %0 ; cli":"=g" (x): /* no input */ :"memory")
1200
+#define local_irq_restore_hw_notrace(x) \
1201
+ __asm__ __volatile__("push %0 ; popf": /* no output */ :"g" (x):"memory", "cc")
1203
+#define local_save_flags_hw(x) __asm__ __volatile__("pushf ; pop %0":"=g" (x): /* no input */)
1205
+#define irqs_disabled_hw() \
1207
+ unsigned long x; \
1208
+ local_save_flags_hw(x); \
1209
+ !((x) & X86_EFLAGS_IF); \
1212
+#ifdef CONFIG_IPIPE_TRACE_IRQSOFF
1213
+#define local_irq_disable_hw() do { \
1214
+ if (!irqs_disabled_hw()) { \
1215
+ local_irq_disable_hw_notrace(); \
1216
+ ipipe_trace_begin(0x80000000); \
1219
+#define local_irq_enable_hw() do { \
1220
+ if (irqs_disabled_hw()) { \
1221
+ ipipe_trace_end(0x80000000); \
1222
+ local_irq_enable_hw_notrace(); \
1225
+#define local_irq_save_hw(x) do { \
1226
+ local_save_flags_hw(x); \
1227
+ if ((x) & X86_EFLAGS_IF) { \
1228
+ local_irq_disable_hw_notrace(); \
1229
+ ipipe_trace_begin(0x80000001); \
1232
+#define local_irq_restore_hw(x) do { \
1233
+ if ((x) & X86_EFLAGS_IF) \
1234
+ ipipe_trace_end(0x80000001); \
1235
+ local_irq_restore_hw_notrace(x); \
1237
+#else /* !CONFIG_IPIPE_TRACE_IRQSOFF */
1238
+#define local_irq_save_hw(x) local_irq_save_hw_notrace(x)
1239
+#define local_irq_restore_hw(x) local_irq_restore_hw_notrace(x)
1240
+#define local_irq_enable_hw() local_irq_enable_hw_notrace()
1241
+#define local_irq_disable_hw() local_irq_disable_hw_notrace()
1242
+#endif /* CONFIG_IPIPE_TRACE_IRQSOFF */
1244
+#define local_irq_disable_hw_notrace() __asm__ __volatile__("cli": : :"memory")
1245
+#define local_irq_enable_hw_notrace() __asm__ __volatile__("sti": : :"memory")
1247
static inline void raw_local_irq_disable(void)
1249
native_irq_disable();
1250
@@ -99,16 +192,38 @@ static inline void halt(void)
1252
static inline unsigned long __raw_local_irq_save(void)
1254
+#ifdef CONFIG_IPIPE
1255
+ unsigned long flags = (!__ipipe_test_and_stall_root()) << 9;
1258
unsigned long flags = __raw_local_save_flags();
1260
raw_local_irq_disable();
1267
-#define ENABLE_INTERRUPTS(x) sti
1268
-#define DISABLE_INTERRUPTS(x) cli
1269
+#ifdef CONFIG_IPIPE /* I-pipe build: entry-code IRQ macros are split into virtualized and raw hardware (_HW) forms */
1270
+#ifdef CONFIG_X86_32 /* 32-bit: DISABLE sets bit 0 at the per-CPU ipipe_percpu_darray address (clobbers %eax) and then runs sti, so the CPU interrupt flag stays enabled */
1271
+#define DISABLE_INTERRUPTS(clobbers) PER_CPU(ipipe_percpu_darray, %eax); btsl $0,(%eax); sti
1272
+#define ENABLE_INTERRUPTS(clobbers) call __ipipe_unstall_root
1273
+#else /* CONFIG_X86_64 */
1274
+/* Not worth virtualizing in x86_64 mode. */
1275
+#define DISABLE_INTERRUPTS(clobbers) cli
1276
+#define ENABLE_INTERRUPTS(clobbers) sti
1277
+#endif /* CONFIG_X86_64 */
1278
+#define ENABLE_INTERRUPTS_HW_COND sti /* raw sti under I-pipe; expands to nothing in the !CONFIG_IPIPE branch below */
1279
+#define DISABLE_INTERRUPTS_HW(clobbers) cli /* _HW forms always act on the real CPU interrupt flag */
1280
+#define ENABLE_INTERRUPTS_HW(clobbers) sti
1281
+#else /* !CONFIG_IPIPE */
1282
+#define ENABLE_INTERRUPTS(x) sti
1283
+#define DISABLE_INTERRUPTS(x) cli
1284
+#define ENABLE_INTERRUPTS_HW_COND /* intentionally empty without I-pipe */
1285
+#define DISABLE_INTERRUPTS_HW(clobbers) DISABLE_INTERRUPTS(clobbers) /* without I-pipe the _HW forms are plain aliases */
1286
+#define ENABLE_INTERRUPTS_HW(clobbers) ENABLE_INTERRUPTS(clobbers)
1287
+#endif /* !CONFIG_IPIPE */
1289
#ifdef CONFIG_X86_64
1290
#define SWAPGS swapgs
1291
@@ -151,8 +266,10 @@ static inline unsigned long __raw_local_irq_save(void)
1292
#define raw_local_save_flags(flags) \
1293
do { (flags) = __raw_local_save_flags(); } while (0)
1295
-#define raw_local_irq_save(flags) \
1296
- do { (flags) = __raw_local_irq_save(); } while (0)
1297
+#define raw_local_irq_save(flags) do { \
1298
+ ipipe_check_context(ipipe_root_domain); \
1299
+ (flags) = __raw_local_irq_save(); \
1302
static inline int raw_irqs_disabled_flags(unsigned long flags)
1304
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
1305
index 8aeeb3f..dddfb84 100644
1306
--- a/arch/x86/include/asm/mmu_context.h
1307
+++ b/arch/x86/include/asm/mmu_context.h
1308
@@ -29,8 +29,11 @@ void destroy_context(struct mm_struct *mm);
1310
#define activate_mm(prev, next) \
1312
+ unsigned long flags; \
1313
paravirt_activate_mm((prev), (next)); \
1314
+ local_irq_save_hw_cond(flags); \
1315
switch_mm((prev), (next), NULL); \
1316
+ local_irq_restore_hw_cond(flags); \
1320
diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h
1321
index c45a0a5..f326e5b 100644
1322
--- a/arch/x86/include/asm/nmi.h
1323
+++ b/arch/x86/include/asm/nmi.h
1324
@@ -29,7 +29,7 @@ extern void setup_apic_nmi_watchdog(void *);
1325
extern void stop_apic_nmi_watchdog(void *);
1326
extern void disable_timer_nmi_watchdog(void);
1327
extern void enable_timer_nmi_watchdog(void);
1328
-extern int nmi_watchdog_tick(struct pt_regs *regs, unsigned reason);
1329
+extern int (*nmi_watchdog_tick)(struct pt_regs *regs, unsigned reason);
1330
extern void cpu_nmi_set_wd_enabled(void);
1332
extern atomic_t nmi_active;
1333
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
1334
index 06d6f30..35a0e1a 100644
1335
--- a/arch/x86/include/asm/processor.h
1336
+++ b/arch/x86/include/asm/processor.h
1337
@@ -397,6 +397,7 @@ struct thread_struct {
1339
unsigned short fsindex;
1340
unsigned short gsindex;
1341
+ unsigned long rip;
1345
diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h
1346
index 2ed3f0f..4b84f80 100644
1347
--- a/arch/x86/include/asm/system.h
1348
+++ b/arch/x86/include/asm/system.h
1349
@@ -90,8 +90,10 @@ do { \
1350
#define switch_to(prev, next, last) \
1351
asm volatile(SAVE_CONTEXT \
1352
"movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */ \
1353
+ "movq $thread_return,%P[threadrip](%[prev])\n\t" /* save RIP */ \
1354
"movq %P[threadrsp](%[next]),%%rsp\n\t" /* restore RSP */ \
1355
- "call __switch_to\n\t" \
1356
+ "pushq %P[threadrip](%[next])\n\t" /* restore RIP */ \
1357
+ "jmp __switch_to\n\t" \
1358
".globl thread_return\n" \
1359
"thread_return:\n\t" \
1360
"movq %%gs:%P[pda_pcurrent],%%rsi\n\t" \
1361
@@ -103,6 +105,7 @@ do { \
1363
: [next] "S" (next), [prev] "D" (prev), \
1364
[threadrsp] "i" (offsetof(struct task_struct, thread.sp)), \
1365
+ [threadrip] "i" (offsetof(struct task_struct, thread.rip)), \
1366
[ti_flags] "i" (offsetof(struct thread_info, flags)), \
1367
[tif_fork] "i" (TIF_FORK), \
1368
[thread_info] "i" (offsetof(struct task_struct, stack)), \
1369
diff --git a/arch/x86/include/asm/unistd_32.h b/arch/x86/include/asm/unistd_32.h
1370
index f2bba78..0c75007 100644
1371
--- a/arch/x86/include/asm/unistd_32.h
1372
+++ b/arch/x86/include/asm/unistd_32.h
1374
#define __NR_pipe2 331
1375
#define __NR_inotify_init1 332
1377
+#define NR_syscalls 333
1381
#define __ARCH_WANT_IPC_PARSE_VERSION
1382
diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h
1383
index d2e415e..03072c6 100644
1384
--- a/arch/x86/include/asm/unistd_64.h
1385
+++ b/arch/x86/include/asm/unistd_64.h
1386
@@ -654,6 +654,7 @@ __SYSCALL(__NR_pipe2, sys_pipe2)
1387
#define __NR_inotify_init1 294
1388
__SYSCALL(__NR_inotify_init1, sys_inotify_init1)
1390
+#define NR_syscalls 295
1393
#define __ARCH_WANT_OLD_READDIR
1394
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
1395
index b62a766..576e8a1 100644
1396
--- a/arch/x86/kernel/Makefile
1397
+++ b/arch/x86/kernel/Makefile
1398
@@ -79,6 +79,8 @@ obj-$(CONFIG_DOUBLEFAULT) += doublefault_32.o
1399
obj-$(CONFIG_KGDB) += kgdb.o
1400
obj-$(CONFIG_VM86) += vm86_32.o
1401
obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
1402
+obj-$(CONFIG_IPIPE) += ipipe.o
1403
+obj-$(CONFIG_IPIPE_TRACE_MCOUNT) += mcount_$(BITS).o
1405
obj-$(CONFIG_HPET_TIMER) += hpet.o
1407
diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c
1408
index 3eae1df..2587543 100644
1409
--- a/arch/x86/kernel/apic.c
1410
+++ b/arch/x86/kernel/apic.c
1411
@@ -428,7 +428,7 @@ static void lapic_timer_setup(enum clock_event_mode mode,
1412
if (evt->features & CLOCK_EVT_FEAT_DUMMY)
1415
- local_irq_save(flags);
1416
+ local_irq_save_hw(flags);
1419
case CLOCK_EVT_MODE_PERIODIC:
1420
@@ -447,7 +447,7 @@ static void lapic_timer_setup(enum clock_event_mode mode,
1424
- local_irq_restore(flags);
1425
+ local_irq_restore_hw(flags);
1429
@@ -945,7 +945,7 @@ void lapic_shutdown(void)
1433
- local_irq_save(flags);
1434
+ local_irq_save_hw(flags);
1436
#ifdef CONFIG_X86_32
1437
if (!enabled_via_apicbase)
1438
@@ -955,7 +955,7 @@ void lapic_shutdown(void)
1439
disable_local_APIC();
1442
- local_irq_restore(flags);
1443
+ local_irq_restore_hw(flags);
1447
@@ -1129,6 +1129,10 @@ static void __cpuinit lapic_setup_esr(void)
1451
+int __ipipe_check_lapic(void)
1453
+ return !(lapic_clockevent.features & CLOCK_EVT_FEAT_DUMMY);
1457
* setup_local_APIC - setup the local APIC
1458
@@ -1187,7 +1191,7 @@ void __cpuinit setup_local_APIC(void)
1459
value = apic_read(APIC_ISR + i*0x10);
1460
for (j = 31; j >= 0; j--) {
1467
@@ -1693,7 +1697,7 @@ void smp_spurious_interrupt(struct pt_regs *regs)
1469
v = apic_read(APIC_ISR + ((SPURIOUS_APIC_VECTOR & ~0x1f) >> 1));
1470
if (v & (1 << (SPURIOUS_APIC_VECTOR & 0x1f)))
1474
#ifdef CONFIG_X86_64
1475
add_pda(irq_spurious_count, 1);
1476
@@ -1970,9 +1974,9 @@ static int lapic_suspend(struct sys_device *dev, pm_message_t state)
1477
apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR);
1480
- local_irq_save(flags);
1481
+ local_irq_save_hw(flags);
1482
disable_local_APIC();
1483
- local_irq_restore(flags);
1484
+ local_irq_restore_hw(flags);
1488
@@ -1987,7 +1991,7 @@ static int lapic_resume(struct sys_device *dev)
1490
maxlvt = lapic_get_maxlvt();
1492
- local_irq_save(flags);
1493
+ local_irq_save_hw(flags);
1497
@@ -2030,7 +2034,7 @@ static int lapic_resume(struct sys_device *dev)
1498
apic_write(APIC_ESR, 0);
1499
apic_read(APIC_ESR);
1501
- local_irq_restore(flags);
1502
+ local_irq_restore_hw(flags);
1506
@@ -2236,3 +2240,4 @@ static int __init lapic_insert_resource(void)
1507
* that is using request_resource
1509
late_initcall(lapic_insert_resource);
1511
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
1512
index b9c9ea0..fda09a0 100644
1513
--- a/arch/x86/kernel/cpu/common.c
1514
+++ b/arch/x86/kernel/cpu/common.c
1515
@@ -864,7 +864,7 @@ struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table };
1517
char boot_cpu_stack[IRQSTACKSIZE] __page_aligned_bss;
1519
-void __cpuinit pda_init(int cpu)
1520
+void notrace __cpuinit pda_init(int cpu)
1522
struct x8664_pda *pda = cpu_pda(cpu);
1524
@@ -956,7 +956,7 @@ struct pt_regs * __cpuinit idle_regs(struct pt_regs *regs)
1525
* A lot of state is already set up in PDA init for 64 bit
1527
#ifdef CONFIG_X86_64
1528
-void __cpuinit cpu_init(void)
1529
+void notrace __cpuinit cpu_init(void)
1531
int cpu = stack_smp_processor_id();
1532
struct tss_struct *t = &per_cpu(init_tss, cpu);
1533
@@ -1076,7 +1076,7 @@ void __cpuinit cpu_init(void)
1537
-void __cpuinit cpu_init(void)
1538
+void notrace __cpuinit cpu_init(void)
1540
int cpu = smp_processor_id();
1541
struct task_struct *curr = current;
1542
diff --git a/arch/x86/kernel/cpu/mtrr/cyrix.c b/arch/x86/kernel/cpu/mtrr/cyrix.c
1543
index ff14c32..e30e25e 100644
1544
--- a/arch/x86/kernel/cpu/mtrr/cyrix.c
1545
+++ b/arch/x86/kernel/cpu/mtrr/cyrix.c
1546
@@ -170,6 +170,7 @@ static void cyrix_set_arr(unsigned int reg, unsigned long base,
1547
unsigned long size, mtrr_type type)
1549
unsigned char arr, arr_type, arr_size;
1550
+ unsigned long flags;
1552
arr = CX86_ARR_BASE + (reg << 1) + reg; /* avoid multiplication by 3 */
1554
@@ -212,6 +213,8 @@ static void cyrix_set_arr(unsigned int reg, unsigned long base,
1558
+ local_irq_save_hw(flags);
1562
base <<= PAGE_SHIFT;
1563
@@ -221,6 +224,8 @@ static void cyrix_set_arr(unsigned int reg, unsigned long base,
1564
setCx86(CX86_RCR_BASE + reg, arr_type);
1568
+ local_irq_restore_hw(flags);
1572
@@ -238,8 +243,10 @@ static unsigned char ccr_state[7] = { 0, 0, 0, 0, 0, 0, 0 };
1574
static void cyrix_set_all(void)
1576
+ unsigned long flags;
1579
+ local_irq_save_hw(flags);
1582
/* the CCRs are not contiguous */
1583
@@ -252,6 +259,7 @@ static void cyrix_set_all(void)
1584
arr_state[i].size, arr_state[i].type);
1587
+ local_irq_restore_hw(flags);
1590
static struct mtrr_ops cyrix_mtrr_ops = {
1591
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
1592
index 4e8d77f..cd4cc43 100644
1593
--- a/arch/x86/kernel/cpu/mtrr/generic.c
1594
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
1595
@@ -552,7 +552,7 @@ static void generic_set_all(void)
1596
unsigned long mask, count;
1597
unsigned long flags;
1599
- local_irq_save(flags);
1600
+ local_irq_save_hw(flags);
1603
/* Actually set the state */
1604
@@ -562,7 +562,7 @@ static void generic_set_all(void)
1608
- local_irq_restore(flags);
1609
+ local_irq_restore_hw(flags);
1611
/* Use the atomic bitops to update the global mask */
1612
for (count = 0; count < sizeof mask * 8; ++count) {
1613
@@ -583,12 +583,12 @@ static void generic_set_mtrr(unsigned int reg, unsigned long base,
1617
- unsigned long flags;
1618
+ unsigned long flags, _flags;
1619
struct mtrr_var_range *vr;
1621
vr = &mtrr_state.var_ranges[reg];
1623
- local_irq_save(flags);
1624
+ local_irq_save_full(flags, _flags);
1628
@@ -607,7 +607,7 @@ static void generic_set_mtrr(unsigned int reg, unsigned long base,
1632
- local_irq_restore(flags);
1633
+ local_irq_restore_full(flags, _flags);
1636
int generic_validate_add_page(unsigned long base, unsigned long size, unsigned int type)
1637
diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c
1638
index b361475..7f24b7f 100644
1639
--- a/arch/x86/kernel/dumpstack_32.c
1640
+++ b/arch/x86/kernel/dumpstack_32.c
1641
@@ -234,6 +234,9 @@ void show_registers(struct pt_regs *regs)
1642
printk(KERN_EMERG "Process %.*s (pid: %d, ti=%p task=%p task.ti=%p)\n",
1643
TASK_COMM_LEN, current->comm, task_pid_nr(current),
1644
current_thread_info(), current, task_thread_info(current));
1645
+#ifdef CONFIG_IPIPE
1646
+ printk(KERN_EMERG "I-pipe domain %s\n", ipipe_current_domain->name);
1647
+#endif /* CONFIG_IPIPE */
1649
* When in-kernel, we also print out the stack and code at the
1650
* time of the fault..
1651
@@ -419,6 +422,8 @@ die_nmi(char *str, struct pt_regs *regs, int do_panic)
1655
+EXPORT_SYMBOL_GPL(die_nmi);
1657
static int __init oops_setup(char *s)
1660
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
1661
index 96a5db7..da14b7a 100644
1662
--- a/arch/x86/kernel/dumpstack_64.c
1663
+++ b/arch/x86/kernel/dumpstack_64.c
1664
@@ -379,6 +379,11 @@ void show_registers(struct pt_regs *regs)
1666
printk("CPU %d ", cpu);
1667
__show_regs(regs, 1);
1668
+#ifdef CONFIG_IPIPE
1669
+ if (ipipe_current_domain != ipipe_root_domain)
1670
+ printk("I-pipe domain %s\n", ipipe_current_domain->name);
1672
+#endif /* CONFIG_IPIPE */
1673
printk("Process %s (pid: %d, threadinfo %p, task %p)\n",
1674
cur->comm, cur->pid, task_thread_info(cur), cur);
1676
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
1677
index 28b597e..fc9b2c6 100644
1678
--- a/arch/x86/kernel/entry_32.S
1679
+++ b/arch/x86/kernel/entry_32.S
1681
#include <linux/linkage.h>
1682
#include <asm/thread_info.h>
1683
#include <asm/irqflags.h>
1684
+#include <asm/ipipe_base.h>
1685
#include <asm/errno.h>
1686
#include <asm/segment.h>
1687
#include <asm/smp.h>
1690
#define nr_syscalls ((syscall_table_size)/4)
1692
+#ifdef CONFIG_IPIPE
1693
+#define EMULATE_ROOT_IRET(bypass) \
1694
+ call __ipipe_unstall_iret_root ; \
1697
+ movl PT_EAX(%esp),%eax
1698
+#define TEST_PREEMPTIBLE(regs) call __ipipe_kpreempt_root ; testl %eax,%eax
1699
+#define CATCH_ROOT_SYSCALL(bypass1,bypass2) \
1700
+ movl %esp,%eax ; \
1701
+ call __ipipe_syscall_root ; \
1702
+ testl %eax,%eax ; \
1705
+ movl PT_ORIG_EAX(%esp),%eax
1706
+#define PUSH_XCODE(v) pushl $ ex_ ## v
1707
+#define PUSH_XVEC(v) pushl $ ex_ ## v
1708
+#define HANDLE_EXCEPTION(code) movl %code,%ecx ; \
1709
+ call __ipipe_handle_exception ; \
1710
+ testl %eax,%eax ; \
1711
+ jnz restore_nocheck_notrace
1712
+#define DIVERT_EXCEPTION(code) movl $(__USER_DS), %ecx ; \
1713
+ movl %ecx, %ds ; \
1714
+ movl %ecx, %es ; \
1715
+ movl %esp, %eax ; \
1716
+ movl $ex_ ## code,%edx ; \
1717
+ call __ipipe_divert_exception ; \
1718
+ testl %eax,%eax ; \
1719
+ jnz restore_nocheck_notrace
1721
+#ifdef CONFIG_IPIPE_TRACE_IRQSOFF
1722
+# define IPIPE_TRACE_IRQ_ENTER \
1723
+ lea PT_EIP-4(%esp), %ebp; \
1724
+ movl PT_ORIG_EAX(%esp), %eax; \
1725
+ call ipipe_trace_begin
1726
+# define IPIPE_TRACE_IRQ_EXIT \
1728
+ movl PT_ORIG_EAX+4(%esp), %eax; \
1729
+ call ipipe_trace_end; \
1731
+#else /* !CONFIG_IPIPE_TRACE_IRQSOFF */
1732
+#define IPIPE_TRACE_IRQ_ENTER
1733
+#define IPIPE_TRACE_IRQ_EXIT
1734
+#endif /* CONFIG_IPIPE_TRACE_IRQSOFF */
1735
+#else /* !CONFIG_IPIPE */
1736
+#define EMULATE_ROOT_IRET(bypass)
1737
+#define TEST_PREEMPTIBLE(regs) testl $X86_EFLAGS_IF,PT_EFLAGS(regs)
1738
+#define CATCH_ROOT_SYSCALL(bypass1,bypass2)
1739
+#define PUSH_XCODE(v) pushl $v
1740
+#define PUSH_XVEC(v) pushl v
1741
+#define HANDLE_EXCEPTION(code) call *%code
1742
+#define DIVERT_EXCEPTION(code)
1743
+#endif /* CONFIG_IPIPE */
1745
#ifdef CONFIG_PREEMPT
1746
#define preempt_stop(clobbers) DISABLE_INTERRUPTS(clobbers); TRACE_IRQS_OFF
1749
CFI_OFFSET ebx, PT_EBX-PT_OLDESP
1751
ENTRY(ret_from_fork)
1752
+ ENABLE_INTERRUPTS_HW_COND
1755
CFI_ADJUST_CFA_OFFSET 4
1756
@@ -245,7 +300,7 @@ END(ret_from_fork)
1759
preempt_stop(CLBR_ANY)
1761
+ENTRY(ret_from_intr)
1762
GET_THREAD_INFO(%ebp)
1764
movl PT_EFLAGS(%esp), %eax # mix EFLAGS and CS
1765
@@ -269,14 +324,14 @@ END(ret_from_exception)
1767
#ifdef CONFIG_PREEMPT
1768
ENTRY(resume_kernel)
1769
- DISABLE_INTERRUPTS(CLBR_ANY)
1770
+ DISABLE_INTERRUPTS_HW(CLBR_ANY)
1771
cmpl $0,TI_preempt_count(%ebp) # non-zero preempt_count ?
1774
movl TI_flags(%ebp), %ecx # need_resched set ?
1775
testb $_TIF_NEED_RESCHED, %cl
1777
- testl $X86_EFLAGS_IF,PT_EFLAGS(%esp) # interrupts off (exception path) ?
1778
+ TEST_PREEMPTIBLE(%esp) # interrupts off (exception path) ?
1780
call preempt_schedule_irq
1782
@@ -324,7 +379,7 @@ sysenter_past_esp:
1784
CFI_ADJUST_CFA_OFFSET 4
1786
- ENABLE_INTERRUPTS(CLBR_NONE)
1787
+ ENABLE_INTERRUPTS_HW(CLBR_NONE)
1790
* Load the potential sixth argument from user stack.
1791
@@ -340,6 +395,7 @@ sysenter_past_esp:
1794
GET_THREAD_INFO(%ebp)
1795
+ CATCH_ROOT_SYSCALL(sysenter_tail,sysenter_out)
1797
/* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */
1798
testw $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp)
1799
@@ -349,6 +405,7 @@ sysenter_do_call:
1801
call *sys_call_table(,%eax,4)
1802
movl %eax,PT_EAX(%esp)
1805
DISABLE_INTERRUPTS(CLBR_ANY)
1807
@@ -357,10 +414,13 @@ sysenter_do_call:
1810
/* if something modifies registers it must also disable sysexit */
1811
+ EMULATE_ROOT_IRET(sysenter_out)
1812
movl PT_EIP(%esp), %edx
1813
movl PT_OLDESP(%esp), %ecx
1816
+#ifndef CONFIG_IPIPE
1819
1: mov PT_FS(%esp), %fs
1820
ENABLE_INTERRUPTS_SYSEXIT
1822
@@ -419,6 +479,7 @@ ENTRY(system_call)
1823
CFI_ADJUST_CFA_OFFSET 4
1825
GET_THREAD_INFO(%ebp)
1826
+ CATCH_ROOT_SYSCALL(syscall_exit,restore_nocheck_notrace)
1827
# system call tracing in operation / emulation
1828
/* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */
1829
testw $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp)
1830
@@ -450,7 +511,11 @@ restore_all:
1832
je ldt_ss # returning to user-space with LDT SS
1834
+#ifdef CONFIG_IPIPE
1835
+ call __ipipe_unstall_iret_root
1836
+#else /* !CONFIG_IPIPE */
1838
+#endif /* CONFIG_IPIPE */
1839
restore_nocheck_notrace:
1841
addl $4, %esp # skip orig_eax/error_code
1842
@@ -460,7 +525,7 @@ irq_return:
1843
.section .fixup,"ax"
1845
pushl $0 # no error code
1846
- pushl $do_iret_error
1847
+ PUSH_XCODE(do_iret_error)
1850
.section __ex_table,"a"
1851
@@ -501,7 +566,7 @@ ldt_ss:
1852
CFI_ADJUST_CFA_OFFSET 4
1854
CFI_ADJUST_CFA_OFFSET 4
1855
- DISABLE_INTERRUPTS(CLBR_EAX)
1856
+ DISABLE_INTERRUPTS_HW(CLBR_EAX)
1859
CFI_ADJUST_CFA_OFFSET -8
1860
@@ -516,6 +581,7 @@ work_pending:
1861
testb $_TIF_NEED_RESCHED, %cl
1864
+ ENABLE_INTERRUPTS_HW_COND
1867
DISABLE_INTERRUPTS(CLBR_ANY) # make sure we don't miss an interrupt
1868
@@ -648,6 +714,49 @@ END(irq_entries_start)
1872
+#ifdef CONFIG_IPIPE
1876
+ IPIPE_TRACE_IRQ_ENTER
1878
+ call __ipipe_handle_irq
1879
+ IPIPE_TRACE_IRQ_EXIT
1887
+#define BUILD_INTERRUPT(name, nr) \
1889
+ RING0_INT_FRAME; \
1891
+ CFI_ADJUST_CFA_OFFSET 4; \
1893
+ IPIPE_TRACE_IRQ_ENTER; \
1894
+ movl %esp, %eax; \
1895
+ call __ipipe_handle_irq; \
1896
+ IPIPE_TRACE_IRQ_EXIT; \
1897
+ testl %eax,%eax; \
1898
+ jnz ret_from_intr; \
1904
+#ifdef CONFIG_X86_LOCAL_APIC
1905
+ BUILD_INTERRUPT(ipipe_ipi0,IPIPE_SERVICE_VECTOR0)
1906
+ BUILD_INTERRUPT(ipipe_ipi1,IPIPE_SERVICE_VECTOR1)
1907
+ BUILD_INTERRUPT(ipipe_ipi2,IPIPE_SERVICE_VECTOR2)
1908
+ BUILD_INTERRUPT(ipipe_ipi3,IPIPE_SERVICE_VECTOR3)
1910
+ BUILD_INTERRUPT(ipipe_ipiX,IPIPE_CRITICAL_VECTOR)
1914
+#else /* !CONFIG_IPIPE */
1916
* the CPU automatically disables interrupts when executing an IRQ vector,
1917
* so IRQ-flags tracing has to follow that:
1918
@@ -675,12 +784,14 @@ ENTRY(name) \
1922
+#endif /* !CONFIG_IPIPE */
1924
/* The include is where all of the SMP etc. interrupts come from */
1925
#include "entry_arch.h"
1927
KPROBE_ENTRY(page_fault)
1929
- pushl $do_page_fault
1930
+ PUSH_XCODE(do_page_fault)
1931
CFI_ADJUST_CFA_OFFSET 4
1934
@@ -732,7 +843,7 @@ error_code:
1937
movl %esp,%eax # pt_regs pointer
1939
+ HANDLE_EXCEPTION(edi)
1940
jmp ret_from_exception
1942
KPROBE_END(page_fault)
1943
@@ -741,7 +852,7 @@ ENTRY(coprocessor_error)
1946
CFI_ADJUST_CFA_OFFSET 4
1947
- pushl $do_coprocessor_error
1948
+ PUSH_XCODE(do_coprocessor_error)
1949
CFI_ADJUST_CFA_OFFSET 4
1952
@@ -751,7 +862,7 @@ ENTRY(simd_coprocessor_error)
1955
CFI_ADJUST_CFA_OFFSET 4
1956
- pushl $do_simd_coprocessor_error
1957
+ PUSH_XCODE(do_simd_coprocessor_error)
1958
CFI_ADJUST_CFA_OFFSET 4
1961
@@ -761,7 +872,7 @@ ENTRY(device_not_available)
1963
pushl $-1 # mark this as an int
1964
CFI_ADJUST_CFA_OFFSET 4
1965
- pushl $do_device_not_available
1966
+ PUSH_XCODE(do_device_not_available)
1967
CFI_ADJUST_CFA_OFFSET 4
1970
@@ -805,6 +916,7 @@ debug_stack_correct:
1971
CFI_ADJUST_CFA_OFFSET 4
1974
+ DIVERT_EXCEPTION(do_debug)
1975
xorl %edx,%edx # error code 0
1976
movl %esp,%eax # pt_regs pointer
1978
@@ -922,6 +1034,7 @@ KPROBE_ENTRY(int3)
1979
CFI_ADJUST_CFA_OFFSET 4
1982
+ DIVERT_EXCEPTION(do_int3)
1983
xorl %edx,%edx # zero error code
1984
movl %esp,%eax # pt_regs pointer
1986
@@ -933,7 +1046,7 @@ ENTRY(overflow)
1989
CFI_ADJUST_CFA_OFFSET 4
1990
- pushl $do_overflow
1991
+ PUSH_XCODE(do_overflow)
1992
CFI_ADJUST_CFA_OFFSET 4
1995
@@ -943,7 +1056,7 @@ ENTRY(bounds)
1998
CFI_ADJUST_CFA_OFFSET 4
2000
+ PUSH_XCODE(do_bounds)
2001
CFI_ADJUST_CFA_OFFSET 4
2004
@@ -953,7 +1066,7 @@ ENTRY(invalid_op)
2007
CFI_ADJUST_CFA_OFFSET 4
2008
- pushl $do_invalid_op
2009
+ PUSH_XCODE(do_invalid_op)
2010
CFI_ADJUST_CFA_OFFSET 4
2013
@@ -963,7 +1076,7 @@ ENTRY(coprocessor_segment_overrun)
2016
CFI_ADJUST_CFA_OFFSET 4
2017
- pushl $do_coprocessor_segment_overrun
2018
+ PUSH_XCODE(do_coprocessor_segment_overrun)
2019
CFI_ADJUST_CFA_OFFSET 4
2022
@@ -971,7 +1084,7 @@ END(coprocessor_segment_overrun)
2026
- pushl $do_invalid_TSS
2027
+ PUSH_XCODE(do_invalid_TSS)
2028
CFI_ADJUST_CFA_OFFSET 4
2031
@@ -979,7 +1092,7 @@ END(invalid_TSS)
2033
ENTRY(segment_not_present)
2035
- pushl $do_segment_not_present
2036
+ PUSH_XCODE(do_segment_not_present)
2037
CFI_ADJUST_CFA_OFFSET 4
2040
@@ -987,7 +1100,7 @@ END(segment_not_present)
2042
ENTRY(stack_segment)
2044
- pushl $do_stack_segment
2045
+ PUSH_XCODE(do_stack_segment)
2046
CFI_ADJUST_CFA_OFFSET 4
2049
@@ -995,7 +1108,7 @@ END(stack_segment)
2051
KPROBE_ENTRY(general_protection)
2053
- pushl $do_general_protection
2054
+ PUSH_XCODE(do_general_protection)
2055
CFI_ADJUST_CFA_OFFSET 4
2058
@@ -1003,7 +1116,7 @@ KPROBE_END(general_protection)
2060
ENTRY(alignment_check)
2062
- pushl $do_alignment_check
2063
+ PUSH_XCODE(do_alignment_check)
2064
CFI_ADJUST_CFA_OFFSET 4
2067
@@ -1013,7 +1126,7 @@ ENTRY(divide_error)
2069
pushl $0 # no error code
2070
CFI_ADJUST_CFA_OFFSET 4
2071
- pushl $do_divide_error
2072
+ PUSH_XCODE(do_divide_error)
2073
CFI_ADJUST_CFA_OFFSET 4
2076
@@ -1024,7 +1137,7 @@ ENTRY(machine_check)
2079
CFI_ADJUST_CFA_OFFSET 4
2080
- pushl machine_check_vector
2081
+ PUSH_XVEC(machine_check_vector)
2082
CFI_ADJUST_CFA_OFFSET 4
2085
@@ -1035,7 +1148,7 @@ ENTRY(spurious_interrupt_bug)
2088
CFI_ADJUST_CFA_OFFSET 4
2089
- pushl $do_spurious_interrupt_bug
2090
+ PUSH_XCODE(do_spurious_interrupt_bug)
2091
CFI_ADJUST_CFA_OFFSET 4
2094
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
2095
index b86f332..d59b479 100644
2096
--- a/arch/x86/kernel/entry_64.S
2097
+++ b/arch/x86/kernel/entry_64.S
2099
#include <asm/irqflags.h>
2100
#include <asm/paravirt.h>
2101
#include <asm/ftrace.h>
2102
+#include <asm/ipipe_base.h>
2104
/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */
2105
#include <linux/elf-em.h>
2110
+#ifdef CONFIG_IPIPE
2111
+#define PREEMPT_SCHEDULE_IRQ call __ipipe_preempt_schedule_irq
2112
+#else /* !CONFIG_IPIPE */
2113
+#define PREEMPT_SCHEDULE_IRQ call preempt_schedule_irq
2114
+#endif /* !CONFIG_IPIPE */
2117
#ifdef CONFIG_FUNCTION_TRACER
2118
#ifdef CONFIG_DYNAMIC_FTRACE
2120
@@ -252,6 +260,7 @@ ENTRY(ret_from_fork)
2121
CFI_ADJUST_CFA_OFFSET 8
2122
popf # reset kernel eflags
2123
CFI_ADJUST_CFA_OFFSET -8
2124
+ ENABLE_INTERRUPTS_HW_COND
2126
GET_THREAD_INFO(%rcx)
2127
testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),TI_flags(%rcx)
2128
@@ -328,6 +337,17 @@ ENTRY(system_call_after_swapgs)
2129
movq %rax,ORIG_RAX-ARGOFFSET(%rsp)
2130
movq %rcx,RIP-ARGOFFSET(%rsp)
2131
CFI_REL_OFFSET rip,RIP-ARGOFFSET
2132
+#ifdef CONFIG_IPIPE
2135
+ leaq -(ARGOFFSET-16)(%rsp),%rdi # regs for handler
2136
+ call __ipipe_syscall_root_thunk
2140
+ js ret_from_sys_call
2141
+ jnz sysret_fastexit
2143
GET_THREAD_INFO(%rcx)
2144
testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%rcx)
2146
@@ -357,6 +377,7 @@ sysret_check:
2147
* sysretq will re-enable interrupts:
2151
movq RIP-ARGOFFSET(%rsp),%rcx
2152
CFI_REGISTER rip,rcx
2153
RESTORE_ARGS 0,-ARG_SKIP,1
2154
@@ -368,6 +389,8 @@ sysret_check:
2155
/* Handle reschedules */
2156
/* edx: work, edi: workmask */
2158
+ testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),%edx
2159
+ jnz ret_from_sys_call_trace
2160
bt $TIF_NEED_RESCHED,%edx
2163
@@ -379,6 +402,16 @@ sysret_careful:
2164
CFI_ADJUST_CFA_OFFSET -8
2167
+ret_from_sys_call_trace:
2171
+ FIXUP_TOP_OF_STACK %rdi
2173
+ LOAD_ARGS ARGOFFSET /* reload args from stack in case ptrace changed it */
2175
+ jmp int_ret_from_sys_call
2177
/* Handle a signal */
2180
@@ -667,12 +700,51 @@ END(stub_rt_sigreturn)
2181
* We entered an interrupt context - irqs are off:
2184
+#ifdef CONFIG_IPIPE_TRACE_IRQSOFF
2185
+ leaq RIP-8(%rdi), %rbp # make interrupted address show up in trace
2187
+ movq ORIG_RAX(%rdi), %rdi # IRQ number
2188
+ notq %rdi # ...is inverted, fix up
2189
+ call ipipe_trace_begin
2199
+ movq 8-ARGOFFSET+ORIG_RAX(%rbp), %rdi
2200
+ leaq 8-ARGOFFSET+RIP-8(%rbp), %rbp
2202
+ call ipipe_trace_end
2211
ENTRY(common_interrupt)
2212
+#ifdef CONFIG_IPIPE
2214
+ interrupt __ipipe_handle_irq
2217
+ decl %gs:pda_irqcount
2219
+ CFI_DEF_CFA_REGISTER rsp
2220
+ CFI_ADJUST_CFA_OFFSET -8
2221
+ testl $3,CS-ARGOFFSET(%rsp)
2225
+#else /* !CONFIG_IPIPE */
2228
+#endif /* !CONFIG_IPIPE */
2229
/* 0(%rsp): oldrsp-ARGOFFSET */
2231
DISABLE_INTERRUPTS(CLBR_NONE)
2232
@@ -681,7 +753,7 @@ ret_from_intr:
2234
CFI_DEF_CFA_REGISTER rsp
2235
CFI_ADJUST_CFA_OFFSET -8
2238
GET_THREAD_INFO(%rcx)
2239
testl $3,CS-ARGOFFSET(%rsp)
2241
@@ -701,20 +773,20 @@ retint_check:
2244
retint_swapgs: /* return to user-space */
2247
* The iretq could re-enable interrupts:
2249
- DISABLE_INTERRUPTS(CLBR_ANY)
2251
+retint_swapgs_notrace:
2256
retint_restore_args: /* return to kernel space */
2257
- DISABLE_INTERRUPTS(CLBR_ANY)
2260
* The iretq could re-enable interrupts:
2266
@@ -796,7 +868,7 @@ ENTRY(retint_kernel)
2267
jnc retint_restore_args
2268
bt $9,EFLAGS-ARGOFFSET(%rsp) /* interrupts off? */
2269
jnc retint_restore_args
2270
- call preempt_schedule_irq
2271
+ PREEMPT_SCHEDULE_IRQ
2275
@@ -805,7 +877,26 @@ END(common_interrupt)
2281
+#ifdef CONFIG_IPIPE
2282
+ .macro apicinterrupt num,func
2285
+ CFI_ADJUST_CFA_OFFSET 8
2286
+ interrupt __ipipe_handle_irq
2289
+ decl %gs:pda_irqcount
2291
+ CFI_DEF_CFA_REGISTER rsp
2292
+ CFI_ADJUST_CFA_OFFSET -8
2293
+ testl $3,CS-ARGOFFSET(%rsp)
2299
+#else /* !CONFIG_IPIPE */
2300
.macro apicinterrupt num,func
2303
@@ -814,6 +905,7 @@ END(common_interrupt)
2307
+#endif /* !CONFIG_IPIPE */
2309
ENTRY(thermal_interrupt)
2310
apicinterrupt THERMAL_APIC_VECTOR,smp_thermal_interrupt
2311
@@ -838,10 +930,12 @@ END(invalidate_interrupt\num)
2315
+#ifndef CONFIG_IPIPE
2320
+#endif /* !CONFIG_IPIPE */
2322
ENTRY(call_function_interrupt)
2323
apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt
2324
@@ -873,7 +967,7 @@ END(spurious_interrupt)
2326
* Exception entry points.
2328
- .macro zeroentry sym
2329
+ .macro zeroentry sym, ex_code
2331
PARAVIRT_ADJUST_EXCEPTION_FRAME
2332
pushq $0 /* push error code/oldrax */
2333
@@ -881,25 +975,33 @@ END(spurious_interrupt)
2334
pushq %rax /* push real oldrax to the rdi slot */
2335
CFI_ADJUST_CFA_OFFSET 8
2336
CFI_REL_OFFSET rax,0
2337
+#ifdef CONFIG_IPIPE
2338
+ movq $\ex_code,%rax
2340
leaq \sym(%rip),%rax
2346
- .macro errorentry sym
2347
+ .macro errorentry sym, ex_code
2349
PARAVIRT_ADJUST_EXCEPTION_FRAME
2351
CFI_ADJUST_CFA_OFFSET 8
2352
CFI_REL_OFFSET rax,0
2353
+#ifdef CONFIG_IPIPE
2354
+ movq $\ex_code,%rax
2356
leaq \sym(%rip),%rax
2362
/* error code is on the stack already */
2363
/* handle NMI like exceptions that can happen everywhere */
2364
- .macro paranoidentry sym, ist=0, irqtrace=1
2365
+ .macro paranoidentry sym, ist=0, irqtrace=1, ex_code=0
2369
@@ -922,6 +1024,16 @@ END(spurious_interrupt)
2371
subq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
2373
+#ifdef CONFIG_IPIPE
2375
+ movq $\ex_code,%rsi
2376
+ call __ipipe_divert_exception /* handle(regs, ex_code) */
2380
+ movq ORIG_RAX(%rsp),%rsi
2385
addq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
2386
@@ -930,6 +1042,12 @@ END(spurious_interrupt)
2390
+#ifdef CONFIG_IPIPE
2392
+ /* WARNING: paranoidexit will check %eax to decide whether the
2393
+ exception has to be propagated or not. */
2399
@@ -949,6 +1067,10 @@ END(spurious_interrupt)
2400
paranoid_exit\trace:
2401
testl %ebx,%ebx /* swapgs needed? */
2402
jnz paranoid_restore\trace
2403
+#ifdef CONFIG_IPIPE
2405
+ jnz paranoid_swapgs\trace
2408
jnz paranoid_userspace\trace
2409
paranoid_swapgs\trace:
2410
@@ -1049,7 +1171,19 @@ error_sti:
2412
movq ORIG_RAX(%rsp),%rsi /* get error code */
2413
movq $-1,ORIG_RAX(%rsp)
2414
+#ifdef CONFIG_IPIPE
2416
+ call __ipipe_handle_exception /* handle(regs, error_code, ex_code) */
2422
+ jne retint_noswapgs
2423
+ jmp retint_swapgs_notrace
2424
+#else /* !CONFIG_IPIPE */
2426
+#endif /* !CONFIG_IPIPE */
2427
/* ebx: no swapgs flag (1: don't need swapgs, 0: need it) */
2430
@@ -1205,19 +1339,19 @@ ENTRY(kernel_execve)
2431
ENDPROC(kernel_execve)
2433
KPROBE_ENTRY(page_fault)
2434
- errorentry do_page_fault
2435
+ errorentry do_page_fault ex_do_page_fault
2436
KPROBE_END(page_fault)
2438
ENTRY(coprocessor_error)
2439
- zeroentry do_coprocessor_error
2440
+ zeroentry do_coprocessor_error ex_do_coprocessor_error
2441
END(coprocessor_error)
2443
ENTRY(simd_coprocessor_error)
2444
- zeroentry do_simd_coprocessor_error
2445
+ zeroentry do_simd_coprocessor_error ex_do_simd_coprocessor_error
2446
END(simd_coprocessor_error)
2448
ENTRY(device_not_available)
2449
- zeroentry do_device_not_available
2450
+ zeroentry do_device_not_available ex_do_device_not_available
2451
END(device_not_available)
2453
/* runs on exception stack */
2454
@@ -1226,7 +1360,7 @@ KPROBE_ENTRY(debug)
2455
PARAVIRT_ADJUST_EXCEPTION_FRAME
2457
CFI_ADJUST_CFA_OFFSET 8
2458
- paranoidentry do_debug, DEBUG_STACK
2459
+ paranoidentry do_debug, DEBUG_STACK, 1, ex_do_debug
2463
@@ -1250,25 +1384,25 @@ KPROBE_ENTRY(int3)
2464
PARAVIRT_ADJUST_EXCEPTION_FRAME
2466
CFI_ADJUST_CFA_OFFSET 8
2467
- paranoidentry do_int3, DEBUG_STACK
2468
+ paranoidentry do_int3, DEBUG_STACK, 1, ex_do_int3
2474
- zeroentry do_overflow
2475
+ zeroentry do_overflow ex_do_overflow
2479
- zeroentry do_bounds
2480
+ zeroentry do_bounds ex_do_bounds
2484
- zeroentry do_invalid_op
2485
+ zeroentry do_invalid_op ex_do_invalid_op
2488
ENTRY(coprocessor_segment_overrun)
2489
- zeroentry do_coprocessor_segment_overrun
2490
+ zeroentry do_coprocessor_segment_overrun ex_do_coprocessor_segment_overrun
2491
END(coprocessor_segment_overrun)
2493
/* runs on exception stack */
2494
@@ -1281,11 +1415,11 @@ ENTRY(double_fault)
2498
- errorentry do_invalid_TSS
2499
+ errorentry do_invalid_TSS ex_do_invalid_TSS
2502
ENTRY(segment_not_present)
2503
- errorentry do_segment_not_present
2504
+ errorentry do_segment_not_present ex_do_segment_not_present
2505
END(segment_not_present)
2507
/* runs on exception stack */
2508
@@ -1298,19 +1432,19 @@ ENTRY(stack_segment)
2511
KPROBE_ENTRY(general_protection)
2512
- errorentry do_general_protection
2513
+ errorentry do_general_protection ex_do_general_protection
2514
KPROBE_END(general_protection)
2516
ENTRY(alignment_check)
2517
- errorentry do_alignment_check
2518
+ errorentry do_alignment_check ex_do_alignment_check
2519
END(alignment_check)
2522
- zeroentry do_divide_error
2523
+ zeroentry do_divide_error ex_do_divide_error
2526
ENTRY(spurious_interrupt_bug)
2527
- zeroentry do_spurious_interrupt_bug
2528
+ zeroentry do_spurious_interrupt_bug ex_do_spurious_interrupt_bug
2529
END(spurious_interrupt_bug)
2531
#ifdef CONFIG_X86_MCE
2532
diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c
2533
index c026279..8cfe478 100644
2534
--- a/arch/x86/kernel/genapic_flat_64.c
2535
+++ b/arch/x86/kernel/genapic_flat_64.c
2536
@@ -74,9 +74,9 @@ static void flat_send_IPI_mask(cpumask_t cpumask, int vector)
2537
unsigned long mask = cpus_addr(cpumask)[0];
2538
unsigned long flags;
2540
- local_irq_save(flags);
2541
+ local_irq_save_hw(flags);
2542
__send_IPI_dest_field(mask, vector, APIC_DEST_LOGICAL);
2543
- local_irq_restore(flags);
2544
+ local_irq_restore_hw(flags);
2547
static void flat_send_IPI_allbutself(int vector)
2548
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
2549
index 9461dc3..6421209 100644
2550
--- a/arch/x86/kernel/head64.c
2551
+++ b/arch/x86/kernel/head64.c
2552
@@ -72,7 +72,7 @@ static void __init copy_bootdata(char *real_mode_data)
2556
-void __init x86_64_start_kernel(char * real_mode_data)
2557
+void __init notrace x86_64_start_kernel(char * real_mode_data)
2561
diff --git a/arch/x86/kernel/i8253.c b/arch/x86/kernel/i8253.c
2562
index c1b5e3e..810be3e 100644
2563
--- a/arch/x86/kernel/i8253.c
2564
+++ b/arch/x86/kernel/i8253.c
2566
#include <linux/jiffies.h>
2567
#include <linux/module.h>
2568
#include <linux/spinlock.h>
2569
+#include <linux/ipipe.h>
2571
#include <asm/smp.h>
2572
#include <asm/delay.h>
2573
@@ -139,6 +140,12 @@ static cycle_t pit_read(void)
2574
static int old_count;
2575
static u32 old_jifs;
2577
+#ifdef CONFIG_IPIPE
2578
+ if (!__ipipe_pipeline_head_p(ipipe_root_domain))
2579
+ /* We don't really own the PIT. */
2580
+ return (cycle_t)(jiffies * LATCH) + (LATCH - 1) - old_count;
2581
+#endif /* CONFIG_IPIPE */
2583
spin_lock_irqsave(&i8253_lock, flags);
2585
* Although our caller may have the read side of xtime_lock,
2586
diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c
2587
index 4b8a53d..e5c525d 100644
2588
--- a/arch/x86/kernel/i8259.c
2589
+++ b/arch/x86/kernel/i8259.c
2593
static int i8259A_auto_eoi;
2594
-DEFINE_SPINLOCK(i8259A_lock);
2595
+IPIPE_DEFINE_SPINLOCK(i8259A_lock);
2596
static void mask_and_ack_8259A(unsigned int);
2598
struct irq_chip i8259A_chip = {
2599
@@ -70,6 +70,7 @@ void disable_8259A_irq(unsigned int irq)
2600
unsigned long flags;
2602
spin_lock_irqsave(&i8259A_lock, flags);
2603
+ ipipe_irq_lock(irq);
2604
cached_irq_mask |= mask;
2606
outb(cached_slave_mask, PIC_SLAVE_IMR);
2607
@@ -80,15 +81,18 @@ void disable_8259A_irq(unsigned int irq)
2609
void enable_8259A_irq(unsigned int irq)
2611
- unsigned int mask = ~(1 << irq);
2612
+ unsigned int mask = (1 << irq);
2613
unsigned long flags;
2615
spin_lock_irqsave(&i8259A_lock, flags);
2616
- cached_irq_mask &= mask;
2618
- outb(cached_slave_mask, PIC_SLAVE_IMR);
2620
- outb(cached_master_mask, PIC_MASTER_IMR);
2621
+ if (cached_irq_mask & mask) {
2622
+ cached_irq_mask &= ~mask;
2624
+ outb(cached_slave_mask, PIC_SLAVE_IMR);
2626
+ outb(cached_master_mask, PIC_MASTER_IMR);
2627
+ ipipe_irq_unlock(irq);
2629
spin_unlock_irqrestore(&i8259A_lock, flags);
2632
@@ -169,6 +173,18 @@ static void mask_and_ack_8259A(unsigned int irq)
2634
if (cached_irq_mask & irqmask)
2635
goto spurious_8259A_irq;
2636
+#ifdef CONFIG_IPIPE
2639
+ * Fast timer ack -- don't mask (unless supposedly
2640
+ * spurious). We trace outb's in order to detect
2641
+ * broken hardware inducing large delays.
2643
+ outb(0x60, PIC_MASTER_CMD); /* Specific EOI to master. */
2644
+ spin_unlock_irqrestore(&i8259A_lock, flags);
2647
+#endif /* CONFIG_IPIPE */
2648
cached_irq_mask |= irqmask;
2651
diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
2652
index 9043251..0fa9b05 100644
2653
--- a/arch/x86/kernel/io_apic.c
2654
+++ b/arch/x86/kernel/io_apic.c
2657
int sis_apic_bug = -1;
2659
-static DEFINE_SPINLOCK(ioapic_lock);
2660
-static DEFINE_SPINLOCK(vector_lock);
2661
+static IPIPE_DEFINE_SPINLOCK(ioapic_lock);
2662
+static IPIPE_DEFINE_SPINLOCK(vector_lock);
2664
+#if defined(CONFIG_IPIPE) && defined(CONFIG_X86_32)
2665
+volatile unsigned long bugous_edge_irq_triggers[(NR_IRQS + BITS_PER_LONG - 1) / BITS_PER_LONG];
2669
* # of IRQ routing registers
2670
@@ -151,6 +155,14 @@ static struct irq_cfg *irq_cfg_alloc(unsigned int irq)
2671
return irq_cfg(irq);
2674
+#ifdef CONFIG_IPIPE
2675
+unsigned __ipipe_get_ioapic_irq_vector(int irq)
2677
+ return irq >= IPIPE_FIRST_APIC_IRQ && irq < IPIPE_NR_XIRQS ?
2678
+ ipipe_apic_irq_vector(irq) : irq_cfg(irq)->vector;
2680
+#endif /* CONFIG_IPIPE */
2683
* Rough estimation of how many shared IRQs there are, can be changed
2685
@@ -520,6 +532,7 @@ static void mask_IO_APIC_irq (unsigned int irq)
2686
unsigned long flags;
2688
spin_lock_irqsave(&ioapic_lock, flags);
2689
+ ipipe_irq_lock(irq);
2690
__mask_IO_APIC_irq(irq);
2691
spin_unlock_irqrestore(&ioapic_lock, flags);
2693
@@ -529,7 +542,13 @@ static void unmask_IO_APIC_irq (unsigned int irq)
2694
unsigned long flags;
2696
spin_lock_irqsave(&ioapic_lock, flags);
2697
+#if defined(CONFIG_IPIPE) && defined(CONFIG_X86_32)
2698
+ if (test_and_clear_bit(irq, &bugous_edge_irq_triggers[0]))
2699
+ __unmask_and_level_IO_APIC_irq(irq);
2702
__unmask_IO_APIC_irq(irq);
2703
+ ipipe_irq_unlock(irq);
2704
spin_unlock_irqrestore(&ioapic_lock, flags);
2707
@@ -559,8 +578,10 @@ static void clear_IO_APIC (void)
2708
#if !defined(CONFIG_SMP) && defined(CONFIG_X86_32)
2709
void send_IPI_self(int vector)
2711
+ unsigned long flags;
2714
+ local_irq_save_hw_cond(flags);
2718
@@ -570,6 +591,7 @@ void send_IPI_self(int vector)
2719
* Send the IPI. The write to APIC_ICR fires this off.
2721
apic_write(APIC_ICR, cfg);
2722
+ local_irq_restore_hw_cond(flags);
2724
#endif /* !CONFIG_SMP && CONFIG_X86_32*/
2726
@@ -2030,6 +2052,7 @@ static unsigned int startup_ioapic_irq(unsigned int irq)
2729
__unmask_IO_APIC_irq(irq);
2730
+ ipipe_irq_unlock(irq);
2731
spin_unlock_irqrestore(&ioapic_lock, flags);
2734
@@ -2284,11 +2307,37 @@ static void ack_x2apic_edge(unsigned int irq)
2738
+#if defined(CONFIG_IPIPE) && defined(CONFIG_SMP)
2739
+static void move_apic_irq(unsigned int irq)
2741
+ struct irq_desc *desc = irq_to_desc(irq);
2743
+ if (desc->handle_irq == &handle_edge_irq) {
2744
+ spin_lock(&desc->lock);
2745
+ irq_complete_move(irq);
2746
+ move_native_irq(irq);
2747
+ spin_unlock(&desc->lock);
2748
+ } else if (desc->handle_irq == &handle_fasteoi_irq) {
2749
+ spin_lock(&desc->lock);
2750
+ irq_complete_move(irq);
2751
+ if (unlikely(desc->status & IRQ_MOVE_PENDING)) {
2752
+ if (!io_apic_level_ack_pending(irq))
2753
+ move_masked_irq(irq);
2754
+ unmask_IO_APIC_irq(irq);
2756
+ spin_unlock(&desc->lock);
2760
+#endif /* CONFIG_IPIPE */
2762
static void ack_apic_edge(unsigned int irq)
2764
+#ifndef CONFIG_IPIPE
2765
irq_complete_move(irq);
2766
move_native_irq(irq);
2768
+#endif /* CONFIG_IPIPE */
2772
atomic_t irq_mis_count;
2773
@@ -2299,6 +2348,7 @@ static void ack_apic_level(unsigned int irq)
2777
+#ifndef CONFIG_IPIPE
2778
int do_unmask_irq = 0;
2780
irq_complete_move(irq);
2781
@@ -2373,7 +2423,6 @@ static void ack_apic_level(unsigned int irq)
2782
move_masked_irq(irq);
2783
unmask_IO_APIC_irq(irq);
2786
#ifdef CONFIG_X86_32
2787
if (!(v & (1 << (i & 0x1f)))) {
2788
atomic_inc(&irq_mis_count);
2789
@@ -2383,6 +2432,30 @@ static void ack_apic_level(unsigned int irq)
2790
spin_unlock(&ioapic_lock);
2793
+#else /* CONFIG_IPIPE */
2794
+#ifdef CONFIG_X86_32
2795
+ i = irq_cfg(irq)->vector;
2796
+ v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1));
2797
+ spin_lock(&ioapic_lock);
2798
+ if (unlikely(!(v & (1 << (i & 0x1f))))) {
2799
+ /* IO-APIC erratum: see comment below. */
2800
+ atomic_inc(&irq_mis_count);
2801
+ __mask_and_edge_IO_APIC_irq(irq);
2802
+ set_bit(irq, &bugous_edge_irq_triggers[0]);
2804
+ __mask_IO_APIC_irq(irq);
2805
+#else /* CONFIG_X86_64 */
2807
+ * Prevent low priority IRQs grabbed by high priority domains
2808
+ * from being delayed, waiting for a high priority interrupt
2809
+ * handler running in a low priority domain to complete.
2811
+ spin_lock(&ioapic_lock);
2812
+ __mask_IO_APIC_irq(irq);
2813
+#endif /* CONFIG_X86_64 */
2814
+ spin_unlock(&ioapic_lock);
2816
+#endif /* CONFIG_IPIPE */
2819
static struct irq_chip ioapic_chip __read_mostly = {
2820
@@ -2394,6 +2467,9 @@ static struct irq_chip ioapic_chip __read_mostly = {
2821
.eoi = ack_apic_level,
2823
.set_affinity = set_ioapic_affinity_irq,
2824
+#ifdef CONFIG_IPIPE
2825
+ .move = move_apic_irq,
2828
.retrigger = ioapic_retrigger_irq,
2830
@@ -2454,23 +2530,29 @@ static inline void init_IO_APIC_traps(void)
2832
static void mask_lapic_irq(unsigned int irq)
2835
+ unsigned long v, flags;
2837
+ local_irq_save_hw_cond(flags);
2838
+ ipipe_irq_lock(irq);
2839
v = apic_read(APIC_LVT0);
2840
apic_write(APIC_LVT0, v | APIC_LVT_MASKED);
2841
+ local_irq_restore_hw_cond(flags);
2844
static void unmask_lapic_irq(unsigned int irq)
2847
+ unsigned long v, flags;
2849
+ local_irq_save_hw_cond(flags);
2850
v = apic_read(APIC_LVT0);
2851
apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED);
2852
+ ipipe_irq_unlock(irq);
2853
+ local_irq_restore_hw_cond(flags);
2856
static void ack_lapic_irq (unsigned int irq)
2862
static struct irq_chip lapic_chip __read_mostly = {
2863
@@ -2478,6 +2560,9 @@ static struct irq_chip lapic_chip __read_mostly = {
2864
.mask = mask_lapic_irq,
2865
.unmask = unmask_lapic_irq,
2866
.ack = ack_lapic_irq,
2867
+#if defined(CONFIG_IPIPE) && defined(CONFIG_SMP)
2868
+ .move = move_apic_irq,
2872
static void lapic_register_intr(int irq)
2873
@@ -2718,6 +2803,10 @@ static inline void __init check_timer(void)
2874
"...trying to set up timer as Virtual Wire IRQ...\n");
2876
lapic_register_intr(0);
2877
+#if defined(CONFIG_IPIPE) && defined(CONFIG_X86_64)
2878
+ irq_to_desc(0)->ipipe_ack = __ipipe_ack_edge_irq;
2879
+ irq_to_desc(0)->ipipe_end = __ipipe_end_edge_irq;
2881
apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector); /* Fixed mode */
2882
enable_8259A_irq(0);
2884
@@ -3122,6 +3211,9 @@ static struct irq_chip msi_chip = {
2885
.ack = ack_apic_edge,
2887
.set_affinity = set_msi_irq_affinity,
2888
+#ifdef CONFIG_IPIPE
2889
+ .move = move_apic_irq,
2892
.retrigger = ioapic_retrigger_irq,
2894
@@ -3347,6 +3439,9 @@ struct irq_chip dmar_msi_type = {
2895
.ack = ack_apic_edge,
2897
.set_affinity = dmar_msi_set_affinity,
2898
+#ifdef CONFIG_IPIPE
2899
+ .move = move_apic_irq,
2902
.retrigger = ioapic_retrigger_irq,
2904
@@ -3482,6 +3577,9 @@ static struct irq_chip ht_irq_chip = {
2905
.ack = ack_apic_edge,
2907
.set_affinity = set_ht_irq_affinity,
2908
+#ifdef CONFIG_IPIPE
2909
+ .move = move_apic_irq,
2912
.retrigger = ioapic_retrigger_irq,
2914
diff --git a/arch/x86/kernel/ipi.c b/arch/x86/kernel/ipi.c
2915
index f1c688e..484928c 100644
2916
--- a/arch/x86/kernel/ipi.c
2917
+++ b/arch/x86/kernel/ipi.c
2918
@@ -58,6 +58,9 @@ void __send_IPI_shortcut(unsigned int shortcut, int vector)
2922
+ unsigned long flags;
2924
+ local_irq_save_hw_cond(flags);
2928
@@ -73,6 +76,8 @@ void __send_IPI_shortcut(unsigned int shortcut, int vector)
2929
* Send the IPI. The write to APIC_ICR fires this off.
2931
apic_write(APIC_ICR, cfg);
2933
+ local_irq_restore_hw_cond(flags);
2936
void send_IPI_self(int vector)
2937
@@ -86,8 +91,9 @@ void send_IPI_self(int vector)
2939
static inline void __send_IPI_dest_field(unsigned long mask, int vector)
2941
- unsigned long cfg;
2942
+ unsigned long cfg, flags;
2944
+ local_irq_save_hw_cond(flags);
2948
@@ -111,6 +117,8 @@ static inline void __send_IPI_dest_field(unsigned long mask, int vector)
2949
* Send the IPI. The write to APIC_ICR fires this off.
2951
apic_write(APIC_ICR, cfg);
2953
+ local_irq_restore_hw_cond(flags);
2957
@@ -121,10 +129,10 @@ void send_IPI_mask_bitmask(cpumask_t cpumask, int vector)
2958
unsigned long mask = cpus_addr(cpumask)[0];
2959
unsigned long flags;
2961
- local_irq_save(flags);
2962
+ local_irq_save_hw(flags);
2963
WARN_ON(mask & ~cpus_addr(cpu_online_map)[0]);
2964
__send_IPI_dest_field(mask, vector);
2965
- local_irq_restore(flags);
2966
+ local_irq_restore_hw(flags);
2969
void send_IPI_mask_sequence(cpumask_t mask, int vector)
2970
@@ -138,14 +146,14 @@ void send_IPI_mask_sequence(cpumask_t mask, int vector)
2971
* should be modified to do 1 message per cluster ID - mbligh
2974
- local_irq_save(flags);
2975
+ local_irq_save_hw(flags);
2976
for_each_possible_cpu(query_cpu) {
2977
if (cpu_isset(query_cpu, mask)) {
2978
__send_IPI_dest_field(cpu_to_logical_apicid(query_cpu),
2982
- local_irq_restore(flags);
2983
+ local_irq_restore_hw(flags);
2986
/* must come after the send_IPI functions above for inlining */
2987
diff --git a/arch/x86/kernel/ipipe.c b/arch/x86/kernel/ipipe.c
2988
new file mode 100644
2989
index 0000000..e2255b3
2991
+++ b/arch/x86/kernel/ipipe.c
2994
+ * linux/arch/x86/kernel/ipipe.c
2996
+ * Copyright (C) 2002-2007 Philippe Gerum.
2998
+ * This program is free software; you can redistribute it and/or modify
2999
+ * it under the terms of the GNU General Public License as published by
3000
+ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139,
3001
+ * USA; either version 2 of the License, or (at your option) any later
3004
+ * This program is distributed in the hope that it will be useful,
3005
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
3006
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
3007
+ * GNU General Public License for more details.
3009
+ * You should have received a copy of the GNU General Public License
3010
+ * along with this program; if not, write to the Free Software
3011
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
3013
+ * Architecture-dependent I-PIPE support for x86.
3016
+#include <linux/kernel.h>
3017
+#include <linux/smp.h>
3018
+#include <linux/module.h>
3019
+#include <linux/sched.h>
3020
+#include <linux/interrupt.h>
3021
+#include <linux/slab.h>
3022
+#include <linux/irq.h>
3023
+#include <linux/clockchips.h>
3024
+#include <asm/unistd.h>
3025
+#include <asm/system.h>
3026
+#include <asm/atomic.h>
3027
+#include <asm/hw_irq.h>
3028
+#include <asm/irq.h>
3029
+#include <asm/desc.h>
3030
+#include <asm/io.h>
3031
+#ifdef CONFIG_X86_LOCAL_APIC
3032
+#include <asm/tlbflush.h>
3033
+#include <asm/fixmap.h>
3034
+#include <asm/bitops.h>
3035
+#include <asm/mpspec.h>
3036
+#ifdef CONFIG_X86_IO_APIC
3037
+#include <asm/io_apic.h>
3038
+#endif /* CONFIG_X86_IO_APIC */
3039
+#include <asm/apic.h>
3040
+#include <mach_ipi.h>
3041
+#endif /* CONFIG_X86_LOCAL_APIC */
3042
+#include <asm/traps.h>
3044
+int __ipipe_tick_irq = 0; /* Legacy timer */
3046
+DEFINE_PER_CPU(struct pt_regs, __ipipe_tick_regs);
3050
+static cpumask_t __ipipe_cpu_sync_map;
3052
+static cpumask_t __ipipe_cpu_lock_map;
3054
+static IPIPE_DEFINE_SPINLOCK(__ipipe_cpu_barrier);
3056
+static atomic_t __ipipe_critical_count = ATOMIC_INIT(0);
3058
+static void (*__ipipe_cpu_sync) (void);
3060
+#endif /* CONFIG_SMP */
3063
+ * ipipe_trigger_irq() -- Push the interrupt at front of the pipeline
3064
+ * just as if it had actually been received from a hw source. Also
3065
+ * works for virtual interrupts.
3067
+int ipipe_trigger_irq(unsigned irq)
3069
+ struct pt_regs regs;
3070
+ unsigned long flags;
3072
+ if (irq >= IPIPE_NR_IRQS ||
3073
+ (ipipe_virtual_irq_p(irq) &&
3074
+ !test_bit(irq - IPIPE_VIRQ_BASE, &__ipipe_virtual_irq_map)))
3077
+ local_irq_save_hw(flags);
3078
+ regs.flags = flags;
3079
+ regs.orig_ax = irq; /* Positive value - IRQ won't be acked */
3080
+ regs.cs = __KERNEL_CS;
3081
+ __ipipe_handle_irq(&regs);
3082
+ local_irq_restore_hw(flags);
3087
+int ipipe_get_sysinfo(struct ipipe_sysinfo *info)
3089
+ info->ncpus = num_online_cpus();
3090
+ info->cpufreq = ipipe_cpu_freq();
3091
+ info->archdep.tmirq = __ipipe_tick_irq;
3092
+#ifdef CONFIG_X86_TSC
3093
+ info->archdep.tmfreq = ipipe_cpu_freq();
3094
+#else /* !CONFIG_X86_TSC */
3095
+ info->archdep.tmfreq = CLOCK_TICK_RATE;
3096
+#endif /* CONFIG_X86_TSC */
3101
+unsigned int do_IRQ(struct pt_regs *regs);
3102
+void smp_apic_timer_interrupt(struct pt_regs *regs);
3103
+void smp_spurious_interrupt(struct pt_regs *regs);
3104
+void smp_error_interrupt(struct pt_regs *regs);
3105
+void smp_thermal_interrupt(struct pt_regs *regs);
3106
+void smp_reschedule_interrupt(struct pt_regs *regs);
3107
+void smp_invalidate_interrupt(struct pt_regs *regs);
3108
+void smp_call_function_interrupt(struct pt_regs *regs);
3109
+void smp_call_function_single_interrupt(struct pt_regs *regs);
3110
+void mce_threshold_interrupt(struct pt_regs *regs);
3111
+void uv_bau_message_interrupt(struct pt_regs *regs);
3113
+static void __ipipe_ack_irq(unsigned irq, struct irq_desc *desc)
3115
+ desc->ipipe_ack(irq, desc);
3118
+void __ipipe_enable_irqdesc(struct ipipe_domain *ipd, unsigned irq)
3120
+ irq_to_desc(irq)->status &= ~IRQ_DISABLED;
3123
+#ifdef CONFIG_X86_LOCAL_APIC
3125
+static void __ipipe_noack_apic(unsigned irq, struct irq_desc *desc)
3129
+static void __ipipe_ack_apic(unsigned irq, struct irq_desc *desc)
3134
+static void __ipipe_null_handler(unsigned irq, void *cookie)
3138
+#endif /* CONFIG_X86_LOCAL_APIC */
3140
+/* __ipipe_enable_pipeline() -- We are running on the boot CPU, hw
3141
+ interrupts are off, and secondary CPUs are still lost in space. */
3143
+void __init __ipipe_enable_pipeline(void)
3147
+#ifdef CONFIG_X86_LOCAL_APIC
3149
+ /* Map the APIC system vectors. */
3151
+ ipipe_virtualize_irq(ipipe_root_domain,
3152
+ ipipe_apic_vector_irq(LOCAL_TIMER_VECTOR),
3153
+ (ipipe_irq_handler_t)&smp_apic_timer_interrupt,
3155
+ &__ipipe_ack_apic,
3156
+ IPIPE_STDROOT_MASK);
3158
+ ipipe_virtualize_irq(ipipe_root_domain,
3159
+ ipipe_apic_vector_irq(SPURIOUS_APIC_VECTOR),
3160
+ (ipipe_irq_handler_t)&smp_spurious_interrupt,
3162
+ &__ipipe_noack_apic,
3163
+ IPIPE_STDROOT_MASK);
3165
+ ipipe_virtualize_irq(ipipe_root_domain,
3166
+ ipipe_apic_vector_irq(ERROR_APIC_VECTOR),
3167
+ (ipipe_irq_handler_t)&smp_error_interrupt,
3169
+ &__ipipe_ack_apic,
3170
+ IPIPE_STDROOT_MASK);
3172
+ ipipe_virtualize_irq(ipipe_root_domain,
3173
+ ipipe_apic_vector_irq(IPIPE_SERVICE_VECTOR0),
3174
+ &__ipipe_null_handler,
3176
+ &__ipipe_ack_apic,
3177
+ IPIPE_STDROOT_MASK);
3179
+ ipipe_virtualize_irq(ipipe_root_domain,
3180
+ ipipe_apic_vector_irq(IPIPE_SERVICE_VECTOR1),
3181
+ &__ipipe_null_handler,
3183
+ &__ipipe_ack_apic,
3184
+ IPIPE_STDROOT_MASK);
3186
+ ipipe_virtualize_irq(ipipe_root_domain,
3187
+ ipipe_apic_vector_irq(IPIPE_SERVICE_VECTOR2),
3188
+ &__ipipe_null_handler,
3190
+ &__ipipe_ack_apic,
3191
+ IPIPE_STDROOT_MASK);
3193
+ ipipe_virtualize_irq(ipipe_root_domain,
3194
+ ipipe_apic_vector_irq(IPIPE_SERVICE_VECTOR3),
3195
+ &__ipipe_null_handler,
3197
+ &__ipipe_ack_apic,
3198
+ IPIPE_STDROOT_MASK);
3200
+#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(CONFIG_X86_64)
3201
+ ipipe_virtualize_irq(ipipe_root_domain,
3202
+ ipipe_apic_vector_irq(THERMAL_APIC_VECTOR),
3203
+ (ipipe_irq_handler_t)&smp_thermal_interrupt,
3205
+ &__ipipe_ack_apic,
3206
+ IPIPE_STDROOT_MASK);
3207
+#endif /* CONFIG_X86_MCE_P4THERMAL || CONFIG_X86_64 */
3209
+#ifdef CONFIG_X86_64
3210
+ ipipe_virtualize_irq(ipipe_root_domain,
3211
+ ipipe_apic_vector_irq(THRESHOLD_APIC_VECTOR),
3212
+ (ipipe_irq_handler_t)&mce_threshold_interrupt,
3214
+ &__ipipe_ack_apic,
3215
+ IPIPE_STDROOT_MASK);
3217
+ ipipe_virtualize_irq(ipipe_root_domain,
3218
+ ipipe_apic_vector_irq(UV_BAU_MESSAGE),
3219
+ (ipipe_irq_handler_t)&uv_bau_message_interrupt,
3221
+ &__ipipe_ack_apic,
3222
+ IPIPE_STDROOT_MASK);
3223
+#endif /* CONFIG_X86_64 */
3225
+#endif /* CONFIG_X86_LOCAL_APIC */
3228
+ ipipe_virtualize_irq(ipipe_root_domain,
3229
+ ipipe_apic_vector_irq(RESCHEDULE_VECTOR),
3230
+ (ipipe_irq_handler_t)&smp_reschedule_interrupt,
3232
+ &__ipipe_ack_apic,
3233
+ IPIPE_STDROOT_MASK);
3235
+#ifdef CONFIG_X86_32
3236
+ ipipe_virtualize_irq(ipipe_root_domain,
3237
+ ipipe_apic_vector_irq(INVALIDATE_TLB_VECTOR),
3238
+ (ipipe_irq_handler_t)&smp_invalidate_interrupt,
3240
+ &__ipipe_ack_apic,
3241
+ IPIPE_STDROOT_MASK);
3242
+#else /* !CONFIG_X86_32 */
3246
+ for (vector = INVALIDATE_TLB_VECTOR_START;
3247
+ vector <= INVALIDATE_TLB_VECTOR_END; ++vector)
3248
+ ipipe_virtualize_irq(ipipe_root_domain,
3249
+ ipipe_apic_vector_irq(vector),
3250
+ (ipipe_irq_handler_t)&smp_invalidate_interrupt,
3252
+ &__ipipe_ack_apic,
3253
+ IPIPE_STDROOT_MASK);
3255
+#endif /* !CONFIG_X86_32 */
3257
+ ipipe_virtualize_irq(ipipe_root_domain,
3258
+ ipipe_apic_vector_irq(CALL_FUNCTION_VECTOR),
3259
+ (ipipe_irq_handler_t)&smp_call_function_interrupt,
3261
+ &__ipipe_ack_apic,
3262
+ IPIPE_STDROOT_MASK);
3264
+ ipipe_virtualize_irq(ipipe_root_domain,
3265
+ ipipe_apic_vector_irq(CALL_FUNCTION_SINGLE_VECTOR),
3266
+ (ipipe_irq_handler_t)&smp_call_function_single_interrupt,
3268
+ &__ipipe_ack_apic,
3269
+ IPIPE_STDROOT_MASK);
3271
+#endif /* CONFIG_SMP */
3273
+ /* Finally, virtualize the remaining ISA and IO-APIC
3274
+ * interrupts. Interrupts which have already been virtualized
3275
+ * will just beget a silent -EPERM error since
3276
+ * IPIPE_SYSTEM_MASK has been passed for them, that's ok. */
3278
+ for (irq = 0; irq < NR_IRQS; irq++)
3279
+ /* Fails for IPIPE_CRITICAL_IPI but that's ok. */
3280
+ ipipe_virtualize_irq(ipipe_root_domain,
3282
+ (ipipe_irq_handler_t)&do_IRQ,
3285
+ IPIPE_STDROOT_MASK);
3287
+#ifdef CONFIG_X86_LOCAL_APIC
3288
+ /* Eventually allow these vectors to be reprogrammed. */
3289
+ ipipe_root_domain->irqs[IPIPE_SERVICE_IPI0].control &= ~IPIPE_SYSTEM_MASK;
3290
+ ipipe_root_domain->irqs[IPIPE_SERVICE_IPI1].control &= ~IPIPE_SYSTEM_MASK;
3291
+ ipipe_root_domain->irqs[IPIPE_SERVICE_IPI2].control &= ~IPIPE_SYSTEM_MASK;
3292
+ ipipe_root_domain->irqs[IPIPE_SERVICE_IPI3].control &= ~IPIPE_SYSTEM_MASK;
3293
+#endif /* CONFIG_X86_LOCAL_APIC */
3298
+cpumask_t __ipipe_set_irq_affinity(unsigned irq, cpumask_t cpumask)
3300
+ cpumask_t oldmask = irq_to_desc(irq)->affinity;
3302
+ if (irq_to_desc(irq)->chip->set_affinity == NULL)
3303
+ return CPU_MASK_NONE;
3305
+ if (cpus_empty(cpumask))
3306
+ return oldmask; /* Return mask value -- no change. */
3308
+ cpus_and(cpumask,cpumask,cpu_online_map);
3310
+ if (cpus_empty(cpumask))
3311
+ return CPU_MASK_NONE; /* Error -- bad mask value or non-routable IRQ. */
3313
+ irq_to_desc(irq)->chip->set_affinity(irq,cpumask);
3318
+int __ipipe_send_ipi(unsigned ipi, cpumask_t cpumask)
3320
+ unsigned long flags;
3323
+ if (ipi != IPIPE_SERVICE_IPI0 &&
3324
+ ipi != IPIPE_SERVICE_IPI1 &&
3325
+ ipi != IPIPE_SERVICE_IPI2 &&
3326
+ ipi != IPIPE_SERVICE_IPI3)
3329
+ local_irq_save_hw(flags);
3331
+ self = cpu_isset(ipipe_processor_id(),cpumask);
3332
+ cpu_clear(ipipe_processor_id(), cpumask);
3334
+ if (!cpus_empty(cpumask))
3335
+ send_IPI_mask(cpumask, ipipe_apic_irq_vector(ipi));
3338
+ ipipe_trigger_irq(ipi);
3340
+ local_irq_restore_hw(flags);
3345
+/* Always called with hw interrupts off. */
3347
+void __ipipe_do_critical_sync(unsigned irq, void *cookie)
3349
+ int cpu = ipipe_processor_id();
3351
+ cpu_set(cpu, __ipipe_cpu_sync_map);
3353
+ /* Now we are in sync with the lock requestor running on another
3354
+ CPU. Enter a spinning wait until it releases the global
3356
+ spin_lock(&__ipipe_cpu_barrier);
3358
+ /* Got it. Now get out. */
3360
+ if (__ipipe_cpu_sync)
3361
+ /* Call the sync routine if any. */
3362
+ __ipipe_cpu_sync();
3364
+ spin_unlock(&__ipipe_cpu_barrier);
3366
+ cpu_clear(cpu, __ipipe_cpu_sync_map);
3369
+void __ipipe_hook_critical_ipi(struct ipipe_domain *ipd)
3371
+ ipd->irqs[IPIPE_CRITICAL_IPI].acknowledge = &__ipipe_ack_apic;
3372
+ ipd->irqs[IPIPE_CRITICAL_IPI].handler = &__ipipe_do_critical_sync;
3373
+ ipd->irqs[IPIPE_CRITICAL_IPI].cookie = NULL;
3374
+ /* Immediately handle in the current domain but *never* pass */
3375
+ ipd->irqs[IPIPE_CRITICAL_IPI].control =
3376
+ IPIPE_HANDLE_MASK|IPIPE_STICKY_MASK|IPIPE_SYSTEM_MASK;
3379
+#endif /* CONFIG_SMP */
3381
+/* ipipe_critical_enter() -- Grab the superlock excluding all CPUs
3382
+ but the current one from a critical section. This lock is used when
3383
+ we must enforce a global critical section for a single CPU in a
3384
+ possibly SMP system whichever context the CPUs are running in. */
3386
+unsigned long ipipe_critical_enter(void (*syncfn) (void))
3388
+ unsigned long flags;
3390
+ local_irq_save_hw(flags);
3393
+ if (unlikely(num_online_cpus() == 1)) /* We might be running a SMP-kernel on a UP box... */
3397
+ int cpu = ipipe_processor_id();
3398
+ cpumask_t lock_map;
3400
+ if (!cpu_test_and_set(cpu, __ipipe_cpu_lock_map)) {
3401
+ while (cpu_test_and_set(BITS_PER_LONG - 1, __ipipe_cpu_lock_map)) {
3405
+ } while (++n < cpu);
3408
+ spin_lock(&__ipipe_cpu_barrier);
3410
+ __ipipe_cpu_sync = syncfn;
3412
+ /* Send the sync IPI to all processors but the current one. */
3413
+ send_IPI_allbutself(IPIPE_CRITICAL_VECTOR);
3415
+ cpus_andnot(lock_map, cpu_online_map, __ipipe_cpu_lock_map);
3417
+ while (!cpus_equal(__ipipe_cpu_sync_map, lock_map))
3421
+ atomic_inc(&__ipipe_critical_count);
3423
+#endif /* CONFIG_SMP */
3428
+/* ipipe_critical_exit() -- Release the superlock. */
3430
+void ipipe_critical_exit(unsigned long flags)
3433
+ if (num_online_cpus() == 1)
3436
+ if (atomic_dec_and_test(&__ipipe_critical_count)) {
3437
+ spin_unlock(&__ipipe_cpu_barrier);
3439
+ while (!cpus_empty(__ipipe_cpu_sync_map))
3442
+ cpu_clear(ipipe_processor_id(), __ipipe_cpu_lock_map);
3443
+ cpu_clear(BITS_PER_LONG - 1, __ipipe_cpu_lock_map);
3446
+#endif /* CONFIG_SMP */
3448
+ local_irq_restore_hw(flags);
3451
+#ifdef CONFIG_X86_32
3453
+static inline void __fixup_if(int s, struct pt_regs *regs)
3456
+ * Have the saved hw state look like the domain stall bit, so
3457
+ * that __ipipe_unstall_iret_root() restores the proper
3458
+ * pipeline state for the root stage upon exit.
3461
+ regs->flags &= ~X86_EFLAGS_IF;
3463
+ regs->flags |= X86_EFLAGS_IF;
3467
+ * Check the stall bit of the root domain to make sure the existing
3468
+ * preemption opportunity upon in-kernel resumption could be
3469
+ * exploited. In case a rescheduling could take place, the root stage
3470
+ * is stalled before the hw interrupts are re-enabled. This routine
3471
+ * must be called with hw interrupts off.
3474
+asmlinkage int __ipipe_kpreempt_root(struct pt_regs regs)
3476
+ if (test_bit(IPIPE_STALL_FLAG, &ipipe_root_cpudom_var(status)))
3477
+ /* Root stage is stalled: rescheduling denied. */
3480
+ __ipipe_stall_root();
3481
+ local_irq_enable_hw_notrace();
3483
+ return 1; /* Ok, may reschedule now. */
3486
+asmlinkage void __ipipe_unstall_iret_root(struct pt_regs regs)
3488
+ struct ipipe_percpu_domain_data *p = ipipe_root_cpudom_ptr();
3490
+ /* Emulate IRET's handling of the interrupt flag. */
3492
+ local_irq_disable_hw();
3495
+ * Restore the software state as it used to be on kernel
3496
+ * entry. CAUTION: NMIs must *not* return through this
3499
+ if (raw_irqs_disabled_flags(regs.flags)) {
3500
+ if (!__test_and_set_bit(IPIPE_STALL_FLAG, &p->status))
3501
+ trace_hardirqs_off();
3502
+ regs.flags |= X86_EFLAGS_IF;
3504
+ if (test_bit(IPIPE_STALL_FLAG, &p->status)) {
3505
+ trace_hardirqs_on();
3506
+ __clear_bit(IPIPE_STALL_FLAG, &p->status);
3510
+ * Only sync virtual IRQs here, so that we don't
3511
+ * recurse indefinitely in case of an external
3512
+ * interrupt flood.
3514
+ if ((p->irqpend_himask & IPIPE_IRQMASK_VIRT) != 0)
3515
+ __ipipe_sync_pipeline(IPIPE_IRQMASK_VIRT);
3517
+#ifdef CONFIG_IPIPE_TRACE_IRQSOFF
3518
+ ipipe_trace_end(0x8000000D);
3519
+#endif /* CONFIG_IPIPE_TRACE_IRQSOFF */
3522
+#else /* !CONFIG_X86_32 */
3524
+static inline void __fixup_if(int s, struct pt_regs *regs)
3528
+#ifdef CONFIG_PREEMPT
3530
+asmlinkage void preempt_schedule_irq(void);
3532
+void __ipipe_preempt_schedule_irq(void)
3534
+ struct ipipe_percpu_domain_data *p = ipipe_root_cpudom_ptr();
3537
+ local_irq_disable_hw();
3538
+ s = __test_and_set_bit(IPIPE_STALL_FLAG, &p->status);
3539
+ local_irq_enable_hw();
3540
+ preempt_schedule_irq(); /* Ok, may reschedule now. */
3541
+ local_irq_disable_hw();
3543
+ __clear_bit(IPIPE_STALL_FLAG, &p->status);
3545
+#endif /* CONFIG_PREEMPT */
3547
+#endif /* !CONFIG_X86_32 */
3549
+static void do_machine_check_vector(struct pt_regs *regs, long error_code)
3551
+#ifdef CONFIG_X86_MCE
3552
+#ifdef CONFIG_X86_32
3553
+ extern void (*machine_check_vector)(struct pt_regs *, long error_code);
3554
+ machine_check_vector(regs, error_code);
3556
+ do_machine_check(regs, error_code);
3558
+#endif /* CONFIG_X86_MCE */
3561
+/* Work around genksyms's issue with over-qualification in decls. */
3563
+typedef void dotraplinkage __ipipe_exhandler(struct pt_regs *, long);
3565
+typedef __ipipe_exhandler *__ipipe_exptr;
3567
+static __ipipe_exptr __ipipe_std_extable[] = {
3569
+ [ex_do_divide_error] = &do_divide_error,
3570
+ [ex_do_overflow] = &do_overflow,
3571
+ [ex_do_bounds] = &do_bounds,
3572
+ [ex_do_invalid_op] = &do_invalid_op,
3573
+ [ex_do_coprocessor_segment_overrun] = &do_coprocessor_segment_overrun,
3574
+ [ex_do_invalid_TSS] = &do_invalid_TSS,
3575
+ [ex_do_segment_not_present] = &do_segment_not_present,
3576
+ [ex_do_stack_segment] = &do_stack_segment,
3577
+ [ex_do_general_protection] = do_general_protection,
3578
+ [ex_do_page_fault] = (__ipipe_exptr)&do_page_fault,
3579
+ [ex_do_spurious_interrupt_bug] = &do_spurious_interrupt_bug,
3580
+ [ex_do_coprocessor_error] = &do_coprocessor_error,
3581
+ [ex_do_alignment_check] = &do_alignment_check,
3582
+ [ex_machine_check_vector] = &do_machine_check_vector,
3583
+ [ex_do_simd_coprocessor_error] = &do_simd_coprocessor_error,
3584
+ [ex_do_device_not_available] = &do_device_not_available,
3585
+#ifdef CONFIG_X86_32
3586
+ [ex_do_iret_error] = &do_iret_error,
3591
+#include <linux/kgdb.h>
3593
+static int __ipipe_xlate_signo[] = {
3595
+ [ex_do_divide_error] = SIGFPE,
3596
+ [ex_do_debug] = SIGTRAP,
3598
+ [ex_do_int3] = SIGTRAP,
3599
+ [ex_do_overflow] = SIGSEGV,
3600
+ [ex_do_bounds] = SIGSEGV,
3601
+ [ex_do_invalid_op] = SIGILL,
3602
+ [ex_do_device_not_available] = -1,
3604
+ [ex_do_coprocessor_segment_overrun] = SIGFPE,
3605
+ [ex_do_invalid_TSS] = SIGSEGV,
3606
+ [ex_do_segment_not_present] = SIGBUS,
3607
+ [ex_do_stack_segment] = SIGBUS,
3608
+ [ex_do_general_protection] = SIGSEGV,
3609
+ [ex_do_page_fault] = SIGSEGV,
3610
+ [ex_do_spurious_interrupt_bug] = -1,
3611
+ [ex_do_coprocessor_error] = -1,
3612
+ [ex_do_alignment_check] = SIGBUS,
3613
+ [ex_machine_check_vector] = -1,
3614
+ [ex_do_simd_coprocessor_error] = -1,
3616
+#ifdef CONFIG_X86_32
3617
+ [ex_do_iret_error] = SIGSEGV,
3620
+#endif /* CONFIG_KGDB */
3622
+int __ipipe_handle_exception(struct pt_regs *regs, long error_code, int vector)
3624
+ unsigned long flags;
3626
+ /* Pick up the root domain state of the interrupted context. */
3627
+ local_save_flags(flags);
3629
+ if (ipipe_root_domain_p) {
3631
+ * Replicate hw interrupt state into the virtual mask before
3632
+ * calling the I-pipe event handler over the root domain. Also
3633
+ * required later when calling the Linux exception handler.
3635
+ if (irqs_disabled_hw())
3636
+ local_irq_disable();
3639
+ /* catch exception KGDB is interested in over non-root domains */
3640
+ else if (__ipipe_xlate_signo[vector] >= 0 &&
3641
+ !kgdb_handle_exception(vector, __ipipe_xlate_signo[vector],
3642
+ error_code, regs))
3644
+#endif /* CONFIG_KGDB */
3646
+ if (unlikely(ipipe_trap_notify(vector, regs))) {
3647
+ local_irq_restore_nosync(flags);
3652
+ * 32-bit: In case we migrated to root domain inside the event
3653
+ * handler, restore the original IF from exception entry as the
3654
+ * low-level return code will evaluate it.
3656
+ __fixup_if(raw_irqs_disabled_flags(flags), regs);
3658
+ if (unlikely(!ipipe_root_domain_p)) {
3659
+ /* Detect unhandled faults over non-root domains. */
3660
+ struct ipipe_domain *ipd = ipipe_current_domain;
3662
+ /* Switch to root so that Linux can handle the fault cleanly. */
3663
+ ipipe_current_domain = ipipe_root_domain;
3665
+ ipipe_trace_panic_freeze();
3667
+ /* Always warn about user land and unfixable faults. */
3668
+ if ((error_code & 4) || !search_exception_tables(instruction_pointer(regs))) {
3669
+ printk(KERN_ERR "BUG: Unhandled exception over domain"
3670
+ " %s at 0x%lx - switching to ROOT\n",
3671
+ ipd->name, instruction_pointer(regs));
3673
+ ipipe_trace_panic_dump();
3674
+#ifdef CONFIG_IPIPE_DEBUG
3675
+ /* Also report fixable ones when debugging is enabled. */
3677
+ printk(KERN_WARNING "WARNING: Fixable exception over "
3678
+ "domain %s at 0x%lx - switching to ROOT\n",
3679
+ ipd->name, instruction_pointer(regs));
3681
+ ipipe_trace_panic_dump();
3682
+#endif /* CONFIG_IPIPE_DEBUG */
3686
+ __ipipe_std_extable[vector](regs, error_code);
3689
+ * Relevant for 64-bit: Restore root domain state as the low-level
3690
+ * return code will not align it to regs.flags.
3692
+ local_irq_restore_nosync(flags);
3697
+int __ipipe_divert_exception(struct pt_regs *regs, int vector)
3699
+ unsigned long flags;
3701
+ /* Same root state handling as in __ipipe_handle_exception. */
3702
+ local_save_flags(flags);
3704
+ if (ipipe_root_domain_p) {
3705
+ if (irqs_disabled_hw())
3706
+ local_irq_disable();
3709
+ /* catch int1 and int3 over non-root domains */
3711
+#ifdef CONFIG_X86_32
3712
+ if (vector != ex_do_device_not_available)
3715
+ unsigned int condition = 0;
3718
+ get_debugreg(condition, 6);
3719
+ if (!kgdb_handle_exception(vector, SIGTRAP, condition, regs))
3723
+#endif /* CONFIG_KGDB */
3725
+ if (unlikely(ipipe_trap_notify(vector, regs))) {
3726
+ local_irq_restore_nosync(flags);
3731
+ * 32-bit: Due to possible migration inside the event handler, we have
3732
+ * to restore IF so that low-level return code sets the root domain
3733
+ * state correctly.
3735
+ __fixup_if(raw_irqs_disabled_flags(flags), regs);
3740
+int __ipipe_syscall_root(struct pt_regs *regs)
3742
+ struct ipipe_percpu_domain_data *p;
3743
+ unsigned long flags;
3747
+ * This routine either returns:
3748
+ * 0 -- if the syscall is to be passed to Linux;
3749
+ * >0 -- if the syscall should not be passed to Linux, and no
3750
+ * tail work should be performed;
3751
+ * <0 -- if the syscall should not be passed to Linux but the
3752
+ * tail work has to be performed (for handling signals etc).
3755
+ if (!__ipipe_syscall_watched_p(current, regs->orig_ax) ||
3756
+ !__ipipe_event_monitored_p(IPIPE_EVENT_SYSCALL))
3759
+ ret = __ipipe_dispatch_event(IPIPE_EVENT_SYSCALL, regs);
3760
+ if (!ipipe_root_domain_p) {
3761
+#ifdef CONFIG_X86_64
3762
+ local_irq_disable_hw();
3767
+ p = ipipe_root_cpudom_ptr();
3768
+ __fixup_if(test_bit(IPIPE_STALL_FLAG, &p->status), regs);
3769
+ local_irq_save_hw(flags);
3771
+ * If allowed, sync pending VIRQs before _TIF_NEED_RESCHED is
3774
+ if ((p->irqpend_himask & IPIPE_IRQMASK_VIRT) != 0)
3775
+ __ipipe_sync_pipeline(IPIPE_IRQMASK_VIRT);
3776
+#ifdef CONFIG_X86_64
3779
+ local_irq_restore_hw(flags);
3785
+ * __ipipe_handle_irq() -- IPIPE's generic IRQ handler. An optimistic
3786
+ * interrupt protection log is maintained here for each domain. Hw
3787
+ * interrupts are off on entry.
3789
+int __ipipe_handle_irq(struct pt_regs *regs)
3791
+ struct ipipe_domain *this_domain, *next_domain;
3792
+ unsigned vector = regs->orig_ax, irq;
3793
+ struct list_head *head, *pos;
3796
+ if ((long)regs->orig_ax < 0) {
3798
+#ifdef CONFIG_X86_LOCAL_APIC
3799
+ if (vector >= FIRST_SYSTEM_VECTOR)
3800
+ irq = ipipe_apic_vector_irq(vector);
3803
+ irq = __get_cpu_var(vector_irq)[vector];
3805
+ } else { /* This is a self-triggered one. */
3810
+ this_domain = ipipe_current_domain;
3812
+ if (test_bit(IPIPE_STICKY_FLAG, &this_domain->irqs[irq].control))
3813
+ head = &this_domain->p_link;
3815
+ head = __ipipe_pipeline.next;
3816
+ next_domain = list_entry(head, struct ipipe_domain, p_link);
3817
+ if (likely(test_bit(IPIPE_WIRED_FLAG, &next_domain->irqs[irq].control))) {
3818
+ if (!m_ack && next_domain->irqs[irq].acknowledge)
3819
+ next_domain->irqs[irq].acknowledge(irq, irq_desc + irq);
3820
+ __ipipe_dispatch_wired(next_domain, irq);
3821
+ goto finalize_nosync;
3825
+ /* Ack the interrupt. */
3829
+ while (pos != &__ipipe_pipeline) {
3830
+ next_domain = list_entry(pos, struct ipipe_domain, p_link);
3831
+ if (test_bit(IPIPE_HANDLE_FLAG, &next_domain->irqs[irq].control)) {
3832
+ __ipipe_set_irq_pending(next_domain, irq);
3833
+ if (!m_ack && next_domain->irqs[irq].acknowledge) {
3834
+ next_domain->irqs[irq].acknowledge(irq, irq_desc + irq);
3838
+ if (!test_bit(IPIPE_PASS_FLAG, &next_domain->irqs[irq].control))
3840
+ pos = next_domain->p_link.next;
3844
+ * If the interrupt preempted the head domain, then do not
3845
+ * even try to walk the pipeline, unless an interrupt is
3848
+ if (test_bit(IPIPE_AHEAD_FLAG, &this_domain->flags) &&
3849
+ ipipe_head_cpudom_var(irqpend_himask) == 0)
3850
+ goto finalize_nosync;
3853
+ * Now walk the pipeline, yielding control to the highest
3854
+ * priority domain that has pending interrupt(s) or
3855
+ * immediately to the current domain if the interrupt has been
3856
+ * marked as 'sticky'. This search does not go beyond the
3857
+ * current domain in the pipeline.
3860
+ __ipipe_walk_pipeline(head);
3865
+ * Given our deferred dispatching model for regular IRQs, we
3866
+ * only record CPU regs for the last timer interrupt, so that
3867
+ * the timer handler charges CPU times properly. It is assumed
3868
+ * that other interrupt handlers don't actually care for such
3872
+ if (irq == __ipipe_tick_irq) {
3873
+ struct pt_regs *tick_regs = &__raw_get_cpu_var(__ipipe_tick_regs);
3874
+ tick_regs->flags = regs->flags;
3875
+ tick_regs->cs = regs->cs;
3876
+ tick_regs->ip = regs->ip;
3877
+ tick_regs->bp = regs->bp;
3878
+#ifdef CONFIG_X86_64
3879
+ tick_regs->ss = regs->ss;
3880
+ tick_regs->sp = regs->sp;
3882
+ if (!ipipe_root_domain_p)
3883
+ tick_regs->flags &= ~X86_EFLAGS_IF;
3886
+ if (!ipipe_root_domain_p ||
3887
+ test_bit(IPIPE_STALL_FLAG, &ipipe_root_cpudom_var(status)))
3890
+#if defined(CONFIG_X86_32) && defined(CONFIG_SMP)
3892
+ * Prevent a spurious rescheduling from being triggered on
3893
+ * preemptible kernels along the way out through
3896
+ if ((long)regs->orig_ax < 0)
3897
+ __set_bit(IPIPE_STALL_FLAG, &ipipe_root_cpudom_var(status));
3898
+#endif /* CONFIG_X86_32 && CONFIG_SMP */
3903
+int __ipipe_check_tickdev(const char *devname)
3905
+#ifdef CONFIG_X86_LOCAL_APIC
3906
+ if (!strcmp(devname, "lapic"))
3907
+ return __ipipe_check_lapic();
3913
+EXPORT_SYMBOL(__ipipe_tick_irq);
3915
+EXPORT_SYMBOL_GPL(irq_desc);
3916
+struct task_struct *__switch_to(struct task_struct *prev_p,
3917
+ struct task_struct *next_p);
3918
+EXPORT_SYMBOL_GPL(__switch_to);
3919
+EXPORT_SYMBOL_GPL(show_stack);
3921
+#ifdef CONFIG_X86_32
3922
+EXPORT_PER_CPU_SYMBOL_GPL(init_tss);
3924
+EXPORT_PER_CPU_SYMBOL_GPL(cpu_tlbstate);
3925
+#endif /* CONFIG_SMP */
3926
+#endif /* CONFIG_X86_32 */
3928
+#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK)
3929
+EXPORT_SYMBOL(tasklist_lock);
3930
+#endif /* CONFIG_SMP || CONFIG_DEBUG_SPINLOCK */
3932
+#ifdef CONFIG_IPIPE_TRACE_MCOUNT
3933
+void notrace mcount(void);
3934
+EXPORT_SYMBOL(mcount);
3935
+#endif /* CONFIG_IPIPE_TRACE_MCOUNT */
3936
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
3937
index d1d4dc5..1789a2a 100644
3938
--- a/arch/x86/kernel/irq.c
3939
+++ b/arch/x86/kernel/irq.c
3940
@@ -31,7 +31,7 @@ void ack_bad_irq(unsigned int irq)
3941
* But only ack when the APIC is enabled -AK
3949
diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c
3950
index 845aa98..9e46b1a 100644
3951
--- a/arch/x86/kernel/irqinit_32.c
3952
+++ b/arch/x86/kernel/irqinit_32.c
3953
@@ -151,6 +151,10 @@ void __init native_init_IRQ(void)
3955
/* Low priority IPI to cleanup after moving an irq */
3956
set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt);
3957
+#ifdef CONFIG_IPIPE
3958
+ /* IPI for critical lock */
3959
+ set_intr_gate(IPIPE_CRITICAL_VECTOR, ipipe_ipiX);
3963
#ifdef CONFIG_X86_LOCAL_APIC
3964
@@ -160,6 +164,12 @@ void __init native_init_IRQ(void)
3965
/* IPI vectors for APIC spurious and error interrupts */
3966
alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt);
3967
alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt);
3968
+#ifdef CONFIG_IPIPE
3969
+ set_intr_gate(IPIPE_SERVICE_VECTOR0, ipipe_ipi0);
3970
+ set_intr_gate(IPIPE_SERVICE_VECTOR1, ipipe_ipi1);
3971
+ set_intr_gate(IPIPE_SERVICE_VECTOR2, ipipe_ipi2);
3972
+ set_intr_gate(IPIPE_SERVICE_VECTOR3, ipipe_ipi3);
3976
#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_MCE_P4THERMAL)
3977
diff --git a/arch/x86/kernel/irqinit_64.c b/arch/x86/kernel/irqinit_64.c
3978
index ff02353..2fec83c 100644
3979
--- a/arch/x86/kernel/irqinit_64.c
3980
+++ b/arch/x86/kernel/irqinit_64.c
3981
@@ -173,11 +173,13 @@ static void __init smp_intr_init(void)
3982
alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+0, invalidate_interrupt0);
3983
alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+1, invalidate_interrupt1);
3984
alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+2, invalidate_interrupt2);
3985
+#ifndef CONFIG_IPIPE
3986
alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+3, invalidate_interrupt3);
3987
alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+4, invalidate_interrupt4);
3988
alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+5, invalidate_interrupt5);
3989
alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+6, invalidate_interrupt6);
3990
alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+7, invalidate_interrupt7);
3993
/* IPI for generic function call */
3994
alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
3995
diff --git a/arch/x86/kernel/mcount_32.S b/arch/x86/kernel/mcount_32.S
3996
new file mode 100644
3997
index 0000000..999097e
3999
+++ b/arch/x86/kernel/mcount_32.S
4002
+ * linux/arch/x86/kernel/mcount_32.S
4004
+ * Copyright (C) 2005, 2007 Jan Kiszka
4009
+ cmpl $0,ipipe_trace_enable
4016
+ pushl $0 # no additional value (v)
4017
+ movl 4(%ebp),%ecx # __CALLER_ADDR1
4018
+ movl 16(%esp),%edx # __CALLER_ADDR0
4019
+ movl $0,%eax # IPIPE_TRACE_FUNC
4020
+ call __ipipe_trace
4028
diff --git a/arch/x86/kernel/mcount_64.S b/arch/x86/kernel/mcount_64.S
4029
new file mode 100644
4030
index 0000000..c84a687
4032
+++ b/arch/x86/kernel/mcount_64.S
4035
+ * linux/arch/x86/kernel/mcount_64.S
4037
+ * Copyright (C) 2002 Free Software Foundation, Inc.
4038
+ * Contributed by Andreas Jaeger <aj@suse.de>.
4039
+ * Slightly adapted by Philippe Gerum for the I-pipe tracer.
4042
+#include <linux/linkage.h>
4047
+ cmpl $0,ipipe_trace_enable
4052
+ movq %rdx,16(%rsp)
4053
+ movq %rsi,24(%rsp)
4054
+ movq %rdi,32(%rsp)
4058
+ movq $0,%rcx /* No additional value. */
4059
+ movq 8(%rbp),%rdx /* Parent rip. */
4060
+ movq 56(%rsp),%rsi /* Caller rip. */
4061
+ movq $0,%rdi /* IPIPE_TRACE_FUNC */
4062
+ call __ipipe_trace
4066
+ movq 32(%rsp),%rdi
4067
+ movq 24(%rsp),%rsi
4068
+ movq 16(%rsp),%rdx
4075
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
4076
index 2c97f07..e049145 100644
4077
--- a/arch/x86/kernel/nmi.c
4078
+++ b/arch/x86/kernel/nmi.c
4079
@@ -61,6 +61,10 @@ static unsigned int nmi_hz = HZ;
4080
static DEFINE_PER_CPU(short, wd_enabled);
4081
static int endflag __initdata;
4083
+static int default_nmi_watchdog_tick(struct pt_regs * regs, unsigned reason);
4084
+int (*nmi_watchdog_tick) (struct pt_regs * regs, unsigned reason) = &default_nmi_watchdog_tick;
4085
+EXPORT_SYMBOL(nmi_watchdog_tick);
4087
static inline unsigned int get_nmi_count(int cpu)
4089
#ifdef CONFIG_X86_64
4090
@@ -386,7 +390,7 @@ void touch_nmi_watchdog(void)
4091
EXPORT_SYMBOL(touch_nmi_watchdog);
4093
notrace __kprobes int
4094
-nmi_watchdog_tick(struct pt_regs *regs, unsigned reason)
4095
+default_nmi_watchdog_tick(struct pt_regs *regs, unsigned reason)
4098
* Since current_thread_info()-> is always on the stack, and we
4099
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
4100
index c622772..65cec6e 100644
4101
--- a/arch/x86/kernel/process.c
4102
+++ b/arch/x86/kernel/process.c
4103
@@ -26,7 +26,15 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
4105
WARN_ON((unsigned long)dst->thread.xstate & 15);
4106
memcpy(dst->thread.xstate, src->thread.xstate, xstate_size);
4108
+#ifdef CONFIG_IPIPE
4109
+ dst->thread.xstate = kmem_cache_alloc(task_xstate_cachep,
4111
+ if (!dst->thread.xstate)
4119
@@ -50,6 +58,10 @@ void arch_task_cache_init(void)
4120
kmem_cache_create("task_xstate", xstate_size,
4121
__alignof__(union thread_xstate),
4123
+#ifdef CONFIG_IPIPE
4124
+ current->thread.xstate = kmem_cache_alloc(task_xstate_cachep,
4130
@@ -311,6 +323,11 @@ static void c1e_idle(void)
4132
void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
4134
+#ifdef CONFIG_IPIPE
4135
+#define default_to_mwait force_mwait
4137
+#define default_to_mwait 1
4139
#ifdef CONFIG_X86_SMP
4140
if (pm_idle == poll_idle && smp_num_siblings > 1) {
4141
printk(KERN_WARNING "WARNING: polling idle and HT enabled,"
4142
@@ -320,7 +337,7 @@ void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
4146
- if (cpu_has(c, X86_FEATURE_MWAIT) && mwait_usable(c)) {
4147
+ if (default_to_mwait && cpu_has(c, X86_FEATURE_MWAIT) && mwait_usable(c)) {
4149
* One CPU supports mwait => All CPUs supports mwait
4151
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
4152
index 0a1302f..1e7c506 100644
4153
--- a/arch/x86/kernel/process_32.c
4154
+++ b/arch/x86/kernel/process_32.c
4155
@@ -113,6 +113,7 @@ void cpu_idle(void)
4156
__get_cpu_var(irq_stat).idle_timestamp = jiffies;
4157
/* Don't trace irqs off for idle */
4158
stop_critical_timings();
4159
+ ipipe_suspend_domain();
4161
start_critical_timings();
4163
@@ -354,10 +355,12 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
4164
regs->cs = __USER_CS;
4167
+#ifndef CONFIG_IPIPE /* Lazily handled, init_fpu() will reset the state. */
4169
* Free the old FP and other extended state
4171
free_thread_xstate(current);
4174
EXPORT_SYMBOL_GPL(start_thread);
4176
@@ -552,7 +555,7 @@ struct task_struct * __switch_to(struct task_struct *prev_p, struct task_struct
4178
struct thread_struct *prev = &prev_p->thread,
4179
*next = &next_p->thread;
4180
- int cpu = smp_processor_id();
4181
+ int cpu = raw_smp_processor_id();
4182
struct tss_struct *tss = &per_cpu(init_tss, cpu);
4184
/* never put a printk in __switch_to... printk() calls wake_up*() indirectly */
4185
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
4186
index c958120..639edb5 100644
4187
--- a/arch/x86/kernel/process_64.c
4188
+++ b/arch/x86/kernel/process_64.c
4191
asmlinkage extern void ret_from_fork(void);
4193
+asmlinkage extern void thread_return(void);
4195
unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED;
4197
static ATOMIC_NOTIFIER_HEAD(idle_notifier);
4198
@@ -127,6 +129,7 @@ void cpu_idle(void)
4200
/* Don't trace irqs off for idle */
4201
stop_critical_timings();
4202
+ ipipe_suspend_domain();
4204
start_critical_timings();
4205
/* In many cases the interrupt that ended idle
4206
@@ -336,6 +339,7 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long sp,
4207
p->thread.sp = (unsigned long) childregs;
4208
p->thread.sp0 = (unsigned long) (childregs+1);
4209
p->thread.usersp = me->thread.usersp;
4210
+ p->thread.rip = (unsigned long) thread_return;
4212
set_tsk_thread_flag(p, TIF_FORK);
4214
@@ -395,10 +399,12 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
4215
regs->ss = __USER_DS;
4216
regs->flags = 0x200;
4218
+#ifndef CONFIG_IPIPE /* Lazily handled, init_fpu() will reset the state. */
4220
* Free the old FP and other extended state
4222
free_thread_xstate(current);
4225
EXPORT_SYMBOL_GPL(start_thread);
4227
@@ -557,7 +563,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
4229
struct thread_struct *prev = &prev_p->thread;
4230
struct thread_struct *next = &next_p->thread;
4231
- int cpu = smp_processor_id();
4232
+ int cpu = raw_smp_processor_id();
4233
struct tss_struct *tss = &per_cpu(init_tss, cpu);
4234
unsigned fsindex, gsindex;
4236
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
4237
index 18f9b19..d159922 100644
4238
--- a/arch/x86/kernel/smp.c
4239
+++ b/arch/x86/kernel/smp.c
4240
@@ -142,7 +142,7 @@ void native_send_call_func_ipi(cpumask_t mask)
4242
static void stop_this_cpu(void *dummy)
4244
- local_irq_disable();
4245
+ local_irq_disable_hw();
4249
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
4250
index f71f96f..813228c 100644
4251
--- a/arch/x86/kernel/smpboot.c
4252
+++ b/arch/x86/kernel/smpboot.c
4253
@@ -287,7 +287,7 @@ static int __cpuinitdata unsafe_smp;
4255
* Activate a secondary processor.
4257
-static void __cpuinit start_secondary(void *unused)
4258
+static void notrace __cpuinit start_secondary(void *unused)
4261
* Don't put *anything* before cpu_init(), SMP booting is too
4262
@@ -965,7 +965,7 @@ restore_state:
4263
int __cpuinit native_cpu_up(unsigned int cpu)
4265
int apicid = cpu_present_to_apicid(cpu);
4266
- unsigned long flags;
4267
+ unsigned long flags, _flags;
4270
WARN_ON(irqs_disabled());
4271
@@ -1017,9 +1017,9 @@ int __cpuinit native_cpu_up(unsigned int cpu)
4272
* Check TSC synchronization with the AP (keep irqs disabled
4275
- local_irq_save(flags);
4276
+ local_irq_save_full(flags, _flags);
4277
check_tsc_sync_source(cpu);
4278
- local_irq_restore(flags);
4279
+ local_irq_restore_full(flags, _flags);
4281
while (!cpu_online(cpu)) {
4283
diff --git a/arch/x86/kernel/time_32.c b/arch/x86/kernel/time_32.c
4284
index 77b400f..f53b944 100644
4285
--- a/arch/x86/kernel/time_32.c
4286
+++ b/arch/x86/kernel/time_32.c
4287
@@ -84,11 +84,12 @@ irqreturn_t timer_interrupt(int irq, void *dev_id)
4288
* manually to deassert NMI lines for the watchdog if run
4289
* on an 82489DX-based system.
4291
- spin_lock(&i8259A_lock);
4292
+ unsigned long flags;
4293
+ spin_lock_irqsave_cond(&i8259A_lock,flags);
4294
outb(0x0c, PIC_MASTER_OCW3);
4295
/* Ack the IRQ; AEOI will end it automatically. */
4296
inb(PIC_MASTER_POLL);
4297
- spin_unlock(&i8259A_lock);
4298
+ spin_unlock_irqrestore_cond(&i8259A_lock,flags);
4302
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
4303
index 70bdbb5..5251b23 100644
4304
--- a/arch/x86/kernel/traps.c
4305
+++ b/arch/x86/kernel/traps.c
4306
@@ -877,6 +877,7 @@ asmlinkage void math_state_restore(void)
4308
struct thread_info *thread = current_thread_info();
4309
struct task_struct *tsk = thread->task;
4310
+ unsigned long flags;
4312
if (!tsk_used_math(tsk)) {
4314
@@ -893,6 +894,7 @@ asmlinkage void math_state_restore(void)
4315
local_irq_disable();
4318
+ local_irq_save_hw_cond(flags);
4319
clts(); /* Allow maths ops (or we recurse) */
4320
#ifdef CONFIG_X86_32
4322
@@ -902,12 +904,14 @@ asmlinkage void math_state_restore(void)
4324
if (unlikely(restore_fpu_checking(tsk))) {
4326
+ local_irq_restore_hw_cond(flags);
4327
force_sig(SIGSEGV, tsk);
4331
thread->status |= TS_USEDFPU; /* So we fnsave on switch_to() */
4333
+ local_irq_restore_hw_cond(flags);
4335
EXPORT_SYMBOL_GPL(math_state_restore);
4337
diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c
4338
index 4eeb5cf..6699b26 100644
4339
--- a/arch/x86/kernel/vm86_32.c
4340
+++ b/arch/x86/kernel/vm86_32.c
4341
@@ -148,12 +148,14 @@ struct pt_regs *save_v86_state(struct kernel_vm86_regs *regs)
4345
+ local_irq_disable_hw_cond();
4346
tss = &per_cpu(init_tss, get_cpu());
4347
current->thread.sp0 = current->thread.saved_sp0;
4348
current->thread.sysenter_cs = __KERNEL_CS;
4349
load_sp0(tss, &current->thread);
4350
current->thread.saved_sp0 = 0;
4352
+ local_irq_enable_hw_cond();
4354
ret = KVM86->regs32;
4356
@@ -325,12 +327,14 @@ static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk
4357
tsk->thread.saved_fs = info->regs32->fs;
4358
savesegment(gs, tsk->thread.saved_gs);
4360
+ local_irq_disable_hw_cond();
4361
tss = &per_cpu(init_tss, get_cpu());
4362
tsk->thread.sp0 = (unsigned long) &info->VM86_TSS_ESP0;
4364
tsk->thread.sysenter_cs = 0;
4365
load_sp0(tss, &tsk->thread);
4367
+ local_irq_enable_hw_cond();
4369
tsk->thread.screen_bitmap = info->screen_bitmap;
4370
if (info->flags & VM86_SCREEN_BITMAP)
4371
diff --git a/arch/x86/lib/mmx_32.c b/arch/x86/lib/mmx_32.c
4372
index c9f2d9b..78d780a 100644
4373
--- a/arch/x86/lib/mmx_32.c
4374
+++ b/arch/x86/lib/mmx_32.c
4375
@@ -30,7 +30,7 @@ void *_mmx_memcpy(void *to, const void *from, size_t len)
4379
- if (unlikely(in_interrupt()))
4380
+ if (unlikely(!ipipe_root_domain_p || in_interrupt()))
4381
return __memcpy(to, from, len);
4384
diff --git a/arch/x86/lib/thunk_64.S b/arch/x86/lib/thunk_64.S
4385
index bf9a7d5..98609ae 100644
4386
--- a/arch/x86/lib/thunk_64.S
4387
+++ b/arch/x86/lib/thunk_64.S
4389
thunk lockdep_sys_exit_thunk,lockdep_sys_exit
4392
+#ifdef CONFIG_IPIPE
4393
+ thunk_retrax __ipipe_syscall_root_thunk,__ipipe_syscall_root
4396
/* SAVE_ARGS below is used only for the .cfi directives it contains. */
4399
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
4400
index 68dc060..9f1c6a9 100644
4401
--- a/arch/x86/mm/fault.c
4402
+++ b/arch/x86/mm/fault.c
4403
@@ -301,6 +301,56 @@ static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address)
4404
BUG_ON(pmd_page(*pmd) != pmd_page(*pmd_k));
4408
+static inline int vmalloc_sync_one(pgd_t *pgd, unsigned long address)
4411
+ pud_t *pud, *pud_ref;
4412
+ pmd_t *pmd, *pmd_ref;
4413
+ pte_t *pte, *pte_ref;
4415
+ /* Make sure we are in vmalloc area */
4416
+ if (!(address >= VMALLOC_START && address < VMALLOC_END))
4419
+ /* Copy kernel mappings over when needed. This can also
4420
+ happen within a race in page table update. In the latter
4421
+ case just flush. */
4423
+ pgd_ref = pgd_offset_k(address);
4424
+ if (pgd_none(*pgd_ref))
4426
+ if (pgd_none(*pgd))
4427
+ set_pgd(pgd, *pgd_ref);
4429
+ BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref));
4431
+ /* Below here mismatches are bugs because these lower tables
4434
+ pud = pud_offset(pgd, address);
4435
+ pud_ref = pud_offset(pgd_ref, address);
4436
+ if (pud_none(*pud_ref))
4438
+ if (pud_none(*pud) || pud_page_vaddr(*pud) != pud_page_vaddr(*pud_ref))
4440
+ pmd = pmd_offset(pud, address);
4441
+ pmd_ref = pmd_offset(pud_ref, address);
4442
+ if (pmd_none(*pmd_ref))
4444
+ if (pmd_none(*pmd) || pmd_page(*pmd) != pmd_page(*pmd_ref))
4446
+ pte_ref = pte_offset_kernel(pmd_ref, address);
4447
+ if (!pte_present(*pte_ref))
4449
+ pte = pte_offset_kernel(pmd, address);
4450
+ /* Don't use pte_page here, because the mappings can point
4451
+ outside mem_map, and the NUMA hash lookup cannot handle
4453
+ if (!pte_present(*pte) || pte_pfn(*pte) != pte_pfn(*pte_ref))
4459
#ifdef CONFIG_X86_64
4460
@@ -493,9 +543,9 @@ static int spurious_fault(unsigned long address,
4462
* This assumes no large pages in there.
4464
+#ifdef CONFIG_X86_32
4465
static int vmalloc_fault(unsigned long address)
4467
-#ifdef CONFIG_X86_32
4468
unsigned long pgd_paddr;
4471
@@ -519,56 +569,14 @@ static int vmalloc_fault(unsigned long address)
4472
if (!pte_present(*pte_k))
4477
- pgd_t *pgd, *pgd_ref;
4478
- pud_t *pud, *pud_ref;
4479
- pmd_t *pmd, *pmd_ref;
4480
- pte_t *pte, *pte_ref;
4482
- /* Make sure we are in vmalloc area */
4483
- if (!(address >= VMALLOC_START && address < VMALLOC_END))
4486
- /* Copy kernel mappings over when needed. This can also
4487
- happen within a race in page table update. In the later
4488
- case just flush. */
4490
- pgd = pgd_offset(current->active_mm, address);
4491
- pgd_ref = pgd_offset_k(address);
4492
- if (pgd_none(*pgd_ref))
4494
- if (pgd_none(*pgd))
4495
- set_pgd(pgd, *pgd_ref);
4497
- BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref));
4499
- /* Below here mismatches are bugs because these lower tables
4502
- pud = pud_offset(pgd, address);
4503
- pud_ref = pud_offset(pgd_ref, address);
4504
- if (pud_none(*pud_ref))
4506
- if (pud_none(*pud) || pud_page_vaddr(*pud) != pud_page_vaddr(*pud_ref))
4508
- pmd = pmd_offset(pud, address);
4509
- pmd_ref = pmd_offset(pud_ref, address);
4510
- if (pmd_none(*pmd_ref))
4512
- if (pmd_none(*pmd) || pmd_page(*pmd) != pmd_page(*pmd_ref))
4514
- pte_ref = pte_offset_kernel(pmd_ref, address);
4515
- if (!pte_present(*pte_ref))
4517
- pte = pte_offset_kernel(pmd, address);
4518
- /* Don't use pte_page here, because the mappings can point
4519
- outside mem_map, and the NUMA hash lookup cannot handle
4521
- if (!pte_present(*pte) || pte_pfn(*pte) != pte_pfn(*pte_ref))
4525
+static int vmalloc_fault(unsigned long address)
4527
+ pgd_t *pgd = pgd_offset(current->active_mm, address);
4528
+ return vmalloc_sync_one(pgd, address);
4532
int show_unhandled_signals = 1;
4534
@@ -592,13 +600,15 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
4535
unsigned long flags;
4538
+ /* get the address */
4539
+ address = read_cr2();
4541
+ local_irq_enable_hw_cond();
4545
prefetchw(&mm->mmap_sem);
4547
- /* get the address */
4548
- address = read_cr2();
4550
si_code = SEGV_MAPERR;
4552
if (unlikely(kmmio_fault(regs, address)))
4553
@@ -944,3 +954,43 @@ void vmalloc_sync_all(void)
4558
+#ifdef CONFIG_IPIPE
4559
+void __ipipe_pin_range_globally(unsigned long start, unsigned long end)
4561
+#ifdef CONFIG_X86_32
4562
+ unsigned long next, addr = start;
4565
+ unsigned long flags;
4566
+ struct page *page;
4568
+ next = pgd_addr_end(addr, end);
4569
+ spin_lock_irqsave(&pgd_lock, flags);
4570
+ list_for_each_entry(page, &pgd_list, lru)
4571
+ vmalloc_sync_one(page_address(page), addr);
4572
+ spin_unlock_irqrestore(&pgd_lock, flags);
4574
+ } while (addr = next, addr != end);
4576
+ unsigned long next, addr = start;
4580
+ struct page *page;
4582
+ next = pgd_addr_end(addr, end);
4583
+ spin_lock(&pgd_lock);
4584
+ list_for_each_entry(page, &pgd_list, lru) {
4586
+ pgd = (pgd_t *)page_address(page) + pgd_index(addr);
4587
+ ret = vmalloc_sync_one(pgd, addr);
4591
+ spin_unlock(&pgd_lock);
4593
+ } while (!ret && addr != end);
4596
+#endif /* CONFIG_IPIPE */
4597
diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c
4598
index 5f8d746..527aeba 100644
4599
--- a/drivers/acpi/processor_idle.c
4600
+++ b/drivers/acpi/processor_idle.c
4601
@@ -218,7 +218,7 @@ static void acpi_safe_halt(void)
4603
if (!need_resched()) {
4605
- local_irq_disable();
4606
+ local_irq_disable_hw(); local_irq_disable();
4608
current_thread_info()->status |= TS_POLLING;
4610
@@ -402,7 +402,7 @@ static void acpi_processor_idle(void)
4611
* Interrupts must be disabled during bus mastering calculations and
4612
* for C2/C3 transitions.
4614
- local_irq_disable();
4615
+ local_irq_disable_hw(); local_irq_disable();
4617
pr = __get_cpu_var(processors);
4619
@@ -1463,7 +1463,7 @@ static int acpi_idle_enter_c1(struct cpuidle_device *dev,
4623
- local_irq_disable();
4624
+ local_irq_disable_hw(); local_irq_disable();
4626
/* Do not access any ACPI IO ports in suspend path */
4627
if (acpi_idle_suspend) {
4628
@@ -1506,7 +1506,7 @@ static int acpi_idle_enter_simple(struct cpuidle_device *dev,
4629
if (acpi_idle_suspend)
4630
return(acpi_idle_enter_c1(dev, state));
4632
- local_irq_disable();
4633
+ local_irq_disable_hw(); local_irq_disable();
4634
current_thread_info()->status &= ~TS_POLLING;
4636
* TS_POLLING-cleared state must be visible before we test
4637
@@ -1589,14 +1589,14 @@ static int acpi_idle_enter_bm(struct cpuidle_device *dev,
4638
dev->last_state = dev->safe_state;
4639
return dev->safe_state->enter(dev, dev->safe_state);
4641
- local_irq_disable();
4642
+ local_irq_disable_hw(); local_irq_disable();
4649
- local_irq_disable();
4650
+ local_irq_disable_hw(); local_irq_disable();
4651
current_thread_info()->status &= ~TS_POLLING;
4653
* TS_POLLING-cleared state must be visible before we test
4654
diff --git a/drivers/pci/htirq.c b/drivers/pci/htirq.c
4655
index bf7d6ce..0ffac4e 100644
4656
--- a/drivers/pci/htirq.c
4657
+++ b/drivers/pci/htirq.c
4659
* With multiple simultaneous hypertransport irq devices it might pay
4660
* to make this more fine grained. But start with simple, stupid, and correct.
4662
-static DEFINE_SPINLOCK(ht_irq_lock);
4663
+static IPIPE_DEFINE_SPINLOCK(ht_irq_lock);
4666
struct pci_dev *dev;
4667
diff --git a/drivers/serial/8250.c b/drivers/serial/8250.c
4668
index a181ccf..e494638 100644
4669
--- a/drivers/serial/8250.c
4670
+++ b/drivers/serial/8250.c
4671
@@ -2895,6 +2895,51 @@ static int serial8250_resume(struct platform_device *dev)
4675
+#if defined(CONFIG_IPIPE_DEBUG) && defined(CONFIG_SERIAL_8250_CONSOLE)
4677
+#include <stdarg.h>
4679
+void __ipipe_serial_debug(const char *fmt, ...)
4681
+ struct uart_8250_port *up = &serial8250_ports[0];
4682
+ unsigned int ier, count;
4683
+ unsigned long flags;
4687
+ va_start(ap, fmt);
4688
+ vsprintf(buf, fmt, ap);
4690
+ count = strlen(buf);
4692
+ touch_nmi_watchdog();
4694
+ local_irq_save_hw(flags);
4697
+ * First save the IER then disable the interrupts
4699
+ ier = serial_in(up, UART_IER);
4701
+ if (up->capabilities & UART_CAP_UUE)
4702
+ serial_out(up, UART_IER, UART_IER_UUE);
4704
+ serial_out(up, UART_IER, 0);
4706
+ uart_console_write(&up->port, buf, count, serial8250_console_putchar);
4709
+ * Finally, wait for transmitter to become empty
4710
+ * and restore the IER
4712
+ wait_for_xmitr(up, BOTH_EMPTY);
4713
+ serial_out(up, UART_IER, ier);
4715
+ local_irq_restore_hw(flags);
4720
static struct platform_driver serial8250_isa_driver = {
4721
.probe = serial8250_probe,
4722
.remove = __devexit_p(serial8250_remove),
4723
diff --git a/include/asm-generic/cmpxchg-local.h b/include/asm-generic/cmpxchg-local.h
4724
index b2ba2fc..ed01ab9 100644
4725
--- a/include/asm-generic/cmpxchg-local.h
4726
+++ b/include/asm-generic/cmpxchg-local.h
4727
@@ -20,7 +20,7 @@ static inline unsigned long __cmpxchg_local_generic(volatile void *ptr,
4728
if (size == 8 && sizeof(unsigned long) != 8)
4729
wrong_size_cmpxchg(ptr);
4731
- local_irq_save(flags);
4732
+ local_irq_save_hw(flags);
4734
case 1: prev = *(u8 *)ptr;
4736
@@ -41,7 +41,7 @@ static inline unsigned long __cmpxchg_local_generic(volatile void *ptr,
4738
wrong_size_cmpxchg(ptr);
4740
- local_irq_restore(flags);
4741
+ local_irq_restore_hw(flags);
4745
@@ -54,11 +54,11 @@ static inline u64 __cmpxchg64_local_generic(volatile void *ptr,
4747
unsigned long flags;
4749
- local_irq_save(flags);
4750
+ local_irq_save_hw(flags);
4754
- local_irq_restore(flags);
4755
+ local_irq_restore_hw(flags);
4759
diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h
4760
index 181006c..bc69d94 100644
4761
--- a/include/linux/hardirq.h
4762
+++ b/include/linux/hardirq.h
4763
@@ -161,7 +161,22 @@ extern void irq_enter(void);
4765
extern void irq_exit(void);
4767
-#define nmi_enter() do { lockdep_off(); __irq_enter(); } while (0)
4768
-#define nmi_exit() do { __irq_exit(); lockdep_on(); } while (0)
4769
+#define nmi_enter() \
4771
+ ipipe_nmi_enter(); \
4772
+ if (ipipe_root_domain_p) { \
4778
+#define nmi_exit() \
4780
+ if (ipipe_root_domain_p) { \
4784
+ ipipe_nmi_exit(); \
4787
#endif /* LINUX_HARDIRQ_H */
4788
diff --git a/include/linux/ipipe.h b/include/linux/ipipe.h
4789
new file mode 100644
4790
index 0000000..2b12117
4792
+++ b/include/linux/ipipe.h
4795
+ * include/linux/ipipe.h
4797
+ * Copyright (C) 2002-2007 Philippe Gerum.
4799
+ * This program is free software; you can redistribute it and/or modify
4800
+ * it under the terms of the GNU General Public License as published by
4801
+ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139,
4802
+ * USA; either version 2 of the License, or (at your option) any later
4805
+ * This program is distributed in the hope that it will be useful,
4806
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
4807
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
4808
+ * GNU General Public License for more details.
4810
+ * You should have received a copy of the GNU General Public License
4811
+ * along with this program; if not, write to the Free Software
4812
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
4815
+#ifndef __LINUX_IPIPE_H
4816
+#define __LINUX_IPIPE_H
4818
+#include <linux/spinlock.h>
4819
+#include <linux/cache.h>
4820
+#include <linux/percpu.h>
4821
+#include <linux/mutex.h>
4822
+#include <linux/linkage.h>
4823
+#include <linux/ipipe_base.h>
4824
+#include <linux/ipipe_compat.h>
4825
+#include <asm/ipipe.h>
4827
+#ifdef CONFIG_IPIPE_DEBUG_CONTEXT
4829
+#include <linux/cpumask.h>
4830
+#include <asm/system.h>
4832
+static inline int ipipe_disable_context_check(int cpu)
4834
+ return xchg(&per_cpu(ipipe_percpu_context_check, cpu), 0);
4837
+static inline void ipipe_restore_context_check(int cpu, int old_state)
4839
+ per_cpu(ipipe_percpu_context_check, cpu) = old_state;
4842
+static inline void ipipe_context_check_off(void)
4845
+ for_each_online_cpu(cpu)
4846
+ per_cpu(ipipe_percpu_context_check, cpu) = 0;
4849
+#else /* !CONFIG_IPIPE_DEBUG_CONTEXT */
4851
+static inline int ipipe_disable_context_check(int cpu)
4856
+static inline void ipipe_restore_context_check(int cpu, int old_state) { }
4858
+static inline void ipipe_context_check_off(void) { }
4860
+#endif /* !CONFIG_IPIPE_DEBUG_CONTEXT */
4862
+#ifdef CONFIG_IPIPE
4865
+ * Sanity check: IPIPE_VIRQ_BASE depends on CONFIG_NR_CPUS, and if the
4866
+ * latter gets too large, we fail to map the virtual interrupts.
4868
+#if IPIPE_VIRQ_BASE / BITS_PER_LONG > BITS_PER_LONG
4869
+#error "CONFIG_NR_CPUS is too large, please lower it."
4872
+#define IPIPE_VERSION_STRING IPIPE_ARCH_STRING
4873
+#define IPIPE_RELEASE_NUMBER ((IPIPE_MAJOR_NUMBER << 16) | \
4874
+ (IPIPE_MINOR_NUMBER << 8) | \
4875
+ (IPIPE_PATCH_NUMBER))
4877
+#ifndef BROKEN_BUILTIN_RETURN_ADDRESS
4878
+#define __BUILTIN_RETURN_ADDRESS0 ((unsigned long)__builtin_return_address(0))
4879
+#define __BUILTIN_RETURN_ADDRESS1 ((unsigned long)__builtin_return_address(1))
4880
+#endif /* !BROKEN_BUILTIN_RETURN_ADDRESS */
4882
+#define IPIPE_ROOT_PRIO 100
4883
+#define IPIPE_ROOT_ID 0
4884
+#define IPIPE_ROOT_NPTDKEYS 4 /* Must be <= BITS_PER_LONG */
4886
+#define IPIPE_RESET_TIMER 0x1
4887
+#define IPIPE_GRAB_TIMER 0x2
4889
+/* Global domain flags */
4890
+#define IPIPE_SPRINTK_FLAG 0 /* Synchronous printk() allowed */
4891
+#define IPIPE_AHEAD_FLAG 1 /* Domain always heads the pipeline */
4893
+/* Interrupt control bits */
4894
+#define IPIPE_HANDLE_FLAG 0
4895
+#define IPIPE_PASS_FLAG 1
4896
+#define IPIPE_ENABLE_FLAG 2
4897
+#define IPIPE_DYNAMIC_FLAG IPIPE_HANDLE_FLAG
4898
+#define IPIPE_STICKY_FLAG 3
4899
+#define IPIPE_SYSTEM_FLAG 4
4900
+#define IPIPE_LOCK_FLAG 5
4901
+#define IPIPE_WIRED_FLAG 6
4902
+#define IPIPE_EXCLUSIVE_FLAG 7
4904
+#define IPIPE_HANDLE_MASK (1 << IPIPE_HANDLE_FLAG)
4905
+#define IPIPE_PASS_MASK (1 << IPIPE_PASS_FLAG)
4906
+#define IPIPE_ENABLE_MASK (1 << IPIPE_ENABLE_FLAG)
4907
+#define IPIPE_DYNAMIC_MASK IPIPE_HANDLE_MASK
4908
+#define IPIPE_STICKY_MASK (1 << IPIPE_STICKY_FLAG)
4909
+#define IPIPE_SYSTEM_MASK (1 << IPIPE_SYSTEM_FLAG)
4910
+#define IPIPE_LOCK_MASK (1 << IPIPE_LOCK_FLAG)
4911
+#define IPIPE_WIRED_MASK (1 << IPIPE_WIRED_FLAG)
4912
+#define IPIPE_EXCLUSIVE_MASK (1 << IPIPE_EXCLUSIVE_FLAG)
4914
+#define IPIPE_DEFAULT_MASK (IPIPE_HANDLE_MASK|IPIPE_PASS_MASK)
4915
+#define IPIPE_STDROOT_MASK (IPIPE_HANDLE_MASK|IPIPE_PASS_MASK|IPIPE_SYSTEM_MASK)
4917
+#define IPIPE_EVENT_SELF 0x80000000
4919
+#define IPIPE_NR_CPUS NR_CPUS
4921
+#define ipipe_current_domain ipipe_cpu_var(ipipe_percpu_domain)
4923
+#define ipipe_virtual_irq_p(irq) ((irq) >= IPIPE_VIRQ_BASE && \
4924
+ (irq) < IPIPE_NR_IRQS)
4926
+#define IPIPE_SAME_HANDLER ((ipipe_irq_handler_t)(-1))
4930
+typedef void (*ipipe_irq_ackfn_t)(unsigned irq, struct irq_desc *desc);
4932
+typedef int (*ipipe_event_handler_t)(unsigned event,
4933
+ struct ipipe_domain *from,
4935
+struct ipipe_domain {
4937
+ int slot; /* Slot number in percpu domain data array. */
4938
+ struct list_head p_link; /* Link in pipeline */
4939
+ ipipe_event_handler_t evhand[IPIPE_NR_EVENTS]; /* Event handlers. */
4940
+ unsigned long long evself; /* Self-monitored event bits. */
4943
+ unsigned long control;
4944
+ ipipe_irq_ackfn_t acknowledge;
4945
+ ipipe_irq_handler_t handler;
4947
+ } ____cacheline_aligned irqs[IPIPE_NR_IRQS];
4951
+ unsigned long flags;
4954
+ struct mutex mutex;
4957
+#define IPIPE_HEAD_PRIORITY (-1) /* For domains always heading the pipeline */
4959
+struct ipipe_domain_attr {
4961
+ unsigned domid; /* Domain identifier -- Magic value set by caller */
4962
+ const char *name; /* Domain name -- Warning: won't be dup'ed! */
4963
+ int priority; /* Priority in interrupt pipeline */
4964
+ void (*entry) (void); /* Domain entry point */
4965
+ void *pdd; /* Per-domain (opaque) data pointer */
4969
+/* These ops must start and complete on the same CPU: care for
4971
+#define set_bit_safe(b, a) \
4972
+ ({ unsigned long __flags; \
4973
+ local_irq_save_hw_notrace(__flags); \
4974
+ __set_bit(b, a); \
4975
+ local_irq_restore_hw_notrace(__flags); })
4976
+#define test_and_set_bit_safe(b, a) \
4977
+ ({ unsigned long __flags, __x; \
4978
+ local_irq_save_hw_notrace(__flags); \
4979
+ __x = __test_and_set_bit(b, a); \
4980
+ local_irq_restore_hw_notrace(__flags); __x; })
4981
+#define clear_bit_safe(b, a) \
4982
+ ({ unsigned long __flags; \
4983
+ local_irq_save_hw_notrace(__flags); \
4984
+ __clear_bit(b, a); \
4985
+ local_irq_restore_hw_notrace(__flags); })
4987
+#define set_bit_safe(b, a) set_bit(b, a)
4988
+#define test_and_set_bit_safe(b, a) test_and_set_bit(b, a)
4989
+#define clear_bit_safe(b, a) clear_bit(b, a)
4992
+#define __ipipe_irq_cookie(ipd, irq) (ipd)->irqs[irq].cookie
4993
+#define __ipipe_irq_handler(ipd, irq) (ipd)->irqs[irq].handler
4994
+#define __ipipe_cpudata_irq_hits(ipd, cpu, irq) ipipe_percpudom(ipd, irqall, cpu)[irq]
4996
+extern unsigned __ipipe_printk_virq;
4998
+extern unsigned long __ipipe_virtual_irq_map;
5000
+extern struct list_head __ipipe_pipeline;
5002
+extern int __ipipe_event_monitors[];
5004
+/* Private interface */
5006
+void ipipe_init(void);
5008
+#ifdef CONFIG_PROC_FS
5009
+void ipipe_init_proc(void);
5011
+#ifdef CONFIG_IPIPE_TRACE
5012
+void __ipipe_init_tracer(void);
5013
+#else /* !CONFIG_IPIPE_TRACE */
5014
+#define __ipipe_init_tracer() do { } while(0)
5015
+#endif /* CONFIG_IPIPE_TRACE */
5017
+#else /* !CONFIG_PROC_FS */
5018
+#define ipipe_init_proc() do { } while(0)
5019
+#endif /* CONFIG_PROC_FS */
5021
+void __ipipe_init_stage(struct ipipe_domain *ipd);
5023
+void __ipipe_cleanup_domain(struct ipipe_domain *ipd);
5025
+void __ipipe_add_domain_proc(struct ipipe_domain *ipd);
5027
+void __ipipe_remove_domain_proc(struct ipipe_domain *ipd);
5029
+void __ipipe_flush_printk(unsigned irq, void *cookie);
5031
+void __ipipe_walk_pipeline(struct list_head *pos);
5033
+void __ipipe_pend_irq(unsigned irq, struct list_head *head);
5035
+int __ipipe_dispatch_event(unsigned event, void *data);
5037
+void __ipipe_dispatch_wired_nocheck(struct ipipe_domain *head, unsigned irq);
5039
+void __ipipe_dispatch_wired(struct ipipe_domain *head, unsigned irq);
5041
+void __ipipe_sync_stage(unsigned long syncmask);
5043
+void __ipipe_set_irq_pending(struct ipipe_domain *ipd, unsigned irq);
5045
+void __ipipe_lock_irq(struct ipipe_domain *ipd, int cpu, unsigned irq);
5047
+void __ipipe_unlock_irq(struct ipipe_domain *ipd, unsigned irq);
5049
+void __ipipe_pin_range_globally(unsigned long start, unsigned long end);
5051
+/* Must be called hw IRQs off. */
5052
+static inline void ipipe_irq_lock(unsigned irq)
5054
+ __ipipe_lock_irq(ipipe_current_domain, ipipe_processor_id(), irq);
5057
+/* Must be called hw IRQs off. */
5058
+static inline void ipipe_irq_unlock(unsigned irq)
5060
+ __ipipe_unlock_irq(ipipe_current_domain, irq);
5063
+#ifndef __ipipe_sync_pipeline
5064
+#define __ipipe_sync_pipeline(syncmask) __ipipe_sync_stage(syncmask)
5067
+#ifndef __ipipe_run_irqtail
5068
+#define __ipipe_run_irqtail() do { } while(0)
5071
+#define __ipipe_pipeline_head_p(ipd) (&(ipd)->p_link == __ipipe_pipeline.next)
5074
+ * Keep the following as a macro, so that client code could check for
5075
+ * the support of the invariant pipeline head optimization.
5077
+#define __ipipe_pipeline_head() \
5078
+ list_entry(__ipipe_pipeline.next, struct ipipe_domain, p_link)
5080
+/*
+ * Non-zero when event `ev` has a positive count in __ipipe_event_monitors[]
+ * or when bit `ev` is set in the current domain's evself mask.
+ * The shift uses an unsigned 64-bit one so bit 63 is well defined, and
+ * `ev` is parenthesized so compound argument expressions shift as a unit.
+ */
+#define __ipipe_event_monitored_p(ev) \
+	(__ipipe_event_monitors[ev] > 0 || \
+	 (ipipe_current_domain->evself & (1ULL << (ev))))
5085
+cpumask_t __ipipe_set_irq_affinity(unsigned irq,
5086
+ cpumask_t cpumask);
5088
+int __ipipe_send_ipi(unsigned ipi,
5089
+ cpumask_t cpumask);
5091
+#endif /* CONFIG_SMP */
5093
+#define ipipe_sigwake_notify(p) \
5095
+ if (((p)->flags & PF_EVNOTIFY) && __ipipe_event_monitored_p(IPIPE_EVENT_SIGWAKE)) \
5096
+ __ipipe_dispatch_event(IPIPE_EVENT_SIGWAKE,p); \
5099
+#define ipipe_exit_notify(p) \
5101
+ if (((p)->flags & PF_EVNOTIFY) && __ipipe_event_monitored_p(IPIPE_EVENT_EXIT)) \
5102
+ __ipipe_dispatch_event(IPIPE_EVENT_EXIT,p); \
5105
+#define ipipe_setsched_notify(p) \
5107
+ if (((p)->flags & PF_EVNOTIFY) && __ipipe_event_monitored_p(IPIPE_EVENT_SETSCHED)) \
5108
+ __ipipe_dispatch_event(IPIPE_EVENT_SETSCHED,p); \
5111
+#define ipipe_schedule_notify(prev, next) \
5113
+ if ((((prev)->flags|(next)->flags) & PF_EVNOTIFY) && \
5114
+ __ipipe_event_monitored_p(IPIPE_EVENT_SCHEDULE)) \
5115
+ __ipipe_dispatch_event(IPIPE_EVENT_SCHEDULE,next); \
5118
+#define ipipe_trap_notify(ex, regs) \
5121
+ if ((test_bit(IPIPE_NOSTACK_FLAG, &ipipe_this_cpudom_var(status)) || \
5122
+ ((current)->flags & PF_EVNOTIFY)) && \
5123
+ __ipipe_event_monitored_p(ex)) \
5124
+ ret = __ipipe_dispatch_event(ex, regs); \
5128
+static inline void ipipe_init_notify(struct task_struct *p)
5130
+ if (__ipipe_event_monitored_p(IPIPE_EVENT_INIT))
5131
+ __ipipe_dispatch_event(IPIPE_EVENT_INIT,p);
5136
+static inline void ipipe_cleanup_notify(struct mm_struct *mm)
5138
+ if (__ipipe_event_monitored_p(IPIPE_EVENT_CLEANUP))
5139
+ __ipipe_dispatch_event(IPIPE_EVENT_CLEANUP,mm);
5142
+/* Public interface */
5144
+int ipipe_register_domain(struct ipipe_domain *ipd,
5145
+ struct ipipe_domain_attr *attr);
5147
+int ipipe_unregister_domain(struct ipipe_domain *ipd);
5149
+void ipipe_suspend_domain(void);
5151
+int ipipe_virtualize_irq(struct ipipe_domain *ipd,
5153
+ ipipe_irq_handler_t handler,
5155
+ ipipe_irq_ackfn_t acknowledge,
5156
+ unsigned modemask);
5158
+int ipipe_control_irq(unsigned irq,
5160
+ unsigned setmask);
5162
+unsigned ipipe_alloc_virq(void);
5164
+int ipipe_free_virq(unsigned virq);
5166
+int ipipe_trigger_irq(unsigned irq);
5168
+static inline void __ipipe_propagate_irq(unsigned irq)
5170
+ struct list_head *next = ipipe_current_domain->p_link.next;
5171
+ if (next == &ipipe_root.p_link) {
5172
+ /* Fast path: root must handle all interrupts. */
5173
+ __ipipe_set_irq_pending(&ipipe_root, irq);
5176
+ __ipipe_pend_irq(irq, next);
5179
+static inline void __ipipe_schedule_irq(unsigned irq)
5181
+ __ipipe_pend_irq(irq, &ipipe_current_domain->p_link);
5184
+static inline void __ipipe_schedule_irq_head(unsigned irq)
5186
+ __ipipe_set_irq_pending(__ipipe_pipeline_head(), irq);
5189
+static inline void __ipipe_schedule_irq_root(unsigned irq)
5191
+ __ipipe_set_irq_pending(&ipipe_root, irq);
5194
+static inline void ipipe_propagate_irq(unsigned irq)
5196
+ unsigned long flags;
5198
+ local_irq_save_hw(flags);
5199
+ __ipipe_propagate_irq(irq);
5200
+ local_irq_restore_hw(flags);
5203
+static inline void ipipe_schedule_irq(unsigned irq)
5205
+ unsigned long flags;
5207
+ local_irq_save_hw(flags);
5208
+ __ipipe_schedule_irq(irq);
5209
+ local_irq_restore_hw(flags);
5212
+static inline void ipipe_schedule_irq_head(unsigned irq)
5214
+ unsigned long flags;
5216
+ local_irq_save_hw(flags);
5217
+ __ipipe_schedule_irq_head(irq);
5218
+ local_irq_restore_hw(flags);
5221
+static inline void ipipe_schedule_irq_root(unsigned irq)
5223
+ unsigned long flags;
5225
+ local_irq_save_hw(flags);
5226
+ __ipipe_schedule_irq_root(irq);
5227
+ local_irq_restore_hw(flags);
5230
+void ipipe_stall_pipeline_from(struct ipipe_domain *ipd);
5232
+unsigned long ipipe_test_and_stall_pipeline_from(struct ipipe_domain *ipd);
5234
+void ipipe_unstall_pipeline_from(struct ipipe_domain *ipd);
5236
+unsigned long ipipe_test_and_unstall_pipeline_from(struct ipipe_domain *ipd);
5238
+void ipipe_restore_pipeline_from(struct ipipe_domain *ipd,
5241
+static inline unsigned long ipipe_test_pipeline_from(struct ipipe_domain *ipd)
5243
+ return test_bit(IPIPE_STALL_FLAG, &ipipe_cpudom_var(ipd, status));
5246
+static inline void ipipe_stall_pipeline_head(void)
5248
+ local_irq_disable_hw();
5249
+ __set_bit(IPIPE_STALL_FLAG, &ipipe_head_cpudom_var(status));
5252
+static inline unsigned long ipipe_test_and_stall_pipeline_head(void)
5254
+ local_irq_disable_hw();
5255
+ return __test_and_set_bit(IPIPE_STALL_FLAG, &ipipe_head_cpudom_var(status));
5258
+void ipipe_unstall_pipeline_head(void);
5260
+void __ipipe_restore_pipeline_head(unsigned long x);
5262
+static inline void ipipe_restore_pipeline_head(unsigned long x)
5264
+ /* On some archs, __test_and_set_bit() might return a different
5265
+ * truth value than test_bit(), so we test the exclusive OR of
5266
+ * both statuses, assuming that the lowest bit is always set in
5267
+ * the truth value (if this is wrong, the failed optimization will
5268
+ * be caught in __ipipe_restore_pipeline_head() if
5269
+ * CONFIG_DEBUG_KERNEL is set). */
5270
+ if ((x ^ test_bit(IPIPE_STALL_FLAG, &ipipe_head_cpudom_var(status))) & 1)
5271
+ __ipipe_restore_pipeline_head(x);
5274
+#define ipipe_unstall_pipeline() \
5275
+ ipipe_unstall_pipeline_from(ipipe_current_domain)
5277
+#define ipipe_test_and_unstall_pipeline() \
5278
+ ipipe_test_and_unstall_pipeline_from(ipipe_current_domain)
5280
+#define ipipe_test_pipeline() \
5281
+ ipipe_test_pipeline_from(ipipe_current_domain)
5283
+#define ipipe_test_and_stall_pipeline() \
5284
+ ipipe_test_and_stall_pipeline_from(ipipe_current_domain)
5286
+#define ipipe_stall_pipeline() \
5287
+ ipipe_stall_pipeline_from(ipipe_current_domain)
5289
+#define ipipe_restore_pipeline(x) \
5290
+ ipipe_restore_pipeline_from(ipipe_current_domain, (x))
5292
+void ipipe_init_attr(struct ipipe_domain_attr *attr);
5294
+int ipipe_get_sysinfo(struct ipipe_sysinfo *sysinfo);
5296
+unsigned long ipipe_critical_enter(void (*syncfn) (void));
5298
+void ipipe_critical_exit(unsigned long flags);
5300
+static inline void ipipe_set_printk_sync(struct ipipe_domain *ipd)
5302
+ set_bit(IPIPE_SPRINTK_FLAG, &ipd->flags);
5305
+static inline void ipipe_set_printk_async(struct ipipe_domain *ipd)
5307
+ clear_bit(IPIPE_SPRINTK_FLAG, &ipd->flags);
5310
+static inline void ipipe_set_foreign_stack(struct ipipe_domain *ipd)
5312
+ /* Must be called hw interrupts off. */
5313
+ __set_bit(IPIPE_NOSTACK_FLAG, &ipipe_cpudom_var(ipd, status));
5316
+static inline void ipipe_clear_foreign_stack(struct ipipe_domain *ipd)
5318
+ /* Must be called hw interrupts off. */
5319
+ __clear_bit(IPIPE_NOSTACK_FLAG, &ipipe_cpudom_var(ipd, status));
5322
+#ifndef ipipe_safe_current
5323
+#define ipipe_safe_current() \
5325
+ struct task_struct *p; \
5326
+ p = test_bit(IPIPE_NOSTACK_FLAG, \
5327
+ &ipipe_this_cpudom_var(status)) ? &init_task : current; \
5332
+ipipe_event_handler_t ipipe_catch_event(struct ipipe_domain *ipd,
5334
+ ipipe_event_handler_t handler);
5336
+cpumask_t ipipe_set_irq_affinity(unsigned irq,
5337
+ cpumask_t cpumask);
5339
+int ipipe_send_ipi(unsigned ipi,
5340
+ cpumask_t cpumask);
5342
+int ipipe_setscheduler_root(struct task_struct *p,
5346
+int ipipe_reenter_root(struct task_struct *prev,
5350
+int ipipe_alloc_ptdkey(void);
5352
+int ipipe_free_ptdkey(int key);
5354
+int ipipe_set_ptd(int key,
5357
+void *ipipe_get_ptd(int key);
5359
+int ipipe_disable_ondemand_mappings(struct task_struct *tsk);
5361
+#define local_irq_enable_hw_cond() local_irq_enable_hw()
5362
+#define local_irq_disable_hw_cond() local_irq_disable_hw()
5363
+#define local_irq_save_hw_cond(flags) local_irq_save_hw(flags)
5364
+#define local_irq_restore_hw_cond(flags) local_irq_restore_hw(flags)
5366
+#define local_irq_enable_nohead(ipd) \
5368
+ if (!__ipipe_pipeline_head_p(ipd)) \
5369
+ local_irq_enable_hw(); \
5372
+#define local_irq_disable_nohead(ipd) \
5374
+ if (!__ipipe_pipeline_head_p(ipd)) \
5375
+ local_irq_disable_hw(); \
5378
+#define local_irq_save_full(vflags, rflags) \
5380
+ local_irq_save(vflags); \
5381
+ local_irq_save_hw(rflags); \
5384
+#define local_irq_restore_full(vflags, rflags) \
5386
+ local_irq_restore_hw(rflags); \
5387
+ local_irq_restore(vflags); \
5390
+static inline void local_irq_restore_nosync(unsigned long x)
5392
+ if (raw_irqs_disabled_flags(x))
5393
+ set_bit(IPIPE_STALL_FLAG, &ipipe_cpudom_var(ipipe_root_domain, status));
5395
+ clear_bit(IPIPE_STALL_FLAG, &ipipe_cpudom_var(ipipe_root_domain, status));
5398
+#define ipipe_root_domain_p (ipipe_current_domain == ipipe_root_domain)
5400
+static inline void ipipe_nmi_enter(void)
5402
+ int cpu = ipipe_processor_id();
5404
+ per_cpu(ipipe_nmi_saved_root, cpu) = ipipe_root_cpudom_var(status);
5405
+ __set_bit(IPIPE_STALL_FLAG, &ipipe_root_cpudom_var(status));
5407
+#ifdef CONFIG_IPIPE_DEBUG_CONTEXT
5408
+ per_cpu(ipipe_saved_context_check_state, cpu) =
5409
+ ipipe_disable_context_check(cpu);
5410
+#endif /* CONFIG_IPIPE_DEBUG_CONTEXT */
5413
+static inline void ipipe_nmi_exit(void)
5415
+ int cpu = ipipe_processor_id();
5417
+#ifdef CONFIG_IPIPE_DEBUG_CONTEXT
5418
+ ipipe_restore_context_check
5419
+ (cpu, per_cpu(ipipe_saved_context_check_state, cpu));
5420
+#endif /* CONFIG_IPIPE_DEBUG_CONTEXT */
5422
+ if (!test_bit(IPIPE_STALL_FLAG, &per_cpu(ipipe_nmi_saved_root, cpu)))
5423
+ __clear_bit(IPIPE_STALL_FLAG, &ipipe_root_cpudom_var(status));
5426
+#else /* !CONFIG_IPIPE */
5428
+#define ipipe_init() do { } while(0)
5429
+#define ipipe_suspend_domain() do { } while(0)
5430
+#define ipipe_sigwake_notify(p) do { } while(0)
5431
+#define ipipe_setsched_notify(p) do { } while(0)
5432
+#define ipipe_init_notify(p) do { } while(0)
5433
+#define ipipe_exit_notify(p) do { } while(0)
5434
+#define ipipe_cleanup_notify(mm) do { } while(0)
5435
+#define ipipe_trap_notify(t,r) 0
5436
+#define ipipe_init_proc() do { } while(0)
5438
+static inline void __ipipe_pin_range_globally(unsigned long start,
5439
+ unsigned long end)
5443
+#define local_irq_enable_hw_cond() do { } while(0)
5444
+#define local_irq_disable_hw_cond() do { } while(0)
5445
+#define local_irq_save_hw_cond(flags) do { (void)(flags); } while(0)
5446
+#define local_irq_restore_hw_cond(flags) do { } while(0)
5448
+#define ipipe_irq_lock(irq) do { } while(0)
5449
+#define ipipe_irq_unlock(irq) do { } while(0)
5451
+#define ipipe_root_domain_p 1
5452
+#define ipipe_safe_current current
5453
+#define ipipe_processor_id() smp_processor_id()
5455
+#define ipipe_nmi_enter() do { } while (0)
5456
+#define ipipe_nmi_exit() do { } while (0)
5458
+#define local_irq_disable_head() local_irq_disable()
5460
+#define local_irq_save_full(vflags, rflags) do { (void)(vflags); local_irq_save(rflags); } while(0)
5461
+#define local_irq_restore_full(vflags, rflags) do { (void)(vflags); local_irq_restore(rflags); } while(0)
5462
+#define local_irq_restore_nosync(vflags) local_irq_restore(vflags)
5464
+#endif /* CONFIG_IPIPE */
5466
+#endif /* !__LINUX_IPIPE_H */
5467
diff --git a/include/linux/ipipe_base.h b/include/linux/ipipe_base.h
5468
new file mode 100644
5469
index 0000000..c49fe14
5471
+++ b/include/linux/ipipe_base.h
5474
+ * include/linux/ipipe_base.h
5476
+ * Copyright (C) 2002-2007 Philippe Gerum.
5477
+ * 2007 Jan Kiszka.
5479
+ * This program is free software; you can redistribute it and/or modify
5480
+ * it under the terms of the GNU General Public License as published by
5481
+ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139,
5482
+ * USA; either version 2 of the License, or (at your option) any later
5485
+ * This program is distributed in the hope that it will be useful,
5486
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
5487
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
5488
+ * GNU General Public License for more details.
5490
+ * You should have received a copy of the GNU General Public License
5491
+ * along with this program; if not, write to the Free Software
5492
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
5495
+#ifndef __LINUX_IPIPE_BASE_H
5496
+#define __LINUX_IPIPE_BASE_H
5498
+#ifdef CONFIG_IPIPE
5500
+#include <linux/bitops.h>
5501
+#include <asm/ipipe_base.h>
5503
+/* Number of virtual IRQs */
5504
+#define IPIPE_NR_VIRQS BITS_PER_LONG
5505
+/* First virtual IRQ # */
5506
+#define IPIPE_VIRQ_BASE (((IPIPE_NR_XIRQS + BITS_PER_LONG - 1) / BITS_PER_LONG) * BITS_PER_LONG)
5507
+/* Total number of IRQ slots */
5508
+#define IPIPE_NR_IRQS (IPIPE_VIRQ_BASE + IPIPE_NR_VIRQS)
5509
+/* Number of indirect words needed to map the whole IRQ space. */
5510
+#define IPIPE_IRQ_IWORDS ((IPIPE_NR_IRQS + BITS_PER_LONG - 1) / BITS_PER_LONG)
5511
+#define IPIPE_IRQ_IMASK (BITS_PER_LONG - 1)
5512
+#define IPIPE_IRQMASK_ANY (~0L)
5513
+#define IPIPE_IRQMASK_VIRT (IPIPE_IRQMASK_ANY << (IPIPE_VIRQ_BASE / BITS_PER_LONG))
5515
+/* Per-cpu pipeline status */
5516
+#define IPIPE_STALL_FLAG 0 /* Stalls a pipeline stage -- guaranteed at bit #0 */
5517
+#define IPIPE_SYNC_FLAG 1 /* The interrupt syncer is running for the domain */
5518
+#define IPIPE_NOSTACK_FLAG 2 /* Domain currently runs on a foreign stack */
5520
+#define IPIPE_STALL_MASK (1L << IPIPE_STALL_FLAG)
5521
+#define IPIPE_SYNC_MASK (1L << IPIPE_SYNC_FLAG)
5522
+#define IPIPE_NOSTACK_MASK (1L << IPIPE_NOSTACK_FLAG)
5524
+typedef void (*ipipe_irq_handler_t)(unsigned irq,
5527
+extern struct ipipe_domain ipipe_root;
5529
+#define ipipe_root_domain (&ipipe_root)
5531
+void __ipipe_unstall_root(void);
5533
+void __ipipe_restore_root(unsigned long x);
5535
+#define ipipe_preempt_disable(flags) local_irq_save_hw(flags)
5536
+#define ipipe_preempt_enable(flags) local_irq_restore_hw(flags)
5538
+#ifdef CONFIG_IPIPE_DEBUG_CONTEXT
5539
+void ipipe_check_context(struct ipipe_domain *border_ipd);
5540
+#else /* !CONFIG_IPIPE_DEBUG_CONTEXT */
5541
+static inline void ipipe_check_context(struct ipipe_domain *border_ipd) { }
5542
+#endif /* !CONFIG_IPIPE_DEBUG_CONTEXT */
5544
+/* Generic features */
5546
+#ifdef CONFIG_GENERIC_CLOCKEVENTS
5547
+#define __IPIPE_FEATURE_REQUEST_TICKDEV 1
5549
+#define __IPIPE_FEATURE_DELAYED_ATOMICSW 1
5550
+#define __IPIPE_FEATURE_FASTPEND_IRQ 1
5551
+#define __IPIPE_FEATURE_TRACE_EVENT 1
5553
+#else /* !CONFIG_IPIPE */
5554
+#define ipipe_preempt_disable(flags) do { \
5555
+ preempt_disable(); \
5558
+#define ipipe_preempt_enable(flags) preempt_enable()
5559
+#define ipipe_check_context(ipd) do { } while(0)
5560
+#endif /* CONFIG_IPIPE */
5562
+#endif /* !__LINUX_IPIPE_BASE_H */
5563
diff --git a/include/linux/ipipe_compat.h b/include/linux/ipipe_compat.h
5564
new file mode 100644
5565
index 0000000..50a245c
5567
+++ b/include/linux/ipipe_compat.h
5570
+ * include/linux/ipipe_compat.h
5572
+ * Copyright (C) 2007 Philippe Gerum.
5574
+ * This program is free software; you can redistribute it and/or modify
5575
+ * it under the terms of the GNU General Public License as published by
5576
+ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139,
5577
+ * USA; either version 2 of the License, or (at your option) any later
5580
+ * This program is distributed in the hope that it will be useful,
5581
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
5582
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
5583
+ * GNU General Public License for more details.
5585
+ * You should have received a copy of the GNU General Public License
5586
+ * along with this program; if not, write to the Free Software
5587
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
5590
+#ifndef __LINUX_IPIPE_COMPAT_H
5591
+#define __LINUX_IPIPE_COMPAT_H
5593
+#ifdef CONFIG_IPIPE_COMPAT
5595
+ * OBSOLETE: defined only for backward compatibility. Will be removed
5596
+ * in future releases, please update client code accordingly.
5600
+#define ipipe_declare_cpuid int cpuid
5601
+#define ipipe_load_cpuid() do { \
5602
+ cpuid = ipipe_processor_id(); \
5604
+#define ipipe_lock_cpu(flags) do { \
5605
+ local_irq_save_hw(flags); \
5606
+ cpuid = ipipe_processor_id(); \
5608
+#define ipipe_unlock_cpu(flags) local_irq_restore_hw(flags)
5609
+#define ipipe_get_cpu(flags) ipipe_lock_cpu(flags)
5610
+#define ipipe_put_cpu(flags) ipipe_unlock_cpu(flags)
5611
+#else /* !CONFIG_SMP */
5612
+#define ipipe_declare_cpuid const int cpuid = 0
5613
+#define ipipe_load_cpuid() do { } while(0)
5614
+#define ipipe_lock_cpu(flags) local_irq_save_hw(flags)
5615
+#define ipipe_unlock_cpu(flags) local_irq_restore_hw(flags)
5616
+#define ipipe_get_cpu(flags) do { (void)(flags); } while(0)
5617
+#define ipipe_put_cpu(flags) do { } while(0)
5618
+#endif /* CONFIG_SMP */
5620
+#endif /* CONFIG_IPIPE_COMPAT */
5622
+#endif /* !__LINUX_IPIPE_COMPAT_H */
5623
diff --git a/include/linux/ipipe_percpu.h b/include/linux/ipipe_percpu.h
5624
new file mode 100644
5625
index 0000000..2f41af3
5627
+++ b/include/linux/ipipe_percpu.h
5630
+ * include/linux/ipipe_percpu.h
5632
+ * Copyright (C) 2007 Philippe Gerum.
5634
+ * This program is free software; you can redistribute it and/or modify
5635
+ * it under the terms of the GNU General Public License as published by
5636
+ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139,
5637
+ * USA; either version 2 of the License, or (at your option) any later
5640
+ * This program is distributed in the hope that it will be useful,
5641
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
5642
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
5643
+ * GNU General Public License for more details.
5645
+ * You should have received a copy of the GNU General Public License
5646
+ * along with this program; if not, write to the Free Software
5647
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
5650
+#ifndef __LINUX_IPIPE_PERCPU_H
5651
+#define __LINUX_IPIPE_PERCPU_H
5653
+#include <asm/percpu.h>
5654
+#include <asm/ptrace.h>
5656
+struct ipipe_domain;
5658
+struct ipipe_percpu_domain_data {
5659
+ unsigned long status; /* <= Must be first in struct. */
5660
+ unsigned long irqpend_himask;
5661
+ unsigned long irqpend_lomask[IPIPE_IRQ_IWORDS];
5662
+ unsigned long irqheld_mask[IPIPE_IRQ_IWORDS];
5663
+ unsigned long irqall[IPIPE_NR_IRQS];
5668
+#define ipipe_percpudom_ptr(ipd, cpu) \
5669
+ (&per_cpu(ipipe_percpu_darray, cpu)[(ipd)->slot])
5670
+#define ipipe_cpudom_ptr(ipd) \
5671
+ (&__raw_get_cpu_var(ipipe_percpu_darray)[(ipd)->slot])
5673
+DECLARE_PER_CPU(struct ipipe_percpu_domain_data *, ipipe_percpu_daddr[CONFIG_IPIPE_DOMAINS]);
5674
+#define ipipe_percpudom_ptr(ipd, cpu) \
5675
+ (per_cpu(ipipe_percpu_daddr, cpu)[(ipd)->slot])
5676
+#define ipipe_cpudom_ptr(ipd) \
5677
+ (__raw_get_cpu_var(ipipe_percpu_daddr)[(ipd)->slot])
5679
+#define ipipe_percpudom(ipd, var, cpu) (ipipe_percpudom_ptr(ipd, cpu)->var)
5680
+#define ipipe_cpudom_var(ipd, var) (ipipe_cpudom_ptr(ipd)->var)
5682
+#define IPIPE_ROOT_SLOT 0
5683
+#define IPIPE_HEAD_SLOT (CONFIG_IPIPE_DOMAINS - 1)
5685
+DECLARE_PER_CPU(struct ipipe_percpu_domain_data, ipipe_percpu_darray[CONFIG_IPIPE_DOMAINS]);
5687
+DECLARE_PER_CPU(struct ipipe_domain *, ipipe_percpu_domain);
5689
+DECLARE_PER_CPU(unsigned long, ipipe_nmi_saved_root);
5691
+#ifdef CONFIG_IPIPE_DEBUG_CONTEXT
5692
+DECLARE_PER_CPU(int, ipipe_percpu_context_check);
5693
+DECLARE_PER_CPU(int, ipipe_saved_context_check_state);
5696
+#define ipipe_percpu(var, cpu) per_cpu(var, cpu)
5697
+#define ipipe_cpu_var(var) __raw_get_cpu_var(var)
5699
+#define ipipe_root_cpudom_ptr(var) \
5700
+ (&__raw_get_cpu_var(ipipe_percpu_darray)[IPIPE_ROOT_SLOT])
5702
+#define ipipe_root_cpudom_var(var) ipipe_root_cpudom_ptr()->var
5704
+#define ipipe_this_cpudom_var(var) \
5705
+ ipipe_cpudom_var(ipipe_current_domain, var)
5707
+#define ipipe_head_cpudom_ptr() \
5708
+ (&__raw_get_cpu_var(ipipe_percpu_darray)[IPIPE_HEAD_SLOT])
5710
+#define ipipe_head_cpudom_var(var) ipipe_head_cpudom_ptr()->var
5712
+#endif /* !__LINUX_IPIPE_PERCPU_H */
5713
diff --git a/include/linux/ipipe_tickdev.h b/include/linux/ipipe_tickdev.h
5714
new file mode 100644
5715
index 0000000..4a1cb1b
5717
+++ b/include/linux/ipipe_tickdev.h
5720
+ * include/linux/ipipe_tickdev.h
5722
+ * Copyright (C) 2007 Philippe Gerum.
5724
+ * This program is free software; you can redistribute it and/or modify
5725
+ * it under the terms of the GNU General Public License as published by
5726
+ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139,
5727
+ * USA; either version 2 of the License, or (at your option) any later
5730
+ * This program is distributed in the hope that it will be useful,
5731
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
5732
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
5733
+ * GNU General Public License for more details.
5735
+ * You should have received a copy of the GNU General Public License
5736
+ * along with this program; if not, write to the Free Software
5737
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
5740
+#ifndef __LINUX_IPIPE_TICKDEV_H
5741
+#define __LINUX_IPIPE_TICKDEV_H
5743
+#if defined(CONFIG_IPIPE) && defined(CONFIG_GENERIC_CLOCKEVENTS)
5745
+#include <linux/clockchips.h>
5747
+struct tick_device;
5749
+struct ipipe_tick_device {
5751
+ void (*emul_set_mode)(enum clock_event_mode,
5752
+ struct clock_event_device *cdev);
5753
+ int (*emul_set_tick)(unsigned long delta,
5754
+ struct clock_event_device *cdev);
5755
+ void (*real_set_mode)(enum clock_event_mode mode,
5756
+ struct clock_event_device *cdev);
5757
+ int (*real_set_tick)(unsigned long delta,
5758
+ struct clock_event_device *cdev);
5759
+ struct tick_device *slave;
5760
+ unsigned long real_max_delta_ns;
5761
+ unsigned long real_mult;
5765
+int ipipe_request_tickdev(const char *devname,
5766
+ void (*emumode)(enum clock_event_mode mode,
5767
+ struct clock_event_device *cdev),
5768
+ int (*emutick)(unsigned long evt,
5769
+ struct clock_event_device *cdev),
5770
+ int cpu, unsigned long *tmfreq);
5772
+void ipipe_release_tickdev(int cpu);
5774
+#endif /* CONFIG_IPIPE && CONFIG_GENERIC_CLOCKEVENTS */
5776
+#endif /* !__LINUX_IPIPE_TICKDEV_H */
5777
diff --git a/include/linux/ipipe_trace.h b/include/linux/ipipe_trace.h
5778
new file mode 100644
5779
index 0000000..627b354
5781
+++ b/include/linux/ipipe_trace.h
5784
+ * include/linux/ipipe_trace.h
5786
+ * Copyright (C) 2005 Luotao Fu.
5787
+ * 2005-2007 Jan Kiszka.
5789
+ * This program is free software; you can redistribute it and/or modify
5790
+ * it under the terms of the GNU General Public License as published by
5791
+ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139,
5792
+ * USA; either version 2 of the License, or (at your option) any later
5795
+ * This program is distributed in the hope that it will be useful,
5796
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
5797
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
5798
+ * GNU General Public License for more details.
5800
+ * You should have received a copy of the GNU General Public License
5801
+ * along with this program; if not, write to the Free Software
5802
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
5805
+#ifndef _LINUX_IPIPE_TRACE_H
5806
+#define _LINUX_IPIPE_TRACE_H
5808
+#ifdef CONFIG_IPIPE_TRACE
5810
+#include <linux/types.h>
5812
+void ipipe_trace_begin(unsigned long v);
5813
+void ipipe_trace_end(unsigned long v);
5814
+void ipipe_trace_freeze(unsigned long v);
5815
+void ipipe_trace_special(unsigned char special_id, unsigned long v);
5816
+void ipipe_trace_pid(pid_t pid, short prio);
5817
+void ipipe_trace_event(unsigned char id, unsigned long delay_tsc);
5818
+int ipipe_trace_max_reset(void);
5819
+int ipipe_trace_frozen_reset(void);
5821
+#else /* !CONFIG_IPIPE_TRACE */
5823
+#define ipipe_trace_begin(v) do { (void)(v); } while(0)
5824
+#define ipipe_trace_end(v) do { (void)(v); } while(0)
5825
+#define ipipe_trace_freeze(v) do { (void)(v); } while(0)
5826
+#define ipipe_trace_special(id, v) do { (void)(id); (void)(v); } while(0)
5827
+#define ipipe_trace_pid(pid, prio) do { (void)(pid); (void)(prio); } while(0)
5828
+#define ipipe_trace_event(id, delay_tsc) do { (void)(id); (void)(delay_tsc); } while(0)
5829
+#define ipipe_trace_max_reset() do { } while(0)
5830
+#define ipipe_trace_frozen_reset() do { } while(0) /* no-op stub matching the CONFIG_IPIPE_TRACE API name */
5832
+#endif /* !CONFIG_IPIPE_TRACE */
5834
+#ifdef CONFIG_IPIPE_TRACE_PANIC
5835
+void ipipe_trace_panic_freeze(void);
5836
+void ipipe_trace_panic_dump(void);
5838
+static inline void ipipe_trace_panic_freeze(void) { }
5839
+static inline void ipipe_trace_panic_dump(void) { }
5842
+#ifdef CONFIG_IPIPE_TRACE_IRQSOFF
5843
+#define ipipe_trace_irq_entry(irq) ipipe_trace_begin(irq)
5844
+#define ipipe_trace_irq_exit(irq) ipipe_trace_end(irq)
5845
+#define ipipe_trace_irqsoff() ipipe_trace_begin(0x80000000UL)
5846
+#define ipipe_trace_irqson() ipipe_trace_end(0x80000000UL)
5848
+#define ipipe_trace_irq_entry(irq) do { (void)(irq);} while(0)
5849
+#define ipipe_trace_irq_exit(irq) do { (void)(irq);} while(0)
5850
+#define ipipe_trace_irqsoff() do { } while(0)
5851
+#define ipipe_trace_irqson() do { } while(0)
5854
+#endif /* !_LINUX_IPIPE_TRACE_H */
5855
diff --git a/include/linux/irq.h b/include/linux/irq.h
5856
index 3dddfa7..6ce9115 100644
5857
--- a/include/linux/irq.h
5858
+++ b/include/linux/irq.h
5859
@@ -114,6 +114,9 @@ struct irq_chip {
5861
void (*end)(unsigned int irq);
5862
void (*set_affinity)(unsigned int irq, cpumask_t dest);
5863
+#ifdef CONFIG_IPIPE
5864
+ void (*move)(unsigned int irq);
5865
+#endif /* CONFIG_IPIPE */
5866
int (*retrigger)(unsigned int irq);
5867
int (*set_type)(unsigned int irq, unsigned int flow_type);
5868
int (*set_wake)(unsigned int irq, unsigned int on);
5869
@@ -153,6 +156,12 @@ struct irq_chip {
5870
* @name: flow handler name for /proc/interrupts output
5873
+#ifdef CONFIG_IPIPE
5874
+ void (*ipipe_ack)(unsigned int irq,
5875
+ struct irq_desc *desc);
5876
+ void (*ipipe_end)(unsigned int irq,
5877
+ struct irq_desc *desc);
5878
+#endif /* CONFIG_IPIPE */
5880
irq_flow_handler_t handle_irq;
5881
struct irq_chip *chip;
5882
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
5883
index dc7e0d0..88d5034 100644
5884
--- a/include/linux/kernel.h
5885
+++ b/include/linux/kernel.h
5887
#include <linux/compiler.h>
5888
#include <linux/bitops.h>
5889
#include <linux/log2.h>
5890
+#include <linux/ipipe_base.h>
5891
#include <linux/typecheck.h>
5892
#include <linux/ratelimit.h>
5893
#include <linux/dynamic_printk.h>
5894
@@ -111,9 +112,12 @@ struct user;
5896
#ifdef CONFIG_PREEMPT_VOLUNTARY
5897
extern int _cond_resched(void);
5898
-# define might_resched() _cond_resched()
5899
+# define might_resched() do { \
5900
+ ipipe_check_context(ipipe_root_domain); \
5901
+ _cond_resched(); \
5904
-# define might_resched() do { } while (0)
5905
+# define might_resched() ipipe_check_context(ipipe_root_domain)
5908
#ifdef CONFIG_DEBUG_SPINLOCK_SLEEP
5909
diff --git a/include/linux/linkage.h b/include/linux/linkage.h
5910
index 9fd1f85..8018d53 100644
5911
--- a/include/linux/linkage.h
5912
+++ b/include/linux/linkage.h
5914
#define ATTRIB_NORET __attribute__((noreturn))
5915
#define NORET_AND noreturn,
5918
+#define notrace __attribute__((no_instrument_function))
5922
diff --git a/include/linux/mm.h b/include/linux/mm.h
5923
index 5c7f5f5..3bd32c2 100644
5924
--- a/include/linux/mm.h
5925
+++ b/include/linux/mm.h
5926
@@ -115,6 +115,8 @@ extern unsigned int kobjsize(const void *objp);
5927
#define VM_MIXEDMAP 0x10000000 /* Can contain "struct page" and pure PFN pages */
5928
#define VM_SAO 0x20000000 /* Strong Access Ordering (powerpc) */
5930
+#define VM_PINNED 0x40000000 /* Disable faults for the vma */
5932
#ifndef VM_STACK_DEFAULT_FLAGS /* arch can override this */
5933
#define VM_STACK_DEFAULT_FLAGS VM_DATA_DEFAULT_FLAGS
5935
diff --git a/include/linux/preempt.h b/include/linux/preempt.h
5936
index 72b1a10..80553be 100644
5937
--- a/include/linux/preempt.h
5938
+++ b/include/linux/preempt.h
5940
#include <linux/thread_info.h>
5941
#include <linux/linkage.h>
5942
#include <linux/list.h>
5943
+#include <linux/ipipe_base.h>
5945
#if defined(CONFIG_DEBUG_PREEMPT) || defined(CONFIG_PREEMPT_TRACER)
5946
extern void add_preempt_count(int val);
5947
extern void sub_preempt_count(int val);
5949
-# define add_preempt_count(val) do { preempt_count() += (val); } while (0)
5950
-# define sub_preempt_count(val) do { preempt_count() -= (val); } while (0)
5951
+# define add_preempt_count(val) do { \
5952
+ ipipe_check_context(ipipe_root_domain); \
5953
+ preempt_count() += (val); \
5955
+# define sub_preempt_count(val) do { \
5956
+ ipipe_check_context(ipipe_root_domain); \
5957
+ preempt_count() -= (val); \
5961
#define inc_preempt_count() add_preempt_count(1)
5962
diff --git a/include/linux/sched.h b/include/linux/sched.h
5963
index 3883c32..7a30b68 100644
5964
--- a/include/linux/sched.h
5965
+++ b/include/linux/sched.h
5966
@@ -59,6 +59,7 @@ struct sched_param {
5967
#include <linux/errno.h>
5968
#include <linux/nodemask.h>
5969
#include <linux/mm_types.h>
5970
+#include <linux/ipipe.h>
5972
#include <asm/system.h>
5973
#include <asm/page.h>
5974
@@ -181,6 +182,13 @@ extern unsigned long long time_sync_thresh;
5975
/* in tsk->state again */
5976
#define TASK_DEAD 64
5977
#define TASK_WAKEKILL 128
5978
+#ifdef CONFIG_IPIPE
5979
+#define TASK_ATOMICSWITCH 512
5980
+#define TASK_NOWAKEUP 1024
5981
+#else /* !CONFIG_IPIPE */
5982
+#define TASK_ATOMICSWITCH 0
5983
+#define TASK_NOWAKEUP 0
5984
+#endif /* CONFIG_IPIPE */
5986
/* Convenience macros for the sake of set_task_state */
5987
#define TASK_KILLABLE (TASK_WAKEKILL | TASK_UNINTERRUPTIBLE)
5988
@@ -289,6 +297,15 @@ extern void account_process_tick(struct task_struct *task, int user);
5989
extern void update_process_times(int user);
5990
extern void scheduler_tick(void);
5992
+#ifdef CONFIG_IPIPE
5993
+extern void update_root_process_times(struct pt_regs *regs);
5994
+#else /* !CONFIG_IPIPE */
5995
+static inline void update_root_process_times(struct pt_regs *regs)
5997
+ update_process_times(user_mode(regs));
5999
+#endif /* CONFIG_IPIPE */
6001
extern void sched_show_task(struct task_struct *p);
6003
#ifdef CONFIG_DETECT_SOFTLOCKUP
6004
@@ -1331,6 +1348,9 @@ struct task_struct {
6006
atomic_t fs_excl; /* holding fs exclusive resources */
6007
struct rcu_head rcu;
6008
+#ifdef CONFIG_IPIPE
6009
+ void *ptd[IPIPE_ROOT_NPTDKEYS];
6013
* cache last used pipe for splice
6014
@@ -1542,6 +1562,11 @@ extern cputime_t task_gtime(struct task_struct *p);
6015
#define PF_EXITING 0x00000004 /* getting shut down */
6016
#define PF_EXITPIDONE 0x00000008 /* pi exit done on shut down */
6017
#define PF_VCPU 0x00000010 /* I'm a virtual CPU */
6018
+#ifdef CONFIG_IPIPE
6019
+#define PF_EVNOTIFY 0x00000020 /* Notify other domains about internal events */
6021
+#define PF_EVNOTIFY 0
6022
+#endif /* CONFIG_IPIPE */
6023
#define PF_FORKNOEXEC 0x00000040 /* forked but didn't exec */
6024
#define PF_SUPERPRIV 0x00000100 /* used super-user privileges */
6025
#define PF_DUMPCORE 0x00000200 /* dumped core */
6026
diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h
6027
index e0c0fcc..2044388 100644
6028
--- a/include/linux/spinlock.h
6029
+++ b/include/linux/spinlock.h
6030
@@ -90,10 +90,14 @@ extern int __lockfunc generic__raw_read_trylock(raw_rwlock_t *lock);
6031
# include <linux/spinlock_up.h>
6035
+#define TYPE_EQUAL(lock, type) \
6036
+ __builtin_types_compatible_p(typeof(lock), type *)
6038
#ifdef CONFIG_DEBUG_SPINLOCK
6039
extern void __spin_lock_init(spinlock_t *lock, const char *name,
6040
struct lock_class_key *key);
6041
-# define spin_lock_init(lock) \
6042
+# define _spin_lock_init(lock) \
6044
static struct lock_class_key __key; \
6046
@@ -101,10 +105,21 @@ do { \
6050
-# define spin_lock_init(lock) \
6051
+# define _spin_lock_init(lock) \
6052
do { *(lock) = SPIN_LOCK_UNLOCKED; } while (0)
6055
+# define spin_lock_init(lock) \
6057
+ if (TYPE_EQUAL((lock), __ipipe_spinlock_t)) \
6059
+ IPIPE_DEFINE_SPINLOCK(__lock__); \
6060
+ *((ipipe_spinlock_t *)(lock)) = __lock__; /* parenthesize the macro argument before casting */ \
6063
+ _spin_lock_init((spinlock_t *)(lock)); \
6066
#ifdef CONFIG_DEBUG_SPINLOCK
6067
extern void __rwlock_init(rwlock_t *lock, const char *name,
6068
struct lock_class_key *key);
6069
@@ -179,7 +194,94 @@ do { \
6070
#define read_trylock(lock) __cond_lock(lock, _read_trylock(lock))
6071
#define write_trylock(lock) __cond_lock(lock, _write_trylock(lock))
6073
-#define spin_lock(lock) _spin_lock(lock)
6074
+#define PICK_SPINOP(op, lock) \
6076
+ if (TYPE_EQUAL((lock), __ipipe_spinlock_t)) \
6077
+ __raw_spin##op(&((__ipipe_spinlock_t *)(lock))->__raw_lock); \
6078
+ else if (TYPE_EQUAL(lock, spinlock_t)) \
6079
+ _spin##op((spinlock_t *)(lock)); \
6082
+#define PICK_SPINOP_RAW(op, lock) \
6084
+ if (TYPE_EQUAL((lock), __ipipe_spinlock_t)) \
6085
+ __raw_spin##op(&((__ipipe_spinlock_t *)(lock))->__raw_lock); \
6086
+ else if (TYPE_EQUAL(lock, spinlock_t)) \
6087
+ __raw_spin##op(&((spinlock_t *)(lock))->raw_lock); \
6090
+#define PICK_SPINLOCK_IRQ(lock) \
6092
+ if (TYPE_EQUAL((lock), __ipipe_spinlock_t)) { \
6093
+ __ipipe_spin_lock_irq(&((__ipipe_spinlock_t *)(lock))->__raw_lock); \
6094
+ } else if (TYPE_EQUAL(lock, spinlock_t)) \
6095
+ _spin_lock_irq((spinlock_t *)(lock)); \
6098
+#define PICK_SPINUNLOCK_IRQ(lock) \
6100
+ if (TYPE_EQUAL((lock), __ipipe_spinlock_t)) { \
6101
+ __ipipe_spin_unlock_irq(&((__ipipe_spinlock_t *)(lock))->__raw_lock); \
6102
+ } else if (TYPE_EQUAL(lock, spinlock_t)) \
6103
+ _spin_unlock_irq((spinlock_t *)(lock)); \
6106
+#define PICK_SPINLOCK_IRQ_RAW(lock) \
6108
+ if (TYPE_EQUAL((lock), __ipipe_spinlock_t)) { \
6109
+ __ipipe_spin_lock_irq(&((__ipipe_spinlock_t *)(lock))->__raw_lock); \
6110
+ } else if (TYPE_EQUAL(lock, spinlock_t)) { /* braces: both statements belong to this arm only */ \
6111
+ local_irq_disable(); \
6112
+ __raw_spin_lock(&((spinlock_t *)(lock))->raw_lock); } \
6115
+#define PICK_SPINUNLOCK_IRQ_RAW(lock) \
6117
+ if (TYPE_EQUAL((lock), __ipipe_spinlock_t)) { \
6118
+ __ipipe_spin_unlock_irq(&((__ipipe_spinlock_t *)(lock))->__raw_lock); \
6119
+ } else if (TYPE_EQUAL(lock, spinlock_t)) { /* braces: both statements belong to this arm only */ \
6120
+ __raw_spin_unlock(&((spinlock_t *)(lock))->raw_lock); \
6121
+ local_irq_enable(); } \
6124
+#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK)
6125
+extern int __bad_spinlock_type(void);
6127
+#define PICK_SPINLOCK_IRQSAVE(lock, flags) \
6129
+ if (TYPE_EQUAL((lock), __ipipe_spinlock_t)) { \
6130
+ (flags) = __ipipe_spin_lock_irqsave(&((__ipipe_spinlock_t *)(lock))->__raw_lock); \
6131
+ } else if (TYPE_EQUAL(lock, spinlock_t)) \
6132
+ flags = _spin_lock_irqsave((spinlock_t *)(lock)); \
6133
+ else __bad_spinlock_type(); \
6135
+#define PICK_SPINLOCK_IRQSAVE_NESTED(lock, flags, subclass) \
6137
+ if (TYPE_EQUAL((lock), __ipipe_spinlock_t)) { \
6138
+ (flags) = __ipipe_spin_lock_irqsave(&((__ipipe_spinlock_t *)(lock))->__raw_lock); \
6139
+ } else if (TYPE_EQUAL(lock, spinlock_t)) \
6140
+ flags = _spin_lock_irqsave_nested((spinlock_t *)(lock), subclass); \
6141
+ else __bad_spinlock_type(); \
6144
+#define PICK_SPINLOCK_IRQSAVE(lock, flags) \
6146
+ if (TYPE_EQUAL((lock), __ipipe_spinlock_t)) { \
6147
+ (flags) = __ipipe_spin_lock_irqsave(&((__ipipe_spinlock_t *)(lock))->__raw_lock); \
6148
+ } else if (TYPE_EQUAL(lock, spinlock_t)) \
6149
+ _spin_lock_irqsave((spinlock_t *)(lock), flags); \
6153
+#define PICK_SPINUNLOCK_IRQRESTORE(lock, flags) \
6155
+ if (TYPE_EQUAL((lock), __ipipe_spinlock_t)) { \
6156
+ __ipipe_spin_unlock_irqrestore(&((__ipipe_spinlock_t *)(lock))->__raw_lock, flags); \
6157
+ } else if (TYPE_EQUAL(lock, spinlock_t)) \
6158
+ _spin_unlock_irqrestore((spinlock_t *)(lock), flags); \
6161
+#define spin_lock(lock) PICK_SPINOP(_lock, lock)
6163
#ifdef CONFIG_DEBUG_LOCK_ALLOC
6164
# define spin_lock_nested(lock, subclass) _spin_lock_nested(lock, subclass)
6165
@@ -201,7 +303,7 @@ do { \
6166
#define spin_lock_irqsave(lock, flags) \
6168
typecheck(unsigned long, flags); \
6169
- flags = _spin_lock_irqsave(lock); \
6170
+ PICK_SPINLOCK_IRQSAVE(lock, flags); \
6172
#define read_lock_irqsave(lock, flags) \
6174
@@ -218,13 +320,13 @@ do { \
6175
#define spin_lock_irqsave_nested(lock, flags, subclass) \
6177
typecheck(unsigned long, flags); \
6178
- flags = _spin_lock_irqsave_nested(lock, subclass); \
6179
+ PICK_SPINLOCK_IRQSAVE_NESTED(lock, flags, subclass); \
6182
#define spin_lock_irqsave_nested(lock, flags, subclass) \
6184
typecheck(unsigned long, flags); \
6185
- flags = _spin_lock_irqsave(lock); \
6186
+ PICK_SPINLOCK_IRQSAVE(lock, flags); \
6190
@@ -233,7 +335,7 @@ do { \
6191
#define spin_lock_irqsave(lock, flags) \
6193
typecheck(unsigned long, flags); \
6194
- _spin_lock_irqsave(lock, flags); \
6195
+ PICK_SPINLOCK_IRQSAVE(lock, flags); \
6197
#define read_lock_irqsave(lock, flags) \
6199
@@ -250,7 +352,7 @@ do { \
6203
-#define spin_lock_irq(lock) _spin_lock_irq(lock)
6204
+#define spin_lock_irq(lock) PICK_SPINLOCK_IRQ(lock)
6205
#define spin_lock_bh(lock) _spin_lock_bh(lock)
6207
#define read_lock_irq(lock) _read_lock_irq(lock)
6208
@@ -264,32 +366,40 @@ do { \
6210
#if defined(CONFIG_DEBUG_SPINLOCK) || defined(CONFIG_PREEMPT) || \
6211
!defined(CONFIG_SMP)
6212
-# define spin_unlock(lock) _spin_unlock(lock)
6213
+#define spin_unlock(lock) PICK_SPINOP(_unlock, lock)
6214
# define read_unlock(lock) _read_unlock(lock)
6215
# define write_unlock(lock) _write_unlock(lock)
6216
-# define spin_unlock_irq(lock) _spin_unlock_irq(lock)
6217
-# define read_unlock_irq(lock) _read_unlock_irq(lock)
6218
-# define write_unlock_irq(lock) _write_unlock_irq(lock)
6219
+# define spin_unlock_irq(lock) PICK_SPINUNLOCK_IRQ(lock)
6220
+# define read_unlock_irq(lock) _read_unlock_irq(lock)
6221
+# define write_unlock_irq(lock) _write_unlock_irq(lock)
6223
-# define spin_unlock(lock) \
6224
- do {__raw_spin_unlock(&(lock)->raw_lock); __release(lock); } while (0)
6225
-# define read_unlock(lock) \
6226
- do {__raw_read_unlock(&(lock)->raw_lock); __release(lock); } while (0)
6227
-# define write_unlock(lock) \
6228
- do {__raw_write_unlock(&(lock)->raw_lock); __release(lock); } while (0)
6229
-# define spin_unlock_irq(lock) \
6230
+# define spin_unlock(lock) \
6232
- __raw_spin_unlock(&(lock)->raw_lock); \
6233
+ PICK_SPINOP_RAW(_unlock, lock); \
6234
+ __release(lock); \
6236
+# define read_unlock(lock) \
6238
+ __raw_read_unlock(&(lock)->raw_lock); \
6240
- local_irq_enable(); \
6242
-# define read_unlock_irq(lock) \
6243
+# define write_unlock(lock) \
6245
+ __raw_write_unlock(&(lock)->raw_lock); \
6246
+ __release(lock); \
6248
+# define spin_unlock_irq(lock) \
6250
+ PICK_SPINUNLOCK_IRQ_RAW(lock); \
6251
+ __release(lock); \
6253
+# define read_unlock_irq(lock) \
6255
__raw_read_unlock(&(lock)->raw_lock); \
6257
local_irq_enable(); \
6259
-# define write_unlock_irq(lock) \
6260
+# define write_unlock_irq(lock) \
6262
__raw_write_unlock(&(lock)->raw_lock); \
6264
@@ -300,7 +410,7 @@ do { \
6265
#define spin_unlock_irqrestore(lock, flags) \
6267
typecheck(unsigned long, flags); \
6268
- _spin_unlock_irqrestore(lock, flags); \
6269
+ PICK_SPINUNLOCK_IRQRESTORE(lock, flags); \
6271
#define spin_unlock_bh(lock) _spin_unlock_bh(lock)
6273
@@ -364,4 +474,29 @@ extern int _atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock);
6275
#define spin_can_lock(lock) (!spin_is_locked(lock))
6277
+#ifdef CONFIG_IPIPE
6278
+void __ipipe_spin_lock_irq(raw_spinlock_t *lock);
6279
+void __ipipe_spin_unlock_irq(raw_spinlock_t *lock);
6280
+unsigned long __ipipe_spin_lock_irqsave(raw_spinlock_t *lock);
6281
+void __ipipe_spin_unlock_irqrestore(raw_spinlock_t *lock,
6283
+void __ipipe_spin_unlock_irqbegin(ipipe_spinlock_t *lock);
6284
+void __ipipe_spin_unlock_irqcomplete(unsigned long x);
6285
+#define spin_lock_irqsave_cond(lock, flags) \
6286
+ spin_lock_irqsave(lock, flags)
6287
+#define spin_unlock_irqrestore_cond(lock, flags) \
6288
+ spin_unlock_irqrestore(lock, flags)
6290
+#define spin_lock_irqsave_cond(lock, flags) \
6291
+ do { (void)(flags); spin_lock(lock); } while(0)
6292
+#define spin_unlock_irqrestore_cond(lock, flags) \
6294
+#define __ipipe_spin_lock_irq(lock) do { } while(0)
6295
+#define __ipipe_spin_unlock_irq(lock) do { } while(0)
6296
+#define __ipipe_spin_lock_irqsave(lock) 0
6297
+#define __ipipe_spin_unlock_irqrestore(lock, x) do { (void)(x); } while(0)
6298
+#define __ipipe_spin_unlock_irqbegin(lock) do { } while(0)
6299
+#define __ipipe_spin_unlock_irqcomplete(x) do { (void)(x); } while(0)
6302
#endif /* __LINUX_SPINLOCK_H */
6303
diff --git a/include/linux/spinlock_types.h b/include/linux/spinlock_types.h
6304
index 68d88f7..f5ce3c4 100644
6305
--- a/include/linux/spinlock_types.h
6306
+++ b/include/linux/spinlock_types.h
6307
@@ -31,6 +31,10 @@ typedef struct {
6312
+ raw_spinlock_t __raw_lock;
6313
+} __ipipe_spinlock_t;
6315
#define SPINLOCK_MAGIC 0xdead4ead
6318
@@ -92,9 +96,21 @@ typedef struct {
6319
* __SPIN_LOCK_UNLOCKED()/__RW_LOCK_UNLOCKED() as appropriate.
6321
#define SPIN_LOCK_UNLOCKED __SPIN_LOCK_UNLOCKED(old_style_spin_init)
6322
+#define IPIPE_SPIN_LOCK_UNLOCKED \
6323
+ (__ipipe_spinlock_t) { .__raw_lock = __RAW_SPIN_LOCK_UNLOCKED }
6324
#define RW_LOCK_UNLOCKED __RW_LOCK_UNLOCKED(old_style_rw_init)
6326
#define DEFINE_SPINLOCK(x) spinlock_t x = __SPIN_LOCK_UNLOCKED(x)
6327
#define DEFINE_RWLOCK(x) rwlock_t x = __RW_LOCK_UNLOCKED(x)
6329
+#ifdef CONFIG_IPIPE
6330
+# define ipipe_spinlock_t __ipipe_spinlock_t
6331
+# define IPIPE_DEFINE_SPINLOCK(x) ipipe_spinlock_t x = IPIPE_SPIN_LOCK_UNLOCKED
6332
+# define IPIPE_DECLARE_SPINLOCK(x) extern ipipe_spinlock_t x
6334
+# define ipipe_spinlock_t spinlock_t
6335
+# define IPIPE_DEFINE_SPINLOCK(x) DEFINE_SPINLOCK(x)
6336
+# define IPIPE_DECLARE_SPINLOCK(x) extern spinlock_t x
6339
#endif /* __LINUX_SPINLOCK_TYPES_H */
6340
diff --git a/init/Kconfig b/init/Kconfig
6341
index 83b6905..27ccba5 100644
6344
@@ -73,6 +73,7 @@ config INIT_ENV_ARG_LIMIT
6347
string "Local version - append to kernel release"
6350
Append an extra string to the end of your kernel version.
6351
This will show up when you type uname, for example.
6352
diff --git a/init/main.c b/init/main.c
6353
index 7e117a2..43474f4 100644
6356
@@ -555,7 +555,7 @@ asmlinkage void __init start_kernel(void)
6357
debug_objects_early_init();
6358
cgroup_init_early();
6360
- local_irq_disable();
6361
+ local_irq_disable_hw();
6362
early_boot_irqs_off();
6363
early_init_irq_lock_class();
6365
@@ -611,6 +611,11 @@ asmlinkage void __init start_kernel(void)
6370
+ * We need to wait for the interrupt and time subsystems to be
6371
+ * initialized before enabling the pipeline.
6375
if (!irqs_disabled())
6376
printk("start_kernel(): bug: interrupts were enabled early\n");
6377
@@ -772,6 +777,7 @@ static void __init do_basic_setup(void)
6378
usermodehelper_init();
6381
+ ipipe_init_proc();
6385
diff --git a/kernel/Makefile b/kernel/Makefile
6386
index 19fad00..e36e4e3 100644
6387
--- a/kernel/Makefile
6388
+++ b/kernel/Makefile
6389
@@ -79,6 +79,7 @@ ifeq ($(CONFIG_PREEMPT_RCU),y)
6390
obj-$(CONFIG_RCU_TRACE) += rcupreempt_trace.o
6392
obj-$(CONFIG_RELAY) += relay.o
6393
+obj-$(CONFIG_IPIPE) += ipipe/
6394
obj-$(CONFIG_SYSCTL) += utsname_sysctl.o
6395
obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o
6396
obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o
6397
diff --git a/kernel/exit.c b/kernel/exit.c
6398
index 10e393b..c74548a 100644
6401
@@ -1072,6 +1072,7 @@ NORET_TYPE void do_exit(long code)
6403
trace_sched_process_exit(tsk);
6405
+ ipipe_exit_notify(tsk);
6409
diff --git a/kernel/fork.c b/kernel/fork.c
6410
index 8f753e5..d91e8d8 100644
6413
@@ -468,6 +468,7 @@ void mmput(struct mm_struct *mm)
6414
if (atomic_dec_and_test(&mm->mm_users)) {
6417
+ ipipe_cleanup_notify(mm);
6418
set_mm_exe_file(mm, NULL);
6419
if (!list_empty(&mm->mmlist)) {
6420
spin_lock(&mmlist_lock);
6421
@@ -887,7 +888,7 @@ static void copy_flags(unsigned long clone_flags, struct task_struct *p)
6423
unsigned long new_flags = p->flags;
6425
- new_flags &= ~PF_SUPERPRIV;
6426
+ new_flags &= ~(PF_SUPERPRIV | PF_EVNOTIFY);
6427
new_flags |= PF_FORKNOEXEC;
6428
new_flags |= PF_STARTING;
6429
p->flags = new_flags;
6430
@@ -1271,6 +1272,9 @@ static struct task_struct *copy_process(unsigned long clone_flags,
6431
write_unlock_irq(&tasklist_lock);
6432
proc_fork_connector(p);
6433
cgroup_post_fork(p);
6434
+#ifdef CONFIG_IPIPE
6435
+ memset(p->ptd, 0, sizeof(p->ptd));
6436
+#endif /* CONFIG_IPIPE */
6440
diff --git a/kernel/ipipe/Kconfig b/kernel/ipipe/Kconfig
6441
new file mode 100644
6442
index 0000000..de5e6a3
6444
+++ b/kernel/ipipe/Kconfig
6447
+ bool "Interrupt pipeline"
6450
+ Activate this option if you want the interrupt pipeline to be
6453
+config IPIPE_DOMAINS
6458
+ The maximum number of I-pipe domains to run concurrently.
6460
+config IPIPE_COMPAT
6461
+ bool "Maintain code compatibility with older releases"
6465
+ Activate this option if you want the compatibility code to be
6466
+ defined, so that older I-pipe clients may use obsolete
6467
+ constructs. WARNING: obsolete code will be eventually
6468
+ deprecated in future I-pipe releases, and removed from the
6469
+ compatibility support as time passes. Please fix I-pipe
6470
+ clients to get rid of such uses as soon as possible.
6472
+config IPIPE_DELAYED_ATOMICSW
6477
+config IPIPE_UNMASKED_CONTEXT_SWITCH
6481
diff --git a/kernel/ipipe/Kconfig.debug b/kernel/ipipe/Kconfig.debug
6482
new file mode 100644
6483
index 0000000..48dae0e
6485
+++ b/kernel/ipipe/Kconfig.debug
6488
+ bool "I-pipe debugging"
6491
+config IPIPE_DEBUG_CONTEXT
6492
+ bool "Check for illicit cross-domain calls"
6493
+ depends on IPIPE_DEBUG
6496
+ Enable this feature to arm checkpoints in the kernel that
6497
+ verify the correct invocation context. On entry of critical
6498
+ Linux services a warning is issued if the caller is not
6499
+ running over the root domain.
6502
+ bool "Latency tracing"
6503
+ depends on IPIPE_DEBUG && !FUNCTION_TRACER
6504
+ select FRAME_POINTER
6508
+ Activate this option if you want to use per-function tracing of
6509
+ the kernel. The tracer will collect data via instrumentation
6510
+ features like the one below or with the help of explicit calls
6511
+ of ipipe_trace_xxx(). See include/linux/ipipe_trace.h for the
6512
+ in-kernel tracing API. The collected data and runtime control
6513
+ is available via /proc/ipipe/trace/*.
6517
+config IPIPE_TRACE_ENABLE
6518
+ bool "Enable tracing on boot"
6521
+ Disable this option if you want to arm the tracer after booting
6522
+ manually ("echo 1 > /proc/ipipe/trace/enable"). This can reduce
6523
+ boot time on slow embedded devices due to the tracer overhead.
6525
+config IPIPE_TRACE_MCOUNT
6526
+ bool "Instrument function entries"
6529
+ When enabled, records every kernel function entry in the tracer
6530
+ log. While this slows down the system noticeably, it provides
6531
+ the highest level of information about the flow of events.
6532
+ However, it can be switched off in order to record only explicit
6533
+ I-pipe trace points.
6535
+config IPIPE_TRACE_IRQSOFF
6536
+ bool "Trace IRQs-off times"
6539
+ Activate this option if I-pipe shall trace the longest path
6540
+ with hard-IRQs switched off.
6542
+config IPIPE_TRACE_SHIFT
6543
+ int "Depth of trace log (14 => 16Kpoints, 15 => 32Kpoints)"
6547
+ The number of trace points to hold tracing data for each
6548
+ trace path, as a power of 2.
6550
+config IPIPE_TRACE_VMALLOC
6551
+ bool "Use vmalloc'ed trace buffer"
6552
+ default y if EMBEDDED
6554
+ Instead of reserving static kernel data, the required buffer
6555
+ is allocated via vmalloc during boot-up when this option is
6556
+ enabled. This can help to start systems that are low on memory,
6557
+ but it slightly degrades overall performance. Try this option
6558
+ when a traced kernel hangs unexpectedly at boot time.
6560
+config IPIPE_TRACE_PANIC
6561
+ bool "Enable panic back traces"
6564
+ Provides services to freeze and dump a back trace on panic
6565
+ situations. This is used on IPIPE_DEBUG_CONTEXT exceptions
6566
+ as well as ordinary kernel oopses. You can control the number
6567
+ of printed back trace points via /proc/ipipe/trace.
6569
+config IPIPE_TRACE_ENABLE_VALUE
6571
+ default 0 if !IPIPE_TRACE_ENABLE
6572
+ default 1 if IPIPE_TRACE_ENABLE
6575
diff --git a/kernel/ipipe/Makefile b/kernel/ipipe/Makefile
6576
new file mode 100644
6577
index 0000000..6257dfa
6579
+++ b/kernel/ipipe/Makefile
6582
+obj-$(CONFIG_IPIPE) += core.o
6583
+obj-$(CONFIG_IPIPE_TRACE) += tracer.o
6584
diff --git a/kernel/ipipe/core.c b/kernel/ipipe/core.c
6585
new file mode 100644
6586
index 0000000..97e6aae
6588
+++ b/kernel/ipipe/core.c
6591
+ * linux/kernel/ipipe/core.c
6593
+ * Copyright (C) 2002-2005 Philippe Gerum.
6595
+ * This program is free software; you can redistribute it and/or modify
6596
+ * it under the terms of the GNU General Public License as published by
6597
+ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139,
6598
+ * USA; either version 2 of the License, or (at your option) any later
6601
+ * This program is distributed in the hope that it will be useful,
6602
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
6603
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
6604
+ * GNU General Public License for more details.
6606
+ * You should have received a copy of the GNU General Public License
6607
+ * along with this program; if not, write to the Free Software
6608
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
6610
+ * Architecture-independent I-PIPE core support.
6613
+#include <linux/version.h>
6614
+#include <linux/module.h>
6615
+#include <linux/init.h>
6616
+#include <linux/kernel.h>
6617
+#include <linux/sched.h>
6618
+#include <linux/sched.h>
6619
+#include <linux/kallsyms.h>
6620
+#include <linux/interrupt.h>
6621
+#include <linux/bitops.h>
6622
+#include <linux/tick.h>
6623
+#include <linux/prefetch.h>
6624
+#ifdef CONFIG_PROC_FS
6625
+#include <linux/proc_fs.h>
6626
+#include <linux/seq_file.h>
6627
+#endif /* CONFIG_PROC_FS */
6628
+#include <linux/ipipe_trace.h>
6629
+#include <linux/ipipe_tickdev.h>
6630
+#include <linux/irq.h>
6632
+static int __ipipe_ptd_key_count;
6634
+static unsigned long __ipipe_ptd_key_map;
6636
+static unsigned long __ipipe_domain_slot_map;
6638
+struct ipipe_domain ipipe_root;
6642
+ * Create an alias to the unique root status, so that arch-dep code
6643
+ * may get simple and easy access to this percpu variable. We also
6644
+ * create an array of pointers to the percpu domain data; this tends
6645
+ * to produce better code when reaching non-root domains. We make
6646
+ * sure that the early boot code would be able to dereference the
6647
+ * pointer to the root domain data safely by statically initializing
6648
+ * its value (local_irq*() routines depend on this).
6651
+extern unsigned long __ipipe_root_status
6652
+__attribute__((alias(__stringify(__raw_get_cpu_var(ipipe_percpu_darray)))));
6653
+EXPORT_SYMBOL(__ipipe_root_status);
6654
+#else /* __GNUC__ < 4 */
6656
+ * Work around a GCC 3.x issue making alias symbols unusable as
6657
+ * constant initializers.
6659
+unsigned long *const __ipipe_root_status_addr =
6660
+ &__raw_get_cpu_var(ipipe_percpu_darray)[IPIPE_ROOT_SLOT].status;
6661
+EXPORT_SYMBOL(__ipipe_root_status_addr);
6662
+#endif /* __GNUC__ < 4 */
6664
+DEFINE_PER_CPU(struct ipipe_percpu_domain_data *, ipipe_percpu_daddr[CONFIG_IPIPE_DOMAINS]) =
6665
+{ [IPIPE_ROOT_SLOT] = (struct ipipe_percpu_domain_data *)&__raw_get_cpu_var(ipipe_percpu_darray) };
6666
+EXPORT_PER_CPU_SYMBOL(ipipe_percpu_daddr);
6667
+#endif /* !CONFIG_SMP */
6669
+DEFINE_PER_CPU(struct ipipe_percpu_domain_data, ipipe_percpu_darray[CONFIG_IPIPE_DOMAINS]) =
6670
+{ [IPIPE_ROOT_SLOT] = { .status = IPIPE_STALL_MASK } }; /* Root domain stalled on each CPU at startup. */
6672
+DEFINE_PER_CPU(struct ipipe_domain *, ipipe_percpu_domain) = { &ipipe_root };
6674
+DEFINE_PER_CPU(unsigned long, ipipe_nmi_saved_root); /* Copy of root status during NMI */
6676
+static IPIPE_DEFINE_SPINLOCK(__ipipe_pipelock);
6678
+LIST_HEAD(__ipipe_pipeline);
6680
+unsigned long __ipipe_virtual_irq_map;
6682
+#ifdef CONFIG_PRINTK
6683
+unsigned __ipipe_printk_virq;
6684
+#endif /* CONFIG_PRINTK */
6686
+int __ipipe_event_monitors[IPIPE_NR_EVENTS];
6688
+#ifdef CONFIG_GENERIC_CLOCKEVENTS
6690
+DECLARE_PER_CPU(struct tick_device, tick_cpu_device);
6692
+static DEFINE_PER_CPU(struct ipipe_tick_device, ipipe_tick_cpu_device);
6694
+int ipipe_request_tickdev(const char *devname,
6695
+ void (*emumode)(enum clock_event_mode mode,
6696
+ struct clock_event_device *cdev),
6697
+ int (*emutick)(unsigned long delta,
6698
+ struct clock_event_device *cdev),
6699
+ int cpu, unsigned long *tmfreq)
6701
+ struct ipipe_tick_device *itd;
6702
+ struct tick_device *slave;
6703
+ struct clock_event_device *evtdev;
6704
+ unsigned long long freq;
6705
+ unsigned long flags;
6708
+ flags = ipipe_critical_enter(NULL);
6710
+ itd = &per_cpu(ipipe_tick_cpu_device, cpu);
6712
+ if (itd->slave != NULL) {
6717
+ slave = &per_cpu(tick_cpu_device, cpu);
6719
+ if (strcmp(slave->evtdev->name, devname)) {
6721
+ * No conflict so far with the current tick device,
6722
+ * check whether the requested device is sane and has
6723
+ * been blessed by the kernel.
6725
+ status = __ipipe_check_tickdev(devname) ?
6726
+ CLOCK_EVT_MODE_UNUSED : CLOCK_EVT_MODE_SHUTDOWN;
6731
+ * Our caller asks to use the same clock event device for
6732
+ * ticking as we do; let's create a tick emulation device to
6733
+ * interpose on the set_next_event() method, so that we may
6734
+ * both manage the device in oneshot mode. Only the tick
6735
+ * emulation code will actually program the clockchip hardware
6736
+ * for the next shot, though.
6738
+ * CAUTION: we still have to grab the tick device even when it
6739
+ * currently runs in periodic mode, since the kernel may switch
6740
+ * to oneshot dynamically (highres/no_hz tick mode).
6743
+ evtdev = slave->evtdev;
6744
+ status = evtdev->mode;
6746
+ if (status == CLOCK_EVT_MODE_SHUTDOWN)
6749
+ itd->slave = slave;
6750
+ itd->emul_set_mode = emumode;
6751
+ itd->emul_set_tick = emutick;
6752
+ itd->real_set_mode = evtdev->set_mode;
6753
+ itd->real_set_tick = evtdev->set_next_event;
6754
+ itd->real_max_delta_ns = evtdev->max_delta_ns;
6755
+ itd->real_mult = evtdev->mult;
6756
+ itd->real_shift = evtdev->shift;
6757
+ freq = (1000000000ULL * evtdev->mult) >> evtdev->shift;
6758
+ *tmfreq = (unsigned long)freq;
6759
+ evtdev->set_mode = emumode;
6760
+ evtdev->set_next_event = emutick;
6761
+ evtdev->max_delta_ns = ULONG_MAX;
6763
+ evtdev->shift = 0;
6765
+ ipipe_critical_exit(flags);
6770
+void ipipe_release_tickdev(int cpu)
6772
+ struct ipipe_tick_device *itd;
6773
+ struct tick_device *slave;
6774
+ struct clock_event_device *evtdev;
6775
+ unsigned long flags;
6777
+ flags = ipipe_critical_enter(NULL);
6779
+ itd = &per_cpu(ipipe_tick_cpu_device, cpu);
6781
+ if (itd->slave != NULL) {
6782
+ slave = &per_cpu(tick_cpu_device, cpu);
6783
+ evtdev = slave->evtdev;
6784
+ evtdev->set_mode = itd->real_set_mode;
6785
+ evtdev->set_next_event = itd->real_set_tick;
6786
+ evtdev->max_delta_ns = itd->real_max_delta_ns;
6787
+ evtdev->mult = itd->real_mult;
6788
+ evtdev->shift = itd->real_shift;
6789
+ itd->slave = NULL;
6792
+ ipipe_critical_exit(flags);
6795
+#endif /* CONFIG_GENERIC_CLOCKEVENTS */
6798
+ * ipipe_init() -- Initialization routine of the IPIPE layer. Called
6799
+ * by the host kernel early during the boot procedure.
6801
+void __init ipipe_init(void)
6803
+ struct ipipe_domain *ipd = &ipipe_root;
6805
+ __ipipe_check_platform(); /* Do platform dependent checks first. */
6808
+ * A lightweight registration code for the root domain. We are
6809
+ * running on the boot CPU, hw interrupts are off, and
6810
+ * secondary CPUs are still lost in space.
6813
+ /* Reserve percpu data slot #0 for the root domain. */
6815
+ set_bit(0, &__ipipe_domain_slot_map);
6817
+ ipd->name = "Linux";
6818
+ ipd->domid = IPIPE_ROOT_ID;
6819
+ ipd->priority = IPIPE_ROOT_PRIO;
6821
+ __ipipe_init_stage(ipd);
6823
+ INIT_LIST_HEAD(&ipd->p_link);
6824
+ list_add_tail(&ipd->p_link, &__ipipe_pipeline);
6826
+ __ipipe_init_platform();
6828
+#ifdef CONFIG_PRINTK
6829
+ __ipipe_printk_virq = ipipe_alloc_virq(); /* Cannot fail here. */
6830
+ ipd->irqs[__ipipe_printk_virq].handler = &__ipipe_flush_printk;
6831
+ ipd->irqs[__ipipe_printk_virq].cookie = NULL;
6832
+ ipd->irqs[__ipipe_printk_virq].acknowledge = NULL;
6833
+ ipd->irqs[__ipipe_printk_virq].control = IPIPE_HANDLE_MASK;
6834
+#endif /* CONFIG_PRINTK */
6836
+ __ipipe_enable_pipeline();
6838
+ printk(KERN_INFO "I-pipe %s: pipeline enabled.\n",
6839
+ IPIPE_VERSION_STRING);
6842
+void __ipipe_init_stage(struct ipipe_domain *ipd)
6846
+ for_each_online_cpu(cpu) {
6848
+ ipipe_percpudom(ipd, irqpend_himask, cpu) = 0;
6850
+ for (n = 0; n < IPIPE_IRQ_IWORDS; n++) {
6851
+ ipipe_percpudom(ipd, irqpend_lomask, cpu)[n] = 0;
6852
+ ipipe_percpudom(ipd, irqheld_mask, cpu)[n] = 0;
6855
+ for (n = 0; n < IPIPE_NR_IRQS; n++)
6856
+ ipipe_percpudom(ipd, irqall, cpu)[n] = 0;
6858
+ ipipe_percpudom(ipd, evsync, cpu) = 0;
6861
+ for (n = 0; n < IPIPE_NR_IRQS; n++) {
6862
+ ipd->irqs[n].acknowledge = NULL;
6863
+ ipd->irqs[n].handler = NULL;
6864
+ ipd->irqs[n].control = IPIPE_PASS_MASK; /* Pass but don't handle */
6867
+ for (n = 0; n < IPIPE_NR_EVENTS; n++)
6868
+ ipd->evhand[n] = NULL;
6870
+ ipd->evself = 0LL;
6871
+ mutex_init(&ipd->mutex);
6873
+ __ipipe_hook_critical_ipi(ipd);
6876
+void __ipipe_cleanup_domain(struct ipipe_domain *ipd)
6878
+ ipipe_unstall_pipeline_from(ipd);
6884
+ for_each_online_cpu(cpu) {
6885
+ while (ipipe_percpudom(ipd, irqpend_himask, cpu) != 0)
6890
+ __raw_get_cpu_var(ipipe_percpu_daddr)[ipd->slot] = NULL;
6893
+ clear_bit(ipd->slot, &__ipipe_domain_slot_map);
6896
+void __ipipe_unstall_root(void)
6898
+ struct ipipe_percpu_domain_data *p = ipipe_root_cpudom_ptr();
6900
+#ifndef CONFIG_IPIPE_DEBUG_CONTEXT
6901
+ BUG_ON(!ipipe_root_domain_p);
6904
+ local_irq_disable_hw();
6906
+ __clear_bit(IPIPE_STALL_FLAG, &p->status);
6908
+ if (unlikely(p->irqpend_himask != 0))
6909
+ __ipipe_sync_pipeline(IPIPE_IRQMASK_ANY);
6911
+ local_irq_enable_hw();
6914
+void __ipipe_restore_root(unsigned long x)
6916
+#ifndef CONFIG_IPIPE_DEBUG_CONTEXT
6917
+ BUG_ON(!ipipe_root_domain_p);
6921
+ __ipipe_stall_root();
6923
+ __ipipe_unstall_root();
6926
+void ipipe_stall_pipeline_from(struct ipipe_domain *ipd)
6928
+ set_bit_safe(IPIPE_STALL_FLAG, &ipipe_cpudom_var(ipd, status));
6930
+ if (__ipipe_pipeline_head_p(ipd))
6931
+ local_irq_disable_hw();
6934
+unsigned long ipipe_test_and_stall_pipeline_from(struct ipipe_domain *ipd)
6938
+ x = test_and_set_bit_safe(IPIPE_STALL_FLAG, &ipipe_cpudom_var(ipd, status));
6940
+ if (__ipipe_pipeline_head_p(ipd))
6941
+ local_irq_disable_hw();
6947
+ * ipipe_unstall_pipeline_from() -- Unstall the pipeline and
6948
+ * synchronize pending interrupts for a given domain. See
6949
+ * __ipipe_walk_pipeline() for more information.
6951
+void ipipe_unstall_pipeline_from(struct ipipe_domain *ipd)
6953
+ struct list_head *pos;
6954
+ unsigned long flags;
6956
+ local_irq_save_hw(flags);
6958
+ __clear_bit(IPIPE_STALL_FLAG, &ipipe_cpudom_var(ipd, status));
6960
+ if (ipd == ipipe_current_domain)
6961
+ pos = &ipd->p_link;
6963
+ pos = __ipipe_pipeline.next;
6965
+ __ipipe_walk_pipeline(pos);
6967
+ if (likely(__ipipe_pipeline_head_p(ipd)))
6968
+ local_irq_enable_hw();
6970
+ local_irq_restore_hw(flags);
6973
+unsigned long ipipe_test_and_unstall_pipeline_from(struct ipipe_domain *ipd)
6977
+ x = test_bit(IPIPE_STALL_FLAG, &ipipe_cpudom_var(ipd, status));
6978
+ ipipe_unstall_pipeline_from(ipd);
6983
+void ipipe_restore_pipeline_from(struct ipipe_domain *ipd,
6987
+ ipipe_stall_pipeline_from(ipd);
6989
+ ipipe_unstall_pipeline_from(ipd);
6992
+void ipipe_unstall_pipeline_head(void)
6994
+ struct ipipe_percpu_domain_data *p = ipipe_head_cpudom_ptr();
6996
+ local_irq_disable_hw();
6998
+ __clear_bit(IPIPE_STALL_FLAG, &p->status);
7000
+ if (unlikely(p->irqpend_himask != 0)) {
7001
+ struct ipipe_domain *head_domain = __ipipe_pipeline_head();
7002
+ if (likely(head_domain == ipipe_current_domain))
7003
+ __ipipe_sync_pipeline(IPIPE_IRQMASK_ANY);
7005
+ __ipipe_walk_pipeline(&head_domain->p_link);
7008
+ local_irq_enable_hw();
7011
+void __ipipe_restore_pipeline_head(unsigned long x)
7013
+ struct ipipe_percpu_domain_data *p = ipipe_head_cpudom_ptr();
7015
+ local_irq_disable_hw();
7018
+#ifdef CONFIG_DEBUG_KERNEL
7019
+ static int warned;
7020
+ if (!warned && test_and_set_bit(IPIPE_STALL_FLAG, &p->status)) {
7022
+ * Already stalled although ipipe_restore_pipeline_head()
7023
+ * should have detected it? Send a warning once.
7026
+ printk(KERN_WARNING
7027
+ "I-pipe: ipipe_restore_pipeline_head() optimization failed.\n");
7030
+#else /* !CONFIG_DEBUG_KERNEL */
7031
+ set_bit(IPIPE_STALL_FLAG, &p->status);
7032
+#endif /* CONFIG_DEBUG_KERNEL */
7035
+ __clear_bit(IPIPE_STALL_FLAG, &p->status);
7036
+ if (unlikely(p->irqpend_himask != 0)) {
7037
+ struct ipipe_domain *head_domain = __ipipe_pipeline_head();
7038
+ if (likely(head_domain == ipipe_current_domain))
7039
+ __ipipe_sync_pipeline(IPIPE_IRQMASK_ANY);
7041
+ __ipipe_walk_pipeline(&head_domain->p_link);
7043
+ local_irq_enable_hw();
7047
+void __ipipe_spin_lock_irq(raw_spinlock_t *lock)
7049
+ local_irq_disable_hw();
7050
+ __raw_spin_lock(lock);
7051
+ __set_bit(IPIPE_STALL_FLAG, &ipipe_this_cpudom_var(status));
7054
+void __ipipe_spin_unlock_irq(raw_spinlock_t *lock)
7056
+ __raw_spin_unlock(lock);
7057
+ __clear_bit(IPIPE_STALL_FLAG, &ipipe_this_cpudom_var(status));
7058
+ local_irq_enable_hw();
7061
+unsigned long __ipipe_spin_lock_irqsave(raw_spinlock_t *lock)
7063
+ unsigned long flags;
7066
+ local_irq_save_hw(flags);
7067
+ __raw_spin_lock(lock);
7068
+ s = __test_and_set_bit(IPIPE_STALL_FLAG, &ipipe_this_cpudom_var(status));
7070
+ return raw_mangle_irq_bits(s, flags);
7073
+void __ipipe_spin_unlock_irqrestore(raw_spinlock_t *lock, unsigned long x)
7075
+ __raw_spin_unlock(lock);
7076
+ if (!raw_demangle_irq_bits(&x))
7077
+ __clear_bit(IPIPE_STALL_FLAG, &ipipe_this_cpudom_var(status));
7078
+ local_irq_restore_hw(x);
7081
+void __ipipe_spin_unlock_irqbegin(ipipe_spinlock_t *lock)
7083
+ __raw_spin_unlock(&lock->__raw_lock);
7086
+void __ipipe_spin_unlock_irqcomplete(unsigned long x)
7088
+ if (!raw_demangle_irq_bits(&x))
7089
+ __clear_bit(IPIPE_STALL_FLAG, &ipipe_this_cpudom_var(status));
7090
+ local_irq_restore_hw(x);
7093
+/* Must be called with hw IRQs off. */
7094
+void __ipipe_set_irq_pending(struct ipipe_domain *ipd, unsigned irq)
7096
+ int level = irq >> IPIPE_IRQ_ISHIFT, rank = irq & IPIPE_IRQ_IMASK;
7097
+ struct ipipe_percpu_domain_data *p = ipipe_cpudom_ptr(ipd);
7101
+ if (likely(!test_bit(IPIPE_LOCK_FLAG, &ipd->irqs[irq].control))) {
7102
+ __set_bit(rank, &p->irqpend_lomask[level]);
7103
+ __set_bit(level, &p->irqpend_himask);
7105
+ __set_bit(rank, &p->irqheld_mask[level]);
7110
+/* Must be called with hw IRQs off. */
7111
+void __ipipe_lock_irq(struct ipipe_domain *ipd, int cpu, unsigned irq)
7113
+ struct ipipe_percpu_domain_data *p;
7116
+ if (unlikely(test_and_set_bit(IPIPE_LOCK_FLAG, &ipd->irqs[irq].control)))
7119
+ level = irq >> IPIPE_IRQ_ISHIFT;
7120
+ rank = irq & IPIPE_IRQ_IMASK;
7121
+ p = ipipe_percpudom_ptr(ipd, cpu);
7123
+ if (__test_and_clear_bit(rank, &p->irqpend_lomask[level]))
7124
+ __set_bit(rank, &p->irqheld_mask[level]);
7125
+ if (p->irqpend_lomask[level] == 0)
7126
+ __clear_bit(level, &p->irqpend_himask);
7129
+/* Must be called with hw IRQs off. */
7130
+void __ipipe_unlock_irq(struct ipipe_domain *ipd, unsigned irq)
7132
+ struct ipipe_percpu_domain_data *p;
7133
+ int cpu, level, rank;
7135
+ if (unlikely(!test_and_clear_bit(IPIPE_LOCK_FLAG, &ipd->irqs[irq].control)))
7138
+ level = irq >> IPIPE_IRQ_ISHIFT, rank = irq & IPIPE_IRQ_IMASK;
7139
+ for_each_online_cpu(cpu) {
7140
+ p = ipipe_percpudom_ptr(ipd, cpu);
7141
+ if (test_and_clear_bit(rank, &p->irqheld_mask[level])) {
7142
+ /* We need atomic ops here: */
7143
+ set_bit(rank, &p->irqpend_lomask[level]);
7144
+ set_bit(level, &p->irqpend_himask);
7150
+ * __ipipe_walk_pipeline(): Plays interrupts pending in the log. Must
7151
+ * be called with local hw interrupts disabled.
7153
+void __ipipe_walk_pipeline(struct list_head *pos)
7155
+ struct ipipe_domain *this_domain = ipipe_current_domain, *next_domain;
7156
+ struct ipipe_percpu_domain_data *p, *np;
7158
+ p = ipipe_cpudom_ptr(this_domain);
7160
+ while (pos != &__ipipe_pipeline) {
7162
+ next_domain = list_entry(pos, struct ipipe_domain, p_link);
7163
+ np = ipipe_cpudom_ptr(next_domain);
7165
+ if (test_bit(IPIPE_STALL_FLAG, &np->status))
7166
+ break; /* Stalled stage -- do not go further. */
7168
+ if (np->irqpend_himask) {
7169
+ if (next_domain == this_domain)
7170
+ __ipipe_sync_pipeline(IPIPE_IRQMASK_ANY);
7174
+ ipipe_current_domain = next_domain;
7175
+ ipipe_suspend_domain(); /* Sync stage and propagate interrupts. */
7177
+ if (ipipe_current_domain == next_domain)
7178
+ ipipe_current_domain = this_domain;
7180
+ * Otherwise, something changed the current domain under our
7181
+ * feet, recycling the register set; do not override the new
7185
+ if (p->irqpend_himask &&
7186
+ !test_bit(IPIPE_STALL_FLAG, &p->status))
7187
+ __ipipe_sync_pipeline(IPIPE_IRQMASK_ANY);
7190
+ } else if (next_domain == this_domain)
7193
+ pos = next_domain->p_link.next;
7198
+ * ipipe_suspend_domain() -- Suspend the current domain, switching to
7199
+ * the next one which has pending work down the pipeline.
7201
+void ipipe_suspend_domain(void)
7203
+ struct ipipe_domain *this_domain, *next_domain;
7204
+ struct ipipe_percpu_domain_data *p;
7205
+ struct list_head *ln;
7206
+ unsigned long flags;
7208
+ local_irq_save_hw(flags);
7210
+ this_domain = next_domain = ipipe_current_domain;
7211
+ p = ipipe_cpudom_ptr(this_domain);
7212
+ p->status &= ~(IPIPE_STALL_MASK|IPIPE_SYNC_MASK);
7214
+ if (p->irqpend_himask != 0)
7218
+ ln = next_domain->p_link.next;
7220
+ if (ln == &__ipipe_pipeline)
7223
+ next_domain = list_entry(ln, struct ipipe_domain, p_link);
7224
+ p = ipipe_cpudom_ptr(next_domain);
7226
+ if (p->status & IPIPE_STALL_MASK)
7229
+ if (p->irqpend_himask == 0)
7232
+ ipipe_current_domain = next_domain;
7234
+ __ipipe_sync_pipeline(IPIPE_IRQMASK_ANY);
7236
+ if (ipipe_current_domain != next_domain)
7238
+ * Something has changed the current domain under our
7239
+ * feet, recycling the register set; take note.
7241
+ this_domain = ipipe_current_domain;
7244
+ ipipe_current_domain = this_domain;
7246
+ local_irq_restore_hw(flags);
7250
+/* ipipe_alloc_virq() -- Allocate a pipelined virtual/soft interrupt.
7251
+ * Virtual interrupts are handled in exactly the same way as their
7252
+ * hw-generated counterparts wrt pipelining.
7254
+unsigned ipipe_alloc_virq(void)
7256
+ unsigned long flags, irq = 0;
7259
+ spin_lock_irqsave(&__ipipe_pipelock, flags);
7261
+ if (__ipipe_virtual_irq_map != ~0) {
7262
+ ipos = ffz(__ipipe_virtual_irq_map);
7263
+ set_bit(ipos, &__ipipe_virtual_irq_map);
7264
+ irq = ipos + IPIPE_VIRQ_BASE;
7267
+ spin_unlock_irqrestore(&__ipipe_pipelock, flags);
7272
+/* ipipe_virtualize_irq() -- Attach a handler (and optionally a hw
7273
+ acknowledge routine) to an interrupt for a given domain. */
7275
+int ipipe_virtualize_irq(struct ipipe_domain *ipd,
7277
+ ipipe_irq_handler_t handler,
7279
+ ipipe_irq_ackfn_t acknowledge,
7280
+ unsigned modemask)
7282
+ unsigned long flags;
7285
+ if (irq >= IPIPE_NR_IRQS)
7288
+ if (ipd->irqs[irq].control & IPIPE_SYSTEM_MASK)
7291
+ if (!test_bit(IPIPE_AHEAD_FLAG, &ipd->flags))
7292
+ /* Silently unwire interrupts for non-heading domains. */
7293
+ modemask &= ~IPIPE_WIRED_MASK;
7295
+ spin_lock_irqsave(&__ipipe_pipelock, flags);
7297
+ if (handler != NULL) {
7298
+ if (handler == IPIPE_SAME_HANDLER) {
7299
+ handler = ipd->irqs[irq].handler;
7300
+ cookie = ipd->irqs[irq].cookie;
7302
+ if (handler == NULL) {
7304
+ goto unlock_and_exit;
7306
+ } else if ((modemask & IPIPE_EXCLUSIVE_MASK) != 0 &&
7307
+ ipd->irqs[irq].handler != NULL) {
7309
+ goto unlock_and_exit;
7312
+ /* Wired interrupts can only be delivered to domains
7313
+ * always heading the pipeline, and using dynamic
7316
+ if ((modemask & IPIPE_WIRED_MASK) != 0) {
7317
+ if ((modemask & (IPIPE_PASS_MASK | IPIPE_STICKY_MASK)) != 0) {
7319
+ goto unlock_and_exit;
7321
+ modemask |= (IPIPE_HANDLE_MASK);
7324
+ if ((modemask & IPIPE_STICKY_MASK) != 0)
7325
+ modemask |= IPIPE_HANDLE_MASK;
7328
+ ~(IPIPE_HANDLE_MASK | IPIPE_STICKY_MASK |
7329
+ IPIPE_EXCLUSIVE_MASK | IPIPE_WIRED_MASK);
7331
+ if (acknowledge == NULL && !ipipe_virtual_irq_p(irq))
7332
+ /* Acknowledge handler unspecified for a hw interrupt:
7333
+ use the Linux-defined handler instead. */
7334
+ acknowledge = ipipe_root_domain->irqs[irq].acknowledge;
7336
+ ipd->irqs[irq].handler = handler;
7337
+ ipd->irqs[irq].cookie = cookie;
7338
+ ipd->irqs[irq].acknowledge = acknowledge;
7339
+ ipd->irqs[irq].control = modemask;
7341
+ if (irq < NR_IRQS && handler != NULL && !ipipe_virtual_irq_p(irq)) {
7342
+ __ipipe_enable_irqdesc(ipd, irq);
7344
+ if ((modemask & IPIPE_ENABLE_MASK) != 0) {
7345
+ if (ipd != ipipe_current_domain) {
7346
+ /* IRQ enable/disable state is domain-sensitive, so we may
7347
+ not change it for another domain. What is allowed
7348
+ however is forcing some domain to handle an interrupt
7349
+ source, by passing the proper 'ipd' descriptor which
7350
+ thus may be different from ipipe_current_domain. */
7352
+ goto unlock_and_exit;
7354
+ __ipipe_enable_irq(irq);
7362
+ spin_unlock_irqrestore(&__ipipe_pipelock, flags);
7367
+/* ipipe_control_irq() -- Change modes of a pipelined interrupt for
7368
+ * the current domain. */
7370
+int ipipe_control_irq(unsigned irq, unsigned clrmask, unsigned setmask)
7372
+ struct ipipe_domain *ipd;
7373
+ unsigned long flags;
7375
+ if (irq >= IPIPE_NR_IRQS)
7378
+ ipd = ipipe_current_domain;
7380
+ if (ipd->irqs[irq].control & IPIPE_SYSTEM_MASK)
7383
+ if (ipd->irqs[irq].handler == NULL)
7384
+ setmask &= ~(IPIPE_HANDLE_MASK | IPIPE_STICKY_MASK);
7386
+ if ((setmask & IPIPE_STICKY_MASK) != 0)
7387
+ setmask |= IPIPE_HANDLE_MASK;
7389
+ if ((clrmask & (IPIPE_HANDLE_MASK | IPIPE_STICKY_MASK)) != 0) /* If one goes, both go. */
7390
+ clrmask |= (IPIPE_HANDLE_MASK | IPIPE_STICKY_MASK);
7392
+ spin_lock_irqsave(&__ipipe_pipelock, flags);
7394
+ ipd->irqs[irq].control &= ~clrmask;
7395
+ ipd->irqs[irq].control |= setmask;
7397
+ if ((setmask & IPIPE_ENABLE_MASK) != 0)
7398
+ __ipipe_enable_irq(irq);
7399
+ else if ((clrmask & IPIPE_ENABLE_MASK) != 0)
7400
+ __ipipe_disable_irq(irq);
7402
+ spin_unlock_irqrestore(&__ipipe_pipelock, flags);
7407
+/* __ipipe_dispatch_event() -- Low-level event dispatcher. */
7409
+int __ipipe_dispatch_event (unsigned event, void *data)
7411
+ struct ipipe_domain *start_domain, *this_domain, *next_domain;
7412
+ ipipe_event_handler_t evhand;
7413
+ struct list_head *pos, *npos;
7414
+ unsigned long flags;
7415
+ int propagate = 1;
7417
+ local_irq_save_hw(flags);
7419
+ start_domain = this_domain = ipipe_current_domain;
7421
+ list_for_each_safe(pos, npos, &__ipipe_pipeline) {
7423
+ * Note: Domain migration may occur while running
7424
+ * event or interrupt handlers, in which case the
7425
+ * current register set is going to be recycled for a
7426
+ * different domain than the initiating one. We do
7427
+ * care for that, always tracking the current domain
7428
+ * descriptor upon return from those handlers.
7430
+ next_domain = list_entry(pos, struct ipipe_domain, p_link);
7433
+ * Keep a cached copy of the handler's address since
7434
+ * ipipe_catch_event() may clear it under our feet.
7436
+ evhand = next_domain->evhand[event];
7438
+ if (evhand != NULL) {
7439
+ ipipe_current_domain = next_domain;
7440
+ ipipe_cpudom_var(next_domain, evsync) |= (1LL << event);
7441
+ local_irq_restore_hw(flags);
7442
+ propagate = !evhand(event, start_domain, data);
7443
+ local_irq_save_hw(flags);
7445
+ * We may have a migration issue here, if the
7446
+ * current task is migrated to another CPU on
7447
+ * behalf of the invoked handler, usually when
7448
+ * a syscall event is processed. However,
7449
+ * ipipe_catch_event() will make sure that a
7450
+ * CPU that clears a handler for any given
7451
+ * event will not attempt to wait for itself
7452
+ * to clear the evsync bit for that event,
7453
+ * which practically plugs the hole, without
7454
+ * resorting to a much more complex strategy.
7456
+ ipipe_cpudom_var(next_domain, evsync) &= ~(1LL << event);
7457
+ if (ipipe_current_domain != next_domain)
7458
+ this_domain = ipipe_current_domain;
7461
+ if (next_domain != ipipe_root_domain && /* NEVER sync the root stage here. */
7462
+ ipipe_cpudom_var(next_domain, irqpend_himask) != 0 &&
7463
+ !test_bit(IPIPE_STALL_FLAG, &ipipe_cpudom_var(next_domain, status))) {
7464
+ ipipe_current_domain = next_domain;
7465
+ __ipipe_sync_pipeline(IPIPE_IRQMASK_ANY);
7466
+ if (ipipe_current_domain != next_domain)
7467
+ this_domain = ipipe_current_domain;
7470
+ ipipe_current_domain = this_domain;
7472
+ if (next_domain == this_domain || !propagate)
7476
+ local_irq_restore_hw(flags);
7478
+ return !propagate;
7482
+ * __ipipe_dispatch_wired -- Wired interrupt dispatcher. Wired
7483
+ * interrupts are immediately and unconditionally delivered to the
7484
+ * domain heading the pipeline upon receipt, and such a domain must have
7485
+ * been registered as an invariant head for the system (priority ==
7486
+ * IPIPE_HEAD_PRIORITY). The motivation for using wired interrupts is
7487
+ * to get an extra-fast dispatching path for those IRQs, by relying on
7488
+ * a straightforward logic based on assumptions that must always be
7489
+ * true for invariant head domains. The following assumptions are
7490
+ * made when dealing with such interrupts:
7492
+ * 1- Wired interrupts are purely dynamic, i.e. the decision to
7493
+ * propagate them down the pipeline must be done from the head domain
7495
+ * 2- Wired interrupts cannot be shared or sticky.
7496
+ * 3- The root domain cannot be an invariant pipeline head; as a
7497
+ * consequence, the root domain cannot handle wired
7499
+ * 4- Wired interrupts must have a valid acknowledge handler for the
7500
+ * head domain (if needed, see __ipipe_handle_irq).
7502
+ * Called with hw interrupts off.
7505
+void __ipipe_dispatch_wired(struct ipipe_domain *head, unsigned irq)
7507
+ struct ipipe_percpu_domain_data *p = ipipe_cpudom_ptr(head);
7511
+ if (unlikely(test_bit(IPIPE_LOCK_FLAG, &head->irqs[irq].control))) {
7513
+ * If we can't process this IRQ right now, we must
7514
+ * mark it as held, so that it will get played during
7515
+ * normal log sync when the corresponding interrupt
7516
+ * source is eventually unlocked.
7519
+ __set_bit(irq & IPIPE_IRQ_IMASK, &p->irqheld_mask[irq >> IPIPE_IRQ_ISHIFT]);
7523
+ if (test_bit(IPIPE_STALL_FLAG, &p->status)) {
7524
+ __ipipe_set_irq_pending(head, irq);
7528
+ __ipipe_dispatch_wired_nocheck(head, irq);
7531
+void __ipipe_dispatch_wired_nocheck(struct ipipe_domain *head, unsigned irq)
7533
+ struct ipipe_percpu_domain_data *p = ipipe_cpudom_ptr(head);
7534
+ struct ipipe_domain *old;
7538
+ old = ipipe_current_domain;
7539
+ ipipe_current_domain = head; /* Switch to the head domain. */
7542
+ __set_bit(IPIPE_STALL_FLAG, &p->status);
7543
+ head->irqs[irq].handler(irq, head->irqs[irq].cookie); /* Call the ISR. */
7544
+ __ipipe_run_irqtail();
7545
+ __clear_bit(IPIPE_STALL_FLAG, &p->status);
7547
+ if (ipipe_current_domain == head) {
7548
+ ipipe_current_domain = old;
7549
+ if (old == head) {
7550
+ if (p->irqpend_himask)
7551
+ __ipipe_sync_pipeline(IPIPE_IRQMASK_ANY);
7556
+ __ipipe_walk_pipeline(&head->p_link);
7560
+ * __ipipe_sync_stage() -- Flush the pending IRQs for the current
7561
+ * domain (and processor). This routine flushes the interrupt log
7562
+ * (see "Optimistic interrupt protection" from D. Stodolsky et al. for
7563
+ * more on the deferred interrupt scheme). Every interrupt that
7564
+ * occurred while the pipeline was stalled gets played. WARNING:
7565
+ * callers on SMP systems should always check for CPU migration upon
7566
+ * return from this routine. One can control the kind of interrupts
7567
+ * which are going to be sync'ed using the syncmask
7568
+ * parameter. IPIPE_IRQMASK_ANY plays them all, IPIPE_IRQMASK_VIRT
7569
+ * plays virtual interrupts only.
7571
+ * This routine must be called with hw interrupts off.
7573
+void __ipipe_sync_stage(unsigned long syncmask)
7575
+ struct ipipe_percpu_domain_data *p;
7576
+ unsigned long mask, submask;
7577
+ struct ipipe_domain *ipd;
7578
+ int level, rank, cpu;
7581
+ ipd = ipipe_current_domain;
7582
+ p = ipipe_cpudom_ptr(ipd);
7584
+ if (__test_and_set_bit(IPIPE_SYNC_FLAG, &p->status))
7587
+ cpu = ipipe_processor_id();
7590
+ * The policy here is to keep the dispatching code interrupt-free
7591
+ * by stalling the current stage. If the upper domain handler
7592
+ * (which we call) wants to re-enable interrupts while in a safe
7593
+ * portion of the code (e.g. SA_INTERRUPT flag unset for Linux's
7594
+ * sigaction()), it will have to unstall (then stall again before
7595
+ * returning to us!) the stage when it sees fit.
7597
+ while ((mask = (p->irqpend_himask & syncmask)) != 0) {
7598
+ level = __ipipe_ffnz(mask);
7600
+ while ((submask = p->irqpend_lomask[level]) != 0) {
7601
+ rank = __ipipe_ffnz(submask);
7602
+ irq = (level << IPIPE_IRQ_ISHIFT) + rank;
7604
+ __clear_bit(rank, &p->irqpend_lomask[level]);
7606
+ if (p->irqpend_lomask[level] == 0)
7607
+ __clear_bit(level, &p->irqpend_himask);
7609
+ * Make sure the compiler will not postpone
7610
+ * the pending bitmask updates until after calling
7611
+ * the interrupt handling routine. Otherwise,
7612
+ * those late updates could overwrite any
7613
+ * change to irqpend_hi/lomask due to a nested
7614
+ * interrupt, leaving the latter unprocessed
7615
+ * (seen on mpc836x).
7619
+ if (test_bit(IPIPE_LOCK_FLAG, &ipd->irqs[irq].control))
7622
+ __set_bit(IPIPE_STALL_FLAG, &p->status);
7624
+ if (ipd == ipipe_root_domain)
7625
+ trace_hardirqs_off();
7627
+ __ipipe_run_isr(ipd, irq);
7628
+ p = ipipe_cpudom_ptr(ipipe_current_domain);
7631
+ int newcpu = ipipe_processor_id();
7633
+ if (newcpu != cpu) { /* Handle CPU migration. */
7635
+ * We expect any domain to clear the SYNC bit each
7636
+ * time it switches in a new task, so that preemptions
7637
+ * and/or CPU migrations (in the SMP case) over the
7638
+ * ISR do not lock out the log syncer for some
7639
+ * indefinite amount of time. In the Linux case,
7640
+ * schedule() handles this (see kernel/sched.c). For
7641
+ * this reason, we don't bother clearing it here for
7642
+ * the source CPU in the migration handling case,
7643
+ * since it must have scheduled another task in by
7646
+ __set_bit(IPIPE_SYNC_FLAG, &p->status);
7650
+#endif /* CONFIG_SMP */
7651
+#ifdef CONFIG_TRACE_IRQFLAGS
7652
+ if (ipipe_root_domain_p &&
7653
+ test_bit(IPIPE_STALL_FLAG, &p->status))
7654
+ trace_hardirqs_on();
7656
+ __clear_bit(IPIPE_STALL_FLAG, &p->status);
7660
+ __clear_bit(IPIPE_SYNC_FLAG, &p->status);
7663
+/* ipipe_register_domain() -- Link a new domain to the pipeline. */
7665
+int ipipe_register_domain(struct ipipe_domain *ipd,
7666
+ struct ipipe_domain_attr *attr)
7668
+ struct ipipe_domain *_ipd;
7669
+ struct list_head *pos = NULL;
7670
+ unsigned long flags;
7672
+ if (!ipipe_root_domain_p) {
7673
+ printk(KERN_WARNING
7674
+ "I-pipe: Only the root domain may register a new domain.\n");
7678
+ flags = ipipe_critical_enter(NULL);
7680
+ if (attr->priority == IPIPE_HEAD_PRIORITY) {
7681
+ if (test_bit(IPIPE_HEAD_SLOT, &__ipipe_domain_slot_map)) {
7682
+ ipipe_critical_exit(flags);
7683
+ return -EAGAIN; /* Cannot override current head. */
7685
+ ipd->slot = IPIPE_HEAD_SLOT;
7687
+ ipd->slot = ffz(__ipipe_domain_slot_map);
7689
+ if (ipd->slot < CONFIG_IPIPE_DOMAINS) {
7690
+ set_bit(ipd->slot, &__ipipe_domain_slot_map);
7691
+ list_for_each(pos, &__ipipe_pipeline) {
7692
+ _ipd = list_entry(pos, struct ipipe_domain, p_link);
7693
+ if (_ipd->domid == attr->domid)
7698
+ ipipe_critical_exit(flags);
7700
+ if (pos != &__ipipe_pipeline) {
7701
+ if (ipd->slot < CONFIG_IPIPE_DOMAINS)
7702
+ clear_bit(ipd->slot, &__ipipe_domain_slot_map);
7708
+ * Set up the per-domain pointers for direct access to the
7709
+ * percpu domain data. This saves a costly multiply each time
7710
+ * we need to refer to the contents of the percpu domain data
7713
+ __raw_get_cpu_var(ipipe_percpu_daddr)[ipd->slot] = &__raw_get_cpu_var(ipipe_percpu_darray)[ipd->slot];
7716
+ ipd->name = attr->name;
7717
+ ipd->domid = attr->domid;
7718
+ ipd->pdd = attr->pdd;
7721
+ if (attr->priority == IPIPE_HEAD_PRIORITY) {
7722
+ ipd->priority = INT_MAX;
7723
+ __set_bit(IPIPE_AHEAD_FLAG,&ipd->flags);
7726
+ ipd->priority = attr->priority;
7728
+ __ipipe_init_stage(ipd);
7730
+ INIT_LIST_HEAD(&ipd->p_link);
7732
+#ifdef CONFIG_PROC_FS
7733
+ __ipipe_add_domain_proc(ipd);
7734
+#endif /* CONFIG_PROC_FS */
7736
+ flags = ipipe_critical_enter(NULL);
7738
+ list_for_each(pos, &__ipipe_pipeline) {
7739
+ _ipd = list_entry(pos, struct ipipe_domain, p_link);
7740
+ if (ipd->priority > _ipd->priority)
7744
+ list_add_tail(&ipd->p_link, pos);
7746
+ ipipe_critical_exit(flags);
7748
+ printk(KERN_INFO "I-pipe: Domain %s registered.\n", ipd->name);
7751
+ * Finally, allow the new domain to perform its initialization
7755
+ if (attr->entry != NULL) {
7756
+ ipipe_current_domain = ipd;
7758
+ ipipe_current_domain = ipipe_root_domain;
7760
+ local_irq_save_hw(flags);
7762
+ if (ipipe_root_cpudom_var(irqpend_himask) != 0 &&
7763
+ !test_bit(IPIPE_STALL_FLAG, &ipipe_root_cpudom_var(status)))
7764
+ __ipipe_sync_pipeline(IPIPE_IRQMASK_ANY);
7766
+ local_irq_restore_hw(flags);
7772
+/* ipipe_unregister_domain() -- Remove a domain from the pipeline. */
7774
+int ipipe_unregister_domain(struct ipipe_domain *ipd)
7776
+ unsigned long flags;
7778
+ if (!ipipe_root_domain_p) {
7779
+ printk(KERN_WARNING
7780
+ "I-pipe: Only the root domain may unregister a domain.\n");
7784
+ if (ipd == ipipe_root_domain) {
7785
+ printk(KERN_WARNING
7786
+ "I-pipe: Cannot unregister the root domain.\n");
7795
+ * In the SMP case, wait for the logged events to drain on
7796
+ * other processors before eventually removing the domain
7797
+ * from the pipeline.
7800
+ ipipe_unstall_pipeline_from(ipd);
7802
+ flags = ipipe_critical_enter(NULL);
7804
+ for (irq = 0; irq < IPIPE_NR_IRQS; irq++) {
7805
+ clear_bit(IPIPE_HANDLE_FLAG, &ipd->irqs[irq].control);
7806
+ clear_bit(IPIPE_STICKY_FLAG, &ipd->irqs[irq].control);
7807
+ set_bit(IPIPE_PASS_FLAG, &ipd->irqs[irq].control);
7810
+ ipipe_critical_exit(flags);
7812
+ for_each_online_cpu(cpu) {
7813
+ while (ipipe_percpudom(ipd, irqpend_himask, cpu) > 0)
7817
+#endif /* CONFIG_SMP */
7819
+ mutex_lock(&ipd->mutex);
7821
+#ifdef CONFIG_PROC_FS
7822
+ __ipipe_remove_domain_proc(ipd);
7823
+#endif /* CONFIG_PROC_FS */
7826
+ * Simply remove the domain from the pipeline and we are almost done.
7829
+ flags = ipipe_critical_enter(NULL);
7830
+ list_del_init(&ipd->p_link);
7831
+ ipipe_critical_exit(flags);
7833
+ __ipipe_cleanup_domain(ipd);
7835
+ mutex_unlock(&ipd->mutex);
7837
+ printk(KERN_INFO "I-pipe: Domain %s unregistered.\n", ipd->name);
7843
+ * ipipe_propagate_irq() -- Force a given IRQ propagation on behalf of
7844
+ * a running interrupt handler to the next domain down the pipeline.
7845
+ * ipipe_schedule_irq() -- Does almost the same as above, but attempts
7846
+ * to pend the interrupt for the current domain first.
7847
+ * Must be called with hw IRQs off.
7849
+void __ipipe_pend_irq(unsigned irq, struct list_head *head)
7851
+ struct ipipe_domain *ipd;
7852
+ struct list_head *ln;
7854
+#ifdef CONFIG_IPIPE_DEBUG
7855
+ BUG_ON(irq >= IPIPE_NR_IRQS ||
7856
+ (ipipe_virtual_irq_p(irq)
7857
+ && !test_bit(irq - IPIPE_VIRQ_BASE, &__ipipe_virtual_irq_map)));
7859
+ for (ln = head; ln != &__ipipe_pipeline; ln = ipd->p_link.next) {
7860
+ ipd = list_entry(ln, struct ipipe_domain, p_link);
7861
+ if (test_bit(IPIPE_HANDLE_FLAG, &ipd->irqs[irq].control)) {
7862
+ __ipipe_set_irq_pending(ipd, irq);
7868
+/* ipipe_free_virq() -- Release a virtual/soft interrupt. */
7870
+int ipipe_free_virq(unsigned virq)
7872
+ if (!ipipe_virtual_irq_p(virq))
7875
+ clear_bit(virq - IPIPE_VIRQ_BASE, &__ipipe_virtual_irq_map);
7880
+void ipipe_init_attr(struct ipipe_domain_attr *attr)
7882
+ attr->name = "anon";
7884
+ attr->entry = NULL;
7885
+ attr->priority = IPIPE_ROOT_PRIO;
7890
+ * ipipe_catch_event() -- Interpose or remove an event handler for a
7893
+ipipe_event_handler_t ipipe_catch_event(struct ipipe_domain *ipd,
7895
+ ipipe_event_handler_t handler)
7897
+ ipipe_event_handler_t old_handler;
7898
+ unsigned long flags;
7899
+ int self = 0, cpu;
7901
+ if (event & IPIPE_EVENT_SELF) {
7902
+ event &= ~IPIPE_EVENT_SELF;
7906
+ if (event >= IPIPE_NR_EVENTS)
7909
+ flags = ipipe_critical_enter(NULL);
7911
+ if (!(old_handler = xchg(&ipd->evhand[event],handler))) {
7914
+ ipd->evself |= (1LL << event);
7916
+ __ipipe_event_monitors[event]++;
7919
+ else if (!handler) {
7920
+ if (ipd->evself & (1LL << event))
7921
+ ipd->evself &= ~(1LL << event);
7923
+ __ipipe_event_monitors[event]--;
7924
+ } else if ((ipd->evself & (1LL << event)) && !self) {
7925
+ __ipipe_event_monitors[event]++;
7926
+ ipd->evself &= ~(1LL << event);
7927
+ } else if (!(ipd->evself & (1LL << event)) && self) {
7928
+ __ipipe_event_monitors[event]--;
7929
+ ipd->evself |= (1LL << event);
7932
+ ipipe_critical_exit(flags);
7934
+ if (!handler && ipipe_root_domain_p) {
7936
+ * If we cleared a handler on behalf of the root
7937
+ * domain, we have to wait for any current invocation
7938
+ * to drain, since our caller might subsequently unmap
7939
+ * the target domain. To this aim, this code
7940
+ * synchronizes with __ipipe_dispatch_event(),
7941
+ * guaranteeing that either the dispatcher sees a null
7942
+ * handler in which case it discards the invocation
7943
+ * (which also prevents entering a livelock), or
7944
+ * finds a valid handler and calls it. Symmetrically,
7945
+ * ipipe_catch_event() ensures that the called code
7946
+ * won't be unmapped under our feet until the event
7947
+ * synchronization flag is cleared for the given event
7950
+ preempt_disable();
7951
+ cpu = smp_processor_id();
7953
+ * Hack: this solves the potential migration issue
7954
+ * raised in __ipipe_dispatch_event(). This is a
7955
+ * work-around which makes the assumption that other
7956
+ * CPUs will subsequently either process at least one
7957
+ * interrupt for the target domain, or call
7958
+ * __ipipe_dispatch_event() without going through a
7959
+ * migration while running the handler at least once;
7960
+ * practically, this is safe on any normally running
7963
+ ipipe_percpudom(ipd, evsync, cpu) &= ~(1LL << event);
7966
+ for_each_online_cpu(cpu) {
7967
+ while (ipipe_percpudom(ipd, evsync, cpu) & (1LL << event))
7968
+ schedule_timeout_interruptible(HZ / 50);
7972
+ return old_handler;
7975
+cpumask_t ipipe_set_irq_affinity (unsigned irq, cpumask_t cpumask)
7978
+ if (irq >= IPIPE_NR_XIRQS)
7979
+ /* Allow changing affinity of external IRQs only. */
7980
+ return CPU_MASK_NONE;
7982
+ if (num_online_cpus() > 1)
7983
+ return __ipipe_set_irq_affinity(irq,cpumask);
7984
+#endif /* CONFIG_SMP */
7986
+ return CPU_MASK_NONE;
7989
+int ipipe_send_ipi (unsigned ipi, cpumask_t cpumask)
7993
+ return __ipipe_send_ipi(ipi,cpumask);
7994
+#else /* !CONFIG_SMP */
7996
+#endif /* CONFIG_SMP */
7999
+int ipipe_alloc_ptdkey (void)
8001
+ unsigned long flags;
8004
+ spin_lock_irqsave(&__ipipe_pipelock,flags);
8006
+ if (__ipipe_ptd_key_count < IPIPE_ROOT_NPTDKEYS) {
8007
+ key = ffz(__ipipe_ptd_key_map);
8008
+ set_bit(key,&__ipipe_ptd_key_map);
8009
+ __ipipe_ptd_key_count++;
8012
+ spin_unlock_irqrestore(&__ipipe_pipelock,flags);
8017
+int ipipe_free_ptdkey (int key)
8019
+ unsigned long flags;
8021
+ if (key < 0 || key >= IPIPE_ROOT_NPTDKEYS)
8024
+ spin_lock_irqsave(&__ipipe_pipelock,flags);
8026
+ if (test_and_clear_bit(key,&__ipipe_ptd_key_map))
8027
+ __ipipe_ptd_key_count--;
8029
+ spin_unlock_irqrestore(&__ipipe_pipelock,flags);
8034
+int ipipe_set_ptd (int key, void *value)
8037
+ if (key < 0 || key >= IPIPE_ROOT_NPTDKEYS)
8040
+ current->ptd[key] = value;
8045
+void *ipipe_get_ptd (int key)
8048
+ if (key < 0 || key >= IPIPE_ROOT_NPTDKEYS)
8051
+ return current->ptd[key];
8054
+#ifdef CONFIG_PROC_FS
8056
+struct proc_dir_entry *ipipe_proc_root;
8058
+static int __ipipe_version_info_proc(char *page,
8060
+ off_t off, int count, int *eof, void *data)
8062
+ int len = sprintf(page, "%s\n", IPIPE_VERSION_STRING);
8066
+ if (len <= off + count)
8069
+ *start = page + off;
8080
+static int __ipipe_common_info_show(struct seq_file *p, void *data)
8082
+ struct ipipe_domain *ipd = (struct ipipe_domain *)p->private;
8083
+ char handling, stickiness, lockbit, exclusive, virtuality;
8085
+ unsigned long ctlbits;
8088
+ seq_printf(p, " +----- Handling ([A]ccepted, [G]rabbed, [W]ired, [D]iscarded)\n");
8089
+ seq_printf(p, " |+---- Sticky\n");
8090
+ seq_printf(p, " ||+--- Locked\n");
8091
+ seq_printf(p, " |||+-- Exclusive\n");
8092
+ seq_printf(p, " ||||+- Virtual\n");
8093
+ seq_printf(p, "[IRQ] |||||\n");
8095
+ mutex_lock(&ipd->mutex);
8097
+ for (irq = 0; irq < IPIPE_NR_IRQS; irq++) {
8098
+ /* Remember to protect against
8099
+ * ipipe_virtual_irq/ipipe_control_irq if more fields
8100
+ * get involved. */
8101
+ ctlbits = ipd->irqs[irq].control;
8103
+ if (irq >= IPIPE_NR_XIRQS && !ipipe_virtual_irq_p(irq))
8105
+ * There might be a hole between the last external
8106
+ * IRQ and the first virtual one; skip it.
8110
+ if (ipipe_virtual_irq_p(irq)
8111
+ && !test_bit(irq - IPIPE_VIRQ_BASE, &__ipipe_virtual_irq_map))
8112
+ /* Non-allocated virtual IRQ; skip it. */
8116
+ * Statuses are as follows:
8117
+ * o "accepted" means handled _and_ passed down the pipeline.
8118
+ * o "grabbed" means handled, but the interrupt might be
8119
+ * terminated _or_ passed down the pipeline depending on
8120
+ * what the domain handler requests from the I-pipe.
8121
+ * o "wired" is basically the same as "grabbed", except that
8122
+ * the interrupt is unconditionally delivered to an invariant
8123
+ * pipeline head domain.
8124
+ * o "passed" means unhandled by the domain but passed
8125
+ * down the pipeline.
8126
+ * o "discarded" means unhandled and _not_ passed down the
8127
+ * pipeline. The interrupt merely disappears from the
8128
+ * current domain down to the end of the pipeline.
8130
+ if (ctlbits & IPIPE_HANDLE_MASK) {
8131
+ if (ctlbits & IPIPE_PASS_MASK)
8133
+ else if (ctlbits & IPIPE_WIRED_MASK)
8137
+ } else if (ctlbits & IPIPE_PASS_MASK)
8138
+ /* Do not output if no major action is taken. */
8143
+ if (ctlbits & IPIPE_STICKY_MASK)
8148
+ if (ctlbits & IPIPE_LOCK_MASK)
8153
+ if (ctlbits & IPIPE_EXCLUSIVE_MASK)
8158
+ if (ipipe_virtual_irq_p(irq))
8163
+ seq_printf(p, " %3u: %c%c%c%c%c\n",
8164
+ irq, handling, stickiness, lockbit, exclusive, virtuality);
8167
+ seq_printf(p, "[Domain info]\n");
8169
+ seq_printf(p, "id=0x%.8x\n", ipd->domid);
8171
+ if (test_bit(IPIPE_AHEAD_FLAG,&ipd->flags))
8172
+ seq_printf(p, "priority=topmost\n");
8174
+ seq_printf(p, "priority=%d\n", ipd->priority);
8176
+ mutex_unlock(&ipd->mutex);
8181
+static int __ipipe_common_info_open(struct inode *inode, struct file *file)
8183
+ return single_open(file, __ipipe_common_info_show, PROC_I(inode)->pde->data);
8186
+static struct file_operations __ipipe_info_proc_ops = {
8187
+ .owner = THIS_MODULE,
8188
+ .open = __ipipe_common_info_open,
8190
+ .llseek = seq_lseek,
8191
+ .release = single_release,
8194
+void __ipipe_add_domain_proc(struct ipipe_domain *ipd)
8196
+ struct proc_dir_entry *e = create_proc_entry(ipd->name, 0444, ipipe_proc_root);
8198
+ e->proc_fops = &__ipipe_info_proc_ops;
8199
+ e->data = (void*) ipd;
8203
+void __ipipe_remove_domain_proc(struct ipipe_domain *ipd)
8205
+ remove_proc_entry(ipd->name,ipipe_proc_root);
8208
+void __init ipipe_init_proc(void)
8210
+ ipipe_proc_root = create_proc_entry("ipipe",S_IFDIR, 0);
8211
+ create_proc_read_entry("version",0444,ipipe_proc_root,&__ipipe_version_info_proc,NULL);
8212
+ __ipipe_add_domain_proc(ipipe_root_domain);
8214
+ __ipipe_init_tracer();
8217
+#endif /* CONFIG_PROC_FS */
8219
+#ifdef CONFIG_IPIPE_DEBUG_CONTEXT
8221
+DEFINE_PER_CPU(int, ipipe_percpu_context_check) = { 1 };
8222
+DEFINE_PER_CPU(int, ipipe_saved_context_check_state);
8224
+void ipipe_check_context(struct ipipe_domain *border_ipd)
8226
+ /* Note: We don't make the per_cpu access atomic. We assume that code
8227
+ which temporarily disables the check does this in atomic context
8229
+ if (likely(ipipe_current_domain->priority <= border_ipd->priority &&
8230
+ !test_bit(IPIPE_STALL_FLAG,
8231
+ &ipipe_head_cpudom_var(status))) ||
8232
+ !per_cpu(ipipe_percpu_context_check, ipipe_processor_id()))
8235
+ ipipe_context_check_off();
8237
+ ipipe_trace_panic_freeze();
8238
+ ipipe_set_printk_sync(ipipe_current_domain);
8240
+ if (ipipe_current_domain->priority > border_ipd->priority)
8241
+ printk(KERN_ERR "I-pipe: Detected illicit call from domain "
8243
+ KERN_ERR " into a service reserved for domain "
8244
+ "'%s' and below.\n",
8245
+ ipipe_current_domain->name, border_ipd->name);
8247
+ printk(KERN_ERR "I-pipe: Detected stalled topmost domain, "
8248
+ "probably caused by a bug.\n"
8249
+ " A critical section may have been "
8250
+ "left unterminated.\n");
8252
+ ipipe_trace_panic_dump();
8255
+EXPORT_SYMBOL(ipipe_check_context);
8256
+#endif /* CONFIG_IPIPE_DEBUG_CONTEXT */
8258
+EXPORT_SYMBOL(ipipe_virtualize_irq);
8259
+EXPORT_SYMBOL(ipipe_control_irq);
8260
+EXPORT_SYMBOL(ipipe_suspend_domain);
8261
+EXPORT_SYMBOL(ipipe_alloc_virq);
8262
+EXPORT_PER_CPU_SYMBOL(ipipe_percpu_domain);
8263
+EXPORT_PER_CPU_SYMBOL(ipipe_percpu_darray);
8264
+EXPORT_SYMBOL(ipipe_root);
8265
+EXPORT_SYMBOL(ipipe_stall_pipeline_from);
8266
+EXPORT_SYMBOL(ipipe_test_and_stall_pipeline_from);
8267
+EXPORT_SYMBOL(ipipe_unstall_pipeline_from);
8268
+EXPORT_SYMBOL(ipipe_restore_pipeline_from);
8269
+EXPORT_SYMBOL(ipipe_test_and_unstall_pipeline_from);
8270
+EXPORT_SYMBOL(ipipe_unstall_pipeline_head);
8271
+EXPORT_SYMBOL(__ipipe_restore_pipeline_head);
8272
+EXPORT_SYMBOL(__ipipe_unstall_root);
8273
+EXPORT_SYMBOL(__ipipe_restore_root);
8274
+EXPORT_SYMBOL(__ipipe_spin_lock_irq);
8275
+EXPORT_SYMBOL(__ipipe_spin_unlock_irq);
8276
+EXPORT_SYMBOL(__ipipe_spin_lock_irqsave);
8277
+EXPORT_SYMBOL(__ipipe_spin_unlock_irqrestore);
8278
+EXPORT_SYMBOL(__ipipe_pipeline);
8279
+EXPORT_SYMBOL(__ipipe_lock_irq);
8280
+EXPORT_SYMBOL(__ipipe_unlock_irq);
8281
+EXPORT_SYMBOL(ipipe_register_domain);
8282
+EXPORT_SYMBOL(ipipe_unregister_domain);
8283
+EXPORT_SYMBOL(ipipe_free_virq);
8284
+EXPORT_SYMBOL(ipipe_init_attr);
8285
+EXPORT_SYMBOL(ipipe_catch_event);
8286
+EXPORT_SYMBOL(ipipe_alloc_ptdkey);
8287
+EXPORT_SYMBOL(ipipe_free_ptdkey);
8288
+EXPORT_SYMBOL(ipipe_set_ptd);
8289
+EXPORT_SYMBOL(ipipe_get_ptd);
8290
+EXPORT_SYMBOL(ipipe_set_irq_affinity);
8291
+EXPORT_SYMBOL(ipipe_send_ipi);
8292
+EXPORT_SYMBOL(__ipipe_pend_irq);
8293
+EXPORT_SYMBOL(__ipipe_set_irq_pending);
8294
+#ifdef CONFIG_GENERIC_CLOCKEVENTS
8295
+EXPORT_SYMBOL(ipipe_request_tickdev);
8296
+EXPORT_SYMBOL(ipipe_release_tickdev);
8299
+EXPORT_SYMBOL(ipipe_critical_enter);
8300
+EXPORT_SYMBOL(ipipe_critical_exit);
8301
+EXPORT_SYMBOL(ipipe_trigger_irq);
8302
+EXPORT_SYMBOL(ipipe_get_sysinfo);
8303
diff --git a/kernel/ipipe/tracer.c b/kernel/ipipe/tracer.c
8304
new file mode 100644
8305
index 0000000..a605672
8307
+++ b/kernel/ipipe/tracer.c
8310
+ * kernel/ipipe/tracer.c
8312
+ * Copyright (C) 2005 Luotao Fu.
8313
+ * 2005-2008 Jan Kiszka.
8315
+ * This program is free software; you can redistribute it and/or modify
8316
+ * it under the terms of the GNU General Public License as published by
8317
+ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139,
8318
+ * USA; either version 2 of the License, or (at your option) any later
8321
+ * This program is distributed in the hope that it will be useful,
8322
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
8323
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
8324
+ * GNU General Public License for more details.
8326
+ * You should have received a copy of the GNU General Public License
8327
+ * along with this program; if not, write to the Free Software
8328
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
8331
+#include <linux/kernel.h>
8332
+#include <linux/module.h>
8333
+#include <linux/version.h>
8334
+#include <linux/kallsyms.h>
8335
+#include <linux/seq_file.h>
8336
+#include <linux/proc_fs.h>
8337
+#include <linux/ctype.h>
8338
+#include <linux/vmalloc.h>
8339
+#include <linux/pid.h>
8340
+#include <linux/utsrelease.h>
8341
+#include <linux/sched.h>
8342
+#include <linux/ipipe.h>
8343
+#include <asm/uaccess.h>
8345
+#define IPIPE_TRACE_PATHS 4 /* <!> Do not lower below 3 */
8346
+#define IPIPE_DEFAULT_ACTIVE 0
8347
+#define IPIPE_DEFAULT_MAX 1
8348
+#define IPIPE_DEFAULT_FROZEN 2
8350
+#define IPIPE_TRACE_POINTS (1 << CONFIG_IPIPE_TRACE_SHIFT)
8351
+#define WRAP_POINT_NO(point) ((point) & (IPIPE_TRACE_POINTS-1))
8353
+#define IPIPE_DEFAULT_PRE_TRACE 10
8354
+#define IPIPE_DEFAULT_POST_TRACE 10
8355
+#define IPIPE_DEFAULT_BACK_TRACE 100
8357
+#define IPIPE_DELAY_NOTE 1000 /* in nanoseconds */
8358
+#define IPIPE_DELAY_WARN 10000 /* in nanoseconds */
8360
+#define IPIPE_TFLG_NMI_LOCK 0x0001
8361
+#define IPIPE_TFLG_NMI_HIT 0x0002
8362
+#define IPIPE_TFLG_NMI_FREEZE_REQ 0x0004
8364
+#define IPIPE_TFLG_HWIRQ_OFF 0x0100
8365
+#define IPIPE_TFLG_FREEZING 0x0200
8366
+#define IPIPE_TFLG_CURRDOM_SHIFT 10 /* bits 10..11: current domain */
8367
+#define IPIPE_TFLG_CURRDOM_MASK 0x0C00
8368
+#define IPIPE_TFLG_DOMSTATE_SHIFT 12 /* bits 12..15: domain stalled? */
8369
+#define IPIPE_TFLG_DOMSTATE_BITS 3
8371
+#define IPIPE_TFLG_DOMAIN_STALLED(point, n) \
8372
+ (point->flags & (1 << (n + IPIPE_TFLG_DOMSTATE_SHIFT)))
8373
+#define IPIPE_TFLG_CURRENT_DOMAIN(point) \
8374
+ ((point->flags & IPIPE_TFLG_CURRDOM_MASK) >> IPIPE_TFLG_CURRDOM_SHIFT)
8376
+struct ipipe_trace_point {
8379
+ unsigned long eip;
8380
+ unsigned long parent_eip;
8382
+ unsigned long long timestamp;
8385
+struct ipipe_trace_path {
8386
+ volatile int flags;
8387
+ int dump_lock; /* separated from flags due to cross-cpu access */
8388
+ int trace_pos; /* next point to fill */
8389
+ int begin, end; /* finalised path begin and end */
8390
+ int post_trace; /* non-zero when in post-trace phase */
8391
+ unsigned long long length; /* max path length in cycles */
8392
+ unsigned long nmi_saved_eip; /* for deferred requests from NMIs */
8393
+ unsigned long nmi_saved_parent_eip;
8394
+ unsigned long nmi_saved_v;
8395
+ struct ipipe_trace_point point[IPIPE_TRACE_POINTS];
8396
+} ____cacheline_aligned_in_smp;
8398
+enum ipipe_trace_type
8400
+ IPIPE_TRACE_FUNC = 0,
8401
+ IPIPE_TRACE_BEGIN,
8403
+ IPIPE_TRACE_FREEZE,
8404
+ IPIPE_TRACE_SPECIAL,
8406
+ IPIPE_TRACE_EVENT,
8409
+#define IPIPE_TYPE_MASK 0x0007
8410
+#define IPIPE_TYPE_BITS 3
8412
+#ifdef CONFIG_IPIPE_TRACE_VMALLOC
8413
+static DEFINE_PER_CPU(struct ipipe_trace_path *, trace_path);
8414
+#else /* !CONFIG_IPIPE_TRACE_VMALLOC */
8415
+static DEFINE_PER_CPU(struct ipipe_trace_path, trace_path[IPIPE_TRACE_PATHS]) =
8416
+ { [0 ... IPIPE_TRACE_PATHS-1] = { .begin = -1, .end = -1 } };
8417
+#endif /* CONFIG_IPIPE_TRACE_VMALLOC */
8419
+int ipipe_trace_enable = 0;
8421
+static DEFINE_PER_CPU(int, active_path) = { IPIPE_DEFAULT_ACTIVE };
8422
+static DEFINE_PER_CPU(int, max_path) = { IPIPE_DEFAULT_MAX };
8423
+static DEFINE_PER_CPU(int, frozen_path) = { IPIPE_DEFAULT_FROZEN };
8424
+static IPIPE_DEFINE_SPINLOCK(global_path_lock);
8425
+static int pre_trace = IPIPE_DEFAULT_PRE_TRACE;
8426
+static int post_trace = IPIPE_DEFAULT_POST_TRACE;
8427
+static int back_trace = IPIPE_DEFAULT_BACK_TRACE;
8428
+static int verbose_trace = 1;
8429
+static unsigned long trace_overhead;
8431
+static unsigned long trigger_begin;
8432
+static unsigned long trigger_end;
8434
+static DEFINE_MUTEX(out_mutex);
8435
+static struct ipipe_trace_path *print_path;
8436
+#ifdef CONFIG_IPIPE_TRACE_PANIC
8437
+static struct ipipe_trace_path *panic_path;
8438
+#endif /* CONFIG_IPIPE_TRACE_PANIC */
8439
+static int print_pre_trace;
8440
+static int print_post_trace;
8443
+static long __ipipe_signed_tsc2us(long long tsc);
8445
+__ipipe_trace_point_type(char *buf, struct ipipe_trace_point *point);
8446
+static void __ipipe_print_symname(struct seq_file *m, unsigned long eip);
8449
+static notrace void
8450
+__ipipe_store_domain_states(struct ipipe_trace_point *point)
8452
+ struct ipipe_domain *ipd;
8453
+ struct list_head *pos;
8456
+ list_for_each_prev(pos, &__ipipe_pipeline) {
8457
+ ipd = list_entry(pos, struct ipipe_domain, p_link);
8459
+ if (test_bit(IPIPE_STALL_FLAG, &ipipe_cpudom_var(ipd, status)))
8460
+ point->flags |= 1 << (i + IPIPE_TFLG_DOMSTATE_SHIFT);
8462
+ if (ipd == ipipe_current_domain)
8463
+ point->flags |= i << IPIPE_TFLG_CURRDOM_SHIFT;
8465
+ if (++i > IPIPE_TFLG_DOMSTATE_BITS)
8470
+static notrace int __ipipe_get_free_trace_path(int old, int cpu)
8472
+ int new_active = old;
8473
+ struct ipipe_trace_path *tp;
8476
+ if (++new_active == IPIPE_TRACE_PATHS)
8478
+ tp = &per_cpu(trace_path, cpu)[new_active];
8479
+ } while (new_active == per_cpu(max_path, cpu) ||
8480
+ new_active == per_cpu(frozen_path, cpu) ||
8483
+ return new_active;
8486
+static notrace void
8487
+__ipipe_migrate_pre_trace(struct ipipe_trace_path *new_tp,
8488
+ struct ipipe_trace_path *old_tp, int old_pos)
8492
+ new_tp->trace_pos = pre_trace+1;
8494
+ for (i = new_tp->trace_pos; i > 0; i--)
8495
+ memcpy(&new_tp->point[WRAP_POINT_NO(new_tp->trace_pos-i)],
8496
+ &old_tp->point[WRAP_POINT_NO(old_pos-i)],
8497
+ sizeof(struct ipipe_trace_point));
8499
+ /* mark the end (i.e. the point before point[0]) invalid */
8500
+ new_tp->point[IPIPE_TRACE_POINTS-1].eip = 0;
8503
+static notrace struct ipipe_trace_path *
8504
+__ipipe_trace_end(int cpu, struct ipipe_trace_path *tp, int pos)
8506
+ struct ipipe_trace_path *old_tp = tp;
8507
+ long active = per_cpu(active_path, cpu);
8508
+ unsigned long long length;
8510
+ /* do we have a new worst case? */
8511
+ length = tp->point[tp->end].timestamp -
8512
+ tp->point[tp->begin].timestamp;
8513
+ if (length > per_cpu(trace_path, cpu)[per_cpu(max_path, cpu)].length) {
8514
+ /* we need protection here against other cpus trying
8515
+ to start a proc dump */
8516
+ spin_lock(&global_path_lock);
8518
+ /* active path holds new worst case */
8519
+ tp->length = length;
8520
+ per_cpu(max_path, cpu) = active;
8522
+ /* find next unused trace path */
8523
+ active = __ipipe_get_free_trace_path(active, cpu);
8525
+ spin_unlock(&global_path_lock);
8527
+ tp = &per_cpu(trace_path, cpu)[active];
8529
+ /* migrate last entries for pre-tracing */
8530
+ __ipipe_migrate_pre_trace(tp, old_tp, pos);
8536
+static notrace struct ipipe_trace_path *
8537
+__ipipe_trace_freeze(int cpu, struct ipipe_trace_path *tp, int pos)
8539
+ struct ipipe_trace_path *old_tp = tp;
8540
+ long active = per_cpu(active_path, cpu);
8543
+ /* frozen paths have no core (begin=end) */
8544
+ tp->begin = tp->end;
8546
+ /* we need protection here against other cpus trying
8547
+ * to set their frozen path or to start a proc dump */
8548
+ spin_lock(&global_path_lock);
8550
+ per_cpu(frozen_path, cpu) = active;
8552
+ /* find next unused trace path */
8553
+ active = __ipipe_get_free_trace_path(active, cpu);
8555
+ /* check if this is the first frozen path */
8556
+ for_each_possible_cpu(n) {
8558
+ per_cpu(trace_path, n)[per_cpu(frozen_path, n)].end >= 0)
8562
+ spin_unlock(&global_path_lock);
8564
+ tp = &per_cpu(trace_path, cpu)[active];
8566
+ /* migrate last entries for pre-tracing */
8567
+ __ipipe_migrate_pre_trace(tp, old_tp, pos);
8573
+__ipipe_trace(enum ipipe_trace_type type, unsigned long eip,
8574
+ unsigned long parent_eip, unsigned long v)
8576
+ struct ipipe_trace_path *tp, *old_tp;
8577
+ int pos, next_pos, begin;
8578
+ struct ipipe_trace_point *point;
8579
+ unsigned long flags;
8582
+ local_irq_save_hw_notrace(flags);
8584
+ cpu = ipipe_processor_id();
8586
+ tp = old_tp = &per_cpu(trace_path, cpu)[per_cpu(active_path, cpu)];
8588
+ /* here starts a race window with NMIs - caught below */
8590
+ /* check for NMI recursion */
8591
+ if (unlikely(tp->flags & IPIPE_TFLG_NMI_LOCK)) {
8592
+ tp->flags |= IPIPE_TFLG_NMI_HIT;
8594
+ /* first freeze request from NMI context? */
8595
+ if ((type == IPIPE_TRACE_FREEZE) &&
8596
+ !(tp->flags & IPIPE_TFLG_NMI_FREEZE_REQ)) {
8597
+ /* save arguments and mark deferred freezing */
8598
+ tp->flags |= IPIPE_TFLG_NMI_FREEZE_REQ;
8599
+ tp->nmi_saved_eip = eip;
8600
+ tp->nmi_saved_parent_eip = parent_eip;
8601
+ tp->nmi_saved_v = v;
8603
+ return; /* no need for restoring flags inside IRQ */
8606
+ /* clear NMI events and set lock (atomically per cpu) */
8607
+ tp->flags = (tp->flags & ~(IPIPE_TFLG_NMI_HIT |
8608
+ IPIPE_TFLG_NMI_FREEZE_REQ))
8609
+ | IPIPE_TFLG_NMI_LOCK;
8611
+ /* check active_path again - some nasty NMI may have switched
8613
+ if (unlikely(tp !=
8614
+ &per_cpu(trace_path, cpu)[per_cpu(active_path, cpu)])) {
8615
+ /* release lock on wrong path and restart */
8616
+ tp->flags &= ~IPIPE_TFLG_NMI_LOCK;
8618
+ /* there is no chance that the NMI got deferred
8619
+ * => no need to check for pending freeze requests */
8623
+ /* get the point buffer */
8624
+ pos = tp->trace_pos;
8625
+ point = &tp->point[pos];
8627
+ /* store all trace point data */
8628
+ point->type = type;
8629
+ point->flags = raw_irqs_disabled_flags(flags) ? IPIPE_TFLG_HWIRQ_OFF : 0;
8631
+ point->parent_eip = parent_eip;
8633
+ ipipe_read_tsc(point->timestamp);
8635
+ __ipipe_store_domain_states(point);
8637
+ /* forward to next point buffer */
8638
+ next_pos = WRAP_POINT_NO(pos+1);
8639
+ tp->trace_pos = next_pos;
8641
+ /* only mark beginning if we haven't started yet */
8642
+ begin = tp->begin;
8643
+ if (unlikely(type == IPIPE_TRACE_BEGIN) && (begin < 0))
8646
+ /* end of critical path, start post-trace if not already started */
8647
+ if (unlikely(type == IPIPE_TRACE_END) &&
8648
+ (begin >= 0) && !tp->post_trace)
8649
+ tp->post_trace = post_trace + 1;
8651
+ /* freeze only if the slot is free and we are not already freezing */
8652
+ if ((unlikely(type == IPIPE_TRACE_FREEZE) ||
8653
+ (unlikely(eip >= trigger_begin && eip <= trigger_end) &&
8654
+ type == IPIPE_TRACE_FUNC)) &&
8655
+ per_cpu(trace_path, cpu)[per_cpu(frozen_path, cpu)].begin < 0 &&
8656
+ !(tp->flags & IPIPE_TFLG_FREEZING)) {
8657
+ tp->post_trace = post_trace + 1;
8658
+ tp->flags |= IPIPE_TFLG_FREEZING;
8661
+ /* enforce end of trace in case of overflow */
8662
+ if (unlikely(WRAP_POINT_NO(next_pos + 1) == begin)) {
8667
+ /* stop tracing this path if we are in post-trace and
8668
+ * a) that phase is over now or
8669
+ * b) a new TRACE_BEGIN came in but we are not freezing this path */
8670
+ if (unlikely((tp->post_trace > 0) && ((--tp->post_trace == 0) ||
8671
+ ((type == IPIPE_TRACE_BEGIN) &&
8672
+ !(tp->flags & IPIPE_TFLG_FREEZING))))) {
8673
+ /* store the path's end (i.e. excluding post-trace) */
8674
+ tp->end = WRAP_POINT_NO(pos - post_trace + tp->post_trace);
8677
+ if (tp->flags & IPIPE_TFLG_FREEZING)
8678
+ tp = __ipipe_trace_freeze(cpu, tp, pos);
8680
+ tp = __ipipe_trace_end(cpu, tp, pos);
8682
+ /* reset the active path, maybe already start a new one */
8683
+ tp->begin = (type == IPIPE_TRACE_BEGIN) ?
8684
+ WRAP_POINT_NO(tp->trace_pos - 1) : -1;
8686
+ tp->post_trace = 0;
8689
+ /* update active_path not earlier to avoid races with NMIs */
8690
+ per_cpu(active_path, cpu) = tp - per_cpu(trace_path, cpu);
8693
+ /* we still have old_tp and point,
8694
+ * let's reset NMI lock and check for catches */
8695
+ old_tp->flags &= ~IPIPE_TFLG_NMI_LOCK;
8696
+ if (unlikely(old_tp->flags & IPIPE_TFLG_NMI_HIT)) {
8697
+ /* well, this late tagging may not immediately be visible for
8698
+ * other cpus already dumping this path - a minor issue */
8699
+ point->flags |= IPIPE_TFLG_NMI_HIT;
8701
+ /* handle deferred freezing from NMI context */
8702
+ if (old_tp->flags & IPIPE_TFLG_NMI_FREEZE_REQ)
8703
+ __ipipe_trace(IPIPE_TRACE_FREEZE, old_tp->nmi_saved_eip,
8704
+ old_tp->nmi_saved_parent_eip,
8705
+ old_tp->nmi_saved_v);
8708
+ local_irq_restore_hw_notrace(flags);
8711
+static unsigned long __ipipe_global_path_lock(void)
8713
+ unsigned long flags;
8715
+ struct ipipe_trace_path *tp;
8717
+ spin_lock_irqsave(&global_path_lock, flags);
8719
+ cpu = ipipe_processor_id();
8721
+ tp = &per_cpu(trace_path, cpu)[per_cpu(active_path, cpu)];
8723
+ /* here is a small race window with NMIs - caught below */
8725
+ /* clear NMI events and set lock (atomically per cpu) */
8726
+ tp->flags = (tp->flags & ~(IPIPE_TFLG_NMI_HIT |
8727
+ IPIPE_TFLG_NMI_FREEZE_REQ))
8728
+ | IPIPE_TFLG_NMI_LOCK;
8730
+ /* check active_path again - some nasty NMI may have switched
8732
+ if (tp != &per_cpu(trace_path, cpu)[per_cpu(active_path, cpu)]) {
8733
+ /* release lock on wrong path and restart */
8734
+ tp->flags &= ~IPIPE_TFLG_NMI_LOCK;
8736
+ /* there is no chance that the NMI got deferred
8737
+ * => no need to check for pending freeze requests */
8744
+static void __ipipe_global_path_unlock(unsigned long flags)
8747
+ struct ipipe_trace_path *tp;
8749
+ /* release spinlock first - it's not involved in the NMI issue */
8750
+ __ipipe_spin_unlock_irqbegin(&global_path_lock);
8752
+ cpu = ipipe_processor_id();
8753
+ tp = &per_cpu(trace_path, cpu)[per_cpu(active_path, cpu)];
8755
+ tp->flags &= ~IPIPE_TFLG_NMI_LOCK;
8757
+ /* handle deferred freezing from NMI context */
8758
+ if (tp->flags & IPIPE_TFLG_NMI_FREEZE_REQ)
8759
+ __ipipe_trace(IPIPE_TRACE_FREEZE, tp->nmi_saved_eip,
8760
+ tp->nmi_saved_parent_eip, tp->nmi_saved_v);
8762
+ /* See __ipipe_spin_lock_irqsave() and friends. */
8763
+ __ipipe_spin_unlock_irqcomplete(flags);
8766
+void notrace ipipe_trace_begin(unsigned long v)
8768
+ if (!ipipe_trace_enable)
8770
+ __ipipe_trace(IPIPE_TRACE_BEGIN, __BUILTIN_RETURN_ADDRESS0,
8771
+ __BUILTIN_RETURN_ADDRESS1, v);
8773
+EXPORT_SYMBOL(ipipe_trace_begin);
8775
+void notrace ipipe_trace_end(unsigned long v)
8777
+ if (!ipipe_trace_enable)
8779
+ __ipipe_trace(IPIPE_TRACE_END, __BUILTIN_RETURN_ADDRESS0,
8780
+ __BUILTIN_RETURN_ADDRESS1, v);
8782
+EXPORT_SYMBOL(ipipe_trace_end);
8784
+void notrace ipipe_trace_freeze(unsigned long v)
8786
+ if (!ipipe_trace_enable)
8788
+ __ipipe_trace(IPIPE_TRACE_FREEZE, __BUILTIN_RETURN_ADDRESS0,
8789
+ __BUILTIN_RETURN_ADDRESS1, v);
8791
+EXPORT_SYMBOL(ipipe_trace_freeze);
8793
+void notrace ipipe_trace_special(unsigned char id, unsigned long v)
8795
+ if (!ipipe_trace_enable)
8797
+ __ipipe_trace(IPIPE_TRACE_SPECIAL | (id << IPIPE_TYPE_BITS),
8798
+ __BUILTIN_RETURN_ADDRESS0,
8799
+ __BUILTIN_RETURN_ADDRESS1, v);
8801
+EXPORT_SYMBOL(ipipe_trace_special);
8803
+void notrace ipipe_trace_pid(pid_t pid, short prio)
8805
+ if (!ipipe_trace_enable)
8807
+ __ipipe_trace(IPIPE_TRACE_PID | (prio << IPIPE_TYPE_BITS),
8808
+ __BUILTIN_RETURN_ADDRESS0,
8809
+ __BUILTIN_RETURN_ADDRESS1, pid);
8811
+EXPORT_SYMBOL(ipipe_trace_pid);
8813
+void notrace ipipe_trace_event(unsigned char id, unsigned long delay_tsc)
8815
+ if (!ipipe_trace_enable)
8817
+ __ipipe_trace(IPIPE_TRACE_EVENT | (id << IPIPE_TYPE_BITS),
8818
+ __BUILTIN_RETURN_ADDRESS0,
8819
+ __BUILTIN_RETURN_ADDRESS1, delay_tsc);
8821
+EXPORT_SYMBOL(ipipe_trace_event);
8823
+int ipipe_trace_max_reset(void)
8826
+ unsigned long flags;
8827
+ struct ipipe_trace_path *path;
8830
+ flags = __ipipe_global_path_lock();
8832
+ for_each_possible_cpu(cpu) {
8833
+ path = &per_cpu(trace_path, cpu)[per_cpu(max_path, cpu)];
8835
+ if (path->dump_lock) {
8842
+ path->trace_pos = 0;
8846
+ __ipipe_global_path_unlock(flags);
8850
+EXPORT_SYMBOL(ipipe_trace_max_reset);
8852
+int ipipe_trace_frozen_reset(void)
8855
+ unsigned long flags;
8856
+ struct ipipe_trace_path *path;
8859
+ flags = __ipipe_global_path_lock();
8861
+ for_each_online_cpu(cpu) {
8862
+ path = &per_cpu(trace_path, cpu)[per_cpu(frozen_path, cpu)];
8864
+ if (path->dump_lock) {
8871
+ path->trace_pos = 0;
8875
+ __ipipe_global_path_unlock(flags);
8879
+EXPORT_SYMBOL(ipipe_trace_frozen_reset);
8882
+__ipipe_get_task_info(char *task_info, struct ipipe_trace_point *point,
8885
+ struct task_struct *task = NULL;
8891
+ if (!read_trylock(&tasklist_lock))
8894
+ read_lock(&tasklist_lock);
8897
+ task = find_task_by_pid_type_ns(PIDTYPE_PID, (pid_t)point->v, &init_pid_ns);
8900
+ strncpy(task_info, task->comm, 11);
8902
+ strcpy(task_info, "-<?>-");
8905
+ read_unlock(&tasklist_lock);
8907
+ for (i = strlen(task_info); i < 11; i++)
8908
+ task_info[i] = ' ';
8910
+ sprintf(buf, " %d ", point->type >> IPIPE_TYPE_BITS);
8911
+ strcpy(task_info + (11 - strlen(buf)), buf);
8915
+__ipipe_get_event_date(char *buf,struct ipipe_trace_path *path,
8916
+ struct ipipe_trace_point *point)
8921
+ time = __ipipe_signed_tsc2us(point->timestamp -
8922
+ path->point[path->begin].timestamp + point->v);
8923
+ type = point->type >> IPIPE_TYPE_BITS;
8927
+ * Event type #0 is predefined, stands for the next
8930
+ sprintf(buf, "tick@%-6ld", time);
8932
+ sprintf(buf, "%3d@%-7ld", type, time);
8935
+#ifdef CONFIG_IPIPE_TRACE_PANIC
8936
+void ipipe_trace_panic_freeze(void)
8938
+ unsigned long flags;
8941
+ if (!ipipe_trace_enable)
8944
+ ipipe_trace_enable = 0;
8945
+ local_irq_save_hw_notrace(flags);
8947
+ cpu = ipipe_processor_id();
8949
+ panic_path = &per_cpu(trace_path, cpu)[per_cpu(active_path, cpu)];
8951
+ local_irq_restore_hw(flags);
8953
+EXPORT_SYMBOL(ipipe_trace_panic_freeze);
8955
+void ipipe_trace_panic_dump(void)
8957
+ int cnt = back_trace;
8964
+ ipipe_context_check_off();
8966
+ printk("I-pipe tracer log (%d points):\n", cnt);
8968
+ start = pos = WRAP_POINT_NO(panic_path->trace_pos-1);
8970
+ while (cnt-- > 0) {
8971
+ struct ipipe_trace_point *point = &panic_path->point[pos];
8977
+ (point->flags & IPIPE_TFLG_HWIRQ_OFF) ? '|' : ' ');
8979
+ for (i = IPIPE_TFLG_DOMSTATE_BITS; i >= 0; i--)
8981
+ (IPIPE_TFLG_CURRENT_DOMAIN(point) == i) ?
8982
+ (IPIPE_TFLG_DOMAIN_STALLED(point, i) ?
8984
+ (IPIPE_TFLG_DOMAIN_STALLED(point, i) ?
8988
+ printk("-<invalid>-\n");
8990
+ __ipipe_trace_point_type(buf, point);
8991
+ printk("%s", buf);
8993
+ switch (point->type & IPIPE_TYPE_MASK) {
8994
+ case IPIPE_TRACE_FUNC:
8998
+ case IPIPE_TRACE_PID:
8999
+ __ipipe_get_task_info(info,
9001
+ printk("%s", info);
9004
+ case IPIPE_TRACE_EVENT:
9005
+ __ipipe_get_event_date(info,
9006
+ panic_path, point);
9007
+ printk("%s", info);
9011
+ printk("0x%08lx ", point->v);
9014
+ time = __ipipe_signed_tsc2us(point->timestamp -
9015
+ panic_path->point[start].timestamp);
9016
+ printk(" %5ld ", time);
9018
+ __ipipe_print_symname(NULL, point->eip);
9020
+ __ipipe_print_symname(NULL, point->parent_eip);
9023
+ pos = WRAP_POINT_NO(pos - 1);
9026
+ panic_path = NULL;
9028
+EXPORT_SYMBOL(ipipe_trace_panic_dump);
9029
+#endif /* CONFIG_IPIPE_TRACE_PANIC */
9032
+/* --- /proc output --- */
9034
+static notrace int __ipipe_in_critical_trpath(long point_no)
9036
+ return ((WRAP_POINT_NO(point_no-print_path->begin) <
9037
+ WRAP_POINT_NO(print_path->end-print_path->begin)) ||
9038
+ ((print_path->end == print_path->begin) &&
9039
+ (WRAP_POINT_NO(point_no-print_path->end) >
9040
+ print_post_trace)));
9043
+static long __ipipe_signed_tsc2us(long long tsc)
9045
+ unsigned long long abs_tsc;
9048
+ /* ipipe_tsc2us works on unsigned => handle sign separately */
9049
+ abs_tsc = (tsc >= 0) ? tsc : -tsc;
9050
+ us = ipipe_tsc2us(abs_tsc);
9058
+__ipipe_trace_point_type(char *buf, struct ipipe_trace_point *point)
9060
+ switch (point->type & IPIPE_TYPE_MASK) {
9061
+ case IPIPE_TRACE_FUNC:
9062
+ strcpy(buf, "func ");
9065
+ case IPIPE_TRACE_BEGIN:
9066
+ strcpy(buf, "begin ");
9069
+ case IPIPE_TRACE_END:
9070
+ strcpy(buf, "end ");
9073
+ case IPIPE_TRACE_FREEZE:
9074
+ strcpy(buf, "freeze ");
9077
+ case IPIPE_TRACE_SPECIAL:
9078
+ sprintf(buf, "(0x%02x) ",
9079
+ point->type >> IPIPE_TYPE_BITS);
9082
+ case IPIPE_TRACE_PID:
9083
+ sprintf(buf, "[%5d] ", (pid_t)point->v);
9086
+ case IPIPE_TRACE_EVENT:
9087
+ sprintf(buf, "event ");
9093
+__ipipe_print_pathmark(struct seq_file *m, struct ipipe_trace_point *point)
9096
+ int point_no = point - print_path->point;
9099
+ if (print_path->end == point_no)
9101
+ else if (print_path->begin == point_no)
9103
+ else if (__ipipe_in_critical_trpath(point_no))
9105
+ seq_printf(m, "%c%c", mark,
9106
+ (point->flags & IPIPE_TFLG_HWIRQ_OFF) ? '|' : ' ');
9108
+ if (!verbose_trace)
9111
+ for (i = IPIPE_TFLG_DOMSTATE_BITS; i >= 0; i--)
9112
+ seq_printf(m, "%c",
9113
+ (IPIPE_TFLG_CURRENT_DOMAIN(point) == i) ?
9114
+ (IPIPE_TFLG_DOMAIN_STALLED(point, i) ?
9116
+ (IPIPE_TFLG_DOMAIN_STALLED(point, i) ? '*' : ' '));
9120
+__ipipe_print_delay(struct seq_file *m, struct ipipe_trace_point *point)
9122
+ unsigned long delay = 0;
9126
+ next = WRAP_POINT_NO(point+1 - print_path->point);
9128
+ if (next != print_path->trace_pos)
9129
+ delay = ipipe_tsc2ns(print_path->point[next].timestamp -
9130
+ point->timestamp);
9132
+ if (__ipipe_in_critical_trpath(point - print_path->point)) {
9133
+ if (delay > IPIPE_DELAY_WARN)
9135
+ else if (delay > IPIPE_DELAY_NOTE)
9138
+ seq_puts(m, mark);
9140
+ if (verbose_trace)
9141
+ seq_printf(m, "%3lu.%03lu%c ", delay/1000, delay%1000,
9142
+ (point->flags & IPIPE_TFLG_NMI_HIT) ? 'N' : ' ');
9147
+static void __ipipe_print_symname(struct seq_file *m, unsigned long eip)
9149
+ char namebuf[KSYM_NAME_LEN+1];
9150
+ unsigned long size, offset;
9151
+ const char *sym_name;
9154
+ sym_name = kallsyms_lookup(eip, &size, &offset, &modname, namebuf);
9156
+#ifdef CONFIG_IPIPE_TRACE_PANIC
9160
+ printk("%s+0x%lx", sym_name, offset);
9162
+ printk(" [%s]", modname);
9165
+#endif /* CONFIG_IPIPE_TRACE_PANIC */
9168
+ if (verbose_trace) {
9169
+ seq_printf(m, "%s+0x%lx", sym_name, offset);
9171
+ seq_printf(m, " [%s]", modname);
9173
+ seq_puts(m, sym_name);
9175
+ seq_printf(m, "<%08lx>", eip);
9179
+static void __ipipe_print_headline(struct seq_file *m)
9181
+ seq_printf(m, "Calibrated minimum trace-point overhead: %lu.%03lu "
9182
+ "us\n\n", trace_overhead/1000, trace_overhead%1000);
9184
+ if (verbose_trace) {
9185
+ const char *name[4] = { [0 ... 3] = "<unused>" };
9186
+ struct list_head *pos;
9189
+ list_for_each_prev(pos, &__ipipe_pipeline) {
9190
+ struct ipipe_domain *ipd =
9191
+ list_entry(pos, struct ipipe_domain, p_link);
9193
+ name[i] = ipd->name;
9199
+ " +----- Hard IRQs ('|': locked)\n"
9204
+ " ||||| +---------- "
9205
+ "Delay flag ('+': > %d us, '!': > %d us)\n"
9207
+ "NMI noise ('N')\n"
9209
+ " Type User Val. Time Delay Function "
9211
+ name[3], name[2], name[1], name[0],
9212
+ name[0] ? " ('*': domain stalled, '+': current, "
9213
+ "'#': current+stalled)" : "",
9214
+ IPIPE_DELAY_NOTE/1000, IPIPE_DELAY_WARN/1000);
9217
+ " +--------------- Hard IRQs ('|': locked)\n"
9218
+ " | +- Delay flag "
9219
+ "('+': > %d us, '!': > %d us)\n"
9221
+ " Type Time Function (Parent)\n",
9222
+ IPIPE_DELAY_NOTE/1000, IPIPE_DELAY_WARN/1000);
9225
+static void *__ipipe_max_prtrace_start(struct seq_file *m, loff_t *pos)
9229
+ mutex_lock(&out_mutex);
9232
+ struct ipipe_trace_path *tp;
9233
+ unsigned long length_usecs;
9235
+ unsigned long flags;
9237
+ /* protect against max_path/frozen_path updates while we
9238
+ * haven't locked our target path, also avoid recursively
9239
+ * taking global_path_lock from NMI context */
9240
+ flags = __ipipe_global_path_lock();
9242
+ /* find the longest of all per-cpu paths */
9243
+ print_path = NULL;
9244
+ for_each_online_cpu(cpu) {
9245
+ tp = &per_cpu(trace_path, cpu)[per_cpu(max_path, cpu)];
9246
+ if ((print_path == NULL) ||
9247
+ (tp->length > print_path->length)) {
9252
+ print_path->dump_lock = 1;
9254
+ __ipipe_global_path_unlock(flags);
9256
+ /* does this path actually contain data? */
9257
+ if (print_path->end == print_path->begin)
9260
+ /* number of points inside the critical path */
9261
+ points = WRAP_POINT_NO(print_path->end-print_path->begin+1);
9263
+ /* pre- and post-tracing length, post-trace length was frozen
9264
+ in __ipipe_trace, pre-trace may have to be reduced due to
9266
+ print_pre_trace = pre_trace;
9267
+ print_post_trace = WRAP_POINT_NO(print_path->trace_pos -
9268
+ print_path->end - 1);
9269
+ if (points+pre_trace+print_post_trace > IPIPE_TRACE_POINTS - 1)
9270
+ print_pre_trace = IPIPE_TRACE_POINTS - 1 - points -
9273
+ length_usecs = ipipe_tsc2us(print_path->length);
9274
+ seq_printf(m, "I-pipe worst-case tracing service on %s/ipipe-%s\n"
9275
+ "------------------------------------------------------------\n",
9276
+ UTS_RELEASE, IPIPE_ARCH_STRING);
9277
+ seq_printf(m, "CPU: %d, Begin: %lld cycles, Trace Points: "
9278
+ "%d (-%d/+%d), Length: %lu us\n",
9279
+ cpu, print_path->point[print_path->begin].timestamp,
9280
+ points, print_pre_trace, print_post_trace, length_usecs);
9281
+ __ipipe_print_headline(m);
9284
+ /* check if we are inside the trace range */
9285
+ if (n >= WRAP_POINT_NO(print_path->end - print_path->begin + 1 +
9286
+ print_pre_trace + print_post_trace))
9289
+ /* return the next point to be shown */
9290
+ return &print_path->point[WRAP_POINT_NO(print_path->begin -
9291
+ print_pre_trace + n)];
9294
+static void *__ipipe_prtrace_next(struct seq_file *m, void *p, loff_t *pos)
9296
+ loff_t n = ++*pos;
9298
+ /* check if we are inside the trace range with the next entry */
9299
+ if (n >= WRAP_POINT_NO(print_path->end - print_path->begin + 1 +
9300
+ print_pre_trace + print_post_trace))
9303
+ /* return the next point to be shown */
9304
+ return &print_path->point[WRAP_POINT_NO(print_path->begin -
9305
+ print_pre_trace + *pos)];
9308
+static void __ipipe_prtrace_stop(struct seq_file *m, void *p)
9311
+ print_path->dump_lock = 0;
9312
+ mutex_unlock(&out_mutex);
9315
+static int __ipipe_prtrace_show(struct seq_file *m, void *p)
9318
+ struct ipipe_trace_point *point = p;
9321
+ if (!point->eip) {
9322
+ seq_puts(m, "-<invalid>-\n");
9326
+ __ipipe_print_pathmark(m, point);
9327
+ __ipipe_trace_point_type(buf, point);
9329
+ if (verbose_trace)
9330
+ switch (point->type & IPIPE_TYPE_MASK) {
9331
+ case IPIPE_TRACE_FUNC:
9335
+ case IPIPE_TRACE_PID:
9336
+ __ipipe_get_task_info(buf, point, 0);
9340
+ case IPIPE_TRACE_EVENT:
9341
+ __ipipe_get_event_date(buf, print_path, point);
9346
+ seq_printf(m, "0x%08lx ", point->v);
9349
+ time = __ipipe_signed_tsc2us(point->timestamp -
9350
+ print_path->point[print_path->begin].timestamp);
9351
+ seq_printf(m, "%5ld", time);
9353
+ __ipipe_print_delay(m, point);
9354
+ __ipipe_print_symname(m, point->eip);
9355
+ seq_puts(m, " (");
9356
+ __ipipe_print_symname(m, point->parent_eip);
9357
+ seq_puts(m, ")\n");
9362
+static struct seq_operations __ipipe_max_ptrace_ops = {
9363
+ .start = __ipipe_max_prtrace_start,
9364
+ .next = __ipipe_prtrace_next,
9365
+ .stop = __ipipe_prtrace_stop,
9366
+ .show = __ipipe_prtrace_show
9369
+static int __ipipe_max_prtrace_open(struct inode *inode, struct file *file)
9371
+ return seq_open(file, &__ipipe_max_ptrace_ops);
9375
+__ipipe_max_reset(struct file *file, const char __user *pbuffer,
9376
+ size_t count, loff_t *data)
9378
+ mutex_lock(&out_mutex);
9379
+ ipipe_trace_max_reset();
9380
+ mutex_unlock(&out_mutex);
9385
+struct file_operations __ipipe_max_prtrace_fops = {
9386
+ .open = __ipipe_max_prtrace_open,
9388
+ .write = __ipipe_max_reset,
9389
+ .llseek = seq_lseek,
9390
+ .release = seq_release,
9393
+static void *__ipipe_frozen_prtrace_start(struct seq_file *m, loff_t *pos)
9397
+ mutex_lock(&out_mutex);
9400
+ struct ipipe_trace_path *tp;
9402
+ unsigned long flags;
9404
+ /* protect against max_path/frozen_path updates while we
9405
+ * haven't locked our target path, also avoid recursively
9406
+ * taking global_path_lock from NMI context */
9407
+ flags = __ipipe_global_path_lock();
9409
+ /* find the first of all per-cpu frozen paths */
9410
+ print_path = NULL;
9411
+ for_each_online_cpu(cpu) {
9412
+ tp = &per_cpu(trace_path, cpu)[per_cpu(frozen_path, cpu)];
9413
+ if (tp->end >= 0) {
9419
+ print_path->dump_lock = 1;
9421
+ __ipipe_global_path_unlock(flags);
9426
+ /* back- and post-tracing length, post-trace length was frozen
9427
+ in __ipipe_trace, back-trace may have to be reduced due to
9429
+ print_pre_trace = back_trace-1; /* subtract freeze point */
9430
+ print_post_trace = WRAP_POINT_NO(print_path->trace_pos -
9431
+ print_path->end - 1);
9432
+ if (1+pre_trace+print_post_trace > IPIPE_TRACE_POINTS - 1)
9433
+ print_pre_trace = IPIPE_TRACE_POINTS - 2 -
9436
+ seq_printf(m, "I-pipe frozen back-tracing service on %s/ipipe-%s\n"
9437
+ "------------------------------------------------------"
9439
+ UTS_RELEASE, IPIPE_ARCH_STRING);
9440
+ seq_printf(m, "CPU: %d, Freeze: %lld cycles, Trace Points: %d (+%d)\n",
9441
+ cpu, print_path->point[print_path->begin].timestamp,
9442
+ print_pre_trace+1, print_post_trace);
9443
+ __ipipe_print_headline(m);
9446
+ /* check if we are inside the trace range */
9447
+ if (n >= print_pre_trace + 1 + print_post_trace)
9450
+ /* return the next point to be shown */
9451
+ return &print_path->point[WRAP_POINT_NO(print_path->begin-
9452
+ print_pre_trace+n)];
9455
+static struct seq_operations __ipipe_frozen_ptrace_ops = {
9456
+ .start = __ipipe_frozen_prtrace_start,
9457
+ .next = __ipipe_prtrace_next,
9458
+ .stop = __ipipe_prtrace_stop,
9459
+ .show = __ipipe_prtrace_show
9462
+static int __ipipe_frozen_prtrace_open(struct inode *inode, struct file *file)
9464
+ return seq_open(file, &__ipipe_frozen_ptrace_ops);
9468
+__ipipe_frozen_ctrl(struct file *file, const char __user *pbuffer,
9469
+ size_t count, loff_t *data)
9471
+ char *end, buf[16];
9475
+ n = (count > sizeof(buf) - 1) ? sizeof(buf) - 1 : count;
9477
+ if (copy_from_user(buf, pbuffer, n))
9481
+ val = simple_strtol(buf, &end, 0);
9483
+ if (((*end != '\0') && !isspace(*end)) || (val < 0))
9486
+ mutex_lock(&out_mutex);
9487
+ ipipe_trace_frozen_reset();
9489
+ ipipe_trace_freeze(-1);
9490
+ mutex_unlock(&out_mutex);
9495
+struct file_operations __ipipe_frozen_prtrace_fops = {
9496
+ .open = __ipipe_frozen_prtrace_open,
9498
+ .write = __ipipe_frozen_ctrl,
9499
+ .llseek = seq_lseek,
9500
+ .release = seq_release,
9503
+static int __ipipe_rd_proc_val(char *page, char **start, off_t off,
9504
+ int count, int *eof, void *data)
9508
+ len = sprintf(page, "%u\n", *(int *)data);
9510
+ if (len <= off + count)
9512
+ *start = page + off;
9521
+static int __ipipe_wr_proc_val(struct file *file, const char __user *buffer,
9522
+ unsigned long count, void *data)
9524
+ char *end, buf[16];
9528
+ n = (count > sizeof(buf) - 1) ? sizeof(buf) - 1 : count;
9530
+ if (copy_from_user(buf, buffer, n))
9534
+ val = simple_strtol(buf, &end, 0);
9536
+ if (((*end != '\0') && !isspace(*end)) || (val < 0))
9539
+ mutex_lock(&out_mutex);
9540
+ *(int *)data = val;
9541
+ mutex_unlock(&out_mutex);
9546
+static int __ipipe_rd_trigger(char *page, char **start, off_t off, int count,
9547
+ int *eof, void *data)
9551
+ if (!trigger_begin)
9554
+ len = sprint_symbol(page, trigger_begin);
9555
+ page[len++] = '\n';
9558
+ if (len <= off + count)
9560
+ *start = page + off;
9569
+static int __ipipe_wr_trigger(struct file *file, const char __user *buffer,
9570
+ unsigned long count, void *data)
9572
+ char buf[KSYM_SYMBOL_LEN];
9573
+ unsigned long begin, end;
9575
+ if (count > sizeof(buf) - 1)
9576
+ count = sizeof(buf) - 1;
9577
+ if (copy_from_user(buf, buffer, count))
9580
+ if (buf[count-1] == '\n')
9583
+ begin = kallsyms_lookup_name(buf);
9584
+ if (!begin || !kallsyms_lookup_size_offset(begin, &end, NULL))
9588
+ mutex_lock(&out_mutex);
9589
+ /* invalidate the current range before setting a new one */
9592
+ ipipe_trace_frozen_reset();
9594
+ /* set new range */
9595
+ trigger_begin = begin;
9597
+ trigger_end = end;
9598
+ mutex_unlock(&out_mutex);
9603
+extern struct proc_dir_entry *ipipe_proc_root;
9606
+__ipipe_create_trace_proc_val(struct proc_dir_entry *trace_dir,
9607
+ const char *name, int *value_ptr)
9609
+ struct proc_dir_entry *entry;
9611
+ entry = create_proc_entry(name, 0644, trace_dir);
9613
+ entry->data = value_ptr;
9614
+ entry->read_proc = __ipipe_rd_proc_val;
9615
+ entry->write_proc = __ipipe_wr_proc_val;
9616
+ entry->owner = THIS_MODULE;
9620
+void __init __ipipe_init_tracer(void)
9622
+ struct proc_dir_entry *trace_dir;
9623
+ struct proc_dir_entry *entry;
9624
+ unsigned long long start, end, min = ULLONG_MAX;
9626
+#ifdef CONFIG_IPIPE_TRACE_VMALLOC
9629
+ for_each_possible_cpu(cpu) {
9630
+ struct ipipe_trace_path *tp_buf;
9632
+ tp_buf = vmalloc_node(sizeof(struct ipipe_trace_path) *
9633
+ IPIPE_TRACE_PATHS, cpu_to_node(cpu));
9635
+ printk(KERN_ERR "I-pipe: "
9636
+ "insufficient memory for trace buffer.\n");
9640
+ sizeof(struct ipipe_trace_path) * IPIPE_TRACE_PATHS);
9641
+ for (path = 0; path < IPIPE_TRACE_PATHS; path++) {
9642
+ tp_buf[path].begin = -1;
9643
+ tp_buf[path].end = -1;
9645
+ per_cpu(trace_path, cpu) = tp_buf;
9647
+#endif /* CONFIG_IPIPE_TRACE_VMALLOC */
9648
+ ipipe_trace_enable = CONFIG_IPIPE_TRACE_ENABLE_VALUE;
9650
+ /* Calculate minimum overhead of __ipipe_trace() */
9651
+ local_irq_disable_hw();
9652
+ for (i = 0; i < 100; i++) {
9653
+ ipipe_read_tsc(start);
9654
+ __ipipe_trace(IPIPE_TRACE_FUNC, __BUILTIN_RETURN_ADDRESS0,
9655
+ __BUILTIN_RETURN_ADDRESS1, 0);
9656
+ ipipe_read_tsc(end);
9662
+ local_irq_enable_hw();
9663
+ trace_overhead = ipipe_tsc2ns(min);
9665
+ trace_dir = create_proc_entry("trace", S_IFDIR, ipipe_proc_root);
9667
+ entry = create_proc_entry("max", 0644, trace_dir);
9669
+ entry->proc_fops = &__ipipe_max_prtrace_fops;
9671
+ entry = create_proc_entry("frozen", 0644, trace_dir);
9673
+ entry->proc_fops = &__ipipe_frozen_prtrace_fops;
9675
+ entry = create_proc_entry("trigger", 0644, trace_dir);
9677
+ entry->read_proc = __ipipe_rd_trigger;
9678
+ entry->write_proc = __ipipe_wr_trigger;
9679
+ entry->owner = THIS_MODULE;
9682
+ __ipipe_create_trace_proc_val(trace_dir, "pre_trace_points",
9684
+ __ipipe_create_trace_proc_val(trace_dir, "post_trace_points",
9686
+ __ipipe_create_trace_proc_val(trace_dir, "back_trace_points",
9688
+ __ipipe_create_trace_proc_val(trace_dir, "verbose",
9690
+ __ipipe_create_trace_proc_val(trace_dir, "enable",
9691
+ &ipipe_trace_enable);
9693
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
9694
index 10b5092..4b835aa 100644
9695
--- a/kernel/irq/chip.c
9696
+++ b/kernel/irq/chip.c
9697
@@ -351,7 +351,9 @@ handle_level_irq(unsigned int irq, struct irq_desc *desc)
9698
irqreturn_t action_ret;
9700
spin_lock(&desc->lock);
9701
+#ifndef CONFIG_IPIPE
9702
mask_ack_irq(desc, irq);
9703
+#endif /* CONFIG_IPIPE */
9705
if (unlikely(desc->status & IRQ_INPROGRESS))
9707
@@ -427,8 +429,13 @@ handle_fasteoi_irq(unsigned int irq, struct irq_desc *desc)
9709
spin_lock(&desc->lock);
9710
desc->status &= ~IRQ_INPROGRESS;
9711
+#ifdef CONFIG_IPIPE
9712
+ desc->chip->unmask(irq);
9716
desc->chip->eoi(irq);
9719
spin_unlock(&desc->lock);
9721
@@ -469,8 +476,10 @@ handle_edge_irq(unsigned int irq, struct irq_desc *desc)
9723
kstat_incr_irqs_this_cpu(irq, desc);
9725
+#ifndef CONFIG_IPIPE
9726
/* Start handling the irq */
9727
desc->chip->ack(irq);
9728
+#endif /* CONFIG_IPIPE */
9730
/* Mark the IRQ currently in progress.*/
9731
desc->status |= IRQ_INPROGRESS;
9732
@@ -510,6 +519,85 @@ out_unlock:
9733
spin_unlock(&desc->lock);
9736
+#ifdef CONFIG_IPIPE
9738
+void __ipipe_ack_simple_irq(unsigned irq, struct irq_desc *desc)
9742
+void __ipipe_end_simple_irq(unsigned irq, struct irq_desc *desc)
9746
+void __ipipe_ack_level_irq(unsigned irq, struct irq_desc *desc)
9748
+ mask_ack_irq(desc, irq);
9751
+void __ipipe_end_level_irq(unsigned irq, struct irq_desc *desc)
9753
+ if (desc->chip->unmask)
9754
+ desc->chip->unmask(irq);
9757
+void __ipipe_ack_fasteoi_irq(unsigned irq, struct irq_desc *desc)
9759
+ desc->chip->eoi(irq);
9762
+void __ipipe_end_fasteoi_irq(unsigned irq, struct irq_desc *desc)
9765
+ * Non-requestable IRQs should not be masked in EOI handler.
9767
+ if (!(desc->status & IRQ_NOREQUEST))
9768
+ desc->chip->unmask(irq);
9771
+void __ipipe_ack_edge_irq(unsigned irq, struct irq_desc *desc)
9773
+ desc->chip->ack(irq);
9776
+void __ipipe_ack_percpu_irq(unsigned irq, struct irq_desc *desc)
9778
+ if (desc->chip->ack)
9779
+ desc->chip->ack(irq);
9782
+void __ipipe_end_percpu_irq(unsigned irq, struct irq_desc *desc)
9784
+ if (desc->chip->eoi)
9785
+ desc->chip->eoi(irq);
9788
+void __ipipe_end_edge_irq(unsigned irq, struct irq_desc *desc)
9792
+void __ipipe_ack_bad_irq(unsigned irq, struct irq_desc *desc)
9796
+ handle_bad_irq(irq, desc);
9799
+ printk(KERN_WARNING "%s: unknown flow handler for IRQ %d\n",
9800
+ __FUNCTION__, irq);
9805
+void __ipipe_noack_irq(unsigned irq, struct irq_desc *desc)
9809
+void __ipipe_noend_irq(unsigned irq, struct irq_desc *desc)
9813
+#endif /* CONFIG_IPIPE */
9816
* handle_percpu_IRQ - Per CPU local irq handler
9817
* @irq: the interrupt number
9818
@@ -524,8 +612,10 @@ handle_percpu_irq(unsigned int irq, struct irq_desc *desc)
9820
kstat_incr_irqs_this_cpu(irq, desc);
9822
+#ifndef CONFIG_IPIPE
9823
if (desc->chip->ack)
9824
desc->chip->ack(irq);
9825
+#endif /* CONFIG_IPIPE */
9827
action_ret = handle_IRQ_event(irq, desc->action);
9829
@@ -550,6 +640,30 @@ __set_irq_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained,
9832
handle = handle_bad_irq;
9833
+#ifdef CONFIG_IPIPE
9834
+ else if (handle == &handle_simple_irq) {
9835
+ desc->ipipe_ack = &__ipipe_ack_simple_irq;
9836
+ desc->ipipe_end = &__ipipe_end_simple_irq;
9838
+ else if (handle == &handle_level_irq) {
9839
+ desc->ipipe_ack = &__ipipe_ack_level_irq;
9840
+ desc->ipipe_end = &__ipipe_end_level_irq;
9842
+ else if (handle == &handle_edge_irq) {
9843
+ desc->ipipe_ack = &__ipipe_ack_edge_irq;
9844
+ desc->ipipe_end = &__ipipe_end_edge_irq;
9846
+ else if (handle == &handle_fasteoi_irq) {
9847
+ desc->ipipe_ack = &__ipipe_ack_fasteoi_irq;
9848
+ desc->ipipe_end = &__ipipe_end_fasteoi_irq;
9851
+ else if (handle == &handle_percpu_irq) {
9852
+ desc->ipipe_ack = &__ipipe_ack_percpu_irq;
9853
+ desc->ipipe_end = &__ipipe_end_percpu_irq;
9855
+#endif /* CONFIG_SMP */
9856
+#endif /* CONFIG_IPIPE */
9857
else if (desc->chip == &no_irq_chip) {
9858
printk(KERN_WARNING "Trying to install %sinterrupt handler "
9859
"for IRQ%d\n", is_chained ? "chained " : "", irq);
9860
@@ -561,7 +675,21 @@ __set_irq_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained,
9861
* dummy_irq_chip for easy transition.
9863
desc->chip = &dummy_irq_chip;
9864
+#ifdef CONFIG_IPIPE
9865
+ desc->ipipe_ack = &__ipipe_noack_irq;
9866
+ desc->ipipe_end = &__ipipe_noend_irq;
9867
+#endif /* CONFIG_IPIPE */
9869
+#ifdef CONFIG_IPIPE
9870
+ else if (is_chained) {
9871
+ desc->ipipe_ack = handle;
9872
+ desc->ipipe_end = &__ipipe_noend_irq;
9873
+ handle = &__ipipe_noack_irq;
9875
+ desc->ipipe_ack = &__ipipe_ack_bad_irq;
9876
+ desc->ipipe_end = &__ipipe_noend_irq;
9878
+#endif /* CONFIG_IPIPE */
9880
spin_lock_irqsave(&desc->lock, flags);
9882
@@ -571,9 +699,17 @@ __set_irq_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained,
9883
mask_ack_irq(desc, irq);
9884
desc->status |= IRQ_DISABLED;
9886
+#ifdef CONFIG_IPIPE
9887
+ desc->ipipe_ack = &__ipipe_ack_bad_irq;
9888
+ desc->ipipe_end = &__ipipe_noend_irq;
9889
+#endif /* CONFIG_IPIPE */
9891
desc->handle_irq = handle;
9893
+#ifdef CONFIG_IPIPE
9894
+ /* Suppress intermediate trampoline routine. */
9895
+ ipipe_root_domain->irqs[irq].acknowledge = desc->ipipe_ack;
9896
+#endif /* CONFIG_IPIPE */
9898
if (handle != handle_bad_irq && is_chained) {
9899
desc->status &= ~IRQ_DISABLED;
9900
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
9901
index c815b42..f2efd4e 100644
9902
--- a/kernel/irq/handle.c
9903
+++ b/kernel/irq/handle.c
9904
@@ -179,8 +179,10 @@ unsigned int __do_IRQ(unsigned int irq)
9906
* No locking required for CPU-local interrupts:
9908
+#ifndef CONFIG_IPIPE
9909
if (desc->chip->ack)
9910
desc->chip->ack(irq);
9912
if (likely(!(desc->status & IRQ_DISABLED))) {
9913
action_ret = handle_IRQ_event(irq, desc->action);
9915
@@ -191,8 +193,10 @@ unsigned int __do_IRQ(unsigned int irq)
9918
spin_lock(&desc->lock);
9919
+#ifndef CONFIG_IPIPE
9920
if (desc->chip->ack)
9921
desc->chip->ack(irq);
9924
* REPLAY is when Linux resends an IRQ that was dropped earlier
9925
* WAITING is used by probe to mark irqs that are being tested
9926
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
9927
index 46a4041..bc57809 100644
9928
--- a/kernel/lockdep.c
9929
+++ b/kernel/lockdep.c
9930
@@ -2188,7 +2188,7 @@ void trace_hardirqs_on_caller(unsigned long ip)
9931
/* we'll do an OFF -> ON transition: */
9932
curr->hardirqs_enabled = 1;
9934
- if (DEBUG_LOCKS_WARN_ON(!irqs_disabled()))
9935
+ if (DEBUG_LOCKS_WARN_ON(!irqs_disabled() && !irqs_disabled_hw()))
9937
if (DEBUG_LOCKS_WARN_ON(current->hardirq_context))
9939
@@ -2231,7 +2231,7 @@ void trace_hardirqs_off_caller(unsigned long ip)
9940
if (unlikely(!debug_locks || current->lockdep_recursion))
9943
- if (DEBUG_LOCKS_WARN_ON(!irqs_disabled()))
9944
+ if (DEBUG_LOCKS_WARN_ON(!irqs_disabled() && !irqs_disabled_hw()))
9947
if (curr->hardirqs_enabled) {
9948
diff --git a/kernel/panic.c b/kernel/panic.c
9949
index 4d50883..1afea02 100644
9950
--- a/kernel/panic.c
9951
+++ b/kernel/panic.c
9953
#include <linux/debug_locks.h>
9954
#include <linux/random.h>
9955
#include <linux/kallsyms.h>
9956
+#include <linux/ipipe_trace.h>
9959
static unsigned long tainted_mask;
9960
@@ -285,6 +286,8 @@ int oops_may_print(void)
9962
void oops_enter(void)
9964
+ ipipe_trace_panic_freeze();
9965
+ ipipe_disable_context_check(ipipe_processor_id());
9966
debug_locks_off(); /* can't trust the integrity of the kernel anymore */
9967
do_oops_enter_exit();
9969
diff --git a/kernel/power/disk.c b/kernel/power/disk.c
9970
index c9d7408..e6915f2 100644
9971
--- a/kernel/power/disk.c
9972
+++ b/kernel/power/disk.c
9973
@@ -208,6 +208,7 @@ static int create_image(int platform_mode)
9976
local_irq_disable();
9977
+ local_irq_disable_hw_cond();
9978
/* At this point, device_suspend() has been called, but *not*
9979
* device_power_down(). We *must* call device_power_down() now.
9980
* Otherwise, drivers for some devices (e.g. interrupt controllers)
9981
@@ -241,6 +242,7 @@ static int create_image(int platform_mode)
9982
device_power_up(in_suspend ?
9983
(error ? PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE);
9985
+ local_irq_enable_hw_cond();
9989
@@ -324,6 +326,7 @@ static int resume_target_kernel(void)
9992
local_irq_disable();
9993
+ local_irq_disable_hw_cond();
9994
error = device_power_down(PMSG_QUIESCE);
9996
printk(KERN_ERR "PM: Some devices failed to power down, "
9997
@@ -354,6 +357,7 @@ static int resume_target_kernel(void)
9998
touch_softlockup_watchdog();
9999
device_power_up(PMSG_RECOVER);
10001
+ local_irq_enable_hw_cond();
10002
local_irq_enable();
10003
device_pm_unlock();
10005
diff --git a/kernel/printk.c b/kernel/printk.c
10006
index b84eec7..eab2d52 100644
10007
--- a/kernel/printk.c
10008
+++ b/kernel/printk.c
10009
@@ -533,6 +533,41 @@ static int have_callable_console(void)
10013
+#ifdef CONFIG_IPIPE
10015
+static ipipe_spinlock_t __ipipe_printk_lock = IPIPE_SPIN_LOCK_UNLOCKED;
10017
+static int __ipipe_printk_fill;
10019
+static char __ipipe_printk_buf[__LOG_BUF_LEN];
10021
+void __ipipe_flush_printk (unsigned virq, void *cookie)
10023
+ char *p = __ipipe_printk_buf;
10024
+ int len, lmax, out = 0;
10025
+ unsigned long flags;
10030
+ spin_unlock_irqrestore(&__ipipe_printk_lock, flags);
10032
+ lmax = __ipipe_printk_fill;
10033
+ while (out < lmax) {
10034
+ len = strlen(p) + 1;
10039
+ spin_lock_irqsave(&__ipipe_printk_lock, flags);
10041
+ while (__ipipe_printk_fill != lmax);
10043
+ __ipipe_printk_fill = 0;
10045
+ spin_unlock_irqrestore(&__ipipe_printk_lock, flags);
10049
* printk - print a kernel message
10050
* @fmt: format string
10051
@@ -557,6 +592,63 @@ static int have_callable_console(void)
10053
asmlinkage int printk(const char *fmt, ...)
10055
+ int r, fbytes, oldcount;
10058
+ unsigned long flags;
10061
+ va_start(args, fmt);
10063
+ if (test_bit(IPIPE_SPRINTK_FLAG, &ipipe_current_domain->flags) ||
10064
+ oops_in_progress)
10065
+ cs = ipipe_disable_context_check(ipipe_processor_id());
10066
+ else if (ipipe_current_domain == ipipe_root_domain) {
10067
+ struct ipipe_domain *dom;
10069
+ local_irq_save_hw(flags);
10070
+ list_for_each_entry(dom, &__ipipe_pipeline, p_link) {
10071
+ if (dom == ipipe_root_domain)
10073
+ if (test_bit(IPIPE_STALL_FLAG,
10074
+ &ipipe_cpudom_var(dom, status)))
10077
+ local_irq_restore_hw(flags);
10082
+ r = vprintk(fmt, args);
10084
+ ipipe_restore_context_check(ipipe_processor_id(), cs);
10088
+ spin_lock_irqsave(&__ipipe_printk_lock, flags);
10090
+ oldcount = __ipipe_printk_fill;
10091
+ fbytes = __LOG_BUF_LEN - oldcount;
10093
+ if (fbytes > 1) {
10094
+ r = vscnprintf(__ipipe_printk_buf + __ipipe_printk_fill,
10095
+ fbytes, fmt, args) + 1; /* account for the null byte */
10096
+ __ipipe_printk_fill += r;
10100
+ spin_unlock_irqrestore(&__ipipe_printk_lock, flags);
10102
+ if (oldcount == 0)
10103
+ ipipe_trigger_irq(__ipipe_printk_virq);
10109
+#else /* !CONFIG_IPIPE */
10110
+asmlinkage int printk(const char *fmt, ...)
10115
@@ -566,6 +658,7 @@ asmlinkage int printk(const char *fmt, ...)
10119
+#endif /* CONFIG_IPIPE */
10121
/* cpu currently holding logbuf_lock */
10122
static volatile unsigned int printk_cpu = UINT_MAX;
10123
diff --git a/kernel/sched.c b/kernel/sched.c
10124
index db66874..c5bc8dc 100644
10125
--- a/kernel/sched.c
10126
+++ b/kernel/sched.c
10127
@@ -2255,7 +2255,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync)
10129
rq = task_rq_lock(p, &flags);
10130
old_state = p->state;
10131
- if (!(old_state & state))
10132
+ if (!(old_state & state) || (old_state & (TASK_NOWAKEUP|TASK_ATOMICSWITCH)))
10136
@@ -2609,13 +2609,15 @@ asmlinkage void schedule_tail(struct task_struct *prev)
10138
if (current->set_child_tid)
10139
put_user(task_pid_vnr(current), current->set_child_tid);
10141
+ ipipe_init_notify(current);
10145
* context_switch - switch to the new MM and the new
10146
* thread's register state.
10148
-static inline void
10150
context_switch(struct rq *rq, struct task_struct *prev,
10151
struct task_struct *next)
10153
@@ -2657,12 +2659,23 @@ context_switch(struct rq *rq, struct task_struct *prev,
10154
switch_to(prev, next, prev);
10158
+#ifdef CONFIG_IPIPE_DELAYED_ATOMICSW
10159
+ current->state &= ~TASK_ATOMICSWITCH;
10161
+ prev->state &= ~TASK_ATOMICSWITCH;
10163
+ if (task_hijacked(prev))
10167
* this_rq must be evaluated again because prev may have moved
10168
* CPUs since it called schedule(), thus the 'rq' on its stack
10169
* frame will be invalid.
10171
finish_task_switch(this_rq(), prev);
10177
@@ -4335,6 +4348,7 @@ EXPORT_SYMBOL(add_preempt_count);
10179
void __kprobes sub_preempt_count(int val)
10181
+ ipipe_check_context(ipipe_root_domain);
10182
#ifdef CONFIG_DEBUG_PREEMPT
10185
@@ -4383,6 +4397,7 @@ static noinline void __schedule_bug(struct task_struct *prev)
10187
static inline void schedule_debug(struct task_struct *prev)
10189
+ ipipe_check_context(ipipe_root_domain);
10191
* Test if we are atomic. Since do_exit() needs to call into
10192
* schedule() atomically, we ignore that path for now.
10193
@@ -4451,6 +4466,9 @@ need_resched:
10194
rcu_qsctr_inc(cpu);
10196
switch_count = &prev->nivcsw;
10197
+ if (unlikely(prev->state & TASK_ATOMICSWITCH))
10198
+ /* Pop one disable level -- one still remains. */
10199
+ preempt_enable();
10201
release_kernel_lock(prev);
10202
need_resched_nonpreemptible:
10203
@@ -4490,15 +4508,18 @@ need_resched_nonpreemptible:
10207
- context_switch(rq, prev, next); /* unlocks the rq */
10208
+ if (context_switch(rq, prev, next)) /* unlocks the rq unless hijacked */
10211
* the context switch might have flipped the stack from under
10212
* us, hence refresh the local variables.
10214
cpu = smp_processor_id();
10218
+ prev->state &= ~TASK_ATOMICSWITCH;
10219
spin_unlock_irq(&rq->lock);
10222
if (unlikely(reacquire_kernel_lock(current) < 0))
10223
goto need_resched_nonpreemptible;
10224
@@ -5241,6 +5262,7 @@ recheck:
10227
__setscheduler(rq, p, policy, param->sched_priority);
10228
+ ipipe_setsched_notify(p);
10231
p->sched_class->set_curr_task(rq);
10232
@@ -5570,6 +5592,7 @@ static void __cond_resched(void)
10233
#ifdef CONFIG_DEBUG_SPINLOCK_SLEEP
10234
__might_sleep(__FILE__, __LINE__);
10236
+ ipipe_check_context(ipipe_root_domain);
10238
* The BKS might be reacquired before we have dropped
10239
* PREEMPT_ACTIVE, which could trigger a second
10240
@@ -9388,3 +9411,60 @@ struct cgroup_subsys cpuacct_subsys = {
10241
.subsys_id = cpuacct_subsys_id,
10243
#endif /* CONFIG_CGROUP_CPUACCT */
10245
+#ifdef CONFIG_IPIPE
10247
+int ipipe_setscheduler_root (struct task_struct *p, int policy, int prio)
10249
+ const struct sched_class *prev_class = p->sched_class;
10250
+ int oldprio, on_rq, running;
10251
+ unsigned long flags;
10254
+ spin_lock_irqsave(&p->pi_lock, flags);
10255
+ rq = __task_rq_lock(p);
10256
+ update_rq_clock(rq);
10257
+ on_rq = p->se.on_rq;
10258
+ running = task_running(rq, p);
10261
+ deactivate_task(rq, p, 0);
10263
+ p->sched_class->put_prev_task(rq, p);
10265
+ oldprio = p->prio;
10266
+ __setscheduler(rq, p, policy, prio);
10267
+ ipipe_setsched_notify(p);
10270
+ p->sched_class->set_curr_task(rq);
10272
+ activate_task(rq, p, 0);
10273
+ check_class_changed(rq, p, prev_class, oldprio, running);
10275
+ __task_rq_unlock(rq);
10276
+ spin_unlock_irqrestore(&p->pi_lock, flags);
10278
+ rt_mutex_adjust_pi(p);
10283
+EXPORT_SYMBOL(ipipe_setscheduler_root);
10285
+int ipipe_reenter_root (struct task_struct *prev, int policy, int prio)
10287
+ finish_task_switch(this_rq(), prev);
10289
+ (void)reacquire_kernel_lock(current);
10290
+ preempt_enable_no_resched();
10292
+ if (current->policy != policy || current->rt_priority != prio)
10293
+ return ipipe_setscheduler_root(current, policy, prio);
10298
+EXPORT_SYMBOL(ipipe_reenter_root);
10300
+#endif /* CONFIG_IPIPE */
10301
diff --git a/kernel/signal.c b/kernel/signal.c
10302
index 28859a9..734c5c0 100644
10303
--- a/kernel/signal.c
10304
+++ b/kernel/signal.c
10305
@@ -494,6 +494,7 @@ void signal_wake_up(struct task_struct *t, int resume)
10308
set_tsk_thread_flag(t, TIF_SIGPENDING);
10309
+ ipipe_sigwake_notify(t); /* TIF_SIGPENDING must be set first. */
10312
* For SIGKILL, we want to wake it up in the stopped/traced/killable
10313
diff --git a/kernel/spinlock.c b/kernel/spinlock.c
10314
index 29ab207..c21e314 100644
10315
--- a/kernel/spinlock.c
10316
+++ b/kernel/spinlock.c
10317
@@ -87,7 +87,7 @@ unsigned long __lockfunc _spin_lock_irqsave(spinlock_t *lock)
10318
* _raw_spin_lock_flags() code, because lockdep assumes
10319
* that interrupts are not re-enabled during lock-acquire:
10321
-#ifdef CONFIG_LOCKDEP
10322
+#if defined(CONFIG_LOCKDEP) || defined(CONFIG_IPIPE)
10323
LOCK_CONTENDED(lock, _raw_spin_trylock, _raw_spin_lock);
10325
_raw_spin_lock_flags(lock, &flags);
10326
@@ -304,7 +304,7 @@ unsigned long __lockfunc _spin_lock_irqsave_nested(spinlock_t *lock, int subclas
10327
* _raw_spin_lock_flags() code, because lockdep assumes
10328
* that interrupts are not re-enabled during lock-acquire:
10330
-#ifdef CONFIG_LOCKDEP
10331
+#if defined(CONFIG_LOCKDEP) || defined(CONFIG_IPIPE)
10332
LOCK_CONTENDED(lock, _raw_spin_trylock, _raw_spin_lock);
10334
_raw_spin_lock_flags(lock, &flags);
10335
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
10336
index df12434..3d63445 100644
10337
--- a/kernel/time/tick-common.c
10338
+++ b/kernel/time/tick-common.c
10339
@@ -69,7 +69,7 @@ static void tick_periodic(int cpu)
10340
write_sequnlock(&xtime_lock);
10343
- update_process_times(user_mode(get_irq_regs()));
10344
+ update_root_process_times(get_irq_regs());
10345
profile_tick(CPU_PROFILING);
10348
@@ -167,6 +167,10 @@ static void tick_setup_device(struct tick_device *td,
10350
td->evtdev = newdev;
10352
+ /* I-pipe: derive global tick IRQ from CPU 0 */
10354
+ ipipe_update_tick_evtdev(newdev);
10357
* When the device is not per cpu, pin the interrupt to the
10359
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
10360
index 342fc9c..3665e47 100644
10361
--- a/kernel/time/tick-sched.c
10362
+++ b/kernel/time/tick-sched.c
10363
@@ -507,7 +507,7 @@ static void tick_nohz_handler(struct clock_event_device *dev)
10364
ts->idle_jiffies++;
10367
- update_process_times(user_mode(regs));
10368
+ update_root_process_times(regs);
10369
profile_tick(CPU_PROFILING);
10371
while (tick_nohz_reprogram(ts, now)) {
10372
@@ -658,7 +658,7 @@ static enum hrtimer_restart tick_sched_timer(struct hrtimer *timer)
10373
touch_softlockup_watchdog();
10374
ts->idle_jiffies++;
10376
- update_process_times(user_mode(regs));
10377
+ update_root_process_times(regs);
10378
profile_tick(CPU_PROFILING);
10381
diff --git a/kernel/timer.c b/kernel/timer.c
10382
index a5eaea2..6ecc85e 100644
10383
--- a/kernel/timer.c
10384
+++ b/kernel/timer.c
10385
@@ -1052,6 +1052,26 @@ void update_process_times(int user_tick)
10386
run_posix_cpu_timers(p);
10389
+#ifdef CONFIG_IPIPE
10391
+void update_root_process_times(struct pt_regs *regs)
10393
+ int cpu, user_tick = user_mode(regs);
10395
+ if (__ipipe_root_tick_p(regs)) {
10396
+ update_process_times(user_tick);
10400
+ run_local_timers();
10401
+ cpu = smp_processor_id();
10402
+ if (rcu_pending(cpu))
10403
+ rcu_check_callbacks(cpu, user_tick);
10404
+ run_posix_cpu_timers(current);
10410
* Nr of active tasks - counted in fixed-point numbers
10412
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
10413
index b0f239e..fdc111b 100644
10414
--- a/lib/Kconfig.debug
10415
+++ b/lib/Kconfig.debug
10416
@@ -128,6 +128,8 @@ config DEBUG_SECTION_MISMATCH
10417
- Enable verbose reporting from modpost to help solving
10418
the section mismatches reported.
10420
+source "kernel/ipipe/Kconfig.debug"
10422
config DEBUG_KERNEL
10423
bool "Kernel debugging"
10425
diff --git a/lib/bust_spinlocks.c b/lib/bust_spinlocks.c
10426
index 486da62..9a0f064 100644
10427
--- a/lib/bust_spinlocks.c
10428
+++ b/lib/bust_spinlocks.c
10430
#include <linux/tty.h>
10431
#include <linux/wait.h>
10432
#include <linux/vt_kern.h>
10433
+#include <linux/ipipe_trace.h>
10436
void __attribute__((weak)) bust_spinlocks(int yes)
10437
@@ -22,6 +23,7 @@ void __attribute__((weak)) bust_spinlocks(int yes)
10441
+ ipipe_trace_panic_dump();
10442
if (--oops_in_progress == 0)
10445
diff --git a/lib/ioremap.c b/lib/ioremap.c
10446
index 14c6078..a275469 100644
10447
--- a/lib/ioremap.c
10448
+++ b/lib/ioremap.c
10449
@@ -85,8 +85,8 @@ int ioremap_page_range(unsigned long addr,
10452
} while (pgd++, addr = next, addr != end);
10454
- flush_cache_vmap(start, end);
10455
+ __ipipe_pin_range_globally(start, end);
10456
+ flush_cache_vmap(start, end);
10460
diff --git a/lib/smp_processor_id.c b/lib/smp_processor_id.c
10461
index 0f8fc22..90dc46f 100644
10462
--- a/lib/smp_processor_id.c
10463
+++ b/lib/smp_processor_id.c
10464
@@ -12,10 +12,13 @@ notrace unsigned int debug_smp_processor_id(void)
10465
unsigned long preempt_count = preempt_count();
10466
int this_cpu = raw_smp_processor_id();
10468
+ if (!ipipe_root_domain_p)
10471
if (likely(preempt_count))
10474
- if (irqs_disabled())
10475
+ if (irqs_disabled() || irqs_disabled_hw())
10479
diff --git a/lib/spinlock_debug.c b/lib/spinlock_debug.c
10480
index 9c4b025..08f096b 100644
10481
--- a/lib/spinlock_debug.c
10482
+++ b/lib/spinlock_debug.c
10483
@@ -133,6 +133,8 @@ void _raw_spin_lock(spinlock_t *lock)
10484
debug_spin_lock_after(lock);
10487
+EXPORT_SYMBOL(_raw_spin_lock);
10489
int _raw_spin_trylock(spinlock_t *lock)
10491
int ret = __raw_spin_trylock(&lock->raw_lock);
10492
@@ -148,12 +150,16 @@ int _raw_spin_trylock(spinlock_t *lock)
10496
+EXPORT_SYMBOL(_raw_spin_trylock);
10498
void _raw_spin_unlock(spinlock_t *lock)
10500
debug_spin_unlock(lock);
10501
__raw_spin_unlock(&lock->raw_lock);
10504
+EXPORT_SYMBOL(_raw_spin_unlock);
10506
static void rwlock_bug(rwlock_t *lock, const char *msg)
10508
if (!debug_locks_off())
10509
@@ -199,6 +205,8 @@ void _raw_read_lock(rwlock_t *lock)
10510
__raw_read_lock(&lock->raw_lock);
10513
+EXPORT_SYMBOL(_raw_read_lock);
10515
int _raw_read_trylock(rwlock_t *lock)
10517
int ret = __raw_read_trylock(&lock->raw_lock);
10518
@@ -212,12 +220,16 @@ int _raw_read_trylock(rwlock_t *lock)
10522
+EXPORT_SYMBOL(_raw_read_trylock);
10524
void _raw_read_unlock(rwlock_t *lock)
10526
RWLOCK_BUG_ON(lock->magic != RWLOCK_MAGIC, lock, "bad magic");
10527
__raw_read_unlock(&lock->raw_lock);
10530
+EXPORT_SYMBOL(_raw_read_unlock);
10532
static inline void debug_write_lock_before(rwlock_t *lock)
10534
RWLOCK_BUG_ON(lock->magic != RWLOCK_MAGIC, lock, "bad magic");
10535
@@ -275,6 +287,8 @@ void _raw_write_lock(rwlock_t *lock)
10536
debug_write_lock_after(lock);
10539
+EXPORT_SYMBOL(_raw_write_lock);
10541
int _raw_write_trylock(rwlock_t *lock)
10543
int ret = __raw_write_trylock(&lock->raw_lock);
10544
@@ -290,8 +304,12 @@ int _raw_write_trylock(rwlock_t *lock)
10548
+EXPORT_SYMBOL(_raw_write_trylock);
10550
void _raw_write_unlock(rwlock_t *lock)
10552
debug_write_unlock(lock);
10553
__raw_write_unlock(&lock->raw_lock);
10556
+EXPORT_SYMBOL(_raw_write_unlock);
10557
diff --git a/mm/memory.c b/mm/memory.c
10558
index fe2257f..d710733 100644
10562
#include <linux/writeback.h>
10563
#include <linux/memcontrol.h>
10564
#include <linux/mmu_notifier.h>
10565
+#include <linux/vmalloc.h>
10567
#include <asm/pgalloc.h>
10568
#include <asm/uaccess.h>
10569
@@ -482,6 +483,32 @@ out:
10570
return pfn_to_page(pfn);
10573
+static inline void cow_user_page(struct page *dst, struct page *src, unsigned long va, struct vm_area_struct *vma)
10576
+ * If the source page was a PFN mapping, we don't have
10577
+ * a "struct page" for it. We do a best-effort copy by
10578
+ * just copying from the original user address. If that
10579
+ * fails, we just zero-fill it. Live with it.
10581
+ if (unlikely(!src)) {
10582
+ void *kaddr = kmap_atomic(dst, KM_USER0);
10583
+ void __user *uaddr = (void __user *)(va & PAGE_MASK);
10586
+ * This really shouldn't fail, because the page is there
10587
+ * in the page tables. But it might just be unreadable,
10588
+ * in which case we just give up and fill the result with
10591
+ if (__copy_from_user_inatomic(kaddr, uaddr, PAGE_SIZE))
10592
+ memset(kaddr, 0, PAGE_SIZE);
10593
+ kunmap_atomic(kaddr, KM_USER0);
10594
+ flush_dcache_page(dst);
10596
+ copy_user_highpage(dst, src, va, vma);
10600
* copy one vm_area from one task to the other. Assumes the page tables
10601
* already present in the new task to be cleared in the whole range
10602
@@ -490,8 +517,8 @@ out:
10605
copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
10606
- pte_t *dst_pte, pte_t *src_pte, struct vm_area_struct *vma,
10607
- unsigned long addr, int *rss)
10608
+ pte_t *dst_pte, pte_t *src_pte, struct vm_area_struct *vma,
10609
+ unsigned long addr, int *rss, struct page *uncow_page)
10611
unsigned long vm_flags = vma->vm_flags;
10612
pte_t pte = *src_pte;
10613
@@ -530,6 +557,21 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
10614
* in the parent and the child
10616
if (is_cow_mapping(vm_flags)) {
10617
+#ifdef CONFIG_IPIPE
10618
+ if (uncow_page) {
10619
+ struct page *old_page = vm_normal_page(vma, addr, pte);
10620
+ cow_user_page(uncow_page, old_page, addr, vma);
10621
+ pte = mk_pte(uncow_page, vma->vm_page_prot);
10623
+ if (vm_flags & VM_SHARED)
10624
+ pte = pte_mkclean(pte);
10625
+ pte = pte_mkold(pte);
10627
+ page_add_new_anon_rmap(uncow_page, vma, addr);
10628
+ rss[!!PageAnon(uncow_page)]++;
10629
+ goto out_set_pte;
10631
+#endif /* CONFIG_IPIPE */
10632
ptep_set_wrprotect(src_mm, addr, src_pte);
10633
pte = pte_wrprotect(pte);
10635
@@ -560,13 +602,27 @@ static int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
10636
pte_t *src_pte, *dst_pte;
10637
spinlock_t *src_ptl, *dst_ptl;
10639
+ struct page *uncow_page = NULL;
10642
+#ifdef CONFIG_IPIPE
10643
+ int do_cow_break = 0;
10645
+ if (do_cow_break) {
10646
+ uncow_page = alloc_page_vma(GFP_HIGHUSER, vma, addr);
10649
+ do_cow_break = 0;
10654
rss[1] = rss[0] = 0;
10655
dst_pte = pte_alloc_map_lock(dst_mm, dst_pmd, addr, &dst_ptl);
10659
+ page_cache_release(uncow_page);
10662
src_pte = pte_offset_map_nested(src_pmd, addr);
10663
src_ptl = pte_lockptr(src_mm, src_pmd);
10664
spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING);
10665
@@ -587,7 +643,26 @@ again:
10669
- copy_one_pte(dst_mm, src_mm, dst_pte, src_pte, vma, addr, rss);
10670
+#ifdef CONFIG_IPIPE
10671
+ if (likely(uncow_page == NULL) && likely(pte_present(*src_pte))) {
10672
+ if (is_cow_mapping(vma->vm_flags)) {
10673
+ if (((vma->vm_flags|src_mm->def_flags) & (VM_LOCKED|VM_PINNED))
10674
+ == (VM_LOCKED|VM_PINNED)) {
10675
+ arch_leave_lazy_mmu_mode();
10676
+ spin_unlock(src_ptl);
10677
+ pte_unmap_nested(src_pte);
10678
+ add_mm_rss(dst_mm, rss[0], rss[1]);
10679
+ pte_unmap_unlock(dst_pte, dst_ptl);
10681
+ do_cow_break = 1;
10687
+ copy_one_pte(dst_mm, src_mm, dst_pte,
10688
+ src_pte, vma, addr, rss, uncow_page);
10689
+ uncow_page = NULL;
10691
} while (dst_pte++, src_pte++, addr += PAGE_SIZE, addr != end);
10693
@@ -1734,32 +1809,6 @@ static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma)
10697
-static inline void cow_user_page(struct page *dst, struct page *src, unsigned long va, struct vm_area_struct *vma)
10700
- * If the source page was a PFN mapping, we don't have
10701
- * a "struct page" for it. We do a best-effort copy by
10702
- * just copying from the original user address. If that
10703
- * fails, we just zero-fill it. Live with it.
10705
- if (unlikely(!src)) {
10706
- void *kaddr = kmap_atomic(dst, KM_USER0);
10707
- void __user *uaddr = (void __user *)(va & PAGE_MASK);
10710
- * This really shouldn't fail, because the page is there
10711
- * in the page tables. But it might just be unreadable,
10712
- * in which case we just give up and fill the result with
10715
- if (__copy_from_user_inatomic(kaddr, uaddr, PAGE_SIZE))
10716
- memset(kaddr, 0, PAGE_SIZE);
10717
- kunmap_atomic(kaddr, KM_USER0);
10718
- flush_dcache_page(dst);
10720
- copy_user_highpage(dst, src, va, vma);
10724
* This routine handles present pages, when users try to write
10725
* to a shared page. It is done by copying the page to a new address
10726
@@ -3049,3 +3098,111 @@ void print_vma_addr(char *prefix, unsigned long ip)
10728
up_read(&current->mm->mmap_sem);
10731
+#ifdef CONFIG_IPIPE
10733
+static inline int ipipe_pin_pte_range(struct mm_struct *mm, pmd_t *pmd,
10734
+ struct vm_area_struct *vma,
10735
+ unsigned long addr, unsigned long end)
10741
+ pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
10745
+ if (!pte_present(*pte) || pte_write(*pte)) {
10746
+ pte_unmap_unlock(pte, ptl);
10750
+ if (do_wp_page(mm, vma, addr, pte, pmd, ptl, *pte) == VM_FAULT_OOM)
10752
+ } while (addr += PAGE_SIZE, addr != end);
10756
+static inline int ipipe_pin_pmd_range(struct mm_struct *mm, pud_t *pud,
10757
+ struct vm_area_struct *vma,
10758
+ unsigned long addr, unsigned long end)
10760
+ unsigned long next;
10763
+ pmd = pmd_offset(pud, addr);
10765
+ next = pmd_addr_end(addr, end);
10766
+ if (pmd_none_or_clear_bad(pmd))
10768
+ if (ipipe_pin_pte_range(mm, pmd, vma, addr, next))
10770
+ } while (pmd++, addr = next, addr != end);
10774
+static inline int ipipe_pin_pud_range(struct mm_struct *mm, pgd_t *pgd,
10775
+ struct vm_area_struct *vma,
10776
+ unsigned long addr, unsigned long end)
10778
+ unsigned long next;
10781
+ pud = pud_offset(pgd, addr);
10783
+ next = pud_addr_end(addr, end);
10784
+ if (pud_none_or_clear_bad(pud))
10786
+ if (ipipe_pin_pmd_range(mm, pud, vma, addr, next))
10788
+ } while (pud++, addr = next, addr != end);
10792
+int ipipe_disable_ondemand_mappings(struct task_struct *tsk)
10794
+ unsigned long addr, next, end;
10795
+ struct vm_area_struct *vma;
10796
+ struct mm_struct *mm;
10800
+ mm = get_task_mm(tsk);
10804
+ down_write(&mm->mmap_sem);
10805
+ if (mm->def_flags & VM_PINNED)
10808
+ for (vma = mm->mmap; vma; vma = vma->vm_next) {
10809
+ if (!is_cow_mapping(vma->vm_flags)
10810
+ || !(vma->vm_flags & VM_WRITE))
10813
+ addr = vma->vm_start;
10814
+ end = vma->vm_end;
10816
+ pgd = pgd_offset(mm, addr);
10818
+ next = pgd_addr_end(addr, end);
10819
+ if (pgd_none_or_clear_bad(pgd))
10821
+ if (ipipe_pin_pud_range(mm, pgd, vma, addr, next)) {
10822
+ result = -ENOMEM;
10825
+ } while (pgd++, addr = next, addr != end);
10827
+ mm->def_flags |= VM_PINNED;
10830
+ up_write(&mm->mmap_sem);
10835
+EXPORT_SYMBOL(ipipe_disable_ondemand_mappings);
10838
diff --git a/mm/mlock.c b/mm/mlock.c
10839
index 64dca47..4058ba6 100644
10842
@@ -532,10 +532,10 @@ SYSCALL_DEFINE2(munlock, unsigned long, start, size_t, len)
10843
static int do_mlockall(int flags)
10845
struct vm_area_struct * vma, * prev = NULL;
10846
- unsigned int def_flags = 0;
10847
+ unsigned int def_flags = current->mm->def_flags & VM_PINNED;
10849
if (flags & MCL_FUTURE)
10850
- def_flags = VM_LOCKED;
10851
+ def_flags |= VM_LOCKED;
10852
current->mm->def_flags = def_flags;
10853
if (flags == MCL_FUTURE)
10855
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
10856
index 4172ce4..c828a98 100644
10859
@@ -156,8 +156,7 @@ static int vmap_page_range(unsigned long start, unsigned long end,
10860
pgprot_t prot, struct page **pages)
10863
- unsigned long next;
10864
- unsigned long addr = start;
10865
+ unsigned long next, addr = start;
10869
@@ -173,6 +172,9 @@ static int vmap_page_range(unsigned long start, unsigned long end,
10874
+ __ipipe_pin_range_globally(start, end);