/******************************************************************************
 * Portions of this file are:
 *  Copyright (C) 1992, 1998 Linus Torvalds, Ingo Molnar
 */

#include <xen/config.h>
#include <xen/init.h>
#include <xen/delay.h>
#include <xen/errno.h>
#include <xen/event.h>
#include <xen/irq.h>
#include <xen/perfc.h>
#include <xen/sched.h>
#include <xen/keyhandler.h>
#include <xen/compat.h>
#include <xen/iocap.h>
#include <xen/iommu.h>
#include <xen/trace.h>
#include <asm/msi.h>
#include <asm/current.h>
#include <asm/flushtlb.h>
#include <asm/mach-generic/mach_apic.h>
#include <public/physdev.h>

/* opt_noirqbalance: If true, software IRQ balancing/affinity is disabled. */
int __read_mostly opt_noirqbalance = 0;
boolean_param("noirqbalance", opt_noirqbalance);

unsigned int __read_mostly nr_irqs_gsi = 16;
unsigned int __read_mostly nr_irqs = 1024;
integer_param("nr_irqs", nr_irqs);

u8 __read_mostly *irq_vector;
struct irq_desc __read_mostly *irq_desc = NULL;

int __read_mostly *irq_status = NULL;
#define IRQ_UNUSED              (0)
#define IRQ_USED                (1)

#define IRQ_VECTOR_UNASSIGNED   (0)

static DECLARE_BITMAP(used_vectors, NR_VECTORS);

struct irq_cfg __read_mostly *irq_cfg = NULL;

static struct timer *__read_mostly irq_guest_eoi_timer;

static DEFINE_SPINLOCK(vector_lock);

DEFINE_PER_CPU(vector_irq_t, vector_irq) = {
    [0 ... NR_VECTORS - 1] = -1
};

DEFINE_PER_CPU(struct cpu_user_regs *, __irq_regs);

static LIST_HEAD(irq_ratelimit_list);
static DEFINE_SPINLOCK(irq_ratelimit_lock);
static struct timer irq_ratelimit_timer;

/* irq_ratelimit: the max irq rate allowed in every 10ms, set 0 to disable */
static unsigned int __read_mostly irq_ratelimit_threshold = 10000;
integer_param("irq_ratelimit", irq_ratelimit_threshold);

/* Must be called with interrupts disabled. */
void lock_vector_lock(void)
{
    /* Used so that the online set of cpus does not change
     * during assign_irq_vector. */
    spin_lock(&vector_lock);
}

void unlock_vector_lock(void)
{
    spin_unlock(&vector_lock);
}
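
/*
 * Bind an irq to a fixed vector on every online CPU in 'domain'. A no-op
 * if the requested binding is already in place; fails with -EBUSY if the
 * irq already has a different vector assigned.
 */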
static int __bind_irq_vector(int irq, int vector, cpumask_t domain)
{
    cpumask_t mask;
    int cpu;
    struct irq_cfg *cfg = irq_cfg(irq);

    BUG_ON((unsigned)irq >= nr_irqs);
    BUG_ON((unsigned)vector >= NR_VECTORS);

    cpus_and(mask, domain, cpu_online_map);
    if (cpus_empty(mask))
        return -EINVAL;
    if ((cfg->vector == vector) && cpus_equal(cfg->domain, domain))
        return 0;
    if (cfg->vector != IRQ_VECTOR_UNASSIGNED)
        return -EBUSY;
    for_each_cpu_mask(cpu, mask)
        per_cpu(vector_irq, cpu)[vector] = irq;
    cfg->vector = vector;
    cfg->domain = domain;
    irq_status[irq] = IRQ_USED;
    if (IO_APIC_IRQ(irq))
        irq_vector[irq] = vector;
    return 0;
}

int bind_irq_vector(int irq, int vector, cpumask_t domain)
{
    unsigned long flags;
    int ret;

    spin_lock_irqsave(&vector_lock, flags);
    ret = __bind_irq_vector(irq, vector, domain);
    spin_unlock_irqrestore(&vector_lock, flags);
    return ret;
}
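
/*
 * Dynamically allocated (MSI) irqs live in [nr_irqs_gsi, nr_irqs); scan
 * that range for a slot still marked IRQ_UNUSED.
 */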
static inline int find_unassigned_irq(void)
{
    int irq;

    for (irq = nr_irqs_gsi; irq < nr_irqs; irq++)
        if (irq_status[irq] == IRQ_UNUSED)
            return irq;

    return -ENOSPC;
}

/*
 * Dynamic irq allocation and deallocation for MSI.
 */
int create_irq(void)
{
    unsigned long flags;
    int irq, ret;

    spin_lock_irqsave(&vector_lock, flags);

    irq = find_unassigned_irq();
    if (irq < 0)
        goto out;

    ret = __assign_irq_vector(irq, irq_cfg(irq), TARGET_CPUS);
    if (ret < 0)
        irq = ret;
out:
    spin_unlock_irqrestore(&vector_lock, flags);

    return irq;
}

static void dynamic_irq_cleanup(unsigned int irq)
{
    struct irq_desc *desc = irq_to_desc(irq);
    unsigned long flags;
    struct irqaction *action;

    spin_lock_irqsave(&desc->lock, flags);
    desc->status |= IRQ_DISABLED;
    desc->handler->shutdown(irq);
    action = desc->action;
    desc->action = NULL;
    desc->msi_desc = NULL;
    desc->handler = &no_irq_type;
    cpus_setall(desc->affinity);
    spin_unlock_irqrestore(&desc->lock, flags);

    /* Wait to make sure it's not being used on another CPU */
    do { smp_mb(); } while ( desc->status & IRQ_INPROGRESS );

    if (action)
        xfree(action);
}

static void init_one_irq_status(int irq);
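
/*
 * Release the vector backing an irq: clear the per-CPU vector_irq entries
 * for its current domain and, if a vector move was still in progress,
 * also scrub the stale entries left behind on the CPUs of the old domain.
 */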
static void __clear_irq_vector(int irq)
{
    int cpu, vector;
    cpumask_t tmp_mask;
    struct irq_cfg *cfg = irq_cfg(irq);

    BUG_ON(!cfg->vector);

    vector = cfg->vector;
    cpus_and(tmp_mask, cfg->domain, cpu_online_map);

    for_each_cpu_mask(cpu, tmp_mask)
        per_cpu(vector_irq, cpu)[vector] = -1;

    cfg->vector = IRQ_VECTOR_UNASSIGNED;
    cpus_clear(cfg->domain);
    init_one_irq_status(irq);

    if (likely(!cfg->move_in_progress))
        return;

    cpus_and(tmp_mask, cfg->old_domain, cpu_online_map);
    for_each_cpu_mask(cpu, tmp_mask) {
        for (vector = FIRST_DYNAMIC_VECTOR; vector <= LAST_DYNAMIC_VECTOR;
             vector++) {
            if (per_cpu(vector_irq, cpu)[vector] != irq)
                continue;
            per_cpu(vector_irq, cpu)[vector] = -1;
            break;
        }
    }

    cfg->move_in_progress = 0;
}

void clear_irq_vector(int irq)
{
    unsigned long flags;

    spin_lock_irqsave(&vector_lock, flags);
    __clear_irq_vector(irq);
    spin_unlock_irqrestore(&vector_lock, flags);
}
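
/* Tear down a dynamically created (MSI) irq, then release its vector. */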
void destroy_irq(unsigned int irq)
{
    BUG_ON(!MSI_IRQ(irq));
    dynamic_irq_cleanup(irq);
    clear_irq_vector(irq);
}
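
/*
 * Map an irq back to its vector: GSIs via irq_vector[], MSIs via their
 * irq_cfg, and legacy PIC irqs via the fixed LEGACY_VECTOR() mapping.
 */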
int irq_to_vector(int irq)
{
    int vector = -1;

    BUG_ON(irq >= nr_irqs || irq < 0);

    if (IO_APIC_IRQ(irq))
        vector = irq_vector[irq];
    else if (MSI_IRQ(irq)) {
        struct irq_cfg *cfg = irq_cfg(irq);
        vector = cfg->vector;
    } else
        vector = LEGACY_VECTOR(irq);

    return vector;
}

static void init_one_irq_desc(struct irq_desc *desc)
{
    desc->status = IRQ_DISABLED;
    desc->handler = &no_irq_type;
    desc->action = NULL;
    desc->msi_desc = NULL;
    spin_lock_init(&desc->lock);
    cpus_setall(desc->affinity);
    INIT_LIST_HEAD(&desc->rl_link);
}

static void init_one_irq_status(int irq)
{
    irq_status[irq] = IRQ_UNUSED;
}

static void init_one_irq_cfg(struct irq_cfg *cfg)
{
    cfg->vector = IRQ_VECTOR_UNASSIGNED;
    cpus_clear(cfg->domain);
    cpus_clear(cfg->old_domain);
}

int init_irq_data(void)
{
    struct irq_desc *desc;
    struct irq_cfg *cfg;
    int irq;

    irq_desc = xmalloc_array(struct irq_desc, nr_irqs);
    irq_cfg = xmalloc_array(struct irq_cfg, nr_irqs);
    irq_status = xmalloc_array(int, nr_irqs);
    irq_guest_eoi_timer = xmalloc_array(struct timer, nr_irqs);
    irq_vector = xmalloc_array(u8, nr_irqs_gsi);

    if (!irq_desc || !irq_cfg || !irq_status || !irq_vector ||
        !irq_guest_eoi_timer)
        return -ENOMEM;

    memset(irq_desc, 0, nr_irqs * sizeof(*irq_desc));
    memset(irq_cfg, 0, nr_irqs * sizeof(*irq_cfg));
    memset(irq_status, 0, nr_irqs * sizeof(*irq_status));
    memset(irq_vector, 0, nr_irqs_gsi * sizeof(*irq_vector));
    memset(irq_guest_eoi_timer, 0, nr_irqs * sizeof(*irq_guest_eoi_timer));

    for (irq = 0; irq < nr_irqs; irq++) {
        desc = irq_to_desc(irq);
        cfg = irq_cfg(irq);
        desc->irq = irq;
        desc->chip_data = cfg;
        init_one_irq_desc(desc);
        init_one_irq_cfg(cfg);
        init_one_irq_status(irq);
    }

    /* Never allocate the hypercall vector or Linux/BSD fast-trap vector. */
    set_bit(LEGACY_SYSCALL_VECTOR, used_vectors);
    set_bit(HYPERCALL_VECTOR, used_vectors);

    /* IRQ_MOVE_CLEANUP_VECTOR is reserved for cleaning up moved vectors. */
    set_bit(IRQ_MOVE_CLEANUP_VECTOR, used_vectors);

    return 0;
}

static void __do_IRQ_guest(int irq);
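
/* Dummy callbacks, installed while an irq has no real handler bound. */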
void no_action(int cpl, void *dev_id, struct cpu_user_regs *regs) { }

static void enable_none(unsigned int vector) { }
static unsigned int startup_none(unsigned int vector) { return 0; }
static void disable_none(unsigned int vector) { }
static void ack_none(unsigned int irq)
{
    ack_bad_irq(irq);
}

#define shutdown_none   disable_none
#define end_none        enable_none

hw_irq_controller no_irq_type = {
    "none",
    startup_none,
    shutdown_none,
    enable_none,
    disable_none,
    ack_none,
    end_none
};

atomic_t irq_err_count;

int __assign_irq_vector(int irq, struct irq_cfg *cfg, cpumask_t mask)
{
    /*
     * NOTE! The local APIC isn't very good at handling
     * multiple interrupts at the same interrupt level.
     * As the interrupt level is determined by taking the
     * vector number and shifting that right by 4, we
     * want to spread these out a bit so that they don't
     * all fall in the same interrupt level.
     *
     * Also, we've got to be careful not to trash gate
     * 0x80, because int 0x80 is hm, kind of importantish. ;)
     */
    static int current_vector = FIRST_DYNAMIC_VECTOR, current_offset = 0;
    unsigned int old_vector;
    int cpu, err;
    cpumask_t tmp_mask;

    if ((cfg->move_in_progress) || cfg->move_cleanup_count)
        return -EAGAIN;

    err = -ENOSPC;
    old_vector = irq_to_vector(irq);
    if (old_vector) {
        cpus_and(tmp_mask, mask, cpu_online_map);
        cpus_and(tmp_mask, cfg->domain, tmp_mask);
        if (!cpus_empty(tmp_mask)) {
            cfg->vector = old_vector;
            return 0;
        }
    }

    /* Only try and allocate irqs on cpus that are present */
    cpus_and(mask, mask, cpu_online_map);

    for_each_cpu_mask(cpu, mask) {
        int new_cpu;
        int vector, offset;

        tmp_mask = vector_allocation_domain(cpu);
        cpus_and(tmp_mask, tmp_mask, cpu_online_map);

        vector = current_vector;
        offset = current_offset;
next:
        vector += 8;
        if (vector > LAST_DYNAMIC_VECTOR) {
            /* If out of vectors on large boxen, must share them. */
            offset = (offset + 1) % 8;
            vector = FIRST_DYNAMIC_VECTOR + offset;
        }
        if (unlikely(current_vector == vector))
            continue;

        if (test_bit(vector, used_vectors))
            goto next;

        for_each_cpu_mask(new_cpu, tmp_mask)
            if (per_cpu(vector_irq, new_cpu)[vector] != -1)
                goto next;
        /* Found one! */
        current_vector = vector;
        current_offset = offset;
        if (old_vector) {
            cfg->move_in_progress = 1;
            cpus_copy(cfg->old_domain, cfg->domain);
        }
        for_each_cpu_mask(new_cpu, tmp_mask)
            per_cpu(vector_irq, new_cpu)[vector] = irq;
        cfg->vector = vector;
        cpus_copy(cfg->domain, tmp_mask);

        irq_status[irq] = IRQ_USED;
        if (IO_APIC_IRQ(irq))
            irq_vector[irq] = vector;
        err = 0;
        break;
    }
    return err;
}
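
/* Locked wrapper around __assign_irq_vector(); on success the resulting
 * vector domain is mirrored into the descriptor's affinity mask. */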
int assign_irq_vector(int irq)
{
    int ret;
    unsigned long flags;
    struct irq_cfg *cfg = &irq_cfg[irq];
    struct irq_desc *desc = irq_to_desc(irq);

    BUG_ON(irq >= nr_irqs || irq < 0);

    spin_lock_irqsave(&vector_lock, flags);
    ret = __assign_irq_vector(irq, cfg, TARGET_CPUS);
    if (!ret) {
        ret = cfg->vector;
        cpus_copy(desc->affinity, cfg->domain);
    }
    spin_unlock_irqrestore(&vector_lock, flags);
    return ret;
}

/*
 * Initialize vector_irq on a new cpu. This function must be called
 * with vector_lock held.
 */
void __setup_vector_irq(int cpu)
{
    int irq, vector;
    struct irq_cfg *cfg;

    /* Clear vector_irq */
    for (vector = 0; vector < NR_VECTORS; ++vector)
        per_cpu(vector_irq, cpu)[vector] = -1;
    /* Mark the inuse vectors */
    for (irq = 0; irq < nr_irqs; ++irq) {
        cfg = irq_cfg(irq);
        if (!cpu_isset(cpu, cfg->domain))
            continue;
        vector = irq_to_vector(irq);
        per_cpu(vector_irq, cpu)[vector] = irq;
    }
}

void move_masked_irq(int irq)
{
    struct irq_desc *desc = irq_to_desc(irq);

    if (likely(!(desc->status & IRQ_MOVE_PENDING)))
        return;

    desc->status &= ~IRQ_MOVE_PENDING;

    if (unlikely(cpus_empty(desc->pending_mask)))
        return;

    if (!desc->handler->set_affinity)
        return;

    /*
     * If there was a valid mask to work with, do the disable, re-program,
     * enable sequence. This is *not* particularly important for level
     * triggered interrupts, but in the edge-triggered case we might be
     * setting the RTE while an active trigger is coming in, which could
     * cause some IO-APICs to malfunction. For correct operation this
     * depends on the caller masking the irq.
     */
    if (likely(cpus_intersects(desc->pending_mask, cpu_online_map)))
        desc->handler->set_affinity(irq, desc->pending_mask);

    cpus_clear(desc->pending_mask);
}

void move_native_irq(int irq)
{
    struct irq_desc *desc = irq_to_desc(irq);

    if (likely(!(desc->status & IRQ_MOVE_PENDING)))
        return;

    if (unlikely(desc->status & IRQ_DISABLED))
        return;

    desc->handler->disable(irq);
    move_masked_irq(irq);
    desc->handler->enable(irq);
}

/* Re-set the interrupt affinity for the specified irq. */
void irq_set_affinity(int irq, cpumask_t mask)
{
    struct irq_desc *desc = irq_to_desc(irq);

    if (!desc->handler->set_affinity)
        return;

    ASSERT(spin_is_locked(&desc->lock));
    desc->status |= IRQ_MOVE_PENDING;
    cpus_copy(desc->pending_mask, mask);
}

DEFINE_PER_CPU(unsigned int, irq_count);

asmlinkage void do_IRQ(struct cpu_user_regs *regs)
{
    struct irqaction *action;
    uint32_t          tsc_in;
    struct irq_desc  *desc;
    unsigned int      vector = regs->entry_vector;
    int irq = __get_cpu_var(vector_irq[vector]);
    struct cpu_user_regs *old_regs = set_irq_regs(regs);

    perfc_incr(irqs);

    this_cpu(irq_count)++;

    if (irq < 0) {
        ack_APIC_irq();
        printk("%s: %d.%d No irq handler for vector (irq %d)\n",
               __func__, smp_processor_id(), vector, irq);
        set_irq_regs(old_regs);
        return;
    }

    desc = irq_to_desc(irq);

    spin_lock(&desc->lock);
    desc->handler->ack(irq);

    if ( likely(desc->status & IRQ_GUEST) )
    {
        if ( irq_ratelimit_timer.function && /* irq rate limiting enabled? */
             unlikely(desc->rl_cnt++ >= irq_ratelimit_threshold) )
        {
            s_time_t now = NOW();
            if ( now < (desc->rl_quantum_start + MILLISECS(10)) )
            {
                desc->handler->disable(irq);
                /*
                 * If handler->disable doesn't actually mask the interrupt, a
                 * disabled irq still can fire. This check also avoids possible
                 * deadlocks if ratelimit_timer_fn runs at the same time.
                 */
                if ( likely(list_empty(&desc->rl_link)) )
                {
                    spin_lock(&irq_ratelimit_lock);
                    if ( list_empty(&irq_ratelimit_list) )
                        set_timer(&irq_ratelimit_timer, now + MILLISECS(10));
                    list_add(&desc->rl_link, &irq_ratelimit_list);
                    spin_unlock(&irq_ratelimit_lock);
                }
                goto out;
            }
            desc->rl_cnt = 0;
            desc->rl_quantum_start = now;
        }

        tsc_in = tb_init_done ? get_cycles() : 0;
        __do_IRQ_guest(irq);
        TRACE_3D(TRC_TRACE_IRQ, irq, tsc_in, get_cycles());
        spin_unlock(&desc->lock);
        set_irq_regs(old_regs);
        return;
    }

    desc->status &= ~IRQ_REPLAY;
    desc->status |= IRQ_PENDING;

    /*
     * Since we set PENDING, if another processor is handling a different
     * instance of this same irq, the other processor will take care of it.
     */
    if ( desc->status & (IRQ_DISABLED | IRQ_INPROGRESS) )
        goto out;

    desc->status |= IRQ_INPROGRESS;

    action = desc->action;
    while ( desc->status & IRQ_PENDING )
    {
        desc->status &= ~IRQ_PENDING;
        spin_unlock_irq(&desc->lock);
        tsc_in = tb_init_done ? get_cycles() : 0;
        action->handler(irq, action->dev_id, regs);
        TRACE_3D(TRC_TRACE_IRQ, irq, tsc_in, get_cycles());
        spin_lock_irq(&desc->lock);
    }

    desc->status &= ~IRQ_INPROGRESS;

 out:
    desc->handler->end(irq);
    spin_unlock(&desc->lock);
    set_irq_regs(old_regs);
}
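
/*
 * Runs 10ms after the first irq was throttled: re-enable every line parked
 * on irq_ratelimit_list so each can start a fresh quantum.
 */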
static void irq_ratelimit_timer_fn(void *data)
{
    struct irq_desc *desc, *tmp;
    unsigned long flags;

    spin_lock_irqsave(&irq_ratelimit_lock, flags);

    list_for_each_entry_safe ( desc, tmp, &irq_ratelimit_list, rl_link )
    {
        spin_lock(&desc->lock);
        desc->handler->enable(desc->irq);
        list_del(&desc->rl_link);
        INIT_LIST_HEAD(&desc->rl_link);
        spin_unlock(&desc->lock);
    }

    spin_unlock_irqrestore(&irq_ratelimit_lock, flags);
}

static int __init irq_ratelimit_init(void)
{
    if ( irq_ratelimit_threshold )
        init_timer(&irq_ratelimit_timer, irq_ratelimit_timer_fn, NULL, 0);
    return 0;
}
__initcall(irq_ratelimit_init);

int request_irq(unsigned int irq,
        void (*handler)(int, void *, struct cpu_user_regs *),
        unsigned long irqflags, const char * devname, void *dev_id)
{
    struct irqaction * action;
    int retval;

    /*
     * Sanity-check: shared interrupts must pass in a real dev-ID,
     * otherwise we'll have trouble later trying to figure out
     * which interrupt is which (messes up the interrupt freeing
     * logic etc).
     */
    if (irq >= nr_irqs)
        return -EINVAL;
    if (!handler)
        return -EINVAL;

    action = xmalloc(struct irqaction);
    if (!action)
        return -ENOMEM;

    action->handler = handler;
    action->name = devname;
    action->dev_id = dev_id;
    action->free_on_release = 1;

    retval = setup_irq(irq, action);
    if (retval)
        xfree(action);

    return retval;
}

void release_irq(unsigned int irq)
{
    struct irq_desc *desc;
    unsigned long flags;
    struct irqaction *action;

    desc = irq_to_desc(irq);

    spin_lock_irqsave(&desc->lock,flags);
    action = desc->action;
    desc->action = NULL;
    desc->status |= IRQ_DISABLED;
    desc->handler->shutdown(irq);
    spin_unlock_irqrestore(&desc->lock,flags);

    /* Wait to make sure it's not being used on another CPU */
    do { smp_mb(); } while ( desc->status & IRQ_INPROGRESS );

    if (action && action->free_on_release)
        xfree(action);
}
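
/*
 * Install 'new' as the action for an otherwise unused irq and start the
 * line up; fails with -EBUSY if the irq already has an action.
 */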
int setup_irq(unsigned int irq, struct irqaction *new)
{
    struct irq_desc *desc;
    unsigned long flags;

    desc = irq_to_desc(irq);

    spin_lock_irqsave(&desc->lock,flags);

    if ( desc->action != NULL )
    {
        spin_unlock_irqrestore(&desc->lock,flags);
        return -EBUSY;
    }

    desc->action = new;
    desc->status &= ~IRQ_DISABLED;
    desc->handler->startup(irq);

    spin_unlock_irqrestore(&desc->lock,flags);

    return 0;
}

/*
 * HANDLING OF GUEST-BOUND PHYSICAL IRQS
 */

#define IRQ_MAX_GUESTS 7
typedef struct {
    u8 nr_guests;
    u8 in_flight;
    u8 shareable;
    u8 ack_type;
#define ACKTYPE_NONE   0     /* No final acknowledgement is required */
#define ACKTYPE_UNMASK 1     /* Unmask PIC hardware (from any CPU)   */
#define ACKTYPE_EOI    2     /* EOI on the CPU that was interrupted  */
    cpumask_t cpu_eoi_map;   /* CPUs that need to EOI this interrupt */
    struct domain *guest[IRQ_MAX_GUESTS];
} irq_guest_action_t;

/*
 * Stack of interrupts awaiting EOI on each CPU. These must be popped in
 * order, as only the current highest-priority pending irq can be EOIed.
 */
struct pending_eoi {
    u32 ready:1;  /* Ready for EOI now?  */
    u32 irq:23;   /* irq of the vector   */
    u32 vector:8; /* vector awaiting EOI */
};
static DEFINE_PER_CPU(struct pending_eoi, pending_eoi[NR_DYNAMIC_VECTORS]);
#define pending_eoi_sp(p) ((p)[NR_DYNAMIC_VECTORS-1].vector)

static inline void set_pirq_eoi(struct domain *d, unsigned int irq)
{
    if ( d->arch.pirq_eoi_map )
        set_bit(irq, d->arch.pirq_eoi_map);
}

static inline void clear_pirq_eoi(struct domain *d, unsigned int irq)
{
    if ( d->arch.pirq_eoi_map )
        clear_bit(irq, d->arch.pirq_eoi_map);
}
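
/*
 * Complete a deferred guest EOI: clear the pirq_eoi bits set for each
 * bound guest, drop the EOI-pending state and re-enable the line.
 */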
static void _irq_guest_eoi(struct irq_desc *desc)
{
    irq_guest_action_t *action = (irq_guest_action_t *)desc->action;
    unsigned int i, irq = desc - irq_desc;

    if ( !(desc->status & IRQ_GUEST_EOI_PENDING) )
        return;

    for ( i = 0; i < action->nr_guests; ++i )
        clear_pirq_eoi(action->guest[i],
                       domain_irq_to_pirq(action->guest[i], irq));

    desc->status &= ~(IRQ_INPROGRESS|IRQ_GUEST_EOI_PENDING);
    desc->handler->enable(irq);
}

static void irq_guest_eoi_timer_fn(void *data)
{
    struct irq_desc *desc = data;
    unsigned long flags;

    spin_lock_irqsave(&desc->lock, flags);
    _irq_guest_eoi(desc);
    spin_unlock_irqrestore(&desc->lock, flags);
}
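
/*
 * Deliver an interrupt to every guest bound to it. For ACKTYPE_EOI lines
 * the vector is pushed on this CPU's pending-EOI stack. If every guest
 * already had the event pending, the line is masked until the guests
 * catch up (or the 1ms EOI timer fires).
 */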
static void __do_IRQ_guest(int irq)
{
    struct irq_desc    *desc = irq_to_desc(irq);
    irq_guest_action_t *action = (irq_guest_action_t *)desc->action;
    struct domain      *d;
    int                 i, sp, already_pending = 0;
    struct pending_eoi *peoi = this_cpu(pending_eoi);
    int vector = get_irq_regs()->entry_vector;

    if ( unlikely(action->nr_guests == 0) )
    {
        /* An interrupt may slip through while freeing an ACKTYPE_EOI irq. */
        ASSERT(action->ack_type == ACKTYPE_EOI);
        ASSERT(desc->status & IRQ_DISABLED);
        desc->handler->end(irq);
        return;
    }

    if ( action->ack_type == ACKTYPE_EOI )
    {
        sp = pending_eoi_sp(peoi);
        ASSERT((sp == 0) || (peoi[sp-1].vector < vector));
        ASSERT(sp < (NR_DYNAMIC_VECTORS-1));
        peoi[sp].irq = irq;
        peoi[sp].vector = vector;
        peoi[sp].ready = 0;
        pending_eoi_sp(peoi) = sp+1;
        cpu_set(smp_processor_id(), action->cpu_eoi_map);
    }

    for ( i = 0; i < action->nr_guests; i++ )
    {
        unsigned int pirq;
        d = action->guest[i];
        pirq = domain_irq_to_pirq(d, irq);
        if ( (action->ack_type != ACKTYPE_NONE) &&
             !test_and_set_bit(pirq, d->pirq_mask) )
            action->in_flight++;
        if ( hvm_do_IRQ_dpci(d, pirq) )
        {
            if ( action->ack_type == ACKTYPE_NONE )
            {
                already_pending += !!(desc->status & IRQ_INPROGRESS);
                desc->status |= IRQ_INPROGRESS; /* cleared during hvm eoi */
            }
        }
        else if ( send_guest_pirq(d, pirq) &&
                  (action->ack_type == ACKTYPE_NONE) )
        {
            already_pending++;
        }
    }

    if ( already_pending == action->nr_guests )
    {
        stop_timer(&irq_guest_eoi_timer[irq]);
        desc->handler->disable(irq);
        desc->status |= IRQ_GUEST_EOI_PENDING;
        for ( i = 0; i < already_pending; ++i )
        {
            d = action->guest[i];
            set_pirq_eoi(d, domain_irq_to_pirq(d, irq));
            /*
             * Could check here whether the guest unmasked the event by now
             * (or perhaps just re-issue the send_guest_pirq()), and if it
             * can now accept the event,
             * - clear all the pirq_eoi bits we already set,
             * - re-enable the vector, and
             * - skip the timer setup below.
             */
        }
        init_timer(&irq_guest_eoi_timer[irq],
                   irq_guest_eoi_timer_fn, desc, smp_processor_id());
        set_timer(&irq_guest_eoi_timer[irq], NOW() + MILLISECS(1));
    }
}

/*
 * Retrieve the Xen irq-descriptor corresponding to a domain-specific irq.
 * The descriptor is returned locked. This function is safe against changes
 * to the per-domain irq-to-vector mapping.
 */
struct irq_desc *domain_spin_lock_irq_desc(
    struct domain *d, int pirq, unsigned long *pflags)
{
    int irq;
    unsigned long flags;
    struct irq_desc *desc;

    for ( ; ; )
    {
        irq = domain_pirq_to_irq(d, pirq);
        if ( irq <= 0 )
            return NULL;
        desc = irq_to_desc(irq);
        spin_lock_irqsave(&desc->lock, flags);
        if ( irq == domain_pirq_to_irq(d, pirq) )
            break;
        spin_unlock_irqrestore(&desc->lock, flags);
    }

    if ( pflags != NULL )
        *pflags = flags;
    return desc;
}

/* Flush all ready EOIs from the top of this CPU's pending-EOI stack. */
static void flush_ready_eoi(void)
{
    struct pending_eoi *peoi = this_cpu(pending_eoi);
    struct irq_desc    *desc;
    int                 irq, sp;

    ASSERT(!local_irq_is_enabled());

    sp = pending_eoi_sp(peoi);

    while ( (--sp >= 0) && peoi[sp].ready )
    {
        irq = peoi[sp].irq;
        desc = irq_to_desc(irq);
        spin_lock(&desc->lock);
        desc->handler->end(irq);
        spin_unlock(&desc->lock);
    }

    pending_eoi_sp(peoi) = sp+1;
}
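
/*
 * Mark this CPU's pending-EOI stack entry for an irq as ready, once no
 * guest still has the interrupt in flight and this CPU is in the irq's
 * EOI map.
 */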
static void __set_eoi_ready(struct irq_desc *desc)
{
    irq_guest_action_t *action = (irq_guest_action_t *)desc->action;
    struct pending_eoi *peoi = this_cpu(pending_eoi);
    int                 irq, sp;

    irq = desc - irq_desc;

    if ( !(desc->status & IRQ_GUEST) ||
         (action->in_flight != 0) ||
         !cpu_test_and_clear(smp_processor_id(), action->cpu_eoi_map) )
        return;

    sp = pending_eoi_sp(peoi);

    do {
        ASSERT(sp > 0);
    } while ( peoi[--sp].irq != irq );
    ASSERT(!peoi[sp].ready);
    peoi[sp].ready = 1;
}

/* Mark specified IRQ as ready-for-EOI (if it really is) and attempt to EOI. */
static void set_eoi_ready(void *data)
{
    struct irq_desc *desc = data;

    ASSERT(!local_irq_is_enabled());

    spin_lock(&desc->lock);
    __set_eoi_ready(desc);
    spin_unlock(&desc->lock);

    flush_ready_eoi();
}

static void __pirq_guest_eoi(struct domain *d, int pirq)
{
    struct irq_desc    *desc;
    irq_guest_action_t *action;
    cpumask_t           cpu_eoi_map;
    int                 irq;

    ASSERT(local_irq_is_enabled());
    desc = domain_spin_lock_irq_desc(d, pirq, NULL);
    if ( desc == NULL )
        return;

    action = (irq_guest_action_t *)desc->action;
    irq = desc - irq_desc;

    if ( action->ack_type == ACKTYPE_NONE )
    {
        ASSERT(!test_bit(pirq, d->pirq_mask));
        stop_timer(&irq_guest_eoi_timer[irq]);
        _irq_guest_eoi(desc);
    }

    if ( unlikely(!test_and_clear_bit(pirq, d->pirq_mask)) ||
         unlikely(--action->in_flight != 0) )
    {
        spin_unlock_irq(&desc->lock);
        return;
    }

    if ( action->ack_type == ACKTYPE_UNMASK )
    {
        ASSERT(cpus_empty(action->cpu_eoi_map));
        desc->handler->end(irq);
        spin_unlock_irq(&desc->lock);
        return;
    }

    ASSERT(action->ack_type == ACKTYPE_EOI);

    cpu_eoi_map = action->cpu_eoi_map;

    if ( cpu_test_and_clear(smp_processor_id(), cpu_eoi_map) )
    {
        __set_eoi_ready(desc);
        spin_unlock(&desc->lock);
        flush_ready_eoi();
        local_irq_enable();
    }
    else
    {
        spin_unlock_irq(&desc->lock);
    }

    if ( !cpus_empty(cpu_eoi_map) )
        on_selected_cpus(&cpu_eoi_map, set_eoi_ready, desc, 0);
}

int pirq_guest_eoi(struct domain *d, int irq)
{
    if ( (irq < 0) || (irq >= d->nr_pirqs) )
        return -EINVAL;

    __pirq_guest_eoi(d, irq);

    return 0;
}

int pirq_guest_unmask(struct domain *d)
{
    unsigned int irq, nr = d->nr_pirqs;

    for ( irq = find_first_bit(d->pirq_mask, nr);
          irq < nr;
          irq = find_next_bit(d->pirq_mask, nr, irq+1) )
    {
        if ( !test_bit(d->pirq_to_evtchn[irq], &shared_info(d, evtchn_mask)) )
            __pirq_guest_eoi(d, irq);
    }

    return 0;
}

extern int ioapic_ack_new;
static int pirq_acktype(struct domain *d, int pirq)
{
    struct irq_desc *desc;
    int irq;

    irq = domain_pirq_to_irq(d, pirq);
    if ( irq <= 0 )
        return ACKTYPE_NONE;

    desc = irq_to_desc(irq);

    if ( desc->handler == &no_irq_type )
        return ACKTYPE_NONE;

    /*
     * Edge-triggered IO-APIC and LAPIC interrupts need no final
     * acknowledgement: we ACK early during interrupt processing.
     */
    if ( !strcmp(desc->handler->typename, "IO-APIC-edge") ||
         !strcmp(desc->handler->typename, "local-APIC-edge") )
        return ACKTYPE_NONE;

    /*
     * MSIs are treated as edge-triggered interrupts, except
     * when there is no proper way to mask them.
     */
    if ( desc->handler == &pci_msi_type )
        return msi_maskable_irq(desc->msi_desc) ? ACKTYPE_NONE : ACKTYPE_EOI;

    /*
     * Level-triggered IO-APIC interrupts need to be acknowledged on the CPU
     * on which they were received. This is because we tickle the LAPIC to EOI.
     */
    if ( !strcmp(desc->handler->typename, "IO-APIC-level") )
        return ioapic_ack_new ? ACKTYPE_EOI : ACKTYPE_UNMASK;

    /* Legacy PIC interrupts can be acknowledged from any CPU. */
    if ( !strcmp(desc->handler->typename, "XT-PIC") )
        return ACKTYPE_UNMASK;

    printk("Unknown PIC type '%s' for IRQ %d\n", desc->handler->typename, irq);
    BUG();

    return 0;
}

int pirq_shared(struct domain *d, int pirq)
{
    struct irq_desc    *desc;
    irq_guest_action_t *action;
    unsigned long       flags;
    int                 shared;

    desc = domain_spin_lock_irq_desc(d, pirq, &flags);
    if ( desc == NULL )
        return 0;

    action = (irq_guest_action_t *)desc->action;
    shared = ((desc->status & IRQ_GUEST) && (action->nr_guests > 1));

    spin_unlock_irqrestore(&desc->lock, flags);

    return shared;
}
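
/*
 * Bind a guest to a physical irq. The first binder allocates the guest
 * action, starts the line and points its affinity at the binding vCPU;
 * later binders may join only if both sides agreed to share.
 */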
int pirq_guest_bind(struct vcpu *v, int pirq, int will_share)
{
    int                 irq, rc = 0;
    struct irq_desc    *desc;
    irq_guest_action_t *action, *newaction = NULL;
    cpumask_t           cpumask = CPU_MASK_NONE;

    WARN_ON(!spin_is_locked(&v->domain->event_lock));
    BUG_ON(!local_irq_is_enabled());

 retry:
    desc = domain_spin_lock_irq_desc(v->domain, pirq, NULL);
    if ( desc == NULL )
    {
        rc = -EINVAL;
        goto out;
    }

    action = (irq_guest_action_t *)desc->action;
    irq = desc - irq_desc;

    if ( !(desc->status & IRQ_GUEST) )
    {
        if ( desc->action != NULL )
        {
            gdprintk(XENLOG_INFO,
                     "Cannot bind IRQ %d to guest. In use by '%s'.\n",
                     pirq, desc->action->name);
            rc = -EBUSY;
            goto unlock_out;
        }

        if ( newaction == NULL )
        {
            spin_unlock_irq(&desc->lock);
            if ( (newaction = xmalloc(irq_guest_action_t)) != NULL )
                goto retry;
            gdprintk(XENLOG_INFO,
                     "Cannot bind IRQ %d to guest. Out of memory.\n",
                     pirq);
            rc = -ENOMEM;
            goto out;
        }

        action = newaction;
        desc->action = (struct irqaction *)action;
        newaction = NULL;

        action->nr_guests = 0;
        action->in_flight = 0;
        action->shareable = will_share;
        action->ack_type  = pirq_acktype(v->domain, pirq);
        cpus_clear(action->cpu_eoi_map);

        desc->status |= IRQ_GUEST;
        desc->status &= ~IRQ_DISABLED;
        desc->handler->startup(irq);

        /* Attempt to bind the interrupt target to the correct CPU. */
        cpu_set(v->processor, cpumask);
        if ( !opt_noirqbalance && (desc->handler->set_affinity != NULL) )
            desc->handler->set_affinity(irq, cpumask);
    }
    else if ( !will_share || !action->shareable )
    {
        gdprintk(XENLOG_INFO, "Cannot bind IRQ %d to guest. %s.\n",
                 pirq,
                 will_share ?
                 "Others do not share" :
                 "Will not share with others");
        rc = -EBUSY;
        goto unlock_out;
    }
    else if ( action->nr_guests == 0 )
    {
        /*
         * Indicates that an ACKTYPE_EOI interrupt is being released.
         * Wait for that to happen before continuing.
         */
        ASSERT(action->ack_type == ACKTYPE_EOI);
        ASSERT(desc->status & IRQ_DISABLED);
        spin_unlock_irq(&desc->lock);
        cpu_relax();
        goto retry;
    }

    if ( action->nr_guests == IRQ_MAX_GUESTS )
    {
        gdprintk(XENLOG_INFO, "Cannot bind IRQ %d to guest. "
                 "Already at max share.\n", pirq);
        rc = -EBUSY;
        goto unlock_out;
    }

    action->guest[action->nr_guests++] = v->domain;

    if ( action->ack_type != ACKTYPE_NONE )
        set_pirq_eoi(v->domain, pirq);
    else
        clear_pirq_eoi(v->domain, pirq);

 unlock_out:
    spin_unlock_irq(&desc->lock);
 out:
    if ( newaction != NULL )
        xfree(newaction);
    return rc;
}
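
/*
 * Detach one guest from an irq. When the last guest goes away the line is
 * disabled, any pending EOIs are flushed on their owning CPUs, and the old
 * action block is returned for the caller to free.
 */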
static irq_guest_action_t *__pirq_guest_unbind(
    struct domain *d, int pirq, struct irq_desc *desc)
{
    unsigned int        irq;
    irq_guest_action_t *action;
    cpumask_t           cpu_eoi_map;
    int                 i;

    BUG_ON(!(desc->status & IRQ_GUEST));

    action = (irq_guest_action_t *)desc->action;
    irq = desc - irq_desc;

    if ( unlikely(action == NULL) )
    {
        dprintk(XENLOG_G_WARNING, "dom%d: pirq %d: desc->action is NULL!\n",
                d->domain_id, pirq);
        return NULL;
    }

    for ( i = 0; (i < action->nr_guests) && (action->guest[i] != d); i++ )
        continue;
    BUG_ON(i == action->nr_guests);
    memmove(&action->guest[i], &action->guest[i+1],
            (action->nr_guests-i-1) * sizeof(action->guest[0]));
    action->nr_guests--;

    switch ( action->ack_type )
    {
    case ACKTYPE_UNMASK:
        if ( test_and_clear_bit(pirq, d->pirq_mask) &&
             (--action->in_flight == 0) )
            desc->handler->end(irq);
        break;
    case ACKTYPE_EOI:
        /* NB. If #guests == 0 then we clear the eoi_map later on. */
        if ( test_and_clear_bit(pirq, d->pirq_mask) &&
             (--action->in_flight == 0) &&
             (action->nr_guests != 0) )
        {
            cpu_eoi_map = action->cpu_eoi_map;
            spin_unlock_irq(&desc->lock);
            on_selected_cpus(&cpu_eoi_map, set_eoi_ready, desc, 0);
            spin_lock_irq(&desc->lock);
        }
        break;
    case ACKTYPE_NONE:
        stop_timer(&irq_guest_eoi_timer[irq]);
        _irq_guest_eoi(desc);
        break;
    }

    /*
     * The guest cannot re-bind to this IRQ until this function returns. So,
     * when we have flushed this IRQ from pirq_mask, it should remain flushed.
     */
    BUG_ON(test_bit(pirq, d->pirq_mask));

    if ( action->nr_guests != 0 )
        return NULL;

    BUG_ON(action->in_flight != 0);

    /* Disabling IRQ before releasing the desc_lock avoids an IRQ storm. */
    desc->status |= IRQ_DISABLED;
    desc->handler->disable(irq);

    /*
     * Mark any remaining pending EOIs as ready to flush.
     * NOTE: We will need to make this a stronger barrier if in future we
     * allow an interrupt vector to be re-bound to a different PIC. In that
     * case we would need to flush all ready EOIs before returning as
     * otherwise the desc->handler could change and we would call the wrong
     * 'end' hook.
     */
    cpu_eoi_map = action->cpu_eoi_map;
    if ( !cpus_empty(cpu_eoi_map) )
    {
        BUG_ON(action->ack_type != ACKTYPE_EOI);
        spin_unlock_irq(&desc->lock);
        on_selected_cpus(&cpu_eoi_map, set_eoi_ready, desc, 1);
        spin_lock_irq(&desc->lock);
    }

    BUG_ON(!cpus_empty(action->cpu_eoi_map));

    desc->action = NULL;
    desc->status &= ~IRQ_GUEST;
    desc->status &= ~IRQ_INPROGRESS;
    kill_timer(&irq_guest_eoi_timer[irq]);
    desc->handler->shutdown(irq);

    /* Caller frees the old guest descriptor block. */
    return action;
}

void pirq_guest_unbind(struct domain *d, int pirq)
{
    irq_guest_action_t *oldaction = NULL;
    struct irq_desc *desc;
    int irq;

    WARN_ON(!spin_is_locked(&d->event_lock));

    BUG_ON(!local_irq_is_enabled());
    desc = domain_spin_lock_irq_desc(d, pirq, NULL);

    if ( desc == NULL )
    {
        irq = -domain_pirq_to_irq(d, pirq);
        BUG_ON(irq <= 0);
        desc = irq_to_desc(irq);
        spin_lock_irq(&desc->lock);
        d->arch.pirq_irq[pirq] = d->arch.irq_pirq[irq] = 0;
    }
    else
    {
        oldaction = __pirq_guest_unbind(d, pirq, desc);
    }

    spin_unlock_irq(&desc->lock);

    if ( oldaction != NULL )
        xfree(oldaction);
}

static int pirq_guest_force_unbind(struct domain *d, int irq)
{
    struct irq_desc *desc;
    irq_guest_action_t *action, *oldaction = NULL;
    int i, bound = 0;

    WARN_ON(!spin_is_locked(&d->event_lock));

    BUG_ON(!local_irq_is_enabled());
    desc = domain_spin_lock_irq_desc(d, irq, NULL);
    BUG_ON(desc == NULL);

    if ( !(desc->status & IRQ_GUEST) )
        goto out;

    action = (irq_guest_action_t *)desc->action;
    if ( unlikely(action == NULL) )
    {
        dprintk(XENLOG_G_WARNING, "dom%d: pirq %d: desc->action is NULL!\n",
                d->domain_id, irq);
        goto out;
    }

    for ( i = 0; (i < action->nr_guests) && (action->guest[i] != d); i++ )
        continue;
    if ( i == action->nr_guests )
        goto out;

    bound = 1;
    oldaction = __pirq_guest_unbind(d, irq, desc);

 out:
    spin_unlock_irq(&desc->lock);

    if ( oldaction != NULL )
        xfree(oldaction);

    return bound;
}

int get_free_pirq(struct domain *d, int type, int index)
{
    int i;

    ASSERT(spin_is_locked(&d->event_lock));

    if ( type == MAP_PIRQ_TYPE_GSI )
    {
        for ( i = 16; i < nr_irqs_gsi; i++ )
            if ( !d->arch.pirq_irq[i] )
                break;
        if ( i == nr_irqs_gsi )
            return -ENOSPC;
    }
    else
    {
        for ( i = d->nr_pirqs - 1; i >= nr_irqs_gsi; i-- )
            if ( !d->arch.pirq_irq[i] )
                break;
        if ( i < nr_irqs_gsi )
            return -ENOSPC;
    }

    return i;
}
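
/*
 * Establish the pirq <-> irq mapping for a domain. For MAP_PIRQ_TYPE_MSI
 * this also enables MSI on the device and installs pci_msi_type as the
 * irq handler.
 */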
int map_domain_pirq(
    struct domain *d, int pirq, int irq, int type, void *data)
{
    int ret = 0;
    int old_irq, old_pirq;
    struct irq_desc *desc;
    unsigned long flags;
    struct msi_desc *msi_desc;
    struct pci_dev *pdev = NULL;

    ASSERT(spin_is_locked(&pcidevs_lock));
    ASSERT(spin_is_locked(&d->event_lock));

    if ( !IS_PRIV(current->domain) &&
         !(IS_PRIV_FOR(current->domain, d) &&
           irq_access_permitted(current->domain, pirq)))
        return -EPERM;

    if ( pirq < 0 || pirq >= d->nr_pirqs || irq < 0 || irq >= nr_irqs )
    {
        dprintk(XENLOG_G_ERR, "dom%d: invalid pirq %d or irq %d\n",
                d->domain_id, pirq, irq);
        return -EINVAL;
    }

    old_irq = domain_pirq_to_irq(d, pirq);
    old_pirq = domain_irq_to_pirq(d, irq);

    if ( (old_irq && (old_irq != irq) ) ||
         (old_pirq && (old_pirq != pirq)) )
    {
        dprintk(XENLOG_G_WARNING, "dom%d: pirq %d or irq %d already mapped\n",
                d->domain_id, pirq, irq);
        return 0;
    }

    ret = irq_permit_access(d, pirq);
    if ( ret )
    {
        dprintk(XENLOG_G_ERR, "dom%d: could not permit access to irq %d\n",
                d->domain_id, pirq);
        return ret;
    }

    desc = irq_to_desc(irq);

    if ( type == MAP_PIRQ_TYPE_MSI )
    {
        struct msi_info *msi = (struct msi_info *)data;

        ret = -ENODEV;
        if ( !cpu_has_apic )
            goto done;

        pdev = pci_get_pdev(msi->bus, msi->devfn);
        ret = pci_enable_msi(msi, &msi_desc);
        if ( ret )
            goto done;

        spin_lock_irqsave(&desc->lock, flags);

        if ( desc->handler != &no_irq_type )
            dprintk(XENLOG_G_ERR, "dom%d: irq %d in use\n",
                    d->domain_id, irq);
        desc->handler = &pci_msi_type;
        d->arch.pirq_irq[pirq] = irq;
        d->arch.irq_pirq[irq] = pirq;
        setup_msi_irq(pdev, msi_desc, irq);
        spin_unlock_irqrestore(&desc->lock, flags);
    }
    else
    {
        spin_lock_irqsave(&desc->lock, flags);
        d->arch.pirq_irq[pirq] = irq;
        d->arch.irq_pirq[irq] = pirq;
        spin_unlock_irqrestore(&desc->lock, flags);
    }

 done:
    return ret;
}

/* The pirq should have been unbound before this call. */
int unmap_domain_pirq(struct domain *d, int pirq)
{
    unsigned long flags;
    struct irq_desc *desc;
    int irq, ret = 0;
    bool_t forced_unbind;
    struct msi_desc *msi_desc = NULL;

    if ( (pirq < 0) || (pirq >= d->nr_pirqs) )
        return -EINVAL;

    if ( !IS_PRIV_FOR(current->domain, d) )
        return -EINVAL;

    ASSERT(spin_is_locked(&pcidevs_lock));
    ASSERT(spin_is_locked(&d->event_lock));

    irq = domain_pirq_to_irq(d, pirq);
    if ( irq <= 0 )
    {
        dprintk(XENLOG_G_ERR, "dom%d: pirq %d not mapped\n",
                d->domain_id, pirq);
        ret = -EINVAL;
        goto done;
    }

    forced_unbind = pirq_guest_force_unbind(d, pirq);
    if ( forced_unbind )
        dprintk(XENLOG_G_WARNING, "dom%d: forcing unbind of pirq %d\n",
                d->domain_id, pirq);

    desc = irq_to_desc(irq);

    if ( (msi_desc = desc->msi_desc) != NULL )
        pci_disable_msi(msi_desc);

    spin_lock_irqsave(&desc->lock, flags);

    BUG_ON(irq != domain_pirq_to_irq(d, pirq));

    if ( !forced_unbind )
    {
        d->arch.pirq_irq[pirq] = 0;
        d->arch.irq_pirq[irq] = 0;
    }
    else
    {
        d->arch.pirq_irq[pirq] = -irq;
        d->arch.irq_pirq[irq] = -pirq;
    }

    spin_unlock_irqrestore(&desc->lock, flags);
    if ( msi_desc != NULL )
        msi_free_irq(msi_desc);

    ret = irq_deny_access(d, pirq);
    if ( ret )
        dprintk(XENLOG_G_ERR, "dom%d: could not deny access to irq %d\n",
                d->domain_id, pirq);

    if ( desc->handler == &pci_msi_type )
        desc->handler = &no_irq_type;

 done:
    return ret;
}

void free_domain_pirqs(struct domain *d)
{
    int i;

    spin_lock(&pcidevs_lock);
    spin_lock(&d->event_lock);

    for ( i = 0; i < d->nr_pirqs; i++ )
        if ( d->arch.pirq_irq[i] > 0 )
            unmap_domain_pirq(d, i);

    spin_unlock(&d->event_lock);
    spin_unlock(&pcidevs_lock);
}

extern void dump_ioapic_irq_info(void);

static void dump_irqs(unsigned char key)
{
    int i, irq, pirq;
    struct irq_desc *desc;
    struct irq_cfg *cfg;
    irq_guest_action_t *action;
    struct domain *d;
    unsigned long flags;

    printk("Guest interrupt information:\n");

    for ( irq = 0; irq < nr_irqs; irq++ )
    {
        desc = irq_to_desc(irq);
        cfg = desc->chip_data;

        if ( !desc->handler || desc->handler == &no_irq_type )
            continue;

        spin_lock_irqsave(&desc->lock, flags);

        cpumask_scnprintf(keyhandler_scratch, sizeof(keyhandler_scratch),
                          desc->affinity);
        printk("   IRQ:%4d affinity:%s vec:%02x type=%-15s status=%08x ",
               irq, keyhandler_scratch, cfg->vector,
               desc->handler->typename, desc->status);

        if ( !(desc->status & IRQ_GUEST) )
            printk("mapped, unbound\n");
        else
        {
            action = (irq_guest_action_t *)desc->action;

            printk("in-flight=%d domain-list=", action->in_flight);

            for ( i = 0; i < action->nr_guests; i++ )
            {
                d = action->guest[i];
                pirq = domain_irq_to_pirq(d, irq);
                printk("%u:%3d(%c%c%c%c)",
                       d->domain_id, pirq,
                       (test_bit(d->pirq_to_evtchn[pirq],
                                 &shared_info(d, evtchn_pending)) ?
                        'P' : '-'),
                       (test_bit(d->pirq_to_evtchn[pirq] /
                                 BITS_PER_EVTCHN_WORD(d),
                                 &vcpu_info(d->vcpu[0], evtchn_pending_sel)) ?
                        'S' : '-'),
                       (test_bit(d->pirq_to_evtchn[pirq],
                                 &shared_info(d, evtchn_mask)) ?
                        'M' : '-'),
                       (test_bit(pirq, d->pirq_mask) ?
                        'M' : '-'));
                if ( i != action->nr_guests )
                    printk(",");
            }

            printk("\n");
        }

        spin_unlock_irqrestore(&desc->lock, flags);
    }

    dump_ioapic_irq_info();
}

static struct keyhandler dump_irqs_keyhandler = {
    .diagnostic = 1,
    .u.fn = dump_irqs,
    .desc = "dump interrupt bindings"
};

static int __init setup_dump_irqs(void)
{
    register_keyhandler('i', &dump_irqs_keyhandler);
    return 0;
}
__initcall(setup_dump_irqs);

/* A cpu has been removed from cpu_online_mask. Re-set irq affinities. */
void fixup_irqs(void)
{
    static int warned;
    unsigned int irq, sp;
    struct irq_desc *desc;
    irq_guest_action_t *action;
    struct pending_eoi *peoi;

    for ( irq = 0; irq < nr_irqs; irq++ )
    {
        int break_affinity = 0;
        int set_affinity = 1;
        cpumask_t affinity;

        desc = irq_to_desc(irq);

        spin_lock(&desc->lock);

        affinity = desc->affinity;
        if ( !desc->action || cpus_equal(affinity, cpu_online_map) )
        {
            spin_unlock(&desc->lock);
            continue;
        }

        cpus_and(affinity, affinity, cpu_online_map);
        if ( any_online_cpu(affinity) == NR_CPUS )
        {
            break_affinity = 1;
            affinity = cpu_online_map;
        }

        if ( desc->handler->disable )
            desc->handler->disable(irq);

        if ( desc->handler->set_affinity )
            desc->handler->set_affinity(irq, affinity);
        else if ( !(warned++) )
            set_affinity = 0;

        if ( desc->handler->enable )
            desc->handler->enable(irq);

        spin_unlock(&desc->lock);

        if ( break_affinity && set_affinity )
            printk("Broke affinity for irq %i\n", irq);
        else if ( !set_affinity )
            printk("Cannot set affinity for irq %i\n", irq);
    }

    /* That doesn't seem sufficient. Give it 1ms. */
    local_irq_enable();
    mdelay(1);
    local_irq_disable();

    /* Clean up cpu_eoi_map of every interrupt to exclude this CPU. */
    for ( irq = 0; irq < nr_irqs; irq++ )
    {
        desc = irq_to_desc(irq);
        if ( !(desc->status & IRQ_GUEST) )
            continue;
        action = (irq_guest_action_t *)desc->action;
        cpu_clear(smp_processor_id(), action->cpu_eoi_map);
    }

    /* Flush the interrupt EOI stack. */
    peoi = this_cpu(pending_eoi);
    for ( sp = 0; sp < pending_eoi_sp(peoi); sp++ )
        peoi[sp].ready = 1;
    flush_ready_eoi();
}