/******************************************************************************
 * Virtual network driver for XenoLinux.
 *
 * Copyright (c) 2002-2003, K A Fraser
 */

#include <linux/config.h>
#include <linux/module.h>

#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/errno.h>

#include <linux/netdevice.h>
#include <linux/inetdevice.h>
#include <linux/etherdevice.h>
#include <linux/skbuff.h>
#include <linux/init.h>

#include <asm/fixmap.h>

#include <net/pkt_sched.h>

#define NET_IRQ _EVENT_NET

#define TX_MAX_ENTRIES (TX_RING_SIZE - 2)
#define RX_MAX_ENTRIES (RX_RING_SIZE - 2)

#define TX_RING_INC(_i)    (((_i)+1) & (TX_RING_SIZE-1))
#define RX_RING_INC(_i)    (((_i)+1) & (RX_RING_SIZE-1))
#define TX_RING_ADD(_i,_j) (((_i)+(_j)) & (TX_RING_SIZE-1))
#define RX_RING_ADD(_i,_j) (((_i)+(_j)) & (RX_RING_SIZE-1))

#define RX_BUF_SIZE ((PAGE_SIZE/2)+1) /* Fool the slab allocator :-) */
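/*
 * The ring-index macros above wrap by masking with (RING_SIZE-1); this is
 * only correct because TX_RING_SIZE and RX_RING_SIZE are powers of two.
 */
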
static void network_interrupt(int irq, void *dev_id, struct pt_regs *ptregs);
static void network_tx_buf_gc(struct net_device *dev);
static void network_alloc_rx_buffers(struct net_device *dev);
static void cleanup_module(void);

static struct list_head dev_list;

struct net_private
{
    struct list_head list;
    struct net_device *dev;

    struct net_device_stats stats;
    atomic_t tx_entries;
    unsigned int rx_resp_cons, tx_resp_cons, tx_full;
    unsigned int net_ring_fixmap_idx;
    net_ring_t *net_ring;
    net_idx_t  *net_idx;
    spinlock_t  tx_lock;
    unsigned int idx; /* Domain-specific index of this VIF. */

    unsigned int rx_bufs_to_notify;

#define STATE_ACTIVE    0
#define STATE_SUSPENDED 1
#define STATE_CLOSED    2
    unsigned int state;

    /*
     * {tx,rx}_skbs store outstanding skbuffs. The first entry in each
     * array is an index into a chain of free entries.
     */
    struct sk_buff *tx_skbs[TX_RING_SIZE];
    struct sk_buff *rx_skbs[RX_RING_SIZE];
};

/* Access macros for acquiring/freeing slots in {tx,rx}_skbs[]. */
#define ADD_ID_TO_FREELIST(_list, _id)             \
    (_list)[(_id)] = (_list)[0];                   \
    (_list)[0]     = (void *)(unsigned long)(_id);
#define GET_ID_FROM_FREELIST(_list)                \
 ({ unsigned long _id = (unsigned long)(_list)[0]; \
    (_list)[0]  = (_list)[_id];                    \
    (unsigned short)_id; })
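/*
 * The freelist macros above chain free slots through the skb pointer array
 * itself: entry 0 holds the index of the first free slot, and each free slot
 * holds the index of the next, cast to a pointer. A slot is never free and
 * in use at the same time, so no extra storage is needed.
 */
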
static void _dbg_network_int(struct net_device *dev)
{
    struct net_private *np = dev->priv;

    if ( np->state == STATE_CLOSED )
        return;

    printk(KERN_ALERT "tx_full = %d, tx_entries = %d, tx_resp_cons = %d,"
           " tx_req_prod = %d, tx_resp_prod = %d, tx_event = %d, state=%d\n",
           np->tx_full, atomic_read(&np->tx_entries), np->tx_resp_cons,
           np->net_idx->tx_req_prod, np->net_idx->tx_resp_prod,
           np->net_idx->tx_event,
           test_bit(__LINK_STATE_XOFF, &dev->state));
    printk(KERN_ALERT "rx_resp_cons = %d,"
           " rx_req_prod = %d, rx_resp_prod = %d, rx_event = %d\n",
           np->rx_resp_cons, np->net_idx->rx_req_prod,
           np->net_idx->rx_resp_prod, np->net_idx->rx_event);
}

static void dbg_network_int(int irq, void *unused, struct pt_regs *ptregs)
{
    struct list_head *ent;
    struct net_private *np;
    list_for_each ( ent, &dev_list )
    {
        np = list_entry(ent, struct net_private, list);
        _dbg_network_int(np->dev);
    }
}

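/*
 * Open the interface: reset and map the shared rings, reinitialise all
 * driver state, pre-post receive buffers, then enable the transmit queue.
 */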
static int network_open(struct net_device *dev)
{
    struct net_private *np = dev->priv;
    netop_t netop;
    int i, ret;

    netop.cmd = NETOP_RESET_RINGS;
    netop.vif = np->idx;
    if ( (ret = HYPERVISOR_net_io_op(&netop)) != 0 )
    {
        printk(KERN_ALERT "Possible net trouble: couldn't reset ring idxs\n");
        return ret;
    }

    netop.cmd = NETOP_GET_VIF_INFO;
    netop.vif = np->idx;
    if ( (ret = HYPERVISOR_net_io_op(&netop)) != 0 )
    {
        printk(KERN_ALERT "Couldn't get info for vif %d\n", np->idx);
        return ret;
    }

    memcpy(dev->dev_addr, netop.u.get_vif_info.vmac, ETH_ALEN);

    set_fixmap(FIX_NETRING0_BASE + np->net_ring_fixmap_idx,
               netop.u.get_vif_info.ring_mfn << PAGE_SHIFT);
    np->net_ring = (net_ring_t *)fix_to_virt(
        FIX_NETRING0_BASE + np->net_ring_fixmap_idx);
    np->net_idx  = &HYPERVISOR_shared_info->net_idx[np->idx];

    np->rx_bufs_to_notify = 0;
    np->rx_resp_cons = np->tx_resp_cons = np->tx_full = 0;
    memset(&np->stats, 0, sizeof(np->stats));
    spin_lock_init(&np->tx_lock);
    atomic_set(&np->tx_entries, 0);
    memset(np->net_ring, 0, sizeof(*np->net_ring));
    memset(np->net_idx, 0, sizeof(*np->net_idx));

    /* Initialise {tx,rx}_skbs to be a free chain containing every entry. */
    for ( i = 0; i < TX_RING_SIZE; i++ )
        np->tx_skbs[i] = (void *)(i+1);
    for ( i = 0; i < RX_RING_SIZE; i++ )
        np->rx_skbs[i] = (void *)(i+1);

    np->state = STATE_ACTIVE;

    network_alloc_rx_buffers(dev);

    netif_start_queue(dev);

    MOD_INC_USE_COUNT;

    return 0;
}

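/*
 * Reap completed transmissions: walk the response ring from tx_resp_cons to
 * tx_resp_prod, freeing each acknowledged skb. tx_event is then set so Xen
 * next interrupts us when roughly half the remaining in-flight packets have
 * completed; the outer loop re-checks tx_resp_prod to close the race with a
 * response arriving just as the event is rewritten.
 */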
static void network_tx_buf_gc(struct net_device *dev)
{
    unsigned int i;
    struct net_private *np = dev->priv;
    struct sk_buff *skb;
    unsigned int prod;
    tx_entry_t *tx_ring = np->net_ring->tx_ring;

    do {
        prod = np->net_idx->tx_resp_prod;

        for ( i = np->tx_resp_cons; i != prod; i = TX_RING_INC(i) )
        {
            skb = np->tx_skbs[tx_ring[i].resp.id];
            ADD_ID_TO_FREELIST(np->tx_skbs, tx_ring[i].resp.id);
            dev_kfree_skb_any(skb);
            atomic_dec(&np->tx_entries);
        }

        np->tx_resp_cons = prod;

        /* Set a new event, then check for race with update of tx_cons. */
        np->net_idx->tx_event =
            TX_RING_ADD(prod, (atomic_read(&np->tx_entries)>>1) + 1);
        mb();
    }
    while ( prod != np->net_idx->tx_resp_prod );

    if ( np->tx_full && (atomic_read(&np->tx_entries) < TX_MAX_ENTRIES) )
    {
        np->tx_full = 0;
        if ( np->state == STATE_ACTIVE )
            netif_wake_queue(dev);
    }
}

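/* Walk the kernel page tables to the pte mapping the given virtual address. */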
static inline pte_t *get_ppte(void *addr)
{
    pgd_t *pgd; pmd_t *pmd; pte_t *pte;
    pgd = pgd_offset_k(   (unsigned long)addr);
    pmd = pmd_offset(pgd, (unsigned long)addr);
    pte = pte_offset(pmd, (unsigned long)addr);
    return pte;
}

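/*
 * Post fresh page-sized receive buffers to Xen. Each request carries the
 * machine address of the pte mapping the buffer, allowing Xen to flip a
 * received page directly into this domain. Notifications are batched: Xen
 * is only poked once a quarter of the ring has been refilled.
 */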
static void network_alloc_rx_buffers(struct net_device *dev)
{
    unsigned int i, id;
    struct net_private *np = dev->priv;
    struct sk_buff *skb;
    unsigned int end = RX_RING_ADD(np->rx_resp_cons, RX_MAX_ENTRIES);
    netop_t netop;

    if ( ((i = np->net_idx->rx_req_prod) == end) ||
         (np->state != STATE_ACTIVE) )
        return;

    do {
        skb = dev_alloc_skb(RX_BUF_SIZE);
        if ( skb == NULL ) break;
        skb->dev = dev;

        if ( unlikely(((unsigned long)skb->head & (PAGE_SIZE-1)) != 0) )
            panic("alloc_skb needs to provide us page-aligned buffers.");

        id = GET_ID_FROM_FREELIST(np->rx_skbs);
        np->rx_skbs[id] = skb;

        np->net_ring->rx_ring[i].req.id   = (unsigned short)id;
        np->net_ring->rx_ring[i].req.addr =
            virt_to_machine(get_ppte(skb->head));

        np->rx_bufs_to_notify++;
    }
    while ( (i = RX_RING_INC(i)) != end );

    /*
     * We may have allocated buffers which have entries outstanding in the
     * page update queue -- make sure we flush those first!
     */
    flush_page_update_queue();

    np->net_idx->rx_req_prod = i;
    np->net_idx->rx_event    = RX_RING_INC(np->rx_resp_cons);

    /* Batch Xen notifications. */
    if ( np->rx_bufs_to_notify > (RX_MAX_ENTRIES/4) )
    {
        netop.cmd = NETOP_PUSH_BUFFERS;
        netop.vif = np->idx;
        (void)HYPERVISOR_net_io_op(&netop);
        np->rx_bufs_to_notify = 0;
    }
}

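/*
 * Transmit path. A packet is passed to Xen as a single machine address, so
 * it must not cross a page boundary; any skb that does is first copied into
 * a fresh page-aligned buffer.
 */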
static int network_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
    unsigned int i, id;
    struct net_private *np = (struct net_private *)dev->priv;
    netop_t netop;

    if ( np->tx_full )
    {
        printk(KERN_ALERT "%s: full queue wasn't stopped!\n", dev->name);
        netif_stop_queue(dev);
        return -ENOBUFS;
    }

    if ( (((unsigned long)skb->data & ~PAGE_MASK) + skb->len) >= PAGE_SIZE )
    {
        struct sk_buff *new_skb = dev_alloc_skb(RX_BUF_SIZE);
        if ( new_skb == NULL ) return 1;
        skb_put(new_skb, skb->len);
        memcpy(new_skb->data, skb->data, skb->len);
        dev_kfree_skb(skb);
        skb = new_skb;
    }

    spin_lock_irq(&np->tx_lock);

    i = np->net_idx->tx_req_prod;

    id = GET_ID_FROM_FREELIST(np->tx_skbs);
    np->tx_skbs[id] = skb;

    np->net_ring->tx_ring[i].req.id   = (unsigned short)id;
    np->net_ring->tx_ring[i].req.addr =
        phys_to_machine(virt_to_phys(skb->data));
    np->net_ring->tx_ring[i].req.size = skb->len;
    np->net_idx->tx_req_prod = TX_RING_INC(i);
    atomic_inc(&np->tx_entries);

    network_tx_buf_gc(dev);

    if ( atomic_read(&np->tx_entries) >= TX_MAX_ENTRIES )
    {
        np->tx_full = 1;
        netif_stop_queue(dev);
    }

    spin_unlock_irq(&np->tx_lock);

    np->stats.tx_bytes += skb->len;
    np->stats.tx_packets++;

    /* Only notify Xen if there are no outstanding responses. */
    mb();
    if ( np->net_idx->tx_resp_prod == i )
    {
        netop.cmd = NETOP_PUSH_BUFFERS;
        netop.vif = np->idx;
        (void)HYPERVISOR_net_io_op(&netop);
    }

    return 0;
}

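/*
 * Per-device interrupt work: reap completed transmissions, then handle RX
 * responses. Each good response is a page that Xen flipped into this domain,
 * so the skb's shared-info area and the phys-to-machine mapping must be
 * rebuilt before the skb is passed up the stack.
 */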
static inline void _network_interrupt(struct net_device *dev)
{
    struct net_private *np = dev->priv;
    unsigned long flags;
    struct sk_buff *skb;
    rx_resp_entry_t *rx;
    unsigned int i;

    if ( np->state == STATE_CLOSED )
        return;

    spin_lock_irqsave(&np->tx_lock, flags);
    network_tx_buf_gc(dev);
    spin_unlock_irqrestore(&np->tx_lock, flags);

 again:
    for ( i  = np->rx_resp_cons;
          i != np->net_idx->rx_resp_prod;
          i  = RX_RING_INC(i) )
    {
        rx = &np->net_ring->rx_ring[i].resp;

        skb = np->rx_skbs[rx->id];
        ADD_ID_TO_FREELIST(np->rx_skbs, rx->id);

        if ( rx->status != RING_STATUS_OK )
        {
            /* Gate this error. We get a (valid) slew of them on suspend. */
            if ( np->state == STATE_ACTIVE )
                printk(KERN_ALERT "bad buffer on RX ring!(%d)\n", rx->status);
            dev_kfree_skb_any(skb);
            continue;
        }

        /*
         * Set up shinfo -- taken from alloc_skb(). This was particularly
         * nasty: the shared info is hidden at the back of the data area
         * (presumably so it can be shared), but on page flip it gets
         * clobbered.
         */
        atomic_set(&(skb_shinfo(skb)->dataref), 1);
        skb_shinfo(skb)->nr_frags = 0;
        skb_shinfo(skb)->frag_list = NULL;

        phys_to_machine_mapping[virt_to_phys(skb->head) >> PAGE_SHIFT] =
            (*(unsigned long *)get_ppte(skb->head)) >> PAGE_SHIFT;

        skb->data = skb->tail = skb->head + rx->offset;
        skb_put(skb, rx->size);
        skb->protocol = eth_type_trans(skb, dev);

        np->stats.rx_packets++;
        np->stats.rx_bytes += rx->size;

        netif_rx(skb);
        dev->last_rx = jiffies;
    }

    np->rx_resp_cons = i;

    network_alloc_rx_buffers(dev);

    /* Deal with hypervisor racing our resetting of rx_event. */
    mb();
    if ( np->net_idx->rx_resp_prod != i ) goto again;
}

static void network_interrupt(int irq, void *unused, struct pt_regs *ptregs)
{
    struct list_head *ent;
    struct net_private *np;
    list_for_each ( ent, &dev_list )
    {
        np = list_entry(ent, struct net_private, list);
        _network_interrupt(np->dev);
    }
}

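/*
 * Close the interface: suspend it, have Xen flush and return all outstanding
 * buffers, and wait for the rings to drain before unmapping the ring page.
 */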
int network_close(struct net_device *dev)
{
    struct net_private *np = dev->priv;
    netop_t netop;

    np->state = STATE_SUSPENDED;
    wmb();

    netif_stop_queue(np->dev);

    netop.cmd = NETOP_FLUSH_BUFFERS;
    netop.vif = np->idx;
    (void)HYPERVISOR_net_io_op(&netop);

    while ( (np->rx_resp_cons != np->net_idx->rx_req_prod) ||
            (np->tx_resp_cons != np->net_idx->tx_req_prod) )
    {
        barrier();
        current->state = TASK_INTERRUPTIBLE;
        schedule_timeout(1);
    }

    wmb();
    np->state = STATE_CLOSED;
    wmb();

    /* Now no longer safe to take interrupts for this device. */
    clear_fixmap(FIX_NETRING0_BASE + np->net_ring_fixmap_idx);

    MOD_DEC_USE_COUNT;

    return 0;
}

static struct net_device_stats *network_get_stats(struct net_device *dev)
{
    struct net_private *np = (struct net_private *)dev->priv;
    return &np->stats;
}

/*
 * This notifier is installed for domain 0 only.
 * All other domains have VFR rules installed on their behalf by domain 0
 * when they are created. For bootstrap, Xen creates wildcard rules for
 * domain 0 -- this notifier is used to detect when we find our proper
 * IP address, so we can poke down proper rules and remove the wildcards.
 */
static int inetdev_notify(struct notifier_block *this,
                          unsigned long event,
                          void *ptr)
{
    struct in_ifaddr   *ifa = (struct in_ifaddr *)ptr;
    struct net_device  *dev = ifa->ifa_dev->dev;
    struct list_head   *ent;
    struct net_private *np;
    int idx = -1;
    network_op_t op;

    list_for_each ( ent, &dev_list )
    {
        np = list_entry(ent, struct net_private, list);
        if ( np->dev == dev )
            idx = np->idx;
    }

    if ( idx == -1 )
        goto out;

    memset(&op, 0, sizeof(op));
    op.u.net_rule.proto  = NETWORK_PROTO_ANY;
    op.u.net_rule.action = NETWORK_ACTION_ACCEPT;

    if ( event == NETDEV_UP )
        op.cmd = NETWORK_OP_ADDRULE;
    else if ( event == NETDEV_DOWN )
        op.cmd = NETWORK_OP_DELETERULE;
    else
        goto out;

    /* Accept outbound packets from this VIF's address to the physical NIC. */
    op.u.net_rule.src_vif       = idx;
    op.u.net_rule.dst_vif       = VIF_PHYSICAL_INTERFACE;
    op.u.net_rule.src_addr      = ntohl(ifa->ifa_address);
    op.u.net_rule.src_addr_mask = ~0UL;
    op.u.net_rule.dst_addr      = 0;
    op.u.net_rule.dst_addr_mask = 0;
    (void)HYPERVISOR_network_op(&op);

    /* Accept inbound packets from anywhere to this VIF's address. */
    op.u.net_rule.src_vif       = VIF_ANY_INTERFACE;
    op.u.net_rule.dst_vif       = idx;
    op.u.net_rule.src_addr      = 0;
    op.u.net_rule.src_addr_mask = 0;
    op.u.net_rule.dst_addr      = ntohl(ifa->ifa_address);
    op.u.net_rule.dst_addr_mask = ~0UL;
    (void)HYPERVISOR_network_op(&op);

 out:
    return NOTIFY_DONE;
}

static struct notifier_block notifier_inetdev = {
    .notifier_call = inetdev_notify,
};

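/*
 * Probe for VIFs by querying Xen for each possible index; every VIF that
 * answers gets an ethernet device backed by one ring-page fixmap slot.
 */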
int __init init_module(void)
{
    int i, fixmap_idx=-1, err;
    struct net_device *dev;
    struct net_private *np;
    netop_t netop;

    INIT_LIST_HEAD(&dev_list);

    /*
     * Domain 0 must poke its own network rules as it discovers its IP
     * addresses. All other domains have a privileged "parent" to do this for
     * them at start of day.
     */
    if ( start_info.dom_id == 0 )
        (void)register_inetaddr_notifier(&notifier_inetdev);

    err = request_irq(NET_IRQ, network_interrupt,
                      SA_SAMPLE_RANDOM, "network", NULL);
    if ( err )
    {
        printk(KERN_WARNING "Could not allocate network interrupt\n");
        goto fail;
    }

    err = request_irq(_EVENT_DEBUG, dbg_network_int, 0, "debug", NULL);
    if ( err )
        printk(KERN_WARNING "Non-fatal error -- no debug interrupt\n");

    for ( i = 0; i < MAX_DOMAIN_VIFS; i++ )
    {
        /* If the VIF is invalid then the query hypercall will fail. */
        netop.cmd = NETOP_GET_VIF_INFO;
        netop.vif = i;
        if ( HYPERVISOR_net_io_op(&netop) != 0 )
            continue;

        /* We actually only support up to 4 vifs right now. */
        if ( ++fixmap_idx == 4 )
            break;

        dev = alloc_etherdev(sizeof(struct net_private));
        if ( dev == NULL )
        {
            err = -ENOMEM;
            goto fail;
        }

        np = dev->priv;
        np->state               = STATE_CLOSED;
        np->net_ring_fixmap_idx = fixmap_idx;
        np->idx                 = i;

        SET_MODULE_OWNER(dev);
        dev->open            = network_open;
        dev->hard_start_xmit = network_start_xmit;
        dev->stop            = network_close;
        dev->get_stats       = network_get_stats;

        memcpy(dev->dev_addr, netop.u.get_vif_info.vmac, ETH_ALEN);

        if ( (err = register_netdev(dev)) != 0 )
        {
            kfree(dev);
            goto fail;
        }

        np->dev = dev;
        list_add(&np->list, &dev_list);
    }

    return 0;

 fail:
    cleanup_module();
    return err;
}

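/* Tear down every registered device and, for domain 0, the notifier. */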
static void cleanup_module(void)
{
    struct net_private *np;
    struct net_device *dev;

    while ( !list_empty(&dev_list) )
    {
        np = list_entry(dev_list.next, struct net_private, list);
        list_del(&np->list);
        dev = np->dev;
        unregister_netdev(dev);
        kfree(dev);
    }

    if ( start_info.dom_id == 0 )
        (void)unregister_inetaddr_notifier(&notifier_inetdev);
}


module_init(init_module);
module_exit(cleanup_module);