/*******************************************************************************

  Intel(R) Gigabit Ethernet Linux driver
  Copyright(c) 2007-2011 Intel Corporation.

  This program is free software; you can redistribute it and/or modify it
  under the terms and conditions of the GNU General Public License,
  version 2, as published by the Free Software Foundation.

  This program is distributed in the hope it will be useful, but WITHOUT
  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  more details.

  You should have received a copy of the GNU General Public License along with
  this program; if not, write to the Free Software Foundation, Inc.,
  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.

  The full GNU General Public License is included in this distribution in
  the file called "COPYING".

  Contact Information:
  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497

*******************************************************************************/
#include <linux/module.h>
#include <linux/types.h>
#include <linux/init.h>
#include <linux/bitops.h>
#include <linux/vmalloc.h>
#include <linux/pagemap.h>
#include <linux/netdevice.h>
#include <linux/ipv6.h>
#include <linux/slab.h>
#include <net/checksum.h>
#include <net/ip6_checksum.h>
#include <linux/net_tstamp.h>
#include <linux/mii.h>
#include <linux/ethtool.h>
#include <linux/if_vlan.h>
#include <linux/pci.h>
#include <linux/pci-aspm.h>
#include <linux/delay.h>
#include <linux/interrupt.h>
#include <linux/tcp.h>
#include <linux/sctp.h>
#include <linux/if_ether.h>
#include <linux/aer.h>
#include <linux/prefetch.h>
#ifdef CONFIG_IGB_DCA
#include <linux/dca.h>
#endif
#include "igb.h"
#define DRV_VERSION __stringify(MAJ) "." __stringify(MIN) "." \
    __stringify(BUILD) "-k"
char igb_driver_name[] = "igb";
char igb_driver_version[] = DRV_VERSION;
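/*
 * Illustration (the MAJ/MIN/BUILD defines are elided above; the values
 * here are assumed, not taken from this file): with MAJ 3, MIN 2 and
 * BUILD 10, DRV_VERSION expands to the string "3.2.10-k".
 */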
static const char igb_driver_string[] =
    "Intel(R) Gigabit Ethernet Network Driver";
static const char igb_copyright[] = "Copyright (c) 2007-2011 Intel Corporation.";
static const struct e1000_info *igb_info_tbl[] = {
    [board_82575] = &e1000_82575_info,
};

static DEFINE_PCI_DEVICE_TABLE(igb_pci_tbl) = {
    { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_COPPER), board_82575 },
    { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_FIBER), board_82575 },
    { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SERDES), board_82575 },
    { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SGMII), board_82575 },
    { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER), board_82575 },
    { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_FIBER), board_82575 },
    { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_QUAD_FIBER), board_82575 },
    { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SERDES), board_82575 },
    { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SGMII), board_82575 },
    { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER_DUAL), board_82575 },
    { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SGMII), board_82575 },
    { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SERDES), board_82575 },
    { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_BACKPLANE), board_82575 },
    { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SFP), board_82575 },
    { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576), board_82575 },
    { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS), board_82575 },
    { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS_SERDES), board_82575 },
    { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER), board_82575 },
    { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES), board_82575 },
    { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES_QUAD), board_82575 },
    { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER_ET2), board_82575 },
    { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER), board_82575 },
    { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER), board_82575 },
    { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES), board_82575 },
    { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER), board_82575 },
    /* required last entry */
    {0, }
};

MODULE_DEVICE_TABLE(pci, igb_pci_tbl);
void igb_reset(struct igb_adapter *);
static int igb_setup_all_tx_resources(struct igb_adapter *);
static int igb_setup_all_rx_resources(struct igb_adapter *);
static void igb_free_all_tx_resources(struct igb_adapter *);
static void igb_free_all_rx_resources(struct igb_adapter *);
static void igb_setup_mrqc(struct igb_adapter *);
static int igb_probe(struct pci_dev *, const struct pci_device_id *);
static void __devexit igb_remove(struct pci_dev *pdev);
static void igb_init_hw_timer(struct igb_adapter *adapter);
static int igb_sw_init(struct igb_adapter *);
static int igb_open(struct net_device *);
static int igb_close(struct net_device *);
static void igb_configure_tx(struct igb_adapter *);
static void igb_configure_rx(struct igb_adapter *);
static void igb_clean_all_tx_rings(struct igb_adapter *);
static void igb_clean_all_rx_rings(struct igb_adapter *);
static void igb_clean_tx_ring(struct igb_ring *);
static void igb_clean_rx_ring(struct igb_ring *);
static void igb_set_rx_mode(struct net_device *);
static void igb_update_phy_info(unsigned long);
static void igb_watchdog(unsigned long);
static void igb_watchdog_task(struct work_struct *);
static netdev_tx_t igb_xmit_frame(struct sk_buff *skb, struct net_device *);
static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *dev,
        struct rtnl_link_stats64 *stats);
static int igb_change_mtu(struct net_device *, int);
static int igb_set_mac(struct net_device *, void *);
static void igb_set_uta(struct igb_adapter *adapter);
static irqreturn_t igb_intr(int irq, void *);
static irqreturn_t igb_intr_msi(int irq, void *);
static irqreturn_t igb_msix_other(int irq, void *);
static irqreturn_t igb_msix_ring(int irq, void *);
#ifdef CONFIG_IGB_DCA
static void igb_update_dca(struct igb_q_vector *);
static void igb_setup_dca(struct igb_adapter *);
#endif /* CONFIG_IGB_DCA */
static int igb_poll(struct napi_struct *, int);
static bool igb_clean_tx_irq(struct igb_q_vector *);
static bool igb_clean_rx_irq(struct igb_q_vector *, int);
static int igb_ioctl(struct net_device *, struct ifreq *, int cmd);
static void igb_tx_timeout(struct net_device *);
static void igb_reset_task(struct work_struct *);
static void igb_vlan_mode(struct net_device *netdev, u32 features);
static void igb_vlan_rx_add_vid(struct net_device *, u16);
static void igb_vlan_rx_kill_vid(struct net_device *, u16);
static void igb_restore_vlan(struct igb_adapter *);
static void igb_rar_set_qsel(struct igb_adapter *, u8 *, u32, u8);
static void igb_ping_all_vfs(struct igb_adapter *);
static void igb_msg_task(struct igb_adapter *);
static void igb_vmm_control(struct igb_adapter *);
static int igb_set_vf_mac(struct igb_adapter *, int, unsigned char *);
static void igb_restore_vf_multicasts(struct igb_adapter *adapter);
static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac);
static int igb_ndo_set_vf_vlan(struct net_device *netdev,
        int vf, u16 vlan, u8 qos);
static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate);
static int igb_ndo_get_vf_config(struct net_device *netdev, int vf,
        struct ifla_vf_info *ivi);
static void igb_check_vf_rate_limit(struct igb_adapter *);
#ifdef CONFIG_PCI_IOV
static int igb_vf_configure(struct igb_adapter *adapter, int vf);
static int igb_find_enabled_vfs(struct igb_adapter *adapter);
static int igb_check_vf_assignment(struct igb_adapter *adapter);
#endif /* CONFIG_PCI_IOV */

static int igb_suspend(struct pci_dev *, pm_message_t);
static int igb_resume(struct pci_dev *);
static void igb_shutdown(struct pci_dev *);
#ifdef CONFIG_IGB_DCA
static int igb_notify_dca(struct notifier_block *, unsigned long, void *);
static struct notifier_block dca_notifier = {
    .notifier_call = igb_notify_dca,
    .next = NULL,
    .priority = 0
};
#endif
#ifdef CONFIG_NET_POLL_CONTROLLER
/* for netdump / net console */
static void igb_netpoll(struct net_device *);
#endif
#ifdef CONFIG_PCI_IOV
static unsigned int max_vfs = 0;
module_param(max_vfs, uint, 0);
MODULE_PARM_DESC(max_vfs, "Maximum number of virtual functions to allocate "
                 "per physical function");
#endif /* CONFIG_PCI_IOV */
static pci_ers_result_t igb_io_error_detected(struct pci_dev *,
        pci_channel_state_t);
static pci_ers_result_t igb_io_slot_reset(struct pci_dev *);
static void igb_io_resume(struct pci_dev *);

static struct pci_error_handlers igb_err_handler = {
    .error_detected = igb_io_error_detected,
    .slot_reset = igb_io_slot_reset,
    .resume = igb_io_resume,
};

static void igb_init_dmac(struct igb_adapter *adapter, u32 pba);
static struct pci_driver igb_driver = {
    .name = igb_driver_name,
    .id_table = igb_pci_tbl,
    .probe = igb_probe,
    .remove = __devexit_p(igb_remove),
    /* Power Management Hooks */
    .suspend = igb_suspend,
    .resume = igb_resume,
    .shutdown = igb_shutdown,
    .err_handler = &igb_err_handler
};

MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver");
MODULE_LICENSE("GPL");
MODULE_VERSION(DRV_VERSION);
struct igb_reg_info {
    u32 ofs;
    char *name;
};

static const struct igb_reg_info igb_reg_info_tbl[] = {

    /* General Registers */
    {E1000_CTRL, "CTRL"},
    {E1000_STATUS, "STATUS"},
    {E1000_CTRL_EXT, "CTRL_EXT"},

    /* Interrupt Registers */
    {E1000_ICR, "ICR"},

    /* RX Registers */
    {E1000_RCTL, "RCTL"},
    {E1000_RDLEN(0), "RDLEN"},
    {E1000_RDH(0), "RDH"},
    {E1000_RDT(0), "RDT"},
    {E1000_RXDCTL(0), "RXDCTL"},
    {E1000_RDBAL(0), "RDBAL"},
    {E1000_RDBAH(0), "RDBAH"},

    /* TX Registers */
    {E1000_TCTL, "TCTL"},
    {E1000_TDBAL(0), "TDBAL"},
    {E1000_TDBAH(0), "TDBAH"},
    {E1000_TDLEN(0), "TDLEN"},
    {E1000_TDH(0), "TDH"},
    {E1000_TDT(0), "TDT"},
    {E1000_TXDCTL(0), "TXDCTL"},
    {E1000_TDFH, "TDFH"},
    {E1000_TDFT, "TDFT"},
    {E1000_TDFHS, "TDFHS"},
    {E1000_TDFPC, "TDFPC"},

    /* List Terminator */
    {}
};
/*
 * igb_regdump - register printout routine
 */
static void igb_regdump(struct e1000_hw *hw, struct igb_reg_info *reginfo)
{
    int n = 0;
    char rname[16];
    u32 regs[4];

    switch (reginfo->ofs) {
    case E1000_RDLEN(0):
        for (n = 0; n < 4; n++)
            regs[n] = rd32(E1000_RDLEN(n));
        break;
    case E1000_RDH(0):
        for (n = 0; n < 4; n++)
            regs[n] = rd32(E1000_RDH(n));
        break;
    case E1000_RDT(0):
        for (n = 0; n < 4; n++)
            regs[n] = rd32(E1000_RDT(n));
        break;
    case E1000_RXDCTL(0):
        for (n = 0; n < 4; n++)
            regs[n] = rd32(E1000_RXDCTL(n));
        break;
    case E1000_RDBAL(0):
        for (n = 0; n < 4; n++)
            regs[n] = rd32(E1000_RDBAL(n));
        break;
    case E1000_RDBAH(0):
        for (n = 0; n < 4; n++)
            regs[n] = rd32(E1000_RDBAH(n));
        break;
    case E1000_TDBAL(0):
        for (n = 0; n < 4; n++)
            regs[n] = rd32(E1000_TDBAL(n));
        break;
    case E1000_TDBAH(0):
        for (n = 0; n < 4; n++)
            regs[n] = rd32(E1000_TDBAH(n));
        break;
    case E1000_TDLEN(0):
        for (n = 0; n < 4; n++)
            regs[n] = rd32(E1000_TDLEN(n));
        break;
    case E1000_TDH(0):
        for (n = 0; n < 4; n++)
            regs[n] = rd32(E1000_TDH(n));
        break;
    case E1000_TDT(0):
        for (n = 0; n < 4; n++)
            regs[n] = rd32(E1000_TDT(n));
        break;
    case E1000_TXDCTL(0):
        for (n = 0; n < 4; n++)
            regs[n] = rd32(E1000_TXDCTL(n));
        break;
    default:
        printk(KERN_INFO "%-15s %08x\n",
            reginfo->name, rd32(reginfo->ofs));
        return;
    }

    snprintf(rname, 16, "%s%s", reginfo->name, "[0-3]");
    printk(KERN_INFO "%-15s ", rname);
    for (n = 0; n < 4; n++)
        printk(KERN_CONT "%08x ", regs[n]);
    printk(KERN_CONT "\n");
}
/*
 * igb_dump - Print registers, tx-rings and rx-rings
 */
static void igb_dump(struct igb_adapter *adapter)
{
    struct net_device *netdev = adapter->netdev;
    struct e1000_hw *hw = &adapter->hw;
    struct igb_reg_info *reginfo;
    struct igb_ring *tx_ring;
    union e1000_adv_tx_desc *tx_desc;
    struct my_u0 { u64 a; u64 b; } *u0;
    struct igb_ring *rx_ring;
    union e1000_adv_rx_desc *rx_desc;
    u32 staterr;
    u16 i, n;
    if (!netif_msg_hw(adapter))
        return;

    /* Print netdevice Info */
    if (netdev) {
        dev_info(&adapter->pdev->dev, "Net device Info\n");
        printk(KERN_INFO "Device Name     state            "
            "trans_start      last_rx\n");
        printk(KERN_INFO "%-15s %016lX %016lX %016lX\n",
            netdev->name, netdev->state, netdev->trans_start,
            netdev->last_rx);
    }

    /* Print Registers */
    dev_info(&adapter->pdev->dev, "Register Dump\n");
    printk(KERN_INFO " Register Name   Value\n");
    for (reginfo = (struct igb_reg_info *)igb_reg_info_tbl;
         reginfo->name; reginfo++) {
        igb_regdump(hw, reginfo);
    }

    /* Print TX Ring Summary */
    if (!netdev || !netif_running(netdev))
        goto exit;

    dev_info(&adapter->pdev->dev, "TX Rings Summary\n");
    printk(KERN_INFO "Queue [NTU] [NTC] [bi(ntc)->dma  ]"
        " leng ntw timestamp\n");
    for (n = 0; n < adapter->num_tx_queues; n++) {
        struct igb_tx_buffer *buffer_info;
        tx_ring = adapter->tx_ring[n];
        buffer_info = &tx_ring->tx_buffer_info[tx_ring->next_to_clean];
        printk(KERN_INFO " %5d %5X %5X %016llX %04X %p %016llX\n",
            n, tx_ring->next_to_use, tx_ring->next_to_clean,
            (u64)buffer_info->dma,
            buffer_info->length,
            buffer_info->next_to_watch,
            (u64)buffer_info->time_stamp);
    }
    if (!netif_msg_tx_done(adapter))
        goto rx_ring_summary;

    dev_info(&adapter->pdev->dev, "TX Rings Dump\n");

    /* Transmit Descriptor Formats
     *
     * Advanced Transmit Descriptor
     *   +--------------------------------------------------------------+
     * 0 |         Buffer Address [63:0]                                |
     *   +--------------------------------------------------------------+
     * 8 | PAYLEN  | PORTS  |CC|IDX | STA | DCMD  |DTYP|MAC|RSV| DTALEN |
     *   +--------------------------------------------------------------+
     *   63      46 45    40 39 38 36 35 32 31   24             15     0
     */
    for (n = 0; n < adapter->num_tx_queues; n++) {
        tx_ring = adapter->tx_ring[n];
        printk(KERN_INFO "------------------------------------\n");
        printk(KERN_INFO "TX QUEUE INDEX = %d\n", tx_ring->queue_index);
        printk(KERN_INFO "------------------------------------\n");
        printk(KERN_INFO "T [desc]     [address 63:0  ] "
            "[PlPOCIStDDM Ln] [bi->dma       ] "
            "leng ntw timestamp bi->skb\n");

        for (i = 0; tx_ring->desc && (i < tx_ring->count); i++) {
            struct igb_tx_buffer *buffer_info;
            tx_desc = IGB_TX_DESC(tx_ring, i);
            buffer_info = &tx_ring->tx_buffer_info[i];
            u0 = (struct my_u0 *)tx_desc;
            printk(KERN_INFO "T [0x%03X]    %016llX %016llX %016llX"
                " %04X %p %016llX %p", i,
                le64_to_cpu(u0->a),
                le64_to_cpu(u0->b),
                (u64)buffer_info->dma,
                buffer_info->length,
                buffer_info->next_to_watch,
                (u64)buffer_info->time_stamp,
                buffer_info->skb);
            if (i == tx_ring->next_to_use &&
                i == tx_ring->next_to_clean)
                printk(KERN_CONT " NTC/U\n");
            else if (i == tx_ring->next_to_use)
                printk(KERN_CONT " NTU\n");
            else if (i == tx_ring->next_to_clean)
                printk(KERN_CONT " NTC\n");
            else
                printk(KERN_CONT "\n");

            if (netif_msg_pktdata(adapter) && buffer_info->dma != 0)
                print_hex_dump(KERN_INFO, "",
                    DUMP_PREFIX_ADDRESS,
                    16, 1, phys_to_virt(buffer_info->dma),
                    buffer_info->length, true);
        }
    }
    /* Print RX Rings Summary */
rx_ring_summary:
    dev_info(&adapter->pdev->dev, "RX Rings Summary\n");
    printk(KERN_INFO "Queue [NTU] [NTC]\n");
    for (n = 0; n < adapter->num_rx_queues; n++) {
        rx_ring = adapter->rx_ring[n];
        printk(KERN_INFO " %5d %5X %5X\n", n,
            rx_ring->next_to_use, rx_ring->next_to_clean);
    }

    /* Print RX Rings */
    if (!netif_msg_rx_status(adapter))
        goto exit;

    dev_info(&adapter->pdev->dev, "RX Rings Dump\n");
    /* Advanced Receive Descriptor (Read) Format
     *    63                                           1        0
     *    +-----------------------------------------------------+
     *  0 |       Packet Buffer Address [63:1]           |A0/NSE|
     *    +----------------------------------------------+------+
     *  8 |       Header Buffer Address [63:1]           |  DD  |
     *    +-----------------------------------------------------+
     *
     * Advanced Receive Descriptor (Write-Back) Format
     *
     *   63       48 47    32 31  30      21 20 17 16   4 3     0
     *   +------------------------------------------------------+
     * 0 | Packet     IP     |SPH| HDR_LEN   | RSV|Packet|  RSS |
     *   | Checksum   Ident  |   |           |    | Type | Type |
     *   +------------------------------------------------------+
     * 8 | VLAN Tag | Length | Extended Error | Extended Status |
     *   +------------------------------------------------------+
     *   63       48 47    32 31            20 19               0
     */
    for (n = 0; n < adapter->num_rx_queues; n++) {
        rx_ring = adapter->rx_ring[n];
        printk(KERN_INFO "------------------------------------\n");
        printk(KERN_INFO "RX QUEUE INDEX = %d\n", rx_ring->queue_index);
        printk(KERN_INFO "------------------------------------\n");
        printk(KERN_INFO "R  [desc]      [ PktBuf     A0] "
            "[  HeadBuf   DD] [bi->dma       ] [bi->skb] "
            "<-- Adv Rx Read format\n");
        printk(KERN_INFO "RWB[desc]      [PcsmIpSHl PtRs] "
            "[vl er S cks ln] ---------------- [bi->skb] "
            "<-- Adv Rx Write-Back format\n");
        for (i = 0; i < rx_ring->count; i++) {
            struct igb_rx_buffer *buffer_info;
            buffer_info = &rx_ring->rx_buffer_info[i];
            rx_desc = IGB_RX_DESC(rx_ring, i);
            u0 = (struct my_u0 *)rx_desc;
            staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
            if (staterr & E1000_RXD_STAT_DD) {
                /* Descriptor Done */
                printk(KERN_INFO "RWB[0x%03X]     %016llX "
                    "%016llX ---------------- %p", i,
                    le64_to_cpu(u0->a),
                    le64_to_cpu(u0->b),
                    buffer_info->skb);
            } else {
                printk(KERN_INFO "R  [0x%03X]     %016llX "
                    "%016llX %016llX %p", i,
                    le64_to_cpu(u0->a),
                    le64_to_cpu(u0->b),
                    (u64)buffer_info->dma,
                    buffer_info->skb);

                if (netif_msg_pktdata(adapter)) {
                    print_hex_dump(KERN_INFO, "",
                        DUMP_PREFIX_ADDRESS,
                        16, 1,
                        phys_to_virt(buffer_info->dma),
                        IGB_RX_HDR_LEN, true);
                    print_hex_dump(KERN_INFO, "",
                        DUMP_PREFIX_ADDRESS,
                        16, 1,
                        phys_to_virt(
                            buffer_info->page_dma +
                            buffer_info->page_offset),
                        PAGE_SIZE/2, true);
                }
            }

            if (i == rx_ring->next_to_use)
                printk(KERN_CONT " NTU\n");
            else if (i == rx_ring->next_to_clean)
                printk(KERN_CONT " NTC\n");
            else
                printk(KERN_CONT "\n");
        }
    }

exit:
    return;
}
/*
 * igb_read_clock - read raw cycle counter (to be used by time counter)
 */
static cycle_t igb_read_clock(const struct cyclecounter *tc)
{
    struct igb_adapter *adapter =
        container_of(tc, struct igb_adapter, cycles);
    struct e1000_hw *hw = &adapter->hw;
    u64 stamp = 0;
    int shift = 0;

    /*
     * The timestamp latches on lowest register read. For the 82580
     * the lowest register is SYSTIMR instead of SYSTIML.  However we never
     * adjusted TIMINCA so SYSTIMR will just read as all 0s so ignore it.
     */
    if (hw->mac.type >= e1000_82580) {
        stamp = rd32(E1000_SYSTIMR) >> 8;
        shift = IGB_82580_TSYNC_SHIFT;
    }

    stamp |= (u64)rd32(E1000_SYSTIML) << shift;
    stamp |= (u64)rd32(E1000_SYSTIMH) << (shift + 32);
    return stamp;
}
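/*
 * Illustration of the assembly above (IGB_82580_TSYNC_SHIFT is assumed
 * to be 24 here; the define lives in igb.h): on 82580-class parts the
 * returned value is
 *   (SYSTIMH << 56) | (SYSTIML << 24) | (SYSTIMR >> 8)
 * while on older parts it is simply (SYSTIMH << 32) | SYSTIML.
 */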
/**
 * igb_get_hw_dev - return device
 * used by hardware layer to print debugging information
 **/
struct net_device *igb_get_hw_dev(struct e1000_hw *hw)
{
    struct igb_adapter *adapter = hw->back;
    return adapter->netdev;
}
/**
 * igb_init_module - Driver Registration Routine
 *
 * igb_init_module is the first routine called when the driver is
 * loaded. All it does is register with the PCI subsystem.
 **/
static int __init igb_init_module(void)
{
    int ret;
    printk(KERN_INFO "%s - version %s\n",
           igb_driver_string, igb_driver_version);

    printk(KERN_INFO "%s\n", igb_copyright);

#ifdef CONFIG_IGB_DCA
    dca_register_notify(&dca_notifier);
#endif
    ret = pci_register_driver(&igb_driver);
    return ret;
}

module_init(igb_init_module);
/**
 * igb_exit_module - Driver Exit Cleanup Routine
 *
 * igb_exit_module is called just before the driver is removed
 * from memory.
 **/
static void __exit igb_exit_module(void)
{
#ifdef CONFIG_IGB_DCA
    dca_unregister_notify(&dca_notifier);
#endif
    pci_unregister_driver(&igb_driver);
}

module_exit(igb_exit_module);
#define Q_IDX_82576(i) (((i & 0x1) << 3) + (i >> 1))
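/*
 * Worked examples for the macro above: the low bit selects the 8-entry
 * half of the 82576 queue space and the remaining bits select the slot,
 * so
 *   Q_IDX_82576(0) = 0, Q_IDX_82576(1) = 8,
 *   Q_IDX_82576(2) = 1, Q_IDX_82576(3) = 9, ...
 * i.e. even inputs map to 0..7 and odd inputs to 8..15.
 */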
/**
 * igb_cache_ring_register - Descriptor ring to register mapping
 * @adapter: board private structure to initialize
 *
 * Once we know the feature-set enabled for the device, we'll cache
 * the register offset the descriptor ring is assigned to.
 **/
static void igb_cache_ring_register(struct igb_adapter *adapter)
{
    int i = 0, j = 0;
    u32 rbase_offset = adapter->vfs_allocated_count;

    switch (adapter->hw.mac.type) {
    case e1000_82576:
        /* The queues are allocated for virtualization such that VF 0
         * is allocated queues 0 and 8, VF 1 queues 1 and 9, etc.
         * In order to avoid collision we start at the first free queue
         * and continue consuming queues in the same sequence
         */
        if (adapter->vfs_allocated_count) {
            for (; i < adapter->rss_queues; i++)
                adapter->rx_ring[i]->reg_idx = rbase_offset +
                                               Q_IDX_82576(i);
        }
    default:
        for (; i < adapter->num_rx_queues; i++)
            adapter->rx_ring[i]->reg_idx = rbase_offset + i;
        for (; j < adapter->num_tx_queues; j++)
            adapter->tx_ring[j]->reg_idx = rbase_offset + j;
        break;
    }
}
static void igb_free_queues(struct igb_adapter *adapter)
{
    int i;

    for (i = 0; i < adapter->num_tx_queues; i++) {
        kfree(adapter->tx_ring[i]);
        adapter->tx_ring[i] = NULL;
    }
    for (i = 0; i < adapter->num_rx_queues; i++) {
        kfree(adapter->rx_ring[i]);
        adapter->rx_ring[i] = NULL;
    }
    adapter->num_rx_queues = 0;
    adapter->num_tx_queues = 0;
}
/**
 * igb_alloc_queues - Allocate memory for all rings
 * @adapter: board private structure to initialize
 *
 * We allocate one ring per queue at run-time since we don't know the
 * number of queues at compile-time.
 **/
static int igb_alloc_queues(struct igb_adapter *adapter)
{
    struct igb_ring *ring;
    int i;
    int orig_node = adapter->node;

    for (i = 0; i < adapter->num_tx_queues; i++) {
        if (orig_node == -1) {
            int cur_node = next_online_node(adapter->node);
            if (cur_node == MAX_NUMNODES)
                cur_node = first_online_node;
            adapter->node = cur_node;
        }
        ring = kzalloc_node(sizeof(struct igb_ring), GFP_KERNEL,
                            adapter->node);
        if (!ring)
            ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
        if (!ring)
            goto err;
        ring->count = adapter->tx_ring_count;
        ring->queue_index = i;
        ring->dev = &adapter->pdev->dev;
        ring->netdev = adapter->netdev;
        ring->numa_node = adapter->node;
        /* For 82575, context index must be unique per ring. */
        if (adapter->hw.mac.type == e1000_82575)
            set_bit(IGB_RING_FLAG_TX_CTX_IDX, &ring->flags);
        adapter->tx_ring[i] = ring;
    }
    /* Restore the adapter's original node */
    adapter->node = orig_node;

    for (i = 0; i < adapter->num_rx_queues; i++) {
        if (orig_node == -1) {
            int cur_node = next_online_node(adapter->node);
            if (cur_node == MAX_NUMNODES)
                cur_node = first_online_node;
            adapter->node = cur_node;
        }
        ring = kzalloc_node(sizeof(struct igb_ring), GFP_KERNEL,
                            adapter->node);
        if (!ring)
            ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
        if (!ring)
            goto err;
        ring->count = adapter->rx_ring_count;
        ring->queue_index = i;
        ring->dev = &adapter->pdev->dev;
        ring->netdev = adapter->netdev;
        ring->numa_node = adapter->node;
        /* set flag indicating ring supports SCTP checksum offload */
        if (adapter->hw.mac.type >= e1000_82576)
            set_bit(IGB_RING_FLAG_RX_SCTP_CSUM, &ring->flags);

        /* On i350, loopback VLAN packets have the tag byte-swapped. */
        if (adapter->hw.mac.type == e1000_i350)
            set_bit(IGB_RING_FLAG_RX_LB_VLAN_BSWAP, &ring->flags);

        adapter->rx_ring[i] = ring;
    }
    /* Restore the adapter's original node */
    adapter->node = orig_node;

    igb_cache_ring_register(adapter);

    return 0;

err:
    /* Restore the adapter's original node */
    adapter->node = orig_node;
    igb_free_queues(adapter);

    return -ENOMEM;
}
/**
 * igb_write_ivar - configure ivar for given MSI-X vector
 * @hw: pointer to the HW structure
 * @msix_vector: vector number we are allocating to a given ring
 * @index: row index of IVAR register to write within IVAR table
 * @offset: column offset within the IVAR register, should be a multiple of 8
 *
 * This function is intended to handle the writing of the IVAR register
 * for adapters 82576 and newer.  The IVAR table consists of 2 columns,
 * each containing a cause allocation for an Rx and Tx ring, and a
 * variable number of rows depending on the number of queues supported.
 **/
static void igb_write_ivar(struct e1000_hw *hw, int msix_vector,
                           int index, int offset)
{
    u32 ivar = array_rd32(E1000_IVAR0, index);

    /* clear any bits that are currently set */
    ivar &= ~((u32)0xFF << offset);

    /* write vector and valid bit */
    ivar |= (msix_vector | E1000_IVAR_VALID) << offset;

    array_wr32(E1000_IVAR0, index, ivar);
}
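/*
 * Illustration (assumes E1000_IVAR_VALID is the 0x80 "valid" flag of
 * each byte lane, as the 0xFF masking above implies): a call with
 * msix_vector = 3 and offset = 8 clears byte 1 of the selected IVAR
 * word and rewrites it as 0x83, i.e. "route this cause to vector 3".
 */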
#define IGB_N0_QUEUE -1
static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector)
{
    struct igb_adapter *adapter = q_vector->adapter;
    struct e1000_hw *hw = &adapter->hw;
    int rx_queue = IGB_N0_QUEUE;
    int tx_queue = IGB_N0_QUEUE;
    u32 msixbm = 0;

    if (q_vector->rx.ring)
        rx_queue = q_vector->rx.ring->reg_idx;
    if (q_vector->tx.ring)
        tx_queue = q_vector->tx.ring->reg_idx;

    switch (hw->mac.type) {
    case e1000_82575:
        /* The 82575 assigns vectors using a bitmask, which matches the
           bitmask for the EICR/EIMS/EIMC registers.  To assign one
           or more queues to a vector, we write the appropriate bits
           into the MSIXBM register for that vector. */
        if (rx_queue > IGB_N0_QUEUE)
            msixbm = E1000_EICR_RX_QUEUE0 << rx_queue;
        if (tx_queue > IGB_N0_QUEUE)
            msixbm |= E1000_EICR_TX_QUEUE0 << tx_queue;
        if (!adapter->msix_entries && msix_vector == 0)
            msixbm |= E1000_EIMS_OTHER;
        array_wr32(E1000_MSIXBM(0), msix_vector, msixbm);
        q_vector->eims_value = msixbm;
        break;
    case e1000_82576:
        /*
         * 82576 uses a table that essentially consists of 2 columns
         * with 8 rows.  The ordering is column-major so we use the
         * lower 3 bits as the row index, and the 4th bit as the
         * column offset.
         */
        if (rx_queue > IGB_N0_QUEUE)
            igb_write_ivar(hw, msix_vector,
                           rx_queue & 0x7,
                           (rx_queue & 0x8) << 1);
        if (tx_queue > IGB_N0_QUEUE)
            igb_write_ivar(hw, msix_vector,
                           tx_queue & 0x7,
                           ((tx_queue & 0x8) << 1) + 8);
        q_vector->eims_value = 1 << msix_vector;
        break;
    case e1000_82580:
    case e1000_i350:
        /*
         * On 82580 and newer adapters the scheme is similar to 82576
         * however instead of ordering column-major we have things
         * ordered row-major.  So we traverse the table by using
         * bit 0 as the column offset, and the remaining bits as the
         * row index.
         */
        if (rx_queue > IGB_N0_QUEUE)
            igb_write_ivar(hw, msix_vector,
                           rx_queue >> 1,
                           (rx_queue & 0x1) << 4);
        if (tx_queue > IGB_N0_QUEUE)
            igb_write_ivar(hw, msix_vector,
                           tx_queue >> 1,
                           ((tx_queue & 0x1) << 4) + 8);
        q_vector->eims_value = 1 << msix_vector;
        break;
    default:
        BUG();
        break;
    }

    /* add q_vector eims value to global eims_enable_mask */
    adapter->eims_enable_mask |= q_vector->eims_value;

    /* configure q_vector to set itr on first interrupt */
    q_vector->set_itr = 1;
}
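/*
 * Worked example of the two IVAR layouts above (the queue number is
 * illustrative only): for queue 10 on 82576, index = 10 & 0x7 = 2 and
 * offset = (10 & 0x8) << 1 = 16 (column-major); on 82580/i350 the same
 * queue uses index = 10 >> 1 = 5 and offset = (10 & 0x1) << 4 = 0
 * (row-major).
 */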
/**
 * igb_configure_msix - Configure MSI-X hardware
 *
 * igb_configure_msix sets up the hardware to properly
 * generate MSI-X interrupts.
 **/
static void igb_configure_msix(struct igb_adapter *adapter)
{
    u32 tmp;
    int i, vector = 0;
    struct e1000_hw *hw = &adapter->hw;

    adapter->eims_enable_mask = 0;

    /* set vector for other causes, i.e. link changes */
    switch (hw->mac.type) {
    case e1000_82575:
        tmp = rd32(E1000_CTRL_EXT);
        /* enable MSI-X PBA support*/
        tmp |= E1000_CTRL_EXT_PBA_CLR;

        /* Auto-Mask interrupts upon ICR read. */
        tmp |= E1000_CTRL_EXT_EIAME;
        tmp |= E1000_CTRL_EXT_IRCA;

        wr32(E1000_CTRL_EXT, tmp);

        /* enable msix_other interrupt */
        array_wr32(E1000_MSIXBM(0), vector++,
                   E1000_EIMS_OTHER);
        adapter->eims_other = E1000_EIMS_OTHER;

        break;

    case e1000_82576:
    case e1000_82580:
    case e1000_i350:
        /* Turn on MSI-X capability first, or our settings
         * won't stick.  And it will take days to debug. */
        wr32(E1000_GPIE, E1000_GPIE_MSIX_MODE |
             E1000_GPIE_PBA | E1000_GPIE_EIAME |
             E1000_GPIE_NSICR);

        /* enable msix_other interrupt */
        adapter->eims_other = 1 << vector;
        tmp = (vector++ | E1000_IVAR_VALID) << 8;

        wr32(E1000_IVAR_MISC, tmp);
        break;
    default:
        /* do nothing, since nothing else supports MSI-X */
        break;
    } /* switch (hw->mac.type) */

    adapter->eims_enable_mask |= adapter->eims_other;

    for (i = 0; i < adapter->num_q_vectors; i++)
        igb_assign_vector(adapter->q_vector[i], vector++);

    wrfl();
}
/**
 * igb_request_msix - Initialize MSI-X interrupts
 *
 * igb_request_msix allocates MSI-X vectors and requests interrupts from the
 * kernel.
 **/
static int igb_request_msix(struct igb_adapter *adapter)
{
    struct net_device *netdev = adapter->netdev;
    struct e1000_hw *hw = &adapter->hw;
    int i, err = 0, vector = 0;

    err = request_irq(adapter->msix_entries[vector].vector,
                      igb_msix_other, 0, netdev->name, adapter);
    if (err)
        goto out;
    vector++;

    for (i = 0; i < adapter->num_q_vectors; i++) {
        struct igb_q_vector *q_vector = adapter->q_vector[i];

        q_vector->itr_register = hw->hw_addr + E1000_EITR(vector);

        if (q_vector->rx.ring && q_vector->tx.ring)
            sprintf(q_vector->name, "%s-TxRx-%u", netdev->name,
                    q_vector->rx.ring->queue_index);
        else if (q_vector->tx.ring)
            sprintf(q_vector->name, "%s-tx-%u", netdev->name,
                    q_vector->tx.ring->queue_index);
        else if (q_vector->rx.ring)
            sprintf(q_vector->name, "%s-rx-%u", netdev->name,
                    q_vector->rx.ring->queue_index);
        else
            sprintf(q_vector->name, "%s-unused", netdev->name);

        err = request_irq(adapter->msix_entries[vector].vector,
                          igb_msix_ring, 0, q_vector->name,
                          q_vector);
        if (err)
            goto out;
        vector++;
    }

    igb_configure_msix(adapter);
    return 0;
out:
    return err;
}
static void igb_reset_interrupt_capability(struct igb_adapter *adapter)
{
    if (adapter->msix_entries) {
        pci_disable_msix(adapter->pdev);
        kfree(adapter->msix_entries);
        adapter->msix_entries = NULL;
    } else if (adapter->flags & IGB_FLAG_HAS_MSI) {
        pci_disable_msi(adapter->pdev);
    }
}
/**
 * igb_free_q_vectors - Free memory allocated for interrupt vectors
 * @adapter: board private structure to initialize
 *
 * This function frees the memory allocated to the q_vectors.  In addition if
 * NAPI is enabled it will delete any references to the NAPI struct prior
 * to freeing the q_vector.
 **/
static void igb_free_q_vectors(struct igb_adapter *adapter)
{
    int v_idx;

    for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
        struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
        adapter->q_vector[v_idx] = NULL;
        if (!q_vector)
            continue;
        netif_napi_del(&q_vector->napi);
        kfree(q_vector);
    }
    adapter->num_q_vectors = 0;
}
/**
 * igb_clear_interrupt_scheme - reset the device to a state of no interrupts
 *
 * This function resets the device so that it has 0 rx queues, tx queues, and
 * MSI-X interrupts allocated.
 */
static void igb_clear_interrupt_scheme(struct igb_adapter *adapter)
{
    igb_free_queues(adapter);
    igb_free_q_vectors(adapter);
    igb_reset_interrupt_capability(adapter);
}
/**
 * igb_set_interrupt_capability - set MSI or MSI-X if supported
 *
 * Attempt to configure interrupts using the best available
 * capabilities of the hardware and kernel.
 **/
static int igb_set_interrupt_capability(struct igb_adapter *adapter)
{
    int err;
    int numvecs, i;

    /* Number of supported queues. */
    adapter->num_rx_queues = adapter->rss_queues;
    if (adapter->vfs_allocated_count)
        adapter->num_tx_queues = 1;
    else
        adapter->num_tx_queues = adapter->rss_queues;

    /* start with one vector for every rx queue */
    numvecs = adapter->num_rx_queues;

    /* if tx handler is separate add 1 for every tx queue */
    if (!(adapter->flags & IGB_FLAG_QUEUE_PAIRS))
        numvecs += adapter->num_tx_queues;

    /* store the number of vectors reserved for queues */
    adapter->num_q_vectors = numvecs;

    /* add 1 vector for link status interrupts */
    numvecs++;
    adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry),
                                    GFP_KERNEL);
    if (!adapter->msix_entries)
        goto msi_only;

    for (i = 0; i < numvecs; i++)
        adapter->msix_entries[i].entry = i;

    err = pci_enable_msix(adapter->pdev,
                          adapter->msix_entries,
                          numvecs);
    if (err == 0)
        goto out;

    igb_reset_interrupt_capability(adapter);

    /* If we can't do MSI-X, try MSI */
msi_only:
#ifdef CONFIG_PCI_IOV
    /* disable SR-IOV for non MSI-X configurations */
    if (adapter->vf_data) {
        struct e1000_hw *hw = &adapter->hw;
        /* disable iov and allow time for transactions to clear */
        pci_disable_sriov(adapter->pdev);
        msleep(500);

        kfree(adapter->vf_data);
        adapter->vf_data = NULL;
        wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
        wrfl();
        msleep(100);
        dev_info(&adapter->pdev->dev, "IOV Disabled\n");
    }
#endif
    adapter->vfs_allocated_count = 0;
    adapter->rss_queues = 1;
    adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
    adapter->num_rx_queues = 1;
    adapter->num_tx_queues = 1;
    adapter->num_q_vectors = 1;
    if (!pci_enable_msi(adapter->pdev))
        adapter->flags |= IGB_FLAG_HAS_MSI;
out:
    /* Notify the stack of the (possibly) reduced queue counts. */
    netif_set_real_num_tx_queues(adapter->netdev, adapter->num_tx_queues);
    return netif_set_real_num_rx_queues(adapter->netdev,
                                        adapter->num_rx_queues);
}
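/*
 * Example of the vector accounting above (the queue counts are
 * illustrative): with rss_queues = 4 and IGB_FLAG_QUEUE_PAIRS set,
 * numvecs = 4 queue vectors + 1 link vector = 5 MSI-X entries; with
 * unpaired Tx handlers it becomes 4 + 4 + 1 = 9.
 */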
/**
 * igb_alloc_q_vectors - Allocate memory for interrupt vectors
 * @adapter: board private structure to initialize
 *
 * We allocate one q_vector per queue interrupt.  If allocation fails we
 * return -ENOMEM.
 **/
static int igb_alloc_q_vectors(struct igb_adapter *adapter)
{
    struct igb_q_vector *q_vector;
    struct e1000_hw *hw = &adapter->hw;
    int v_idx;
    int orig_node = adapter->node;

    for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
        if ((adapter->num_q_vectors == (adapter->num_rx_queues +
                                        adapter->num_tx_queues)) &&
            (adapter->num_rx_queues == v_idx))
            adapter->node = orig_node;
        if (orig_node == -1) {
            int cur_node = next_online_node(adapter->node);
            if (cur_node == MAX_NUMNODES)
                cur_node = first_online_node;
            adapter->node = cur_node;
        }
        q_vector = kzalloc_node(sizeof(struct igb_q_vector), GFP_KERNEL,
                                adapter->node);
        if (!q_vector)
            q_vector = kzalloc(sizeof(struct igb_q_vector),
                               GFP_KERNEL);
        if (!q_vector)
            goto err_out;
        q_vector->adapter = adapter;
        q_vector->itr_register = hw->hw_addr + E1000_EITR(0);
        q_vector->itr_val = IGB_START_ITR;
        netif_napi_add(adapter->netdev, &q_vector->napi, igb_poll, 64);
        adapter->q_vector[v_idx] = q_vector;
    }
    /* Restore the adapter's original node */
    adapter->node = orig_node;

    return 0;

err_out:
    /* Restore the adapter's original node */
    adapter->node = orig_node;
    igb_free_q_vectors(adapter);
    return -ENOMEM;
}
static void igb_map_rx_ring_to_vector(struct igb_adapter *adapter,
                                      int ring_idx, int v_idx)
{
    struct igb_q_vector *q_vector = adapter->q_vector[v_idx];

    q_vector->rx.ring = adapter->rx_ring[ring_idx];
    q_vector->rx.ring->q_vector = q_vector;
    q_vector->rx.count++;
    q_vector->itr_val = adapter->rx_itr_setting;
    if (q_vector->itr_val && q_vector->itr_val <= 3)
        q_vector->itr_val = IGB_START_ITR;
}

static void igb_map_tx_ring_to_vector(struct igb_adapter *adapter,
                                      int ring_idx, int v_idx)
{
    struct igb_q_vector *q_vector = adapter->q_vector[v_idx];

    q_vector->tx.ring = adapter->tx_ring[ring_idx];
    q_vector->tx.ring->q_vector = q_vector;
    q_vector->tx.count++;
    q_vector->itr_val = adapter->tx_itr_setting;
    q_vector->tx.work_limit = adapter->tx_work_limit;
    if (q_vector->itr_val && q_vector->itr_val <= 3)
        q_vector->itr_val = IGB_START_ITR;
}
/**
 * igb_map_ring_to_vector - maps allocated queues to vectors
 *
 * This function maps the recently allocated queues to vectors.
 **/
static int igb_map_ring_to_vector(struct igb_adapter *adapter)
{
    int i;
    int v_idx = 0;

    if ((adapter->num_q_vectors < adapter->num_rx_queues) ||
        (adapter->num_q_vectors < adapter->num_tx_queues))
        return -ENOMEM;

    if (adapter->num_q_vectors >=
        (adapter->num_rx_queues + adapter->num_tx_queues)) {
        for (i = 0; i < adapter->num_rx_queues; i++)
            igb_map_rx_ring_to_vector(adapter, i, v_idx++);
        for (i = 0; i < adapter->num_tx_queues; i++)
            igb_map_tx_ring_to_vector(adapter, i, v_idx++);
    } else {
        for (i = 0; i < adapter->num_rx_queues; i++) {
            if (i < adapter->num_tx_queues)
                igb_map_tx_ring_to_vector(adapter, i, v_idx);
            igb_map_rx_ring_to_vector(adapter, i, v_idx++);
        }
        for (; i < adapter->num_tx_queues; i++)
            igb_map_tx_ring_to_vector(adapter, i, v_idx++);
    }
    return 0;
}
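/*
 * Example of the mapping above (the counts are illustrative): with 2 rx
 * and 2 tx queues but only 2 q_vectors, the paired branch gives
 * vector 0 = {tx0, rx0} and vector 1 = {tx1, rx1}; with 4 q_vectors
 * each ring gets a vector of its own instead.
 */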
/**
 * igb_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
 *
 * This function initializes the interrupts and allocates all of the queues.
 **/
static int igb_init_interrupt_scheme(struct igb_adapter *adapter)
{
    struct pci_dev *pdev = adapter->pdev;
    int err;

    err = igb_set_interrupt_capability(adapter);
    if (err)
        return err;

    err = igb_alloc_q_vectors(adapter);
    if (err) {
        dev_err(&pdev->dev, "Unable to allocate memory for vectors\n");
        goto err_alloc_q_vectors;
    }

    err = igb_alloc_queues(adapter);
    if (err) {
        dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
        goto err_alloc_queues;
    }

    err = igb_map_ring_to_vector(adapter);
    if (err) {
        dev_err(&pdev->dev, "Invalid q_vector to ring mapping\n");
        goto err_map_queues;
    }

    return 0;
err_map_queues:
    igb_free_queues(adapter);
err_alloc_queues:
    igb_free_q_vectors(adapter);
err_alloc_q_vectors:
    igb_reset_interrupt_capability(adapter);
    return err;
}
/**
 * igb_request_irq - initialize interrupts
 *
 * Attempts to configure interrupts using the best available
 * capabilities of the hardware and kernel.
 **/
static int igb_request_irq(struct igb_adapter *adapter)
{
    struct net_device *netdev = adapter->netdev;
    struct pci_dev *pdev = adapter->pdev;
    int err = 0;

    if (adapter->msix_entries) {
        err = igb_request_msix(adapter);
        if (!err)
            goto request_done;
        /* fall back to MSI */
        igb_clear_interrupt_scheme(adapter);
        if (!pci_enable_msi(pdev))
            adapter->flags |= IGB_FLAG_HAS_MSI;
        igb_free_all_tx_resources(adapter);
        igb_free_all_rx_resources(adapter);
        adapter->num_tx_queues = 1;
        adapter->num_rx_queues = 1;
        adapter->num_q_vectors = 1;
        err = igb_alloc_q_vectors(adapter);
        if (err) {
            dev_err(&pdev->dev,
                    "Unable to allocate memory for vectors\n");
            goto request_done;
        }
        err = igb_alloc_queues(adapter);
        if (err) {
            dev_err(&pdev->dev,
                    "Unable to allocate memory for queues\n");
            igb_free_q_vectors(adapter);
            goto request_done;
        }
        igb_setup_all_tx_resources(adapter);
        igb_setup_all_rx_resources(adapter);
    }

    igb_assign_vector(adapter->q_vector[0], 0);

    if (adapter->flags & IGB_FLAG_HAS_MSI) {
        err = request_irq(pdev->irq, igb_intr_msi, 0,
                          netdev->name, adapter);
        if (!err)
            goto request_done;

        /* fall back to legacy interrupts */
        igb_reset_interrupt_capability(adapter);
        adapter->flags &= ~IGB_FLAG_HAS_MSI;
    }

    err = request_irq(pdev->irq, igb_intr, IRQF_SHARED,
                      netdev->name, adapter);

    if (err)
        dev_err(&pdev->dev, "Error %d getting interrupt\n",
                err);

request_done:
    return err;
}
static void igb_free_irq(struct igb_adapter *adapter)
{
    if (adapter->msix_entries) {
        int vector = 0, i;

        free_irq(adapter->msix_entries[vector++].vector, adapter);

        for (i = 0; i < adapter->num_q_vectors; i++)
            free_irq(adapter->msix_entries[vector++].vector,
                     adapter->q_vector[i]);
    } else {
        free_irq(adapter->pdev->irq, adapter);
    }
}
/**
 * igb_irq_disable - Mask off interrupt generation on the NIC
 * @adapter: board private structure
 **/
static void igb_irq_disable(struct igb_adapter *adapter)
{
    struct e1000_hw *hw = &adapter->hw;

    /*
     * we need to be careful when disabling interrupts.  The VFs are also
     * mapped into these registers and so clearing the bits can cause
     * issues on the VF drivers so we only need to clear what we set
     */
    if (adapter->msix_entries) {
        u32 regval = rd32(E1000_EIAM);
        wr32(E1000_EIAM, regval & ~adapter->eims_enable_mask);
        wr32(E1000_EIMC, adapter->eims_enable_mask);
        regval = rd32(E1000_EIAC);
        wr32(E1000_EIAC, regval & ~adapter->eims_enable_mask);
    }

    wr32(E1000_IAM, 0);
    wr32(E1000_IMC, ~0);
    wrfl();
    if (adapter->msix_entries) {
        int i;
        for (i = 0; i < adapter->num_q_vectors; i++)
            synchronize_irq(adapter->msix_entries[i].vector);
    } else {
        synchronize_irq(adapter->pdev->irq);
    }
}
/**
 * igb_irq_enable - Enable default interrupt generation settings
 * @adapter: board private structure
 **/
static void igb_irq_enable(struct igb_adapter *adapter)
{
    struct e1000_hw *hw = &adapter->hw;

    if (adapter->msix_entries) {
        u32 ims = E1000_IMS_LSC | E1000_IMS_DOUTSYNC | E1000_IMS_DRSTA;
        u32 regval = rd32(E1000_EIAC);
        wr32(E1000_EIAC, regval | adapter->eims_enable_mask);
        regval = rd32(E1000_EIAM);
        wr32(E1000_EIAM, regval | adapter->eims_enable_mask);
        wr32(E1000_EIMS, adapter->eims_enable_mask);
        if (adapter->vfs_allocated_count) {
            wr32(E1000_MBVFIMR, 0xFF);
            ims |= E1000_IMS_VMMB;
        }
        wr32(E1000_IMS, ims);
    } else {
        wr32(E1000_IMS, IMS_ENABLE_MASK |
                        E1000_IMS_DRSTA);
        wr32(E1000_IAM, IMS_ENABLE_MASK |
                        E1000_IMS_DRSTA);
    }
}
static void igb_update_mng_vlan(struct igb_adapter *adapter)
{
    struct e1000_hw *hw = &adapter->hw;
    u16 vid = adapter->hw.mng_cookie.vlan_id;
    u16 old_vid = adapter->mng_vlan_id;

    if (hw->mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) {
        /* add VID to filter table */
        igb_vfta_set(hw, vid, true);
        adapter->mng_vlan_id = vid;
    } else {
        adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
    }

    if ((old_vid != (u16)IGB_MNG_VLAN_NONE) &&
        (vid != old_vid) &&
        !test_bit(old_vid, adapter->active_vlans)) {
        /* remove VID from filter table */
        igb_vfta_set(hw, old_vid, false);
    }
}
/**
 * igb_release_hw_control - release control of the h/w to f/w
 * @adapter: address of board private structure
 *
 * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
 * For ASF and Pass Through versions of f/w this means that the
 * driver is no longer loaded.
 **/
static void igb_release_hw_control(struct igb_adapter *adapter)
{
    struct e1000_hw *hw = &adapter->hw;
    u32 ctrl_ext;

    /* Let firmware take over control of h/w */
    ctrl_ext = rd32(E1000_CTRL_EXT);
    wr32(E1000_CTRL_EXT,
         ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
}
/**
 * igb_get_hw_control - get control of the h/w from f/w
 * @adapter: address of board private structure
 *
 * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
 * For ASF and Pass Through versions of f/w this means that
 * the driver is loaded.
 **/
static void igb_get_hw_control(struct igb_adapter *adapter)
{
    struct e1000_hw *hw = &adapter->hw;
    u32 ctrl_ext;

    /* Let firmware know the driver has taken over */
    ctrl_ext = rd32(E1000_CTRL_EXT);
    wr32(E1000_CTRL_EXT,
         ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
}
/**
 * igb_configure - configure the hardware for RX and TX
 * @adapter: private board structure
 **/
static void igb_configure(struct igb_adapter *adapter)
{
    struct net_device *netdev = adapter->netdev;
    int i;

    igb_get_hw_control(adapter);
    igb_set_rx_mode(netdev);

    igb_restore_vlan(adapter);

    igb_setup_tctl(adapter);
    igb_setup_mrqc(adapter);
    igb_setup_rctl(adapter);

    igb_configure_tx(adapter);
    igb_configure_rx(adapter);

    igb_rx_fifo_flush_82575(&adapter->hw);

    /* call igb_desc_unused which always leaves
     * at least 1 descriptor unused to make sure
     * next_to_use != next_to_clean */
    for (i = 0; i < adapter->num_rx_queues; i++) {
        struct igb_ring *ring = adapter->rx_ring[i];
        igb_alloc_rx_buffers(ring, igb_desc_unused(ring));
    }
}
/**
 * igb_power_up_link - Power up the phy/serdes link
 * @adapter: address of board private structure
 **/
void igb_power_up_link(struct igb_adapter *adapter)
{
    if (adapter->hw.phy.media_type == e1000_media_type_copper)
        igb_power_up_phy_copper(&adapter->hw);
    else
        igb_power_up_serdes_link_82575(&adapter->hw);
}

/**
 * igb_power_down_link - Power down the phy/serdes link
 * @adapter: address of board private structure
 */
static void igb_power_down_link(struct igb_adapter *adapter)
{
    if (adapter->hw.phy.media_type == e1000_media_type_copper)
        igb_power_down_phy_copper_82575(&adapter->hw);
    else
        igb_shutdown_serdes_link_82575(&adapter->hw);
}
/**
 * igb_up - Open the interface and prepare it to handle traffic
 * @adapter: board private structure
 **/
int igb_up(struct igb_adapter *adapter)
{
    struct e1000_hw *hw = &adapter->hw;
    int i;

    /* hardware has been reset, we need to reload some things */
    igb_configure(adapter);

    clear_bit(__IGB_DOWN, &adapter->state);

    for (i = 0; i < adapter->num_q_vectors; i++)
        napi_enable(&(adapter->q_vector[i]->napi));

    if (adapter->msix_entries)
        igb_configure_msix(adapter);
    else
        igb_assign_vector(adapter->q_vector[0], 0);

    /* Clear any pending interrupts. */
    rd32(E1000_ICR);
    igb_irq_enable(adapter);

    /* notify VFs that reset has been completed */
    if (adapter->vfs_allocated_count) {
        u32 reg_data = rd32(E1000_CTRL_EXT);
        reg_data |= E1000_CTRL_EXT_PFRSTD;
        wr32(E1000_CTRL_EXT, reg_data);
    }

    netif_tx_start_all_queues(adapter->netdev);

    /* start the watchdog. */
    hw->mac.get_link_status = 1;
    schedule_work(&adapter->watchdog_task);

    return 0;
}
void igb_down(struct igb_adapter *adapter)
{
    struct net_device *netdev = adapter->netdev;
    struct e1000_hw *hw = &adapter->hw;
    u32 tctl, rctl;
    int i;

    /* signal that we're down so the interrupt handler does not
     * reschedule our watchdog timer */
    set_bit(__IGB_DOWN, &adapter->state);

    /* disable receives in the hardware */
    rctl = rd32(E1000_RCTL);
    wr32(E1000_RCTL, rctl & ~E1000_RCTL_EN);
    /* flush and sleep below */

    netif_tx_stop_all_queues(netdev);

    /* disable transmits in the hardware */
    tctl = rd32(E1000_TCTL);
    tctl &= ~E1000_TCTL_EN;
    wr32(E1000_TCTL, tctl);
    /* flush both disables and wait for them to finish */
    wrfl();
    msleep(10);

    for (i = 0; i < adapter->num_q_vectors; i++)
        napi_disable(&(adapter->q_vector[i]->napi));

    igb_irq_disable(adapter);

    del_timer_sync(&adapter->watchdog_timer);
    del_timer_sync(&adapter->phy_info_timer);

    netif_carrier_off(netdev);

    /* record the stats before reset*/
    spin_lock(&adapter->stats64_lock);
    igb_update_stats(adapter, &adapter->stats64);
    spin_unlock(&adapter->stats64_lock);

    adapter->link_speed = 0;
    adapter->link_duplex = 0;

    if (!pci_channel_offline(adapter->pdev))
        igb_reset(adapter);
    igb_clean_all_tx_rings(adapter);
    igb_clean_all_rx_rings(adapter);
#ifdef CONFIG_IGB_DCA
    /* since we reset the hardware DCA settings were cleared */
    igb_setup_dca(adapter);
#endif
}
void igb_reinit_locked(struct igb_adapter *adapter)
{
    WARN_ON(in_interrupt());
    while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
        msleep(1);
    igb_down(adapter);
    igb_up(adapter);
    clear_bit(__IGB_RESETTING, &adapter->state);
}
void igb_reset(struct igb_adapter *adapter)
{
    struct pci_dev *pdev = adapter->pdev;
    struct e1000_hw *hw = &adapter->hw;
    struct e1000_mac_info *mac = &hw->mac;
    struct e1000_fc_info *fc = &hw->fc;
    u32 pba = 0, tx_space, min_tx_space, min_rx_space;
    u32 hwm;

    /* Repartition Pba for greater than 9k mtu
     * To take effect CTRL.RST is required.
     */
    switch (mac->type) {
    case e1000_i350:
    case e1000_82580:
        pba = rd32(E1000_RXPBS);
        pba = igb_rxpbs_adjust_82580(pba);
        break;
    case e1000_82576:
        pba = rd32(E1000_RXPBS);
        pba &= E1000_RXPBS_SIZE_MASK_82576;
        break;
    case e1000_82575:
    default:
        pba = E1000_PBA_34K;
        break;
    }

    if ((adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) &&
        (mac->type < e1000_82576)) {
        /* adjust PBA for jumbo frames */
        wr32(E1000_PBA, pba);

        /* To maintain wire speed transmits, the Tx FIFO should be
         * large enough to accommodate two full transmit packets,
         * rounded up to the next 1KB and expressed in KB.  Likewise,
         * the Rx FIFO should be large enough to accommodate at least
         * one full receive packet and is similarly rounded up and
         * expressed in KB. */
        pba = rd32(E1000_PBA);
        /* upper 16 bits has Tx packet buffer allocation size in KB */
        tx_space = pba >> 16;
        /* lower 16 bits has Rx packet buffer allocation size in KB */
        pba &= 0xffff;
        /* the tx fifo also stores 16 bytes of information about the tx
         * but don't include ethernet FCS because hardware appends it */
        min_tx_space = (adapter->max_frame_size +
                        sizeof(union e1000_adv_tx_desc) -
                        ETH_FCS_LEN) * 2;
        min_tx_space = ALIGN(min_tx_space, 1024);
        min_tx_space >>= 10;
        /* software strips receive CRC, so leave room for it */
        min_rx_space = adapter->max_frame_size;
        min_rx_space = ALIGN(min_rx_space, 1024);
        min_rx_space >>= 10;

        /* If current Tx allocation is less than the min Tx FIFO size,
         * and the min Tx FIFO size is less than the current Rx FIFO
         * allocation, take space away from current Rx allocation */
        if (tx_space < min_tx_space &&
            ((min_tx_space - tx_space) < pba)) {
            pba = pba - (min_tx_space - tx_space);

            /* if short on rx space, rx wins and must trump tx
             * adjustment */
            if (pba < min_rx_space)
                pba = min_rx_space;
        }
        wr32(E1000_PBA, pba);
    }

    /* flow control settings */
    /* The high water mark must be low enough to fit one full frame
     * (or the size used for early receive) above it in the Rx FIFO.
     * Set it to the lower of:
     * - 90% of the Rx FIFO size, or
     * - the full Rx FIFO size minus one full frame */
    hwm = min(((pba << 10) * 9 / 10),
              ((pba << 10) - 2 * adapter->max_frame_size));

    fc->high_water = hwm & 0xFFF0; /* 16-byte granularity */
    fc->low_water = fc->high_water - 16;
    fc->pause_time = 0xFFFF;
    fc->send_xon = 1;
    fc->current_mode = fc->requested_mode;
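    /*
     * Worked example of the watermark math above (the PBA value is
     * illustrative): with pba = 34 KB and a 1522-byte max frame,
     * hwm = min(34816 * 9 / 10, 34816 - 2 * 1522)
     *     = min(31334, 31772) = 31334,
     * so high_water = 31334 & 0xFFF0 = 31328 bytes and
     * low_water = 31312 bytes.
     */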
    /* disable receive for all VFs and wait one second */
    if (adapter->vfs_allocated_count) {
        int i;
        for (i = 0 ; i < adapter->vfs_allocated_count; i++)
            adapter->vf_data[i].flags &= IGB_VF_FLAG_PF_SET_MAC;

        /* ping all the active vfs to let them know we are going down */
        igb_ping_all_vfs(adapter);

        /* disable transmits and receives */
        wr32(E1000_VFRE, 0);
        wr32(E1000_VFTE, 0);
    }

    /* Allow time for pending master requests to run */
    hw->mac.ops.reset_hw(hw);
    wr32(E1000_WUC, 0);

    if (hw->mac.ops.init_hw(hw))
        dev_err(&pdev->dev, "Hardware Error\n");

    igb_init_dmac(adapter, pba);
    if (!netif_running(adapter->netdev))
        igb_power_down_link(adapter);

    igb_update_mng_vlan(adapter);

    /* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */
    wr32(E1000_VET, ETHERNET_IEEE_VLAN_TYPE);

    igb_get_phy_info(hw);
}
static u32 igb_fix_features(struct net_device *netdev, u32 features)
{
    /*
     * Since there is no support for separate rx/tx vlan accel
     * enable/disable make sure tx flag is always in same state as rx.
     */
    if (features & NETIF_F_HW_VLAN_RX)
        features |= NETIF_F_HW_VLAN_TX;
    else
        features &= ~NETIF_F_HW_VLAN_TX;

    return features;
}

static int igb_set_features(struct net_device *netdev, u32 features)
{
    u32 changed = netdev->features ^ features;

    if (changed & NETIF_F_HW_VLAN_RX)
        igb_vlan_mode(netdev, features);

    return 0;
}
static const struct net_device_ops igb_netdev_ops = {
    .ndo_open = igb_open,
    .ndo_stop = igb_close,
    .ndo_start_xmit = igb_xmit_frame,
    .ndo_get_stats64 = igb_get_stats64,
    .ndo_set_rx_mode = igb_set_rx_mode,
    .ndo_set_mac_address = igb_set_mac,
    .ndo_change_mtu = igb_change_mtu,
    .ndo_do_ioctl = igb_ioctl,
    .ndo_tx_timeout = igb_tx_timeout,
    .ndo_validate_addr = eth_validate_addr,
    .ndo_vlan_rx_add_vid = igb_vlan_rx_add_vid,
    .ndo_vlan_rx_kill_vid = igb_vlan_rx_kill_vid,
    .ndo_set_vf_mac = igb_ndo_set_vf_mac,
    .ndo_set_vf_vlan = igb_ndo_set_vf_vlan,
    .ndo_set_vf_tx_rate = igb_ndo_set_vf_bw,
    .ndo_get_vf_config = igb_ndo_get_vf_config,
#ifdef CONFIG_NET_POLL_CONTROLLER
    .ndo_poll_controller = igb_netpoll,
#endif
    .ndo_fix_features = igb_fix_features,
    .ndo_set_features = igb_set_features,
};
/**
 * igb_probe - Device Initialization Routine
 * @pdev: PCI device information struct
 * @ent: entry in igb_pci_tbl
 *
 * Returns 0 on success, negative on failure
 *
 * igb_probe initializes an adapter identified by a pci_dev structure.
 * The OS initialization, configuring of the adapter private structure,
 * and a hardware reset occur.
 **/
static int __devinit igb_probe(struct pci_dev *pdev,
                               const struct pci_device_id *ent)
{
    struct net_device *netdev;
    struct igb_adapter *adapter;
    struct e1000_hw *hw;
    u16 eeprom_data = 0;
    s32 ret_val;
    static int global_quad_port_a; /* global quad port a indication */
    const struct e1000_info *ei = igb_info_tbl[ent->driver_data];
    unsigned long mmio_start, mmio_len;
    int err, pci_using_dac;
    u16 eeprom_apme_mask = IGB_EEPROM_APME;
    u8 part_str[E1000_PBANUM_LENGTH];

    /* Catch broken hardware that put the wrong VF device ID in
     * the PCIe SR-IOV capability.
     */
    if (pdev->is_virtfn) {
        WARN(1, KERN_ERR "%s (%hx:%hx) should not be a VF!\n",
             pci_name(pdev), pdev->vendor, pdev->device);
        return -EINVAL;
    }
    err = pci_enable_device_mem(pdev);
    if (err)
        return err;

    pci_using_dac = 0;
    err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(64));
    if (!err) {
        err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64));
        if (!err)
            pci_using_dac = 1;
    } else {
        err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
        if (err) {
            err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32));
            if (err) {
                dev_err(&pdev->dev, "No usable DMA "
                    "configuration, aborting\n");
                goto err_dma;
            }
        }
    }
    err = pci_request_selected_regions(pdev, pci_select_bars(pdev,
                                       IORESOURCE_MEM),
                                       igb_driver_name);
    if (err)
        goto err_pci_reg;

    pci_enable_pcie_error_reporting(pdev);

    pci_set_master(pdev);
    pci_save_state(pdev);
    err = -ENOMEM;
    netdev = alloc_etherdev_mq(sizeof(struct igb_adapter),
                               IGB_MAX_TX_QUEUES);
    if (!netdev)
        goto err_alloc_etherdev;

    SET_NETDEV_DEV(netdev, &pdev->dev);

    pci_set_drvdata(pdev, netdev);
    adapter = netdev_priv(netdev);
    adapter->netdev = netdev;
    adapter->pdev = pdev;
    hw = &adapter->hw;
    hw->back = adapter;
    adapter->msg_enable = NETIF_MSG_DRV | NETIF_MSG_PROBE;

    err = -EIO;
    mmio_start = pci_resource_start(pdev, 0);
    mmio_len = pci_resource_len(pdev, 0);
    hw->hw_addr = ioremap(mmio_start, mmio_len);
    if (!hw->hw_addr)
        goto err_ioremap;
    netdev->netdev_ops = &igb_netdev_ops;
    igb_set_ethtool_ops(netdev);
    netdev->watchdog_timeo = 5 * HZ;

    strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);

    netdev->mem_start = mmio_start;
    netdev->mem_end = mmio_start + mmio_len;
    /* PCI config space info */
    hw->vendor_id = pdev->vendor;
    hw->device_id = pdev->device;
    hw->revision_id = pdev->revision;
    hw->subsystem_vendor_id = pdev->subsystem_vendor;
    hw->subsystem_device_id = pdev->subsystem_device;

    /* Copy the default MAC, PHY and NVM function pointers */
    memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops));
    memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops));
    memcpy(&hw->nvm.ops, ei->nvm_ops, sizeof(hw->nvm.ops));
    /* Initialize skew-specific constants */
    err = ei->get_invariants(hw);
    if (err)
        goto err_sw_init;

    /* setup the private structure */
    err = igb_sw_init(adapter);
    if (err)
        goto err_sw_init;

    igb_get_bus_info_pcie(hw);

    hw->phy.autoneg_wait_to_complete = false;

    /* Copper options */
    if (hw->phy.media_type == e1000_media_type_copper) {
        hw->phy.mdix = AUTO_ALL_MODES;
        hw->phy.disable_polarity_correction = false;
        hw->phy.ms_type = e1000_ms_hw_default;
    }

    if (igb_check_reset_block(hw))
        dev_info(&pdev->dev,
            "PHY reset is blocked due to SOL/IDER session.\n");
    /*
     * features is initialized to 0 in allocation, it might have bits
     * set by igb_sw_init so we should use an or instead of an
     * assignment.
     */
    netdev->features |= NETIF_F_SG |
                        NETIF_F_IP_CSUM |
                        NETIF_F_IPV6_CSUM |
                        NETIF_F_TSO |
                        NETIF_F_TSO6 |
                        NETIF_F_RXHASH |
                        NETIF_F_RXCSUM |
                        NETIF_F_HW_VLAN_RX |
                        NETIF_F_HW_VLAN_TX;

    /* copy netdev features into list of user selectable features */
    netdev->hw_features |= netdev->features;

    /* set this bit last since it cannot be part of hw_features */
    netdev->features |= NETIF_F_HW_VLAN_FILTER;

    netdev->vlan_features |= NETIF_F_TSO |
                             NETIF_F_TSO6 |
                             NETIF_F_IP_CSUM |
                             NETIF_F_IPV6_CSUM |
                             NETIF_F_SG;

    if (pci_using_dac) {
        netdev->features |= NETIF_F_HIGHDMA;
        netdev->vlan_features |= NETIF_F_HIGHDMA;
    }

    if (hw->mac.type >= e1000_82576) {
        netdev->hw_features |= NETIF_F_SCTP_CSUM;
        netdev->features |= NETIF_F_SCTP_CSUM;
    }

    netdev->priv_flags |= IFF_UNICAST_FLT;

    adapter->en_mng_pt = igb_enable_mng_pass_thru(hw);
    /* before reading the NVM, reset the controller to put the device in a
     * known good starting state */
    hw->mac.ops.reset_hw(hw);

    /* make sure the NVM is good */
    if (hw->nvm.ops.validate(hw) < 0) {
        dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n");
        err = -EIO;
        goto err_eeprom;
    }

    /* copy the MAC address out of the NVM */
    if (hw->mac.ops.read_mac_addr(hw))
        dev_err(&pdev->dev, "NVM Read Error\n");

    memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len);
    memcpy(netdev->perm_addr, hw->mac.addr, netdev->addr_len);

    if (!is_valid_ether_addr(netdev->perm_addr)) {
        dev_err(&pdev->dev, "Invalid MAC Address\n");
        err = -EIO;
        goto err_eeprom;
    }
    setup_timer(&adapter->watchdog_timer, igb_watchdog,
                (unsigned long) adapter);
    setup_timer(&adapter->phy_info_timer, igb_update_phy_info,
                (unsigned long) adapter);

    INIT_WORK(&adapter->reset_task, igb_reset_task);
    INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);

    /* Initialize link properties that are user-changeable */
    adapter->fc_autoneg = true;
    hw->mac.autoneg = true;
    hw->phy.autoneg_advertised = 0x2f;

    hw->fc.requested_mode = e1000_fc_default;
    hw->fc.current_mode = e1000_fc_default;

    igb_validate_mdi_setting(hw);
    /* Initial Wake on LAN setting If APM wake is enabled in the EEPROM,
     * enable the ACPI Magic Packet filter
     */

    if (hw->bus.func == 0)
        hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
    else if (hw->mac.type >= e1000_82580)
        hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A +
                         NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1,
                         &eeprom_data);
    else if (hw->bus.func == 1)
        hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);

    if (eeprom_data & eeprom_apme_mask)
        adapter->eeprom_wol |= E1000_WUFC_MAG;
    /* now that we have the eeprom settings, apply the special cases where
     * the eeprom may be wrong or the board simply won't support wake on
     * lan on a particular port */
    switch (pdev->device) {
    case E1000_DEV_ID_82575GB_QUAD_COPPER:
        adapter->eeprom_wol = 0;
        break;
    case E1000_DEV_ID_82575EB_FIBER_SERDES:
    case E1000_DEV_ID_82576_FIBER:
    case E1000_DEV_ID_82576_SERDES:
        /* Wake events only supported on port A for dual fiber
         * regardless of eeprom setting */
        if (rd32(E1000_STATUS) & E1000_STATUS_FUNC_1)
            adapter->eeprom_wol = 0;
        break;
    case E1000_DEV_ID_82576_QUAD_COPPER:
    case E1000_DEV_ID_82576_QUAD_COPPER_ET2:
        /* if quad port adapter, disable WoL on all but port A */
        if (global_quad_port_a != 0)
            adapter->eeprom_wol = 0;
        else
            adapter->flags |= IGB_FLAG_QUAD_PORT_A;
        /* Reset for multiple quad port adapters */
        if (++global_quad_port_a == 4)
            global_quad_port_a = 0;
        break;
    }
2057
/* initialize the wol settings based on the eeprom settings */
2058
adapter->wol = adapter->eeprom_wol;
2059
device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);
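    /* Editorial note (not in the original source): eeprom_wol records what
     * the NVM advertises, while adapter->wol is the runtime value that the
     * ethtool wake-on-LAN path may later change; keeping the two separate
     * preserves the NVM default even after runtime adjustments. */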
    /* reset the hardware with the new settings */

    /* let the f/w know that the h/w is now under the control of the
     * driver. */
    igb_get_hw_control(adapter);

    strcpy(netdev->name, "eth%d");
    err = register_netdev(netdev);

    /* carrier off reporting is important to ethtool even BEFORE open */
    netif_carrier_off(netdev);

#ifdef CONFIG_IGB_DCA
    if (dca_add_requester(&pdev->dev) == 0) {
        adapter->flags |= IGB_FLAG_DCA_ENABLED;
        dev_info(&pdev->dev, "DCA enabled\n");
        igb_setup_dca(adapter);

    /* do hw tstamp init after resetting */
    igb_init_hw_timer(adapter);

    dev_info(&pdev->dev, "Intel(R) Gigabit Ethernet Network Connection\n");
    /* print bus type/speed/width info */
    dev_info(&pdev->dev, "%s: (PCIe:%s:%s) %pM\n",
             ((hw->bus.speed == e1000_bus_speed_2500) ? "2.5Gb/s" :
              (hw->bus.speed == e1000_bus_speed_5000) ? "5.0Gb/s" :
             ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" :
              (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2" :
              (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1" :

    ret_val = igb_read_part_string(hw, part_str, E1000_PBANUM_LENGTH);
    if (ret_val)
        strcpy(part_str, "Unknown");
    dev_info(&pdev->dev, "%s: PBA No: %s\n", netdev->name, part_str);
    dev_info(&pdev->dev,
             "Using %s interrupts. %d rx queue(s), %d tx queue(s)\n",
             adapter->msix_entries ? "MSI-X" :
             (adapter->flags & IGB_FLAG_HAS_MSI) ? "MSI" : "legacy",
             adapter->num_rx_queues, adapter->num_tx_queues);
    switch (hw->mac.type) {
        igb_set_eee_i350(hw);

    igb_release_hw_control(adapter);
    if (!igb_check_reset_block(hw))
    if (hw->flash_address)
        iounmap(hw->flash_address);
    igb_clear_interrupt_scheme(adapter);
    iounmap(hw->hw_addr);
    free_netdev(netdev);
    pci_release_selected_regions(pdev,
                                 pci_select_bars(pdev, IORESOURCE_MEM));
    pci_disable_device(pdev);
/**
 * igb_remove - Device Removal Routine
 * @pdev: PCI device information struct
 *
 * igb_remove is called by the PCI subsystem to alert the driver
 * that it should release a PCI device. This could be caused by a
 * Hot-Plug event, or because the driver is going to be removed from
 * memory.
 **/
static void __devexit igb_remove(struct pci_dev *pdev)
    struct net_device *netdev = pci_get_drvdata(pdev);
    struct igb_adapter *adapter = netdev_priv(netdev);
    struct e1000_hw *hw = &adapter->hw;

    /* The watchdog timer may be rescheduled, so explicitly
     * disable watchdog from being rescheduled. */
    set_bit(__IGB_DOWN, &adapter->state);
    del_timer_sync(&adapter->watchdog_timer);
    del_timer_sync(&adapter->phy_info_timer);

    cancel_work_sync(&adapter->reset_task);
    cancel_work_sync(&adapter->watchdog_task);

#ifdef CONFIG_IGB_DCA
    if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
        dev_info(&pdev->dev, "DCA disabled\n");
        dca_remove_requester(&pdev->dev);
        adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
        wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);

    /* Release control of h/w to f/w. If f/w is AMT enabled, this
     * would have already happened in close and is redundant. */
    igb_release_hw_control(adapter);

    unregister_netdev(netdev);

    igb_clear_interrupt_scheme(adapter);

#ifdef CONFIG_PCI_IOV
    /* reclaim resources allocated to VFs */
    if (adapter->vf_data) {
        /* disable iov and allow time for transactions to clear */
        if (!igb_check_vf_assignment(adapter)) {
            pci_disable_sriov(pdev);
            dev_info(&pdev->dev, "VF(s) assigned to guests!\n");
        kfree(adapter->vf_data);
        adapter->vf_data = NULL;
        wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
        dev_info(&pdev->dev, "IOV Disabled\n");

    iounmap(hw->hw_addr);
    if (hw->flash_address)
        iounmap(hw->flash_address);
    pci_release_selected_regions(pdev,
                                 pci_select_bars(pdev, IORESOURCE_MEM));

    kfree(adapter->shadow_vfta);
    free_netdev(netdev);

    pci_disable_pcie_error_reporting(pdev);

    pci_disable_device(pdev);
/**
 * igb_probe_vfs - Initialize vf data storage and add VFs to pci config space
 * @adapter: board private structure to initialize
 *
 * This function initializes the vf specific data storage and then attempts to
 * allocate the VFs. The reason for ordering it this way is because it is much
 * more expensive time-wise to disable SR-IOV than it is to allocate and free
 * the memory for the VFs.
 **/
static void __devinit igb_probe_vfs(struct igb_adapter *adapter)
#ifdef CONFIG_PCI_IOV
    struct pci_dev *pdev = adapter->pdev;
    int old_vfs = igb_find_enabled_vfs(adapter);

        dev_info(&pdev->dev, "%d pre-allocated VFs found - override "
                 "max_vfs setting of %d\n", old_vfs, max_vfs);
        adapter->vfs_allocated_count = old_vfs;

    if (!adapter->vfs_allocated_count)

    adapter->vf_data = kcalloc(adapter->vfs_allocated_count,
                               sizeof(struct vf_data_storage), GFP_KERNEL);
    /* if allocation failed then we do not support SR-IOV */
    if (!adapter->vf_data) {
        adapter->vfs_allocated_count = 0;
        dev_err(&pdev->dev, "Unable to allocate memory for VF "

    if (pci_enable_sriov(pdev, adapter->vfs_allocated_count))

    dev_info(&pdev->dev, "%d VFs allocated\n",
             adapter->vfs_allocated_count);
    for (i = 0; i < adapter->vfs_allocated_count; i++)
        igb_vf_configure(adapter, i);

    /* DMA Coalescing is not supported in IOV mode. */
    adapter->flags &= ~IGB_FLAG_DMAC;

    kfree(adapter->vf_data);
    adapter->vf_data = NULL;
    adapter->vfs_allocated_count = 0;
#endif /* CONFIG_PCI_IOV */
/**
 * igb_init_hw_timer - Initialize hardware timer used with IEEE 1588 timestamp
 * @adapter: board private structure to initialize
 *
 * igb_init_hw_timer initializes the function pointer and values for the hw
 * timer found in hardware.
 **/
static void igb_init_hw_timer(struct igb_adapter *adapter)
    struct e1000_hw *hw = &adapter->hw;

    switch (hw->mac.type) {
        memset(&adapter->cycles, 0, sizeof(adapter->cycles));
        adapter->cycles.read = igb_read_clock;
        adapter->cycles.mask = CLOCKSOURCE_MASK(64);
        adapter->cycles.mult = 1;
        /* The 82580 timesync updates the system timer every 8ns by 8ns
         * and the value cannot be shifted. Instead we need to shift
         * the registers to generate a 64bit timer value. As a result
         * SYSTIMR/L/H, TXSTMPL/H, RXSTMPL/H all have to be shifted by
         * 24 in order to generate a larger value for synchronization. */
        adapter->cycles.shift = IGB_82580_TSYNC_SHIFT;
        /* disable system timer temporarily by setting bit 31 */
        wr32(E1000_TSAUXC, 0x80000000);

        /* Set registers so that rollover occurs soon to test this. */
        wr32(E1000_SYSTIMR, 0x00000000);
        wr32(E1000_SYSTIML, 0x80000000);
        wr32(E1000_SYSTIMH, 0x000000FF);

        /* enable system timer by clearing bit 31 */
        wr32(E1000_TSAUXC, 0x0);

        timecounter_init(&adapter->clock,
                         &adapter->cycles,
                         ktime_to_ns(ktime_get_real()));
        /* Synchronize our NIC clock against system wall clock. NIC
         * time stamp reading requires ~3us per sample, each sample
         * was pretty stable even under load => only require 10
         * samples for each offset comparison. */
        memset(&adapter->compare, 0, sizeof(adapter->compare));
        adapter->compare.source = &adapter->clock;
        adapter->compare.target = ktime_get_real;
        adapter->compare.num_samples = 10;
        timecompare_update(&adapter->compare, 0);
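        /* Editorial sketch (not in the original source): with the shift of
         * 24 described above, the clock read callback presumably assembles
         * the 64-bit stamp from the three registers along these lines:
         *
         *   stamp  = rd32(E1000_SYSTIMR) >> 8;
         *   stamp |= (u64)rd32(E1000_SYSTIML) << 24;
         *   stamp |= (u64)rd32(E1000_SYSTIMH) << 56;
         *
         * The actual register reads live in igb_read_clock(). */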
        /* Initialize hardware timer: we keep it running just in case
         * that some program needs it later on. */
        memset(&adapter->cycles, 0, sizeof(adapter->cycles));
        adapter->cycles.read = igb_read_clock;
        adapter->cycles.mask = CLOCKSOURCE_MASK(64);
        adapter->cycles.mult = 1;
        /* Scale the NIC clock cycle by a large factor so that
         * relatively small clock corrections can be added or
         * subtracted at each clock tick. The drawbacks of a large
         * factor are a) that the clock register overflows more quickly
         * (not such a big deal) and b) that the increment per tick has
         * to fit into 24 bits. As a result we need to use a shift of
         * 19 so we can fit a value of 16 into the TIMINCA register. */
        adapter->cycles.shift = IGB_82576_TSYNC_SHIFT;
        wr32(E1000_TIMINCA,
             (1 << E1000_TIMINCA_16NS_SHIFT) |
             (16 << IGB_82576_TSYNC_SHIFT));
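        /* Editorial worked example: the 82576 SYSTIM tick is 16ns
         * (62.5 MHz). Writing (16 << 19) to TIMINCA advances SYSTIM by
         * 16 * 2^19 per tick, and the timecounter shift of 19 divides
         * that back out, so converted time still advances 16ns per tick
         * while leaving 19 fractional bits for fine frequency trims. */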
        /* Set registers so that rollover occurs soon to test this. */
        wr32(E1000_SYSTIML, 0x00000000);
        wr32(E1000_SYSTIMH, 0xFF800000);

        timecounter_init(&adapter->clock,
                         &adapter->cycles,
                         ktime_to_ns(ktime_get_real()));
        /* Synchronize our NIC clock against system wall clock. NIC
         * time stamp reading requires ~3us per sample, each sample
         * was pretty stable even under load => only require 10
         * samples for each offset comparison. */
        memset(&adapter->compare, 0, sizeof(adapter->compare));
        adapter->compare.source = &adapter->clock;
        adapter->compare.target = ktime_get_real;
        adapter->compare.num_samples = 10;
        timecompare_update(&adapter->compare, 0);

        /* 82575 does not support timesync */
/**
 * igb_sw_init - Initialize general software structures (struct igb_adapter)
 * @adapter: board private structure to initialize
 *
 * igb_sw_init initializes the Adapter private data structure.
 * Fields are initialized based on PCI device information and
 * OS network device settings (MTU size).
 **/
static int __devinit igb_sw_init(struct igb_adapter *adapter)
    struct e1000_hw *hw = &adapter->hw;
    struct net_device *netdev = adapter->netdev;
    struct pci_dev *pdev = adapter->pdev;

    pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);

    /* set default ring sizes */
    adapter->tx_ring_count = IGB_DEFAULT_TXD;
    adapter->rx_ring_count = IGB_DEFAULT_RXD;

    /* set default ITR values */
    adapter->rx_itr_setting = IGB_DEFAULT_ITR;
    adapter->tx_itr_setting = IGB_DEFAULT_ITR;

    /* set default work limits */
    adapter->tx_work_limit = IGB_DEFAULT_TX_WORK;

    adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN +
                              VLAN_HLEN;
    adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;

    spin_lock_init(&adapter->stats64_lock);
#ifdef CONFIG_PCI_IOV
    switch (hw->mac.type) {
            dev_warn(&pdev->dev,
                     "Maximum of 7 VFs per PF, using max\n");
            adapter->vfs_allocated_count = 7;
            adapter->vfs_allocated_count = max_vfs;
#endif /* CONFIG_PCI_IOV */
    adapter->rss_queues = min_t(u32, IGB_MAX_RX_QUEUES, num_online_cpus());
    /* i350 cannot do RSS and SR-IOV at the same time */
    if (hw->mac.type == e1000_i350 && adapter->vfs_allocated_count)
        adapter->rss_queues = 1;

    /* if rss_queues > 4 or vfs are going to be allocated with rss_queues
     * then we should combine the queues into a queue pair in order to
     * conserve interrupts due to limited supply */
    if ((adapter->rss_queues > 4) ||
        ((adapter->rss_queues > 1) && (adapter->vfs_allocated_count > 6)))
        adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
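    /* Editorial worked example: with rss_queues = 8 and no VFs, pairing a
     * Tx and an Rx ring on each q_vector roughly halves the MSI-X vectors
     * consumed (8 paired vectors instead of 16 separate ones), leaving
     * headroom for the link/other-cause vector on parts that expose only
     * a small vector budget. */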
    /* Setup and initialize a copy of the hw vlan table array */
    adapter->shadow_vfta = kzalloc(sizeof(u32) *
                                   E1000_VLAN_FILTER_TBL_SIZE,
                                   GFP_ATOMIC);

    /* This call may decrease the number of queues */
    if (igb_init_interrupt_scheme(adapter)) {
        dev_err(&pdev->dev, "Unable to allocate memory for queues\n");

    igb_probe_vfs(adapter);

    /* Explicitly disable IRQ since the NIC can be in any state. */
    igb_irq_disable(adapter);

    if (hw->mac.type == e1000_i350)
        adapter->flags &= ~IGB_FLAG_DMAC;

    set_bit(__IGB_DOWN, &adapter->state);
/**
 * igb_open - Called when a network interface is made active
 * @netdev: network interface device structure
 *
 * Returns 0 on success, negative value on failure
 *
 * The open entry point is called when a network interface is made
 * active by the system (IFF_UP). At this point all resources needed
 * for transmit and receive operations are allocated, the interrupt
 * handler is registered with the OS, the watchdog timer is started,
 * and the stack is notified that the interface is ready.
 **/
static int igb_open(struct net_device *netdev)
    struct igb_adapter *adapter = netdev_priv(netdev);
    struct e1000_hw *hw = &adapter->hw;

    /* disallow open during test */
    if (test_bit(__IGB_TESTING, &adapter->state))
        return -EBUSY;

    netif_carrier_off(netdev);

    /* allocate transmit descriptors */
    err = igb_setup_all_tx_resources(adapter);

    /* allocate receive descriptors */
    err = igb_setup_all_rx_resources(adapter);

    igb_power_up_link(adapter);

    /* before we allocate an interrupt, we must be ready to handle it.
     * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt
     * as soon as we call pci_request_irq, so we have to setup our
     * clean_rx handler before we do so. */
    igb_configure(adapter);

    err = igb_request_irq(adapter);

    /* From here on the code is the same as igb_up() */
    clear_bit(__IGB_DOWN, &adapter->state);

    for (i = 0; i < adapter->num_q_vectors; i++)
        napi_enable(&(adapter->q_vector[i]->napi));

    /* Clear any pending interrupts. */
    rd32(E1000_ICR);
    igb_irq_enable(adapter);

    /* notify VFs that reset has been completed */
    if (adapter->vfs_allocated_count) {
        u32 reg_data = rd32(E1000_CTRL_EXT);
        reg_data |= E1000_CTRL_EXT_PFRSTD;
        wr32(E1000_CTRL_EXT, reg_data);

    netif_tx_start_all_queues(netdev);

    /* start the watchdog. */
    hw->mac.get_link_status = 1;
    schedule_work(&adapter->watchdog_task);

    igb_release_hw_control(adapter);
    igb_power_down_link(adapter);
    igb_free_all_rx_resources(adapter);
    igb_free_all_tx_resources(adapter);
/**
 * igb_close - Disables a network interface
 * @netdev: network interface device structure
 *
 * Returns 0, this is not allowed to fail
 *
 * The close entry point is called when an interface is de-activated
 * by the OS. The hardware is still under the driver's control, but
 * needs to be disabled. A global MAC reset is issued to stop the
 * hardware, and all transmit and receive resources are freed.
 **/
static int igb_close(struct net_device *netdev)
    struct igb_adapter *adapter = netdev_priv(netdev);

    WARN_ON(test_bit(__IGB_RESETTING, &adapter->state));

    igb_free_irq(adapter);

    igb_free_all_tx_resources(adapter);
    igb_free_all_rx_resources(adapter);
/**
 * igb_setup_tx_resources - allocate Tx resources (Descriptors)
 * @tx_ring: tx descriptor ring (for a specific queue) to setup
 *
 * Return 0 on success, negative on failure
 **/
int igb_setup_tx_resources(struct igb_ring *tx_ring)
    struct device *dev = tx_ring->dev;
    int orig_node = dev_to_node(dev);

    size = sizeof(struct igb_tx_buffer) * tx_ring->count;
    tx_ring->tx_buffer_info = vzalloc_node(size, tx_ring->numa_node);
    if (!tx_ring->tx_buffer_info)
        tx_ring->tx_buffer_info = vzalloc(size);
    if (!tx_ring->tx_buffer_info)

    /* round up to nearest 4K */
    tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc);
    tx_ring->size = ALIGN(tx_ring->size, 4096);

    set_dev_node(dev, tx_ring->numa_node);
    tx_ring->desc = dma_alloc_coherent(dev,
                                       tx_ring->size,
                                       &tx_ring->dma,
                                       GFP_KERNEL);
    set_dev_node(dev, orig_node);
        tx_ring->desc = dma_alloc_coherent(dev,
                                           tx_ring->size,
                                           &tx_ring->dma,
                                           GFP_KERNEL);

    tx_ring->next_to_use = 0;
    tx_ring->next_to_clean = 0;

    vfree(tx_ring->tx_buffer_info);
            "Unable to allocate memory for the transmit descriptor ring\n");

/**
 * igb_setup_all_tx_resources - wrapper to allocate Tx resources
 *                              (Descriptors) for all queues
 * @adapter: board private structure
 *
 * Return 0 on success, negative on failure
 **/
static int igb_setup_all_tx_resources(struct igb_adapter *adapter)
    struct pci_dev *pdev = adapter->pdev;

    for (i = 0; i < adapter->num_tx_queues; i++) {
        err = igb_setup_tx_resources(adapter->tx_ring[i]);
                    "Allocation for Tx Queue %u failed\n", i);
            for (i--; i >= 0; i--)
                igb_free_tx_resources(adapter->tx_ring[i]);
/**
 * igb_setup_tctl - configure the transmit control registers
 * @adapter: Board private structure
 **/
void igb_setup_tctl(struct igb_adapter *adapter)
    struct e1000_hw *hw = &adapter->hw;

    /* disable queue 0 which is enabled by default on 82575 and 82576 */
    wr32(E1000_TXDCTL(0), 0);

    /* Program the Transmit Control Register */
    tctl = rd32(E1000_TCTL);
    tctl &= ~E1000_TCTL_CT;
    tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC |
            (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);

    igb_config_collision_dist(hw);

    /* Enable transmits */
    tctl |= E1000_TCTL_EN;

    wr32(E1000_TCTL, tctl);

/**
 * igb_configure_tx_ring - Configure transmit ring after Reset
 * @adapter: board private structure
 * @ring: tx ring to configure
 *
 * Configure a transmit ring after a reset.
 **/
void igb_configure_tx_ring(struct igb_adapter *adapter,
                           struct igb_ring *ring)
    struct e1000_hw *hw = &adapter->hw;
    u64 tdba = ring->dma;
    int reg_idx = ring->reg_idx;

    /* disable the queue */
    wr32(E1000_TXDCTL(reg_idx), 0);

    wr32(E1000_TDLEN(reg_idx),
         ring->count * sizeof(union e1000_adv_tx_desc));
    wr32(E1000_TDBAL(reg_idx),
         tdba & 0x00000000ffffffffULL);
    wr32(E1000_TDBAH(reg_idx), tdba >> 32);

    ring->tail = hw->hw_addr + E1000_TDT(reg_idx);
    wr32(E1000_TDH(reg_idx), 0);
    writel(0, ring->tail);

    txdctl |= IGB_TX_PTHRESH;
    txdctl |= IGB_TX_HTHRESH << 8;
    txdctl |= IGB_TX_WTHRESH << 16;

    txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
    wr32(E1000_TXDCTL(reg_idx), txdctl);

/**
 * igb_configure_tx - Configure transmit Unit after Reset
 * @adapter: board private structure
 *
 * Configure the Tx unit of the MAC after a reset.
 **/
static void igb_configure_tx(struct igb_adapter *adapter)
    for (i = 0; i < adapter->num_tx_queues; i++)
        igb_configure_tx_ring(adapter, adapter->tx_ring[i]);
/**
 * igb_setup_rx_resources - allocate Rx resources (Descriptors)
 * @rx_ring: rx descriptor ring (for a specific queue) to setup
 *
 * Returns 0 on success, negative on failure
 **/
int igb_setup_rx_resources(struct igb_ring *rx_ring)
    struct device *dev = rx_ring->dev;
    int orig_node = dev_to_node(dev);

    size = sizeof(struct igb_rx_buffer) * rx_ring->count;
    rx_ring->rx_buffer_info = vzalloc_node(size, rx_ring->numa_node);
    if (!rx_ring->rx_buffer_info)
        rx_ring->rx_buffer_info = vzalloc(size);
    if (!rx_ring->rx_buffer_info)

    desc_len = sizeof(union e1000_adv_rx_desc);

    /* Round up to nearest 4K */
    rx_ring->size = rx_ring->count * desc_len;
    rx_ring->size = ALIGN(rx_ring->size, 4096);

    set_dev_node(dev, rx_ring->numa_node);
    rx_ring->desc = dma_alloc_coherent(dev,
                                       rx_ring->size,
                                       &rx_ring->dma,
                                       GFP_KERNEL);
    set_dev_node(dev, orig_node);
        rx_ring->desc = dma_alloc_coherent(dev,
                                           rx_ring->size,
                                           &rx_ring->dma,
                                           GFP_KERNEL);

    rx_ring->next_to_clean = 0;
    rx_ring->next_to_use = 0;

    vfree(rx_ring->rx_buffer_info);
    rx_ring->rx_buffer_info = NULL;
    dev_err(dev, "Unable to allocate memory for the receive descriptor"

/**
 * igb_setup_all_rx_resources - wrapper to allocate Rx resources
 *                              (Descriptors) for all queues
 * @adapter: board private structure
 *
 * Return 0 on success, negative on failure
 **/
static int igb_setup_all_rx_resources(struct igb_adapter *adapter)
    struct pci_dev *pdev = adapter->pdev;

    for (i = 0; i < adapter->num_rx_queues; i++) {
        err = igb_setup_rx_resources(adapter->rx_ring[i]);
                    "Allocation for Rx Queue %u failed\n", i);
            for (i--; i >= 0; i--)
                igb_free_rx_resources(adapter->rx_ring[i]);
/**
 * igb_setup_mrqc - configure the multiple receive queue control registers
 * @adapter: Board private structure
 **/
static void igb_setup_mrqc(struct igb_adapter *adapter)
    struct e1000_hw *hw = &adapter->hw;
    u32 j, num_rx_queues, shift = 0, shift2 = 0;
    static const u8 rsshash[40] = {
        0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67,
        0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb,
        0xae, 0x7b, 0x30, 0xb4, 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30,
        0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa };

    /* Fill out hash function seeds */
    for (j = 0; j < 10; j++) {
        u32 rsskey = rsshash[(j * 4)];
        rsskey |= rsshash[(j * 4) + 1] << 8;
        rsskey |= rsshash[(j * 4) + 2] << 16;
        rsskey |= rsshash[(j * 4) + 3] << 24;
        array_wr32(E1000_RSSRK(0), j, rsskey);

    num_rx_queues = adapter->rss_queues;

    if (adapter->vfs_allocated_count) {
        /* 82575 and 82576 support 2 RSS queues for VMDq */
        switch (hw->mac.type) {

    if (hw->mac.type == e1000_82575)

    for (j = 0; j < (32 * 4); j++) {
        reta.bytes[j & 3] = (j % num_rx_queues) << shift;
            reta.bytes[j & 3] |= num_rx_queues << shift2;
            wr32(E1000_RETA(j >> 2), reta.dword);
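    /* Editorial worked example: with num_rx_queues = 4 and shift = 0 the
     * 128-entry redirection table cycles 0,1,2,3,0,1,... so the low bits
     * of each packet's RSS hash select one of the four rings. */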
    /* Disable raw packet checksumming so that RSS hash is placed in
     * descriptor on writeback. No need to enable TCP/UDP/IP checksum
     * offloads as they are enabled by default */
    rxcsum = rd32(E1000_RXCSUM);
    rxcsum |= E1000_RXCSUM_PCSD;

    if (adapter->hw.mac.type >= e1000_82576)
        /* Enable Receive Checksum Offload for SCTP */
        rxcsum |= E1000_RXCSUM_CRCOFL;

    /* Don't need to set TUOFL or IPOFL, they default to 1 */
    wr32(E1000_RXCSUM, rxcsum);

    /* If VMDq is enabled then we set the appropriate mode for that, else
     * we default to RSS so that an RSS hash is calculated per packet even
     * if we are only using one queue */
    if (adapter->vfs_allocated_count) {
        if (hw->mac.type > e1000_82575) {
            /* Set the default pool for the PF's first queue */
            u32 vtctl = rd32(E1000_VT_CTL);
            vtctl &= ~(E1000_VT_CTL_DEFAULT_POOL_MASK |
                       E1000_VT_CTL_DISABLE_DEF_POOL);
            vtctl |= adapter->vfs_allocated_count <<
                     E1000_VT_CTL_DEFAULT_POOL_SHIFT;
            wr32(E1000_VT_CTL, vtctl);
        if (adapter->rss_queues > 1)
            mrqc = E1000_MRQC_ENABLE_VMDQ_RSS_2Q;
            mrqc = E1000_MRQC_ENABLE_VMDQ;
        mrqc = E1000_MRQC_ENABLE_RSS_4Q;
    igb_vmm_control(adapter);

    /* Generate RSS hash based on TCP port numbers and/or
     * IPv4/v6 src and dst addresses since UDP cannot be
     * hashed reliably due to IP fragmentation */
    mrqc |= E1000_MRQC_RSS_FIELD_IPV4 |
            E1000_MRQC_RSS_FIELD_IPV4_TCP |
            E1000_MRQC_RSS_FIELD_IPV6 |
            E1000_MRQC_RSS_FIELD_IPV6_TCP |
            E1000_MRQC_RSS_FIELD_IPV6_TCP_EX;

    wr32(E1000_MRQC, mrqc);
/**
 * igb_setup_rctl - configure the receive control registers
 * @adapter: Board private structure
 **/
void igb_setup_rctl(struct igb_adapter *adapter)
    struct e1000_hw *hw = &adapter->hw;

    rctl = rd32(E1000_RCTL);

    rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
    rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC);

    rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_RDMTS_HALF |
            (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);

    /* enable stripping of CRC. It's unlikely this will break BMC
     * redirection as it did with e1000. Newer features require
     * that the HW strips the CRC. */
    rctl |= E1000_RCTL_SECRC;

    /* disable store bad packets and clear size bits. */
    rctl &= ~(E1000_RCTL_SBP | E1000_RCTL_SZ_256);

    /* enable LPE to prevent packets larger than max_frame_size */
    rctl |= E1000_RCTL_LPE;

    /* disable queue 0 to prevent tail write w/o re-config */
    wr32(E1000_RXDCTL(0), 0);

    /* Attention!!! For SR-IOV PF driver operations you must enable
     * queue drop for all VF and PF queues to prevent head of line blocking
     * if an un-trusted VF does not provide descriptors to hardware. */
    if (adapter->vfs_allocated_count) {
        /* set all queue drop enable bits */
        wr32(E1000_QDE, ALL_QUEUES);

    wr32(E1000_RCTL, rctl);
static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size,
                                   int vfn)
    struct e1000_hw *hw = &adapter->hw;

    /* if it isn't the PF check to see if VFs are enabled and
     * increase the size to support vlan tags */
    if (vfn < adapter->vfs_allocated_count &&
        adapter->vf_data[vfn].vlans_enabled)
        size += VLAN_TAG_SIZE;

    vmolr = rd32(E1000_VMOLR(vfn));
    vmolr &= ~E1000_VMOLR_RLPML_MASK;
    vmolr |= size | E1000_VMOLR_LPE;
    wr32(E1000_VMOLR(vfn), vmolr);

/**
 * igb_rlpml_set - set maximum receive packet size
 * @adapter: board private structure
 *
 * Configure maximum receivable packet size.
 **/
static void igb_rlpml_set(struct igb_adapter *adapter)
    u32 max_frame_size = adapter->max_frame_size;
    struct e1000_hw *hw = &adapter->hw;
    u16 pf_id = adapter->vfs_allocated_count;

        igb_set_vf_rlpml(adapter, max_frame_size, pf_id);
        /* If we're in VMDQ or SR-IOV mode, then set global RLPML
         * to our max jumbo frame size, in case we need to enable
         * jumbo frames on one of the rings later.
         * This will not pass over-length frames into the default
         * queue because it's gated by the VMOLR.RLPML. */
        max_frame_size = MAX_JUMBO_FRAME_SIZE;

    wr32(E1000_RLPML, max_frame_size);

static inline void igb_set_vmolr(struct igb_adapter *adapter,
                                 int vfn, bool aupe)
    struct e1000_hw *hw = &adapter->hw;

    /* This register exists only on 82576 and newer so if we are older
     * than that we should exit and do nothing */
    if (hw->mac.type < e1000_82576)
        return;

    vmolr = rd32(E1000_VMOLR(vfn));
    vmolr |= E1000_VMOLR_STRVLAN; /* Strip vlan tags */
        vmolr |= E1000_VMOLR_AUPE; /* Accept untagged packets */
        vmolr &= ~(E1000_VMOLR_AUPE); /* Tagged packets ONLY */

    /* clear all bits that might not be set */
    vmolr &= ~(E1000_VMOLR_BAM | E1000_VMOLR_RSSE);

    if (adapter->rss_queues > 1 && vfn == adapter->vfs_allocated_count)
        vmolr |= E1000_VMOLR_RSSE; /* enable RSS */

    /* for VMDq only allow the VFs and pool 0 to accept broadcast and
     * multicast */
    if (vfn <= adapter->vfs_allocated_count)
        vmolr |= E1000_VMOLR_BAM; /* Accept broadcast */

    wr32(E1000_VMOLR(vfn), vmolr);
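    /* Editorial note: VMOLR is a per-pool register; indexes below
     * vfs_allocated_count address VF pools, and vfn equal to
     * vfs_allocated_count addresses the PF's own default pool, which is
     * why the RSS enable above is restricted to exactly that index. */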
/**
 * igb_configure_rx_ring - Configure a receive ring after Reset
 * @adapter: board private structure
 * @ring: receive ring to be configured
 *
 * Configure the Rx unit of the MAC after a reset.
 **/
void igb_configure_rx_ring(struct igb_adapter *adapter,
                           struct igb_ring *ring)
    struct e1000_hw *hw = &adapter->hw;
    u64 rdba = ring->dma;
    int reg_idx = ring->reg_idx;
    u32 srrctl = 0, rxdctl = 0;

    /* disable the queue */
    wr32(E1000_RXDCTL(reg_idx), 0);

    /* Set DMA base address registers */
    wr32(E1000_RDBAL(reg_idx),
         rdba & 0x00000000ffffffffULL);
    wr32(E1000_RDBAH(reg_idx), rdba >> 32);
    wr32(E1000_RDLEN(reg_idx),
         ring->count * sizeof(union e1000_adv_rx_desc));

    /* initialize head and tail */
    ring->tail = hw->hw_addr + E1000_RDT(reg_idx);
    wr32(E1000_RDH(reg_idx), 0);
    writel(0, ring->tail);

    /* set descriptor configuration */
    srrctl = IGB_RX_HDR_LEN << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
#if (PAGE_SIZE / 2) > IGB_RXBUFFER_16384
    srrctl |= IGB_RXBUFFER_16384 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
    srrctl |= (PAGE_SIZE / 2) >> E1000_SRRCTL_BSIZEPKT_SHIFT;
    srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
    if (hw->mac.type >= e1000_82580)
        srrctl |= E1000_SRRCTL_TIMESTAMP;
    /* Only set Drop Enable if we are supporting multiple queues */
    if (adapter->vfs_allocated_count || adapter->num_rx_queues > 1)
        srrctl |= E1000_SRRCTL_DROP_EN;

    wr32(E1000_SRRCTL(reg_idx), srrctl);

    /* set filtering for VMDQ pools */
    igb_set_vmolr(adapter, reg_idx & 0x7, true);

    rxdctl |= IGB_RX_PTHRESH;
    rxdctl |= IGB_RX_HTHRESH << 8;
    rxdctl |= IGB_RX_WTHRESH << 16;

    /* enable receive descriptor fetching */
    rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
    wr32(E1000_RXDCTL(reg_idx), rxdctl);

/**
 * igb_configure_rx - Configure receive Unit after Reset
 * @adapter: board private structure
 *
 * Configure the Rx unit of the MAC after a reset.
 **/
static void igb_configure_rx(struct igb_adapter *adapter)
    /* set UTA to appropriate mode */
    igb_set_uta(adapter);

    /* set the correct pool for the PF default MAC address in entry 0 */
    igb_rar_set_qsel(adapter, adapter->hw.mac.addr, 0,
                     adapter->vfs_allocated_count);

    /* Setup the HW Rx Head and Tail Descriptor Pointers and
     * the Base and Length of the Rx Descriptor Ring */
    for (i = 0; i < adapter->num_rx_queues; i++)
        igb_configure_rx_ring(adapter, adapter->rx_ring[i]);
/**
 * igb_free_tx_resources - Free Tx Resources per Queue
 * @tx_ring: Tx descriptor ring for a specific queue
 *
 * Free all transmit software resources
 **/
void igb_free_tx_resources(struct igb_ring *tx_ring)
    igb_clean_tx_ring(tx_ring);

    vfree(tx_ring->tx_buffer_info);
    tx_ring->tx_buffer_info = NULL;

    /* if not set, then don't free */

    dma_free_coherent(tx_ring->dev, tx_ring->size,
                      tx_ring->desc, tx_ring->dma);

    tx_ring->desc = NULL;

/**
 * igb_free_all_tx_resources - Free Tx Resources for All Queues
 * @adapter: board private structure
 *
 * Free all transmit software resources
 **/
static void igb_free_all_tx_resources(struct igb_adapter *adapter)
    for (i = 0; i < adapter->num_tx_queues; i++)
        igb_free_tx_resources(adapter->tx_ring[i]);

void igb_unmap_and_free_tx_resource(struct igb_ring *ring,
                                    struct igb_tx_buffer *tx_buffer)
    if (tx_buffer->skb) {
        dev_kfree_skb_any(tx_buffer->skb);
            dma_unmap_single(ring->dev,
    } else if (tx_buffer->dma) {
        dma_unmap_page(ring->dev,

    tx_buffer->next_to_watch = NULL;
    tx_buffer->skb = NULL;

    /* buffer_info must be completely set up in the transmit path */

/**
 * igb_clean_tx_ring - Free Tx Buffers
 * @tx_ring: ring to be cleaned
 **/
static void igb_clean_tx_ring(struct igb_ring *tx_ring)
    struct igb_tx_buffer *buffer_info;

    if (!tx_ring->tx_buffer_info)
        return;

    /* Free all the Tx ring sk_buffs */
    for (i = 0; i < tx_ring->count; i++) {
        buffer_info = &tx_ring->tx_buffer_info[i];
        igb_unmap_and_free_tx_resource(tx_ring, buffer_info);

    size = sizeof(struct igb_tx_buffer) * tx_ring->count;
    memset(tx_ring->tx_buffer_info, 0, size);

    /* Zero out the descriptor ring */
    memset(tx_ring->desc, 0, tx_ring->size);

    tx_ring->next_to_use = 0;
    tx_ring->next_to_clean = 0;

/**
 * igb_clean_all_tx_rings - Free Tx Buffers for all queues
 * @adapter: board private structure
 **/
static void igb_clean_all_tx_rings(struct igb_adapter *adapter)
    for (i = 0; i < adapter->num_tx_queues; i++)
        igb_clean_tx_ring(adapter->tx_ring[i]);
/**
 * igb_free_rx_resources - Free Rx Resources
 * @rx_ring: ring to clean the resources from
 *
 * Free all receive software resources
 **/
void igb_free_rx_resources(struct igb_ring *rx_ring)
    igb_clean_rx_ring(rx_ring);

    vfree(rx_ring->rx_buffer_info);
    rx_ring->rx_buffer_info = NULL;

    /* if not set, then don't free */

    dma_free_coherent(rx_ring->dev, rx_ring->size,
                      rx_ring->desc, rx_ring->dma);

    rx_ring->desc = NULL;

/**
 * igb_free_all_rx_resources - Free Rx Resources for All Queues
 * @adapter: board private structure
 *
 * Free all receive software resources
 **/
static void igb_free_all_rx_resources(struct igb_adapter *adapter)
    for (i = 0; i < adapter->num_rx_queues; i++)
        igb_free_rx_resources(adapter->rx_ring[i]);

/**
 * igb_clean_rx_ring - Free Rx Buffers per Queue
 * @rx_ring: ring to free buffers from
 **/
static void igb_clean_rx_ring(struct igb_ring *rx_ring)
    if (!rx_ring->rx_buffer_info)
        return;

    /* Free all the Rx ring sk_buffs */
    for (i = 0; i < rx_ring->count; i++) {
        struct igb_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i];
        if (buffer_info->dma) {
            dma_unmap_single(rx_ring->dev,
            buffer_info->dma = 0;

        if (buffer_info->skb) {
            dev_kfree_skb(buffer_info->skb);
            buffer_info->skb = NULL;
        if (buffer_info->page_dma) {
            dma_unmap_page(rx_ring->dev,
                           buffer_info->page_dma,
            buffer_info->page_dma = 0;
        if (buffer_info->page) {
            put_page(buffer_info->page);
            buffer_info->page = NULL;
            buffer_info->page_offset = 0;

    size = sizeof(struct igb_rx_buffer) * rx_ring->count;
    memset(rx_ring->rx_buffer_info, 0, size);

    /* Zero out the descriptor ring */
    memset(rx_ring->desc, 0, rx_ring->size);

    rx_ring->next_to_clean = 0;
    rx_ring->next_to_use = 0;

/**
 * igb_clean_all_rx_rings - Free Rx Buffers for all queues
 * @adapter: board private structure
 **/
static void igb_clean_all_rx_rings(struct igb_adapter *adapter)
    for (i = 0; i < adapter->num_rx_queues; i++)
        igb_clean_rx_ring(adapter->rx_ring[i]);
/**
 * igb_set_mac - Change the Ethernet Address of the NIC
 * @netdev: network interface device structure
 * @p: pointer to an address structure
 *
 * Returns 0 on success, negative on failure
 **/
static int igb_set_mac(struct net_device *netdev, void *p)
    struct igb_adapter *adapter = netdev_priv(netdev);
    struct e1000_hw *hw = &adapter->hw;
    struct sockaddr *addr = p;

    if (!is_valid_ether_addr(addr->sa_data))
        return -EADDRNOTAVAIL;

    memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
    memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);

    /* set the correct pool for the new PF MAC address in entry 0 */
    igb_rar_set_qsel(adapter, hw->mac.addr, 0,
                     adapter->vfs_allocated_count);

/**
 * igb_write_mc_addr_list - write multicast addresses to MTA
 * @netdev: network interface device structure
 *
 * Writes multicast address list to the MTA hash table.
 * Returns: -ENOMEM on failure
 *          0 on no addresses written
 *          X on writing X addresses to MTA
 **/
static int igb_write_mc_addr_list(struct net_device *netdev)
    struct igb_adapter *adapter = netdev_priv(netdev);
    struct e1000_hw *hw = &adapter->hw;
    struct netdev_hw_addr *ha;

    if (netdev_mc_empty(netdev)) {
        /* nothing to program, so clear mc list */
        igb_update_mc_addr_list(hw, NULL, 0);
        igb_restore_vf_multicasts(adapter);

    mta_list = kzalloc(netdev_mc_count(netdev) * 6, GFP_ATOMIC);

    /* The shared function expects a packed array of only addresses. */
    netdev_for_each_mc_addr(ha, netdev)
        memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN);

    igb_update_mc_addr_list(hw, mta_list, i);

    return netdev_mc_count(netdev);

/**
 * igb_write_uc_addr_list - write unicast addresses to RAR table
 * @netdev: network interface device structure
 *
 * Writes unicast address list to the RAR table.
 * Returns: -ENOMEM on failure/insufficient address space
 *          0 on no addresses written
 *          X on writing X addresses to the RAR table
 **/
static int igb_write_uc_addr_list(struct net_device *netdev)
    struct igb_adapter *adapter = netdev_priv(netdev);
    struct e1000_hw *hw = &adapter->hw;
    unsigned int vfn = adapter->vfs_allocated_count;
    unsigned int rar_entries = hw->mac.rar_entry_count - (vfn + 1);

    /* return ENOMEM indicating insufficient memory for addresses */
    if (netdev_uc_count(netdev) > rar_entries)
        return -ENOMEM;

    if (!netdev_uc_empty(netdev) && rar_entries) {
        struct netdev_hw_addr *ha;

        netdev_for_each_uc_addr(ha, netdev) {
            igb_rar_set_qsel(adapter, ha->addr,

    /* write the addresses in reverse order to avoid write combining */
    for (; rar_entries > 0 ; rar_entries--) {
        wr32(E1000_RAH(rar_entries), 0);
        wr32(E1000_RAL(rar_entries), 0);
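    /* Editorial note: RAR entry 0 carries the PF MAC address and the top
     * vfn entries are reserved for VF MACs, hence the
     * rar_entry_count - (vfn + 1) budget computed above; zeroing the
     * leftover entries disables any stale unicast filters. */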
/**
 * igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
 * @netdev: network interface device structure
 *
 * The set_rx_mode entry point is called whenever the unicast or multicast
 * address lists or the network interface flags are updated. This routine is
 * responsible for configuring the hardware for proper unicast, multicast,
 * promiscuous mode, and all-multi behavior.
 **/
static void igb_set_rx_mode(struct net_device *netdev)
    struct igb_adapter *adapter = netdev_priv(netdev);
    struct e1000_hw *hw = &adapter->hw;
    unsigned int vfn = adapter->vfs_allocated_count;
    u32 rctl, vmolr = 0;

    /* Check for Promiscuous and All Multicast modes */
    rctl = rd32(E1000_RCTL);

    /* clear the affected bits */
    rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE | E1000_RCTL_VFE);

    if (netdev->flags & IFF_PROMISC) {
        rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
        vmolr |= (E1000_VMOLR_ROPE | E1000_VMOLR_MPME);
        if (netdev->flags & IFF_ALLMULTI) {
            rctl |= E1000_RCTL_MPE;
            vmolr |= E1000_VMOLR_MPME;
            /* Write addresses to the MTA, if the attempt fails
             * then we should just turn on promiscuous mode so
             * that we can at least receive multicast traffic */
            count = igb_write_mc_addr_list(netdev);
                rctl |= E1000_RCTL_MPE;
                vmolr |= E1000_VMOLR_MPME;
                vmolr |= E1000_VMOLR_ROMPE;
        /* Write addresses to available RAR registers, if there is not
         * sufficient space to store all the addresses then enable
         * unicast promiscuous mode */
        count = igb_write_uc_addr_list(netdev);
            rctl |= E1000_RCTL_UPE;
            vmolr |= E1000_VMOLR_ROPE;
        rctl |= E1000_RCTL_VFE;
    wr32(E1000_RCTL, rctl);

    /* In order to support SR-IOV and eventually VMDq it is necessary to set
     * the VMOLR to enable the appropriate modes. Without this workaround
     * we will have issues with VLAN tag stripping not being done for frames
     * that are only arriving because we are the default pool */
    if (hw->mac.type < e1000_82576)

    vmolr |= rd32(E1000_VMOLR(vfn)) &
             ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE);
    wr32(E1000_VMOLR(vfn), vmolr);
    igb_restore_vf_multicasts(adapter);
static void igb_check_wvbr(struct igb_adapter *adapter)
    struct e1000_hw *hw = &adapter->hw;

    switch (hw->mac.type) {
        if (!(wvbr = rd32(E1000_WVBR)))

    adapter->wvbr |= wvbr;

#define IGB_STAGGERED_QUEUE_OFFSET 8

static void igb_spoof_check(struct igb_adapter *adapter)
    for (j = 0; j < adapter->vfs_allocated_count; j++) {
        if (adapter->wvbr & (1 << j) ||
            adapter->wvbr & (1 << (j + IGB_STAGGERED_QUEUE_OFFSET))) {
            dev_warn(&adapter->pdev->dev,
                     "Spoof event(s) detected on VF %d\n", j);
                              (1 << (j + IGB_STAGGERED_QUEUE_OFFSET)));

/* Need to wait a few seconds after link up to get diagnostic information from
 * the phy */
static void igb_update_phy_info(unsigned long data)
    struct igb_adapter *adapter = (struct igb_adapter *) data;
    igb_get_phy_info(&adapter->hw);

/**
 * igb_has_link - check shared code for link and determine up/down
 * @adapter: pointer to driver private info
 **/
bool igb_has_link(struct igb_adapter *adapter)
    struct e1000_hw *hw = &adapter->hw;
    bool link_active = false;

    /* get_link_status is set on LSC (link status) interrupt or
     * rx sequence error interrupt. get_link_status will stay
     * false until the e1000_check_for_link establishes link
     * for copper adapters ONLY */
    switch (hw->phy.media_type) {
    case e1000_media_type_copper:
        if (hw->mac.get_link_status) {
            ret_val = hw->mac.ops.check_for_link(hw);
            link_active = !hw->mac.get_link_status;
    case e1000_media_type_internal_serdes:
        ret_val = hw->mac.ops.check_for_link(hw);
        link_active = hw->mac.serdes_has_link;
    case e1000_media_type_unknown:

static bool igb_thermal_sensor_event(struct e1000_hw *hw, u32 event)
    u32 ctrl_ext, thstat;

    /* check for thermal sensor event on i350, copper only */
    if (hw->mac.type == e1000_i350) {
        thstat = rd32(E1000_THSTAT);
        ctrl_ext = rd32(E1000_CTRL_EXT);

        if ((hw->phy.media_type == e1000_media_type_copper) &&
            !(ctrl_ext & E1000_CTRL_EXT_LINK_MODE_SGMII)) {
            ret = !!(thstat & event);
/**
 * igb_watchdog - Timer Call-back
 * @data: pointer to adapter cast into an unsigned long
 **/
static void igb_watchdog(unsigned long data)
    struct igb_adapter *adapter = (struct igb_adapter *)data;
    /* Do the rest outside of interrupt context */
    schedule_work(&adapter->watchdog_task);

static void igb_watchdog_task(struct work_struct *work)
    struct igb_adapter *adapter = container_of(work,
                                               struct igb_adapter,
                                               watchdog_task);
    struct e1000_hw *hw = &adapter->hw;
    struct net_device *netdev = adapter->netdev;

    link = igb_has_link(adapter);
        if (!netif_carrier_ok(netdev)) {
            hw->mac.ops.get_speed_and_duplex(hw,
                                             &adapter->link_speed,
                                             &adapter->link_duplex);

            ctrl = rd32(E1000_CTRL);
            /* Links status message must follow this format */
            printk(KERN_INFO "igb: %s NIC Link is Up %d Mbps %s, "
                   "Flow Control: %s\n",
                   netdev->name,
                   adapter->link_speed,
                   adapter->link_duplex == FULL_DUPLEX ?
                   "Full Duplex" : "Half Duplex",
                   ((ctrl & E1000_CTRL_TFCE) &&
                    (ctrl & E1000_CTRL_RFCE)) ? "RX/TX" :
                   ((ctrl & E1000_CTRL_RFCE) ? "RX" :
                   ((ctrl & E1000_CTRL_TFCE) ? "TX" : "None")));

            /* check for thermal sensor event */
            if (igb_thermal_sensor_event(hw, E1000_THSTAT_LINK_THROTTLE)) {
                printk(KERN_INFO "igb: %s The network adapter "
                       "link speed was downshifted "
                       "because it overheated.\n",
                       netdev->name);

            /* adjust timeout factor according to speed/duplex */
            adapter->tx_timeout_factor = 1;
            switch (adapter->link_speed) {
                adapter->tx_timeout_factor = 14;
                /* maybe add some timeout factor ? */

            netif_carrier_on(netdev);

            igb_ping_all_vfs(adapter);
            igb_check_vf_rate_limit(adapter);

            /* link state has changed, schedule phy info update */
            if (!test_bit(__IGB_DOWN, &adapter->state))
                mod_timer(&adapter->phy_info_timer,
                          round_jiffies(jiffies + 2 * HZ));
        if (netif_carrier_ok(netdev)) {
            adapter->link_speed = 0;
            adapter->link_duplex = 0;

            /* check for thermal sensor event */
            if (igb_thermal_sensor_event(hw, E1000_THSTAT_PWR_DOWN)) {
                printk(KERN_ERR "igb: %s The network adapter "
                       "was stopped because it "
                       "overheated.\n",
                       netdev->name);

            /* Links status message must follow this format */
            printk(KERN_INFO "igb: %s NIC Link is Down\n",
                   netdev->name);
            netif_carrier_off(netdev);

            igb_ping_all_vfs(adapter);

            /* link state has changed, schedule phy info update */
            if (!test_bit(__IGB_DOWN, &adapter->state))
                mod_timer(&adapter->phy_info_timer,
                          round_jiffies(jiffies + 2 * HZ));

    spin_lock(&adapter->stats64_lock);
    igb_update_stats(adapter, &adapter->stats64);
    spin_unlock(&adapter->stats64_lock);

    for (i = 0; i < adapter->num_tx_queues; i++) {
        struct igb_ring *tx_ring = adapter->tx_ring[i];
        if (!netif_carrier_ok(netdev)) {
            /* We've lost link, so the controller stops DMA,
             * but we've got queued Tx work that's never going
             * to get done, so reset controller to flush Tx.
             * (Do the reset outside of interrupt context). */
            if (igb_desc_unused(tx_ring) + 1 < tx_ring->count) {
                adapter->tx_timeout_count++;
                schedule_work(&adapter->reset_task);
                /* return immediately since reset is imminent */

        /* Force detection of hung controller every watchdog period */
        set_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags);

    /* Cause software interrupt to ensure rx ring is cleaned */
    if (adapter->msix_entries) {
        for (i = 0; i < adapter->num_q_vectors; i++)
            eics |= adapter->q_vector[i]->eims_value;
        wr32(E1000_EICS, eics);
        wr32(E1000_ICS, E1000_ICS_RXDMT0);

    igb_spoof_check(adapter);

    /* Reset the timer */
    if (!test_bit(__IGB_DOWN, &adapter->state))
        mod_timer(&adapter->watchdog_timer,
                  round_jiffies(jiffies + 2 * HZ));
enum latency_range {
    latency_invalid = 255

/**
 * igb_update_ring_itr - update the dynamic ITR value based on packet size
 *
 * Stores a new ITR value based strictly on packet size. This
 * algorithm is less sophisticated than that used in igb_update_itr,
 * due to the difficulty of synchronizing statistics across multiple
 * receive rings. The divisors and thresholds used by this function
 * were determined based on theoretical maximum wire speed and testing
 * data, in order to minimize response time while increasing bulk
 * throughput.
 * This functionality is controlled by the InterruptThrottleRate module
 * parameter (see igb_param.c)
 * NOTE: This function is called only when operating in a multiqueue
 * receive environment.
 * @q_vector: pointer to q_vector
 **/
static void igb_update_ring_itr(struct igb_q_vector *q_vector)
    int new_val = q_vector->itr_val;
    int avg_wire_size = 0;
    struct igb_adapter *adapter = q_vector->adapter;
    unsigned int packets;

    /* For non-gigabit speeds, just fix the interrupt rate at 4000
     * ints/sec - ITR timer value of 120 ticks. */
    if (adapter->link_speed != SPEED_1000) {
        new_val = IGB_4K_ITR;

    packets = q_vector->rx.total_packets;
        avg_wire_size = q_vector->rx.total_bytes / packets;

    packets = q_vector->tx.total_packets;
        avg_wire_size = max_t(u32, avg_wire_size,
                              q_vector->tx.total_bytes / packets);

    /* if avg_wire_size isn't set no work was done */

    /* Add 24 bytes to size to account for CRC, preamble, and gap */
    avg_wire_size += 24;

    /* Don't starve jumbo frames */
    avg_wire_size = min(avg_wire_size, 3000);

    /* Give a little boost to mid-size frames */
    if ((avg_wire_size > 300) && (avg_wire_size < 1200))
        new_val = avg_wire_size / 3;
        new_val = avg_wire_size / 2;

    /* conservative mode (itr 3) eliminates the lowest_latency setting */
    if (new_val < IGB_20K_ITR &&
        ((q_vector->rx.ring && adapter->rx_itr_setting == 3) ||
         (!q_vector->rx.ring && adapter->tx_itr_setting == 3)))
        new_val = IGB_20K_ITR;

    if (new_val != q_vector->itr_val) {
        q_vector->itr_val = new_val;
        q_vector->set_itr = 1;

    q_vector->rx.total_bytes = 0;
    q_vector->rx.total_packets = 0;
    q_vector->tx.total_bytes = 0;
    q_vector->tx.total_packets = 0;
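    /* Editorial worked example: a steady stream of 600-byte frames gives
     * avg_wire_size = 624 after the 24-byte CRC/preamble/gap adjustment;
     * that falls in the 300..1200 mid-size band, so new_val = 624 / 3 =
     * 208. Since itr_val is an interval, a smaller result means a higher
     * interrupt rate. */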
/**
 * igb_update_itr - update the dynamic ITR value based on statistics
 *
 * Stores a new ITR value based on packets and byte
 * counts during the last interrupt. The advantage of per interrupt
 * computation is faster updates and more accurate ITR for the current
 * traffic pattern. Constants in this function were computed
 * based on theoretical maximum wire speed and thresholds were set based
 * on testing data as well as attempting to minimize response time
 * while increasing bulk throughput.
 * This functionality is controlled by the InterruptThrottleRate module
 * parameter (see igb_param.c)
 * NOTE: These calculations are only valid when operating in a single-
 * queue environment.
 * @q_vector: pointer to q_vector
 * @ring_container: ring info to update the itr for
 **/
static void igb_update_itr(struct igb_q_vector *q_vector,
                           struct igb_ring_container *ring_container)
    unsigned int packets = ring_container->total_packets;
    unsigned int bytes = ring_container->total_bytes;
    u8 itrval = ring_container->itr;

    /* no packets, exit with status unchanged */

    case lowest_latency:
        /* handle TSO and jumbo frames */
        if (bytes/packets > 8000)
            itrval = bulk_latency;
        else if ((packets < 5) && (bytes > 512))
            itrval = low_latency;
    case low_latency:  /* 50 usec aka 20000 ints/s */
        if (bytes > 10000) {
            /* this if handles the TSO accounting */
            if (bytes/packets > 8000) {
                itrval = bulk_latency;
            } else if ((packets < 10) || ((bytes/packets) > 1200)) {
                itrval = bulk_latency;
            } else if ((packets > 35)) {
                itrval = lowest_latency;
        } else if (bytes/packets > 2000) {
            itrval = bulk_latency;
        } else if (packets <= 2 && bytes < 512) {
            itrval = lowest_latency;
    case bulk_latency: /* 250 usec aka 4000 ints/s */
        if (bytes > 25000) {
                itrval = low_latency;
        } else if (bytes < 1500) {
            itrval = low_latency;

    /* clear work counters since we have the values we need */
    ring_container->total_bytes = 0;
    ring_container->total_packets = 0;

    /* write updated itr to ring container */
    ring_container->itr = itrval;
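    /* Editorial worked example: in the low_latency state, an interrupt
     * that accounted 4 packets totalling 36000 bytes has bytes/packets =
     * 9000 > 8000, which the TSO/jumbo test above treats as bulk traffic
     * and demotes the ring to bulk_latency. */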
static void igb_set_itr(struct igb_q_vector *q_vector)
    struct igb_adapter *adapter = q_vector->adapter;
    u32 new_itr = q_vector->itr_val;

    /* for non-gigabit speeds, just fix the interrupt rate at 4000 */
    if (adapter->link_speed != SPEED_1000) {
        new_itr = IGB_4K_ITR;

    igb_update_itr(q_vector, &q_vector->tx);
    igb_update_itr(q_vector, &q_vector->rx);

    current_itr = max(q_vector->rx.itr, q_vector->tx.itr);

    /* conservative mode (itr 3) eliminates the lowest_latency setting */
    if (current_itr == lowest_latency &&
        ((q_vector->rx.ring && adapter->rx_itr_setting == 3) ||
         (!q_vector->rx.ring && adapter->tx_itr_setting == 3)))
        current_itr = low_latency;

    switch (current_itr) {
    /* counts and packets in update_itr are dependent on these numbers */
    case lowest_latency:
        new_itr = IGB_70K_ITR; /* 70,000 ints/sec */
        new_itr = IGB_20K_ITR; /* 20,000 ints/sec */
        new_itr = IGB_4K_ITR;  /* 4,000 ints/sec */

    if (new_itr != q_vector->itr_val) {
        /* this attempts to bias the interrupt rate towards Bulk
         * by adding intermediate steps when interrupt rate is
         * increasing */
        new_itr = new_itr > q_vector->itr_val ?
                  max((new_itr * q_vector->itr_val) /
                      (new_itr + (q_vector->itr_val >> 2)),
                      new_itr) : new_itr;
        /* Don't write the value here; it resets the adapter's
         * internal timer, and causes us to delay far longer than
         * we should between interrupts. Instead, we write the ITR
         * value at the beginning of the next interrupt so the timing
         * ends up being correct. */
        q_vector->itr_val = new_itr;
        q_vector->set_itr = 1;
void igb_tx_ctxtdesc(struct igb_ring *tx_ring, u32 vlan_macip_lens,
                     u32 type_tucmd, u32 mss_l4len_idx)
    struct e1000_adv_tx_context_desc *context_desc;
    u16 i = tx_ring->next_to_use;

    context_desc = IGB_TX_CTXTDESC(tx_ring, i);

    tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;

    /* set bits to identify this as an advanced context descriptor */
    type_tucmd |= E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT;

    /* For 82575, context index must be unique per ring. */
    if (test_bit(IGB_RING_FLAG_TX_CTX_IDX, &tx_ring->flags))
        mss_l4len_idx |= tx_ring->reg_idx << 4;

    context_desc->vlan_macip_lens = cpu_to_le32(vlan_macip_lens);
    context_desc->seqnum_seed = 0;
    context_desc->type_tucmd_mlhl = cpu_to_le32(type_tucmd);
    context_desc->mss_l4len_idx = cpu_to_le32(mss_l4len_idx);
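    /* Editorial layout note: vlan_macip_lens packs three fields of the
     * advanced context descriptor: bits 31:16 hold the VLAN tag, bits
     * 15:9 the MAC header length (hence E1000_ADVTXD_MACLEN_SHIFT), and
     * bits 8:0 the IP header length. */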
static int igb_tso(struct igb_ring *tx_ring,
                   struct igb_tx_buffer *first,
                   u8 *hdr_len)
    struct sk_buff *skb = first->skb;
    u32 vlan_macip_lens, type_tucmd;
    u32 mss_l4len_idx, l4len;

    if (!skb_is_gso(skb))
        return 0;

    if (skb_header_cloned(skb)) {
        int err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);

    /* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
    type_tucmd = E1000_ADVTXD_TUCMD_L4T_TCP;

    if (first->protocol == __constant_htons(ETH_P_IP)) {
        struct iphdr *iph = ip_hdr(skb);
        tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
                                                 iph->daddr, 0,
                                                 IPPROTO_TCP, 0);
        type_tucmd |= E1000_ADVTXD_TUCMD_IPV4;
        first->tx_flags |= IGB_TX_FLAGS_TSO |
    } else if (skb_is_gso_v6(skb)) {
        ipv6_hdr(skb)->payload_len = 0;
        tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
                                               &ipv6_hdr(skb)->daddr,
                                               0, IPPROTO_TCP, 0);
        first->tx_flags |= IGB_TX_FLAGS_TSO |

    /* compute header lengths */
    l4len = tcp_hdrlen(skb);
    *hdr_len = skb_transport_offset(skb) + l4len;

    /* update gso size and bytecount with header size */
    first->gso_segs = skb_shinfo(skb)->gso_segs;
    first->bytecount += (first->gso_segs - 1) * *hdr_len;

    mss_l4len_idx = l4len << E1000_ADVTXD_L4LEN_SHIFT;
    mss_l4len_idx |= skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT;

    /* VLAN MACLEN IPLEN */
    vlan_macip_lens = skb_network_header_len(skb);
    vlan_macip_lens |= skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT;
    vlan_macip_lens |= first->tx_flags & IGB_TX_FLAGS_VLAN_MASK;

    igb_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, mss_l4len_idx);
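    /* Editorial worked example: a 9000-byte TSO payload with an MSS of
     * 1448 and a 66-byte header splits into 7 segments, so the bytecount
     * update above adds (7 - 1) * 66 = 396 bytes and the queue statistics
     * account for the replicated headers. */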
static void igb_tx_csum(struct igb_ring *tx_ring, struct igb_tx_buffer *first)
    struct sk_buff *skb = first->skb;
    u32 vlan_macip_lens = 0;
    u32 mss_l4len_idx = 0;

    if (skb->ip_summed != CHECKSUM_PARTIAL) {
        if (!(first->tx_flags & IGB_TX_FLAGS_VLAN))

        switch (first->protocol) {
        case __constant_htons(ETH_P_IP):
            vlan_macip_lens |= skb_network_header_len(skb);
            type_tucmd |= E1000_ADVTXD_TUCMD_IPV4;
            l4_hdr = ip_hdr(skb)->protocol;
        case __constant_htons(ETH_P_IPV6):
            vlan_macip_lens |= skb_network_header_len(skb);
            l4_hdr = ipv6_hdr(skb)->nexthdr;
            if (unlikely(net_ratelimit())) {
                dev_warn(tx_ring->dev,
                         "partial checksum but proto=%x!\n",

            type_tucmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
            mss_l4len_idx = tcp_hdrlen(skb) <<
                            E1000_ADVTXD_L4LEN_SHIFT;
            type_tucmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
            mss_l4len_idx = sizeof(struct sctphdr) <<
                            E1000_ADVTXD_L4LEN_SHIFT;
            mss_l4len_idx = sizeof(struct udphdr) <<
                            E1000_ADVTXD_L4LEN_SHIFT;
            if (unlikely(net_ratelimit())) {
                dev_warn(tx_ring->dev,
                         "partial checksum but l4 proto=%x!\n",

        /* update TX checksum flag */
        first->tx_flags |= IGB_TX_FLAGS_CSUM;

    vlan_macip_lens |= skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT;
    vlan_macip_lens |= first->tx_flags & IGB_TX_FLAGS_VLAN_MASK;

    igb_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, mss_l4len_idx);
static __le32 igb_tx_cmd_type(u32 tx_flags)
    /* set type for advanced descriptor with frame checksum insertion */
    __le32 cmd_type = cpu_to_le32(E1000_ADVTXD_DTYP_DATA |
                                  E1000_ADVTXD_DCMD_IFCS |
                                  E1000_ADVTXD_DCMD_DEXT);

    /* set HW vlan bit if vlan is present */
    if (tx_flags & IGB_TX_FLAGS_VLAN)
        cmd_type |= cpu_to_le32(E1000_ADVTXD_DCMD_VLE);

    /* set timestamp bit if present */
    if (tx_flags & IGB_TX_FLAGS_TSTAMP)
        cmd_type |= cpu_to_le32(E1000_ADVTXD_MAC_TSTAMP);

    /* set segmentation bits for TSO */
    if (tx_flags & IGB_TX_FLAGS_TSO)
        cmd_type |= cpu_to_le32(E1000_ADVTXD_DCMD_TSE);

static void igb_tx_olinfo_status(struct igb_ring *tx_ring,
                                 union e1000_adv_tx_desc *tx_desc,
                                 u32 tx_flags, unsigned int paylen)
    u32 olinfo_status = paylen << E1000_ADVTXD_PAYLEN_SHIFT;

    /* 82575 requires a unique index per ring if any offload is enabled */
    if ((tx_flags & (IGB_TX_FLAGS_CSUM | IGB_TX_FLAGS_VLAN)) &&
        test_bit(IGB_RING_FLAG_TX_CTX_IDX, &tx_ring->flags))
        olinfo_status |= tx_ring->reg_idx << 4;

    /* insert L4 checksum */
    if (tx_flags & IGB_TX_FLAGS_CSUM) {
        olinfo_status |= E1000_TXD_POPTS_TXSM << 8;

        /* insert IPv4 checksum */
        if (tx_flags & IGB_TX_FLAGS_IPV4)
            olinfo_status |= E1000_TXD_POPTS_IXSM << 8;

    tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);

/* The largest size we can write to the descriptor is 65535. In order to
 * maintain a power of two alignment we have to limit ourselves to 32K. */
#define IGB_MAX_TXD_PWR 15
#define IGB_MAX_DATA_PER_TXD (1<<IGB_MAX_TXD_PWR)
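/* Editorial note: 1 << 15 = 32768, so each data descriptor maps at most
 * 32K even though the length field could express up to 65535; the
 * power-of-two cap keeps the chunking loop in igb_tx_map() simple. */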
static void igb_tx_map(struct igb_ring *tx_ring,
                       struct igb_tx_buffer *first,
                       const u8 hdr_len)
    struct sk_buff *skb = first->skb;
    struct igb_tx_buffer *tx_buffer_info;
    union e1000_adv_tx_desc *tx_desc;
    struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0];
    unsigned int data_len = skb->data_len;
    unsigned int size = skb_headlen(skb);
    unsigned int paylen = skb->len - hdr_len;
    u32 tx_flags = first->tx_flags;
    u16 i = tx_ring->next_to_use;

    tx_desc = IGB_TX_DESC(tx_ring, i);

    igb_tx_olinfo_status(tx_ring, tx_desc, tx_flags, paylen);
    cmd_type = igb_tx_cmd_type(tx_flags);

    dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE);
    if (dma_mapping_error(tx_ring->dev, dma))

    /* record length, and DMA address */
    first->length = size;

    tx_desc->read.buffer_addr = cpu_to_le64(dma);

        while (unlikely(size > IGB_MAX_DATA_PER_TXD)) {
            tx_desc->read.cmd_type_len =
                cmd_type | cpu_to_le32(IGB_MAX_DATA_PER_TXD);

            if (i == tx_ring->count) {
                tx_desc = IGB_TX_DESC(tx_ring, 0);

            dma += IGB_MAX_DATA_PER_TXD;
            size -= IGB_MAX_DATA_PER_TXD;

            tx_desc->read.olinfo_status = 0;
            tx_desc->read.buffer_addr = cpu_to_le64(dma);

        if (likely(!data_len))
            break;

        tx_desc->read.cmd_type_len = cmd_type | cpu_to_le32(size);

        if (i == tx_ring->count) {
            tx_desc = IGB_TX_DESC(tx_ring, 0);

        size = skb_frag_size(frag);

        dma = skb_frag_dma_map(tx_ring->dev, frag, 0,
                               size, DMA_TO_DEVICE);
        if (dma_mapping_error(tx_ring->dev, dma))

        tx_buffer_info = &tx_ring->tx_buffer_info[i];
        tx_buffer_info->length = size;
        tx_buffer_info->dma = dma;

        tx_desc->read.olinfo_status = 0;
        tx_desc->read.buffer_addr = cpu_to_le64(dma);

    /* write last descriptor with RS and EOP bits */
    cmd_type |= cpu_to_le32(size) | cpu_to_le32(IGB_TXD_DCMD);
    tx_desc->read.cmd_type_len = cmd_type;

    /* set the timestamp */
    first->time_stamp = jiffies;

    /* Force memory writes to complete before letting h/w know there
     * are new descriptors to fetch. (Only applicable for weak-ordered
     * memory model archs, such as IA-64).
     *
     * We also need this memory barrier to make certain all of the
     * status bits have been updated before next_to_watch is written. */
    wmb();

    /* set next_to_watch value indicating a packet is present */
    first->next_to_watch = tx_desc;

    if (i == tx_ring->count)

    tx_ring->next_to_use = i;

    writel(i, tx_ring->tail);

    /* we need this if more than one processor can write to our tail
     * at a time, it synchronizes IO on IA64/Altix systems */
    mmiowb();

    dev_err(tx_ring->dev, "TX DMA map failed\n");

    /* clear dma mappings for failed tx_buffer_info map */
        tx_buffer_info = &tx_ring->tx_buffer_info[i];
        igb_unmap_and_free_tx_resource(tx_ring, tx_buffer_info);
        if (tx_buffer_info == first)

    tx_ring->next_to_use = i;
4295
static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, const u16 size)
4297
struct net_device *netdev = tx_ring->netdev;
4299
netif_stop_subqueue(netdev, tx_ring->queue_index);
4301
/* Herbert's original patch had:
4302
* smp_mb__after_netif_stop_queue();
4303
* but since that doesn't exist yet, just open code it. */
4306
/* We need to check again in a case another CPU has just
4307
* made room available. */
4308
if (igb_desc_unused(tx_ring) < size)
4312
netif_wake_subqueue(netdev, tx_ring->queue_index);
4314
u64_stats_update_begin(&tx_ring->tx_syncp2);
4315
tx_ring->tx_stats.restart_queue2++;
4316
u64_stats_update_end(&tx_ring->tx_syncp2);
4321
static inline int igb_maybe_stop_tx(struct igb_ring *tx_ring, const u16 size)
4323
if (igb_desc_unused(tx_ring) >= size)
4325
return __igb_maybe_stop_tx(tx_ring, size);
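/* Sketch of the stop/wake protocol above (illustrative): the queue is
 * stopped first and the free-descriptor count is then re-checked, because
 * a cleanup running on another CPU may have freed ring entries between the
 * first check and netif_stop_subqueue(); restart_queue2 counts those
 * immediate re-wakes.
 */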
netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb,
				struct igb_ring *tx_ring)
	struct igb_tx_buffer *first;
	__be16 protocol = vlan_get_protocol(skb);

	/* need: 1 descriptor per page,
	 *       + 2 desc gap to keep tail from touching head,
	 *       + 1 desc for skb->data,
	 *       + 1 desc for context descriptor,
	 * otherwise try next time */
	if (igb_maybe_stop_tx(tx_ring, skb_shinfo(skb)->nr_frags + 4)) {
		/* this is a hard error */
		return NETDEV_TX_BUSY;

	/* record the location of the first descriptor for this packet */
	first = &tx_ring->tx_buffer_info[tx_ring->next_to_use];
	first->bytecount = skb->len;
	first->gso_segs = 1;

	if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
		skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
		tx_flags |= IGB_TX_FLAGS_TSTAMP;

	if (vlan_tx_tag_present(skb)) {
		tx_flags |= IGB_TX_FLAGS_VLAN;
		tx_flags |= (vlan_tx_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT);

	/* record initial flags and protocol */
	first->tx_flags = tx_flags;
	first->protocol = protocol;

	tso = igb_tso(tx_ring, first, &hdr_len);

	igb_tx_csum(tx_ring, first);

	igb_tx_map(tx_ring, first, hdr_len);

	/* Make sure there is space in the ring for the next send. */
	igb_maybe_stop_tx(tx_ring, MAX_SKB_FRAGS + 4);

	return NETDEV_TX_OK;

	igb_unmap_and_free_tx_resource(tx_ring, first);

	return NETDEV_TX_OK;

static inline struct igb_ring *igb_tx_queue_mapping(struct igb_adapter *adapter,
						    struct sk_buff *skb)
	unsigned int r_idx = skb->queue_mapping;

	if (r_idx >= adapter->num_tx_queues)
		r_idx = r_idx % adapter->num_tx_queues;

	return adapter->tx_ring[r_idx];

static netdev_tx_t igb_xmit_frame(struct sk_buff *skb,
				  struct net_device *netdev)
	struct igb_adapter *adapter = netdev_priv(netdev);

	if (test_bit(__IGB_DOWN, &adapter->state)) {
		dev_kfree_skb_any(skb);
		return NETDEV_TX_OK;

	if (skb->len <= 0) {
		dev_kfree_skb_any(skb);
		return NETDEV_TX_OK;

	/*
	 * The minimum packet size with TCTL.PSP set is 17 so pad the skb
	 * in order to meet this minimum size requirement.
	 */
	if (skb->len < 17) {
		if (skb_padto(skb, 17))
			return NETDEV_TX_OK;

	return igb_xmit_frame_ring(skb, igb_tx_queue_mapping(adapter, skb));
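/* Background note (illustrative): TCTL.PSP ("pad short packets") makes the
 * MAC pad runt frames up to the Ethernet minimum on the wire, but it can
 * only do so from a 17-byte floor; a 10-byte skb is therefore first padded
 * to 17 bytes in software here and the hardware finishes the padding.
 */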
/**
 * igb_tx_timeout - Respond to a Tx Hang
 * @netdev: network interface device structure
 **/
static void igb_tx_timeout(struct net_device *netdev)
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;

	/* Do the reset outside of interrupt context */
	adapter->tx_timeout_count++;

	if (hw->mac.type >= e1000_82580)
		hw->dev_spec._82575.global_device_reset = true;

	schedule_work(&adapter->reset_task);
	wr32(E1000_EICS,
	     (adapter->eims_enable_mask & ~adapter->eims_other));

static void igb_reset_task(struct work_struct *work)
	struct igb_adapter *adapter;
	adapter = container_of(work, struct igb_adapter, reset_task);

	netdev_err(adapter->netdev, "Reset adapter\n");
	igb_reinit_locked(adapter);

/**
 * igb_get_stats64 - Get System Network Statistics
 * @netdev: network interface device structure
 * @stats: rtnl_link_stats64 pointer
 **/
static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *netdev,
						 struct rtnl_link_stats64 *stats)
	struct igb_adapter *adapter = netdev_priv(netdev);

	spin_lock(&adapter->stats64_lock);
	igb_update_stats(adapter, &adapter->stats64);
	memcpy(stats, &adapter->stats64, sizeof(*stats));
	spin_unlock(&adapter->stats64_lock);

	return stats;

/**
 * igb_change_mtu - Change the Maximum Transfer Unit
 * @netdev: network interface device structure
 * @new_mtu: new value for maximum frame size
 *
 * Returns 0 on success, negative on failure
 **/
static int igb_change_mtu(struct net_device *netdev, int new_mtu)
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct pci_dev *pdev = adapter->pdev;
	int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;

	if ((new_mtu < 68) || (max_frame > MAX_JUMBO_FRAME_SIZE)) {
		dev_err(&pdev->dev, "Invalid MTU setting\n");

#define MAX_STD_JUMBO_FRAME_SIZE 9238
	if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) {
		dev_err(&pdev->dev, "MTU > 9216 not supported.\n");

	while (test_and_set_bit(__IGB_RESETTING, &adapter->state))

	/* igb_down has a dependency on max_frame_size */
	adapter->max_frame_size = max_frame;

	if (netif_running(netdev))

	dev_info(&pdev->dev, "changing MTU from %d to %d\n",
		 netdev->mtu, new_mtu);
	netdev->mtu = new_mtu;

	if (netif_running(netdev))

	clear_bit(__IGB_RESETTING, &adapter->state);
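/* Worked example: for the default 1500-byte MTU, max_frame above is
 * 1500 + ETH_HLEN (14) + ETH_FCS_LEN (4) + VLAN_HLEN (4) = 1522 bytes,
 * and the 9216-byte jumbo MTU ceiling is what the 9238-byte
 * MAX_STD_JUMBO_FRAME_SIZE check corresponds to.
 */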
/**
 * igb_update_stats - Update the board statistics counters
 * @adapter: board private structure
 **/
void igb_update_stats(struct igb_adapter *adapter,
		      struct rtnl_link_stats64 *net_stats)
	struct e1000_hw *hw = &adapter->hw;
	struct pci_dev *pdev = adapter->pdev;
	u64 _bytes, _packets;

#define PHY_IDLE_ERROR_COUNT_MASK 0x00FF

	/*
	 * Prevent stats update while adapter is being reset, or if the pci
	 * connection is down.
	 */
	if (adapter->link_speed == 0)
	if (pci_channel_offline(pdev))

	for (i = 0; i < adapter->num_rx_queues; i++) {
		u32 rqdpc_tmp = rd32(E1000_RQDPC(i)) & 0x0FFF;
		struct igb_ring *ring = adapter->rx_ring[i];

		ring->rx_stats.drops += rqdpc_tmp;
		net_stats->rx_fifo_errors += rqdpc_tmp;

			start = u64_stats_fetch_begin_bh(&ring->rx_syncp);
			_bytes = ring->rx_stats.bytes;
			_packets = ring->rx_stats.packets;
		} while (u64_stats_fetch_retry_bh(&ring->rx_syncp, start));
		packets += _packets;

	net_stats->rx_bytes = bytes;
	net_stats->rx_packets = packets;

	for (i = 0; i < adapter->num_tx_queues; i++) {
		struct igb_ring *ring = adapter->tx_ring[i];

			start = u64_stats_fetch_begin_bh(&ring->tx_syncp);
			_bytes = ring->tx_stats.bytes;
			_packets = ring->tx_stats.packets;
		} while (u64_stats_fetch_retry_bh(&ring->tx_syncp, start));
		packets += _packets;

	net_stats->tx_bytes = bytes;
	net_stats->tx_packets = packets;
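/* The do { } while (u64_stats_fetch_retry_bh(...)) loops above are the
 * standard seqcount read pattern: on 32-bit machines a 64-bit counter
 * cannot be read atomically, so the snapshot is retried whenever a writer
 * updated the counters mid-read; on 64-bit builds the retry compiles away.
 */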
	/* read stats registers */
	adapter->stats.crcerrs += rd32(E1000_CRCERRS);
	adapter->stats.gprc += rd32(E1000_GPRC);
	adapter->stats.gorc += rd32(E1000_GORCL);
	rd32(E1000_GORCH); /* clear GORCL */
	adapter->stats.bprc += rd32(E1000_BPRC);
	adapter->stats.mprc += rd32(E1000_MPRC);
	adapter->stats.roc += rd32(E1000_ROC);

	adapter->stats.prc64 += rd32(E1000_PRC64);
	adapter->stats.prc127 += rd32(E1000_PRC127);
	adapter->stats.prc255 += rd32(E1000_PRC255);
	adapter->stats.prc511 += rd32(E1000_PRC511);
	adapter->stats.prc1023 += rd32(E1000_PRC1023);
	adapter->stats.prc1522 += rd32(E1000_PRC1522);
	adapter->stats.symerrs += rd32(E1000_SYMERRS);
	adapter->stats.sec += rd32(E1000_SEC);

	mpc = rd32(E1000_MPC);
	adapter->stats.mpc += mpc;
	net_stats->rx_fifo_errors += mpc;
	adapter->stats.scc += rd32(E1000_SCC);
	adapter->stats.ecol += rd32(E1000_ECOL);
	adapter->stats.mcc += rd32(E1000_MCC);
	adapter->stats.latecol += rd32(E1000_LATECOL);
	adapter->stats.dc += rd32(E1000_DC);
	adapter->stats.rlec += rd32(E1000_RLEC);
	adapter->stats.xonrxc += rd32(E1000_XONRXC);
	adapter->stats.xontxc += rd32(E1000_XONTXC);
	adapter->stats.xoffrxc += rd32(E1000_XOFFRXC);
	adapter->stats.xofftxc += rd32(E1000_XOFFTXC);
	adapter->stats.fcruc += rd32(E1000_FCRUC);
	adapter->stats.gptc += rd32(E1000_GPTC);
	adapter->stats.gotc += rd32(E1000_GOTCL);
	rd32(E1000_GOTCH); /* clear GOTCL */
	adapter->stats.rnbc += rd32(E1000_RNBC);
	adapter->stats.ruc += rd32(E1000_RUC);
	adapter->stats.rfc += rd32(E1000_RFC);
	adapter->stats.rjc += rd32(E1000_RJC);
	adapter->stats.tor += rd32(E1000_TORH);
	adapter->stats.tot += rd32(E1000_TOTH);
	adapter->stats.tpr += rd32(E1000_TPR);

	adapter->stats.ptc64 += rd32(E1000_PTC64);
	adapter->stats.ptc127 += rd32(E1000_PTC127);
	adapter->stats.ptc255 += rd32(E1000_PTC255);
	adapter->stats.ptc511 += rd32(E1000_PTC511);
	adapter->stats.ptc1023 += rd32(E1000_PTC1023);
	adapter->stats.ptc1522 += rd32(E1000_PTC1522);

	adapter->stats.mptc += rd32(E1000_MPTC);
	adapter->stats.bptc += rd32(E1000_BPTC);

	adapter->stats.tpt += rd32(E1000_TPT);
	adapter->stats.colc += rd32(E1000_COLC);

	adapter->stats.algnerrc += rd32(E1000_ALGNERRC);
	/* read internal phy specific stats */
	reg = rd32(E1000_CTRL_EXT);
	if (!(reg & E1000_CTRL_EXT_LINK_MODE_MASK)) {
		adapter->stats.rxerrc += rd32(E1000_RXERRC);
		adapter->stats.tncrs += rd32(E1000_TNCRS);

	adapter->stats.tsctc += rd32(E1000_TSCTC);
	adapter->stats.tsctfc += rd32(E1000_TSCTFC);

	adapter->stats.iac += rd32(E1000_IAC);
	adapter->stats.icrxoc += rd32(E1000_ICRXOC);
	adapter->stats.icrxptc += rd32(E1000_ICRXPTC);
	adapter->stats.icrxatc += rd32(E1000_ICRXATC);
	adapter->stats.ictxptc += rd32(E1000_ICTXPTC);
	adapter->stats.ictxatc += rd32(E1000_ICTXATC);
	adapter->stats.ictxqec += rd32(E1000_ICTXQEC);
	adapter->stats.ictxqmtc += rd32(E1000_ICTXQMTC);
	adapter->stats.icrxdmtc += rd32(E1000_ICRXDMTC);

	/* Fill out the OS statistics structure */
	net_stats->multicast = adapter->stats.mprc;
	net_stats->collisions = adapter->stats.colc;

	/* RLEC on some newer hardware can be incorrect so build
	 * our own version based on RUC and ROC */
	net_stats->rx_errors = adapter->stats.rxerrc +
		adapter->stats.crcerrs + adapter->stats.algnerrc +
		adapter->stats.ruc + adapter->stats.roc +
		adapter->stats.cexterr;
	net_stats->rx_length_errors = adapter->stats.ruc +
		adapter->stats.roc;
	net_stats->rx_crc_errors = adapter->stats.crcerrs;
	net_stats->rx_frame_errors = adapter->stats.algnerrc;
	net_stats->rx_missed_errors = adapter->stats.mpc;

	net_stats->tx_errors = adapter->stats.ecol +
			       adapter->stats.latecol;
	net_stats->tx_aborted_errors = adapter->stats.ecol;
	net_stats->tx_window_errors = adapter->stats.latecol;
	net_stats->tx_carrier_errors = adapter->stats.tncrs;

	/* Tx Dropped needs to be maintained elsewhere */

	if (hw->phy.media_type == e1000_media_type_copper) {
		if ((adapter->link_speed == SPEED_1000) &&
		    (!igb_read_phy_reg(hw, PHY_1000T_STATUS, &phy_tmp))) {
			phy_tmp &= PHY_IDLE_ERROR_COUNT_MASK;
			adapter->phy_stats.idle_errors += phy_tmp;

	/* Management Stats */
	adapter->stats.mgptc += rd32(E1000_MGTPTC);
	adapter->stats.mgprc += rd32(E1000_MGTPRC);
	adapter->stats.mgpdc += rd32(E1000_MGTPDC);

	reg = rd32(E1000_MANC);
	if (reg & E1000_MANC_EN_BMC2OS) {
		adapter->stats.o2bgptc += rd32(E1000_O2BGPTC);
		adapter->stats.o2bspc += rd32(E1000_O2BSPC);
		adapter->stats.b2ospc += rd32(E1000_B2OSPC);
		adapter->stats.b2ogprc += rd32(E1000_B2OGPRC);
static irqreturn_t igb_msix_other(int irq, void *data)
	struct igb_adapter *adapter = data;
	struct e1000_hw *hw = &adapter->hw;
	u32 icr = rd32(E1000_ICR);
	/* reading ICR causes bit 31 of EICR to be cleared */

	if (icr & E1000_ICR_DRSTA)
		schedule_work(&adapter->reset_task);

	if (icr & E1000_ICR_DOUTSYNC) {
		/* HW is reporting DMA is out of sync */
		adapter->stats.doosync++;
		/* The DMA Out of Sync is also an indication of a spoof event
		 * in IOV mode. Check the Wrong VM Behavior register to
		 * see if it is really a spoof event. */
		igb_check_wvbr(adapter);

	/* Check for a mailbox event */
	if (icr & E1000_ICR_VMMB)
		igb_msg_task(adapter);

	if (icr & E1000_ICR_LSC) {
		hw->mac.get_link_status = 1;
		/* guard against interrupt when we're going down */
		if (!test_bit(__IGB_DOWN, &adapter->state))
			mod_timer(&adapter->watchdog_timer, jiffies + 1);

	wr32(E1000_EIMS, adapter->eims_other);

static void igb_write_itr(struct igb_q_vector *q_vector)
	struct igb_adapter *adapter = q_vector->adapter;
	u32 itr_val = q_vector->itr_val & 0x7FFC;

	if (!q_vector->set_itr)

	if (adapter->hw.mac.type == e1000_82575)
		itr_val |= itr_val << 16;
		itr_val |= E1000_EITR_CNT_IGNR;

	writel(itr_val, q_vector->itr_register);
	q_vector->set_itr = 0;

static irqreturn_t igb_msix_ring(int irq, void *data)
	struct igb_q_vector *q_vector = data;

	/* Write the ITR value calculated from the previous interrupt. */
	igb_write_itr(q_vector);

	napi_schedule(&q_vector->napi);

#ifdef CONFIG_IGB_DCA
static void igb_update_dca(struct igb_q_vector *q_vector)
	struct igb_adapter *adapter = q_vector->adapter;
	struct e1000_hw *hw = &adapter->hw;
	int cpu = get_cpu();

	if (q_vector->cpu == cpu)

	if (q_vector->tx.ring) {
		int q = q_vector->tx.ring->reg_idx;
		u32 dca_txctrl = rd32(E1000_DCA_TXCTRL(q));
		if (hw->mac.type == e1000_82575) {
			dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK;
			dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
			dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK_82576;
			dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
				      E1000_DCA_TXCTRL_CPUID_SHIFT;
		dca_txctrl |= E1000_DCA_TXCTRL_DESC_DCA_EN;
		wr32(E1000_DCA_TXCTRL(q), dca_txctrl);

	if (q_vector->rx.ring) {
		int q = q_vector->rx.ring->reg_idx;
		u32 dca_rxctrl = rd32(E1000_DCA_RXCTRL(q));
		if (hw->mac.type == e1000_82575) {
			dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK;
			dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
			dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK_82576;
			dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
				      E1000_DCA_RXCTRL_CPUID_SHIFT;
		dca_rxctrl |= E1000_DCA_RXCTRL_DESC_DCA_EN;
		dca_rxctrl |= E1000_DCA_RXCTRL_HEAD_DCA_EN;
		dca_rxctrl |= E1000_DCA_RXCTRL_DATA_DCA_EN;
		wr32(E1000_DCA_RXCTRL(q), dca_rxctrl);
	q_vector->cpu = cpu;
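/* Background note: DCA (Direct Cache Access) lets the chipset tag descriptor
 * and packet DMA writes with the CPU that will consume them, so the data
 * lands warm in that CPU's cache; the tag is refreshed above whenever the
 * q_vector migrates to a different CPU.
 */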
static void igb_setup_dca(struct igb_adapter *adapter)
	struct e1000_hw *hw = &adapter->hw;

	if (!(adapter->flags & IGB_FLAG_DCA_ENABLED))

	/* Always use CB2 mode, difference is masked in the CB driver. */
	wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);

	for (i = 0; i < adapter->num_q_vectors; i++) {
		adapter->q_vector[i]->cpu = -1;
		igb_update_dca(adapter->q_vector[i]);

static int __igb_notify_dca(struct device *dev, void *data)
	struct net_device *netdev = dev_get_drvdata(dev);
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct pci_dev *pdev = adapter->pdev;
	struct e1000_hw *hw = &adapter->hw;
	unsigned long event = *(unsigned long *)data;

	case DCA_PROVIDER_ADD:
		/* if already enabled, don't do it again */
		if (adapter->flags & IGB_FLAG_DCA_ENABLED)
		if (dca_add_requester(dev) == 0) {
			adapter->flags |= IGB_FLAG_DCA_ENABLED;
			dev_info(&pdev->dev, "DCA enabled\n");
			igb_setup_dca(adapter);
		/* Fall Through since DCA is disabled. */
	case DCA_PROVIDER_REMOVE:
		if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
			/* without this a class_device is left
			 * hanging around in the sysfs model */
			dca_remove_requester(dev);
			dev_info(&pdev->dev, "DCA disabled\n");
			adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
			wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);

static int igb_notify_dca(struct notifier_block *nb, unsigned long event,
	ret_val = driver_for_each_device(&igb_driver.driver, NULL, &event,

	return ret_val ? NOTIFY_BAD : NOTIFY_DONE;
#endif /* CONFIG_IGB_DCA */

#ifdef CONFIG_PCI_IOV
static int igb_vf_configure(struct igb_adapter *adapter, int vf)
	unsigned char mac_addr[ETH_ALEN];
	struct pci_dev *pdev = adapter->pdev;
	struct e1000_hw *hw = &adapter->hw;
	struct pci_dev *pvfdev;
	unsigned int device_id;

	random_ether_addr(mac_addr);
	igb_set_vf_mac(adapter, vf, mac_addr);

	switch (adapter->hw.mac.type) {
		device_id = IGB_82576_VF_DEV_ID;
		/* VF Stride for 82576 is 2 */
		thisvf_devfn = (pdev->devfn + 0x80 + (vf << 1)) |
		device_id = IGB_I350_VF_DEV_ID;
		/* VF Stride for I350 is 4 */
		thisvf_devfn = (pdev->devfn + 0x80 + (vf << 2)) |

	pvfdev = pci_get_device(hw->vendor_id, device_id, NULL);
		if (pvfdev->devfn == thisvf_devfn)
		pvfdev = pci_get_device(hw->vendor_id,

		adapter->vf_data[vf].vfdev = pvfdev;
			"Couldn't find pci dev ptr for VF %4.4x\n",
	return pvfdev != NULL;

static int igb_find_enabled_vfs(struct igb_adapter *adapter)
	struct e1000_hw *hw = &adapter->hw;
	struct pci_dev *pdev = adapter->pdev;
	struct pci_dev *pvfdev;
	unsigned int device_id;

	switch (adapter->hw.mac.type) {
		device_id = IGB_82576_VF_DEV_ID;
		/* VF Stride for 82576 is 2 */
		device_id = IGB_I350_VF_DEV_ID;
		/* VF Stride for I350 is 4 */

	vf_devfn = pdev->devfn + 0x80;
	pvfdev = pci_get_device(hw->vendor_id, device_id, NULL);
		if (pvfdev->devfn == vf_devfn)
		vf_devfn += vf_stride;
		pvfdev = pci_get_device(hw->vendor_id,

static int igb_check_vf_assignment(struct igb_adapter *adapter)
	for (i = 0; i < adapter->vfs_allocated_count; i++) {
		if (adapter->vf_data[i].vfdev) {
			if (adapter->vf_data[i].vfdev->dev_flags &
			    PCI_DEV_FLAGS_ASSIGNED)

static void igb_ping_all_vfs(struct igb_adapter *adapter)
	struct e1000_hw *hw = &adapter->hw;

	for (i = 0 ; i < adapter->vfs_allocated_count; i++) {
		ping = E1000_PF_CONTROL_MSG;
		if (adapter->vf_data[i].flags & IGB_VF_FLAG_CTS)
			ping |= E1000_VT_MSGTYPE_CTS;
		igb_write_mbx(hw, &ping, 1, i);

static int igb_set_vf_promisc(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
	struct e1000_hw *hw = &adapter->hw;
	u32 vmolr = rd32(E1000_VMOLR(vf));
	struct vf_data_storage *vf_data = &adapter->vf_data[vf];

	vf_data->flags &= ~(IGB_VF_FLAG_UNI_PROMISC |
			    IGB_VF_FLAG_MULTI_PROMISC);
	vmolr &= ~(E1000_VMOLR_ROPE | E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);

	if (*msgbuf & E1000_VF_SET_PROMISC_MULTICAST) {
		vmolr |= E1000_VMOLR_MPME;
		vf_data->flags |= IGB_VF_FLAG_MULTI_PROMISC;
		*msgbuf &= ~E1000_VF_SET_PROMISC_MULTICAST;

	/*
	 * if we have hashes and we are clearing a multicast promisc
	 * flag we need to write the hashes to the MTA as this step
	 * was previously skipped
	 */
	if (vf_data->num_vf_mc_hashes > 30) {
		vmolr |= E1000_VMOLR_MPME;
	} else if (vf_data->num_vf_mc_hashes) {
		vmolr |= E1000_VMOLR_ROMPE;
		for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
			igb_mta_set(hw, vf_data->vf_mc_hashes[j]);

	wr32(E1000_VMOLR(vf), vmolr);

	/* there are flags left unprocessed, likely not supported */
	if (*msgbuf & E1000_VT_MSGINFO_MASK)

static int igb_set_vf_multicasts(struct igb_adapter *adapter,
				 u32 *msgbuf, u32 vf)
	int n = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
	u16 *hash_list = (u16 *)&msgbuf[1];
	struct vf_data_storage *vf_data = &adapter->vf_data[vf];

	/* salt away the number of multicast addresses assigned
	 * to this VF for later use to restore when the PF multicast
	 * list changes
	 */
	vf_data->num_vf_mc_hashes = n;

	/* only up to 30 hash values supported */

	/* store the hashes for later use */
	for (i = 0; i < n; i++)
		vf_data->vf_mc_hashes[i] = hash_list[i];

	/* Flush and reset the mta with the new values */
	igb_set_rx_mode(adapter->netdev);
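/* Note on the 30-entry limit (taken from the checks above): each VF can
 * shadow at most 30 multicast hash values; past that the code falls back
 * to multicast promiscuous mode (VMOLR.MPME) for that VF instead of
 * programming individual MTA entries.
 */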
static void igb_restore_vf_multicasts(struct igb_adapter *adapter)
	struct e1000_hw *hw = &adapter->hw;
	struct vf_data_storage *vf_data;

	for (i = 0; i < adapter->vfs_allocated_count; i++) {
		u32 vmolr = rd32(E1000_VMOLR(i));
		vmolr &= ~(E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);

		vf_data = &adapter->vf_data[i];

		if ((vf_data->num_vf_mc_hashes > 30) ||
		    (vf_data->flags & IGB_VF_FLAG_MULTI_PROMISC)) {
			vmolr |= E1000_VMOLR_MPME;
		} else if (vf_data->num_vf_mc_hashes) {
			vmolr |= E1000_VMOLR_ROMPE;
			for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
				igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
		wr32(E1000_VMOLR(i), vmolr);

static void igb_clear_vf_vfta(struct igb_adapter *adapter, u32 vf)
	struct e1000_hw *hw = &adapter->hw;
	u32 pool_mask, reg, vid;

	pool_mask = 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);

	/* Find the vlan filter for this id */
	for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
		reg = rd32(E1000_VLVF(i));

		/* remove the vf from the pool */

		/* if pool is empty then remove entry from vfta */
		if (!(reg & E1000_VLVF_POOLSEL_MASK) &&
		    (reg & E1000_VLVF_VLANID_ENABLE)) {
			vid = reg & E1000_VLVF_VLANID_MASK;
			igb_vfta_set(hw, vid, false);

		wr32(E1000_VLVF(i), reg);

	adapter->vf_data[vf].vlans_enabled = 0;

static s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf)
	struct e1000_hw *hw = &adapter->hw;

	/* The vlvf table only exists on 82576 hardware and newer */
	if (hw->mac.type < e1000_82576)

	/* we only need to do this if VMDq is enabled */
	if (!adapter->vfs_allocated_count)

	/* Find the vlan filter for this id */
	for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
		reg = rd32(E1000_VLVF(i));
		if ((reg & E1000_VLVF_VLANID_ENABLE) &&
		    vid == (reg & E1000_VLVF_VLANID_MASK))

	if (i == E1000_VLVF_ARRAY_SIZE) {
		/* Did not find a matching VLAN ID entry that was
		 * enabled.  Search for a free filter entry, i.e.
		 * one without the enable bit set
		 */
		for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
			reg = rd32(E1000_VLVF(i));
			if (!(reg & E1000_VLVF_VLANID_ENABLE))

	if (i < E1000_VLVF_ARRAY_SIZE) {
		/* Found an enabled/available entry */
		reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);

		/* if !enabled we need to set this up in vfta */
		if (!(reg & E1000_VLVF_VLANID_ENABLE)) {
			/* add VID to filter table */
			igb_vfta_set(hw, vid, true);
			reg |= E1000_VLVF_VLANID_ENABLE;
		reg &= ~E1000_VLVF_VLANID_MASK;

		wr32(E1000_VLVF(i), reg);

		/* do not modify RLPML for PF devices */
		if (vf >= adapter->vfs_allocated_count)

		if (!adapter->vf_data[vf].vlans_enabled) {
			reg = rd32(E1000_VMOLR(vf));
			size = reg & E1000_VMOLR_RLPML_MASK;
			reg &= ~E1000_VMOLR_RLPML_MASK;
			wr32(E1000_VMOLR(vf), reg);

		adapter->vf_data[vf].vlans_enabled++;
	if (i < E1000_VLVF_ARRAY_SIZE) {
		/* remove vf from the pool */
		reg &= ~(1 << (E1000_VLVF_POOLSEL_SHIFT + vf));
		/* if pool is empty then remove entry from vfta */
		if (!(reg & E1000_VLVF_POOLSEL_MASK)) {
			igb_vfta_set(hw, vid, false);
		wr32(E1000_VLVF(i), reg);

		/* do not modify RLPML for PF devices */
		if (vf >= adapter->vfs_allocated_count)

		adapter->vf_data[vf].vlans_enabled--;
		if (!adapter->vf_data[vf].vlans_enabled) {
			reg = rd32(E1000_VMOLR(vf));
			size = reg & E1000_VMOLR_RLPML_MASK;
			reg &= ~E1000_VMOLR_RLPML_MASK;
			wr32(E1000_VMOLR(vf), reg);

static void igb_set_vmvir(struct igb_adapter *adapter, u32 vid, u32 vf)
	struct e1000_hw *hw = &adapter->hw;

		wr32(E1000_VMVIR(vf), (vid | E1000_VMVIR_VLANA_DEFAULT));
		wr32(E1000_VMVIR(vf), 0);

static int igb_ndo_set_vf_vlan(struct net_device *netdev,
			       int vf, u16 vlan, u8 qos)
	struct igb_adapter *adapter = netdev_priv(netdev);

	if ((vf >= adapter->vfs_allocated_count) || (vlan > 4095) || (qos > 7))

	err = igb_vlvf_set(adapter, vlan, !!vlan, vf);
	igb_set_vmvir(adapter, vlan | (qos << VLAN_PRIO_SHIFT), vf);
	igb_set_vmolr(adapter, vf, !vlan);
	adapter->vf_data[vf].pf_vlan = vlan;
	adapter->vf_data[vf].pf_qos = qos;
	dev_info(&adapter->pdev->dev,
		 "Setting VLAN %d, QOS 0x%x on VF %d\n", vlan, qos, vf);
	if (test_bit(__IGB_DOWN, &adapter->state)) {
		dev_warn(&adapter->pdev->dev,
			 "The VF VLAN has been set, but the PF device is not up.\n");
		dev_warn(&adapter->pdev->dev,
			 "Bring the PF device up before attempting to use the VF device.\n");
	igb_vlvf_set(adapter, adapter->vf_data[vf].pf_vlan,
	igb_set_vmvir(adapter, vlan, vf);
	igb_set_vmolr(adapter, vf, true);
	adapter->vf_data[vf].pf_vlan = 0;
	adapter->vf_data[vf].pf_qos = 0;

static int igb_set_vf_vlan(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
	int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
	int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK);

	return igb_vlvf_set(adapter, vid, add, vf);

static inline void igb_vf_reset(struct igb_adapter *adapter, u32 vf)
	/* clear flags - except flag that indicates PF has set the MAC */
	adapter->vf_data[vf].flags &= IGB_VF_FLAG_PF_SET_MAC;
	adapter->vf_data[vf].last_nack = jiffies;

	/* reset offloads to defaults */
	igb_set_vmolr(adapter, vf, true);

	/* reset vlans for device */
	igb_clear_vf_vfta(adapter, vf);
	if (adapter->vf_data[vf].pf_vlan)
		igb_ndo_set_vf_vlan(adapter->netdev, vf,
				    adapter->vf_data[vf].pf_vlan,
				    adapter->vf_data[vf].pf_qos);
		igb_clear_vf_vfta(adapter, vf);

	/* reset multicast table array for vf */
	adapter->vf_data[vf].num_vf_mc_hashes = 0;

	/* Flush and reset the mta with the new values */
	igb_set_rx_mode(adapter->netdev);

static void igb_vf_reset_event(struct igb_adapter *adapter, u32 vf)
	unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;

	/* generate a new mac address as we were hotplug removed/added */
	if (!(adapter->vf_data[vf].flags & IGB_VF_FLAG_PF_SET_MAC))
		random_ether_addr(vf_mac);

	/* process remaining reset events */
	igb_vf_reset(adapter, vf);

static void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf)
	struct e1000_hw *hw = &adapter->hw;
	unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
	int rar_entry = hw->mac.rar_entry_count - (vf + 1);
	u8 *addr = (u8 *)(&msgbuf[1]);

	/* process all the same items cleared in a function level reset */
	igb_vf_reset(adapter, vf);

	/* set vf mac address */
	igb_rar_set_qsel(adapter, vf_mac, rar_entry, vf);

	/* enable transmit and receive for vf */
	reg = rd32(E1000_VFTE);
	wr32(E1000_VFTE, reg | (1 << vf));
	reg = rd32(E1000_VFRE);
	wr32(E1000_VFRE, reg | (1 << vf));

	adapter->vf_data[vf].flags |= IGB_VF_FLAG_CTS;

	/* reply to reset with ack and vf mac address */
	msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK;
	memcpy(addr, vf_mac, 6);
	igb_write_mbx(hw, msgbuf, 3, vf);
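/* Mailbox reply layout here: three 32-bit words are written back, with
 * msgbuf[0] carrying E1000_VF_RESET | E1000_VT_MSGTYPE_ACK and the six MAC
 * address bytes packed into msgbuf[1..2], which is why the memcpy above
 * targets &msgbuf[1].
 */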
static int igb_set_vf_mac_addr(struct igb_adapter *adapter, u32 *msg, int vf)
	/*
	 * The VF MAC Address is stored in a packed array of bytes
	 * starting at the second 32 bit word of the msg array
	 */
	unsigned char *addr = (char *)&msg[1];

	if (is_valid_ether_addr(addr))
		err = igb_set_vf_mac(adapter, vf, addr);

static void igb_rcv_ack_from_vf(struct igb_adapter *adapter, u32 vf)
	struct e1000_hw *hw = &adapter->hw;
	struct vf_data_storage *vf_data = &adapter->vf_data[vf];
	u32 msg = E1000_VT_MSGTYPE_NACK;

	/* if device isn't clear to send it shouldn't be reading either */
	if (!(vf_data->flags & IGB_VF_FLAG_CTS) &&
	    time_after(jiffies, vf_data->last_nack + (2 * HZ))) {
		igb_write_mbx(hw, &msg, 1, vf);
		vf_data->last_nack = jiffies;

static void igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf)
	struct pci_dev *pdev = adapter->pdev;
	u32 msgbuf[E1000_VFMAILBOX_SIZE];
	struct e1000_hw *hw = &adapter->hw;
	struct vf_data_storage *vf_data = &adapter->vf_data[vf];

	retval = igb_read_mbx(hw, msgbuf, E1000_VFMAILBOX_SIZE, vf);

		/* if receive failed revoke VF CTS stats and restart init */
		dev_err(&pdev->dev, "Error receiving message from VF\n");
		vf_data->flags &= ~IGB_VF_FLAG_CTS;
		if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))

	/* this is a message we already processed, do nothing */
	if (msgbuf[0] & (E1000_VT_MSGTYPE_ACK | E1000_VT_MSGTYPE_NACK))

	/*
	 * until the vf completes a reset it should not be
	 * allowed to start any configuration.
	 */
	if (msgbuf[0] == E1000_VF_RESET) {
		igb_vf_reset_msg(adapter, vf);

	if (!(vf_data->flags & IGB_VF_FLAG_CTS)) {
		if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))

	switch ((msgbuf[0] & 0xFFFF)) {
	case E1000_VF_SET_MAC_ADDR:
		if (!(vf_data->flags & IGB_VF_FLAG_PF_SET_MAC))
			retval = igb_set_vf_mac_addr(adapter, msgbuf, vf);
			dev_warn(&pdev->dev,
				 "VF %d attempted to override administratively set MAC address\n"
				 "Reload the VF driver to resume operations\n",
				 vf);
	case E1000_VF_SET_PROMISC:
		retval = igb_set_vf_promisc(adapter, msgbuf, vf);
	case E1000_VF_SET_MULTICAST:
		retval = igb_set_vf_multicasts(adapter, msgbuf, vf);
	case E1000_VF_SET_LPE:
		retval = igb_set_vf_rlpml(adapter, msgbuf[1], vf);
	case E1000_VF_SET_VLAN:
		if (vf_data->pf_vlan)
			dev_warn(&pdev->dev,
				 "VF %d attempted to override administratively set VLAN tag\n"
				 "Reload the VF driver to resume operations\n",
				 vf);
		retval = igb_set_vf_vlan(adapter, msgbuf, vf);
		dev_err(&pdev->dev, "Unhandled Msg %08x\n", msgbuf[0]);

	msgbuf[0] |= E1000_VT_MSGTYPE_CTS;

	/* notify the VF of the results of what it sent us */
		msgbuf[0] |= E1000_VT_MSGTYPE_NACK;
		msgbuf[0] |= E1000_VT_MSGTYPE_ACK;

	igb_write_mbx(hw, msgbuf, 1, vf);

static void igb_msg_task(struct igb_adapter *adapter)
	struct e1000_hw *hw = &adapter->hw;

	for (vf = 0; vf < adapter->vfs_allocated_count; vf++) {
		/* process any reset requests */
		if (!igb_check_for_rst(hw, vf))
			igb_vf_reset_event(adapter, vf);

		/* process any messages pending */
		if (!igb_check_for_msg(hw, vf))
			igb_rcv_msg_from_vf(adapter, vf);

		/* process any acks */
		if (!igb_check_for_ack(hw, vf))
			igb_rcv_ack_from_vf(adapter, vf);

/**
 * igb_set_uta - Set unicast filter table address
 * @adapter: board private structure
 *
 * The unicast table address is a register array of 32-bit registers.
 * The table is meant to be used in a way similar to how the MTA is used
 * however due to certain limitations in the hardware it is necessary to
 * set all the hash bits to 1 and use the VMOLR ROPE bit as a promiscuous
 * enable bit to allow vlan tag stripping when promiscuous mode is enabled
 **/
static void igb_set_uta(struct igb_adapter *adapter)
	struct e1000_hw *hw = &adapter->hw;

	/* The UTA table only exists on 82576 hardware and newer */
	if (hw->mac.type < e1000_82576)

	/* we only need to do this if VMDq is enabled */
	if (!adapter->vfs_allocated_count)

	for (i = 0; i < hw->mac.uta_reg_count; i++)
		array_wr32(E1000_UTA, i, ~0);
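/* Effect of the loop above: every bit of every 32-bit UTA register is set,
 * so any unicast hash matches; actual unicast filtering is then gated per
 * pool by the VMOLR ROPE bit, as described in the comment block above.
 */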
/**
 * igb_intr_msi - Interrupt Handler
 * @irq: interrupt number
 * @data: pointer to a network interface device structure
 **/
static irqreturn_t igb_intr_msi(int irq, void *data)
	struct igb_adapter *adapter = data;
	struct igb_q_vector *q_vector = adapter->q_vector[0];
	struct e1000_hw *hw = &adapter->hw;
	/* read ICR disables interrupts using IAM */
	u32 icr = rd32(E1000_ICR);

	igb_write_itr(q_vector);

	if (icr & E1000_ICR_DRSTA)
		schedule_work(&adapter->reset_task);

	if (icr & E1000_ICR_DOUTSYNC) {
		/* HW is reporting DMA is out of sync */
		adapter->stats.doosync++;

	if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
		hw->mac.get_link_status = 1;
		if (!test_bit(__IGB_DOWN, &adapter->state))
			mod_timer(&adapter->watchdog_timer, jiffies + 1);

	napi_schedule(&q_vector->napi);

/**
 * igb_intr - Legacy Interrupt Handler
 * @irq: interrupt number
 * @data: pointer to a network interface device structure
 **/
static irqreturn_t igb_intr(int irq, void *data)
	struct igb_adapter *adapter = data;
	struct igb_q_vector *q_vector = adapter->q_vector[0];
	struct e1000_hw *hw = &adapter->hw;
	/* Interrupt Auto-Mask...upon reading ICR, interrupts are masked.  No
	 * need for the IMC write */
	u32 icr = rd32(E1000_ICR);

	/* IMS will not auto-mask if INT_ASSERTED is not set, and if it is
	 * not set, then the adapter didn't send an interrupt */
	if (!(icr & E1000_ICR_INT_ASSERTED))

	igb_write_itr(q_vector);

	if (icr & E1000_ICR_DRSTA)
		schedule_work(&adapter->reset_task);

	if (icr & E1000_ICR_DOUTSYNC) {
		/* HW is reporting DMA is out of sync */
		adapter->stats.doosync++;

	if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
		hw->mac.get_link_status = 1;
		/* guard against interrupt when we're going down */
		if (!test_bit(__IGB_DOWN, &adapter->state))
			mod_timer(&adapter->watchdog_timer, jiffies + 1);

	napi_schedule(&q_vector->napi);

void igb_ring_irq_enable(struct igb_q_vector *q_vector)
	struct igb_adapter *adapter = q_vector->adapter;
	struct e1000_hw *hw = &adapter->hw;

	if ((q_vector->rx.ring && (adapter->rx_itr_setting & 3)) ||
	    (!q_vector->rx.ring && (adapter->tx_itr_setting & 3))) {
		if ((adapter->num_q_vectors == 1) && !adapter->vf_data)
			igb_set_itr(q_vector);
			igb_update_ring_itr(q_vector);

	if (!test_bit(__IGB_DOWN, &adapter->state)) {
		if (adapter->msix_entries)
			wr32(E1000_EIMS, q_vector->eims_value);
			igb_irq_enable(adapter);

/**
 * igb_poll - NAPI Rx polling callback
 * @napi: napi polling structure
 * @budget: count of how many packets we should handle
 **/
static int igb_poll(struct napi_struct *napi, int budget)
	struct igb_q_vector *q_vector = container_of(napi,
						     struct igb_q_vector,
	bool clean_complete = true;

#ifdef CONFIG_IGB_DCA
	if (q_vector->adapter->flags & IGB_FLAG_DCA_ENABLED)
		igb_update_dca(q_vector);

	if (q_vector->tx.ring)
		clean_complete = igb_clean_tx_irq(q_vector);

	if (q_vector->rx.ring)
		clean_complete &= igb_clean_rx_irq(q_vector, budget);

	/* If all work not completed, return budget and keep polling */
	if (!clean_complete)

	/* If not enough Rx work done, exit the polling mode */
	napi_complete(napi);
	igb_ring_irq_enable(q_vector);

/**
 * igb_systim_to_hwtstamp - convert system time value to hw timestamp
 * @adapter: board private structure
 * @shhwtstamps: timestamp structure to update
 * @regval: unsigned 64bit system time value.
 *
 * We need to convert the system time value stored in the RX/TXSTMP registers
 * into a hwtstamp which can be used by the upper level timestamping functions
 */
static void igb_systim_to_hwtstamp(struct igb_adapter *adapter,
				   struct skb_shared_hwtstamps *shhwtstamps,
	/*
	 * The 82580 starts with 1ns at bit 0 in RX/TXSTMPL, shift this up to
	 * 24 to match clock shift we setup earlier.
	 */
	if (adapter->hw.mac.type >= e1000_82580)
		regval <<= IGB_82580_TSYNC_SHIFT;

	ns = timecounter_cyc2time(&adapter->clock, regval);
	timecompare_update(&adapter->compare, ns);
	memset(shhwtstamps, 0, sizeof(struct skb_shared_hwtstamps));
	shhwtstamps->hwtstamp = ns_to_ktime(ns);
	shhwtstamps->syststamp = timecompare_transform(&adapter->compare, ns);
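/* Clarifying note on the shift above: on 82580-class parts the timestamp
 * registers count whole nanoseconds from bit 0, while the driver's cycle
 * counter was registered with a left shift (IGB_82580_TSYNC_SHIFT), so the
 * raw register value is shifted up first to keep both in the same units
 * before timecounter_cyc2time() converts it.
 */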
/**
 * igb_tx_hwtstamp - utility function which checks for TX time stamp
 * @q_vector: pointer to q_vector containing needed info
 * @buffer: pointer to igb_tx_buffer structure
 *
 * If we were asked to do hardware stamping and such a time stamp is
 * available, then it must have been for this skb here because we only
 * allow one such packet into the queue.
 */
static void igb_tx_hwtstamp(struct igb_q_vector *q_vector,
			    struct igb_tx_buffer *buffer_info)
	struct igb_adapter *adapter = q_vector->adapter;
	struct e1000_hw *hw = &adapter->hw;
	struct skb_shared_hwtstamps shhwtstamps;

	/* if skb does not support hw timestamp or TX stamp not valid exit */
	if (likely(!(buffer_info->tx_flags & IGB_TX_FLAGS_TSTAMP)) ||
	    !(rd32(E1000_TSYNCTXCTL) & E1000_TSYNCTXCTL_VALID))

	regval = rd32(E1000_TXSTMPL);
	regval |= (u64)rd32(E1000_TXSTMPH) << 32;

	igb_systim_to_hwtstamp(adapter, &shhwtstamps, regval);
	skb_tstamp_tx(buffer_info->skb, &shhwtstamps);

/**
 * igb_clean_tx_irq - Reclaim resources after transmit completes
 * @q_vector: pointer to q_vector containing needed info
 * returns true if ring is completely cleaned
 **/
static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
	struct igb_adapter *adapter = q_vector->adapter;
	struct igb_ring *tx_ring = q_vector->tx.ring;
	struct igb_tx_buffer *tx_buffer;
	union e1000_adv_tx_desc *tx_desc, *eop_desc;
	unsigned int total_bytes = 0, total_packets = 0;
	unsigned int budget = q_vector->tx.work_limit;
	unsigned int i = tx_ring->next_to_clean;

	if (test_bit(__IGB_DOWN, &adapter->state))

	tx_buffer = &tx_ring->tx_buffer_info[i];
	tx_desc = IGB_TX_DESC(tx_ring, i);
	i -= tx_ring->count;

	for (; budget; budget--) {
		eop_desc = tx_buffer->next_to_watch;

		/* prevent any other reads prior to eop_desc */

		/* if next_to_watch is not set then there is no work pending */

		/* if DD is not set pending work has not been completed */
		if (!(eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)))

		/* clear next_to_watch to prevent false hangs */
		tx_buffer->next_to_watch = NULL;

		/* update the statistics for this packet */
		total_bytes += tx_buffer->bytecount;
		total_packets += tx_buffer->gso_segs;

		/* retrieve hardware timestamp */
		igb_tx_hwtstamp(q_vector, tx_buffer);

		dev_kfree_skb_any(tx_buffer->skb);
		tx_buffer->skb = NULL;

		/* unmap skb header data */
		dma_unmap_single(tx_ring->dev,

		/* clear last DMA location and unmap remaining buffers */
		while (tx_desc != eop_desc) {
				i -= tx_ring->count;
				tx_buffer = tx_ring->tx_buffer_info;
				tx_desc = IGB_TX_DESC(tx_ring, 0);

			/* unmap any remaining paged data */
			if (tx_buffer->dma) {
				dma_unmap_page(tx_ring->dev,

			/* clear last DMA location */

		/* move us one more past the eop_desc for start of next pkt */
			i -= tx_ring->count;
			tx_buffer = tx_ring->tx_buffer_info;
			tx_desc = IGB_TX_DESC(tx_ring, 0);

	i += tx_ring->count;
	tx_ring->next_to_clean = i;
	u64_stats_update_begin(&tx_ring->tx_syncp);
	tx_ring->tx_stats.bytes += total_bytes;
	tx_ring->tx_stats.packets += total_packets;
	u64_stats_update_end(&tx_ring->tx_syncp);
	q_vector->tx.total_bytes += total_bytes;
	q_vector->tx.total_packets += total_packets;

	if (test_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags)) {
		struct e1000_hw *hw = &adapter->hw;

		eop_desc = tx_buffer->next_to_watch;

		/* Detect a transmit hang in hardware, this serializes the
		 * check with the clearing of time_stamp and movement of i */
		clear_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags);
		    time_after(jiffies, tx_buffer->time_stamp +
			       (adapter->tx_timeout_factor * HZ)) &&
		    !(rd32(E1000_STATUS) & E1000_STATUS_TXOFF)) {

			/* detected Tx unit hang */
			dev_err(tx_ring->dev,
				"Detected Tx Unit Hang\n"
				"  next_to_use          <%x>\n"
				"  next_to_clean        <%x>\n"
				"buffer_info[next_to_clean]\n"
				"  time_stamp           <%lx>\n"
				"  next_to_watch        <%p>\n"
				"  desc.status          <%x>\n",
				tx_ring->queue_index,
				rd32(E1000_TDH(tx_ring->reg_idx)),
				readl(tx_ring->tail),
				tx_ring->next_to_use,
				tx_ring->next_to_clean,
				tx_buffer->time_stamp,
				eop_desc->wb.status);
			netif_stop_subqueue(tx_ring->netdev,
					    tx_ring->queue_index);

			/* we are about to reset, no point in enabling stuff */

	if (unlikely(total_packets &&
		     netif_carrier_ok(tx_ring->netdev) &&
		     igb_desc_unused(tx_ring) >= IGB_TX_QUEUE_WAKE)) {
		/* Make sure that anybody stopping the queue after this
		 * sees the new next_to_clean.
		 */
		if (__netif_subqueue_stopped(tx_ring->netdev,
					     tx_ring->queue_index) &&
		    !(test_bit(__IGB_DOWN, &adapter->state))) {
			netif_wake_subqueue(tx_ring->netdev,
					    tx_ring->queue_index);

			u64_stats_update_begin(&tx_ring->tx_syncp);
			tx_ring->tx_stats.restart_queue++;
			u64_stats_update_end(&tx_ring->tx_syncp);

static inline void igb_rx_checksum(struct igb_ring *ring,
				   union e1000_adv_rx_desc *rx_desc,
				   struct sk_buff *skb)
	skb_checksum_none_assert(skb);

	/* Ignore Checksum bit is set */
	if (igb_test_staterr(rx_desc, E1000_RXD_STAT_IXSM))

	/* Rx checksum disabled via ethtool */
	if (!(ring->netdev->features & NETIF_F_RXCSUM))

	/* TCP/UDP checksum error bit is set */
	if (igb_test_staterr(rx_desc,
			     E1000_RXDEXT_STATERR_TCPE |
			     E1000_RXDEXT_STATERR_IPE)) {
		/*
		 * work around errata with sctp packets where the TCPE aka
		 * L4E bit is set incorrectly on 64 byte (60 byte w/o crc)
		 * packets, (aka let the stack check the crc32c)
		 */
		if (!((skb->len == 60) &&
		      test_bit(IGB_RING_FLAG_RX_SCTP_CSUM, &ring->flags))) {
			u64_stats_update_begin(&ring->rx_syncp);
			ring->rx_stats.csum_err++;
			u64_stats_update_end(&ring->rx_syncp);
		/* let the stack verify checksum errors */

	/* It must be a TCP or UDP packet with a valid checksum */
	if (igb_test_staterr(rx_desc, E1000_RXD_STAT_TCPCS |
				      E1000_RXD_STAT_UDPCS))
		skb->ip_summed = CHECKSUM_UNNECESSARY;

	dev_dbg(ring->dev, "cksum success: bits %08X\n",
		le32_to_cpu(rx_desc->wb.upper.status_error));

static inline void igb_rx_hash(struct igb_ring *ring,
			       union e1000_adv_rx_desc *rx_desc,
			       struct sk_buff *skb)
	if (ring->netdev->features & NETIF_F_RXHASH)
		skb->rxhash = le32_to_cpu(rx_desc->wb.lower.hi_dword.rss);

static void igb_rx_hwtstamp(struct igb_q_vector *q_vector,
			    union e1000_adv_rx_desc *rx_desc,
			    struct sk_buff *skb)
	struct igb_adapter *adapter = q_vector->adapter;
	struct e1000_hw *hw = &adapter->hw;

	if (!igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP |
				       E1000_RXDADV_STAT_TS))

	/*
	 * If this bit is set, then the RX registers contain the time stamp. No
	 * other packet will be time stamped until we read these registers, so
	 * read the registers to make them available again. Because only one
	 * packet can be time stamped at a time, we know that the register
	 * values must belong to this one here and therefore we don't need to
	 * compare any of the additional attributes stored for it.
	 *
	 * If nothing went wrong, then it should have a shared tx_flags that we
	 * can turn into a skb_shared_hwtstamps.
	 */
	if (igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP)) {
		u32 *stamp = (u32 *)skb->data;
		regval = le32_to_cpu(*(stamp + 2));
		regval |= (u64)le32_to_cpu(*(stamp + 3)) << 32;
		skb_pull(skb, IGB_TS_HDR_LEN);

		if (!(rd32(E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID))

		regval = rd32(E1000_RXSTMPL);
		regval |= (u64)rd32(E1000_RXSTMPH) << 32;

	igb_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval);

static void igb_rx_vlan(struct igb_ring *ring,
			union e1000_adv_rx_desc *rx_desc,
			struct sk_buff *skb)
	if (igb_test_staterr(rx_desc, E1000_RXD_STAT_VP)) {
		if (igb_test_staterr(rx_desc, E1000_RXDEXT_STATERR_LB) &&
		    test_bit(IGB_RING_FLAG_RX_LB_VLAN_BSWAP, &ring->flags))
			vid = be16_to_cpu(rx_desc->wb.upper.vlan);
			vid = le16_to_cpu(rx_desc->wb.upper.vlan);

		__vlan_hwaccel_put_tag(skb, vid);

static inline u16 igb_get_hlen(union e1000_adv_rx_desc *rx_desc)
	/* HW will not DMA in data larger than the given buffer, even if it
	 * parses the (NFS, of course) header to be larger.  In that case, it
	 * fills the header buffer and spills the rest into the page.
	 */
	u16 hlen = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hdr_info) &
		    E1000_RXDADV_HDRBUFLEN_MASK) >> E1000_RXDADV_HDRBUFLEN_SHIFT;
	if (hlen > IGB_RX_HDR_LEN)
		hlen = IGB_RX_HDR_LEN;
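/* Example (illustrative): hdr_info is the descriptor's split-header field;
 * masking with E1000_RXDADV_HDRBUFLEN_MASK and shifting yields the header
 * length the hardware parsed, which is then clamped to the IGB_RX_HDR_LEN
 * sized header buffer that was actually posted.
 */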
static bool igb_clean_rx_irq(struct igb_q_vector *q_vector, int budget)
	struct igb_ring *rx_ring = q_vector->rx.ring;
	union e1000_adv_rx_desc *rx_desc;
	const int current_node = numa_node_id();
	unsigned int total_bytes = 0, total_packets = 0;
	u16 cleaned_count = igb_desc_unused(rx_ring);
	u16 i = rx_ring->next_to_clean;

	rx_desc = IGB_RX_DESC(rx_ring, i);

	while (igb_test_staterr(rx_desc, E1000_RXD_STAT_DD)) {
		struct igb_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i];
		struct sk_buff *skb = buffer_info->skb;
		union e1000_adv_rx_desc *next_rxd;

		buffer_info->skb = NULL;
		prefetch(skb->data);

		if (i == rx_ring->count)

		next_rxd = IGB_RX_DESC(rx_ring, i);

		/*
		 * This memory barrier is needed to keep us from reading
		 * any other fields out of the rx_desc until we know the
		 * RXD_STAT_DD bit is set
		 */

		if (!skb_is_nonlinear(skb)) {
			__skb_put(skb, igb_get_hlen(rx_desc));
			dma_unmap_single(rx_ring->dev, buffer_info->dma,
			buffer_info->dma = 0;

		if (rx_desc->wb.upper.length) {
			u16 length = le16_to_cpu(rx_desc->wb.upper.length);

			skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
					   buffer_info->page_offset,

			skb->data_len += length;
			skb->truesize += PAGE_SIZE / 2;

			if ((page_count(buffer_info->page) != 1) ||
			    (page_to_nid(buffer_info->page) != current_node))
				buffer_info->page = NULL;
				get_page(buffer_info->page);

			dma_unmap_page(rx_ring->dev, buffer_info->page_dma,
				       PAGE_SIZE / 2, DMA_FROM_DEVICE);
			buffer_info->page_dma = 0;

		if (!igb_test_staterr(rx_desc, E1000_RXD_STAT_EOP)) {
			struct igb_rx_buffer *next_buffer;
			next_buffer = &rx_ring->rx_buffer_info[i];
			buffer_info->skb = next_buffer->skb;
			buffer_info->dma = next_buffer->dma;
			next_buffer->skb = skb;
			next_buffer->dma = 0;

		if (igb_test_staterr(rx_desc,
				     E1000_RXDEXT_ERR_FRAME_ERR_MASK)) {
			dev_kfree_skb_any(skb);

		igb_rx_hwtstamp(q_vector, rx_desc, skb);
		igb_rx_hash(rx_ring, rx_desc, skb);
		igb_rx_checksum(rx_ring, rx_desc, skb);
		igb_rx_vlan(rx_ring, rx_desc, skb);

		total_bytes += skb->len;

		skb->protocol = eth_type_trans(skb, rx_ring->netdev);

		napi_gro_receive(&q_vector->napi, skb);

		/* return some buffers to hardware, one at a time is too slow */
		if (cleaned_count >= IGB_RX_BUFFER_WRITE) {
			igb_alloc_rx_buffers(rx_ring, cleaned_count);

		/* use prefetched values */

	rx_ring->next_to_clean = i;
	u64_stats_update_begin(&rx_ring->rx_syncp);
	rx_ring->rx_stats.packets += total_packets;
	rx_ring->rx_stats.bytes += total_bytes;
	u64_stats_update_end(&rx_ring->rx_syncp);
	q_vector->rx.total_packets += total_packets;
	q_vector->rx.total_bytes += total_bytes;

		igb_alloc_rx_buffers(rx_ring, cleaned_count);

static bool igb_alloc_mapped_skb(struct igb_ring *rx_ring,
				 struct igb_rx_buffer *bi)
	struct sk_buff *skb = bi->skb;
	dma_addr_t dma = bi->dma;

		skb = netdev_alloc_skb_ip_align(rx_ring->netdev,
			rx_ring->rx_stats.alloc_failed++;

		/* initialize skb for ring */
		skb_record_rx_queue(skb, rx_ring->queue_index);

		dma = dma_map_single(rx_ring->dev, skb->data,
				     IGB_RX_HDR_LEN, DMA_FROM_DEVICE);

		if (dma_mapping_error(rx_ring->dev, dma)) {
			rx_ring->rx_stats.alloc_failed++;

static bool igb_alloc_mapped_page(struct igb_ring *rx_ring,
				  struct igb_rx_buffer *bi)
	struct page *page = bi->page;
	dma_addr_t page_dma = bi->page_dma;
	unsigned int page_offset = bi->page_offset ^ (PAGE_SIZE / 2);

		page = netdev_alloc_page(rx_ring->netdev);
		if (unlikely(!page)) {
			rx_ring->rx_stats.alloc_failed++;

		page_dma = dma_map_page(rx_ring->dev, page,
					page_offset, PAGE_SIZE / 2,

		if (dma_mapping_error(rx_ring->dev, page_dma)) {
			rx_ring->rx_stats.alloc_failed++;

	bi->page_dma = page_dma;
	bi->page_offset = page_offset;

/**
 * igb_alloc_rx_buffers - Replace used receive buffers; packet split
 * @adapter: address of board private structure
 **/
void igb_alloc_rx_buffers(struct igb_ring *rx_ring, u16 cleaned_count)
	union e1000_adv_rx_desc *rx_desc;
	struct igb_rx_buffer *bi;
	u16 i = rx_ring->next_to_use;

	rx_desc = IGB_RX_DESC(rx_ring, i);
	bi = &rx_ring->rx_buffer_info[i];
	i -= rx_ring->count;

	while (cleaned_count--) {
		if (!igb_alloc_mapped_skb(rx_ring, bi))

		/* Refresh the desc even if buffer_addrs didn't change
		 * because each write-back erases this info. */
		rx_desc->read.hdr_addr = cpu_to_le64(bi->dma);

		if (!igb_alloc_mapped_page(rx_ring, bi))

		rx_desc->read.pkt_addr = cpu_to_le64(bi->page_dma);

			rx_desc = IGB_RX_DESC(rx_ring, 0);
			bi = rx_ring->rx_buffer_info;
			i -= rx_ring->count;

		/* clear the hdr_addr for the next_to_use descriptor */
		rx_desc->read.hdr_addr = 0;

	i += rx_ring->count;

	if (rx_ring->next_to_use != i) {
		rx_ring->next_to_use = i;

		/* Force memory writes to complete before letting h/w
		 * know there are new descriptors to fetch.  (Only
		 * applicable for weak-ordered memory model archs,
		 * such as IA-64). */

		writel(i, rx_ring->tail);
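/* Ordering note: the tail write is the hardware's cue that the refilled
 * descriptors are valid, so it must come after the descriptor writes have
 * completed; the barrier the comment above refers to (a wmb() in the
 * complete driver source) enforces that on weakly ordered architectures.
 */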
static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct mii_ioctl_data *data = if_mii(ifr);

	if (adapter->hw.phy.media_type != e1000_media_type_copper)

		data->phy_id = adapter->hw.phy.addr;
		if (igb_read_phy_reg(&adapter->hw, data->reg_num & 0x1F,

/**
 * igb_hwtstamp_ioctl - control hardware time stamping
 *
 * Outgoing time stamping can be enabled and disabled. Play nice and
 * disable it when requested, although it shouldn't cause any overhead
 * when no packet needs it. At most one packet in the queue may be
 * marked for time stamping, otherwise it would be impossible to tell
 * for sure to which packet the hardware time stamp belongs.
 *
 * Incoming time stamping has to be configured via the hardware
 * filters. Not all combinations are supported, in particular event
 * type has to be specified. Matching the kind of event packet is
 * not supported, with the exception of "all V2 events regardless of
 **/
static int igb_hwtstamp_ioctl(struct net_device *netdev,
			      struct ifreq *ifr, int cmd)
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;
	struct hwtstamp_config config;
	u32 tsync_tx_ctl = E1000_TSYNCTXCTL_ENABLED;
	u32 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
	u32 tsync_rx_cfg = 0;

	if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))

	/* reserved for future extensions */

	switch (config.tx_type) {
	case HWTSTAMP_TX_OFF:
	case HWTSTAMP_TX_ON:

	switch (config.rx_filter) {
	case HWTSTAMP_FILTER_NONE:
	case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
	case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
	case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
	case HWTSTAMP_FILTER_ALL:
		/*
		 * register TSYNCRXCFG must be set, therefore it is not
		 * possible to time stamp both Sync and Delay_Req messages
		 * => fall back to time stamping all packets
		 */
		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
		config.rx_filter = HWTSTAMP_FILTER_ALL;
	case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
		tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE;
	case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
		tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE;
	case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
	case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
		tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE;
		config.rx_filter = HWTSTAMP_FILTER_SOME;
	case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
	case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
		tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE;
		config.rx_filter = HWTSTAMP_FILTER_SOME;
	case HWTSTAMP_FILTER_PTP_V2_EVENT:
	case HWTSTAMP_FILTER_PTP_V2_SYNC:
	case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_EVENT_V2;
		config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;

	if (hw->mac.type == e1000_82575) {
		if (tsync_rx_ctl | tsync_tx_ctl)

	/*
	 * Per-packet timestamping only works if all packets are
	 * timestamped, so enable timestamping in all packets as
	 * long as one rx filter was configured.
	 */
	if ((hw->mac.type >= e1000_82580) && tsync_rx_ctl) {
		tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;

	/* enable/disable TX */
	regval = rd32(E1000_TSYNCTXCTL);
	regval &= ~E1000_TSYNCTXCTL_ENABLED;
	regval |= tsync_tx_ctl;
	wr32(E1000_TSYNCTXCTL, regval);

	/* enable/disable RX */
	regval = rd32(E1000_TSYNCRXCTL);
	regval &= ~(E1000_TSYNCRXCTL_ENABLED | E1000_TSYNCRXCTL_TYPE_MASK);
	regval |= tsync_rx_ctl;
	wr32(E1000_TSYNCRXCTL, regval);

	/* define which PTP packets are time stamped */
	wr32(E1000_TSYNCRXCFG, tsync_rx_cfg);

	/* define ethertype filter for timestamped packets */
		(E1000_ETQF_FILTER_ENABLE | /* enable filter */
		 E1000_ETQF_1588 |          /* enable timestamping */
		 ETH_P_1588));              /* 1588 eth protocol type */
		wr32(E1000_ETQF(3), 0);

#define PTP_PORT 319
	/* L4 Queue Filter[3]: filter by destination port and protocol */
		u32 ftqf = (IPPROTO_UDP /* UDP */
			| E1000_FTQF_VF_BP /* VF not compared */
			| E1000_FTQF_1588_TIME_STAMP /* Enable Timestamping */
			| E1000_FTQF_MASK); /* mask all inputs */
		ftqf &= ~E1000_FTQF_MASK_PROTO_BP; /* enable protocol check */

		wr32(E1000_IMIR(3), htons(PTP_PORT));
		wr32(E1000_IMIREXT(3),
		     (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP));
		if (hw->mac.type == e1000_82576) {
			/* enable source port check */
			wr32(E1000_SPQF(3), htons(PTP_PORT));
			ftqf &= ~E1000_FTQF_MASK_SOURCE_PORT_BP;
		wr32(E1000_FTQF(3), ftqf);
		wr32(E1000_FTQF(3), E1000_FTQF_MASK);

	adapter->hwtstamp_config = config;

	/* clear TX/RX time stamp registers, just to be sure */
	regval = rd32(E1000_TXSTMPH);
	regval = rd32(E1000_RXSTMPH);

	return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
6428
static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
{
	switch (cmd) {
	case SIOCGMIIPHY:
	case SIOCGMIIREG:
	case SIOCSMIIREG:
		return igb_mii_ioctl(netdev, ifr, cmd);
	case SIOCSHWTSTAMP:
		return igb_hwtstamp_ioctl(netdev, ifr, cmd);
	default:
		return -EOPNOTSUPP;
	}
}
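/*
 * For reference, a minimal userspace sketch (not part of this driver) of
 * the SIOCSHWTSTAMP path handled above; it assumes a UDP socket fd and an
 * interface named "eth0":
 *
 *	struct hwtstamp_config cfg = {
 *		.tx_type   = HWTSTAMP_TX_ON,
 *		.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT,
 *	};
 *	struct ifreq ifr;
 *
 *	strncpy(ifr.ifr_name, "eth0", IFNAMSIZ);
 *	ifr.ifr_data = (void *)&cfg;
 *	if (ioctl(fd, SIOCSHWTSTAMP, &ifr) < 0)
 *		perror("SIOCSHWTSTAMP");
 */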
s32 igb_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
{
	struct igb_adapter *adapter = hw->back;
	u16 cap_offset;

	cap_offset = adapter->pdev->pcie_cap;
	if (!cap_offset)
		return -E1000_ERR_CONFIG;

	pci_read_config_word(adapter->pdev, cap_offset + reg, value);

	return 0;
}
s32 igb_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
{
	struct igb_adapter *adapter = hw->back;
	u16 cap_offset;

	cap_offset = adapter->pdev->pcie_cap;
	if (!cap_offset)
		return -E1000_ERR_CONFIG;

	pci_write_config_word(adapter->pdev, cap_offset + reg, *value);

	return 0;
}
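/*
 * Note: in both helpers above, @reg is an offset within the PCI Express
 * capability structure, not an absolute config-space address. A typical
 * (hypothetical) call would be
 *	igb_read_pcie_cap_reg(hw, PCI_EXP_DEVCTL, &devctl);
 * where PCI_EXP_DEVCTL comes from <linux/pci_regs.h>.
 */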
static void igb_vlan_mode(struct net_device *netdev, u32 features)
{
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;
	u32 ctrl, rctl;
	bool enable = !!(features & NETIF_F_HW_VLAN_RX);

	if (enable) {
		/* enable VLAN tag insert/strip */
		ctrl = rd32(E1000_CTRL);
		ctrl |= E1000_CTRL_VME;
		wr32(E1000_CTRL, ctrl);

		/* Disable CFI check */
		rctl = rd32(E1000_RCTL);
		rctl &= ~E1000_RCTL_CFIEN;
		wr32(E1000_RCTL, rctl);
	} else {
		/* disable VLAN tag insert/strip */
		ctrl = rd32(E1000_CTRL);
		ctrl &= ~E1000_CTRL_VME;
		wr32(E1000_CTRL, ctrl);
	}

	igb_rlpml_set(adapter);
}
static void igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
{
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;
	int pf_id = adapter->vfs_allocated_count;

	/* attempt to add filter to vlvf array */
	igb_vlvf_set(adapter, vid, true, pf_id);

	/* add the filter since PF can receive vlans w/o entry in vlvf */
	igb_vfta_set(hw, vid, true);

	set_bit(vid, adapter->active_vlans);
}
static void igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
{
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;
	int pf_id = adapter->vfs_allocated_count;
	s32 err;

	/* remove vlan from VLVF table array */
	err = igb_vlvf_set(adapter, vid, false, pf_id);

	/* if vid was not present in VLVF just remove it from table */
	if (err)
		igb_vfta_set(hw, vid, false);

	clear_bit(vid, adapter->active_vlans);
}
static void igb_restore_vlan(struct igb_adapter *adapter)
{
	u16 vid;

	igb_vlan_mode(adapter->netdev, adapter->netdev->features);

	for_each_set_bit(vid, adapter->active_vlans, VLAN_N_VID)
		igb_vlan_rx_add_vid(adapter->netdev, vid);
}
int igb_set_spd_dplx(struct igb_adapter *adapter, u32 spd, u8 dplx)
{
	struct pci_dev *pdev = adapter->pdev;
	struct e1000_mac_info *mac = &adapter->hw.mac;

	mac->autoneg = 0;

	/* Make sure dplx is at most 1 bit and lsb of speed is not set
	 * for the switch() below to work */
	if ((spd & 1) || (dplx & ~1))
		goto err_inval;

	/* Fiber NICs only allow 1000 Mbps full duplex; reject anything else */
	if ((adapter->hw.phy.media_type == e1000_media_type_internal_serdes) &&
	    (spd != SPEED_1000 || dplx != DUPLEX_FULL))
		goto err_inval;

	switch (spd + dplx) {
	case SPEED_10 + DUPLEX_HALF:
		mac->forced_speed_duplex = ADVERTISE_10_HALF;
		break;
	case SPEED_10 + DUPLEX_FULL:
		mac->forced_speed_duplex = ADVERTISE_10_FULL;
		break;
	case SPEED_100 + DUPLEX_HALF:
		mac->forced_speed_duplex = ADVERTISE_100_HALF;
		break;
	case SPEED_100 + DUPLEX_FULL:
		mac->forced_speed_duplex = ADVERTISE_100_FULL;
		break;
	case SPEED_1000 + DUPLEX_FULL:
		mac->autoneg = 1;
		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
		break;
	case SPEED_1000 + DUPLEX_HALF: /* not supported */
	default:
		goto err_inval;
	}
	return 0;

err_inval:
	dev_err(&pdev->dev, "Unsupported Speed/Duplex configuration\n");
	return -EINVAL;
}
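/*
 * Note on the switch above: spd + dplx yields a unique value for every
 * supported combination because SPEED_10/100/1000 all have bit 0 clear
 * and dplx is constrained to 0 or 1 by the earlier sanity check, e.g.
 * SPEED_100 + DUPLEX_FULL = 100 + 1 = 101.
 */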
static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake)
{
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;
	u32 ctrl, rctl, status;
	u32 wufc = adapter->wol;
#ifdef CONFIG_PM
	int retval = 0;
#endif

	netif_device_detach(netdev);

	if (netif_running(netdev))
		igb_close(netdev);

	igb_clear_interrupt_scheme(adapter);

#ifdef CONFIG_PM
	retval = pci_save_state(pdev);
	if (retval)
		return retval;
#endif

	status = rd32(E1000_STATUS);
	if (status & E1000_STATUS_LU)
		wufc &= ~E1000_WUFC_LNKC;

	if (wufc) {
		igb_setup_rctl(adapter);
		igb_set_rx_mode(netdev);

		/* turn on all-multi mode if wake on multicast is enabled */
		if (wufc & E1000_WUFC_MC) {
			rctl = rd32(E1000_RCTL);
			rctl |= E1000_RCTL_MPE;
			wr32(E1000_RCTL, rctl);
		}

		ctrl = rd32(E1000_CTRL);
		/* advertise wake from D3Cold */
		#define E1000_CTRL_ADVD3WUC 0x00100000
		/* phy power management enable */
		#define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000
		ctrl |= E1000_CTRL_ADVD3WUC;
		wr32(E1000_CTRL, ctrl);

		/* Allow time for pending master requests to run */
		igb_disable_pcie_master(hw);

		wr32(E1000_WUC, E1000_WUC_PME_EN);
		wr32(E1000_WUFC, wufc);
	} else {
		wr32(E1000_WUC, 0);
		wr32(E1000_WUFC, 0);
	}

	*enable_wake = wufc || adapter->en_mng_pt;
	if (!*enable_wake)
		igb_power_down_link(adapter);
	else
		igb_power_up_link(adapter);

	/* Release control of h/w to f/w. If f/w is AMT enabled, this
	 * would have already happened in close and is redundant. */
	igb_release_hw_control(adapter);

	pci_disable_device(pdev);

	return 0;
}
#ifdef CONFIG_PM
static int igb_suspend(struct pci_dev *pdev, pm_message_t state)
{
	int retval;
	bool wake;

	retval = __igb_shutdown(pdev, &wake);
	if (retval)
		return retval;

	if (wake) {
		pci_prepare_to_sleep(pdev);
	} else {
		pci_wake_from_d3(pdev, false);
		pci_set_power_state(pdev, PCI_D3hot);
	}

	return 0;
}
static int igb_resume(struct pci_dev *pdev)
{
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;
	int err;

	pci_set_power_state(pdev, PCI_D0);
	pci_restore_state(pdev);
	pci_save_state(pdev);

	err = pci_enable_device_mem(pdev);
	if (err) {
		dev_err(&pdev->dev,
			"igb: Cannot enable PCI device from suspend\n");
		return err;
	}
	pci_set_master(pdev);

	pci_enable_wake(pdev, PCI_D3hot, 0);
	pci_enable_wake(pdev, PCI_D3cold, 0);

	if (igb_init_interrupt_scheme(adapter)) {
		dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
		return -ENOMEM;
	}

	igb_reset(adapter);

	/* let the f/w know that the h/w is now under the control of the
	 * driver. */
	igb_get_hw_control(adapter);

	wr32(E1000_WUS, ~0);

	if (netif_running(netdev)) {
		err = igb_open(netdev);
		if (err)
			return err;
	}

	netif_device_attach(netdev);

	return 0;
}
#endif
static void igb_shutdown(struct pci_dev *pdev)
{
	bool wake;

	__igb_shutdown(pdev, &wake);

	if (system_state == SYSTEM_POWER_OFF) {
		pci_wake_from_d3(pdev, wake);
		pci_set_power_state(pdev, PCI_D3hot);
	}
}
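/*
 * Note: unlike igb_suspend(), igb_shutdown() only arms PME wake when the
 * system is actually powering off; on a reboot the device is simply left
 * quiesced in D0.
 */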
#ifdef CONFIG_NET_POLL_CONTROLLER
/*
 * Polling 'interrupt' - used by things like netconsole to send skbs
 * without having to re-enable interrupts. It's not called while
 * the interrupt routine is executing.
 */
static void igb_netpoll(struct net_device *netdev)
{
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;
	struct igb_q_vector *q_vector;
	int i;

	for (i = 0; i < adapter->num_q_vectors; i++) {
		q_vector = adapter->q_vector[i];
		if (adapter->msix_entries)
			wr32(E1000_EIMC, q_vector->eims_value);
		else
			igb_irq_disable(adapter);
		napi_schedule(&q_vector->napi);
	}
}
#endif /* CONFIG_NET_POLL_CONTROLLER */
/**
 * igb_io_error_detected - called when PCI error is detected
 * @pdev: Pointer to PCI device
 * @state: The current pci connection state
 *
 * This function is called after a PCI bus error affecting
 * this device has been detected.
 */
static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev,
					      pci_channel_state_t state)
{
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct igb_adapter *adapter = netdev_priv(netdev);

	netif_device_detach(netdev);

	if (state == pci_channel_io_perm_failure)
		return PCI_ERS_RESULT_DISCONNECT;

	if (netif_running(netdev))
		igb_down(adapter);
	pci_disable_device(pdev);

	/* Request a slot reset. */
	return PCI_ERS_RESULT_NEED_RESET;
}
/**
 * igb_io_slot_reset - called after the pci bus has been reset.
 * @pdev: Pointer to PCI device
 *
 * Restart the card from scratch, as if from a cold-boot. Implementation
 * resembles the first-half of the igb_resume routine.
 */
static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev)
{
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;
	pci_ers_result_t result;
	int err;

	if (pci_enable_device_mem(pdev)) {
		dev_err(&pdev->dev,
			"Cannot re-enable PCI device after reset.\n");
		result = PCI_ERS_RESULT_DISCONNECT;
	} else {
		pci_set_master(pdev);
		pci_restore_state(pdev);
		pci_save_state(pdev);

		pci_enable_wake(pdev, PCI_D3hot, 0);
		pci_enable_wake(pdev, PCI_D3cold, 0);

		igb_reset(adapter);
		wr32(E1000_WUS, ~0);
		result = PCI_ERS_RESULT_RECOVERED;
	}

	err = pci_cleanup_aer_uncorrect_error_status(pdev);
	if (err) {
		dev_err(&pdev->dev,
			"pci_cleanup_aer_uncorrect_error_status failed 0x%0x\n",
			err);
		/* non-fatal, continue */
	}

	return result;
}
/**
 * igb_io_resume - called when traffic can start flowing again.
 * @pdev: Pointer to PCI device
 *
 * This callback is called when the error recovery driver tells us that
 * it's OK to resume normal operation. Implementation resembles the
 * second-half of the igb_resume routine.
 */
static void igb_io_resume(struct pci_dev *pdev)
{
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct igb_adapter *adapter = netdev_priv(netdev);

	if (netif_running(netdev)) {
		if (igb_up(adapter)) {
			dev_err(&pdev->dev, "igb_up failed after reset\n");
			return;
		}
	}

	netif_device_attach(netdev);

	/* let the f/w know that the h/w is now under the control of the
	 * driver. */
	igb_get_hw_control(adapter);
}
static void igb_rar_set_qsel(struct igb_adapter *adapter, u8 *addr, u32 index,
			     u8 qsel)
{
	u32 rar_low, rar_high;
	struct e1000_hw *hw = &adapter->hw;

	/* HW expects these in little endian so we reverse the byte order
	 * from network order (big endian) to little endian
	 */
	rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) |
		   ((u32) addr[2] << 16) | ((u32) addr[3] << 24));
	rar_high = ((u32) addr[4] | ((u32) addr[5] << 8));

	/* Indicate to hardware the Address is Valid. */
	rar_high |= E1000_RAH_AV;

	if (hw->mac.type == e1000_82575)
		rar_high |= E1000_RAH_POOL_1 * qsel;
	else
		rar_high |= E1000_RAH_POOL_1 << qsel;

	wr32(E1000_RAL(index), rar_low);
	wrfl();
	wr32(E1000_RAH(index), rar_high);
	wrfl();
}
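/*
 * Worked example for the packing above: for the (made-up) address
 * 00:1b:21:aa:bb:cc, addr[0..3] pack into rar_low = 0xaa211b00 and
 * addr[4..5] into rar_high = 0x0000ccbb, before the AV and pool bits
 * are OR'ed into rar_high.
 */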
static int igb_set_vf_mac(struct igb_adapter *adapter,
			  int vf, unsigned char *mac_addr)
{
	struct e1000_hw *hw = &adapter->hw;
	/* VF MAC addresses start at the end of the receive addresses and
	 * move towards the first, so a collision should not be possible */
	int rar_entry = hw->mac.rar_entry_count - (vf + 1);

	memcpy(adapter->vf_data[vf].vf_mac_addresses, mac_addr, ETH_ALEN);

	igb_rar_set_qsel(adapter, mac_addr, rar_entry, vf);

	return 0;
}
static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
{
	struct igb_adapter *adapter = netdev_priv(netdev);

	if (!is_valid_ether_addr(mac) || (vf >= adapter->vfs_allocated_count))
		return -EINVAL;
	adapter->vf_data[vf].flags |= IGB_VF_FLAG_PF_SET_MAC;
	dev_info(&adapter->pdev->dev, "setting MAC %pM on VF %d\n", mac, vf);
	dev_info(&adapter->pdev->dev,
		 "Reload the VF driver to make this change effective.\n");
	if (test_bit(__IGB_DOWN, &adapter->state)) {
		dev_warn(&adapter->pdev->dev,
			 "The VF MAC address has been set, but the PF device is not up.\n");
		dev_warn(&adapter->pdev->dev,
			 "Bring the PF device up before attempting to use the VF device.\n");
	}
	return igb_set_vf_mac(adapter, vf, mac);
}
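/*
 * A typical (hypothetical) administrative trigger for the ndo above, via
 * iproute2 on a PF interface named eth0:
 *	ip link set dev eth0 vf 0 mac 00:1b:21:aa:bb:cc
 */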
static int igb_link_mbps(int internal_link_speed)
{
	switch (internal_link_speed) {
	case SPEED_100:
		return 100;
	case SPEED_1000:
		return 1000;
	default:
		return 0;
	}
}
static void igb_set_vf_rate_limit(struct e1000_hw *hw, int vf, int tx_rate,
				  int link_speed)
{
	int rf_dec, rf_int;
	u32 bcnrc_val;

	if (tx_rate != 0) {
		/* Calculate the rate factor values to set */
		rf_int = link_speed / tx_rate;
		rf_dec = (link_speed - (rf_int * tx_rate));
		rf_dec = (rf_dec * (1 << E1000_RTTBCNRC_RF_INT_SHIFT)) / tx_rate;

		bcnrc_val = E1000_RTTBCNRC_RS_ENA;
		bcnrc_val |= ((rf_int << E1000_RTTBCNRC_RF_INT_SHIFT) &
			      E1000_RTTBCNRC_RF_INT_MASK);
		bcnrc_val |= (rf_dec & E1000_RTTBCNRC_RF_DEC_MASK);
	} else {
		bcnrc_val = 0;
	}

	wr32(E1000_RTTDQSEL, vf); /* vf X uses queue X */
	wr32(E1000_RTTBCNRC, bcnrc_val);
}
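/*
 * Worked example for the math above, assuming a 14-bit fractional field
 * (E1000_RTTBCNRC_RF_INT_SHIFT == 14): link_speed = 1000 and tx_rate = 300
 * give rf_int = 3 and rf_dec = (100 * 16384) / 300 = 5461, i.e. a rate
 * factor of roughly 3.333 = link_speed / tx_rate in fixed point.
 */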
static void igb_check_vf_rate_limit(struct igb_adapter *adapter)
{
	int actual_link_speed, i;
	bool reset_rate = false;

	/* VF TX rate limit was not set or not supported */
	if ((adapter->vf_rate_link_speed == 0) ||
	    (adapter->hw.mac.type != e1000_82576))
		return;

	actual_link_speed = igb_link_mbps(adapter->link_speed);
	if (actual_link_speed != adapter->vf_rate_link_speed) {
		reset_rate = true;
		adapter->vf_rate_link_speed = 0;
		dev_info(&adapter->pdev->dev,
			 "Link speed has been changed. VF Transmit rate is disabled\n");
	}

	for (i = 0; i < adapter->vfs_allocated_count; i++) {
		if (reset_rate)
			adapter->vf_data[i].tx_rate = 0;

		igb_set_vf_rate_limit(&adapter->hw, i,
				      adapter->vf_data[i].tx_rate,
				      actual_link_speed);
	}
}
static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate)
{
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;
	int actual_link_speed;

	if (hw->mac.type != e1000_82576)
		return -EOPNOTSUPP;

	actual_link_speed = igb_link_mbps(adapter->link_speed);
	if ((vf >= adapter->vfs_allocated_count) ||
	    (!(rd32(E1000_STATUS) & E1000_STATUS_LU)) ||
	    (tx_rate < 0) ||
	    (tx_rate > actual_link_speed))
		return -EINVAL;

	adapter->vf_rate_link_speed = actual_link_speed;
	adapter->vf_data[vf].tx_rate = (u16)tx_rate;
	igb_set_vf_rate_limit(hw, vf, tx_rate, actual_link_speed);

	return 0;
}
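/*
 * A typical (hypothetical) administrative trigger for the ndo above, again
 * via iproute2, capping VF 0 at 500 Mbps on an 82576 PF named eth0:
 *	ip link set dev eth0 vf 0 rate 500
 */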
static int igb_ndo_get_vf_config(struct net_device *netdev,
				 int vf, struct ifla_vf_info *ivi)
{
	struct igb_adapter *adapter = netdev_priv(netdev);

	if (vf >= adapter->vfs_allocated_count)
		return -EINVAL;
	ivi->vf = vf;
	memcpy(&ivi->mac, adapter->vf_data[vf].vf_mac_addresses, ETH_ALEN);
	ivi->tx_rate = adapter->vf_data[vf].tx_rate;
	ivi->vlan = adapter->vf_data[vf].pf_vlan;
	ivi->qos = adapter->vf_data[vf].pf_qos;
	return 0;
}
static void igb_vmm_control(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	u32 reg;

	switch (hw->mac.type) {
	case e1000_82575:
	default:
		/* replication is not supported for 82575 */
		return;
	case e1000_82576:
		/* notify HW that the MAC is adding vlan tags */
		reg = rd32(E1000_DTXCTL);
		reg |= E1000_DTXCTL_VLAN_ADDED;
		wr32(E1000_DTXCTL, reg);
		/* fall through */
	case e1000_82580:
		/* enable replication vlan tag stripping */
		reg = rd32(E1000_RPLOLR);
		reg |= E1000_RPLOLR_STRVLAN;
		wr32(E1000_RPLOLR, reg);
		/* fall through */
	case e1000_i350:
		/* none of the above registers are supported by i350 */
		break;
	}

	if (adapter->vfs_allocated_count) {
		igb_vmdq_set_loopback_pf(hw, true);
		igb_vmdq_set_replication_pf(hw, true);
		igb_vmdq_set_anti_spoofing_pf(hw, true,
					      adapter->vfs_allocated_count);
	} else {
		igb_vmdq_set_loopback_pf(hw, false);
		igb_vmdq_set_replication_pf(hw, false);
	}
}
static void igb_init_dmac(struct igb_adapter *adapter, u32 pba)
{
	struct e1000_hw *hw = &adapter->hw;
	u32 dmac_thr;
	u16 hwm;

	if (hw->mac.type > e1000_82580) {
		if (adapter->flags & IGB_FLAG_DMAC) {
			u32 reg;

			/* force threshold to 0. */
			wr32(E1000_DMCTXTH, 0);

			/*
			 * DMA Coalescing high water mark needs to be higher
			 * than the RX threshold. set hwm to PBA - 2 * max
			 * frame size
			 */
			hwm = pba - (2 * adapter->max_frame_size);
			reg = rd32(E1000_DMACR);
			reg &= ~E1000_DMACR_DMACTHR_MASK;
			dmac_thr = pba - 4;
			reg |= ((dmac_thr << E1000_DMACR_DMACTHR_SHIFT)
				& E1000_DMACR_DMACTHR_MASK);

			/* transition to L0x or L1 if available */
			reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK);

			/* watchdog timer = +-1000 usec in 32 usec intervals */
			reg |= (1000 >> 5);
			wr32(E1000_DMACR, reg);

			/*
			 * no lower threshold to disable
			 * coalescing (smart FIFO); UTRESH = 0
			 */
			wr32(E1000_DMCRTRH, 0);
			wr32(E1000_FCRTC, hwm);

			reg = (IGB_DMCTLX_DCFLUSH_DIS | 0x4);

			wr32(E1000_DMCTLX, reg);

			/*
			 * free space in tx packet buffer to wake from
			 * DMA coalescing
			 */
			wr32(E1000_DMCTXTH, (IGB_MIN_TXPBSIZE -
			     (IGB_TX_BUF_4096 + adapter->max_frame_size)) >> 6);

			/*
			 * make low power state decision controlled
			 * by DMA coalescing
			 */
			reg = rd32(E1000_PCIEMISC);
			reg &= ~E1000_PCIEMISC_LX_DECISION;
			wr32(E1000_PCIEMISC, reg);
		} /* endif adapter->dmac is not disabled */
	} else if (hw->mac.type == e1000_82580) {
		u32 reg = rd32(E1000_PCIEMISC);

		wr32(E1000_PCIEMISC, reg & ~E1000_PCIEMISC_LX_DECISION);
		wr32(E1000_DMACR, 0);
	}
}
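/*
 * Note on the constants above: the watchdog value 1000 >> 5 = 31 counts
 * 32 usec intervals, i.e. roughly 992 usec, matching the "+-1000 usec"
 * comment; hwm and dmac_thr are both derived from the packet buffer
 * allocation (pba) so the coalescing watermarks track the RX threshold.
 */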