drivers/net/ethernet/emulex/benet/be_main.c
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (C) 2005 - 2016 Broadcom
4  * All rights reserved.
5  *
6  * Contact Information:
7  * linux-drivers@emulex.com
8  *
9  * Emulex
10  * 3333 Susan Street
11  * Costa Mesa, CA 92626
12  */
13
14 #include <linux/prefetch.h>
15 #include <linux/module.h>
16 #include "be.h"
17 #include "be_cmds.h"
18 #include <asm/div64.h>
19 #include <linux/aer.h>
20 #include <linux/if_bridge.h>
21 #include <net/busy_poll.h>
22 #include <net/vxlan.h>
23
24 MODULE_DESCRIPTION(DRV_DESC);
25 MODULE_AUTHOR("Emulex Corporation");
26 MODULE_LICENSE("GPL");
27
28 /* num_vfs module param is obsolete.
29  * Use sysfs method to enable/disable VFs.
30  */
31 static unsigned int num_vfs;
32 module_param(num_vfs, uint, 0444);
33 MODULE_PARM_DESC(num_vfs, "Number of PCI VFs to initialize");
34
35 static ushort rx_frag_size = 2048;
36 module_param(rx_frag_size, ushort, 0444);
37 MODULE_PARM_DESC(rx_frag_size, "Size of a fragment that holds rcvd data.");
38
39 /* Per-module error detection/recovery workq shared across all functions.
40  * Each function schedules its own work request on this shared workq.
41  */
42 static struct workqueue_struct *be_err_recovery_workq;
43
44 static const struct pci_device_id be_dev_ids[] = {
45 #ifdef CONFIG_BE2NET_BE2
46         { PCI_DEVICE(BE_VENDOR_ID, BE_DEVICE_ID1) },
47         { PCI_DEVICE(BE_VENDOR_ID, OC_DEVICE_ID1) },
48 #endif /* CONFIG_BE2NET_BE2 */
49 #ifdef CONFIG_BE2NET_BE3
50         { PCI_DEVICE(BE_VENDOR_ID, BE_DEVICE_ID2) },
51         { PCI_DEVICE(BE_VENDOR_ID, OC_DEVICE_ID2) },
52 #endif /* CONFIG_BE2NET_BE3 */
53 #ifdef CONFIG_BE2NET_LANCER
54         { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID3)},
55         { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID4)},
56 #endif /* CONFIG_BE2NET_LANCER */
57 #ifdef CONFIG_BE2NET_SKYHAWK
58         { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID5)},
59         { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID6)},
60 #endif /* CONFIG_BE2NET_SKYHAWK */
61         { 0 }
62 };
63 MODULE_DEVICE_TABLE(pci, be_dev_ids);
64
65 /* Workqueue used by all functions for deferring cmd calls to the adapter */
66 static struct workqueue_struct *be_wq;
67
68 /* UE Status Low CSR */
69 static const char * const ue_status_low_desc[] = {
70         "CEV",
71         "CTX",
72         "DBUF",
73         "ERX",
74         "Host",
75         "MPU",
76         "NDMA",
77         "PTC ",
78         "RDMA ",
79         "RXF ",
80         "RXIPS ",
81         "RXULP0 ",
82         "RXULP1 ",
83         "RXULP2 ",
84         "TIM ",
85         "TPOST ",
86         "TPRE ",
87         "TXIPS ",
88         "TXULP0 ",
89         "TXULP1 ",
90         "UC ",
91         "WDMA ",
92         "TXULP2 ",
93         "HOST1 ",
94         "P0_OB_LINK ",
95         "P1_OB_LINK ",
96         "HOST_GPIO ",
97         "MBOX ",
98         "ERX2 ",
99         "SPARE ",
100         "JTAG ",
101         "MPU_INTPEND "
102 };
103
104 /* UE Status High CSR */
105 static const char * const ue_status_hi_desc[] = {
106         "LPCMEMHOST",
107         "MGMT_MAC",
108         "PCS0ONLINE",
109         "MPU_IRAM",
110         "PCS1ONLINE",
111         "PCTL0",
112         "PCTL1",
113         "PMEM",
114         "RR",
115         "TXPB",
116         "RXPP",
117         "XAUI",
118         "TXP",
119         "ARM",
120         "IPC",
121         "HOST2",
122         "HOST3",
123         "HOST4",
124         "HOST5",
125         "HOST6",
126         "HOST7",
127         "ECRC",
128         "Poison TLP",
129         "NETC",
130         "PERIPH",
131         "LLTXULP",
132         "D2P",
133         "RCON",
134         "LDMA",
135         "LLTXP",
136         "LLTXPB",
137         "Unknown"
138 };
139
140 #define BE_VF_IF_EN_FLAGS       (BE_IF_FLAGS_UNTAGGED | \
141                                  BE_IF_FLAGS_BROADCAST | \
142                                  BE_IF_FLAGS_MULTICAST | \
143                                  BE_IF_FLAGS_PASS_L3L4_ERRORS)
144
145 static void be_queue_free(struct be_adapter *adapter, struct be_queue_info *q)
146 {
147         struct be_dma_mem *mem = &q->dma_mem;
148
149         if (mem->va) {
150                 dma_free_coherent(&adapter->pdev->dev, mem->size, mem->va,
151                                   mem->dma);
152                 mem->va = NULL;
153         }
154 }
155
156 static int be_queue_alloc(struct be_adapter *adapter, struct be_queue_info *q,
157                           u16 len, u16 entry_size)
158 {
159         struct be_dma_mem *mem = &q->dma_mem;
160
161         memset(q, 0, sizeof(*q));
162         q->len = len;
163         q->entry_size = entry_size;
164         mem->size = len * entry_size;
165         mem->va = dma_alloc_coherent(&adapter->pdev->dev, mem->size,
166                                      &mem->dma, GFP_KERNEL);
167         if (!mem->va)
168                 return -ENOMEM;
169         return 0;
170 }
171
172 static void be_reg_intr_set(struct be_adapter *adapter, bool enable)
173 {
174         u32 reg, enabled;
175
176         pci_read_config_dword(adapter->pdev, PCICFG_MEMBAR_CTRL_INT_CTRL_OFFSET,
177                               &reg);
178         enabled = reg & MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
179
180         if (!enabled && enable)
181                 reg |= MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
182         else if (enabled && !enable)
183                 reg &= ~MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
184         else
185                 return;
186
187         pci_write_config_dword(adapter->pdev,
188                                PCICFG_MEMBAR_CTRL_INT_CTRL_OFFSET, reg);
189 }
190
191 static void be_intr_set(struct be_adapter *adapter, bool enable)
192 {
193         int status = 0;
194
195         /* On Lancer, interrupts can't be controlled via this register */
196         if (lancer_chip(adapter))
197                 return;
198
199         if (be_check_error(adapter, BE_ERROR_EEH))
200                 return;
201
202         status = be_cmd_intr_set(adapter, enable);
203         if (status)
204                 be_reg_intr_set(adapter, enable);
205 }
206
207 static void be_rxq_notify(struct be_adapter *adapter, u16 qid, u16 posted)
208 {
209         u32 val = 0;
210
211         if (be_check_error(adapter, BE_ERROR_HW))
212                 return;
213
214         val |= qid & DB_RQ_RING_ID_MASK;
215         val |= posted << DB_RQ_NUM_POSTED_SHIFT;
216
217         wmb();
218         iowrite32(val, adapter->db + DB_RQ_OFFSET);
219 }
220
221 static void be_txq_notify(struct be_adapter *adapter, struct be_tx_obj *txo,
222                           u16 posted)
223 {
224         u32 val = 0;
225
226         if (be_check_error(adapter, BE_ERROR_HW))
227                 return;
228
229         val |= txo->q.id & DB_TXULP_RING_ID_MASK;
230         val |= (posted & DB_TXULP_NUM_POSTED_MASK) << DB_TXULP_NUM_POSTED_SHIFT;
231
232         wmb();
233         iowrite32(val, adapter->db + txo->db_offset);
234 }
235
236 static void be_eq_notify(struct be_adapter *adapter, u16 qid,
237                          bool arm, bool clear_int, u16 num_popped,
238                          u32 eq_delay_mult_enc)
239 {
240         u32 val = 0;
241
242         val |= qid & DB_EQ_RING_ID_MASK;
243         val |= ((qid & DB_EQ_RING_ID_EXT_MASK) << DB_EQ_RING_ID_EXT_MASK_SHIFT);
244
245         if (be_check_error(adapter, BE_ERROR_HW))
246                 return;
247
248         if (arm)
249                 val |= 1 << DB_EQ_REARM_SHIFT;
250         if (clear_int)
251                 val |= 1 << DB_EQ_CLR_SHIFT;
252         val |= 1 << DB_EQ_EVNT_SHIFT;
253         val |= num_popped << DB_EQ_NUM_POPPED_SHIFT;
254         val |= eq_delay_mult_enc << DB_EQ_R2I_DLY_SHIFT;
255         iowrite32(val, adapter->db + DB_EQ_OFFSET);
256 }
257
258 void be_cq_notify(struct be_adapter *adapter, u16 qid, bool arm, u16 num_popped)
259 {
260         u32 val = 0;
261
262         val |= qid & DB_CQ_RING_ID_MASK;
263         val |= ((qid & DB_CQ_RING_ID_EXT_MASK) <<
264                         DB_CQ_RING_ID_EXT_MASK_SHIFT);
265
266         if (be_check_error(adapter, BE_ERROR_HW))
267                 return;
268
269         if (arm)
270                 val |= 1 << DB_CQ_REARM_SHIFT;
271         val |= num_popped << DB_CQ_NUM_POPPED_SHIFT;
272         iowrite32(val, adapter->db + DB_CQ_OFFSET);
273 }
274
275 static int be_dev_mac_add(struct be_adapter *adapter, u8 *mac)
276 {
277         int i;
278
279         /* Check if mac has already been added as part of uc-list */
280         for (i = 0; i < adapter->uc_macs; i++) {
281                 if (ether_addr_equal(adapter->uc_list[i].mac, mac)) {
282                         /* mac already added, skip addition */
283                         adapter->pmac_id[0] = adapter->pmac_id[i + 1];
284                         return 0;
285                 }
286         }
287
288         return be_cmd_pmac_add(adapter, mac, adapter->if_handle,
289                                &adapter->pmac_id[0], 0);
290 }
291
292 static void be_dev_mac_del(struct be_adapter *adapter, int pmac_id)
293 {
294         int i;
295
296         /* Skip deletion if the programmed mac is
297          * being used in uc-list
298          */
299         for (i = 0; i < adapter->uc_macs; i++) {
300                 if (adapter->pmac_id[i + 1] == pmac_id)
301                         return;
302         }
303         be_cmd_pmac_del(adapter, adapter->if_handle, pmac_id, 0);
304 }
305
306 static int be_mac_addr_set(struct net_device *netdev, void *p)
307 {
308         struct be_adapter *adapter = netdev_priv(netdev);
309         struct device *dev = &adapter->pdev->dev;
310         struct sockaddr *addr = p;
311         int status;
312         u8 mac[ETH_ALEN];
313         u32 old_pmac_id = adapter->pmac_id[0];
314
315         if (!is_valid_ether_addr(addr->sa_data))
316                 return -EADDRNOTAVAIL;
317
318         /* Proceed further only if the user-provided MAC is different
319          * from the active MAC
320          */
321         if (ether_addr_equal(addr->sa_data, adapter->dev_mac))
322                 return 0;
323
324         /* BE3 VFs without the FILTMGMT privilege are not allowed to set
325          * their MAC address
326          */
327         if (BEx_chip(adapter) && be_virtfn(adapter) &&
328             !check_privilege(adapter, BE_PRIV_FILTMGMT))
329                 return -EPERM;
330
331         /* if device is not running, copy MAC to netdev->dev_addr */
332         if (!netif_running(netdev))
333                 goto done;
334
335         /* The PMAC_ADD cmd may fail if the VF doesn't have FILTMGMT
336          * privilege or if PF did not provision the new MAC address.
337          * On BE3, this cmd will always fail if the VF doesn't have the
338          * FILTMGMT privilege. This failure is OK only if the PF has programmed
339          * the MAC for the VF.
340          */
341         mutex_lock(&adapter->rx_filter_lock);
342         status = be_dev_mac_add(adapter, (u8 *)addr->sa_data);
343         if (!status) {
344
345                 /* Delete the old programmed MAC. This call may fail if the
346                  * old MAC was already deleted by the PF driver.
347                  */
348                 if (adapter->pmac_id[0] != old_pmac_id)
349                         be_dev_mac_del(adapter, old_pmac_id);
350         }
351
352         mutex_unlock(&adapter->rx_filter_lock);
353         /* Decide if the new MAC is successfully activated only after
354          * querying the FW
355          */
356         status = be_cmd_get_active_mac(adapter, adapter->pmac_id[0], mac,
357                                        adapter->if_handle, true, 0);
358         if (status)
359                 goto err;
360
361         /* The MAC change did not happen, either due to lack of privilege
362          * or because the PF did not pre-provision the new MAC.
363          */
364         if (!ether_addr_equal(addr->sa_data, mac)) {
365                 status = -EPERM;
366                 goto err;
367         }
368
369         /* Remember currently programmed MAC */
370         ether_addr_copy(adapter->dev_mac, addr->sa_data);
371 done:
372         ether_addr_copy(netdev->dev_addr, addr->sa_data);
373         dev_info(dev, "MAC address changed to %pM\n", addr->sa_data);
374         return 0;
375 err:
376         dev_warn(dev, "MAC address change to %pM failed\n", addr->sa_data);
377         return status;
378 }
379
380 /* BE2 supports only v0 cmd */
381 static void *hw_stats_from_cmd(struct be_adapter *adapter)
382 {
383         if (BE2_chip(adapter)) {
384                 struct be_cmd_resp_get_stats_v0 *cmd = adapter->stats_cmd.va;
385
386                 return &cmd->hw_stats;
387         } else if (BE3_chip(adapter)) {
388                 struct be_cmd_resp_get_stats_v1 *cmd = adapter->stats_cmd.va;
389
390                 return &cmd->hw_stats;
391         } else {
392                 struct be_cmd_resp_get_stats_v2 *cmd = adapter->stats_cmd.va;
393
394                 return &cmd->hw_stats;
395         }
396 }
397
398 /* BE2 supports only v0 cmd */
399 static void *be_erx_stats_from_cmd(struct be_adapter *adapter)
400 {
401         if (BE2_chip(adapter)) {
402                 struct be_hw_stats_v0 *hw_stats = hw_stats_from_cmd(adapter);
403
404                 return &hw_stats->erx;
405         } else if (BE3_chip(adapter)) {
406                 struct be_hw_stats_v1 *hw_stats = hw_stats_from_cmd(adapter);
407
408                 return &hw_stats->erx;
409         } else {
410                 struct be_hw_stats_v2 *hw_stats = hw_stats_from_cmd(adapter);
411
412                 return &hw_stats->erx;
413         }
414 }
415
416 static void populate_be_v0_stats(struct be_adapter *adapter)
417 {
418         struct be_hw_stats_v0 *hw_stats = hw_stats_from_cmd(adapter);
419         struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
420         struct be_rxf_stats_v0 *rxf_stats = &hw_stats->rxf;
421         struct be_port_rxf_stats_v0 *port_stats =
422                                         &rxf_stats->port[adapter->port_num];
423         struct be_drv_stats *drvs = &adapter->drv_stats;
424
425         be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
426         drvs->rx_pause_frames = port_stats->rx_pause_frames;
427         drvs->rx_crc_errors = port_stats->rx_crc_errors;
428         drvs->rx_control_frames = port_stats->rx_control_frames;
429         drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
430         drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
431         drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
432         drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
433         drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
434         drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
435         drvs->rxpp_fifo_overflow_drop = port_stats->rx_fifo_overflow;
436         drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
437         drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
438         drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
439         drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
440         drvs->rx_input_fifo_overflow_drop = port_stats->rx_input_fifo_overflow;
441         drvs->rx_dropped_header_too_small =
442                 port_stats->rx_dropped_header_too_small;
443         drvs->rx_address_filtered =
444                                         port_stats->rx_address_filtered +
445                                         port_stats->rx_vlan_filtered;
446         drvs->rx_alignment_symbol_errors =
447                 port_stats->rx_alignment_symbol_errors;
448
449         drvs->tx_pauseframes = port_stats->tx_pauseframes;
450         drvs->tx_controlframes = port_stats->tx_controlframes;
451
452         if (adapter->port_num)
453                 drvs->jabber_events = rxf_stats->port1_jabber_events;
454         else
455                 drvs->jabber_events = rxf_stats->port0_jabber_events;
456         drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
457         drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
458         drvs->forwarded_packets = rxf_stats->forwarded_packets;
459         drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
460         drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
461         drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
462         adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
463 }
464
465 static void populate_be_v1_stats(struct be_adapter *adapter)
466 {
467         struct be_hw_stats_v1 *hw_stats = hw_stats_from_cmd(adapter);
468         struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
469         struct be_rxf_stats_v1 *rxf_stats = &hw_stats->rxf;
470         struct be_port_rxf_stats_v1 *port_stats =
471                                         &rxf_stats->port[adapter->port_num];
472         struct be_drv_stats *drvs = &adapter->drv_stats;
473
474         be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
475         drvs->pmem_fifo_overflow_drop = port_stats->pmem_fifo_overflow_drop;
476         drvs->rx_priority_pause_frames = port_stats->rx_priority_pause_frames;
477         drvs->rx_pause_frames = port_stats->rx_pause_frames;
478         drvs->rx_crc_errors = port_stats->rx_crc_errors;
479         drvs->rx_control_frames = port_stats->rx_control_frames;
480         drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
481         drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
482         drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
483         drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
484         drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
485         drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
486         drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
487         drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
488         drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
489         drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
490         drvs->rx_dropped_header_too_small =
491                 port_stats->rx_dropped_header_too_small;
492         drvs->rx_input_fifo_overflow_drop =
493                 port_stats->rx_input_fifo_overflow_drop;
494         drvs->rx_address_filtered = port_stats->rx_address_filtered;
495         drvs->rx_alignment_symbol_errors =
496                 port_stats->rx_alignment_symbol_errors;
497         drvs->rxpp_fifo_overflow_drop = port_stats->rxpp_fifo_overflow_drop;
498         drvs->tx_pauseframes = port_stats->tx_pauseframes;
499         drvs->tx_controlframes = port_stats->tx_controlframes;
500         drvs->tx_priority_pauseframes = port_stats->tx_priority_pauseframes;
501         drvs->jabber_events = port_stats->jabber_events;
502         drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
503         drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
504         drvs->forwarded_packets = rxf_stats->forwarded_packets;
505         drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
506         drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
507         drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
508         adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
509 }
510
511 static void populate_be_v2_stats(struct be_adapter *adapter)
512 {
513         struct be_hw_stats_v2 *hw_stats = hw_stats_from_cmd(adapter);
514         struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
515         struct be_rxf_stats_v2 *rxf_stats = &hw_stats->rxf;
516         struct be_port_rxf_stats_v2 *port_stats =
517                                         &rxf_stats->port[adapter->port_num];
518         struct be_drv_stats *drvs = &adapter->drv_stats;
519
520         be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
521         drvs->pmem_fifo_overflow_drop = port_stats->pmem_fifo_overflow_drop;
522         drvs->rx_priority_pause_frames = port_stats->rx_priority_pause_frames;
523         drvs->rx_pause_frames = port_stats->rx_pause_frames;
524         drvs->rx_crc_errors = port_stats->rx_crc_errors;
525         drvs->rx_control_frames = port_stats->rx_control_frames;
526         drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
527         drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
528         drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
529         drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
530         drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
531         drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
532         drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
533         drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
534         drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
535         drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
536         drvs->rx_dropped_header_too_small =
537                 port_stats->rx_dropped_header_too_small;
538         drvs->rx_input_fifo_overflow_drop =
539                 port_stats->rx_input_fifo_overflow_drop;
540         drvs->rx_address_filtered = port_stats->rx_address_filtered;
541         drvs->rx_alignment_symbol_errors =
542                 port_stats->rx_alignment_symbol_errors;
543         drvs->rxpp_fifo_overflow_drop = port_stats->rxpp_fifo_overflow_drop;
544         drvs->tx_pauseframes = port_stats->tx_pauseframes;
545         drvs->tx_controlframes = port_stats->tx_controlframes;
546         drvs->tx_priority_pauseframes = port_stats->tx_priority_pauseframes;
547         drvs->jabber_events = port_stats->jabber_events;
548         drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
549         drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
550         drvs->forwarded_packets = rxf_stats->forwarded_packets;
551         drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
552         drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
553         drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
554         adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
555         if (be_roce_supported(adapter)) {
556                 drvs->rx_roce_bytes_lsd = port_stats->roce_bytes_received_lsd;
557                 drvs->rx_roce_bytes_msd = port_stats->roce_bytes_received_msd;
558                 drvs->rx_roce_frames = port_stats->roce_frames_received;
559                 drvs->roce_drops_crc = port_stats->roce_drops_crc;
560                 drvs->roce_drops_payload_len =
561                         port_stats->roce_drops_payload_len;
562         }
563 }
564
565 static void populate_lancer_stats(struct be_adapter *adapter)
566 {
567         struct be_drv_stats *drvs = &adapter->drv_stats;
568         struct lancer_pport_stats *pport_stats = pport_stats_from_cmd(adapter);
569
570         be_dws_le_to_cpu(pport_stats, sizeof(*pport_stats));
571         drvs->rx_pause_frames = pport_stats->rx_pause_frames_lo;
572         drvs->rx_crc_errors = pport_stats->rx_crc_errors_lo;
573         drvs->rx_control_frames = pport_stats->rx_control_frames_lo;
574         drvs->rx_in_range_errors = pport_stats->rx_in_range_errors;
575         drvs->rx_frame_too_long = pport_stats->rx_frames_too_long_lo;
576         drvs->rx_dropped_runt = pport_stats->rx_dropped_runt;
577         drvs->rx_ip_checksum_errs = pport_stats->rx_ip_checksum_errors;
578         drvs->rx_tcp_checksum_errs = pport_stats->rx_tcp_checksum_errors;
579         drvs->rx_udp_checksum_errs = pport_stats->rx_udp_checksum_errors;
580         drvs->rx_dropped_tcp_length =
581                                 pport_stats->rx_dropped_invalid_tcp_length;
582         drvs->rx_dropped_too_small = pport_stats->rx_dropped_too_small;
583         drvs->rx_dropped_too_short = pport_stats->rx_dropped_too_short;
584         drvs->rx_out_range_errors = pport_stats->rx_out_of_range_errors;
585         drvs->rx_dropped_header_too_small =
586                                 pport_stats->rx_dropped_header_too_small;
587         drvs->rx_input_fifo_overflow_drop = pport_stats->rx_fifo_overflow;
588         drvs->rx_address_filtered =
589                                         pport_stats->rx_address_filtered +
590                                         pport_stats->rx_vlan_filtered;
591         drvs->rx_alignment_symbol_errors = pport_stats->rx_symbol_errors_lo;
592         drvs->rxpp_fifo_overflow_drop = pport_stats->rx_fifo_overflow;
593         drvs->tx_pauseframes = pport_stats->tx_pause_frames_lo;
594         drvs->tx_controlframes = pport_stats->tx_control_frames_lo;
595         drvs->jabber_events = pport_stats->rx_jabbers;
596         drvs->forwarded_packets = pport_stats->num_forwards_lo;
597         drvs->rx_drops_mtu = pport_stats->rx_drops_mtu_lo;
598         drvs->rx_drops_too_many_frags =
599                                 pport_stats->rx_drops_too_many_frags_lo;
600 }
601
602 static void accumulate_16bit_val(u32 *acc, u16 val)
603 {
604 #define lo(x)                   (x & 0xFFFF)
605 #define hi(x)                   (x & 0xFFFF0000)
606         bool wrapped = val < lo(*acc);
607         u32 newacc = hi(*acc) + val;
608
609         if (wrapped)
610                 newacc += 65536;
611         WRITE_ONCE(*acc, newacc);
612 }
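/* Editor's note: an illustrative worked example of the wrap-around
 * accumulation above (not part of the original source). Suppose the
 * accumulated value is *acc = 0x0001FFF0 (low 16 bits = 0xFFF0) and the
 * 16-bit HW counter now reads val = 0x0005. Since 0x0005 < 0xFFF0 the
 * counter is assumed to have wrapped, so:
 *
 *   newacc = hi(*acc) + val + 65536
 *          = 0x00010000 + 0x0005 + 0x10000 = 0x00020005
 *
 * i.e. the 32-bit accumulator advances by 0x15, matching the distance
 * the 16-bit counter travelled across the wrap (0xFFF0 -> 0x0005).
 */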
613
614 static void populate_erx_stats(struct be_adapter *adapter,
615                                struct be_rx_obj *rxo, u32 erx_stat)
616 {
617         if (!BEx_chip(adapter))
618                 rx_stats(rxo)->rx_drops_no_frags = erx_stat;
619         else
620                 /* The erx HW counter below can wrap around after 65535;
621                  * the driver accumulates it into a 32-bit value
622                  */
623                 accumulate_16bit_val(&rx_stats(rxo)->rx_drops_no_frags,
624                                      (u16)erx_stat);
625 }
626
627 void be_parse_stats(struct be_adapter *adapter)
628 {
629         struct be_erx_stats_v2 *erx = be_erx_stats_from_cmd(adapter);
630         struct be_rx_obj *rxo;
631         int i;
632         u32 erx_stat;
633
634         if (lancer_chip(adapter)) {
635                 populate_lancer_stats(adapter);
636         } else {
637                 if (BE2_chip(adapter))
638                         populate_be_v0_stats(adapter);
639                 else if (BE3_chip(adapter))
640                         /* for BE3 */
641                         populate_be_v1_stats(adapter);
642                 else
643                         populate_be_v2_stats(adapter);
644
645                 /* erx_v2 is longer than v0/v1; use the v2 layout for v0/v1 access too */
646                 for_all_rx_queues(adapter, rxo, i) {
647                         erx_stat = erx->rx_drops_no_fragments[rxo->q.id];
648                         populate_erx_stats(adapter, rxo, erx_stat);
649                 }
650         }
651 }
652
653 static void be_get_stats64(struct net_device *netdev,
654                            struct rtnl_link_stats64 *stats)
655 {
656         struct be_adapter *adapter = netdev_priv(netdev);
657         struct be_drv_stats *drvs = &adapter->drv_stats;
658         struct be_rx_obj *rxo;
659         struct be_tx_obj *txo;
660         u64 pkts, bytes;
661         unsigned int start;
662         int i;
663
664         for_all_rx_queues(adapter, rxo, i) {
665                 const struct be_rx_stats *rx_stats = rx_stats(rxo);
666
667                 do {
668                         start = u64_stats_fetch_begin_irq(&rx_stats->sync);
669                         pkts = rx_stats(rxo)->rx_pkts;
670                         bytes = rx_stats(rxo)->rx_bytes;
671                 } while (u64_stats_fetch_retry_irq(&rx_stats->sync, start));
672                 stats->rx_packets += pkts;
673                 stats->rx_bytes += bytes;
674                 stats->multicast += rx_stats(rxo)->rx_mcast_pkts;
675                 stats->rx_dropped += rx_stats(rxo)->rx_drops_no_skbs +
676                                         rx_stats(rxo)->rx_drops_no_frags;
677         }
678
679         for_all_tx_queues(adapter, txo, i) {
680                 const struct be_tx_stats *tx_stats = tx_stats(txo);
681
682                 do {
683                         start = u64_stats_fetch_begin_irq(&tx_stats->sync);
684                         pkts = tx_stats(txo)->tx_pkts;
685                         bytes = tx_stats(txo)->tx_bytes;
686                 } while (u64_stats_fetch_retry_irq(&tx_stats->sync, start));
687                 stats->tx_packets += pkts;
688                 stats->tx_bytes += bytes;
689         }
690
691         /* bad pkts received */
692         stats->rx_errors = drvs->rx_crc_errors +
693                 drvs->rx_alignment_symbol_errors +
694                 drvs->rx_in_range_errors +
695                 drvs->rx_out_range_errors +
696                 drvs->rx_frame_too_long +
697                 drvs->rx_dropped_too_small +
698                 drvs->rx_dropped_too_short +
699                 drvs->rx_dropped_header_too_small +
700                 drvs->rx_dropped_tcp_length +
701                 drvs->rx_dropped_runt;
702
703         /* detailed rx errors */
704         stats->rx_length_errors = drvs->rx_in_range_errors +
705                 drvs->rx_out_range_errors +
706                 drvs->rx_frame_too_long;
707
708         stats->rx_crc_errors = drvs->rx_crc_errors;
709
710         /* frame alignment errors */
711         stats->rx_frame_errors = drvs->rx_alignment_symbol_errors;
712
713         /* receiver fifo overrun */
714         /* drops_no_pbuf is not per i/f, it's per BE card */
715         stats->rx_fifo_errors = drvs->rxpp_fifo_overflow_drop +
716                                 drvs->rx_input_fifo_overflow_drop +
717                                 drvs->rx_drops_no_pbuf;
718 }
719
720 void be_link_status_update(struct be_adapter *adapter, u8 link_status)
721 {
722         struct net_device *netdev = adapter->netdev;
723
724         if (!(adapter->flags & BE_FLAGS_LINK_STATUS_INIT)) {
725                 netif_carrier_off(netdev);
726                 adapter->flags |= BE_FLAGS_LINK_STATUS_INIT;
727         }
728
729         if (link_status)
730                 netif_carrier_on(netdev);
731         else
732                 netif_carrier_off(netdev);
733
734         netdev_info(netdev, "Link is %s\n", link_status ? "Up" : "Down");
735 }
736
737 static int be_gso_hdr_len(struct sk_buff *skb)
738 {
739         if (skb->encapsulation)
740                 return skb_inner_transport_offset(skb) +
741                        inner_tcp_hdrlen(skb);
742         return skb_transport_offset(skb) + tcp_hdrlen(skb);
743 }
744
745 static void be_tx_stats_update(struct be_tx_obj *txo, struct sk_buff *skb)
746 {
747         struct be_tx_stats *stats = tx_stats(txo);
748         u32 tx_pkts = skb_shinfo(skb)->gso_segs ? : 1;
749         /* Account for headers which get duplicated in TSO pkt */
750         u32 dup_hdr_len = tx_pkts > 1 ? be_gso_hdr_len(skb) * (tx_pkts - 1) : 0;
751
752         u64_stats_update_begin(&stats->sync);
753         stats->tx_reqs++;
754         stats->tx_bytes += skb->len + dup_hdr_len;
755         stats->tx_pkts += tx_pkts;
756         if (skb->encapsulation && skb->ip_summed == CHECKSUM_PARTIAL)
757                 stats->tx_vxlan_offload_pkts += tx_pkts;
758         u64_stats_update_end(&stats->sync);
759 }
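/* Editor's note: a hypothetical example of the TSO byte accounting above
 * (not in the original source). For a TSO skb carrying 7240 bytes of TCP
 * payload with gso_size (MSS) = 1448 and plain Ethernet/IPv4/TCP headers
 * of 14 + 20 + 20 = 54 bytes (no options, no encapsulation):
 *
 *   gso_segs    = 7240 / 1448       = 5
 *   skb->len    = 7240 + 54         = 7294
 *   dup_hdr_len = 54 * (5 - 1)      = 216
 *   tx_bytes   += 7294 + 216 = 7510 = 5 * (1448 + 54)
 *
 * so tx_bytes reflects the bytes that actually go on the wire, including
 * the header copies generated for each segment.
 */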
760
761 /* Returns number of WRBs needed for the skb */
762 static u32 skb_wrb_cnt(struct sk_buff *skb)
763 {
764         /* +1 for the header wrb */
765         return 1 + (skb_headlen(skb) ? 1 : 0) + skb_shinfo(skb)->nr_frags;
766 }
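/* Editor's note (illustrative, not in the original source): an skb with
 * linear header data (skb_headlen() > 0) and three page fragments needs
 * 1 (header wrb) + 1 (linear part) + 3 (frags) = 5 WRBs; an skb with no
 * linear data would need 1 + 0 + nr_frags WRBs.
 */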
767
768 static inline void wrb_fill(struct be_eth_wrb *wrb, u64 addr, int len)
769 {
770         wrb->frag_pa_hi = cpu_to_le32(upper_32_bits(addr));
771         wrb->frag_pa_lo = cpu_to_le32(lower_32_bits(addr));
772         wrb->frag_len = cpu_to_le32(len & ETH_WRB_FRAG_LEN_MASK);
773         wrb->rsvd0 = 0;
774 }
775
776 /* A dummy wrb is just all zeros. Using a separate routine for dummy-wrb
777  * to avoid the swap and shift/mask operations in wrb_fill().
778  */
779 static inline void wrb_fill_dummy(struct be_eth_wrb *wrb)
780 {
781         wrb->frag_pa_hi = 0;
782         wrb->frag_pa_lo = 0;
783         wrb->frag_len = 0;
784         wrb->rsvd0 = 0;
785 }
786
787 static inline u16 be_get_tx_vlan_tag(struct be_adapter *adapter,
788                                      struct sk_buff *skb)
789 {
790         u8 vlan_prio;
791         u16 vlan_tag;
792
793         vlan_tag = skb_vlan_tag_get(skb);
794         vlan_prio = skb_vlan_tag_get_prio(skb);
795         /* If the vlan priority provided by the OS is NOT in the available bitmap */
796         if (!(adapter->vlan_prio_bmap & (1 << vlan_prio)))
797                 vlan_tag = (vlan_tag & ~VLAN_PRIO_MASK) |
798                                 adapter->recommended_prio_bits;
799
800         return vlan_tag;
801 }
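/* Editor's note on the remapping above (assumes the standard 802.1Q TCI
 * layout of PCP in bits 15-13, DEI in bit 12, VID in bits 11-0, and
 * recommended_prio_bits already shifted into the PCP field). For example,
 * with a tag of 0x6064 (PCP 3, VID 100), if bit 3 of vlan_prio_bmap is
 * clear the PCP bits are replaced, giving
 * (0x6064 & ~VLAN_PRIO_MASK) | adapter->recommended_prio_bits.
 */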
802
803 /* Used only for IP tunnel packets */
804 static u16 skb_inner_ip_proto(struct sk_buff *skb)
805 {
806         return (inner_ip_hdr(skb)->version == 4) ?
807                 inner_ip_hdr(skb)->protocol : inner_ipv6_hdr(skb)->nexthdr;
808 }
809
810 static u16 skb_ip_proto(struct sk_buff *skb)
811 {
812         return (ip_hdr(skb)->version == 4) ?
813                 ip_hdr(skb)->protocol : ipv6_hdr(skb)->nexthdr;
814 }
815
816 static inline bool be_is_txq_full(struct be_tx_obj *txo)
817 {
818         return atomic_read(&txo->q.used) + BE_MAX_TX_FRAG_COUNT >= txo->q.len;
819 }
820
821 static inline bool be_can_txq_wake(struct be_tx_obj *txo)
822 {
823         return atomic_read(&txo->q.used) < txo->q.len / 2;
824 }
825
826 static inline bool be_is_tx_compl_pending(struct be_tx_obj *txo)
827 {
828         return atomic_read(&txo->q.used) > txo->pend_wrb_cnt;
829 }
830
831 static void be_get_wrb_params_from_skb(struct be_adapter *adapter,
832                                        struct sk_buff *skb,
833                                        struct be_wrb_params *wrb_params)
834 {
835         u16 proto;
836
837         if (skb_is_gso(skb)) {
838                 BE_WRB_F_SET(wrb_params->features, LSO, 1);
839                 wrb_params->lso_mss = skb_shinfo(skb)->gso_size;
840                 if (skb_is_gso_v6(skb) && !lancer_chip(adapter))
841                         BE_WRB_F_SET(wrb_params->features, LSO6, 1);
842         } else if (skb->ip_summed == CHECKSUM_PARTIAL) {
843                 if (skb->encapsulation) {
844                         BE_WRB_F_SET(wrb_params->features, IPCS, 1);
845                         proto = skb_inner_ip_proto(skb);
846                 } else {
847                         proto = skb_ip_proto(skb);
848                 }
849                 if (proto == IPPROTO_TCP)
850                         BE_WRB_F_SET(wrb_params->features, TCPCS, 1);
851                 else if (proto == IPPROTO_UDP)
852                         BE_WRB_F_SET(wrb_params->features, UDPCS, 1);
853         }
854
855         if (skb_vlan_tag_present(skb)) {
856                 BE_WRB_F_SET(wrb_params->features, VLAN, 1);
857                 wrb_params->vlan_tag = be_get_tx_vlan_tag(adapter, skb);
858         }
859
860         BE_WRB_F_SET(wrb_params->features, CRC, 1);
861 }
862
863 static void wrb_fill_hdr(struct be_adapter *adapter,
864                          struct be_eth_hdr_wrb *hdr,
865                          struct be_wrb_params *wrb_params,
866                          struct sk_buff *skb)
867 {
868         memset(hdr, 0, sizeof(*hdr));
869
870         SET_TX_WRB_HDR_BITS(crc, hdr,
871                             BE_WRB_F_GET(wrb_params->features, CRC));
872         SET_TX_WRB_HDR_BITS(ipcs, hdr,
873                             BE_WRB_F_GET(wrb_params->features, IPCS));
874         SET_TX_WRB_HDR_BITS(tcpcs, hdr,
875                             BE_WRB_F_GET(wrb_params->features, TCPCS));
876         SET_TX_WRB_HDR_BITS(udpcs, hdr,
877                             BE_WRB_F_GET(wrb_params->features, UDPCS));
878
879         SET_TX_WRB_HDR_BITS(lso, hdr,
880                             BE_WRB_F_GET(wrb_params->features, LSO));
881         SET_TX_WRB_HDR_BITS(lso6, hdr,
882                             BE_WRB_F_GET(wrb_params->features, LSO6));
883         SET_TX_WRB_HDR_BITS(lso_mss, hdr, wrb_params->lso_mss);
884
885         /* Hack to skip HW VLAN tagging needs evt = 1, compl = 0. When this
886          * hack is not needed, the evt bit is set while ringing DB.
887          */
888         SET_TX_WRB_HDR_BITS(event, hdr,
889                             BE_WRB_F_GET(wrb_params->features, VLAN_SKIP_HW));
890         SET_TX_WRB_HDR_BITS(vlan, hdr,
891                             BE_WRB_F_GET(wrb_params->features, VLAN));
892         SET_TX_WRB_HDR_BITS(vlan_tag, hdr, wrb_params->vlan_tag);
893
894         SET_TX_WRB_HDR_BITS(num_wrb, hdr, skb_wrb_cnt(skb));
895         SET_TX_WRB_HDR_BITS(len, hdr, skb->len);
896         SET_TX_WRB_HDR_BITS(mgmt, hdr,
897                             BE_WRB_F_GET(wrb_params->features, OS2BMC));
898 }
899
900 static void unmap_tx_frag(struct device *dev, struct be_eth_wrb *wrb,
901                           bool unmap_single)
902 {
903         dma_addr_t dma;
904         u32 frag_len = le32_to_cpu(wrb->frag_len);
905
906
907         dma = (u64)le32_to_cpu(wrb->frag_pa_hi) << 32 |
908                 (u64)le32_to_cpu(wrb->frag_pa_lo);
909         if (frag_len) {
910                 if (unmap_single)
911                         dma_unmap_single(dev, dma, frag_len, DMA_TO_DEVICE);
912                 else
913                         dma_unmap_page(dev, dma, frag_len, DMA_TO_DEVICE);
914         }
915 }
916
917 /* Grab a WRB header for xmit */
918 static u32 be_tx_get_wrb_hdr(struct be_tx_obj *txo)
919 {
920         u32 head = txo->q.head;
921
922         queue_head_inc(&txo->q);
923         return head;
924 }
925
926 /* Set up the WRB header for xmit */
927 static void be_tx_setup_wrb_hdr(struct be_adapter *adapter,
928                                 struct be_tx_obj *txo,
929                                 struct be_wrb_params *wrb_params,
930                                 struct sk_buff *skb, u16 head)
931 {
932         u32 num_frags = skb_wrb_cnt(skb);
933         struct be_queue_info *txq = &txo->q;
934         struct be_eth_hdr_wrb *hdr = queue_index_node(txq, head);
935
936         wrb_fill_hdr(adapter, hdr, wrb_params, skb);
937         be_dws_cpu_to_le(hdr, sizeof(*hdr));
938
939         BUG_ON(txo->sent_skb_list[head]);
940         txo->sent_skb_list[head] = skb;
941         txo->last_req_hdr = head;
942         atomic_add(num_frags, &txq->used);
943         txo->last_req_wrb_cnt = num_frags;
944         txo->pend_wrb_cnt += num_frags;
945 }
946
947 /* Setup a WRB fragment (buffer descriptor) for xmit */
948 static void be_tx_setup_wrb_frag(struct be_tx_obj *txo, dma_addr_t busaddr,
949                                  int len)
950 {
951         struct be_eth_wrb *wrb;
952         struct be_queue_info *txq = &txo->q;
953
954         wrb = queue_head_node(txq);
955         wrb_fill(wrb, busaddr, len);
956         queue_head_inc(txq);
957 }
958
959 /* Bring the queue back to the state it was in before be_xmit_enqueue() routine
960  * was invoked. The producer index is restored to the previous packet and the
961  * WRBs of the current packet are unmapped. Invoked to handle tx setup errors.
962  */
963 static void be_xmit_restore(struct be_adapter *adapter,
964                             struct be_tx_obj *txo, u32 head, bool map_single,
965                             u32 copied)
966 {
967         struct device *dev;
968         struct be_eth_wrb *wrb;
969         struct be_queue_info *txq = &txo->q;
970
971         dev = &adapter->pdev->dev;
972         txq->head = head;
973
974         /* skip the first wrb (hdr); it's not mapped */
975         queue_head_inc(txq);
976         while (copied) {
977                 wrb = queue_head_node(txq);
978                 unmap_tx_frag(dev, wrb, map_single);
979                 map_single = false;
980                 copied -= le32_to_cpu(wrb->frag_len);
981                 queue_head_inc(txq);
982         }
983
984         txq->head = head;
985 }
986
987 /* Enqueue the given packet for transmit. This routine allocates WRBs for the
988  * packet, dma maps the packet buffers and sets up the WRBs. Returns the number
989  * of WRBs used up by the packet.
990  */
991 static u32 be_xmit_enqueue(struct be_adapter *adapter, struct be_tx_obj *txo,
992                            struct sk_buff *skb,
993                            struct be_wrb_params *wrb_params)
994 {
995         u32 i, copied = 0, wrb_cnt = skb_wrb_cnt(skb);
996         struct device *dev = &adapter->pdev->dev;
997         bool map_single = false;
998         u32 head;
999         dma_addr_t busaddr;
1000         int len;
1001
1002         head = be_tx_get_wrb_hdr(txo);
1003
1004         if (skb->len > skb->data_len) {
1005                 len = skb_headlen(skb);
1006
1007                 busaddr = dma_map_single(dev, skb->data, len, DMA_TO_DEVICE);
1008                 if (dma_mapping_error(dev, busaddr))
1009                         goto dma_err;
1010                 map_single = true;
1011                 be_tx_setup_wrb_frag(txo, busaddr, len);
1012                 copied += len;
1013         }
1014
1015         for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
1016                 const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
1017                 len = skb_frag_size(frag);
1018
1019                 busaddr = skb_frag_dma_map(dev, frag, 0, len, DMA_TO_DEVICE);
1020                 if (dma_mapping_error(dev, busaddr))
1021                         goto dma_err;
1022                 be_tx_setup_wrb_frag(txo, busaddr, len);
1023                 copied += len;
1024         }
1025
1026         be_tx_setup_wrb_hdr(adapter, txo, wrb_params, skb, head);
1027
1028         be_tx_stats_update(txo, skb);
1029         return wrb_cnt;
1030
1031 dma_err:
1032         adapter->drv_stats.dma_map_errors++;
1033         be_xmit_restore(adapter, txo, head, map_single, copied);
1034         return 0;
1035 }
1036
1037 static inline int qnq_async_evt_rcvd(struct be_adapter *adapter)
1038 {
1039         return adapter->flags & BE_FLAGS_QNQ_ASYNC_EVT_RCVD;
1040 }
1041
1042 static struct sk_buff *be_insert_vlan_in_pkt(struct be_adapter *adapter,
1043                                              struct sk_buff *skb,
1044                                              struct be_wrb_params
1045                                              *wrb_params)
1046 {
1047         bool insert_vlan = false;
1048         u16 vlan_tag = 0;
1049
1050         skb = skb_share_check(skb, GFP_ATOMIC);
1051         if (unlikely(!skb))
1052                 return skb;
1053
1054         if (skb_vlan_tag_present(skb)) {
1055                 vlan_tag = be_get_tx_vlan_tag(adapter, skb);
1056                 insert_vlan = true;
1057         }
1058
1059         if (qnq_async_evt_rcvd(adapter) && adapter->pvid) {
1060                 if (!insert_vlan) {
1061                         vlan_tag = adapter->pvid;
1062                         insert_vlan = true;
1063                 }
1064                 /* F/W workaround: setting skip_hw_vlan = 1 informs the F/W to
1065                  * skip VLAN insertion
1066                  */
1067                 BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1068         }
1069
1070         if (insert_vlan) {
1071                 skb = vlan_insert_tag_set_proto(skb, htons(ETH_P_8021Q),
1072                                                 vlan_tag);
1073                 if (unlikely(!skb))
1074                         return skb;
1075                 __vlan_hwaccel_clear_tag(skb);
1076         }
1077
1078         /* Insert the outer VLAN, if any */
1079         if (adapter->qnq_vid) {
1080                 vlan_tag = adapter->qnq_vid;
1081                 skb = vlan_insert_tag_set_proto(skb, htons(ETH_P_8021Q),
1082                                                 vlan_tag);
1083                 if (unlikely(!skb))
1084                         return skb;
1085                 BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1086         }
1087
1088         return skb;
1089 }
1090
1091 static bool be_ipv6_exthdr_check(struct sk_buff *skb)
1092 {
1093         struct ethhdr *eh = (struct ethhdr *)skb->data;
1094         u16 offset = ETH_HLEN;
1095
1096         if (eh->h_proto == htons(ETH_P_IPV6)) {
1097                 struct ipv6hdr *ip6h = (struct ipv6hdr *)(skb->data + offset);
1098
1099                 offset += sizeof(struct ipv6hdr);
1100                 if (ip6h->nexthdr != NEXTHDR_TCP &&
1101                     ip6h->nexthdr != NEXTHDR_UDP) {
1102                         struct ipv6_opt_hdr *ehdr =
1103                                 (struct ipv6_opt_hdr *)(skb->data + offset);
1104
1105                         /* offending pkt: 2nd byte following IPv6 hdr is 0xff */
1106                         if (ehdr->hdrlen == 0xff)
1107                                 return true;
1108                 }
1109         }
1110         return false;
1111 }
1112
1113 static int be_vlan_tag_tx_chk(struct be_adapter *adapter, struct sk_buff *skb)
1114 {
1115         return skb_vlan_tag_present(skb) || adapter->pvid || adapter->qnq_vid;
1116 }
1117
1118 static int be_ipv6_tx_stall_chk(struct be_adapter *adapter, struct sk_buff *skb)
1119 {
1120         return BE3_chip(adapter) && be_ipv6_exthdr_check(skb);
1121 }
1122
1123 static struct sk_buff *be_lancer_xmit_workarounds(struct be_adapter *adapter,
1124                                                   struct sk_buff *skb,
1125                                                   struct be_wrb_params
1126                                                   *wrb_params)
1127 {
1128         struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
1129         unsigned int eth_hdr_len;
1130         struct iphdr *ip;
1131
1132         /* For padded packets, BE HW modifies tot_len field in IP header
1133          * incorrectly when the VLAN tag is inserted by HW.
1134          * For padded packets, Lancer computes incorrect checksum.
1135          */
1136         eth_hdr_len = ntohs(skb->protocol) == ETH_P_8021Q ?
1137                                                 VLAN_ETH_HLEN : ETH_HLEN;
1138         if (skb->len <= 60 &&
1139             (lancer_chip(adapter) || skb_vlan_tag_present(skb)) &&
1140             is_ipv4_pkt(skb)) {
1141                 ip = (struct iphdr *)ip_hdr(skb);
1142                 pskb_trim(skb, eth_hdr_len + ntohs(ip->tot_len));
1143         }
1144
1145         /* If vlan tag is already inlined in the packet, skip HW VLAN
1146          * tagging in pvid-tagging mode
1147          */
1148         if (be_pvid_tagging_enabled(adapter) &&
1149             veh->h_vlan_proto == htons(ETH_P_8021Q))
1150                 BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1151
1152         /* HW has a bug wherein it will calculate CSUM for VLAN
1153          * pkts even when checksum offload is disabled.
1154          * Manually insert VLAN in pkt.
1155          */
1156         if (skb->ip_summed != CHECKSUM_PARTIAL &&
1157             skb_vlan_tag_present(skb)) {
1158                 skb = be_insert_vlan_in_pkt(adapter, skb, wrb_params);
1159                 if (unlikely(!skb))
1160                         goto err;
1161         }
1162
1163         /* HW may lock up when VLAN HW tagging is requested on
1164          * certain ipv6 packets. Drop such pkts if the HW workaround to
1165          * skip HW tagging is not enabled by FW.
1166          */
1167         if (unlikely(be_ipv6_tx_stall_chk(adapter, skb) &&
1168                      (adapter->pvid || adapter->qnq_vid) &&
1169                      !qnq_async_evt_rcvd(adapter)))
1170                 goto tx_drop;
1171
1172         /* Manual VLAN tag insertion to prevent an ASIC lockup
1173          * when the ASIC inserts a VLAN tag into
1174          * certain ipv6 packets. Insert VLAN tags in driver,
1175          * and set event, completion, vlan bits accordingly
1176          * in the Tx WRB.
1177          */
1178         if (be_ipv6_tx_stall_chk(adapter, skb) &&
1179             be_vlan_tag_tx_chk(adapter, skb)) {
1180                 skb = be_insert_vlan_in_pkt(adapter, skb, wrb_params);
1181                 if (unlikely(!skb))
1182                         goto err;
1183         }
1184
1185         return skb;
1186 tx_drop:
1187         dev_kfree_skb_any(skb);
1188 err:
1189         return NULL;
1190 }
1191
1192 static struct sk_buff *be_xmit_workarounds(struct be_adapter *adapter,
1193                                            struct sk_buff *skb,
1194                                            struct be_wrb_params *wrb_params)
1195 {
1196         int err;
1197
1198         /* Lancer, SH and BE3 in SRIOV mode have a bug wherein
1199          * packets that are 32 bytes or less may cause a transmit stall
1200          * on that port. The workaround is to pad such packets
1201          * (len <= 32 bytes) to a minimum length of 36 bytes.
1202          */
1203         if (skb->len <= 32) {
1204                 if (skb_put_padto(skb, 36))
1205                         return NULL;
1206         }
1207
1208         if (BEx_chip(adapter) || lancer_chip(adapter)) {
1209                 skb = be_lancer_xmit_workarounds(adapter, skb, wrb_params);
1210                 if (!skb)
1211                         return NULL;
1212         }
1213
1214         /* The stack can send us skbs with length greater than
1215          * what the HW can handle. Trim the extra bytes.
1216          */
1217         WARN_ON_ONCE(skb->len > BE_MAX_GSO_SIZE);
1218         err = pskb_trim(skb, BE_MAX_GSO_SIZE);
1219         WARN_ON(err);
1220
1221         return skb;
1222 }
1223
1224 static void be_xmit_flush(struct be_adapter *adapter, struct be_tx_obj *txo)
1225 {
1226         struct be_queue_info *txq = &txo->q;
1227         struct be_eth_hdr_wrb *hdr = queue_index_node(txq, txo->last_req_hdr);
1228
1229         /* Mark the last request eventable if it hasn't been marked already */
1230         if (!(hdr->dw[2] & cpu_to_le32(TX_HDR_WRB_EVT)))
1231                 hdr->dw[2] |= cpu_to_le32(TX_HDR_WRB_EVT | TX_HDR_WRB_COMPL);
1232
1233         /* compose a dummy wrb if there is an odd number of wrbs to notify */
1234         if (!lancer_chip(adapter) && (txo->pend_wrb_cnt & 1)) {
1235                 wrb_fill_dummy(queue_head_node(txq));
1236                 queue_head_inc(txq);
1237                 atomic_inc(&txq->used);
1238                 txo->pend_wrb_cnt++;
1239                 hdr->dw[2] &= ~cpu_to_le32(TX_HDR_WRB_NUM_MASK <<
1240                                            TX_HDR_WRB_NUM_SHIFT);
1241                 hdr->dw[2] |= cpu_to_le32((txo->last_req_wrb_cnt + 1) <<
1242                                           TX_HDR_WRB_NUM_SHIFT);
1243         }
1244         be_txq_notify(adapter, txo, txo->pend_wrb_cnt);
1245         txo->pend_wrb_cnt = 0;
1246 }
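/* Editor's note (not in the original source): the dummy WRB above keeps
 * the number of WRBs notified to non-Lancer hardware even. For example,
 * a packet using 3 WRBs (header + 2 fragments) with no other pending
 * requests gets one all-zero dummy WRB appended, the header WRB's
 * num_wrb field is bumped from 3 to 4, and the doorbell then reports 4
 * posted WRBs.
 */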
1247
1248 /* OS2BMC related */
1249
1250 #define DHCP_CLIENT_PORT        68
1251 #define DHCP_SERVER_PORT        67
1252 #define NET_BIOS_PORT1          137
1253 #define NET_BIOS_PORT2          138
1254 #define DHCPV6_RAS_PORT         547
1255
1256 #define is_mc_allowed_on_bmc(adapter, eh)       \
1257         (!is_multicast_filt_enabled(adapter) && \
1258          is_multicast_ether_addr(eh->h_dest) && \
1259          !is_broadcast_ether_addr(eh->h_dest))
1260
1261 #define is_bc_allowed_on_bmc(adapter, eh)       \
1262         (!is_broadcast_filt_enabled(adapter) && \
1263          is_broadcast_ether_addr(eh->h_dest))
1264
1265 #define is_arp_allowed_on_bmc(adapter, skb)     \
1266         (is_arp(skb) && is_arp_filt_enabled(adapter))
1267
1268 #define is_arp(skb)     (skb->protocol == htons(ETH_P_ARP))
1269
1270 #define is_arp_filt_enabled(adapter)    \
1271                 (adapter->bmc_filt_mask & (BMC_FILT_BROADCAST_ARP))
1272
1273 #define is_dhcp_client_filt_enabled(adapter)    \
1274                 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST_DHCP_CLIENT)
1275
1276 #define is_dhcp_srvr_filt_enabled(adapter)      \
1277                 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST_DHCP_SERVER)
1278
1279 #define is_nbios_filt_enabled(adapter)  \
1280                 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST_NET_BIOS)
1281
1282 #define is_ipv6_na_filt_enabled(adapter)        \
1283                 (adapter->bmc_filt_mask &       \
1284                         BMC_FILT_MULTICAST_IPV6_NEIGH_ADVER)
1285
1286 #define is_ipv6_ra_filt_enabled(adapter)        \
1287                 (adapter->bmc_filt_mask & BMC_FILT_MULTICAST_IPV6_RA)
1288
1289 #define is_ipv6_ras_filt_enabled(adapter)       \
1290                 (adapter->bmc_filt_mask & BMC_FILT_MULTICAST_IPV6_RAS)
1291
1292 #define is_broadcast_filt_enabled(adapter)      \
1293                 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST)
1294
1295 #define is_multicast_filt_enabled(adapter)      \
1296                 (adapter->bmc_filt_mask & BMC_FILT_MULTICAST)
1297
1298 static bool be_send_pkt_to_bmc(struct be_adapter *adapter,
1299                                struct sk_buff **skb)
1300 {
1301         struct ethhdr *eh = (struct ethhdr *)(*skb)->data;
1302         bool os2bmc = false;
1303
1304         if (!be_is_os2bmc_enabled(adapter))
1305                 goto done;
1306
1307         if (!is_multicast_ether_addr(eh->h_dest))
1308                 goto done;
1309
1310         if (is_mc_allowed_on_bmc(adapter, eh) ||
1311             is_bc_allowed_on_bmc(adapter, eh) ||
1312             is_arp_allowed_on_bmc(adapter, (*skb))) {
1313                 os2bmc = true;
1314                 goto done;
1315         }
1316
1317         if ((*skb)->protocol == htons(ETH_P_IPV6)) {
1318                 struct ipv6hdr *hdr = ipv6_hdr((*skb));
1319                 u8 nexthdr = hdr->nexthdr;
1320
1321                 if (nexthdr == IPPROTO_ICMPV6) {
1322                         struct icmp6hdr *icmp6 = icmp6_hdr((*skb));
1323
1324                         switch (icmp6->icmp6_type) {
1325                         case NDISC_ROUTER_ADVERTISEMENT:
1326                                 os2bmc = is_ipv6_ra_filt_enabled(adapter);
1327                                 goto done;
1328                         case NDISC_NEIGHBOUR_ADVERTISEMENT:
1329                                 os2bmc = is_ipv6_na_filt_enabled(adapter);
1330                                 goto done;
1331                         default:
1332                                 break;
1333                         }
1334                 }
1335         }
1336
1337         if (is_udp_pkt((*skb))) {
1338                 struct udphdr *udp = udp_hdr((*skb));
1339
1340                 switch (ntohs(udp->dest)) {
1341                 case DHCP_CLIENT_PORT:
1342                         os2bmc = is_dhcp_client_filt_enabled(adapter);
1343                         goto done;
1344                 case DHCP_SERVER_PORT:
1345                         os2bmc = is_dhcp_srvr_filt_enabled(adapter);
1346                         goto done;
1347                 case NET_BIOS_PORT1:
1348                 case NET_BIOS_PORT2:
1349                         os2bmc = is_nbios_filt_enabled(adapter);
1350                         goto done;
1351                 case DHCPV6_RAS_PORT:
1352                         os2bmc = is_ipv6_ras_filt_enabled(adapter);
1353                         goto done;
1354                 default:
1355                         break;
1356                 }
1357         }
1358 done:
1359         /* For vlan packets destined to the BMC, the asic expects
1360          * the vlan tag to be inlined in the packet.
1361          */
1362         if (os2bmc)
1363                 *skb = be_insert_vlan_in_pkt(adapter, *skb, NULL);
1364
1365         return os2bmc;
1366 }
1367
1368 static netdev_tx_t be_xmit(struct sk_buff *skb, struct net_device *netdev)
1369 {
1370         struct be_adapter *adapter = netdev_priv(netdev);
1371         u16 q_idx = skb_get_queue_mapping(skb);
1372         struct be_tx_obj *txo = &adapter->tx_obj[q_idx];
1373         struct be_wrb_params wrb_params = { 0 };
1374         bool flush = !netdev_xmit_more();
1375         u16 wrb_cnt;
1376
1377         skb = be_xmit_workarounds(adapter, skb, &wrb_params);
1378         if (unlikely(!skb))
1379                 goto drop;
1380
1381         be_get_wrb_params_from_skb(adapter, skb, &wrb_params);
1382
1383         wrb_cnt = be_xmit_enqueue(adapter, txo, skb, &wrb_params);
1384         if (unlikely(!wrb_cnt)) {
1385                 dev_kfree_skb_any(skb);
1386                 goto drop;
1387         }
1388
1389         /* If OS2BMC is enabled and the pkt is destined to the BMC,
1390          * enqueue the pkt a 2nd time with the mgmt bit set.
1391          */
1392         if (be_send_pkt_to_bmc(adapter, &skb)) {
1393                 BE_WRB_F_SET(wrb_params.features, OS2BMC, 1);
1394                 wrb_cnt = be_xmit_enqueue(adapter, txo, skb, &wrb_params);
1395                 if (unlikely(!wrb_cnt))
1396                         goto drop;
1397                 else
1398                         skb_get(skb);
1399         }
1400
1401         if (be_is_txq_full(txo)) {
1402                 netif_stop_subqueue(netdev, q_idx);
1403                 tx_stats(txo)->tx_stops++;
1404         }
1405
1406         if (flush || __netif_subqueue_stopped(netdev, q_idx))
1407                 be_xmit_flush(adapter, txo);
1408
1409         return NETDEV_TX_OK;
1410 drop:
1411         tx_stats(txo)->tx_drv_drops++;
1412         /* Flush the already enqueued tx requests */
1413         if (flush && txo->pend_wrb_cnt)
1414                 be_xmit_flush(adapter, txo);
1415
1416         return NETDEV_TX_OK;
1417 }
1418
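/* ndo_tx_timeout handler: dump the non-zero TX queue and TX completion queue
 * descriptors along with header fields of any in-flight skbs to aid
 * debugging. On Lancer chips, a firmware reset is also initiated.
 */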
1419 static void be_tx_timeout(struct net_device *netdev, unsigned int txqueue)
1420 {
1421         struct be_adapter *adapter = netdev_priv(netdev);
1422         struct device *dev = &adapter->pdev->dev;
1423         struct be_tx_obj *txo;
1424         struct sk_buff *skb;
1425         struct tcphdr *tcphdr;
1426         struct udphdr *udphdr;
1427         u32 *entry;
1428         int status;
1429         int i, j;
1430
1431         for_all_tx_queues(adapter, txo, i) {
1432                 dev_info(dev, "TXQ Dump: %d H: %d T: %d used: %d, qid: 0x%x\n",
1433                          i, txo->q.head, txo->q.tail,
1434                          atomic_read(&txo->q.used), txo->q.id);
1435
1436                 entry = txo->q.dma_mem.va;
1437                 for (j = 0; j < TX_Q_LEN * 4; j += 4) {
1438                         if (entry[j] != 0 || entry[j + 1] != 0 ||
1439                             entry[j + 2] != 0 || entry[j + 3] != 0) {
1440                                 dev_info(dev, "Entry %d 0x%x 0x%x 0x%x 0x%x\n",
1441                                          j, entry[j], entry[j + 1],
1442                                          entry[j + 2], entry[j + 3]);
1443                         }
1444                 }
1445
1446                 entry = txo->cq.dma_mem.va;
1447                 dev_info(dev, "TXCQ Dump: %d  H: %d T: %d used: %d\n",
1448                          i, txo->cq.head, txo->cq.tail,
1449                          atomic_read(&txo->cq.used));
1450                 for (j = 0; j < TX_CQ_LEN * 4; j += 4) {
1451                         if (entry[j] != 0 || entry[j + 1] != 0 ||
1452                             entry[j + 2] != 0 || entry[j + 3] != 0) {
1453                                 dev_info(dev, "Entry %d 0x%x 0x%x 0x%x 0x%x\n",
1454                                          j, entry[j], entry[j + 1],
1455                                          entry[j + 2], entry[j + 3]);
1456                         }
1457                 }
1458
1459                 for (j = 0; j < TX_Q_LEN; j++) {
1460                         if (txo->sent_skb_list[j]) {
1461                                 skb = txo->sent_skb_list[j];
1462                                 if (ip_hdr(skb)->protocol == IPPROTO_TCP) {
1463                                         tcphdr = tcp_hdr(skb);
1464                                         dev_info(dev, "TCP source port %d\n",
1465                                                  ntohs(tcphdr->source));
1466                                         dev_info(dev, "TCP dest port %d\n",
1467                                                  ntohs(tcphdr->dest));
1468                                         dev_info(dev, "TCP sequence num %u\n",
1469                                                  ntohl(tcphdr->seq));
1470                                         dev_info(dev, "TCP ack_seq %u\n",
1471                                                  ntohl(tcphdr->ack_seq));
1472                                 } else if (ip_hdr(skb)->protocol ==
1473                                            IPPROTO_UDP) {
1474                                         udphdr = udp_hdr(skb);
1475                                         dev_info(dev, "UDP source port %d\n",
1476                                                  ntohs(udphdr->source));
1477                                         dev_info(dev, "UDP dest port %d\n",
1478                                                  ntohs(udphdr->dest));
1479                                 }
1480                                 dev_info(dev, "skb[%d] %p len %d proto 0x%x\n",
1481                                          j, skb, skb->len, skb->protocol);
1482                         }
1483                 }
1484         }
1485
1486         if (lancer_chip(adapter)) {
1487                 dev_info(dev, "Initiating reset due to tx timeout\n");
1488                 dev_info(dev, "Resetting adapter\n");
1489                 status = lancer_physdev_ctrl(adapter,
1490                                              PHYSDEV_CONTROL_FW_RESET_MASK);
1491                 if (status)
1492                         dev_err(dev, "Reset failed .. Reboot server\n");
1493         }
1494 }
1495
1496 static inline bool be_in_all_promisc(struct be_adapter *adapter)
1497 {
1498         return (adapter->if_flags & BE_IF_FLAGS_ALL_PROMISCUOUS) ==
1499                         BE_IF_FLAGS_ALL_PROMISCUOUS;
1500 }
1501
1502 static int be_set_vlan_promisc(struct be_adapter *adapter)
1503 {
1504         struct device *dev = &adapter->pdev->dev;
1505         int status;
1506
1507         if (adapter->if_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS)
1508                 return 0;
1509
1510         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_VLAN_PROMISCUOUS, ON);
1511         if (!status) {
1512                 dev_info(dev, "Enabled VLAN promiscuous mode\n");
1513                 adapter->if_flags |= BE_IF_FLAGS_VLAN_PROMISCUOUS;
1514         } else {
1515                 dev_err(dev, "Failed to enable VLAN promiscuous mode\n");
1516         }
1517         return status;
1518 }
1519
1520 static int be_clear_vlan_promisc(struct be_adapter *adapter)
1521 {
1522         struct device *dev = &adapter->pdev->dev;
1523         int status;
1524
1525         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_VLAN_PROMISCUOUS, OFF);
1526         if (!status) {
1527                 dev_info(dev, "Disabling VLAN promiscuous mode\n");
1528                 adapter->if_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
1529         }
1530         return status;
1531 }
1532
1533 /*
1534  * A max of 64 (BE_NUM_VLANS_SUPPORTED) vlans can be configured in BE.
1535  * If the user configures more, place BE in vlan promiscuous mode.
1536  */
1537 static int be_vid_config(struct be_adapter *adapter)
1538 {
1539         struct device *dev = &adapter->pdev->dev;
1540         u16 vids[BE_NUM_VLANS_SUPPORTED];
1541         u16 num = 0, i = 0;
1542         int status = 0;
1543
1544         /* No need to change the VLAN state if the I/F is in promiscuous mode */
1545         if (adapter->netdev->flags & IFF_PROMISC)
1546                 return 0;
1547
1548         if (adapter->vlans_added > be_max_vlans(adapter))
1549                 return be_set_vlan_promisc(adapter);
1550
1551         if (adapter->if_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS) {
1552                 status = be_clear_vlan_promisc(adapter);
1553                 if (status)
1554                         return status;
1555         }
1556         /* Construct VLAN Table to give to HW */
1557         for_each_set_bit(i, adapter->vids, VLAN_N_VID)
1558                 vids[num++] = cpu_to_le16(i);
1559
1560         status = be_cmd_vlan_config(adapter, adapter->if_handle, vids, num, 0);
1561         if (status) {
1562                 dev_err(dev, "Setting HW VLAN filtering failed\n");
1563                 /* Set to VLAN promisc mode as setting VLAN filter failed */
1564                 if (addl_status(status) == MCC_ADDL_STATUS_INSUFFICIENT_VLANS ||
1565                     addl_status(status) ==
1566                                 MCC_ADDL_STATUS_INSUFFICIENT_RESOURCES)
1567                         return be_set_vlan_promisc(adapter);
1568         }
1569         return status;
1570 }
1571
1572 static int be_vlan_add_vid(struct net_device *netdev, __be16 proto, u16 vid)
1573 {
1574         struct be_adapter *adapter = netdev_priv(netdev);
1575         int status = 0;
1576
1577         mutex_lock(&adapter->rx_filter_lock);
1578
1579         /* Packets with VID 0 are always received by Lancer by default */
1580         if (lancer_chip(adapter) && vid == 0)
1581                 goto done;
1582
1583         if (test_bit(vid, adapter->vids))
1584                 goto done;
1585
1586         set_bit(vid, adapter->vids);
1587         adapter->vlans_added++;
1588
1589         status = be_vid_config(adapter);
1590 done:
1591         mutex_unlock(&adapter->rx_filter_lock);
1592         return status;
1593 }
1594
1595 static int be_vlan_rem_vid(struct net_device *netdev, __be16 proto, u16 vid)
1596 {
1597         struct be_adapter *adapter = netdev_priv(netdev);
1598         int status = 0;
1599
1600         mutex_lock(&adapter->rx_filter_lock);
1601
1602         /* Packets with VID 0 are always received by Lancer by default */
1603         if (lancer_chip(adapter) && vid == 0)
1604                 goto done;
1605
1606         if (!test_bit(vid, adapter->vids))
1607                 goto done;
1608
1609         clear_bit(vid, adapter->vids);
1610         adapter->vlans_added--;
1611
1612         status = be_vid_config(adapter);
1613 done:
1614         mutex_unlock(&adapter->rx_filter_lock);
1615         return status;
1616 }
1617
1618 static void be_set_all_promisc(struct be_adapter *adapter)
1619 {
1620         be_cmd_rx_filter(adapter, BE_IF_FLAGS_ALL_PROMISCUOUS, ON);
1621         adapter->if_flags |= BE_IF_FLAGS_ALL_PROMISCUOUS;
1622 }
1623
1624 static void be_set_mc_promisc(struct be_adapter *adapter)
1625 {
1626         int status;
1627
1628         if (adapter->if_flags & BE_IF_FLAGS_MCAST_PROMISCUOUS)
1629                 return;
1630
1631         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_MCAST_PROMISCUOUS, ON);
1632         if (!status)
1633                 adapter->if_flags |= BE_IF_FLAGS_MCAST_PROMISCUOUS;
1634 }
1635
1636 static void be_set_uc_promisc(struct be_adapter *adapter)
1637 {
1638         int status;
1639
1640         if (adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS)
1641                 return;
1642
1643         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_PROMISCUOUS, ON);
1644         if (!status)
1645                 adapter->if_flags |= BE_IF_FLAGS_PROMISCUOUS;
1646 }
1647
1648 static void be_clear_uc_promisc(struct be_adapter *adapter)
1649 {
1650         int status;
1651
1652         if (!(adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS))
1653                 return;
1654
1655         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_PROMISCUOUS, OFF);
1656         if (!status)
1657                 adapter->if_flags &= ~BE_IF_FLAGS_PROMISCUOUS;
1658 }
1659
1660 /* The two functions below are the callback args for __dev_uc_sync()/__dev_mc_sync().
1661  * A single callback is used for both sync and unsync. Addresses are not actually
1662  * added or removed here; the callback only flags that the uc/mc list has changed.
1663  * The entire uc/mc list is programmed in be_set_rx_mode().
1664  */
1665 static int be_uc_list_update(struct net_device *netdev,
1666                              const unsigned char *addr)
1667 {
1668         struct be_adapter *adapter = netdev_priv(netdev);
1669
1670         adapter->update_uc_list = true;
1671         return 0;
1672 }
1673
1674 static int be_mc_list_update(struct net_device *netdev,
1675                              const unsigned char *addr)
1676 {
1677         struct be_adapter *adapter = netdev_priv(netdev);
1678
1679         adapter->update_mc_list = true;
1680         return 0;
1681 }
1682
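/* Sync the netdev multicast list into the adapter and program the RX filter.
 * Multicast-promiscuous mode is used instead when IFF_ALLMULTI is set or when
 * more mc addresses are configured than the adapter supports. The mc-list is
 * cached under the netdev addr lock; the FW filter cmd is issued only after
 * the lock is dropped.
 */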
1683 static void be_set_mc_list(struct be_adapter *adapter)
1684 {
1685         struct net_device *netdev = adapter->netdev;
1686         struct netdev_hw_addr *ha;
1687         bool mc_promisc = false;
1688         int status;
1689
1690         netif_addr_lock_bh(netdev);
1691         __dev_mc_sync(netdev, be_mc_list_update, be_mc_list_update);
1692
1693         if (netdev->flags & IFF_PROMISC) {
1694                 adapter->update_mc_list = false;
1695         } else if (netdev->flags & IFF_ALLMULTI ||
1696                    netdev_mc_count(netdev) > be_max_mc(adapter)) {
1697                 /* Enable multicast promisc if num configured exceeds
1698                  * what we support
1699                  */
1700                 mc_promisc = true;
1701                 adapter->update_mc_list = false;
1702         } else if (adapter->if_flags & BE_IF_FLAGS_MCAST_PROMISCUOUS) {
1703                 /* Update mc-list unconditionally if the iface was previously
1704                  * in mc-promisc mode and now is out of that mode.
1705                  */
1706                 adapter->update_mc_list = true;
1707         }
1708
1709         if (adapter->update_mc_list) {
1710                 int i = 0;
1711
1712                 /* cache the mc-list in adapter */
1713                 netdev_for_each_mc_addr(ha, netdev) {
1714                         ether_addr_copy(adapter->mc_list[i].mac, ha->addr);
1715                         i++;
1716                 }
1717                 adapter->mc_count = netdev_mc_count(netdev);
1718         }
1719         netif_addr_unlock_bh(netdev);
1720
1721         if (mc_promisc) {
1722                 be_set_mc_promisc(adapter);
1723         } else if (adapter->update_mc_list) {
1724                 status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_MULTICAST, ON);
1725                 if (!status)
1726                         adapter->if_flags &= ~BE_IF_FLAGS_MCAST_PROMISCUOUS;
1727                 else
1728                         be_set_mc_promisc(adapter);
1729
1730                 adapter->update_mc_list = false;
1731         }
1732 }
1733
1734 static void be_clear_mc_list(struct be_adapter *adapter)
1735 {
1736         struct net_device *netdev = adapter->netdev;
1737
1738         __dev_mc_unsync(netdev, NULL);
1739         be_cmd_rx_filter(adapter, BE_IF_FLAGS_MULTICAST, OFF);
1740         adapter->mc_count = 0;
1741 }
1742
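/* Program the uc-list entry at uc_idx as a MAC filter on the interface. If
 * the address matches the adapter's own dev_mac, reuse the already programmed
 * pmac_id[0] instead of consuming another MAC filter slot.
 */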
1743 static int be_uc_mac_add(struct be_adapter *adapter, int uc_idx)
1744 {
1745         if (ether_addr_equal(adapter->uc_list[uc_idx].mac, adapter->dev_mac)) {
1746                 adapter->pmac_id[uc_idx + 1] = adapter->pmac_id[0];
1747                 return 0;
1748         }
1749
1750         return be_cmd_pmac_add(adapter, adapter->uc_list[uc_idx].mac,
1751                                adapter->if_handle,
1752                                &adapter->pmac_id[uc_idx + 1], 0);
1753 }
1754
1755 static void be_uc_mac_del(struct be_adapter *adapter, int pmac_id)
1756 {
1757         if (pmac_id == adapter->pmac_id[0])
1758                 return;
1759
1760         be_cmd_pmac_del(adapter, adapter->if_handle, pmac_id, 0);
1761 }
1762
1763 static void be_set_uc_list(struct be_adapter *adapter)
1764 {
1765         struct net_device *netdev = adapter->netdev;
1766         struct netdev_hw_addr *ha;
1767         bool uc_promisc = false;
1768         int curr_uc_macs = 0, i;
1769
1770         netif_addr_lock_bh(netdev);
1771         __dev_uc_sync(netdev, be_uc_list_update, be_uc_list_update);
1772
1773         if (netdev->flags & IFF_PROMISC) {
1774                 adapter->update_uc_list = false;
1775         } else if (netdev_uc_count(netdev) > (be_max_uc(adapter) - 1)) {
1776                 uc_promisc = true;
1777                 adapter->update_uc_list = false;
1778         }  else if (adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS) {
1779                 /* Update uc-list unconditionally if the iface was previously
1780                  * in uc-promisc mode and now is out of that mode.
1781                  */
1782                 adapter->update_uc_list = true;
1783         }
1784
1785         if (adapter->update_uc_list) {
1786                 /* cache the uc-list in adapter array */
1787                 i = 0;
1788                 netdev_for_each_uc_addr(ha, netdev) {
1789                         ether_addr_copy(adapter->uc_list[i].mac, ha->addr);
1790                         i++;
1791                 }
1792                 curr_uc_macs = netdev_uc_count(netdev);
1793         }
1794         netif_addr_unlock_bh(netdev);
1795
1796         if (uc_promisc) {
1797                 be_set_uc_promisc(adapter);
1798         } else if (adapter->update_uc_list) {
1799                 be_clear_uc_promisc(adapter);
1800
1801                 for (i = 0; i < adapter->uc_macs; i++)
1802                         be_uc_mac_del(adapter, adapter->pmac_id[i + 1]);
1803
1804                 for (i = 0; i < curr_uc_macs; i++)
1805                         be_uc_mac_add(adapter, i);
1806                 adapter->uc_macs = curr_uc_macs;
1807                 adapter->update_uc_list = false;
1808         }
1809 }
1810
1811 static void be_clear_uc_list(struct be_adapter *adapter)
1812 {
1813         struct net_device *netdev = adapter->netdev;
1814         int i;
1815
1816         __dev_uc_unsync(netdev, NULL);
1817         for (i = 0; i < adapter->uc_macs; i++)
1818                 be_uc_mac_del(adapter, adapter->pmac_id[i + 1]);
1819
1820         adapter->uc_macs = 0;
1821 }
1822
1823 static void __be_set_rx_mode(struct be_adapter *adapter)
1824 {
1825         struct net_device *netdev = adapter->netdev;
1826
1827         mutex_lock(&adapter->rx_filter_lock);
1828
1829         if (netdev->flags & IFF_PROMISC) {
1830                 if (!be_in_all_promisc(adapter))
1831                         be_set_all_promisc(adapter);
1832         } else if (be_in_all_promisc(adapter)) {
1833                 /* We need to re-program the vlan-list or clear
1834                  * vlan-promisc mode (if needed) when the interface
1835                  * comes out of promisc mode.
1836                  */
1837                 be_vid_config(adapter);
1838         }
1839
1840         be_set_uc_list(adapter);
1841         be_set_mc_list(adapter);
1842
1843         mutex_unlock(&adapter->rx_filter_lock);
1844 }
1845
1846 static void be_work_set_rx_mode(struct work_struct *work)
1847 {
1848         struct be_cmd_work *cmd_work =
1849                                 container_of(work, struct be_cmd_work, work);
1850
1851         __be_set_rx_mode(cmd_work->adapter);
1852         kfree(cmd_work);
1853 }
1854
1855 static int be_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
1856 {
1857         struct be_adapter *adapter = netdev_priv(netdev);
1858         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1859         int status;
1860
1861         if (!sriov_enabled(adapter))
1862                 return -EPERM;
1863
1864         if (!is_valid_ether_addr(mac) || vf >= adapter->num_vfs)
1865                 return -EINVAL;
1866
1867         /* Proceed further only if the user-provided MAC is different
1868          * from the active MAC
1869          */
1870         if (ether_addr_equal(mac, vf_cfg->mac_addr))
1871                 return 0;
1872
1873         if (BEx_chip(adapter)) {
1874                 be_cmd_pmac_del(adapter, vf_cfg->if_handle, vf_cfg->pmac_id,
1875                                 vf + 1);
1876
1877                 status = be_cmd_pmac_add(adapter, mac, vf_cfg->if_handle,
1878                                          &vf_cfg->pmac_id, vf + 1);
1879         } else {
1880                 status = be_cmd_set_mac(adapter, mac, vf_cfg->if_handle,
1881                                         vf + 1);
1882         }
1883
1884         if (status) {
1885                 dev_err(&adapter->pdev->dev, "MAC %pM set on VF %d Failed: %#x",
1886                         mac, vf, status);
1887                 return be_cmd_status(status);
1888         }
1889
1890         ether_addr_copy(vf_cfg->mac_addr, mac);
1891
1892         return 0;
1893 }
1894
1895 static int be_get_vf_config(struct net_device *netdev, int vf,
1896                             struct ifla_vf_info *vi)
1897 {
1898         struct be_adapter *adapter = netdev_priv(netdev);
1899         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1900
1901         if (!sriov_enabled(adapter))
1902                 return -EPERM;
1903
1904         if (vf >= adapter->num_vfs)
1905                 return -EINVAL;
1906
1907         vi->vf = vf;
1908         vi->max_tx_rate = vf_cfg->tx_rate;
1909         vi->min_tx_rate = 0;
1910         vi->vlan = vf_cfg->vlan_tag & VLAN_VID_MASK;
1911         vi->qos = vf_cfg->vlan_tag >> VLAN_PRIO_SHIFT;
1912         memcpy(&vi->mac, vf_cfg->mac_addr, ETH_ALEN);
1913         vi->linkstate = adapter->vf_cfg[vf].plink_tracking;
1914         vi->spoofchk = adapter->vf_cfg[vf].spoofchk;
1915
1916         return 0;
1917 }
1918
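/* Enable Transparent VLAN Tagging (TVT) for a VF, i.e. the admin-assigned
 * VLAN tag is inserted on the VF's traffic by the adapter/switch. While TVT
 * is active the VF is also stripped of the FILTMGMT privilege so it cannot
 * program its own VLAN filters.
 */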
1919 static int be_set_vf_tvt(struct be_adapter *adapter, int vf, u16 vlan)
1920 {
1921         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1922         u16 vids[BE_NUM_VLANS_SUPPORTED];
1923         int vf_if_id = vf_cfg->if_handle;
1924         int status;
1925
1926         /* Enable Transparent VLAN Tagging */
1927         status = be_cmd_set_hsw_config(adapter, vlan, vf + 1, vf_if_id, 0, 0);
1928         if (status)
1929                 return status;
1930
1931         /* Clear any pre-programmed VLAN filters on the VF once TVT is enabled */
1932         vids[0] = 0;
1933         status = be_cmd_vlan_config(adapter, vf_if_id, vids, 1, vf + 1);
1934         if (!status)
1935                 dev_info(&adapter->pdev->dev,
1936                          "Cleared guest VLANs on VF%d", vf);
1937
1938         /* After TVT is enabled, disallow VFs from programming VLAN filters */
1939         if (vf_cfg->privileges & BE_PRIV_FILTMGMT) {
1940                 status = be_cmd_set_fn_privileges(adapter, vf_cfg->privileges &
1941                                                   ~BE_PRIV_FILTMGMT, vf + 1);
1942                 if (!status)
1943                         vf_cfg->privileges &= ~BE_PRIV_FILTMGMT;
1944         }
1945         return 0;
1946 }
1947
1948 static int be_clear_vf_tvt(struct be_adapter *adapter, int vf)
1949 {
1950         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1951         struct device *dev = &adapter->pdev->dev;
1952         int status;
1953
1954         /* Reset Transparent VLAN Tagging. */
1955         status = be_cmd_set_hsw_config(adapter, BE_RESET_VLAN_TAG_ID, vf + 1,
1956                                        vf_cfg->if_handle, 0, 0);
1957         if (status)
1958                 return status;
1959
1960         /* Allow VFs to program VLAN filtering */
1961         if (!(vf_cfg->privileges & BE_PRIV_FILTMGMT)) {
1962                 status = be_cmd_set_fn_privileges(adapter, vf_cfg->privileges |
1963                                                   BE_PRIV_FILTMGMT, vf + 1);
1964                 if (!status) {
1965                         vf_cfg->privileges |= BE_PRIV_FILTMGMT;
1966                         dev_info(dev, "VF%d: FILTMGMT priv enabled", vf);
1967                 }
1968         }
1969
1970         dev_info(dev,
1971                  "Disable/re-enable i/f in VM to clear Transparent VLAN tag");
1972         return 0;
1973 }
1974
1975 static int be_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos,
1976                           __be16 vlan_proto)
1977 {
1978         struct be_adapter *adapter = netdev_priv(netdev);
1979         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1980         int status;
1981
1982         if (!sriov_enabled(adapter))
1983                 return -EPERM;
1984
1985         if (vf >= adapter->num_vfs || vlan > 4095 || qos > 7)
1986                 return -EINVAL;
1987
1988         if (vlan_proto != htons(ETH_P_8021Q))
1989                 return -EPROTONOSUPPORT;
1990
1991         if (vlan || qos) {
1992                 vlan |= qos << VLAN_PRIO_SHIFT;
1993                 status = be_set_vf_tvt(adapter, vf, vlan);
1994         } else {
1995                 status = be_clear_vf_tvt(adapter, vf);
1996         }
1997
1998         if (status) {
1999                 dev_err(&adapter->pdev->dev,
2000                         "VLAN %d config on VF %d failed : %#x\n", vlan, vf,
2001                         status);
2002                 return be_cmd_status(status);
2003         }
2004
2005         vf_cfg->vlan_tag = vlan;
2006         return 0;
2007 }
2008
2009 static int be_set_vf_tx_rate(struct net_device *netdev, int vf,
2010                              int min_tx_rate, int max_tx_rate)
2011 {
2012         struct be_adapter *adapter = netdev_priv(netdev);
2013         struct device *dev = &adapter->pdev->dev;
2014         int percent_rate, status = 0;
2015         u16 link_speed = 0;
2016         u8 link_status;
2017
2018         if (!sriov_enabled(adapter))
2019                 return -EPERM;
2020
2021         if (vf >= adapter->num_vfs)
2022                 return -EINVAL;
2023
2024         if (min_tx_rate)
2025                 return -EINVAL;
2026
2027         if (!max_tx_rate)
2028                 goto config_qos;
2029
2030         status = be_cmd_link_status_query(adapter, &link_speed,
2031                                           &link_status, 0);
2032         if (status)
2033                 goto err;
2034
2035         if (!link_status) {
2036                 dev_err(dev, "TX-rate setting not allowed when link is down\n");
2037                 status = -ENETDOWN;
2038                 goto err;
2039         }
2040
2041         if (max_tx_rate < 100 || max_tx_rate > link_speed) {
2042                 dev_err(dev, "TX-rate must be between 100 and %d Mbps\n",
2043                         link_speed);
2044                 status = -EINVAL;
2045                 goto err;
2046         }
2047
2048         /* On Skyhawk the QOS setting must be done only as a % value */
2049         percent_rate = link_speed / 100;
2050         if (skyhawk_chip(adapter) && (max_tx_rate % percent_rate)) {
2051                 dev_err(dev, "TX-rate must be a multiple of %d Mbps\n",
2052                         percent_rate);
2053                 status = -EINVAL;
2054                 goto err;
2055         }
2056
2057 config_qos:
2058         status = be_cmd_config_qos(adapter, max_tx_rate, link_speed, vf + 1);
2059         if (status)
2060                 goto err;
2061
2062         adapter->vf_cfg[vf].tx_rate = max_tx_rate;
2063         return 0;
2064
2065 err:
2066         dev_err(dev, "TX-rate setting of %dMbps on VF%d failed\n",
2067                 max_tx_rate, vf);
2068         return be_cmd_status(status);
2069 }
2070
2071 static int be_set_vf_link_state(struct net_device *netdev, int vf,
2072                                 int link_state)
2073 {
2074         struct be_adapter *adapter = netdev_priv(netdev);
2075         int status;
2076
2077         if (!sriov_enabled(adapter))
2078                 return -EPERM;
2079
2080         if (vf >= adapter->num_vfs)
2081                 return -EINVAL;
2082
2083         status = be_cmd_set_logical_link_config(adapter, link_state, vf + 1);
2084         if (status) {
2085                 dev_err(&adapter->pdev->dev,
2086                         "Link state change on VF %d failed: %#x\n", vf, status);
2087                 return be_cmd_status(status);
2088         }
2089
2090         adapter->vf_cfg[vf].plink_tracking = link_state;
2091
2092         return 0;
2093 }
2094
2095 static int be_set_vf_spoofchk(struct net_device *netdev, int vf, bool enable)
2096 {
2097         struct be_adapter *adapter = netdev_priv(netdev);
2098         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
2099         u8 spoofchk;
2100         int status;
2101
2102         if (!sriov_enabled(adapter))
2103                 return -EPERM;
2104
2105         if (vf >= adapter->num_vfs)
2106                 return -EINVAL;
2107
2108         if (BEx_chip(adapter))
2109                 return -EOPNOTSUPP;
2110
2111         if (enable == vf_cfg->spoofchk)
2112                 return 0;
2113
2114         spoofchk = enable ? ENABLE_MAC_SPOOFCHK : DISABLE_MAC_SPOOFCHK;
2115
2116         status = be_cmd_set_hsw_config(adapter, 0, vf + 1, vf_cfg->if_handle,
2117                                        0, spoofchk);
2118         if (status) {
2119                 dev_err(&adapter->pdev->dev,
2120                         "Spoofchk change on VF %d failed: %#x\n", vf, status);
2121                 return be_cmd_status(status);
2122         }
2123
2124         vf_cfg->spoofchk = enable;
2125         return 0;
2126 }
2127
2128 static void be_aic_update(struct be_aic_obj *aic, u64 rx_pkts, u64 tx_pkts,
2129                           ulong now)
2130 {
2131         aic->rx_pkts_prev = rx_pkts;
2132         aic->tx_reqs_prev = tx_pkts;
2133         aic->jiffies = now;
2134 }
2135
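/* Adaptive interrupt coalescing: compute a new EQ delay for this EQ based on
 * the RX+TX packet rate seen since the last sample. When adaptive coalescing
 * is disabled, the fixed et_eqd value is returned instead.
 */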
2136 static int be_get_new_eqd(struct be_eq_obj *eqo)
2137 {
2138         struct be_adapter *adapter = eqo->adapter;
2139         int eqd, start;
2140         struct be_aic_obj *aic;
2141         struct be_rx_obj *rxo;
2142         struct be_tx_obj *txo;
2143         u64 rx_pkts = 0, tx_pkts = 0;
2144         ulong now;
2145         u32 pps, delta;
2146         int i;
2147
2148         aic = &adapter->aic_obj[eqo->idx];
2149         if (!adapter->aic_enabled) {
2150                 if (aic->jiffies)
2151                         aic->jiffies = 0;
2152                 eqd = aic->et_eqd;
2153                 return eqd;
2154         }
2155
2156         for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
2157                 do {
2158                         start = u64_stats_fetch_begin_irq(&rxo->stats.sync);
2159                         rx_pkts += rxo->stats.rx_pkts;
2160                 } while (u64_stats_fetch_retry_irq(&rxo->stats.sync, start));
2161         }
2162
2163         for_all_tx_queues_on_eq(adapter, eqo, txo, i) {
2164                 do {
2165                         start = u64_stats_fetch_begin_irq(&txo->stats.sync);
2166                         tx_pkts += txo->stats.tx_reqs;
2167                 } while (u64_stats_fetch_retry_irq(&txo->stats.sync, start));
2168         }
2169
2170         /* Skip if the counters wrapped around or on the first calculation */
2171         now = jiffies;
2172         if (!aic->jiffies || time_before(now, aic->jiffies) ||
2173             rx_pkts < aic->rx_pkts_prev ||
2174             tx_pkts < aic->tx_reqs_prev) {
2175                 be_aic_update(aic, rx_pkts, tx_pkts, now);
2176                 return aic->prev_eqd;
2177         }
2178
2179         delta = jiffies_to_msecs(now - aic->jiffies);
2180         if (delta == 0)
2181                 return aic->prev_eqd;
2182
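        /* Aggregate RX+TX packets/sec over the elapsed interval, derive an
         * EQ delay from it and clamp the result to [min_eqd, max_eqd];
         * very low rates map to a zero (no coalescing) delay.
         */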
2183         pps = (((u32)(rx_pkts - aic->rx_pkts_prev) * 1000) / delta) +
2184                 (((u32)(tx_pkts - aic->tx_reqs_prev) * 1000) / delta);
2185         eqd = (pps / 15000) << 2;
2186
2187         if (eqd < 8)
2188                 eqd = 0;
2189         eqd = min_t(u32, eqd, aic->max_eqd);
2190         eqd = max_t(u32, eqd, aic->min_eqd);
2191
2192         be_aic_update(aic, rx_pkts, tx_pkts, now);
2193
2194         return eqd;
2195 }
2196
2197 /* For Skyhawk-R only */
2198 static u32 be_get_eq_delay_mult_enc(struct be_eq_obj *eqo)
2199 {
2200         struct be_adapter *adapter = eqo->adapter;
2201         struct be_aic_obj *aic = &adapter->aic_obj[eqo->idx];
2202         ulong now = jiffies;
2203         int eqd;
2204         u32 mult_enc;
2205
2206         if (!adapter->aic_enabled)
2207                 return 0;
2208
2209         if (jiffies_to_msecs(now - aic->jiffies) < 1)
2210                 eqd = aic->prev_eqd;
2211         else
2212                 eqd = be_get_new_eqd(eqo);
2213
2214         if (eqd > 100)
2215                 mult_enc = R2I_DLY_ENC_1;
2216         else if (eqd > 60)
2217                 mult_enc = R2I_DLY_ENC_2;
2218         else if (eqd > 20)
2219                 mult_enc = R2I_DLY_ENC_3;
2220         else
2221                 mult_enc = R2I_DLY_ENC_0;
2222
2223         aic->prev_eqd = eqd;
2224
2225         return mult_enc;
2226 }
2227
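/* Recompute the EQ delay for every event queue and, for each EQ whose value
 * changed (or unconditionally when force_update is set), push the new delay
 * multiplier to the FW in a single modify-EQ-delay command.
 */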
2228 void be_eqd_update(struct be_adapter *adapter, bool force_update)
2229 {
2230         struct be_set_eqd set_eqd[MAX_EVT_QS];
2231         struct be_aic_obj *aic;
2232         struct be_eq_obj *eqo;
2233         int i, num = 0, eqd;
2234
2235         for_all_evt_queues(adapter, eqo, i) {
2236                 aic = &adapter->aic_obj[eqo->idx];
2237                 eqd = be_get_new_eqd(eqo);
2238                 if (force_update || eqd != aic->prev_eqd) {
2239                         set_eqd[num].delay_multiplier = (eqd * 65)/100;
2240                         set_eqd[num].eq_id = eqo->q.id;
2241                         aic->prev_eqd = eqd;
2242                         num++;
2243                 }
2244         }
2245
2246         if (num)
2247                 be_cmd_modify_eqd(adapter, set_eqd, num);
2248 }
2249
2250 static void be_rx_stats_update(struct be_rx_obj *rxo,
2251                                struct be_rx_compl_info *rxcp)
2252 {
2253         struct be_rx_stats *stats = rx_stats(rxo);
2254
2255         u64_stats_update_begin(&stats->sync);
2256         stats->rx_compl++;
2257         stats->rx_bytes += rxcp->pkt_size;
2258         stats->rx_pkts++;
2259         if (rxcp->tunneled)
2260                 stats->rx_vxlan_offload_pkts++;
2261         if (rxcp->pkt_type == BE_MULTICAST_PACKET)
2262                 stats->rx_mcast_pkts++;
2263         if (rxcp->err)
2264                 stats->rx_compl_err++;
2265         u64_stats_update_end(&stats->sync);
2266 }
2267
2268 static inline bool csum_passed(struct be_rx_compl_info *rxcp)
2269 {
2270         /* L4 checksum is not reliable for non-TCP/UDP packets.
2271          * Also ignore ipcksm for IPv6 pkts.
2272          */
2273         return (rxcp->tcpf || rxcp->udpf) && rxcp->l4_csum &&
2274                 (rxcp->ip_csum || rxcp->ipv6) && !rxcp->err;
2275 }
2276
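/* Pop the page_info for the RX frag at the queue tail. The backing "big" page
 * is DMA-unmapped only on its last frag; otherwise just the single frag is
 * synced for CPU access.
 */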
2277 static struct be_rx_page_info *get_rx_page_info(struct be_rx_obj *rxo)
2278 {
2279         struct be_adapter *adapter = rxo->adapter;
2280         struct be_rx_page_info *rx_page_info;
2281         struct be_queue_info *rxq = &rxo->q;
2282         u32 frag_idx = rxq->tail;
2283
2284         rx_page_info = &rxo->page_info_tbl[frag_idx];
2285         BUG_ON(!rx_page_info->page);
2286
2287         if (rx_page_info->last_frag) {
2288                 dma_unmap_page(&adapter->pdev->dev,
2289                                dma_unmap_addr(rx_page_info, bus),
2290                                adapter->big_page_size, DMA_FROM_DEVICE);
2291                 rx_page_info->last_frag = false;
2292         } else {
2293                 dma_sync_single_for_cpu(&adapter->pdev->dev,
2294                                         dma_unmap_addr(rx_page_info, bus),
2295                                         rx_frag_size, DMA_FROM_DEVICE);
2296         }
2297
2298         queue_tail_inc(rxq);
2299         atomic_dec(&rxq->used);
2300         return rx_page_info;
2301 }
2302
2303 /* Throw away the data in the Rx completion */
2304 static void be_rx_compl_discard(struct be_rx_obj *rxo,
2305                                 struct be_rx_compl_info *rxcp)
2306 {
2307         struct be_rx_page_info *page_info;
2308         u16 i, num_rcvd = rxcp->num_rcvd;
2309
2310         for (i = 0; i < num_rcvd; i++) {
2311                 page_info = get_rx_page_info(rxo);
2312                 put_page(page_info->page);
2313                 memset(page_info, 0, sizeof(*page_info));
2314         }
2315 }
2316
2317 /*
2318  * skb_fill_rx_data forms a complete skb for an ether frame
2319  * indicated by rxcp.
2320  */
2321 static void skb_fill_rx_data(struct be_rx_obj *rxo, struct sk_buff *skb,
2322                              struct be_rx_compl_info *rxcp)
2323 {
2324         struct be_rx_page_info *page_info;
2325         u16 i, j;
2326         u16 hdr_len, curr_frag_len, remaining;
2327         u8 *start;
2328
2329         page_info = get_rx_page_info(rxo);
2330         start = page_address(page_info->page) + page_info->page_offset;
2331         prefetch(start);
2332
2333         /* Copy data in the first descriptor of this completion */
2334         curr_frag_len = min(rxcp->pkt_size, rx_frag_size);
2335
2336         skb->len = curr_frag_len;
2337         if (curr_frag_len <= BE_HDR_LEN) { /* tiny packet */
2338                 memcpy(skb->data, start, curr_frag_len);
2339                 /* Complete packet has now been moved to data */
2340                 put_page(page_info->page);
2341                 skb->data_len = 0;
2342                 skb->tail += curr_frag_len;
2343         } else {
2344                 hdr_len = ETH_HLEN;
2345                 memcpy(skb->data, start, hdr_len);
2346                 skb_shinfo(skb)->nr_frags = 1;
2347                 skb_frag_set_page(skb, 0, page_info->page);
2348                 skb_frag_off_set(&skb_shinfo(skb)->frags[0],
2349                                  page_info->page_offset + hdr_len);
2350                 skb_frag_size_set(&skb_shinfo(skb)->frags[0],
2351                                   curr_frag_len - hdr_len);
2352                 skb->data_len = curr_frag_len - hdr_len;
2353                 skb->truesize += rx_frag_size;
2354                 skb->tail += hdr_len;
2355         }
2356         page_info->page = NULL;
2357
2358         if (rxcp->pkt_size <= rx_frag_size) {
2359                 BUG_ON(rxcp->num_rcvd != 1);
2360                 return;
2361         }
2362
2363         /* More frags present for this completion */
2364         remaining = rxcp->pkt_size - curr_frag_len;
2365         for (i = 1, j = 0; i < rxcp->num_rcvd; i++) {
2366                 page_info = get_rx_page_info(rxo);
2367                 curr_frag_len = min(remaining, rx_frag_size);
2368
2369                 /* Coalesce all frags from the same physical page in one slot */
2370                 if (page_info->page_offset == 0) {
2371                         /* Fresh page */
2372                         j++;
2373                         skb_frag_set_page(skb, j, page_info->page);
2374                         skb_frag_off_set(&skb_shinfo(skb)->frags[j],
2375                                          page_info->page_offset);
2376                         skb_frag_size_set(&skb_shinfo(skb)->frags[j], 0);
2377                         skb_shinfo(skb)->nr_frags++;
2378                 } else {
2379                         put_page(page_info->page);
2380                 }
2381
2382                 skb_frag_size_add(&skb_shinfo(skb)->frags[j], curr_frag_len);
2383                 skb->len += curr_frag_len;
2384                 skb->data_len += curr_frag_len;
2385                 skb->truesize += rx_frag_size;
2386                 remaining -= curr_frag_len;
2387                 page_info->page = NULL;
2388         }
2389         BUG_ON(j > MAX_SKB_FRAGS);
2390 }
2391
2392 /* Process the RX completion indicated by rxcp when GRO is disabled */
2393 static void be_rx_compl_process(struct be_rx_obj *rxo, struct napi_struct *napi,
2394                                 struct be_rx_compl_info *rxcp)
2395 {
2396         struct be_adapter *adapter = rxo->adapter;
2397         struct net_device *netdev = adapter->netdev;
2398         struct sk_buff *skb;
2399
2400         skb = netdev_alloc_skb_ip_align(netdev, BE_RX_SKB_ALLOC_SIZE);
2401         if (unlikely(!skb)) {
2402                 rx_stats(rxo)->rx_drops_no_skbs++;
2403                 be_rx_compl_discard(rxo, rxcp);
2404                 return;
2405         }
2406
2407         skb_fill_rx_data(rxo, skb, rxcp);
2408
2409         if (likely((netdev->features & NETIF_F_RXCSUM) && csum_passed(rxcp)))
2410                 skb->ip_summed = CHECKSUM_UNNECESSARY;
2411         else
2412                 skb_checksum_none_assert(skb);
2413
2414         skb->protocol = eth_type_trans(skb, netdev);
2415         skb_record_rx_queue(skb, rxo - &adapter->rx_obj[0]);
2416         if (netdev->features & NETIF_F_RXHASH)
2417                 skb_set_hash(skb, rxcp->rss_hash, PKT_HASH_TYPE_L3);
2418
2419         skb->csum_level = rxcp->tunneled;
2420         skb_mark_napi_id(skb, napi);
2421
2422         if (rxcp->vlanf)
2423                 __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rxcp->vlan_tag);
2424
2425         netif_receive_skb(skb);
2426 }
2427
2428 /* Process the RX completion indicated by rxcp when GRO is enabled */
2429 static void be_rx_compl_process_gro(struct be_rx_obj *rxo,
2430                                     struct napi_struct *napi,
2431                                     struct be_rx_compl_info *rxcp)
2432 {
2433         struct be_adapter *adapter = rxo->adapter;
2434         struct be_rx_page_info *page_info;
2435         struct sk_buff *skb = NULL;
2436         u16 remaining, curr_frag_len;
2437         u16 i, j;
2438
2439         skb = napi_get_frags(napi);
2440         if (!skb) {
2441                 be_rx_compl_discard(rxo, rxcp);
2442                 return;
2443         }
2444
2445         remaining = rxcp->pkt_size;
2446         for (i = 0, j = -1; i < rxcp->num_rcvd; i++) {
2447                 page_info = get_rx_page_info(rxo);
2448
2449                 curr_frag_len = min(remaining, rx_frag_size);
2450
2451                 /* Coalesce all frags from the same physical page in one slot */
2452                 if (i == 0 || page_info->page_offset == 0) {
2453                         /* First frag or Fresh page */
2454                         j++;
2455                         skb_frag_set_page(skb, j, page_info->page);
2456                         skb_frag_off_set(&skb_shinfo(skb)->frags[j],
2457                                          page_info->page_offset);
2458                         skb_frag_size_set(&skb_shinfo(skb)->frags[j], 0);
2459                 } else {
2460                         put_page(page_info->page);
2461                 }
2462                 skb_frag_size_add(&skb_shinfo(skb)->frags[j], curr_frag_len);
2463                 skb->truesize += rx_frag_size;
2464                 remaining -= curr_frag_len;
2465                 memset(page_info, 0, sizeof(*page_info));
2466         }
2467         BUG_ON(j > MAX_SKB_FRAGS);
2468
2469         skb_shinfo(skb)->nr_frags = j + 1;
2470         skb->len = rxcp->pkt_size;
2471         skb->data_len = rxcp->pkt_size;
2472         skb->ip_summed = CHECKSUM_UNNECESSARY;
2473         skb_record_rx_queue(skb, rxo - &adapter->rx_obj[0]);
2474         if (adapter->netdev->features & NETIF_F_RXHASH)
2475                 skb_set_hash(skb, rxcp->rss_hash, PKT_HASH_TYPE_L3);
2476
2477         skb->csum_level = rxcp->tunneled;
2478
2479         if (rxcp->vlanf)
2480                 __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rxcp->vlan_tag);
2481
2482         napi_gro_frags(napi);
2483 }
2484
2485 static void be_parse_rx_compl_v1(struct be_eth_rx_compl *compl,
2486                                  struct be_rx_compl_info *rxcp)
2487 {
2488         rxcp->pkt_size = GET_RX_COMPL_V1_BITS(pktsize, compl);
2489         rxcp->vlanf = GET_RX_COMPL_V1_BITS(vtp, compl);
2490         rxcp->err = GET_RX_COMPL_V1_BITS(err, compl);
2491         rxcp->tcpf = GET_RX_COMPL_V1_BITS(tcpf, compl);
2492         rxcp->udpf = GET_RX_COMPL_V1_BITS(udpf, compl);
2493         rxcp->ip_csum = GET_RX_COMPL_V1_BITS(ipcksm, compl);
2494         rxcp->l4_csum = GET_RX_COMPL_V1_BITS(l4_cksm, compl);
2495         rxcp->ipv6 = GET_RX_COMPL_V1_BITS(ip_version, compl);
2496         rxcp->num_rcvd = GET_RX_COMPL_V1_BITS(numfrags, compl);
2497         rxcp->pkt_type = GET_RX_COMPL_V1_BITS(cast_enc, compl);
2498         rxcp->rss_hash = GET_RX_COMPL_V1_BITS(rsshash, compl);
2499         if (rxcp->vlanf) {
2500                 rxcp->qnq = GET_RX_COMPL_V1_BITS(qnq, compl);
2501                 rxcp->vlan_tag = GET_RX_COMPL_V1_BITS(vlan_tag, compl);
2502         }
2503         rxcp->port = GET_RX_COMPL_V1_BITS(port, compl);
2504         rxcp->tunneled =
2505                 GET_RX_COMPL_V1_BITS(tunneled, compl);
2506 }
2507
2508 static void be_parse_rx_compl_v0(struct be_eth_rx_compl *compl,
2509                                  struct be_rx_compl_info *rxcp)
2510 {
2511         rxcp->pkt_size = GET_RX_COMPL_V0_BITS(pktsize, compl);
2512         rxcp->vlanf = GET_RX_COMPL_V0_BITS(vtp, compl);
2513         rxcp->err = GET_RX_COMPL_V0_BITS(err, compl);
2514         rxcp->tcpf = GET_RX_COMPL_V0_BITS(tcpf, compl);
2515         rxcp->udpf = GET_RX_COMPL_V0_BITS(udpf, compl);
2516         rxcp->ip_csum = GET_RX_COMPL_V0_BITS(ipcksm, compl);
2517         rxcp->l4_csum = GET_RX_COMPL_V0_BITS(l4_cksm, compl);
2518         rxcp->ipv6 = GET_RX_COMPL_V0_BITS(ip_version, compl);
2519         rxcp->num_rcvd = GET_RX_COMPL_V0_BITS(numfrags, compl);
2520         rxcp->pkt_type = GET_RX_COMPL_V0_BITS(cast_enc, compl);
2521         rxcp->rss_hash = GET_RX_COMPL_V0_BITS(rsshash, compl);
2522         if (rxcp->vlanf) {
2523                 rxcp->qnq = GET_RX_COMPL_V0_BITS(qnq, compl);
2524                 rxcp->vlan_tag = GET_RX_COMPL_V0_BITS(vlan_tag, compl);
2525         }
2526         rxcp->port = GET_RX_COMPL_V0_BITS(port, compl);
2527         rxcp->ip_frag = GET_RX_COMPL_V0_BITS(ip_frag, compl);
2528 }
2529
2530 static struct be_rx_compl_info *be_rx_compl_get(struct be_rx_obj *rxo)
2531 {
2532         struct be_eth_rx_compl *compl = queue_tail_node(&rxo->cq);
2533         struct be_rx_compl_info *rxcp = &rxo->rxcp;
2534         struct be_adapter *adapter = rxo->adapter;
2535
2536         /* For checking the valid bit it is Ok to use either definition as the
2537          * valid bit is at the same position in both v0 and v1 Rx compl */
2538         if (compl->dw[offsetof(struct amap_eth_rx_compl_v1, valid) / 32] == 0)
2539                 return NULL;
2540
2541         rmb();
2542         be_dws_le_to_cpu(compl, sizeof(*compl));
2543
2544         if (adapter->be3_native)
2545                 be_parse_rx_compl_v1(compl, rxcp);
2546         else
2547                 be_parse_rx_compl_v0(compl, rxcp);
2548
2549         if (rxcp->ip_frag)
2550                 rxcp->l4_csum = 0;
2551
2552         if (rxcp->vlanf) {
2553                 /* In QNQ modes, if qnq bit is not set, then the packet was
2554                  * tagged only with the transparent outer vlan-tag and must
2555                  * not be treated as a vlan packet by host
2556                  */
2557                 if (be_is_qnq_mode(adapter) && !rxcp->qnq)
2558                         rxcp->vlanf = 0;
2559
2560                 if (!lancer_chip(adapter))
2561                         rxcp->vlan_tag = swab16(rxcp->vlan_tag);
2562
2563                 if (adapter->pvid == (rxcp->vlan_tag & VLAN_VID_MASK) &&
2564                     !test_bit(rxcp->vlan_tag, adapter->vids))
2565                         rxcp->vlanf = 0;
2566         }
2567
2568         /* As the compl has been parsed, reset it; we won't touch it again */
2569         compl->dw[offsetof(struct amap_eth_rx_compl_v1, valid) / 32] = 0;
2570
2571         queue_tail_inc(&rxo->cq);
2572         return rxcp;
2573 }
2574
2575 static inline struct page *be_alloc_pages(u32 size, gfp_t gfp)
2576 {
2577         u32 order = get_order(size);
2578
2579         if (order > 0)
2580                 gfp |= __GFP_COMP;
2581         return  alloc_pages(gfp, order);
2582 }
2583
2584 /*
2585  * Allocate a page, split it into fragments of size rx_frag_size and post
2586  * them as receive buffers to BE
2587  */
2588 static void be_post_rx_frags(struct be_rx_obj *rxo, gfp_t gfp, u32 frags_needed)
2589 {
2590         struct be_adapter *adapter = rxo->adapter;
2591         struct be_rx_page_info *page_info = NULL, *prev_page_info = NULL;
2592         struct be_queue_info *rxq = &rxo->q;
2593         struct page *pagep = NULL;
2594         struct device *dev = &adapter->pdev->dev;
2595         struct be_eth_rx_d *rxd;
2596         u64 page_dmaaddr = 0, frag_dmaaddr;
2597         u32 posted, page_offset = 0, notify = 0;
2598
2599         page_info = &rxo->page_info_tbl[rxq->head];
2600         for (posted = 0; posted < frags_needed && !page_info->page; posted++) {
2601                 if (!pagep) {
2602                         pagep = be_alloc_pages(adapter->big_page_size, gfp);
2603                         if (unlikely(!pagep)) {
2604                                 rx_stats(rxo)->rx_post_fail++;
2605                                 break;
2606                         }
2607                         page_dmaaddr = dma_map_page(dev, pagep, 0,
2608                                                     adapter->big_page_size,
2609                                                     DMA_FROM_DEVICE);
2610                         if (dma_mapping_error(dev, page_dmaaddr)) {
2611                                 put_page(pagep);
2612                                 pagep = NULL;
2613                                 adapter->drv_stats.dma_map_errors++;
2614                                 break;
2615                         }
2616                         page_offset = 0;
2617                 } else {
2618                         get_page(pagep);
2619                         page_offset += rx_frag_size;
2620                 }
2621                 page_info->page_offset = page_offset;
2622                 page_info->page = pagep;
2623
2624                 rxd = queue_head_node(rxq);
2625                 frag_dmaaddr = page_dmaaddr + page_info->page_offset;
2626                 rxd->fragpa_lo = cpu_to_le32(frag_dmaaddr & 0xFFFFFFFF);
2627                 rxd->fragpa_hi = cpu_to_le32(upper_32_bits(frag_dmaaddr));
2628
2629                 /* Any space left in the current big page for another frag? */
2630                 if ((page_offset + rx_frag_size + rx_frag_size) >
2631                                         adapter->big_page_size) {
2632                         pagep = NULL;
2633                         page_info->last_frag = true;
2634                         dma_unmap_addr_set(page_info, bus, page_dmaaddr);
2635                 } else {
2636                         dma_unmap_addr_set(page_info, bus, frag_dmaaddr);
2637                 }
2638
2639                 prev_page_info = page_info;
2640                 queue_head_inc(rxq);
2641                 page_info = &rxo->page_info_tbl[rxq->head];
2642         }
2643
2644         /* Mark the last frag of a page when we break out of the above loop
2645          * with no more slots available in the RXQ
2646          */
2647         if (pagep) {
2648                 prev_page_info->last_frag = true;
2649                 dma_unmap_addr_set(prev_page_info, bus, page_dmaaddr);
2650         }
2651
2652         if (posted) {
2653                 atomic_add(posted, &rxq->used);
2654                 if (rxo->rx_post_starved)
2655                         rxo->rx_post_starved = false;
2656                 do {
2657                         notify = min(MAX_NUM_POST_ERX_DB, posted);
2658                         be_rxq_notify(adapter, rxq->id, notify);
2659                         posted -= notify;
2660                 } while (posted);
2661         } else if (atomic_read(&rxq->used) == 0) {
2662                 /* Let be_worker replenish when memory is available */
2663                 rxo->rx_post_starved = true;
2664         }
2665 }
2666
2667 static inline void be_update_tx_err(struct be_tx_obj *txo, u8 status)
2668 {
2669         switch (status) {
2670         case BE_TX_COMP_HDR_PARSE_ERR:
2671                 tx_stats(txo)->tx_hdr_parse_err++;
2672                 break;
2673         case BE_TX_COMP_NDMA_ERR:
2674                 tx_stats(txo)->tx_dma_err++;
2675                 break;
2676         case BE_TX_COMP_ACL_ERR:
2677                 tx_stats(txo)->tx_spoof_check_err++;
2678                 break;
2679         }
2680 }
2681
2682 static inline void lancer_update_tx_err(struct be_tx_obj *txo, u8 status)
2683 {
2684         switch (status) {
2685         case LANCER_TX_COMP_LSO_ERR:
2686                 tx_stats(txo)->tx_tso_err++;
2687                 break;
2688         case LANCER_TX_COMP_HSW_DROP_MAC_ERR:
2689         case LANCER_TX_COMP_HSW_DROP_VLAN_ERR:
2690                 tx_stats(txo)->tx_spoof_check_err++;
2691                 break;
2692         case LANCER_TX_COMP_QINQ_ERR:
2693                 tx_stats(txo)->tx_qinq_err++;
2694                 break;
2695         case LANCER_TX_COMP_PARITY_ERR:
2696                 tx_stats(txo)->tx_internal_parity_err++;
2697                 break;
2698         case LANCER_TX_COMP_DMA_ERR:
2699                 tx_stats(txo)->tx_dma_err++;
2700                 break;
2701         case LANCER_TX_COMP_SGE_ERR:
2702                 tx_stats(txo)->tx_sge_err++;
2703                 break;
2704         }
2705 }
2706
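/* Fetch the next valid TX completion from the TX CQ, if any. Completion
 * errors are accounted in TX stats; on Lancer, LSO/SGE/parity errors
 * additionally mark the adapter with BE_ERROR_TX so that it gets reset.
 */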
2707 static struct be_tx_compl_info *be_tx_compl_get(struct be_adapter *adapter,
2708                                                 struct be_tx_obj *txo)
2709 {
2710         struct be_queue_info *tx_cq = &txo->cq;
2711         struct be_tx_compl_info *txcp = &txo->txcp;
2712         struct be_eth_tx_compl *compl = queue_tail_node(tx_cq);
2713
2714         if (compl->dw[offsetof(struct amap_eth_tx_compl, valid) / 32] == 0)
2715                 return NULL;
2716
2717         /* Ensure load ordering of valid bit dword and other dwords below */
2718         rmb();
2719         be_dws_le_to_cpu(compl, sizeof(*compl));
2720
2721         txcp->status = GET_TX_COMPL_BITS(status, compl);
2722         txcp->end_index = GET_TX_COMPL_BITS(wrb_index, compl);
2723
2724         if (txcp->status) {
2725                 if (lancer_chip(adapter)) {
2726                         lancer_update_tx_err(txo, txcp->status);
2727                         /* Reset the adapter in case of TSO,
2728                          * SGE or Parity error
2729                          */
2730                         if (txcp->status == LANCER_TX_COMP_LSO_ERR ||
2731                             txcp->status == LANCER_TX_COMP_PARITY_ERR ||
2732                             txcp->status == LANCER_TX_COMP_SGE_ERR)
2733                                 be_set_error(adapter, BE_ERROR_TX);
2734                 } else {
2735                         be_update_tx_err(txo, txcp->status);
2736                 }
2737         }
2738
2739         if (be_check_error(adapter, BE_ERROR_TX))
2740                 return NULL;
2741
2742         compl->dw[offsetof(struct amap_eth_tx_compl, valid) / 32] = 0;
2743         queue_tail_inc(tx_cq);
2744         return txcp;
2745 }
2746
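/* Walk the TX queue from its tail up to last_index, unmapping every WRB and
 * freeing the completed skb(s). Returns the number of WRBs processed so the
 * caller can credit them back to the queue's used count.
 */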
2747 static u16 be_tx_compl_process(struct be_adapter *adapter,
2748                                struct be_tx_obj *txo, u16 last_index)
2749 {
2750         struct sk_buff **sent_skbs = txo->sent_skb_list;
2751         struct be_queue_info *txq = &txo->q;
2752         struct sk_buff *skb = NULL;
2753         bool unmap_skb_hdr = false;
2754         struct be_eth_wrb *wrb;
2755         u16 num_wrbs = 0;
2756         u32 frag_index;
2757
2758         do {
2759                 if (sent_skbs[txq->tail]) {
2760                         /* Free skb from prev req */
2761                         if (skb)
2762                                 dev_consume_skb_any(skb);
2763                         skb = sent_skbs[txq->tail];
2764                         sent_skbs[txq->tail] = NULL;
2765                         queue_tail_inc(txq);  /* skip hdr wrb */
2766                         num_wrbs++;
2767                         unmap_skb_hdr = true;
2768                 }
2769                 wrb = queue_tail_node(txq);
2770                 frag_index = txq->tail;
2771                 unmap_tx_frag(&adapter->pdev->dev, wrb,
2772                               (unmap_skb_hdr && skb_headlen(skb)));
2773                 unmap_skb_hdr = false;
2774                 queue_tail_inc(txq);
2775                 num_wrbs++;
2776         } while (frag_index != last_index);
2777         dev_consume_skb_any(skb);
2778
2779         return num_wrbs;
2780 }
2781
2782 /* Return the number of events in the event queue */
2783 static inline int events_get(struct be_eq_obj *eqo)
2784 {
2785         struct be_eq_entry *eqe;
2786         int num = 0;
2787
2788         do {
2789                 eqe = queue_tail_node(&eqo->q);
2790                 if (eqe->evt == 0)
2791                         break;
2792
2793                 rmb();
2794                 eqe->evt = 0;
2795                 num++;
2796                 queue_tail_inc(&eqo->q);
2797         } while (true);
2798
2799         return num;
2800 }
2801
2802 /* Leaves the EQ in a disarmed state */
2803 static void be_eq_clean(struct be_eq_obj *eqo)
2804 {
2805         int num = events_get(eqo);
2806
2807         be_eq_notify(eqo->adapter, eqo->q.id, false, true, num, 0);
2808 }
2809
2810 /* Free posted rx buffers that were not used */
2811 static void be_rxq_clean(struct be_rx_obj *rxo)
2812 {
2813         struct be_queue_info *rxq = &rxo->q;
2814         struct be_rx_page_info *page_info;
2815
2816         while (atomic_read(&rxq->used) > 0) {
2817                 page_info = get_rx_page_info(rxo);
2818                 put_page(page_info->page);
2819                 memset(page_info, 0, sizeof(*page_info));
2820         }
2821         BUG_ON(atomic_read(&rxq->used));
2822         rxq->tail = 0;
2823         rxq->head = 0;
2824 }
2825
2826 static void be_rx_cq_clean(struct be_rx_obj *rxo)
2827 {
2828         struct be_queue_info *rx_cq = &rxo->cq;
2829         struct be_rx_compl_info *rxcp;
2830         struct be_adapter *adapter = rxo->adapter;
2831         int flush_wait = 0;
2832
2833         /* Consume pending rx completions.
2834          * Wait for the flush completion (identified by zero num_rcvd)
2835          * to arrive. Notify CQ even when there are no more CQ entries
2836          * for HW to flush partially coalesced CQ entries.
2837          * In Lancer, there is no need to wait for flush compl.
2838          */
2839         for (;;) {
2840                 rxcp = be_rx_compl_get(rxo);
2841                 if (!rxcp) {
2842                         if (lancer_chip(adapter))
2843                                 break;
2844
2845                         if (flush_wait++ > 50 ||
2846                             be_check_error(adapter,
2847                                            BE_ERROR_HW)) {
2848                                 dev_warn(&adapter->pdev->dev,
2849                                          "did not receive flush compl\n");
2850                                 break;
2851                         }
2852                         be_cq_notify(adapter, rx_cq->id, true, 0);
2853                         mdelay(1);
2854                 } else {
2855                         be_rx_compl_discard(rxo, rxcp);
2856                         be_cq_notify(adapter, rx_cq->id, false, 1);
2857                         if (rxcp->num_rcvd == 0)
2858                                 break;
2859                 }
2860         }
2861
2862         /* After cleanup, leave the CQ in unarmed state */
2863         be_cq_notify(adapter, rx_cq->id, false, 0);
2864 }
2865
2866 static void be_tx_compl_clean(struct be_adapter *adapter)
2867 {
2868         struct device *dev = &adapter->pdev->dev;
2869         u16 cmpl = 0, timeo = 0, num_wrbs = 0;
2870         struct be_tx_compl_info *txcp;
2871         struct be_queue_info *txq;
2872         u32 end_idx, notified_idx;
2873         struct be_tx_obj *txo;
2874         int i, pending_txqs;
2875
2876         /* Stop polling for compls when HW has been silent for 10ms */
2877         do {
2878                 pending_txqs = adapter->num_tx_qs;
2879
2880                 for_all_tx_queues(adapter, txo, i) {
2881                         cmpl = 0;
2882                         num_wrbs = 0;
2883                         txq = &txo->q;
2884                         while ((txcp = be_tx_compl_get(adapter, txo))) {
2885                                 num_wrbs +=
2886                                         be_tx_compl_process(adapter, txo,
2887                                                             txcp->end_index);
2888                                 cmpl++;
2889                         }
2890                         if (cmpl) {
2891                                 be_cq_notify(adapter, txo->cq.id, false, cmpl);
2892                                 atomic_sub(num_wrbs, &txq->used);
2893                                 timeo = 0;
2894                         }
2895                         if (!be_is_tx_compl_pending(txo))
2896                                 pending_txqs--;
2897                 }
2898
2899                 if (pending_txqs == 0 || ++timeo > 10 ||
2900                     be_check_error(adapter, BE_ERROR_HW))
2901                         break;
2902
2903                 mdelay(1);
2904         } while (true);
2905
2906         /* Free enqueued TX that was never notified to HW */
2907         for_all_tx_queues(adapter, txo, i) {
2908                 txq = &txo->q;
2909
2910                 if (atomic_read(&txq->used)) {
2911                         dev_info(dev, "txq%d: cleaning %d pending tx-wrbs\n",
2912                                  i, atomic_read(&txq->used));
2913                         notified_idx = txq->tail;
2914                         end_idx = txq->tail;
2915                         index_adv(&end_idx, atomic_read(&txq->used) - 1,
2916                                   txq->len);
2917                         /* Use the tx-compl process logic to handle requests
2918                          * that were not sent to the HW.
2919                          */
2920                         num_wrbs = be_tx_compl_process(adapter, txo, end_idx);
2921                         atomic_sub(num_wrbs, &txq->used);
2922                         BUG_ON(atomic_read(&txq->used));
2923                         txo->pend_wrb_cnt = 0;
2924                         /* Since hw was never notified of these requests,
2925                          * reset TXQ indices
2926                          */
2927                         txq->head = notified_idx;
2928                         txq->tail = notified_idx;
2929                 }
2930         }
2931 }
2932
2933 static void be_evt_queues_destroy(struct be_adapter *adapter)
2934 {
2935         struct be_eq_obj *eqo;
2936         int i;
2937
2938         for_all_evt_queues(adapter, eqo, i) {
2939                 if (eqo->q.created) {
2940                         be_eq_clean(eqo);
2941                         be_cmd_q_destroy(adapter, &eqo->q, QTYPE_EQ);
2942                         netif_napi_del(&eqo->napi);
2943                         free_cpumask_var(eqo->affinity_mask);
2944                 }
2945                 be_queue_free(adapter, &eqo->q);
2946         }
2947 }
2948
2949 static int be_evt_queues_create(struct be_adapter *adapter)
2950 {
2951         struct be_queue_info *eq;
2952         struct be_eq_obj *eqo;
2953         struct be_aic_obj *aic;
2954         int i, rc;
2955
2956         /* need enough EQs to service both RX and TX queues */
2957         adapter->num_evt_qs = min_t(u16, num_irqs(adapter),
2958                                     max(adapter->cfg_num_rx_irqs,
2959                                         adapter->cfg_num_tx_irqs));
2960
2961         adapter->aic_enabled = true;
2962
2963         for_all_evt_queues(adapter, eqo, i) {
2964                 int numa_node = dev_to_node(&adapter->pdev->dev);
2965
2966                 aic = &adapter->aic_obj[i];
2967                 eqo->adapter = adapter;
2968                 eqo->idx = i;
2969                 aic->max_eqd = BE_MAX_EQD;
2970
2971                 eq = &eqo->q;
2972                 rc = be_queue_alloc(adapter, eq, EVNT_Q_LEN,
2973                                     sizeof(struct be_eq_entry));
2974                 if (rc)
2975                         return rc;
2976
2977                 rc = be_cmd_eq_create(adapter, eqo);
2978                 if (rc)
2979                         return rc;
2980
2981                 if (!zalloc_cpumask_var(&eqo->affinity_mask, GFP_KERNEL))
2982                         return -ENOMEM;
2983                 cpumask_set_cpu(cpumask_local_spread(i, numa_node),
2984                                 eqo->affinity_mask);
2985                 netif_napi_add(adapter->netdev, &eqo->napi, be_poll,
2986                                BE_NAPI_WEIGHT);
2987         }
2988         return 0;
2989 }
2990
2991 static void be_mcc_queues_destroy(struct be_adapter *adapter)
2992 {
2993         struct be_queue_info *q;
2994
2995         q = &adapter->mcc_obj.q;
2996         if (q->created)
2997                 be_cmd_q_destroy(adapter, q, QTYPE_MCCQ);
2998         be_queue_free(adapter, q);
2999
3000         q = &adapter->mcc_obj.cq;
3001         if (q->created)
3002                 be_cmd_q_destroy(adapter, q, QTYPE_CQ);
3003         be_queue_free(adapter, q);
3004 }
3005
3006 /* Must be called only after TX qs are created as MCC shares TX EQ */
3007 static int be_mcc_queues_create(struct be_adapter *adapter)
3008 {
3009         struct be_queue_info *q, *cq;
3010
3011         cq = &adapter->mcc_obj.cq;
3012         if (be_queue_alloc(adapter, cq, MCC_CQ_LEN,
3013                            sizeof(struct be_mcc_compl)))
3014                 goto err;
3015
3016         /* Use the default EQ for MCC completions */
3017         if (be_cmd_cq_create(adapter, cq, &mcc_eqo(adapter)->q, true, 0))
3018                 goto mcc_cq_free;
3019
3020         q = &adapter->mcc_obj.q;
3021         if (be_queue_alloc(adapter, q, MCC_Q_LEN, sizeof(struct be_mcc_wrb)))
3022                 goto mcc_cq_destroy;
3023
3024         if (be_cmd_mccq_create(adapter, q, cq))
3025                 goto mcc_q_free;
3026
3027         return 0;
3028
3029 mcc_q_free:
3030         be_queue_free(adapter, q);
3031 mcc_cq_destroy:
3032         be_cmd_q_destroy(adapter, cq, QTYPE_CQ);
3033 mcc_cq_free:
3034         be_queue_free(adapter, cq);
3035 err:
3036         return -1;
3037 }
3038
3039 static void be_tx_queues_destroy(struct be_adapter *adapter)
3040 {
3041         struct be_queue_info *q;
3042         struct be_tx_obj *txo;
3043         u8 i;
3044
3045         for_all_tx_queues(adapter, txo, i) {
3046                 q = &txo->q;
3047                 if (q->created)
3048                         be_cmd_q_destroy(adapter, q, QTYPE_TXQ);
3049                 be_queue_free(adapter, q);
3050
3051                 q = &txo->cq;
3052                 if (q->created)
3053                         be_cmd_q_destroy(adapter, q, QTYPE_CQ);
3054                 be_queue_free(adapter, q);
3055         }
3056 }
3057
3058 static int be_tx_qs_create(struct be_adapter *adapter)
3059 {
3060         struct be_queue_info *cq;
3061         struct be_tx_obj *txo;
3062         struct be_eq_obj *eqo;
3063         int status, i;
3064
3065         adapter->num_tx_qs = min(adapter->num_evt_qs, adapter->cfg_num_tx_irqs);
3066
3067         for_all_tx_queues(adapter, txo, i) {
3068                 cq = &txo->cq;
3069                 status = be_queue_alloc(adapter, cq, TX_CQ_LEN,
3070                                         sizeof(struct be_eth_tx_compl));
3071                 if (status)
3072                         return status;
3073
3074                 u64_stats_init(&txo->stats.sync);
3075                 u64_stats_init(&txo->stats.sync_compl);
3076
3077                 /* If num_evt_qs is less than num_tx_qs, then more than
3078                  * one txq shares an eq
3079                  */
3080                 eqo = &adapter->eq_obj[i % adapter->num_evt_qs];
3081                 status = be_cmd_cq_create(adapter, cq, &eqo->q, false, 3);
3082                 if (status)
3083                         return status;
3084
3085                 status = be_queue_alloc(adapter, &txo->q, TX_Q_LEN,
3086                                         sizeof(struct be_eth_wrb));
3087                 if (status)
3088                         return status;
3089
3090                 status = be_cmd_txq_create(adapter, txo);
3091                 if (status)
3092                         return status;
3093
3094                 netif_set_xps_queue(adapter->netdev, eqo->affinity_mask,
3095                                     eqo->idx);
3096         }
3097
3098         dev_info(&adapter->pdev->dev, "created %d TX queue(s)\n",
3099                  adapter->num_tx_qs);
3100         return 0;
3101 }
3102
3103 static void be_rx_cqs_destroy(struct be_adapter *adapter)
3104 {
3105         struct be_queue_info *q;
3106         struct be_rx_obj *rxo;
3107         int i;
3108
3109         for_all_rx_queues(adapter, rxo, i) {
3110                 q = &rxo->cq;
3111                 if (q->created)
3112                         be_cmd_q_destroy(adapter, q, QTYPE_CQ);
3113                 be_queue_free(adapter, q);
3114         }
3115 }
3116
3117 static int be_rx_cqs_create(struct be_adapter *adapter)
3118 {
3119         struct be_queue_info *eq, *cq;
3120         struct be_rx_obj *rxo;
3121         int rc, i;
3122
3123         adapter->num_rss_qs =
3124                         min(adapter->num_evt_qs, adapter->cfg_num_rx_irqs);
3125
3126         /* We'll use RSS only if at least 2 RSS rings are supported. */
3127         if (adapter->num_rss_qs < 2)
3128                 adapter->num_rss_qs = 0;
3129
3130         adapter->num_rx_qs = adapter->num_rss_qs + adapter->need_def_rxq;
3131
3132         /* When the interface is not capable of RSS rings (and there is no
3133          * need to create a default RXQ) we'll still need one RXQ
3134          */
3135         if (adapter->num_rx_qs == 0)
3136                 adapter->num_rx_qs = 1;
3137
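        /* Illustrative note: with the default rx_frag_size of 2048 and 4K
         * pages, get_order(2048) is 0, so big_page_size works out to exactly
         * one PAGE_SIZE; larger rx_frag_size values would make the RX path
         * allocate higher-order (compound) pages.
         */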
3138         adapter->big_page_size = (1 << get_order(rx_frag_size)) * PAGE_SIZE;
3139         for_all_rx_queues(adapter, rxo, i) {
3140                 rxo->adapter = adapter;
3141                 cq = &rxo->cq;
3142                 rc = be_queue_alloc(adapter, cq, RX_CQ_LEN,
3143                                     sizeof(struct be_eth_rx_compl));
3144                 if (rc)
3145                         return rc;
3146
3147                 u64_stats_init(&rxo->stats.sync);
3148                 eq = &adapter->eq_obj[i % adapter->num_evt_qs].q;
3149                 rc = be_cmd_cq_create(adapter, cq, eq, false, 3);
3150                 if (rc)
3151                         return rc;
3152         }
3153
3154         dev_info(&adapter->pdev->dev,
3155                  "created %d RX queue(s)\n", adapter->num_rx_qs);
3156         return 0;
3157 }
3158
3159 static irqreturn_t be_intx(int irq, void *dev)
3160 {
3161         struct be_eq_obj *eqo = dev;
3162         struct be_adapter *adapter = eqo->adapter;
3163         int num_evts = 0;
3164
3165         /* IRQ is not expected when NAPI is scheduled as the EQ
3166          * will not be armed.
3167          * But, this can happen on Lancer INTx where it takes
3168          * a while to de-assert INTx or in BE2 where occasionally
3169          * an interrupt may be raised even when EQ is unarmed.
3170          * If NAPI is already scheduled, then counting & notifying
3171          * events will orphan them.
3172          */
3173         if (napi_schedule_prep(&eqo->napi)) {
3174                 num_evts = events_get(eqo);
3175                 __napi_schedule(&eqo->napi);
3176                 if (num_evts)
3177                         eqo->spurious_intr = 0;
3178         }
3179         be_eq_notify(adapter, eqo->q.id, false, true, num_evts, 0);
3180
3181         /* Return IRQ_HANDLED only for the first spurious intr
3182          * after a valid intr to stop the kernel from branding
3183          * this irq as a bad one!
3184          */
3185         if (num_evts || eqo->spurious_intr++ == 0)
3186                 return IRQ_HANDLED;
3187         else
3188                 return IRQ_NONE;
3189 }
3190
3191 static irqreturn_t be_msix(int irq, void *dev)
3192 {
3193         struct be_eq_obj *eqo = dev;
3194
3195         be_eq_notify(eqo->adapter, eqo->q.id, false, true, 0, 0);
3196         napi_schedule(&eqo->napi);
3197         return IRQ_HANDLED;
3198 }
3199
3200 static inline bool do_gro(struct be_rx_compl_info *rxcp)
3201 {
3202         return (rxcp->tcpf && !rxcp->err && rxcp->l4_csum) ? true : false;
3203 }
3204
3205 static int be_process_rx(struct be_rx_obj *rxo, struct napi_struct *napi,
3206                          int budget)
3207 {
3208         struct be_adapter *adapter = rxo->adapter;
3209         struct be_queue_info *rx_cq = &rxo->cq;
3210         struct be_rx_compl_info *rxcp;
3211         u32 work_done;
3212         u32 frags_consumed = 0;
3213
3214         for (work_done = 0; work_done < budget; work_done++) {
3215                 rxcp = be_rx_compl_get(rxo);
3216                 if (!rxcp)
3217                         break;
3218
3219                 /* Is it a flush compl that has no data */
3220                 if (unlikely(rxcp->num_rcvd == 0))
3221                         goto loop_continue;
3222
3223                 /* Discard compl with partial DMA Lancer B0 */
3224                 if (unlikely(!rxcp->pkt_size)) {
3225                         be_rx_compl_discard(rxo, rxcp);
3226                         goto loop_continue;
3227                 }
3228
3229                 /* On BE drop pkts that arrive due to imperfect filtering in
3230                  * promiscuous mode on some SKUs
3231                  */
3232                 if (unlikely(rxcp->port != adapter->port_num &&
3233                              !lancer_chip(adapter))) {
3234                         be_rx_compl_discard(rxo, rxcp);
3235                         goto loop_continue;
3236                 }
3237
3238                 if (do_gro(rxcp))
3239                         be_rx_compl_process_gro(rxo, napi, rxcp);
3240                 else
3241                         be_rx_compl_process(rxo, napi, rxcp);
3242
3243 loop_continue:
3244                 frags_consumed += rxcp->num_rcvd;
3245                 be_rx_stats_update(rxo, rxcp);
3246         }
3247
3248         if (work_done) {
3249                 be_cq_notify(adapter, rx_cq->id, true, work_done);
3250
3251                 /* When an rx-obj gets into post_starved state, just
3252                  * let be_worker do the posting.
3253                  */
3254                 if (atomic_read(&rxo->q.used) < RX_FRAGS_REFILL_WM &&
3255                     !rxo->rx_post_starved)
3256                         be_post_rx_frags(rxo, GFP_ATOMIC,
3257                                          max_t(u32, MAX_RX_POST,
3258                                                frags_consumed));
3259         }
3260
3261         return work_done;
3262 }
3263
3265 static void be_process_tx(struct be_adapter *adapter, struct be_tx_obj *txo,
3266                           int idx)
3267 {
3268         int num_wrbs = 0, work_done = 0;
3269         struct be_tx_compl_info *txcp;
3270
3271         while ((txcp = be_tx_compl_get(adapter, txo))) {
3272                 num_wrbs += be_tx_compl_process(adapter, txo, txcp->end_index);
3273                 work_done++;
3274         }
3275
3276         if (work_done) {
3277                 be_cq_notify(adapter, txo->cq.id, true, work_done);
3278                 atomic_sub(num_wrbs, &txo->q.used);
3279
3280                 /* As Tx wrbs have been freed up, wake up netdev queue
3281                  * if it was stopped due to lack of tx wrbs.  */
3282                 if (__netif_subqueue_stopped(adapter->netdev, idx) &&
3283                     be_can_txq_wake(txo)) {
3284                         netif_wake_subqueue(adapter->netdev, idx);
3285                 }
3286
3287                 u64_stats_update_begin(&tx_stats(txo)->sync_compl);
3288                 tx_stats(txo)->tx_compl += work_done;
3289                 u64_stats_update_end(&tx_stats(txo)->sync_compl);
3290         }
3291 }
3292
3293 int be_poll(struct napi_struct *napi, int budget)
3294 {
3295         struct be_eq_obj *eqo = container_of(napi, struct be_eq_obj, napi);
3296         struct be_adapter *adapter = eqo->adapter;
3297         int max_work = 0, work, i, num_evts;
3298         struct be_rx_obj *rxo;
3299         struct be_tx_obj *txo;
3300         u32 mult_enc = 0;
3301
3302         num_evts = events_get(eqo);
3303
3304         for_all_tx_queues_on_eq(adapter, eqo, txo, i)
3305                 be_process_tx(adapter, txo, i);
3306
3307         /* This loop will iterate twice for EQ0 in which
3308          * completions of the last RXQ (default one) are also processed.
3309          * For other EQs the loop iterates only once.
3310          */
3311         for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
3312                 work = be_process_rx(rxo, napi, budget);
3313                 max_work = max(work, max_work);
3314         }
3315
3316         if (is_mcc_eqo(eqo))
3317                 be_process_mcc(adapter);
3318
3319         if (max_work < budget) {
3320                 napi_complete_done(napi, max_work);
3321
3322                 /* Skyhawk EQ_DB has a provision to set the rearm to interrupt
3323                  * delay via a delay multiplier encoding value
3324                  */
3325                 if (skyhawk_chip(adapter))
3326                         mult_enc = be_get_eq_delay_mult_enc(eqo);
3327
3328                 be_eq_notify(adapter, eqo->q.id, true, false, num_evts,
3329                              mult_enc);
3330         } else {
3331                 /* As we'll continue in polling mode, count and clear events */
3332                 be_eq_notify(adapter, eqo->q.id, false, false, num_evts, 0);
3333         }
3334         return max_work;
3335 }
3336
3337 void be_detect_error(struct be_adapter *adapter)
3338 {
3339         u32 ue_lo = 0, ue_hi = 0, ue_lo_mask = 0, ue_hi_mask = 0;
3340         u32 sliport_status = 0, sliport_err1 = 0, sliport_err2 = 0;
3341         struct device *dev = &adapter->pdev->dev;
3342         u16 val;
3343         u32 i;
3344
3345         if (be_check_error(adapter, BE_ERROR_HW))
3346                 return;
3347
3348         if (lancer_chip(adapter)) {
3349                 sliport_status = ioread32(adapter->db + SLIPORT_STATUS_OFFSET);
3350                 if (sliport_status & SLIPORT_STATUS_ERR_MASK) {
3351                         be_set_error(adapter, BE_ERROR_UE);
3352                         sliport_err1 = ioread32(adapter->db +
3353                                                 SLIPORT_ERROR1_OFFSET);
3354                         sliport_err2 = ioread32(adapter->db +
3355                                                 SLIPORT_ERROR2_OFFSET);
3356                         /* Do not log error messages if it's a FW reset */
3357                         if (sliport_err1 == SLIPORT_ERROR_FW_RESET1 &&
3358                             sliport_err2 == SLIPORT_ERROR_FW_RESET2) {
3359                                 dev_info(dev, "Reset is in progress\n");
3360                         } else {
3361                                 dev_err(dev, "Error detected in the card\n");
3362                                 dev_err(dev, "ERR: sliport status 0x%x\n",
3363                                         sliport_status);
3364                                 dev_err(dev, "ERR: sliport error1 0x%x\n",
3365                                         sliport_err1);
3366                                 dev_err(dev, "ERR: sliport error2 0x%x\n",
3367                                         sliport_err2);
3368                         }
3369                 }
3370         } else {
3371                 ue_lo = ioread32(adapter->pcicfg + PCICFG_UE_STATUS_LOW);
3372                 ue_hi = ioread32(adapter->pcicfg + PCICFG_UE_STATUS_HIGH);
3373                 ue_lo_mask = ioread32(adapter->pcicfg +
3374                                       PCICFG_UE_STATUS_LOW_MASK);
3375                 ue_hi_mask = ioread32(adapter->pcicfg +
3376                                       PCICFG_UE_STATUS_HI_MASK);
3377
3378                 ue_lo = (ue_lo & ~ue_lo_mask);
3379                 ue_hi = (ue_hi & ~ue_hi_mask);
3380
3381                 if (ue_lo || ue_hi) {
3382                         /* On certain platforms BE3 hardware can indicate
3383                          * spurious UEs. In case of a UE in the chip,
3384                          * the POST register correctly reports either a
3385                          * FAT_LOG_START state (FW is currently dumping
3386                          * FAT log data) or an ARMFW_UE state. Check for the
3387                          * above states to ascertain if the UE is valid or not.
3388                          */
3389                         if (BE3_chip(adapter)) {
3390                                 val = be_POST_stage_get(adapter);
3391                                 if ((val & POST_STAGE_FAT_LOG_START)
3392                                      != POST_STAGE_FAT_LOG_START &&
3393                                     (val & POST_STAGE_ARMFW_UE)
3394                                      != POST_STAGE_ARMFW_UE &&
3395                                     (val & POST_STAGE_RECOVERABLE_ERR)
3396                                      != POST_STAGE_RECOVERABLE_ERR)
3397                                         return;
3398                         }
3399
3400                         dev_err(dev, "Error detected in the adapter");
3401                         be_set_error(adapter, BE_ERROR_UE);
3402
3403                         for (i = 0; ue_lo; ue_lo >>= 1, i++) {
3404                                 if (ue_lo & 1)
3405                                         dev_err(dev, "UE: %s bit set\n",
3406                                                 ue_status_low_desc[i]);
3407                         }
3408                         for (i = 0; ue_hi; ue_hi >>= 1, i++) {
3409                                 if (ue_hi & 1)
3410                                         dev_err(dev, "UE: %s bit set\n",
3411                                                 ue_status_hi_desc[i]);
3412                         }
3413                 }
3414         }
3415 }
3416
3417 static void be_msix_disable(struct be_adapter *adapter)
3418 {
3419         if (msix_enabled(adapter)) {
3420                 pci_disable_msix(adapter->pdev);
3421                 adapter->num_msix_vec = 0;
3422                 adapter->num_msix_roce_vec = 0;
3423         }
3424 }
3425
3426 static int be_msix_enable(struct be_adapter *adapter)
3427 {
3428         unsigned int i, max_roce_eqs;
3429         struct device *dev = &adapter->pdev->dev;
3430         int num_vec;
3431
3432         /* If RoCE is supported, program the max number of vectors that
3433          * could be used for NIC and RoCE, else, just program the number
3434          * we'll use initially.
3435          */
3436         if (be_roce_supported(adapter)) {
3437                 max_roce_eqs =
3438                         be_max_func_eqs(adapter) - be_max_nic_eqs(adapter);
3439                 max_roce_eqs = min(max_roce_eqs, num_online_cpus());
3440                 num_vec = be_max_any_irqs(adapter) + max_roce_eqs;
3441         } else {
3442                 num_vec = max(adapter->cfg_num_rx_irqs,
3443                               adapter->cfg_num_tx_irqs);
3444         }
3445
3446         for (i = 0; i < num_vec; i++)
3447                 adapter->msix_entries[i].entry = i;
3448
3449         num_vec = pci_enable_msix_range(adapter->pdev, adapter->msix_entries,
3450                                         MIN_MSIX_VECTORS, num_vec);
3451         if (num_vec < 0)
3452                 goto fail;
3453
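        /* When RoCE is supported and more than the minimum vector count was
         * granted, half of the granted vectors are handed to the RoCE driver
         * below; the NIC keeps the remainder.
         */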
3454         if (be_roce_supported(adapter) && num_vec > MIN_MSIX_VECTORS) {
3455                 adapter->num_msix_roce_vec = num_vec / 2;
3456                 dev_info(dev, "enabled %d MSI-x vector(s) for RoCE\n",
3457                          adapter->num_msix_roce_vec);
3458         }
3459
3460         adapter->num_msix_vec = num_vec - adapter->num_msix_roce_vec;
3461
3462         dev_info(dev, "enabled %d MSI-x vector(s) for NIC\n",
3463                  adapter->num_msix_vec);
3464         return 0;
3465
3466 fail:
3467         dev_warn(dev, "MSIx enable failed\n");
3468
3469         /* INTx is not supported in VFs, so fail probe if enable_msix fails */
3470         if (be_virtfn(adapter))
3471                 return num_vec;
3472         return 0;
3473 }
3474
3475 static inline int be_msix_vec_get(struct be_adapter *adapter,
3476                                   struct be_eq_obj *eqo)
3477 {
3478         return adapter->msix_entries[eqo->msix_idx].vector;
3479 }
3480
3481 static int be_msix_register(struct be_adapter *adapter)
3482 {
3483         struct net_device *netdev = adapter->netdev;
3484         struct be_eq_obj *eqo;
3485         int status, i, vec;
3486
3487         for_all_evt_queues(adapter, eqo, i) {
3488                 sprintf(eqo->desc, "%s-q%d", netdev->name, i);
3489                 vec = be_msix_vec_get(adapter, eqo);
3490                 status = request_irq(vec, be_msix, 0, eqo->desc, eqo);
3491                 if (status)
3492                         goto err_msix;
3493
3494                 irq_set_affinity_hint(vec, eqo->affinity_mask);
3495         }
3496
3497         return 0;
3498 err_msix:
3499         for (i--; i >= 0; i--) {
3500                 eqo = &adapter->eq_obj[i];
3501                 free_irq(be_msix_vec_get(adapter, eqo), eqo);
3502         }
3503         dev_warn(&adapter->pdev->dev, "MSIX Request IRQ failed - err %d\n",
3504                  status);
3505         be_msix_disable(adapter);
3506         return status;
3507 }
3508
3509 static int be_irq_register(struct be_adapter *adapter)
3510 {
3511         struct net_device *netdev = adapter->netdev;
3512         int status;
3513
3514         if (msix_enabled(adapter)) {
3515                 status = be_msix_register(adapter);
3516                 if (status == 0)
3517                         goto done;
3518                 /* INTx is not supported for VF */
3519                 if (be_virtfn(adapter))
3520                         return status;
3521         }
3522
3523         /* INTx: only the first EQ is used */
3524         netdev->irq = adapter->pdev->irq;
3525         status = request_irq(netdev->irq, be_intx, IRQF_SHARED, netdev->name,
3526                              &adapter->eq_obj[0]);
3527         if (status) {
3528                 dev_err(&adapter->pdev->dev,
3529                         "INTx request IRQ failed - err %d\n", status);
3530                 return status;
3531         }
3532 done:
3533         adapter->isr_registered = true;
3534         return 0;
3535 }
3536
3537 static void be_irq_unregister(struct be_adapter *adapter)
3538 {
3539         struct net_device *netdev = adapter->netdev;
3540         struct be_eq_obj *eqo;
3541         int i, vec;
3542
3543         if (!adapter->isr_registered)
3544                 return;
3545
3546         /* INTx */
3547         if (!msix_enabled(adapter)) {
3548                 free_irq(netdev->irq, &adapter->eq_obj[0]);
3549                 goto done;
3550         }
3551
3552         /* MSIx */
3553         for_all_evt_queues(adapter, eqo, i) {
3554                 vec = be_msix_vec_get(adapter, eqo);
3555                 irq_set_affinity_hint(vec, NULL);
3556                 free_irq(vec, eqo);
3557         }
3558
3559 done:
3560         adapter->isr_registered = false;
3561 }
3562
3563 static void be_rx_qs_destroy(struct be_adapter *adapter)
3564 {
3565         struct rss_info *rss = &adapter->rss_info;
3566         struct be_queue_info *q;
3567         struct be_rx_obj *rxo;
3568         int i;
3569
3570         for_all_rx_queues(adapter, rxo, i) {
3571                 q = &rxo->q;
3572                 if (q->created) {
3573                         /* If RXQs are destroyed while in an "out of buffer"
3574                          * state, there is a possibility of an HW stall on
3575                          * Lancer. So, post 64 buffers to each queue to relieve
3576                          * the "out of buffer" condition.
3577                          * Make sure there's space in the RXQ before posting.
3578                          */
3579                         if (lancer_chip(adapter)) {
3580                                 be_rx_cq_clean(rxo);
3581                                 if (atomic_read(&q->used) == 0)
3582                                         be_post_rx_frags(rxo, GFP_KERNEL,
3583                                                          MAX_RX_POST);
3584                         }
3585
3586                         be_cmd_rxq_destroy(adapter, q);
3587                         be_rx_cq_clean(rxo);
3588                         be_rxq_clean(rxo);
3589                 }
3590                 be_queue_free(adapter, q);
3591         }
3592
3593         if (rss->rss_flags) {
3594                 rss->rss_flags = RSS_ENABLE_NONE;
3595                 be_cmd_rss_config(adapter, rss->rsstable, rss->rss_flags,
3596                                   128, rss->rss_hkey);
3597         }
3598 }
3599
3600 static void be_disable_if_filters(struct be_adapter *adapter)
3601 {
3602         /* Don't delete MAC on BE3 VFs without FILTMGMT privilege  */
3603         if (!BEx_chip(adapter) || !be_virtfn(adapter) ||
3604             check_privilege(adapter, BE_PRIV_FILTMGMT)) {
3605                 be_dev_mac_del(adapter, adapter->pmac_id[0]);
3606                 eth_zero_addr(adapter->dev_mac);
3607         }
3608
3609         be_clear_uc_list(adapter);
3610         be_clear_mc_list(adapter);
3611
3612         /* The IFACE flags are enabled in the open path and cleared
3613          * in the close path. When a VF gets detached from the host and
3614          * assigned to a VM the following happens:
3615          *      - VF's IFACE flags get cleared in the detach path
3616          *      - IFACE create is issued by the VF in the attach path
3617          * Due to a bug in the BE3/Skyhawk-R FW
3618          * (Lancer FW doesn't have the bug), the IFACE capability flags
3619          * specified along with the IFACE create cmd issued by a VF are not
3620          * honoured by FW.  As a consequence, if a *new* driver
3621          * (that enables/disables IFACE flags in open/close)
3622          * is loaded in the host and an *old* driver is used by a VM/VF,
3623          * the IFACE gets created *without* the needed flags.
3624          * To avoid this, disable RX-filter flags only for Lancer.
3625          */
3626         if (lancer_chip(adapter)) {
3627                 be_cmd_rx_filter(adapter, BE_IF_ALL_FILT_FLAGS, OFF);
3628                 adapter->if_flags &= ~BE_IF_ALL_FILT_FLAGS;
3629         }
3630 }
3631
3632 static int be_close(struct net_device *netdev)
3633 {
3634         struct be_adapter *adapter = netdev_priv(netdev);
3635         struct be_eq_obj *eqo;
3636         int i;
3637
3638         /* This protection is needed as be_close() may be called even when the
3639          * adapter is in a cleared state (after an EEH permanent failure)
3640          */
3641         if (!(adapter->flags & BE_FLAGS_SETUP_DONE))
3642                 return 0;
3643
3644         /* Before attempting cleanup ensure all the pending cmds in the
3645          * config_wq have finished execution
3646          */
3647         flush_workqueue(be_wq);
3648
3649         be_disable_if_filters(adapter);
3650
3651         if (adapter->flags & BE_FLAGS_NAPI_ENABLED) {
3652                 for_all_evt_queues(adapter, eqo, i) {
3653                         napi_disable(&eqo->napi);
3654                 }
3655                 adapter->flags &= ~BE_FLAGS_NAPI_ENABLED;
3656         }
3657
3658         be_async_mcc_disable(adapter);
3659
3660         /* Wait for all pending tx completions to arrive so that
3661          * all tx skbs are freed.
3662          */
3663         netif_tx_disable(netdev);
3664         be_tx_compl_clean(adapter);
3665
3666         be_rx_qs_destroy(adapter);
3667
3668         for_all_evt_queues(adapter, eqo, i) {
3669                 if (msix_enabled(adapter))
3670                         synchronize_irq(be_msix_vec_get(adapter, eqo));
3671                 else
3672                         synchronize_irq(netdev->irq);
3673                 be_eq_clean(eqo);
3674         }
3675
3676         be_irq_unregister(adapter);
3677
3678         return 0;
3679 }
3680
3681 static int be_rx_qs_create(struct be_adapter *adapter)
3682 {
3683         struct rss_info *rss = &adapter->rss_info;
3684         u8 rss_key[RSS_HASH_KEY_LEN];
3685         struct be_rx_obj *rxo;
3686         int rc, i, j;
3687
3688         for_all_rx_queues(adapter, rxo, i) {
3689                 rc = be_queue_alloc(adapter, &rxo->q, RX_Q_LEN,
3690                                     sizeof(struct be_eth_rx_d));
3691                 if (rc)
3692                         return rc;
3693         }
3694
3695         if (adapter->need_def_rxq || !adapter->num_rss_qs) {
3696                 rxo = default_rxo(adapter);
3697                 rc = be_cmd_rxq_create(adapter, &rxo->q, rxo->cq.id,
3698                                        rx_frag_size, adapter->if_handle,
3699                                        false, &rxo->rss_id);
3700                 if (rc)
3701                         return rc;
3702         }
3703
3704         for_all_rss_queues(adapter, rxo, i) {
3705                 rc = be_cmd_rxq_create(adapter, &rxo->q, rxo->cq.id,
3706                                        rx_frag_size, adapter->if_handle,
3707                                        true, &rxo->rss_id);
3708                 if (rc)
3709                         return rc;
3710         }
3711
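        /* The indirection table below is filled round-robin with the rss_ids
         * of the RSS rings; e.g. with 4 RSS rings, the RSS_INDIR_TABLE_LEN
         * (128-entry) table repeats the pattern rss_id[0..3], so hashed flows
         * spread evenly across the rings.
         */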
3712         if (be_multi_rxq(adapter)) {
3713                 for (j = 0; j < RSS_INDIR_TABLE_LEN; j += adapter->num_rss_qs) {
3714                         for_all_rss_queues(adapter, rxo, i) {
3715                                 if ((j + i) >= RSS_INDIR_TABLE_LEN)
3716                                         break;
3717                                 rss->rsstable[j + i] = rxo->rss_id;
3718                                 rss->rss_queue[j + i] = i;
3719                         }
3720                 }
3721                 rss->rss_flags = RSS_ENABLE_TCP_IPV4 | RSS_ENABLE_IPV4 |
3722                         RSS_ENABLE_TCP_IPV6 | RSS_ENABLE_IPV6;
3723
3724                 if (!BEx_chip(adapter))
3725                         rss->rss_flags |= RSS_ENABLE_UDP_IPV4 |
3726                                 RSS_ENABLE_UDP_IPV6;
3727
3728                 netdev_rss_key_fill(rss_key, RSS_HASH_KEY_LEN);
3729                 rc = be_cmd_rss_config(adapter, rss->rsstable, rss->rss_flags,
3730                                        RSS_INDIR_TABLE_LEN, rss_key);
3731                 if (rc) {
3732                         rss->rss_flags = RSS_ENABLE_NONE;
3733                         return rc;
3734                 }
3735
3736                 memcpy(rss->rss_hkey, rss_key, RSS_HASH_KEY_LEN);
3737         } else {
3738                 /* Disable RSS, if only default RX Q is created */
3739                 rss->rss_flags = RSS_ENABLE_NONE;
3740         }
3741
3743         /* Post 1 less than RXQ-len to avoid head being equal to tail,
3744          * which is a queue empty condition
3745          */
3746         for_all_rx_queues(adapter, rxo, i)
3747                 be_post_rx_frags(rxo, GFP_KERNEL, RX_Q_LEN - 1);
3748
3749         return 0;
3750 }
3751
3752 static int be_enable_if_filters(struct be_adapter *adapter)
3753 {
3754         int status;
3755
3756         status = be_cmd_rx_filter(adapter, BE_IF_FILT_FLAGS_BASIC, ON);
3757         if (status)
3758                 return status;
3759
3760         /* This condition is usually true as the ->dev_mac is zeroed.
3761          * But on BE3 VFs the initial MAC is pre-programmed by PF and
3762          * subsequent be_dev_mac_add() can fail (after fresh boot)
3763          */
3764         if (!ether_addr_equal(adapter->dev_mac, adapter->netdev->dev_addr)) {
3765                 int old_pmac_id = -1;
3766
3767                 /* Remember old programmed MAC if any - can happen on BE3 VF */
3768                 if (!is_zero_ether_addr(adapter->dev_mac))
3769                         old_pmac_id = adapter->pmac_id[0];
3770
3771                 status = be_dev_mac_add(adapter, adapter->netdev->dev_addr);
3772                 if (status)
3773                         return status;
3774
3775                 /* Delete the old programmed MAC as we successfully programmed
3776                  * a new MAC
3777                  */
3778                 if (old_pmac_id >= 0 && old_pmac_id != adapter->pmac_id[0])
3779                         be_dev_mac_del(adapter, old_pmac_id);
3780
3781                 ether_addr_copy(adapter->dev_mac, adapter->netdev->dev_addr);
3782         }
3783
3784         if (adapter->vlans_added)
3785                 be_vid_config(adapter);
3786
3787         __be_set_rx_mode(adapter);
3788
3789         return 0;
3790 }
3791
3792 static int be_open(struct net_device *netdev)
3793 {
3794         struct be_adapter *adapter = netdev_priv(netdev);
3795         struct be_eq_obj *eqo;
3796         struct be_rx_obj *rxo;
3797         struct be_tx_obj *txo;
3798         u8 link_status;
3799         int status, i;
3800
3801         status = be_rx_qs_create(adapter);
3802         if (status)
3803                 goto err;
3804
3805         status = be_enable_if_filters(adapter);
3806         if (status)
3807                 goto err;
3808
3809         status = be_irq_register(adapter);
3810         if (status)
3811                 goto err;
3812
3813         for_all_rx_queues(adapter, rxo, i)
3814                 be_cq_notify(adapter, rxo->cq.id, true, 0);
3815
3816         for_all_tx_queues(adapter, txo, i)
3817                 be_cq_notify(adapter, txo->cq.id, true, 0);
3818
3819         be_async_mcc_enable(adapter);
3820
3821         for_all_evt_queues(adapter, eqo, i) {
3822                 napi_enable(&eqo->napi);
3823                 be_eq_notify(adapter, eqo->q.id, true, true, 0, 0);
3824         }
3825         adapter->flags |= BE_FLAGS_NAPI_ENABLED;
3826
3827         status = be_cmd_link_status_query(adapter, NULL, &link_status, 0);
3828         if (!status)
3829                 be_link_status_update(adapter, link_status);
3830
3831         netif_tx_start_all_queues(netdev);
3832
3833         udp_tunnel_nic_reset_ntf(netdev);
3834
3835         return 0;
3836 err:
3837         be_close(adapter->netdev);
3838         return -EIO;
3839 }
3840
3841 static void be_vf_eth_addr_generate(struct be_adapter *adapter, u8 *mac)
3842 {
3843         u32 addr;
3844
3845         addr = jhash(adapter->netdev->dev_addr, ETH_ALEN, 0);
3846
3847         mac[5] = (u8)(addr & 0xFF);
3848         mac[4] = (u8)((addr >> 8) & 0xFF);
3849         mac[3] = (u8)((addr >> 16) & 0xFF);
3850         /* Use the OUI from the current MAC address */
3851         memcpy(mac, adapter->netdev->dev_addr, 3);
3852 }
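
/* Worked example (hypothetical values): if the PF MAC is 00:00:c9:aa:bb:cc and
 * jhash() returns 0x00a1b2c3, the seed generated above is 00:00:c9:a1:b2:c3 -
 * the PF's OUI followed by the low 24 bits of the hash. be_vf_eth_addr_config()
 * below then assigns 00:00:c9:a1:b2:c3 to VF0, ...:c4 to VF1, and so on, by
 * incrementing the last octet.
 */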
3853
3854 /*
3855  * Generate a seed MAC address from the PF MAC Address using jhash.
3856  * MAC addresses for VFs are assigned incrementally starting from the seed.
3857  * These addresses are programmed in the ASIC by the PF and the VF driver
3858  * queries for the MAC address during its probe.
3859  */
3860 static int be_vf_eth_addr_config(struct be_adapter *adapter)
3861 {
3862         u32 vf;
3863         int status = 0;
3864         u8 mac[ETH_ALEN];
3865         struct be_vf_cfg *vf_cfg;
3866
3867         be_vf_eth_addr_generate(adapter, mac);
3868
3869         for_all_vfs(adapter, vf_cfg, vf) {
3870                 if (BEx_chip(adapter))
3871                         status = be_cmd_pmac_add(adapter, mac,
3872                                                  vf_cfg->if_handle,
3873                                                  &vf_cfg->pmac_id, vf + 1);
3874                 else
3875                         status = be_cmd_set_mac(adapter, mac, vf_cfg->if_handle,
3876                                                 vf + 1);
3877
3878                 if (status)
3879                         dev_err(&adapter->pdev->dev,
3880                                 "Mac address assignment failed for VF %d\n",
3881                                 vf);
3882                 else
3883                         memcpy(vf_cfg->mac_addr, mac, ETH_ALEN);
3884
3885                 mac[5] += 1;
3886         }
3887         return status;
3888 }
3889
3890 static int be_vfs_mac_query(struct be_adapter *adapter)
3891 {
3892         int status, vf;
3893         u8 mac[ETH_ALEN];
3894         struct be_vf_cfg *vf_cfg;
3895
3896         for_all_vfs(adapter, vf_cfg, vf) {
3897                 status = be_cmd_get_active_mac(adapter, vf_cfg->pmac_id,
3898                                                mac, vf_cfg->if_handle,
3899                                                false, vf+1);
3900                 if (status)
3901                         return status;
3902                 memcpy(vf_cfg->mac_addr, mac, ETH_ALEN);
3903         }
3904         return 0;
3905 }
3906
3907 static void be_vf_clear(struct be_adapter *adapter)
3908 {
3909         struct be_vf_cfg *vf_cfg;
3910         u32 vf;
3911
3912         if (pci_vfs_assigned(adapter->pdev)) {
3913                 dev_warn(&adapter->pdev->dev,
3914                          "VFs are assigned to VMs: not disabling VFs\n");
3915                 goto done;
3916         }
3917
3918         pci_disable_sriov(adapter->pdev);
3919
3920         for_all_vfs(adapter, vf_cfg, vf) {
3921                 if (BEx_chip(adapter))
3922                         be_cmd_pmac_del(adapter, vf_cfg->if_handle,
3923                                         vf_cfg->pmac_id, vf + 1);
3924                 else
3925                         be_cmd_set_mac(adapter, NULL, vf_cfg->if_handle,
3926                                        vf + 1);
3927
3928                 be_cmd_if_destroy(adapter, vf_cfg->if_handle, vf + 1);
3929         }
3930
3931         if (BE3_chip(adapter))
3932                 be_cmd_set_hsw_config(adapter, 0, 0,
3933                                       adapter->if_handle,
3934                                       PORT_FWD_TYPE_PASSTHRU, 0);
3935 done:
3936         kfree(adapter->vf_cfg);
3937         adapter->num_vfs = 0;
3938         adapter->flags &= ~BE_FLAGS_SRIOV_ENABLED;
3939 }
3940
3941 static void be_clear_queues(struct be_adapter *adapter)
3942 {
3943         be_mcc_queues_destroy(adapter);
3944         be_rx_cqs_destroy(adapter);
3945         be_tx_queues_destroy(adapter);
3946         be_evt_queues_destroy(adapter);
3947 }
3948
3949 static void be_cancel_worker(struct be_adapter *adapter)
3950 {
3951         if (adapter->flags & BE_FLAGS_WORKER_SCHEDULED) {
3952                 cancel_delayed_work_sync(&adapter->work);
3953                 adapter->flags &= ~BE_FLAGS_WORKER_SCHEDULED;
3954         }
3955 }
3956
3957 static void be_cancel_err_detection(struct be_adapter *adapter)
3958 {
3959         struct be_error_recovery *err_rec = &adapter->error_recovery;
3960
3961         if (!be_err_recovery_workq)
3962                 return;
3963
3964         if (adapter->flags & BE_FLAGS_ERR_DETECTION_SCHEDULED) {
3965                 cancel_delayed_work_sync(&err_rec->err_detection_work);
3966                 adapter->flags &= ~BE_FLAGS_ERR_DETECTION_SCHEDULED;
3967         }
3968 }
3969
3970 /* VxLAN offload Notes:
3971  *
3972  * The stack defines tunnel offload flags (hw_enc_features) for IP and doesn't
3973  * distinguish various types of transports (VxLAN, GRE, NVGRE ..). So, offload
3974  * is expected to work across all types of IP tunnels once exported. Skyhawk
3975  * supports offloads for either VxLAN or NVGRE, exclusively. So we export VxLAN
3976  * offloads in hw_enc_features only when a VxLAN port is added. If other (non
3977  * VxLAN) tunnels are configured while VxLAN offloads are enabled, offloads for
3978  * those other tunnels are unexported on the fly through ndo_features_check().
3979  */
3980 static int be_vxlan_set_port(struct net_device *netdev, unsigned int table,
3981                              unsigned int entry, struct udp_tunnel_info *ti)
3982 {
3983         struct be_adapter *adapter = netdev_priv(netdev);
3984         struct device *dev = &adapter->pdev->dev;
3985         int status;
3986
3987         status = be_cmd_manage_iface(adapter, adapter->if_handle,
3988                                      OP_CONVERT_NORMAL_TO_TUNNEL);
3989         if (status) {
3990                 dev_warn(dev, "Failed to convert normal interface to tunnel\n");
3991                 return status;
3992         }
3993         adapter->flags |= BE_FLAGS_VXLAN_OFFLOADS;
3994
3995         status = be_cmd_set_vxlan_port(adapter, ti->port);
3996         if (status) {
3997                 dev_warn(dev, "Failed to add VxLAN port\n");
3998                 return status;
3999         }
4000         adapter->vxlan_port = ti->port;
4001
4002         netdev->hw_enc_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
4003                                    NETIF_F_TSO | NETIF_F_TSO6 |
4004                                    NETIF_F_GSO_UDP_TUNNEL;
4005
4006         dev_info(dev, "Enabled VxLAN offloads for UDP port %d\n",
4007                  be16_to_cpu(ti->port));
4008         return 0;
4009 }
4010
4011 static int be_vxlan_unset_port(struct net_device *netdev, unsigned int table,
4012                                unsigned int entry, struct udp_tunnel_info *ti)
4013 {
4014         struct be_adapter *adapter = netdev_priv(netdev);
4015
4016         if (adapter->flags & BE_FLAGS_VXLAN_OFFLOADS)
4017                 be_cmd_manage_iface(adapter, adapter->if_handle,
4018                                     OP_CONVERT_TUNNEL_TO_NORMAL);
4019
4020         if (adapter->vxlan_port)
4021                 be_cmd_set_vxlan_port(adapter, 0);
4022
4023         adapter->flags &= ~BE_FLAGS_VXLAN_OFFLOADS;
4024         adapter->vxlan_port = 0;
4025
4026         netdev->hw_enc_features = 0;
4027         return 0;
4028 }
4029
4030 static const struct udp_tunnel_nic_info be_udp_tunnels = {
4031         .set_port       = be_vxlan_set_port,
4032         .unset_port     = be_vxlan_unset_port,
4033         .flags          = UDP_TUNNEL_NIC_INFO_MAY_SLEEP |
4034                           UDP_TUNNEL_NIC_INFO_OPEN_ONLY,
4035         .tables         = {
4036                 { .n_entries = 1, .tunnel_types = UDP_TUNNEL_TYPE_VXLAN, },
4037         },
4038 };
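
/* The OPEN_ONLY flag restricts the set_port/unset_port callbacks above to the
 * time the netdev is up (be_open() calls udp_tunnel_nic_reset_ntf() so the
 * core re-programs any known VxLAN port after open), and MAY_SLEEP permits
 * the blocking FW cmds they issue.
 */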
4039
4040 static void be_calculate_vf_res(struct be_adapter *adapter, u16 num_vfs,
4041                                 struct be_resources *vft_res)
4042 {
4043         struct be_resources res = adapter->pool_res;
4044         u32 vf_if_cap_flags = res.vf_if_cap_flags;
4045         struct be_resources res_mod = {0};
4046         u16 num_vf_qs = 1;
4047
4048         /* Distribute the queue resources among the PF and its VFs */
4049         if (num_vfs) {
4050                 /* Divide the rx queues evenly among the VFs and the PF, capped
4051                  * at VF-EQ-count. Any remainder queues belong to the PF.
4052                  */
4053                 num_vf_qs = min(SH_VF_MAX_NIC_EQS,
4054                                 res.max_rss_qs / (num_vfs + 1));
4055
4056                 /* Skyhawk-R chip supports only MAX_PORT_RSS_TABLES
4057                  * RSS Tables per port. Provide RSS on VFs, only if number of
4058                  * VFs requested is less than its PF Pool's RSS Tables limit.
4059                  */
4060                 if (num_vfs >= be_max_pf_pool_rss_tables(adapter))
4061                         num_vf_qs = 1;
4062         }
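
        /* Illustrative numbers: with res.max_rss_qs == 32 and num_vfs == 7,
         * each VF is provisioned 32 / (7 + 1) = 4 RSS queues (one share is
         * left for the PF), subject to the SH_VF_MAX_NIC_EQS cap above; if
         * more VFs are requested than the PF pool's RSS-table limit, every
         * VF falls back to a single queue.
         */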
4063
4064         /* In the resource returned by the GET_PROFILE_CONFIG cmd, fields set
4065          * to all '1's are the ones modifiable using the SET_PROFILE_CONFIG cmd.
4066          */
4067         be_cmd_get_profile_config(adapter, &res_mod, NULL, ACTIVE_PROFILE_TYPE,
4068                                   RESOURCE_MODIFIABLE, 0);
4069
4070         /* If RSS IFACE capability flags are modifiable for a VF, set the
4071          * capability flag as valid and set RSS and DEFQ_RSS IFACE flags if
4072          * more than 1 RSSQ is available for a VF.
4073          * Otherwise, provision only 1 queue pair for VF.
4074          */
4075         if (res_mod.vf_if_cap_flags & BE_IF_FLAGS_RSS) {
4076                 vft_res->flags |= BIT(IF_CAPS_FLAGS_VALID_SHIFT);
4077                 if (num_vf_qs > 1) {
4078                         vf_if_cap_flags |= BE_IF_FLAGS_RSS;
4079                         if (res.if_cap_flags & BE_IF_FLAGS_DEFQ_RSS)
4080                                 vf_if_cap_flags |= BE_IF_FLAGS_DEFQ_RSS;
4081                 } else {
4082                         vf_if_cap_flags &= ~(BE_IF_FLAGS_RSS |
4083                                              BE_IF_FLAGS_DEFQ_RSS);
4084                 }
4085         } else {
4086                 num_vf_qs = 1;
4087         }
4088
4089         if (res_mod.vf_if_cap_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS) {
4090                 vft_res->flags |= BIT(IF_CAPS_FLAGS_VALID_SHIFT);
4091                 vf_if_cap_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
4092         }
4093
4094         vft_res->vf_if_cap_flags = vf_if_cap_flags;
4095         vft_res->max_rx_qs = num_vf_qs;
4096         vft_res->max_rss_qs = num_vf_qs;
4097         vft_res->max_tx_qs = res.max_tx_qs / (num_vfs + 1);
4098         vft_res->max_cq_count = res.max_cq_count / (num_vfs + 1);
4099
4100         /* Distribute unicast MACs, VLANs, IFACE count and MCCQ count equally
4101          * among the PF and its VFs, if the fields are changeable
4102          */
4103         if (res_mod.max_uc_mac == FIELD_MODIFIABLE)
4104                 vft_res->max_uc_mac = res.max_uc_mac / (num_vfs + 1);
4105
4106         if (res_mod.max_vlans == FIELD_MODIFIABLE)
4107                 vft_res->max_vlans = res.max_vlans / (num_vfs + 1);
4108
4109         if (res_mod.max_iface_count == FIELD_MODIFIABLE)
4110                 vft_res->max_iface_count = res.max_iface_count / (num_vfs + 1);
4111
4112         if (res_mod.max_mcc_count == FIELD_MODIFIABLE)
4113                 vft_res->max_mcc_count = res.max_mcc_count / (num_vfs + 1);
4114 }
4115
4116 static void be_if_destroy(struct be_adapter *adapter)
4117 {
4118         be_cmd_if_destroy(adapter, adapter->if_handle,  0);
4119
4120         kfree(adapter->pmac_id);
4121         adapter->pmac_id = NULL;
4122
4123         kfree(adapter->mc_list);
4124         adapter->mc_list = NULL;
4125
4126         kfree(adapter->uc_list);
4127         adapter->uc_list = NULL;
4128 }
4129
4130 static int be_clear(struct be_adapter *adapter)
4131 {
4132         struct pci_dev *pdev = adapter->pdev;
4133         struct  be_resources vft_res = {0};
4134
4135         be_cancel_worker(adapter);
4136
4137         flush_workqueue(be_wq);
4138
4139         if (sriov_enabled(adapter))
4140                 be_vf_clear(adapter);
4141
4142         /* Re-configure FW to distribute resources evenly across max-supported
4143          * number of VFs, only when VFs are not already enabled.
4144          */
4145         if (skyhawk_chip(adapter) && be_physfn(adapter) &&
4146             !pci_vfs_assigned(pdev)) {
4147                 be_calculate_vf_res(adapter,
4148                                     pci_sriov_get_totalvfs(pdev),
4149                                     &vft_res);
4150                 be_cmd_set_sriov_config(adapter, adapter->pool_res,
4151                                         pci_sriov_get_totalvfs(pdev),
4152                                         &vft_res);
4153         }
4154
4155         be_vxlan_unset_port(adapter->netdev, 0, 0, NULL);
4156
4157         be_if_destroy(adapter);
4158
4159         be_clear_queues(adapter);
4160
4161         be_msix_disable(adapter);
4162         adapter->flags &= ~BE_FLAGS_SETUP_DONE;
4163         return 0;
4164 }
4165
4166 static int be_vfs_if_create(struct be_adapter *adapter)
4167 {
4168         struct be_resources res = {0};
4169         u32 cap_flags, en_flags, vf;
4170         struct be_vf_cfg *vf_cfg;
4171         int status;
4172
4173         /* If a FW profile exists, then cap_flags are updated */
4174         cap_flags = BE_VF_IF_EN_FLAGS;
4175
4176         for_all_vfs(adapter, vf_cfg, vf) {
4177                 if (!BE3_chip(adapter)) {
4178                         status = be_cmd_get_profile_config(adapter, &res, NULL,
4179                                                            ACTIVE_PROFILE_TYPE,
4180                                                            RESOURCE_LIMITS,
4181                                                            vf + 1);
4182                         if (!status) {
4183                                 cap_flags = res.if_cap_flags;
4184                                 /* Prevent VFs from enabling VLAN promiscuous
4185                                  * mode
4186                                  */
4187                                 cap_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
4188                         }
4189                 }
4190
4191                 /* PF should enable IF flags during proxy if_create call */
4192                 en_flags = cap_flags & BE_VF_IF_EN_FLAGS;
4193                 status = be_cmd_if_create(adapter, cap_flags, en_flags,
4194                                           &vf_cfg->if_handle, vf + 1);
4195                 if (status)
4196                         return status;
4197         }
4198
4199         return 0;
4200 }
4201
4202 static int be_vf_setup_init(struct be_adapter *adapter)
4203 {
4204         struct be_vf_cfg *vf_cfg;
4205         int vf;
4206
4207         adapter->vf_cfg = kcalloc(adapter->num_vfs, sizeof(*vf_cfg),
4208                                   GFP_KERNEL);
4209         if (!adapter->vf_cfg)
4210                 return -ENOMEM;
4211
4212         for_all_vfs(adapter, vf_cfg, vf) {
4213                 vf_cfg->if_handle = -1;
4214                 vf_cfg->pmac_id = -1;
4215         }
4216         return 0;
4217 }
4218
4219 static int be_vf_setup(struct be_adapter *adapter)
4220 {
4221         struct device *dev = &adapter->pdev->dev;
4222         struct be_vf_cfg *vf_cfg;
4223         int status, old_vfs, vf;
4224         bool spoofchk;
4225
4226         old_vfs = pci_num_vf(adapter->pdev);
4227
4228         status = be_vf_setup_init(adapter);
4229         if (status)
4230                 goto err;
4231
4232         if (old_vfs) {
4233                 for_all_vfs(adapter, vf_cfg, vf) {
4234                         status = be_cmd_get_if_id(adapter, vf_cfg, vf);
4235                         if (status)
4236                                 goto err;
4237                 }
4238
4239                 status = be_vfs_mac_query(adapter);
4240                 if (status)
4241                         goto err;
4242         } else {
4243                 status = be_vfs_if_create(adapter);
4244                 if (status)
4245                         goto err;
4246
4247                 status = be_vf_eth_addr_config(adapter);
4248                 if (status)
4249                         goto err;
4250         }
4251
4252         for_all_vfs(adapter, vf_cfg, vf) {
4253                 /* Allow VFs to program MAC/VLAN filters */
4254                 status = be_cmd_get_fn_privileges(adapter, &vf_cfg->privileges,
4255                                                   vf + 1);
4256                 if (!status && !(vf_cfg->privileges & BE_PRIV_FILTMGMT)) {
4257                         status = be_cmd_set_fn_privileges(adapter,
4258                                                           vf_cfg->privileges |
4259                                                           BE_PRIV_FILTMGMT,
4260                                                           vf + 1);
4261                         if (!status) {
4262                                 vf_cfg->privileges |= BE_PRIV_FILTMGMT;
4263                                 dev_info(dev, "VF%d has FILTMGMT privilege\n",
4264                                          vf);
4265                         }
4266                 }
4267
4268                 /* Allow full available bandwidth */
4269                 if (!old_vfs)
4270                         be_cmd_config_qos(adapter, 0, 0, vf + 1);
4271
4272                 status = be_cmd_get_hsw_config(adapter, NULL, vf + 1,
4273                                                vf_cfg->if_handle, NULL,
4274                                                &spoofchk);
4275                 if (!status)
4276                         vf_cfg->spoofchk = spoofchk;
4277
4278                 if (!old_vfs) {
4279                         be_cmd_enable_vf(adapter, vf + 1);
4280                         be_cmd_set_logical_link_config(adapter,
4281                                                        IFLA_VF_LINK_STATE_AUTO,
4282                                                        vf + 1);
4283                 }
4284         }
4285
4286         if (!old_vfs) {
4287                 status = pci_enable_sriov(adapter->pdev, adapter->num_vfs);
4288                 if (status) {
4289                         dev_err(dev, "SRIOV enable failed\n");
4290                         adapter->num_vfs = 0;
4291                         goto err;
4292                 }
4293         }
4294
4295         if (BE3_chip(adapter)) {
4296                 /* On BE3, enable VEB only when SRIOV is enabled */
4297                 status = be_cmd_set_hsw_config(adapter, 0, 0,
4298                                                adapter->if_handle,
4299                                                PORT_FWD_TYPE_VEB, 0);
4300                 if (status)
4301                         goto err;
4302         }
4303
4304         adapter->flags |= BE_FLAGS_SRIOV_ENABLED;
4305         return 0;
4306 err:
4307         dev_err(dev, "VF setup failed\n");
4308         be_vf_clear(adapter);
4309         return status;
4310 }
4311
4312 /* Converting function_mode bits on BE3 to SH mc_type enums */
4313
4314 static u8 be_convert_mc_type(u32 function_mode)
4315 {
4316         if (function_mode & VNIC_MODE && function_mode & QNQ_MODE)
4317                 return vNIC1;
4318         else if (function_mode & QNQ_MODE)
4319                 return FLEX10;
4320         else if (function_mode & VNIC_MODE)
4321                 return vNIC2;
4322         else if (function_mode & UMC_ENABLED)
4323                 return UMC;
4324         else
4325                 return MC_NONE;
4326 }
4327
4328 /* On BE2/BE3, the FW does not report the supported resource limits */
4329 static void BEx_get_resources(struct be_adapter *adapter,
4330                               struct be_resources *res)
4331 {
4332         bool use_sriov = adapter->num_vfs ? 1 : 0;
4333
4334         if (be_physfn(adapter))
4335                 res->max_uc_mac = BE_UC_PMAC_COUNT;
4336         else
4337                 res->max_uc_mac = BE_VF_UC_PMAC_COUNT;
4338
4339         adapter->mc_type = be_convert_mc_type(adapter->function_mode);
4340
4341         if (be_is_mc(adapter)) {
4342                 /* Assume that there are 4 channels per port
4343                  * when multi-channel is enabled.
4344                  */
4345                 if (be_is_qnq_mode(adapter))
4346                         res->max_vlans = BE_NUM_VLANS_SUPPORTED/8;
4347                 else
4348                         /* In a non-qnq multichannel mode, the pvid
4349                          * takes up one vlan entry
4350                          */
4351                         res->max_vlans = (BE_NUM_VLANS_SUPPORTED / 4) - 1;
4352         } else {
4353                 res->max_vlans = BE_NUM_VLANS_SUPPORTED;
4354         }
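
        /* Illustrative arithmetic for the max_vlans values chosen above
         * (hypothetical value): if BE_NUM_VLANS_SUPPORTED were 64, QnQ
         * multi-channel would leave 64 / 8 = 8 VLAN filters, and non-QnQ
         * multi-channel 64 / 4 - 1 = 15 after reserving one entry for the
         * pvid.
         */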
4355
4356         res->max_mcast_mac = BE_MAX_MC;
4357
4358         /* 1) For BE3 1Gb ports, FW does not support multiple TXQs
4359          * 2) Create multiple TX rings on a BE3-R multi-channel interface
4360          *    *only* if it is RSS-capable.
4361          */
4362         if (BE2_chip(adapter) || use_sriov || (adapter->port_num > 1) ||
4363             be_virtfn(adapter) ||
4364             (be_is_mc(adapter) &&
4365              !(adapter->function_caps & BE_FUNCTION_CAPS_RSS))) {
4366                 res->max_tx_qs = 1;
4367         } else if (adapter->function_caps & BE_FUNCTION_CAPS_SUPER_NIC) {
4368                 struct be_resources super_nic_res = {0};
4369
4370                 /* On a SuperNIC profile, the driver needs to use the
4371                  * GET_PROFILE_CONFIG cmd to query the per-function TXQ limits
4372                  */
4373                 be_cmd_get_profile_config(adapter, &super_nic_res, NULL,
4374                                           ACTIVE_PROFILE_TYPE, RESOURCE_LIMITS,
4375                                           0);
4376                 /* Some old versions of BE3 FW don't report the max_tx_qs value */
4377                 res->max_tx_qs = super_nic_res.max_tx_qs ? : BE3_MAX_TX_QS;
4378         } else {
4379                 res->max_tx_qs = BE3_MAX_TX_QS;
4380         }
4381
4382         if ((adapter->function_caps & BE_FUNCTION_CAPS_RSS) &&
4383             !use_sriov && be_physfn(adapter))
4384                 res->max_rss_qs = (adapter->be3_native) ?
4385                                            BE3_MAX_RSS_QS : BE2_MAX_RSS_QS;
4386         res->max_rx_qs = res->max_rss_qs + 1;
4387
4388         if (be_physfn(adapter))
4389                 res->max_evt_qs = (be_max_vfs(adapter) > 0) ?
4390                                         BE3_SRIOV_MAX_EVT_QS : BE3_MAX_EVT_QS;
4391         else
4392                 res->max_evt_qs = 1;
4393
4394         res->if_cap_flags = BE_IF_CAP_FLAGS_WANT;
4395         res->if_cap_flags &= ~BE_IF_FLAGS_DEFQ_RSS;
4396         if (!(adapter->function_caps & BE_FUNCTION_CAPS_RSS))
4397                 res->if_cap_flags &= ~BE_IF_FLAGS_RSS;
4398 }
4399
4400 static void be_setup_init(struct be_adapter *adapter)
4401 {
4402         adapter->vlan_prio_bmap = 0xff;
4403         adapter->phy.link_speed = -1;
4404         adapter->if_handle = -1;
4405         adapter->be3_native = false;
4406         adapter->if_flags = 0;
4407         adapter->phy_state = BE_UNKNOWN_PHY_STATE;
4408         if (be_physfn(adapter))
4409                 adapter->cmd_privileges = MAX_PRIVILEGES;
4410         else
4411                 adapter->cmd_privileges = MIN_PRIVILEGES;
4412 }
4413
4414 /* HW supports only MAX_PORT_RSS_TABLES RSS Policy Tables per port.
4415  * However, this HW limitation is not exposed to the host via any SLI cmd.
4416  * As a result, in the case of SRIOV and in particular multi-partition configs
4417  * the driver needs to calculate a proportional share of RSS Tables per
4418  * PF-pool for distribution between the VFs. This self-imposed limit will
4419  * determine the number of VFs for which RSS can be enabled.
4420  */
4421 static void be_calculate_pf_pool_rss_tables(struct be_adapter *adapter)
4422 {
4423         struct be_port_resources port_res = {0};
4424         u8 rss_tables_on_port;
4425         u16 max_vfs = be_max_vfs(adapter);
4426
4427         be_cmd_get_profile_config(adapter, NULL, &port_res, SAVED_PROFILE_TYPE,
4428                                   RESOURCE_LIMITS, 0);
4429
4430         rss_tables_on_port = MAX_PORT_RSS_TABLES - port_res.nic_pfs;
4431
4432         /* Each PF Pool's RSS Tables limit =
4433          * PF's Max VFs / Total_Max_VFs on Port * RSS Tables on Port
4434          */
4435         adapter->pool_res.max_rss_tables =
4436                 max_vfs * rss_tables_on_port / port_res.max_vfs;
4437 }
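
/* Worked example (hypothetical numbers): if the port has 15 RSS tables left
 * after reserving one per NIC PF, and this PF's pool may host 32 of the
 * port's 64 total VFs, the pool is limited to 32 * 15 / 64 = 7 RSS tables.
 */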
4438
4439 static int be_get_sriov_config(struct be_adapter *adapter)
4440 {
4441         struct be_resources res = {0};
4442         int max_vfs, old_vfs;
4443
4444         be_cmd_get_profile_config(adapter, &res, NULL, ACTIVE_PROFILE_TYPE,
4445                                   RESOURCE_LIMITS, 0);
4446
4447         /* Some old versions of BE3 FW don't report max_vfs value */
4448         if (BE3_chip(adapter) && !res.max_vfs) {
4449                 max_vfs = pci_sriov_get_totalvfs(adapter->pdev);
4450                 res.max_vfs = max_vfs > 0 ? min(MAX_VFS, max_vfs) : 0;
4451         }
4452
4453         adapter->pool_res = res;
4454
4455         /* If the VFs were not disabled during the previous unload of the
4456          * driver, then we cannot rely on the PF-pool limits for the TotalVFs
4457          * value. Instead use the TotalVFs value stored in the pci-dev struct.
4458          */
4459         old_vfs = pci_num_vf(adapter->pdev);
4460         if (old_vfs) {
4461                 dev_info(&adapter->pdev->dev, "%d VFs are already enabled\n",
4462                          old_vfs);
4463
4464                 adapter->pool_res.max_vfs =
4465                         pci_sriov_get_totalvfs(adapter->pdev);
4466                 adapter->num_vfs = old_vfs;
4467         }
4468
4469         if (skyhawk_chip(adapter) && be_max_vfs(adapter) && !old_vfs) {
4470                 be_calculate_pf_pool_rss_tables(adapter);
4471                 dev_info(&adapter->pdev->dev,
4472                          "RSS can be enabled for all VFs if num_vfs <= %d\n",
4473                          be_max_pf_pool_rss_tables(adapter));
4474         }
4475         return 0;
4476 }
4477
4478 static void be_alloc_sriov_res(struct be_adapter *adapter)
4479 {
4480         int old_vfs = pci_num_vf(adapter->pdev);
4481         struct be_resources vft_res = {0};
4482         int status;
4483
4484         be_get_sriov_config(adapter);
4485
4486         if (!old_vfs)
4487                 pci_sriov_set_totalvfs(adapter->pdev, be_max_vfs(adapter));
4488
4489         /* When the HW is in an SRIOV-capable configuration, the PF-pool
4490          * resources are given to the PF during driver load, provided no VFs
4491          * are already enabled. BE3 FW does not provide this facility, and
4492          * on Lancer the FW does this on its own.
4493          */
4494         if (skyhawk_chip(adapter) && be_max_vfs(adapter) && !old_vfs) {
4495                 be_calculate_vf_res(adapter, 0, &vft_res);
4496                 status = be_cmd_set_sriov_config(adapter, adapter->pool_res, 0,
4497                                                  &vft_res);
4498                 if (status)
4499                         dev_err(&adapter->pdev->dev,
4500                                 "Failed to optimize SRIOV resources\n");
4501         }
4502 }
4503
4504 static int be_get_resources(struct be_adapter *adapter)
4505 {
4506         struct device *dev = &adapter->pdev->dev;
4507         struct be_resources res = {0};
4508         int status;
4509
4510         /* For Lancer, SH, etc. read per-function resource limits from FW.
4511          * GET_FUNC_CONFIG returns per-function guaranteed limits.
4512          * GET_PROFILE_CONFIG returns PCI-E related limits and PF-pool limits.
4513          */
4514         if (BEx_chip(adapter)) {
4515                 BEx_get_resources(adapter, &res);
4516         } else {
4517                 status = be_cmd_get_func_config(adapter, &res);
4518                 if (status)
4519                         return status;
4520
4521                 /* If a default RXQ must be created, we'll use up one RSSQ */
4522                 if (res.max_rss_qs && res.max_rss_qs == res.max_rx_qs &&
4523                     !(res.if_cap_flags & BE_IF_FLAGS_DEFQ_RSS))
4524                         res.max_rss_qs -= 1;
4525         }
4526
4527         /* If RoCE is supported, stash away half the EQs for RoCE */
4528         res.max_nic_evt_qs = be_roce_supported(adapter) ?
4529                                 res.max_evt_qs / 2 : res.max_evt_qs;
4530         adapter->res = res;
4531
4532         /* If FW supports RSS default queue, then skip creating non-RSS
4533          * queue for non-IP traffic.
4534          */
4535         adapter->need_def_rxq = (be_if_cap_flags(adapter) &
4536                                  BE_IF_FLAGS_DEFQ_RSS) ? 0 : 1;
4537
4538         dev_info(dev, "Max: txqs %d, rxqs %d, rss %d, eqs %d, vfs %d\n",
4539                  be_max_txqs(adapter), be_max_rxqs(adapter),
4540                  be_max_rss(adapter), be_max_nic_eqs(adapter),
4541                  be_max_vfs(adapter));
4542         dev_info(dev, "Max: uc-macs %d, mc-macs %d, vlans %d\n",
4543                  be_max_uc(adapter), be_max_mc(adapter),
4544                  be_max_vlans(adapter));
4545
4546         /* Ensure RX and TX queues are created in pairs at init time */
4547         adapter->cfg_num_rx_irqs =
4548                                 min_t(u16, netif_get_num_default_rss_queues(),
4549                                       be_max_qp_irqs(adapter));
4550         adapter->cfg_num_tx_irqs = adapter->cfg_num_rx_irqs;
4551         return 0;
4552 }
4553
4554 static int be_get_config(struct be_adapter *adapter)
4555 {
4556         int status, level;
4557         u16 profile_id;
4558
4559         status = be_cmd_get_cntl_attributes(adapter);
4560         if (status)
4561                 return status;
4562
4563         status = be_cmd_query_fw_cfg(adapter);
4564         if (status)
4565                 return status;
4566
4567         if (!lancer_chip(adapter) && be_physfn(adapter))
4568                 be_cmd_get_fat_dump_len(adapter, &adapter->fat_dump_len);
4569
4570         if (BEx_chip(adapter)) {
4571                 level = be_cmd_get_fw_log_level(adapter);
4572                 adapter->msg_enable =
4573                         level <= FW_LOG_LEVEL_DEFAULT ? NETIF_MSG_HW : 0;
4574         }
4575
4576         be_cmd_get_acpi_wol_cap(adapter);
4577         pci_enable_wake(adapter->pdev, PCI_D3hot, adapter->wol_en);
4578         pci_enable_wake(adapter->pdev, PCI_D3cold, adapter->wol_en);
4579
4580         be_cmd_query_port_name(adapter);
4581
4582         if (be_physfn(adapter)) {
4583                 status = be_cmd_get_active_profile(adapter, &profile_id);
4584                 if (!status)
4585                         dev_info(&adapter->pdev->dev,
4586                                  "Using profile 0x%x\n", profile_id);
4587         }
4588
4589         return 0;
4590 }
4591
4592 static int be_mac_setup(struct be_adapter *adapter)
4593 {
4594         u8 mac[ETH_ALEN];
4595         int status;
4596
4597         if (is_zero_ether_addr(adapter->netdev->dev_addr)) {
4598                 status = be_cmd_get_perm_mac(adapter, mac);
4599                 if (status)
4600                         return status;
4601
4602                 memcpy(adapter->netdev->dev_addr, mac, ETH_ALEN);
4603                 memcpy(adapter->netdev->perm_addr, mac, ETH_ALEN);
4604
4605                 /* Initial MAC for BE3 VFs is already programmed by PF */
4606                 if (BEx_chip(adapter) && be_virtfn(adapter))
4607                         memcpy(adapter->dev_mac, mac, ETH_ALEN);
4608         }
4609
4610         return 0;
4611 }
4612
4613 static void be_schedule_worker(struct be_adapter *adapter)
4614 {
4615         queue_delayed_work(be_wq, &adapter->work, msecs_to_jiffies(1000));
4616         adapter->flags |= BE_FLAGS_WORKER_SCHEDULED;
4617 }
4618
4619 static void be_destroy_err_recovery_workq(void)
4620 {
4621         if (!be_err_recovery_workq)
4622                 return;
4623
4624         flush_workqueue(be_err_recovery_workq);
4625         destroy_workqueue(be_err_recovery_workq);
4626         be_err_recovery_workq = NULL;
4627 }
4628
4629 static void be_schedule_err_detection(struct be_adapter *adapter, u32 delay)
4630 {
4631         struct be_error_recovery *err_rec = &adapter->error_recovery;
4632
4633         if (!be_err_recovery_workq)
4634                 return;
4635
4636         queue_delayed_work(be_err_recovery_workq, &err_rec->err_detection_work,
4637                            msecs_to_jiffies(delay));
4638         adapter->flags |= BE_FLAGS_ERR_DETECTION_SCHEDULED;
4639 }
4640
4641 static int be_setup_queues(struct be_adapter *adapter)
4642 {
4643         struct net_device *netdev = adapter->netdev;
4644         int status;
4645
4646         status = be_evt_queues_create(adapter);
4647         if (status)
4648                 goto err;
4649
4650         status = be_tx_qs_create(adapter);
4651         if (status)
4652                 goto err;
4653
4654         status = be_rx_cqs_create(adapter);
4655         if (status)
4656                 goto err;
4657
4658         status = be_mcc_queues_create(adapter);
4659         if (status)
4660                 goto err;
4661
4662         status = netif_set_real_num_rx_queues(netdev, adapter->num_rx_qs);
4663         if (status)
4664                 goto err;
4665
4666         status = netif_set_real_num_tx_queues(netdev, adapter->num_tx_qs);
4667         if (status)
4668                 goto err;
4669
4670         return 0;
4671 err:
4672         dev_err(&adapter->pdev->dev, "queue_setup failed\n");
4673         return status;
4674 }
4675
4676 static int be_if_create(struct be_adapter *adapter)
4677 {
4678         u32 en_flags = BE_IF_FLAGS_RSS | BE_IF_FLAGS_DEFQ_RSS;
4679         u32 cap_flags = be_if_cap_flags(adapter);
4680
4681         /* alloc required memory for other filtering fields */
4682         adapter->pmac_id = kcalloc(be_max_uc(adapter),
4683                                    sizeof(*adapter->pmac_id), GFP_KERNEL);
4684         if (!adapter->pmac_id)
4685                 return -ENOMEM;
4686
4687         adapter->mc_list = kcalloc(be_max_mc(adapter),
4688                                    sizeof(*adapter->mc_list), GFP_KERNEL);
4689         if (!adapter->mc_list)
4690                 return -ENOMEM;
4691
4692         adapter->uc_list = kcalloc(be_max_uc(adapter),
4693                                    sizeof(*adapter->uc_list), GFP_KERNEL);
4694         if (!adapter->uc_list)
4695                 return -ENOMEM;
4696
4697         if (adapter->cfg_num_rx_irqs == 1)
4698                 cap_flags &= ~(BE_IF_FLAGS_DEFQ_RSS | BE_IF_FLAGS_RSS);
4699
4700         en_flags &= cap_flags;
4701         /* will enable all the needed filter flags in be_open() */
4702         return be_cmd_if_create(adapter, be_if_cap_flags(adapter), en_flags,
4703                                   &adapter->if_handle, 0);
4704 }
4705
4706 int be_update_queues(struct be_adapter *adapter)
4707 {
4708         struct net_device *netdev = adapter->netdev;
4709         int status;
4710
4711         if (netif_running(netdev)) {
4712                 /* be_tx_timeout() must not run concurrently with this
4713                  * function, synchronize with an already-running dev_watchdog
4714                  */
4715                 netif_tx_lock_bh(netdev);
4716                 /* device cannot transmit now, avoid dev_watchdog timeouts */
4717                 netif_carrier_off(netdev);
4718                 netif_tx_unlock_bh(netdev);
4719
4720                 be_close(netdev);
4721         }
4722
4723         be_cancel_worker(adapter);
4724
4725         /* If any vectors have been shared with RoCE we cannot re-program
4726          * the MSIx table.
4727          */
4728         if (!adapter->num_msix_roce_vec)
4729                 be_msix_disable(adapter);
4730
4731         be_clear_queues(adapter);
4732         status = be_cmd_if_destroy(adapter, adapter->if_handle, 0);
4733         if (status)
4734                 return status;
4735
4736         if (!msix_enabled(adapter)) {
4737                 status = be_msix_enable(adapter);
4738                 if (status)
4739                         return status;
4740         }
4741
4742         status = be_if_create(adapter);
4743         if (status)
4744                 return status;
4745
4746         status = be_setup_queues(adapter);
4747         if (status)
4748                 return status;
4749
4750         be_schedule_worker(adapter);
4751
4752         /* The IF was destroyed and re-created. We need to clear
4753          * all promiscuous flags valid for the destroyed IF.
4754          * Without this promisc mode is not restored during
4755          * be_open() because the driver thinks that it is
4756          * already enabled in HW.
4757          */
4758         adapter->if_flags &= ~BE_IF_FLAGS_ALL_PROMISCUOUS;
4759
4760         if (netif_running(netdev))
4761                 status = be_open(netdev);
4762
4763         return status;
4764 }
4765
4766 static inline int fw_major_num(const char *fw_ver)
4767 {
4768         int fw_major = 0, i;
4769
4770         i = sscanf(fw_ver, "%d.", &fw_major);
4771         if (i != 1)
4772                 return 0;
4773
4774         return fw_major;
4775 }
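
/* For example, fw_major_num("11.4.204.0") returns 11; a version string that
 * does not begin with an integer yields 0.
 */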
4776
4777 /* During error recovery, always FLR the PF.
4778  * Otherwise, skip the FLR if any VFs are already enabled.
4779  */
4780 static bool be_reset_required(struct be_adapter *adapter)
4781 {
4782         if (be_error_recovering(adapter))
4783                 return true;
4784         else
4785                 return pci_num_vf(adapter->pdev) == 0;
4786 }
4787
4788 /* Wait for the FW to be ready and perform the required initialization */
4789 static int be_func_init(struct be_adapter *adapter)
4790 {
4791         int status;
4792
4793         status = be_fw_wait_ready(adapter);
4794         if (status)
4795                 return status;
4796
4797         /* FW is now ready; clear errors to allow cmds/doorbell */
4798         be_clear_error(adapter, BE_CLEAR_ALL);
4799
4800         if (be_reset_required(adapter)) {
4801                 status = be_cmd_reset_function(adapter);
4802                 if (status)
4803                         return status;
4804
4805                 /* Wait for interrupts to quiesce after an FLR */
4806                 msleep(100);
4807         }
4808
4809         /* Tell FW we're ready to fire cmds */
4810         status = be_cmd_fw_init(adapter);
4811         if (status)
4812                 return status;
4813
4814         /* Allow interrupts for other ULPs running on NIC function */
4815         be_intr_set(adapter, true);
4816
4817         return 0;
4818 }
4819
4820 static int be_setup(struct be_adapter *adapter)
4821 {
4822         struct device *dev = &adapter->pdev->dev;
4823         int status;
4824
4825         status = be_func_init(adapter);
4826         if (status)
4827                 return status;
4828
4829         be_setup_init(adapter);
4830
4831         if (!lancer_chip(adapter))
4832                 be_cmd_req_native_mode(adapter);
4833
4834         /* Invoke this cmd first to get pf_num and vf_num, which are needed
4835          * for issuing profile-related cmds
4836          */
4837         if (!BEx_chip(adapter)) {
4838                 status = be_cmd_get_func_config(adapter, NULL);
4839                 if (status)
4840                         return status;
4841         }
4842
4843         status = be_get_config(adapter);
4844         if (status)
4845                 goto err;
4846
4847         if (!BE2_chip(adapter) && be_physfn(adapter))
4848                 be_alloc_sriov_res(adapter);
4849
4850         status = be_get_resources(adapter);
4851         if (status)
4852                 goto err;
4853
4854         status = be_msix_enable(adapter);
4855         if (status)
4856                 goto err;
4857
4858         /* will enable all the needed filter flags in be_open() */
4859         status = be_if_create(adapter);
4860         if (status)
4861                 goto err;
4862
4863         /* Updating real_num_tx/rx_queues() requires rtnl_lock() */
4864         rtnl_lock();
4865         status = be_setup_queues(adapter);
4866         rtnl_unlock();
4867         if (status)
4868                 goto err;
4869
4870         be_cmd_get_fn_privileges(adapter, &adapter->cmd_privileges, 0);
4871
4872         status = be_mac_setup(adapter);
4873         if (status)
4874                 goto err;
4875
4876         be_cmd_get_fw_ver(adapter);
4877         dev_info(dev, "FW version is %s\n", adapter->fw_ver);
4878
4879         if (BE2_chip(adapter) && fw_major_num(adapter->fw_ver) < 4) {
4880                 dev_err(dev, "Firmware on card is old (%s), IRQs may not work\n",
4881                         adapter->fw_ver);
4882                 dev_err(dev, "Please upgrade firmware to version >= 4.0\n");
4883         }
4884
4885         status = be_cmd_set_flow_control(adapter, adapter->tx_fc,
4886                                          adapter->rx_fc);
4887         if (status)
4888                 be_cmd_get_flow_control(adapter, &adapter->tx_fc,
4889                                         &adapter->rx_fc);
4890
4891         dev_info(&adapter->pdev->dev, "HW Flow control - TX:%d RX:%d\n",
4892                  adapter->tx_fc, adapter->rx_fc);
4893
4894         if (be_physfn(adapter))
4895                 be_cmd_set_logical_link_config(adapter,
4896                                                IFLA_VF_LINK_STATE_AUTO, 0);
4897
4898         /* The BE3 EVB echoes broadcast/multicast packets back to the PF's
4899          * vport, confusing any Linux bridge or OVS it may be connected to.
4900          * When SRIOV is not enabled, set the EVB to PASSTHRU mode, which
4901          * effectively disables it.
4902          */
4903         if (BE3_chip(adapter))
4904                 be_cmd_set_hsw_config(adapter, 0, 0, adapter->if_handle,
4905                                       PORT_FWD_TYPE_PASSTHRU, 0);
4906
4907         if (adapter->num_vfs)
4908                 be_vf_setup(adapter);
4909
4910         status = be_cmd_get_phy_info(adapter);
4911         if (!status && be_pause_supported(adapter))
4912                 adapter->phy.fc_autoneg = 1;
4913
4914         if (be_physfn(adapter) && !lancer_chip(adapter))
4915                 be_cmd_set_features(adapter);
4916
4917         be_schedule_worker(adapter);
4918         adapter->flags |= BE_FLAGS_SETUP_DONE;
4919         return 0;
4920 err:
4921         be_clear(adapter);
4922         return status;
4923 }
4924
4925 #ifdef CONFIG_NET_POLL_CONTROLLER
4926 static void be_netpoll(struct net_device *netdev)
4927 {
4928         struct be_adapter *adapter = netdev_priv(netdev);
4929         struct be_eq_obj *eqo;
4930         int i;
4931
4932         for_all_evt_queues(adapter, eqo, i) {
4933                 be_eq_notify(eqo->adapter, eqo->q.id, false, true, 0, 0);
4934                 napi_schedule(&eqo->napi);
4935         }
4936 }
4937 #endif
4938
4939 int be_load_fw(struct be_adapter *adapter, u8 *fw_file)
4940 {
4941         const struct firmware *fw;
4942         int status;
4943
4944         if (!netif_running(adapter->netdev)) {
4945                 dev_err(&adapter->pdev->dev,
4946                         "Firmware load not allowed (interface is down)\n");
4947                 return -ENETDOWN;
4948         }
4949
4950         status = request_firmware(&fw, fw_file, &adapter->pdev->dev);
4951         if (status)
4952                 goto fw_exit;
4953
4954         dev_info(&adapter->pdev->dev, "Flashing firmware file %s\n", fw_file);
4955
4956         if (lancer_chip(adapter))
4957                 status = lancer_fw_download(adapter, fw);
4958         else
4959                 status = be_fw_download(adapter, fw);
4960
4961         if (!status)
4962                 be_cmd_get_fw_ver(adapter);
4963
4964 fw_exit:
4965         release_firmware(fw);
4966         return status;
4967 }
4968
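/* Typically reached from userspace via iproute2, e.g.
 * "bridge link set dev <ifname> hwmode vepa" (illustrative invocation; the
 * exact tooling may vary).
 */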
4969 static int be_ndo_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh,
4970                                  u16 flags, struct netlink_ext_ack *extack)
4971 {
4972         struct be_adapter *adapter = netdev_priv(dev);
4973         struct nlattr *attr, *br_spec;
4974         int rem;
4975         int status = 0;
4976         u16 mode = 0;
4977
4978         if (!sriov_enabled(adapter))
4979                 return -EOPNOTSUPP;
4980
4981         br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
4982         if (!br_spec)
4983                 return -EINVAL;
4984
4985         nla_for_each_nested(attr, br_spec, rem) {
4986                 if (nla_type(attr) != IFLA_BRIDGE_MODE)
4987                         continue;
4988
4989                 if (nla_len(attr) < sizeof(mode))
4990                         return -EINVAL;
4991
4992                 mode = nla_get_u16(attr);
4993                 if (BE3_chip(adapter) && mode == BRIDGE_MODE_VEPA)
4994                         return -EOPNOTSUPP;
4995
4996                 if (mode != BRIDGE_MODE_VEPA && mode != BRIDGE_MODE_VEB)
4997                         return -EINVAL;
4998
4999                 status = be_cmd_set_hsw_config(adapter, 0, 0,
5000                                                adapter->if_handle,
5001                                                mode == BRIDGE_MODE_VEPA ?
5002                                                PORT_FWD_TYPE_VEPA :
5003                                                PORT_FWD_TYPE_VEB, 0);
5004                 if (status)
5005                         goto err;
5006
5007                 dev_info(&adapter->pdev->dev, "enabled switch mode: %s\n",
5008                          mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB");
5009
5010                 return status;
5011         }
5012 err:
5013         dev_err(&adapter->pdev->dev, "Failed to set switch mode %s\n",
5014                 mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB");
5015
5016         return status;
5017 }
5018
5019 static int be_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
5020                                  struct net_device *dev, u32 filter_mask,
5021                                  int nlflags)
5022 {
5023         struct be_adapter *adapter = netdev_priv(dev);
5024         int status = 0;
5025         u8 hsw_mode;
5026
5027         /* BE and Lancer chips support VEB mode only */
5028         if (BEx_chip(adapter) || lancer_chip(adapter)) {
5029                 /* VEB is disabled in non-SR-IOV profiles on BE3/Lancer */
5030                 if (!pci_sriov_get_totalvfs(adapter->pdev))
5031                         return 0;
5032                 hsw_mode = PORT_FWD_TYPE_VEB;
5033         } else {
5034                 status = be_cmd_get_hsw_config(adapter, NULL, 0,
5035                                                adapter->if_handle, &hsw_mode,
5036                                                NULL);
5037                 if (status)
5038                         return 0;
5039
5040                 if (hsw_mode == PORT_FWD_TYPE_PASSTHRU)
5041                         return 0;
5042         }
5043
5044         return ndo_dflt_bridge_getlink(skb, pid, seq, dev,
5045                                        hsw_mode == PORT_FWD_TYPE_VEPA ?
5046                                        BRIDGE_MODE_VEPA : BRIDGE_MODE_VEB,
5047                                        0, 0, nlflags, filter_mask, NULL);
5048 }
5049
5050 static struct be_cmd_work *be_alloc_work(struct be_adapter *adapter,
5051                                          void (*func)(struct work_struct *))
5052 {
5053         struct be_cmd_work *work;
5054
5055         work = kzalloc(sizeof(*work), GFP_ATOMIC);
5056         if (!work) {
5057                 dev_err(&adapter->pdev->dev,
5058                         "be_work memory allocation failed\n");
5059                 return NULL;
5060         }
5061
5062         INIT_WORK(&work->work, func);
5063         work->adapter = adapter;
5064         return work;
5065 }
5066
5067 static netdev_features_t be_features_check(struct sk_buff *skb,
5068                                            struct net_device *dev,
5069                                            netdev_features_t features)
5070 {
5071         struct be_adapter *adapter = netdev_priv(dev);
5072         u8 l4_hdr = 0;
5073
5074         if (skb_is_gso(skb)) {
5075                 /* IPv6 TSO requests with extension hdrs are a problem
5076                  * for Lancer and BE3 HW. Disable the TSO6 feature.
5077                  */
5078                 if (!skyhawk_chip(adapter) && is_ipv6_ext_hdr(skb))
5079                         features &= ~NETIF_F_TSO6;
5080
5081                 /* Lancer cannot handle packets with an MSS less than 256,
5082                  * nor a TSO packet with a single segment.
5083                  * Disable GSO support in such cases.
5084                  */
5085                 if (lancer_chip(adapter) &&
5086                     (skb_shinfo(skb)->gso_size < 256 ||
5087                      skb_shinfo(skb)->gso_segs == 1))
5088                         features &= ~NETIF_F_GSO_MASK;
5089         }
5090
5091         /* The code below restricts offload features for some tunneled and
5092          * Q-in-Q packets.
5093          * Offload features for normal (non tunnel) packets are unchanged.
5094          */
5095         features = vlan_features_check(skb, features);
5096         if (!skb->encapsulation ||
5097             !(adapter->flags & BE_FLAGS_VXLAN_OFFLOADS))
5098                 return features;
5099
5100         /* It's an encapsulated packet and VxLAN offloads are enabled. We
5101          * should disable tunnel offload features if it's not a VxLAN packet,
5102          * as tunnel offloads have been enabled only for VxLAN. This is done to
5103          * allow other tunneled traffic like GRE to work fine while VxLAN
5104          * offloads are configured in Skyhawk-R.
5105          */
5106         switch (vlan_get_protocol(skb)) {
5107         case htons(ETH_P_IP):
5108                 l4_hdr = ip_hdr(skb)->protocol;
5109                 break;
5110         case htons(ETH_P_IPV6):
5111                 l4_hdr = ipv6_hdr(skb)->nexthdr;
5112                 break;
5113         default:
5114                 return features;
5115         }
5116
5117         if (l4_hdr != IPPROTO_UDP ||
5118             skb->inner_protocol_type != ENCAP_TYPE_ETHER ||
5119             skb->inner_protocol != htons(ETH_P_TEB) ||
5120             skb_inner_mac_header(skb) - skb_transport_header(skb) !=
5121                 sizeof(struct udphdr) + sizeof(struct vxlanhdr) ||
5122             !adapter->vxlan_port ||
5123             udp_hdr(skb)->dest != adapter->vxlan_port)
5124                 return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
5125
5126         return features;
5127 }
5128
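/* The phys_port_id built below is one byte of (hba_port_num + 1) followed by
 * the controller serial-number words copied in reverse order.
 */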
5129 static int be_get_phys_port_id(struct net_device *dev,
5130                                struct netdev_phys_item_id *ppid)
5131 {
5132         int i, id_len = CNTL_SERIAL_NUM_WORDS * CNTL_SERIAL_NUM_WORD_SZ + 1;
5133         struct be_adapter *adapter = netdev_priv(dev);
5134         u8 *id;
5135
5136         if (MAX_PHYS_ITEM_ID_LEN < id_len)
5137                 return -ENOSPC;
5138
5139         ppid->id[0] = adapter->hba_port_num + 1;
5140         id = &ppid->id[1];
5141         for (i = CNTL_SERIAL_NUM_WORDS - 1; i >= 0;
5142              i--, id += CNTL_SERIAL_NUM_WORD_SZ)
5143                 memcpy(id, &adapter->serial_num[i], CNTL_SERIAL_NUM_WORD_SZ);
5144
5145         ppid->id_len = id_len;
5146
5147         return 0;
5148 }
5149
5150 static void be_set_rx_mode(struct net_device *dev)
5151 {
5152         struct be_adapter *adapter = netdev_priv(dev);
5153         struct be_cmd_work *work;
5154
5155         work = be_alloc_work(adapter, be_work_set_rx_mode);
5156         if (work)
5157                 queue_work(be_wq, &work->work);
5158 }
5159
5160 static const struct net_device_ops be_netdev_ops = {
5161         .ndo_open               = be_open,
5162         .ndo_stop               = be_close,
5163         .ndo_start_xmit         = be_xmit,
5164         .ndo_set_rx_mode        = be_set_rx_mode,
5165         .ndo_set_mac_address    = be_mac_addr_set,
5166         .ndo_get_stats64        = be_get_stats64,
5167         .ndo_validate_addr      = eth_validate_addr,
5168         .ndo_vlan_rx_add_vid    = be_vlan_add_vid,
5169         .ndo_vlan_rx_kill_vid   = be_vlan_rem_vid,
5170         .ndo_set_vf_mac         = be_set_vf_mac,
5171         .ndo_set_vf_vlan        = be_set_vf_vlan,
5172         .ndo_set_vf_rate        = be_set_vf_tx_rate,
5173         .ndo_get_vf_config      = be_get_vf_config,
5174         .ndo_set_vf_link_state  = be_set_vf_link_state,
5175         .ndo_set_vf_spoofchk    = be_set_vf_spoofchk,
5176         .ndo_tx_timeout         = be_tx_timeout,
5177 #ifdef CONFIG_NET_POLL_CONTROLLER
5178         .ndo_poll_controller    = be_netpoll,
5179 #endif
5180         .ndo_bridge_setlink     = be_ndo_bridge_setlink,
5181         .ndo_bridge_getlink     = be_ndo_bridge_getlink,
5182         .ndo_features_check     = be_features_check,
5183         .ndo_get_phys_port_id   = be_get_phys_port_id,
5184 };
5185
5186 static void be_netdev_init(struct net_device *netdev)
5187 {
5188         struct be_adapter *adapter = netdev_priv(netdev);
5189
5190         netdev->hw_features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 |
5191                 NETIF_F_GSO_UDP_TUNNEL |
5192                 NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_RXCSUM |
5193                 NETIF_F_HW_VLAN_CTAG_TX;
5194         if ((be_if_cap_flags(adapter) & BE_IF_FLAGS_RSS))
5195                 netdev->hw_features |= NETIF_F_RXHASH;
5196
5197         netdev->features |= netdev->hw_features |
5198                 NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_FILTER;
5199
5200         netdev->vlan_features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 |
5201                 NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
5202
5203         netdev->priv_flags |= IFF_UNICAST_FLT;
5204
5205         netdev->flags |= IFF_MULTICAST;
5206
5207         netif_set_gso_max_size(netdev, BE_MAX_GSO_SIZE - ETH_HLEN);
5208
5209         netdev->netdev_ops = &be_netdev_ops;
5210
5211         netdev->ethtool_ops = &be_ethtool_ops;
5212
5213         if (!lancer_chip(adapter) && !BEx_chip(adapter) && !be_is_mc(adapter))
5214                 netdev->udp_tunnel_nic_info = &be_udp_tunnels;
5215
5216         /* MTU range: 256 - 9000 */
5217         netdev->min_mtu = BE_MIN_MTU;
5218         netdev->max_mtu = BE_MAX_MTU;
5219 }
5220
5221 static void be_cleanup(struct be_adapter *adapter)
5222 {
5223         struct net_device *netdev = adapter->netdev;
5224
5225         rtnl_lock();
5226         netif_device_detach(netdev);
5227         if (netif_running(netdev))
5228                 be_close(netdev);
5229         rtnl_unlock();
5230
5231         be_clear(adapter);
5232 }
5233
5234 static int be_resume(struct be_adapter *adapter)
5235 {
5236         struct net_device *netdev = adapter->netdev;
5237         int status;
5238
5239         status = be_setup(adapter);
5240         if (status)
5241                 return status;
5242
5243         rtnl_lock();
5244         if (netif_running(netdev))
5245                 status = be_open(netdev);
5246         rtnl_unlock();
5247
5248         if (status)
5249                 return status;
5250
5251         netif_device_attach(netdev);
5252
5253         return 0;
5254 }
5255
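/* Trigger a chip-level soft reset by setting the SR bit in the SLIPORT
 * soft-reset register.
 */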
5256 static void be_soft_reset(struct be_adapter *adapter)
5257 {
5258         u32 val;
5259
5260         dev_info(&adapter->pdev->dev, "Initiating chip soft reset\n");
5261         val = ioread32(adapter->pcicfg + SLIPORT_SOFTRESET_OFFSET);
5262         val |= SLIPORT_SOFTRESET_SR_MASK;
5263         iowrite32(val, adapter->pcicfg + SLIPORT_SOFTRESET_OFFSET);
5264 }
5265
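/* Recovery is declined when the POST stage does not flag a recoverable error,
 * when the error occurs within ERR_RECOVERY_IDLE_TIME of driver load or within
 * ERR_RECOVERY_INTERVAL of the previous recovery, or when the same TPE error
 * code repeats back-to-back.
 */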
5266 static bool be_err_is_recoverable(struct be_adapter *adapter)
5267 {
5268         struct be_error_recovery *err_rec = &adapter->error_recovery;
5269         unsigned long initial_idle_time =
5270                 msecs_to_jiffies(ERR_RECOVERY_IDLE_TIME);
5271         unsigned long recovery_interval =
5272                 msecs_to_jiffies(ERR_RECOVERY_INTERVAL);
5273         u16 ue_err_code;
5274         u32 val;
5275
5276         val = be_POST_stage_get(adapter);
5277         if ((val & POST_STAGE_RECOVERABLE_ERR) != POST_STAGE_RECOVERABLE_ERR)
5278                 return false;
5279         ue_err_code = val & POST_ERR_RECOVERY_CODE_MASK;
5280         if (ue_err_code == 0)
5281                 return false;
5282
5283         dev_err(&adapter->pdev->dev, "Recoverable HW error code: 0x%x\n",
5284                 ue_err_code);
5285
5286         if (time_before_eq(jiffies - err_rec->probe_time, initial_idle_time)) {
5287                 dev_err(&adapter->pdev->dev,
5288                         "Cannot recover within %lu sec from driver load\n",
5289                         jiffies_to_msecs(initial_idle_time) / MSEC_PER_SEC);
5290                 return false;
5291         }
5292
5293         if (err_rec->last_recovery_time && time_before_eq(
5294                 jiffies - err_rec->last_recovery_time, recovery_interval)) {
5295                 dev_err(&adapter->pdev->dev,
5296                         "Cannot recover within %lu sec from last recovery\n",
5297                         jiffies_to_msecs(recovery_interval) / MSEC_PER_SEC);
5298                 return false;
5299         }
5300
5301         if (ue_err_code == err_rec->last_err_code) {
5302                 dev_err(&adapter->pdev->dev,
5303                         "Cannot recover from a consecutive TPE error\n");
5304                 return false;
5305         }
5306
5307         err_rec->last_recovery_time = jiffies;
5308         err_rec->last_err_code = ue_err_code;
5309         return true;
5310 }
5311
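/* TPE recovery state machine (as implemented below):
 * NONE -> DETECT: wait one UE-detect interval.
 * DETECT -> RESET (PF0) or -> PRE_POLL (other PFs) once a recoverable error
 * is confirmed.
 * RESET -> PRE_POLL after the chip soft reset.
 * PRE_POLL -> REINIT, returning success so recovery can proceed.
 */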
5312 static int be_tpe_recover(struct be_adapter *adapter)
5313 {
5314         struct be_error_recovery *err_rec = &adapter->error_recovery;
5315         int status = -EAGAIN;
5316         u32 val;
5317
5318         switch (err_rec->recovery_state) {
5319         case ERR_RECOVERY_ST_NONE:
5320                 err_rec->recovery_state = ERR_RECOVERY_ST_DETECT;
5321                 err_rec->resched_delay = ERR_RECOVERY_UE_DETECT_DURATION;
5322                 break;
5323
5324         case ERR_RECOVERY_ST_DETECT:
5325                 val = be_POST_stage_get(adapter);
5326                 if ((val & POST_STAGE_RECOVERABLE_ERR) !=
5327                     POST_STAGE_RECOVERABLE_ERR) {
5328                         dev_err(&adapter->pdev->dev,
5329                                 "Unrecoverable HW error detected: 0x%x\n", val);
5330                         status = -EINVAL;
5331                         err_rec->resched_delay = 0;
5332                         break;
5333                 }
5334
5335                 dev_err(&adapter->pdev->dev, "Recoverable HW error detected\n");
5336
5337                 /* Only PF0 initiates Chip Soft Reset. But PF0 must wait UE2SR
5338                  * milliseconds before it checks for final error status in
5339                  * SLIPORT_SEMAPHORE to determine if the recovery criteria are
5340                  * met. If they are, PF0 initiates a Soft Reset.
5341                  */
5342                 if (adapter->pf_num == 0) {
5343                         err_rec->recovery_state = ERR_RECOVERY_ST_RESET;
5344                         err_rec->resched_delay = err_rec->ue_to_reset_time -
5345                                         ERR_RECOVERY_UE_DETECT_DURATION;
5346                         break;
5347                 }
5348
5349                 err_rec->recovery_state = ERR_RECOVERY_ST_PRE_POLL;
5350                 err_rec->resched_delay = err_rec->ue_to_poll_time -
5351                                         ERR_RECOVERY_UE_DETECT_DURATION;
5352                 break;
5353
5354         case ERR_RECOVERY_ST_RESET:
5355                 if (!be_err_is_recoverable(adapter)) {
5356                         dev_err(&adapter->pdev->dev,
5357                                 "Failed to meet recovery criteria\n");
5358                         status = -EIO;
5359                         err_rec->resched_delay = 0;
5360                         break;
5361                 }
5362                 be_soft_reset(adapter);
5363                 err_rec->recovery_state = ERR_RECOVERY_ST_PRE_POLL;
5364                 err_rec->resched_delay = err_rec->ue_to_poll_time -
5365                                         err_rec->ue_to_reset_time;
5366                 break;
5367
5368         case ERR_RECOVERY_ST_PRE_POLL:
5369                 err_rec->recovery_state = ERR_RECOVERY_ST_REINIT;
5370                 err_rec->resched_delay = 0;
5371                 status = 0;                     /* done */
5372                 break;
5373
5374         default:
5375                 status = -EINVAL;
5376                 err_rec->resched_delay = 0;
5377                 break;
5378         }
5379
5380         return status;
5381 }
5382
5383 static int be_err_recover(struct be_adapter *adapter)
5384 {
5385         int status;
5386
5387         if (!lancer_chip(adapter)) {
5388                 if (!adapter->error_recovery.recovery_supported ||
5389                     adapter->priv_flags & BE_DISABLE_TPE_RECOVERY)
5390                         return -EIO;
5391                 status = be_tpe_recover(adapter);
5392                 if (status)
5393                         goto err;
5394         }
5395
5396         /* Wait for adapter to reach quiescent state before
5397          * destroying queues
5398          */
5399         status = be_fw_wait_ready(adapter);
5400         if (status)
5401                 goto err;
5402
5403         adapter->flags |= BE_FLAGS_TRY_RECOVERY;
5404
5405         be_cleanup(adapter);
5406
5407         status = be_resume(adapter);
5408         if (status)
5409                 goto err;
5410
5411         adapter->flags &= ~BE_FLAGS_TRY_RECOVERY;
5412
5413 err:
5414         return status;
5415 }
5416
5417 static void be_err_detection_task(struct work_struct *work)
5418 {
5419         struct be_error_recovery *err_rec =
5420                         container_of(work, struct be_error_recovery,
5421                                      err_detection_work.work);
5422         struct be_adapter *adapter =
5423                         container_of(err_rec, struct be_adapter,
5424                                      error_recovery);
5425         u32 resched_delay = ERR_RECOVERY_DETECTION_DELAY;
5426         struct device *dev = &adapter->pdev->dev;
5427         int recovery_status;
5428
5429         be_detect_error(adapter);
5430         if (!be_check_error(adapter, BE_ERROR_HW))
5431                 goto reschedule_task;
5432
5433         recovery_status = be_err_recover(adapter);
5434         if (!recovery_status) {
5435                 err_rec->recovery_retries = 0;
5436                 err_rec->recovery_state = ERR_RECOVERY_ST_NONE;
5437                 dev_info(dev, "Adapter recovery successful\n");
5438                 goto reschedule_task;
5439         } else if (!lancer_chip(adapter) && err_rec->resched_delay) {
5440                 /* BEx/SH recovery state machine */
5441                 if (adapter->pf_num == 0 &&
5442                     err_rec->recovery_state > ERR_RECOVERY_ST_DETECT)
5443                         dev_err(&adapter->pdev->dev,
5444                                 "Adapter recovery in progress\n");
5445                 resched_delay = err_rec->resched_delay;
5446                 goto reschedule_task;
5447         } else if (lancer_chip(adapter) && be_virtfn(adapter)) {
5448                 /* For VFs, check every second whether the PF has
5449                  * allocated resources.
5450                  */
5451                 dev_err(dev, "Re-trying adapter recovery\n");
5452                 goto reschedule_task;
5453         } else if (lancer_chip(adapter) && err_rec->recovery_retries++ <
5454                    ERR_RECOVERY_MAX_RETRY_COUNT) {
5455                 /* In case of another error during recovery, it takes 30 sec
5456                  * for the adapter to come out of error. Retry error recovery after
5457                  * this time interval.
5458                  */
5459                 dev_err(&adapter->pdev->dev, "Re-trying adapter recovery\n");
5460                 resched_delay = ERR_RECOVERY_RETRY_DELAY;
5461                 goto reschedule_task;
5462         } else {
5463                 dev_err(dev, "Adapter recovery failed\n");
5464                 dev_err(dev, "Please reboot server to recover\n");
5465         }
5466
5467         return;
5468
5469 reschedule_task:
5470         be_schedule_err_detection(adapter, resched_delay);
5471 }
5472
5473 static void be_log_sfp_info(struct be_adapter *adapter)
5474 {
5475         int status;
5476
5477         status = be_cmd_query_sfp_info(adapter);
5478         if (!status) {
5479                 dev_err(&adapter->pdev->dev,
5480                         "Port %c: %s Vendor: %s part no: %s\n",
5481                         adapter->port_name,
5482                         be_misconfig_evt_port_state[adapter->phy_state],
5483                         adapter->phy.vendor_name,
5484                         adapter->phy.vendor_pn);
5485         }
5486         adapter->flags &= ~BE_FLAGS_PHY_MISCONFIGURED;
5487 }
5488
5489 static void be_worker(struct work_struct *work)
5490 {
5491         struct be_adapter *adapter =
5492                 container_of(work, struct be_adapter, work.work);
5493         struct be_rx_obj *rxo;
5494         int i;
5495
5496         if (be_physfn(adapter) &&
5497             MODULO(adapter->work_counter, adapter->be_get_temp_freq) == 0)
5498                 be_cmd_get_die_temperature(adapter);
5499
5500         /* when interrupts are not yet enabled, just reap any pending
5501          * mcc completions
5502          */
5503         if (!netif_running(adapter->netdev)) {
5504                 be_process_mcc(adapter);
5505                 goto reschedule;
5506         }
5507
5508         if (!adapter->stats_cmd_sent) {
5509                 if (lancer_chip(adapter))
5510                         lancer_cmd_get_pport_stats(adapter,
5511                                                    &adapter->stats_cmd);
5512                 else
5513                         be_cmd_get_stats(adapter, &adapter->stats_cmd);
5514         }
5515
5516         for_all_rx_queues(adapter, rxo, i) {
5517                 /* Replenish RX-queues starved due to memory
5518                  * allocation failures.
5519                  */
5520                 if (rxo->rx_post_starved)
5521                         be_post_rx_frags(rxo, GFP_KERNEL, MAX_RX_POST);
5522         }
5523
5524         /* EQ-delay update for Skyhawk is done while notifying EQ */
5525         if (!skyhawk_chip(adapter))
5526                 be_eqd_update(adapter, false);
5527
5528         if (adapter->flags & BE_FLAGS_PHY_MISCONFIGURED)
5529                 be_log_sfp_info(adapter);
5530
5531 reschedule:
5532         adapter->work_counter++;
5533         queue_delayed_work(be_wq, &adapter->work, msecs_to_jiffies(1000));
5534 }
5535
5536 static void be_unmap_pci_bars(struct be_adapter *adapter)
5537 {
5538         if (adapter->csr)
5539                 pci_iounmap(adapter->pdev, adapter->csr);
5540         if (adapter->db)
5541                 pci_iounmap(adapter->pdev, adapter->db);
5542         if (adapter->pcicfg && adapter->pcicfg_mapped)
5543                 pci_iounmap(adapter->pdev, adapter->pcicfg);
5544 }
5545
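/* Doorbell BAR: BAR 0 on Lancer chips and on VFs, BAR 4 otherwise. */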
5546 static int db_bar(struct be_adapter *adapter)
5547 {
5548         if (lancer_chip(adapter) || be_virtfn(adapter))
5549                 return 0;
5550         else
5551                 return 4;
5552 }
5553
5554 static int be_roce_map_pci_bars(struct be_adapter *adapter)
5555 {
5556         if (skyhawk_chip(adapter)) {
5557                 adapter->roce_db.size = 4096;
5558                 adapter->roce_db.io_addr = pci_resource_start(adapter->pdev,
5559                                                               db_bar(adapter));
5560                 adapter->roce_db.total_size = pci_resource_len(adapter->pdev,
5561                                                                db_bar(adapter));
5562         }
5563         return 0;
5564 }
5565
5566 static int be_map_pci_bars(struct be_adapter *adapter)
5567 {
5568         struct pci_dev *pdev = adapter->pdev;
5569         u8 __iomem *addr;
5570         u32 sli_intf;
5571
5572         pci_read_config_dword(adapter->pdev, SLI_INTF_REG_OFFSET, &sli_intf);
5573         adapter->sli_family = (sli_intf & SLI_INTF_FAMILY_MASK) >>
5574                                 SLI_INTF_FAMILY_SHIFT;
5575         adapter->virtfn = (sli_intf & SLI_INTF_FT_MASK) ? 1 : 0;
5576
5577         if (BEx_chip(adapter) && be_physfn(adapter)) {
5578                 adapter->csr = pci_iomap(pdev, 2, 0);
5579                 if (!adapter->csr)
5580                         return -ENOMEM;
5581         }
5582
5583         addr = pci_iomap(pdev, db_bar(adapter), 0);
5584         if (!addr)
5585                 goto pci_map_err;
5586         adapter->db = addr;
5587
5588         if (skyhawk_chip(adapter) || BEx_chip(adapter)) {
5589                 if (be_physfn(adapter)) {
5590                         /* PCICFG is the 2nd BAR in BE2 */
5591                         addr = pci_iomap(pdev, BE2_chip(adapter) ? 1 : 0, 0);
5592                         if (!addr)
5593                                 goto pci_map_err;
5594                         adapter->pcicfg = addr;
5595                         adapter->pcicfg_mapped = true;
5596                 } else {
5597                         adapter->pcicfg = adapter->db + SRIOV_VF_PCICFG_OFFSET;
5598                         adapter->pcicfg_mapped = false;
5599                 }
5600         }
5601
5602         be_roce_map_pci_bars(adapter);
5603         return 0;
5604
5605 pci_map_err:
5606         dev_err(&pdev->dev, "Error in mapping PCI BARs\n");
5607         be_unmap_pci_bars(adapter);
5608         return -ENOMEM;
5609 }
5610
5611 static void be_drv_cleanup(struct be_adapter *adapter)
5612 {
5613         struct be_dma_mem *mem = &adapter->mbox_mem_alloced;
5614         struct device *dev = &adapter->pdev->dev;
5615
5616         if (mem->va)
5617                 dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5618
5619         mem = &adapter->rx_filter;
5620         if (mem->va)
5621                 dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5622
5623         mem = &adapter->stats_cmd;
5624         if (mem->va)
5625                 dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5626 }
5627
5628 /* Allocate and initialize various fields in be_adapter struct */
5629 static int be_drv_init(struct be_adapter *adapter)
5630 {
5631         struct be_dma_mem *mbox_mem_alloc = &adapter->mbox_mem_alloced;
5632         struct be_dma_mem *mbox_mem_align = &adapter->mbox_mem;
5633         struct be_dma_mem *rx_filter = &adapter->rx_filter;
5634         struct be_dma_mem *stats_cmd = &adapter->stats_cmd;
5635         struct device *dev = &adapter->pdev->dev;
5636         int status = 0;
5637
5638         mbox_mem_alloc->size = sizeof(struct be_mcc_mailbox) + 16;
5639         mbox_mem_alloc->va = dma_alloc_coherent(dev, mbox_mem_alloc->size,
5640                                                 &mbox_mem_alloc->dma,
5641                                                 GFP_KERNEL);
5642         if (!mbox_mem_alloc->va)
5643                 return -ENOMEM;
5644
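        /* The mailbox must be 16-byte aligned. The allocation above is padded
         * by 16 bytes so that the aligned VA/DMA addresses still fall within
         * the allocated buffer.
         */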
5645         mbox_mem_align->size = sizeof(struct be_mcc_mailbox);
5646         mbox_mem_align->va = PTR_ALIGN(mbox_mem_alloc->va, 16);
5647         mbox_mem_align->dma = PTR_ALIGN(mbox_mem_alloc->dma, 16);
5648
5649         rx_filter->size = sizeof(struct be_cmd_req_rx_filter);
5650         rx_filter->va = dma_alloc_coherent(dev, rx_filter->size,
5651                                            &rx_filter->dma, GFP_KERNEL);
5652         if (!rx_filter->va) {
5653                 status = -ENOMEM;
5654                 goto free_mbox;
5655         }
5656
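        /* The GET_STATS request layout differs per ASIC generation, so size
         * the DMA buffer for the command version this chip will use.
         */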
5657         if (lancer_chip(adapter))
5658                 stats_cmd->size = sizeof(struct lancer_cmd_req_pport_stats);
5659         else if (BE2_chip(adapter))
5660                 stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v0);
5661         else if (BE3_chip(adapter))
5662                 stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v1);
5663         else
5664                 stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v2);
5665         stats_cmd->va = dma_alloc_coherent(dev, stats_cmd->size,
5666                                            &stats_cmd->dma, GFP_KERNEL);
5667         if (!stats_cmd->va) {
5668                 status = -ENOMEM;
5669                 goto free_rx_filter;
5670         }
5671
5672         mutex_init(&adapter->mbox_lock);
5673         mutex_init(&adapter->mcc_lock);
5674         mutex_init(&adapter->rx_filter_lock);
5675         spin_lock_init(&adapter->mcc_cq_lock);
5676         init_completion(&adapter->et_cmd_compl);
5677
5678         pci_save_state(adapter->pdev);
5679
5680         INIT_DELAYED_WORK(&adapter->work, be_worker);
5681
5682         adapter->error_recovery.recovery_state = ERR_RECOVERY_ST_NONE;
5683         adapter->error_recovery.resched_delay = 0;
5684         INIT_DELAYED_WORK(&adapter->error_recovery.err_detection_work,
5685                           be_err_detection_task);
5686
5687         adapter->rx_fc = true;
5688         adapter->tx_fc = true;
5689
5690         /* Must be a power of 2 or else MODULO will BUG_ON */
5691         adapter->be_get_temp_freq = 64;
5692
5693         return 0;
5694
5695 free_rx_filter:
5696         dma_free_coherent(dev, rx_filter->size, rx_filter->va, rx_filter->dma);
5697 free_mbox:
5698         dma_free_coherent(dev, mbox_mem_alloc->size, mbox_mem_alloc->va,
5699                           mbox_mem_alloc->dma);
5700         return status;
5701 }
5702
5703 static void be_remove(struct pci_dev *pdev)
5704 {
5705         struct be_adapter *adapter = pci_get_drvdata(pdev);
5706
5707         if (!adapter)
5708                 return;
5709
5710         be_roce_dev_remove(adapter);
5711         be_intr_set(adapter, false);
5712
5713         be_cancel_err_detection(adapter);
5714
5715         unregister_netdev(adapter->netdev);
5716
5717         be_clear(adapter);
5718
5719         if (!pci_vfs_assigned(adapter->pdev))
5720                 be_cmd_reset_function(adapter);
5721
5722         /* Tell the FW we are done issuing commands */
5723         be_cmd_fw_clean(adapter);
5724
5725         be_unmap_pci_bars(adapter);
5726         be_drv_cleanup(adapter);
5727
5728         pci_disable_pcie_error_reporting(pdev);
5729
5730         pci_release_regions(pdev);
5731         pci_disable_device(pdev);
5732
5733         free_netdev(adapter->netdev);
5734 }
5735
5736 static ssize_t be_hwmon_show_temp(struct device *dev,
5737                                   struct device_attribute *dev_attr,
5738                                   char *buf)
5739 {
5740         struct be_adapter *adapter = dev_get_drvdata(dev);
5741
5742         /* Unit: millidegree Celsius */
5743         if (adapter->hwmon_info.be_on_die_temp == BE_INVALID_DIE_TEMP)
5744                 return -EIO;
5745         else
5746                 return sprintf(buf, "%u\n",
5747                                adapter->hwmon_info.be_on_die_temp * 1000);
5748 }
5749
5750 static SENSOR_DEVICE_ATTR(temp1_input, 0444,
5751                           be_hwmon_show_temp, NULL, 1);
5752
5753 static struct attribute *be_hwmon_attrs[] = {
5754         &sensor_dev_attr_temp1_input.dev_attr.attr,
5755         NULL
5756 };
5757
5758 ATTRIBUTE_GROUPS(be_hwmon);
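
/* The on-die temperature is exposed through the standard hwmon sysfs
 * interface in millidegrees Celsius. For illustration only (the hwmon index
 * below is hypothetical and depends on the system):
 *
 *   $ cat /sys/class/hwmon/hwmon2/temp1_input
 *   45000        (i.e. 45 degrees Celsius)
 */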
5759
5760 static char *mc_name(struct be_adapter *adapter)
5761 {
5762         char *str = ""; /* default */
5763
5764         switch (adapter->mc_type) {
5765         case UMC:
5766                 str = "UMC";
5767                 break;
5768         case FLEX10:
5769                 str = "FLEX10";
5770                 break;
5771         case vNIC1:
5772                 str = "vNIC-1";
5773                 break;
5774         case nPAR:
5775                 str = "nPAR";
5776                 break;
5777         case UFP:
5778                 str = "UFP";
5779                 break;
5780         case vNIC2:
5781                 str = "vNIC-2";
5782                 break;
5783         default:
5784                 str = "";
5785         }
5786
5787         return str;
5788 }
5789
5790 static inline char *func_name(struct be_adapter *adapter)
5791 {
5792         return be_physfn(adapter) ? "PF" : "VF";
5793 }
5794
5795 static inline char *nic_name(struct pci_dev *pdev)
5796 {
5797         switch (pdev->device) {
5798         case OC_DEVICE_ID1:
5799                 return OC_NAME;
5800         case OC_DEVICE_ID2:
5801                 return OC_NAME_BE;
5802         case OC_DEVICE_ID3:
5803         case OC_DEVICE_ID4:
5804                 return OC_NAME_LANCER;
5805         case BE_DEVICE_ID2:
5806                 return BE3_NAME;
5807         case OC_DEVICE_ID5:
5808         case OC_DEVICE_ID6:
5809                 return OC_NAME_SH;
5810         default:
5811                 return BE_NAME;
5812         }
5813 }
5814
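/* Bring up one PCI function: enable the device, map its BARs, allocate the
 * driver's DMA/command state, configure the adapter via be_setup() and
 * finally register the netdev. The error labels at the bottom unwind these
 * steps in reverse order.
 */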
5815 static int be_probe(struct pci_dev *pdev, const struct pci_device_id *pdev_id)
5816 {
5817         struct be_adapter *adapter;
5818         struct net_device *netdev;
5819         int status = 0;
5820
5821         status = pci_enable_device(pdev);
5822         if (status)
5823                 goto do_none;
5824
5825         status = pci_request_regions(pdev, DRV_NAME);
5826         if (status)
5827                 goto disable_dev;
5828         pci_set_master(pdev);
5829
5830         netdev = alloc_etherdev_mqs(sizeof(*adapter), MAX_TX_QS, MAX_RX_QS);
5831         if (!netdev) {
5832                 status = -ENOMEM;
5833                 goto rel_reg;
5834         }
5835         adapter = netdev_priv(netdev);
5836         adapter->pdev = pdev;
5837         pci_set_drvdata(pdev, adapter);
5838         adapter->netdev = netdev;
5839         SET_NETDEV_DEV(netdev, &pdev->dev);
5840
5841         status = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
5842         if (!status) {
5843                 netdev->features |= NETIF_F_HIGHDMA;
5844         } else {
5845                 status = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
5846                 if (status) {
5847                         dev_err(&pdev->dev, "Could not set PCI DMA Mask\n");
5848                         goto free_netdev;
5849                 }
5850         }
5851
5852         status = pci_enable_pcie_error_reporting(pdev);
5853         if (!status)
5854                 dev_info(&pdev->dev, "PCIe error reporting enabled\n");
5855
5856         status = be_map_pci_bars(adapter);
5857         if (status)
5858                 goto free_netdev;
5859
5860         status = be_drv_init(adapter);
5861         if (status)
5862                 goto unmap_bars;
5863
5864         status = be_setup(adapter);
5865         if (status)
5866                 goto drv_cleanup;
5867
5868         be_netdev_init(netdev);
5869         status = register_netdev(netdev);
5870         if (status != 0)
5871                 goto unsetup;
5872
5873         be_roce_dev_add(adapter);
5874
5875         be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
5876         adapter->error_recovery.probe_time = jiffies;
5877
5878         /* On-die temperature is not supported for VFs. */
5879         if (be_physfn(adapter) && IS_ENABLED(CONFIG_BE2NET_HWMON)) {
5880                 adapter->hwmon_info.hwmon_dev =
5881                         devm_hwmon_device_register_with_groups(&pdev->dev,
5882                                                                DRV_NAME,
5883                                                                adapter,
5884                                                                be_hwmon_groups);
5885                 adapter->hwmon_info.be_on_die_temp = BE_INVALID_DIE_TEMP;
5886         }
5887
5888         dev_info(&pdev->dev, "%s: %s %s port %c\n", nic_name(pdev),
5889                  func_name(adapter), mc_name(adapter), adapter->port_name);
5890
5891         return 0;
5892
5893 unsetup:
5894         be_clear(adapter);
5895 drv_cleanup:
5896         be_drv_cleanup(adapter);
5897 unmap_bars:
5898         be_unmap_pci_bars(adapter);
5899 free_netdev:
5900         pci_disable_pcie_error_reporting(pdev);
5901         free_netdev(netdev);
5902 rel_reg:
5903         pci_release_regions(pdev);
5904 disable_dev:
5905         pci_disable_device(pdev);
5906 do_none:
5907         dev_err(&pdev->dev, "%s initialization failed\n", nic_name(pdev));
5908         return status;
5909 }
5910
5911 static int __maybe_unused be_suspend(struct device *dev_d)
5912 {
5913         struct be_adapter *adapter = dev_get_drvdata(dev_d);
5914
5915         be_intr_set(adapter, false);
5916         be_cancel_err_detection(adapter);
5917
5918         be_cleanup(adapter);
5919
5920         return 0;
5921 }
5922
5923 static int __maybe_unused be_pci_resume(struct device *dev_d)
5924 {
5925         struct be_adapter *adapter = dev_get_drvdata(dev_d);
5926         int status = 0;
5927
5928         status = be_resume(adapter);
5929         if (status)
5930                 return status;
5931
5932         be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
5933
5934         return 0;
5935 }
5936
5937 /*
5938  * An FLR will stop BE from DMAing any data.
5939  */
5940 static void be_shutdown(struct pci_dev *pdev)
5941 {
5942         struct be_adapter *adapter = pci_get_drvdata(pdev);
5943
5944         if (!adapter)
5945                 return;
5946
5947         be_roce_dev_shutdown(adapter);
5948         cancel_delayed_work_sync(&adapter->work);
5949         be_cancel_err_detection(adapter);
5950
5951         netif_device_detach(adapter->netdev);
5952
5953         be_cmd_reset_function(adapter);
5954
5955         pci_disable_device(pdev);
5956 }
5957
5958 static pci_ers_result_t be_eeh_err_detected(struct pci_dev *pdev,
5959                                             pci_channel_state_t state)
5960 {
5961         struct be_adapter *adapter = pci_get_drvdata(pdev);
5962
5963         dev_err(&adapter->pdev->dev, "EEH error detected\n");
5964
5965         be_roce_dev_remove(adapter);
5966
5967         if (!be_check_error(adapter, BE_ERROR_EEH)) {
5968                 be_set_error(adapter, BE_ERROR_EEH);
5969
5970                 be_cancel_err_detection(adapter);
5971
5972                 be_cleanup(adapter);
5973         }
5974
5975         if (state == pci_channel_io_perm_failure)
5976                 return PCI_ERS_RESULT_DISCONNECT;
5977
5978         pci_disable_device(pdev);
5979
5980         /* The error could cause the FW to trigger a flash debug dump.
5981          * Resetting the card while the flash dump is in progress
5982          * can prevent it from recovering; wait for the dump to finish.
5983          * Only the first function needs to wait, as the dump happens
5984          * only once per adapter.
5985          */
5986         if (pdev->devfn == 0)
5987                 ssleep(30);
5988
5989         return PCI_ERS_RESULT_NEED_RESET;
5990 }
5991
5992 static pci_ers_result_t be_eeh_reset(struct pci_dev *pdev)
5993 {
5994         struct be_adapter *adapter = pci_get_drvdata(pdev);
5995         int status;
5996
5997         dev_info(&adapter->pdev->dev, "EEH reset\n");
5998
5999         status = pci_enable_device(pdev);
6000         if (status)
6001                 return PCI_ERS_RESULT_DISCONNECT;
6002
6003         pci_set_master(pdev);
6004         pci_restore_state(pdev);
6005
6006         /* Check if card is ok and fw is ready */
6007         dev_info(&adapter->pdev->dev,
6008                  "Waiting for FW to be ready after EEH reset\n");
6009         status = be_fw_wait_ready(adapter);
6010         if (status)
6011                 return PCI_ERS_RESULT_DISCONNECT;
6012
6013         be_clear_error(adapter, BE_CLEAR_ALL);
6014         return PCI_ERS_RESULT_RECOVERED;
6015 }
6016
6017 static void be_eeh_resume(struct pci_dev *pdev)
6018 {
6019         int status = 0;
6020         struct be_adapter *adapter = pci_get_drvdata(pdev);
6021
6022         dev_info(&adapter->pdev->dev, "EEH resume\n");
6023
6024         pci_save_state(pdev);
6025
6026         status = be_resume(adapter);
6027         if (status)
6028                 goto err;
6029
6030         be_roce_dev_add(adapter);
6031
6032         be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
6033         return;
6034 err:
6035         dev_err(&adapter->pdev->dev, "EEH resume failed\n");
6036 }
6037
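/* .sriov_configure callback: the PCI core calls this when the admin writes
 * the desired VF count to the device's sriov_numvfs sysfs attribute, e.g.
 * (the PCI address below is only an example):
 *
 *   echo 4 > /sys/bus/pci/devices/0000:04:00.0/sriov_numvfs
 *   echo 0 > /sys/bus/pci/devices/0000:04:00.0/sriov_numvfs   # disable VFs
 */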
6038 static int be_pci_sriov_configure(struct pci_dev *pdev, int num_vfs)
6039 {
6040         struct be_adapter *adapter = pci_get_drvdata(pdev);
6041         struct be_resources vft_res = {0};
6042         int status;
6043
6044         if (!num_vfs)
6045                 be_vf_clear(adapter);
6046
6047         adapter->num_vfs = num_vfs;
6048
6049         if (adapter->num_vfs == 0 && pci_vfs_assigned(pdev)) {
6050                 dev_warn(&pdev->dev,
6051                          "Cannot disable VFs while they are assigned\n");
6052                 return -EBUSY;
6053         }
6054
6055         /* When the HW is in an SR-IOV capable configuration, the PF-pool
6056          * resources are distributed equally across the maximum number of VFs.
6057          * The user may request that only a subset of the max VFs be enabled.
6058          * Based on num_vfs, redistribute the resources across num_vfs so
6059          * that each VF gets a larger share of the resources.
6060          * This facility is not available in BE3 FW; on Lancer chips the FW
6061          * performs this redistribution itself.
6062          */
6063         if (skyhawk_chip(adapter) && !pci_num_vf(pdev)) {
6064                 be_calculate_vf_res(adapter, adapter->num_vfs,
6065                                     &vft_res);
6066                 status = be_cmd_set_sriov_config(adapter, adapter->pool_res,
6067                                                  adapter->num_vfs, &vft_res);
6068                 if (status)
6069                         dev_err(&pdev->dev,
6070                                 "Failed to optimize SR-IOV resources\n");
6071         }
6072
6073         status = be_get_resources(adapter);
6074         if (status)
6075                 return be_cmd_status(status);
6076
6077         /* Updating real_num_tx/rx_queues() requires rtnl_lock() */
6078         rtnl_lock();
6079         status = be_update_queues(adapter);
6080         rtnl_unlock();
6081         if (status)
6082                 return be_cmd_status(status);
6083
6084         if (adapter->num_vfs)
6085                 status = be_vf_setup(adapter);
6086
6087         if (!status)
6088                 return adapter->num_vfs;
6089
6090         return 0;
6091 }
6092
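/* PCI/EEH error recovery callbacks. The PCI core invokes them in order:
 * error_detected() to quiesce the device, slot_reset() once the link/slot
 * has been reset, and finally resume() to bring traffic back up.
 */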
6093 static const struct pci_error_handlers be_eeh_handlers = {
6094         .error_detected = be_eeh_err_detected,
6095         .slot_reset = be_eeh_reset,
6096         .resume = be_eeh_resume,
6097 };
6098
6099 static SIMPLE_DEV_PM_OPS(be_pci_pm_ops, be_suspend, be_pci_resume);
6100
6101 static struct pci_driver be_driver = {
6102         .name = DRV_NAME,
6103         .id_table = be_dev_ids,
6104         .probe = be_probe,
6105         .remove = be_remove,
6106         .driver.pm = &be_pci_pm_ops,
6107         .shutdown = be_shutdown,
6108         .sriov_configure = be_pci_sriov_configure,
6109         .err_handler = &be_eeh_handlers
6110 };
6111
6112 static int __init be_init_module(void)
6113 {
6114         int status;
6115
6116         if (rx_frag_size != 8192 && rx_frag_size != 4096 &&
6117             rx_frag_size != 2048) {
6118                 printk(KERN_WARNING DRV_NAME
6119                         " : Module param rx_frag_size must be 2048/4096/8192."
6120                         " Using 2048\n");
6121                 rx_frag_size = 2048;
6122         }
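        /* For example, loading the driver with a 4K receive fragment would
         * look like (module name assumed to be be2net):
         *
         *   modprobe be2net rx_frag_size=4096
         */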
6123
6124         if (num_vfs > 0) {
6125                 pr_info(DRV_NAME " : Module param num_vfs is obsolete.\n");
6126                 pr_info(DRV_NAME " : Use the sysfs method to enable VFs\n");
6127         }
6128
6129         be_wq = create_singlethread_workqueue("be_wq");
6130         if (!be_wq) {
6131                 pr_warn(DRV_NAME " : workqueue creation failed\n");
6132                 return -1;
6133         }
6134
6135         be_err_recovery_workq =
6136                 create_singlethread_workqueue("be_err_recover");
6137         if (!be_err_recovery_workq)
6138                 pr_warn(DRV_NAME " : Could not create error recovery workqueue\n");
6139
6140         status = pci_register_driver(&be_driver);
6141         if (status) {
6142                 destroy_workqueue(be_wq);
6143                 be_destroy_err_recovery_workq();
6144         }
6145         return status;
6146 }
6147 module_init(be_init_module);
6148
6149 static void __exit be_exit_module(void)
6150 {
6151         pci_unregister_driver(&be_driver);
6152
6153         be_destroy_err_recovery_workq();
6154
6155         if (be_wq)
6156                 destroy_workqueue(be_wq);
6157 }
6158 module_exit(be_exit_module);