2 * This file is part of the Chelsio T4 Ethernet driver for Linux.
4 * Copyright (c) 2003-2010 Chelsio Communications, Inc. All rights reserved.
6 * This software is available to you under a choice of one of two
7 * licenses. You may choose to be licensed under the terms of the GNU
8 * General Public License (GPL) Version 2, available from the file
9 * COPYING in the main directory of this source tree, or the
10 * OpenIB.org BSD license below:
12 * Redistribution and use in source and binary forms, with or
13 * without modification, are permitted provided that the following
16 * - Redistributions of source code must retain the above
17 * copyright notice, this list of conditions and the following
20 * - Redistributions in binary form must reproduce the above
21 * copyright notice, this list of conditions and the following
22 * disclaimer in the documentation and/or other materials
23 * provided with the distribution.
25 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
26 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
27 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
28 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
29 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
30 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
31 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
35 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
37 #include <linux/bitmap.h>
38 #include <linux/crc32.h>
39 #include <linux/ctype.h>
40 #include <linux/debugfs.h>
41 #include <linux/err.h>
42 #include <linux/etherdevice.h>
43 #include <linux/firmware.h>
44 #include <linux/if_vlan.h>
45 #include <linux/init.h>
46 #include <linux/log2.h>
47 #include <linux/mdio.h>
48 #include <linux/module.h>
49 #include <linux/moduleparam.h>
50 #include <linux/mutex.h>
51 #include <linux/netdevice.h>
52 #include <linux/pci.h>
53 #include <linux/aer.h>
54 #include <linux/rtnetlink.h>
55 #include <linux/sched.h>
56 #include <linux/seq_file.h>
57 #include <linux/sockios.h>
58 #include <linux/vmalloc.h>
59 #include <linux/workqueue.h>
60 #include <net/neighbour.h>
61 #include <net/netevent.h>
62 #include <asm/uaccess.h>
/*
 * Driver version and description strings.  They feed MODULE_VERSION() and
 * MODULE_DESCRIPTION() in this file; DRV_VERSION is also copied into the
 * ethtool driver info by get_drvinfo().
 */
70 #define DRV_VERSION "1.3.0-ko"
71 #define DRV_DESC "Chelsio T4 Network Driver"
74 * Max interrupt hold-off timer value in us. Queues fall back to this value
75 * under extreme memory pressure so it's largish to give the system time to
/* Value is in microseconds (see the comment above); the U suffix keeps it unsigned. */
78 #define MAX_SGE_TIMERVAL 200U
82 * Virtual Function provisioning constants. We need two extra Ingress Queues
83 * with Interrupt capability to serve as the VF's Firmware Event Queue and
84 * Forwarded Interrupt Queue (when using MSI mode) -- neither will have Free
85 * Lists associated with them). For each Ethernet/Control Egress Queue and
86 * for each Free List, we need an Egress Context.
/*
 * Per-VF resource provisioning enumerators.  Every derived value is expressed
 * in terms of VFRES_NPORTS and VFRES_NQSETS, so changing those two rescales
 * the rest.
 * NOTE(review): the enclosing "enum {" / "};" lines are not present in this
 * extract (the embedded line numbers skip them).
 */
89 VFRES_NPORTS = 1, /* # of "ports" per VF */
90 VFRES_NQSETS = 2, /* # of "Queue Sets" per VF */
92 VFRES_NVI = VFRES_NPORTS, /* # of Virtual Interfaces */
93 VFRES_NETHCTRL = VFRES_NQSETS, /* # of EQs used for ETH or CTRL Qs */
/* "+2": the two extra interrupt-capable ingress queues described in the comment above. */
94 VFRES_NIQFLINT = VFRES_NQSETS+2,/* # of ingress Qs/w Free List(s)/intr */
95 VFRES_NIQ = 0, /* # of non-fl/int ingress queues */
96 VFRES_NEQ = VFRES_NQSETS*2, /* # of egress queues */
97 VFRES_TC = 0, /* PCI-E traffic class */
98 VFRES_NEXACTF = 16, /* # of exact MPS filters */
/* Capability masks; presumably read vs. write/execute rights -- TODO confirm against the firmware API. */
100 VFRES_R_CAPS = FW_CMD_CAP_DMAQ|FW_CMD_CAP_VF|FW_CMD_CAP_PORT,
101 VFRES_WX_CAPS = FW_CMD_CAP_DMAQ|FW_CMD_CAP_VF,
105 * Provide a Port Access Rights Mask for the specified PF/VF. This is very
106 * static and likely not to be useful in the long run. We really need to
107 * implement some form of persistent configuration which the firmware
/*
 * pfvfres_pmask - compute the port-access mask for a PF/VF pair.
 *
 * Visible behaviour: one path returns FW_PFVF_CMD_PMASK_MASK (the "PF" case
 * according to the inner comment).  The VF path picks port number
 * pf % nports and then works through adapter->params.portvec one set bit at
 * a time.
 *
 * NOTE(review): the embedded line numbers have gaps, so the conditions, the
 * loop header, several return statements and all braces are missing from
 * this extract.  They are deliberately not reconstructed here.
 */
110 static unsigned int pfvfres_pmask(struct adapter *adapter,
111 unsigned int pf, unsigned int vf)
113 unsigned int portn, portvec;
116 * Give PF's access to all of the ports.
119 return FW_PFVF_CMD_PMASK_MASK;
122 * For VFs, we'll assign them access to the ports based purely on the
123 * PF. We assign active ports in order, wrapping around if there are
124 * fewer active ports than PFs: e.g. active port[pf % nports].
125 * Unfortunately the adapter's port_info structs haven't been
126 * initialized yet so we have to compute this.
/* nports is the divisor of the modulo two lines below, hence the zero check. */
128 if (adapter->params.nports == 0)
131 portn = pf % adapter->params.nports;
132 portvec = adapter->params.portvec;
135 * Isolate the lowest set bit in the port vector. If we're at
136 * the port number that we want, return that as the pmask.
137 * otherwise mask that bit out of the port vector and
138 * decrement our port number ...
/* x & (x-1) clears the lowest set bit of x, so the XOR leaves only that bit. */
140 unsigned int pmask = portvec ^ (portvec & (portvec-1));
/*
 * Aperture size and base value for memory windows 0-2 (enumerators).
 * NOTE(review): units and the address space of the *_BASE values are not
 * visible here -- presumably bytes and BAR offsets; confirm.  The enclosing
 * "enum {" / "};" lines are missing from this extract.
 */
151 MEMWIN0_APERTURE = 65536,
152 MEMWIN0_BASE = 0x30000,
153 MEMWIN1_APERTURE = 32768,
154 MEMWIN1_BASE = 0x28000,
155 MEMWIN2_APERTURE = 2048,
156 MEMWIN2_BASE = 0x1b800,
/*
 * Upper and lower bounds on queue sizes (enumerators).  get_sge_param()
 * reports the MAX_* values and set_sge_param() validates requests against
 * both MAX_* and MIN_*.
 * NOTE(review): the enclosing "enum {" / "};" lines and possibly further
 * enumerators (e.g. MIN_FL_ENTRIES, used by set_sge_param()) are not in this
 * extract.
 */
160 MAX_TXQ_ENTRIES = 16384,
161 MAX_CTRL_TXQ_ENTRIES = 1024,
162 MAX_RSPQ_ENTRIES = 16384,
163 MAX_RX_BUFFERS = 16384,
164 MIN_TXQ_ENTRIES = 32,
165 MIN_CTRL_TXQ_ENTRIES = 32,
166 MIN_RSPQ_ENTRIES = 128,
/* Default NETIF_MSG_* bitmap; it initialises the dflt_msg_enable module parameter. */
170 #define DFLT_MSG_ENABLE (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK | \
171 NETIF_MSG_TIMER | NETIF_MSG_IFDOWN | NETIF_MSG_IFUP |\
172 NETIF_MSG_RX_ERR | NETIF_MSG_TX_ERR)
/*
 * Builds one PCI device-table entry for Chelsio device id @devid; @data is
 * placed in the initializer slot that follows PCI_VDEVICE().
 */
174 #define CH_DEVICE(devid, data) { PCI_VDEVICE(CHELSIO, devid), (data) }
/*
 * PCI IDs handled by this driver; exported with MODULE_DEVICE_TABLE() below.
 * The second CH_DEVICE() argument is 0 for the PE10K, -1 for the 0x40xx ids
 * and 4 for the 0x44xx ids.
 * NOTE(review): what that value encodes is not visible in this extract --
 * presumably it identifies which PCI function is the driver's -- TODO confirm.
 * The table terminator and closing "};" are missing from this extract.
 */
176 static DEFINE_PCI_DEVICE_TABLE(cxgb4_pci_tbl) = {
177 CH_DEVICE(0xa000, 0), /* PE10K */
178 CH_DEVICE(0x4001, -1),
179 CH_DEVICE(0x4002, -1),
180 CH_DEVICE(0x4003, -1),
181 CH_DEVICE(0x4004, -1),
182 CH_DEVICE(0x4005, -1),
183 CH_DEVICE(0x4006, -1),
184 CH_DEVICE(0x4007, -1),
185 CH_DEVICE(0x4008, -1),
186 CH_DEVICE(0x4009, -1),
187 CH_DEVICE(0x400a, -1),
188 CH_DEVICE(0x4401, 4),
189 CH_DEVICE(0x4402, 4),
190 CH_DEVICE(0x4403, 4),
191 CH_DEVICE(0x4404, 4),
192 CH_DEVICE(0x4405, 4),
193 CH_DEVICE(0x4406, 4),
194 CH_DEVICE(0x4407, 4),
195 CH_DEVICE(0x4408, 4),
196 CH_DEVICE(0x4409, 4),
197 CH_DEVICE(0x440a, 4),
/* Firmware image name passed to request_firmware() by upgrade_fw(). */
201 #define FW_FNAME "cxgb4/t4fw.bin"
/* Module metadata. */
203 MODULE_DESCRIPTION(DRV_DESC);
204 MODULE_AUTHOR("Chelsio Communications");
205 MODULE_LICENSE("Dual BSD/GPL");
206 MODULE_VERSION(DRV_VERSION);
207 MODULE_DEVICE_TABLE(pci, cxgb4_pci_tbl);
208 MODULE_FIRMWARE(FW_FNAME);
/* Message-enable bitmap, exposed as a module parameter with mode 0644. */
210 static int dflt_msg_enable = DFLT_MSG_ENABLE;
212 module_param(dflt_msg_enable, int, 0644);
213 MODULE_PARM_DESC(dflt_msg_enable, "Chelsio T4 default message enable bitmap");
216 * The driver uses the best interrupt scheme available on a platform in the
217 * order MSI-X, MSI, legacy INTx interrupts. This parameter determines which
218 * of these schemes the driver may consider as follows:
220 * msi = 2: choose from among all three options
221 * msi = 1: only consider MSI and INTx interrupts
222 * msi = 0: force INTx interrupts
/*
 * Interrupt-scheme selector described in the comment above.
 * NOTE(review): the definition of the "msi" variable itself is not present in
 * this extract (the embedded line numbers skip it).
 */
226 module_param(msi, int, 0644);
227 MODULE_PARM_DESC(msi, "whether to use INTx (0), MSI (1) or MSI-X (2)");
230 * Queue interrupt hold-off timer values. Queues default to the first of these
/*
 * Hold-off timer values in microseconds (per the parameter description).
 * The array has SGE_NTIMERS - 1 entries, i.e. one fewer than the number of
 * hardware timers.
 */
233 static unsigned int intr_holdoff[SGE_NTIMERS - 1] = { 5, 10, 20, 50, 100 };
235 module_param_array(intr_holdoff, uint, NULL, 0644);
236 MODULE_PARM_DESC(intr_holdoff, "values for queue interrupt hold-off timers "
237 "0..4 in microseconds");
/*
 * Packet-count thresholds 1..3 (per the parameter description); the array
 * has SGE_NCOUNTERS - 1 entries.
 */
239 static unsigned int intr_cnt[SGE_NCOUNTERS - 1] = { 4, 8, 16 };
241 module_param_array(intr_cnt, uint, NULL, 0644);
242 MODULE_PARM_DESC(intr_cnt,
243 "thresholds 1..3 for queue interrupt packet counters");
/*
 * SR-IOV related module parameters, compiled only with CONFIG_PCI_IOV.
 * NOTE(review): the definition of vf_acls and the matching #endif are not
 * present in this extract.
 */
247 #ifdef CONFIG_PCI_IOV
248 module_param(vf_acls, bool, 0644);
249 MODULE_PARM_DESC(vf_acls, "if set enable virtualization L2 ACL enforcement");
/* One VF count per PF; the array holds four entries (PFs 0-3). */
251 static unsigned int num_vf[4];
253 module_param_array(num_vf, uint, NULL, 0644);
254 MODULE_PARM_DESC(num_vf, "number of VFs for each of PFs 0-3");
/* File-scope driver state. */
257 static struct dentry *cxgb4_debugfs_root;
259 static LIST_HEAD(adapter_list);
/* presumably uld_mutex serialises access to ulds[] / adapter_list -- TODO confirm at the users */
260 static DEFINE_MUTEX(uld_mutex);
/* Registered upper-layer drivers, indexed by ULD type; uldrx_handler() dispatches through ulds[q->uld]. */
261 static struct cxgb4_uld_info ulds[CXGB4_ULD_MAX];
262 static const char *uld_str[] = { "RDMA", "iSCSI" };
/*
 * link_report - log the current link state of @dev.
 *
 * Logs "link down" when the carrier is off.  Otherwise it logs a "link up"
 * line containing a speed string (initialised to "10Mbps", followed by a
 * switch on p->link_cfg.speed) and a PAUSE description.
 *
 * NOTE(review): the switch cases, the trailing arguments of the final
 * netdev_info() call and all braces are missing from this extract, so how
 * fc[] is indexed cannot be confirmed here.
 */
264 static void link_report(struct net_device *dev)
266 if (!netif_carrier_ok(dev))
267 netdev_info(dev, "link down\n");
269 static const char *fc[] = { "no", "Rx", "Tx", "Tx/Rx" };
271 const char *s = "10Mbps";
272 const struct port_info *p = netdev_priv(dev);
274 switch (p->link_cfg.speed) {
286 netdev_info(dev, "link up, %s, full-duplex, %s PAUSE\n", s,
/*
 * t4_os_link_changed - propagate a link state change to the net device.
 * @adapter: adapter owning the port
 * @port_id: index into adapter->port[]
 * @link_stat: new link state
 *
 * Acts only when the device is running and @link_stat differs from the
 * current carrier state; the carrier is then switched on or off.
 * NOTE(review): the condition choosing between on and off, and anything
 * after the carrier update, are missing from this extract.
 */
291 void t4_os_link_changed(struct adapter *adapter, int port_id, int link_stat)
293 struct net_device *dev = adapter->port[port_id];
295 /* Skip changes from disabled ports. */
296 if (netif_running(dev) && link_stat != netif_carrier_ok(dev)) {
298 netif_carrier_on(dev);
300 netif_carrier_off(dev);
306 void t4_os_portmod_changed(const struct adapter *adap, int port_id)
308 static const char *mod_str[] = {
309 NULL, "LR", "SR", "ER", "passive DA", "active DA", "LRM"
312 const struct net_device *dev = adap->port[port_id];
313 const struct port_info *pi = netdev_priv(dev);
315 if (pi->mod_type == FW_PORT_MOD_TYPE_NONE)
316 netdev_info(dev, "port module unplugged\n");
317 else if (pi->mod_type < ARRAY_SIZE(mod_str))
318 netdev_info(dev, "%s module inserted\n", mod_str[pi->mod_type]);
322 * Configure the exact and hash address filters to handle a port's multicast
323 * and secondary unicast MAC addresses.
/*
 * set_addr_filters - program a port's secondary unicast and multicast MACs.
 * @dev: the port's net device
 * @sleep: forwarded unchanged to the t4_* helpers
 *
 * Addresses are collected into addr[] and passed to t4_alloc_mac_filt() in
 * batches; a batch is flushed when the list is exhausted or addr[] is full.
 * Unicast batches accumulate into uhash, multicast batches into mhash, and
 * the function ends by calling t4_set_addr_hash() with the combined hash.
 *
 * NOTE(review): the declarations of addr, naddr, free, filt_idx, uhash,
 * mhash and ret, plus the error handling after each t4_alloc_mac_filt()
 * call, are missing from this extract.
 */
325 static int set_addr_filters(const struct net_device *dev, bool sleep)
333 const struct netdev_hw_addr *ha;
334 int uc_cnt = netdev_uc_count(dev);
335 int mc_cnt = netdev_mc_count(dev);
336 const struct port_info *pi = netdev_priv(dev);
337 unsigned int mb = pi->adapter->fn;
339 /* first do the secondary unicast addresses */
340 netdev_for_each_uc_addr(ha, dev) {
341 addr[naddr++] = ha->addr;
/* flush on the last address or when the batch array is full */
342 if (--uc_cnt == 0 || naddr >= ARRAY_SIZE(addr)) {
343 ret = t4_alloc_mac_filt(pi->adapter, mb, pi->viid, free,
344 naddr, addr, filt_idx, &uhash, sleep);
353 /* next set up the multicast addresses */
354 netdev_for_each_mc_addr(ha, dev) {
355 addr[naddr++] = ha->addr;
356 if (--mc_cnt == 0 || naddr >= ARRAY_SIZE(addr)) {
357 ret = t4_alloc_mac_filt(pi->adapter, mb, pi->viid, free,
358 naddr, addr, filt_idx, &mhash, sleep);
/* the boolean argument is true only when unicast addresses spilled into the hash */
367 return t4_set_addr_hash(pi->adapter, mb, pi->viid, uhash != 0,
368 uhash | mhash, sleep);
372 * Set Rx properties of a port, such as promiscuity, address filters, and MTU.
373 * If @mtu is -1 it is left unchanged.
/*
 * set_rxmode - apply address filters and Rx mode flags for a port.
 * @dev: the port's net device
 * @mtu: forwarded to t4_set_rxmode()
 * @sleep_ok: forwarded to set_addr_filters()
 *
 * Calls set_addr_filters(), then t4_set_rxmode() with promiscuous and
 * all-multicast flags taken from dev->flags.
 * NOTE(review): whether the second call depends on the first succeeding,
 * and the final arguments and return statement, are missing from this extract.
 */
375 static int set_rxmode(struct net_device *dev, int mtu, bool sleep_ok)
378 struct port_info *pi = netdev_priv(dev);
380 ret = set_addr_filters(dev, sleep_ok);
382 ret = t4_set_rxmode(pi->adapter, pi->adapter->fn, pi->viid, mtu,
383 (dev->flags & IFF_PROMISC) ? 1 : 0,
384 (dev->flags & IFF_ALLMULTI) ? 1 : 0, 1, -1,
390 * link_start - enable a port
391 * @dev: the port to enable
393 * Performs the MAC and PHY actions needed to enable a port.
/*
 * link_start - visible call sequence, in order:
 *   1. t4_set_rxmode() with dev->mtu and the VLAN-Rx feature bit,
 *   2. t4_change_mac() with dev->dev_addr; its result is stored in
 *      pi->xact_addr_filt,
 *   3. t4_link_start(),
 *   4. t4_enable_vi().
 * NOTE(review): the conditions linking these steps, the remaining call
 * arguments and the return statement are missing from this extract.
 */
395 static int link_start(struct net_device *dev)
398 struct port_info *pi = netdev_priv(dev);
399 unsigned int mb = pi->adapter->fn;
402 * We do not set address filters and promiscuity here, the stack does
403 * that step explicitly.
405 ret = t4_set_rxmode(pi->adapter, mb, pi->viid, dev->mtu, -1, -1, -1,
406 !!(dev->features & NETIF_F_HW_VLAN_RX), true);
408 ret = t4_change_mac(pi->adapter, mb, pi->viid,
409 pi->xact_addr_filt, dev->dev_addr, true,
412 pi->xact_addr_filt = ret;
417 ret = t4_link_start(pi->adapter, mb, pi->tx_chan,
420 ret = t4_enable_vi(pi->adapter, mb, pi->viid, true, true);
425 * Response queue handler for the FW event queue.
/*
 * fwevtq_handler - dispatch one message from the firmware event queue.
 * @q: the response queue
 * @rsp: the response descriptor; it begins with an RSS header
 * @gl: gather list (not referenced in the visible lines)
 *
 * Dispatch by CPL opcode:
 *   CPL_SGE_EGR_UPDATE          - resume the egress queue named in the message
 *   CPL_FW6_MSG / CPL_FW4_MSG   - t4_handle_fw_rpl()
 *   CPL_L2T_WRITE_RPL           - do_l2t_write_rpl()
 *   anything else               - logged with dev_err()
 *
 * NOTE(review): the declaration of txq, some guarding conditions, the
 * closing braces and the return statement are missing from this extract.
 */
427 static int fwevtq_handler(struct sge_rspq *q, const __be64 *rsp,
428 const struct pkt_gl *gl)
430 u8 opcode = ((const struct rss_header *)rsp)->opcode;
432 rsp++; /* skip RSS header */
433 if (likely(opcode == CPL_SGE_EGR_UPDATE)) {
434 const struct cpl_sge_egr_update *p = (void *)rsp;
435 unsigned int qid = EGR_QID(ntohl(p->opcode_qid));
/* egr_map is indexed relative to egr_start */
438 txq = q->adap->sge.egr_map[qid - q->adap->sge.egr_start];
/*
 * The queue kind is decided by address: anything below sge.ofldtxq is treated
 * as an Ethernet Tx queue.  NOTE(review): this assumes the Ethernet Tx queues
 * are laid out before ofldtxq inside struct sge -- confirm in the header.
 */
440 if ((u8 *)txq < (u8 *)q->adap->sge.ofldtxq) {
441 struct sge_eth_txq *eq;
443 eq = container_of(txq, struct sge_eth_txq, q);
444 netif_tx_wake_queue(eq->txq);
446 struct sge_ofld_txq *oq;
448 oq = container_of(txq, struct sge_ofld_txq, q);
449 tasklet_schedule(&oq->qresume_tsk);
451 } else if (opcode == CPL_FW6_MSG || opcode == CPL_FW4_MSG) {
452 const struct cpl_fw6_msg *p = (void *)rsp;
455 t4_handle_fw_rpl(q->adap, p->data);
456 } else if (opcode == CPL_L2T_WRITE_RPL) {
457 const struct cpl_l2t_write_rpl *p = (void *)rsp;
459 do_l2t_write_rpl(q->adap, p);
461 dev_err(q->adap->pdev_dev,
462 "unexpected CPL %#x on FW event queue\n", opcode);
467 * uldrx_handler - response queue handler for ULD queues
468 * @q: the response queue that received the packet
469 * @rsp: the response queue descriptor holding the offload message
470 * @gl: the gather list of packet fragments
472 * Deliver an ingress offload packet to a ULD. All processing is done by
473 * the ULD, we just maintain statistics.
/*
 * uldrx_handler - hand an ingress offload message to the owning ULD.
 * The ULD is selected by q->uld and invoked through ulds[q->uld].rx_handler
 * with that ULD's handle from q->adap->uld_handle[].
 * NOTE(review): the statistics updates mentioned in the preceding comment,
 * the branch bodies and the return statements are missing from this
 * extract; only the dispatch call and one comparison against CXGB4_MSG_AN
 * are visible.
 */
475 static int uldrx_handler(struct sge_rspq *q, const __be64 *rsp,
476 const struct pkt_gl *gl)
478 struct sge_ofld_rxq *rxq = container_of(q, struct sge_ofld_rxq, rspq);
480 if (ulds[q->uld].rx_handler(q->adap->uld_handle[q->uld], rsp, gl)) {
486 else if (gl == CXGB4_MSG_AN)
493 static void disable_msi(struct adapter *adapter)
495 if (adapter->flags & USING_MSIX) {
496 pci_disable_msix(adapter->pdev);
497 adapter->flags &= ~USING_MSIX;
498 } else if (adapter->flags & USING_MSI) {
499 pci_disable_msi(adapter->pdev);
500 adapter->flags &= ~USING_MSI;
505 * Interrupt handler for non-data events used with MSI-X.
/*
 * t4_nondata_intr - MSI-X handler for non-data (slow path) interrupts.
 * @irq: interrupt number (not referenced in the visible lines)
 * @cookie: the struct adapter registered with the interrupt
 *
 * Reads this function's PL_PF_INT_CAUSE register, writes the same value
 * back, and calls t4_slow_intr_handler().
 * NOTE(review): the write-back presumably acknowledges the cause bits --
 * confirm against the register documentation.  The condition around the
 * write and the return statement are missing from this extract.
 */
507 static irqreturn_t t4_nondata_intr(int irq, void *cookie)
509 struct adapter *adap = cookie;
511 u32 v = t4_read_reg(adap, MYPF_REG(PL_PF_INT_CAUSE));
514 t4_write_reg(adap, MYPF_REG(PL_PF_INT_CAUSE), v);
516 t4_slow_intr_handler(adap);
521 * Name the MSI-X interrupts.
/*
 * name_msix_vecs - fill in the description string of every MSI-X vector.
 *
 * Vector 0 gets the adapter name, vector 1 "<name>-FWeventq".  Starting at
 * index 2, the Ethernet queue sets of each port get "%s-Rx%d", then the
 * offload queues "%s-ofld%d", then the RDMA queues "%s-rdma%d".
 *
 * n is one less than the size of a desc buffer.  Each snprintf() is limited
 * to n bytes and desc[n] is then set to 0, so the last byte of the buffer is
 * always a terminator.
 *
 * NOTE(review): the trailing snprintf() arguments for the Rx/ofld/rdma names
 * and the closing braces are missing from this extract.
 */
523 static void name_msix_vecs(struct adapter *adap)
525 int i, j, msi_idx = 2, n = sizeof(adap->msix_info[0].desc) - 1;
527 /* non-data interrupts */
528 snprintf(adap->msix_info[0].desc, n, "%s", adap->name);
529 adap->msix_info[0].desc[n] = 0;
532 snprintf(adap->msix_info[1].desc, n, "%s-FWeventq", adap->name);
533 adap->msix_info[1].desc[n] = 0;
535 /* Ethernet queues */
536 for_each_port(adap, j) {
537 struct net_device *d = adap->port[j];
538 const struct port_info *pi = netdev_priv(d);
/* msi_idx advances in the loop header here, but inside the body for the two loops below */
540 for (i = 0; i < pi->nqsets; i++, msi_idx++) {
541 snprintf(adap->msix_info[msi_idx].desc, n, "%s-Rx%d",
543 adap->msix_info[msi_idx].desc[n] = 0;
548 for_each_ofldrxq(&adap->sge, i) {
549 snprintf(adap->msix_info[msi_idx].desc, n, "%s-ofld%d",
551 adap->msix_info[msi_idx++].desc[n] = 0;
553 for_each_rdmarxq(&adap->sge, i) {
554 snprintf(adap->msix_info[msi_idx].desc, n, "%s-rdma%d",
556 adap->msix_info[msi_idx++].desc[n] = 0;
/*
 * request_msix_queue_irqs - request one IRQ per response queue.
 *
 * Order: the firmware event queue on vector 1, then (from vector index 2)
 * every Ethernet Rx queue, every offload Rx queue and every RDMA Rx queue.
 * All use t4_sge_intr_msix with the queue's rspq as the cookie.
 *
 * The tail of the function is the unwind path: it frees the RDMA, offload
 * and Ethernet IRQs in reverse order using the loop counters reached so
 * far, then the firmware event queue IRQ.
 *
 * NOTE(review): the error checks after each request_irq(), the "msi++"
 * bookkeeping, the unwind label and the return statements are missing from
 * this extract.
 */
560 static int request_msix_queue_irqs(struct adapter *adap)
562 struct sge *s = &adap->sge;
/* ofldqidx/rdmaqidx start at 0 so the unwind loops are no-ops if those stages were never reached */
563 int err, ethqidx, ofldqidx = 0, rdmaqidx = 0, msi = 2;
565 err = request_irq(adap->msix_info[1].vec, t4_sge_intr_msix, 0,
566 adap->msix_info[1].desc, &s->fw_evtq);
570 for_each_ethrxq(s, ethqidx) {
571 err = request_irq(adap->msix_info[msi].vec, t4_sge_intr_msix, 0,
572 adap->msix_info[msi].desc,
573 &s->ethrxq[ethqidx].rspq);
578 for_each_ofldrxq(s, ofldqidx) {
579 err = request_irq(adap->msix_info[msi].vec, t4_sge_intr_msix, 0,
580 adap->msix_info[msi].desc,
581 &s->ofldrxq[ofldqidx].rspq);
586 for_each_rdmarxq(s, rdmaqidx) {
587 err = request_irq(adap->msix_info[msi].vec, t4_sge_intr_msix, 0,
588 adap->msix_info[msi].desc,
589 &s->rdmarxq[rdmaqidx].rspq);
/* unwind: release in reverse order of acquisition */
597 while (--rdmaqidx >= 0)
598 free_irq(adap->msix_info[--msi].vec,
599 &s->rdmarxq[rdmaqidx].rspq);
600 while (--ofldqidx >= 0)
601 free_irq(adap->msix_info[--msi].vec,
602 &s->ofldrxq[ofldqidx].rspq);
603 while (--ethqidx >= 0)
604 free_irq(adap->msix_info[--msi].vec, &s->ethrxq[ethqidx].rspq);
605 free_irq(adap->msix_info[1].vec, &s->fw_evtq);
/*
 * free_msix_queue_irqs - release the IRQs taken by request_msix_queue_irqs().
 *
 * Frees the firmware event queue IRQ (vector 1), then walks the Ethernet,
 * offload and RDMA Rx queues in that order, advancing the vector index for
 * each queue.
 *
 * NOTE(review): the declarations of "i" and "msi" are missing from this
 * extract.  msi presumably starts at 2 to mirror request_msix_queue_irqs()
 * -- confirm against the full source.
 */
609 static void free_msix_queue_irqs(struct adapter *adap)
612 struct sge *s = &adap->sge;
614 free_irq(adap->msix_info[1].vec, &s->fw_evtq);
615 for_each_ethrxq(s, i)
616 free_irq(adap->msix_info[msi++].vec, &s->ethrxq[i].rspq);
617 for_each_ofldrxq(s, i)
618 free_irq(adap->msix_info[msi++].vec, &s->ofldrxq[i].rspq);
619 for_each_rdmarxq(s, i)
620 free_irq(adap->msix_info[msi++].vec, &s->rdmarxq[i].rspq);
624 * write_rss - write the RSS table for a given port
626 * @queues: array of queue indices for RSS
628 * Sets up the portion of the HW RSS table for the port's VI to distribute
629 * packets to the Rx queues in @queues.
/*
 * write_rss - visible steps:
 *   1. allocate a temporary array of pi->rss_size u16 entries (GFP_KERNEL),
 *   2. translate each queue index in @queues (relative to the port's first
 *      queue set) into that queue's absolute response-queue id,
 *   3. pass the array to t4_config_rss_range() for the port's VI.
 * NOTE(review): the allocation-failure check, the kfree() of the temporary
 * array and the return statement are missing from this extract.
 */
631 static int write_rss(const struct port_info *pi, const u16 *queues)
635 const struct sge_eth_rxq *q = &pi->adapter->sge.ethrxq[pi->first_qset];
637 rss = kmalloc(pi->rss_size * sizeof(u16), GFP_KERNEL);
641 /* map the queue indices to queue ids */
642 for (i = 0; i < pi->rss_size; i++, queues++)
643 rss[i] = q[*queues].rspq.abs_id;
645 err = t4_config_rss_range(pi->adapter, pi->adapter->fn, pi->viid, 0,
646 pi->rss_size, rss, pi->rss_size);
652 * setup_rss - configure RSS
655 * Sets up RSS for each port.
/*
 * setup_rss - call write_rss() for every port, using the port's own pi->rss
 * table as the queue list.
 * NOTE(review): the declarations, the error check after write_rss() and the
 * return statement are missing from this extract.
 */
657 static int setup_rss(struct adapter *adap)
661 for_each_port(adap, i) {
662 const struct port_info *pi = adap2pinfo(adap, i);
664 err = write_rss(pi, pi->rss);
672 * Return the channel of the ingress queue with the given qid.
674 static unsigned int rxq_to_chan(const struct sge *p, unsigned int qid)
676 qid -= p->ingr_start;
677 return netdev2pinfo(p->ingr_map[qid]->netdev)->tx_chan;
681 * Wait until all NAPI handlers are descheduled.
/*
 * quiesce_rx - walk every slot of sge.ingr_map and disable NAPI on the
 * response queue found there.
 * NOTE(review): the declaration of "i" and the condition guarding
 * napi_disable() (empty slots must be skipped somehow) are missing from
 * this extract.
 */
683 static void quiesce_rx(struct adapter *adap)
687 for (i = 0; i < ARRAY_SIZE(adap->sge.ingr_map); i++) {
688 struct sge_rspq *q = adap->sge.ingr_map[i];
691 napi_disable(&q->napi);
696 * Enable NAPI scheduling and interrupt generation for all Rx queues.
/*
 * enable_rx - walk every slot of sge.ingr_map, enable NAPI on the queue and
 * write SGE_PF_GTS with the queue's interrupt parameters and context id.
 * NOTE(review): the declaration of "i" and the conditions that skip empty
 * slots or guard napi_enable() are missing from this extract.
 */
698 static void enable_rx(struct adapter *adap)
702 for (i = 0; i < ARRAY_SIZE(adap->sge.ingr_map); i++) {
703 struct sge_rspq *q = adap->sge.ingr_map[i];
708 napi_enable(&q->napi);
709 /* 0-increment GTS to start the timer and enable interrupts */
710 t4_write_reg(adap, MYPF_REG(SGE_PF_GTS),
711 SEINTARM(q->intr_params) |
712 INGRESSQID(q->cntxt_id));
717 * setup_sge_queues - configure SGE Tx/Rx/response queues
720 * Determines how many sets of SGE queues to use and initializes them.
721 * We support multiple queue sets per port if we have MSI-X, otherwise
722 * just one queue set per port.
/*
 * setup_sge_queues - allocate all SGE queues for the adapter.
 *
 * Visible allocation order:
 *   1. interrupt queue (s->intrq) -- not on the MSI-X path, where msi_idx is
 *      set to 1 instead,
 *   2. firmware event queue (handler fwevtq_handler),
 *   3. per port: nqsets Ethernet Rx queues, then nqsets Ethernet Tx queues,
 *   4. offload Rx queues, each paired with an offload Tx queue,
 *   5. RDMA Rx queues,
 *   6. one control Tx queue per port.
 * It ends by writing MPS_TRC_RSS_CONTROL with port 0's Tx channel and the
 * absolute id of Ethernet Rx queue 0.
 *
 * The "freeout" label calls t4_free_sge_resources() and is the shared
 * failure exit.
 *
 * NOTE(review): every "if (err) goto freeout" check, the msi_idx
 * increments, several call arguments, the closing braces and the return
 * statements are missing from this extract.
 */
724 static int setup_sge_queues(struct adapter *adap)
726 int err, msi_idx, i, j;
727 struct sge *s = &adap->sge;
729 bitmap_zero(s->starving_fl, MAX_EGRQ);
730 bitmap_zero(s->txq_maperr, MAX_EGRQ);
732 if (adap->flags & USING_MSIX)
733 msi_idx = 1; /* vector 0 is for non-queue interrupts */
735 err = t4_sge_alloc_rxq(adap, &s->intrq, false, adap->port[0], 0,
/* without MSI-X, msi_idx becomes a negative value derived from the interrupt queue's absolute id */
739 msi_idx = -((int)s->intrq.abs_id + 1);
742 err = t4_sge_alloc_rxq(adap, &s->fw_evtq, true, adap->port[0],
743 msi_idx, NULL, fwevtq_handler);
745 freeout: t4_free_sge_resources(adap);
749 for_each_port(adap, i) {
750 struct net_device *dev = adap->port[i];
751 struct port_info *pi = netdev_priv(dev);
752 struct sge_eth_rxq *q = &s->ethrxq[pi->first_qset];
753 struct sge_eth_txq *t = &s->ethtxq[pi->first_qset];
755 for (j = 0; j < pi->nqsets; j++, q++) {
758 err = t4_sge_alloc_rxq(adap, &q->rspq, false, dev,
764 memset(&q->stats, 0, sizeof(q->stats));
766 for (j = 0; j < pi->nqsets; j++, t++) {
767 err = t4_sge_alloc_eth_txq(adap, t, dev,
768 netdev_get_tx_queue(dev, j),
769 s->fw_evtq.cntxt_id);
/*
 * NOTE(review): j is used as a divisor in "i / j" below.  If ofldqsets can
 * be non-zero but smaller than nports, j would be 0 -- confirm the caller
 * guarantees otherwise.
 */
775 j = s->ofldqsets / adap->params.nports; /* ofld queues per channel */
776 for_each_ofldrxq(s, i) {
777 struct sge_ofld_rxq *q = &s->ofldrxq[i];
778 struct net_device *dev = adap->port[i / j];
782 err = t4_sge_alloc_rxq(adap, &q->rspq, false, dev, msi_idx,
783 &q->fl, uldrx_handler);
786 memset(&q->stats, 0, sizeof(q->stats));
787 s->ofld_rxq[i] = q->rspq.abs_id;
788 err = t4_sge_alloc_ofld_txq(adap, &s->ofldtxq[i], dev,
789 s->fw_evtq.cntxt_id);
794 for_each_rdmarxq(s, i) {
795 struct sge_ofld_rxq *q = &s->rdmarxq[i];
799 err = t4_sge_alloc_rxq(adap, &q->rspq, false, adap->port[i],
800 msi_idx, &q->fl, uldrx_handler);
803 memset(&q->stats, 0, sizeof(q->stats));
804 s->rdma_rxq[i] = q->rspq.abs_id;
807 for_each_port(adap, i) {
809 * Note that ->rdmarxq[i].rspq.cntxt_id below is 0 if we don't
810 * have RDMA queues, and that's the right value.
812 err = t4_sge_alloc_ctrl_txq(adap, &s->ctrlq[i], adap->port[i],
814 s->rdmarxq[i].rspq.cntxt_id);
819 t4_write_reg(adap, MPS_TRC_RSS_CONTROL,
820 RSSCONTROL(netdev2pinfo(adap->port[0])->tx_chan) |
821 QUEUENUMBER(s->ethrxq[0].rspq.abs_id));
826 * Returns 0 if new FW was successfully loaded, a positive errno if a load was
827 * started but failed, and a negative errno if flash load couldn't start.
/*
 * upgrade_fw - visible steps:
 *   1. request_firmware() for FW_FNAME; a failure is logged,
 *   2. read the version from the image header and reject (-EINVAL) an
 *      image whose major version is not FW_VERSION_MAJOR,
 *   3. if the adapter's current firmware has the wrong major version, or
 *      the image is newer, load the image with t4_load_fw(),
 *   4. release the firmware image at the "out" label.
 *
 * The result of t4_load_fw() is negated before being stored in ret, which
 * is how a failed load becomes the "positive errno" mentioned in the
 * preceding comment.
 *
 * NOTE(review): the declarations of ret and vers, the gotos, the condition
 * around dev_info() and the return statements are missing from this extract.
 */
829 static int upgrade_fw(struct adapter *adap)
833 const struct fw_hdr *hdr;
834 const struct firmware *fw;
835 struct device *dev = adap->pdev_dev;
837 ret = request_firmware(&fw, FW_FNAME, dev);
839 dev_err(dev, "unable to load firmware image " FW_FNAME
840 ", error %d\n", ret);
844 hdr = (const struct fw_hdr *)fw->data;
845 vers = ntohl(hdr->fw_ver);
846 if (FW_HDR_FW_VER_MAJOR_GET(vers) != FW_VERSION_MAJOR) {
847 ret = -EINVAL; /* wrong major version, won't do */
852 * If the flash FW is unusable or we found something newer, load it.
854 if (FW_HDR_FW_VER_MAJOR_GET(adap->params.fw_vers) != FW_VERSION_MAJOR ||
855 vers > adap->params.fw_vers) {
856 ret = -t4_load_fw(adap, fw->data, fw->size);
/* %pI4 prints the four bytes of the big-endian version word as a dotted quad */
858 dev_info(dev, "firmware upgraded to version %pI4 from "
859 FW_FNAME "\n", &hdr->fw_ver);
861 out: release_firmware(fw);
866 * Allocate a chunk of memory using kmalloc or, if that fails, vmalloc.
867 * The allocated memory is cleared.
/*
 * t4_alloc_mem - allocate @size bytes of zeroed memory.
 * The first attempt is kzalloc(GFP_KERNEL).
 * NOTE(review): the fallback described in the preceding comment (vmalloc
 * when kzalloc fails) and the return statement are missing from this extract.
 */
869 void *t4_alloc_mem(size_t size)
871 void *p = kzalloc(size, GFP_KERNEL);
879 * Free memory allocated through t4_alloc_mem().
/*
 * t4_free_mem - release memory, choosing the release routine by whether
 * @addr is a vmalloc address.
 * NOTE(review): both branch bodies are missing from this extract;
 * presumably vfree() and kfree() -- confirm.
 */
881 static void t4_free_mem(void *addr)
883 if (is_vmalloc_addr(addr))
889 static inline int is_offload(const struct adapter *adap)
891 return adap->params.offload;
895 * Implementation of ethtool operations.
898 static u32 get_msglevel(struct net_device *dev)
900 return netdev2adap(dev)->msg_enable;
903 static void set_msglevel(struct net_device *dev, u32 val)
905 netdev2adap(dev)->msg_enable = val;
/*
 * Names of the ethtool statistics, one ETH_GSTRING_LEN-sized slot each.
 * get_sset_count() reports the number of entries and get_strings() copies
 * the whole table out.
 * NOTE(review): most entries and the closing "};" are missing from this
 * extract (the embedded line numbers have large gaps).
 */
908 static char stats_strings[][ETH_GSTRING_LEN] = {
911 "TxBroadcastFrames ",
912 "TxMulticastFrames ",
920 "TxFrames512To1023 ",
921 "TxFrames1024To1518 ",
922 "TxFrames1519ToMax ",
937 "RxBroadcastFrames ",
938 "RxMulticastFrames ",
952 "RxFrames512To1023 ",
953 "RxFrames1024To1518 ",
954 "RxFrames1519ToMax ",
966 "RxBG0FramesDropped ",
967 "RxBG1FramesDropped ",
968 "RxBG2FramesDropped ",
969 "RxBG3FramesDropped ",
/*
 * get_sset_count - ethtool handler: number of strings in a string set.
 * The visible path returns the number of entries in stats_strings.
 * NOTE(review): the selection on @sset and the result for other sets are
 * missing from this extract.
 */
984 static int get_sset_count(struct net_device *dev, int sset)
988 return ARRAY_SIZE(stats_strings);
/* Size of the register dump buffer: 160 * 1024 bytes.  Reported by get_regs_len() and zeroed by get_regs(). */
994 #define T4_REGMAP_SIZE (160 * 1024)
996 static int get_regs_len(struct net_device *dev)
998 return T4_REGMAP_SIZE;
/*
 * get_eeprom_len - presumably the ethtool EEPROM-length handler (by name).
 * NOTE(review): only the signature is present in this extract; the body is
 * missing.
 */
1001 static int get_eeprom_len(struct net_device *dev)
1006 static void get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info)
1008 struct adapter *adapter = netdev2adap(dev);
1010 strcpy(info->driver, KBUILD_MODNAME);
1011 strcpy(info->version, DRV_VERSION);
1012 strcpy(info->bus_info, pci_name(adapter->pdev));
1014 if (!adapter->params.fw_vers)
1015 strcpy(info->fw_version, "N/A");
1017 snprintf(info->fw_version, sizeof(info->fw_version),
1018 "%u.%u.%u.%u, TP %u.%u.%u.%u",
1019 FW_HDR_FW_VER_MAJOR_GET(adapter->params.fw_vers),
1020 FW_HDR_FW_VER_MINOR_GET(adapter->params.fw_vers),
1021 FW_HDR_FW_VER_MICRO_GET(adapter->params.fw_vers),
1022 FW_HDR_FW_VER_BUILD_GET(adapter->params.fw_vers),
1023 FW_HDR_FW_VER_MAJOR_GET(adapter->params.tp_vers),
1024 FW_HDR_FW_VER_MINOR_GET(adapter->params.tp_vers),
1025 FW_HDR_FW_VER_MICRO_GET(adapter->params.tp_vers),
1026 FW_HDR_FW_VER_BUILD_GET(adapter->params.tp_vers));
1029 static void get_strings(struct net_device *dev, u32 stringset, u8 *data)
1031 if (stringset == ETH_SS_STATS)
1032 memcpy(data, stats_strings, sizeof(stats_strings));
1036 * port stats maintained per queue of the port. They should be in the same
1037 * order as in stats_strings above.
/*
 * Per-port software statistics summed over the port's queues.
 * NOTE(review): the member list is missing from this extract.
 * collect_sge_port_stats() writes at least tx_csum, rx_csum, vlan_ex,
 * vlan_ins, gro_pkts and gro_merged.
 */
1039 struct queue_port_stats {
/*
 * collect_sge_port_stats - sum per-queue counters into per-port totals.
 * @adap: the adapter
 * @p: the port whose queue sets are summed
 * @s: result; zeroed first, then accumulated
 *
 * Walks the port's nqsets queue sets starting at p->first_qset, advancing
 * the Rx and Tx queue pointers together.
 * NOTE(review): the declaration of "i" and at least one line inside the
 * loop are missing from this extract (the embedded numbers skip 1052 and
 * 1058), so the list of summed fields below may be incomplete.
 */
1049 static void collect_sge_port_stats(const struct adapter *adap,
1050 const struct port_info *p, struct queue_port_stats *s)
1053 const struct sge_eth_txq *tx = &adap->sge.ethtxq[p->first_qset];
1054 const struct sge_eth_rxq *rx = &adap->sge.ethrxq[p->first_qset];
1056 memset(s, 0, sizeof(*s));
1057 for (i = 0; i < p->nqsets; i++, rx++, tx++) {
1059 s->tx_csum += tx->tx_cso;
1060 s->rx_csum += rx->stats.rx_cso;
1061 s->vlan_ex += rx->stats.vlan_ex;
1062 s->vlan_ins += tx->vlan_ins;
/* the queue-level counters are named lro_*, the reported totals gro_* */
1063 s->gro_pkts += rx->stats.lro_pkts;
1064 s->gro_merged += rx->stats.lro_merged;
/*
 * get_stats - ethtool handler: fill the statistics buffer.
 *
 * The buffer is filled in two parts: first the hardware port statistics
 * (t4_get_port_stats() writes a struct port_stats at the start of data),
 * then, immediately after, the software queue statistics from
 * collect_sge_port_stats().
 *
 * NOTE(review): the line declaring the "data" parameter is missing from
 * this extract.  The advance by sizeof(struct port_stats) / sizeof(u64)
 * elements suggests it is a u64 pointer -- confirm.
 */
1068 static void get_stats(struct net_device *dev, struct ethtool_stats *stats,
1071 struct port_info *pi = netdev_priv(dev);
1072 struct adapter *adapter = pi->adapter;
1074 t4_get_port_stats(adapter, pi->tx_chan, (struct port_stats *)data);
1076 data += sizeof(struct port_stats) / sizeof(u64);
1077 collect_sge_port_stats(adapter, pi, (struct queue_port_stats *)data);
1081 * Return a version number to identify the type of adapter. The scheme is:
1082 * - bits 0..9: chip version
1083 * - bits 10..15: chip revision
1084 * - bits 16..23: register dump version
1086 static inline unsigned int mk_adap_vers(const struct adapter *ap)
1088 return 4 | (ap->params.rev << 10) | (1 << 16);
/*
 * reg_block_dump - copy a contiguous range of registers into a dump buffer.
 * @ap: the adapter to read from
 * @buf: start of the dump buffer
 * @start: first register address; also the byte offset into @buf
 *
 * Each register is stored at the buffer offset equal to its own address, so
 * the buffer mirrors the register map.  The loop is inclusive of "end" and
 * steps one 32-bit register at a time.
 *
 * "buf + start" is arithmetic on a void pointer, which relies on the GNU C
 * extension that treats sizeof(void) as 1.
 *
 * NOTE(review): the line declaring the "end" parameter is missing from this
 * extract.
 */
1091 static void reg_block_dump(struct adapter *ap, void *buf, unsigned int start,
1094 u32 *p = buf + start;
1096 for ( ; start <= end; start += sizeof(u32))
1097 *p++ = t4_read_reg(ap, start);
/*
 * get_regs - ethtool handler: produce the register dump.
 *
 * Sets regs->version from mk_adap_vers(), zeroes the T4_REGMAP_SIZE-byte
 * buffer, then dumps each register range.  reg_ranges[] is consumed two
 * entries at a time as (first, last) address pairs.
 *
 * NOTE(review): the "buf" parameter line, the contents of reg_ranges[] and
 * the declaration of "i" are missing from this extract.
 */
1100 static void get_regs(struct net_device *dev, struct ethtool_regs *regs,
1103 static const unsigned int reg_ranges[] = {
1324 struct adapter *ap = netdev2adap(dev);
1326 regs->version = mk_adap_vers(ap);
/* ranges not covered by reg_ranges[] stay zero in the dump */
1328 memset(buf, 0, T4_REGMAP_SIZE);
1329 for (i = 0; i < ARRAY_SIZE(reg_ranges); i += 2)
1330 reg_block_dump(ap, buf, reg_ranges[i], reg_ranges[i + 1]);
/*
 * restart_autoneg - ethtool handler: restart link auto-negotiation.
 * Two preconditions are checked before t4_restart_aneg() is called: the
 * device must be running, and auto-negotiation must be enabled in the
 * port's link configuration.
 * NOTE(review): the values returned when a precondition fails, and the
 * final return, are missing from this extract.
 */
1333 static int restart_autoneg(struct net_device *dev)
1335 struct port_info *p = netdev_priv(dev);
1337 if (!netif_running(dev))
1339 if (p->link_cfg.autoneg != AUTONEG_ENABLE)
1341 t4_restart_aneg(p->adapter, p->adapter->fn, p->tx_chan);
/*
 * identify_port - ethtool handler: make the port identifiable.
 * @data: duration; a default of 2 seconds is substituted on one path
 * NOTE(review): the condition selecting the default (presumably data == 0)
 * and the last argument of t4_identify_port() are missing from this extract.
 */
1345 static int identify_port(struct net_device *dev, u32 data)
1347 struct adapter *adap = netdev2adap(dev);
1350 data = 2; /* default to 2 seconds */
1352 return t4_identify_port(adap, adap->fn, netdev2pinfo(dev)->viid,
/*
 * from_fw_linkcaps - translate firmware port type and capability bits into
 * ethtool SUPPORTED_* flags.
 * @type: firmware port type (FW_PORT_TYPE_*)
 * @caps: firmware capability bits (FW_PORT_CAP_*)
 *
 * Mapping visible here:
 *   BT_SGMII / BT_XFI / BT_XAUI   - 100/1000/10000baseT_Full by speed bit
 *   KX4 / KX                      - Backplane, plus 1000baseKX / 10000baseKX4
 *   KR                            - Backplane | 10000baseKR_Full
 *   BP_AP                         - Backplane | 10000baseR_FEC
 *   FIBER_XFI / FIBER_XAUI / SFP  - FIBRE
 * Independently of type, FW_PORT_CAP_ANEG adds SUPPORTED_Autoneg.
 *
 * NOTE(review): the declaration of v, one line in the BT_* branch and the
 * return statement are missing from this extract.
 */
1356 static unsigned int from_fw_linkcaps(unsigned int type, unsigned int caps)
1360 if (type == FW_PORT_TYPE_BT_SGMII || type == FW_PORT_TYPE_BT_XFI ||
1361 type == FW_PORT_TYPE_BT_XAUI) {
1363 if (caps & FW_PORT_CAP_SPEED_100M)
1364 v |= SUPPORTED_100baseT_Full;
1365 if (caps & FW_PORT_CAP_SPEED_1G)
1366 v |= SUPPORTED_1000baseT_Full;
1367 if (caps & FW_PORT_CAP_SPEED_10G)
1368 v |= SUPPORTED_10000baseT_Full;
1369 } else if (type == FW_PORT_TYPE_KX4 || type == FW_PORT_TYPE_KX) {
1370 v |= SUPPORTED_Backplane;
1371 if (caps & FW_PORT_CAP_SPEED_1G)
1372 v |= SUPPORTED_1000baseKX_Full;
1373 if (caps & FW_PORT_CAP_SPEED_10G)
1374 v |= SUPPORTED_10000baseKX4_Full;
1375 } else if (type == FW_PORT_TYPE_KR)
1376 v |= SUPPORTED_Backplane | SUPPORTED_10000baseKR_Full;
1377 else if (type == FW_PORT_TYPE_BP_AP)
1378 v |= SUPPORTED_Backplane | SUPPORTED_10000baseR_FEC;
1379 else if (type == FW_PORT_TYPE_FIBER_XFI ||
1380 type == FW_PORT_TYPE_FIBER_XAUI || type == FW_PORT_TYPE_SFP)
1381 v |= SUPPORTED_FIBRE;
1383 if (caps & FW_PORT_CAP_ANEG)
1384 v |= SUPPORTED_Autoneg;
/*
 * to_fw_linkcaps - translate ethtool ADVERTISED_* speed bits into firmware
 * FW_PORT_CAP_SPEED_* bits.
 * Only the three baseT full-duplex bits (100M, 1G, 10G) are examined; any
 * other advertised bit is ignored.
 * NOTE(review): the declaration of v and the return statement are missing
 * from this extract.
 */
1388 static unsigned int to_fw_linkcaps(unsigned int caps)
1392 if (caps & ADVERTISED_100baseT_Full)
1393 v |= FW_PORT_CAP_SPEED_100M;
1394 if (caps & ADVERTISED_1000baseT_Full)
1395 v |= FW_PORT_CAP_SPEED_1G;
1396 if (caps & ADVERTISED_10000baseT_Full)
1397 v |= FW_PORT_CAP_SPEED_10G;
/*
 * get_settings - ethtool handler: report the port's link settings.
 *
 * cmd->port by port type:
 *   BT_SGMII / BT_XFI / BT_XAUI  - PORT_TP
 *   FIBER_XFI / FIBER_XAUI       - PORT_FIBRE
 *   SFP                          - PORT_DA for twinax modules, else PORT_FIBRE
 *   anything else                - PORT_OTHER
 * A non-negative mdio_addr means an external PHY reachable over MDIO
 * (clause 22 for BT_SGMII, clause 45 otherwise); a negative one is reported
 * as an internal transceiver without MDIO support.
 * Speed is reported as 0 while the carrier is down; duplex is always full.
 *
 * NOTE(review): the "else" lines joining these branches, any assignments
 * after cmd->autoneg and the return statement are missing from this extract.
 */
1401 static int get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
1403 const struct port_info *p = netdev_priv(dev);
1405 if (p->port_type == FW_PORT_TYPE_BT_SGMII ||
1406 p->port_type == FW_PORT_TYPE_BT_XFI ||
1407 p->port_type == FW_PORT_TYPE_BT_XAUI)
1408 cmd->port = PORT_TP;
1409 else if (p->port_type == FW_PORT_TYPE_FIBER_XFI ||
1410 p->port_type == FW_PORT_TYPE_FIBER_XAUI)
1411 cmd->port = PORT_FIBRE;
1412 else if (p->port_type == FW_PORT_TYPE_SFP) {
1413 if (p->mod_type == FW_PORT_MOD_TYPE_TWINAX_PASSIVE ||
1414 p->mod_type == FW_PORT_MOD_TYPE_TWINAX_ACTIVE)
1415 cmd->port = PORT_DA;
1417 cmd->port = PORT_FIBRE;
1419 cmd->port = PORT_OTHER;
1421 if (p->mdio_addr >= 0) {
1422 cmd->phy_address = p->mdio_addr;
1423 cmd->transceiver = XCVR_EXTERNAL;
1424 cmd->mdio_support = p->port_type == FW_PORT_TYPE_BT_SGMII ?
1425 MDIO_SUPPORTS_C22 : MDIO_SUPPORTS_C45;
1427 cmd->phy_address = 0; /* not really, but no better option */
1428 cmd->transceiver = XCVR_INTERNAL;
1429 cmd->mdio_support = 0;
1432 cmd->supported = from_fw_linkcaps(p->port_type, p->link_cfg.supported);
1433 cmd->advertising = from_fw_linkcaps(p->port_type,
1434 p->link_cfg.advertising);
1435 cmd->speed = netif_carrier_ok(dev) ? p->link_cfg.speed : 0;
1436 cmd->duplex = DUPLEX_FULL;
1437 cmd->autoneg = p->link_cfg.autoneg;
/*
 * speed_to_caps - map an ethtool SPEED_* value to the matching firmware
 * FW_PORT_CAP_SPEED_* bit.  Handles SPEED_100, SPEED_1000 and SPEED_10000.
 * NOTE(review): the fall-through return for any other speed is missing from
 * this extract; callers AND the result with lc->supported, so presumably it
 * is 0 -- confirm.
 */
1443 static unsigned int speed_to_caps(int speed)
1445 if (speed == SPEED_100)
1446 return FW_PORT_CAP_SPEED_100M;
1447 if (speed == SPEED_1000)
1448 return FW_PORT_CAP_SPEED_1G;
1449 if (speed == SPEED_10000)
1450 return FW_PORT_CAP_SPEED_10G;
/*
 * set_settings - ethtool handler: change the port's link settings.
 *
 * Visible rules:
 *   - only full duplex is accepted,
 *   - a PHY without auto-negotiation only accepts a request with
 *     autoneg disabled and a speed it supports,
 *   - with autoneg disabled, the requested speed must be supported, and
 *     SPEED_1000 / SPEED_10000 are singled out by the same test (i.e. they
 *     cannot be forced),
 *   - with autoneg enabled, the advertised set is translated with
 *     to_fw_linkcaps() and must intersect the supported set.
 * The new configuration is stored in the port's link_config and, when the
 * device is running, applied through t4_link_start().
 *
 * NOTE(review): the declaration of cap, the error returns attached to each
 * check, the last t4_link_start() argument and the final return are missing
 * from this extract.
 */
1454 static int set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
1457 struct port_info *p = netdev_priv(dev);
1458 struct link_config *lc = &p->link_cfg;
1460 if (cmd->duplex != DUPLEX_FULL) /* only full-duplex supported */
1463 if (!(lc->supported & FW_PORT_CAP_ANEG)) {
1465 * PHY offers a single speed. See if that's what's
1468 if (cmd->autoneg == AUTONEG_DISABLE &&
1469 (lc->supported & speed_to_caps(cmd->speed)))
1474 if (cmd->autoneg == AUTONEG_DISABLE) {
1475 cap = speed_to_caps(cmd->speed);
1477 if (!(lc->supported & cap) || cmd->speed == SPEED_1000 ||
1478 cmd->speed == SPEED_10000)
1480 lc->requested_speed = cap;
1481 lc->advertising = 0;
1483 cap = to_fw_linkcaps(cmd->advertising);
1484 if (!(lc->supported & cap))
1486 lc->requested_speed = 0;
1487 lc->advertising = cap | FW_PORT_CAP_ANEG;
1489 lc->autoneg = cmd->autoneg;
1491 if (netif_running(dev))
1492 return t4_link_start(p->adapter, p->adapter->fn, p->tx_chan,
1497 static void get_pauseparam(struct net_device *dev,
1498 struct ethtool_pauseparam *epause)
1500 struct port_info *p = netdev_priv(dev);
1502 epause->autoneg = (p->link_cfg.requested_fc & PAUSE_AUTONEG) != 0;
1503 epause->rx_pause = (p->link_cfg.fc & PAUSE_RX) != 0;
1504 epause->tx_pause = (p->link_cfg.fc & PAUSE_TX) != 0;
/*
 * set_pauseparam - ethtool handler: change flow-control settings.
 *
 * requested_fc is rebuilt from scratch: it starts as 0 when pause
 * auto-negotiation is disabled, or as PAUSE_AUTONEG when the PHY supports
 * auto-negotiation; PAUSE_RX / PAUSE_TX are then OR-ed in as requested.
 * When the device is running the new configuration is applied through
 * t4_link_start().
 *
 * NOTE(review): the branch taken when pause autoneg is requested but the
 * PHY lacks auto-negotiation, the last t4_link_start() argument and the
 * final return are missing from this extract.
 */
1507 static int set_pauseparam(struct net_device *dev,
1508 struct ethtool_pauseparam *epause)
1510 struct port_info *p = netdev_priv(dev);
1511 struct link_config *lc = &p->link_cfg;
1513 if (epause->autoneg == AUTONEG_DISABLE)
1514 lc->requested_fc = 0;
1515 else if (lc->supported & FW_PORT_CAP_ANEG)
1516 lc->requested_fc = PAUSE_AUTONEG;
1520 if (epause->rx_pause)
1521 lc->requested_fc |= PAUSE_RX;
1522 if (epause->tx_pause)
1523 lc->requested_fc |= PAUSE_TX;
1524 if (netif_running(dev))
1525 return t4_link_start(p->adapter, p->adapter->fn, p->tx_chan,
1530 static u32 get_rx_csum(struct net_device *dev)
1532 struct port_info *p = netdev_priv(dev);
1534 return p->rx_offload & RX_CSO;
/*
 * set_rx_csum - ethtool handler: switch Rx checksum offload on or off by
 * setting or clearing RX_CSO in the port's rx_offload flags.
 * NOTE(review): the condition choosing between the two assignments
 * (presumably on @data) and the return statement are missing from this
 * extract.
 */
1537 static int set_rx_csum(struct net_device *dev, u32 data)
1539 struct port_info *p = netdev_priv(dev);
1542 p->rx_offload |= RX_CSO;
1544 p->rx_offload &= ~RX_CSO;
1548 static void get_sge_param(struct net_device *dev, struct ethtool_ringparam *e)
1550 const struct port_info *pi = netdev_priv(dev);
1551 const struct sge *s = &pi->adapter->sge;
1553 e->rx_max_pending = MAX_RX_BUFFERS;
1554 e->rx_mini_max_pending = MAX_RSPQ_ENTRIES;
1555 e->rx_jumbo_max_pending = 0;
1556 e->tx_max_pending = MAX_TXQ_ENTRIES;
1558 e->rx_pending = s->ethrxq[pi->first_qset].fl.size - 8;
1559 e->rx_mini_pending = s->ethrxq[pi->first_qset].rspq.size;
1560 e->rx_jumbo_pending = 0;
1561 e->tx_pending = s->ethtxq[pi->first_qset].q.size;
/*
 * set_sge_param - ethtool handler: change ring sizes.
 *
 * The request is validated against the MIN_ and MAX_ limits, and a non-zero
 * rx_jumbo_pending fails the same test.  There is also a check on
 * FULL_INIT_DONE before the sizes are written.  The new sizes are then
 * stored for every queue set of the port; the free-list size is stored as
 * rx_pending + 8.
 *
 * NOTE(review): the declaration of "i", the error codes returned by the two
 * checks and the final return are missing from this extract.
 */
1564 static int set_sge_param(struct net_device *dev, struct ethtool_ringparam *e)
1567 const struct port_info *pi = netdev_priv(dev);
1568 struct adapter *adapter = pi->adapter;
1569 struct sge *s = &adapter->sge;
1571 if (e->rx_pending > MAX_RX_BUFFERS || e->rx_jumbo_pending ||
1572 e->tx_pending > MAX_TXQ_ENTRIES ||
1573 e->rx_mini_pending > MAX_RSPQ_ENTRIES ||
1574 e->rx_mini_pending < MIN_RSPQ_ENTRIES ||
1575 e->rx_pending < MIN_FL_ENTRIES || e->tx_pending < MIN_TXQ_ENTRIES)
/* presumably sizes cannot change once the queues exist -- confirm the returned error */
1578 if (adapter->flags & FULL_INIT_DONE)
1581 for (i = 0; i < pi->nqsets; ++i) {
1582 s->ethtxq[pi->first_qset + i].q.size = e->tx_pending;
1583 s->ethrxq[pi->first_qset + i].fl.size = e->rx_pending + 8;
1584 s->ethrxq[pi->first_qset + i].rspq.size = e->rx_mini_pending;
/*
 * Scan the SGE timer_val[] table for the entry nearest to @time, tracking
 * the smallest difference seen so far in min_delta.
 * NOTE(review): presumably `match` holds the winning index and is the return
 * value — confirm.
 */
1589 static int closest_timer(const struct sge *s, int time)
1591 int i, delta, match = 0, min_delta = INT_MAX;
1593 for (i = 0; i < ARRAY_SIZE(s->timer_val); i++) {
1594 delta = time - s->timer_val[i];
1597 if (delta < min_delta) {
/*
 * Same search as closest_timer(), but over the SGE counter_val[] table of
 * packet-count thresholds.
 * NOTE(review): presumably `match` holds the winning index and is the return
 * value — confirm.
 */
1605 static int closest_thres(const struct sge *s, int thres)
1607 int i, delta, match = 0, min_delta = INT_MAX;
1609 for (i = 0; i < ARRAY_SIZE(s->counter_val); i++) {
1610 delta = thres - s->counter_val[i];
1613 if (delta < min_delta) {
1622 * Return a queue's interrupt hold-off time in us. 0 means no timer.
/*
 * The timer index is stored in q->intr_params above bit 0.  An index outside
 * the timer table (>= SGE_NTIMERS) is reported as 0.
 */
1624 static unsigned int qtimer_val(const struct adapter *adap,
1625 const struct sge_rspq *q)
1627 unsigned int idx = q->intr_params >> 1;
1629 return idx < SGE_NTIMERS ? adap->sge.timer_val[idx] : 0;
1633 * set_rxq_intr_params - set a queue's interrupt holdoff parameters
1634 * @adap: the adapter
1636 * @us: the hold-off time in us, or 0 to disable timer
1637 * @cnt: the hold-off packet count, or 0 to disable counter
1639 * Sets an Rx queue's interrupt hold-off time and packet count. At least
1640 * one of the two needs to be enabled for the queue to generate interrupts.
/*
 * Visible flow: a request with both @us and @cnt equal to 0 is singled out
 * first.  @cnt is then mapped to the nearest counter_val[] index; if the
 * queue already has descriptors (q->desc) and the index changed, the new
 * threshold is pushed to firmware with t4_set_params() before q->pktcnt_idx
 * is updated.  Finally the timer index and the counter-enable bit are packed
 * into q->intr_params.
 */
1642 static int set_rxq_intr_params(struct adapter *adap, struct sge_rspq *q,
1643 unsigned int us, unsigned int cnt)
1645 if ((us | cnt) == 0)
1652 new_idx = closest_thres(&adap->sge, cnt);
1653 if (q->desc && q->pktcnt_idx != new_idx) {
1654 /* the queue has already been created, update it */
1655 v = FW_PARAMS_MNEM(FW_PARAMS_MNEM_DMAQ) |
1656 FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DMAQ_IQ_INTCNTTHRESH) |
1657 FW_PARAMS_PARAM_YZ(q->cntxt_id);
1658 err = t4_set_params(adap, adap->fn, adap->fn, 0, 1, &v,
1663 q->pktcnt_idx = new_idx;
/*
 * us == 0 selects timer index 6; qtimer_val() reports any index
 * >= SGE_NTIMERS as "no timer".
 * NOTE(review): presumably 6 == SGE_NTIMERS — confirm.
 */
1666 us = us == 0 ? 6 : closest_timer(&adap->sge, us);
1667 q->intr_params = QINTR_TIMER_IDX(us) | (cnt > 0 ? QINTR_CNT_EN : 0);
/*
 * ethtool set_coalesce handler.  Only rx_coalesce_usecs and
 * rx_max_coalesced_frames are used, and they are applied to the response
 * queue of the port's first queue set only.
 */
1671 static int set_coalesce(struct net_device *dev, struct ethtool_coalesce *c)
1673 const struct port_info *pi = netdev_priv(dev);
1674 struct adapter *adap = pi->adapter;
1676 return set_rxq_intr_params(adap, &adap->sge.ethrxq[pi->first_qset].rspq,
1677 c->rx_coalesce_usecs, c->rx_max_coalesced_frames);
/*
 * ethtool get_coalesce handler.  Reports the hold-off time and packet count
 * of the first queue set's response queue; the packet count is 0 when the
 * counter is not enabled (QINTR_CNT_EN clear).
 */
1680 static int get_coalesce(struct net_device *dev, struct ethtool_coalesce *c)
1682 const struct port_info *pi = netdev_priv(dev);
1683 const struct adapter *adap = pi->adapter;
1684 const struct sge_rspq *rq = &adap->sge.ethrxq[pi->first_qset].rspq;
1686 c->rx_coalesce_usecs = qtimer_val(adap, rq);
1687 c->rx_max_coalesced_frames = (rq->intr_params & QINTR_CNT_EN) ?
1688 adap->sge.counter_val[rq->pktcnt_idx] : 0;
1693 * eeprom_ptov - translate a physical EEPROM address to virtual
1694 * @phys_addr: the physical EEPROM address
1695 * @fn: the PCI function number
1696 * @sz: size of function-specific area
1698 * Translate a physical EEPROM address to virtual. The first 1K is
1699 * accessed through virtual addresses starting at 31K, the rest is
1700 * accessed through virtual addresses starting at 0.
1702 * The mapping is as follows:
1703 * [0..1K) -> [31K..32K)
1704 * [1K..1K+A) -> [31K-A..31K)
1705 * [1K+A..ES) -> [0..ES-A-1K)
1707 * where A = @fn * @sz, and ES = EEPROM size.
/*
 * Three address ranges are handled, in ascending order of @phys_addr:
 * below 1K, below 1K + fn, and below EEPROMSIZE.
 * NOTE(review): the comparisons use `fn` directly as the byte offset "A"
 * from the mapping description; that only matches A = @fn * @sz if fn has
 * been scaled by @sz beforehand — confirm.
 */
1709 static int eeprom_ptov(unsigned int phys_addr, unsigned int fn, unsigned int sz)
1712 if (phys_addr < 1024)
1713 return phys_addr + (31 << 10);
1714 if (phys_addr < 1024 + fn)
/* 31744 == 31K */
1715 return 31744 - fn + phys_addr - 1024;
1716 if (phys_addr < EEPROMSIZE)
1717 return phys_addr - 1024 - fn;
1722 * The next two routines implement eeprom read/write from physical addresses.
/*
 * Read one 32-bit word at physical EEPROM address @phys_addr into @v.
 * The address is translated with eeprom_ptov() and read through PCI VPD.
 * `vaddr` is reused to hold the pci_read_vpd() result; negative values are
 * passed through as the error, anything else becomes 0.
 */
1724 static int eeprom_rd_phys(struct adapter *adap, unsigned int phys_addr, u32 *v)
1726 int vaddr = eeprom_ptov(phys_addr, adap->fn, EEPROMPFSIZE);
1729 vaddr = pci_read_vpd(adap->pdev, vaddr, sizeof(u32), v);
1730 return vaddr < 0 ? vaddr : 0;
/*
 * Write the 32-bit word @v at physical EEPROM address @phys_addr.
 * Mirror image of eeprom_rd_phys(): translate with eeprom_ptov(), write
 * through PCI VPD, return a negative error or 0.
 */
1733 static int eeprom_wr_phys(struct adapter *adap, unsigned int phys_addr, u32 v)
1735 int vaddr = eeprom_ptov(phys_addr, adap->fn, EEPROMPFSIZE);
1738 vaddr = pci_write_vpd(adap->pdev, vaddr, sizeof(u32), &v);
1739 return vaddr < 0 ? vaddr : 0;
/* Magic value reported by get_eeprom() and required by set_eeprom(). */
1742 #define EEPROM_MAGIC 0x38E2F10C
/*
 * ethtool get_eeprom handler.  Reads the requested range word by word into
 * a scratch buffer of EEPROMSIZE bytes, starting at the 4-byte-aligned
 * address at or below e->offset, then copies the exact byte range to @data.
 * The scratch buffer is indexed by absolute EEPROM offset.
 * NOTE(review): assumes the caller has already bounded e->offset + e->len
 * by the EEPROM length — confirm.
 */
1744 static int get_eeprom(struct net_device *dev, struct ethtool_eeprom *e,
1748 struct adapter *adapter = netdev2adap(dev);
1750 u8 *buf = kmalloc(EEPROMSIZE, GFP_KERNEL);
1754 e->magic = EEPROM_MAGIC;
/* the loop stops at the first read error */
1755 for (i = e->offset & ~3; !err && i < e->offset + e->len; i += 4)
1756 err = eeprom_rd_phys(adapter, i, (u32 *)&buf[i]);
1759 memcpy(data, buf + e->offset, e->len);
/*
 * ethtool set_eeprom handler.  Visible steps:
 *  1. check the caller's magic against EEPROM_MAGIC;
 *  2. widen the request to a 4-byte-aligned window;
 *  3. for PCI functions other than 0, check the window against that
 *     function's own EEPROMPFSIZE-byte region;
 *  4. if the request was not already aligned, build a bounce buffer and
 *     pre-read its first and last words so the untouched bytes are kept;
 *  5. write the window one word at a time, bracketed by t4_seeprom_wp()
 *     calls.
 */
1764 static int set_eeprom(struct net_device *dev, struct ethtool_eeprom *eeprom,
1769 u32 aligned_offset, aligned_len, *p;
1770 struct adapter *adapter = netdev2adap(dev);
1772 if (eeprom->magic != EEPROM_MAGIC)
1775 aligned_offset = eeprom->offset & ~3;
1776 aligned_len = (eeprom->len + (eeprom->offset & 3) + 3) & ~3;
1778 if (adapter->fn > 0) {
/* per-function areas start after the first 1K */
1779 u32 start = 1024 + adapter->fn * EEPROMPFSIZE;
1781 if (aligned_offset < start ||
1782 aligned_offset + aligned_len > start + EEPROMPFSIZE)
1786 if (aligned_offset != eeprom->offset || aligned_len != eeprom->len) {
1788 * RMW possibly needed for first or last words.
1790 buf = kmalloc(aligned_len, GFP_KERNEL);
1793 err = eeprom_rd_phys(adapter, aligned_offset, (u32 *)buf);
1794 if (!err && aligned_len > 4)
1795 err = eeprom_rd_phys(adapter,
1796 aligned_offset + aligned_len - 4,
1797 (u32 *)&buf[aligned_len - 4]);
/* overlay the caller's bytes at their offset within the window */
1800 memcpy(buf + (eeprom->offset & 3), data, eeprom->len);
/* NOTE(review): presumably `false` lifts write protection — confirm */
1804 err = t4_seeprom_wp(adapter, false);
1808 for (p = (u32 *)buf; !err && aligned_len; aligned_len -= 4, p++) {
1809 err = eeprom_wr_phys(adapter, aligned_offset, *p);
1810 aligned_offset += 4;
/* NOTE(review): presumably `true` restores write protection — confirm */
1814 err = t4_seeprom_wp(adapter, true);
/*
 * ethtool flash_device handler.  ef->data names a firmware image; it is
 * forcibly NUL-terminated, fetched with request_firmware(), handed to
 * t4_load_fw() and then released.  An informational message names the file
 * that was loaded.
 */
1821 static int set_flash(struct net_device *netdev, struct ethtool_flash *ef)
1824 const struct firmware *fw;
1825 struct adapter *adap = netdev2adap(netdev);
/* do not trust the caller to have terminated the file name */
1827 ef->data[sizeof(ef->data) - 1] = '\0';
1828 ret = request_firmware(&fw, ef->data, adap->pdev_dev);
1832 ret = t4_load_fw(adap, fw->data, fw->size);
1833 release_firmware(fw);
1835 dev_info(adap->pdev_dev, "loaded firmware %s\n", ef->data);
/* Wake-on-LAN modes accepted by set_wol(). */
1839 #define WOL_SUPPORTED (WAKE_BCAST | WAKE_MAGIC)
/* CRC passed to t4_wol_pat_enable() for the broadcast wake-up pattern. */
1840 #define BCAST_CRC 0xa0ccc1a6
/*
 * Report the supported Wake-on-LAN modes and the adapter's current setting.
 * The SecureOn password field is cleared.
 */
1842 static void get_wol(struct net_device *dev, struct ethtool_wolinfo *wol)
/* same set of modes as WOL_SUPPORTED */
1844 wol->supported = WAKE_BCAST | WAKE_MAGIC;
1845 wol->wolopts = netdev2adap(dev)->wol;
1846 memset(&wol->sopass, 0, sizeof(wol->sopass));
/*
 * Apply a Wake-on-LAN request to the port's Tx channel.  Options outside
 * WOL_SUPPORTED are tested for first.  Magic-packet wake-up is enabled with
 * the device MAC address, or disabled by passing NULL.  With WAKE_BCAST the
 * pattern matcher is programmed through t4_wol_pat_enable(); otherwise it is
 * called with all-zero arguments and enable == false.
 */
1849 static int set_wol(struct net_device *dev, struct ethtool_wolinfo *wol)
1852 struct port_info *pi = netdev_priv(dev);
1854 if (wol->wolopts & ~WOL_SUPPORTED)
1856 t4_wol_magic_enable(pi->adapter, pi->tx_chan,
1857 (wol->wolopts & WAKE_MAGIC) ? dev->dev_addr : NULL);
1858 if (wol->wolopts & WAKE_BCAST) {
1859 err = t4_wol_pat_enable(pi->adapter, pi->tx_chan, 0xfe, ~0ULL,
1862 err = t4_wol_pat_enable(pi->adapter, pi->tx_chan, 1,
1863 ~6ULL, ~0ULL, BCAST_CRC, true);
1865 t4_wol_pat_enable(pi->adapter, pi->tx_chan, 0, 0, 0, 0, false);
/* Feature bits that set_tso() switches as a group. */
1869 #define TSO_FLAGS (NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_TSO_ECN)
/*
 * Set or clear all TSO_FLAGS bits in dev->features.
 * NOTE(review): presumably selected by @value — confirm.
 */
1871 static int set_tso(struct net_device *dev, u32 value)
1874 dev->features |= TSO_FLAGS;
1876 dev->features &= ~TSO_FLAGS;
/*
 * ethtool set_flags handler.  Lets the generic helper update dev->features
 * for the RXHASH/RXVLAN/TXVLAN flags, then, if the Rx VLAN feature bit
 * actually changed, programs the new setting into the adapter with
 * t4_set_rxmode().  The previous feature word is saved so that it can be put
 * back.
 * NOTE(review): presumably the restore happens only when t4_set_rxmode()
 * fails — confirm.
 */
1880 static int set_flags(struct net_device *dev, u32 flags)
1883 unsigned long old_feat = dev->features;
1885 err = ethtool_op_set_flags(dev, flags, ETH_FLAG_RXHASH |
1886 ETH_FLAG_RXVLAN | ETH_FLAG_TXVLAN);
1890 if ((old_feat ^ dev->features) & NETIF_F_HW_VLAN_RX) {
1891 const struct port_info *pi = netdev_priv(dev);
1893 err = t4_set_rxmode(pi->adapter, pi->adapter->fn, pi->viid, -1,
1894 -1, -1, -1, !!(flags & ETH_FLAG_RXVLAN),
1897 dev->features = old_feat;
/*
 * ethtool get_rxfh_indir handler.  Copies at most min(p->size, rss_size)
 * entries of the port's RSS table into p->ring_index[], and always reports
 * the real table size back in p->size.  n is computed from the caller's
 * p->size before that field is overwritten.
 */
1902 static int get_rss_table(struct net_device *dev, struct ethtool_rxfh_indir *p)
1904 const struct port_info *pi = netdev_priv(dev);
1905 unsigned int n = min_t(unsigned int, p->size, pi->rss_size);
1907 p->size = pi->rss_size;
1909 p->ring_index[n] = pi->rss[n];
/*
 * ethtool set_rxfh_indir handler.  The new table is checked for size (must
 * equal rss_size) and every entry against the number of queue sets.  The
 * copy into pi->rss[] happens in a second pass, after the whole table has
 * been checked.  If the adapter is fully initialised the table is also
 * written to hardware via write_rss().
 */
1913 static int set_rss_table(struct net_device *dev,
1914 const struct ethtool_rxfh_indir *p)
1917 struct port_info *pi = netdev_priv(dev);
1919 if (p->size != pi->rss_size)
1921 for (i = 0; i < p->size; i++)
1922 if (p->ring_index[i] >= pi->nqsets)
1924 for (i = 0; i < p->size; i++)
1925 pi->rss[i] = p->ring_index[i];
1926 if (pi->adapter->flags & FULL_INIT_DONE)
1927 return write_rss(pi, pi->rss);
/*
 * ethtool get_rxnfc handler.
 *
 * ETHTOOL_GRXFH: translate the port's RSS mode bits (pi->rss_mode) into the
 * RXH_* field mask for info->flow_type.  The pattern for each flow type is:
 * four-tuple enabled -> IP src/dst plus both L4 port halves, else two-tuple
 * enabled -> IP src/dst only.  Some branches additionally require the UDPEN
 * bit for the four-tuple result; the AH/ESP branches only test the two-tuple
 * bit.  IPv4 flow types use the IP4 bits, IPv6 flow types the IP6 bits.
 *
 * ETHTOOL_GRXRINGS: report the number of queue sets of the port.
 */
1931 static int get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info,
1934 const struct port_info *pi = netdev_priv(dev);
1936 switch (info->cmd) {
1937 case ETHTOOL_GRXFH: {
1938 unsigned int v = pi->rss_mode;
1941 switch (info->flow_type) {
1943 if (v & FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN)
1944 info->data = RXH_IP_SRC | RXH_IP_DST |
1945 RXH_L4_B_0_1 | RXH_L4_B_2_3;
1946 else if (v & FW_RSS_VI_CONFIG_CMD_IP4TWOTUPEN)
1947 info->data = RXH_IP_SRC | RXH_IP_DST;
1950 if ((v & FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN) &&
1951 (v & FW_RSS_VI_CONFIG_CMD_UDPEN))
1952 info->data = RXH_IP_SRC | RXH_IP_DST |
1953 RXH_L4_B_0_1 | RXH_L4_B_2_3;
1954 else if (v & FW_RSS_VI_CONFIG_CMD_IP4TWOTUPEN)
1955 info->data = RXH_IP_SRC | RXH_IP_DST;
1958 case AH_ESP_V4_FLOW:
1960 if (v & FW_RSS_VI_CONFIG_CMD_IP4TWOTUPEN)
1961 info->data = RXH_IP_SRC | RXH_IP_DST;
1964 if (v & FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN)
1965 info->data = RXH_IP_SRC | RXH_IP_DST |
1966 RXH_L4_B_0_1 | RXH_L4_B_2_3;
1967 else if (v & FW_RSS_VI_CONFIG_CMD_IP6TWOTUPEN)
1968 info->data = RXH_IP_SRC | RXH_IP_DST;
1971 if ((v & FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN) &&
1972 (v & FW_RSS_VI_CONFIG_CMD_UDPEN))
1973 info->data = RXH_IP_SRC | RXH_IP_DST |
1974 RXH_L4_B_0_1 | RXH_L4_B_2_3;
1975 else if (v & FW_RSS_VI_CONFIG_CMD_IP6TWOTUPEN)
1976 info->data = RXH_IP_SRC | RXH_IP_DST;
1979 case AH_ESP_V6_FLOW:
1981 if (v & FW_RSS_VI_CONFIG_CMD_IP6TWOTUPEN)
1982 info->data = RXH_IP_SRC | RXH_IP_DST;
1987 case ETHTOOL_GRXRINGS:
1988 info->data = pi->nqsets;
/*
 * ethtool operations table for cxgb4 ports.  Most entries point at handlers
 * in this file; set_tx_csum, set_sg and get_link use the generic
 * ethtool_op_* helpers.
 */
1994 static struct ethtool_ops cxgb_ethtool_ops = {
1995 .get_settings = get_settings,
1996 .set_settings = set_settings,
1997 .get_drvinfo = get_drvinfo,
1998 .get_msglevel = get_msglevel,
1999 .set_msglevel = set_msglevel,
2000 .get_ringparam = get_sge_param,
2001 .set_ringparam = set_sge_param,
2002 .get_coalesce = get_coalesce,
2003 .set_coalesce = set_coalesce,
2004 .get_eeprom_len = get_eeprom_len,
2005 .get_eeprom = get_eeprom,
2006 .set_eeprom = set_eeprom,
2007 .get_pauseparam = get_pauseparam,
2008 .set_pauseparam = set_pauseparam,
2009 .get_rx_csum = get_rx_csum,
2010 .set_rx_csum = set_rx_csum,
2011 .set_tx_csum = ethtool_op_set_tx_ipv6_csum,
2012 .set_sg = ethtool_op_set_sg,
2013 .get_link = ethtool_op_get_link,
2014 .get_strings = get_strings,
2015 .phys_id = identify_port,
2016 .nway_reset = restart_autoneg,
2017 .get_sset_count = get_sset_count,
2018 .get_ethtool_stats = get_stats,
2019 .get_regs_len = get_regs_len,
2020 .get_regs = get_regs,
2024 .set_flags = set_flags,
2025 .get_rxnfc = get_rxnfc,
/* the RSS indirection table is exposed through the rxfh_indir hooks */
2026 .get_rxfh_indir = get_rss_table,
2027 .set_rxfh_indir = set_rss_table,
2028 .flash_device = set_flash,
/*
 * debugfs open handler: hand the inode's private pointer (set up by
 * add_debugfs_mem()) to the file so that mem_read() can decode it.
 */
2035 static int mem_open(struct inode *inode, struct file *file)
2037 file->private_data = inode->i_private;
/*
 * debugfs read handler for adapter memories.  private_data carries both the
 * adapter pointer and, in its low 2 bits, the index of the memory to read
 * (see add_debugfs_mem()).  The readable size is the inode's i_size, and
 * @count is clamped to what remains past the current position.  Data is
 * fetched through t4_mc_read() or t4_edc_read() into a local `data` buffer
 * and the requested part of each buffer is copied to user space.
 */
2041 static ssize_t mem_read(struct file *file, char __user *buf, size_t count,
2045 loff_t avail = file->f_path.dentry->d_inode->i_size;
2046 unsigned int mem = (uintptr_t)file->private_data & 3;
2047 struct adapter *adap = file->private_data - mem;
2053 if (count > avail - pos)
2054 count = avail - pos;
2062 ret = t4_mc_read(adap, pos, data, NULL);
2064 ret = t4_edc_read(adap, mem, pos, data, NULL);
/* pos need not be aligned to the buffer size: skip the leading bytes */
2068 ofst = pos % sizeof(data);
2069 len = min(count, sizeof(data) - ofst);
2070 if (copy_to_user(buf, (u8 *)data + ofst, len))
/* number of bytes actually consumed */
2077 count = pos - *ppos;
/* File operations for the per-memory debugfs files created by add_debugfs_mem(). */
2082 static const struct file_operations mem_debugfs_fops = {
2083 .owner = THIS_MODULE,
2086 .llseek = default_llseek,
/*
 * Create a read-only (owner) debugfs file exposing one adapter memory.
 * @idx is folded into the low bits of the adapter pointer stored as the
 * file's private data; mem_read() separates the two again with "& 3".
 * @size_mb is the memory size in MB and becomes the inode size in bytes.
 */
2089 static void __devinit add_debugfs_mem(struct adapter *adap, const char *name,
2090 unsigned int idx, unsigned int size_mb)
2094 de = debugfs_create_file(name, S_IRUSR, adap->debugfs_root,
2095 (void *)adap + idx, &mem_debugfs_fops);
2096 if (de && de->d_inode)
2097 de->d_inode->i_size = size_mb << 20;
/*
 * Populate the adapter's debugfs directory.  A file is created for each
 * memory that MA_TARGET_MEM_ENABLE reports as enabled: "edc0" and "edc1"
 * with a fixed size of 5 MB, and "mc" with the size read from
 * MA_EXT_MEMORY_BAR.  An "l2t" file is created as well.
 */
2100 static int __devinit setup_debugfs(struct adapter *adap)
2104 if (IS_ERR_OR_NULL(adap->debugfs_root))
2107 i = t4_read_reg(adap, MA_TARGET_MEM_ENABLE);
2108 if (i & EDRAM0_ENABLE)
2109 add_debugfs_mem(adap, "edc0", MEM_EDC0, 5);
2110 if (i & EDRAM1_ENABLE)
2111 add_debugfs_mem(adap, "edc1", MEM_EDC1, 5);
2112 if (i & EXT_MEM_ENABLE)
2113 add_debugfs_mem(adap, "mc", MEM_MC,
2114 EXT_MEM_SIZE_GET(t4_read_reg(adap, MA_EXT_MEMORY_BAR)));
2116 debugfs_create_file("l2t", S_IRUSR, adap->debugfs_root, adap,
2122 * upper-layer driver support
2126 * Allocate an active-open TID and set it to the supplied value.
/*
 * Free atids are taken from the head of the t->afree list under atid_lock
 * (BH-safe).  The atid number is the entry's index within atid_tab.
 */
2128 int cxgb4_alloc_atid(struct tid_info *t, void *data)
2132 spin_lock_bh(&t->atid_lock);
2134 union aopen_entry *p = t->afree;
2136 atid = p - t->atid_tab;
2141 spin_unlock_bh(&t->atid_lock);
2144 EXPORT_SYMBOL(cxgb4_alloc_atid);
2147 * Release an active-open TID.
/*
 * @atid indexes atid_tab directly; the entry is updated while holding
 * atid_lock.
 * NOTE(review): @atid is not range-checked in the visible code.
 */
2149 void cxgb4_free_atid(struct tid_info *t, unsigned int atid)
2151 union aopen_entry *p = &t->atid_tab[atid];
2153 spin_lock_bh(&t->atid_lock);
2157 spin_unlock_bh(&t->atid_lock);
2159 EXPORT_SYMBOL(cxgb4_free_atid);
2162 * Allocate a server TID and set it to the supplied value.
/*
 * Server TIDs are tracked in the stid_bmap bitmap under stid_lock.
 * PF_INET servers take the first free bit; the other branch reserves an
 * aligned region of order 2 (4 consecutive stids) with
 * bitmap_find_free_region().  On success @data is recorded in stid_tab and
 * the returned value is offset by stid_base.
 */
2164 int cxgb4_alloc_stid(struct tid_info *t, int family, void *data)
2168 spin_lock_bh(&t->stid_lock);
2169 if (family == PF_INET) {
2170 stid = find_first_zero_bit(t->stid_bmap, t->nstids);
2171 if (stid < t->nstids)
2172 __set_bit(stid, t->stid_bmap);
2176 stid = bitmap_find_free_region(t->stid_bmap, t->nstids, 2);
2181 t->stid_tab[stid].data = data;
2182 stid += t->stid_base;
2185 spin_unlock_bh(&t->stid_lock);
2188 EXPORT_SYMBOL(cxgb4_alloc_stid);
2191 * Release a server TID.
/*
 * @stid is the externally visible value, so stid_base is subtracted first.
 * @family must match the one used at allocation time: PF_INET clears one
 * bit, anything else releases an order-2 region.
 */
2193 void cxgb4_free_stid(struct tid_info *t, unsigned int stid, int family)
2195 stid -= t->stid_base;
2196 spin_lock_bh(&t->stid_lock);
2197 if (family == PF_INET)
2198 __clear_bit(stid, t->stid_bmap);
2200 bitmap_release_region(t->stid_bmap, stid, 2);
2201 t->stid_tab[stid].data = NULL;
2203 spin_unlock_bh(&t->stid_lock);
2205 EXPORT_SYMBOL(cxgb4_free_stid);
2208 * Populate a TID_RELEASE WR. Caller must properly size the skb.
/*
 * Builds a CPL_TID_RELEASE request in @skb: selects the Tx queue for
 * channel @chan at setup priority, appends the request with __skb_put()
 * (no room check, hence the caller's sizing duty) and fills in the
 * work-request header and the opcode/TID word.
 */
2210 static void mk_tid_release(struct sk_buff *skb, unsigned int chan,
2213 struct cpl_tid_release *req;
2215 set_wr_txq(skb, CPL_PRIORITY_SETUP, chan);
2216 req = (struct cpl_tid_release *)__skb_put(skb, sizeof(*req));
2217 INIT_TP_WR(req, tid);
2218 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_TID_RELEASE, tid));
2222 * Queue a TID release request and if necessary schedule a work queue to
/*
 * The pending list is threaded through tid_tab itself: the TID's own slot
 * stores the previous list head, and the new head is the slot's address
 * with the Tx channel packed into its low 2 bits.  The worker is scheduled
 * only if it is not already marked busy.
 */
2225 static void cxgb4_queue_tid_release(struct tid_info *t, unsigned int chan,
2228 void **p = &t->tid_tab[tid];
2229 struct adapter *adap = container_of(t, struct adapter, tids);
2231 spin_lock_bh(&adap->tid_release_lock);
2232 *p = adap->tid_release_head;
2233 /* Low 2 bits encode the Tx channel number */
2234 adap->tid_release_head = (void **)((uintptr_t)p | chan);
2235 if (!adap->tid_release_task_busy) {
2236 adap->tid_release_task_busy = true;
2237 schedule_work(&adap->tid_release_task);
2239 spin_unlock_bh(&adap->tid_release_lock);
2243 * Process the list of pending TID release requests.
/*
 * Work-queue handler.  Pops entries off adap->tid_release_head one at a
 * time: the channel is recovered from the low 2 bits of the list pointer and
 * the TID from the slot's position in tid_tab.  The lock is dropped while
 * the release message is allocated and sent, and re-taken before looking at
 * the list again.  Allocation is retried in a loop, sleeping via
 * schedule_timeout_uninterruptible(1) between attempts.  The busy flag is
 * cleared under the lock once the list is empty.
 */
2245 static void process_tid_release_list(struct work_struct *work)
2247 struct sk_buff *skb;
2248 struct adapter *adap;
2250 adap = container_of(work, struct adapter, tid_release_task);
2252 spin_lock_bh(&adap->tid_release_lock);
2253 while (adap->tid_release_head) {
2254 void **p = adap->tid_release_head;
2255 unsigned int chan = (uintptr_t)p & 3;
/* strip the channel bits to get the real slot address */
2256 p = (void *)p - chan;
2258 adap->tid_release_head = *p;
2260 spin_unlock_bh(&adap->tid_release_lock);
2262 while (!(skb = alloc_skb(sizeof(struct cpl_tid_release),
2264 schedule_timeout_uninterruptible(1);
2266 mk_tid_release(skb, chan, p - adap->tids.tid_tab);
2267 t4_ofld_send(adap, skb);
2268 spin_lock_bh(&adap->tid_release_lock);
2270 adap->tid_release_task_busy = false;
2271 spin_unlock_bh(&adap->tid_release_lock);
2275 * Release a TID and inform HW. If we are unable to allocate the release
2276 * message we defer to a work queue.
/*
 * Two paths are visible.  Fast path: the release message is allocated with
 * GFP_ATOMIC, the tid_tab slot is cleared and the message is sent at once.
 * Deferred path: the TID is handed to cxgb4_queue_tid_release().
 * tids_in_use is decremented at the end.
 * NOTE(review): presumably the decrement depends on the saved `old` entry —
 * confirm.
 */
2278 void cxgb4_remove_tid(struct tid_info *t, unsigned int chan, unsigned int tid)
2281 struct sk_buff *skb;
2282 struct adapter *adap = container_of(t, struct adapter, tids);
2284 old = t->tid_tab[tid];
2285 skb = alloc_skb(sizeof(struct cpl_tid_release), GFP_ATOMIC);
2287 t->tid_tab[tid] = NULL;
2288 mk_tid_release(skb, chan, tid);
2289 t4_ofld_send(adap, skb);
2291 cxgb4_queue_tid_release(t, chan, tid);
2293 atomic_dec(&t->tids_in_use);
2295 EXPORT_SYMBOL(cxgb4_remove_tid);
2298 * Allocate and initialize the TID tables. Returns 0 on success.
/*
 * All tables live in one t4_alloc_mem() allocation, laid out back to back:
 *   tid_tab[ntids] | atid_tab[natids] | stid_tab[nstids] | stid_bmap
 * The table pointers are carved out of that block in the same order.
 * Locks and usage counters are then initialised, the atid entries are
 * chained into the t->afree list, and the stid bitmap is cleared.
 */
2300 static int tid_init(struct tid_info *t)
2303 unsigned int natids = t->natids;
2305 size = t->ntids * sizeof(*t->tid_tab) + natids * sizeof(*t->atid_tab) +
2306 t->nstids * sizeof(*t->stid_tab) +
2307 BITS_TO_LONGS(t->nstids) * sizeof(long);
2308 t->tid_tab = t4_alloc_mem(size);
2312 t->atid_tab = (union aopen_entry *)&t->tid_tab[t->ntids];
2313 t->stid_tab = (struct serv_entry *)&t->atid_tab[natids];
2314 t->stid_bmap = (unsigned long *)&t->stid_tab[t->nstids];
2315 spin_lock_init(&t->stid_lock);
2316 spin_lock_init(&t->atid_lock);
2318 t->stids_in_use = 0;
2320 t->atids_in_use = 0;
2321 atomic_set(&t->tids_in_use, 0);
2323 /* Setup the free list for atid_tab and clear the stid bitmap. */
/* each entry points at its successor; natids is a local copy of t->natids */
2326 t->atid_tab[natids - 1].next = &t->atid_tab[natids];
2327 t->afree = t->atid_tab;
2329 bitmap_zero(t->stid_bmap, t->nstids);
2334 * cxgb4_create_server - create an IP server
2336 * @stid: the server TID
2337 * @sip: local IP address to bind server to
2338 * @sport: the server's TCP port
2339 * @queue: queue to direct messages from this server to
2341 * Create an IP server for the given port and address.
2342 * Returns <0 on error and one of the %NET_XMIT_* values on success.
/*
 * Builds a CPL_PASS_OPEN_REQ for @stid and sends it on the management
 * queue.  The peer address and port are left as 0.  The Tx channel is
 * derived from @queue, and SYN RSS steering to @queue is enabled in opt1.
 * The skb is allocated with GFP_KERNEL.
 */
2344 int cxgb4_create_server(const struct net_device *dev, unsigned int stid,
2345 __be32 sip, __be16 sport, unsigned int queue)
2348 struct sk_buff *skb;
2349 struct adapter *adap;
2350 struct cpl_pass_open_req *req;
2352 skb = alloc_skb(sizeof(*req), GFP_KERNEL);
2356 adap = netdev2adap(dev);
2357 req = (struct cpl_pass_open_req *)__skb_put(skb, sizeof(*req));
2359 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_PASS_OPEN_REQ, stid));
2360 req->local_port = sport;
2361 req->peer_port = htons(0);
2362 req->local_ip = sip;
2363 req->peer_ip = htonl(0);
2364 chan = rxq_to_chan(&adap->sge, queue);
2365 req->opt0 = cpu_to_be64(TX_CHAN(chan));
2366 req->opt1 = cpu_to_be64(CONN_POLICY_ASK |
2367 SYN_RSS_ENABLE | SYN_RSS_QUEUE(queue));
2368 return t4_mgmt_tx(adap, skb);
2370 EXPORT_SYMBOL(cxgb4_create_server);
2373 * cxgb4_best_mtu - find the entry in the MTU table closest to an MTU
2374 * @mtus: the HW MTU table
2375 * @mtu: the target MTU
2376 * @idx: index of selected entry in the MTU table
2378 * Returns the index and the value in the HW MTU table that is closest to
2379 * but does not exceed @mtu, unless @mtu is smaller than any value in the
2380 * table, in which case that smallest available value is selected.
/*
 * Linear search: advance while the next table entry still fits within @mtu,
 * never moving past the last of the NMTUS entries.
 * NOTE(review): assumes @mtus is sorted in ascending order — confirm.
 */
2382 unsigned int cxgb4_best_mtu(const unsigned short *mtus, unsigned short mtu,
2387 while (i < NMTUS - 1 && mtus[i + 1] <= mtu)
2393 EXPORT_SYMBOL(cxgb4_best_mtu);
2396 * cxgb4_port_chan - get the HW channel of a port
2397 * @dev: the net device for the port
2399 * Return the HW Tx channel of the given port.
/* Exported accessor: reads tx_chan from the port_info of @dev. */
2401 unsigned int cxgb4_port_chan(const struct net_device *dev)
2403 return netdev2pinfo(dev)->tx_chan;
2405 EXPORT_SYMBOL(cxgb4_port_chan);
2408 * cxgb4_port_viid - get the VI id of a port
2409 * @dev: the net device for the port
2411 * Return the VI id of the given port.
/* Exported accessor: reads viid from the port_info of @dev. */
2413 unsigned int cxgb4_port_viid(const struct net_device *dev)
2415 return netdev2pinfo(dev)->viid;
2417 EXPORT_SYMBOL(cxgb4_port_viid);
2420 * cxgb4_port_idx - get the index of a port
2421 * @dev: the net device for the port
2423 * Return the index of the given port.
/* Exported accessor: reads port_id from the port_info of @dev. */
2425 unsigned int cxgb4_port_idx(const struct net_device *dev)
2427 return netdev2pinfo(dev)->port_id;
2429 EXPORT_SYMBOL(cxgb4_port_idx);
/*
 * Exported helper: fetch the adapter's TCP statistics into @v4 and @v6.
 * The adapter is looked up from the PCI device's driver data, and the
 * read is done under stats_lock.
 */
2431 void cxgb4_get_tcp_stats(struct pci_dev *pdev, struct tp_tcp_stats *v4,
2432 struct tp_tcp_stats *v6)
2434 struct adapter *adap = pci_get_drvdata(pdev);
2436 spin_lock(&adap->stats_lock);
2437 t4_tp_get_tcp_stats(adap, v4, v6);
2438 spin_unlock(&adap->stats_lock);
2440 EXPORT_SYMBOL(cxgb4_get_tcp_stats);
/*
 * Exported helper: program the ULP RX iSCSI tag mask and page-size
 * registers.  @pgsz_order must point to at least 4 entries; elements 0..3
 * are written to the HPZ0..HPZ3 fields.
 */
2442 void cxgb4_iscsi_init(struct net_device *dev, unsigned int tag_mask,
2443 const unsigned int *pgsz_order)
2445 struct adapter *adap = netdev2adap(dev);
2447 t4_write_reg(adap, ULP_RX_ISCSI_TAGMASK, tag_mask);
2448 t4_write_reg(adap, ULP_RX_ISCSI_PSZ, HPZ0(pgsz_order[0]) |
2449 HPZ1(pgsz_order[1]) | HPZ2(pgsz_order[2]) |
2450 HPZ3(pgsz_order[3]));
2452 EXPORT_SYMBOL(cxgb4_iscsi_init);
/* Forward declaration; check_neigh_update() compares against its .driver. */
2454 static struct pci_driver cxgb4_driver;
/*
 * Forward a neighbour update to the L2T code if the neighbour's device
 * belongs to this driver.  VLAN devices are resolved to their real device
 * first; ownership is decided by comparing the parent device's driver with
 * cxgb4_driver.
 */
2456 static void check_neigh_update(struct neighbour *neigh)
2458 const struct device *parent;
2459 const struct net_device *netdev = neigh->dev;
2461 if (netdev->priv_flags & IFF_802_1Q_VLAN)
2462 netdev = vlan_dev_real_dev(netdev);
2463 parent = netdev->dev.parent;
2464 if (parent && parent->driver == &cxgb4_driver.driver)
2465 t4_l2t_update(dev_get_drvdata(parent), neigh);
/*
 * Netevent notifier callback.  NETEVENT_NEIGH_UPDATE is passed on to
 * check_neigh_update(); NETEVENT_PMTU_UPDATE and NETEVENT_REDIRECT are
 * listed explicitly, with no handling code visible for them.
 */
2468 static int netevent_cb(struct notifier_block *nb, unsigned long event,
2472 case NETEVENT_NEIGH_UPDATE:
2473 check_neigh_update(data);
2475 case NETEVENT_PMTU_UPDATE:
2476 case NETEVENT_REDIRECT:
/*
 * Whether cxgb4_netevent_nb is currently registered; set in uld_attach()
 * and cleared in detach_ulds().
 */
2483 static bool netevent_registered;
2484 static struct notifier_block cxgb4_netevent_nb = {
2485 .notifier_call = netevent_cb
/*
 * Attach one upper-layer driver (ULD) of type @uld to @adap.  A
 * cxgb4_lld_info describing the adapter is filled in and passed to the
 * ULD's add() method.  If add() returns an error pointer a warning is
 * logged.  Otherwise the handle is stored in adap->uld_handle[], the
 * netevent notifier is registered on first use, and the ULD is told the
 * adapter is up if FULL_INIT_DONE is already set.
 *
 * NOTE(review): `lli` is not visibly zero-initialised, and rxq_ids/nrxq are
 * assigned only for the RDMA and ISCSI types — confirm no other ULD type
 * can reach this point.
 */
2488 static void uld_attach(struct adapter *adap, unsigned int uld)
2491 struct cxgb4_lld_info lli;
2493 lli.pdev = adap->pdev;
2494 lli.l2t = adap->l2t;
2495 lli.tids = &adap->tids;
2496 lli.ports = adap->port;
2497 lli.vr = &adap->vres;
2498 lli.mtus = adap->params.mtus;
2499 if (uld == CXGB4_ULD_RDMA) {
2500 lli.rxq_ids = adap->sge.rdma_rxq;
2501 lli.nrxq = adap->sge.rdmaqs;
2502 } else if (uld == CXGB4_ULD_ISCSI) {
2503 lli.rxq_ids = adap->sge.ofld_rxq;
2504 lli.nrxq = adap->sge.ofldqsets;
2506 lli.ntxq = adap->sge.ofldqsets;
/* channel count and port count are both taken from params.nports */
2507 lli.nchan = adap->params.nports;
2508 lli.nports = adap->params.nports;
2509 lli.wr_cred = adap->params.ofldq_wr_cred;
2510 lli.adapter_type = adap->params.rev;
2511 lli.iscsi_iolen = MAXRXDATA_GET(t4_read_reg(adap, TP_PARA_REG2));
2512 lli.udb_density = 1 << QUEUESPERPAGEPF0_GET(
2513 t4_read_reg(adap, SGE_EGRESS_QUEUES_PER_PAGE_PF) >>
2515 lli.ucq_density = 1 << QUEUESPERPAGEPF0_GET(
2516 t4_read_reg(adap, SGE_INGRESS_QUEUES_PER_PAGE_PF) >>
2518 lli.gts_reg = adap->regs + MYPF_REG(SGE_PF_GTS);
2519 lli.db_reg = adap->regs + MYPF_REG(SGE_PF_KDOORBELL);
2520 lli.fw_vers = adap->params.fw_vers;
2522 handle = ulds[uld].add(&lli);
2523 if (IS_ERR(handle)) {
2524 dev_warn(adap->pdev_dev,
2525 "could not attach to the %s driver, error %ld\n",
2526 uld_str[uld], PTR_ERR(handle));
2530 adap->uld_handle[uld] = handle;
2532 if (!netevent_registered) {
2533 register_netevent_notifier(&cxgb4_netevent_nb);
2534 netevent_registered = true;
2537 if (adap->flags & FULL_INIT_DONE)
2538 ulds[uld].state_change(handle, CXGB4_STATE_UP);
/*
 * Add @adap to the global adapter_list and walk the ULD types, attaching
 * through uld_attach().  Everything runs under uld_mutex.
 */
2541 static void attach_ulds(struct adapter *adap)
2545 mutex_lock(&uld_mutex);
2546 list_add_tail(&adap->list_node, &adapter_list);
2547 for (i = 0; i < CXGB4_ULD_MAX; i++)
2549 uld_attach(adap, i);
2550 mutex_unlock(&uld_mutex);
/*
 * Reverse of attach_ulds(): remove @adap from adapter_list, send
 * CXGB4_STATE_DETACH to every ULD that holds a handle on it and drop the
 * handles.  The netevent notifier is unregistered once the list is empty.
 * Runs under uld_mutex.
 */
2553 static void detach_ulds(struct adapter *adap)
2557 mutex_lock(&uld_mutex);
2558 list_del(&adap->list_node);
2559 for (i = 0; i < CXGB4_ULD_MAX; i++)
2560 if (adap->uld_handle[i]) {
2561 ulds[i].state_change(adap->uld_handle[i],
2562 CXGB4_STATE_DETACH);
2563 adap->uld_handle[i] = NULL;
2565 if (netevent_registered && list_empty(&adapter_list)) {
2566 unregister_netevent_notifier(&cxgb4_netevent_nb);
2567 netevent_registered = false;
2569 mutex_unlock(&uld_mutex);
/*
 * Deliver @new_state to every ULD currently attached to @adap (those with a
 * non-NULL handle), under uld_mutex.
 */
2572 static void notify_ulds(struct adapter *adap, enum cxgb4_state new_state)
2576 mutex_lock(&uld_mutex);
2577 for (i = 0; i < CXGB4_ULD_MAX; i++)
2578 if (adap->uld_handle[i])
2579 ulds[i].state_change(adap->uld_handle[i], new_state);
2580 mutex_unlock(&uld_mutex);
2584 * cxgb4_register_uld - register an upper-layer driver
2585 * @type: the ULD type
2586 * @p: the ULD methods
2588 * Registers an upper-layer driver with this driver and notifies the ULD
2589 * about any presently available devices that support its type. Returns
2590 * %-EBUSY if a ULD of the same type is already registered.
/*
 * @type is validated against CXGB4_ULD_MAX before the mutex is taken.  A
 * slot whose add() method is already set is treated as occupied.  Once
 * registered, the ULD is attached to every adapter on adapter_list.
 */
2592 int cxgb4_register_uld(enum cxgb4_uld type, const struct cxgb4_uld_info *p)
2595 struct adapter *adap;
2597 if (type >= CXGB4_ULD_MAX)
2599 mutex_lock(&uld_mutex);
2600 if (ulds[type].add) {
2605 list_for_each_entry(adap, &adapter_list, list_node)
2606 uld_attach(adap, type);
2607 out: mutex_unlock(&uld_mutex);
2610 EXPORT_SYMBOL(cxgb4_register_uld);
2613 * cxgb4_unregister_uld - unregister an upper-layer driver
2614 * @type: the ULD type
2616 * Unregisters an existing upper-layer driver.
/*
 * Clears the ULD's handle on every known adapter and marks the slot free by
 * resetting its add() method, all under uld_mutex.
 * NOTE(review): the visible code does not send a state change to the ULD
 * before dropping its handles — presumably the ULD cleans up itself;
 * confirm.
 */
2618 int cxgb4_unregister_uld(enum cxgb4_uld type)
2620 struct adapter *adap;
2622 if (type >= CXGB4_ULD_MAX)
2624 mutex_lock(&uld_mutex);
2625 list_for_each_entry(adap, &adapter_list, list_node)
2626 adap->uld_handle[type] = NULL;
2627 ulds[type].add = NULL;
2628 mutex_unlock(&uld_mutex);
2631 EXPORT_SYMBOL(cxgb4_unregister_uld);
2634 * cxgb_up - enable the adapter
2635 * @adap: adapter being enabled
2637 * Called when the first port is enabled, this function performs the
2638 * actions necessary to make an adapter operational, such as completing
2639 * the initialization of HW modules, and enabling interrupts.
2641 * Must be called with the rtnl lock held.
/*
 * Order of operations: SGE queues, RSS, then interrupts.  With MSI-X the
 * non-data vector (msix_info[0]) is requested first and the per-queue
 * vectors after it; the non-data vector is freed again on the per-queue
 * error path.  Without MSI-X a single IRQ is requested, shared unless MSI
 * is in use.  On success interrupts are enabled, FULL_INIT_DONE is set and
 * the ULDs are notified.  The error path logs the IRQ failure and frees the
 * SGE resources.
 */
2643 static int cxgb_up(struct adapter *adap)
2647 err = setup_sge_queues(adap);
2650 err = setup_rss(adap);
2654 if (adap->flags & USING_MSIX) {
2655 name_msix_vecs(adap);
2656 err = request_irq(adap->msix_info[0].vec, t4_nondata_intr, 0,
2657 adap->msix_info[0].desc, adap);
2661 err = request_msix_queue_irqs(adap);
2663 free_irq(adap->msix_info[0].vec, adap);
2667 err = request_irq(adap->pdev->irq, t4_intr_handler(adap),
2668 (adap->flags & USING_MSI) ? 0 : IRQF_SHARED,
2675 t4_intr_enable(adap);
2676 adap->flags |= FULL_INIT_DONE;
2677 notify_ulds(adap, CXGB4_STATE_UP);
2681 dev_err(adap->pdev_dev, "request_irq failed, err %d\n", err);
2683 t4_free_sge_resources(adap);
/*
 * Undo cxgb_up(): disable interrupts, cancel the TID-release worker and
 * reset its list state, free the IRQs (MSI-X or single), quiesce Rx, stop
 * the SGE, free its resources and clear FULL_INIT_DONE.
 */
2687 static void cxgb_down(struct adapter *adapter)
2689 t4_intr_disable(adapter);
2690 cancel_work_sync(&adapter->tid_release_task);
2691 adapter->tid_release_task_busy = false;
2692 adapter->tid_release_head = NULL;
2694 if (adapter->flags & USING_MSIX) {
2695 free_msix_queue_irqs(adapter);
2696 free_irq(adapter->msix_info[0].vec, adapter);
2698 free_irq(adapter->pdev->irq, adapter);
2699 quiesce_rx(adapter);
2700 t4_sge_stop(adapter);
2701 t4_free_sge_resources(adapter);
2702 adapter->flags &= ~FULL_INIT_DONE;
2706 * net_device operations
/*
 * ndo_open handler.  Brings the whole adapter up via cxgb_up() if it is
 * not yet fully initialised, sets the real number of Tx and Rx queues to
 * the port's queue-set count, starts the link and the Tx queues.
 */
2708 static int cxgb_open(struct net_device *dev)
2711 struct port_info *pi = netdev_priv(dev);
2712 struct adapter *adapter = pi->adapter;
2714 if (!(adapter->flags & FULL_INIT_DONE)) {
2715 err = cxgb_up(adapter);
2720 netif_set_real_num_tx_queues(dev, pi->nqsets);
2721 err = netif_set_real_num_rx_queues(dev, pi->nqsets);
2724 err = link_start(dev);
2726 netif_tx_start_all_queues(dev);
/*
 * ndo_stop handler.  Stops the Tx queues, drops carrier and disables the
 * port's virtual interface for both Rx and Tx.  The adapter itself is not
 * brought down here.
 */
2730 static int cxgb_close(struct net_device *dev)
2732 struct port_info *pi = netdev_priv(dev);
2733 struct adapter *adapter = pi->adapter;
2735 netif_tx_stop_all_queues(dev);
2736 netif_carrier_off(dev);
2737 return t4_enable_vi(adapter, adapter->fn, pi->viid, false, false);
/*
 * ndo_get_stats64 handler.  Takes a snapshot of the port's hardware
 * counters under stats_lock and maps them onto rtnl_link_stats64.
 * Counters with no hardware equivalent are reported as 0.
 */
2740 static struct rtnl_link_stats64 *cxgb_get_stats(struct net_device *dev,
2741 struct rtnl_link_stats64 *ns)
2743 struct port_stats stats;
2744 struct port_info *p = netdev_priv(dev);
2745 struct adapter *adapter = p->adapter;
2747 spin_lock(&adapter->stats_lock);
2748 t4_get_port_stats(adapter, p->tx_chan, &stats);
2749 spin_unlock(&adapter->stats_lock);
2751 ns->tx_bytes = stats.tx_octets;
2752 ns->tx_packets = stats.tx_frames;
2753 ns->rx_bytes = stats.rx_octets;
2754 ns->rx_packets = stats.rx_frames;
2755 ns->multicast = stats.rx_mcast_frames;
2757 /* detailed rx_errors */
2758 ns->rx_length_errors = stats.rx_jabber + stats.rx_too_long +
2760 ns->rx_over_errors = 0;
2761 ns->rx_crc_errors = stats.rx_fcs_err;
2762 ns->rx_frame_errors = stats.rx_symbol_err;
/* overflow and truncation counts of all four channels are summed */
2763 ns->rx_fifo_errors = stats.rx_ovflow0 + stats.rx_ovflow1 +
2764 stats.rx_ovflow2 + stats.rx_ovflow3 +
2765 stats.rx_trunc0 + stats.rx_trunc1 +
2766 stats.rx_trunc2 + stats.rx_trunc3;
2767 ns->rx_missed_errors = 0;
2769 /* detailed tx_errors */
2770 ns->tx_aborted_errors = 0;
2771 ns->tx_carrier_errors = 0;
2772 ns->tx_fifo_errors = 0;
2773 ns->tx_heartbeat_errors = 0;
2774 ns->tx_window_errors = 0;
2776 ns->tx_errors = stats.tx_error_frames;
/* rx_errors is built from the detailed fields computed above */
2777 ns->rx_errors = stats.rx_symbol_err + stats.rx_fcs_err +
2778 ns->rx_length_errors + stats.rx_len_err + ns->rx_fifo_errors;
/*
 * ndo_do_ioctl handler for the MII ioctls.  The request's ifr_data area is
 * used in place as a struct mii_ioctl_data.  The PHY id is decoded either
 * as a clause-45 id (port and device address) or as a clause-22 id
 * (< 32, register number masked to 5 bits).  Register reads and writes go
 * through t4_mdio_rd()/t4_mdio_wr() using the adapter's function number as
 * mailbox.
 */
2782 static int cxgb_ioctl(struct net_device *dev, struct ifreq *req, int cmd)
2785 int ret = 0, prtad, devad;
2786 struct port_info *pi = netdev_priv(dev);
2787 struct mii_ioctl_data *data = (struct mii_ioctl_data *)&req->ifr_data;
/* a negative mdio_addr is tested for before the PHY id is reported */
2791 if (pi->mdio_addr < 0)
2793 data->phy_id = pi->mdio_addr;
2797 if (mdio_phy_id_is_c45(data->phy_id)) {
2798 prtad = mdio_phy_id_prtad(data->phy_id);
2799 devad = mdio_phy_id_devad(data->phy_id);
2800 } else if (data->phy_id < 32) {
2801 prtad = data->phy_id;
2803 data->reg_num &= 0x1f;
2807 mbox = pi->adapter->fn;
2808 if (cmd == SIOCGMIIREG)
2809 ret = t4_mdio_rd(pi->adapter, mbox, prtad, devad,
2810 data->reg_num, &data->val_out);
2812 ret = t4_mdio_wr(pi->adapter, mbox, prtad, devad,
2813 data->reg_num, data->val_in);
/*
 * ndo_set_rx_mode handler: thin wrapper around set_rxmode() with an MTU
 * argument of -1.  Any error is dropped, as the hook returns void.
 */
2821 static void cxgb_set_rxmode(struct net_device *dev)
2823 /* unfortunately we can't return errors to the stack */
2824 set_rxmode(dev, -1, false);
/*
 * ndo_change_mtu handler.  Accepts MTUs in the range [81, MAX_MTU] and
 * programs the new value into the port's virtual interface with
 * t4_set_rxmode(), leaving the other Rx-mode arguments at -1.
 */
2827 static int cxgb_change_mtu(struct net_device *dev, int new_mtu)
2830 struct port_info *pi = netdev_priv(dev);
2832 if (new_mtu < 81 || new_mtu > MAX_MTU) /* accommodate SACK */
2834 ret = t4_set_rxmode(pi->adapter, pi->adapter->fn, pi->viid, new_mtu, -1,
/*
 * ndo_set_mac_address handler.  @p is a struct sockaddr holding the new
 * address.  After validation the address is programmed with
 * t4_change_mac(), replacing the port's current exact-match filter
 * (xact_addr_filt).  The return value of t4_change_mac() is then stored as
 * the new filter index, and the address is copied into dev->dev_addr.
 */
2841 static int cxgb_set_mac_addr(struct net_device *dev, void *p)
2844 struct sockaddr *addr = p;
2845 struct port_info *pi = netdev_priv(dev);
2847 if (!is_valid_ether_addr(addr->sa_data))
2850 ret = t4_change_mac(pi->adapter, pi->adapter->fn, pi->viid,
2851 pi->xact_addr_filt, addr->sa_data, true, true);
2855 memcpy(dev->dev_addr, addr->sa_data, dev->addr_len);
2856 pi->xact_addr_filt = ret;
2860 #ifdef CONFIG_NET_POLL_CONTROLLER
/*
 * ndo_poll_controller handler: run the interrupt handlers by hand.
 * With MSI-X the per-queue handler is called for each of the port's Rx
 * queues; otherwise the adapter-wide handler is called once.
 */
2861 static void cxgb_netpoll(struct net_device *dev)
2863 struct port_info *pi = netdev_priv(dev);
2864 struct adapter *adap = pi->adapter;
2866 if (adap->flags & USING_MSIX) {
2868 struct sge_eth_rxq *rx = &adap->sge.ethrxq[pi->first_qset];
2870 for (i = pi->nqsets; i; i--, rx++)
2871 t4_sge_intr_msix(0, &rx->rspq);
2873 t4_intr_handler(adap)(0, adap);
/*
 * net_device operations for cxgb4 ports.  Address validation uses the
 * generic eth_validate_addr(); the poll controller hook is only present
 * with CONFIG_NET_POLL_CONTROLLER.
 */
2877 static const struct net_device_ops cxgb4_netdev_ops = {
2878 .ndo_open = cxgb_open,
2879 .ndo_stop = cxgb_close,
2880 .ndo_start_xmit = t4_eth_xmit,
2881 .ndo_get_stats64 = cxgb_get_stats,
2882 .ndo_set_rx_mode = cxgb_set_rxmode,
2883 .ndo_set_mac_address = cxgb_set_mac_addr,
2884 .ndo_validate_addr = eth_validate_addr,
2885 .ndo_do_ioctl = cxgb_ioctl,
2886 .ndo_change_mtu = cxgb_change_mtu,
2887 #ifdef CONFIG_NET_POLL_CONTROLLER
2888 .ndo_poll_controller = cxgb_netpoll,
/*
 * Stop the adapter after a fatal error: clear GLOBALENABLE in SGE_CONTROL,
 * disable interrupts and log an alert.
 */
2892 void t4_fatal_err(struct adapter *adap)
2894 t4_set_reg_field(adap, SGE_CONTROL, GLOBALENABLE, 0);
2895 t4_intr_disable(adap);
2896 dev_alert(adap->pdev_dev, "encountered fatal error, adapter stopped\n");
/*
 * Program the PCIe memory-access windows.  Windows 0..2 are placed in BAR0
 * at MEMWINn_BASE.  Their size is encoded as log2(aperture) - 10, i.e.
 * log2 of the aperture in KB.  If an on-chip queue region is configured
 * (vres.ocq.size != 0), window 3 is additionally set up in BAR2 (BIR(1)),
 * sized to the region rounded up to a power of two, with its offset
 * register pointing at vres.ocq.start.
 */
2899 static void setup_memwin(struct adapter *adap)
2903 bar0 = pci_resource_start(adap->pdev, 0); /* truncation intentional */
2904 t4_write_reg(adap, PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_BASE_WIN, 0),
2905 (bar0 + MEMWIN0_BASE) | BIR(0) |
2906 WINDOW(ilog2(MEMWIN0_APERTURE) - 10));
2907 t4_write_reg(adap, PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_BASE_WIN, 1),
2908 (bar0 + MEMWIN1_BASE) | BIR(0) |
2909 WINDOW(ilog2(MEMWIN1_APERTURE) - 10));
2910 t4_write_reg(adap, PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_BASE_WIN, 2),
2911 (bar0 + MEMWIN2_BASE) | BIR(0) |
2912 WINDOW(ilog2(MEMWIN2_APERTURE) - 10));
2913 if (adap->vres.ocq.size) {
2914 unsigned int start, sz_kb;
2916 start = pci_resource_start(adap->pdev, 2) +
2917 OCQ_WIN_OFFSET(adap->pdev, &adap->vres);
/* sz_kb is already in KB, so no "- 10" is needed for this window */
2918 sz_kb = roundup_pow_of_two(adap->vres.ocq.size) >> 10;
2920 PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_BASE_WIN, 3),
2921 start | BIR(1) | WINDOW(ilog2(sz_kb)));
2923 PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_OFFSET, 3),
2924 adap->vres.ocq.start);
2926 PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_OFFSET, 3));
/*
 * Firmware-facing initialization used by both adap_init0() and
 * eeh_slot_reset(): read the device capabilities, write back the selected
 * set, configure global RSS and the PF resources, adjust a few registers
 * and finish with t4_early_init().
 *
 * @adap: the adapter
 * @c: caller-provided command buffer; it is zeroed here and used for both
 *     the capabilities request and its reply
 *
 * The last visible statement returns the t4_early_init() result. Earlier
 * error returns are not visible in this excerpt.
 */
2930 static int adap_init1(struct adapter *adap, struct fw_caps_config_cmd *c)
2935 /* get device capabilities */
2936 memset(c, 0, sizeof(*c));
2937 c->op_to_write = htonl(FW_CMD_OP(FW_CAPS_CONFIG_CMD) |
2938 FW_CMD_REQUEST | FW_CMD_READ);
2939 c->retval_len16 = htonl(FW_LEN16(*c));
2940 ret = t4_wr_mbox(adap, adap->fn, c, sizeof(*c), c);
2944 /* select capabilities we'll be using */
/*
 * When the firmware reports FW_CAPS_CONFIG_NIC_VM, niccaps either has that
 * bit toggled off (XOR) or is reduced to that bit alone. The condition that
 * picks between the two is not visible here; presumably it is vf_acls.
 */
2945 if (c->niccaps & htons(FW_CAPS_CONFIG_NIC_VM)) {
2947 c->niccaps ^= htons(FW_CAPS_CONFIG_NIC_VM);
2949 c->niccaps = htons(FW_CAPS_CONFIG_NIC_VM);
2950 } else if (vf_acls) {
/*
 * NOTE(review): the message below has no trailing newline, unlike the
 * other dev_err() messages in this file -- confirm and fix.
 */
2951 dev_err(adap->pdev_dev, "virtualization ACLs not supported");
/* Write the (possibly edited) capabilities back; no reply buffer is given. */
2954 c->op_to_write = htonl(FW_CMD_OP(FW_CAPS_CONFIG_CMD) |
2955 FW_CMD_REQUEST | FW_CMD_WRITE);
2956 ret = t4_wr_mbox(adap, adap->fn, c, sizeof(*c), NULL);
2960 ret = t4_config_glbl_rss(adap, adap->fn,
2961 FW_RSS_GLB_CONFIG_CMD_MODE_BASICVIRTUAL,
2962 FW_RSS_GLB_CONFIG_CMD_TNLMAPEN |
2963 FW_RSS_GLB_CONFIG_CMD_TNLALLLKP);
/* Resource settings for this PF itself (pf = adap->fn, vf = 0). */
2967 ret = t4_cfg_pfvf(adap, adap->fn, adap->fn, 0, MAX_EGRQ, 64, MAX_INGQ,
2968 0, 0, 4, 0xf, 0xf, 16, FW_CMD_CAP_PF, FW_CMD_CAP_PF);
2974 /* tweak some settings */
2975 t4_write_reg(adap, TP_SHIFT_CNT, 0x64f8849);
2976 t4_write_reg(adap, ULP_RX_TDDP_PSZ, HPZ0(PAGE_SHIFT - 12));
/*
 * Indirect access: select TP_INGRESS_CONFIG through TP_PIO_ADDR, then
 * read-modify-write TP_PIO_DATA to clear CSUM_HAS_PSEUDO_HDR.
 */
2977 t4_write_reg(adap, TP_PIO_ADDR, TP_INGRESS_CONFIG);
2978 v = t4_read_reg(adap, TP_PIO_DATA);
2979 t4_write_reg(adap, TP_PIO_DATA, v & ~CSUM_HAS_PSEUDO_HDR);
2981 /* get basic stuff going */
2982 return t4_early_init(adap, adap->fn);
2986 * Max # of ATIDs. The absolute HW max is 16K but we keep it lower.
/* adap_init0() uses this as the upper bound when it sets adap->tids.natids. */
2988 #define MAX_ATIDS 8192U
2991 * Phase 0 of initialization: contact FW, obtain config, perform basic init.
/*
 * @adap: the adapter
 *
 * Checks the firmware version, connects to the firmware as master, resets
 * it, then queries the parameters that fill in adap->params, adap->tids,
 * adap->sge and adap->vres. Sets FW_OK in adap->flags once the queries are
 * done. Many error-handling lines are not visible in this excerpt.
 */
2993 static int adap_init0(struct adapter *adap)
2997 enum dev_state state;
2998 u32 params[7], val[7];
2999 struct fw_caps_config_cmd c;
/*
 * If the version check returns -EINVAL or a positive value, try
 * upgrade_fw() and, when that succeeds, run the check again.
 */
3001 ret = t4_check_fw_version(adap);
3002 if (ret == -EINVAL || ret > 0) {
3003 if (upgrade_fw(adap) >= 0) /* recache FW version */
3004 ret = t4_check_fw_version(adap);
3009 /* contact FW, request master */
3010 ret = t4_fw_hello(adap, adap->fn, adap->fn, MASTER_MUST, &state);
3012 dev_err(adap->pdev_dev, "could not connect to FW, error %d\n",
3018 ret = t4_fw_reset(adap, adap->fn, PIORSTMODE | PIORST);
/*
 * SGE interrupt holdoff settings: every timer except the last comes from
 * intr_holdoff[] clamped to MAX_SGE_TIMERVAL, the last one is fixed at
 * MAX_SGE_TIMERVAL. Counter 0 is 1 and the remaining counters come from
 * intr_cnt[] (the clamp value is on a line not visible here).
 */
3022 for (v = 0; v < SGE_NTIMERS - 1; v++)
3023 adap->sge.timer_val[v] = min(intr_holdoff[v], MAX_SGE_TIMERVAL);
3024 adap->sge.timer_val[SGE_NTIMERS - 1] = MAX_SGE_TIMERVAL;
3025 adap->sge.counter_val[0] = 1;
3026 for (v = 1; v < SGE_NCOUNTERS; v++)
3027 adap->sge.counter_val[v] = min(intr_cnt[v - 1],
/* Builds the identifier of a device-wide firmware parameter. */
3029 #define FW_PARAM_DEV(param) \
3030 (FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) | \
3031 FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_##param))
3033 params[0] = FW_PARAM_DEV(CCLK);
3034 ret = t4_query_params(adap, adap->fn, adap->fn, 0, 1, params, val);
3037 adap->params.vpd.cclk = val[0];
3039 ret = adap_init1(adap, &c);
/* Builds the identifier of a per-function parameter, tagged with adap->fn. */
3043 #define FW_PARAM_PFVF(param) \
3044 (FW_PARAMS_MNEM(FW_PARAMS_MNEM_PFVF) | \
3045 FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_PFVF_##param) | \
3046 FW_PARAMS_PARAM_Y(adap->fn))
3048 params[0] = FW_PARAM_DEV(PORTVEC);
3049 params[1] = FW_PARAM_PFVF(L2T_START);
3050 params[2] = FW_PARAM_PFVF(L2T_END);
3051 params[3] = FW_PARAM_PFVF(FILTER_START);
3052 params[4] = FW_PARAM_PFVF(FILTER_END);
3053 params[5] = FW_PARAM_PFVF(IQFLINT_START);
3054 params[6] = FW_PARAM_PFVF(EQ_START);
3055 ret = t4_query_params(adap, adap->fn, adap->fn, 0, 7, params, val);
/*
 * START/END pairs are inclusive ranges, hence the end - start + 1 sizes
 * here and below. The use of val[0..2] is not visible in this excerpt.
 */
3059 adap->tids.ftid_base = val[3];
3060 adap->tids.nftids = val[4] - val[3] + 1;
3061 adap->sge.ingr_start = val[5];
3062 adap->sge.egr_start = val[6];
3065 /* query offload-related parameters */
3066 params[0] = FW_PARAM_DEV(NTID);
3067 params[1] = FW_PARAM_PFVF(SERVER_START);
3068 params[2] = FW_PARAM_PFVF(SERVER_END);
3069 params[3] = FW_PARAM_PFVF(TDDP_START);
3070 params[4] = FW_PARAM_PFVF(TDDP_END);
3071 params[5] = FW_PARAM_DEV(FLOWC_BUFFIFO_SZ);
3072 ret = t4_query_params(adap, adap->fn, adap->fn, 0, 6, params,
/* ATIDs are limited to half of the TIDs and to at most MAX_ATIDS. */
3076 adap->tids.ntids = val[0];
3077 adap->tids.natids = min(adap->tids.ntids / 2, MAX_ATIDS);
3078 adap->tids.stid_base = val[1];
3079 adap->tids.nstids = val[2] - val[1] + 1;
3080 adap->vres.ddp.start = val[3];
3081 adap->vres.ddp.size = val[4] - val[3] + 1;
3082 adap->params.ofldq_wr_cred = val[5];
3083 adap->params.offload = 1;
/* STAG, RQ and PBL ranges. */
3086 params[0] = FW_PARAM_PFVF(STAG_START);
3087 params[1] = FW_PARAM_PFVF(STAG_END);
3088 params[2] = FW_PARAM_PFVF(RQ_START);
3089 params[3] = FW_PARAM_PFVF(RQ_END);
3090 params[4] = FW_PARAM_PFVF(PBL_START);
3091 params[5] = FW_PARAM_PFVF(PBL_END);
3092 ret = t4_query_params(adap, adap->fn, adap->fn, 0, 6, params,
3096 adap->vres.stag.start = val[0];
3097 adap->vres.stag.size = val[1] - val[0] + 1;
3098 adap->vres.rq.start = val[2];
3099 adap->vres.rq.size = val[3] - val[2] + 1;
3100 adap->vres.pbl.start = val[4];
3101 adap->vres.pbl.size = val[5] - val[4] + 1;
/* QP (SQRQ), CQ and OCQ ranges. */
3103 params[0] = FW_PARAM_PFVF(SQRQ_START);
3104 params[1] = FW_PARAM_PFVF(SQRQ_END);
3105 params[2] = FW_PARAM_PFVF(CQ_START);
3106 params[3] = FW_PARAM_PFVF(CQ_END);
3107 params[4] = FW_PARAM_PFVF(OCQ_START);
3108 params[5] = FW_PARAM_PFVF(OCQ_END);
3109 ret = t4_query_params(adap, adap->fn, adap->fn, 0, 6, params,
3113 adap->vres.qp.start = val[0];
3114 adap->vres.qp.size = val[1] - val[0] + 1;
3115 adap->vres.cq.start = val[2];
3116 adap->vres.cq.size = val[3] - val[2] + 1;
3117 adap->vres.ocq.start = val[4];
3118 adap->vres.ocq.size = val[5] - val[4] + 1;
/* iSCSI range. */
3121 params[0] = FW_PARAM_PFVF(ISCSI_START);
3122 params[1] = FW_PARAM_PFVF(ISCSI_END);
3123 ret = t4_query_params(adap, adap->fn, adap->fn, 0, 2, params,
3127 adap->vres.iscsi.start = val[0];
3128 adap->vres.iscsi.size = val[1] - val[0] + 1;
3130 #undef FW_PARAM_PFVF
/*
 * The port count is the number of bits set in port_vec. The assignment of
 * port_vec is not visible here; presumably it is val[0] of the PORTVEC
 * query above -- confirm.
 */
3133 adap->params.nports = hweight32(port_vec);
3134 adap->params.portvec = port_vec;
3135 adap->flags |= FW_OK;
3137 /* These are finalized by FW initialization, load their values now */
3138 v = t4_read_reg(adap, TP_TIMER_RESOLUTION);
3139 adap->params.tp.tre = TIMERRESOLUTION_GET(v);
3140 t4_read_mtu_tbl(adap, adap->params.mtus, NULL);
3141 t4_load_mtus(adap, adap->params.mtus, adap->params.a_wnd,
3142 adap->params.b_wnd);
3144 #ifdef CONFIG_PCI_IOV
3146 * Provision resource limits for Virtual Functions. We currently
3147 * grant them all the same static resource limits except for the Port
3148 * Access Rights Mask which we're assigning based on the PF. All of
3149 * the static provisioning stuff for both the PF and VF really needs
3150 * to be managed in a persistent manner for each device which the
3151 * firmware controls.
3156 for (pf = 0; pf < ARRAY_SIZE(num_vf); pf++) {
3157 if (num_vf[pf] <= 0)
3160 /* VF numbering starts at 1! */
3161 for (vf = 1; vf <= num_vf[pf]; vf++) {
3162 ret = t4_cfg_pfvf(adap, adap->fn, pf, vf,
3163 VFRES_NEQ, VFRES_NETHCTRL,
3164 VFRES_NIQFLINT, VFRES_NIQ,
3165 VFRES_TC, VFRES_NVI,
3166 FW_PFVF_CMD_CMASK_MASK,
3167 pfvfres_pmask(adap, pf, vf),
3169 VFRES_R_CAPS, VFRES_WX_CAPS);
3171 dev_warn(adap->pdev_dev, "failed to "
3172 "provision pf/vf=%d/%d; "
3173 "err=%d\n", pf, vf, ret);
3183 * If a command timed out or failed with EIO FW does not operate within
3184 * its spec or something catastrophic happened to HW/FW, stop issuing
3187 bye: if (ret != -ETIMEDOUT && ret != -EIO)
3188 t4_fw_bye(adap, adap->fn);
/*
 * PCI error handler: .error_detected entry of cxgb4_eeh.
 *
 * @pdev: the PCI device that reported the error
 * @state: channel state passed in by the PCI core
 *
 * Returns PCI_ERS_RESULT_DISCONNECT when @state is
 * pci_channel_io_perm_failure and PCI_ERS_RESULT_NEED_RESET otherwise.
 */
3194 static pci_ers_result_t eeh_err_detected(struct pci_dev *pdev,
3195 pci_channel_state_t state)
3198 struct adapter *adap = pci_get_drvdata(pdev);
/*
 * Mark the firmware as unavailable, tell the upper-layer drivers that
 * recovery is starting, then detach every port and drop its carrier.
 */
3204 adap->flags &= ~FW_OK;
3205 notify_ulds(adap, CXGB4_STATE_START_RECOVERY);
3206 for_each_port(adap, i) {
3207 struct net_device *dev = adap->port[i];
3209 netif_device_detach(dev);
3210 netif_carrier_off(dev);
/* The action taken when FULL_INIT_DONE is set is not visible in this excerpt. */
3212 if (adap->flags & FULL_INIT_DONE)
3215 pci_disable_device(pdev);
3216 out: return state == pci_channel_io_perm_failure ?
3217 PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_NEED_RESET;
/*
 * PCI error handler: .slot_reset entry of cxgb4_eeh. Re-enables the
 * device after a reset and brings the firmware connection back up.
 *
 * @pdev: the PCI device that was reset
 *
 * Returns PCI_ERS_RESULT_RECOVERED on success and
 * PCI_ERS_RESULT_DISCONNECT when any visible step fails.
 */
3220 static pci_ers_result_t eeh_slot_reset(struct pci_dev *pdev)
3223 struct fw_caps_config_cmd c;
3224 struct adapter *adap = pci_get_drvdata(pdev);
/*
 * Early exit that only restores and re-saves the PCI state. Its guarding
 * condition is not visible here; presumably it covers a device without an
 * adapter structure -- confirm.
 */
3227 pci_restore_state(pdev);
3228 pci_save_state(pdev);
3229 return PCI_ERS_RESULT_RECOVERED;
3232 if (pci_enable_device(pdev)) {
3233 dev_err(&pdev->dev, "cannot reenable PCI device after reset\n");
3234 return PCI_ERS_RESULT_DISCONNECT;
3237 pci_set_master(pdev);
3238 pci_restore_state(pdev);
3239 pci_save_state(pdev);
3240 pci_cleanup_aer_uncorrect_error_status(pdev);
/*
 * Wait for the device, reconnect to the firmware as master and repeat the
 * adap_init1() setup. FW_OK is set again as soon as the hello succeeds.
 */
3242 if (t4_wait_dev_ready(adap) < 0)
3243 return PCI_ERS_RESULT_DISCONNECT;
3244 if (t4_fw_hello(adap, adap->fn, adap->fn, MASTER_MUST, NULL))
3245 return PCI_ERS_RESULT_DISCONNECT;
3246 adap->flags |= FW_OK;
3247 if (adap_init1(adap, &c))
3248 return PCI_ERS_RESULT_DISCONNECT;
/*
 * Allocate a virtual interface for each port again and reset its MAC
 * filter index to -1.
 */
3250 for_each_port(adap, i) {
3251 struct port_info *p = adap2pinfo(adap, i);
3253 ret = t4_alloc_vi(adap, adap->fn, p->tx_chan, adap->fn, 0, 1,
3256 return PCI_ERS_RESULT_DISCONNECT;
3258 p->xact_addr_filt = -1;
3261 t4_load_mtus(adap, adap->params.mtus, adap->params.a_wnd,
3262 adap->params.b_wnd);
/* The check that leads to this final failure return is not visible here. */
3265 return PCI_ERS_RESULT_DISCONNECT;
3266 return PCI_ERS_RESULT_RECOVERED;
/*
 * PCI error handler: .resume entry of cxgb4_eeh.
 *
 * @pdev: the PCI device resuming normal operation
 *
 * For every port: a running net device has its Rx mode re-applied (other
 * steps inside that branch are not visible here), and the device is
 * attached again.
 */
3269 static void eeh_resume(struct pci_dev *pdev)
3272 struct adapter *adap = pci_get_drvdata(pdev);
3278 for_each_port(adap, i) {
3279 struct net_device *dev = adap->port[i];
3281 if (netif_running(dev)) {
3283 cxgb_set_rxmode(dev);
3285 netif_device_attach(dev);
/* PCI error recovery callbacks; referenced by cxgb4_driver.err_handler. */
3290 static struct pci_error_handlers cxgb4_eeh = {
3291 .error_detected = eeh_err_detected,
3292 .slot_reset = eeh_slot_reset,
3293 .resume = eeh_resume,
/*
 * @lc: link configuration of a port
 *
 * Returns true when the supported capabilities in @lc include
 * FW_PORT_CAP_SPEED_10G.
 */
3296 static inline bool is_10g_port(const struct link_config *lc)
3298 return (lc->supported & FW_PORT_CAP_SPEED_10G) != 0;
/*
 * Set the default parameters of a response queue.
 *
 * @q: the response queue to initialize
 * @timer_idx: holdoff timer index, encoded with QINTR_TIMER_IDX()
 * @pkt_cnt_idx: packet counter index; values of SGE_NCOUNTERS or more
 *               leave QINTR_CNT_EN clear and store index 0
 * @size: queue size (its use is not visible in this excerpt)
 * @iqe_size: stored in q->iqe_len
 */
3301 static inline void init_rspq(struct sge_rspq *q, u8 timer_idx, u8 pkt_cnt_idx,
3302 unsigned int size, unsigned int iqe_size)
3304 q->intr_params = QINTR_TIMER_IDX(timer_idx) |
3305 (pkt_cnt_idx < SGE_NCOUNTERS ? QINTR_CNT_EN : 0);
3306 q->pktcnt_idx = pkt_cnt_idx < SGE_NCOUNTERS ? pkt_cnt_idx : 0;
3307 q->iqe_len = iqe_size;
3312 * Perform default configuration of DMA queues depending on the number and type
3313 * of ports we found and the number of available CPUs. Most settings can be
3314 * modified by the admin prior to actual use.
/*
 * @adap: the adapter whose adap->sge queue defaults are filled in
 */
3316 static void __devinit cfg_queues(struct adapter *adap)
3318 struct sge *s = &adap->sge;
3319 int i, q10g = 0, n10g = 0, qidx = 0;
/* Count the 10G ports. */
3321 for_each_port(adap, i)
3322 n10g += is_10g_port(&adap2pinfo(adap, i)->link_cfg);
3325 * We default to 1 queue per non-10G port and up to # of cores queues
3329 q10g = (MAX_ETH_QSETS - (adap->params.nports - n10g)) / n10g;
/*
 * Cap the per-10G-port queue count at the number of online CPUs.
 * NOTE(review): the division by n10g above needs n10g != 0; the guarding
 * condition is not visible in this excerpt -- confirm.
 */
3330 if (q10g > num_online_cpus())
3331 q10g = num_online_cpus();
/*
 * Give each port its first queue set index and its queue set count: q10g
 * for a 10G port, 1 otherwise. The update of qidx is not visible here.
 */
3333 for_each_port(adap, i) {
3334 struct port_info *pi = adap2pinfo(adap, i);
3336 pi->first_qset = qidx;
3337 pi->nqsets = is_10g_port(&pi->link_cfg) ? q10g : 1;
3342 s->max_ethqsets = qidx; /* MSI-X may lower it later */
3344 if (is_offload(adap)) {
3346 * For offload we use 1 queue/channel if all ports are up to 1G,
3347 * otherwise we divide all available queues amongst the channels
3348 * capped by the number of available cores.
3351 i = min_t(int, ARRAY_SIZE(s->ofldrxq),
3353 s->ofldqsets = roundup(i, adap->params.nports);
3355 s->ofldqsets = adap->params.nports;
3356 /* For RDMA one Rx queue per channel suffices */
3357 s->rdmaqs = adap->params.nports;
/* Default queue parameters for every Ethernet Rx queue. */
3360 for (i = 0; i < ARRAY_SIZE(s->ethrxq); i++) {
3361 struct sge_eth_rxq *r = &s->ethrxq[i];
3363 init_rspq(&r->rspq, 0, 0, 1024, 64);
/* Default sizes of the Ethernet Tx, control and offload Tx queues. */
3367 for (i = 0; i < ARRAY_SIZE(s->ethtxq); i++)
3368 s->ethtxq[i].q.size = 1024;
3370 for (i = 0; i < ARRAY_SIZE(s->ctrlq); i++)
3371 s->ctrlq[i].q.size = 512;
3373 for (i = 0; i < ARRAY_SIZE(s->ofldtxq); i++)
3374 s->ofldtxq[i].q.size = 1024;
/* Offload Rx queues are tagged for the iSCSI ULD, RDMA Rx queues for RDMA. */
3376 for (i = 0; i < ARRAY_SIZE(s->ofldrxq); i++) {
3377 struct sge_ofld_rxq *r = &s->ofldrxq[i];
3379 init_rspq(&r->rspq, 0, 0, 1024, 64);
3380 r->rspq.uld = CXGB4_ULD_ISCSI;
3384 for (i = 0; i < ARRAY_SIZE(s->rdmarxq); i++) {
3385 struct sge_ofld_rxq *r = &s->rdmarxq[i];
3387 init_rspq(&r->rspq, 0, 0, 511, 64);
3388 r->rspq.uld = CXGB4_ULD_RDMA;
/* The firmware event queue and the interrupt queue both use timer index 6. */
3392 init_rspq(&s->fw_evtq, 6, 0, 512, 64);
3393 init_rspq(&s->intrq, 6, 0, 2 * MAX_INGQ, 64);
3397 * Reduce the number of Ethernet queues across all ports to at most n.
3398 * n provides at least one queue per port.
/*
 * @adap: the adapter
 * @n: target upper bound for adap->sge.ethqsets
 *
 * Repeats passes over the ports while ethqsets exceeds @n. Only ports with
 * more than one queue set are touched, and ethqsets drops by one each
 * time. The matching per-port decrement is not visible in this excerpt.
 */
3400 static void __devinit reduce_ethqs(struct adapter *adap, int n)
3403 struct port_info *pi;
3405 while (n < adap->sge.ethqsets)
3406 for_each_port(adap, i) {
3407 pi = adap2pinfo(adap, i);
3408 if (pi->nqsets > 1) {
3410 adap->sge.ethqsets--;
3411 if (adap->sge.ethqsets <= n)
/*
 * Second pass over all ports; its body is not visible here. Presumably it
 * reassigns first_qset after the counts changed -- confirm.
 */
3417 for_each_port(adap, i) {
3418 pi = adap2pinfo(adap, i);
3424 /* 2 MSI-X vectors needed for the FW queue and non-data interrupts */
/* enable_msix() adds this to both its wanted and its minimum vector counts. */
3425 #define EXTRA_VECS 2
/*
 * Try to enable MSI-X and spread the granted vectors over the queue
 * groups.
 *
 * @adap: the adapter
 *
 * init_one() treats a return value of 0 as success. The return statements
 * themselves are not visible in this excerpt.
 */
3427 static int __devinit enable_msix(struct adapter *adap)
3430 int i, err, want, need;
3431 struct sge *s = &adap->sge;
3432 unsigned int nchan = adap->params.nports;
3433 struct msix_entry entries[MAX_INGQ + 1];
3435 for (i = 0; i < ARRAY_SIZE(entries); ++i)
3436 entries[i].entry = i;
/*
 * want: one vector per Ethernet queue set plus EXTRA_VECS, plus the RDMA
 * and offload queue sets when offload is enabled.
 * need: the minimum accepted, one per port plus EXTRA_VECS plus ofld_need.
 */
3438 want = s->max_ethqsets + EXTRA_VECS;
3439 if (is_offload(adap)) {
3440 want += s->rdmaqs + s->ofldqsets;
3441 /* need nchan for each possible ULD */
3442 ofld_need = 2 * nchan;
3444 need = adap->params.nports + EXTRA_VECS + ofld_need;
/*
 * Keep retrying while the call returns at least need. The loop body is not
 * visible here; presumably it lowers want to the returned count -- confirm.
 */
3446 while ((err = pci_enable_msix(adap->pdev, entries, want)) >= need)
3451 * Distribute available vectors to the various queue groups.
3452 * Every group gets its minimum requirement and NIC gets top
3453 * priority for leftovers.
3455 i = want - EXTRA_VECS - ofld_need;
3456 if (i < s->max_ethqsets) {
3457 s->max_ethqsets = i;
3458 if (i < s->ethqsets)
3459 reduce_ethqs(adap, i);
/*
 * Whatever is left after the Ethernet share and the non-offload part of
 * the minimum goes to the offload queues, rounded down to a multiple of
 * nchan.
 */
3461 if (is_offload(adap)) {
3462 i = want - EXTRA_VECS - s->max_ethqsets;
3463 i -= ofld_need - nchan;
3464 s->ofldqsets = (i / nchan) * nchan; /* round down */
/* Record the vector number of every granted entry. */
3466 for (i = 0; i < want; ++i)
3467 adap->msix_info[i].vec = entries[i].vector;
3469 dev_info(adap->pdev_dev,
3470 "only %d MSI-X vectors left, not using MSI-X\n", err);
/*
 * Allocate and fill the RSS table of every port.
 *
 * @adap: the adapter
 *
 * Each table has rss_size u16 entries; entry j holds j % nqsets, so the
 * entries cycle through the queue sets of the port. The handling of an
 * allocation failure is not visible in this excerpt.
 */
3476 static int __devinit init_rss(struct adapter *adap)
3480 for_each_port(adap, i) {
3481 struct port_info *pi = adap2pinfo(adap, i);
3483 pi->rss = kcalloc(pi->rss_size, sizeof(u16), GFP_KERNEL);
3486 for (j = 0; j < pi->rss_size; j++)
3487 pi->rss[j] = j % pi->nqsets;
/*
 * Log one information line for every registered port.
 *
 * @adap: the adapter
 */
3492 static void __devinit print_port_info(struct adapter *adap)
/* Port type names, indexed by pi->port_type below. */
3494 static const char *base[] = {
3495 "R XFI", "R XAUI", "T SGMII", "T XFI", "T XAUI", "KX4", "CX4",
3496 "KX", "KR", "KR SFP+", "KR FEC"
3501 const char *spd = "";
/* spd is chosen from the PCIe link speed; the assigned strings are not visible here. */
3503 if (adap->params.pci.speed == PCI_EXP_LNKSTA_CLS_2_5GB)
3505 else if (adap->params.pci.speed == PCI_EXP_LNKSTA_CLS_5_0GB)
3508 for_each_port(adap, i) {
3509 struct net_device *dev = adap->port[i];
3510 const struct port_info *pi = netdev_priv(dev);
/* Ports that were not registered are tested for here. */
3513 if (!test_bit(i, &adap->registered_device_map))
/* Build the list of supported speeds in buf, one fragment per speed. */
3516 if (pi->link_cfg.supported & FW_PORT_CAP_SPEED_100M)
3517 bufp += sprintf(bufp, "100/");
3518 if (pi->link_cfg.supported & FW_PORT_CAP_SPEED_1G)
3519 bufp += sprintf(bufp, "1000/");
3520 if (pi->link_cfg.supported & FW_PORT_CAP_SPEED_10G)
3521 bufp += sprintf(bufp, "10G/");
/*
 * NOTE(review): no check that pi->port_type is below ARRAY_SIZE(base) is
 * visible before the lookup below -- confirm that the value is bounded.
 */
3524 sprintf(bufp, "BASE-%s", base[pi->port_type]);
3526 netdev_info(dev, "Chelsio %s rev %d %s %sNIC PCIe x%d%s%s\n",
3527 adap->params.vpd.id, adap->params.rev,
3528 buf, is_offload(adap) ? "R" : "",
3529 adap->params.pci.width, spd,
3530 (adap->flags & USING_MSIX) ? " MSI-X" :
3531 (adap->flags & USING_MSI) ? " MSI" : "");
/*
 * Pointer comparison, not a string comparison: init_one() points
 * adapter->name at the name of the first registered port, so the serial
 * number line is printed for that port only.
 */
3532 if (adap->name == dev->name)
3533 netdev_info(dev, "S/N: %s, E/C: %s\n",
3534 adap->params.vpd.sn, adap->params.vpd.ec);
3539 * Free the following resources:
3540 * - memory used for tables
3543 * - resources FW is holding for us
/*
 * @adapter: the adapter being torn down
 *
 * Called from remove_one() and from the error path of init_one().
 */
3545 static void free_some_resources(struct adapter *adapter)
3549 t4_free_mem(adapter->l2t);
3550 t4_free_mem(adapter->tids.tid_tab);
3551 disable_msi(adapter);
/* Free the RSS table and the net device of each port that was allocated. */
3553 for_each_port(adapter, i)
3554 if (adapter->port[i]) {
3555 kfree(adap2pinfo(adapter, i)->rss);
3556 free_netdev(adapter->port[i]);
/* Say goodbye to the firmware only while the FW_OK flag is set. */
3558 if (adapter->flags & FW_OK)
3559 t4_fw_bye(adapter, adapter->fn);
/*
 * Feature bits that VLAN devices may inherit: init_one() sets
 * netdev->vlan_features to netdev->features masked with this value.
 */
3562 #define VLAN_FEAT (NETIF_F_SG | NETIF_F_IP_CSUM | TSO_FLAGS | \
3563 NETIF_F_IPV6_CSUM | NETIF_F_HIGHDMA)
/*
 * PCI probe routine for one function of the device.
 *
 * @pdev: the PCI function being probed
 * @ent: matching entry of the PCI id table; ent->driver_data holds the
 *       function number that owns the adapter
 *
 * Claims the PCI resources, sets up DMA masks, allocates the adapter and
 * one net device per port, initializes the firmware connection, picks the
 * interrupt scheme and registers the net devices. Many error-handling
 * lines and labels are not visible in this excerpt.
 */
3565 static int __devinit init_one(struct pci_dev *pdev,
3566 const struct pci_device_id *ent)
3569 struct port_info *pi;
3570 unsigned int highdma = 0;
3571 struct adapter *adapter = NULL;
3573 printk_once(KERN_INFO "%s - version %s\n", DRV_DESC, DRV_VERSION);
3575 err = pci_request_regions(pdev, KBUILD_MODNAME);
3577 /* Just info, some other driver may have claimed the device. */
3578 dev_info(&pdev->dev, "cannot obtain PCI resources\n");
3582 /* We control everything through one PF */
3583 func = PCI_FUNC(pdev->devfn);
3584 if (func != ent->driver_data) {
3585 pci_save_state(pdev); /* to restore SR-IOV later */
3589 err = pci_enable_device(pdev);
3591 dev_err(&pdev->dev, "cannot enable PCI device\n");
3592 goto out_release_regions;
/*
 * Prefer 64-bit DMA for both streaming and coherent mappings; the 32-bit
 * streaming mask below is the fallback.
 */
3595 if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(64))) {
3596 highdma = NETIF_F_HIGHDMA;
3597 err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
3599 dev_err(&pdev->dev, "unable to obtain 64-bit DMA for "
3600 "coherent allocations\n");
3601 goto out_disable_device;
3604 err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
3606 dev_err(&pdev->dev, "no usable DMA configuration\n");
3607 goto out_disable_device;
3611 pci_enable_pcie_error_reporting(pdev);
3612 pci_set_master(pdev);
3613 pci_save_state(pdev);
3615 adapter = kzalloc(sizeof(*adapter), GFP_KERNEL);
3618 goto out_disable_device;
3621 adapter->regs = pci_ioremap_bar(pdev, 0);
3622 if (!adapter->regs) {
3623 dev_err(&pdev->dev, "cannot map device registers\n");
3625 goto out_free_adapter;
/*
 * Until a net device is registered, log messages use the PCI name. Every
 * chan_map entry starts out as 0xff.
 */
3628 adapter->pdev = pdev;
3629 adapter->pdev_dev = &pdev->dev;
3631 adapter->name = pci_name(pdev);
3632 adapter->msg_enable = dflt_msg_enable;
3633 memset(adapter->chan_map, 0xff, sizeof(adapter->chan_map));
3635 spin_lock_init(&adapter->stats_lock);
3636 spin_lock_init(&adapter->tid_release_lock);
3638 INIT_WORK(&adapter->tid_release_task, process_tid_release_list);
3640 err = t4_prep_adapter(adapter);
3643 err = adap_init0(adapter);
/* Allocate and pre-configure one multiqueue net device per port. */
3647 for_each_port(adapter, i) {
3648 struct net_device *netdev;
3650 netdev = alloc_etherdev_mq(sizeof(struct port_info),
3657 SET_NETDEV_DEV(netdev, &pdev->dev);
3659 adapter->port[i] = netdev;
3660 pi = netdev_priv(netdev);
3661 pi->adapter = adapter;
3662 pi->xact_addr_filt = -1;
3663 pi->rx_offload = RX_CSO;
3665 netif_carrier_off(netdev);
3666 netdev->irq = pdev->irq;
3668 netdev->features |= NETIF_F_SG | TSO_FLAGS;
3669 netdev->features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
3670 netdev->features |= NETIF_F_GRO | NETIF_F_RXHASH | highdma;
3671 netdev->features |= NETIF_F_HW_VLAN_TX | NETIF_F_HW_VLAN_RX;
3672 netdev->vlan_features = netdev->features & VLAN_FEAT;
3674 netdev->netdev_ops = &cxgb4_netdev_ops;
3675 SET_ETHTOOL_OPS(netdev, &cxgb_ethtool_ops);
3678 pci_set_drvdata(pdev, adapter);
/* Port initialization is attempted only when adap_init0() left FW_OK set. */
3680 if (adapter->flags & FW_OK) {
3681 err = t4_port_init(adapter, func, func, 0);
3687 * Configure queues and allocate tables now, they can be needed as
3688 * soon as the first register_netdev completes.
3690 cfg_queues(adapter);
3692 adapter->l2t = t4_init_l2t();
3693 if (!adapter->l2t) {
3694 /* We tolerate a lack of L2T, giving up some functionality */
3695 dev_warn(&pdev->dev, "could not allocate L2T, continuing\n");
3696 adapter->params.offload = 0;
/* A failed TID table allocation also just turns offload off. */
3699 if (is_offload(adapter) && tid_init(&adapter->tids) < 0) {
3700 dev_warn(&pdev->dev, "could not allocate TID table, "
3702 adapter->params.offload = 0;
3705 /* See what interrupts we'll be using */
3706 if (msi > 1 && enable_msix(adapter) == 0)
3707 adapter->flags |= USING_MSIX;
3708 else if (msi > 0 && pci_enable_msi(pdev) == 0)
3709 adapter->flags |= USING_MSI;
3711 err = init_rss(adapter);
3716 * The card is now ready to go. If any errors occur during device
3717 * registration we do not fail the whole card but rather proceed only
3718 * with the ports we manage to register successfully. However we must
3719 * register at least one net device.
3721 for_each_port(adapter, i) {
3722 err = register_netdev(adapter->port[i]);
3724 dev_warn(&pdev->dev,
3725 "cannot register net device %s, skipping\n",
3726 adapter->port[i]->name);
3729 * Change the name we use for messages to the name of
3730 * the first successfully registered interface.
3732 if (!adapter->registered_device_map)
3733 adapter->name = adapter->port[i]->name;
3735 __set_bit(i, &adapter->registered_device_map);
3736 adapter->chan_map[adap2pinfo(adapter, i)->tx_chan] = i;
3739 if (!adapter->registered_device_map) {
3740 dev_err(&pdev->dev, "could not register any net devices\n");
/* The per-adapter debugfs directory is created only when the module root exists. */
3744 if (cxgb4_debugfs_root) {
3745 adapter->debugfs_root = debugfs_create_dir(pci_name(pdev),
3746 cxgb4_debugfs_root);
3747 setup_debugfs(adapter);
3750 if (is_offload(adapter))
3751 attach_ulds(adapter);
3753 print_port_info(adapter);
3756 #ifdef CONFIG_PCI_IOV
3757 if (func < ARRAY_SIZE(num_vf) && num_vf[func] > 0)
3758 if (pci_enable_sriov(pdev, num_vf[func]) == 0)
3759 dev_info(&pdev->dev,
3760 "instantiated %u virtual functions\n",
/* Error unwinding: the steps below undo the setup above in reverse order. */
3766 free_some_resources(adapter);
3768 iounmap(adapter->regs);
3772 pci_disable_pcie_error_reporting(pdev);
3773 pci_disable_device(pdev);
3774 out_release_regions:
3775 pci_release_regions(pdev);
3776 pci_set_drvdata(pdev, NULL);
/*
 * PCI remove routine; undoes what init_one() set up.
 *
 * @pdev: the PCI function being removed
 */
3780 static void __devexit remove_one(struct pci_dev *pdev)
3782 struct adapter *adapter = pci_get_drvdata(pdev);
3784 pci_disable_sriov(pdev);
3789 if (is_offload(adapter))
3790 detach_ulds(adapter);
/* Unregister only the ports that init_one() managed to register. */
3792 for_each_port(adapter, i)
3793 if (test_bit(i, &adapter->registered_device_map))
3794 unregister_netdev(adapter->port[i]);
3796 if (adapter->debugfs_root)
3797 debugfs_remove_recursive(adapter->debugfs_root);
/* The action taken when FULL_INIT_DONE is set is not visible in this excerpt. */
3799 if (adapter->flags & FULL_INIT_DONE)
3802 free_some_resources(adapter);
3803 iounmap(adapter->regs);
3805 pci_disable_pcie_error_reporting(pdev);
3806 pci_disable_device(pdev);
3807 pci_release_regions(pdev);
3808 pci_set_drvdata(pdev, NULL);
/*
 * NOTE(review): pci_release_regions() appears a second time here.
 * Presumably this call belongs to a separate branch for functions that
 * never got an adapter (init_one() has a func != ent->driver_data path);
 * the branch structure is not visible in this excerpt -- confirm.
 */
3810 pci_release_regions(pdev);
/*
 * PCI driver descriptor; registered in cxgb4_init_module() and
 * unregistered in cxgb4_cleanup_module().
 */
3813 static struct pci_driver cxgb4_driver = {
3814 .name = KBUILD_MODNAME,
3815 .id_table = cxgb4_pci_tbl,
3817 .remove = __devexit_p(remove_one),
3818 .err_handler = &cxgb4_eeh,
/*
 * Module entry point: create the debugfs root directory of the module
 * and register the PCI driver.
 */
3821 static int __init cxgb4_init_module(void)
3825 /* Debugfs support is optional, just warn if this fails */
3826 cxgb4_debugfs_root = debugfs_create_dir(KBUILD_MODNAME, NULL);
3827 if (!cxgb4_debugfs_root)
3828 pr_warning("could not create debugfs entry, continuing\n");
3830 ret = pci_register_driver(&cxgb4_driver);
/*
 * The condition guarding this removal is not visible here; presumably it
 * runs only when the registration above failed -- confirm.
 */
3832 debugfs_remove(cxgb4_debugfs_root);
/* Module exit point: unregister the PCI driver, then remove the debugfs root. */
3836 static void __exit cxgb4_cleanup_module(void)
3838 pci_unregister_driver(&cxgb4_driver);
3839 debugfs_remove(cxgb4_debugfs_root); /* NULL ok */
/* Hook the two functions above into module load and unload. */
3842 module_init(cxgb4_init_module);
3843 module_exit(cxgb4_cleanup_module);