linux-2.6-microblaze.git: drivers/net/ethernet/sfc/efx_channels.c
1 // SPDX-License-Identifier: GPL-2.0-only
2 /****************************************************************************
3  * Driver for Solarflare network controllers and boards
4  * Copyright 2018 Solarflare Communications Inc.
5  *
6  * This program is free software; you can redistribute it and/or modify it
7  * under the terms of the GNU General Public License version 2 as published
8  * by the Free Software Foundation, incorporated herein by reference.
9  */
10
11 #include "net_driver.h"
12 #include <linux/module.h>
13 #include "efx_channels.h"
14 #include "efx.h"
15 #include "efx_common.h"
16 #include "tx_common.h"
17 #include "rx_common.h"
18 #include "nic.h"
19 #include "sriov.h"
20 #include "workarounds.h"
21
22 /* This is the first interrupt mode to try out of:
23  * 0 => MSI-X
24  * 1 => MSI
25  * 2 => legacy
26  */
27 unsigned int efx_interrupt_mode = EFX_INT_MODE_MSIX;
28
29 /* This is the requested number of CPUs to use for Receive-Side Scaling (RSS),
30  * i.e. the number of CPUs among which we may distribute simultaneous
31  * interrupt handling.
32  *
33  * Cards without MSI-X will only target one CPU via legacy or MSI interrupt.
34  * The default (0) means to assign an interrupt to each core.
35  */
36 unsigned int rss_cpus;
37
38 static unsigned int irq_adapt_low_thresh = 8000;
39 module_param(irq_adapt_low_thresh, uint, 0644);
40 MODULE_PARM_DESC(irq_adapt_low_thresh,
41                  "Threshold score for reducing IRQ moderation");
42
43 static unsigned int irq_adapt_high_thresh = 16000;
44 module_param(irq_adapt_high_thresh, uint, 0644);
45 MODULE_PARM_DESC(irq_adapt_high_thresh,
46                  "Threshold score for increasing IRQ moderation");
47
48 /* This is the weight assigned to each of the (per-channel) virtual
49  * NAPI devices.
50  */
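/* (64 matches NAPI_POLL_WEIGHT, the kernel's standard NAPI poll budget.) */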
51 static int napi_weight = 64;
52
53 /***************
54  * Housekeeping
55  ***************/
56
57 int efx_channel_dummy_op_int(struct efx_channel *channel)
58 {
59         return 0;
60 }
61
62 void efx_channel_dummy_op_void(struct efx_channel *channel)
63 {
64 }
65
66 static const struct efx_channel_type efx_default_channel_type = {
67         .pre_probe              = efx_channel_dummy_op_int,
68         .post_remove            = efx_channel_dummy_op_void,
69         .get_name               = efx_get_channel_name,
70         .copy                   = efx_copy_channel,
71         .want_txqs              = efx_default_channel_want_txqs,
72         .keep_eventq            = false,
73         .want_pio               = true,
74 };
75
76 /*************
77  * INTERRUPTS
78  *************/
79
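/* Count how many RSS channels we would like: the value of rss_cpus if it is
 * set, otherwise one per group of hyperthread siblings among the online CPUs.
 * The result is capped at EFX_MAX_RX_QUEUES and, when the PF and its VFs both
 * use RSS, at the VF size so that we never write RSS table entries the VFs
 * cannot reach.
 */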
80 static unsigned int efx_wanted_parallelism(struct efx_nic *efx)
81 {
82         cpumask_var_t thread_mask;
83         unsigned int count;
84         int cpu;
85
86         if (rss_cpus) {
87                 count = rss_cpus;
88         } else {
89                 if (unlikely(!zalloc_cpumask_var(&thread_mask, GFP_KERNEL))) {
90                         netif_warn(efx, probe, efx->net_dev,
91                                    "RSS disabled due to allocation failure\n");
92                         return 1;
93                 }
94
95                 count = 0;
96                 for_each_online_cpu(cpu) {
97                         if (!cpumask_test_cpu(cpu, thread_mask)) {
98                                 ++count;
99                                 cpumask_or(thread_mask, thread_mask,
100                                            topology_sibling_cpumask(cpu));
101                         }
102                 }
103
104                 free_cpumask_var(thread_mask);
105         }
106
107         if (count > EFX_MAX_RX_QUEUES) {
108                 netif_cond_dbg(efx, probe, efx->net_dev, !rss_cpus, warn,
109                                "Reducing number of rx queues from %u to %u.\n",
110                                count, EFX_MAX_RX_QUEUES);
111                 count = EFX_MAX_RX_QUEUES;
112         }
113
114         /* If RSS is requested for the PF *and* VFs then we can't write RSS
115          * table entries that are inaccessible to VFs
116          */
117 #ifdef CONFIG_SFC_SRIOV
118         if (efx->type->sriov_wanted) {
119                 if (efx->type->sriov_wanted(efx) && efx_vf_size(efx) > 1 &&
120                     count > efx_vf_size(efx)) {
121                         netif_warn(efx, probe, efx->net_dev,
122                                    "Reducing number of RSS channels from %u to %u for "
123                                    "VF support. Increase vf-msix-limit to use more "
124                                    "channels on the PF.\n",
125                                    count, efx_vf_size(efx));
126                         count = efx_vf_size(efx);
127                 }
128         }
129 #endif
130
131         return count;
132 }
133
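/* Work out the MSI-X channel layout: @parallelism traffic channels (doubled
 * when TX and RX channels are separated), plus @extra_channels, plus enough
 * extra event queues to give every possible CPU an XDP TX queue.  Returns the
 * total number of channels to request, or a negative error code.
 */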
134 static int efx_allocate_msix_channels(struct efx_nic *efx,
135                                       unsigned int max_channels,
136                                       unsigned int extra_channels,
137                                       unsigned int parallelism)
138 {
139         unsigned int n_channels = parallelism;
140         int vec_count;
141         int tx_per_ev;
142         int n_xdp_tx;
143         int n_xdp_ev;
144
145         if (efx_separate_tx_channels)
146                 n_channels *= 2;
147         n_channels += extra_channels;
148
149         /* To allow XDP transmit to happen from arbitrary NAPI contexts
150          * we allocate a TX queue per CPU. We share event queues across
151          * multiple tx queues, assuming tx and ev queues are both
152          * maximum size.
153          */
154         tx_per_ev = EFX_MAX_EVQ_SIZE / EFX_TXQ_MAX_ENT(efx);
155         n_xdp_tx = num_possible_cpus();
156         n_xdp_ev = DIV_ROUND_UP(n_xdp_tx, tx_per_ev);
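        /* For example (illustrative sizes only): with EFX_MAX_EVQ_SIZE at
         * 32768 and EFX_TXQ_MAX_ENT() at 4096, tx_per_ev is 8, so a system
         * with 64 possible CPUs needs DIV_ROUND_UP(64, 8) = 8 XDP event
         * queues to back its 64 XDP TX queues.
         */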
157
158         vec_count = pci_msix_vec_count(efx->pci_dev);
159         if (vec_count < 0)
160                 return vec_count;
161
162         max_channels = min_t(unsigned int, vec_count, max_channels);
163
164         /* Check resources.
165          * We need a channel per event queue, plus a VI per tx queue.
166          * This may be more pessimistic than it needs to be.
167          */
168         if (n_channels + n_xdp_ev > max_channels) {
169                 netif_err(efx, drv, efx->net_dev,
170                           "Insufficient resources for %d XDP event queues (%d other channels, max %d)\n",
171                           n_xdp_ev, n_channels, max_channels);
172                 efx->n_xdp_channels = 0;
173                 efx->xdp_tx_per_channel = 0;
174                 efx->xdp_tx_queue_count = 0;
175         } else if (n_channels + n_xdp_tx > efx->max_vis) {
176                 netif_err(efx, drv, efx->net_dev,
177                           "Insufficient resources for %d XDP TX queues (%d other channels, max VIs %d)\n",
178                           n_xdp_tx, n_channels, efx->max_vis);
179                 efx->n_xdp_channels = 0;
180                 efx->xdp_tx_per_channel = 0;
181                 efx->xdp_tx_queue_count = 0;
182         } else {
183                 efx->n_xdp_channels = n_xdp_ev;
184                 efx->xdp_tx_per_channel = EFX_MAX_TXQ_PER_CHANNEL;
185                 efx->xdp_tx_queue_count = n_xdp_tx;
186                 n_channels += n_xdp_ev;
187                 netif_dbg(efx, drv, efx->net_dev,
188                           "Allocating %d TX and %d event queues for XDP\n",
189                           n_xdp_tx, n_xdp_ev);
190         }
191
192         if (vec_count < n_channels) {
193                 netif_err(efx, drv, efx->net_dev,
194                           "WARNING: Insufficient MSI-X vectors available (%d < %u).\n",
195                           vec_count, n_channels);
196                 netif_err(efx, drv, efx->net_dev,
197                           "WARNING: Performance may be reduced.\n");
198                 n_channels = vec_count;
199         }
200
201         n_channels = min(n_channels, max_channels);
202
203         efx->n_channels = n_channels;
204
205         /* Ignore XDP tx channels when creating rx channels. */
206         n_channels -= efx->n_xdp_channels;
207
208         if (efx_separate_tx_channels) {
209                 efx->n_tx_channels =
210                         min(max(n_channels / 2, 1U),
211                             efx->max_tx_channels);
212                 efx->tx_channel_offset =
213                         n_channels - efx->n_tx_channels;
214                 efx->n_rx_channels =
215                         max(n_channels -
216                             efx->n_tx_channels, 1U);
217         } else {
218                 efx->n_tx_channels = min(n_channels, efx->max_tx_channels);
219                 efx->tx_channel_offset = 0;
220                 efx->n_rx_channels = n_channels;
221         }
222
223         efx->n_rx_channels = min(efx->n_rx_channels, parallelism);
224         efx->n_tx_channels = min(efx->n_tx_channels, parallelism);
225
226         efx->xdp_channel_offset = n_channels;
227
228         netif_dbg(efx, drv, efx->net_dev,
229                   "Allocating %u RX channels\n",
230                   efx->n_rx_channels);
231
232         return efx->n_channels;
233 }
234
235 /* Probe the number and type of interrupts we are able to obtain, and
236  * the resulting numbers of channels and RX queues.
237  */
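/* If MSI-X cannot be enabled we fall back to single-channel MSI and then to
 * legacy interrupts, but only where efx->type->min_interrupt_mode permits the
 * less capable mode.
 */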
238 int efx_probe_interrupts(struct efx_nic *efx)
239 {
240         unsigned int extra_channels = 0;
241         unsigned int rss_spread;
242         unsigned int i, j;
243         int rc;
244
245         for (i = 0; i < EFX_MAX_EXTRA_CHANNELS; i++)
246                 if (efx->extra_channel_type[i])
247                         ++extra_channels;
248
249         if (efx->interrupt_mode == EFX_INT_MODE_MSIX) {
250                 unsigned int parallelism = efx_wanted_parallelism(efx);
251                 struct msix_entry xentries[EFX_MAX_CHANNELS];
252                 unsigned int n_channels;
253
254                 rc = efx_allocate_msix_channels(efx, efx->max_channels,
255                                                 extra_channels, parallelism);
256                 if (rc >= 0) {
257                         n_channels = rc;
258                         for (i = 0; i < n_channels; i++)
259                                 xentries[i].entry = i;
260                         rc = pci_enable_msix_range(efx->pci_dev, xentries, 1,
261                                                    n_channels);
262                 }
263                 if (rc < 0) {
264                         /* Fall back to single channel MSI */
265                         netif_err(efx, drv, efx->net_dev,
266                                   "could not enable MSI-X\n");
267                         if (efx->type->min_interrupt_mode >= EFX_INT_MODE_MSI)
268                                 efx->interrupt_mode = EFX_INT_MODE_MSI;
269                         else
270                                 return rc;
271                 } else if (rc < n_channels) {
272                         netif_err(efx, drv, efx->net_dev,
273                                   "WARNING: Insufficient MSI-X vectors"
274                                   " available (%d < %u).\n", rc, n_channels);
275                         netif_err(efx, drv, efx->net_dev,
276                                   "WARNING: Performance may be reduced.\n");
277                         n_channels = rc;
278                 }
279
280                 if (rc > 0) {
281                         for (i = 0; i < efx->n_channels; i++)
282                                 efx_get_channel(efx, i)->irq =
283                                         xentries[i].vector;
284                 }
285         }
286
287         /* Try single interrupt MSI */
288         if (efx->interrupt_mode == EFX_INT_MODE_MSI) {
289                 efx->n_channels = 1;
290                 efx->n_rx_channels = 1;
291                 efx->n_tx_channels = 1;
292                 efx->n_xdp_channels = 0;
293                 efx->xdp_channel_offset = efx->n_channels;
294                 rc = pci_enable_msi(efx->pci_dev);
295                 if (rc == 0) {
296                         efx_get_channel(efx, 0)->irq = efx->pci_dev->irq;
297                 } else {
298                         netif_err(efx, drv, efx->net_dev,
299                                   "could not enable MSI\n");
300                         if (efx->type->min_interrupt_mode >= EFX_INT_MODE_LEGACY)
301                                 efx->interrupt_mode = EFX_INT_MODE_LEGACY;
302                         else
303                                 return rc;
304                 }
305         }
306
307         /* Assume legacy interrupts */
308         if (efx->interrupt_mode == EFX_INT_MODE_LEGACY) {
309                 efx->n_channels = 1 + (efx_separate_tx_channels ? 1 : 0);
310                 efx->n_rx_channels = 1;
311                 efx->n_tx_channels = 1;
312                 efx->n_xdp_channels = 0;
313                 efx->xdp_channel_offset = efx->n_channels;
314                 efx->legacy_irq = efx->pci_dev->irq;
315         }
316
317         /* Assign extra channels if possible, before XDP channels */
318         efx->n_extra_tx_channels = 0;
319         j = efx->xdp_channel_offset;
320         for (i = 0; i < EFX_MAX_EXTRA_CHANNELS; i++) {
321                 if (!efx->extra_channel_type[i])
322                         continue;
323                 if (j <= efx->tx_channel_offset + efx->n_tx_channels) {
324                         efx->extra_channel_type[i]->handle_no_channel(efx);
325                 } else {
326                         --j;
327                         efx_get_channel(efx, j)->type =
328                                 efx->extra_channel_type[i];
329                         if (efx_channel_has_tx_queues(efx_get_channel(efx, j)))
330                                 efx->n_extra_tx_channels++;
331                 }
332         }
333
334         rss_spread = efx->n_rx_channels;
335         /* RSS might be usable on VFs even if it is disabled on the PF */
336 #ifdef CONFIG_SFC_SRIOV
337         if (efx->type->sriov_wanted) {
338                 efx->rss_spread = ((rss_spread > 1 ||
339                                     !efx->type->sriov_wanted(efx)) ?
340                                    rss_spread : efx_vf_size(efx));
341                 return 0;
342         }
343 #endif
344         efx->rss_spread = rss_spread;
345
346         return 0;
347 }
348
349 #if defined(CONFIG_SMP)
350 void efx_set_interrupt_affinity(struct efx_nic *efx)
351 {
352         struct efx_channel *channel;
353         unsigned int cpu;
354
355         efx_for_each_channel(channel, efx) {
356                 cpu = cpumask_local_spread(channel->channel,
357                                            pcibus_to_node(efx->pci_dev->bus));
358                 irq_set_affinity_hint(channel->irq, cpumask_of(cpu));
359         }
360 }
361
362 void efx_clear_interrupt_affinity(struct efx_nic *efx)
363 {
364         struct efx_channel *channel;
365
366         efx_for_each_channel(channel, efx)
367                 irq_set_affinity_hint(channel->irq, NULL);
368 }
369 #else
370 void
371 efx_set_interrupt_affinity(struct efx_nic *efx __attribute__ ((unused)))
372 {
373 }
374
375 void
376 efx_clear_interrupt_affinity(struct efx_nic *efx __attribute__ ((unused)))
377 {
378 }
379 #endif /* CONFIG_SMP */
380
381 void efx_remove_interrupts(struct efx_nic *efx)
382 {
383         struct efx_channel *channel;
384
385         /* Remove MSI/MSI-X interrupts */
386         efx_for_each_channel(channel, efx)
387                 channel->irq = 0;
388         pci_disable_msi(efx->pci_dev);
389         pci_disable_msix(efx->pci_dev);
390
391         /* Remove legacy interrupt */
392         efx->legacy_irq = 0;
393 }
394
395 /***************
396  * EVENT QUEUES
397  ***************/
398
399 /* Create event queue
400  * Event queue memory allocations are done only once.  If the channel
401  * is reset, the memory buffer will be reused; this guards against
402  * errors during channel reset and also simplifies interrupt handling.
403  */
404 int efx_probe_eventq(struct efx_channel *channel)
405 {
406         struct efx_nic *efx = channel->efx;
407         unsigned long entries;
408
409         netif_dbg(efx, probe, efx->net_dev,
410                   "chan %d create event queue\n", channel->channel);
411
412         /* Build an event queue with room for one event per tx and rx buffer,
413          * plus some extra for link state events and MCDI completions.
414          */
415         entries = roundup_pow_of_two(efx->rxq_entries + efx->txq_entries + 128);
416         EFX_WARN_ON_PARANOID(entries > EFX_MAX_EVQ_SIZE);
417         channel->eventq_mask = max(entries, EFX_MIN_EVQ_SIZE) - 1;
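        /* For example (illustrative ring sizes only): with 1024-entry RX and
         * TX queues this is roundup_pow_of_two(1024 + 1024 + 128) = 4096
         * entries, giving an eventq_mask of 4095 provided EFX_MIN_EVQ_SIZE
         * is no larger.
         */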
418
419         return efx_nic_probe_eventq(channel);
420 }
421
422 /* Prepare channel's event queue */
423 int efx_init_eventq(struct efx_channel *channel)
424 {
425         struct efx_nic *efx = channel->efx;
426         int rc;
427
428         EFX_WARN_ON_PARANOID(channel->eventq_init);
429
430         netif_dbg(efx, drv, efx->net_dev,
431                   "chan %d init event queue\n", channel->channel);
432
433         rc = efx_nic_init_eventq(channel);
434         if (rc == 0) {
435                 efx->type->push_irq_moderation(channel);
436                 channel->eventq_read_ptr = 0;
437                 channel->eventq_init = true;
438         }
439         return rc;
440 }
441
442 /* Enable event queue processing and NAPI */
443 void efx_start_eventq(struct efx_channel *channel)
444 {
445         netif_dbg(channel->efx, ifup, channel->efx->net_dev,
446                   "chan %d start event queue\n", channel->channel);
447
448         /* Make sure the NAPI handler sees the enabled flag set */
449         channel->enabled = true;
450         smp_wmb();
451
452         napi_enable(&channel->napi_str);
453         efx_nic_eventq_read_ack(channel);
454 }
455
456 /* Disable event queue processing and NAPI */
457 void efx_stop_eventq(struct efx_channel *channel)
458 {
459         if (!channel->enabled)
460                 return;
461
462         napi_disable(&channel->napi_str);
463         channel->enabled = false;
464 }
465
466 void efx_fini_eventq(struct efx_channel *channel)
467 {
468         if (!channel->eventq_init)
469                 return;
470
471         netif_dbg(channel->efx, drv, channel->efx->net_dev,
472                   "chan %d fini event queue\n", channel->channel);
473
474         efx_nic_fini_eventq(channel);
475         channel->eventq_init = false;
476 }
477
478 void efx_remove_eventq(struct efx_channel *channel)
479 {
480         netif_dbg(channel->efx, drv, channel->efx->net_dev,
481                   "chan %d remove event queue\n", channel->channel);
482
483         efx_nic_remove_eventq(channel);
484 }
485
486 /**************************************************************************
487  *
488  * Channel handling
489  *
490  *************************************************************************/
491
492 #ifdef CONFIG_RFS_ACCEL
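/* Periodic work item that ages out accelerated RFS (ARFS) filters.  The
 * expiry quota scales with the number of installed filters and the time since
 * the last pass, so the whole filter table is scanned roughly once every
 * 30 seconds; passes that would cover fewer than 20 filters are skipped.
 */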
493 static void efx_filter_rfs_expire(struct work_struct *data)
494 {
495         struct delayed_work *dwork = to_delayed_work(data);
496         struct efx_channel *channel;
497         unsigned int time, quota;
498
499         channel = container_of(dwork, struct efx_channel, filter_work);
500         time = jiffies - channel->rfs_last_expiry;
501         quota = channel->rfs_filter_count * time / (30 * HZ);
502         if (quota >= 20 && __efx_filter_rfs_expire(channel, min(channel->rfs_filter_count, quota)))
503                 channel->rfs_last_expiry += time;
504         /* Ensure we do more work eventually even if NAPI poll is not happening */
505         schedule_delayed_work(dwork, 30 * HZ);
506 }
507 #endif
508
509 /* Allocate and initialise a channel structure. */
510 static struct efx_channel *efx_alloc_channel(struct efx_nic *efx, int i)
511 {
512         struct efx_rx_queue *rx_queue;
513         struct efx_tx_queue *tx_queue;
514         struct efx_channel *channel;
515         int j;
516
517         channel = kzalloc(sizeof(*channel), GFP_KERNEL);
518         if (!channel)
519                 return NULL;
520
521         channel->efx = efx;
522         channel->channel = i;
523         channel->type = &efx_default_channel_type;
524
525         for (j = 0; j < EFX_MAX_TXQ_PER_CHANNEL; j++) {
526                 tx_queue = &channel->tx_queue[j];
527                 tx_queue->efx = efx;
528                 tx_queue->queue = -1;
529                 tx_queue->label = j;
530                 tx_queue->channel = channel;
531         }
532
533 #ifdef CONFIG_RFS_ACCEL
534         INIT_DELAYED_WORK(&channel->filter_work, efx_filter_rfs_expire);
535 #endif
536
537         rx_queue = &channel->rx_queue;
538         rx_queue->efx = efx;
539         timer_setup(&rx_queue->slow_fill, efx_rx_slow_fill, 0);
540
541         return channel;
542 }
543
544 int efx_init_channels(struct efx_nic *efx)
545 {
546         unsigned int i;
547
548         for (i = 0; i < EFX_MAX_CHANNELS; i++) {
549                 efx->channel[i] = efx_alloc_channel(efx, i);
550                 if (!efx->channel[i])
551                         return -ENOMEM;
552                 efx->msi_context[i].efx = efx;
553                 efx->msi_context[i].index = i;
554         }
555
556         /* Higher numbered interrupt modes are less capable! */
557         efx->interrupt_mode = min(efx->type->min_interrupt_mode,
558                                   efx_interrupt_mode);
559
560         efx->max_channels = EFX_MAX_CHANNELS;
561         efx->max_tx_channels = EFX_MAX_CHANNELS;
562
563         return 0;
564 }
565
566 void efx_fini_channels(struct efx_nic *efx)
567 {
568         unsigned int i;
569
570         for (i = 0; i < EFX_MAX_CHANNELS; i++)
571                 if (efx->channel[i]) {
572                         kfree(efx->channel[i]);
573                         efx->channel[i] = NULL;
574                 }
575 }
576
577 /* Allocate and initialise a channel structure, copying parameters
578  * (but not resources) from an old channel structure.
579  */
580 struct efx_channel *efx_copy_channel(const struct efx_channel *old_channel)
581 {
582         struct efx_rx_queue *rx_queue;
583         struct efx_tx_queue *tx_queue;
584         struct efx_channel *channel;
585         int j;
586
587         channel = kmalloc(sizeof(*channel), GFP_KERNEL);
588         if (!channel)
589                 return NULL;
590
591         *channel = *old_channel;
592
593         channel->napi_dev = NULL;
594         INIT_HLIST_NODE(&channel->napi_str.napi_hash_node);
595         channel->napi_str.napi_id = 0;
596         channel->napi_str.state = 0;
597         memset(&channel->eventq, 0, sizeof(channel->eventq));
598
599         for (j = 0; j < EFX_MAX_TXQ_PER_CHANNEL; j++) {
600                 tx_queue = &channel->tx_queue[j];
601                 if (tx_queue->channel)
602                         tx_queue->channel = channel;
603                 tx_queue->buffer = NULL;
604                 tx_queue->cb_page = NULL;
605                 memset(&tx_queue->txd, 0, sizeof(tx_queue->txd));
606         }
607
608         rx_queue = &channel->rx_queue;
609         rx_queue->buffer = NULL;
610         memset(&rx_queue->rxd, 0, sizeof(rx_queue->rxd));
611         timer_setup(&rx_queue->slow_fill, efx_rx_slow_fill, 0);
612 #ifdef CONFIG_RFS_ACCEL
613         INIT_DELAYED_WORK(&channel->filter_work, efx_filter_rfs_expire);
614 #endif
615
616         return channel;
617 }
618
619 static int efx_probe_channel(struct efx_channel *channel)
620 {
621         struct efx_tx_queue *tx_queue;
622         struct efx_rx_queue *rx_queue;
623         int rc;
624
625         netif_dbg(channel->efx, probe, channel->efx->net_dev,
626                   "creating channel %d\n", channel->channel);
627
628         rc = channel->type->pre_probe(channel);
629         if (rc)
630                 goto fail;
631
632         rc = efx_probe_eventq(channel);
633         if (rc)
634                 goto fail;
635
636         efx_for_each_channel_tx_queue(tx_queue, channel) {
637                 rc = efx_probe_tx_queue(tx_queue);
638                 if (rc)
639                         goto fail;
640         }
641
642         efx_for_each_channel_rx_queue(rx_queue, channel) {
643                 rc = efx_probe_rx_queue(rx_queue);
644                 if (rc)
645                         goto fail;
646         }
647
648         channel->rx_list = NULL;
649
650         return 0;
651
652 fail:
653         efx_remove_channel(channel);
654         return rc;
655 }
656
657 void efx_get_channel_name(struct efx_channel *channel, char *buf, size_t len)
658 {
659         struct efx_nic *efx = channel->efx;
660         const char *type;
661         int number;
662
663         number = channel->channel;
664
665         if (number >= efx->xdp_channel_offset &&
666             !WARN_ON_ONCE(!efx->n_xdp_channels)) {
667                 type = "-xdp";
668                 number -= efx->xdp_channel_offset;
669         } else if (efx->tx_channel_offset == 0) {
670                 type = "";
671         } else if (number < efx->tx_channel_offset) {
672                 type = "-rx";
673         } else {
674                 type = "-tx";
675                 number -= efx->tx_channel_offset;
676         }
677         snprintf(buf, len, "%s%s-%d", efx->name, type, number);
678 }
679
680 void efx_set_channel_names(struct efx_nic *efx)
681 {
682         struct efx_channel *channel;
683
684         efx_for_each_channel(channel, efx)
685                 channel->type->get_name(channel,
686                                         efx->msi_context[channel->channel].name,
687                                         sizeof(efx->msi_context[0].name));
688 }
689
690 int efx_probe_channels(struct efx_nic *efx)
691 {
692         struct efx_channel *channel;
693         int rc;
694
695         /* Restart special buffer allocation */
696         efx->next_buffer_table = 0;
697
698         /* Probe channels in reverse, so that any 'extra' channels
699          * use the start of the buffer table. This allows the traffic
700          * channels to be resized without moving them or wasting the
701          * entries before them.
702          */
703         efx_for_each_channel_rev(channel, efx) {
704                 rc = efx_probe_channel(channel);
705                 if (rc) {
706                         netif_err(efx, probe, efx->net_dev,
707                                   "failed to create channel %d\n",
708                                   channel->channel);
709                         goto fail;
710                 }
711         }
712         efx_set_channel_names(efx);
713
714         return 0;
715
716 fail:
717         efx_remove_channels(efx);
718         return rc;
719 }
720
721 void efx_remove_channel(struct efx_channel *channel)
722 {
723         struct efx_tx_queue *tx_queue;
724         struct efx_rx_queue *rx_queue;
725
726         netif_dbg(channel->efx, drv, channel->efx->net_dev,
727                   "destroy chan %d\n", channel->channel);
728
729         efx_for_each_channel_rx_queue(rx_queue, channel)
730                 efx_remove_rx_queue(rx_queue);
731         efx_for_each_channel_tx_queue(tx_queue, channel)
732                 efx_remove_tx_queue(tx_queue);
733         efx_remove_eventq(channel);
734         channel->type->post_remove(channel);
735 }
736
737 void efx_remove_channels(struct efx_nic *efx)
738 {
739         struct efx_channel *channel;
740
741         efx_for_each_channel(channel, efx)
742                 efx_remove_channel(channel);
743
744         kfree(efx->xdp_tx_queues);
745 }
746
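/* Rebuild the channels with new RX and TX ring sizes.  Channels whose type
 * provides a copy operation are cloned with the new sizes and swapped in;
 * the remaining channels keep their existing buffer-table entries.  If
 * anything fails, the old channels and ring sizes are put back.
 */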
747 int efx_realloc_channels(struct efx_nic *efx, u32 rxq_entries, u32 txq_entries)
748 {
749         struct efx_channel *other_channel[EFX_MAX_CHANNELS], *channel;
750         unsigned int i, next_buffer_table = 0;
751         u32 old_rxq_entries, old_txq_entries;
752         int rc, rc2;
753
754         rc = efx_check_disabled(efx);
755         if (rc)
756                 return rc;
757
758         /* Not all channels should be reallocated. We must avoid
759          * reallocating their buffer table entries.
760          */
761         efx_for_each_channel(channel, efx) {
762                 struct efx_rx_queue *rx_queue;
763                 struct efx_tx_queue *tx_queue;
764
765                 if (channel->type->copy)
766                         continue;
767                 next_buffer_table = max(next_buffer_table,
768                                         channel->eventq.index +
769                                         channel->eventq.entries);
770                 efx_for_each_channel_rx_queue(rx_queue, channel)
771                         next_buffer_table = max(next_buffer_table,
772                                                 rx_queue->rxd.index +
773                                                 rx_queue->rxd.entries);
774                 efx_for_each_channel_tx_queue(tx_queue, channel)
775                         next_buffer_table = max(next_buffer_table,
776                                                 tx_queue->txd.index +
777                                                 tx_queue->txd.entries);
778         }
779
780         efx_device_detach_sync(efx);
781         efx_stop_all(efx);
782         efx_soft_disable_interrupts(efx);
783
784         /* Clone channels (where possible) */
785         memset(other_channel, 0, sizeof(other_channel));
786         for (i = 0; i < efx->n_channels; i++) {
787                 channel = efx->channel[i];
788                 if (channel->type->copy)
789                         channel = channel->type->copy(channel);
790                 if (!channel) {
791                         rc = -ENOMEM;
792                         goto out;
793                 }
794                 other_channel[i] = channel;
795         }
796
797         /* Swap entry counts and channel pointers */
798         old_rxq_entries = efx->rxq_entries;
799         old_txq_entries = efx->txq_entries;
800         efx->rxq_entries = rxq_entries;
801         efx->txq_entries = txq_entries;
802         for (i = 0; i < efx->n_channels; i++) {
803                 channel = efx->channel[i];
804                 efx->channel[i] = other_channel[i];
805                 other_channel[i] = channel;
806         }
807
808         /* Restart buffer table allocation */
809         efx->next_buffer_table = next_buffer_table;
810
811         for (i = 0; i < efx->n_channels; i++) {
812                 channel = efx->channel[i];
813                 if (!channel->type->copy)
814                         continue;
815                 rc = efx_probe_channel(channel);
816                 if (rc)
817                         goto rollback;
818                 efx_init_napi_channel(efx->channel[i]);
819         }
820
821 out:
822         /* Destroy unused channel structures */
823         for (i = 0; i < efx->n_channels; i++) {
824                 channel = other_channel[i];
825                 if (channel && channel->type->copy) {
826                         efx_fini_napi_channel(channel);
827                         efx_remove_channel(channel);
828                         kfree(channel);
829                 }
830         }
831
832         rc2 = efx_soft_enable_interrupts(efx);
833         if (rc2) {
834                 rc = rc ? rc : rc2;
835                 netif_err(efx, drv, efx->net_dev,
836                           "unable to restart interrupts on channel reallocation\n");
837                 efx_schedule_reset(efx, RESET_TYPE_DISABLE);
838         } else {
839                 efx_start_all(efx);
840                 efx_device_attach_if_not_resetting(efx);
841         }
842         return rc;
843
844 rollback:
845         /* Swap back */
846         efx->rxq_entries = old_rxq_entries;
847         efx->txq_entries = old_txq_entries;
848         for (i = 0; i < efx->n_channels; i++) {
849                 channel = efx->channel[i];
850                 efx->channel[i] = other_channel[i];
851                 other_channel[i] = channel;
852         }
853         goto out;
854 }
855
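/* Assign hardware TX queue numbers to every TX queue, build the
 * efx->xdp_tx_queues lookup array for XDP transmit, and tell the stack the
 * real numbers of RX and TX queues in use.
 */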
856 int efx_set_channels(struct efx_nic *efx)
857 {
858         struct efx_tx_queue *tx_queue;
859         struct efx_channel *channel;
860         unsigned int next_queue = 0;
861         int xdp_queue_number;
862         int rc;
863
864         efx->tx_channel_offset =
865                 efx_separate_tx_channels ?
866                 efx->n_channels - efx->n_tx_channels : 0;
867
868         if (efx->xdp_tx_queue_count) {
869                 EFX_WARN_ON_PARANOID(efx->xdp_tx_queues);
870
871                 /* Allocate array for XDP TX queue lookup. */
872                 efx->xdp_tx_queues = kcalloc(efx->xdp_tx_queue_count,
873                                              sizeof(*efx->xdp_tx_queues),
874                                              GFP_KERNEL);
875                 if (!efx->xdp_tx_queues)
876                         return -ENOMEM;
877         }
878
879         /* We need to mark which channels really have RX and TX
880          * queues, and adjust the TX queue numbers if we have separate
881          * RX-only and TX-only channels.
882          */
883         xdp_queue_number = 0;
884         efx_for_each_channel(channel, efx) {
885                 if (channel->channel < efx->n_rx_channels)
886                         channel->rx_queue.core_index = channel->channel;
887                 else
888                         channel->rx_queue.core_index = -1;
889
890                 if (channel->channel >= efx->tx_channel_offset) {
891                         if (efx_channel_is_xdp_tx(channel)) {
892                                 efx_for_each_channel_tx_queue(tx_queue, channel) {
893                                         tx_queue->queue = next_queue++;
894                                         netif_dbg(efx, drv, efx->net_dev, "Channel %u TXQ %u is XDP %u, HW %u\n",
895                                                   channel->channel, tx_queue->label,
896                                                   xdp_queue_number, tx_queue->queue);
897                                         /* We may have a few left-over XDP TX
898                                          * queues owing to xdp_tx_queue_count
899                                          * not dividing evenly by EFX_MAX_TXQ_PER_CHANNEL.
900                                          * We still allocate and probe those
901                                          * TXQs, but never use them.
902                                          */
903                                         if (xdp_queue_number < efx->xdp_tx_queue_count)
904                                                 efx->xdp_tx_queues[xdp_queue_number] = tx_queue;
905                                         xdp_queue_number++;
906                                 }
907                         } else {
908                                 efx_for_each_channel_tx_queue(tx_queue, channel) {
909                                         tx_queue->queue = next_queue++;
910                                         netif_dbg(efx, drv, efx->net_dev, "Channel %u TXQ %u is HW %u\n",
911                                                   channel->channel, tx_queue->label,
912                                                   tx_queue->queue);
913                                 }
914                         }
915                 }
916         }
917         if (xdp_queue_number)
918                 efx->xdp_tx_queue_count = xdp_queue_number;
919
920         rc = netif_set_real_num_tx_queues(efx->net_dev, efx->n_tx_channels);
921         if (rc)
922                 return rc;
923         return netif_set_real_num_rx_queues(efx->net_dev, efx->n_rx_channels);
924 }
925
926 bool efx_default_channel_want_txqs(struct efx_channel *channel)
927 {
928         return channel->channel - channel->efx->tx_channel_offset <
929                 channel->efx->n_tx_channels;
930 }
931
932 /*************
933  * START/STOP
934  *************/
935
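/* The "soft" enable/disable pair below covers event queue initialisation,
 * NAPI and MCDI event mode only.  efx_enable_interrupts() and
 * efx_disable_interrupts() additionally handle the hardware IRQ master enable
 * and channels that keep their event queue across a soft cycle
 * (type->keep_eventq).
 */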
936 int efx_soft_enable_interrupts(struct efx_nic *efx)
937 {
938         struct efx_channel *channel, *end_channel;
939         int rc;
940
941         BUG_ON(efx->state == STATE_DISABLED);
942
943         efx->irq_soft_enabled = true;
944         smp_wmb();
945
946         efx_for_each_channel(channel, efx) {
947                 if (!channel->type->keep_eventq) {
948                         rc = efx_init_eventq(channel);
949                         if (rc)
950                                 goto fail;
951                 }
952                 efx_start_eventq(channel);
953         }
954
955         efx_mcdi_mode_event(efx);
956
957         return 0;
958 fail:
959         end_channel = channel;
960         efx_for_each_channel(channel, efx) {
961                 if (channel == end_channel)
962                         break;
963                 efx_stop_eventq(channel);
964                 if (!channel->type->keep_eventq)
965                         efx_fini_eventq(channel);
966         }
967
968         return rc;
969 }
970
971 void efx_soft_disable_interrupts(struct efx_nic *efx)
972 {
973         struct efx_channel *channel;
974
975         if (efx->state == STATE_DISABLED)
976                 return;
977
978         efx_mcdi_mode_poll(efx);
979
980         efx->irq_soft_enabled = false;
981         smp_wmb();
982
983         if (efx->legacy_irq)
984                 synchronize_irq(efx->legacy_irq);
985
986         efx_for_each_channel(channel, efx) {
987                 if (channel->irq)
988                         synchronize_irq(channel->irq);
989
990                 efx_stop_eventq(channel);
991                 if (!channel->type->keep_eventq)
992                         efx_fini_eventq(channel);
993         }
994
995         /* Flush the asynchronous MCDI request queue */
996         efx_mcdi_flush_async(efx);
997 }
998
999 int efx_enable_interrupts(struct efx_nic *efx)
1000 {
1001         struct efx_channel *channel, *end_channel;
1002         int rc;
1003
1004         /* TODO: Is this really a bug? */
1005         BUG_ON(efx->state == STATE_DISABLED);
1006
1007         if (efx->eeh_disabled_legacy_irq) {
1008                 enable_irq(efx->legacy_irq);
1009                 efx->eeh_disabled_legacy_irq = false;
1010         }
1011
1012         efx->type->irq_enable_master(efx);
1013
1014         efx_for_each_channel(channel, efx) {
1015                 if (channel->type->keep_eventq) {
1016                         rc = efx_init_eventq(channel);
1017                         if (rc)
1018                                 goto fail;
1019                 }
1020         }
1021
1022         rc = efx_soft_enable_interrupts(efx);
1023         if (rc)
1024                 goto fail;
1025
1026         return 0;
1027
1028 fail:
1029         end_channel = channel;
1030         efx_for_each_channel(channel, efx) {
1031                 if (channel == end_channel)
1032                         break;
1033                 if (channel->type->keep_eventq)
1034                         efx_fini_eventq(channel);
1035         }
1036
1037         efx->type->irq_disable_non_ev(efx);
1038
1039         return rc;
1040 }
1041
1042 void efx_disable_interrupts(struct efx_nic *efx)
1043 {
1044         struct efx_channel *channel;
1045
1046         efx_soft_disable_interrupts(efx);
1047
1048         efx_for_each_channel(channel, efx) {
1049                 if (channel->type->keep_eventq)
1050                         efx_fini_eventq(channel);
1051         }
1052
1053         efx->type->irq_disable_non_ev(efx);
1054 }
1055
1056 void efx_start_channels(struct efx_nic *efx)
1057 {
1058         struct efx_tx_queue *tx_queue;
1059         struct efx_rx_queue *rx_queue;
1060         struct efx_channel *channel;
1061
1062         efx_for_each_channel(channel, efx) {
1063                 efx_for_each_channel_tx_queue(tx_queue, channel) {
1064                         efx_init_tx_queue(tx_queue);
1065                         atomic_inc(&efx->active_queues);
1066                 }
1067
1068                 efx_for_each_channel_rx_queue(rx_queue, channel) {
1069                         efx_init_rx_queue(rx_queue);
1070                         atomic_inc(&efx->active_queues);
1071                         efx_stop_eventq(channel);
1072                         efx_fast_push_rx_descriptors(rx_queue, false);
1073                         efx_start_eventq(channel);
1074                 }
1075
1076                 WARN_ON(channel->rx_pkt_n_frags);
1077         }
1078 }
1079
1080 void efx_stop_channels(struct efx_nic *efx)
1081 {
1082         struct efx_tx_queue *tx_queue;
1083         struct efx_rx_queue *rx_queue;
1084         struct efx_channel *channel;
1085         int rc = 0;
1086
1087         /* Stop RX refill */
1088         efx_for_each_channel(channel, efx) {
1089                 efx_for_each_channel_rx_queue(rx_queue, channel)
1090                         rx_queue->refill_enabled = false;
1091         }
1092
1093         efx_for_each_channel(channel, efx) {
1094                 /* RX packet processing is pipelined, so wait for the
1095                  * NAPI handler to complete.  At least event queue 0
1096                  * might be kept active by non-data events, so don't
1097                  * use napi_synchronize() but actually disable NAPI
1098                  * temporarily.
1099                  */
1100                 if (efx_channel_has_rx_queue(channel)) {
1101                         efx_stop_eventq(channel);
1102                         efx_start_eventq(channel);
1103                 }
1104         }
1105
1106         if (efx->type->fini_dmaq)
1107                 rc = efx->type->fini_dmaq(efx);
1108
1109         if (rc) {
1110                 netif_err(efx, drv, efx->net_dev, "failed to flush queues\n");
1111         } else {
1112                 netif_dbg(efx, drv, efx->net_dev,
1113                           "successfully flushed all queues\n");
1114         }
1115
1116         efx_for_each_channel(channel, efx) {
1117                 efx_for_each_channel_rx_queue(rx_queue, channel)
1118                         efx_fini_rx_queue(rx_queue);
1119                 efx_for_each_channel_tx_queue(tx_queue, channel)
1120                         efx_fini_tx_queue(tx_queue);
1121         }
1122 }
1123
1124 /**************************************************************************
1125  *
1126  * NAPI interface
1127  *
1128  *************************************************************************/
1129
1130 /* Process channel's event queue
1131  *
1132  * This function is responsible for processing the event queue of a
1133  * single channel.  The caller must guarantee that this function will
1134  * never be concurrently called more than once on the same channel,
1135  * though different channels may be being processed concurrently.
1136  */
1137 static int efx_process_channel(struct efx_channel *channel, int budget)
1138 {
1139         struct efx_tx_queue *tx_queue;
1140         struct list_head rx_list;
1141         int spent;
1142
1143         if (unlikely(!channel->enabled))
1144                 return 0;
1145
1146         /* Prepare the batch receive list */
1147         EFX_WARN_ON_PARANOID(channel->rx_list != NULL);
1148         INIT_LIST_HEAD(&rx_list);
1149         channel->rx_list = &rx_list;
1150
1151         efx_for_each_channel_tx_queue(tx_queue, channel) {
1152                 tx_queue->pkts_compl = 0;
1153                 tx_queue->bytes_compl = 0;
1154         }
1155
1156         spent = efx_nic_process_eventq(channel, budget);
1157         if (spent && efx_channel_has_rx_queue(channel)) {
1158                 struct efx_rx_queue *rx_queue =
1159                         efx_channel_get_rx_queue(channel);
1160
1161                 efx_rx_flush_packet(channel);
1162                 efx_fast_push_rx_descriptors(rx_queue, true);
1163         }
1164
1165         /* Update BQL */
1166         efx_for_each_channel_tx_queue(tx_queue, channel) {
1167                 if (tx_queue->bytes_compl) {
1168                         netdev_tx_completed_queue(tx_queue->core_txq,
1169                                                   tx_queue->pkts_compl,
1170                                                   tx_queue->bytes_compl);
1171                 }
1172         }
1173
1174         /* Receive any packets we queued up */
1175         netif_receive_skb_list(channel->rx_list);
1176         channel->rx_list = NULL;
1177
1178         return spent;
1179 }
1180
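/* Adjust adaptive interrupt moderation for a channel.  Called from efx_poll()
 * every 1000 interrupts: the moderation delay is stepped down by
 * irq_mod_step_us if the channel scored below irq_adapt_low_thresh, or
 * stepped up towards irq_rx_moderation_us if it scored above
 * irq_adapt_high_thresh.
 */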
1181 static void efx_update_irq_mod(struct efx_nic *efx, struct efx_channel *channel)
1182 {
1183         int step = efx->irq_mod_step_us;
1184
1185         if (channel->irq_mod_score < irq_adapt_low_thresh) {
1186                 if (channel->irq_moderation_us > step) {
1187                         channel->irq_moderation_us -= step;
1188                         efx->type->push_irq_moderation(channel);
1189                 }
1190         } else if (channel->irq_mod_score > irq_adapt_high_thresh) {
1191                 if (channel->irq_moderation_us <
1192                     efx->irq_rx_moderation_us) {
1193                         channel->irq_moderation_us += step;
1194                         efx->type->push_irq_moderation(channel);
1195                 }
1196         }
1197
1198         channel->irq_count = 0;
1199         channel->irq_mod_score = 0;
1200 }
1201
1202 /* NAPI poll handler
1203  *
1204  * NAPI guarantees serialisation of polls of the same device, which
1205  * provides the guarantee required by efx_process_channel().
1206  */
1207 static int efx_poll(struct napi_struct *napi, int budget)
1208 {
1209         struct efx_channel *channel =
1210                 container_of(napi, struct efx_channel, napi_str);
1211         struct efx_nic *efx = channel->efx;
1212 #ifdef CONFIG_RFS_ACCEL
1213         unsigned int time;
1214 #endif
1215         int spent;
1216
1217         netif_vdbg(efx, intr, efx->net_dev,
1218                    "channel %d NAPI poll executing on CPU %d\n",
1219                    channel->channel, raw_smp_processor_id());
1220
1221         spent = efx_process_channel(channel, budget);
1222
1223         xdp_do_flush_map();
1224
1225         if (spent < budget) {
1226                 if (efx_channel_has_rx_queue(channel) &&
1227                     efx->irq_rx_adaptive &&
1228                     unlikely(++channel->irq_count == 1000)) {
1229                         efx_update_irq_mod(efx, channel);
1230                 }
1231
1232 #ifdef CONFIG_RFS_ACCEL
1233                 /* Perhaps expire some ARFS filters */
1234                 time = jiffies - channel->rfs_last_expiry;
1235                 /* Would our quota be >= 20? */
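                /* i.e. rfs_filter_count * time / (30 * HZ) >= 20, the same
                 * test as in efx_filter_rfs_expire() but without the division
                 */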
1236                 if (channel->rfs_filter_count * time >= 600 * HZ)
1237                         mod_delayed_work(system_wq, &channel->filter_work, 0);
1238 #endif
1239
1240                 /* There is no race here; although napi_disable() will
1241                  * only wait for napi_complete(), this isn't a problem
1242                  * since efx_nic_eventq_read_ack() will have no effect if
1243                  * interrupts have already been disabled.
1244                  */
1245                 if (napi_complete_done(napi, spent))
1246                         efx_nic_eventq_read_ack(channel);
1247         }
1248
1249         return spent;
1250 }
1251
1252 void efx_init_napi_channel(struct efx_channel *channel)
1253 {
1254         struct efx_nic *efx = channel->efx;
1255
1256         channel->napi_dev = efx->net_dev;
1257         netif_napi_add(channel->napi_dev, &channel->napi_str,
1258                        efx_poll, napi_weight);
1259 }
1260
1261 void efx_init_napi(struct efx_nic *efx)
1262 {
1263         struct efx_channel *channel;
1264
1265         efx_for_each_channel(channel, efx)
1266                 efx_init_napi_channel(channel);
1267 }
1268
1269 void efx_fini_napi_channel(struct efx_channel *channel)
1270 {
1271         if (channel->napi_dev)
1272                 netif_napi_del(&channel->napi_str);
1273
1274         channel->napi_dev = NULL;
1275 }
1276
1277 void efx_fini_napi(struct efx_nic *efx)
1278 {
1279         struct efx_channel *channel;
1280
1281         efx_for_each_channel(channel, efx)
1282                 efx_fini_napi_channel(channel);
1283 }