hv_netvsc: Ensure correct teardown message sequence order
drivers/net/hyperv/netvsc.c
/*
 * Copyright (c) 2009, Microsoft Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, see <http://www.gnu.org/licenses/>.
 *
 * Authors:
 *   Haiyang Zhang <haiyangz@microsoft.com>
 *   Hank Janssen  <hjanssen@microsoft.com>
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/wait.h>
#include <linux/mm.h>
#include <linux/delay.h>
#include <linux/io.h>
#include <linux/slab.h>
#include <linux/netdevice.h>
#include <linux/if_ether.h>
#include <linux/vmalloc.h>
#include <linux/rtnetlink.h>
#include <linux/prefetch.h>
#include <linux/reciprocal_div.h>

#include <asm/sync_bitops.h>

#include "hyperv_net.h"
#include "netvsc_trace.h"

/*
 * Switch the data path from the synthetic interface to the VF
 * interface.
 */
void netvsc_switch_datapath(struct net_device *ndev, bool vf)
{
        struct net_device_context *net_device_ctx = netdev_priv(ndev);
        struct hv_device *dev = net_device_ctx->device_ctx;
        struct netvsc_device *nv_dev = rtnl_dereference(net_device_ctx->nvdev);
        struct nvsp_message *init_pkt = &nv_dev->channel_init_pkt;

        memset(init_pkt, 0, sizeof(struct nvsp_message));
        init_pkt->hdr.msg_type = NVSP_MSG4_TYPE_SWITCH_DATA_PATH;
        if (vf)
                init_pkt->msg.v4_msg.active_dp.active_datapath =
                        NVSP_DATAPATH_VF;
        else
                init_pkt->msg.v4_msg.active_dp.active_datapath =
                        NVSP_DATAPATH_SYNTHETIC;

        trace_nvsp_send(ndev, init_pkt);

        vmbus_sendpacket(dev->channel, init_pkt,
                         sizeof(struct nvsp_message),
                         (unsigned long)init_pkt,
                         VM_PKT_DATA_INBAND, 0);
}

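/* Allocate a netvsc_device and initialize it with the default
 * packet-batching limit, RNDIS packet alignment, and the waitqueues,
 * completion and subchannel work used during channel setup and teardown.
 */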
static struct netvsc_device *alloc_net_device(void)
{
        struct netvsc_device *net_device;

        net_device = kzalloc(sizeof(struct netvsc_device), GFP_KERNEL);
        if (!net_device)
                return NULL;

        init_waitqueue_head(&net_device->wait_drain);
        net_device->destroy = false;

        net_device->max_pkt = RNDIS_MAX_PKT_DEFAULT;
        net_device->pkt_align = RNDIS_PKT_ALIGN_DEFAULT;

        init_completion(&net_device->channel_init_wait);
        init_waitqueue_head(&net_device->subchan_open);
        INIT_WORK(&net_device->subchan_work, rndis_set_subchannel);

        return net_device;
}

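/* RCU callback that releases the receive/send buffers, the send section
 * bitmap and the per-channel completion rings once all readers are done
 * with the netvsc_device.
 */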
static void free_netvsc_device(struct rcu_head *head)
{
        struct netvsc_device *nvdev
                = container_of(head, struct netvsc_device, rcu);
        int i;

        kfree(nvdev->extension);
        vfree(nvdev->recv_buf);
        vfree(nvdev->send_buf);
        kfree(nvdev->send_section_map);

        for (i = 0; i < VRSS_CHANNEL_MAX; i++)
                vfree(nvdev->chan_table[i].mrc.slots);

        kfree(nvdev);
}

static void free_netvsc_device_rcu(struct netvsc_device *nvdev)
{
        call_rcu(&nvdev->rcu, free_netvsc_device);
}

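/* The teardown helpers below must run in the right order: the revoke
 * messages have to reach the host while the channel is still open, while
 * the GPADL teardown happens either before or after vmbus_close()
 * depending on the host version (see netvsc_device_remove()).
 */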
static void netvsc_revoke_recv_buf(struct hv_device *device,
                                   struct netvsc_device *net_device)
{
        struct net_device *ndev = hv_get_drvdata(device);
        struct nvsp_message *revoke_packet;
        int ret;

        /*
         * If we got a section count, it means we received a
         * SendReceiveBufferComplete msg (i.e. we sent the
         * NvspMessage1TypeSendReceiveBuffer msg), therefore we need
         * to send a revoke msg here.
         */
        if (net_device->recv_section_cnt) {
                /* Send the revoke receive buffer */
                revoke_packet = &net_device->revoke_packet;
                memset(revoke_packet, 0, sizeof(struct nvsp_message));

                revoke_packet->hdr.msg_type =
                        NVSP_MSG1_TYPE_REVOKE_RECV_BUF;
                revoke_packet->msg.v1_msg.revoke_recv_buf.id =
                        NETVSC_RECEIVE_BUFFER_ID;

                trace_nvsp_send(ndev, revoke_packet);

                ret = vmbus_sendpacket(device->channel,
                                       revoke_packet,
                                       sizeof(struct nvsp_message),
                                       (unsigned long)revoke_packet,
                                       VM_PKT_DATA_INBAND, 0);
                /* If the failure is because the channel is rescinded,
                 * ignore it since we cannot send on a rescinded
                 * channel. This allows us to clean up properly even
                 * when the channel is rescinded.
                 */
                if (device->channel->rescind)
                        ret = 0;
                /*
                 * If we failed here, we might as well return and
                 * have a leak rather than continue and hit a bugcheck.
                 */
                if (ret != 0) {
                        netdev_err(ndev,
                                   "unable to send revoke receive buffer to netvsp\n");
                        return;
                }
                net_device->recv_section_cnt = 0;
        }
}

static void netvsc_revoke_send_buf(struct hv_device *device,
                                   struct netvsc_device *net_device)
{
        struct net_device *ndev = hv_get_drvdata(device);
        struct nvsp_message *revoke_packet;
        int ret;

        /* Deal with the send buffer we may have set up.
         * If we got a send section count, it means we received a
         * NVSP_MSG1_TYPE_SEND_SEND_BUF_COMPLETE msg (i.e. we sent the
         * NVSP_MSG1_TYPE_SEND_SEND_BUF msg), therefore we need
         * to send a revoke msg here.
         */
        if (net_device->send_section_cnt) {
                /* Send the revoke send buffer */
                revoke_packet = &net_device->revoke_packet;
                memset(revoke_packet, 0, sizeof(struct nvsp_message));

                revoke_packet->hdr.msg_type =
                        NVSP_MSG1_TYPE_REVOKE_SEND_BUF;
                revoke_packet->msg.v1_msg.revoke_send_buf.id =
                        NETVSC_SEND_BUFFER_ID;

                trace_nvsp_send(ndev, revoke_packet);

                ret = vmbus_sendpacket(device->channel,
                                       revoke_packet,
                                       sizeof(struct nvsp_message),
                                       (unsigned long)revoke_packet,
                                       VM_PKT_DATA_INBAND, 0);

                /* If the failure is because the channel is rescinded,
                 * ignore it since we cannot send on a rescinded
                 * channel. This allows us to clean up properly even
                 * when the channel is rescinded.
                 */
                if (device->channel->rescind)
                        ret = 0;

                /* If we failed here, we might as well return and
                 * have a leak rather than continue and hit a bugcheck.
                 */
                if (ret != 0) {
                        netdev_err(ndev,
                                   "unable to send revoke send buffer to netvsp\n");
                        return;
                }
                net_device->send_section_cnt = 0;
        }
}

static void netvsc_teardown_recv_gpadl(struct hv_device *device,
                                       struct netvsc_device *net_device)
{
        struct net_device *ndev = hv_get_drvdata(device);
        int ret;

        if (net_device->recv_buf_gpadl_handle) {
                ret = vmbus_teardown_gpadl(device->channel,
                                           net_device->recv_buf_gpadl_handle);

                /* If we failed here, we might as well return and have a leak
                 * rather than continue and hit a bugcheck.
                 */
                if (ret != 0) {
                        netdev_err(ndev,
                                   "unable to teardown receive buffer's gpadl\n");
                        return;
                }
                net_device->recv_buf_gpadl_handle = 0;
        }
}

static void netvsc_teardown_send_gpadl(struct hv_device *device,
                                       struct netvsc_device *net_device)
{
        struct net_device *ndev = hv_get_drvdata(device);
        int ret;

        if (net_device->send_buf_gpadl_handle) {
                ret = vmbus_teardown_gpadl(device->channel,
                                           net_device->send_buf_gpadl_handle);

                /* If we failed here, we might as well return and have a leak
                 * rather than continue and hit a bugcheck.
                 */
                if (ret != 0) {
                        netdev_err(ndev,
                                   "unable to teardown send buffer's gpadl\n");
                        return;
                }
                net_device->send_buf_gpadl_handle = 0;
        }
}

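/* Allocate the receive completion ring for one channel, preferring memory
 * on the NUMA node of the channel's target CPU and falling back to any
 * node if that fails.
 */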
int netvsc_alloc_recv_comp_ring(struct netvsc_device *net_device, u32 q_idx)
{
        struct netvsc_channel *nvchan = &net_device->chan_table[q_idx];
        int node = cpu_to_node(nvchan->channel->target_cpu);
        size_t size;

        size = net_device->recv_completion_cnt * sizeof(struct recv_comp_data);
        nvchan->mrc.slots = vzalloc_node(size, node);
        if (!nvchan->mrc.slots)
                nvchan->mrc.slots = vzalloc(size);

        return nvchan->mrc.slots ? 0 : -ENOMEM;
}

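/* Allocate the receive and send buffers, establish a GPADL handle for
 * each with the host, announce the handles via NVSP, and parse the host's
 * section layout from the completion messages. On any failure the revoke
 * and GPADL teardown helpers above undo whatever was already set up.
 */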
static int netvsc_init_buf(struct hv_device *device,
                           struct netvsc_device *net_device,
                           const struct netvsc_device_info *device_info)
{
        struct nvsp_1_message_send_receive_buffer_complete *resp;
        struct net_device *ndev = hv_get_drvdata(device);
        struct nvsp_message *init_packet;
        unsigned int buf_size;
        size_t map_words;
        int ret = 0;

        /* Get receive buffer area. */
        buf_size = device_info->recv_sections * device_info->recv_section_size;
        buf_size = roundup(buf_size, PAGE_SIZE);

        /* Legacy hosts only allow a smaller receive buffer */
        if (net_device->nvsp_version <= NVSP_PROTOCOL_VERSION_2)
                buf_size = min_t(unsigned int, buf_size,
                                 NETVSC_RECEIVE_BUFFER_SIZE_LEGACY);

        net_device->recv_buf = vzalloc(buf_size);
        if (!net_device->recv_buf) {
                netdev_err(ndev,
                           "unable to allocate receive buffer of size %u\n",
                           buf_size);
                ret = -ENOMEM;
                goto cleanup;
        }

        net_device->recv_buf_size = buf_size;

        /*
         * Establish the gpadl handle for this buffer on this
         * channel.  Note: This call uses the vmbus connection rather
         * than the channel to establish the gpadl handle.
         */
        ret = vmbus_establish_gpadl(device->channel, net_device->recv_buf,
                                    buf_size,
                                    &net_device->recv_buf_gpadl_handle);
        if (ret != 0) {
                netdev_err(ndev,
                           "unable to establish receive buffer's gpadl\n");
                goto cleanup;
        }

        /* Notify the NetVsp of the gpadl handle */
        init_packet = &net_device->channel_init_pkt;
        memset(init_packet, 0, sizeof(struct nvsp_message));
        init_packet->hdr.msg_type = NVSP_MSG1_TYPE_SEND_RECV_BUF;
        init_packet->msg.v1_msg.send_recv_buf.gpadl_handle =
                net_device->recv_buf_gpadl_handle;
        init_packet->msg.v1_msg.send_recv_buf.id = NETVSC_RECEIVE_BUFFER_ID;

        trace_nvsp_send(ndev, init_packet);

        /* Send the gpadl notification request */
        ret = vmbus_sendpacket(device->channel, init_packet,
                               sizeof(struct nvsp_message),
                               (unsigned long)init_packet,
                               VM_PKT_DATA_INBAND,
                               VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
        if (ret != 0) {
                netdev_err(ndev,
                           "unable to send receive buffer's gpadl to netvsp\n");
                goto cleanup;
        }

        wait_for_completion(&net_device->channel_init_wait);

        /* Check the response */
        resp = &init_packet->msg.v1_msg.send_recv_buf_complete;
        if (resp->status != NVSP_STAT_SUCCESS) {
                netdev_err(ndev,
                           "Unable to complete receive buffer initialization with NetVsp - status %d\n",
                           resp->status);
                ret = -EINVAL;
                goto cleanup;
        }

        /* Parse the response */
        netdev_dbg(ndev, "Receive sections: %u sub_allocs: size %u count: %u\n",
                   resp->num_sections, resp->sections[0].sub_alloc_size,
                   resp->sections[0].num_sub_allocs);

        /* There should only be one section for the entire receive buffer */
        if (resp->num_sections != 1 || resp->sections[0].offset != 0) {
                ret = -EINVAL;
                goto cleanup;
        }

        net_device->recv_section_size = resp->sections[0].sub_alloc_size;
        net_device->recv_section_cnt = resp->sections[0].num_sub_allocs;

        /* Setup receive completion ring */
        net_device->recv_completion_cnt
                = round_up(net_device->recv_section_cnt + 1,
                           PAGE_SIZE / sizeof(u64));
        ret = netvsc_alloc_recv_comp_ring(net_device, 0);
        if (ret)
                goto cleanup;

        /* Now setup the send buffer. */
        buf_size = device_info->send_sections * device_info->send_section_size;
        buf_size = round_up(buf_size, PAGE_SIZE);

        net_device->send_buf = vzalloc(buf_size);
        if (!net_device->send_buf) {
                netdev_err(ndev, "unable to allocate send buffer of size %u\n",
                           buf_size);
                ret = -ENOMEM;
                goto cleanup;
        }

        /* Establish the gpadl handle for this buffer on this
         * channel.  Note: This call uses the vmbus connection rather
         * than the channel to establish the gpadl handle.
         */
        ret = vmbus_establish_gpadl(device->channel, net_device->send_buf,
                                    buf_size,
                                    &net_device->send_buf_gpadl_handle);
        if (ret != 0) {
                netdev_err(ndev,
                           "unable to establish send buffer's gpadl\n");
                goto cleanup;
        }

        /* Notify the NetVsp of the gpadl handle */
        init_packet = &net_device->channel_init_pkt;
        memset(init_packet, 0, sizeof(struct nvsp_message));
        init_packet->hdr.msg_type = NVSP_MSG1_TYPE_SEND_SEND_BUF;
        init_packet->msg.v1_msg.send_send_buf.gpadl_handle =
                net_device->send_buf_gpadl_handle;
        init_packet->msg.v1_msg.send_send_buf.id = NETVSC_SEND_BUFFER_ID;

        trace_nvsp_send(ndev, init_packet);

        /* Send the gpadl notification request */
        ret = vmbus_sendpacket(device->channel, init_packet,
                               sizeof(struct nvsp_message),
                               (unsigned long)init_packet,
                               VM_PKT_DATA_INBAND,
                               VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
        if (ret != 0) {
                netdev_err(ndev,
                           "unable to send send buffer's gpadl to netvsp\n");
                goto cleanup;
        }

        wait_for_completion(&net_device->channel_init_wait);

        /* Check the response */
        if (init_packet->msg.v1_msg.
            send_send_buf_complete.status != NVSP_STAT_SUCCESS) {
                netdev_err(ndev,
                           "Unable to complete send buffer initialization with NetVsp - status %d\n",
                           init_packet->msg.v1_msg.send_send_buf_complete.status);
                ret = -EINVAL;
                goto cleanup;
        }

        /* Parse the response */
        net_device->send_section_size =
                init_packet->msg.v1_msg.send_send_buf_complete.section_size;

        /* Section count is simply the size divided by the section size. */
        net_device->send_section_cnt = buf_size / net_device->send_section_size;

        netdev_dbg(ndev, "Send section size: %d, Section count:%d\n",
                   net_device->send_section_size, net_device->send_section_cnt);

        /* Setup state for managing the send buffer. */
        map_words = DIV_ROUND_UP(net_device->send_section_cnt, BITS_PER_LONG);

        net_device->send_section_map = kcalloc(map_words, sizeof(ulong), GFP_KERNEL);
        if (net_device->send_section_map == NULL) {
                ret = -ENOMEM;
                goto cleanup;
        }

        goto exit;

cleanup:
        netvsc_revoke_recv_buf(device, net_device);
        netvsc_revoke_send_buf(device, net_device);
        netvsc_teardown_recv_gpadl(device, net_device);
        netvsc_teardown_send_gpadl(device, net_device);

exit:
        return ret;
}

/* Negotiate NVSP protocol version */
static int negotiate_nvsp_ver(struct hv_device *device,
                              struct netvsc_device *net_device,
                              struct nvsp_message *init_packet,
                              u32 nvsp_ver)
{
        struct net_device *ndev = hv_get_drvdata(device);
        int ret;

        memset(init_packet, 0, sizeof(struct nvsp_message));
        init_packet->hdr.msg_type = NVSP_MSG_TYPE_INIT;
        init_packet->msg.init_msg.init.min_protocol_ver = nvsp_ver;
        init_packet->msg.init_msg.init.max_protocol_ver = nvsp_ver;

        trace_nvsp_send(ndev, init_packet);

        /* Send the init request */
        ret = vmbus_sendpacket(device->channel, init_packet,
                               sizeof(struct nvsp_message),
                               (unsigned long)init_packet,
                               VM_PKT_DATA_INBAND,
                               VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);

        if (ret != 0)
                return ret;

        wait_for_completion(&net_device->channel_init_wait);

        if (init_packet->msg.init_msg.init_complete.status !=
            NVSP_STAT_SUCCESS)
                return -EINVAL;

        if (nvsp_ver == NVSP_PROTOCOL_VERSION_1)
                return 0;

        /* NVSPv2 or later: Send NDIS config */
        memset(init_packet, 0, sizeof(struct nvsp_message));
        init_packet->hdr.msg_type = NVSP_MSG2_TYPE_SEND_NDIS_CONFIG;
        init_packet->msg.v2_msg.send_ndis_config.mtu = ndev->mtu + ETH_HLEN;
        init_packet->msg.v2_msg.send_ndis_config.capability.ieee8021q = 1;

        if (nvsp_ver >= NVSP_PROTOCOL_VERSION_5) {
                init_packet->msg.v2_msg.send_ndis_config.capability.sriov = 1;

                /* Teaming bit is needed to receive link speed updates */
                init_packet->msg.v2_msg.send_ndis_config.capability.teaming = 1;
        }

        trace_nvsp_send(ndev, init_packet);

        ret = vmbus_sendpacket(device->channel, init_packet,
                               sizeof(struct nvsp_message),
                               (unsigned long)init_packet,
                               VM_PKT_DATA_INBAND, 0);

        return ret;
}

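/* Bring up the NVSP connection: negotiate the highest protocol version the
 * host supports, report the matching NDIS version, then set up the receive
 * and send buffers via netvsc_init_buf().
 */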
static int netvsc_connect_vsp(struct hv_device *device,
                              struct netvsc_device *net_device,
                              const struct netvsc_device_info *device_info)
{
        struct net_device *ndev = hv_get_drvdata(device);
        static const u32 ver_list[] = {
                NVSP_PROTOCOL_VERSION_1, NVSP_PROTOCOL_VERSION_2,
                NVSP_PROTOCOL_VERSION_4, NVSP_PROTOCOL_VERSION_5
        };
        struct nvsp_message *init_packet;
        int ndis_version, i, ret;

        init_packet = &net_device->channel_init_pkt;

        /* Negotiate the latest NVSP protocol supported */
        for (i = ARRAY_SIZE(ver_list) - 1; i >= 0; i--)
                if (negotiate_nvsp_ver(device, net_device, init_packet,
                                       ver_list[i]) == 0) {
                        net_device->nvsp_version = ver_list[i];
                        break;
                }

        if (i < 0) {
                ret = -EPROTO;
                goto cleanup;
        }

        pr_debug("Negotiated NVSP version:%x\n", net_device->nvsp_version);

        /* Send the ndis version */
        memset(init_packet, 0, sizeof(struct nvsp_message));

        if (net_device->nvsp_version <= NVSP_PROTOCOL_VERSION_4)
                ndis_version = 0x00060001;
        else
                ndis_version = 0x0006001e;

        init_packet->hdr.msg_type = NVSP_MSG1_TYPE_SEND_NDIS_VER;
        init_packet->msg.v1_msg.send_ndis_ver.ndis_major_ver =
                (ndis_version & 0xFFFF0000) >> 16;
        init_packet->msg.v1_msg.send_ndis_ver.ndis_minor_ver =
                ndis_version & 0xFFFF;

        trace_nvsp_send(ndev, init_packet);

        /* Send the init request */
        ret = vmbus_sendpacket(device->channel, init_packet,
                               sizeof(struct nvsp_message),
                               (unsigned long)init_packet,
                               VM_PKT_DATA_INBAND, 0);
        if (ret != 0)
                goto cleanup;

        ret = netvsc_init_buf(device, net_device, device_info);

cleanup:
        return ret;
}

/*
 * netvsc_device_remove - Callback when the root bus device is removed
 */
void netvsc_device_remove(struct hv_device *device)
{
        struct net_device *ndev = hv_get_drvdata(device);
        struct net_device_context *net_device_ctx = netdev_priv(ndev);
        struct netvsc_device *net_device
                = rtnl_dereference(net_device_ctx->nvdev);
        int i;

        /*
         * Revoke receive buffer. If host is pre-Win2016 then tear down
         * receive buffer GPADL. Do the same for send buffer.
         */
        netvsc_revoke_recv_buf(device, net_device);
        if (vmbus_proto_version < VERSION_WIN10)
                netvsc_teardown_recv_gpadl(device, net_device);

        netvsc_revoke_send_buf(device, net_device);
        if (vmbus_proto_version < VERSION_WIN10)
                netvsc_teardown_send_gpadl(device, net_device);

        RCU_INIT_POINTER(net_device_ctx->nvdev, NULL);

        /* And disassociate NAPI context from device */
        for (i = 0; i < net_device->num_chn; i++)
                netif_napi_del(&net_device->chan_table[i].napi);

        /*
         * At this point, no one should be accessing net_device
         * except in here
         */
        netdev_dbg(ndev, "net device safe to remove\n");

        /* Now, we can close the channel safely */
        vmbus_close(device->channel);

        /*
         * If host is Win2016 or higher then we do the GPADL tear down
         * here after VMBus is closed.
         */
        if (vmbus_proto_version >= VERSION_WIN10) {
                netvsc_teardown_recv_gpadl(device, net_device);
                netvsc_teardown_send_gpadl(device, net_device);
        }

        /* Release all resources */
        free_netvsc_device_rcu(net_device);
}

#define RING_AVAIL_PERCENT_HIWATER 20
#define RING_AVAIL_PERCENT_LOWATER 10

/*
 * Get the percentage of available bytes to write in the ring.
 * The return value is in range from 0 to 100.
 */
static u32 hv_ringbuf_avail_percent(const struct hv_ring_buffer_info *ring_info)
{
        u32 avail_write = hv_get_bytes_to_write(ring_info);

        return reciprocal_divide(avail_write * 100, netvsc_ring_reciprocal);
}

static inline void netvsc_free_send_slot(struct netvsc_device *net_device,
                                         u32 index)
{
        sync_change_bit(index, net_device->send_section_map);
}

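/* Completion handler for RNDIS data packets: release the send buffer
 * section, update per-queue stats, free the skb, and wake the transmit
 * queue (or the drain waiters on teardown) once enough ring space is
 * available again.
 */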
static void netvsc_send_tx_complete(struct netvsc_device *net_device,
                                    struct vmbus_channel *incoming_channel,
                                    struct hv_device *device,
                                    const struct vmpacket_descriptor *desc,
                                    int budget)
{
        struct sk_buff *skb = (struct sk_buff *)(unsigned long)desc->trans_id;
        struct net_device *ndev = hv_get_drvdata(device);
        struct net_device_context *ndev_ctx = netdev_priv(ndev);
        struct vmbus_channel *channel = device->channel;
        u16 q_idx = 0;
        int queue_sends;

        /* Notify the layer above us */
        if (likely(skb)) {
                const struct hv_netvsc_packet *packet
                        = (struct hv_netvsc_packet *)skb->cb;
                u32 send_index = packet->send_buf_index;
                struct netvsc_stats *tx_stats;

                if (send_index != NETVSC_INVALID_INDEX)
                        netvsc_free_send_slot(net_device, send_index);
                q_idx = packet->q_idx;
                channel = incoming_channel;

                tx_stats = &net_device->chan_table[q_idx].tx_stats;

                u64_stats_update_begin(&tx_stats->syncp);
                tx_stats->packets += packet->total_packets;
                tx_stats->bytes += packet->total_bytes;
                u64_stats_update_end(&tx_stats->syncp);

                napi_consume_skb(skb, budget);
        }

        queue_sends =
                atomic_dec_return(&net_device->chan_table[q_idx].queue_sends);

        if (unlikely(net_device->destroy)) {
                if (queue_sends == 0)
                        wake_up(&net_device->wait_drain);
        } else {
                struct netdev_queue *txq = netdev_get_tx_queue(ndev, q_idx);

                if (netif_tx_queue_stopped(txq) &&
                    (hv_ringbuf_avail_percent(&channel->outbound) > RING_AVAIL_PERCENT_HIWATER ||
                     queue_sends < 1)) {
                        netif_tx_wake_queue(txq);
                        ndev_ctx->eth_stats.wake_queue++;
                }
        }
}

static void netvsc_send_completion(struct netvsc_device *net_device,
                                   struct vmbus_channel *incoming_channel,
                                   struct hv_device *device,
                                   const struct vmpacket_descriptor *desc,
                                   int budget)
{
        struct nvsp_message *nvsp_packet = hv_pkt_data(desc);
        struct net_device *ndev = hv_get_drvdata(device);

        switch (nvsp_packet->hdr.msg_type) {
        case NVSP_MSG_TYPE_INIT_COMPLETE:
        case NVSP_MSG1_TYPE_SEND_RECV_BUF_COMPLETE:
        case NVSP_MSG1_TYPE_SEND_SEND_BUF_COMPLETE:
        case NVSP_MSG5_TYPE_SUBCHANNEL:
                /* Copy the response back */
                memcpy(&net_device->channel_init_pkt, nvsp_packet,
                       sizeof(struct nvsp_message));
                complete(&net_device->channel_init_wait);
                break;

        case NVSP_MSG1_TYPE_SEND_RNDIS_PKT_COMPLETE:
                netvsc_send_tx_complete(net_device, incoming_channel,
                                        device, desc, budget);
                break;

        default:
                netdev_err(ndev,
                           "Unknown send completion type %d received!!\n",
                           nvsp_packet->hdr.msg_type);
        }
}

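/* Find a free section in the send buffer by scanning the bitmap and
 * atomically claiming the first clear bit; returns NETVSC_INVALID_INDEX
 * if every section is in use.
 */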
static u32 netvsc_get_next_send_section(struct netvsc_device *net_device)
{
        unsigned long *map_addr = net_device->send_section_map;
        unsigned int i;

        for_each_clear_bit(i, map_addr, net_device->send_section_cnt) {
                if (sync_test_and_set_bit(i, map_addr) == 0)
                        return i;
        }

        return NETVSC_INVALID_INDEX;
}

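/* Copy the RNDIS message and data pages into the chosen send buffer
 * section, appending alignment padding when more packets are expected to
 * be batched behind this one.
 */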
static void netvsc_copy_to_send_buf(struct netvsc_device *net_device,
                                    unsigned int section_index,
                                    u32 pend_size,
                                    struct hv_netvsc_packet *packet,
                                    struct rndis_message *rndis_msg,
                                    struct hv_page_buffer *pb,
                                    bool xmit_more)
{
        char *start = net_device->send_buf;
        char *dest = start + (section_index * net_device->send_section_size)
                     + pend_size;
        int i;
        u32 padding = 0;
        u32 page_count = packet->cp_partial ? packet->rmsg_pgcnt :
                packet->page_buf_cnt;
        u32 remain;

        /* Add padding */
        remain = packet->total_data_buflen & (net_device->pkt_align - 1);
        if (xmit_more && remain) {
                padding = net_device->pkt_align - remain;
                rndis_msg->msg_len += padding;
                packet->total_data_buflen += padding;
        }

        for (i = 0; i < page_count; i++) {
                char *src = phys_to_virt(pb[i].pfn << PAGE_SHIFT);
                u32 offset = pb[i].offset;
                u32 len = pb[i].len;

                memcpy(dest, (src + offset), len);
                dest += len;
        }

        if (padding)
                memset(dest, 0, padding);
}

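/* Build an NVSP_MSG1_TYPE_SEND_RNDIS_PKT message for one packet and send
 * it on the channel, either as a page-buffer packet or inband when the
 * data is already in the send buffer. Also stops the transmit queue when
 * the outbound ring runs low on space.
 */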
static inline int netvsc_send_pkt(
        struct hv_device *device,
        struct hv_netvsc_packet *packet,
        struct netvsc_device *net_device,
        struct hv_page_buffer *pb,
        struct sk_buff *skb)
{
        struct nvsp_message nvmsg;
        struct nvsp_1_message_send_rndis_packet *rpkt =
                &nvmsg.msg.v1_msg.send_rndis_pkt;
        struct netvsc_channel * const nvchan =
                &net_device->chan_table[packet->q_idx];
        struct vmbus_channel *out_channel = nvchan->channel;
        struct net_device *ndev = hv_get_drvdata(device);
        struct net_device_context *ndev_ctx = netdev_priv(ndev);
        struct netdev_queue *txq = netdev_get_tx_queue(ndev, packet->q_idx);
        u64 req_id;
        int ret;
        u32 ring_avail = hv_ringbuf_avail_percent(&out_channel->outbound);

        nvmsg.hdr.msg_type = NVSP_MSG1_TYPE_SEND_RNDIS_PKT;
        if (skb)
                rpkt->channel_type = 0;         /* 0 is RMC_DATA */
        else
                rpkt->channel_type = 1;         /* 1 is RMC_CONTROL */

        rpkt->send_buf_section_index = packet->send_buf_index;
        if (packet->send_buf_index == NETVSC_INVALID_INDEX)
                rpkt->send_buf_section_size = 0;
        else
                rpkt->send_buf_section_size = packet->total_data_buflen;

        req_id = (ulong)skb;

        if (out_channel->rescind)
                return -ENODEV;

        trace_nvsp_send_pkt(ndev, out_channel, rpkt);

        if (packet->page_buf_cnt) {
                if (packet->cp_partial)
                        pb += packet->rmsg_pgcnt;

                ret = vmbus_sendpacket_pagebuffer(out_channel,
                                                  pb, packet->page_buf_cnt,
                                                  &nvmsg, sizeof(nvmsg),
                                                  req_id);
        } else {
                ret = vmbus_sendpacket(out_channel,
                                       &nvmsg, sizeof(nvmsg),
                                       req_id, VM_PKT_DATA_INBAND,
                                       VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
        }

        if (ret == 0) {
                atomic_inc_return(&nvchan->queue_sends);

                if (ring_avail < RING_AVAIL_PERCENT_LOWATER) {
                        netif_tx_stop_queue(txq);
                        ndev_ctx->eth_stats.stop_queue++;
                }
        } else if (ret == -EAGAIN) {
                netif_tx_stop_queue(txq);
                ndev_ctx->eth_stats.stop_queue++;
                if (atomic_read(&nvchan->queue_sends) < 1) {
                        netif_tx_wake_queue(txq);
                        ndev_ctx->eth_stats.wake_queue++;
                        ret = -ENOSPC;
                }
        } else {
                netdev_err(ndev,
                           "Unable to send packet pages %u len %u, ret %d\n",
                           packet->page_buf_cnt, packet->total_data_buflen,
                           ret);
        }

        return ret;
}

/* Move packet out of multi send data (msd), and clear msd */
static inline void move_pkt_msd(struct hv_netvsc_packet **msd_send,
                                struct sk_buff **msd_skb,
                                struct multi_send_data *msdp)
{
        *msd_skb = msdp->skb;
        *msd_send = msdp->pkt;
        msdp->skb = NULL;
        msdp->pkt = NULL;
        msdp->count = 0;
}

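/* Main transmit entry point. Small packets are aggregated into a send
 * buffer section while the stack indicates more data is coming (xmit_more);
 * the pending batch is flushed when it would overflow the section, when
 * batching is not possible, or when the stack stops signalling more data.
 */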
/* RCU already held by caller */
int netvsc_send(struct net_device *ndev,
                struct hv_netvsc_packet *packet,
                struct rndis_message *rndis_msg,
                struct hv_page_buffer *pb,
                struct sk_buff *skb)
{
        struct net_device_context *ndev_ctx = netdev_priv(ndev);
        struct netvsc_device *net_device
                = rcu_dereference_bh(ndev_ctx->nvdev);
        struct hv_device *device = ndev_ctx->device_ctx;
        int ret = 0;
        struct netvsc_channel *nvchan;
        u32 pktlen = packet->total_data_buflen, msd_len = 0;
        unsigned int section_index = NETVSC_INVALID_INDEX;
        struct multi_send_data *msdp;
        struct hv_netvsc_packet *msd_send = NULL, *cur_send = NULL;
        struct sk_buff *msd_skb = NULL;
        bool try_batch, xmit_more;

        /* If device is rescinded, return error and packet will get dropped. */
        if (unlikely(!net_device || net_device->destroy))
                return -ENODEV;

        nvchan = &net_device->chan_table[packet->q_idx];
        packet->send_buf_index = NETVSC_INVALID_INDEX;
        packet->cp_partial = false;

        /* Send control message directly without accessing msd (Multi-Send
         * Data) field which may be changed during data packet processing.
         */
        if (!skb)
                return netvsc_send_pkt(device, packet, net_device, pb, skb);

        /* batch packets in send buffer if possible */
        msdp = &nvchan->msd;
        if (msdp->pkt)
                msd_len = msdp->pkt->total_data_buflen;

        try_batch = msd_len > 0 && msdp->count < net_device->max_pkt;
        if (try_batch && msd_len + pktlen + net_device->pkt_align <
            net_device->send_section_size) {
                section_index = msdp->pkt->send_buf_index;

        } else if (try_batch && msd_len + packet->rmsg_size <
                   net_device->send_section_size) {
                section_index = msdp->pkt->send_buf_index;
                packet->cp_partial = true;

        } else if (pktlen + net_device->pkt_align <
                   net_device->send_section_size) {
                section_index = netvsc_get_next_send_section(net_device);
                if (unlikely(section_index == NETVSC_INVALID_INDEX)) {
                        ++ndev_ctx->eth_stats.tx_send_full;
                } else {
                        move_pkt_msd(&msd_send, &msd_skb, msdp);
                        msd_len = 0;
                }
        }

        /* Keep aggregating only if the stack says more data is coming,
         * we are not doing a mixed-mode send, and the queue is not
         * flow blocked.
         */
        xmit_more = skb->xmit_more &&
                !packet->cp_partial &&
                !netif_xmit_stopped(netdev_get_tx_queue(ndev, packet->q_idx));

        if (section_index != NETVSC_INVALID_INDEX) {
                netvsc_copy_to_send_buf(net_device,
                                        section_index, msd_len,
                                        packet, rndis_msg, pb, xmit_more);

                packet->send_buf_index = section_index;

                if (packet->cp_partial) {
                        packet->page_buf_cnt -= packet->rmsg_pgcnt;
                        packet->total_data_buflen = msd_len + packet->rmsg_size;
                } else {
                        packet->page_buf_cnt = 0;
                        packet->total_data_buflen += msd_len;
                }

                if (msdp->pkt) {
                        packet->total_packets += msdp->pkt->total_packets;
                        packet->total_bytes += msdp->pkt->total_bytes;
                }

                if (msdp->skb)
                        dev_consume_skb_any(msdp->skb);

                if (xmit_more) {
                        msdp->skb = skb;
                        msdp->pkt = packet;
                        msdp->count++;
                } else {
                        cur_send = packet;
                        msdp->skb = NULL;
                        msdp->pkt = NULL;
                        msdp->count = 0;
                }
        } else {
                move_pkt_msd(&msd_send, &msd_skb, msdp);
                cur_send = packet;
        }

        if (msd_send) {
                int m_ret = netvsc_send_pkt(device, msd_send, net_device,
                                            NULL, msd_skb);

                if (m_ret != 0) {
                        netvsc_free_send_slot(net_device,
                                              msd_send->send_buf_index);
                        dev_kfree_skb_any(msd_skb);
                }
        }

        if (cur_send)
                ret = netvsc_send_pkt(device, cur_send, net_device, pb, skb);

        if (ret != 0 && section_index != NETVSC_INVALID_INDEX)
                netvsc_free_send_slot(net_device, section_index);

        return ret;
}

/* Send pending recv completions */
static int send_recv_completions(struct net_device *ndev,
                                 struct netvsc_device *nvdev,
                                 struct netvsc_channel *nvchan)
{
        struct multi_recv_comp *mrc = &nvchan->mrc;
        struct recv_comp_msg {
                struct nvsp_message_header hdr;
                u32 status;
        } __packed;
        struct recv_comp_msg msg = {
                .hdr.msg_type = NVSP_MSG1_TYPE_SEND_RNDIS_PKT_COMPLETE,
        };
        int ret;

        while (mrc->first != mrc->next) {
                const struct recv_comp_data *rcd
                        = mrc->slots + mrc->first;

                msg.status = rcd->status;
                ret = vmbus_sendpacket(nvchan->channel, &msg, sizeof(msg),
                                       rcd->tid, VM_PKT_COMP, 0);
                if (unlikely(ret)) {
                        struct net_device_context *ndev_ctx = netdev_priv(ndev);

                        ++ndev_ctx->eth_stats.rx_comp_busy;
                        return ret;
                }

                if (++mrc->first == nvdev->recv_completion_cnt)
                        mrc->first = 0;
        }

        /* receive completion ring has been emptied */
        if (unlikely(nvdev->destroy))
                wake_up(&nvdev->wait_drain);

        return 0;
}

/* Count how many receive completions are outstanding */
static void recv_comp_slot_avail(const struct netvsc_device *nvdev,
                                 const struct multi_recv_comp *mrc,
                                 u32 *filled, u32 *avail)
{
        u32 count = nvdev->recv_completion_cnt;

        if (mrc->next >= mrc->first)
                *filled = mrc->next - mrc->first;
        else
                *filled = (count - mrc->first) + mrc->next;

        *avail = count - *filled - 1;
}

/* Add receive complete to ring to send to host. */
static void enq_receive_complete(struct net_device *ndev,
                                 struct netvsc_device *nvdev, u16 q_idx,
                                 u64 tid, u32 status)
{
        struct netvsc_channel *nvchan = &nvdev->chan_table[q_idx];
        struct multi_recv_comp *mrc = &nvchan->mrc;
        struct recv_comp_data *rcd;
        u32 filled, avail;

        recv_comp_slot_avail(nvdev, mrc, &filled, &avail);

        if (unlikely(filled > NAPI_POLL_WEIGHT)) {
                send_recv_completions(ndev, nvdev, nvchan);
                recv_comp_slot_avail(nvdev, mrc, &filled, &avail);
        }

        if (unlikely(!avail)) {
                netdev_err(ndev, "Recv_comp full buf q:%hd, tid:%llx\n",
                           q_idx, tid);
                return;
        }

        rcd = mrc->slots + mrc->next;
        rcd->tid = tid;
        rcd->status = status;

        if (++mrc->next == nvdev->recv_completion_cnt)
                mrc->next = 0;
}

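/* Handle a VM_PKT_DATA_USING_XFER_PAGES packet from the host: validate the
 * transfer-page set, hand each range (one RNDIS packet per range) to the
 * RNDIS filter, and queue a receive completion with the overall status.
 */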
static int netvsc_receive(struct net_device *ndev,
                          struct netvsc_device *net_device,
                          struct net_device_context *net_device_ctx,
                          struct hv_device *device,
                          struct vmbus_channel *channel,
                          const struct vmpacket_descriptor *desc,
                          struct nvsp_message *nvsp)
{
        const struct vmtransfer_page_packet_header *vmxferpage_packet
                = container_of(desc, const struct vmtransfer_page_packet_header, d);
        u16 q_idx = channel->offermsg.offer.sub_channel_index;
        char *recv_buf = net_device->recv_buf;
        u32 status = NVSP_STAT_SUCCESS;
        int i;
        int count = 0;

        /* Make sure this is a valid nvsp packet */
        if (unlikely(nvsp->hdr.msg_type != NVSP_MSG1_TYPE_SEND_RNDIS_PKT)) {
                netif_err(net_device_ctx, rx_err, ndev,
                          "Unknown nvsp packet type received %u\n",
                          nvsp->hdr.msg_type);
                return 0;
        }

        if (unlikely(vmxferpage_packet->xfer_pageset_id != NETVSC_RECEIVE_BUFFER_ID)) {
                netif_err(net_device_ctx, rx_err, ndev,
                          "Invalid xfer page set id - expecting %x got %x\n",
                          NETVSC_RECEIVE_BUFFER_ID,
                          vmxferpage_packet->xfer_pageset_id);
                return 0;
        }

        count = vmxferpage_packet->range_cnt;

        /* Each range represents 1 RNDIS pkt that contains 1 ethernet frame */
        for (i = 0; i < count; i++) {
                u32 offset = vmxferpage_packet->ranges[i].byte_offset;
                u32 buflen = vmxferpage_packet->ranges[i].byte_count;
                void *data;
                int ret;

                if (unlikely(offset + buflen > net_device->recv_buf_size)) {
                        status = NVSP_STAT_FAIL;
                        netif_err(net_device_ctx, rx_err, ndev,
                                  "Packet offset:%u + len:%u too big\n",
                                  offset, buflen);

                        continue;
                }

                data = recv_buf + offset;

                trace_rndis_recv(ndev, q_idx, data);

                /* Pass it to the upper layer */
                ret = rndis_filter_receive(ndev, net_device,
                                           channel, data, buflen);

                if (unlikely(ret != NVSP_STAT_SUCCESS))
                        status = NVSP_STAT_FAIL;
        }

        enq_receive_complete(ndev, net_device, q_idx,
                             vmxferpage_packet->d.trans_id, status);

        return count;
}

static void netvsc_send_table(struct hv_device *hdev,
                              struct nvsp_message *nvmsg)
{
        struct net_device *ndev = hv_get_drvdata(hdev);
        struct net_device_context *net_device_ctx = netdev_priv(ndev);
        int i;
        u32 count, *tab;

        count = nvmsg->msg.v5_msg.send_table.count;
        if (count != VRSS_SEND_TAB_SIZE) {
                netdev_err(ndev, "Received wrong send-table size:%u\n", count);
                return;
        }

        tab = (u32 *)((unsigned long)&nvmsg->msg.v5_msg.send_table +
                      nvmsg->msg.v5_msg.send_table.offset);

        for (i = 0; i < count; i++)
                net_device_ctx->tx_table[i] = tab[i];
}

static void netvsc_send_vf(struct net_device_context *net_device_ctx,
                           struct nvsp_message *nvmsg)
{
        net_device_ctx->vf_alloc = nvmsg->msg.v4_msg.vf_assoc.allocated;
        net_device_ctx->vf_serial = nvmsg->msg.v4_msg.vf_assoc.serial;
}

static inline void netvsc_receive_inband(struct hv_device *hdev,
                                 struct net_device_context *net_device_ctx,
                                 struct nvsp_message *nvmsg)
{
        switch (nvmsg->hdr.msg_type) {
        case NVSP_MSG5_TYPE_SEND_INDIRECTION_TABLE:
                netvsc_send_table(hdev, nvmsg);
                break;

        case NVSP_MSG4_TYPE_SEND_VF_ASSOCIATION:
                netvsc_send_vf(net_device_ctx, nvmsg);
                break;
        }
}

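/* Dispatch one VMBus packet to the completion, transfer-page receive or
 * inband handler based on its descriptor type; returns the number of
 * RNDIS packets consumed for transfer-page data (0 otherwise) so NAPI
 * can account for them against its budget.
 */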
static int netvsc_process_raw_pkt(struct hv_device *device,
                                  struct vmbus_channel *channel,
                                  struct netvsc_device *net_device,
                                  struct net_device *ndev,
                                  const struct vmpacket_descriptor *desc,
                                  int budget)
{
        struct net_device_context *net_device_ctx = netdev_priv(ndev);
        struct nvsp_message *nvmsg = hv_pkt_data(desc);

        trace_nvsp_recv(ndev, channel, nvmsg);

        switch (desc->type) {
        case VM_PKT_COMP:
                netvsc_send_completion(net_device, channel, device,
                                       desc, budget);
                break;

        case VM_PKT_DATA_USING_XFER_PAGES:
                return netvsc_receive(ndev, net_device, net_device_ctx,
                                      device, channel, desc, nvmsg);

        case VM_PKT_DATA_INBAND:
                netvsc_receive_inband(device, net_device_ctx, nvmsg);
                break;

        default:
                netdev_err(ndev, "unhandled packet type %d, tid %llx\n",
                           desc->type, desc->trans_id);
                break;
        }

        return 0;
}

static struct hv_device *netvsc_channel_to_device(struct vmbus_channel *channel)
{
        struct vmbus_channel *primary = channel->primary_channel;

        return primary ? primary->device_obj : channel->device_obj;
}

/* Network processing softirq
 * Process data in incoming ring buffer from host
 * Stops when ring is empty or budget is met or exceeded.
 */
int netvsc_poll(struct napi_struct *napi, int budget)
{
        struct netvsc_channel *nvchan
                = container_of(napi, struct netvsc_channel, napi);
        struct netvsc_device *net_device = nvchan->net_device;
        struct vmbus_channel *channel = nvchan->channel;
        struct hv_device *device = netvsc_channel_to_device(channel);
        struct net_device *ndev = hv_get_drvdata(device);
        int work_done = 0;

        /* If starting a new interval */
        if (!nvchan->desc)
                nvchan->desc = hv_pkt_iter_first(channel);

        while (nvchan->desc && work_done < budget) {
                work_done += netvsc_process_raw_pkt(device, channel, net_device,
                                                    ndev, nvchan->desc, budget);
                nvchan->desc = hv_pkt_iter_next(channel, nvchan->desc);
        }

        /* If send of pending receive completions succeeded
         *   and did not exhaust NAPI budget this time
         *   and not doing busy poll
         * then re-enable host interrupts
         *     and reschedule if ring is not empty.
         */
        if (send_recv_completions(ndev, net_device, nvchan) == 0 &&
            work_done < budget &&
            napi_complete_done(napi, work_done) &&
            hv_end_read(&channel->inbound) &&
            napi_schedule_prep(napi)) {
                hv_begin_read(&channel->inbound);
                __napi_schedule(napi);
        }

        /* Driver may overshoot since multiple packets per descriptor */
        return min(work_done, budget);
}

/* Call back when data is available in host ring buffer.
 * Processing is deferred until network softirq (NAPI)
 */
void netvsc_channel_cb(void *context)
{
        struct netvsc_channel *nvchan = context;
        struct vmbus_channel *channel = nvchan->channel;
        struct hv_ring_buffer_info *rbi = &channel->inbound;

        /* preload first vmpacket descriptor */
        prefetch(hv_get_ring_buffer(rbi) + rbi->priv_read_index);

        if (napi_schedule_prep(&nvchan->napi)) {
                /* disable interrupts from host */
                hv_begin_read(rbi);

                __napi_schedule_irqoff(&nvchan->napi);
        }
}

/*
 * netvsc_device_add - Callback when the device belonging to this
 * driver is added
 */
struct netvsc_device *netvsc_device_add(struct hv_device *device,
                                const struct netvsc_device_info *device_info)
{
        int i, ret = 0;
        struct netvsc_device *net_device;
        struct net_device *ndev = hv_get_drvdata(device);
        struct net_device_context *net_device_ctx = netdev_priv(ndev);

        net_device = alloc_net_device();
        if (!net_device)
                return ERR_PTR(-ENOMEM);

        for (i = 0; i < VRSS_SEND_TAB_SIZE; i++)
                net_device_ctx->tx_table[i] = 0;

        /* Because the device uses NAPI, all the interrupt batching and
         * control is done via Net softirq, not the channel handling
         */
        set_channel_read_mode(device->channel, HV_CALL_ISR);

        /* If we're reopening the device we may have multiple queues, fill the
         * chn_table with the default channel to use it before subchannels are
         * opened.
         * Initialize the channel state before we open;
         * we can be interrupted as soon as we open the channel.
         */

        for (i = 0; i < VRSS_CHANNEL_MAX; i++) {
                struct netvsc_channel *nvchan = &net_device->chan_table[i];

                nvchan->channel = device->channel;
                nvchan->net_device = net_device;
                u64_stats_init(&nvchan->tx_stats.syncp);
                u64_stats_init(&nvchan->rx_stats.syncp);
        }

        /* Enable NAPI handler before init callbacks */
        netif_napi_add(ndev, &net_device->chan_table[0].napi,
                       netvsc_poll, NAPI_POLL_WEIGHT);

        /* Open the channel */
        ret = vmbus_open(device->channel, netvsc_ring_bytes,
                         netvsc_ring_bytes, NULL, 0,
                         netvsc_channel_cb, net_device->chan_table);

        if (ret != 0) {
                netdev_err(ndev, "unable to open channel: %d\n", ret);
                goto cleanup;
        }

        /* Channel is opened */
        netdev_dbg(ndev, "hv_netvsc channel opened successfully\n");

        napi_enable(&net_device->chan_table[0].napi);

        /* Connect with the NetVsp */
        ret = netvsc_connect_vsp(device, net_device, device_info);
        if (ret != 0) {
                netdev_err(ndev,
                           "unable to connect to NetVSP - %d\n", ret);
                goto close;
        }

        /* Writing nvdev pointer unlocks netvsc_send(), make sure chn_table is
         * populated.
         */
        rcu_assign_pointer(net_device_ctx->nvdev, net_device);

        return net_device;

close:
        RCU_INIT_POINTER(net_device_ctx->nvdev, NULL);
        napi_disable(&net_device->chan_table[0].napi);

        /* Now, we can close the channel safely */
        vmbus_close(device->channel);

cleanup:
        netif_napi_del(&net_device->chan_table[0].napi);
        free_netvsc_device(&net_device->rcu);

        return ERR_PTR(ret);
}